[lasagne] 01/17: Imported Upstream version 0.1+git20160728.8b66737
Stephen Sinclair
sinclairs-guest at moszumanska.debian.org
Thu Nov 2 23:24:38 UTC 2017
This is an automated email from the git hooks/post-receive script.
sinclairs-guest pushed a commit to branch master
in repository lasagne.
commit 14f47d8ad110401a0125fff50e4213c7d172fe97
Author: Daniel Stender <stender at debian.org>
Date: Thu Aug 18 20:59:26 2016 +0200
Imported Upstream version 0.1+git20160728.8b66737
---
.coveragerc | 2 +
.coveragerc-nogpu | 6 +
.github/CONTRIBUTING.md | 16 +
.github/ISSUE_TEMPLATE.md | 12 +
.github/PULL_REQUEST_TEMPLATE.md | 27 +
.gitignore | 102 ++
.travis.yml | 25 +
CHANGES.rst | 35 +
LICENSE | 28 +
MANIFEST.in | 11 +
README.rst | 138 +++
docs/Makefile | 177 ++++
docs/_static/fix_rtd.css | 4 +
docs/conf.py | 340 +++++++
docs/index.rst | 50 +
docs/modules/init.rst | 60 ++
docs/modules/layers.rst | 212 ++++
docs/modules/layers/base.rst | 13 +
docs/modules/layers/conv.rst | 37 +
docs/modules/layers/corrmm.rst | 8 +
docs/modules/layers/cuda_convnet.rst | 8 +
docs/modules/layers/dense.rst | 13 +
docs/modules/layers/dnn.rst | 11 +
docs/modules/layers/embedding.rst | 10 +
docs/modules/layers/helper.rst | 15 +
docs/modules/layers/input.rst | 10 +
docs/modules/layers/merge.rst | 18 +
docs/modules/layers/noise.rst | 15 +
docs/modules/layers/normalization.rst | 15 +
docs/modules/layers/pool.rst | 34 +
docs/modules/layers/recurrent.rst | 22 +
docs/modules/layers/shape.rst | 29 +
docs/modules/layers/special.rst | 40 +
docs/modules/nonlinearities.rst | 38 +
docs/modules/objectives.rst | 27 +
docs/modules/random.rst | 7 +
docs/modules/regularization.rst | 20 +
docs/modules/updates.rst | 31 +
docs/modules/utils.rst | 13 +
docs/user/custom_layers.rst | 159 +++
docs/user/development.rst | 234 +++++
docs/user/installation.rst | 249 +++++
docs/user/layers.rst | 203 ++++
docs/user/tutorial.rst | 620 ++++++++++++
examples/mnist.py | 362 +++++++
examples/recurrent.py | 171 ++++
lasagne/__init__.py | 34 +
lasagne/conftest.py | 12 +
lasagne/init.py | 367 +++++++
lasagne/layers/__init__.py | 13 +
lasagne/layers/base.py | 328 ++++++
lasagne/layers/conv.py | 934 ++++++++++++++++++
lasagne/layers/corrmm.py | 147 +++
lasagne/layers/cuda_convnet.py | 634 ++++++++++++
lasagne/layers/dense.py | 192 ++++
lasagne/layers/dnn.py | 593 +++++++++++
lasagne/layers/embedding.py | 69 ++
lasagne/layers/helper.py | 520 ++++++++++
lasagne/layers/input.py | 75 ++
lasagne/layers/merge.py | 403 ++++++++
lasagne/layers/noise.py | 136 +++
lasagne/layers/normalization.py | 375 +++++++
lasagne/layers/pool.py | 639 ++++++++++++
lasagne/layers/recurrent.py | 1480 ++++++++++++++++++++++++++++
lasagne/layers/shape.py | 397 ++++++++
lasagne/layers/special.py | 1155 ++++++++++++++++++++++
lasagne/nonlinearities.py | 305 ++++++
lasagne/objectives.py | 379 +++++++
lasagne/random.py | 36 +
lasagne/regularization.py | 189 ++++
lasagne/tests/conftest.py | 10 +
lasagne/tests/layers/conftest.py | 13 +
lasagne/tests/layers/test_base.py | 180 ++++
lasagne/tests/layers/test_conv.py | 781 +++++++++++++++
lasagne/tests/layers/test_dense.py | 361 +++++++
lasagne/tests/layers/test_embedding.py | 56 ++
lasagne/tests/layers/test_helper.py | 791 +++++++++++++++
lasagne/tests/layers/test_input.py | 41 +
lasagne/tests/layers/test_merge.py | 256 +++++
lasagne/tests/layers/test_noise.py | 127 +++
lasagne/tests/layers/test_normalization.py | 327 ++++++
lasagne/tests/layers/test_pool.py | 905 +++++++++++++++++
lasagne/tests/layers/test_recurrent.py | 1101 +++++++++++++++++++++
lasagne/tests/layers/test_shape.py | 291 ++++++
lasagne/tests/layers/test_special.py | 793 +++++++++++++++
lasagne/tests/test_examples.py | 38 +
lasagne/tests/test_init.py | 351 +++++++
lasagne/tests/test_nonlinearities.py | 69 ++
lasagne/tests/test_objectives.py | 236 +++++
lasagne/tests/test_regularization.py | 99 ++
lasagne/tests/test_theano_extensions.py | 155 +++
lasagne/tests/test_updates.py | 227 +++++
lasagne/tests/test_utils.py | 308 ++++++
lasagne/theano_extensions/__init__.py | 0
lasagne/theano_extensions/conv.py | 273 +++++
lasagne/theano_extensions/padding.py | 53 +
lasagne/updates.py | 819 +++++++++++++++
lasagne/utils.py | 450 +++++++++
requirements-dev.txt | 10 +
requirements.txt | 1 +
setup.cfg | 10 +
setup.py | 67 ++
102 files changed, 22288 insertions(+)
diff --git a/.coveragerc b/.coveragerc
new file mode 100644
index 0000000..3aa94fb
--- /dev/null
+++ b/.coveragerc
@@ -0,0 +1,2 @@
+[run]
+omit = lasagne/tests/*
diff --git a/.coveragerc-nogpu b/.coveragerc-nogpu
new file mode 100644
index 0000000..6feb141
--- /dev/null
+++ b/.coveragerc-nogpu
@@ -0,0 +1,6 @@
+[run]
+omit =
+ lasagne/tests/*
+ lasagne/layers/corrmm.py
+ lasagne/layers/cuda_convnet.py
+ lasagne/layers/dnn.py
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
new file mode 100644
index 0000000..ffffa7a
--- /dev/null
+++ b/.github/CONTRIBUTING.md
@@ -0,0 +1,16 @@
+- **If you have a question or need help using Lasagne**, please post on [our mailing list](https://groups.google.com/forum/#!forum/lasagne-users) instead of creating an issue. Make sure to check the [Lasagne documentation](http://lasagne.readthedocs.org/en/latest/) and the [Theano documentation](http://deeplearning.net/software/theano/) first! You can search the mailing list as well to see if your question has come up before.
+
+- **If you would like to report a bug**, feel free to open an issue. Please verify first that the problem is not in your own code by reviewing the documentation. If you are able to provide a minimal code example that reproduces the bug, this will greatly speed up the process of tracking down the problem.
+
+- **If you would like to contribute**, feel free to open a pull request. Please review our documentation on [what to contribute](http://lasagne.readthedocs.org/en/latest/user/development.html#what-to-contribute) and [how to contribute](http://lasagne.readthedocs.org/en/latest/user/development.html#how-to-contribute). Some contributions may be better suited for our [Recipes repository](https://github.com/Lasagne/Recipes), where we collect examples, tutorials, trained models, utilities and [...]
+
+Links
+-----
+
+- Mailing list: https://groups.google.com/forum/#!forum/lasagne-users
+- Lasagne documentation: http://lasagne.readthedocs.org/en/latest/
+- Theano documentation: http://deeplearning.net/software/theano/
+
+- What to contribute: http://lasagne.readthedocs.org/en/latest/user/development.html#what-to-contribute
+- How to contribute: http://lasagne.readthedocs.org/en/latest/user/development.html#how-to-contribute
+- Recipes repository: https://github.com/Lasagne/Recipes
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
new file mode 100644
index 0000000..3034f40
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE.md
@@ -0,0 +1,12 @@
+Before submitting your issue, please check these hints!
+
+- **If you have a usage question**, please post on [our mailing list](https://groups.google.com/forum/#!forum/lasagne-users) instead of creating an issue.
+ Make sure to check the [Lasagne documentation](http://lasagne.readthedocs.org/en/latest/) and the [Theano documentation](http://deeplearning.net/software/theano/) first!
+ You can search the mailing list as well to see if your question has come up before.
+
+- **If you suspect you have found a bug**, please first try [updating to the bleeding-edge versions of Theano and Lasagne](http://lasagne.readthedocs.io/en/latest/user/installation.html#bleeding-edge-version). It may have been fixed already.
+ If you are not sure whether the problem lies within your code, Theano, or Lasagne, first post on [our mailing list](https://groups.google.com/forum/#!forum/lasagne-users).
+  In any case, try to provide a minimal code example that reproduces the bug, as this will greatly speed up the process of tracking down the problem.
+
+- **If you have a feature request or idea**, please include a clear description of the use case(s) it would enable, referencing research papers if applicable, and indicate whether you would be willing to implement the feature yourself.
+ We are happy to discuss your suggestion, help refining it, and decide upfront whether it would fit the main library or our [Lasagne/Recipes](https://github.com/Lasagne/Recipes).
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 0000000..194342c
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,27 @@
+Before submitting your pull request, please check these hints!
+
+- If you are not familiar with the github workflow, have a look:
+ https://guides.github.com/introduction/flow/
+ In particular, note that in order to update your pull request to include any
+ changes we asked for, you just need to push to your branch again.
+- If your pull request addresses a particular issue from our issue tracker,
+ reference it in your pull request description on github (not the commit
+ message) using the syntax `Closes #123` or `Fixes #123`.
+
+Pull request check list:
+
+- Install Lasagne in editable mode to be able to run tests locally:
+ http://lasagne.readthedocs.io/en/latest/user/development.html#development-setup
+- Make sure PEP8 is followed:
+ `python -m pep8 lasagne/`
+- Make sure the test suite runs through:
+ `python -m py.test`
+ (or, to only run tests that include the substring `foo` in their name:
+ `python -m py.test -k foo`)
+- At the end of the test run output, check if coverage is at 100%. If not (or
+ not for the files you changed), you will need to add tests covering the code
+ you added.
+- It is fine to submit a PR without tests to get initial feedback on the
+ implementation, but we cannot merge it without tests.
+- If you added/changed any documentation, verify that it renders correctly:
+ http://lasagne.readthedocs.io/en/latest/user/development.html#documentation
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b0f5aff
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,102 @@
+# Virtualenv
+bin/
+include/
+src/
+
+# Dataset used in examples
+train-images-idx3-ubyte.gz
+train-labels-idx1-ubyte.gz
+t10k-images-idx3-ubyte.gz
+t10k-labels-idx1-ubyte.gz
+# Dataset used in earlier versions
+mnist.pkl.gz
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.cache
+nosetests.xml
+coverage.xml
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+### vim ###
+[._]*.s[a-w][a-z]
+[._]s[a-w][a-z]
+*.un~
+Session.vim
+.netrwhist
+*~
+
+
+### OSX ###
+.DS_Store
+.AppleDouble
+.LSOverride
+
+# Icon must end with two \r
+Icon
+
+
+# Thumbnails
+._*
+
+# Files that might appear on external disk
+.Spotlight-V100
+.Trashes
+
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+
+# Directories from IDE
+.idea
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..a2698fe
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,25 @@
+language: python
+sudo: false
+python:
+ - "2.7"
+ - "3.4"
+addons:
+ apt:
+ packages:
+ - libblas-dev
+ - liblapack-dev
+ - gfortran
+before_install:
+ - pip install -U pip
+install:
+ - travis_wait travis_retry pip install -r requirements-dev.txt
+ - travis_retry pip install python-coveralls
+ - travis_retry python setup.py dev
+script: py.test --runslow --cov-config=.coveragerc-nogpu
+after_success:
+ - coveralls
+cache:
+ - apt
+ - directories:
+ - $HOME/.cache/pip
+ - $HOME/.theano
diff --git a/CHANGES.rst b/CHANGES.rst
new file mode 100644
index 0000000..8ad9658
--- /dev/null
+++ b/CHANGES.rst
@@ -0,0 +1,35 @@
+Changelog
+---------
+
+0.1 (2015-08-13)
+~~~~~~~~~~~~~~~~
+
+First release.
+
+* core contributors, in alphabetical order:
+
+ * Eric Battenberg (@ebattenberg)
+ * Sander Dieleman (@benanne)
+ * Daniel Nouri (@dnouri)
+ * Eben Olson (@ebenolson)
+ * Aäron van den Oord (@avdnoord)
+ * Colin Raffel (@craffel)
+ * Jan Schlüter (@f0k)
+ * Søren Kaae Sønderby (@skaae)
+
+* extra contributors, in chronological order:
+
+ * Daniel Maturana (@dimatura): documentation, cuDNN layers, LRN
+ * Jonas Degrave (@317070): get_all_param_values() fix
+ * Jack Kelly (@JackKelly): help with recurrent layers
+ * Gábor Takács (@takacsg84): support broadcastable parameters in lasagne.updates
+ * Diogo Moitinho de Almeida (@diogo149): MNIST example fixes
+ * Brian McFee (@bmcfee): MaxPool2DLayer fix
+ * Martin Thoma (@MartinThoma): documentation
+ * Jeffrey De Fauw (@JeffreyDF): documentation, ADAM fix
+ * Michael Heilman (@mheilman): NonlinearityLayer, lasagne.random
+ * Gregory Sanders (@instagibbs): documentation fix
+ * Jon Crall (@erotemic): check for non-positive input shapes
+ * Hendrik Weideman (@hjweide): set_all_param_values() test, MaxPool2DCCLayer fix
+ * Kashif Rasul (@kashif): ADAM simplification
+ * Peter de Rivaz (@peterderivaz): documentation fix
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..940a5d0
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,28 @@
+The MIT License (MIT)
+
+Copyright (c) 2014-2015 Lasagne contributors
+
+Lasagne uses a shared copyright model: each contributor holds copyright over
+their contributions to Lasagne. The project versioning records all such
+contribution and copyright details.
+By contributing to the Lasagne repository through pull-request, comment,
+or otherwise, the contributor releases their content to the license and
+copyright terms herein.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..044ffc6
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,11 @@
+include *.rst
+include *.txt
+include LICENSE
+
+recursive-include lasagne/tests *.py
+include .coveragerc
+recursive-include examples *.py
+recursive-include docs *.rst conf.py *.css Makefile
+
+recursive-exclude * __pycache__
+recursive-exclude * *.py[co]
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..a1846e1
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,138 @@
+.. image:: https://readthedocs.org/projects/lasagne/badge/
+ :target: http://lasagne.readthedocs.org/en/latest/
+
+.. image:: https://travis-ci.org/Lasagne/Lasagne.svg
+ :target: https://travis-ci.org/Lasagne/Lasagne
+
+.. image:: https://img.shields.io/coveralls/Lasagne/Lasagne.svg
+ :target: https://coveralls.io/r/Lasagne/Lasagne
+
+.. image:: https://img.shields.io/badge/license-MIT-blue.svg
+ :target: https://github.com/Lasagne/Lasagne/blob/master/LICENSE
+
+.. image:: https://zenodo.org/badge/16974/Lasagne/Lasagne.svg
+ :target: https://zenodo.org/badge/latestdoi/16974/Lasagne/Lasagne
+
+Lasagne
+=======
+
+Lasagne is a lightweight library to build and train neural networks in Theano.
+Its main features are:
+
+* Supports feed-forward networks such as Convolutional Neural Networks (CNNs),
+ recurrent networks including Long Short-Term Memory (LSTM), and any
+ combination thereof
+* Allows architectures of multiple inputs and multiple outputs, including
+ auxiliary classifiers
+* Many optimization methods including Nesterov momentum, RMSprop and ADAM
+* Freely definable cost function and no need to derive gradients due to
+ Theano's symbolic differentiation
+* Transparent support of CPUs and GPUs due to Theano's expression compiler
+
+Its design is governed by `six principles
+<http://lasagne.readthedocs.org/en/latest/user/development.html#philosophy>`_:
+
+* Simplicity: Be easy to use, easy to understand and easy to extend, to
+ facilitate use in research
+* Transparency: Do not hide Theano behind abstractions, directly process and
+ return Theano expressions or Python / numpy data types
+* Modularity: Allow all parts (layers, regularizers, optimizers, ...) to be
+ used independently of Lasagne
+* Pragmatism: Make common use cases easy, do not overrate uncommon cases
+* Restraint: Do not obstruct users with features they decide not to use
+* Focus: "Do one thing and do it well"
+
+
+Installation
+------------
+
+In short, you can install a known compatible version of Theano and the latest
+Lasagne development version via:
+
+.. code-block:: bash
+
+ pip install -r https://raw.githubusercontent.com/Lasagne/Lasagne/master/requirements.txt
+ pip install https://github.com/Lasagne/Lasagne/archive/master.zip
+
+For more details and alternatives, please see the `Installation instructions
+<http://lasagne.readthedocs.org/en/latest/user/installation.html>`_.
+
+
+Documentation
+-------------
+
+Documentation is available online: http://lasagne.readthedocs.org/
+
+For support, please refer to the `lasagne-users mailing list
+<https://groups.google.com/forum/#!forum/lasagne-users>`_.
+
+
+Example
+-------
+
+.. code-block:: python
+
+ import lasagne
+ import theano
+ import theano.tensor as T
+
+ # create Theano variables for input and target minibatch
+ input_var = T.tensor4('X')
+ target_var = T.ivector('y')
+
+ # create a small convolutional neural network
+ from lasagne.nonlinearities import leaky_rectify, softmax
+ network = lasagne.layers.InputLayer((None, 3, 32, 32), input_var)
+ network = lasagne.layers.Conv2DLayer(network, 64, (3, 3),
+ nonlinearity=leaky_rectify)
+ network = lasagne.layers.Conv2DLayer(network, 32, (3, 3),
+ nonlinearity=leaky_rectify)
+ network = lasagne.layers.Pool2DLayer(network, (3, 3), stride=2, mode='max')
+ network = lasagne.layers.DenseLayer(lasagne.layers.dropout(network, 0.5),
+ 128, nonlinearity=leaky_rectify,
+ W=lasagne.init.Orthogonal())
+ network = lasagne.layers.DenseLayer(lasagne.layers.dropout(network, 0.5),
+ 10, nonlinearity=softmax)
+
+ # create loss function
+ prediction = lasagne.layers.get_output(network)
+ loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
+ loss = loss.mean() + 1e-4 * lasagne.regularization.regularize_network_params(
+ network, lasagne.regularization.l2)
+
+ # create parameter update expressions
+ params = lasagne.layers.get_all_params(network, trainable=True)
+ updates = lasagne.updates.nesterov_momentum(loss, params, learning_rate=0.01,
+ momentum=0.9)
+
+ # compile training function that updates parameters and returns training loss
+ train_fn = theano.function([input_var, target_var], loss, updates=updates)
+
+ # train network (assuming you've got some training data in numpy arrays)
+ for epoch in range(100):
+ loss = 0
+ for input_batch, target_batch in training_data:
+ loss += train_fn(input_batch, target_batch)
+ print("Epoch %d: Loss %g" % (epoch + 1, loss / len(training_data)))
+
+ # use trained network for predictions
+ test_prediction = lasagne.layers.get_output(network, deterministic=True)
+ predict_fn = theano.function([input_var], T.argmax(test_prediction, axis=1))
+ print("Predicted class for first test input: %r" % predict_fn(test_data[0]))
+
+For a fully-functional example, see `examples/mnist.py <examples/mnist.py>`_,
+and check the `Tutorial
+<http://lasagne.readthedocs.org/en/latest/user/tutorial.html>`_ for in-depth
+explanations of the same. More examples, code snippets and reproductions of
+recent research papers are maintained in the separate `Lasagne Recipes
+<https://github.com/Lasagne/Recipes>`_ repository.
+
+
+Development
+-----------
+
+Lasagne is a work in progress, input is welcome.
+
+Please see the `Contribution instructions
+<http://lasagne.readthedocs.org/en/latest/user/development.html>`_ for details
+on how you can contribute!
diff --git a/docs/Makefile b/docs/Makefile
new file mode 100644
index 0000000..a454085
--- /dev/null
+++ b/docs/Makefile
@@ -0,0 +1,177 @@
+# Makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS =
+SPHINXBUILD = sphinx-build
+PAPER =
+BUILDDIR = _build
+
+# User-friendly check for sphinx-build
+ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
+$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
+endif
+
+# Internal variables.
+PAPEROPT_a4 = -D latex_paper_size=a4
+PAPEROPT_letter = -D latex_paper_size=letter
+ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+# the i18n builder cannot share the environment and doctrees with the others
+I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+
+.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext
+
+help:
+ @echo "Please use \`make <target>' where <target> is one of"
+ @echo " html to make standalone HTML files"
+ @echo " dirhtml to make HTML files named index.html in directories"
+ @echo " singlehtml to make a single large HTML file"
+ @echo " pickle to make pickle files"
+ @echo " json to make JSON files"
+ @echo " htmlhelp to make HTML files and a HTML help project"
+ @echo " qthelp to make HTML files and a qthelp project"
+ @echo " devhelp to make HTML files and a Devhelp project"
+ @echo " epub to make an epub"
+ @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+ @echo " latexpdf to make LaTeX files and run them through pdflatex"
+ @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
+ @echo " text to make text files"
+ @echo " man to make manual pages"
+ @echo " texinfo to make Texinfo files"
+ @echo " info to make Texinfo files and run them through makeinfo"
+ @echo " gettext to make PO message catalogs"
+ @echo " changes to make an overview of all changed/added/deprecated items"
+ @echo " xml to make Docutils-native XML files"
+ @echo " pseudoxml to make pseudoxml-XML files for display purposes"
+ @echo " linkcheck to check all external links for integrity"
+ @echo " doctest to run all doctests embedded in the documentation (if enabled)"
+
+clean:
+ rm -rf $(BUILDDIR)/*
+
+html:
+ $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
+ @echo
+ @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
+
+dirhtml:
+ $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
+ @echo
+ @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
+
+singlehtml:
+ $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
+ @echo
+ @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
+
+pickle:
+ $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
+ @echo
+ @echo "Build finished; now you can process the pickle files."
+
+json:
+ $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
+ @echo
+ @echo "Build finished; now you can process the JSON files."
+
+htmlhelp:
+ $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
+ @echo
+ @echo "Build finished; now you can run HTML Help Workshop with the" \
+ ".hhp project file in $(BUILDDIR)/htmlhelp."
+
+qthelp:
+ $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
+ @echo
+ @echo "Build finished; now you can run "qcollectiongenerator" with the" \
+ ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
+ @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/lasagne.qhcp"
+ @echo "To view the help file:"
+ @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/lasagne.qhc"
+
+devhelp:
+ $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
+ @echo
+ @echo "Build finished."
+ @echo "To view the help file:"
+ @echo "# mkdir -p $$HOME/.local/share/devhelp/lasagne"
+ @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/lasagne"
+ @echo "# devhelp"
+
+epub:
+ $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
+ @echo
+ @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
+
+latex:
+ $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+ @echo
+ @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
+ @echo "Run \`make' in that directory to run these through (pdf)latex" \
+ "(use \`make latexpdf' here to do that automatically)."
+
+latexpdf:
+ $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+ @echo "Running LaTeX files through pdflatex..."
+ $(MAKE) -C $(BUILDDIR)/latex all-pdf
+ @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+latexpdfja:
+ $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+ @echo "Running LaTeX files through platex and dvipdfmx..."
+ $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
+ @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+text:
+ $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
+ @echo
+ @echo "Build finished. The text files are in $(BUILDDIR)/text."
+
+man:
+ $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
+ @echo
+ @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
+
+texinfo:
+ $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+ @echo
+ @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
+ @echo "Run \`make' in that directory to run these through makeinfo" \
+ "(use \`make info' here to do that automatically)."
+
+info:
+ $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+ @echo "Running Texinfo files through makeinfo..."
+ make -C $(BUILDDIR)/texinfo info
+ @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
+
+gettext:
+ $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
+ @echo
+ @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
+
+changes:
+ $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
+ @echo
+ @echo "The overview file is in $(BUILDDIR)/changes."
+
+linkcheck:
+ $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
+ @echo
+ @echo "Link check complete; look for any errors in the above output " \
+ "or in $(BUILDDIR)/linkcheck/output.txt."
+
+doctest:
+ $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
+ @echo "Testing of doctests in the sources finished, look at the " \
+ "results in $(BUILDDIR)/doctest/output.txt."
+
+xml:
+ $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
+ @echo
+ @echo "Build finished. The XML files are in $(BUILDDIR)/xml."
+
+pseudoxml:
+ $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
+ @echo
+ @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
diff --git a/docs/_static/fix_rtd.css b/docs/_static/fix_rtd.css
new file mode 100644
index 0000000..11e85fa
--- /dev/null
+++ b/docs/_static/fix_rtd.css
@@ -0,0 +1,4 @@
+/* work around https://github.com/snide/sphinx_rtd_theme/issues/149 */
+.rst-content table.field-list .field-body {
+ padding-top: 8px;
+}
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000..a641f9d
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,340 @@
+# -*- coding: utf-8 -*-
+#
+# Lasagne documentation build configuration file, created by
+# sphinx-quickstart on Sat Nov 8 11:00:12 2014.
+#
+# This file is execfile()d with the current directory set to its
+# containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+# import sys
+import os
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+# sys.path.insert(0, os.path.abspath('.'))
+
+# -- General configuration ------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+# needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+ 'sphinx.ext.autodoc',
+ 'sphinx.ext.autosummary',
+ 'sphinx.ext.doctest',
+ 'sphinx.ext.mathjax',
+# 'sphinx.ext.viewcode', # create HTML file of source code and link to it
+ 'sphinx.ext.linkcode', # link to github, see linkcode_resolve() below
+ 'numpydoc',
+# 'sphinx.ext.napoleon', # alternative to numpydoc -- looks a bit worse.
+]
+
+# See https://github.com/rtfd/readthedocs.org/issues/283
+mathjax_path = ('https://cdn.mathjax.org/mathjax/latest/MathJax.js?'
+ 'config=TeX-AMS-MML_HTMLorMML')
+
+# see http://stackoverflow.com/q/12206334/562769
+numpydoc_show_class_members = False
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix of source filenames.
+source_suffix = '.rst'
+
+# The encoding of source files.
+# source_encoding = 'utf-8-sig'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'Lasagne'
+copyright = u'2014–2015, Lasagne contributors'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+import lasagne
+# The short X.Y version.
+version = '.'.join(lasagne.__version__.split('.', 2)[:2])
+# The full version, including alpha/beta/rc tags.
+release = lasagne.__version__
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+# language = None
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+# today = ''
+# Else, today_fmt is used as the format for a strftime call.
+# today_fmt = '%B %d, %Y'
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+exclude_patterns = ['_build']
+
+# The reST default role (used for this markup: `text`) to use for all
+# documents.
+# default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+# add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+# add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+# show_authors = False
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# A list of ignored prefixes for module index sorting.
+# modindex_common_prefix = []
+
+# If true, keep warnings as "system message" paragraphs in the built documents.
+# keep_warnings = False
+
+# Resolve function for the linkcode extension.
+def linkcode_resolve(domain, info):
+ def find_source():
+ # try to find the file and line number, based on code from numpy:
+ # https://github.com/numpy/numpy/blob/master/doc/source/conf.py#L286
+ obj = sys.modules[info['module']]
+ for part in info['fullname'].split('.'):
+ obj = getattr(obj, part)
+ import inspect
+ import os
+ fn = inspect.getsourcefile(obj)
+ fn = os.path.relpath(fn, start=os.path.dirname(lasagne.__file__))
+ source, lineno = inspect.getsourcelines(obj)
+ return fn, lineno, lineno + len(source) - 1
+
+ if domain != 'py' or not info['module']:
+ return None
+ try:
+ filename = 'lasagne/%s#L%d-L%d' % find_source()
+ except Exception:
+ filename = info['module'].replace('.', '/') + '.py'
+ tag = 'master' if 'dev' in release else ('v' + release)
+ return "https://github.com/Lasagne/Lasagne/blob/%s/%s" % (tag, filename)
+
+
+# -- Options for HTML output ----------------------------------------------
+
+## Classic Python style:
+#html_theme = 'classic'
+#html_theme_options = {
+# 'stickysidebar': True,
+#}
+
+## Read the docs style:
+if os.environ.get('READTHEDOCS') != 'True':
+ try:
+ import sphinx_rtd_theme
+ except ImportError:
+ pass # assume we have sphinx >= 1.3
+ else:
+ html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
+ html_theme = 'sphinx_rtd_theme'
+def setup(app):
+ app.add_stylesheet("fix_rtd.css")
+
+## Bootstrap style:
+#import sphinx_bootstrap_theme
+#html_theme = 'bootstrap'
+#html_theme_options = {
+# 'bootswatch_theme': 'cosmo', # see http://bootswatch.com/ for more
+# 'bootstrap_version': '3',
+# 'navbar_title': 'Lasagne',
+# 'source_link_position': 'footer',
+#}
+#html_theme_path = sphinx_bootstrap_theme.get_html_theme_path()
+
+# The name for this set of Sphinx documents. If None, it defaults to
+# "<project> v<release> documentation".
+# html_title = None
+
+# A shorter title for the navigation bar. Default is the same as html_title.
+# html_short_title = None
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+# html_logo = None
+
+# The name of an image file (within the static path) to use as favicon of the
+# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+# html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+# Add any extra paths that contain custom files (such as robots.txt or
+# .htaccess) here, relative to this directory. These files are copied
+# directly to the root of the documentation.
+# html_extra_path = []
+
+# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
+# using the given strftime format.
+# html_last_updated_fmt = '%b %d, %Y'
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+# html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+# html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+# html_additional_pages = {}
+
+# If false, no module index is generated.
+# html_domain_indices = True
+
+# If false, no index is generated.
+# html_use_index = True
+
+# If true, the index is split into individual pages for each letter.
+# html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+# html_show_sourcelink = True
+
+# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
+# html_show_sphinx = True
+
+# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
+# html_show_copyright = True
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it. The value of this option must be the
+# base URL from which the finished HTML is served.
+# html_use_opensearch = ''
+
+# This is the file name suffix for HTML files (e.g. ".xhtml").
+# html_file_suffix = None
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'lasagnedoc'
+
+
+# -- Options for LaTeX output ---------------------------------------------
+
+latex_elements = {
+ # The paper size ('letterpaper' or 'a4paper').
+ # 'papersize': 'letterpaper',
+
+ # The font size ('10pt', '11pt' or '12pt').
+ # 'pointsize': '10pt',
+
+ # Additional stuff for the LaTeX preamble.
+ # 'preamble': '',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+# author, documentclass [howto, manual, or own class]).
+latex_documents = [
+ ('index', 'lasagne.tex', u'lasagne Documentation',
+ u'Lasagne contributors', 'manual'),
+]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+# latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+# latex_use_parts = False
+
+# If true, show page references after internal links.
+# latex_show_pagerefs = False
+
+# If true, show URL addresses after external links.
+# latex_show_urls = False
+
+# Documents to append as an appendix to all manuals.
+# latex_appendices = []
+
+# If false, no module index is generated.
+# latex_domain_indices = True
+
+
+# -- Options for manual page output ---------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+ ('index', 'lasagne', u'Lasagne Documentation',
+ [u'Lasagne contributors'], 1)
+]
+
+# If true, show URL addresses after external links.
+# man_show_urls = False
+
+
+# -- Options for Texinfo output -------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+# dir menu entry, description, category)
+texinfo_documents = [
+ ('index', 'lasagne', u'Lasagne Documentation',
+ u'Lasagne contributors', 'Lasagne',
+ 'One line description of project.', 'Miscellaneous'),
+]
+
+# Documents to append as an appendix to all manuals.
+# texinfo_appendices = []
+
+# If false, no module index is generated.
+# texinfo_domain_indices = True
+
+# How to display URL addresses: 'footnote', 'no', or 'inline'.
+# texinfo_show_urls = 'footnote'
+
+# If true, do not generate a @detailmenu in the "Top" node's menu.
+# texinfo_no_detailmenu = False
+
+
+# fool rtd into thinking a GPU is available, so all modules are importable
+try:
+ from unittest.mock import Mock
+except ImportError:
+ from mock import Mock
+
+import theano
+import theano.sandbox.cuda
+
+theano.config = Mock(device='gpu')
+theano.sandbox.cuda.cuda_enabled = True
+theano.sandbox.cuda.dnn = Mock(dnn_available=lambda: True)
+
+import sys
+
+sys.modules['pylearn2'] = Mock()
+sys.modules['pylearn2.sandbox'] = Mock()
+sys.modules['pylearn2.sandbox.cuda_convnet'] = Mock()
+sys.modules['pylearn2.sandbox.cuda_convnet.filter_acts'] = \
+ Mock(FilterActs=None)
+
+sys.modules['theano.sandbox.cuda.blas'] = Mock(GpuCorrMM=None)
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 0000000..d975985
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,50 @@
+Welcome to Lasagne
+==================
+
+Lasagne is a lightweight library to build and train neural networks in Theano.
+
+Lasagne is a work in progress, input is welcome. The available documentation is
+limited for now. The project is on `GitHub`_.
+
+User Guide
+------------
+
+The Lasagne user guide explains how to install Lasagne, how to build and train
+neural networks using Lasagne, and how to contribute to the library as a
+developer.
+
+.. toctree::
+ :maxdepth: 2
+
+ user/installation
+ user/tutorial
+ user/layers
+ user/custom_layers
+ user/development
+
+API Reference
+-------------
+
+If you are looking for information on a specific function, class or
+method, this part of the documentation is for you.
+
+.. toctree::
+ :maxdepth: 2
+
+ modules/layers
+ modules/updates
+ modules/init
+ modules/nonlinearities
+ modules/objectives
+ modules/regularization
+ modules/random
+ modules/utils
+
+Indices and tables
+------------------
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
+
+.. _GitHub: https://github.com/Lasagne/Lasagne
\ No newline at end of file
diff --git a/docs/modules/init.rst b/docs/modules/init.rst
new file mode 100644
index 0000000..45a6a90
--- /dev/null
+++ b/docs/modules/init.rst
@@ -0,0 +1,60 @@
+:mod:`lasagne.init`
+===================
+
+.. automodule:: lasagne.init
+
+Initializers
+------------
+
+.. autosummary::
+
+ Constant
+ Normal
+ Uniform
+ Glorot
+ GlorotNormal
+ GlorotUniform
+ He
+ HeNormal
+ HeUniform
+ Orthogonal
+ Sparse
+
+Detailed description
+--------------------
+
+.. autoclass:: Initializer
+ :members:
+
+.. autoclass:: Constant
+ :members:
+
+.. autoclass:: Normal
+ :members:
+
+.. autoclass:: Uniform
+ :members:
+
+.. autoclass:: Glorot
+ :members:
+
+.. autoclass:: GlorotNormal
+ :members:
+
+.. autoclass:: GlorotUniform
+ :members:
+
+.. autoclass:: He
+ :members:
+
+.. autoclass:: HeNormal
+ :members:
+
+.. autoclass:: HeUniform
+ :members:
+
+.. autoclass:: Orthogonal
+ :members:
+
+.. autoclass:: Sparse
+ :members:
diff --git a/docs/modules/layers.rst b/docs/modules/layers.rst
new file mode 100644
index 0000000..9aa3e8d
--- /dev/null
+++ b/docs/modules/layers.rst
@@ -0,0 +1,212 @@
+:mod:`lasagne.layers`
+=====================
+
+.. automodule:: lasagne.layers
+
+.. toctree::
+ :hidden:
+
+ layers/helper
+ layers/base
+ layers/input
+ layers/dense
+ layers/conv
+ layers/pool
+ layers/recurrent
+ layers/noise
+ layers/shape
+ layers/merge
+ layers/normalization
+ layers/embedding
+ layers/special
+ layers/corrmm
+ layers/cuda_convnet
+ layers/dnn
+
+
+.. rubric:: :doc:`layers/helper`
+
+.. autosummary::
+ :nosignatures:
+
+ get_output
+ get_output_shape
+ get_all_layers
+ get_all_params
+ count_params
+ get_all_param_values
+ set_all_param_values
+
+
+.. rubric:: :doc:`layers/base`
+
+.. autosummary::
+ :nosignatures:
+
+ Layer
+ MergeLayer
+
+
+.. rubric:: :doc:`layers/input`
+
+.. autosummary::
+ :nosignatures:
+
+ InputLayer
+
+
+.. rubric:: :doc:`layers/dense`
+
+.. autosummary::
+ :nosignatures:
+
+ DenseLayer
+ NINLayer
+
+
+.. rubric:: :doc:`layers/conv`
+
+.. autosummary::
+ :nosignatures:
+
+ Conv1DLayer
+ Conv2DLayer
+ TransposedConv2DLayer
+ Deconv2DLayer
+ DilatedConv2DLayer
+
+
+.. rubric:: :doc:`layers/pool`
+
+.. autosummary::
+ :nosignatures:
+
+ MaxPool1DLayer
+ MaxPool2DLayer
+ Pool1DLayer
+ Pool2DLayer
+ Upscale1DLayer
+ Upscale2DLayer
+ GlobalPoolLayer
+ FeaturePoolLayer
+ FeatureWTALayer
+
+
+.. rubric:: :doc:`layers/recurrent`
+
+.. autosummary::
+ :nosignatures:
+
+ CustomRecurrentLayer
+ RecurrentLayer
+ LSTMLayer
+ GRULayer
+ Gate
+
+
+.. rubric:: :doc:`layers/noise`
+
+.. autosummary::
+ :nosignatures:
+
+ DropoutLayer
+ dropout
+ GaussianNoiseLayer
+
+
+.. rubric:: :doc:`layers/shape`
+
+.. autosummary::
+ :nosignatures:
+
+ ReshapeLayer
+ reshape
+ FlattenLayer
+ flatten
+ DimshuffleLayer
+ dimshuffle
+ PadLayer
+ pad
+ SliceLayer
+
+
+.. rubric:: :doc:`layers/merge`
+
+.. autosummary::
+ :nosignatures:
+
+ ConcatLayer
+ concat
+ ElemwiseMergeLayer
+ ElemwiseSumLayer
+
+
+.. rubric:: :doc:`layers/normalization`
+
+.. autosummary::
+ :nosignatures:
+
+ LocalResponseNormalization2DLayer
+ BatchNormLayer
+ batch_norm
+
+
+.. rubric:: :doc:`layers/embedding`
+
+.. autosummary::
+ :nosignatures:
+
+ EmbeddingLayer
+
+
+.. rubric:: :doc:`layers/special`
+
+.. autosummary::
+ :nosignatures:
+
+ NonlinearityLayer
+ BiasLayer
+ ExpressionLayer
+ InverseLayer
+ TransformerLayer
+ ParametricRectifierLayer
+ prelu
+ RandomizedRectifierLayer
+ rrelu
+
+
+.. rubric:: :doc:`layers/corrmm`
+
+.. autosummary::
+ :nosignatures:
+
+ corrmm.Conv2DMMLayer
+
+
+.. rubric:: :doc:`layers/cuda_convnet`
+
+.. autosummary::
+ :nosignatures:
+
+ cuda_convnet.Conv2DCCLayer
+ cuda_convnet.MaxPool2DCCLayer
+ cuda_convnet.ShuffleBC01ToC01BLayer
+ cuda_convnet.bc01_to_c01b
+ cuda_convnet.ShuffleC01BToBC01Layer
+ cuda_convnet.c01b_to_bc01
+ cuda_convnet.NINLayer_c01b
+
+
+.. rubric:: :doc:`layers/dnn`
+
+.. autosummary::
+ :nosignatures:
+
+ dnn.Conv2DDNNLayer
+ dnn.Conv3DDNNLayer
+ dnn.MaxPool2DDNNLayer
+ dnn.Pool2DDNNLayer
+ dnn.MaxPool3DDNNLayer
+ dnn.Pool3DDNNLayer
+ dnn.SpatialPyramidPoolingDNNLayer
+
diff --git a/docs/modules/layers/base.rst b/docs/modules/layers/base.rst
new file mode 100644
index 0000000..55c1869
--- /dev/null
+++ b/docs/modules/layers/base.rst
@@ -0,0 +1,13 @@
+Layer base classes
+------------------
+
+.. automodule:: lasagne.layers.base
+
+.. currentmodule:: lasagne.layers
+
+.. autoclass:: Layer
+ :members:
+
+.. autoclass:: MergeLayer
+ :members:
+
diff --git a/docs/modules/layers/conv.rst b/docs/modules/layers/conv.rst
new file mode 100644
index 0000000..9938d2b
--- /dev/null
+++ b/docs/modules/layers/conv.rst
@@ -0,0 +1,37 @@
+Convolutional layers
+--------------------
+
+.. automodule:: lasagne.layers.conv
+
+.. currentmodule:: lasagne.layers
+
+.. autoclass:: Conv1DLayer
+ :members:
+
+.. autoclass:: Conv2DLayer
+ :members:
+
+.. note::
+ For experts: ``Conv2DLayer`` will create a convolutional layer using
+ ``T.nnet.conv2d``, Theano's default convolution. On compilation for GPU,
+ Theano replaces this with a `cuDNN`_-based implementation if available,
+ otherwise falls back to a gemm-based implementation. For details on this,
+ please see the `Theano convolution documentation`_.
+
+ Lasagne also provides convolutional layers directly enforcing a specific
+ implementation: :class:`lasagne.layers.dnn.Conv2DDNNLayer` to enforce
+ cuDNN, :class:`lasagne.layers.corrmm.Conv2DMMLayer` to enforce the
+ gemm-based one, :class:`lasagne.layers.cuda_convnet.Conv2DCCLayer` for
+ Krizhevsky's `cuda-convnet`_.
+
+.. _cuda-convnet: https://code.google.com/p/cuda-convnet/
+.. _cuDNN: https://developer.nvidia.com/cudnn
+.. _Theano convolution documentation: http://deeplearning.net/software/theano/library/tensor/nnet/conv.html
+
+.. autoclass:: TransposedConv2DLayer
+ :members:
+
+.. autoclass:: Deconv2DLayer
+
+.. autoclass:: DilatedConv2DLayer
+ :members:
diff --git a/docs/modules/layers/corrmm.rst b/docs/modules/layers/corrmm.rst
new file mode 100644
index 0000000..83fa795
--- /dev/null
+++ b/docs/modules/layers/corrmm.rst
@@ -0,0 +1,8 @@
+:mod:`lasagne.layers.corrmm`
+----------------------------
+
+This module houses layers that require a GPU to work. Its layers are not automatically imported into the :mod:`lasagne.layers` namespace: to use these layers, you need to ``import lasagne.layers.corrmm`` explicitly.
+
+.. automodule:: lasagne.layers.corrmm
+ :members:
+
diff --git a/docs/modules/layers/cuda_convnet.rst b/docs/modules/layers/cuda_convnet.rst
new file mode 100644
index 0000000..4e69a6f
--- /dev/null
+++ b/docs/modules/layers/cuda_convnet.rst
@@ -0,0 +1,8 @@
+:mod:`lasagne.layers.cuda_convnet`
+----------------------------------
+
+This module houses layers that require `pylearn2 <https://deeplearning.net/software/pylearn2>`_ to work. Its layers are not automatically imported into the :mod:`lasagne.layers` namespace: to use these layers, you need to ``import lasagne.layers.cuda_convnet`` explicitly.
+
+.. automodule:: lasagne.layers.cuda_convnet
+ :members:
+
diff --git a/docs/modules/layers/dense.rst b/docs/modules/layers/dense.rst
new file mode 100644
index 0000000..2f2fa97
--- /dev/null
+++ b/docs/modules/layers/dense.rst
@@ -0,0 +1,13 @@
+Dense layers
+------------
+
+.. automodule:: lasagne.layers.dense
+
+.. currentmodule:: lasagne.layers
+
+.. autoclass:: DenseLayer
+ :members:
+
+.. autoclass:: NINLayer
+ :members:
+
diff --git a/docs/modules/layers/dnn.rst b/docs/modules/layers/dnn.rst
new file mode 100644
index 0000000..167abc5
--- /dev/null
+++ b/docs/modules/layers/dnn.rst
@@ -0,0 +1,11 @@
+:mod:`lasagne.layers.dnn`
+-------------------------
+
+This module houses layers that require `cuDNN <https://developer.nvidia.com/cudnn>`_ to work. Its layers are not automatically imported into the :mod:`lasagne.layers` namespace: to use these layers, you need to ``import lasagne.layers.dnn`` explicitly.
+
+Note that these layers are not required to use cuDNN: If cuDNN is available, Theano will use it for the default convolution and pooling layers anyway.
+However, they allow you to enforce the usage of cuDNN or use features not available in :mod:`lasagne.layers`.
+
+.. automodule:: lasagne.layers.dnn
+ :members:
+
diff --git a/docs/modules/layers/embedding.rst b/docs/modules/layers/embedding.rst
new file mode 100644
index 0000000..da750ea
--- /dev/null
+++ b/docs/modules/layers/embedding.rst
@@ -0,0 +1,10 @@
+Embedding layers
+----------------
+
+.. automodule:: lasagne.layers.embedding
+
+.. currentmodule:: lasagne.layers
+
+.. autoclass:: EmbeddingLayer
+ :members:
+
diff --git a/docs/modules/layers/helper.rst b/docs/modules/layers/helper.rst
new file mode 100644
index 0000000..c27eb86
--- /dev/null
+++ b/docs/modules/layers/helper.rst
@@ -0,0 +1,15 @@
+Helper functions
+----------------
+
+.. automodule:: lasagne.layers.helper
+
+.. currentmodule:: lasagne.layers
+
+.. autofunction:: get_output
+.. autofunction:: get_output_shape
+.. autofunction:: get_all_layers
+.. autofunction:: get_all_params
+.. autofunction:: count_params
+.. autofunction:: get_all_param_values
+.. autofunction:: set_all_param_values
+
diff --git a/docs/modules/layers/input.rst b/docs/modules/layers/input.rst
new file mode 100644
index 0000000..83a509c
--- /dev/null
+++ b/docs/modules/layers/input.rst
@@ -0,0 +1,10 @@
+Network input
+-------------
+
+.. automodule:: lasagne.layers.input
+
+.. currentmodule:: lasagne.layers
+
+.. autoclass:: InputLayer
+ :members:
+
diff --git a/docs/modules/layers/merge.rst b/docs/modules/layers/merge.rst
new file mode 100644
index 0000000..ff79ef0
--- /dev/null
+++ b/docs/modules/layers/merge.rst
@@ -0,0 +1,18 @@
+Merge layers
+------------
+
+.. automodule:: lasagne.layers.merge
+
+.. currentmodule:: lasagne.layers
+
+.. autoclass:: ConcatLayer
+ :members:
+
+.. autoclass:: concat
+
+.. autoclass:: ElemwiseMergeLayer
+ :members:
+
+.. autoclass:: ElemwiseSumLayer
+ :members:
+
diff --git a/docs/modules/layers/noise.rst b/docs/modules/layers/noise.rst
new file mode 100644
index 0000000..883bc9c
--- /dev/null
+++ b/docs/modules/layers/noise.rst
@@ -0,0 +1,15 @@
+Noise layers
+------------
+
+.. automodule:: lasagne.layers.noise
+
+.. currentmodule:: lasagne.layers
+
+.. autoclass:: DropoutLayer
+ :members:
+
+.. autoclass:: dropout
+
+.. autoclass:: GaussianNoiseLayer
+ :members:
+
diff --git a/docs/modules/layers/normalization.rst b/docs/modules/layers/normalization.rst
new file mode 100644
index 0000000..9ca062c
--- /dev/null
+++ b/docs/modules/layers/normalization.rst
@@ -0,0 +1,15 @@
+Normalization layers
+--------------------
+
+.. automodule:: lasagne.layers.normalization
+
+.. currentmodule:: lasagne.layers
+
+.. autoclass:: LocalResponseNormalization2DLayer
+ :members:
+
+.. autoclass:: BatchNormLayer
+ :members:
+
+.. autofunction:: batch_norm
+
diff --git a/docs/modules/layers/pool.rst b/docs/modules/layers/pool.rst
new file mode 100644
index 0000000..72a873e
--- /dev/null
+++ b/docs/modules/layers/pool.rst
@@ -0,0 +1,34 @@
+Pooling layers
+--------------
+
+.. automodule:: lasagne.layers.pool
+
+.. currentmodule:: lasagne.layers
+
+.. autoclass:: MaxPool1DLayer
+ :members:
+
+.. autoclass:: MaxPool2DLayer
+ :members:
+
+.. autoclass:: Pool1DLayer
+ :members:
+
+.. autoclass:: Pool2DLayer
+ :members:
+
+.. autoclass:: Upscale1DLayer
+ :members:
+
+.. autoclass:: Upscale2DLayer
+ :members:
+
+.. autoclass:: GlobalPoolLayer
+ :members:
+
+.. autoclass:: FeaturePoolLayer
+ :members:
+
+.. autoclass:: FeatureWTALayer
+ :members:
+
diff --git a/docs/modules/layers/recurrent.rst b/docs/modules/layers/recurrent.rst
new file mode 100644
index 0000000..81fa90e
--- /dev/null
+++ b/docs/modules/layers/recurrent.rst
@@ -0,0 +1,22 @@
+Recurrent layers
+----------------
+
+.. automodule:: lasagne.layers.recurrent
+
+.. currentmodule:: lasagne.layers
+
+.. autoclass:: CustomRecurrentLayer
+ :members:
+
+.. autoclass:: RecurrentLayer
+ :members:
+
+.. autoclass:: LSTMLayer
+ :members:
+
+.. autoclass:: GRULayer
+ :members:
+
+.. autoclass:: Gate
+ :members:
+
diff --git a/docs/modules/layers/shape.rst b/docs/modules/layers/shape.rst
new file mode 100644
index 0000000..5e7baac
--- /dev/null
+++ b/docs/modules/layers/shape.rst
@@ -0,0 +1,29 @@
+Shape layers
+------------
+
+.. automodule:: lasagne.layers.shape
+
+.. currentmodule:: lasagne.layers
+
+.. autoclass:: ReshapeLayer
+ :members:
+
+.. autoclass:: reshape
+
+.. autoclass:: FlattenLayer
+ :members:
+
+.. autoclass:: flatten
+
+.. autoclass:: DimshuffleLayer
+ :members:
+
+.. autoclass:: dimshuffle
+
+.. autoclass:: PadLayer
+ :members:
+
+.. autoclass:: pad
+
+.. autoclass:: SliceLayer
+
diff --git a/docs/modules/layers/special.rst b/docs/modules/layers/special.rst
new file mode 100644
index 0000000..2e9998d
--- /dev/null
+++ b/docs/modules/layers/special.rst
@@ -0,0 +1,40 @@
+Special-purpose layers
+----------------------
+
+.. automodule:: lasagne.layers.special
+
+.. currentmodule:: lasagne.layers
+
+.. autoclass:: NonlinearityLayer
+ :members:
+
+.. autoclass:: BiasLayer
+ :members:
+
+.. autoclass:: ScaleLayer
+ :members:
+
+.. autofunction:: standardize
+
+.. autoclass:: ExpressionLayer
+ :members:
+
+.. autoclass:: InverseLayer
+ :members:
+
+.. autoclass:: TransformerLayer
+ :members:
+
+.. autoclass:: TPSTransformerLayer
+ :members:
+
+.. autoclass:: ParametricRectifierLayer
+ :members:
+
+.. autofunction:: prelu
+
+.. autoclass:: RandomizedRectifierLayer
+ :members:
+
+.. autofunction:: rrelu
+
diff --git a/docs/modules/nonlinearities.rst b/docs/modules/nonlinearities.rst
new file mode 100644
index 0000000..704c35f
--- /dev/null
+++ b/docs/modules/nonlinearities.rst
@@ -0,0 +1,38 @@
+:mod:`lasagne.nonlinearities`
+=============================
+
+.. automodule:: lasagne.nonlinearities
+
+.. autosummary::
+
+ sigmoid
+ softmax
+ tanh
+ ScaledTanH
+ rectify
+ LeakyRectify
+ leaky_rectify
+ very_leaky_rectify
+ elu
+ softplus
+ linear
+ identity
+
+Detailed description
+--------------------
+
+.. autofunction:: sigmoid
+.. autofunction:: softmax
+.. autofunction:: tanh
+.. autoclass:: ScaledTanH
+ :members:
+.. autoclass:: ScaledTanh
+.. autofunction:: rectify
+.. autoclass:: LeakyRectify
+ :members:
+.. autofunction:: leaky_rectify
+.. autofunction:: very_leaky_rectify
+.. autofunction:: elu
+.. autofunction:: softplus
+.. autofunction:: linear
+.. autofunction:: identity
diff --git a/docs/modules/objectives.rst b/docs/modules/objectives.rst
new file mode 100644
index 0000000..565dbaa
--- /dev/null
+++ b/docs/modules/objectives.rst
@@ -0,0 +1,27 @@
+:mod:`lasagne.objectives`
+=========================
+
+.. automodule:: lasagne.objectives
+
+
+Loss functions
+--------------
+
+.. autofunction:: binary_crossentropy
+.. autofunction:: categorical_crossentropy
+.. autofunction:: squared_error
+.. autofunction:: binary_hinge_loss
+.. autofunction:: multiclass_hinge_loss
+
+
+Aggregation functions
+---------------------
+
+.. autofunction:: aggregate
+
+
+Evaluation functions
+--------------------
+
+.. autofunction:: binary_accuracy
+.. autofunction:: categorical_accuracy
diff --git a/docs/modules/random.rst b/docs/modules/random.rst
new file mode 100644
index 0000000..ec0a101
--- /dev/null
+++ b/docs/modules/random.rst
@@ -0,0 +1,7 @@
+:mod:`lasagne.random`
+=====================
+
+.. automodule:: lasagne.random
+
+.. autofunction:: get_rng
+.. autofunction:: set_rng
diff --git a/docs/modules/regularization.rst b/docs/modules/regularization.rst
new file mode 100644
index 0000000..92f0f91
--- /dev/null
+++ b/docs/modules/regularization.rst
@@ -0,0 +1,20 @@
+:mod:`lasagne.regularization`
+=============================
+
+.. automodule:: lasagne.regularization
+
+Helper functions
+----------------
+
+.. autofunction:: apply_penalty
+.. autofunction:: regularize_layer_params
+.. autofunction:: regularize_layer_params_weighted
+.. autofunction:: regularize_network_params
+
+
+Penalty functions
+-----------------
+
+.. autofunction:: l1
+.. autofunction:: l2
+
diff --git a/docs/modules/updates.rst b/docs/modules/updates.rst
new file mode 100644
index 0000000..526aea4
--- /dev/null
+++ b/docs/modules/updates.rst
@@ -0,0 +1,31 @@
+:mod:`lasagne.updates`
+======================
+
+.. automodule:: lasagne.updates
+
+
+Update functions
+----------------
+
+.. autofunction:: sgd
+.. autofunction:: momentum
+.. autofunction:: nesterov_momentum
+.. autofunction:: adagrad
+.. autofunction:: rmsprop
+.. autofunction:: adadelta
+.. autofunction:: adam
+.. autofunction:: adamax
+
+
+Update modification functions
+-----------------------------
+
+.. autofunction:: apply_momentum
+.. autofunction:: apply_nesterov_momentum
+
+
+Helper functions
+----------------
+
+.. autofunction:: norm_constraint
+.. autofunction:: total_norm_constraint
diff --git a/docs/modules/utils.rst b/docs/modules/utils.rst
new file mode 100644
index 0000000..9c8c559
--- /dev/null
+++ b/docs/modules/utils.rst
@@ -0,0 +1,13 @@
+:mod:`lasagne.utils`
+====================
+
+.. automodule:: lasagne.utils
+
+.. autofunction:: floatX
+.. autofunction:: shared_empty
+.. autofunction:: as_theano_expression
+.. autofunction:: collect_shared_vars
+.. autofunction:: one_hot
+.. autofunction:: unique
+.. autofunction:: compute_norms
+.. autofunction:: create_param
diff --git a/docs/user/custom_layers.rst b/docs/user/custom_layers.rst
new file mode 100644
index 0000000..fc9ba11
--- /dev/null
+++ b/docs/user/custom_layers.rst
@@ -0,0 +1,159 @@
+Creating custom layers
+======================
+
+
+A simple layer
+--------------
+
+To implement a custom layer in Lasagne, you will have to write a Python class
+that subclasses :class:`Layer` and implement at least one method:
+`get_output_for()`. This method computes the output of the layer given its
+input. Note that both the output and the input are Theano expressions, so they
+are symbolic.
+
+The following is an example implementation of a layer that multiplies its input
+by 2:
+
+.. code:: python
+
+ class DoubleLayer(lasagne.layers.Layer):
+ def get_output_for(self, input, **kwargs):
+ return 2 * input
+
+This is all that's required to implement a functioning custom layer class in
+Lasagne.
+
+
+A layer that changes the shape
+------------------------------
+
+If the layer does not change the shape of the data (for example because it
+applies an elementwise operation), then implementing only this one method is
+sufficient. Lasagne will assume that the output of the layer has the same shape
+as its input.
+
+However, if the operation performed by the layer changes the shape of the data,
+you also need to implement `get_output_shape_for()`. This method computes the
+shape of the layer output given the shape of its input. Note that this shape
+computation should result in a tuple of integers, so it is *not* symbolic.
+
+This method exists because Lasagne needs a way to propagate shape information
+when a network is defined, so it can determine what sizes the parameter tensors
+should be, for example. This mechanism allows each layer to obtain the size of
+its input from the previous layer, which means you don't have to specify the
+input size manually. This also prevents errors stemming from inconsistencies
+between the layers' expected and actual shapes.
+
+We can implement a layer that computes the sum across the trailing axis of its
+input as follows:
+
+.. code:: python
+
+ class SumLayer(lasagne.layers.Layer):
+ def get_output_for(self, input, **kwargs):
+ return input.sum(axis=-1)
+
+ def get_output_shape_for(self, input_shape):
+ return input_shape[:-1]
+
+
+It is important that the shape computation is correct, as this shape
+information may be used to initialize other layers in the network.
+
+
+A layer with parameters
+-----------------------
+
+If the layer has parameters, these should be initialized in the constructor.
+In Lasagne, parameters are represented by Theano shared variables. A method
+is provided to create and register parameter variables:
+:meth:`lasagne.layers.Layer.add_param()`.
+
+To show how this can be used, here is a layer that multiplies its input
+by a matrix ``W`` (much like a typical fully connected layer in a neural
+network would). This matrix is a parameter of the layer. The shape of the
+matrix will be ``(num_inputs, num_units)``, where ``num_inputs`` is the
+number of input features and ``num_units`` has to be specified when the layer
+is created.
+
+.. code:: python
+
+ class DotLayer(lasagne.layers.Layer):
+ def __init__(self, incoming, num_units, W=lasagne.init.Normal(0.01), **kwargs):
+ super(DotLayer, self).__init__(incoming, **kwargs)
+ num_inputs = self.input_shape[1]
+ self.num_units = num_units
+ self.W = self.add_param(W, (num_inputs, num_units), name='W')
+
+ def get_output_for(self, input, **kwargs):
+ return T.dot(input, self.W)
+
+ def get_output_shape_for(self, input_shape):
+ return (input_shape[0], self.num_units)
+
+A few things are worth noting here: when overriding the constructor, we need
+to call the superclass constructor on the first line. This is important to
+ensure the layer functions properly.
+Note that we pass ``**kwargs`` - although this is not strictly necessary, it
+enables some other cool Lasagne features, such as making it possible to give
+the layer a name:
+
+>>> l_dot = DotLayer(l_in, num_units=50, name='my_dot_layer')
+
+The call to ``self.add_param()`` creates the Theano shared variable
+representing the parameter, and registers it so it can later be retrieved using
+:meth:`lasagne.layers.Layer.get_params()`. It returns the created variable,
+which we tuck away in ``self.W`` for easy access.
+
+Note that we've also made it possible to specify a custom initialization
+strategy for ``W`` by adding a constructor argument for it, e.g.:
+
+>>> l_dot = DotLayer(l_in, num_units=50, W=lasagne.init.Constant(0.0))
+
+This 'Lasagne idiom' of tucking away a created parameter variable in an
+attribute for easy access and adding a constructor argument with the same name
+to specify the initialization strategy is very common throughout the library.
+
+Finally, note that we used ``self.input_shape`` to determine the shape of the
+parameter matrix. This property is available in all Lasagne layers, once the
+superclass constructor has been called.
+
+
+A layer with multiple behaviors
+-------------------------------
+
+Some layers can have multiple behaviors. For example, a layer implementing
+dropout should be able to be switched on or off. During training, we want it
+to apply dropout noise to its input and scale up the remaining values, but
+during evaluation we don't want it to do anything.
+
+For this purpose, the `get_output_for()` method takes optional keyword
+arguments (``kwargs``). When `get_output()` is called to compute an expression
+for the output of a network, all specified keyword arguments are passed to the
+`get_output_for()` methods of all layers in the network.
+
+For layers that add noise for regularization purposes, such as dropout, the
+convention in Lasagne is to use the keyword argument ``deterministic`` to
+control its behavior.
+
+Lasagne's :class:`lasagne.layers.DropoutLayer` looks roughly like this
+(simplified implementation for illustration purposes):
+
+.. code:: python
+
+ from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
+ _srng = RandomStreams()
+
+ class DropoutLayer(Layer):
+ def __init__(self, incoming, p=0.5, **kwargs):
+ super(DropoutLayer, self).__init__(incoming, **kwargs)
+ self.p = p
+
+ def get_output_for(self, input, deterministic=False, **kwargs):
+ if deterministic: # do nothing in the deterministic case
+ return input
+ else: # add dropout noise otherwise
+ retain_prob = 1 - self.p
+ input /= retain_prob
+ return input * _srng.binomial(input.shape, p=retain_prob,
+ dtype=theano.config.floatX)
diff --git a/docs/user/development.rst b/docs/user/development.rst
new file mode 100644
index 0000000..1c548c2
--- /dev/null
+++ b/docs/user/development.rst
@@ -0,0 +1,234 @@
+Development
+===========
+
+The Lasagne project was started by Sander Dieleman in September 2014. It is
+developed by a core team of eight people (in alphabetical order:
+`Eric Battenberg <http://ericbattenberg.com/>`_,
+`Sander Dieleman <http://benanne.github.io>`_,
+`Daniel Nouri <http://danielnouri.org>`_,
+`Eben Olson <https://github.com/ebenolson>`_,
+`Aäron van den Oord <https://twitter.com/avdnoord>`_,
+`Colin Raffel <http://colinraffel.com/>`_,
+`Jan Schlüter <http://www.ofai.at/~jan.schlueter/>`_,
+`Søren Kaae Sønderby <http://www1.bio.ku.dk/english/staff/?pure=en/persons/418078>`_)
+and `numerous additional contributors
+<https://github.com/Lasagne/Lasagne/graphs/contributors>`_ on GitHub:
+https://github.com/Lasagne/Lasagne
+
+As an open-source project by researchers for researchers, we highly welcome
+contributions! Every bit helps and will be credited.
+
+
+
+.. _lasagne-philosopy:
+
+Philosophy
+----------
+
+Lasagne grew out of a need to combine the flexibility of Theano with the availability of the right building blocks for training neural networks. Its development is guided by a number of design goals:
+
+* **Simplicity**: Be easy to use, easy to understand and easy to extend, to
+ facilitate use in research. Interfaces should be kept small, with as few
+ classes and methods as possible. Every added abstraction and feature should
+ be carefully scrutinized, to determine whether the added complexity is
+ justified.
+
+* **Transparency**: Do not hide Theano behind abstractions, directly process
+ and return Theano expressions or Python / numpy data types. Try to rely on
+ Theano's functionality where possible, and follow Theano's conventions.
+
+* **Modularity**: Allow all parts (layers, regularizers, optimizers, ...) to be
+ used independently of Lasagne. Make it easy to use components in isolation or
+ in conjunction with other frameworks.
+
+* **Pragmatism**: Make common use cases easy, do not overrate uncommon cases.
+ Ideally, everything should be possible, but common use cases shouldn't be
+ made more difficult just to cater for exotic ones.
+
+* **Restraint**: Do not obstruct users with features they decide not to use.
+ Both in using and in extending components, it should be possible for users to
+ be fully oblivious to features they do not need.
+
+* **Focus**: "Do one thing and do it well". Do not try to provide a library for
+ everything to do with deep learning.
+
+
+
+What to contribute
+------------------
+
+Give feedback
+~~~~~~~~~~~~~
+
+To send us general feedback, questions or ideas for improvement, please post on
+`our mailing list`_.
+
+If you have a very concrete feature proposal, add it to the `issue tracker on
+GitHub`_:
+
+* Explain how it would work, and link to a scientific paper if applicable.
+* Keep the scope as narrow as possible, to make it easier to implement.
+
+
+Report bugs
+~~~~~~~~~~~
+
+Report bugs at the `issue tracker on GitHub`_.
+If you are reporting a bug, please include:
+
+* your Lasagne and Theano version.
+* steps to reproduce the bug, ideally reduced to a few Python commands.
+* the results you obtain, and the results you expected instead.
+
+If you are unsure whether the behavior you experience is a bug, or if you are
+unsure whether it is related to Lasagne or Theano, please just ask on `our
+mailing list`_ first.
+
+
+Fix bugs
+~~~~~~~~
+
+Look through the GitHub issues for bug reports. Anything tagged with "bug" is
+open to whoever wants to implement it. If you discover a bug in Lasagne you can
+fix yourself, by all means feel free to just implement a fix and not report it
+first.
+
+
+Implement features
+~~~~~~~~~~~~~~~~~~
+
+Look through the GitHub issues for feature proposals. Anything tagged with
+"feature" or "enhancement" is open to whoever wants to implement it. If you
+have a feature in mind you want to implement yourself, please note that Lasagne
+has a fairly narrow focus and we strictly follow a set of :ref:`design
+principles <lasagne-philosopy>`, so we cannot guarantee upfront that your code
+will be included. Please do not hesitate to just propose your idea in a GitHub
+issue or on the mailing list first, so we can discuss it and/or guide you
+through the implementation.
+
+
+Write documentation
+~~~~~~~~~~~~~~~~~~~
+
+Whenever you find something not explained well, misleading, glossed over or
+just wrong, please update it! The *Edit on GitHub* link on the top right of
+every documentation page and the *[source]* link for every documented entity
+in the API reference will help you to quickly locate the origin of any text.
+
+
+
+How to contribute
+-----------------
+
+Edit on GitHub
+~~~~~~~~~~~~~~
+
+As a very easy way of just fixing issues in the documentation, use the *Edit
+on GitHub* link on the top right of a documentation page or the *[source]* link
+of an entity in the API reference to open the corresponding source file in
+GitHub, then click the *Edit this file* link to edit the file in your browser
+and send us a Pull Request. All you need for this is a free GitHub account.
+
+For any more substantial changes, please follow the steps below to setup
+Lasagne for development.
+
+
+Development setup
+~~~~~~~~~~~~~~~~~
+
+First, follow the instructions for performing a development installation of
+Lasagne (including forking on GitHub): :ref:`lasagne-development-install`
+
+To be able to run the tests and build the documentation locally, install
+additional requirements with: ``pip install -r requirements-dev.txt`` (adding
+``--user`` if you want to install to your home directory instead).
+
+If you use the bleeding-edge version of Theano, then instead of running that
+command, just use ``pip install`` to manually install all dependencies listed
+in ``requirements-dev.txt`` with their correct versions; otherwise it will
+attempt to downgrade Theano to the known good version in ``requirements.txt``.
+
+
+Documentation
+~~~~~~~~~~~~~
+
+The documentation is generated with `Sphinx
+<http://sphinx-doc.org/latest/index.html>`_. To build it locally, run the
+following commands:
+
+.. code:: bash
+
+ cd docs
+ make html
+
+Afterwards, open ``docs/_build/html/index.html`` to view the documentation as
+it would appear on `readthedocs <http://lasagne.readthedocs.org/>`_. If you
+changed a lot and seem to get misleading error messages or warnings, run
+``make clean html`` to force Sphinx to recreate all files from scratch.
+
+When writing docstrings, follow existing documentation as much as possible to
+ensure consistency throughout the library. For additional information on the
+syntax and conventions used, please refer to the following documents:
+
+* `reStructuredText Primer <http://sphinx-doc.org/rest.html>`_
+* `Sphinx reST markup constructs <http://sphinx-doc.org/markup/index.html>`_
+* `A Guide to NumPy/SciPy Documentation <https://github.com/numpy/numpy/blob/master/doc/HOWTO_DOCUMENT.rst.txt>`_
+
+
+Testing
+~~~~~~~
+
+Lasagne has a code coverage of 100%, which has proven very helpful in the past,
+but also creates some duties:
+
+* Whenever you change any code, you should test whether it breaks existing
+ features by just running the test suite. The test suite will also be run by
+ `Travis <https://travis-ci.org/>`_ for any Pull Request to Lasagne.
+* Any code you add needs to be accompanied by tests ensuring that nobody else
+ breaks it in future. `Coveralls <https://coveralls.io/>`_ will check whether
+ the code coverage stays at 100% for any Pull Request to Lasagne.
+* Every bug you fix indicates a missing test case, so a proposed bug fix should
+ come with a new test that fails without your fix.
+
+To run the full test suite, just do
+
+.. code:: bash
+
+ py.test
+
+Testing will take over 5 minutes for the first run, but less than a minute for
+subsequent runs when Theano can reuse compiled code. It will end with a code
+coverage report specifying which code lines are not covered by tests, if any.
+Furthermore, it will list any failed tests, and failed `PEP8
+<https://www.python.org/dev/peps/pep-0008/>`_ checks.
+
+To only run tests matching a certain name pattern, use the ``-k`` command line
+switch, e.g., ``-k pool`` will run the pooling layer tests only.
+
+To land in a ``pdb`` debug prompt on a failure to inspect it more closely, use
+the ``--pdb`` switch.
+
+Finally, for a loop-on-failing mode, do ``pip install pytest-xdist`` and run
+``py.test -f``. This will pause after the run, wait for any source file to
+change and run all previously failing tests again.
+
+
+Sending Pull Requests
+~~~~~~~~~~~~~~~~~~~~~
+
+When you're satisfied with your addition, the tests pass and the documentation
+looks good without any markup errors, commit your changes to a new branch, push
+that branch to your fork and send us a Pull Request via GitHub's web interface.
+
+All these steps are nicely explained on GitHub:
+https://guides.github.com/introduction/flow/
+
+When filing your Pull Request, please include a description of what it does, to
+help us reviewing it. If it is fixing an open issue, say, issue #123, add
+*Fixes #123*, *Resolves #123* or *Closes #123* to the description text, so
+GitHub will close it when your request is merged.
+
+
+
+.. _issue tracker on GitHub: https://github.com/Lasagne/Lasagne/issues
+.. _our mailing list: https://groups.google.com/forum/#!forum/lasagne-users
diff --git a/docs/user/installation.rst b/docs/user/installation.rst
new file mode 100644
index 0000000..01d7abd
--- /dev/null
+++ b/docs/user/installation.rst
@@ -0,0 +1,249 @@
+.. _installation:
+
+============
+Installation
+============
+
+Lasagne has a couple of prerequisites that need to be installed first, but it
+is not very picky about versions. The single exception is Theano: Due to its
+tight coupling to Theano, you will have to install a recent version of Theano
+(usually more recent than the latest official release!) fitting the version of
+Lasagne you choose to install.
+
+Most of the instructions below assume you are running a Linux or Mac system,
+but are otherwise very generic. For detailed step-by-step instructions for
+specific platforms including Windows, check our `From Zero to Lasagne
+<https://github.com/Lasagne/Lasagne/wiki/From-Zero-to-Lasagne>`_ guides.
+
+If you run into any trouble, please check the `Theano installation instructions
+<http://deeplearning.net/software/theano/install.html>`_ which cover installing
+the prerequisites for a range of operating systems, or ask for help on `our
+mailing list <https://groups.google.com/d/forum/lasagne-users>`_.
+
+
+Prerequisites
+=============
+
+Python + pip
+------------
+
+Lasagne currently requires Python 2.7 or 3.4 to run. Please install Python via
+the package manager of your operating system if it is not included already.
+
+Python includes ``pip`` for installing additional modules that are not shipped
+with your operating system, or shipped in an old version, and we will make use
+of it below. We recommend installing these modules into your home directory
+via ``--user``, or into a `virtual environment
+<http://www.dabapps.com/blog/introduction-to-pip-and-virtualenv-python/>`_
+via ``virtualenv``.
+
+C compiler
+----------
+
+Theano requires a working C compiler, and numpy/scipy require a compiler as
+well if you install them via ``pip``. On Linux, the default compiler is usually
+``gcc``, and on Mac OS, it's ``clang``. Again, please install them via the
+package manager of your operating system.
+
+numpy/scipy + BLAS
+------------------
+
+Lasagne requires numpy of version 1.6.2 or above, and Theano also requires
+scipy 0.11 or above. Numpy/scipy rely on a BLAS library to provide fast linear
+algebra routines. They will work fine without one, but a lot slower, so it is
+worth getting this right (but this is less important if you plan to use a GPU).
+
+If you install numpy and scipy via your operating system's package manager,
+they should link to the BLAS library installed in your system. If you install
+numpy and scipy via ``pip install numpy`` and ``pip install scipy``, make sure
+to have development headers for your BLAS library installed (e.g., the
+``libopenblas-dev`` package on Debian/Ubuntu) while running the installation
+command. Please refer to the `numpy/scipy build instructions
+<http://www.scipy.org/scipylib/building/index.html>`_ if in doubt.
+
+Theano
+------
+
+The version to install depends on the Lasagne version you choose, so this will
+be handled below.
+
+
+Stable Lasagne release
+======================
+
+Lasagne 0.1 requires a more recent version of Theano than the one available
+on PyPI. To install a version that is known to work, run the following command:
+
+.. code-block:: bash
+
+ pip install -r https://raw.githubusercontent.com/Lasagne/Lasagne/v0.1/requirements.txt
+
+.. warning::
+ An even more recent version of Theano will often work as well, but at the
+ time of writing, a simple ``pip install Theano`` will give you a version that
+ is too old.
+
+To install release 0.1 of Lasagne from PyPI, run the following command:
+
+.. code-block:: bash
+
+ pip install Lasagne==0.1
+
+If you do not use ``virtualenv``, add ``--user`` to both commands to install
+into your home directory instead. To upgrade from an earlier installation, add
+``--upgrade``.
+
+
+Bleeding-edge version
+=====================
+
+The latest development version of Lasagne usually works fine with the latest
+development version of Theano. To install both, run the following commands:
+
+.. code-block:: bash
+
+ pip install --upgrade https://github.com/Theano/Theano/archive/master.zip
+ pip install --upgrade https://github.com/Lasagne/Lasagne/archive/master.zip
+
+Again, add ``--user`` if you want to install to your home directory instead.
+
+
+.. _lasagne-development-install:
+
+Development installation
+========================
+
+Alternatively, you can install Lasagne (and optionally Theano) from source,
+in a way that any changes to your local copy of the source tree take effect
+without requiring a reinstall. This is often referred to as *editable* or
+*development* mode. Firstly, you will need to obtain a copy of the source tree:
+
+.. code-block:: bash
+
+ git clone https://github.com/Lasagne/Lasagne.git
+
+It will be cloned to a subdirectory called ``Lasagne``. Make sure to place it
+in some permanent location, as for an *editable* installation, Python will
+import the module directly from this directory and not copy over the files.
+Enter the directory and install the known good version of Theano:
+
+.. code-block:: bash
+
+ cd Lasagne
+ pip install -r requirements.txt
+
+Alternatively, install the bleeding-edge version of Theano as described in the
+previous section.
+
+To install the Lasagne package itself, in editable mode, run:
+
+.. code-block:: bash
+
+ pip install --editable .
+
+As always, add ``--user`` to install it to your home directory instead.
+
+**Optional**: If you plan to contribute to Lasagne, you will need to fork the
+Lasagne repository on GitHub. This will create a repository under your user
+account. Update your local clone to refer to the official repository as
+``upstream``, and your personal fork as ``origin``:
+
+.. code-block:: bash
+
+ git remote rename origin upstream
+ git remote add origin https://github.com/<your-github-name>/Lasagne.git
+
+If you set up an `SSH key <https://help.github.com/categories/ssh/>`_, use the
+SSH clone URL instead: ``git at github.com:<your-github-name>/Lasagne.git``.
+
+You can now use this installation to develop features and send us pull requests
+on GitHub, see :doc:`development`!
+
+
+GPU support
+===========
+
+Thanks to Theano, Lasagne transparently supports training your networks on a
+GPU, which may be 10 to 50 times faster than training them on a CPU. Currently,
+this requires an NVIDIA GPU with CUDA support, and some additional software for
+Theano to use it.
+
+CUDA
+----
+
+Install the latest CUDA Toolkit and possibly the corresponding driver available
+from NVIDIA: https://developer.nvidia.com/cuda-downloads
+
+Closely follow the *Getting Started Guide* linked underneath the download table
+to be sure you don't mess up your system by installing conflicting drivers.
+
+After installation, make sure ``/usr/local/cuda/bin`` is in your ``PATH``, so
+``nvcc --version`` works. Also make sure ``/usr/local/cuda/lib64`` is in your
+``LD_LIBRARY_PATH``, so the toolkit libraries can be found.
+
+Theano
+------
+
+If CUDA is set up correctly, the following should print some information on
+your GPU (the first CUDA-capable GPU in your system if you have multiple ones):
+
+.. code-block:: bash
+
+ THEANO_FLAGS=device=gpu python -c "import theano; print(theano.sandbox.cuda.device_properties(0))"
+
+To configure Theano to use the GPU by default, create a file ``.theanorc``
+directly in your home directory, with the following contents:
+
+.. code-block:: none
+
+ [global]
+ floatX = float32
+ device = gpu
+
+Optionally add ``allow_gc = False`` for some extra performance at the expense
+of (sometimes substantially) higher GPU memory usage.
+
+If you run into problems, please check Theano's instructions for `Using the GPU
+<http://deeplearning.net/software/theano/tutorial/using_gpu.html>`_.
+
+cuDNN
+-----
+
+NVIDIA provides a library for common neural network operations that especially
+speeds up Convolutional Neural Networks (CNNs). Again, it can be obtained from
+NVIDIA (after registering as a developer): https://developer.nvidia.com/cudnn
+
+Note that it requires a reasonably modern GPU with Compute Capability 3.0 or higher;
+see `NVIDIA's list of CUDA GPUs <https://developer.nvidia.com/cuda-gpus>`_.
+
+To install it, copy the ``*.h`` files to ``/usr/local/cuda/include`` and the
+``lib*`` files to ``/usr/local/cuda/lib64``.
+
+To check whether it is found by Theano, run the following command:
+
+.. code-block:: bash
+
+ python -c "from theano.sandbox.cuda.dnn import dnn_available as d; print(d() or d.msg)"
+
+It will print ``True`` if everything is fine, or an error message otherwise.
+There are no additional steps required for Theano to make use of cuDNN.
+
+Docker
+======
+
+Instead of manually installing Theano and Lasagne on your machines as described above,
+you may want to use a pre-made `Docker <https://www.docker.com/what-docker>`_
+image: `Lasagne Docker (CPU) <https://hub.docker.com/r/kaixhin/lasagne/>`_ or
+`Lasagne Docker (CUDA) <https://hub.docker.com/r/kaixhin/cuda-lasagne/>`_. These
+are updated on a weekly basis with bleeding-edge builds of Theano and Lasagne.
+Examples of running bash in a Docker container are as follows:
+
+.. code-block:: bash
+
+ sudo docker run -it kaixhin/lasagne
+ sudo nvidia-docker run -it kaixhin/cuda-lasagne:7.0
+
+For a guide to Docker, see the `official docs <https://docs.docker.com>`_.
+CUDA support requires `NVIDIA Docker <https://github.com/NVIDIA/nvidia-docker>`_.
+For more details on how to use the Lasagne Docker images,
+consult the `source project <https://github.com/Kaixhin/dockerfiles>`_.
diff --git a/docs/user/layers.rst b/docs/user/layers.rst
new file mode 100644
index 0000000..a5b1f30
--- /dev/null
+++ b/docs/user/layers.rst
@@ -0,0 +1,203 @@
+Layers
+======
+
+
+The `lasagne.layers` module provides various classes representing the layers
+of a neural network. All of them are subclasses of the
+:class:`lasagne.layers.Layer` base class.
+
+Creating a layer
+----------------
+
+A layer can be created as an instance of a `Layer` subclass. For example, a
+dense layer can be created as follows:
+
+>>> import lasagne
+>>> l = lasagne.layers.DenseLayer(l_in, num_units=100) # doctest: +SKIP
+
+This will create a dense layer with 100 units, connected to another layer
+`l_in`.
+
+Creating a network
+------------------
+
+Note that for almost all types of layers, you will have to specify one or more
+other layers that the layer you are creating gets its input from. The main
+exception is :class:`InputLayer`, which can be used to represent the input of
+a network.
+
+Chaining layer instances together like this will allow you to specify your
+desired network structure. Note that the same layer can be used as input to
+multiple other layers, allowing for arbitrary tree and directed acyclic graph
+(DAG) structures.
+
+Here is an example of an MLP with a single hidden layer:
+
+>>> import theano.tensor as T
+>>> l_in = lasagne.layers.InputLayer((100, 50))
+>>> l_hidden = lasagne.layers.DenseLayer(l_in, num_units=200)
+>>> l_out = lasagne.layers.DenseLayer(l_hidden, num_units=10,
+... nonlinearity=T.nnet.softmax)
+
+The first layer of the network is an `InputLayer`, which represents the input.
+When creating an input layer, you should specify the shape of the input data.
+In this example, the input is a matrix with shape (100, 50), representing a
+batch of 100 data points, where each data point is a vector of length 50.
+The first dimension of a tensor is usually the batch dimension, following the
+established Theano and scikit-learn conventions.
+
+The hidden layer of the network is a dense layer with 200 units, taking its
+input from the input layer. Note that we did not specify the nonlinearity of
+the hidden layer. A layer with rectified linear units will be created by
+default.
+
+The output layer of the network is a dense layer with 10 units and a softmax
+nonlinearity, allowing for 10-way classification of the input vectors.
+
+Note also that we did not create any object representing the entire network.
+Instead, the output layer instance `l_out` is also used to refer to the entire
+network in Lasagne.
+
+Naming layers
+-------------
+
+For convenience, you can name a layer by specifying the `name` keyword
+argument:
+
+>>> l_hidden = lasagne.layers.DenseLayer(l_in, num_units=200,
+... name="hidden_layer")
+
+Initializing parameters
+-----------------------
+
+Many types of layers, such as :class:`DenseLayer`, have trainable parameters.
+These are referred to by short names that match the conventions used in modern
+deep learning literature. For example, a weight matrix will usually be called
+`W`, and a bias vector will usually be `b`.
+
+When creating a layer with trainable parameters, Theano shared variables will
+be created for them and initialized automatically. You can optionally specify
+your own initialization strategy by using keyword arguments that match the
+parameter variable names. For example:
+
+>>> l = lasagne.layers.DenseLayer(l_in, num_units=100,
+... W=lasagne.init.Normal(0.01))
+
+The weight matrix `W` of this dense layer will be initialized using samples
+from a normal distribution with standard deviation 0.01 (see `lasagne.init`
+for more information).
+
+There are several ways to manually initialize parameters:
+
+- Theano shared variable
+ If a shared variable instance is provided, this is used unchanged as the
+ parameter variable. For example:
+
+ >>> import theano
+ >>> import numpy as np
+ >>> W = theano.shared(np.random.normal(0, 0.01, (50, 100)))
+ >>> l = lasagne.layers.DenseLayer(l_in, num_units=100, W=W)
+
+- numpy array
+ If a numpy array is provided, a shared variable is created and initialized
+ using the array. For example:
+
+ >>> W_init = np.random.normal(0, 0.01, (50, 100))
+ >>> l = lasagne.layers.DenseLayer(l_in, num_units=100, W=W_init)
+
+- callable
+ If a callable is provided (e.g. a function or a
+ :class:`lasagne.init.Initializer` instance), a shared variable is created
+ and the callable is called with the desired shape to generate suitable
+ initial parameter values. The variable is then initialized with those
+ values. For example:
+
+ >>> l = lasagne.layers.DenseLayer(l_in, num_units=100,
+ ... W=lasagne.init.Normal(0.01))
+
+ Or, using a custom initialization function:
+
+ >>> def init_W(shape):
+ ... return np.random.normal(0, 0.01, shape)
+ >>> l = lasagne.layers.DenseLayer(l_in, num_units=100, W=init_W)
+
+Some types of parameter variables can also be set to ``None`` at initialization
+(e.g. biases). In that case, the parameter variable will be omitted.
+For example, creating a dense layer without biases is done as follows:
+
+>>> l = lasagne.layers.DenseLayer(l_in, num_units=100, b=None)
+
+Parameter sharing
+-----------------
+
+Parameter sharing between multiple layers can be achieved by using the
+same Theano shared variable instance for their parameters. For example:
+
+>>> l1 = lasagne.layers.DenseLayer(l_in, num_units=100)
+>>> l2 = lasagne.layers.DenseLayer(l_in, num_units=100, W=l1.W)
+
+These two layers will now share weights (but have separate biases).
+
+Propagating data through layers
+-------------------------------
+
+To compute an expression for the output of a single layer given its input, the
+`get_output_for()` method can be used. To compute the output of a network, you
+should instead call :func:`lasagne.layers.get_output()` on it. This will
+traverse the network graph.
+
+You can call this function with the layer you want to compute the output
+expression for:
+
+>>> y = lasagne.layers.get_output(l_out)
+
+In that case, a Theano expression will be returned that represents the output
+as a function of the input variables associated with the
+:class:`lasagne.layers.InputLayer` instance (or instances) in the network,
+so given the example network from before, you could compile a Theano function
+to compute its output given an input as follows:
+
+>>> f = theano.function([l_in.input_var], lasagne.layers.get_output(l_out))
+
+You can also specify a Theano expression to use as input as a second argument
+to :func:`lasagne.layers.get_output()`:
+
+>>> x = T.matrix('x')
+>>> y = lasagne.layers.get_output(l_out, x)
+>>> f = theano.function([x], y)
+
+This only works when there is only a single :class:`InputLayer` in the network.
+If there is more than one, you can specify input expressions in a dictionary.
+For example, in a network with two input layers `l_in1` and `l_in2` and an
+output layer `l_out`:
+
+>>> x1 = T.matrix('x1')
+>>> x2 = T.matrix('x2')
+>>> y = lasagne.layers.get_output(l_out, { l_in1: x1, l_in2: x2 })
+
+Any keyword arguments passed to `get_output()` are propagated to all layers.
+This makes it possible to control the behavior of the entire network. The
+main use case for this is the ``deterministic`` keyword argument, which
+disables stochastic behavior such as dropout when set to ``True``. This is
+useful because a deterministic output is desirable at evaluation time.
+
+>>> y = lasagne.layers.get_output(l_out, deterministic=True)
+
+Some networks may have multiple output layers - or you may just want to
+compute output expressions for intermediate layers in the network. In that
+case, you can pass a list of layers. For example, in a network with two output
+layers `l_out1` and `l_out2`:
+
+>>> y1, y2 = lasagne.layers.get_output([l_out1, l_out2])
+
+You could also just call :func:`lasagne.layers.get_output()` twice:
+
+>>> y1 = lasagne.layers.get_output(l_out1)
+>>> y2 = lasagne.layers.get_output(l_out2)
+
+However, this is **not recommended**! Some network layers may have
+non-deterministic output, such as dropout layers. If you compute the network
+output expressions with separate calls to :func:`lasagne.layers.get_output()`,
+they will not use the same samples. Furthermore, this may lead to unnecessary
+computation because Theano is not always able to merge identical computations
+properly. Calling `get_output()` only once prevents both of these issues.
\ No newline at end of file
diff --git a/docs/user/tutorial.rst b/docs/user/tutorial.rst
new file mode 100644
index 0000000..c11580c
--- /dev/null
+++ b/docs/user/tutorial.rst
@@ -0,0 +1,620 @@
+.. _tutorial:
+
+========
+Tutorial
+========
+
+This tutorial will walk you through building a handwritten digits classifier
+using the MNIST dataset, arguably the "Hello World" of neural networks.
+More tutorials and examples can be found in the `Lasagne Recipes`_ repository.
+
+
+Before we start
+===============
+
+The tutorial assumes that you are somewhat familiar with neural networks and
+Theano (the library which Lasagne is built on top of). You can try to learn
+both at once from the `Deeplearning Tutorial`_.
+
+For a more slow-paced introduction to artificial neural networks, we recommend
+`Convolutional Neural Networks for Visual Recognition`_ by Andrej Karpathy et
+al., `Neural Networks and Deep Learning`_ by Michael Nielsen or a standard text
+book such as "Machine Learning" by Tom Mitchell.
+
+To learn more about Theano, have a look at the `Theano tutorial`_. You will not
+need all of it, but a basic understanding of how Theano works is required to be
+able to use Lasagne. If you're new to Theano, going through that tutorial up to
+(and including) "More Examples" should have you covered! `Graph Structures`_ is
+a good extra read if you're curious about its inner workings.
+
+
+Run the MNIST example
+=====================
+
+In this first part of the tutorial, we will just run the MNIST example that's
+included in the source distribution of Lasagne.
+
+We assume that you have already run through the :ref:`installation`. If you
+haven't done so already, get a copy of the source tree of Lasagne, and navigate
+to the folder in a terminal window. Enter the ``examples`` folder and run the
+``mnist.py`` example script:
+
+.. code-block:: bash
+
+ cd examples
+ python mnist.py
+
+If everything is set up correctly, you will get an output like the following:
+
+.. code-block:: text
+
+ Using gpu device 0: GeForce GT 640
+ Loading data...
+ Downloading train-images-idx3-ubyte.gz
+ Downloading train-labels-idx1-ubyte.gz
+ Downloading t10k-images-idx3-ubyte.gz
+ Downloading t10k-labels-idx1-ubyte.gz
+ Building model and compiling functions...
+ Starting training...
+
+ Epoch 1 of 500 took 1.858s
+ training loss: 1.233348
+ validation loss: 0.405868
+ validation accuracy: 88.78 %
+ Epoch 2 of 500 took 1.845s
+ training loss: 0.571644
+ validation loss: 0.310221
+ validation accuracy: 91.24 %
+ Epoch 3 of 500 took 1.845s
+ training loss: 0.471582
+ validation loss: 0.265931
+ validation accuracy: 92.35 %
+ Epoch 4 of 500 took 1.847s
+ training loss: 0.412204
+ validation loss: 0.238558
+ validation accuracy: 93.05 %
+ ...
+
+The example script allows you to try three different models, selected via the
+first command line argument. Run the script with ``python mnist.py --help`` for
+more information and feel free to play around with it some more before we have
+a look at the implementation.
+
+
+Understand the MNIST example
+============================
+
+Let's now investigate what's needed to make that happen! To follow along, open
+up the source code in your favorite editor (or online: `mnist.py`_).
+
+
+Preface
+-------
+
+The first thing you might notice is that besides Lasagne, we also import numpy
+and Theano:
+
+.. code-block:: python
+
+ import numpy as np
+ import theano
+ import theano.tensor as T
+
+ import lasagne
+
+While Lasagne is built on top of Theano, it is meant as a supplement helping
+with some tasks, not as a replacement. You will always mix Lasagne with some
+vanilla Theano code.
+
+
+Loading data
+------------
+
+The first piece of code defines a function ``load_dataset()``. Its purpose is
+to download the MNIST dataset (if it hasn't been downloaded yet) and return it
+in the form of regular numpy arrays. There is no Lasagne involved at all, so
+for the purpose of this tutorial, we can regard it as:
+
+.. code-block:: python
+
+ def load_dataset():
+ ...
+ return X_train, y_train, X_val, y_val, X_test, y_test
+
+``X_train.shape`` is ``(50000, 1, 28, 28)``, to be interpreted as: 50,000
+images of 1 channel, 28 rows and 28 columns each. Note that the number of
+channels is 1 because we have monochrome input. Color images would have 3
+channels; spectrograms would also have a single channel.
+``y_train.shape`` is simply ``(50000,)``, that is, it is a vector of the same
+length as ``X_train`` giving an integer class label for each image -- namely,
+the digit between 0 and 9 depicted in the image (according to the human
+annotator who drew that digit).
+
+
+Building the model
+------------------
+
+This is where Lasagne steps in. It allows you to define an arbitrarily
+structured neural network by creating and stacking or merging layers.
+Since every layer knows its immediate incoming layers, the output layer (or
+output layers) of a network double as a handle to the network as a whole, so
+usually this is the only thing we will pass on to the rest of the code.
+
+As mentioned above, ``mnist.py`` supports three types of models, and we
+implement that via three easily exchangeable functions of the same interface.
+First, we'll define a function that creates a Multi-Layer Perceptron (MLP) of
+a fixed architecture, explaining all the steps in detail. We'll then present
+a function generating an MLP of a custom architecture. Finally, we'll
+show how to create a Convolutional Neural Network (CNN).
+
+
+Multi-Layer Perceptron (MLP)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The first function, ``build_mlp()``, creates an MLP of two hidden layers of
+800 units each, followed by a softmax output layer of 10 units. It applies 20%
+dropout to the input data and 50% dropout to the hidden layers. It is similar,
+but not fully equivalent to the smallest MLP in [Hinton2012]_ (that paper uses
+different nonlinearities, weight initialization and training).
+
+The foundation of each neural network in Lasagne is an
+:class:`InputLayer <lasagne.layers.InputLayer>` instance (or multiple of those)
+representing the input data that will subsequently be fed to the network. Note
+that the ``InputLayer`` is not tied to any specific data yet, but only holds
+the shape of the data that will be passed to the network. In addition, it
+creates or can be linked to a `Theano variable
+<http://deeplearning.net/software/theano/glossary.html#term-variable>`_ that
+will represent the network input in the `Theano graph
+<http://deeplearning.net/software/theano/glossary.html#term-expression-graph>`_
+we'll build from the network later.
+Thus, our function starts like this:
+
+.. code-block:: python
+
+ def build_mlp(input_var=None):
+ l_in = lasagne.layers.InputLayer(shape=(None, 1, 28, 28),
+ input_var=input_var)
+
+The four numbers in the shape tuple represent, in order:
+``(batchsize, channels, rows, columns)``.
+Here we've set the batchsize to ``None``, which means the network will accept
+input data of arbitrary batchsize after compilation. If you know the batchsize
+beforehand and do not need this flexibility, you should give the batchsize
+here -- especially for convolutional layers, this can allow Theano to apply
+some optimizations.
+``input_var`` denotes the Theano variable we want to link the network's input
+layer to. If it is omitted (or set to ``None``), the layer will just create a
+suitable variable itself, but it can be handy to link an existing variable to
+the network at construction time -- especially if you're creating networks of
+multiple input layers. Here, we link it to a variable given as an argument to
+the ``build_mlp()`` function.
+
+Before adding the first hidden layer, we'll apply 20% dropout to the input
+data. This is realized via a :class:`DropoutLayer
+<lasagne.layers.DropoutLayer>` instance:
+
+.. code-block:: python
+
+ l_in_drop = lasagne.layers.DropoutLayer(l_in, p=0.2)
+
+Note that the first constructor argument is the incoming layer, such that
+``l_in_drop`` is now stacked on top of ``l_in``. All layers work this way,
+except for layers that merge multiple inputs: those accept a list of incoming
+layers as their first constructor argument instead.
+
+We'll proceed with the first fully-connected hidden layer of 800 units. Note
+that when stacking a :class:`DenseLayer <lasagne.layers.DenseLayer>` on
+higher-order input tensors, they will be flattened implicitly so we don't need
+to care about that. In this case, the input will be flattened from 1x28x28
+images to 784-dimensional vectors.
+
+.. code-block:: python
+
+ l_hid1 = lasagne.layers.DenseLayer(
+ l_in_drop, num_units=800,
+ nonlinearity=lasagne.nonlinearities.rectify,
+ W=lasagne.init.GlorotUniform())
+
+Again, the first constructor argument means that we're stacking ``l_hid1`` on
+top of ``l_in_drop``.
+``num_units`` simply gives the number of units for this fully-connected layer.
+``nonlinearity`` takes a nonlinearity function, several of which are defined
+in :mod:`lasagne.nonlinearities`. Here we've chosen the linear rectifier, so
+we'll obtain ReLUs.
+Finally, :class:`lasagne.init.GlorotUniform()` gives the initializer for the
+weight matrix ``W``. This particular initializer samples weights from a uniform
+distribution of a carefully chosen range. Other initializers are available in
+:mod:`lasagne.init`, and alternatively, ``W`` could also have been initialized
+from a Theano shared variable or numpy array of the correct shape (784x800 in
+this case, as the input to this layer has 1*28*28=784 dimensions).
+Note that ``lasagne.init.GlorotUniform()`` is the default, so we'll omit it
+from here -- we just wanted to highlight that there is a choice.
+
+We'll now add dropout of 50%, another 800-unit dense layer and 50% dropout
+again:
+
+.. code-block:: python
+
+ l_hid1_drop = lasagne.layers.DropoutLayer(l_hid1, p=0.5)
+
+ l_hid2 = lasagne.layers.DenseLayer(
+ l_hid1_drop, num_units=800,
+ nonlinearity=lasagne.nonlinearities.rectify)
+
+ l_hid2_drop = lasagne.layers.DropoutLayer(l_hid2, p=0.5)
+
+Finally, we'll add the fully-connected output layer. The main difference is
+that it uses the softmax nonlinearity, as we're planning to solve a 10-class
+classification problem with this network.
+
+.. code-block:: python
+
+ l_out = lasagne.layers.DenseLayer(
+ l_hid2_drop, num_units=10,
+ nonlinearity=lasagne.nonlinearities.softmax)
+
+As mentioned above, each layer is linked to its incoming layer(s), so we only
+need the output layer(s) to access a network in Lasagne:
+
+.. code-block:: python
+
+ return l_out
+
+
+Custom MLP
+^^^^^^^^^^
+
+The second function has a slightly more extensive signature:
+
+.. code-block:: python
+
+ def build_custom_mlp(input_var=None, depth=2, width=800, drop_input=.2,
+ drop_hidden=.5):
+
+By default, it creates the same network as ``build_mlp()`` described above, but
+it can be customized with respect to the number and size of hidden layers, as
+well as the amount of input and hidden dropout. This demonstrates how creating
+a network in Python code can be a lot more flexible than a configuration file.
+See for yourself:
+
+.. code-block:: python
+
+ # Input layer and dropout (with shortcut `dropout` for `DropoutLayer`):
+ network = lasagne.layers.InputLayer(shape=(None, 1, 28, 28),
+ input_var=input_var)
+ if drop_input:
+ network = lasagne.layers.dropout(network, p=drop_input)
+ # Hidden layers and dropout:
+ nonlin = lasagne.nonlinearities.rectify
+ for _ in range(depth):
+ network = lasagne.layers.DenseLayer(
+ network, width, nonlinearity=nonlin)
+ if drop_hidden:
+ network = lasagne.layers.dropout(network, p=drop_hidden)
+ # Output layer:
+ softmax = lasagne.nonlinearities.softmax
+ network = lasagne.layers.DenseLayer(network, 10, nonlinearity=softmax)
+ return network
+
+With two ``if`` clauses and a ``for`` loop, this network definition allows
+varying the architecture in a way that would be impossible for a ``.yaml`` file
+in `Pylearn2`_ or a ``.cfg`` file in `cuda-convnet`_.
+
+Note that to make the code easier, all the layers are just called ``network``
+here -- there is no need to give them different names if all we return is the
+last one we created anyway; we just used different names before for clarity.
+
+
+Convolutional Neural Network (CNN)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Finally, the ``build_cnn()`` function creates a CNN of two convolution and
+pooling stages, a fully-connected hidden layer and a fully-connected output
+layer.
+The function begins like the others:
+
+.. code-block:: python
+
+ def build_cnn(input_var=None):
+ network = lasagne.layers.InputLayer(shape=(None, 1, 28, 28),
+ input_var=input_var)
+
+We don't apply dropout to the inputs, as this tends to work less well for
+convolutional layers. Instead of a :class:`DenseLayer
+<lasagne.layers.DenseLayer>`, we now add a :class:`Conv2DLayer
+<lasagne.layers.Conv2DLayer>` with 32 filters of size 5x5 on top:
+
+.. code-block:: python
+
+ network = lasagne.layers.Conv2DLayer(
+ network, num_filters=32, filter_size=(5, 5),
+ nonlinearity=lasagne.nonlinearities.rectify,
+ W=lasagne.init.GlorotUniform())
+
+The nonlinearity and weight initializer can be given just as for the
+``DenseLayer`` (and again, ``GlorotUniform()`` is the default, we'll omit it
+from now). Strided and padded convolutions are supported as well; see the
+:class:`Conv2DLayer <lasagne.layers.Conv2DLayer>` docstring.
+
+.. note::
+ For experts: ``Conv2DLayer`` will create a convolutional layer using
+ ``T.nnet.conv2d``, Theano's default convolution. On compilation for GPU,
+ Theano replaces this with a `cuDNN`_-based implementation if available,
+ otherwise falls back to a gemm-based implementation. For details on this,
+ please see the `Theano convolution documentation`_.
+
+ Lasagne also provides convolutional layers directly enforcing a specific
+ implementation: :class:`lasagne.layers.dnn.Conv2DDNNLayer` to enforce
+ cuDNN, :class:`lasagne.layers.corrmm.Conv2DMMLayer` to enforce the
+ gemm-based one, :class:`lasagne.layers.cuda_convnet.Conv2DCCLayer` for
+ Krizhevsky's `cuda-convnet`_.
+
+We then apply max-pooling of factor 2 in both dimensions, using a
+:class:`MaxPool2DLayer <lasagne.layers.MaxPool2DLayer>` instance:
+
+.. code-block:: python
+
+ network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))
+
+We add another convolution and pooling stage like the ones before:
+
+.. code-block:: python
+
+ network = lasagne.layers.Conv2DLayer(
+ network, num_filters=32, filter_size=(5, 5),
+ nonlinearity=lasagne.nonlinearities.rectify)
+ network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))
+
+Then a fully-connected layer of 256 units with 50% dropout on its inputs
+(using the :class:`lasagne.layers.dropout` shortcut directly inline):
+
+.. code-block:: python
+
+ network = lasagne.layers.DenseLayer(
+ lasagne.layers.dropout(network, p=.5),
+ num_units=256,
+ nonlinearity=lasagne.nonlinearities.rectify)
+
+And finally a 10-unit softmax output layer, again with 50% dropout:
+
+.. code-block:: python
+
+ network = lasagne.layers.DenseLayer(
+ lasagne.layers.dropout(network, p=.5),
+ num_units=10,
+ nonlinearity=lasagne.nonlinearities.softmax)
+
+ return network
+
+
+Training the model
+------------------
+
+The remaining part of the ``mnist.py`` script copes with setting up and running
+a training loop over the MNIST dataset.
+
+
+Dataset iteration
+^^^^^^^^^^^^^^^^^
+
+It first defines a short helper function for synchronously iterating over two
+numpy arrays of input data and targets, respectively, in mini-batches of a
+given number of items. For the purpose of this tutorial, we can shorten it to:
+
+.. code-block:: python
+
+ def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
+ if shuffle:
+ ...
+ for ...:
+ yield inputs[...], targets[...]
+
+All that's relevant is that it is a generator function that serves one batch of
+inputs and targets at a time until the given dataset (in ``inputs`` and
+``targets``) is exhausted, either in sequence or in random order. Below we will
+plug this function into our training loop, validation loop and test loop.
+
+
+Preparation
+^^^^^^^^^^^
+
+Let's now focus on the ``main()`` function. A bit simplified, it begins like
+this:
+
+.. code-block:: python
+
+ # Load the dataset
+ X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()
+ # Prepare Theano variables for inputs and targets
+ input_var = T.tensor4('inputs')
+ target_var = T.ivector('targets')
+ # Create neural network model
+ network = build_mlp(input_var)
+
+The first line loads the inputs and targets of the MNIST dataset as numpy
+arrays, split into training, validation and test data.
+The next two statements define symbolic Theano variables that will represent
+a mini-batch of inputs and targets in all the Theano expressions we will
+generate for network training and inference. They are not tied to any data yet,
+but their dimensionality and data type is fixed already and matches the actual
+inputs and targets we will process later.
+Finally, we call one of the three functions for building the Lasagne network,
+depending on the first command line argument -- we've just removed command line
+handling here for clarity. Note that we hand the symbolic input variable to
+``build_mlp()`` so it will be linked to the network's input layer.
+
+
+Loss and update expressions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Continuing, we create a loss expression to be minimized in training:
+
+.. code-block:: python
+
+ prediction = lasagne.layers.get_output(network)
+ loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
+ loss = loss.mean()
+
+The first step generates a Theano expression for the network output given the
+input variable linked to the network's input layer(s). The second step defines
+a Theano expression for the categorical cross-entropy loss between said network
+output and the targets. Finally, as we need a scalar loss, we simply take the
+mean over the mini-batch. Depending on the problem you are solving, you will
+need different loss functions, see :mod:`lasagne.objectives` for more.
+
+Having the model and the loss function defined, we create update expressions
+for training the network. An update expression describes how to change the
+trainable parameters of the network at each presented mini-batch. We will use
+Stochastic Gradient Descent (SGD) with Nesterov momentum here, but the
+:mod:`lasagne.updates` module offers several others you can plug in instead:
+
+.. code-block:: python
+
+ params = lasagne.layers.get_all_params(network, trainable=True)
+ updates = lasagne.updates.nesterov_momentum(
+ loss, params, learning_rate=0.01, momentum=0.9)
+
+The first step collects all Theano ``SharedVariable`` instances making up the
+trainable parameters of the network, and the second step generates an update
+expression for each parameter.
+
+For monitoring progress during training, after each epoch, we evaluate the
+network on the validation set. We need a slightly different loss expression
+for that:
+
+.. code-block:: python
+
+ test_prediction = lasagne.layers.get_output(network, deterministic=True)
+ test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
+ target_var)
+ test_loss = test_loss.mean()
+
+The crucial difference is that we pass ``deterministic=True`` to the
+:func:`get_output <lasagne.layers.get_output>` call. This causes all
+nondeterministic layers to switch to a deterministic implementation, so in our
+case, it disables the dropout layers.
+As an additional monitoring quantity, we create an expression for the
+classification accuracy:
+
+.. code-block:: python
+
+ test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
+ dtype=theano.config.floatX)
+
+It also builds on the deterministic ``test_prediction`` expression.
+
+
+Compilation
+^^^^^^^^^^^
+
+Equipped with all the necessary Theano expressions, we're now ready to compile
+a function performing a training step:
+
+.. code-block:: python
+
+ train_fn = theano.function([input_var, target_var], loss, updates=updates)
+
+This tells Theano to generate and compile a function taking two inputs -- a
+mini-batch of images and a vector of corresponding targets -- and returning a
+single output: the training loss. Additionally, each time it is invoked, it
+applies all parameter updates in the ``updates`` dictionary, thus performing a
+gradient descent step with Nesterov momentum.
+
+For validation, we compile a second function:
+
+.. code-block:: python
+
+ val_fn = theano.function([input_var, target_var], [test_loss, test_acc])
+
+This one also takes a mini-batch of images and targets, then returns the
+(deterministic) loss and classification accuracy, not performing any updates.
+
+
+Training loop
+^^^^^^^^^^^^^
+
+We're finally ready to write the training loop. In essence, we just need to do
+the following:
+
+.. code-block:: python
+
+ for epoch in range(num_epochs):
+ for batch in iterate_minibatches(X_train, y_train, 500, shuffle=True):
+ inputs, targets = batch
+ train_fn(inputs, targets)
+
+This uses our dataset iteration helper function to iterate over the training
+data in random order, in mini-batches of 500 items each, for ``num_epochs``
+epochs, and calls the training function we compiled to perform an update step
+of the network parameters.
+
+But to be able to monitor the training progress, we capture the training loss,
+compute the validation loss and print some information to the console every
+time an epoch finishes:
+
+.. code-block:: python
+
+ for epoch in range(num_epochs):
+ # In each epoch, we do a full pass over the training data:
+ train_err = 0
+ train_batches = 0
+ start_time = time.time()
+ for batch in iterate_minibatches(X_train, y_train, 500, shuffle=True):
+ inputs, targets = batch
+ train_err += train_fn(inputs, targets)
+ train_batches += 1
+
+ # And a full pass over the validation data:
+ val_err = 0
+ val_acc = 0
+ val_batches = 0
+ for batch in iterate_minibatches(X_val, y_val, 500, shuffle=False):
+ inputs, targets = batch
+ err, acc = val_fn(inputs, targets)
+ val_err += err
+ val_acc += acc
+ val_batches += 1
+
+ # Then we print the results for this epoch:
+ print("Epoch {} of {} took {:.3f}s".format(
+ epoch + 1, num_epochs, time.time() - start_time))
+ print(" training loss:\t\t{:.6f}".format(train_err / train_batches))
+ print(" validation loss:\t\t{:.6f}".format(val_err / val_batches))
+ print(" validation accuracy:\t\t{:.2f} %".format(
+ val_acc / val_batches * 100))
+
+At the very end, we re-use the ``val_fn()`` function to compute the loss and
+accuracy on the test set, finishing the script.
+
+
+
+Where to go from here
+=====================
+
+This finishes our introductory tutorial. For more information on what you can
+do with Lasagne's layers, just continue reading through :doc:`layers` and
+:doc:`custom_layers`.
+More tutorials, examples and code snippets can be found in the `Lasagne
+Recipes`_ repository.
+Finally, the reference lists and explains all layers (:mod:`lasagne.layers`),
+weight initializers (:mod:`lasagne.init`), nonlinearities
+(:mod:`lasagne.nonlinearities`), loss expressions (:mod:`lasagne.objectives`),
+training methods (:mod:`lasagne.updates`) and regularizers
+(:mod:`lasagne.regularization`) included in the library, and should also make
+it simple to create your own.
+
+
+
+.. _Lasagne Recipes: https://github.com/Lasagne/Recipes
+.. _Deeplearning Tutorial: http://deeplearning.net/tutorial/
+.. _Convolutional Neural Networks for Visual Recognition: http://cs231n.github.io/
+.. _Neural Networks and Deep Learning: http://neuralnetworksanddeeplearning.com/
+.. _Theano tutorial: http://deeplearning.net/software/theano/tutorial/
+.. _Graph Structures: http://deeplearning.net/software/theano/extending/graphstructures.html
+.. _mnist.py: https://github.com/Lasagne/Lasagne/blob/master/examples/mnist.py
+.. [Hinton2012] Improving neural networks by preventing co-adaptation
+ of feature detectors. http://arxiv.org/abs/1207.0580
+.. _Pylearn2: http://deeplearning.net/software/pylearn2/
+.. _cuda-convnet: https://code.google.com/p/cuda-convnet/
+.. _cuDNN: https://developer.nvidia.com/cudnn
+.. _Theano convolution documentation: http://deeplearning.net/software/theano/library/tensor/nnet/conv.html
diff --git a/examples/mnist.py b/examples/mnist.py
new file mode 100755
index 0000000..1ce6192
--- /dev/null
+++ b/examples/mnist.py
@@ -0,0 +1,362 @@
+#!/usr/bin/env python
+
+"""
+Usage example employing Lasagne for digit recognition using the MNIST dataset.
+
+This example is deliberately structured as a long flat file, focusing on how
+to use Lasagne, instead of focusing on writing maximally modular and reusable
+code. It is used as the foundation for the introductory Lasagne tutorial:
+http://lasagne.readthedocs.org/en/latest/user/tutorial.html
+
+More in-depth examples and reproductions of paper results are maintained in
+a separate repository: https://github.com/Lasagne/Recipes
+"""
+
+from __future__ import print_function
+
+import sys
+import os
+import time
+
+import numpy as np
+import theano
+import theano.tensor as T
+
+import lasagne
+
+
+# ################## Download and prepare the MNIST dataset ##################
+# This is just some way of getting the MNIST dataset from an online location
+# and loading it into numpy arrays. It doesn't involve Lasagne at all.
+
+def load_dataset():
+ # We first define a download function, supporting both Python 2 and 3.
+ if sys.version_info[0] == 2:
+ from urllib import urlretrieve
+ else:
+ from urllib.request import urlretrieve
+
+ def download(filename, source='http://yann.lecun.com/exdb/mnist/'):
+ print("Downloading %s" % filename)
+ urlretrieve(source + filename, filename)
+
+ # We then define functions for loading MNIST images and labels.
+ # For convenience, they also download the requested files if needed.
+ import gzip
+
+ def load_mnist_images(filename):
+ if not os.path.exists(filename):
+ download(filename)
+ # Read the inputs in Yann LeCun's binary format.
+ with gzip.open(filename, 'rb') as f:
+ data = np.frombuffer(f.read(), np.uint8, offset=16)
+ # The inputs are vectors now, we reshape them to monochrome 2D images,
+ # following the shape convention: (examples, channels, rows, columns)
+ data = data.reshape(-1, 1, 28, 28)
+ # The inputs come as bytes, we convert them to float32 in range [0,1].
+ # (Actually to range [0, 255/256], for compatibility to the version
+ # provided at http://deeplearning.net/data/mnist/mnist.pkl.gz.)
+ return data / np.float32(256)
+
+ def load_mnist_labels(filename):
+ if not os.path.exists(filename):
+ download(filename)
+ # Read the labels in Yann LeCun's binary format.
+ with gzip.open(filename, 'rb') as f:
+ data = np.frombuffer(f.read(), np.uint8, offset=8)
+ # The labels are vectors of integers now, that's exactly what we want.
+ return data
+
+ # We can now download and read the training and test set images and labels.
+ X_train = load_mnist_images('train-images-idx3-ubyte.gz')
+ y_train = load_mnist_labels('train-labels-idx1-ubyte.gz')
+ X_test = load_mnist_images('t10k-images-idx3-ubyte.gz')
+ y_test = load_mnist_labels('t10k-labels-idx1-ubyte.gz')
+
+ # We reserve the last 10000 training examples for validation.
+ X_train, X_val = X_train[:-10000], X_train[-10000:]
+ y_train, y_val = y_train[:-10000], y_train[-10000:]
+
+ # We just return all the arrays in order, as expected in main().
+ # (It doesn't matter how we do this as long as we can read them again.)
+ return X_train, y_train, X_val, y_val, X_test, y_test
+
+
+# ##################### Build the neural network model #######################
+# This script supports three types of models. For each one, we define a
+# function that takes a Theano variable representing the input and returns
+# the output layer of a neural network model built in Lasagne.
+
+def build_mlp(input_var=None):
+ # This creates an MLP of two hidden layers of 800 units each, followed by
+ # a softmax output layer of 10 units. It applies 20% dropout to the input
+ # data and 50% dropout to the hidden layers.
+
+ # Input layer, specifying the expected input shape of the network
+ # (unspecified batchsize, 1 channel, 28 rows and 28 columns) and
+ # linking it to the given Theano variable `input_var`, if any:
+ l_in = lasagne.layers.InputLayer(shape=(None, 1, 28, 28),
+ input_var=input_var)
+
+ # Apply 20% dropout to the input data:
+ l_in_drop = lasagne.layers.DropoutLayer(l_in, p=0.2)
+
+ # Add a fully-connected layer of 800 units, using the linear rectifier, and
+ # initializing weights with Glorot's scheme (which is the default anyway):
+ l_hid1 = lasagne.layers.DenseLayer(
+ l_in_drop, num_units=800,
+ nonlinearity=lasagne.nonlinearities.rectify,
+ W=lasagne.init.GlorotUniform())
+
+ # We'll now add dropout of 50%:
+ l_hid1_drop = lasagne.layers.DropoutLayer(l_hid1, p=0.5)
+
+ # Another 800-unit layer:
+ l_hid2 = lasagne.layers.DenseLayer(
+ l_hid1_drop, num_units=800,
+ nonlinearity=lasagne.nonlinearities.rectify)
+
+ # 50% dropout again:
+ l_hid2_drop = lasagne.layers.DropoutLayer(l_hid2, p=0.5)
+
+ # Finally, we'll add the fully-connected output layer, of 10 softmax units:
+ l_out = lasagne.layers.DenseLayer(
+ l_hid2_drop, num_units=10,
+ nonlinearity=lasagne.nonlinearities.softmax)
+
+ # Each layer is linked to its incoming layer(s), so we only need to pass
+ # the output layer to give access to a network in Lasagne:
+ return l_out
+
+
+def build_custom_mlp(input_var=None, depth=2, width=800, drop_input=.2,
+ drop_hidden=.5):
+ # By default, this creates the same network as `build_mlp`, but it can be
+ # customized with respect to the number and size of hidden layers. This
+ # mostly showcases how creating a network in Python code can be a lot more
+ # flexible than a configuration file. Note that to make the code easier,
+ # all the layers are just called `network` -- there is no need to give them
+ # different names if all we return is the last one we created anyway; we
+ # just used different names above for clarity.
+
+ # Input layer and dropout (with shortcut `dropout` for `DropoutLayer`):
+ network = lasagne.layers.InputLayer(shape=(None, 1, 28, 28),
+ input_var=input_var)
+ if drop_input:
+ network = lasagne.layers.dropout(network, p=drop_input)
+ # Hidden layers and dropout:
+ nonlin = lasagne.nonlinearities.rectify
+ for _ in range(depth):
+ network = lasagne.layers.DenseLayer(
+ network, width, nonlinearity=nonlin)
+ if drop_hidden:
+ network = lasagne.layers.dropout(network, p=drop_hidden)
+ # Output layer:
+ softmax = lasagne.nonlinearities.softmax
+ network = lasagne.layers.DenseLayer(network, 10, nonlinearity=softmax)
+ return network
+
+
def build_cnn(input_var=None):
    """Build a CNN for 28x28 single-channel images: two convolution +
    pooling stages, a dropout + dense hidden layer, and a 10-unit softmax
    output. Returns the output layer, which doubles as a handle to the
    whole network."""
    relu = lasagne.nonlinearities.rectify

    # Images enter as (batch, channels, rows, cols); batch size is left
    # unspecified. No input dropout here: it tends to work less well for
    # convolutional layers.
    net = lasagne.layers.InputLayer(shape=(None, 1, 28, 28),
                                    input_var=input_var)

    # First stage: 32 kernels of size 5x5 (strided and padded convolutions
    # are supported as well; see the docstring), then 2x2 max-pooling.
    # Expert note: Lasagne provides alternative convolutional layers that
    # override Theano's choice of which implementation to use; for details
    # please see http://lasagne.readthedocs.org/en/latest/user/tutorial.html.
    net = lasagne.layers.Conv2DLayer(
        net, num_filters=32, filter_size=(5, 5),
        nonlinearity=relu,
        W=lasagne.init.GlorotUniform())
    net = lasagne.layers.MaxPool2DLayer(net, pool_size=(2, 2))

    # Second stage: another 32 5x5 kernels and another 2x2 pooling.
    net = lasagne.layers.Conv2DLayer(
        net, num_filters=32, filter_size=(5, 5),
        nonlinearity=relu)
    net = lasagne.layers.MaxPool2DLayer(net, pool_size=(2, 2))

    # Fully-connected layer of 256 units with 50% dropout on its inputs:
    net = lasagne.layers.DenseLayer(
        lasagne.layers.dropout(net, p=.5),
        num_units=256,
        nonlinearity=relu)

    # Finally, the 10-unit output layer with 50% dropout on its inputs:
    net = lasagne.layers.DenseLayer(
        lasagne.layers.dropout(net, p=.5),
        num_units=10,
        nonlinearity=lasagne.nonlinearities.softmax)

    return net
+
+
+# ############################# Batch iterator ###############################
+# This is just a simple helper function iterating over training data in
+# mini-batches of a particular size, optionally in random order. It assumes
+# data is available as numpy arrays. For big datasets, you could load numpy
+# arrays as memory-mapped files (np.load(..., mmap_mode='r')), or write your
+# own custom data iteration function. For small datasets, you can also copy
+# them to GPU at once for slightly improved performance. This would involve
+# several changes in the main program, though, and is not demonstrated here.
+# Notice that this function returns only mini-batches of size `batchsize`.
+# If the size of the data is not a multiple of `batchsize`, it will not
+# return the last (remaining) mini-batch.
+
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    """Yield successive (inputs, targets) mini-batches of exactly
    `batchsize` samples; a trailing remainder smaller than `batchsize`
    is dropped. With ``shuffle=True``, samples are visited in a random
    order (inputs and targets stay aligned)."""
    assert len(inputs) == len(targets)
    if shuffle:
        order = np.random.permutation(len(inputs))
    last_start = len(inputs) - batchsize
    start = 0
    while start <= last_start:
        if shuffle:
            sel = order[start:start + batchsize]
        else:
            sel = slice(start, start + batchsize)
        yield inputs[sel], targets[sel]
        start += batchsize
+
+
+# ############################## Main program ################################
+# Everything else will be handled in our main program now. We could pull out
+# more functions to better separate the code, but it wouldn't make it any
+# easier to read.
+
def main(model='mlp', num_epochs=500):
    """Load MNIST, build the requested model, train it with SGD + Nesterov
    momentum, and print per-epoch validation metrics plus final test
    metrics.

    Parameters
    ----------
    model : str
        'mlp', 'cnn', or 'custom_mlp:DEPTH,WIDTH,DROP_IN,DROP_HID'.
    num_epochs : int
        Number of full passes over the training set.
    """
    # Load the dataset
    print("Loading data...")
    X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    # Create neural network model (depending on first command line parameter)
    print("Building model and compiling functions...")
    if model == 'mlp':
        network = build_mlp(input_var)
    elif model.startswith('custom_mlp:'):
        # Everything after 'custom_mlp:' is a comma-separated spec.
        depth, width, drop_in, drop_hid = model.split(':', 1)[1].split(',')
        network = build_custom_mlp(input_var, int(depth), int(width),
                                   float(drop_in), float(drop_hid))
    elif model == 'cnn':
        network = build_cnn(input_var)
    else:
        print("Unrecognized model type %r." % model)
        return

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    # We could add some weight decay as well here, see lasagne.regularization.

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Stochastic Gradient
    # Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
            loss, params, learning_rate=0.01, momentum=0.9)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var)
    test_loss = test_loss.mean()
    # As a bonus, also create an expression for the classification accuracy:
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    # Finally, launch the training loop.
    print("Starting training...")
    # We iterate over epochs:
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(X_train, y_train, 500, shuffle=True):
            inputs, targets = batch
            train_err += train_fn(inputs, targets)
            train_batches += 1

        # And a full pass over the validation data:
        val_err = 0
        val_acc = 0
        val_batches = 0
        for batch in iterate_minibatches(X_val, y_val, 500, shuffle=False):
            inputs, targets = batch
            err, acc = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time))
        print("  training loss:\t\t{:.6f}".format(train_err / train_batches))
        print("  validation loss:\t\t{:.6f}".format(val_err / val_batches))
        print("  validation accuracy:\t\t{:.2f} %".format(
            val_acc / val_batches * 100))

    # After training, we compute and print the test error:
    test_err = 0
    test_acc = 0
    test_batches = 0
    for batch in iterate_minibatches(X_test, y_test, 500, shuffle=False):
        inputs, targets = batch
        err, acc = val_fn(inputs, targets)
        test_err += err
        test_acc += acc
        test_batches += 1
    print("Final results:")
    print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches))
    print("  test accuracy:\t\t{:.2f} %".format(
        test_acc / test_batches * 100))

    # Optionally, you could now dump the network weights to a file like this:
    # np.savez('model.npz', *lasagne.layers.get_all_param_values(network))
    #
    # And load them again later on like this:
    # with np.load('model.npz') as f:
    #     param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    # lasagne.layers.set_all_param_values(network, param_values)
+
+
# Command-line entry point: `mnist.py [MODEL [EPOCHS]]`. With `--help` or
# `-h` anywhere on the command line, print usage instead of training.
if __name__ == '__main__':
    if ('--help' in sys.argv) or ('-h' in sys.argv):
        print("Trains a neural network on MNIST using Lasagne.")
        print("Usage: %s [MODEL [EPOCHS]]" % sys.argv[0])
        print()
        print("MODEL: 'mlp' for a simple Multi-Layer Perceptron (MLP),")
        print("       'custom_mlp:DEPTH,WIDTH,DROP_IN,DROP_HID' for an MLP")
        print("       with DEPTH hidden layers of WIDTH units, DROP_IN")
        print("       input dropout and DROP_HID hidden dropout,")
        print("       'cnn' for a simple Convolutional Neural Network (CNN).")
        print("EPOCHS: number of training epochs to perform (default: 500)")
    else:
        # Only forward arguments that were actually given, so main()'s own
        # defaults apply otherwise.
        kwargs = {}
        if len(sys.argv) > 1:
            kwargs['model'] = sys.argv[1]
        if len(sys.argv) > 2:
            kwargs['num_epochs'] = int(sys.argv[2])
        main(**kwargs)
diff --git a/examples/recurrent.py b/examples/recurrent.py
new file mode 100755
index 0000000..274e83b
--- /dev/null
+++ b/examples/recurrent.py
@@ -0,0 +1,171 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+'''
+Recurrent network example. Trains a bidirectional vanilla RNN to output the
+sum of two numbers in a sequence of random numbers sampled uniformly from
+[0, 1] based on a separate marker sequence.
+'''
+
+from __future__ import print_function
+
+
+import numpy as np
+import theano
+import theano.tensor as T
+import lasagne
+
+
+# Min/max sequence length
+MIN_LENGTH = 50
+MAX_LENGTH = 55
+# Number of units in the hidden (recurrent) layer
+N_HIDDEN = 100
+# Number of training sequences in each batch
+N_BATCH = 100
+# Optimization learning rate
+LEARNING_RATE = .001
+# All gradients above this will be clipped
+GRAD_CLIP = 100
+# How often should we check the output?
+EPOCH_SIZE = 100
+# Number of epochs to train the net
+NUM_EPOCHS = 10
+
+
def gen_data(min_length=MIN_LENGTH, max_length=MAX_LENGTH, n_batch=N_BATCH):
    '''
    Generate a batch of sequences for the "add" task, e.g. the target for the
    following

    ``| 0.5 | 0.7 | 0.3 | 0.1 | 0.2 | ... | 0.5 | 0.9 | ... | 0.8 | 0.2 |
      | 0   | 0   | 1   | 0   | 0   |     | 0   | 1   |     | 0   | 0   |``

    would be 0.3 + .9 = 1.2. This task was proposed in [1]_ and explored in
    e.g. [2]_.

    Parameters
    ----------
    min_length : int
        Minimum sequence length.
    max_length : int
        Maximum sequence length.
    n_batch : int
        Number of samples in the batch.

    Returns
    -------
    X : np.ndarray
        Input to the network, of shape (n_batch, max_length, 2), where the
        last dimension corresponds to the two sequences shown above.
    y : np.ndarray
        Correct output for each sample, shape (n_batch,).
    mask : np.ndarray
        A binary matrix of shape (n_batch, max_length) where ``mask[i, j] = 1``
        when ``j < (length of sequence i)`` and ``mask[i, j] = 0`` otherwise.

    References
    ----------
    .. [1] Hochreiter, Sepp, and Jürgen Schmidhuber. "Long short-term memory."
    Neural computation 9.8 (1997): 1735-1780.

    .. [2] Sutskever, Ilya, et al. "On the importance of initialization and
    momentum in deep learning." Proceedings of the 30th international
    conference on machine learning (ICML-13). 2013.
    '''
    # Generate X - the value channel is uniform noise; the marker channel
    # (filled in below) starts out all zeros.
    X = np.concatenate([np.random.uniform(size=(n_batch, max_length, 1)),
                        np.zeros((n_batch, max_length, 1))],
                       axis=-1)
    mask = np.zeros((n_batch, max_length))
    y = np.zeros((n_batch,))
    # Compute masks and correct values
    for n in range(n_batch):
        # Randomly choose the sequence length
        length = np.random.randint(min_length, max_length)
        # Make the mask for this sample 1 within the range of length
        mask[n, :length] = 1
        # Zero out X after the end of the sequence
        X[n, length:, 0] = 0
        # Set the second dimension to 1 at the two indices to add: one in
        # the first tenth and one in the second half of the sequence. Use
        # floor division so the bounds passed to np.random.randint stay
        # integers under Python 3 (plain '/' would yield floats here).
        X[n, np.random.randint(length // 10), 1] = 1
        X[n, np.random.randint(length // 2, length), 1] = 1
        # Multiply and sum the dimensions of X to get the target value;
        # only the two marked positions contribute.
        y[n] = np.sum(X[n, :, 0]*X[n, :, 1])
    # Center the inputs and outputs
    X -= X.reshape(-1, 2).mean(axis=0)
    y -= y.mean()
    return (X.astype(theano.config.floatX), y.astype(theano.config.floatX),
            mask.astype(theano.config.floatX))
+
+
def main(num_epochs=NUM_EPOCHS):
    """Build a bidirectional vanilla RNN for the "add" task, compile the
    Theano training/cost functions, and train with Adagrad, printing the
    cost on a fixed validation batch after each epoch. Ctrl-C stops
    training cleanly (KeyboardInterrupt is caught)."""
    print("Building network ...")
    # First, we build the network, starting with an input layer
    # Recurrent layers expect input of shape
    # (batch size, max sequence length, number of features)
    l_in = lasagne.layers.InputLayer(shape=(N_BATCH, MAX_LENGTH, 2))
    # The network also needs a way to provide a mask for each sequence. We'll
    # use a separate input layer for that. Since the mask only determines
    # which indices are part of the sequence for each batch entry, they are
    # supplied as matrices of dimensionality (N_BATCH, MAX_LENGTH)
    l_mask = lasagne.layers.InputLayer(shape=(N_BATCH, MAX_LENGTH))
    # We're using a bidirectional network, which means we will combine two
    # RecurrentLayers, one with the backwards=True keyword argument.
    # Setting a value for grad_clipping will clip the gradients in the layer
    # Setting only_return_final=True makes the layers only return their output
    # for the final time step, which is all we need for this task
    l_forward = lasagne.layers.RecurrentLayer(
        l_in, N_HIDDEN, mask_input=l_mask, grad_clipping=GRAD_CLIP,
        W_in_to_hid=lasagne.init.HeUniform(),
        W_hid_to_hid=lasagne.init.HeUniform(),
        nonlinearity=lasagne.nonlinearities.tanh, only_return_final=True)
    l_backward = lasagne.layers.RecurrentLayer(
        l_in, N_HIDDEN, mask_input=l_mask, grad_clipping=GRAD_CLIP,
        W_in_to_hid=lasagne.init.HeUniform(),
        W_hid_to_hid=lasagne.init.HeUniform(),
        nonlinearity=lasagne.nonlinearities.tanh,
        only_return_final=True, backwards=True)
    # Now, we'll concatenate the outputs to combine them.
    l_concat = lasagne.layers.ConcatLayer([l_forward, l_backward])
    # Our output layer is a simple dense connection, with 1 output unit
    l_out = lasagne.layers.DenseLayer(
        l_concat, num_units=1, nonlinearity=lasagne.nonlinearities.tanh)

    target_values = T.vector('target_output')

    # lasagne.layers.get_output produces a variable for the output of the net
    network_output = lasagne.layers.get_output(l_out)
    # The network output will have shape (n_batch, 1); let's flatten to get a
    # 1-dimensional vector of predicted values
    predicted_values = network_output.flatten()
    # Our cost will be mean-squared error
    cost = T.mean((predicted_values - target_values)**2)
    # Retrieve all parameters from the network
    all_params = lasagne.layers.get_all_params(l_out)
    # Compute SGD updates for training
    print("Computing updates ...")
    updates = lasagne.updates.adagrad(cost, all_params, LEARNING_RATE)
    # Theano functions for training and computing cost
    print("Compiling functions ...")
    train = theano.function([l_in.input_var, target_values, l_mask.input_var],
                            cost, updates=updates)
    compute_cost = theano.function(
        [l_in.input_var, target_values, l_mask.input_var], cost)

    # We'll use this "validation set" to periodically check progress
    X_val, y_val, mask_val = gen_data()

    print("Training ...")
    try:
        for epoch in range(num_epochs):
            # EPOCH_SIZE freshly generated batches per epoch, then one
            # cost evaluation on the fixed validation batch:
            for _ in range(EPOCH_SIZE):
                X, y, m = gen_data()
                train(X, y, m)
            cost_val = compute_cost(X_val, y_val, mask_val)
            print("Epoch {} validation cost = {}".format(epoch, cost_val))
    except KeyboardInterrupt:
        pass
+
# Run the demo with the default settings when invoked as a script.
if __name__ == '__main__':
    main()
diff --git a/lasagne/__init__.py b/lasagne/__init__.py
new file mode 100644
index 0000000..107f711
--- /dev/null
+++ b/lasagne/__init__.py
@@ -0,0 +1,34 @@
+"""
+Tools to train neural nets in Theano
+"""
+
# Import-time sanity check: importing theano can fail outright, or succeed
# with a version too old to provide theano.tensor.signal.pool; both cases
# raise an ImportError that points the user at the installation docs.
try:
    install_instr = """

Please make sure you install a recent enough version of Theano. Note that a
simple 'pip install theano' will usually give you a version that is too old
for Lasagne. See the installation docs for more details:
http://lasagne.readthedocs.org/en/latest/user/installation.html#theano"""
    import theano
except ImportError:  # pragma: no cover
    raise ImportError("Could not import Theano." + install_instr)
else:
    try:
        # Present only in sufficiently recent Theano versions.
        import theano.tensor.signal.pool
    except ImportError:  # pragma: no cover
        raise ImportError("Your Theano version is too old." + install_instr)
    # Keep the package namespace clean: these were only needed for the check.
    del install_instr
    del theano
+
+
+from . import nonlinearities
+from . import init
+from . import layers
+from . import objectives
+from . import random
+from . import regularization
+from . import updates
+from . import utils
+
+
# Package version string; the ".devN" suffix marks an unreleased
# development version (PEP 440 style).
__version__ = "0.2.dev1"
diff --git a/lasagne/conftest.py b/lasagne/conftest.py
new file mode 100644
index 0000000..39ffa9f
--- /dev/null
+++ b/lasagne/conftest.py
@@ -0,0 +1,12 @@
# Modules whose import requires optional dependencies that may be missing;
# they are excluded from pytest collection in pytest_ignore_collect below.
ignore_test_paths = [
    "*/layers/corrmm.py",
    "*/layers/cuda_convnet.py",
    "*/layers/dnn.py",
    ]
+
+
def pytest_ignore_collect(path, config):
    """Ignore paths that would otherwise be collected by the doctest
    plugin and lead to ImportError due to missing dependencies.
    """
    # `path` is a py.path.local object; fnmatch handles the glob patterns.
    return any(path.fnmatch(ignore) for ignore in ignore_test_paths)
diff --git a/lasagne/init.py b/lasagne/init.py
new file mode 100644
index 0000000..348ddc1
--- /dev/null
+++ b/lasagne/init.py
@@ -0,0 +1,367 @@
+"""
+Functions to create initializers for parameter variables.
+
+Examples
+--------
+>>> from lasagne.layers import DenseLayer
+>>> from lasagne.init import Constant, GlorotUniform
+>>> l1 = DenseLayer((100,20), num_units=50,
+... W=GlorotUniform('relu'), b=Constant(0.0))
+"""
+
+import numpy as np
+
+from .utils import floatX
+from .random import get_rng
+
+
class Initializer(object):
    """Base class for parameter tensor initializers.

    The :class:`Initializer` class represents a weight initializer used
    to initialize weight parameters in a neural network layer. It should be
    subclassed when implementing new types of weight initializers.

    """
    def __call__(self, shape):
        """
        Makes :class:`Initializer` instances callable like a function, invoking
        their :meth:`sample()` method.
        """
        return self.sample(shape)

    def sample(self, shape):
        """
        Sample should return a theano.tensor of size shape and data type
        theano.config.floatX.

        Parameters
        ----------
        shape : tuple or int
            Integer or tuple specifying the size of the returned
            matrix.

        Returns
        -------
        theano.tensor
            Matrix of size shape and dtype theano.config.floatX.

        Raises
        ------
        NotImplementedError
            Always, on this abstract base class; subclasses must override.
        """
        raise NotImplementedError()
+
+
class Normal(Initializer):
    """Initializer drawing weights from a Gaussian N(mean, std).

    Parameters
    ----------
    std : float
        Standard deviation of the distribution.
    mean : float
        Mean of the distribution.
    """
    def __init__(self, std=0.01, mean=0.0):
        self.std = std
        self.mean = mean

    def sample(self, shape):
        # Draw from the module-wide RNG, then cast to theano's float type.
        draws = get_rng().normal(self.mean, self.std, size=shape)
        return floatX(draws)
+
+
class Uniform(Initializer):
    """Initializer drawing weights from a uniform distribution U(a, b).

    Parameters
    ----------
    range : float or tuple
        Used when `std` is None: a float gives U(-range, range), a tuple
        gives U(range[0], range[1]).
    std : float or None
        When given, `range` is ignored and the bounds are chosen so the
        distribution has this standard deviation:
        U(mean - sqrt(3)*std, mean + sqrt(3)*std).
    mean : float
        Center of the distribution when `std` is given.
    """
    def __init__(self, range=0.01, std=None, mean=0.0):
        if std is None:
            # `range` may be a scalar (symmetric bounds) or an (a, b) pair.
            try:
                lo, hi = range
            except TypeError:
                lo, hi = -range, range
        else:
            # A uniform distribution of half-width sqrt(3)*std has
            # standard deviation std.
            half_width = np.sqrt(3) * std
            lo, hi = mean - half_width, mean + half_width

        self.range = (lo, hi)

    def sample(self, shape):
        lo, hi = self.range
        return floatX(get_rng().uniform(low=lo, high=hi, size=shape))
+
+
class Glorot(Initializer):
    """Glorot weight initialization, also known as Xavier initialization
    [1]_.

    Weights are drawn from a distribution with standard deviation

    .. math:: \\sigma = gain \\sqrt{\\frac{2}{fan_{in}+fan_{out}}}

    so e.g. with ``gain='relu'`` and ``initializer=Uniform`` a
    :class:`DenseLayer <lasagne.layers.DenseLayer>` gets weights from
    ``U[-a, a]`` with ``a = sqrt(12 / (fan_in + fan_out))``.

    Parameters
    ----------
    initializer : lasagne.init.Initializer
        Initializer used to sample the weights; its constructor must accept
        `std` to sample from a distribution with that standard deviation.
    gain : float or 'relu'
        Scaling factor for the weights. Set this to ``1.0`` for linear and
        sigmoid units, to 'relu' or ``sqrt(2)`` for rectified linear units,
        and to ``sqrt(2/(1+alpha**2))`` for leaky rectified linear units
        with leakiness ``alpha``. Other transfer functions may need
        different factors.
    c01b : bool
        For a :class:`lasagne.layers.cuda_convnet.Conv2DCCLayer` constructed
        with ``dimshuffle=False``, `c01b` must be set to ``True`` to compute
        the correct fan-in and fan-out.

    References
    ----------
    .. [1] Xavier Glorot and Yoshua Bengio (2010):
           Understanding the difficulty of training deep feedforward neural
           networks. International conference on artificial intelligence and
           statistics.

    See Also
    --------
    GlorotNormal : Shortcut with Gaussian initializer.
    GlorotUniform : Shortcut with uniform initializer.
    """
    def __init__(self, initializer, gain=1.0, c01b=False):
        self.initializer = initializer
        # 'relu' is shorthand for the sqrt(2) gain recommended for ReLUs.
        self.gain = np.sqrt(2) if gain == 'relu' else gain
        self.c01b = c01b

    def sample(self, shape):
        if self.c01b:
            if len(shape) != 4:
                raise RuntimeError(
                    "If c01b is True, only shapes of length 4 are accepted")
            # c01b layout: units live on the first and last axes, the two
            # middle axes form the receptive field.
            n1, n2 = shape[0], shape[3]
            receptive_field_size = shape[1] * shape[2]
        else:
            if len(shape) < 2:
                raise RuntimeError(
                    "This initializer only works with shapes of length >= 2")
            n1, n2 = shape[:2]
            receptive_field_size = np.prod(shape[2:])

        std = self.gain * np.sqrt(2.0 / ((n1 + n2) * receptive_field_size))
        return self.initializer(std=std).sample(shape)
+
+
class GlorotNormal(Glorot):
    """Glorot with weights sampled from the Normal distribution.

    Equivalent to ``Glorot(Normal, gain, c01b)``; see :class:`Glorot` for a
    description of the parameters.
    """
    def __init__(self, gain=1.0, c01b=False):
        super(GlorotNormal, self).__init__(Normal, gain, c01b)
+
+
class GlorotUniform(Glorot):
    """Glorot with weights sampled from the Uniform distribution.

    Equivalent to ``Glorot(Uniform, gain, c01b)``; see :class:`Glorot` for a
    description of the parameters.
    """
    def __init__(self, gain=1.0, c01b=False):
        super(GlorotUniform, self).__init__(Uniform, gain, c01b)
+
+
class He(Initializer):
    """He weight initialization.

    Weights are initialized with a standard deviation of
    :math:`\\sigma = gain \\sqrt{\\frac{1}{fan_{in}}}` [1]_.

    Parameters
    ----------
    initializer : lasagne.init.Initializer
        Initializer used to sample the weights; its constructor must accept
        `std` to sample from a distribution with that standard deviation.
    gain : float or 'relu'
        Scaling factor for the weights. Set this to ``1.0`` for linear and
        sigmoid units, to 'relu' or ``sqrt(2)`` for rectified linear units,
        and to ``sqrt(2/(1+alpha**2))`` for leaky rectified linear units
        with leakiness ``alpha``. Other transfer functions may need
        different factors.
    c01b : bool
        For a :class:`lasagne.layers.cuda_convnet.Conv2DCCLayer` constructed
        with ``dimshuffle=False``, `c01b` must be set to ``True`` to compute
        the correct fan-in.

    References
    ----------
    .. [1] Kaiming He et al. (2015):
           Delving deep into rectifiers: Surpassing human-level performance
           on imagenet classification. arXiv preprint arXiv:1502.01852.

    See Also
    --------
    HeNormal : Shortcut with Gaussian initializer.
    HeUniform : Shortcut with uniform initializer.
    """
    def __init__(self, initializer, gain=1.0, c01b=False):
        self.initializer = initializer
        # 'relu' is shorthand for the sqrt(2) gain recommended for ReLUs.
        self.gain = np.sqrt(2) if gain == 'relu' else gain
        self.c01b = c01b

    def sample(self, shape):
        if self.c01b:
            if len(shape) != 4:
                raise RuntimeError(
                    "If c01b is True, only shapes of length 4 are accepted")
            # c01b layout: everything but the trailing axis feeds each unit.
            fan_in = np.prod(shape[:3])
        else:
            if len(shape) == 2:
                # dense weights: rows are inputs
                fan_in = shape[0]
            elif len(shape) > 2:
                # convolutional weights: all trailing axes feed each unit
                fan_in = np.prod(shape[1:])
            else:
                raise RuntimeError(
                    "This initializer only works with shapes of length >= 2")

        std = self.gain * np.sqrt(1.0 / fan_in)
        return self.initializer(std=std).sample(shape)
+
+
class HeNormal(He):
    """He initializer with weights sampled from the Normal distribution.

    Equivalent to ``He(Normal, gain, c01b)``; see :class:`He` for a
    description of the parameters.
    """
    def __init__(self, gain=1.0, c01b=False):
        super(HeNormal, self).__init__(Normal, gain, c01b)
+
+
class HeUniform(He):
    """He initializer with weights sampled from the Uniform distribution.

    Equivalent to ``He(Uniform, gain, c01b)``; see :class:`He` for a
    description of the parameters.
    """
    def __init__(self, gain=1.0, c01b=False):
        super(HeUniform, self).__init__(Uniform, gain, c01b)
+
+
class Constant(Initializer):
    """Initialize every weight with the same constant value.

    Parameters
    ----------
    val : float
        Constant value for weights.
    """
    def __init__(self, val=0.0):
        self.val = val

    def sample(self, shape):
        # Broadcast the scalar over an all-ones array of the target shape,
        # then cast to theano's float type.
        return floatX(self.val * np.ones(shape))
+
+
class Sparse(Initializer):
    """Initialize weights as a sparse matrix.

    Parameters
    ----------
    sparsity : float
        Exact fraction of non-zero values per column. Larger values give
        less sparsity.
    std : float
        Non-zero weights are sampled from N(0, std).
    """
    def __init__(self, sparsity=0.1, std=0.01):
        self.sparsity = sparsity
        self.std = std

    def sample(self, shape):
        if len(shape) != 2:
            raise RuntimeError(
                "sparse initializer only works with shapes of length 2")

        n_inputs, n_outputs = shape
        weights = floatX(np.zeros(shape))
        # number of non-zero entries per column
        n_nonzero = int(self.sparsity * n_inputs)

        for col in range(n_outputs):
            # pick a random subset of rows for this column...
            rows = np.arange(n_inputs)
            get_rng().shuffle(rows)
            rows = rows[:n_nonzero]
            # ...and fill them with Gaussian draws.
            weights[rows, col] = floatX(
                get_rng().normal(0.0, self.std, size=n_nonzero))

        return weights
+
+
class Orthogonal(Initializer):
    """Initialize weights as an orthogonal matrix.

    Orthogonal matrix initialization [1]_. For n-dimensional shapes where
    n > 2, the n-1 trailing axes are flattened; for convolutional layers
    this corresponds to the fan-in, so the initialization is usable for
    both dense and convolutional layers.

    Parameters
    ----------
    gain : float or 'relu'
        Scaling factor for the weights. Set this to ``1.0`` for linear and
        sigmoid units, to 'relu' or ``sqrt(2)`` for rectified linear units,
        and to ``sqrt(2/(1+alpha**2))`` for leaky rectified linear units
        with leakiness ``alpha``. Other transfer functions may need
        different factors.

    References
    ----------
    .. [1] Saxe, Andrew M., James L. McClelland, and Surya Ganguli.
           "Exact solutions to the nonlinear dynamics of learning in deep
           linear neural networks." arXiv preprint arXiv:1312.6120 (2013).
    """
    def __init__(self, gain=1.0):
        # 'relu' is shorthand for the sqrt(2) gain recommended for ReLUs.
        self.gain = np.sqrt(2) if gain == 'relu' else gain

    def sample(self, shape):
        if len(shape) < 2:
            raise RuntimeError("Only shapes of length 2 or more are "
                               "supported.")

        # Flatten the trailing axes, draw a Gaussian matrix, and take an
        # orthonormal factor from its SVD.
        flat_shape = (shape[0], np.prod(shape[1:]))
        gaussian = get_rng().normal(0.0, 1.0, flat_shape)
        u, _, v = np.linalg.svd(gaussian, full_matrices=False)
        # exactly one of u, v has the flattened shape; pick that one
        q = u if u.shape == flat_shape else v
        return floatX(self.gain * q.reshape(shape))
diff --git a/lasagne/layers/__init__.py b/lasagne/layers/__init__.py
new file mode 100644
index 0000000..f11c37e
--- /dev/null
+++ b/lasagne/layers/__init__.py
@@ -0,0 +1,13 @@
+from .base import *
+from .helper import *
+from .input import *
+from .dense import *
+from .noise import *
+from .conv import *
+from .pool import *
+from .shape import *
+from .merge import *
+from .normalization import *
+from .embedding import *
+from .recurrent import *
+from .special import *
diff --git a/lasagne/layers/base.py b/lasagne/layers/base.py
new file mode 100644
index 0000000..868df84
--- /dev/null
+++ b/lasagne/layers/base.py
@@ -0,0 +1,328 @@
+from collections import OrderedDict
+
+import theano.tensor as T
+
+from .. import utils
+
+
+__all__ = [
+ "Layer",
+ "MergeLayer",
+]
+
+
+# Layer base class
+
+class Layer(object):
+ """
+ The :class:`Layer` class represents a single layer of a neural network. It
+ should be subclassed when implementing new types of layers.
+
+ Because each layer can keep track of the layer(s) feeding into it, a
+ network's output :class:`Layer` instance can double as a handle to the full
+ network.
+
+ Parameters
+ ----------
+ incoming : a :class:`Layer` instance or a tuple
+ The layer feeding into this layer, or the expected input shape.
+ name : a string or None
+ An optional name to attach to this layer.
+ """
+ def __init__(self, incoming, name=None):
+ if isinstance(incoming, tuple):
+ self.input_shape = incoming
+ self.input_layer = None
+ else:
+ self.input_shape = incoming.output_shape
+ self.input_layer = incoming
+
+ self.name = name
+ self.params = OrderedDict()
+ self.get_output_kwargs = []
+
+ if any(d is not None and d <= 0 for d in self.input_shape):
+ raise ValueError((
+ "Cannot create Layer with a non-positive input_shape "
+ "dimension. input_shape=%r, self.name=%r") % (
+ self.input_shape, self.name))
+
+ @property
+ def output_shape(self):
+ shape = self.get_output_shape_for(self.input_shape)
+ if any(isinstance(s, T.Variable) for s in shape):
+ raise ValueError("%s returned a symbolic output shape from its "
+ "get_output_shape_for() method: %r. This is not "
+ "allowed; shapes must be tuples of integers for "
+ "fixed-size dimensions and Nones for variable "
+ "dimensions." % (self.__class__.__name__, shape))
+ return shape
+
+ def get_params(self, unwrap_shared=True, **tags):
+ """
+ Returns a list of Theano shared variables or expressions that
+ parameterize the layer.
+
+ By default, all shared variables that participate in the forward pass
+ will be returned (in the order they were registered in the Layer's
+ constructor via :meth:`add_param()`). The list can optionally be
+ filtered by specifying tags as keyword arguments. For example,
+ ``trainable=True`` will only return trainable parameters, and
+ ``regularizable=True`` will only return parameters that can be
+ regularized (e.g., by L2 decay).
+
+ If any of the layer's parameters was set to a Theano expression instead
+ of a shared variable, `unwrap_shared` controls whether to return the
+ shared variables involved in that expression (``unwrap_shared=True``,
+ the default), or the expression itself (``unwrap_shared=False``). In
+ either case, tag filtering applies to the expressions, considering all
+ variables within an expression to be tagged the same.
+
+ Parameters
+ ----------
+ unwrap_shared : bool (default: True)
+ Affects only parameters that were set to a Theano expression. If
+ ``True`` the function returns the shared variables contained in
+ the expression, otherwise the Theano expression itself.
+
+ **tags (optional)
+ tags can be specified to filter the list. Specifying ``tag1=True``
+ will limit the list to parameters that are tagged with ``tag1``.
+ Specifying ``tag1=False`` will limit the list to parameters that
+ are not tagged with ``tag1``. Commonly used tags are
+ ``regularizable`` and ``trainable``.
+
+ Returns
+ -------
+ list of Theano shared variables or expressions
+ A list of variables that parameterize the layer
+
+ Notes
+ -----
+ For layers without any parameters, this will return an empty list.
+ """
+ result = list(self.params.keys())
+
+ only = set(tag for tag, value in tags.items() if value)
+ if only:
+ # retain all parameters that have all of the tags in `only`
+ result = [param for param in result
+ if not (only - self.params[param])]
+
+ exclude = set(tag for tag, value in tags.items() if not value)
+ if exclude:
+ # retain all parameters that have none of the tags in `exclude`
+ result = [param for param in result
+ if not (self.params[param] & exclude)]
+
+ if unwrap_shared:
+ return utils.collect_shared_vars(result)
+ else:
+ return result
+
+ def get_output_shape_for(self, input_shape):
+ """
+ Computes the output shape of this layer, given an input shape.
+
+ Parameters
+ ----------
+ input_shape : tuple
+ A tuple representing the shape of the input. The tuple should have
+ as many elements as there are input dimensions, and the elements
+ should be integers or `None`.
+
+ Returns
+ -------
+ tuple
+ A tuple representing the shape of the output of this layer. The
+ tuple has as many elements as there are output dimensions, and the
+ elements are all either integers or `None`.
+
+ Notes
+ -----
+ This method will typically be overridden when implementing a new
+ :class:`Layer` class. By default it simply returns the input
+ shape. This means that a layer that does not modify the shape
+ (e.g. because it applies an elementwise operation) does not need
+ to override this method.
+ """
+ return input_shape
+
+ def get_output_for(self, input, **kwargs):
+ """
+ Propagates the given input through this layer (and only this layer).
+
+ Parameters
+ ----------
+ input : Theano expression
+ The expression to propagate through this layer.
+
+ Returns
+ -------
+ output : Theano expression
+ The output of this layer given the input to this layer.
+
+
+ Notes
+ -----
+ This is called by the base :meth:`lasagne.layers.get_output()`
+ to propagate data through a network.
+
+ This method should be overridden when implementing a new
+ :class:`Layer` class. By default it raises `NotImplementedError`.
+ """
+ raise NotImplementedError
+
+ def add_param(self, spec, shape, name=None, **tags):
+ """
+ Register and possibly initialize a parameter tensor for the layer.
+
+ When defining a layer class, this method is called in the constructor
+ to define which parameters the layer has, what their shapes are, how
+ they should be initialized and what tags are associated with them.
+ This allows layer classes to transparently support parameter
+ initialization from numpy arrays and callables, as well as setting
+ parameters to existing Theano shared variables or Theano expressions.
+
+ All registered parameters are stored along with their tags in the
+ ordered dictionary :attr:`Layer.params`, and can be retrieved with
+ :meth:`Layer.get_params()`, optionally filtered by their tags.
+
+ Parameters
+ ----------
+ spec : Theano shared variable, expression, numpy array or callable
+ initial value, expression or initializer for this parameter.
+ See :func:`lasagne.utils.create_param` for more information.
+
+ shape : tuple of int
+ a tuple of integers representing the desired shape of the
+ parameter tensor.
+
+ name : str (optional)
+ a descriptive name for the parameter variable. This will be passed
+ to ``theano.shared`` when the variable is created, prefixed by the
+ layer's name if any (in the form ``'layer_name.param_name'``). If
+ ``spec`` is already a shared variable or expression, this parameter
+ will be ignored to avoid overwriting an existing name.
+
+ **tags (optional)
+ tags associated with the parameter can be specified as keyword
+ arguments. To associate the tag ``tag1`` with the parameter, pass
+ ``tag1=True``.
+
+ By default, the tags ``regularizable`` and ``trainable`` are
+ associated with the parameter. Pass ``regularizable=False`` or
+ ``trainable=False`` respectively to prevent this.
+
+ Returns
+ -------
+ Theano shared variable or Theano expression
+ the resulting parameter variable or parameter expression
+
+ Notes
+ -----
+ It is recommended to assign the resulting parameter variable/expression
+ to an attribute of the layer for easy access, for example:
+
+ >>> self.W = self.add_param(W, (2, 3), name='W') #doctest: +SKIP
+ """
+ # prefix the param name with the layer name if it exists
+ if name is not None:
+ if self.name is not None:
+ name = "%s.%s" % (self.name, name)
+ # create shared variable, or pass through given variable/expression
+ param = utils.create_param(spec, shape, name)
+ # parameters should be trainable and regularizable by default
+ tags['trainable'] = tags.get('trainable', True)
+ tags['regularizable'] = tags.get('regularizable', True)
+ self.params[param] = set(tag for tag, value in tags.items() if value)
+
+ return param
+
+
class MergeLayer(Layer):
    """
    A layer that aggregates input from multiple layers.

    Subclass this when implementing a new layer type that obtains its input
    from several layers at once.

    Parameters
    ----------
    incomings : a list of :class:`Layer` instances or tuples
        The layers feeding into this layer, or expected input shapes.
    name : a string or None
        An optional name to attach to this layer.
    """
    def __init__(self, incomings, name=None):
        # For each incoming, record its shape and (where available) the
        # layer object itself; a plain tuple stands in for an unknown layer.
        self.input_shapes = []
        self.input_layers = []
        for incoming in incomings:
            if isinstance(incoming, tuple):
                self.input_shapes.append(incoming)
                self.input_layers.append(None)
            else:
                self.input_shapes.append(incoming.output_shape)
                self.input_layers.append(incoming)
        self.name = name
        self.params = OrderedDict()
        self.get_output_kwargs = []

    @Layer.output_shape.getter
    def output_shape(self):
        shape = self.get_output_shape_for(self.input_shapes)
        # Symbolic entries are rejected: shapes must be static.
        if any(isinstance(entry, T.Variable) for entry in shape):
            raise ValueError("%s returned a symbolic output shape from its "
                             "get_output_shape_for() method: %r. This is not "
                             "allowed; shapes must be tuples of integers for "
                             "fixed-size dimensions and Nones for variable "
                             "dimensions." % (self.__class__.__name__, shape))
        return shape

    def get_output_shape_for(self, input_shapes):
        """
        Compute this layer's output shape from a list of input shapes.

        Parameters
        ----------
        input_shapes : list of tuple
            One tuple per input (in order), each with as many entries as the
            input has dimensions; entries are integers or `None`.

        Returns
        -------
        tuple
            The output shape, with one integer-or-`None` entry per output
            dimension.

        Notes
        -----
        Must be overridden when implementing a new :class:`Layer` class
        with multiple inputs; this base implementation raises
        `NotImplementedError`.
        """
        raise NotImplementedError

    def get_output_for(self, inputs, **kwargs):
        """
        Propagate the given inputs through this layer (and this layer only).

        Parameters
        ----------
        inputs : list of Theano expressions
            The Theano expressions to propagate through this layer.

        Returns
        -------
        Theano expressions
            The output of this layer given the inputs to this layer.

        Notes
        -----
        Called by :meth:`lasagne.layers.get_output()` to propagate data
        through a network. Must be overridden when implementing a new
        :class:`Layer` class with multiple inputs; this base implementation
        raises `NotImplementedError`.
        """
        raise NotImplementedError
diff --git a/lasagne/layers/conv.py b/lasagne/layers/conv.py
new file mode 100644
index 0000000..9288d23
--- /dev/null
+++ b/lasagne/layers/conv.py
@@ -0,0 +1,934 @@
+import theano.tensor as T
+
+from .. import init
+from .. import nonlinearities
+from ..utils import as_tuple
+from ..theano_extensions import conv, padding
+
+from .base import Layer
+
+
+__all__ = [
+ "Conv1DLayer",
+ "Conv2DLayer",
+ "TransposedConv2DLayer",
+ "Deconv2DLayer",
+ "DilatedConv2DLayer",
+]
+
+
def conv_output_length(input_length, filter_size, stride, pad=0):
    """Compute the output size of a convolution along a single axis.

    This corresponds to a 1D convolution; for higher-dimensional
    convolutions, apply it separately to each spatial axis.

    Parameters
    ----------
    input_length : int or None
        The size of the input.

    filter_size : int
        The size of the filter.

    stride : int
        The stride of the convolution operation.

    pad : int, 'full', 'same' or 'valid' (default: 0)
        Implicit zero-padding of the input. An integer pads symmetrically
        with that many zeros on both borders. ``'full'`` pads with one less
        than the filter size on both sides, i.e. the convolution is computed
        wherever input and filter overlap by at least one position.
        ``'same'`` pads with half the filter size on both sides (one less on
        the second side for an even filter size), so that ``stride=1``
        yields an output of the same size as the input. ``'valid'`` is an
        alias for ``0`` (no padding).

    Returns
    -------
    int or None
        The output size for the given convolution parameters, or ``None``
        if `input_length` is ``None``.

    Raises
    ------
    ValueError
        If `pad` is neither an integer nor one of the recognized strings.
    """
    if input_length is None:
        return None
    if pad == 'valid':
        unstrided = input_length - filter_size + 1
    elif pad == 'full':
        unstrided = input_length + filter_size - 1
    elif pad == 'same':
        unstrided = input_length
    elif isinstance(pad, int):
        unstrided = input_length + 2 * pad - filter_size + 1
    else:
        raise ValueError('Invalid pad: {0}'.format(pad))

    # Integer ceiling division by the stride: ceil(unstrided / stride).
    return (unstrided + stride - 1) // stride
+
+
def conv_input_length(output_length, filter_size, stride, pad=0):
    """Compute the input size of a convolution along a single axis.

    This corresponds to a 1D convolution; for higher-dimensional
    convolutions, apply it separately to each spatial axis.

    Parameters
    ----------
    output_length : int or None
        The size of the output.

    filter_size : int
        The size of the filter.

    stride : int
        The stride of the convolution operation.

    pad : int, 'full', 'same' or 'valid' (default: 0)
        Implicit zero-padding assumed for the convolution. An integer pads
        symmetrically with that many zeros on both borders. ``'full'`` pads
        with one less than the filter size on both sides. ``'same'`` pads
        with half the filter size on both sides (one less on the second side
        for an even filter size). ``'valid'`` is an alias for ``0``.

    Returns
    -------
    int or None
        The smallest input size that yields the given output size under the
        given convolution parameters, or ``None`` if `output_length` is
        ``None``. For a strided convolution, any input up to ``stride - 1``
        elements larger would still produce the same output size.

    Raises
    ------
    ValueError
        If `pad` is neither an integer nor one of the recognized strings.

    Notes
    -----
    This can be used to compute the output size of a convolution backward
    pass, also called transposed convolution, fractionally-strided
    convolution or (wrongly) deconvolution in the literature.
    """
    if output_length is None:
        return None
    # Resolve the symbolic padding modes to a concrete per-side amount.
    if pad == 'valid':
        pad_amount = 0
    elif pad == 'full':
        pad_amount = filter_size - 1
    elif pad == 'same':
        pad_amount = filter_size // 2
    else:
        pad_amount = pad
    if not isinstance(pad_amount, int):
        raise ValueError('Invalid pad: {0}'.format(pad))
    return (output_length - 1) * stride - 2 * pad_amount + filter_size
+
+
class BaseConvLayer(Layer):
    """
    lasagne.layers.BaseConvLayer(incoming, num_filters, filter_size,
    stride=1, pad=0, untie_biases=False,
    W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.),
    nonlinearity=lasagne.nonlinearities.rectify, flip_filters=True,
    n=None, **kwargs)

    Convolutional layer base class

    Performs an `n`-dimensional convolution on its input, optionally adding
    a bias and applying an elementwise nonlinearity. This class is abstract
    and cannot itself be used in a Lasagne network; use a subclass that
    implements :meth:`convolve` instead (e.g., :class:`Conv1DLayer`,
    :class:`Conv2DLayer`).

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape. Must
        be a tensor of 2+`n` dimensions:
        ``(batch_size, num_input_channels, <n spatial dimensions>)``.

    num_filters : int
        The number of learnable convolutional filters this layer has.

    filter_size : int or iterable of int
        An integer or an `n`-element tuple specifying the size of the
        filters.

    stride : int or iterable of int
        An integer or an `n`-element tuple specifying the stride of the
        convolution operation.

    pad : int, iterable of int, 'full', 'same' or 'valid' (default: 0)
        Implicit zero-padding of the input. A single integer pads
        symmetrically on all borders; a tuple of `n` integers allows
        different symmetric padding per dimension. ``'full'`` pads with one
        less than the filter size on both sides, computing the convolution
        wherever input and filter overlap by at least one position.
        ``'same'`` pads with half the filter size (rounded down) on both
        sides, so that ``stride=1`` keeps the output size equal to the input
        size; even filter sizes are not supported. ``'valid'`` is an alias
        for ``0`` (no padding). Note that ``'full'`` and ``'same'`` can be
        faster than equivalent integer values due to optimizations by
        Theano.

    untie_biases : bool (default: False)
        If ``False``, the layer has one bias per channel, shared across all
        positions in that channel, and `b` is a vector (1D). If ``True``,
        the layer has a separate bias per channel and position, and `b` is
        an `n`-dimensional tensor.

    W : Theano shared variable, expression, numpy array or callable
        Initial value, expression or initializer for the weights: a tensor
        of 2+`n` dimensions with shape
        ``(num_filters, num_input_channels, <n spatial dimensions>)``.
        See :func:`lasagne.utils.create_param` for more information.

    b : Theano shared variable, expression, numpy array, callable or ``None``
        Initial value, expression or initializer for the biases, or ``None``
        for an unbiased layer. Shape ``(num_filters,)`` for tied biases,
        ``(num_filters, <n spatial dimensions>)`` for untied ones.
        See :func:`lasagne.utils.create_param` for more information.

    nonlinearity : callable or None
        The nonlinearity that is applied to the layer activations. If None
        is provided, the layer will be linear.

    flip_filters : bool (default: True)
        Whether to flip the filters before sliding them over the input,
        performing a convolution (the default), or not to flip them and
        perform a correlation. Note that for some other convolutional layers
        in Lasagne, flipping incurs an overhead and is disabled by default;
        check the documentation when using learned weights from another
        layer.

    n : int or None
        The dimensionality of the convolution (the number of spatial
        dimensions of each feature map and filter). Inferred from the input
        shape if ``None``.

    **kwargs
        Any additional keyword arguments are passed to the `Layer`
        superclass.

    Attributes
    ----------
    W : Theano shared variable or expression
        Variable or expression representing the filter weights.

    b : Theano shared variable or expression
        Variable or expression representing the biases.
    """
    def __init__(self, incoming, num_filters, filter_size, stride=1, pad=0,
                 untie_biases=False,
                 W=init.GlorotUniform(), b=init.Constant(0.),
                 nonlinearity=nonlinearities.rectify, flip_filters=True,
                 n=None, **kwargs):
        super(BaseConvLayer, self).__init__(incoming, **kwargs)
        # None requests a linear layer; substitute the identity.
        self.nonlinearity = (nonlinearities.identity if nonlinearity is None
                             else nonlinearity)

        # Infer or validate the number of spatial dimensions.
        spatial_dims = len(self.input_shape) - 2
        if n is None:
            n = spatial_dims
        elif n != spatial_dims:
            raise ValueError("Tried to create a %dD convolution layer with "
                             "input shape %r. Expected %d input dimensions "
                             "(batchsize, channels, %d spatial dimensions)." %
                             (n, self.input_shape, n+2, n))
        self.n = n
        self.num_filters = num_filters
        self.filter_size = as_tuple(filter_size, n, int)
        self.flip_filters = flip_filters
        self.stride = as_tuple(stride, n, int)
        self.untie_biases = untie_biases

        # 'same' padding is only well-defined for odd filter sizes.
        if pad == 'same':
            if any(size % 2 == 0 for size in self.filter_size):
                raise NotImplementedError(
                    '`same` padding requires odd filter size.')
        if pad == 'valid':
            self.pad = as_tuple(0, n)
        elif pad in ('full', 'same'):
            # Kept symbolic so Theano can pick an optimized implementation.
            self.pad = pad
        else:
            self.pad = as_tuple(pad, n, int)

        self.W = self.add_param(W, self.get_W_shape(), name="W")
        if b is None:
            self.b = None
        else:
            if self.untie_biases:
                # One bias per filter and per spatial position.
                biases_shape = (num_filters,) + self.output_shape[2:]
            else:
                biases_shape = (num_filters,)
            self.b = self.add_param(b, biases_shape, name="b",
                                    regularizable=False)

    def get_W_shape(self):
        """Get the shape of the weight matrix `W`.

        Returns
        -------
        tuple of int
            The shape of the weight matrix:
            ``(num_filters, num_input_channels) + filter_size``.
        """
        return ((self.num_filters, self.input_shape[1]) + self.filter_size)

    def get_output_shape_for(self, input_shape):
        # Normalize symbolic padding ('full'/'same') to one entry per axis.
        if isinstance(self.pad, tuple):
            pad = self.pad
        else:
            pad = (self.pad,) * self.n
        spatial = [conv_output_length(length, size, stride, p)
                   for length, size, stride, p
                   in zip(input_shape[2:], self.filter_size,
                          self.stride, pad)]
        return (input_shape[0], self.num_filters) + tuple(spatial)

    def get_output_for(self, input, **kwargs):
        conved = self.convolve(input, **kwargs)

        if self.b is None:
            pre_activation = conved
        elif self.untie_biases:
            # Untied biases span the spatial axes; broadcast over batch.
            pre_activation = conved + T.shape_padleft(self.b, 1)
        else:
            # Tied biases broadcast over batch and all spatial axes.
            pre_activation = conved + self.b.dimshuffle(
                ('x', 0) + ('x',) * self.n)

        return self.nonlinearity(pre_activation)

    def convolve(self, input, **kwargs):
        """
        Symbolically convolves `input` with ``self.W``, producing an output
        of shape ``self.output_shape``. To be implemented by subclasses.

        Parameters
        ----------
        input : Theano tensor
            The input minibatch to convolve
        **kwargs
            Any additional keyword arguments from :meth:`get_output_for`

        Returns
        -------
        Theano tensor
            `input` convolved according to the configuration of this layer,
            without any bias or nonlinearity applied.
        """
        raise NotImplementedError("BaseConvLayer does not implement the "
                                  "convolve() method. You will want to "
                                  "use a subclass such as Conv2DLayer.")
+
+
class Conv1DLayer(BaseConvLayer):
    """
    lasagne.layers.Conv1DLayer(incoming, num_filters, filter_size, stride=1,
    pad=0, untie_biases=False, W=lasagne.init.GlorotUniform(),
    b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify,
    flip_filters=True, convolution=lasagne.theano_extensions.conv.conv1d_mc0,
    **kwargs)

    1D convolutional layer

    Performs a 1D convolution on its input and optionally adds a bias and
    applies an elementwise nonlinearity.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape: a
        3D tensor of shape
        ``(batch_size, num_input_channels, input_length)``.

    num_filters : int
        The number of learnable convolutional filters this layer has.

    filter_size : int or iterable of int
        An integer or a 1-element tuple specifying the size of the filters.

    stride : int or iterable of int
        An integer or a 1-element tuple specifying the stride of the
        convolution operation.

    pad : int, iterable of int, 'full', 'same' or 'valid' (default: 0)
        Implicit zero-padding of the input. An integer or a 1-element tuple
        pads symmetrically on both borders. ``'full'`` pads with one less
        than the filter size on both sides, computing the convolution
        wherever input and filter overlap by at least one position.
        ``'same'`` pads with half the filter size (rounded down) on both
        sides, so that ``stride=1`` keeps the output size equal to the
        input size; even filter sizes are not supported. ``'valid'`` is an
        alias for ``0`` (no padding).

    untie_biases : bool (default: False)
        If ``False``, the layer has one bias per channel, shared across all
        positions in that channel, and `b` is a vector (1D). If ``True``,
        the layer has a separate bias per channel and position, and `b` is
        a matrix (2D).

    W : Theano shared variable, expression, numpy array or callable
        Initial value, expression or initializer for the weights: a 3D
        tensor of shape ``(num_filters, num_input_channels, filter_length)``.
        See :func:`lasagne.utils.create_param` for more information.

    b : Theano shared variable, expression, numpy array, callable or ``None``
        Initial value, expression or initializer for the biases, or ``None``
        for an unbiased layer. Shape ``(num_filters,)`` for tied biases,
        ``(num_filters, input_length)`` for untied ones.
        See :func:`lasagne.utils.create_param` for more information.

    nonlinearity : callable or None
        The nonlinearity that is applied to the layer activations. If None
        is provided, the layer will be linear.

    flip_filters : bool (default: True)
        Whether to flip the filters before sliding them over the input,
        performing a convolution (the default), or not to flip them and
        perform a correlation. Note that for some other convolutional
        layers in Lasagne, flipping incurs an overhead and is disabled by
        default; check the documentation when using learned weights from
        another layer.

    convolution : callable
        The convolution implementation to use. The
        `lasagne.theano_extensions.conv` module provides alternative
        implementations for 1D convolutions, because the Theano API only
        features a 2D convolution implementation. Usually the default is
        fine; note that not all implementations support all settings for
        `pad` and `subsample`.

    **kwargs
        Any additional keyword arguments are passed to the `Layer`
        superclass.

    Attributes
    ----------
    W : Theano shared variable or expression
        Variable or expression representing the filter weights.

    b : Theano shared variable or expression
        Variable or expression representing the biases.
    """
    def __init__(self, incoming, num_filters, filter_size, stride=1,
                 pad=0, untie_biases=False,
                 W=init.GlorotUniform(), b=init.Constant(0.),
                 nonlinearity=nonlinearities.rectify, flip_filters=True,
                 convolution=conv.conv1d_mc0, **kwargs):
        super(Conv1DLayer, self).__init__(incoming, num_filters, filter_size,
                                          stride, pad, untie_biases, W, b,
                                          nonlinearity, flip_filters, n=1,
                                          **kwargs)
        self.convolution = convolution

    def convolve(self, input, **kwargs):
        # Theano expresses our 'same' padding as border_mode='half'.
        mode = self.pad if self.pad != 'same' else 'half'
        return self.convolution(input, self.W,
                                self.input_shape, self.get_W_shape(),
                                subsample=self.stride,
                                border_mode=mode,
                                filter_flip=self.flip_filters)
+
+
class Conv2DLayer(BaseConvLayer):
    """
    lasagne.layers.Conv2DLayer(incoming, num_filters, filter_size,
    stride=(1, 1), pad=0, untie_biases=False,
    W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.),
    nonlinearity=lasagne.nonlinearities.rectify, flip_filters=True,
    convolution=theano.tensor.nnet.conv2d, **kwargs)

    2D convolutional layer

    Performs a 2D convolution on its input and optionally adds a bias and
    applies an elementwise nonlinearity.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape: a
        4D tensor of shape
        ``(batch_size, num_input_channels, input_rows, input_columns)``.

    num_filters : int
        The number of learnable convolutional filters this layer has.

    filter_size : int or iterable of int
        An integer or a 2-element tuple specifying the size of the filters.

    stride : int or iterable of int
        An integer or a 2-element tuple specifying the stride of the
        convolution operation.

    pad : int, iterable of int, 'full', 'same' or 'valid' (default: 0)
        Implicit zero-padding of the input. A single integer pads
        symmetrically on all borders; a tuple of two integers allows
        different symmetric padding per dimension. ``'full'`` pads with one
        less than the filter size on both sides, computing the convolution
        wherever input and filter overlap by at least one position.
        ``'same'`` pads with half the filter size (rounded down) on both
        sides, so that ``stride=1`` keeps the output size equal to the
        input size; even filter sizes are not supported. ``'valid'`` is an
        alias for ``0`` (no padding). Note that ``'full'`` and ``'same'``
        can be faster than equivalent integer values due to optimizations
        by Theano.

    untie_biases : bool (default: False)
        If ``False``, the layer has one bias per channel, shared across all
        positions in that channel, and `b` is a vector (1D). If ``True``,
        the layer has a separate bias per channel and position, and `b` is
        a 3D tensor.

    W : Theano shared variable, expression, numpy array or callable
        Initial value, expression or initializer for the weights: a 4D
        tensor of shape
        ``(num_filters, num_input_channels, filter_rows, filter_columns)``.
        See :func:`lasagne.utils.create_param` for more information.

    b : Theano shared variable, expression, numpy array, callable or ``None``
        Initial value, expression or initializer for the biases, or ``None``
        for an unbiased layer. Shape ``(num_filters,)`` for tied biases,
        ``(num_filters, output_rows, output_columns)`` for untied ones.
        See :func:`lasagne.utils.create_param` for more information.

    nonlinearity : callable or None
        The nonlinearity that is applied to the layer activations. If None
        is provided, the layer will be linear.

    flip_filters : bool (default: True)
        Whether to flip the filters before sliding them over the input,
        performing a convolution (the default), or not to flip them and
        perform a correlation. Note that for some other convolutional
        layers in Lasagne, flipping incurs an overhead and is disabled by
        default; check the documentation when using learned weights from
        another layer.

    convolution : callable
        The convolution implementation to use. Usually it should be fine to
        leave this at the default value.

    **kwargs
        Any additional keyword arguments are passed to the `Layer`
        superclass.

    Attributes
    ----------
    W : Theano shared variable or expression
        Variable or expression representing the filter weights.

    b : Theano shared variable or expression
        Variable or expression representing the biases.
    """
    def __init__(self, incoming, num_filters, filter_size, stride=(1, 1),
                 pad=0, untie_biases=False,
                 W=init.GlorotUniform(), b=init.Constant(0.),
                 nonlinearity=nonlinearities.rectify, flip_filters=True,
                 convolution=T.nnet.conv2d, **kwargs):
        super(Conv2DLayer, self).__init__(incoming, num_filters, filter_size,
                                          stride, pad, untie_biases, W, b,
                                          nonlinearity, flip_filters, n=2,
                                          **kwargs)
        self.convolution = convolution

    def convolve(self, input, **kwargs):
        # Theano expresses our 'same' padding as border_mode='half'.
        mode = self.pad if self.pad != 'same' else 'half'
        return self.convolution(input, self.W,
                                self.input_shape, self.get_W_shape(),
                                subsample=self.stride,
                                border_mode=mode,
                                filter_flip=self.flip_filters)
+
+# TODO: add Conv3DLayer
+
+
+class TransposedConv2DLayer(BaseConvLayer):
+ """
+ lasagne.layers.TransposedConv2DLayer(incoming, num_filters, filter_size,
+ stride=(1, 1), crop=0, untie_biases=False,
+ W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.),
+ nonlinearity=lasagne.nonlinearities.rectify, flip_filters=False, **kwargs)
+
+ 2D transposed convolution layer
+
+ Performs the backward pass of a 2D convolution (also called transposed
+ convolution, fractionally-strided convolution or deconvolution in the
+ literature) on its input and optionally adds a bias and applies an
+ elementwise nonlinearity.
+
+ Parameters
+ ----------
+ incoming : a :class:`Layer` instance or a tuple
+ The layer feeding into this layer, or the expected input shape. The
+ output of this layer should be a 4D tensor, with shape
+ ``(batch_size, num_input_channels, input_rows, input_columns)``.
+
+ num_filters : int
+ The number of learnable convolutional filters this layer has.
+
+ filter_size : int or iterable of int
+ An integer or a 2-element tuple specifying the size of the filters.
+
+ stride : int or iterable of int
+ An integer or a 2-element tuple specifying the stride of the
+ transposed convolution operation. For the transposed convolution, this
+ gives the dilation factor for the input -- increasing it increases the
+ output size.
+
+ crop : int, iterable of int, 'full', 'same' or 'valid' (default: 0)
+ By default, the transposed convolution is computed where the input and
+ the filter overlap by at least one position (a full convolution). When
+ ``stride=1``, this yields an output that is larger than the input by
+ ``filter_size - 1``. It can be thought of as a valid convolution padded
+ with zeros. The `crop` argument allows you to decrease the amount of
+ this zero-padding, reducing the output size. It is the counterpart to
+ the `pad` argument in a non-transposed convolution.
+
+ A single integer results in symmetric cropping of the given size on all
+ borders, a tuple of two integers allows different symmetric cropping
+ per dimension.
+
+ ``'full'`` disables zero-padding. It is is equivalent to computing the
+ convolution wherever the input and the filter fully overlap.
+
+ ``'same'`` pads with half the filter size (rounded down) on both sides.
+ When ``stride=1`` this results in an output size equal to the input
+ size. Even filter size is not supported.
+
+ ``'valid'`` is an alias for ``0`` (no cropping / a full convolution).
+
+ Note that ``'full'`` and ``'same'`` can be faster than equivalent
+ integer values due to optimizations by Theano.
+
+ untie_biases : bool (default: False)
+ If ``False``, the layer will have a bias parameter for each channel,
+ which is shared across all positions in this channel. As a result, the
+ `b` attribute will be a vector (1D).
+
+ If True, the layer will have separate bias parameters for each
+ position in each channel. As a result, the `b` attribute will be a
+ 3D tensor.
+
+ W : Theano shared variable, expression, numpy array or callable
+ Initial value, expression or initializer for the weights.
+ These should be a 4D tensor with shape
+ ``(num_input_channels, num_filters, filter_rows, filter_columns)``.
+ Note that the first two dimensions are swapped compared to a
+ non-transposed convolution.
+ See :func:`lasagne.utils.create_param` for more information.
+
+ b : Theano shared variable, expression, numpy array, callable or ``None``
+ Initial value, expression or initializer for the biases. If set to
+ ``None``, the layer will have no biases. Otherwise, biases should be
+ a 1D array with shape ``(num_filters,)`` if `untied_biases` is set to
+ ``False``. If it is set to ``True``, its shape should be
+ ``(num_filters, output_rows, output_columns)`` instead.
+ See :func:`lasagne.utils.create_param` for more information.
+
+ nonlinearity : callable or None
+ The nonlinearity that is applied to the layer activations. If None
+ is provided, the layer will be linear.
+
+ flip_filters : bool (default: False)
+ Whether to flip the filters before sliding them over the input,
+ performing a convolution, or not to flip them and perform a
+ correlation (this is the default). Note that this flag is inverted
+ compared to a non-transposed convolution.
+
+ **kwargs
+ Any additional keyword arguments are passed to the `Layer` superclass.
+
+ Attributes
+ ----------
+ W : Theano shared variable or expression
+ Variable or expression representing the filter weights.
+
+ b : Theano shared variable or expression
+ Variable or expression representing the biases.
+
+ Notes
+ -----
+ The transposed convolution is implemented as the backward pass of a
+ corresponding non-transposed convolution. It can be thought of as dilating
+ the input (by adding ``stride - 1`` zeros between adjacent input elements),
+ padding it with ``filter_size - 1 - crop`` zeros, and cross-correlating it
+ with the filters. See [1]_ for more background.
+
+ Examples
+ --------
+ To transpose an existing convolution, with tied filter weights:
+
+ >>> from lasagne.layers import Conv2DLayer, TransposedConv2DLayer
+ >>> conv = Conv2DLayer((None, 1, 32, 32), 16, 3, stride=2, pad=2)
+ >>> deconv = TransposedConv2DLayer(conv, conv.input_shape[1],
+ ... conv.filter_size, stride=conv.stride, crop=conv.pad,
+ ... W=conv.W, flip_filters=not conv.flip_filters)
+
+ References
+ ----------
+ .. [1] Vincent Dumoulin, Francesco Visin (2016):
+ A guide to convolution arithmetic for deep learning. arXiv.
+ http://arxiv.org/abs/1603.07285,
+ https://github.com/vdumoulin/conv_arithmetic
+ """
+ def __init__(self, incoming, num_filters, filter_size, stride=(1, 1),
+ crop=0, untie_biases=False,
+ W=init.GlorotUniform(), b=init.Constant(0.),
+ nonlinearity=nonlinearities.rectify, flip_filters=False,
+ **kwargs):
+ # Delegate all setup to BaseConvLayer; `crop` takes the slot that the
+ # base class calls `pad` (same validation, different meaning here).
+ super(TransposedConv2DLayer, self).__init__(
+ incoming, num_filters, filter_size, stride, crop, untie_biases,
+ W, b, nonlinearity, flip_filters, n=2, **kwargs)
+ # rename self.pad to self.crop:
+ self.crop = self.pad
+ del self.pad
+
+ def get_W_shape(self):
+ # Returns the 4D filter shape
+ # ``(num_input_channels, num_filters, rows, cols)``.
+ num_input_channels = self.input_shape[1]
+ # first two sizes are swapped compared to a forward convolution
+ return (num_input_channels, self.num_filters) + self.filter_size
+
+ def get_output_shape_for(self, input_shape):
+ # when called from the constructor, self.crop is still called self.pad:
+ crop = getattr(self, 'crop', getattr(self, 'pad', None))
+ # normalize scalar crop to one value per spatial dimension
+ crop = crop if isinstance(crop, tuple) else (crop,) * self.n
+ batchsize = input_shape[0]
+ # conv_input_length inverts conv_output_length: the transposed
+ # convolution's output size equals the input size of the matching
+ # forward convolution
+ return ((batchsize, self.num_filters) +
+ tuple(conv_input_length(input, filter, stride, p)
+ for input, filter, stride, p
+ in zip(input_shape[2:], self.filter_size,
+ self.stride, crop)))
+
+ def convolve(self, input, **kwargs):
+ # Theano's abstract-conv interface spells Lasagne's 'same' as 'half'
+ border_mode = 'half' if self.crop == 'same' else self.crop
+ # The transposed convolution is implemented as the gradient of a
+ # forward convolution w.r.t. its inputs, so imshp/kshp describe the
+ # matching forward convolution.
+ op = T.nnet.abstract_conv.AbstractConv2d_gradInputs(
+ imshp=self.output_shape,
+ kshp=self.get_W_shape(),
+ subsample=self.stride, border_mode=border_mode,
+ filter_flip=not self.flip_filters)
+ output_size = self.output_shape[2:]
+ # fall back to a symbolic output size when it is not known statically
+ if any(s is None for s in output_size):
+ output_size = self.get_output_shape_for(input.shape)[2:]
+ conved = op(self.W, input, output_size)
+ return conved
+
+Deconv2DLayer = TransposedConv2DLayer
+
+
+class DilatedConv2DLayer(BaseConvLayer):
+ """
+ lasagne.layers.DilatedConv2DLayer(incoming, num_filters, filter_size,
+ dilation=(1, 1), pad=0, untie_biases=False,
+ W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.),
+ nonlinearity=lasagne.nonlinearities.rectify, flip_filters=False, **kwargs)
+
+ 2D dilated convolution layer
+
+ Performs a 2D convolution with dilated filters, then optionally adds a bias
+ and applies an elementwise nonlinearity.
+
+ Parameters
+ ----------
+ incoming : a :class:`Layer` instance or a tuple
+ The layer feeding into this layer, or the expected input shape. The
+ output of this layer should be a 4D tensor, with shape
+ ``(batch_size, num_input_channels, input_rows, input_columns)``.
+
+ num_filters : int
+ The number of learnable convolutional filters this layer has.
+
+ filter_size : int or iterable of int
+ An integer or a 2-element tuple specifying the size of the filters.
+
+ dilation : int or iterable of int
+ An integer or a 2-element tuple specifying the dilation factor of the
+ filters. A factor of :math:`x` corresponds to :math:`x - 1` zeros
+ inserted between adjacent filter elements.
+
+ pad : int, iterable of int, or 'valid' (default: 0)
+ The amount of implicit zero padding of the input.
+ This implementation does not support padding, the argument is provided
+ for compatibility to other convolutional layers only.
+
+ untie_biases : bool (default: False)
+ If ``False``, the layer will have a bias parameter for each channel,
+ which is shared across all positions in this channel. As a result, the
+ `b` attribute will be a vector (1D).
+
+ If ``True``, the layer will have separate bias parameters for each
+ position in each channel. As a result, the `b` attribute will be a
+ 3D tensor.
+
+ W : Theano shared variable, expression, numpy array or callable
+ Initial value, expression or initializer for the weights.
+ These should be a 4D tensor with shape
+ ``(num_input_channels, num_filters, filter_rows, filter_columns)``.
+ Note that the first two dimensions are swapped compared to a
+ non-dilated convolution.
+ See :func:`lasagne.utils.create_param` for more information.
+
+ b : Theano shared variable, expression, numpy array, callable or ``None``
+ Initial value, expression or initializer for the biases. If set to
+ ``None``, the layer will have no biases. Otherwise, biases should be
+ a 1D array with shape ``(num_filters,)`` if `untie_biases` is set to
+ ``False``. If it is set to ``True``, its shape should be
+ ``(num_filters, output_rows, output_columns)`` instead.
+ See :func:`lasagne.utils.create_param` for more information.
+
+ nonlinearity : callable or None
+ The nonlinearity that is applied to the layer activations. If None
+ is provided, the layer will be linear.
+
+ flip_filters : bool (default: False)
+ Whether to flip the filters before sliding them over the input,
+ performing a convolution, or not to flip them and perform a
+ correlation (this is the default).
+ This implementation does not support flipped filters, the argument is
+ provided for compatibility to other convolutional layers only.
+
+ **kwargs
+ Any additional keyword arguments are passed to the `Layer` superclass.
+
+ Attributes
+ ----------
+ W : Theano shared variable or expression
+ Variable or expression representing the filter weights.
+
+ b : Theano shared variable or expression
+ Variable or expression representing the biases.
+
+ Notes
+ -----
+ The dilated convolution is implemented as the backward pass of a
+ convolution wrt. weights, passing the filters as the output gradient.
+ It can be thought of as dilating the filters (by adding ``dilation - 1``
+ zeros between adjacent filter elements) and cross-correlating them with the
+ input. See [1]_ for more background.
+
+ References
+ ----------
+ .. [1] Fisher Yu, Vladlen Koltun (2016),
+ Multi-Scale Context Aggregation by Dilated Convolutions. ICLR 2016.
+ http://arxiv.org/abs/1511.07122, https://github.com/fyu/dilation
+ """
+ def __init__(self, incoming, num_filters, filter_size, dilation=(1, 1),
+ pad=0, untie_biases=False,
+ W=init.GlorotUniform(), b=init.Constant(0.),
+ nonlinearity=nonlinearities.rectify, flip_filters=False,
+ **kwargs):
+ self.dilation = as_tuple(dilation, 2, int)
+ # stride is fixed to 1; the base class still validates pad for us
+ super(DilatedConv2DLayer, self).__init__(
+ incoming, num_filters, filter_size, 1, pad,
+ untie_biases, W, b, nonlinearity, flip_filters, n=2, **kwargs)
+ # remove self.stride:
+ del self.stride
+ # require valid convolution
+ if self.pad != (0, 0):
+ raise NotImplementedError(
+ "DilatedConv2DLayer requires pad=0 / (0,0) / 'valid', but "
+ "got %r. For a padded dilated convolution, add a PadLayer."
+ % (pad,))
+ # require unflipped filters
+ if self.flip_filters:
+ raise NotImplementedError(
+ "DilatedConv2DLayer requires flip_filters=False.")
+
+ def get_W_shape(self):
+ # Returns the 4D filter shape
+ # ``(num_input_channels, num_filters, rows, cols)``.
+ num_input_channels = self.input_shape[1]
+ # first two sizes are swapped compared to a forward convolution
+ return (num_input_channels, self.num_filters) + self.filter_size
+
+ def get_output_shape_for(self, input_shape):
+ batchsize = input_shape[0]
+ # effective filter size after dilation is (filter - 1) * dilate + 1,
+ # applied as a valid (pad=0), stride-1 convolution
+ return ((batchsize, self.num_filters) +
+ tuple(conv_output_length(input, (filter-1) * dilate + 1, 1, 0)
+ for input, filter, dilate
+ in zip(input_shape[2:], self.filter_size,
+ self.dilation)))
+
+ def convolve(self, input, **kwargs):
+ # we perform a convolution backward pass wrt weights,
+ # passing kernels as output gradient
+ imshp = self.input_shape
+ kshp = self.output_shape
+ # and swapping channels and batchsize
+ imshp = (imshp[1], imshp[0]) + imshp[2:]
+ kshp = (kshp[1], kshp[0]) + kshp[2:]
+ # the dilation factor becomes the subsampling of the backward pass
+ op = T.nnet.abstract_conv.AbstractConv2d_gradWeights(
+ imshp=imshp, kshp=kshp,
+ subsample=self.dilation, border_mode='valid',
+ filter_flip=False)
+ output_size = self.output_shape[2:]
+ # fall back to a symbolic output size when it is not known statically
+ if any(s is None for s in output_size):
+ output_size = self.get_output_shape_for(input.shape)[2:]
+ conved = op(input.transpose(1, 0, 2, 3), self.W, output_size)
+ return conved.transpose(1, 0, 2, 3)
diff --git a/lasagne/layers/corrmm.py b/lasagne/layers/corrmm.py
new file mode 100644
index 0000000..e487397
--- /dev/null
+++ b/lasagne/layers/corrmm.py
@@ -0,0 +1,147 @@
+import theano
+
+from .. import init
+from .. import nonlinearities
+
+from .base import Layer
+
+from .conv import conv_output_length, BaseConvLayer
+from ..utils import as_tuple
+
+from theano.sandbox.cuda.basic_ops import gpu_contiguous
+from theano.sandbox.cuda.blas import GpuCorrMM
+
+
+__all__ = [
+ "Conv2DMMLayer",
+]
+
+
+# Fail fast at import time: this module is GPU-only (GpuCorrMM), so refuse
+# to import when Theano's CUDA backend is not enabled.
+if not theano.sandbox.cuda.cuda_enabled:
+ raise ImportError(
+ "requires GPU support -- see http://lasagne.readthedocs.org/en/"
+ "latest/user/installation.html#gpu-support") # pragma: no cover
+
+
+class Conv2DMMLayer(BaseConvLayer):
+ """
+ lasagne.layers.Conv2DMMLayer(incoming, num_filters, filter_size,
+ stride=(1, 1), pad=0, untie_biases=False,
+ W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.),
+ nonlinearity=lasagne.nonlinearities.rectify, flip_filters=False,
+ **kwargs)
+
+ 2D convolutional layer
+
+ Performs a 2D convolution on its input and optionally adds a bias and
+ applies an elementwise nonlinearity. This is an alternative implementation
+ which uses ``theano.sandbox.cuda.blas.GpuCorrMM`` directly.
+
+ Parameters
+ ----------
+ incoming : a :class:`Layer` instance or a tuple
+ The layer feeding into this layer, or the expected input shape. The
+ output of this layer should be a 4D tensor, with shape
+ ``(batch_size, num_input_channels, input_rows, input_columns)``.
+
+ num_filters : int
+ The number of learnable convolutional filters this layer has.
+
+ filter_size : int or iterable of int
+ An integer or a 2-element tuple specifying the size of the filters.
+
+ stride : int or iterable of int
+ An integer or a 2-element tuple specifying the stride of the
+ convolution operation.
+
+ pad : int, iterable of int, 'full', 'same' or 'valid' (default: 0)
+ By default, the convolution is only computed where the input and the
+ filter fully overlap (a valid convolution). When ``stride=1``, this
+ yields an output that is smaller than the input by ``filter_size - 1``.
+ The `pad` argument allows you to implicitly pad the input with zeros,
+ extending the output size.
+
+ A single integer results in symmetric zero-padding of the given size on
+ all borders, a tuple of two integers allows different symmetric padding
+ per dimension.
+
+ ``'full'`` pads with one less than the filter size on both sides. This
+ is equivalent to computing the convolution wherever the input and the
+ filter overlap by at least one position.
+
+ ``'same'`` pads with half the filter size (rounded down) on both sides.
+ When ``stride=1`` this results in an output size equal to the input
+ size. Even filter size is not supported.
+
+ ``'valid'`` is an alias for ``0`` (no padding / a valid convolution).
+
+ Note that ``'full'`` and ``'same'`` can be faster than equivalent
+ integer values due to optimizations by Theano.
+
+ untie_biases : bool (default: False)
+ If ``False``, the layer will have a bias parameter for each channel,
+ which is shared across all positions in this channel. As a result, the
+ `b` attribute will be a vector (1D).
+
+ If ``True``, the layer will have separate bias parameters for each
+ position in each channel. As a result, the `b` attribute will be a
+ 3D tensor.
+
+ W : Theano shared variable, expression, numpy array or callable
+ Initial value, expression or initializer for the weights.
+ These should be a 4D tensor with shape
+ ``(num_filters, num_input_channels, filter_rows, filter_columns)``.
+ See :func:`lasagne.utils.create_param` for more information.
+
+ b : Theano shared variable, expression, numpy array, callable or ``None``
+ Initial value, expression or initializer for the biases. If set to
+ ``None``, the layer will have no biases. Otherwise, biases should be
+ a 1D array with shape ``(num_filters,)`` if `untie_biases` is set to
+ ``False``. If it is set to ``True``, its shape should be
+ ``(num_filters, output_rows, output_columns)`` instead.
+ See :func:`lasagne.utils.create_param` for more information.
+
+ nonlinearity : callable or None
+ The nonlinearity that is applied to the layer activations. If None
+ is provided, the layer will be linear.
+
+ flip_filters : bool (default: False)
+ Whether to flip the filters and perform a convolution, or not to flip
+ them and perform a correlation. Flipping adds a bit of overhead, so it
+ is disabled by default. In most cases this does not make a difference
+ anyway because the filters are learnt. However, ``flip_filters`` should
+ be set to ``True`` if weights are loaded into it that were learnt using
+ a regular :class:`lasagne.layers.Conv2DLayer`, for example.
+
+ **kwargs
+ Any additional keyword arguments are passed to the `Layer` superclass.
+
+ Attributes
+ ----------
+ W : Theano shared variable
+ Variable representing the filter weights.
+
+ b : Theano shared variable
+ Variable representing the biases.
+ """
+ def __init__(self, incoming, num_filters, filter_size, stride=(1, 1),
+ pad=0, untie_biases=False, W=init.GlorotUniform(),
+ b=init.Constant(0.), nonlinearity=nonlinearities.rectify,
+ flip_filters=False, **kwargs):
+ super(Conv2DMMLayer, self).__init__(incoming, num_filters, filter_size,
+ stride, pad, untie_biases, W, b,
+ nonlinearity, flip_filters, n=2,
+ **kwargs)
+ # GpuCorrMM spells Lasagne's 'same' padding as 'half'
+ border_mode = 'half' if self.pad == 'same' else self.pad
+ # instantiate the correlation Op once; it is reused on every call
+ self.corr_mm_op = GpuCorrMM(subsample=self.stride,
+ border_mode=border_mode)
+
+ def convolve(self, input, **kwargs):
+ filters = self.W
+ # GpuCorrMM computes a correlation; flipping the filters here turns
+ # it into a true convolution
+ if self.flip_filters:
+ filters = filters[:, :, ::-1, ::-1] # flip top-down, left-right
+
+ # GpuCorrMM requires contiguous device memory for both operands
+ contiguous_filters = gpu_contiguous(filters)
+ contiguous_input = gpu_contiguous(input)
+ conved = self.corr_mm_op(contiguous_input, contiguous_filters)
+ return conved
diff --git a/lasagne/layers/cuda_convnet.py b/lasagne/layers/cuda_convnet.py
new file mode 100644
index 0000000..092730c
--- /dev/null
+++ b/lasagne/layers/cuda_convnet.py
@@ -0,0 +1,634 @@
+import numpy as np
+import theano
+import theano.tensor as T
+
+from .. import init
+from .. import nonlinearities
+
+from .base import Layer
+
+from .conv import conv_output_length, BaseConvLayer
+from .pool import pool_output_length
+from ..utils import as_tuple
+
+from theano.sandbox.cuda.basic_ops import gpu_contiguous
+from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs
+
+__all__ = [
+ "Conv2DCCLayer",
+ "MaxPool2DCCLayer",
+ "ShuffleBC01ToC01BLayer",
+ "bc01_to_c01b",
+ "ShuffleC01BToBC01Layer",
+ "c01b_to_bc01",
+ "NINLayer_c01b",
+]
+
+
+# Fail fast at import time: the cuda-convnet wrappers are GPU-only, so
+# refuse to import when Theano's CUDA backend is not enabled.
+if not theano.sandbox.cuda.cuda_enabled:
+ raise ImportError(
+ "requires GPU support -- see http://lasagne.readthedocs.org/en/"
+ "latest/user/installation.html#gpu-support") # pragma: no cover
+
+
+class Conv2DCCLayer(BaseConvLayer):
+ """
+ lasagne.layers.Conv2DCCLayer(incoming, num_filters, filter_size,
+ stride=(1, 1), pad=0, untie_biases=False, W=None,
+ b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify,
+ dimshuffle=True, flip_filters=False, partial_sum=1, **kwargs)
+
+ 2D convolutional layer
+
+ Performs a 2D convolution on its input and optionally adds a bias and
+ applies an elementwise nonlinearity. This is an alternative implementation
+ which uses the cuda-convnet wrappers from pylearn2:
+ ``pylearn2.sandbox.cuda_convnet.filter_acts.FilterActs``.
+
+ Parameters
+ ----------
+ incoming : a :class:`Layer` instance or a tuple
+ The layer feeding into this layer, or the expected input shape. This
+ layer expects a 4D tensor as its input, with shape
+ ``(batch_size, num_input_channels, input_rows, input_columns)``.
+ If automatic dimshuffling is disabled (see notes), the shape should be
+ ``(num_input_channels, input_rows, input_columns, batch_size)``
+ instead (c01b axis order).
+
+ num_filters : int
+ The number of learnable convolutional filters this layer has.
+
+ filter_size : int or iterable of int
+ An integer or a 2-element tuple specifying the size of the filters.
+ This layer does not support non-square filters.
+
+ stride : int or iterable of int
+ An integer or a 2-element tuple specifying the stride of the
+ convolution operation. This layer does not support using different
+ strides along both axes.
+
+ pad : int, iterable of int, 'full', 'same' or 'valid' (default: 0)
+ By default, the convolution is only computed where the input and the
+ filter fully overlap (a valid convolution). When ``stride=1``, this
+ yields an output that is smaller than the input by ``filter_size - 1``.
+ The `pad` argument allows you to implicitly pad the input with zeros,
+ extending the output size.
+
+ A single integer results in symmetric zero-padding of the given size on
+ all borders. This layer does not support using different amounts of
+ padding along both axes, but for compatibility to other layers you can
+ still specify the padding as a tuple of two same-valued integers.
+
+ ``'full'`` pads with one less than the filter size on both sides. This
+ is equivalent to computing the convolution wherever the input and the
+ filter overlap by at least one position.
+
+ ``'same'`` pads with half the filter size (rounded down) on both sides.
+ When ``stride=1`` this results in an output size equal to the input
+ size. Even filter size is not supported.
+
+ ``'valid'`` is an alias for ``0`` (no padding / a valid convolution).
+
+ Note that ``'full'`` and ``'same'`` can be faster than equivalent
+ integer values due to optimizations by Theano.
+
+ untie_biases : bool (default: False)
+ If ``False``, the layer will have a bias parameter for each channel,
+ which is shared across all positions in this channel. As a result, the
+ `b` attribute will be a vector (1D).
+
+ If ``True``, the layer will have separate bias parameters for each
+ position in each channel. As a result, the `b` attribute will be a
+ 3D tensor.
+
+ W : Theano shared variable, expression, numpy array or callable
+ Initial value, expression or initializer for the weights.
+ These should be a 4D tensor with shape
+ ``(num_filters, num_input_channels, filter_rows, filter_columns)``.
+ If automatic dimshuffling is disabled (see notes), the shape should be
+ ``(num_input_channels, input_rows, input_columns, num_filters)``
+ instead (c01b axis order).
+ See :func:`lasagne.utils.create_param` for more information.
+
+ b : Theano shared variable, expression, numpy array, callable or ``None``
+ Initial value, expression or initializer for the biases. If set to
+ ``None``, the layer will have no biases. Otherwise, biases should be
+ a 1D array with shape ``(num_filters,)`` if `untie_biases` is set to
+ ``False``. If it is set to ``True``, its shape should be
+ ``(num_filters, output_rows, output_columns)`` instead.
+ See :func:`lasagne.utils.create_param` for more information.
+
+ nonlinearity : callable or None
+ The nonlinearity that is applied to the layer activations. If None
+ is provided, the layer will be linear.
+
+ dimshuffle : bool (default: True)
+ If ``True``, the layer will automatically apply the necessary
+ dimshuffle operations to deal with the fact that the cuda-convnet
+ implementation uses c01b (batch-size-last) axis order instead of bc01
+ (batch-size-first), which is the Lasagne/Theano default. This makes the
+ layer interoperable with other Lasagne layers.
+
+ If ``False``, this automatic dimshuffling is disabled and the layer
+ will expect its input and parameters to have c01b axis order. It is up
+ to the user to ensure this. :class:`ShuffleBC01ToC01BLayer` and
+ :class:`ShuffleC01BToBC01Layer` can be used to convert between bc01 and
+ c01b axis order.
+
+ flip_filters : bool (default: False)
+ Whether to flip the filters and perform a convolution, or not to flip
+ them and perform a correlation. Flipping adds a bit of overhead, so it
+ is disabled by default. In most cases this does not make a difference
+ anyway because the filters are learnt. However, ``flip_filters`` should
+ be set to ``True`` if weights are loaded into it that were learnt using
+ a regular :class:`lasagne.layers.Conv2DLayer`, for example.
+
+ partial_sum : int or None (default: 1)
+ This value tunes the trade-off between memory usage and performance.
+ You can specify any positive integer that is a divisor of the output
+ feature map size (i.e. output rows times output columns). Higher
+ values decrease memory usage, but also performance. Specifying 0 or
+ ``None`` means the highest possible value will be used. The Lasagne
+ default of ``1`` gives the best performance, but also the highest
+ memory usage.
+
+ More information about this parameter can be found in the
+ `cuda-convnet documentation
+ <https://code.google.com/p/cuda-convnet/wiki/LayerParams>`_.
+
+ **kwargs
+ Any additional keyword arguments are passed to the `Layer` superclass.
+
+ Attributes
+ ----------
+ W : Theano shared variable or expression
+ Variable or expression representing the filter weights.
+
+ b : Theano shared variable or expression
+ Variable or expression representing the biases.
+
+ Notes
+ -----
+ The cuda-convnet convolution implementation has several limitations:
+
+ * only square filters are supported.
+ * only identical strides in the horizontal and vertical direction are
+ supported.
+ * the number of filters must be a multiple of 16.
+ * the number of input channels must be even, or less than or equal to
+ 3.
+ * if the gradient w.r.t. the input is to be computed, the number of
+ channels must be divisible by 4.
+ * performance is optimal when the batch size is a multiple of 128 (but
+ other batch sizes are supported).
+ * this layer only works on the GPU.
+
+ The cuda-convnet convolution implementation uses c01b (batch-size-last)
+ axis order by default. The Theano/Lasagne default is bc01
+ (batch-size-first). This layer automatically adds the necessary dimshuffle
+ operations for the input and the parameters so that it is interoperable
+ with other layers that assume bc01 axis order. However, these additional
+ dimshuffle operations may sometimes negatively affect performance. For this
+ reason, it is possible to disable them by setting ``dimshuffle=False``. In
+ this case, the user is expected to manually ensure that the input and
+ parameters have the correct axis order. :class:`ShuffleBC01ToC01BLayer` and
+ :class:`ShuffleC01BToBC01Layer` can be used to convert between bc01 and
+ c01b axis order.
+ """
+ def __init__(self, incoming, num_filters, filter_size, stride=(1, 1),
+ pad=0, untie_biases=False, W=None,
+ b=init.Constant(0.), nonlinearity=nonlinearities.rectify,
+ dimshuffle=True, flip_filters=False, partial_sum=1,
+ **kwargs):
+ # default weight init depends on the axis order the weights will use
+ if W is None:
+ if dimshuffle:
+ W = init.GlorotUniform()
+ else:
+ W = init.GlorotUniform(c01b=True)
+ self.dimshuffle = dimshuffle
+
+ super(Conv2DCCLayer, self).__init__(incoming, num_filters, filter_size,
+ stride, pad, untie_biases, W, b,
+ nonlinearity, flip_filters, n=2,
+ **kwargs)
+ self.partial_sum = partial_sum
+
+ # enforce the cuda-convnet restrictions documented in the Notes above
+ if self.filter_size[0] != self.filter_size[1]:
+ raise RuntimeError("Conv2DCCLayer only supports square filters, "
+ "but filter_size=(%d, %d)" % filter_size)
+
+ if self.stride[0] != self.stride[1]:
+ raise RuntimeError("Conv2DCCLayer only supports square strides, "
+ "but stride=(%d, %d)" % stride)
+
+ if self.num_filters % 16 != 0:
+ raise RuntimeError("Conv2DCCLayer requires num_filters to be a "
+ "multiple of 16, but num_filters is "
+ "%d" % num_filters)
+
+ if not (self.num_input_channels < 4 or
+ self.num_input_channels % 4 == 0):
+ raise RuntimeError("Conv2DCCLayer requires the number of input "
+ "channels to be 1, 2, 3 or a multiple of 4, "
+ "but it is %d" % self.num_input_channels)
+
+ # resolve self.pad (tuple or string) to the single integer FilterActs
+ # expects
+ if isinstance(self.pad, tuple):
+ if self.pad[0] != self.pad[1]:
+ raise RuntimeError("Conv2DCCLayer only supports square "
+ "padding, but pad=(%d, %d)" % pad)
+ pad = self.pad[0]
+ elif self.pad == 'same':
+ pad = self.filter_size[0] // 2
+ elif self.pad == 'full':
+ pad = self.filter_size[0] - 1
+
+ # in c01b mode with untied biases, the bias shape registered by the
+ # base class follows bc01 output indexing, so re-register it using
+ # the c01b output shape (rows and columns at indices 1 and 2)
+ if not self.dimshuffle and self.untie_biases and self.b is not None:
+ del self.params[self.b]
+ biases_shape = (num_filters, self.output_shape[1],
+ self.output_shape[2])
+ self.b = self.add_param(b, biases_shape, name="b",
+ regularizable=False)
+
+ self.filter_acts_op = FilterActs(stride=self.stride[0],
+ partial_sum=self.partial_sum,
+ pad=pad)
+
+ @property
+ def num_input_channels(self):
+ # channel axis differs between bc01 (dimshuffle) and c01b layouts
+ if self.dimshuffle:
+ return self.input_shape[1]
+ else:
+ return self.input_shape[0]
+
+ def get_W_shape(self):
+ if self.dimshuffle:
+ return super(Conv2DCCLayer, self).get_W_shape()
+ else:
+ # c01b weight layout: (channels, rows, cols, filters)
+ return ((self.num_input_channels,) +
+ self.filter_size +
+ (self.num_filters,))
+
+ def get_output_shape_for(self, input_shape):
+ if not self.dimshuffle:
+ # c01b to bc01
+ input_shape = (input_shape[3], input_shape[0],
+ input_shape[1], input_shape[2])
+ shape = super(Conv2DCCLayer, self).get_output_shape_for(input_shape)
+ if not self.dimshuffle:
+ # bc01 to c01b
+ shape = (shape[1], shape[2], shape[3], shape[0])
+ return shape
+
+ def get_output_for(self, input, **kwargs):
+ if self.dimshuffle:
+ filters = self.W.dimshuffle(1, 2, 3, 0) # bc01 to c01b
+ input = input.dimshuffle(1, 2, 3, 0) # bc01 to c01b
+ else:
+ filters = self.W
+
+ # FilterActs computes a correlation; flipping here yields a
+ # convolution instead
+ if self.flip_filters:
+ filters = filters[:, ::-1, ::-1, :] # flip top-down, left-right
+
+ # FilterActs requires contiguous device memory for both operands
+ contiguous_filters = gpu_contiguous(filters)
+ contiguous_input = gpu_contiguous(input)
+ conved = self.filter_acts_op(contiguous_input, contiguous_filters)
+
+ if self.stride != 1:
+ # cuda-convnet calculates a non-standard strided output shape,
+ # so we need to truncate the output in this case
+ pad = self.pad if isinstance(self.pad, tuple) else (self.pad,) * 2
+ true_rows = conv_output_length(input.shape[1],
+ self.filter_size[0],
+ self.stride[0],
+ pad[0])
+ true_columns = conv_output_length(input.shape[2],
+ self.filter_size[1],
+ self.stride[1],
+ pad[1])
+ conved = conved[:, :true_rows, :true_columns, :]
+
+ if self.b is not None:
+ if self.untie_biases:
+ biases = self.b.dimshuffle(0, 1, 2, 'x') # c01 to c01b
+ else:
+ biases = self.b.dimshuffle(0, 'x', 'x', 'x') # c to c01b
+ conved += biases
+
+ conved = self.nonlinearity(conved)
+
+ if self.dimshuffle:
+ return conved.dimshuffle(3, 0, 1, 2) # c01b to bc01
+ else:
+ return conved
+
+
+class MaxPool2DCCLayer(Layer):
+ """
+ 2D max-pooling layer
+
+ Performs 2D max-pooling over the two trailing axes of a 4D input tensor
+ (or over axis 1 and 2 if ``dimshuffle=False``, see notes). This is an
+ alternative implementation which uses the cuda-convnet wrappers from
+ pylearn2: ``pylearn2.sandbox.cuda_convnet.pool.MaxPool``.
+
+ Parameters
+ ----------
+ incoming : a :class:`Layer` instance or tuple
+ The layer feeding into this layer, or the expected input shape.
+
+ pool_size : integer or iterable
+ The length of the pooling region in each dimension. If an integer, it
+ is promoted to a square pooling region. If an iterable, it should have
+ two elements. This layer does not support non-square pooling regions.
+
+ stride : integer, iterable or ``None``
+ The strides between successive pooling regions in each dimension.
+ If ``None`` then ``stride = pool_size``. This layer does not support
+ using different strides along both axes.
+
+ pad : integer or iterable (default: 0)
+ This implementation does not support custom padding, so this argument
+ must always be set to ``0``. It exists only to make sure the
+ interface is compatible with :class:`lasagne.layers.MaxPool2DLayer`.
+
+ ignore_border : bool (default: False)
+ This implementation always includes partial pooling regions, so this
+ argument must always be set to False. It exists only to make sure the
+ interface is compatible with :class:`lasagne.layers.MaxPool2DLayer`.
+
+ dimshuffle : bool (default: True)
+ If ``True``, the layer will automatically apply the necessary
+ dimshuffle operations to deal with the fact that the cuda-convnet
+ implementation uses c01b (batch-size-last) axis order instead of bc01
+ (batch-size-first), which is the Lasagne/Theano default. This makes the
+ layer interoperable with other Lasagne layers.
+
+ If ``False``, this automatic dimshuffling is disabled and the layer
+ will expect its input and parameters to have c01b axis order. It is up
+ to the user to ensure this. :class:`ShuffleBC01ToC01BLayer` and
+ :class:`ShuffleC01BToBC01Layer` can be used to convert between bc01 and
+ c01b axis order.
+
+ **kwargs
+ Any additional keyword arguments are passed to the :class:`Layer`
+ superclass.
+
+ Notes
+ -----
+ The cuda-convnet max-pooling implementation has several limitations:
+
+ * only square pooling regions are supported.
+ * only identical strides in the horizontal and vertical direction are
+ supported.
+ * only square inputs are supported. (This limitation does not exist for
+ the convolution implementation.)
+ * partial pooling regions are always included (``ignore_border`` is forced
+ to ``False``).
+ * custom padding is not supported (``pad`` is forced to ``0``).
+ * this layer only works on the GPU.
+
+ The cuda-convnet pooling implementation uses c01b (batch-size-last)
+ axis order by default. The Theano/Lasagne default is bc01
+ (batch-size-first). This layer automatically adds the necessary dimshuffle
+ operations for the input and the parameters so that it is interoperable
+ with other layers that assume bc01 axis order. However, these additional
+ dimshuffle operations may sometimes negatively affect performance. For this
+ reason, it is possible to disable them by setting ``dimshuffle=False``. In
+ this case, the user is expected to manually ensure that the input and
+ parameters have the correct axis order. :class:`ShuffleBC01ToC01BLayer` and
+ :class:`ShuffleC01BToBC01Layer` can be used to convert between bc01 and
+ c01b axis order.
+ """
+ def __init__(self, incoming, pool_size, stride=None, ignore_border=False,
+ dimshuffle=True, **kwargs):
+ from pylearn2.sandbox.cuda_convnet.pool import MaxPool
+
+ # `pad` is accepted for interface compatibility only; any non-zero
+ # value is rejected
+ if 'pad' in kwargs:
+ pad = kwargs.pop('pad')
+ if as_tuple(pad, 2) != (0, 0):
+ raise NotImplementedError("MaxPool2DCCLayer does not "
+ "support padding")
+
+ super(MaxPool2DCCLayer, self).__init__(incoming, **kwargs)
+
+ pool_size = as_tuple(pool_size, 2)
+
+ if pool_size[0] != pool_size[1]:
+ raise NotImplementedError("MaxPool2DCCLayer only supports square "
+ "pooling regions, but pool_size=(%d, %d)"
+ % pool_size)
+
+ # cuda-convnet's MaxPool takes scalar pool size and stride, so only
+ # the (validated-equal) first element is kept
+ self.pool_size = pool_size[0]
+
+ if stride is None:
+ self.stride = self.pool_size
+ else:
+ stride = as_tuple(stride, 2)
+ if stride[0] != stride[1]:
+ raise NotImplementedError("MaxPool2DCCLayer only supports "
+ "using the same stride in both "
+ "directions but stride=(%d, %d)"
+ % stride)
+ self.stride = stride[0]
+
+ if self.stride > self.pool_size:
+ raise NotImplementedError("MaxPool2DCCLayer only supports "
+ "stride <= pool_size.")
+
+ # The ignore_border argument is for compatibility with MaxPool2DLayer.
+ # ignore_border=True is not supported. Borders are never ignored.
+ if ignore_border:
+ raise NotImplementedError("MaxPool2DCCLayer does not support "
+ "ignore_border=True.")
+
+ self.dimshuffle = dimshuffle
+
+ self.pool_op = MaxPool(ds=self.pool_size, stride=self.stride)
+
+ def get_output_shape_for(self, input_shape):
+ # pick apart the input shape according to the active axis order
+ if self.dimshuffle:
+ batch_size = input_shape[0]
+ num_input_channels = input_shape[1]
+ input_rows, input_columns = input_shape[2:4]
+ else:
+ batch_size = input_shape[3]
+ num_input_channels = input_shape[0]
+ input_rows, input_columns = input_shape[1:3]
+
+ # partial pooling regions are always included (ignore_border=False)
+ output_rows = pool_output_length(input_rows,
+ pool_size=self.pool_size,
+ stride=self.stride,
+ pad=0,
+ ignore_border=False,
+ )
+ output_columns = pool_output_length(input_columns,
+ pool_size=self.pool_size,
+ stride=self.stride,
+ pad=0,
+ ignore_border=False,
+ )
+
+ if self.dimshuffle:
+ return (batch_size, num_input_channels, output_rows,
+ output_columns)
+ else:
+ return (num_input_channels, output_rows, output_columns,
+ batch_size)
+
+ def get_output_for(self, input, **kwargs):
+ if self.dimshuffle:
+ input = input.dimshuffle(1, 2, 3, 0) # bc01 to c01b
+
+ # the pooling op requires contiguous device memory
+ contiguous_input = gpu_contiguous(input)
+ pooled = self.pool_op(contiguous_input)
+
+ if self.dimshuffle:
+ return pooled.dimshuffle(3, 0, 1, 2) # c01b to bc01
+ else:
+ return pooled
+
+
+# Helper classes for switching between bc01 and c01b input formats
+
class ShuffleBC01ToC01BLayer(Layer):
    """
    Shuffle 4D input from bc01 (batch-size-first) order to c01b
    (batch-size-last) order.

    This layer can be used for interoperability between c01b and bc01 layers.
    For example, :class:`MaxPool2DCCLayer` and :class:`Conv2DCCLayer` operate
    in c01b mode when they are created with ``dimshuffle=False``.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or tuple
        The layer feeding into this layer, or the expected input shape.

    **kwargs
        Any additional keyword arguments are passed to the `Layer` superclass.
    """
    # Permutation applied to the axes: (b, c, 0, 1) -> (c, 0, 1, b).
    def get_output_shape_for(self, input_shape):
        return tuple(input_shape[axis] for axis in (1, 2, 3, 0))

    def get_output_for(self, input, **kwargs):
        return input.dimshuffle(1, 2, 3, 0)

bc01_to_c01b = ShuffleBC01ToC01BLayer  # shortcut
+
+
class ShuffleC01BToBC01Layer(Layer):
    """
    Shuffle 4D input from c01b (batch-size-last) order to bc01
    (batch-size-first) order.

    This layer can be used for interoperability between c01b and bc01 layers.
    For example, :class:`MaxPool2DCCLayer` and :class:`Conv2DCCLayer` operate
    in c01b mode when they are created with ``dimshuffle=False``.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or tuple
        The layer feeding into this layer, or the expected input shape.

    **kwargs
        Any additional keyword arguments are passed to the `Layer` superclass.
    """
    # Permutation applied to the axes: (c, 0, 1, b) -> (b, c, 0, 1).
    def get_output_shape_for(self, input_shape):
        return tuple(input_shape[axis] for axis in (3, 0, 1, 2))

    def get_output_for(self, input, **kwargs):
        return input.dimshuffle(3, 0, 1, 2)

c01b_to_bc01 = ShuffleC01BToBC01Layer  # shortcut
+
+
+# c01b versions of other Layer classes
+
class NINLayer_c01b(Layer):
    """
    lasagne.layers.NINLayer_c01b(incoming, num_units, untie_biases=False,
    W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.),
    nonlinearity=lasagne.nonlinearities.rectify, **kwargs)

    Network-in-network layer with c01b axis ordering.

    This is a c01b version of :class:`lasagne.layers.NINLayer`.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape

    num_units : int
        The number of units of the layer

    untie_biases : bool
        If ``False``, the network has a single bias vector similar to a dense
        layer. If ``True``, a separate bias vector is used for each spatial
        position.

    W : Theano shared variable, numpy array or callable
        An initializer for the weights of the layer. If a shared variable or a
        numpy array is provided the shape should be
        (num_units, num_input_channels).
        See :func:`lasagne.utils.create_param` for more information.

    b : Theano shared variable, numpy array, callable or None
        An initializer for the biases of the layer. If a shared variable or a
        numpy array is provided the correct shape is determined by the
        untie_biases setting. If untie_biases is ``False``, then the shape
        should be ``(num_units,)``. If untie_biases is ``True`` then the shape
        should be ``(num_units, rows, columns)``. If ``None`` is provided the
        layer will have no biases.
        See :func:`lasagne.utils.create_param` for more information.

    nonlinearity : callable or None
        The nonlinearity that is applied to the layer activations. If None
        is provided, the layer will be linear.

    **kwargs
        Any additional keyword arguments are passed to the `Layer` superclass.
    """
    def __init__(self, incoming, num_units, untie_biases=False,
                 W=init.GlorotUniform(), b=init.Constant(0.),
                 nonlinearity=nonlinearities.rectify, **kwargs):
        super(NINLayer_c01b, self).__init__(incoming, **kwargs)
        if nonlinearity is None:
            self.nonlinearity = nonlinearities.identity
        else:
            self.nonlinearity = nonlinearity

        self.num_units = num_units
        self.untie_biases = untie_biases

        # c01b: the channel axis comes first.
        num_input_channels = self.input_shape[0]

        self.W = self.add_param(W, (num_units, num_input_channels), name="W")
        if b is None:
            self.b = None
        else:
            if self.untie_biases:
                # One bias per unit and spatial position; the trailing
                # (batch) axis of the output shape is excluded.
                biases_shape = (num_units,) + self.output_shape[1:-1]
            else:
                biases_shape = (num_units,)
            self.b = self.add_param(b, biases_shape, name="b",
                                    regularizable=False)

    def get_output_shape_for(self, input_shape):
        # Replace the leading channel axis; all trailing axes are kept.
        return (self.num_units,) + input_shape[1:]

    def get_output_for(self, input, **kwargs):
        # fc * c01b... = f01b...
        out = T.tensordot(self.W, input, axes=[[1], [0]])

        if self.b is None:
            activation = out
        else:
            if self.untie_biases:
                # Wrap range() in list() for Python 3 compatibility:
                # a range object cannot be concatenated with a list.
                bias_axes = list(range(input.ndim - 1)) + ['x']
            else:
                bias_axes = [0] + (['x'] * (input.ndim - 1))
            b_shuffled = self.b.dimshuffle(bias_axes)
            activation = out + b_shuffled

        return self.nonlinearity(activation)
diff --git a/lasagne/layers/dense.py b/lasagne/layers/dense.py
new file mode 100644
index 0000000..2aaf206
--- /dev/null
+++ b/lasagne/layers/dense.py
@@ -0,0 +1,192 @@
+import numpy as np
+import theano.tensor as T
+
+from .. import init
+from .. import nonlinearities
+
+from .base import Layer
+
+
+__all__ = [
+ "DenseLayer",
+ "NINLayer",
+]
+
+
class DenseLayer(Layer):
    """
    lasagne.layers.DenseLayer(incoming, num_units,
    W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.),
    nonlinearity=lasagne.nonlinearities.rectify, **kwargs)

    A fully connected layer.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape

    num_units : int
        The number of units of the layer

    W : Theano shared variable, expression, numpy array or callable
        Initial value, expression or initializer for the weights.
        These should be a matrix with shape ``(num_inputs, num_units)``.
        See :func:`lasagne.utils.create_param` for more information.

    b : Theano shared variable, expression, numpy array, callable or ``None``
        Initial value, expression or initializer for the biases. If set to
        ``None``, the layer will have no biases. Otherwise, biases should be
        a 1D array with shape ``(num_units,)``.
        See :func:`lasagne.utils.create_param` for more information.

    nonlinearity : callable or None
        The nonlinearity that is applied to the layer activations. If None
        is provided, the layer will be linear.

    Examples
    --------
    >>> from lasagne.layers import InputLayer, DenseLayer
    >>> l_in = InputLayer((100, 20))
    >>> l1 = DenseLayer(l_in, num_units=50)

    Notes
    -----
    If the input to this layer has more than two axes, it will flatten the
    trailing axes. This is useful for when a dense layer follows a
    convolutional layer, for example. It is not necessary to insert a
    :class:`FlattenLayer` in this case.
    """
    def __init__(self, incoming, num_units, W=init.GlorotUniform(),
                 b=init.Constant(0.), nonlinearity=nonlinearities.rectify,
                 **kwargs):
        super(DenseLayer, self).__init__(incoming, **kwargs)
        if nonlinearity is None:
            self.nonlinearity = nonlinearities.identity
        else:
            self.nonlinearity = nonlinearity

        self.num_units = num_units

        # All trailing axes are treated as one flat feature dimension.
        num_inputs = int(np.prod(self.input_shape[1:]))

        self.W = self.add_param(W, (num_inputs, num_units), name="W")
        self.b = (None if b is None else
                  self.add_param(b, (num_units,), name="b",
                                 regularizable=False))

    def get_output_shape_for(self, input_shape):
        return (input_shape[0], self.num_units)

    def get_output_for(self, input, **kwargs):
        if input.ndim > 2:
            # Flatten inputs with extra axes into a batch of feature vectors.
            input = input.flatten(2)

        activation = T.dot(input, self.W)
        if self.b is None:
            return self.nonlinearity(activation)
        return self.nonlinearity(activation + self.b.dimshuffle('x', 0))
+
+
class NINLayer(Layer):
    """
    lasagne.layers.NINLayer(incoming, num_units, untie_biases=False,
    W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.),
    nonlinearity=lasagne.nonlinearities.rectify, **kwargs)

    Network-in-network layer.
    Like DenseLayer, but broadcasting across all trailing dimensions beyond the
    2nd. This results in a convolution operation with filter size 1 on all
    trailing dimensions. Any number of trailing dimensions is supported,
    so NINLayer can be used to implement 1D, 2D, 3D, ... convolutions.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape

    num_units : int
        The number of units of the layer

    untie_biases : bool
        If false the network has a single bias vector similar to a dense
        layer. If true a separate bias vector is used for each trailing
        dimension beyond the 2nd.

    W : Theano shared variable, expression, numpy array or callable
        Initial value, expression or initializer for the weights.
        These should be a matrix with shape ``(num_inputs, num_units)``,
        where ``num_inputs`` is the size of the second dimension of the input.
        See :func:`lasagne.utils.create_param` for more information.

    b : Theano shared variable, expression, numpy array, callable or ``None``
        Initial value, expression or initializer for the biases. If set to
        ``None``, the layer will have no biases. Otherwise, biases should be
        a 1D array with shape ``(num_units,)`` for ``untie_biases=False``, and
        a tensor of shape ``(num_units, input_shape[2], ..., input_shape[-1])``
        for ``untie_biases=True``.
        See :func:`lasagne.utils.create_param` for more information.

    nonlinearity : callable or None
        The nonlinearity that is applied to the layer activations. If None
        is provided, the layer will be linear.

    Examples
    --------
    >>> from lasagne.layers import InputLayer, NINLayer
    >>> l_in = InputLayer((100, 20, 10, 3))
    >>> l1 = NINLayer(l_in, num_units=5)

    References
    ----------
    .. [1] Lin, Min, Qiang Chen, and Shuicheng Yan (2013):
           Network in network. arXiv preprint arXiv:1312.4400.
    """
    def __init__(self, incoming, num_units, untie_biases=False,
                 W=init.GlorotUniform(), b=init.Constant(0.),
                 nonlinearity=nonlinearities.rectify, **kwargs):
        super(NINLayer, self).__init__(incoming, **kwargs)
        self.nonlinearity = (nonlinearities.identity if nonlinearity is None
                             else nonlinearity)

        self.num_units = num_units
        self.untie_biases = untie_biases

        # bc01...: the channel axis is the second one.
        num_input_channels = self.input_shape[1]

        self.W = self.add_param(W, (num_input_channels, num_units), name="W")
        if b is None:
            self.b = None
        else:
            if self.untie_biases:
                # One bias per unit and per trailing (spatial) position.
                biases_shape = (num_units,) + self.output_shape[2:]
            else:
                biases_shape = (num_units,)
            self.b = self.add_param(b, biases_shape, name="b",
                                    regularizable=False)

    def get_output_shape_for(self, input_shape):
        # Only the channel axis changes size; batch and trailing axes remain.
        return (input_shape[0], self.num_units) + input_shape[2:]

    def get_output_for(self, input, **kwargs):
        # cf * bc01... = fb01...
        out_r = T.tensordot(self.W, input, axes=[[0], [1]])
        # input dims to broadcast over
        # (a range object is fine here on Python 2 and 3: it is only
        # unpacked via *args, never concatenated)
        remaining_dims = range(2, input.ndim)
        # bf01...
        out = out_r.dimshuffle(1, 0, *remaining_dims)

        if self.b is None:
            activation = out
        else:
            if self.untie_biases:
                # no broadcast
                remaining_dims_biases = range(1, input.ndim - 1)
            else:
                remaining_dims_biases = ['x'] * (input.ndim - 2)  # broadcast
            b_shuffled = self.b.dimshuffle('x', 0, *remaining_dims_biases)
            activation = out + b_shuffled

        return self.nonlinearity(activation)
diff --git a/lasagne/layers/dnn.py b/lasagne/layers/dnn.py
new file mode 100644
index 0000000..0547a6b
--- /dev/null
+++ b/lasagne/layers/dnn.py
@@ -0,0 +1,593 @@
+import theano
+from theano.sandbox.cuda import dnn
+
+from .. import init
+from .. import nonlinearities
+from .base import Layer
+
+from .conv import conv_output_length, BaseConvLayer
+from .pool import pool_output_length
+from ..utils import as_tuple
+
# Fail fast at import time: every layer in this module calls into
# theano.sandbox.cuda.dnn, so a working CUDA backend with cuDNN is required.
if not theano.sandbox.cuda.cuda_enabled:
    raise ImportError(
        "requires GPU support -- see http://lasagne.readthedocs.org/en/"
        "latest/user/installation.html#gpu-support")  # pragma: no cover
elif not dnn.dnn_available():
    raise ImportError(
        "cuDNN not available: %s\nSee http://lasagne.readthedocs.org/en/"
        "latest/user/installation.html#cudnn" %
        dnn.dnn_available.msg)  # pragma: no cover
+
+
+__all__ = [
+ "Pool2DDNNLayer",
+ "MaxPool2DDNNLayer",
+ "Pool3DDNNLayer",
+ "MaxPool3DDNNLayer",
+ "Conv2DDNNLayer",
+ "Conv3DDNNLayer",
+ "SpatialPyramidPoolingDNNLayer",
+]
+
+
class Pool2DDNNLayer(Layer):
    """
    2D pooling layer

    Performs 2D mean- or max-pooling over the two trailing axes of a 4D input
    tensor. This is an alternative implementation which uses
    ``theano.sandbox.cuda.dnn.dnn_pool`` directly.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or tuple
        The layer feeding into this layer, or the expected input shape.

    pool_size : integer or iterable
        The length of the pooling region in each dimension. If an integer, it
        is promoted to a square pooling region. If an iterable, it should have
        two elements.

    stride : integer, iterable or ``None``
        The strides between sucessive pooling regions in each dimension.
        If ``None`` then ``stride = pool_size``.

    pad : integer or iterable
        Number of elements to be added on each side of the input
        in each dimension. Each value must be less than
        the corresponding stride.

    ignore_border : bool (default: True)
        This implementation never includes partial pooling regions, so this
        argument must always be set to True. It exists only to make sure the
        interface is compatible with :class:`lasagne.layers.MaxPool2DLayer`.

    mode : string
        Pooling mode, one of 'max', 'average_inc_pad' or 'average_exc_pad'.
        Defaults to 'max'.

    **kwargs
        Any additional keyword arguments are passed to the :class:`Layer`
        superclass.

    Notes
    -----
    The value used to pad the input is chosen to be less than
    the minimum of the input, so that the output of each pooling region
    always corresponds to some element in the unpadded input region.

    This is a drop-in replacement for :class:`lasagne.layers.MaxPool2DLayer`.
    Its interface is the same, except it does not support the ``ignore_border``
    argument.
    """
    def __init__(self, incoming, pool_size, stride=None, pad=(0, 0),
                 ignore_border=True, mode='max', **kwargs):
        super(Pool2DDNNLayer, self).__init__(incoming, **kwargs)
        if len(self.input_shape) != 4:
            raise ValueError("Tried to create a 2D pooling layer with "
                             "input shape %r. Expected 4 input dimensions "
                             "(batchsize, channels, 2 spatial dimensions)."
                             % (self.input_shape,))
        # Promote scalars to per-dimension tuples.
        self.pool_size = as_tuple(pool_size, 2)
        if stride is None:
            self.stride = self.pool_size
        else:
            self.stride = as_tuple(stride, 2)
        self.pad = as_tuple(pad, 2)
        self.mode = mode
        # The ignore_border argument is for compatibility with MaxPool2DLayer.
        # ignore_border=False is not supported. Borders are always ignored.
        if not ignore_border:
            raise NotImplementedError("Pool2DDNNLayer does not support "
                                      "ignore_border=False.")

    def get_output_shape_for(self, input_shape):
        output_shape = list(input_shape)  # copy / convert to mutable list

        # Pool the two trailing (spatial) axes independently.
        output_shape[2] = pool_output_length(input_shape[2],
                                             pool_size=self.pool_size[0],
                                             stride=self.stride[0],
                                             pad=self.pad[0],
                                             ignore_border=True,
                                             )

        output_shape[3] = pool_output_length(input_shape[3],
                                             pool_size=self.pool_size[1],
                                             stride=self.stride[1],
                                             pad=self.pad[1],
                                             ignore_border=True,
                                             )

        return tuple(output_shape)

    def get_output_for(self, input, **kwargs):
        # Padding is delegated to dnn_pool itself.
        return dnn.dnn_pool(input, self.pool_size, self.stride,
                            self.mode, self.pad)
+
+
class MaxPool2DDNNLayer(Pool2DDNNLayer):
    """
    2D max-pooling layer

    Convenience subclass of :class:`Pool2DDNNLayer` with ``mode`` fixed to
    ``'max'``, provided for compatibility to other ``MaxPool2DLayer``
    classes.
    """
    def __init__(self, incoming, pool_size, stride=None,
                 pad=(0, 0), ignore_border=True, **kwargs):
        # Delegate everything to Pool2DDNNLayer, forcing max-pooling.
        super(MaxPool2DDNNLayer, self).__init__(
            incoming, pool_size, stride=stride, pad=pad,
            ignore_border=ignore_border, mode='max', **kwargs)
+
+
class Pool3DDNNLayer(Layer):
    """
    3D pooling layer

    Performs 3D mean- or max-pooling over the 3 trailing axes of a 5D input
    tensor. This is an alternative implementation which uses
    ``theano.sandbox.cuda.dnn.dnn_pool`` directly.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or tuple
        The layer feeding into this layer, or the expected input shape.

    pool_size : integer or iterable
        The length of the pooling region in each dimension. If an integer, it
        is promoted to a cubic pooling region. If an iterable, it should have
        three elements.

    stride : integer, iterable or ``None``
        The strides between sucessive pooling regions in each dimension.
        If ``None`` then ``stride = pool_size``.

    pad : integer or iterable
        Number of elements to be added on each side of the input
        in each dimension. Each value must be less than
        the corresponding stride.

    ignore_border : bool (default: True)
        This implementation never includes partial pooling regions, so this
        argument must always be set to True. It exists only to make sure the
        interface is compatible with :class:`lasagne.layers.MaxPool2DLayer`.

    mode : string
        Pooling mode, one of 'max', 'average_inc_pad' or 'average_exc_pad'.
        Defaults to 'max'.

    **kwargs
        Any additional keyword arguments are passed to the :class:`Layer`
        superclass.

    Notes
    -----
    The value used to pad the input is chosen to be less than
    the minimum of the input, so that the output of each pooling region
    always corresponds to some element in the unpadded input region.

    """
    def __init__(self, incoming, pool_size, stride=None, pad=(0, 0, 0),
                 ignore_border=True, mode='max', **kwargs):
        super(Pool3DDNNLayer, self).__init__(incoming, **kwargs)
        if len(self.input_shape) != 5:
            raise ValueError("Tried to create a 3D pooling layer with "
                             "input shape %r. Expected 5 input dimensions "
                             "(batchsize, channels, 3 spatial dimensions)."
                             % (self.input_shape,))
        # Promote scalars to per-dimension tuples.
        self.pool_size = as_tuple(pool_size, 3)
        if stride is None:
            self.stride = self.pool_size
        else:
            self.stride = as_tuple(stride, 3)
        self.pad = as_tuple(pad, 3)
        self.mode = mode
        # The ignore_border argument is for compatibility with MaxPool2DLayer.
        # ignore_border=False is not supported. Borders are always ignored.
        if not ignore_border:
            raise NotImplementedError("Pool3DDNNLayer does not support "
                                      "ignore_border=False.")

    def get_output_shape_for(self, input_shape):
        output_shape = list(input_shape)  # copy / convert to mutable list

        # Pool each of the three trailing (spatial) axes with its own
        # pool_size / stride / pad; the computation is identical per axis.
        for i in range(3):
            output_shape[2 + i] = pool_output_length(
                input_shape[2 + i],
                pool_size=self.pool_size[i],
                stride=self.stride[i],
                pad=self.pad[i],
                ignore_border=True,
            )

        return tuple(output_shape)

    def get_output_for(self, input, **kwargs):
        # Padding is delegated to dnn_pool itself.
        return dnn.dnn_pool(input, self.pool_size, self.stride,
                            self.mode, self.pad)
+
+
class MaxPool3DDNNLayer(Pool3DDNNLayer):
    """
    3D max-pooling layer

    Convenience subclass of :class:`Pool3DDNNLayer` with ``mode`` fixed to
    ``'max'``, provided for consistency to ``MaxPool2DLayer`` classes.
    """
    def __init__(self, incoming, pool_size, stride=None,
                 pad=(0, 0, 0), ignore_border=True, **kwargs):
        # Delegate everything to Pool3DDNNLayer, forcing max-pooling.
        super(MaxPool3DDNNLayer, self).__init__(
            incoming, pool_size, stride=stride, pad=pad,
            ignore_border=ignore_border, mode='max', **kwargs)
+
+
class Conv2DDNNLayer(BaseConvLayer):
    """
    lasagne.layers.Conv2DDNNLayer(incoming, num_filters, filter_size,
    stride=(1, 1), pad=0, untie_biases=False,
    W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.),
    nonlinearity=lasagne.nonlinearities.rectify, flip_filters=False,
    **kwargs)

    2D convolutional layer

    Performs a 2D convolution on its input and optionally adds a bias and
    applies an elementwise nonlinearity. This is an alternative implementation
    which uses ``theano.sandbox.cuda.dnn.dnn_conv`` directly.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape. The
        output of this layer should be a 4D tensor, with shape
        ``(batch_size, num_input_channels, input_rows, input_columns)``.

    num_filters : int
        The number of learnable convolutional filters this layer has.

    filter_size : int or iterable of int
        An integer or a 2-element tuple specifying the size of the filters.

    stride : int or iterable of int
        An integer or a 2-element tuple specifying the stride of the
        convolution operation.

    pad : int, iterable of int, 'full', 'same' or 'valid' (default: 0)
        By default, the convolution is only computed where the input and the
        filter fully overlap (a valid convolution). When ``stride=1``, this
        yields an output that is smaller than the input by ``filter_size - 1``.
        The `pad` argument allows you to implicitly pad the input with zeros,
        extending the output size.

        A single integer results in symmetric zero-padding of the given size on
        all borders, a tuple of two integers allows different symmetric padding
        per dimension.

        ``'full'`` pads with one less than the filter size on both sides. This
        is equivalent to computing the convolution wherever the input and the
        filter overlap by at least one position.

        ``'same'`` pads with half the filter size (rounded down) on both sides.
        When ``stride=1`` this results in an output size equal to the input
        size. Even filter size is not supported.

        ``'valid'`` is an alias for ``0`` (no padding / a valid convolution).

        Note that ``'full'`` and ``'same'`` can be faster than equivalent
        integer values due to optimizations by Theano.

    untie_biases : bool (default: False)
        If ``False``, the layer will have a bias parameter for each channel,
        which is shared across all positions in this channel. As a result, the
        `b` attribute will be a vector (1D).

        If True, the layer will have separate bias parameters for each
        position in each channel. As a result, the `b` attribute will be a
        3D tensor.

    W : Theano shared variable, expression, numpy array or callable
        Initial value, expression or initializer for the weights.
        These should be a 4D tensor with shape
        ``(num_filters, num_input_channels, filter_rows, filter_columns)``.
        See :func:`lasagne.utils.create_param` for more information.

    b : Theano shared variable, expression, numpy array, callable or ``None``
        Initial value, expression or initializer for the biases. If set to
        ``None``, the layer will have no biases. Otherwise, biases should be
        a 1D array with shape ``(num_filters,)`` if `untied_biases` is set to
        ``False``. If it is set to ``True``, its shape should be
        ``(num_filters, output_rows, output_columns)`` instead.
        See :func:`lasagne.utils.create_param` for more information.

    nonlinearity : callable or None
        The nonlinearity that is applied to the layer activations. If None
        is provided, the layer will be linear.

    flip_filters : bool (default: False)
        Whether to flip the filters and perform a convolution, or not to flip
        them and perform a correlation. Flipping adds a bit of overhead, so it
        is disabled by default. In most cases this does not make a difference
        anyway because the filters are learnt. However, ``flip_filters`` should
        be set to ``True`` if weights are loaded into it that were learnt using
        a regular :class:`lasagne.layers.Conv2DLayer`, for example.

    **kwargs
        Any additional keyword arguments are passed to the `Layer` superclass.

    Attributes
    ----------
    W : Theano shared variable or expression
        Variable or expression representing the filter weights.

    b : Theano shared variable or expression
        Variable or expression representing the biases.
    """
    def __init__(self, incoming, num_filters, filter_size, stride=(1, 1),
                 pad=0, untie_biases=False, W=init.GlorotUniform(),
                 b=init.Constant(0.), nonlinearity=nonlinearities.rectify,
                 flip_filters=False, **kwargs):
        # n=2 marks this as a 2D convolution; all parameter handling
        # (tuple promotion, parameter creation) lives in BaseConvLayer.
        super(Conv2DDNNLayer, self).__init__(incoming, num_filters,
                                             filter_size, stride, pad,
                                             untie_biases, W, b, nonlinearity,
                                             flip_filters, n=2, **kwargs)

    def convolve(self, input, **kwargs):
        # by default we assume 'cross', consistent with corrmm.
        conv_mode = 'conv' if self.flip_filters else 'cross'
        border_mode = self.pad
        if border_mode == 'same':
            # Translate 'same' into symmetric padding of half the filter
            # size (rounded down) before handing it to dnn_conv.
            border_mode = tuple(s // 2 for s in self.filter_size)

        conved = dnn.dnn_conv(img=input,
                              kerns=self.W,
                              subsample=self.stride,
                              border_mode=border_mode,
                              conv_mode=conv_mode
                              )
        return conved
+
+
class Conv3DDNNLayer(BaseConvLayer):
    """
    lasagne.layers.Conv3DDNNLayer(incoming, num_filters, filter_size,
    stride=(1, 1, 1), pad=0, untie_biases=False,
    W=lasagne.init.GlorotUniform(), b=lasagne.init.Constant(0.),
    nonlinearity=lasagne.nonlinearities.rectify, flip_filters=False,
    **kwargs)

    3D convolutional layer

    Performs a 3D convolution on its input and optionally adds a bias and
    applies an elementwise nonlinearity. This implementation uses
    ``theano.sandbox.cuda.dnn.dnn_conv3d`` directly.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape. The
        output of this layer should be a 5D tensor, with shape ``(batch_size,
        num_input_channels, input_depth, input_rows, input_columns)``.

    num_filters : int
        The number of learnable convolutional filters this layer has.

    filter_size : int or iterable of int
        An integer or a 3-element tuple specifying the size of the filters.

    stride : int or iterable of int
        An integer or a 3-element tuple specifying the stride of the
        convolution operation.

    pad : int, iterable of int, 'full', 'same' or 'valid' (default: 0)
        By default, the convolution is only computed where the input and the
        filter fully overlap (a valid convolution). When ``stride=1``, this
        yields an output that is smaller than the input by ``filter_size - 1``.
        The `pad` argument allows you to implicitly pad the input with zeros,
        extending the output size.

        A single integer results in symmetric zero-padding of the given size on
        all borders, a tuple of three integers allows different symmetric
        padding per dimension.

        ``'full'`` pads with one less than the filter size on both sides. This
        is equivalent to computing the convolution wherever the input and the
        filter overlap by at least one position.

        ``'same'`` pads with half the filter size (rounded down) on both sides.
        When ``stride=1`` this results in an output size equal to the input
        size. Even filter size is not supported.

        ``'valid'`` is an alias for ``0`` (no padding / a valid convolution).

        Note that ``'full'`` and ``'same'`` can be faster than equivalent
        integer values due to optimizations by Theano.

    untie_biases : bool (default: False)
        If ``False``, the layer will have a bias parameter for each channel,
        which is shared across all positions in this channel. As a result, the
        `b` attribute will be a vector (1D).

        If True, the layer will have separate bias parameters for each
        position in each channel. As a result, the `b` attribute will be a
        4D tensor.

    W : Theano shared variable, expression, numpy array or callable
        Initial value, expression or initializer for the weights.
        These should be a 5D tensor with shape ``(num_filters,
        num_input_channels, filter_depth, filter_rows, filter_columns)``.
        See :func:`lasagne.utils.create_param` for more information.

    b : Theano shared variable, expression, numpy array, callable or ``None``
        Initial value, expression or initializer for the biases. If set to
        ``None``, the layer will have no biases. Otherwise, biases should be
        a 1D array with shape ``(num_filters,)`` if `untied_biases` is set to
        ``False``. If it is set to ``True``, its shape should be
        ``(num_filters, output_depth, output_rows, output_columns)`` instead.
        See :func:`lasagne.utils.create_param` for more information.

    nonlinearity : callable or None
        The nonlinearity that is applied to the layer activations. If None
        is provided, the layer will be linear.

    flip_filters : bool (default: False)
        Whether to flip the filters and perform a convolution, or not to flip
        them and perform a correlation. Flipping adds a bit of overhead, so it
        is disabled by default. In most cases this does not make a difference
        anyway because the filters are learned, but if you want to compute
        predictions with pre-trained weights, take care if they need flipping.

    **kwargs
        Any additional keyword arguments are passed to the `Layer` superclass.

    Attributes
    ----------
    W : Theano shared variable or expression
        Variable or expression representing the filter weights.

    b : Theano shared variable or expression
        Variable or expression representing the biases.
    """
    def __init__(self, incoming, num_filters, filter_size, stride=(1, 1, 1),
                 pad=0, untie_biases=False, W=init.GlorotUniform(),
                 b=init.Constant(0.), nonlinearity=nonlinearities.rectify,
                 flip_filters=False, **kwargs):
        # n=3 marks this as a 3D convolution; all parameter handling
        # (tuple promotion, parameter creation) lives in BaseConvLayer.
        super(Conv3DDNNLayer, self).__init__(incoming, num_filters,
                                             filter_size, stride, pad,
                                             untie_biases, W, b, nonlinearity,
                                             flip_filters, n=3, **kwargs)

    def convolve(self, input, **kwargs):
        # by default we assume 'cross', consistent with corrmm.
        conv_mode = 'conv' if self.flip_filters else 'cross'
        border_mode = self.pad
        if border_mode == 'same':
            # Translate 'same' into symmetric padding of half the filter
            # size (rounded down) before handing it to dnn_conv3d.
            border_mode = tuple(s // 2 for s in self.filter_size)

        conved = dnn.dnn_conv3d(img=input,
                                kerns=self.W,
                                subsample=self.stride,
                                border_mode=border_mode,
                                conv_mode=conv_mode
                                )
        return conved
+
+
class SpatialPyramidPoolingDNNLayer(Layer):
    """
    Spatial Pyramid Pooling Layer

    Performs spatial pyramid pooling (SPP) over the input.
    It will turn a 2D input of arbitrary size into an output of fixed
    dimension.
    Hence, the convolutional part of a DNN can be connected to a dense part
    with a fixed number of nodes even if the dimensions of the
    input image are unknown.

    The pooling is performed over :math:`l` pooling levels.
    Each pooling level :math:`i` will create :math:`M_i` output features.
    :math:`M_i` is given by :math:`n_i * n_i`,
    with :math:`n_i` as the number of pooling operation per dimension in
    level :math:`i`, and we use a list of the :math:`n_i`'s as a
    parameter for SPP-Layer.
    The length of this list is the level of the spatial pyramid.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or tuple
        The layer feeding into this layer, or the expected input shape.

    pool_dims : list of integers
        The list of :math:`n_i`'s that define the output dimension of each
        pooling level :math:`i`. The length of pool_dims is the level of
        the spatial pyramid.

    mode : string
        Pooling mode, one of 'max', 'average_inc_pad' or 'average_exc_pad'.
        Defaults to 'max'.

    **kwargs
        Any additional keyword arguments are passed to the :class:`Layer`
        superclass.

    Notes
    -----
    This layer should be inserted between the convolutional part of a
    DNN and its dense part. Convolutions can be used for
    arbitrary input dimensions, but the size of their output will
    depend on their input dimensions. Connecting the output of the
    convolutional to the dense part then usually demands us to fix
    the dimensions of the network's InputLayer.
    The spatial pyramid pooling layer, however, allows us to leave the
    network input dimensions arbitrary. The advantage over a global
    pooling layer is the added robustness against object deformations
    due to the pooling on different scales.

    References
    ----------
    .. [1] He, Kaiming et al (2015):
           Spatial Pyramid Pooling in Deep Convolutional Networks
           for Visual Recognition.
           http://arxiv.org/pdf/1406.4729.pdf.
    """
    # NOTE: the default is an immutable tuple rather than the former
    # mutable list literal, to avoid the shared-mutable-default pitfall.
    def __init__(self, incoming, pool_dims=(4, 2, 1), mode='max', **kwargs):
        super(SpatialPyramidPoolingDNNLayer, self).__init__(incoming,
                                                            **kwargs)
        if len(self.input_shape) != 4:
            raise ValueError("Tried to create a SPP layer with "
                             "input shape %r. Expected 4 input dimensions "
                             "(batchsize, channels, 2 spatial dimensions)."
                             % (self.input_shape,))
        self.mode = mode
        # Store a copy, so later mutation of a caller-supplied list cannot
        # silently change the layer's configuration.
        self.pool_dims = list(pool_dims)

    def get_output_for(self, input, **kwargs):
        # Use the static shape where known, the symbolic shape otherwise.
        input_size = tuple(symb if fixed is None else fixed
                           for fixed, symb
                           in zip(self.input_shape[2:], input.shape[2:]))
        pool_list = []
        for pool_dim in self.pool_dims:
            # Ceil-divide for the window and floor-divide for the stride,
            # so that pool_dim x pool_dim regions cover the whole input.
            win_size = tuple((i + pool_dim - 1) // pool_dim
                             for i in input_size)
            str_size = tuple(i // pool_dim for i in input_size)

            pool = dnn.dnn_pool(input, win_size, str_size, self.mode, (0, 0))
            # Flatten each level's spatial output to (batch, channels, n*n).
            pool = pool.flatten(3)
            pool_list.append(pool)

        # Concatenate all pyramid levels along the feature axis.
        return theano.tensor.concatenate(pool_list, axis=2)

    def get_output_shape_for(self, input_shape):
        # Each level i contributes pool_dims[i] ** 2 features per channel.
        num_features = sum(p*p for p in self.pool_dims)
        return (input_shape[0], input_shape[1], num_features)
diff --git a/lasagne/layers/embedding.py b/lasagne/layers/embedding.py
new file mode 100644
index 0000000..b05cb19
--- /dev/null
+++ b/lasagne/layers/embedding.py
@@ -0,0 +1,69 @@
+import numpy as np
+import theano.tensor as T
+
+from .. import init
+from .base import Layer
+
+
+__all__ = [
+ "EmbeddingLayer"
+]
+
+
class EmbeddingLayer(Layer):
    """
    lasagne.layers.EmbeddingLayer(incoming, input_size, output_size,
    W=lasagne.init.Normal(), **kwargs)

    A layer that maps integer indices to learned embedding vectors. The
    input should be an integer type Tensor variable.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape.

    input_size: int
        The Number of different embeddings. The last embedding will have index
        input_size - 1.

    output_size : int
        The size of each embedding.

    W : Theano shared variable, expression, numpy array or callable
        Initial value, expression or initializer for the embedding matrix.
        This should be a matrix with shape ``(input_size, output_size)``.
        See :func:`lasagne.utils.create_param` for more information.

    Examples
    --------
    >>> from lasagne.layers import EmbeddingLayer, InputLayer, get_output
    >>> import theano
    >>> x = T.imatrix()
    >>> l_in = InputLayer((3, ))
    >>> W = np.arange(3*5).reshape((3, 5)).astype('float32')
    >>> l1 = EmbeddingLayer(l_in, input_size=3, output_size=5, W=W)
    >>> output = get_output(l1, x)
    >>> f = theano.function([x], output)
    >>> x_test = np.array([[0, 2], [1, 2]]).astype('int32')
    >>> f(x_test)
    array([[[  0.,   1.,   2.,   3.,   4.],
            [ 10.,  11.,  12.,  13.,  14.]],
    <BLANKLINE>
           [[  5.,   6.,   7.,   8.,   9.],
            [ 10.,  11.,  12.,  13.,  14.]]], dtype=float32)
    """
    def __init__(self, incoming, input_size, output_size,
                 W=init.Normal(), **kwargs):
        super(EmbeddingLayer, self).__init__(incoming, **kwargs)

        # Width of each embedding vector and number of rows in the table.
        self.output_size = output_size
        self.input_size = input_size

        # Register the embedding matrix as a learnable parameter.
        self.W = self.add_param(W, (input_size, output_size), name="W")

    def get_output_shape_for(self, input_shape):
        # Every integer index is replaced by an `output_size`-long vector,
        # so the output gains one trailing dimension.
        return input_shape + (self.output_size, )

    def get_output_for(self, input, **kwargs):
        # Advanced indexing of the weight matrix performs the table lookup.
        return self.W[input]
diff --git a/lasagne/layers/helper.py b/lasagne/layers/helper.py
new file mode 100644
index 0000000..02a2039
--- /dev/null
+++ b/lasagne/layers/helper.py
@@ -0,0 +1,520 @@
+from collections import deque
+from difflib import get_close_matches
+from inspect import getargspec
+from itertools import chain
+from warnings import warn
+
+import theano
+import numpy as np
+
+from .. import utils
+
+
+__all__ = [
+ "get_all_layers",
+ "get_output",
+ "get_output_shape",
+ "get_all_params",
+ "count_params",
+ "get_all_param_values",
+ "set_all_param_values",
+]
+
+
def get_all_layers(layer, treat_as_input=None):
    """
    Gather every layer below one or more given :class:`Layer` instances,
    including the given layer(s) themselves. Its main use is to collect all
    layers of a network just given the output layer(s). The result is in
    topological order: each layer appears only after all layers it depends
    on.

    Parameters
    ----------
    layer : Layer or list
        the :class:`Layer` instance for which to gather all layers feeding
        into it, or a list of :class:`Layer` instances.

    treat_as_input : None or iterable
        an iterable of :class:`Layer` instances to treat as input layers
        with no layers feeding into them. They will show up in the result
        list, but their incoming layers will not be collected (unless they
        are required for other layers as well).

    Returns
    -------
    list
        a list of :class:`Layer` instances feeding into the given
        instance(s) either directly or indirectly, and the given
        instance(s) themselves, in topological order.

    Examples
    --------
    >>> from lasagne.layers import InputLayer, DenseLayer
    >>> l_in = InputLayer((100, 20))
    >>> l1 = DenseLayer(l_in, num_units=50)
    >>> get_all_layers(l1) == [l_in, l1]
    True
    >>> l2 = DenseLayer(l_in, num_units=10)
    >>> get_all_layers([l2, l1]) == [l_in, l2, l1]
    True
    >>> get_all_layers([l1, l2]) == [l_in, l1, l2]
    True
    >>> l3 = DenseLayer(l2, num_units=20)
    >>> get_all_layers(l3) == [l_in, l2, l3]
    True
    >>> get_all_layers(l3, treat_as_input=[l2]) == [l2, l3]
    True
    """
    # Iterative depth-first search with an explicit LIFO stack, so deep
    # networks never hit the interpreter's recursion limit. A node is
    # emitted only once all of its incoming layers have been emitted (or a
    # cycle was detected).
    try:
        pending = deque(layer)
    except TypeError:
        # A single (non-iterable) layer was given.
        pending = deque([layer])
    visited = set()
    finished = set()
    ordered = []

    # Layers listed in treat_as_input are marked visited up front, so their
    # incoming layers are never explored.
    if treat_as_input is not None:
        visited.update(treat_as_input)

    while pending:
        # Peek (don't pop) at the leftmost node.
        node = pending[0]
        if node is None:
            # A layer had its input_layer set to `None`; skip it.
            pending.popleft()
        elif node in visited:
            # Second visit: all incomings are done (or a cycle was found),
            # so the node can be emitted now.
            pending.popleft()
            if node not in finished:
                ordered.append(node)
                finished.add(node)
        else:
            # First visit: mark it and schedule its incomings to be handled
            # before it. Nodes without incomings are emitted next round.
            visited.add(node)
            if hasattr(node, 'input_layers'):
                pending.extendleft(reversed(node.input_layers))
            elif hasattr(node, 'input_layer'):
                pending.appendleft(node.input_layer)

    return ordered
+
+
def get_output(layer_or_layers, inputs=None, **kwargs):
    """
    Computes the output of the network at one or more given layers.
    Optionally, you can define the input(s) to propagate through the network
    instead of using the input variable(s) associated with the network's
    input layer(s).

    Parameters
    ----------
    layer_or_layers : Layer or list
        the :class:`Layer` instance for which to compute the output
        expressions, or a list of :class:`Layer` instances.

    inputs : None, Theano expression, numpy array, or dict
        If None, uses the input variables associated with the
        :class:`InputLayer` instances.
        If a Theano expression, this defines the input for a single
        :class:`InputLayer` instance. Will throw a ValueError if there
        are multiple :class:`InputLayer` instances.
        If a numpy array, this will be wrapped as a Theano constant
        and used just like a Theano expression.
        If a dictionary, any :class:`Layer` instance (including the
        input layers) can be mapped to a Theano expression or numpy
        array to use instead of its regular output.

    Returns
    -------
    output : Theano expression or list
        the output of the given layer(s) for the given network input

    Raises
    ------
    ValueError
        If a single input expression is given for a network with multiple
        input layers, or if a layer with no incoming layer and no entry in
        `inputs` is encountered.

    Notes
    -----
    Depending on your network architecture, `get_output([l1, l2])` may
    be crucially different from `[get_output(l1), get_output(l2)]`. Only
    the former ensures that the output expressions depend on the same
    intermediate expressions. For example, when `l1` and `l2` depend on
    a common dropout layer, the former will use the same dropout mask for
    both, while the latter will use two different dropout masks.
    """
    # Imported lazily to avoid circular imports between layer modules.
    from .input import InputLayer
    from .base import MergeLayer
    # track accepted kwargs used by get_output_for
    accepted_kwargs = {'deterministic'}
    # obtain topological ordering of all layers the output layer(s) depend on
    treat_as_input = inputs.keys() if isinstance(inputs, dict) else []
    all_layers = get_all_layers(layer_or_layers, treat_as_input)
    # initialize layer-to-expression mapping from all input layers
    all_outputs = dict((layer, layer.input_var)
                       for layer in all_layers
                       if isinstance(layer, InputLayer) and
                       layer not in treat_as_input)
    # update layer-to-expression mapping from given input(s), if any
    if isinstance(inputs, dict):
        all_outputs.update((layer, utils.as_theano_expression(expr))
                           for layer, expr in inputs.items())
    elif inputs is not None:
        # A single (non-dict) input expression is only unambiguous when
        # there is exactly one input layer.
        if len(all_outputs) > 1:
            raise ValueError("get_output() was called with a single input "
                             "expression on a network with multiple input "
                             "layers. Please call it with a dictionary of "
                             "input expressions instead.")
        for input_layer in all_outputs:
            all_outputs[input_layer] = utils.as_theano_expression(inputs)
    # update layer-to-expression mapping by propagating the inputs
    # (all_layers is topologically ordered, so each layer's inputs are
    # guaranteed to have been computed before the layer itself is reached)
    for layer in all_layers:
        if layer not in all_outputs:
            try:
                if isinstance(layer, MergeLayer):
                    layer_inputs = [all_outputs[input_layer]
                                    for input_layer in layer.input_layers]
                else:
                    layer_inputs = all_outputs[layer.input_layer]
            except KeyError:
                # one of the input_layer attributes must have been `None`
                raise ValueError("get_output() was called without giving an "
                                 "input expression for the free-floating "
                                 "layer %r. Please call it with a dictionary "
                                 "mapping this layer to an input expression."
                                 % layer)
            all_outputs[layer] = layer.get_output_for(layer_inputs, **kwargs)
            # Record which keyword arguments this layer's get_output_for
            # understands, so unknown kwargs can be warned about below.
            try:
                names, _, _, defaults = getargspec(layer.get_output_for)
            except TypeError:
                # If introspection is not possible, skip it
                pass
            else:
                if defaults is not None:
                    accepted_kwargs |= set(names[-len(defaults):])
            accepted_kwargs |= set(layer.get_output_kwargs)
    # Warn (with did-you-mean suggestions) about kwargs no layer consumed;
    # these are usually typos such as `determinstic`.
    unused_kwargs = set(kwargs.keys()) - accepted_kwargs
    if unused_kwargs:
        suggestions = []
        for kwarg in unused_kwargs:
            suggestion = get_close_matches(kwarg, accepted_kwargs)
            if suggestion:
                suggestions.append('%s (perhaps you meant %s)'
                                   % (kwarg, suggestion[0]))
            else:
                suggestions.append(kwarg)
        warn("get_output() was called with unused kwargs:\n\t%s"
             % "\n\t".join(suggestions))
    # return the output(s) of the requested layer(s) only
    try:
        return [all_outputs[layer] for layer in layer_or_layers]
    except TypeError:
        # layer_or_layers was a single layer, not a list
        return all_outputs[layer_or_layers]
+
+
def get_output_shape(layer_or_layers, input_shapes=None):
    """
    Computes the output shape of the network at one or more given layers.

    Parameters
    ----------
    layer_or_layers : Layer or list
        the :class:`Layer` instance for which to compute the output
        shapes, or a list of :class:`Layer` instances.

    input_shapes : None, tuple, or dict
        If None, uses the input shapes associated with the
        :class:`InputLayer` instances.
        If a tuple, this defines the input shape for a single
        :class:`InputLayer` instance. Will throw a ValueError if there
        are multiple :class:`InputLayer` instances.
        If a dictionary, any :class:`Layer` instance (including the
        input layers) can be mapped to a shape tuple to use instead of
        its regular output shape.

    Returns
    -------
    tuple or list
        the output shape of the given layer(s) for the given network input
    """
    # Fast path: with no shape overrides, every layer's precomputed
    # output_shape is already correct, so no propagation is needed.
    if input_shapes is None or input_shapes == {}:
        try:
            return [lyr.output_shape for lyr in layer_or_layers]
        except TypeError:
            return layer_or_layers.output_shape

    # Imported lazily to avoid circular imports between layer modules.
    from .input import InputLayer
    from .base import MergeLayer
    # obtain topological ordering of all layers the output layer(s) depend on
    treat_as_input = (input_shapes.keys()
                      if isinstance(input_shapes, dict) else [])

    all_layers = get_all_layers(layer_or_layers, treat_as_input)
    # seed the layer-to-shape mapping with all (non-overridden) input layers
    all_shapes = dict((lyr, lyr.shape)
                      for lyr in all_layers
                      if isinstance(lyr, InputLayer) and
                      lyr not in treat_as_input)
    # merge in the shape override(s) the caller supplied, if any
    if isinstance(input_shapes, dict):
        all_shapes.update(input_shapes)
    elif input_shapes is not None:
        # a bare tuple is only unambiguous for a single input layer
        if len(all_shapes) > 1:
            raise ValueError("get_output_shape() was called with a single "
                             "input shape on a network with multiple input "
                             "layers. Please call it with a dictionary of "
                             "input shapes instead.")
        for in_layer in all_shapes:
            all_shapes[in_layer] = input_shapes
    # propagate shapes through the network in topological order
    for lyr in all_layers:
        if lyr not in all_shapes:
            if isinstance(lyr, MergeLayer):
                shapes_in = [all_shapes[src] for src in lyr.input_layers]
            else:
                shapes_in = all_shapes[lyr.input_layer]
            all_shapes[lyr] = lyr.get_output_shape_for(shapes_in)
    # return the shape(s) of the requested layer(s) only
    try:
        return [all_shapes[lyr] for lyr in layer_or_layers]
    except TypeError:
        return all_shapes[layer_or_layers]
+
+
def get_all_params(layer, unwrap_shared=True, **tags):
    """
    Returns a list of Theano shared variables or expressions that
    parameterize the layer.

    This function gathers all parameters of all layers below one or more given
    :class:`Layer` instances, including the layer(s) itself. Its main use is to
    collect all parameters of a network just given the output layer(s).

    By default, all shared variables that participate in the forward pass will
    be returned. The list can optionally be filtered by specifying tags as
    keyword arguments. For example, ``trainable=True`` will only return
    trainable parameters, and ``regularizable=True`` will only return
    parameters that can be regularized (e.g., by L2 decay).

    Parameters
    ----------
    layer : Layer or list
        The :class:`Layer` instance for which to gather all parameters, or a
        list of :class:`Layer` instances.

    unwrap_shared : bool (default: True)
        Affects only parameters that were set to a Theano expression. If
        ``True`` the function returns the shared variables contained in
        the expression, otherwise the Theano expression itself.

    **tags (optional)
        tags can be specified to filter the list. Specifying ``tag1=True``
        will limit the list to parameters that are tagged with ``tag1``.
        Specifying ``tag1=False`` will limit the list to parameters that
        are not tagged with ``tag1``. Commonly used tags are
        ``regularizable`` and ``trainable``.

    Returns
    -------
    params : list
        A list of Theano shared variables or expressions representing
        the parameters.

    Notes
    -----
    If any of the layers' parameters was set to a Theano expression instead
    of a shared variable, `unwrap_shared` controls whether to return the
    shared variables involved in that expression (``unwrap_shared=True``,
    the default), or the expression itself (``unwrap_shared=False``). In
    either case, tag filtering applies to the expressions, considering all
    variables within an expression to be tagged the same.

    Examples
    --------
    Collecting all parameters from a two-layer network:

    >>> from lasagne.layers import InputLayer, DenseLayer
    >>> l_in = InputLayer((100, 20))
    >>> l1 = DenseLayer(l_in, num_units=50)
    >>> l2 = DenseLayer(l1, num_units=30)
    >>> all_params = get_all_params(l2)
    >>> all_params == [l1.W, l1.b, l2.W, l2.b]
    True

    Parameters can be filtered by tags, and parameter expressions are
    unwrapped to return involved shared variables by default:

    >>> from lasagne.utils import floatX
    >>> w1 = theano.shared(floatX(.01 * np.random.randn(50, 30)))
    >>> w2 = theano.shared(floatX(1))
    >>> l2 = DenseLayer(l1, num_units=30, W=theano.tensor.exp(w1) - w2, b=None)
    >>> all_params = get_all_params(l2, regularizable=True)
    >>> all_params == [l1.W, w1, w2]
    True

    When disabling unwrapping, the expression for ``l2.W`` is returned instead:

    >>> all_params = get_all_params(l2, regularizable=True,
    ...                             unwrap_shared=False)
    >>> all_params == [l1.W, l2.W]
    True
    """
    # Collect per-layer parameter lists in topological order, then drop
    # duplicates while keeping the first occurrence of each parameter.
    collected = []
    for sub_layer in get_all_layers(layer):
        collected.extend(sub_layer.get_params(unwrap_shared=unwrap_shared,
                                              **tags))
    return utils.unique(collected)
+
+
def count_params(layer, **tags):
    """
    This function counts all parameters (i.e., the number of scalar
    values) of all layers below one or more given :class:`Layer` instances,
    including the layer(s) itself.

    This is useful to compare the capacity of various network architectures.
    All parameters returned by the :class:`Layer`s' `get_params` methods are
    counted.

    Parameters
    ----------
    layer : Layer or list
        The :class:`Layer` instance for which to count the parameters, or a
        list of :class:`Layer` instances.

    **tags (optional)
        tags can be specified to filter the list of parameter variables that
        will be included in the count. Specifying ``tag1=True``
        will limit the list to parameters that are tagged with ``tag1``.
        Specifying ``tag1=False`` will limit the list to parameters that
        are not tagged with ``tag1``. Commonly used tags are
        ``regularizable`` and ``trainable``.

    Returns
    -------
    int
        The total number of learnable parameters.

    Examples
    --------
    >>> from lasagne.layers import InputLayer, DenseLayer
    >>> l_in = InputLayer((100, 20))
    >>> l1 = DenseLayer(l_in, num_units=50)
    >>> param_count = count_params(l1)
    >>> param_count
    1050
    >>> param_count == 20 * 50 + 50  # 20 input * 50 units + 50 biases
    True
    """
    # Sum the element count (product of the shape) of every parameter.
    total = 0
    for param in get_all_params(layer, **tags):
        total += np.prod(param.get_value().shape)
    return total
+
+
def get_all_param_values(layer, **tags):
    """
    This function returns the values of the parameters of all layers below one
    or more given :class:`Layer` instances, including the layer(s) itself.

    This function can be used in conjunction with set_all_param_values to save
    and restore model parameters.

    Parameters
    ----------
    layer : Layer or list
        The :class:`Layer` instance for which to gather all parameter values,
        or a list of :class:`Layer` instances.

    **tags (optional)
        tags can be specified to filter the list. Specifying ``tag1=True``
        will limit the list to parameters that are tagged with ``tag1``.
        Specifying ``tag1=False`` will limit the list to parameters that
        are not tagged with ``tag1``. Commonly used tags are
        ``regularizable`` and ``trainable``.

    Returns
    -------
    list of numpy.array
        A list of numpy arrays representing the parameter values.

    Examples
    --------
    >>> from lasagne.layers import InputLayer, DenseLayer
    >>> l_in = InputLayer((100, 20))
    >>> l1 = DenseLayer(l_in, num_units=50)
    >>> all_param_values = get_all_param_values(l1)
    >>> (all_param_values[0] == l1.W.get_value()).all()
    True
    >>> (all_param_values[1] == l1.b.get_value()).all()
    True
    """
    # Read each parameter's current value out of its shared variable,
    # preserving the deterministic ordering of get_all_params().
    return [param.get_value() for param in get_all_params(layer, **tags)]
+
+
def set_all_param_values(layer, values, **tags):
    """
    Given a list of numpy arrays, this function sets the parameters of all
    layers below one or more given :class:`Layer` instances (including the
    layer(s) itself) to the given values.

    This function can be used in conjunction with get_all_param_values to save
    and restore model parameters.

    Parameters
    ----------
    layer : Layer or list
        The :class:`Layer` instance for which to set all parameter values, or a
        list of :class:`Layer` instances.

    values : list of numpy.array
        A list of numpy arrays representing the parameter values, must match
        the number of parameters.
        Every parameter's shape must match the shape of its new value.

    **tags (optional)
        tags can be specified to filter the list of parameters to be set.
        Specifying ``tag1=True`` will limit the list to parameters that are
        tagged with ``tag1``.
        Specifying ``tag1=False`` will limit the list to parameters that
        are not tagged with ``tag1``. Commonly used tags are
        ``regularizable`` and ``trainable``.

    Raises
    ------
    ValueError
        If the number of values is not equal to the number of params, or
        if a parameter's shape does not match the shape of its new value.

    Examples
    --------
    >>> from lasagne.layers import InputLayer, DenseLayer
    >>> l_in = InputLayer((100, 20))
    >>> l1 = DenseLayer(l_in, num_units=50)
    >>> all_param_values = get_all_param_values(l1)
    >>> # all_param_values is now [l1.W.get_value(), l1.b.get_value()]
    >>> # ...
    >>> set_all_param_values(l1, all_param_values)
    >>> # the parameter values are restored.
    """
    params = get_all_params(layer, **tags)
    # Check the count up front so nothing is written on a length mismatch.
    if len(params) != len(values):
        raise ValueError("mismatch: got %d values to set %d parameters" %
                         (len(values), len(params)))

    for param, value in zip(params, values):
        # Validate the shape of each value before assigning it.
        current_shape = param.get_value().shape
        if current_shape != value.shape:
            raise ValueError("mismatch: parameter has shape %r but value to "
                             "set has shape %r" %
                             (current_shape, value.shape))
        param.set_value(value)
diff --git a/lasagne/layers/input.py b/lasagne/layers/input.py
new file mode 100644
index 0000000..a48cfad
--- /dev/null
+++ b/lasagne/layers/input.py
@@ -0,0 +1,75 @@
+from collections import OrderedDict
+
+import theano
+import theano.tensor as T
+
+from .. import utils
+
+from .base import Layer
+
+
+__all__ = [
+ "InputLayer",
+]
+
+
class InputLayer(Layer):
    """
    This layer holds a symbolic variable that represents a network input. A
    variable can be specified when the layer is instantiated, else it is
    created.

    Parameters
    ----------
    shape : tuple of `int` or `None` elements
        The shape of the input. Any element can be `None` to indicate that the
        size of that dimension is not fixed at compile time.

    input_var : Theano symbolic variable or `None` (default: `None`)
        A variable representing a network input. If it is not provided, a
        variable will be created.

    Raises
    ------
    ValueError
        If the dimension of `input_var` is not equal to `len(shape)`

    Notes
    -----
    The first dimension usually indicates the batch size. If you specify it,
    Theano may apply more optimizations while compiling the training or
    prediction function, but the compiled function will not accept data of a
    different batch size at runtime. To compile for a variable batch size, set
    the first shape element to `None` instead.

    Examples
    --------
    >>> from lasagne.layers import InputLayer
    >>> l_in = InputLayer((100, 20))
    """
    def __init__(self, shape, input_var=None, name=None, **kwargs):
        self.shape = shape
        # Reject fixed dimensions that cannot hold any data (zero or
        # negative); `None` dimensions are legal and mean "unknown size".
        for dim in self.shape:
            if dim is not None and dim <= 0:
                raise ValueError((
                    "Cannot create InputLayer with a non-positive shape "
                    "dimension. shape=%r, self.name=%r") % (
                        self.shape, name))

        ndim = len(shape)
        if input_var is None:
            # No variable given: build a TensorType of matching
            # dimensionality and instantiate a fresh symbolic variable.
            var_name = "input" if name is None else "%s.input" % name
            input_var = T.TensorType(theano.config.floatX,
                                     [False] * ndim)(var_name)
        elif input_var.ndim != ndim:
            # A variable was given: its dimensionality must match the shape.
            raise ValueError("shape has %d dimensions, but variable has "
                             "%d" % (ndim, input_var.ndim))
        self.input_var = input_var
        self.name = name
        self.params = OrderedDict()

    @Layer.output_shape.getter
    def output_shape(self):
        # An input layer's output shape is simply the shape it was given.
        return self.shape
diff --git a/lasagne/layers/merge.py b/lasagne/layers/merge.py
new file mode 100644
index 0000000..737f7be
--- /dev/null
+++ b/lasagne/layers/merge.py
@@ -0,0 +1,403 @@
+import theano.tensor as T
+
+from .base import MergeLayer
+
+
+__all__ = [
+ "autocrop",
+ "autocrop_array_shapes",
+ "ConcatLayer",
+ "concat",
+ "ElemwiseMergeLayer",
+ "ElemwiseSumLayer",
+]
+
+
def autocrop(inputs, cropping):
    """
    Crops the given input arrays.

    Cropping takes a sequence of inputs and crops them per-axis in order to
    ensure that their sizes are consistent so that they can be combined
    in an element-wise fashion. If cropping is enabled for a specific axis,
    the minimum size in that axis of all inputs is computed, and all
    inputs are cropped to that size.

    The per-axis cropping modes are:

    `None`: this axis is not cropped, inputs are unchanged in this axis

    `'lower'`: inputs are cropped choosing the lower portion in this axis
    (`a[:crop_size, ...]`)

    `'upper'`: inputs are cropped choosing the upper portion in this axis
    (`a[-crop_size:, ...]`)

    `'center'`: inputs are cropped choosing the central portion in this axis
    (``a[offset:offset+crop_size, ...]`` where
    ``offset = (a.shape[0]-crop_size)//2)``

    Parameters
    ----------
    inputs : list of Theano expressions
        The input arrays in the form of a list of Theano expressions

    cropping : list of cropping modes
        Cropping modes, one for each axis. If length of `cropping` is less
        than the number of axes in the inputs, it is padded with `None`.
        If `cropping` is None, `input` is returned as is.

    Returns
    -------
    list of Theano expressions
        each expression is the cropped version of the corresponding input

    Example
    -------
    For example, given three inputs:

    >>> import numpy
    >>> import theano

    >>> a = numpy.random.random((1, 2, 3, 4))
    >>> b = numpy.random.random((5, 4, 4, 2))
    >>> c = numpy.random.random((7, 1, 8, 9))

    Cropping mode for each axis:

    >>> cropping = [None, 'lower', 'center', 'upper']

    Crop (note that the input arrays are converted to Theano vars first,
    and that the results are converted back from Theano expressions to
    numpy arrays by calling `eval()`)
    >>> xa, xb, xc = autocrop([theano.shared(a), \
                               theano.shared(b), \
                               theano.shared(c)], cropping)
    >>> xa, xb, xc = xa.eval(), xb.eval(), xc.eval()

    They will be left as is in axis 0 and cropped in the other three,
    choosing the lower, center and upper portions:

    Axis 0: choose all, axis 1: lower 1 element,
    axis 2: central 3 (all) and axis 3: upper 2
    >>> (xa == a[:, :1, :3, -2:]).all()
    True

    Axis 0: choose all, axis 1: lower 1 element,
    axis 2: central 3 starting at 0 and axis 3: upper 2 (all)
    >>> (xb == b[:, :1, :3, -2:]).all()
    True

    Axis 0: all, axis 1: lower 1 element (all),
    axis 2: central 3 starting at 2 and axis 3: upper 2
    >>> (xc == c[:, :1, 2:5:, -2:]).all()
    True
    """
    # With no cropping spec at all, the inputs pass through untouched.
    if cropping is None:
        return inputs

    # All inputs must agree on the number of dimensions.
    ndim = inputs[0].ndim
    if not all(input.ndim == ndim for input in inputs):
        raise ValueError("Not all inputs are of the same "
                         "dimensionality. Got {0} inputs of "
                         "dimensionalities {1}.".format(
                             len(inputs),
                             [input.ndim for input in inputs]))

    # Symbolic shapes of all inputs; their per-axis minimum is the target
    # (cropped) size for every axis that has cropping enabled.
    shapes = [input.shape for input in inputs]
    min_shape = T.min(T.as_tensor_variable(shapes), axis=0)

    # Pad the cropping spec with None so it covers every dimension.
    # (If it is already long enough, the padding below is empty.)
    cropping = list(cropping) + [None] * (ndim - len(cropping))

    # Build one slice list per input, one slice per axis.
    slices_by_input = [[] for _ in inputs]
    for dim, cr in enumerate(cropping):
        if cr is None:
            # Axis left alone.
            for slices in slices_by_input:
                slices.append(slice(None))
        elif cr == 'lower':
            # Keep the first min-size elements.
            sz = min_shape[dim]
            for slices in slices_by_input:
                slices.append(slice(None, sz))
        elif cr == 'upper':
            # Keep the last min-size elements.
            sz = min_shape[dim]
            for slices in slices_by_input:
                slices.append(slice(-sz, None))
        elif cr == 'center':
            # Keep min-size elements from the middle; offset depends on
            # each input's own size, so it is computed per input.
            sz = min_shape[dim]
            for sh, slices in zip(shapes, slices_by_input):
                offset = (sh[dim] - sz) // 2
                slices.append(slice(offset, offset + sz))
        else:
            raise ValueError(
                'Unknown crop mode \'{0}\''.format(cr))

    return [input[slices] for input, slices in
            zip(inputs, slices_by_input)]
+
+
def autocrop_array_shapes(input_shapes, cropping):
    """
    Computes the shapes of the given arrays after auto-cropping is applied.

    For more information on cropping, see the :func:`autocrop` function
    documentation.

    Parameters
    ----------
    input_shapes : the shapes of input arrays prior to cropping in
        the form of a list of tuples

    cropping : a list of cropping modes, one for each axis. If length of
        `cropping` is less than the number of axes in the inputs, it is
        padded with `None`. If `cropping` is None, `input_shapes` is returned
        as is. For more information on their values and operation, see the
        :func:`autocrop` documentation.

    Returns
    -------
    list of tuples
        each tuple is a cropped version of the corresponding input
        shape tuple in `input_shapes`

    For example, given three input shapes with 4 axes each:

    >>> a = (1, 2, 3, 4)
    >>> b = (5, 4, 4, 2)
    >>> c = (7, 1, 8, 9)

    Cropping mode for each axis:

    >>> cropping = [None, 'lower', 'center', 'upper']

    Apply:

    >>> cropped_shapes = autocrop_array_shapes([a, b, c], cropping)
    >>> cropped_shapes[0]
    (1, 1, 3, 2)

    >>> cropped_shapes[1]
    (5, 1, 3, 2)

    >>> cropped_shapes[2]
    (7, 1, 3, 2)

    Note that axis 0 remains unchanged, where all the others are cropped
    to the minimum size in that axis.
    """
    # With no cropping spec, the shapes pass through untouched.
    if cropping is None:
        return input_shapes

    # All shapes must agree on the number of dimensions.
    ndim = len(input_shapes[0])
    if not all(len(sh) == ndim for sh in input_shapes):
        raise ValueError("Not all inputs are of the same "
                         "dimensionality. Got {0} inputs of "
                         "dimensionalities {1}.".format(
                             len(input_shapes),
                             [len(sh) for sh in input_shapes]))

    # Pad the cropping spec with None so it covers every axis.
    cropping = list(cropping) + [None] * (ndim - len(cropping))

    # Work axis by axis: for a cropped axis, every input shrinks to the
    # minimum size found among all inputs along that axis.
    per_axis = []
    for sizes, cr in zip(zip(*input_shapes), cropping):
        if cr is None:
            per_axis.append(sizes)
        elif cr in ('lower', 'center', 'upper'):
            per_axis.append([min(sizes)] * len(sizes))
        else:
            raise ValueError('Unknown crop mode \'{0}\''.format(cr))

    # Transpose back from per-axis lists to per-input shape tuples.
    return [tuple(sh) for sh in zip(*per_axis)]
+
+
class ConcatLayer(MergeLayer):
    """
    Concatenates multiple inputs along the specified axis. Inputs should have
    the same shape except for the dimension specified in axis, which can have
    different sizes.

    Parameters
    -----------
    incomings : a list of :class:`Layer` instances or tuples
        The layers feeding into this layer, or expected input shapes

    axis : int
        Axis which inputs are joined over

    cropping : None or [crop]
        Cropping for each input axis. Cropping is described in the docstring
        for :func:`autocrop`. Cropping is always disabled for `axis`.
    """
    def __init__(self, incomings, axis=1, cropping=None, **kwargs):
        super(ConcatLayer, self).__init__(incomings, **kwargs)
        self.axis = axis
        if cropping is not None:
            # Never crop along the concatenation axis itself: that axis is
            # exactly where sizes are allowed (and expected) to differ.
            cropping = list(cropping)
            cropping[axis] = None
        self.cropping = cropping

    def get_output_shape_for(self, input_shapes):
        input_shapes = autocrop_array_shapes(input_shapes, self.cropping)
        # For each axis, adopt the first known (non-None) size among all
        # inputs as the reference output size.
        output_shape = [next((s for s in sizes if s is not None), None)
                        for sizes in zip(*input_shapes)]

        def compatible(candidate, reference):
            # Shapes match when all axes except the join axis agree
            # (None acts as a wildcard).
            join_axis = (self.axis if self.axis >= 0
                         else len(candidate) + self.axis)
            if len(candidate) != len(reference):
                return False
            return all(i == join_axis or a is None or b is None or a == b
                       for i, (a, b) in enumerate(zip(candidate, reference)))

        if not all(compatible(shape, output_shape)
                   for shape in input_shapes):
            raise ValueError("Mismatch: input shapes must be the same except "
                             "in the concatenation axis")
        # The join axis is the sum of all input sizes there, or unknown if
        # any of them is unknown.
        sizes = [shape[self.axis] for shape in input_shapes]
        if any(s is None for s in sizes):
            output_shape[self.axis] = None
        else:
            output_shape[self.axis] = sum(sizes)
        return tuple(output_shape)

    def get_output_for(self, inputs, **kwargs):
        # Crop to compatible sizes, then join along the chosen axis.
        return T.concatenate(autocrop(inputs, self.cropping), axis=self.axis)

concat = ConcatLayer  # shortcut
+
+
class ElemwiseMergeLayer(MergeLayer):
    """
    This layer performs an elementwise merge of its input layers.
    It requires all input layers to have the same output shape.

    Parameters
    ----------
    incomings : a list of :class:`Layer` instances or tuples
        the layers feeding into this layer, or expected input shapes,
        with all incoming shapes being equal

    merge_function : callable
        the merge function to use. Should take two arguments and return the
        updated value. Some possible merge functions are ``theano.tensor``:
        ``mul``, ``add``, ``maximum`` and ``minimum``.

    cropping : None or [crop]
        Cropping for each input axis. Cropping is described in the docstring
        for :func:`autocrop`

    See Also
    --------
    ElemwiseSumLayer : Shortcut for sum layer.
    """

    def __init__(self, incomings, merge_function, cropping=None, **kwargs):
        super(ElemwiseMergeLayer, self).__init__(incomings, **kwargs)
        self.merge_function = merge_function
        self.cropping = cropping

    def get_output_shape_for(self, input_shapes):
        input_shapes = autocrop_array_shapes(input_shapes, self.cropping)
        # For each axis, adopt the first known (non-None) size among all
        # inputs as the reference output size.
        output_shape = tuple(next((s for s in sizes if s is not None), None)
                             for sizes in zip(*input_shapes))

        def compatible(shape):
            # A shape is acceptable when every axis agrees with the
            # reference (None acts as a wildcard).
            return (len(shape) == len(output_shape) and
                    all(a is None or b is None or a == b
                        for a, b in zip(shape, output_shape)))

        if not all(compatible(shape) for shape in input_shapes):
            raise ValueError("Mismatch: not all input shapes are the same")
        return output_shape

    def get_output_for(self, inputs, **kwargs):
        inputs = autocrop(inputs, self.cropping)
        # Fold the binary merge function over the inputs, left to right.
        merged = None
        for tensor in inputs:
            merged = (tensor if merged is None
                      else self.merge_function(merged, tensor))
        return merged
+
+
class ElemwiseSumLayer(ElemwiseMergeLayer):
    """
    This layer performs an elementwise sum of its input layers.
    It requires all input layers to have the same output shape.

    Parameters
    ----------
    incomings : a list of :class:`Layer` instances or tuples
        the layers feeding into this layer, or expected input shapes,
        with all incoming shapes being equal

    coeffs: list or scalar
        A same-sized list of coefficients, or a single coefficient that
        is to be applied to all instances. By default, these will not
        be included in the learnable parameters of this layer.

    cropping : None or [crop]
        Cropping for each input axis. Cropping is described in the docstring
        for :func:`autocrop`

    Notes
    -----
    Depending on your architecture, this can be used to avoid the more
    costly :class:`ConcatLayer`. For example, instead of concatenating layers
    before a :class:`DenseLayer`, insert separate :class:`DenseLayer` instances
    of the same number of output units and add them up afterwards. (This avoids
    the copy operations in concatenation, but splits up the dot product.)
    """
    def __init__(self, incomings, coeffs=1, cropping=None, **kwargs):
        super(ElemwiseSumLayer, self).__init__(incomings, T.add,
                                               cropping=cropping, **kwargs)
        # A scalar coefficient is broadcast to one entry per input; a list
        # must match the number of inputs exactly.
        if not isinstance(coeffs, list):
            coeffs = [coeffs] * len(incomings)
        elif len(coeffs) != len(incomings):
            raise ValueError("Mismatch: got %d coeffs for %d incomings" %
                             (len(coeffs), len(incomings)))

        self.coeffs = coeffs

    def get_output_for(self, inputs, **kwargs):
        # Scale each input by its coefficient, skipping the no-op case of
        # a coefficient of exactly 1.
        scaled = [tensor if coeff == 1 else tensor * coeff
                  for coeff, tensor in zip(self.coeffs, inputs)]

        # Delegate the actual summation to ElemwiseMergeLayer (T.add).
        return super(ElemwiseSumLayer, self).get_output_for(scaled, **kwargs)
diff --git a/lasagne/layers/noise.py b/lasagne/layers/noise.py
new file mode 100644
index 0000000..7cbf81e
--- /dev/null
+++ b/lasagne/layers/noise.py
@@ -0,0 +1,136 @@
+import theano
+import theano.tensor as T
+
+from .base import Layer
+from ..random import get_rng
+
+from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
+
+
+__all__ = [
+ "DropoutLayer",
+ "dropout",
+ "GaussianNoiseLayer",
+]
+
+
class DropoutLayer(Layer):
    """Dropout layer

    Randomly sets input values to zero with probability ``p``, a form of
    regularization [1]_, [2]_; see the notes for disabling the noise at
    test time.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        the layer feeding into this layer, or the expected input shape
    p : float or scalar tensor
        The probability of setting a value to zero
    rescale : bool
        If true, surviving values are rescaled by ``1 / (1 - p)`` whenever
        deterministic is False.

    Notes
    -----
    Pass ``deterministic=False`` during training and ``deterministic=True``
    during testing. Note that with ``rescale=True`` this implementation
    scales the input at training time, so the test-time pass-through needs
    no compensation; see the references for further discussion.

    References
    ----------
    .. [1] Hinton, G., Srivastava, N., Krizhevsky, A., Sutskever, I.,
           Salakhutdinov, R. R. (2012):
           Improving neural networks by preventing co-adaptation of feature
           detectors. arXiv preprint arXiv:1207.0580.

    .. [2] Srivastava Nitish, Hinton, G., Krizhevsky, A., Sutskever,
           I., & Salakhutdinov, R. R. (2014):
           Dropout: A Simple Way to Prevent Neural Networks from Overfitting.
           Journal of Machine Learning Research, 15:1929-1958.
    """
    def __init__(self, incoming, p=0.5, rescale=True, **kwargs):
        super(DropoutLayer, self).__init__(incoming, **kwargs)
        self._srng = RandomStreams(get_rng().randint(1, 2147462579))
        self.p = p
        self.rescale = rescale

    def get_output_for(self, input, deterministic=False, **kwargs):
        """
        Parameters
        ----------
        input : tensor
            output from the previous layer
        deterministic : bool
            If true dropout and scaling is disabled, see notes
        """
        if deterministic or self.p == 0:
            return input

        # a Theano constant prevents the subtraction from upcasting the dtype
        keep_prob = T.constant(1) - self.p
        scaled = input / keep_prob if self.rescale else input

        # prefer the static shape for the mask; fall back to the symbolic
        # shape when some dimensions are unknown at compile time
        mask_shape = self.input_shape
        if any(dim is None for dim in mask_shape):
            mask_shape = input.shape

        mask = self._srng.binomial(mask_shape, p=keep_prob,
                                   dtype=input.dtype)
        return scaled * mask
+
dropout = DropoutLayer  # shortcut: lowercase alias for DropoutLayer
+
+
class GaussianNoiseLayer(Layer):
    """Gaussian noise layer.

    Adds zero-mean Gaussian noise of standard deviation ``sigma`` to the
    input [1]_, acting as a regularizer.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        the layer feeding into this layer, or the expected input shape
    sigma : float or tensor scalar
        Standard deviation of added Gaussian noise

    Notes
    -----
    Pass ``deterministic=False`` during training and ``deterministic=True``
    during testing to disable the noise.

    References
    ----------
    .. [1] K.-C. Jim, C. Giles, and B. Horne (1996):
           An analysis of noise in recurrent neural networks: convergence and
           generalization.
           IEEE Transactions on Neural Networks, 7(6):1424-1438.
    """
    def __init__(self, incoming, sigma=0.1, **kwargs):
        super(GaussianNoiseLayer, self).__init__(incoming, **kwargs)
        self._srng = RandomStreams(get_rng().randint(1, 2147462579))
        self.sigma = sigma

    def get_output_for(self, input, deterministic=False, **kwargs):
        """
        Parameters
        ----------
        input : tensor
            output from the previous layer
        deterministic : bool
            If true noise is disabled, see notes
        """
        if deterministic or self.sigma == 0:
            return input
        noise = self._srng.normal(input.shape, avg=0.0, std=self.sigma)
        return input + noise
diff --git a/lasagne/layers/normalization.py b/lasagne/layers/normalization.py
new file mode 100644
index 0000000..c16d6a9
--- /dev/null
+++ b/lasagne/layers/normalization.py
@@ -0,0 +1,375 @@
+# -*- coding: utf-8 -*-
+
+"""
+The :class:`LocalResponseNormalization2DLayer
+<lasagne.layers.LocalResponseNormalization2DLayer>` implementation contains
+code from `pylearn2 <http://github.com/lisa-lab/pylearn2>`_, which is covered
+by the following license:
+
+
+Copyright (c) 2011--2014, Université de Montréal
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
+
+import theano
+import theano.tensor as T
+
+from .. import init
+from .. import nonlinearities
+
+from .base import Layer
+
+
+__all__ = [
+ "LocalResponseNormalization2DLayer",
+ "BatchNormLayer",
+ "batch_norm",
+]
+
+
class LocalResponseNormalization2DLayer(Layer):
    """
    Cross-channel Local Response Normalization for 2D feature maps.

    Aggregation is purely across channels, not within channels, and is
    performed "pixelwise".

    If the value of the :math:`i` th channel is :math:`x_i`, the output is

    .. math::
        x_i = \\frac{x_i}{ (k + ( \\alpha \\sum_j x_j^2 ))^\\beta }

    where the summation is performed over this position on :math:`n`
    neighboring channels.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape. Must
        follow *BC01* layout, i.e., ``(batchsize, channels, rows, columns)``.
    alpha : float scalar
        coefficient, see equation above
    k : float scalar
        offset, see equation above
    beta : float scalar
        exponent, see equation above
    n : int
        number of adjacent channels to normalize over, must be odd

    Notes
    -----
    This code is adapted from pylearn2. See the module docstring for license
    information.
    """

    def __init__(self, incoming, alpha=1e-4, k=2, beta=0.75, n=5, **kwargs):
        super(LocalResponseNormalization2DLayer, self).__init__(incoming,
                                                                **kwargs)
        self.alpha = alpha
        self.k = k
        self.beta = beta
        self.n = n
        if n % 2 == 0:
            raise NotImplementedError("Only works with odd n")

    def get_output_shape_for(self, input_shape):
        # normalization is elementwise, so the shape is unchanged
        return input_shape

    def get_output_for(self, input, **kwargs):
        # prefer the static shape; fall back to the symbolic one when some
        # dimensions are unknown at compile time
        shape = self.input_shape
        if any(dim is None for dim in shape):
            shape = input.shape
        half = self.n // 2
        squared = T.sqr(input)
        b, ch, r, c = shape
        # zero-pad the channel axis by `half` on both sides so that every
        # channel has a full window of n neighbors
        padded = T.alloc(0., b, ch + 2 * half, r, c)
        padded = T.set_subtensor(padded[:, half:half + ch, :, :], squared)
        # accumulate the windowed sum of squares, shifted channel by channel
        denom = self.k
        for offset in range(self.n):
            denom += self.alpha * padded[:, offset:offset + ch, :, :]
        return input / (denom ** self.beta)
+
+
class BatchNormLayer(Layer):
    """
    lasagne.layers.BatchNormLayer(incoming, axes='auto', epsilon=1e-4,
    alpha=0.1, beta=lasagne.init.Constant(0), gamma=lasagne.init.Constant(1),
    mean=lasagne.init.Constant(0), inv_std=lasagne.init.Constant(1), **kwargs)

    Batch Normalization

    This layer implements batch normalization of its inputs, following [1]_:

    .. math::
        y = \\frac{x - \\mu}{\\sqrt{\\sigma^2 + \\epsilon}} \\gamma + \\beta

    That is, the input is normalized to zero mean and unit variance, and then
    linearly transformed. The crucial part is that the mean and variance are
    computed across the batch dimension, i.e., over examples, not per example.

    During training, :math:`\\mu` and :math:`\\sigma^2` are defined to be the
    mean and variance of the current input mini-batch :math:`x`, and during
    testing, they are replaced with average statistics over the training
    data. Consequently, this layer has four stored parameters: :math:`\\beta`,
    :math:`\\gamma`, and the averages :math:`\\mu` and :math:`\\sigma^2`
    (nota bene: instead of :math:`\\sigma^2`, the layer actually stores
    :math:`1 / \\sqrt{\\sigma^2 + \\epsilon}`, for compatibility to cuDNN).
    By default, this layer learns the average statistics as exponential moving
    averages computed during training, so it can be plugged into an existing
    network without any changes of the training procedure (see Notes).

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape
    axes : 'auto', int or tuple of int
        The axis or axes to normalize over. If ``'auto'`` (the default),
        normalize over all axes except for the second: this will normalize over
        the minibatch dimension for dense layers, and additionally over all
        spatial dimensions for convolutional layers.
    epsilon : scalar
        Small constant :math:`\\epsilon` added to the variance before taking
        the square root and dividing by it, to avoid numerical problems
    alpha : scalar
        Coefficient for the exponential moving average of batch-wise means and
        standard deviations computed during training; the closer to one, the
        more it will depend on the last batches seen
    beta : Theano shared variable, expression, numpy array, callable or None
        Initial value, expression or initializer for :math:`\\beta`. Must match
        the incoming shape, skipping all axes in `axes`. Set to ``None`` to fix
        it to 0.0 instead of learning it.
        See :func:`lasagne.utils.create_param` for more information.
    gamma : Theano shared variable, expression, numpy array, callable or None
        Initial value, expression or initializer for :math:`\\gamma`. Must
        match the incoming shape, skipping all axes in `axes`. Set to ``None``
        to fix it to 1.0 instead of learning it.
        See :func:`lasagne.utils.create_param` for more information.
    mean : Theano shared variable, expression, numpy array, or callable
        Initial value, expression or initializer for :math:`\\mu`. Must match
        the incoming shape, skipping all axes in `axes`.
        See :func:`lasagne.utils.create_param` for more information.
    inv_std : Theano shared variable, expression, numpy array, or callable
        Initial value, expression or initializer for :math:`1 / \\sqrt{
        \\sigma^2 + \\epsilon}`. Must match the incoming shape, skipping all
        axes in `axes`.
        See :func:`lasagne.utils.create_param` for more information.
    **kwargs
        Any additional keyword arguments are passed to the :class:`Layer`
        superclass.

    Notes
    -----
    This layer should be inserted between a linear transformation (such as a
    :class:`DenseLayer`, or :class:`Conv2DLayer`) and its nonlinearity. The
    convenience function :func:`batch_norm` modifies an existing layer to
    insert batch normalization in front of its nonlinearity.

    The behavior can be controlled by passing keyword arguments to
    :func:`lasagne.layers.get_output()` when building the output expression
    of any network containing this layer.

    During training, [1]_ normalize each input mini-batch by its statistics
    and update an exponential moving average of the statistics to be used for
    validation. This can be achieved by passing ``deterministic=False``.
    For validation, [1]_ normalize each input mini-batch by the stored
    statistics. This can be achieved by passing ``deterministic=True``.

    For more fine-grained control, ``batch_norm_update_averages`` can be passed
    to update the exponential moving averages (``True``) or not (``False``),
    and ``batch_norm_use_averages`` can be passed to use the exponential moving
    averages for normalization (``True``) or normalize each mini-batch by its
    own statistics (``False``). These settings override ``deterministic``.

    Note that for testing a model after training, [1]_ replace the stored
    exponential moving average statistics by fixing all network weights and
    re-computing average statistics over the training data in a layerwise
    fashion. This is not part of the layer implementation.

    In case you set `axes` to not include the batch dimension (the first axis,
    usually), normalization is done per example, not across examples. This does
    not require any averages, so you can pass ``batch_norm_update_averages``
    and ``batch_norm_use_averages`` as ``False`` in this case.

    See also
    --------
    batch_norm : Convenience function to apply batch normalization to a layer

    References
    ----------
    .. [1] Ioffe, Sergey and Szegedy, Christian (2015):
           Batch Normalization: Accelerating Deep Network Training by Reducing
           Internal Covariate Shift. http://arxiv.org/abs/1502.03167.
    """
    def __init__(self, incoming, axes='auto', epsilon=1e-4, alpha=0.1,
                 beta=init.Constant(0), gamma=init.Constant(1),
                 mean=init.Constant(0), inv_std=init.Constant(1), **kwargs):
        super(BatchNormLayer, self).__init__(incoming, **kwargs)

        if axes == 'auto':
            # default: normalize over all but the second axis
            axes = (0,) + tuple(range(2, len(self.input_shape)))
        elif isinstance(axes, int):
            # allow a single int as shorthand for a one-element tuple
            axes = (axes,)
        self.axes = axes

        self.epsilon = epsilon
        self.alpha = alpha

        # create parameters, ignoring all dimensions in axes
        shape = [size for axis, size in enumerate(self.input_shape)
                 if axis not in self.axes]
        if any(size is None for size in shape):
            raise ValueError("BatchNormLayer needs specified input sizes for "
                             "all axes not normalized over.")
        # beta and gamma are learnable; passing None fixes them to the
        # constants 0 and 1 (substituted in get_output_for below)
        if beta is None:
            self.beta = None
        else:
            self.beta = self.add_param(beta, shape, 'beta',
                                       trainable=True, regularizable=False)
        if gamma is None:
            self.gamma = None
        else:
            self.gamma = self.add_param(gamma, shape, 'gamma',
                                        trainable=True, regularizable=True)
        # running statistics are stored as parameters so they are saved and
        # loaded with the model, but they are never touched by the optimizer
        self.mean = self.add_param(mean, shape, 'mean',
                                   trainable=False, regularizable=False)
        self.inv_std = self.add_param(inv_std, shape, 'inv_std',
                                      trainable=False, regularizable=False)

    def get_output_for(self, input, deterministic=False,
                       batch_norm_use_averages=None,
                       batch_norm_update_averages=None, **kwargs):
        # statistics of the current mini-batch, reduced over self.axes
        input_mean = input.mean(self.axes)
        input_inv_std = T.inv(T.sqrt(input.var(self.axes) + self.epsilon))

        # Decide whether to use the stored averages or mini-batch statistics
        if batch_norm_use_averages is None:
            batch_norm_use_averages = deterministic
        use_averages = batch_norm_use_averages

        if use_averages:
            mean = self.mean
            inv_std = self.inv_std
        else:
            mean = input_mean
            inv_std = input_inv_std

        # Decide whether to update the stored averages
        if batch_norm_update_averages is None:
            batch_norm_update_averages = not deterministic
        update_averages = batch_norm_update_averages

        if update_averages:
            # Trick: To update the stored statistics, we create memory-aliased
            # clones of the stored statistics:
            running_mean = theano.clone(self.mean, share_inputs=False)
            running_inv_std = theano.clone(self.inv_std, share_inputs=False)
            # set a default update for them:
            running_mean.default_update = ((1 - self.alpha) * running_mean +
                                           self.alpha * input_mean)
            running_inv_std.default_update = ((1 - self.alpha) *
                                              running_inv_std +
                                              self.alpha * input_inv_std)
            # and make sure they end up in the graph without participating in
            # the computation (this way their default_update will be collected
            # and applied, but the computation will be optimized away):
            mean += 0 * running_mean
            inv_std += 0 * running_inv_std

        # prepare dimshuffle pattern inserting broadcastable axes as needed
        param_axes = iter(range(input.ndim - len(self.axes)))
        pattern = ['x' if input_axis in self.axes
                   else next(param_axes)
                   for input_axis in range(input.ndim)]

        # apply dimshuffle pattern to all parameters
        beta = 0 if self.beta is None else self.beta.dimshuffle(pattern)
        gamma = 1 if self.gamma is None else self.gamma.dimshuffle(pattern)
        mean = mean.dimshuffle(pattern)
        inv_std = inv_std.dimshuffle(pattern)

        # normalize
        normalized = (input - mean) * (gamma * inv_std) + beta
        return normalized
+
+
def batch_norm(layer, **kwargs):
    """
    Apply batch normalization to an existing layer. This is a convenience
    function that irreversibly modifies the given layer: its nonlinearity,
    if there is one, is stolen (so normalization happens right before it),
    its bias, if there is one, is removed (the normalization's beta makes
    it redundant), and a :class:`BatchNormLayer` — plus, if needed, a
    :class:`NonlinearityLayer` — is stacked on top.

    Parameters
    ----------
    layer : A :class:`Layer` instance
        The layer to apply the normalization to; note that it will be
        irreversibly modified as specified above
    **kwargs
        Any additional keyword arguments are passed on to the
        :class:`BatchNormLayer` constructor.

    Returns
    -------
    BatchNormLayer or NonlinearityLayer instance
        A batch normalization layer stacked on the given modified `layer`, or
        a nonlinearity layer stacked on top of both if `layer` was nonlinear.

    Examples
    --------
    Just wrap any layer into a :func:`batch_norm` call on creating it:

    >>> from lasagne.layers import InputLayer, DenseLayer, batch_norm
    >>> from lasagne.nonlinearities import tanh
    >>> l1 = InputLayer((64, 768))
    >>> l2 = batch_norm(DenseLayer(l1, num_units=500, nonlinearity=tanh))

    This introduces batch normalization right before its nonlinearity:

    >>> from lasagne.layers import get_all_layers
    >>> [l.__class__.__name__ for l in get_all_layers(l2)]
    ['InputLayer', 'DenseLayer', 'BatchNormLayer', 'NonlinearityLayer']
    """
    fn = getattr(layer, 'nonlinearity', None)
    if fn is not None:
        # steal the nonlinearity; it is re-applied after the normalization
        layer.nonlinearity = nonlinearities.identity
    if getattr(layer, 'b', None) is not None:
        # drop the now-redundant bias parameter
        del layer.params[layer.b]
        layer.b = None
    # name the new layer after the wrapped one unless a name was given
    bn_name = kwargs.pop('name', None)
    if not bn_name:
        base_name = getattr(layer, 'name', None)
        bn_name = base_name and base_name + '_bn'
    layer = BatchNormLayer(layer, name=bn_name, **kwargs)
    if fn is not None:
        from .special import NonlinearityLayer
        layer = NonlinearityLayer(layer, fn,
                                  name=bn_name and bn_name + '_nonlin')
    return layer
diff --git a/lasagne/layers/pool.py b/lasagne/layers/pool.py
new file mode 100644
index 0000000..86379ca
--- /dev/null
+++ b/lasagne/layers/pool.py
@@ -0,0 +1,639 @@
+import theano.tensor as T
+
+from .base import Layer
+from ..utils import as_tuple
+
+from theano.tensor.signal.pool import pool_2d
+
+
+__all__ = [
+ "MaxPool1DLayer",
+ "MaxPool2DLayer",
+ "Pool1DLayer",
+ "Pool2DLayer",
+ "Upscale1DLayer",
+ "Upscale2DLayer",
+ "FeaturePoolLayer",
+ "FeatureWTALayer",
+ "GlobalPoolLayer",
+]
+
+
def pool_output_length(input_length, pool_size, stride, pad, ignore_border):
    """
    Compute the output length of a pooling operator
    along a single dimension.

    Parameters
    ----------
    input_length : integer
        The length of the input in the pooling dimension
    pool_size : integer
        The length of the pooling region
    stride : integer
        The stride between successive pooling regions
    pad : integer
        The number of elements to be added to the input on each side.
    ignore_border: bool
        If ``True``, partial pooling regions will be ignored.
        Must be ``True`` if ``pad != 0``.

    Returns
    -------
    output_length
        * None if either input is None.
        * Computed length of the pooling operator otherwise.

    Notes
    -----
    When ``ignore_border == True``, this is given by the number of full
    pooling regions that fit in the padded input length,
    divided by the stride (rounding down).

    If ``ignore_border == False``, a single partial pooling region is
    appended if at least one input element would be left uncovered otherwise.
    """
    if input_length is None or pool_size is None:
        return None

    if ignore_border:
        # positions where a full pooling window fits in the padded input,
        # stepped by `stride` (ceil-division folded into one expression);
        # matches the formula used by Theano's downsample op
        return (input_length + 2 * pad - pool_size + stride) // stride

    # without ignore_border, padding is unsupported
    assert pad == 0

    if stride >= pool_size:
        # every window start yields an output, possibly a partial last one
        return (input_length + stride - 1) // stride

    # overlapping windows: full windows, plus one trailing partial window
    full = max(0, (input_length - pool_size + stride - 1) // stride)
    return full + 1
+
+
class Pool1DLayer(Layer):
    """
    1D pooling layer

    Performs 1D mean or max-pooling over the trailing axis
    of a 3D input tensor.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or tuple
        The layer feeding into this layer, or the expected input shape.

    pool_size : integer or iterable
        The length of the pooling region. If an iterable, it should have a
        single element.

    stride : integer, iterable or ``None``
        The stride between successive pooling regions.
        If ``None`` then ``stride == pool_size``.

    pad : integer or iterable
        The number of elements to be added to the input on each side.
        Must be less than stride.

    ignore_border : bool
        If ``True``, partial pooling regions will be ignored.
        Must be ``True`` if ``pad != 0``.

    mode : {'max', 'average_inc_pad', 'average_exc_pad'}
        Pooling mode: max-pooling or mean-pooling including/excluding zeros
        from partially padded pooling regions. Default is 'max'.

    **kwargs
        Any additional keyword arguments are passed to the :class:`Layer`
        superclass.

    See Also
    --------
    MaxPool1DLayer : Shortcut for max pooling layer.

    Notes
    -----
    The value used to pad the input is chosen to be less than
    the minimum of the input, so that the output of each pooling region
    always corresponds to some element in the unpadded input region.

    Using ``ignore_border=False`` prevents Theano from using cuDNN for the
    operation, so it will fall back to a slower implementation.
    """
    def __init__(self, incoming, pool_size, stride=None, pad=0,
                 ignore_border=True, mode='max', **kwargs):
        super(Pool1DLayer, self).__init__(incoming, **kwargs)

        if len(self.input_shape) != 3:
            raise ValueError("Tried to create a 1D pooling layer with "
                             "input shape %r. Expected 3 input dimensions "
                             "(batchsize, channels, 1 spatial dimensions)."
                             % (self.input_shape,))

        self.pool_size = as_tuple(pool_size, 1)
        if stride is None:
            # default stride produces non-overlapping pooling regions
            self.stride = self.pool_size
        else:
            self.stride = as_tuple(stride, 1)
        self.pad = as_tuple(pad, 1)
        self.ignore_border = ignore_border
        self.mode = mode

    def get_output_shape_for(self, input_shape):
        # only the trailing (spatial) axis changes
        shape = list(input_shape)
        shape[-1] = pool_output_length(input_shape[-1],
                                       pool_size=self.pool_size[0],
                                       stride=self.stride[0],
                                       pad=self.pad[0],
                                       ignore_border=self.ignore_border)
        return tuple(shape)

    def get_output_for(self, input, **kwargs):
        # add a broadcastable trailing axis so the 2D pooling op can be
        # reused for the 1D case, then strip that axis off again
        as_4d = T.shape_padright(input, 1)
        pooled = pool_2d(as_4d,
                         ds=(self.pool_size[0], 1),
                         st=(self.stride[0], 1),
                         ignore_border=self.ignore_border,
                         padding=(self.pad[0], 0),
                         mode=self.mode)
        return pooled[:, :, :, 0]
+
+
class Pool2DLayer(Layer):
    """
    2D pooling layer

    Performs 2D mean or max-pooling over the two trailing axes
    of a 4D input tensor.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or tuple
        The layer feeding into this layer, or the expected input shape.

    pool_size : integer or iterable
        The length of the pooling region in each dimension. If an integer, it
        is promoted to a square pooling region. If an iterable, it should have
        two elements.

    stride : integer, iterable or ``None``
        The strides between successive pooling regions in each dimension.
        If ``None`` then ``stride = pool_size``.

    pad : integer or iterable
        Number of elements to be added on each side of the input
        in each dimension. Each value must be less than
        the corresponding stride.

    ignore_border : bool
        If ``True``, partial pooling regions will be ignored.
        Must be ``True`` if ``pad != (0, 0)``.

    mode : {'max', 'average_inc_pad', 'average_exc_pad'}
        Pooling mode: max-pooling or mean-pooling including/excluding zeros
        from partially padded pooling regions. Default is 'max'.

    **kwargs
        Any additional keyword arguments are passed to the :class:`Layer`
        superclass.

    See Also
    --------
    MaxPool2DLayer : Shortcut for max pooling layer.

    Notes
    -----
    The value used to pad the input is chosen to be less than
    the minimum of the input, so that the output of each pooling region
    always corresponds to some element in the unpadded input region.

    Using ``ignore_border=False`` prevents Theano from using cuDNN for the
    operation, so it will fall back to a slower implementation.
    """

    def __init__(self, incoming, pool_size, stride=None, pad=(0, 0),
                 ignore_border=True, mode='max', **kwargs):
        super(Pool2DLayer, self).__init__(incoming, **kwargs)

        self.pool_size = as_tuple(pool_size, 2)

        if len(self.input_shape) != 4:
            raise ValueError("Tried to create a 2D pooling layer with "
                             "input shape %r. Expected 4 input dimensions "
                             "(batchsize, channels, 2 spatial dimensions)."
                             % (self.input_shape,))

        # default stride produces non-overlapping pooling regions
        self.stride = (self.pool_size if stride is None
                       else as_tuple(stride, 2))
        self.pad = as_tuple(pad, 2)
        self.ignore_border = ignore_border
        self.mode = mode

    def get_output_shape_for(self, input_shape):
        shape = list(input_shape)
        # the two trailing (spatial) axes shrink; batch and channels keep
        for dim, axis in enumerate((2, 3)):
            shape[axis] = pool_output_length(input_shape[axis],
                                             pool_size=self.pool_size[dim],
                                             stride=self.stride[dim],
                                             pad=self.pad[dim],
                                             ignore_border=self.ignore_border)
        return tuple(shape)

    def get_output_for(self, input, **kwargs):
        return pool_2d(input,
                       ds=self.pool_size,
                       st=self.stride,
                       ignore_border=self.ignore_border,
                       padding=self.pad,
                       mode=self.mode)
+
+
class MaxPool1DLayer(Pool1DLayer):
    """
    1D max-pooling layer

    Performs 1D max-pooling over the trailing axis of a 3D input tensor.
    This is :class:`Pool1DLayer` with ``mode`` fixed to ``'max'``.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or tuple
        The layer feeding into this layer, or the expected input shape.

    pool_size : integer or iterable
        The length of the pooling region. If an iterable, it should have a
        single element.

    stride : integer, iterable or ``None``
        The stride between successive pooling regions.
        If ``None`` then ``stride == pool_size``.

    pad : integer or iterable
        The number of elements to be added to the input on each side.
        Must be less than stride.

    ignore_border : bool
        If ``True``, partial pooling regions will be ignored.
        Must be ``True`` if ``pad != 0``.

    **kwargs
        Any additional keyword arguments are passed to the :class:`Layer`
        superclass.

    Notes
    -----
    The value used to pad the input is chosen to be less than
    the minimum of the input, so that the output of each pooling region
    always corresponds to some element in the unpadded input region.

    Using ``ignore_border=False`` prevents Theano from using cuDNN for the
    operation, so it will fall back to a slower implementation.
    """

    def __init__(self, incoming, pool_size, stride=None, pad=0,
                 ignore_border=True, **kwargs):
        # delegate everything to Pool1DLayer, pinning the pooling mode
        super(MaxPool1DLayer, self).__init__(incoming, pool_size,
                                             stride=stride, pad=pad,
                                             ignore_border=ignore_border,
                                             mode='max', **kwargs)
+
+
class MaxPool2DLayer(Pool2DLayer):
    """
    2D max-pooling layer

    Performs 2D max-pooling over the two trailing axes of a 4D input tensor.
    This is :class:`Pool2DLayer` with ``mode`` fixed to ``'max'``.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or tuple
        The layer feeding into this layer, or the expected input shape.

    pool_size : integer or iterable
        The length of the pooling region in each dimension. If an integer, it
        is promoted to a square pooling region. If an iterable, it should have
        two elements.

    stride : integer, iterable or ``None``
        The strides between successive pooling regions in each dimension.
        If ``None`` then ``stride = pool_size``.

    pad : integer or iterable
        Number of elements to be added on each side of the input
        in each dimension. Each value must be less than
        the corresponding stride.

    ignore_border : bool
        If ``True``, partial pooling regions will be ignored.
        Must be ``True`` if ``pad != (0, 0)``.

    **kwargs
        Any additional keyword arguments are passed to the :class:`Layer`
        superclass.

    Notes
    -----
    The value used to pad the input is chosen to be less than
    the minimum of the input, so that the output of each pooling region
    always corresponds to some element in the unpadded input region.

    Using ``ignore_border=False`` prevents Theano from using cuDNN for the
    operation, so it will fall back to a slower implementation.
    """

    def __init__(self, incoming, pool_size, stride=None, pad=(0, 0),
                 ignore_border=True, **kwargs):
        # delegate everything to Pool2DLayer, pinning the pooling mode
        super(MaxPool2DLayer, self).__init__(incoming, pool_size,
                                             stride=stride, pad=pad,
                                             ignore_border=ignore_border,
                                             mode='max', **kwargs)
+
+# TODO: add reshape-based implementation to MaxPool*DLayer
+# TODO: add MaxPool3DLayer
+
+
class Upscale1DLayer(Layer):
    """
    1D upscaling layer

    Performs 1D upscaling over the trailing axis of a 3D input tensor by
    repeating each element ``scale_factor`` times.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or tuple
        The layer feeding into this layer, or the expected input shape.

    scale_factor : integer or iterable
        The scale factor. If an iterable, it should have one element.

    **kwargs
        Any additional keyword arguments are passed to the :class:`Layer`
        superclass.
    """

    def __init__(self, incoming, scale_factor, **kwargs):
        super(Upscale1DLayer, self).__init__(incoming, **kwargs)
        self.scale_factor = as_tuple(scale_factor, 1)
        if self.scale_factor[0] < 1:
            raise ValueError('Scale factor must be >= 1, not {0}'.format(
                self.scale_factor))

    def get_output_shape_for(self, input_shape):
        shape = list(input_shape)
        # an unknown (None) spatial size stays unknown
        if shape[2] is not None:
            shape[2] = shape[2] * self.scale_factor[0]
        return tuple(shape)

    def get_output_for(self, input, **kwargs):
        factor = self.scale_factor[0]
        if factor > 1:
            return T.extra_ops.repeat(input, factor, 2)
        # a factor of 1 is the identity
        return input
+
+
class Upscale2DLayer(Layer):
    """
    2D upscaling layer

    Performs 2D upscaling over the two trailing axes of a 4D input tensor by
    repeating each element ``scale_factor`` times along each axis.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or tuple
        The layer feeding into this layer, or the expected input shape.

    scale_factor : integer or iterable
        The scale factor in each dimension. If an integer, it is promoted to
        a square scale factor region. If an iterable, it should have two
        elements.

    **kwargs
        Any additional keyword arguments are passed to the :class:`Layer`
        superclass.
    """

    def __init__(self, incoming, scale_factor, **kwargs):
        super(Upscale2DLayer, self).__init__(incoming, **kwargs)
        self.scale_factor = as_tuple(scale_factor, 2)
        if self.scale_factor[0] < 1 or self.scale_factor[1] < 1:
            raise ValueError('Scale factor must be >= 1, not {0}'.format(
                self.scale_factor))

    def get_output_shape_for(self, input_shape):
        shape = list(input_shape)
        # unknown (None) spatial sizes stay unknown
        if shape[2] is not None:
            shape[2] = shape[2] * self.scale_factor[0]
        if shape[3] is not None:
            shape[3] = shape[3] * self.scale_factor[1]
        return tuple(shape)

    def get_output_for(self, input, **kwargs):
        rows, cols = self.scale_factor
        out = input
        # repeat columns first (axis 3), then rows (axis 2); a factor of 1
        # along an axis is the identity and is skipped
        if cols > 1:
            out = T.extra_ops.repeat(out, cols, 3)
        if rows > 1:
            out = T.extra_ops.repeat(out, rows, 2)
        return out
+
+
class FeaturePoolLayer(Layer):
    """
    lasagne.layers.FeaturePoolLayer(incoming, pool_size, axis=1,
    pool_function=theano.tensor.max, **kwargs)

    Feature pooling layer

    This layer pools across a given axis of the input. By default this is axis
    1, which corresponds to the feature axis for :class:`DenseLayer`,
    :class:`Conv1DLayer` and :class:`Conv2DLayer`. The layer can be used to
    implement maxout.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or tuple
        The layer feeding into this layer, or the expected input shape.

    pool_size : integer
        the size of the pooling regions, i.e. the number of features / feature
        maps to be pooled together.

    axis : integer
        the axis along which to pool. The default value of ``1`` works
        for :class:`DenseLayer`, :class:`Conv1DLayer` and :class:`Conv2DLayer`.

    pool_function : callable
        the pooling function to use. This defaults to `theano.tensor.max`
        (i.e. max-pooling) and can be replaced by any other aggregation
        function.

    **kwargs
        Any additional keyword arguments are passed to the :class:`Layer`
        superclass.

    Notes
    -----
    This layer requires that the size of the axis along which it pools is a
    multiple of the pool size.
    """

    def __init__(self, incoming, pool_size, axis=1, pool_function=T.max,
                 **kwargs):
        super(FeaturePoolLayer, self).__init__(incoming, **kwargs)
        self.pool_size = pool_size
        self.axis = axis
        self.pool_function = pool_function

        # Fail fast at construction time: the symbolic reshape in
        # get_output_for relies on the pooled axis dividing evenly into
        # groups of `pool_size`.
        num_feature_maps = self.input_shape[self.axis]
        if num_feature_maps % self.pool_size != 0:
            raise ValueError("Number of input feature maps (%d) is not a "
                             "multiple of the pool size (pool_size=%d)" %
                             (num_feature_maps, self.pool_size))

    def get_output_shape_for(self, input_shape):
        output_shape = list(input_shape)  # make a mutable copy
        # Pooling shrinks the chosen axis by exactly `pool_size`.
        output_shape[self.axis] = input_shape[self.axis] // self.pool_size
        return tuple(output_shape)

    def get_output_for(self, input, **kwargs):
        input_shape = tuple(input.shape)
        num_feature_maps = input_shape[self.axis]
        num_feature_maps_out = num_feature_maps // self.pool_size

        # Split the pooled axis into (num_groups, pool_size) so the
        # aggregation can be applied along the new trailing group axis.
        pool_shape = (input_shape[:self.axis] +
                      (num_feature_maps_out, self.pool_size) +
                      input_shape[self.axis+1:])

        input_reshaped = input.reshape(pool_shape)
        # Reduce over the inserted pool_size axis (located at axis + 1
        # after the reshape), leaving all other axes intact.
        return self.pool_function(input_reshaped, axis=self.axis + 1)
+
+
class FeatureWTALayer(Layer):
    """
    'Winner Take All' layer

    This layer performs 'Winner Take All' (WTA) across feature maps: zero out
    all but the maximal activation value within a region.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or tuple
        The layer feeding into this layer, or the expected input shape.

    pool_size : integer
        the number of feature maps per region.

    axis : integer
        the axis along which the regions are formed.

    **kwargs
        Any additional keyword arguments are passed to the :class:`Layer`
        superclass.

    Notes
    -----
    This layer requires that the size of the axis along which it groups units
    is a multiple of the pool size.
    """

    def __init__(self, incoming, pool_size, axis=1, **kwargs):
        super(FeatureWTALayer, self).__init__(incoming, **kwargs)
        self.pool_size = pool_size
        self.axis = axis

        # Fail fast: regions must tile the grouped axis exactly, or the
        # symbolic reshape in get_output_for would be invalid.
        num_feature_maps = self.input_shape[self.axis]
        if num_feature_maps % self.pool_size != 0:
            raise ValueError("Number of input feature maps (%d) is not a "
                             "multiple of the region size (pool_size=%d)" %
                             (num_feature_maps, self.pool_size))

    def get_output_for(self, input, **kwargs):
        num_feature_maps = input.shape[self.axis]
        num_pools = num_feature_maps // self.pool_size

        # Build, in lockstep, (a) the shape that splits `axis` into
        # (num_pools, pool_size) and (b) the dimshuffle pattern that
        # broadcasts an arange over every axis except the pool_size one.
        pool_shape = ()
        arange_shuffle_pattern = ()
        for k in range(self.axis):
            pool_shape += (input.shape[k],)
            arange_shuffle_pattern += ('x',)

        pool_shape += (num_pools, self.pool_size)
        arange_shuffle_pattern += ('x', 0)

        for k in range(self.axis + 1, input.ndim):
            pool_shape += (input.shape[k],)
            arange_shuffle_pattern += ('x',)

        input_reshaped = input.reshape(pool_shape)
        # Index of the winner within each pool_size-sized region;
        # keepdims so it broadcasts against the arange below.
        max_indices = T.argmax(input_reshaped, axis=self.axis + 1,
                               keepdims=True)

        # mask[i] == 1 exactly where the region's argmax lands; reshaping
        # back to input.shape aligns the mask with the original layout.
        arange = T.arange(self.pool_size).dimshuffle(*arange_shuffle_pattern)
        mask = T.eq(max_indices, arange).reshape(input.shape)

        return input * mask
+
+
class GlobalPoolLayer(Layer):
    """
    lasagne.layers.GlobalPoolLayer(incoming,
    pool_function=theano.tensor.mean, **kwargs)

    Global pooling layer

    Aggregates over all trailing dimensions beyond the 2nd, reducing each
    feature map to a single value.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or tuple
        The layer feeding into this layer, or the expected input shape.

    pool_function : callable
        the pooling function to use. This defaults to `theano.tensor.mean`
        (i.e. mean-pooling) and can be replaced by any other aggregation
        function.

    **kwargs
        Any additional keyword arguments are passed to the :class:`Layer`
        superclass.
    """

    def __init__(self, incoming, pool_function=T.mean, **kwargs):
        super(GlobalPoolLayer, self).__init__(incoming, **kwargs)
        self.pool_function = pool_function

    def get_output_shape_for(self, input_shape):
        # Only the leading (batch, channels) dimensions survive.
        return input_shape[:2]

    def get_output_for(self, input, **kwargs):
        # Collapse all trailing axes into one, then reduce over it.
        collapsed = input.flatten(3)
        return self.pool_function(collapsed, axis=2)
diff --git a/lasagne/layers/recurrent.py b/lasagne/layers/recurrent.py
new file mode 100644
index 0000000..0d6aed5
--- /dev/null
+++ b/lasagne/layers/recurrent.py
@@ -0,0 +1,1480 @@
+# -*- coding: utf-8 -*-
+"""
+Layers to construct recurrent networks. Recurrent layers can be used similarly
+to feed-forward layers except that the input shape is expected to be
+``(batch_size, sequence_length, num_inputs)``. The CustomRecurrentLayer can
+also support more than one "feature" dimension (e.g. using convolutional
+connections), but for all other layers, dimensions trailing the third
+dimension are flattened.
+
+The following recurrent layers are implemented:
+
+.. currentmodule:: lasagne.layers
+
+.. autosummary::
+ :nosignatures:
+
+ CustomRecurrentLayer
+ RecurrentLayer
+ LSTMLayer
+ GRULayer
+
+For recurrent layers with gates we use a helper class to set up the parameters
+in each gate:
+
+.. autosummary::
+ :nosignatures:
+
+ Gate
+
+Please refer to that class if you need to modify initial conditions of gates.
+
+Recurrent layers and feed-forward layers can be combined in the same network
+by using a few reshape operations; please refer to the example below.
+
+Examples
+--------
+The following example demonstrates how recurrent layers can be easily mixed
+with feed-forward layers using :class:`ReshapeLayer` and how to build a
+network with variable batch size and number of time steps.
+
+>>> from lasagne.layers import *
+>>> num_inputs, num_units, num_classes = 10, 12, 5
+>>> # By setting the first two dimensions as None, we are allowing them to vary
+>>> # They correspond to batch size and sequence length, so we will be able to
+>>> # feed in batches of varying size with sequences of varying length.
+>>> l_inp = InputLayer((None, None, num_inputs))
+>>> # We can retrieve symbolic references to the input variable's shape, which
+>>> # we will later use in reshape layers.
+>>> batchsize, seqlen, _ = l_inp.input_var.shape
+>>> l_lstm = LSTMLayer(l_inp, num_units=num_units)
+>>> # In order to connect a recurrent layer to a dense layer, we need to
+>>> # flatten the first two dimensions (our "sample dimensions"); this will
+>>> # cause each time step of each sequence to be processed independently
+>>> l_shp = ReshapeLayer(l_lstm, (-1, num_units))
+>>> l_dense = DenseLayer(l_shp, num_units=num_classes)
+>>> # To reshape back to our original shape, we can use the symbolic shape
+>>> # variables we retrieved above.
+>>> l_out = ReshapeLayer(l_dense, (batchsize, seqlen, num_classes))
+"""
+import numpy as np
+import theano
+import theano.tensor as T
+from .. import nonlinearities
+from .. import init
+from ..utils import unroll_scan
+
+from .base import MergeLayer, Layer
+from .input import InputLayer
+from .dense import DenseLayer
+from . import helper
+
+__all__ = [
+ "CustomRecurrentLayer",
+ "RecurrentLayer",
+ "Gate",
+ "LSTMLayer",
+ "GRULayer"
+]
+
+
class CustomRecurrentLayer(MergeLayer):
    r"""
    lasagne.layers.recurrent.CustomRecurrentLayer(incoming, input_to_hidden,
    hidden_to_hidden, nonlinearity=lasagne.nonlinearities.rectify,
    hid_init=lasagne.init.Constant(0.), backwards=False,
    learn_init=False, gradient_steps=-1, grad_clipping=0,
    unroll_scan=False, precompute_input=True, mask_input=None,
    only_return_final=False, **kwargs)

    A layer which implements a recurrent connection.

    This layer allows you to specify custom input-to-hidden and
    hidden-to-hidden connections by instantiating :class:`lasagne.layers.Layer`
    instances and passing them on initialization. Note that these connections
    can consist of multiple layers chained together. The output shape for the
    provided input-to-hidden and hidden-to-hidden connections must be the same.
    If you are looking for a standard, densely-connected recurrent layer,
    please see :class:`RecurrentLayer`. The output is computed by

    .. math ::
        h_t = \sigma(f_i(x_t) + f_h(h_{t-1}))

    Parameters
    ----------
    incoming : a :class:`lasagne.layers.Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape.
    input_to_hidden : :class:`lasagne.layers.Layer`
        :class:`lasagne.layers.Layer` instance which connects input to the
        hidden state (:math:`f_i`).  This layer may be connected to a chain of
        layers, which must end in a :class:`lasagne.layers.InputLayer` with the
        same input shape as `incoming`, except for the first dimension: When
        ``precompute_input == True`` (the default), it must be
        ``incoming.output_shape[0]*incoming.output_shape[1]`` or ``None``; when
        ``precompute_input == False``, it must be ``incoming.output_shape[0]``
        or ``None``.
    hidden_to_hidden : :class:`lasagne.layers.Layer`
        Layer which connects the previous hidden state to the new state
        (:math:`f_h`).  This layer may be connected to a chain of layers, which
        must end in a :class:`lasagne.layers.InputLayer` with the same input
        shape as `hidden_to_hidden`'s output shape.
    nonlinearity : callable or None
        Nonlinearity to apply when computing new state (:math:`\sigma`). If
        None is provided, no nonlinearity will be applied.
    hid_init : callable, np.ndarray, theano.shared or :class:`Layer`
        Initializer for initial hidden state (:math:`h_0`).
    backwards : bool
        If True, process the sequence backwards and then reverse the
        output again such that the output from the layer is always
        from :math:`x_1` to :math:`x_n`.
    learn_init : bool
        If True, initial hidden values are learned.
    gradient_steps : int
        Number of timesteps to include in the backpropagated gradient.
        If -1, backpropagate through the entire sequence.
    grad_clipping : float
        If nonzero, the gradient messages are clipped to the given value during
        the backward pass.  See [1]_ (p. 6) for further explanation.
    unroll_scan : bool
        If True the recursion is unrolled instead of using scan. For some
        graphs this gives a significant speed up but it might also consume
        more memory. When `unroll_scan` is True, backpropagation always
        includes the full sequence, so `gradient_steps` must be set to -1 and
        the input sequence length must be known at compile time (i.e., cannot
        be given as None).
    precompute_input : bool
        If True, precompute input_to_hid before iterating through
        the sequence. This can result in a speedup at the expense of
        an increase in memory usage.
    mask_input : :class:`lasagne.layers.Layer`
        Layer which allows for a sequence mask to be input, for when sequences
        are of variable length.  Default `None`, which means no mask will be
        supplied (i.e. all sequences are of the same length).
    only_return_final : bool
        If True, only return the final sequential output (e.g. for tasks where
        a single target value for the entire sequence is desired).  In this
        case, Theano makes an optimization which saves memory.

    Examples
    --------

    The following example constructs a simple `CustomRecurrentLayer` which
    has dense input-to-hidden and hidden-to-hidden connections.

    >>> import lasagne
    >>> n_batch, n_steps, n_in = (2, 3, 4)
    >>> n_hid = 5
    >>> l_in = lasagne.layers.InputLayer((n_batch, n_steps, n_in))
    >>> l_in_hid = lasagne.layers.DenseLayer(
    ...     lasagne.layers.InputLayer((None, n_in)), n_hid)
    >>> l_hid_hid = lasagne.layers.DenseLayer(
    ...     lasagne.layers.InputLayer((None, n_hid)), n_hid)
    >>> l_rec = lasagne.layers.CustomRecurrentLayer(l_in, l_in_hid, l_hid_hid)

    The CustomRecurrentLayer can also support "convolutional recurrence", as is
    demonstrated below.

    >>> n_batch, n_steps, n_channels, width, height = (2, 3, 4, 5, 6)
    >>> n_out_filters = 7
    >>> filter_shape = (3, 3)
    >>> l_in = lasagne.layers.InputLayer(
    ...     (n_batch, n_steps, n_channels, width, height))
    >>> l_in_to_hid = lasagne.layers.Conv2DLayer(
    ...     lasagne.layers.InputLayer((None, n_channels, width, height)),
    ...     n_out_filters, filter_shape, pad='same')
    >>> l_hid_to_hid = lasagne.layers.Conv2DLayer(
    ...     lasagne.layers.InputLayer(l_in_to_hid.output_shape),
    ...     n_out_filters, filter_shape, pad='same')
    >>> l_rec = lasagne.layers.CustomRecurrentLayer(
    ...     l_in, l_in_to_hid, l_hid_to_hid)

    References
    ----------
    .. [1] Graves, Alex: "Generating sequences with recurrent neural networks."
           arXiv preprint arXiv:1308.0850 (2013).
    """
    def __init__(self, incoming, input_to_hidden, hidden_to_hidden,
                 nonlinearity=nonlinearities.rectify,
                 hid_init=init.Constant(0.),
                 backwards=False,
                 learn_init=False,
                 gradient_steps=-1,
                 grad_clipping=0,
                 unroll_scan=False,
                 precompute_input=True,
                 mask_input=None,
                 only_return_final=False,
                 **kwargs):

        # This layer inherits from a MergeLayer, because it can have three
        # inputs - the layer input, the mask and the initial hidden state.  We
        # will just provide the layer input as incomings, unless a mask input
        # or initial hidden state was provided.
        incomings = [incoming]
        # -1 sentinels mean "not supplied"; get_output_for tests `> 0`.
        self.mask_incoming_index = -1
        self.hid_init_incoming_index = -1
        if mask_input is not None:
            incomings.append(mask_input)
            self.mask_incoming_index = len(incomings)-1
        if isinstance(hid_init, Layer):
            incomings.append(hid_init)
            self.hid_init_incoming_index = len(incomings)-1

        super(CustomRecurrentLayer, self).__init__(incomings, **kwargs)

        self.input_to_hidden = input_to_hidden
        self.hidden_to_hidden = hidden_to_hidden
        self.learn_init = learn_init
        self.backwards = backwards
        self.gradient_steps = gradient_steps
        self.grad_clipping = grad_clipping
        self.unroll_scan = unroll_scan
        self.precompute_input = precompute_input
        self.only_return_final = only_return_final

        if unroll_scan and gradient_steps != -1:
            raise ValueError(
                "Gradient steps must be -1 when unroll_scan is true.")

        # Retrieve the dimensionality of the incoming layer
        input_shape = self.input_shapes[0]

        if unroll_scan and input_shape[1] is None:
            raise ValueError("Input sequence length cannot be specified as "
                             "None when unroll_scan is True")

        # Check that the input_to_hidden connection can appropriately handle
        # a first dimension of input_shape[0]*input_shape[1] when we will
        # precompute the input dot product
        if (self.precompute_input and
                input_to_hidden.output_shape[0] is not None and
                input_shape[0] is not None and
                input_shape[1] is not None and
                (input_to_hidden.output_shape[0] !=
                 input_shape[0]*input_shape[1])):
            raise ValueError(
                'When precompute_input == True, '
                'input_to_hidden.output_shape[0] must equal '
                'incoming.output_shape[0]*incoming.output_shape[1] '
                '(i.e. batch_size*sequence_length) or be None but '
                'input_to_hidden.output_shape[0] = {} and '
                'incoming.output_shape[0]*incoming.output_shape[1] = '
                '{}'.format(input_to_hidden.output_shape[0],
                            input_shape[0]*input_shape[1]))

        # Check that the first dimension of input_to_hidden and
        # hidden_to_hidden's outputs match when we won't precompute the input
        # dot product
        if (not self.precompute_input and
                input_to_hidden.output_shape[0] is not None and
                hidden_to_hidden.output_shape[0] is not None and
                (input_to_hidden.output_shape[0] !=
                 hidden_to_hidden.output_shape[0])):
            raise ValueError(
                'When precompute_input == False, '
                'input_to_hidden.output_shape[0] must equal '
                'hidden_to_hidden.output_shape[0] but '
                'input_to_hidden.output_shape[0] = {} and '
                'hidden_to_hidden.output_shape[0] = {}'.format(
                    input_to_hidden.output_shape[0],
                    hidden_to_hidden.output_shape[0]))

        # Check that input_to_hidden and hidden_to_hidden output shapes match,
        # but don't check a dimension if it's None for either shape
        if not all(s1 is None or s2 is None or s1 == s2
                   for s1, s2 in zip(input_to_hidden.output_shape[1:],
                                     hidden_to_hidden.output_shape[1:])):
            raise ValueError("The output shape for input_to_hidden and "
                             "hidden_to_hidden must be equal after the first "
                             "dimension, but input_to_hidden.output_shape={} "
                             "and hidden_to_hidden.output_shape={}".format(
                                 input_to_hidden.output_shape,
                                 hidden_to_hidden.output_shape))

        # Check that input_to_hidden's output shape is the same as
        # hidden_to_hidden's input shape but don't check a dimension if it's
        # None for either shape
        if not all(s1 is None or s2 is None or s1 == s2
                   for s1, s2 in zip(input_to_hidden.output_shape[1:],
                                     hidden_to_hidden.input_shape[1:])):
            raise ValueError("The output shape for input_to_hidden "
                             "must be equal to the input shape of "
                             "hidden_to_hidden after the first dimension, but "
                             "input_to_hidden.output_shape={} and "
                             "hidden_to_hidden.input_shape={}".format(
                                 input_to_hidden.output_shape,
                                 hidden_to_hidden.input_shape))

        if nonlinearity is None:
            self.nonlinearity = nonlinearities.identity
        else:
            self.nonlinearity = nonlinearity

        # Initialize hidden state: either a Layer providing it per-batch at
        # run time, or a learnable/fixed parameter of shape (1, ...) that is
        # broadcast over the batch in get_output_for.
        if isinstance(hid_init, Layer):
            self.hid_init = hid_init
        else:
            self.hid_init = self.add_param(
                hid_init, (1,) + hidden_to_hidden.output_shape[1:],
                name="hid_init", trainable=learn_init, regularizable=False)

    def get_params(self, **tags):
        # Get all parameters from this layer, the master layer
        params = super(CustomRecurrentLayer, self).get_params(**tags)
        # Combine with all parameters from the child layers
        params += helper.get_all_params(self.input_to_hidden, **tags)
        params += helper.get_all_params(self.hidden_to_hidden, **tags)
        return params

    def get_output_shape_for(self, input_shapes):
        # The shape of the input to this layer will be the first element
        # of input_shapes, whether or not a mask input is being used.
        input_shape = input_shapes[0]
        # When only_return_final is true, the second (sequence step) dimension
        # will be flattened
        if self.only_return_final:
            return (input_shape[0],) + self.hidden_to_hidden.output_shape[1:]
        # Otherwise, the shape will be (n_batch, n_steps, trailing_dims...)
        else:
            return ((input_shape[0], input_shape[1]) +
                    self.hidden_to_hidden.output_shape[1:])

    def get_output_for(self, inputs, **kwargs):
        """
        Compute this layer's output function given a symbolic input variable.

        Parameters
        ----------
        inputs : list of theano.TensorType
            `inputs[0]` should always be the symbolic input variable.  When
            this layer has a mask input (i.e. was instantiated with
            `mask_input != None`, indicating that the lengths of sequences in
            each batch vary), `inputs` should have length 2, where `inputs[1]`
            is the `mask`.  The `mask` should be supplied as a Theano variable
            denoting whether each time step in each sequence in the batch is
            part of the sequence or not.  `mask` should be a matrix of shape
            ``(n_batch, n_time_steps)`` where ``mask[i, j] = 1`` when ``j <=
            (length of sequence i)`` and ``mask[i, j] = 0`` when ``j > (length
            of sequence i)``. When the hidden state of this layer is to be
            pre-filled (i.e. was set to a :class:`Layer` instance) `inputs`
            should have length at least 2, and `inputs[-1]` is the hidden state
            to prefill with.

        Returns
        -------
        layer_output : theano.TensorType
            Symbolic output variable.
        """
        # Retrieve the layer input
        input = inputs[0]
        # Retrieve the mask when it is supplied
        mask = None
        hid_init = None
        if self.mask_incoming_index > 0:
            mask = inputs[self.mask_incoming_index]
        if self.hid_init_incoming_index > 0:
            hid_init = inputs[self.hid_init_incoming_index]

        # Input should be provided as (n_batch, n_time_steps, n_features)
        # but scan requires the iterable dimension to be first
        # So, we need to dimshuffle to (n_time_steps, n_batch, n_features)
        input = input.dimshuffle(1, 0, *range(2, input.ndim))
        seq_len, num_batch = input.shape[0], input.shape[1]

        if self.precompute_input:
            # Because the input is given for all time steps, we can precompute
            # the inputs to hidden before scanning. First we need to reshape
            # from (seq_len, batch_size, trailing dimensions...) to
            # (seq_len*batch_size, trailing dimensions...)
            # This strange use of a generator in a tuple was because
            # input.shape[2:] was raising a Theano error
            trailing_dims = tuple(input.shape[n] for n in range(2, input.ndim))
            input = T.reshape(input, (seq_len*num_batch,) + trailing_dims)
            input = helper.get_output(
                self.input_to_hidden, input, **kwargs)

            # Reshape back to (seq_len, batch_size, trailing dimensions...)
            trailing_dims = tuple(input.shape[n] for n in range(1, input.ndim))
            input = T.reshape(input, (seq_len, num_batch) + trailing_dims)

        # We will always pass the hidden-to-hidden layer params to step
        # (required for scan's strict=True mode below).
        non_seqs = helper.get_all_params(self.hidden_to_hidden)
        # When we are not precomputing the input, we also need to pass the
        # input-to-hidden parameters to step
        if not self.precompute_input:
            non_seqs += helper.get_all_params(self.input_to_hidden)

        # Create single recurrent computation step function
        def step(input_n, hid_previous, *args):
            # Compute the hidden-to-hidden activation
            hid_pre = helper.get_output(
                self.hidden_to_hidden, hid_previous, **kwargs)

            # If the dot product is precomputed then add it, otherwise
            # calculate the input_to_hidden values and add them
            if self.precompute_input:
                hid_pre += input_n
            else:
                hid_pre += helper.get_output(
                    self.input_to_hidden, input_n, **kwargs)

            # Clip gradients
            if self.grad_clipping:
                hid_pre = theano.gradient.grad_clip(
                    hid_pre, -self.grad_clipping, self.grad_clipping)

            return self.nonlinearity(hid_pre)

        def step_masked(input_n, mask_n, hid_previous, *args):
            # Skip over any input with mask 0 by copying the previous
            # hidden state; proceed normally for any input with mask 1.
            hid = step(input_n, hid_previous, *args)
            hid_out = T.switch(mask_n, hid, hid_previous)
            return [hid_out]

        if mask is not None:
            # mask is given as (batch_size, seq_len); scan iterates over the
            # first axis, so shuffle to (seq_len, batch_size, 1) where the
            # trailing broadcast axis lines up with the feature dimensions.
            mask = mask.dimshuffle(1, 0, 'x')
            sequences = [input, mask]
            step_fun = step_masked
        else:
            sequences = input
            step_fun = step

        if not isinstance(self.hid_init, Layer):
            # The code below simply repeats self.hid_init num_batch times in
            # its first dimension.  Turns out using a dot product and a
            # dimshuffle is faster than T.repeat.
            dot_dims = (list(range(1, self.hid_init.ndim - 1)) +
                        [0, self.hid_init.ndim - 1])
            hid_init = T.dot(T.ones((num_batch, 1)),
                             self.hid_init.dimshuffle(dot_dims))

        if self.unroll_scan:
            # Retrieve the dimensionality of the incoming layer
            input_shape = self.input_shapes[0]
            # Explicitly unroll the recurrence instead of using scan
            hid_out = unroll_scan(
                fn=step_fun,
                sequences=sequences,
                outputs_info=[hid_init],
                go_backwards=self.backwards,
                non_sequences=non_seqs,
                n_steps=input_shape[1])[0]
        else:
            # Scan op iterates over first dimension of input and repeatedly
            # applies the step function
            hid_out = theano.scan(
                fn=step_fun,
                sequences=sequences,
                go_backwards=self.backwards,
                outputs_info=[hid_init],
                non_sequences=non_seqs,
                truncate_gradient=self.gradient_steps,
                strict=True)[0]

        # When it is requested that we only return the final sequence step,
        # we need to slice it out immediately after scan is applied
        if self.only_return_final:
            hid_out = hid_out[-1]
        else:
            # dimshuffle back to (n_batch, n_time_steps, n_features))
            hid_out = hid_out.dimshuffle(1, 0, *range(2, hid_out.ndim))

            # if scan is backward reverse the output
            if self.backwards:
                hid_out = hid_out[:, ::-1]

        return hid_out
+
+
class RecurrentLayer(CustomRecurrentLayer):
    r"""
    lasagne.layers.recurrent.RecurrentLayer(incoming, num_units,
    W_in_to_hid=lasagne.init.Uniform(), W_hid_to_hid=lasagne.init.Uniform(),
    b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.rectify,
    hid_init=lasagne.init.Constant(0.), backwards=False, learn_init=False,
    gradient_steps=-1, grad_clipping=0, unroll_scan=False,
    precompute_input=True, mask_input=None, only_return_final=False, **kwargs)

    Dense recurrent neural network (RNN) layer

    A "vanilla" RNN layer, which has dense input-to-hidden and
    hidden-to-hidden connections.  The output is computed as

    .. math ::
        h_t = \sigma(x_t W_x + h_{t-1} W_h + b)

    Parameters
    ----------
    incoming : a :class:`lasagne.layers.Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape.
    num_units : int
        Number of hidden units in the layer.
    W_in_to_hid : Theano shared variable, numpy array or callable
        Initializer for input-to-hidden weight matrix (:math:`W_x`).
    W_hid_to_hid : Theano shared variable, numpy array or callable
        Initializer for hidden-to-hidden weight matrix (:math:`W_h`).
    b : Theano shared variable, numpy array, callable or None
        Initializer for bias vector (:math:`b`). If None is provided there
        will be no bias.
    nonlinearity : callable or None
        Nonlinearity to apply when computing new state (:math:`\sigma`). If
        None is provided, no nonlinearity will be applied.
    hid_init : callable, np.ndarray, theano.shared or :class:`Layer`
        Initializer for initial hidden state (:math:`h_0`).
    backwards : bool
        If True, process the sequence backwards and then reverse the
        output again such that the output from the layer is always
        from :math:`x_1` to :math:`x_n`.
    learn_init : bool
        If True, initial hidden values are learned.
    gradient_steps : int
        Number of timesteps to include in the backpropagated gradient.
        If -1, backpropagate through the entire sequence.
    grad_clipping : float
        If nonzero, the gradient messages are clipped to the given value
        during the backward pass.  See [1]_ (p. 6) for further explanation.
    unroll_scan : bool
        If True the recursion is unrolled instead of using scan. For some
        graphs this gives a significant speed up but it might also consume
        more memory. When `unroll_scan` is True, backpropagation always
        includes the full sequence, so `gradient_steps` must be set to -1 and
        the input sequence length must be known at compile time (i.e., cannot
        be given as None).
    precompute_input : bool
        If True, precompute input_to_hid before iterating through
        the sequence. This can result in a speedup at the expense of
        an increase in memory usage.
    mask_input : :class:`lasagne.layers.Layer`
        Layer which allows for a sequence mask to be input, for when sequences
        are of variable length.  Default `None`, which means no mask will be
        supplied (i.e. all sequences are of the same length).
    only_return_final : bool
        If True, only return the final sequential output (e.g. for tasks where
        a single target value for the entire sequence is desired).  In this
        case, Theano makes an optimization which saves memory.

    References
    ----------
    .. [1] Graves, Alex: "Generating sequences with recurrent neural networks."
           arXiv preprint arXiv:1308.0850 (2013).
    """
    def __init__(self, incoming, num_units,
                 W_in_to_hid=init.Uniform(),
                 W_hid_to_hid=init.Uniform(),
                 b=init.Constant(0.),
                 nonlinearity=nonlinearities.rectify,
                 hid_init=init.Constant(0.),
                 backwards=False,
                 learn_init=False,
                 gradient_steps=-1,
                 grad_clipping=0,
                 unroll_scan=False,
                 precompute_input=True,
                 mask_input=None,
                 only_return_final=False,
                 **kwargs):

        # `incoming` may be a shape tuple instead of a Layer instance; in
        # either case we need the expected input shape to size the inner
        # DenseLayers below.
        if isinstance(incoming, tuple):
            input_shape = incoming
        else:
            input_shape = incoming.output_shape
        # Retrieve the supplied name, if it exists; otherwise use ''
        if 'name' in kwargs:
            basename = kwargs['name'] + '.'
            # Create a separate version of kwargs for the contained layers
            # which does not include 'name'
            layer_kwargs = dict((key, arg) for key, arg in kwargs.items()
                                if key != 'name')
        else:
            basename = ''
            layer_kwargs = kwargs
        # We will be passing the input at each time step to the dense layer,
        # so we need to remove the second dimension (the time dimension)
        in_to_hid = DenseLayer(InputLayer((None,) + input_shape[2:]),
                               num_units, W=W_in_to_hid, b=b,
                               nonlinearity=None,
                               name=basename + 'input_to_hidden',
                               **layer_kwargs)
        # The hidden-to-hidden layer expects its inputs to have num_units
        # features because it recycles the previous hidden state
        hid_to_hid = DenseLayer(InputLayer((None, num_units)),
                                num_units, W=W_hid_to_hid, b=None,
                                nonlinearity=None,
                                name=basename + 'hidden_to_hidden',
                                **layer_kwargs)

        # Make child layer parameters intuitively accessible
        self.W_in_to_hid = in_to_hid.W
        self.W_hid_to_hid = hid_to_hid.W
        self.b = in_to_hid.b

        # Just use the CustomRecurrentLayer with the DenseLayers we created
        super(RecurrentLayer, self).__init__(
            incoming, in_to_hid, hid_to_hid, nonlinearity=nonlinearity,
            hid_init=hid_init, backwards=backwards, learn_init=learn_init,
            gradient_steps=gradient_steps,
            grad_clipping=grad_clipping, unroll_scan=unroll_scan,
            precompute_input=precompute_input, mask_input=mask_input,
            only_return_final=only_return_final, **kwargs)
+
+
class Gate(object):
    """
    lasagne.layers.recurrent.Gate(W_in=lasagne.init.Normal(0.1),
    W_hid=lasagne.init.Normal(0.1), W_cell=lasagne.init.Normal(0.1),
    b=lasagne.init.Constant(0.), nonlinearity=lasagne.nonlinearities.sigmoid)

    Container for the parameters of one gate connection: a linear mix of
    two inputs, optionally an element-wise product with a third, a bias,
    and a nonlinearity.

    Parameters
    ----------
    W_in : Theano shared variable, numpy array or callable
        Initializer for input-to-gate weight matrix.
    W_hid : Theano shared variable, numpy array or callable
        Initializer for hidden-to-gate weight matrix.
    W_cell : Theano shared variable, numpy array, callable, or None
        Initializer for cell-to-gate weight vector.  If None, no cell-to-gate
        weight vector will be stored.
    b : Theano shared variable, numpy array or callable
        Initializer for input gate bias vector.
    nonlinearity : callable or None
        The nonlinearity that is applied to the input gate activation. If None
        is provided, no nonlinearity will be applied.

    Examples
    --------
    For :class:`LSTMLayer` the bias of the forget gate is often initialized to
    a large positive value to encourage the layer initially remember the cell
    value, see e.g. [1]_ page 15.

    >>> import lasagne
    >>> forget_gate = Gate(b=lasagne.init.Constant(5.0))
    >>> l_lstm = LSTMLayer((10, 20, 30), num_units=10,
    ...                    forgetgate=forget_gate)

    References
    ----------
    .. [1] Gers, Felix A., Jürgen Schmidhuber, and Fred Cummins. "Learning to
           forget: Continual prediction with LSTM." Neural computation 12.10
           (2000): 2451-2471.

    """
    def __init__(self, W_in=init.Normal(0.1), W_hid=init.Normal(0.1),
                 W_cell=init.Normal(0.1), b=init.Constant(0.),
                 nonlinearity=nonlinearities.sigmoid):
        self.W_in = W_in
        self.W_hid = W_hid
        # A cell-to-gate initializer is only stored when one is supplied;
        # gateless-peephole users pass W_cell=None and no attribute is set.
        if W_cell is not None:
            self.W_cell = W_cell
        self.b = b
        # None means "no nonlinearity", which we represent by the identity.
        self.nonlinearity = (nonlinearities.identity if nonlinearity is None
                             else nonlinearity)
+
+
class LSTMLayer(MergeLayer):
    r"""
    lasagne.layers.recurrent.LSTMLayer(incoming, num_units,
    ingate=lasagne.layers.Gate(), forgetgate=lasagne.layers.Gate(),
    cell=lasagne.layers.Gate(
    W_cell=None, nonlinearity=lasagne.nonlinearities.tanh),
    outgate=lasagne.layers.Gate(),
    nonlinearity=lasagne.nonlinearities.tanh,
    cell_init=lasagne.init.Constant(0.),
    hid_init=lasagne.init.Constant(0.), backwards=False, learn_init=False,
    peepholes=True, gradient_steps=-1, grad_clipping=0, unroll_scan=False,
    precompute_input=True, mask_input=None, only_return_final=False, **kwargs)

    A long short-term memory (LSTM) layer.

    Includes optional "peephole connections" and a forget gate.  Based on the
    definition in [1]_, which is the current common definition.  The output is
    computed by

    .. math ::

        i_t &= \sigma_i(x_t W_{xi} + h_{t-1} W_{hi}
               + w_{ci} \odot c_{t-1} + b_i)\\
        f_t &= \sigma_f(x_t W_{xf} + h_{t-1} W_{hf}
               + w_{cf} \odot c_{t-1} + b_f)\\
        c_t &= f_t \odot c_{t - 1}
               + i_t \odot \sigma_c(x_t W_{xc} + h_{t-1} W_{hc} + b_c)\\
        o_t &= \sigma_o(x_t W_{xo} + h_{t-1} W_{ho} + w_{co} \odot c_t + b_o)\\
        h_t &= o_t \odot \sigma_h(c_t)

    Parameters
    ----------
    incoming : a :class:`lasagne.layers.Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape.
    num_units : int
        Number of hidden/cell units in the layer.
    ingate : Gate
        Parameters for the input gate (:math:`i_t`): :math:`W_{xi}`,
        :math:`W_{hi}`, :math:`w_{ci}`, :math:`b_i`, and :math:`\sigma_i`.
    forgetgate : Gate
        Parameters for the forget gate (:math:`f_t`): :math:`W_{xf}`,
        :math:`W_{hf}`, :math:`w_{cf}`, :math:`b_f`, and :math:`\sigma_f`.
    cell : Gate
        Parameters for the cell computation (:math:`c_t`): :math:`W_{xc}`,
        :math:`W_{hc}`, :math:`b_c`, and :math:`\sigma_c`.
    outgate : Gate
        Parameters for the output gate (:math:`o_t`): :math:`W_{xo}`,
        :math:`W_{ho}`, :math:`w_{co}`, :math:`b_o`, and :math:`\sigma_o`.
    nonlinearity : callable or None
        The nonlinearity that is applied to the output (:math:`\sigma_h`). If
        None is provided, no nonlinearity will be applied.
    cell_init : callable, np.ndarray, theano.shared or :class:`Layer`
        Initializer for initial cell state (:math:`c_0`).
    hid_init : callable, np.ndarray, theano.shared or :class:`Layer`
        Initializer for initial hidden state (:math:`h_0`).
    backwards : bool
        If True, process the sequence backwards and then reverse the
        output again such that the output from the layer is always
        from :math:`x_1` to :math:`x_n`.
    learn_init : bool
        If True, initial hidden values are learned (has no effect when
        `hid_init` or `cell_init` is a :class:`Layer` instance).
    peepholes : bool
        If True, the LSTM uses peephole connections.
        When False, `ingate.W_cell`, `forgetgate.W_cell` and
        `outgate.W_cell` are ignored.
    gradient_steps : int
        Number of timesteps to include in the backpropagated gradient.
        If -1, backpropagate through the entire sequence.
    grad_clipping : float
        If nonzero, the gradient messages are clipped to the given value during
        the backward pass.  See [1]_ (p. 6) for further explanation.
    unroll_scan : bool
        If True the recursion is unrolled instead of using scan. For some
        graphs this gives a significant speed up but it might also consume
        more memory. When `unroll_scan` is True, backpropagation always
        includes the full sequence, so `gradient_steps` must be set to -1 and
        the input sequence length must be known at compile time (i.e., cannot
        be given as None).
    precompute_input : bool
        If True, precompute input_to_hid before iterating through
        the sequence. This can result in a speedup at the expense of
        an increase in memory usage.
    mask_input : :class:`lasagne.layers.Layer`
        Layer which allows for a sequence mask to be input, for when sequences
        are of variable length.  Default `None`, which means no mask will be
        supplied (i.e. all sequences are of the same length).
    only_return_final : bool
        If True, only return the final sequential output (e.g. for tasks where
        a single target value for the entire sequence is desired).  In this
        case, Theano makes an optimization which saves memory.

    References
    ----------
    .. [1] Graves, Alex: "Generating sequences with recurrent neural networks."
           arXiv preprint arXiv:1308.0850 (2013).
    """
    def __init__(self, incoming, num_units,
                 ingate=Gate(),
                 forgetgate=Gate(),
                 cell=Gate(W_cell=None, nonlinearity=nonlinearities.tanh),
                 outgate=Gate(),
                 nonlinearity=nonlinearities.tanh,
                 cell_init=init.Constant(0.),
                 hid_init=init.Constant(0.),
                 backwards=False,
                 learn_init=False,
                 peepholes=True,
                 gradient_steps=-1,
                 grad_clipping=0,
                 unroll_scan=False,
                 precompute_input=True,
                 mask_input=None,
                 only_return_final=False,
                 **kwargs):

        # This layer inherits from a MergeLayer, because it can have four
        # inputs - the layer input, the mask, the initial hidden state and the
        # initial cell state. We will just provide the layer input as
        # incomings, unless a mask input, initial hidden state or initial
        # cell state was provided.
        incomings = [incoming]
        # An index of -1 is a sentinel meaning "this optional input was not
        # supplied as a Layer"; see get_output_for.
        self.mask_incoming_index = -1
        self.hid_init_incoming_index = -1
        self.cell_init_incoming_index = -1
        if mask_input is not None:
            incomings.append(mask_input)
            self.mask_incoming_index = len(incomings)-1
        if isinstance(hid_init, Layer):
            incomings.append(hid_init)
            self.hid_init_incoming_index = len(incomings)-1
        if isinstance(cell_init, Layer):
            incomings.append(cell_init)
            self.cell_init_incoming_index = len(incomings)-1

        # Initialize parent layer
        super(LSTMLayer, self).__init__(incomings, **kwargs)

        # If the provided nonlinearity is None, make it linear
        if nonlinearity is None:
            self.nonlinearity = nonlinearities.identity
        else:
            self.nonlinearity = nonlinearity

        self.learn_init = learn_init
        self.num_units = num_units
        self.backwards = backwards
        self.peepholes = peepholes
        self.gradient_steps = gradient_steps
        self.grad_clipping = grad_clipping
        self.unroll_scan = unroll_scan
        self.precompute_input = precompute_input
        self.only_return_final = only_return_final

        # Unrolling always backpropagates through the whole sequence, so a
        # truncated gradient cannot be honored.
        if unroll_scan and gradient_steps != -1:
            raise ValueError(
                "Gradient steps must be -1 when unroll_scan is true.")

        # Retrieve the dimensionality of the incoming layer
        input_shape = self.input_shapes[0]

        if unroll_scan and input_shape[1] is None:
            raise ValueError("Input sequence length cannot be specified as "
                             "None when unroll_scan is True")

        # Input dimensionality is the output dimensionality of the input
        # layer, with all trailing (feature) dimensions flattened together.
        num_inputs = np.prod(input_shape[2:])

        def add_gate_params(gate, gate_name):
            """ Convenience function for adding layer parameters from a Gate
            instance. """
            return (self.add_param(gate.W_in, (num_inputs, num_units),
                                   name="W_in_to_{}".format(gate_name)),
                    self.add_param(gate.W_hid, (num_units, num_units),
                                   name="W_hid_to_{}".format(gate_name)),
                    self.add_param(gate.b, (num_units,),
                                   name="b_{}".format(gate_name),
                                   regularizable=False),
                    gate.nonlinearity)

        # Add in parameters from the supplied Gate instances
        (self.W_in_to_ingate, self.W_hid_to_ingate, self.b_ingate,
         self.nonlinearity_ingate) = add_gate_params(ingate, 'ingate')

        (self.W_in_to_forgetgate, self.W_hid_to_forgetgate, self.b_forgetgate,
         self.nonlinearity_forgetgate) = add_gate_params(forgetgate,
                                                         'forgetgate')

        (self.W_in_to_cell, self.W_hid_to_cell, self.b_cell,
         self.nonlinearity_cell) = add_gate_params(cell, 'cell')

        (self.W_in_to_outgate, self.W_hid_to_outgate, self.b_outgate,
         self.nonlinearity_outgate) = add_gate_params(outgate, 'outgate')

        # If peephole (cell to gate) connections were enabled, initialize
        # peephole connections. These are elementwise products with the cell
        # state, so they are represented as vectors.
        if self.peepholes:
            self.W_cell_to_ingate = self.add_param(
                ingate.W_cell, (num_units, ), name="W_cell_to_ingate")

            self.W_cell_to_forgetgate = self.add_param(
                forgetgate.W_cell, (num_units, ), name="W_cell_to_forgetgate")

            self.W_cell_to_outgate = self.add_param(
                outgate.W_cell, (num_units, ), name="W_cell_to_outgate")

        # Setup initial values for the cell and the hidden units.  When given
        # as a Layer, the state is computed by that layer at run time;
        # otherwise it becomes a (1, num_units) parameter that is trainable
        # only when learn_init=True.
        if isinstance(cell_init, Layer):
            self.cell_init = cell_init
        else:
            self.cell_init = self.add_param(
                cell_init, (1, num_units), name="cell_init",
                trainable=learn_init, regularizable=False)

        if isinstance(hid_init, Layer):
            self.hid_init = hid_init
        else:
            self.hid_init = self.add_param(
                hid_init, (1, self.num_units), name="hid_init",
                trainable=learn_init, regularizable=False)

    def get_output_shape_for(self, input_shapes):
        # The shape of the input to this layer will be the first element
        # of input_shapes, whether or not a mask input is being used.
        input_shape = input_shapes[0]
        # When only_return_final is true, the second (sequence step) dimension
        # will be flattened
        if self.only_return_final:
            return input_shape[0], self.num_units
        # Otherwise, the shape will be (n_batch, n_steps, num_units)
        else:
            return input_shape[0], input_shape[1], self.num_units

    def get_output_for(self, inputs, **kwargs):
        """
        Compute this layer's output function given a symbolic input variable

        Parameters
        ----------
        inputs : list of theano.TensorType
            `inputs[0]` should always be the symbolic input variable.  When
            this layer has a mask input (i.e. was instantiated with
            `mask_input != None`, indicating that the lengths of sequences in
            each batch vary), `inputs` should have length 2, where `inputs[1]`
            is the `mask`.  The `mask` should be supplied as a Theano variable
            denoting whether each time step in each sequence in the batch is
            part of the sequence or not.  `mask` should be a matrix of shape
            ``(n_batch, n_time_steps)`` where ``mask[i, j] = 1`` when ``j <=
            (length of sequence i)`` and ``mask[i, j] = 0`` when ``j > (length
            of sequence i)``.  When the hidden state of this layer is to be
            pre-filled (i.e. was set to a :class:`Layer` instance) `inputs`
            should have length at least 2, and `inputs[-1]` is the hidden state
            to prefill with.  When the cell state of this layer is to be
            pre-filled (i.e. was set to a :class:`Layer` instance) `inputs`
            should have length at least 2, and `inputs[-1]` is the cell state
            to prefill with.  When both the cell state and the hidden state are
            being pre-filled `inputs[-2]` is the hidden state, while
            `inputs[-1]` is the cell state.

        Returns
        -------
        layer_output : theano.TensorType
            Symbolic output variable.
        """
        # Retrieve the layer input
        input = inputs[0]
        # Retrieve the mask and initial states when they were supplied; an
        # incoming index of -1 (set in __init__) means "not supplied".
        mask = None
        hid_init = None
        cell_init = None
        if self.mask_incoming_index > 0:
            mask = inputs[self.mask_incoming_index]
        if self.hid_init_incoming_index > 0:
            hid_init = inputs[self.hid_init_incoming_index]
        if self.cell_init_incoming_index > 0:
            cell_init = inputs[self.cell_init_incoming_index]

        # Treat all dimensions after the second as flattened feature dimensions
        if input.ndim > 3:
            input = T.flatten(input, 3)

        # Because scan iterates over the first dimension we dimshuffle to
        # (n_time_steps, n_batch, n_features)
        input = input.dimshuffle(1, 0, 2)
        seq_len, num_batch, _ = input.shape

        # Stack input weight matrices into a (num_inputs, 4*num_units)
        # matrix, which speeds up computation.  Gate order throughout is:
        # in, forget, cell, out (see slice_w below).
        W_in_stacked = T.concatenate(
            [self.W_in_to_ingate, self.W_in_to_forgetgate,
             self.W_in_to_cell, self.W_in_to_outgate], axis=1)

        # Same for hidden weight matrices
        W_hid_stacked = T.concatenate(
            [self.W_hid_to_ingate, self.W_hid_to_forgetgate,
             self.W_hid_to_cell, self.W_hid_to_outgate], axis=1)

        # Stack biases into a (4*num_units) vector
        b_stacked = T.concatenate(
            [self.b_ingate, self.b_forgetgate,
             self.b_cell, self.b_outgate], axis=0)

        if self.precompute_input:
            # Because the input is given for all time steps, we can
            # precompute_input the inputs dot weight matrices before scanning.
            # W_in_stacked is (n_features, 4*num_units). input is then
            # (n_time_steps, n_batch, 4*num_units).
            input = T.dot(input, W_in_stacked) + b_stacked

        # When theano.scan calls step, input_n will be (n_batch, 4*num_units).
        # We define a slicing function that extract the input to each LSTM gate
        def slice_w(x, n):
            return x[:, n*self.num_units:(n+1)*self.num_units]

        # Create single recurrent computation step function
        # input_n is the n'th vector of the input
        def step(input_n, cell_previous, hid_previous, *args):
            if not self.precompute_input:
                input_n = T.dot(input_n, W_in_stacked) + b_stacked

            # Calculate gates pre-activations and slice
            gates = input_n + T.dot(hid_previous, W_hid_stacked)

            # Clip gradients
            if self.grad_clipping:
                gates = theano.gradient.grad_clip(
                    gates, -self.grad_clipping, self.grad_clipping)

            # Extract the pre-activation gate values
            ingate = slice_w(gates, 0)
            forgetgate = slice_w(gates, 1)
            cell_input = slice_w(gates, 2)
            outgate = slice_w(gates, 3)

            if self.peepholes:
                # Compute peephole connections
                ingate += cell_previous*self.W_cell_to_ingate
                forgetgate += cell_previous*self.W_cell_to_forgetgate

            # Apply nonlinearities
            ingate = self.nonlinearity_ingate(ingate)
            forgetgate = self.nonlinearity_forgetgate(forgetgate)
            cell_input = self.nonlinearity_cell(cell_input)

            # Compute new cell value
            cell = forgetgate*cell_previous + ingate*cell_input

            if self.peepholes:
                # The output-gate peephole uses the *new* cell value.
                outgate += cell*self.W_cell_to_outgate
            outgate = self.nonlinearity_outgate(outgate)

            # Compute new hidden unit activation
            hid = outgate*self.nonlinearity(cell)
            return [cell, hid]

        def step_masked(input_n, mask_n, cell_previous, hid_previous, *args):
            cell, hid = step(input_n, cell_previous, hid_previous, *args)

            # Skip over any input with mask 0 by copying the previous
            # hidden state; proceed normally for any input with mask 1.
            cell = T.switch(mask_n, cell, cell_previous)
            hid = T.switch(mask_n, hid, hid_previous)

            return [cell, hid]

        if mask is not None:
            # mask is given as (batch_size, seq_len). Because scan iterates
            # over first dimension, we dimshuffle to (seq_len, batch_size) and
            # add a broadcastable dimension
            mask = mask.dimshuffle(1, 0, 'x')
            sequences = [input, mask]
            step_fun = step_masked
        else:
            sequences = input
            step_fun = step

        ones = T.ones((num_batch, 1))
        if not isinstance(self.cell_init, Layer):
            # Dot against a 1s vector to repeat to shape (num_batch, num_units)
            cell_init = T.dot(ones, self.cell_init)

        if not isinstance(self.hid_init, Layer):
            # Dot against a 1s vector to repeat to shape (num_batch, num_units)
            hid_init = T.dot(ones, self.hid_init)

        # The hidden-to-hidden weight matrix is always used in step
        non_seqs = [W_hid_stacked]
        # The "peephole" weight matrices are only used when self.peepholes=True
        if self.peepholes:
            non_seqs += [self.W_cell_to_ingate,
                         self.W_cell_to_forgetgate,
                         self.W_cell_to_outgate]

        # When we aren't precomputing the input outside of scan, we need to
        # provide the input weights and biases to the step function
        if not self.precompute_input:
            non_seqs += [W_in_stacked, b_stacked]

        if self.unroll_scan:
            # Retrieve the dimensionality of the incoming layer
            input_shape = self.input_shapes[0]
            # Explicitly unroll the recurrence instead of using scan
            cell_out, hid_out = unroll_scan(
                fn=step_fun,
                sequences=sequences,
                outputs_info=[cell_init, hid_init],
                go_backwards=self.backwards,
                non_sequences=non_seqs,
                n_steps=input_shape[1])
        else:
            # Scan op iterates over first dimension of input and repeatedly
            # applies the step function.  strict=True requires every shared
            # variable used inside step to be listed in non_sequences.
            cell_out, hid_out = theano.scan(
                fn=step_fun,
                sequences=sequences,
                outputs_info=[cell_init, hid_init],
                go_backwards=self.backwards,
                truncate_gradient=self.gradient_steps,
                non_sequences=non_seqs,
                strict=True)[0]

        # When it is requested that we only return the final sequence step,
        # we need to slice it out immediately after scan is applied
        if self.only_return_final:
            hid_out = hid_out[-1]
        else:
            # dimshuffle back to (n_batch, n_time_steps, n_features))
            hid_out = hid_out.dimshuffle(1, 0, 2)

            # if scan is backward reverse the output
            if self.backwards:
                hid_out = hid_out[:, ::-1]

        return hid_out
+
+
class GRULayer(MergeLayer):
    r"""
    lasagne.layers.recurrent.GRULayer(incoming, num_units,
    resetgate=lasagne.layers.Gate(W_cell=None),
    updategate=lasagne.layers.Gate(W_cell=None),
    hidden_update=lasagne.layers.Gate(
    W_cell=None, nonlinearity=lasagne.nonlinearities.tanh),
    hid_init=lasagne.init.Constant(0.), backwards=False, learn_init=False,
    gradient_steps=-1, grad_clipping=0, unroll_scan=False,
    precompute_input=True, mask_input=None, only_return_final=False, **kwargs)

    Gated Recurrent Unit (GRU) Layer

    Implements the recurrent step proposed in [1]_, which computes the output
    by

    .. math ::
        r_t &= \sigma_r(x_t W_{xr} + h_{t - 1} W_{hr} + b_r)\\
        u_t &= \sigma_u(x_t W_{xu} + h_{t - 1} W_{hu} + b_u)\\
        c_t &= \sigma_c(x_t W_{xc} + r_t \odot (h_{t - 1} W_{hc}) + b_c)\\
        h_t &= (1 - u_t) \odot h_{t - 1} + u_t \odot c_t

    Parameters
    ----------
    incoming : a :class:`lasagne.layers.Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape.
    num_units : int
        Number of hidden units in the layer.
    resetgate : Gate
        Parameters for the reset gate (:math:`r_t`): :math:`W_{xr}`,
        :math:`W_{hr}`, :math:`b_r`, and :math:`\sigma_r`.
    updategate : Gate
        Parameters for the update gate (:math:`u_t`): :math:`W_{xu}`,
        :math:`W_{hu}`, :math:`b_u`, and :math:`\sigma_u`.
    hidden_update : Gate
        Parameters for the hidden update (:math:`c_t`): :math:`W_{xc}`,
        :math:`W_{hc}`, :math:`b_c`, and :math:`\sigma_c`.
    hid_init : callable, np.ndarray, theano.shared or :class:`Layer`
        Initializer for initial hidden state (:math:`h_0`).
    backwards : bool
        If True, process the sequence backwards and then reverse the
        output again such that the output from the layer is always
        from :math:`x_1` to :math:`x_n`.
    learn_init : bool
        If True, initial hidden values are learned (has no effect when
        `hid_init` is a :class:`Layer` instance).
    gradient_steps : int
        Number of timesteps to include in the backpropagated gradient.
        If -1, backpropagate through the entire sequence.
    grad_clipping : float
        If nonzero, the gradient messages are clipped to the given value during
        the backward pass.  See [1]_ (p. 6) for further explanation.
    unroll_scan : bool
        If True the recursion is unrolled instead of using scan. For some
        graphs this gives a significant speed up but it might also consume
        more memory. When `unroll_scan` is True, backpropagation always
        includes the full sequence, so `gradient_steps` must be set to -1 and
        the input sequence length must be known at compile time (i.e., cannot
        be given as None).
    precompute_input : bool
        If True, precompute input_to_hid before iterating through
        the sequence. This can result in a speedup at the expense of
        an increase in memory usage.
    mask_input : :class:`lasagne.layers.Layer`
        Layer which allows for a sequence mask to be input, for when sequences
        are of variable length.  Default `None`, which means no mask will be
        supplied (i.e. all sequences are of the same length).
    only_return_final : bool
        If True, only return the final sequential output (e.g. for tasks where
        a single target value for the entire sequence is desired).  In this
        case, Theano makes an optimization which saves memory.

    References
    ----------
    .. [1] Cho, Kyunghyun, et al: On the properties of neural
       machine translation: Encoder-decoder approaches.
       arXiv preprint arXiv:1409.1259 (2014).
    .. [2] Chung, Junyoung, et al.: Empirical Evaluation of Gated
       Recurrent Neural Networks on Sequence Modeling.
       arXiv preprint arXiv:1412.3555 (2014).
    .. [3] Graves, Alex: "Generating sequences with recurrent neural networks."
           arXiv preprint arXiv:1308.0850 (2013).

    Notes
    -----
    An alternate update for the candidate hidden state is proposed in [2]_:

    .. math::
        c_t &= \sigma_c(x_t W_{xc} + (r_t \odot h_{t - 1})W_{hc} + b_c)\\

    We use the formulation from [1]_ because it allows us to do all matrix
    operations in a single dot product.
    """
    def __init__(self, incoming, num_units,
                 resetgate=Gate(W_cell=None),
                 updategate=Gate(W_cell=None),
                 hidden_update=Gate(W_cell=None,
                                    nonlinearity=nonlinearities.tanh),
                 hid_init=init.Constant(0.),
                 backwards=False,
                 learn_init=False,
                 gradient_steps=-1,
                 grad_clipping=0,
                 unroll_scan=False,
                 precompute_input=True,
                 mask_input=None,
                 only_return_final=False,
                 **kwargs):

        # This layer inherits from a MergeLayer, because it can have three
        # inputs - the layer input, the mask and the initial hidden state. We
        # will just provide the layer input as incomings, unless a mask input
        # or initial hidden state was provided.
        incomings = [incoming]
        # An index of -1 is a sentinel meaning "this optional input was not
        # supplied"; see get_output_for.
        self.mask_incoming_index = -1
        self.hid_init_incoming_index = -1
        if mask_input is not None:
            incomings.append(mask_input)
            self.mask_incoming_index = len(incomings)-1
        if isinstance(hid_init, Layer):
            incomings.append(hid_init)
            self.hid_init_incoming_index = len(incomings)-1

        # Initialize parent layer
        super(GRULayer, self).__init__(incomings, **kwargs)

        self.learn_init = learn_init
        self.num_units = num_units
        self.grad_clipping = grad_clipping
        self.backwards = backwards
        self.gradient_steps = gradient_steps
        self.unroll_scan = unroll_scan
        self.precompute_input = precompute_input
        self.only_return_final = only_return_final

        # Unrolling always backpropagates through the whole sequence, so a
        # truncated gradient cannot be honored.
        if unroll_scan and gradient_steps != -1:
            raise ValueError(
                "Gradient steps must be -1 when unroll_scan is true.")

        # Retrieve the dimensionality of the incoming layer
        input_shape = self.input_shapes[0]

        if unroll_scan and input_shape[1] is None:
            raise ValueError("Input sequence length cannot be specified as "
                             "None when unroll_scan is True")

        # Input dimensionality is the output dimensionality of the input layer
        num_inputs = np.prod(input_shape[2:])

        def add_gate_params(gate, gate_name):
            """ Convenience function for adding layer parameters from a Gate
            instance. """
            return (self.add_param(gate.W_in, (num_inputs, num_units),
                                   name="W_in_to_{}".format(gate_name)),
                    self.add_param(gate.W_hid, (num_units, num_units),
                                   name="W_hid_to_{}".format(gate_name)),
                    self.add_param(gate.b, (num_units,),
                                   name="b_{}".format(gate_name),
                                   regularizable=False),
                    gate.nonlinearity)

        # Add in all parameters from gates
        (self.W_in_to_updategate, self.W_hid_to_updategate, self.b_updategate,
         self.nonlinearity_updategate) = add_gate_params(updategate,
                                                         'updategate')
        (self.W_in_to_resetgate, self.W_hid_to_resetgate, self.b_resetgate,
         self.nonlinearity_resetgate) = add_gate_params(resetgate, 'resetgate')

        (self.W_in_to_hidden_update, self.W_hid_to_hidden_update,
         self.b_hidden_update, self.nonlinearity_hid) = add_gate_params(
             hidden_update, 'hidden_update')

        # Initialize hidden state.  When given as a Layer, the state is
        # computed by that layer at run time; otherwise it becomes a
        # (1, num_units) parameter, trainable only when learn_init=True.
        if isinstance(hid_init, Layer):
            self.hid_init = hid_init
        else:
            self.hid_init = self.add_param(
                hid_init, (1, self.num_units), name="hid_init",
                trainable=learn_init, regularizable=False)

    def get_output_shape_for(self, input_shapes):
        # The shape of the input to this layer will be the first element
        # of input_shapes, whether or not a mask input is being used.
        input_shape = input_shapes[0]
        # When only_return_final is true, the second (sequence step) dimension
        # will be flattened
        if self.only_return_final:
            return input_shape[0], self.num_units
        # Otherwise, the shape will be (n_batch, n_steps, num_units)
        else:
            return input_shape[0], input_shape[1], self.num_units

    def get_output_for(self, inputs, **kwargs):
        """
        Compute this layer's output function given a symbolic input variable

        Parameters
        ----------
        inputs : list of theano.TensorType
            `inputs[0]` should always be the symbolic input variable.  When
            this layer has a mask input (i.e. was instantiated with
            `mask_input != None`, indicating that the lengths of sequences in
            each batch vary), `inputs` should have length 2, where `inputs[1]`
            is the `mask`.  The `mask` should be supplied as a Theano variable
            denoting whether each time step in each sequence in the batch is
            part of the sequence or not.  `mask` should be a matrix of shape
            ``(n_batch, n_time_steps)`` where ``mask[i, j] = 1`` when ``j <=
            (length of sequence i)`` and ``mask[i, j] = 0`` when ``j > (length
            of sequence i)``.  When the hidden state of this layer is to be
            pre-filled (i.e. was set to a :class:`Layer` instance) `inputs`
            should have length at least 2, and `inputs[-1]` is the hidden state
            to prefill with.

        Returns
        -------
        layer_output : theano.TensorType
            Symbolic output variable.
        """
        # Retrieve the layer input
        input = inputs[0]
        # Retrieve the mask and the initial state when they were supplied;
        # an incoming index of -1 (set in __init__) means "not supplied".
        mask = None
        hid_init = None
        if self.mask_incoming_index > 0:
            mask = inputs[self.mask_incoming_index]
        if self.hid_init_incoming_index > 0:
            hid_init = inputs[self.hid_init_incoming_index]

        # Treat all dimensions after the second as flattened feature dimensions
        if input.ndim > 3:
            input = T.flatten(input, 3)

        # Because scan iterates over the first dimension we dimshuffle to
        # (n_time_steps, n_batch, n_features)
        input = input.dimshuffle(1, 0, 2)
        seq_len, num_batch, _ = input.shape

        # Stack input weight matrices into a (num_inputs, 3*num_units)
        # matrix, which speeds up computation.  Gate order throughout is:
        # reset, update, hidden update (see slice_w below).
        W_in_stacked = T.concatenate(
            [self.W_in_to_resetgate, self.W_in_to_updategate,
             self.W_in_to_hidden_update], axis=1)

        # Same for hidden weight matrices
        W_hid_stacked = T.concatenate(
            [self.W_hid_to_resetgate, self.W_hid_to_updategate,
             self.W_hid_to_hidden_update], axis=1)

        # Stack gate biases into a (3*num_units) vector
        b_stacked = T.concatenate(
            [self.b_resetgate, self.b_updategate,
             self.b_hidden_update], axis=0)

        if self.precompute_input:
            # precompute_input inputs*W. W_in is (n_features, 3*num_units).
            # input is then (n_batch, n_time_steps, 3*num_units).
            input = T.dot(input, W_in_stacked) + b_stacked

        # When theano.scan calls step, input_n will be (n_batch, 3*num_units).
        # We define a slicing function that extract the input to each GRU gate
        def slice_w(x, n):
            return x[:, n*self.num_units:(n+1)*self.num_units]

        # Create single recurrent computation step function
        # input_n is the n'th vector of the input
        def step(input_n, hid_previous, *args):
            # Compute W_{hr} h_{t - 1}, W_{hu} h_{t - 1}, and W_{hc} h_{t - 1}
            hid_input = T.dot(hid_previous, W_hid_stacked)

            if self.grad_clipping:
                input_n = theano.gradient.grad_clip(
                    input_n, -self.grad_clipping, self.grad_clipping)
                hid_input = theano.gradient.grad_clip(
                    hid_input, -self.grad_clipping, self.grad_clipping)

            if not self.precompute_input:
                # Compute W_{xr}x_t + b_r, W_{xu}x_t + b_u, and W_{xc}x_t + b_c
                input_n = T.dot(input_n, W_in_stacked) + b_stacked

            # Reset and update gates
            resetgate = slice_w(hid_input, 0) + slice_w(input_n, 0)
            updategate = slice_w(hid_input, 1) + slice_w(input_n, 1)
            resetgate = self.nonlinearity_resetgate(resetgate)
            updategate = self.nonlinearity_updategate(updategate)

            # Compute W_{xc}x_t + r_t \odot (W_{hc} h_{t - 1})
            hidden_update_in = slice_w(input_n, 2)
            hidden_update_hid = slice_w(hid_input, 2)
            hidden_update = hidden_update_in + resetgate*hidden_update_hid
            if self.grad_clipping:
                hidden_update = theano.gradient.grad_clip(
                    hidden_update, -self.grad_clipping, self.grad_clipping)
            hidden_update = self.nonlinearity_hid(hidden_update)

            # Compute (1 - u_t)h_{t - 1} + u_t c_t
            hid = (1 - updategate)*hid_previous + updategate*hidden_update
            return hid

        def step_masked(input_n, mask_n, hid_previous, *args):
            hid = step(input_n, hid_previous, *args)

            # Skip over any input with mask 0 by copying the previous
            # hidden state; proceed normally for any input with mask 1.
            hid = T.switch(mask_n, hid, hid_previous)

            return hid

        if mask is not None:
            # mask is given as (batch_size, seq_len). Because scan iterates
            # over first dimension, we dimshuffle to (seq_len, batch_size) and
            # add a broadcastable dimension
            mask = mask.dimshuffle(1, 0, 'x')
            sequences = [input, mask]
            step_fun = step_masked
        else:
            sequences = [input]
            step_fun = step

        if not isinstance(self.hid_init, Layer):
            # Dot against a 1s vector to repeat to shape (num_batch, num_units)
            hid_init = T.dot(T.ones((num_batch, 1)), self.hid_init)

        # The hidden-to-hidden weight matrix is always used in step
        non_seqs = [W_hid_stacked]
        # When we aren't precomputing the input outside of scan, we need to
        # provide the input weights and biases to the step function
        if not self.precompute_input:
            non_seqs += [W_in_stacked, b_stacked]

        if self.unroll_scan:
            # Retrieve the dimensionality of the incoming layer
            input_shape = self.input_shapes[0]
            # Explicitly unroll the recurrence instead of using scan
            hid_out = unroll_scan(
                fn=step_fun,
                sequences=sequences,
                outputs_info=[hid_init],
                go_backwards=self.backwards,
                non_sequences=non_seqs,
                n_steps=input_shape[1])[0]
        else:
            # Scan op iterates over first dimension of input and repeatedly
            # applies the step function.  strict=True requires every shared
            # variable used inside step to be listed in non_sequences.
            hid_out = theano.scan(
                fn=step_fun,
                sequences=sequences,
                go_backwards=self.backwards,
                outputs_info=[hid_init],
                non_sequences=non_seqs,
                truncate_gradient=self.gradient_steps,
                strict=True)[0]

        # When it is requested that we only return the final sequence step,
        # we need to slice it out immediately after scan is applied
        if self.only_return_final:
            hid_out = hid_out[-1]
        else:
            # dimshuffle back to (n_batch, n_time_steps, n_features))
            hid_out = hid_out.dimshuffle(1, 0, 2)

            # if scan is backward reverse the output
            if self.backwards:
                hid_out = hid_out[:, ::-1]

        return hid_out
diff --git a/lasagne/layers/shape.py b/lasagne/layers/shape.py
new file mode 100644
index 0000000..4f5e7ef
--- /dev/null
+++ b/lasagne/layers/shape.py
@@ -0,0 +1,397 @@
+import numpy as np
+import theano.tensor as T
+
+from ..theano_extensions import padding
+
+from .base import Layer
+
+
# Public names re-exported through the ``lasagne.layers`` namespace.
__all__ = [
    "FlattenLayer",
    "flatten",
    "ReshapeLayer",
    "reshape",
    "DimshuffleLayer",
    "dimshuffle",
    "PadLayer",
    "pad",
    "SliceLayer"
]
+
+
class FlattenLayer(Layer):
    """
    A layer that flattens its input. The leading ``outdim-1`` dimensions of
    the output will have the same shape as the input. The remaining dimensions
    are collapsed into the last dimension.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape.
    outdim : int
        The number of dimensions in the output.

    Raises
    ------
    ValueError
        If ``outdim`` is smaller than 1.

    See Also
    --------
    flatten : Shortcut
    """
    def __init__(self, incoming, outdim=2, **kwargs):
        super(FlattenLayer, self).__init__(incoming, **kwargs)
        if outdim < 1:
            # BUG FIX: the original passed ``outdim`` as a second argument to
            # ValueError instead of %-formatting it into the string, so the
            # raw format string was shown to the user.
            raise ValueError('Dim must be >0, was %i' % outdim)
        self.outdim = outdim

    def get_output_shape_for(self, input_shape):
        # All dimensions from index ``outdim - 1`` on are collapsed into one.
        to_flatten = input_shape[self.outdim - 1:]

        # If any collapsed dimension is unknown, the flattened size is too.
        if any(s is None for s in to_flatten):
            flattened = None
        else:
            flattened = int(np.prod(to_flatten))

        return input_shape[:self.outdim - 1] + (flattened,)

    def get_output_for(self, input, **kwargs):
        return input.flatten(self.outdim)

flatten = FlattenLayer  # shortcut
+
+
class ReshapeLayer(Layer):
    """
    A layer reshaping its input tensor to another tensor of the same total
    number of elements.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape

    shape : tuple
        The target shape specification. Each element can be one of:

        * ``i``, a positive integer directly giving the size of the dimension
        * ``[i]``, a single-element list of int, denoting to use the size
          of the ``i`` th input dimension
        * ``-1``, denoting to infer the size for this dimension to match
          the total number of elements in the input tensor (cannot be used
          more than once in a specification)
        * TensorVariable directly giving the size of the dimension

    Examples
    --------
    >>> from lasagne.layers import InputLayer, ReshapeLayer
    >>> l_in = InputLayer((32, 100, 20))
    >>> l1 = ReshapeLayer(l_in, ((32, 50, 40)))
    >>> l1.output_shape
    (32, 50, 40)
    >>> l_in = InputLayer((None, 100, 20))
    >>> l1 = ReshapeLayer(l_in, ([0], [1], 5, -1))
    >>> l1.output_shape
    (None, 100, 5, 4)

    Notes
    -----
    The tensor elements will be fetched and placed in C-like order. That
    is, reshaping `[1,2,3,4,5,6]` to shape `(2,3)` will result in a matrix
    `[[1,2,3],[4,5,6]]`, not in `[[1,3,5],[2,4,6]]` (Fortran-like order),
    regardless of the memory layout of the input tensor. For C-contiguous
    input, reshaping is cheap, for others it may require copying the data.
    """

    def __init__(self, incoming, shape, **kwargs):
        super(ReshapeLayer, self).__init__(incoming, **kwargs)
        shape = tuple(shape)
        # Validate every element of the specification up front, so errors
        # surface at construction time rather than at compile time.
        for s in shape:
            if isinstance(s, int):
                if s == 0 or s < -1:
                    raise ValueError("`shape` integers must be positive or -1")
            elif isinstance(s, list):
                if len(s) != 1 or not isinstance(s[0], int) or s[0] < 0:
                    raise ValueError("`shape` input references must be "
                                     "single-element lists of int >= 0")
            elif isinstance(s, T.TensorVariable):
                if s.ndim != 0:
                    raise ValueError(
                        "A symbolic variable in a shape specification must be "
                        "a scalar, but had %i dimensions" % s.ndim)
            else:
                raise ValueError("`shape` must be a tuple of int and/or [int]")
        # At most one -1 is allowed, since it is inferred from the remainder.
        if sum(s == -1 for s in shape) > 1:
            raise ValueError("`shape` cannot contain multiple -1")
        self.shape = shape
        # try computing the output shape once as a sanity check
        self.get_output_shape_for(self.input_shape)

    def get_output_shape_for(self, input_shape, **kwargs):
        """Compute the symbolic-free output shape, inferring ``-1`` and
        resolving ``[i]`` references against ``input_shape``. Unknown
        (``None``) or symbolic sizes propagate as ``None``."""
        # Initialize output shape from shape specification
        output_shape = list(self.shape)
        # First, replace all `[i]` with the corresponding input dimension, and
        # mask parts of the shapes thus becoming irrelevant for -1 inference
        masked_input_shape = list(input_shape)
        masked_output_shape = list(output_shape)
        for dim, o in enumerate(output_shape):
            if isinstance(o, list):
                if o[0] >= len(input_shape):
                    raise ValueError("specification contains [%d], but input "
                                     "shape has %d dimensions only" %
                                     (o[0], len(input_shape)))
                output_shape[dim] = input_shape[o[0]]
                masked_output_shape[dim] = input_shape[o[0]]
                if (input_shape[o[0]] is None) \
                        and (masked_input_shape[o[0]] is None):
                    # first time we copied this unknown input size: mask
                    # it, we have a 1:1 correspondence between out[dim] and
                    # in[o[0]] and can ignore it for -1 inference even if
                    # it is unknown.
                    masked_input_shape[o[0]] = 1
                    masked_output_shape[dim] = 1
        # Secondly, replace all symbolic shapes with `None`, as we cannot
        # infer their size here.
        for dim, o in enumerate(output_shape):
            if isinstance(o, T.TensorVariable):
                output_shape[dim] = None
                masked_output_shape[dim] = None
        # From the shapes, compute the sizes of the input and output tensor
        input_size = (None if any(x is None for x in masked_input_shape)
                      else np.prod(masked_input_shape))
        output_size = (None if any(x is None for x in masked_output_shape)
                       else np.prod(masked_output_shape))
        del masked_input_shape, masked_output_shape
        # Finally, infer value for -1 if needed
        if -1 in output_shape:
            dim = output_shape.index(-1)
            if (input_size is None) or (output_size is None):
                output_shape[dim] = None
                output_size = None
            else:
                # output_size currently includes the -1 factor; flip its sign
                # so the division below uses the product of the known sizes.
                output_size *= -1
                output_shape[dim] = input_size // output_size
                output_size *= output_shape[dim]
        # Sanity check
        if (input_size is not None) and (output_size is not None) \
                and (input_size != output_size):
            raise ValueError("%s cannot be reshaped to specification %s. "
                             "The total size mismatches." %
                             (input_shape, self.shape))
        return tuple(output_shape)

    def get_output_for(self, input, **kwargs):
        # Replace all `[i]` with the corresponding input dimension
        output_shape = list(self.shape)
        for dim, o in enumerate(output_shape):
            if isinstance(o, list):
                output_shape[dim] = input.shape[o[0]]
        # Everything else is handled by Theano
        return input.reshape(tuple(output_shape))

reshape = ReshapeLayer  # shortcut
+
+
class DimshuffleLayer(Layer):
    """
    A layer that rearranges the dimension of its input tensor, maintaining
    the same same total number of elements.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        the layer feeding into this layer, or the expected input shape

    pattern : tuple
        The new dimension order, with each element giving the index
        of the dimension in the input tensor or `'x'` to broadcast it.
        For example `(3,2,1,0)` will reverse the order of a 4-dimensional
        tensor. Use `'x'` to broadcast, e.g. `(3,2,1,'x',0)` will
        take a 4 tensor of shape `(2,3,5,7)` as input and produce a
        tensor of shape `(7,5,3,1,2)` with the 4th dimension being
        broadcast-able. In general, all dimensions in the input tensor
        must be used to generate the output tensor. Omitting a dimension
        attempts to collapse it; this can only be done to broadcast-able
        dimensions, e.g. a 5-tensor of shape `(7,5,3,1,2)` with the 4th
        being broadcast-able can be shuffled with the pattern `(4,2,1,0)`
        collapsing the 4th dimension resulting in a tensor of shape
        `(2,3,5,7)`.

    Examples
    --------
    >>> from lasagne.layers import InputLayer, DimshuffleLayer
    >>> l_in = InputLayer((2, 3, 5, 7))
    >>> l1 = DimshuffleLayer(l_in, (3, 2, 1, 'x', 0))
    >>> l1.output_shape
    (7, 5, 3, 1, 2)
    >>> l2 = DimshuffleLayer(l1, (4, 2, 1, 0))
    >>> l2.output_shape
    (2, 3, 5, 7)
    """
    def __init__(self, incoming, pattern, **kwargs):
        super(DimshuffleLayer, self).__init__(incoming, **kwargs)

        # Sanity check the pattern: each entry is either an input dimension
        # index (used at most once) or 'x' for a broadcastable dimension.
        used_dims = set()
        for p in pattern:
            if isinstance(p, int):
                # Dimension p
                if p in used_dims:
                    raise ValueError("pattern contains dimension {0} more "
                                     "than once".format(p))
                used_dims.add(p)
            elif p == 'x':
                # Broadcast
                pass
            else:
                # BUG FIX: the adjacent string literals were implicitly
                # concatenated without a separating space, yielding the
                # garbled message "...dimensionindices...".
                raise ValueError("pattern should only contain dimension "
                                 "indices or 'x', not {0}".format(p))

        self.pattern = pattern

        # try computing the output shape once as a sanity check
        self.get_output_shape_for(self.input_shape)

    def get_output_shape_for(self, input_shape):
        # Build output shape while keeping track of the dimensions that we are
        # attempting to collapse, so we can ensure that they are broadcastable
        output_shape = []
        dims_used = [False] * len(input_shape)
        for p in self.pattern:
            if isinstance(p, int):
                if p < 0 or p >= len(input_shape):
                    raise ValueError("pattern contains {0}, but input shape "
                                     "has {1} dimensions "
                                     "only".format(p, len(input_shape)))
                # Dimension p
                o = input_shape[p]
                dims_used[p] = True
            elif p == 'x':
                # Broadcast; will be of size 1
                o = 1
            output_shape.append(o)

        for i, (dim_size, used) in enumerate(zip(input_shape, dims_used)):
            if not used and dim_size != 1 and dim_size is not None:
                # BUG FIX: missing space between the concatenated literals
                # produced "...are notbroadcastable...".
                raise ValueError(
                    "pattern attempted to collapse dimension "
                    "{0} of size {1}; dimensions with size != 1/None are not "
                    "broadcastable and cannot be "
                    "collapsed".format(i, dim_size))

        return tuple(output_shape)

    def get_output_for(self, input, **kwargs):
        return input.dimshuffle(self.pattern)

dimshuffle = DimshuffleLayer  # shortcut
+
+
class PadLayer(Layer):
    """
    Pad all dimensions except the first ``batch_ndim`` with ``width``
    zeros on both sides, or with another value specified in ``val``.
    Individual padding for each dimension or edge can be specified
    using a tuple or list of tuples for ``width``.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape

    width : int, iterable of int, or iterable of tuple
        Padding width. If an int, pads each axis symmetrically with the same
        amount in the beginning and end. If an iterable of int, defines the
        symmetric padding width separately for each axis. If an iterable of
        tuples of two ints, defines a seperate padding width for each beginning
        and end of each axis.

    val : float
        Value used for padding

    batch_ndim : int
        Dimensions up to this value are not padded. For padding convolutional
        layers this should be set to 2 so the sample and filter dimensions are
        not padded
    """
    def __init__(self, incoming, width, val=0, batch_ndim=2, **kwargs):
        super(PadLayer, self).__init__(incoming, **kwargs)
        self.width = width
        self.val = val
        self.batch_ndim = batch_ndim

    def get_output_shape_for(self, input_shape):
        # Normalize a scalar width into one entry per padded axis.
        if isinstance(self.width, int):
            widths = [self.width] * (len(input_shape) - self.batch_ndim)
        else:
            widths = self.width

        output_shape = list(input_shape)
        # The first `batch_ndim` axes are left untouched; each remaining
        # axis grows by its total (before + after) padding.
        for axis, w in enumerate(widths, start=self.batch_ndim):
            if output_shape[axis] is None:
                # unknown size stays unknown
                continue
            try:
                before, after = w
            except TypeError:
                # a plain int pads symmetrically
                before = after = w
            output_shape[axis] += before + after
        return tuple(output_shape)

    def get_output_for(self, input, **kwargs):
        return padding.pad(input, self.width, self.val, self.batch_ndim)

pad = PadLayer  # shortcut
+
+
class SliceLayer(Layer):
    """
    Slices the input at a specific axis and at specific indices.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape

    indices : int or slice instance
        If an ``int``, selects a single element from the given axis, dropping
        the axis. If a slice, selects all elements in the given range, keeping
        the axis.

    axis : int
        Specifies the axis from which the indices are selected.

    Examples
    --------
    >>> from lasagne.layers import SliceLayer, InputLayer
    >>> l_in = InputLayer((2, 3, 4))
    >>> SliceLayer(l_in, indices=0, axis=1).output_shape
    ... # equals input[:, 0]
    (2, 4)
    >>> SliceLayer(l_in, indices=slice(0, 1), axis=1).output_shape
    ... # equals input[:, 0:1]
    (2, 1, 4)
    >>> SliceLayer(l_in, indices=slice(-2, None), axis=-1).output_shape
    ... # equals input[..., -2:]
    (2, 3, 2)
    """
    def __init__(self, incoming, indices, axis=-1, **kwargs):
        super(SliceLayer, self).__init__(incoming, **kwargs)
        self.slice = indices
        self.axis = axis

    def get_output_shape_for(self, input_shape):
        shape = list(input_shape)
        if isinstance(self.slice, int):
            # an integer index selects one element and drops the axis
            del shape[self.axis]
        else:
            size = input_shape[self.axis]
            if size is None:
                # unknown input size -> unknown output size
                shape[self.axis] = None
            else:
                # number of elements the slice selects from `size` items
                shape[self.axis] = len(range(*self.slice.indices(size)))
        return tuple(shape)

    def get_output_for(self, input, **kwargs):
        # Build an indexing tuple that slices only the requested axis.
        axis = self.axis if self.axis >= 0 else self.axis + input.ndim
        return input[(slice(None),) * axis + (self.slice,)]
diff --git a/lasagne/layers/special.py b/lasagne/layers/special.py
new file mode 100644
index 0000000..13f7716
--- /dev/null
+++ b/lasagne/layers/special.py
@@ -0,0 +1,1155 @@
+import theano
+import theano.tensor as T
+import numpy as np
+
+from .. import init
+from .. import nonlinearities
+from ..utils import as_tuple, floatX
+from ..random import get_rng
+from .base import Layer, MergeLayer
+from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
+
+
# Public names re-exported through the ``lasagne.layers`` namespace.
__all__ = [
    "NonlinearityLayer",
    "BiasLayer",
    "ScaleLayer",
    "standardize",
    "ExpressionLayer",
    "InverseLayer",
    "TransformerLayer",
    "TPSTransformerLayer",
    "ParametricRectifierLayer",
    "prelu",
    "RandomizedRectifierLayer",
    "rrelu",
]
+
+
class NonlinearityLayer(Layer):
    """
    lasagne.layers.NonlinearityLayer(incoming,
    nonlinearity=lasagne.nonlinearities.rectify, **kwargs)

    A layer that just applies a nonlinearity.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape

    nonlinearity : callable or None
        The nonlinearity that is applied to the layer activations. If None
        is provided, the layer will be linear.
    """
    def __init__(self, incoming, nonlinearity=nonlinearities.rectify,
                 **kwargs):
        super(NonlinearityLayer, self).__init__(incoming, **kwargs)
        # None is a convenience alias for the identity (linear) activation.
        if nonlinearity is None:
            self.nonlinearity = nonlinearities.identity
        else:
            self.nonlinearity = nonlinearity

    def get_output_for(self, input, **kwargs):
        return self.nonlinearity(input)
+
+
class BiasLayer(Layer):
    """
    lasagne.layers.BiasLayer(incoming, b=lasagne.init.Constant(0),
    shared_axes='auto', **kwargs)

    A layer that just adds a (trainable) bias term.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape

    b : Theano shared variable, expression, numpy array, callable or ``None``
        Initial value, expression or initializer for the biases. If set to
        ``None``, the layer will have no biases and pass through its input
        unchanged. Otherwise, the bias shape must match the incoming shape,
        skipping those axes the biases are shared over (see the example below).
        See :func:`lasagne.utils.create_param` for more information.

    shared_axes : 'auto', int or tuple of int
        The axis or axes to share biases over. If ``'auto'`` (the default),
        share over all axes except for the second: this will share biases over
        the minibatch dimension for dense layers, and additionally over all
        spatial dimensions for convolutional layers.

    Notes
    -----
    The bias parameter dimensionality is the input dimensionality minus the
    number of axes the biases are shared over, which matches the bias parameter
    conventions of :class:`DenseLayer` or :class:`Conv2DLayer`. For example:

    >>> layer = BiasLayer((20, 30, 40, 50), shared_axes=(0, 2))
    >>> layer.b.get_value().shape
    (30, 50)
    """
    def __init__(self, incoming, b=init.Constant(0), shared_axes='auto',
                 **kwargs):
        super(BiasLayer, self).__init__(incoming, **kwargs)

        if shared_axes == 'auto':
            # share over the batch axis and any spatial axes, i.e. every
            # axis except the second (feature/channel) one
            shared_axes = (0,) + tuple(range(2, len(self.input_shape)))
        elif isinstance(shared_axes, int):
            shared_axes = (shared_axes,)
        self.shared_axes = shared_axes

        if b is None:
            # no bias at all: the layer becomes a pass-through
            self.b = None
            return

        # The bias parameter spans every axis that is *not* shared over.
        param_shape = [size for axis, size in enumerate(self.input_shape)
                       if axis not in self.shared_axes]
        if any(size is None for size in param_shape):
            raise ValueError("BiasLayer needs specified input sizes for "
                             "all axes that biases are not shared over.")
        self.b = self.add_param(b, param_shape, 'b', regularizable=False)

    def get_output_for(self, input, **kwargs):
        if self.b is None:
            return input
        # Broadcast the bias over all shared axes and map its remaining
        # axes, in order, onto the non-shared input axes.
        remaining_axes = iter(range(self.b.ndim))
        pattern = []
        for axis in range(input.ndim):
            if axis in self.shared_axes:
                pattern.append('x')
            else:
                pattern.append(next(remaining_axes))
        return input + self.b.dimshuffle(*pattern)
+
+
class ScaleLayer(Layer):
    """
    lasagne.layers.ScaleLayer(incoming, scales=lasagne.init.Constant(1),
    shared_axes='auto', **kwargs)

    A layer that scales its inputs by learned coefficients.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape

    scales : Theano shared variable, expression, numpy array, or callable
        Initial value, expression or initializer for the scale. The scale
        shape must match the incoming shape, skipping those axes the scales are
        shared over (see the example below). See
        :func:`lasagne.utils.create_param` for more information.

    shared_axes : 'auto', int or tuple of int
        The axis or axes to share scales over. If ``'auto'`` (the default),
        share over all axes except for the second: this will share scales over
        the minibatch dimension for dense layers, and additionally over all
        spatial dimensions for convolutional layers.

    Notes
    -----
    The scales parameter dimensionality is the input dimensionality minus the
    number of axes the scales are shared over, which matches the bias parameter
    conventions of :class:`DenseLayer` or :class:`Conv2DLayer`. For example:

    >>> layer = ScaleLayer((20, 30, 40, 50), shared_axes=(0, 2))
    >>> layer.scales.get_value().shape
    (30, 50)
    """
    def __init__(self, incoming, scales=init.Constant(1), shared_axes='auto',
                 **kwargs):
        super(ScaleLayer, self).__init__(incoming, **kwargs)

        if shared_axes == 'auto':
            # share over the batch axis and any spatial axes, i.e. every
            # axis except the second (feature/channel) one
            shared_axes = (0,) + tuple(range(2, len(self.input_shape)))
        elif isinstance(shared_axes, int):
            shared_axes = (shared_axes,)
        self.shared_axes = shared_axes

        # The scales parameter spans every axis that is *not* shared over.
        param_shape = [size for axis, size in enumerate(self.input_shape)
                       if axis not in self.shared_axes]
        if any(size is None for size in param_shape):
            raise ValueError("ScaleLayer needs specified input sizes for "
                             "all axes that scales are not shared over.")
        self.scales = self.add_param(
            scales, param_shape, 'scales', regularizable=False)

    def get_output_for(self, input, **kwargs):
        # Broadcast the scales over all shared axes and map their remaining
        # axes, in order, onto the non-shared input axes.
        remaining_axes = iter(range(self.scales.ndim))
        pattern = ['x' if axis in self.shared_axes else next(remaining_axes)
                   for axis in range(input.ndim)]
        return input * self.scales.dimshuffle(*pattern)
+
+
def standardize(layer, offset, scale, shared_axes='auto'):
    """
    Convenience function that normalizes a network's input with fixed
    statistics, e.g. to give it zero mean and unit standard deviation over
    the feature dimensions. The statistics are baked into the network as two
    non-trainable layers: the input has `offset` subtracted and is then
    divided by `scale`, with sharing of the statistics across axes controlled
    by `shared_axes`.

    Parameters
    ----------
    layer : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape.
    offset : Theano shared variable or numpy array
        The offset to apply (via subtraction) to the axis/axes being
        standardized.
    scale : Theano shared variable or numpy array
        The scale to apply (via division) to the axis/axes being standardized.
    shared_axes : 'auto', int or tuple of int
        The axis or axes to share the offset and scale over. If ``'auto'`` (the
        default), share over all axes except for the second: this will share
        scales over the minibatch dimension for dense layers, and additionally
        over all spatial dimensions for convolutional layers.

    Returns
    -------
    :class:`ScaleLayer`
        The resulting stack of a :class:`BiasLayer` and a :class:`ScaleLayer`,
        both with their parameters tagged as non-trainable.

    Examples
    --------
    Assuming your training data exists in a 2D numpy ndarray called
    ``training_data``, you can use this function to scale input features to the
    [0, 1] range based on the training set statistics like so:

    >>> import lasagne
    >>> import numpy as np
    >>> training_data = np.random.standard_normal((100, 20))
    >>> input_shape = (None, training_data.shape[1])
    >>> l_in = lasagne.layers.InputLayer(input_shape)
    >>> offset = training_data.min(axis=0)
    >>> scale = training_data.max(axis=0) - training_data.min(axis=0)
    >>> l_std = standardize(l_in, offset, scale, shared_axes=0)

    Alternatively, to z-score your inputs based on training set statistics, you
    could set ``offset = training_data.mean(axis=0)`` and
    ``scale = training_data.std(axis=0)`` instead.
    """
    # Subtraction of the offset, as a fixed (non-trainable) bias.
    layer = BiasLayer(layer, -offset, shared_axes)
    layer.params[layer.b].remove('trainable')
    # Division by the scale, as a fixed (non-trainable) multiplier.
    layer = ScaleLayer(layer, floatX(1.)/scale, shared_axes)
    layer.params[layer.scales].remove('trainable')
    return layer
+
+
class ExpressionLayer(Layer):
    """
    This layer provides boilerplate for a custom layer that applies a
    simple transformation to the input.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape.

    function : callable
        A function to be applied to the output of the previous layer.

    output_shape : None, callable, tuple, or 'auto'
        Specifies the output shape of this layer. If a tuple, this fixes the
        output shape for any input shape (the tuple can contain None if some
        dimensions may vary). If a callable, it should return the calculated
        output shape given the input shape. If None, the output shape is
        assumed to be the same as the input shape. If 'auto', an attempt will
        be made to automatically infer the correct output shape.

    Notes
    -----
    An :class:`ExpressionLayer` that does not change the shape of the data
    (i.e., is constructed with the default setting of ``output_shape=None``)
    is functionally equivalent to a :class:`NonlinearityLayer`.

    Examples
    --------
    >>> from lasagne.layers import InputLayer, ExpressionLayer
    >>> l_in = InputLayer((32, 100, 20))
    >>> l1 = ExpressionLayer(l_in, lambda X: X.mean(-1), output_shape='auto')
    >>> l1.output_shape
    (32, 100)
    """
    def __init__(self, incoming, function, output_shape=None, **kwargs):
        super(ExpressionLayer, self).__init__(incoming, **kwargs)

        if output_shape is None:
            self._output_shape = None
        elif output_shape == 'auto':
            self._output_shape = 'auto'
        elif callable(output_shape):
            # A callable replaces the shape inference wholesale.
            self.get_output_shape_for = output_shape
        else:
            self._output_shape = tuple(output_shape)

        self.function = function

    def get_output_shape_for(self, input_shape):
        if self._output_shape is None:
            return input_shape
        elif self._output_shape == 'auto':
            # BUG FIX: this was `is 'auto'`, relying on CPython string
            # interning for correctness (and a SyntaxWarning on modern
            # Python); string comparison must use `==`.
            # Infer the shape by propagating a dummy tensor through the
            # function: unknown sizes are temporarily encoded as 0 and
            # mapped back to None afterwards.
            input_shape = (0 if s is None else s for s in input_shape)
            X = theano.tensor.alloc(0, *input_shape)
            output_shape = self.function(X).shape.eval()
            output_shape = tuple(s if s else None for s in output_shape)
            return output_shape
        else:
            return self._output_shape

    def get_output_for(self, input, **kwargs):
        return self.function(input)
+
+
class InverseLayer(MergeLayer):
    """
    The :class:`InverseLayer` class performs inverse operations
    for a single layer of a neural network by applying the
    partial derivative of the layer to be inverted with
    respect to its input: transposed layer
    for a :class:`DenseLayer`, deconvolutional layer for
    :class:`Conv2DLayer`, :class:`Conv1DLayer`; or
    an unpooling layer for :class:`MaxPool2DLayer`.

    It is specially useful for building (convolutional)
    autoencoders with tied parameters.

    Note that if the layer to be inverted contains a nonlinearity
    and/or a bias, the :class:`InverseLayer` will include the derivative
    of that in its computation.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape.
    layer : a :class:`Layer` instance or a tuple
        The layer with respect to which the instance of the
        :class:`InverseLayer` is inverse to.

    Examples
    --------
    >>> import lasagne
    >>> from lasagne.layers import InputLayer, Conv2DLayer, DenseLayer
    >>> from lasagne.layers import InverseLayer
    >>> l_in = InputLayer((100, 3, 28, 28))
    >>> l1 = Conv2DLayer(l_in, num_filters=16, filter_size=5)
    >>> l2 = DenseLayer(l1, num_units=20)
    >>> l_u2 = InverseLayer(l2, l2) # backprop through l2
    >>> l_u1 = InverseLayer(l_u2, l1) # backprop through l1
    """
    def __init__(self, incoming, layer, **kwargs):
        # Three inputs: the signal to push backwards, the layer being
        # inverted, and that layer's own input (whose shape we produce).
        super(InverseLayer, self).__init__(
            [incoming, layer, layer.input_layer], **kwargs)

    def get_output_shape_for(self, input_shapes):
        # The output matches the input of the inverted layer.
        _, _, inverted_input_shape = input_shapes
        return inverted_input_shape

    def get_output_for(self, inputs, **kwargs):
        incoming_grad, inverted_output, inverted_input = inputs
        # Backpropagating `incoming_grad` through the inverted layer -- by
        # substituting it as the known gradient at that layer's output --
        # yields exactly the inverse (transposed) operation.
        return theano.grad(None, wrt=inverted_input,
                           known_grads={inverted_output: incoming_grad})
+
+
class TransformerLayer(MergeLayer):
    """
    Spatial transformer layer

    The layer applies an affine transformation on the input. The affine
    transformation is parameterized with six learned parameters [1]_.
    The output is interpolated with a bilinear transformation.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape. The
        output of this layer should be a 4D tensor, with shape
        ``(batch_size, num_input_channels, input_rows, input_columns)``.

    localization_network : a :class:`Layer` instance
        The network that calculates the parameters of the affine
        transformation. See the example for how to initialize to the identity
        transform.

    downsample_factor : float or iterable of float
        A float or a 2-element tuple specifying the downsample factor for the
        output image (in both spatial dimensions). A value of 1 will keep the
        original size of the input. Values larger than 1 will downsample the
        input. Values below 1 will upsample the input.

    References
    ----------
    .. [1]  Max Jaderberg, Karen Simonyan, Andrew Zisserman,
            Koray Kavukcuoglu (2015):
            Spatial Transformer Networks. NIPS 2015,
            http://papers.nips.cc/paper/5854-spatial-transformer-networks.pdf

    Examples
    --------
    Here we set up the layer to initially do the identity transform, similarly
    to [1]_. Note that you will want to use a localization with linear output.
    If the output from the localization networks is [t1, t2, t3, t4, t5, t6]
    then t1 and t5 determines zoom, t2 and t4 determines skewness, and t3 and
    t6 move the center position.

    >>> import numpy as np
    >>> import lasagne
    >>> b = np.zeros((2, 3), dtype='float32')
    >>> b[0, 0] = 1
    >>> b[1, 1] = 1
    >>> b = b.flatten()  # identity transform
    >>> W = lasagne.init.Constant(0.0)
    >>> l_in = lasagne.layers.InputLayer((None, 3, 28, 28))
    >>> l_loc = lasagne.layers.DenseLayer(l_in, num_units=6, W=W, b=b,
    ... nonlinearity=None)
    >>> l_trans = lasagne.layers.TransformerLayer(l_in, l_loc)
    """
    def __init__(self, incoming, localization_network, downsample_factor=1,
                 **kwargs):
        super(TransformerLayer, self).__init__(
            [incoming, localization_network], **kwargs)
        # A scalar factor applies to both spatial dimensions.
        self.downsample_factor = as_tuple(downsample_factor, 2)

        input_shp, loc_shp = self.input_shapes

        # The localization network must emit the six affine parameters.
        if loc_shp[-1] != 6 or len(loc_shp) != 2:
            raise ValueError("The localization network must have "
                             "output shape: (batch_size, 6)")
        # The data input must be a batch of 2D multi-channel images.
        if len(input_shp) != 4:
            raise ValueError("The input network must have a 4-dimensional "
                             "output shape: (batch_size, num_input_channels, "
                             "input_rows, input_columns)")

    def get_output_shape_for(self, input_shapes):
        # Batch and channel axes pass through; the spatial axes shrink (or
        # grow) by the per-axis downsample factor, when their size is known.
        data_shape = input_shapes[0]
        scaled = tuple(None if size is None else int(size / factor)
                       for size, factor
                       in zip(data_shape[2:], self.downsample_factor))
        return data_shape[:2] + scaled

    def get_output_for(self, inputs, **kwargs):
        # see eq. (1) and sec 3.1 in [1]
        input, theta = inputs
        return _transform_affine(theta, input, self.downsample_factor)
+
+
def _transform_affine(theta, input, downsample_factor):
    # Apply the batch of affine transforms `theta` (flattened 2x3 matrices)
    # to `input` of shape (num_batch, num_channels, height, width), sampling
    # the result with bilinear interpolation; see eq. (1) and sec. 3.1 of
    # Jaderberg et al. (2015), ref [1] in TransformerLayer.
    num_batch, num_channels, height, width = input.shape
    theta = T.reshape(theta, (-1, 2, 3))

    # grid of (x_t, y_t, 1), eq (1) in ref [1]
    out_height = T.cast(height / downsample_factor[0], 'int64')
    out_width = T.cast(width / downsample_factor[1], 'int64')
    grid = _meshgrid(out_height, out_width)

    # Transform A x (x_t, y_t, 1)^T -> (x_s, y_s)
    T_g = T.dot(theta, grid)
    x_s = T_g[:, 0]
    y_s = T_g[:, 1]
    x_s_flat = x_s.flatten()
    y_s_flat = y_s.flatten()

    # dimshuffle input to (bs, height, width, channels)
    input_dim = input.dimshuffle(0, 2, 3, 1)
    input_transformed = _interpolate(
        input_dim, x_s_flat, y_s_flat,
        out_height, out_width)

    # reassemble the flat interpolated pixels into images and dimshuffle
    # back to the (batch, channels, rows, cols) convolution layout
    output = T.reshape(
        input_transformed, (num_batch, out_height, out_width, num_channels))
    output = output.dimshuffle(0, 3, 1, 2)  # dimshuffle to conv format
    return output
+
+
def _interpolate(im, x, y, out_height, out_width):
    # Bilinearly sample `im` (shape (num_batch, height, width, channels))
    # at the continuous coordinates `x`, `y`, which are flat vectors in
    # [-1, 1]; their length is presumably num_batch*out_height*out_width,
    # as produced by _transform_affine -- TODO confirm at call sites.
    # *_f are floats
    num_batch, height, width, channels = im.shape
    height_f = T.cast(height, theano.config.floatX)
    width_f = T.cast(width, theano.config.floatX)

    # clip coordinates to [-1, 1]
    x = T.clip(x, -1, 1)
    y = T.clip(y, -1, 1)

    # scale coordinates from [-1, 1] to [0, width/height - 1]
    x = (x + 1) / 2 * (width_f - 1)
    y = (y + 1) / 2 * (height_f - 1)

    # obtain indices of the 2x2 pixel neighborhood surrounding the coordinates;
    # we need those in floatX for interpolation and in int64 for indexing. for
    # indexing, we need to take care they do not extend past the image.
    x0_f = T.floor(x)
    y0_f = T.floor(y)
    x1_f = x0_f + 1
    y1_f = y0_f + 1
    x0 = T.cast(x0_f, 'int64')
    y0 = T.cast(y0_f, 'int64')
    x1 = T.cast(T.minimum(x1_f, width_f - 1), 'int64')
    y1 = T.cast(T.minimum(y1_f, height_f - 1), 'int64')

    # The input is [num_batch, height, width, channels]. We do the lookup in
    # the flattened input, i.e [num_batch*height*width, channels]. We need
    # to offset all indices to match the flat version
    dim2 = width
    dim1 = width*height
    base = T.repeat(
        T.arange(num_batch, dtype='int64')*dim1, out_height*out_width)
    base_y0 = base + y0*dim2
    base_y1 = base + y1*dim2
    idx_a = base_y0 + x0  # top-left neighbor
    idx_b = base_y1 + x0  # bottom-left neighbor
    idx_c = base_y0 + x1  # top-right neighbor
    idx_d = base_y1 + x1  # bottom-right neighbor

    # use indices to lookup pixels for all samples
    im_flat = im.reshape((-1, channels))
    Ia = im_flat[idx_a]
    Ib = im_flat[idx_b]
    Ic = im_flat[idx_c]
    Id = im_flat[idx_d]

    # calculate interpolated values: each weight is the area of the
    # rectangle opposite the corresponding corner, broadcast over channels
    wa = ((x1_f-x) * (y1_f-y)).dimshuffle(0, 'x')
    wb = ((x1_f-x) * (y-y0_f)).dimshuffle(0, 'x')
    wc = ((x-x0_f) * (y1_f-y)).dimshuffle(0, 'x')
    wd = ((x-x0_f) * (y-y0_f)).dimshuffle(0, 'x')
    output = T.sum([wa*Ia, wb*Ib, wc*Ic, wd*Id], axis=0)
    return output
+
+
def _linspace(start, stop, num):
    # Symbolic analogue of np.linspace: `num` evenly spaced values from
    # `start` to `stop` (inclusive), computed entirely in Theano so the
    # endpoints and count may be symbolic expressions.
    start = T.cast(start, theano.config.floatX)
    stop = T.cast(stop, theano.config.floatX)
    num = T.cast(num, theano.config.floatX)
    step = (stop - start) / (num - 1)
    return start + T.arange(num, dtype=theano.config.floatX) * step
+
+
def _meshgrid(height, width):
    # The grid generator from eq. (1) in reference [1]: a (3, height*width)
    # tensor of homogeneous coordinates (x_t, y_t, 1) covering the square
    # [-1, 1] x [-1, 1]. Equivalent numpy:
    #   x_t, y_t = np.meshgrid(np.linspace(-1, 1, width),
    #                          np.linspace(-1, 1, height))
    #   grid = np.vstack([x_t.flatten(), y_t.flatten(),
    #                     np.ones(np.prod(x_t.shape))])
    # Implemented symbolically so the grid size may itself be a Theano
    # expression. (If the image size were known at construction time the
    # grid could be precomputed in numpy, but that hardly helped in tests.)
    ones_col = T.ones((height, 1))
    ones_row = T.ones((1, width))
    # Outer products replicate the linspace along the other axis.
    x_t = T.dot(ones_col, _linspace(-1.0, 1.0, width).dimshuffle('x', 0))
    y_t = T.dot(_linspace(-1.0, 1.0, height).dimshuffle(0, 'x'), ones_row)

    x_t_flat = x_t.reshape((1, -1))
    y_t_flat = y_t.reshape((1, -1))
    return T.concatenate([x_t_flat, y_t_flat, T.ones_like(x_t_flat)], axis=0)
+
+
class TPSTransformerLayer(MergeLayer):
    """
    Spatial transformer layer

    The layer applies a thin plate spline transformation [2]_ on the input
    as in [1]_. The thin plate spline transform is determined based on the
    movement of some number of control points. The starting positions for
    these control points are fixed. The output is interpolated with a
    bilinear transformation.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape. The
        output of this layer should be a 4D tensor, with shape
        ``(batch_size, num_input_channels, input_rows, input_columns)``.

    localization_network : a :class:`Layer` instance
        The network that calculates the parameters of the thin plate spline
        transformation as the x and y coordinates of the destination offsets of
        each control point. The output of the localization network should
        be a 2D tensor, with shape ``(batch_size, 2 * num_control_points)``

    downsample_factor : float or iterable of float
        A float or a 2-element tuple specifying the downsample factor for the
        output image (in both spatial dimensions). A value of 1 will keep the
        original size of the input. Values larger than 1 will downsample the
        input. Values below 1 will upsample the input.

    control_points : integer
        The number of control points to be used for the thin plate spline
        transformation. These points will be arranged as a grid along the
        image, so the value must be a perfect square. Default is 16.

    precompute_grid : 'auto' or boolean
        Flag to precompute the U function [2]_ for the grid and source
        points. If 'auto', will be set to true as long as the input height
        and width are specified. If true, the U function is computed when the
        layer is constructed for a fixed input shape. If false, grid will be
        computed as part of the Theano computational graph, which is
        substantially slower as this computation scales with
        num_pixels*num_control_points. Default is 'auto'.

    References
    ----------
    .. [1] Max Jaderberg, Karen Simonyan, Andrew Zisserman,
           Koray Kavukcuoglu (2015):
           Spatial Transformer Networks. NIPS 2015,
           http://papers.nips.cc/paper/5854-spatial-transformer-networks.pdf
    .. [2] Fred L. Bookstein (1989):
           Principal warps: thin-plate splines and the decomposition of
           deformations. IEEE Transactions on
           Pattern Analysis and Machine Intelligence.
           http://doi.org/10.1109/34.24792

    Examples
    --------
    Here, we'll implement an identity transform using a thin plate spline
    transform. First we'll create the destination control point offsets. To
    make everything invariant to the shape of the image, the x and y range
    of the image is normalized to [-1, 1] as in ref [1]_. To replicate an
    identity transform, we'll set the bias to have all offsets be 0. More
    complicated transformations can easily be implemented using different x
    and y offsets (importantly, each control point can have it's own pair of
    offsets).

    >>> import numpy as np
    >>> import lasagne
    >>>
    >>> # Create the network
    >>> # we'll initialize the weights and biases to zero, so it starts
    >>> # as the identity transform (all control point offsets are zero)
    >>> W = b = lasagne.init.Constant(0.0)
    >>>
    >>> # Set the number of points
    >>> num_points = 16
    >>>
    >>> l_in = lasagne.layers.InputLayer((None, 3, 28, 28))
    >>> l_loc = lasagne.layers.DenseLayer(l_in, num_units=2*num_points,
    ...                                   W=W, b=b, nonlinearity=None)
    >>> l_trans = lasagne.layers.TPSTransformerLayer(l_in, l_loc,
    ...                                              control_points=num_points)
    """

    def __init__(self, incoming, localization_network, downsample_factor=1,
                 control_points=16, precompute_grid='auto', **kwargs):
        super(TPSTransformerLayer, self).__init__(
            [incoming, localization_network], **kwargs)

        # normalize the downsample factor to a (height, width) pair
        self.downsample_factor = as_tuple(downsample_factor, 2)
        self.control_points = control_points

        input_shp, loc_shp = self.input_shapes

        # Error checking
        # the localization network must emit one (x, y) offset per control
        # point, flattened into a single vector per sample
        if loc_shp[-1] != 2 * control_points or len(loc_shp) != 2:
            raise ValueError("The localization network must have "
                             "output shape: (batch_size, "
                             "2*control_points)")

        # control points are laid out on a square grid, so their count must
        # be a perfect square
        if round(np.sqrt(control_points)) != np.sqrt(
                control_points):
            raise ValueError("The number of control points must be"
                             " a perfect square.")

        if len(input_shp) != 4:
            raise ValueError("The input network must have a 4-dimensional "
                             "output shape: (batch_size, num_input_channels, "
                             "input_rows, input_columns)")

        # Process precompute grid
        # precomputing needs static (non-None) input height and width
        can_precompute_grid = all(s is not None for s in input_shp[2:])
        if precompute_grid == 'auto':
            precompute_grid = can_precompute_grid
        elif precompute_grid and not can_precompute_grid:
            raise ValueError("Grid can only be precomputed if the input "
                             "height and width are pre-specified.")
        self.precompute_grid = precompute_grid

        # Create source points and L matrix (see ref [2]); when the grid is
        # precomputed, right_mat/out_height/out_width are fixed tensors,
        # otherwise they are None and built symbolically at run time
        self.right_mat, self.L_inv, self.source_points, self.out_height, \
            self.out_width = _initialize_tps(
                control_points, input_shp, self.downsample_factor,
                precompute_grid)

    def get_output_shape_for(self, input_shapes):
        # only the image input determines the output shape; spatial dims are
        # divided by the downsample factors (int() truncates toward zero),
        # and unknown (None) sizes propagate as None
        shape = input_shapes[0]
        factors = self.downsample_factor
        return (shape[:2] + tuple(None if s is None else int(s / f)
                                  for s, f in zip(shape[2:], factors)))

    def get_output_for(self, inputs, **kwargs):
        # see eq. (1) and sec 3.1 in [1]
        # Get input and destination control points
        input, dest_offsets = inputs
        return _transform_thin_plate_spline(
            dest_offsets, input, self.right_mat, self.L_inv,
            self.source_points, self.out_height, self.out_width,
            self.precompute_grid, self.downsample_factor)
+
+
def _transform_thin_plate_spline(
        dest_offsets, input, right_mat, L_inv, source_points, out_height,
        out_width, precompute_grid, downsample_factor):
    # Applies the thin-plate-spline warp parameterized by `dest_offsets` to
    # `input` and samples the warped image with bilinear interpolation.
    # When `precompute_grid` is False, `right_mat`/`out_height`/`out_width`
    # come in as None and are built symbolically below.

    num_batch, num_channels, height, width = input.shape
    num_control_points = source_points.shape[1]

    # reshape destination offsets to be (num_batch, 2, num_control_points)
    # and add to source_points
    dest_points = source_points + T.reshape(
        dest_offsets, (num_batch, 2, num_control_points))

    # Solve as in ref [2]: spline coefficients for each sample, shape
    # (num_batch, 2, num_control_points + 3); the first three columns of
    # L_inv are dropped (affine rows handled inside the product)
    coefficients = T.dot(dest_points, L_inv[:, 3:].T)

    if precompute_grid:

        # Transform each point on the source grid (image_size x image_size)
        # using the precomputed U-function matrix
        right_mat = T.tile(right_mat.dimshuffle('x', 0, 1), (num_batch, 1, 1))
        transformed_points = T.batched_dot(coefficients, right_mat)

    else:

        # Transformed grid: build the output sampling grid symbolically and
        # keep only the x and y rows (drop the homogeneous ones row)
        out_height = T.cast(height / downsample_factor[0], 'int64')
        out_width = T.cast(width / downsample_factor[1], 'int64')
        orig_grid = _meshgrid(out_height, out_width)
        orig_grid = orig_grid[0:2, :]
        orig_grid = T.tile(orig_grid, (num_batch, 1, 1))

        # Transform each point on the source grid (image_size x image_size)
        transformed_points = _get_transformed_points_tps(
            orig_grid, source_points, coefficients, num_control_points,
            num_batch)

    # Get out new points: flatten x and y coordinates for the sampler
    x_transformed = transformed_points[:, 0].flatten()
    y_transformed = transformed_points[:, 1].flatten()

    # dimshuffle input to (bs, height, width, channels) as expected by
    # _interpolate
    input_dim = input.dimshuffle(0, 2, 3, 1)
    input_transformed = _interpolate(
        input_dim, x_transformed, y_transformed,
        out_height, out_width)

    output = T.reshape(input_transformed,
                       (num_batch, out_height, out_width, num_channels))
    output = output.dimshuffle(0, 3, 1, 2)  # dimshuffle to conv format
    return output
+
+
def _get_transformed_points_tps(new_points, source_points, coefficients,
                                num_points, batch_size):
    """
    Calculates the transformed points' value using the provided coefficients

    :param new_points: num_batch x 2 x num_to_transform tensor
    :param source_points: 2 x num_points array of source points
    :param coefficients: coefficients (should be shape (num_batch, 2,
        control_points + 3))
    :param num_points: the number of control points
    :param batch_size: symbolic batch size

    :return: the x and y coordinates of each transformed point. Shape (
        num_batch, 2, num_to_transform)
    """

    # Calculate the U function for the new point and each source point as in
    # ref [2]
    # The U function is simply U(r) = r^2 * log(r^2), where r^2 is the
    # squared distance

    # Calculate the squared dist between the new point and the source points;
    # result shape is (num_batch, num_points, num_to_transform)
    to_transform = new_points.dimshuffle(0, 'x', 1, 2)
    stacked_transform = T.tile(to_transform, (1, num_points, 1, 1))
    r_2 = T.sum(((stacked_transform - source_points.dimshuffle(
        'x', 1, 0, 'x')) ** 2), axis=2)

    # Take the product (r^2 * log(r^2)), being careful to avoid NaNs:
    # at r_2 == 0 the log is -inf and 0 * -inf would be NaN, while the true
    # limit of U is 0. Zero those entries, mirroring the numpy precompute
    # path in _initialize_tps.
    # BUGFIX: the previous expression,
    #   T.switch(T.isnan(log_r_2), r_2 * log_r_2, 0.)
    # had the switch branches inverted and tested isnan instead of isinf
    # (log of a non-negative r_2 is never NaN), which collapsed every
    # distance to zero and reduced the spline to its affine part.
    log_r_2 = T.log(r_2)
    distances = T.switch(T.isinf(log_r_2), 0., r_2 * log_r_2)

    # Add in the coefficients for the affine translation (1, x, and y,
    # corresponding to a_1, a_x, and a_y)
    upper_array = T.concatenate([T.ones((batch_size, 1, new_points.shape[2]),
                                        dtype=theano.config.floatX),
                                 new_points], axis=1)
    right_mat = T.concatenate([upper_array, distances], axis=1)

    # Calculate the new value as the dot product
    new_value = T.batched_dot(coefficients, right_mat)
    return new_value
+
+
def _U_func_numpy(x1, y1, x2, y2):
    """Thin-plate-spline kernel U from the Bookstein paper [2]_.

    :param x1: x coordinate of the first point
    :param y1: y coordinate of the first point
    :param x2: x coordinate of the second point
    :param y2: y coordinate of the second point
    :return: ``r^2 * log(r^2)`` where ``r`` is the Euclidean distance
        between the two points; coincident points map to 0 (the limit
        value, avoiding ``log(0)``)
    """
    if x1 == x2 and y1 == y2:
        # identical points: U's limit at r = 0 is 0
        return 0.

    dx = x2 - x1
    dy = y2 - y1
    r_2 = dx ** 2 + dy ** 2
    return r_2 * np.log(r_2)
+
+
def _initialize_tps(num_control_points, input_shape, downsample_factor,
                    precompute_grid):
    """
    Initializes the thin plate spline calculation by creating the source
    point array and the inverted L matrix used for calculating the
    transformations as in ref [2]_

    :param num_control_points: the number of control points. Must be a
        perfect square. Points will be used to generate an evenly spaced grid.
    :param input_shape: tuple with 4 elements specifying the input shape
    :param downsample_factor: tuple with 2 elements specifying the
        downsample for the height and width, respectively
    :param precompute_grid: boolean specifying whether to precompute the
        grid matrix
    :return:
        right_mat: shape (num_control_points + 3, out_height*out_width) tensor
        L_inv: shape (num_control_points + 3, num_control_points + 3) tensor
        source_points: shape (2, num_control_points) tensor
        out_height: tensor constant specifying the output height
        out_width: tensor constant specifying the output width
    """

    # break out input_shape
    _, _, height, width = input_shape

    # Create source grid of control points, evenly spaced over [-1, 1].
    # The caller guarantees num_control_points is a perfect square; cast to
    # int because np.linspace requires an integral `num` (passing the float
    # returned by np.sqrt raises a TypeError on modern numpy).
    grid_size = int(np.sqrt(num_control_points))
    x_control_source, y_control_source = np.meshgrid(
        np.linspace(-1, 1, grid_size),
        np.linspace(-1, 1, grid_size))

    # Create 2 x num_points array of source points
    source_points = np.vstack(
        (x_control_source.flatten(), y_control_source.flatten()))

    # Convert to floatX
    source_points = source_points.astype(theano.config.floatX)

    # Get number of equations: one per control point plus three for the
    # affine part (1, x, y)
    num_equations = num_control_points + 3

    # Initialize L to be num_equations square matrix
    L = np.zeros((num_equations, num_equations), dtype=theano.config.floatX)

    # Create P matrix components (the affine rows/columns of L, see ref [2])
    L[0, 3:num_equations] = 1.
    L[1:3, 3:num_equations] = source_points
    L[3:num_equations, 0] = 1.
    L[3:num_equations, 1:3] = source_points.T

    # Loop through each pair of points and create the K matrix; K is
    # symmetric, so each off-diagonal value is mirrored
    for point_1 in range(num_control_points):
        for point_2 in range(point_1, num_control_points):

            L[point_1 + 3, point_2 + 3] = _U_func_numpy(
                source_points[0, point_1], source_points[1, point_1],
                source_points[0, point_2], source_points[1, point_2])

            if point_1 != point_2:
                L[point_2 + 3, point_1 + 3] = L[point_1 + 3, point_2 + 3]

    # Invert
    L_inv = np.linalg.inv(L)

    if precompute_grid:
        # Construct the output sampling grid in normalized [-1, 1]
        # coordinates; int() casts keep np.linspace's `num` integral
        out_height = np.array(height / downsample_factor[0]).astype('int64')
        out_width = np.array(width / downsample_factor[1]).astype('int64')
        x_t, y_t = np.meshgrid(np.linspace(-1, 1, int(out_width)),
                               np.linspace(-1, 1, int(out_height)))
        ones = np.ones(np.prod(x_t.shape))
        orig_grid = np.vstack([x_t.flatten(), y_t.flatten(), ones])
        orig_grid = orig_grid[0:2, :]
        orig_grid = orig_grid.astype(theano.config.floatX)

        # Construct right mat

        # First Calculate the U function for the new point and each source
        # point as in ref [2]
        # The U function is simply U(r) = r^2 * log(r^2), where r^2 is the
        # squared distance
        to_transform = orig_grid[:, :, np.newaxis].transpose(2, 0, 1)
        stacked_transform = np.tile(to_transform, (num_control_points, 1, 1))
        stacked_source_points = \
            source_points[:, :, np.newaxis].transpose(1, 0, 2)
        r_2 = np.sum((stacked_transform - stacked_source_points) ** 2, axis=1)

        # Take the product (r^2 * log(r^2)), zeroing the -inf logs produced
        # at r_2 == 0 so coincident points get U = 0 instead of NaN
        log_r_2 = np.log(r_2)
        log_r_2[np.isinf(log_r_2)] = 0.
        distances = r_2 * log_r_2

        # Add in the coefficients for the affine translation (1, x, and y,
        # corresponding to a_1, a_x, and a_y)
        upper_array = np.ones(shape=(1, orig_grid.shape[1]),
                              dtype=theano.config.floatX)
        upper_array = np.concatenate([upper_array, orig_grid], axis=0)
        right_mat = np.concatenate([upper_array, distances], axis=0)

        # Convert to tensors
        out_height = T.as_tensor_variable(out_height)
        out_width = T.as_tensor_variable(out_width)
        right_mat = T.as_tensor_variable(right_mat)

    else:
        # grid will be built symbolically in _transform_thin_plate_spline
        out_height = None
        out_width = None
        right_mat = None

    # Convert to tensors
    L_inv = T.as_tensor_variable(L_inv)
    source_points = T.as_tensor_variable(source_points)

    return right_mat, L_inv, source_points, out_height, out_width
+
+
class ParametricRectifierLayer(Layer):
    """
    lasagne.layers.ParametricRectifierLayer(incoming,
    alpha=init.Constant(0.25), shared_axes='auto', **kwargs)

    A layer that applies parametric rectify nonlinearity to its input
    following [1]_.

    Equation for the parametric rectifier linear unit:
    :math:`\\varphi(x) = \\max(x,0) + \\alpha \\min(x,0)`

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape

    alpha : Theano shared variable, expression, numpy array or callable
        Initial value, expression or initializer for the alpha values. The
        shape must match the incoming shape, skipping those axes the alpha
        values are shared over (see the example below).
        See :func:`lasagne.utils.create_param` for more information.

    shared_axes : 'auto', 'all', int or tuple of int
        The axes along which the parameters of the rectifier units are
        going to be shared. If ``'auto'`` (the default), share over all axes
        except for the second - this will share the parameter over the
        minibatch dimension for dense layers, and additionally over all
        spatial dimensions for convolutional layers. If ``'all'``, share over
        all axes, which corresponds to a single scalar parameter.

    **kwargs
        Any additional keyword arguments are passed to the `Layer` superclass.

    References
    ----------
    .. [1] K He, X Zhang et al. (2015):
       Delving Deep into Rectifiers: Surpassing Human-Level Performance on
       ImageNet Classification,
       http://arxiv.org/abs/1502.01852

    Notes
    -----
    The alpha parameter dimensionality is the input dimensionality minus the
    number of axes it is shared over, which matches the same convention as
    the :class:`BiasLayer`.

    >>> layer = ParametricRectifierLayer((20, 3, 28, 28), shared_axes=(0, 3))
    >>> layer.alpha.get_value().shape
    (3, 28)
    """
    def __init__(self, incoming, alpha=init.Constant(0.25), shared_axes='auto',
                 **kwargs):
        super(ParametricRectifierLayer, self).__init__(incoming, **kwargs)
        ndim = len(self.input_shape)
        if shared_axes == 'auto':
            # batch axis plus all spatial axes: alpha varies per channel only
            self.shared_axes = (0,) + tuple(range(2, ndim))
        elif shared_axes == 'all':
            self.shared_axes = tuple(range(ndim))
        elif isinstance(shared_axes, int):
            self.shared_axes = (shared_axes,)
        else:
            self.shared_axes = shared_axes

        # alpha keeps one dimension per non-shared input axis; each of those
        # sizes must be known at construction time
        shape = []
        for axis, size in enumerate(self.input_shape):
            if axis not in self.shared_axes:
                shape.append(size)
        if None in shape:
            raise ValueError("ParametricRectifierLayer needs input sizes for "
                             "all axes that alpha's are not shared over.")
        self.alpha = self.add_param(alpha, shape, name="alpha",
                                    regularizable=False)

    def get_output_for(self, input, **kwargs):
        # dimshuffle alpha so it broadcasts over the shared axes: shared
        # axes get a broadcastable 'x' entry, the remaining input axes map
        # to alpha's dimensions in order
        remaining = iter(range(self.alpha.ndim))
        pattern = []
        for input_axis in range(input.ndim):
            if input_axis in self.shared_axes:
                pattern.append('x')
            else:
                pattern.append(next(remaining))
        alpha = self.alpha.dimshuffle(pattern)
        return theano.tensor.nnet.relu(input, alpha)
+
+
def prelu(layer, **kwargs):
    """
    Convenience function to apply parametric rectify to a given layer's output.
    Will set the layer's nonlinearity to identity if there is one and will
    apply the parametric rectifier instead.

    Parameters
    ----------
    layer: a :class:`Layer` instance
        The `Layer` instance to apply the parametric rectifier layer to;
        note that it will be irreversibly modified as specified above

    **kwargs
        Any additional keyword arguments are passed to the
        :class:`ParametricRectifierLayer`

    Examples
    --------
    Note that this function modifies an existing layer, like this:

    >>> from lasagne.layers import InputLayer, DenseLayer, prelu
    >>> layer = InputLayer((32, 100))
    >>> layer = DenseLayer(layer, num_units=200)
    >>> layer = prelu(layer)

    In particular, :func:`prelu` can *not* be passed as a nonlinearity.
    """
    # neutralize the wrapped layer's own nonlinearity, if it has one, so the
    # parametric rectifier becomes the effective activation
    if getattr(layer, 'nonlinearity', None) is not None:
        layer.nonlinearity = nonlinearities.identity
    return ParametricRectifierLayer(layer, **kwargs)
+
+
class RandomizedRectifierLayer(Layer):
    """
    A layer that applies a randomized leaky rectify nonlinearity to its input.

    The randomized leaky rectifier was first proposed and used in the Kaggle
    NDSB Competition, and later evaluated in [1]_. Compared to the standard
    leaky rectifier :func:`leaky_rectify`, it has a randomly sampled slope
    for negative input during training, and a fixed slope during evaluation.

    Equation for the randomized rectifier linear unit during training:
    :math:`\\varphi(x) = \\max((\\sim U(lower, upper)) \\cdot x, x)`

    During evaluation, the factor is fixed to the arithmetic mean of `lower`
    and `upper`.

    Parameters
    ----------
    incoming : a :class:`Layer` instance or a tuple
        The layer feeding into this layer, or the expected input shape

    lower : Theano shared variable, expression, or constant
        The lower bound for the randomly chosen slopes.

    upper : Theano shared variable, expression, or constant
        The upper bound for the randomly chosen slopes.

    shared_axes : 'auto', 'all', int or tuple of int
        The axes along which the random slopes of the rectifier units are
        going to be shared. If ``'auto'`` (the default), share over all axes
        except for the second - this will share the random slope over the
        minibatch dimension for dense layers, and additionally over all
        spatial dimensions for convolutional layers. If ``'all'``, share over
        all axes, thus using a single random slope.

    **kwargs
        Any additional keyword arguments are passed to the `Layer` superclass.

    References
    ----------
    .. [1] Bing Xu, Naiyan Wang et al. (2015):
       Empirical Evaluation of Rectified Activations in Convolutional Network,
       http://arxiv.org/abs/1505.00853
    """
    def __init__(self, incoming, lower=0.3, upper=0.8, shared_axes='auto',
                 **kwargs):
        super(RandomizedRectifierLayer, self).__init__(incoming, **kwargs)
        # per-layer random stream, seeded from lasagne's global rng
        self._srng = RandomStreams(get_rng().randint(1, 2147462579))
        self.lower = lower
        self.upper = upper

        # `lower > upper` is itself a symbolic expression when the bounds are
        # Theano variables; only validate eagerly when it is a plain bool
        if not isinstance(lower > upper, theano.Variable) and lower > upper:
            raise ValueError("Upper bound for RandomizedRectifierLayer needs "
                             "to be higher than lower bound.")

        if shared_axes == 'auto':
            # share over batch and all spatial axes (slope per channel)
            self.shared_axes = (0,) + tuple(range(2, len(self.input_shape)))
        elif shared_axes == 'all':
            self.shared_axes = tuple(range(len(self.input_shape)))
        elif isinstance(shared_axes, int):
            self.shared_axes = (shared_axes,)
        else:
            self.shared_axes = shared_axes

    def get_output_for(self, input, deterministic=False, **kwargs):
        """
        Parameters
        ----------
        input : tensor
            output from the previous layer
        deterministic : bool
            If true, the arithmetic mean of lower and upper are used for the
            leaky slope.
        """
        if deterministic or self.upper == self.lower:
            # fixed slope: arithmetic mean of the two bounds
            return theano.tensor.nnet.relu(input, (self.upper+self.lower)/2.0)
        else:
            # sample one random slope per entry of the non-shared axes;
            # shared axes are collapsed to size 1 so the sample broadcasts
            shape = list(self.input_shape)
            if any(s is None for s in shape):
                # fall back to the symbolic shape when a static size is
                # unknown at construction time
                shape = list(input.shape)
            for ax in self.shared_axes:
                shape[ax] = 1

            rnd = self._srng.uniform(tuple(shape),
                                     low=self.lower,
                                     high=self.upper,
                                     dtype=theano.config.floatX)
            # mark the size-1 shared axes as broadcastable to match input
            rnd = theano.tensor.addbroadcast(rnd, *self.shared_axes)
            return theano.tensor.nnet.relu(input, rnd)
+
+
def rrelu(layer, **kwargs):
    """
    Convenience function to apply randomized rectify to a given layer's output.
    Will set the layer's nonlinearity to identity if there is one and will
    apply the randomized rectifier instead.

    Parameters
    ----------
    layer: a :class:`Layer` instance
        The `Layer` instance to apply the randomized rectifier layer to;
        note that it will be irreversibly modified as specified above

    **kwargs
        Any additional keyword arguments are passed to the
        :class:`RandomizedRectifierLayer`

    Examples
    --------
    Note that this function modifies an existing layer, like this:

    >>> from lasagne.layers import InputLayer, DenseLayer, rrelu
    >>> layer = InputLayer((32, 100))
    >>> layer = DenseLayer(layer, num_units=200)
    >>> layer = rrelu(layer)

    In particular, :func:`rrelu` can *not* be passed as a nonlinearity.
    """
    # neutralize the wrapped layer's own nonlinearity, if it has one, so the
    # randomized rectifier becomes the effective activation
    if getattr(layer, 'nonlinearity', None) is not None:
        layer.nonlinearity = nonlinearities.identity
    return RandomizedRectifierLayer(layer, **kwargs)
diff --git a/lasagne/nonlinearities.py b/lasagne/nonlinearities.py
new file mode 100644
index 0000000..b734bac
--- /dev/null
+++ b/lasagne/nonlinearities.py
@@ -0,0 +1,305 @@
+# -*- coding: utf-8 -*-
+"""
+Non-linear activation functions for artificial neurons.
+"""
+
+import theano.tensor
+
+
# sigmoid
def sigmoid(x):
    """Sigmoid activation function :math:`\\varphi(x) = \\frac{1}{1 + e^{-x}}`

    Squashes the activation smoothly into the open interval (0, 1).

    Parameters
    ----------
    x : float32
        The activation (the summed, weighted input of a neuron).

    Returns
    -------
    float32 in [0, 1]
        The output of the sigmoid function applied to the activation.
    """
    return theano.tensor.nnet.sigmoid(x)
+
+
# softmax (row-wise)
def softmax(x):
    """Softmax activation function
    :math:`\\varphi(\\mathbf{x})_j =
    \\frac{e^{\\mathbf{x}_j}}{\\sum_{k=1}^K e^{\\mathbf{x}_k}}`
    where :math:`K` is the total number of neurons in the layer. This
    activation function gets applied row-wise.

    Parameters
    ----------
    x : float32
        The activation (the summed, weighted input of a neuron).

    Returns
    -------
    float32 where the sum of the row is 1 and each single value is in [0, 1]
        The output of the softmax function applied to the activation.
    """
    # NOTE: the docstring previously used single-backslash \mathbf and \sum,
    # which are invalid escape sequences in a non-raw string literal; they
    # are doubled here, matching the escaping used elsewhere in this module.
    return theano.tensor.nnet.softmax(x)
+
+
# tanh
def tanh(x):
    """Tanh activation function :math:`\\varphi(x) = \\tanh(x)`

    Squashes the activation smoothly into the interval (-1, 1).

    Parameters
    ----------
    x : float32
        The activation (the summed, weighted input of a neuron).

    Returns
    -------
    float32 in [-1, 1]
        The output of the tanh function applied to the activation.
    """
    return theano.tensor.tanh(x)
+
+
# scaled tanh
class ScaledTanH(object):
    """Scaled tanh :math:`\\varphi(x) = \\tanh(\\alpha \\cdot x) \\cdot \\beta`

    This is a modified tanh function which allows to rescale both the input and
    the output of the activation.

    Scaling the input down will result in decreasing the maximum slope of the
    tanh and as a result it will be in the linear regime in a larger interval
    of the input space. Scaling the input up will increase the maximum slope
    of the tanh and thus bring it closer to a step function.

    Scaling the output changes the output interval to :math:`[-\\beta,\\beta]`.

    Parameters
    ----------
    scale_in : float32
        The scale parameter :math:`\\alpha` for the input

    scale_out : float32
        The scale parameter :math:`\\beta` for the output

    Methods
    -------
    __call__(x)
        Apply the scaled tanh function to the activation `x`.

    Examples
    --------
    In contrast to other activation functions in this module, this is
    a class that needs to be instantiated to obtain a callable:

    >>> from lasagne.layers import InputLayer, DenseLayer
    >>> l_in = InputLayer((None, 100))
    >>> from lasagne.nonlinearities import ScaledTanH
    >>> scaled_tanh = ScaledTanH(scale_in=0.5, scale_out=2.27)
    >>> l1 = DenseLayer(l_in, num_units=200, nonlinearity=scaled_tanh)

    Notes
    -----
    LeCun et al. (in [1]_, Section 4.4) suggest ``scale_in=2./3`` and
    ``scale_out=1.7159``, which has :math:`\\varphi(\\pm 1) = \\pm 1`,
    maximum second derivative at 1, and an effective gain close to 1.

    By carefully matching :math:`\\alpha` and :math:`\\beta`, the nonlinearity
    can also be tuned to preserve the mean and variance of its input:

    * ``scale_in=0.5``, ``scale_out=2.4``: If the input is a random normal
      variable, the output will have zero mean and unit variance.
    * ``scale_in=1``, ``scale_out=1.6``: Same property, but with a smaller
      linear regime in input space.
    * ``scale_in=0.5``, ``scale_out=2.27``: If the input is a uniform normal
      variable, the output will have zero mean and unit variance.
    * ``scale_in=1``, ``scale_out=1.48``: Same property, but with a smaller
      linear regime in input space.

    References
    ----------
    .. [1] LeCun, Yann A., et al. (1998):
       Efficient BackProp,
       http://link.springer.com/chapter/10.1007/3-540-49430-8_2,
       http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf
    .. [2] Masci, Jonathan, et al. (2011):
       Stacked Convolutional Auto-Encoders for Hierarchical Feature Extraction,
       http://link.springer.com/chapter/10.1007/978-3-642-21735-7_7,
       http://people.idsia.ch/~ciresan/data/icann2011.pdf
    """

    def __init__(self, scale_in=1, scale_out=1):
        # input gain (alpha) and output gain (beta)
        self.scale_in = scale_in
        self.scale_out = scale_out

    def __call__(self, x):
        scaled_input = x * self.scale_in
        return self.scale_out * theano.tensor.tanh(scaled_input)


ScaledTanh = ScaledTanH  # alias with alternative capitalization
+
+
# rectify
def rectify(x):
    """Rectify activation function :math:`\\varphi(x) = \\max(0, x)`

    Passes positive activations through unchanged and clips negative ones
    to zero.

    Parameters
    ----------
    x : float32
        The activation (the summed, weighted input of a neuron).

    Returns
    -------
    float32
        The output of the rectify function applied to the activation.
    """
    return theano.tensor.nnet.relu(x)
+
+
# leaky rectify
class LeakyRectify(object):
    """Leaky rectifier :math:`\\varphi(x) = \\max(\\alpha \\cdot x, x)`

    The leaky rectifier was introduced in [1]_. Compared to the standard
    rectifier :func:`rectify`, it has a nonzero gradient for negative input,
    which often helps convergence.

    Parameters
    ----------
    leakiness : float
        Slope for negative input, usually between 0 and 1.
        A leakiness of 0 will lead to the standard rectifier,
        a leakiness of 1 will lead to a linear activation function,
        and any value in between will give a leaky rectifier.

    Methods
    -------
    __call__(x)
        Apply the leaky rectify function to the activation `x`.

    Examples
    --------
    In contrast to other activation functions in this module, this is
    a class that needs to be instantiated to obtain a callable:

    >>> from lasagne.layers import InputLayer, DenseLayer
    >>> l_in = InputLayer((None, 100))
    >>> from lasagne.nonlinearities import LeakyRectify
    >>> custom_rectify = LeakyRectify(0.1)
    >>> l1 = DenseLayer(l_in, num_units=200, nonlinearity=custom_rectify)

    Alternatively, you can use the provided instance for leakiness=0.01:

    >>> from lasagne.nonlinearities import leaky_rectify
    >>> l2 = DenseLayer(l_in, num_units=200, nonlinearity=leaky_rectify)

    Or the one for a high leakiness of 1/3:

    >>> from lasagne.nonlinearities import very_leaky_rectify
    >>> l3 = DenseLayer(l_in, num_units=200, nonlinearity=very_leaky_rectify)

    See Also
    --------
    leaky_rectify: Instance with default leakiness of 0.01, as in [1]_.
    very_leaky_rectify: Instance with high leakiness of 1/3, as in [2]_.

    References
    ----------
    .. [1] Maas et al. (2013):
       Rectifier Nonlinearities Improve Neural Network Acoustic Models,
       http://web.stanford.edu/~awni/papers/relu_hybrid_icml2013_final.pdf
    .. [2] Graham, Benjamin (2014):
       Spatially-sparse convolutional neural networks,
       http://arxiv.org/abs/1409.6070
    """
    def __init__(self, leakiness=0.01):
        # slope used for negative inputs
        self.leakiness = leakiness

    def __call__(self, x):
        alpha = self.leakiness
        return theano.tensor.nnet.relu(x, alpha)
+
+
# Ready-made LeakyRectify instances exported at module level; the manual
# ``__doc__`` assignments give each instance its own entry in the generated
# API documentation.
leaky_rectify = LeakyRectify()  # shortcut with default leakiness
leaky_rectify.__doc__ = """leaky_rectify(x)

    Instance of :class:`LeakyRectify` with leakiness :math:`\\alpha=0.01`
    """


very_leaky_rectify = LeakyRectify(1./3)  # shortcut with high leakiness
very_leaky_rectify.__doc__ = """very_leaky_rectify(x)

    Instance of :class:`LeakyRectify` with leakiness :math:`\\alpha=1/3`
    """
+
+
# elu
def elu(x):
    """Exponential Linear Unit :math:`\\varphi(x) = (x > 0) ? x : e^x - 1`

    The Exponential Linear Unit (ELU) was introduced in [1]_. Compared to the
    linear rectifier :func:`rectify`, it has a mean activation closer to zero
    and nonzero gradient for negative input, which can help convergence.
    Compared to the leaky rectifier :class:`LeakyRectify`, it saturates for
    highly negative inputs.

    Parameters
    ----------
    x : float32
        The activation (the summed, weighed input of a neuron).

    Returns
    -------
    float32
        The output of the exponential linear unit for the activation.

    Notes
    -----
    In [1]_, an additional parameter :math:`\\alpha` controls the (negative)
    saturation value for negative inputs, but is set to 1 for all experiments.
    It is omitted here.

    References
    ----------
    .. [1] Djork-Arné Clevert, Thomas Unterthiner, Sepp Hochreiter (2015):
       Fast and Accurate Deep Network Learning by Exponential Linear Units
       (ELUs), http://arxiv.org/abs/1511.07289
    """
    # expm1(x) computes e^x - 1 without the catastrophic cancellation that
    # exp(x) - 1 suffers for |x| close to zero, and is identical elsewhere
    return theano.tensor.switch(x > 0, x, theano.tensor.expm1(x))
+
+
# softplus
def softplus(x):
    """Softplus activation function :math:`\\varphi(x) = \\log(1 + e^x)`

    A smooth approximation of the rectifier, strictly positive everywhere.

    Parameters
    ----------
    x : float32
        The activation (the summed, weighted input of a neuron).

    Returns
    -------
    float32
        The output of the softplus function applied to the activation.
    """
    return theano.tensor.nnet.softplus(x)
+
+
# linear
def linear(x):
    """Linear activation function :math:`\\varphi(x) = x`

    Returns the activation unchanged; useful wherever an explicit
    "no nonlinearity" callable is required.

    Parameters
    ----------
    x : float32
        The activation (the summed, weighted input of a neuron).

    Returns
    -------
    float32
        The output of the identity applied to the activation.
    """
    return x


identity = linear  # alias: `identity` and `linear` are the same function
diff --git a/lasagne/objectives.py b/lasagne/objectives.py
new file mode 100644
index 0000000..5da3f16
--- /dev/null
+++ b/lasagne/objectives.py
@@ -0,0 +1,379 @@
+"""
+Provides some minimal help with building loss expressions for training or
+validating a neural network.
+
+Five functions build element- or item-wise loss expressions from network
+predictions and targets:
+
+.. autosummary::
+ :nosignatures:
+
+ binary_crossentropy
+ categorical_crossentropy
+ squared_error
+ binary_hinge_loss
+ multiclass_hinge_loss
+
+A convenience function aggregates such losses into a scalar expression
+suitable for differentiation:
+
+.. autosummary::
+ :nosignatures:
+
+ aggregate
+
+Note that these functions only serve to write more readable code, but are
+completely optional. Essentially, any differentiable scalar Theano expression
+can be used as a training objective.
+
+Finally, two functions compute evaluation measures that are useful for
+validation and testing only, not for training:
+
+.. autosummary::
+ :nosignatures:
+
+ binary_accuracy
+ categorical_accuracy
+
+Those can also be aggregated into a scalar expression if needed.
+
+Examples
+--------
+Assuming you have a simple neural network for 3-way classification:
+
+>>> from lasagne.layers import InputLayer, DenseLayer, get_output
+>>> from lasagne.nonlinearities import softmax, rectify
+>>> l_in = InputLayer((100, 20))
+>>> l_hid = DenseLayer(l_in, num_units=30, nonlinearity=rectify)
+>>> l_out = DenseLayer(l_hid, num_units=3, nonlinearity=softmax)
+
+And Theano variables representing your network input and targets:
+
+>>> import theano
+>>> data = theano.tensor.matrix('data')
+>>> targets = theano.tensor.matrix('targets')
+
+You'd first construct an element-wise loss expression:
+
+>>> from lasagne.objectives import categorical_crossentropy, aggregate
+>>> predictions = get_output(l_out, data)
+>>> loss = categorical_crossentropy(predictions, targets)
+
+Then aggregate it into a scalar (you could also just call ``mean()`` on it):
+
+>>> loss = aggregate(loss, mode='mean')
+
+Finally, this gives a loss expression you can pass to any of the update
+methods in :mod:`lasagne.updates`. For validation of a network, you will
+usually want to repeat these steps with deterministic network output, i.e.,
+without dropout or any other nondeterministic computation in between:
+
+>>> test_predictions = get_output(l_out, data, deterministic=True)
+>>> test_loss = categorical_crossentropy(test_predictions, targets)
+>>> test_loss = aggregate(test_loss)
+
+This gives a loss expression good for monitoring validation error.
+"""
+
+import theano.tensor.nnet
+
+from lasagne.layers import get_output
+
+__all__ = [
+ "binary_crossentropy",
+ "categorical_crossentropy",
+ "squared_error",
+ "aggregate",
+ "binary_hinge_loss",
+ "multiclass_hinge_loss",
+ "binary_accuracy",
+ "categorical_accuracy"
+]
+
+
def binary_crossentropy(predictions, targets):
    """Computes the binary cross-entropy between predictions and targets.

    .. math:: L = -t \\log(p) - (1 - t) \\log(1 - p)

    Parameters
    ----------
    predictions : Theano tensor
        Predictions in (0, 1), e.g. the sigmoidal output of a neural network.
    targets : Theano tensor
        Targets in [0, 1], e.g. ground truth labels.

    Returns
    -------
    Theano tensor
        An expression for the element-wise binary cross-entropy.

    Notes
    -----
    This is the loss function of choice for binary classification problems
    and sigmoid output units.
    """
    # Delegate to Theano's implementation of the element-wise loss.
    crossentropy = theano.tensor.nnet.binary_crossentropy
    return crossentropy(predictions, targets)
+
+
def categorical_crossentropy(predictions, targets):
    """Computes the categorical cross-entropy between predictions and targets.

    .. math:: L_i = - \\sum_j{t_{i,j} \\log(p_{i,j})}

    Parameters
    ----------
    predictions : Theano 2D tensor
        Class probabilities in (0, 1), e.g. softmax output of a neural
        network, with one row per data point and one column per class.
    targets : Theano 2D tensor or 1D tensor
        Either a matrix of target probabilities in [0, 1] laid out like
        `predictions`, or a vector of int giving the correct class index
        per data point.

    Returns
    -------
    Theano 1D tensor
        An expression for the item-wise categorical cross-entropy.

    Notes
    -----
    This is the loss function of choice for multi-class classification
    problems and softmax output units. For hard targets, i.e., targets
    that assign all of the probability to a single class per data point,
    providing a vector of int for the targets is usually slightly more
    efficient than providing a matrix with a single 1.0 per row.
    """
    # Theano accepts both the integer-vector and the matrix target formats.
    crossentropy = theano.tensor.nnet.categorical_crossentropy
    return crossentropy(predictions, targets)
+
+
def squared_error(a, b):
    """Computes the element-wise squared difference between two tensors.

    .. math:: L = (p - t)^2

    Parameters
    ----------
    a, b : Theano tensor
        The two tensors whose element-wise squared difference to compute.

    Returns
    -------
    Theano tensor
        An expression for the element-wise squared difference.

    Notes
    -----
    This is the loss function of choice for many regression problems
    or auto-encoders with linear output units.
    """
    difference = a - b
    return difference * difference
+
+
def aggregate(loss, weights=None, mode='mean'):
    """Aggregates an element- or item-wise loss to a scalar loss.

    Parameters
    ----------
    loss : Theano tensor
        The loss expression to aggregate.
    weights : Theano tensor, optional
        Per-element (or per-item) weights, broadcastable to the shape of
        `loss`. If omitted, all elements are weighted equally.
    mode : {'mean', 'sum', 'normalized_sum'}
        Whether to aggregate by averaging, by summing, or by summing and
        dividing by the total weights (which requires `weights` to be given).

    Returns
    -------
    Theano scalar
        A scalar loss expression suitable for differentiation.

    Notes
    -----
    By supplying binary weights (i.e., only using values 0 and 1), this
    function can also be used for masking out particular entries in the
    loss expression. Note that masked entries still need to be valid
    values, not-a-numbers (NaNs) will propagate through.

    When applied to batch-wise loss expressions, setting `mode` to
    ``'normalized_sum'`` ensures that the loss per batch is of a similar
    magnitude, independent of associated weights. However, it means that
    a given data point contributes more to the loss when it shares a batch
    with low-weighted or masked data points than with high-weighted ones.
    """
    weighted = loss if weights is None else loss * weights
    if mode == 'mean':
        return weighted.mean()
    if mode == 'sum':
        return weighted.sum()
    if mode == 'normalized_sum':
        # Dividing by the total weight only makes sense with explicit weights.
        if weights is None:
            raise ValueError("require weights for mode='normalized_sum'")
        return weighted.sum() / weights.sum()
    raise ValueError("mode must be 'mean', 'sum' or 'normalized_sum', "
                     "got %r" % mode)
+
+
def binary_hinge_loss(predictions, targets, binary=True, delta=1):
    """Computes the binary hinge loss between predictions and targets.

    .. math:: L_i = \\max(0, \\delta - t_i p_i)

    Parameters
    ----------
    predictions : Theano tensor
        Predictions in (0, 1), e.g. the sigmoidal output of a neural network.
    targets : Theano tensor
        Targets in {0, 1} (or in {-1, 1} depending on `binary`), such as
        ground truth labels.
    binary : bool, default True
        ``True`` if targets are in {0, 1}, ``False`` if they are in {-1, 1}
    delta : scalar, default 1
        The hinge loss margin

    Returns
    -------
    Theano tensor
        An expression for the element-wise binary hinge loss

    Notes
    -----
    This is an alternative to the binary cross-entropy loss for binary
    classification problems
    """
    if binary:
        # Map {0, 1} labels onto {-1, 1}, the form the hinge formula expects.
        targets = 2 * targets - 1
    margins = delta - predictions * targets
    return theano.tensor.nnet.relu(margins)
+
+
def multiclass_hinge_loss(predictions, targets, delta=1):
    """Computes the multi-class hinge loss between predictions and targets.

    .. math:: L_i = \\max_{j \\not = p_i} (0, t_j - t_{p_i} + \\delta)

    Parameters
    ----------
    predictions : Theano 2D tensor
        Predictions in (0, 1), such as softmax output of a neural network,
        with data points in rows and class probabilities in columns.
    targets : Theano 2D tensor or 1D tensor
        Either a vector of int giving the correct class index per data point
        or a 2D tensor of one-hot encoding of the correct class in the same
        layout as predictions (non-binary targets in [0, 1] do not work!)
    delta : scalar, default 1
        The hinge loss margin

    Returns
    -------
    Theano 1D tensor
        An expression for the item-wise multi-class hinge loss

    Notes
    -----
    This is an alternative to the categorical cross-entropy loss for
    multi-class classification problems
    """
    num_cls = predictions.shape[1]
    # Accept either an int vector of class indices (converted to a one-hot
    # matrix here) or an already one-hot target matrix.
    if targets.ndim == predictions.ndim - 1:
        targets = theano.tensor.extra_ops.to_one_hot(targets, num_cls)
    elif targets.ndim != predictions.ndim:
        raise TypeError('rank mismatch between targets and predictions')
    # Score of the correct class, one entry per data point (selected via
    # the nonzero positions of the one-hot targets).
    corrects = predictions[targets.nonzero()]
    # Scores of all remaining classes, reshaped back to one row per data
    # point (num_cls - 1 competitors each).
    rest = theano.tensor.reshape(predictions[(1-targets).nonzero()],
                                 (-1, num_cls-1))
    # Strongest competing class per data point.
    rest = theano.tensor.max(rest, axis=1)
    # Hinge: penalize when the best competitor comes within `delta` of
    # (or exceeds) the correct class's score.
    return theano.tensor.nnet.relu(rest - corrects + delta)
+
+
def binary_accuracy(predictions, targets, threshold=0.5):
    """Computes the binary accuracy between predictions and targets.

    .. math:: L_i = \\mathbb{I}(t_i = \\mathbb{I}(p_i \\ge \\alpha))

    Parameters
    ----------
    predictions : Theano tensor
        Predictions in [0, 1], such as a sigmoidal output of a neural network,
        giving the probability of the positive class
    targets : Theano tensor
        Targets in {0, 1}, such as ground truth labels.
    threshold : scalar, default: 0.5
        Specifies at what threshold to consider the predictions being of the
        positive class

    Returns
    -------
    Theano tensor
        An expression for the element-wise binary accuracy in {0, 1}

    Notes
    -----
    This objective function should not be used with a gradient calculation;
    its gradient is zero everywhere. It is intended as a convenience for
    validation and testing, not training.

    To obtain the average accuracy, call :func:`theano.tensor.mean()` on the
    result, passing ``dtype=theano.config.floatX`` to compute the mean on GPU.
    """
    # Binarize the probabilities at `threshold`, then compare element-wise
    # against the {0, 1} targets.
    predictions = theano.tensor.ge(predictions, threshold)
    return theano.tensor.eq(predictions, targets)
+
+
def categorical_accuracy(predictions, targets, top_k=1):
    """Computes the categorical accuracy between predictions and targets.

    .. math:: L_i = \\mathbb{I}(t_i = \\operatorname{argmax}_c p_{i,c})

    Can be relaxed to allow matches among the top :math:`k` predictions:

    .. math::
        L_i = \\mathbb{I}(t_i \\in \\operatorname{argsort}_c (-p_{i,c})_{:k})

    Parameters
    ----------
    predictions : Theano 2D tensor
        Predictions in (0, 1), such as softmax output of a neural network,
        with data points in rows and class probabilities in columns.
    targets : Theano 2D tensor or 1D tensor
        Either a vector of int giving the correct class index per data point
        or a 2D tensor of one-hot encoding of the correct class in the same
        layout as predictions
    top_k : int
        Regard a prediction to be correct if the target class is among the
        `top_k` largest class probabilities. For the default value of 1, a
        prediction is correct only if the target class is the most probable.

    Returns
    -------
    Theano 1D tensor
        An expression for the item-wise categorical accuracy in {0, 1}

    Notes
    -----
    This is not a differentiable function, as it includes an argmax.
    This objective function should never be used with a gradient calculation.
    It is intended as a convenience for validation and testing, not training.

    To obtain the average accuracy, call :func:`theano.tensor.mean()` on the
    result, passing ``dtype=theano.config.floatX`` to compute the mean on GPU.
    """
    # Reduce matrix targets (one-hot or soft) to index vectors via argmax.
    if targets.ndim == predictions.ndim:
        targets = theano.tensor.argmax(targets, axis=-1)
    elif targets.ndim != predictions.ndim - 1:
        raise TypeError('rank mismatch between targets and predictions')

    if top_k == 1:
        # standard categorical accuracy
        top = theano.tensor.argmax(predictions, axis=-1)
        return theano.tensor.eq(top, targets)
    else:
        # top-k accuracy: the sorted class indices end with the k most
        # probable classes.
        top = theano.tensor.argsort(predictions, axis=-1)
        # (Theano cannot index with [..., -top_k:], we need to simulate that)
        top = top[[slice(None) for _ in range(top.ndim - 1)] +
                  [slice(-top_k, None)]]
        # Broadcast targets against the k candidates and check for any match.
        targets = theano.tensor.shape_padaxis(targets, axis=-1)
        return theano.tensor.any(theano.tensor.eq(top, targets), axis=-1)
diff --git a/lasagne/random.py b/lasagne/random.py
new file mode 100644
index 0000000..65a0e70
--- /dev/null
+++ b/lasagne/random.py
@@ -0,0 +1,36 @@
+"""
+A module with a package-wide random number generator,
+used for weight initialization and seeding noise layers.
+This can be replaced by a :class:`numpy.random.RandomState` instance with a
+particular seed to facilitate reproducibility.
+"""
+
+import numpy as np
+
+
# Package-level RNG; defaults to numpy's global random module until
# replaced via set_rng().
_rng = np.random


def get_rng():
    """Return the package-level random number generator.

    Returns
    -------
    :class:`numpy.random.RandomState` instance
        Whatever was passed to the most recent call of :func:`set_rng`,
        or ``numpy.random`` if :func:`set_rng` has never been called.
    """
    return _rng


def set_rng(new_rng):
    """Replace the package-level random number generator.

    Parameters
    ----------
    new_rng : ``numpy.random`` or a :class:`numpy.random.RandomState` instance
        The random number generator to use.
    """
    global _rng
    _rng = new_rng
diff --git a/lasagne/regularization.py b/lasagne/regularization.py
new file mode 100644
index 0000000..d3672a9
--- /dev/null
+++ b/lasagne/regularization.py
@@ -0,0 +1,189 @@
+"""
+Functions to apply regularization to the weights in a network.
+
+We provide functions to calculate the L1 and L2 penalty. Penalty functions
+take a tensor as input and calculate the penalty contribution from that tensor:
+
+.. autosummary::
+ :nosignatures:
+
+ l1
+ l2
+
+A helper function can be used to apply a penalty function to a tensor or a
+list of tensors:
+
+.. autosummary::
+ :nosignatures:
+
+ apply_penalty
+
+Finally we provide two helper functions for applying a penalty function to the
+parameters in a layer or the parameters in a group of layers:
+
+.. autosummary::
+ :nosignatures:
+
+ regularize_layer_params_weighted
+ regularize_network_params
+
+Examples
+--------
+>>> import lasagne
+>>> import theano.tensor as T
+>>> import theano
+>>> from lasagne.nonlinearities import softmax
+>>> from lasagne.layers import InputLayer, DenseLayer, get_output
+>>> from lasagne.regularization import regularize_layer_params_weighted, l2, l1
+>>> from lasagne.regularization import regularize_layer_params
+>>> layer_in = InputLayer((100, 20))
+>>> layer1 = DenseLayer(layer_in, num_units=3)
+>>> layer2 = DenseLayer(layer1, num_units=5, nonlinearity=softmax)
+>>> x = T.matrix('x') # shp: num_batch x num_features
+>>> y = T.ivector('y') # shp: num_batch
+>>> l_out = get_output(layer2, x)
+>>> loss = T.mean(T.nnet.categorical_crossentropy(l_out, y))
+>>> layers = {layer1: 0.1, layer2: 0.5}
+>>> l2_penalty = regularize_layer_params_weighted(layers, l2)
+>>> l1_penalty = regularize_layer_params(layer2, l1) * 1e-4
+>>> loss = loss + l2_penalty + l1_penalty
+"""
+import theano.tensor as T
+from .layers import Layer, get_all_params
+
+
def l1(x):
    """Return the L1 norm (sum of absolute values) of a tensor.

    Parameters
    ----------
    x : Theano tensor

    Returns
    -------
    Theano scalar
        The sum of the absolute values of all elements of `x`.
    """
    # abs() dispatches to the tensor's __abs__, keeping this expression
    # symbolic.
    return T.sum(abs(x))
+
+
def l2(x):
    """Return the squared L2 norm (sum of squared values) of a tensor.

    Parameters
    ----------
    x : Theano tensor

    Returns
    -------
    Theano scalar
        The sum of the squares of all elements of `x`.
    """
    return T.sum(x**2)
+
+
def apply_penalty(tensor_or_tensors, penalty, **kwargs):
    """
    Computes the total cost for applying a specified penalty
    to a tensor or group of tensors.

    Parameters
    ----------
    tensor_or_tensors : Theano tensor or list of tensors
    penalty : callable
    **kwargs
        keyword arguments passed to penalty.

    Returns
    -------
    Theano scalar
        a scalar expression for the total penalty cost
    """
    # First assume a collection of tensors and sum the per-tensor penalties;
    # if that fails (e.g. a single non-iterable tensor), penalize it directly.
    try:
        total = 0
        for tensor in tensor_or_tensors:
            total = total + penalty(tensor, **kwargs)
        return total
    except (TypeError, ValueError):
        return penalty(tensor_or_tensors, **kwargs)
+
+
def regularize_layer_params(layer, penalty, tags=None, **kwargs):
    """
    Computes a regularization cost by applying a penalty to the parameters
    of a layer or group of layers.

    Parameters
    ----------
    layer : a :class:`Layer` instances or list of layers.
    penalty : callable
    tags: dict
        Tag specifications which filter the parameters of the layer or layers.
        By default, only parameters with the `regularizable` tag are included.
    **kwargs
        keyword arguments passed to penalty.

    Returns
    -------
    Theano scalar
        a scalar expression for the cost
    """
    # None stands in for the usual filter to avoid a mutable default
    # argument shared across calls.
    if tags is None:
        tags = {'regularizable': True}
    layers = [layer, ] if isinstance(layer, Layer) else layer
    all_params = []

    for single_layer in layers:
        all_params += single_layer.get_params(**tags)

    return apply_penalty(all_params, penalty, **kwargs)
+
+
def regularize_layer_params_weighted(layers, penalty, tags=None, **kwargs):
    """
    Computes a regularization cost by applying a penalty to the parameters
    of a layer or group of layers, weighted by a coefficient for each layer.

    Parameters
    ----------
    layers : dict
        A mapping from :class:`Layer` instances to coefficients.
    penalty : callable
    tags: dict
        Tag specifications which filter the parameters of the layer or layers.
        By default, only parameters with the `regularizable` tag are included.
    **kwargs
        keyword arguments passed to penalty.

    Returns
    -------
    Theano scalar
        a scalar expression for the cost
    """
    # None stands in for the usual filter to avoid a mutable default
    # argument shared across calls.
    if tags is None:
        tags = {'regularizable': True}
    return sum(coeff * apply_penalty(layer.get_params(**tags),
                                     penalty,
                                     **kwargs)
               for layer, coeff in layers.items()
               )
+
+
def regularize_network_params(layer, penalty, tags=None, **kwargs):
    """
    Computes a regularization cost by applying a penalty to the parameters
    of all layers in a network.

    Parameters
    ----------
    layer : a :class:`Layer` instance.
        Parameters of this layer and all layers below it will be penalized.
    penalty : callable
    tags: dict
        Tag specifications which filter the parameters of the layer or layers.
        By default, only parameters with the `regularizable` tag are included.
    **kwargs
        keyword arguments passed to penalty.

    Returns
    -------
    Theano scalar
        a scalar expression for the cost
    """
    # None stands in for the usual filter to avoid a mutable default
    # argument shared across calls.
    if tags is None:
        tags = {'regularizable': True}
    return apply_penalty(get_all_params(layer, **tags), penalty, **kwargs)
diff --git a/lasagne/tests/conftest.py b/lasagne/tests/conftest.py
new file mode 100644
index 0000000..9e3c776
--- /dev/null
+++ b/lasagne/tests/conftest.py
@@ -0,0 +1,10 @@
+import pytest
+
+
def pytest_addoption(parser):
    # Register an opt-in --runslow command-line flag; consumed by
    # pytest_runtest_setup to decide whether slow tests run.
    parser.addoption("--runslow", action="store_true", help="run slow tests")
+
+
def pytest_runtest_setup(item):
    # Skip any test marked 'slow' unless the --runslow flag was given.
    if 'slow' in item.keywords and not item.config.getoption("--runslow"):
        pytest.skip("need --runslow option to run")
diff --git a/lasagne/tests/layers/conftest.py b/lasagne/tests/layers/conftest.py
new file mode 100644
index 0000000..38114cf
--- /dev/null
+++ b/lasagne/tests/layers/conftest.py
@@ -0,0 +1,13 @@
+from mock import Mock
+import pytest
+
+
@pytest.fixture
def dummy_input_layer():
    # A Mock specced on a real InputLayer((2, 3, 4)), with the attributes
    # most layer tests need copied over from the real instance.
    from lasagne.layers.input import InputLayer
    input_layer = InputLayer((2, 3, 4))
    mock = Mock(input_layer)
    mock.shape = input_layer.shape
    mock.input_var = input_layer.input_var
    mock.output_shape = input_layer.output_shape
    return mock
diff --git a/lasagne/tests/layers/test_base.py b/lasagne/tests/layers/test_base.py
new file mode 100644
index 0000000..b234b1b
--- /dev/null
+++ b/lasagne/tests/layers/test_base.py
@@ -0,0 +1,180 @@
+from mock import Mock
+import numpy
+import pytest
+import theano
+
+
class TestLayer:
    """Tests for the base ``Layer`` class: shape handling, naming, the
    parameter registry (``add_param``/``get_params``) and error contracts."""

    @pytest.fixture
    def layer(self):
        # A Layer whose incoming layer is mocked with a 1D output shape.
        from lasagne.layers.base import Layer
        return Layer(Mock(output_shape=(None,)))

    @pytest.fixture
    def named_layer(self):
        from lasagne.layers.base import Layer
        return Layer(Mock(output_shape=(None,)), name='layer_name')

    def test_input_shape(self, layer):
        assert layer.input_shape == layer.input_layer.output_shape

    def test_get_output_shape_for(self, layer):
        # The base class passes shapes through unchanged.
        shape = Mock()
        assert layer.get_output_shape_for(shape) == shape

    @pytest.fixture
    def layer_from_shape(self):
        # Layers can be built from a plain shape tuple instead of a layer.
        from lasagne.layers.base import Layer
        return Layer((None, 20))

    def test_layer_from_shape(self, layer_from_shape):
        layer = layer_from_shape
        assert layer.input_layer is None
        assert layer.input_shape == (None, 20)

    def test_named_layer(self, named_layer):
        assert named_layer.name == 'layer_name'

    def test_get_params(self, layer):
        # A fresh layer has no registered parameters.
        assert layer.get_params() == []

    def test_get_params_tags(self, layer):
        # get_params() filters by arbitrary boolean tags given at add_param.
        a_shape = (20, 50)
        a = numpy.random.normal(0, 1, a_shape)
        A = layer.add_param(a, a_shape, name='A', tag1=True, tag2=False)

        b_shape = (30, 20)
        b = numpy.random.normal(0, 1, b_shape)
        B = layer.add_param(b, b_shape, name='B', tag1=True, tag2=True)

        c_shape = (40, 10)
        c = numpy.random.normal(0, 1, c_shape)
        C = layer.add_param(c, c_shape, name='C', tag2=True)

        assert layer.get_params() == [A, B, C]
        assert layer.get_params(tag1=True) == [A, B]
        assert layer.get_params(tag1=False) == [C]
        assert layer.get_params(tag2=True) == [B, C]
        assert layer.get_params(tag2=False) == [A]
        assert layer.get_params(tag1=True, tag2=True) == [B]

    def test_get_params_expressions(self, layer):
        # Parameters may be arbitrary expressions of shared variables.
        x, y, z = (theano.shared(0, name=n) for n in 'xyz')
        W1 = layer.add_param(x**2 + theano.tensor.log(y), (), tag1=True)
        W2 = layer.add_param(theano.tensor.matrix(), (10, 10), tag1=True)
        W3 = layer.add_param(z.T, (), tag2=True)
        # layer.params stores the parameter expressions:
        assert list(layer.params.keys()) == [W1, W2, W3]
        # layer.get_params() returns the underlying shared variables:
        assert layer.get_params() == [x, y, z]
        # filtering acts on the parameter expressions:
        assert layer.get_params(tag1=True) == [x, y]
        assert layer.get_params(tag2=True) == [z]

    def test_add_param_tags(self, layer):
        # 'trainable' and 'regularizable' are tagged by default and can be
        # switched off individually.
        a_shape = (20, 50)
        a = numpy.random.normal(0, 1, a_shape)
        A = layer.add_param(a, a_shape)
        assert A in layer.params
        assert 'trainable' in layer.params[A]
        assert 'regularizable' in layer.params[A]

        b_shape = (30, 20)
        b = numpy.random.normal(0, 1, b_shape)
        B = layer.add_param(b, b_shape, trainable=False)
        assert B in layer.params
        assert 'trainable' not in layer.params[B]
        assert 'regularizable' in layer.params[B]

        c_shape = (40, 10)
        c = numpy.random.normal(0, 1, c_shape)
        C = layer.add_param(c, c_shape, tag1=True)
        assert C in layer.params
        assert 'trainable' in layer.params[C]
        assert 'regularizable' in layer.params[C]
        assert 'tag1' in layer.params[C]

    def test_add_param_name(self, layer):
        a_shape = (20, 50)
        a = numpy.random.normal(0, 1, a_shape)
        A = layer.add_param(a, a_shape, name='A')
        assert A.name == 'A'

    def test_add_param_named_layer_name(self, named_layer):
        # The layer's own name prefixes its parameters' names.
        a_shape = (20, 50)
        a = numpy.random.normal(0, 1, a_shape)
        A = named_layer.add_param(a, a_shape, name='A')
        assert A.name == 'layer_name.A'

    def test_get_output_for_notimplemented(self, layer):
        # The base class leaves get_output_for() to subclasses.
        with pytest.raises(NotImplementedError):
            layer.get_output_for(Mock())

    def test_nonpositive_input_dims_raises_value_error(self, layer):
        # Zero or negative (non-batch) input dimensions must be rejected.
        from lasagne.layers.base import Layer
        neg_input_layer = Mock(output_shape=(None, -1, -1))
        zero_input_layer = Mock(output_shape=(None, 0, 0))
        pos_input_layer = Mock(output_shape=(None, 1, 1))
        with pytest.raises(ValueError):
            Layer(neg_input_layer)
        with pytest.raises(ValueError):
            Layer(zero_input_layer)
        Layer(pos_input_layer)

    def test_symbolic_output_shape(self):
        # get_output_shape_for() must return concrete (non-symbolic) shapes.
        from lasagne.layers.base import Layer

        class WrongLayer(Layer):
            def get_output_shape_for(self, input_shape):
                return theano.tensor.vector().shape
        with pytest.raises(ValueError) as exc:
            WrongLayer((None,)).output_shape
        assert "symbolic output shape" in exc.value.args[0]
+
+
class TestMergeLayer:
    """Tests for the base ``MergeLayer`` class, which accepts multiple
    incoming layers (or plain shape tuples) instead of a single one."""

    @pytest.fixture
    def layer(self):
        from lasagne.layers.base import MergeLayer
        return MergeLayer([Mock(), Mock()])

    def test_input_shapes(self, layer):
        assert layer.input_shapes == [l.output_shape
                                      for l in layer.input_layers]

    @pytest.fixture
    def layer_from_shape(self):
        # Incoming layers may be mixed with plain shape tuples.
        from lasagne.layers.input import InputLayer
        from lasagne.layers.base import MergeLayer
        return MergeLayer(
            [(None, 20),
             Mock(InputLayer((None,)), output_shape=(None,))]
        )

    def test_layer_from_shape(self, layer_from_shape):
        # A shape-tuple input is stored as a None input layer with the
        # given shape; real layers keep their output shapes.
        layer = layer_from_shape
        assert layer.input_layers[0] is None
        assert layer.input_shapes[0] == (None, 20)
        assert layer.input_layers[1] is not None
        assert (layer.input_shapes[1] == layer.input_layers[1].output_shape)

    def test_get_params(self, layer):
        assert layer.get_params() == []

    def test_get_output_shape_for_notimplemented(self, layer):
        with pytest.raises(NotImplementedError):
            layer.get_output_shape_for(Mock())

    def test_get_output_for_notimplemented(self, layer):
        with pytest.raises(NotImplementedError):
            layer.get_output_for(Mock())

    def test_symbolic_output_shape(self):
        # get_output_shape_for() must return concrete (non-symbolic) shapes.
        from lasagne.layers.base import MergeLayer

        class WrongLayer(MergeLayer):
            def get_output_shape_for(self, input_shapes):
                return theano.tensor.vector().shape
        with pytest.raises(ValueError) as exc:
            WrongLayer([(None,)]).output_shape
        assert "symbolic output shape" in exc.value.args[0]
diff --git a/lasagne/tests/layers/test_conv.py b/lasagne/tests/layers/test_conv.py
new file mode 100644
index 0000000..369091d
--- /dev/null
+++ b/lasagne/tests/layers/test_conv.py
@@ -0,0 +1,781 @@
+import numpy as np
+import pytest
+import importlib
+import theano
+
+import lasagne
+from lasagne.utils import floatX, as_tuple
+
+
def convNd(input, kernel, pad, stride=1, n=None):
    """Execute a batch of a stack of N-dimensional convolutions.

    Reference implementation in plain NumPy used to check the Theano layers.

    Parameters
    ----------
    input : numpy array
        Shaped (batch, in_channels) + spatial dims.
    kernel : numpy array
        Shaped (out_channels, in_channels) + filter dims.
    pad : {0, 'valid', 'same', 'full'}, int or tuple of int
    stride : int or tuple of int
    n : int
        Dimensionality of the convolution; inferred from `input` if omitted.

    Returns
    -------
    numpy array
    """
    if n is None:
        n = input.ndim - 2
    if pad not in ['valid', 'same', 'full']:
        # Numeric padding: zero-pad the spatial axes, then run 'valid'.
        pad = as_tuple(pad, n, int)
        input = np.pad(input, [(p, p) for p in (0, 0) + pad], mode='constant')
        pad = 'valid'

    # Accumulate a 'full' convolution first; it is trimmed below per `pad`.
    output = np.zeros((input.shape[0], kernel.shape[0]) +
                      tuple(i + k - 1 for i, k in zip(input.shape[2:],
                                                      kernel.shape[2:])))

    if n == 1:
        for i in range(kernel.shape[2]):
            f = kernel[:, :, i:i+1]
            c = (input[:, np.newaxis] * f).sum(axis=2)
            output[:, :,
                   i:i + input.shape[2]] += c
    elif n == 2:
        for i in range(kernel.shape[2]):
            for j in range(kernel.shape[3]):
                f = kernel[:, :, i:i+1, j:j+1]
                c = (input[:, np.newaxis] * f).sum(axis=2)
                output[:, :,
                       i:i + input.shape[2],
                       j:j + input.shape[3]] += c
    elif n == 3:
        for i in range(kernel.shape[2]):
            for j in range(kernel.shape[3]):
                for k in range(kernel.shape[4]):
                    f = kernel[:, :, i:i+1, j:j+1, k:k+1]
                    c = (input[:, np.newaxis] * f).sum(axis=2)
                    output[:, :,
                           i:i + input.shape[2],
                           j:j + input.shape[3],
                           k:k + input.shape[4]] += c
    else:
        raise NotImplementedError("convNd() only supports n in (1, 2, 3)")

    # Index with tuples of slices below: indexing an ndarray with a plain
    # *list* of slices is deprecated and removed in modern NumPy.
    if pad == 'valid':
        trim = tuple(k - 1 for k in kernel.shape[2:])
        slices = [slice(None), slice(None)]
        slices += [slice(t, -t or None) for t in trim]
        output = output[tuple(slices)]
    elif pad == 'same':
        shift = tuple((k - 1) // 2 for k in kernel.shape[2:])
        slices = [slice(None), slice(None)]
        slices += [slice(s, s + i) for s, i in zip(shift, input.shape[2:])]
        output = output[tuple(slices)]

    stride = as_tuple(stride, n, int)
    if any(s > 1 for s in stride):
        slices = [slice(None), slice(None)]
        slices += [slice(None, None, s) for s in stride]
        output = output[tuple(slices)]

    return output
+
+
def dilate(input, factors):
    """Inserts `factors[i] - 1` zeros between input elements on axis i."""
    output = np.zeros(tuple((s-1)*f + 1 for s, f in zip(input.shape, factors)),
                      dtype=input.dtype)
    # Index with a tuple of slices: indexing with a plain list of slices is
    # deprecated and removed in modern NumPy.
    output[tuple(slice(None, None, factor) for factor in factors)] = input
    return output
+
+
def transposed_convNd(input, kernel, crop, stride=1, n=None):
    # Transposed convolution: dilate the input by the stride, then run a
    # forward convolution with the "opposite" padding mode.
    if n is None:
        n = input.ndim - 2
    named = {'valid': 'full', 'full': 'valid', 'same': 'same'}
    if crop in named:
        pad = named[crop]
    else:
        # Numeric crop c maps to numeric pad (filter - 1 - c) per axis.
        crop = as_tuple(crop, n, int)
        pad = tuple(f - 1 - c for f, c in zip(kernel.shape[2:], crop))
    stride = as_tuple(stride, n, int)
    dilated_input = dilate(input, (1, 1) + stride)
    return convNd(dilated_input, kernel, pad, stride=1, n=n)
+
+
def dilated_convNd(input, kernel, pad, dilation=1, n=None):
    # Dilated convolution: convolve with a kernel whose taps are spread out
    # by inserting zeros between them.
    if n is None:
        n = input.ndim - 2
    dilation = as_tuple(dilation, n, int)
    return convNd(input, dilate(kernel, (1, 1) + dilation), pad,
                  stride=1, n=n)
+
+
def convNd_test_sets(n):
    """Yield [input (theano shared), kernel, expected output, layer kwargs]
    cases for checking an N-dimensional convolution layer against convNd()."""
    def _convert(input, kernel, output, kwargs):
        return [theano.shared(floatX(input)), floatX(kernel), output, kwargs]

    # Spatial extents per axis; the last n entries are used.
    extra_shape = (11, 16, 23)
    input_shape = (3, 1) + extra_shape[-n:]

    for pad in (0, 1, 2, 'full', 'same'):
        for stride in (1, 2, 3):
            for filter_size in (1, 3):
                if stride > filter_size:
                    continue
                input = np.random.random(input_shape)
                kernel = np.random.random((16, 1) + (filter_size,) * n)
                output = convNd(input, kernel, pad, stride, n=n)
                yield _convert(input, kernel, output, {'pad': pad,
                                                       'stride': stride,
                                                       'flip_filters': True,
                                                       })

    # bias-less case
    input = np.random.random(input_shape)
    kernel = np.random.random((16, 1) + (3,) * n)
    output = convNd(input, kernel, pad='valid')
    yield _convert(input, kernel, output, {'b': None, 'flip_filters': True})
    # untie_biases=True case
    yield _convert(input, kernel, output, {'untie_biases': True,
                                           'flip_filters': True})
    # pad='valid' case
    yield _convert(input, kernel, output, {'pad': 'valid',
                                           'flip_filters': True})
    # flip_filters=False case: the expected output is computed with a
    # spatially flipped kernel.
    flip = (slice(None), slice(None)) + (slice(None, None, -1),) * n
    output = convNd(input, kernel[flip], pad='valid')
    yield _convert(input, kernel, output, {'flip_filters': False})
+
+
def conv3d_test_sets():
    # Parameter sets for the 3D convolution tests.
    return convNd_test_sets(3)
+
+
def conv2d_test_sets():
    # Parameter sets for the 2D convolution tests.
    return convNd_test_sets(2)
+
+
def conv1d_test_sets():
    # Parameter sets for the 1D convolution tests.
    return convNd_test_sets(1)
+
+
def transp_conv2d_test_sets():
    """Yield [input, kernel, expected output, layer kwargs] cases for
    checking a 2D transposed-convolution layer against transposed_convNd()."""
    def _convert(input, kernel, output, kwargs):
        return [floatX(input), floatX(kernel), output, kwargs]

    input_shape = (3, 1, 11, 16)
    for crop in (0, 1, 2, 'full', 'same'):
        for stride in (1, 2, 3):
            for filter_size in (1, 3):
                if stride > filter_size:
                    continue
                # Numeric crops larger than filter_size - 1 are skipped
                # (they would remove more than the filter added).
                if crop not in ('full', 'same') and crop > (filter_size - 1):
                    continue
                input = np.random.random(input_shape)
                kernel = np.random.random((16, 1, filter_size, filter_size))
                output = transposed_convNd(input, kernel, crop, stride, 2)
                yield _convert(input, kernel, output, {'crop': crop,
                                                       'stride': stride,
                                                       'flip_filters': True})

    # bias-less case
    input = np.random.random(input_shape)
    kernel = np.random.random((16, 1, 3, 3))
    output = transposed_convNd(input, kernel, 'valid')
    yield _convert(input, kernel, output, {'b': None, 'flip_filters': True})
    # untie_biases=True case
    yield _convert(input, kernel, output, {'untie_biases': True,
                                           'flip_filters': True})
    # crop='valid' case
    yield _convert(input, kernel, output, {'crop': 'valid',
                                           'flip_filters': True})
    # flip_filters=False case: the expected output is computed with a
    # spatially flipped kernel.
    output = transposed_convNd(input, kernel[:, :, ::-1, ::-1], 'valid')
    yield _convert(input, kernel, output, {'flip_filters': False})
+
+
def dilated_conv2d_test_sets():
    """Yield [input, kernel, expected output, layer kwargs] cases for
    checking a 2D dilated-convolution layer against dilated_convNd()."""
    def _convert(input, kernel, output, kwargs):
        return [floatX(input), floatX(kernel), output, kwargs]

    input_shape = (3, 1, 11, 16)
    for dilation in (1, 2, 3):
        for filter_size in (1, 3):
            input = np.random.random(input_shape)
            kernel = np.random.random((16, 1, filter_size, filter_size))
            # The expected output is computed with a spatially flipped
            # kernel (all cases in this generator do this).
            kernel_flip = kernel[:, :, ::-1, ::-1]
            output = dilated_convNd(input, kernel_flip, 'valid', dilation, 2)
            yield _convert(input, kernel, output, {'dilation': dilation})

    # bias-less case
    input = np.random.random(input_shape)
    kernel = np.random.random((16, 1, 3, 3))
    output = dilated_convNd(input, kernel[:, :, ::-1, ::-1], pad='valid')
    yield _convert(input, kernel, output, {'b': None})
    # untie_biases=True case
    yield _convert(input, kernel, output, {'untie_biases': True})
+
+
+def test_conv_output_length():
+ from lasagne.layers.conv import conv_output_length
+
+ assert conv_output_length(13, 5, 3, 'valid') == 3
+ assert conv_output_length(13, 5, 3, 0) == 3
+ assert conv_output_length(13, 5, 3, 'full') == 6
+ assert conv_output_length(13, 5, 3, 'same') == 5
+ assert conv_output_length(13, 5, 3, 2) == 5
+
+ with pytest.raises(ValueError) as exc:
+ conv_output_length(13, 5, 3, '_nonexistent_mode')
+ assert "Invalid pad: " in exc.value.args[0]
+
+
+def test_conv_input_length():
+ from lasagne.layers.conv import conv_input_length
+
+ # using the examples from https://github.com/vdumoulin/conv_arithmetic
+ # no padding, no strides
+ assert conv_input_length(2, 3, 1, 'valid') == 4
+ assert conv_input_length(2, 3, 1, 0) == 4
+ # padding, no strides
+ assert conv_input_length(6, 4, 1, 2) == 5
+ # no padding, strides
+ assert conv_input_length(2, 3, 2, 0) == 5
+ # padding, strides
+ assert conv_input_length(3, 3, 2, 'same') == 5
+ # full convolution
+ assert conv_input_length(3, 3, 2, 'full') == 3
+
+ with pytest.raises(ValueError) as exc:
+ conv_input_length(3, 5, 3, '_nonexistent_mode')
+ assert "Invalid pad: " in exc.value.args[0]
+
+
+@pytest.fixture
+def DummyInputLayer():
+ def factory(shape):
+ from lasagne.layers.input import InputLayer
+ return InputLayer(shape)
+ return factory
+
+
+class TestBaseConvLayer:
+
+ def test_infer_dimensionality(self):
+ from lasagne.layers.conv import BaseConvLayer
+ shape = (10, 20, 30, 40, 50, 60)
+ for n in range(1, 4):
+ layer = BaseConvLayer(shape[:n+2], 1, 3)
+ assert layer.n == n
+
+ def test_convolve_not_implemented(self):
+ from lasagne.layers.conv import BaseConvLayer
+ layer = BaseConvLayer((10, 20, 30), 1, 3)
+ with pytest.raises(NotImplementedError):
+ layer.convolve(theano.tensor.tensor3())
+
+ def test_fail_on_mismatching_dimensionality(self):
+ from lasagne.layers.conv import BaseConvLayer
+ with pytest.raises(ValueError) as exc:
+ BaseConvLayer((10, 20, 30), 1, 3, n=2)
+ assert "Expected 4 input dimensions" in exc.value.args[0]
+ with pytest.raises(ValueError) as exc:
+ BaseConvLayer((10, 20, 30, 40), 1, 3, n=1)
+ assert "Expected 3 input dimensions" in exc.value.args[0]
+
+
+class TestConv1DLayer:
+
+ @pytest.mark.parametrize(
+ "input, kernel, output, kwargs", list(conv1d_test_sets()))
+ def test_defaults(self, DummyInputLayer,
+ input, kernel, output, kwargs):
+ b, c, w = input.shape.eval()
+ input_layer = DummyInputLayer((b, c, w))
+ try:
+ from lasagne.layers.conv import Conv1DLayer
+ layer = Conv1DLayer(
+ input_layer,
+ num_filters=kernel.shape[0],
+ filter_size=kernel.shape[2],
+ W=kernel,
+ **kwargs
+ )
+ actual = layer.get_output_for(input).eval()
+ assert actual.shape == output.shape
+ assert actual.shape == layer.output_shape
+ assert np.allclose(actual, output)
+
+ except NotImplementedError:
+ pass
+
+ def test_init_none_nonlinearity_bias(self, DummyInputLayer):
+ from lasagne.layers.conv import Conv1DLayer
+ input_layer = DummyInputLayer((1, 2, 3))
+ layer = Conv1DLayer(input_layer, num_filters=16, filter_size=(3,),
+ nonlinearity=None, b=None)
+ assert layer.nonlinearity == lasagne.nonlinearities.identity
+ assert layer.b is None
+
+ def test_invalid_pad(self, DummyInputLayer):
+ from lasagne.layers.conv import Conv1DLayer
+ input_layer = DummyInputLayer((1, 2, 3))
+ with pytest.raises(TypeError) as exc:
+ layer = Conv1DLayer(input_layer, num_filters=16, filter_size=(3,),
+ pad='_nonexistent_mode')
+ assert "iterable of int" in exc.value.args[0]
+
+ with pytest.raises(NotImplementedError) as exc:
+ layer = Conv1DLayer(input_layer, num_filters=16, filter_size=(4,),
+ pad='same')
+ assert "requires odd filter size" in exc.value.args[0]
+
+
+class TestConv2DLayerImplementations:
+
+ @pytest.fixture(
+ params=[
+ ('lasagne.layers', 'Conv2DLayer'),
+ ('lasagne.layers.cuda_convnet', 'Conv2DCCLayer'),
+ ('lasagne.layers.corrmm', 'Conv2DMMLayer'),
+ ('lasagne.layers.dnn', 'Conv2DDNNLayer'),
+ ],
+ )
+ def Conv2DImpl(self, request):
+ impl_module_name, impl_name = request.param
+ try:
+ mod = importlib.import_module(impl_module_name)
+ except ImportError:
+ pytest.skip("{} not available".format(impl_module_name))
+
+ return getattr(mod, impl_name)
+
+ @pytest.mark.parametrize(
+ "input, kernel, output, kwargs", list(conv2d_test_sets()))
+ def test_defaults(self, Conv2DImpl, DummyInputLayer,
+ input, kernel, output, kwargs):
+ b, c, h, w = input.shape.eval()
+ input_layer = DummyInputLayer((b, c, h, w))
+ try:
+ layer = Conv2DImpl(
+ input_layer,
+ num_filters=kernel.shape[0],
+ filter_size=kernel.shape[2:],
+ W=kernel,
+ **kwargs
+ )
+ actual = layer.get_output_for(input).eval()
+ assert actual.shape == output.shape
+ assert actual.shape == layer.output_shape
+ assert np.allclose(actual, output)
+
+ except NotImplementedError:
+ pytest.skip()
+
+ @pytest.mark.parametrize(
+ "input, kernel, output, kwargs", list(conv2d_test_sets()))
+ def test_with_nones(self, Conv2DImpl, DummyInputLayer,
+ input, kernel, output, kwargs):
+ if kwargs.get('untie_biases', False):
+ pytest.skip()
+ b, c, h, w = input.shape.eval()
+ input_layer = DummyInputLayer((None, c, None, None))
+ try:
+ layer = Conv2DImpl(
+ input_layer,
+ num_filters=kernel.shape[0],
+ filter_size=kernel.shape[2:],
+ W=kernel,
+ **kwargs
+ )
+ actual = layer.get_output_for(input).eval()
+
+ assert layer.output_shape == (None,
+ kernel.shape[0],
+ None,
+ None)
+ assert actual.shape == output.shape
+ assert np.allclose(actual, output)
+
+ except NotImplementedError:
+ pytest.skip()
+
+ def test_init_none_nonlinearity_bias(self, Conv2DImpl, DummyInputLayer):
+ input_layer = DummyInputLayer((1, 2, 3, 3))
+ layer = Conv2DImpl(input_layer, num_filters=16, filter_size=(3, 3),
+ nonlinearity=None, b=None)
+ assert layer.nonlinearity == lasagne.nonlinearities.identity
+ assert layer.b is None
+
+ def test_invalid_pad(self, Conv2DImpl, DummyInputLayer):
+ input_layer = DummyInputLayer((1, 2, 3, 3))
+ with pytest.raises(TypeError) as exc:
+ layer = Conv2DImpl(input_layer, num_filters=16, filter_size=(3, 3),
+ pad='_nonexistent_mode')
+ assert "iterable of int" in exc.value.args[0]
+
+ with pytest.raises(NotImplementedError) as exc:
+ layer = Conv2DImpl(input_layer, num_filters=16, filter_size=(4, 4),
+ pad='same')
+ assert "requires odd filter size" in exc.value.args[0]
+
+ def test_get_params(self, Conv2DImpl, DummyInputLayer):
+ input_layer = DummyInputLayer((128, 3, 32, 32))
+ layer = Conv2DImpl(input_layer, num_filters=16, filter_size=(3, 3))
+ assert layer.get_params() == [layer.W, layer.b]
+ assert layer.get_params(regularizable=False) == [layer.b]
+ assert layer.get_params(regularizable=True) == [layer.W]
+ assert layer.get_params(trainable=True) == [layer.W, layer.b]
+ assert layer.get_params(trainable=False) == []
+ assert layer.get_params(_nonexistent_tag=True) == []
+ assert layer.get_params(_nonexistent_tag=False) == [layer.W, layer.b]
+
+
+class TestConv3DLayerImplementations:
+
+ @pytest.fixture(
+ params=[
+ ('lasagne.layers.dnn', 'Conv3DDNNLayer'),
+ ],
+ )
+ def Conv3DImpl(self, request):
+ impl_module_name, impl_name = request.param
+ try:
+ mod = importlib.import_module(impl_module_name)
+ except ImportError:
+ pytest.skip("{} not available".format(impl_module_name))
+
+ return getattr(mod, impl_name)
+
+ @pytest.mark.parametrize(
+ "input, kernel, output, kwargs", list(conv3d_test_sets()))
+ def test_defaults(self, Conv3DImpl, DummyInputLayer,
+ input, kernel, output, kwargs):
+ b, c, d, h, w = input.shape.eval()
+ input_layer = DummyInputLayer((b, c, d, h, w))
+ try:
+ layer = Conv3DImpl(
+ input_layer,
+ num_filters=kernel.shape[0],
+ filter_size=kernel.shape[2:],
+ W=kernel,
+ **kwargs
+ )
+ actual = layer.get_output_for(input).eval()
+ assert actual.shape == output.shape
+ assert actual.shape == layer.output_shape
+ assert np.allclose(actual, output)
+
+ except NotImplementedError:
+ pytest.skip()
+
+ @pytest.mark.parametrize(
+ "input, kernel, output, kwargs", list(conv3d_test_sets()))
+ def test_with_nones(self, Conv3DImpl, DummyInputLayer,
+ input, kernel, output, kwargs):
+ if kwargs.get('untie_biases', False):
+ pytest.skip()
+ b, c, d, h, w = input.shape.eval()
+ input_layer = DummyInputLayer((None, c, None, None, None))
+ try:
+ layer = Conv3DImpl(
+ input_layer,
+ num_filters=kernel.shape[0],
+ filter_size=kernel.shape[2:],
+ W=kernel,
+ **kwargs
+ )
+ actual = layer.get_output_for(input).eval()
+
+ assert layer.output_shape == (None,
+ kernel.shape[0],
+ None,
+ None,
+ None)
+ assert actual.shape == output.shape
+ assert np.allclose(actual, output)
+
+ except NotImplementedError:
+ pytest.skip()
+
+ def test_init_none_nonlinearity_bias(self, Conv3DImpl, DummyInputLayer):
+ input_layer = DummyInputLayer((1, 2, 3, 3, 3))
+ layer = Conv3DImpl(input_layer, num_filters=16, filter_size=(3, 3, 3),
+ nonlinearity=None, b=None)
+ assert layer.nonlinearity == lasagne.nonlinearities.identity
+ assert layer.b is None
+
+ def test_invalid_pad(self, Conv3DImpl, DummyInputLayer):
+ input_layer = DummyInputLayer((1, 2, 3, 3, 3))
+ with pytest.raises(TypeError) as exc:
+ layer = Conv3DImpl(input_layer, num_filters=16,
+ filter_size=(3, 3, 3),
+ pad='_nonexistent_mode')
+ assert "iterable of int" in exc.value.args[0]
+
+ with pytest.raises(NotImplementedError) as exc:
+ layer = Conv3DImpl(input_layer, num_filters=16,
+ filter_size=(4, 4, 4),
+ pad='same')
+ assert "requires odd filter size" in exc.value.args[0]
+
+ def test_get_params(self, Conv3DImpl, DummyInputLayer):
+ input_layer = DummyInputLayer((128, 3, 32, 32, 32))
+ layer = Conv3DImpl(input_layer, num_filters=16, filter_size=(3, 3, 3))
+ assert layer.get_params() == [layer.W, layer.b]
+ assert layer.get_params(regularizable=False) == [layer.b]
+ assert layer.get_params(regularizable=True) == [layer.W]
+ assert layer.get_params(trainable=True) == [layer.W, layer.b]
+ assert layer.get_params(trainable=False) == []
+ assert layer.get_params(_nonexistent_tag=True) == []
+ assert layer.get_params(_nonexistent_tag=False) == [layer.W, layer.b]
+
+
+class TestTransposedConv2DLayer:
+ @pytest.mark.parametrize(
+ "input, kernel, output, kwargs", list(transp_conv2d_test_sets()))
+ def test_defaults(self, DummyInputLayer, input, kernel, output, kwargs):
+ from lasagne.layers import TransposedConv2DLayer
+ b, c, h, w = input.shape
+ input_layer = DummyInputLayer((b, c, h, w))
+ layer = TransposedConv2DLayer(
+ input_layer,
+ num_filters=kernel.shape[0],
+ filter_size=kernel.shape[2:],
+ W=kernel.transpose(1, 0, 2, 3),
+ **kwargs)
+ actual = layer.get_output_for(input).eval()
+ assert actual.shape == output.shape
+ assert actual.shape == layer.output_shape
+ assert np.allclose(actual, output)
+
+ @pytest.mark.parametrize(
+ "input, kernel, output, kwargs", list(transp_conv2d_test_sets()))
+ def test_with_nones(self, DummyInputLayer, input, kernel, output, kwargs):
+ if kwargs.get('untie_biases', False):
+ pytest.skip()
+ from lasagne.layers import TransposedConv2DLayer
+ b, c, h, w = input.shape
+ input_layer = DummyInputLayer((None, c, None, None))
+ layer = TransposedConv2DLayer(
+ input_layer,
+ num_filters=kernel.shape[0],
+ filter_size=kernel.shape[2:],
+ W=kernel.transpose(1, 0, 2, 3),
+ **kwargs)
+ assert layer.output_shape == (None, output.shape[1], None, None)
+ actual = layer.get_output_for(input).eval()
+ assert actual.shape == output.shape
+ assert np.allclose(actual, output)
+
+
+class TestDilatedConv2DLayer:
+ @pytest.mark.parametrize(
+ "input, kernel, output, kwargs", list(dilated_conv2d_test_sets()))
+ def test_defaults(self, DummyInputLayer, input, kernel, output, kwargs):
+ from lasagne.layers import DilatedConv2DLayer
+ b, c, h, w = input.shape
+ input_layer = DummyInputLayer((b, c, h, w))
+ layer = DilatedConv2DLayer(
+ input_layer,
+ num_filters=kernel.shape[0],
+ filter_size=kernel.shape[2:],
+ W=kernel.transpose(1, 0, 2, 3),
+ **kwargs)
+ actual = layer.get_output_for(theano.shared(input)).eval()
+ assert actual.shape == output.shape
+ assert actual.shape == layer.output_shape
+ assert np.allclose(actual, output)
+
+ @pytest.mark.parametrize(
+ "input, kernel, output, kwargs", list(dilated_conv2d_test_sets()))
+ def test_with_nones(self, DummyInputLayer, input, kernel, output, kwargs):
+ if kwargs.get('untie_biases', False):
+ pytest.skip()
+ from lasagne.layers import DilatedConv2DLayer
+ b, c, h, w = input.shape
+ input_layer = DummyInputLayer((None, c, None, None))
+ layer = DilatedConv2DLayer(
+ input_layer,
+ num_filters=kernel.shape[0],
+ filter_size=kernel.shape[2:],
+ W=kernel.transpose(1, 0, 2, 3),
+ **kwargs)
+ assert layer.output_shape == (None, output.shape[1], None, None)
+ actual = layer.get_output_for(input).eval()
+ assert actual.shape == output.shape
+ assert np.allclose(actual, output)
+
+ def test_unsupported_settings(self, DummyInputLayer):
+ from lasagne.layers import DilatedConv2DLayer
+ input_layer = DummyInputLayer((10, 20, 30, 40))
+ for pad in 'same', 'full', 1:
+ with pytest.raises(NotImplementedError) as exc:
+ DilatedConv2DLayer(input_layer, 2, 3, pad=pad)
+ assert "requires pad=0" in exc.value.args[0]
+ with pytest.raises(NotImplementedError) as exc:
+ DilatedConv2DLayer(input_layer, 2, 3, flip_filters=True)
+ assert "requires flip_filters=False" in exc.value.args[0]
+
+
+class TestConv2DDNNLayer:
+ def test_import_without_gpu_or_cudnn_raises(self):
+ from theano.sandbox import cuda
+ if cuda.cuda_enabled and cuda.dnn.dnn_available():
+ pytest.skip()
+ else:
+ with pytest.raises(ImportError):
+ import lasagne.layers.dnn
+
+
+class TestConv2DMMLayer:
+ def test_import_without_gpu_raises(self):
+ from theano.sandbox import cuda
+ if cuda.cuda_enabled:
+ pytest.skip()
+ else:
+ with pytest.raises(ImportError):
+ import lasagne.layers.corrmm
+
+
+class TestConv2DCCLayer:
+ def test_import_without_gpu_raises(self):
+ from theano.sandbox import cuda
+ if cuda.cuda_enabled:
+ pytest.skip()
+ else:
+ with pytest.raises(ImportError):
+ import lasagne.layers.cuda_convnet
+
+ def test_unsupported_settings(self, DummyInputLayer):
+ try:
+ from lasagne.layers.cuda_convnet import Conv2DCCLayer
+ except ImportError:
+ pytest.skip("cuda_convnet not available")
+
+ input_layer = DummyInputLayer((128, 3, 32, 32))
+
+ with pytest.raises(RuntimeError) as exc:
+ layer = Conv2DCCLayer(input_layer, num_filters=16,
+ filter_size=(3, 5))
+ assert ("Conv2DCCLayer only supports square filters" in
+ exc.value.args[0])
+
+ with pytest.raises(RuntimeError) as exc:
+ layer = Conv2DCCLayer(input_layer, num_filters=16,
+ filter_size=(3, 3), stride=(1, 2))
+ assert ("Conv2DCCLayer only supports square strides" in
+ exc.value.args[0])
+
+ with pytest.raises(RuntimeError) as exc:
+ layer = Conv2DCCLayer(input_layer, num_filters=15,
+ filter_size=(3, 3))
+ assert ("Conv2DCCLayer requires num_filters to be a multiple of 16" in
+ exc.value.args[0])
+
+ with pytest.raises(RuntimeError) as exc:
+ layer = Conv2DCCLayer(input_layer, num_filters=16,
+ filter_size=(3, 3), pad=(1, 2))
+ assert ("Conv2DCCLayer only supports square padding" in
+ exc.value.args[0])
+
+ input_layer = DummyInputLayer((128, 7, 32, 32))
+
+ with pytest.raises(RuntimeError) as exc:
+ layer = Conv2DCCLayer(input_layer, num_filters=16,
+ filter_size=(3, 3))
+ assert ("Conv2DCCLayer requires the number of input channels to be "
+ "1, 2, 3 or a multiple of 4" in exc.value.args[0])
+
+ def test_pad(self, DummyInputLayer):
+ try:
+ from lasagne.layers.cuda_convnet import Conv2DCCLayer
+ except ImportError:
+ pytest.skip("cuda_convnet not available")
+
+ input_layer = DummyInputLayer((128, 3, 32, 32))
+ layer = Conv2DCCLayer(input_layer, num_filters=16, filter_size=(3, 3),
+ pad=(3, 3))
+ assert layer.output_shape == (128, 16, 36, 36)
+
+ def test_dimshuffle_false_shapes(self, DummyInputLayer):
+ try:
+ from lasagne.layers.cuda_convnet import Conv2DCCLayer
+ except ImportError:
+ pytest.skip("cuda_convnet not available")
+
+ input_layer = DummyInputLayer((4, 32, 32, 128)) # c01b instead of bc01
+ layer = Conv2DCCLayer(input_layer, num_filters=16, filter_size=(3, 3),
+ dimshuffle=False)
+ assert layer.W.get_value().shape == (4, 3, 3, 16)
+ assert layer.b.get_value().shape == (16,)
+
+ layer = Conv2DCCLayer(input_layer, num_filters=16, filter_size=(3, 3),
+ dimshuffle=False, untie_biases=True)
+ assert layer.W.get_value().shape == (4, 3, 3, 16)
+ assert layer.b.get_value().shape == (16, 30, 30)
+
+ def test_dimshuffle_false_get_output_for(self, DummyInputLayer):
+ try:
+ from lasagne.layers.cuda_convnet import Conv2DCCLayer
+ except ImportError:
+ pytest.skip("cuda_convnet not available")
+
+ # this implementation is tested against FilterActs instead of
+ # theano.tensor.nnet.conv.conv2d because using the latter leads to
+ # numerical precision errors.
+ from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs
+ filter_acts = FilterActs(stride=1, pad=0, partial_sum=1)
+
+ input = theano.shared(floatX(np.random.random((4, 5, 5, 8))))
+ kernel = theano.shared(floatX(np.random.random((4, 3, 3, 16))))
+
+ input_layer = DummyInputLayer((4, 5, 5, 8)) # c01b instead of bc01
+ layer = Conv2DCCLayer(input_layer, num_filters=16, filter_size=(3, 3),
+ dimshuffle=False, W=kernel, b=None,
+ nonlinearity=None)
+
+ output = np.array(filter_acts(input, kernel).eval())
+
+ actual = layer.get_output_for(input).eval()
+ actual = np.array(actual)
+ assert actual.shape == output.shape
+ assert actual.shape == layer.output_shape
+ assert np.allclose(actual, output)
+
+
+class TestShuffleLayers:
+ def test_bc01_to_c01b(self):
+ from lasagne.layers.input import InputLayer
+ try:
+ from lasagne.layers.cuda_convnet import ShuffleBC01ToC01BLayer
+ except ImportError:
+ pytest.skip("cuda_convnet not available")
+
+ input_layer = InputLayer((1, 2, 3, 4))
+ layer = ShuffleBC01ToC01BLayer(input_layer)
+ assert layer.output_shape == (2, 3, 4, 1)
+
+ input = floatX(np.random.random((1, 2, 3, 4)))
+ output = input.transpose(1, 2, 3, 0)
+ actual = layer.get_output_for(theano.shared(input)).eval()
+ assert np.allclose(output, actual)
+
+ def test_c01b_to_bc01(self):
+ from lasagne.layers.input import InputLayer
+ try:
+ from lasagne.layers.cuda_convnet import ShuffleC01BToBC01Layer
+ except ImportError:
+ pytest.skip("cuda_convnet not available")
+
+ input_layer = InputLayer((1, 2, 3, 4))
+ layer = ShuffleC01BToBC01Layer(input_layer)
+ assert layer.output_shape == (4, 1, 2, 3)
+
+ input = floatX(np.random.random((1, 2, 3, 4)))
+ output = input.transpose(3, 0, 1, 2)
+ actual = layer.get_output_for(theano.shared(input)).eval()
+ assert np.allclose(output, actual)
diff --git a/lasagne/tests/layers/test_dense.py b/lasagne/tests/layers/test_dense.py
new file mode 100644
index 0000000..58aca9f
--- /dev/null
+++ b/lasagne/tests/layers/test_dense.py
@@ -0,0 +1,361 @@
+from mock import Mock
+import numpy as np
+import pytest
+import theano
+
+
+import lasagne
+
+
+class TestDenseLayer:
+ @pytest.fixture
+ def DenseLayer(self):
+ from lasagne.layers.dense import DenseLayer
+ return DenseLayer
+
+ @pytest.fixture
+ def layer_vars(self, dummy_input_layer):
+ from lasagne.layers.dense import DenseLayer
+ W = Mock()
+ b = Mock()
+ nonlinearity = Mock()
+
+ W.return_value = np.ones((12, 3))
+ b.return_value = np.ones((3,)) * 3
+ layer = DenseLayer(
+ dummy_input_layer,
+ num_units=3,
+ W=W,
+ b=b,
+ nonlinearity=nonlinearity,
+ )
+
+ return {
+ 'W': W,
+ 'b': b,
+ 'nonlinearity': nonlinearity,
+ 'layer': layer,
+ }
+
+ @pytest.fixture
+ def layer(self, layer_vars):
+ return layer_vars['layer']
+
+ def test_init(self, layer_vars):
+ layer = layer_vars['layer']
+ assert (layer.W.get_value() == layer_vars['W'].return_value).all()
+ assert (layer.b.get_value() == layer_vars['b'].return_value).all()
+ layer_vars['W'].assert_called_with((12, 3))
+ layer_vars['b'].assert_called_with((3,))
+
+ def test_init_none_nonlinearity_bias(self, DenseLayer, dummy_input_layer):
+ layer = DenseLayer(
+ dummy_input_layer,
+ num_units=3,
+ nonlinearity=None,
+ b=None,
+ )
+ assert layer.nonlinearity == lasagne.nonlinearities.identity
+ assert layer.b is None
+
+ def test_get_params(self, layer):
+ assert layer.get_params() == [layer.W, layer.b]
+ assert layer.get_params(regularizable=False) == [layer.b]
+ assert layer.get_params(regularizable=True) == [layer.W]
+ assert layer.get_params(trainable=True) == [layer.W, layer.b]
+ assert layer.get_params(trainable=False) == []
+ assert layer.get_params(_nonexistent_tag=True) == []
+ assert layer.get_params(_nonexistent_tag=False) == [layer.W, layer.b]
+
+ def test_get_output_shape_for(self, layer):
+ assert layer.get_output_shape_for((5, 6, 7)) == (5, 3)
+
+ def test_get_output_for(self, layer_vars):
+ layer = layer_vars['layer']
+ nonlinearity = layer_vars['nonlinearity']
+ W = layer_vars['W']()
+ b = layer_vars['b']()
+
+ input = theano.shared(np.ones((2, 12)))
+ result = layer.get_output_for(input)
+ assert result is nonlinearity.return_value
+
+ # Check that the input to the nonlinearity was what we expect
+ # from dense layer, i.e. the dot product plus bias
+ nonlinearity_arg = nonlinearity.call_args[0][0]
+ assert (nonlinearity_arg.eval() ==
+ np.dot(input.get_value(), W) + b).all()
+
+ def test_get_output_for_flattens_input(self, layer_vars):
+ layer = layer_vars['layer']
+ nonlinearity = layer_vars['nonlinearity']
+ W = layer_vars['W']()
+ b = layer_vars['b']()
+
+ input = theano.shared(np.ones((2, 3, 4)))
+ result = layer.get_output_for(input)
+ assert result is nonlinearity.return_value
+
+ # Check that the input to the nonlinearity was what we expect
+ # from dense layer, i.e. the dot product plus bias
+ nonlinearity_arg = nonlinearity.call_args[0][0]
+ assert np.allclose(nonlinearity_arg.eval(),
+ np.dot(input.get_value().reshape(2, -1), W) + b)
+
+ def test_param_names(self, layer):
+ assert layer.W.name == "W"
+ assert layer.b.name == "b"
+
+ def test_named_layer_param_names(self, DenseLayer, dummy_input_layer):
+ layer = DenseLayer(
+ dummy_input_layer,
+ num_units=3,
+ name="foo"
+ )
+
+ assert layer.W.name == "foo.W"
+ assert layer.b.name == "foo.b"
+
+
+class TestNINLayer:
+ @pytest.fixture
+ def dummy_input_layer(self):
+ from lasagne.layers.input import InputLayer
+ input_layer = InputLayer((2, 3, 4, 5))
+ mock = Mock(input_layer)
+ mock.shape = input_layer.shape
+ mock.input_var = input_layer.input_var
+ mock.output_shape = input_layer.output_shape
+ return mock
+
+ @pytest.fixture
+ def NINLayer(self):
+ from lasagne.layers.dense import NINLayer
+ return NINLayer
+
+ @pytest.fixture
+ def layer_vars(self, NINLayer, dummy_input_layer):
+ W = Mock()
+ b = Mock()
+ nonlinearity = Mock()
+
+ W.return_value = np.ones((3, 5))
+ b.return_value = np.ones((5,))
+ layer = NINLayer(
+ dummy_input_layer,
+ num_units=5,
+ W=W,
+ b=b,
+ nonlinearity=nonlinearity,
+ )
+
+ return {
+ 'W': W,
+ 'b': b,
+ 'nonlinearity': nonlinearity,
+ 'layer': layer,
+ }
+
+ @pytest.fixture
+ def layer(self, layer_vars):
+ return layer_vars['layer']
+
+ def test_init(self, layer_vars):
+ layer = layer_vars['layer']
+ assert (layer.W.get_value() == layer_vars['W'].return_value).all()
+ assert (layer.b.get_value() == layer_vars['b'].return_value).all()
+ layer_vars['W'].assert_called_with((3, 5))
+ layer_vars['b'].assert_called_with((5,))
+
+ def test_init_none_nonlinearity_bias(self, NINLayer, dummy_input_layer):
+ layer = NINLayer(
+ dummy_input_layer,
+ num_units=3,
+ nonlinearity=None,
+ b=None,
+ )
+ assert layer.nonlinearity == lasagne.nonlinearities.identity
+ assert layer.b is None
+
+ def test_init_untie_biases(self, NINLayer, dummy_input_layer):
+ layer = NINLayer(
+ dummy_input_layer,
+ num_units=5,
+ untie_biases=True,
+ )
+ assert (layer.b.shape.eval() == (5, 4, 5)).all()
+
+ def test_get_params(self, layer):
+ assert layer.get_params() == [layer.W, layer.b]
+ assert layer.get_params(regularizable=False) == [layer.b]
+ assert layer.get_params(regularizable=True) == [layer.W]
+ assert layer.get_params(trainable=True) == [layer.W, layer.b]
+ assert layer.get_params(trainable=False) == []
+ assert layer.get_params(_nonexistent_tag=True) == []
+ assert layer.get_params(_nonexistent_tag=False) == [layer.W, layer.b]
+
+ def test_get_output_shape_for(self, layer):
+ assert layer.get_output_shape_for((5, 6, 7, 8)) == (5, 5, 7, 8)
+
+ @pytest.mark.parametrize("extra_kwargs", [
+ {},
+ {'untie_biases': True},
+ {'b': None},
+ ])
+ def test_get_output_for(self, dummy_input_layer, extra_kwargs):
+ from lasagne.layers.dense import NINLayer
+ nonlinearity = Mock()
+
+ layer = NINLayer(
+ dummy_input_layer,
+ num_units=6,
+ nonlinearity=nonlinearity,
+ **extra_kwargs
+ )
+
+ input = theano.shared(np.random.uniform(-1, 1, (2, 3, 4, 5)))
+ result = layer.get_output_for(input)
+ assert result is nonlinearity.return_value
+
+ nonlinearity_arg = nonlinearity.call_args[0][0]
+ X = input.get_value()
+ X = np.rollaxis(X, 1).T
+ X = np.dot(X, layer.W.get_value())
+ if layer.b is not None:
+ if layer.untie_biases:
+ X += layer.b.get_value()[:, np.newaxis].T
+ else:
+ X += layer.b.get_value()
+ X = np.rollaxis(X.T, 0, 2)
+ assert np.allclose(nonlinearity_arg.eval(), X)
+
+ def test_param_names(self, layer):
+ assert layer.W.name == "W"
+ assert layer.b.name == "b"
+
+ def test_named_layer_param_names(self, NINLayer, dummy_input_layer):
+ layer = NINLayer(
+ dummy_input_layer,
+ num_units=3,
+ name="foo"
+ )
+
+ assert layer.W.name == "foo.W"
+ assert layer.b.name == "foo.b"
+
+
+class TestNINLayer_c01b:
+ @pytest.fixture
+ def dummy_input_layer(self):
+ from lasagne.layers.input import InputLayer
+ input_layer = InputLayer((3, 4, 5, 2))
+ mock = Mock(input_layer)
+ mock.shape = input_layer.shape
+ mock.input_var = input_layer.input_var
+ mock.output_shape = input_layer.output_shape
+ return mock
+
+ @pytest.fixture
+ def NINLayer_c01b(self):
+ try:
+ from lasagne.layers.cuda_convnet import NINLayer_c01b
+ except ImportError:
+ pytest.skip("cuda_convnet not available")
+ return NINLayer_c01b
+
+ @pytest.fixture
+ def layer_vars(self, NINLayer_c01b, dummy_input_layer):
+ W = Mock()
+ b = Mock()
+ nonlinearity = Mock()
+
+ W.return_value = np.ones((5, 3))
+ b.return_value = np.ones((5,))
+ layer = NINLayer_c01b(
+ dummy_input_layer,
+ num_units=5,
+ W=W,
+ b=b,
+ nonlinearity=nonlinearity,
+ )
+
+ return {
+ 'W': W,
+ 'b': b,
+ 'nonlinearity': nonlinearity,
+ 'layer': layer,
+ }
+
+ @pytest.fixture
+ def layer(self, layer_vars):
+ return layer_vars['layer']
+
+ def test_init(self, layer_vars):
+ layer = layer_vars['layer']
+ assert (layer.W.get_value() == layer_vars['W'].return_value).all()
+ assert (layer.b.get_value() == layer_vars['b'].return_value).all()
+ layer_vars['W'].assert_called_with((5, 3))
+ layer_vars['b'].assert_called_with((5,))
+
+ def test_init_none_nonlinearity_bias(self, NINLayer_c01b,
+ dummy_input_layer):
+ layer = NINLayer_c01b(
+ dummy_input_layer,
+ num_units=3,
+ nonlinearity=None,
+ b=None,
+ )
+ assert layer.nonlinearity == lasagne.nonlinearities.identity
+ assert layer.b is None
+
+ def test_init_untie_biases(self, NINLayer_c01b, dummy_input_layer):
+ layer = NINLayer_c01b(
+ dummy_input_layer,
+ num_units=5,
+ untie_biases=True,
+ )
+ assert (layer.b.shape.eval() == (5, 4, 5)).all()
+
+ def test_get_params(self, layer):
+ assert layer.get_params() == [layer.W, layer.b]
+ assert layer.get_params(regularizable=False) == [layer.b]
+ assert layer.get_params(regularizable=True) == [layer.W]
+ assert layer.get_params(trainable=True) == [layer.W, layer.b]
+ assert layer.get_params(trainable=False) == []
+ assert layer.get_params(_nonexistent_tag=True) == []
+ assert layer.get_params(_nonexistent_tag=False) == [layer.W, layer.b]
+
+ def test_get_output_shape_for(self, layer):
+ assert layer.get_output_shape_for((6, 7, 8, 5)) == (5, 7, 8, 5)
+
+ @pytest.mark.parametrize("extra_kwargs", [
+ {},
+ {'untie_biases': True},
+ {'b': None},
+ ])
+ def test_get_output_for(self, dummy_input_layer, NINLayer_c01b,
+ extra_kwargs):
+ nonlinearity = Mock()
+
+ layer = NINLayer_c01b(
+ dummy_input_layer,
+ num_units=6,
+ nonlinearity=nonlinearity,
+ **extra_kwargs
+ )
+
+ input = theano.shared(np.random.uniform(-1, 1, (3, 4, 5, 2)))
+ result = layer.get_output_for(input)
+ assert result is nonlinearity.return_value
+
+ nonlinearity_arg = nonlinearity.call_args[0][0]
+ X = input.get_value()
+ W = layer.W.get_value()
+ out = np.dot(W, X.reshape(X.shape[0], -1))
+ out = out.reshape(W.shape[0], X.shape[1], X.shape[2], X.shape[3])
+ if layer.b is not None:
+ if layer.untie_biases:
+ out += layer.b.get_value()[..., None]
+ else:
+ out += layer.b.get_value()[:, None, None, None]
+ assert np.allclose(nonlinearity_arg.eval(), out)
diff --git a/lasagne/tests/layers/test_embedding.py b/lasagne/tests/layers/test_embedding.py
new file mode 100644
index 0000000..36c8d00
--- /dev/null
+++ b/lasagne/tests/layers/test_embedding.py
@@ -0,0 +1,56 @@
+import numpy
+import pytest
+import theano
+
+
+def test_embedding_2D_input():
+ import numpy as np
+ import theano
+ import theano.tensor as T
+ from lasagne.layers import EmbeddingLayer, InputLayer, helper
+ x = T.imatrix()
+ batch_size = 2
+ seq_len = 3
+ emb_size = 5
+ vocab_size = 3
+ l_in = InputLayer((None, seq_len))
+ W = np.arange(
+ vocab_size*emb_size).reshape((vocab_size, emb_size)).astype('float32')
+ l1 = EmbeddingLayer(l_in, input_size=vocab_size, output_size=emb_size,
+ W=W)
+
+ x_test = np.array([[0, 1, 2], [0, 0, 2]], dtype='int32')
+
+ # check output shape
+ assert helper.get_output_shape(
+ l1, (batch_size, seq_len)) == (batch_size, seq_len, emb_size)
+
+ output = helper.get_output(l1, x)
+ f = theano.function([x], output)
+ np.testing.assert_array_almost_equal(f(x_test), W[x_test])
+
+
+def test_embedding_1D_input():
+ import numpy as np
+ import theano
+ import theano.tensor as T
+ from lasagne.layers import EmbeddingLayer, InputLayer, helper
+ x = T.ivector()
+ batch_size = 2
+ emb_size = 10
+ vocab_size = 3
+ l_in = InputLayer((None,))
+ W = np.arange(
+ vocab_size*emb_size).reshape((vocab_size, emb_size)).astype('float32')
+ l1 = EmbeddingLayer(l_in, input_size=vocab_size, output_size=emb_size,
+ W=W)
+
+ x_test = np.array([0, 1, 2], dtype='int32')
+
+ # check output shape
+ assert helper.get_output_shape(
+ l1, (batch_size, )) == (batch_size, emb_size)
+
+ output = helper.get_output(l1, x)
+ f = theano.function([x], output)
+ np.testing.assert_array_almost_equal(f(x_test), W[x_test])
diff --git a/lasagne/tests/layers/test_helper.py b/lasagne/tests/layers/test_helper.py
new file mode 100644
index 0000000..326a96e
--- /dev/null
+++ b/lasagne/tests/layers/test_helper.py
@@ -0,0 +1,791 @@
+import warnings
+from mock import Mock, PropertyMock
+import pytest
+import numpy
+import theano
+
+
+class TestGetAllLayers:
+ def test_stack(self):
+ from lasagne.layers import InputLayer, DenseLayer, get_all_layers
+ from itertools import permutations
+ # l1 --> l2 --> l3
+ l1 = InputLayer((10, 20))
+ l2 = DenseLayer(l1, 30)
+ l3 = DenseLayer(l2, 40)
+ # try all possible combinations and orders for a query
+ for count in (0, 1, 2, 3):
+ for query in permutations([l1, l2, l3], count):
+ if l3 in query:
+ expected = [l1, l2, l3]
+ elif l2 in query:
+ expected = [l1, l2]
+ elif l1 in query:
+ expected = [l1]
+ else:
+ expected = []
+ assert get_all_layers(query) == expected
+ # treat_as_input=[l2] should block l1 from appearing
+ assert get_all_layers(l3, treat_as_input=[l2]) == [l2, l3]
+
+ def test_merge(self):
+ from lasagne.layers import (InputLayer, DenseLayer, ElemwiseSumLayer,
+ get_all_layers)
+ # l1 --> l2 --> l3 --> l6
+ # l4 --> l5 ----^
+ l1 = InputLayer((10, 20))
+ l2 = DenseLayer(l1, 30)
+ l3 = DenseLayer(l2, 40)
+ l4 = InputLayer((10, 30))
+ l5 = DenseLayer(l4, 40)
+ l6 = ElemwiseSumLayer([l3, l5])
+ # try various combinations and orders for a query
+ assert get_all_layers(l6) == [l1, l2, l3, l4, l5, l6]
+ assert get_all_layers([l4, l6]) == [l4, l1, l2, l3, l5, l6]
+ assert get_all_layers([l5, l6]) == [l4, l5, l1, l2, l3, l6]
+ assert get_all_layers([l4, l2, l5, l6]) == [l4, l1, l2, l5, l3, l6]
+ # check that treat_as_input correctly blocks the search
+ assert get_all_layers(l6, treat_as_input=[l2]) == [l2, l3, l4, l5, l6]
+ assert get_all_layers(l6, treat_as_input=[l3, l5]) == [l3, l5, l6]
+ assert get_all_layers([l6, l2], treat_as_input=[l6]) == [l6, l1, l2]
+
+ def test_split(self):
+ from lasagne.layers import InputLayer, DenseLayer, get_all_layers
+ # l1 --> l2 --> l3
+ # \---> l4
+ l1 = InputLayer((10, 20))
+ l2 = DenseLayer(l1, 30)
+ l3 = DenseLayer(l2, 40)
+ l4 = DenseLayer(l1, 50)
+ # try various combinations and orders for a query
+ assert get_all_layers(l3) == [l1, l2, l3]
+ assert get_all_layers(l4) == [l1, l4]
+ assert get_all_layers([l3, l4]) == [l1, l2, l3, l4]
+ assert get_all_layers([l4, l3]) == [l1, l4, l2, l3]
+ # check that treat_as_input correctly blocks the search
+ assert get_all_layers(l3, treat_as_input=[l2]) == [l2, l3]
+ assert get_all_layers([l3, l4], treat_as_input=[l2]) == [l2, l3,
+ l1, l4]
+
+ def test_bridge(self):
+ from lasagne.layers import (InputLayer, DenseLayer, ElemwiseSumLayer,
+ get_all_layers)
+ # l1 --> l2 --> l3 --> l4 --> l5
+ # \------------^
+ l1 = InputLayer((10, 20))
+ l2 = DenseLayer(l1, 30)
+ l3 = DenseLayer(l2, 30)
+ l4 = ElemwiseSumLayer([l2, l3])
+ l5 = DenseLayer(l4, 40)
+ # check for correct topological order
+ assert get_all_layers(l5) == [l1, l2, l3, l4, l5]
+ # check that treat_as_input=[l4] blocks the search and =[l3] does not
+ assert get_all_layers(l5, treat_as_input=[l4]) == [l4, l5]
+ assert get_all_layers(l5, treat_as_input=[l3]) == [l1, l2, l3, l4, l5]
+
+
+class TestGetOutput_InputLayer:
+ @pytest.fixture
+ def get_output(self):
+ from lasagne.layers.helper import get_output
+ return get_output
+
+ @pytest.fixture
+ def layer(self):
+ from lasagne.layers.input import InputLayer
+ return InputLayer((3, 2))
+
+ def test_get_output_without_arguments(self, layer, get_output):
+ assert get_output(layer) is layer.input_var
+
+ def test_get_output_input_is_variable(self, layer, get_output):
+ variable = theano.Variable("myvariable")
+ assert get_output(layer, variable) is variable
+
+ def test_get_output_input_is_array(self, layer, get_output):
+ inputs = [[1, 2, 3]]
+ output = get_output(layer, inputs)
+ assert numpy.all(output.eval() == inputs)
+
+ def test_get_output_input_is_a_mapping(self, layer, get_output):
+ inputs = {layer: theano.tensor.matrix()}
+ assert get_output(layer, inputs) is inputs[layer]
+
+
+class TestGetOutput_Layer:
+ @pytest.fixture
+ def get_output(self):
+ from lasagne.layers.helper import get_output
+ return get_output
+
+ @pytest.fixture
+ def layers(self):
+ from lasagne.layers.base import Layer
+ from lasagne.layers.input import InputLayer
+ # create a mock that has the same attributes as an InputLayer instance
+ l1 = Mock(InputLayer((None,)), output_shape=(None,),
+ get_output_kwargs=[])
+ # create a mock that has the same attributes as a Layer instance
+ l2 = Mock(Layer(l1), output_shape=(None,), get_output_kwargs=[])
+ # link it to the InputLayer mock
+ l2.input_layer = l1
+ # create another mock that has the same attributes as a Layer instance
+ l3 = Mock(Layer(l2), output_shape=(None,), get_output_kwargs=['kwarg'])
+ # link it to the first mock, to get an "l1 --> l2 --> l3" chain
+ l3.input_layer = l2
+ return l1, l2, l3
+
+ def test_get_output_without_arguments(self, layers, get_output):
+ l1, l2, l3 = layers
+ output = get_output(l3)
+ # expected: l3.get_output_for(l2.get_output_for(l1.input_var))
+ assert output is l3.get_output_for.return_value
+ l3.get_output_for.assert_called_with(
+ l2.get_output_for.return_value)
+ l2.get_output_for.assert_called_with(
+ l1.input_var)
+
+ def test_get_output_with_single_argument(self, layers, get_output):
+ l1, l2, l3 = layers
+ inputs, kwarg = theano.tensor.matrix(), object()
+ output = get_output(l3, inputs, kwarg=kwarg)
+ # expected: l3.get_output_for(l2.get_output_for(inputs, kwarg=kwarg),
+ # kwarg=kwarg)
+ assert output is l3.get_output_for.return_value
+ l3.get_output_for.assert_called_with(
+ l2.get_output_for.return_value, kwarg=kwarg)
+ l2.get_output_for.assert_called_with(
+ inputs, kwarg=kwarg)
+
+ def test_get_output_input_is_a_mapping(self, layers, get_output):
+ l1, l2, l3 = layers
+ p = PropertyMock()
+ type(l1).input_var = p
+ inputs = {l3: theano.tensor.matrix()}
+ # expected: inputs[l3]
+ assert get_output(l3, inputs) is inputs[l3]
+ # l3.get_output_for, l2.get_output_for should not have been called
+ assert l3.get_output_for.call_count == 0
+ assert l2.get_output_for.call_count == 0
+ # l1.input_var should not have been accessed
+ assert p.call_count == 0
+
+ def test_get_output_input_is_a_mapping_no_key(self, layers, get_output):
+ l1, l2, l3 = layers
+ output = get_output(l3, {})
+ # expected: l3.get_output_for(l2.get_output_for(l1.input_var))
+ assert output is l3.get_output_for.return_value
+ l3.get_output_for.assert_called_with(
+ l2.get_output_for.return_value)
+ l2.get_output_for.assert_called_with(
+ l1.input_var)
+
+ def test_get_output_input_is_a_mapping_to_array(self, layers, get_output):
+ l1, l2, l3 = layers
+ p = PropertyMock()
+ type(l1).input_var = p
+ inputs = {l3: [[1, 2, 3]]}
+ output = get_output(l3, inputs)
+ # expected: inputs[l3]
+ assert numpy.all(output.eval() == inputs[l3])
+ # l3.get_output_for, l2.get_output_for should not have been called
+ assert l3.get_output_for.call_count == 0
+ assert l2.get_output_for.call_count == 0
+ # l1.input_var should not have been accessed
+ assert p.call_count == 0
+
+ def test_get_output_input_is_a_mapping_for_layer(self, layers, get_output):
+ l1, l2, l3 = layers
+ p = PropertyMock()
+ type(l1).input_var = p
+ input_expr, kwarg = theano.tensor.matrix(), object()
+ inputs = {l2: input_expr}
+ output = get_output(l3, inputs, kwarg=kwarg)
+ # expected: l3.get_output_for(input_expr, kwarg=kwarg)
+ assert output is l3.get_output_for.return_value
+ l3.get_output_for.assert_called_with(input_expr, kwarg=kwarg)
+ # l2.get_output_for should not have been called
+ assert l2.get_output_for.call_count == 0
+ # l1.input_var should not have been accessed
+ assert p.call_count == 0
+
+ def test_get_output_input_is_a_mapping_for_input_layer(self, layers,
+ get_output):
+ l1, l2, l3 = layers
+ p = PropertyMock()
+ type(l1).input_var = p
+ input_expr, kwarg = theano.tensor.matrix(), object()
+ inputs = {l1: input_expr}
+ output = get_output(l3, inputs, kwarg=kwarg)
+ # expected: l3.get_output_for(l2.get_output_for(input_expr,
+ # kwarg=kwarg),
+ # kwarg=kwarg)
+ assert output is l3.get_output_for.return_value
+ l3.get_output_for.assert_called_with(
+ l2.get_output_for.return_value, kwarg=kwarg)
+ l2.get_output_for.assert_called_with(
+ input_expr, kwarg=kwarg)
+ # l1.input_var should not have been accessed
+ assert p.call_count == 0
+
+ def test_get_output_with_unused_kwarg(self, layers, get_output):
+ l1, l2, l3 = layers
+ unused_kwarg = object()
+ with warnings.catch_warnings(record=True) as w:
+ warnings.simplefilter('always')
+ get_output(l3, kwagg=unused_kwarg)
+ assert len(w) == 1
+ assert issubclass(w[0].category, UserWarning)
+ assert 'perhaps you meant kwarg' in str(w[0].message)
+
+ def test_get_output_with_no_unused_kwarg(self, layers, get_output):
+ l1, l2, l3 = layers
+ with warnings.catch_warnings(record=True) as w:
+ warnings.simplefilter('always')
+ get_output(l3)
+ assert len(w) == 0
+
+ @pytest.fixture
+ def layer_from_shape(self):
+ from lasagne.layers.base import Layer
+ return Layer((None, 20))
+
+ def test_layer_from_shape_invalid_get_output(self, layer_from_shape,
+ get_output):
+ layer = layer_from_shape
+ with pytest.raises(ValueError):
+ get_output(layer)
+ with pytest.raises(ValueError):
+ get_output(layer, [1, 2])
+ with pytest.raises(ValueError):
+ get_output(layer, {Mock(): [1, 2]})
+
+ def test_layer_from_shape_valid_get_output(self, layer_from_shape,
+ get_output):
+ layer = layer_from_shape
+ inputs = {layer: theano.tensor.matrix()}
+ assert get_output(layer, inputs) is inputs[layer]
+ inputs = {None: theano.tensor.matrix()}
+ layer.get_output_for = Mock()
+ assert get_output(layer, inputs) is layer.get_output_for.return_value
+ layer.get_output_for.assert_called_with(inputs[None])
+
+
+class TestGetOutput_MergeLayer:
+ @pytest.fixture
+ def get_output(self):
+ from lasagne.layers.helper import get_output
+ return get_output
+
+ @pytest.fixture
+ def layers(self):
+ from lasagne.layers.base import Layer, MergeLayer
+ from lasagne.layers.input import InputLayer
+ # create two mocks with the same attributes as an InputLayer instance
+ l1 = [Mock(InputLayer((None,)), output_shape=(None,),
+ get_output_kwargs=[]),
+ Mock(InputLayer((None,)), output_shape=(None,),
+ get_output_kwargs=[])]
+ # create two mocks with the same attributes as a Layer instance
+ l2 = [Mock(Layer(l1[0]), output_shape=(None,),
+ get_output_kwargs=[]),
+ Mock(Layer(l1[1]), output_shape=(None,),
+ get_output_kwargs=[])]
+ # link them to the InputLayer mocks
+ l2[0].input_layer = l1[0]
+ l2[1].input_layer = l1[1]
+ # create a mock that has the same attributes as a MergeLayer
+ l3 = Mock(MergeLayer(l2), get_output_kwargs=['kwarg'])
+ # link it to the two layer mocks, to get the following network:
+ # l1[0] --> l2[0] --> l3
+ # l1[1] --> l2[1] ----^
+ l3.input_layers = l2
+ return l1, l2, l3
+
+ def test_get_output_without_arguments(self, layers, get_output):
+ l1, l2, l3 = layers
+ output = get_output(l3)
+ # expected: l3.get_output_for([l2[0].get_output_for(l1[0].input_var),
+ # l2[1].get_output_for(l1[1].input_var)])
+ assert output is l3.get_output_for.return_value
+ l3.get_output_for.assert_called_with([
+ l2[0].get_output_for.return_value,
+ l2[1].get_output_for.return_value,
+ ])
+ l2[0].get_output_for.assert_called_with(
+ l1[0].input_var)
+ l2[1].get_output_for.assert_called_with(
+ l1[1].input_var)
+
+ def test_get_output_with_single_argument_fails(self, layers, get_output):
+ l1, l2, l3 = layers
+ inputs, kwarg = theano.tensor.matrix(), object()
+ # expected to fail: only gave one expression for two input layers
+ with pytest.raises(ValueError):
+ output = get_output(l3, inputs, kwarg=kwarg)
+
+ def test_get_output_input_is_a_mapping(self, layers, get_output):
+ l1, l2, l3 = layers
+ p = PropertyMock()
+ type(l1[0]).input_var = p
+ type(l1[1]).input_var = p
+ inputs = {l3: theano.tensor.matrix()}
+ # expected: inputs[l3]
+ assert get_output(l3, inputs) is inputs[l3]
+ # l3.get_output_for, l2[*].get_output_for should not have been called
+ assert l3.get_output_for.call_count == 0
+ assert l2[0].get_output_for.call_count == 0
+ assert l2[1].get_output_for.call_count == 0
+ # l1[*].input_var should not have been accessed
+ assert p.call_count == 0
+
+ def test_get_output_input_is_a_mapping_no_key(self, layers, get_output):
+ l1, l2, l3 = layers
+ output = get_output(l3, {})
+ # expected: l3.get_output_for([l2[0].get_output_for(l1[0].input_var),
+ # l2[1].get_output_for(l1[1].input_var)])
+ assert output is l3.get_output_for.return_value
+ l3.get_output_for.assert_called_with([
+ l2[0].get_output_for.return_value,
+ l2[1].get_output_for.return_value,
+ ])
+ l2[0].get_output_for.assert_called_with(
+ l1[0].input_var)
+ l2[1].get_output_for.assert_called_with(
+ l1[1].input_var)
+
+ def test_get_output_input_is_a_mapping_to_array(self, layers, get_output):
+ l1, l2, l3 = layers
+ p = PropertyMock()
+ type(l1[0]).input_var = p
+ type(l1[1]).input_var = p
+ inputs = {l3: [[1, 2, 3]]}
+ output = get_output(l3, inputs)
+ # expected: inputs[l3]
+ assert numpy.all(output.eval() == inputs[l3])
+ # l3.get_output_for, l2[*].get_output_for should not have been called
+ assert l3.get_output_for.call_count == 0
+ assert l2[0].get_output_for.call_count == 0
+ assert l2[1].get_output_for.call_count == 0
+ # l1[*].input_var should not have been accessed
+ assert p.call_count == 0
+
+ def test_get_output_input_is_a_mapping_for_layer(self, layers, get_output):
+ l1, l2, l3 = layers
+ p = PropertyMock()
+ type(l1[0]).input_var = p
+ input_expr, kwarg = theano.tensor.matrix(), object()
+ inputs = {l2[0]: input_expr}
+ output = get_output(l3, inputs, kwarg=kwarg)
+ # expected: l3.get_output_for([input_expr,
+ # l2[1].get_output_for(l1[1].input_var,
+ # kwarg=kwarg)],
+ # kwarg=kwarg)
+ assert output is l3.get_output_for.return_value
+ l3.get_output_for.assert_called_with([
+ input_expr,
+ l2[1].get_output_for.return_value,
+ ], kwarg=kwarg)
+ l2[1].get_output_for.assert_called_with(
+ l1[1].input_var, kwarg=kwarg)
+ # l2[0].get_output_for should not have been called
+ assert l2[0].get_output_for.call_count == 0
+ # l1[0].input_var should not have been accessed
+ assert p.call_count == 0
+
+ def test_get_output_input_is_a_mapping_for_input_layer(self, layers,
+ get_output):
+ l1, l2, l3 = layers
+ p = PropertyMock()
+ type(l1[0]).input_var = p
+ input_expr, kwarg = theano.tensor.matrix(), object()
+ inputs = {l1[0]: input_expr}
+ output = get_output(l3, inputs, kwarg=kwarg)
+ # expected: l3.get_output_for([l2[0].get_output_for(input_expr,
+ # kwarg=kwarg),
+ # l2[1].get_output_for(l1[1].input_var,
+ # kwarg=kwarg)],
+ # kwarg=kwarg)
+ assert output is l3.get_output_for.return_value
+ l3.get_output_for.assert_called_with([
+ l2[0].get_output_for.return_value,
+ l2[1].get_output_for.return_value,
+ ], kwarg=kwarg)
+ l2[0].get_output_for.assert_called_with(
+ input_expr, kwarg=kwarg)
+ l2[1].get_output_for.assert_called_with(
+ l1[1].input_var, kwarg=kwarg)
+ # l1[0].input_var should not have been accessed
+ assert p.call_count == 0
+
+ @pytest.fixture
+ def layer_from_shape(self):
+ from lasagne.layers.input import InputLayer
+ from lasagne.layers.base import MergeLayer
+ return MergeLayer([
+ (None, 20),
+ Mock(InputLayer((None,)), output_shape=(None,))])
+
+ def test_layer_from_shape_invalid_get_output(self, layer_from_shape,
+ get_output):
+ layer = layer_from_shape
+ with pytest.raises(ValueError):
+ get_output(layer)
+ with pytest.raises(ValueError):
+ get_output(layer, [1, 2])
+ with pytest.raises(ValueError):
+ get_output(layer, {layer.input_layers[1]: [1, 2]})
+
+ def test_layer_from_shape_valid_get_output(self, layer_from_shape,
+ get_output):
+ layer = layer_from_shape
+ inputs = {layer: theano.tensor.matrix()}
+ assert get_output(layer, inputs) is inputs[layer]
+ inputs = {None: theano.tensor.matrix()}
+ layer.get_output_for = Mock()
+ assert get_output(layer, inputs) is layer.get_output_for.return_value
+ layer.get_output_for.assert_called_with(
+ [inputs[None], layer.input_layers[1].input_var])
+
+
+class TestGetOutputShape_InputLayer:
+ @pytest.fixture
+ def get_output_shape(self):
+ from lasagne.layers.helper import get_output_shape
+ return get_output_shape
+
+ @pytest.fixture
+ def layer(self):
+ from lasagne.layers.input import InputLayer
+ return InputLayer((3, 2))
+
+ def test_get_output_shape_without_arguments(self, layer, get_output_shape):
+ assert get_output_shape(layer) == (3, 2)
+
+ def test_get_output_shape_input_is_tuple(self, layer, get_output_shape):
+ shp = (4, 5, 6)
+ assert get_output_shape(layer, shp) == shp
+
+ def test_get_output_shape_input_is_a_mapping(self, layer,
+ get_output_shape):
+ input_shapes = {layer: (4, 5, 6)}
+ assert get_output_shape(layer, input_shapes) == input_shapes[layer]
+
+
+class TestGetOutputShape_Layer:
+ @pytest.fixture
+ def get_output_shape(self):
+ from lasagne.layers.helper import get_output_shape
+ return get_output_shape
+
+ @pytest.fixture
+ def layers(self):
+ from lasagne.layers.base import Layer
+ from lasagne.layers.input import InputLayer
+ # create a mock that has the same attributes as an InputLayer instance
+ l1 = Mock(InputLayer((None,)), output_shape=(None,))
+ # create a mock that has the same attributes as a Layer instance
+ l2 = Mock(Layer(l1), output_shape=(None,))
+ # link it to the InputLayer mock
+ l2.input_layer = l1
+ # create another mock that has the same attributes as a Layer instance
+ l3 = Mock(Layer(l2), output_shape=(None,))
+ # link it to the first mock, to get an "l1 --> l2 --> l3" chain
+ l3.input_layer = l2
+ return l1, l2, l3
+
+ def test_get_output_shape_without_arguments(self, layers,
+ get_output_shape):
+ l1, l2, l3 = layers
+ output_shape = get_output_shape(l3)
+ # expected: l3.output_shape
+ assert output_shape is l3.output_shape
+ # l3.get_output_shape_for, l2.get_output_shape_for should not have been
+ # called
+ assert l3.get_output_shape_for.call_count == 0
+ assert l2.get_output_shape_for.call_count == 0
+
+ def test_get_output_shape_with_single_argument(self, layers,
+ get_output_shape):
+ l1, l2, l3 = layers
+ shp = (3, 4, 5)
+ output_shape = get_output_shape(l3, shp)
+ # expected: l3.get_output_shape_for(l2.get_output_shape_for(shp))
+ assert output_shape is l3.get_output_shape_for.return_value
+ l3.get_output_shape_for.assert_called_with(
+ l2.get_output_shape_for.return_value)
+ l2.get_output_shape_for.assert_called_with(shp)
+
+ def test_get_output_shape_input_is_a_mapping(self, layers,
+ get_output_shape):
+ l1, l2, l3 = layers
+ input_shapes = {l3: (4, 5, 6)}
+ # expected: input_shapes[l3]
+ assert get_output_shape(l3, input_shapes) is input_shapes[l3]
+ # l3.get_output_shape_for, l2.get_output_shape_for should not have been
+ # called
+ assert l3.get_output_shape_for.call_count == 0
+ assert l2.get_output_shape_for.call_count == 0
+
+ def test_get_output_shape_input_is_a_mapping_no_key(self, layers,
+ get_output_shape):
+ l1, l2, l3 = layers
+ output_shape = get_output_shape(l3, {})
+ # expected: l3.output_shape
+ assert output_shape is l3.output_shape
+ # l3.get_output_shape_for, l2.get_output_shape_for should not have been
+ # called
+ assert l3.get_output_shape_for.call_count == 0
+ assert l2.get_output_shape_for.call_count == 0
+
+ def test_get_output_shape_input_is_a_mapping_for_layer(self, layers,
+ get_output_shape):
+ l1, l2, l3 = layers
+ shp = (4, 5, 6)
+ input_shapes = {l2: shp}
+ output_shape = get_output_shape(l3, input_shapes)
+ # expected: l3.get_output_shape_for(shp)
+ assert output_shape is l3.get_output_shape_for.return_value
+ l3.get_output_shape_for.assert_called_with(shp)
+ # l2.get_output_shape_for should not have been called
+ assert l2.get_output_shape_for.call_count == 0
+
+ def test_get_output_shape_input_is_a_mapping_for_input_layer(
+ self, layers, get_output_shape):
+ l1, l2, l3 = layers
+ shp = (4, 5, 6)
+ input_shapes = {l1: shp}
+ output_shape = get_output_shape(l3, input_shapes)
+ # expected: l3.get_output_shape_for(l2.get_output_shape_for(shp))
+ assert output_shape is l3.get_output_shape_for.return_value
+ l3.get_output_shape_for.assert_called_with(
+ l2.get_output_shape_for.return_value)
+ l2.get_output_shape_for.assert_called_with(shp)
+
+ @pytest.fixture
+ def layer_from_shape(self):
+ from lasagne.layers.base import Layer
+ return Layer((None, 20))
+
+ def test_layer_from_shape(self, layer_from_shape, get_output_shape):
+ layer = layer_from_shape
+ input_shapes = {layer: (4, 5, 6)}
+ assert get_output_shape(layer, input_shapes) is input_shapes[layer]
+ input_shapes = {None: (4, 5, 6)}
+ layer.get_output_shape_for = Mock()
+ assert (get_output_shape(layer, input_shapes) is
+ layer.get_output_shape_for.return_value)
+ layer.get_output_shape_for.assert_called_with(input_shapes[None])
+
+
+class TestGetOutputShape_MergeLayer:
+ @pytest.fixture
+ def get_output_shape(self):
+ from lasagne.layers.helper import get_output_shape
+ return get_output_shape
+
+ @pytest.fixture
+ def layers(self):
+ from lasagne.layers.base import Layer, MergeLayer
+ from lasagne.layers.input import InputLayer
+ # create two mocks with the same attributes as an InputLayer instance
+ l1 = [Mock(InputLayer((None,)), output_shape=(None,)),
+ Mock(InputLayer((None,)), output_shape=(None,))]
+ # create two mocks with the same attributes as a Layer instance
+ l2 = [Mock(Layer(l1[0]), output_shape=(None,)),
+ Mock(Layer(l1[1]), output_shape=(None,))]
+ # link them to the InputLayer mocks
+ l2[0].input_layer = l1[0]
+ l2[1].input_layer = l1[1]
+ # create a mock that has the same attributes as a MergeLayer
+ l3 = Mock(MergeLayer(l2))
+ # link it to the two layer mocks, to get the following network:
+ # l1[0] --> l2[0] --> l3
+ # l1[1] --> l2[1] ----^
+ l3.input_layers = l2
+ return l1, l2, l3
+
+ def test_get_output_shape_without_arguments(self, layers,
+ get_output_shape):
+ l1, l2, l3 = layers
+ output_shape = get_output_shape(l3)
+ # expected: l3.output_shape
+ assert output_shape is l3.output_shape
+ # l3.get_output_shape_for, l2[*].get_output_shape_for should not have
+ # been called
+ assert l3.get_output_shape_for.call_count == 0
+ assert l2[0].get_output_shape_for.call_count == 0
+ assert l2[1].get_output_shape_for.call_count == 0
+
+ def test_get_output_shape_with_single_argument_fails(self, layers,
+ get_output_shape):
+ l1, l2, l3 = layers
+ shp = (4, 5, 6)
+ # expected to fail: only gave one shape tuple for two input layers
+ with pytest.raises(ValueError):
+ output_shape = get_output_shape(l3, shp)
+
+ def test_get_output_shape_input_is_a_mapping(self, layers,
+ get_output_shape):
+ l1, l2, l3 = layers
+ input_shapes = {l3: (4, 5, 6)}
+ # expected: input_shapes[l3]
+ assert get_output_shape(l3, input_shapes) is input_shapes[l3]
+ # l3.get_output_shape_for, l2[*].get_output_shape_for should not have
+ # been called
+ assert l3.get_output_shape_for.call_count == 0
+ assert l2[0].get_output_shape_for.call_count == 0
+ assert l2[1].get_output_shape_for.call_count == 0
+
+ def test_get_output_shape_input_is_a_mapping_no_key(self, layers,
+ get_output_shape):
+ l1, l2, l3 = layers
+ output_shape = get_output_shape(l3, {})
+ # expected: l3.output_shape
+ assert output_shape is l3.output_shape
+ # l3.get_output_shape_for, l2[*].get_output_shape_for should not have
+ # been called
+ assert l3.get_output_shape_for.call_count == 0
+ assert l2[0].get_output_shape_for.call_count == 0
+ assert l2[1].get_output_shape_for.call_count == 0
+
+ def test_get_output_shape_input_is_a_mapping_for_layer(self, layers,
+ get_output_shape):
+ l1, l2, l3 = layers
+ shp = (4, 5, 6)
+ input_shapes = {l2[0]: shp}
+ output = get_output_shape(l3, input_shapes)
+ # expected: l3.get_output_shape_for(
+ # [shp, l2[1].get_output_shape_for(l1[1].shape)])
+ assert output is l3.get_output_shape_for.return_value
+ l3.get_output_shape_for.assert_called_with([
+ shp, l2[1].get_output_shape_for.return_value])
+ l2[1].get_output_shape_for.assert_called_with(l1[1].shape)
+ # l2[0].get_output_shape_for should not have been called
+ assert l2[0].get_output_shape_for.call_count == 0
+
+ def test_get_output_shape_input_is_a_mapping_for_input_layer(
+ self, layers, get_output_shape):
+ l1, l2, l3 = layers
+ shp = (4, 5, 6)
+ input_shapes = {l1[0]: shp}
+ output = get_output_shape(l3, input_shapes)
+ # expected: l3.get_output_shape_for(
+ # [l2[0].get_output_shape_for(shp),
+ # l2[1].get_output_shape_for(l1[1].shape)])
+ assert output is l3.get_output_shape_for.return_value
+ l3.get_output_shape_for.assert_called_with([
+ l2[0].get_output_shape_for.return_value,
+ l2[1].get_output_shape_for.return_value,
+ ])
+ l2[0].get_output_shape_for.assert_called_with(shp)
+ l2[1].get_output_shape_for.assert_called_with(l1[1].shape)
+
+ @pytest.fixture
+ def layer_from_shape(self):
+ from lasagne.layers.input import InputLayer
+ from lasagne.layers.base import MergeLayer
+ return MergeLayer([
+ (None, 20),
+ Mock(InputLayer((None,)), output_shape=(None,))])
+
+ def test_layer_from_shape_valid_get_output_shape(self, layer_from_shape,
+ get_output_shape):
+ layer = layer_from_shape
+ input_shapes = {layer: (4, 5, 6)}
+ assert get_output_shape(layer, input_shapes) is input_shapes[layer]
+ input_shapes = {None: (4, 5, 6)}
+ layer.get_output_shape_for = Mock()
+ assert (get_output_shape(layer, input_shapes) is
+ layer.get_output_shape_for.return_value)
+ layer.get_output_shape_for.assert_called_with(
+ [input_shapes[None], layer.input_layers[1].shape])
+
+
+class TestGetAllParams:
+ def test_get_all_params(self):
+ from lasagne.layers import (InputLayer, DenseLayer, get_all_params)
+ l1 = InputLayer((10, 20))
+ l2 = DenseLayer(l1, 30)
+ l3 = DenseLayer(l2, 40)
+
+ assert get_all_params(l3) == l2.get_params() + l3.get_params()
+ assert (get_all_params(l3, regularizable=False) ==
+ (l2.get_params(regularizable=False) +
+ l3.get_params(regularizable=False)))
+
+ assert (get_all_params(l3, regularizable=True) ==
+ (l2.get_params(regularizable=True) +
+ l3.get_params(regularizable=True)))
+
+ def test_get_all_params_with_unwrap_shared(self):
+ from lasagne.layers import (InputLayer, DenseLayer, get_all_params)
+ import theano.tensor as T
+ from lasagne.utils import floatX
+
+ l1 = InputLayer((10, 20))
+ l2 = DenseLayer(l1, 30)
+
+ W1 = theano.shared(floatX(numpy.zeros((30, 2))))
+ W2 = theano.shared(floatX(numpy.zeros((2, 40))))
+ W_expr = T.dot(W1, W2)
+ l3 = DenseLayer(l2, 40, W=W_expr, b=None)
+
+ l2_params = get_all_params(l2)
+ assert get_all_params(l3) == l2_params + [W1, W2]
+ assert get_all_params(l3, unwrap_shared=False) == l2_params + [W_expr]
+
+
+class TestCountParams:
+ def test_get_all_params(self):
+ from lasagne.layers import (InputLayer, DenseLayer, count_params)
+ l1 = InputLayer((10, 20))
+ l2 = DenseLayer(l1, 30)
+ l3 = DenseLayer(l2, 40)
+
+ num_weights = 20 * 30 + 30 * 40
+ num_biases = 30 + 40
+
+ assert count_params(l3, regularizable=True) == num_weights
+ assert count_params(l3, regularizable=False) == num_biases
+ assert count_params(l3) == num_weights + num_biases
+
+
+class TestGetAllParamValues:
+ def test_get_all_param_values(self):
+ from lasagne.layers import (InputLayer, DenseLayer,
+ get_all_param_values)
+ l1 = InputLayer((10, 20))
+ l2 = DenseLayer(l1, 30)
+ l3 = DenseLayer(l2, 40)
+
+ pvs = get_all_param_values(l3)
+ assert len(pvs) == 4
+
+
+class TestSetAllParamValues:
+ def test_set_all_param_values(self):
+ from lasagne.layers import (InputLayer, DenseLayer,
+ set_all_param_values)
+ from lasagne.utils import floatX
+
+ l1 = InputLayer((10, 20))
+ l2 = DenseLayer(l1, 30)
+ l3 = DenseLayer(l2, 40)
+
+ a2 = floatX(numpy.random.normal(0, 1, (20, 30)))
+ b2 = floatX(numpy.random.normal(0, 1, (30,)))
+ a3 = floatX(numpy.random.normal(0, 1, (30, 40)))
+ b3 = floatX(numpy.random.normal(0, 1, (40,)))
+ set_all_param_values(l3, [a2, b2, a3, b3])
+ assert numpy.allclose(l3.W.get_value(), a3)
+ assert numpy.allclose(l3.b.get_value(), b3)
+ assert numpy.allclose(l2.W.get_value(), a2)
+ assert numpy.allclose(l2.b.get_value(), b2)
+
+ with pytest.raises(ValueError):
+ set_all_param_values(l3, [a3, b3, a2])
+
+ with pytest.raises(ValueError):
+ a3_bad = floatX(numpy.random.normal(0, 1, (25, 40)))
+ set_all_param_values(l3, [a2, b2, a3_bad, b3])
diff --git a/lasagne/tests/layers/test_input.py b/lasagne/tests/layers/test_input.py
new file mode 100644
index 0000000..88654e5
--- /dev/null
+++ b/lasagne/tests/layers/test_input.py
@@ -0,0 +1,41 @@
+import pytest
+import theano
+
+
+class TestInputLayer:
+ @pytest.fixture
+ def layer(self):
+ from lasagne.layers.input import InputLayer
+ return InputLayer((3, 2))
+
+ def test_input_var(self, layer):
+ assert layer.input_var.ndim == 2
+
+ def test_shape(self, layer):
+ assert layer.shape == (3, 2)
+
+ def test_input_var_name(self, layer):
+ assert layer.input_var.name == "input"
+
+ def test_named_layer_input_var_name(self):
+ from lasagne.layers.input import InputLayer
+ layer = InputLayer((3, 2), name="foo")
+ assert layer.input_var.name == "foo.input"
+
+ def test_get_params(self, layer):
+ assert layer.get_params() == []
+
+ def test_bad_shape_fails(self):
+ from lasagne.layers.input import InputLayer
+ input_var = theano.tensor.tensor4()
+
+ with pytest.raises(ValueError):
+ InputLayer((3, 2), input_var)
+
+ def test_nonpositive_input_dims_raises_value_error(self):
+ from lasagne.layers import InputLayer
+ with pytest.raises(ValueError):
+ InputLayer(shape=(None, -1, -1))
+ with pytest.raises(ValueError):
+ InputLayer(shape=(None, 0, 0))
+ InputLayer(shape=(None, 1, 1))
diff --git a/lasagne/tests/layers/test_merge.py b/lasagne/tests/layers/test_merge.py
new file mode 100644
index 0000000..470ea5c
--- /dev/null
+++ b/lasagne/tests/layers/test_merge.py
@@ -0,0 +1,256 @@
+from mock import Mock
+import numpy
+import pytest
+import theano
+
+
class TestAutocrop:
    # Tests for the cropping helpers in lasagne.layers.merge:
    # autocrop_array_shapes() (static shape inference) and autocrop()
    # (symbolic cropping of actual tensors).
    def test_autocrop_array_shapes(self):
        from lasagne.layers.merge import autocrop_array_shapes
        # No cropping at all
        crop0 = None
        # One mode per axis: leave axis 0 alone, crop the rest
        crop1 = [None, 'lower', 'center', 'upper']
        # Too few crop modes; should get padded with None
        crop2 = ['lower', 'upper']
        # Invalid crop modes
        crop_bad = ['lower', 'upper', 'bad', 'worse']

        # crop0: shapes pass through unchanged
        assert autocrop_array_shapes(
            [(1, 2, 3, 4), (5, 6, 7, 8), (5, 4, 3, 2)], crop0) == \
            [(1, 2, 3, 4), (5, 6, 7, 8), (5, 4, 3, 2)]
        # crop1: axes 1-3 shrink to the minimum size across inputs;
        # axis 0 (mode None) is left as-is
        assert autocrop_array_shapes(
            [(1, 2, 3, 4), (5, 6, 7, 8), (5, 4, 3, 2)], crop1) == \
            [(1, 2, 3, 2), (5, 2, 3, 2), (5, 2, 3, 2)]
        # crop2: only the first two axes are cropped; the missing trailing
        # modes behave like None
        assert autocrop_array_shapes(
            [(1, 2, 3, 4), (5, 6, 7, 8), (5, 4, 3, 2)], crop2) == \
            [(1, 2, 3, 4), (1, 2, 7, 8), (1, 2, 3, 2)]

        # Unknown crop mode names are rejected
        with pytest.raises(ValueError):
            autocrop_array_shapes(
                [(1, 2, 3, 4), (5, 6, 7, 8), (5, 4, 3, 2)], crop_bad)

        # Inconsistent dimensionality
        with pytest.raises(ValueError):
            autocrop_array_shapes(
                [(1, 2, 3, 4), (5, 6, 7), (5, 4, 3, 2, 10)], crop1)

    def test_crop_inputs(self):
        from lasagne.layers.merge import autocrop
        from numpy.testing import assert_array_equal
        # Cropping specs: none, mixed per-axis modes, uniform lower /
        # center / upper on all axes, a short spec, and an invalid spec.
        crop_0 = None
        crop_1 = [None, 'lower', 'center', 'upper']
        crop_l = ['lower', 'lower', 'lower', 'lower']
        crop_c = ['center', 'center', 'center', 'center']
        crop_u = ['upper', 'upper', 'upper', 'upper']
        crop_x = ['lower', 'lower']
        crop_bad = ['lower', 'lower', 'bad', 'worse']

        x0 = numpy.random.random((2, 3, 5, 7))
        x1 = numpy.random.random((1, 2, 3, 4))
        x2 = numpy.random.random((6, 3, 4, 2))

        def crop_test(cropping, inputs, expected):
            # Run autocrop on shared variables and compare the evaluated
            # outputs against the expected numpy slices.
            inputs = [theano.shared(x) for x in inputs]
            outs = autocrop(inputs, cropping)
            outs = [o.eval() for o in outs]
            assert len(outs) == len(expected)
            for o, e in zip(outs, expected):
                assert_array_equal(o, e)

        crop_test(crop_0, [x0, x1],
                  [x0, x1])
        crop_test(crop_1, [x0, x1],
                  [x0[:, :2, 1:4, 3:], x1[:, :, :, :]])
        crop_test(crop_l, [x0, x1],
                  [x0[:1, :2, :3, :4], x1[:, :, :, :]])
        crop_test(crop_c, [x0, x1],
                  [x0[:1, :2, 1:4, 1:5], x1[:, :, :, :]])
        crop_test(crop_u, [x0, x1],
                  [x0[1:, 1:, 2:, 3:], x1[:, :, :, :]])

        crop_test(crop_0, [x0, x2],
                  [x0, x2])
        crop_test(crop_1, [x0, x2],
                  [x0[:, :, :4, 5:], x2[:, :, :, :]])
        crop_test(crop_l, [x0, x2],
                  [x0[:, :, :4, :2], x2[:2, :, :, :]])
        crop_test(crop_c, [x0, x2],
                  [x0[:, :, :4, 2:4], x2[2:4, :, :, :]])
        crop_test(crop_u, [x0, x2],
                  [x0[:, :, 1:, 5:], x2[4:, :, :, :]])

        crop_test(crop_0, [x0, x1, x2],
                  [x0, x1, x2])
        crop_test(crop_1, [x0, x1, x2],
                  [x0[:, :2, 1:4, 5:], x1[:, :, :, 2:], x2[:, :2, :3, :]])
        crop_test(crop_l, [x0, x1, x2],
                  [x0[:1, :2, :3, :2], x1[:, :, :, :2], x2[:1, :2, :3, :]])
        crop_test(crop_c, [x0, x1, x2],
                  [x0[:1, :2, 1:4, 2:4], x1[:, :, :, 1:3], x2[2:3, :2, :3, :]])
        crop_test(crop_u, [x0, x1, x2],
                  [x0[1:, 1:, 2:, 5:], x1[:, :, :, 2:], x2[5:, 1:, 1:, :]])

        crop_test(crop_x, [x0, x1, x2],
                  [x0[:1, :2, :, :], x1[:1, :2, :, :], x2[:1, :2, :, :]])

        # test that num outputs is correct when the number of inputs is
        # larger than ndim of the inputs.
        crop_test(crop_x, [x0, x1, x2, x0, x1, x2],
                  [x0[:1, :2, :, :], x1[:1, :2, :, :], x2[:1, :2, :, :],
                   x0[:1, :2, :, :], x1[:1, :2, :, :], x2[:1, :2, :, :]])

        # Invalid crop mode names are rejected
        with pytest.raises(ValueError):
            crop_test(crop_bad, [x0, x1, x2],
                      [x0[:1, :2, :, :], x1[:1, :2, :, :], x2[:1, :2, :, :]])

        # Inconsistent dimensionality
        with pytest.raises(ValueError):
            crop_test(crop_bad, [x0[:, :, :, 0], x1, x2[:, :, :, :, None]],
                      [x0[:1, :2, :, :], x1[:1, :2, :, :], x2[:1, :2, :, :]])
+
+
class TestConcatLayer:
    """Tests for ``lasagne.layers.merge.ConcatLayer``."""

    def layer(self, axis):
        from lasagne.layers.merge import ConcatLayer
        return ConcatLayer([Mock(), Mock()], axis=axis)

    @pytest.fixture
    def crop_layer_0(self):
        # Concatenate along axis 0, cropping every other axis 'lower'.
        from lasagne.layers.merge import ConcatLayer
        return ConcatLayer([Mock(), Mock()], axis=0,
                           cropping=['lower'] * 2)

    @pytest.fixture
    def crop_layer_1(self):
        # Concatenate along axis 1, cropping every other axis 'lower'.
        from lasagne.layers.merge import ConcatLayer
        return ConcatLayer([Mock(), Mock()], axis=1,
                           cropping=['lower'] * 2)

    @pytest.mark.parametrize("axis", (1, -1))
    def test_get_output_shape_for(self, axis):
        layer = self.layer(axis)
        # Sizes add up along the concat axis; None propagates or is
        # resolved from the other input where possible.
        assert layer.get_output_shape_for([(3, 2), (3, 5)]) == (3, 7)
        assert layer.get_output_shape_for([(3, 2), (3, None)]) == (3, None)
        assert layer.get_output_shape_for([(None, 2), (3, 5)]) == (3, 7)
        assert layer.get_output_shape_for([(None, 2), (None, 5)]) == (None, 7)
        # Mismatching sizes on the non-concatenated axis are rejected.
        for bad_shapes in ([(4, None), (3, 5)],
                           [(3, 2), (4, None)],
                           [(None, 2), (3, 5), (4, 5)]):
            with pytest.raises(ValueError):
                layer.get_output_shape_for(bad_shapes)

    def test_get_output_shape_for_cropped(self, crop_layer_0, crop_layer_1):
        # With cropping, the non-concatenated axis shrinks to the minimum.
        input_shapes = [(3, 2), (4, 5)]
        assert crop_layer_0.get_output_shape_for(input_shapes) == (7, 2)
        assert crop_layer_1.get_output_shape_for(input_shapes) == (3, 7)

    @pytest.mark.parametrize("axis", (1, -1))
    def test_get_output_for(self, axis):
        layer = self.layer(axis)
        values = [numpy.ones((3, 3)), numpy.ones((3, 2))]
        inputs = [theano.shared(v) for v in values]
        actual = layer.get_output_for(inputs).eval()
        expected = numpy.hstack(values)
        assert (actual == expected).all()

    def test_get_output_for_cropped(self, crop_layer_0, crop_layer_1):
        x0 = numpy.random.random((5, 3))
        x1 = numpy.random.random((4, 2))
        inputs = [theano.shared(x0), theano.shared(x1)]
        actual_0 = crop_layer_0.get_output_for(inputs).eval()
        actual_1 = crop_layer_1.get_output_for(inputs).eval()
        # Inputs are cropped to a common size on all axes except the
        # concatenation axis before joining.
        expected_0 = numpy.concatenate([x0[:, :2], x1[:, :2]], axis=0)
        expected_1 = numpy.concatenate([x0[:4, :], x1[:4, :]], axis=1)
        assert (actual_0 == expected_0).all()
        assert (actual_1 == expected_1).all()
+
+
class TestElemwiseSumLayer:
    """Tests for ``lasagne.layers.merge.ElemwiseSumLayer``."""

    @pytest.fixture
    def layer(self):
        # Weighted sum: 2 * first input - second input.
        from lasagne.layers.merge import ElemwiseSumLayer
        return ElemwiseSumLayer([Mock(), Mock()], coeffs=[2, -1])

    @pytest.fixture
    def crop_layer(self):
        # Same coefficients, but crop all axes 'lower' first.
        from lasagne.layers.merge import ElemwiseSumLayer
        return ElemwiseSumLayer([Mock(), Mock()], coeffs=[2, -1],
                                cropping=['lower'] * 2)

    def test_get_output_shape_for(self, layer):
        # None sizes are filled in from the other input where possible.
        assert layer.get_output_shape_for([(3, 2), (3, 2)]) == (3, 2)
        assert layer.get_output_shape_for([(3, 2), (3, None)]) == (3, 2)
        assert layer.get_output_shape_for([(None, 2), (3, 2)]) == (3, 2)
        assert layer.get_output_shape_for([(None, 2), (None, 2)]) == (None, 2)
        # Conflicting known sizes are rejected.
        for bad_shapes in ([(3, None), (4, 2)],
                           [(3, 2), (4, None)],
                           [(None, 2), (3, 2), (4, 2)]):
            with pytest.raises(ValueError):
                layer.get_output_shape_for(bad_shapes)

    def test_get_output_for(self, layer):
        first = numpy.array([[0, 1], [2, 3]])
        second = numpy.array([[1, 2], [4, 5]])
        inputs = [theano.shared(first), theano.shared(second)]
        actual = layer.get_output_for(inputs).eval()
        assert (actual == 2 * first - second).all()

    def test_get_output_for_cropped(self, crop_layer):
        from numpy.testing import assert_array_almost_equal as aeq
        x0 = numpy.random.random((5, 3))
        x1 = numpy.random.random((4, 2))
        inputs = [theano.shared(x0), theano.shared(x1)]
        actual = crop_layer.get_output_for(inputs).eval()
        # Both inputs are cropped to the common (4, 2) shape before summing.
        aeq(actual, 2 * x0[:4, :2] - x1[:4, :2])

    def test_bad_coeffs_fails(self, layer):
        # The number of coefficients must match the number of inputs.
        from lasagne.layers.merge import ElemwiseSumLayer
        with pytest.raises(ValueError):
            ElemwiseSumLayer([Mock(), Mock()], coeffs=[2, 3, -1])
+
+
class TestElemwiseMergeLayerMul:
    """ElemwiseMergeLayer with an elementwise product merge function."""

    @pytest.fixture
    def layer(self):
        import theano.tensor as T
        from lasagne.layers.merge import ElemwiseMergeLayer
        return ElemwiseMergeLayer([Mock(), Mock()], merge_function=T.mul)

    def test_get_output_for(self, layer):
        first = numpy.array([[0, 1], [2, 3]])
        second = numpy.array([[1, 2], [4, 5]])
        inputs = [theano.shared(first), theano.shared(second)]
        actual = layer.get_output_for(inputs).eval()
        # T.mul merges the inputs by elementwise multiplication.
        assert (actual == first * second).all()
+
+
class TestElemwiseMergeLayerMaximum:
    """ElemwiseMergeLayer with an elementwise maximum merge function."""

    @pytest.fixture
    def layer(self):
        import theano.tensor as T
        from lasagne.layers.merge import ElemwiseMergeLayer
        return ElemwiseMergeLayer([Mock(), Mock()], merge_function=T.maximum)

    def test_get_output_for(self, layer):
        first = numpy.array([[0, 1], [2, 3]])
        second = numpy.array([[1, 2], [4, 5]])
        inputs = [theano.shared(first), theano.shared(second)]
        actual = layer.get_output_for(inputs).eval()
        # T.maximum merges the inputs by taking the elementwise maximum.
        assert (actual == numpy.maximum(first, second)).all()
diff --git a/lasagne/tests/layers/test_noise.py b/lasagne/tests/layers/test_noise.py
new file mode 100644
index 0000000..0bc598c
--- /dev/null
+++ b/lasagne/tests/layers/test_noise.py
@@ -0,0 +1,127 @@
+from mock import Mock
+import numpy
+from numpy.random import RandomState
+import theano
+import pytest
+
+from lasagne.random import get_rng, set_rng
+
+
class TestDropoutLayer:
    """Tests for ``lasagne.layers.noise.DropoutLayer``."""

    @pytest.fixture(params=[(100, 100), (None, 100)])
    def input_layer(self, request):
        # Exercised with both a fully-specified shape and one with an
        # unspecified batch dimension.
        from lasagne.layers.input import InputLayer
        return InputLayer(request.param)

    @pytest.fixture
    def layer(self, input_layer):
        from lasagne.layers.noise import DropoutLayer
        return DropoutLayer(input_layer)

    @pytest.fixture
    def layer_no_rescale(self, input_layer):
        from lasagne.layers.noise import DropoutLayer
        return DropoutLayer(input_layer, rescale=False)

    @pytest.fixture
    def layer_p_02(self, input_layer):
        from lasagne.layers.noise import DropoutLayer
        return DropoutLayer(input_layer, p=0.2)

    def test_get_output_for_non_deterministic(self, layer):
        inp = theano.shared(numpy.ones((100, 100)))
        out = layer.get_output_for(inp).eval()
        # With rescaling, the expected mean stays near 1 ...
        assert 0.9 < out.mean() < 1.1
        # ... and surviving units are scaled to exactly 1/(1-0.5) = 2.
        assert (numpy.unique(out) == [0., 2.]).all()

    def test_get_output_for_deterministic(self, layer):
        inp = theano.shared(numpy.ones((100, 100)))
        out = layer.get_output_for(inp, deterministic=True).eval()
        # Deterministic mode is the identity.
        assert (out == inp.get_value()).all()

    def test_get_output_for_no_rescale(self, layer_no_rescale):
        inp = theano.shared(numpy.ones((100, 100)))
        out = layer_no_rescale.get_output_for(inp).eval()
        # Without rescaling, roughly half the ones survive unchanged.
        assert 0.4 < out.mean() < 0.6
        assert (numpy.unique(out) == [0., 1.]).all()

    def test_get_output_for_no_rescale_dtype(self, layer_no_rescale):
        # Dropout must not silently upcast integer inputs.
        inp = theano.shared(numpy.ones((100, 100), dtype=numpy.int32))
        assert layer_no_rescale.get_output_for(inp).dtype == inp.dtype

    def test_get_output_for_p_02(self, layer_p_02):
        inp = theano.shared(numpy.ones((100, 100)))
        out = layer_p_02.get_output_for(inp).eval()
        assert 0.9 < out.mean() < 1.1
        # Survivors are rescaled by 1/(1-0.2) = 1.25.
        assert (numpy.round(numpy.unique(out), 2) == [0., 1.25]).all()

    def test_get_output_for_p_float32(self, input_layer):
        # A float32 dropout probability must not upcast float32 data.
        from lasagne.layers.noise import DropoutLayer
        layer = DropoutLayer(input_layer, p=numpy.float32(0.5))
        inp = theano.shared(numpy.ones((100, 100), dtype=numpy.float32))
        assert layer.get_output_for(inp).dtype == inp.dtype

    def test_specified_rng(self, input_layer):
        # Seeding the module-level RNG identically must reproduce the
        # exact same dropout mask.
        from lasagne.layers.noise import DropoutLayer
        inp = theano.shared(numpy.ones((100, 100)))
        seed = 123456789
        saved_rng = get_rng()

        outputs = []
        for _ in range(2):
            set_rng(RandomState(seed))
            outputs.append(
                DropoutLayer(input_layer).get_output_for(inp).eval())

        set_rng(saved_rng)  # reset to original RNG for other tests
        assert numpy.allclose(outputs[0], outputs[1])
+
+
class TestGaussianNoiseLayer:
    """Tests for ``lasagne.layers.noise.GaussianNoiseLayer``."""

    @pytest.fixture
    def layer(self):
        from lasagne.layers.noise import GaussianNoiseLayer
        return GaussianNoiseLayer(Mock(output_shape=(None,)))

    @pytest.fixture(params=[(100, 100), (None, 100)])
    def input_layer(self, request):
        # Exercised with both a fully-specified shape and one with an
        # unspecified batch dimension.
        from lasagne.layers.input import InputLayer
        return InputLayer(request.param)

    def test_get_output_for_non_deterministic(self, layer):
        inp = theano.shared(numpy.ones((100, 100)))
        out = layer.get_output_for(inp, deterministic=False).eval()
        # Every element is perturbed ...
        assert (out != inp.eval()).all()
        # ... by zero-mean noise, so the overall mean stays close to 1.
        assert out.mean() != 1.0
        assert numpy.round(out.mean()) == 1.0

    def test_get_output_for_deterministic(self, layer):
        inp = theano.shared(numpy.ones((3, 3)))
        out = layer.get_output_for(inp, deterministic=True).eval()
        # Deterministic mode disables the noise entirely.
        assert (out == inp.eval()).all()

    def test_specified_rng(self, input_layer):
        # Seeding the module-level RNG identically must reproduce the
        # exact same noise sample.
        from lasagne.layers.noise import GaussianNoiseLayer
        inp = theano.shared(numpy.ones((100, 100)))
        seed = 123456789
        saved_rng = get_rng()

        outputs = []
        for _ in range(2):
            set_rng(RandomState(seed))
            outputs.append(
                GaussianNoiseLayer(input_layer).get_output_for(inp).eval())

        set_rng(saved_rng)  # reset to original RNG for other tests
        assert numpy.allclose(outputs[0], outputs[1])
diff --git a/lasagne/tests/layers/test_normalization.py b/lasagne/tests/layers/test_normalization.py
new file mode 100644
index 0000000..5f6bc4f
--- /dev/null
+++ b/lasagne/tests/layers/test_normalization.py
@@ -0,0 +1,327 @@
+# -*- coding: utf-8 -*-
+
+"""
+
+The :func:`ground_truth_normalizer()`, :func:`ground_truth_normalize_row` and
+:class:`TestLocalResponseNormalization2DLayer` implementations contain code
+from `pylearn2 <http://github.com/lisa-lab/pylearn2>`_, which is covered
+by the following license:
+
+
+Copyright (c) 2011--2014, Université de Montréal
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+ list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+"""
+
+
+from mock import Mock
+import numpy as np
+import pytest
+import theano
+
+
def ground_truth_normalizer(c01b, k, n, alpha, beta):
    """Reference local-response normalization over a c01b-laid-out 4D
    array, normalizing one channel vector (fixed row/column/batch
    position) at a time via ground_truth_normalize_row()."""
    out = np.zeros(c01b.shape)
    _, n_rows, n_cols, n_batch = c01b.shape

    for r in range(n_rows):
        for c in range(n_cols):
            for b in range(n_batch):
                out[:, r, c, b] = ground_truth_normalize_row(
                    row=c01b[:, r, c, b],
                    k=k, n=n, alpha=alpha, beta=beta)
    return out
+
+
def ground_truth_normalize_row(row, k, n, alpha, beta):
    """Reference LRN of a single 1-D channel vector:
    out[i] = row[i] / (k + alpha * sum of row[j]**2 over the size-n
    window centred on i) ** beta. Asserts sanity of inputs as it goes."""
    assert row.ndim == 1
    length = row.shape[0]
    result = np.zeros(row.shape)
    for i in range(length):
        accum = k
        window_count = 0
        # Window of up to n entries centred on i, clipped to the row.
        for j in range(max(0, i - n // 2), min(length, i + n // 2 + 1)):
            window_count += 1
            squared = row[j] ** 2.
            assert squared > 0.
            assert accum >= k
            assert alpha > 0.
            accum += alpha * squared
            assert accum >= k
        assert window_count <= n
        assert accum >= k
        result[i] = row[i] / accum ** beta
    return result
+
+
class TestLocalResponseNormalization2DLayer:
    """Checks LocalResponseNormalization2DLayer against the
    pylearn2-derived reference implementation above."""

    @pytest.fixture
    def rng(self):
        return np.random.RandomState([2013, 2])

    @pytest.fixture
    def input_data(self, rng):
        # (batch, channels, rows, cols) = (2, 15, 3, 4)
        return rng.randn(2, 15, 3, 4).astype(theano.config.floatX)

    @pytest.fixture
    def input_layer(self, input_data):
        from lasagne.layers.input import InputLayer
        shape = list(input_data.shape)
        shape[0] = None  # leave the batch dimension unspecified
        return InputLayer(shape)

    @pytest.fixture
    def layer(self, input_layer):
        from lasagne.layers.normalization import\
            LocalResponseNormalization2DLayer
        return LocalResponseNormalization2DLayer(input_layer,
                                                 alpha=1.5,
                                                 k=2,
                                                 beta=0.75,
                                                 n=5)

    def test_get_params(self, layer):
        # The layer is parameter-free.
        assert layer.get_params() == []

    def test_get_output_shape_for(self, layer):
        # Normalization never changes the shape.
        assert layer.get_output_shape_for((1, 2, 3, 4)) == (1, 2, 3, 4)

    def test_even_n_fails(self, input_layer):
        from lasagne.layers.normalization import\
            LocalResponseNormalization2DLayer
        # Only odd window sizes are implemented.
        with pytest.raises(NotImplementedError):
            LocalResponseNormalization2DLayer(input_layer, n=4)

    def test_normalization(self, input_data, input_layer, layer):
        from lasagne.layers import get_output
        X = input_layer.input_var
        lrn = theano.function([X], get_output(layer, X))
        out = lrn(input_data)

        # The reference implementation expects c01b layout, so transpose
        # in, normalize, and transpose back.
        ground_out = ground_truth_normalizer(
            input_data.transpose([1, 2, 3, 0]),
            n=layer.n, k=layer.k,
            alpha=layer.alpha, beta=layer.beta)
        ground_out = np.transpose(ground_out, [3, 0, 1, 2])

        assert out.shape == ground_out.shape
        assert np.allclose(out, ground_out)
+
+
class TestBatchNormLayer:
    # Tests for lasagne.layers.normalization.BatchNormLayer.

    @pytest.fixture
    def BatchNormLayer(self):
        from lasagne.layers.normalization import BatchNormLayer
        return BatchNormLayer

    @pytest.fixture
    def init_unique(self):
        # initializer for a tensor of unique values
        return lambda shape: np.arange(np.prod(shape)).reshape(shape)

    def test_init(self, BatchNormLayer, init_unique):
        # The shape of beta reveals which axes are treated as parameter
        # axes (i.e. NOT normalized over).
        input_shape = (2, 3, 4)
        # default: normalize over all but second axis
        beta = BatchNormLayer(input_shape, beta=init_unique).beta
        assert np.allclose(beta.get_value(), init_unique((3,)))
        # normalize over first axis only
        beta = BatchNormLayer(input_shape, beta=init_unique, axes=0).beta
        assert np.allclose(beta.get_value(), init_unique((3, 4)))
        # normalize over second and third axis
        beta = BatchNormLayer(input_shape, beta=init_unique, axes=(1, 2)).beta
        assert np.allclose(beta.get_value(), init_unique((2,)))

    @pytest.mark.parametrize('update_averages', [None, True, False])
    @pytest.mark.parametrize('use_averages', [None, True, False])
    @pytest.mark.parametrize('deterministic', [True, False])
    def test_get_output_for(self, BatchNormLayer, deterministic, use_averages,
                            update_averages):
        # Exercises all combinations of the deterministic flag with the
        # batch_norm_use_averages / batch_norm_update_averages overrides;
        # None means "use the default derived from `deterministic`".
        input_shape = (20, 30, 40)

        # random input tensor, beta, gamma, mean, inv_std and alpha
        input = (np.random.randn(*input_shape).astype(theano.config.floatX) +
                 np.random.randn(1, 30, 1).astype(theano.config.floatX))
        beta = np.random.randn(30).astype(theano.config.floatX)
        gamma = np.random.randn(30).astype(theano.config.floatX)
        mean = np.random.randn(30).astype(theano.config.floatX)
        inv_std = np.random.rand(30).astype(theano.config.floatX)
        alpha = np.random.rand()

        # create layer (with default axes: normalize over all but second axis)
        layer = BatchNormLayer(input_shape, beta=beta, gamma=gamma, mean=mean,
                               inv_std=inv_std, alpha=alpha)

        # call get_output_for()
        kwargs = {'deterministic': deterministic}
        if use_averages is not None:
            kwargs['batch_norm_use_averages'] = use_averages
        else:
            # default: use the stored averages iff deterministic
            use_averages = deterministic
        if update_averages is not None:
            kwargs['batch_norm_update_averages'] = update_averages
        else:
            # default: update the stored averages iff not deterministic
            update_averages = not deterministic
        result = layer.get_output_for(theano.tensor.constant(input),
                                      **kwargs).eval()

        # compute expected results and expected updated parameters
        input_mean = input.mean(axis=(0, 2))
        input_inv_std = 1 / np.sqrt(input.var(axis=(0, 2)) + layer.epsilon)
        if use_averages:
            use_mean, use_inv_std = mean, inv_std
        else:
            use_mean, use_inv_std = input_mean, input_inv_std
        # broadcast the per-channel statistics over axes 0 and 2
        bcast = (np.newaxis, slice(None), np.newaxis)
        exp_result = (input - use_mean[bcast]) * use_inv_std[bcast]
        exp_result = exp_result * gamma[bcast] + beta[bcast]
        if update_averages:
            # exponential moving average with decay factor alpha
            new_mean = (1 - alpha) * mean + alpha * input_mean
            new_inv_std = (1 - alpha) * inv_std + alpha * input_inv_std
        else:
            new_mean, new_inv_std = mean, inv_std

        # compare expected results to actual results
        tol = {'atol': 1e-5, 'rtol': 1e-6}
        assert np.allclose(layer.mean.get_value(), new_mean, **tol)
        assert np.allclose(layer.inv_std.get_value(), new_inv_std, **tol)
        assert np.allclose(result, exp_result, **tol)

    def test_undefined_shape(self, BatchNormLayer):
        # Parameter axes (those not normalized over) need a known size.
        # should work:
        BatchNormLayer((64, None, 3), axes=(1, 2))
        # should not work:
        with pytest.raises(ValueError) as exc:
            BatchNormLayer((64, None, 3), axes=(0, 2))
        assert 'needs specified input sizes' in exc.value.args[0]

    def test_skip_linear_transform(self, BatchNormLayer):
        # beta=None / gamma=None skip the corresponding learned transform.
        input_shape = (20, 30, 40)

        # random input tensor, beta, gamma
        input = (np.random.randn(*input_shape).astype(theano.config.floatX) +
                 np.random.randn(1, 30, 1).astype(theano.config.floatX))
        beta = np.random.randn(30).astype(theano.config.floatX)
        gamma = np.random.randn(30).astype(theano.config.floatX)

        # create layers without beta or gamma
        layer1 = BatchNormLayer(input_shape, beta=None, gamma=gamma)
        layer2 = BatchNormLayer(input_shape, beta=beta, gamma=None)

        # check that one parameter is missing (3 instead of 4)
        assert len(layer1.get_params()) == 3
        assert len(layer2.get_params()) == 3

        # call get_output_for()
        result1 = layer1.get_output_for(theano.tensor.constant(input),
                                        deterministic=False).eval()
        result2 = layer2.get_output_for(theano.tensor.constant(input),
                                        deterministic=False).eval()

        # compute expected results and expected updated parameters
        mean = input.mean(axis=(0, 2))
        std = np.sqrt(input.var(axis=(0, 2)) + layer1.epsilon)
        exp_result = (input - mean[None, :, None]) / std[None, :, None]
        exp_result1 = exp_result * gamma[None, :, None]  # no beta
        exp_result2 = exp_result + beta[None, :, None]  # no gamma

        # compare expected results to actual results
        tol = {'atol': 1e-5, 'rtol': 1e-6}
        assert np.allclose(result1, exp_result1, **tol)
        assert np.allclose(result2, exp_result2, **tol)
+
+
def test_batch_norm_macro():
    """Tests the batch_norm() convenience function: it wraps a layer in a
    BatchNormLayer, moves the layer's nonlinearity into a trailing
    NonlinearityLayer, removes a now-redundant bias, and propagates
    layer names."""
    from lasagne.layers import (Layer, BatchNormLayer, batch_norm,
                                NonlinearityLayer)
    from lasagne.nonlinearities import identity
    input_shape = (2, 3)
    obj = object()  # sentinel to track where attributes end up

    # check if it steals the nonlinearity
    layer = Mock(Layer, output_shape=input_shape, nonlinearity=obj)
    bnstack = batch_norm(layer)
    assert isinstance(bnstack, NonlinearityLayer)
    assert isinstance(bnstack.input_layer, BatchNormLayer)
    # the wrapped layer is left with the identity nonlinearity
    assert layer.nonlinearity is identity
    assert bnstack.nonlinearity is obj

    # check if it removes the bias
    layer = Mock(Layer, output_shape=input_shape, b=obj, params={obj: set()})
    bnstack = batch_norm(layer)
    assert isinstance(bnstack, BatchNormLayer)
    assert layer.b is None
    assert obj not in layer.params

    # check if it can handle an unset bias
    layer = Mock(Layer, output_shape=input_shape, b=None, params={obj: set()})
    bnstack = batch_norm(layer)
    assert isinstance(bnstack, BatchNormLayer)
    assert layer.b is None

    # check if it passes on kwargs
    layer = Mock(Layer, output_shape=input_shape)
    bnstack = batch_norm(layer, name='foo')
    assert isinstance(bnstack, BatchNormLayer)
    assert bnstack.name == 'foo'

    # check if created layers are named with kwargs name
    layer = Mock(Layer, output_shape=input_shape, nonlinearity=obj)
    layer.name = 'foo'
    bnstack = batch_norm(layer, name='foo_bnorm')
    assert isinstance(bnstack, NonlinearityLayer)
    assert isinstance(bnstack.input_layer, BatchNormLayer)
    assert bnstack.name == 'foo_bnorm_nonlin'
    assert bnstack.input_layer.name == 'foo_bnorm'

    # check if created layers are named with wrapped layer name
    layer = Mock(Layer, output_shape=input_shape, nonlinearity=obj)
    layer.name = 'foo'
    bnstack = batch_norm(layer)
    assert isinstance(bnstack, NonlinearityLayer)
    assert isinstance(bnstack.input_layer, BatchNormLayer)
    assert bnstack.name == 'foo_bn_nonlin'
    assert bnstack.input_layer.name == 'foo_bn'

    # check if created layers remain unnamed if no names are given
    layer = Mock(Layer, output_shape=input_shape, nonlinearity=obj)
    bnstack = batch_norm(layer)
    assert isinstance(bnstack, NonlinearityLayer)
    assert isinstance(bnstack.input_layer, BatchNormLayer)
    assert bnstack.name is None
    assert bnstack.input_layer.name is None
diff --git a/lasagne/tests/layers/test_pool.py b/lasagne/tests/layers/test_pool.py
new file mode 100644
index 0000000..bce9a4e
--- /dev/null
+++ b/lasagne/tests/layers/test_pool.py
@@ -0,0 +1,905 @@
+from mock import Mock
+import numpy as np
+import pytest
+import theano
+
+from lasagne.utils import floatX
+
+
def max_pool_1d(data, pool_size, stride=None):
    """Reference max-pooling along the last axis with ignore_border=False:
    windows of `pool_size` placed every `stride` positions; a window at
    the border is kept (clipped) if it covers any not-yet-pooled index."""
    if stride is None:
        stride = pool_size

    length = data.shape[-1]
    windows = []
    covered = set()
    for start in range(0, length, stride):
        # Clip the window to the valid index range.
        window = set(range(start, min(start + pool_size, length)))
        # Skip windows that add no new indices (fully covered already).
        if not window <= covered:
            windows.append(sorted(window))
            covered |= window

    pooled = np.array([data[..., w].max(axis=-1) for w in windows])
    # Move the window axis from the front to the back.
    return np.rollaxis(pooled, 0, pooled.ndim)
+
+
def max_pool_1d_ignoreborder(data, pool_size, stride=None, pad=0):
    """Reference max-pooling along the last axis with ignore_border=True:
    pad both ends with -inf, take the max over every full window, then
    subsample every `stride`-th window."""
    if stride is None:
        stride = pool_size

    # Pad only the last axis, with -inf so padding never wins the max.
    pad_spec = [(0, 0)] * (data.ndim - 1) + [(pad, pad)]
    padded = np.pad(data, pad_spec, mode='constant',
                    constant_values=(-np.inf,))

    n_windows = padded.shape[-1] - pool_size + 1
    shifted = np.zeros((pool_size,) + padded.shape)[..., :n_windows]
    for offset in range(pool_size):
        shifted[offset] = padded[..., offset:offset + n_windows]
    pooled = shifted.max(axis=0)

    if stride:
        pooled = pooled[..., ::stride]

    return pooled
+
+
def upscale_1d_shape(shape, scale_factor):
    """Shape of a (batch, channels, width) array after upscaling the
    width axis by scale_factor[0]."""
    factor = scale_factor[0]
    return (shape[0], shape[1], shape[2] * factor)
+
+
def upscale_1d(data, scale_factor):
    """Repeat-upscale the last axis of a (batch, channels, width) array
    by scale_factor[0] (each value duplicated `factor` times)."""
    factor = scale_factor[0]
    upscaled = np.zeros(upscale_1d_shape(data.shape, scale_factor))
    for offset in range(factor):
        upscaled[:, :, offset::factor] = data
    return upscaled
+
+
def max_pool_2d(data, pool_size, stride):
    """Reference 2D max-pooling built from two orthogonal 1D passes."""
    # Pool the last axis (columns) first.
    cols_pooled = max_pool_1d(data, pool_size[1], stride[1])

    # Swap rows into the last position, pool them, then swap back.
    rows_last = cols_pooled.swapaxes(-1, -2)
    rows_pooled = max_pool_1d(rows_last, pool_size[0], stride[0])
    return rows_pooled.swapaxes(-1, -2)
+
+
def max_pool_2d_ignoreborder(data, pool_size, stride, pad):
    """Reference 2D max-pooling (ignore_border=True) built from two
    orthogonal 1D passes."""
    # Pool the last axis (columns) first.
    cols_pooled = max_pool_1d_ignoreborder(
        data, pool_size[1], stride[1], pad[1])

    # Swap rows into the last position, pool them, then swap back.
    rows_last = cols_pooled.swapaxes(-1, -2)
    rows_pooled = max_pool_1d_ignoreborder(
        rows_last, pool_size[0], stride[0], pad[0])
    return rows_pooled.swapaxes(-1, -2)
+
+
def max_pool_3d_ignoreborder(data, pool_size, stride, pad):
    """Reference 3D max-pooling (ignore_border=True) built from three 1D
    passes over the last three axes."""
    # Pool the last axis.
    pooled = max_pool_1d_ignoreborder(
        data, pool_size[2], stride[2], pad[2])

    # Swap the second-to-last axis into the last position and pool it.
    pooled = pooled.swapaxes(-1, -2)
    pooled = max_pool_1d_ignoreborder(
        pooled, pool_size[1], stride[1], pad[1])

    # Swap the third-to-last axis into the last position and pool it.
    pooled = pooled.swapaxes(-1, -3)
    pooled = max_pool_1d_ignoreborder(
        pooled, pool_size[0], stride[0], pad[0])

    # Undo both swaps to restore the original axis order.
    pooled = pooled.swapaxes(-1, -2)
    return pooled.swapaxes(-2, -3)
+
+
def upscale_2d_shape(shape, scale_factor):
    """Shape of a (batch, channels, rows, cols) array after upscaling the
    last two axes by the given factors."""
    row_factor, col_factor = scale_factor[0], scale_factor[1]
    return (shape[0], shape[1],
            shape[2] * row_factor,
            shape[3] * col_factor)
+
+
def upscale_2d(data, scale_factor):
    """Repeat-upscale the last two axes of a (batch, channels, rows,
    cols) array by the given per-axis factors."""
    row_factor, col_factor = scale_factor[0], scale_factor[1]
    upscaled = np.zeros(upscale_2d_shape(data.shape, scale_factor))
    for r in range(row_factor):
        for c in range(col_factor):
            upscaled[:, :, r::row_factor, c::col_factor] = data
    return upscaled
+
+
def spatial_pool(data, pool_dims):
    """Reference spatial pyramid pooling: for each pool_dim, max-pool the
    (rows, cols) plane down to roughly pool_dim x pool_dim regions and
    concatenate the flattened results along axis 2."""
    input_size = data.shape[2:]
    parts = []
    for pool_dim in pool_dims:
        # Ceiling-division window size, floor-division stride.
        pool_size = tuple(-(-extent // pool_dim) for extent in input_size)
        stride_size = tuple(extent // pool_dim for extent in input_size)

        part = max_pool_2d_ignoreborder(
            data, pool_size, stride_size, (0, 0))
        parts.append(part.reshape(
            data.shape[0], data.shape[1], pool_dim ** 2))

    return np.concatenate(parts, axis=2)
+
+
class TestFeaturePoolLayer:
    """Tests for ``lasagne.layers.pool.FeaturePoolLayer``, checked against
    the numpy reference implementation max_pool_1d()."""

    def pool_test_sets():
        # All (pool_size, axis) combinations exercised below.
        for pool_size in [2, 3]:
            for axis in [1, 2]:
                yield (pool_size, axis)

    def input_layer(self, output_shape):
        from lasagne.layers.input import InputLayer
        return InputLayer(output_shape)

    def layer(self, input_layer, pool_size, axis):
        from lasagne.layers.pool import FeaturePoolLayer
        return FeaturePoolLayer(
            input_layer,
            pool_size=pool_size,
            axis=axis,
        )

    def test_init_raises(self):
        # Axis 1 has length 3, which is not divisible by pool_size 2.
        input_layer = self.input_layer((2, 3, 4))
        with pytest.raises(ValueError):
            self.layer(input_layer, pool_size=2, axis=1)

    @pytest.mark.parametrize(
        "pool_size, axis", list(pool_test_sets()))
    def test_layer(self, pool_size, axis):
        data = floatX(np.random.randn(3, 6, 12, 23))
        layer = self.layer(self.input_layer(data.shape), pool_size, axis)
        actual = layer.get_output_for(theano.shared(data)).eval()

        # Reference: move the pooled axis last, pool, move it back.
        expected = np.swapaxes(data, axis, -1)
        expected = max_pool_1d(expected, pool_size)
        expected = np.swapaxes(expected, -1, axis)

        assert np.all(expected.shape == layer.output_shape)
        assert np.all(expected.shape == actual.shape)
        assert np.allclose(expected, actual)
+
+
class TestMaxPool1DLayer:
    # Tests for lasagne.layers.pool.MaxPool1DLayer, checked against the
    # numpy reference implementations max_pool_1d() and
    # max_pool_1d_ignoreborder() above.

    def pool_test_sets():
        # (pool_size, stride) grid for the ignore_border=False tests.
        for pool_size in [2, 3]:
            for stride in [1, 2, 3, 4]:
                yield (pool_size, stride)

    def pool_test_sets_ignoreborder():
        # (pool_size, stride, pad) grid; padding stays below pool_size.
        for pool_size in [2, 3]:
            for stride in [1, 2, 3, 4]:
                for pad in range(pool_size):
                    yield (pool_size, stride, pad)

    def input_layer(self, output_shape):
        return Mock(output_shape=output_shape)

    def layer(self, input_layer, pool_size, stride=None, pad=0):
        # NOTE(review): `pad` is accepted but not forwarded here;
        # pooling with ignore_border=False is built without padding.
        from lasagne.layers.pool import MaxPool1DLayer
        return MaxPool1DLayer(
            input_layer,
            pool_size=pool_size,
            stride=stride,
            ignore_border=False,
        )

    def layer_ignoreborder(self, input_layer, pool_size, stride=None, pad=0):
        from lasagne.layers.pool import MaxPool1DLayer
        return MaxPool1DLayer(
            input_layer,
            pool_size=pool_size,
            stride=stride,
            pad=pad,
            ignore_border=True,
        )

    @pytest.mark.parametrize(
        "pool_size, stride", list(pool_test_sets()))
    def test_get_output_and_shape_for(self, pool_size, stride):
        # Both the symbolic output and the inferred shape must match the
        # numpy reference.
        input = floatX(np.random.randn(8, 16, 23))
        input_layer = self.input_layer(input.shape)
        input_theano = theano.shared(input)

        layer = self.layer(input_layer, pool_size, stride)
        layer_output_shape = layer.get_output_shape_for(input.shape)
        layer_output = layer.get_output_for(input_theano)
        layer_result = layer_output.eval()

        numpy_result = max_pool_1d(input, pool_size, stride)

        assert numpy_result.shape == layer_output_shape
        assert np.allclose(numpy_result, layer_result)

    @pytest.mark.parametrize(
        "pool_size, stride, pad", list(pool_test_sets_ignoreborder()))
    def test_get_output_for_ignoreborder(self, pool_size, stride, pad):
        input = floatX(np.random.randn(8, 16, 23))
        input_layer = self.input_layer(input.shape)
        input_theano = theano.shared(input)
        layer_output = self.layer_ignoreborder(
            input_layer, pool_size, stride, pad).get_output_for(input_theano)

        layer_result = layer_output.eval()
        numpy_result = max_pool_1d_ignoreborder(input, pool_size, stride, pad)

        assert np.all(numpy_result.shape == layer_result.shape)
        assert np.allclose(numpy_result, layer_result)

    @pytest.mark.parametrize(
        "input_shape", [(32, 64, 128), (None, 64, 128), (32, None, 128),
                        (32, 64, None)])
    def test_get_output_shape_for(self, input_shape):
        # None dimensions propagate through shape inference unchanged.
        input_layer = self.input_layer(input_shape)
        layer = self.layer_ignoreborder(input_layer, pool_size=2)
        assert layer.get_output_shape_for((None, 64, 128)) == (None, 64, 64)
        assert layer.get_output_shape_for((32, 64, None)) == (32, 64, None)
        assert layer.get_output_shape_for((32, 64, 128)) == (32, 64, 64)

    def test_fail_on_mismatching_dimensionality(self):
        # MaxPool1DLayer only accepts 3D input shapes.
        from lasagne.layers.pool import MaxPool1DLayer
        with pytest.raises(ValueError) as exc:
            MaxPool1DLayer((10, 20), 3, 2)
        assert "Expected 3 input dimensions" in exc.value.args[0]
        with pytest.raises(ValueError) as exc:
            MaxPool1DLayer((10, 20, 30, 40), 3, 2)
        assert "Expected 3 input dimensions" in exc.value.args[0]
+
+
+class TestMaxPool2DLayer:
+    """Tests for lasagne.layers.pool.MaxPool2DLayer."""
+    def pool_test_sets():
+        # NOTE: no `self` -- consumed at class-body creation time by the
+        # @pytest.mark.parametrize calls below.
+        for pool_size in [2, 3]:
+            for stride in [1, 2, 3, 4]:
+                yield (pool_size, stride)
+
+    def pool_test_sets_ignoreborder():
+        for pool_size in [2, 3]:
+            for stride in [1, 2, 3, 4]:
+                for pad in range(pool_size):
+                    yield (pool_size, stride, pad)
+
+    def input_layer(self, output_shape):
+        # A mock suffices: the layer under test only reads output_shape.
+        return Mock(output_shape=output_shape)
+
+    def layer(self, input_layer, pool_size, stride=None,
+              pad=(0, 0), ignore_border=False):
+        from lasagne.layers.pool import MaxPool2DLayer
+        return MaxPool2DLayer(
+            input_layer,
+            pool_size=pool_size,
+            stride=stride,
+            pad=pad,
+            ignore_border=ignore_border,
+        )
+
+    @pytest.mark.parametrize(
+        "pool_size, stride", list(pool_test_sets()))
+    def test_get_output_for(self, pool_size, stride):
+        # Theano may raise NotImplementedError for some configurations
+        # (e.g. ignore_border=False with certain strides); skip those.
+        try:
+            input = floatX(np.random.randn(8, 16, 17, 13))
+            input_layer = self.input_layer(input.shape)
+            input_theano = theano.shared(input)
+            result = self.layer(
+                input_layer,
+                (pool_size, pool_size),
+                (stride, stride),
+                ignore_border=False,
+            ).get_output_for(input_theano)
+
+            result_eval = result.eval()
+            numpy_result = max_pool_2d(
+                input, (pool_size, pool_size), (stride, stride))
+
+            assert np.all(numpy_result.shape == result_eval.shape)
+            assert np.allclose(result_eval, numpy_result)
+        except NotImplementedError:
+            pytest.skip()
+
+    @pytest.mark.parametrize(
+        "pool_size, stride, pad", list(pool_test_sets_ignoreborder()))
+    def test_get_output_for_ignoreborder(self, pool_size,
+                                         stride, pad):
+        try:
+            input = floatX(np.random.randn(8, 16, 17, 13))
+            input_layer = self.input_layer(input.shape)
+            input_theano = theano.shared(input)
+
+            result = self.layer(
+                input_layer,
+                pool_size,
+                stride,
+                pad,
+                ignore_border=True,
+            ).get_output_for(input_theano)
+
+            result_eval = result.eval()
+            numpy_result = max_pool_2d_ignoreborder(
+                input, (pool_size, pool_size), (stride, stride), (pad, pad))
+
+            assert np.all(numpy_result.shape == result_eval.shape)
+            assert np.allclose(result_eval, numpy_result)
+        except NotImplementedError:
+            pytest.skip()
+
+    @pytest.mark.parametrize(
+        "input_shape,output_shape",
+        [((32, 64, 24, 24), (32, 64, 12, 12)),
+         ((None, 64, 24, 24), (None, 64, 12, 12)),
+         ((32, None, 24, 24), (32, None, 12, 12)),
+         ((32, 64, None, 24), (32, 64, None, 12)),
+         ((32, 64, 24, None), (32, 64, 12, None)),
+         ((32, 64, None, None), (32, 64, None, None))],
+    )
+    def test_get_output_shape_for(self, input_shape, output_shape):
+        # Unknown (None) dimensions must be propagated unchanged.
+        try:
+            input_layer = self.input_layer(input_shape)
+            layer = self.layer(input_layer,
+                               pool_size=(2, 2), stride=None)
+            assert layer.get_output_shape_for(
+                input_shape) == output_shape
+        except NotImplementedError:
+            pytest.skip()
+
+    def test_fail_on_mismatching_dimensionality(self):
+        # 2D pooling expects exactly 4 input dimensions.
+        from lasagne.layers.pool import MaxPool2DLayer
+        with pytest.raises(ValueError) as exc:
+            MaxPool2DLayer((10, 20, 30), 3, 2)
+        assert "Expected 4 input dimensions" in exc.value.args[0]
+        with pytest.raises(ValueError) as exc:
+            MaxPool2DLayer((10, 20, 30, 40, 50), 3, 2)
+        assert "Expected 4 input dimensions" in exc.value.args[0]
+
+
+class TestMaxPool2DCCLayer:
+    """Tests for lasagne.layers.cuda_convnet.MaxPool2DCCLayer."""
+    def pool_test_sets():
+        # NOTE: no `self` -- consumed at class-body creation time.
+        # Strides are limited to stride <= pool_size (see
+        # test_not_implemented below).
+        for pool_size in [2, 3]:
+            for stride in range(1, pool_size+1):
+                yield (pool_size, stride)
+
+    def input_layer(self, output_shape):
+        return Mock(output_shape=output_shape)
+
+    def layer(self, input_layer, pool_size, stride):
+        try:
+            from lasagne.layers.cuda_convnet import MaxPool2DCCLayer
+        except ImportError:
+            pytest.skip("cuda_convnet not available")
+        return MaxPool2DCCLayer(
+            input_layer,
+            pool_size=pool_size,
+            stride=stride,
+        )
+
+    @pytest.mark.parametrize(
+        "pool_size, stride", list(pool_test_sets()))
+    def test_get_output_for(self, pool_size, stride):
+        try:
+            input = floatX(np.random.randn(8, 16, 16, 16))
+            input_layer = self.input_layer(input.shape)
+            input_theano = theano.shared(input)
+            result = self.layer(
+                input_layer,
+                (pool_size, pool_size),
+                (stride, stride),
+            ).get_output_for(input_theano)
+
+            result_eval = result.eval()
+            numpy_result = max_pool_2d(
+                input, (pool_size, pool_size), (stride, stride))
+
+            assert np.all(numpy_result.shape == result_eval.shape)
+            assert np.allclose(result_eval, numpy_result)
+        except NotImplementedError:
+            pytest.skip()
+
+    @pytest.mark.parametrize(
+        "input_shape,output_shape",
+        [((32, 64, 24, 24), (32, 64, 12, 12)),
+         ((None, 64, 24, 24), (None, 64, 12, 12)),
+         ((32, None, 24, 24), (32, None, 12, 12)),
+         ((32, 64, None, 24), (32, 64, None, 12)),
+         ((32, 64, 24, None), (32, 64, 12, None)),
+         ((32, 64, None, None), (32, 64, None, None))],
+    )
+    def test_get_output_shape_for(self, input_shape, output_shape):
+        try:
+            input_layer = self.input_layer(input_shape)
+            layer = self.layer(input_layer,
+                               pool_size=(2, 2), stride=None)
+            assert layer.get_output_shape_for(
+                input_shape) == output_shape
+        except NotImplementedError:
+            pytest.skip()
+
+    def test_not_implemented(self):
+        # Every unsupported option must raise NotImplementedError with a
+        # descriptive message.
+        try:
+            from lasagne.layers.cuda_convnet import MaxPool2DCCLayer
+        except ImportError:
+            pytest.skip("cuda_convnet not available")
+
+        input_layer = self.input_layer((128, 4, 12, 12))
+
+        with pytest.raises(NotImplementedError) as exc:
+            layer = MaxPool2DCCLayer(input_layer, pool_size=2, pad=2)
+        assert "MaxPool2DCCLayer does not support padding" in exc.value.args[0]
+
+        with pytest.raises(NotImplementedError) as exc:
+            layer = MaxPool2DCCLayer(input_layer, pool_size=(2, 3))
+        assert ("MaxPool2DCCLayer only supports square pooling regions" in
+                exc.value.args[0])
+
+        with pytest.raises(NotImplementedError) as exc:
+            layer = MaxPool2DCCLayer(input_layer, pool_size=2, stride=(1, 2))
+        assert (("MaxPool2DCCLayer only supports using the same stride in "
+                 "both directions") in exc.value.args[0])
+
+        with pytest.raises(NotImplementedError) as exc:
+            layer = MaxPool2DCCLayer(input_layer, pool_size=2, stride=3)
+        assert ("MaxPool2DCCLayer only supports stride <= pool_size" in
+                exc.value.args[0])
+
+        with pytest.raises(NotImplementedError) as exc:
+            layer = MaxPool2DCCLayer(input_layer, pool_size=2,
+                                     ignore_border=True)
+        assert ("MaxPool2DCCLayer does not support ignore_border=True" in
+                exc.value.args[0])
+
+    def test_dimshuffle_false(self):
+        # With dimshuffle=False the layer operates directly on
+        # c01b-ordered input (channels, rows, cols, batch).
+        try:
+            from lasagne.layers.cuda_convnet import MaxPool2DCCLayer
+        except ImportError:
+            pytest.skip("cuda_convnet not available")
+        from lasagne.layers.input import InputLayer
+
+        input_layer = InputLayer((4, 12, 12, 16))  # c01b order
+        layer = MaxPool2DCCLayer(input_layer, pool_size=2, dimshuffle=False)
+        assert layer.output_shape == (4, 6, 6, 16)
+
+        # Reference: transpose to bc01, pool, transpose back to c01b.
+        input = floatX(np.random.randn(4, 12, 12, 16))
+        output = max_pool_2d(input.transpose(3, 0, 1, 2), (2, 2), (2, 2))
+        output = output.transpose(1, 2, 3, 0)
+        actual = layer.get_output_for(input).eval()
+        assert np.allclose(output, actual)
+
+
+class TestMaxPool2DNNLayer:
+ def pool_test_sets_ignoreborder():
+ for pool_size in [2, 3]:
+ for stride in [1, 2, 3, 4]:
+ for pad in range(pool_size):
+ yield (pool_size, stride, pad)
+
+ def input_layer(self, output_shape):
+ return Mock(output_shape=output_shape)
+
+ def layer(self, input_layer, pool_size, stride, pad):
+ try:
+ from lasagne.layers.dnn import MaxPool2DDNNLayer
+ except ImportError:
+ pytest.skip("cuDNN not available")
+
+ return MaxPool2DDNNLayer(
+ input_layer,
+ pool_size=pool_size,
+ stride=stride,
+ pad=pad,
+ )
+
+ @pytest.mark.parametrize(
+ "pool_size, stride, pad", list(pool_test_sets_ignoreborder()))
+ def test_get_output_for_ignoreborder(self, pool_size,
+ stride, pad):
+ try:
+ input = floatX(np.random.randn(8, 16, 17, 13))
+ input_layer = self.input_layer(input.shape)
+ input_theano = theano.shared(input)
+
+ result = self.layer(
+ input_layer,
+ pool_size,
+ stride,
+ pad,
+ ).get_output_for(input_theano)
+
+ result_eval = result.eval()
+ numpy_result = max_pool_2d_ignoreborder(
+ input, (pool_size, pool_size), (stride, stride), (pad, pad))
+
+ assert np.all(numpy_result.shape == result_eval.shape)
+ assert np.allclose(result_eval, numpy_result)
+ except NotImplementedError:
+ pytest.skip()
+
+ @pytest.mark.parametrize(
+ "input_shape,output_shape",
+ [((32, 64, 24, 24), (32, 64, 12, 12)),
+ ((None, 64, 24, 24), (None, 64, 12, 12)),
+ ((32, None, 24, 24), (32, None, 12, 12)),
+ ((32, 64, None, 24), (32, 64, None, 12)),
+ ((32, 64, 24, None), (32, 64, 12, None)),
+ ((32, 64, None, None), (32, 64, None, None))],
+ )
+ def test_get_output_shape_for(self, input_shape, output_shape):
+ try:
+ input_layer = self.input_layer(input_shape)
+ layer = self.layer(input_layer,
+ pool_size=(2, 2), stride=None, pad=(0, 0))
+ assert layer.get_output_shape_for(
+ input_shape) == output_shape
+ except NotImplementedError:
+ raise
+ # pytest.skip()
+
+ def test_not_implemented(self):
+ try:
+ from lasagne.layers.dnn import MaxPool2DDNNLayer
+ except ImportError:
+ pytest.skip("cuDNN not available")
+ with pytest.raises(NotImplementedError) as exc:
+ layer = MaxPool2DDNNLayer((1, 2, 3, 4), pool_size=2,
+ ignore_border=False)
+ assert ("Pool2DDNNLayer does not support ignore_border=False" in
+ exc.value.args[0])
+
+ def test_fail_on_mismatching_dimensionality(self):
+ try:
+ from lasagne.layers.dnn import MaxPool2DDNNLayer
+ except ImportError:
+ pytest.skip("cuDNN not available")
+ with pytest.raises(ValueError) as exc:
+ MaxPool2DDNNLayer((10, 20, 30), 3, 2)
+ assert "Expected 4 input dimensions" in exc.value.args[0]
+ with pytest.raises(ValueError) as exc:
+ MaxPool2DDNNLayer((10, 20, 30, 40, 50), 3, 2)
+ assert "Expected 4 input dimensions" in exc.value.args[0]
+
+
+class TestMaxPool3DNNLayer:
+ def pool_test_sets_ignoreborder():
+ for pool_size in [2, 3]:
+ for stride in [1, 2, 3, 4]:
+ for pad in range(pool_size):
+ yield (pool_size, stride, pad)
+
+ def input_layer(self, output_shape):
+ return Mock(output_shape=output_shape)
+
+ def layer(self, input_layer, pool_size, stride, pad):
+ try:
+ from lasagne.layers.dnn import MaxPool3DDNNLayer
+ except ImportError:
+ pytest.skip("cuDNN not available")
+
+ return MaxPool3DDNNLayer(
+ input_layer,
+ pool_size=pool_size,
+ stride=stride,
+ pad=pad,
+ )
+
+ @pytest.mark.parametrize(
+ "pool_size, stride, pad", list(pool_test_sets_ignoreborder()))
+ def test_get_output_for_ignoreborder(self, pool_size,
+ stride, pad):
+ try:
+ input = floatX(np.random.randn(5, 8, 16, 17, 13))
+ input_layer = self.input_layer(input.shape)
+ input_theano = theano.shared(input)
+
+ result = self.layer(
+ input_layer,
+ pool_size,
+ stride,
+ pad,
+ ).get_output_for(input_theano)
+
+ result_eval = result.eval()
+ numpy_result = max_pool_3d_ignoreborder(
+ input, [pool_size]*3, [stride]*3, [pad]*3)
+
+ assert np.all(numpy_result.shape == result_eval.shape)
+ assert np.allclose(result_eval, numpy_result)
+ except NotImplementedError:
+ pytest.skip()
+
+ @pytest.mark.parametrize(
+ "input_shape,output_shape",
+ [((32, 32, 64, 24, 24), (32, 32, 32, 12, 12)),
+ ((None, 32, 48, 24, 24), (None, 32, 24, 12, 12)),
+ ((32, None, 32, 24, 24), (32, None, 16, 12, 12)),
+ ((32, 64, None, 24, 24), (32, 64, None, 12, 12)),
+ ((32, 64, 32, None, 24), (32, 64, 16, None, 12)),
+ ((32, 64, 32, 24, None), (32, 64, 16, 12, None)),
+ ((32, 64, 12, None, None), (32, 64, 6, None, None)),
+ ((32, 64, None, None, None), (32, 64, None, None, None))],
+ )
+ def test_get_output_shape_for(self, input_shape, output_shape):
+ try:
+ input_layer = self.input_layer(input_shape)
+ layer = self.layer(input_layer,
+ pool_size=(2, 2, 2), stride=None, pad=(0, 0, 0))
+ assert layer.get_output_shape_for(
+ input_shape) == output_shape
+ except NotImplementedError:
+ raise
+ # pytest.skip()
+
+ def test_not_implemented(self):
+ try:
+ from lasagne.layers.dnn import MaxPool3DDNNLayer
+ except ImportError:
+ pytest.skip("cuDNN not available")
+ with pytest.raises(NotImplementedError) as exc:
+ layer = MaxPool3DDNNLayer((1, 2, 3, 4, 5), pool_size=2,
+ ignore_border=False)
+ assert ("Pool3DDNNLayer does not support ignore_border=False" in
+ exc.value.args[0])
+
+ def test_fail_on_mismatching_dimensionality(self):
+ try:
+ from lasagne.layers.dnn import MaxPool3DDNNLayer
+ except ImportError:
+ pytest.skip("cuDNN not available")
+ with pytest.raises(ValueError) as exc:
+ MaxPool3DDNNLayer((10, 20, 30, 40), 3, 2)
+ assert "Expected 5 input dimensions" in exc.value.args[0]
+ with pytest.raises(ValueError) as exc:
+ MaxPool3DDNNLayer((10, 20, 30, 40, 50, 60), 3, 2)
+ assert "Expected 5 input dimensions" in exc.value.args[0]
+
+
+class TestUpscale1DLayer:
+    """Tests for lasagne.layers.pool.Upscale1DLayer."""
+    def scale_factor_test_sets():
+        # NOTE: no `self` -- consumed at class-body creation time.
+        for scale_factor in [2, 3]:
+            yield scale_factor
+
+    def input_layer(self, output_shape):
+        return Mock(output_shape=output_shape)
+
+    def layer(self, input_layer, scale_factor):
+        from lasagne.layers.pool import Upscale1DLayer
+        return Upscale1DLayer(
+            input_layer,
+            scale_factor=scale_factor,
+        )
+
+    def test_invalid_scale_factor(self):
+        # Zero or negative scale factors must be rejected.
+        from lasagne.layers.pool import Upscale1DLayer
+        inlayer = self.input_layer((128, 3, 32))
+        with pytest.raises(ValueError):
+            Upscale1DLayer(inlayer, scale_factor=0)
+        with pytest.raises(ValueError):
+            Upscale1DLayer(inlayer, scale_factor=-1)
+        with pytest.raises(ValueError):
+            Upscale1DLayer(inlayer, scale_factor=(0))
+
+    @pytest.mark.parametrize(
+        "scale_factor", list(scale_factor_test_sets()))
+    def test_get_output_for(self, scale_factor):
+        input = floatX(np.random.randn(8, 16, 17))
+        input_layer = self.input_layer(input.shape)
+        input_theano = theano.shared(input)
+        result = self.layer(
+            input_layer,
+            (scale_factor),
+        ).get_output_for(input_theano)
+
+        result_eval = result.eval()
+        # NOTE(review): a 2-tuple is passed to the 1D reference helper
+        # upscale_1d -- presumably it only reads the first element; confirm
+        # against the helper's definition.
+        numpy_result = upscale_1d(input, (scale_factor, scale_factor))
+
+        assert np.all(numpy_result.shape == result_eval.shape)
+        assert np.allclose(result_eval, numpy_result)
+
+    @pytest.mark.parametrize(
+        "input_shape,output_shape",
+        [((32, 64, 24), (32, 64, 48)),
+         ((None, 64, 24), (None, 64, 48)),
+         ((32, None, 24), (32, None, 48)),
+         ((32, 64, None), (32, 64, None))],
+    )
+    def test_get_output_shape_for(self, input_shape, output_shape):
+        # Unknown (None) dimensions must be propagated unchanged.
+        input_layer = self.input_layer(input_shape)
+        layer = self.layer(input_layer,
+                           scale_factor=(2))
+        assert layer.get_output_shape_for(
+            input_shape) == output_shape
+
+
+class TestUpscale2DLayer:
+    """Tests for lasagne.layers.pool.Upscale2DLayer."""
+    def scale_factor_test_sets():
+        # NOTE: no `self` -- consumed at class-body creation time.
+        for scale_factor in [2, 3]:
+            yield scale_factor
+
+    def input_layer(self, output_shape):
+        return Mock(output_shape=output_shape)
+
+    def layer(self, input_layer, scale_factor):
+        from lasagne.layers.pool import Upscale2DLayer
+        return Upscale2DLayer(
+            input_layer,
+            scale_factor=scale_factor,
+        )
+
+    def test_invalid_scale_factor(self):
+        # Zero or negative scale factors (scalar or per-axis) are rejected.
+        from lasagne.layers.pool import Upscale2DLayer
+        inlayer = self.input_layer((128, 3, 32, 32))
+        with pytest.raises(ValueError):
+            Upscale2DLayer(inlayer, scale_factor=0)
+        with pytest.raises(ValueError):
+            Upscale2DLayer(inlayer, scale_factor=-1)
+        with pytest.raises(ValueError):
+            Upscale2DLayer(inlayer, scale_factor=(0, 2))
+        with pytest.raises(ValueError):
+            Upscale2DLayer(inlayer, scale_factor=(2, 0))
+
+    @pytest.mark.parametrize(
+        "scale_factor", list(scale_factor_test_sets()))
+    def test_get_output_for(self, scale_factor):
+        # Compare against the NumPy reference upscale_2d.
+        input = floatX(np.random.randn(8, 16, 17, 13))
+        input_layer = self.input_layer(input.shape)
+        input_theano = theano.shared(input)
+        result = self.layer(
+            input_layer,
+            (scale_factor, scale_factor),
+        ).get_output_for(input_theano)
+
+        result_eval = result.eval()
+        numpy_result = upscale_2d(input, (scale_factor, scale_factor))
+
+        assert np.all(numpy_result.shape == result_eval.shape)
+        assert np.allclose(result_eval, numpy_result)
+
+    @pytest.mark.parametrize(
+        "input_shape,output_shape",
+        [((32, 64, 24, 24), (32, 64, 48, 48)),
+         ((None, 64, 24, 24), (None, 64, 48, 48)),
+         ((32, None, 24, 24), (32, None, 48, 48)),
+         ((32, 64, None, 24), (32, 64, None, 48)),
+         ((32, 64, 24, None), (32, 64, 48, None)),
+         ((32, 64, None, None), (32, 64, None, None))],
+    )
+    def test_get_output_shape_for(self, input_shape, output_shape):
+        # Unknown (None) dimensions must be propagated unchanged.
+        input_layer = self.input_layer(input_shape)
+        layer = self.layer(input_layer,
+                           scale_factor=(2, 2))
+        assert layer.get_output_shape_for(
+            input_shape) == output_shape
+
+
+class TestFeatureWTALayer(object):
+    """Tests for FeatureWTALayer (winner-take-all over channel groups)."""
+    @pytest.fixture
+    def FeatureWTALayer(self):
+        from lasagne.layers.pool import FeatureWTALayer
+        return FeatureWTALayer
+
+    @pytest.fixture
+    def input_layer(self):
+        from lasagne.layers.input import InputLayer
+        return InputLayer((2, 4, 8))
+
+    @pytest.fixture
+    def layer(self, FeatureWTALayer, input_layer):
+        return FeatureWTALayer(input_layer, pool_size=2)
+
+    def test_init_raises(self, FeatureWTALayer, input_layer):
+        # 4 channels are not divisible by pool_size=3.
+        with pytest.raises(ValueError):
+            FeatureWTALayer(input_layer, pool_size=3)
+
+    def test_get_output_for(self, layer):
+        input = theano.shared(np.random.uniform(-1, 1, (2, 4, 8)))
+        result = layer.get_output_for(input).eval()
+
+        # Reference: within each group of 2 channels, keep only the
+        # maximum element and zero the rest.
+        reshaped = input.get_value().reshape((2, 2, 2, 8))
+        np_result = reshaped * (reshaped == reshaped.max(2, keepdims=True))
+        np_result = np_result.reshape((2, 4, 8))
+
+        assert np.allclose(result, np_result)
+
+
+class TestGlobalPoolLayer(object):
+    """Tests for GlobalPoolLayer (pools over all trailing dimensions)."""
+    @pytest.fixture
+    def GlobalPoolLayer(self):
+        from lasagne.layers.pool import GlobalPoolLayer
+        return GlobalPoolLayer
+
+    @pytest.fixture
+    def layer(self, GlobalPoolLayer):
+        return GlobalPoolLayer(Mock(output_shape=(None,)))
+
+    def test_get_output_shape_for(self, layer):
+        # All dimensions past the first two are pooled away.
+        assert layer.get_output_shape_for((2, 3, 4, 5)) == (2, 3)
+
+    def test_get_output_for(self, layer):
+        input = theano.shared(np.random.uniform(-1, 1, (2, 3, 4, 5)))
+        result = layer.get_output_for(input).eval()
+
+        # Reference: mean over the flattened trailing dimensions.
+        np_result = input.get_value().reshape((2, 3, -1)).mean(-1)
+
+        assert np.allclose(result, np_result)
+
+
+class TestSpatialPyramidPoolingDNNLayer:
+ def pool_dims_test_sets():
+ for pyramid_level in [2, 3, 4]:
+ pool_dims = list(range(1, pyramid_level))
+ yield pool_dims
+
+ def input_layer(self, output_shape):
+ return Mock(output_shape=output_shape)
+
+ def layer(self, input_layer, pool_dims):
+ try:
+ from lasagne.layers.dnn import SpatialPyramidPoolingDNNLayer
+ except ImportError:
+ pytest.skip("cuDNN not available")
+
+ return SpatialPyramidPoolingDNNLayer(input_layer, pool_dims=pool_dims)
+
+ @pytest.mark.parametrize(
+ "pool_dims", list(pool_dims_test_sets()))
+ def test_get_output_for(self, pool_dims):
+ try:
+ input = floatX(np.random.randn(8, 16, 17, 13))
+ input_layer = self.input_layer(input.shape)
+ input_theano = theano.shared(input)
+ layer = self.layer(input_layer, pool_dims)
+
+ result = layer.get_output_for(input_theano)
+
+ result_eval = result.eval()
+ numpy_result = spatial_pool(input, pool_dims)
+
+ assert result_eval.shape == numpy_result.shape
+ assert np.allclose(result_eval, numpy_result)
+ assert result_eval.shape == layer.output_shape
+ except NotImplementedError:
+ pytest.skip()
+
+ @pytest.mark.parametrize(
+ "input_shape,output_shape",
+ [((32, 64, 24, 24), (32, 64, 21)),
+ ((None, 64, 23, 25), (None, 64, 21)),
+ ((32, None, 22, 26), (32, None, 21)),
+ ((None, None, None, None), (None, None, 21))],
+ )
+ def test_get_output_shape_for(self, input_shape, output_shape):
+ try:
+ input_layer = self.input_layer(input_shape)
+ layer = self.layer(input_layer, pool_dims=[1, 2, 4])
+ assert layer.get_output_shape_for(input_shape) == output_shape
+ except NotImplementedError:
+ raise
+
+ def test_fail_on_mismatching_dimensionality(self):
+ try:
+ from lasagne.layers.dnn import SpatialPyramidPoolingDNNLayer
+ except ImportError:
+ pytest.skip("cuDNN not available")
+ with pytest.raises(ValueError) as exc:
+ SpatialPyramidPoolingDNNLayer((10, 20, 30))
+ assert "Expected 4 input dimensions" in exc.value.args[0]
+ with pytest.raises(ValueError) as exc:
+ SpatialPyramidPoolingDNNLayer((10, 20, 30, 40, 50))
+ assert "Expected 4 input dimensions" in exc.value.args[0]
diff --git a/lasagne/tests/layers/test_recurrent.py b/lasagne/tests/layers/test_recurrent.py
new file mode 100644
index 0000000..9464c04
--- /dev/null
+++ b/lasagne/tests/layers/test_recurrent.py
@@ -0,0 +1,1101 @@
+import pytest
+
+from lasagne.layers import RecurrentLayer, LSTMLayer, CustomRecurrentLayer
+from lasagne.layers import InputLayer, DenseLayer, GRULayer, Gate, Layer
+from lasagne.layers import helper
+import theano
+import theano.tensor as T
+import numpy as np
+import lasagne
+from mock import Mock
+
+
+def test_recurrent_return_shape():
+    # RecurrentLayer on 4D input must produce (batch, seq_len, num_units),
+    # and symbolic shape inference must agree with the actual output.
+    num_batch, seq_len, n_features1, n_features2 = 5, 3, 10, 11
+    num_units = 6
+    x = T.tensor4()
+    in_shp = (num_batch, seq_len, n_features1, n_features2)
+    l_inp = InputLayer(in_shp)
+    l_rec = RecurrentLayer(l_inp, num_units=num_units)
+
+    x_in = np.random.random(in_shp).astype('float32')
+    output = helper.get_output(l_rec, x)
+    output_val = output.eval({x: x_in})
+
+    assert helper.get_output_shape(l_rec, x_in.shape) == output_val.shape
+    assert output_val.shape == (num_batch, seq_len, num_units)
+
+
+def test_recurrent_grad():
+    # Smoke test: the gradient of the mean output w.r.t. all parameters
+    # must be computable (scan backprop works).
+    num_batch, seq_len, n_features = 5, 3, 10
+    num_units = 6
+    l_inp = InputLayer((num_batch, seq_len, n_features))
+    l_rec = RecurrentLayer(l_inp,
+                           num_units=num_units)
+    output = helper.get_output(l_rec)
+    g = T.grad(T.mean(output), lasagne.layers.get_all_params(l_rec))
+    assert isinstance(g, (list, tuple))
+
+
+def test_recurrent_nparams():
+    # Parameter counts with learn_init=False (hid_init not trainable).
+    l_inp = InputLayer((2, 2, 3))
+    l_rec = RecurrentLayer(l_inp, 5, learn_init=False, nonlinearity=None)
+
+    # b, W_hid_to_hid and W_in_to_hid
+    assert len(lasagne.layers.get_all_params(l_rec, trainable=True)) == 3
+
+    # b + hid_init
+    assert len(lasagne.layers.get_all_params(l_rec, regularizable=False)) == 2
+
+
+def test_recurrent_nparams_learn_init():
+    # Parameter counts with learn_init=True: hid_init becomes trainable.
+    l_inp = InputLayer((2, 2, 3))
+    l_rec = RecurrentLayer(l_inp, 5, learn_init=True)
+
+    # b, W_hid_to_hid and W_in_to_hid + hid_init
+    assert len(lasagne.layers.get_all_params(l_rec, trainable=True)) == 4
+
+    # b + hid_init
+    assert len(lasagne.layers.get_all_params(l_rec, regularizable=False)) == 2
+
+
+def test_recurrent_hid_init_layer():
+    # test that you can set hid_init to be a layer
+    l_inp = InputLayer((2, 2, 3))
+    l_inp_h = InputLayer((2, 5))
+    l_rec = RecurrentLayer(l_inp, 5, hid_init=l_inp_h)
+
+    x = T.tensor3()
+    h = T.matrix()
+
+    # Smoke test: value unused, building the output graph must not raise.
+    output = lasagne.layers.get_output(l_rec, {l_inp: x, l_inp_h: h})
+
+
+def test_recurrent_nparams_hid_init_layer():
+    # test that you can see layers through hid_init
+    l_inp = InputLayer((2, 2, 3))
+    l_inp_h = InputLayer((2, 5))
+    l_inp_h_de = DenseLayer(l_inp_h, 7)
+    l_rec = RecurrentLayer(l_inp, 7, hid_init=l_inp_h_de)
+
+    # directly check the layers can be seen through hid_init
+    assert lasagne.layers.get_all_layers(l_rec) == [l_inp, l_inp_h, l_inp_h_de,
+                                                    l_rec]
+
+    # b, W_hid_to_hid and W_in_to_hid + W + b (of the DenseLayer hid_init)
+    assert len(lasagne.layers.get_all_params(l_rec, trainable=True)) == 5
+
+    # b (recurrent) + b (dense)
+    assert len(lasagne.layers.get_all_params(l_rec, regularizable=False)) == 2
+
+
+def test_recurrent_hid_init_mask():
+    # test that you can set hid_init to be a layer when a mask is provided
+    l_inp = InputLayer((2, 2, 3))
+    l_inp_h = InputLayer((2, 5))
+    l_inp_msk = InputLayer((2, 2))
+    l_rec = RecurrentLayer(l_inp, 5, hid_init=l_inp_h, mask_input=l_inp_msk)
+
+    x = T.tensor3()
+    h = T.matrix()
+    msk = T.matrix()
+
+    # Smoke test: value unused, building the output graph must not raise.
+    inputs = {l_inp: x, l_inp_h: h, l_inp_msk: msk}
+    output = lasagne.layers.get_output(l_rec, inputs)
+
+
+def test_recurrent_hid_init_layer_eval():
+    # Test `hid_init` as a `Layer` with some dummy input. Compare the output of
+    # a network with a `Layer` as input to `hid_init` to a network with a
+    # `np.array` as input to `hid_init`
+    n_units = 7
+    n_test_cases = 2
+    in_shp = (n_test_cases, 2, 3)
+    in_h_shp = (1, n_units)
+
+    # dummy inputs
+    X_test = np.ones(in_shp, dtype=theano.config.floatX)
+    Xh_test = np.ones(in_h_shp, dtype=theano.config.floatX)
+    Xh_test_batch = np.tile(Xh_test, (n_test_cases, 1))
+
+    # network with `Layer` initializer for hid_init
+    l_inp = InputLayer(in_shp)
+    l_inp_h = InputLayer(in_h_shp)
+    l_rec_inp_layer = RecurrentLayer(l_inp, n_units, hid_init=l_inp_h,
+                                     nonlinearity=None)
+
+    # network with `np.array` initializer for hid_init
+    l_rec_nparray = RecurrentLayer(l_inp, n_units, hid_init=Xh_test,
+                                   nonlinearity=None)
+
+    # copy network parameters from l_rec_inp_layer to l_rec_nparray
+    # (parameters are matched up by their shared-variable names)
+    l_il_param = dict([(p.name, p) for p in l_rec_inp_layer.get_params()])
+    l_rn_param = dict([(p.name, p) for p in l_rec_nparray.get_params()])
+    for k, v in l_rn_param.items():
+        if k in l_il_param:
+            v.set_value(l_il_param[k].get_value())
+
+    # build the theano functions
+    X = T.tensor3()
+    Xh = T.matrix()
+    output_inp_layer = lasagne.layers.get_output(l_rec_inp_layer,
+                                                 {l_inp: X, l_inp_h: Xh})
+    output_nparray = lasagne.layers.get_output(l_rec_nparray, {l_inp: X})
+
+    # test both nets with dummy input
+    output_val_inp_layer = output_inp_layer.eval({X: X_test,
+                                                  Xh: Xh_test_batch})
+    output_val_nparray = output_nparray.eval({X: X_test})
+
+    # check output given `Layer` is the same as with `np.array`
+    assert np.allclose(output_val_inp_layer, output_val_nparray)
+
+
+def test_recurrent_incoming_tuple():
+    # A shape tuple may be passed instead of an incoming InputLayer.
+    input_shape = (2, 3, 4)
+    l_rec = lasagne.layers.RecurrentLayer(input_shape, 5)
+    assert l_rec.input_shapes[0] == input_shape
+
+
+def test_recurrent_name():
+    # The layer name must be propagated into the parameter names of the
+    # internal input_to_hidden and hidden_to_hidden sub-layers.
+    l_in = lasagne.layers.InputLayer((2, 3, 4))
+    layer_name = 'l_rec'
+    l_rec = lasagne.layers.RecurrentLayer(l_in, 4, name=layer_name)
+    assert l_rec.b.name == layer_name + '.input_to_hidden.b'
+    assert l_rec.W_in_to_hid.name == layer_name + '.input_to_hidden.W'
+    assert l_rec.W_hid_to_hid.name == layer_name + '.hidden_to_hidden.W'
+
+
+def test_custom_recurrent_arbitrary_shape():
+    # Check that the custom recurrent layer can handle more than 1 feature dim
+    n_batch, n_steps, n_channels, width, height = (2, 3, 4, 5, 6)
+    n_out_filters = 7
+    filter_shape = (3, 3)
+    l_in = lasagne.layers.InputLayer(
+        (n_batch, n_steps, n_channels, width, height))
+    # Both transforms use 'same'-padded convolutions so the spatial
+    # dimensions are preserved across recurrence steps.
+    l_in_to_hid = lasagne.layers.Conv2DLayer(
+        lasagne.layers.InputLayer((None, n_channels, width, height)),
+        n_out_filters, filter_shape, pad='same')
+    l_hid_to_hid = lasagne.layers.Conv2DLayer(
+        lasagne.layers.InputLayer((None, n_out_filters, width, height)),
+        n_out_filters, filter_shape, pad='same')
+    l_rec = lasagne.layers.CustomRecurrentLayer(
+        l_in, l_in_to_hid, l_hid_to_hid)
+    assert l_rec.output_shape == (n_batch, n_steps, n_out_filters, width,
+                                  height)
+    out = theano.function([l_in.input_var], lasagne.layers.get_output(l_rec))
+    out_shape = out(np.zeros((n_batch, n_steps, n_channels, width, height),
+                             dtype=theano.config.floatX)).shape
+    assert out_shape == (n_batch, n_steps, n_out_filters, width, height)
+
+
+def test_recurrent_init_shape_error():
+    # Check that the custom recurrent layer throws errors for invalid shapes
+    # (the l_rec assignments never complete -- construction must raise).
+    n_batch, n_steps, n_channels, width, height = (2, 3, 4, 5, 6)
+    n_out_filters = 7
+    filter_shape = (3, 3)
+    l_in = lasagne.layers.InputLayer(
+        (n_batch, n_steps, n_channels, width, height))
+    l_hid_to_hid = lasagne.layers.Conv2DLayer(
+        lasagne.layers.InputLayer((n_batch, n_out_filters, width, height)),
+        n_out_filters, filter_shape, pad='same')
+
+    # When precompute_input == True, input_to_hidden.shape[0] must be None
+    # or n_batch*n_steps
+    l_in_to_hid = lasagne.layers.Conv2DLayer(
+        lasagne.layers.InputLayer((n_batch, n_channels, width, height)),
+        n_out_filters, filter_shape, pad='same')
+    with pytest.raises(ValueError):
+        l_rec = lasagne.layers.CustomRecurrentLayer(
+            l_in, l_in_to_hid, l_hid_to_hid, precompute_input=True)
+
+    # When precompute_input = False, input_to_hidden.shape[1] must be None
+    # or hidden_to_hidden.shape[1]
+    l_in_to_hid = lasagne.layers.Conv2DLayer(
+        lasagne.layers.InputLayer((n_batch + 1, n_channels, width, height)),
+        n_out_filters, filter_shape, pad='same')
+    with pytest.raises(ValueError):
+        l_rec = lasagne.layers.CustomRecurrentLayer(
+            l_in, l_in_to_hid, l_hid_to_hid, precompute_input=False)
+
+    # In any case, input_to_hidden and hidden_to_hidden's output shapes after
+    # the first dimension must match
+    l_in_to_hid = lasagne.layers.Conv2DLayer(
+        lasagne.layers.InputLayer((None, n_channels, width + 1, height)),
+        n_out_filters, filter_shape, pad='same')
+    with pytest.raises(ValueError):
+        l_rec = lasagne.layers.CustomRecurrentLayer(
+            l_in, l_in_to_hid, l_hid_to_hid)
+
+    # And, the output shape of input_to_hidden must match the input shape
+    # of hidden_to_hidden past the first dimension. By not using padding,
+    # the output of l_in_to_hid will be cropped, which will make the
+    # shape inappropriate.
+    l_in_to_hid = lasagne.layers.Conv2DLayer(
+        lasagne.layers.InputLayer((None, n_channels, width, height)),
+        n_out_filters, filter_shape)
+    l_hid_to_hid = lasagne.layers.Conv2DLayer(
+        lasagne.layers.InputLayer((n_batch, n_out_filters, width, height)),
+        n_out_filters, filter_shape)
+    with pytest.raises(ValueError):
+        l_rec = lasagne.layers.CustomRecurrentLayer(
+            l_in, l_in_to_hid, l_hid_to_hid)
+
+
+def test_recurrent_grad_clipping():
+    # Smoke test: the grad_clipping option must not break graph building.
+    num_units = 5
+    batch_size = 3
+    seq_len = 2
+    n_inputs = 4
+    in_shp = (batch_size, seq_len, n_inputs)
+    l_inp = InputLayer(in_shp)
+    x = T.tensor3()
+    l_rec = RecurrentLayer(l_inp, num_units, grad_clipping=1.0)
+    output = lasagne.layers.get_output(l_rec, x)
+
+
+def test_recurrent_bck():
+ num_batch, seq_len, n_features1 = 2, 3, 4
+ num_units = 2
+ x = T.tensor3()
+ in_shp = (num_batch, seq_len, n_features1)
+ l_inp = InputLayer(in_shp)
+
+ x_in = np.ones(in_shp).astype('float32')
+
+ # need to set random seed.
+ lasagne.random.get_rng().seed(1234)
+ l_rec_fwd = RecurrentLayer(l_inp, num_units=num_units, backwards=False)
+ lasagne.random.get_rng().seed(1234)
+ l_rec_bck = RecurrentLayer(l_inp, num_units=num_units, backwards=True)
+ l_out_fwd = helper.get_output(l_rec_fwd, x)
+ l_out_bck = helper.get_output(l_rec_bck, x)
+
+ output_fwd = l_out_fwd.eval({l_out_fwd: x_in})
+ output_bck = l_out_bck.eval({l_out_bck: x_in})
+
+ # test that the backwards model reverses its final input
+ np.testing.assert_almost_equal(output_fwd, output_bck[:, ::-1])
+
+
+def test_recurrent_variable_input_size():
+    # check that seqlen and batchsize None works
+    num_batch, n_features1 = 6, 5
+    num_units = 13
+    x = T.tensor3()
+
+    in_shp = (None, None, n_features1)
+    l_inp = InputLayer(in_shp)
+    x_in1 = np.ones((num_batch+1, 10, n_features1)).astype('float32')
+    x_in2 = np.ones((num_batch, 15, n_features1)).astype('float32')
+    l_rec = RecurrentLayer(l_inp, num_units=num_units, backwards=False)
+    output = helper.get_output(l_rec, x)
+    # Smoke test: the same compiled graph must evaluate for different
+    # batch sizes and sequence lengths.
+    output_val1 = output.eval({x: x_in1})
+    output_val2 = output.eval({x: x_in2})
+
+
+def test_recurrent_unroll_scan_fwd():
+ num_batch, seq_len, n_features1 = 2, 3, 4
+ num_units = 2
+ in_shp = (num_batch, seq_len, n_features1)
+ l_inp = InputLayer(in_shp)
+ l_mask_inp = InputLayer(in_shp[:2])
+
+ x_in = np.random.random(in_shp).astype('float32')
+ mask_in = np.ones(in_shp[:2]).astype('float32')
+
+ # need to set random seed.
+ lasagne.random.get_rng().seed(1234)
+ l_rec_scan = RecurrentLayer(l_inp, num_units=num_units, backwards=False,
+ unroll_scan=False, mask_input=l_mask_inp)
+ lasagne.random.get_rng().seed(1234)
+ l_rec_unroll = RecurrentLayer(l_inp, num_units=num_units, backwards=False,
+ unroll_scan=True, mask_input=l_mask_inp)
+ output_scan = helper.get_output(l_rec_scan)
+ output_unrolled = helper.get_output(l_rec_unroll)
+
+ output_scan_val = output_scan.eval(
+ {l_inp.input_var: x_in, l_mask_inp.input_var: mask_in})
+ output_unrolled_val = output_unrolled.eval(
+ {l_inp.input_var: x_in, l_mask_inp.input_var: mask_in})
+ np.testing.assert_almost_equal(output_scan_val, output_unrolled_val)
+
+
+def test_recurrent_unroll_scan_bck():
+ num_batch, seq_len, n_features1 = 2, 3, 4
+ num_units = 2
+ x = T.tensor3()
+ in_shp = (num_batch, seq_len, n_features1)
+ l_inp = InputLayer(in_shp)
+ x_in = np.random.random(in_shp).astype('float32')
+
+ # need to set random seed.
+ lasagne.random.get_rng().seed(1234)
+ l_rec_scan = RecurrentLayer(l_inp, num_units=num_units, backwards=True,
+ unroll_scan=False)
+ lasagne.random.get_rng().seed(1234)
+ l_rec_unroll = RecurrentLayer(l_inp, num_units=num_units, backwards=True,
+ unroll_scan=True)
+ output_scan = helper.get_output(l_rec_scan, x)
+ output_unrolled = helper.get_output(l_rec_unroll, x)
+ output_scan_val = output_scan.eval({x: x_in})
+ output_unrolled_val = output_unrolled.eval({x: x_in})
+
+ np.testing.assert_almost_equal(output_scan_val, output_unrolled_val)
+
+
+def test_recurrent_precompute():
+ num_batch, seq_len, n_features1 = 2, 3, 4
+ num_units = 2
+ in_shp = (num_batch, seq_len, n_features1)
+ l_inp = InputLayer(in_shp)
+ l_mask_inp = InputLayer(in_shp[:2])
+
+ x_in = np.random.random(in_shp).astype('float32')
+ mask_in = np.ones((num_batch, seq_len), dtype='float32')
+
+ # need to set random seed.
+ lasagne.random.get_rng().seed(1234)
+ l_rec_precompute = RecurrentLayer(l_inp, num_units=num_units,
+ precompute_input=True,
+ mask_input=l_mask_inp)
+ lasagne.random.get_rng().seed(1234)
+ l_rec_no_precompute = RecurrentLayer(l_inp, num_units=num_units,
+ precompute_input=False,
+ mask_input=l_mask_inp)
+ output_precompute = helper.get_output(
+ l_rec_precompute).eval({l_inp.input_var: x_in,
+ l_mask_inp.input_var: mask_in})
+ output_no_precompute = helper.get_output(
+ l_rec_no_precompute).eval({l_inp.input_var: x_in,
+ l_mask_inp.input_var: mask_in})
+
+ np.testing.assert_almost_equal(output_precompute, output_no_precompute)
+
+
+def test_recurrent_return_final():
+ num_batch, seq_len, n_features = 2, 3, 4
+ num_units = 2
+ in_shp = (num_batch, seq_len, n_features)
+ x_in = np.random.random(in_shp).astype('float32')
+
+ l_inp = InputLayer(in_shp)
+ lasagne.random.get_rng().seed(1234)
+ l_rec_final = RecurrentLayer(l_inp, num_units, only_return_final=True)
+ lasagne.random.get_rng().seed(1234)
+ l_rec_all = RecurrentLayer(l_inp, num_units, only_return_final=False)
+
+ output_final = helper.get_output(l_rec_final).eval({l_inp.input_var: x_in})
+ output_all = helper.get_output(l_rec_all).eval({l_inp.input_var: x_in})
+
+ assert output_final.shape == (output_all.shape[0], output_all.shape[2])
+ assert output_final.shape == lasagne.layers.get_output_shape(l_rec_final)
+ assert np.allclose(output_final, output_all[:, -1])
+
+
+def test_lstm_return_shape():
+ num_batch, seq_len, n_features1, n_features2 = 5, 3, 10, 11
+ num_units = 6
+ x = T.tensor4()
+ in_shp = (num_batch, seq_len, n_features1, n_features2)
+ l_inp = InputLayer(in_shp)
+
+ x_in = np.random.random(in_shp).astype('float32')
+
+ l_lstm = LSTMLayer(l_inp, num_units=num_units)
+ output = helper.get_output(l_lstm, x)
+ output_val = output.eval({x: x_in})
+ assert helper.get_output_shape(l_lstm, x_in.shape) == output_val.shape
+ assert output_val.shape == (num_batch, seq_len, num_units)
+
+
+def test_lstm_grad():
+ num_batch, seq_len, n_features = 5, 3, 10
+ num_units = 6
+ l_inp = InputLayer((num_batch, seq_len, n_features))
+ l_lstm = LSTMLayer(l_inp, num_units=num_units)
+ output = helper.get_output(l_lstm)
+ g = T.grad(T.mean(output), lasagne.layers.get_all_params(l_lstm))
+ assert isinstance(g, (list, tuple))
+
+
+def test_lstm_nparams_no_peepholes():
+ l_inp = InputLayer((2, 2, 3))
+ l_lstm = LSTMLayer(l_inp, 5, peepholes=False, learn_init=False)
+
+ # 3*n_gates
+ # the 3 is because we have hid_to_gate, in_to_gate and bias for each gate
+ assert len(lasagne.layers.get_all_params(l_lstm, trainable=True)) == 12
+
+ # bias params + init params
+ assert len(lasagne.layers.get_all_params(l_lstm, regularizable=False)) == 6
+
+
+def test_lstm_nparams_peepholes():
+ l_inp = InputLayer((2, 2, 3))
+ l_lstm = LSTMLayer(l_inp, 5, peepholes=True, learn_init=False)
+
+ # 3*n_gates + peepholes(3).
+ # the 3 is because we have hid_to_gate, in_to_gate and bias for each gate
+ assert len(lasagne.layers.get_all_params(l_lstm, trainable=True)) == 15
+
+ # bias params(4) + init params(2)
+ assert len(lasagne.layers.get_all_params(l_lstm, regularizable=False)) == 6
+
+
+def test_lstm_nparams_learn_init():
+ l_inp = InputLayer((2, 2, 3))
+ l_lstm = LSTMLayer(l_inp, 5, peepholes=False, learn_init=True)
+
+ # 3*n_gates + inits(2).
+ # the 3 is because we have hid_to_gate, in_to_gate and bias for each gate
+ assert len(lasagne.layers.get_all_params(l_lstm, trainable=True)) == 14
+
+ # bias params(4) + init params(2)
+ assert len(lasagne.layers.get_all_params(l_lstm, regularizable=False)) == 6
+
+
+def test_lstm_hid_init_layer():
+ # test that you can set hid_init to be a layer
+ l_inp = InputLayer((2, 2, 3))
+ l_inp_h = InputLayer((2, 5))
+ l_cell_h = InputLayer((2, 5))
+ l_lstm = LSTMLayer(l_inp, 5, hid_init=l_inp_h, cell_init=l_cell_h)
+
+ x = T.tensor3()
+ h = T.matrix()
+
+ output = lasagne.layers.get_output(l_lstm, {l_inp: x, l_inp_h: h})
+
+
+def test_lstm_nparams_hid_init_layer():
+ # test that you can see layers through hid_init
+ l_inp = InputLayer((2, 2, 3))
+ l_inp_h = InputLayer((2, 5))
+ l_inp_h_de = DenseLayer(l_inp_h, 7)
+ l_inp_cell = InputLayer((2, 5))
+ l_inp_cell_de = DenseLayer(l_inp_cell, 7)
+ l_lstm = LSTMLayer(l_inp, 7, hid_init=l_inp_h_de, cell_init=l_inp_cell_de)
+
+ # directly check the layers can be seen through hid_init
+ layers_to_find = [l_inp, l_inp_h, l_inp_h_de, l_inp_cell, l_inp_cell_de,
+ l_lstm]
+ assert lasagne.layers.get_all_layers(l_lstm) == layers_to_find
+
+ # 3*n_gates + 4
+ # the 3 is because we have hid_to_gate, in_to_gate and bias for each gate
+ # 4 is for the W and b parameters in the two DenseLayer layers
+ assert len(lasagne.layers.get_all_params(l_lstm, trainable=True)) == 19
+
+ # LSTM bias params(4) + Dense bias params(1) * 2
+ assert len(lasagne.layers.get_all_params(l_lstm, regularizable=False)) == 6
+
+
+def test_lstm_hid_init_mask():
+ # test that you can set hid_init to be a layer when a mask is provided
+ l_inp = InputLayer((2, 2, 3))
+ l_inp_h = InputLayer((2, 5))
+ l_inp_msk = InputLayer((2, 2))
+ l_cell_h = InputLayer((2, 5))
+ l_lstm = LSTMLayer(l_inp, 5, hid_init=l_inp_h, mask_input=l_inp_msk,
+ cell_init=l_cell_h)
+
+ x = T.tensor3()
+ h = T.matrix()
+ msk = T.matrix()
+
+ inputs = {l_inp: x, l_inp_h: h, l_inp_msk: msk}
+ output = lasagne.layers.get_output(l_lstm, inputs)
+
+
+def test_lstm_hid_init_layer_eval():
+ # Test `hid_init` as a `Layer` with some dummy input. Compare the output of
+ # a network with a `Layer` as input to `hid_init` to a network with a
+ # `np.array` as input to `hid_init`
+ n_units = 7
+ n_test_cases = 2
+ in_shp = (n_test_cases, 2, 3)
+ in_h_shp = (1, n_units)
+ in_cell_shp = (1, n_units)
+
+ # dummy inputs
+ X_test = np.ones(in_shp, dtype=theano.config.floatX)
+ Xh_test = np.ones(in_h_shp, dtype=theano.config.floatX)
+ Xc_test = np.ones(in_cell_shp, dtype=theano.config.floatX)
+ Xh_test_batch = np.tile(Xh_test, (n_test_cases, 1))
+ Xc_test_batch = np.tile(Xc_test, (n_test_cases, 1))
+
+ # network with `Layer` initializer for hid_init
+ l_inp = InputLayer(in_shp)
+ l_inp_h = InputLayer(in_h_shp)
+ l_inp_cell = InputLayer(in_cell_shp)
+ l_rec_inp_layer = LSTMLayer(l_inp, n_units, hid_init=l_inp_h,
+ cell_init=l_inp_cell, nonlinearity=None)
+
+ # network with `np.array` initializer for hid_init
+ l_rec_nparray = LSTMLayer(l_inp, n_units, hid_init=Xh_test,
+ cell_init=Xc_test, nonlinearity=None)
+
+ # copy network parameters from l_rec_inp_layer to l_rec_nparray
+ l_il_param = dict([(p.name, p) for p in l_rec_inp_layer.get_params()])
+ l_rn_param = dict([(p.name, p) for p in l_rec_nparray.get_params()])
+ for k, v in l_rn_param.items():
+ if k in l_il_param:
+ v.set_value(l_il_param[k].get_value())
+
+ # build the theano functions
+ X = T.tensor3()
+ Xh = T.matrix()
+ Xc = T.matrix()
+ output_inp_layer = lasagne.layers.get_output(l_rec_inp_layer,
+ {l_inp: X, l_inp_h:
+ Xh, l_inp_cell: Xc})
+ output_nparray = lasagne.layers.get_output(l_rec_nparray, {l_inp: X})
+
+ # test both nets with dummy input
+ output_val_inp_layer = output_inp_layer.eval({X: X_test, Xh: Xh_test_batch,
+ Xc: Xc_test_batch})
+ output_val_nparray = output_nparray.eval({X: X_test})
+
+ # check output given `Layer` is the same as with `np.array`
+ assert np.allclose(output_val_inp_layer, output_val_nparray)
+
+
+def test_lstm_grad_clipping():
+ # test that you can set grad_clip variable
+ x = T.tensor3()
+ l_rec = LSTMLayer(InputLayer((2, 2, 3)), 5, grad_clipping=1)
+ output = lasagne.layers.get_output(l_rec, x)
+
+
+def test_lstm_bck():
+ num_batch, seq_len, n_features1 = 2, 3, 4
+ num_units = 2
+ x = T.tensor3()
+ in_shp = (num_batch, seq_len, n_features1)
+ l_inp = InputLayer(in_shp)
+
+ x_in = np.ones(in_shp).astype('float32')
+
+ # need to set random seed.
+ lasagne.random.get_rng().seed(1234)
+ l_lstm_fwd = LSTMLayer(l_inp, num_units=num_units, backwards=False)
+ lasagne.random.get_rng().seed(1234)
+ l_lstm_bck = LSTMLayer(l_inp, num_units=num_units, backwards=True)
+ output_fwd = helper.get_output(l_lstm_fwd, x)
+ output_bck = helper.get_output(l_lstm_bck, x)
+
+ output_fwd_val = output_fwd.eval({x: x_in})
+ output_bck_val = output_bck.eval({x: x_in})
+
+ # test that the backwards model reverses its final input
+ np.testing.assert_almost_equal(output_fwd_val, output_bck_val[:, ::-1])
+
+
+def test_lstm_precompute():
+ num_batch, seq_len, n_features1 = 2, 3, 4
+ num_units = 2
+ in_shp = (num_batch, seq_len, n_features1)
+ l_inp = InputLayer(in_shp)
+ l_mask_inp = InputLayer(in_shp[:2])
+
+ x_in = np.random.random(in_shp).astype('float32')
+ mask_in = np.ones((num_batch, seq_len), dtype='float32')
+
+ # need to set random seed.
+ lasagne.random.get_rng().seed(1234)
+ l_lstm_precompute = LSTMLayer(
+ l_inp, num_units=num_units, precompute_input=True,
+ mask_input=l_mask_inp)
+ lasagne.random.get_rng().seed(1234)
+ l_lstm_no_precompute = LSTMLayer(
+ l_inp, num_units=num_units, precompute_input=False,
+ mask_input=l_mask_inp)
+ output_precompute = helper.get_output(
+ l_lstm_precompute).eval({l_inp.input_var: x_in,
+ l_mask_inp.input_var: mask_in})
+ output_no_precompute = helper.get_output(
+ l_lstm_no_precompute).eval({l_inp.input_var: x_in,
+ l_mask_inp.input_var: mask_in})
+
+ # test that precomputing the input gives the same output
+ np.testing.assert_almost_equal(output_precompute, output_no_precompute)
+
+
+def test_lstm_variable_input_size():
+ # check that seqlen and batchsize None works
+ num_batch, n_features1 = 6, 5
+ num_units = 13
+ x = T.tensor3()
+
+ in_shp = (None, None, n_features1)
+ l_inp = InputLayer(in_shp)
+ x_in1 = np.ones((num_batch+1, 3+1, n_features1)).astype('float32')
+ x_in2 = np.ones((num_batch, 3, n_features1)).astype('float32')
+ l_rec = LSTMLayer(l_inp, num_units=num_units, backwards=False)
+ output = helper.get_output(l_rec, x)
+ output_val1 = output.eval({x: x_in1})
+ output_val2 = output.eval({x: x_in2})
+
+
+def test_lstm_unroll_scan_fwd():
+ num_batch, seq_len, n_features1 = 2, 3, 4
+ num_units = 2
+ in_shp = (num_batch, seq_len, n_features1)
+ l_inp = InputLayer(in_shp)
+ l_mask_inp = InputLayer(in_shp[:2])
+
+ x_in = np.random.random(in_shp).astype('float32')
+ mask_in = np.ones(in_shp[:2]).astype('float32')
+
+ # need to set random seed.
+ lasagne.random.get_rng().seed(1234)
+ l_lstm_scan = LSTMLayer(l_inp, num_units=num_units, backwards=False,
+ unroll_scan=False, mask_input=l_mask_inp)
+ lasagne.random.get_rng().seed(1234)
+ l_lstm_unrolled = LSTMLayer(l_inp, num_units=num_units, backwards=False,
+ unroll_scan=True, mask_input=l_mask_inp)
+ output_scan = helper.get_output(l_lstm_scan)
+ output_unrolled = helper.get_output(l_lstm_unrolled)
+
+ output_scan_val = output_scan.eval({l_inp.input_var: x_in,
+ l_mask_inp.input_var: mask_in})
+ output_unrolled_val = output_unrolled.eval({l_inp.input_var: x_in,
+ l_mask_inp.input_var: mask_in})
+
+ np.testing.assert_almost_equal(output_scan_val, output_unrolled_val)
+
+
+def test_lstm_unroll_scan_bck():
+ num_batch, seq_len, n_features1 = 2, 3, 4
+ num_units = 2
+ x = T.tensor3()
+ in_shp = (num_batch, seq_len, n_features1)
+ l_inp = InputLayer(in_shp)
+
+ x_in = np.random.random(in_shp).astype('float32')
+
+ # need to set random seed.
+ lasagne.random.get_rng().seed(1234)
+ l_lstm_scan = LSTMLayer(l_inp, num_units=num_units, backwards=True,
+ unroll_scan=False)
+ lasagne.random.get_rng().seed(1234)
+ l_lstm_unrolled = LSTMLayer(l_inp, num_units=num_units, backwards=True,
+ unroll_scan=True)
+ output_scan = helper.get_output(l_lstm_scan, x)
+ output_scan_unrolled = helper.get_output(l_lstm_unrolled, x)
+
+ output_scan_val = output_scan.eval({x: x_in})
+ output_unrolled_val = output_scan_unrolled.eval({x: x_in})
+
+ np.testing.assert_almost_equal(output_scan_val, output_unrolled_val)
+
+
+def test_lstm_passthrough():
+ # Tests that the LSTM can simply pass through its input
+ l_in = InputLayer((4, 5, 6))
+ zero = lasagne.init.Constant(0.)
+ one = lasagne.init.Constant(1.)
+ pass_gate = Gate(zero, zero, zero, one, None)
+ no_gate = Gate(zero, zero, zero, zero, None)
+ in_pass_gate = Gate(
+ np.eye(6).astype(theano.config.floatX), zero, zero, zero, None)
+ l_rec = LSTMLayer(
+ l_in, 6, pass_gate, no_gate, in_pass_gate, pass_gate, None)
+ out = lasagne.layers.get_output(l_rec)
+ inp = np.arange(4*5*6).reshape(4, 5, 6).astype(theano.config.floatX)
+ np.testing.assert_almost_equal(out.eval({l_in.input_var: inp}), inp)
+
+
+def test_lstm_return_final():
+ num_batch, seq_len, n_features = 2, 3, 4
+ num_units = 2
+ in_shp = (num_batch, seq_len, n_features)
+ x_in = np.random.random(in_shp).astype('float32')
+
+ l_inp = InputLayer(in_shp)
+ lasagne.random.get_rng().seed(1234)
+ l_rec_final = LSTMLayer(l_inp, num_units, only_return_final=True)
+ lasagne.random.get_rng().seed(1234)
+ l_rec_all = LSTMLayer(l_inp, num_units, only_return_final=False)
+
+ output_final = helper.get_output(l_rec_final).eval({l_inp.input_var: x_in})
+ output_all = helper.get_output(l_rec_all).eval({l_inp.input_var: x_in})
+
+ assert output_final.shape == (output_all.shape[0], output_all.shape[2])
+ assert output_final.shape == lasagne.layers.get_output_shape(l_rec_final)
+ assert np.allclose(output_final, output_all[:, -1])
+
+
+def test_gru_return_shape():
+ num_batch, seq_len, n_features1, n_features2 = 5, 3, 10, 11
+ num_units = 6
+ x = T.tensor4()
+ in_shp = (num_batch, seq_len, n_features1, n_features2)
+ l_inp = InputLayer(in_shp)
+ l_rec = GRULayer(l_inp, num_units=num_units)
+
+ x_in = np.random.random(in_shp).astype('float32')
+ output = helper.get_output(l_rec, x)
+ output_val = output.eval({x: x_in})
+
+ assert helper.get_output_shape(l_rec, x_in.shape) == output_val.shape
+ assert output_val.shape == (num_batch, seq_len, num_units)
+
+
+def test_gru_grad():
+ num_batch, seq_len, n_features = 5, 3, 10
+ num_units = 6
+ l_inp = InputLayer((num_batch, seq_len, n_features))
+ l_gru = GRULayer(l_inp,
+ num_units=num_units)
+ output = helper.get_output(l_gru)
+ g = T.grad(T.mean(output), lasagne.layers.get_all_params(l_gru))
+ assert isinstance(g, (list, tuple))
+
+
+def test_gru_nparams_learn_init_false():
+ l_inp = InputLayer((2, 2, 3))
+ l_gru = GRULayer(l_inp, 5, learn_init=False)
+
+ # 3*n_gates
+ # the 3 is because we have hid_to_gate, in_to_gate and bias for each gate
+ assert len(lasagne.layers.get_all_params(l_gru, trainable=True)) == 9
+
+ # bias params(3) + hid_init
+ assert len(lasagne.layers.get_all_params(l_gru, regularizable=False)) == 4
+
+
+def test_gru_nparams_learn_init_true():
+ l_inp = InputLayer((2, 2, 3))
+ l_gru = GRULayer(l_inp, 5, learn_init=True)
+
+ # 3*n_gates + hid_init
+ # the 3 is because we have hid_to_gate, in_to_gate and bias for each gate
+ assert len(lasagne.layers.get_all_params(l_gru, trainable=True)) == 10
+
+ # bias params(3) + init params(1)
+ assert len(lasagne.layers.get_all_params(l_gru, regularizable=False)) == 4
+
+
+def test_gru_hid_init_layer():
+ # test that you can set hid_init to be a layer
+ l_inp = InputLayer((2, 2, 3))
+ l_inp_h = InputLayer((2, 5))
+ l_gru = GRULayer(l_inp, 5, hid_init=l_inp_h)
+
+ x = T.tensor3()
+ h = T.matrix()
+
+ output = lasagne.layers.get_output(l_gru, {l_inp: x, l_inp_h: h})
+
+
+def test_gru_nparams_hid_init_layer():
+ # test that you can see layers through hid_init
+ l_inp = InputLayer((2, 2, 3))
+ l_inp_h = InputLayer((2, 5))
+ l_inp_h_de = DenseLayer(l_inp_h, 7)
+ l_gru = GRULayer(l_inp, 7, hid_init=l_inp_h_de)
+
+ # directly check the layers can be seen through hid_init
+ assert lasagne.layers.get_all_layers(l_gru) == [l_inp, l_inp_h, l_inp_h_de,
+ l_gru]
+
+ # 3*n_gates + 2
+ # the 3 is because we have hid_to_gate, in_to_gate and bias for each gate
+ # 2 is for the W and b parameters in the DenseLayer
+ assert len(lasagne.layers.get_all_params(l_gru, trainable=True)) == 11
+
+ # GRU bias params(3) + Dense bias params(1)
+ assert len(lasagne.layers.get_all_params(l_gru, regularizable=False)) == 4
+
+
+def test_gru_hid_init_layer_eval():
+ # Test `hid_init` as a `Layer` with some dummy input. Compare the output of
+ # a network with a `Layer` as input to `hid_init` to a network with a
+ # `np.array` as input to `hid_init`
+ n_units = 7
+ n_test_cases = 2
+ in_shp = (n_test_cases, 2, 3)
+ in_h_shp = (1, n_units)
+
+ # dummy inputs
+ X_test = np.ones(in_shp, dtype=theano.config.floatX)
+ Xh_test = np.ones(in_h_shp, dtype=theano.config.floatX)
+ Xh_test_batch = np.tile(Xh_test, (n_test_cases, 1))
+
+ # network with `Layer` initializer for hid_init
+ l_inp = InputLayer(in_shp)
+ l_inp_h = InputLayer(in_h_shp)
+ l_rec_inp_layer = GRULayer(l_inp, n_units, hid_init=l_inp_h)
+
+ # network with `np.array` initializer for hid_init
+ l_rec_nparray = GRULayer(l_inp, n_units, hid_init=Xh_test)
+
+ # copy network parameters from l_rec_inp_layer to l_rec_nparray
+ l_il_param = dict([(p.name, p) for p in l_rec_inp_layer.get_params()])
+ l_rn_param = dict([(p.name, p) for p in l_rec_nparray.get_params()])
+ for k, v in l_rn_param.items():
+ if k in l_il_param:
+ v.set_value(l_il_param[k].get_value())
+
+ # build the theano functions
+ X = T.tensor3()
+ Xh = T.matrix()
+ output_inp_layer = lasagne.layers.get_output(l_rec_inp_layer,
+ {l_inp: X, l_inp_h: Xh})
+ output_nparray = lasagne.layers.get_output(l_rec_nparray, {l_inp: X})
+
+ # test both nets with dummy input
+ output_val_inp_layer = output_inp_layer.eval({X: X_test,
+ Xh: Xh_test_batch})
+ output_val_nparray = output_nparray.eval({X: X_test})
+
+ # check output given `Layer` is the same as with `np.array`
+ assert np.allclose(output_val_inp_layer, output_val_nparray)
+
+
+def test_gru_hid_init_mask():
+ # test that you can set hid_init to be a layer when a mask is provided
+ l_inp = InputLayer((2, 2, 3))
+ l_inp_h = InputLayer((2, 5))
+ l_inp_msk = InputLayer((2, 2))
+ l_gru = GRULayer(l_inp, 5, hid_init=l_inp_h, mask_input=l_inp_msk)
+
+ x = T.tensor3()
+ h = T.matrix()
+ msk = T.matrix()
+
+ inputs = {l_inp: x, l_inp_h: h, l_inp_msk: msk}
+ output = lasagne.layers.get_output(l_gru, inputs)
+
+
+def test_gru_grad_clipping():
+ # test that you can set grad_clip variable
+ x = T.tensor3()
+ l_rec = GRULayer(InputLayer((2, 2, 3)), 5, grad_clipping=1)
+ output = lasagne.layers.get_output(l_rec, x)
+
+
+def test_gru_bck():
+ num_batch, seq_len, n_features1 = 2, 3, 4
+ num_units = 2
+ x = T.tensor3()
+ in_shp = (num_batch, seq_len, n_features1)
+ l_inp = InputLayer(in_shp)
+
+ x_in = np.ones(in_shp).astype('float32')
+
+ # need to set random seed.
+ lasagne.random.get_rng().seed(1234)
+ l_gru_fwd = GRULayer(l_inp, num_units=num_units, backwards=False)
+ lasagne.random.get_rng().seed(1234)
+ l_gru_bck = GRULayer(l_inp, num_units=num_units, backwards=True)
+ output_fwd = helper.get_output(l_gru_fwd, x)
+ output_bck = helper.get_output(l_gru_bck, x)
+
+ output_fwd_val = output_fwd.eval({x: x_in})
+ output_bck_val = output_bck.eval({x: x_in})
+
+ # test that the backwards model reverses its final input
+ np.testing.assert_almost_equal(output_fwd_val, output_bck_val[:, ::-1])
+
+
+def test_gru_variable_input_size():
+ # check that seqlen and batchsize None works
+ num_batch, n_features1 = 6, 5
+ num_units = 13
+ x = T.tensor3()
+
+ in_shp = (None, None, n_features1)
+ l_inp = InputLayer(in_shp)
+ x_in1 = np.ones((num_batch+1, 10, n_features1)).astype('float32')
+ x_in2 = np.ones((num_batch, 15, n_features1)).astype('float32')
+ l_rec = GRULayer(l_inp, num_units=num_units, backwards=False)
+ output = helper.get_output(l_rec, x)
+
+ output.eval({x: x_in1})
+ output.eval({x: x_in2})
+
+
+def test_gru_unroll_scan_fwd():
+ num_batch, seq_len, n_features1 = 2, 3, 4
+ num_units = 2
+ in_shp = (num_batch, seq_len, n_features1)
+ l_inp = InputLayer(in_shp)
+ l_mask_inp = InputLayer(in_shp[:2])
+
+ x_in = np.random.random(in_shp).astype('float32')
+ mask_in = np.ones(in_shp[:2]).astype('float32')
+
+ # need to set random seed.
+ lasagne.random.get_rng().seed(1234)
+ l_gru_scan = GRULayer(l_inp, num_units=num_units, backwards=False,
+ unroll_scan=False, mask_input=l_mask_inp)
+ lasagne.random.get_rng().seed(1234)
+ l_gru_unrolled = GRULayer(l_inp, num_units=num_units, backwards=False,
+ unroll_scan=True, mask_input=l_mask_inp)
+ output_scan = helper.get_output(l_gru_scan)
+ output_unrolled = helper.get_output(l_gru_unrolled)
+
+ output_scan_val = output_scan.eval({l_inp.input_var: x_in,
+ l_mask_inp.input_var: mask_in})
+ output_unrolled_val = output_unrolled.eval({l_inp.input_var: x_in,
+ l_mask_inp.input_var: mask_in})
+
+ np.testing.assert_almost_equal(output_scan_val, output_unrolled_val)
+
+
+def test_gru_unroll_scan_bck():
+ num_batch, seq_len, n_features1 = 2, 5, 4
+ num_units = 2
+ x = T.tensor3()
+ in_shp = (num_batch, seq_len, n_features1)
+ l_inp = InputLayer(in_shp)
+ x_in = np.random.random(in_shp).astype('float32')
+
+ # need to set random seed.
+ lasagne.random.get_rng().seed(1234)
+ l_gru_scan = GRULayer(l_inp, num_units=num_units, backwards=True,
+ unroll_scan=False)
+ lasagne.random.get_rng().seed(1234)
+ l_gru_unrolled = GRULayer(l_inp, num_units=num_units, backwards=True,
+ unroll_scan=True)
+ output_scan = helper.get_output(l_gru_scan, x)
+ output_unrolled = helper.get_output(l_gru_unrolled, x)
+
+ output_scan_val = output_scan.eval({x: x_in})
+ output_unrolled_val = output_unrolled.eval({x: x_in})
+
+ np.testing.assert_almost_equal(output_scan_val, output_unrolled_val)
+
+
+def test_gru_precompute():
+ num_batch, seq_len, n_features1 = 2, 3, 4
+ num_units = 2
+ in_shp = (num_batch, seq_len, n_features1)
+ l_inp = InputLayer(in_shp)
+ l_mask_inp = InputLayer(in_shp[:2])
+
+ x_in = np.random.random(in_shp).astype('float32')
+ mask_in = np.ones((num_batch, seq_len), dtype='float32')
+
+ # need to set random seed.
+ lasagne.random.get_rng().seed(1234)
+ l_gru_precompute = GRULayer(l_inp, num_units=num_units,
+ precompute_input=True, mask_input=l_mask_inp)
+ lasagne.random.get_rng().seed(1234)
+ l_gru_no_precompute = GRULayer(l_inp, num_units=num_units,
+ precompute_input=False,
+ mask_input=l_mask_inp)
+ output_precompute = helper.get_output(
+ l_gru_precompute).eval({l_inp.input_var: x_in,
+ l_mask_inp.input_var: mask_in})
+ output_no_precompute = helper.get_output(
+ l_gru_no_precompute).eval({l_inp.input_var: x_in,
+ l_mask_inp.input_var: mask_in})
+
+ # test that precomputing the input gives the same output
+ np.testing.assert_almost_equal(output_precompute, output_no_precompute)
+
+
+def test_gru_passthrough():
+ # Tests that the GRU can simply pass through its input
+ l_in = InputLayer((4, 5, 6))
+ zero = lasagne.init.Constant(0.)
+ one = lasagne.init.Constant(1.)
+ pass_gate = Gate(zero, zero, None, one, None)
+ no_gate = Gate(zero, zero, None, zero, None)
+ in_pass_gate = Gate(
+ np.eye(6).astype(theano.config.floatX), zero, None, zero, None)
+ l_rec = GRULayer(l_in, 6, no_gate, pass_gate, in_pass_gate)
+ out = lasagne.layers.get_output(l_rec)
+ inp = np.arange(4*5*6).reshape(4, 5, 6).astype(theano.config.floatX)
+ np.testing.assert_almost_equal(out.eval({l_in.input_var: inp}), inp)
+
+
+def test_gru_return_final():
+ num_batch, seq_len, n_features = 2, 3, 4
+ num_units = 2
+ in_shp = (num_batch, seq_len, n_features)
+ x_in = np.random.random(in_shp).astype('float32')
+
+ l_inp = InputLayer(in_shp)
+ lasagne.random.get_rng().seed(1234)
+ l_rec_final = GRULayer(l_inp, num_units, only_return_final=True)
+ lasagne.random.get_rng().seed(1234)
+ l_rec_all = GRULayer(l_inp, num_units, only_return_final=False)
+
+ output_final = helper.get_output(l_rec_final).eval({l_inp.input_var: x_in})
+ output_all = helper.get_output(l_rec_all).eval({l_inp.input_var: x_in})
+
+ assert output_final.shape == (output_all.shape[0], output_all.shape[2])
+ assert output_final.shape == lasagne.layers.get_output_shape(l_rec_final)
+ assert np.allclose(output_final, output_all[:, -1])
+
+
+def test_gradient_steps_error():
+ # Check that error is raised if gradient_steps is not -1 and scan_unroll
+ # is true
+ l_in = InputLayer((2, 2, 3))
+ with pytest.raises(ValueError):
+ RecurrentLayer(l_in, 5, gradient_steps=3, unroll_scan=True)
+
+ with pytest.raises(ValueError):
+ LSTMLayer(l_in, 5, gradient_steps=3, unroll_scan=True)
+
+ with pytest.raises(ValueError):
+ GRULayer(l_in, 5, gradient_steps=3, unroll_scan=True)
+
+
+def test_unroll_none_input_error():
+ # Test that a ValueError is raised if unroll scan is True and the input
+ # sequence length is specified as None.
+ l_in = InputLayer((2, None, 3))
+ with pytest.raises(ValueError):
+ RecurrentLayer(l_in, 5, unroll_scan=True)
+
+ with pytest.raises(ValueError):
+ LSTMLayer(l_in, 5, unroll_scan=True)
+
+ with pytest.raises(ValueError):
+ GRULayer(l_in, 5, unroll_scan=True)
+
+
+def test_CustomRecurrentLayer_child_kwargs():
+ in_shape = (2, 3, 4)
+ n_hid = 5
+ # Construct mock for input-to-hidden layer
+ in_to_hid = Mock(
+ Layer,
+ output_shape=(in_shape[0]*in_shape[1], n_hid),
+ input_shape=(in_shape[0]*in_shape[1], in_shape[2]),
+ input_layer=InputLayer((in_shape[0]*in_shape[1], in_shape[2])),
+ get_output_kwargs=['foo'])
+ # These two functions get called, need to return dummy values for them
+ in_to_hid.get_output_for.return_value = T.matrix()
+ in_to_hid.get_params.return_value = []
+ # As above, for hidden-to-hidden layer
+ hid_to_hid = Mock(
+ Layer,
+ output_shape=(in_shape[0], n_hid),
+ input_shape=(in_shape[0], n_hid),
+ input_layer=InputLayer((in_shape[0], n_hid)),
+ get_output_kwargs=[])
+ hid_to_hid.get_output_for.return_value = T.matrix()
+ hid_to_hid.get_params.return_value = []
+ # Construct a CustomRecurrentLayer using these Mocks
+ l_rec = lasagne.layers.CustomRecurrentLayer(
+ InputLayer(in_shape), in_to_hid, hid_to_hid)
+ # Call get_output with a kwarg, should be passed to in_to_hid and hid_to_hid
+ helper.get_output(l_rec, foo='bar')
+ # Retrieve the arguments used to call in_to_hid.get_output_for
+ args, kwargs = in_to_hid.get_output_for.call_args
+ # Should be one argument - the Theano expression
+ assert len(args) == 1
+ # One keyword argument - should be 'foo' -> 'bar'
+ assert kwargs == {'foo': 'bar'}
+ # Same as with in_to_hid
+ args, kwargs = hid_to_hid.get_output_for.call_args
+ assert len(args) == 1
+ assert kwargs == {'foo': 'bar'}
diff --git a/lasagne/tests/layers/test_shape.py b/lasagne/tests/layers/test_shape.py
new file mode 100644
index 0000000..2e1de90
--- /dev/null
+++ b/lasagne/tests/layers/test_shape.py
@@ -0,0 +1,291 @@
+import numpy as np
+import pytest
+import theano
+
+from mock import Mock
+
+
+class TestFlattenLayer:
+ @pytest.fixture
+ def layer(self):
+ from lasagne.layers.shape import FlattenLayer
+ return FlattenLayer(Mock(output_shape=(None,)))
+
+ @pytest.fixture
+ def layer_outdim3(self):
+ from lasagne.layers.shape import FlattenLayer
+ return FlattenLayer(Mock(output_shape=(None,)), outdim=3)
+
+ @pytest.fixture
+ def layer_outdim1(self):
+ from lasagne.layers.shape import FlattenLayer
+ return FlattenLayer(Mock(output_shape=(None,)), outdim=1)
+
+ def test_get_output_shape_for(self, layer):
+ input_shape = (2, 3, 4, 5)
+ assert layer.get_output_shape_for(input_shape) == (2, 3 * 4 * 5)
+
+ def test_get_output_shape_for_contain_none(self, layer):
+ input_shape = (2, 3, None, 5)
+ assert layer.get_output_shape_for(input_shape) == (2, None)
+
+ def test_get_output_for(self, layer):
+ input = np.random.random((2, 3, 4, 5))
+ result = layer.get_output_for(theano.shared(input)).eval()
+ assert (result == input.reshape((input.shape[0], -1))).all()
+
+ def test_get_output_shape_for_outdim3(self, layer_outdim3):
+ input_shape = (2, 3, 4, 5)
+ assert layer_outdim3.get_output_shape_for(input_shape) == (2, 3, 4 * 5)
+
+ def test_get_output_for_outdim3(self, layer_outdim3):
+ input = np.random.random((2, 3, 4, 5))
+ result = layer_outdim3.get_output_for(theano.shared(input)).eval()
+ assert (result == input.reshape(
+ (input.shape[0], input.shape[1], -1))).all()
+
+ def test_get_output_shape_for_outdim1(self, layer_outdim1):
+ input_shape = (2, 3, 4, 5)
+ assert layer_outdim1.get_output_shape_for(input_shape) == (
+ 2 * 3 * 4 * 5, )
+
+ def test_get_output_for_outdim1(self, layer_outdim1):
+ input = np.random.random((2, 3, 4, 5))
+ result = layer_outdim1.get_output_for(theano.shared(input)).eval()
+ assert (result == input.reshape(-1)).all()
+
+ def test_dim0_raises(self):
+ from lasagne.layers.shape import FlattenLayer
+ with pytest.raises(ValueError):
+ FlattenLayer((2, 3, 4), outdim=0)
+
+
+class TestPadLayer:
+ @pytest.fixture
+ def layerclass(self):
+ from lasagne.layers.shape import PadLayer
+ return PadLayer
+
+ @pytest.mark.parametrize(
+ "width, input_shape, output_shape",
+ [(3, (2, 3, 4, 5), (2, 3, 10, 11)),
+ ((2, 3), (2, 3, 4, 5), (2, 3, 8, 11)),
+ (((1, 2), (3, 4)), (2, 3, 4, 5), (2, 3, 7, 12)),
+ (3, (2, 3, None, 5), (2, 3, None, 11)),
+ ((2, 3), (2, 3, 4, None), (2, 3, 8, None)),
+ (((1, 2), (3, 4)), (None, 3, None, None), (None, 3, None, None)),
+ ])
+ def test_get_output_shape_for(self, layerclass,
+ width, input_shape, output_shape):
+ layer = layerclass(Mock(output_shape=(None,)), width=width)
+ assert layer.get_output_shape_for(input_shape) == output_shape
+
+ def test_get_output_for(self, layerclass):
+ layer = layerclass(Mock(output_shape=(None,)), width=2)
+ input = np.zeros((1, 2, 10))
+ trimmed = theano.shared(input[:, :, 2:-2])
+ result = layer.get_output_for(trimmed).eval()
+
+ assert (result == input).all()
+
+
+class TestReshapeLayer:
+ @pytest.fixture
+ def layerclass(self):
+ from lasagne.layers.shape import ReshapeLayer
+ return ReshapeLayer
+
+ @pytest.fixture
+ def two_unknown(self):
+ from lasagne.layers.input import InputLayer
+ shape = (16, 3, None, None, 10)
+ return (InputLayer(shape),
+ theano.shared(np.ones((16, 3, 5, 7, 10))))
+
+ def test_no_reference(self, layerclass, two_unknown):
+ inputlayer, inputdata = two_unknown
+ layer = layerclass(inputlayer, (16, 3, 5, 7, 2, 5))
+ assert layer.output_shape == (16, 3, 5, 7, 2, 5)
+ result = layer.get_output_for(inputdata).eval()
+ assert result.shape == (16, 3, 5, 7, 2, 5)
+
+ def test_reference_both(self, layerclass, two_unknown):
+ inputlayer, inputdata = two_unknown
+ layer = layerclass(inputlayer, (-1, [1], [2], [3], 2, 5))
+ assert layer.output_shape == (16, 3, None, None, 2, 5)
+ result = layer.get_output_for(inputdata).eval()
+ assert result.shape == (16, 3, 5, 7, 2, 5)
+
+ def test_reference_one(self, layerclass, two_unknown):
+ inputlayer, inputdata = two_unknown
+ layer = layerclass(inputlayer, (-1, [1], [2], 7, 2, 5))
+ assert layer.output_shape == (None, 3, None, 7, 2, 5)
+ result = layer.get_output_for(inputdata).eval()
+ assert result.shape == (16, 3, 5, 7, 2, 5)
+
+ def test_reference_twice(self, layerclass, two_unknown):
+ inputlayer, inputdata = two_unknown
+ layer = layerclass(inputlayer, (-1, [1], [2], [3], 2, [2]))
+ assert layer.output_shape == (None, 3, None, None, 2, None)
+ result = layer.get_output_for(inputdata).eval()
+ assert result.shape == (16, 3, 5, 7, 2, 5)
+
+ def test_merge_with_unknown(self, layerclass, two_unknown):
+ inputlayer, inputdata = two_unknown
+ layer = layerclass(inputlayer, ([0], [1], [2], -1))
+ assert layer.output_shape == (16, 3, None, None)
+ result = layer.get_output_for(inputdata).eval()
+ assert result.shape == (16, 3, 5, 70)
+
+ def test_merge_two_unknowns(self, layerclass, two_unknown):
+ inputlayer, inputdata = two_unknown
+ layer = layerclass(inputlayer, ([0], [1], -1, [4]))
+ assert layer.output_shape == (16, 3, None, 10)
+ result = layer.get_output_for(inputdata).eval()
+ assert result.shape == (16, 3, 35, 10)
+
+ def test_size_mismatch(self, layerclass, two_unknown):
+ inputlayer, inputdata = two_unknown
+ with pytest.raises(ValueError) as excinfo:
+ layerclass(inputlayer, (17, 3, [2], [3], -1))
+ assert 'match' in str(excinfo.value)
+
+ def test_invalid_spec(self, layerclass, two_unknown):
+ inputlayer, inputdata = two_unknown
+ with pytest.raises(ValueError):
+ layerclass(inputlayer, (-16, 3, 5, 7, 10))
+ with pytest.raises(ValueError):
+ layerclass(inputlayer, (-1, 3, 5, 7, -1))
+ with pytest.raises(ValueError):
+ layerclass(inputlayer, ([-1], 3, 5, 7, 10))
+ with pytest.raises(ValueError):
+ layerclass(inputlayer, ([0, 1], 3, 5, 7, 10))
+ with pytest.raises(ValueError):
+ layerclass(inputlayer, (None, 3, 5, 7, 10))
+ with pytest.raises(ValueError):
+ layerclass(inputlayer, (16, 3, 5, 7, [5]))
+ with pytest.raises(ValueError):
+ layerclass(inputlayer, (16, 3, theano.tensor.vector(), 7, 10))
+
+ def test_symbolic_shape(self):
+ from lasagne.layers import InputLayer, ReshapeLayer, get_output
+ x = theano.tensor.tensor3()
+ batch_size, seq_len, num_features = x.shape
+ l_inp = InputLayer((None, None, None))
+ l_rshp2 = ReshapeLayer(l_inp, (batch_size*seq_len, [2]))
+
+ # we cannot infer any of the output shapes because they are symbolic.
+ output_shape = l_rshp2.get_output_shape_for(
+ (batch_size, seq_len, num_features))
+ assert output_shape == (None, None)
+
+ output = get_output(l_rshp2, x)
+ out1 = output.eval({x: np.ones((3, 5, 6), dtype='float32')})
+ out2 = output.eval({x: np.ones((4, 5, 7), dtype='float32')})
+
+ assert out1.shape == (3*5, 6)
+ assert out2.shape == (4*5, 7)
+
+
+class TestDimshuffleLayer:
+ @pytest.fixture
+ def input_shape(self):
+ return (2, 3, 1, 5, 7)
+
+ @pytest.fixture
+ def input_var(self):
+ InputTensorType = theano.tensor.TensorType(
+ 'float64', broadcastable=(False, False, True, False, False),
+ name='DimShuffleTestTensor')
+ return InputTensorType(name='x')
+
+ @pytest.fixture
+ def input_layer(self, input_shape, input_var):
+ from lasagne.layers.input import InputLayer
+ return InputLayer(input_shape, input_var)
+
+ @pytest.fixture
+ def input_shape_with_None(self):
+ return (2, 3, None, 5, 7)
+
+ @pytest.fixture
+ def input_layer_with_None(self, input_shape_with_None, input_var):
+ from lasagne.layers.input import InputLayer
+ return InputLayer(input_shape_with_None, input_var)
+
+ @pytest.fixture
+ def input_data(self, input_shape):
+ return np.ones(input_shape)
+
+ def test_rearrange(self, input_data, input_var, input_layer):
+ from lasagne.layers.shape import DimshuffleLayer
+ ds = DimshuffleLayer(input_layer, [4, 3, 2, 1, 0])
+ assert ds.output_shape == (7, 5, 1, 3, 2)
+ assert ds.get_output_for(input_var).eval(
+ {input_var: input_data}).shape == (7, 5, 1, 3, 2)
+
+ def test_broadcast(self, input_data, input_var, input_layer):
+ from lasagne.layers.shape import DimshuffleLayer
+ ds = DimshuffleLayer(input_layer, [0, 1, 2, 3, 4, 'x'])
+ assert ds.output_shape == (2, 3, 1, 5, 7, 1)
+ assert ds.get_output_for(input_var).eval(
+ {input_var: input_data}).shape == (2, 3, 1, 5, 7, 1)
+
+ def test_collapse(self, input_data, input_var, input_layer):
+ from lasagne.layers.shape import DimshuffleLayer
+ ds_ok = DimshuffleLayer(input_layer, [0, 1, 3, 4])
+ assert ds_ok.output_shape == (2, 3, 5, 7)
+ assert ds_ok.get_output_for(input_var).eval(
+ {input_var: input_data}).shape == (2, 3, 5, 7)
+ with pytest.raises(ValueError):
+ DimshuffleLayer(input_layer, [0, 1, 2, 4])
+
+ def test_collapse_None(self, input_data, input_var, input_layer_with_None):
+ from lasagne.layers.shape import DimshuffleLayer
+ ds_ok = DimshuffleLayer(input_layer_with_None, [0, 1, 3, 4])
+ assert ds_ok.output_shape == (2, 3, 5, 7)
+ assert ds_ok.get_output_for(input_var).eval(
+ {input_var: input_data}).shape == (2, 3, 5, 7)
+ with pytest.raises(ValueError):
+ DimshuffleLayer(input_layer_with_None, [0, 1, 2, 4])
+
+ def test_invalid_pattern(self, input_data, input_var, input_layer):
+ from lasagne.layers.shape import DimshuffleLayer
+ with pytest.raises(ValueError):
+ DimshuffleLayer(input_layer, ['q'])
+ with pytest.raises(ValueError):
+ DimshuffleLayer(input_layer, [0, 0, 1, 3, 4])
+ with pytest.raises(ValueError):
+ # There is no dimension 42
+ DimshuffleLayer(input_layer, [0, 1, 2, 4, 42])
+
+
+def test_slice_layer():
+ from lasagne.layers import SliceLayer, InputLayer, get_output_shape,\
+ get_output
+ from numpy.testing import assert_array_almost_equal as aeq
+ in_shp = (3, 5, 2)
+ l_inp = InputLayer(in_shp)
+ l_slice_ax0 = SliceLayer(l_inp, axis=0, indices=0)
+ l_slice_ax1 = SliceLayer(l_inp, axis=1, indices=slice(3, 5))
+ l_slice_ax2 = SliceLayer(l_inp, axis=-1, indices=-1)
+
+ x = np.arange(np.prod(in_shp)).reshape(in_shp).astype('float32')
+ x1 = x[0]
+ x2 = x[:, 3:5]
+ x3 = x[:, :, -1]
+
+ assert get_output_shape(l_slice_ax0) == x1.shape
+ assert get_output_shape(l_slice_ax1) == x2.shape
+ assert get_output_shape(l_slice_ax2) == x3.shape
+
+ aeq(get_output(l_slice_ax0, x).eval(), x1)
+ aeq(get_output(l_slice_ax1, x).eval(), x2)
+ aeq(get_output(l_slice_ax2, x).eval(), x3)
+
+ # test slicing None dimension
+ in_shp = (2, None, 2)
+ l_inp = InputLayer(in_shp)
+ l_slice_ax1 = SliceLayer(l_inp, axis=1, indices=slice(3, 5))
+ assert get_output_shape(l_slice_ax1) == (2, None, 2)
+ aeq(get_output(l_slice_ax1, x).eval(), x2)
diff --git a/lasagne/tests/layers/test_special.py b/lasagne/tests/layers/test_special.py
new file mode 100644
index 0000000..c3befaa
--- /dev/null
+++ b/lasagne/tests/layers/test_special.py
@@ -0,0 +1,793 @@
+from mock import Mock
+import numpy as np
+import pytest
+import theano
+from lasagne.layers import InputLayer, standardize, get_output, get_all_params
+
+
+class TestExpressionLayer:
+ @pytest.fixture
+ def ExpressionLayer(self):
+ from lasagne.layers.special import ExpressionLayer
+ return ExpressionLayer
+
+ @pytest.fixture
+ def input_layer(self):
+ from lasagne.layers import InputLayer
+ return InputLayer((2, 3, 4, 5))
+
+ @pytest.fixture
+ def input_layer_nones(self):
+ from lasagne.layers import InputLayer
+ return InputLayer((1, None, None, 5))
+
+ def np_result(self, func, input_layer):
+ X = np.random.uniform(-1, 1, input_layer.output_shape)
+ return X, func(X)
+
+ @pytest.mark.parametrize('func',
+ [lambda X: X**2,
+ lambda X: X.mean(-1),
+ lambda X: X.sum(),
+ ])
+ def test_tuple_shape(self, func, input_layer, ExpressionLayer):
+ from lasagne.layers.helper import get_output
+
+ X, expected = self.np_result(func, input_layer)
+ layer = ExpressionLayer(input_layer, func, output_shape=expected.shape)
+ assert layer.get_output_shape_for(X.shape) == expected.shape
+
+ output = get_output(layer, X).eval()
+ assert np.allclose(output, expected)
+
+ @pytest.mark.parametrize('func',
+ [lambda X: X**2,
+ lambda X: X.mean(-1),
+ lambda X: X.sum(),
+ ])
+ def test_callable_shape(self, func, input_layer, ExpressionLayer):
+ from lasagne.layers.helper import get_output
+
+ X, expected = self.np_result(func, input_layer)
+
+ def get_shape(input_shape):
+ return func(np.empty(shape=input_shape)).shape
+
+ layer = ExpressionLayer(input_layer, func, output_shape=get_shape)
+ assert layer.get_output_shape_for(X.shape) == expected.shape
+
+ output = get_output(layer, X).eval()
+ assert np.allclose(output, expected)
+
+ @pytest.mark.parametrize('func',
+ [lambda X: X**2,
+ lambda X: X.mean(-1),
+ lambda X: X.sum(),
+ ])
+ def test_none_shape(self, func, input_layer, ExpressionLayer):
+ from lasagne.layers.helper import get_output
+
+ X, expected = self.np_result(func, input_layer)
+
+ layer = ExpressionLayer(input_layer, func, output_shape=None)
+ if X.shape == expected.shape:
+ assert layer.get_output_shape_for(X.shape) == expected.shape
+
+ output = get_output(layer, X).eval()
+ assert np.allclose(output, expected)
+
+ @pytest.mark.parametrize('func',
+ [lambda X: X**2,
+ lambda X: X.mean(-1),
+ lambda X: X.sum(),
+ ])
+ def test_auto_shape(self, func, input_layer, ExpressionLayer):
+ from lasagne.layers.helper import get_output
+
+ X, expected = self.np_result(func, input_layer)
+
+ layer = ExpressionLayer(input_layer, func, output_shape='auto')
+ assert layer.get_output_shape_for(X.shape) == expected.shape
+
+ output = get_output(layer, X).eval()
+ assert np.allclose(output, expected)
+
+ @pytest.mark.parametrize('func',
+ [lambda X: X**2,
+ lambda X: X.mean(-1),
+ lambda X: X.sum(),
+ ])
+ def test_nones_shape(self, func, input_layer_nones, ExpressionLayer):
+ input_shape = input_layer_nones.output_shape
+ np_shape = tuple(0 if s is None else s for s in input_shape)
+ X = np.random.uniform(-1, 1, np_shape)
+ expected = func(X)
+ expected_shape = tuple(s if s else None for s in expected.shape)
+
+ layer = ExpressionLayer(input_layer_nones,
+ func,
+ output_shape=expected_shape)
+ assert layer.get_output_shape_for(input_shape) == expected_shape
+
+ def get_shape(input_shape):
+ return expected_shape
+ layer = ExpressionLayer(input_layer_nones,
+ func,
+ output_shape=get_shape)
+ assert layer.get_output_shape_for(input_shape) == expected_shape
+
+ layer = ExpressionLayer(input_layer_nones,
+ func,
+ output_shape='auto')
+ assert layer.get_output_shape_for(input_shape) == expected_shape
+
+
+class TestNonlinearityLayer:
+ @pytest.fixture
+ def NonlinearityLayer(self):
+ from lasagne.layers.special import NonlinearityLayer
+ return NonlinearityLayer
+
+ @pytest.fixture
+ def layer_vars(self, NonlinearityLayer, dummy_input_layer):
+ nonlinearity = Mock()
+
+ layer = NonlinearityLayer(
+ dummy_input_layer,
+ nonlinearity=nonlinearity,
+ )
+
+ return {
+ 'nonlinearity': nonlinearity,
+ 'layer': layer,
+ }
+
+ @pytest.fixture
+ def layer(self, layer_vars):
+ return layer_vars['layer']
+
+ def test_init_none_nonlinearity(self, NonlinearityLayer,
+ dummy_input_layer):
+ import lasagne.nonlinearities
+ layer = NonlinearityLayer(
+ dummy_input_layer,
+ nonlinearity=None,
+ )
+ assert layer.nonlinearity == lasagne.nonlinearities.identity
+
+ def test_get_output_for(self, layer_vars):
+ layer = layer_vars['layer']
+ nonlinearity = layer_vars['nonlinearity']
+
+ input = theano.tensor.matrix()
+ result = layer.get_output_for(input)
+ nonlinearity.assert_called_with(input)
+ assert result is nonlinearity.return_value
+
+
+class TestBiasLayer:
+ @pytest.fixture
+ def BiasLayer(self):
+ from lasagne.layers.special import BiasLayer
+ return BiasLayer
+
+ @pytest.fixture
+ def init_b(self):
+ # initializer for a tensor of unique values
+ return lambda shape: np.arange(np.prod(shape)).reshape(shape)
+
+ def test_bias_init(self, BiasLayer, init_b):
+ input_shape = (2, 3, 4)
+ # default: share biases over all but second axis
+ b = BiasLayer(input_shape, b=init_b).b
+ assert np.allclose(b.get_value(), init_b((3,)))
+ # share over first axis only
+ b = BiasLayer(input_shape, b=init_b, shared_axes=0).b
+ assert np.allclose(b.get_value(), init_b((3, 4)))
+ # share over second and third axis
+ b = BiasLayer(input_shape, b=init_b, shared_axes=(1, 2)).b
+ assert np.allclose(b.get_value(), init_b((2,)))
+ # no bias
+ b = BiasLayer(input_shape, b=None).b
+ assert b is None
+
+ def test_get_output_for(self, BiasLayer, init_b):
+ input_shape = (2, 3, 4)
+ # random input tensor
+ input = np.random.randn(*input_shape).astype(theano.config.floatX)
+ # default: share biases over all but second axis
+ layer = BiasLayer(input_shape, b=init_b)
+ assert np.allclose(layer.get_output_for(input).eval(),
+ input + init_b((1, 3, 1)))
+ # share over first axis only
+ layer = BiasLayer(input_shape, b=init_b, shared_axes=0)
+ assert np.allclose(layer.get_output_for(input).eval(),
+ input + init_b((1, 3, 4)))
+ # share over second and third axis
+ layer = BiasLayer(input_shape, b=init_b, shared_axes=(1, 2))
+ assert np.allclose(layer.get_output_for(input).eval(),
+ input + init_b((2, 1, 1)))
+ # no bias
+ layer = BiasLayer(input_shape, b=None)
+ assert layer.get_output_for(input) is input
+
+ def test_undefined_shape(self, BiasLayer):
+ # should work:
+ BiasLayer((64, None, 3), shared_axes=(1, 2))
+ # should not work:
+ with pytest.raises(ValueError) as exc:
+ BiasLayer((64, None, 3), shared_axes=(0, 2))
+ assert 'needs specified input sizes' in exc.value.args[0]
+
+
+class TestScaleLayer:
+ @pytest.fixture
+ def ScaleLayer(self):
+ from lasagne.layers.special import ScaleLayer
+ return ScaleLayer
+
+ @pytest.fixture
+ def init_scales(self):
+ # initializer for a tensor of unique values
+ return lambda shape: np.arange(np.prod(shape)).reshape(shape)
+
+ def test_scales_init(self, ScaleLayer, init_scales):
+ input_shape = (2, 3, 4)
+ # default: share scales over all but second axis
+ b = ScaleLayer(input_shape, scales=init_scales).scales
+ assert np.allclose(b.get_value(), init_scales((3,)))
+ # share over first axis only
+ b = ScaleLayer(input_shape, scales=init_scales, shared_axes=0).scales
+ assert np.allclose(b.get_value(), init_scales((3, 4)))
+ # share over second and third axis
+ b = ScaleLayer(
+ input_shape, scales=init_scales, shared_axes=(1, 2)).scales
+ assert np.allclose(b.get_value(), init_scales((2,)))
+
+ def test_get_output_for(self, ScaleLayer, init_scales):
+ input_shape = (2, 3, 4)
+ # random input tensor
+ input = np.random.randn(*input_shape).astype(theano.config.floatX)
+ # default: share scales over all but second axis
+ layer = ScaleLayer(input_shape, scales=init_scales)
+ assert np.allclose(layer.get_output_for(input).eval(),
+ input * init_scales((1, 3, 1)))
+ # share over first axis only
+ layer = ScaleLayer(input_shape, scales=init_scales, shared_axes=0)
+ assert np.allclose(layer.get_output_for(input).eval(),
+ input * init_scales((1, 3, 4)))
+ # share over second and third axis
+ layer = ScaleLayer(input_shape, scales=init_scales, shared_axes=(1, 2))
+ assert np.allclose(layer.get_output_for(input).eval(),
+ input * init_scales((2, 1, 1)))
+
+ def test_undefined_shape(self, ScaleLayer):
+ # should work:
+ ScaleLayer((64, None, 3), shared_axes=(1, 2))
+ # should not work:
+ with pytest.raises(ValueError) as exc:
+ ScaleLayer((64, None, 3), shared_axes=(0, 2))
+ assert 'needs specified input sizes' in exc.value.args[0]
+
+
+def test_standardize():
+ # Simple example
+ X = np.random.standard_normal((1000, 20)).astype(theano.config.floatX)
+ l_in = InputLayer((None, 20))
+ l_std = standardize(
+ l_in, X.min(axis=0), (X.max(axis=0) - X.min(axis=0)), shared_axes=0)
+ out = get_output(l_std).eval({l_in.input_var: X})
+ assert np.allclose(out.max(axis=0), 1.)
+ assert np.allclose(out.min(axis=0), 0.)
+ assert len(get_all_params(l_std)) == 2
+ # More complicated example
+ X = np.random.standard_normal(
+ (50, 3, 100, 10)).astype(theano.config.floatX)
+ mean = X.mean(axis=(0, 2))
+ std = X.std(axis=(0, 2))
+ l_in = InputLayer((None, 3, None, 10))
+ l_std = standardize(l_in, mean, std, shared_axes=(0, 2))
+ out = get_output(l_std).eval({l_in.input_var: X})
+ assert np.allclose(out.mean(axis=(0, 2)), 0., atol=1e-5)
+ assert np.allclose(out.std((0, 2)), 1., atol=1e-5)
+
+
+class TestInverseLayer:
+ @pytest.fixture
+ def invlayer_vars(self):
+ from lasagne.layers.dense import DenseLayer
+ from lasagne.layers.input import InputLayer
+ from lasagne.layers.special import InverseLayer
+ from lasagne.nonlinearities import identity
+
+ l_in = InputLayer(shape=(10, 12))
+
+ layer = DenseLayer(
+ l_in,
+ num_units=3,
+ b=None,
+ nonlinearity=identity,
+ )
+
+ invlayer = InverseLayer(
+ incoming=layer,
+ layer=layer
+ )
+
+ return {
+ 'layer': layer,
+ 'invlayer': invlayer,
+ }
+
+ def test_init(self, invlayer_vars):
+ layer = invlayer_vars['layer']
+ invlayer = invlayer_vars['invlayer']
+ # Check that the output shape of the invlayer is the same
+ # as the input shape of the layer
+ assert layer.input_shape == invlayer.output_shape
+
+ def test_get_output_shape_for(self, invlayer_vars):
+ invlayer = invlayer_vars['invlayer']
+ assert invlayer.get_output_shape_for(
+ [(34, 55, 89, 144), (5, 8, 13, 21), (1, 1, 2, 3)]) == (1, 1, 2, 3)
+
+ def test_get_output_for(self, invlayer_vars):
+ from lasagne.layers.helper import get_output
+ invlayer = invlayer_vars['invlayer']
+ layer = invlayer_vars['layer']
+ W = layer.W.get_value()
+ input = theano.shared(
+ np.random.rand(*layer.input_shape))
+ results = get_output(invlayer, inputs=input)
+
+ # Check that the output of the invlayer is the output of the
+ # dot product of the output of the dense layer and the
+ # transposed weights
+ assert np.allclose(
+ results.eval(), np.dot(np.dot(input.get_value(), W), W.T))
+
+
+class TestTransformLayer():
+
+ def test_transform_affine_errors(self):
+ import lasagne
+ with pytest.raises(ValueError):
+ l_in_a = lasagne.layers.InputLayer((None, 3, 28, 28))
+ l_loc_a = lasagne.layers.DenseLayer(l_in_a, num_units=5)
+ l_trans = lasagne.layers.TransformerLayer(l_in_a, l_loc_a)
+ with pytest.raises(ValueError):
+ l_in_b = lasagne.layers.InputLayer((3, 28, 28))
+ l_loc_b = lasagne.layers.DenseLayer(l_in_b, num_units=6)
+ l_trans = lasagne.layers.TransformerLayer(l_in_b, l_loc_b)
+
+ def test_transform_affine_downsample(self):
+ import lasagne
+ downsample = (0.7, 2.3)
+ x = np.random.random((10, 3, 28, 28)).astype('float32')
+ x_sym = theano.tensor.tensor4()
+
+ # create transformer with fixed input size
+ l_in = lasagne.layers.InputLayer((None, 3, 28, 28))
+ l_loc = lasagne.layers.DenseLayer(l_in, num_units=6)
+ l_trans = lasagne.layers.TransformerLayer(
+ l_in, l_loc, downsample_factor=downsample)
+
+ # check that shape propagation works
+ assert l_trans.output_shape[0] is None
+ assert l_trans.output_shape[1:] == (3, int(28 / .7), int(28 / 2.3))
+
+ # check that data propagation works
+ output = lasagne.layers.get_output(l_trans, x_sym)
+ x_out = output.eval({x_sym: x})
+ assert x_out.shape[0] == x.shape[0]
+ assert x_out.shape[1:] == l_trans.output_shape[1:]
+
+ # create transformer with variable input size
+ l_in = lasagne.layers.InputLayer((None, 3, None, 28))
+ l_loc = lasagne.layers.DenseLayer(
+ lasagne.layers.ReshapeLayer(l_in, ([0], 3*28*28)),
+ num_units=6, W=l_loc.W, b=l_loc.b)
+ l_trans = lasagne.layers.TransformerLayer(
+ l_in, l_loc, downsample_factor=downsample)
+
+ # check that shape propagation works
+ assert l_trans.output_shape[0] is None
+ assert l_trans.output_shape[1] == 3
+ assert l_trans.output_shape[2] is None
+ assert l_trans.output_shape[3] == int(28 / 2.3)
+
+ # check that data propagation works
+ output = lasagne.layers.get_output(l_trans, x_sym)
+ x_out2 = output.eval({x_sym: x})
+ assert x_out2.shape == x_out.shape
+ np.testing.assert_allclose(x_out2, x_out, rtol=1e-5, atol=1e-5)
+
+ def test_transform_affine_identity(self):
+ from lasagne.layers import InputLayer, TransformerLayer
+ from lasagne.utils import floatX
+ from theano.tensor import constant
+ batchsize = 10
+ l_in = InputLayer((batchsize, 3, 28, 28))
+ l_loc = InputLayer((batchsize, 6))
+ layer = TransformerLayer(l_in, l_loc)
+ inputs = floatX(np.arange(np.prod(l_in.shape)).reshape(l_in.shape))
+ thetas = floatX(np.tile([1, 0, 0, 0, 1, 0], (batchsize, 1)))
+ outputs = layer.get_output_for([constant(inputs),
+ constant(thetas)]).eval()
+ np.testing.assert_allclose(inputs, outputs, rtol=1e-6)
+
+
+class TestTPSTransformLayer():
+
+ def test_transform_thin_plate_spline_errors(self):
+ import lasagne
+
+ # Check that number of inputs matches 2*num_control_points
+ with pytest.raises(ValueError):
+ num_control_points = 16
+ l_in_a = lasagne.layers.InputLayer((None, 3, 28, 28))
+ l_loc_a = lasagne.layers.DenseLayer(l_in_a,
+ num_units=3*num_control_points)
+ l_trans = lasagne.layers.TPSTransformerLayer(
+ l_in_a, l_loc_a, control_points=num_control_points)
+
+ # Check that error is raised when precompute_grid is set to True
+ # with unknown input size
+ with pytest.raises(ValueError):
+ l_in = lasagne.layers.InputLayer((None, 3, None, 28))
+ l_loc = lasagne.layers.DenseLayer(
+ lasagne.layers.ReshapeLayer(l_in, ([0], 3*28*28)),
+ num_units=32)
+ l_trans = lasagne.layers.TPSTransformerLayer(l_in, l_loc,
+ precompute_grid=True)
+
+ # Check that input is right size
+ with pytest.raises(ValueError):
+ l_in_b = lasagne.layers.InputLayer((3, 28, 28))
+ l_loc_b = lasagne.layers.DenseLayer(l_in_b, num_units=6)
+ l_trans = lasagne.layers.TPSTransformerLayer(l_in_b, l_loc_b)
+
+ # Check that number of control points is a perfect square
+ with pytest.raises(ValueError):
+ num_control_points = 17
+ l_in_a = lasagne.layers.InputLayer((None, 3, 28, 28))
+ l_loc_a = lasagne.layers.DenseLayer(l_in_a,
+ num_units=2*num_control_points)
+ l_trans = lasagne.layers.TPSTransformerLayer(
+ l_in_a, l_loc_a, control_points=num_control_points)
+
+ # Check that the input shape is correct
+ with pytest.raises(ValueError):
+ num_control_points = 16
+ l_in_b = lasagne.layers.InputLayer((3, 28, 28))
+ l_loc_b = lasagne.layers.DenseLayer(
+ l_in_b, num_units=2*num_control_points
+ )
+ l_trans = lasagne.layers.TPSTransformerLayer(l_in_b, l_loc_b)
+
+ def test_transform_thin_plate_spline_variable_input(self):
+ import lasagne
+ from lasagne.utils import floatX
+ from theano.tensor import constant
+
+ x = np.random.random((10, 3, 28, 28)).astype('float32')
+ x_sym = theano.tensor.tensor4()
+
+ l_in = lasagne.layers.InputLayer((None, 3, None, 28))
+ l_loc = lasagne.layers.DenseLayer(
+ lasagne.layers.ReshapeLayer(l_in, ([0], 3*28*28)),
+ num_units=32)
+ l_trans = lasagne.layers.TPSTransformerLayer(
+ l_in, l_loc, precompute_grid='auto')
+
+ # check that shape propagation works
+ assert l_trans.output_shape[0] is None
+ assert l_trans.output_shape[1] == 3
+ assert l_trans.output_shape[2] is None
+ assert l_trans.output_shape[3] == 28
+
+ # check that data propagation works
+ dest_offset = np.zeros(shape=(10, 32))
+ inputs = floatX(np.arange(np.prod(x.shape)).reshape(x.shape))
+ outputs = l_trans.get_output_for([constant(inputs),
+ constant(dest_offset)]).eval()
+ np.testing.assert_allclose(inputs, outputs, atol=5e-4)
+
+ def test_transform_thin_plate_spline_downsample(self):
+ import lasagne
+ downsample = (0.7, 2.3)
+ x = np.random.random((10, 3, 28, 28)).astype('float32')
+ x_sym = theano.tensor.tensor4()
+
+ # create transformer with fixed input size
+ l_in = lasagne.layers.InputLayer((None, 3, 28, 28))
+ l_loc = lasagne.layers.DenseLayer(l_in, num_units=32)
+ l_trans = lasagne.layers.TPSTransformerLayer(
+ l_in, l_loc, downsample_factor=downsample,
+ precompute_grid=False
+ )
+
+ # check that shape propagation works
+ assert l_trans.output_shape[0] is None
+ assert l_trans.output_shape[1:] == (3, int(28 / .7), int(28 / 2.3))
+
+ # check that data propagation works
+ output = lasagne.layers.get_output(l_trans, x_sym)
+ x_out = output.eval({x_sym: x})
+ assert x_out.shape[0] == x.shape[0]
+ assert x_out.shape[1:] == l_trans.output_shape[1:]
+
+ # create transformer with variable input size
+ l_in = lasagne.layers.InputLayer((None, 3, None, 28))
+ l_loc = lasagne.layers.DenseLayer(
+ lasagne.layers.ReshapeLayer(l_in, ([0], 3*28*28)),
+ num_units=32, W=l_loc.W, b=l_loc.b)
+ l_trans = lasagne.layers.TPSTransformerLayer(
+ l_in, l_loc, downsample_factor=downsample,
+ precompute_grid=False
+ )
+
+ # check that shape propagation works
+ assert l_trans.output_shape[0] is None
+ assert l_trans.output_shape[1] == 3
+ assert l_trans.output_shape[2] is None
+ assert l_trans.output_shape[3] == int(28 / 2.3)
+
+ # check that data propagation works
+ output = lasagne.layers.get_output(l_trans, x_sym)
+ x_out2 = output.eval({x_sym: x})
+ assert x_out2.shape == x_out.shape
+ np.testing.assert_allclose(x_out2, x_out, rtol=1e-5, atol=1e-5)
+
+ def test_transform_thin_plate_spline_identity(self):
+ from lasagne.layers import InputLayer, TPSTransformerLayer
+ from lasagne.utils import floatX
+ from theano.tensor import constant
+ batchsize = 5
+ num_control_points = 16
+ dest_offset = np.zeros(shape=(batchsize, 2*num_control_points))
+ l_in = InputLayer((batchsize, 3, 28, 28))
+ l_loc = InputLayer((batchsize, 2*num_control_points))
+ layer = TPSTransformerLayer(
+ l_in, l_loc, control_points=num_control_points
+ )
+ inputs = floatX(np.arange(np.prod(l_in.shape)).reshape(l_in.shape))
+ outputs = layer.get_output_for([constant(inputs),
+ constant(dest_offset)]).eval()
+ np.testing.assert_allclose(inputs, outputs, atol=5e-4)
+
+ def test_transform_thin_plate_spline_shift(self):
+ from lasagne.layers import InputLayer, TPSTransformerLayer
+ from theano.tensor import constant
+ batchsize = 5
+ num_control_points = 16
+ dest_offset = np.ones(shape=(batchsize, 2*num_control_points))
+ l_in = InputLayer((batchsize, 3, 28, 28))
+ l_loc = InputLayer((batchsize, 2*num_control_points))
+ layer = TPSTransformerLayer(
+ l_in, l_loc, control_points=num_control_points
+ )
+ image = np.zeros(shape=(28, 28))
+ image[[0, -1], :] = 1
+ image[:, [0, -1]] = 1
+ inputs = np.tile(image, (batchsize, 3, 1, 1))
+ shifted_input = np.ones(shape=(28, 28))
+ shifted_input[:13, :13] = 0
+ shifted_input[13, :13] = 0.50000271
+ shifted_input[:13, 13] = 0.50000271
+ shifted_input[13, 13] = 0.75000271
+ shifted_input = np.tile(shifted_input, (batchsize, 3, 1, 1))
+ outputs = layer.get_output_for([constant(inputs),
+ constant(dest_offset)]).eval()
+ np.testing.assert_allclose(shifted_input,
+ outputs, atol=1e-5)
+
+
+class TestParametricRectifierLayer:
+ @pytest.fixture
+ def ParametricRectifierLayer(self):
+ from lasagne.layers.special import ParametricRectifierLayer
+ return ParametricRectifierLayer
+
+ @pytest.fixture
+ def init_alpha(self):
+ # initializer for a tensor of unique values
+ return lambda shape: (np.arange(np.prod(shape)).reshape(shape)) \
+ / np.prod(shape)
+
+ def test_alpha_init(self, ParametricRectifierLayer, init_alpha):
+ input_shape = (None, 3, 28, 28)
+ # default: alphas only over 2nd axis
+ layer = ParametricRectifierLayer(input_shape, alpha=init_alpha)
+ alpha = layer.alpha
+ assert layer.shared_axes == (0, 2, 3)
+ assert alpha.get_value().shape == (3, )
+ assert np.allclose(alpha.get_value(), init_alpha((3, )))
+
+ # scalar alpha
+ layer = ParametricRectifierLayer(input_shape, alpha=init_alpha,
+ shared_axes='all')
+ alpha = layer.alpha
+ assert layer.shared_axes == (0, 1, 2, 3)
+ assert alpha.get_value().shape == ()
+ assert np.allclose(alpha.get_value(), init_alpha((1,)))
+
+ # alphas shared over the 1st axis
+ layer = ParametricRectifierLayer(input_shape, alpha=init_alpha,
+ shared_axes=0)
+ alpha = layer.alpha
+ assert layer.shared_axes == (0,)
+ assert alpha.get_value().shape == (3, 28, 28)
+ assert np.allclose(alpha.get_value(), init_alpha((3, 28, 28)))
+
+ # alphas shared over the 1st and 4th axes
+ layer = ParametricRectifierLayer(input_shape, alpha=init_alpha,
+ shared_axes=(0, 3))
+ alpha = layer.alpha
+ assert layer.shared_axes == (0, 3)
+ assert alpha.get_value().shape == (3, 28)
+ assert np.allclose(alpha.get_value(), init_alpha((3, 28)))
+
+ def test_undefined_shape(self, ParametricRectifierLayer):
+ with pytest.raises(ValueError):
+ ParametricRectifierLayer((None, 3, 28, 28), shared_axes=(1, 2, 3))
+
+ def test_get_output_for(self, ParametricRectifierLayer, init_alpha):
+ input_shape = (3, 3, 28, 28)
+ # random input tensor
+ input = np.random.randn(*input_shape).astype(theano.config.floatX)
+
+ # default: alphas shared only along 2nd axis
+ layer = ParametricRectifierLayer(input_shape, alpha=init_alpha)
+ alpha_v = layer.alpha.get_value()
+ expected = np.maximum(input, 0) + np.minimum(input, 0) * \
+ alpha_v[None, :, None, None]
+ assert np.allclose(layer.get_output_for(input).eval(), expected)
+
+ # scalar alpha
+ layer = ParametricRectifierLayer(input_shape, alpha=init_alpha,
+ shared_axes='all')
+ alpha_v = layer.alpha.get_value()
+ expected = np.maximum(input, 0) + np.minimum(input, 0) * alpha_v
+ assert np.allclose(layer.get_output_for(input).eval(), expected)
+
+ # alphas shared over the 1st axis
+ layer = ParametricRectifierLayer(input_shape, alpha=init_alpha,
+ shared_axes=0)
+ alpha_v = layer.alpha.get_value()
+ expected = np.maximum(input, 0) + np.minimum(input, 0) * \
+ alpha_v[None, :, :, :]
+ assert np.allclose(layer.get_output_for(input).eval(), expected)
+
+ # alphas shared over the 1st and 4th axes
+ layer = ParametricRectifierLayer(input_shape, shared_axes=(0, 3),
+ alpha=init_alpha)
+ alpha_v = layer.alpha.get_value()
+ expected = np.maximum(input, 0) + np.minimum(input, 0) * \
+ alpha_v[None, :, :, None]
+ assert np.allclose(layer.get_output_for(input).eval(), expected)
+
+ def test_prelu(self, init_alpha):
+ import lasagne
+ input_shape = (3, 28)
+ input = np.random.randn(*input_shape).astype(theano.config.floatX)
+
+ l_in = lasagne.layers.input.InputLayer(input_shape)
+ l_dense = lasagne.layers.dense.DenseLayer(l_in, num_units=100)
+ l_prelu = lasagne.layers.prelu(l_dense, alpha=init_alpha)
+ output = lasagne.layers.get_output(l_prelu, input)
+
+ assert l_dense.nonlinearity == lasagne.nonlinearities.identity
+
+ W = l_dense.W.get_value()
+ b = l_dense.b.get_value()
+ alpha_v = l_prelu.alpha.get_value()
+ expected = np.dot(input, W) + b
+ expected = np.maximum(expected, 0) + \
+ np.minimum(expected, 0) * alpha_v
+ assert np.allclose(output.eval(), expected)
+
+
+class TestRandomizedRectifierLayer:
+ @pytest.fixture
+ def RandomizedRectifierLayer(self):
+ from lasagne.layers.special import RandomizedRectifierLayer
+ return RandomizedRectifierLayer
+
+ def test_high_low(self, RandomizedRectifierLayer):
+ with pytest.raises(ValueError):
+ RandomizedRectifierLayer((None, 3, 28, 28), lower=0.9, upper=0.1)
+
+ def test_nomod_positive(self, RandomizedRectifierLayer):
+ input = np.ones((3, 3, 28, 28)).astype(theano.config.floatX)
+ layer = RandomizedRectifierLayer(input.shape)
+ out = layer.get_output_for(input).eval()
+ assert np.allclose(out, 1.0)
+
+ def test_low_eq_high(self, RandomizedRectifierLayer):
+ input = np.ones((3, 3, 28, 28)).astype(theano.config.floatX) * -1
+ layer = RandomizedRectifierLayer(input.shape, lower=0.5, upper=0.5)
+ out = layer.get_output_for(theano.tensor.constant(input)).eval()
+ assert np.allclose(out, -0.5)
+
+ def test_deterministic(self, RandomizedRectifierLayer):
+ input = np.ones((3, 3, 28, 28)).astype(theano.config.floatX) * -1
+ layer = RandomizedRectifierLayer(input.shape, lower=0.4, upper=0.6)
+ out = layer.get_output_for(theano.tensor.constant(input),
+ deterministic=True).eval()
+ assert np.allclose(out, -0.5)
+
+ def test_dim_None(self, RandomizedRectifierLayer):
+ import lasagne
+ l_in = lasagne.layers.input.InputLayer((None, 3, 28, 28))
+ layer = RandomizedRectifierLayer(l_in)
+ input = np.ones((3, 3, 28, 28)).astype(theano.config.floatX)
+ out = layer.get_output_for(input).eval()
+ assert np.allclose(out, 1.0)
+
+ def assert_between(self, layer, input, output):
+ slopes = output / input
+ slopes = slopes[input < 0]
+ assert slopes.min() >= layer.lower
+ assert slopes.max() <= layer.upper
+ assert slopes.var() > 0
+
+ def test_get_output_for(self, RandomizedRectifierLayer):
+ input_shape = (3, 3, 28, 28)
+
+ # ensure slope never exceeds [lower,upper)
+ input = np.random.randn(*input_shape).astype(theano.config.floatX)
+ layer = RandomizedRectifierLayer(input_shape, shared_axes=0)
+ self.assert_between(layer, input, layer.get_output_for(input).eval())
+
+ # from here on, we want to check parameter sharing
+ # this is easier to check if the input is all ones
+ input = np.ones(input_shape).astype(theano.config.floatX) * -1
+
+ # default: parameters shared along all but 2nd axis
+ layer = RandomizedRectifierLayer(input_shape)
+ out = layer.get_output_for(input).eval()
+ assert [
+ np.allclose(out.var(axis=a), 0)
+ for a in range(4)
+ ] == [True, False, True, True]
+
+ # share across all axes (single slope)
+ layer = RandomizedRectifierLayer(input_shape, shared_axes='all')
+ out = layer.get_output_for(input).eval()
+ assert [
+ np.allclose(out.var(axis=a), 0)
+ for a in range(4)
+ ] == [True, True, True, True]
+
+ # share across 1st axis
+ layer = RandomizedRectifierLayer(input_shape, shared_axes=0)
+ out = layer.get_output_for(input).eval()
+ assert [
+ np.allclose(out.var(axis=a), 0)
+ for a in range(4)
+ ] == [True, False, False, False]
+
+ # share across 1st and 4th axes
+ layer = RandomizedRectifierLayer(input_shape, shared_axes=(0, 3))
+ out = layer.get_output_for(input).eval()
+ assert [
+ np.allclose(out.var(axis=a), 0)
+ for a in range(4)
+ ] == [True, False, False, True]
+
+ def test_rrelu(self):
+ import lasagne
+ input_shape = (3, 28)
+ input = np.random.randn(*input_shape).astype(theano.config.floatX)
+
+ l_in = lasagne.layers.input.InputLayer(input_shape)
+ l_dense = lasagne.layers.dense.DenseLayer(l_in, num_units=100)
+ l_rrelu = lasagne.layers.rrelu(l_dense)
+ output = lasagne.layers.get_output(l_rrelu, input)
+
+ assert l_dense.nonlinearity == lasagne.nonlinearities.identity
+
+ W = l_dense.W.get_value()
+ b = l_dense.b.get_value()
+ self.assert_between(l_rrelu, np.dot(input, W) + b, output.eval())
diff --git a/lasagne/tests/test_examples.py b/lasagne/tests/test_examples.py
new file mode 100644
index 0000000..3d64c44
--- /dev/null
+++ b/lasagne/tests/test_examples.py
@@ -0,0 +1,38 @@
+from glob import glob
+from importlib import import_module
+from os.path import basename
+from os.path import dirname
+from os.path import join
+from os.path import splitext
+import sys
+
+import pytest
+
+
+EXAMPLES_DIR = join(dirname(dirname(dirname(__file__))), 'examples')
+
+
+def _example_modules():
+ paths = glob(join(EXAMPLES_DIR, "*py"))
+ return [splitext(basename(path))[0] for path in paths]
+
+
+@pytest.fixture
+def example(request):
+ sys.path.insert(0, EXAMPLES_DIR)
+ request.addfinalizer(lambda: sys.path.remove(EXAMPLES_DIR))
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize("module_name", _example_modules())
+def test_example(example, module_name):
+ try:
+ main = getattr(import_module(module_name), 'main')
+ except ImportError as e:
+ skip_exceptions = ["requires a GPU", "pylearn2", "dnn not available"]
+ if any([text in str(e) for text in skip_exceptions]):
+ pytest.skip(e)
+ else:
+ raise
+
+ main(num_epochs=1) # run the example for one iteration
diff --git a/lasagne/tests/test_init.py b/lasagne/tests/test_init.py
new file mode 100644
index 0000000..2933014
--- /dev/null
+++ b/lasagne/tests/test_init.py
@@ -0,0 +1,351 @@
+import pytest
+
+
+def test_initializer_sample():
+ from lasagne.init import Initializer
+
+ with pytest.raises(NotImplementedError):
+ Initializer().sample((100, 100))
+
+
+def test_shape():
+ from lasagne.init import Initializer
+
+    # Assert that all `Initializer` subclasses return the shape that
+ # we've asked for in `sample`:
+ for klass in Initializer.__subclasses__():
+ if len(klass.__subclasses__()):
+ # check HeNormal, HeUniform, GlorotNormal, GlorotUniform
+ for sub_klass in klass.__subclasses__():
+ assert sub_klass().sample((12, 23)).shape == (12, 23)
+ else:
+ assert klass().sample((12, 23)).shape == (12, 23)
+
+
+def test_specified_rng():
+ from lasagne.random import get_rng, set_rng
+ from lasagne.init import (Normal, Uniform, GlorotNormal,
+ GlorotUniform, Sparse, Orthogonal)
+
+ from numpy.random import RandomState
+ from numpy import allclose
+
+ seed = 123456789
+ rng = get_rng()
+
+ for init_class in [Normal, Uniform, GlorotNormal,
+ GlorotUniform, Sparse, Orthogonal]:
+ set_rng(RandomState(seed))
+ sample1 = init_class().sample((100, 100))
+ set_rng(RandomState(seed))
+ sample2 = init_class().sample((100, 100))
+ set_rng(rng) # reset to original RNG for other tests
+ assert allclose(sample1, sample2),\
+ ("random initialization was inconsistent for {}"
+ .format(init_class.__name__))
+
+
+def test_normal():
+ from lasagne.init import Normal
+
+ sample = Normal().sample((100, 200))
+ assert -0.001 < sample.mean() < 0.001
+ assert 0.009 < sample.std() < 0.011
+
+
+def test_uniform_range_as_number():
+ from lasagne.init import Uniform
+
+ sample = Uniform(1.0).sample((300, 400))
+ assert sample.shape == (300, 400)
+ assert -1.0 <= sample.min() < -0.9
+ assert 0.9 < sample.max() <= 1.0
+
+
+def test_uniform_range_as_range():
+ from lasagne.init import Uniform
+
+ sample = Uniform((0.0, 1.0)).sample((300, 400))
+ assert sample.shape == (300, 400)
+ assert 0.0 <= sample.min() < 0.1
+ assert 0.9 < sample.max() <= 1.0
+
+
+def test_uniform_mean_std():
+ from lasagne.init import Uniform
+ sample = Uniform(std=1.0, mean=5.0).sample((300, 400))
+ assert 4.9 < sample.mean() < 5.1
+ assert 0.9 < sample.std() < 1.1
+
+
+def test_glorot_normal():
+ from lasagne.init import GlorotNormal
+
+ sample = GlorotNormal().sample((100, 100))
+ assert -0.01 < sample.mean() < 0.01
+ assert 0.09 < sample.std() < 0.11
+
+
+def test_glorot_1d_not_supported():
+ from lasagne.init import GlorotNormal
+
+ with pytest.raises(RuntimeError):
+ GlorotNormal().sample((100,))
+
+
+def test_glorot_normal_receptive_field():
+ from lasagne.init import GlorotNormal
+
+ sample = GlorotNormal().sample((50, 50, 2))
+ assert -0.01 < sample.mean() < 0.01
+ assert 0.09 < sample.std() < 0.11
+
+
+def test_glorot_normal_gain():
+ from lasagne.init import GlorotNormal
+
+ sample = GlorotNormal(gain=10.0).sample((100, 100))
+ assert -0.1 < sample.mean() < 0.1
+ assert 0.9 < sample.std() < 1.1
+
+ sample = GlorotNormal(gain='relu').sample((100, 100))
+ assert -0.01 < sample.mean() < 0.01
+ assert 0.132 < sample.std() < 0.152
+
+
+def test_glorot_normal_c01b():
+ from lasagne.init import GlorotNormal
+
+ sample = GlorotNormal(c01b=True).sample((25, 2, 2, 25))
+ assert -0.01 < sample.mean() < 0.01
+ assert 0.09 < sample.std() < 0.11
+
+
+def test_glorot_normal_c01b_4d_only():
+ from lasagne.init import GlorotNormal
+
+ with pytest.raises(RuntimeError):
+ GlorotNormal(c01b=True).sample((100,))
+
+ with pytest.raises(RuntimeError):
+ GlorotNormal(c01b=True).sample((100, 100))
+
+ with pytest.raises(RuntimeError):
+ GlorotNormal(c01b=True).sample((100, 100, 100))
+
+
+def test_glorot_uniform():
+ from lasagne.init import GlorotUniform
+
+ sample = GlorotUniform().sample((150, 450))
+ assert -0.1 <= sample.min() < -0.09
+ assert 0.09 < sample.max() <= 0.1
+
+
+def test_glorot_uniform_receptive_field():
+ from lasagne.init import GlorotUniform
+
+ sample = GlorotUniform().sample((150, 150, 2))
+ assert -0.10 <= sample.min() < -0.09
+ assert 0.09 < sample.max() <= 0.10
+
+
+def test_glorot_uniform_gain():
+ from lasagne.init import GlorotUniform
+
+ sample = GlorotUniform(gain=10.0).sample((150, 450))
+ assert -1.0 <= sample.min() < -0.9
+ assert 0.9 < sample.max() <= 1.0
+
+ sample = GlorotUniform(gain='relu').sample((100, 100))
+ assert -0.01 < sample.mean() < 0.01
+ assert 0.132 < sample.std() < 0.152
+
+
+def test_glorot_uniform_c01b():
+ from lasagne.init import GlorotUniform
+
+ sample = GlorotUniform(c01b=True).sample((75, 2, 2, 75))
+ assert -0.1 <= sample.min() < -0.09
+ assert 0.09 < sample.max() <= 0.1
+
+
+def test_glorot_uniform_c01b_4d_only():
+ from lasagne.init import GlorotUniform
+
+ with pytest.raises(RuntimeError):
+ GlorotUniform(c01b=True).sample((100,))
+
+ with pytest.raises(RuntimeError):
+ GlorotUniform(c01b=True).sample((100, 100))
+
+ with pytest.raises(RuntimeError):
+ GlorotUniform(c01b=True).sample((100, 100, 100))
+
+
+def test_he_normal():
+ from lasagne.init import HeNormal
+
+ sample = HeNormal().sample((100, 100))
+ assert -0.01 < sample.mean() < 0.01
+ assert 0.09 < sample.std() < 0.11
+
+
+def test_he_1d_not_supported():
+ from lasagne.init import HeNormal
+
+ with pytest.raises(RuntimeError):
+ HeNormal().sample((100,))
+
+
+def test_he_normal_receptive_field():
+ from lasagne.init import HeNormal
+
+ sample = HeNormal().sample((50, 50, 2))
+ assert -0.01 < sample.mean() < 0.01
+ assert 0.09 < sample.std() < 0.11
+
+
+def test_he_normal_gain():
+ from lasagne.init import HeNormal
+
+ sample = HeNormal(gain=10.0).sample((100, 100))
+ assert -0.1 < sample.mean() < 0.1
+ assert 0.9 < sample.std() < 1.1
+
+ sample = HeNormal(gain='relu').sample((200, 50))
+ assert -0.1 < sample.mean() < 0.1
+ assert 0.07 < sample.std() < 0.12
+
+
+def test_he_normal_c01b():
+ from lasagne.init import HeNormal
+
+ sample = HeNormal(c01b=True).sample((25, 2, 2, 25))
+ assert -0.01 < sample.mean() < 0.01
+ assert 0.09 < sample.std() < 0.11
+
+
+def test_he_normal_c01b_4d_only():
+ from lasagne.init import HeNormal
+
+ with pytest.raises(RuntimeError):
+ HeNormal(c01b=True).sample((100,))
+
+ with pytest.raises(RuntimeError):
+ HeNormal(c01b=True).sample((100, 100))
+
+ with pytest.raises(RuntimeError):
+ HeNormal(c01b=True).sample((100, 100, 100))
+
+
+def test_he_uniform():
+ from lasagne.init import HeUniform
+
+ sample = HeUniform().sample((300, 200))
+ assert -0.1 <= sample.min() < -0.09
+ assert 0.09 < sample.max() <= 0.1
+
+
+def test_he_uniform_receptive_field():
+ from lasagne.init import HeUniform
+
+ sample = HeUniform().sample((150, 150, 2))
+ assert -0.10 <= sample.min() < -0.09
+ assert 0.09 < sample.max() <= 0.10
+
+
+def test_he_uniform_gain():
+ from lasagne.init import HeUniform
+
+ sample = HeUniform(gain=10.0).sample((300, 200))
+ assert -1.0 <= sample.min() < -0.9
+ assert 0.9 < sample.max() <= 1.0
+
+ sample = HeUniform(gain='relu').sample((100, 100))
+ assert -0.1 < sample.mean() < 0.1
+ assert 0.1 < sample.std() < 0.2
+
+
+def test_he_uniform_c01b():
+ from lasagne.init import HeUniform
+
+ sample = HeUniform(c01b=True).sample((75, 2, 2, 75))
+ assert -0.1 <= sample.min() < -0.09
+ assert 0.09 < sample.max() <= 0.1
+
+
+def test_he_uniform_c01b_4d_only():
+ from lasagne.init import HeUniform
+
+ with pytest.raises(RuntimeError):
+ HeUniform(c01b=True).sample((100,))
+
+ with pytest.raises(RuntimeError):
+ HeUniform(c01b=True).sample((100, 100))
+
+ with pytest.raises(RuntimeError):
+ HeUniform(c01b=True).sample((100, 100, 100))
+
+
+def test_constant():
+ from lasagne.init import Constant
+
+ sample = Constant(1.0).sample((10, 20))
+ assert (sample == 1.0).all()
+
+
+def test_sparse():
+ from lasagne.init import Sparse
+
+ sample = Sparse(sparsity=0.1).sample((10, 20))
+ assert (sample != 0.0).sum() == (10 * 20) * 0.1
+
+
+def test_sparse_1d_not_supported():
+ from lasagne.init import Sparse
+
+ with pytest.raises(RuntimeError):
+ Sparse().sample((100,))
+
+
+def test_orthogonal():
+ import numpy as np
+ from lasagne.init import Orthogonal
+
+ sample = Orthogonal().sample((100, 200))
+ assert np.allclose(np.dot(sample, sample.T), np.eye(100), atol=1e-6)
+
+ sample = Orthogonal().sample((200, 100))
+ assert np.allclose(np.dot(sample.T, sample), np.eye(100), atol=1e-6)
+
+
+def test_orthogonal_gain():
+ import numpy as np
+ from lasagne.init import Orthogonal
+
+ gain = 2
+ sample = Orthogonal(gain).sample((100, 200))
+ assert np.allclose(np.dot(sample, sample.T), gain * gain * np.eye(100),
+ atol=1e-6)
+
+ gain = np.sqrt(2)
+ sample = Orthogonal('relu').sample((100, 200))
+ assert np.allclose(np.dot(sample, sample.T), gain * gain * np.eye(100),
+ atol=1e-6)
+
+
+def test_orthogonal_multi():
+ import numpy as np
+ from lasagne.init import Orthogonal
+
+ sample = Orthogonal().sample((100, 50, 80))
+ sample = sample.reshape(100, 50*80)
+ assert np.allclose(np.dot(sample, sample.T), np.eye(100), atol=1e-6)
+
+
+def test_orthogonal_1d_not_supported():
+ from lasagne.init import Orthogonal
+
+ with pytest.raises(RuntimeError):
+ Orthogonal().sample((100,))
diff --git a/lasagne/tests/test_nonlinearities.py b/lasagne/tests/test_nonlinearities.py
new file mode 100644
index 0000000..3d906c1
--- /dev/null
+++ b/lasagne/tests/test_nonlinearities.py
@@ -0,0 +1,69 @@
+import pytest
+import numpy as np
+import theano.tensor as T
+
+
+class TestNonlinearities(object):
+ def linear(self, x):
+ return x
+
+ def rectify(self, x):
+ return x * (x > 0)
+
+ def leaky_rectify(self, x):
+ return x * (x > 0) + 0.01 * x * (x < 0)
+
+ def leaky_rectify_0(self, x):
+ return self.rectify(x)
+
+ def elu(self, x, alpha=1):
+ return np.where(x > 0, x, alpha * (np.exp(x) - 1))
+
+ def softplus(self, x):
+ return np.log1p(np.exp(x))
+
+ def sigmoid(self, x):
+ return 1 / (1 + np.exp(-x))
+
+ def tanh(self, x):
+ return np.tanh(x)
+
+ def scaled_tanh(self, x):
+ return np.tanh(x)
+
+ def scaled_tanh_p(self, x):
+ return 2.27 * np.tanh(0.5 * x)
+
+ def softmax(self, x):
+ return (np.exp(x).T / np.exp(x).sum(-1)).T
+
+ @pytest.mark.parametrize('nonlinearity',
+ ['linear', 'rectify',
+ 'leaky_rectify', 'elu', 'sigmoid',
+ 'tanh', 'scaled_tanh',
+ 'softmax', 'leaky_rectify_0',
+ 'scaled_tanh_p', 'softplus'])
+ def test_nonlinearity(self, nonlinearity):
+ import lasagne.nonlinearities
+
+ if nonlinearity == 'leaky_rectify_0':
+ from lasagne.nonlinearities import LeakyRectify
+ theano_nonlinearity = LeakyRectify(leakiness=0)
+ elif nonlinearity == 'scaled_tanh':
+ from lasagne.nonlinearities import ScaledTanH
+ theano_nonlinearity = ScaledTanH()
+ elif nonlinearity == 'scaled_tanh_p':
+ from lasagne.nonlinearities import ScaledTanH
+ theano_nonlinearity = ScaledTanH(scale_in=0.5, scale_out=2.27)
+ else:
+ theano_nonlinearity = getattr(lasagne.nonlinearities,
+ nonlinearity)
+ np_nonlinearity = getattr(self, nonlinearity)
+
+ X = T.matrix()
+ X0 = lasagne.utils.floatX(np.random.uniform(-3, 3, (10, 10)))
+
+ theano_result = theano_nonlinearity(X).eval({X: X0})
+ np_result = np_nonlinearity(X0)
+
+ assert np.allclose(theano_result, np_result)
diff --git a/lasagne/tests/test_objectives.py b/lasagne/tests/test_objectives.py
new file mode 100644
index 0000000..63717e2
--- /dev/null
+++ b/lasagne/tests/test_objectives.py
@@ -0,0 +1,236 @@
+import numpy as np
+import theano
+import pytest
+
+
+def test_binary_crossentropy():
+ # symbolic version
+ from lasagne.objectives import binary_crossentropy
+ p, t = theano.tensor.matrices('pt')
+ c = binary_crossentropy(p, t)
+ # numeric version
+ floatX = theano.config.floatX
+ predictions = np.random.rand(10, 20).astype(floatX)
+ targets = np.random.rand(10, 20).astype(floatX)
+ crossent = (- targets * np.log(predictions) -
+ (1-targets) * np.log(1-predictions))
+ # compare
+ assert np.allclose(crossent, c.eval({p: predictions, t: targets}))
+
+
+def test_categorical_crossentropy():
+ # symbolic version
+ from lasagne.objectives import categorical_crossentropy
+ p, t = theano.tensor.matrices('pt')
+ c = categorical_crossentropy(p, t)
+ # numeric version
+ floatX = theano.config.floatX
+ predictions = np.random.rand(10, 20).astype(floatX)
+ predictions /= predictions.sum(axis=1, keepdims=True)
+ targets = np.random.rand(10, 20).astype(floatX)
+ targets /= targets.sum(axis=1, keepdims=True)
+ crossent = -(targets * np.log(predictions)).sum(axis=-1)
+ # compare
+ assert np.allclose(crossent, c.eval({p: predictions, t: targets}))
+
+
+def test_categorical_crossentropy_onehot():
+ # symbolic version
+ from lasagne.objectives import categorical_crossentropy
+ p = theano.tensor.matrix('p')
+ t = theano.tensor.ivector('t') # correct class per item
+ c = categorical_crossentropy(p, t)
+ # numeric version
+ floatX = theano.config.floatX
+ predictions = np.random.rand(10, 20).astype(floatX)
+ predictions /= predictions.sum(axis=1, keepdims=True)
+ targets = np.random.randint(20, size=10).astype(np.uint8)
+ crossent = -np.log(predictions[np.arange(10), targets])
+ # compare
+ assert np.allclose(crossent, c.eval({p: predictions, t: targets}))
+
+
+def test_squared_error():
+ # symbolic version
+ from lasagne.objectives import squared_error
+ a, b = theano.tensor.matrices('ab')
+ c = squared_error(a, b)
+ # numeric version
+ floatX = theano.config.floatX
+ x = np.random.randn(10, 20).astype(floatX)
+ y = np.random.randn(10, 20).astype(floatX)
+ z = (x - y)**2
+ # compare
+ assert np.allclose(z, c.eval({a: x, b: y}))
+
+
+def test_aggregate_mean():
+ from lasagne.objectives import aggregate
+ x = theano.tensor.matrix('x')
+ assert theano.gof.graph.is_same_graph(aggregate(x), x.mean())
+ assert theano.gof.graph.is_same_graph(aggregate(x, mode='mean'), x.mean())
+
+
+def test_aggregate_sum():
+ from lasagne.objectives import aggregate
+ x = theano.tensor.matrix('x')
+ assert theano.gof.graph.is_same_graph(aggregate(x, mode='sum'), x.sum())
+
+
+def test_aggregate_weighted_mean():
+ from lasagne.objectives import aggregate
+ x = theano.tensor.matrix('x')
+ w = theano.tensor.matrix('w')
+ assert theano.gof.graph.is_same_graph(aggregate(x, w), (x * w).mean())
+ assert theano.gof.graph.is_same_graph(aggregate(x, w, mode='mean'),
+ (x * w).mean())
+
+
+def test_aggregate_weighted_sum():
+ from lasagne.objectives import aggregate
+ x = theano.tensor.matrix('x')
+ w = theano.tensor.matrix('w')
+ assert theano.gof.graph.is_same_graph(aggregate(x, w, mode='sum'),
+ (x * w).sum())
+
+
+def test_aggregate_weighted_normalized_sum():
+ from lasagne.objectives import aggregate
+ x = theano.tensor.matrix('x')
+ w = theano.tensor.matrix('w')
+ assert theano.gof.graph.is_same_graph(aggregate(x, w, 'normalized_sum'),
+ (x * w).sum() / w.sum())
+
+
+def test_aggregate_invalid():
+ from lasagne.objectives import aggregate
+ with pytest.raises(ValueError) as exc:
+ aggregate(theano.tensor.matrix(), mode='asdf')
+ assert 'mode must be' in exc.value.args[0]
+ with pytest.raises(ValueError) as exc:
+ aggregate(theano.tensor.matrix(), mode='normalized_sum')
+ assert 'require weights' in exc.value.args[0]
+
+
+def test_binary_hinge_loss():
+ from lasagne.objectives import binary_hinge_loss
+ from lasagne.nonlinearities import rectify
+ p = theano.tensor.vector('p')
+ t = theano.tensor.ivector('t')
+ c = binary_hinge_loss(p, t)
+ # numeric version
+ floatX = theano.config.floatX
+ predictions = np.random.rand(10).astype(floatX)
+ targets = np.random.random_integers(0, 1, (10,)).astype("int8")
+ hinge = rectify(1 - predictions * (2 * targets - 1))
+ # compare
+ assert np.allclose(hinge, c.eval({p: predictions, t: targets}))
+
+
+def test_binary_hinge_loss_not_binary_targets():
+ from lasagne.objectives import binary_hinge_loss
+ from lasagne.nonlinearities import rectify
+ p = theano.tensor.vector('p')
+ t = theano.tensor.ivector('t')
+ c = binary_hinge_loss(p, t, binary=False)
+ # numeric version
+ floatX = theano.config.floatX
+ predictions = np.random.rand(10, ).astype(floatX)
+ targets = np.random.random_integers(0, 1, (10, )).astype("int8")
+ targets = 2 * targets - 1
+ hinge = rectify(1 - predictions * targets)
+ # compare
+ assert np.allclose(hinge, c.eval({p: predictions, t: targets}))
+
+
+def test_multiclass_hinge_loss():
+ from lasagne.objectives import multiclass_hinge_loss
+ from lasagne.nonlinearities import rectify
+ p = theano.tensor.matrix('p')
+ t = theano.tensor.ivector('t')
+ c = multiclass_hinge_loss(p, t)
+ # numeric version
+ floatX = theano.config.floatX
+ predictions = np.random.rand(10, 20).astype(floatX)
+ targets = np.random.random_integers(0, 19, (10,)).astype("int8")
+ one_hot = np.zeros((10, 20))
+ one_hot[np.arange(10), targets] = 1
+ correct = predictions[one_hot > 0]
+ rest = predictions[one_hot < 1].reshape((10, 19))
+ rest = np.max(rest, axis=1)
+ hinge = rectify(1 + rest - correct)
+ # compare
+ assert np.allclose(hinge, c.eval({p: predictions, t: targets}))
+
+
+def test_multiclass_hinge_loss_invalid():
+ from lasagne.objectives import multiclass_hinge_loss
+ with pytest.raises(TypeError) as exc:
+ multiclass_hinge_loss(theano.tensor.vector(),
+ theano.tensor.matrix())
+ assert 'rank mismatch' in exc.value.args[0]
+
+
+def test_binary_accuracy():
+ from lasagne.objectives import binary_accuracy
+ p = theano.tensor.vector('p')
+ t = theano.tensor.ivector('t')
+ c = binary_accuracy(p, t)
+ # numeric version
+ floatX = theano.config.floatX
+ predictions = np.random.rand(10, ).astype(floatX) > 0.5
+ targets = np.random.random_integers(0, 1, (10,)).astype("int8")
+ accuracy = predictions == targets
+ # compare
+ assert np.allclose(accuracy, c.eval({p: predictions, t: targets}))
+
+
+def test_categorical_accuracy():
+ from lasagne.objectives import categorical_accuracy
+ p = theano.tensor.matrix('p')
+ t = theano.tensor.ivector('t')
+ c = categorical_accuracy(p, t)
+ # numeric version
+ floatX = theano.config.floatX
+ predictions = np.random.rand(100, 5).astype(floatX)
+ cls_predictions = np.argmax(predictions, axis=1)
+ targets = np.random.random_integers(0, 4, (100,)).astype("int8")
+ accuracy = cls_predictions == targets
+ # compare
+ assert np.allclose(accuracy, c.eval({p: predictions, t: targets}))
+ one_hot = np.zeros((100, 5)).astype("int8")
+ one_hot[np.arange(100), targets] = 1
+ t = theano.tensor.imatrix('t')
+ c = categorical_accuracy(p, t)
+ assert np.allclose(accuracy, c.eval({p: predictions, t: one_hot}))
+
+
+def test_categorical_accuracy_top_k():
+ from lasagne.objectives import categorical_accuracy
+ p = theano.tensor.matrix('p')
+ t = theano.tensor.ivector('t')
+ top_k = 4
+ c = categorical_accuracy(p, t, top_k=top_k)
+ # numeric version
+ floatX = theano.config.floatX
+ predictions = np.random.rand(10, 20).astype(floatX)
+ cls_predictions = np.argsort(predictions, axis=1).astype("int8")
+ # (construct targets such that top-1 to top-10 predictions are in there)
+ targets = cls_predictions[np.arange(10), -np.random.permutation(10)]
+ top_predictions = cls_predictions[:, -top_k:]
+ accuracy = np.any(top_predictions == targets[:, np.newaxis], axis=1)
+ # compare
+ assert np.allclose(accuracy, c.eval({p: predictions, t: targets}))
+ one_hot = np.zeros((10, 20)).astype("int8")
+ one_hot[np.arange(10), targets] = 1
+ t = theano.tensor.imatrix('t')
+ c = categorical_accuracy(p, t, top_k=top_k)
+ assert np.allclose(accuracy, c.eval({p: predictions, t: one_hot}))
+
+
+def test_categorial_accuracy_invalid():
+ from lasagne.objectives import categorical_accuracy
+ with pytest.raises(TypeError) as exc:
+ categorical_accuracy(theano.tensor.vector(),
+ theano.tensor.matrix())
+ assert 'rank mismatch' in exc.value.args[0]
diff --git a/lasagne/tests/test_regularization.py b/lasagne/tests/test_regularization.py
new file mode 100644
index 0000000..ce98870
--- /dev/null
+++ b/lasagne/tests/test_regularization.py
@@ -0,0 +1,99 @@
+import pytest
+import numpy as np
+import theano.tensor as T
+import lasagne
+
+from collections import OrderedDict
+from theano.scan_module.scan_utils import equal_computations
+from mock import Mock
+
+
+class TestRegularizationPenalties(object):
+ def l1(self, x):
+ return np.abs(x).sum()
+
+ def l2(self, x):
+ return (x**2).sum()
+
+ @pytest.mark.parametrize('penalty',
+ ['l1', 'l2'])
+ def test_penalty(self, penalty):
+ np_penalty = getattr(self, penalty)
+ theano_penalty = getattr(lasagne.regularization, penalty)
+
+ X = T.matrix()
+ X0 = lasagne.utils.floatX(np.random.uniform(-3, 3, (10, 10)))
+
+ theano_result = theano_penalty(X).eval({X: X0})
+ np_result = np_penalty(X0)
+
+ assert np.allclose(theano_result, np_result)
+
+
+class TestRegularizationHelpers(object):
+ @pytest.fixture
+ def layers(self):
+ l_1 = lasagne.layers.InputLayer((10,))
+ l_2 = lasagne.layers.DenseLayer(l_1, num_units=20)
+ l_3 = lasagne.layers.DenseLayer(l_2, num_units=30)
+ return l_1, l_2, l_3
+
+ def test_apply_penalty(self):
+ from lasagne.regularization import apply_penalty, l2
+ A = T.vector()
+ B = T.matrix()
+
+ assert apply_penalty([], l2) == 0
+
+ assert equal_computations([apply_penalty(A, l2)],
+ [l2(A)])
+
+ assert equal_computations([apply_penalty([A, B], l2)],
+ [sum([l2(A), l2(B)])])
+
+ def test_regularize_layer_params_single_layer(self, layers):
+ from lasagne.regularization import regularize_layer_params
+ l_1, l_2, l_3 = layers
+
+ penalty = Mock(return_value=0)
+ loss = regularize_layer_params(l_2, penalty)
+
+ assert penalty.call_count == 1
+ penalty.assert_any_call(l_2.W)
+
+ def test_regularize_layer_params_multiple_layers(self, layers):
+ from lasagne.regularization import regularize_layer_params
+ l_1, l_2, l_3 = layers
+
+ penalty = Mock(return_value=0)
+ loss = regularize_layer_params([l_1, l_2, l_3], penalty)
+
+ assert penalty.call_count == 2
+ penalty.assert_any_call(l_2.W)
+ penalty.assert_any_call(l_3.W)
+
+ def test_regularize_network_params(self, layers):
+ from lasagne.regularization import regularize_network_params
+ l_1, l_2, l_3 = layers
+
+ penalty = Mock(return_value=0)
+ loss = regularize_network_params(l_3, penalty)
+
+ assert penalty.call_count == 2
+ penalty.assert_any_call(l_2.W)
+ penalty.assert_any_call(l_3.W)
+
+ def test_regularize_layer_params_weighted(self, layers):
+ from lasagne.regularization import regularize_layer_params_weighted
+ from lasagne.regularization import apply_penalty, l2
+ l_1, l_2, l_3 = layers
+
+ layers = OrderedDict()
+ layers[l_2] = 0.1
+ layers[l_3] = 0.5
+
+ loss = regularize_layer_params_weighted(layers,
+ lasagne.regularization.l2)
+ assert equal_computations([loss],
+ [sum([0.1 * apply_penalty([l_2.W], l2),
+ 0.5 * apply_penalty([l_3.W], l2)])])
diff --git a/lasagne/tests/test_theano_extensions.py b/lasagne/tests/test_theano_extensions.py
new file mode 100644
index 0000000..57a9c6c
--- /dev/null
+++ b/lasagne/tests/test_theano_extensions.py
@@ -0,0 +1,155 @@
+import pytest
+import numpy as np
+import theano.tensor as T
+import lasagne
+
+
def conv1d(input, kernel, stride=1):
    """Numpy reference implementation of a 'valid' 1D convolution.

    Convolves channel 0 of each batch item with channel 0 of each filter
    and subsamples the result by ``stride``; expected shapes are
    (batch, 1, length) for ``input`` and (num_filters, 1, filter_length)
    for ``kernel``.
    """
    output = [
        [np.convolve(sample[0, :], filt[0, :], mode='valid')
         for filt in kernel]
        for sample in input
    ]
    return np.array(output)[:, :, ::stride]
+
+
@pytest.mark.parametrize('impl', ['conv1d_sc', 'conv1d_mc0',
                                  'conv1d_mc1', 'conv1d_unstrided',
                                  'conv1d_sd', 'conv1d_md'])
@pytest.mark.parametrize('filter_flip', [True, False])
@pytest.mark.parametrize('stride', [1, 2])
def test_conv(impl, stride, filter_flip):
    """Every conv1d implementation must agree with the numpy reference."""
    import lasagne.theano_extensions.conv
    conv_fn = getattr(lasagne.theano_extensions.conv, impl)

    X, W = T.tensor3(), T.tensor3()
    input = lasagne.utils.floatX(np.ones((1, 1, 10)))
    kernel = lasagne.utils.floatX(np.random.uniform(-1, 1, (2, 1, 6)))

    result = conv_fn(X, W, input.shape, kernel.shape, subsample=(stride,),
                     filter_flip=filter_flip).eval({X: input, W: kernel})

    assert np.allclose(result, conv1d(input, kernel, stride))
+
+
@pytest.mark.parametrize('impl', ['conv1d_sc', 'conv1d_mc0', 'conv1d_mc1'])
def test_conv_nones(impl):
    """The conv2d-backed implementations accept shapes given as None."""
    import lasagne.theano_extensions.conv
    conv_fn = getattr(lasagne.theano_extensions.conv, impl)

    X, W = T.tensor3(), T.tensor3()
    input = lasagne.utils.floatX(np.ones((1, 1, 12)))
    kernel = lasagne.utils.floatX(np.random.uniform(-1, 1, (2, 1, 3)))

    result = conv_fn(X, W, None, None).eval({X: input, W: kernel})

    assert np.allclose(result, conv1d(input, kernel))
+
+
@pytest.mark.parametrize('impl', ['conv1d_mc0', 'conv1d_mc1'])
@pytest.mark.parametrize('pad', [1, (2,)])
def test_conv_pad(impl, pad):
    """Padding via border_mode matches zero-padding the input by hand."""
    import lasagne.theano_extensions.conv
    conv_fn = getattr(lasagne.theano_extensions.conv, impl)

    X, W = T.tensor3(), T.tensor3()
    input = lasagne.utils.floatX(np.ones((1, 1, 12)))
    kernel = lasagne.utils.floatX(np.random.uniform(-1, 1, (2, 1, 3)))

    result = conv_fn(X, W, input.shape, kernel.shape,
                     border_mode=pad).eval({X: input, W: kernel})

    # build the reference by explicitly zero-padding the last axis
    amount = pad[0] if isinstance(pad, tuple) else pad
    padded = np.pad(input, [(0, 0), (0, 0), (amount, amount)],
                    mode='constant')

    assert np.allclose(result, conv1d(padded, kernel))
+
+
@pytest.mark.parametrize('impl', ['conv1d_sc', 'conv1d_mc0',
                                  'conv1d_mc1', 'conv1d_unstrided',
                                  'conv1d_sd', 'conv1d_md'])
def test_conv_invalid_border_mode(impl):
    """An unrecognized border_mode is rejected by every implementation."""
    import lasagne.theano_extensions.conv
    conv_fn = getattr(lasagne.theano_extensions.conv, impl)

    with pytest.raises(Exception):
        conv_fn(T.tensor3(), T.tensor3(), (1, 1, 10), (2, 1, 3),
                border_mode=None)
+
+
@pytest.mark.parametrize('impl', ['conv1d_unstrided', 'conv1d_sd',
                                  'conv1d_md'])
def test_conv_stride(impl):
    """A stride that does not divide the filter length is rejected by the
    implementations that fold the stride into the channel axis."""
    import lasagne.theano_extensions.conv
    conv_fn = getattr(lasagne.theano_extensions.conv, impl)

    # filter length 3 is not a multiple of stride 2
    with pytest.raises(Exception):
        conv_fn(T.tensor3(), T.tensor3(), (1, 1, 10), (2, 1, 3),
                subsample=(2,))
+
+
@pytest.mark.parametrize('val', [0, 7])
@pytest.mark.parametrize('batch_ndim', [1, 2])
def test_pad(batch_ndim, val, width=3):
    """Symmetric int padding matches np.pad on the non-batch axes."""
    from lasagne.theano_extensions.padding import pad

    X = T.tensor4()
    X0 = lasagne.utils.floatX(np.ones((2, 3, 4, 5)))
    result = pad(X, width, val, batch_ndim).eval({X: X0})

    # axes before batch_ndim are left untouched
    pads = tuple((0, 0) if axis < batch_ndim else (width, width)
                 for axis in range(X0.ndim))
    expected = np.pad(X0, pads, mode='constant', constant_values=val)

    assert (result == expected).all()
+
+
@pytest.mark.parametrize('batch_ndim', [1, 2])
def test_pad_width_per_axis(batch_ndim, val=0):
    """A sequence of ints gives each non-batch axis its own pad width."""
    from lasagne.theano_extensions.padding import pad

    width = (1, 2, 3, 4)

    X = T.tensor4()
    X0 = lasagne.utils.floatX(np.ones((2, 3, 4, 5)))
    result = pad(X, width[batch_ndim:], val, batch_ndim).eval({X: X0})

    # axes before batch_ndim are left untouched
    pads = tuple((0, 0) if axis < batch_ndim else (w, w)
                 for axis, w in enumerate(width))
    expected = np.pad(X0, pads, mode='constant', constant_values=val)

    assert (result == expected).all()
+
+
@pytest.mark.parametrize('batch_ndim', [1, 2])
def test_pad_width_per_border(batch_ndim, val=0):
    """(before, after) tuples pad each side of each axis independently."""
    from lasagne.theano_extensions.padding import pad

    width = [(1, 2), (3, 4), (1, 2), (3, 4)]

    X = T.tensor4()
    X0 = lasagne.utils.floatX(np.ones((2, 3, 4, 5)))
    result = pad(X, width[batch_ndim:], val, batch_ndim).eval({X: X0})

    # axes before batch_ndim are left untouched
    pads = tuple((0, 0) if axis < batch_ndim else w
                 for axis, w in enumerate(width))
    expected = np.pad(X0, pads, mode='constant', constant_values=val)

    assert (result == expected).all()
diff --git a/lasagne/tests/test_updates.py b/lasagne/tests/test_updates.py
new file mode 100644
index 0000000..c82cd2c
--- /dev/null
+++ b/lasagne/tests/test_updates.py
@@ -0,0 +1,227 @@
+import pytest
+import numpy as np
+import theano
+import theano.tensor as T
+import lasagne
+
+PCT_TOLERANCE = 1E-5
+
+
class TestUpdateFunctions(object):
    """Compare lasagne's update rules against torch.optim reference values.

    These tests compare results on a toy problem to values
    calculated by the torch.optim package, using this script:
    https://gist.github.com/ebenolson/931e879ed38f257253d2
    """
    # Expected value of parameter A (and B, which follows identical
    # dynamics) after 10 update steps, one entry per optimization method.
    torch_values = {'sgd': [0.81707280688755,
                            0.6648326359915,
                            0.5386151140949],
                    'momentum': [0.6848486952183,
                                 0.44803321781003,
                                 0.27431190123502],
                    'nesterov_momentum': [0.67466543592725,
                                          0.44108468114241,
                                          0.2769002108997],
                    'adagrad': [0.55373120047759,
                                0.55373120041518,
                                0.55373120039438],
                    'rmsprop': [0.83205403985348,
                                0.83205322744821,
                                0.83205295664444],
                    'adadelta': [0.95453237704725,
                                 0.9545237471374,
                                 0.95452214847397],
                    'adam': [0.90034972009036,
                             0.90034967993061,
                             0.90034966654402],
                    'adamax': [0.90211749000754,
                               0.90211748762402,
                               0.90211748682951],
                    }

    def f(self, X):
        # Toy quadratic loss with a distinct curvature per component.
        return ([0.1, 0.2, 0.3] * X**2).sum()

    @pytest.mark.parametrize('method, kwargs', [
        ['sgd', {'learning_rate': 0.1}],
        ['momentum', {'learning_rate': 0.1, 'momentum': 0.5}],
        ['nesterov_momentum', {'learning_rate': 0.1, 'momentum': 0.5}],
        ['adagrad', {'learning_rate': 0.1}],
        ['rmsprop', {'learning_rate': 0.01}],
        ['adadelta', {}],
        ['adam', {'learning_rate': 0.01}],
        ['adamax', {'learning_rate': 0.01}],
    ])
    def test_updates(self, method, kwargs):
        # Ten update steps must reproduce the torch.optim trajectory
        # recorded in torch_values above.
        A = theano.shared(lasagne.utils.floatX([1, 1, 1]))
        B = theano.shared(lasagne.utils.floatX([1, 1, 1]))
        update_func = getattr(lasagne.updates, method)
        updates = update_func(self.f(A) + self.f(B),
                              [A, B],
                              **kwargs)
        do_update = theano.function([], [], updates=updates)

        for _ in range(10):
            do_update()

        # A and B start equal and follow identical dynamics.
        assert np.allclose(A.get_value(), B.get_value())
        assert np.allclose(A.get_value(), self.torch_values[method])

    @pytest.mark.parametrize('method, kwargs', [
        ['sgd', {'learning_rate': 0.1}],
        ['momentum', {'learning_rate': 0.1,
                      'momentum': 0.5}],
        ['nesterov_momentum', {'learning_rate': 0.1,
                               'momentum': 0.5}],
        ['adagrad', {'learning_rate': 0.1,
                     'epsilon': 1e-6}],
        ['rmsprop', {'learning_rate': 0.01,
                     'rho': 0.9,
                     'epsilon': 1e-6}],
        ['adadelta', {'learning_rate': 0.01,
                      'rho': 0.9,
                      'epsilon': 1e-6}],
        ['adam', {'learning_rate': 0.01,
                  'beta1': 0.9,
                  'beta2': 0.999,
                  'epsilon': 1e-8}],
        ['adamax', {'learning_rate': 0.01,
                    'beta1': 0.9,
                    'beta2': 0.999,
                    'epsilon': 1e-8}],
    ])
    def test_update_returntype(self, method, kwargs):
        '''Checks whether lasagne.updates handles float32 inputs correctly'''
        # Force float32 for this test, restoring the global setting after.
        floatX_ = theano.config.floatX
        theano.config.floatX = 'float32'
        try:
            A = theano.shared(lasagne.utils.floatX([1, 1, 1]))
            B = theano.shared(lasagne.utils.floatX([1, 1, 1]))
            update_func = getattr(lasagne.updates, method)
            updates = update_func(self.f(A) + self.f(B),
                                  [A, B],
                                  **kwargs)

            assert all(v.dtype == 'float32' for v in updates)

            # Checking for float32 arguments
            for param in kwargs:
                kwargs[param] = np.float32(kwargs[param])
            updates = update_func(self.f(A) + self.f(B),
                                  [A, B],
                                  **kwargs)

            assert all(v.dtype == 'float32' for v in updates)
        finally:
            # restore the global precision setting for later tests
            theano.config.floatX = floatX_
+
+
def test_get_or_compute_grads():
    """Precomputed gradients are passed through; mismatched lists and
    non-shared parameters raise ValueError."""
    from lasagne.updates import get_or_compute_grads

    A = theano.shared(1)
    B = theano.shared(1)
    loss = A + B
    grads = get_or_compute_grads(loss, [A, B])

    # a matching gradient list is returned unchanged
    assert get_or_compute_grads(grads, [A, B]) is grads

    # length mismatch between the gradient list and the parameter list
    with pytest.raises(ValueError):
        get_or_compute_grads(grads, [A])

    # non-shared variables are not valid parameters
    C = T.scalar()
    with pytest.raises(ValueError):
        get_or_compute_grads(A + C, [A, C])
+
+
@pytest.mark.parametrize('ndim', [2, 3])
def test_norm_constraint(ndim):
    """After applying norm_constraint, no norm exceeds max_norm."""
    import numpy as np
    import theano
    from lasagne.updates import norm_constraint
    from lasagne.utils import compute_norms

    max_norm = 0.01
    param = theano.shared(
        np.random.randn(*((25,) * ndim)).astype(theano.config.floatX)
    )

    constrained = norm_constraint(param, max_norm)
    theano.function([], [], updates=[(param, constrained)])()

    assert param.dtype == constrained.dtype
    norms = compute_norms(param.get_value())
    assert np.max(norms) <= max_norm * (1 + PCT_TOLERANCE)
+
+
def test_norm_constraint_norm_axes():
    """norm_constraint honors a custom norm_axes argument."""
    import numpy as np
    import theano
    from lasagne.updates import norm_constraint
    from lasagne.utils import compute_norms

    max_norm = 0.01
    norm_axes = (0, 2)
    param = theano.shared(
        np.random.randn(10, 20, 30, 40).astype(theano.config.floatX)
    )

    constrained = norm_constraint(param, max_norm, norm_axes=norm_axes)
    theano.function([], [], updates=[(param, constrained)])()

    assert param.dtype == constrained.dtype
    norms = compute_norms(param.get_value(), norm_axes=norm_axes)
    assert np.max(norms) <= max_norm * (1 + PCT_TOLERANCE)
+
+
def test_norm_constraint_dim6_raises():
    """Tensors of more than 5 dimensions are rejected."""
    import numpy as np
    import theano
    from lasagne.updates import norm_constraint

    param = theano.shared(
        np.random.randn(1, 2, 3, 4, 5, 6).astype(theano.config.floatX)
    )

    with pytest.raises(ValueError) as excinfo:
        norm_constraint(param, 0.01)
    assert "Unsupported tensor dimensionality" in str(excinfo.value)
+
+
def test_total_norm_constraint():
    """The rescaled tensors are identical with and without return_norm, and
    the joint norm of all outputs equals the threshold."""
    import numpy as np
    import theano
    import theano.tensor as T
    from lasagne.updates import total_norm_constraint

    x1 = T.scalar()
    x2 = T.matrix()
    threshold = 5.0
    scaled = total_norm_constraint([x1, x2], threshold, return_norm=False)
    scaled_n, norm = total_norm_constraint([x1, x2], threshold,
                                           return_norm=True)

    f1 = theano.function([x1, x2], [scaled[0], scaled[1]])
    f2 = theano.function([x1, x2], [scaled_n[0], scaled_n[1], norm])

    # the ten test values are split into a scalar and a 3x3 matrix
    x_test = np.arange(1 + 9, dtype='float32')
    x1_test = x_test[-1]
    x2_test = x_test[:9].reshape((3, 3))
    x1_out1, x2_out1 = f1(x1_test, x2_test)
    x1_out2, x2_out2, norm_out = f2(x1_test, x2_test)

    # both call styles must produce the same rescaled tensors
    np.testing.assert_array_almost_equal(x1_out1, x1_out2)
    np.testing.assert_array_almost_equal(x2_out1, x2_out2)

    # the reported norm is the pre-scaling norm of all inputs, and the
    # outputs together are rescaled to have total norm == threshold
    flat_out = [float(x1_out1)] + list(x2_out1.flatten())
    np.testing.assert_array_almost_equal(np.linalg.norm(x_test), norm_out)
    np.testing.assert_array_almost_equal(np.linalg.norm(flat_out), threshold)
diff --git a/lasagne/tests/test_utils.py b/lasagne/tests/test_utils.py
new file mode 100644
index 0000000..98d38a6
--- /dev/null
+++ b/lasagne/tests/test_utils.py
@@ -0,0 +1,308 @@
+from mock import Mock
+import pytest
+import numpy as np
+import theano
+import theano.tensor as T
+
+
def test_shared_empty():
    """shared_empty(3) yields a zero-filled shared variable of rank 3."""
    from lasagne.utils import shared_empty

    result = shared_empty(3).eval()
    assert (np.zeros((1, 1, 1)) == result).all()
+
+
def test_as_theano_expression_fails():
    """Objects with no Theano equivalent raise TypeError."""
    from lasagne.utils import as_theano_expression
    with pytest.raises(TypeError):
        as_theano_expression({})
+
+
def test_collect_shared_vars():
    """collect_shared_vars finds shared variables in order, once each."""
    from lasagne.utils import collect_shared_vars as collect
    x, y, z = [theano.shared(0, name=name) for name in 'xyz']

    # collecting must not change the order
    assert collect([x, y, z]) == [x, y, z]
    # duplicates should be eliminated
    assert collect([x, y, x, y, y, z]) == [x, y, z]
    # ensure we have left-recursive depth-first search
    assert collect((x + y) + z) == [x, y, z]
    assert collect(x + (y + z)) == [x, y, z]
    # complex expressions and constants should not be included
    assert collect([x**2, y * z * np.ones(10), x + T.matrix()]) == [x, y, z]
    # the result can even be empty
    assert collect([T.matrix() + T.matrix(), T.log(T.matrix())]) == []
+
+
def test_one_hot():
    """one_hot matches a manually constructed one-hot matrix."""
    from lasagne.utils import one_hot
    labels = np.random.randint(0, 10, 20)
    expected = np.zeros((labels.size, labels.max() + 1))
    expected[np.arange(labels.size), labels] = 1

    assert (one_hot(labels).eval() == expected).all()
+
+
def test_as_tuple_fails():
    """as_tuple rejects wrong lengths and wrong element types."""
    from lasagne.utils import as_tuple
    with pytest.raises(ValueError):
        as_tuple([1, 2, 3], 4)
    with pytest.raises(TypeError):
        as_tuple('asdf', 4, int)
+
+
def test_compute_norms():
    """Numpy and Theano inputs give matching per-row norms."""
    from lasagne.utils import compute_norms

    array = np.random.randn(10, 20, 30, 40).astype(theano.config.floatX)

    norms = compute_norms(array)
    assert array.dtype == norms.dtype
    assert norms.shape[0] == array.shape[0]

    # the Theano code path must agree with the numpy one
    theano_norms = compute_norms(theano.shared(array))
    assert np.allclose(theano_norms.eval(), norms)
+
+
def test_compute_norms_axes():
    """Custom norm_axes collapse exactly those axes, for both backends."""
    from lasagne.utils import compute_norms

    array = np.random.randn(10, 20, 30, 40).astype(theano.config.floatX)

    norms = compute_norms(array, norm_axes=(0, 2))
    assert array.dtype == norms.dtype
    # axes 0 and 2 are summed out; axes 1 and 3 remain
    assert norms.shape == (array.shape[1], array.shape[3])

    # the Theano code path must agree with the numpy one
    theano_norms = compute_norms(theano.shared(array), norm_axes=(0, 2))
    assert np.allclose(theano_norms.eval(), norms)
+
+
def test_compute_norms_ndim1():
    """For vectors, the norm of each entry is its absolute value."""
    from lasagne.utils import compute_norms

    array = np.random.randn(10, ).astype(theano.config.floatX)

    norms = compute_norms(array)
    assert array.dtype == norms.dtype
    assert norms.shape == array.shape
    assert np.allclose(norms, abs(array))

    # the Theano code path must agree with the numpy one
    theano_norms = compute_norms(theano.shared(array))
    assert np.allclose(theano_norms.eval(), norms)
+
+
def test_compute_norms_type_raises():
    """Inputs that are neither ndarray nor Theano expression are rejected."""
    from lasagne.utils import compute_norms

    with pytest.raises(RuntimeError) as excinfo:
        compute_norms([[1, 2], [3, 4]])

    assert "Unsupported type" in str(excinfo.value)
+
+
def test_compute_norms_ndim6_raises():
    """Arrays of more than 5 dimensions are rejected."""
    from lasagne.utils import compute_norms

    array = np.random.randn(1, 2, 3, 4, 5, 6).astype(theano.config.floatX)

    with pytest.raises(ValueError) as excinfo:
        compute_norms(array)

    assert "Unsupported tensor dimensionality" in str(excinfo.value)
+
+
def test_create_param_bad_callable_raises():
    """A callable spec must return a value of the right type and shape."""
    from lasagne.utils import create_param

    # wrong return type
    with pytest.raises(TypeError):
        create_param(lambda x: {}, (1, 2, 3))
    # wrong shape
    with pytest.raises(ValueError):
        create_param(lambda x: np.array(1), (1, 2, 3))
+
+
def test_create_param_bad_spec_raises():
    """Specs that are neither array, expression nor callable are rejected."""
    from lasagne.utils import create_param

    with pytest.raises(TypeError):
        create_param({}, (1, 2, 3))
+
+
def test_create_param_accepts_iterable_shape():
    """The shape may be any iterable of int, not only a tuple."""
    from lasagne.utils import create_param
    create_param(np.empty, [2, 3])
    create_param(np.empty, (dim for dim in [2, 3]))
+
+
def test_create_param_numpy_bad_shape_raises_error():
    """An ndarray whose shape disagrees with the spec is rejected."""
    from lasagne.utils import create_param

    with pytest.raises(ValueError):
        create_param(np.array([[1, 2, 3], [4, 5, 6]]), (3, 2))
+
+
def test_create_param_numpy_returns_shared():
    """An ndarray spec is wrapped in a Theano shared variable holding the
    same values."""
    from lasagne.utils import create_param

    param = np.array([[1, 2, 3], [4, 5, 6]])
    result = create_param(param, (2, 3))
    assert isinstance(result, type(theano.shared(param)))
    # the original duplicated this value check; asserting it once suffices
    assert (result.get_value() == param).all()
+
+
def test_create_param_shared_returns_same():
    """A shared-variable spec is passed through untouched."""
    from lasagne.utils import create_param

    param = theano.shared(np.array([[1, 2, 3], [4, 5, 6]]))
    assert create_param(param, (2, 3)) is param
+
+
def test_create_param_shared_bad_ndim_raises_error():
    """A shared variable whose ndim disagrees with the shape is rejected."""
    from lasagne.utils import create_param

    param = theano.shared(np.array([[1, 2, 3], [4, 5, 6]]))
    with pytest.raises(ValueError):
        create_param(param, (2, 3, 4))
+
+
def test_create_param_callable_returns_return_value():
    """A callable spec is invoked with the shape; its ndarray result is
    wrapped in a shared variable."""
    from lasagne.utils import create_param

    array = np.array([[1, 2, 3], [4, 5, 6]])
    factory = Mock(return_value=array)

    result = create_param(factory, (2, 3))
    factory.assert_called_with((2, 3))
    assert (result.get_value() == array).all()
+
+
def test_create_param_callable_returns_shared():
    """A callable returning a shared variable passes it through as-is."""
    from lasagne.utils import create_param

    array = np.array([[1, 2, 3], [4, 5, 6]])
    param = theano.shared(array)
    factory = Mock(return_value=param)

    result = create_param(factory, (2, 3))
    factory.assert_called_with((2, 3))
    assert result is param
    assert (result.get_value() == array).all()
+
+
def test_create_param_callable_returns_shared_bad_ndim_raises_error():
    """A callable returning a shared variable of wrong ndim is rejected."""
    from lasagne.utils import create_param

    factory = Mock(return_value=theano.shared(np.array([[1, 2], [3, 4]])))
    with pytest.raises(ValueError):
        create_param(factory, (2, 3, 4))
+
+
def test_create_param_callable_returns_theano_expr():
    """A callable may return an arbitrary Theano expression, which is used
    unchanged."""
    from lasagne.utils import create_param

    array = np.array([[1, 2, 3], [4, 5, 6]])
    expr = theano.shared(array) * 2
    factory = Mock(return_value=expr)

    result = create_param(factory, (2, 3))
    assert result is expr
    assert (result.eval() == array * 2).all()
+
+
def test_nonpositive_dims_raises_value_error():
    """All dimensions of the requested shape must be strictly positive."""
    from lasagne.utils import create_param

    with pytest.raises(ValueError):
        create_param(np.empty, (-1, -1))
    with pytest.raises(ValueError):
        create_param(np.empty, (0, 0))
    # a fully positive shape is accepted
    create_param(np.empty, (1, 1))
+
+
def test_create_param_callable_returns_wrong_type():
    """A callable returning a non-array, non-expression value is rejected."""
    from lasagne.utils import create_param

    factory = Mock(return_value='string')
    with pytest.raises(TypeError):
        create_param(factory, (1, 2))
+
+
def test_create_param_retain_ndarray_dtype():
    """create_param keeps the dtype of the provided ndarray."""
    from lasagne.utils import create_param
    base = np.array([[1, 2, 3], [4, 5, 6]])

    for dtype in ('float64', 'int16'):
        param = base.astype(dtype)
        result = create_param(param, (2, 3))
        assert result.dtype == param.dtype
+
+
def test_create_param_broadcast_pattern():
    """Axes of length 1 become broadcastable, for callable and array specs."""
    from lasagne.utils import create_param
    for shape in (10, 1, 20), (1, 2), (3, 1), (2, 3):
        expected = tuple(dim == 1 for dim in shape)
        assert create_param(np.zeros, shape).broadcastable == expected
        assert create_param(np.zeros(shape, np.float32),
                            shape).broadcastable == expected
+
+
def test_unroll_scan():
    """unroll_scan reproduces scan semantics for one and two outputs."""
    from lasagne.utils import unroll_scan
    k = 2
    a = T.scalar("a")

    # single output: iterated multiplication yields successive powers of a
    result = unroll_scan(
        fn=lambda step, prior_result, a: prior_result * a,
        sequences=T.arange(k), outputs_info=[1.], non_sequences=[a],
        n_steps=k)
    power = theano.function(inputs=[a], outputs=result[-1])
    assert np.all(power(10) == [10, 100])

    # two outputs: multiply and divide in lockstep
    b = T.scalar("b")

    def mul_div(step, previous_mul, previous_div, mul, div):
        return previous_mul * mul, previous_div / div

    result = unroll_scan(
        fn=mul_div, sequences=T.arange(k), outputs_info=[1., 1.],
        non_sequences=[a, b], n_steps=k)
    both = theano.function(inputs=[a, b], outputs=result)
    assert np.allclose(both(10, 10), [[10, 100], [.1, .01]])
diff --git a/lasagne/theano_extensions/__init__.py b/lasagne/theano_extensions/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/lasagne/theano_extensions/conv.py b/lasagne/theano_extensions/conv.py
new file mode 100644
index 0000000..6dbca08
--- /dev/null
+++ b/lasagne/theano_extensions/conv.py
@@ -0,0 +1,273 @@
+"""
+Alternative convolution implementations for Theano
+"""
+
+import numpy as np
+
+import theano.tensor as T
+
+
+# 1D convolutions
+
def conv1d_sc(input, filters, image_shape=None, filter_shape=None,
              border_mode='valid', subsample=(1,), filter_flip=True):
    """
    1D convolution implemented via ``conv2d`` with a single input channel:
    the channel axis of the 3D input is reinterpreted as a spatial axis of
    a one-channel 4D tensor. Only 'valid' border mode is supported.
    """
    if border_mode not in ('valid', 0, (0,)):
        raise RuntimeError("Unsupported border_mode for conv1d_sc: "
                           "%s" % border_mode)

    # lift (b, c, i0) to (b, 1, c, i0); shapes may be unknown (None)
    image_shape_sc = None
    if image_shape is not None:
        image_shape_sc = (image_shape[0], 1, image_shape[1], image_shape[2])

    filter_shape_sc = None
    if filter_shape is not None:
        filter_shape_sc = (filter_shape[0], 1, filter_shape[1],
                           filter_shape[2])

    input_sc = input.dimshuffle(0, 'x', 1, 2)
    # The channels axis becomes a convolved axis here, so pre-flip it to
    # compensate for the flip conv2d applies along that dimension.
    filters_sc = filters.dimshuffle(0, 'x', 1, 2)[:, :, ::-1, :]

    conved = T.nnet.conv2d(input_sc, filters_sc, image_shape_sc,
                           filter_shape_sc, subsample=(1, subsample[0]),
                           filter_flip=filter_flip)
    return conved[:, :, 0, :]  # drop the unused dimension
+
+
def conv1d_mc0(input, filters, image_shape=None, filter_shape=None,
               border_mode='valid', subsample=(1,), filter_flip=True):
    """
    1D convolution via ``conv2d`` with a dummy spatial axis of size 1
    inserted before the real one (height == 1).
    """
    # lift (b, c, i0) to (b, c, 1, i0); shapes may be unknown (None)
    image_shape_mc0 = None
    if image_shape is not None:
        image_shape_mc0 = (image_shape[0], image_shape[1], 1, image_shape[2])

    filter_shape_mc0 = None
    if filter_shape is not None:
        filter_shape_mc0 = (filter_shape[0], filter_shape[1], 1,
                            filter_shape[2])

    # normalize border_mode: a 1-tuple or int pads only the real axis
    if isinstance(border_mode, tuple):
        (border_mode,) = border_mode
    if isinstance(border_mode, int):
        border_mode = (0, border_mode)

    conved = T.nnet.conv2d(
        input.dimshuffle(0, 1, 'x', 2), filters.dimshuffle(0, 1, 'x', 2),
        image_shape_mc0, filter_shape_mc0,
        subsample=(1, subsample[0]), border_mode=border_mode,
        filter_flip=filter_flip)
    return conved[:, :, 0, :]  # drop the dummy dimension
+
+
def conv1d_mc1(input, filters, image_shape=None, filter_shape=None,
               border_mode='valid', subsample=(1,), filter_flip=True):
    """
    1D convolution via ``conv2d`` with a dummy trailing spatial axis of
    size 1 (width == 1).
    """
    # lift (b, c, i0) to (b, c, i0, 1); shapes may be unknown (None)
    image_shape_mc1 = None
    if image_shape is not None:
        image_shape_mc1 = (image_shape[0], image_shape[1],
                           image_shape[2], 1)

    filter_shape_mc1 = None
    if filter_shape is not None:
        filter_shape_mc1 = (filter_shape[0], filter_shape[1],
                            filter_shape[2], 1)

    # normalize border_mode: a 1-tuple or int pads only the real axis
    if isinstance(border_mode, tuple):
        (border_mode,) = border_mode
    if isinstance(border_mode, int):
        border_mode = (border_mode, 0)

    conved = T.nnet.conv2d(
        input.dimshuffle(0, 1, 2, 'x'), filters.dimshuffle(0, 1, 2, 'x'),
        image_shape_mc1, filter_shape_mc1,
        subsample=(subsample[0], 1), border_mode=border_mode,
        filter_flip=filter_flip)
    return conved[:, :, :, 0]  # drop the dummy dimension
+
+
def conv1d_unstrided(input, filters, image_shape, filter_shape,
                     border_mode='valid', subsample=(1,), filter_flip=True,
                     implementation=conv1d_sc):
    """
    perform a strided 1D convolution by reshaping input and filters so that
    the stride becomes 1. This function requires that the filter length is a
    multiple of the stride. It also truncates the input to have a length
    that is a multiple of the stride.

    The reshaping folds the stride into the channel axis of both input and
    filters, then delegates the resulting stride-1 convolution to
    ``implementation`` (``conv1d_sc`` by default).
    """
    batch_size, num_input_channels, input_length = image_shape
    num_filters, num_input_channels_, filter_length = filter_shape
    stride = subsample[0]

    if filter_length % stride > 0:
        raise RuntimeError("Filter length (%d) is not a multiple of the "
                           "stride (%d)" % (filter_length, stride))
    # TODO: test if this works for border_mode='full'
    if border_mode not in ('valid', 0, (0,)):
        raise RuntimeError("Unsupported border_mode for conv1d_unstrided: "
                           "%s" % border_mode)

    # a filter of length f at stride s covers f // s folded positions
    num_steps = filter_length // stride

    # input sizes need to be multiples of the strides,
    # truncate to correct sizes.
    truncated_length = (input_length // stride) * stride
    input_truncated = input[:, :, :truncated_length]

    # split the length axis into (length // stride, stride)
    r_input_shape = (batch_size, num_input_channels,
                     truncated_length // stride, stride)
    r_input = input_truncated.reshape(r_input_shape)

    # fold strides into the feature maps dimension (input)
    r_input_folded_shape = (batch_size, num_input_channels * stride,
                            truncated_length // stride)
    r_input_folded = r_input.dimshuffle(
        0, 1, 3, 2).reshape(r_input_folded_shape)

    # flip filters so the stride phases line up after reshaping, then
    # split the filter length axis the same way as the input
    r_filter_shape = (num_filters, num_input_channels, num_steps, stride)
    r_filters_flipped = filters[:, :, ::-1].reshape(r_filter_shape)

    # fold strides into the feature maps dimension (filters)
    r_filter_folded_shape = (num_filters, num_input_channels * stride,
                             num_steps)
    r_filters_flipped_folded = r_filters_flipped.dimshuffle(
        0, 1, 3, 2).reshape(r_filter_folded_shape)
    r_filters_folded = r_filters_flipped_folded[:, :, ::-1]  # unflip

    # the folded problem is an ordinary stride-1 1D convolution
    return implementation(r_input_folded, r_filters_folded,
                          r_input_folded_shape, r_filter_folded_shape,
                          border_mode, subsample=(1,), filter_flip=filter_flip)
+
+
def conv1d_sd(input, filters, image_shape, filter_shape, border_mode='valid',
              subsample=(1,), filter_flip=True):
    """
    using a single dot product

    The convolution is expressed as one large tensordot between a stack of
    shifted, reshaped views of the (zero-padded) input and the filters;
    only 'valid' border mode is supported.
    """
    if border_mode not in ('valid', 0, (0,)):
        raise RuntimeError("Unsupported border_mode for conv1d_sd: "
                           "%s" % border_mode)

    batch_size, num_input_channels, input_length = image_shape
    num_filters, num_input_channels_, filter_length = filter_shape
    stride = subsample[0]

    if filter_length % stride > 0:
        raise RuntimeError("Filter length (%d) is not a multiple of the "
                           "stride (%d)" % (filter_length, stride))

    num_steps = filter_length // stride
    output_length = (input_length - filter_length + stride) // stride

    # pad the input so all the shifted dot products fit inside.
    # shape is (b, c, l)
    padded_length = ((input_length // filter_length) * filter_length +
                     (num_steps - 1) * stride)

    # at this point, it is possible that the padded_length is SMALLER than the
    # input size. so then we have to truncate first.
    truncated_length = min(input_length, padded_length)
    input_truncated = input[:, :, :truncated_length]

    input_padded_shape = (batch_size, num_input_channels, padded_length)
    input_padded = T.zeros(input_padded_shape)
    input_padded = T.set_subtensor(input_padded[:, :, :truncated_length],
                                   input_truncated)

    # one shifted view per stride phase, reshaped so each filter-length
    # window becomes a row
    inputs = []
    for num in range(num_steps):
        shift = num * stride
        length = (padded_length - shift) // filter_length

        r_input_shape = (batch_size, num_input_channels, length, filter_length)
        r_input = input_padded[
            :, :, shift:length * filter_length + shift].reshape(r_input_shape)

        inputs.append(r_input)

    inputs_stacked = T.stack(*inputs)  # shape is (n, b, c, w, f)
    filters_flipped = filters[:, :, ::-1] if filter_flip else filters

    # contract over channels (axis 2 vs 1) and filter length (axis 4 vs 2)
    r_conved = T.tensordot(inputs_stacked, filters_flipped,
                           np.asarray([[2, 4], [1, 2]]))
    # resulting shape is (n, b, w, n_filters)
    # output needs to be (b, n_filters, w * n)
    r_conved = r_conved.dimshuffle(1, 3, 2, 0)  # (b, n_filters, w, n)
    conved = r_conved.reshape((r_conved.shape[0], r_conved.shape[1],
                               r_conved.shape[2] * r_conved.shape[3]))
    # result is (b, n_f, l)

    # remove padding
    return conved[:, :, :output_length]
+
+
def conv1d_md(input, filters, image_shape, filter_shape, border_mode='valid',
              subsample=(1,), filter_flip=True):
    """
    using multiple dot products

    One tensordot per stride phase; the partial results are interleaved
    into the output. Only 'valid' border mode is supported.
    """
    if border_mode not in ('valid', 0, (0,)):
        raise RuntimeError("Unsupported border_mode for conv1d_md: "
                           "%s" % border_mode)

    batch_size, num_input_channels, input_length = image_shape
    num_filters, num_input_channels_, filter_length = filter_shape
    stride = subsample[0]

    if filter_length % stride > 0:
        raise RuntimeError("Filter length (%d) is not a multiple of the "
                           "stride (%d)" % (filter_length, stride))

    num_steps = filter_length // stride
    output_length = (input_length - filter_length + stride) // stride
    output_shape = (batch_size, num_filters, output_length)

    filters_flipped = filters[:, :, ::-1] if filter_flip else filters

    conved = T.zeros(output_shape)

    for num in range(num_steps):
        shift = num * stride
        # number of whole filter-length windows available at this shift
        length = (input_length - shift) // filter_length

        if length == 0:
            # we can safely skip this product, it doesn't contribute to the
            # final convolution.
            continue

        # reshape so each filter-length window becomes a row
        r_input_shape = (batch_size, num_input_channels, length, filter_length)
        r_input = input[
            :, :, shift:length * filter_length + shift].reshape(r_input_shape)

        # shape (b, l, n_filters)
        r_conved = T.tensordot(r_input, filters_flipped,
                               np.asarray([[1, 3], [1, 2]]))
        r_conved = r_conved.dimshuffle(0, 2, 1)  # shape is (b, n_filters, l)
        # interleave this phase's results at every num_steps-th position
        conved = T.set_subtensor(conved[:, :, num::num_steps], r_conved)

    return conved
+
+
+# TODO: conv1d_md_channelslast?
+
+# 2D convolutions
+
+# TODO
diff --git a/lasagne/theano_extensions/padding.py b/lasagne/theano_extensions/padding.py
new file mode 100644
index 0000000..12c10ea
--- /dev/null
+++ b/lasagne/theano_extensions/padding.py
@@ -0,0 +1,53 @@
+"""
+Padding
+"""
+
+import theano.tensor as T
+
+
def pad(x, width, val=0, batch_ndim=1):
    """
    Pad a tensor with a constant value.

    Parameters
    ----------
    x : tensor

    width : int, iterable of int, or iterable of tuple
        Padding width. If an int, pads each axis symmetrically with the same
        amount in the beginning and end. If an iterable of int, defines the
        symmetric padding width separately for each axis. If an iterable of
        tuples of two ints, defines a separate padding width for each
        beginning and end of each axis.

    val : float
        The constant value used for padding

    batch_ndim : integer
        Dimensions before the value will not be padded.

    Returns
    -------
    tensor
        The padded tensor; its first `batch_ndim` axes keep their size.
    """
    input_shape = x.shape
    input_ndim = x.ndim

    output_shape = list(input_shape)
    indices = [slice(None) for _ in output_shape]

    # a single int pads every non-batch axis symmetrically by that amount
    if isinstance(width, int):
        widths = [width] * (input_ndim - batch_ndim)
    else:
        widths = width

    for k, w in enumerate(widths):
        try:
            # (before, after) tuple per axis
            l, r = w
        except TypeError:
            # plain int: pad both sides equally
            l = r = w
        output_shape[k + batch_ndim] += l + r
        # slice locating the original data inside the padded output
        indices[k + batch_ndim] = slice(l, l + input_shape[k + batch_ndim])

    # fill with the pad value, then write the input into the interior
    if val:
        out = T.ones(output_shape) * val
    else:
        out = T.zeros(output_shape)
    return T.set_subtensor(out[tuple(indices)], x)
diff --git a/lasagne/updates.py b/lasagne/updates.py
new file mode 100644
index 0000000..61ee4c1
--- /dev/null
+++ b/lasagne/updates.py
@@ -0,0 +1,819 @@
+"""
+Functions to generate Theano update dictionaries for training.
+
+The update functions implement different methods to control the learning
+rate for use with stochastic gradient descent.
+
+Update functions take a loss expression or a list of gradient expressions and
+a list of parameters as input and return an ordered dictionary of updates:
+
+.. autosummary::
+ :nosignatures:
+
+ sgd
+ momentum
+ nesterov_momentum
+ adagrad
+ rmsprop
+ adadelta
+ adam
+ adamax
+
+Two functions can be used to further modify the updates to include momentum:
+
+.. autosummary::
+ :nosignatures:
+
+ apply_momentum
+ apply_nesterov_momentum
+
+Finally, we provide two helper functions to constrain the norm of tensors:
+
+.. autosummary::
+ :nosignatures:
+
+ norm_constraint
+ total_norm_constraint
+
+:func:`norm_constraint()` can be used to constrain the norm of parameters
+(as an alternative to weight decay), or for a form of gradient clipping.
+:func:`total_norm_constraint()` constrains the total norm of a list of tensors.
+This is often used when training recurrent neural networks.
+
+Examples
+--------
+>>> import lasagne
+>>> import theano.tensor as T
+>>> import theano
+>>> from lasagne.nonlinearities import softmax
+>>> from lasagne.layers import InputLayer, DenseLayer, get_output
+>>> from lasagne.updates import sgd, apply_momentum
+>>> l_in = InputLayer((100, 20))
+>>> l1 = DenseLayer(l_in, num_units=3, nonlinearity=softmax)
+>>> x = T.matrix('x') # shp: num_batch x num_features
+>>> y = T.ivector('y') # shp: num_batch
+>>> l_out = get_output(l1, x)
+>>> params = lasagne.layers.get_all_params(l1)
+>>> loss = T.mean(T.nnet.categorical_crossentropy(l_out, y))
+>>> updates_sgd = sgd(loss, params, learning_rate=0.0001)
+>>> updates = apply_momentum(updates_sgd, params, momentum=0.9)
+>>> train_function = theano.function([x, y], updates=updates)
+"""
+
+from collections import OrderedDict
+
+import numpy as np
+
+import theano
+import theano.tensor as T
+from . import utils
+
# Public names exported by ``from lasagne.updates import *``; kept in the
# same order as the autosummary listing in the module docstring above.
__all__ = [
    "sgd",
    "apply_momentum",
    "momentum",
    "apply_nesterov_momentum",
    "nesterov_momentum",
    "adagrad",
    "rmsprop",
    "adadelta",
    "adam",
    "adamax",
    "norm_constraint",
    "total_norm_constraint"
]
+
+
def get_or_compute_grads(loss_or_grads, params):
    """Helper function returning a list of gradients.

    Parameters
    ----------
    loss_or_grads : symbolic expression or list of expressions
        A scalar loss expression, or a list of gradient expressions
    params : list of shared variables
        The variables to return the gradients for

    Returns
    -------
    list of expressions
        If `loss_or_grads` is a list, it is assumed to already hold one
        gradient expression per parameter and is returned unchanged
        (after a length check). Otherwise it is treated as a loss
        expression and ``theano.grad(loss_or_grads, params)`` is returned.

    Raises
    ------
    ValueError
        If `loss_or_grads` is a list whose length differs from `params`,
        or if any element of `params` is not a shared variable (while we
        could still compute its gradient, we can never update it and want
        to fail early).
    """
    for p in params:
        if not isinstance(p, theano.compile.SharedVariable):
            raise ValueError("params must contain shared variables only. If it "
                             "contains arbitrary parameter expressions, then "
                             "lasagne.utils.collect_shared_vars() may help you.")
    if not isinstance(loss_or_grads, list):
        return theano.grad(loss_or_grads, params)
    if len(loss_or_grads) != len(params):
        raise ValueError("Got %d gradient expressions for %d parameters" %
                         (len(loss_or_grads), len(params)))
    return loss_or_grads
+
+
def sgd(loss_or_grads, params, learning_rate):
    """Stochastic Gradient Descent (SGD) updates

    Generates update expressions of the form:

    * ``param := param - learning_rate * gradient``

    Parameters
    ----------
    loss_or_grads : symbolic expression or list of expressions
        A scalar loss expression, or a list of gradient expressions
    params : list of shared variables
        The variables to generate update expressions for
    learning_rate : float or symbolic scalar
        The learning rate controlling the size of update steps

    Returns
    -------
    OrderedDict
        A dictionary mapping each parameter to its update expression
    """
    grads = get_or_compute_grads(loss_or_grads, params)
    # Plain gradient step for every parameter, in parameter order.
    return OrderedDict(
        (param, param - learning_rate * grad)
        for param, grad in zip(params, grads))
+
+
def apply_momentum(updates, params=None, momentum=0.9):
    """Returns a modified update dictionary including momentum

    Generates update expressions of the form:

    * ``velocity := momentum * velocity + updates[param] - param``
    * ``param := param + velocity``

    Parameters
    ----------
    updates : OrderedDict
        A dictionary mapping parameters to update expressions
    params : iterable of shared variables, optional
        The variables to apply momentum to. If omitted, momentum is
        applied to all keys of `updates`.
    momentum : float or symbolic scalar, optional
        The amount of momentum to apply. Higher momentum results in
        smoothing over more update steps. Defaults to 0.9.

    Returns
    -------
    OrderedDict
        A copy of `updates` with momentum updates for all `params`.

    Notes
    -----
    Higher momentum also results in larger update steps. To counter that,
    you can optionally scale your learning rate by `1 - momentum`.

    See Also
    --------
    momentum : Shortcut applying momentum to SGD updates
    """
    # Work on a copy; the caller's dictionary is left untouched.
    updates = OrderedDict(updates)
    if params is None:
        params = list(updates)

    for param in params:
        value = param.get_value(borrow=True)
        # One zero-initialized velocity accumulator per parameter, with
        # matching shape, dtype and broadcast pattern.
        velocity = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                                 broadcastable=param.broadcastable)
        new_value = momentum * velocity + updates[param]
        updates[velocity] = new_value - param
        updates[param] = new_value

    return updates
+
+
def momentum(loss_or_grads, params, learning_rate, momentum=0.9):
    """Stochastic Gradient Descent (SGD) updates with momentum

    Generates update expressions of the form:

    * ``velocity := momentum * velocity - learning_rate * gradient``
    * ``param := param + velocity``

    Parameters
    ----------
    loss_or_grads : symbolic expression or list of expressions
        A scalar loss expression, or a list of gradient expressions
    params : list of shared variables
        The variables to generate update expressions for
    learning_rate : float or symbolic scalar
        The learning rate controlling the size of update steps
    momentum : float or symbolic scalar, optional
        The amount of momentum to apply. Higher momentum results in
        smoothing over more update steps. Defaults to 0.9.

    Returns
    -------
    OrderedDict
        A dictionary mapping each parameter to its update expression

    Notes
    -----
    Higher momentum also results in larger update steps. To counter that,
    you can optionally scale your learning rate by `1 - momentum`.

    See Also
    --------
    apply_momentum : Generic function applying momentum to updates
    nesterov_momentum : Nesterov's variant of SGD with momentum
    """
    # Compose plain SGD steps with the generic momentum transformation.
    return apply_momentum(sgd(loss_or_grads, params, learning_rate),
                          momentum=momentum)
+
+
def apply_nesterov_momentum(updates, params=None, momentum=0.9):
    """Returns a modified update dictionary including Nesterov momentum

    Generates update expressions of the form:

    * ``velocity := momentum * velocity + updates[param] - param``
    * ``param := param + momentum * velocity + updates[param] - param``

    Parameters
    ----------
    updates : OrderedDict
        A dictionary mapping parameters to update expressions
    params : iterable of shared variables, optional
        The variables to apply momentum to. If omitted, momentum is
        applied to all keys of `updates`.
    momentum : float or symbolic scalar, optional
        The amount of momentum to apply. Higher momentum results in
        smoothing over more update steps. Defaults to 0.9.

    Returns
    -------
    OrderedDict
        A copy of `updates` with momentum updates for all `params`.

    Notes
    -----
    Higher momentum also results in larger update steps. To counter that,
    you can optionally scale your learning rate by `1 - momentum`.

    The classic formulation of Nesterov momentum (or Nesterov accelerated
    gradient) requires the gradient to be evaluated at the predicted next
    position in parameter space. Here, we use the formulation described at
    https://github.com/lisa-lab/pylearn2/pull/136#issuecomment-10381617,
    which allows the gradient to be evaluated at the current parameters.

    See Also
    --------
    nesterov_momentum : Shortcut applying Nesterov momentum to SGD updates
    """
    # Work on a copy; the caller's dictionary is left untouched.
    updates = OrderedDict(updates)
    if params is None:
        params = list(updates)

    for param in params:
        value = param.get_value(borrow=True)
        # Zero-initialized velocity accumulator matching the parameter.
        velocity = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                                 broadcastable=param.broadcastable)
        step = momentum * velocity + updates[param] - param
        updates[velocity] = step
        # Note: updates[param] on the right-hand side is still the
        # original (pre-momentum) update expression here.
        updates[param] = momentum * step + updates[param]

    return updates
+
+
def nesterov_momentum(loss_or_grads, params, learning_rate, momentum=0.9):
    """Stochastic Gradient Descent (SGD) updates with Nesterov momentum

    Generates update expressions of the form:

    * ``velocity := momentum * velocity - learning_rate * gradient``
    * ``param := param + momentum * velocity - learning_rate * gradient``

    Parameters
    ----------
    loss_or_grads : symbolic expression or list of expressions
        A scalar loss expression, or a list of gradient expressions
    params : list of shared variables
        The variables to generate update expressions for
    learning_rate : float or symbolic scalar
        The learning rate controlling the size of update steps
    momentum : float or symbolic scalar, optional
        The amount of momentum to apply. Higher momentum results in
        smoothing over more update steps. Defaults to 0.9.

    Returns
    -------
    OrderedDict
        A dictionary mapping each parameter to its update expression

    Notes
    -----
    Higher momentum also results in larger update steps. To counter that,
    you can optionally scale your learning rate by `1 - momentum`.

    The classic formulation of Nesterov momentum (or Nesterov accelerated
    gradient) requires the gradient to be evaluated at the predicted next
    position in parameter space. Here, we use the formulation described at
    https://github.com/lisa-lab/pylearn2/pull/136#issuecomment-10381617,
    which allows the gradient to be evaluated at the current parameters.

    See Also
    --------
    apply_nesterov_momentum : Function applying momentum to updates
    """
    # Compose plain SGD steps with the Nesterov momentum transformation.
    return apply_nesterov_momentum(sgd(loss_or_grads, params, learning_rate),
                                   momentum=momentum)
+
+
def adagrad(loss_or_grads, params, learning_rate=1.0, epsilon=1e-6):
    """Adagrad updates

    Scale learning rates by dividing with the square root of accumulated
    squared gradients. See [1]_ for further description.

    Parameters
    ----------
    loss_or_grads : symbolic expression or list of expressions
        A scalar loss expression, or a list of gradient expressions
    params : list of shared variables
        The variables to generate update expressions for
    learning_rate : float or symbolic scalar
        The learning rate controlling the size of update steps
    epsilon : float or symbolic scalar
        Small value added for numerical stability

    Returns
    -------
    OrderedDict
        A dictionary mapping each parameter to its update expression

    Notes
    -----
    Using step size eta, Adagrad calculates the learning rate for feature i
    at time step t as:

    .. math:: \\eta_{t,i} = \\frac{\\eta}
       {\\sqrt{\\sum^t_{t^\\prime} g^2_{t^\\prime,i}+\\epsilon}} g_{t,i}

    so the learning rate is monotonically decreasing.

    Epsilon is not included in the typical formula, see [2]_.

    References
    ----------
    .. [1] Duchi, J., Hazan, E., & Singer, Y. (2011):
           Adaptive subgradient methods for online learning and stochastic
           optimization. JMLR, 12:2121-2159.

    .. [2] Chris Dyer:
           Notes on AdaGrad. http://www.ark.cs.cmu.edu/cdyer/adagrad.pdf
    """
    grads = get_or_compute_grads(loss_or_grads, params)
    updates = OrderedDict()

    for param, grad in zip(params, grads):
        value = param.get_value(borrow=True)
        # Running sum of squared gradients, one accumulator per parameter.
        accum = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                              broadcastable=param.broadcastable)
        new_accum = accum + grad ** 2
        scaled_step = learning_rate * grad / T.sqrt(new_accum + epsilon)
        updates[accum] = new_accum
        updates[param] = param - scaled_step

    return updates
+
+
def rmsprop(loss_or_grads, params, learning_rate=1.0, rho=0.9, epsilon=1e-6):
    """RMSProp updates

    Scale learning rates by dividing with the moving average of the root
    mean squared (RMS) gradients. See [1]_ for further description.

    Parameters
    ----------
    loss_or_grads : symbolic expression or list of expressions
        A scalar loss expression, or a list of gradient expressions
    params : list of shared variables
        The variables to generate update expressions for
    learning_rate : float or symbolic scalar
        The learning rate controlling the size of update steps
    rho : float or symbolic scalar
        Gradient moving average decay factor
    epsilon : float or symbolic scalar
        Small value added for numerical stability

    Returns
    -------
    OrderedDict
        A dictionary mapping each parameter to its update expression

    Notes
    -----
    `rho` should be between 0 and 1. A value of `rho` close to 1 decays the
    moving average slowly; a value close to 0 decays it fast.

    Using the step size :math:`\\eta` and a decay factor :math:`\\rho` the
    learning rate :math:`\\eta_t` is calculated as:

    .. math::
       r_t &= \\rho r_{t-1} + (1-\\rho)*g^2\\\\
       \\eta_t &= \\frac{\\eta}{\\sqrt{r_t + \\epsilon}}

    References
    ----------
    .. [1] Tieleman, T. and Hinton, G. (2012):
           Neural Networks for Machine Learning, Lecture 6.5 - rmsprop.
           Coursera. http://www.youtube.com/watch?v=O3sxAc4hxZU (formula @5:20)
    """
    grads = get_or_compute_grads(loss_or_grads, params)
    updates = OrderedDict()

    # Theano constant prevents upcasting of float32 expressions.
    one = T.constant(1)

    for param, grad in zip(params, grads):
        value = param.get_value(borrow=True)
        # Exponential moving average of squared gradients.
        accum = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                              broadcastable=param.broadcastable)
        new_accum = rho * accum + (one - rho) * grad ** 2
        scaled_step = learning_rate * grad / T.sqrt(new_accum + epsilon)
        updates[accum] = new_accum
        updates[param] = param - scaled_step

    return updates
+
+
def adadelta(loss_or_grads, params, learning_rate=1.0, rho=0.95, epsilon=1e-6):
    """Adadelta updates

    Scale learning rates by the ratio of accumulated gradients to
    accumulated updates, see [1]_ and notes for further description.

    Parameters
    ----------
    loss_or_grads : symbolic expression or list of expressions
        A scalar loss expression, or a list of gradient expressions
    params : list of shared variables
        The variables to generate update expressions for
    learning_rate : float or symbolic scalar
        The learning rate controlling the size of update steps
    rho : float or symbolic scalar
        Squared gradient moving average decay factor
    epsilon : float or symbolic scalar
        Small value added for numerical stability

    Returns
    -------
    OrderedDict
        A dictionary mapping each parameter to its update expression

    Notes
    -----
    rho should be between 0 and 1. A value of rho close to 1 decays the
    moving average slowly; a value close to 0 decays it fast.

    rho = 0.95 and epsilon=1e-6 are suggested in the paper and reported to
    work for multiple datasets (MNIST, speech).

    In the paper, no learning rate is considered (so learning_rate=1.0).
    Probably best to keep it at this value.
    epsilon is important for the very first update (so the numerator does
    not become 0).

    Using the step size eta and a decay factor rho the learning rate is
    calculated as:

    .. math::
       r_t &= \\rho r_{t-1} + (1-\\rho)*g^2\\\\
       \\eta_t &= \\eta \\frac{\\sqrt{s_{t-1} + \\epsilon}}
                             {\\sqrt{r_t + \\epsilon}}\\\\
       s_t &= \\rho s_{t-1} + (1-\\rho)*(\\eta_t*g)^2

    References
    ----------
    .. [1] Zeiler, M. D. (2012):
           ADADELTA: An Adaptive Learning Rate Method.
           arXiv Preprint arXiv:1212.5701.
    """
    grads = get_or_compute_grads(loss_or_grads, params)
    updates = OrderedDict()

    # Theano constant prevents upcasting of float32 expressions.
    one = T.constant(1)

    for param, grad in zip(params, grads):
        value = param.get_value(borrow=True)
        # Moving average of squared gradients (as in rmsprop).
        grad_accum = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                                   broadcastable=param.broadcastable)
        # Moving average of squared updates (accumulated recursively!).
        step_accum = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                                   broadcastable=param.broadcastable)

        new_grad_accum = rho * grad_accum + (one - rho) * grad ** 2
        updates[grad_accum] = new_grad_accum

        # The step uses the 'old' step accumulator, before this update.
        step = (grad * T.sqrt(step_accum + epsilon) /
                T.sqrt(new_grad_accum + epsilon))
        updates[param] = param - learning_rate * step

        updates[step_accum] = rho * step_accum + (one - rho) * step ** 2

    return updates
+
+
def adam(loss_or_grads, params, learning_rate=0.001, beta1=0.9,
         beta2=0.999, epsilon=1e-8):
    """Adam updates

    Adam updates implemented as in [1]_.

    Parameters
    ----------
    loss_or_grads : symbolic expression or list of expressions
        A scalar loss expression, or a list of gradient expressions
    params : list of shared variables
        The variables to generate update expressions for
    learning_rate : float
        Learning rate
    beta1 : float
        Exponential decay rate for the first moment estimates.
    beta2 : float
        Exponential decay rate for the second moment estimates.
    epsilon : float
        Constant for numerical stability.

    Returns
    -------
    OrderedDict
        A dictionary mapping each parameter to its update expression

    Notes
    -----
    The paper [1]_ includes an additional hyperparameter lambda. This is
    only needed to prove convergence of the algorithm and has no practical
    use (personal communication with the authors), it is therefore omitted
    here.

    References
    ----------
    .. [1] Kingma, Diederik, and Jimmy Ba (2014):
           Adam: A Method for Stochastic Optimization.
           arXiv preprint arXiv:1412.6980.
    """
    all_grads = get_or_compute_grads(loss_or_grads, params)
    # Scalar time step, shared across all parameters.
    t_prev = theano.shared(utils.floatX(0.))
    updates = OrderedDict()

    # Theano constant prevents upcasting of float32 expressions.
    one = T.constant(1)

    t = t_prev + 1
    # Bias-corrected step size for this time step.
    a_t = learning_rate * T.sqrt(one - beta2 ** t) / (one - beta1 ** t)

    for param, grad in zip(params, all_grads):
        value = param.get_value(borrow=True)
        # First (m) and second (v) moment estimates, zero-initialized.
        m_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                               broadcastable=param.broadcastable)
        v_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                               broadcastable=param.broadcastable)

        m_t = beta1 * m_prev + (one - beta1) * grad
        v_t = beta2 * v_prev + (one - beta2) * grad ** 2

        updates[m_prev] = m_t
        updates[v_prev] = v_t
        updates[param] = param - a_t * m_t / (T.sqrt(v_t) + epsilon)

    updates[t_prev] = t
    return updates
+
+
def adamax(loss_or_grads, params, learning_rate=0.002, beta1=0.9,
           beta2=0.999, epsilon=1e-8):
    """Adamax updates

    Adamax updates implemented as in [1]_. This is a variant of the Adam
    algorithm based on the infinity norm.

    Parameters
    ----------
    loss_or_grads : symbolic expression or list of expressions
        A scalar loss expression, or a list of gradient expressions
    params : list of shared variables
        The variables to generate update expressions for
    learning_rate : float
        Learning rate
    beta1 : float
        Exponential decay rate for the first moment estimates.
    beta2 : float
        Exponential decay rate for the weighted infinity norm estimates.
    epsilon : float
        Constant for numerical stability.

    Returns
    -------
    OrderedDict
        A dictionary mapping each parameter to its update expression

    References
    ----------
    .. [1] Kingma, Diederik, and Jimmy Ba (2014):
           Adam: A Method for Stochastic Optimization.
           arXiv preprint arXiv:1412.6980.
    """
    all_grads = get_or_compute_grads(loss_or_grads, params)
    # Scalar time step, shared across all parameters.
    t_prev = theano.shared(utils.floatX(0.))
    updates = OrderedDict()

    # Theano constant prevents upcasting of float32 expressions.
    one = T.constant(1)

    t = t_prev + 1
    # Only the first moment needs bias correction here.
    a_t = learning_rate / (one - beta1 ** t)

    for param, grad in zip(params, all_grads):
        value = param.get_value(borrow=True)
        # First moment (m) and exponentially weighted infinity norm (u).
        m_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                               broadcastable=param.broadcastable)
        u_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                               broadcastable=param.broadcastable)

        m_t = beta1 * m_prev + (one - beta1) * grad
        u_t = T.maximum(beta2 * u_prev, abs(grad))

        updates[m_prev] = m_t
        updates[u_prev] = u_t
        updates[param] = param - a_t * m_t / (u_t + epsilon)

    updates[t_prev] = t
    return updates
+
+
def norm_constraint(tensor_var, max_norm, norm_axes=None, epsilon=1e-7):
    """Max weight norm constraints and gradient clipping

    This takes a TensorVariable and rescales it so that incoming weight
    norms are below a specified constraint value. Vectors violating the
    constraint are rescaled so that they are within the allowed range.

    Parameters
    ----------
    tensor_var : TensorVariable
        Theano expression for update, gradient, or other quantity.
    max_norm : scalar
        This value sets the maximum allowed value of any norm in
        `tensor_var`.
    norm_axes : sequence (list or tuple)
        The axes over which to compute the norm. This overrides the
        default norm axes defined for the number of dimensions
        in `tensor_var`. When this is not specified and `tensor_var` is a
        matrix (2D), this is set to `(0,)`. If `tensor_var` is a 3D, 4D or
        5D tensor, it is set to a tuple listing all axes but axis 0. The
        former default is useful for working with dense layers, the latter
        is useful for 1D, 2D and 3D convolutional layers.
        (Optional)
    epsilon : scalar, optional
        Value used to prevent numerical instability when dividing by
        very small or zero norms.

    Returns
    -------
    TensorVariable
        Input `tensor_var` with rescaling applied to weight vectors
        that violate the specified constraints.

    Examples
    --------
    >>> param = theano.shared(
    ...     np.random.randn(100, 200).astype(theano.config.floatX))
    >>> update = param + 100
    >>> update = norm_constraint(update, 10)
    >>> func = theano.function([], [], updates=[(param, update)])
    >>> # Apply constrained update
    >>> _ = func()
    >>> from lasagne.utils import compute_norms
    >>> norms = compute_norms(param.get_value())
    >>> np.isclose(np.max(norms), 10)
    True

    Notes
    -----
    When `norm_axes` is not specified, the axes over which the norm is
    computed depend on the dimensionality of the input variable. If it is
    2D, it is assumed to come from a dense layer, and the norm is computed
    over axis 0. If it is 3D, 4D or 5D, it is assumed to come from a
    convolutional layer and the norm is computed over all trailing axes
    beyond axis 0. For other uses, you should explicitly specify the axes
    over which to compute the norm using `norm_axes`.
    """
    ndim = tensor_var.ndim

    if norm_axes is not None:
        sum_over = tuple(norm_axes)
    elif ndim == 2:  # DenseLayer
        sum_over = (0,)
    elif ndim in [3, 4, 5]:  # Conv{1,2,3}DLayer
        sum_over = tuple(range(1, ndim))
    else:
        # Fixed: the original message concatenated to "...{}.Must specify"
        # without a space; now matches the wording in utils.compute_norms.
        raise ValueError(
            "Unsupported tensor dimensionality {}. "
            "Must specify `norm_axes`".format(ndim)
        )

    # Cast constants to floatX to avoid accidental upcasting of the graph.
    dtype = np.dtype(theano.config.floatX).type
    norms = T.sqrt(T.sum(T.sqr(tensor_var), axis=sum_over, keepdims=True))
    # Norms above max_norm are clipped down; norms below are unchanged
    # (target/norm ~= 1 for them), so only violating vectors get rescaled.
    target_norms = T.clip(norms, 0, dtype(max_norm))
    constrained_output = \
        (tensor_var * (target_norms / (dtype(epsilon) + norms)))

    return constrained_output
+
+
def total_norm_constraint(tensor_vars, max_norm, epsilon=1e-7,
                          return_norm=False):
    """Rescales a list of tensors based on their combined norm

    If the combined norm of the input tensors exceeds the threshold then
    all tensors are rescaled such that the combined norm equals the
    threshold.

    Scaling the norms of the gradients is often used when training
    recurrent neural networks [1]_.

    Parameters
    ----------
    tensor_vars : List of TensorVariables.
        Tensors to be rescaled.
    max_norm : float
        Threshold value for total norm.
    epsilon : scalar, optional
        Value used to prevent numerical instability when dividing by
        very small or zero norms.
    return_norm : bool
        If true the total norm is also returned.

    Returns
    -------
    tensor_vars_scaled : list of TensorVariables
        The scaled tensor variables.
    norm : Theano scalar
        The combined norms of the input variables prior to rescaling,
        only returned if ``return_norm=True``.

    Examples
    --------
    >>> from lasagne.layers import InputLayer, DenseLayer
    >>> import lasagne
    >>> from lasagne.updates import sgd, total_norm_constraint
    >>> x = T.matrix()
    >>> y = T.ivector()
    >>> l_in = InputLayer((5, 10))
    >>> l1 = DenseLayer(l_in, num_units=7, nonlinearity=T.nnet.softmax)
    >>> output = lasagne.layers.get_output(l1, x)
    >>> cost = T.mean(T.nnet.categorical_crossentropy(output, y))
    >>> all_params = lasagne.layers.get_all_params(l1)
    >>> all_grads = T.grad(cost, all_params)
    >>> scaled_grads = total_norm_constraint(all_grads, 5)
    >>> updates = sgd(scaled_grads, all_params, learning_rate=0.1)

    Notes
    -----
    The total norm can be used to monitor training.

    References
    ----------
    .. [1] Sutskever, I., Vinyals, O., & Le, Q. V. (2014): Sequence to sequence
           learning with neural networks. In Advances in Neural Information
           Processing Systems (pp. 3104-3112).
    """
    # Combined L2 norm over all tensors together.
    norm = T.sqrt(sum(T.sum(var ** 2) for var in tensor_vars))
    # Cast constants to floatX to avoid accidental upcasting of the graph.
    dtype = np.dtype(theano.config.floatX).type
    # If norm <= max_norm the multiplier is ~1, i.e. tensors are unchanged.
    target_norm = T.clip(norm, 0, dtype(max_norm))
    multiplier = target_norm / (dtype(epsilon) + norm)
    tensor_vars_scaled = [var * multiplier for var in tensor_vars]

    if return_norm:
        return tensor_vars_scaled, norm
    return tensor_vars_scaled
diff --git a/lasagne/utils.py b/lasagne/utils.py
new file mode 100644
index 0000000..ad22f88
--- /dev/null
+++ b/lasagne/utils.py
@@ -0,0 +1,450 @@
+import numpy as np
+
+import theano
+import theano.tensor as T
+
+
def floatX(arr):
    """Converts data to a numpy array of dtype ``theano.config.floatX``.

    Parameters
    ----------
    arr : array_like
        The data to be converted.

    Returns
    -------
    numpy ndarray
        The input array in the ``floatX`` dtype configured for Theano.
        If `arr` already is an ndarray of that dtype, it is returned
        unchanged (``np.asarray`` avoids a copy in that case).
    """
    target_dtype = theano.config.floatX
    return np.asarray(arr, dtype=target_dtype)
+
+
def shared_empty(dim=2, dtype=None):
    """Creates an "empty" Theano shared variable.

    Shortcut to create a Theano shared variable holding a single zero,
    with the specified number of dimensions.

    Parameters
    ----------
    dim : int, optional
        The number of dimensions for the variable, defaults to 2.
    dtype : a numpy data-type, optional
        The desired dtype for the variable. Defaults to the Theano
        ``floatX`` dtype.

    Returns
    -------
    Theano shared variable
        A shared variable of dtype ``dtype`` with `dim` dimensions,
        each of size 1 and initialized to zero.
    """
    if dtype is None:
        dtype = theano.config.floatX

    # A shape of (1, 1, ..., 1) with `dim` entries.
    return theano.shared(np.zeros((1,) * dim, dtype=dtype))
+
+
def as_theano_expression(input):
    """Wrap as Theano expression.

    Wraps the given input as a Theano constant if it is not a valid Theano
    expression already. Useful to transparently handle numpy arrays and
    Python scalars, for example.

    Parameters
    ----------
    input : number, numpy array or Theano expression
        Expression to be converted to a Theano constant.

    Returns
    -------
    Theano symbolic constant
        Theano constant version of `input`.

    Raises
    ------
    TypeError
        If `input` cannot be converted to a Theano constant.
    """
    # Already a symbolic variable: pass it through untouched.
    if isinstance(input, theano.gof.Variable):
        return input
    try:
        return theano.tensor.constant(input)
    except Exception as err:
        raise TypeError("Input of type %s is not a Theano expression and "
                        "cannot be wrapped as a Theano constant (original "
                        "exception: %s)" % (type(input), err))
+
+
def collect_shared_vars(expressions):
    """Returns all shared variables the given expression(s) depend on.

    Parameters
    ----------
    expressions : Theano expression or iterable of Theano expressions
        The expressions to collect shared variables from.

    Returns
    -------
    list of Theano shared variables
        All shared variables the given expression(s) depend on, in fixed
        order (as found by a left-recursive depth-first search). If some
        expressions are shared variables themselves, they are included in
        the result.
    """
    # Accept a single expression by wrapping it in a list.
    if isinstance(expressions, theano.Variable):
        expressions = [expressions]
    # Walk the graph and keep only the shared-variable leaves.
    graph_inputs = theano.gof.graph.inputs(reversed(expressions))
    return [var for var in graph_inputs
            if isinstance(var, theano.compile.SharedVariable)]
+
+
def one_hot(x, m=None):
    """One-hot representation of integer vector.

    Given a vector of integers from 0 to m-1, returns a matrix with a
    one-hot representation, where each row corresponds to an element of x.

    Parameters
    ----------
    x : integer vector
        The integer vector to convert to a one-hot representation.
    m : int, optional
        The number of different columns for the one-hot representation.
        This needs to be strictly greater than the maximum value of `x`.
        Defaults to ``max(x) + 1``.

    Returns
    -------
    Theano tensor variable
        A Theano tensor variable of shape (``n``, `m`), where ``n`` is the
        length of `x`, with the one-hot representation of `x`.

    Notes
    -----
    If your integer vector represents target class memberships, and you
    wish to compute the cross-entropy between predictions and the target
    class memberships, then there is no need to use this function, since
    :func:`lasagne.objectives.categorical_crossentropy()` can compute the
    cross-entropy from the integer vector directly.

    """
    if m is None:
        # Infer the number of classes from the data itself.
        m = T.cast(T.max(x) + 1, 'int32')

    # Row i of the identity matrix is the one-hot encoding of integer i.
    indices = T.cast(x, 'int32')
    return T.eye(m)[indices]
+
+
def unique(l):
    """Filters duplicates of iterable.

    Create a new list from `l` with duplicate entries removed, while
    preserving the original order (the first occurrence of each element
    wins).

    Parameters
    ----------
    l : iterable
        Input iterable to filter of duplicates.

    Returns
    -------
    list
        A list of elements of `l` without duplicates and in the same order.
    """
    result = []
    # Track already-emitted elements in a set for O(1) membership tests.
    seen = set()
    for item in l:
        if item in seen:
            continue
        seen.add(item)
        result.append(item)
    return result
+
+
def as_tuple(x, N, t=None):
    """
    Coerce a value to a tuple of given length (and possibly given type).

    Parameters
    ----------
    x : value or iterable
    N : integer
        length of the desired tuple
    t : type, optional
        required type for all elements

    Returns
    -------
    tuple
        ``tuple(x)`` if `x` is iterable, ``(x,) * N`` otherwise.

    Raises
    ------
    TypeError
        if `type` is given and `x` or any of its elements do not match it
    ValueError
        if `x` is iterable, but does not have exactly `N` elements
    """
    # Non-iterable values are repeated N times; tuple() raises TypeError
    # for them, which is how we tell the two cases apart.
    try:
        result = tuple(x)
    except TypeError:
        result = (x,) * N

    if t is not None and not all(isinstance(item, t) for item in result):
        raise TypeError("expected a single value or an iterable "
                        "of {0}, got {1} instead".format(t.__name__, x))

    if len(result) != N:
        raise ValueError("expected a single value or an iterable "
                         "with length {0}, got {1} instead".format(N, x))

    return result
+
+
def compute_norms(array, norm_axes=None):
    """ Compute incoming weight vector norms.

    Parameters
    ----------
    array : numpy array or Theano expression
        Weight or bias.
    norm_axes : sequence (list or tuple), optional
        Axes over which to compute the norm, overriding the defaults
        chosen from the dimensionality of `array`: ``(0,)`` for a 2D
        array (dense layers), all axes but axis 0 for 3D/4D/5D arrays
        (1D/2D/3D convolutional layers), and the empty tuple for a
        vector, in which case the element-wise absolute value is
        returned. The last case is useful when applying this function
        to every parameter of a network, biases included, without
        distinction.

    Returns
    -------
    norms : 1D array or Theano vector (1D)
        1D array or Theano vector of incoming weight/bias vector norms.

    Examples
    --------
    >>> array = np.random.randn(100, 200)
    >>> norms = compute_norms(array)
    >>> norms.shape
    (200,)

    >>> norms = compute_norms(array, norm_axes=(1,))
    >>> norms.shape
    (100,)
    """

    # Only symbolic Theano expressions and numpy arrays are accepted.
    is_theano_expr = isinstance(array, theano.Variable)
    if not is_theano_expr and not isinstance(array, np.ndarray):
        raise RuntimeError(
            "Unsupported type {}. "
            "Only theano variables and numpy arrays "
            "are supported".format(type(array))
        )

    # Pick the axes to sum over, unless the caller overrode them.
    if norm_axes is not None:
        sum_over = tuple(norm_axes)
    else:
        ndim = array.ndim
        if ndim == 1:
            # Biases that are 1D (e.g. b of DenseLayer): no axes to sum.
            sum_over = ()
        elif ndim == 2:
            # DenseLayer: norm of each incoming weight vector.
            sum_over = (0,)
        elif ndim in (3, 4, 5):
            # Conv{1,2,3}DLayer: everything but the output-channel axis.
            sum_over = tuple(range(1, ndim))
        else:
            raise ValueError(
                "Unsupported tensor dimensionality {}. "
                "Must specify `norm_axes`".format(array.ndim)
            )

    # Dispatch to the symbolic or the numeric implementation; with no
    # axes to sum over, the norm degenerates to the absolute value.
    if is_theano_expr:
        if not sum_over:
            norms = T.abs_(array)
        else:
            norms = T.sqrt(T.sum(array**2, axis=sum_over))
    else:
        if not sum_over:
            norms = abs(array)
        else:
            norms = np.sqrt(np.sum(array**2, axis=sum_over))

    return norms
+
+
def create_param(spec, shape, name=None):
    """
    Helper method to create Theano shared variables for layer parameters
    and to initialize them.

    Parameters
    ----------
    spec : scalar number, numpy array, Theano expression, or callable
        Either of the following:

        * a scalar or a numpy array with the initial parameter values
        * a Theano expression or shared variable representing the parameters
        * a function or callable that takes the desired shape of
          the parameter array as its single argument and returns
          a numpy array, a Theano expression, or a shared variable
          representing the parameters.

    shape : iterable of int
        a tuple or other iterable of integers representing the desired
        shape of the parameter array.

    name : string, optional
        The name to give to the parameter variable. Ignored if `spec`
        is or returns a Theano expression or shared variable that
        already has a name.


    Returns
    -------
    Theano shared variable or Theano expression
        A Theano shared variable or expression representing layer parameters.
        If a scalar or a numpy array was provided, a shared variable is
        initialized to contain this array. If a shared variable or expression
        was provided, it is simply returned. If a callable was provided, it is
        called, and its output is used to initialize a shared variable.

    Notes
    -----
    This function is called by :meth:`Layer.add_param()` in the constructor
    of most :class:`Layer` subclasses. This enables those layers to
    support initialization with scalars, numpy arrays, existing Theano shared
    variables or expressions, and callables for generating initial parameter
    values, Theano expressions, or shared variables.
    """
    import numbers  # to check if argument is a number
    shape = tuple(shape)  # convert to tuple if needed
    if any(d <= 0 for d in shape):
        raise ValueError((
            "Cannot create param with a non-positive shape dimension. "
            "Tried to create param with shape=%r, name=%r") % (shape, name))

    # err_prefix keeps one '%s' placeholder; each raise below fills it in
    # with the kind of value that was received ("numpy array", etc.).
    err_prefix = "cannot initialize parameter %s: " % name
    if callable(spec):
        # A callable produces the actual spec; run the same checks below
        # on its return value. The word "callable" in err_prefix is also
        # used as a marker in the final else branch.
        spec = spec(shape)
        err_prefix += "the %s returned by the provided callable"
    else:
        err_prefix += "the provided %s"

    # Note: `and` binds tighter than `or`, so this reads as:
    # a plain Python number, OR a numpy scalar whose dtype kind is
    # boolean/signed/unsigned/float/complex. Either is promoted to a
    # (0-dimensional) numpy array and handled by the ndarray branch.
    if isinstance(spec, numbers.Number) or isinstance(spec, np.generic) \
            and spec.dtype.kind in 'biufc':
        spec = np.asarray(spec)

    if isinstance(spec, np.ndarray):
        if spec.shape != shape:
            raise ValueError("%s has shape %s, should be %s" %
                             (err_prefix % "numpy array", spec.shape, shape))
        # We assume parameter variables do not change shape after creation.
        # We can thus fix their broadcast pattern, to allow Theano to infer
        # broadcastable dimensions of expressions involving these parameters.
        bcast = tuple(s == 1 for s in shape)
        spec = theano.shared(spec, broadcastable=bcast)

    if isinstance(spec, theano.Variable):
        # We cannot check the shape here, Theano expressions (even shared
        # variables) do not have a fixed compile-time shape. We can check the
        # dimensionality though.
        if spec.ndim != len(shape):
            raise ValueError("%s has %d dimensions, should be %d" %
                             (err_prefix % "Theano variable", spec.ndim,
                              len(shape)))
        # We only assign a name if the user hasn't done so already.
        if not spec.name:
            spec.name = name
        return spec

    else:
        # err_prefix contains "callable" iff spec was produced by the
        # provided callable above; tailor the error message accordingly.
        if "callable" in err_prefix:
            raise TypeError("%s is not a numpy array or a Theano expression" %
                            (err_prefix % "value"))
        else:
            raise TypeError("%s is not a numpy array, a Theano expression, "
                            "or a callable" % (err_prefix % "spec"))
+
+
def unroll_scan(fn, sequences, outputs_info, non_sequences, n_steps,
                go_backwards=False):
    """
    Helper function to unroll for loops. Can be used to unroll theano.scan.
    The parameter names are identical to theano.scan, please refer to here
    for more information.

    Note that this function does not support the truncate_gradient
    setting from theano.scan.

    Parameters
    ----------

    fn : function
        Function that defines calculations at each step.

    sequences : TensorVariable or list of TensorVariables
        List of TensorVariable with sequence data. The function iterates
        over the first dimension of each TensorVariable.

    outputs_info : list of TensorVariables
        List of tensors specifying the initial values for each recurrent
        value.

    non_sequences: list of TensorVariables
        List of theano.shared variables that are used in the step function.

    n_steps: int
        Number of steps to unroll.

    go_backwards: bool
        If true the recursion starts at sequences[-1] and iterates
        backwards.

    Returns
    -------
    List of TensorVariables. Each element in the list gives the recurrent
    values at each time step.

    """
    if not isinstance(sequences, (list, tuple)):
        sequences = [sequences]

    # Step indices; reversed when iterating over the sequences backwards.
    steps = range(n_steps)
    if go_backwards:
        steps = steps[::-1]

    output = []
    prev_vals = outputs_info
    for t in steps:
        step_args = [seq[t] for seq in sequences] + prev_vals + non_sequences
        step_out = fn(*step_args)
        # fn may return a single TensorVariable, a tuple, or a list;
        # normalize to a list in every case.
        if isinstance(step_out, T.TensorVariable):
            step_out = [step_out]
        elif isinstance(step_out, tuple):
            step_out = list(step_out)
        output.append(step_out)
        prev_vals = step_out

    # Transpose from one list per time step to one stacked tensor per
    # output, matching the layout returned by theano.scan:
    # [[output11, output12,...output1n],
    #  [output21, output22,...output2n],...]
    output_scan = []
    for j in range(len(output[0])):
        column = [step[j] for step in output]
        output_scan.append(T.stack(*column))

    return output_scan
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..a78e8f2
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,10 @@
+-r requirements.txt
+mock
+numpydoc
+pep8==1.6.2
+pytest
+pytest-cov
+pytest-pep8
+Jinja2==2.7.3
+Sphinx==1.2.3
+sphinx_rtd_theme
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..0132aab
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+Theano==0.8.0
diff --git a/setup.cfg b/setup.cfg
new file mode 100644
index 0000000..50b61b1
--- /dev/null
+++ b/setup.cfg
@@ -0,0 +1,10 @@
+[aliases]
+dev = develop easy_install lasagne[testing]
+
+[pytest]
+addopts =
+ -v --doctest-modules
+ --cov=lasagne --cov-report=term-missing
+ --pep8
+ lasagne/
+python_files = test*py
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..251a135
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,67 @@
"""Setup script for the Lasagne package.

Assembles the setuptools metadata: the version string is parsed out of
``lasagne/__init__.py`` and the long description is built from
``README.rst`` and ``CHANGES.rst``.
"""
import os
import re
from setuptools import find_packages
from setuptools import setup
# We need io.open() (Python 3's default open) to specify file encodings
import io

here = os.path.abspath(os.path.dirname(__file__))
try:
    # obtain version string from __init__.py
    # Read ASCII file with builtin open() so __version__ is str in Python 2 and 3
    with open(os.path.join(here, 'lasagne', '__init__.py'), 'r') as f:
        init_py = f.read()
    version = re.search('__version__ = "(.*)"', init_py).groups()[0]
except Exception:
    # fall back to an empty version string if the file or pattern is missing
    version = ''
try:
    # obtain long description from README and CHANGES
    # Specify encoding to get a unicode type in Python 2 and a str in Python 3
    with io.open(os.path.join(here, 'README.rst'), 'r', encoding='utf-8') as f:
        README = f.read()
    with io.open(os.path.join(here, 'CHANGES.rst'), 'r', encoding='utf-8') as f:
        CHANGES = f.read()
except IOError:
    # docs are optional (e.g. when installing from an sdist without them)
    README = CHANGES = ''

install_requires = [
    'numpy',
    # 'Theano', # we require a development version, see requirements.txt
    ]

# extra dependencies pulled in via the 'testing' extra (see extras_require)
tests_require = [
    'mock',
    'pytest',
    'pytest-cov',
    'pytest-pep8',
    ]

setup(
    name="Lasagne",
    version=version,
    description="A lightweight library to build and train neural networks "
                "in Theano",
    long_description="\n\n".join([README, CHANGES]),
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        "Intended Audience :: Science/Research",
        "License :: OSI Approved :: MIT License",
        "Programming Language :: Python :: 2.7",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.4",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        ],
    keywords="",
    author="Lasagne contributors",
    author_email="lasagne-users at googlegroups.com",
    url="https://github.com/Lasagne/Lasagne",
    license="MIT",
    packages=find_packages(),
    include_package_data=False,
    zip_safe=False,
    install_requires=install_requires,
    extras_require={
        'testing': tests_require,
        },
    )
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/lasagne.git
More information about the debian-science-commits
mailing list