[lua-torch-image] 01/01: Imported Upstream version 0~20160730-g797fcb1

Zhou Mo cdluminate-guest at moszumanska.debian.org
Thu Aug 25 14:13:41 UTC 2016


This is an automated email from the git hooks/post-receive script.

cdluminate-guest pushed a commit to branch master
in repository lua-torch-image.

commit bafd9bacf118845a0771981f4fd8b72fb9660c77
Author: Zhou Mo <cdluminate at gmail.com>
Date:   Thu Aug 25 12:34:05 2016 +0000

    Imported Upstream version 0~20160730-g797fcb1
---
 .gitignore                  |    1 +
 .travis.yml                 |   62 ++
 CMakeLists.txt              |   90 ++
 COPYRIGHT.txt               |   36 +
 README.md                   |   43 +
 assets/P2.pgm               |   11 +
 assets/P4.pbm               |    3 +
 assets/P5.pgm               |    7 +
 assets/P6.ppm               |  Bin 0 -> 313 bytes
 assets/corrupt-ihdr.png     |  Bin 0 -> 275 bytes
 assets/fabio.jpg            |  Bin 0 -> 17958 bytes
 assets/fabio.png            |  Bin 0 -> 65067 bytes
 assets/foobar.png           |  Bin 0 -> 202 bytes
 assets/grace_hopper_512.jpg |  Bin 0 -> 65544 bytes
 assets/grace_hopper_512.png |  Bin 0 -> 431614 bytes
 assets/gray16-1x2.png       |  Bin 0 -> 75 bytes
 assets/gray3x1.png          |  Bin 0 -> 73 bytes
 assets/rectangle.png        |  Bin 0 -> 113 bytes
 assets/rgb16-2x1.png        |  Bin 0 -> 79 bytes
 assets/rgb2x1.png           |  Bin 0 -> 76 bytes
 doc/colorspace.md           |   73 ++
 doc/drawing.md              |   44 +
 doc/gui.md                  |   53 +
 doc/index.md                |   35 +
 doc/paramtransform.md       |   73 ++
 doc/saveload.md             |   63 ++
 doc/simpletransform.md      |  130 +++
 doc/tensorconstruct.md      |   91 ++
 font.c                      |  287 ++++++
 generic/image.c             | 2296 ++++++++++++++++++++++++++++++++++++++++++
 generic/jpeg.c              |  527 ++++++++++
 generic/png.c               |  400 ++++++++
 generic/ppm.c               |  183 ++++
 image-1.1.alpha-0.rockspec  |   32 +
 image.c                     |   52 +
 init.lua                    | 2323 +++++++++++++++++++++++++++++++++++++++++++
 jpeg.c                      |   68 ++
 mkdocs.yml                  |   14 +
 png.c                       |   87 ++
 ppm.c                       |   70 ++
 test/test.lua               |  687 +++++++++++++
 test/test_rotate.lua        |   75 ++
 test/test_warp.lua          |  139 +++
 win.ui                      |   40 +
 44 files changed, 8095 insertions(+)

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..378eac2
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+build
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..060ae0a
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,62 @@
+language: c
+compiler:
+  - gcc
+  - clang
+cache:
+  directories:
+  - $HOME/OpenBlasInstall
+sudo: false
+env:
+  - TORCH_LUA_VERSION=LUAJIT21
+  - TORCH_LUA_VERSION=LUA51
+  - TORCH_LUA_VERSION=LUA52
+addons:
+  apt:
+    packages:
+    - cmake
+    - gfortran
+    - gcc-multilib
+    - gfortran-multilib
+    - liblapack-dev
+    - build-essential
+    - gcc 
+    - g++ 
+    - curl
+    - cmake 
+    - libreadline-dev 
+    - git-core 
+    - libqt4-core 
+    - libqt4-gui
+    - libqt4-dev 
+    - libjpeg-dev 
+    - libpng-dev 
+    - ncurses-dev
+    - imagemagick 
+    - libzmq3-dev 
+    - gfortran 
+    - unzip 
+    - gnuplot
+    - gnuplot-x11 
+    - libgraphicsmagick1-dev
+    - imagemagick 
+before_script: 
+- export ROOT_TRAVIS_DIR=$(pwd)
+- export INSTALL_PREFIX=~/torch/install
+-  ls $HOME/OpenBlasInstall/lib || (cd /tmp/ && git clone https://github.com/xianyi/OpenBLAS.git -b master && cd OpenBLAS && (make NO_AFFINITY=1 -j$(getconf _NPROCESSORS_ONLN) 2>/dev/null >/dev/null) && make PREFIX=$HOME/OpenBlasInstall install)
+- git clone https://github.com/torch/distro.git ~/torch --recursive
+- cd ~/torch && git submodule update --init --recursive
+- mkdir build && cd build
+- export CMAKE_LIBRARY_PATH=$HOME/OpenBlasInstall/include:$HOME/OpenBlasInstall/lib:$CMAKE_LIBRARY_PATH
+- cmake .. -DCMAKE_INSTALL_PREFIX="${INSTALL_PREFIX}" -DCMAKE_BUILD_TYPE=Release -DWITH_${TORCH_LUA_VERSION}=ON
+- make && make install
+- cd $ROOT_TRAVIS_DIR
+- export LD_LIBRARY_PATH=${INSTALL_PREFIX}/lib:$LD_LIBRARY_PATH
+script: 
+- ${INSTALL_PREFIX}/bin/luarocks make 
+- export PATH=${INSTALL_PREFIX}/bin:$PATH
+- export TESTLUA=$(which luajit lua | head -n 1)
+- ${TESTLUA} -limage -e "print('image loaded successfully')"
+- cd test
+- ${INSTALL_PREFIX}/bin/luarocks install graphicsmagick
+- ${TESTLUA} ./test_rotate.lua
+- ${TESTLUA} -limage -e "t=image.test(); if t.errors[1] then os.exit(1) end"
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..d19b863
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,90 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 2.6 FATAL_ERROR)
+CMAKE_POLICY(VERSION 2.6)
+
+FIND_PACKAGE(Torch REQUIRED)
+FIND_PACKAGE(JPEG)
+FIND_PACKAGE(PNG)
+
+# OpenMP support?
+SET(WITH_OPENMP ON CACHE BOOL "OpenMP support if available?")
+IF (APPLE AND CMAKE_COMPILER_IS_GNUCC)
+  EXEC_PROGRAM (uname ARGS -v  OUTPUT_VARIABLE DARWIN_VERSION)
+  STRING (REGEX MATCH "[0-9]+" DARWIN_VERSION ${DARWIN_VERSION})
+  MESSAGE (STATUS "MAC OS Darwin Version: ${DARWIN_VERSION}")
+  IF (DARWIN_VERSION GREATER 9)
+    SET(APPLE_OPENMP_SUCKS 1)
+  ENDIF (DARWIN_VERSION GREATER 9)
+  EXECUTE_PROCESS (COMMAND ${CMAKE_C_COMPILER} -dumpversion
+    OUTPUT_VARIABLE GCC_VERSION)
+  IF (APPLE_OPENMP_SUCKS AND GCC_VERSION VERSION_LESS 4.6.2)
+    MESSAGE(STATUS "Warning: Disabling OpenMP (unstable with this version of GCC)")
+    MESSAGE(STATUS " Install GCC >= 4.6.2 or change your OS to enable OpenMP")
+    SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-unknown-pragmas")
+    SET(WITH_OPENMP OFF CACHE BOOL "OpenMP support if available?" FORCE)
+  ENDIF ()
+ENDIF ()
+
+IF (WITH_OPENMP)
+  FIND_PACKAGE(OpenMP)
+  IF(OPENMP_FOUND)
+    MESSAGE(STATUS "Compiling with OpenMP support")
+    SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+    SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+    SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
+  ENDIF(OPENMP_FOUND)
+ENDIF (WITH_OPENMP)
+
+SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99")
+
+SET(src ppm.c)
+ADD_TORCH_PACKAGE(ppm "${src}" "${luasrc}" "Image Processing")
+TARGET_LINK_LIBRARIES(ppm luaT TH)
+IF(LUALIB)
+  TARGET_LINK_LIBRARIES(ppm ${LUALIB})
+ENDIF()
+
+if (JPEG_FOUND)
+    SET(src jpeg.c)
+    include_directories (${JPEG_INCLUDE_DIR})
+    SET(CMAKE_REQUIRED_INCLUDES "${JPEG_INCLUDE_DIR}")
+    SET(CMAKE_REQUIRED_LIBRARIES "${JPEG_LIBRARY}")
+    INCLUDE(CheckSymbolExists)
+    CHECK_SYMBOL_EXISTS(jpeg_mem_src "stddef.h;stdio.h;jpeglib.h" HAVE_JPEG_MEM_SRC)
+    IF (HAVE_JPEG_MEM_SRC)
+      SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DHAVE_JPEG_MEM_SRC")
+    ENDIF (HAVE_JPEG_MEM_SRC)
+    CHECK_SYMBOL_EXISTS(jpeg_mem_dest "stddef.h;stdio.h;jpeglib.h" HAVE_JPEG_MEM_DEST)
+    IF (HAVE_JPEG_MEM_DEST)
+      SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DHAVE_JPEG_MEM_DEST")
+    ENDIF (HAVE_JPEG_MEM_DEST)
+    ADD_TORCH_PACKAGE(jpeg "${src}" "${luasrc}" "Image Processing")
+    TARGET_LINK_LIBRARIES(jpeg luaT TH ${JPEG_LIBRARIES})
+    IF(LUALIB)
+        TARGET_LINK_LIBRARIES(jpeg ${LUALIB})
+    ENDIF()
+else (JPEG_FOUND)
+    message ("WARNING: Could not find JPEG libraries, JPEG wrapper will not be installed")
+endif (JPEG_FOUND)
+
+if (PNG_FOUND)
+    SET(src png.c)
+    include_directories (${PNG_INCLUDE_DIR})
+    ADD_TORCH_PACKAGE(png "${src}" "${luasrc}" "Image Processing")
+    TARGET_LINK_LIBRARIES(png luaT TH ${PNG_LIBRARIES})
+    IF(LUALIB)
+        TARGET_LINK_LIBRARIES(png ${LUALIB})
+    ENDIF()
+else (PNG_FOUND)
+    message ("WARNING: Could not find PNG libraries, PNG wrapper will not be installed")
+endif (PNG_FOUND)
+
+SET(src image.c)
+SET(luasrc init.lua win.ui test/test.lua)
+
+ADD_TORCH_PACKAGE(image "${src}" "${luasrc}" "Image Processing")
+TARGET_LINK_LIBRARIES(image luaT TH)
+IF(LUALIB)
+  TARGET_LINK_LIBRARIES(image ${LUALIB})
+ENDIF()
+INSTALL(DIRECTORY "assets" DESTINATION "${Torch_INSTALL_LUA_PATH_SUBDIR}/image")
+INSTALL(FILES "README.md" DESTINATION "${Torch_INSTALL_LUA_PATH_SUBDIR}/image")
diff --git a/COPYRIGHT.txt b/COPYRIGHT.txt
new file mode 100644
index 0000000..c9cc784
--- /dev/null
+++ b/COPYRIGHT.txt
@@ -0,0 +1,36 @@
+Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
+Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
+Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
+Copyright (c) 2011-2013 NYU (Clement Farabet)
+Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston)
+Copyright (c) 2006      Idiap Research Institute (Samy Bengio)
+Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz)
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+3. Neither the names of Deepmind Technologies, NYU, NEC Laboratories America 
+   and IDIAP Research Institute nor the names of its contributors may be 
+   used to endorse or promote products derived from this software without 
+   specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..97d505b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,43 @@
+# image Package Reference Manual #
+
+[![Build Status](https://travis-ci.org/torch/image.svg)](https://travis-ci.org/torch/image) 
+
+__image__ is the [Torch7 distribution](http://torch.ch/) package for processing 
+images. It contains a wide variety of functions divided into the following categories:
+
+  * [Saving and loading](doc/saveload.md) images as JPEG, PNG, PPM and PGM;
+  * [Simple transformations](doc/simpletransform.md) like translation, scaling and rotation;
+  * [Parameterized transformations](doc/paramtransform.md) like convolutions and warping;
+  * [Simple Drawing Routines](doc/drawing.md) like drawing text or a rectangle on an image;
+  * [Graphical user interfaces](doc/gui.md) like display and window;
+  * [Color Space Conversions](doc/colorspace.md) from and to RGB, YUV, Lab, and HSL;
+  * [Tensor Constructors](doc/tensorconstruct.md) for creating the Lenna and Fabio images, and Gaussian and Laplacian kernels;
+
+Note that unless specified otherwise, this package deals with images of size 
+`nChannel x height x width`.
+
+## Install
+
+The easiest way to install this package is by following the [instructions](http://torch.ch/docs/getting-started.html) 
+to install [Torch7](http://www.torch.ch), which includes __image__. 
+Otherwise, to update or manually re-install it:
+
+```bash
+$ luarocks install image
+```
+
+You can test your install with:
+
+```bash
+$ luajit -limage -e "image.test()"
+```
+
+## Usage
+
+```lua
+> require 'image'
+> l = image.lena()
+> image.display(l)
+> f = image.fabio()
+> image.display(f)
+```
diff --git a/assets/P2.pgm b/assets/P2.pgm
new file mode 100644
index 0000000..0e76d7d
--- /dev/null
+++ b/assets/P2.pgm
@@ -0,0 +1,11 @@
+P2
+# feep.ascii.pgm
+24 7
+15
+0 0  0  0  0  0  0  0  0 0  0  0  0  0  0  0  0 0  0  0  0  0  0  0
+0 3  3  3  3  0  0  7  7 7  7  0  0 11 11 11 11 0  0 15 15 15 15  0
+0 3  0  0  0  0  0  7  0 0  0  0  0 11  0  0  0 0  0 15  0  0 15  0
+0 3  3  3  0  0  0  7  7 7  0  0  0 11 11 11  0 0  0 15 15 15 15  0
+0 3  0  0  0  0  0  7  0 0  0  0  0 11  0  0  0 0  0 15  0  0  0  0
+0 3  0  0  0  0  0  7  7 7  7  0  0 11 11 11 11 0  0 15  0  0  0  0
+0 0  0  0  0  0  0  0  0 0  0  0  0  0  0  0  0 0  0  0  0  0  0  0
\ No newline at end of file
diff --git a/assets/P4.pbm b/assets/P4.pbm
new file mode 100644
index 0000000..0cc3736
--- /dev/null
+++ b/assets/P4.pbm
@@ -0,0 +1,3 @@
+P4
+1 1
+�
\ No newline at end of file
diff --git a/assets/P5.pgm b/assets/P5.pgm
new file mode 100644
index 0000000..b4ea2fb
--- /dev/null
+++ b/assets/P5.pgm
@@ -0,0 +1,7 @@
+P5
+100 1
+255
+������������������������������������������������������������������������������������666666666666666P5
+100 1
+255
+������������������������������������������������������������������������������������666666666666666
\ No newline at end of file
diff --git a/assets/P6.ppm b/assets/P6.ppm
new file mode 100644
index 0000000..68b997c
Binary files /dev/null and b/assets/P6.ppm differ
diff --git a/assets/corrupt-ihdr.png b/assets/corrupt-ihdr.png
new file mode 100644
index 0000000..ca53ac9
Binary files /dev/null and b/assets/corrupt-ihdr.png differ
diff --git a/assets/fabio.jpg b/assets/fabio.jpg
new file mode 100644
index 0000000..895b2bc
Binary files /dev/null and b/assets/fabio.jpg differ
diff --git a/assets/fabio.png b/assets/fabio.png
new file mode 100644
index 0000000..0a7b14c
Binary files /dev/null and b/assets/fabio.png differ
diff --git a/assets/foobar.png b/assets/foobar.png
new file mode 100644
index 0000000..f40f68c
Binary files /dev/null and b/assets/foobar.png differ
diff --git a/assets/grace_hopper_512.jpg b/assets/grace_hopper_512.jpg
new file mode 100644
index 0000000..6026020
Binary files /dev/null and b/assets/grace_hopper_512.jpg differ
diff --git a/assets/grace_hopper_512.png b/assets/grace_hopper_512.png
new file mode 100644
index 0000000..f0cb7cd
Binary files /dev/null and b/assets/grace_hopper_512.png differ
diff --git a/assets/gray16-1x2.png b/assets/gray16-1x2.png
new file mode 100644
index 0000000..9b3cb5e
Binary files /dev/null and b/assets/gray16-1x2.png differ
diff --git a/assets/gray3x1.png b/assets/gray3x1.png
new file mode 100644
index 0000000..ce89719
Binary files /dev/null and b/assets/gray3x1.png differ
diff --git a/assets/rectangle.png b/assets/rectangle.png
new file mode 100644
index 0000000..aa4720e
Binary files /dev/null and b/assets/rectangle.png differ
diff --git a/assets/rgb16-2x1.png b/assets/rgb16-2x1.png
new file mode 100644
index 0000000..3aab682
Binary files /dev/null and b/assets/rgb16-2x1.png differ
diff --git a/assets/rgb2x1.png b/assets/rgb2x1.png
new file mode 100644
index 0000000..60a0e9f
Binary files /dev/null and b/assets/rgb2x1.png differ
diff --git a/doc/colorspace.md b/doc/colorspace.md
new file mode 100644
index 0000000..12c37d6
--- /dev/null
+++ b/doc/colorspace.md
@@ -0,0 +1,73 @@
+<a name="image.colorspace"></a>
+## Color Space Conversions ##
+This section includes functions for performing conversions between 
+different color spaces.
+
+<a name="image.rgb2lab"></a>
+### [res] image.rgb2lab([dst,] src) ###
+Converts a `src` RGB image to [Lab](https://en.wikipedia.org/wiki/Lab_color_space). 
+If `dst` is provided, it is used to store the output
+image. Otherwise, returns a new `res` Tensor.
+
+<a name="image.lab2rgb"></a>
+### [res] image.lab2rgb([dst,] src) ###
+Converts a `src` [Lab](https://en.wikipedia.org/wiki/Lab_color_space) image to RGB. 
+If `dst` is provided, it is used to store the output
+image. Otherwise, returns a new `res` Tensor.
+
+<a name="image.rgb2yuv"></a>
+### [res] image.rgb2yuv([dst,] src) ###
+Converts an RGB image to YUV. If `dst` is provided, it is used to store the output
+image. Otherwise, returns a new `res` Tensor.
+
+<a name="image.yuv2rgb"></a>
+### [res] image.yuv2rgb([dst,] src) ###
+Converts a YUV image to RGB. If `dst` is provided, it is used to store the output
+image. Otherwise, returns a new `res` Tensor.
+
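+For example, a round trip between RGB and YUV can be sketched as follows (an illustrative snippet; `image.lena()` is described in [Tensor Constructors](tensorconstruct.md)):
+
+```lua
+local rgb  = image.lena()        -- 3 x 512 x 512 RGB image
+local yuv  = image.rgb2yuv(rgb)  -- convert to YUV
+local back = image.yuv2rgb(yuv)  -- convert back to RGB
+```
+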
+<a name="image.rgb2y"></a>
+### [res] image.rgb2y([dst,] src) ###
+Converts an RGB image to Y (discarding U and V). 
+If `dst` is provided, it is used to store the output
+image. Otherwise, returns a new `res` Tensor.
+
+<a name="image.rgb2hsl"></a>
+### [res] image.rgb2hsl([dst,] src) ###
+Converts an RGB image to [HSL](https://en.wikipedia.org/wiki/HSL_and_HSV). 
+If `dst` is provided, it is used to store the output
+image. Otherwise, returns a new `res` Tensor.
+
+<a name="image.hsl2rgb"></a>
+### [res] image.hsl2rgb([dst,] src) ###
+Converts an HSL image to RGB. 
+If `dst` is provided, it is used to store the output
+image. Otherwise, returns a new `res` Tensor.
+
+<a name="image.rgb2hsv"></a>
+### [res] image.rgb2hsv([dst,] src) ###
+Converts an RGB image to [HSV](https://en.wikipedia.org/wiki/HSL_and_HSV). 
+If `dst` is provided, it is used to store the output
+image. Otherwise, returns a new `res` Tensor.
+
+<a name="image.hsv2rgb"></a>
+### [res] image.hsv2rgb([dst,] src) ###
+Converts an HSV image to RGB. 
+If `dst` is provided, it is used to store the output
+image. Otherwise, returns a new `res` Tensor.
+
+<a name="image.rgb2nrgb"></a>
+### [res] image.rgb2nrgb([dst,] src) ###
+Converts an RGB image to normalized-RGB. 
+
+<a name="image.y2jet"></a>
+### [res] image.y2jet([dst,] src) ###
+Converts an L-level (1 to L) greyscale image into an L-level jet heat-map.
+If `dst` is provided, it is used to store the output image. Otherwise, returns a new `res` Tensor.
+
+This is particularly helpful for understanding the magnitude of the values of a matrix, or for easily spotting peaks in a scalar field (like probability densities over a 2D area).
+For example, you can run it as
+
+```lua
+image.display{image=image.y2jet(torch.linspace(1,10,10)), zoom=50}
+```
+
diff --git a/doc/drawing.md b/doc/drawing.md
new file mode 100644
index 0000000..90aca0b
--- /dev/null
+++ b/doc/drawing.md
@@ -0,0 +1,44 @@
+<a name="image.drawing"></a>
+## Simple Drawing Routines ##
+This section includes simple routines to draw on images.
+
+<a name="image.drawText"></a>
+### [res] image.drawText(src, text, x, y, [options]) ###
+Draws text onto a 3-channel Tensor (C x H x W) at the x-offset `x` and y-offset `y`.
+
+The `options` table can be passed in to set color, background color, in-place etc.
+
+Options:
+* `color` - [table] The text color. A table of 3 numbers `{R, G, B}`, each number scaled between 0 and 255. For example, red is `{255, 0, 0}`.
+* `bg` - [table] The background color where text is drawn. Same format as `color`.
+* `size` - [number] Size of the text to be drawn. `default value = 1`
+* `wrap` - [boolean] If the text goes out of bounds, wrap it with a newline automatically. `default value = true`
+* `inplace` - [boolean] If true, draws directly on the input tensor and returns it. `default value = false`
+
+Example:
+
+```lua
+image.drawText(image.lena(), "hello\nworld", 10, 10)
+image.drawText(image.lena(), "hello\nworld", 10, 20,{color = {0, 255, 0}, size = 5})
+image.drawText(image.lena(), "hello\nworld", 10, 20,{color = {0, 255, 0}, bg = {255, 0, 0}, size = 5})
+```
+
+<a name="image.drawRect"></a>
+### [res] image.drawRect(src, x1, y1, x2, y2, [options]) ###
+Draws a rectangle onto a 3-channel Tensor (C x H x W).  The top-left corner of
+the rectangle is `x1, y1`, and the bottom-right corner is `x2, y2`.
+
+The `options` table can be passed in to set color, in-place etc.
+
+Options:
+* `color` - [table] The rectangle color. A table of 3 numbers `{R, G, B}`, each
+    number scaled between 0 and 255. For example, `red` is `{255, 0, 0}`
+* `lineWidth` - [number] The width of the rectangle line, in pixels
+* `inplace` - [boolean] If true, draws directly on the input tensor and returns
+    it. `default value = false`
+
+Example:
+
+```lua
+image.drawRect(image.lena(), 200, 200, 370, 400, {lineWidth = 5, color = {0, 255, 0}})
+```
diff --git a/doc/gui.md b/doc/gui.md
new file mode 100644
index 0000000..3213af3
--- /dev/null
+++ b/doc/gui.md
@@ -0,0 +1,53 @@
+<a name="image.grapicalinter"></a>
+## Graphical User Interfaces ##
+The following functions, except for [image.toDisplayTensor](#image.toDisplayTensor), 
+require package [qtlua](https://github.com/torch/qtlua) and can only be 
+accessed via the `qlua` Lua interpreter (as opposed to the 
+[th](https://github.com/torch/trepl) or luajit interpreter).
+
+<a name="image.toDisplayTensor"></a>
+### [res] image.toDisplayTensor(input, [...]) ###
+Optional arguments `[...]` expand to `padding`, `nrow`, `scaleeach`, `min`, `max`, `symmetric`, `saturate`.
+Returns a single `res` Tensor that contains a grid of all the images in `input`.
+The latter can either be a table of image Tensors of size `height x width` (greyscale) or 
+`nChannel x height x width` (color), or a single Tensor of size 
+`batchSize x nChannel x height x width`, `nChannel x height x width` (where `nChannel` is 3 or 1), 
+`batchSize x height x width`, or `height x width`.
+
+When `scaleeach=false` (the default), all detected images 
+are compressed with successive calls to [image.minmax](simpletransform.md#image.minmax):
+```lua
+image.minmax{tensor=input[i], min=min, max=max, symm=symmetric, saturate=saturate}
+```
+`padding` specifies the number of padding pixels between images. The default is 0.
+`nrow` specifies the number of images per row. The default is 6.
+
+Note that arguments can also be specified as key-value arguments (in a table).
+
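+For example, a minimal sketch that tiles a hypothetical batch of random greyscale images into a grid of 4 images per row:
+
+```lua
+local batch = torch.rand(16, 1, 32, 32)  -- batchSize x nChannel x height x width
+local grid  = image.toDisplayTensor{input = batch, nrow = 4, padding = 2}
+```
+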
+<a name="image.display"></a>
+### [res] image.display(input, [...]) ###
+Optional arguments `[...]` expand to `zoom`, `min`, `max`, `legend`, `win`, 
+`x`, `y`, `scaleeach`, `gui`, `offscreen`, `padding`, `symm`, `nrow`.
+Displays `input` image(s) with optional saturation and zooming. 
+The `input`, which is either a Tensor of size `HxW`, `KxHxW` or `Kx3xHxW`, or list,
+is first prepared for display by passing it through [image.toDisplayTensor](#image.toDisplayTensor):
+```lua
+input = image.toDisplayTensor{
+   input=input, padding=padding, nrow=nrow, saturate=saturate, 
+   scaleeach=scaleeach, min=min, max=max, symmetric=symm
+}
+```
+The resulting `input` will be displayed using [qtlua](https://github.com/torch/qtlua).
+The displayed image will be zoomed by a factor of `zoom`. The default is 1.
+If `gui=true` (the default), the graphical user interface (GUI) 
+is an interactive window that provides the user with the ability to zoom in or out. 
+This can be turned off for a faster display. `legend` is a legend to be displayed,
+which has a default value of `image.display`. `win` is an optional qt window descriptor.
+If `x` and `y` are given, they are used to offset the image. Both default to 0.
+When `offscreen=true`, rendering (to generate images) is performed offscreen.
+
+<a name="image.window"></a>
+### [window, painter] image.window([...]) ###
+Creates a window context for images. 
+Optional arguments `[...]` expand to `hook_resize`, `hook_mousepress`, `hook_mousedoublepress`.
+These have a default value of `nil`, but may correspond to commensurate qt objects.
diff --git a/doc/index.md b/doc/index.md
new file mode 100644
index 0000000..5e50c39
--- /dev/null
+++ b/doc/index.md
@@ -0,0 +1,35 @@
+# image Package Reference Manual #
+
+__image__ is the [Torch7 distribution](http://torch.ch/) package for processing 
+images. It contains a wide variety of functions divided into the following categories:
+
+  * [Saving and loading](saveload.md) images as JPEG, PNG, PPM and PGM;
+  * [Simple transformations](simpletransform.md) like translation, scaling and rotation;
+  * [Parameterized transformations](paramtransform.md) like convolutions and warping;
+  * [Simple Drawing Routines](drawing.md) like drawing text or a rectangle on an image;
+  * [Graphical user interfaces](gui.md) like display and window;
+  * [Color Space Conversions](colorspace.md) from and to RGB, YUV, Lab, and HSL;
+  * [Tensor Constructors](tensorconstruct.md) for creating the Lenna and Fabio images, and Gaussian and Laplacian kernels;
+
+Note that unless specified otherwise, this package deals with images of size 
+`nChannel x height x width`.
+
+## Install
+
+The easiest way to install this package is by following the [instructions](http://torch.ch/docs/getting-started.html) 
+to install [Torch7](http://www.torch.ch), which includes __image__. 
+Otherwise, to update or manually re-install it:
+
+```bash
+$ luarocks install image
+```
+
+## Usage
+
+```lua
+> require 'image'
+> l = image.lena()
+> image.display(l)
+> f = image.fabio()
+> image.display(f)
+```
diff --git a/doc/paramtransform.md b/doc/paramtransform.md
new file mode 100644
index 0000000..839c754
--- /dev/null
+++ b/doc/paramtransform.md
@@ -0,0 +1,73 @@
+<a name="image.paramtrans"></a>
+## Parameterized transformations ##
+This section includes functions for performing transformations on 
+images requiring parameter Tensors like a warp `field` or a convolution
+`kernel`.
+
+<a name="image.warp"></a>
+### [res] image.warp([dst,]src,field,[mode,offset,clamp_mode,pad_val]) ###
+Warps image `src` (of size `KxHxW`) 
+according to flow field `field`. The latter has size `2xHxW` where the 
+first dimension is for the `(y,x)` flow field. String `mode` can 
+take on values [lanczos](https://en.wikipedia.org/wiki/Lanczos_resampling), 
+[bicubic](https://en.wikipedia.org/wiki/Bicubic_interpolation),
+[bilinear](https://en.wikipedia.org/wiki/Bilinear_interpolation) (the default), 
+or *simple*. When `offset` is true (the default), `(x,y)` is added to the flow field.
+The `clamp_mode` variable specifies how to handle the interpolation of samples off the input image.
+Permitted values are strings *clamp* (the default) or *pad*.
+When `clamp_mode` equals `pad`, the user can specify the padding value with `pad_val` (default = 0). Note: setting this value when `clamp_mode` equals `clamp` will result in an error.
+If `dst` is specified, it is used to store the result of the warp.
+Otherwise, returns a new `res` Tensor.
+
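+For example, a minimal sketch that shifts an image by a constant flow of 10 pixels (illustrative only):
+
+```lua
+local src = image.lena()
+-- constant (y,x) flow field; with the default offset=true it is added to the sampling grid
+local field = torch.Tensor(2, src:size(2), src:size(3)):fill(10)
+local res = image.warp(src, field, 'bilinear')
+```
+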
+<a name="image.convolve"></a>
+### [res] image.convolve([dst,] src, kernel, [mode]) ###
+Convolves Tensor `kernel` over image `src`. Valid string values for argument 
+`mode` are:
+ * *full* : the `src` image is effectively zero-padded such that the `res` of the convolution has the same size as `src`;
+ * *valid* (the default) : the `res` image will have `math.ceil(kernel/2)` fewer columns and rows on each side;
+ * *same* : performs a *full* convolution, but crops out the portion matching the output size of *valid*.
+Note that this function internally uses 
+[torch.conv2](https://github.com/torch/torch7/blob/master/doc/maths.md#torch.conv.dok).
+If `dst` is provided, it is used to store the output image. 
+Otherwise, returns a new `res` Tensor.
+
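+For example, a minimal sketch that blurs an image with a normalized Gaussian kernel (see [image.gaussian](tensorconstruct.md#image.gaussian)):
+
+```lua
+local kernel  = image.gaussian{size = 5, normalize = true}
+local blurred = image.convolve(image.lena(), kernel, 'same')
+```
+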
+<a name="image.lcn"></a>
+### [res] image.lcn(src, [kernel]) ###
+Local contrast normalization (LCN) on a given `src` image using kernel `kernel`.
+If `kernel` is not given, then a default `9x9` Gaussian is used 
+(see [image.gaussian](tensorconstruct.md#image.gaussian)).
+
+To prevent border effects, the image is first global contrast normalized
+(GCN) by subtracting the global mean and dividing by the global 
+standard deviation.
+
+Then the image is locally contrast normalized using the following equation:
+```lua
+res = (src - lm(src)) / sqrt( lm(src) - lm(src*src) )
+```
+where `lm(x)` is the local mean of each pixel in the image (i.e. 
+`image.convolve(x,kernel)`) and  `sqrt(x)` is the element-wise 
+square root of `x`. In other words, LCN performs 
+local subtractive and divisive normalization. 
+
+Note that this implementation is different from the LCN Layer defined on page 3 of 
+[What is the Best Multi-Stage Architecture for Object Recognition?](http://yann.lecun.com/exdb/publis/pdf/jarrett-iccv-09.pdf).
+
+<a name="image.erode"></a>
+### [res] image.erode(src, [kernel, pad]) ###
+Performs a [morphological erosion](https://en.wikipedia.org/wiki/Erosion_(morphology)) 
+on binary (zeros and ones) image `src` using odd 
+dimensioned morphological binary kernel `kernel`. 
+The default is a kernel consisting of ones of size `3x3`. Number 
+`pad` is the value to assume outside the image boundary when performing 
+the convolution. The default is 1.
+
+<a name="image.dilate"></a>
+### [res] image.dilate(src, [kernel, pad]) ###
+Performs a [morphological dilation](https://en.wikipedia.org/wiki/Dilation_(morphology)) 
+on binary (zeros and ones) image `src` using odd 
+dimensioned morphological binary kernel `kernel`. 
+The default is a kernel consisting of ones of size `3x3`. Number 
+`pad` is the value to assume outside the image boundary when performing 
+the convolution. The default is 0.
+
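+For example, a minimal sketch on a small hand-made binary image, using the default `3x3` kernels:
+
+```lua
+local bin = torch.zeros(5, 5)
+bin[{ {2, 4}, {2, 4} }] = 1   -- a 3x3 block of ones
+local eroded  = image.erode(bin)
+local dilated = image.dilate(bin)
+```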
diff --git a/doc/saveload.md b/doc/saveload.md
new file mode 100644
index 0000000..d90fc20
--- /dev/null
+++ b/doc/saveload.md
@@ -0,0 +1,63 @@
+<a name="image.saveload"></a>
+## Saving and Loading ##
+This section includes functions for saving and loading different types 
+of images to and from disk.
+
+<a name="image.load"></a>
+### [res] image.load(filename, [depth, tensortype]) ###
+Loads an image located at path `filename` having `depth` channels (1 or 3)
+into a [Tensor](https://github.com/torch/torch7/blob/master/doc/tensor.md#tensor)
+of type `tensortype` (*float*, *double* or *byte*). The last two arguments 
+are optional.
+
+The image format is determined from the `filename`'s 
+extension suffix. Supported formats are 
+[JPEG](https://en.wikipedia.org/wiki/JPEG), 
+[PNG](https://en.wikipedia.org/wiki/Portable_Network_Graphics), 
+[PPM and PGM](https://en.wikipedia.org/wiki/Netpbm_format).
+ 
+The returned `res` Tensor has size `nChannel x height x width` where `nChannel` is 
+1 (greyscale) or 3 (usually [RGB](https://en.wikipedia.org/wiki/RGB_color_model) 
+or [YUV](https://en.wikipedia.org/wiki/YUV)).
+
+Usage:
+```lua
+-- load an RGB image file as a byte tensor
+local img = image.load(imagefile,3,'byte')
+
+-- load a greyscale image file as a byte tensor
+local img = image.load(imagefile,1,'byte')
+
+```
+
+<a name="image.save"></a>
+### image.save(filename, tensor) ###
+Saves Tensor `tensor` to disk at path `filename`. The format to which 
+the image is saved is extrapolated from the `filename`'s extension suffix.
+The `tensor` should be of size `nChannel x height x width`.
+To save with minimal loss, the tensor values should lie in the range [0, 1], since the tensor is clamped between 0 and 1 before being saved to disk.
+
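+Usage (an illustrative sketch; the format follows from the extension):
+
+```lua
+local img = image.lena()
+image.save('lena.png', img)
+image.save('lena.jpg', img)
+```
+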
+<a name="image.decompressJPG"></a>
+### [res] image.decompressJPG(tensor, [depth, tensortype]) ###
+Decompresses an image from a ByteTensor in memory having `depth` channels (1 or 3)
+into a [Tensor](https://github.com/torch/torch7/blob/master/doc/tensor.md#tensor)
+of type `tensortype` (*float*, *double* or *byte*). The last two arguments
+are optional.
+
+Usage:
+```lua
+local fin = torch.DiskFile(imfile, 'r')
+fin:binary()
+fin:seekEnd()
+local file_size_bytes = fin:position() - 1
+fin:seek(1)
+local img_binary = torch.ByteTensor(file_size_bytes)
+fin:readByte(img_binary:storage())
+fin:close()
+-- Then when you're ready to decompress the ByteTensor:
+im = image.decompressJPG(img_binary)
+```
+
+<a name="image.compressJPG"></a>
+### [res] image.compressJPG(tensor, [quality]) ###
+Compresses an image to a ByteTensor in memory. The optional `quality` argument lies between 1 and 100 and adjusts the compression quality.
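+
+Usage (an illustrative sketch pairing it with `image.decompressJPG`):
+
+```lua
+local jpg = image.compressJPG(image.lena(), 90)  -- ByteTensor holding the JPEG data
+local img = image.decompressJPG(jpg)
+```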
diff --git a/doc/simpletransform.md b/doc/simpletransform.md
new file mode 100644
index 0000000..52b5341
--- /dev/null
+++ b/doc/simpletransform.md
@@ -0,0 +1,130 @@
+<a name="image.simpletrans"></a>
+## Simple Transformations ##
+This section includes simple but very common image transformations 
+like cropping, translation, scaling and rotation. 
+
+<a name="image.crop"></a>
+### [res] image.crop([dst,] src, x1, y1, [x2, y2]) ###
+Crops image `src` at coordinate `(x1, y1)` up to coordinate 
+`(x2, y2)`. The coordinate indexing is zero-based and `(x2, y2)` is non-inclusive.
+If `dst` is provided, it is used to store the output
+image. Otherwise, returns a new `res` Tensor. 
+
+```lua
+-- Indexing starts at 0 and the end coordinate (2, 2) is non-inclusive.
+> require('image')
+> image.crop(torch.Tensor(3, 2, 2), 0, 0, 2, 2) -- a full crop; the result is a 3x2x2 tensor. 
+(1,.,.) = 
+  0  0
+  0  0
+
+(2,.,.) = 
+  0  0
+  0  0
+
+(3,.,.) = 
+  0  0
+  0  0
+[torch.DoubleTensor of size 3x2x2]
+```
+
+### [res] image.crop([dst,] src, format, width, height) ###
+Crops a `width x height` section of source image `src`. The argument
+`format` is a string specifying where to crop: it can be "c", "tl", "tr",
+"bl" or "br" for center, top left, top right, bottom left and bottom right,
+respectively.  If `dst` is provided, it is used to store the output
+image. Otherwise, returns a new `res` Tensor.
+
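+For example, a minimal sketch of a centered and a top-left `64 x 64` crop:
+
+```lua
+local c  = image.crop(image.lena(), 'c',  64, 64)
+local tl = image.crop(image.lena(), 'tl', 64, 64)
+```
+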
+<a name="image.translate"></a>
+### [res] image.translate([dst,] src, x, y) ###
+Translates image `src` by `x` pixels horizontally and `y` pixels 
+vertically. If `dst` is provided, it is used to store the output
+image. Otherwise, returns a new `res` Tensor.
+
+<a name="image.scale"></a>
+### [res] image.scale(src, width, height, [mode]) ###
+Rescale the height and width of image `src` to have 
+width `width` and height `height`.  Variable `mode` specifies 
+type of interpolation to be used. Valid values include 
+[bilinear](https://en.wikipedia.org/wiki/Bilinear_interpolation)
+(the default), [bicubic](https://en.wikipedia.org/wiki/Bicubic_interpolation),
+or *simple* interpolation. Returns a new `res` Tensor.
+
+### [res] image.scale(src, size, [mode]) ###
+Rescale the height and width of image `src`.  Variable `size` is a number
+or a string specifying the size of the result image. When `size` is a
+number, it specifies the maximum height or width of the output. When it is
+a string like `WxH` or `MAX` or `^MIN`, `*SC` or `*SCn/SCd` it specifies
+the `height x width`, maximum height or width of the output, minimum height
+or width of the output, scaling factor (number), or fractional scaling
+factor (int/int), respectively.
+
+### [res] image.scale(dst, src, [mode]) ###
+Rescale the height and width of image `src` to fit the dimensions of 
+Tensor `dst`. 
+
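+For example, a few illustrative calls (using `image.lena()`, a `3 x 512 x 512` image):
+
+```lua
+local a = image.scale(image.lena(), 128, 128, 'bilinear')  -- explicit width and height
+local b = image.scale(image.lena(), 256)                   -- maximum height or width of 256
+local c = image.scale(image.lena(), '*1/2')                -- fractional scaling factor
+```
+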
+<a name="image.rotate"></a>
+### [res] image.rotate([dst,] src, theta, [mode]) ###
+Rotates image `src` by `theta` radians. 
+If `dst` is specified it is used to store the results of the rotation.
+Variable `mode` specifies type of interpolation to be used. Valid values include 
+*simple* (the default) or *bilinear* interpolation.
+
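+For example, an illustrative 45-degree rotation:
+
+```lua
+local r = image.rotate(image.lena(), math.pi / 4, 'bilinear')
+```
+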
+<a name="image.polar"></a>
+### [res] image.polar([dst,] src, [interpolation], [mode]) ###
+Converts image `src` to polar coordinates. In the polar image, angular information is in the vertical direction and radius information in the horizontal direction.
+If `dst` is specified it is used to store the polar image. If `dst` is not specified, its size is automatically determined. Variable `interpolation` specifies type of interpolation to be used. Valid values include *simple* (the default) or *bilinear* interpolation. Variable `mode` determines whether the *full* image is converted to the polar space (implying empty regions in the polar image), or whether only the *valid* central part of the polar transform is returned (the default).
+
+<a name="image.logpolar"></a>
+### [res] image.logpolar([dst,] src, [interpolation], [mode]) ###
+Converts image `src` to log-polar coordinates. In the log-polar image, angular information is in the vertical direction and log-radius information in the horizontal direction.
+If `dst` is specified it is used to store the polar image. If `dst` is not specified, its size is automatically determined. Variable `interpolation` specifies type of interpolation to be used. Valid values include *simple* (the default) or *bilinear* interpolation. Variable `mode` determines whether the *full* image is converted to the log-polar space (implying empty regions in the log-polar image), or whether only the *valid* central part of the log-polar transform is returned (the default). 
+
+<a name="image.hflip"></a>
+### [res] image.hflip([dst,] src) ###
+Flips image `src` horizontally (left<->right). If `dst` is provided, it is used to
+store the output image. Otherwise, returns a new `res` Tensor.
+
+<a name="image.vflip"></a>
+### [res] image.vflip([dst,] src) ###
+Flips image `src` vertically (up<->down). If `dst` is provided, it is used to
+store the output image. Otherwise, returns a new `res` Tensor.
+
+<a name="image.flip"></a>
+### [res] image.flip([dst,] src, flip_dim) ###
+Flips image `src` along the specified dimension. If `dst` is provided, it is used to
+store the output image. Otherwise, returns a new `res` Tensor.
+
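+For example, illustrative calls for each flavour of flip:
+
+```lua
+local h = image.hflip(image.lena())    -- mirror left<->right
+local v = image.vflip(image.lena())    -- mirror up<->down
+local f = image.flip(image.lena(), 3)  -- flip along the third (width) dimension
+```
+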
+<a name="image.minmax"></a>
+### [res] image.minmax{tensor, [min, max, ...]} ###
+Compresses image `tensor` between `min` and `max`. 
+When omitted, `min` and `max` are inferred from 
+`tensor:min()` and `tensor:max()`, respectively.
+The `tensor` is normalized using `min` and `max` by performing :
+```lua
+tensor:add(-min):div(max-min)
+```
+Other optional arguments (`...`) include `symm`, `inplace`, `saturate`, and `tensorOut`.
+When `symm=true` and `min` and `max` are both omitted, 
+`max = min*2` in the above equation. This results in a symmetric dynamic 
+range that is particularly useful for drawing filters. The default is `false`.
+When `inplace=true`, the result of the compression is stored in `tensor`. 
+The default is `false`.
+When `saturate=true`, the result of the compression is passed through
+a function that clips the values between 0 and 1 
+(i.e. anything below 0 is set to 0, anything above 1 is set to 1).
+When provided, Tensor `tensorOut` is used to store results. 
+Note that arguments should be provided as key-value pairs (in a table).
+
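+For example, a minimal sketch that rescales an arbitrary tensor:
+
+```lua
+local x = torch.randn(3, 32, 32)
+local y = image.minmax{tensor = x}                     -- min and max inferred from x
+local z = image.minmax{tensor = x, min = -1, max = 1}  -- fixed compression range
+```
+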
+<a name="image.gaussianpyramid"></a>
+### [res] image.gaussianpyramid([dst,] src, scales) ###
+Constructs a [Gaussian pyramid](https://en.wikipedia.org/wiki/Gaussian_pyramid)
+of scales `scales` from a 2D or 3D `src` image of size 
+`[nChannel x] width x height`. Each Tensor at index `i` 
+in the returned list of Tensors has size  `[nChannel x] width*scales[i] x height*scales[i]`.
+
+If list `dst` is provided, with or without Tensors, it is used to store the output images. 
+Otherwise, returns a new `res` list of Tensors.
+
+Internally, this function makes use of functions [image.gaussian](tensorconstruct.md#image.gaussian),
+[image.scale](#image.scale) and [image.convolve](paramtransform.md#image.convolve).
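+
+For example, an illustrative three-scale pyramid:
+
+```lua
+local pyr = image.gaussianpyramid(image.lena(), {0.5, 0.25, 0.125})
+-- pyr[1]: 3 x 256 x 256, pyr[2]: 3 x 128 x 128, pyr[3]: 3 x 64 x 64
+```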
diff --git a/doc/tensorconstruct.md b/doc/tensorconstruct.md
new file mode 100644
index 0000000..18b909e
--- /dev/null
+++ b/doc/tensorconstruct.md
@@ -0,0 +1,91 @@
+<a name="image.tensorconst"></a>
+## Tensor Constructors ##
+The following functions construct Tensors like Gaussian or 
+Laplacian kernels, or images like Lenna and Fabio.
+
+<a name="image.lena"></a>
+### [res] image.lena() ###
+Returns the classic `Lenna.jpg` image as a `3 x 512 x 512` Tensor.
+
+<a name="image.fabio"></a>
+### [res] image.fabio() ###
+Returns the `fabio.jpg` image as a `257 x 271` Tensor.
+
+<a name="image.gaussian"></a>
+### [res] image.gaussian([size, sigma, amplitude, normalize, [...]]) ###
+Returns a 2D [Gaussian](https://en.wikipedia.org/wiki/Gaussian_function) 
+kernel of size `height x width`. When used as a Gaussian smoothing operator in a 2D 
+convolution, this kernel is used to `blur` images and remove detail and noise 
+(ref.: [Gaussian Smoothing](http://homepages.inf.ed.ac.uk/rbf/HIPR2/gsmooth.htm)).
+Optional arguments `[...]` expand to 
+`width`, `height`, `sigma_horz`, `sigma_vert`, `mean_horz`, `mean_vert` and `tensor`.
+
+The default value of `height` and `width` is `size`, where the latter 
+has a default value of 3. The amplitude of the Gaussian (its maximum value) 
+is `amplitude`. The default is 1. 
+When `normalize=true`, the kernel is normalized to have a sum of 1.
+This overrides the `amplitude` argument. The default is `false`.
+The default value of the horizontal and vertical standard deviation 
+`sigma_horz` and `sigma_vert` of the Gaussian kernel is `sigma`, where 
+the latter has a default value of 0.25. The default values for the 
+corresponding means `mean_horz` and `mean_vert` are 0.5. Both the 
+standard deviations and means are relative to kernels of unit width and height
+where the top-left corner is the origin. In other words, a mean of 0.5 is 
+the center of the kernel size, while a standard deviation of 0.25 is a quarter
+of it. When `tensor` is provided (a 2D Tensor), the `height`, `width` and `size` are ignored.
+It is used to store the returned gaussian kernel.
+
+Note that arguments can also be specified as key-value arguments (in a table).
+
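+For example, an illustrative normalized `7x7` kernel with a tighter standard deviation:
+
+```lua
+local k = image.gaussian{size = 7, sigma = 0.1, normalize = true}
+```
+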
+<a name="image.gaussian1D"></a>
+### [res] image.gaussian1D([size, sigma, amplitude, normalize, mean, tensor]) ###
+Returns a 1D Gaussian kernel of size `size`, mean `mean` and standard 
+deviation `sigma`. 
+Respectively, these arguments have default values of 3, 0.25 and 0.5. 
+The amplitude of the Gaussian (its maximum value) 
+is `amplitude`. The default is 1. 
+When `normalize=true`, the kernel is normalized to have a sum of 1.
+This overrides the `amplitude` argument. The default is `false`. Both the 
+standard deviation and mean are relative to a kernel of unit size. 
+In other words, a mean of 0.5 is the center of the kernel size, 
+while a standard deviation of 0.25 is a quarter of it. 
+When `tensor` is provided (a 1D Tensor), the `size` is ignored.
+It is used to store the returned gaussian kernel.
+
+Note that arguments can also be specified as key-value arguments (in a table).
+
+<a name="image.laplacian"></a>
+### [res] image.laplacian([size, sigma, amplitude, normalize, [...]]) ###
+Returns a 2D [Laplacian](https://en.wikipedia.org/wiki/Blob_detection#The_Laplacian_of_Gaussian) 
+kernel of size `height x width`. 
+When used in a 2D convolution, the Laplacian of an image highlights 
+regions of rapid intensity change and is therefore often used for edge detection 
+(ref.: [Laplacian/Laplacian of Gaussian](http://homepages.inf.ed.ac.uk/rbf/HIPR2/log.htm)).
+Optional arguments `[...]` expand to 
+`width`, `height`, `sigma_horz`, `sigma_vert`, `mean_horz`, `mean_vert`.
+
+The default value of `height` and `width` is `size`, where the latter 
+has a default value of 3. The amplitude of the Laplacian (its maximum value) 
+is `amplitude`. The default is 1. 
+When `normalize=true`, the kernel is normalized to have a sum of 1.
+This overrides the `amplitude` argument. The default is `false`.
+The default value of the horizontal and vertical standard deviation 
+`sigma_horz` and `sigma_vert` of the Laplacian kernel is `sigma`, where 
+the latter has a default value of 0.25. The default values for the 
+corresponding means `mean_horz` and `mean_vert` are 0.5. Both the 
+standard deviations and means are relative to kernels of unit width and height
+where the top-left corner is the origin. In other words, a mean of 0.5 is 
+the center of the kernel size, while a standard deviation of 0.25 is a quarter
+of it.
+
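+For example, an illustrative `7x7` Laplacian kernel (e.g. for edge detection together with [image.convolve](paramtransform.md#image.convolve)):
+
+```lua
+local lk = image.laplacian(7)
+```
+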
+<a name="image.colormap"></a>
+### [res] image.colormap(nColor) ###
+Creates an optimally-spaced RGB color mapping of `nColor` colors. 
+Note that the mapping is obtained by generating the colors around 
+the HSV wheel, varying the Hue component.
+The returned `res` Tensor has size `nColor x 3`. 
+
+<a name="image.jetColormap"></a>
+### [res] image.jetColormap(nColor) ###
+Creates a jet (blue to red) RGB color mapping of `nColor` colors.
+The returned `res` Tensor has size `nColor x 3`. 
diff --git a/font.c b/font.c
new file mode 100644
index 0000000..1fda9e1
--- /dev/null
+++ b/font.c
@@ -0,0 +1,287 @@
+/*
+Software License Agreement (BSD License)
+
+Copyright (c) 2012 Adafruit Industries.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice,
+  this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+*/
+
+// Borrowed from https://github.com/adafruit/Adafruit-GFX-Library
+// Standard ASCII 5x7 font
+static const unsigned char image_ada_font[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00,
+	0x3E, 0x5B, 0x4F, 0x5B, 0x3E,
+	0x3E, 0x6B, 0x4F, 0x6B, 0x3E,
+	0x1C, 0x3E, 0x7C, 0x3E, 0x1C,
+	0x18, 0x3C, 0x7E, 0x3C, 0x18,
+	0x1C, 0x57, 0x7D, 0x57, 0x1C,
+	0x1C, 0x5E, 0x7F, 0x5E, 0x1C,
+	0x00, 0x18, 0x3C, 0x18, 0x00,
+	0xFF, 0xE7, 0xC3, 0xE7, 0xFF,
+	0x00, 0x18, 0x24, 0x18, 0x00,
+	0xFF, 0xE7, 0xDB, 0xE7, 0xFF,
+	0x30, 0x48, 0x3A, 0x06, 0x0E,
+	0x26, 0x29, 0x79, 0x29, 0x26,
+	0x40, 0x7F, 0x05, 0x05, 0x07,
+	0x40, 0x7F, 0x05, 0x25, 0x3F,
+	0x5A, 0x3C, 0xE7, 0x3C, 0x5A,
+	0x7F, 0x3E, 0x1C, 0x1C, 0x08,
+	0x08, 0x1C, 0x1C, 0x3E, 0x7F,
+	0x14, 0x22, 0x7F, 0x22, 0x14,
+	0x5F, 0x5F, 0x00, 0x5F, 0x5F,
+	0x06, 0x09, 0x7F, 0x01, 0x7F,
+	0x00, 0x66, 0x89, 0x95, 0x6A,
+	0x60, 0x60, 0x60, 0x60, 0x60,
+	0x94, 0xA2, 0xFF, 0xA2, 0x94,
+	0x08, 0x04, 0x7E, 0x04, 0x08,
+	0x10, 0x20, 0x7E, 0x20, 0x10,
+	0x08, 0x08, 0x2A, 0x1C, 0x08,
+	0x08, 0x1C, 0x2A, 0x08, 0x08,
+	0x1E, 0x10, 0x10, 0x10, 0x10,
+	0x0C, 0x1E, 0x0C, 0x1E, 0x0C,
+	0x30, 0x38, 0x3E, 0x38, 0x30,
+	0x06, 0x0E, 0x3E, 0x0E, 0x06,
+	0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x5F, 0x00, 0x00,
+	0x00, 0x07, 0x00, 0x07, 0x00,
+	0x14, 0x7F, 0x14, 0x7F, 0x14,
+	0x24, 0x2A, 0x7F, 0x2A, 0x12,
+	0x23, 0x13, 0x08, 0x64, 0x62,
+	0x36, 0x49, 0x56, 0x20, 0x50,
+	0x00, 0x08, 0x07, 0x03, 0x00,
+	0x00, 0x1C, 0x22, 0x41, 0x00,
+	0x00, 0x41, 0x22, 0x1C, 0x00,
+	0x2A, 0x1C, 0x7F, 0x1C, 0x2A,
+	0x08, 0x08, 0x3E, 0x08, 0x08,
+	0x00, 0x80, 0x70, 0x30, 0x00,
+	0x08, 0x08, 0x08, 0x08, 0x08,
+	0x00, 0x00, 0x60, 0x60, 0x00,
+	0x20, 0x10, 0x08, 0x04, 0x02,
+	0x3E, 0x51, 0x49, 0x45, 0x3E,
+	0x00, 0x42, 0x7F, 0x40, 0x00,
+	0x72, 0x49, 0x49, 0x49, 0x46,
+	0x21, 0x41, 0x49, 0x4D, 0x33,
+	0x18, 0x14, 0x12, 0x7F, 0x10,
+	0x27, 0x45, 0x45, 0x45, 0x39,
+	0x3C, 0x4A, 0x49, 0x49, 0x31,
+	0x41, 0x21, 0x11, 0x09, 0x07,
+	0x36, 0x49, 0x49, 0x49, 0x36,
+	0x46, 0x49, 0x49, 0x29, 0x1E,
+	0x00, 0x00, 0x14, 0x00, 0x00,
+	0x00, 0x40, 0x34, 0x00, 0x00,
+	0x00, 0x08, 0x14, 0x22, 0x41,
+	0x14, 0x14, 0x14, 0x14, 0x14,
+	0x00, 0x41, 0x22, 0x14, 0x08,
+	0x02, 0x01, 0x59, 0x09, 0x06,
+	0x3E, 0x41, 0x5D, 0x59, 0x4E,
+	0x7C, 0x12, 0x11, 0x12, 0x7C,
+	0x7F, 0x49, 0x49, 0x49, 0x36,
+	0x3E, 0x41, 0x41, 0x41, 0x22,
+	0x7F, 0x41, 0x41, 0x41, 0x3E,
+	0x7F, 0x49, 0x49, 0x49, 0x41,
+	0x7F, 0x09, 0x09, 0x09, 0x01,
+	0x3E, 0x41, 0x41, 0x51, 0x73,
+	0x7F, 0x08, 0x08, 0x08, 0x7F,
+	0x00, 0x41, 0x7F, 0x41, 0x00,
+	0x20, 0x40, 0x41, 0x3F, 0x01,
+	0x7F, 0x08, 0x14, 0x22, 0x41,
+	0x7F, 0x40, 0x40, 0x40, 0x40,
+	0x7F, 0x02, 0x1C, 0x02, 0x7F,
+	0x7F, 0x04, 0x08, 0x10, 0x7F,
+	0x3E, 0x41, 0x41, 0x41, 0x3E,
+	0x7F, 0x09, 0x09, 0x09, 0x06,
+	0x3E, 0x41, 0x51, 0x21, 0x5E,
+	0x7F, 0x09, 0x19, 0x29, 0x46,
+	0x26, 0x49, 0x49, 0x49, 0x32,
+	0x03, 0x01, 0x7F, 0x01, 0x03,
+	0x3F, 0x40, 0x40, 0x40, 0x3F,
+	0x1F, 0x20, 0x40, 0x20, 0x1F,
+	0x3F, 0x40, 0x38, 0x40, 0x3F,
+	0x63, 0x14, 0x08, 0x14, 0x63,
+	0x03, 0x04, 0x78, 0x04, 0x03,
+	0x61, 0x59, 0x49, 0x4D, 0x43,
+	0x00, 0x7F, 0x41, 0x41, 0x41,
+	0x02, 0x04, 0x08, 0x10, 0x20,
+	0x00, 0x41, 0x41, 0x41, 0x7F,
+	0x04, 0x02, 0x01, 0x02, 0x04,
+	0x40, 0x40, 0x40, 0x40, 0x40,
+	0x00, 0x03, 0x07, 0x08, 0x00,
+	0x20, 0x54, 0x54, 0x78, 0x40,
+	0x7F, 0x28, 0x44, 0x44, 0x38,
+	0x38, 0x44, 0x44, 0x44, 0x28,
+	0x38, 0x44, 0x44, 0x28, 0x7F,
+	0x38, 0x54, 0x54, 0x54, 0x18,
+	0x00, 0x08, 0x7E, 0x09, 0x02,
+	0x18, 0xA4, 0xA4, 0x9C, 0x78,
+	0x7F, 0x08, 0x04, 0x04, 0x78,
+	0x00, 0x44, 0x7D, 0x40, 0x00,
+	0x20, 0x40, 0x40, 0x3D, 0x00,
+	0x7F, 0x10, 0x28, 0x44, 0x00,
+	0x00, 0x41, 0x7F, 0x40, 0x00,
+	0x7C, 0x04, 0x78, 0x04, 0x78,
+	0x7C, 0x08, 0x04, 0x04, 0x78,
+	0x38, 0x44, 0x44, 0x44, 0x38,
+	0xFC, 0x18, 0x24, 0x24, 0x18,
+	0x18, 0x24, 0x24, 0x18, 0xFC,
+	0x7C, 0x08, 0x04, 0x04, 0x08,
+	0x48, 0x54, 0x54, 0x54, 0x24,
+	0x04, 0x04, 0x3F, 0x44, 0x24,
+	0x3C, 0x40, 0x40, 0x20, 0x7C,
+	0x1C, 0x20, 0x40, 0x20, 0x1C,
+	0x3C, 0x40, 0x30, 0x40, 0x3C,
+	0x44, 0x28, 0x10, 0x28, 0x44,
+	0x4C, 0x90, 0x90, 0x90, 0x7C,
+	0x44, 0x64, 0x54, 0x4C, 0x44,
+	0x00, 0x08, 0x36, 0x41, 0x00,
+	0x00, 0x00, 0x77, 0x00, 0x00,
+	0x00, 0x41, 0x36, 0x08, 0x00,
+	0x02, 0x01, 0x02, 0x04, 0x02,
+	0x3C, 0x26, 0x23, 0x26, 0x3C,
+	0x1E, 0xA1, 0xA1, 0x61, 0x12,
+	0x3A, 0x40, 0x40, 0x20, 0x7A,
+	0x38, 0x54, 0x54, 0x55, 0x59,
+	0x21, 0x55, 0x55, 0x79, 0x41,
+	0x22, 0x54, 0x54, 0x78, 0x42, // a-umlaut
+	0x21, 0x55, 0x54, 0x78, 0x40,
+	0x20, 0x54, 0x55, 0x79, 0x40,
+	0x0C, 0x1E, 0x52, 0x72, 0x12,
+	0x39, 0x55, 0x55, 0x55, 0x59,
+	0x39, 0x54, 0x54, 0x54, 0x59,
+	0x39, 0x55, 0x54, 0x54, 0x58,
+	0x00, 0x00, 0x45, 0x7C, 0x41,
+	0x00, 0x02, 0x45, 0x7D, 0x42,
+	0x00, 0x01, 0x45, 0x7C, 0x40,
+	0x7D, 0x12, 0x11, 0x12, 0x7D, // A-umlaut
+	0xF0, 0x28, 0x25, 0x28, 0xF0,
+	0x7C, 0x54, 0x55, 0x45, 0x00,
+	0x20, 0x54, 0x54, 0x7C, 0x54,
+	0x7C, 0x0A, 0x09, 0x7F, 0x49,
+	0x32, 0x49, 0x49, 0x49, 0x32,
+	0x3A, 0x44, 0x44, 0x44, 0x3A, // o-umlaut
+	0x32, 0x4A, 0x48, 0x48, 0x30,
+	0x3A, 0x41, 0x41, 0x21, 0x7A,
+	0x3A, 0x42, 0x40, 0x20, 0x78,
+	0x00, 0x9D, 0xA0, 0xA0, 0x7D,
+	0x3D, 0x42, 0x42, 0x42, 0x3D, // O-umlaut
+	0x3D, 0x40, 0x40, 0x40, 0x3D,
+	0x3C, 0x24, 0xFF, 0x24, 0x24,
+	0x48, 0x7E, 0x49, 0x43, 0x66,
+	0x2B, 0x2F, 0xFC, 0x2F, 0x2B,
+	0xFF, 0x09, 0x29, 0xF6, 0x20,
+	0xC0, 0x88, 0x7E, 0x09, 0x03,
+	0x20, 0x54, 0x54, 0x79, 0x41,
+	0x00, 0x00, 0x44, 0x7D, 0x41,
+	0x30, 0x48, 0x48, 0x4A, 0x32,
+	0x38, 0x40, 0x40, 0x22, 0x7A,
+	0x00, 0x7A, 0x0A, 0x0A, 0x72,
+	0x7D, 0x0D, 0x19, 0x31, 0x7D,
+	0x26, 0x29, 0x29, 0x2F, 0x28,
+	0x26, 0x29, 0x29, 0x29, 0x26,
+	0x30, 0x48, 0x4D, 0x40, 0x20,
+	0x38, 0x08, 0x08, 0x08, 0x08,
+	0x08, 0x08, 0x08, 0x08, 0x38,
+	0x2F, 0x10, 0xC8, 0xAC, 0xBA,
+	0x2F, 0x10, 0x28, 0x34, 0xFA,
+	0x00, 0x00, 0x7B, 0x00, 0x00,
+	0x08, 0x14, 0x2A, 0x14, 0x22,
+	0x22, 0x14, 0x2A, 0x14, 0x08,
+	0x55, 0x00, 0x55, 0x00, 0x55, // #176 (25% block) missing in old code
+	0xAA, 0x55, 0xAA, 0x55, 0xAA, // 50% block
+	0xFF, 0x55, 0xFF, 0x55, 0xFF, // 75% block
+	0x00, 0x00, 0x00, 0xFF, 0x00,
+	0x10, 0x10, 0x10, 0xFF, 0x00,
+	0x14, 0x14, 0x14, 0xFF, 0x00,
+	0x10, 0x10, 0xFF, 0x00, 0xFF,
+	0x10, 0x10, 0xF0, 0x10, 0xF0,
+	0x14, 0x14, 0x14, 0xFC, 0x00,
+	0x14, 0x14, 0xF7, 0x00, 0xFF,
+	0x00, 0x00, 0xFF, 0x00, 0xFF,
+	0x14, 0x14, 0xF4, 0x04, 0xFC,
+	0x14, 0x14, 0x17, 0x10, 0x1F,
+	0x10, 0x10, 0x1F, 0x10, 0x1F,
+	0x14, 0x14, 0x14, 0x1F, 0x00,
+	0x10, 0x10, 0x10, 0xF0, 0x00,
+	0x00, 0x00, 0x00, 0x1F, 0x10,
+	0x10, 0x10, 0x10, 0x1F, 0x10,
+	0x10, 0x10, 0x10, 0xF0, 0x10,
+	0x00, 0x00, 0x00, 0xFF, 0x10,
+	0x10, 0x10, 0x10, 0x10, 0x10,
+	0x10, 0x10, 0x10, 0xFF, 0x10,
+	0x00, 0x00, 0x00, 0xFF, 0x14,
+	0x00, 0x00, 0xFF, 0x00, 0xFF,
+	0x00, 0x00, 0x1F, 0x10, 0x17,
+	0x00, 0x00, 0xFC, 0x04, 0xF4,
+	0x14, 0x14, 0x17, 0x10, 0x17,
+	0x14, 0x14, 0xF4, 0x04, 0xF4,
+	0x00, 0x00, 0xFF, 0x00, 0xF7,
+	0x14, 0x14, 0x14, 0x14, 0x14,
+	0x14, 0x14, 0xF7, 0x00, 0xF7,
+	0x14, 0x14, 0x14, 0x17, 0x14,
+	0x10, 0x10, 0x1F, 0x10, 0x1F,
+	0x14, 0x14, 0x14, 0xF4, 0x14,
+	0x10, 0x10, 0xF0, 0x10, 0xF0,
+	0x00, 0x00, 0x1F, 0x10, 0x1F,
+	0x00, 0x00, 0x00, 0x1F, 0x14,
+	0x00, 0x00, 0x00, 0xFC, 0x14,
+	0x00, 0x00, 0xF0, 0x10, 0xF0,
+	0x10, 0x10, 0xFF, 0x10, 0xFF,
+	0x14, 0x14, 0x14, 0xFF, 0x14,
+	0x10, 0x10, 0x10, 0x1F, 0x00,
+	0x00, 0x00, 0x00, 0xF0, 0x10,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0xF0, 0xF0, 0xF0, 0xF0, 0xF0,
+	0xFF, 0xFF, 0xFF, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0xFF, 0xFF,
+	0x0F, 0x0F, 0x0F, 0x0F, 0x0F,
+	0x38, 0x44, 0x44, 0x38, 0x44,
+	0xFC, 0x4A, 0x4A, 0x4A, 0x34, // sharp-s or beta
+	0x7E, 0x02, 0x02, 0x06, 0x06,
+	0x02, 0x7E, 0x02, 0x7E, 0x02,
+	0x63, 0x55, 0x49, 0x41, 0x63,
+	0x38, 0x44, 0x44, 0x3C, 0x04,
+	0x40, 0x7E, 0x20, 0x1E, 0x20,
+	0x06, 0x02, 0x7E, 0x02, 0x02,
+	0x99, 0xA5, 0xE7, 0xA5, 0x99,
+	0x1C, 0x2A, 0x49, 0x2A, 0x1C,
+	0x4C, 0x72, 0x01, 0x72, 0x4C,
+	0x30, 0x4A, 0x4D, 0x4D, 0x30,
+	0x30, 0x48, 0x78, 0x48, 0x30,
+	0xBC, 0x62, 0x5A, 0x46, 0x3D,
+	0x3E, 0x49, 0x49, 0x49, 0x00,
+	0x7E, 0x01, 0x01, 0x01, 0x7E,
+	0x2A, 0x2A, 0x2A, 0x2A, 0x2A,
+	0x44, 0x44, 0x5F, 0x44, 0x44,
+	0x40, 0x51, 0x4A, 0x44, 0x40,
+	0x40, 0x44, 0x4A, 0x51, 0x40,
+	0x00, 0x00, 0xFF, 0x01, 0x03,
+	0xE0, 0x80, 0xFF, 0x00, 0x00,
+	0x08, 0x08, 0x6B, 0x6B, 0x08,
+	0x36, 0x12, 0x36, 0x24, 0x36,
+	0x06, 0x0F, 0x09, 0x0F, 0x06,
+	0x00, 0x00, 0x18, 0x18, 0x00,
+	0x00, 0x00, 0x10, 0x10, 0x00,
+	0x30, 0x40, 0xFF, 0x01, 0x01,
+	0x00, 0x1F, 0x01, 0x01, 0x1E,
+	0x00, 0x19, 0x1D, 0x17, 0x12,
+	0x00, 0x3C, 0x3C, 0x3C, 0x3C,
+	0x00, 0x00, 0x00, 0x00, 0x00  // #255 NBSP
+};
diff --git a/generic/image.c b/generic/image.c
new file mode 100755
index 0000000..f30fcad
--- /dev/null
+++ b/generic/image.c
@@ -0,0 +1,2296 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/image.c"
+#else
+
+#undef MAX
+#define MAX(a,b) ( ((a)>(b)) ? (a) : (b) )
+
+#undef MIN
+#define MIN(a,b) ( ((a)<(b)) ? (a) : (b) )
+
+#undef TAPI
+#define TAPI __declspec(dllimport)
+
+#ifndef M_PI
+#define M_PI    3.14159265358979323846
+#endif
+
+#undef temp_t
+#if defined(TH_REAL_IS_FLOAT) || defined(TH_REAL_IS_DOUBLE)
+#define temp_t real
+#else
+#define temp_t float
+#endif
+
+
+static inline real image_(FromIntermediate)(temp_t x) {
+#ifdef TH_REAL_IS_BYTE
+  x += 0.5;
+  if( x <= 0 ) return 0;
+  if( x >= 255 ) return 255;
+#endif
+  return x;
+}
+
+
+static void image_(Main_op_validate)( lua_State *L,  THTensor *Tsrc, THTensor *Tdst){
+
+  long src_depth = 1;
+  long dst_depth = 1;
+
+  luaL_argcheck(L, Tsrc->nDimension==2 || Tsrc->nDimension==3, 1, "rotate: src not 2 or 3 dimensional");
+  luaL_argcheck(L, Tdst->nDimension==2 || Tdst->nDimension==3, 2, "rotate: dst not 2 or 3 dimensional");
+
+  if(Tdst->nDimension == 3) dst_depth =  Tdst->size[0];
+  if(Tsrc->nDimension == 3) src_depth =  Tsrc->size[0];
+
+  if( (Tdst->nDimension==3 && ( src_depth!=dst_depth)) ||
+      (Tdst->nDimension!=Tsrc->nDimension) )
+    luaL_error(L, "image.scale: src and dst depths do not match");
+
+  if( Tdst->nDimension==3 && ( src_depth!=dst_depth) )
+    luaL_error(L, "image.scale: src and dst depths do not match");
+}
+
+static long image_(Main_op_stride)( THTensor *T,int i){
+  if (T->nDimension == 2) {
+    if (i == 0) return 0;
+    else return T->stride[i-1];
+  }
+  return T->stride[i];
+}
+
+static long image_(Main_op_depth)( THTensor *T){
+  if(T->nDimension == 3) return T->size[0]; /* rgb or rgba */
+  return 1; /* greyscale */
+}
+
+static void image_(Main_scaleLinear_rowcol)(THTensor *Tsrc,
+                                            THTensor *Tdst,
+                                            long src_start,
+                                            long dst_start,
+                                            long src_stride,
+                                            long dst_stride,
+                                            long src_len,
+                                            long dst_len ) {
+
+  real *src= THTensor_(data)(Tsrc);
+  real *dst= THTensor_(data)(Tdst);
+
+  if ( dst_len > src_len ){
+    long di;
+    float si_f;
+    long si_i;
+    float scale = (float)(src_len - 1) / (dst_len - 1);
+
+    if ( src_len == 1 ) {
+      for( di = 0; di < dst_len - 1; di++ ) {
+        long dst_pos = dst_start + di*dst_stride;
+        dst[dst_pos] = src[ src_start ];
+      }
+    } else {
+      for( di = 0; di < dst_len - 1; di++ ) {
+        long dst_pos = dst_start + di*dst_stride;
+        si_f = di * scale; si_i = (long)si_f; si_f -= si_i;
+
+        dst[dst_pos] = image_(FromIntermediate)(
+            (1 - si_f) * src[ src_start + si_i * src_stride ] +
+            si_f * src[ src_start + (si_i + 1) * src_stride ]);
+      }
+    }
+
+    dst[ dst_start + (dst_len - 1) * dst_stride ] =
+      src[ src_start + (src_len - 1) * src_stride ];
+  }
+  else if ( dst_len < src_len ) {
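+    /* Downscaling: average every source sample covered by a destination cell,
+       weighting the partially covered samples at both ends by their fractional
+       overlap. */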
+    long di;
+    long si0_i = 0; float si0_f = 0;
+    long si1_i; float si1_f;
+    long si;
+    float scale = (float)src_len / dst_len;
+    float acc, n;
+
+    for( di = 0; di < dst_len; di++ )
+      {
+        si1_f = (di + 1) * scale; si1_i = (long)si1_f; si1_f -= si1_i;
+        acc = (1 - si0_f) * src[ src_start + si0_i * src_stride ];
+        n = 1 - si0_f;
+        for( si = si0_i + 1; si < si1_i; si++ )
+          {
+            acc += src[ src_start + si * src_stride ];
+            n += 1;
+          }
+        if( si1_i < src_len )
+          {
+            acc += si1_f * src[ src_start + si1_i*src_stride ];
+            n += si1_f;
+          }
+        dst[ dst_start + di*dst_stride ] = image_(FromIntermediate)(acc / n);
+        si0_i = si1_i; si0_f = si1_f;
+      }
+  }
+  else {
+    long i;
+    for( i = 0; i < dst_len; i++ )
+      dst[ dst_start + i*dst_stride ] = src[ src_start + i*src_stride ];
+  }
+}
+
+
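+/* Catmull-Rom style cubic interpolation between p1 and p2, with p0 and p3 as
+   outer support points; x in [0, 1] is the fractional position between p1 and p2. */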
+static inline temp_t image_(Main_cubicInterpolate)(temp_t p0,
+                                                   temp_t p1,
+                                                   temp_t p2,
+                                                   temp_t p3,
+                                                   temp_t x) {
+  temp_t a0 = p1;
+  temp_t a1 = p2 - p0;
+  temp_t a2 = 2 * p0 - 5 * p1 + 4 * p2 - p3;
+  temp_t a3 = 3 * (p1 - p2) + p3 - p0;
+  return a0 + 0.5 * x * (a1 + x * (a2 + x * a3));
+}
+
+
+static void image_(Main_scaleCubic_rowcol)(THTensor *Tsrc,
+                                           THTensor *Tdst,
+                                           long src_start,
+                                           long dst_start,
+                                           long src_stride,
+                                           long dst_stride,
+                                           long src_len,
+                                           long dst_len ) {
+
+  real *src= THTensor_(data)(Tsrc);
+  real *dst= THTensor_(data)(Tdst);
+
+  if ( dst_len == src_len ){
+    long i;
+    for( i = 0; i < dst_len; i++ )
+      dst[ dst_start + i*dst_stride ] = src[ src_start + i*src_stride ];
+  } else if ( src_len == 1 ) {
+     long i;
+     for( i = 0; i < dst_len - 1; i++ ) {
+       long dst_pos = dst_start + i*dst_stride;
+       dst[dst_pos] = src[ src_start ];
+     }
+  } else {
+    long di;
+    float si_f;
+    long si_i;
+    float scale;
+    if (dst_len == 1)
+      scale = (float)(src_len - 1);
+    else
+      scale = (float)(src_len - 1) / (dst_len - 1);
+
+    for( di = 0; di < dst_len - 1; di++ ) {
+      long dst_pos = dst_start + di*dst_stride;
+      si_f = di * scale; si_i = (long)si_f; si_f -= si_i;
+
+      temp_t p0;
+      temp_t p1 = src[ src_start + si_i * src_stride ];
+      temp_t p2 = src[ src_start + (si_i + 1) * src_stride ];
+      temp_t p3;
+      if (si_i > 0) {
+        p0 = src[ src_start + (si_i - 1) * src_stride ];
+      } else {
+        p0 = 2 * p1 - p2;
+      }
+      if (si_i + 2 < src_len) {
+        p3 = src[ src_start + (si_i + 2) * src_stride ];
+      } else {
+        p3 = 2 * p2 - p1;
+      }
+
+      temp_t value = image_(Main_cubicInterpolate)(p0, p1, p2, p3, si_f);
+      dst[dst_pos] = image_(FromIntermediate)(value);
+    }
+
+    dst[ dst_start + (dst_len - 1) * dst_stride ] =
+      src[ src_start + (src_len - 1) * src_stride ];
+  }
+}
+
+static int image_(Main_scaleBilinear)(lua_State *L) {
+
+  THTensor *Tsrc = luaT_checkudata(L, 1, torch_Tensor);
+  THTensor *Tdst = luaT_checkudata(L, 2, torch_Tensor);
+  THTensor *Ttmp;
+  long dst_stride0, dst_stride1, dst_stride2, dst_width, dst_height;
+  long src_stride0, src_stride1, src_stride2, src_width, src_height;
+  long tmp_stride0, tmp_stride1, tmp_stride2, tmp_width, tmp_height;
+  long i, j, k;
+
+  image_(Main_op_validate)(L, Tsrc,Tdst);
+
+  int ndims;
+  if (Tdst->nDimension == 3) ndims = 3;
+  else ndims = 2;
+
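+  /* Separable rescale: Ttmp keeps the source height but the destination width,
+     so rows are resampled first and columns second. */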
+  Ttmp = THTensor_(newWithSize2d)(Tsrc->size[ndims-2], Tdst->size[ndims-1]);
+
+  dst_stride0= image_(Main_op_stride)(Tdst,0);
+  dst_stride1= image_(Main_op_stride)(Tdst,1);
+  dst_stride2= image_(Main_op_stride)(Tdst,2);
+  src_stride0= image_(Main_op_stride)(Tsrc,0);
+  src_stride1= image_(Main_op_stride)(Tsrc,1);
+  src_stride2= image_(Main_op_stride)(Tsrc,2);
+  tmp_stride0= image_(Main_op_stride)(Ttmp,0);
+  tmp_stride1= image_(Main_op_stride)(Ttmp,1);
+  tmp_stride2= image_(Main_op_stride)(Ttmp,2);
+  dst_width=   Tdst->size[ndims-1];
+  dst_height=  Tdst->size[ndims-2];
+  src_width=   Tsrc->size[ndims-1];
+  src_height=  Tsrc->size[ndims-2];
+  tmp_width=   Ttmp->size[1];
+  tmp_height=  Ttmp->size[0];
+
+  for(k=0;k<image_(Main_op_depth)(Tsrc);k++) {
+    /* compress/expand rows first */
+    for(j = 0; j < src_height; j++) {
+      image_(Main_scaleLinear_rowcol)(Tsrc,
+                                      Ttmp,
+                                      0*src_stride2+j*src_stride1+k*src_stride0,
+                                      0*tmp_stride2+j*tmp_stride1+k*tmp_stride0,
+                                      src_stride2,
+                                      tmp_stride2,
+                                      src_width,
+                                      tmp_width );
+
+    }
+
+    /* then columns */
+    for(i = 0; i < dst_width; i++) {
+      image_(Main_scaleLinear_rowcol)(Ttmp,
+                                      Tdst,
+                                      i*tmp_stride2+0*tmp_stride1+k*tmp_stride0,
+                                      i*dst_stride2+0*dst_stride1+k*dst_stride0,
+                                      tmp_stride1,
+                                      dst_stride1,
+                                      tmp_height,
+                                      dst_height );
+    }
+  }
+  THTensor_(free)(Ttmp);
+  return 0;
+}
+
+static int image_(Main_scaleBicubic)(lua_State *L) {
+
+  THTensor *Tsrc = luaT_checkudata(L, 1, torch_Tensor);
+  THTensor *Tdst = luaT_checkudata(L, 2, torch_Tensor);
+  THTensor *Ttmp;
+  long dst_stride0, dst_stride1, dst_stride2, dst_width, dst_height;
+  long src_stride0, src_stride1, src_stride2, src_width, src_height;
+  long tmp_stride0, tmp_stride1, tmp_stride2, tmp_width, tmp_height;
+  long i, j, k;
+
+  image_(Main_op_validate)(L, Tsrc,Tdst);
+
+  int ndims;
+  if (Tdst->nDimension == 3) ndims = 3;
+  else ndims = 2;
+
+  Ttmp = THTensor_(newWithSize2d)(Tsrc->size[ndims-2], Tdst->size[ndims-1]);
+
+  dst_stride0= image_(Main_op_stride)(Tdst,0);
+  dst_stride1= image_(Main_op_stride)(Tdst,1);
+  dst_stride2= image_(Main_op_stride)(Tdst,2);
+  src_stride0= image_(Main_op_stride)(Tsrc,0);
+  src_stride1= image_(Main_op_stride)(Tsrc,1);
+  src_stride2= image_(Main_op_stride)(Tsrc,2);
+  tmp_stride0= image_(Main_op_stride)(Ttmp,0);
+  tmp_stride1= image_(Main_op_stride)(Ttmp,1);
+  tmp_stride2= image_(Main_op_stride)(Ttmp,2);
+  dst_width=   Tdst->size[ndims-1];
+  dst_height=  Tdst->size[ndims-2];
+  src_width=   Tsrc->size[ndims-1];
+  src_height=  Tsrc->size[ndims-2];
+  tmp_width=   Ttmp->size[1];
+  tmp_height=  Ttmp->size[0];
+
+  for(k=0;k<image_(Main_op_depth)(Tsrc);k++) {
+    /* compress/expand rows first */
+    for(j = 0; j < src_height; j++) {
+      image_(Main_scaleCubic_rowcol)(Tsrc,
+                                     Ttmp,
+                                     0*src_stride2+j*src_stride1+k*src_stride0,
+                                     0*tmp_stride2+j*tmp_stride1+k*tmp_stride0,
+                                     src_stride2,
+                                     tmp_stride2,
+                                     src_width,
+                                     tmp_width );
+    }
+
+    /* then columns */
+    for(i = 0; i < dst_width; i++) {
+      image_(Main_scaleCubic_rowcol)(Ttmp,
+                                     Tdst,
+                                     i*tmp_stride2+0*tmp_stride1+k*tmp_stride0,
+                                     i*dst_stride2+0*dst_stride1+k*dst_stride0,
+                                     tmp_stride1,
+                                     dst_stride1,
+                                     tmp_height,
+                                     dst_height );
+    }
+  }
+  THTensor_(free)(Ttmp);
+  return 0;
+}
+
+static int image_(Main_scaleSimple)(lua_State *L)
+{
+  THTensor *Tsrc = luaT_checkudata(L, 1, torch_Tensor);
+  THTensor *Tdst = luaT_checkudata(L, 2, torch_Tensor);
+  real *src, *dst;
+  long dst_stride0, dst_stride1, dst_stride2, dst_width, dst_height, dst_depth;
+  long src_stride0, src_stride1, src_stride2, src_width, src_height, src_depth;
+  long i, j, k;
+  float scx, scy;
+
+  luaL_argcheck(L, Tsrc->nDimension==2 || Tsrc->nDimension==3, 1, "image.scale: src not 2 or 3 dimensional");
+  luaL_argcheck(L, Tdst->nDimension==2 || Tdst->nDimension==3, 2, "image.scale: dst not 2 or 3 dimensional");
+
+  src= THTensor_(data)(Tsrc);
+  dst= THTensor_(data)(Tdst);
+
+  dst_stride0 = 0;
+  dst_stride1 = Tdst->stride[Tdst->nDimension-2];
+  dst_stride2 = Tdst->stride[Tdst->nDimension-1];
+  dst_depth =  0;
+  dst_height = Tdst->size[Tdst->nDimension-2];
+  dst_width = Tdst->size[Tdst->nDimension-1];
+  if(Tdst->nDimension == 3) {
+    dst_stride0 = Tdst->stride[0];
+    dst_depth = Tdst->size[0];
+  }
+
+  src_stride0 = 0;
+  src_stride1 = Tsrc->stride[Tsrc->nDimension-2];
+  src_stride2 = Tsrc->stride[Tsrc->nDimension-1];
+  src_depth =  0;
+  src_height = Tsrc->size[Tsrc->nDimension-2];
+  src_width = Tsrc->size[Tsrc->nDimension-1];
+  if(Tsrc->nDimension == 3) {
+    src_stride0 = Tsrc->stride[0];
+    src_depth = Tsrc->size[0];
+  }
+
+  if( (Tdst->nDimension==3 && ( src_depth!=dst_depth)) ||
+      (Tdst->nDimension!=Tsrc->nDimension) ) {
+    printf("image.scale:%d,%d,%ld,%ld\n",Tsrc->nDimension,Tdst->nDimension,src_depth,dst_depth);
+    luaL_error(L, "image.scale: src and dst depths do not match");
+  }
+
+  if( Tdst->nDimension==3 && ( src_depth!=dst_depth) )
+    luaL_error(L, "image.scale: src and dst depths do not match");
+
+  /* printf("%d,%d -> %d,%d\n",src_width,src_height,dst_width,dst_height); */
+  scx=((float)src_width)/((float)dst_width);
+  scy=((float)src_height)/((float)dst_height);
+
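+  /* Nearest-source resample: every destination pixel copies the source pixel at the
+     floored, scaled coordinates, clamped to the last source row/column. */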
+#pragma omp parallel for private(j, i, k)
+  for(j = 0; j < dst_height; j++) {
+    for(i = 0; i < dst_width; i++) {
+      float val = 0.0;
+      long ii=(long) (((float)i)*scx);
+      long jj=(long) (((float)j)*scy);
+      if(ii>src_width-1) ii=src_width-1;
+      if(jj>src_height-1) jj=src_height-1;
+
+      if(Tsrc->nDimension==2)
+        {
+          val=src[ii*src_stride2+jj*src_stride1];
+          dst[i*dst_stride2+j*dst_stride1] = image_(FromIntermediate)(val);
+        }
+      else
+        {
+          for(k=0;k<src_depth;k++)
+            {
+              val=src[ii*src_stride2+jj*src_stride1+k*src_stride0];
+              dst[i*dst_stride2+j*dst_stride1+k*dst_stride0] = image_(FromIntermediate)(val);
+            }
+        }
+    }
+  }
+  return 0;
+}
+
+static int image_(Main_rotate)(lua_State *L)
+{
+  THTensor *Tsrc = luaT_checkudata(L, 1, torch_Tensor);
+  THTensor *Tdst = luaT_checkudata(L, 2, torch_Tensor);
+  float theta = luaL_checknumber(L, 3);
+  float cos_theta, sin_theta;
+  real *src, *dst;
+  long dst_stride0, dst_stride1, dst_stride2, dst_width, dst_height, dst_depth;
+  long src_stride0, src_stride1, src_stride2, src_width, src_height, src_depth;
+  long i, j, k;
+  float xc, yc;
+  float id,jd;
+  long ii,jj;
+
+  luaL_argcheck(L, Tsrc->nDimension==2 || Tsrc->nDimension==3, 1, "rotate: src not 2 or 3 dimensional");
+  luaL_argcheck(L, Tdst->nDimension==2 || Tdst->nDimension==3, 2, "rotate: dst not 2 or 3 dimensional");
+
+  src= THTensor_(data)(Tsrc);
+  dst= THTensor_(data)(Tdst);
+
+  if (dst == src) {
+    luaL_error(L, "image.rotate: in-place rotate not supported");
+  }
+
+  dst_stride0 = 0;
+  dst_stride1 = Tdst->stride[Tdst->nDimension-2];
+  dst_stride2 = Tdst->stride[Tdst->nDimension-1];
+  dst_depth =  0;
+  dst_height = Tdst->size[Tdst->nDimension-2];
+  dst_width = Tdst->size[Tdst->nDimension-1];
+  if(Tdst->nDimension == 3) {
+    dst_stride0 = Tdst->stride[0];
+    dst_depth = Tdst->size[0];
+  }
+
+  src_stride0 = 0;
+  src_stride1 = Tsrc->stride[Tsrc->nDimension-2];
+  src_stride2 = Tsrc->stride[Tsrc->nDimension-1];
+  src_depth =  0;
+  src_height = Tsrc->size[Tsrc->nDimension-2];
+  src_width = Tsrc->size[Tsrc->nDimension-1];
+  if(Tsrc->nDimension == 3) {
+    src_stride0 = Tsrc->stride[0];
+    src_depth = Tsrc->size[0];
+  }
+
+  if( Tsrc->nDimension==3 && Tdst->nDimension==3 && ( src_depth!=dst_depth) )
+    luaL_error(L, "image.rotate: src and dst depths do not match");
+
+  if( (Tsrc->nDimension!=Tdst->nDimension) )
+    luaL_error(L, "image.rotate: src and dst depths do not match");
+
+  xc = (src_width-1)/2.0;
+  yc = (src_height-1)/2.0;
+
+  sin_theta = sin(theta);
+  cos_theta = cos(theta);
+
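+  /* Inverse mapping: rotate each destination pixel back around the centre (xc, yc)
+     and copy the nearest source pixel; coordinates landing outside the source are
+     written as 0. */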
+  for(j = 0; j < dst_height; j++) {
+    jd=j;
+    for(i = 0; i < dst_width; i++) {
+      float val = -1;
+      id= i;
+
+      ii = (long) round(cos_theta*(id-xc) - sin_theta*(jd-yc) + xc);
+      jj = (long) round(cos_theta*(jd-yc) + sin_theta*(id-xc) + yc);
+
+      /* rotated corners are blank */
+      if(ii>src_width-1) val=0;
+      if(jj>src_height-1) val=0;
+      if(ii<0) val=0;
+      if(jj<0) val=0;
+
+      if(Tsrc->nDimension==2)
+        {
+          if(val==-1)
+            val=src[ii*src_stride2+jj*src_stride1];
+          dst[i*dst_stride2+j*dst_stride1] = image_(FromIntermediate)(val);
+        }
+      else
+        {
+          int do_copy=0; if(val==-1) do_copy=1;
+          for(k=0;k<src_depth;k++)
+            {
+              if(do_copy)
+                val=src[ii*src_stride2+jj*src_stride1+k*src_stride0];
+              dst[i*dst_stride2+j*dst_stride1+k*dst_stride0] = image_(FromIntermediate)(val);
+            }
+        }
+    }
+  }
+  return 0;
+}
+static int image_(Main_rotateBilinear)(lua_State *L)
+{
+  THTensor *Tsrc = luaT_checkudata(L, 1, torch_Tensor);
+  THTensor *Tdst = luaT_checkudata(L, 2, torch_Tensor);
+  float theta = luaL_checknumber(L, 3);
+  real *src, *dst;
+  long dst_stride0, dst_stride1, dst_stride2, dst_width, dst_height, dst_depth;
+  long src_stride0, src_stride1, src_stride2, src_width, src_height, src_depth;
+  long i, j, k;
+  float xc, yc;
+  float id,jd;
+  long ii_0, ii_1, jj_0, jj_1;
+
+  luaL_argcheck(L, Tsrc->nDimension==2 || Tsrc->nDimension==3, 1, "rotate: src not 2 or 3 dimensional");
+  luaL_argcheck(L, Tdst->nDimension==2 || Tdst->nDimension==3, 2, "rotate: dst not 2 or 3 dimensional");
+
+  src= THTensor_(data)(Tsrc);
+  dst= THTensor_(data)(Tdst);
+
+  if (dst == src) {
+    luaL_error(L, "image.rotate: in-place rotate not supported");
+  }
+
+  dst_stride0 = 0;
+  dst_stride1 = Tdst->stride[Tdst->nDimension-2];
+  dst_stride2 = Tdst->stride[Tdst->nDimension-1];
+  dst_depth =  0;
+  dst_height = Tdst->size[Tdst->nDimension-2];
+  dst_width = Tdst->size[Tdst->nDimension-1];
+  if(Tdst->nDimension == 3) {
+    dst_stride0 = Tdst->stride[0];
+    dst_depth = Tdst->size[0];
+  }
+
+  src_stride0 = 0;
+  src_stride1 = Tsrc->stride[Tsrc->nDimension-2];
+  src_stride2 = Tsrc->stride[Tsrc->nDimension-1];
+  src_depth =  0;
+  src_height = Tsrc->size[Tsrc->nDimension-2];
+  src_width = Tsrc->size[Tsrc->nDimension-1];
+  if(Tsrc->nDimension == 3) {
+    src_stride0 = Tsrc->stride[0];
+    src_depth = Tsrc->size[0];
+  }
+
+  if( Tsrc->nDimension==3 && Tdst->nDimension==3 && ( src_depth!=dst_depth) )
+    luaL_error(L, "image.rotate: src and dst depths do not match");
+
+  if( (Tsrc->nDimension!=Tdst->nDimension) )
+    luaL_error(L, "image.rotate: src and dst depths do not match");
+
+  xc = (src_width-1)/2.0;
+  yc = (src_height-1)/2.0;
+
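+  /* Inverse mapping with bilinear interpolation: rotate each destination pixel back
+     into source coordinates and blend the four surrounding source pixels with the
+     fractional weights (wi, wj). */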
+  for(j = 0; j < dst_height; j++) {
+    jd=j;
+    for(i = 0; i < dst_width; i++) {
+      float val = -1;
+      temp_t ri, rj, wi, wj;
+      id= i;
+      ri = cos(theta)*(id-xc)-sin(theta)*(jd-yc);
+      rj = cos(theta)*(jd-yc)+sin(theta)*(id-xc);
+
+      ii_0 = (long)floor(ri+xc);
+      ii_1 = ii_0 + 1;
+      jj_0 = (long)floor(rj+yc);
+      jj_1 = jj_0 + 1;
+      wi = ri+xc-ii_0;
+      wj = rj+yc-jj_0;
+
+      /* default to the closest value when interpolating on image boundaries (either image pixel or 0) */
+      if(ii_1==src_width && wi<0.5) ii_1 = ii_0;
+      else if(ii_1>=src_width) val=0;
+      if(jj_1==src_height && wj<0.5) jj_1 = jj_0;
+      else if(jj_1>=src_height) val=0;
+      if(ii_0==-1 && wi>0.5) ii_0 = ii_1;
+      else if(ii_0<0) val=0;
+      if(jj_0==-1 && wj>0.5) jj_0 = jj_1;
+      else if(jj_0<0) val=0;
+
+      if(Tsrc->nDimension==2) {
+        if(val==-1)
+          val = (1.0 - wi) * (1.0 - wj) * src[ii_0*src_stride2+jj_0*src_stride1]
+            + wi * (1.0 - wj) * src[ii_1*src_stride2+jj_0*src_stride1]
+            + (1.0 - wi) * wj * src[ii_0*src_stride2+jj_1*src_stride1]
+            + wi * wj * src[ii_1*src_stride2+jj_1*src_stride1];
+        dst[i*dst_stride2+j*dst_stride1] = image_(FromIntermediate)(val);
+      } else {
+        int do_copy=0; if(val==-1) do_copy=1;
+        for(k=0;k<src_depth;k++) {
+          if(do_copy) {
+            val = (1.0 - wi) * (1.0 - wj) * src[ii_0*src_stride2+jj_0*src_stride1+k*src_stride0]
+              + wi * (1.0 - wj) * src[ii_1*src_stride2+jj_0*src_stride1+k*src_stride0]
+              + (1.0 - wi) * wj * src[ii_0*src_stride2+jj_1*src_stride1+k*src_stride0]
+              + wi * wj * src[ii_1*src_stride2+jj_1*src_stride1+k*src_stride0];
+          }
+          dst[i*dst_stride2+j*dst_stride1+k*dst_stride0] = image_(FromIntermediate)(val);
+        }
+      }
+    }
+  }
+  return 0;
+}
+
+static int image_(Main_polar)(lua_State *L)
+{
+    THTensor *Tsrc = luaT_checkudata(L, 1, torch_Tensor);
+    THTensor *Tdst = luaT_checkudata(L, 2, torch_Tensor);
+    float doFull = luaL_checknumber(L, 3);
+    real *src, *dst;
+    long dst_stride0, dst_stride1, dst_stride2, dst_width, dst_height, dst_depth;
+    long src_stride0, src_stride1, src_stride2, src_width, src_height, src_depth;
+    long i, j, k;
+    float id, jd, a, r, m, midY, midX;
+    long ii,jj;
+
+    luaL_argcheck(L, Tsrc->nDimension==2 || Tsrc->nDimension==3, 1, "polar: src not 2 or 3 dimensional");
+    luaL_argcheck(L, Tdst->nDimension==2 || Tdst->nDimension==3, 2, "polar: dst not 2 or 3 dimensional");
+
+    src= THTensor_(data)(Tsrc);
+    dst= THTensor_(data)(Tdst);
+
+    dst_stride0 = 0;
+    dst_stride1 = Tdst->stride[Tdst->nDimension-2];
+    dst_stride2 = Tdst->stride[Tdst->nDimension-1];
+    dst_depth =  0;
+    dst_height = Tdst->size[Tdst->nDimension-2];
+    dst_width = Tdst->size[Tdst->nDimension-1];
+    if(Tdst->nDimension == 3) {
+        dst_stride0 = Tdst->stride[0];
+        dst_depth = Tdst->size[0];
+    }
+
+    src_stride0 = 0;
+    src_stride1 = Tsrc->stride[Tsrc->nDimension-2];
+    src_stride2 = Tsrc->stride[Tsrc->nDimension-1];
+    src_depth =  0;
+    src_height = Tsrc->size[Tsrc->nDimension-2];
+    src_width = Tsrc->size[Tsrc->nDimension-1];
+    if(Tsrc->nDimension == 3) {
+        src_stride0 = Tsrc->stride[0];
+        src_depth = Tsrc->size[0];
+    }
+
+    if( Tsrc->nDimension==3 && Tdst->nDimension==3 && ( src_depth!=dst_depth) ) {
+        luaL_error(L, "image.polar: src and dst depths do not match"); }
+
+    if( (Tsrc->nDimension!=Tdst->nDimension) ) {
+        luaL_error(L, "image.polar: src and dst depths do not match"); }
+
+    // compute maximum distance
+    midY = (float) src_height / 2.0;
+    midX = (float) src_width  / 2.0;
+    if(doFull == 1) {
+      m = sqrt((float) src_width * (float) src_width + (float) src_height * (float) src_height) / 2.0;
+    }
+    else {
+      m = (src_width < src_height) ? midX : midY;
+    }
+
+    // loop to fill polar image
+    for(j = 0; j < dst_height; j++) {               // orientation loop
+        jd = (float) j;
+        a = (2 * M_PI * jd) / (float) dst_height;   // current angle
+        for(i = 0; i < dst_width; i++) {            // radius loop
+            float val = -1;
+            id = (float) i;
+            r = (m * id) / (float) dst_width;       // current distance
+
+            jj = (long) floor( r * cos(a) + midY);  // y-location in source image
+            ii = (long) floor(-r * sin(a) + midX);  // x-location in source image
+
+            if(ii>src_width-1) val=0;
+            if(jj>src_height-1) val=0;
+            if(ii<0) val=0;
+            if(jj<0) val=0;
+
+            if(Tsrc->nDimension==2)
+            {
+                if(val==-1)
+                    val=src[ii*src_stride2+jj*src_stride1];
+                dst[i*dst_stride2+j*dst_stride1] = image_(FromIntermediate)(val);
+            }
+            else
+            {
+                int do_copy=0; if(val==-1) do_copy=1;
+                for(k=0;k<src_depth;k++)
+                {
+                    if(do_copy)
+                        val=src[ii*src_stride2+jj*src_stride1+k*src_stride0];
+                    dst[i*dst_stride2+j*dst_stride1+k*dst_stride0] = image_(FromIntermediate)(val);
+                }
+            }
+        }
+    }
+    return 0;
+}
+static int image_(Main_polarBilinear)(lua_State *L)
+{
+    THTensor *Tsrc = luaT_checkudata(L, 1, torch_Tensor);
+    THTensor *Tdst = luaT_checkudata(L, 2, torch_Tensor);
+    float doFull = luaL_checknumber(L, 3);
+    real *src, *dst;
+    long dst_stride0, dst_stride1, dst_stride2, dst_width, dst_height, dst_depth;
+    long src_stride0, src_stride1, src_stride2, src_width, src_height, src_depth;
+    long i, j, k;
+    float id, jd, a, r, m, midY, midX;
+    long ii_0, ii_1, jj_0, jj_1;
+
+    luaL_argcheck(L, Tsrc->nDimension==2 || Tsrc->nDimension==3, 1, "polar: src not 2 or 3 dimensional");
+    luaL_argcheck(L, Tdst->nDimension==2 || Tdst->nDimension==3, 2, "polar: dst not 2 or 3 dimensional");
+
+    src= THTensor_(data)(Tsrc);
+    dst= THTensor_(data)(Tdst);
+
+    dst_stride0 = 0;
+    dst_stride1 = Tdst->stride[Tdst->nDimension-2];
+    dst_stride2 = Tdst->stride[Tdst->nDimension-1];
+    dst_depth =  0;
+    dst_height = Tdst->size[Tdst->nDimension-2];
+    dst_width = Tdst->size[Tdst->nDimension-1];
+    if(Tdst->nDimension == 3) {
+        dst_stride0 = Tdst->stride[0];
+        dst_depth = Tdst->size[0];
+    }
+
+    src_stride0 = 0;
+    src_stride1 = Tsrc->stride[Tsrc->nDimension-2];
+    src_stride2 = Tsrc->stride[Tsrc->nDimension-1];
+    src_depth =  0;
+    src_height = Tsrc->size[Tsrc->nDimension-2];
+    src_width = Tsrc->size[Tsrc->nDimension-1];
+    if(Tsrc->nDimension == 3) {
+        src_stride0 = Tsrc->stride[0];
+        src_depth = Tsrc->size[0];
+    }
+
+    if( Tsrc->nDimension==3 && Tdst->nDimension==3 && ( src_depth!=dst_depth) ) {
+        luaL_error(L, "image.polar: src and dst depths do not match"); }
+
+    if( (Tsrc->nDimension!=Tdst->nDimension) ) {
+        luaL_error(L, "image.polar: src and dst depths do not match"); }
+
+    // compute maximum distance
+    midY = (float) src_height / 2.0;
+    midX = (float) src_width  / 2.0;
+    if(doFull == 1) {
+      m = sqrt((float) src_width * (float) src_width + (float) src_height * (float) src_height) / 2.0;
+    }
+    else {
+      m = (src_width < src_height) ? midX : midY;
+    }
+
+    // loop to fill polar image
+    for(j = 0; j < dst_height; j++) {                 // orientation loop
+        jd = (float) j;
+        a = (2 * M_PI * jd) / (float) dst_height;     // current angle
+        for(i = 0; i < dst_width; i++) {              // radius loop
+            float val = -1;
+            temp_t ri, rj, wi, wj;
+            id = (float) i;
+            r = (m * id) / (float) dst_width;         // current distance
+
+            rj =  r * cos(a) + midY;                  // y-location in source image
+            ri = -r * sin(a) + midX;                  // x-location in source image
+
+            ii_0=(long)floor(ri);
+            ii_1=ii_0 + 1;
+            jj_0=(long)floor(rj);
+            jj_1=jj_0 + 1;
+            wi = ri - ii_0;
+            wj = rj - jj_0;
+
+            // switch to nearest interpolation when bilinear is impossible
+            if(ii_1>src_width-1 || jj_1>src_height-1 || ii_0<0 || jj_0<0) {
+                if(ii_0>src_width-1) val=0;
+                if(jj_0>src_height-1) val=0;
+                if(ii_0<0) val=0;
+                if(jj_0<0) val=0;
+
+                if(Tsrc->nDimension==2)
+                {
+                    if(val==-1)
+                        val=src[ii_0*src_stride2+jj_0*src_stride1];
+                    dst[i*dst_stride2+j*dst_stride1] = image_(FromIntermediate)(val);
+                }
+                else
+                {
+                    int do_copy=0; if(val==-1) do_copy=1;
+                    for(k=0;k<src_depth;k++)
+                    {
+                        if(do_copy)
+                            val=src[ii_0*src_stride2+jj_0*src_stride1+k*src_stride0];
+                        dst[i*dst_stride2+j*dst_stride1+k*dst_stride0] = image_(FromIntermediate)(val);
+                    }
+                }
+            }
+
+            // bilinear interpolation
+            else {
+                if(Tsrc->nDimension==2) {
+                    if(val==-1)
+                        val = (1.0 - wi) * (1.0 - wj) * src[ii_0*src_stride2+jj_0*src_stride1]
+                        + wi * (1.0 - wj) * src[ii_1*src_stride2+jj_0*src_stride1]
+                        + (1.0 - wi) * wj * src[ii_0*src_stride2+jj_1*src_stride1]
+                        + wi * wj * src[ii_1*src_stride2+jj_1*src_stride1];
+                    dst[i*dst_stride2+j*dst_stride1] = image_(FromIntermediate)(val);
+                } else {
+                    int do_copy=0; if(val==-1) do_copy=1;
+                    for(k=0;k<src_depth;k++) {
+                        if(do_copy) {
+                            val = (1.0 - wi) * (1.0 - wj) * src[ii_0*src_stride2+jj_0*src_stride1+k*src_stride0]
+                            + wi * (1.0 - wj) * src[ii_1*src_stride2+jj_0*src_stride1+k*src_stride0]
+                            + (1.0 - wi) * wj * src[ii_0*src_stride2+jj_1*src_stride1+k*src_stride0]
+                            + wi * wj * src[ii_1*src_stride2+jj_1*src_stride1+k*src_stride0];
+                        }
+                        dst[i*dst_stride2+j*dst_stride1+k*dst_stride0] = image_(FromIntermediate)(val);
+                    }
+                }
+            }
+        }
+    }
+    return 0;
+}
+
+static int image_(Main_logPolar)(lua_State *L)
+{
+    THTensor *Tsrc = luaT_checkudata(L, 1, torch_Tensor);
+    THTensor *Tdst = luaT_checkudata(L, 2, torch_Tensor);
+    float doFull = luaL_checknumber(L, 3);
+    real *src, *dst;
+    long dst_stride0, dst_stride1, dst_stride2, dst_width, dst_height, dst_depth;
+    long src_stride0, src_stride1, src_stride2, src_width, src_height, src_depth;
+    long i, j, k;
+    float id, jd, a, r, m, midY, midX, fw;
+    long ii,jj;
+
+    luaL_argcheck(L, Tsrc->nDimension==2 || Tsrc->nDimension==3, 1, "polar: src not 2 or 3 dimensional");
+    luaL_argcheck(L, Tdst->nDimension==2 || Tdst->nDimension==3, 2, "polar: dst not 2 or 3 dimensional");
+
+    src= THTensor_(data)(Tsrc);
+    dst= THTensor_(data)(Tdst);
+
+    dst_stride0 = 0;
+    dst_stride1 = Tdst->stride[Tdst->nDimension-2];
+    dst_stride2 = Tdst->stride[Tdst->nDimension-1];
+    dst_depth =  0;
+    dst_height = Tdst->size[Tdst->nDimension-2];
+    dst_width = Tdst->size[Tdst->nDimension-1];
+    if(Tdst->nDimension == 3) {
+        dst_stride0 = Tdst->stride[0];
+        dst_depth = Tdst->size[0];
+    }
+
+    src_stride0 = 0;
+    src_stride1 = Tsrc->stride[Tsrc->nDimension-2];
+    src_stride2 = Tsrc->stride[Tsrc->nDimension-1];
+    src_depth =  0;
+    src_height = Tsrc->size[Tsrc->nDimension-2];
+    src_width = Tsrc->size[Tsrc->nDimension-1];
+    if(Tsrc->nDimension == 3) {
+        src_stride0 = Tsrc->stride[0];
+        src_depth = Tsrc->size[0];
+    }
+
+    if( Tsrc->nDimension==3 && Tdst->nDimension==3 && ( src_depth!=dst_depth) ) {
+        luaL_error(L, "image.polar: src and dst depths do not match"); }
+
+    if( (Tsrc->nDimension!=Tdst->nDimension) ) {
+        luaL_error(L, "image.polar: src and dst depths do not match"); }
+
+    // compute maximum distance
+    midY = (float) src_height / 2.0;
+    midX = (float) src_width  / 2.0;
+    if(doFull == 1) {
+        m = sqrt((float) src_width * (float) src_width + (float) src_height * (float) src_height) / 2.0;
+    }
+    else {
+        m = (src_width < src_height) ? midX : midY;
+    }
+
+    // loop to fill polar image
+    fw = log(m) / (float) dst_width;
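+    // fw scales the destination x axis to log-radius: column i samples radius
+    // exp(i * fw), growing from 1 at the left edge towards m at the right edge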
+    for(j = 0; j < dst_height; j++) {               // orientation loop
+        jd = (float) j;
+        a = (2 * M_PI * jd) / (float) dst_height;   // current angle
+        for(i = 0; i < dst_width; i++) {            // radius loop
+            float val = -1;
+            id = (float) i;
+
+            r = exp(id * fw);
+
+            jj = (long) floor( r * cos(a) + midY);  // y-location in source image
+            ii = (long) floor(-r * sin(a) + midX);  // x-location in source image
+
+            if(ii>src_width-1) val=0;
+            if(jj>src_height-1) val=0;
+            if(ii<0) val=0;
+            if(jj<0) val=0;
+
+            if(Tsrc->nDimension==2)
+            {
+                if(val==-1)
+                    val=src[ii*src_stride2+jj*src_stride1];
+                dst[i*dst_stride2+j*dst_stride1] = image_(FromIntermediate)(val);
+            }
+            else
+            {
+                int do_copy=0; if(val==-1) do_copy=1;
+                for(k=0;k<src_depth;k++)
+                {
+                    if(do_copy)
+                        val=src[ii*src_stride2+jj*src_stride1+k*src_stride0];
+                    dst[i*dst_stride2+j*dst_stride1+k*dst_stride0] = image_(FromIntermediate)(val);
+                }
+            }
+        }
+    }
+    return 0;
+}
+static int image_(Main_logPolarBilinear)(lua_State *L)
+{
+    THTensor *Tsrc = luaT_checkudata(L, 1, torch_Tensor);
+    THTensor *Tdst = luaT_checkudata(L, 2, torch_Tensor);
+    float doFull = luaL_checknumber(L, 3);
+    real *src, *dst;
+    long dst_stride0, dst_stride1, dst_stride2, dst_width, dst_height, dst_depth;
+    long src_stride0, src_stride1, src_stride2, src_width, src_height, src_depth;
+    long i, j, k;
+    float id, jd, a, r, m, midY, midX, fw;
+    long ii_0, ii_1, jj_0, jj_1;
+
+    luaL_argcheck(L, Tsrc->nDimension==2 || Tsrc->nDimension==3, 1, "polar: src not 2 or 3 dimensional");
+    luaL_argcheck(L, Tdst->nDimension==2 || Tdst->nDimension==3, 2, "polar: dst not 2 or 3 dimensional");
+
+    src= THTensor_(data)(Tsrc);
+    dst= THTensor_(data)(Tdst);
+
+    dst_stride0 = 0;
+    dst_stride1 = Tdst->stride[Tdst->nDimension-2];
+    dst_stride2 = Tdst->stride[Tdst->nDimension-1];
+    dst_depth =  0;
+    dst_height = Tdst->size[Tdst->nDimension-2];
+    dst_width = Tdst->size[Tdst->nDimension-1];
+    if(Tdst->nDimension == 3) {
+        dst_stride0 = Tdst->stride[0];
+        dst_depth = Tdst->size[0];
+    }
+
+    src_stride0 = 0;
+    src_stride1 = Tsrc->stride[Tsrc->nDimension-2];
+    src_stride2 = Tsrc->stride[Tsrc->nDimension-1];
+    src_depth =  0;
+    src_height = Tsrc->size[Tsrc->nDimension-2];
+    src_width = Tsrc->size[Tsrc->nDimension-1];
+    if(Tsrc->nDimension == 3) {
+        src_stride0 = Tsrc->stride[0];
+        src_depth = Tsrc->size[0];
+    }
+
+    if( Tsrc->nDimension==3 && Tdst->nDimension==3 && ( src_depth!=dst_depth) ) {
+        luaL_error(L, "image.polar: src and dst depths do not match"); }
+
+    if( (Tsrc->nDimension!=Tdst->nDimension) ) {
+        luaL_error(L, "image.polar: src and dst depths do not match"); }
+
+    // compute maximum distance
+    midY = (float) src_height / 2.0;
+    midX = (float) src_width  / 2.0;
+    if(doFull == 1) {
+        m = sqrt((float) src_width * (float) src_width + (float) src_height * (float) src_height) / 2.0;
+    }
+    else {
+        m = (src_width < src_height) ? midX : midY;
+    }
+
+    // loop to fill polar image
+    fw = log(m) / (float) dst_width;
+    for(j = 0; j < dst_height; j++) {                 // orientation loop
+        jd = (float) j;
+        a = (2 * M_PI * jd) / (float) dst_height;     // current angle
+        for(i = 0; i < dst_width; i++) {              // radius loop
+            float val = -1;
+            float ri, rj, wi, wj;
+            id = (float) i;
+
+            r = exp(id * fw);
+
+            rj =  r * cos(a) + midY;                  // y-location in source image
+            ri = -r * sin(a) + midX;                  // x-location in source image
+
+            ii_0=(long)floor(ri);
+            ii_1=ii_0 + 1;
+            jj_0=(long)floor(rj);
+            jj_1=jj_0 + 1;
+            wi = ri - ii_0;
+            wj = rj - jj_0;
+
+            // switch to nearest interpolation when bilinear is impossible
+            if(ii_1>src_width-1 || jj_1>src_height-1 || ii_0<0 || jj_0<0) {
+                if(ii_0>src_width-1) val=0;
+                if(jj_0>src_height-1) val=0;
+                if(ii_0<0) val=0;
+                if(jj_0<0) val=0;
+
+                if(Tsrc->nDimension==2)
+                {
+                    if(val==-1)
+                        val=src[ii_0*src_stride2+jj_0*src_stride1];
+                    dst[i*dst_stride2+j*dst_stride1] = image_(FromIntermediate)(val);
+                }
+                else
+                {
+                    int do_copy=0; if(val==-1) do_copy=1;
+                    for(k=0;k<src_depth;k++)
+                    {
+                        if(do_copy)
+                            val=src[ii_0*src_stride2+jj_0*src_stride1+k*src_stride0];
+                        dst[i*dst_stride2+j*dst_stride1+k*dst_stride0] = image_(FromIntermediate)(val);
+                    }
+                }
+            }
+
+            // bilinear interpolation
+            else {
+                if(Tsrc->nDimension==2) {
+                    if(val==-1)
+                        val = (1.0 - wi) * (1.0 - wj) * src[ii_0*src_stride2+jj_0*src_stride1]
+                        + wi * (1.0 - wj) * src[ii_1*src_stride2+jj_0*src_stride1]
+                        + (1.0 - wi) * wj * src[ii_0*src_stride2+jj_1*src_stride1]
+                        + wi * wj * src[ii_1*src_stride2+jj_1*src_stride1];
+                    dst[i*dst_stride2+j*dst_stride1] = image_(FromIntermediate)(val);
+                } else {
+                    int do_copy=0; if(val==-1) do_copy=1;
+                    for(k=0;k<src_depth;k++) {
+                        if(do_copy) {
+                            val = (1.0 - wi) * (1.0 - wj) * src[ii_0*src_stride2+jj_0*src_stride1+k*src_stride0]
+                            + wi * (1.0 - wj) * src[ii_1*src_stride2+jj_0*src_stride1+k*src_stride0]
+                            + (1.0 - wi) * wj * src[ii_0*src_stride2+jj_1*src_stride1+k*src_stride0]
+                            + wi * wj * src[ii_1*src_stride2+jj_1*src_stride1+k*src_stride0];
+                        }
+                        dst[i*dst_stride2+j*dst_stride1+k*dst_stride0] = image_(FromIntermediate)(val);
+                    }
+                }
+            }
+        }
+    }
+    return 0;
+}
+
+
+static int image_(Main_cropNoScale)(lua_State *L)
+{
+  THTensor *Tsrc = luaT_checkudata(L, 1, torch_Tensor);
+  THTensor *Tdst = luaT_checkudata(L, 2, torch_Tensor);
+  long startx = luaL_checklong(L, 3);
+  long starty = luaL_checklong(L, 4);
+  real *src, *dst;
+  long dst_stride0, dst_stride1, dst_stride2, dst_width, dst_height, dst_depth;
+  long src_stride0, src_stride1, src_stride2, src_width, src_height, src_depth;
+  long i, j, k;
+
+  luaL_argcheck(L, Tsrc->nDimension==2 || Tsrc->nDimension==3, 1, "image.crop: src not 2 or 3 dimensional");
+  luaL_argcheck(L, Tdst->nDimension==2 || Tdst->nDimension==3, 2, "image.crop: dst not 2 or 3 dimensional");
+
+  src= THTensor_(data)(Tsrc);
+  dst= THTensor_(data)(Tdst);
+
+  dst_stride0 = 0;
+  dst_stride1 = Tdst->stride[Tdst->nDimension-2];
+  dst_stride2 = Tdst->stride[Tdst->nDimension-1];
+  dst_depth =  0;
+  dst_height = Tdst->size[Tdst->nDimension-2];
+  dst_width = Tdst->size[Tdst->nDimension-1];
+  if(Tdst->nDimension == 3) {
+    dst_stride0 = Tdst->stride[0];
+    dst_depth = Tdst->size[0];
+  }
+
+  src_stride0 = 0;
+  src_stride1 = Tsrc->stride[Tsrc->nDimension-2];
+  src_stride2 = Tsrc->stride[Tsrc->nDimension-1];
+  src_depth =  0;
+  src_height = Tsrc->size[Tsrc->nDimension-2];
+  src_width = Tsrc->size[Tsrc->nDimension-1];
+  if(Tsrc->nDimension == 3) {
+    src_stride0 = Tsrc->stride[0];
+    src_depth = Tsrc->size[0];
+  }
+
+  if( startx<0 || starty<0 || (startx+dst_width>src_width) || (starty+dst_height>src_height))
+    luaL_error(L, "image.crop: crop goes outside bounds of src");
+
+  if( Tdst->nDimension==3 && ( src_depth!=dst_depth) )
+    luaL_error(L, "image.crop: src and dst depths do not match");
+
+  for(j = 0; j < dst_height; j++) {
+    for(i = 0; i < dst_width; i++) {
+      float val = 0.0;
+
+      long ii=i+startx;
+      long jj=j+starty;
+
+      if(Tsrc->nDimension==2)
+        {
+          val=src[ii*src_stride2+jj*src_stride1];
+          dst[i*dst_stride2+j*dst_stride1] = image_(FromIntermediate)(val);
+        }
+      else
+        {
+          for(k=0;k<src_depth;k++)
+            {
+              val=src[ii*src_stride2+jj*src_stride1+k*src_stride0];
+              dst[i*dst_stride2+j*dst_stride1+k*dst_stride0] = image_(FromIntermediate)(val);
+            }
+        }
+    }
+  }
+  return 0;
+}
+
+static int image_(Main_translate)(lua_State *L)
+{
+  THTensor *Tsrc = luaT_checkudata(L, 1, torch_Tensor);
+  THTensor *Tdst = luaT_checkudata(L, 2, torch_Tensor);
+  long shiftx = luaL_checklong(L, 3);
+  long shifty = luaL_checklong(L, 4);
+  real *src, *dst;
+  long dst_stride0, dst_stride1, dst_stride2, dst_width, dst_height, dst_depth;
+  long src_stride0, src_stride1, src_stride2, src_width, src_height, src_depth;
+  long i, j, k;
+
+  luaL_argcheck(L, Tsrc->nDimension==2 || Tsrc->nDimension==3, 1, "image.translate: src not 2 or 3 dimensional");
+  luaL_argcheck(L, Tdst->nDimension==2 || Tdst->nDimension==3, 2, "image.translate: dst not 2 or 3 dimensional");
+
+  src= THTensor_(data)(Tsrc);
+  dst= THTensor_(data)(Tdst);
+
+  dst_stride0 = 1;
+  dst_stride1 = Tdst->stride[Tdst->nDimension-2];
+  dst_stride2 = Tdst->stride[Tdst->nDimension-1];
+  dst_depth =  1;
+  dst_height = Tdst->size[Tdst->nDimension-2];
+  dst_width = Tdst->size[Tdst->nDimension-1];
+  if(Tdst->nDimension == 3) {
+    dst_stride0 = Tdst->stride[0];
+    dst_depth = Tdst->size[0];
+  }
+
+  src_stride0 = 1;
+  src_stride1 = Tsrc->stride[Tsrc->nDimension-2];
+  src_stride2 = Tsrc->stride[Tsrc->nDimension-1];
+  src_depth =  1;
+  src_height = Tsrc->size[Tsrc->nDimension-2];
+  src_width = Tsrc->size[Tsrc->nDimension-1];
+  if(Tsrc->nDimension == 3) {
+    src_stride0 = Tsrc->stride[0];
+    src_depth = Tsrc->size[0];
+  }
+
+  if( Tdst->nDimension==3 && ( src_depth!=dst_depth) )
+    luaL_error(L, "image.translate: src and dst depths do not match");
+
+  for(j = 0; j < src_height; j++) {
+    for(i = 0; i < src_width; i++) {
+      long ii=i+shiftx;
+      long jj=j+shifty;
+
+      // Check it's within destination bounds, else crop
+      if(ii<dst_width && jj<dst_height && ii>=0 && jj>=0) {
+        for(k=0;k<src_depth;k++) {
+          dst[ii*dst_stride2+jj*dst_stride1+k*dst_stride0] = src[i*src_stride2+j*src_stride1+k*src_stride0];
+        }
+      }
+    }
+  }
+  return 0;
+}
+
+static int image_(Main_saturate)(lua_State *L) {
+#ifdef TH_REAL_IS_BYTE
+  // Noop since necessarily constrained to [0, 255].
+#else
+  THTensor *input = luaT_checkudata(L, 1, torch_Tensor);
+  THTensor *output = input;
+
+  TH_TENSOR_APPLY2(real, output, real, input,                       \
+                   *output_data = (*input_data < 0) ? 0 : (*input_data > 1) ? 1 : *input_data;)
+#endif
+  return 1;
+}
+
+/*
+ * Converts an RGB color value to HSL. Conversion formula
+ * adapted from http://en.wikipedia.org/wiki/HSL_color_space.
+ * Assumes r, g, and b are contained in the set [0, 1] and
+ * returns h, s, and l in the set [0, 1].
+ */
+int image_(Main_rgb2hsl)(lua_State *L) {
+  THTensor *rgb = luaT_checkudata(L, 1, torch_Tensor);
+  THTensor *hsl = luaT_checkudata(L, 2, torch_Tensor);
+
+  int y,x;
+  temp_t r, g, b, h, s, l;
+  for (y=0; y<rgb->size[1]; y++) {
+    for (x=0; x<rgb->size[2]; x++) {
+      // get RGB
+      r = THTensor_(get3d)(rgb, 0, y, x);
+      g = THTensor_(get3d)(rgb, 1, y, x);
+      b = THTensor_(get3d)(rgb, 2, y, x);
+#ifdef TH_REAL_IS_BYTE
+      r /= 255;
+      g /= 255;
+      b /= 255;
+#endif
+
+      temp_t mx = MAX(MAX(r, g), b);
+      temp_t mn = MIN(MIN(r, g), b);
+      if(mx == mn) {
+        h = 0; // achromatic
+        s = 0;
+        l = mx;
+      } else {
+        temp_t d = mx - mn;
+        if (mx == r) {
+          h = (g - b) / d + (g < b ? 6 : 0);
+        } else if (mx == g) {
+          h = (b - r) / d + 2;
+        } else {
+          h = (r - g) / d + 4;
+        }
+        h /= 6;
+        l = (mx + mn) / 2;
+        s = l > 0.5 ? d / (2 - mx - mn) : d / (mx + mn);
+      }
+
+      // set hsl
+#ifdef TH_REAL_IS_BYTE
+      h *= 255;
+      s *= 255;
+      l *= 255;
+#endif
+      THTensor_(set3d)(hsl, 0, y, x, image_(FromIntermediate)(h));
+      THTensor_(set3d)(hsl, 1, y, x, image_(FromIntermediate)(s));
+      THTensor_(set3d)(hsl, 2, y, x, image_(FromIntermediate)(l));
+    }
+  }
+  return 0;
+}
+
+// helper: map a hue offset t (wrapped into [0, 1]) onto one colour channel by
+// interpolating between the intermediate values p and q
+static inline temp_t image_(hue2rgb)(temp_t p, temp_t q, temp_t t) {
+  if (t < 0.) t += 1;
+  if (t > 1.) t -= 1;
+  if (t < 1./6)
+    return p + (q - p) * 6. * t;
+  else if (t < 1./2)
+    return q;
+  else if (t < 2./3)
+    return p + (q - p) * (2./3 - t) * 6.;
+  else
+    return p;
+}
+
+/*
+ * Converts an HSL color value to RGB. Conversion formula
+ * adapted from http://en.wikipedia.org/wiki/HSL_color_space.
+ * Assumes h, s, and l are contained in the set [0, 1] and
+ * returns r, g, and b in the set [0, 1].
+ */
+int image_(Main_hsl2rgb)(lua_State *L) {
+  THTensor *hsl = luaT_checkudata(L, 1, torch_Tensor);
+  THTensor *rgb = luaT_checkudata(L, 2, torch_Tensor);
+
+  int y,x;
+  temp_t r, g, b, h, s, l;
+  for (y=0; y<hsl->size[1]; y++) {
+    for (x=0; x<hsl->size[2]; x++) {
+      // get hsl
+      h = THTensor_(get3d)(hsl, 0, y, x);
+      s = THTensor_(get3d)(hsl, 1, y, x);
+      l = THTensor_(get3d)(hsl, 2, y, x);
+#ifdef TH_REAL_IS_BYTE
+      h /= 255;
+      s /= 255;
+      l /= 255;
+#endif
+
+      if(s == 0) {
+        // achromatic
+        r = l;
+        g = l;
+        b = l;
+      } else {
+        temp_t q = (l < 0.5) ? (l * (1 + s)) : (l + s - l * s);
+        temp_t p = 2 * l - q;
+        temp_t hr = h + 1./3;
+        temp_t hg = h;
+        temp_t hb = h - 1./3;
+        r = image_(hue2rgb)(p, q, hr);
+        g = image_(hue2rgb)(p, q, hg);
+        b = image_(hue2rgb)(p, q, hb);
+      }
+
+      // set rgb
+#ifdef TH_REAL_IS_BYTE
+      r *= 255;
+      g *= 255;
+      b *= 255;
+#endif
+      THTensor_(set3d)(rgb, 0, y, x, image_(FromIntermediate)(r));
+      THTensor_(set3d)(rgb, 1, y, x, image_(FromIntermediate)(g));
+      THTensor_(set3d)(rgb, 2, y, x, image_(FromIntermediate)(b));
+    }
+  }
+  return 0;
+}
+
+/*
+ * Converts an RGB color value to HSV. Conversion formula
+ * adapted from http://en.wikipedia.org/wiki/HSV_color_space.
+ * Assumes r, g, and b are contained in the set [0, 1] and
+ * returns h, s, and v in the set [0, 1].
+ */
+int image_(Main_rgb2hsv)(lua_State *L) {
+  THTensor *rgb = luaT_checkudata(L, 1, torch_Tensor);
+  THTensor *hsv = luaT_checkudata(L, 2, torch_Tensor);
+
+  int y, x;
+  temp_t r, g, b, h, s, v;
+  for (y=0; y<rgb->size[1]; y++) {
+    for (x=0; x<rgb->size[2]; x++) {
+      // get RGB
+      r = THTensor_(get3d)(rgb, 0, y, x);
+      g = THTensor_(get3d)(rgb, 1, y, x);
+      b = THTensor_(get3d)(rgb, 2, y, x);
+#ifdef TH_REAL_IS_BYTE
+      r /= 255;
+      g /= 255;
+      b /= 255;
+#endif
+
+      temp_t mx = MAX(MAX(r, g), b);
+      temp_t mn = MIN(MIN(r, g), b);
+      if(mx == mn) {
+        // achromatic
+        h = 0;
+        s = 0;
+        v = mx;
+      } else {
+        temp_t d = mx - mn;
+        if (mx == r) {
+          h = (g - b) / d + (g < b ? 6 : 0);
+        } else if (mx == g) {
+          h = (b - r) / d + 2;
+        } else {
+          h = (r - g) / d + 4;
+        }
+        h /= 6;
+        s = d / mx;
+        v = mx;
+      }
+
+      // set hsv
+#ifdef TH_REAL_IS_BYTE
+      h *= 255;
+      s *= 255;
+      v *= 255;
+#endif
+      THTensor_(set3d)(hsv, 0, y, x, image_(FromIntermediate)(h));
+      THTensor_(set3d)(hsv, 1, y, x, image_(FromIntermediate)(s));
+      THTensor_(set3d)(hsv, 2, y, x, image_(FromIntermediate)(v));
+    }
+  }
+  return 0;
+}
+
+/*
+ * Converts an HSV color value to RGB. Conversion formula
+ * adapted from http://en.wikipedia.org/wiki/HSV_color_space.
+ * Assumes h, s, and v are contained in the set [0, 1] and
+ * returns r, g, and b in the set [0, 1].
+ */
+int image_(Main_hsv2rgb)(lua_State *L) {
+  THTensor *hsv = luaT_checkudata(L, 1, torch_Tensor);
+  THTensor *rgb = luaT_checkudata(L, 2, torch_Tensor);
+
+  int y, x;
+  temp_t r, g, b, h, s, v;
+  for (y=0; y<hsv->size[1]; y++) {
+    for (x=0; x<hsv->size[2]; x++) {
+      // get hsv
+      h = THTensor_(get3d)(hsv, 0, y, x);
+      s = THTensor_(get3d)(hsv, 1, y, x);
+      v = THTensor_(get3d)(hsv, 2, y, x);
+#ifdef TH_REAL_IS_BYTE
+      h /= 255;
+      s /= 255;
+      v /= 255;
+#endif
+
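+      // split the hue circle into six sectors: i selects the sector, f is the
+      // position within it, and p, q, t are the intermediate channel values
+      // used to assemble r, g and b below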
+      int i = floor(h*6.);
+      temp_t f = h*6-i;
+      temp_t p = v*(1-s);
+      temp_t q = v*(1-f*s);
+      temp_t t = v*(1-(1-f)*s);
+
+      switch (i % 6) {
+      case 0: r = v, g = t, b = p; break;
+      case 1: r = q, g = v, b = p; break;
+      case 2: r = p, g = v, b = t; break;
+      case 3: r = p, g = q, b = v; break;
+      case 4: r = t, g = p, b = v; break;
+      case 5: r = v, g = p, b = q; break;
+      default: r = 0, g = 0, b = 0; break;
+      }
+
+      // set rgb
+#ifdef TH_REAL_IS_BYTE
+      r *= 255;
+      g *= 255;
+      b *= 255;
+#endif
+      THTensor_(set3d)(rgb, 0, y, x, image_(FromIntermediate)(r));
+      THTensor_(set3d)(rgb, 1, y, x, image_(FromIntermediate)(g));
+      THTensor_(set3d)(rgb, 2, y, x, image_(FromIntermediate)(b));
+    }
+  }
+  return 0;
+}
+
+#ifndef TH_REAL_IS_BYTE
+/*
+ * Convert an sRGB color channel to a linear sRGB color channel.
+ */
+static inline real image_(gamma_expand_sRGB)(real nonlinear)
+{
+  return (nonlinear <= 0.04045) ? (nonlinear / 12.92)
+                                : (pow((nonlinear+0.055)/1.055, 2.4));
+}
+
+/*
+ * Convert a linear sRGB color channel to a sRGB color channel.
+ */
+static inline real image_(gamma_compress_sRGB)(real linear)
+{
+  return (linear <= 0.0031308) ? (12.92 * linear)
+                               : (1.055 * pow(linear, 1.0/2.4) - 0.055);
+}
+
+/*
+ * Converts an sRGB color value to LAB.
+ * Based on http://www.brucelindbloom.com/index.html?Equations.html.
+ * Assumes r, g, and b are contained in the set [0, 1].
+ * LAB output is NOT restricted to [0, 1]!
+ */
+int image_(Main_rgb2lab)(lua_State *L) {
+  THTensor *rgb = luaT_checkudata(L, 1, torch_Tensor);
+  THTensor *lab = luaT_checkudata(L, 2, torch_Tensor);
+
+  // CIE Standard
+  double epsilon = 216.0/24389.0;
+  double k = 24389.0/27.0;
+  // D65 white point
+  double xn = 0.950456;
+  double zn = 1.088754;
+
+  int y,x;
+  real r,g,b,l,a,_b;
+  for (y=0; y<rgb->size[1]; y++) {
+    for (x=0; x<rgb->size[2]; x++) {
+      // get RGB
+      r = image_(gamma_expand_sRGB)(THTensor_(get3d)(rgb, 0, y, x));
+      g = image_(gamma_expand_sRGB)(THTensor_(get3d)(rgb, 1, y, x));
+      b = image_(gamma_expand_sRGB)(THTensor_(get3d)(rgb, 2, y, x));
+
+      // sRGB to XYZ
+      double X = 0.412453 * r + 0.357580 * g + 0.180423 * b;
+      double Y = 0.212671 * r + 0.715160 * g + 0.072169 * b;
+      double Z = 0.019334 * r + 0.119193 * g + 0.950227 * b;
+
+      // normalize for D65 white point
+      X /= xn;
+      Z /= zn;
+
+      // XYZ normalized to CIE Lab
+      double fx = X > epsilon ? pow(X, 1/3.0) : (k * X + 16)/116;
+      double fy = Y > epsilon ? pow(Y, 1/3.0) : (k * Y + 16)/116;
+      double fz = Z > epsilon ? pow(Z, 1/3.0) : (k * Z + 16)/116;
+      l = 116 * fy - 16;
+      a = 500 * (fx - fy);
+      _b = 200 * (fy - fz);
+
+      // set lab
+      THTensor_(set3d)(lab, 0, y, x, l);
+      THTensor_(set3d)(lab, 1, y, x, a);
+      THTensor_(set3d)(lab, 2, y, x, _b);
+    }
+  }
+  return 0;
+}
+
+/*
+ * Converts an LAB color value to sRGB.
+ * Based on http://www.brucelindbloom.com/index.html?Equations.html.
+ * returns r, g, and b in the set [0, 1].
+ */
+int image_(Main_lab2rgb)(lua_State *L) {
+  THTensor *lab = luaT_checkudata(L, 1, torch_Tensor);
+  THTensor *rgb = luaT_checkudata(L, 2, torch_Tensor);
+
+  int y,x;
+  real r,g,b,l,a,_b;
+
+  // CIE Standard
+  double epsilon = 216.0/24389.0;
+  double k = 24389.0/27.0;
+  // D65 white point
+  double xn = 0.950456;
+  double zn = 1.088754;
+
+  for (y=0; y<lab->size[1]; y++) {
+    for (x=0; x<lab->size[2]; x++) {
+      // get lab
+      l = THTensor_(get3d)(lab, 0, y, x);
+      a = THTensor_(get3d)(lab, 1, y, x);
+      _b = THTensor_(get3d)(lab, 2, y, x);
+
+      // LAB to XYZ
+      double fy = (l + 16) / 116;
+      double fz = fy - _b / 200;
+      double fx = (a / 500) + fy;
+      double X = pow(fx, 3);
+      if (X <= epsilon)
+        X = (116 * fx - 16) / k;
+      double Y = l > (k * epsilon) ? pow((l + 16) / 116, 3) : l/k;
+      double Z = pow(fz, 3);
+      if (Z <= epsilon)
+        Z = (116 * fz - 16) / k;
+
+      X *= xn;
+      Z *= zn;
+
+      // XYZ to sRGB
+      r =  3.2404542 * X - 1.5371385 * Y - 0.4985314 * Z;
+      g = -0.9692660 * X + 1.8760108 * Y + 0.0415560 * Z;
+      b =  0.0556434 * X - 0.2040259 * Y + 1.0572252 * Z;
+
+      // set rgb
+      THTensor_(set3d)(rgb, 0, y, x, image_(gamma_compress_sRGB)(r));
+      THTensor_(set3d)(rgb, 1, y, x, image_(gamma_compress_sRGB)(g));
+      THTensor_(set3d)(rgb, 2, y, x, image_(gamma_compress_sRGB)(b));
+    }
+  }
+  return 0;
+}
+#else
+int image_(Main_rgb2lab)(lua_State *L) {
+  return luaL_error(L, "image.rgb2lab: not supported for torch.ByteTensor");
+}
+
+int image_(Main_lab2rgb)(lua_State *L) {
+  return luaL_error(L, "image.lab2rgb: not supported for torch.ByteTensor");
+}
+#endif // TH_REAL_IS_BYTE
+
+/* Vertically flip an image */
+int image_(Main_vflip)(lua_State *L) {
+  THTensor *dst = luaT_checkudata(L, 1, torch_Tensor);
+  THTensor *src = luaT_checkudata(L, 2, torch_Tensor);
+
+  int width = dst->size[2];
+  int height = dst->size[1];
+  int channels = dst->size[0];
+  long *is = src->stride;
+  long *os = dst->stride;
+
+  // get raw pointers
+  real *dst_data = THTensor_(data)(dst);
+  real *src_data = THTensor_(data)(src);
+
+  long k, x, y;
+  if (dst_data != src_data) {
+      /* not in-place.
+       * this branch could be removed by first copying src into dst and then doing the in-place flip */
+#pragma omp parallel for private(k, x, y)
+      for(k=0; k<channels; k++) {
+          for (y=0; y<height; y++) {
+            for (x=0; x<width; x++) {
+                dst_data[ k*os[0] + (height-1-y)*os[1] + x*os[2] ] = src_data[ k*is[0] + y*is[1] + x*is[2] ];
+            }
+          }
+      }
+  } else {
+      /* in-place  */
+      real swap, * src_px,  * dst_px;
+      long half_height = height >> 1;
+      for(k=0; k<channels; k++) {
+          for (y=0; y < half_height; y++) {
+            for (x=0; x<width; x++) {
+                src_px = src_data + k*is[0] + y*is[1] + x*is[2];
+                dst_px =  dst_data + k*is[0] + (height-1-y)*is[1] + x*is[2];
+                swap = *dst_px;
+                *dst_px = *src_px;
+                *src_px = swap;
+            }
+          }
+      }
+  }
+
+  return 0;
+}
+
+
+/* Horizontally flip an image */
+int image_(Main_hflip)(lua_State *L) {
+  THTensor *dst = luaT_checkudata(L, 1, torch_Tensor);
+  THTensor *src = luaT_checkudata(L, 2, torch_Tensor);
+
+  int width = dst->size[2];
+  int height = dst->size[1];
+  int channels = dst->size[0];
+  long *is = src->stride;
+  long *os = dst->stride;
+
+  // get raw pointers
+  real *dst_data = THTensor_(data)(dst);
+  real *src_data = THTensor_(data)(src);
+
+  long k, x, y;
+  if (dst_data != src_data) {
+      /* not in-place.
+       * this branch could be removed by first copying src into dst and then doing the in-place flip */
+#pragma omp parallel for private(k, x, y)
+      for(k=0; k<channels; k++) {
+          for (y=0; y<height; y++) {
+              for (x=0; x<width; x++) {
+                  dst_data[ k*os[0] + y*os[1] + (width-x-1)*os[2] ] = src_data[ k*is[0] + y*is[1] + x*is[2] ];
+              }
+          }
+      }
+  } else {
+      /* in-place  */
+      real swap, * src_px,  * dst_px;
+      long half_width = width >> 1;
+      for(k=0; k<channels; k++) {
+          for (y=0; y < height; y++) {
+            for (x=0; x<half_width; x++) {
+                src_px = src_data + k*is[0] + y*is[1] + x*is[2];
+                dst_px =  dst_data + k*is[0] + y*is[1] + (width-x-1)*is[2];
+                swap = *dst_px;
+                *dst_px = *src_px;
+                *src_px = swap;
+            }
+          }
+      }
+  }
+
+  return 0;
+}
+
+/* flip an image along a specified dimension */
+int image_(Main_flip)(lua_State *L) {
+  THTensor *dst = luaT_checkudata(L, 1, torch_Tensor);
+  THTensor *src = luaT_checkudata(L, 2, torch_Tensor);
+  long flip_dim = luaL_checklong(L, 3);
+
+  if ((dst->nDimension != 5) || (src->nDimension != 5)) {
+    luaL_error(L, "image.flip: expected 5 dimensions for src and dst");
+  }
+
+  if (flip_dim < 1 || flip_dim > dst->nDimension || flip_dim > 5) {
+    luaL_error(L, "image.flip: flip_dim out of bounds");
+  }
+  flip_dim--;  //  Make it zero indexed
+
+  // get raw pointers
+  real *dst_data = THTensor_(data)(dst);
+  real *src_data = THTensor_(data)(src);
+  if (dst_data == src_data) {
+    luaL_error(L, "image.flip: in-place flip not supported");
+  }
+
+  long size0 = dst->size[0];
+  long size1 = dst->size[1];
+  long size2 = dst->size[2];
+  long size3 = dst->size[3];
+  long size4 = dst->size[4];
+
+  if (src->size[0] != size0 || src->size[1] != size1 ||
+      src->size[2] != size2 || src->size[3] != size3 ||
+      src->size[4] != size4) {
+    luaL_error(L, "image.flip: src and dst are not the same size");
+  }
+
+  long *is = src->stride;
+  long *os = dst->stride;
+
+  long x, y, z, d, t, isrc, idst = 0;
+  for (t = 0; t < size0; t++) {
+    for (d = 0; d < size1; d++) {
+      for (z = 0; z < size2; z++) {
+        for (y = 0; y < size3; y++) {
+          for (x = 0; x < size4; x++) {
+            isrc = t*is[0] + d*is[1] + z*is[2] + y*is[3] + x*is[4];
+            // The big switch statement here looks ugly, however on my machine
+            // gcc compiles it to a jump table, so it should be fast.
+            switch (flip_dim) {
+              case 0:
+                idst = (size0 - t - 1)*os[0] + d*os[1] + z*os[2] + y*os[3] + x*os[4];
+                break;
+              case 1:
+                idst = t*os[0] + (size1 - d - 1)*os[1] + z*os[2] + y*os[3] + x*os[4];
+                break;
+              case 2:
+                idst = t*os[0] + d*os[1] + (size2 - z - 1)*os[2] + y*os[3] + x*os[4];
+                break;
+              case 3:
+                idst = t*os[0] + d*os[1] + z*os[2] + (size3 - y - 1)*os[3] + x*os[4];
+                break;
+              case 4:
+                idst = t*os[0] + d*os[1] + z*os[2] + y*os[3] + (size4 - x - 1)*os[4];
+                break;
+            }
+            dst_data[ idst ] = src_data[  isrc ];
+          }
+        }
+      }
+    }
+  }
+
+  return 0;
+}
+
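+/* Sample position (ix, iy) from src with bicubic interpolation: each of the four
+   rows around the point is interpolated horizontally with the cubic kernel, then
+   the four row results are interpolated vertically. When bounds_check is set,
+   taps falling outside the image read pad_value instead. */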
+static inline void image_(Main_bicubicInterpolate)(
+  real* src, long* is, long* size, temp_t ix, temp_t iy,
+  real* dst, long *os,
+  real pad_value, int bounds_check)
+{
+  int i, j, k;
+  temp_t arr[4], p[4];
+
+  // Calculate fractional and integer components
+  long x_pix = floor(ix);
+  long y_pix = floor(iy);
+  temp_t dx = ix - x_pix;
+  temp_t dy = iy - y_pix;
+
+  for (k=0; k<size[0]; k++) {
+    #pragma unroll
+    for (i = 0; i < 4; i++) {
+      long v = y_pix + i - 1;
+      real* data = &src[k * is[0] + v * is[1]];
+
+      #pragma unroll
+      for (j = 0; j < 4; j++) {
+        long u = x_pix + j - 1;
+        if (bounds_check && (v < 0 || v >= size[1] || u < 0 || u >= size[2])) {
+          p[j] = pad_value;
+        } else {
+          p[j] = data[u * is[2]];
+        }
+      }
+
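+      // Main_cubicInterpolate() (defined earlier in this file, not shown
+      // here) is expected to implement the usual Catmull-Rom spline, roughly
+      //   f(t) = p1 + 0.5*t*(p2 - p0 + t*(2*p0 - 5*p1 + 4*p2 - p3
+      //                                   + t*(3*(p1 - p2) + p3 - p0)));
+      // it is applied along x here (with dx) and along y below (with dy).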
+      arr[i] = image_(Main_cubicInterpolate)(p[0], p[1], p[2], p[3], dx);
+    }
+
+    temp_t value = image_(Main_cubicInterpolate)(arr[0], arr[1], arr[2], arr[3], dy);
+    dst[k * os[0]] = image_(FromIntermediate)(value);
+  }
+}
+
+/*
+ * Warps an image, according to an (x,y) flow field. The flow
+ * field is in the space of the destination image, each vector
+ * points to a source pixel in the original image.
+ */
+int image_(Main_warp)(lua_State *L) {
+  THTensor *dst = luaT_checkudata(L, 1, torch_Tensor);
+  THTensor *src = luaT_checkudata(L, 2, torch_Tensor);
+  THTensor *flowfield = luaT_checkudata(L, 3, torch_Tensor);
+  int mode = lua_tointeger(L, 4);
+  int offset_mode = lua_toboolean(L, 5);
+  int clamp_mode = lua_tointeger(L, 6);
+  real pad_value = (real)lua_tonumber(L, 7);
+
+  // dims
+  int width = dst->size[2];
+  int height = dst->size[1];
+  int src_width = src->size[2];
+  int src_height = src->size[1];
+  int channels = dst->size[0];
+  long *is = src->stride;
+  long *os = dst->stride;
+  long *fs = flowfield->stride;
+
+  // get raw pointers
+  real *dst_data = THTensor_(data)(dst);
+  real *src_data = THTensor_(data)(src);
+  real *flow_data = THTensor_(data)(flowfield);
+
+  // resample
+  long k,x,y,v,u,i,j;
+#pragma omp parallel for private(k, x, y, v, u, i, j)
+  for (y=0; y<height; y++) {
+    for (x=0; x<width; x++) {
+      // subpixel position:
+      float flow_y = flow_data[ 0*fs[0] + y*fs[1] + x*fs[2] ];
+      float flow_x = flow_data[ 1*fs[0] + y*fs[1] + x*fs[2] ];
+      float iy = offset_mode*y + flow_y;
+      float ix = offset_mode*x + flow_x;
+
+      // borders
+      int off_image = 0;
+      if (iy < 0 || iy > src_height - 1 ||
+          ix < 0 || ix > src_width - 1) {
+        off_image = 1;
+      }
+
+      if (off_image == 1 && clamp_mode == 1) {
+        // We're off the image and clamp_mode says to fill with pad_value
+        for (k=0; k<channels; k++) {
+          dst_data[ k*os[0] + y*os[1] + x*os[2] ] = pad_value;
+        }
+      } else {
+        ix = MAX(ix,0); ix = MIN(ix,src_width-1);
+        iy = MAX(iy,0); iy = MIN(iy,src_height-1);
+
+        // select the interpolation mode:
+        switch (mode) {
+        case 1:  // Bilinear interpolation
+          {
+            // 4 nearest neighbors:
+            long ix_nw = floor(ix);
+            long iy_nw = floor(iy);
+            long ix_ne = ix_nw + 1;
+            long iy_ne = iy_nw;
+            long ix_sw = ix_nw;
+            long iy_sw = iy_nw + 1;
+            long ix_se = ix_nw + 1;
+            long iy_se = iy_nw + 1;
+
+            // get surfaces to each neighbor:
+            temp_t nw = (ix_se-ix)*(iy_se-iy);
+            temp_t ne = (ix-ix_sw)*(iy_sw-iy);
+            temp_t sw = (ix_ne-ix)*(iy-iy_ne);
+            temp_t se = (ix-ix_nw)*(iy-iy_nw);
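+            // Each weight is the area of the rectangle spanned by (ix, iy)
+            // and the *opposite* corner, so closer neighbors get larger
+            // weights; the four weights sum to 1.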
+
+            // weighted sum of neighbors:
+            for (k=0; k<channels; k++) {
+              dst_data[ k*os[0] + y*os[1] + x*os[2] ] = image_(FromIntermediate)(
+                  src_data[ k*is[0] +               iy_nw*is[1] +              ix_nw*is[2] ] * nw
+                + src_data[ k*is[0] +               iy_ne*is[1] + MIN(ix_ne,src_width-1)*is[2] ] * ne
+                + src_data[ k*is[0] + MIN(iy_sw,src_height-1)*is[1] +              ix_sw*is[2] ] * sw
+                + src_data[ k*is[0] + MIN(iy_se,src_height-1)*is[1] + MIN(ix_se,src_width-1)*is[2] ] * se);
+            }
+          }
+          break;
+        case 0:  // Simple (i.e., nearest neighbor)
+          {
+            // 1 nearest neighbor:
+            long ix_n = floor(ix+0.5);
+            long iy_n = floor(iy+0.5);
+
+            // copy the single nearest neighbor for each channel:
+            for (k=0; k<channels; k++) {
+              dst_data[ k*os[0] + y*os[1] + x*os[2] ] = src_data[ k*is[0] + iy_n*is[1] + ix_n*is[2] ];
+            }
+          }
+          break;
+        case 2:  // Bicubic
+          {
+            // We only need to do bounds checking if ix or iy are near the edge
+            int edge = !(iy >= 1 && iy < src_height - 2 && ix >= 1 && ix < src_width - 2);
+
+            real* dst = dst_data + y*os[1] + x*os[2];
+            if (edge) {
+              image_(Main_bicubicInterpolate)(src_data, is, src->size, ix, iy, dst, os, pad_value, 1);
+            } else {
+              image_(Main_bicubicInterpolate)(src_data, is, src->size, ix, iy, dst, os, pad_value, 0);
+            }
+          }
+          break;
+        case 3:  // Lanczos
+          {
+            // Note: Lanczos can be made fast if the resampling period is
+            // constant, because the Lu and Lv weights could then be cached
+            // and reused.  Unfortunately, warp makes no assumptions about the
+            // resampling pattern, so we have to perform the O(k^2) convolution
+            // on each pixel AND recalculate the kernel for every pixel.
+            // See Wikipedia for more info.
+            // It is, however, an extremely good approximation to the full
+            // sinc interpolation (IIR) filter.
+            // Another note: the version here has been optimized using fairly
+            // aggressive code flow and explicit inlining.  It might not be
+            // very readable (contact me, Jonathan Tompson, if it is not).
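+            // The kernel evaluated below is the standard Lanczos window,
+            //   L(x) = rad * sin(pi*x) * sin(pi*x/rad) / (pi^2 * x^2)  for 0 < |x| < rad,
+            //   L(0) = 1, and L(x) = 0 for |x| >= rad,
+            // sampled at the 2*rad integer offsets around the source position.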
+
+            // Calculate fractional and integer components
+            long x_pix = floor(ix);
+            long y_pix = floor(iy);
+
+            // Precalculate the L(x) function evaluations in the u and v direction
+            #define rad (3)  // This is a tunable parameter: 2 to 3 is OK
+            float Lu[2 * rad];  // L(x) for u direction
+            float Lv[2 * rad];  // L(x) for v direction
+            for (u=x_pix-rad+1, i=0; u<=x_pix+rad; u++, i++) {
+              float du = ix - (float)u;  // Lanczos kernel x value
+              du = du < 0 ? -du : du;  // prefer not to use fabsf here
+              if (du < 0.000001f) {  // TODO: Is there a real eps standard?
+                Lu[i] = 1;
+              } else if (du > (float)rad) {
+                Lu[i] = 0;
+              } else {
+                Lu[i] = ((float)rad * sin((float)M_PI * du) *
+                  sin((float)M_PI * du / (float)rad)) /
+                  ((float)(M_PI * M_PI) * du * du);
+              }
+            }
+            for (v=y_pix-rad+1, i=0; v<=y_pix+rad; v++, i++) {
+              float dv = iy - (float)v;  // Lanczos kernel x value
+              dv = dv < 0 ? -dv : dv;  // prefer not to use fabsf here
+              if (dv < 0.000001f) {  // TODO: Is there a real eps standard?
+                Lv[i] = 1;
+              } else if (dv > (float)rad) {
+                Lv[i] = 0;
+              } else {
+                Lv[i] = ((float)rad * sin((float)M_PI * dv) *
+                  sin((float)M_PI * dv / (float)rad)) /
+                  ((float)(M_PI * M_PI) * dv * dv);
+              }
+            }
+            float sum_weights = 0;
+            for (u=0; u<2*rad; u++) {
+              for (v=0; v<2*rad; v++) {
+                sum_weights += (Lu[u] * Lv[v]);
+              }
+            }
+
+            for (k=0; k<channels; k++) {
+              temp_t result = 0;
+              for (u=x_pix-rad+1, i=0; u<=x_pix+rad; u++, i++) {
+                long curu = MAX(MIN((long)(src_width-1), u), 0);
+                for (v=y_pix-rad+1, j=0; v<=y_pix+rad; v++, j++) {
+                  long curv = MAX(MIN((long)(src_height-1), v), 0);
+                  temp_t Suv = src_data[k * is[0] + curv * is[1] + curu * is[2]];
+
+                  temp_t weight = Lu[i] * Lv[j];
+                  result += (Suv * weight);
+                }
+              }
+              // Normalize by the sum of the weights
+              result = result / (float)sum_weights;
+
+              // Again, I assume that since the image is stored as reals we
+              // don't have to worry about clamping to min and max int (to
+              // prevent overflow or underflow).
+              dst_data[ k*os[0] + y*os[1] + x*os[2] ] = image_(FromIntermediate)(result);
+            }
+          }
+          break;
+        }  // end switch (mode)
+      }  // end else
+    }
+  }
+
+  // done
+  return 0;
+}
+
+
+int image_(Main_gaussian)(lua_State *L) {
+  THTensor *dst = luaT_checkudata(L, 1, torch_Tensor);
+  long width = dst->size[1];
+  long height = dst->size[0];
+  long *os = dst->stride;
+
+  real *dst_data = THTensor_(data)(dst);
+
+  temp_t amplitude = (temp_t)lua_tonumber(L, 2);
+  int normalize = (int)lua_toboolean(L, 3);
+  temp_t sigma_u = (temp_t)lua_tonumber(L, 4);
+  temp_t sigma_v = (temp_t)lua_tonumber(L, 5);
+  temp_t mean_u = (temp_t)lua_tonumber(L, 6) * width + 0.5;
+  temp_t mean_v = (temp_t)lua_tonumber(L, 7) * height + 0.5;
+
+  // Precalculate 1/(sigma*size) for speed (for some reason the omp pragma
+  // below prevents gcc from optimizing the inner loop on my machine;
+  // verified by checking the assembly output).
+  temp_t over_sigmau = 1.0 / (sigma_u * width);
+  temp_t over_sigmav = 1.0 / (sigma_v * height);
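+
+  // Each output pixel therefore evaluates to
+  //   amplitude * exp(-0.5 * (((u+1-mean_u) / (sigma_u*width))^2
+  //                         + ((v+1-mean_v) / (sigma_v*height))^2)),
+  // i.e. a 2D Gaussian whose sigmas are given as fractions of the image size.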
+
+  long v, u;
+  temp_t du, dv;
+#pragma omp parallel for private(v, u, du, dv)
+  for (v = 0; v < height; v++) {
+    for (u = 0; u < width; u++) {
+      du = (u + 1 - mean_u) * over_sigmau;
+      dv = (v + 1 - mean_v) * over_sigmav;
+      temp_t value = amplitude * exp(-0.5 * (du*du + dv*dv));
+      dst_data[ v*os[0] + u*os[1] ] = image_(FromIntermediate)(value);
+    }
+  }
+
+  if (normalize) {
+    temp_t sum = 0;
+    // We could parallelize this, but it's more trouble than it's worth
+    for(v = 0; v < height; v++) {
+      for(u = 0; u < width; u++) {
+        sum += dst_data[ v*os[0] + u*os[1] ];
+      }
+    }
+    temp_t one_over_sum = 1.0 / sum;
+#pragma omp parallel for private(v, u)
+    for(v = 0; v < height; v++) {
+      for(u = 0; u < width; u++) {
+        dst_data[ v*os[0] + u*os[1] ] *= one_over_sum;
+      }
+    }
+  }
+  return 0;
+}
+
+/*
+ * Borrowed from github.com/clementfarabet/lua---imgraph
+ * with Clément's permission for implementing y2jet()
+ */
+int image_(Main_colorize)(lua_State *L) {
+  // get args
+  THTensor *output = (THTensor *)luaT_checkudata(L, 1, torch_Tensor);
+  THTensor *input = (THTensor *)luaT_checkudata(L, 2, torch_Tensor);
+  THTensor *colormap = (THTensor *)luaT_checkudata(L, 3, torch_Tensor);
+
+  // dims
+  long height = input->size[0];
+  long width = input->size[1];
+
+  // generate color map if not given
+  int noColorMap = THTensor_(nElement)(colormap) == 0;
+  if (noColorMap) {
+    THTensor_(resize2d)(colormap, width*height, 3);
+    THTensor_(fill)(colormap, -1);
+  }
+
+  // colormap channels
+  int channels = colormap->size[1];
+
+  // generate output
+  THTensor_(resize3d)(output, channels, height, width);
+  int x,y,k;
+  for (y = 0; y < height; y++) {
+    for (x = 0; x < width; x++) {
+      int id = THTensor_(get2d)(input, y, x);
+      if (noColorMap) {
+        for (k = 0; k < channels; k++) {
+          temp_t value = (float)rand() / (float)RAND_MAX;
+#ifdef TH_REAL_IS_BYTE
+          value *= 255;
+#endif
+          THTensor_(set2d)(colormap, id, k, image_(FromIntermediate)(value));
+        }
+      }
+      for (k = 0; k < channels; k++) {
+        real color = THTensor_(get2d)(colormap, id, k);
+        THTensor_(set3d)(output, k, y, x, color);
+      }
+    }
+  }
+
+  // return nothing
+  return 0;
+}
+
+int image_(Main_rgb2y)(lua_State *L) {
+  THTensor *rgb = luaT_checkudata(L, 1, torch_Tensor);
+  THTensor *yim = luaT_checkudata(L, 2, torch_Tensor);
+
+  luaL_argcheck(L, rgb->nDimension == 3, 1, "image.rgb2y: src not 3D");
+  luaL_argcheck(L, yim->nDimension == 2, 2, "image.rgb2y: dst not 2D");
+  luaL_argcheck(L, rgb->size[1] == yim->size[0], 2,
+                "image.rgb2y: src and dst not of same height");
+  luaL_argcheck(L, rgb->size[2] == yim->size[1], 2,
+                "image.rgb2y: src and dst not of same width");
+
+  int y, x;
+  temp_t r, g, b, yc;
+  const int height = rgb->size[1];
+  const int width = rgb->size[2];
+  for (y=0; y<height; y++) {
+    for (x=0; x<width; x++) {
+      // get RGB
+      r = THTensor_(get3d)(rgb, 0, y, x);
+      g = THTensor_(get3d)(rgb, 1, y, x);
+      b = THTensor_(get3d)(rgb, 2, y, x);
+
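+      // ITU-R BT.601 luma weights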
+      yc = 0.299 * r + 0.587 * g + 0.114 * b;
+      THTensor_(set2d)(yim, y, x, image_(FromIntermediate)(yc));
+    }
+  }
+  return 0;
+}
+
+static inline void image_(drawPixel)(THTensor *output, int y, int x,
+                                     int cr, int cg, int cb) {
+#ifdef TH_REAL_IS_BYTE
+  THTensor_(set3d)(output, 0, y, x, cr);
+  THTensor_(set3d)(output, 1, y, x, cg);
+  THTensor_(set3d)(output, 2, y, x, cb);
+#else
+  THTensor_(set3d)(output, 0, y, x, cr / 255);
+  THTensor_(set3d)(output, 1, y, x, cg / 255);
+  THTensor_(set3d)(output, 2, y, x, cb / 255);
+#endif
+}
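+
+/* drawChar renders one 5x8 glyph from image_ada_font at (x, y), scaled by
+ * `size`.  Each glyph is stored as 5 column bytes (a sixth, blank spacing
+ * column is appended below); the 8 rows of a column are packed into the bits
+ * of its byte and drawn top to bottom, least-significant bit first. */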
+static inline void image_(drawChar)(THTensor *output, int x, int y, unsigned char c, int size,
+                                    int cr, int cg, int cb,
+                                    int bg_cr, int bg_cg, int bg_cb) {
+  long channels = output->size[0];
+  long height = output->size[1];
+  long width  = output->size[2];
+
+  /* out of bounds condition, return without drawing */
+  if((x >= width)            || // Clip right
+     (y >= height)           || // Clip bottom
+     ((x + 6 * size - 1) < 0) || // Clip left
+     ((y + 8 * size - 1) < 0))   // Clip top
+    return;
+
+  for(char i = 0; i < 6; i++ ) {
+    unsigned char line;
+    if (i < 5) {
+      line = *(const unsigned char *)(image_ada_font+(c*5) + i);
+    } else {
+      line = 0x0;
+    }
+    for(char j = 0; j < 8; j++, line >>= 1) {
+      if(line & 0x1) {
+        if (size == 1) {
+          image_(drawPixel)(output, y+j, x+i, cr, cg, cb);
+        }
+        else {
+          for (int ii = x+(i*size); ii < x+(i*size) + size; ii++) {
+            for (int jj = y+(j*size); jj < y+(j*size) + size; jj++) {
+              image_(drawPixel)(output, jj, ii, cr, cg, cb);
+            }
+          }
+        }
+      } else if (bg_cr != -1 && bg_cg != -1 && bg_cb != -1) {
+        if (size == 1) {
+          image_(drawPixel)(output, y+j, x+i, bg_cr, bg_cg, bg_cb);
+        } else {
+          for (int ii = x+(i*size); ii < x+(i*size) + size; ii++) {
+            for (int jj = y+(j*size); jj < y+(j*size) + size; jj++) {
+              image_(drawPixel)(output, jj, ii, bg_cr, bg_cg, bg_cb);
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+int image_(Main_drawtext)(lua_State *L) {
+  // get args
+  THTensor *output = (THTensor *)luaT_checkudata(L, 1, torch_Tensor);
+  const char* text = lua_tostring(L, 2);
+  long x = luaL_checklong(L, 3);
+  long y = luaL_checklong(L, 4);
+  int size = luaL_checkint(L, 5);
+  int cr = luaL_checkint(L, 6);
+  int cg = luaL_checkint(L, 7);
+  int cb = luaL_checkint(L, 8);
+  int bg_cr = luaL_checkint(L, 9);
+  int bg_cg = luaL_checkint(L, 10);
+  int bg_cb = luaL_checkint(L, 11);
+  int wrap = luaL_checkint(L, 12);
+
+  long len = strlen(text);
+
+  // dims
+  long channels = output->size[0];
+  long height = output->size[1];
+  long width  = output->size[2];
+
+  long cursor_y = y;
+  long cursor_x = x;
+
+  for (long cnt = 0; cnt < len; cnt++) {
+    unsigned char c = text[cnt];
+    if(c == '\n') {
+      cursor_y += size*8;
+      cursor_x  = x;
+    } else if(c == '\r') {
+      // skip em
+    } else {
+      if(wrap && ((cursor_x + size * 6) >= width)) { // Heading off edge?
+        cursor_x  = 0;            // Reset x to zero
+        cursor_y += size * 8; // Advance y one line
+      }
+      image_(drawChar)(output, cursor_x, cursor_y, c, size,
+                       cr, cg, cb,
+                       bg_cr, bg_cg, bg_cb);
+      cursor_x += size * 6;
+    }
+  }
+
+  return 0;
+}
+
+int image_(Main_drawRect)(lua_State *L) {
+  THTensor *output = (THTensor *)luaT_checkudata(L, 1, torch_Tensor);
+  long x1long = luaL_checklong(L, 2);
+  long y1long = luaL_checklong(L, 3);
+  long x2long = luaL_checklong(L, 4);
+  long y2long = luaL_checklong(L, 5);
+  int lineWidth = luaL_checkint(L, 6);
+  int cr = luaL_checkint(L, 7);
+  int cg = luaL_checkint(L, 8);
+  int cb = luaL_checkint(L, 9);
+
+  int offset = lineWidth / 2;
+  int x1 = (int) MAX(0, x1long - offset - 1);
+  int y1 = (int) MAX(0, y1long - offset - 1);
+  int x2 = (int) MIN(output->size[2] - 1, x2long - offset - 1);
+  int y2 = (int) MIN(output->size[1] - 1, y2long - offset - 1);
+
+  int w = x2 - x1 + 1;
+  int h = y2 - y1 + 1;
+  for (int y = y1; y < y2 + lineWidth; y++) {
+    for (int x = x1; x < x1 + lineWidth; x++) {
+      image_(drawPixel)(output, y, x, cr, cg, cb);
+    }
+    for (int x = x2; x < x2 + lineWidth; x++) {
+      image_(drawPixel)(output, y, x, cr, cg, cb);
+    }
+  }
+  for (int x = x1; x < x2 + lineWidth; x++) {
+    for (int y = y1; y < y1 + lineWidth; y++) {
+      image_(drawPixel)(output, y, x, cr, cg, cb);
+    }
+    for (int y = y2; y < y2 + lineWidth; y++) {
+      image_(drawPixel)(output, y, x, cr, cg, cb);
+    }
+  }
+
+  return 0;
+}
+
+
+static const struct luaL_Reg image_(Main__) [] = {
+  {"scaleSimple", image_(Main_scaleSimple)},
+  {"scaleBilinear", image_(Main_scaleBilinear)},
+  {"scaleBicubic", image_(Main_scaleBicubic)},
+  {"rotate", image_(Main_rotate)},
+  {"rotateBilinear", image_(Main_rotateBilinear)},
+  {"polar", image_(Main_polar)},
+  {"polarBilinear", image_(Main_polarBilinear)},
+  {"logPolar", image_(Main_logPolar)},
+  {"logPolarBilinear", image_(Main_logPolarBilinear)},
+  {"translate", image_(Main_translate)},
+  {"cropNoScale", image_(Main_cropNoScale)},
+  {"warp", image_(Main_warp)},
+  {"saturate", image_(Main_saturate)},
+  {"rgb2y",   image_(Main_rgb2y)},
+  {"rgb2hsv", image_(Main_rgb2hsv)},
+  {"rgb2hsl", image_(Main_rgb2hsl)},
+  {"hsv2rgb", image_(Main_hsv2rgb)},
+  {"hsl2rgb", image_(Main_hsl2rgb)},
+  {"rgb2lab", image_(Main_rgb2lab)},
+  {"lab2rgb", image_(Main_lab2rgb)},
+  {"gaussian", image_(Main_gaussian)},
+  {"vflip", image_(Main_vflip)},
+  {"hflip", image_(Main_hflip)},
+  {"flip", image_(Main_flip)},
+  {"colorize", image_(Main_colorize)},
+  {"text", image_(Main_drawtext)},
+  {"drawRect", image_(Main_drawRect)},
+  {NULL, NULL}
+};
+
+void image_(Main_init)(lua_State *L)
+{
+  luaT_pushmetatable(L, torch_Tensor);
+  luaT_registeratname(L, image_(Main__), "image");
+}
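+
+/* From Lua these routines are reached through the "image" field registered on
+ * each Tensor metatable, i.e. tensor.image.<name>(...); the argument
+ * conventions and user-facing wrappers live in init.lua (see, for example, the
+ * a.image.saturate(a) call used by clampImage there). */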
+
+#endif // TH_GENERIC_FILE
diff --git a/generic/jpeg.c b/generic/jpeg.c
new file mode 100755
index 0000000..1fd1e70
--- /dev/null
+++ b/generic/jpeg.c
@@ -0,0 +1,527 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/jpeg.c"
+#else
+
+/******************** JPEG DECOMPRESSION SAMPLE INTERFACE *******************/
+
+/* This half of the example shows how to read data from the JPEG decompressor.
+ * It's a bit more refined than the above, in that we show:
+ *   (a) how to modify the JPEG library's standard error-reporting behavior;
+ *   (b) how to allocate workspace using the library's memory manager.
+ *
+ * Just to make this example a little different from the first one, we'll
+ * assume that we do not intend to put the whole image into an in-memory
+ * buffer, but to send it line-by-line someplace else.  We need a one-
+ * scanline-high JSAMPLE array as a work buffer, and we will let the JPEG
+ * memory manager allocate it for us.  This approach is actually quite useful
+ * because we don't need to remember to deallocate the buffer separately: it
+ * will go away automatically when the JPEG object is cleaned up.
+ */
+
+
+/*
+ * ERROR HANDLING:
+ *
+ * The JPEG library's standard error handler (jerror.c) is divided into
+ * several "methods" which you can override individually.  This lets you
+ * adjust the behavior without duplicating a lot of code, which you might
+ * have to update with each future release.
+ *
+ * Our example here shows how to override the "error_exit" method so that
+ * control is returned to the library's caller when a fatal error occurs,
+ * rather than calling exit() as the standard error_exit method does.
+ *
+ * We use C's setjmp/longjmp facility to return control.  This means that the
+ * routine which calls the JPEG library must first execute a setjmp() call to
+ * establish the return point.  We want the replacement error_exit to do a
+ * longjmp().  But we need to make the setjmp buffer accessible to the
+ * error_exit routine.  To do this, we make a private extension of the
+ * standard JPEG error handler object.  (If we were using C++, we'd say we
+ * were making a subclass of the regular error handler.)
+ *
+ * Here's the extended error handler struct:
+ */
+
+#ifndef _LIBJPEG_ERROR_STRUCTS_
+#define _LIBJPEG_ERROR_STRUCTS_
+struct my_error_mgr {
+  struct jpeg_error_mgr pub;	/* "public" fields */
+
+  jmp_buf setjmp_buffer;	/* for return to caller */
+
+  char msg[JMSG_LENGTH_MAX]; /* last error message */
+};
+
+typedef struct my_error_mgr * my_error_ptr;
+#endif
+
+/*
+ * Here's the routine that will replace the standard error_exit method:
+ */
+
+METHODDEF(void)
+libjpeg_(Main_error) (j_common_ptr cinfo)
+{
+  /* cinfo->err really points to a my_error_mgr struct, so coerce pointer */
+  my_error_ptr myerr = (my_error_ptr) cinfo->err;
+
+  /* See below. */
+  (*cinfo->err->output_message) (cinfo);
+
+  /* Return control to the setjmp point */
+  longjmp(myerr->setjmp_buffer, 1);
+}
+
+/*
+ * Here's the routine that will replace the standard output_message method:
+ */
+
+METHODDEF(void)
+libjpeg_(Main_output_message) (j_common_ptr cinfo)
+{
+  my_error_ptr myerr = (my_error_ptr) cinfo->err;
+
+  (*cinfo->err->format_message) (cinfo, myerr->msg);
+}
+
+
+/*
+ * Sample routine for JPEG decompression.  We assume that the source file name
+ * is passed in.  We want to return 1 on success, 0 on error.
+ */
+
+
+static int libjpeg_(Main_size)(lua_State *L)
+{
+  /* This struct contains the JPEG decompression parameters and pointers to
+   * working space (which is allocated as needed by the JPEG library).
+   */
+  struct jpeg_decompress_struct cinfo;
+  /* We use our private extension JPEG error handler.
+   * Note that this struct must live as long as the main JPEG parameter
+   * struct, to avoid dangling-pointer problems.
+   */
+  struct my_error_mgr jerr;
+  /* More stuff */
+  FILE * infile;		/* source file */
+
+  const char *filename = luaL_checkstring(L, 1);
+
+  /* In this example we want to open the input file before doing anything else,
+   * so that the setjmp() error recovery below can assume the file is open.
+   * VERY IMPORTANT: use "b" option to fopen() if you are on a machine that
+   * requires it in order to read binary files.
+   */
+
+  if ((infile = fopen(filename, "rb")) == NULL)
+  {
+    luaL_error(L, "cannot open file <%s> for reading", filename);
+  }
+
+  /* Step 1: allocate and initialize JPEG decompression object */
+
+  /* We set up the normal JPEG error routines, then override error_exit. */
+  cinfo.err = jpeg_std_error(&jerr.pub);
+  jerr.pub.error_exit = libjpeg_(Main_error);
+  jerr.pub.output_message = libjpeg_(Main_output_message);
+  /* Establish the setjmp return context for my_error_exit to use. */
+  if (setjmp(jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error.
+     * We need to clean up the JPEG object, close the input file, and return.
+     */
+    jpeg_destroy_decompress(&cinfo);
+    fclose(infile);
+    luaL_error(L, "%s", jerr.msg);
+  }
+
+  /* Now we can initialize the JPEG decompression object. */
+  jpeg_create_decompress(&cinfo);
+
+  /* Step 2: specify data source (eg, a file) */
+
+  jpeg_stdio_src(&cinfo, infile);
+
+  /* Step 3: read file parameters with jpeg_read_header() */
+
+  jpeg_read_header(&cinfo, TRUE);
+  /* We can ignore the return value from jpeg_read_header since
+   *   (a) suspension is not possible with the stdio data source, and
+   *   (b) we passed TRUE to reject a tables-only JPEG file as an error.
+   * See libjpeg.doc for more info.
+   */
+
+  /* Step 4: set parameters for decompression */
+
+  /* In this example, we don't need to change any of the defaults set by
+   * jpeg_read_header(), so we do nothing here.
+   */
+
+  /* Step 5: Start decompressor */
+
+  (void) jpeg_start_decompress(&cinfo);
+  /* We can ignore the return value since suspension is not possible
+   * with the stdio data source.
+   */
+
+  lua_pushnumber(L, cinfo.output_components);
+  lua_pushnumber(L, cinfo.output_height);
+  lua_pushnumber(L, cinfo.output_width);
+
+  /* Step 8: Release JPEG decompression object */
+
+  /* This is an important step since it will release a good deal of memory. */
+  jpeg_destroy_decompress(&cinfo);
+
+  /* After finish_decompress, we can close the input file.
+   * Here we postpone it until after no more JPEG errors are possible,
+   * so as to simplify the setjmp error logic above.  (Actually, I don't
+   * think that jpeg_destroy can do an error exit, but why assume anything...)
+   */
+  fclose(infile);
+
+  /* At this point you may want to check to see whether any corrupt-data
+   * warnings occurred (test whether jerr.pub.num_warnings is nonzero).
+   */
+
+  /* And we're done! */
+  return 3;
+}
+
+static int libjpeg_(Main_load)(lua_State *L)
+{
+  const int load_from_file = luaL_checkint(L, 1);
+
+#if !defined(HAVE_JPEG_MEM_SRC)
+  if (load_from_file != 1) {
+    luaL_error(L, JPEG_MEM_SRC_ERR_MSG);
+  }
+#endif
+
+  /* This struct contains the JPEG decompression parameters and pointers to
+   * working space (which is allocated as needed by the JPEG library).
+   */
+  struct jpeg_decompress_struct cinfo;
+  /* We use our private extension JPEG error handler.
+   * Note that this struct must live as long as the main JPEG parameter
+   * struct, to avoid dangling-pointer problems.
+   */
+  struct my_error_mgr jerr;
+  /* More stuff */
+  FILE * infile;		    /* source file (if loading from file) */
+  unsigned char * inmem;    /* source memory (if loading from memory) */
+  unsigned long inmem_size; /* source memory size (bytes) */
+  JSAMPARRAY buffer;		/* Output row buffer */
+  /* int row_stride;  (physical row width in output buffer; unused) */
+  int i, k;
+
+  THTensor *tensor = NULL;
+
+  if (load_from_file == 1) {
+    const char *filename = luaL_checkstring(L, 2);
+
+    /* In this example we want to open the input file before doing anything else,
+     * so that the setjmp() error recovery below can assume the file is open.
+     * VERY IMPORTANT: use "b" option to fopen() if you are on a machine that
+     * requires it in order to read binary files.
+     */
+
+    if ((infile = fopen(filename, "rb")) == NULL)
+    {
+      luaL_error(L, "cannot open file <%s> for reading", filename);
+    }
+  } else {
+    /* We're loading from a ByteTensor */
+    THByteTensor *src = luaT_checkudata(L, 2, "torch.ByteTensor");
+    inmem = THByteTensor_data(src);
+    inmem_size = src->size[0];
+    infile = NULL;
+  }
+
+  /* Step 1: allocate and initialize JPEG decompression object */
+
+  /* We set up the normal JPEG error routines, then override error_exit. */
+  cinfo.err = jpeg_std_error(&jerr.pub);
+  jerr.pub.error_exit = libjpeg_(Main_error);
+  jerr.pub.output_message = libjpeg_(Main_output_message);
+  /* Establish the setjmp return context for my_error_exit to use. */
+  if (setjmp(jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error.
+     * We need to clean up the JPEG object, close the input file, and return.
+     */
+    jpeg_destroy_decompress(&cinfo);
+    if (infile) {
+      fclose(infile);
+    }
+    luaL_error(L, "%s", jerr.msg);
+  }
+  /* Now we can initialize the JPEG decompression object. */
+  jpeg_create_decompress(&cinfo);
+
+  /* Step 2: specify data source (eg, a file) */
+  if (load_from_file == 1) {
+    jpeg_stdio_src(&cinfo, infile);
+  } else {
+    jpeg_mem_src(&cinfo, inmem, inmem_size);
+  }
+
+  /* Step 3: read file parameters with jpeg_read_header() */
+
+  (void) jpeg_read_header(&cinfo, TRUE);
+  /* We can ignore the return value from jpeg_read_header since
+   *   (a) suspension is not possible with the stdio data source, and
+   *   (b) we passed TRUE to reject a tables-only JPEG file as an error.
+   * See libjpeg.doc for more info.
+   */
+
+  /* Step 4: set parameters for decompression */
+
+  /* In this example, we don't need to change any of the defaults set by
+   * jpeg_read_header(), so we do nothing here.
+   */
+
+  /* Step 5: Start decompressor */
+
+  (void) jpeg_start_decompress(&cinfo);
+  /* We can ignore the return value since suspension is not possible
+   * with the stdio data source.
+   */
+
+  /* We may need to do some setup of our own at this point before reading
+   * the data.  After jpeg_start_decompress() we have the correct scaled
+   * output image dimensions available, as well as the output colormap
+   * if we asked for color quantization.
+   * In this example, we need to make an output work buffer of the right size.
+   */
+
+  /* Make a one-row-high sample array that will go away when done with image */
+  const unsigned int chans = cinfo.output_components;
+  const unsigned int height = cinfo.output_height;
+  const unsigned int width = cinfo.output_width;
+  tensor = THTensor_(newWithSize3d)(chans, height, width);
+  real *tdata = THTensor_(data)(tensor);
+  buffer = (*cinfo.mem->alloc_sarray)
+    ((j_common_ptr) &cinfo, JPOOL_IMAGE, chans * width, 1);
+
+  /* Step 6: while (scan lines remain to be read) */
+  /*           jpeg_read_scanlines(...); */
+
+  /* Here we use the library's state variable cinfo.output_scanline as the
+   * loop counter, so that we don't have to keep track ourselves.
+   */
+  while (cinfo.output_scanline < height) {
+    /* jpeg_read_scanlines expects an array of pointers to scanlines.
+     * Here the array is only one element long, but you could ask for
+     * more than one scanline at a time if that's more convenient.
+     */
+    (void) jpeg_read_scanlines(&cinfo, buffer, 1);
+    const unsigned int j = cinfo.output_scanline-1;
+
+    if (chans == 3) { /* special-case for speed */
+      real *td1 = tdata + 0 * (height * width) + j * width;
+      real *td2 = tdata + 1 * (height * width) + j * width;
+      real *td3 = tdata + 2 * (height * width) + j * width;
+      const unsigned char *buf = buffer[0];
+      for(i = 0; i < width; i++) {
+        *td1++ = (real)buf[chans * i + 0];
+        *td2++ = (real)buf[chans * i + 1];
+        *td3++ = (real)buf[chans * i + 2];
+      }
+    } else if (chans == 1) { /* special-case for speed */
+      real *td = tdata + j * width;
+      for(i = 0; i < width; i++) {
+        *td++ = (real)buffer[0][i];
+      }
+    } else { /* general case */
+      for(k = 0; k < chans; k++) {
+        const unsigned int k_ = k;
+        real *td = tdata + k_ * (height * width) + j * width;
+        for(i = 0; i < width; i++) {
+          *td++ = (real)buffer[0][chans * i + k_];
+        }
+      }
+    }
+  }
+  /* Step 7: Finish decompression */
+
+  (void) jpeg_finish_decompress(&cinfo);
+  /* We can ignore the return value since suspension is not possible
+   * with the stdio data source.
+   */
+
+  /* Step 8: Release JPEG decompression object */
+
+  /* This is an important step since it will release a good deal of memory. */
+  jpeg_destroy_decompress(&cinfo);
+
+  /* After finish_decompress, we can close the input file.
+   * Here we postpone it until after no more JPEG errors are possible,
+   * so as to simplify the setjmp error logic above.  (Actually, I don't
+   * think that jpeg_destroy can do an error exit, but why assume anything...)
+   */
+  if (infile) {
+    fclose(infile);
+  }
+
+  /* At this point you may want to check to see whether any corrupt-data
+   * warnings occurred (test whether jerr.pub.num_warnings is nonzero).
+   */
+
+  /* And we're done! */
+  luaT_pushudata(L, tensor, torch_Tensor);
+  return 1;
+}
+
+/*
+ * save function
+ *
+ */
+int libjpeg_(Main_save)(lua_State *L) {
+  const int save_to_file = luaL_checkint(L, 3);
+
+#if !defined(HAVE_JPEG_MEM_DEST)
+  if (save_to_file != 1) {
+    luaL_error(L, JPEG_MEM_DEST_ERR_MSG);
+  }
+#endif
+
+  unsigned char *inmem = NULL;  /* destination memory (if saving to memory) */
+  unsigned long inmem_size = 0;  /* destination memory size (bytes) */
+
+  /* get args */
+  const char *filename = luaL_checkstring(L, 1);
+  THTensor *tensor = luaT_checkudata(L, 2, torch_Tensor);
+  THTensor *tensorc = THTensor_(newContiguous)(tensor);
+  real *tensor_data = THTensor_(data)(tensorc);
+
+  THByteTensor* tensor_dest = NULL;
+  if (save_to_file == 0) {
+    tensor_dest = luaT_checkudata(L, 5, "torch.ByteTensor");
+  }
+
+  int quality = luaL_checkint(L, 4);
+  if (quality < 0 || quality > 100) {
+    luaL_error(L, "quality should be between 0 and 100");
+  }
+
+  /* jpeg struct */
+  struct jpeg_compress_struct cinfo;
+  struct jpeg_error_mgr jerr;
+
+  /* pointer to raw image */
+  unsigned char *raw_image = NULL;
+
+  /* dimensions of the image we want to write */
+  int width=0, height=0, bytes_per_pixel=0;
+  int color_space=0;
+  if (tensorc->nDimension == 3) {
+    bytes_per_pixel = tensorc->size[0];
+    height = tensorc->size[1];
+    width = tensorc->size[2];
+    if (bytes_per_pixel == 3) {
+      color_space = JCS_RGB;
+    } else if (bytes_per_pixel == 1) {
+      color_space = JCS_GRAYSCALE;
+    } else {
+      luaL_error(L, "tensor should have 1 or 3 channels (gray or RGB)");
+    }
+  } else if (tensorc->nDimension == 2) {
+    bytes_per_pixel = 1;
+    height = tensorc->size[0];
+    width = tensorc->size[1];
+    color_space = JCS_GRAYSCALE;
+  } else {
+    luaL_error(L, "supports only 1 or 3 dimension tensors");
+  }
+
+  /* alloc raw image data */
+  raw_image = (unsigned char *)malloc((sizeof (unsigned char))*width*height*bytes_per_pixel);
+
+  /* convert tensor to raw bytes */
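+  /* (planar C x H x W reals are interleaved into H x W x C bytes) */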
+  int x,y,k;
+  for (k=0; k<bytes_per_pixel; k++) {
+    for (y=0; y<height; y++) {
+      for (x=0; x<width; x++) {
+        raw_image[(y*width+x)*bytes_per_pixel+k] = *tensor_data++;
+      }
+    }
+  }
+
+  /* this is a pointer to one row of image data */
+  JSAMPROW row_pointer[1];
+  FILE *outfile = NULL;
+  if (save_to_file == 1) {
+    outfile = fopen( filename, "wb" );
+    if ( !outfile ) {
+      luaL_error(L, "Error opening output jpeg file %s\n!", filename );
+    }
+  }
+
+  cinfo.err = jpeg_std_error( &jerr );
+  jpeg_create_compress(&cinfo);
+
+  /* specify data source (eg, a file) */
+  if (save_to_file == 1) {
+    jpeg_stdio_dest(&cinfo, outfile);
+  } else {
+    jpeg_mem_dest(&cinfo, &inmem, &inmem_size);
+  }
+
+  /* Setting the parameters of the output file here */
+  cinfo.image_width = width;
+  cinfo.image_height = height;
+  cinfo.input_components = bytes_per_pixel;
+  cinfo.in_color_space = color_space;
+
+  /* default compression parameters; we shouldn't need to change these */
+  jpeg_set_defaults( &cinfo );
+  jpeg_set_quality(&cinfo, quality, (boolean)0);
+
+  /* Now do the compression .. */
+  jpeg_start_compress( &cinfo, TRUE );
+
+  /* like reading a file, this time write one row at a time */
+  while( cinfo.next_scanline < cinfo.image_height ) {
+    row_pointer[0] = &raw_image[ cinfo.next_scanline * cinfo.image_width *  cinfo.input_components];
+    jpeg_write_scanlines( &cinfo, row_pointer, 1 );
+  }
+
+  /* similar to read file, clean up after we're done compressing */
+  jpeg_finish_compress( &cinfo );
+  jpeg_destroy_compress( &cinfo );
+
+  if (outfile != NULL) {
+    fclose( outfile );
+  }
+
+  if (save_to_file == 0) {
+
+    THByteTensor_resize1d(tensor_dest, inmem_size);  /* will fail if it's not a Byte Tensor */
+    unsigned char* tensor_dest_data = THByteTensor_data(tensor_dest);
+    memcpy(tensor_dest_data, inmem, inmem_size);
+    free(inmem);
+  }
+
+  /* some cleanup */
+  free(raw_image);
+  THTensor_(free)(tensorc);
+
+  /* success code is 1! */
+  return 1;
+}
+
+static const luaL_Reg libjpeg_(Main__)[] =
+{
+  {"size", libjpeg_(Main_size)},
+  {"load", libjpeg_(Main_load)},
+  {"save", libjpeg_(Main_save)},
+  {NULL, NULL}
+};
+
+DLL_EXPORT int libjpeg_(Main_init)(lua_State *L)
+{
+  luaT_pushmetatable(L, torch_Tensor);
+  luaT_registeratname(L, libjpeg_(Main__), "libjpeg");
+  return 1;
+}
+
+#endif
diff --git a/generic/png.c b/generic/png.c
new file mode 100755
index 0000000..5613236
--- /dev/null
+++ b/generic/png.c
@@ -0,0 +1,400 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/png.c"
+#else
+
+/*
+ * Copyright 2002-2010 Guillaume Cottenceau.
+ *
+ * This software may be freely redistributed under the terms
+ * of the X11 license.
+ *
+ * Clement: modified for Torch7.
+ */
+
+static int libpng_(Main_load)(lua_State *L)
+{
+
+  png_byte header[8];    // 8 is the maximum size that can be checked
+
+  int width, height, bit_depth;
+  png_byte color_type;
+  
+  png_structp png_ptr;
+  png_infop info_ptr;
+  png_bytep * row_pointers;
+  size_t fread_ret;
+  FILE* fp;
+  libpng_inmem_buffer inmem = {0};    /* source memory (if loading from memory) */
+  libpng_errmsg errmsg;
+
+  const int load_from_file = luaL_checkint(L, 1);
+
+  if (load_from_file == 1){
+    const char *file_name = luaL_checkstring(L, 2);
+   /* open file and test for it being a png */
+    fp = fopen(file_name, "rb");
+    if (!fp)
+      luaL_error(L, "[read_png_file] File %s could not be opened for reading", file_name);
+    fread_ret = fread(header, 1, 8, fp);
+    if (fread_ret != 8)
+      luaL_error(L, "[read_png_file] File %s error reading header", file_name);
+    if (png_sig_cmp(header, 0, 8))
+      luaL_error(L, "[read_png_file] File %s is not recognized as a PNG file", file_name);
+  } else {
+    /* We're loading from a ByteTensor */
+    THByteTensor *src = luaT_checkudata(L, 2, "torch.ByteTensor");
+    inmem.buffer = THByteTensor_data(src);
+    inmem.length = src->size[0];
+    inmem.offset = 8;
+    fp = NULL;
+    if (png_sig_cmp(inmem.buffer, 0, 8))
+      luaL_error(L, "[read_png_byte_tensor] ByteTensor is not recognized as a PNG file");
+  }
+  /* initialize stuff */
+  png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
+
+  if (!png_ptr)
+    luaL_error(L, "[read_png] png_create_read_struct failed");
+
+  png_set_error_fn(png_ptr, &errmsg, libpng_error_fn, NULL);
+
+  info_ptr = png_create_info_struct(png_ptr);
+  if (!info_ptr) {
+    png_destroy_read_struct(&png_ptr, NULL, NULL);
+    if (fp) {
+      fclose(fp);
+    }
+    luaL_error(L, "[read_png] png_create_info_struct failed");
+  }
+
+  if (setjmp(png_jmpbuf(png_ptr))) {
+    png_destroy_read_struct(&png_ptr, &info_ptr, NULL);
+    if (fp) {
+      fclose(fp);
+    }
+    luaL_error(L, "[read_png] Error during init_io: %s", errmsg.str);
+  }
+
+  if (load_from_file == 1) {
+    png_init_io(png_ptr, fp);
+  } else {
+    /* set the read callback */
+    png_set_read_fn(png_ptr,(png_voidp)&inmem, libpng_userReadData);
+  }
+  png_set_sig_bytes(png_ptr, 8);
+  png_read_info(png_ptr, info_ptr);
+
+  width      = png_get_image_width(png_ptr, info_ptr);
+  height     = png_get_image_height(png_ptr, info_ptr);
+  color_type = png_get_color_type(png_ptr, info_ptr);
+  bit_depth  = png_get_bit_depth(png_ptr, info_ptr);
+
+  /* get depth */
+  int depth = 0;
+  if (color_type == PNG_COLOR_TYPE_RGBA) {
+    depth = 4;
+  } else if (color_type == PNG_COLOR_TYPE_RGB) {
+    depth = 3;
+  } else if (color_type == PNG_COLOR_TYPE_GRAY) {
+    if (bit_depth < 8) {
+      png_set_expand_gray_1_2_4_to_8(png_ptr);
+    }
+    depth = 1;
+  } else if (color_type == PNG_COLOR_TYPE_GA) {
+    depth = 2;
+  } else if (color_type == PNG_COLOR_TYPE_PALETTE) {
+    depth = 3;
+    png_set_expand(png_ptr);
+  } else {
+    png_destroy_read_struct(&png_ptr, &info_ptr, NULL);
+    if (fp) {
+      fclose(fp);
+    }
+    luaL_error(L, "[read_png_file] Unknown color space");
+  }
+
+  if (bit_depth < 8) {
+    png_set_strip_16(png_ptr);
+  }
+
+  png_read_update_info(png_ptr, info_ptr);
+
+  /* read file */
+  if (setjmp(png_jmpbuf(png_ptr))) {
+    png_destroy_read_struct(&png_ptr, &info_ptr, NULL);
+    if (fp) {
+      fclose(fp);
+    }
+    luaL_error(L, "[read_png_file] Error during read_image: %s", errmsg.str);
+  }
+
+  /* alloc tensor */
+  THTensor *tensor = THTensor_(newWithSize3d)(depth, height, width);
+  real *tensor_data = THTensor_(data)(tensor);
+
+  /* alloc data in lib format */
+  row_pointers = (png_bytep*) malloc(sizeof(png_bytep) * height);
+  int y;
+  for (y=0; y<height; y++)
+    row_pointers[y] = (png_byte*) malloc(png_get_rowbytes(png_ptr,info_ptr));
+
+  /* read image in */
+  png_read_image(png_ptr, row_pointers);
+
+  /* convert image to dest tensor */
+  int x,k;
+  if ((bit_depth == 16) && (sizeof(real) > 1)) {
+    for (k=0; k<depth; k++) {
+      for (y=0; y<height; y++) {
+	png_byte* row = row_pointers[y];
+	for (x=0; x<width; x++) {
+	  // PNG is big-endian
+	  int val = ((int)row[(x*depth+k)*2] << 8) + row[(x*depth+k)*2+1];
+	  *tensor_data++ = (real)val;
+	}
+      }
+    }
+  } else {
+    int stride = 1;
+    if (bit_depth == 16) {
+      /* PNG has 16 bit color depth, but the tensor type is byte. */
+      stride = 2;
+    }
+    for (k=0; k<depth; k++) {
+      for (y=0; y<height; y++) {
+	png_byte* row = row_pointers[y];
+	for (x=0; x<width; x++) {
+	  *tensor_data++ = (real)row[(x*depth+k)*stride];
+	  //png_byte val = row[x*depth+k];
+	  //THTensor_(set3d)(tensor, k, y, x, (real)val);
+	}
+      }
+    }
+  }
+
+
+  /* cleanup heap allocation */
+  for (y=0; y<height; y++)
+    free(row_pointers[y]);
+  free(row_pointers);
+
+  /* cleanup png structs */
+  png_read_end(png_ptr, NULL);
+  png_destroy_read_struct(&png_ptr, &info_ptr, NULL);
+
+  /* done with file */
+  if (fp) {
+    fclose(fp);
+  }
+
+  /* return tensor */
+  luaT_pushudata(L, tensor, torch_Tensor);
+
+  if (bit_depth < 8) {
+    bit_depth = 8;
+  }
+  lua_pushnumber(L, bit_depth);
+
+  return 2;
+}
+
+static int libpng_(Main_save)(lua_State *L)
+{
+  THTensor *tensor = luaT_checkudata(L, 2, torch_Tensor);
+  const char *file_name = luaL_checkstring(L, 1);
+
+  int width=0, height=0;
+  png_byte color_type = 0;
+  png_byte bit_depth = 8;
+
+  png_structp png_ptr;
+  png_infop info_ptr;
+  png_bytep * row_pointers;
+  libpng_errmsg errmsg;
+
+  /* get dims and contiguous tensor */
+  THTensor *tensorc = THTensor_(newContiguous)(tensor);
+  real *tensor_data = THTensor_(data)(tensorc);
+  long depth=0;
+  if (tensorc->nDimension == 3) {
+    depth = tensorc->size[0];
+    height = tensorc->size[1];
+    width = tensorc->size[2];
+  } else if (tensorc->nDimension == 2) {
+    depth = 1;
+    height = tensorc->size[0];
+    width = tensorc->size[1];    
+  }
+
+  /* depth check */
+  if ((depth != 1) && (depth != 3) && (depth != 4)) {
+    luaL_error(L, "[write_png_file] Depth must be 1, 3 or 4");
+  }
+  if (depth == 4) color_type = PNG_COLOR_TYPE_RGBA;
+  else if (depth == 3) color_type = PNG_COLOR_TYPE_RGB;
+  else if (depth == 1) color_type = PNG_COLOR_TYPE_GRAY;
+
+  /* create file */
+  FILE *fp = fopen(file_name, "wb");
+  if (!fp)
+    luaL_error(L, "[write_png_file] File %s could not be opened for writing", file_name);
+
+  /* initialize stuff */
+  png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
+
+  if (!png_ptr)
+    luaL_error(L, "[write_png_file] png_create_write_struct failed");
+
+  png_set_error_fn(png_ptr, &errmsg, libpng_error_fn, NULL);
+
+  info_ptr = png_create_info_struct(png_ptr);
+  if (!info_ptr)
+    luaL_error(L, "[write_png_file] png_create_info_struct failed");
+
+  if (setjmp(png_jmpbuf(png_ptr)))
+    luaL_error(L, "[write_png_file] Error during init_io: %s", errmsg.str);
+
+  png_init_io(png_ptr, fp);
+
+  /* write header */
+  if (setjmp(png_jmpbuf(png_ptr)))
+    luaL_error(L, "[write_png_file] Error during writing header: %s", errmsg.str);
+
+  png_set_IHDR(png_ptr, info_ptr, width, height,
+         bit_depth, color_type, PNG_INTERLACE_NONE,
+         PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE);
+
+  png_write_info(png_ptr, info_ptr);
+
+  /* convert tensor to 8bit bytes */
+  row_pointers = (png_bytep*) malloc(sizeof(png_bytep) * height);
+  int y;
+  for (y=0; y<height; y++)
+    row_pointers[y] = (png_byte*) malloc(png_get_rowbytes(png_ptr,info_ptr));
+
+  /* convert image to dest tensor */
+  int x,k;
+  for (k=0; k<depth; k++) {
+    for (y=0; y<height; y++) {
+      png_byte* row = row_pointers[y];
+      for (x=0; x<width; x++) {
+        //row[x*depth+k] = (png_byte)THTensor_(get3d)(tensor, k, y, x);
+        row[x*depth+k] = *tensor_data++;
+      }
+    }
+  }
+
+  /* write bytes */
+  if (setjmp(png_jmpbuf(png_ptr)))
+    luaL_error(L, "[write_png_file] Error during writing bytes: %s", errmsg.str);
+
+  png_write_image(png_ptr, row_pointers);
+
+  /* end write */
+  if (setjmp(png_jmpbuf(png_ptr)))
+    luaL_error(L, "[write_png_file] Error during end of write: %s", errmsg.str);
+
+  /* cleanup png structs */
+  png_write_end(png_ptr, NULL);
+  png_destroy_write_struct(&png_ptr, &info_ptr);
+
+  /* cleanup heap allocation */
+  for (y=0; y<height; y++)
+    free(row_pointers[y]);
+  free(row_pointers);
+
+  /* cleanup */
+  fclose(fp);
+  THTensor_(free)(tensorc);
+  return 0;
+}
+
+static int libpng_(Main_size)(lua_State *L) 
+{
+  const char *filename = luaL_checkstring(L, 1);
+  png_byte header[8];    // 8 is the maximum size that can be checked
+
+  int width, height;
+  png_byte color_type;
+
+  png_structp png_ptr;
+  png_infop info_ptr;
+  libpng_errmsg errmsg;
+  size_t fread_ret;
+  /* open file and test for it being a png */
+  FILE *fp = fopen(filename, "rb");
+  if (!fp)
+    luaL_error(L, "[get_png_size] File %s could not be opened for reading", filename);
+  fread_ret = fread(header, 1, 8, fp);
+  if (fread_ret != 8)
+    luaL_error(L, "[get_png_size] File %s error reading header", filename);
+  
+  if (png_sig_cmp(header, 0, 8))
+    luaL_error(L, "[get_png_size] File %s is not recognized as a PNG file", filename);
+  
+  /* initialize stuff */
+  png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
+  
+  if (!png_ptr)
+    luaL_error(L, "[get_png_size] png_create_read_struct failed");
+
+  png_set_error_fn(png_ptr, &errmsg, libpng_error_fn, NULL);
+
+  info_ptr = png_create_info_struct(png_ptr);
+  if (!info_ptr)
+    luaL_error(L, "[get_png_size] png_create_info_struct failed");
+  
+  if (setjmp(png_jmpbuf(png_ptr)))
+    luaL_error(L, "[get_png_size] Error during init_io: %s", errmsg.str);
+
+  png_init_io(png_ptr, fp);
+  png_set_sig_bytes(png_ptr, 8);
+  
+  png_read_info(png_ptr, info_ptr);
+  
+  width      = png_get_image_width(png_ptr, info_ptr);
+  height     = png_get_image_height(png_ptr, info_ptr);
+  color_type = png_get_color_type(png_ptr, info_ptr);
+  png_read_update_info(png_ptr, info_ptr);
+
+  /* get depth */
+  int depth = 0;
+  if (color_type == PNG_COLOR_TYPE_RGBA)
+    depth = 4;
+  else if (color_type == PNG_COLOR_TYPE_RGB)
+    depth = 3;
+  else if (color_type == PNG_COLOR_TYPE_GRAY)
+    depth = 1;
+  else if (color_type == PNG_COLOR_TYPE_GA)
+    depth = 2;
+  else if (color_type == PNG_COLOR_TYPE_PALETTE)
+    luaL_error(L, "[get_png_size] unsupported type: PALETTE");
+  else
+    luaL_error(L, "[get_png_size] Unknown color space");
+
+  /* done with file */
+  fclose(fp);
+
+  lua_pushnumber(L, depth);
+  lua_pushnumber(L, height);
+  lua_pushnumber(L, width);
+
+  return 3;
+}
+
+static const luaL_Reg libpng_(Main__)[] =
+{
+  {"load", libpng_(Main_load)},
+  {"size", libpng_(Main_size)},
+  {"save", libpng_(Main_save)},
+  {NULL, NULL}
+};
+
+DLL_EXPORT int libpng_(Main_init)(lua_State *L)
+{
+  luaT_pushmetatable(L, torch_Tensor);
+  luaT_registeratname(L, libpng_(Main__), "libpng");
+  return 1;
+}
+
+#endif
diff --git a/generic/ppm.c b/generic/ppm.c
new file mode 100644
index 0000000..6d324e9
--- /dev/null
+++ b/generic/ppm.c
@@ -0,0 +1,183 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/ppm.c"
+#else
+
+static int libppm_(Main_load)(lua_State *L)
+{
+  const char *filename = luaL_checkstring(L, 1);
+  FILE* fp = fopen ( filename, "rb" );  /* binary mode: raw PPM payloads are binary */
+  if ( !fp ) {
+    luaL_error(L, "cannot open file <%s> for reading", filename);
+  }
+
+  long W,H,C;
+  char p,n;
+  int D, bps, bpc;
+
+  // magic number
+  p = (char)getc(fp);
+  if ( p != 'P' ) {
+    W = H = 0;
+    fclose(fp);
+    luaL_error(L, "corrupted file");
+  }
+
+  n = (char)getc(fp);
+
+  // Dimensions
+  W = ppm_get_long(fp);
+  H = ppm_get_long(fp);
+
+  // Max color value
+  D = ppm_get_long(fp);
+
+  // Either 8 or 16 bits per pixel
+  bps = 8;
+  if (D > 255) {
+     bps = 16;
+  }
+  bpc = bps / 8;
+
+  //printf("Loading PPM\nMAGIC: %c%c\nWidth: %ld, Height: %ld\nChannels: %d, Bits-per-pixel: %d\n", p, n, W, H, D, bps);
+
+  // load data
+  int ok = 1;
+  size_t s;
+  unsigned char *r = NULL;
+  if ( n=='6' ) {
+    C = 3;
+    s = W*H*C*bpc;
+    r = malloc(s);
+    if (fread ( r, 1, s, fp ) < s) ok = 0;
+  } else if ( n=='5' ) {
+    C = 1;
+    s = W*H*C*bpc;
+    r = malloc(s);
+    if (fread ( r, 1, s, fp ) < s) ok = 0;
+  } else if ( n=='3' ) {
+    int c,i;
+    C = 3;
+    s = W*H*C;
+    r = malloc(s);
+    for (i=0; i<s; i++) {
+      if (fscanf ( fp, "%d", &c ) != 1) { ok = 0; break; }
+      r[i] = 255*c / D;
+    }
+  } else if ( n=='2' ) {
+    int c,i;
+    C = 1;
+    s = W*H*C;
+    r = malloc(s);
+    for (i=0; i<s; i++) {
+      if (fscanf ( fp, "%d", &c ) != 1) { ok = 0; break; }
+      r[i] = 255*c / D;
+    }
+  } else {
+    W=H=C=0;
+    fclose ( fp );
+    luaL_error(L, "unsupported magic number: P%c", n);
+  }
+
+  if (!ok) {
+    fclose ( fp );
+    luaL_error(L, "corrupted file or read error");
+  }
+
+  // export tensor
+  THTensor *tensor = THTensor_(newWithSize3d)(C,H,W);
+  real *data = THTensor_(data)(tensor);
+  long i,k,j=0;
+  int val;
+  for (i=0; i<W*H; i++) {
+    for (k=0; k<C; k++) {
+       if (bpc == 1) {
+          data[k*H*W+i] = (real)r[j++];
+       } else if (bpc == 2) {
+          val = r[j] | (r[j+1] << 8);
+          j += 2;
+          data[k*H*W+i] = (real)val;
+       }
+    }
+  }
+
+  // cleanup
+  free(r);
+  fclose(fp);
+
+  // return loaded image
+  luaT_pushudata(L, tensor, torch_Tensor);
+  return 1;
+}
+
+int libppm_(Main_save)(lua_State *L) {
+  // get args
+  const char *filename = luaL_checkstring(L, 1);
+  THTensor *tensor = luaT_checkudata(L, 2, torch_Tensor);
+  THTensor *tensorc = THTensor_(newContiguous)(tensor);
+  real *data = THTensor_(data)(tensorc);
+
+  // dimensions
+  long C,H,W,N;
+  if (tensorc->nDimension == 3) {
+    C = tensorc->size[0];
+    H = tensorc->size[1];
+    W = tensorc->size[2];
+  } else if (tensorc->nDimension == 2) {
+    C = 1;
+    H = tensorc->size[0];
+    W = tensorc->size[1];
+  } else {
+    C=W=H=0;
+    luaL_error(L, "can only export tensor with geometry: HxW or 1xHxW or 3xHxW");
+  }
+  N = C*H*W;
+
+  // convert to chars
+  unsigned char *bytes = (unsigned char*)malloc(N);
+  long i,k,j=0;
+  for (i=0; i<W*H; i++) {
+    for (k=0; k<C; k++) {
+      bytes[j++] = (unsigned char)data[k*H*W+i];
+    }
+  }
+
+  // open file
+  FILE* fp = fopen(filename, "w");
+  if ( !fp ) {
+    luaL_error(L, "cannot open file <%s> for writing", filename);
+  }
+
+  // write 3 or 1 channel(s) header
+  if (C == 3) {
+    fprintf(fp, "P6\n%ld %ld\n%d\n", W, H, 255);
+  } else {
+    fprintf(fp, "P5\n%ld %ld\n%d\n", W, H, 255);
+  }
+
+  // write data
+  fwrite(bytes, 1, N, fp);
+
+  // cleanup
+  THTensor_(free)(tensorc);
+  free(bytes);
+  fclose (fp);
+
+  // return result
+  return 1;
+}
+
+static const luaL_Reg libppm_(Main__)[] =
+{
+  {"load", libppm_(Main_load)},
+  {"save", libppm_(Main_save)},
+  {NULL, NULL}
+};
+
+DLL_EXPORT int libppm_(Main_init)(lua_State *L)
+{
+  luaT_pushmetatable(L, torch_Tensor);
+  luaT_registeratname(L, libppm_(Main__), "libppm");
+  return 1;
+}
+
+#endif
diff --git a/image-1.1.alpha-0.rockspec b/image-1.1.alpha-0.rockspec
new file mode 100644
index 0000000..99e84c1
--- /dev/null
+++ b/image-1.1.alpha-0.rockspec
@@ -0,0 +1,32 @@
+package = "image"
+version = "1.1.alpha-0"
+
+source = {
+   url = "git://github.com/torch/image",
+   tag = "master"
+}
+
+description = {
+   summary = "An image library for Torch",
+   detailed = [[
+This package provides routines to load/save and manipulate images
+using Torch's Tensor data structure.
+   ]],
+   homepage = "https://github.com/torch/image",
+   license = "BSD"
+}
+
+dependencies = {
+   "torch >= 7.0",
+   "sys >= 1.0",
+   "xlua >= 1.0",
+   "dok"
+}
+
+build = {
+   type = "command",
+   build_command = [[
+cmake -E make_directory build && cd build && cmake .. -DLUALIB=$(LUALIB) -DLUA_INCDIR="$(LUA_INCDIR)" -DLUA_LIBDIR="$(LUA_LIBDIR)"  -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH="$(LUA_BINDIR)/.." -DCMAKE_INSTALL_PREFIX="$(PREFIX)" && $(MAKE)
+   ]],
+   install_command = "cd build && $(MAKE) install"
+}
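+
+-- Illustrative only: a local build/install from this rockspec would typically
+-- be run as
+--   luarocks make image-1.1.alpha-0.rockspec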
diff --git a/image.c b/image.c
new file mode 100644
index 0000000..a6783e9
--- /dev/null
+++ b/image.c
@@ -0,0 +1,52 @@
+
+#include <TH.h>
+#include <luaT.h>
+
+#if LUA_VERSION_NUM >= 503
+#define luaL_checklong(L,n)     ((long)luaL_checkinteger(L, (n)))
+#endif
+
+
+#define torch_(NAME) TH_CONCAT_3(torch_, Real, NAME)
+#define torch_Tensor TH_CONCAT_STRING_3(torch., Real, Tensor)
+#define image_(NAME) TH_CONCAT_3(image_, Real, NAME)
+
+#ifdef max
+#undef max
+#endif
+#define max( a, b ) ( ((a) > (b)) ? (a) : (b) )
+
+#ifdef min
+#undef min
+#endif
+#define min( a, b ) ( ((a) < (b)) ? (a) : (b) )
+
+#include "font.c"
+
+#include "generic/image.c"
+#include "THGenerateAllTypes.h"
+
+DLL_EXPORT int luaopen_libimage(lua_State *L)
+{
+  image_FloatMain_init(L);
+  image_DoubleMain_init(L);
+  image_ByteMain_init(L);
+
+  lua_newtable(L);
+  lua_pushvalue(L, -1);
+  lua_setglobal(L, "image");
+
+  lua_newtable(L);
+  luaT_setfuncs(L, image_DoubleMain__, 0);
+  lua_setfield(L, -2, "double");
+
+  lua_newtable(L);
+  luaT_setfuncs(L, image_FloatMain__, 0);
+  lua_setfield(L, -2, "float");
+
+  lua_newtable(L);
+  luaT_setfuncs(L, image_ByteMain__, 0);
+  lua_setfield(L, -2, "byte");
+
+  return 1;
+}
diff --git a/init.lua b/init.lua
new file mode 100644
index 0000000..5f4b9ed
--- /dev/null
+++ b/init.lua
@@ -0,0 +1,2323 @@
+----------------------------------------------------------------------
+--
+-- Copyright (c) 2011 Ronan Collobert, Clement Farabet
+--
+-- Permission is hereby granted, free of charge, to any person obtaining
+-- a copy of this software and associated documentation files (the
+-- "Software"), to deal in the Software without restriction, including
+-- without limitation the rights to use, copy, modify, merge, publish,
+-- distribute, sublicense, and/or sell copies of the Software, and to
+-- permit persons to whom the Software is furnished to do so, subject to
+-- the following conditions:
+--
+-- The above copyright notice and this permission notice shall be
+-- included in all copies or substantial portions of the Software.
+--
+-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+-- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+-- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+-- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+-- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+-- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+-- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+--
+----------------------------------------------------------------------
+-- description:
+--     image - an image toolBox, for Torch
+--
+-- history:
+--     July  1, 2011, 7:42PM - import from Torch5 - Clement Farabet
+----------------------------------------------------------------------
+
+require 'torch'
+require 'xlua'
+require 'dok'
+require 'libimage'
+
+local fpath = require 'sys.fpath'
+
+local startswith = function(str, prefix)
+  return string.find(str, prefix, 1, true) == 1
+end
+
+local magicJPG = string.char(0xff, 0xd8, 0xff)
+local magicPNG = string.char(0x89, 0x50, 0x4e, 0x47)
+
+----------------------------------------------------------------------
+-- include unit test function
+--
+require 'image.test'
+
+----------------------------------------------------------------------
+-- types lookups
+--
+local type2tensor = {
+   float = torch.FloatTensor(),
+   double = torch.DoubleTensor(),
+   byte = torch.ByteTensor(),
+}
+local template = function(type)
+   if type then
+      return type2tensor[type]
+   else
+      return torch.Tensor()
+   end
+end
+
+----------------------------------------------------------------------
+-- save/load in multiple formats
+--
+
+-- depth conversion helper
+local function todepth(img, depth)
+   if depth and depth == 1 then
+      if img:nDimension() == 2 then
+         -- all good
+      elseif img:size(1) == 3 or img:size(1) == 4 then
+	 img = image.rgb2y(img:narrow(1,1,3))[1]
+      elseif img:size(1) == 2 then
+         img = img:narrow(1,1,1)
+      elseif img:size(1) ~= 1 then
+         dok.error('image loaded has wrong #channels', 'image.todepth')
+      end
+   elseif depth and depth == 3 then
+      local chan = img:size(1)
+      if chan == 3 then
+         -- all good
+      elseif img:nDimension() == 2 then
+         local imgrgb = img.new(3, img:size(1), img:size(2))
+         imgrgb:select(1, 1):copy(img)
+         imgrgb:select(1, 2):copy(img)
+         imgrgb:select(1, 3):copy(img)
+         img = imgrgb
+      elseif chan == 4 then
+         img = img:narrow(1,1,3)
+      elseif chan == 1 then
+         local imgrgb = img.new(3, img:size(2), img:size(3))
+         imgrgb:select(1, 1):copy(img)
+         imgrgb:select(1, 2):copy(img)
+         imgrgb:select(1, 3):copy(img)
+         img = imgrgb
+      else
+         dok.error('image loaded has wrong #channels', 'image.todepth')
+      end
+   end
+   return img
+end
+
+local function isPNG(magicTensor)
+    local pngMagic = torch.ByteTensor({0x89,0x50,0x4e,0x47})
+    return torch.all(torch.eq(magicTensor, pngMagic))
+end
+
+local function isJPG(magicTensor)
+    -- There are many valid 4th bytes, so only check the first 3 bytes.
+    -- libjpeg should support most if not all of these:
+    -- source: http://filesignatures.net/?page=all&order=SIGNATURE&alpha=J
+    local jpgMagic = torch.ByteTensor({0xff, 0xd8, 0xff})
+    return torch.all(torch.eq(magicTensor, jpgMagic))
+end
+
+local function decompress(tensor, depth, tensortype)
+    if torch.typename(tensor) ~= 'torch.ByteTensor' then
+        dok.error('Input tensor must be a byte tensor',
+                  'image.decompress')
+    end
+    if tensor:nElement() < 4 then
+    	dok.error('Input must be either jpg or png format',
+                  'image.decompress')
+    end
+    if isJPG(tensor[{{1,3}}]) then
+        return image.decompressJPG(tensor, depth, tensortype)
+    elseif isPNG(tensor[{{1,4}}]) then
+        return image.decompressPNG(tensor, depth, tensortype)
+    else
+        dok.error('Input must be either jpg or png format',
+                  'image.decompress')
+    end
+end
+rawset(image, 'decompress', decompress)
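+
+-- A minimal usage sketch for image.decompress, assuming the raw bytes of
+-- assets/fabio.jpg and that the libjpeg bindings are installed:
+--
+--   local f = torch.DiskFile('assets/fabio.jpg', 'r'):binary()
+--   f:seekEnd(); local nbytes = f:position() - 1; f:seek(1)
+--   local raw = torch.ByteTensor(f:readByte(nbytes)); f:close()
+--   local img = image.decompress(raw, 3, 'float')   -- 3xHxW FloatTensor in [0,1]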
+
+local function processPNG(img, depth, bit_depth, tensortype)
+    local MAXVAL = 255
+    if bit_depth == 16 then MAXVAL = 65535 end
+    if tensortype ~= 'byte' then
+        img:mul(1/MAXVAL)
+    end
+    img = todepth(img, depth)
+    return img
+end
+
+local function loadPNG(filename, depth, tensortype)
+   if not xlua.require 'libpng' then
+      dok.error('libpng package not found, please install libpng','image.loadPNG')
+   end
+   local load_from_file = 1
+   local a, bit_depth = template(tensortype).libpng.load(load_from_file, filename)
+   return processPNG(a, depth, bit_depth, tensortype)
+end
+rawset(image, 'loadPNG', loadPNG)
+
+local function clampImage(tensor)
+   if tensor:type() == 'torch.ByteTensor' then
+      return tensor
+   end
+   local a = torch.Tensor():resize(tensor:size()):copy(tensor)
+   a.image.saturate(a) -- bound btwn 0 and 1
+   a:mul(255)          -- remap to [0..255]
+   return a
+end
+
+local function savePNG(filename, tensor)
+   if not xlua.require 'libpng' then
+      dok.error('libpng package not found, please install libpng','image.savePNG')
+   end
+   tensor = clampImage(tensor)
+   tensor.libpng.save(filename, tensor)
+end
+rawset(image, 'savePNG', savePNG)
+
+local function decompressPNG(tensor, depth, tensortype)
+    if not xlua.require 'libpng' then
+        dok.error('libpng package not found, please install libpng',
+                  'image.decompressPNG')
+    end
+    if torch.typename(tensor) ~= 'torch.ByteTensor' then
+        dok.error('Input tensor (with compressed png) must be a byte tensor',
+                  'image.decompressPNG')
+    end
+    local load_from_file = 0
+    local a, bit_depth = template(tensortype).libpng.load(load_from_file, tensor)
+    if a == nil then
+        return nil
+    else
+        return processPNG(a, depth, bit_depth, tensortype)
+    end
+end
+rawset(image, 'decompressPNG', decompressPNG)
+
+function image.getPNGsize(filename)
+   if not xlua.require 'libpng' then
+      dok.error('libpng package not found, please install libpng','image.getPNGsize')
+   end
+   return torch.Tensor().libpng.size(filename)
+end
+
+local function processJPG(img, depth, tensortype)
+   local MAXVAL = 255
+   if tensortype ~= 'byte' then
+      img:mul(1/MAXVAL)
+   end
+   img = todepth(img, depth)
+   return img
+end
+
+local function loadJPG(filename, depth, tensortype)
+   if not xlua.require 'libjpeg' then
+      dok.error('libjpeg package not found, please install libjpeg','image.loadJPG')
+   end
+   local load_from_file = 1
+   local a = template(tensortype).libjpeg.load(load_from_file, filename)
+   if a == nil then
+      return nil
+   else
+      return processJPG(a, depth, tensortype)
+   end
+end
+rawset(image, 'loadJPG', loadJPG)
+
+local function decompressJPG(tensor, depth, tensortype)
+   if not xlua.require 'libjpeg' then
+      dok.error('libjpeg package not found, please install libjpeg',
+        'image.decompressJPG')
+   end
+   if torch.typename(tensor) ~= 'torch.ByteTensor' then
+      dok.error('Input tensor (with compressed jpeg) must be a byte tensor',
+        'image.decompressJPG')
+   end
+   local load_from_file = 0
+   local a = template(tensortype).libjpeg.load(load_from_file, tensor)
+   if a == nil then
+      return nil
+   else
+      return processJPG(a, depth, tensortype)
+   end
+end
+rawset(image, 'decompressJPG', decompressJPG)
+
+local function saveJPG(filename, tensor)
+   if not xlua.require 'libjpeg' then
+      dok.error('libjpeg package not found, please install libjpeg','image.saveJPG')
+   end
+   tensor = clampImage(tensor)
+   local save_to_file = 1
+   local quality = 75
+   tensor.libjpeg.save(filename, tensor, save_to_file, quality)
+end
+rawset(image, 'saveJPG', saveJPG)
+
+function image.getJPGsize(filename)
+   if not xlua.require 'libjpeg' then
+      dok.error('libjpeg package not found, please install libjpeg','image.getJPGsize')
+   end
+   return torch.Tensor().libjpeg.size(filename)
+end
+
+local function compressJPG(tensor, quality)
+   if not xlua.require 'libjpeg' then
+      dok.error('libjpeg package not found, please install libjpeg',
+         'image.compressJPG')
+   end
+   tensor = clampImage(tensor)
+   local b = torch.ByteTensor()
+   local save_to_file = 0
+   quality = quality or 75
+   tensor.libjpeg.save("", tensor, save_to_file, quality, b)
+   return b
+end
+rawset(image, 'compressJPG', compressJPG)
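+
+-- A minimal in-memory round trip with compressJPG/decompressJPG, assuming the
+-- libjpeg bindings are installed (JPEG is lossy, so the result is only close):
+--
+--   local img   = image.lena()                            -- 3x512x512, values in [0,1]
+--   local bytes = image.compressJPG(img, 90)              -- encoded JPEG as a ByteTensor
+--   local img2  = image.decompressJPG(bytes, 3, 'float')  -- approximately equal to img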
+
+local function loadPPM(filename, depth, tensortype)
+   require 'libppm'
+   local MAXVAL = 255
+   local a = template(tensortype).libppm.load(filename)
+   if tensortype ~= 'byte' then
+      a:mul(1/MAXVAL)
+   end
+   a = todepth(a, depth)
+   return a
+end
+rawset(image, 'loadPPM', loadPPM)
+
+local function savePPM(filename, tensor)
+   require 'libppm'
+   if tensor:nDimension() ~= 3 or tensor:size(1) ~= 3 then
+      dok.error('can only save 3xHxW images as PPM', 'image.savePPM')
+   end
+   tensor = clampImage(tensor)
+   tensor.libppm.save(filename, tensor)
+end
+rawset(image, 'savePPM', savePPM)
+
+local function savePGM(filename, tensor)
+   require 'libppm'
+   if tensor:nDimension() == 3 and tensor:size(1) ~= 1 then
+      dok.error('can only save 1xHxW or HxW images as PGM', 'image.savePGM')
+   end
+   tensor = clampImage(tensor)
+   tensor.libppm.save(filename, tensor)
+end
+rawset(image, 'savePGM', savePGM)
+
+local filetypes = {
+   jpg = {loader = image.loadJPG, saver = image.saveJPG},
+   png = {loader = image.loadPNG, saver = image.savePNG},
+   ppm = {loader = image.loadPPM, saver = image.savePPM},
+   -- yes, loadPPM not loadPGM
+   pgm = {loader = image.loadPPM, saver = image.savePGM}
+}
+
+filetypes['JPG']  = filetypes['jpg']
+filetypes['JPEG'] = filetypes['jpg']
+filetypes['jpeg'] = filetypes['jpg']
+filetypes['PNG']  = filetypes['png']
+filetypes['PPM']  = filetypes['ppm']
+filetypes['PGM']  = filetypes['pgm']
+rawset(image, 'supported_filetypes', filetypes)
+
+local function is_supported(suffix)
+   return filetypes[suffix] ~= nil
+end
+rawset(image, 'is_supported', is_supported)
+
+local function load(filename, depth, tensortype)
+   if not filename then
+      print(dok.usage('image.load',
+                       'loads an image into a torch.Tensor', nil,
+                       {type='string', help='path to file', req=true},
+                       {type='number', help='force destination depth: 1 | 3'},
+                       {type='string', help='type: byte | float | double'}))
+      dok.error('missing file name', 'image.load')
+   end
+
+   local ext
+
+   local f, err = io.open(filename, 'rb')
+   if not f then
+      error(err)
+   end
+   local hdr = f:read(4) or ''
+   f:close()
+
+   if startswith(hdr, magicJPG) then
+      ext = 'jpg'
+   elseif startswith(hdr, magicPNG) then
+      ext = 'png'
+   elseif hdr:match('^P[25]') then
+      ext = 'pgm'
+   elseif hdr:match('^P[36]') then
+      ext = 'ppm'
+   end
+
+   if not ext then
+      ext = string.match(filename,'%.(%a+)$')
+   end
+
+   local tensor
+   if image.is_supported(ext) then
+      tensor = filetypes[ext].loader(filename, depth, tensortype)
+   else
+      dok.error('unknown image type: ' .. ext, 'image.load')
+   end
+
+   return tensor
+end
+rawset(image, 'load', load)
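+
+-- A minimal usage sketch for image.load, assuming one of the files shipped in
+-- assets/; the format is detected from the magic bytes, with the file
+-- extension used only as a fallback:
+--
+--   local rgb  = image.load('assets/grace_hopper_512.jpg', 3, 'float')  -- 3xHxW in [0,1]
+--   local gray = image.load('assets/grace_hopper_512.jpg', 1, 'byte')   -- HxW in [0,255]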
+
+local function save(filename, tensor)
+   if not filename or not tensor then
+      print(dok.usage('image.save',
+                       'saves a torch.Tensor to a disk', nil,
+                       {type='string', help='path to file', req=true},
+                       {type='torch.Tensor', help='tensor to save (NxHxW, N = 1 | 3)'}))
+      dok.error('missing file name | tensor to save', 'image.save')
+   end
+   local ext = string.match(filename,'%.(%a+)$')
+   if image.is_supported(ext) then
+      tensor = filetypes[ext].saver(filename, tensor)
+   else
+      dok.error('unknown image type: ' .. ext, 'image.save')
+   end
+end
+rawset(image, 'save', save)
+
+----------------------------------------------------------------------
+-- crop
+--
+local function crop(...)
+   local dst,src,startx,starty,endx,endy
+   local format,width,height
+   local args = {...}
+   if select('#',...) == 6 then
+      dst = args[1]
+      src = args[2]
+      startx = args[3]
+      starty = args[4]
+      endx = args[5]
+      endy = args[6]
+   elseif select('#',...) == 5 then
+      if type(args[3]) == 'string' then
+         dst = args[1]
+         src = args[2]
+         format = args[3]
+         width = args[4]
+         height = args[5]
+      else
+         src = args[1]
+         startx = args[2]
+         starty = args[3]
+         endx = args[4]
+         endy = args[5]
+      end
+   elseif select('#',...) == 4 then
+      if type(args[2]) == 'string' then
+         src = args[1]
+         format = args[2]
+         width = args[3]
+         height = args[4]
+      else
+         dst = args[1]
+         src = args[2]
+         startx = args[3]
+         starty = args[4]
+      end
+   elseif select('#',...) == 3 then
+      src = args[1]
+      startx = args[2]
+      starty = args[3]
+   else
+      print(dok.usage('image.crop',
+                       'crop an image', nil,
+                       {type='torch.Tensor', help='input image', req=true},
+                       {type='number', help='start x', req=true},
+                       {type='number', help='start y', req=true},
+                       {type='number', help='end x'},
+                       {type='number', help='end y'},
+                       '',
+                       {type='torch.Tensor', help='destination', req=true},
+                       {type='torch.Tensor', help='input image', req=true},
+                       {type='number', help='start x', req=true},
+                       {type='number', help='start y', req=true},
+                       {type='number', help='end x'},
+                       {type='number', help='end y'},
+                       '',
+                       {type='torch.Tensor', help='input image', req=true},
+                       {type='string', help='format: "c" or "tl" or "tr" or "bl" or "br"', req=true},
+                       {type='number', help='width', req=true},
+                       {type='number', help='height', req=true},
+                       '',
+                       {type='torch.Tensor', help='destination', req=true},
+                       {type='torch.Tensor', help='input image', req=true},
+                       {type='string', help='format: "c" or "tl" or "tr" or "bl" or "br"', req=true},
+                       {type='number', help='width', req=true},
+                       {type='number', help='height', req=true}))
+      dok.error('incorrect arguments', 'image.crop')
+   end
+   if format then
+      local iwidth,iheight
+      if src:nDimension() == 3 then
+         iwidth,iheight = src:size(3),src:size(2)
+      else
+         iwidth,iheight = src:size(2),src:size(1)
+      end
+      local x1, y1
+      if format == 'c' then
+         x1, y1 = math.floor((iwidth-width)/2), math.floor((iheight-height)/2)
+      elseif format == 'tl' then
+         x1, y1 = 0, 0
+      elseif format == 'tr' then
+         x1, y1 = iwidth-width, 0
+      elseif format == 'bl' then
+         x1, y1 = 0, iheight-height
+      elseif format == 'br' then
+         x1, y1 = iwidth-width, iheight-height
+      else
+         error('crop format must be "c"|"tl"|"tr"|"bl"|"br"')
+      end
+      return crop(dst, src, x1, y1, x1+width, y1+height)
+   end
+   if endx==nil then
+      return src.image.cropNoScale(src,dst,startx,starty)
+   else
+      local depth=1
+      local x
+      if src:nDimension() > 2 then
+         x = src.new(src:size(1),endy-starty,endx-startx)
+      else
+         x = src.new(endy-starty,endx-startx)
+      end
+      src.image.cropNoScale(src,x,startx,starty)
+      if dst then
+         image.scale(dst, x)
+      else
+         dst = x
+      end
+   end
+   return dst
+end
+rawset(image, 'crop', crop)
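+
+-- Crop sketch: the numeric form takes (startx, starty, endx, endy), the string
+-- form takes a corner/center code plus an output width and height:
+--
+--   local img    = image.lena()
+--   local patch  = image.crop(img, 100, 100, 200, 200)  -- 3x100x100 region
+--   local center = image.crop(img, 'c', 128, 128)       -- 128x128 center crop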
+
+----------------------------------------------------------------------
+-- translate
+--
+local function translate(...)
+   local dst,src,x,y
+   local args = {...}
+   if select('#',...) == 4 then
+      dst = args[1]
+      src = args[2]
+      x = args[3]
+      y = args[4]
+   elseif select('#',...) == 3 then
+      src = args[1]
+      x = args[2]
+      y = args[3]
+   else
+      print(dok.usage('image.translate',
+                       'translate an image', nil,
+                       {type='torch.Tensor', help='input image', req=true},
+                       {type='number', help='horizontal translation', req=true},
+                       {type='number', help='vertical translation', req=true},
+                       '',
+                       {type='torch.Tensor', help='destination', req=true},
+                       {type='torch.Tensor', help='input image', req=true},
+                       {type='number', help='horizontal translation', req=true},
+                       {type='number', help='vertical translation', req=true}))
+      dok.error('incorrect arguments', 'image.translate')
+   end
+   dst = dst or src.new()
+   dst:resizeAs(src)
+   dst:zero()
+   src.image.translate(src,dst,x,y)
+   return dst
+end
+rawset(image, 'translate', translate)
+
+----------------------------------------------------------------------
+-- scale
+--
+local function scale(...)
+   local dst,src,width,height,mode,size
+   local args = {...}
+   if select('#',...) == 4 then
+      src = args[1]
+      width = args[2]
+      height = args[3]
+      mode = args[4]
+   elseif select('#',...) == 3 then
+      if type(args[3]) == 'string' then
+         if type(args[2]) == 'string' or type(args[2]) == 'number' then
+            src = args[1]
+            size = args[2]
+            mode = args[3]
+         else
+            dst = args[1]
+            src = args[2]
+            mode = args[3]
+         end
+      else
+         src = args[1]
+         width = args[2]
+         height = args[3]
+      end
+   elseif select('#',...) == 2 then
+      if type(args[2]) == 'string' or type(args[2]) == 'number' then
+         src = args[1]
+         size = args[2]
+      else
+         dst = args[1]
+         src = args[2]
+      end
+   else
+      print(dok.usage('image.scale',
+                       'rescale an image (geometry)', nil,
+                       {type='torch.Tensor', help='input image', req=true},
+                       {type='number', help='destination width', req=true},
+                       {type='number', help='destination height', req=true},
+                       {type='string', help='mode: bilinear | bicubic | simple', default='bilinear'},
+                       '',
+                       {type='torch.Tensor', help='input image', req=true},
+                       {type='string | number', help='destination size: "WxH" or "^MIN" or "*SC" or "*SCn/SCd" or MAX', req=true},
+                       {type='string', help='mode: bilinear | bicubic | simple', default='bilinear'},
+                       '',
+                       {type='torch.Tensor', help='destination image', req=true},
+                       {type='torch.Tensor', help='input image', req=true},
+                       {type='string', help='mode: bilinear | bicubic | simple', default='bilinear'}))
+      dok.error('incorrect arguments', 'image.scale')
+   end
+   if size then
+      local iwidth, iheight
+      if src:nDimension() == 3 then
+         iwidth, iheight = src:size(3),src:size(2)
+      else
+         iwidth, iheight = src:size(2),src:size(1)
+      end
+
+      -- MAX?
+      local imax = math.max(iwidth, iheight)
+      local omax = tonumber(size)
+      if omax then
+         height = iheight*omax/imax
+         width = iwidth*omax/imax
+      end
+
+      -- WxH?
+      if not width or not height then
+         width, height = size:match('(%d+)x(%d+)')
+      end
+
+      -- ^MIN?
+      if not width or not height then
+         local imin = math.min(iwidth, iheight)
+         local omin = tonumber(size:match('%^(%d+)'))
+         if omin then
+            height = iheight*omin/imin
+            width = iwidth*omin/imin
+         end
+      end
+
+      -- *SCn/SCd?
+      if not width or not height then
+         local scn, scd = size:match('%*(%d+)%/(%d+)')
+         if scn and scd then
+            height = iheight*scn/scd
+            width = iwidth*scn/scd
+         end
+      end
+
+      -- *SC?
+      if not width or not height then
+         local sc = tonumber(size:match('%*(.+)'))
+         if sc then
+            height = iheight*sc
+            width = iwidth*sc
+         end
+      end
+   end
+   if not dst and (not width or not height) then
+      dok.error('could not find valid dest size', 'image.scale')
+   end
+   if not dst then
+      height = math.max(height, 1)
+      width = math.max(width, 1)
+      if src:nDimension() == 3 then
+         dst = src.new(src:size(1), height, width)
+      else
+         dst = src.new(height, width)
+      end
+   end
+   mode = mode or 'bilinear'
+   if mode=='bilinear' then
+      src.image.scaleBilinear(src,dst)
+   elseif mode=='bicubic' then
+      src.image.scaleBicubic(src,dst)
+   elseif mode=='simple' then
+      src.image.scaleSimple(src,dst)
+   else
+      dok.error('mode must be one of: simple | bicubic | bilinear', 'image.scale')
+   end
+   return dst
+end
+rawset(image, 'scale', scale)
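+
+-- Scale sketch, showing the explicit width/height form and a few of the
+-- size-string forms parsed above:
+--
+--   local img = image.lena()
+--   local a = image.scale(img, 128, 64)            -- width 128, height 64 -> 3x64x128
+--   local b = image.scale(img, '^256')             -- shorter side scaled to 256, aspect kept
+--   local c = image.scale(img, '*1/2', 'bicubic')  -- rational scale factor, bicubic mode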
+
+----------------------------------------------------------------------
+-- rotate
+--
+local function rotate(...)
+   local dst,src,theta, mode
+   local args = {...}
+   if select('#',...) == 4 then
+      dst = args[1]
+      src = args[2]
+      theta = args[3]
+      mode = args[4]
+   elseif select('#',...) == 3 then
+      if type(args[2]) == 'number' then
+	 src = args[1]
+	 theta = args[2]
+	 mode = args[3]
+      else
+	 dst = args[1]
+	 src = args[2]
+	 theta = args[3]
+      end
+   elseif select('#',...) == 2 then
+      src = args[1]
+      theta = args[2]
+   else
+      print(dok.usage('image.rotate',
+                       'rotate an image by theta radians', nil,
+                       {type='torch.Tensor', help='input image', req=true},
+                       {type='number', help='rotation angle (in radians)', req=true},
+		       {type='string', help='mode: simple | bilinear', default='simple'},
+                       '',
+                       {type='torch.Tensor', help='destination', req=true},
+                       {type='torch.Tensor', help='input image', req=true},
+                       {type='number', help='rotation angle (in radians)', req=true},
+		       {type='string', help='mode: simple | bilinear', default='simple'}))
+      dok.error('incorrect arguments', 'image.rotate')
+   end
+   dst = dst or src.new()
+   dst:resizeAs(src)
+   mode = mode or 'simple'
+   if mode == 'simple' then
+      src.image.rotate(src,dst,theta)
+   elseif mode == 'bilinear' then
+      src.image.rotateBilinear(src,dst,theta)
+   else
+      dok.error('mode must be one of: simple | bilinear', 'image.rotate')
+   end
+   return dst
+end
+rawset(image, 'rotate', rotate)
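+
+-- Rotation sketch: the angle is in radians and the output keeps the input
+-- size, so content rotated out of the frame is lost:
+--
+--   local img = image.lena()
+--   local r = image.rotate(img, math.pi/6, 'bilinear')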
+
+----------------------------------------------------------------------
+-- polar
+--
+local function polar(...)
+   local dst,src,interp,mode
+   local args = {...}
+   if select('#',...) == 4 then
+      dst    = args[1]
+      src    = args[2]
+      interp = args[3]
+      mode   = args[4]
+    elseif select('#',...) == 3 then
+      if type(args[2]) == 'string' then
+        src    = args[1]
+        interp = args[2]
+        mode   = args[3]
+      else
+        dst    = args[1]
+        src    = args[2]
+        interp = args[3]
+      end
+   elseif select('#',...) == 2 then
+      if type(args[2]) == 'string' then
+        src    = args[1]
+        interp = args[2]
+      else
+        dst  = args[1]
+        src  = args[2]
+      end
+   elseif select('#',...) == 1 then
+     src = args[1]
+   else
+      print(dok.usage('image.polar',
+                       'convert an image to polar coordinates', nil,
+                       {type='torch.Tensor', help='input image', req=true},
+                       {type='string', help='interpolation: simple | bilinear', default='simple'},
+                       {type='string', help='mode: valid | full', default='valid'},
+                       '',
+                       {type='torch.Tensor', help='destination', req=true},
+                       {type='torch.Tensor', help='input image', req=true},
+                       {type='string', help='interpolation: simple | bilinear', default='simple'},
+                       {type='string', help='mode: valid | full', default='valid'}))
+      dok.error('incorrect arguments', 'image.polar')
+   end
+   interp = interp or 'simple'
+   mode = mode or 'valid'
+   if dst == nil then
+      local maxDist = math.floor(math.max(src:size(2), src:size(3)))
+      dst = src.new()
+      dst:resize(src:size(1), maxDist, maxDist)
+   end
+   if interp == 'simple' then
+      if mode == 'full' then
+        src.image.polar(src,dst,1)
+      elseif mode == 'valid' then
+        src.image.polar(src,dst,0)
+      else
+        dok.error('mode must be one of: valid | full', 'image.polar')
+      end
+   elseif interp == 'bilinear' then
+      if mode == 'full' then
+        src.image.polarBilinear(src,dst,1)
+      elseif mode == 'valid' then
+        src.image.polarBilinear(src,dst,0)
+      else
+        dok.error('mode must be one of: valid | full', 'image.polar')
+      end
+   else
+      dok.error('interpolation must be one of: simple | bilinear', 'image.polar')
+   end
+   return dst
+end
+rawset(image, 'polar', polar)
+
+----------------------------------------------------------------------
+-- logpolar
+--
+local function logpolar(...)
+   local dst,src,interp,mode
+   local args = {...}
+   if select('#',...) == 4 then
+      dst    = args[1]
+      src    = args[2]
+      interp = args[3]
+      mode   = args[4]
+    elseif select('#',...) == 3 then
+      if type(args[2]) == 'string' then
+        src    = args[1]
+        interp = args[2]
+        mode   = args[3]
+      else
+        dst    = args[1]
+        src    = args[2]
+        interp = args[3]
+      end
+   elseif select('#',...) == 2 then
+      if type(args[2]) == 'string' then
+        src    = args[1]
+        interp = args[2]
+      else
+        dst  = args[1]
+        src  = args[2]
+      end
+   elseif select('#',...) == 1 then
+     src = args[1]
+   else
+      print(dok.usage('image.logpolar',
+                       'convert an image to log-polar coordinates', nil,
+                       {type='torch.Tensor', help='input image', req=true},
+                       {type='string', help='interpolation: simple | bilinear', default='simple'},
+                       {type='string', help='mode: valid | full', default='valid'},
+                       '',
+                       {type='torch.Tensor', help='destination', req=true},
+                       {type='torch.Tensor', help='input image', req=true},
+                       {type='string', help='interpolation: simple | bilinear', default='simple'},
+                       {type='string', help='mode: valid | full', default='valid'}))
+      dok.error('incorrect arguments', 'image.logpolar')
+   end
+   interp = interp or 'simple'
+   mode = mode or 'valid'
+   if dst == nil then
+      local maxDist = math.floor(math.max(src:size(2), src:size(3)))
+      dst = src.new()
+      dst:resize(src:size(1), maxDist, maxDist)
+   end
+   if interp == 'simple' then
+      if mode == 'full' then
+        src.image.logPolar(src,dst,1)
+      elseif mode == 'valid' then
+        src.image.logPolar(src,dst,0)
+      else
+        dok.error('mode must be one of: valid | full', 'image.logpolar')
+      end
+   elseif interp == 'bilinear' then
+      if mode == 'full' then
+        src.image.logPolarBilinear(src,dst,1)
+      elseif mode == 'valid' then
+        src.image.logPolarBilinear(src,dst,0)
+      else
+        dok.error('mode must be one of: valid | full', 'image.logpolar')
+      end
+   else
+      dok.error('interpolation must be one of: simple | bilinear', 'image.logpolar')
+   end
+   return dst
+end
+rawset(image, 'logpolar', logpolar)
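+
+-- Polar/log-polar sketch with the interpolation and mode passed explicitly:
+--
+--   local img    = image.lena()
+--   local pol    = image.polar(img, 'bilinear', 'valid')
+--   local logpol = image.logpolar(img, 'bilinear', 'full')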
+
+----------------------------------------------------------------------
+-- warp
+--
+local function warp(...)
+   local dst,src,field
+   local mode = 'bilinear'
+   local offset_mode = true
+   local clamp_mode = 'clamp'
+   local pad_value = 0
+   local args = {...}
+   local nargs = select('#',...)
+   local bad_args = false
+   if nargs == 2 then
+      src = args[1]
+      field = args[2]
+   elseif nargs >= 3 then
+      if type(args[3]) == 'string' then
+         -- No destination tensor
+         src = args[1]
+         field = args[2]
+         mode = args[3]
+         if nargs >= 4 then offset_mode = args[4] end
+         if nargs >= 5 then clamp_mode = args[5] end
+         if nargs >= 6 then
+           assert(clamp_mode == 'pad', 'pad_value can only be specified if' ..
+                                       ' clamp_mode = "pad"')
+           pad_value = args[6]
+         end
+         if nargs >= 7 then bad_args = true end
+      else
+         -- With Destination tensor
+         dst = args[1]
+         src = args[2]
+         field = args[3]
+         if nargs >= 4 then mode = args[4] end
+         if nargs >= 5 then offset_mode = args[5] end
+         if nargs >= 6 then clamp_mode = args[6] end
+         if nargs >= 7 then
+           assert(clamp_mode == 'pad', 'pad_value can only be specified if' ..
+                                       ' clamp_mode = "pad"')
+           pad_value = args[7]
+         end
+         if nargs >= 8 then bad_args = true end
+      end
+   end
+   if bad_args then
+      print(dok.usage('image.warp',
+         'warp an image, according to a flow field', nil,
+         {type='torch.Tensor', help='input image (KxHxW)', req=true},
+         {type='torch.Tensor', help='(y,x) flow field (2xHxW)', req=true},
+         {type='string', help='mode: lanczos | bicubic | bilinear | simple', default='bilinear'},
+         {type='boolean', help='offset mode (add (x,y) to flow field)', default=true},
+         {type='string', help='clamp mode: how to handle interp of samples off the input image (clamp | pad)', default='clamp'},
+         '',
+         {type='torch.Tensor', help='destination', req=true},
+         {type='torch.Tensor', help='input image (KxHxW)', req=true},
+         {type='torch.Tensor', help='(y,x) flow field (2xHxW)', req=true},
+         {type='string', help='mode: lanczos | bicubic | bilinear | simple', default='bilinear'},
+         {type='boolean', help='offset mode (add (x,y) to flow field)', default=true},
+         {type='string', help='clamp mode: how to handle interp of samples off the input image (clamp | pad)', default='clamp'},
+         {type='number', help='pad value: value to pad image. Can only be set when clamp mode equals "pad"', default=0}))
+      dok.error('incorrect arguments', 'image.warp')
+   end
+   -- This is a little messy, but convert mode string to an enum
+   if (mode == 'simple') then
+      mode = 0
+   elseif (mode == 'bilinear') then
+      mode = 1
+   elseif (mode == 'bicubic') then
+      mode = 2
+   elseif (mode == 'lanczos') then
+      mode = 3
+   else
+      dok.error('Incorrect arguments (mode is not lanczos | bicubic | bilinear | simple)!', 'image.warp')
+   end
+   if (clamp_mode == 'clamp') then
+      clamp_mode = 0
+   elseif (clamp_mode == 'pad') then
+      clamp_mode = 1
+   else
+      dok.error('Incorrect arguments (clamp_mode is not clamp | pad)!', 'image.warp')
+   end
+
+   local dim2 = false
+   if src:nDimension() == 2 then
+      dim2 = true
+      src = src:reshape(1,src:size(1),src:size(2))
+   end
+   dst = dst or src.new()
+   dst:resize(src:size(1), field:size(2), field:size(3))
+
+   src.image.warp(dst, src, field, mode, offset_mode, clamp_mode, pad_value)
+   if dim2 then
+      dst = dst[1]
+   end
+   return dst
+end
+rawset(image, 'warp', warp)
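+
+-- Warp sketch: with the default offset mode, the 2xHxW field holds (y,x)
+-- displacements added to the identity grid, so a constant value of 10 in the
+-- first plane samples each output pixel from a location 10 rows away:
+--
+--   local img  = image.lena()
+--   local flow = torch.zeros(2, img:size(2), img:size(3))
+--   flow[1]:fill(10)   -- vertical displacement only
+--   local warped = image.warp(img, flow, 'bilinear', true, 'clamp')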
+
+----------------------------------------------------------------------
+-- hflip
+--
+local function hflip(...)
+   local dst,src
+   local args = {...}
+   if select('#',...) == 2 then
+      dst = args[1]
+      src = args[2]
+   elseif select('#',...) == 1 then
+      src = args[1]
+   else
+      print(dok.usage('image.hflip',
+                       'flips an image horizontally (left/right)', nil,
+                       {type='torch.Tensor', help='input image', req=true},
+                       '',
+                       {type='torch.Tensor', help='destination', req=true},
+                       {type='torch.Tensor', help='input image', req=true}))
+      dok.error('incorrect arguments', 'image.hflip')
+   end
+
+   if (src:dim() == 2) and (not src:isContiguous()) then
+     dok.error('2D input tensor is not contiguous', 'image.hflip')
+   end
+
+   dst = dst or src.new()
+   local original_size = src:size()
+   if src:nDimension() == 2 then
+      src = src:new():resize(1,src:size(1),src:size(2))
+   end
+   dst:resizeAs(src)
+
+   if not dst:isContiguous() then
+     dok.error('destination tensor is not contiguous', 'image.hflip')
+   end
+
+   dst.image.hflip(dst, src)
+   dst:resize(original_size)
+   return dst
+end
+rawset(image, 'hflip', hflip)
+
+----------------------------------------------------------------------
+-- vflip
+--
+local function vflip(...)
+   local dst,src
+   local args = {...}
+   if select('#',...) == 2 then
+      dst = args[1]
+      src = args[2]
+   elseif select('#',...) == 1 then
+      src = args[1]
+   else
+      print(dok.usage('image.vflip',
+                       'flips an image vertically (upside-down)', nil,
+                       {type='torch.Tensor', help='input image', req=true},
+                       '',
+                       {type='torch.Tensor', help='destination', req=true},
+                       {type='torch.Tensor', help='input image', req=true}))
+      dok.error('incorrect arguments', 'image.vflip')
+   end
+
+   if (src:dim() == 2) and (not src:isContiguous()) then
+     dok.error('2D input tensor is not contiguous', 'image.vflip')
+   end
+
+   dst = dst or src.new()
+   local original_size = src:size()
+   if src:nDimension() == 2 then
+      src = src:new():resize(1,src:size(1),src:size(2))
+   end
+   dst:resizeAs(src)
+
+   if not dst:isContiguous() then
+     dok.error('destination tensor is not contiguous', 'image.vflip')
+   end
+
+   dst.image.vflip(dst, src)
+   dst:resize(original_size)
+   return dst
+end
+rawset(image, 'vflip', vflip)
+
+----------------------------------------------------------------------
+-- flip (specify dimension, up to 5D tensor)
+--
+local function flip(...)
+   local dst,src,flip_dim
+   local args = {...}
+   if select('#',...) == 3 then
+      dst = args[1]
+      src = args[2]
+      flip_dim = args[3]
+   elseif select('#',...) == 2 then
+      src = args[1]
+      flip_dim = args[2]
+   else
+      print(dok.usage('image.flip',
+                       'flips an image along a specified dimension', nil,
+                       {type='torch.Tensor', help='input image', req=true},
+                       {type='number', help='Dimension to flip', req=true},
+                       '',
+                       {type='torch.Tensor', help='destination', req=true},
+                       {type='torch.Tensor', help='input image', req=true},
+                       {type='number', help='Dimension to flip', req=true}))
+      dok.error('incorrect arguments', 'image.flip')
+   end
+   assert(src:nDimension() <= 5, 'too many input dims (up to 5D supported)')
+   assert(flip_dim <= src:nDimension() and flip_dim >= 1, 'Bad flip dimension')
+
+   if not src:isContiguous() then
+     dok.error('input tensor is not contiguous', 'image.flip')
+   end
+
+   dst = dst or src.new()
+   local original_size = src:size()
+   local flip_dim_cpp
+   if src:nDimension() == 1 then
+      src = src:new():resize(1, 1, 1, 1, src:size(1))
+      flip_dim_cpp = flip_dim + 4
+   elseif src:nDimension() == 2 then
+      src = src:new():resize(1, 1, 1, src:size(1), src:size(2))
+      flip_dim_cpp = flip_dim + 3
+   elseif src:nDimension() == 3 then
+      src = src:new():resize(1, 1, src:size(1), src:size(2),src:size(3))
+      flip_dim_cpp = flip_dim + 2
+   elseif src:nDimension() == 4 then
+      src = src:new():resize(1, src:size(1), src:size(2), src:size(3),
+        src:size(4))
+      flip_dim_cpp = flip_dim + 1
+   else
+      flip_dim_cpp = flip_dim
+   end
+   dst:resizeAs(src)
+
+   if not dst:isContiguous() then
+     dok.error('destination tensor is not contiguous', 'image.flip')
+   end
+
+   dst.image.flip(dst, src, flip_dim_cpp)
+   dst:resize(original_size)
+   return dst
+end
+
+rawset(image, 'flip', flip)
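+
+-- Flip sketch: hflip/vflip mirror a HxW or KxHxW image, while flip takes an
+-- explicit dimension (dimension 3 of a 3xHxW image is its width):
+--
+--   local img = image.lena()
+--   local lr  = image.hflip(img)
+--   local ud  = image.vflip(img)
+--   local lr2 = image.flip(img, 3)   -- mirrors along the width, like hflip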
+
+----------------------------------------------------------------------
+-- convolve(dst,src,ker,type)
+-- convolve(dst,src,ker)
+-- dst = convolve(src,ker,type)
+-- dst = convolve(src,ker)
+--
+local function convolve(...)
+   local dst,src,kernel,mode
+   local args = {...}
+   if select('#',...) == 4 then
+      dst = args[1]
+      src = args[2]
+      kernel = args[3]
+      mode = args[4]
+   elseif select('#',...) == 3 then
+      if type(args[3]) == 'string' then
+         src = args[1]
+         kernel = args[2]
+         mode = args[3]
+      else
+         dst = args[1]
+         src = args[2]
+         kernel = args[3]
+      end
+   elseif select('#',...) == 2 then
+      src = args[1]
+      kernel = args[2]
+   else
+      print(dok.usage('image.convolve',
+                       'convolves an input image with a kernel, returns the result', nil,
+                       {type='torch.Tensor', help='input image', req=true},
+                       {type='torch.Tensor', help='kernel', req=true},
+                       {type='string', help='type: full | valid | same', default='valid'},
+                       '',
+                       {type='torch.Tensor', help='destination', req=true},
+                       {type='torch.Tensor', help='input image', req=true},
+                       {type='torch.Tensor', help='kernel', req=true},
+                       {type='string', help='type: full | valid | same', default='valid'}))
+      dok.error('incorrect arguments', 'image.convolve')
+   end
+   if mode and mode ~= 'valid' and mode ~= 'full' and mode ~= 'same' then
+      dok.error('mode has to be one of: full | valid | same', 'image.convolve')
+   end
+   local md = (((mode == 'full') or (mode == 'same')) and 'F') or 'V'
+   if kernel:nDimension() == 2 and src:nDimension() == 3 then
+      local k3d = src.new(src:size(1), kernel:size(1), kernel:size(2))
+      for i = 1,src:size(1) do
+         k3d[i]:copy(kernel)
+      end
+      kernel = k3d
+   end
+   if dst then
+      torch.conv2(dst,src,kernel,md)
+   else
+      dst = torch.conv2(src,kernel,md)
+   end
+   if mode == 'same' then
+      local cx = dst:dim()
+      local cy = cx-1
+      local ofy = math.ceil(kernel:size(cy)/2)
+      local ofx = math.ceil(kernel:size(cx)/2)
+      dst = dst:narrow(cy, ofy, src:size(cy)):narrow(cx, ofx, src:size(cx))
+   end
+   return dst
+end
+rawset(image, 'convolve', convolve)
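+
+-- Convolution sketch with a hand-built box kernel (a 2D kernel is replicated
+-- across the input channels above); 'same' keeps the input height and width:
+--
+--   local img = image.lena()
+--   local box = torch.ones(5, 5):div(25)
+--   local blurred = image.convolve(img, box, 'same')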
+
+----------------------------------------------------------------------
+-- compresses an image between min and max
+--
+local function minmax(args)
+   local tensor = args.tensor
+   local min = args.min
+   local max = args.max
+   local symm = args.symm or false
+   local inplace = args.inplace or false
+   local saturate = args.saturate or false
+   local tensorOut = args.tensorOut or (inplace and tensor)
+      or torch.Tensor(tensor:size()):copy(tensor)
+
+   -- resize
+   if args.tensorOut then
+      tensorOut:resize(tensor:size()):copy(tensor)
+   end
+
+   -- saturate useless if min/max inferred
+   if min == nil and max == nil then
+      saturate = false
+   end
+
+   -- rescale min
+   local fmin = 0
+   if (min == nil) then
+      if args.symm then
+         fmin = math.max(math.abs(tensorOut:min()),math.abs(tensorOut:max()))
+         min = -fmin
+      else
+         min = tensorOut:min()
+      end
+   end
+   if (min ~= 0) then tensorOut:add(-min) end
+
+   -- rescale for max
+   if (max == nil) then
+      if args.symm then
+         max = fmin*2
+      else
+         max = tensorOut:max()
+      end
+   else
+      max = max - min
+   end
+   if (max ~= 0) then tensorOut:div(max) end
+
+   -- saturate
+   if saturate then
+      tensorOut.image.saturate(tensorOut)
+   end
+
+   -- and return
+   return tensorOut
+end
+rawset(image, 'minmax', minmax)
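+
+-- Normalization sketch: with no min/max given the tensor is rescaled to [0,1];
+-- symm=true uses a symmetric input range [-m, m], so zero maps to 0.5:
+--
+--   local t = torch.randn(3, 16, 16)
+--   local n = image.minmax{tensor=t}              -- values now in [0,1]
+--   local s = image.minmax{tensor=t, symm=true}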
+
+local function toDisplayTensor(...)
+   -- usage
+   local _, input, padding, nrow, scaleeach, min, max, symm, saturate = dok.unpack(
+      {...},
+      'image.toDisplayTensor',
+      'given a pack of tensors, returns a single tensor that contains a grid of all in the pack',
+      {arg='input',type='torch.Tensor | table', help='input (HxW or KxHxW or Kx3xHxW or list)',req=true},
+      {arg='padding', type='number', help='number of padding pixels between images', default=0},
+      {arg='nrow',type='number',help='number of images per row', default=6},
+      {arg='scaleeach', type='boolean', help='individual scaling for list of images', default=false},
+      {arg='min', type='number', help='lower-bound for range'},
+      {arg='max', type='number', help='upper-bound for range'},
+      {arg='symmetric',type='boolean',help='if on, images will be displayed using a symmetric dynamic range, useful for drawing filters', default=false},
+      {arg='saturate', type='boolean', help='saturate (useful when min/max are lower than actual min/max)', default=true}
+   )
+
+   local packed = torch.Tensor()
+   if type(input) == 'table' then
+      -- pack images in single tensor
+      local ndims = input[1]:dim()
+      local channels = ((ndims == 2) and 1) or input[1]:size(1)
+      local height = input[1]:size(ndims-1)
+      local width = input[1]:size(ndims)
+      packed:resize(#input,channels,height,width)
+      for i,img in ipairs(input) do
+         packed[i]:copy(input[i])
+      end
+   elseif torch.isTensor(input) then
+      packed:resize(input:size()):copy(input)
+   else
+      error('Unknown or incompatible type of input: ' .. torch.type(input))
+   end
+
+   -- scale each
+   if scaleeach and (
+         (packed:dim() == 4 and (packed:size(2) == 3 or packed:size(2) == 1))
+         or
+         (packed:dim() == 3 and (packed:size(1) ~= 1 and packed:size(1) ~= 3))
+         ) then
+      for i=1,packed:size(1) do
+         image.minmax{tensor=packed[i], inplace=true, min=min, max=max, symm=symm, saturate=saturate}
+      end
+   end
+
+   local grid = torch.Tensor()
+   if packed:dim() == 4 and (packed:size(2) == 3 or packed:size(2) == 1) then
+      -- arbitrary number of color images: lay them out on a grid
+      local nmaps = packed:size(1)
+      local xmaps = math.min(nrow, nmaps)
+      local ymaps = math.ceil(nmaps / xmaps)
+      local height = packed:size(3)+padding
+      local width = packed:size(4)+padding
+      grid:resize(packed:size(2), height*ymaps, width*xmaps):fill(packed:max())
+      local k = 1
+      for y = 1,ymaps do
+         for x = 1,xmaps do
+            if k > nmaps then break end
+            grid:narrow(2,(y-1)*height+1+padding/2,height-padding):narrow(3,(x-1)*width+1+padding/2,width-padding):copy(packed[k])
+            k = k + 1
+         end
+      end
+   elseif packed:dim() == 2  or (packed:dim() == 3 and (packed:size(1) == 1 or packed:size(1) == 3)) then
+      -- Rescale range
+      image.minmax{tensor=packed, inplace=true, min=min, max=max, symm=symm, saturate=saturate}
+      return packed
+   elseif packed:dim() == 3 then
+      -- arbitrary number of channels: lay them out on a grid
+      local nmaps = packed:size(1)
+      local xmaps = math.min(nrow, nmaps)
+      local ymaps = math.ceil(nmaps / xmaps)
+      local height = packed:size(2)+padding
+      local width = packed:size(3)+padding
+      grid:resize(height*ymaps, width*xmaps):fill(packed:max())
+      local k = 1
+      for y = 1,ymaps do
+         for x = 1,xmaps do
+            if k > nmaps then break end
+            grid:narrow(1,(y-1)*height+1+padding/2,height-padding):narrow(2,(x-1)*width+1+padding/2,width-padding):copy(packed[k])
+            k = k + 1
+         end
+      end
+   else
+      xerror('packed must be a HxW or KxHxW or Kx3xHxW tensor, or a list of tensors', 'image.toDisplayTensor')
+   end
+
+   if not scaleeach then
+      image.minmax{tensor=grid, inplace=true, min=min, max=max, symm=symm, saturate=saturate}
+   end
+   return grid
+end
+rawset(image,'toDisplayTensor',toDisplayTensor)
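+
+-- Grid sketch: a Kx3xHxW batch is tiled into a single 3-channel mosaic,
+-- nrow images per row, with `padding` pixels between tiles:
+--
+--   local batch  = torch.rand(8, 3, 32, 32)
+--   local mosaic = image.toDisplayTensor{input=batch, nrow=4, padding=2}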
+
+----------------------------------------------------------------------
+-- super generic display function
+--
+local function display(...)
+   -- usage
+   local _, input, zoom, min, max, legend, w, ox, oy, scaleeach, gui, offscreen, padding, symm, nrow, saturate = dok.unpack(
+      {...},
+      'image.display',
+      'displays a single image, with optional saturation/zoom',
+      {arg='image', type='torch.Tensor | table', help='image (HxW or KxHxW or Kx3xHxW or list)', req=true},
+      {arg='zoom', type='number', help='display zoom', default=1},
+      {arg='min', type='number', help='lower-bound for range'},
+      {arg='max', type='number', help='upper-bound for range'},
+      {arg='legend', type='string', help='legend', default='image.display'},
+      {arg='win', type='qt window', help='window descriptor'},
+      {arg='x', type='number', help='x offset (only if win is given)', default=0},
+      {arg='y', type='number', help='y offset (only if win is given)', default=0},
+      {arg='scaleeach', type='boolean', help='individual scaling for list of images', default=false},
+      {arg='gui', type='boolean', help='if on, user can zoom in/out (turn off for faster display)',
+       default=true},
+      {arg='offscreen', type='boolean', help='offscreen rendering (to generate images)',
+       default=false},
+      {arg='padding', type='number', help='number of padding pixels between images', default=0},
+      {arg='symmetric',type='boolean',help='if on, images will be displayed using a symmetric dynamic range, useful for drawing filters', default=false},
+      {arg='nrow',type='number',help='number of images per row', default=6},
+      {arg='saturate', type='boolean', help='saturate (useful when min/max are lower than actual min/max)', default=true}
+   )
+
+   -- dependencies
+   require 'qt'
+   require 'qttorch'
+   require 'qtwidget'
+   require 'qtuiloader'
+
+   input = image.toDisplayTensor{input=input, padding=padding, nrow=nrow, saturate=saturate,
+                                 scaleeach=scaleeach, min=min, max=max, symmetric=symm}
+
+   -- if 2 dims or 3 dims and 1/3 channels, then we treat it as a single image
+   if input:nDimension() == 2  or (input:nDimension() == 3 and (input:size(1) == 1 or input:size(1) == 3)) then
+      -- Compute width
+      local d = input:nDimension()
+      local x = input:size(d)*zoom
+      local y = input:size(d-1)*zoom
+
+      -- if gui active, then create interactive window (with zoom, clicks and so on)
+      if gui and not w and not offscreen then
+         -- create window context
+         local closure = w
+         local hook_resize, hook_mouse
+         if closure and closure.window and closure.image then
+            closure.image = input
+            closure.refresh(x,y)
+         else
+            closure = {image=input}
+            hook_resize = function(wi,he)
+                             local qtimg = qt.QImage.fromTensor(closure.image)
+                             closure.painter:image(0,0,wi,he,qtimg)
+                             collectgarbage()
+                          end
+            hook_mouse = function(x,y,button)
+                            --local size = closure.window.frame.size:totable()
+                            --size.width =
+                            --size.height =
+                            if button == 'LeftButton' then
+                            elseif button == 'RightButton' then
+                            end
+                            --closure.window.frame.size = qt.QSize(size)
+                         end
+            closure.window, closure.painter = image.window(hook_resize,hook_mouse)
+            closure.refresh = hook_resize
+         end
+         closure.window.size = qt.QSize{width=x,height=y}
+         closure.window.windowTitle = legend
+         closure.window:show()
+         hook_resize(x,y)
+         closure.isclosure = true
+         return closure
+      else
+         if offscreen then
+            w = w or qt.QtLuaPainter(x,y)
+         else
+            w = w or qtwidget.newwindow(x,y,legend)
+         end
+         if w.window and not w.window.visible then
+            -- make sure window is visible
+            w.window.visible = true
+         end
+         if w.isclosure then
+            -- window was created with gui, just update closure
+            local closure = w
+            closure.image = input
+            local size = closure.window.size:totable()
+            closure.window.windowTitle = legend
+            closure.refresh(size.width, size.height)
+         else
+            -- if no gui, create plain window, and blit
+            local qtimg = qt.QImage.fromTensor(input)
+            w:image(ox,oy,x,y,qtimg)
+         end
+      end
+   else
+      xerror('image must be a HxW or KxHxW or Kx3xHxW tensor, or a list of tensors', 'image.display')
+   end
+   -- return painter
+   return w
+end
+rawset(image, 'display', display)
+
+----------------------------------------------------------------------
+-- creates a window context for images
+--
+local function window(hook_resize, hook_mousepress, hook_mousedoublepress)
+   require 'qt'
+   require 'qttorch'
+   require 'qtwidget'
+   require 'qtuiloader'
+   local pathui = paths.concat(fpath(), 'win.ui')
+   local win = qtuiloader.load(pathui)
+   local painter = qt.QtLuaPainter(win.frame)
+   if hook_resize then
+      qt.connect(qt.QtLuaListener(win.frame),
+                 'sigResize(int,int)',
+                 hook_resize)
+   end
+   if hook_mousepress then
+      qt.connect(qt.QtLuaListener(win.frame),
+                 'sigMousePress(int,int,QByteArray,QByteArray,QByteArray)',
+                 hook_mousepress)
+   end
+   if hook_mousedoublepress then
+      qt.connect(qt.QtLuaListener(win.frame),
+                 'sigMouseDoubleClick(int,int,QByteArray,QByteArray,QByteArray)',
+                 hook_mousedoublepress)
+   end
+   local ctrl = false
+   qt.connect(qt.QtLuaListener(win),
+              'sigKeyPress(QString,QByteArray,QByteArray)',
+              function (str, s2)
+                 if s2 and s2 == 'Key_Control' then
+                    ctrl = true
+                 elseif s2 and s2 == 'Key_W' and ctrl then
+                    win:close()
+                 else
+                    ctrl = false
+                 end
+              end)
+   return win,painter
+end
+rawset(image, 'window', window)
+
+----------------------------------------------------------------------
+-- lena is always useful
+--
+local function lena()
+   local fname = 'grace_hopper_512'
+   local img
+   if xlua.require 'libjpeg' then
+      img = image.load(paths.concat(fpath(), 'assets', fname .. '.jpg'), 3)
+   elseif xlua.require 'libpng' then
+      img = image.load(paths.concat(fpath(), 'assets', fname .. '.png'), 3)
+   else
+      dok.error('no bindings available to load images (libjpeg AND libpng missing)', 'image.lena')
+   end
+   return img
+end
+rawset(image, 'lena', lena)
+
+
+----------------------------------------------------------------------
+-- fabio is a nice gender-balancing variation on lena
+-- See: http://www.claremontmckenna.edu/news/every-picture-tells-a-story/
+-- and first use in http://arxiv.org/abs/1202.6429
+-- along with original file on http://nuit-blanche.blogspot.co.uk/2012/03/let-there-be-only-one-fabio.html
+local function fabio()
+   local fname = 'fabio'
+   local img
+   if xlua.require 'libjpeg' then
+      img = image.load(paths.concat(fpath(), 'assets', fname .. '.jpg'), 1)
+   elseif xlua.require 'libpng' then
+      img = image.load(paths.concat(fpath(), 'assets', fname .. '.png'), 1)
+   else
+      dok.error('no bindings available to load images (libjpeg AND libpng missing)', 'image.fabio')
+   end
+   return img
+end
+rawset(image, 'fabio', fabio)
+
+
+----------------------------------------------------------------------
+-- image.rgb2yuv(image)
+-- converts a RGB image to YUV
+--
+function image.rgb2yuv(...)
+   -- arg check
+   local output,input
+   local args = {...}
+   if select('#',...) == 2 then
+      output = args[1]
+      input = args[2]
+   elseif select('#',...) == 1 then
+      input = args[1]
+   else
+      print(dok.usage('image.rgb2yuv',
+                      'transforms an image from RGB to YUV', nil,
+                      {type='torch.Tensor', help='input image', req=true},
+                      '',
+                      {type='torch.Tensor', help='output image', req=true},
+                      {type='torch.Tensor', help='input image', req=true}
+                      ))
+      dok.error('missing input', 'image.rgb2yuv')
+   end
+
+   -- resize
+   output = output or input.new()
+   output:resizeAs(input)
+
+   -- input channels
+   local inputRed = input[1]
+   local inputGreen = input[2]
+   local inputBlue = input[3]
+
+   -- output channels
+   local outputY = output[1]
+   local outputU = output[2]
+   local outputV = output[3]
+
+   -- convert
+   outputY:zero():add(0.299, inputRed):add(0.587, inputGreen):add(0.114, inputBlue)
+   outputU:zero():add(-0.14713, inputRed):add(-0.28886, inputGreen):add(0.436, inputBlue)
+   outputV:zero():add(0.615, inputRed):add(-0.51499, inputGreen):add(-0.10001, inputBlue)
+
+   -- return YUV image
+   return output
+end
+
+----------------------------------------------------------------------
+-- image.yuv2rgb(image)
+-- converts a YUV image to RGB
+--
+function image.yuv2rgb(...)
+   -- arg check
+   local output,input
+   local args = {...}
+   if select('#',...) == 2 then
+      output = args[1]
+      input = args[2]
+   elseif select('#',...) == 1 then
+      input = args[1]
+   else
+      print(dok.usage('image.yuv2rgb',
+                      'transforms an image from YUV to RGB', nil,
+                      {type='torch.Tensor', help='input image', req=true},
+                      '',
+                      {type='torch.Tensor', help='output image', req=true},
+                      {type='torch.Tensor', help='input image', req=true}
+                      ))
+      dok.error('missing input', 'image.yuv2rgb')
+   end
+
+   -- resize
+   output = output or input.new()
+   output:resizeAs(input)
+
+   -- input channels
+   local inputY = input[1]
+   local inputU = input[2]
+   local inputV = input[3]
+
+   -- output channels
+   local outputRed = output[1]
+   local outputGreen = output[2]
+   local outputBlue = output[3]
+
+   -- convert
+   outputRed:copy(inputY):add(1.13983, inputV)
+   outputGreen:copy(inputY):add(-0.39465, inputU):add(-0.58060, inputV)
+   outputBlue:copy(inputY):add(2.03211, inputU)
+
+   -- return RGB image
+   return output
+end
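+
+-- Colorspace round-trip sketch: the YUV transform is linear, so converting
+-- back and forth recovers the input up to floating-point rounding:
+--
+--   local rgb  = image.lena()
+--   local yuv  = image.rgb2yuv(rgb)
+--   local back = image.yuv2rgb(yuv)   -- approximately equal to rgb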
+
+----------------------------------------------------------------------
+-- image.rgb2y(image)
+-- converts a RGB image to Y (discards U/V)
+--
+function image.rgb2y(...)
+   -- arg check
+   local output,input
+   local args = {...}
+   if select('#',...) == 2 then
+      output = args[1]
+      input = args[2]
+   elseif select('#',...) == 1 then
+      input = args[1]
+   else
+      print(dok.usage('image.rgb2y',
+                      'transforms an image from RGB to Y', nil,
+                      {type='torch.Tensor', help='input image', req=true},
+                      '',
+                      {type='torch.Tensor', help='output image', req=true},
+                      {type='torch.Tensor', help='input image', req=true}
+                      ))
+      dok.error('missing input', 'image.rgb2y')
+   end
+
+   -- resize
+   output = output or input.new()
+   output:resize(1, input:size(2), input:size(3))
+
+   -- input channels
+   local inputRed = input[1]
+   local inputGreen = input[2]
+   local inputBlue = input[3]
+
+   -- output channels
+   local outputY = output[1]
+
+   -- convert
+   input.image.rgb2y(input, outputY)
+
+   -- return Y image
+   return output
+end
+
+----------------------------------------------------------------------
+-- image.rgb2hsl(image)
+-- converts an RGB image to HSL
+--
+function image.rgb2hsl(...)
+   -- arg check
+   local output,input
+   local args = {...}
+   if select('#',...) == 2 then
+      output = args[1]
+      input = args[2]
+   elseif select('#',...) == 1 then
+      input = args[1]
+   else
+      print(dok.usage('image.rgb2hsl',
+                      'transforms an image from RGB to HSL', nil,
+                      {type='torch.Tensor', help='input image', req=true},
+                      '',
+                      {type='torch.Tensor', help='output image', req=true},
+                      {type='torch.Tensor', help='input image', req=true}
+                      ))
+      dok.error('missing input', 'image.rgb2hsl')
+   end
+
+   -- resize
+   output = output or input.new()
+   output:resizeAs(input)
+
+   -- compute
+   input.image.rgb2hsl(input,output)
+
+   -- return HSL image
+   return output
+end
+
+----------------------------------------------------------------------
+-- image.hsl2rgb(image)
+-- converts an HSL image to RGB
+--
+function image.hsl2rgb(...)
+   -- arg check
+   local output,input
+   local args = {...}
+   if select('#',...) == 2 then
+      output = args[1]
+      input = args[2]
+   elseif select('#',...) == 1 then
+      input = args[1]
+   else
+      print(dok.usage('image.hsl2rgb',
+                      'transforms an image from HSL to RGB', nil,
+                      {type='torch.Tensor', help='input image', req=true},
+                      '',
+                      {type='torch.Tensor', help='output image', req=true},
+                      {type='torch.Tensor', help='input image', req=true}
+                      ))
+      dok.error('missing input', 'image.hsl2rgb')
+   end
+
+   -- resize
+   output = output or input.new()
+   output:resizeAs(input)
+
+   -- compute
+   input.image.hsl2rgb(input,output)
+
+   -- return RGB image
+   return output
+end
+
+----------------------------------------------------------------------
+-- image.rgb2hsv(image)
+-- converts an RGB image to HSV
+--
+function image.rgb2hsv(...)
+   -- arg check
+   local output,input
+   local args = {...}
+   if select('#',...) == 2 then
+      output = args[1]
+      input = args[2]
+   elseif select('#',...) == 1 then
+      input = args[1]
+   else
+      print(dok.usage('image.rgb2hsv',
+                      'transforms an image from RGB to HSV', nil,
+                      {type='torch.Tensor', help='input image', req=true},
+                      '',
+                      {type='torch.Tensor', help='output image', req=true},
+                      {type='torch.Tensor', help='input image', req=true}
+                      ))
+      dok.error('missing input', 'image.rgb2hsv')
+   end
+
+   -- resize
+   output = output or input.new()
+   output:resizeAs(input)
+
+   -- compute
+   input.image.rgb2hsv(input,output)
+
+   -- return HSV image
+   return output
+end
+
+----------------------------------------------------------------------
+-- image.hsv2rgb(image)
+-- converts an HSV image to RGB
+--
+function image.hsv2rgb(...)
+   -- arg check
+   local output,input
+   local args = {...}
+   if select('#',...) == 2 then
+      output = args[1]
+      input = args[2]
+   elseif select('#',...) == 1 then
+      input = args[1]
+   else
+      print(dok.usage('image.hsv2rgb',
+                      'transforms an image from HSV to RGB', nil,
+                      {type='torch.Tensor', help='input image', req=true},
+                      '',
+                      {type='torch.Tensor', help='output image', req=true},
+                      {type='torch.Tensor', help='input image', req=true}
+                      ))
+      dok.error('missing input', 'image.hsv2rgb')
+   end
+
+   -- resize
+   output = output or input.new()
+   output:resizeAs(input)
+
+   -- compute
+   input.image.hsv2rgb(input,output)
+
+   -- return RGB image
+   return output
+end
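+
+-- Usage sketch (editorial note): rgb2hsv/hsv2rgb (and the HSL pair above) are
+-- inverses up to numerical precision, so a roundtrip should approximately
+-- reproduce the input:
+--
+--   local hsv  = image.rgb2hsv(img)
+--   local back = image.hsv2rgb(hsv)  -- approximately equals img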
+
+----------------------------------------------------------------------
+-- image.rgb2lab(image)
+-- converts an RGB image to LAB
+-- assumes sRGB input in the range [0, 1]
+--
+function image.rgb2lab(...)
+   -- arg check
+   local output,input
+   local args = {...}
+   if select('#',...) == 2 then
+      output = args[1]
+      input = args[2]
+   elseif select('#',...) == 1 then
+      input = args[1]
+   else
+      print(dok.usage('image.rgb2lab',
+                      'transforms an image from sRGB to LAB', nil,
+                      {type='torch.Tensor', help='input image', req=true},
+                      '',
+                      {type='torch.Tensor', help='output image', req=true},
+                      {type='torch.Tensor', help='input image', req=true}
+                      ))
+      dok.error('missing input', 'image.rgb2lab')
+   end
+
+   -- resize
+   output = output or input.new()
+   output:resizeAs(input)
+
+   -- compute
+   input.image.rgb2lab(input,output)
+
+   -- return LAB image
+   return output
+end
+
+----------------------------------------------------------------------
+-- image.lab2rgb(image)
+-- converts an LAB image to RGB (assumes sRGB)
+--
+function image.lab2rgb(...)
+   -- arg check
+   local output,input
+   local args = {...}
+   if select('#',...) == 2 then
+      output = args[1]
+      input = args[2]
+   elseif select('#',...) == 1 then
+      input = args[1]
+   else
+      print(dok.usage('image.lab2rgb',
+                      'transforms an image from LAB to RGB', nil,
+                      {type='torch.Tensor', help='input image', req=true},
+                      '',
+                      {type='torch.Tensor', help='output image', req=true},
+                      {type='torch.Tensor', help='input image', req=true}
+                      ))
+      dok.error('missing input', 'image.lab2rgb')
+   end
+
+   -- resize
+   output = output or input.new()
+   output:resizeAs(input)
+
+   -- compute
+   input.image.lab2rgb(input,output)
+
+   -- return sRGB image
+   return output
+end
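+
+-- Usage sketch (editorial note): rgb2lab assumes sRGB values in [0, 1] (see
+-- the comment above), so scale ByteTensor images to that range first:
+--
+--   local lab = image.rgb2lab(img)   -- img: 3xHxW, values in [0, 1]
+--   local rgb = image.lab2rgb(lab)   -- approximately equals img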
+
+
+----------------------------------------------------------------------
+-- image.rgb2nrgb(image)
+-- converts an RGB image to normalized-RGB
+--
+function image.rgb2nrgb(...)
+   -- arg check
+   local output,input
+   local args = {...}
+   if select('#',...) == 2 then
+      output = args[1]
+      input = args[2]
+   elseif select('#',...) == 1 then
+      input = args[1]
+   else
+      print(dok.usage('image.rgb2nrgb',
+                      'transforms an image from RGB to normalized RGB', nil,
+                      {type='torch.Tensor', help='input image', req=true},
+                      '',
+                      {type='torch.Tensor', help='output image', req=true},
+                      {type='torch.Tensor', help='input image', req=true}
+                      ))
+      dok.error('missing input', 'image.rgb2nrgb')
+   end
+
+   -- resize
+   output = output or input.new()
+   output:resizeAs(input)
+   local sum = input.new()
+   sum:resize(input:size(2), input:size(3))
+
+   -- compute sum and normalize
+   sum:copy(input[1]):add(input[2]):add(input[3]):add(1e-6)
+   output:copy(input)
+   output[1]:cdiv(sum)
+   output[2]:cdiv(sum)
+   output[3]:cdiv(sum)
+
+   -- return normalized-RGB image
+   return output
+end
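+
+-- Usage sketch (editorial note): each output channel is divided by the
+-- per-pixel R+G+B sum computed above, so the three normalized channels sum to
+-- roughly one at every (non-black) pixel:
+--
+--   local nrgb = image.rgb2nrgb(img)
+--   local s = nrgb[1] + nrgb[2] + nrgb[3]   -- close to 1 everywhere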
+
+----------------------------------------------------------------------
+-- image.y2jet(image)
+-- Converts an L-level (values 1..L) greyscale image into a jet heat-map
+--
+function image.y2jet(...)
+
+   -- arg check
+   local output,input
+   local args = {...}
+   if select('#',...) == 2 then
+      output = args[1]
+      input = args[2]
+   elseif select('#',...) == 1 then
+      input = args[1]
+   else
+      error('Invalid input for image.y2jet()')
+   end
+
+   -- accept 3D grayscale
+   if input:dim() == 3 and input:size(1) == 1 then
+      input = input.new(input):resize(input:size(2), input:size(3))
+   end
+
+   -- accept 1D greyscale
+   if input:dim() == 1  then
+      input = input.new(input):resize(1, input:size(1))
+   end
+
+   local output = output or input.new()
+   local L = input:max()
+
+   local colorMap = image.jetColormap(L)
+   if torch.type(input) == 'torch.ByteTensor' then
+     colorMap = colorMap:mul(255):round()
+     colorMap[torch.lt(colorMap, 0)] = 0
+     colorMap[torch.gt(colorMap, 255)] = 255
+     colorMap = colorMap:byte()
+   else
+     colorMap = colorMap:typeAs(input)
+   end
+
+   input.image.colorize(output, input-1, colorMap)
+
+   return output
+end
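+
+-- Usage sketch (editorial note): y2jet expects integer levels in 1..L and
+-- colorizes them with image.jetColormap(L):
+--
+--   local levels  = torch.Tensor{1, 2, 3, 4, 5}
+--   local heatmap = image.y2jet(levels)     -- 3-channel jet-colored result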
+
+-- color, bgcolor, size, wrap, inplace
+function image.drawText(src, text, x, y, opts)
+    opts = opts or {}
+    assert(torch.isTensor(src) and src:dim() == 3 and src:size(1) == 3,
+	   "input image has to be a 3D tensor of shape 3 x H x W ")
+    local out = src
+    if not opts.inplace then
+	out = src:clone()
+    end
+    if not text or text:gsub("%s*$", "") == '' then return out end
+    x = x or 1
+    y = y or 1
+    local color = opts.color or {255, 0, 0} -- red default
+    local bgcolor = opts.bg or {-1, -1, -1} -- no bgcolor default
+    local size = opts.size or 1
+    if opts.wrap == nil then opts.wrap = true end -- to wrap long lines or not
+    src.image.text(out, text, x, y, size,
+		   color[1], color[2], color[3],
+		   bgcolor[1], bgcolor[2], bgcolor[3],
+		   opts.wrap and 1 or 0)
+    return out
+end
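+
+-- Usage sketch (editorial note, mirroring the unit test): the options table is
+-- optional; color, bg, size, wrap and inplace are the recognized fields:
+--
+--   local canvas = torch.ByteTensor(3, 24, 24):zero()
+--   local out = image.drawText(canvas, "foo\nbar", 2, 4,
+--                              {color = {255, 255, 255}, bg = {255, 0, 0}})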
+
+----------------------------------------------------------------------
+--- Draw a rectangle on the image
+--
+-- color, bgcolor, size, wrap, inplace
+function image.drawRect(src, x1, y1, x2, y2, opts)
+   opts = opts or {}
+   assert(torch.isTensor(src) and src:dim() == 3 and src:size(1) == 3,
+    "input image has to be a 3D tensor of shape 3 x H x W ")
+   local out = src
+   if not opts.inplace then
+      out = src:clone()
+   end
+   if not (x1 and x2 and y1 and y2) then return out end
+   local color = opts.color or {255, 0, 0} -- red default
+   local lineWidth = opts.lineWidth or 1
+
+   src.image.drawRect(out, x1, y1, x2, y2, lineWidth, color[1], color[2], color[3])
+   return out
+end
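+
+-- Usage sketch (editorial note, mirroring the unit test): drawRect takes the
+-- two opposite corners (x1, y1) and (x2, y2) plus an optional options table
+-- with color, lineWidth and inplace fields:
+--
+--   local bg  = torch.ByteTensor(3, 24, 12):fill(3)
+--   local out = image.drawRect(bg, 5, 5, 10, 20, {color = {255, 0, 255}})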
+
+
+----------------------------------------------------------------------
+--- Returns a gaussian kernel.
+--
+function image.gaussian(...)
+   -- process args
+   local _, size, sigma, amplitude, normalize, width, height,
+      sigma_horz, sigma_vert, mean_horz, mean_vert, tensor = dok.unpack(
+      {...},
+      'image.gaussian',
+      'returns a 2D gaussian kernel',
+      {arg='size', type='number', help='kernel size (size x size)', default=3},
+      {arg='sigma', type='number', help='sigma (horizontal and vertical)', default=0.25},
+      {arg='amplitude', type='number', help='amplitude of the gaussian (max value)', default=1},
+      {arg='normalize', type='number', help='normalize kernel (exc Amplitude)', default=false},
+      {arg='width', type='number', help='kernel width', defaulta='size'},
+      {arg='height', type='number', help='kernel height', defaulta='size'},
+      {arg='sigma_horz', type='number', help='horizontal sigma', defaulta='sigma'},
+      {arg='sigma_vert', type='number', help='vertical sigma', defaulta='sigma'},
+      {arg='mean_horz', type='number', help='horizontal mean', default=0.5},
+      {arg='mean_vert', type='number', help='vertical mean', default=0.5},
+      {arg='tensor', type='torch.Tensor', help='result tensor (height/width are ignored)'}
+   )
+   if tensor then
+      assert(tensor:dim() == 2, "expecting 2D tensor")
+      assert(tensor:nElement() > 0, "expecting non-empty tensor")
+   end
+   -- generate kernel
+   local gauss = tensor or torch.Tensor(height, width)
+   gauss.image.gaussian(gauss, amplitude, normalize, sigma_horz, sigma_vert, mean_horz, mean_vert)
+
+   return gauss
+end
+
+function image.gaussian1D(...)
+   -- process args
+   local _, size, sigma, amplitude, normalize, mean, tensor
+      = dok.unpack(
+      {...},
+      'image.gaussian1D',
+      'returns a 1D gaussian kernel',
+      {arg='size', type='number', help='size of the kernel', default=3},
+      {arg='sigma', type='number', help='Sigma', default=0.25},
+      {arg='amplitude', type='number', help='Amplitude of the gaussian (max value)', default=1},
+      {arg='normalize', type='number', help='Normalize kernel (exc Amplitude)', default=false},
+      {arg='mean', type='number', help='Mean', default=0.5},
+      {arg='tensor', type='torch.Tensor', help='result tensor (size is ignored)'}
+   )
+
+   -- local vars
+   if tensor then
+      assert(tensor:dim() == 1, "expecting 1D tensor")
+      assert(tensor:nElement() > 0, "expecting non-empty tensor")
+      size = tensor:size(1)
+   end
+   local center = mean * size + 0.5
+
+   -- generate kernel
+   local gauss = tensor or torch.Tensor(size)
+   for i=1,size do
+      gauss[i] = amplitude * math.exp(-(math.pow((i-center)
+                                              /(sigma*size),2)/2))
+   end
+   if normalize then
+      gauss:div(gauss:sum())
+   end
+   return gauss
+end
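+
+-- Usage sketch (editorial note): both gaussian constructors take a table of
+-- named arguments via dok.unpack. A separable 2D kernel can also be built from
+-- the 1D version with an outer product (torch.ger):
+--
+--   local k1 = image.gaussian1D{size = 7, normalize = true}
+--   local k2 = torch.ger(k1, k1)            -- 7x7 separable approximation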
+
+----------------------------------------------------------------------
+--- Returns a Laplacian kernel.
+--
+function image.laplacian(...)
+   -- process args
+   local _, size, sigma, amplitude, normalize,
+   width, height, sigma_horz, sigma_vert, mean_horz, mean_vert = dok.unpack(
+      {...},
+      'image.laplacian',
+      'returns a 2D Laplacian kernel',
+      {arg='size', type='number', help='kernel size (size x size)', default=3},
+      {arg='sigma', type='number', help='sigma (horizontal and vertical)', default=0.1},
+      {arg='amplitude', type='number', help='amplitude of the Laplacian (max value)', default=1},
+      {arg='normalize', type='number', help='normalize kernel (exc Amplitude)', default=false},
+      {arg='width', type='number', help='kernel width', defaulta='size'},
+      {arg='height', type='number', help='kernel height', defaulta='size'},
+      {arg='sigma_horz', type='number', help='horizontal sigma', defaulta='sigma'},
+      {arg='sigma_vert', type='number', help='vertical sigma', defaulta='sigma'},
+      {arg='mean_horz', type='number', help='horizontal mean', default=0.5},
+      {arg='mean_vert', type='number', help='vertical mean', default=0.5}
+   )
+
+   -- local vars
+   local center_x = mean_horz * width + 0.5
+   local center_y = mean_vert * height + 0.5
+
+   -- generate kernel
+   local logauss = torch.Tensor(height,width)
+   for i=1,height do
+      for j=1,width do
+         local xsq = math.pow((i-center_x)/(sigma_horz*width),2)/2
+         local ysq = math.pow((j-center_y)/(sigma_vert*height),2)/2
+         local derivCoef = 1 - (xsq + ysq)
+         logauss[i][j] = derivCoef * amplitude * math.exp(-(xsq + ysq))
+      end
+   end
+   if normalize then
+      logauss:div(logauss:sum())
+   end
+   return logauss
+end
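+
+-- Usage sketch (editorial note): like image.gaussian, the Laplacian
+-- constructor takes named arguments and returns a height x width kernel:
+--
+--   local lapl = image.laplacian{size = 9, sigma = 0.25, normalize = false}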
+
+----------------------------------------------------------------------
+--- Gaussian Pyramid
+--
+function image.gaussianpyramid(...)
+   local dst,src,scales
+   local args = {...}
+   if select('#',...) == 3 then
+      dst = args[1]
+      src = args[2]
+      scales = args[3]
+   elseif select('#',...) == 2 then
+      dst = {}
+      src = args[1]
+      scales = args[2]
+   else
+      print(dok.usage('image.gaussianpyramid',
+                       'construct a Gaussian pyramid from an image', nil,
+                       {type='torch.Tensor', help='input image', req=true},
+                       {type='table', help='list of scales', req=true},
+                       '',
+                       {type='table', help='destination (list of Tensors)', req=true},
+                       {type='torch.Tensor', help='input image', req=true},
+                       {type='table', help='list of scales', req=true}))
+      dok.error('incorrect arguments', 'image.gaussianpyramid')
+   end
+   if src:nDimension() == 2 then
+      for i = 1,#scales do
+         dst[i] = dst[i] or src.new()
+         dst[i]:resize(src:size(1)*scales[i], src:size(2)*scales[i])
+      end
+   elseif src:nDimension() == 3 then
+      for i = 1,#scales do
+         dst[i] = dst[i] or src.new()
+         dst[i]:resize(src:size(1), src:size(2)*scales[i], src:size(3)*scales[i])
+      end
+   else
+      dok.error('src image must be 2D or 3D', 'image.gaussianpyramid')
+   end
+   local k = image.gaussian{width=3, normalize=true}:typeAs(src)
+   local tmp = src
+   for i = 1,#scales do
+      if scales[i] == 1 then
+         dst[i][{}] = tmp
+      else
+         image.scale(dst[i], tmp, 'simple')
+      end
+      tmp = image.convolve(dst[i], k, 'same')
+   end
+   return dst
+end
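+
+-- Usage sketch (editorial note): the pyramid is returned as a table with one
+-- tensor per requested scale; each level is built from a blurred copy of the
+-- previous one, as implemented above:
+--
+--   local pyr = image.gaussianpyramid(img, {1, 0.5, 0.25})
+--   -- #pyr == 3; pyr[2] has half the spatial size of img, pyr[3] a quarter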
+
+----------------------------------------------------------------------
+--- Creates an optimally-spaced RGB color mapping
+--
+function image.colormap(nbColor)
+   -- note: the best way of obtaining optimally-spaced
+   -- colors is to generate them around the HSV wheel,
+   -- by varying the Hue component
+   local map = torch.Tensor(nbColor,3)
+   local huef = 0
+   local satf = 0
+   for i = 1,nbColor do
+      -- HSL
+      local hue = math.fmod(huef,360)
+      local sat = math.fmod(satf,0.7) + 0.3
+      local light = 0.5
+      huef = huef + 39
+      satf = satf + 1/9
+      -- HSL -> RGB
+      local c = (1 - math.abs(2*light-1))*sat
+      local huep = hue/60
+      local x = c*(1-math.abs(math.fmod(huep,2)-1))
+      local redp
+      local greenp
+      local bluep
+      if huep < 1 then
+         redp = c; greenp = x; bluep = 0
+      elseif huep < 2 then
+         redp = x; greenp = c; bluep = 0
+      elseif huep < 3 then
+         redp = 0; greenp = c; bluep = x
+      elseif huep < 4 then
+         redp = 0; greenp = x; bluep = c
+      elseif huep < 5 then
+         redp = x; greenp = 0; bluep = c
+      else
+         redp = c; greenp = 0; bluep = x
+      end
+      local m = light - c/2
+      map[i][1] = redp + m
+      map[i][2] = greenp + m
+      map[i][3] = bluep + m
+   end
+   return map
+end
+
+----------------------------------------------------------------------
+--- Creates a jet color mapping - Inspired by http://www.metastine.com/?p=7
+--
+function image.jetColormap(nbColor)
+   local map = torch.Tensor(nbColor,3)
+   for i = 1,nbColor do
+      local fourValue = 4 * i / nbColor
+      map[i][1] = math.max(math.min(fourValue - 1.5, -fourValue + 4.5, 1),0)
+      map[i][2] = math.max(math.min(fourValue -  .5, -fourValue + 3.5, 1),0)
+      map[i][3] = math.max(math.min(fourValue +  .5, -fourValue + 2.5, 1),0)
+   end
+   return map
+end
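+
+-- Usage sketch (editorial note): both colormap constructors return an
+-- nbColor x 3 tensor of RGB rows in [0, 1]; row i of jetColormap is the color
+-- image.y2jet assigns to level i:
+--
+--   local cmap = image.jetColormap(10)      -- 10x3 tensor of RGB values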
+
+
+
+------------------------------------------------------------------------
+--- Local contrast normalization of an image
+--
+-- Performs local contrast normalization on a given image tensor using kernel `ker`.
+-- If no kernel is given, a default 9x9 gaussian is used.
+function image.lcn(im,ker)
+
+   ker = ker or image.gaussian({size=9,sigma=1.591/9,normalize=true})
+   local im = im:clone():type('torch.DoubleTensor')
+   if not(im:dim() == 2 or (im:dim() == 3 and im:size(1) == 1)) then
+     error('grayscale image expected')
+   end
+   if im:dim() == 3 then
+      im = im[1]
+   end
+   local mn = im:mean()
+   local sd = im:std()
+   -- print(ker)
+
+   -- 1. subtract the mean and divide by the standard deviation
+   im:add(-mn)
+   im:div(sd)
+
+   -- 2. calculate local mean and std and normalize each pixel
+
+   -- mean
+   local lmn = torch.conv2(im, ker)
+   -- variance
+   local imsq = im:clone():cmul(im)
+   local lmnsq = torch.conv2(imsq, ker)
+   local lvar = lmn:clone():cmul(lmn)
+   lvar:add(-1,lmnsq):mul(-1)
+   -- avoid numerical errors
+   lvar:apply(function(x) if x < 0 then return 0 end end)
+   -- standard deviation
+   local lstd  = lvar:sqrt()
+   lstd:apply(function (x) if x < 1 then return 1 end end)
+
+   -- apply normalization
+   local shifti = math.floor(ker:size(1)/2)+1
+   local shiftj = math.floor(ker:size(2)/2)+1
+   --print(shifti,shiftj,lstd:size(),im:size())
+   local dim = im:narrow(1,shifti,lstd:size(1)):narrow(2,shiftj,lstd:size(2)):clone()
+   dim:add(-1,lmn)
+   dim:cdiv(lstd)
+   return dim:clone()
+
+end
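+
+-- Usage sketch (editorial note): lcn expects a greyscale image (2D, or 3D with
+-- a single channel) and returns a slightly smaller result, since only the
+-- valid part of the torch.conv2 output is normalized (8 pixels per dimension
+-- with the default 9x9 kernel):
+--
+--   local y = image.rgb2y(image.lena())     -- 1xHxW luma image
+--   local n = image.lcn(y)                  -- (H-8)x(W-8) normalized image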
+
+------------------------------------------------------------------------
+--- Morphological erosion
+function image.erode(im,kern,pad)
+   if not im then
+      print(dok.usage("image.erode",
+		            "Morphological erosion for odd dimension kernels",nil,
+			    {type="torch.Tensor",help="binary image of 0 and 1",req=true},
+			    {type="torch.Tensor",help="morphological kernel of 0 and 1; default is 3x3"},
+			    {type="number",help="value to assume outside boundary; default is 1"}))
+      dok.error("missing image","image.erode")
+   end
+   -- Default kernel is 3x3
+   local kern = kern or torch.ones(3,3):typeAs(im)
+   local pad = pad or 1
+   -- Padding the image
+   local hpad = kern:size(1)/2-0.5
+   local wpad = kern:size(2)/2-0.5
+   local padded = torch.zeros(im:size(1)+2*hpad,im:size(2)+2*wpad):fill(pad):typeAs(im)
+   padded[{{hpad+1,im:size(1)+hpad},{wpad+1,im:size(2)+wpad}}]:copy(im)
+   -- Do convolution
+   local n = kern:sum()
+   local conv = padded:conv2(kern)
+   -- Do erosion
+   return conv:eq(n):typeAs(im)
+end
+
+------------------------------------------------------------------------
+--- Morphological dilation
+function image.dilate(im,kern,pad)
+   if not im then
+      print(dok.usage("image.dilate",
+		            "Morphological dilation for odd dimension kernels",nil,
+			    {type="torch.Tensor",help="binary image of 0 and 1",req=true},
+			    {type="torch.Tensor",help="morphological kernel of 0 and 1; default is 3x3"},
+			    {type="number",help="value to assume outside boundary; default is 0"}))
+      dok.error("missing image","image.dilate")
+   end
+   -- Default kernel is 3x3
+   local kern = kern or torch.ones(3,3):typeAs(im)
+   kern = image.hflip(image.vflip(kern))
+   local pad = pad or 0
+   -- Padding the image
+   local hpad = kern:size(1)/2-0.5
+   local wpad = kern:size(2)/2-0.5
+   local padded = torch.zeros(im:size(1)+2*hpad,im:size(2)+2*wpad):fill(pad):typeAs(im)
+   padded[{{hpad+1,im:size(1)+hpad},{wpad+1,im:size(2)+wpad}}]:copy(im)
+   -- Do convolution
+   local conv = padded:conv2(kern)
+   -- Do dilation
+   return conv:gt(0):typeAs(im)
+end
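+
+-- Usage sketch (editorial note): erode and dilate work on binary (0/1) images
+-- with odd-sized kernels; composing them gives the usual morphological opening
+-- and closing. Assuming `bin` is a binary 2D tensor:
+--
+--   local opened = image.dilate(image.erode(bin))   -- opening
+--   local closed = image.erode(image.dilate(bin))   -- closing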
+
+return image
diff --git a/jpeg.c b/jpeg.c
new file mode 100644
index 0000000..ae4ce14
--- /dev/null
+++ b/jpeg.c
@@ -0,0 +1,68 @@
+
+#include <TH.h>
+#include <luaT.h>
+#include <jpeglib.h>
+#include <setjmp.h>
+
+#if LUA_VERSION_NUM >= 503
+#define luaL_checkint(L,n)      ((int)luaL_checkinteger(L, (n)))
+#endif
+
+
+#define torch_(NAME) TH_CONCAT_3(torch_, Real, NAME)
+#define torch_Tensor TH_CONCAT_STRING_3(torch., Real, Tensor)
+#define libjpeg_(NAME) TH_CONCAT_3(libjpeg_, Real, NAME)
+
+static void
+jpeg_mem_src_dummy(j_decompress_ptr c, unsigned char *ibuf, unsigned long isiz)
+{
+}
+
+static void
+jpeg_mem_dest_dummy(j_compress_ptr c, unsigned char **obuf, unsigned long *osiz)
+{
+}
+
+#define JPEG_MEM_SRC_NOT_DEF  "`jpeg_mem_src` is not defined."
+#define JPEG_MEM_DEST_NOT_DEF "`jpeg_mem_dest` is not defined."
+#define JPEG_REQUIRED_VERSION " Use libjpeg v8+, libjpeg-turbo 1.3+ or build" \
+                              " libjpeg-turbo with `--with-mem-srcdst`."
+
+#define JPEG_MEM_SRC_ERR_MSG  JPEG_MEM_SRC_NOT_DEF JPEG_REQUIRED_VERSION
+#define JPEG_MEM_DEST_ERR_MSG JPEG_MEM_DEST_NOT_DEF JPEG_REQUIRED_VERSION
+
+#if !defined(HAVE_JPEG_MEM_SRC)
+#define jpeg_mem_src jpeg_mem_src_dummy
+#endif
+
+#if !defined(HAVE_JPEG_MEM_DEST)
+#define jpeg_mem_dest jpeg_mem_dest_dummy
+#endif
+
+#include "generic/jpeg.c"
+#include "THGenerateAllTypes.h"
+
+DLL_EXPORT int luaopen_libjpeg(lua_State *L)
+{
+  libjpeg_FloatMain_init(L);
+  libjpeg_DoubleMain_init(L);
+  libjpeg_ByteMain_init(L);
+
+  lua_newtable(L);
+  lua_pushvalue(L, -1);
+  lua_setglobal(L, "libjpeg");
+
+  lua_newtable(L);
+  luaT_setfuncs(L, libjpeg_DoubleMain__, 0);
+  lua_setfield(L, -2, "double");
+
+  lua_newtable(L);
+  luaT_setfuncs(L, libjpeg_FloatMain__, 0);
+  lua_setfield(L, -2, "float");
+
+  lua_newtable(L);
+  luaT_setfuncs(L, libjpeg_ByteMain__, 0);
+  lua_setfield(L, -2, "byte");
+
+  return 1;
+}
diff --git a/mkdocs.yml b/mkdocs.yml
new file mode 100644
index 0000000..97c8026
--- /dev/null
+++ b/mkdocs.yml
@@ -0,0 +1,14 @@
+site_name: image
+theme : simplex
+repo_url : https://github.com/torch/image
+use_directory_urls : false
+markdown_extensions: [extra]
+docs_dir : doc
+pages:
+- [index.md, Image]
+- [saveload.md, Saving and Loading]
+- [simpletransform.md, Simple Transformations]
+- [paramtransform.md, Parameterized Transformations]
+- [gui.md, Graphical User Interfaces]
+- [colorspace.md, Color Space Conversions]
+- [tensorconstruct.md, Tensor Constructors]
diff --git a/png.c b/png.c
new file mode 100644
index 0000000..e69eee0
--- /dev/null
+++ b/png.c
@@ -0,0 +1,87 @@
+
+#include <TH.h>
+#include <luaT.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#if LUA_VERSION_NUM >= 503
+#define luaL_checkint(L,n)      ((int)luaL_checkinteger(L, (n)))
+#endif
+
+#define PNG_DEBUG 3
+#include <png.h>
+
+#define torch_(NAME) TH_CONCAT_3(torch_, Real, NAME)
+#define torch_Tensor TH_CONCAT_STRING_3(torch., Real, Tensor)
+#define libpng_(NAME) TH_CONCAT_3(libpng_, Real, NAME)
+
+/*
+ * Bookkeeping struct for reading png data from memory
+ */
+typedef struct {
+  unsigned char* buffer;
+  png_size_t offset;
+  png_size_t length;
+} libpng_inmem_buffer;
+
+/*
+ * Call back for reading png data from memory
+ */
+static void
+libpng_userReadData(png_structp pngPtrSrc, png_bytep dest, png_size_t length)
+{
+  libpng_inmem_buffer* src = png_get_io_ptr(pngPtrSrc);
+  assert(src->offset+length <= src->length);
+  memcpy(dest, src->buffer + src->offset, length);
+  src->offset += length;
+}
+
+/*
+ * Error message wrapper (single member struct to preserve `str` size info)
+ */
+typedef struct {
+  char str[256];
+} libpng_errmsg;
+
+/*
+ * Custom error handling function (see `png_set_error_fn`)
+ */
+static void
+libpng_error_fn(png_structp png_ptr, png_const_charp error_msg)
+{
+  libpng_errmsg *errmsg = png_get_error_ptr(png_ptr);
+  int max = sizeof(errmsg->str) - 1;
+  strncpy(errmsg->str, error_msg, max);
+  errmsg->str[max] = '\0';
+  longjmp(png_jmpbuf(png_ptr), 1);
+}
+
+#include "generic/png.c"
+#include "THGenerateAllTypes.h"
+
+DLL_EXPORT int luaopen_libpng(lua_State *L)
+{
+  libpng_FloatMain_init(L);
+  libpng_DoubleMain_init(L);
+  libpng_ByteMain_init(L);
+
+  lua_newtable(L);
+  lua_pushvalue(L, -1);
+  lua_setglobal(L, "libpng");
+
+  lua_newtable(L);
+  luaT_setfuncs(L, libpng_DoubleMain__, 0);
+  lua_setfield(L, -2, "double");
+
+  lua_newtable(L);
+  luaT_setfuncs(L, libpng_FloatMain__, 0);
+  lua_setfield(L, -2, "float");
+
+  lua_newtable(L);
+  luaT_setfuncs(L, libpng_ByteMain__, 0);
+  lua_setfield(L, -2, "byte");
+
+  return 1;
+}
diff --git a/ppm.c b/ppm.c
new file mode 100644
index 0000000..0303ffb
--- /dev/null
+++ b/ppm.c
@@ -0,0 +1,70 @@
+
+#include <TH.h>
+#include <luaT.h>
+
+#define torch_(NAME) TH_CONCAT_3(torch_, Real, NAME)
+#define torch_Tensor TH_CONCAT_STRING_3(torch., Real, Tensor)
+#define libppm_(NAME) TH_CONCAT_3(libppm_, Real, NAME)
+
+/* Get the next character in the file, skipping over comments, which
+ * start with a # and continue to the end of the line. 
+ */
+static char ppm_getc(FILE *fp)
+{
+   char ch;
+
+   ch = (char)getc(fp);
+   if (ch == '#') {
+      do {
+         ch = (char)getc(fp);
+      } while (ch != '\n' && ch != '\r');
+   }
+
+   return ch;
+}
+
+/* Get the next integer, skipping whitespace and comments. */
+static long ppm_get_long(FILE *fp)
+{
+   char ch;
+   long i = 0;
+
+   do {
+      ch = ppm_getc(fp);
+   } while (ch == ' ' || ch == ',' || ch == '\t' || ch == '\n' || ch == '\r');
+
+   do {
+      i = i * 10 + ch - '0';
+      ch = ppm_getc(fp);
+   } while (ch >= '0' && ch <= '9');
+
+   return i;
+}
+
+#include "generic/ppm.c"
+#include "THGenerateAllTypes.h"
+
+DLL_EXPORT int luaopen_libppm(lua_State *L)
+{
+  libppm_FloatMain_init(L);
+  libppm_DoubleMain_init(L);
+  libppm_ByteMain_init(L);
+
+  lua_newtable(L);
+  lua_pushvalue(L, -1);
+  lua_setglobal(L, "libppm");
+
+  lua_newtable(L);
+  luaT_setfuncs(L, libppm_DoubleMain__, 0);
+  lua_setfield(L, -2, "double");
+
+  lua_newtable(L);
+  luaT_setfuncs(L, libppm_FloatMain__, 0);
+  lua_setfield(L, -2, "float");
+
+  lua_newtable(L);
+  luaT_setfuncs(L, libppm_ByteMain__, 0);
+  lua_setfield(L, -2, "byte");
+
+  return 1;
+}
diff --git a/test/test.lua b/test/test.lua
new file mode 100644
index 0000000..80299e9
--- /dev/null
+++ b/test/test.lua
@@ -0,0 +1,687 @@
+local test = torch.TestSuite()
+local precision = 1e-4
+local precision_mean = 1e-3
+local precision_std = 1e-1
+
+
+local function getTestImagePath(name)
+  return paths.concat(sys.fpath(), 'assets', name)
+end
+
+
+local function assertByteTensorEq(actual, expected, rcond, msg)
+  rcond = rcond or 1e-5
+  tester:assertTensorEq(actual:double(), expected:double(), rcond, msg)
+end
+
+
+local function toByteTensor(x)
+  local y = torch.round(x)
+  y[torch.le(x, 0)] = 0
+  y[torch.ge(x, 255)] = 255
+  return y:byte()
+end
+
+
+local function toByteImage(x)
+  return toByteTensor(torch.mul(x, 255))
+end
+
+
+local function testFunctionOnByteTensor(f, msg)
+  local lena = image.lena():float()
+  local expected = toByteImage(f(lena))
+  local actual = f(toByteImage(lena))
+  assertByteTensorEq(actual, expected, nil, msg)
+end
+
+
+local unpack = unpack and unpack or table.unpack -- lua52 compatibility
+
+
+----------------------------------------------------------------------
+-- Flip test
+--
+function test.FlipAgainstHFlip()
+  for ndims = 1, 5 do
+    for flip_dim = 1, ndims do
+      local sz = {}
+      for i = 1, ndims do
+        sz[i] = math.random(5,10)
+      end
+
+      local input = torch.rand(unpack(sz))
+      local output = image.flip(input, flip_dim)
+
+      -- Now perform the same operation using HFLIP
+      local input_tran = input
+      if (flip_dim < ndims) then
+        -- First permute the flip dimension to X dim
+        input_tran = input:transpose(flip_dim, ndims):contiguous()
+      end
+      -- Now reshape it to 3D
+      local original_hflip_sz = input_tran:size()
+      if ndims == 1 then
+        input_tran:resize(1, original_hflip_sz[1])
+      end
+      if ndims > 3 then
+        sz1 = 1
+        for i = 1, ndims - 2 do
+          sz1 = sz1 * original_hflip_sz[i]
+        end
+        input_tran:resize(sz1, original_hflip_sz[input_tran:dim()-1],
+          original_hflip_sz[input_tran:dim()])
+      end
+
+      local output_hflip = image.hflip(input_tran)
+
+      -- Put it back to Ndim
+      output_hflip:resize(original_hflip_sz)
+
+      if (flip_dim < ndims) then
+        -- permute back the flip dimension
+        output_hflip = output_hflip:transpose(flip_dim, ndims):contiguous()
+      end
+
+      local err = output_hflip - output
+      tester:asserteq(err:abs():max(), 0, 'error - bad flip! (ndims='..
+        ndims..',flip_dim='..flip_dim..')')
+    end
+  end
+end
+
+----------------------------------------------------------------------
+-- Gaussian tests
+--
+-- The old gaussian function, commit: 71670e1dcfcfe040aba5403c800a0d316987c2ed
+local function naive_gaussian(...)
+   -- process args
+   local _, size, sigma, amplitude, normalize,
+   width, height, sigma_horz, sigma_vert, mean_horz, mean_vert = dok.unpack(
+      {...},
+      'image.gaussian',
+      'returns a 2D gaussian kernel',
+      {arg='size', type='number', help='kernel size (size x size)', default=3},
+      {arg='sigma', type='number', help='sigma (horizontal and vertical)', default=0.25},
+      {arg='amplitude', type='number', help='amplitute of the gaussian (max value)', default=1},
+      {arg='normalize', type='number', help='normalize kernel (exc Amplitude)', default=false},
+      {arg='width', type='number', help='kernel width', defaulta='size'},
+      {arg='height', type='number', help='kernel height', defaulta='size'},
+      {arg='sigma_horz', type='number', help='horizontal sigma', defaulta='sigma'},
+      {arg='sigma_vert', type='number', help='vertical sigma', defaulta='sigma'},
+      {arg='mean_horz', type='number', help='horizontal mean', default=0.5},
+      {arg='mean_vert', type='number', help='vertical mean', default=0.5}
+   )
+
+   -- local vars
+   local center_x = mean_horz * width + 0.5
+   local center_y = mean_vert * height + 0.5
+
+   -- generate kernel
+   local gauss = torch.Tensor(height, width)
+   for i=1,height do
+      for j=1,width do
+         gauss[i][j] = amplitude * math.exp(-(math.pow((j-center_x)
+                                                    /(sigma_horz*width),2)/2
+                                           + math.pow((i-center_y)
+                                                   /(sigma_vert*height),2)/2))
+      end
+   end
+   if normalize then
+      gauss:div(gauss:sum())
+   end
+   return gauss
+end
+
+function test.gaussian()
+   local sigma_horz = 0.1 + math.random() * 0.3;  -- [0.1, 0.4]
+   local sigma_vert = 0.1 + math.random() * 0.3;  -- [0.1, 0.4]
+   local mean_horz = 0.1 + math.random() * 0.8;  -- [0.1, 0.9]
+   local mean_vert = 0.1 + math.random() * 0.8;  -- [0.1, 0.9]
+   local width = 640
+   local height = 480
+   local amplitude = 10
+
+   for _, normalize in pairs{true, false} do
+      im1 = image.gaussian{amplitude=amplitude,
+                        normalize=normalize,
+                        width=width,
+                        height=height,
+                        sigma_horz=sigma_horz,
+                        sigma_vert=sigma_vert,
+                        mean_horz=mean_horz,
+                        mean_vert=mean_vert}
+
+      im2 = naive_gaussian{amplitude=amplitude,
+                  normalize=normalize,
+                  width=width,
+                  height=height,
+                  sigma_horz=sigma_horz,
+                  sigma_vert=sigma_vert,
+                  mean_horz=mean_horz,
+                  mean_vert=mean_vert}
+
+      tester:assertlt(im1:add(-1, im2):sum(), precision, "Incorrect gaussian")
+   end
+end
+
+
+function test.byteGaussian()
+  local expected = toByteTensor(image.gaussian{
+      amplitude = 1000,
+      tensor = torch.FloatTensor(5, 5),
+  })
+  local actual = image.gaussian{
+      amplitude = 1000,
+      tensor = torch.ByteTensor(5, 5),
+  }
+  assertByteTensorEq(actual, expected)
+end
+
+
+----------------------------------------------------------------------
+-- Gaussian pyramid test
+--
+function test.gaussianpyramid()
+  -- Char, Short and Int tensors not supported.
+  types = {
+      'torch.ByteTensor',
+      'torch.FloatTensor',
+      'torch.DoubleTensor'
+  }
+  for _, type in ipairs(types) do
+    local output = unpack(image.gaussianpyramid(torch.rand(8, 8):type(type), {0.5}))
+    tester:assert(output:type() == type, 'Type ' .. type .. ' produces a different output.')
+  end
+end
+
+----------------------------------------------------------------------
+-- Scale test
+--
+local function outerProduct(x)
+  x = torch.Tensor(x)
+  return torch.ger(x, x)
+end
+
+
+function test.bilinearUpscale()
+  local im = outerProduct{1, 2, 4, 2}
+  local expected = outerProduct{1, 1.5, 2, 3, 4, 3, 2}
+  local actual = image.scale(im, expected:size(2), expected:size(1), 'bilinear')
+  tester:assertTensorEq(actual, expected, 1e-5)
+end
+
+
+function test.bilinearDownscale()
+  local im = outerProduct{1, 2, 4, 2}
+  local expected = outerProduct{1.25, 3, 2.5}
+  local actual = image.scale(im, expected:size(2), expected:size(1), 'bilinear')
+  tester:assertTensorEq(actual, expected, 1e-5)
+end
+
+
+function test.bicubicUpscale()
+  local im = outerProduct{1, 2, 4, 2}
+  local expected = outerProduct{1, 1.4375, 2, 3.1875, 4, 3.25, 2}
+  local actual = image.scale(im, expected:size(2), expected:size(1), 'bicubic')
+  tester:assertTensorEq(actual, expected, 1e-5)
+end
+
+
+function test.bicubicDownscale()
+  local im = outerProduct{1, 2, 4, 2}
+  local expected = outerProduct{1, 3.1875, 2}
+  local actual = image.scale(im, expected:size(2), expected:size(1), 'bicubic')
+  tester:assertTensorEq(actual, expected, 1e-5)
+end
+
+
+function test.bicubicUpscale_ByteTensor()
+  local im = torch.ByteTensor{{0, 1, 32}}
+  local expected = torch.ByteTensor{{0, 0, 9, 32}}
+  local actual = image.scale(im, expected:size(2), expected:size(1), 'bicubic')
+  assertByteTensorEq(actual, expected)
+end
+
+
+function test.bilinearUpscale_ByteTensor()
+  local im = torch.ByteTensor{{1, 2},
+                              {2, 3}}
+  local expected = torch.ByteTensor{{1, 2, 2},
+                                    {2, 3, 3},
+                                    {2, 3, 3}}
+  local actual = image.scale(im, expected:size(2), expected:size(1))
+  assertByteTensorEq(actual, expected)
+end
+
+
+----------------------------------------------------------------------
+-- Scale test
+--
+local flip_tests = {}
+function flip_tests.test_transformation_largeByteImage(flip)
+    local x_real = image.fabio():double():mul(255)
+    local x_byte = x_real:clone():byte()
+
+    assert(x_byte:size(1) > 256 and x_byte:size(2) > 256, 'Tricky case only occurs for images larger than 256 px, pick another example')
+
+    local f_real, f_byte
+    f_real = image[flip](x_real)
+    f_byte = image[flip](x_byte)
+    assertByteTensorEq(f_real:byte(), f_byte, 1e-16,
+        flip .. ':  result for double and byte images do not match')
+end
+
+function flip_tests.test_inplace(flip)
+    local im = image.lena()
+    local not_inplace = image[flip](im)
+    local in_place = im:clone()
+    image[flip](in_place, in_place)
+    tester:assertTensorEq(in_place, not_inplace, 1e-16, flip .. ': result in-place does not match result not in-place')
+end
+
+for _, flip in pairs{'vflip', 'hflip'} do
+    for name, flip_test in pairs(flip_tests) do
+        test[name .. '_' .. flip] = function() return flip_test(flip) end
+    end
+end
+
+function test.test_vflip_simple()
+    local im_even = torch.Tensor{{1,2}, {3, 4}}
+    local expected_even = torch.Tensor{{3, 4}, {1, 2}}
+    local x_even = image.vflip(im_even)
+    tester:assertTensorEq(expected_even, x_even, 1e-16, 'vflip: fails on even size')
+    -- test inplace
+    image.vflip(im_even, im_even)
+    tester:assertTensorEq(expected_even, im_even, 1e-16, 'vflip: fails on even size in place')
+
+    local im_odd = torch.Tensor{{1,2}, {3, 4}, {5, 6}}
+    local expected_odd = torch.Tensor{{5,6}, {3, 4}, {1, 2}}
+    local x_odd = image.vflip(im_odd)
+    tester:assertTensorEq(expected_odd, x_odd, 1e-16, 'vflip: fails on odd size')
+    -- test inplace
+    image.vflip(im_odd, im_odd)
+    tester:assertTensorEq(expected_odd, im_odd, 1e-16, 'vflip: fails on odd size in place')
+end
+
+function test.test_hflip_simple()
+    local im_even = torch.Tensor{{1, 2}, {3, 4}}
+    local expected_even = torch.Tensor{{2, 1}, {4, 3}}
+    local x_even = image.hflip(im_even)
+    tester:assertTensorEq(expected_even, x_even, 1e-16, 'hflip: fails on even size')
+    -- test inplace
+    image.hflip(im_even, im_even)
+    tester:assertTensorEq(expected_even, im_even, 1e-16, 'hflip: fails on even size in place')
+
+    local im_odd = torch.Tensor{{1,2, 3}, {4, 5, 6}}
+    local expected_odd = torch.Tensor{{3, 2, 1}, {6, 5, 4}}
+    local x_odd = image.hflip(im_odd)
+    tester:assertTensorEq(expected_odd, x_odd, 1e-16, 'hflip: fails on odd size')
+    -- test inplace
+    image.hflip(im_odd, im_odd)
+    tester:assertTensorEq(expected_odd, im_odd, 1e-16, 'hflip: fails on odd size in place')
+end
+
+----------------------------------------------------------------------
+-- decompress jpg test
+--
+function test.CompareLoadAndDecompress()
+  -- This test breaks if someone removes grace_hopper_512.jpg from the repo
+  local imfile = getTestImagePath('grace_hopper_512.jpg')
+  if not paths.filep(imfile) then
+    error(imfile .. ' is missing!')
+  end
+
+  -- Load the image directly from the filename
+  local img = image.loadJPG(imfile)
+
+  -- Make sure the returned image width and height match the height and width
+  -- reported by graphicsmagick (just a sanity check)
+  local ok, gm = pcall(require, 'graphicsmagick')
+  if not ok then
+    -- skip this part of the test if graphicsmagick is not installed
+    print('\ntest.CompareLoadAndDecompress partially requires the ' ..
+          'graphicsmagick package to run. You can install it with ' ..
+          '"luarocks install graphicsmagick".')
+  else
+    local info = gm.info(imfile)
+    local w = info.width
+    local h = info.height
+    tester:assert(w == img:size(3), 'image dimension error ')
+    tester:assert(h == img:size(2), 'image dimension error ')
+  end
+
+  -- Now load the raw binary from the source file into a ByteTensor
+  local fin = torch.DiskFile(imfile, 'r')
+  fin:binary()
+  fin:seekEnd()
+  local file_size_bytes = fin:position() - 1
+  fin:seek(1)
+  local img_binary = torch.ByteTensor(file_size_bytes)
+  fin:readByte(img_binary:storage())
+  fin:close()
+
+  -- Now decompress the image from the ByteTensor
+  local img_from_tensor = image.decompressJPG(img_binary)
+
+  tester:assertlt((img_from_tensor - img):abs():max(), precision,
+    'images from load and decompress dont match! ')
+end
+
+function test.LoadInvalid()
+  -- Make sure nothing nasty happens if we try and load a "garbage" tensor
+  local file_size_bytes = 1000
+  local img_binary = torch.rand(file_size_bytes):mul(255):byte()
+
+  -- Now decompress the image from the ByteTensor
+  tester:assertError(
+    function() image.decompressJPG(img_binary) end,
+    'A non-nil was returned on an invalid input!'
+  )
+end
+
+----------------------------------------------------------------------
+-- compress jpg test
+--
+
+function test.CompressAndDecompress()
+  -- This test is unfortunately a correlated test: it will only be valid
+  -- if decompressJPG is OK.  However, since decompressJPG has its own unit
+  -- test, this is probably fine.
+
+  local img = image.lena()
+
+  local quality = 100
+  local img_compressed = image.compressJPG(img, quality)
+  local size_100 = img_compressed:size(1)
+  local img_decompressed = image.decompressJPG(img_compressed)
+  local err = img_decompressed - img
+
+  -- Now in general we will get BIG compression artifacts (even at quality=100)
+  -- but they will be relatively small, so instead of a abs():max() test, we do
+  -- a mean and std test.
+  local mean_err = err:mean()
+  local std_err = err:std()
+  tester:assertlt(mean_err, precision_mean, 'compressJPG error is too high! ')
+  tester:assertlt(std_err, precision_std, 'compressJPG error is too high! ')
+
+  -- Also check that the quality setting scales the size of the compressed image
+  quality = 25
+  img_compressed = image.compressJPG(img, quality)
+  local size_25 = img_compressed:size(1)
+  tester:assertlt(size_25, size_100, 'compressJPG quality setting error! ')
+end
+
+----------------------------------------------------------------------
+-- Lab conversion test
+-- These tests break if someone removes lena from the repo
+
+
+local function testRoundtrip(forward, backward)
+  local expected = image.lena()
+  local actual = backward(forward(expected))
+  tester:assertTensorEq(actual, expected, 1e-4)
+end
+
+
+function test.rgb2lab()
+  testRoundtrip(image.rgb2lab, image.lab2rgb)
+end
+
+
+function test.rgb2hsv()
+  testRoundtrip(image.rgb2hsv, image.hsv2rgb)
+end
+
+
+function test.rgb2hsl()
+  testRoundtrip(image.rgb2hsl, image.hsl2rgb)
+end
+
+
+function test.rgb2y()
+  local x = torch.FloatTensor{{{1, 0, 0}, {0, 1, 0}, {0, 0, 1}}}:transpose(1, 3)
+  local actual = image.rgb2y(x)
+  local expected = torch.FloatTensor{{{0.299}, {0.587}, {0.114}}}
+  tester:assertTensorEq(actual, expected, 1e-5)
+end
+
+
+function test.y2jet()
+  local levels = torch.Tensor{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
+  local expected = image.jetColormap(10)
+  local actual = image.y2jet(levels)[{{}, 1, {}}]:t()
+  tester:assertTensorEq(actual, expected, 1e-5)
+end
+
+
+function test.rgb2labByteTensor()
+  local lena = image.lena():byte()
+  tester:assertError(function () image.rgb2lab(lena) end)
+  tester:assertError(function () image.lab2rgb(lena) end)
+end
+
+
+local function testByteTensorRoundtrip(forward, backward, cond, msg)
+  local lena = toByteImage(image.lena())
+  local expected = lena
+  local actual = backward(forward(expected))  
+  assertByteTensorEq(actual, expected, cond, msg)
+end
+
+
+function test.toFromByteTensor()
+  local expected = toByteImage(image.lena():float())
+  local actual = toByteImage(expected:float():div(255))
+  assertByteTensorEq(actual, expected, nil, msg)
+end
+
+
+function test.rgb2hsvByteTensor()
+  testFunctionOnByteTensor(image.rgb2hsv, 'image.rgb2hsv error for ByteTensor')
+  testFunctionOnByteTensor(image.hsv2rgb, 'image.hsv2rgb error for ByteTensor')
+  testByteTensorRoundtrip(image.rgb2hsv, image.hsv2rgb, 3,
+                          'image.rgb2hsv roundtrip error for ByteTensor')
+end
+
+
+function test.rgb2hslByteTensor()
+  testFunctionOnByteTensor(image.rgb2hsl, 'image.hsl2rgb error for ByteTensor')
+  testFunctionOnByteTensor(image.hsl2rgb, 'image.rgb2hsl error for ByteTensor')
+  testByteTensorRoundtrip(image.rgb2hsl, image.hsl2rgb, 3,
+                          'image.rgb2hsl roundtrip error for ByteTensor')
+end
+
+
+function test.rgb2yByteTensor()
+  testFunctionOnByteTensor(image.rgb2y, 'image.rgb2y error for ByteTensor')
+end
+
+
+function test.y2jetByteTensor()
+  local levels = torch.Tensor{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
+  local expected = toByteImage(image.y2jet(levels))
+  local actual = image.y2jet(levels:byte())
+  assertByteTensorEq(actual, expected, nil)
+end
+
+
+----------------------------------------------------------------------
+-- PNG test
+--
+local function toBlob(filename)
+  local f = torch.DiskFile(filename, 'r')
+  f:binary()
+  f:seekEnd()
+  local size = f:position() - 1
+  f:seek(1)
+  local blob = torch.ByteTensor(size)
+  f:readByte(blob:storage())
+  f:close()
+  return blob
+end
+
+local function checkPNG(imfile, depth, tensortype, want)
+  local img = image.load(imfile, depth, tensortype)
+  -- Tensors have to be converted to double, since assertTensorEq does not support ByteTensor
+  --print('img: ', img)
+  --print('want: ', want)
+  assertByteTensorEq(img, want, precision_mean,
+                    string.format('%s: pixel values are unexpected', imfile))
+end
+
+function test.LoadPNG()
+  -- Gray 8-bit PNG image with width = 3, height = 1
+  local gray8byte = torch.ByteTensor({{{0,127,255}}})
+  checkPNG(getTestImagePath('gray3x1.png'), 1, 'byte', gray8byte)
+
+  local gray8double = torch.DoubleTensor({{{0, 127/255, 1}}})
+  checkPNG(getTestImagePath('gray3x1.png'), 1, 'double', gray8double)
+
+  -- Gray 16-bit PNG image with width=1, height = 2
+  local gray16byte = torch.ByteTensor{{{0}, {255}}}
+  checkPNG(getTestImagePath('gray16-1x2.png'), 1, 'byte', gray16byte)
+
+  local gray16float = torch.FloatTensor{{{0}, {65534/65535}}}
+  checkPNG(getTestImagePath('gray16-1x2.png'), 1, 'float', gray16float)
+
+  -- Color 8-bit PNG image with width = 2, height = 1
+  local rgb8byte = torch.ByteTensor{{{255, 0}}, {{0, 127}}, {{63, 0}}}
+  checkPNG(getTestImagePath('rgb2x1.png'), 3, 'byte', rgb8byte)
+
+  local rgb8float = torch.FloatTensor{{{1, 0}}, {{0, 127/255}}, {{63/255, 0}}}
+  checkPNG(getTestImagePath('rgb2x1.png'), 3, 'float', rgb8float)
+
+  -- Color 16-bit PNG image with width = 2, height = 1
+  local rgb16byte = torch.ByteTensor{{{255, 0}}, {{0, 127}}, {{63, 0}}}
+  checkPNG(getTestImagePath('rgb16-2x1.png'), 3, 'byte', rgb16byte)
+
+  local rgb16float = torch.FloatTensor{{{1, 0}}, {{0, 32767/65535}}, {{16383/65535, 0}}}
+  checkPNG(getTestImagePath('rgb16-2x1.png'), 3, 'float', rgb16float)
+end
+
+function test.DecompressPNG()
+  tester:assertTensorEq(
+    image.load(getTestImagePath('rgb2x1.png')),
+    image.decompressPNG(toBlob(getTestImagePath('rgb2x1.png'))),
+    precision_mean,
+    'decompressed and loaded images should be equal'
+  )
+end
+
+function test.LoadCorruptedPNG()
+  tester:assertErrorPattern(
+    function() image.load(getTestImagePath("corrupt-ihdr.png")) end,
+    "Error during init_io",
+    "corrupted image should not be loaded or unexpected error message"
+  )
+end
+
+----------------------------------------------------------------------
+-- PPM test
+--
+function test.test_ppmload()
+    -- P6.ppm is a 100x1 "French flag"-like image, i.e. the first pixel is blue,
+    -- the next 84 pixels are white and the last 15 pixels are red.
+    -- This makes it possible to implement a non-regression test against the
+    -- former PPM loader, which skipped the first 85 pixels because of a header
+    -- parser bug.
+    local img = image.load(getTestImagePath("P6.ppm"))
+    local pix = img[{ {}, {1}, {1} }]
+
+    -- Check the first pixel is blue
+    local ref = torch.zeros(3, 1, 1)
+    ref[3][1][1] = 1
+    tester:assertTensorEq(pix, ref, 0, "PPM load: first pixel check failed")
+end
+
+
+function test.test_pgmaload()
+    -- P2.pgm is a PGMA file (ASCII PGM)
+    -- the example comes from here:
+    -- http://people.sc.fsu.edu/~jburkardt/data/pgma/pgma.html
+    local img = image.load(getTestImagePath("P2.pgm"), 1, 'byte')
+    local max_gray = 15 -- 4th line of P2.pgm
+    local ascii_val = 3 -- pixel (2,2) in the file
+    local pix_val = math.floor(255 * ascii_val / max_gray)
+
+    local pix = img[1][2][2]
+
+    -- Check that pixel (1,2,2) corresponds to ASCII value 3 (scaled to byte range)
+    local ref = pix_val
+    tester:asserteq(pix, ref, "PGMA load: pixel check failed")
+end
+
+function test.test_pgmload()
+    -- P5.pgm is a small binary (P5) greyscale image; check that its first
+    -- pixel has the expected intensity.
+    local img = image.load(getTestImagePath("P5.pgm"))
+    local pix = img[{ {}, {1}, {1} }]
+
+    local ref = torch.zeros(1, 1, 1); ref[1][1][1] = 0.07
+    tester:assertTensorEq(pix, ref, 0.001, "PPM load: first pixel check failed")
+end
+
+function test.test_pbmload()
+  -- P4.pbm is a Portable BitMap (not supported)
+  tester:assertErrorPattern(
+    function() image.loadPPM(getTestImagePath("P4.pbm")) end,
+    "unsupported magic number",
+    "PBM format should not be loaded or unexpected error message"
+  )
+end
+
+----------------------------------------------------------------------
+-- Text drawing test
+--
+function test.test_textdraw()
+  local types = {
+     ["torch.ByteTensor"]   = "byte",
+     ["torch.DoubleTensor"] = "double",
+     ["torch.FloatTensor"]  = "float"
+  }
+  for k,v in pairs(types) do
+    local img = image.drawText(
+       torch.zeros(3, 24, 24):type(k),
+       "foo\nbar", 2, 4, {color={255, 255, 255}, bg={255, 0, 0}}
+    )
+    checkPNG(getTestImagePath("foobar.png"), 3, v, img)
+  end
+end
+
+----------------------------------------------------------------------
+-- Text drawing rect
+--
+function test.test_drawRect()
+  local types = {
+     ["torch.ByteTensor"]   = "byte",
+     ["torch.DoubleTensor"] = "double",
+     ["torch.FloatTensor"]  = "float"
+  }
+  for k,v in pairs(types) do
+    local bg = torch.zeros(3, 24, 12):type(k)
+    if k == 'torch.ByteTensor' then
+      bg:fill(3)
+    else
+      bg:fill(3/255)
+    end
+    local img = image.drawRect(bg, 5, 5, 10, 20, {color={255, 0, 255}})
+    checkPNG(getTestImagePath("rectangle.png"), 3, v, img)
+  end
+end
+
+function image.test(tests, seed)
+   local defaultTensorType = torch.getdefaulttensortype()
+   torch.setdefaulttensortype('torch.DoubleTensor')
+   seed = seed or os.time()
+   print('seed: ', seed)
+   math.randomseed(seed)
+   tester = torch.Tester()
+   tester:add(test)
+   tester:run(tests)
+   torch.setdefaulttensortype(defaultTensorType)
+   return tester
+end
diff --git a/test/test_rotate.lua b/test/test_rotate.lua
new file mode 100644
index 0000000..8f7ef91
--- /dev/null
+++ b/test/test_rotate.lua
@@ -0,0 +1,75 @@
+require 'image'
+
+torch.setdefaulttensortype('torch.FloatTensor')
+torch.setnumthreads(16)
+
+local function test_rotate(src, mode)
+   torch.manualSeed(11)
+   local mean_dist = 0.0
+   for i = 1, 10 do
+      local theta = torch.uniform(0, 2 * math.pi)
+      local d1, d2, d3, d4
+      
+      -- rotate
+      if mode then
+         d1 = image.rotate(src, theta, mode)
+         d2 = src.new():resizeAs(src)
+         image.rotate(d2, src, theta, mode)
+      else
+         d1 = image.rotate(src, theta)
+         d2 = src.new():resizeAs(src)
+         image.rotate(d2, src, theta)
+      end
+
+      -- revert
+      local revert = 2 * math.pi - theta
+      if mode then
+         d3 = image.rotate(d1, revert, mode)
+         d4 = src.new():resizeAs(src)
+         image.rotate(d4, d2, revert, mode)
+      else
+         d3 = image.rotate(d1, revert)
+         d4 = src.new():resizeAs(src)
+         image.rotate(d4, d2, revert)
+      end
+      
+      -- diff
+      if src:dim() == 3 then
+         local cs = image.crop(src, src:size(2) / 4, src:size(3) / 4, src:size(2) / 4 * 3, src:size(3) / 4 * 3)
+         local c3 = image.crop(d3, src:size(2) / 4, src:size(3) / 4, src:size(2) / 4 * 3, src:size(3) / 4 * 3)
+         local c4 = image.crop(d4, src:size(2) / 4, src:size(3) / 4, src:size(2) / 4 * 3, src:size(3) / 4 * 3)
+         mean_dist = mean_dist + cs:dist(c3)
+         mean_dist = mean_dist + cs:dist(c4)
+      elseif src:dim() == 2 then
+         local cs = image.crop(src, src:size(1) / 4, src:size(2) / 4, src:size(1) / 4 * 3, src:size(2) / 4 * 3)
+         local c3 = image.crop(d3, src:size(1) / 4, src:size(2) / 4, src:size(1) / 4 * 3, src:size(2) / 4 * 3)
+         local c4 = image.crop(d4, src:size(1) / 4, src:size(2) / 4, src:size(1) / 4 * 3, src:size(2) / 4 * 3)
+         mean_dist = mean_dist + cs:dist(c3)
+         mean_dist = mean_dist + cs:dist(c4)
+      end
+      --[[
+      if i == 1 then
+         image.display(src)
+         image.display(d1)
+         image.display(d2)
+         image.display(d3)
+         image.display(d4)
+      end
+      --]]
+   end
+   if mode then
+      print("mode = " .. mode .. ", mean dist: " .. mean_dist / (10 * 2))
+   else
+      print("mode = nil, mean dist: " .. mean_dist / (10 * 2))
+   end
+end
+local src = image.scale(image.lena(), 128, 128, 'bilinear')
+print("** dim3")
+test_rotate(src, nil)
+test_rotate(src, 'simple')
+test_rotate(src, 'bilinear')
+print("** dim2")
+src = src:select(1, 1)
+test_rotate(src, nil)
+test_rotate(src, 'simple')
+test_rotate(src, 'bilinear')
diff --git a/test/test_warp.lua b/test/test_warp.lua
new file mode 100644
index 0000000..5c4a1ed
--- /dev/null
+++ b/test/test_warp.lua
@@ -0,0 +1,139 @@
+require 'image'
+torch.setdefaulttensortype('torch.FloatTensor')
+torch.setnumthreads(16)
+
+im = image.lena()
+-- Subsample lena like crazy
+im = image.scale(im, im:size()[3] / 8, im:size()[2] / 8, 'bilinear')
+
+width = im:size()[3]  -- 512 / 8
+height = im:size()[2]  -- 512 / 8
+nchan = im:size()[1]  -- 3
+upscale = 8
+width_up = width * upscale
+height_up = height * upscale
+
+-- ******************************************
+-- COMPARE RESULTS OF UPSCALE (INTERPOLATION)
+-- ******************************************
+
+-- x/y grids
+grid_y = torch.ger( torch.linspace(-1,1,height_up), torch.ones(width_up) )
+grid_x = torch.ger( torch.ones(height_up), torch.linspace(-1,1,width_up) )
+
+flow = torch.FloatTensor()
+flow:resize(2,height_up,width_up)
+flow:zero()
+
+-- Apply scale
+flow_scale = torch.FloatTensor()
+flow_scale:resize(2,height_up,width_up)
+flow_scale[1] = grid_y
+flow_scale[2] = grid_x
+flow_scale[1]:add(1):mul(0.5) -- 0 to 1
+flow_scale[2]:add(1):mul(0.5) -- 0 to 1
+flow_scale[1]:mul(height-1)
+flow_scale[2]:mul(width-1)
+flow:add(flow_scale)
+
+t0 = sys.clock()
+im_simple = image.warp(im, flow, 'simple', false)
+t1 = sys.clock()
+print("Upscale Time simple = " .. (t1 - t0))  -- Not a robust measure (should average)
+image.display{image = im_simple, zoom = 1, legend = 'upscale simple'}
+
+t0 = sys.clock()
+im_bilinear = image.warp(im, flow, 'bilinear', false)
+t1 = sys.clock()
+print("Upscale Time bilinear = " .. (t1 - t0))  -- Not a robust measure (should average)
+image.display{image = im_bilinear, zoom = 1, legend = 'upscale bilinear'}
+
+t0 = sys.clock()
+im_bicubic = image.warp(im, flow, 'bicubic', false)
+t1 = sys.clock()
+print("Upscale Time bicubic = " .. (t1 - t0))  -- Not a robust measure (should average)
+image.display{image = im_bicubic, zoom = 1, legend = 'upscale bicubic'}
+
+t0 = sys.clock()
+im_lanczos = image.warp(im, flow, 'lanczos', false)
+t1 = sys.clock()
+print("Upscale Time lanczos = " .. (t1 - t0))  -- Not a robust measure (should average)
+image.display{image = im_lanczos, zoom = 1, legend = 'upscale lanczos'}
+
+-- *********************************************
+-- NOW TRY A ROTATION AT THE STANDARD RESOLUTION
+-- *********************************************
+
+im = image.lena()
+-- Subsample lena a little bit
+im = image.scale(im, im:size()[3] / 4, im:size()[2] / 4, 'bilinear')
+
+width = im:size()[3]  -- 512 / 4
+height = im:size()[2]  -- 512 / 4
+nchan = im:size()[1]  -- 3
+
+grid_y = torch.ger( torch.linspace(-1,1,height), torch.ones(width) )
+grid_x = torch.ger( torch.ones(height), torch.linspace(-1,1,width) )
+
+flow = torch.FloatTensor()
+flow:resize(2,height,width)
+flow:zero()
+
+-- Apply uniform scale
+flow_scale = torch.FloatTensor()
+flow_scale:resize(2,height,width)
+flow_scale[1] = grid_y
+flow_scale[2] = grid_x
+flow_scale[1]:add(1):mul(0.5) -- 0 to 1
+flow_scale[2]:add(1):mul(0.5) -- 0 to 1
+flow_scale[1]:mul(height-1)
+flow_scale[2]:mul(width-1)
+flow:add(flow_scale)
+
+flow_rot = torch.FloatTensor()
+flow_rot:resize(2,height,width)
+flow_rot[1] = grid_y * ((height-1)/2) * -1
+flow_rot[2] = grid_x * ((width-1)/2) * -1
+view = flow_rot:reshape(2,height*width)
+function rmat(deg)
+  local r = deg/180*math.pi
+  return torch.FloatTensor{{math.cos(r), -math.sin(r)}, {math.sin(r), math.cos(r)}}
+end
+rot_angle = 360/7  -- a nice non-integer value
+rotmat = rmat(rot_angle)
+flow_rotr = torch.mm(rotmat, view)
+flow_rot = flow_rot - flow_rotr:reshape( 2, height, width )
+flow:add(flow_rot)
+
+t0 = sys.clock()
+im_simple = image.warp(im, flow, 'simple', false)
+t1 = sys.clock()
+print("Rotation Time simple = " .. (t1 - t0))  -- Not a robust measure (should average)
+image.display{image = im_simple, zoom = 4, legend = 'rotation simple'}
+
+t0 = sys.clock()
+im_bilinear = image.warp(im, flow, 'bilinear', false)
+t1 = sys.clock()
+print("Rotation Time bilinear = " .. (t1 - t0))  -- Not a robust measure (should average)
+image.display{image = im_bilinear, zoom = 4, legend = 'rotation bilinear'}
+
+t0 = sys.clock()
+im_bicubic = image.warp(im, flow, 'bicubic', false)
+t1 = sys.clock()
+print("Rotation Time bicubic = " .. (t1 - t0))  -- Not a robust measure (should average)
+image.display{image = im_bicubic, zoom = 4, legend = 'rotation bicubic'}
+
+t0 = sys.clock()
+im_lanczos = image.warp(im, flow, 'lanczos', false)
+t1 = sys.clock()
+print("Rotation Time lanczos = " .. (t1 - t0))  -- Not a robust measure (should average)
+image.display{image = im_lanczos, zoom = 4, legend = 'rotation lanczos'}
+
+im_lanczos = image.warp(im, flow, 'lanczos', false, 'pad')
+image.display{image = im_lanczos, zoom = 4, legend = 'rotation lanczos (default pad)'}
+
+im_lanczos = image.warp(im, flow, 'lanczos', false, 'pad', 1)
+image.display{image = im_lanczos, zoom = 4, legend = 'rotation lanczos (pad 1)'}
+
+image.display{image = im, zoom = 4, legend = 'source image'}
+
diff --git a/win.ui b/win.ui
new file mode 100644
index 0000000..a2dfdc5
--- /dev/null
+++ b/win.ui
@@ -0,0 +1,40 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ui version="4.0">
+ <class>Display</class>
+ <widget class="QWidget" name="Display">
+  <property name="geometry">
+   <rect>
+    <x>0</x>
+    <y>0</y>
+    <width>640</width>
+    <height>480</height>
+   </rect>
+  </property>
+  <property name="windowTitle">
+   <string>Form</string>
+  </property>
+  <layout class="QVBoxLayout" name="verticalLayout">
+   <property name="margin">
+    <number>0</number>
+   </property>
+   <item>
+    <widget class="QFrame" name="frame">
+     <property name="sizeIncrement">
+      <size>
+       <width>10</width>
+       <height>10</height>
+      </size>
+     </property>
+     <property name="frameShape">
+      <enum>QFrame::NoFrame</enum>
+     </property>
+     <property name="frameShadow">
+      <enum>QFrame::Raised</enum>
+     </property>
+    </widget>
+   </item>
+  </layout>
+ </widget>
+ <resources/>
+ <connections/>
+</ui>

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/lua-torch-image.git


