[arrayfire] 69/248: Added short (s16) and ushort (u16) types for CPU

Tue Nov 17 15:54:03 UTC 2015

This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch dfsg-clean
in repository arrayfire.

commit ebfe9e5b67b784ff3981abef98d6cdb22aea8588
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date:   Tue Sep 22 13:08:59 2015 -0400

    Added short (s16) and ushort (u16) types for CPU
    
    * Work in progress. Need to add CUDA and OpenCL
    * Header files have 16 bit type functions wrapped in AF_API_VERSION
---
 include/af/array.h                    | 42 +++++++++++++++++++++++++--
 include/af/defines.h                  | 10 +++++--
 include/af/traits.hpp                 | 20 +++++++++++++
 include/af/util.h                     |  4 +--
 src/api/c/assign.cpp                  |  6 ++++
 src/api/c/bilateral.cpp               |  2 ++
 src/api/c/binary.cpp                  |  8 ++++++
 src/api/c/cast.cpp                    |  2 ++
 src/api/c/convolve.cpp                |  4 +++
 src/api/c/corrcoef.cpp                |  2 ++
 src/api/c/covariance.cpp              |  2 ++
 src/api/c/data.cpp                    | 40 ++++++++++++++++++++++++--
 src/api/c/device.cpp                  |  8 ++++++
 src/api/c/diff.cpp                    |  4 +++
 src/api/c/dog.cpp                     |  2 ++
 src/api/c/fast.cpp                    |  2 ++
 src/api/c/fftconvolve.cpp             |  2 ++
 src/api/c/filters.cpp                 |  2 ++
 src/api/c/flip.cpp                    |  2 ++
 src/api/c/handle.hpp                  |  3 ++
 src/api/c/histeq.cpp                  |  2 ++
 src/api/c/histogram.cpp               |  2 ++
 src/api/c/implicit.cpp                |  6 ++++
 src/api/c/index.cpp                   | 10 ++++++-
 src/api/c/join.cpp                    |  4 +++
 src/api/c/match_template.cpp          |  2 ++
 src/api/c/mean.cpp                    |  8 ++++++
 src/api/c/meanshift.cpp               |  2 ++
 src/api/c/median.cpp                  |  4 +++
 src/api/c/moddims.cpp                 |  2 ++
 src/api/c/morph.cpp                   |  4 +++
 src/api/c/nearest_neighbour.cpp       | 13 +++++----
 src/api/c/print.cpp                   |  6 ++++
 src/api/c/reduce.cpp                  | 16 +++++++++++
 src/api/c/regions.cpp                 |  2 ++
 src/api/c/reorder.cpp                 |  2 ++
 src/api/c/replace.cpp                 |  4 +++
 src/api/c/resize.cpp                  |  2 ++
 src/api/c/rgb_gray.cpp                |  2 ++
 src/api/c/rotate.cpp                  |  2 ++
 src/api/c/sat.cpp                     |  2 ++
 src/api/c/scan.cpp                    |  2 ++
 src/api/c/select.cpp                  |  6 ++++
 src/api/c/set.cpp                     |  6 ++++
 src/api/c/shift.cpp                   |  2 ++
 src/api/c/sobel.cpp                   |  2 ++
 src/api/c/sort.cpp                    |  8 ++++++
 src/api/c/stdev.cpp                   |  4 +++
 src/api/c/stream.cpp                  |  4 +++
 src/api/c/susan.cpp                   |  2 ++
 src/api/c/tile.cpp                    |  2 ++
 src/api/c/transform.cpp               |  2 ++
 src/api/c/transpose.cpp               |  4 +++
 src/api/c/type_util.cpp               | 10 +++++--
 src/api/c/unwrap.cpp                  |  2 ++
 src/api/c/var.cpp                     |  8 ++++++
 src/api/c/where.cpp                   |  2 ++
 src/api/c/wrap.cpp                    |  2 ++
 src/api/cpp/array.cpp                 | 24 ++++++++++++++--
 src/api/cpp/corrcoef.cpp              |  4 +++
 src/api/cpp/data.cpp                  |  2 ++
 src/api/cpp/device.cpp                |  8 ++++--
 src/api/cpp/mean.cpp                  |  4 +++
 src/api/cpp/median.cpp                |  4 +++
 src/api/cpp/reduce.cpp                |  6 ++++
 src/api/cpp/stdev.cpp                 |  4 +++
 src/api/cpp/var.cpp                   |  2 ++
 src/backend/ArrayInfo.cpp             |  2 ++
 src/backend/cpu/Array.cpp             |  2 ++
 src/backend/cpu/approx.cpp            |  8 +++---
 src/backend/cpu/assign.cpp            |  2 ++
 src/backend/cpu/bilateral.cpp         |  2 ++
 src/backend/cpu/convolve.cpp          |  2 ++
 src/backend/cpu/copy.cpp              | 54 +++++++++++++++++++++--------------
 src/backend/cpu/diagonal.cpp          |  2 ++
 src/backend/cpu/diff.cpp              |  2 ++
 src/backend/cpu/fast.cpp              |  2 ++
 src/backend/cpu/fftconvolve.cpp       |  4 +++
 src/backend/cpu/hist_graphics.cpp     |  2 ++
 src/backend/cpu/histogram.cpp         |  2 ++
 src/backend/cpu/identity.cpp          |  4 ++-
 src/backend/cpu/image.cpp             |  2 ++
 src/backend/cpu/index.cpp             |  2 ++
 src/backend/cpu/iota.cpp              |  2 ++
 src/backend/cpu/ireduce.cpp           |  4 +++
 src/backend/cpu/join.cpp              |  4 +++
 src/backend/cpu/lookup.cpp            |  4 +++
 src/backend/cpu/match_template.cpp    |  2 ++
 src/backend/cpu/meanshift.cpp         |  2 ++
 src/backend/cpu/medfilt.cpp           |  2 ++
 src/backend/cpu/memory.cpp            |  2 ++
 src/backend/cpu/morph.cpp             |  2 ++
 src/backend/cpu/nearest_neighbour.cpp | 13 ++++++++-
 src/backend/cpu/plot.cpp              |  2 ++
 src/backend/cpu/random.cpp            |  2 ++
 src/backend/cpu/range.cpp             |  2 ++
 src/backend/cpu/reduce.cpp            | 16 ++++++++++-
 src/backend/cpu/regions.cpp           |  2 ++
 src/backend/cpu/reorder.cpp           |  2 ++
 src/backend/cpu/resize.cpp            |  2 ++
 src/backend/cpu/rotate.cpp            |  2 ++
 src/backend/cpu/scan.cpp              |  2 ++
 src/backend/cpu/select.cpp            |  2 ++
 src/backend/cpu/set.cpp               |  2 ++
 src/backend/cpu/shift.cpp             |  2 ++
 src/backend/cpu/sobel.cpp             |  2 ++
 src/backend/cpu/sort.cpp              |  2 ++
 src/backend/cpu/sort_by_key.cpp       |  5 ++++
 src/backend/cpu/sort_index.cpp        |  2 ++
 src/backend/cpu/susan.cpp             |  2 ++
 src/backend/cpu/tile.cpp              |  2 ++
 src/backend/cpu/transform.cpp         |  2 ++
 src/backend/cpu/transpose.cpp         |  2 ++
 src/backend/cpu/triangle.cpp          |  2 ++
 src/backend/cpu/types.hpp             |  1 +
 src/backend/cpu/unwrap.cpp            |  2 ++
 src/backend/cpu/where.cpp             |  2 ++
 src/backend/cpu/wrap.cpp              |  2 ++
 test/array.cpp                        | 22 +++++++++++++-
 test/assign.cpp                       |  2 +-
 test/bilateral.cpp                    |  2 +-
 test/constant.cpp                     |  2 +-
 test/convolve.cpp                     |  2 +-
 test/diff1.cpp                        |  2 +-
 test/diff2.cpp                        |  2 +-
 test/dog.cpp                          |  6 ++--
 test/fast.cpp                         |  2 +-
 test/hamming.cpp                      |  4 +--
 test/histogram.cpp                    |  2 +-
 test/index.cpp                        |  4 +--
 test/iota.cpp                         |  2 +-
 test/join.cpp                         |  2 +-
 test/match_template.cpp               |  2 +-
 test/mean.cpp                         | 26 ++++++++++++-----
 test/meanshift.cpp                    |  2 +-
 test/medfilt.cpp                      |  2 +-
 test/median.cpp                       |  2 ++
 test/moddims.cpp                      |  2 +-
 test/morph.cpp                        |  2 +-
 test/nearest_neighbour.cpp            | 14 ++++++++-
 test/random.cpp                       |  2 +-
 test/range.cpp                        |  2 +-
 test/reduce.cpp                       | 14 +++++----
 test/regions.cpp                      |  2 +-
 test/reorder.cpp                      |  2 +-
 test/replace.cpp                      |  2 +-
 test/resize.cpp                       |  2 +-
 test/rotate.cpp                       |  2 +-
 test/rotate_linear.cpp                |  2 +-
 test/sat.cpp                          |  2 +-
 test/scan.cpp                         |  4 ++-
 test/select.cpp                       |  2 +-
 test/shift.cpp                        |  2 +-
 test/sobel.cpp                        |  2 +-
 test/sort.cpp                         |  2 +-
 test/sort_by_key.cpp                  |  2 +-
 test/sort_index.cpp                   |  2 +-
 test/susan.cpp                        |  2 +-
 test/testHelpers.hpp                  | 36 ++++-------------------
 test/tile.cpp                         |  2 +-
 test/translate.cpp                    |  2 +-
 test/transpose.cpp                    |  2 +-
 test/transpose_inplace.cpp            |  2 +-
 test/triangle.cpp                     |  2 +-
 test/unwrap.cpp                       |  2 +-
 test/var.cpp                          | 16 ++++++-----
 test/where.cpp                        |  2 +-
 test/wrap.cpp                         |  2 +-
 test/write.cpp                        |  2 +-
 169 files changed, 666 insertions(+), 149 deletions(-)

diff --git a/include/af/array.h b/include/af/array.h
index bdc6502..c6ee564 100644
--- a/include/af/array.h
+++ b/include/af/array.h
@@ -84,6 +84,19 @@ namespace af
             ASSIGN(/=)
 #undef ASSIGN
 
+#if AF_API_VERSION >= 32
+#define ASSIGN(OP)                                                  \
+            array_proxy& operator OP(const short &a);               \
+            array_proxy& operator OP(const unsigned short &a);      \
+
+            ASSIGN(=)
+            ASSIGN(+=)
+            ASSIGN(-=)
+            ASSIGN(*=)
+            ASSIGN(/=)
+#undef ASSIGN
+#endif
+
             // af::array member functions. same behavior as those below
             af_array get();
             af_array get() const;
@@ -813,7 +826,7 @@ namespace af
         /// \ingroup method_mat
         array H() const;
 
-#define ASSIGN(OP)                                                                      \
+#define ASSIGN_(OP)                                                                     \
         array& OP(const array &val);                                                    \
         array& OP(const double &val);              /**< \copydoc OP (const array &) */  \
         array& OP(const cdouble &val);             /**< \copydoc OP (const array &) */  \
@@ -829,6 +842,17 @@ namespace af
         array& OP(const long long  &val);          /**< \copydoc OP (const array &) */  \
         array& OP(const unsigned long long &val);  /**< \copydoc OP (const array &) */  \
 
+#if AF_API_VERSION >= 32
+#define ASSIGN(OP)                                                                      \
+        ASSIGN_(OP)                                                                     \
+        array& OP(const short  &val);              /**< \copydoc OP (const array &) */  \
+        array& OP(const unsigned short &val);      /**< \copydoc OP (const array &) */  \
+
+#else
+#define ASSIGN(OP) ASSIGN_(OP)
+#endif
+
+
         /// \ingroup array_mem_operator_eq
         /// @{
         /// \brief Assignes the value(s) of val to the elements of the array.
@@ -892,6 +916,7 @@ namespace af
 
 
 #undef ASSIGN
+#undef ASSIGN_
 
         ///
         /// \brief Negates the values of the array
@@ -930,7 +955,7 @@ namespace af
     };
     // end of class array
 
-#define BIN_OP(OP)                                                                                                       \
+#define BIN_OP_(OP)                                                                                                      \
     AFAPI array OP (const array& lhs, const array& rhs);                                                                 \
     AFAPI array OP (const bool& lhs, const array& rhs);                 /**< \copydoc OP (const array&, const array&) */ \
     AFAPI array OP (const int& lhs, const array& rhs);                  /**< \copydoc OP (const array&, const array&) */ \
@@ -959,6 +984,18 @@ namespace af
     AFAPI array OP (const array& lhs, const cfloat& rhs);               /**< \copydoc OP (const array&, const array&) */ \
     AFAPI array OP (const array& lhs, const cdouble& rhs);              /**< \copydoc OP (const array&, const array&) */ \
 
+#if AF_API_VERSION >= 32
+#define BIN_OP(OP)                                                                                                       \
+        BIN_OP_(OP)                                                                                                      \
+        AFAPI array OP (const short& lhs, const array& rhs);            /**< \copydoc OP (const array&, const array&) */ \
+        AFAPI array OP (const unsigned short& lhs, const array& rhs);   /**< \copydoc OP (const array&, const array&) */ \
+        AFAPI array OP (const array& lhs, const short& rhs);            /**< \copydoc OP (const array&, const array&) */ \
+        AFAPI array OP (const array& lhs, const unsigned short& rhs);   /**< \copydoc OP (const array&, const array&) */ \
+
+#else
+#define BIN_OP(OP) BIN_OP_(OP)
+#endif
+
     /// \ingroup arith_func_add
     /// @{
     /// \brief Adds two arrays or an array and a value.
@@ -1178,6 +1215,7 @@ namespace af
     /// @}
 
 #undef BIN_OP
+#undef BIN_OP_
 
     /// Evaluate an expression (nonblocking).
     /**
diff --git a/include/af/defines.h b/include/af/defines.h
index 641a929..1f1a878 100644
--- a/include/af/defines.h
+++ b/include/af/defines.h
@@ -173,12 +173,16 @@ typedef enum {
     c32,    ///< 32-bit complex floating point values
     f64,    ///< 64-bit complex floating point values
     c64,    ///< 64-bit complex floating point values
-    b8,     ///< 8-bit boolean values
+    b8 ,    ///< 8-bit boolean values
     s32,    ///< 32-bit signed integral values
     u32,    ///< 32-bit unsigned integral values
-    u8,     ///< 8-bit unsigned integral values
+    u8 ,    ///< 8-bit unsigned integral values
     s64,    ///< 64-bit signed integral values
-    u64     ///< 64-bit unsigned integral values
+    u64,    ///< 64-bit unsigned integral values
+#if AF_API_VERSION >= 32
+    s16,    ///< 16-bit signed integral values
+    u16,    ///< 16-bit unsigned integral values
+#endif
 } af_dtype;
 
 typedef enum {
diff --git a/include/af/traits.hpp b/include/af/traits.hpp
index 5f7fed3..5e2e3da 100644
--- a/include/af/traits.hpp
+++ b/include/af/traits.hpp
@@ -139,6 +139,26 @@ struct dtype_traits<unsigned long long> {
     static const char* getName() { return "ulong"; }
 };
 
+template<>
+struct dtype_traits<short> {
+    enum {
+        af_type = s16 ,
+        ctype = s16
+    };
+    typedef short base_type;
+    static const char* getName() { return "short"; }
+};
+
+template<>
+struct dtype_traits<unsigned short> {
+    enum {
+        af_type = u16 ,
+        ctype = u16
+    };
+    typedef unsigned short base_type;
+    static const char* getName() { return "ushort"; }
+};
+
 }
 
 #endif
diff --git a/include/af/util.h b/include/af/util.h
index 97e939e..c1fd96a 100644
--- a/include/af/util.h
+++ b/include/af/util.h
@@ -121,11 +121,11 @@ namespace af
 
 #define af_print(...) GET_PRINT_MACRO(__VA_ARGS__, AF_PRINT2, AF_PRINT1)(__VA_ARGS__)
 
-#else
+#else // AF_API_VERSION
 
 #define af_print(exp) af::print(#exp, exp);
 
-#endif
+#endif // AF_API_VERSION
 
 #endif //__cplusplus
 
diff --git a/src/api/c/assign.cpp b/src/api/c/assign.cpp
index c990889..13fa179 100644
--- a/src/api/c/assign.cpp
+++ b/src/api/c/assign.cpp
@@ -105,6 +105,8 @@ void assign_helper(Array<T> &out, const unsigned &ndims, const af_seq *index, co
             case u32: assign<T, uint   >(out, ndims, index, getArray<uint     >(in_));  break;
             case s64: assign<T, intl   >(out, ndims, index, getArray<intl     >(in_));  break;
             case u64: assign<T, uintl  >(out, ndims, index, getArray<uintl    >(in_));  break;
+            case s16: assign<T, short  >(out, ndims, index, getArray<short    >(in_));  break;
+            case u16: assign<T, ushort >(out, ndims, index, getArray<ushort   >(in_));  break;
             case u8 : assign<T, uchar  >(out, ndims, index, getArray<uchar    >(in_));  break;
             case b8 : assign<T, char   >(out, ndims, index, getArray<char     >(in_));  break;
             default : TYPE_ERROR(1, iType); break;
@@ -165,6 +167,8 @@ af_err af_assign_seq(af_array *out,
                 case u32: assign_helper<uint   >(getWritableArray<uint   >(res), ndims, index, rhs);  break;
                 case s64: assign_helper<intl   >(getWritableArray<intl   >(res), ndims, index, rhs);  break;
                 case u64: assign_helper<uintl  >(getWritableArray<uintl  >(res), ndims, index, rhs);  break;
+                case s16: assign_helper<short  >(getWritableArray<short  >(res), ndims, index, rhs);  break;
+                case u16: assign_helper<ushort >(getWritableArray<ushort >(res), ndims, index, rhs);  break;
                 case u8 : assign_helper<uchar  >(getWritableArray<uchar  >(res), ndims, index, rhs);  break;
                 case b8 : assign_helper<char   >(getWritableArray<char   >(res), ndims, index, rhs);  break;
                 default : TYPE_ERROR(1, oType); break;
@@ -332,6 +336,8 @@ af_err af_assign_gen(af_array *out,
                 case u32: genAssign<uint   >(output, idxrs, rhs); break;
                 case s64: genAssign<intl   >(output, idxrs, rhs); break;
                 case s32: genAssign<int    >(output, idxrs, rhs); break;
+                case s16: genAssign<short  >(output, idxrs, rhs); break;
+                case u16: genAssign<ushort >(output, idxrs, rhs); break;
                 case  u8: genAssign<uchar  >(output, idxrs, rhs); break;
                 case  b8: genAssign<char   >(output, idxrs, rhs); break;
                 default: TYPE_ERROR(1, rhsType);
diff --git a/src/api/c/bilateral.cpp b/src/api/c/bilateral.cpp
index c83c7ef..4f9281d 100644
--- a/src/api/c/bilateral.cpp
+++ b/src/api/c/bilateral.cpp
@@ -42,6 +42,8 @@ static af_err bilateral(af_array *out, const af_array &in, const float &s_sigma,
             case s32: output = bilateral<int   ,  float, isColor> (in, s_sigma, c_sigma); break;
             case u32: output = bilateral<uint  ,  float, isColor> (in, s_sigma, c_sigma); break;
             case u8 : output = bilateral<uchar ,  float, isColor> (in, s_sigma, c_sigma); break;
+            case s16: output = bilateral<short ,  float, isColor> (in, s_sigma, c_sigma); break;
+            case u16: output = bilateral<ushort,  float, isColor> (in, s_sigma, c_sigma); break;
             default : TYPE_ERROR(1, type);
         }
         std::swap(*out,output);
diff --git a/src/api/c/binary.cpp b/src/api/c/binary.cpp
index 8a6ae46..2997c13 100644
--- a/src/api/c/binary.cpp
+++ b/src/api/c/binary.cpp
@@ -55,6 +55,8 @@ static af_err af_arith(af_array *out, const af_array lhs, const af_array rhs, co
         case b8 : res = arithOp<char   , op>(lhs, rhs, odims); break;
         case s64: res = arithOp<intl   , op>(lhs, rhs, odims); break;
         case u64: res = arithOp<uintl  , op>(lhs, rhs, odims); break;
+        case s16: res = arithOp<short  , op>(lhs, rhs, odims); break;
+        case u16: res = arithOp<ushort , op>(lhs, rhs, odims); break;
         default: TYPE_ERROR(0, otype);
         }
 
@@ -85,6 +87,8 @@ static af_err af_arith_real(af_array *out, const af_array lhs, const af_array rh
         case b8 : res = arithOp<char   , op>(lhs, rhs, odims); break;
         case s64: res = arithOp<intl   , op>(lhs, rhs, odims); break;
         case u64: res = arithOp<uintl  , op>(lhs, rhs, odims); break;
+        case s16: res = arithOp<short  , op>(lhs, rhs, odims); break;
+        case u16: res = arithOp<ushort , op>(lhs, rhs, odims); break;
         default: TYPE_ERROR(0, otype);
         }
 
@@ -260,6 +264,8 @@ static af_err af_logic(af_array *out, const af_array lhs, const af_array rhs, co
         case b8 : res = logicOp<char   , op>(lhs, rhs, odims); break;
         case s64: res = logicOp<intl   , op>(lhs, rhs, odims); break;
         case u64: res = logicOp<uintl  , op>(lhs, rhs, odims); break;
+        case s16: res = logicOp<short  , op>(lhs, rhs, odims); break;
+        case u16: res = logicOp<ushort , op>(lhs, rhs, odims); break;
         default: TYPE_ERROR(0, type);
         }
 
@@ -335,6 +341,8 @@ static af_err af_bitwise(af_array *out, const af_array lhs, const af_array rhs,
         case b8 : res = bitOp<char   , op>(lhs, rhs, odims); break;
         case s64: res = bitOp<intl   , op>(lhs, rhs, odims); break;
         case u64: res = bitOp<uintl  , op>(lhs, rhs, odims); break;
+        case s16: res = bitOp<short  , op>(lhs, rhs, odims); break;
+        case u16: res = bitOp<ushort , op>(lhs, rhs, odims); break;
         default: TYPE_ERROR(0, type);
         }
 
diff --git a/src/api/c/cast.cpp b/src/api/c/cast.cpp
index 379b2df..872ace2 100644
--- a/src/api/c/cast.cpp
+++ b/src/api/c/cast.cpp
@@ -39,6 +39,8 @@ static af_array cast(const af_array in, const af_dtype type)
     case b8 : return getHandle(castArray<char    >(in));
     case s64: return getHandle(castArray<intl    >(in));
     case u64: return getHandle(castArray<uintl   >(in));
+    case s16: return getHandle(castArray<short   >(in));
+    case u16: return getHandle(castArray<ushort  >(in));
     default: TYPE_ERROR(2, type);
     }
 }
diff --git a/src/api/c/convolve.cpp b/src/api/c/convolve.cpp
index 912d8fd..3639008 100644
--- a/src/api/c/convolve.cpp
+++ b/src/api/c/convolve.cpp
@@ -85,6 +85,8 @@ af_err convolve(af_array *out, const af_array signal, const af_array filter)
             case f64: output = convolve<double ,  double, baseDim, expand>(signal, filter, convBT); break;
             case u32: output = convolve<uint   ,   float, baseDim, expand>(signal, filter, convBT); break;
             case s32: output = convolve<int    ,   float, baseDim, expand>(signal, filter, convBT); break;
+            case u16: output = convolve<ushort ,   float, baseDim, expand>(signal, filter, convBT); break;
+            case s16: output = convolve<short  ,   float, baseDim, expand>(signal, filter, convBT); break;
             case u8:  output = convolve<uchar  ,   float, baseDim, expand>(signal, filter, convBT); break;
             case b8:  output = convolve<char   ,   float, baseDim, expand>(signal, filter, convBT); break;
             default: TYPE_ERROR(1, stype);
@@ -120,6 +122,8 @@ af_err convolve2_sep(af_array *out, af_array col_filter, af_array row_filter, co
             case f64: output = convolve2<double ,  double, expand>(signal, col_filter, row_filter); break;
             case u32: output = convolve2<uint   ,   float, expand>(signal, col_filter, row_filter); break;
             case s32: output = convolve2<int    ,   float, expand>(signal, col_filter, row_filter); break;
+            case u16: output = convolve2<ushort ,   float, expand>(signal, col_filter, row_filter); break;
+            case s16: output = convolve2<short  ,   float, expand>(signal, col_filter, row_filter); break;
             case u8:  output = convolve2<uchar  ,   float, expand>(signal, col_filter, row_filter); break;
             case b8:  output = convolve2<char   ,   float, expand>(signal, col_filter, row_filter); break;
             default: TYPE_ERROR(1, signalType);
diff --git a/src/api/c/corrcoef.cpp b/src/api/c/corrcoef.cpp
index d6d9800..275fa80 100644
--- a/src/api/c/corrcoef.cpp
+++ b/src/api/c/corrcoef.cpp
@@ -71,6 +71,8 @@ af_err af_corrcoef(double *realVal, double *imagVal, const af_array X, const af_
             case u32: *realVal = corrcoef<uint  , float >(X, Y); break;
             case s64: *realVal = corrcoef<intl  , double>(X, Y); break;
             case u64: *realVal = corrcoef<uintl , double>(X, Y); break;
+            case s16: *realVal = corrcoef<short , float >(X, Y); break;
+            case u16: *realVal = corrcoef<ushort, float >(X, Y); break;
             case  u8: *realVal = corrcoef<uchar , float >(X, Y); break;
             case  b8: *realVal = corrcoef<char  , float >(X, Y); break;
             default : TYPE_ERROR(1, xType);
diff --git a/src/api/c/covariance.cpp b/src/api/c/covariance.cpp
index 80b391d..1050b72 100644
--- a/src/api/c/covariance.cpp
+++ b/src/api/c/covariance.cpp
@@ -71,6 +71,8 @@ af_err af_cov(af_array* out, const af_array X, const af_array Y, const bool isbi
             case u32: output = cov<uint  , float >(X, Y, isbiased); break;
             case s64: output = cov<intl  , double>(X, Y, isbiased); break;
             case u64: output = cov<uintl , double>(X, Y, isbiased); break;
+            case s16: output = cov<short , float >(X, Y, isbiased); break;
+            case u16: output = cov<ushort, float >(X, Y, isbiased); break;
             case  u8: output = cov<uchar , float >(X, Y, isbiased); break;
             default : TYPE_ERROR(1, xType);
         }
diff --git a/src/api/c/data.cpp b/src/api/c/data.cpp
index 50acaad..56a1dcf 100644
--- a/src/api/c/data.cpp
+++ b/src/api/c/data.cpp
@@ -59,6 +59,8 @@ af_err af_get_data_ptr(void *data, const af_array arr)
         case u8:    copyData(static_cast<uchar    *>(data), arr);  break;
         case s64:   copyData(static_cast<intl     *>(data), arr);  break;
         case u64:   copyData(static_cast<uintl    *>(data), arr);  break;
+        case s16:   copyData(static_cast<short    *>(data), arr);  break;
+        case u16:   copyData(static_cast<ushort   *>(data), arr);  break;
         default:    TYPE_ERROR(1, type);
         }
     }
@@ -88,6 +90,8 @@ af_err af_create_array(af_array *result, const void * const data,
         case u8:    out = createHandleFromData(d, static_cast<const uchar   *>(data)); break;
         case s64:   out = createHandleFromData(d, static_cast<const intl    *>(data)); break;
         case u64:   out = createHandleFromData(d, static_cast<const uintl   *>(data)); break;
+        case s16:   out = createHandleFromData(d, static_cast<const short   *>(data)); break;
+        case u16:   out = createHandleFromData(d, static_cast<const ushort  *>(data)); break;
         default:    TYPE_ERROR(4, type);
         }
         std::swap(*result, out);
@@ -118,6 +122,8 @@ af_err af_constant(af_array *result, const double value,
         case u8:    out = createHandleFromValue<uchar  >(d, value); break;
         case s64:   out = createHandleFromValue<intl   >(d, value); break;
         case u64:   out = createHandleFromValue<uintl  >(d, value); break;
+        case s16:   out = createHandleFromValue<short  >(d, value); break;
+        case u16:   out = createHandleFromValue<ushort >(d, value); break;
         default:    TYPE_ERROR(4, type);
         }
         std::swap(*result, out);
@@ -212,6 +218,8 @@ af_err af_create_handle(af_array *result, const unsigned ndims, const dim_t * co
         case u8:    out = createHandle<uchar  >(d); break;
         case s64:   out = createHandle<intl   >(d); break;
         case u64:   out = createHandle<uintl  >(d); break;
+        case s16:   out = createHandle<short  >(d); break;
+        case u16:   out = createHandle<ushort >(d); break;
         default:    TYPE_ERROR(3, type);
         }
         std::swap(*result, out);
@@ -239,6 +247,8 @@ af_err af_copy_array(af_array *out, const af_array in)
         case u8:    res = copyArray<uchar   >(in); break;
         case s64:   res = copyArray<intl    >(in); break;
         case u64:   res = copyArray<uintl   >(in); break;
+        case s16:   res = copyArray<short   >(in); break;
+        case u16:   res = copyArray<ushort  >(in); break;
         default:    TYPE_ERROR(1, type);
         }
         std::swap(*out, res);
@@ -266,6 +276,8 @@ af_err af_get_data_ref_count(int *use_count, const af_array in)
         case u8:    res = getArray<uchar   >(in).useCount(); break;
         case s64:   res = getArray<intl    >(in).useCount(); break;
         case u64:   res = getArray<uintl   >(in).useCount(); break;
+        case s16:   res = getArray<short   >(in).useCount(); break;
+        case u16:   res = getArray<ushort  >(in).useCount(); break;
         default:    TYPE_ERROR(1, type);
         }
         std::swap(*use_count, res);
@@ -310,6 +322,8 @@ af_err af_randu(af_array *out, const unsigned ndims, const dim_t * const dims, c
         case u32:   result = randu_<uint   >(d);    break;
         case s64:   result = randu_<intl   >(d);    break;
         case u64:   result = randu_<uintl  >(d);    break;
+        case s16:   result = randu_<short  >(d);    break;
+        case u16:   result = randu_<ushort >(d);    break;
         case u8:    result = randu_<uchar  >(d);    break;
         case b8:    result = randu_<char  >(d);    break;
         default:    TYPE_ERROR(3, type);
@@ -375,6 +389,8 @@ af_err af_identity(af_array *out, const unsigned ndims, const dim_t * const dims
         case u8:    result = identity_<uchar  >(d);    break;
         case u64:   result = identity_<uintl  >(d);    break;
         case s64:   result = identity_<intl   >(d);    break;
+        case u16:   result = identity_<ushort >(d);    break;
+        case s16:   result = identity_<short  >(d);    break;
             // Removed because of bool type. Functions implementations exist.
         case b8:    result = identity_<char   >(d);    break;
         default:    TYPE_ERROR(3, type);
@@ -401,6 +417,8 @@ af_err af_release_array(af_array arr)
         case u8:    releaseHandle<uchar   >(arr); break;
         case s64:   releaseHandle<intl    >(arr); break;
         case u64:   releaseHandle<uintl   >(arr); break;
+        case s16:   releaseHandle<short   >(arr); break;
+        case u16:   releaseHandle<ushort  >(arr); break;
         default:    TYPE_ERROR(0, type);
         }
     }
@@ -433,6 +451,8 @@ af_array retain(const af_array in)
     case b8:  return retainHandle<char            >(in);
     case s64: return retainHandle<intl            >(in);
     case u64: return retainHandle<uintl           >(in);
+    case s16: return retainHandle<short           >(in);
+    case u16: return retainHandle<ushort          >(in);
     default:
         TYPE_ERROR(1, ty);
     }
@@ -470,6 +490,8 @@ af_err af_range(af_array *result, const unsigned ndims, const dim_t * const dims
         case u32:   out = range_<uint   >(d, seq_dim); break;
         case s64:   out = range_<intl   >(d, seq_dim); break;
         case u64:   out = range_<uintl  >(d, seq_dim); break;
+        case s16:   out = range_<short  >(d, seq_dim); break;
+        case u16:   out = range_<ushort >(d, seq_dim); break;
         case u8:    out = range_<uchar  >(d, seq_dim); break;
         default:    TYPE_ERROR(4, type);
         }
@@ -513,6 +535,8 @@ af_err af_iota(af_array *result, const unsigned ndims, const dim_t * const dims,
         case u32:   out = iota_<uint   >(d, t); break;
         case s64:   out = iota_<intl   >(d, t); break;
         case u64:   out = iota_<uintl  >(d, t); break;
+        case s16:   out = iota_<short  >(d, t); break;
+        case u16:   out = iota_<ushort >(d, t); break;
         case u8:    out = iota_<uchar  >(d, t); break;
         default:    TYPE_ERROR(4, type);
         }
@@ -596,6 +620,8 @@ af_err af_eval(af_array arr)
         case b8 : eval<char   >(arr); break;
         case s64: eval<intl   >(arr); break;
         case u64: eval<uintl  >(arr); break;
+        case s16: eval<short  >(arr); break;
+        case u16: eval<ushort >(arr); break;
         default:
             TYPE_ERROR(0, type);
         }
@@ -633,6 +659,8 @@ af_err af_diag_create(af_array *out, const af_array in, const int num)
         case u32:   result = diagCreate<uint   >(in, num);    break;
         case s64:   result = diagCreate<intl   >(in, num);    break;
         case u64:   result = diagCreate<uintl  >(in, num);    break;
+        case s16:   result = diagCreate<short  >(in, num);    break;
+        case u16:   result = diagCreate<ushort >(in, num);    break;
         case u8:    result = diagCreate<uchar  >(in, num);    break;
             // Removed because of bool type. Functions implementations exist.
         case b8:    result = diagCreate<char   >(in, num);    break;
@@ -662,6 +690,8 @@ af_err af_diag_extract(af_array *out, const af_array in, const int num)
         case u32:   result = diagExtract<uint   >(in, num);    break;
         case s64:   result = diagExtract<intl   >(in, num);    break;
         case u64:   result = diagExtract<uintl  >(in, num);    break;
+        case s16:   result = diagExtract<short  >(in, num);    break;
+        case u16:   result = diagExtract<ushort >(in, num);    break;
         case u8:    result = diagExtract<uchar  >(in, num);    break;
             // Removed because of bool type. Functions implementations exist.
         case b8:    result = diagExtract<char   >(in, num);    break;
@@ -702,6 +732,8 @@ af_err af_write_array(af_array arr, const void *data, const size_t bytes, af_sou
         case u8:    write_array(arr, static_cast<const uchar   *>(data), bytes, src); break;
         case s64:   write_array(arr, static_cast<const intl    *>(data), bytes, src); break;
         case u64:   write_array(arr, static_cast<const uintl   *>(data), bytes, src); break;
+        case s16:   write_array(arr, static_cast<const short   *>(data), bytes, src); break;
+        case u16:   write_array(arr, static_cast<const ushort  *>(data), bytes, src); break;
         default:    TYPE_ERROR(4, type);
         }
     }
@@ -729,9 +761,11 @@ af_err af_lower(af_array *out, const af_array in, bool is_unit_diag)
         case c32: res = triangle<cfloat  , false>(in, is_unit_diag); break;
         case c64: res = triangle<cdouble , false>(in, is_unit_diag); break;
         case s32: res = triangle<int     , false>(in, is_unit_diag); break;
-        case s64: res = triangle<intl    , false>(in, is_unit_diag); break;
         case u32: res = triangle<uint    , false>(in, is_unit_diag); break;
+        case s64: res = triangle<intl    , false>(in, is_unit_diag); break;
         case u64: res = triangle<uintl   , false>(in, is_unit_diag); break;
+        case s16: res = triangle<short   , false>(in, is_unit_diag); break;
+        case u16: res = triangle<ushort  , false>(in, is_unit_diag); break;
         case u8 : res = triangle<uchar   , false>(in, is_unit_diag); break;
         case b8 : res = triangle<char    , false>(in, is_unit_diag); break;
         }
@@ -753,9 +787,11 @@ af_err af_upper(af_array *out, const af_array in, bool is_unit_diag)
         case c32: res = triangle<cfloat  , true>(in, is_unit_diag); break;
         case c64: res = triangle<cdouble , true>(in, is_unit_diag); break;
         case s32: res = triangle<int     , true>(in, is_unit_diag); break;
-        case s64: res = triangle<intl    , true>(in, is_unit_diag); break;
         case u32: res = triangle<uint    , true>(in, is_unit_diag); break;
+        case s64: res = triangle<intl    , true>(in, is_unit_diag); break;
         case u64: res = triangle<uintl   , true>(in, is_unit_diag); break;
+        case s16: res = triangle<short   , true>(in, is_unit_diag); break;
+        case u16: res = triangle<ushort  , true>(in, is_unit_diag); break;
         case u8 : res = triangle<uchar   , true>(in, is_unit_diag); break;
         case b8 : res = triangle<char    , true>(in, is_unit_diag); break;
         }
diff --git a/src/api/c/device.cpp b/src/api/c/device.cpp
index e2dba14..cd5bd57 100644
--- a/src/api/c/device.cpp
+++ b/src/api/c/device.cpp
@@ -146,6 +146,8 @@ af_err af_device_array(af_array *arr, const void *data,
         case u32: res = getHandle(createDeviceDataArray<uint   >(d, data)); break;
         case s64: res = getHandle(createDeviceDataArray<intl   >(d, data)); break;
         case u64: res = getHandle(createDeviceDataArray<uintl  >(d, data)); break;
+        case s16: res = getHandle(createDeviceDataArray<short  >(d, data)); break;
+        case u16: res = getHandle(createDeviceDataArray<ushort >(d, data)); break;
         case u8 : res = getHandle(createDeviceDataArray<uchar  >(d, data)); break;
         case b8 : res = getHandle(createDeviceDataArray<char   >(d, data)); break;
         default: TYPE_ERROR(4, type);
@@ -176,6 +178,8 @@ af_err af_get_device_ptr(void **data, const af_array arr)
         case u32: *data = getDevicePtr(getArray<uint   >(arr)); break;
         case s64: *data = getDevicePtr(getArray<intl   >(arr)); break;
         case u64: *data = getDevicePtr(getArray<uintl  >(arr)); break;
+        case s16: *data = getDevicePtr(getArray<short  >(arr)); break;
+        case u16: *data = getDevicePtr(getArray<ushort >(arr)); break;
         case u8 : *data = getDevicePtr(getArray<uchar  >(arr)); break;
         case b8 : *data = getDevicePtr(getArray<char   >(arr)); break;
 
@@ -211,6 +215,8 @@ af_err af_lock_device_ptr(const af_array arr)
         case u32: lockDevicePtr<uint   >(arr); break;
         case s64: lockDevicePtr<intl   >(arr); break;
         case u64: lockDevicePtr<uintl  >(arr); break;
+        case s16: lockDevicePtr<short  >(arr); break;
+        case u16: lockDevicePtr<ushort >(arr); break;
         case u8 : lockDevicePtr<uchar  >(arr); break;
         case b8 : lockDevicePtr<char   >(arr); break;
         default: TYPE_ERROR(4, type);
@@ -245,6 +251,8 @@ af_err af_unlock_device_ptr(const af_array arr)
         case u32: unlockDevicePtr<uint   >(arr); break;
         case s64: unlockDevicePtr<intl   >(arr); break;
         case u64: unlockDevicePtr<uintl  >(arr); break;
+        case s16: unlockDevicePtr<short  >(arr); break;
+        case u16: unlockDevicePtr<ushort >(arr); break;
         case u8 : unlockDevicePtr<uchar  >(arr); break;
         case b8 : unlockDevicePtr<char   >(arr); break;
         default: TYPE_ERROR(4, type);
diff --git a/src/api/c/diff.cpp b/src/api/c/diff.cpp
index 75ce5d8..8bc4d07 100644
--- a/src/api/c/diff.cpp
+++ b/src/api/c/diff.cpp
@@ -54,6 +54,8 @@ af_err af_diff1(af_array *out, const af_array in, const int dim)
             case u32: output = diff1<uint   >(in,dim);  break;
             case s64: output = diff1<intl   >(in,dim);  break;
             case u64: output = diff1<uintl  >(in,dim);  break;
+            case s16: output = diff1<short  >(in,dim);  break;
+            case u16: output = diff1<ushort >(in,dim);  break;
             case u8:  output = diff1<uchar  >(in,dim);  break;
             default:  TYPE_ERROR(1, type);
         }
@@ -89,6 +91,8 @@ af_err af_diff2(af_array *out, const af_array in, const int dim)
             case u32: output = diff2<uint   >(in,dim);  break;
             case s64: output = diff2<intl   >(in,dim);  break;
             case u64: output = diff2<uintl  >(in,dim);  break;
+            case s16: output = diff2<short  >(in,dim);  break;
+            case u16: output = diff2<ushort >(in,dim);  break;
             case u8:  output = diff2<uchar  >(in,dim);  break;
             default:  TYPE_ERROR(1, type);
         }
diff --git a/src/api/c/dog.cpp b/src/api/c/dog.cpp
index 190017a..3cf793c 100644
--- a/src/api/c/dog.cpp
+++ b/src/api/c/dog.cpp
@@ -59,6 +59,8 @@ af_err af_dog(af_array *out, const af_array in, const int radius1, const int rad
             case b8 : output = dog<char  , float>(in, radius1, radius2); break;
             case s32: output = dog<int   , float>(in, radius1, radius2); break;
             case u32: output = dog<uint  , float>(in, radius1, radius2); break;
+            case s16: output = dog<short , float>(in, radius1, radius2); break;
+            case u16: output = dog<ushort, float>(in, radius1, radius2); break;
             case u8 : output = dog<uchar , float>(in, radius1, radius2); break;
             default : TYPE_ERROR(1, type);
         }
diff --git a/src/api/c/fast.cpp b/src/api/c/fast.cpp
index e28f590..9a40319 100644
--- a/src/api/c/fast.cpp
+++ b/src/api/c/fast.cpp
@@ -70,6 +70,8 @@ af_err af_fast(af_features *out, const af_array in, const float thr,
             case b8 : *out = fast<char  >(in, thr, arc_length, non_max, feature_ratio, edge); break;
             case s32: *out = fast<int   >(in, thr, arc_length, non_max, feature_ratio, edge); break;
             case u32: *out = fast<uint  >(in, thr, arc_length, non_max, feature_ratio, edge); break;
+            case s16: *out = fast<short >(in, thr, arc_length, non_max, feature_ratio, edge); break;
+            case u16: *out = fast<ushort>(in, thr, arc_length, non_max, feature_ratio, edge); break;
             case u8 : *out = fast<uchar >(in, thr, arc_length, non_max, feature_ratio, edge); break;
             default : TYPE_ERROR(1, type);
         }
diff --git a/src/api/c/fftconvolve.cpp b/src/api/c/fftconvolve.cpp
index fc3a91c..2d9f2f6 100644
--- a/src/api/c/fftconvolve.cpp
+++ b/src/api/c/fftconvolve.cpp
@@ -143,6 +143,8 @@ af_err fft_convolve(af_array *out, const af_array signal, const af_array filter,
             case f32: output = fftconvolve<float , float,  cfloat,  false, false, baseDim>(signal, filter, expand, convBT); break;
             case u32: output = fftconvolve<uint  , float,  cfloat,  false, true,  baseDim>(signal, filter, expand, convBT); break;
             case s32: output = fftconvolve<int   , float,  cfloat,  false, true,  baseDim>(signal, filter, expand, convBT); break;
+            case u16: output = fftconvolve<ushort, float,  cfloat,  false, true,  baseDim>(signal, filter, expand, convBT); break;
+            case s16: output = fftconvolve<short , float,  cfloat,  false, true,  baseDim>(signal, filter, expand, convBT); break;
             case u8:  output = fftconvolve<uchar , float,  cfloat,  false, true,  baseDim>(signal, filter, expand, convBT); break;
             case b8:  output = fftconvolve<char  , float,  cfloat,  false, true,  baseDim>(signal, filter, expand, convBT); break;
             case c32: output = fftconvolve_fallback<cfloat , cfloat , cfloat , baseDim>(signal, filter, expand); break;
diff --git a/src/api/c/filters.cpp b/src/api/c/filters.cpp
index 4658604..5be7322 100644
--- a/src/api/c/filters.cpp
+++ b/src/api/c/filters.cpp
@@ -54,6 +54,8 @@ af_err af_medfilt(af_array *out, const af_array in, const dim_t wind_length, con
                 case b8 : output = medfilt<char  >(in, wind_length, wind_width, edge_pad); break;
                 case s32: output = medfilt<int   >(in, wind_length, wind_width, edge_pad); break;
                 case u32: output = medfilt<uint  >(in, wind_length, wind_width, edge_pad); break;
+                case s16: output = medfilt<short >(in, wind_length, wind_width, edge_pad); break;
+                case u16: output = medfilt<ushort>(in, wind_length, wind_width, edge_pad); break;
                 case u8 : output = medfilt<uchar >(in, wind_length, wind_width, edge_pad); break;
                 default : TYPE_ERROR(1, type);
             }
diff --git a/src/api/c/flip.cpp b/src/api/c/flip.cpp
index a88c217..3d5bf53 100644
--- a/src/api/c/flip.cpp
+++ b/src/api/c/flip.cpp
@@ -69,6 +69,8 @@ af_err af_flip(af_array *result, const af_array in, const unsigned dim)
         case u32:    out = flipArray<unsigned>(in, dim);  break;
         case s64:    out = flipArray<intl>    (in, dim);  break;
         case u64:    out = flipArray<uintl>   (in, dim);  break;
+        case s16:    out = flipArray<short>   (in, dim);  break;
+        case u16:    out = flipArray<ushort>  (in, dim);  break;
         case u8:     out = flipArray<uchar>   (in, dim);  break;
         default:    TYPE_ERROR(1, in_type);
         }
diff --git a/src/api/c/handle.hpp b/src/api/c/handle.hpp
index beb8393..70f17eb 100644
--- a/src/api/c/handle.hpp
+++ b/src/api/c/handle.hpp
@@ -31,6 +31,7 @@ detail::Array<To> castArray(const af_array &in)
     using detail::cdouble;
     using detail::uint;
     using detail::uchar;
+    using detail::ushort;
 
     const ArrayInfo info = getInfo(in);
     switch (info.getType()) {
@@ -44,6 +45,8 @@ detail::Array<To> castArray(const af_array &in)
     case b8 : return detail::cast<To, char   >(getArray<char   >(in));
     case s64: return detail::cast<To, intl   >(getArray<intl   >(in));
     case u64: return detail::cast<To, uintl  >(getArray<uintl  >(in));
+    case s16: return detail::cast<To, short  >(getArray<short  >(in));
+    case u16: return detail::cast<To, ushort >(getArray<ushort >(in));
     default: TYPE_ERROR(1, info.getType());
     }
 }
diff --git a/src/api/c/histeq.cpp b/src/api/c/histeq.cpp
index 1b14ae5..56ad3eb 100644
--- a/src/api/c/histeq.cpp
+++ b/src/api/c/histeq.cpp
@@ -77,6 +77,8 @@ af_err af_hist_equal(af_array *out, const af_array in, const af_array hist)
             case f32: output = hist_equal<float , uint>(in, hist); break;
             case s32: output = hist_equal<int   , uint>(in, hist); break;
             case u32: output = hist_equal<uint  , uint>(in, hist); break;
+            case s16: output = hist_equal<short , uint>(in, hist); break;
+            case u16: output = hist_equal<ushort, uint>(in, hist); break;
             case u8 : output = hist_equal<uchar , uint>(in, hist); break;
             default : TYPE_ERROR(1, dataType);
         }
diff --git a/src/api/c/histogram.cpp b/src/api/c/histogram.cpp
index 2d5477e..08ae354 100644
--- a/src/api/c/histogram.cpp
+++ b/src/api/c/histogram.cpp
@@ -38,6 +38,8 @@ af_err af_histogram(af_array *out, const af_array in,
             case b8 : output = histogram<char  , uint>(in, nbins, minval, maxval); break;
             case s32: output = histogram<int   , uint>(in, nbins, minval, maxval); break;
             case u32: output = histogram<uint  , uint>(in, nbins, minval, maxval); break;
+            case s16: output = histogram<short , uint>(in, nbins, minval, maxval); break;
+            case u16: output = histogram<ushort, uint>(in, nbins, minval, maxval); break;
             case u8 : output = histogram<uchar , uint>(in, nbins, minval, maxval); break;
             default : TYPE_ERROR(1, type);
         }
diff --git a/src/api/c/implicit.cpp b/src/api/c/implicit.cpp
index b7a661d..372fb96 100644
--- a/src/api/c/implicit.cpp
+++ b/src/api/c/implicit.cpp
@@ -47,6 +47,12 @@ af_dtype implicit(const af_dtype lty, const af_dtype rty)
     if ((lty == s32) ||
         (rty == s32)) return s32;
 
+    if ((lty == u16) ||
+        (rty == u16)) return u16;
+
+    if ((lty == s16) ||
+        (rty == s16)) return s16;
+
     if ((lty == u8 ) ||
         (rty == u8 )) return u8;
 
diff --git a/src/api/c/index.cpp b/src/api/c/index.cpp
index 9dc7836..6ba8772 100644
--- a/src/api/c/index.cpp
+++ b/src/api/c/index.cpp
@@ -60,6 +60,8 @@ af_err af_index(af_array *result, const af_array in, const unsigned ndims, const
         case b8:     indexArray<char>    (out, in, ndims, index);  break;
         case s32:    indexArray<int>     (out, in, ndims, index);  break;
         case u32:    indexArray<unsigned>(out, in, ndims, index);  break;
+        case s16:    indexArray<short>   (out, in, ndims, index);  break;
+        case u16:    indexArray<ushort>  (out, in, ndims, index);  break;
         case s64:    indexArray<intl>    (out, in, ndims, index);  break;
         case u64:    indexArray<uintl>   (out, in, ndims, index);  break;
         case u8:     indexArray<uchar>   (out, in, ndims, index);  break;
@@ -88,6 +90,8 @@ static af_array lookup(const af_array &in, const af_array &idx, const unsigned d
         case u32: return getHandle(lookup<unsigned, idx_t > (getArray<unsigned>(in), getArray<idx_t>(idx), dim));
         case s64: return getHandle(lookup<intl    , idx_t > (getArray<intl    >(in), getArray<idx_t>(idx), dim));
         case u64: return getHandle(lookup<uintl   , idx_t > (getArray<uintl   >(in), getArray<idx_t>(idx), dim));
+        case s16: return getHandle(lookup<short   , idx_t > (getArray<short   >(in), getArray<idx_t>(idx), dim));
+        case u16: return getHandle(lookup<ushort  , idx_t > (getArray<ushort  >(in), getArray<idx_t>(idx), dim));
         case  u8: return getHandle(lookup<uchar   , idx_t > (getArray<uchar   >(in), getArray<idx_t>(idx), dim));
         case  b8: return getHandle(lookup<char    , idx_t > (getArray<char    >(in), getArray<idx_t>(idx), dim));
         default : TYPE_ERROR(1, inType);
@@ -116,6 +120,8 @@ af_err af_lookup(af_array *out, const af_array in, const af_array indices, const
             case f64: output = lookup<double  >(in, indices, dim); break;
             case s32: output = lookup<int     >(in, indices, dim); break;
             case u32: output = lookup<unsigned>(in, indices, dim); break;
+            case s16: output = lookup<short   >(in, indices, dim); break;
+            case u16: output = lookup<ushort  >(in, indices, dim); break;
             case  u8: output = lookup<uchar   >(in, indices, dim); break;
             default : TYPE_ERROR(1, idxType);
         }
@@ -208,9 +214,11 @@ af_err af_index_gen(af_array *out, const af_array in, const dim_t ndims, const a
             case c32: output = genIndex<cfloat >(in, idxrs); break;
             case f32: output = genIndex<float  >(in, idxrs); break;
             case u64: output = genIndex<uintl  >(in, idxrs); break;
-            case u32: output = genIndex<uint   >(in, idxrs); break;
             case s64: output = genIndex<intl   >(in, idxrs); break;
+            case u32: output = genIndex<uint   >(in, idxrs); break;
             case s32: output = genIndex<int    >(in, idxrs); break;
+            case u16: output = genIndex<ushort >(in, idxrs); break;
+            case s16: output = genIndex<short  >(in, idxrs); break;
             case  u8: output = genIndex<uchar  >(in, idxrs); break;
             case  b8: output = genIndex<char   >(in, idxrs); break;
             default: TYPE_ERROR(1, inType);
diff --git a/src/api/c/join.cpp b/src/api/c/join.cpp
index 67035f3..2a2b93d 100644
--- a/src/api/c/join.cpp
+++ b/src/api/c/join.cpp
@@ -67,6 +67,8 @@ af_err af_join(af_array *out, const int dim, const af_array first, const af_arra
             case u32: output = join<uint   , uint   >(dim, first, second);  break;
             case s64: output = join<intl   , intl   >(dim, first, second);  break;
             case u64: output = join<uintl  , uintl  >(dim, first, second);  break;
+            case s16: output = join<short  , short  >(dim, first, second);  break;
+            case u16: output = join<ushort , ushort >(dim, first, second);  break;
             case u8:  output = join<uchar  , uchar  >(dim, first, second);  break;
             default:  TYPE_ERROR(1, finfo.getType());
         }
@@ -119,6 +121,8 @@ af_err af_join_many(af_array *out, const int dim, const unsigned n_arrays, const
             case u32: output = join_many<uint   >(dim, n_arrays, inputs);  break;
             case s64: output = join_many<intl   >(dim, n_arrays, inputs);  break;
             case u64: output = join_many<uintl  >(dim, n_arrays, inputs);  break;
+            case s16: output = join_many<short  >(dim, n_arrays, inputs);  break;
+            case u16: output = join_many<ushort >(dim, n_arrays, inputs);  break;
             case u8:  output = join_many<uchar  >(dim, n_arrays, inputs);  break;
             default:  TYPE_ERROR(1, info[0].getType());
         }
diff --git a/src/api/c/match_template.cpp b/src/api/c/match_template.cpp
index 4e755e2..0e618c2 100644
--- a/src/api/c/match_template.cpp
+++ b/src/api/c/match_template.cpp
@@ -60,6 +60,8 @@ af_err af_match_template(af_array *out, const af_array search_img, const af_arra
             case f32: output = match_template<float ,  float>(search_img, template_img, m_type); break;
             case s32: output = match_template<int   ,  float>(search_img, template_img, m_type); break;
             case u32: output = match_template<uint  ,  float>(search_img, template_img, m_type); break;
+            case s16: output = match_template<short ,  float>(search_img, template_img, m_type); break;
+            case u16: output = match_template<ushort,  float>(search_img, template_img, m_type); break;
             case  b8: output = match_template<char  ,  float>(search_img, template_img, m_type); break;
             case  u8: output = match_template<uchar ,  float>(search_img, template_img, m_type); break;
             default : TYPE_ERROR(1, sType);
diff --git a/src/api/c/mean.cpp b/src/api/c/mean.cpp
index 1f71a85..76d2e6e 100644
--- a/src/api/c/mean.cpp
+++ b/src/api/c/mean.cpp
@@ -79,6 +79,8 @@ af_err af_mean(af_array *out, const af_array in, const dim_t dim)
             case u32: output = mean<uint  ,  float >(in, dim); break;
             case s64: output = mean<intl  ,  double>(in, dim); break;
             case u64: output = mean<uintl ,  double>(in, dim); break;
+            case s16: output = mean<short ,  float >(in, dim); break;
+            case u16: output = mean<ushort,  float >(in, dim); break;
             case  u8: output = mean<uchar ,  float >(in, dim); break;
             case  b8: output = mean<char  ,  float >(in, dim); break;
             case c32: output = mean<cfloat,  cfloat>(in, dim); break;
@@ -111,6 +113,8 @@ af_err af_mean_weighted(af_array *out, const af_array in, const af_array weights
             case u32: output = mean<uint  ,  float >(in, weights, dim); break;
             case s64: output = mean<intl  ,  double>(in, weights, dim); break;
             case u64: output = mean<uintl ,  double>(in, weights, dim); break;
+            case s16: output = mean<short ,  float >(in, weights, dim); break;
+            case u16: output = mean<ushort,  float >(in, weights, dim); break;
             case  u8: output = mean<uchar ,  float >(in, weights, dim); break;
             case  b8: output = mean<char  ,  float >(in, weights, dim); break;
             case c32: output = mean<cfloat,  cfloat>(in, weights, dim); break;
@@ -135,6 +139,8 @@ af_err af_mean_all(double *realVal, double *imagVal, const af_array in)
             case u32: *realVal = mean<uint  ,  float>(in); break;
             case s64: *realVal = mean<intl  , double>(in); break;
             case u64: *realVal = mean<uintl , double>(in); break;
+            case s16: *realVal = mean<short ,  float>(in); break;
+            case u16: *realVal = mean<ushort,  float>(in); break;
             case  u8: *realVal = mean<uchar ,  float>(in); break;
             case  b8: *realVal = mean<char  ,  float>(in); break;
             case c32: {
@@ -171,6 +177,8 @@ af_err af_mean_all_weighted(double *realVal, double *imagVal, const af_array in,
             case u32: *realVal = mean<uint  ,  float>(in, weights); break;
             case s64: *realVal = mean<intl  , double>(in, weights); break;
             case u64: *realVal = mean<uintl , double>(in, weights); break;
+            case s16: *realVal = mean<short ,  float>(in, weights); break;
+            case u16: *realVal = mean<ushort,  float>(in, weights); break;
             case  u8: *realVal = mean<uchar ,  float>(in, weights); break;
             case  b8: *realVal = mean<char  ,  float>(in, weights); break;
             case c32: {
diff --git a/src/api/c/meanshift.cpp b/src/api/c/meanshift.cpp
index 6c93854..1001a9c 100644
--- a/src/api/c/meanshift.cpp
+++ b/src/api/c/meanshift.cpp
@@ -46,6 +46,8 @@ af_err mean_shift(af_array *out, const af_array in, const float s_sigma, const f
             case b8 : output = mean_shift<char  , is_color>(in, s_sigma, c_sigma, iter); break;
             case s32: output = mean_shift<int   , is_color>(in, s_sigma, c_sigma, iter); break;
             case u32: output = mean_shift<uint  , is_color>(in, s_sigma, c_sigma, iter); break;
+            case s16: output = mean_shift<short , is_color>(in, s_sigma, c_sigma, iter); break;
+            case u16: output = mean_shift<ushort, is_color>(in, s_sigma, c_sigma, iter); break;
             case u8 : output = mean_shift<uchar , is_color>(in, s_sigma, c_sigma, iter); break;
             default : TYPE_ERROR(1, type);
         }
diff --git a/src/api/c/median.cpp b/src/api/c/median.cpp
index e91425d..e3de3d4 100644
--- a/src/api/c/median.cpp
+++ b/src/api/c/median.cpp
@@ -129,6 +129,8 @@ af_err af_median_all(double *realVal, double *imagVal, const af_array in)
             case f32: *realVal = median<float >(in); break;
             case s32: *realVal = median<int   >(in); break;
             case u32: *realVal = median<uint  >(in); break;
+            case s16: *realVal = median<short >(in); break;
+            case u16: *realVal = median<ushort>(in); break;
             case  u8: *realVal = median<uchar >(in); break;
             default : TYPE_ERROR(1, type);
         }
@@ -150,6 +152,8 @@ af_err af_median(af_array* out, const af_array in, const dim_t dim)
             case f32: output = median<float >(in, dim); break;
             case s32: output = median<int   >(in, dim); break;
             case u32: output = median<uint  >(in, dim); break;
+            case s16: output = median<short >(in, dim); break;
+            case u16: output = median<ushort>(in, dim); break;
             case  u8: output = median<uchar >(in, dim); break;
             default : TYPE_ERROR(1, type);
         }
diff --git a/src/api/c/moddims.cpp b/src/api/c/moddims.cpp
index e43efa0..7ccc38c 100644
--- a/src/api/c/moddims.cpp
+++ b/src/api/c/moddims.cpp
@@ -63,6 +63,8 @@ af_err af_moddims(af_array *out, const af_array in,
         case u8:  output = getHandle(modDims<uchar  >(getArray<uchar  >(in), newDims)); break;
         case s64: output = getHandle(modDims<intl   >(getArray<intl   >(in), newDims)); break;
         case u64: output = getHandle(modDims<uintl  >(getArray<uintl  >(in), newDims)); break;
+        case s16: output = getHandle(modDims<short  >(getArray<short  >(in), newDims)); break;
+        case u16: output = getHandle(modDims<ushort >(getArray<ushort >(in), newDims)); break;
         default: TYPE_ERROR(1, type);
         }
         std::swap(*out,output);
diff --git a/src/api/c/morph.cpp b/src/api/c/morph.cpp
index 980097c..bd9c680 100644
--- a/src/api/c/morph.cpp
+++ b/src/api/c/morph.cpp
@@ -58,6 +58,8 @@ static af_err morph(af_array *out, const af_array &in, const af_array &mask)
             case b8 : output = morph<char  , isDilation>(in, mask);      break;
             case s32: output = morph<int   , isDilation>(in, mask);      break;
             case u32: output = morph<uint  , isDilation>(in, mask);      break;
+            case s16: output = morph<short , isDilation>(in, mask);      break;
+            case u16: output = morph<ushort, isDilation>(in, mask);      break;
             case u8 : output = morph<uchar , isDilation>(in, mask);      break;
             default : TYPE_ERROR(1, type);
         }
@@ -90,6 +92,8 @@ static af_err morph3d(af_array *out, const af_array &in, const af_array &mask)
             case b8 : output = morph3d<char  , isDilation>(in, mask);       break;
             case s32: output = morph3d<int   , isDilation>(in, mask);       break;
             case u32: output = morph3d<uint  , isDilation>(in, mask);       break;
+            case s16: output = morph3d<short , isDilation>(in, mask);       break;
+            case u16: output = morph3d<ushort, isDilation>(in, mask);       break;
             case u8 : output = morph3d<uchar , isDilation>(in, mask);       break;
             default : TYPE_ERROR(1, type);
         }
diff --git a/src/api/c/nearest_neighbour.cpp b/src/api/c/nearest_neighbour.cpp
index d47e0ae..03064a4 100644
--- a/src/api/c/nearest_neighbour.cpp
+++ b/src/api/c/nearest_neighbour.cpp
@@ -57,16 +57,17 @@ af_err af_nearest_neighbour(af_array* idx, af_array* dist,
         ARG_ASSERT(6, dist_type == AF_SAD || dist_type == AF_SSD || dist_type == AF_SHD);
         TYPE_ASSERT(qType == tType);
 
-        // For Hamming, only u8, u32 and u64 allowed.
+        // For Hamming, only u8, u16, u32 and u64 allowed.
         af_array oIdx;
         af_array oDist;
 
         if(dist_type == AF_SHD) {
-            TYPE_ASSERT(qType == u8 || qType == u32 || qType == u64);
+            TYPE_ASSERT(qType == u8 || qType == u16 || qType == u32 || qType == u64);
             switch(qType) {
-                case u8:  nearest_neighbour<uchar, uint>(&oIdx, &oDist, query, train, dist_dim, n_dist, dist_type); break;
-                case u32: nearest_neighbour<uint , uint>(&oIdx, &oDist, query, train, dist_dim, n_dist, dist_type); break;
-                case u64: nearest_neighbour<uintl, uint>(&oIdx, &oDist, query, train, dist_dim, n_dist, dist_type); break;
+                case u8:  nearest_neighbour<uchar , uint>(&oIdx, &oDist, query, train, dist_dim, n_dist, AF_SHD); break;
+                case u16: nearest_neighbour<ushort, uint>(&oIdx, &oDist, query, train, dist_dim, n_dist, AF_SHD); break;
+                case u32: nearest_neighbour<uint  , uint>(&oIdx, &oDist, query, train, dist_dim, n_dist, AF_SHD); break;
+                case u64: nearest_neighbour<uintl , uint>(&oIdx, &oDist, query, train, dist_dim, n_dist, AF_SHD); break;
                 default : TYPE_ERROR(1, qType);
             }
         } else {
@@ -77,6 +78,8 @@ af_err af_nearest_neighbour(af_array* idx, af_array* dist,
                 case u32: nearest_neighbour<uint  , uint  >(&oIdx, &oDist, query, train, dist_dim, n_dist, dist_type); break;
                 case s64: nearest_neighbour<intl  , intl  >(&oIdx, &oDist, query, train, dist_dim, n_dist, dist_type); break;
                 case u64: nearest_neighbour<uintl , uintl >(&oIdx, &oDist, query, train, dist_dim, n_dist, dist_type); break;
+                case s16: nearest_neighbour<short , int   >(&oIdx, &oDist, query, train, dist_dim, n_dist, dist_type); break;
+                case u16: nearest_neighbour<ushort, uint  >(&oIdx, &oDist, query, train, dist_dim, n_dist, dist_type); break;
                 case u8:  nearest_neighbour<uchar , uint  >(&oIdx, &oDist, query, train, dist_dim, n_dist, dist_type); break;
                 default : TYPE_ERROR(1, qType);
             }
diff --git a/src/api/c/print.cpp b/src/api/c/print.cpp
index eb6dd05..a5c178c 100644
--- a/src/api/c/print.cpp
+++ b/src/api/c/print.cpp
@@ -111,6 +111,8 @@ af_err af_print_array(af_array arr)
         case u8:    print<uchar>   (NULL, arr, 4);   break;
         case s64:   print<intl>    (NULL, arr, 4);   break;
         case u64:   print<uintl>   (NULL, arr, 4);   break;
+        case s16:   print<short>   (NULL, arr, 4);   break;
+        case u16:   print<ushort>  (NULL, arr, 4);   break;
         default:    TYPE_ERROR(1, type);
         }
     }
@@ -136,6 +138,8 @@ af_err af_print_array_gen(const char *exp, const af_array arr, const int precisi
         case u8:    print<uchar   >(exp, arr, precision);   break;
         case s64:   print<intl    >(exp, arr, precision);   break;
         case u64:   print<uintl   >(exp, arr, precision);   break;
+        case s16:   print<short   >(exp, arr, precision);   break;
+        case u16:   print<ushort  >(exp, arr, precision);   break;
         default:    TYPE_ERROR(1, type);
         }
     }
@@ -163,6 +167,8 @@ af_err af_array_to_string(char **output, const char *exp, const af_array arr,
         case u8:    print<uchar   >(exp, arr, precision, ss, transpose);   break;
         case s64:   print<intl    >(exp, arr, precision, ss, transpose);   break;
         case u64:   print<uintl   >(exp, arr, precision, ss, transpose);   break;
+        case s16:   print<short   >(exp, arr, precision, ss, transpose);   break;
+        case u16:   print<ushort  >(exp, arr, precision, ss, transpose);   break;
         default:    TYPE_ERROR(1, type);
         }
         std::string str = ss.str();
diff --git a/src/api/c/reduce.cpp b/src/api/c/reduce.cpp
index cedf4f9..3fe30be 100644
--- a/src/api/c/reduce.cpp
+++ b/src/api/c/reduce.cpp
@@ -56,6 +56,8 @@ static af_err reduce_type(af_array *out, const af_array in, const int dim)
         case s32:  res = reduce<op, int    , To>(in, dim); break;
         case u64:  res = reduce<op, uintl  , To>(in, dim); break;
         case s64:  res = reduce<op, intl   , To>(in, dim); break;
+        case u16:  res = reduce<op, ushort , To>(in, dim); break;
+        case s16:  res = reduce<op, short  , To>(in, dim); break;
         case b8:   res = reduce<op, char   , To>(in, dim); break;
         case u8:   res = reduce<op, uchar  , To>(in, dim); break;
         default:   TYPE_ERROR(1, type);
@@ -95,6 +97,8 @@ static af_err reduce_common(af_array *out, const af_array in, const int dim)
         case s32:  res = reduce<op, int    , int    >(in, dim); break;
         case u64:  res = reduce<op, uintl  , uintl  >(in, dim); break;
         case s64:  res = reduce<op, intl   , intl   >(in, dim); break;
+        case u16:  res = reduce<op, ushort , ushort >(in, dim); break;
+        case s16:  res = reduce<op, short  , short  >(in, dim); break;
         case b8:   res = reduce<op, char   , char   >(in, dim); break;
         case u8:   res = reduce<op, uchar  , uchar  >(in, dim); break;
         default:   TYPE_ERROR(1, type);
@@ -135,6 +139,8 @@ static af_err reduce_promote(af_array *out, const af_array in, const int dim,
         case s32:  res = reduce<op, int    , int    >(in, dim, change_nan, nanval); break;
         case u64:  res = reduce<op, uintl  , uintl  >(in, dim, change_nan, nanval); break;
         case s64:  res = reduce<op, intl   , intl   >(in, dim, change_nan, nanval); break;
+        case u16:  res = reduce<op, ushort , uint   >(in, dim, change_nan, nanval); break;
+        case s16:  res = reduce<op, short  , int    >(in, dim, change_nan, nanval); break;
         case u8:   res = reduce<op, uchar  , uint   >(in, dim, change_nan, nanval); break;
             // Make sure you are adding only "1" for every non zero value, even if op == af_add_t
         case b8:   res = reduce<af_notzero_t, char  , uint   >(in, dim, change_nan, nanval); break;
@@ -219,6 +225,8 @@ static af_err reduce_all_type(double *real, double *imag, const af_array in)
         case s32:  *real = (double)reduce_all<op, int    , To>(in); break;
         case u64:  *real = (double)reduce_all<op, uintl  , To>(in); break;
         case s64:  *real = (double)reduce_all<op, intl   , To>(in); break;
+        case u16:  *real = (double)reduce_all<op, ushort , To>(in); break;
+        case s16:  *real = (double)reduce_all<op, short  , To>(in); break;
         case b8:   *real = (double)reduce_all<op, char   , To>(in); break;
         case u8:   *real = (double)reduce_all<op, uchar  , To>(in); break;
         default:   TYPE_ERROR(1, type);
@@ -252,6 +260,8 @@ static af_err reduce_all_common(double *real_val, double *imag_val, const af_arr
         case s32:  *real_val = (double)reduce_all<op, int    , int    >(in); break;
         case u64:  *real_val = (double)reduce_all<op, uintl  , uintl  >(in); break;
         case s64:  *real_val = (double)reduce_all<op, intl   , intl   >(in); break;
+        case u16:  *real_val = (double)reduce_all<op, ushort , ushort >(in); break;
+        case s16:  *real_val = (double)reduce_all<op, short  , short  >(in); break;
         case b8:   *real_val = (double)reduce_all<op, char   , char   >(in); break;
         case u8:   *real_val = (double)reduce_all<op, uchar  , uchar  >(in); break;
 
@@ -301,6 +311,8 @@ static af_err reduce_all_promote(double *real_val, double *imag_val, const af_ar
         case s32: *real_val = (double)reduce_all<op, int    , int    >(in, change_nan, nanval); break;
         case u64: *real_val = (double)reduce_all<op, uintl  , uintl  >(in, change_nan, nanval); break;
         case s64: *real_val = (double)reduce_all<op, intl   , intl   >(in, change_nan, nanval); break;
+        case u16: *real_val = (double)reduce_all<op, ushort , uint   >(in, change_nan, nanval); break;
+        case s16: *real_val = (double)reduce_all<op, short  , int    >(in, change_nan, nanval); break;
         case u8:  *real_val = (double)reduce_all<op, uchar  , uint   >(in, change_nan, nanval); break;
             // Make sure you are adding only "1" for every non zero value, even if op == af_add_t
         case b8:  *real_val = (double)reduce_all<af_notzero_t, char, uint>(in, change_nan, nanval); break;
@@ -405,6 +417,8 @@ static af_err ireduce_common(af_array *val, af_array *idx, const af_array in, co
         case s32:  ireduce<op, int    >(&res, &loc, in, dim); break;
         case u64:  ireduce<op, uintl  >(&res, &loc, in, dim); break;
         case s64:  ireduce<op, intl   >(&res, &loc, in, dim); break;
+        case u16:  ireduce<op, ushort >(&res, &loc, in, dim); break;
+        case s16:  ireduce<op, short  >(&res, &loc, in, dim); break;
         case b8:   ireduce<op, char   >(&res, &loc, in, dim); break;
         case u8:   ireduce<op, uchar  >(&res, &loc, in, dim); break;
         default:   TYPE_ERROR(1, type);
@@ -457,6 +471,8 @@ static af_err ireduce_all_common(double *real_val, double *imag_val,
         case s32:  *real_val = (double)ireduce_all<op, int   >(loc, in); break;
         case u64:  *real_val = (double)ireduce_all<op, uintl >(loc, in); break;
         case s64:  *real_val = (double)ireduce_all<op, intl  >(loc, in); break;
+        case u16:  *real_val = (double)ireduce_all<op, ushort>(loc, in); break;
+        case s16:  *real_val = (double)ireduce_all<op, short >(loc, in); break;
         case b8:   *real_val = (double)ireduce_all<op, char  >(loc, in); break;
         case u8:   *real_val = (double)ireduce_all<op, uchar >(loc, in); break;
 
diff --git a/src/api/c/regions.cpp b/src/api/c/regions.cpp
index 4245eac..49ddedf 100644
--- a/src/api/c/regions.cpp
+++ b/src/api/c/regions.cpp
@@ -46,6 +46,8 @@ af_err af_regions(af_array *out, const af_array in, const af_connectivity connec
             case f64: output = regions<double>(in, connectivity); break;
             case s32: output = regions<int   >(in, connectivity); break;
             case u32: output = regions<uint  >(in, connectivity); break;
+            case s16: output = regions<short >(in, connectivity); break;
+            case u16: output = regions<ushort>(in, connectivity); break;
             default : TYPE_ERROR(0, type);
         }
         std::swap(*out, output);
diff --git a/src/api/c/reorder.cpp b/src/api/c/reorder.cpp
index 733981c..10d2cc3 100644
--- a/src/api/c/reorder.cpp
+++ b/src/api/c/reorder.cpp
@@ -71,6 +71,8 @@ af_err af_reorder(af_array *out, const af_array in, const af::dim4 &rdims)
             case u8:  output = reorder<uchar  >(in, rdims);  break;
             case s64: output = reorder<intl   >(in, rdims);  break;
             case u64: output = reorder<uintl  >(in, rdims);  break;
+            case s16: output = reorder<short  >(in, rdims);  break;
+            case u16: output = reorder<ushort >(in, rdims);  break;
             default:  TYPE_ERROR(1, type);
         }
         std::swap(*out,output);
diff --git a/src/api/c/replace.cpp b/src/api/c/replace.cpp
index 1f37988..7c0a3cf 100644
--- a/src/api/c/replace.cpp
+++ b/src/api/c/replace.cpp
@@ -59,6 +59,8 @@ af_err af_replace(af_array a, const af_array cond, const af_array b)
         case u32: replace<uint   >(a, cond, b); break;
         case s64: replace<intl   >(a, cond, b); break;
         case u64: replace<uintl  >(a, cond, b); break;
+        case s16: replace<short  >(a, cond, b); break;
+        case u16: replace<ushort >(a, cond, b); break;
         case u8:  replace<uchar  >(a, cond, b); break;
         case b8:  replace<char   >(a, cond, b); break;
         default:  TYPE_ERROR(2, ainfo.getType());
@@ -99,6 +101,8 @@ af_err af_replace_scalar(af_array a, const af_array cond, const double b)
         case u32: replace_scalar<uint   >(a, cond, b); break;
         case s64: replace_scalar<intl   >(a, cond, b); break;
         case u64: replace_scalar<uintl  >(a, cond, b); break;
+        case s16: replace_scalar<short  >(a, cond, b); break;
+        case u16: replace_scalar<ushort >(a, cond, b); break;
         case u8:  replace_scalar<uchar  >(a, cond, b); break;
         case b8:  replace_scalar<char   >(a, cond, b); break;
         default:  TYPE_ERROR(2, ainfo.getType());
diff --git a/src/api/c/resize.cpp b/src/api/c/resize.cpp
index 419af85..d17bd29 100644
--- a/src/api/c/resize.cpp
+++ b/src/api/c/resize.cpp
@@ -50,6 +50,8 @@ af_err af_resize(af_array *out, const af_array in, const dim_t odim0, const dim_
             case u32: output = resize<uint   >(in, odim0, odim1, method);  break;
             case s64: output = resize<intl   >(in, odim0, odim1, method);  break;
             case u64: output = resize<uintl  >(in, odim0, odim1, method);  break;
+            case s16: output = resize<short  >(in, odim0, odim1, method);  break;
+            case u16: output = resize<ushort >(in, odim0, odim1, method);  break;
             case u8:  output = resize<uchar  >(in, odim0, odim1, method);  break;
             case b8:  output = resize<char   >(in, odim0, odim1, method);  break;
             default:  TYPE_ERROR(1, type);
diff --git a/src/api/c/rgb_gray.cpp b/src/api/c/rgb_gray.cpp
index 0ed5eb9..1e52ae0 100644
--- a/src/api/c/rgb_gray.cpp
+++ b/src/api/c/rgb_gray.cpp
@@ -122,6 +122,8 @@ af_err convert(af_array* out, const af_array in, const float r, const float g, c
             case f32: output = convert<float , float , isRGB2GRAY>(in, r, g, b); break;
             case u32: output = convert<uint  , float , isRGB2GRAY>(in, r, g, b); break;
             case s32: output = convert<int   , float , isRGB2GRAY>(in, r, g, b); break;
+            case u16: output = convert<ushort, float , isRGB2GRAY>(in, r, g, b); break;
+            case s16: output = convert<short , float , isRGB2GRAY>(in, r, g, b); break;
             case u8:  output = convert<uchar , float , isRGB2GRAY>(in, r, g, b); break;
             default: TYPE_ERROR(1, iType); break;
         }
diff --git a/src/api/c/rotate.cpp b/src/api/c/rotate.cpp
index b792239..a5978e3 100644
--- a/src/api/c/rotate.cpp
+++ b/src/api/c/rotate.cpp
@@ -63,6 +63,8 @@ af_err af_rotate(af_array *out, const af_array in, const float theta,
             case u32: output = rotate<uint   >(in, theta, odims, method);  break;
             case s64: output = rotate<intl   >(in, theta, odims, method);  break;
             case u64: output = rotate<uintl  >(in, theta, odims, method);  break;
+            case s16: output = rotate<short  >(in, theta, odims, method);  break;
+            case u16: output = rotate<ushort >(in, theta, odims, method);  break;
             case u8:  output = rotate<uchar  >(in, theta, odims, method);  break;
             case b8:  output = rotate<uchar  >(in, theta, odims, method);  break;
             default:  TYPE_ERROR(1, itype);
diff --git a/src/api/c/sat.cpp b/src/api/c/sat.cpp
index 65a4481..fa6d0a4 100644
--- a/src/api/c/sat.cpp
+++ b/src/api/c/sat.cpp
@@ -47,6 +47,8 @@ af_err af_sat(af_array* out, const af_array in)
             case  u8: output = sat<uint  , uchar >(in); break;
             case s64: output = sat<intl  , intl  >(in); break;
             case u64: output = sat<uintl , uintl >(in); break;
+            case s16: output = sat<int   , short >(in); break;
+            case u16: output = sat<uint  , ushort>(in); break;
             default: TYPE_ERROR(1, inputType);
         }
         std::swap(*out, output);
diff --git a/src/api/c/scan.cpp b/src/api/c/scan.cpp
index d0c9e8e..321324b 100644
--- a/src/api/c/scan.cpp
+++ b/src/api/c/scan.cpp
@@ -53,6 +53,8 @@ af_err af_accum(af_array *out, const af_array in, const int dim)
         case s32:  res = scan<af_add_t, int    , int    >(in, dim); break;
         case u64:  res = scan<af_add_t, uintl  , uintl  >(in, dim); break;
         case s64:  res = scan<af_add_t, intl   , intl   >(in, dim); break;
+        case u16:  res = scan<af_add_t, ushort , uint   >(in, dim); break;
+        case s16:  res = scan<af_add_t, short  , int    >(in, dim); break;
         case u8:   res = scan<af_add_t, uchar  , uint   >(in, dim); break;
         // Make sure you are adding only "1" for every non zero value, even if op == af_add_t
         case b8:   res = scan<af_notzero_t, char  , uint   >(in, dim); break;
diff --git a/src/api/c/select.cpp b/src/api/c/select.cpp
index 06eef2a..42eb91b 100644
--- a/src/api/c/select.cpp
+++ b/src/api/c/select.cpp
@@ -63,6 +63,8 @@ af_err af_select(af_array *out, const af_array cond, const af_array a, const af_
         case u32: res = select<uint   >(cond, a, b, odims); break;
         case s64: res = select<intl   >(cond, a, b, odims); break;
         case u64: res = select<uintl  >(cond, a, b, odims); break;
+        case s16: res = select<short  >(cond, a, b, odims); break;
+        case u16: res = select<ushort >(cond, a, b, odims); break;
         case u8:  res = select<uchar  >(cond, a, b, odims); break;
         case b8:  res = select<char   >(cond, a, b, odims); break;
         default:  TYPE_ERROR(2, ainfo.getType());
@@ -106,6 +108,8 @@ af_err af_select_scalar_r(af_array *out, const af_array cond, const af_array a,
         case c64: res = select_scalar<cdouble, false>(cond, a, b, adims); break;
         case s32: res = select_scalar<int    , false>(cond, a, b, adims); break;
         case u32: res = select_scalar<uint   , false>(cond, a, b, adims); break;
+        case s16: res = select_scalar<short  , false>(cond, a, b, adims); break;
+        case u16: res = select_scalar<ushort , false>(cond, a, b, adims); break;
         case s64: res = select_scalar<intl   , false>(cond, a, b, adims); break;
         case u64: res = select_scalar<uintl  , false>(cond, a, b, adims); break;
         case u8:  res = select_scalar<uchar  , false>(cond, a, b, adims); break;
@@ -143,6 +147,8 @@ af_err af_select_scalar_l(af_array *out, const af_array cond, const double a, co
         case c64: res = select_scalar<cdouble, true >(cond, b, a, bdims); break;
         case s32: res = select_scalar<int    , true >(cond, b, a, bdims); break;
         case u32: res = select_scalar<uint   , true >(cond, b, a, bdims); break;
+        case s16: res = select_scalar<short  , true >(cond, b, a, bdims); break;
+        case u16: res = select_scalar<ushort , true >(cond, b, a, bdims); break;
         case s64: res = select_scalar<intl   , true >(cond, b, a, bdims); break;
         case u64: res = select_scalar<uintl  , true >(cond, b, a, bdims); break;
         case u8:  res = select_scalar<uchar  , true >(cond, b, a, bdims); break;
diff --git a/src/api/c/set.cpp b/src/api/c/set.cpp
index 1200eae..cada021 100644
--- a/src/api/c/set.cpp
+++ b/src/api/c/set.cpp
@@ -36,6 +36,8 @@ af_err af_set_unique(af_array *out, const af_array in, const bool is_sorted)
         case f64: res = setUnique<double >(in, is_sorted); break;
         case s32: res = setUnique<int    >(in, is_sorted); break;
         case u32: res = setUnique<uint   >(in, is_sorted); break;
+        case s16: res = setUnique<short  >(in, is_sorted); break;
+        case u16: res = setUnique<ushort >(in, is_sorted); break;
         case b8:  res = setUnique<char   >(in, is_sorted); break;
         case u8:  res = setUnique<uchar  >(in, is_sorted); break;
         default: TYPE_ERROR(1, type);
@@ -69,6 +71,8 @@ af_err af_set_union(af_array *out, const af_array first, const af_array second,
         case f64: res = setUnion<double >(first, second, is_unique); break;
         case s32: res = setUnion<int    >(first, second, is_unique); break;
         case u32: res = setUnion<uint   >(first, second, is_unique); break;
+        case s16: res = setUnion<short  >(first, second, is_unique); break;
+        case u16: res = setUnion<ushort >(first, second, is_unique); break;
         case b8:  res = setUnion<char   >(first, second, is_unique); break;
         case u8:  res = setUnion<uchar  >(first, second, is_unique); break;
         default: TYPE_ERROR(1, first_type);
@@ -101,6 +105,8 @@ af_err af_set_intersect(af_array *out, const af_array first, const af_array seco
         case f64: res = setIntersect<double >(first, second, is_unique); break;
         case s32: res = setIntersect<int    >(first, second, is_unique); break;
         case u32: res = setIntersect<uint   >(first, second, is_unique); break;
+        case s16: res = setIntersect<short  >(first, second, is_unique); break;
+        case u16: res = setIntersect<ushort >(first, second, is_unique); break;
         case b8:  res = setIntersect<char   >(first, second, is_unique); break;
         case u8:  res = setIntersect<uchar  >(first, second, is_unique); break;
         default: TYPE_ERROR(1, first_type);
diff --git a/src/api/c/shift.cpp b/src/api/c/shift.cpp
index 28e2180..e383915 100644
--- a/src/api/c/shift.cpp
+++ b/src/api/c/shift.cpp
@@ -43,6 +43,8 @@ af_err af_shift(af_array *out, const af_array in, const int sdims[4])
             case u32: output = shift<uint   >(in, sdims);  break;
             case s64: output = shift<intl   >(in, sdims);  break;
             case u64: output = shift<uintl  >(in, sdims);  break;
+            case s16: output = shift<short  >(in, sdims);  break;
+            case u16: output = shift<ushort >(in, sdims);  break;
             case u8:  output = shift<uchar  >(in, sdims);  break;
             default:  TYPE_ERROR(1, type);
         }
diff --git a/src/api/c/sobel.cpp b/src/api/c/sobel.cpp
index 594bf65..6d28a6a 100644
--- a/src/api/c/sobel.cpp
+++ b/src/api/c/sobel.cpp
@@ -48,6 +48,8 @@ af_err af_sobel_operator(af_array *dx, af_array *dy, const af_array img, const u
             case f64: output = sobelDerivatives<double, double>(img, ker_size); break;
             case s32: output = sobelDerivatives<int   , int>   (img, ker_size); break;
             case u32: output = sobelDerivatives<uint  , int>   (img, ker_size); break;
+            case s16: output = sobelDerivatives<short , int>   (img, ker_size); break;
+            case u16: output = sobelDerivatives<ushort, int>   (img, ker_size); break;
             case b8 : output = sobelDerivatives<char  , int>   (img, ker_size); break;
             case u8:  output = sobelDerivatives<uchar , int>   (img, ker_size); break;
             default : TYPE_ERROR(1, type);
diff --git a/src/api/c/sort.cpp b/src/api/c/sort.cpp
index 39a7f22..b127aa5 100644
--- a/src/api/c/sort.cpp
+++ b/src/api/c/sort.cpp
@@ -52,6 +52,8 @@ af_err af_sort(af_array *out, const af_array in, const unsigned dim, const bool
             case f64: val = sort<double >(in, dim, isAscending);  break;
             case s32: val = sort<int    >(in, dim, isAscending);  break;
             case u32: val = sort<uint   >(in, dim, isAscending);  break;
+            case s16: val = sort<short  >(in, dim, isAscending);  break;
+            case u16: val = sort<ushort >(in, dim, isAscending);  break;
             case u8:  val = sort<uchar  >(in, dim, isAscending);  break;
             case b8:  val = sort<char   >(in, dim, isAscending);  break;
             default:  TYPE_ERROR(1, type);
@@ -100,6 +102,8 @@ af_err af_sort_index(af_array *out, af_array *indices, const af_array in, const
             case f64: sort_index<double >(&val, &idx, in, dim, isAscending);  break;
             case s32: sort_index<int    >(&val, &idx, in, dim, isAscending);  break;
             case u32: sort_index<uint   >(&val, &idx, in, dim, isAscending);  break;
+            case s16: sort_index<short  >(&val, &idx, in, dim, isAscending);  break;
+            case u16: sort_index<ushort >(&val, &idx, in, dim, isAscending);  break;
             case u8:  sort_index<uchar  >(&val, &idx, in, dim, isAscending);  break;
             case b8:  sort_index<char   >(&val, &idx, in, dim, isAscending);  break;
             default:  TYPE_ERROR(1, type);
@@ -144,6 +148,8 @@ void sort_by_key_tmplt(af_array *okey, af_array *oval, const af_array ikey, cons
     case f64: sort_by_key<Tk, double >(okey, oval, ikey, ival, dim, isAscending);  break;
     case s32: sort_by_key<Tk, int    >(okey, oval, ikey, ival, dim, isAscending);  break;
     case u32: sort_by_key<Tk, uint   >(okey, oval, ikey, ival, dim, isAscending);  break;
+    case s16: sort_by_key<Tk, short  >(okey, oval, ikey, ival, dim, isAscending);  break;
+    case u16: sort_by_key<Tk, ushort >(okey, oval, ikey, ival, dim, isAscending);  break;
     case u8:  sort_by_key<Tk, uchar  >(okey, oval, ikey, ival, dim, isAscending);  break;
     case b8:  sort_by_key<Tk, char   >(okey, oval, ikey, ival, dim, isAscending);  break;
     default:  TYPE_ERROR(1, vtype);
@@ -175,6 +181,8 @@ af_err af_sort_by_key(af_array *out_keys, af_array *out_values,
             case f64: sort_by_key_tmplt<double >(&oKey, &oVal, keys, values, dim, isAscending);  break;
             case s32: sort_by_key_tmplt<int    >(&oKey, &oVal, keys, values, dim, isAscending);  break;
             case u32: sort_by_key_tmplt<uint   >(&oKey, &oVal, keys, values, dim, isAscending);  break;
+            case s16: sort_by_key_tmplt<short  >(&oKey, &oVal, keys, values, dim, isAscending);  break;
+            case u16: sort_by_key_tmplt<ushort >(&oKey, &oVal, keys, values, dim, isAscending);  break;
             case u8:  sort_by_key_tmplt<uchar  >(&oKey, &oVal, keys, values, dim, isAscending);  break;
             case b8:  sort_by_key_tmplt<char   >(&oKey, &oVal, keys, values, dim, isAscending);  break;
             default:  TYPE_ERROR(1, type);
diff --git a/src/api/c/stdev.cpp b/src/api/c/stdev.cpp
index b2f307b..cf871bd 100644
--- a/src/api/c/stdev.cpp
+++ b/src/api/c/stdev.cpp
@@ -77,6 +77,8 @@ af_err af_stdev_all(double *realVal, double *imagVal, const af_array in)
             case f32: *realVal = stdev<float , float >(in); break;
             case s32: *realVal = stdev<int   , float >(in); break;
             case u32: *realVal = stdev<uint  , float >(in); break;
+            case s16: *realVal = stdev<short , float >(in); break;
+            case u16: *realVal = stdev<ushort, float >(in); break;
             case s64: *realVal = stdev<intl  , double>(in); break;
             case u64: *realVal = stdev<uintl , double>(in); break;
             case  u8: *realVal = stdev<uchar , float >(in); break;
@@ -112,6 +114,8 @@ af_err af_stdev(af_array *out, const af_array in, const dim_t dim)
             case f32: output = stdev<float ,  float >(in, dim); break;
             case s32: output = stdev<int   ,  float >(in, dim); break;
             case u32: output = stdev<uint  ,  float >(in, dim); break;
+            case s16: output = stdev<short ,  float >(in, dim); break;
+            case u16: output = stdev<ushort,  float >(in, dim); break;
             case s64: output = stdev<intl  ,  double>(in, dim); break;
             case u64: output = stdev<uintl ,  double>(in, dim); break;
             case  u8: output = stdev<uchar ,  float >(in, dim); break;
diff --git a/src/api/c/stream.cpp b/src/api/c/stream.cpp
index 1161703..a7b5771 100644
--- a/src/api/c/stream.cpp
+++ b/src/api/c/stream.cpp
@@ -133,6 +133,8 @@ af_err af_save_array(int *index, const char *key, const af_array arr, const char
             case u8:    id = save<uchar>   (key, arr, filename, append);   break;
             case s64:   id = save<intl>    (key, arr, filename, append);   break;
             case u64:   id = save<uintl>   (key, arr, filename, append);   break;
+            case s16:   id = save<short>   (key, arr, filename, append);   break;
+            case u16:   id = save<ushort>  (key, arr, filename, append);   break;
             default:    TYPE_ERROR(1, type);
         }
         std::swap(*index, id);
@@ -234,6 +236,8 @@ static af_array readArrayV1(const char *filename, const unsigned index)
         case u8  : out = readDataToArray<uchar>  (fs);  break;
         case s64 : out = readDataToArray<intl>   (fs);  break;
         case u64 : out = readDataToArray<uintl>  (fs);  break;
+        case s16 : out = readDataToArray<short>  (fs);  break;
+        case u16 : out = readDataToArray<ushort> (fs);  break;
         default:    TYPE_ERROR(1, type);
     }
     fs.close();
diff --git a/src/api/c/susan.cpp b/src/api/c/susan.cpp
index e070df8..24cb913 100644
--- a/src/api/c/susan.cpp
+++ b/src/api/c/susan.cpp
@@ -69,6 +69,8 @@ af_err af_susan(af_features* out, const af_array in,
             case b8 : *out = susan<char  >(in, radius, diff_thr, geom_thr, feature_ratio, edge); break;
             case s32: *out = susan<int   >(in, radius, diff_thr, geom_thr, feature_ratio, edge); break;
             case u32: *out = susan<uint  >(in, radius, diff_thr, geom_thr, feature_ratio, edge); break;
+            case s16: *out = susan<short >(in, radius, diff_thr, geom_thr, feature_ratio, edge); break;
+            case u16: *out = susan<ushort>(in, radius, diff_thr, geom_thr, feature_ratio, edge); break;
             case u8 : *out = susan<uchar >(in, radius, diff_thr, geom_thr, feature_ratio, edge); break;
             default : TYPE_ERROR(1, type);
         }
diff --git a/src/api/c/tile.cpp b/src/api/c/tile.cpp
index 7d546c2..f722f89 100644
--- a/src/api/c/tile.cpp
+++ b/src/api/c/tile.cpp
@@ -70,6 +70,8 @@ af_err af_tile(af_array *out, const af_array in, const af::dim4 &tileDims)
             case u32: output = tile<uint   >(in, tileDims);  break;
             case s64: output = tile<intl   >(in, tileDims);  break;
             case u64: output = tile<uintl  >(in, tileDims);  break;
+            case s16: output = tile<short  >(in, tileDims);  break;
+            case u16: output = tile<ushort >(in, tileDims);  break;
             case u8:  output = tile<uchar  >(in, tileDims);  break;
             default:  TYPE_ERROR(1, type);
         }
diff --git a/src/api/c/transform.cpp b/src/api/c/transform.cpp
index c24c9f7..bacb008 100644
--- a/src/api/c/transform.cpp
+++ b/src/api/c/transform.cpp
@@ -63,6 +63,8 @@ af_err af_transform(af_array *out, const af_array in, const af_array tf,
             case u32: output = transform<uint   >(in, tf, odims, method, inverse);  break;
             case s64: output = transform<intl   >(in, tf, odims, method, inverse);  break;
             case u64: output = transform<uintl  >(in, tf, odims, method, inverse);  break;
+            case s16: output = transform<short  >(in, tf, odims, method, inverse);  break;
+            case u16: output = transform<ushort >(in, tf, odims, method, inverse);  break;
             case u8:  output = transform<uchar  >(in, tf, odims, method, inverse);  break;
             case b8:  output = transform<char   >(in, tf, odims, method, inverse);  break;
             default:  TYPE_ERROR(1, itype);
diff --git a/src/api/c/transpose.cpp b/src/api/c/transpose.cpp
index eb89695..1418c29 100644
--- a/src/api/c/transpose.cpp
+++ b/src/api/c/transpose.cpp
@@ -61,6 +61,8 @@ af_err af_transpose(af_array *out, af_array in, const bool conjugate)
             case u8 : output = trs<uchar>  (in, conjugate);    break;
             case s64: output = trs<intl>   (in, conjugate);    break;
             case u64: output = trs<uintl>  (in, conjugate);    break;
+            case s16: output = trs<short>  (in, conjugate);    break;
+            case u16: output = trs<ushort> (in, conjugate);    break;
             default : TYPE_ERROR(1, type);
         }
         std::swap(*out,output);
@@ -101,6 +103,8 @@ af_err af_transpose_inplace(af_array in, const bool conjugate)
             case u8 : transpose_inplace<uchar>  (in, conjugate);    break;
             case s64: transpose_inplace<intl>   (in, conjugate);    break;
             case u64: transpose_inplace<uintl>  (in, conjugate);    break;
+            case s16: transpose_inplace<short>  (in, conjugate);    break;
+            case u16: transpose_inplace<ushort> (in, conjugate);    break;
             default : TYPE_ERROR(1, type);
         }
     }
diff --git a/src/api/c/type_util.cpp b/src/api/c/type_util.cpp
index 750932c..39a9af6 100644
--- a/src/api/c/type_util.cpp
+++ b/src/api/c/type_util.cpp
@@ -18,8 +18,12 @@ const char *getName(af_dtype type)
     case c64: return "complex double";
     case u32: return "unsigned int";
     case s32: return "int";
-    case u8: return "unsigned char";
-    case b8: return "bool";
-    default: return "unknown type";
+    case u16: return "unsigned short";
+    case s16: return "short";
+    case u64: return "unsigned long long";
+    case s64: return "long long";
+    case u8 : return "unsigned char";
+    case b8 : return "bool";
+    default : return "unknown type";
     }
 }
diff --git a/src/api/c/unwrap.cpp b/src/api/c/unwrap.cpp
index 2e80d94..25b4a67 100644
--- a/src/api/c/unwrap.cpp
+++ b/src/api/c/unwrap.cpp
@@ -52,6 +52,8 @@ af_err af_unwrap(af_array *out, const af_array in, const dim_t wx, const dim_t w
             case u32: output = unwrap<uint   >(in, wx, wy, sx, sy, px, py, is_column);  break;
             case s64: output = unwrap<intl   >(in, wx, wy, sx, sy, px, py, is_column);  break;
             case u64: output = unwrap<uintl  >(in, wx, wy, sx, sy, px, py, is_column);  break;
+            case s16: output = unwrap<short  >(in, wx, wy, sx, sy, px, py, is_column);  break;
+            case u16: output = unwrap<ushort >(in, wx, wy, sx, sy, px, py, is_column);  break;
             case u8:  output = unwrap<uchar  >(in, wx, wy, sx, sy, px, py, is_column);  break;
             case b8:  output = unwrap<char   >(in, wx, wy, sx, sy, px, py, is_column);  break;
             default:  TYPE_ERROR(1, type);
diff --git a/src/api/c/var.cpp b/src/api/c/var.cpp
index 7feb1c4..a6bf434 100644
--- a/src/api/c/var.cpp
+++ b/src/api/c/var.cpp
@@ -127,6 +127,8 @@ af_err af_var(af_array *out, const af_array in, const bool isbiased, const dim_t
             case f32: output = var<float ,  float >(in, isbiased, dim); break;
             case s32: output = var<int   ,  float >(in, isbiased, dim); break;
             case u32: output = var<uint  ,  float >(in, isbiased, dim); break;
+            case s16: output = var<short ,  float >(in, isbiased, dim); break;
+            case u16: output = var<ushort,  float >(in, isbiased, dim); break;
             case s64: output = var<intl  ,  double>(in, isbiased, dim); break;
             case u64: output = var<uintl ,  double>(in, isbiased, dim); break;
             case  u8: output = var<uchar ,  float >(in, isbiased, dim); break;
@@ -159,6 +161,8 @@ af_err af_var_weighted(af_array *out, const af_array in, const af_array weights,
             case f32: output = var<float ,  float >(in, weights, dim); break;
             case s32: output = var<int   ,  float >(in, weights, dim); break;
             case u32: output = var<uint  ,  float >(in, weights, dim); break;
+            case s16: output = var<short ,  float >(in, weights, dim); break;
+            case u16: output = var<ushort,  float >(in, weights, dim); break;
             case s64: output = var<intl  ,  double>(in, weights, dim); break;
             case u64: output = var<uintl ,  double>(in, weights, dim); break;
             case  u8: output = var<uchar ,  float >(in, weights, dim); break;
@@ -183,6 +187,8 @@ af_err af_var_all(double *realVal, double *imagVal, const af_array in, const boo
             case f32: *realVal = varAll<float , float >(in, isbiased); break;
             case s32: *realVal = varAll<int   , float >(in, isbiased); break;
             case u32: *realVal = varAll<uint  , float >(in, isbiased); break;
+            case s16: *realVal = varAll<short , float >(in, isbiased); break;
+            case u16: *realVal = varAll<ushort, float >(in, isbiased); break;
             case s64: *realVal = varAll<intl  , double>(in, isbiased); break;
             case u64: *realVal = varAll<uintl , double>(in, isbiased); break;
             case  u8: *realVal = varAll<uchar , float >(in, isbiased); break;
@@ -219,6 +225,8 @@ af_err af_var_all_weighted(double *realVal, double *imagVal, const af_array in,
             case f32: *realVal = varAll<float , float >(in, weights); break;
             case s32: *realVal = varAll<int   , float >(in, weights); break;
             case u32: *realVal = varAll<uint  , float >(in, weights); break;
+            case s16: *realVal = varAll<short , float >(in, weights); break;
+            case u16: *realVal = varAll<ushort, float >(in, weights); break;
             case s64: *realVal = varAll<intl  , double >(in, weights); break;
             case u64: *realVal = varAll<uintl , double >(in, weights); break;
             case  u8: *realVal = varAll<uchar , float >(in, weights); break;
diff --git a/src/api/c/where.cpp b/src/api/c/where.cpp
index 0853e6d..4aad8c4 100644
--- a/src/api/c/where.cpp
+++ b/src/api/c/where.cpp
@@ -40,6 +40,8 @@ af_err af_where(af_array *idx, const af_array in)
         case u32: res = where<uint   >(in); break;
         case s64: res = where<intl   >(in); break;
         case u64: res = where<uintl  >(in); break;
+        case s16: res = where<short  >(in); break;
+        case u16: res = where<ushort >(in); break;
         case u8 : res = where<uchar  >(in); break;
         case b8 : res = where<char   >(in); break;
         default:
diff --git a/src/api/c/wrap.cpp b/src/api/c/wrap.cpp
index dc2b54b..85386b2a 100644
--- a/src/api/c/wrap.cpp
+++ b/src/api/c/wrap.cpp
@@ -66,6 +66,8 @@ af_err af_wrap(af_array *out, const af_array in,
             case u32: output = wrap<uint   >(in, ox, oy, wx, wy, sx, sy, px, py, is_column);  break;
             case s64: output = wrap<intl   >(in, ox, oy, wx, wy, sx, sy, px, py, is_column);  break;
             case u64: output = wrap<uintl  >(in, ox, oy, wx, wy, sx, sy, px, py, is_column);  break;
+            case s16: output = wrap<short  >(in, ox, oy, wx, wy, sx, sy, px, py, is_column);  break;
+            case u16: output = wrap<ushort >(in, ox, oy, wx, wy, sx, sy, px, py, is_column);  break;
             case u8:  output = wrap<uchar  >(in, ox, oy, wx, wy, sx, sy, px, py, is_column);  break;
             case b8:  output = wrap<char   >(in, ox, oy, wx, wy, sx, sy, px, py, is_column);  break;
             default:  TYPE_ERROR(1, type);
diff --git a/src/api/cpp/array.cpp b/src/api/cpp/array.cpp
index 3280457..8d7f164 100644
--- a/src/api/cpp/array.cpp
+++ b/src/api/cpp/array.cpp
@@ -84,6 +84,8 @@ namespace af
         case b8 : return sizeof(unsigned char);
         case c32: return sizeof(float) * 2;
         case c64: return sizeof(double) * 2;
+        case s16: return sizeof(short);
+        case u16: return sizeof(ushort);
         default: return sizeof(float);
         }
     }
@@ -219,6 +221,8 @@ namespace af
     INSTANTIATE(char)
     INSTANTIATE(intl)
     INSTANTIATE(uintl)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 
 #undef INSTANTIATE
 
@@ -669,15 +673,18 @@ namespace af
     ASSIGN_TYPE(char               , OP)        \
     ASSIGN_TYPE(unsigned char      , OP)        \
     ASSIGN_TYPE(bool               , OP)        \
+    ASSIGN_TYPE(short              , OP)        \
+    ASSIGN_TYPE(unsigned short     , OP)        \
 
     ASSIGN_OP(= , =)
     ASSIGN_OP(+=, +)
     ASSIGN_OP(-=, -)
     ASSIGN_OP(*=, *)
     ASSIGN_OP(/=, /)
-#undef ASSIGN_TYPE
 #undef ASSIGN_OP
 
+#undef ASSIGN_TYPE
+
 #define SELF_OP(OP, op1)                                                          \
     array::array_proxy& array::array_proxy::operator OP(const array_proxy &other) \
     {                                                                             \
@@ -815,6 +822,8 @@ namespace af
     ASSIGN_TYPE(char               , OP)                            \
     ASSIGN_TYPE(unsigned char      , OP)                            \
     ASSIGN_TYPE(bool               , OP)                            \
+    ASSIGN_TYPE(short              , OP)                            \
+    ASSIGN_TYPE(unsigned short     , OP)                            \
 
     ASSIGN_OP(+=, af_add)
     ASSIGN_OP(-=, af_sub)
@@ -822,6 +831,7 @@ namespace af
     ASSIGN_OP(/=, af_div)
 
 #undef ASSIGN_OP
+
 #undef ASSIGN_TYPE
 
 #define ASSIGN_TYPE(TY, OP)                                     \
@@ -847,10 +857,13 @@ namespace af
     ASSIGN_TYPE(char               , OP)        \
     ASSIGN_TYPE(unsigned char      , OP)        \
     ASSIGN_TYPE(bool               , OP)        \
+    ASSIGN_TYPE(short              , OP)        \
+    ASSIGN_TYPE(unsigned short     , OP)        \
 
     ASSIGN_OP(= )
 
 #undef ASSIGN_OP
+
 #undef ASSIGN_TYPE
 
 af::dtype implicit_dtype(af::dtype scalar_type, af::dtype array_type)
@@ -917,6 +930,8 @@ af::dtype implicit_dtype(af::dtype scalar_type, af::dtype array_type)
     BINARY_TYPE(char               , OP, func, b8)              \
     BINARY_TYPE(unsigned char      , OP, func, u8)              \
     BINARY_TYPE(bool               , OP, func, b8)              \
+    BINARY_TYPE(short              , OP, func, s16)             \
+    BINARY_TYPE(unsigned short     , OP, func, u16)             \
 
     BINARY_OP(+, af_add)
     BINARY_OP(-, af_sub)
@@ -937,9 +952,10 @@ af::dtype implicit_dtype(af::dtype scalar_type, af::dtype array_type)
     BINARY_OP(<<, af_bitshiftl)
     BINARY_OP(>>, af_bitshiftr)
 
-#undef BINARY_TYPE
 #undef BINARY_OP
 
+#undef BINARY_TYPE
+
     array array::operator-() const
     {
         af_array lhs = this->get();
@@ -1013,6 +1029,8 @@ af::dtype implicit_dtype(af::dtype scalar_type, af::dtype array_type)
     INSTANTIATE(char)
     INSTANTIATE(intl)
     INSTANTIATE(uintl)
+    INSTANTIATE(short);
+    INSTANTIATE(ushort);
 
 #undef INSTANTIATE
 
@@ -1041,6 +1059,8 @@ af::dtype implicit_dtype(af::dtype scalar_type, af::dtype array_type)
     INSTANTIATE(char)
     INSTANTIATE(intl)
     INSTANTIATE(uintl)
+    INSTANTIATE(short);
+    INSTANTIATE(ushort);
 
 #undef INSTANTIATE
 #undef TEMPLATE_MEM_FUNC
diff --git a/src/api/cpp/corrcoef.cpp b/src/api/cpp/corrcoef.cpp
index 3b8f5cf..ed78a68 100644
--- a/src/api/cpp/corrcoef.cpp
+++ b/src/api/cpp/corrcoef.cpp
@@ -28,6 +28,10 @@ INSTANTIATE_CORRCOEF(int);
 INSTANTIATE_CORRCOEF(unsigned int);
 INSTANTIATE_CORRCOEF(char);
 INSTANTIATE_CORRCOEF(unsigned char);
+INSTANTIATE_CORRCOEF(intl);
+INSTANTIATE_CORRCOEF(uintl);
+INSTANTIATE_CORRCOEF(short);
+INSTANTIATE_CORRCOEF(unsigned short);
 
 #undef INSTANTIATE_CORRCOEF
 
diff --git a/src/api/cpp/data.cpp b/src/api/cpp/data.cpp
index 196fbf8..3b7854a 100644
--- a/src/api/cpp/data.cpp
+++ b/src/api/cpp/data.cpp
@@ -117,6 +117,8 @@ namespace af
     CONSTANT(long long);
     CONSTANT(unsigned long long);
     CONSTANT(bool);
+    CONSTANT(short);
+    CONSTANT(unsigned short);
 
 #undef CONSTANT
 
diff --git a/src/api/cpp/device.cpp b/src/api/cpp/device.cpp
index 5f837eb..193cba3 100644
--- a/src/api/cpp/device.cpp
+++ b/src/api/cpp/device.cpp
@@ -91,6 +91,8 @@ namespace af
         case b8 : return sizeof(unsigned char);
         case c32: return sizeof(float) * 2;
         case c64: return sizeof(double) * 2;
+        case s16: return sizeof(short);
+        case u16: return sizeof(ushort);
         default: return sizeof(float);
         }
     }
@@ -148,12 +150,12 @@ namespace af
     }
 
 #define INSTANTIATE(T)                                                  \
-    template<> AFAPI                                                    \
+    template<>                                                          \
     T* alloc(const size_t elements)                                     \
     {                                                                   \
         return (T*)alloc(elements, (af::dtype)dtype_traits<T>::af_type); \
     }                                                                   \
-    template<> AFAPI                                                    \
+    template<>                                                          \
     T* pinned(const size_t elements)                                    \
     {                                                                   \
         return (T*)pinned(elements, (af::dtype)dtype_traits<T>::af_type); \
@@ -167,5 +169,7 @@ namespace af
     INSTANTIATE(unsigned)
     INSTANTIATE(unsigned char)
     INSTANTIATE(char)
+    INSTANTIATE(short)
+    INSTANTIATE(unsigned short)
 
 }
diff --git a/src/api/cpp/mean.cpp b/src/api/cpp/mean.cpp
index 877ca16..980a0d1 100644
--- a/src/api/cpp/mean.cpp
+++ b/src/api/cpp/mean.cpp
@@ -80,6 +80,10 @@ INSTANTIATE_MEAN(int);
 INSTANTIATE_MEAN(unsigned int);
 INSTANTIATE_MEAN(char);
 INSTANTIATE_MEAN(unsigned char);
+INSTANTIATE_MEAN(long long);
+INSTANTIATE_MEAN(unsigned long long);
+INSTANTIATE_MEAN(short);
+INSTANTIATE_MEAN(unsigned short);
 
 #undef INSTANTIATE_MEAN
 
diff --git a/src/api/cpp/median.cpp b/src/api/cpp/median.cpp
index 2d6d878..d047d78 100644
--- a/src/api/cpp/median.cpp
+++ b/src/api/cpp/median.cpp
@@ -29,6 +29,10 @@ INSTANTIATE_MEDIAN(int);
 INSTANTIATE_MEDIAN(unsigned int);
 INSTANTIATE_MEDIAN(char);
 INSTANTIATE_MEDIAN(unsigned char);
+INSTANTIATE_MEDIAN(long long);
+INSTANTIATE_MEDIAN(unsigned long long);
+INSTANTIATE_MEDIAN(short);
+INSTANTIATE_MEDIAN(unsigned short);
 
 #undef INSTANTIATE_MEDIAN
 
diff --git a/src/api/cpp/reduce.cpp b/src/api/cpp/reduce.cpp
index d492ef0..18c12ee 100644
--- a/src/api/cpp/reduce.cpp
+++ b/src/api/cpp/reduce.cpp
@@ -115,6 +115,8 @@ namespace af
     INSTANTIATE_REAL(fnC, fnCPP, unsigned long)         \
     INSTANTIATE_REAL(fnC, fnCPP, long long)             \
     INSTANTIATE_REAL(fnC, fnCPP, unsigned long long)    \
+    INSTANTIATE_REAL(fnC, fnCPP, short)                 \
+    INSTANTIATE_REAL(fnC, fnCPP, unsigned short)        \
     INSTANTIATE_REAL(fnC, fnCPP, char)                  \
     INSTANTIATE_REAL(fnC, fnCPP, unsigned char)         \
     INSTANTIATE_CPLX(fnC, fnCPP, af_cfloat, float)      \
@@ -201,6 +203,8 @@ INSTANTIATE(product_nan, product)
     INSTANTIATE_COMPAT(fnCPP, fnCompat, unsigned char)          \
     INSTANTIATE_COMPAT(fnCPP, fnCompat, af_cfloat)              \
     INSTANTIATE_COMPAT(fnCPP, fnCompat, af_cdouble)             \
+    INSTANTIATE_COMPAT(fnCPP, fnCompat, short)                  \
+    INSTANTIATE_COMPAT(fnCPP, fnCompat, unsigned short)         \
 
     INSTANTIATE(product, mul)
     INSTANTIATE(allTrue, alltrue)
@@ -238,6 +242,8 @@ INSTANTIATE(product_nan, product)
     INSTANTIATE_REAL(fn, unsigned)              \
     INSTANTIATE_REAL(fn, char)                  \
     INSTANTIATE_REAL(fn, unsigned char)         \
+    INSTANTIATE_REAL(fn, short)                 \
+    INSTANTIATE_REAL(fn, unsigned short)        \
     INSTANTIATE_CPLX(fn, af_cfloat, float)      \
     INSTANTIATE_CPLX(fn, af_cdouble, double)    \
 
diff --git a/src/api/cpp/stdev.cpp b/src/api/cpp/stdev.cpp
index b21366a..5a05057 100644
--- a/src/api/cpp/stdev.cpp
+++ b/src/api/cpp/stdev.cpp
@@ -42,6 +42,10 @@ INSTANTIATE_STDEV(float);
 INSTANTIATE_STDEV(double);
 INSTANTIATE_STDEV(int);
 INSTANTIATE_STDEV(unsigned int);
+INSTANTIATE_STDEV(intl);
+INSTANTIATE_STDEV(uintl);
+INSTANTIATE_STDEV(short);
+INSTANTIATE_STDEV(unsigned short);
 INSTANTIATE_STDEV(char);
 INSTANTIATE_STDEV(unsigned char);
 
diff --git a/src/api/cpp/var.cpp b/src/api/cpp/var.cpp
index 224cd9b..bcff1dc 100644
--- a/src/api/cpp/var.cpp
+++ b/src/api/cpp/var.cpp
@@ -80,6 +80,8 @@ INSTANTIATE_VAR(int);
 INSTANTIATE_VAR(unsigned int);
 INSTANTIATE_VAR(intl);
 INSTANTIATE_VAR(uintl);
+INSTANTIATE_VAR(short);
+INSTANTIATE_VAR(unsigned short);
 INSTANTIATE_VAR(char);
 INSTANTIATE_VAR(unsigned char);
 
diff --git a/src/backend/ArrayInfo.cpp b/src/backend/ArrayInfo.cpp
index 2fc56a9..8aea983 100644
--- a/src/backend/ArrayInfo.cpp
+++ b/src/backend/ArrayInfo.cpp
@@ -133,6 +133,8 @@ bool ArrayInfo::isInteger() const
          || type == u32
          || type == s64
          || type == u64
+         || type == s16
+         || type == u16
          || type == u8);
 }
 
diff --git a/src/backend/cpu/Array.cpp b/src/backend/cpu/Array.cpp
index 683fc1a..096d75f 100644
--- a/src/backend/cpu/Array.cpp
+++ b/src/backend/cpu/Array.cpp
@@ -293,4 +293,6 @@ namespace cpu
     INSTANTIATE(char)
     INSTANTIATE(intl)
     INSTANTIATE(uintl)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 }
diff --git a/src/backend/cpu/approx.cpp b/src/backend/cpu/approx.cpp
index 0686b2f..2d3beae 100644
--- a/src/backend/cpu/approx.cpp
+++ b/src/backend/cpu/approx.cpp
@@ -333,12 +333,12 @@ namespace cpu
         return out;
     }
 
-#define INSTANTIATE(Ty, Tp)                                                                     \
+#define INSTANTIATE(Ty, Tp)                                                                    \
     template Array<Ty> approx1<Ty, Tp>(const Array<Ty> &in, const Array<Tp> &pos,              \
-                                        const af_interp_type method, const float offGrid);      \
+                                       const af_interp_type method, const float offGrid);      \
     template Array<Ty> approx2<Ty, Tp>(const Array<Ty> &in, const Array<Tp> &pos0,             \
-                                        const Array<Tp> &pos1, const af_interp_type method,     \
-                                        const float offGrid);                                   \
+                                       const Array<Tp> &pos1, const af_interp_type method,     \
+                                       const float offGrid);                                   \
 
     INSTANTIATE(float  , float )
     INSTANTIATE(double , double)
diff --git a/src/backend/cpu/assign.cpp b/src/backend/cpu/assign.cpp
index a8ac33e..623bd52 100644
--- a/src/backend/cpu/assign.cpp
+++ b/src/backend/cpu/assign.cpp
@@ -124,5 +124,7 @@ INSTANTIATE(intl   )
 INSTANTIATE(int    )
 INSTANTIATE(uchar  )
 INSTANTIATE(char   )
+INSTANTIATE(ushort )
+INSTANTIATE(short  )
 
 }
diff --git a/src/backend/cpu/bilateral.cpp b/src/backend/cpu/bilateral.cpp
index d8ef7c6..2d1e4dd 100644
--- a/src/backend/cpu/bilateral.cpp
+++ b/src/backend/cpu/bilateral.cpp
@@ -107,5 +107,7 @@ INSTANTIATE(char  ,  float)
 INSTANTIATE(int   ,  float)
 INSTANTIATE(uint  ,  float)
 INSTANTIATE(uchar ,  float)
+INSTANTIATE(short ,  float)
+INSTANTIATE(ushort,  float)
 
 }
diff --git a/src/backend/cpu/convolve.cpp b/src/backend/cpu/convolve.cpp
index 33670d4..3ab44c8 100644
--- a/src/backend/cpu/convolve.cpp
+++ b/src/backend/cpu/convolve.cpp
@@ -319,5 +319,7 @@ INSTANTIATE(uint   ,   float)
 INSTANTIATE(int    ,   float)
 INSTANTIATE(uchar  ,   float)
 INSTANTIATE(char   ,   float)
+INSTANTIATE(ushort ,   float)
+INSTANTIATE(short  ,   float)
 
 }
diff --git a/src/backend/cpu/copy.cpp b/src/backend/cpu/copy.cpp
index a2bb4ff..87e4480 100644
--- a/src/backend/cpu/copy.cpp
+++ b/src/backend/cpu/copy.cpp
@@ -149,6 +149,8 @@ namespace cpu
     INSTANTIATE(char   )
     INSTANTIATE(intl   )
     INSTANTIATE(uintl  )
+    INSTANTIATE(short  )
+    INSTANTIATE(ushort )
 
 
 #define INSTANTIATE_PAD_ARRAY(SRC_T)                                    \
@@ -158,29 +160,35 @@ namespace cpu
     template Array<cdouble> padArray<SRC_T, cdouble>(Array<SRC_T> const &src, dim4 const &dims, cdouble default_value, double factor); \
     template Array<int    > padArray<SRC_T, int    >(Array<SRC_T> const &src, dim4 const &dims, int     default_value, double factor); \
     template Array<uint   > padArray<SRC_T, uint   >(Array<SRC_T> const &src, dim4 const &dims, uint    default_value, double factor); \
-    template Array<intl    > padArray<SRC_T, intl    >(Array<SRC_T> const &src, dim4 const &dims, intl     default_value, double factor); \
-    template Array<uintl   > padArray<SRC_T, uintl   >(Array<SRC_T> const &src, dim4 const &dims, uintl    default_value, double factor); \
+    template Array<intl   > padArray<SRC_T, intl   >(Array<SRC_T> const &src, dim4 const &dims, intl    default_value, double factor); \
+    template Array<uintl  > padArray<SRC_T, uintl  >(Array<SRC_T> const &src, dim4 const &dims, uintl   default_value, double factor); \
+    template Array<short  > padArray<SRC_T, short  >(Array<SRC_T> const &src, dim4 const &dims, short   default_value, double factor); \
+    template Array<ushort > padArray<SRC_T, ushort >(Array<SRC_T> const &src, dim4 const &dims, ushort  default_value, double factor); \
     template Array<uchar  > padArray<SRC_T, uchar  >(Array<SRC_T> const &src, dim4 const &dims, uchar   default_value, double factor); \
     template Array<char   > padArray<SRC_T, char   >(Array<SRC_T> const &src, dim4 const &dims, char    default_value, double factor); \
-    template void copyArray<SRC_T, float  >(Array<float  > &dst, Array<SRC_T> const &src); \
-    template void copyArray<SRC_T, double >(Array<double > &dst, Array<SRC_T> const &src); \
-    template void copyArray<SRC_T, cfloat >(Array<cfloat > &dst, Array<SRC_T> const &src); \
-    template void copyArray<SRC_T, cdouble>(Array<cdouble> &dst, Array<SRC_T> const &src); \
-    template void copyArray<SRC_T, int    >(Array<int    > &dst, Array<SRC_T> const &src); \
-    template void copyArray<SRC_T, uint   >(Array<uint   > &dst, Array<SRC_T> const &src); \
-    template void copyArray<SRC_T, intl    >(Array<intl    > &dst, Array<SRC_T> const &src); \
-    template void copyArray<SRC_T, uintl   >(Array<uintl   > &dst, Array<SRC_T> const &src); \
-    template void copyArray<SRC_T, uchar  >(Array<uchar  > &dst, Array<SRC_T> const &src); \
+    template void copyArray<SRC_T, float  >(Array<float  > &dst, Array<SRC_T> const &src);  \
+    template void copyArray<SRC_T, double >(Array<double > &dst, Array<SRC_T> const &src);  \
+    template void copyArray<SRC_T, cfloat >(Array<cfloat > &dst, Array<SRC_T> const &src);  \
+    template void copyArray<SRC_T, cdouble>(Array<cdouble> &dst, Array<SRC_T> const &src);  \
+    template void copyArray<SRC_T, int    >(Array<int    > &dst, Array<SRC_T> const &src);  \
+    template void copyArray<SRC_T, uint   >(Array<uint   > &dst, Array<SRC_T> const &src);  \
+    template void copyArray<SRC_T, intl   >(Array<intl   > &dst, Array<SRC_T> const &src);  \
+    template void copyArray<SRC_T, uintl  >(Array<uintl  > &dst, Array<SRC_T> const &src);  \
+    template void copyArray<SRC_T, short  >(Array<short  > &dst, Array<SRC_T> const &src);  \
+    template void copyArray<SRC_T, ushort >(Array<ushort > &dst, Array<SRC_T> const &src);  \
+    template void copyArray<SRC_T, uchar  >(Array<uchar  > &dst, Array<SRC_T> const &src);  \
     template void copyArray<SRC_T, char   >(Array<char   > &dst, Array<SRC_T> const &src);
 
     INSTANTIATE_PAD_ARRAY(float )
     INSTANTIATE_PAD_ARRAY(double)
     INSTANTIATE_PAD_ARRAY(int   )
     INSTANTIATE_PAD_ARRAY(uint  )
-    INSTANTIATE_PAD_ARRAY(intl   )
-    INSTANTIATE_PAD_ARRAY(uintl  )
+    INSTANTIATE_PAD_ARRAY(intl  )
+    INSTANTIATE_PAD_ARRAY(uintl )
     INSTANTIATE_PAD_ARRAY(uchar )
     INSTANTIATE_PAD_ARRAY(char  )
+    INSTANTIATE_PAD_ARRAY(ushort)
+    INSTANTIATE_PAD_ARRAY(short )
 
 #define INSTANTIATE_PAD_ARRAY_COMPLEX(SRC_T)                            \
     template Array<cfloat > padArray<SRC_T, cfloat >(Array<SRC_T> const &src, dim4 const &dims, cfloat  default_value, double factor); \
@@ -197,14 +205,16 @@ namespace cpu
         CPU_NOT_SUPPORTED();\
     }
 
-    SPECILIAZE_UNUSED_COPYARRAY(cfloat, double)
-    SPECILIAZE_UNUSED_COPYARRAY(cfloat, float)
-    SPECILIAZE_UNUSED_COPYARRAY(cfloat, uchar)
-    SPECILIAZE_UNUSED_COPYARRAY(cfloat, char)
-    SPECILIAZE_UNUSED_COPYARRAY(cfloat, uint)
-    SPECILIAZE_UNUSED_COPYARRAY(cfloat, int)
-    SPECILIAZE_UNUSED_COPYARRAY(cfloat, intl)
-    SPECILIAZE_UNUSED_COPYARRAY(cfloat, uintl)
+    SPECILIAZE_UNUSED_COPYARRAY(cfloat , double)
+    SPECILIAZE_UNUSED_COPYARRAY(cfloat , float)
+    SPECILIAZE_UNUSED_COPYARRAY(cfloat , uchar)
+    SPECILIAZE_UNUSED_COPYARRAY(cfloat , char)
+    SPECILIAZE_UNUSED_COPYARRAY(cfloat , uint)
+    SPECILIAZE_UNUSED_COPYARRAY(cfloat , int)
+    SPECILIAZE_UNUSED_COPYARRAY(cfloat , intl)
+    SPECILIAZE_UNUSED_COPYARRAY(cfloat , uintl)
+    SPECILIAZE_UNUSED_COPYARRAY(cfloat , short)
+    SPECILIAZE_UNUSED_COPYARRAY(cfloat , ushort)
     SPECILIAZE_UNUSED_COPYARRAY(cdouble, double)
     SPECILIAZE_UNUSED_COPYARRAY(cdouble, float)
     SPECILIAZE_UNUSED_COPYARRAY(cdouble, uchar)
@@ -213,5 +223,7 @@ namespace cpu
     SPECILIAZE_UNUSED_COPYARRAY(cdouble, int)
     SPECILIAZE_UNUSED_COPYARRAY(cdouble, intl)
     SPECILIAZE_UNUSED_COPYARRAY(cdouble, uintl)
+    SPECILIAZE_UNUSED_COPYARRAY(cdouble, short)
+    SPECILIAZE_UNUSED_COPYARRAY(cdouble, ushort)
 
 }
diff --git a/src/backend/cpu/diagonal.cpp b/src/backend/cpu/diagonal.cpp
index 2ae69a6..d949a24 100644
--- a/src/backend/cpu/diagonal.cpp
+++ b/src/backend/cpu/diagonal.cpp
@@ -86,5 +86,7 @@ namespace cpu
     INSTANTIATE_DIAGONAL(uintl)
     INSTANTIATE_DIAGONAL(char)
     INSTANTIATE_DIAGONAL(uchar)
+    INSTANTIATE_DIAGONAL(short)
+    INSTANTIATE_DIAGONAL(ushort)
 
 }
diff --git a/src/backend/cpu/diff.cpp b/src/backend/cpu/diff.cpp
index 907c111..063a761 100644
--- a/src/backend/cpu/diff.cpp
+++ b/src/backend/cpu/diff.cpp
@@ -120,4 +120,6 @@ namespace cpu
     INSTANTIATE(uintl)
     INSTANTIATE(uchar)
     INSTANTIATE(char)
+    INSTANTIATE(ushort)
+    INSTANTIATE(short)
 }
diff --git a/src/backend/cpu/fast.cpp b/src/backend/cpu/fast.cpp
index 929d48f..1c8069c 100644
--- a/src/backend/cpu/fast.cpp
+++ b/src/backend/cpu/fast.cpp
@@ -336,5 +336,7 @@ INSTANTIATE(char  )
 INSTANTIATE(int   )
 INSTANTIATE(uint  )
 INSTANTIATE(uchar )
+INSTANTIATE(short )
+INSTANTIATE(ushort)
 
 }
diff --git a/src/backend/cpu/fftconvolve.cpp b/src/backend/cpu/fftconvolve.cpp
index bdc5538..f76f3a0 100644
--- a/src/backend/cpu/fftconvolve.cpp
+++ b/src/backend/cpu/fftconvolve.cpp
@@ -428,5 +428,9 @@ INSTANTIATE(uint  , float,  cfloat,  false, true)
 INSTANTIATE(int   , float,  cfloat,  false, true)
 INSTANTIATE(uchar , float,  cfloat,  false, true)
 INSTANTIATE(char  , float,  cfloat,  false, true)
+INSTANTIATE(uintl , float,  cfloat,  false, true)
+INSTANTIATE(intl  , float,  cfloat,  false, true)
+INSTANTIATE(ushort, float,  cfloat,  false, true)
+INSTANTIATE(short , float,  cfloat,  false, true)
 
 } // namespace cpu
diff --git a/src/backend/cpu/hist_graphics.cpp b/src/backend/cpu/hist_graphics.cpp
index 4c940fb..21d3fdf 100644
--- a/src/backend/cpu/hist_graphics.cpp
+++ b/src/backend/cpu/hist_graphics.cpp
@@ -34,6 +34,8 @@ INSTANTIATE(float)
 INSTANTIATE(int)
 INSTANTIATE(uint)
 INSTANTIATE(uchar)
+INSTANTIATE(short)
+INSTANTIATE(ushort)
 
 }
 
diff --git a/src/backend/cpu/histogram.cpp b/src/backend/cpu/histogram.cpp
index de38f37..8359729 100644
--- a/src/backend/cpu/histogram.cpp
+++ b/src/backend/cpu/histogram.cpp
@@ -58,5 +58,7 @@ INSTANTIATE(char  , uint)
 INSTANTIATE(int   , uint)
 INSTANTIATE(uint  , uint)
 INSTANTIATE(uchar , uint)
+INSTANTIATE(short , uint)
+INSTANTIATE(ushort, uint)
 
 }
diff --git a/src/backend/cpu/identity.cpp b/src/backend/cpu/identity.cpp
index 3112991..2973ae4 100644
--- a/src/backend/cpu/identity.cpp
+++ b/src/backend/cpu/identity.cpp
@@ -42,10 +42,12 @@ namespace cpu
     INSTANTIATE_IDENTITY(cfloat)
     INSTANTIATE_IDENTITY(cdouble)
     INSTANTIATE_IDENTITY(int)
+    INSTANTIATE_IDENTITY(uint)
     INSTANTIATE_IDENTITY(intl)
     INSTANTIATE_IDENTITY(uintl)
-    INSTANTIATE_IDENTITY(uint)
     INSTANTIATE_IDENTITY(char)
     INSTANTIATE_IDENTITY(uchar)
+    INSTANTIATE_IDENTITY(short)
+    INSTANTIATE_IDENTITY(ushort)
 
 }
diff --git a/src/backend/cpu/image.cpp b/src/backend/cpu/image.cpp
index 8b211fe..947afa2 100644
--- a/src/backend/cpu/image.cpp
+++ b/src/backend/cpu/image.cpp
@@ -46,6 +46,8 @@ namespace cpu
     INSTANTIATE(uint)
     INSTANTIATE(uchar)
     INSTANTIATE(char)
+    INSTANTIATE(ushort)
+    INSTANTIATE(short)
 }
 
 #endif  // WITH_GRAPHICS
diff --git a/src/backend/cpu/index.cpp b/src/backend/cpu/index.cpp
index 162e67f..e6d3dab 100644
--- a/src/backend/cpu/index.cpp
+++ b/src/backend/cpu/index.cpp
@@ -122,5 +122,7 @@ INSTANTIATE(intl   )
 INSTANTIATE(int    )
 INSTANTIATE(uchar  )
 INSTANTIATE(char   )
+INSTANTIATE(ushort )
+INSTANTIATE(short  )
 
 }
diff --git a/src/backend/cpu/iota.cpp b/src/backend/cpu/iota.cpp
index 668500f..47bcb92 100644
--- a/src/backend/cpu/iota.cpp
+++ b/src/backend/cpu/iota.cpp
@@ -67,4 +67,6 @@ namespace cpu
     INSTANTIATE(intl)
     INSTANTIATE(uintl)
     INSTANTIATE(uchar)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 }
diff --git a/src/backend/cpu/ireduce.cpp b/src/backend/cpu/ireduce.cpp
index 199a0be..2928af9 100644
--- a/src/backend/cpu/ireduce.cpp
+++ b/src/backend/cpu/ireduce.cpp
@@ -185,6 +185,8 @@ namespace cpu
     INSTANTIATE(af_min_t, uintl  )
     INSTANTIATE(af_min_t, char   )
     INSTANTIATE(af_min_t, uchar  )
+    INSTANTIATE(af_min_t, short  )
+    INSTANTIATE(af_min_t, ushort )
 
     //max
     INSTANTIATE(af_max_t, float  )
@@ -197,4 +199,6 @@ namespace cpu
     INSTANTIATE(af_max_t, uintl  )
     INSTANTIATE(af_max_t, char   )
     INSTANTIATE(af_max_t, uchar  )
+    INSTANTIATE(af_max_t, short  )
+    INSTANTIATE(af_max_t, ushort )
 }
diff --git a/src/backend/cpu/join.cpp b/src/backend/cpu/join.cpp
index eeb34a0..78d2a51 100644
--- a/src/backend/cpu/join.cpp
+++ b/src/backend/cpu/join.cpp
@@ -226,6 +226,8 @@ namespace cpu
     INSTANTIATE(uintl,   uintl)
     INSTANTIATE(uchar,   uchar)
     INSTANTIATE(char,    char)
+    INSTANTIATE(ushort,  ushort)
+    INSTANTIATE(short,   short)
 
 #undef INSTANTIATE
 
@@ -242,6 +244,8 @@ namespace cpu
     INSTANTIATE(uintl)
     INSTANTIATE(uchar)
     INSTANTIATE(char)
+    INSTANTIATE(ushort)
+    INSTANTIATE(short)
 
 #undef INSTANTIATE
 }
diff --git a/src/backend/cpu/lookup.cpp b/src/backend/cpu/lookup.cpp
index f3e18bd..1c47699 100644
--- a/src/backend/cpu/lookup.cpp
+++ b/src/backend/cpu/lookup.cpp
@@ -80,6 +80,8 @@ Array<in_t> lookup(const Array<in_t> &input, const Array<idx_t> &indices, const
     template Array<T>  lookup<T, double  >(const Array<T> &input, const Array<double  > &indices, const unsigned dim); \
     template Array<T>  lookup<T, int     >(const Array<T> &input, const Array<int     > &indices, const unsigned dim); \
     template Array<T>  lookup<T, unsigned>(const Array<T> &input, const Array<unsigned> &indices, const unsigned dim); \
+    template Array<T>  lookup<T, short   >(const Array<T> &input, const Array<short   > &indices, const unsigned dim); \
+    template Array<T>  lookup<T, ushort  >(const Array<T> &input, const Array<ushort  > &indices, const unsigned dim); \
     template Array<T>  lookup<T, uchar   >(const Array<T> &input, const Array<uchar   > &indices, const unsigned dim);
 
 INSTANTIATE(float   );
@@ -92,5 +94,7 @@ INSTANTIATE(intl    );
 INSTANTIATE(uintl   );
 INSTANTIATE(uchar   );
 INSTANTIATE(char    );
+INSTANTIATE(ushort  );
+INSTANTIATE(short   );
 
 }
diff --git a/src/backend/cpu/match_template.cpp b/src/backend/cpu/match_template.cpp
index b026529..4d93014 100644
--- a/src/backend/cpu/match_template.cpp
+++ b/src/backend/cpu/match_template.cpp
@@ -159,5 +159,7 @@ INSTANTIATE(char  ,  float)
 INSTANTIATE(int   ,  float)
 INSTANTIATE(uint  ,  float)
 INSTANTIATE(uchar ,  float)
+INSTANTIATE(short ,  float)
+INSTANTIATE(ushort,  float)
 
 }
diff --git a/src/backend/cpu/meanshift.cpp b/src/backend/cpu/meanshift.cpp
index 86e1d6e..1be2281 100644
--- a/src/backend/cpu/meanshift.cpp
+++ b/src/backend/cpu/meanshift.cpp
@@ -155,5 +155,7 @@ INSTANTIATE(char  )
 INSTANTIATE(int   )
 INSTANTIATE(uint  )
 INSTANTIATE(uchar )
+INSTANTIATE(short )
+INSTANTIATE(ushort)
 
 }
diff --git a/src/backend/cpu/medfilt.cpp b/src/backend/cpu/medfilt.cpp
index 1047a52..3ded3c0 100644
--- a/src/backend/cpu/medfilt.cpp
+++ b/src/backend/cpu/medfilt.cpp
@@ -145,5 +145,7 @@ INSTANTIATE(char  )
 INSTANTIATE(int   )
 INSTANTIATE(uint  )
 INSTANTIATE(uchar )
+INSTANTIATE(ushort)
+INSTANTIATE(short )
 
 }
diff --git a/src/backend/cpu/memory.cpp b/src/backend/cpu/memory.cpp
index c2a1441..ac10643 100644
--- a/src/backend/cpu/memory.cpp
+++ b/src/backend/cpu/memory.cpp
@@ -241,4 +241,6 @@ namespace cpu
     INSTANTIATE(uchar)
     INSTANTIATE(intl)
     INSTANTIATE(uintl)
+    INSTANTIATE(ushort)
+    INSTANTIATE(short )
 }
diff --git a/src/backend/cpu/morph.cpp b/src/backend/cpu/morph.cpp
index ff7b49d..eb2e1de 100644
--- a/src/backend/cpu/morph.cpp
+++ b/src/backend/cpu/morph.cpp
@@ -168,5 +168,7 @@ INSTANTIATE(char  )
 INSTANTIATE(int   )
 INSTANTIATE(uint  )
 INSTANTIATE(uchar )
+INSTANTIATE(ushort)
+INSTANTIATE(short )
 
 }
diff --git a/src/backend/cpu/nearest_neighbour.cpp b/src/backend/cpu/nearest_neighbour.cpp
index f706769..79d4151 100644
--- a/src/backend/cpu/nearest_neighbour.cpp
+++ b/src/backend/cpu/nearest_neighbour.cpp
@@ -80,6 +80,15 @@ struct dist_op<uchar, To, AF_SHD>
     }
 };
 
+template<typename To>
+struct dist_op<ushort, To, AF_SHD>
+{
+    To operator()(ushort v1, ushort v2)
+    {
+        return __builtin_popcount(v1 ^ v2);
+    }
+};
+
 template<typename T, typename To, af_match_type dist_type>
 void nearest_neighbour_(Array<uint>& idx, Array<To>& dist,
                         const Array<T>& query, const Array<T>& train,
@@ -169,7 +178,9 @@ INSTANTIATE(uint  , uint)
 INSTANTIATE(intl  , intl)
 INSTANTIATE(uintl , uintl)
 INSTANTIATE(uchar , uint)
+INSTANTIATE(ushort, uint)
+INSTANTIATE(short , int)
 
-INSTANTIATE(uintl, uint)    // For Hamming
+INSTANTIATE(uintl , uint)    // For Hamming
 
 }
diff --git a/src/backend/cpu/plot.cpp b/src/backend/cpu/plot.cpp
index 68c4300..9de1993 100644
--- a/src/backend/cpu/plot.cpp
+++ b/src/backend/cpu/plot.cpp
@@ -41,6 +41,8 @@ namespace cpu
     INSTANTIATE(int)
     INSTANTIATE(uint)
     INSTANTIATE(uchar)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 }
 
 #endif  // WITH_GRAPHICS
diff --git a/src/backend/cpu/random.cpp b/src/backend/cpu/random.cpp
index 4c91b96..ab4230e 100644
--- a/src/backend/cpu/random.cpp
+++ b/src/backend/cpu/random.cpp
@@ -133,6 +133,8 @@ INSTANTIATE_UNIFORM(uint)
 INSTANTIATE_UNIFORM(intl)
 INSTANTIATE_UNIFORM(uintl)
 INSTANTIATE_UNIFORM(uchar)
+INSTANTIATE_UNIFORM(short)
+INSTANTIATE_UNIFORM(ushort)
 
 #define INSTANTIATE_NORMAL(T)                              \
     template Array<T>  randn<T>(const af::dim4 &dims);
diff --git a/src/backend/cpu/range.cpp b/src/backend/cpu/range.cpp
index f0c8de1..eabf3a1 100644
--- a/src/backend/cpu/range.cpp
+++ b/src/backend/cpu/range.cpp
@@ -82,4 +82,6 @@ namespace cpu
     INSTANTIATE(intl)
     INSTANTIATE(uintl)
     INSTANTIATE(uchar)
+    INSTANTIATE(ushort)
+    INSTANTIATE(short)
 }
diff --git a/src/backend/cpu/reduce.cpp b/src/backend/cpu/reduce.cpp
index 5724508..9b5b9f0 100644
--- a/src/backend/cpu/reduce.cpp
+++ b/src/backend/cpu/reduce.cpp
@@ -139,6 +139,8 @@ namespace cpu
     INSTANTIATE(af_min_t, uintl  , uintl  )
     INSTANTIATE(af_min_t, char   , char   )
     INSTANTIATE(af_min_t, uchar  , uchar  )
+    INSTANTIATE(af_min_t, short  , short  )
+    INSTANTIATE(af_min_t, ushort , ushort )
 
     //max
     INSTANTIATE(af_max_t, float  , float  )
@@ -151,6 +153,8 @@ namespace cpu
     INSTANTIATE(af_max_t, uintl  , uintl  )
     INSTANTIATE(af_max_t, char   , char   )
     INSTANTIATE(af_max_t, uchar  , uchar  )
+    INSTANTIATE(af_max_t, short  , short  )
+    INSTANTIATE(af_max_t, ushort , ushort )
 
     //sum
     INSTANTIATE(af_add_t, float  , float  )
@@ -163,8 +167,10 @@ namespace cpu
     INSTANTIATE(af_add_t, uintl  , uintl  )
     INSTANTIATE(af_add_t, char   , int    )
     INSTANTIATE(af_add_t, uchar  , uint   )
+    INSTANTIATE(af_add_t, short  , int    )
+    INSTANTIATE(af_add_t, ushort , uint   )
 
-    //sum
+    //mul
     INSTANTIATE(af_mul_t, float  , float  )
     INSTANTIATE(af_mul_t, double , double )
     INSTANTIATE(af_mul_t, cfloat , cfloat )
@@ -175,6 +181,8 @@ namespace cpu
     INSTANTIATE(af_mul_t, uintl  , uintl  )
     INSTANTIATE(af_mul_t, char   , int    )
     INSTANTIATE(af_mul_t, uchar  , uint   )
+    INSTANTIATE(af_mul_t, short  , int    )
+    INSTANTIATE(af_mul_t, ushort , uint   )
 
     // count
     INSTANTIATE(af_notzero_t, float  , uint)
@@ -187,6 +195,8 @@ namespace cpu
     INSTANTIATE(af_notzero_t, uintl  , uint)
     INSTANTIATE(af_notzero_t, char   , uint)
     INSTANTIATE(af_notzero_t, uchar  , uint)
+    INSTANTIATE(af_notzero_t, short  , uint)
+    INSTANTIATE(af_notzero_t, ushort , uint)
 
     //anytrue
     INSTANTIATE(af_or_t, float  , char)
@@ -199,6 +209,8 @@ namespace cpu
     INSTANTIATE(af_or_t, uintl  , char)
     INSTANTIATE(af_or_t, char   , char)
     INSTANTIATE(af_or_t, uchar  , char)
+    INSTANTIATE(af_or_t, short  , char)
+    INSTANTIATE(af_or_t, ushort , char)
 
     //alltrue
     INSTANTIATE(af_and_t, float  , char)
@@ -211,4 +223,6 @@ namespace cpu
     INSTANTIATE(af_and_t, uintl  , char)
     INSTANTIATE(af_and_t, char   , char)
     INSTANTIATE(af_and_t, uchar  , char)
+    INSTANTIATE(af_and_t, short  , char)
+    INSTANTIATE(af_and_t, ushort , char)
 }
diff --git a/src/backend/cpu/regions.cpp b/src/backend/cpu/regions.cpp
index b137768..b753fb5 100644
--- a/src/backend/cpu/regions.cpp
+++ b/src/backend/cpu/regions.cpp
@@ -208,5 +208,7 @@ INSTANTIATE(float )
 INSTANTIATE(double)
 INSTANTIATE(int   )
 INSTANTIATE(uint  )
+INSTANTIATE(short )
+INSTANTIATE(ushort)
 
 }
diff --git a/src/backend/cpu/reorder.cpp b/src/backend/cpu/reorder.cpp
index 42da24e..a9824a4 100644
--- a/src/backend/cpu/reorder.cpp
+++ b/src/backend/cpu/reorder.cpp
@@ -70,6 +70,8 @@ namespace cpu
     INSTANTIATE(char)
     INSTANTIATE(intl)
     INSTANTIATE(uintl)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 
 
 }
diff --git a/src/backend/cpu/resize.cpp b/src/backend/cpu/resize.cpp
index a4ba43f..8c4da58 100644
--- a/src/backend/cpu/resize.cpp
+++ b/src/backend/cpu/resize.cpp
@@ -217,4 +217,6 @@ namespace cpu
     INSTANTIATE(uintl)
     INSTANTIATE(uchar)
     INSTANTIATE(char)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 }
diff --git a/src/backend/cpu/rotate.cpp b/src/backend/cpu/rotate.cpp
index 2293ee2..a4af64b 100644
--- a/src/backend/cpu/rotate.cpp
+++ b/src/backend/cpu/rotate.cpp
@@ -115,4 +115,6 @@ namespace cpu
     INSTANTIATE(uintl)
     INSTANTIATE(uchar)
     INSTANTIATE(char)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 }
diff --git a/src/backend/cpu/scan.cpp b/src/backend/cpu/scan.cpp
index 9cd4163..2bdda21 100644
--- a/src/backend/cpu/scan.cpp
+++ b/src/backend/cpu/scan.cpp
@@ -108,6 +108,8 @@ namespace cpu
     INSTANTIATE(af_add_t, uintl  , uintl  )
     INSTANTIATE(af_add_t, char   , int    )
     INSTANTIATE(af_add_t, uchar  , uint   )
+    INSTANTIATE(af_add_t, short  , int    )
+    INSTANTIATE(af_add_t, ushort , uint   )
     INSTANTIATE(af_notzero_t, char  , uint   )
 
 }
diff --git a/src/backend/cpu/select.cpp b/src/backend/cpu/select.cpp
index 286e884..7b2cc81 100644
--- a/src/backend/cpu/select.cpp
+++ b/src/backend/cpu/select.cpp
@@ -140,4 +140,6 @@ namespace cpu
     INSTANTIATE(uintl  )
     INSTANTIATE(char   )
     INSTANTIATE(uchar  )
+    INSTANTIATE(short  )
+    INSTANTIATE(ushort )
 }
diff --git a/src/backend/cpu/set.cpp b/src/backend/cpu/set.cpp
index 3a8239e..26efb2c 100644
--- a/src/backend/cpu/set.cpp
+++ b/src/backend/cpu/set.cpp
@@ -115,4 +115,6 @@ namespace cpu
     INSTANTIATE(uint)
     INSTANTIATE(char)
     INSTANTIATE(uchar)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 }
diff --git a/src/backend/cpu/shift.cpp b/src/backend/cpu/shift.cpp
index eff5c09..05cac4c 100644
--- a/src/backend/cpu/shift.cpp
+++ b/src/backend/cpu/shift.cpp
@@ -82,5 +82,7 @@ namespace cpu
     INSTANTIATE(uintl)
     INSTANTIATE(uchar)
     INSTANTIATE(char)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 
 }
diff --git a/src/backend/cpu/sobel.cpp b/src/backend/cpu/sobel.cpp
index 41cd8ce..3c6b174 100644
--- a/src/backend/cpu/sobel.cpp
+++ b/src/backend/cpu/sobel.cpp
@@ -104,5 +104,7 @@ INSTANTIATE(int   , int)
 INSTANTIATE(uint  , int)
 INSTANTIATE(char  , int)
 INSTANTIATE(uchar , int)
+INSTANTIATE(short , int)
+INSTANTIATE(ushort, int)
 
 }
diff --git a/src/backend/cpu/sort.cpp b/src/backend/cpu/sort.cpp
index 6c1ebb7..8e5120e 100644
--- a/src/backend/cpu/sort.cpp
+++ b/src/backend/cpu/sort.cpp
@@ -81,4 +81,6 @@ namespace cpu
     INSTANTIATE(uint)
     INSTANTIATE(char)
     INSTANTIATE(uchar)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 }
diff --git a/src/backend/cpu/sort_by_key.cpp b/src/backend/cpu/sort_by_key.cpp
index b96c6cc..7350cb5 100644
--- a/src/backend/cpu/sort_by_key.cpp
+++ b/src/backend/cpu/sort_by_key.cpp
@@ -122,6 +122,9 @@ namespace cpu
     INSTANTIATE(Tk, uint)      \
     INSTANTIATE(Tk, char)      \
     INSTANTIATE(Tk, uchar)     \
+    INSTANTIATE(Tk, short)     \
+    INSTANTIATE(Tk, ushort)    \
+
 
     INSTANTIATE1(float)
     INSTANTIATE1(double)
@@ -129,4 +132,6 @@ namespace cpu
     INSTANTIATE1(uint)
     INSTANTIATE1(char)
     INSTANTIATE1(uchar)
+    INSTANTIATE1(short)
+    INSTANTIATE1(ushort)
 }
diff --git a/src/backend/cpu/sort_index.cpp b/src/backend/cpu/sort_index.cpp
index 75690e0..245f152 100644
--- a/src/backend/cpu/sort_index.cpp
+++ b/src/backend/cpu/sort_index.cpp
@@ -105,4 +105,6 @@ namespace cpu
     INSTANTIATE(uint)
     INSTANTIATE(char)
     INSTANTIATE(uchar)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 }
diff --git a/src/backend/cpu/susan.cpp b/src/backend/cpu/susan.cpp
index ad5b702..458577f 100644
--- a/src/backend/cpu/susan.cpp
+++ b/src/backend/cpu/susan.cpp
@@ -132,5 +132,7 @@ INSTANTIATE(char  )
 INSTANTIATE(int   )
 INSTANTIATE(uint  )
 INSTANTIATE(uchar )
+INSTANTIATE(short)
+INSTANTIATE(ushort)
 
 }
diff --git a/src/backend/cpu/tile.cpp b/src/backend/cpu/tile.cpp
index 4ca30d2..77e72af 100644
--- a/src/backend/cpu/tile.cpp
+++ b/src/backend/cpu/tile.cpp
@@ -71,5 +71,7 @@ namespace cpu
     INSTANTIATE(uintl)
     INSTANTIATE(uchar)
     INSTANTIATE(char)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 
 }
diff --git a/src/backend/cpu/transform.cpp b/src/backend/cpu/transform.cpp
index d1cf58e..68e8d96 100644
--- a/src/backend/cpu/transform.cpp
+++ b/src/backend/cpu/transform.cpp
@@ -142,4 +142,6 @@ namespace cpu
     INSTANTIATE(uintl)
     INSTANTIATE(uchar)
     INSTANTIATE(char)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 }
diff --git a/src/backend/cpu/transpose.cpp b/src/backend/cpu/transpose.cpp
index f820f9e..bea0aa0 100644
--- a/src/backend/cpu/transpose.cpp
+++ b/src/backend/cpu/transpose.cpp
@@ -159,6 +159,8 @@ INSTANTIATE(uint   )
 INSTANTIATE(uchar  )
 INSTANTIATE(intl   )
 INSTANTIATE(uintl  )
+INSTANTIATE(short)
+INSTANTIATE(ushort)
 
 
 }
diff --git a/src/backend/cpu/triangle.cpp b/src/backend/cpu/triangle.cpp
index 82c4fd1..6b0f326 100644
--- a/src/backend/cpu/triangle.cpp
+++ b/src/backend/cpu/triangle.cpp
@@ -85,5 +85,7 @@ Array<T> triangle(const Array<T> &in)
     INSTANTIATE(uintl)
     INSTANTIATE(char)
     INSTANTIATE(uchar)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 
 }
diff --git a/src/backend/cpu/types.hpp b/src/backend/cpu/types.hpp
index a281b6b..0776df7 100644
--- a/src/backend/cpu/types.hpp
+++ b/src/backend/cpu/types.hpp
@@ -16,6 +16,7 @@ namespace cpu
     typedef std::complex<double>    cdouble;
     typedef unsigned int            uint;
     typedef unsigned char           uchar;
+    typedef unsigned short          ushort;
 
     template<typename T> struct is_complex          { static const bool value = false;  };
     template<> struct           is_complex<cfloat>  { static const bool value = true;   };
diff --git a/src/backend/cpu/unwrap.cpp b/src/backend/cpu/unwrap.cpp
index 466da2e..f9c25f9 100644
--- a/src/backend/cpu/unwrap.cpp
+++ b/src/backend/cpu/unwrap.cpp
@@ -119,4 +119,6 @@ namespace cpu
     INSTANTIATE(uintl)
     INSTANTIATE(uchar)
     INSTANTIATE(char)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 }
diff --git a/src/backend/cpu/where.cpp b/src/backend/cpu/where.cpp
index c1ffd0f..6c0f8c7 100644
--- a/src/backend/cpu/where.cpp
+++ b/src/backend/cpu/where.cpp
@@ -72,5 +72,7 @@ namespace cpu
     INSTANTIATE(intl   )
     INSTANTIATE(uintl  )
     INSTANTIATE(uchar  )
+    INSTANTIATE(short  )
+    INSTANTIATE(ushort )
 
 }
diff --git a/src/backend/cpu/wrap.cpp b/src/backend/cpu/wrap.cpp
index 1ed9150..a04a6f5 100644
--- a/src/backend/cpu/wrap.cpp
+++ b/src/backend/cpu/wrap.cpp
@@ -119,4 +119,6 @@ namespace cpu
     INSTANTIATE(uintl)
     INSTANTIATE(uchar)
     INSTANTIATE(char)
+    INSTANTIATE(short)
+    INSTANTIATE(ushort)
 }
diff --git a/test/array.cpp b/test/array.cpp
index 682bc5b..e3cb622 100644
--- a/test/array.cpp
+++ b/test/array.cpp
@@ -20,7 +20,7 @@ class Array : public ::testing::Test
 
 };
 
-typedef ::testing::Types<float, double, af::cfloat, af::cdouble, char, unsigned char, int, uint, intl, uintl> TestTypes;
+typedef ::testing::Types<float, double, af::cfloat, af::cdouble, char, unsigned char, int, uint, intl, uintl, short, ushort> TestTypes;
 TYPED_TEST_CASE(Array, TestTypes);
 
 TEST(Array, ConstructorDefault)
@@ -283,6 +283,26 @@ TYPED_TEST(Array, TypeAttributes)
             EXPECT_FALSE(one.iscomplex());
             EXPECT_FALSE(one.isbool());
             break;
+        case s16:
+            EXPECT_FALSE(one.isfloating());
+            EXPECT_FALSE(one.isdouble());
+            EXPECT_FALSE(one.issingle());
+            EXPECT_FALSE(one.isrealfloating());
+            EXPECT_TRUE(one.isinteger());
+            EXPECT_TRUE(one.isreal());
+            EXPECT_FALSE(one.iscomplex());
+            EXPECT_FALSE(one.isbool());
+            break;
+        case u16:
+            EXPECT_FALSE(one.isfloating());
+            EXPECT_FALSE(one.isdouble());
+            EXPECT_FALSE(one.issingle());
+            EXPECT_FALSE(one.isrealfloating());
+            EXPECT_TRUE(one.isinteger());
+            EXPECT_TRUE(one.isreal());
+            EXPECT_FALSE(one.iscomplex());
+            EXPECT_FALSE(one.isbool());
+            break;
         case u8:
             EXPECT_FALSE(one.isfloating());
             EXPECT_FALSE(one.isdouble());
diff --git a/test/assign.cpp b/test/assign.cpp
index 5692392..af68acd 100644
--- a/test/assign.cpp
+++ b/test/assign.cpp
@@ -79,7 +79,7 @@ class ArrayAssign : public ::testing::Test
 };
 
 // create a list of types to be tested
-typedef ::testing::Types<float, af::cdouble, af::cfloat, double, int, uint, char, uchar, intl, uintl> TestTypes;
+typedef ::testing::Types<float, af::cdouble, af::cfloat, double, int, uint, char, uchar, intl, uintl, short, ushort> TestTypes;
 
 // register the type list
 TYPED_TEST_CASE(ArrayAssign, TestTypes);
diff --git a/test/bilateral.cpp b/test/bilateral.cpp
index c80d376..08b7a4c 100644
--- a/test/bilateral.cpp
+++ b/test/bilateral.cpp
@@ -80,7 +80,7 @@ class BilateralOnData : public ::testing::Test
 {
 };
 
-typedef ::testing::Types<float, double, int, uint, char, uchar> DataTestTypes;
+typedef ::testing::Types<float, double, int, uint, char, uchar, short, ushort> DataTestTypes;
 
 // register the type list
 TYPED_TEST_CASE(BilateralOnData, DataTestTypes);
diff --git a/test/constant.cpp b/test/constant.cpp
index 8f65582..d3244a0 100644
--- a/test/constant.cpp
+++ b/test/constant.cpp
@@ -19,7 +19,7 @@ using std::vector;
 template<typename T>
 class Constant : public ::testing::Test { };
 
-typedef ::testing::Types<float, af::cfloat, double, af::cdouble, int, unsigned, char, uchar, uintl, intl> TestTypes;
+typedef ::testing::Types<float, af::cfloat, double, af::cdouble, int, unsigned, char, uchar, uintl, intl, short, ushort> TestTypes;
 TYPED_TEST_CASE(Constant, TestTypes);
 
 template<typename T>
diff --git a/test/convolve.cpp b/test/convolve.cpp
index 185eba9..630742b 100644
--- a/test/convolve.cpp
+++ b/test/convolve.cpp
@@ -28,7 +28,7 @@ class Convolve : public ::testing::Test
 };
 
 // create a list of types to be tested
-typedef ::testing::Types<cdouble, cfloat, float, double, int, uint, char, uchar> TestTypes;
+typedef ::testing::Types<cdouble, cfloat, float, double, int, uint, char, uchar, short, ushort> TestTypes;
 
 // register the type list
 TYPED_TEST_CASE(Convolve, TestTypes);
diff --git a/test/diff1.cpp b/test/diff1.cpp
index 7fe19db..9459681 100644
--- a/test/diff1.cpp
+++ b/test/diff1.cpp
@@ -46,7 +46,7 @@ class Diff1 : public ::testing::Test
 };
 
 // create a list of types to be tested
-typedef ::testing::Types<float, cfloat, double, cdouble, int, unsigned, intl, uintl, char, unsigned char> TestTypes;
+typedef ::testing::Types<float, cfloat, double, cdouble, int, unsigned, intl, uintl, char, unsigned char, short, ushort> TestTypes;
 
 // register the type list
 TYPED_TEST_CASE(Diff1, TestTypes);
diff --git a/test/diff2.cpp b/test/diff2.cpp
index 9f7d0cb..3649f7a 100644
--- a/test/diff2.cpp
+++ b/test/diff2.cpp
@@ -46,7 +46,7 @@ class Diff2 : public ::testing::Test
 };
 
 // create a list of types to be tested
-typedef ::testing::Types<float, cfloat, double, cdouble, int, unsigned, intl, uintl, char, unsigned char> TestTypes;
+typedef ::testing::Types<float, cfloat, double, cdouble, int, unsigned, intl, uintl, char, unsigned char, short, ushort> TestTypes;
 
 // register the type list
 TYPED_TEST_CASE(Diff2, TestTypes);
diff --git a/test/dog.cpp b/test/dog.cpp
index 284a8ad..f981bba 100644
--- a/test/dog.cpp
+++ b/test/dog.cpp
@@ -24,7 +24,7 @@ class DOG : public ::testing::Test
 };
 
 // create a list of types to be tested
-typedef ::testing::Types<float, double, int, uint, char, uchar> TestTypes;
+typedef ::testing::Types<float, double, int, uint, char, uchar, short, ushort> TestTypes;
 
 // register the type list
 TYPED_TEST_CASE(DOG, TestTypes);
@@ -35,7 +35,7 @@ TYPED_TEST(DOG, Basic)
     if (noDoubleTests<TypeParam>()) return;
 
     af::dim4 iDims(512, 512, 1, 1);
-    af::array in = af::constant<TypeParam>(1, iDims);
+    af::array in = af::constant(1, iDims, (af_dtype)af::dtype_traits<float>::af_type);
     /* calculate DOG using ArrayFire functions */
     af::array k1    = af::gaussianKernel(3, 3);
     af::array k2    = af::gaussianKernel(2, 2);
@@ -54,7 +54,7 @@ TYPED_TEST(DOG, Batch)
     if (noDoubleTests<TypeParam>()) return;
 
     af::dim4 iDims(512, 512, 3, 1);
-    af::array in = af::constant<TypeParam>(1, iDims);
+    af::array in = af::constant(1, iDims, (af_dtype)af::dtype_traits<float>::af_type);
     /* calculate DOG using ArrayFire functions */
     af::array k1    = af::gaussianKernel(3, 3);
     af::array k2    = af::gaussianKernel(2, 2);
diff --git a/test/fast.cpp b/test/fast.cpp
index 2c24f8a..ba61908 100644
--- a/test/fast.cpp
+++ b/test/fast.cpp
@@ -63,7 +63,7 @@ class FixedFAST : public ::testing::Test
 };
 
 typedef ::testing::Types<float, double> FloatTestTypes;
-typedef ::testing::Types<int, unsigned> FixedTestTypes;
+typedef ::testing::Types<int, unsigned, ushort> FixedTestTypes;
 
 TYPED_TEST_CASE(FloatFAST, FloatTestTypes);
 TYPED_TEST_CASE(FixedFAST, FixedTestTypes);
diff --git a/test/hamming.cpp b/test/hamming.cpp
index 042ff30..5b359b7 100644
--- a/test/hamming.cpp
+++ b/test/hamming.cpp
@@ -35,8 +35,8 @@ class HammingMatcher32 : public ::testing::Test
 };
 
 // create lists of types to be tested
-typedef ::testing::Types<uchar> TestTypes8;
-typedef ::testing::Types<uint> TestTypes32;
+typedef ::testing::Types<uchar, ushort> TestTypes8;
+typedef ::testing::Types<uint, uintl> TestTypes32;
 
 // register the type list
 TYPED_TEST_CASE(HammingMatcher8,  TestTypes8);
diff --git a/test/histogram.cpp b/test/histogram.cpp
index dfae986..9ab9e69 100644
--- a/test/histogram.cpp
+++ b/test/histogram.cpp
@@ -27,7 +27,7 @@ class Histogram : public ::testing::Test
 };
 
 // create a list of types to be tested
-typedef ::testing::Types<float, double, int, uint, char, uchar> TestTypes;
+typedef ::testing::Types<float, double, int, uint, char, uchar, short, ushort> TestTypes;
 
 // register the type list
 TYPED_TEST_CASE(Histogram, TestTypes);
diff --git a/test/index.cpp b/test/index.cpp
index 6a798aa..497183d 100644
--- a/test/index.cpp
+++ b/test/index.cpp
@@ -126,7 +126,7 @@ public:
     vector<af_seq> span_seqs;
 };
 
-typedef ::testing::Types<float, double, af::cfloat, af::cdouble, int, unsigned, unsigned char, intl, uintl> AllTypes;
+typedef ::testing::Types<float, double, af::cfloat, af::cdouble, int, unsigned, unsigned char, intl, uintl, short, ushort> AllTypes;
 TYPED_TEST_CASE(Indexing1D, AllTypes);
 
 TYPED_TEST(Indexing1D, Continious)          { DimCheck<TypeParam>(this->continuous_seqs);           }
@@ -549,7 +549,7 @@ class lookup : public ::testing::Test
         }
 };
 
-typedef ::testing::Types<float, double, int, unsigned, unsigned char> ArrIdxTestTypes;
+typedef ::testing::Types<float, double, int, unsigned, unsigned char, short, ushort> ArrIdxTestTypes;
 TYPED_TEST_CASE(lookup, ArrIdxTestTypes);
 
 template<typename T>
diff --git a/test/iota.cpp b/test/iota.cpp
index 1c1ca6c..e91741d 100644
--- a/test/iota.cpp
+++ b/test/iota.cpp
@@ -38,7 +38,7 @@ class Iota : public ::testing::Test
 };
 
 // create a list of types to be tested
-typedef ::testing::Types<float, double, int, unsigned int, intl, uintl, unsigned char> TestTypes;
+typedef ::testing::Types<float, double, int, unsigned int, intl, uintl, unsigned char, short, ushort> TestTypes;
 
 // register the type list
 TYPED_TEST_CASE(Iota, TestTypes);
diff --git a/test/join.cpp b/test/join.cpp
index 0101445..0c5b1bf 100644
--- a/test/join.cpp
+++ b/test/join.cpp
@@ -39,7 +39,7 @@ class Join : public ::testing::Test
 };
 
 // create a list of types to be tested
-typedef ::testing::Types<float, double, cfloat, cdouble, int, unsigned int, intl, uintl, char, unsigned char> TestTypes;
+typedef ::testing::Types<float, double, cfloat, cdouble, int, unsigned int, intl, uintl, char, unsigned char, short, ushort> TestTypes;
 
 // register the type list
 TYPED_TEST_CASE(Join, TestTypes);
diff --git a/test/match_template.cpp b/test/match_template.cpp
index 083bdca..adebea4 100644
--- a/test/match_template.cpp
+++ b/test/match_template.cpp
@@ -26,7 +26,7 @@ class MatchTemplate : public ::testing::Test
 };
 
 // create a list of types to be tested
-typedef ::testing::Types<float, double, int, uint, char, uchar> TestTypes;
+typedef ::testing::Types<float, double, int, uint, char, uchar, short, ushort> TestTypes;
 
 // register the type list
 TYPED_TEST_CASE(MatchTemplate, TestTypes);
diff --git a/test/mean.cpp b/test/mean.cpp
index 15a2c35..1559c78 100644
--- a/test/mean.cpp
+++ b/test/mean.cpp
@@ -28,7 +28,7 @@ class Mean : public ::testing::Test
 };
 
 // create a list of types to be tested
-typedef ::testing::Types<cdouble, cfloat, float, double, int, uint, intl, uintl, char, uchar> TestTypes;
+typedef ::testing::Types<cdouble, cfloat, float, double, int, uint, intl, uintl, char, uchar, short, ushort> TestTypes;
 
 // register the type list
 TYPED_TEST_CASE(Mean, TestTypes);
@@ -50,18 +50,20 @@ struct c32HelperType {
 template<typename T>
 struct elseType {
    typedef typename cond_type< is_same_type<T, uintl>::value ||
-                               is_same_type<T, intl>::value,
+                               is_same_type<T, intl> ::value,
                                               double,
                                               T>::type type;
 };
 
 template<typename T>
 struct meanOutType {
-   typedef typename cond_type< is_same_type<T, float>::value ||
-                               is_same_type<T, int>::value ||
-                               is_same_type<T, uint>::value ||
-                               is_same_type<T, uchar>::value ||
-                               is_same_type<T, char>::value,
+   typedef typename cond_type< is_same_type<T, float>   ::value ||
+                               is_same_type<T, int>     ::value ||
+                               is_same_type<T, uint>    ::value ||
+                               is_same_type<T, uchar>   ::value ||
+                               is_same_type<T, short>   ::value ||
+                               is_same_type<T, ushort>  ::value ||
+                               is_same_type<T, char>    ::value,
                                               float,
                               typename elseType<T>::type>::type type;
 };
@@ -198,6 +200,16 @@ TEST(Mean, CPP_u8)
     testCPPMean<uchar>(2, af::dim4(100, 1, 1, 1));
 }
 
+TEST(Mean, CPP_s16)
+{
+    testCPPMean<short>(2, af::dim4(5, 5, 2, 2));
+}
+
+TEST(Mean, CPP_u16)
+{
+    testCPPMean<ushort>(2, af::dim4(100, 1, 1, 1));
+}
+
 TEST(Mean, CPP_cfloat)
 {
     testCPPMean<cfloat>(cfloat(2.1f), af::dim4(10, 5, 2, 1));
diff --git a/test/meanshift.cpp b/test/meanshift.cpp
index 5f1f9a4..2cc8750 100644
--- a/test/meanshift.cpp
+++ b/test/meanshift.cpp
@@ -27,7 +27,7 @@ class Meanshift : public ::testing::Test
         virtual void SetUp() {}
 };
 
-typedef ::testing::Types<float, double, int, uint, char, uchar> TestTypes;
+typedef ::testing::Types<float, double, int, uint, char, uchar, short, ushort> TestTypes;
 
 TYPED_TEST_CASE(Meanshift, TestTypes);
 
diff --git a/test/medfilt.cpp b/test/medfilt.cpp
index db00d94..99dd0b6 100644
--- a/test/medfilt.cpp
+++ b/test/medfilt.cpp
@@ -26,7 +26,7 @@ class MedianFilter : public ::testing::Test
 };
 
 // create a list of types to be tested
-typedef ::testing::Types<float, double, int, uint, char, uchar> TestTypes;
+typedef ::testing::Types<float, double, int, uint, char, uchar, short, ushort> TestTypes;
 
 // register the type list
 TYPED_TEST_CASE(MedianFilter, TestTypes);
diff --git a/test/median.cpp b/test/median.cpp
index 86dee96..9e50b66 100644
--- a/test/median.cpp
+++ b/test/median.cpp
@@ -106,4 +106,6 @@ MEDIAN0(float, float)
 MEDIAN0(float, int)
 MEDIAN0(float, uint)
 MEDIAN0(float, uchar)
+MEDIAN0(float, short)
+MEDIAN0(float, ushort)
 MEDIAN0(double, double)
diff --git a/test/moddims.cpp b/test/moddims.cpp
index 5fe751b..053948d 100644
--- a/test/moddims.cpp
+++ b/test/moddims.cpp
@@ -36,7 +36,7 @@ class Moddims : public ::testing::Test
 
 // create a list of types to be tested
 // TODO: complex types tests have to be added
-typedef ::testing::Types<float, double, int, unsigned, char, unsigned char> TestTypes;
+typedef ::testing::Types<float, double, int, unsigned, char, unsigned char, short, ushort> TestTypes;
 
 // register the type list
 TYPED_TEST_CASE(Moddims, TestTypes);
diff --git a/test/morph.cpp b/test/morph.cpp
index 04de84f..d73ca9b 100644
--- a/test/morph.cpp
+++ b/test/morph.cpp
@@ -27,7 +27,7 @@ class Morph : public ::testing::Test
 };
 
 // create a list of types to be tested
-typedef ::testing::Types<float, double, int, uint, char, uchar> TestTypes;
+typedef ::testing::Types<float, double, int, uint, char, uchar, short, ushort> TestTypes;
 
 // register the type list
 TYPED_TEST_CASE(Morph, TestTypes);
diff --git a/test/nearest_neighbour.cpp b/test/nearest_neighbour.cpp
index 3ca166b..2bca086 100644
--- a/test/nearest_neighbour.cpp
+++ b/test/nearest_neighbour.cpp
@@ -28,7 +28,7 @@ class NearestNeighbour : public ::testing::Test
 };
 
 // create lists of types to be tested
-typedef ::testing::Types<float, double, int, uint, intl, uintl, uchar> TestTypes;
+typedef ::testing::Types<float, double, int, uint, intl, uintl, uchar, short, ushort> TestTypes;
 
 template<typename T>
 struct otype_t
@@ -37,6 +37,18 @@ struct otype_t
 };
 
 template<>
+struct otype_t<short>
+{
+    typedef int otype;
+};
+
+template<>
+struct otype_t<ushort>
+{
+    typedef uint otype;
+};
+
+template<>
 struct otype_t<uchar>
 {
     typedef uint otype;
diff --git a/test/random.cpp b/test/random.cpp
index 4ca5126..29f157a 100644
--- a/test/random.cpp
+++ b/test/random.cpp
@@ -178,7 +178,7 @@ void testSetSeed(const uintl seed0, const uintl seed1, bool is_norm = false)
 
     for (int i = 0; i < num; i++) {
         // Verify if same seed produces same arrays
-        ASSERT_EQ(h_in0[i], h_in2[i]);
+        ASSERT_EQ(h_in0[i], h_in2[i]) << "at : " << i;
 
         // Verify different arrays created with different seeds differ
         // b8 and u9 can clash because they generate a small set of values
diff --git a/test/range.cpp b/test/range.cpp
index 6d7d9b7..be4c22b 100644
--- a/test/range.cpp
+++ b/test/range.cpp
@@ -38,7 +38,7 @@ class Range : public ::testing::Test
 };
 
 // create a list of types to be tested
-typedef ::testing::Types<float, double, int, unsigned int, intl, uintl, unsigned char> TestTypes;
+typedef ::testing::Types<float, double, int, unsigned int, intl, uintl, unsigned char, short, ushort> TestTypes;
 
 // register the type list
 TYPED_TEST_CASE(Range, TestTypes);
diff --git a/test/reduce.cpp b/test/reduce.cpp
index 000f1ea..b38d399 100644
--- a/test/reduce.cpp
+++ b/test/reduce.cpp
@@ -31,7 +31,7 @@ class Reduce : public ::testing::Test
 {
 };
 
-typedef ::testing::Types<float, double, af::cfloat, af::cdouble, uint, int, intl, uintl, uchar> TestTypes;
+typedef ::testing::Types<float, double, af::cfloat, af::cdouble, uint, int, intl, uintl, uchar, short, ushort> TestTypes;
 TYPED_TEST_CASE(Reduce, TestTypes);
 
 typedef af_err (*reduceFunc)(af_array *, const af_array, const int);
@@ -125,10 +125,14 @@ struct promote_type {
 };
 
 // char and uchar are promoted to int for sum and product
-template<> struct promote_type<uchar, af_sum>       { typedef uint type; };
-template<> struct promote_type<char , af_sum>       { typedef uint type; };
-template<> struct promote_type<uchar, af_product>   { typedef uint type; };
-template<> struct promote_type<char , af_product>   { typedef uint type; };
+template<> struct promote_type<uchar , af_sum>       { typedef uint type; };
+template<> struct promote_type<char  , af_sum>       { typedef uint type; };
+template<> struct promote_type<short , af_sum>       { typedef int  type; };
+template<> struct promote_type<ushort, af_sum>       { typedef uint type; };
+template<> struct promote_type<uchar , af_product>   { typedef uint type; };
+template<> struct promote_type<char  , af_product>   { typedef uint type; };
+template<> struct promote_type<short, af_product>    { typedef int  type; };
+template<> struct promote_type<ushort, af_product>   { typedef uint type; };
 
 #define REDUCE_TESTS(FN)                                                                    \
     TYPED_TEST(Reduce,Test_##FN)                                                    \
diff --git a/test/regions.cpp b/test/regions.cpp
index 273f336..fccb902 100644
--- a/test/regions.cpp
+++ b/test/regions.cpp
@@ -33,7 +33,7 @@ class Regions : public ::testing::Test
 };
 
 // create a list of types to be tested
-typedef ::testing::Types<float, double, int, unsigned> TestTypes;
+typedef ::testing::Types<float, double, int, unsigned, short, ushort> TestTypes;
 
 // register the type list
 TYPED_TEST_CASE(Regions, TestTypes);
diff --git a/test/reorder.cpp b/test/reorder.cpp
index 789fbfb..4b57170 100644
--- a/test/reorder.cpp
+++ b/test/reorder.cpp
@@ -38,7 +38,7 @@ class Reorder : public ::testing::Test
 };
 
 // create a list of types to be tested
-typedef ::testing::Types<float, double, cfloat, cdouble, int, unsigned int, char, unsigned char> TestTypes;
+typedef ::testing::Types<float, double, cfloat, cdouble, int, unsigned int, char, unsigned char, short, ushort> TestTypes;
 
 // register the type list
 TYPED_TEST_CASE(Reorder, TestTypes);
diff --git a/test/replace.cpp b/test/replace.cpp
index 34316b3..c6d3b5d 100644
--- a/test/replace.cpp
+++ b/test/replace.cpp
@@ -24,7 +24,7 @@ class Replace : public ::testing::Test
 {
 };
 
-typedef ::testing::Types<float, double, af::cfloat, af::cdouble, uint, int, intl, uintl, uchar, char> TestTypes;
+typedef ::testing::Types<float, double, af::cfloat, af::cdouble, uint, int, intl, uintl, uchar, char, short, ushort> TestTypes;
 
 TYPED_TEST_CASE(Replace, TestTypes);
 
diff --git a/test/resize.cpp b/test/resize.cpp
index 0be2af4..6ec4e55 100644
--- a/test/resize.cpp
+++ b/test/resize.cpp
@@ -54,7 +54,7 @@ class ResizeI : public ::testing::Test
 
 // create a list of types to be tested
 typedef ::testing::Types<float, double, cfloat, cdouble> TestTypesF;
-typedef ::testing::Types<int, unsigned, intl, uintl, unsigned char, char> TestTypesI;
+typedef ::testing::Types<int, unsigned, intl, uintl, unsigned char, char, short, ushort> TestTypesI;
 
 // register the type list
 TYPED_TEST_CASE(Resize, TestTypesF);
diff --git a/test/rotate.cpp b/test/rotate.cpp
index 00a234f..f97cd3a 100644
--- a/test/rotate.cpp
+++ b/test/rotate.cpp
@@ -32,7 +32,7 @@ class Rotate : public ::testing::Test
 };
 
 // create a list of types to be tested
-typedef ::testing::Types<float, double, cfloat, cdouble, int, intl, char> TestTypes;
+typedef ::testing::Types<float, double, cfloat, cdouble, int, intl, char, short> TestTypes;
 
 // register the type list
 TYPED_TEST_CASE(Rotate, TestTypes);
diff --git a/test/rotate_linear.cpp b/test/rotate_linear.cpp
index 06a6433..29a9107 100644
--- a/test/rotate_linear.cpp
+++ b/test/rotate_linear.cpp
@@ -36,7 +36,7 @@ class Rotate : public ::testing::Test
 };
 
 // create a list of types to be tested
-typedef ::testing::Types<float, double, cfloat, cdouble, int, intl, char> TestTypes;
+typedef ::testing::Types<float, double, cfloat, cdouble, int, intl, char, short> TestTypes;
 
 // register the type list
 TYPED_TEST_CASE(Rotate, TestTypes);
diff --git a/test/sat.cpp b/test/sat.cpp
index 00261e2..4cfb582 100644
--- a/test/sat.cpp
+++ b/test/sat.cpp
@@ -26,7 +26,7 @@ class SAT : public ::testing::Test
 };
 
 // create a list of types to be tested
-typedef ::testing::Types<float, double, int, uint, char, uchar, uintl, intl> TestTypes;
+typedef ::testing::Types<float, double, int, uint, char, uchar, uintl, intl, short, ushort> TestTypes;
 
 // register the type list
 TYPED_TEST_CASE(SAT, TestTypes);
diff --git a/test/scan.cpp b/test/scan.cpp
index 88ee8b4..386568d 100644
--- a/test/scan.cpp
+++ b/test/scan.cpp
@@ -108,7 +108,9 @@ SCAN_TESTS(accum, cdouble , cdouble   , cdouble   );
 SCAN_TESTS(accum, unsigned, unsigned  , unsigned  );
 SCAN_TESTS(accum, intl    , intl      , intl      );
 SCAN_TESTS(accum, uintl   , uintl     , uintl     );
-SCAN_TESTS(accum, uchar   , unsigned char, unsigned);
+SCAN_TESTS(accum, uchar   , uchar     , unsigned  );
+SCAN_TESTS(accum, short   , short     , int       );
+SCAN_TESTS(accum, ushort  , ushort    , uint      );
 
 TEST(Scan,Test_Scan_Big0)
 {
diff --git a/test/select.cpp b/test/select.cpp
index bc3e1f0..91c8110 100644
--- a/test/select.cpp
+++ b/test/select.cpp
@@ -24,7 +24,7 @@ class Select : public ::testing::Test
 {
 };
 
-typedef ::testing::Types<float, double, af::cfloat, af::cdouble, uint, int, intl, uintl, uchar, char> TestTypes;
+typedef ::testing::Types<float, double, af::cfloat, af::cdouble, uint, int, intl, uintl, uchar, char, short, ushort> TestTypes;
 TYPED_TEST_CASE(Select, TestTypes);
 
 template<typename T>
diff --git a/test/shift.cpp b/test/shift.cpp
index a3cf35d..74f418c 100644
--- a/test/shift.cpp
+++ b/test/shift.cpp
@@ -38,7 +38,7 @@ class Shift : public ::testing::Test
 };
 
 // create a list of types to be tested
-typedef ::testing::Types<float, double, cfloat, cdouble, int, unsigned int, intl, uintl, char, unsigned char> TestTypes;
+typedef ::testing::Types<float, double, cfloat, cdouble, int, unsigned int, intl, uintl, char, unsigned char, short, ushort> TestTypes;
 // register the type list
 TYPED_TEST_CASE(Shift, TestTypes);
 
diff --git a/test/sobel.cpp b/test/sobel.cpp
index 2ec5ab0..d3f4528 100644
--- a/test/sobel.cpp
+++ b/test/sobel.cpp
@@ -34,7 +34,7 @@ class Sobel_Integer : public ::testing::Test
 
 // create a list of types to be tested
 typedef ::testing::Types<float, double> TestTypes;
-typedef ::testing::Types<int, unsigned, char, unsigned char> TestTypesInt;
+typedef ::testing::Types<int, unsigned, char, unsigned char, short, ushort> TestTypesInt;
 
 // register the type list
 TYPED_TEST_CASE(Sobel, TestTypes);
diff --git a/test/sort.cpp b/test/sort.cpp
index 7377d2a..ae63b3f 100644
--- a/test/sort.cpp
+++ b/test/sort.cpp
@@ -38,7 +38,7 @@ class Sort : public ::testing::Test
 };
 
 // create a list of types to be tested
-typedef ::testing::Types<float, double, uint, int, uchar> TestTypes;
+typedef ::testing::Types<float, double, uint, int, uchar, short, ushort> TestTypes;
 
 // register the type list
 TYPED_TEST_CASE(Sort, TestTypes);
diff --git a/test/sort_by_key.cpp b/test/sort_by_key.cpp
index 35bbc97..e67537b 100644
--- a/test/sort_by_key.cpp
+++ b/test/sort_by_key.cpp
@@ -38,7 +38,7 @@ class Sort : public ::testing::Test
 };
 
 // create a list of types to be tested
-typedef ::testing::Types<float, double, uint, int, uchar> TestTypes;
+typedef ::testing::Types<float, double, uint, int, uchar, short, ushort> TestTypes;
 
 // register the type list
 TYPED_TEST_CASE(Sort, TestTypes);
diff --git a/test/sort_index.cpp b/test/sort_index.cpp
index 1f503a7..1a4d6ac 100644
--- a/test/sort_index.cpp
+++ b/test/sort_index.cpp
@@ -38,7 +38,7 @@ class Sort : public ::testing::Test
 };
 
 // create a list of types to be tested
-typedef ::testing::Types<float, double, uint, int, uchar> TestTypes;
+typedef ::testing::Types<float, double, uint, int, uchar, short, ushort> TestTypes;
 
 // register the type list
 TYPED_TEST_CASE(Sort, TestTypes);
diff --git a/test/susan.cpp b/test/susan.cpp
index 4e69953..01ed228 100644
--- a/test/susan.cpp
+++ b/test/susan.cpp
@@ -55,7 +55,7 @@ class Susan : public ::testing::Test
         virtual void SetUp() {}
 };
 
-typedef ::testing::Types<float, double, int, uint, char, uchar> TestTypes;
+typedef ::testing::Types<float, double, int, uint, char, uchar, short, ushort> TestTypes;
 
 TYPED_TEST_CASE(Susan, TestTypes);
 
diff --git a/test/testHelpers.hpp b/test/testHelpers.hpp
index 09e1dc2..ac7bfb0 100644
--- a/test/testHelpers.hpp
+++ b/test/testHelpers.hpp
@@ -361,42 +361,18 @@ struct cond_type<false, T, Other> {
 };
 
 template<typename T>
-double real(T val) { return real(val); }
+double real(T val) { return (double)val; }
 template<>
-double real<double>(double val) { return val; }
+double real<af::cdouble>(af::cdouble val) { return real(val); }
 template<>
-double real<float>(float val) { return val; }
-template<>
-double real<int>(int val) { return val; }
-template<>
-double real<char>(char val) { return val; }
-template<>
-double real<uchar>(uchar val) { return val; }
-template<>
-double real<uint>(uint val) { return val; }
-template<>
-double real<intl>(intl val) { return val; }
-template<>
-double real<uintl>(uintl val) { return val; }
+double real<af::cfloat> (af::cfloat val) { return real(val); }
 
 template<typename T>
-double imag(T val) { return imag(val); }
-template<>
-double imag<double>(double val) { return 0; }
-template<>
-double imag<float>(float val) { return 0; }
-template<>
-double imag<int>(int val) { return 0; }
-template<>
-double imag<uint>(uint val) { return 0; }
-template<>
-double imag<intl>(intl val) { return 0; }
-template<>
-double imag<uintl>(uintl val) { return 0; }
+double imag(T val) { (void)val; return 0; }  // non-complex T has no imaginary part; complex types are specialized below
 template<>
-double imag<char>(char val) { return 0; }
+double imag<af::cdouble>(af::cdouble val) { return imag(val); }
 template<>
-double imag<uchar>(uchar val) { return 0; }
+double imag<af::cfloat> (af::cfloat val) { return imag(val); }
 
 template<typename T>
 bool noDoubleTests()
diff --git a/test/tile.cpp b/test/tile.cpp
index adeda5b..964b77f 100644
--- a/test/tile.cpp
+++ b/test/tile.cpp
@@ -38,7 +38,7 @@ class Tile : public ::testing::Test
 };
 
 // create a list of types to be tested
-typedef ::testing::Types<float, double, cfloat, cdouble, int, unsigned int, intl, uintl, char, unsigned char> TestTypes;
+typedef ::testing::Types<float, double, cfloat, cdouble, int, unsigned int, intl, uintl, char, unsigned char, short, ushort> TestTypes;
 
 // register the type list
 TYPED_TEST_CASE(Tile, TestTypes);
diff --git a/test/translate.cpp b/test/translate.cpp
index cd2df33..5b00c04 100644
--- a/test/translate.cpp
+++ b/test/translate.cpp
@@ -41,7 +41,7 @@ class TranslateInt : public ::testing::Test
 
 // create a list of types to be tested
 typedef ::testing::Types<float, double, cfloat, cdouble> TestTypes;
-typedef ::testing::Types<int, intl, char> TestTypesInt;
+typedef ::testing::Types<int, intl, char, short> TestTypesInt;
 
 // register the type list
 TYPED_TEST_CASE(Translate, TestTypes);
diff --git a/test/transpose.cpp b/test/transpose.cpp
index 1e4ee47..6be1ba4 100644
--- a/test/transpose.cpp
+++ b/test/transpose.cpp
@@ -37,7 +37,7 @@ class Transpose : public ::testing::Test
 };
 
 // create a list of types to be tested
-typedef ::testing::Types<float, cfloat, double, cdouble, int, uint, char, uchar> TestTypes;
+typedef ::testing::Types<float, cfloat, double, cdouble, int, uint, char, uchar, short, ushort> TestTypes;
 
 // register the type list
 TYPED_TEST_CASE(Transpose, TestTypes);
diff --git a/test/transpose_inplace.cpp b/test/transpose_inplace.cpp
index 34e1764..a54ff75 100644
--- a/test/transpose_inplace.cpp
+++ b/test/transpose_inplace.cpp
@@ -29,7 +29,7 @@ class Transpose : public ::testing::Test
 };
 
 // create a list of types to be tested
-typedef ::testing::Types<float, cfloat, double, cdouble, int, uint, char, uchar> TestTypes;
+typedef ::testing::Types<float, cfloat, double, cdouble, int, uint, char, uchar, short, ushort> TestTypes;
 
 // register the type list
 TYPED_TEST_CASE(Transpose, TestTypes);
diff --git a/test/triangle.cpp b/test/triangle.cpp
index d3bed92..e0b609b 100644
--- a/test/triangle.cpp
+++ b/test/triangle.cpp
@@ -30,7 +30,7 @@ using af::dim4;
 template<typename T>
 class Triangle : public ::testing::Test { };
 
-typedef ::testing::Types<float, af::cfloat, double, af::cdouble, int, unsigned, char, uchar, uintl, intl> TestTypes;
+typedef ::testing::Types<float, af::cfloat, double, af::cdouble, int, unsigned, char, uchar, uintl, intl, short, ushort> TestTypes;
 TYPED_TEST_CASE(Triangle, TestTypes);
 
 template<typename T>
diff --git a/test/unwrap.cpp b/test/unwrap.cpp
index 28ec1c0..82371d3 100644
--- a/test/unwrap.cpp
+++ b/test/unwrap.cpp
@@ -34,7 +34,7 @@ class Unwrap : public ::testing::Test
 };
 
 // create a list of types to be tested
-typedef ::testing::Types<float, double, cfloat, cdouble, int, unsigned int, intl, uintl, char, unsigned char> TestTypes;
+typedef ::testing::Types<float, double, cfloat, cdouble, int, unsigned int, intl, uintl, char, unsigned char, short, ushort> TestTypes;
 
 // register the type list
 TYPED_TEST_CASE(Unwrap, TestTypes);
diff --git a/test/var.cpp b/test/var.cpp
index fcea0ab..2311130 100644
--- a/test/var.cpp
+++ b/test/var.cpp
@@ -27,24 +27,26 @@ class Var : public ::testing::Test
 
 };
 
-typedef ::testing::Types< float, double, cfloat, cdouble, uint, int, uintl, intl, char, uchar> TestTypes;
+typedef ::testing::Types< float, double, cfloat, cdouble, uint, int, uintl, intl, char, uchar, short, ushort> TestTypes;
 TYPED_TEST_CASE(Var, TestTypes);
 
 template<typename T>
 struct elseType {
    typedef typename cond_type< is_same_type<T, uintl>::value ||
-                               is_same_type<T, intl>::value,
+                               is_same_type<T, intl> ::value,
                                               double,
                                               T>::type type;
 };
 
 template<typename T>
 struct varOutType {
-   typedef typename cond_type< is_same_type<T, float>::value ||
-                               is_same_type<T, int>::value ||
-                               is_same_type<T, uint>::value ||
-                               is_same_type<T, uchar>::value ||
-                               is_same_type<T, char>::value,
+   typedef typename cond_type< is_same_type<T, float >::value ||
+                               is_same_type<T, int   >::value ||
+                               is_same_type<T, uint  >::value ||
+                               is_same_type<T, short >::value ||
+                               is_same_type<T, ushort>::value ||
+                               is_same_type<T, uchar >::value ||
+                               is_same_type<T, char  >::value,
                                               float,
                               typename elseType<T>::type>::type type;
 };
diff --git a/test/where.cpp b/test/where.cpp
index 96bc8d5..eb21e0d 100644
--- a/test/where.cpp
+++ b/test/where.cpp
@@ -27,7 +27,7 @@ using af::cdouble;
 template<typename T>
 class Where : public ::testing::Test { };
 
-typedef ::testing::Types< float, double, cfloat, cdouble, int, uint, intl, uintl, char, uchar > TestTypes;
+typedef ::testing::Types< float, double, cfloat, cdouble, int, uint, intl, uintl, char, uchar, short, ushort> TestTypes;
 TYPED_TEST_CASE(Where, TestTypes);
 
 template<typename T>
diff --git a/test/wrap.cpp b/test/wrap.cpp
index 0a9cdc2..0cc6fab 100644
--- a/test/wrap.cpp
+++ b/test/wrap.cpp
@@ -35,7 +35,7 @@ class Wrap : public ::testing::Test
 };
 
 // create a list of types to be tested
-typedef ::testing::Types<float, double, cfloat, cdouble, int, unsigned int, intl, uintl, char, unsigned char> TestTypes;
+typedef ::testing::Types<float, double, cfloat, cdouble, int, unsigned int, intl, uintl, char, unsigned char, short, ushort> TestTypes;
 
 // register the type list
 TYPED_TEST_CASE(Wrap, TestTypes);
diff --git a/test/write.cpp b/test/write.cpp
index afe5f38..b96cb0a 100644
--- a/test/write.cpp
+++ b/test/write.cpp
@@ -32,7 +32,7 @@ class Write : public ::testing::Test
 };
 
 // create a list of types to be tested
-typedef ::testing::Types<float, cfloat, double, cdouble, int, unsigned, char, unsigned char> TestTypes;
+typedef ::testing::Types<float, cfloat, double, cdouble, int, unsigned, char, unsigned char, short, ushort> TestTypes;
 
 // register the type list
 TYPED_TEST_CASE(Write, TestTypes);

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git