[arrayfire] 125/284: Merge branch 'devel' into async

Sun Feb 7 18:59:25 UTC 2016

This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/experimental
in repository arrayfire.

commit f0b9691c883777c9768d20d018f0e063246bc930
Merge: 6058dd2 2ae8273
Author: pradeep <pradeep at arrayfire.com>
Date:   Wed Dec 30 16:38:23 2015 -0500

    Merge branch 'devel' into async

 CMakeLists.txt                                |   3 +
 CMakeModules/FindLAPACKE.cmake                |   6 +-
 CMakeModules/MinBuildTime.cmake               |  93 +++++++++
 CMakeModules/build_clBLAS.cmake               |   6 +-
 CMakeModules/build_clFFT.cmake                |   6 +-
 CMakeModules/build_forge.cmake                |   2 +-
 README.md                                     |   4 +-
 docs/arrayfire.css                            |   5 +
 docs/details/backend.dox                      |   9 +
 docs/details/image.dox                        |  47 +++--
 docs/pages/release_notes.md                   |  75 ++++++++
 examples/graphics/gravity_sim.cpp             | 140 ++++++++++++++
 examples/graphics/plot3.cpp                   |   4 +-
 include/af/arith.h                            |   2 +-
 include/af/array.h                            |   2 +-
 include/af/data.h                             |  12 +-
 include/af/macros.h                           |  61 ++++++
 src/api/c/colorspace.cpp                      |  63 +++---
 src/api/c/err_common.cpp                      |  84 ++++----
 src/api/c/err_common.hpp                      | 122 ++++++++----
 src/api/c/graphics_common.hpp                 |   6 +-
 src/api/c/hist.cpp                            |   2 +-
 src/api/c/image.cpp                           |  18 +-
 src/api/c/imageio.cpp                         |  17 +-
 src/api/c/imageio2.cpp                        |   8 +-
 src/api/c/plot.cpp                            |   2 +-
 src/api/c/plot3.cpp                           |   2 +-
 src/api/c/print.cpp                           |  24 ++-
 src/api/c/surface.cpp                         |   2 +-
 src/api/c/transform.cpp                       |  32 +--
 src/api/cpp/array.cpp                         |  10 +-
 src/api/cpp/error.hpp                         |  12 +-
 src/api/cpp/gfor.cpp                          |   2 +-
 src/api/cpp/seq.cpp                           |   4 +-
 src/api/cpp/where.cpp                         |   2 +-
 src/api/unified/data.cpp                      |   6 +-
 src/api/unified/symbol_manager.cpp            |  47 ++++-
 src/api/unified/symbol_manager.hpp            |   2 +-
 src/backend/cpu/err_cpu.hpp                   |   5 +-
 src/backend/cpu/kernel/random.hpp             |   2 +-
 src/backend/cpu/kernel/rotate.hpp             |   5 +-
 src/backend/cpu/kernel/transform.hpp          |  60 ++++--
 src/backend/cpu/transform.cpp                 |  13 +-
 src/backend/cpu/transform.hpp                 |   2 +-
 src/backend/cpu/transform_interp.hpp          |  78 ++++++--
 src/backend/cuda/err_cuda.hpp                 |   3 +-
 src/backend/cuda/kernel/rotate.hpp            |   6 +-
 src/backend/cuda/kernel/transform.hpp         |  79 +++++---
 src/backend/cuda/kernel/transform_interp.hpp  |  65 +++++--
 src/backend/cuda/platform.cpp                 |  25 ++-
 src/backend/cuda/transform.cu                 |  10 +-
 src/backend/cuda/transform.hpp                |   3 +-
 src/backend/defines.hpp                       |  24 ++-
 src/backend/opencl/err_opencl.hpp             |   5 +-
 src/backend/opencl/kernel/transform.cl        |  32 ++-
 src/backend/opencl/kernel/transform.hpp       |  14 +-
 src/backend/opencl/kernel/transform_interp.cl |  69 +++++--
 src/backend/opencl/magma/labrd.cpp            | 119 ++++++------
 src/backend/opencl/magma/magma_cpu_blas.h     |  31 ++-
 src/backend/opencl/transform.cpp              | 110 +++++++----
 src/backend/opencl/transform.hpp              |   2 +-
 test/CMakeLists.txt                           |  38 ++--
 test/backend.cpp                              |  66 +++++++
 test/bilateral.cpp                            |   1 +
 test/cholesky_dense.cpp                       |   1 +
 test/data                                     |   2 +-
 test/fast.cpp                                 |   2 +
 test/gloh_nonfree.cpp                         |   2 +
 test/harris.cpp                               |   2 +
 test/homography.cpp                           |   3 +
 test/imageio.cpp                              |  12 ++
 test/info.cpp                                 |  25 +--
 test/inverse_dense.cpp                        |   1 +
 test/lu_dense.cpp                             |   3 +
 test/meanshift.cpp                            |   2 +
 test/medfilt.cpp                              |   1 +
 test/morph.cpp                                |   2 +
 test/orb.cpp                                  |   2 +
 test/qr_dense.cpp                             |   2 +
 test/rank_dense.cpp                           |   7 +
 test/sift_nonfree.cpp                         |   2 +
 test/solve_dense.cpp                          |   6 +
 test/susan.cpp                                |   1 +
 test/svd_dense.cpp                            |   1 +
 test/testHelpers.hpp                          |  33 ++++
 test/transform.cpp                            | 267 ++++++++++++++++++++++++++
 86 files changed, 1699 insertions(+), 491 deletions(-)

diff --cc src/backend/cpu/kernel/rotate.hpp
index 6e4f758,0000000..395ea3f
mode 100644,000000..100644

--- a/src/backend/cpu/kernel/rotate.hpp
+++ b/src/backend/cpu/kernel/rotate.hpp
@@@ -1,83 -1,0 +1,84 @@@
 +/*******************************************************
 + * Copyright (c) 2015, ArrayFire
 + * All rights reserved.
 + *
 + * This file is distributed under 3-clause BSD license.
 + * The complete license agreement can be obtained at:
 + * http://arrayfire.com/licenses/BSD-3-Clause
 + ********************************************************/
 +
 +#pragma once
 +#include <af/defines.h>
 +#include <Array.hpp>
 +#include <math.hpp>
 +#include <err_cpu.hpp>
 +
 +namespace cpu
 +{
 +namespace kernel
 +{
 +
 +template<typename T, af_interp_type method>
 +void rotate(Array<T> output, const Array<T> input, const float theta)
 +{
 +    const af::dim4 odims    = output.dims();
 +    const af::dim4 idims    = input.dims();
 +    const af::dim4 ostrides = output.strides();
 +    const af::dim4 istrides = input.strides();
 +
 +    const T* in   = input.get();
 +          T* out  = output.get();
 +    dim_t nimages = idims[2];
 +
 +    void (*t_fn)(T *, const T *, const float *, const af::dim4 &,
 +                 const af::dim4 &, const af::dim4 &,
-                  const dim_t, const dim_t, const dim_t, const dim_t);
++                 const dim_t, const dim_t, const dim_t, const dim_t,
++                 const bool);
 +
 +    const float c = cos(-theta), s = sin(-theta);
 +    float tx, ty;
 +    {
 +        const float nx = 0.5 * (idims[0] - 1);
 +        const float ny = 0.5 * (idims[1] - 1);
 +        const float mx = 0.5 * (odims[0] - 1);
 +        const float my = 0.5 * (odims[1] - 1);
 +        const float sx = (mx * c + my *-s);
 +        const float sy = (mx * s + my * c);
 +        tx = -(sx - nx);
 +        ty = -(sy - ny);
 +    }
 +
 +    const float tmat[6] = {std::round( c * 1000) / 1000.0f,
 +                           std::round(-s * 1000) / 1000.0f,
 +                           std::round(tx * 1000) / 1000.0f,
 +                           std::round( s * 1000) / 1000.0f,
 +                           std::round( c * 1000) / 1000.0f,
 +                           std::round(ty * 1000) / 1000.0f,
 +                          };
 +
 +    switch(method) {
 +        case AF_INTERP_NEAREST:
 +            t_fn = &transform_n;
 +            break;
 +        case AF_INTERP_BILINEAR:
 +            t_fn = &transform_b;
 +            break;
 +        case AF_INTERP_LOWER:
 +            t_fn = &transform_l;
 +            break;
 +        default:
 +            AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
 +            break;
 +    }
 +
 +
 +    // Do transform for image
 +    for(int yy = 0; yy < (int)odims[1]; yy++) {
 +        for(int xx = 0; xx < (int)odims[0]; xx++) {
-             t_fn(out, in, tmat, idims, ostrides, istrides, nimages, 0, xx, yy);
++            t_fn(out, in, tmat, idims, ostrides, istrides, nimages, 0, xx, yy, false);
 +        }
 +    }
 +}
 +
 +}
 +}
diff --cc src/backend/cpu/kernel/transform.hpp
index d97613a,0000000..2311e4e
mode 100644,000000..100644
--- a/src/backend/cpu/kernel/transform.hpp
+++ b/src/backend/cpu/kernel/transform.hpp
@@@ -1,105 -1,0 +1,131 @@@
 +/*******************************************************
 + * Copyright (c) 2015, ArrayFire
 + * All rights reserved.
 + *
 + * This file is distributed under 3-clause BSD license.
 + * The complete license agreement can be obtained at:
 + * http://arrayfire.com/licenses/BSD-3-Clause
 + ********************************************************/
 +
 +#pragma once
 +#include <af/defines.h>
 +#include <Array.hpp>
 +#include <err_cpu.hpp>
 +
 +namespace cpu
 +{
 +namespace kernel
 +{
 +
 +template <typename T>
- void calc_affine_inverse(T *txo, const T *txi)
++void calc_transform_inverse(T *txo, const T *txi, const bool perspective)
 +{
-     T det = txi[0]*txi[4] - txi[1]*txi[3];
++    if (perspective) {
++        txo[0] =   txi[4]*txi[8] - txi[5]*txi[7];
++        txo[1] = -(txi[1]*txi[8] - txi[2]*txi[7]);
++        txo[2] =   txi[1]*txi[5] - txi[2]*txi[4];
 +
-     txo[0] = txi[4] / det;
-     txo[1] = txi[3] / det;
-     txo[3] = txi[1] / det;
-     txo[4] = txi[0] / det;
++        txo[3] = -(txi[3]*txi[8] - txi[5]*txi[6]);
++        txo[4] =   txi[0]*txi[8] - txi[2]*txi[6];
++        txo[5] = -(txi[0]*txi[5] - txi[2]*txi[3]);
 +
-     txo[2] = txi[2] * -txo[0] + txi[5] * -txo[1];
-     txo[5] = txi[2] * -txo[3] + txi[5] * -txo[4];
++        txo[6] =   txi[3]*txi[7] - txi[4]*txi[6];
++        txo[7] = -(txi[0]*txi[7] - txi[1]*txi[6]);
++        txo[8] =   txi[0]*txi[4] - txi[1]*txi[3];
++
++        T det = txi[0]*txo[0] + txi[1]*txo[3] + txi[2]*txo[6];
++
++        txo[0] /= det; txo[1] /= det; txo[2] /= det;
++        txo[3] /= det; txo[4] /= det; txo[5] /= det;
++        txo[6] /= det; txo[7] /= det; txo[8] /= det;
++    }
++    else {
++        T det = txi[0]*txi[4] - txi[1]*txi[3];
++
++        txo[0] = txi[4] / det;
++        txo[1] = txi[3] / det;
++        txo[3] = txi[1] / det;
++        txo[4] = txi[0] / det;
++
++        txo[2] = txi[2] * -txo[0] + txi[5] * -txo[1];
++        txo[5] = txi[2] * -txo[3] + txi[5] * -txo[4];
++    }
 +}
 +
 +template <typename T>
- void calc_affine_inverse(T *tmat, const T *tmat_ptr, const bool inverse)
++void calc_transform_inverse(T *tmat, const T *tmat_ptr, const bool inverse,
++                            const bool perspective, const unsigned transf_len)
 +{
 +    // The way kernel is structured, it expects an inverse
 +    // transform matrix by default.
 +    // If it is an forward transform, then we need its inverse
 +    if(inverse) {
-         for(int i = 0; i < 6; i++)
++        for(int i = 0; i < (int)transf_len; i++)
 +            tmat[i] = tmat_ptr[i];
 +    } else {
-         calc_affine_inverse(tmat, tmat_ptr);
++        calc_transform_inverse(tmat, tmat_ptr, perspective);
 +    }
 +}
 +
 +template<typename T, af_interp_type method>
 +void transform(Array<T> output, const Array<T> input,
-                const Array<float> transform, const bool inverse)
++               const Array<float> transform, const bool inverse,
++               const bool perspective)
 +{
 +    const af::dim4 idims    = input.dims();
 +    const af::dim4 odims    = output.dims();
 +    const af::dim4 istrides = input.strides();
 +    const af::dim4 ostrides = output.strides();
 +
 +    T * out = output.get();
 +    const T * in = input.get();
 +    const float* tf = transform.get();
 +
 +    dim_t nimages     = idims[2];
 +    // Multiplied in src/backend/transform.cpp
 +    dim_t ntransforms = odims[2] / idims[2];
 +
 +    void (*t_fn)(T *, const T *, const float *, const af::dim4 &,
 +                 const af::dim4 &, const af::dim4 &,
-                  const dim_t, const dim_t, const dim_t, const dim_t);
++                 const dim_t, const dim_t, const dim_t, const dim_t,
++                 const bool);
 +
 +    switch(method) {
 +        case AF_INTERP_NEAREST:
 +            t_fn = &transform_n;
 +            break;
 +        case AF_INTERP_BILINEAR:
 +            t_fn = &transform_b;
 +            break;
 +        case AF_INTERP_LOWER:
 +            t_fn = &transform_l;
 +            break;
 +        default:
 +            AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
 +            break;
 +    }
 +
++    const int transf_len = (perspective) ? 9 : 6;
 +
 +    // For each transform channel
 +    for(int t_idx = 0; t_idx < (int)ntransforms; t_idx++) {
 +        // Compute inverse if required
-         const float *tmat_ptr = tf + t_idx * 6;
-         float tmat[6];
-         calc_affine_inverse(tmat, tmat_ptr, inverse);
++        const float *tmat_ptr = tf + t_idx * transf_len;
++        float* tmat = new float[transf_len];
++        calc_transform_inverse(tmat, tmat_ptr, inverse, perspective, transf_len);
 +
 +        // Offset for output pointer
 +        dim_t o_offset = t_idx * nimages * ostrides[2];
 +
 +        // Do transform for image
 +        for(int yy = 0; yy < (int)odims[1]; yy++) {
 +            for(int xx = 0; xx < (int)odims[0]; xx++) {
-                 t_fn(out, in, tmat, idims, ostrides, istrides, nimages, o_offset, xx, yy);
++                t_fn(out, in, tmat, idims, ostrides, istrides, nimages, o_offset, xx, yy, perspective);
 +            }
 +        }
++        delete[] tmat;
 +    }
 +}
 +
 +}
 +}
diff --cc src/backend/cpu/transform.cpp
index fc71458,bf072c3..b2ab8db
--- a/src/backend/cpu/transform.cpp
+++ b/src/backend/cpu/transform.cpp
@@@ -17,50 -16,160 +17,53 @@@
  
  namespace cpu
  {
 -    template <typename T>
 -    void calc_transform_inverse(T *txo, const T *txi, const bool perspective)
 -    {
 -        if (perspective) {
 -            txo[0] =   txi[4]*txi[8] - txi[5]*txi[7];
 -            txo[1] = -(txi[1]*txi[8] - txi[2]*txi[7]);
 -            txo[2] =   txi[1]*txi[5] - txi[2]*txi[4];
  
 -            txo[3] = -(txi[3]*txi[8] - txi[5]*txi[6]);
 -            txo[4] =   txi[0]*txi[8] - txi[2]*txi[6];
 -            txo[5] = -(txi[0]*txi[5] - txi[2]*txi[3]);
 -
 -            txo[6] =   txi[3]*txi[7] - txi[4]*txi[6];
 -            txo[7] = -(txi[0]*txi[7] - txi[1]*txi[6]);
 -            txo[8] =   txi[0]*txi[4] - txi[1]*txi[3];
 -
 -            T det = txi[0]*txo[0] + txi[1]*txo[3] + txi[2]*txo[6];
 -
 -            txo[0] /= det; txo[1] /= det; txo[2] /= det;
 -            txo[3] /= det; txo[4] /= det; txo[5] /= det;
 -            txo[6] /= det; txo[7] /= det; txo[8] /= det;
 -        }
 -        else {
 -            T det = txi[0]*txi[4] - txi[1]*txi[3];
 -
 -            txo[0] = txi[4] / det;
 -            txo[1] = txi[3] / det;
 -            txo[3] = txi[1] / det;
 -            txo[4] = txi[0] / det;
 -
 -            txo[2] = txi[2] * -txo[0] + txi[5] * -txo[1];
 -            txo[5] = txi[2] * -txo[3] + txi[5] * -txo[4];
 -        }
 -    }
 -
 -    template <typename T>
 -    void calc_transform_inverse(T *tmat, const T *tmat_ptr, const bool inverse,
 -                                const bool perspective, const unsigned transf_len)
 -    {
 -        // The way kernel is structured, it expects an inverse
 -        // transform matrix by default.
 -        // If it is an forward transform, then we need its inverse
 -        if(inverse) {
 -            for(int i = 0; i < (int)transf_len; i++)
 -                tmat[i] = tmat_ptr[i];
 -        } else {
 -            calc_transform_inverse(tmat, tmat_ptr, perspective);
 -        }
 -    }
 -
 -    template<typename T, af_interp_type method>
 -    void transform_(T *out, const T *in, const float *tf,
 -                    const af::dim4 &odims, const af::dim4 &idims,
 -                    const af::dim4 &ostrides, const af::dim4 &istrides,
 -                    const af::dim4 &tstrides, const bool inverse,
 -                    const bool perspective)
 -    {
 -        dim_t nimages     = idims[2];
 -        // Multiplied in src/backend/transform.cpp
 -        dim_t ntransforms = odims[2] / idims[2];
 -
 -        void (*t_fn)(T *, const T *, const float *, const af::dim4 &,
 -                     const af::dim4 &, const af::dim4 &,
 -                     const dim_t, const dim_t, const dim_t, const dim_t, const bool);
 -
 -        switch(method) {
 -            case AF_INTERP_NEAREST:
 -                t_fn = &transform_n;
 -                break;
 -            case AF_INTERP_BILINEAR:
 -                t_fn = &transform_b;
 -                break;
 -            case AF_INTERP_LOWER:
 -                t_fn = &transform_l;
 -                break;
 -            default:
 -                AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
 -                break;
 -        }
 -
 -        const int transf_len = (perspective) ? 9 : 6;
 -
 -        // For each transform channel
 -        for(int t_idx = 0; t_idx < (int)ntransforms; t_idx++) {
 -            // Compute inverse if required
 -            const float *tmat_ptr = tf + t_idx * transf_len;
 -            float* tmat = new float[transf_len];
 -            calc_transform_inverse(tmat, tmat_ptr, inverse, perspective, transf_len);
 -
 -            // Offset for output pointer
 -            dim_t o_offset = t_idx * nimages * ostrides[2];
 -
 -            // Do transform for image
 -            for(int yy = 0; yy < (int)odims[1]; yy++) {
 -                for(int xx = 0; xx < (int)odims[0]; xx++) {
 -                    t_fn(out, in, tmat, idims, ostrides, istrides, nimages, o_offset, xx, yy, perspective);
 -                }
 -            }
 -            delete[] tmat;
 -        }
 +template<typename T>
 +Array<T> transform(const Array<T> &in, const Array<float> &transform, const af::dim4 &odims,
-                     const af_interp_type method, const bool inverse)
++                    const af_interp_type method, const bool inverse, const bool perspective)
 +{
 +    in.eval();
 +    transform.eval();
 +
 +    Array<T> out = createEmptyArray<T>(odims);
 +
 +    switch(method) {
 +        case AF_INTERP_NEAREST :
-             getQueue().enqueue(kernel::transform<T, AF_INTERP_NEAREST >, out, in, transform, inverse);
++            getQueue().enqueue(kernel::transform<T, AF_INTERP_NEAREST >, out, in, transform,
++                    inverse, perspective);
 +            break;
 +        case AF_INTERP_BILINEAR:
-             getQueue().enqueue(kernel::transform<T, AF_INTERP_BILINEAR>, out, in, transform, inverse);
++            getQueue().enqueue(kernel::transform<T, AF_INTERP_BILINEAR>, out, in, transform,
++                    inverse, perspective);
 +            break;
 +        case AF_INTERP_LOWER   :
-             getQueue().enqueue(kernel::transform<T, AF_INTERP_LOWER   >, out, in, transform, inverse);
++            getQueue().enqueue(kernel::transform<T, AF_INTERP_LOWER   >, out, in, transform,
++                    inverse, perspective);
 +            break;
 +        default: AF_ERROR("Unsupported interpolation type", AF_ERR_ARG); break;
      }
  
 -    template<typename T>
 -    Array<T> transform(const Array<T> &in, const Array<float> &transform, const af::dim4 &odims,
 -                        const af_interp_type method, const bool inverse, const bool perspective)
 -    {
 -        const af::dim4 idims = in.dims();
 -
 -        Array<T> out = createEmptyArray<T>(odims);
 -
 -        switch(method) {
 -            case AF_INTERP_NEAREST:
 -                transform_<T, AF_INTERP_NEAREST>
 -                          (out.get(), in.get(), transform.get(), odims, idims,
 -                           out.strides(), in.strides(), transform.strides(), inverse,
 -                           perspective);
 -                break;
 -            case AF_INTERP_BILINEAR:
 -                transform_<T, AF_INTERP_BILINEAR>
 -                          (out.get(), in.get(), transform.get(), odims, idims,
 -                           out.strides(), in.strides(), transform.strides(), inverse,
 -                           perspective);
 -                break;
 -            case AF_INTERP_LOWER:
 -                transform_<T, AF_INTERP_LOWER>
 -                          (out.get(), in.get(), transform.get(), odims, idims,
 -                           out.strides(), in.strides(), transform.strides(), inverse,
 -                           perspective);
 -                break;
 -            default:
 -                AF_ERROR("Unsupported interpolation type", AF_ERR_ARG);
 -                break;
 -        }
 +    return out;
 +}
  
 -        return out;
 -    }
  
 +#define INSTANTIATE(T)                                                              \
 +template Array<T> transform(const Array<T> &in, const Array<float> &transform,      \
 +                            const af::dim4 &odims, const af_interp_type method,     \
-                             const bool inverse);
++                            const bool inverse, const bool perspective);
  
 -#define INSTANTIATE(T)                                                                  \
 -    template Array<T> transform(const Array<T> &in, const Array<float> &transform,      \
 -                                const af::dim4 &odims, const af_interp_type method,     \
 -                                const bool inverse, const bool perspective);
  
 +INSTANTIATE(float)
 +INSTANTIATE(double)
 +INSTANTIATE(cfloat)
 +INSTANTIATE(cdouble)
 +INSTANTIATE(int)
 +INSTANTIATE(uint)
 +INSTANTIATE(intl)
 +INSTANTIATE(uintl)
 +INSTANTIATE(uchar)
 +INSTANTIATE(char)
 +INSTANTIATE(short)
 +INSTANTIATE(ushort)
  
 -    INSTANTIATE(float)
 -    INSTANTIATE(double)
 -    INSTANTIATE(cfloat)
 -    INSTANTIATE(cdouble)
 -    INSTANTIATE(int)
 -    INSTANTIATE(uint)
 -    INSTANTIATE(intl)
 -    INSTANTIATE(uintl)
 -    INSTANTIATE(uchar)
 -    INSTANTIATE(char)
 -    INSTANTIATE(short)
 -    INSTANTIATE(ushort)
  }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git