[clfft] 13/128: Precallback - C2R 1D out-place updates
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Oct 22 14:54:33 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master in repository clfft.
commit fec5292d5e6da3909b43bd06dc237a4d6d517c99
Author: Pradeep <pradeep.rao at amd.com>
Date: Mon Aug 10 16:15:23 2015 +0530
Precallback - C2R 1D out-place updates
---
src/client-callback/callback-client.cpp | 32 +++++++++++++++++++++-----------
src/library/generator.stockham.cpp | 30 ++++++++++++++++++++++++------
2 files changed, 45 insertions(+), 17 deletions(-)
diff --git a/src/client-callback/callback-client.cpp b/src/client-callback/callback-client.cpp
index 1614922..c660154 100644
--- a/src/client-callback/callback-client.cpp
+++ b/src/client-callback/callback-client.cpp
@@ -184,26 +184,36 @@ bool compare(T1 *refData, std::valarray< T2 > real, std::valarray< T2 > imag,
//Compare reference and opencl output
template < typename T1 , typename T2 >
bool compare(T1 *refData, std::valarray< T2 > real,
- size_t length, int batchsize, const float epsilon = 1e-6f)
+ size_t batch_size, size_t *o_strides, size_t *lengths, const float epsilon = 1e-6f)
{
float error = 0.0f;
T1 ref = 0.0;
T1 diff;
float normRef = 0.0f;
float normError = 0.0f;
- size_t scale = length;
-
+
//real compare
- for (int b = 0; b < batchsize; b++)
- {
- int idx = b * (length + 2);
- for(size_t i = idx; i < (idx + length); ++i)
+ for(size_t b = 0; b < batch_size; b++)
+ {
+ size_t p3 = b * o_strides[3];
+ for(size_t k = 0; k < lengths[2]; k++)
{
- diff = refData[i] - (real[i] * scale);
- error += (float)(diff * diff);
- ref += refData[i] * refData[i];
+ size_t p2 = p3 + k * o_strides[2];
+ for(size_t j = 0; j < lengths[1]; j++)
+ {
+ size_t p1 = p2 + j * o_strides[1];
+ for(size_t i = 0; i < lengths[0]; i++)
+ {
+ size_t p0 = p1 + i * o_strides[0];
+
+ diff = refData[p0] - (real[p0] * lengths[0]);
+ error += (float)(diff * diff);
+ ref += refData[p0] * refData[p0];
+ }
+ }
}
}
+
if (error != 0)
{
normRef =::sqrtf((float) ref);
@@ -1029,7 +1039,7 @@ void compareWithReference(clfftLayout in_layout, clfftLayout out_layout, size_t
refout = get_fftwf_output_c2r(lengths, strides, inStrides, outStrides, batch_size, fftBatchSize, outfftBatchSize, fftVectorSizePadded,
in_layout, outfftVectorSizePadded, outfftVectorSize, dim, dir);
- if (!compare<float, T>(refout, real, outfftVectorSize, batch_size))
+ if (!compare<float, T>(refout, real, batch_size, o_strides, lengths))
checkflag = true;
/*for( cl_uint i = 0; i < outfftBatchSize; i = i + outStrides[0])
diff --git a/src/library/generator.stockham.cpp b/src/library/generator.stockham.cpp
index df35141..9c7e734 100644
--- a/src/library/generator.stockham.cpp
+++ b/src/library/generator.stockham.cpp
@@ -2943,17 +2943,25 @@ namespace StockhamGenerator
if(outInterleaved)
{
- str += "__global "; str += r2Type; str += " * restrict gbOut)\n";
+ str += "__global "; str += r2Type; str += " * restrict gbOut";
}
else if(outReal)
{
- str += "__global "; str += rType; str += " * restrict gbOut)\n";
+ str += "__global "; str += rType; str += " * restrict gbOut";
}
else
{
str += "__global const "; str += rType; str += " * restrict gbOutRe, ";
- str += "__global const "; str += rType; str += " * restrict gbOutIm)\n";
+ str += "__global const "; str += rType; str += " * restrict gbOutIm";
+ }
+
+ //If plan has pre-callback
+ if (params.fft_hasPreCallback)
+ {
+ str += callbackstr;
}
+
+ str += ")\n";
}
else
{
@@ -3218,8 +3226,8 @@ namespace StockhamGenerator
{
if(inInterleaved || inReal)
{
- if(!rcSimple) { str += "lwbIn2 = gbIn + iOffset2;\n\t"; }
- str += "lwbIn = gbIn + iOffset;\n\t";
+ if(!rcSimple && !params.fft_hasPreCallback) { str += "lwbIn2 = gbIn + iOffset2;\n\t"; }
+ if(!params.fft_hasPreCallback) { str += "lwbIn = gbIn + iOffset;\n\t"; }
}
else
{
@@ -3432,7 +3440,17 @@ namespace StockhamGenerator
}
else
{
- if(inInterleaved || inReal) inBuf = (inInterleaved && params.fft_hasPreCallback) ? "gb, gb, " : "lwbIn, lwbIn2, ";
+ if(inInterleaved || inReal)
+ {
+ if (!params.fft_hasPreCallback)
+ {
+ inBuf = "lwbIn, lwbIn2, ";
+ }
+ else
+ {
+ inBuf = (params.fft_placeness == CLFFT_INPLACE) ? "gb, gb, " : "gbIn, gbIn, " ;
+ }
+ }
else inBuf = (params.fft_hasPreCallback) ? "gbInRe, gbInRe, gbInIm, gbInIm, " : "lwbInRe, lwbInRe2, lwbInIm, lwbInIm2, ";
if(outInterleaved || outReal) outBuf = "lwbOut, lwbOut2";
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits mailing list