[clfft] 25/128: Precallback - R2C large 1D and few more GTests
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Oct 22 14:54:34 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository clfft.
commit e10d1cdeb44ea353addcd960330dcce947504c06
Author: Pradeep <pradeep.rao at amd.com>
Date: Wed Aug 19 23:33:34 2015 +0530
Precallback - R2C large 1D and few more GTests
---
src/library/accessors.cpp | 22 +-
src/library/generator.copy.cpp | 16 +-
src/library/generator.stockham.cpp | 160 ++++++++-------
src/library/generator.transpose.gcn.cpp | 30 ++-
src/library/plan.cpp | 16 ++
src/tests/accuracy_test_common.h | 69 ++++++-
src/tests/accuracy_test_precallback.cpp | 345 ++++++++++++++++++++++++++++++--
src/tests/buffer.h | 59 ++++--
src/tests/cl_transform.h | 8 +-
src/tests/fftw_transform.h | 4 +-
src/tests/test_constants.h | 14 ++
11 files changed, 606 insertions(+), 137 deletions(-)
diff --git a/src/library/accessors.cpp b/src/library/accessors.cpp
index 808b202..32de44d 100644
--- a/src/library/accessors.cpp
+++ b/src/library/accessors.cpp
@@ -781,23 +781,17 @@ clfftStatus clFFTSetPlanCallback(clfftPlanHandle plHandle, const char* funcName,
if (callbackType == PRECALLBACK)
{
- if (fftPlan->inputLayout == CLFFT_COMPLEX_INTERLEAVED || fftPlan->inputLayout == CLFFT_COMPLEX_PLANAR
- || fftPlan->inputLayout == CLFFT_HERMITIAN_INTERLEAVED || fftPlan->inputLayout == CLFFT_HERMITIAN_PLANAR)
+ if (funcName != NULL && funcString != NULL)
{
- if (funcName != NULL && funcString != NULL)
- {
- fftPlan->hasPreCallback = true;
+ fftPlan->hasPreCallback = true;
- fftPlan->preCallback.funcname = funcName;
- fftPlan->preCallback.funcstring = funcString;
- fftPlan->preCallback.userdatastruct = userStructString;
- fftPlan->preCallback.localMemSize = (localMemSize > 0) ? localMemSize : 0;
+ fftPlan->preCallback.funcname = funcName;
+ fftPlan->preCallback.funcstring = funcString;
+ fftPlan->preCallback.userdatastruct = userStructString;
+ fftPlan->preCallback.localMemSize = (localMemSize > 0) ? localMemSize : 0;
- fftPlan->precallUserData = userdata;
- }
- }
- else
- return CLFFT_NOTIMPLEMENTED;
+ fftPlan->precallUserData = userdata;
+ }
}
return CLFFT_SUCCESS;
diff --git a/src/library/generator.copy.cpp b/src/library/generator.copy.cpp
index 3315e0d..45e9728 100644
--- a/src/library/generator.copy.cpp
+++ b/src/library/generator.copy.cpp
@@ -252,11 +252,15 @@ namespace CopyGenerator
// Setup registers
str += "\t"; str += RegBaseType<PR>(2); str += " R;\n\n";
+ size_t NtRounded64 = DivRoundingUp<size_t>(Nt,64) * 64;
+
if(!general)
{
// Setup variables
- str += "\tuint batch, mel, mel2;\n\t";
- str += "batch = me/"; str += SztToStr(Nt); str += ";\n\t";
+ str += "\tuint batch, meg, mel, mel2;\n\t";
+ str += "batch = me/"; str += SztToStr(NtRounded64); str += ";\n\t";
+ str += "meg = me%"; str += SztToStr(NtRounded64); str += ";\n\t";
+
str += "mel = me%"; str += SztToStr(Nt); str += ";\n\t";
str += "mel2 = ("; str += SztToStr(N); str += " - mel)%"; str += SztToStr(N); str += ";\n\n";
}
@@ -346,6 +350,7 @@ namespace CopyGenerator
}
else
{
+ str += "if(meg < "; str += SztToStr(Nt); str += ")\n\t{\n\t";
if(c2h)
{
if(inIlvd)
@@ -384,7 +389,7 @@ namespace CopyGenerator
{
str += "lwbOut[0] = R;\n\t";
str += "R.y = -R.y;\n\t";
- str += "lwbOut2[0] = R;\n\n";
+ str += "lwbOut2[0] = R;\n\t";
}
else
{
@@ -392,9 +397,10 @@ namespace CopyGenerator
str += "lwbOutIm[0] = R.y;\n\t";
str += "R.y = -R.y;\n\t";
str += "lwbOutRe2[0] = R.x;\n\t";
- str += "lwbOutIm2[0] = R.y;\n\n";
+ str += "lwbOutIm2[0] = R.y;\n\t";
}
}
+ str += "}\n\n";
}
str += "}\n";
@@ -463,7 +469,7 @@ clfftStatus FFTGeneratedCopyAction::getWorkSizes (std::vector<size_t> & globalWS
}
else
{
- count *= (1 + this->signature.fft_N[0]/2);
+ count *= (DivRoundingUp<size_t>((1 + this->signature.fft_N[0]/2), 64) * 64);
}
}
break;
diff --git a/src/library/generator.stockham.cpp b/src/library/generator.stockham.cpp
index 03771e5..91e9e81 100644
--- a/src/library/generator.stockham.cpp
+++ b/src/library/generator.stockham.cpp
@@ -863,7 +863,7 @@ namespace StockhamGenerator
RegBaseAndCountAndPos("", i*radix + r, regIndex);
hid = (i * radix + r) / (numB * radix / 2);
- if (fft_doPreCallback && component == SR_COMP_REAL && hid != 0)
+ if (fft_doPreCallback && c2r && component == SR_COMP_REAL && hid != 0)
{
regIndexC = regIndex; regIndexC += ").y";
}
@@ -921,7 +921,7 @@ namespace StockhamGenerator
passStr += ");";
}
- if (fft_doPreCallback && component == SR_COMP_REAL && hid != 0)
+ if (fft_doPreCallback && c2r && component == SR_COMP_REAL && hid != 0)
{
passStr += "\n\t";
passStr += regIndexC; passStr += " = "; passStr += regIndexSub; passStr += ";";
@@ -932,10 +932,24 @@ namespace StockhamGenerator
passStr += " = ";
//Use the return value from precallback if set
- if (fft_doPreCallback && component == SR_COMP_BOTH)
+ if (fft_doPreCallback && (component == SR_COMP_BOTH || r2c))
{
- passStr += "retPrecallback["; passStr += SztToStr(v); passStr += "]";
- passStr += interleaved ? tail : (c == 0) ? ".x;" : ".y;";
+ if (component == SR_COMP_BOTH)
+ {
+ passStr += "retPrecallback["; passStr += SztToStr(v); passStr += "]";
+ passStr += interleaved ? tail : (c == 0) ? ".x;" : ".y;";
+ }
+ else if (r2c)
+ {
+ passStr += fft_preCallback.funcname; passStr += "("; passStr += buffer; passStr += ", ";
+ passStr += bufOffset; passStr += ", userdata";
+
+ if (fft_preCallback.localMemSize > 0)
+ {
+ passStr += ", localmem";
+ }
+ passStr += ");";
+ }
}
else
{
@@ -1229,7 +1243,7 @@ namespace StockhamGenerator
std::string buffer;
RegBaseAndCountAndPos("", r, regIndex);
- if (fft_doPreCallback)
+ if (fft_doPreCallback && c2r)
{
regIndex += ")";
if (interleaved)
@@ -1790,9 +1804,9 @@ namespace StockhamGenerator
}
//Include callback parameters if callback is set
- if (fft_doPreCallback && !r2c)
+ if (fft_doPreCallback )
{
- if (c2r)
+ if ((r2c && !rcSimple) || c2r)
{
passStr += ", uint inOffset2";
}
@@ -1860,7 +1874,14 @@ namespace StockhamGenerator
else
{
passStr += "\n\tif(rw > 1)\n\t{";
- SweepRegs(SR_READ, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, false, bufferInRe2, bufferInIm2, "inOffset", 1, numB1, 0, passStr);
+ if (fft_doPreCallback)
+ {
+ SweepRegs(SR_READ, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, false, bufferInRe2, bufferInIm2, "inOffset2", 1, numB1, 0, passStr);
+ }
+ else
+ {
+ SweepRegs(SR_READ, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, false, bufferInRe2, bufferInIm2, "inOffset", 1, numB1, 0, passStr);
+ }
passStr += "\n\t}\n";
passStr += "\telse\n\t{";
@@ -2494,7 +2515,7 @@ namespace StockhamGenerator
//Pass precallback information to Pass object if its the first pass.
//This will be used in single kernel transforms
- if (params.fft_hasPreCallback && !r2c && i == 0 && !params.blockCompute)
+ if (params.fft_hasPreCallback && i == 0 && !params.blockCompute)
{
passes[0].SetPrecallback(params.fft_hasPreCallback, params.fft_preCallback);
}
@@ -2539,7 +2560,7 @@ namespace StockhamGenerator
//Pass precallback information to Pass object if its the first pass.
//This will be used in single kernel transforms
- if (!r2c && pid == 0 && params.fft_hasPreCallback)
+ if (pid == 0 && params.fft_hasPreCallback)
{
passes[0].SetPrecallback(params.fft_hasPreCallback, params.fft_preCallback);
}
@@ -3055,25 +3076,27 @@ namespace StockhamGenerator
str += "uint oOffset2;\n\n\t";
}
- if(inInterleaved)
- {
- if(!rcSimple && !params.fft_hasPreCallback) { str += "__global "; str += r2Type; str += " *lwbIn2;\n\t"; }
- if(!params.fft_hasPreCallback) { str += "__global "; str += r2Type; str += " *lwbIn;\n\t"; }
- }
- else if(inReal)
- {
- if(!rcSimple) { str += "__global "; str += rType; str += " *lwbIn2;\n\t"; }
- str += "__global "; str += rType; str += " *lwbIn;\n\t";
+ if (!params.fft_hasPreCallback)
+ {
+ if(inInterleaved)
+ {
+ if(!rcSimple) { str += "__global "; str += r2Type; str += " *lwbIn2;\n\t"; }
+ str += "__global "; str += r2Type; str += " *lwbIn;\n\t";
+ }
+ else if(inReal)
+ {
+ if(!rcSimple) { str += "__global "; str += rType; str += " *lwbIn2;\n\t"; }
+ str += "__global "; str += rType; str += " *lwbIn;\n\t";
- }
- else
- {
- if(!rcSimple && !params.fft_hasPreCallback) { str += "__global "; str += rType; str += " *lwbInRe2;\n\t"; }
- if(!rcSimple && !params.fft_hasPreCallback) { str += "__global "; str += rType; str += " *lwbInIm2;\n\t"; }
- if (!params.fft_hasPreCallback)
- {
- str += "__global "; str += rType; str += " *lwbInRe;\n\t";
- str += "__global "; str += rType; str += " *lwbInIm;\n\t";
+ }
+ else
+ {
+ if(!rcSimple) { str += "__global "; str += rType; str += " *lwbInRe2;\n\t"; }
+ if(!rcSimple) { str += "__global "; str += rType; str += " *lwbInIm2;\n\t"; }
+
+ str += "__global "; str += rType; str += " *lwbInRe;\n\t";
+ str += "__global "; str += rType; str += " *lwbInIm;\n\t";
+
}
}
@@ -3225,16 +3248,19 @@ namespace StockhamGenerator
str += "\n\t";
if(params.fft_placeness == CLFFT_INPLACE)
{
- if(inInterleaved)
- {
- if(!rcSimple && !params.fft_hasPreCallback) { str += "lwbIn2 = (__global "; str += r2Type; str += " *)gb + iOffset2;\n\t"; }
- if(!params.fft_hasPreCallback) { str += "lwbIn = (__global "; str += r2Type; str += " *)gb + iOffset;\n\t"; }
- }
- else
+ if(!params.fft_hasPreCallback)
{
- if(!rcSimple) { str += "lwbIn2 = (__global "; str += rType; str += " *)gb + iOffset2;\n\t"; }
- str += "lwbIn = (__global "; str += rType; str += " *)gb + iOffset;\n\t";
+ if(inInterleaved)
+ {
+ if(!rcSimple) { str += "lwbIn2 = (__global "; str += r2Type; str += " *)gb + iOffset2;\n\t"; }
+ str += "lwbIn = (__global "; str += r2Type; str += " *)gb + iOffset;\n\t";
+ }
+ else
+ {
+ if(!rcSimple) { str += "lwbIn2 = (__global "; str += rType; str += " *)gb + iOffset2;\n\t"; }
+ str += "lwbIn = (__global "; str += rType; str += " *)gb + iOffset;\n\t";
+ }
}
if(!rcSimple) { str += "lwbOut2 = gb + oOffset2;\n\t"; }
@@ -3243,17 +3269,14 @@ namespace StockhamGenerator
}
else
{
- if(inInterleaved || inReal)
+ if (!params.fft_hasPreCallback)
{
- if (!params.fft_hasPreCallback)
+ if(inInterleaved || inReal)
{
if(!rcSimple) { str += "lwbIn2 = gbIn + iOffset2;\n\t"; }
str += "lwbIn = gbIn + iOffset;\n\t";
}
- }
- else
- {
- if (!params.fft_hasPreCallback)
+ else
{
if(!rcSimple) { str += "lwbInRe2 = gbInRe + iOffset2;\n\t"; }
if(!rcSimple) { str += "lwbInIm2 = gbInIm + iOffset2;\n\t"; }
@@ -3343,18 +3366,15 @@ namespace StockhamGenerator
}
std::string inOffset;
- if (!r2c)
+ if (params.fft_placeness == CLFFT_INPLACE && !r2c2r)
{
- if (params.fft_placeness == CLFFT_INPLACE && !c2r)
- {
- inOffset += "ioOffset";
- }
- else
- {
- inOffset += "iOffset";
- }
+ inOffset += "ioOffset";
}
-
+ else
+ {
+ inOffset += "iOffset";
+ }
+
// Read data into LDS for blocked access
if(blockCompute)
{
@@ -3457,7 +3477,7 @@ namespace StockhamGenerator
{
if(rcSimple)
{
- if(inInterleaved || inReal) inBuf = "lwbIn, ";
+ if(inInterleaved || inReal) inBuf = params.fft_hasPreCallback ? "gbIn, " : "lwbIn, ";
else inBuf = "lwbInRe, lwbInIm, ";
if(outInterleaved || outReal) outBuf = "lwbOut";
else outBuf = "lwbOutRe, lwbOutIm";
@@ -3474,8 +3494,8 @@ namespace StockhamGenerator
{
if (params.fft_placeness == CLFFT_INPLACE)
{
- inBuf = "(__global "; inBuf += r2Type; inBuf += "*) gb, ";
- inBuf += "(__global "; inBuf += r2Type; inBuf += "*) gb, ";
+ inBuf = "(__global "; inBuf += r2c ? rType : r2Type; inBuf += "*) gb, ";
+ inBuf += "(__global "; inBuf += r2c ? rType : r2Type; inBuf += "*) gb, ";
}
else
{
@@ -3533,22 +3553,17 @@ namespace StockhamGenerator
str += "\t";
str += PassName(0, fwd);
str += "("; str += rw; str += me;
- if (r2c)
- {
- str += "0";
- }
- else
- {
- str += (params.fft_hasPreCallback || !c2r) ? inOffset : "0";
- }
+
+ str += (params.fft_hasPreCallback || !r2c2r) ? inOffset : "0";
+
str += ", 0, ";
str += inBuf; str += outBuf;
str += IterRegs("&");
//if precalback set
- if (!r2c && params.fft_hasPreCallback)
+ if (params.fft_hasPreCallback)
{
- str += c2r ? ", iOffset2, userdata" : ", userdata";
+ str += (r2c2r && !rcSimple) ? ", iOffset2, userdata" : ", userdata";
if (params.fft_preCallback.localMemSize > 0)
{
@@ -3604,14 +3619,7 @@ namespace StockhamGenerator
}
else
{
- if (r2c)
- {
- str += "0";
- }
- else
- {
- str += (params.fft_hasPreCallback || !c2r) ? inOffset : "0";
- }
+ str += (params.fft_hasPreCallback || !r2c2r) ? inOffset : "0";
}
str += ", ";
str += ldsOff;
@@ -3620,9 +3628,9 @@ namespace StockhamGenerator
str += ldsArgs; str += IterRegs("&");
//if precalback set, append additional arguments
- if (!r2c && !blockCompute && params.fft_hasPreCallback)
+ if (!blockCompute && params.fft_hasPreCallback)
{
- str += c2r ? ", iOffset2, userdata" : ", userdata";
+ str += (r2c2r && !rcSimple) ? ", iOffset2, userdata" : ", userdata";
if (params.fft_preCallback.localMemSize > 0)
{
diff --git a/src/library/generator.transpose.gcn.cpp b/src/library/generator.transpose.gcn.cpp
index 7740592..41bf2d2 100644
--- a/src/library/generator.transpose.gcn.cpp
+++ b/src/library/generator.transpose.gcn.cpp
@@ -513,7 +513,11 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
case CLFFT_HERMITIAN_PLANAR:
return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
case CLFFT_REAL:
- clKernWrite( transKernel, 3 ) << "global " << dtInput << "* tileIn = " << pmRealIn << " + iOffset;" << std::endl;
+ //tileIn is not declared when a precallback is set, because the callback reads from the global buffer directly
+ if (!params.fft_hasPreCallback)
+ {
+ clKernWrite( transKernel, 3 ) << "global " << dtInput << "* tileIn = " << pmRealIn << " + iOffset;" << std::endl;
+ }
break;
}
@@ -558,7 +562,7 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
size_t wIndexYEnd = params.transOutHorizontal ? params.fft_N[0] % blockSize.x : params.fft_N[1] % blockSize.y;
//If precallback is set
- if (params.fft_hasPreCallback)
+ if (params.fft_hasPreCallback && params.fft_inputLayout == CLFFT_COMPLEX_PLANAR)
{
clKernWrite( transKernel, 3 ) << dtComplex << " retCallback;" << std::endl;
}
@@ -675,13 +679,12 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
{
if (params.fft_preCallback.localMemSize > 0)
{
- clKernWrite( transKernel, 9 ) << "retCallback = " << params.fft_preCallback.funcname << "(" << pmComplexIn << ", iOffset + gInd, userdata, localmem);" << std::endl;
+ clKernWrite( transKernel, 9 ) << "tmp = " << params.fft_preCallback.funcname << "(" << pmComplexIn << ", iOffset + gInd, userdata, localmem);" << std::endl;
}
else
{
- clKernWrite( transKernel, 9 ) << "retCallback = " << params.fft_preCallback.funcname << "(" << pmComplexIn << ", iOffset + gInd, userdata);" << std::endl;
+ clKernWrite( transKernel, 9 ) << "tmp = " << params.fft_preCallback.funcname << "(" << pmComplexIn << ", iOffset + gInd, userdata);" << std::endl;
}
- clKernWrite( transKernel, 9 ) << "tmp = retCallback;" << std::endl;
}
else
{
@@ -715,9 +718,22 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
case CLFFT_HERMITIAN_PLANAR:
return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
case CLFFT_REAL:
- clKernWrite( transKernel, 9 ) << "tmp = tileIn[ gInd ];" << std::endl;
+ if (params.fft_hasPreCallback)
+ {
+ if (params.fft_preCallback.localMemSize > 0)
+ {
+ clKernWrite( transKernel, 9 ) << "tmp = " << params.fft_preCallback.funcname << "(" << pmRealIn << ", iOffset + gInd, userdata, localmem);" << std::endl;
+ }
+ else
+ {
+ clKernWrite( transKernel, 9 ) << "tmp = " << params.fft_preCallback.funcname << "(" << pmRealIn << ", iOffset + gInd, userdata);" << std::endl;
+ }
+ }
+ else
+ {
+ clKernWrite( transKernel, 9 ) << "tmp = tileIn[ gInd ];" << std::endl;
+ }
break;
-
}
if(branchingInAny)
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index 26c7af6..5642465 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -873,6 +873,14 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
trans1Plan->gen = Transpose_GCN;
trans1Plan->transflag = true;
+ //Propagate the precallback data to this child plan if it is set on the top-level plan
+ if (fftPlan->hasPreCallback)
+ {
+ trans1Plan->hasPreCallback = true;
+ trans1Plan->preCallback = fftPlan->preCallback;
+ trans1Plan->precallUserData = fftPlan->precallUserData;
+ }
+
OPENCL_V(clfftBakePlan(fftPlan->planTX, numQueues, commQueueFFT, NULL, NULL ),
_T( "BakePlan large1d trans1 plan failed" ) );
@@ -1099,6 +1107,14 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
colTPlan->oDist *= fftPlan->length[index];
}
+ //Propagate the precallback data to this child plan if it is set on the top-level plan
+ if (fftPlan->hasPreCallback)
+ {
+ colTPlan->hasPreCallback = true;
+ colTPlan->preCallback = fftPlan->preCallback;
+ colTPlan->precallUserData = fftPlan->precallUserData;
+ }
+
OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan large1d first column plan failed" ) );
//another column FFT, size clLengths[0], batch clLengths[1], output without transpose
diff --git a/src/tests/accuracy_test_common.h b/src/tests/accuracy_test_common.h
index 159cde4..7807701 100644
--- a/src/tests/accuracy_test_common.h
+++ b/src/tests/accuracy_test_common.h
@@ -172,13 +172,13 @@ void precallback_complex_to_complex( data_pattern pattern, direction::direction_
//set precallback values
if (hasUserDatatype)
{
- test_fft.set_precallback_complex_userdatatype();
+ test_fft.set_input_precallback_userdatatype();
}
else
{
- test_fft.set_precallback_complex();
+ test_fft.set_input_precallback();
}
- reference.set_precallback_complex();
+ reference.set_input_precallback();
if( direction == direction::forward )
{
@@ -269,6 +269,69 @@ void real_to_complex( data_pattern pattern,
/*****************************************************/
// dimension is inferred from lengths.size()
// tightly packed is inferred from strides.empty()
+// input layout is always real
+template< class T, class cl_T, class fftw_T >
+void precallback_real_to_complex( data_pattern pattern,
+ std::vector<size_t> lengths, size_t batch,
+ std::vector<size_t> input_strides, std::vector<size_t> output_strides,
+ size_t input_distance, size_t output_distance,
+ layout::buffer_layout_t out_layout,
+ placeness::placeness_t placeness,
+ T scale = 1.0f )
+{
+ clfft<T, cl_T> test_fft( static_cast<clfftDim>(lengths.size()), &lengths[0],
+ input_strides.empty() ? NULL : &input_strides[0],
+ output_strides.empty() ? NULL : &output_strides[0],
+ batch, input_distance, output_distance,
+ cl_layout(layout::real), cl_layout(out_layout),
+ cl_placeness(placeness) );
+
+ fftw<T, fftw_T> reference( lengths.size(), &lengths[0], batch, r2c );
+
+ if( pattern == sawtooth )
+ {
+ test_fft.set_input_to_sawtooth( 1.0f );
+ reference.set_data_to_sawtooth( 1.0f );
+ }
+ else if( pattern == value )
+ {
+ test_fft.set_input_to_value( 2.0f );
+ reference.set_all_data_to_value( 2.0f );
+ }
+ else if( pattern == impulse )
+ {
+ test_fft.set_input_to_impulse();
+ reference.set_data_to_impulse();
+ }
+ else if( pattern == erratic )
+ {
+ test_fft.set_input_to_random();
+ reference.set_data_to_random();
+ }
+ else
+ {
+ throw std::runtime_error( "invalid pattern type in real_to_complex()" );
+ }
+
+ // if we're starting with unequal data, we're destined for failure
+ EXPECT_EQ( true, test_fft.input_buffer() == reference.input_buffer() );
+
+ test_fft.set_input_precallback();
+ reference.set_input_precallback();
+
+ test_fft.forward_scale( scale );
+ reference.forward_scale( scale );
+
+ test_fft.transform();
+ reference.transform();
+
+ EXPECT_EQ( true, test_fft.result() == reference.result() );
+}
+
+/*****************************************************/
+/*****************************************************/
+// dimension is inferred from lengths.size()
+// tightly packed is inferred from strides.empty()
// output layout is always real
template< class T, class cl_T, class fftw_T >
void complex_to_real( data_pattern pattern,
diff --git a/src/tests/accuracy_test_precallback.cpp b/src/tests/accuracy_test_precallback.cpp
index a617017..6a392a4 100644
--- a/src/tests/accuracy_test_precallback.cpp
+++ b/src/tests/accuracy_test_precallback.cpp
@@ -50,7 +50,7 @@ namespace precallback
// *****************************************************
// *****************************************************
template< class T, class cl_T, class fftw_T >
-void precallback_1D_forward_in_place_complex_interleaved_to_complex_interleaved()
+void pow2_normal_1D_forward_in_place_complex_interleaved_to_complex_interleaved()
{
std::vector<size_t> lengths;
lengths.push_back( normal2 );
@@ -68,22 +68,22 @@ void precallback_1D_forward_in_place_complex_interleaved_to_complex_interleaved(
precallback_complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
}
-TEST_F(accuracy_test_precallback_single, precallback_1D_forward_in_place_complex_interleaved_to_complex_interleaved)
+TEST_F(accuracy_test_precallback_single, pow2_normal_1D_forward_in_place_complex_interleaved_to_complex_interleaved)
{
- try { precallback_1D_forward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+ try { pow2_normal_1D_forward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
catch( const std::exception& err ) { handle_exception(err); }
}
-TEST_F(accuracy_test_precallback_double, precallback_1D_forward_in_place_complex_interleaved_to_complex_interleaved)
+TEST_F(accuracy_test_precallback_double, pow2_normal_1D_forward_in_place_complex_interleaved_to_complex_interleaved)
{
- try { precallback_1D_forward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+ try { pow2_normal_1D_forward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }
catch( const std::exception& err ) { handle_exception(err); }
}
// *****************************************************
// *****************************************************
template< class T, class cl_T, class fftw_T >
-void precallback_1D_forward_in_place_complex_planar_to_complex_planar()
+void pow2_normal_1D_forward_in_place_complex_planar_to_complex_planar()
{
std::vector<size_t> lengths;
lengths.push_back( normal2 );
@@ -101,22 +101,22 @@ void precallback_1D_forward_in_place_complex_planar_to_complex_planar()
precallback_complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
}
-TEST_F(accuracy_test_precallback_single, precallback_1D_forward_in_place_complex_planar_to_complex_planar)
+TEST_F(accuracy_test_precallback_single, pow2_normal_1D_forward_in_place_complex_planar_to_complex_planar)
{
- try { precallback_1D_forward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }
+ try { pow2_normal_1D_forward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }
catch( const std::exception& err ) { handle_exception(err); }
}
-TEST_F(accuracy_test_precallback_double, precallback_1D_forward_in_place_complex_planar_to_complex_planar)
+TEST_F(accuracy_test_precallback_double, pow2_normal_1D_forward_in_place_complex_planar_to_complex_planar)
{
- try { precallback_1D_forward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }
+ try { pow2_normal_1D_forward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }
catch( const std::exception& err ) { handle_exception(err); }
}
// *****************************************************
// *****************************************************
template< class T, class cl_T, class fftw_T >
-void precallback_1D_forward_in_place_complex_to_complex_userdatatype()
+void pow2_normal_1D_forward_in_place_complex_to_complex_userdatatype()
{
std::vector<size_t> lengths;
lengths.push_back( normal2 );
@@ -134,9 +134,328 @@ void precallback_1D_forward_in_place_complex_to_complex_userdatatype()
precallback_complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness, true );
}
-TEST_F(accuracy_test_precallback_single, precallback_1D_forward_in_place_complex_to_complex_userdatatype)
+TEST_F(accuracy_test_precallback_single, pow2_normal_1D_forward_in_place_complex_to_complex_userdatatype)
{
- try { precallback_1D_forward_in_place_complex_to_complex_userdatatype< float, cl_float, fftwf_complex >(); }
+ try { pow2_normal_1D_forward_in_place_complex_to_complex_userdatatype< float, cl_float, fftwf_complex >(); }
+ catch( const std::exception& err ) { handle_exception(err); }
+}
+
+template< class T, class cl_T, class fftw_T >
+void pow2_normal_1D_forward_in_place_real_to_hermitian_interleaved()
+{
+ std::vector<size_t> lengths;
+ lengths.push_back( normal2 );
+ size_t batch = 1;
+ std::vector<size_t> input_strides;
+ std::vector<size_t> output_strides;
+ size_t input_distance = 0;
+ size_t output_distance = 0;
+ layout::buffer_layout_t out_layout = layout::hermitian_interleaved;
+ placeness::placeness_t placeness = placeness::in_place;
+
+ data_pattern pattern = sawtooth;
+ precallback_real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_precallback_single, pow2_normal_1D_forward_in_place_real_to_hermitian_interleaved)
+{
+ try { pow2_normal_1D_forward_in_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }
+ catch( const std::exception& err ) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_precallback_double, pow2_normal_1D_forward_in_place_real_to_hermitian_interleaved)
+{
+ try { pow2_normal_1D_forward_in_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }
+ catch( const std::exception& err ) { handle_exception(err); }
+}
+
+template< class T, class cl_T, class fftw_T >
+void pow2_small_1D_forward_in_place_real_to_hermitian_interleaved()
+{
+ std::vector<size_t> lengths;
+ lengths.push_back( small2 );
+ size_t batch = 1;
+ std::vector<size_t> input_strides;
+ std::vector<size_t> output_strides;
+ size_t input_distance = 0;
+ size_t output_distance = 0;
+ layout::buffer_layout_t out_layout = layout::hermitian_interleaved;
+ placeness::placeness_t placeness = placeness::in_place;
+
+ data_pattern pattern = sawtooth;
+ precallback_real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_precallback_single, pow2_small_1D_forward_in_place_real_to_hermitian_interleaved)
+{
+ try { pow2_small_1D_forward_in_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }
+ catch( const std::exception& err ) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_precallback_double, pow2_small_1D_forward_in_place_real_to_hermitian_interleaved)
+{
+ try { pow2_small_1D_forward_in_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }
+ catch( const std::exception& err ) { handle_exception(err); }
+}
+
+template< class T, class cl_T, class fftw_T >
+void pow2_large_1D_forward_in_place_real_to_hermitian_interleaved()
+{
+ std::vector<size_t> lengths;
+ lengths.push_back( large2 );
+ size_t batch = 1;
+ std::vector<size_t> input_strides;
+ std::vector<size_t> output_strides;
+ size_t input_distance = 0;
+ size_t output_distance = 0;
+ layout::buffer_layout_t out_layout = layout::hermitian_interleaved;
+ placeness::placeness_t placeness = placeness::in_place;
+
+ data_pattern pattern = sawtooth;
+ precallback_real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_precallback_single, pow2_large_1D_forward_in_place_real_to_hermitian_interleaved)
+{
+ try { pow2_large_1D_forward_in_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }
+ catch( const std::exception& err ) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_precallback_double, pow2_large_1D_forward_in_place_real_to_hermitian_interleaved)
+{
+ try { pow2_large_1D_forward_in_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }
+ catch( const std::exception& err ) { handle_exception(err); }
+}
+
+template< class T, class cl_T, class fftw_T >
+void pow2_large_1D_4M_in_place_real_to_hermitian_interleaved()
+{
+ std::vector<size_t> lengths;
+ lengths.push_back( 4194304 );
+ size_t batch = 1;
+ std::vector<size_t> input_strides;
+ std::vector<size_t> output_strides;
+ size_t input_distance = 0;
+ size_t output_distance = 0;
+ layout::buffer_layout_t layout = layout::hermitian_interleaved;
+ placeness::placeness_t placeness = placeness::in_place;
+
+ data_pattern pattern = sawtooth;
+ precallback_real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_precallback_single, pow2_large_1D_4M_in_place_real_to_hermitian_interleaved)
+{
+ try { pow2_large_1D_4M_in_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }
+ catch( const std::exception& err ) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_precallback_double, pow2_large_1D_4M_in_place_real_to_hermitian_interleaved)
+{
+ try { pow2_large_1D_4M_in_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }
+ catch( const std::exception& err ) { handle_exception(err); }
+}
+
+template< class T, class cl_T, class fftw_T >
+void pow2_normal_1D_array_real_to_hermitian()
+{
+ std::vector<size_t> lengths;
+ lengths.push_back( normal2 );
+ size_t batch = 8;
+ std::vector<size_t> input_strides;
+ std::vector<size_t> output_strides;
+ size_t input_distance = 0;
+ size_t output_distance = 0;
+ layout::buffer_layout_t layout = layout::hermitian_interleaved;
+ placeness::placeness_t placeness = placeness::in_place;
+
+ data_pattern pattern = sawtooth;
+ precallback_real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_precallback_single, pow2_normal_1D_array_real_to_hermitian)
+{
+ try { pow2_normal_1D_array_real_to_hermitian< float, cl_float, fftwf_complex >(); }
+ catch( const std::exception& err ) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_precallback_double, pow2_normal_1D_array_real_to_hermitian)
+{
+ try { pow2_normal_1D_array_real_to_hermitian< double, cl_double, fftw_complex >(); }
+ catch( const std::exception& err ) { handle_exception(err); }
+}
+
+template< class T, class cl_T, class fftw_T >
+void pow2_normal_1D_array_real_to_hermitian_with_odd_batch_size()
+{
+ std::vector<size_t> lengths;
+ lengths.push_back( normal2 );
+ size_t batch = 5;
+ std::vector<size_t> input_strides;
+ std::vector<size_t> output_strides;
+ size_t input_distance = 0;
+ size_t output_distance = 0;
+ layout::buffer_layout_t layout = layout::hermitian_interleaved;
+ placeness::placeness_t placeness = placeness::in_place;
+
+ data_pattern pattern = sawtooth;
+ precallback_real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_precallback_single, pow2_normal_1D_array_real_to_hermitian_with_odd_batch_size)
+{
+ try { pow2_normal_1D_array_real_to_hermitian_with_odd_batch_size< float, cl_float, fftwf_complex >(); }
+ catch( const std::exception& err ) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_precallback_double, pow2_normal_1D_array_real_to_hermitian_with_odd_batch_size)
+{
+ try { pow2_normal_1D_array_real_to_hermitian_with_odd_batch_size< double, cl_double, fftw_complex >(); }
+ catch( const std::exception& err ) { handle_exception(err); }
+}
+
+template< class T, class cl_T, class fftw_T >
+void pow2_normal_1D_out_of_place_real_to_hermitian_interleaved()
+{
+ std::vector<size_t> lengths;
+ lengths.push_back( normal2 );
+ size_t batch = 1;
+ std::vector<size_t> input_strides;
+ std::vector<size_t> output_strides;
+ size_t input_distance = 0;
+ size_t output_distance = 0;
+ layout::buffer_layout_t layout = layout::hermitian_interleaved;
+ placeness::placeness_t placeness = placeness::out_of_place;
+
+ data_pattern pattern = sawtooth;
+ precallback_real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_precallback_single, pow2_normal_1D_out_of_place_real_to_hermitian_interleaved)
+{
+ try { pow2_normal_1D_out_of_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }
+ catch( const std::exception& err ) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_precallback_double, pow2_normal_1D_out_of_place_real_to_hermitian_interleaved)
+{
+ try { pow2_normal_1D_out_of_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }
+ catch( const std::exception& err ) { handle_exception(err); }
+}
+
+template< class T, class cl_T, class fftw_T >
+void pow2_normal_1D_out_of_place_real_to_hermitian_planar()
+{
+ std::vector<size_t> lengths;
+ lengths.push_back( normal2 );
+ size_t batch = 1;
+ std::vector<size_t> input_strides;
+ std::vector<size_t> output_strides;
+ size_t input_distance = 0;
+ size_t output_distance = 0;
+ layout::buffer_layout_t layout = layout::hermitian_planar;
+ placeness::placeness_t placeness = placeness::out_of_place;
+
+ data_pattern pattern = sawtooth;
+ precallback_real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_precallback_single, pow2_normal_1D_out_of_place_real_to_hermitian_planar)
+{
+ try { pow2_normal_1D_out_of_place_real_to_hermitian_planar< float, cl_float, fftwf_complex >(); }
+ catch( const std::exception& err ) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_precallback_double, pow2_normal_1D_out_of_place_real_to_hermitian_planar)
+{
+ try { pow2_normal_1D_out_of_place_real_to_hermitian_planar< double, cl_double, fftw_complex >(); }
+ catch( const std::exception& err ) { handle_exception(err); }
+}
+
+template< class T, class cl_T, class fftw_T >
+void pow3_normal_1D_in_place_real_to_hermitian_interleaved()
+{
+ std::vector<size_t> lengths;
+ lengths.push_back( normal3 );
+ size_t batch = 1;
+ std::vector<size_t> input_strides;
+ std::vector<size_t> output_strides;
+ size_t input_distance = 0;
+ size_t output_distance = 0;
+ layout::buffer_layout_t layout = layout::hermitian_interleaved;
+ placeness::placeness_t placeness = placeness::in_place;
+
+ data_pattern pattern = sawtooth;
+ precallback_real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_precallback_single, pow3_normal_1D_in_place_real_to_hermitian_interleaved)
+{
+ try { pow3_normal_1D_in_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }
+ catch( const std::exception& err ) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_precallback_double, pow3_normal_1D_in_place_real_to_hermitian_interleaved)
+{
+ try { pow3_normal_1D_in_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }
+ catch( const std::exception& err ) { handle_exception(err); }
+}
+
+template< class T, class cl_T, class fftw_T >
+void pow3_small_1D_in_place_real_to_hermitian_interleaved()
+{
+ std::vector<size_t> lengths;
+ lengths.push_back( small3 );
+ size_t batch = 1;
+ std::vector<size_t> input_strides;
+ std::vector<size_t> output_strides;
+ size_t input_distance = 0;
+ size_t output_distance = 0;
+ layout::buffer_layout_t layout = layout::hermitian_interleaved;
+ placeness::placeness_t placeness = placeness::in_place;
+
+ data_pattern pattern = sawtooth;
+ precallback_real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_precallback_single, pow3_small_1D_in_place_real_to_hermitian_interleaved)
+{
+ try { pow3_small_1D_in_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }
+ catch( const std::exception& err ) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_precallback_double, pow3_small_1D_in_place_real_to_hermitian_interleaved)
+{
+ try { pow3_small_1D_in_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }
+ catch( const std::exception& err ) { handle_exception(err); }
+}
+
+template< class T, class cl_T, class fftw_T >
+void pow3_large_1D_in_place_real_to_hermitian_interleaved()
+{
+ std::vector<size_t> lengths;
+ lengths.push_back( large3 );
+ size_t batch = 1;
+ std::vector<size_t> input_strides;
+ std::vector<size_t> output_strides;
+ size_t input_distance = 0;
+ size_t output_distance = 0;
+ layout::buffer_layout_t layout = layout::hermitian_interleaved;
+ placeness::placeness_t placeness = placeness::in_place;
+
+ data_pattern pattern = sawtooth;
+ precallback_real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_precallback_single, pow3_large_1D_in_place_real_to_hermitian_interleaved)
+{
+ try { pow3_large_1D_in_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }
+ catch( const std::exception& err ) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_precallback_double, pow3_large_1D_in_place_real_to_hermitian_interleaved)
+{
+ try { pow3_large_1D_in_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }
catch( const std::exception& err ) { handle_exception(err); }
}
diff --git a/src/tests/buffer.h b/src/tests/buffer.h
index 50fc5a5..c9664fc 100644
--- a/src/tests/buffer.h
+++ b/src/tests/buffer.h
@@ -600,25 +600,54 @@ public:
void operator*=( buffer<T> & other_buffer )
{
+ size_t the_index;
+ T* base_ptr;
+ T* real_ptr;
+ T* imag_ptr;
+
if( is_interleaved() )
{
- T* base_ptr = _the_buffers[interleaved].ptr();
+ base_ptr = _the_buffers[interleaved].ptr();
+ }
+ else if ( is_planar() )
+ {
+ real_ptr = _the_buffers[re].ptr();
+ imag_ptr = _the_buffers[im].ptr();
+ }
+ else if ( is_real() )
+ {
+ base_ptr = _the_buffers[re].ptr();
+ }
- for( size_t batch = 0; batch < batch_size(); batch++ )
- for( size_t z = 0; z < length(dimz); z++ )
- for( size_t y = 0; y < length(dimy); y++ )
- for( size_t x = 0; x < length(dimx); x++ )
- {
- size_t real_index = index(x, y, z, batch);
- size_t imag_index = real_index + 1; // the imaginary component immediately follows the real
-
- *( base_ptr + real_index ) *= other_buffer.real(x, y, z, batch);
- if (!(this->is_real() || other_buffer.is_real()))
- {
- *( base_ptr + imag_index ) *= other_buffer.imag(x, y, z, batch);
- }
+ for( size_t batch = 0; batch < batch_size(); batch++ )
+ for( size_t z = 0; z < length(dimz); z++ )
+ for( size_t y = 0; y < length(dimy); y++ )
+ for( size_t x = 0; x < length(dimx); x++ )
+ {
+ the_index = index(x, y, z, batch);
+ if( is_interleaved() )
+ {
+ *( base_ptr + the_index ) *= other_buffer.real(x, y, z, batch);
+
+ if (!other_buffer.is_real())
+ {
+ the_index = the_index + 1; // the imaginary component immediately follows the real
+ *( base_ptr + the_index ) *= other_buffer.imag(x, y, z, batch);
}
- }
+ }
+ else if ( is_planar() )
+ {
+ *( real_ptr + the_index ) *= other_buffer.real(x, y, z, batch);
+ if (!other_buffer.is_real())
+ {
+ *( imag_ptr + the_index ) *= other_buffer.imag(x, y, z, batch);
+ }
+ }
+ else if ( is_real() )
+ {
+ *( base_ptr + the_index ) *= other_buffer.real(x, y, z, batch);
+ }
+ }
}
/*****************************************************/
diff --git a/src/tests/cl_transform.h b/src/tests/cl_transform.h
index 2edf28d..3a33bd3 100644
--- a/src/tests/cl_transform.h
+++ b/src/tests/cl_transform.h
@@ -618,7 +618,7 @@ public:
}
/*****************************************************/
- void set_precallback_complex() {
+ void set_input_precallback() {
cl_int status = 0;
clfftPrecision precision;
clfftGetPlanPrecision( *plan_handle, &precision );
@@ -633,6 +633,10 @@ public:
{
precallbackstr = (precision == CLFFT_SINGLE) ? STRINGIFY(MULVAL_PLANAR) : STRINGIFY(MULVAL_PLANAR_DP);
}
+ else if (input.is_real())
+ {
+ precallbackstr = (precision == CLFFT_SINGLE) ? STRINGIFY(MULVAL_REAL) : STRINGIFY(MULVAL_REAL_DP);
+ }
//precallback user data
buffer<T> userdata( static_cast<size_t>(dimension),
@@ -657,7 +661,7 @@ public:
}
/*****************************************************/
- void set_precallback_complex_userdatatype() {
+ void set_input_precallback_userdatatype() {
cl_int status = 0;
char* precallbackstr = STRINGIFY(MULVAL_UDT);
diff --git a/src/tests/fftw_transform.h b/src/tests/fftw_transform.h
index 9936b21..7970207 100644
--- a/src/tests/fftw_transform.h
+++ b/src/tests/fftw_transform.h
@@ -440,7 +440,7 @@ public:
input = other_buffer;
}
- void set_precallback_complex()
+ void set_input_precallback()
{
//precallback user data
buffer<T> userdata( input.number_of_dimensions(),
@@ -453,7 +453,7 @@ public:
);
userdata.set_all_to_random_data(_lengths[0], 10);
-
+
input *= userdata;
}
diff --git a/src/tests/test_constants.h b/src/tests/test_constants.h
index cc39769..56f14d4 100644
--- a/src/tests/test_constants.h
+++ b/src/tests/test_constants.h
@@ -63,6 +63,20 @@
return ret; \n \
}
+#define MULVAL_REAL float mulval(__global void* in, uint offset, __global void* userdata)\n \
+ { \n \
+ float scalar = *((__global float*)userdata + offset); \n \
+ float ret = *((__global float*)in + offset) * scalar; \n \
+ return ret; \n \
+ }
+
+#define MULVAL_REAL_DP double mulval(__global void* in, uint offset, __global void* userdata)\n \
+ { \n \
+ double scalar = *((__global double*)userdata + offset); \n \
+ double ret = *((__global double*)in + offset) * scalar; \n \
+ return ret; \n \
+ }
+
#define STRUCT_USERDATA typedef struct USER_DATA \
{ \
float scalar1; \
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits mailing list