[clfft] 25/128: Precallback - R2C large 1D and few more GTests

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Oct 22 14:54:34 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository clfft.

commit e10d1cdeb44ea353addcd960330dcce947504c06
Author: Pradeep <pradeep.rao at amd.com>
Date:   Wed Aug 19 23:33:34 2015 +0530

    Precallback - R2C large 1D and few more GTests
---
 src/library/accessors.cpp               |  22 +-
 src/library/generator.copy.cpp          |  16 +-
 src/library/generator.stockham.cpp      | 160 ++++++++-------
 src/library/generator.transpose.gcn.cpp |  30 ++-
 src/library/plan.cpp                    |  16 ++
 src/tests/accuracy_test_common.h        |  69 ++++++-
 src/tests/accuracy_test_precallback.cpp | 345 ++++++++++++++++++++++++++++++--
 src/tests/buffer.h                      |  59 ++++--
 src/tests/cl_transform.h                |   8 +-
 src/tests/fftw_transform.h              |   4 +-
 src/tests/test_constants.h              |  14 ++
 11 files changed, 606 insertions(+), 137 deletions(-)

diff --git a/src/library/accessors.cpp b/src/library/accessors.cpp
index 808b202..32de44d 100644
--- a/src/library/accessors.cpp
+++ b/src/library/accessors.cpp
@@ -781,23 +781,17 @@ clfftStatus clFFTSetPlanCallback(clfftPlanHandle plHandle, const char* funcName,
 
 	if (callbackType == PRECALLBACK)
 	{
-		if (fftPlan->inputLayout == CLFFT_COMPLEX_INTERLEAVED || fftPlan->inputLayout == CLFFT_COMPLEX_PLANAR 
-			|| fftPlan->inputLayout == CLFFT_HERMITIAN_INTERLEAVED || fftPlan->inputLayout == CLFFT_HERMITIAN_PLANAR)
+		if (funcName != NULL && funcString != NULL)
 		{
-			if (funcName != NULL && funcString != NULL)
-			{
-				fftPlan->hasPreCallback = true;
+			fftPlan->hasPreCallback = true;
 
-				fftPlan->preCallback.funcname = funcName;
-				fftPlan->preCallback.funcstring = funcString;
-				fftPlan->preCallback.userdatastruct = userStructString;
-				fftPlan->preCallback.localMemSize = (localMemSize > 0) ? localMemSize : 0;
+			fftPlan->preCallback.funcname = funcName;
+			fftPlan->preCallback.funcstring = funcString;
+			fftPlan->preCallback.userdatastruct = userStructString;
+			fftPlan->preCallback.localMemSize = (localMemSize > 0) ? localMemSize : 0;
 
-				fftPlan->precallUserData = userdata;
-			}
-		}
-		else
-			return CLFFT_NOTIMPLEMENTED;
+			fftPlan->precallUserData = userdata;
+		}		
 	}
 
 	return	CLFFT_SUCCESS;
diff --git a/src/library/generator.copy.cpp b/src/library/generator.copy.cpp
index 3315e0d..45e9728 100644
--- a/src/library/generator.copy.cpp
+++ b/src/library/generator.copy.cpp
@@ -252,11 +252,15 @@ namespace CopyGenerator
 			// Setup registers
 			str += "\t"; str += RegBaseType<PR>(2); str += " R;\n\n";
 
+			size_t NtRounded64 = DivRoundingUp<size_t>(Nt,64) * 64;
+
 			if(!general)
 			{
 				// Setup variables
-				str += "\tuint batch, mel, mel2;\n\t";
-				str += "batch = me/"; str += SztToStr(Nt); str += ";\n\t";
+				str += "\tuint batch, meg, mel, mel2;\n\t";
+				str += "batch = me/"; str += SztToStr(NtRounded64); str += ";\n\t";
+				str += "meg = me%"; str += SztToStr(NtRounded64); str += ";\n\t";
+
 				str += "mel = me%"; str += SztToStr(Nt); str += ";\n\t";
 				str += "mel2 = ("; str += SztToStr(N); str += " - mel)%"; str += SztToStr(N); str += ";\n\n";
 			}
@@ -346,6 +350,7 @@ namespace CopyGenerator
 			}
 			else
 			{
+				str += "if(meg < "; str += SztToStr(Nt); str += ")\n\t{\n\t";
 				if(c2h)
 				{
 					if(inIlvd)
@@ -384,7 +389,7 @@ namespace CopyGenerator
 					{
 						str += "lwbOut[0] = R;\n\t";
 						str += "R.y = -R.y;\n\t";
-						str += "lwbOut2[0] = R;\n\n";
+						str += "lwbOut2[0] = R;\n\t";
 					}
 					else
 					{
@@ -392,9 +397,10 @@ namespace CopyGenerator
 						str += "lwbOutIm[0] = R.y;\n\t";
 						str += "R.y = -R.y;\n\t";
 						str += "lwbOutRe2[0] = R.x;\n\t";
-						str += "lwbOutIm2[0] = R.y;\n\n";
+						str += "lwbOutIm2[0] = R.y;\n\t";
 					}
 				}
+				str += "}\n\n";
 			}
 
 			str += "}\n";
@@ -463,7 +469,7 @@ clfftStatus FFTGeneratedCopyAction::getWorkSizes (std::vector<size_t> & globalWS
 				}
 				else
 				{
-					count *= (1 + this->signature.fft_N[0]/2); 
+					count *= (DivRoundingUp<size_t>((1 + this->signature.fft_N[0]/2), 64) * 64);
 				}
 			}
 			break;
diff --git a/src/library/generator.stockham.cpp b/src/library/generator.stockham.cpp
index 03771e5..91e9e81 100644
--- a/src/library/generator.stockham.cpp
+++ b/src/library/generator.stockham.cpp
@@ -863,7 +863,7 @@ namespace StockhamGenerator
 										RegBaseAndCountAndPos("", i*radix + r, regIndex); 
 									
 										hid = (i * radix + r) / (numB * radix / 2);
-										if (fft_doPreCallback && component == SR_COMP_REAL && hid != 0)
+										if (fft_doPreCallback && c2r && component == SR_COMP_REAL && hid != 0)
 										{
 											regIndexC = regIndex; regIndexC += ").y";
 										}
@@ -921,7 +921,7 @@ namespace StockhamGenerator
 									passStr += ");";
 								}
 
-								if (fft_doPreCallback && component == SR_COMP_REAL && hid != 0)
+								if (fft_doPreCallback && c2r && component == SR_COMP_REAL && hid != 0)
 								{
 									passStr += "\n\t";
 									passStr += regIndexC; passStr += " = "; passStr += regIndexSub; passStr += ";";
@@ -932,10 +932,24 @@ namespace StockhamGenerator
 								passStr += " = "; 
 
 								//Use the return value from precallback if set
-								if (fft_doPreCallback && component == SR_COMP_BOTH)
+								if (fft_doPreCallback && (component == SR_COMP_BOTH || r2c))
 								{
-									passStr += "retPrecallback["; passStr += SztToStr(v); passStr += "]"; 
-									passStr += interleaved ? tail : (c == 0) ? ".x;" : ".y;";
+									if (component == SR_COMP_BOTH)
+									{
+										passStr += "retPrecallback["; passStr += SztToStr(v); passStr += "]"; 
+										passStr += interleaved ? tail : (c == 0) ? ".x;" : ".y;";
+									}
+									else if (r2c)
+									{
+										passStr += fft_preCallback.funcname; passStr += "("; passStr += buffer; passStr += ", ";
+										passStr += bufOffset; passStr += ", userdata";
+
+										if (fft_preCallback.localMemSize > 0)
+										{
+											passStr += ", localmem";
+										}
+										passStr += ");";
+									}
 								}
 								else
 								{
@@ -1229,7 +1243,7 @@ namespace StockhamGenerator
 						std::string buffer;
 
 						RegBaseAndCountAndPos("", r, regIndex); 
-						if (fft_doPreCallback)
+						if (fft_doPreCallback && c2r)
 						{
 							 regIndex += ")";
 							 if (interleaved)
@@ -1790,9 +1804,9 @@ namespace StockhamGenerator
 			}
 
 			//Include callback parameters if callback is set
-			if (fft_doPreCallback && !r2c)
+			if (fft_doPreCallback )
 			{
-				if (c2r)
+				if ((r2c && !rcSimple) || c2r)
 				{
 					passStr += ", uint inOffset2";
 				}
@@ -1860,7 +1874,14 @@ namespace StockhamGenerator
 					else
 					{
 						passStr += "\n\tif(rw > 1)\n\t{";
-						SweepRegs(SR_READ, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, false, bufferInRe2, bufferInIm2, "inOffset", 1, numB1, 0, passStr);
+						if (fft_doPreCallback)
+						{
+							SweepRegs(SR_READ, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, false, bufferInRe2, bufferInIm2, "inOffset2", 1, numB1, 0, passStr);
+						}
+						else
+						{
+							SweepRegs(SR_READ, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, false, bufferInRe2, bufferInIm2, "inOffset", 1, numB1, 0, passStr);
+						}
 						passStr += "\n\t}\n";
 
 						passStr += "\telse\n\t{";
@@ -2494,7 +2515,7 @@ namespace StockhamGenerator
 					
 					//Pass precallback information to Pass object if its the first pass. 
 					//This will be used in single kernel transforms
-					if (params.fft_hasPreCallback && !r2c && i == 0 && !params.blockCompute)
+					if (params.fft_hasPreCallback && i == 0 && !params.blockCompute)
 					{
 						passes[0].SetPrecallback(params.fft_hasPreCallback, params.fft_preCallback);
 					}
@@ -2539,7 +2560,7 @@ namespace StockhamGenerator
 
 					//Pass precallback information to Pass object if its the first pass. 
 					//This will be used in single kernel transforms
-					if (!r2c && pid == 0 && params.fft_hasPreCallback)
+					if (pid == 0 && params.fft_hasPreCallback)
 					{
 						passes[0].SetPrecallback(params.fft_hasPreCallback, params.fft_preCallback);
 					}
@@ -3055,25 +3076,27 @@ namespace StockhamGenerator
 						str += "uint oOffset2;\n\n\t";
 					}
 
-					if(inInterleaved)
-					{
-						if(!rcSimple && !params.fft_hasPreCallback)	{	str += "__global "; str += r2Type; str += " *lwbIn2;\n\t"; }
-						if(!params.fft_hasPreCallback)	{   str += "__global "; str += r2Type; str += " *lwbIn;\n\t";  }
-					}
-					else if(inReal)
-					{
-						if(!rcSimple)	{	str += "__global "; str += rType; str += " *lwbIn2;\n\t"; }
-											str += "__global "; str += rType; str += " *lwbIn;\n\t";
+					if (!params.fft_hasPreCallback)	
+					{ 
+						if(inInterleaved)
+						{
+							if(!rcSimple)	{	str += "__global "; str += r2Type; str += " *lwbIn2;\n\t"; }
+												str += "__global "; str += r2Type; str += " *lwbIn;\n\t";  
+						}
+						else if(inReal)
+						{
+							if(!rcSimple)	{	str += "__global "; str += rType; str += " *lwbIn2;\n\t"; }
+												str += "__global "; str += rType; str += " *lwbIn;\n\t";
 
-					}
-					else
-					{
-						if(!rcSimple && !params.fft_hasPreCallback)	{	str += "__global "; str += rType; str += " *lwbInRe2;\n\t"; }
-						if(!rcSimple && !params.fft_hasPreCallback)	{	str += "__global "; str += rType; str += " *lwbInIm2;\n\t"; }
-						if (!params.fft_hasPreCallback)	
-						{  
-							str += "__global "; str += rType; str += " *lwbInRe;\n\t"; 
-							str += "__global "; str += rType; str += " *lwbInIm;\n\t"; 
+						}
+						else
+						{
+							if(!rcSimple)	{	str += "__global "; str += rType; str += " *lwbInRe2;\n\t"; }
+							if(!rcSimple)	{	str += "__global "; str += rType; str += " *lwbInIm2;\n\t"; }
+							  
+												str += "__global "; str += rType; str += " *lwbInRe;\n\t"; 
+												str += "__global "; str += rType; str += " *lwbInIm;\n\t"; 
+							
 						}
 					}
 
@@ -3225,16 +3248,19 @@ namespace StockhamGenerator
 					str += "\n\t";
 					if(params.fft_placeness == CLFFT_INPLACE)
 					{
-						if(inInterleaved)
-						{
-							if(!rcSimple && !params.fft_hasPreCallback) {	str += "lwbIn2 = (__global "; str += r2Type; str += " *)gb + iOffset2;\n\t"; }
-							if(!params.fft_hasPreCallback) {	str += "lwbIn  = (__global "; str += r2Type; str += " *)gb + iOffset;\n\t"; }
-						}
-						else
+						if(!params.fft_hasPreCallback)
 						{
-							if(!rcSimple) {	str += "lwbIn2 = (__global "; str += rType; str += " *)gb + iOffset2;\n\t"; }
-											str += "lwbIn  = (__global "; str += rType; str += " *)gb + iOffset;\n\t";
+							if(inInterleaved)
+							{
+								if(!rcSimple) {	str += "lwbIn2 = (__global "; str += r2Type; str += " *)gb + iOffset2;\n\t"; }
+												str += "lwbIn  = (__global "; str += r2Type; str += " *)gb + iOffset;\n\t"; 
+							}
+							else
+							{
+								if(!rcSimple) {	str += "lwbIn2 = (__global "; str += rType; str += " *)gb + iOffset2;\n\t"; }
+												str += "lwbIn  = (__global "; str += rType; str += " *)gb + iOffset;\n\t";
 
+							}
 						}
 
 						if(!rcSimple) {	str += "lwbOut2 = gb + oOffset2;\n\t"; }
@@ -3243,17 +3269,14 @@ namespace StockhamGenerator
 					}
 					else
 					{
-						if(inInterleaved || inReal)
+						if (!params.fft_hasPreCallback)
 						{
-							if (!params.fft_hasPreCallback)
+							if(inInterleaved || inReal)
 							{
 								if(!rcSimple) {	str += "lwbIn2 = gbIn + iOffset2;\n\t"; }
 								str += "lwbIn = gbIn + iOffset;\n\t"; 
 							}
-						}
-						else
-						{
-							if (!params.fft_hasPreCallback)
+							else
 							{
 								if(!rcSimple) {	str += "lwbInRe2 = gbInRe + iOffset2;\n\t"; }
 								if(!rcSimple) {	str += "lwbInIm2 = gbInIm + iOffset2;\n\t"; }
@@ -3343,18 +3366,15 @@ namespace StockhamGenerator
 				}
 
 				std::string inOffset;
-				if (!r2c)
+				if (params.fft_placeness == CLFFT_INPLACE && !r2c2r)
 				{
-					if (params.fft_placeness == CLFFT_INPLACE && !c2r)
-					{
-						inOffset += "ioOffset";
-					}
-					else
-					{
-						inOffset += "iOffset";
-					}
+					inOffset += "ioOffset";
 				}
-
+				else
+				{
+					inOffset += "iOffset";
+				}
+				
 				// Read data into LDS for blocked access
 				if(blockCompute)
 				{
@@ -3457,7 +3477,7 @@ namespace StockhamGenerator
 				{
 					if(rcSimple)
 					{
-						if(inInterleaved || inReal)		inBuf  = "lwbIn, ";
+						if(inInterleaved || inReal)		inBuf  = params.fft_hasPreCallback ?  "gbIn, " : "lwbIn, ";
 						else							inBuf  = "lwbInRe, lwbInIm, ";
 						if(outInterleaved || outReal)	outBuf = "lwbOut";
 						else							outBuf = "lwbOutRe, lwbOutIm";
@@ -3474,8 +3494,8 @@ namespace StockhamGenerator
 							{
 								if (params.fft_placeness == CLFFT_INPLACE) 
 								{
-									inBuf = "(__global "; inBuf += r2Type; inBuf += "*) gb, ";
-									inBuf += "(__global "; inBuf += r2Type; inBuf += "*) gb, ";
+									inBuf = "(__global "; inBuf += r2c ? rType : r2Type; inBuf += "*) gb, ";
+									inBuf += "(__global "; inBuf += r2c ? rType : r2Type; inBuf += "*) gb, ";
 								}
 								else
 								{
@@ -3533,22 +3553,17 @@ namespace StockhamGenerator
 					str += "\t";
 					str += PassName(0, fwd);
 					str += "("; str += rw; str += me;
-					if (r2c)
-					{
-						str += "0";
-					}
-					else
-					{
-						str += (params.fft_hasPreCallback || !c2r) ? inOffset : "0";
-					}
+					
+					str += (params.fft_hasPreCallback || !r2c2r) ? inOffset : "0";
+					
 					str += ", 0, ";
 					str += inBuf; str += outBuf;
 					str += IterRegs("&");
 
 					//if precalback set 
-					if (!r2c && params.fft_hasPreCallback)
+					if (params.fft_hasPreCallback)
 					{
-						str += c2r ?  ", iOffset2, userdata" : ", userdata";
+						str += (r2c2r && !rcSimple) ?  ", iOffset2, userdata" : ", userdata";
 
 						if (params.fft_preCallback.localMemSize > 0)
 						{
@@ -3604,14 +3619,7 @@ namespace StockhamGenerator
 							}
 							else
 							{
-								if (r2c)
-								{
-									str += "0";
-								}
-								else
-								{
-									str += (params.fft_hasPreCallback || !c2r) ? inOffset : "0";
-								}
+								str += (params.fft_hasPreCallback || !r2c2r) ? inOffset : "0";
 							}
 							str += ", ";
 							str += ldsOff;
@@ -3620,9 +3628,9 @@ namespace StockhamGenerator
 							str += ldsArgs; str += IterRegs("&"); 
 							
 							//if precalback set, append additional arguments
-							if (!r2c && !blockCompute && params.fft_hasPreCallback)
+							if (!blockCompute && params.fft_hasPreCallback)
 							{
-								str += c2r ?  ", iOffset2, userdata" : ", userdata";
+								str += (r2c2r && !rcSimple) ?  ", iOffset2, userdata" : ", userdata";
 
 								if (params.fft_preCallback.localMemSize > 0)
 								{
diff --git a/src/library/generator.transpose.gcn.cpp b/src/library/generator.transpose.gcn.cpp
index 7740592..41bf2d2 100644
--- a/src/library/generator.transpose.gcn.cpp
+++ b/src/library/generator.transpose.gcn.cpp
@@ -513,7 +513,11 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 		case CLFFT_HERMITIAN_PLANAR:
 			return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
 		case CLFFT_REAL:
-			clKernWrite( transKernel, 3 ) << "global " << dtInput << "* tileIn = " << pmRealIn << " + iOffset;" << std::endl;
+			//No tileIn declaration is needed when precallback is set, as the global buffer is passed directly to the callback
+			if (!params.fft_hasPreCallback)
+			{
+				clKernWrite( transKernel, 3 ) << "global " << dtInput << "* tileIn = " << pmRealIn << " + iOffset;" << std::endl;
+			}
 			break;
 			
 		}
@@ -558,7 +562,7 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 		size_t wIndexYEnd = params.transOutHorizontal ? params.fft_N[0] % blockSize.x : params.fft_N[1] % blockSize.y;
 
 		//If precallback is set
-		if (params.fft_hasPreCallback)
+		if (params.fft_hasPreCallback && params.fft_inputLayout == CLFFT_COMPLEX_PLANAR)
 		{
 			clKernWrite( transKernel, 3 ) << dtComplex << " retCallback;" << std::endl;
 		}
@@ -675,13 +679,12 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 					{
 						if (params.fft_preCallback.localMemSize > 0)
 						{
-							clKernWrite( transKernel, 9 ) << "retCallback = " << params.fft_preCallback.funcname << "(" << pmComplexIn << ", iOffset + gInd, userdata, localmem);" << std::endl;
+							clKernWrite( transKernel, 9 ) << "tmp = " << params.fft_preCallback.funcname << "(" << pmComplexIn << ", iOffset + gInd, userdata, localmem);" << std::endl;
 						}
 						else
 						{
-							clKernWrite( transKernel, 9 ) << "retCallback = " << params.fft_preCallback.funcname << "(" << pmComplexIn << ", iOffset + gInd, userdata);" << std::endl;
+							clKernWrite( transKernel, 9 ) << "tmp = " << params.fft_preCallback.funcname << "(" << pmComplexIn << ", iOffset + gInd, userdata);" << std::endl;
 						}
-						clKernWrite( transKernel, 9 ) << "tmp = retCallback;" << std::endl;
 					}
 					else
 					{
@@ -715,9 +718,22 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 			case CLFFT_HERMITIAN_PLANAR:
 				return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
 			case CLFFT_REAL:
-				clKernWrite( transKernel, 9 ) << "tmp = tileIn[ gInd ];" << std::endl;
+				if (params.fft_hasPreCallback)
+				{
+					if (params.fft_preCallback.localMemSize > 0)
+					{
+						clKernWrite( transKernel, 9 ) << "tmp = " << params.fft_preCallback.funcname << "(" << pmRealIn << ", iOffset + gInd, userdata, localmem);" << std::endl;
+					}
+					else
+					{
+						clKernWrite( transKernel, 9 ) << "tmp = " << params.fft_preCallback.funcname << "(" << pmRealIn << ", iOffset + gInd, userdata);" << std::endl;
+					}
+				}
+				else
+				{
+					clKernWrite( transKernel, 9 ) << "tmp = tileIn[ gInd ];" << std::endl;
+				}
 				break;
-
 			}
 
 			if(branchingInAny)
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index 26c7af6..5642465 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -873,6 +873,14 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 					trans1Plan->gen           = Transpose_GCN;
 					trans1Plan->transflag     = true;
 
+					//Set callback data if set on top level plan
+					if (fftPlan->hasPreCallback)
+					{
+						trans1Plan->hasPreCallback = true;
+						trans1Plan->preCallback = fftPlan->preCallback;
+						trans1Plan->precallUserData = fftPlan->precallUserData;
+					}
+
 					OPENCL_V(clfftBakePlan(fftPlan->planTX, numQueues, commQueueFFT, NULL, NULL ),
 						_T( "BakePlan large1d trans1 plan failed" ) );
 
@@ -1099,6 +1107,14 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 						colTPlan->oDist        *= fftPlan->length[index];
 					}
 
+					//Set callback data if set on top level plan
+					if (fftPlan->hasPreCallback)
+					{
+						colTPlan->hasPreCallback = true;
+						colTPlan->preCallback = fftPlan->preCallback;
+						colTPlan->precallUserData = fftPlan->precallUserData;
+					}
+
 					OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan large1d first column plan failed" ) );
 
 					//another column FFT, size clLengths[0], batch clLengths[1], output without transpose
diff --git a/src/tests/accuracy_test_common.h b/src/tests/accuracy_test_common.h
index 159cde4..7807701 100644
--- a/src/tests/accuracy_test_common.h
+++ b/src/tests/accuracy_test_common.h
@@ -172,13 +172,13 @@ void precallback_complex_to_complex( data_pattern pattern, direction::direction_
 	//set precallback values
 	if (hasUserDatatype)
 	{
-		test_fft.set_precallback_complex_userdatatype();
+		test_fft.set_input_precallback_userdatatype();
 	}
 	else
 	{
-		test_fft.set_precallback_complex();
+		test_fft.set_input_precallback();
 	}
-	reference.set_precallback_complex();
+	reference.set_input_precallback();
 
 	if( direction == direction::forward )
 	{
@@ -269,6 +269,69 @@ void real_to_complex( data_pattern pattern,
 /*****************************************************/
 // dimension is inferred from lengths.size()
 // tightly packed is inferred from strides.empty()
+// input layout is always real
+template< class T, class cl_T, class fftw_T >
+void precallback_real_to_complex( data_pattern pattern,
+	std::vector<size_t> lengths, size_t batch,
+	std::vector<size_t> input_strides, std::vector<size_t> output_strides,
+	size_t input_distance, size_t output_distance,
+	layout::buffer_layout_t out_layout,
+	placeness::placeness_t placeness,
+	T scale = 1.0f )
+{
+	clfft<T, cl_T> test_fft( static_cast<clfftDim>(lengths.size()), &lengths[0],
+		input_strides.empty() ? NULL : &input_strides[0],
+		output_strides.empty() ? NULL : &output_strides[0],
+		batch, input_distance, output_distance,
+		cl_layout(layout::real), cl_layout(out_layout),
+		cl_placeness(placeness) );
+
+	fftw<T, fftw_T> reference( lengths.size(), &lengths[0], batch, r2c );
+
+	if( pattern == sawtooth )
+	{
+		test_fft.set_input_to_sawtooth( 1.0f );
+		reference.set_data_to_sawtooth( 1.0f );
+	}
+	else if( pattern == value )
+	{
+		test_fft.set_input_to_value( 2.0f );
+		reference.set_all_data_to_value( 2.0f );
+	}
+	else if( pattern == impulse )
+	{
+		test_fft.set_input_to_impulse();
+		reference.set_data_to_impulse();
+	}
+	else if( pattern == erratic )
+	{
+		test_fft.set_input_to_random();
+		reference.set_data_to_random();
+	}
+	else
+	{
+		throw std::runtime_error( "invalid pattern type in real_to_complex()" );
+	}
+
+	// if we're starting with unequal data, we're destined for failure
+	EXPECT_EQ( true, test_fft.input_buffer() == reference.input_buffer() );
+
+	test_fft.set_input_precallback();
+	reference.set_input_precallback();
+
+	test_fft.forward_scale( scale );
+	reference.forward_scale( scale );
+
+	test_fft.transform();
+	reference.transform();
+
+	EXPECT_EQ( true, test_fft.result() == reference.result() );
+}
+
+/*****************************************************/
+/*****************************************************/
+// dimension is inferred from lengths.size()
+// tightly packed is inferred from strides.empty()
 // output layout is always real
 template< class T, class cl_T, class fftw_T >
 void complex_to_real( data_pattern pattern,
diff --git a/src/tests/accuracy_test_precallback.cpp b/src/tests/accuracy_test_precallback.cpp
index a617017..6a392a4 100644
--- a/src/tests/accuracy_test_precallback.cpp
+++ b/src/tests/accuracy_test_precallback.cpp
@@ -50,7 +50,7 @@ namespace precallback
 // *****************************************************
 // *****************************************************
 template< class T, class cl_T, class fftw_T >
-void precallback_1D_forward_in_place_complex_interleaved_to_complex_interleaved()
+void pow2_normal_1D_forward_in_place_complex_interleaved_to_complex_interleaved()
 {
 	std::vector<size_t> lengths;
 	lengths.push_back( normal2 );
@@ -68,22 +68,22 @@ void precallback_1D_forward_in_place_complex_interleaved_to_complex_interleaved(
 	precallback_complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
 }
 
-TEST_F(accuracy_test_precallback_single, precallback_1D_forward_in_place_complex_interleaved_to_complex_interleaved)
+TEST_F(accuracy_test_precallback_single, pow2_normal_1D_forward_in_place_complex_interleaved_to_complex_interleaved)
 {
-	try { precallback_1D_forward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+	try { pow2_normal_1D_forward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
 	catch( const std::exception& err ) { handle_exception(err);	}
 }
 
-TEST_F(accuracy_test_precallback_double, precallback_1D_forward_in_place_complex_interleaved_to_complex_interleaved)
+TEST_F(accuracy_test_precallback_double, pow2_normal_1D_forward_in_place_complex_interleaved_to_complex_interleaved)
 {
-	try { precallback_1D_forward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+	try { pow2_normal_1D_forward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }
 	catch( const std::exception& err ) { handle_exception(err);	}
 }
 
 // *****************************************************
 // *****************************************************
 template< class T, class cl_T, class fftw_T >
-void precallback_1D_forward_in_place_complex_planar_to_complex_planar()
+void pow2_normal_1D_forward_in_place_complex_planar_to_complex_planar()
 {
 	std::vector<size_t> lengths;
 	lengths.push_back( normal2 );
@@ -101,22 +101,22 @@ void precallback_1D_forward_in_place_complex_planar_to_complex_planar()
 	precallback_complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness );
 }
 
-TEST_F(accuracy_test_precallback_single, precallback_1D_forward_in_place_complex_planar_to_complex_planar)
+TEST_F(accuracy_test_precallback_single, pow2_normal_1D_forward_in_place_complex_planar_to_complex_planar)
 {
-	try { precallback_1D_forward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }
+	try { pow2_normal_1D_forward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }
 	catch( const std::exception& err ) { handle_exception(err);	}
 }
 
-TEST_F(accuracy_test_precallback_double, precallback_1D_forward_in_place_complex_planar_to_complex_planar)
+TEST_F(accuracy_test_precallback_double, pow2_normal_1D_forward_in_place_complex_planar_to_complex_planar)
 {
-	try { precallback_1D_forward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }
+	try { pow2_normal_1D_forward_in_place_complex_planar_to_complex_planar< double, cl_double, fftw_complex >(); }
 	catch( const std::exception& err ) { handle_exception(err);	}
 }
 
 // *****************************************************
 // *****************************************************
 template< class T, class cl_T, class fftw_T >
-void precallback_1D_forward_in_place_complex_to_complex_userdatatype()
+void pow2_normal_1D_forward_in_place_complex_to_complex_userdatatype()
 {
 	std::vector<size_t> lengths;
 	lengths.push_back( normal2 );
@@ -134,9 +134,328 @@ void precallback_1D_forward_in_place_complex_to_complex_userdatatype()
 	precallback_complex_to_complex<T, cl_T, fftw_T>( pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness, true );
 }
 
-TEST_F(accuracy_test_precallback_single, precallback_1D_forward_in_place_complex_to_complex_userdatatype)
+TEST_F(accuracy_test_precallback_single, pow2_normal_1D_forward_in_place_complex_to_complex_userdatatype)
 {
-	try { precallback_1D_forward_in_place_complex_to_complex_userdatatype< float, cl_float, fftwf_complex >(); }
+	try { pow2_normal_1D_forward_in_place_complex_to_complex_userdatatype< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+template< class T, class cl_T, class fftw_T >
+void pow2_normal_1D_forward_in_place_real_to_hermitian_interleaved()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t out_layout = layout::hermitian_interleaved;
+	placeness::placeness_t placeness = placeness::in_place;
+
+	data_pattern pattern = sawtooth;
+	precallback_real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_precallback_single, pow2_normal_1D_forward_in_place_real_to_hermitian_interleaved)
+{
+	try { pow2_normal_1D_forward_in_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_precallback_double, pow2_normal_1D_forward_in_place_real_to_hermitian_interleaved)
+{
+	try { pow2_normal_1D_forward_in_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+template< class T, class cl_T, class fftw_T >
+void pow2_small_1D_forward_in_place_real_to_hermitian_interleaved()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( small2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t out_layout = layout::hermitian_interleaved;
+	placeness::placeness_t placeness = placeness::in_place;
+
+	data_pattern pattern = sawtooth;
+	precallback_real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_precallback_single, pow2_small_1D_forward_in_place_real_to_hermitian_interleaved)
+{
+	try { pow2_small_1D_forward_in_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_precallback_double, pow2_small_1D_forward_in_place_real_to_hermitian_interleaved)
+{
+	try { pow2_small_1D_forward_in_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+template< class T, class cl_T, class fftw_T >
+void pow2_large_1D_forward_in_place_real_to_hermitian_interleaved()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( large2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t out_layout = layout::hermitian_interleaved;
+	placeness::placeness_t placeness = placeness::in_place;
+
+	data_pattern pattern = sawtooth;
+	precallback_real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, out_layout, placeness );
+}
+
+TEST_F(accuracy_test_precallback_single, pow2_large_1D_forward_in_place_real_to_hermitian_interleaved)
+{
+	try { pow2_large_1D_forward_in_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_precallback_double, pow2_large_1D_forward_in_place_real_to_hermitian_interleaved)
+{
+	try { pow2_large_1D_forward_in_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+template< class T, class cl_T, class fftw_T >
+void pow2_large_1D_4M_in_place_real_to_hermitian_interleaved()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( 4194304 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+	placeness::placeness_t placeness = placeness::in_place;
+
+	data_pattern pattern = sawtooth;
+	precallback_real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_precallback_single, pow2_large_1D_4M_in_place_real_to_hermitian_interleaved)
+{
+	try { pow2_large_1D_4M_in_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_precallback_double, pow2_large_1D_4M_in_place_real_to_hermitian_interleaved)
+{
+	try { pow2_large_1D_4M_in_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+template< class T, class cl_T, class fftw_T >
+void pow2_normal_1D_array_real_to_hermitian()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 );
+	size_t batch = 8;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+	placeness::placeness_t placeness = placeness::in_place;
+
+	data_pattern pattern = sawtooth;
+	precallback_real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_precallback_single, pow2_normal_1D_array_real_to_hermitian)
+{
+	try { pow2_normal_1D_array_real_to_hermitian< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_precallback_double, pow2_normal_1D_array_real_to_hermitian)
+{
+	try { pow2_normal_1D_array_real_to_hermitian< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+template< class T, class cl_T, class fftw_T >
+void pow2_normal_1D_array_real_to_hermitian_with_odd_batch_size()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 );
+	size_t batch = 5;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+	placeness::placeness_t placeness = placeness::in_place;
+
+	data_pattern pattern = sawtooth;
+	precallback_real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_precallback_single, pow2_normal_1D_array_real_to_hermitian_with_odd_batch_size)
+{
+	try { pow2_normal_1D_array_real_to_hermitian_with_odd_batch_size< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_precallback_double, pow2_normal_1D_array_real_to_hermitian_with_odd_batch_size)
+{
+	try { pow2_normal_1D_array_real_to_hermitian_with_odd_batch_size< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+template< class T, class cl_T, class fftw_T >
+void pow2_normal_1D_out_of_place_real_to_hermitian_interleaved()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+	placeness::placeness_t placeness = placeness::out_of_place;
+
+	data_pattern pattern = sawtooth;
+	precallback_real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_precallback_single, pow2_normal_1D_out_of_place_real_to_hermitian_interleaved)
+{
+	try { pow2_normal_1D_out_of_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_precallback_double, pow2_normal_1D_out_of_place_real_to_hermitian_interleaved)
+{
+	try { pow2_normal_1D_out_of_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+template< class T, class cl_T, class fftw_T >
+void pow2_normal_1D_out_of_place_real_to_hermitian_planar()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t layout = layout::hermitian_planar;
+	placeness::placeness_t placeness = placeness::out_of_place;
+
+	data_pattern pattern = sawtooth;
+	precallback_real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_precallback_single, pow2_normal_1D_out_of_place_real_to_hermitian_planar)
+{
+	try { pow2_normal_1D_out_of_place_real_to_hermitian_planar< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_precallback_double, pow2_normal_1D_out_of_place_real_to_hermitian_planar)
+{
+	try { pow2_normal_1D_out_of_place_real_to_hermitian_planar< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+template< class T, class cl_T, class fftw_T >
+void pow3_normal_1D_in_place_real_to_hermitian_interleaved()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal3 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+	placeness::placeness_t placeness = placeness::in_place;
+
+	data_pattern pattern = sawtooth;
+	precallback_real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_precallback_single, pow3_normal_1D_in_place_real_to_hermitian_interleaved)
+{
+	try { pow3_normal_1D_in_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_precallback_double, pow3_normal_1D_in_place_real_to_hermitian_interleaved)
+{
+	try { pow3_normal_1D_in_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+template< class T, class cl_T, class fftw_T >
+void pow3_small_1D_in_place_real_to_hermitian_interleaved()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( small3 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+	placeness::placeness_t placeness = placeness::in_place;
+
+	data_pattern pattern = sawtooth;
+	precallback_real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_precallback_single, pow3_small_1D_in_place_real_to_hermitian_interleaved)
+{
+	try { pow3_small_1D_in_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_precallback_double, pow3_small_1D_in_place_real_to_hermitian_interleaved)
+{
+	try { pow3_small_1D_in_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+template< class T, class cl_T, class fftw_T >
+void pow3_large_1D_in_place_real_to_hermitian_interleaved()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( large3 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+	placeness::placeness_t placeness = placeness::in_place;
+
+	data_pattern pattern = sawtooth;
+	precallback_real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_precallback_single, pow3_large_1D_in_place_real_to_hermitian_interleaved)
+{
+	try { pow3_large_1D_in_place_real_to_hermitian_interleaved< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_precallback_double, pow3_large_1D_in_place_real_to_hermitian_interleaved)
+{
+	try { pow3_large_1D_in_place_real_to_hermitian_interleaved< double, cl_double, fftw_complex >(); }
 	catch( const std::exception& err ) { handle_exception(err);	}
 }
 
diff --git a/src/tests/buffer.h b/src/tests/buffer.h
index 50fc5a5..c9664fc 100644
--- a/src/tests/buffer.h
+++ b/src/tests/buffer.h
@@ -600,25 +600,54 @@ public:
 
 	void operator*=( buffer<T> & other_buffer )
 	{
+		size_t the_index;
+		T* base_ptr;
+		T* real_ptr;
+		T* imag_ptr;
+
 		if( is_interleaved() )
 		{
-			T* base_ptr = _the_buffers[interleaved].ptr();
+			base_ptr = _the_buffers[interleaved].ptr();
+		}
+		else if ( is_planar() )
+		{
+			real_ptr = _the_buffers[re].ptr();
+			imag_ptr = _the_buffers[im].ptr();
+		}
+		else if ( is_real() )
+		{
+			base_ptr = _the_buffers[re].ptr();
+		}
 
-			for( size_t batch = 0; batch < batch_size(); batch++ )
-					for( size_t z = 0; z < length(dimz); z++ )
-						for( size_t y = 0; y < length(dimy); y++ )
-							for( size_t x = 0; x < length(dimx); x++ )
-							{					
-								size_t real_index = index(x, y, z, batch);
-								size_t imag_index = real_index + 1; // the imaginary component immediately follows the real
-
-								*( base_ptr + real_index ) *= other_buffer.real(x, y, z, batch);
-								if (!(this->is_real() || other_buffer.is_real()))
-								{
-									*( base_ptr + imag_index ) *= other_buffer.imag(x, y, z, batch);
-								}
+		for( size_t batch = 0; batch < batch_size(); batch++ )
+			for( size_t z = 0; z < length(dimz); z++ )
+				for( size_t y = 0; y < length(dimy); y++ )
+					for( size_t x = 0; x < length(dimx); x++ )
+					{		
+						the_index = index(x, y, z, batch);
+						if( is_interleaved() )
+						{
+							*( base_ptr + the_index ) *= other_buffer.real(x, y, z, batch);
+		
+							if (!other_buffer.is_real())
+							{
+								the_index = the_index + 1; // the imaginary component immediately follows the real
+								*( base_ptr + the_index ) *= other_buffer.imag(x, y, z, batch);
 							}
-		}
+						}
+						else if ( is_planar() )
+						{
+							*( real_ptr + the_index ) *= other_buffer.real(x, y, z, batch);
+							if (!other_buffer.is_real())
+							{
+								*( imag_ptr + the_index ) *= other_buffer.imag(x, y, z, batch);
+							}
+						}
+						else if ( is_real() )
+						{
+							*( base_ptr + the_index ) *= other_buffer.real(x, y, z, batch);
+						}
+					}
 	}
 
 	/*****************************************************/
diff --git a/src/tests/cl_transform.h b/src/tests/cl_transform.h
index 2edf28d..3a33bd3 100644
--- a/src/tests/cl_transform.h
+++ b/src/tests/cl_transform.h
@@ -618,7 +618,7 @@ public:
 	}
 
 	/*****************************************************/
-	void set_precallback_complex() {
+	void set_input_precallback() {
 		cl_int status = 0;
 		clfftPrecision precision;
 		clfftGetPlanPrecision( *plan_handle, &precision );
@@ -633,6 +633,10 @@ public:
 		{
 			precallbackstr = (precision == CLFFT_SINGLE) ? STRINGIFY(MULVAL_PLANAR) : STRINGIFY(MULVAL_PLANAR_DP);
 		}
+		else if (input.is_real())
+		{
+			precallbackstr = (precision == CLFFT_SINGLE) ? STRINGIFY(MULVAL_REAL) : STRINGIFY(MULVAL_REAL_DP);
+		}
 
 		//precallback user data
 		buffer<T> userdata( 	static_cast<size_t>(dimension),
@@ -657,7 +661,7 @@ public:
 	}
 
 		/*****************************************************/
-	void set_precallback_complex_userdatatype() {
+	void set_input_precallback_userdatatype() {
 		cl_int status = 0;
 
 		char* precallbackstr = STRINGIFY(MULVAL_UDT);
diff --git a/src/tests/fftw_transform.h b/src/tests/fftw_transform.h
index 9936b21..7970207 100644
--- a/src/tests/fftw_transform.h
+++ b/src/tests/fftw_transform.h
@@ -440,7 +440,7 @@ public:
 		input = other_buffer;
 	}
 
-	void set_precallback_complex()
+	void set_input_precallback()
 	{
 		//precallback user data
 		buffer<T> userdata( 	input.number_of_dimensions(),
@@ -453,7 +453,7 @@ public:
 					);
 		
 		userdata.set_all_to_random_data(_lengths[0], 10);
-
+		
 		input *= userdata;
 	}
 
diff --git a/src/tests/test_constants.h b/src/tests/test_constants.h
index cc39769..56f14d4 100644
--- a/src/tests/test_constants.h
+++ b/src/tests/test_constants.h
@@ -63,6 +63,20 @@
 				return ret; \n \
 				}
 
+#define MULVAL_REAL float mulval(__global void* in, uint offset, __global void* userdata)\n \
+				{ \n \
+				float scalar = *((__global float*)userdata + offset); \n \
+				float ret = *((__global float*)in + offset) * scalar; \n \
+				return ret; \n \
+				}
+
+#define MULVAL_REAL_DP double mulval(__global void* in, uint offset, __global void* userdata)\n \
+				{ \n \
+				double scalar = *((__global double*)userdata + offset); \n \
+				double ret = *((__global double*)in + offset) * scalar; \n \
+				return ret; \n \
+				}
+
 #define STRUCT_USERDATA typedef struct USER_DATA  \
 					   {  \
 						float scalar1;  \

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list