[clfft] 13/128: Precallback - C2R 1D out-place updates

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Oct 22 14:54:33 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository clfft.

commit fec5292d5e6da3909b43bd06dc237a4d6d517c99
Author: Pradeep <pradeep.rao at amd.com>
Date:   Mon Aug 10 16:15:23 2015 +0530

    Precallback - C2R 1D out-place updates
---
 src/client-callback/callback-client.cpp | 32 +++++++++++++++++++++-----------
 src/library/generator.stockham.cpp      | 30 ++++++++++++++++++++++++------
 2 files changed, 45 insertions(+), 17 deletions(-)

diff --git a/src/client-callback/callback-client.cpp b/src/client-callback/callback-client.cpp
index 1614922..c660154 100644
--- a/src/client-callback/callback-client.cpp
+++ b/src/client-callback/callback-client.cpp
@@ -184,26 +184,36 @@ bool compare(T1 *refData, std::valarray< T2 > real, std::valarray< T2 > imag,
 //Compare reference and opencl output
 template < typename T1 , typename T2 >
 bool compare(T1 *refData, std::valarray< T2 > real, 
-             size_t length, int batchsize, const float epsilon = 1e-6f)
+             size_t batch_size, size_t *o_strides, size_t *lengths, const float epsilon = 1e-6f)
 {
     float error = 0.0f;
     T1 ref = 0.0;
 	T1 diff;
 	float normRef = 0.0f;
 	float normError = 0.0f;
-	size_t scale = length;
-
+	
 	//real compare
-	for (int b = 0; b < batchsize; b++)
-	{	
-		int idx = b * (length + 2);
-		for(size_t i = idx; i < (idx + length); ++i)
+	for(size_t b = 0; b < batch_size; b++)
+	{
+		size_t p3 = b * o_strides[3];
+		for(size_t k = 0; k < lengths[2]; k++)
 		{
-			diff = refData[i] - (real[i] * scale);
-			error += (float)(diff * diff);
-			ref += refData[i] * refData[i];
+			size_t p2 = p3 + k * o_strides[2];
+			for(size_t j = 0; j < lengths[1]; j++)
+			{
+				size_t p1 = p2 + j * o_strides[1];
+				for(size_t i = 0; i < lengths[0]; i++)
+				{
+					size_t p0 = p1 + i * o_strides[0];
+
+					diff = refData[p0] - (real[p0] * lengths[0]);
+					error += (float)(diff * diff);
+					ref += refData[p0] * refData[p0];
+				}
+			}
 		}
 	}
+	
 	if (error != 0)
 	{
 		normRef =::sqrtf((float) ref);
@@ -1029,7 +1039,7 @@ void compareWithReference(clfftLayout in_layout, clfftLayout out_layout, size_t
 					refout = get_fftwf_output_c2r(lengths, strides,  inStrides, outStrides, batch_size, fftBatchSize, outfftBatchSize, fftVectorSizePadded,
 												in_layout, outfftVectorSizePadded, outfftVectorSize, dim, dir);
 
-					if (!compare<float, T>(refout, real, outfftVectorSize, batch_size))
+					if (!compare<float, T>(refout, real, batch_size, o_strides, lengths))
 						checkflag = true;
 
 					/*for( cl_uint i = 0; i < outfftBatchSize; i = i + outStrides[0])
diff --git a/src/library/generator.stockham.cpp b/src/library/generator.stockham.cpp
index df35141..9c7e734 100644
--- a/src/library/generator.stockham.cpp
+++ b/src/library/generator.stockham.cpp
@@ -2943,17 +2943,25 @@ namespace StockhamGenerator
 
 						if(outInterleaved)
 						{
-							str += "__global "; str += r2Type; str += " * restrict gbOut)\n";
+							str += "__global "; str += r2Type; str += " * restrict gbOut";
 						}
 						else if(outReal)
 						{
-							str += "__global "; str += rType; str += " * restrict gbOut)\n";
+							str += "__global "; str += rType; str += " * restrict gbOut";
 						}
 						else
 						{
 							str += "__global const "; str += rType; str += " * restrict gbOutRe, ";
-							str += "__global const "; str += rType; str += " * restrict gbOutIm)\n";
+							str += "__global const "; str += rType; str += " * restrict gbOutIm";
+						}
+
+						//If plan has pre-callback
+						if (params.fft_hasPreCallback)
+						{
+							str += callbackstr;
 						}
+
+						str += ")\n";
 					}
 					else
 					{
@@ -3218,8 +3226,8 @@ namespace StockhamGenerator
 					{
 						if(inInterleaved || inReal)
 						{
-							if(!rcSimple) {	str += "lwbIn2 = gbIn + iOffset2;\n\t"; }
-											str += "lwbIn = gbIn + iOffset;\n\t";
+							if(!rcSimple && !params.fft_hasPreCallback) {	str += "lwbIn2 = gbIn + iOffset2;\n\t"; }
+							if(!params.fft_hasPreCallback) { str += "lwbIn = gbIn + iOffset;\n\t"; }
 						}
 						else
 						{
@@ -3432,7 +3440,17 @@ namespace StockhamGenerator
 					}
 					else
 					{
-						if(inInterleaved || inReal)		inBuf  = (inInterleaved && params.fft_hasPreCallback) ? "gb, gb, " : "lwbIn, lwbIn2, ";
+						if(inInterleaved || inReal)		
+						{
+							if (!params.fft_hasPreCallback)
+							{
+								inBuf  = "lwbIn, lwbIn2, ";
+							}
+							else
+							{
+								inBuf  = (params.fft_placeness == CLFFT_INPLACE) ? "gb, gb, " : "gbIn, gbIn, " ;
+							}	
+						}
 						else							inBuf  = (params.fft_hasPreCallback) ? "gbInRe, gbInRe, gbInIm, gbInIm, " : "lwbInRe, lwbInRe2, lwbInIm, lwbInIm2, ";
 
 						if(outInterleaved || outReal)	outBuf = "lwbOut, lwbOut2";

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list