[clfft] 08/32: Add twiddling to general swap kernels (planar and forward only for now; interleaved and backward still need to be added). Extended sizes are now supported for in-place transpose.

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Tue Apr 26 08:34:08 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository clfft.

commit ca209c20d92adc6b05d9c65f6dd29c306c4a421f
Author: Timmy <timmy.liu at amd.com>
Date:   Wed Mar 16 14:26:40 2016 -0600

    Add twiddling to general swap kernels (planar and forward only for now; interleaved and backward still need to be added). Extended sizes are now supported for in-place transpose.
---
 src/library/action.transpose.cpp    | 28 ++++++++++++++++++++++++++--
 src/library/generator.transpose.cpp |  1 -
 src/library/plan.cpp                | 24 +++++++++++++++---------
 src/tests/accuracy_test_pow3.cpp    | 10 ++++++++++
 4 files changed, 51 insertions(+), 12 deletions(-)

diff --git a/src/library/action.transpose.cpp b/src/library/action.transpose.cpp
index ce657bd..ba2bca0 100644
--- a/src/library/action.transpose.cpp
+++ b/src/library/action.transpose.cpp
@@ -135,6 +135,8 @@ clfftStatus FFTGeneratedTransposeNonSquareAction::initParams()
     }
 
     this->signature.fft_DataDim = this->plan->length.size() + 1;
+	//if (this->plan->length.back() == 729 && this->plan->length.size() > 2)//silly Timmy delete
+	//	this->signature.fft_DataDim--;
     int i = 0;
     for (i = 0; i < (this->signature.fft_DataDim - 1); i++)
     {
@@ -336,8 +338,7 @@ clfftStatus FFTGeneratedTransposeNonSquareAction::getWorkSizes(std::vector< size
 	size_t dim_ratio = bigger_dim / smaller_dim;
     size_t global_item_size;
 
-    if (this->signature.nonSquareKernelType == NON_SQUARE_TRANS_TRANSPOSE_BATCHED_LEADING  
-        || this->signature.nonSquareKernelType == NON_SQUARE_TRANS_TRANSPOSE_BATCHED)
+    if (this->signature.nonSquareKernelType == NON_SQUARE_TRANS_TRANSPOSE_BATCHED_LEADING)
     {
         std::cout << "TIMMY"<< std::endl;
         if (smaller_dim % (16 * reShapeFactor) == 0)
@@ -363,6 +364,29 @@ clfftStatus FFTGeneratedTransposeNonSquareAction::getWorkSizes(std::vector< size
         localWS.clear();
         localWS.push_back(lwSize);
     }
+	else if (this->signature.nonSquareKernelType == NON_SQUARE_TRANS_TRANSPOSE_BATCHED)
+	{
+		std::cout << "TIMMY" << std::endl;
+		if (smaller_dim % (16 * reShapeFactor) == 0)
+			wg_slice = smaller_dim / 16 / reShapeFactor;
+		else
+			wg_slice = (smaller_dim / (16 * reShapeFactor)) + 1;
+
+		global_item_size = wg_slice*(wg_slice + 1) / 2 * 16 * 16 * this->plan->batchsize;
+
+		for (int i = 2; i < this->plan->length.size(); i++)//Timmy delete
+		{
+			global_item_size *= this->plan->length[i];
+		}
+
+		/*Push the data required for the transpose kernels*/
+		globalWS.clear();
+		globalWS.push_back(global_item_size);
+
+
+		localWS.clear();
+		localWS.push_back(lwSize);
+	}
     else
     {
         /*Now calculate the data for the swap kernels */
diff --git a/src/library/generator.transpose.cpp b/src/library/generator.transpose.cpp
index f4ea9da..a2b6b3d 100644
--- a/src/library/generator.transpose.cpp
+++ b/src/library/generator.transpose.cpp
@@ -38,7 +38,6 @@ void OffsetCalc(std::stringstream& transKernel, const FFTKernelGenKeyParams& par
 	for (size_t i = params.fft_DataDim - 2; i > 0; i--)
 	{
 		clKernWrite(transKernel, 3) << offset << " += (g_index/numGroupsY_" << i << ")*" << stride[i + 1] << ";" << std::endl;
-		//clKernWrite(transKernel, 3) << offset << " += (g_index/numGroupsY_" << i << ")*" << 1048576 << ";" << std::endl;
 		clKernWrite(transKernel, 3) << "g_index = g_index % numGroupsY_" << i << ";" << std::endl;
 	}
 
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index 9dc23ef..391246a 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -697,7 +697,9 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 
 					for (size_t index = 1; index < fftPlan->length.size(); index++)
 					{
-						trans1Plan->length.push_back(fftPlan->length[index]);
+						//trans1Plan->length.push_back(fftPlan->length[index]);
+						trans1Plan->batchsize = trans1Plan->batchsize * fftPlan->length[index];//Timmy
+						trans1Plan->iDist = trans1Plan->iDist / fftPlan->length[index];//Timmy
 						trans1Plan->inStride.push_back(fftPlan->inStride[index]);
 						trans1Plan->outStride.push_back(trans1Plan->oDist);
 						trans1Plan->oDist *= fftPlan->length[index];
@@ -783,14 +785,16 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 					trans2Plan->oDist         = clLengths[1] * trans2Plan->outStride[1];
                     trans2Plan->gen           = transGen;
 
-					if (transGen != Transpose_NONSQUARE)//TIMMY twiddle
+					//if (transGen != Transpose_NONSQUARE)//TIMMY twiddle
 						trans2Plan->large1D		  = fftPlan->length[0];
 
 					trans2Plan->transflag     = true;
 
 					for (size_t index = 1; index < fftPlan->length.size(); index++)
 					{
-						trans2Plan->length.push_back(fftPlan->length[index]);
+						//trans2Plan->length.push_back(fftPlan->length[index]);
+						trans2Plan->batchsize = trans2Plan->batchsize * fftPlan->length[index];//Timmy
+						trans2Plan->iDist = trans2Plan->iDist / fftPlan->length[index];//Timmy
 						trans2Plan->inStride.push_back(fftPlan->outStride[index]);
 						trans2Plan->outStride.push_back(trans2Plan->oDist);
 						trans2Plan->oDist *= fftPlan->length[index];
@@ -839,11 +843,11 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 						row2Plan->oDist *= fftPlan->length[index];
 					}
 					
-					if (transGen != Transpose_NONSQUARE)//TIMMY twiddle in transform
-					{
-						row2Plan->large1D = fftPlan->length[0];
-						row2Plan->twiddleFront = true;
-					}
+					//if (transGen != Transpose_NONSQUARE)//TIMMY twiddle in transform
+					//{
+					//	row2Plan->large1D = fftPlan->length[0];
+					//	row2Plan->twiddleFront = true;
+					//}
 
 					OPENCL_V(clfftBakePlan(fftPlan->planY, numQueues, commQueueFFT, NULL, NULL ),
 						_T( "BakePlan large1d second row plan failed" ) );
@@ -876,7 +880,9 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 
 					for (size_t index = 1; index < fftPlan->length.size(); index++)
 					{
-						trans3Plan->length.push_back(fftPlan->length[index]);
+						//trans3Plan->length.push_back(fftPlan->length[index]);
+						trans3Plan->batchsize = trans3Plan->batchsize * fftPlan->length[index];//Timmy for 2D
+						trans3Plan->iDist = trans3Plan->iDist / fftPlan->length[index];//Timmy
 						trans3Plan->inStride.push_back(trans3Plan->iDist);
 						trans3Plan->iDist *= fftPlan->length[index];
 						trans3Plan->outStride.push_back(fftPlan->outStride[index]);
diff --git a/src/tests/accuracy_test_pow3.cpp b/src/tests/accuracy_test_pow3.cpp
index 3e7770e..7034e03 100644
--- a/src/tests/accuracy_test_pow3.cpp
+++ b/src/tests/accuracy_test_pow3.cpp
@@ -2038,6 +2038,16 @@ TEST_F(accuracy_test_pow3_double, huge_1D_forward_in_place_complex_planar_to_com
 	try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(1594323, 1, layout::complex_planar, direction::forward); }
 	catch (const std::exception& err) { handle_exception(err); }
 }
+TEST_F(accuracy_test_pow3_double, huge_1D_forward_in_place_complex_planar_to_complex_planar_1594323_2)
+{
+	try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(1594323, 2, layout::complex_planar, direction::forward); }
+	catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow3_double, huge_1D_forward_in_place_complex_planar_to_complex_planar_2187_1)
+{
+	try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(2187, 1, layout::complex_planar, direction::forward); }
+	catch (const std::exception& err) { handle_exception(err); }
+}
 
 //interleaved
 TEST_F(accuracy_test_pow3_single, huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved_1594323_1)

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list