[clfft] 12/32: bug fix. passed gtest *huge_1D*, *huge_sp_test* and *huge_dp_test*

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Tue Apr 26 08:34:09 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository clfft.

commit de26d9a9e9063aef6df501bceca0379ce7b86563
Author: Timmy <timmy.liu at amd.com>
Date:   Tue Mar 22 10:00:46 2016 -0500

    bug fix. passed gtest *huge_1D*, *huge_sp_test* and *huge_dp_test*
---
 src/library/action.transpose.cpp |  4 +---
 src/library/plan.cpp             | 27 +++++++++++++++++++++------
 2 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/src/library/action.transpose.cpp b/src/library/action.transpose.cpp
index 2a897bc..82789b8 100644
--- a/src/library/action.transpose.cpp
+++ b/src/library/action.transpose.cpp
@@ -135,8 +135,7 @@ clfftStatus FFTGeneratedTransposeNonSquareAction::initParams()
     }
 
     this->signature.fft_DataDim = this->plan->length.size() + 1;
-	//if (this->plan->length.back() == 729 && this->plan->length.size() > 2)//silly Timmy delete
-	//	this->signature.fft_DataDim--;
+
     int i = 0;
     for (i = 0; i < (this->signature.fft_DataDim - 1); i++)
     {
@@ -700,7 +699,6 @@ clfftStatus FFTGeneratedTransposeSquareAction::generateKernel(FFTRepo& fftRepo,
 
 	std::string programCode;
 	OPENCL_V(clfft_transpose_generator::genTransposeKernelBatched(this->signature, programCode, lwSize, reShapeFactor), _T("GenerateTransposeKernel() failed!"));
-	//std::cout << programCode << std::endl;//TIMMY
 
 	cl_int status = CL_SUCCESS;
 	cl_device_id Device = NULL;
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index 6091d98..8e951a3 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -614,9 +614,11 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 					clLengths[1] = 100;
 
 				clLengths[0] = fftPlan->length[0]/clLengths[1];
-				//timmy delete ensure clLengths[0] > clLengths[1] only when inplace is enabled
+				//When in-place with no library memory allocation is requested, ensure clLengths[0] >= clLengths[1]
+				//so that the swap kernel is launched after the square transpose kernel, since twiddling is only enabled in the second kernel
 				if (clLengths[0] < clLengths[1] && clfftGetRequestLibNoMemAlloc() && fftPlan->placeness == CLFFT_INPLACE)
 				{
+					std::cout << "switch lengths" << std::endl;
 					size_t temp = clLengths[0];
 					clLengths[0] = clLengths[1];
 					clLengths[1] = temp;
@@ -712,7 +714,7 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 					trans1Plan->gen           = transGen;
 					trans1Plan->transflag     = true;
 
-					if (trans1Plan->gen == Transpose_NONSQUARE || 1)
+					if (trans1Plan->gen == Transpose_NONSQUARE || trans1Plan->gen == Transpose_SQUARE)// inplace transpose
 					{
 						for (size_t index = 1; index < fftPlan->length.size(); index++)
 						{
@@ -828,7 +830,7 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 
 					trans2Plan->transflag     = true;
 
-					if (trans2Plan->gen == Transpose_NONSQUARE || 1)// TIMMY delete
+					if (trans2Plan->gen == Transpose_NONSQUARE || trans2Plan->gen == Transpose_SQUARE)// inplace transpose
 					{
 						for (size_t index = 1; index < fftPlan->length.size(); index++)
 						{
@@ -937,7 +939,7 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 					trans3Plan->transOutHorizontal = true;
 
 
-					if (trans3Plan->gen == Transpose_NONSQUARE || 1)
+					if (trans3Plan->gen == Transpose_NONSQUARE)// inplace transpose
 					{
 						for (size_t index = 1; index < fftPlan->length.size(); index++)
 						{
@@ -950,12 +952,24 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 							*/
 							trans3Plan->batchsize = trans3Plan->batchsize * fftPlan->length[index];
 							trans3Plan->iDist = trans3Plan->iDist / fftPlan->length[index];
-							//trans3Plan->inStride.push_back(trans3Plan->iDist);//Timmy for square
+							//trans3Plan->inStride.push_back(trans3Plan->iDist);
 							trans3Plan->inStride.push_back(fftPlan->inStride[index]);
 							trans3Plan->iDist *= fftPlan->length[index];
 							trans3Plan->outStride.push_back(fftPlan->outStride[index]);
 						}
 					}
+					else if (trans3Plan->gen == Transpose_SQUARE)
+					{
+						for (size_t index = 1; index < fftPlan->length.size(); index++)
+						{
+							trans3Plan->batchsize = trans3Plan->batchsize * fftPlan->length[index];
+							//trans3Plan->iDist = trans3Plan->iDist / fftPlan->length[index];
+							//trans3Plan->inStride.push_back(trans3Plan->iDist);
+							trans3Plan->inStride.push_back(fftPlan->inStride[index]);
+							//trans3Plan->iDist *= fftPlan->length[index];
+							trans3Plan->outStride.push_back(fftPlan->outStride[index]);
+						}
+					}
 					else
 					{
 						for (size_t index = 1; index < fftPlan->length.size(); index++)
@@ -2076,6 +2090,7 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 						if (clLengths[0] > clLengths[1] && fftPlan->large1D == 0)
 						{
                             //twidding can be done in swap when swap is the second kernel for now
+							//TODO enable twiddling in swap here as well
 							currKernelOrder = SWAP_AND_TRANSPOSE;
 						}
 						else
@@ -2093,7 +2108,7 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 						}
 						//if the original input data is more than 1d only TRANSPOSE_LEADING_AND_SWAP order is supported
 						//TODO need to fix this here. related to multi dim batch size.
-						//if (fftPlan->length.size() > 2) //Timmy test
+						//if (fftPlan->length.size() > 2) 
 						//	currKernelOrder = TRANSPOSE_LEADING_AND_SWAP;
 						std::cout << "currKernelOrder = " << currKernelOrder << std::endl;
 						//ends tranpose kernel order

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list