[clfft] 87/107: updating 3D real backward

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Jul 30 18:06:40 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository clfft.

commit 23e811f48723ed2aea1e219fd5b02680972d17ff
Author: bragadeesh <bragadeesh.natarajan at amd>
Date:   Sat Apr 25 19:42:07 2015 -0500

    updating 3D real backward
---
 src/library/plan.cpp      | 432 +++++++++++++++++++++++++++++++++++-----------
 src/library/transform.cpp | 114 ++++++++++--
 2 files changed, 424 insertions(+), 122 deletions(-)

diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index 760da7b..58eb619 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -2685,7 +2685,8 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 
 				OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan 3D->2D planX failed" ) );
 
-				if( (xyPlan->outStride[2] == Nt*length1) &&
+				if( (xyPlan->inStride[0] == 1) && (xyPlan->outStride[0] == 1) &&
+					(xyPlan->outStride[2] == Nt*length1) &&
 					( ((xyPlan->inStride[2] == Nt*2*length1) && (xyPlan->placeness == CLFFT_INPLACE)) ||
 					  ((xyPlan->inStride[2] == length0*length1) && (xyPlan->placeness == CLFFT_OUTOFPLACE)) ) )
 				{
@@ -2880,8 +2881,6 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 						_T( "BakePlan for planTY failed" ) );
 
 
-					fftPlan->baked = true;
-					return	CLFFT_SUCCESS;
 				}
 				else
 				{
@@ -2969,161 +2968,388 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 					fftPlan->tmpBufSizeC2R = fftPlan->tmpBufSize;
 				}
 
-				size_t clLengths[] = { 1, 0, 0 };
+				if( (fftPlan->inStride[0] == 1) && (fftPlan->outStride[0] == 1) &&
+					( ((fftPlan->outStride[2] == Nt*2*length1) && (fftPlan->placeness == CLFFT_INPLACE)) ||
+						((fftPlan->outStride[2] == length0*length1) && (fftPlan->placeness == CLFFT_OUTOFPLACE)) )
+					&& (fftPlan->inStride[2] == Nt*length1) )
+				{
+					// create first transpose plan
+					
+					//Transpose 
+					// input --> tmp
+					size_t transLengths[2] = { length0*length1, length2 };
+					OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planTZ, fftPlan->context, CLFFT_2D, transLengths ),
+						_T( "CreateDefaultPlan for planTZ transpose failed" ) );
 
-				clLengths[0] = fftPlan->length[ DimZ ];
+					FFTPlan* trans1Plan	= NULL;
+					lockRAII* trans1Lock	= NULL;
+					OPENCL_V( fftRepo.getPlan( fftPlan->planTZ, trans1Plan, trans1Lock ), _T( "fftRepo.getPlan failed" ) );
 
-				//create 1D col plan
-				OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planZ, fftPlan->context, CLFFT_1D, clLengths ),
-					_T( "CreateDefaultPlan for planZ failed" ) );
+					trans1Plan->transflag = true;
 
-				FFTPlan* colPlan	= NULL;
-				lockRAII* colLock	= NULL;
-				OPENCL_V( fftRepo.getPlan( fftPlan->planZ, colPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
+					transLengths[0] = Nt*length1;
+					OPENCL_V(clfftSetPlanLength( fftPlan->planTZ, CLFFT_2D, transLengths ),
+						_T( "clfftSetPlanLength for planTZ transpose failed" ) );
 
-				switch(fftPlan->inputLayout)
-				{
-				case CLFFT_HERMITIAN_INTERLEAVED:
+					switch(fftPlan->inputLayout)
 					{
-						colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
-						colPlan->inputLayout  = CLFFT_COMPLEX_INTERLEAVED;
+					case CLFFT_HERMITIAN_INTERLEAVED:
+						{
+							trans1Plan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+							trans1Plan->inputLayout  = CLFFT_COMPLEX_INTERLEAVED;
+						}
+						break;
+					case CLFFT_HERMITIAN_PLANAR:
+						{
+							trans1Plan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+							trans1Plan->inputLayout  = CLFFT_COMPLEX_PLANAR;
+						}
+						break;
+					default: assert(false);
 					}
-					break;
-				case CLFFT_HERMITIAN_PLANAR:
+
+					trans1Plan->placeness     = CLFFT_OUTOFPLACE;
+					trans1Plan->precision     = fftPlan->precision;
+					trans1Plan->tmpBufSize    = 0;
+					trans1Plan->batchsize     = fftPlan->batchsize;
+					trans1Plan->envelope	  = fftPlan->envelope;
+					trans1Plan->forwardScale  = 1.0f;
+					trans1Plan->backwardScale = 1.0f;
+
+					trans1Plan->inStride[0]   = 1;
+					trans1Plan->inStride[1]   = Nt*length1;
+					trans1Plan->outStride[0]  = 1;
+					trans1Plan->outStride[1]  = length2;
+					trans1Plan->iDist         = fftPlan->iDist;
+					trans1Plan->oDist		  = Nt*length1*length2;
+					trans1Plan->transOutHorizontal = true;
+
+					trans1Plan->gen           = Transpose_GCN;
+
+
+					for (size_t index=3; index < fftPlan->length.size(); index++)
 					{
-						colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
-						colPlan->inputLayout  = CLFFT_COMPLEX_PLANAR;
+						trans1Plan->length.push_back(fftPlan->length[index]);
+						trans1Plan->inStride.push_back(fftPlan->inStride[index]);
+						trans1Plan->outStride.push_back(trans1Plan->oDist);
+						trans1Plan->oDist *= fftPlan->length[index];
 					}
-					break;
-				default: assert(false);
-				}
 
-				colPlan->length.push_back(Nt);
-				colPlan->length.push_back(length1);
+					OPENCL_V(clfftBakePlan(fftPlan->planTZ, numQueues, commQueueFFT, NULL, NULL ),
+						_T( "BakePlan for planTZ failed" ) );
 
-				colPlan->inStride[0]  = fftPlan->inStride[2];
-				colPlan->inStride.push_back(fftPlan->inStride[0]);
-				colPlan->inStride.push_back(fftPlan->inStride[1]);
-				colPlan->iDist         = fftPlan->iDist;
+					// create col plan
+					// complex to complex
 
+					OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planZ, fftPlan->context, CLFFT_1D, &fftPlan->length[ DimZ ] ),
+						_T( "CreateDefaultPlan for planZ failed" ) );
+
+					FFTPlan* colPlan	= NULL;
+					lockRAII* colLock	= NULL;
+					OPENCL_V( fftRepo.getPlan( fftPlan->planZ, colPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
+
+					colPlan->length.push_back(Nt*length1);
+
+					colPlan->inStride[0]  = 1;
+					colPlan->inStride.push_back(length2);
+					colPlan->iDist        = trans1Plan->oDist;
 
-				if (fftPlan->placeness == CLFFT_INPLACE)
-				{
 					colPlan->placeness = CLFFT_INPLACE;
+					colPlan->inputLayout = CLFFT_COMPLEX_INTERLEAVED;
+					colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
 
 					colPlan->outStride[0]  = colPlan->inStride[0];
 					colPlan->outStride.push_back(colPlan->inStride[1]);
-					colPlan->outStride.push_back(colPlan->inStride[2]);
 					colPlan->oDist         = colPlan->iDist;
 
 					for (size_t index=3; index < fftPlan->length.size(); index++)
 					{
 						colPlan->length.push_back(fftPlan->length[index]);
-						colPlan->inStride.push_back(fftPlan->inStride[index]);
-						colPlan->outStride.push_back(fftPlan->inStride[index]);
+						colPlan->inStride.push_back(trans1Plan->outStride[index-1]);
+						colPlan->outStride.push_back(trans1Plan->outStride[index-1]);
+					}
+
+
+					colPlan->precision     = fftPlan->precision;
+					colPlan->forwardScale  = 1.0f;
+					colPlan->backwardScale = 1.0f;
+					colPlan->tmpBufSize    = 0;
+
+					colPlan->gen			= fftPlan->gen;
+					colPlan->envelope		= fftPlan->envelope;
+
+					colPlan->batchsize = fftPlan->batchsize;
+
+					OPENCL_V(clfftBakePlan(fftPlan->planZ, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan for planZ failed" ) );
+
+					// create second transpose plan
+					
+					//Transpose 
+					//tmp --> output
+					size_t trans2Lengths[2] = { length2, length0*length1 };
+					OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planTX, fftPlan->context, CLFFT_2D, trans2Lengths ),
+						_T( "CreateDefaultPlan for planTX transpose failed" ) );
+
+					FFTPlan* trans2Plan	= NULL;
+					lockRAII* trans2Lock	= NULL;
+					OPENCL_V( fftRepo.getPlan( fftPlan->planTX, trans2Plan, trans2Lock ), _T( "fftRepo.getPlan failed" ) );
+
+					trans2Plan->transflag = true;
+
+					trans2Lengths[1] = Nt*length1;
+					OPENCL_V(clfftSetPlanLength( fftPlan->planTX, CLFFT_2D, trans2Lengths ),
+						_T( "clfftSetPlanLength for planTX transpose failed" ) );
+
+
+					trans2Plan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+					trans2Plan->inputLayout  = CLFFT_COMPLEX_INTERLEAVED;
+
+
+					trans2Plan->placeness     = CLFFT_OUTOFPLACE;
+					trans2Plan->precision     = fftPlan->precision;
+					trans2Plan->tmpBufSize    = 0;
+					trans2Plan->batchsize     = fftPlan->batchsize;
+					trans2Plan->envelope	  = fftPlan->envelope;
+					trans2Plan->forwardScale  = 1.0f;
+					trans2Plan->backwardScale = 1.0f;
+
+					trans2Plan->inStride[0]   = 1;
+					trans2Plan->inStride[1]   = length2;
+					trans2Plan->outStride[0]  = 1;
+					trans2Plan->outStride[1]  = Nt*length1;
+					trans2Plan->iDist         = colPlan->oDist;
+					trans2Plan->oDist		  = Nt*length1*length2;
+
+					trans2Plan->gen           = Transpose_GCN;
+					trans2Plan->transflag     = true;
+
+					for (size_t index=3; index < fftPlan->length.size(); index++)
+					{
+						trans2Plan->length.push_back(fftPlan->length[index]);
+						trans2Plan->inStride.push_back(colPlan->outStride[index-1]);
+						trans2Plan->outStride.push_back(trans2Plan->oDist);
+						trans2Plan->oDist *= fftPlan->length[index];
+
 					}
+
+					OPENCL_V(clfftBakePlan(fftPlan->planTX, numQueues, commQueueFFT, NULL, NULL ),
+						_T( "BakePlan for planTX failed" ) );
+
+					// create row plan
+					// hermitian to real
+
+					//create 2D xy plan
+					size_t clLengths[] = { length0, length1, 0 };
+					OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planX, fftPlan->context, CLFFT_2D, clLengths ),
+						_T( "CreateDefaultPlan for 2D planX failed" ) );
+
+					FFTPlan* rowPlan	= NULL;
+					lockRAII* rowLock	= NULL;
+					OPENCL_V( fftRepo.getPlan( fftPlan->planX, rowPlan, rowLock ), _T( "fftRepo.getPlan failed" ) );
+
+					rowPlan->outputLayout  = fftPlan->outputLayout;
+					rowPlan->inputLayout   = CLFFT_HERMITIAN_INTERLEAVED;
+
+					rowPlan->length.push_back(length2);
+
+					rowPlan->outStride[0]  = fftPlan->outStride[0];
+					rowPlan->outStride[1]  = fftPlan->outStride[1];
+					rowPlan->outStride.push_back(fftPlan->outStride[2]);
+					rowPlan->oDist         = fftPlan->oDist;
+
+					rowPlan->inStride[0]  = trans2Plan->outStride[0];
+					rowPlan->inStride[1]  = Nt;
+					rowPlan->inStride.push_back(Nt*length1);
+					rowPlan->iDist         = trans2Plan->oDist;
+
+					for (size_t index=3; index < fftPlan->length.size(); index++)
+					{
+						rowPlan->length.push_back(fftPlan->length[index]);
+						rowPlan->inStride.push_back(trans2Plan->outStride[index-1]);
+						rowPlan->outStride.push_back(fftPlan->outStride[index]);
+					}
+
+					if (fftPlan->placeness == CLFFT_INPLACE)
+					{
+						rowPlan->placeness     = CLFFT_INPLACE;
+					}
+					else
+					{
+						rowPlan->placeness     = CLFFT_OUTOFPLACE;
+					}				
+
+
+					rowPlan->precision     = fftPlan->precision;
+					rowPlan->forwardScale  = fftPlan->forwardScale;
+					rowPlan->backwardScale = fftPlan->backwardScale;
+					rowPlan->tmpBufSize    = 0;
+
+					rowPlan->gen			= fftPlan->gen;
+					rowPlan->envelope		= fftPlan->envelope;
+
+					rowPlan->batchsize    = fftPlan->batchsize;
+
+					OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan for planX failed" ) );
 				}
 				else
 				{
-					colPlan->placeness = CLFFT_OUTOFPLACE;
 
-					colPlan->outStride[0]  = Nt*length1;
-					colPlan->outStride.push_back(1);
-					colPlan->outStride.push_back(Nt);
-					colPlan->oDist         = Nt*length1*length2;
+					size_t clLengths[] = { 1, 0, 0 };
 
-					for (size_t index=3; index < fftPlan->length.size(); index++)
+					clLengths[0] = fftPlan->length[ DimZ ];
+
+					//create 1D col plan
+					OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planZ, fftPlan->context, CLFFT_1D, clLengths ),
+						_T( "CreateDefaultPlan for planZ failed" ) );
+
+					FFTPlan* colPlan	= NULL;
+					lockRAII* colLock	= NULL;
+					OPENCL_V( fftRepo.getPlan( fftPlan->planZ, colPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
+
+					switch(fftPlan->inputLayout)
 					{
-						colPlan->length.push_back(fftPlan->length[index]);
-						colPlan->inStride.push_back(fftPlan->inStride[index]);
-						colPlan->outStride.push_back(colPlan->oDist);
-						colPlan->oDist *= fftPlan->length[index];
+					case CLFFT_HERMITIAN_INTERLEAVED:
+						{
+							colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+							colPlan->inputLayout  = CLFFT_COMPLEX_INTERLEAVED;
+						}
+						break;
+					case CLFFT_HERMITIAN_PLANAR:
+						{
+							colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+							colPlan->inputLayout  = CLFFT_COMPLEX_PLANAR;
+						}
+						break;
+					default: assert(false);
+					}
+
+					colPlan->length.push_back(Nt);
+					colPlan->length.push_back(length1);
+
+					colPlan->inStride[0]  = fftPlan->inStride[2];
+					colPlan->inStride.push_back(fftPlan->inStride[0]);
+					colPlan->inStride.push_back(fftPlan->inStride[1]);
+					colPlan->iDist         = fftPlan->iDist;
+
+
+					if (fftPlan->placeness == CLFFT_INPLACE)
+					{
+						colPlan->placeness = CLFFT_INPLACE;
+
+						colPlan->outStride[0]  = colPlan->inStride[0];
+						colPlan->outStride.push_back(colPlan->inStride[1]);
+						colPlan->outStride.push_back(colPlan->inStride[2]);
+						colPlan->oDist         = colPlan->iDist;
+
+						for (size_t index=3; index < fftPlan->length.size(); index++)
+						{
+							colPlan->length.push_back(fftPlan->length[index]);
+							colPlan->inStride.push_back(fftPlan->inStride[index]);
+							colPlan->outStride.push_back(fftPlan->inStride[index]);
+						}
+					}
+					else
+					{
+						colPlan->placeness = CLFFT_OUTOFPLACE;
+
+						colPlan->outStride[0]  = Nt*length1;
+						colPlan->outStride.push_back(1);
+						colPlan->outStride.push_back(Nt);
+						colPlan->oDist         = Nt*length1*length2;
+
+						for (size_t index=3; index < fftPlan->length.size(); index++)
+						{
+							colPlan->length.push_back(fftPlan->length[index]);
+							colPlan->inStride.push_back(fftPlan->inStride[index]);
+							colPlan->outStride.push_back(colPlan->oDist);
+							colPlan->oDist *= fftPlan->length[index];
+						}
 					}
-				}
 
 				
-				colPlan->precision     = fftPlan->precision;
-				colPlan->forwardScale  = 1.0f;
-				colPlan->backwardScale = 1.0f;
-				colPlan->tmpBufSize    = 0;
+					colPlan->precision     = fftPlan->precision;
+					colPlan->forwardScale  = 1.0f;
+					colPlan->backwardScale = 1.0f;
+					colPlan->tmpBufSize    = 0;
 
-				colPlan->gen			 = fftPlan->gen;
-				colPlan->envelope		 = fftPlan->envelope;
+					colPlan->gen			 = fftPlan->gen;
+					colPlan->envelope		 = fftPlan->envelope;
 
-				colPlan->batchsize = fftPlan->batchsize;
+					colPlan->batchsize = fftPlan->batchsize;
 
 				
-				OPENCL_V(clfftBakePlan(fftPlan->planZ, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan 3D->1D planZ failed" ) );
+					OPENCL_V(clfftBakePlan(fftPlan->planZ, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan 3D->1D planZ failed" ) );
 
 
-				clLengths[0] = fftPlan->length[ DimX ];
-				clLengths[1] = fftPlan->length[ DimY ];
+					clLengths[0] = fftPlan->length[ DimX ];
+					clLengths[1] = fftPlan->length[ DimY ];
 
-				//create 2D xy plan
-				OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planX, fftPlan->context, CLFFT_2D, clLengths ),
-					_T( "CreateDefaultPlan 2D planX failed" ) );
+					//create 2D xy plan
+					OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planX, fftPlan->context, CLFFT_2D, clLengths ),
+						_T( "CreateDefaultPlan 2D planX failed" ) );
 
-				FFTPlan* xyPlan	= NULL;
-				lockRAII* rowLock	= NULL;
-				OPENCL_V( fftRepo.getPlan( fftPlan->planX, xyPlan, rowLock ), _T( "fftRepo.getPlan failed" ) );
+					FFTPlan* xyPlan	= NULL;
+					lockRAII* rowLock	= NULL;
+					OPENCL_V( fftRepo.getPlan( fftPlan->planX, xyPlan, rowLock ), _T( "fftRepo.getPlan failed" ) );
 
-				xyPlan->inputLayout   = CLFFT_HERMITIAN_INTERLEAVED;
-				xyPlan->outputLayout  = fftPlan->outputLayout;
+					xyPlan->inputLayout   = CLFFT_HERMITIAN_INTERLEAVED;
+					xyPlan->outputLayout  = fftPlan->outputLayout;
 
-				xyPlan->length.push_back(length2);
+					xyPlan->length.push_back(length2);
 			
-				xyPlan->outStride[0]  = fftPlan->outStride[0];
-				xyPlan->outStride[1]  = fftPlan->outStride[1];
-				xyPlan->outStride.push_back(fftPlan->outStride[2]);
-				xyPlan->oDist         = fftPlan->oDist;
+					xyPlan->outStride[0]  = fftPlan->outStride[0];
+					xyPlan->outStride[1]  = fftPlan->outStride[1];
+					xyPlan->outStride.push_back(fftPlan->outStride[2]);
+					xyPlan->oDist         = fftPlan->oDist;
 
-				if (fftPlan->placeness == CLFFT_INPLACE)
-				{
-					xyPlan->placeness     = CLFFT_INPLACE;
+					if (fftPlan->placeness == CLFFT_INPLACE)
+					{
+						xyPlan->placeness     = CLFFT_INPLACE;
 
-					xyPlan->inStride[0]  = colPlan->outStride[1];
-					xyPlan->inStride[1]  = colPlan->outStride[2];
-					xyPlan->inStride.push_back(colPlan->outStride[0]);
-					xyPlan->iDist         = colPlan->oDist;
+						xyPlan->inStride[0]  = colPlan->outStride[1];
+						xyPlan->inStride[1]  = colPlan->outStride[2];
+						xyPlan->inStride.push_back(colPlan->outStride[0]);
+						xyPlan->iDist         = colPlan->oDist;
 
-					for (size_t index=3; index < fftPlan->length.size(); index++)
-					{
-						xyPlan->length.push_back(fftPlan->length[index]);
-						xyPlan->inStride.push_back(colPlan->outStride[index]);
-						xyPlan->outStride.push_back(fftPlan->outStride[index]);
+						for (size_t index=3; index < fftPlan->length.size(); index++)
+						{
+							xyPlan->length.push_back(fftPlan->length[index]);
+							xyPlan->inStride.push_back(colPlan->outStride[index]);
+							xyPlan->outStride.push_back(fftPlan->outStride[index]);
+						}
 					}
-				}
-				else
-				{
-					xyPlan->placeness     = CLFFT_OUTOFPLACE;
+					else
+					{
+						xyPlan->placeness     = CLFFT_OUTOFPLACE;
 
-					xyPlan->inStride[0]   = 1;
-					xyPlan->inStride[1]   = Nt;
-					xyPlan->inStride.push_back(Nt*length1);
-					xyPlan->iDist         = Nt*length1*length2;
+						xyPlan->inStride[0]   = 1;
+						xyPlan->inStride[1]   = Nt;
+						xyPlan->inStride.push_back(Nt*length1);
+						xyPlan->iDist         = Nt*length1*length2;
 
-					for (size_t index=3; index < fftPlan->length.size(); index++)
-					{
-						xyPlan->length.push_back(fftPlan->length[index]);
-						xyPlan->outStride.push_back(fftPlan->outStride[index]);
-						xyPlan->inStride.push_back(xyPlan->iDist);						
-						xyPlan->iDist *= fftPlan->length[index];
+						for (size_t index=3; index < fftPlan->length.size(); index++)
+						{
+							xyPlan->length.push_back(fftPlan->length[index]);
+							xyPlan->outStride.push_back(fftPlan->outStride[index]);
+							xyPlan->inStride.push_back(xyPlan->iDist);						
+							xyPlan->iDist *= fftPlan->length[index];
+						}
 					}
-				}
 
 
-				xyPlan->precision     = fftPlan->precision;
-				xyPlan->forwardScale  = fftPlan->forwardScale;
-				xyPlan->backwardScale = fftPlan->backwardScale;
-				xyPlan->tmpBufSize    = fftPlan->tmpBufSize;
+					xyPlan->precision     = fftPlan->precision;
+					xyPlan->forwardScale  = fftPlan->forwardScale;
+					xyPlan->backwardScale = fftPlan->backwardScale;
+					xyPlan->tmpBufSize    = fftPlan->tmpBufSize;
 
-				xyPlan->gen			 = fftPlan->gen;
-				xyPlan->envelope	 = fftPlan->envelope;
+					xyPlan->gen			 = fftPlan->gen;
+					xyPlan->envelope	 = fftPlan->envelope;
 
-				xyPlan->batchsize    = fftPlan->batchsize;
+					xyPlan->batchsize    = fftPlan->batchsize;
 
 
-				OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan 3D->2D planX failed" ) );
+					OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan 3D->2D planX failed" ) );
+				}
 			}
 			else
 			{
diff --git a/src/library/transform.cpp b/src/library/transform.cpp
index 9680fc2..0ce9288 100644
--- a/src/library/transform.cpp
+++ b/src/library/transform.cpp
@@ -744,7 +744,8 @@ clfftStatus clfftEnqueueTransform(
 						{
 							cl_mem *mybuffers;
 
-							if (fftPlan->placeness==CLFFT_INPLACE)
+							if ( (fftPlan->placeness==CLFFT_INPLACE) ||
+								 ((fftPlan->placeness==CLFFT_OUTOFPLACE) && (fftPlan->length.size() > 2)) )
 								mybuffers = clInputBuffers;
 							else
 								mybuffers = &(fftPlan->intBufferC2R);
@@ -988,30 +989,105 @@ clfftStatus clfftEnqueueTransform(
 			}
 			else if(fftPlan->outputLayout == CLFFT_REAL)
 			{
-				cl_mem *out_local, *int_local, *out_z;
-
-				if(fftPlan->placeness == CLFFT_INPLACE)
+				if(fftPlan->planTZ)
 				{
-					out_local = NULL;
-					int_local = NULL;
-					out_z = clInputBuffers;
+					cl_mem *mybuffers;
+
+					if (fftPlan->placeness==CLFFT_INPLACE)
+						mybuffers = clInputBuffers;
+					else
+						mybuffers = &(fftPlan->intBufferC2R);
+
+					cl_event transZOutEvents = NULL;
+					cl_event transXOutEvents = NULL;
+
+					//First transpose
+					OPENCL_V( clfftEnqueueTransform( fftPlan->planTZ, dir, numQueuesAndEvents, commQueues, numWaitEvents, 
+						waitEvents, &transZOutEvents, clInputBuffers, &localIntBuffer, NULL ),
+						_T("clfftEnqueueTransform for first transpose failed"));
+					
+
+#if defined(DEBUGGING)
+					OPENCL_V( clEnqueueReadBuffer( *commQueues, mybuffers[0], CL_TRUE, 0, buffSizeBytes*2, &output2[0], 0,
+						NULL, NULL ),
+						_T("Reading the result buffer failed") );
+#endif
+
+					//First row
+					OPENCL_V( clfftEnqueueTransform( fftPlan->planZ, dir, numQueuesAndEvents, commQueues, 1, &transZOutEvents, 
+						&rowOutEvents, &localIntBuffer, NULL, NULL ),
+						_T("clfftEnqueueTransform for col failed"));
+					clReleaseEvent(transZOutEvents);
+
+
+#if defined(DEBUGGING)
+					OPENCL_V( clEnqueueReadBuffer( *commQueues, mybuffers[0], CL_TRUE, 0, buffSizeBytes*2, &output2[ 0 ], 0,
+						NULL, NULL ),
+						_T("Reading the result buffer failed") );
+#endif
+
+					//Second transpose
+					OPENCL_V( clfftEnqueueTransform( fftPlan->planTX, dir, numQueuesAndEvents, commQueues, 1, &rowOutEvents,
+						&transXOutEvents, &localIntBuffer, mybuffers, NULL ),
+						_T("clfftEnqueueTransform for second transpose failed"));
+							
+
+#if defined(DEBUGGING)
+					OPENCL_V( clEnqueueReadBuffer( *commQueues, localIntBuffer, CL_TRUE, 0, buffSizeBytes*2, &output2[0], 0,
+						NULL, NULL ),
+						_T("Reading the result buffer failed") );
+#endif
+
+
+					//Second Row transform
+					if(fftPlan->placeness == CLFFT_INPLACE)
+					{
+						OPENCL_V( clfftEnqueueTransform( fftPlan->planX, dir, numQueuesAndEvents, commQueues, 1, &transXOutEvents,
+							outEvents, clInputBuffers, NULL, NULL ),
+							_T("clfftEnqueueTransform for second row failed"));
+					}
+					else
+					{
+						OPENCL_V( clfftEnqueueTransform( fftPlan->planX, dir, numQueuesAndEvents, commQueues, 1, &transXOutEvents,
+							outEvents, mybuffers, clOutputBuffers, NULL ),
+							_T("clfftEnqueueTransform for second row failed"));
+					}
+					clReleaseEvent(transXOutEvents);
+#if defined(DEBUGGING)
+					OPENCL_V( clEnqueueReadBuffer( *commQueues, localIntBuffer, CL_TRUE, 0, buffSizeBytes*2, &output2[0], 0,
+						NULL, NULL ),
+						_T("Reading the result buffer failed") );
+#endif
+
+
 				}
 				else
 				{
-					out_local = clOutputBuffers;
-					int_local = &(fftPlan->intBufferC2R);
-					out_z = int_local;
-				}
+					cl_mem *out_local, *int_local, *out_z;
+
+					if(fftPlan->placeness == CLFFT_INPLACE)
+					{
+						out_local = NULL;
+						int_local = NULL;
+						out_z = clInputBuffers;
+					}
+					else
+					{
+						out_local = clOutputBuffers;
+						int_local = &(fftPlan->intBufferC2R);
+						out_z = int_local;
+					}
 
-				//deal with 1D Z column first
-				OPENCL_V( clfftEnqueueTransform( fftPlan->planZ, CLFFT_BACKWARD, numQueuesAndEvents, commQueues, numWaitEvents,
-					waitEvents, &rowOutEvents, clInputBuffers, int_local, localIntBuffer ),
-					_T("clfftEnqueueTransform for 3D-Z column failed"));
+					//deal with 1D Z column first
+					OPENCL_V( clfftEnqueueTransform( fftPlan->planZ, CLFFT_BACKWARD, numQueuesAndEvents, commQueues, numWaitEvents,
+						waitEvents, &rowOutEvents, clInputBuffers, int_local, localIntBuffer ),
+						_T("clfftEnqueueTransform for 3D-Z column failed"));
 
-				//deal with 2D row
-				OPENCL_V( clfftEnqueueTransform( fftPlan->planX, CLFFT_BACKWARD, numQueuesAndEvents, commQueues, 1, &rowOutEvents,
-					outEvents, out_z, out_local, localIntBuffer ),
-					_T("clfftEnqueueTransform for 3D-XY row failed"));
+					//deal with 2D row
+					OPENCL_V( clfftEnqueueTransform( fftPlan->planX, CLFFT_BACKWARD, numQueuesAndEvents, commQueues, 1, &rowOutEvents,
+						outEvents, out_z, out_local, localIntBuffer ),
+						_T("clfftEnqueueTransform for 3D-XY row failed"));
+				}
 			}
 			else
 			{

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list