[clfft] 92/107: fixing dev issues

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Jul 30 18:06:41 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository clfft.

commit 862cb2c7194525ed91c38def1a9953e0e1425c75
Author: bragadeesh <bragadeesh.natarajan at amd>
Date:   Mon Jun 15 20:30:06 2015 -0500

    fixing dev issues
---
 src/library/generator.transpose.gcn.cpp | 32 ++++++++++++++++++++++-----
 src/library/plan.cpp                    | 39 ++++++++++++++++++++++-----------
 2 files changed, 52 insertions(+), 19 deletions(-)

diff --git a/src/library/generator.transpose.gcn.cpp b/src/library/generator.transpose.gcn.cpp
index 01afca1..c5f7ebf 100644
--- a/src/library/generator.transpose.gcn.cpp
+++ b/src/library/generator.transpose.gcn.cpp
@@ -451,8 +451,10 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 			break;
 		case CLFFT_HERMITIAN_INTERLEAVED:
 		case CLFFT_HERMITIAN_PLANAR:
-		case CLFFT_REAL:
 			return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
+		case CLFFT_REAL:
+			clKernWrite( transKernel, 3 ) << "local " << dtPlanar << " lds[ " << ldsSize.x << " ][ " << ldsSize.y << " ];" << std::endl << std::endl;
+			break;
 		}
 
 
@@ -474,12 +476,23 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 			break;
 		case CLFFT_HERMITIAN_INTERLEAVED:
 		case CLFFT_HERMITIAN_PLANAR:
-		case CLFFT_REAL:
 			return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
+		case CLFFT_REAL:
+			clKernWrite( transKernel, 3 ) << "global " << dtInput << "* tileIn = " << pmRealIn << " + iOffset;" << std::endl;
+			break;
+			
 		}
 
 		// This is the loop reading through the Tile
-		clKernWrite( transKernel, 3 ) << dtComplex << " tmp;" << std::endl;
+		if( params.fft_inputLayout == CLFFT_REAL )
+		{
+			clKernWrite( transKernel, 3 ) << dtPlanar << " tmp;" << std::endl;
+		}
+		else
+		{
+			clKernWrite( transKernel, 3 ) << dtComplex << " tmp;" << std::endl;
+		}
+
 		clKernWrite( transKernel, 3 ) << "rowSizeinUnits = " << params.fft_inStride[ 1 ] << ";" << std::endl; 
 		clKernWrite( transKernel, 3 ) << std::endl << std::endl;
 
@@ -626,8 +639,11 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 				break;
 			case CLFFT_HERMITIAN_INTERLEAVED:
 			case CLFFT_HERMITIAN_PLANAR:
-			case CLFFT_REAL:
 				return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
+			case CLFFT_REAL:
+				clKernWrite( transKernel, 9 ) << "tmp = tileIn[ gInd ];" << std::endl;
+				break;
+
 			}
 
 			if(branchingInAny)
@@ -668,8 +684,10 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 			break;
 		case CLFFT_HERMITIAN_INTERLEAVED:
 		case CLFFT_HERMITIAN_PLANAR:
-		case CLFFT_REAL:
 			return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
+		case CLFFT_REAL:
+			clKernWrite( transKernel, 3 ) << "global " << dtOutput << "* tileOut = " << pmRealOut << " + oOffset;" << std::endl << std::endl;
+			break;
 		}
 
 		// Write the transposed values from LDS into global memory
@@ -791,8 +809,10 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 				break;
 			case CLFFT_HERMITIAN_INTERLEAVED:
 			case CLFFT_HERMITIAN_PLANAR:
-			case CLFFT_REAL:
 				return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
+			case CLFFT_REAL:
+				clKernWrite( transKernel, 9 ) << "tileOut[ gInd ] = tmp;" << std::endl;
+				break;
 			}
 
 			if(branchingInAny)
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index 1355a6c..5234d72 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -891,6 +891,8 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 					lockRAII* trans2Lock	= NULL;
 					OPENCL_V( fftRepo.getPlan( fftPlan->planTY, trans2Plan, trans2Lock ), _T( "fftRepo.getPlan failed" ) );
 
+					trans2Plan->transflag = true;
+
 					size_t transLengths[2];
 					transLengths[0] = 1 + clLengths[1]/2;
 					transLengths[1] = clLengths[0];
@@ -909,11 +911,12 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 					trans2Plan->inStride[0]   = 1;
 					trans2Plan->inStride[1]   = 1 + clLengths[1]/2;
 					trans2Plan->outStride[0]  = 1;
-					trans2Plan->outStride[1]  = clLengths[0] + padding;
+					trans2Plan->outStride[1]  = clLengths[0];
 					trans2Plan->iDist         = clLengths[0] * trans2Plan->inStride[1];
-					trans2Plan->oDist         = (1 + clLengths[1]/2) * trans2Plan->outStride[1];
+					trans2Plan->oDist         = fftPlan->oDist;
                     trans2Plan->gen           = Transpose_GCN;
 					trans2Plan->transflag     = true;
+					trans2Plan->transOutHorizontal = true;
 
 					OPENCL_V(clfftBakePlan(fftPlan->planTY, numQueues, commQueueFFT, NULL, NULL ),
 						_T( "BakePlan large1d trans2 plan failed" ) );
@@ -964,6 +967,13 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 					lockRAII* trans3Lock	= NULL;
 					OPENCL_V( fftRepo.getPlan( fftPlan->planTZ, trans3Plan, trans3Lock ), _T( "fftRepo.getPlan failed" ) );
 
+					trans3Plan->transflag = true;
+
+					transLengths[0] = 1 + clLengths[0]/2;
+					transLengths[1] = clLengths[1];
+					OPENCL_V(clfftSetPlanLength( fftPlan->planTZ, CLFFT_2D, transLengths ),
+						_T( "clfftSetPlanLength for planTZ transpose failed" ) );
+
 					trans3Plan->placeness     = CLFFT_OUTOFPLACE;
 					trans3Plan->precision     = fftPlan->precision;
 					trans3Plan->tmpBufSize    = 0;
@@ -1707,6 +1717,20 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 		break;
 	case CLFFT_2D:
 		{
+
+			if (fftPlan->transflag) //Transpose for 2D
+			{
+                clfftStatus err;
+				if(fftPlan->gen == Transpose_GCN)
+					fftPlan->action = new FFTGeneratedTransposeGCNAction(plHandle, fftPlan, *commQueueFFT, err);
+				else
+					fftPlan->action = new FFTGeneratedTransposeVLIWAction(plHandle, fftPlan, *commQueueFFT, err);
+                OPENCL_V( err, "FFTGeneratedTransposeVLIWAction failed");
+
+				fftPlan->baked		= true;
+				return	CLFFT_SUCCESS;
+			}
+
 			size_t length0 = fftPlan->length[0];
 			size_t length1 = fftPlan->length[1];
 
@@ -1740,18 +1764,7 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 			while (1 && (fftPlan->inputLayout != CLFFT_REAL) && (fftPlan->outputLayout != CLFFT_REAL))
 			{
 				//break;
-				if (fftPlan->transflag) //Transpose for 2D
-				{
-                    clfftStatus err;
-					if(fftPlan->gen == Transpose_GCN)
-						fftPlan->action = new FFTGeneratedTransposeGCNAction(plHandle, fftPlan, *commQueueFFT, err);
-					else
-						fftPlan->action = new FFTGeneratedTransposeVLIWAction(plHandle, fftPlan, *commQueueFFT, err);
-                    OPENCL_V( err, "FFTGeneratedTransposeVLIWAction failed");
 
-					fftPlan->baked		= true;
-					return	CLFFT_SUCCESS;
-				}
 
                 // TODO : Check for a better way to do this.
                 bool isnvidia = false;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list