[clfft] 93/107: fixing rc issues and adding minor features

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Jul 30 18:06:41 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository clfft.

commit 797715de64e28d0c165cd12fe97d022f0a1eed49
Author: bragadeesh <bragadeesh.natarajan at amd>
Date:   Sun Jun 21 21:30:07 2015 -0500

    fixing rc issues and adding minor features
---
 src/library/action.cpp                  | 19 +++++++++++++++++--
 src/library/generator.transpose.gcn.cpp | 22 ++++++++++++++++++++--
 src/library/plan.cpp                    | 16 ++++++++++++++--
 src/library/transform.cpp               |  8 ++++----
 4 files changed, 55 insertions(+), 10 deletions(-)

diff --git a/src/library/action.cpp b/src/library/action.cpp
index b9b9884..e1506ff 100644
--- a/src/library/action.cpp
+++ b/src/library/action.cpp
@@ -486,8 +486,23 @@ clfftStatus FFTAction::selectBufferArguments(FFTPlan * fftPlan,
         }
         default:
         {
-            //	Don't recognize output layout
-            return CLFFT_INVALID_ARG_VALUE;
+			if(fftPlan->transflag)
+			{
+				if( fftPlan->placeness == CLFFT_INPLACE )
+				{
+					return CLFFT_INVALID_ARG_VALUE;
+				}
+				else
+				{
+					inputBuff.push_back( clInputBuffers[ 0 ] );
+					outputBuff.push_back( clOutputBuffers[ 0 ] );
+				}
+			}
+			else
+			{
+				//	Don't recognize output layout
+				return CLFFT_INVALID_ARG_VALUE;
+			}
         }
         }
 
diff --git a/src/library/generator.transpose.gcn.cpp b/src/library/generator.transpose.gcn.cpp
index c5f7ebf..c4d6e0b 100644
--- a/src/library/generator.transpose.gcn.cpp
+++ b/src/library/generator.transpose.gcn.cpp
@@ -784,13 +784,29 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 					if(branchingInGroupX)
 					{
 						clKernWrite( transKernel, 9 ) << std::endl;
-						clKernWrite( transKernel, 9 ) << "if( (" << wIndexY << " < " << wIndexXEnd << ") )" << std::endl;
+						if(params.fft_realSpecial)
+						{
+							clKernWrite( transKernel, 9 ) << "if( (" << wIndexY << " < " << wIndexXEnd << ") && (" <<
+								wIndexX << " < 1) )" << std::endl;
+						}
+						else
+						{
+							clKernWrite( transKernel, 9 ) << "if( (" << wIndexY << " < " << wIndexXEnd << ") )" << std::endl;
+						}
 						clKernWrite( transKernel, 9 ) << "{" << std::endl;
 					}
 					else
 					{
 						clKernWrite( transKernel, 9 ) << std::endl;
-						clKernWrite( transKernel, 9 ) << "if( (" << wIndexX << " < " << wIndexYEnd << ") )" << std::endl;
+						if(params.fft_realSpecial)
+						{
+							clKernWrite( transKernel, 9 ) << "if( (" << wIndexX << " < " << wIndexYEnd << ") && (" <<
+								wIndexY << " < 1) )" << std::endl;
+						}
+						else
+						{
+							clKernWrite( transKernel, 9 ) << "if( (" << wIndexX << " < " << wIndexYEnd << ") )" << std::endl;
+						}
 						clKernWrite( transKernel, 9 ) << "{" << std::endl;
 					}
 				}
@@ -848,6 +864,8 @@ clfftStatus FFTGeneratedTransposeGCNAction::initParams ()
     this->signature.fft_outputLayout = this->plan->outputLayout;
     this->signature.fft_3StepTwiddle = false;
 
+	this->signature.fft_realSpecial  = this->plan->realSpecial;
+
 	this->signature.transOutHorizontal = this->plan->transOutHorizontal;	// using the twiddle front flag to specify horizontal write
 														// we do this so as to reuse flags in FFTKernelGenKeyParams
 														// and to avoid making a new one 
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index 5234d72..013ade0 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -796,7 +796,8 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 
 				// For real transforms
 				// Special case optimization with 5-step algorithm
-				if( (fftPlan->inputLayout == CLFFT_REAL) && IsPo2(fftPlan->length[0]) )
+				if( (fftPlan->inputLayout == CLFFT_REAL) && IsPo2(fftPlan->length[0])
+					&& (fftPlan->inStride[0] == 1) && (fftPlan->outStride[0] == 1) )
 				{
 					if (fftPlan->length.size() > 1) break;
 					if (fftPlan->inStride[0] != 1 || fftPlan->outStride[0] != 1) break;
@@ -814,6 +815,16 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 					{
 						fftPlan->tmpBufSize = (smallerDim + padding) * biggerDim *
 							fftPlan->batchsize * fftPlan->ElementSize() / 2;
+
+						for (size_t index=1; index < fftPlan->length.size(); index++)
+						{
+							fftPlan->tmpBufSizeRC *= fftPlan->length[index];
+						}
+					}
+
+					if (fftPlan->tmpBufSizeRC==0 )
+					{
+						fftPlan->tmpBufSizeRC = fftPlan->tmpBufSize;
 					}
 
 					//Transpose
@@ -947,7 +958,7 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 					row2Plan->outputLayout  = CLFFT_COMPLEX_INTERLEAVED;
 					row2Plan->inStride[0]   = 1;
 					row2Plan->outStride[0]  = 1;
-					row2Plan->inStride.push_back(clLengths[0] + padding);
+					row2Plan->inStride.push_back(clLengths[0]);
 					row2Plan->outStride.push_back(1 + clLengths[0]/2);
 					row2Plan->iDist         = (1 + clLengths[1]/2) * row2Plan->inStride[1];
 					row2Plan->oDist         = clLengths[1] * row2Plan->outStride[1];
@@ -989,6 +1000,7 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 					trans3Plan->oDist         = fftPlan->oDist;
                     trans3Plan->gen           = Transpose_GCN;
 					trans3Plan->transflag     = true;
+					trans3Plan->realSpecial	  = true;
 					trans3Plan->transOutHorizontal = true;
 
 					OPENCL_V(clfftBakePlan(fftPlan->planTZ, numQueues, commQueueFFT, NULL, NULL ),
diff --git a/src/library/transform.cpp b/src/library/transform.cpp
index f9f9b2c..29cdc46 100644
--- a/src/library/transform.cpp
+++ b/src/library/transform.cpp
@@ -157,7 +157,7 @@ clfftStatus clfftEnqueueTransform(
 					//tmp->output
 					cl_event rowXOutEvents = NULL;
 					OPENCL_V( clfftEnqueueTransform( fftPlan->planX, dir, numQueuesAndEvents, commQueues, 1,
-						&transTXOutEvents, &rowXOutEvents, &localIntBuffer, mybuffers, NULL ),
+						&transTXOutEvents, &rowXOutEvents, &localIntBuffer, &(fftPlan->intBufferRC), NULL ),
 						_T("clfftEnqueueTransform for large1D rowX failed"));
 					clReleaseEvent(transTXOutEvents);
 
@@ -175,7 +175,7 @@ clfftStatus clfftEnqueueTransform(
 					// output->tmp
 					cl_event transTYOutEvents = NULL;
 					OPENCL_V( clfftEnqueueTransform( fftPlan->planTY, dir, numQueuesAndEvents, commQueues, 1,
-						&rowXOutEvents, &transTYOutEvents, mybuffers, &localIntBuffer, NULL ),
+						&rowXOutEvents, &transTYOutEvents, &(fftPlan->intBufferRC), &localIntBuffer, NULL ),
 						_T("clfftEnqueueTransform for large1D transTY failed"));
 					clReleaseEvent(rowXOutEvents);
 
@@ -193,7 +193,7 @@ clfftStatus clfftEnqueueTransform(
 					//tmp->tmp, inplace
 					cl_event rowYOutEvents = NULL;
 					OPENCL_V( clfftEnqueueTransform( fftPlan->planY, dir, numQueuesAndEvents, commQueues, 1,
-						&transTYOutEvents, &rowYOutEvents, &localIntBuffer, NULL, NULL ),
+						&transTYOutEvents, &rowYOutEvents, &localIntBuffer, &(fftPlan->intBufferRC), NULL ),
 						_T("clfftEnqueueTransform for large1D rowY failed"));
 					clReleaseEvent(transTYOutEvents);
 
@@ -209,7 +209,7 @@ clfftStatus clfftEnqueueTransform(
 					//Third Transpose
 					// tmp->output
 					OPENCL_V( clfftEnqueueTransform( fftPlan->planTZ, dir, numQueuesAndEvents, commQueues, 1,
-						&rowYOutEvents, outEvents, &localIntBuffer, mybuffers, NULL ),
+						&rowYOutEvents, outEvents, &(fftPlan->intBufferRC), mybuffers, NULL ),
 						_T("clfftEnqueueTransform for large1D transTZ failed"));
 					clReleaseEvent(rowYOutEvents);
 			}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list