[clfft] 87/107: updating 3D real backward
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Jul 30 18:06:40 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository clfft.
commit 23e811f48723ed2aea1e219fd5b02680972d17ff
Author: bragadeesh <bragadeesh.natarajan at amd>
Date: Sat Apr 25 19:42:07 2015 -0500
updating 3D real backward
---
src/library/plan.cpp | 432 +++++++++++++++++++++++++++++++++++-----------
src/library/transform.cpp | 114 ++++++++++--
2 files changed, 424 insertions(+), 122 deletions(-)
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index 760da7b..58eb619 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -2685,7 +2685,8 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan 3D->2D planX failed" ) );
- if( (xyPlan->outStride[2] == Nt*length1) &&
+ if( (xyPlan->inStride[0] == 1) && (xyPlan->outStride[0] == 1) &&
+ (xyPlan->outStride[2] == Nt*length1) &&
( ((xyPlan->inStride[2] == Nt*2*length1) && (xyPlan->placeness == CLFFT_INPLACE)) ||
((xyPlan->inStride[2] == length0*length1) && (xyPlan->placeness == CLFFT_OUTOFPLACE)) ) )
{
@@ -2880,8 +2881,6 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
_T( "BakePlan for planTY failed" ) );
- fftPlan->baked = true;
- return CLFFT_SUCCESS;
}
else
{
@@ -2969,161 +2968,388 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
fftPlan->tmpBufSizeC2R = fftPlan->tmpBufSize;
}
- size_t clLengths[] = { 1, 0, 0 };
+ if( (fftPlan->inStride[0] == 1) && (fftPlan->outStride[0] == 1) &&
+ ( ((fftPlan->outStride[2] == Nt*2*length1) && (fftPlan->placeness == CLFFT_INPLACE)) ||
+ ((fftPlan->outStride[2] == length0*length1) && (fftPlan->placeness == CLFFT_OUTOFPLACE)) )
+ && (fftPlan->inStride[2] == Nt*length1) )
+ {
+ // create first transpose plan
+
+ //Transpose
+ // input --> tmp
+ size_t transLengths[2] = { length0*length1, length2 };
+ OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planTZ, fftPlan->context, CLFFT_2D, transLengths ),
+ _T( "CreateDefaultPlan for planTZ transpose failed" ) );
- clLengths[0] = fftPlan->length[ DimZ ];
+ FFTPlan* trans1Plan = NULL;
+ lockRAII* trans1Lock = NULL;
+ OPENCL_V( fftRepo.getPlan( fftPlan->planTZ, trans1Plan, trans1Lock ), _T( "fftRepo.getPlan failed" ) );
- //create 1D col plan
- OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planZ, fftPlan->context, CLFFT_1D, clLengths ),
- _T( "CreateDefaultPlan for planZ failed" ) );
+ trans1Plan->transflag = true;
- FFTPlan* colPlan = NULL;
- lockRAII* colLock = NULL;
- OPENCL_V( fftRepo.getPlan( fftPlan->planZ, colPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
+ transLengths[0] = Nt*length1;
+ OPENCL_V(clfftSetPlanLength( fftPlan->planTZ, CLFFT_2D, transLengths ),
+ _T( "clfftSetPlanLength for planTZ transpose failed" ) );
- switch(fftPlan->inputLayout)
- {
- case CLFFT_HERMITIAN_INTERLEAVED:
+ switch(fftPlan->inputLayout)
{
- colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
- colPlan->inputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ case CLFFT_HERMITIAN_INTERLEAVED:
+ {
+ trans1Plan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ trans1Plan->inputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ }
+ break;
+ case CLFFT_HERMITIAN_PLANAR:
+ {
+ trans1Plan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ trans1Plan->inputLayout = CLFFT_COMPLEX_PLANAR;
+ }
+ break;
+ default: assert(false);
}
- break;
- case CLFFT_HERMITIAN_PLANAR:
+
+ trans1Plan->placeness = CLFFT_OUTOFPLACE;
+ trans1Plan->precision = fftPlan->precision;
+ trans1Plan->tmpBufSize = 0;
+ trans1Plan->batchsize = fftPlan->batchsize;
+ trans1Plan->envelope = fftPlan->envelope;
+ trans1Plan->forwardScale = 1.0f;
+ trans1Plan->backwardScale = 1.0f;
+
+ trans1Plan->inStride[0] = 1;
+ trans1Plan->inStride[1] = Nt*length1;
+ trans1Plan->outStride[0] = 1;
+ trans1Plan->outStride[1] = length2;
+ trans1Plan->iDist = fftPlan->iDist;
+ trans1Plan->oDist = Nt*length1*length2;
+ trans1Plan->transOutHorizontal = true;
+
+ trans1Plan->gen = Transpose_GCN;
+
+
+ for (size_t index=3; index < fftPlan->length.size(); index++)
{
- colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
- colPlan->inputLayout = CLFFT_COMPLEX_PLANAR;
+ trans1Plan->length.push_back(fftPlan->length[index]);
+ trans1Plan->inStride.push_back(fftPlan->inStride[index]);
+ trans1Plan->outStride.push_back(trans1Plan->oDist);
+ trans1Plan->oDist *= fftPlan->length[index];
}
- break;
- default: assert(false);
- }
- colPlan->length.push_back(Nt);
- colPlan->length.push_back(length1);
+ OPENCL_V(clfftBakePlan(fftPlan->planTZ, numQueues, commQueueFFT, NULL, NULL ),
+ _T( "BakePlan for planTZ failed" ) );
- colPlan->inStride[0] = fftPlan->inStride[2];
- colPlan->inStride.push_back(fftPlan->inStride[0]);
- colPlan->inStride.push_back(fftPlan->inStride[1]);
- colPlan->iDist = fftPlan->iDist;
+ // create col plan
+ // complex to complex
+ OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planZ, fftPlan->context, CLFFT_1D, &fftPlan->length[ DimZ ] ),
+ _T( "CreateDefaultPlan for planZ failed" ) );
+
+ FFTPlan* colPlan = NULL;
+ lockRAII* colLock = NULL;
+ OPENCL_V( fftRepo.getPlan( fftPlan->planZ, colPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
+
+ colPlan->length.push_back(Nt*length1);
+
+ colPlan->inStride[0] = 1;
+ colPlan->inStride.push_back(length2);
+ colPlan->iDist = trans1Plan->oDist;
- if (fftPlan->placeness == CLFFT_INPLACE)
- {
colPlan->placeness = CLFFT_INPLACE;
+ colPlan->inputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
colPlan->outStride[0] = colPlan->inStride[0];
colPlan->outStride.push_back(colPlan->inStride[1]);
- colPlan->outStride.push_back(colPlan->inStride[2]);
colPlan->oDist = colPlan->iDist;
for (size_t index=3; index < fftPlan->length.size(); index++)
{
colPlan->length.push_back(fftPlan->length[index]);
- colPlan->inStride.push_back(fftPlan->inStride[index]);
- colPlan->outStride.push_back(fftPlan->inStride[index]);
+ colPlan->inStride.push_back(trans1Plan->outStride[index-1]);
+ colPlan->outStride.push_back(trans1Plan->outStride[index-1]);
+ }
+
+
+ colPlan->precision = fftPlan->precision;
+ colPlan->forwardScale = 1.0f;
+ colPlan->backwardScale = 1.0f;
+ colPlan->tmpBufSize = 0;
+
+ colPlan->gen = fftPlan->gen;
+ colPlan->envelope = fftPlan->envelope;
+
+ colPlan->batchsize = fftPlan->batchsize;
+
+ OPENCL_V(clfftBakePlan(fftPlan->planZ, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan for planZ failed" ) );
+
+ // create second transpose plan
+
+ //Transpose
+ //tmp --> output
+ size_t trans2Lengths[2] = { length2, length0*length1 };
+ OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planTX, fftPlan->context, CLFFT_2D, trans2Lengths ),
+ _T( "CreateDefaultPlan for planTX transpose failed" ) );
+
+ FFTPlan* trans2Plan = NULL;
+ lockRAII* trans2Lock = NULL;
+ OPENCL_V( fftRepo.getPlan( fftPlan->planTX, trans2Plan, trans2Lock ), _T( "fftRepo.getPlan failed" ) );
+
+ trans2Plan->transflag = true;
+
+ trans2Lengths[1] = Nt*length1;
+ OPENCL_V(clfftSetPlanLength( fftPlan->planTX, CLFFT_2D, trans2Lengths ),
+ _T( "clfftSetPlanLength for planTX transpose failed" ) );
+
+
+ trans2Plan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ trans2Plan->inputLayout = CLFFT_COMPLEX_INTERLEAVED;
+
+
+ trans2Plan->placeness = CLFFT_OUTOFPLACE;
+ trans2Plan->precision = fftPlan->precision;
+ trans2Plan->tmpBufSize = 0;
+ trans2Plan->batchsize = fftPlan->batchsize;
+ trans2Plan->envelope = fftPlan->envelope;
+ trans2Plan->forwardScale = 1.0f;
+ trans2Plan->backwardScale = 1.0f;
+
+ trans2Plan->inStride[0] = 1;
+ trans2Plan->inStride[1] = length2;
+ trans2Plan->outStride[0] = 1;
+ trans2Plan->outStride[1] = Nt*length1;
+ trans2Plan->iDist = colPlan->oDist;
+ trans2Plan->oDist = Nt*length1*length2;
+
+ trans2Plan->gen = Transpose_GCN;
+ trans2Plan->transflag = true;
+
+ for (size_t index=3; index < fftPlan->length.size(); index++)
+ {
+ trans2Plan->length.push_back(fftPlan->length[index]);
+ trans2Plan->inStride.push_back(colPlan->outStride[index-1]);
+ trans2Plan->outStride.push_back(trans2Plan->oDist);
+ trans2Plan->oDist *= fftPlan->length[index];
+
}
+
+ OPENCL_V(clfftBakePlan(fftPlan->planTX, numQueues, commQueueFFT, NULL, NULL ),
+ _T( "BakePlan for planTX failed" ) );
+
+ // create row plan
+ // hermitian to real
+
+ //create 2D xy plan
+ size_t clLengths[] = { length0, length1, 0 };
+ OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planX, fftPlan->context, CLFFT_2D, clLengths ),
+ _T( "CreateDefaultPlan for 2D planX failed" ) );
+
+ FFTPlan* rowPlan = NULL;
+ lockRAII* rowLock = NULL;
+ OPENCL_V( fftRepo.getPlan( fftPlan->planX, rowPlan, rowLock ), _T( "fftRepo.getPlan failed" ) );
+
+ rowPlan->outputLayout = fftPlan->outputLayout;
+ rowPlan->inputLayout = CLFFT_HERMITIAN_INTERLEAVED;
+
+ rowPlan->length.push_back(length2);
+
+ rowPlan->outStride[0] = fftPlan->outStride[0];
+ rowPlan->outStride[1] = fftPlan->outStride[1];
+ rowPlan->outStride.push_back(fftPlan->outStride[2]);
+ rowPlan->oDist = fftPlan->oDist;
+
+ rowPlan->inStride[0] = trans2Plan->outStride[0];
+ rowPlan->inStride[1] = Nt;
+ rowPlan->inStride.push_back(Nt*length1);
+ rowPlan->iDist = trans2Plan->oDist;
+
+ for (size_t index=3; index < fftPlan->length.size(); index++)
+ {
+ rowPlan->length.push_back(fftPlan->length[index]);
+ rowPlan->inStride.push_back(trans2Plan->outStride[index-1]);
+ rowPlan->outStride.push_back(fftPlan->outStride[index]);
+ }
+
+ if (fftPlan->placeness == CLFFT_INPLACE)
+ {
+ rowPlan->placeness = CLFFT_INPLACE;
+ }
+ else
+ {
+ rowPlan->placeness = CLFFT_OUTOFPLACE;
+ }
+
+
+ rowPlan->precision = fftPlan->precision;
+ rowPlan->forwardScale = fftPlan->forwardScale;
+ rowPlan->backwardScale = fftPlan->backwardScale;
+ rowPlan->tmpBufSize = 0;
+
+ rowPlan->gen = fftPlan->gen;
+ rowPlan->envelope = fftPlan->envelope;
+
+ rowPlan->batchsize = fftPlan->batchsize;
+
+ OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan for planX failed" ) );
}
else
{
- colPlan->placeness = CLFFT_OUTOFPLACE;
- colPlan->outStride[0] = Nt*length1;
- colPlan->outStride.push_back(1);
- colPlan->outStride.push_back(Nt);
- colPlan->oDist = Nt*length1*length2;
+ size_t clLengths[] = { 1, 0, 0 };
- for (size_t index=3; index < fftPlan->length.size(); index++)
+ clLengths[0] = fftPlan->length[ DimZ ];
+
+ //create 1D col plan
+ OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planZ, fftPlan->context, CLFFT_1D, clLengths ),
+ _T( "CreateDefaultPlan for planZ failed" ) );
+
+ FFTPlan* colPlan = NULL;
+ lockRAII* colLock = NULL;
+ OPENCL_V( fftRepo.getPlan( fftPlan->planZ, colPlan, colLock ), _T( "fftRepo.getPlan failed" ) );
+
+ switch(fftPlan->inputLayout)
{
- colPlan->length.push_back(fftPlan->length[index]);
- colPlan->inStride.push_back(fftPlan->inStride[index]);
- colPlan->outStride.push_back(colPlan->oDist);
- colPlan->oDist *= fftPlan->length[index];
+ case CLFFT_HERMITIAN_INTERLEAVED:
+ {
+ colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ colPlan->inputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ }
+ break;
+ case CLFFT_HERMITIAN_PLANAR:
+ {
+ colPlan->outputLayout = CLFFT_COMPLEX_INTERLEAVED;
+ colPlan->inputLayout = CLFFT_COMPLEX_PLANAR;
+ }
+ break;
+ default: assert(false);
+ }
+
+ colPlan->length.push_back(Nt);
+ colPlan->length.push_back(length1);
+
+ colPlan->inStride[0] = fftPlan->inStride[2];
+ colPlan->inStride.push_back(fftPlan->inStride[0]);
+ colPlan->inStride.push_back(fftPlan->inStride[1]);
+ colPlan->iDist = fftPlan->iDist;
+
+
+ if (fftPlan->placeness == CLFFT_INPLACE)
+ {
+ colPlan->placeness = CLFFT_INPLACE;
+
+ colPlan->outStride[0] = colPlan->inStride[0];
+ colPlan->outStride.push_back(colPlan->inStride[1]);
+ colPlan->outStride.push_back(colPlan->inStride[2]);
+ colPlan->oDist = colPlan->iDist;
+
+ for (size_t index=3; index < fftPlan->length.size(); index++)
+ {
+ colPlan->length.push_back(fftPlan->length[index]);
+ colPlan->inStride.push_back(fftPlan->inStride[index]);
+ colPlan->outStride.push_back(fftPlan->inStride[index]);
+ }
+ }
+ else
+ {
+ colPlan->placeness = CLFFT_OUTOFPLACE;
+
+ colPlan->outStride[0] = Nt*length1;
+ colPlan->outStride.push_back(1);
+ colPlan->outStride.push_back(Nt);
+ colPlan->oDist = Nt*length1*length2;
+
+ for (size_t index=3; index < fftPlan->length.size(); index++)
+ {
+ colPlan->length.push_back(fftPlan->length[index]);
+ colPlan->inStride.push_back(fftPlan->inStride[index]);
+ colPlan->outStride.push_back(colPlan->oDist);
+ colPlan->oDist *= fftPlan->length[index];
+ }
}
- }
- colPlan->precision = fftPlan->precision;
- colPlan->forwardScale = 1.0f;
- colPlan->backwardScale = 1.0f;
- colPlan->tmpBufSize = 0;
+ colPlan->precision = fftPlan->precision;
+ colPlan->forwardScale = 1.0f;
+ colPlan->backwardScale = 1.0f;
+ colPlan->tmpBufSize = 0;
- colPlan->gen = fftPlan->gen;
- colPlan->envelope = fftPlan->envelope;
+ colPlan->gen = fftPlan->gen;
+ colPlan->envelope = fftPlan->envelope;
- colPlan->batchsize = fftPlan->batchsize;
+ colPlan->batchsize = fftPlan->batchsize;
- OPENCL_V(clfftBakePlan(fftPlan->planZ, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan 3D->1D planZ failed" ) );
+ OPENCL_V(clfftBakePlan(fftPlan->planZ, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan 3D->1D planZ failed" ) );
- clLengths[0] = fftPlan->length[ DimX ];
- clLengths[1] = fftPlan->length[ DimY ];
+ clLengths[0] = fftPlan->length[ DimX ];
+ clLengths[1] = fftPlan->length[ DimY ];
- //create 2D xy plan
- OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planX, fftPlan->context, CLFFT_2D, clLengths ),
- _T( "CreateDefaultPlan 2D planX failed" ) );
+ //create 2D xy plan
+ OPENCL_V(clfftCreateDefaultPlanInternal( &fftPlan->planX, fftPlan->context, CLFFT_2D, clLengths ),
+ _T( "CreateDefaultPlan 2D planX failed" ) );
- FFTPlan* xyPlan = NULL;
- lockRAII* rowLock = NULL;
- OPENCL_V( fftRepo.getPlan( fftPlan->planX, xyPlan, rowLock ), _T( "fftRepo.getPlan failed" ) );
+ FFTPlan* xyPlan = NULL;
+ lockRAII* rowLock = NULL;
+ OPENCL_V( fftRepo.getPlan( fftPlan->planX, xyPlan, rowLock ), _T( "fftRepo.getPlan failed" ) );
- xyPlan->inputLayout = CLFFT_HERMITIAN_INTERLEAVED;
- xyPlan->outputLayout = fftPlan->outputLayout;
+ xyPlan->inputLayout = CLFFT_HERMITIAN_INTERLEAVED;
+ xyPlan->outputLayout = fftPlan->outputLayout;
- xyPlan->length.push_back(length2);
+ xyPlan->length.push_back(length2);
- xyPlan->outStride[0] = fftPlan->outStride[0];
- xyPlan->outStride[1] = fftPlan->outStride[1];
- xyPlan->outStride.push_back(fftPlan->outStride[2]);
- xyPlan->oDist = fftPlan->oDist;
+ xyPlan->outStride[0] = fftPlan->outStride[0];
+ xyPlan->outStride[1] = fftPlan->outStride[1];
+ xyPlan->outStride.push_back(fftPlan->outStride[2]);
+ xyPlan->oDist = fftPlan->oDist;
- if (fftPlan->placeness == CLFFT_INPLACE)
- {
- xyPlan->placeness = CLFFT_INPLACE;
+ if (fftPlan->placeness == CLFFT_INPLACE)
+ {
+ xyPlan->placeness = CLFFT_INPLACE;
- xyPlan->inStride[0] = colPlan->outStride[1];
- xyPlan->inStride[1] = colPlan->outStride[2];
- xyPlan->inStride.push_back(colPlan->outStride[0]);
- xyPlan->iDist = colPlan->oDist;
+ xyPlan->inStride[0] = colPlan->outStride[1];
+ xyPlan->inStride[1] = colPlan->outStride[2];
+ xyPlan->inStride.push_back(colPlan->outStride[0]);
+ xyPlan->iDist = colPlan->oDist;
- for (size_t index=3; index < fftPlan->length.size(); index++)
- {
- xyPlan->length.push_back(fftPlan->length[index]);
- xyPlan->inStride.push_back(colPlan->outStride[index]);
- xyPlan->outStride.push_back(fftPlan->outStride[index]);
+ for (size_t index=3; index < fftPlan->length.size(); index++)
+ {
+ xyPlan->length.push_back(fftPlan->length[index]);
+ xyPlan->inStride.push_back(colPlan->outStride[index]);
+ xyPlan->outStride.push_back(fftPlan->outStride[index]);
+ }
}
- }
- else
- {
- xyPlan->placeness = CLFFT_OUTOFPLACE;
+ else
+ {
+ xyPlan->placeness = CLFFT_OUTOFPLACE;
- xyPlan->inStride[0] = 1;
- xyPlan->inStride[1] = Nt;
- xyPlan->inStride.push_back(Nt*length1);
- xyPlan->iDist = Nt*length1*length2;
+ xyPlan->inStride[0] = 1;
+ xyPlan->inStride[1] = Nt;
+ xyPlan->inStride.push_back(Nt*length1);
+ xyPlan->iDist = Nt*length1*length2;
- for (size_t index=3; index < fftPlan->length.size(); index++)
- {
- xyPlan->length.push_back(fftPlan->length[index]);
- xyPlan->outStride.push_back(fftPlan->outStride[index]);
- xyPlan->inStride.push_back(xyPlan->iDist);
- xyPlan->iDist *= fftPlan->length[index];
+ for (size_t index=3; index < fftPlan->length.size(); index++)
+ {
+ xyPlan->length.push_back(fftPlan->length[index]);
+ xyPlan->outStride.push_back(fftPlan->outStride[index]);
+ xyPlan->inStride.push_back(xyPlan->iDist);
+ xyPlan->iDist *= fftPlan->length[index];
+ }
}
- }
- xyPlan->precision = fftPlan->precision;
- xyPlan->forwardScale = fftPlan->forwardScale;
- xyPlan->backwardScale = fftPlan->backwardScale;
- xyPlan->tmpBufSize = fftPlan->tmpBufSize;
+ xyPlan->precision = fftPlan->precision;
+ xyPlan->forwardScale = fftPlan->forwardScale;
+ xyPlan->backwardScale = fftPlan->backwardScale;
+ xyPlan->tmpBufSize = fftPlan->tmpBufSize;
- xyPlan->gen = fftPlan->gen;
- xyPlan->envelope = fftPlan->envelope;
+ xyPlan->gen = fftPlan->gen;
+ xyPlan->envelope = fftPlan->envelope;
- xyPlan->batchsize = fftPlan->batchsize;
+ xyPlan->batchsize = fftPlan->batchsize;
- OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan 3D->2D planX failed" ) );
+ OPENCL_V(clfftBakePlan(fftPlan->planX, numQueues, commQueueFFT, NULL, NULL ), _T( "BakePlan 3D->2D planX failed" ) );
+ }
}
else
{
diff --git a/src/library/transform.cpp b/src/library/transform.cpp
index 9680fc2..0ce9288 100644
--- a/src/library/transform.cpp
+++ b/src/library/transform.cpp
@@ -744,7 +744,8 @@ clfftStatus clfftEnqueueTransform(
{
cl_mem *mybuffers;
- if (fftPlan->placeness==CLFFT_INPLACE)
+ if ( (fftPlan->placeness==CLFFT_INPLACE) ||
+ ((fftPlan->placeness==CLFFT_OUTOFPLACE) && (fftPlan->length.size() > 2)) )
mybuffers = clInputBuffers;
else
mybuffers = &(fftPlan->intBufferC2R);
@@ -988,30 +989,105 @@ clfftStatus clfftEnqueueTransform(
}
else if(fftPlan->outputLayout == CLFFT_REAL)
{
- cl_mem *out_local, *int_local, *out_z;
-
- if(fftPlan->placeness == CLFFT_INPLACE)
+ if(fftPlan->planTZ)
{
- out_local = NULL;
- int_local = NULL;
- out_z = clInputBuffers;
+ cl_mem *mybuffers;
+
+ if (fftPlan->placeness==CLFFT_INPLACE)
+ mybuffers = clInputBuffers;
+ else
+ mybuffers = &(fftPlan->intBufferC2R);
+
+ cl_event transZOutEvents = NULL;
+ cl_event transXOutEvents = NULL;
+
+ //First transpose
+ OPENCL_V( clfftEnqueueTransform( fftPlan->planTZ, dir, numQueuesAndEvents, commQueues, numWaitEvents,
+ waitEvents, &transZOutEvents, clInputBuffers, &localIntBuffer, NULL ),
+ _T("clfftEnqueueTransform for first transpose failed"));
+
+
+#if defined(DEBUGGING)
+ OPENCL_V( clEnqueueReadBuffer( *commQueues, mybuffers[0], CL_TRUE, 0, buffSizeBytes*2, &output2[0], 0,
+ NULL, NULL ),
+ _T("Reading the result buffer failed") );
+#endif
+
+ //First row
+ OPENCL_V( clfftEnqueueTransform( fftPlan->planZ, dir, numQueuesAndEvents, commQueues, 1, &transZOutEvents,
+ &rowOutEvents, &localIntBuffer, NULL, NULL ),
+ _T("clfftEnqueueTransform for col failed"));
+ clReleaseEvent(transZOutEvents);
+
+
+#if defined(DEBUGGING)
+ OPENCL_V( clEnqueueReadBuffer( *commQueues, mybuffers[0], CL_TRUE, 0, buffSizeBytes*2, &output2[ 0 ], 0,
+ NULL, NULL ),
+ _T("Reading the result buffer failed") );
+#endif
+
+ //Second transpose
+ OPENCL_V( clfftEnqueueTransform( fftPlan->planTX, dir, numQueuesAndEvents, commQueues, 1, &rowOutEvents,
+ &transXOutEvents, &localIntBuffer, mybuffers, NULL ),
+ _T("clfftEnqueueTransform for second transpose failed"));
+
+
+#if defined(DEBUGGING)
+ OPENCL_V( clEnqueueReadBuffer( *commQueues, localIntBuffer, CL_TRUE, 0, buffSizeBytes*2, &output2[0], 0,
+ NULL, NULL ),
+ _T("Reading the result buffer failed") );
+#endif
+
+
+ //Second Row transform
+ if(fftPlan->placeness == CLFFT_INPLACE)
+ {
+ OPENCL_V( clfftEnqueueTransform( fftPlan->planX, dir, numQueuesAndEvents, commQueues, 1, &transXOutEvents,
+ outEvents, clInputBuffers, NULL, NULL ),
+ _T("clfftEnqueueTransform for second row failed"));
+ }
+ else
+ {
+ OPENCL_V( clfftEnqueueTransform( fftPlan->planX, dir, numQueuesAndEvents, commQueues, 1, &transXOutEvents,
+ outEvents, mybuffers, clOutputBuffers, NULL ),
+ _T("clfftEnqueueTransform for second row failed"));
+ }
+ clReleaseEvent(transXOutEvents);
+#if defined(DEBUGGING)
+ OPENCL_V( clEnqueueReadBuffer( *commQueues, localIntBuffer, CL_TRUE, 0, buffSizeBytes*2, &output2[0], 0,
+ NULL, NULL ),
+ _T("Reading the result buffer failed") );
+#endif
+
+
}
else
{
- out_local = clOutputBuffers;
- int_local = &(fftPlan->intBufferC2R);
- out_z = int_local;
- }
+ cl_mem *out_local, *int_local, *out_z;
+
+ if(fftPlan->placeness == CLFFT_INPLACE)
+ {
+ out_local = NULL;
+ int_local = NULL;
+ out_z = clInputBuffers;
+ }
+ else
+ {
+ out_local = clOutputBuffers;
+ int_local = &(fftPlan->intBufferC2R);
+ out_z = int_local;
+ }
- //deal with 1D Z column first
- OPENCL_V( clfftEnqueueTransform( fftPlan->planZ, CLFFT_BACKWARD, numQueuesAndEvents, commQueues, numWaitEvents,
- waitEvents, &rowOutEvents, clInputBuffers, int_local, localIntBuffer ),
- _T("clfftEnqueueTransform for 3D-Z column failed"));
+ //deal with 1D Z column first
+ OPENCL_V( clfftEnqueueTransform( fftPlan->planZ, CLFFT_BACKWARD, numQueuesAndEvents, commQueues, numWaitEvents,
+ waitEvents, &rowOutEvents, clInputBuffers, int_local, localIntBuffer ),
+ _T("clfftEnqueueTransform for 3D-Z column failed"));
- //deal with 2D row
- OPENCL_V( clfftEnqueueTransform( fftPlan->planX, CLFFT_BACKWARD, numQueuesAndEvents, commQueues, 1, &rowOutEvents,
- outEvents, out_z, out_local, localIntBuffer ),
- _T("clfftEnqueueTransform for 3D-XY row failed"));
+ //deal with 2D row
+ OPENCL_V( clfftEnqueueTransform( fftPlan->planX, CLFFT_BACKWARD, numQueuesAndEvents, commQueues, 1, &rowOutEvents,
+ outEvents, out_z, out_local, localIntBuffer ),
+ _T("clfftEnqueueTransform for 3D-XY row failed"));
+ }
}
else
{
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits mailing list