[clfft] 12/32: bug fix. passed gtest *huge_1D*, *huge_sp_test* and *huge_dp_test*
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Tue Apr 26 08:34:09 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository clfft.
commit de26d9a9e9063aef6df501bceca0379ce7b86563
Author: Timmy <timmy.liu at amd.com>
Date: Tue Mar 22 10:00:46 2016 -0500
bug fix. passed gtest *huge_1D*, *huge_sp_test* and *huge_dp_test*
---
src/library/action.transpose.cpp | 4 +---
src/library/plan.cpp | 27 +++++++++++++++++++++------
2 files changed, 22 insertions(+), 9 deletions(-)
diff --git a/src/library/action.transpose.cpp b/src/library/action.transpose.cpp
index 2a897bc..82789b8 100644
--- a/src/library/action.transpose.cpp
+++ b/src/library/action.transpose.cpp
@@ -135,8 +135,7 @@ clfftStatus FFTGeneratedTransposeNonSquareAction::initParams()
}
this->signature.fft_DataDim = this->plan->length.size() + 1;
- //if (this->plan->length.back() == 729 && this->plan->length.size() > 2)//silly Timmy delete
- // this->signature.fft_DataDim--;
+
int i = 0;
for (i = 0; i < (this->signature.fft_DataDim - 1); i++)
{
@@ -700,7 +699,6 @@ clfftStatus FFTGeneratedTransposeSquareAction::generateKernel(FFTRepo& fftRepo,
std::string programCode;
OPENCL_V(clfft_transpose_generator::genTransposeKernelBatched(this->signature, programCode, lwSize, reShapeFactor), _T("GenerateTransposeKernel() failed!"));
- //std::cout << programCode << std::endl;//TIMMY
cl_int status = CL_SUCCESS;
cl_device_id Device = NULL;
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index 6091d98..8e951a3 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -614,9 +614,11 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
clLengths[1] = 100;
clLengths[0] = fftPlan->length[0]/clLengths[1];
- //timmy delete ensure clLengths[0] > clLengths[1] only when inplace is enabled
+ //timmy ensure clLengths[0] > clLengths[1] only when inplace is enabled
+ //so that swap kernel is launched after the square transpose kernel since twiddling is only enabled as the second kernel
if (clLengths[0] < clLengths[1] && clfftGetRequestLibNoMemAlloc() && fftPlan->placeness == CLFFT_INPLACE)
{
+ std::cout << "switch lengths" << std::endl;
size_t temp = clLengths[0];
clLengths[0] = clLengths[1];
clLengths[1] = temp;
@@ -712,7 +714,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
trans1Plan->gen = transGen;
trans1Plan->transflag = true;
- if (trans1Plan->gen == Transpose_NONSQUARE || 1)
+ if (trans1Plan->gen == Transpose_NONSQUARE || trans1Plan->gen == Transpose_SQUARE)// inplace transpose
{
for (size_t index = 1; index < fftPlan->length.size(); index++)
{
@@ -828,7 +830,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
trans2Plan->transflag = true;
- if (trans2Plan->gen == Transpose_NONSQUARE || 1)// TIMMY delete
+ if (trans2Plan->gen == Transpose_NONSQUARE || trans2Plan->gen == Transpose_SQUARE)// inplace transpose
{
for (size_t index = 1; index < fftPlan->length.size(); index++)
{
@@ -937,7 +939,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
trans3Plan->transOutHorizontal = true;
- if (trans3Plan->gen == Transpose_NONSQUARE || 1)
+ if (trans3Plan->gen == Transpose_NONSQUARE)// inplace transpose
{
for (size_t index = 1; index < fftPlan->length.size(); index++)
{
@@ -950,12 +952,24 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
*/
trans3Plan->batchsize = trans3Plan->batchsize * fftPlan->length[index];
trans3Plan->iDist = trans3Plan->iDist / fftPlan->length[index];
- //trans3Plan->inStride.push_back(trans3Plan->iDist);//Timmy for square
+ //trans3Plan->inStride.push_back(trans3Plan->iDist);
trans3Plan->inStride.push_back(fftPlan->inStride[index]);
trans3Plan->iDist *= fftPlan->length[index];
trans3Plan->outStride.push_back(fftPlan->outStride[index]);
}
}
+ else if (trans3Plan->gen == Transpose_SQUARE)
+ {
+ for (size_t index = 1; index < fftPlan->length.size(); index++)
+ {
+ trans3Plan->batchsize = trans3Plan->batchsize * fftPlan->length[index];
+ //trans3Plan->iDist = trans3Plan->iDist / fftPlan->length[index];
+ //trans3Plan->inStride.push_back(trans3Plan->iDist);
+ trans3Plan->inStride.push_back(fftPlan->inStride[index]);
+ //trans3Plan->iDist *= fftPlan->length[index];
+ trans3Plan->outStride.push_back(fftPlan->outStride[index]);
+ }
+ }
else
{
for (size_t index = 1; index < fftPlan->length.size(); index++)
@@ -2076,6 +2090,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
if (clLengths[0] > clLengths[1] && fftPlan->large1D == 0)
{
//twiddling can be done in swap when swap is the second kernel for now
+ //TODO enable twiddling in swap here as well
currKernelOrder = SWAP_AND_TRANSPOSE;
}
else
@@ -2093,7 +2108,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
}
//if the original input data is more than 1d only TRANSPOSE_LEADING_AND_SWAP order is supported
//TODO need to fix this here. related to multi dim batch size.
- //if (fftPlan->length.size() > 2) //Timmy test
+ //if (fftPlan->length.size() > 2)
// currKernelOrder = TRANSPOSE_LEADING_AND_SWAP;
std::cout << "currKernelOrder = " << currKernelOrder << std::endl;
//ends transpose kernel order
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits mailing list