[clfft] 38/74: bug fix for twiddle factor computation in non square kernels.
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Jan 14 19:52:15 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/sid in repository clfft.
commit dbd9745e8004ae69473ef4215ebd96c21e92b7d9
Author: santanu-thangaraj <t.santanu at gmail.com>
Date: Wed Dec 9 22:09:10 2015 +0530
bug fix for twiddle factor computation in non square kernels.
---
src/library/generator.transpose.nonsquare.cpp | 4 ++--
src/library/plan.cpp | 14 +++++++-------
2 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/src/library/generator.transpose.nonsquare.cpp b/src/library/generator.transpose.nonsquare.cpp
index bfd926d..52aa64f 100644
--- a/src/library/generator.transpose.nonsquare.cpp
+++ b/src/library/generator.transpose.nonsquare.cpp
@@ -148,12 +148,12 @@ static clfftStatus genTwiddleMath(const FFTKernelGenKeyParams& params, std::stri
if (params.fft_N[0] > params.fft_N[1])
{
clKernWrite(transKernel, 9) << dtComplex << " Wm = TW3step( ("<< params.fft_N[1] <<" * square_matrix_index + t_gx_p*32 + lidx) * (t_gy_p*32 + lidy + loop*8) );" << std::endl;
- clKernWrite(transKernel, 9) << dtComplex << " Wt = TW3step( (t_gy_p*32 + lidx) * ("<< params.fft_N[1] <<" * square_matrix_index + t_gx_p*32 + lidy + loop*8) );" << std::endl;
+ clKernWrite(transKernel, 9) << dtComplex << " Wt = TW3step( ("<< params.fft_N[1] <<" * square_matrix_index + t_gy_p*32 + lidx) * (t_gx_p*32 + lidy + loop*8) );" << std::endl;
}
else
{
clKernWrite(transKernel, 9) << dtComplex << " Wm = TW3step( (t_gx_p*32 + lidx) * (" << params.fft_N[0] << " * square_matrix_index + t_gy_p*32 + lidy + loop*8) );" << std::endl;
- clKernWrite(transKernel, 9) << dtComplex << " Wt = TW3step( (" << params.fft_N[0] << " * square_matrix_index + t_gy_p*32 + lidx) * (t_gx_p*32 + lidy + loop*8) );" << std::endl;
+ clKernWrite(transKernel, 9) << dtComplex << " Wt = TW3step( (t_gy_p*32 + lidx) * (" << params.fft_N[0] << " * square_matrix_index + t_gx_p*32 + lidy + loop*8) );" << std::endl;
}
clKernWrite(transKernel, 9) << dtComplex << " Tm, Tt;" << std::endl;
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index c018e78..9b5f6c4 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -775,7 +775,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
trans2Plan->oDist = clLengths[1] * trans2Plan->outStride[1];
trans2Plan->gen = transGen;
- if(transGen != Transpose_NONSQUARE)
+ // if(transGen != Transpose_NONSQUARE)
trans2Plan->large1D = fftPlan->length[0];
trans2Plan->transflag = true;
@@ -831,11 +831,11 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
row2Plan->oDist *= fftPlan->length[index];
}
- if (transGen == Transpose_NONSQUARE)
- {
- row2Plan->large1D = fftPlan->length[0];
- row2Plan->twiddleFront = true;
- }
+// if (transGen == Transpose_NONSQUARE)
+// {
+// row2Plan->large1D = fftPlan->length[0];
+// row2Plan->twiddleFront = true;
+// }
OPENCL_V(clfftBakePlan(fftPlan->planY, numQueues, commQueueFFT, NULL, NULL ),
_T( "BakePlan large1d second row plan failed" ) );
@@ -1935,7 +1935,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
trans1Plan->gen = Transpose_NONSQUARE;
trans1Plan->nonSquareKernelType = NON_SQUARE_TRANS_TRANSPOSE;
trans1Plan->transflag = true;
-
+ trans1Plan->large1D = fftPlan->large1D;
for (size_t index = 2; index < fftPlan->length.size(); index++)
{
trans1Plan->length.push_back(fftPlan->length[index]);
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits mailing list