[clfft] 08/32: Add twiddling to general swap kernels (planar and forward only for now; interleaved and backward still need to be added). Extended sizes are now supported for in-place transpose.
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Tue Apr 26 08:34:08 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository clfft.
commit ca209c20d92adc6b05d9c65f6dd29c306c4a421f
Author: Timmy <timmy.liu at amd.com>
Date: Wed Mar 16 14:26:40 2016 -0600
Add twiddling to general swap kernels (planar and forward only for now; interleaved and backward still need to be added). Extended sizes are now supported for in-place transpose.
---
src/library/action.transpose.cpp | 28 ++++++++++++++++++++++++++--
src/library/generator.transpose.cpp | 1 -
src/library/plan.cpp | 24 +++++++++++++++---------
src/tests/accuracy_test_pow3.cpp | 10 ++++++++++
4 files changed, 51 insertions(+), 12 deletions(-)
diff --git a/src/library/action.transpose.cpp b/src/library/action.transpose.cpp
index ce657bd..ba2bca0 100644
--- a/src/library/action.transpose.cpp
+++ b/src/library/action.transpose.cpp
@@ -135,6 +135,8 @@ clfftStatus FFTGeneratedTransposeNonSquareAction::initParams()
}
this->signature.fft_DataDim = this->plan->length.size() + 1;
+ //if (this->plan->length.back() == 729 && this->plan->length.size() > 2)//silly Timmy delete
+ // this->signature.fft_DataDim--;
int i = 0;
for (i = 0; i < (this->signature.fft_DataDim - 1); i++)
{
@@ -336,8 +338,7 @@ clfftStatus FFTGeneratedTransposeNonSquareAction::getWorkSizes(std::vector< size
size_t dim_ratio = bigger_dim / smaller_dim;
size_t global_item_size;
- if (this->signature.nonSquareKernelType == NON_SQUARE_TRANS_TRANSPOSE_BATCHED_LEADING
- || this->signature.nonSquareKernelType == NON_SQUARE_TRANS_TRANSPOSE_BATCHED)
+ if (this->signature.nonSquareKernelType == NON_SQUARE_TRANS_TRANSPOSE_BATCHED_LEADING)
{
std::cout << "TIMMY"<< std::endl;
if (smaller_dim % (16 * reShapeFactor) == 0)
@@ -363,6 +364,29 @@ clfftStatus FFTGeneratedTransposeNonSquareAction::getWorkSizes(std::vector< size
localWS.clear();
localWS.push_back(lwSize);
}
+ else if (this->signature.nonSquareKernelType == NON_SQUARE_TRANS_TRANSPOSE_BATCHED)
+ {
+ std::cout << "TIMMY" << std::endl;
+ if (smaller_dim % (16 * reShapeFactor) == 0)
+ wg_slice = smaller_dim / 16 / reShapeFactor;
+ else
+ wg_slice = (smaller_dim / (16 * reShapeFactor)) + 1;
+
+ global_item_size = wg_slice*(wg_slice + 1) / 2 * 16 * 16 * this->plan->batchsize;
+
+ for (int i = 2; i < this->plan->length.size(); i++)//Timmy delete
+ {
+ global_item_size *= this->plan->length[i];
+ }
+
+ /*Push the data required for the transpose kernels*/
+ globalWS.clear();
+ globalWS.push_back(global_item_size);
+
+
+ localWS.clear();
+ localWS.push_back(lwSize);
+ }
else
{
/*Now calculate the data for the swap kernels */
diff --git a/src/library/generator.transpose.cpp b/src/library/generator.transpose.cpp
index f4ea9da..a2b6b3d 100644
--- a/src/library/generator.transpose.cpp
+++ b/src/library/generator.transpose.cpp
@@ -38,7 +38,6 @@ void OffsetCalc(std::stringstream& transKernel, const FFTKernelGenKeyParams& par
for (size_t i = params.fft_DataDim - 2; i > 0; i--)
{
clKernWrite(transKernel, 3) << offset << " += (g_index/numGroupsY_" << i << ")*" << stride[i + 1] << ";" << std::endl;
- //clKernWrite(transKernel, 3) << offset << " += (g_index/numGroupsY_" << i << ")*" << 1048576 << ";" << std::endl;
clKernWrite(transKernel, 3) << "g_index = g_index % numGroupsY_" << i << ";" << std::endl;
}
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index 9dc23ef..391246a 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -697,7 +697,9 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
for (size_t index = 1; index < fftPlan->length.size(); index++)
{
- trans1Plan->length.push_back(fftPlan->length[index]);
+ //trans1Plan->length.push_back(fftPlan->length[index]);
+ trans1Plan->batchsize = trans1Plan->batchsize * fftPlan->length[index];//Timmy
+ trans1Plan->iDist = trans1Plan->iDist / fftPlan->length[index];//Timmy
trans1Plan->inStride.push_back(fftPlan->inStride[index]);
trans1Plan->outStride.push_back(trans1Plan->oDist);
trans1Plan->oDist *= fftPlan->length[index];
@@ -783,14 +785,16 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
trans2Plan->oDist = clLengths[1] * trans2Plan->outStride[1];
trans2Plan->gen = transGen;
- if (transGen != Transpose_NONSQUARE)//TIMMY twiddle
+ //if (transGen != Transpose_NONSQUARE)//TIMMY twiddle
trans2Plan->large1D = fftPlan->length[0];
trans2Plan->transflag = true;
for (size_t index = 1; index < fftPlan->length.size(); index++)
{
- trans2Plan->length.push_back(fftPlan->length[index]);
+ //trans2Plan->length.push_back(fftPlan->length[index]);
+ trans2Plan->batchsize = trans2Plan->batchsize * fftPlan->length[index];//Timmy
+ trans2Plan->iDist = trans2Plan->iDist / fftPlan->length[index];//Timmy
trans2Plan->inStride.push_back(fftPlan->outStride[index]);
trans2Plan->outStride.push_back(trans2Plan->oDist);
trans2Plan->oDist *= fftPlan->length[index];
@@ -839,11 +843,11 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
row2Plan->oDist *= fftPlan->length[index];
}
- if (transGen != Transpose_NONSQUARE)//TIMMY twiddle in transform
- {
- row2Plan->large1D = fftPlan->length[0];
- row2Plan->twiddleFront = true;
- }
+ //if (transGen != Transpose_NONSQUARE)//TIMMY twiddle in transform
+ //{
+ // row2Plan->large1D = fftPlan->length[0];
+ // row2Plan->twiddleFront = true;
+ //}
OPENCL_V(clfftBakePlan(fftPlan->planY, numQueues, commQueueFFT, NULL, NULL ),
_T( "BakePlan large1d second row plan failed" ) );
@@ -876,7 +880,9 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
for (size_t index = 1; index < fftPlan->length.size(); index++)
{
- trans3Plan->length.push_back(fftPlan->length[index]);
+ //trans3Plan->length.push_back(fftPlan->length[index]);
+ trans3Plan->batchsize = trans3Plan->batchsize * fftPlan->length[index];//Timmy for 2D
+ trans3Plan->iDist = trans3Plan->iDist / fftPlan->length[index];//Timmy
trans3Plan->inStride.push_back(trans3Plan->iDist);
trans3Plan->iDist *= fftPlan->length[index];
trans3Plan->outStride.push_back(fftPlan->outStride[index]);
diff --git a/src/tests/accuracy_test_pow3.cpp b/src/tests/accuracy_test_pow3.cpp
index 3e7770e..7034e03 100644
--- a/src/tests/accuracy_test_pow3.cpp
+++ b/src/tests/accuracy_test_pow3.cpp
@@ -2038,6 +2038,16 @@ TEST_F(accuracy_test_pow3_double, huge_1D_forward_in_place_complex_planar_to_com
try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(1594323, 1, layout::complex_planar, direction::forward); }
catch (const std::exception& err) { handle_exception(err); }
}
+TEST_F(accuracy_test_pow3_double, huge_1D_forward_in_place_complex_planar_to_complex_planar_1594323_2)
+{
+ try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(1594323, 2, layout::complex_planar, direction::forward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow3_double, huge_1D_forward_in_place_complex_planar_to_complex_planar_2187_1)
+{
+ try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(2187, 1, layout::complex_planar, direction::forward); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
//interleaved
TEST_F(accuracy_test_pow3_single, huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved_1594323_1)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits mailing list