[clfft] 05/32: added some bug fixes and gtest cases; passed most single precision tests. For double precision, 2D in-place transposes are sometimes required, and this is not passing yet.
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Tue Apr 26 08:34:07 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository clfft.
commit 39aacc105f691822fd681137b00778be2e6faffc
Author: Timmy <timmy.liu at amd.com>
Date: Fri Mar 11 08:30:16 2016 -0600
added some bug fixes and gtest cases; passed most single precision tests. For double precision, 2D in-place transposes are sometimes required, and this is not passing yet.
---
src/library/enqueue.cpp | 2 +-
src/library/generator.transpose.cpp | 38 +++++++++++++++++-----
src/library/plan.cpp | 5 +--
src/tests/accuracy_test_pow3.cpp | 63 +++++++++++++++++++++++++++++++++++++
src/tests/test_constants.h | 1 +
5 files changed, 98 insertions(+), 11 deletions(-)
diff --git a/src/library/enqueue.cpp b/src/library/enqueue.cpp
index 1691a16..5c7de09 100644
--- a/src/library/enqueue.cpp
+++ b/src/library/enqueue.cpp
@@ -624,7 +624,7 @@ clfftStatus FFTAction::enqueue(clfftPlanHandle plHandle,
std::vector< size_t > gWorkSize;
std::vector< size_t > lWorkSize;
clfftStatus result = this->getWorkSizes (gWorkSize, lWorkSize);
-
+ std::cout << "work sizes are " << gWorkSize[0] << ", " << lWorkSize[0] << std::endl;
// TODO: if getWorkSizes returns CLFFT_INVALID_GLOBAL_WORK_SIZE, that means
// that this multidimensional input data array is too large to be transformed
// with a single call to clEnqueueNDRangeKernel. For now, we will just return
diff --git a/src/library/generator.transpose.cpp b/src/library/generator.transpose.cpp
index 0399b40..c17080a 100644
--- a/src/library/generator.transpose.cpp
+++ b/src/library/generator.transpose.cpp
@@ -1012,11 +1012,11 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
//if post-callback is set for the plan
//generate the swap_table
- std::vector<std::vector<size_t>> permutationTable;
+ std::vector<std::vector<size_t> > permutationTable;
permutation_calculation(dim_ratio, smaller_dim, permutationTable);
clKernWrite(transKernel, 0) << "__constant int swap_table["<< permutationTable.size() <<"][1] = {" << std::endl;
- for (std::vector<std::vector<size_t>>::iterator itor = permutationTable.begin(); itor != permutationTable.end(); itor++)
+ for (std::vector<std::vector<size_t> >::iterator itor = permutationTable.begin(); itor != permutationTable.end(); itor++)
{
clKernWrite(transKernel, 0) << "{" << (*itor)[0] << "}";
if (itor == (permutationTable.end() - 1))//last vector
@@ -1025,7 +1025,14 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
clKernWrite(transKernel, 0) << "," << std::endl;
}
- std::string funcName = "swap_nonsquare_" + std::to_string(smaller_dim) + "_" + std::to_string(dim_ratio);
+ //std::string funcName = "swap_nonsquare_" + std::to_string(smaller_dim) + "_" + std::to_string(dim_ratio);
+ std::string funcName = "swap_nonsquare_";
+ std::string smaller_dim_str = static_cast<std::ostringstream*>(&(std::ostringstream() << smaller_dim))->str();
+ std::string dim_ratio_str = static_cast<std::ostringstream*>(&(std::ostringstream() << dim_ratio))->str();
+ if(params.fft_N[0] > params.fft_N[1])
+ funcName = funcName + smaller_dim_str + "_" + dim_ratio_str;
+ else
+ funcName = funcName + dim_ratio_str + "_" + smaller_dim_str;
KernelFuncName = funcName;
size_t local_work_size_swap = 256;
@@ -1089,8 +1096,15 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
}
clKernWrite(transKernel, 3) << std::endl;
- clKernWrite(transKernel, 3) << "int group_offset = (prev/" << dim_ratio << ")*" << smaller_dim << "*" << dim_ratio
- << " + (prev%" << dim_ratio << ")*" << smaller_dim << ";" << std::endl; //might look like: int group_offset = (prev/3)*729*3 + (prev%3)*729;
+ if (params.fft_N[0] > params.fft_N[1])//decides whether we have a tall or wide rectangle
+ {
+ clKernWrite(transKernel, 3) << "int group_offset = (prev/" << dim_ratio << ")*" << smaller_dim << "*" << dim_ratio
+ << " + (prev%" << dim_ratio << ")*" << smaller_dim << ";" << std::endl; //might look like: int group_offset = (prev/3)*729*3 + (prev%3)*729;
+ }
+ else
+ {
+ clKernWrite(transKernel, 3) << "int group_offset = (prev*"<< smaller_dim << ");" << std::endl; //might look like: int group_offset = prev*729;
+ }
clKernWrite(transKernel, 3) << std::endl;
//move to that row block and load that row block to LDS
@@ -1144,9 +1158,17 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
clKernWrite(transKernel, 3) << std::endl;
clKernWrite(transKernel, 3) << "do{" << std::endl;//begining of do-while
//calculate the next location p(k) = (k*n)mod(m*n-1), if 0 < k < m*n-1
- clKernWrite(transKernel, 6) << "next = (prev*" << dim_ratio << ")%" << smaller_dim*dim_ratio - 1 << ";" << std::endl;
- clKernWrite(transKernel, 6) << "group_offset = (next/" << dim_ratio << ")*" << smaller_dim << "*" << dim_ratio
- << " + (next%" << dim_ratio << ")*" << smaller_dim << ";" << std::endl; //might look like: group_offset = (next/3)*729*3 + (next%3)*729;
+ if (params.fft_N[0] > params.fft_N[1])//decides whether we have a tall or wide rectangle
+ {
+ clKernWrite(transKernel, 6) << "next = (prev*" << smaller_dim << ")%" << smaller_dim*dim_ratio - 1 << ";" << std::endl;
+ clKernWrite(transKernel, 6) << "group_offset = (next/" << dim_ratio << ")*" << smaller_dim << "*" << dim_ratio
+ << " + (next%" << dim_ratio << ")*" << smaller_dim << ";" << std::endl; //might look like: group_offset = (next/3)*729*3 + (next%3)*729;
+ }
+ else
+ {
+ clKernWrite(transKernel, 6) << "next = (prev*" << dim_ratio << ")%" << smaller_dim*dim_ratio - 1 << ";" << std::endl;
+ clKernWrite(transKernel, 3) << "group_offset = (next*" << smaller_dim << ");" << std::endl; //might look like: int group_offset = prev*729;
+ }
clKernWrite(transKernel, 3) << std::endl;
switch (params.fft_inputLayout)
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index 0ac9554..7b7e103 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -656,6 +656,7 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
transGen = Transpose_SQUARE;
}
+
if ( (fftPlan->tmpBufSize==0 ) && !fftPlan->allOpsInplace)
{
fftPlan->tmpBufSize = (smallerDim + padding) * biggerDim *
@@ -1998,14 +1999,14 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
}
else
{
- currKernelOrder = TRANSPOSE_AND_SWAP;//TRANSPOSE_AND_SWAP TIMMY TEMP
+ currKernelOrder = TRANSPOSE_AND_SWAP;
}
}
//if the original input data is more than 1d only TRANSPOSE_LEADING_AND_SWAP order is supported
//TODO need to fix this here. related to multi dim batch size.
if (fftPlan->length.size() > 2)
currKernelOrder = TRANSPOSE_LEADING_AND_SWAP;
- std::cout << "transpose kernel order is " << currKernelOrder << std::endl;
+ std::cout << "currKernelOrder = " << currKernelOrder << std::endl;
//ends tranpose kernel order
//Transpose stage 1
diff --git a/src/tests/accuracy_test_pow3.cpp b/src/tests/accuracy_test_pow3.cpp
index cf7777e..9ebbe4e 100644
--- a/src/tests/accuracy_test_pow3.cpp
+++ b/src/tests/accuracy_test_pow3.cpp
@@ -1809,6 +1809,69 @@ TEST_F(accuracy_test_pow3_double, large_1D_out_of_place_hermitian_planar_to_real
}
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ huge 1D ^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// *****************************************************
+
+template< class T, class cl_T, class fftw_T>
+void huge_1D_forward_in_place_complex_planar_to_complex_planar()
+{
+ std::vector<size_t> lengths;
+ lengths.push_back( huge3 );
+ size_t batch = 1;
+ std::vector<size_t> input_strides;
+ std::vector<size_t> output_strides;
+ size_t input_distance = 0;
+ size_t output_distance = 0;
+ layout::buffer_layout_t in_layout = layout::complex_planar;
+ layout::buffer_layout_t out_layout = layout::complex_planar;
+ placeness::placeness_t placeness = placeness::in_place;
+ direction::direction_t direction = direction::forward;
+
+ data_pattern pattern = sawtooth;
+ complex_to_complex<T, cl_T, fftw_T>(pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness);
+}
+
+TEST_F(accuracy_test_pow3_single, huge_1D_forward_in_place_complex_planar_to_complex_planar)
+{
+ try { huge_1D_forward_in_place_complex_planar_to_complex_planar< float, cl_float, fftwf_complex >(); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+
+template< class T, class cl_T, class fftw_T>
+void huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved()
+{
+ std::vector<size_t> lengths;
+ lengths.push_back(huge3);
+ size_t batch = 1;
+ std::vector<size_t> input_strides;
+ std::vector<size_t> output_strides;
+ size_t input_distance = 0;
+ size_t output_distance = 0;
+ layout::buffer_layout_t in_layout = layout::complex_interleaved;
+ layout::buffer_layout_t out_layout = layout::complex_interleaved;
+ placeness::placeness_t placeness = placeness::in_place;
+ direction::direction_t direction = direction::forward;
+
+ data_pattern pattern = sawtooth;
+ complex_to_complex<T, cl_T, fftw_T>(pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness);
+}
+
+TEST_F(accuracy_test_pow3_single, huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{
+ try { huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved< float, cl_float, fftwf_complex >(); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_pow3_double, huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved)
+{
+ try { huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved< double, cl_double, fftw_complex >(); }
+ catch (const std::exception& err) { handle_exception(err); }
+}
+
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
// ^^^^^^^^^^^^^^^^^^^^^^^ normal 2D ^^^^^^^^^^^^^^^^^^^^^^ //
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
diff --git a/src/tests/test_constants.h b/src/tests/test_constants.h
index 4b0d9ca..95f2afc 100644
--- a/src/tests/test_constants.h
+++ b/src/tests/test_constants.h
@@ -190,6 +190,7 @@ const size_t small3 = 9;
const size_t normal3 = 729;
const size_t large3 = 6561;
const size_t dlarge3 = 2187;
+const size_t huge3 = 729 * 729 * 3;
const size_t small5 = 25;
const size_t normal5 = 625;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits
mailing list