[clfft] 49/128: Precallback - Fix for mixed radix C2R test case failure
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Oct 22 14:54:37 UTC 2015
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository clfft.
commit 739582859157a97bda91687359d7d2e4f8303ea6
Author: Pradeep <pradeep.rao at amd.com>
Date: Fri Aug 28 11:25:43 2015 +0530
Precallback - Fix for mixed radix C2R test case failure
---
src/library/generator.stockham.cpp | 50 +++--
src/tests/accuracy_test_precallback.cpp | 313 +++++++++++++++++++++-----------
2 files changed, 239 insertions(+), 124 deletions(-)
diff --git a/src/library/generator.stockham.cpp b/src/library/generator.stockham.cpp
index 1f7d7cd..4302636 100644
--- a/src/library/generator.stockham.cpp
+++ b/src/library/generator.stockham.cpp
@@ -727,7 +727,7 @@ namespace StockhamGenerator
void SweepRegs( size_t flag, bool fwd, bool interleaved, size_t stride, size_t component,
double scale, bool frontTwiddle,
const std::string &bufferRe, const std::string &bufferIm, const std::string &offset,
- size_t regC, size_t numB, size_t numPrev, std::string &passStr, bool isPrecallVector = false) const
+ size_t regC, size_t numB, size_t numPrev, std::string &passStr, bool isPrecallVector = false, bool oddt = false) const
{
assert( (flag == SR_READ ) ||
(flag == SR_TWMUL) ||
@@ -825,7 +825,9 @@ namespace StockhamGenerator
return;
}
- int hid;
+ int hid = 0;
+ bool swapElement = false;
+ int tIter = numB * radix;
// block to rearrange reads of adjacent memory locations together
if(linearRegs && (flag == SR_READ))
@@ -836,6 +838,8 @@ namespace StockhamGenerator
{
for(size_t c=cStart; c<cEnd; c++) // component loop: 0 - real, 1 - imaginary
{
+ swapElement = (fft_doPreCallback && c2r && component == SR_COMP_REAL); //reset at start of loop
+
std::string tail;
std::string regIndex;
std::string regIndexC;
@@ -856,8 +860,10 @@ namespace StockhamGenerator
{
RegBaseAndCountAndPos("", i*radix + r, regIndex);
- hid = (i * radix + r) / (numB * radix / 2);
- if (fft_doPreCallback && c2r && component == SR_COMP_REAL && hid != 0)
+ hid = (i * radix + r) / (tIter / 2);
+ swapElement = swapElement && hid != 0;
+ swapElement = (oddt && ((i * radix + r) >= (tIter - 1))) ? false : swapElement; //for c2r odd size don't swap for last register
+ if (swapElement)
{
regIndexC = regIndex; regIndexC += ").y";
}
@@ -904,7 +910,7 @@ namespace StockhamGenerator
passStr += ");";
}
- if (fft_doPreCallback && c2r && component == SR_COMP_REAL && hid != 0)
+ if (swapElement)
{
passStr += "\n\t";
passStr += regIndexC; passStr += " = "; passStr += regIndex; passStr += ";";
@@ -1756,7 +1762,7 @@ namespace StockhamGenerator
}
else
{
- regIndex += ".y"; regIndexPair += ".y";
+ regIndex += ".y"; regIndexPair += (fft_doPreCallback && oddt) ? ".x" : ".y";
if(!batch2) { passStr += regIndex; passStr += " + "; passStr += regIndexPair; }
else { passStr += " - "; passStr += regIndex; passStr += " + "; passStr += regIndexPair; }
@@ -2200,7 +2206,14 @@ namespace StockhamGenerator
SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_REAL, 1.0f, false, false, true, bufferInRe, bufferInRe, "inOffset", passStr);
passStr += "\n\t}";
passStr += "\n\tif((rw > 1) && (me%2))\n\t{";
- SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_REAL, 1.0f, false, true, true, bufferInIm2, bufferInIm2, "inOffset", passStr);
+ if (fft_doPreCallback)
+ {
+ SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_REAL, 1.0f, false, true, true, bufferInRe2, bufferInIm2, "inOffset2", passStr);
+ }
+ else
+ {
+ SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_REAL, 1.0f, false, true, true, bufferInIm2, bufferInIm2, "inOffset", passStr);
+ }
passStr += "\n\t}\n";
}
@@ -2222,7 +2235,7 @@ namespace StockhamGenerator
}
passStr += "\n\n\tbarrier(CLK_LOCAL_MEM_FENCE);\n";
- SweepRegs(SR_READ, fwd, outInterleaved, processBufStride, SR_COMP_REAL, 1.0f, false, processBufRe, processBufIm, processBufOffset, 1, numB1, 0, passStr);
+ SweepRegs(SR_READ, fwd, outInterleaved, processBufStride, SR_COMP_REAL, 1.0f, false, processBufRe, processBufIm, processBufOffset, 1, numB1, 0, passStr, false, oddp);
passStr += "\n\n\tbarrier(CLK_LOCAL_MEM_FENCE);\n";
@@ -2257,19 +2270,20 @@ namespace StockhamGenerator
passStr += "\n\t}\n\telse\n\t{";
SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, true, true, false, bufferInRe2, bufferInRe2, "inOffset", passStr);
passStr += "\n\t}";
+
+
+ if(oddp)
+ {
+ passStr += "\n\tif(rw && (me%2))\n\t{";
+ SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, false, false, true, bufferInIm, bufferInIm, "inOffset", passStr);
+ passStr += "\n\t}";
+ passStr += "\n\tif((rw > 1) && (me%2))\n\t{";
+ SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, false, true, true, bufferInRe2, bufferInRe2, "inOffset", passStr);
+ passStr += "\n\t}";
+ }
}
passStr += "\n";
- if(oddp)
- {
- passStr += "\n\tif(rw && (me%2))\n\t{";
- SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, false, false, true, bufferInIm, bufferInIm, "inOffset", passStr);
- passStr += "\n\t}";
- passStr += "\n\tif((rw > 1) && (me%2))\n\t{";
- SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, false, true, true, bufferInRe2, bufferInRe2, "inOffset", passStr);
- passStr += "\n\t}\n";
- }
-
SweepRegsRC(SR_WRITE, fwd, outInterleaved, processBufStride, SR_COMP_IMAG, 1.0f, false, true, false, processBufRe, processBufIm, processBufOffset, passStr);
if(oddp)
{
diff --git a/src/tests/accuracy_test_precallback.cpp b/src/tests/accuracy_test_precallback.cpp
index a09d2f6..09efeb7 100644
--- a/src/tests/accuracy_test_precallback.cpp
+++ b/src/tests/accuracy_test_precallback.cpp
@@ -104,112 +104,6 @@ namespace precallback
{
/**********************************************************************************************
-**************************************Complex To Real***************************************
-**********************************************************************************************/
-#pragma region Complex_To_Real
-
-template< typename T, typename cl_T, typename fftw_T >
-void mixed_radix_real_to_hermitian( size_t problem_size )
-{
- try
- {
- if(verbose) std::cout << "Now testing problem size " << problem_size << std::endl;
-
- std::vector<size_t> lengths;
- lengths.push_back( problem_size );
- size_t batch = 1;
-
- std::vector<size_t> input_strides;
- std::vector<size_t> output_strides;
-
- size_t input_distance = 0;
- size_t output_distance = 0;
-
- layout::buffer_layout_t layout = layout::hermitian_interleaved;
-
- placeness::placeness_t placeness = placeness::in_place;
-
- data_pattern pattern = sawtooth;
- precallback_real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
- }
- catch( const std::exception& err ) {
- handle_exception(err);
- }
-}
-
-TEST_P( mixed_radix_precallback, single_precision_real_to_hermitian_auto_generated ) {
- size_t problem_size = GetParam();
- RecordProperty("problem_size", (int)problem_size);
- mixed_radix_real_to_hermitian<float, cl_float, fftwf_complex>(problem_size);
-}
-
-TEST_P( mixed_radix_precallback, double_precision_real_to_hermitian_auto_generated ) {
- size_t problem_size = GetParam();
- RecordProperty("problem_size", (int)problem_size);
- mixed_radix_real_to_hermitian<double, cl_double, fftw_complex>(problem_size);
-}
-
-template< class T, class cl_T, class fftw_T >
-void pow2_large_1D_in_place_hermitian_interleaved_to_real()
-{
- std::vector<size_t> lengths;
- lengths.push_back( large2 );
- size_t batch = 1;
- std::vector<size_t> input_strides;
- std::vector<size_t> output_strides;
- size_t input_distance = 0;
- size_t output_distance = 0;
- layout::buffer_layout_t layout = layout::hermitian_interleaved;
- placeness::placeness_t placeness = placeness::in_place;
-
- data_pattern pattern = sawtooth;
- precallback_complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
-}
-
-TEST_F(accuracy_test_precallback_single, pow2_large_1D_in_place_hermitian_interleaved_to_real)
-{
- try { pow2_large_1D_in_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); }
- catch( const std::exception& err ) { handle_exception(err); }
-}
-
-TEST_F(accuracy_test_precallback_double, pow2_large_1D_in_place_hermitian_interleaved_to_real)
-{
- try { pow2_large_1D_in_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); }
- catch( const std::exception& err ) { handle_exception(err); }
-}
-
-template< class T, class cl_T, class fftw_T >
-void pow2_large_1D_out_of_place_hermitian_planar_to_real()
-{
- std::vector<size_t> lengths;
- lengths.push_back( large2 );
- size_t batch = 1;
- std::vector<size_t> input_strides;
- std::vector<size_t> output_strides;
- size_t input_distance = 0;
- size_t output_distance = 0;
- layout::buffer_layout_t layout = layout::hermitian_planar;
- placeness::placeness_t placeness = placeness::out_of_place;
-
- data_pattern pattern = sawtooth;
- precallback_complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
-}
-
-TEST_F(accuracy_test_precallback_single, pow2_large_1D_out_of_place_hermitian_planar_to_real)
-{
- try { pow2_large_1D_out_of_place_hermitian_planar_to_real< float, cl_float, fftwf_complex >(); }
- catch( const std::exception& err ) { handle_exception(err); }
-}
-
-TEST_F(accuracy_test_precallback_double, pow2_large_1D_out_of_place_hermitian_planar_to_real)
-{
- try { pow2_large_1D_out_of_place_hermitian_planar_to_real< double, cl_double, fftw_complex >(); }
- catch( const std::exception& err ) { handle_exception(err); }
-}
-
-#pragma endregion
-
-/**********************************************************************************************
**************************************Complex To Complex***************************************
**********************************************************************************************/
#pragma region Complex_To_Complex
@@ -418,10 +312,217 @@ TEST_F(accuracy_test_precallback_single, pow2_normal_1D_forward_in_place_complex
#pragma endregion
/**********************************************************************************************
+**************************************Complex To Real***************************************
+**********************************************************************************************/
+#pragma region Complex_To_Real
+
+template< typename T, typename cl_T, typename fftw_T >
+void mixed_radix_hermitian_to_real( size_t problem_size )
+{
+ try
+ {
+ if(verbose) std::cout << "Now testing problem size " << problem_size << std::endl;
+
+ std::vector<size_t> lengths;
+ lengths.push_back( problem_size );
+ size_t batch = 1;
+
+ std::vector<size_t> input_strides;
+ std::vector<size_t> output_strides;
+
+ size_t input_distance = 0;
+ size_t output_distance = 0;
+
+ layout::buffer_layout_t layout = layout::hermitian_interleaved;
+
+ placeness::placeness_t placeness = placeness::in_place;
+
+ data_pattern pattern = sawtooth;
+ precallback_complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+ }
+ catch( const std::exception& err ) {
+ handle_exception(err);
+ }
+}
+
+TEST_P( mixed_radix_precallback, single_precision_hermitian_to_real_auto_generated ) {
+ size_t problem_size = GetParam();
+ RecordProperty("problem_size", (int)problem_size);
+ mixed_radix_hermitian_to_real<float, cl_float, fftwf_complex>(problem_size);
+}
+
+TEST_P( mixed_radix_precallback, double_precision_hermitian_to_real_auto_generated ) {
+ size_t problem_size = GetParam();
+ RecordProperty("problem_size", (int)problem_size);
+ mixed_radix_hermitian_to_real<double, cl_double, fftw_complex>(problem_size);
+}
+
+template< class T, class cl_T, class fftw_T >
+void pow2_normal_1D_in_place_hermitian_interleaved_to_real()
+{
+ std::vector<size_t> lengths;
+ lengths.push_back( normal2 );
+ size_t batch = 1;
+ std::vector<size_t> input_strides;
+ std::vector<size_t> output_strides;
+ size_t input_distance = 0;
+ size_t output_distance = 0;
+ layout::buffer_layout_t layout = layout::hermitian_interleaved;
+ placeness::placeness_t placeness = placeness::in_place;
+
+ data_pattern pattern = sawtooth;
+ precallback_complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_precallback_single, pow2_normal_1D_in_place_hermitian_interleaved_to_real)
+{
+ try { pow2_normal_1D_in_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); }
+ catch( const std::exception& err ) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_precallback_double, pow2_normal_1D_in_place_hermitian_interleaved_to_real)
+{
+ try { pow2_normal_1D_in_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); }
+ catch( const std::exception& err ) { handle_exception(err); }
+}
+
+template< class T, class cl_T, class fftw_T >
+void pow2_normal_1D_non_unit_stride_and_distance_hermitian_to_real()
+{
+ std::vector<size_t> lengths;
+ lengths.push_back( normal2 );
+ size_t batch = 2;
+ std::vector<size_t> input_strides;
+ std::vector<size_t> output_strides;
+ input_strides.push_back( 42 );
+ output_strides.push_back( 42 );
+ size_t input_distance = lengths[lengths.size()-1] * input_strides[input_strides.size()-1] + 14;
+ size_t output_distance = lengths[lengths.size()-1] * output_strides[output_strides.size()-1] + 14;
+ layout::buffer_layout_t layout = layout::hermitian_interleaved;
+ placeness::placeness_t placeness = placeness::out_of_place;
+
+ data_pattern pattern = sawtooth;
+ precallback_complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_precallback_single, pow2_normal_1D_non_unit_stride_and_distance_hermitian_to_real)
+{
+ try { pow2_normal_1D_non_unit_stride_and_distance_hermitian_to_real< float, cl_float, fftwf_complex >(); }
+ catch( const std::exception& err ) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_precallback_double, pow2_normal_1D_non_unit_stride_and_distance_hermitian_to_real)
+{
+ try { pow2_normal_1D_non_unit_stride_and_distance_hermitian_to_real< double, cl_double, fftw_complex >(); }
+ catch( const std::exception& err ) { handle_exception(err); }
+}
+
+template< class T, class cl_T, class fftw_T >
+void pow2_large_1D_in_place_hermitian_interleaved_to_real()
+{
+ std::vector<size_t> lengths;
+ lengths.push_back( large2 );
+ size_t batch = 1;
+ std::vector<size_t> input_strides;
+ std::vector<size_t> output_strides;
+ size_t input_distance = 0;
+ size_t output_distance = 0;
+ layout::buffer_layout_t layout = layout::hermitian_interleaved;
+ placeness::placeness_t placeness = placeness::in_place;
+
+ data_pattern pattern = sawtooth;
+ precallback_complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_precallback_single, pow2_large_1D_in_place_hermitian_interleaved_to_real)
+{
+ try { pow2_large_1D_in_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); }
+ catch( const std::exception& err ) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_precallback_double, pow2_large_1D_in_place_hermitian_interleaved_to_real)
+{
+ try { pow2_large_1D_in_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); }
+ catch( const std::exception& err ) { handle_exception(err); }
+}
+
+template< class T, class cl_T, class fftw_T >
+void pow2_large_1D_out_of_place_hermitian_planar_to_real()
+{
+ std::vector<size_t> lengths;
+ lengths.push_back( large2 );
+ size_t batch = 1;
+ std::vector<size_t> input_strides;
+ std::vector<size_t> output_strides;
+ size_t input_distance = 0;
+ size_t output_distance = 0;
+ layout::buffer_layout_t layout = layout::hermitian_planar;
+ placeness::placeness_t placeness = placeness::out_of_place;
+
+ data_pattern pattern = sawtooth;
+ precallback_complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_precallback_single, pow2_large_1D_out_of_place_hermitian_planar_to_real)
+{
+ try { pow2_large_1D_out_of_place_hermitian_planar_to_real< float, cl_float, fftwf_complex >(); }
+ catch( const std::exception& err ) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_precallback_double, pow2_large_1D_out_of_place_hermitian_planar_to_real)
+{
+ try { pow2_large_1D_out_of_place_hermitian_planar_to_real< double, cl_double, fftw_complex >(); }
+ catch( const std::exception& err ) { handle_exception(err); }
+}
+
+#pragma endregion
+
+/**********************************************************************************************
**************************************Real To Complex***************************************
**********************************************************************************************/
#pragma region Real_To_Complex
+template< typename T, typename cl_T, typename fftw_T >
+void mixed_radix_real_to_hermitian( size_t problem_size )
+{
+ try
+ {
+ if(verbose) std::cout << "Now testing problem size " << problem_size << std::endl;
+
+ std::vector<size_t> lengths;
+ lengths.push_back( problem_size );
+ size_t batch = 1;
+
+ std::vector<size_t> input_strides;
+ std::vector<size_t> output_strides;
+
+ size_t input_distance = 0;
+ size_t output_distance = 0;
+
+ layout::buffer_layout_t layout = layout::hermitian_interleaved;
+
+ placeness::placeness_t placeness = placeness::in_place;
+
+ data_pattern pattern = sawtooth;
+ precallback_real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+ }
+ catch( const std::exception& err ) {
+ handle_exception(err);
+ }
+}
+
+TEST_P( mixed_radix_precallback, single_precision_real_to_hermitian_auto_generated ) {
+ size_t problem_size = GetParam();
+ RecordProperty("problem_size", (int)problem_size);
+ mixed_radix_real_to_hermitian<float, cl_float, fftwf_complex>(problem_size);
+}
+
+TEST_P( mixed_radix_precallback, double_precision_real_to_hermitian_auto_generated ) {
+ size_t problem_size = GetParam();
+ RecordProperty("problem_size", (int)problem_size);
+ mixed_radix_real_to_hermitian<double, cl_double, fftw_complex>(problem_size);
+}
+
template< class T, class cl_T, class fftw_T >
void pow2_normal_1D_forward_in_place_real_to_hermitian_interleaved()
{
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits
mailing list