[clfft] 49/128: Precallback - Fix for mixed radix C2R test case failure

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Oct 22 14:54:37 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository clfft.

commit 739582859157a97bda91687359d7d2e4f8303ea6
Author: Pradeep <pradeep.rao at amd.com>
Date:   Fri Aug 28 11:25:43 2015 +0530

    Precallback - Fix for mixed radix C2R test case failure
---
 src/library/generator.stockham.cpp      |  50 +++--
 src/tests/accuracy_test_precallback.cpp | 313 +++++++++++++++++++++-----------
 2 files changed, 239 insertions(+), 124 deletions(-)

diff --git a/src/library/generator.stockham.cpp b/src/library/generator.stockham.cpp
index 1f7d7cd..4302636 100644
--- a/src/library/generator.stockham.cpp
+++ b/src/library/generator.stockham.cpp
@@ -727,7 +727,7 @@ namespace StockhamGenerator
 		void SweepRegs(	size_t flag, bool fwd, bool interleaved, size_t stride, size_t component,
 						double scale, bool frontTwiddle,
 						const std::string &bufferRe, const std::string &bufferIm, const std::string &offset,
-						size_t regC, size_t numB, size_t numPrev, std::string &passStr, bool isPrecallVector = false) const
+						size_t regC, size_t numB, size_t numPrev, std::string &passStr, bool isPrecallVector = false, bool oddt = false) const
 		{
 			assert( (flag == SR_READ )			||
 					(flag == SR_TWMUL)			||
@@ -825,7 +825,9 @@ namespace StockhamGenerator
 				return;
 			}
 
-			int hid;
+			int hid = 0;
+			bool swapElement = false;
+			int tIter = numB * radix;
 
 			// block to rearrange reads of adjacent memory locations together
 			if(linearRegs && (flag == SR_READ))
@@ -836,6 +838,8 @@ namespace StockhamGenerator
 					{
 						for(size_t c=cStart; c<cEnd; c++) // component loop: 0 - real, 1 - imaginary
 						{
+							swapElement = (fft_doPreCallback && c2r && component == SR_COMP_REAL); //reset at start of loop
+
 							std::string tail;
 							std::string regIndex;
 							std::string regIndexC;
@@ -856,8 +860,10 @@ namespace StockhamGenerator
 								{
 									RegBaseAndCountAndPos("", i*radix + r, regIndex); 
 									
-									hid = (i * radix + r) / (numB * radix / 2);
-									if (fft_doPreCallback && c2r && component == SR_COMP_REAL && hid != 0)
+									hid = (i * radix + r) / (tIter / 2);
+									swapElement = swapElement && hid != 0;
+									swapElement = (oddt && ((i * radix + r) >= (tIter - 1))) ? false : swapElement;  //for c2r odd size don't swap for last register
+									if (swapElement)
 									{
 										regIndexC = regIndex; regIndexC += ").y";
 									}
@@ -904,7 +910,7 @@ namespace StockhamGenerator
 								passStr += ");";
 							}
 
-							if (fft_doPreCallback && c2r && component == SR_COMP_REAL && hid != 0)
+							if (swapElement)
 							{
 								passStr += "\n\t";
 								passStr += regIndexC; passStr += " = "; passStr += regIndex; passStr += ";";
@@ -1756,7 +1762,7 @@ namespace StockhamGenerator
 								}
 								else
 								{
-									regIndex += ".y"; regIndexPair += ".y";
+									regIndex += ".y"; regIndexPair += (fft_doPreCallback && oddt) ? ".x" : ".y";
 
 									if(!batch2)	{					passStr += regIndex; passStr += " + "; passStr += regIndexPair; }
 									else		{ passStr += " - "; passStr += regIndex; passStr += " + "; passStr += regIndexPair; }
@@ -2200,7 +2206,14 @@ namespace StockhamGenerator
 							SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_REAL, 1.0f, false, false, true, bufferInRe, bufferInRe, "inOffset", passStr);
 							passStr += "\n\t}";
 							passStr += "\n\tif((rw > 1) && (me%2))\n\t{";
-							SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_REAL, 1.0f, false, true, true, bufferInIm2, bufferInIm2, "inOffset", passStr);
+							if (fft_doPreCallback)
+							{
+								SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_REAL, 1.0f, false, true, true, bufferInRe2, bufferInIm2, "inOffset2", passStr);
+							}
+							else
+							{
+								SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_REAL, 1.0f, false, true, true, bufferInIm2, bufferInIm2, "inOffset", passStr);
+							}
 							passStr += "\n\t}\n";
 						}
 
@@ -2222,7 +2235,7 @@ namespace StockhamGenerator
 					}
 
 					passStr += "\n\n\tbarrier(CLK_LOCAL_MEM_FENCE);\n";
-					SweepRegs(SR_READ, fwd, outInterleaved, processBufStride, SR_COMP_REAL, 1.0f, false, processBufRe, processBufIm, processBufOffset, 1, numB1, 0, passStr);
+					SweepRegs(SR_READ, fwd, outInterleaved, processBufStride, SR_COMP_REAL, 1.0f, false, processBufRe, processBufIm, processBufOffset, 1, numB1, 0, passStr, false, oddp);
 					passStr += "\n\n\tbarrier(CLK_LOCAL_MEM_FENCE);\n";
 
 
@@ -2257,19 +2270,20 @@ namespace StockhamGenerator
 							passStr += "\n\t}\n\telse\n\t{";
 							SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, true, true, false, bufferInRe2, bufferInRe2, "inOffset", passStr);
 							passStr += "\n\t}";
+						
+						
+							if(oddp)
+							{
+								passStr += "\n\tif(rw && (me%2))\n\t{";
+								SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, false, false, true, bufferInIm, bufferInIm, "inOffset", passStr);
+								passStr += "\n\t}";
+								passStr += "\n\tif((rw > 1) && (me%2))\n\t{";
+								SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, false, true, true, bufferInRe2, bufferInRe2, "inOffset", passStr);
+								passStr += "\n\t}";
+							}
 						}
 						passStr += "\n";
 
-						if(oddp)
-						{
-							passStr += "\n\tif(rw && (me%2))\n\t{";
-							SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, false, false, true, bufferInIm, bufferInIm, "inOffset", passStr);
-							passStr += "\n\t}";
-							passStr += "\n\tif((rw > 1) && (me%2))\n\t{";
-							SweepRegsRC(SR_READ, fwd, inInterleaved, inStride, SR_COMP_IMAG, 1.0f, false, true, true, bufferInRe2, bufferInRe2, "inOffset", passStr);
-							passStr += "\n\t}\n";
-						}
-
 						SweepRegsRC(SR_WRITE, fwd, outInterleaved, processBufStride, SR_COMP_IMAG, 1.0f, false, true, false, processBufRe, processBufIm, processBufOffset, passStr);
 						if(oddp)
 						{
diff --git a/src/tests/accuracy_test_precallback.cpp b/src/tests/accuracy_test_precallback.cpp
index a09d2f6..09efeb7 100644
--- a/src/tests/accuracy_test_precallback.cpp
+++ b/src/tests/accuracy_test_precallback.cpp
@@ -104,112 +104,6 @@ namespace precallback
 {
 
 /**********************************************************************************************
-**************************************Complex To Real***************************************
-**********************************************************************************************/
-#pragma region Complex_To_Real
-
-template< typename T, typename cl_T, typename fftw_T >
-void mixed_radix_real_to_hermitian( size_t problem_size )
-{
-	try
-	{
-		if(verbose) std::cout << "Now testing problem size " << problem_size << std::endl;
-
-		std::vector<size_t> lengths;
-		lengths.push_back( problem_size );
-		size_t batch = 1;
-
-		std::vector<size_t> input_strides;
-		std::vector<size_t> output_strides;
-
-		size_t input_distance = 0;
-		size_t output_distance = 0;
-
-		layout::buffer_layout_t layout = layout::hermitian_interleaved;
-
-		placeness::placeness_t placeness = placeness::in_place;
-
-		data_pattern pattern = sawtooth;
-		precallback_real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
-	}
-	catch( const std::exception& err ) {
-		handle_exception(err);
-	}
-}
-
-TEST_P( mixed_radix_precallback, single_precision_real_to_hermitian_auto_generated ) {
-	size_t problem_size = GetParam();
-	RecordProperty("problem_size", (int)problem_size);
-	mixed_radix_real_to_hermitian<float, cl_float, fftwf_complex>(problem_size);
-}
-
-TEST_P( mixed_radix_precallback, double_precision_real_to_hermitian_auto_generated ) {
-	size_t problem_size = GetParam();
-	RecordProperty("problem_size", (int)problem_size);
-	mixed_radix_real_to_hermitian<double, cl_double, fftw_complex>(problem_size);
-}
-
-template< class T, class cl_T, class fftw_T >
-void pow2_large_1D_in_place_hermitian_interleaved_to_real()
-{
-	std::vector<size_t> lengths;
-	lengths.push_back( large2 );
-	size_t batch = 1;
-	std::vector<size_t> input_strides;
-	std::vector<size_t> output_strides;
-	size_t input_distance = 0;
-	size_t output_distance = 0;
-	layout::buffer_layout_t layout = layout::hermitian_interleaved;
-	placeness::placeness_t placeness = placeness::in_place;
-
-	data_pattern pattern = sawtooth;
-	precallback_complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
-}
-
-TEST_F(accuracy_test_precallback_single, pow2_large_1D_in_place_hermitian_interleaved_to_real)
-{
-	try { pow2_large_1D_in_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); }
-	catch( const std::exception& err ) { handle_exception(err);	}
-}
-
-TEST_F(accuracy_test_precallback_double, pow2_large_1D_in_place_hermitian_interleaved_to_real)
-{
-	try { pow2_large_1D_in_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); }
-	catch( const std::exception& err ) { handle_exception(err);	}
-}
-
-template< class T, class cl_T, class fftw_T >
-void pow2_large_1D_out_of_place_hermitian_planar_to_real()
-{
-	std::vector<size_t> lengths;
-	lengths.push_back( large2 );
-	size_t batch = 1;
-	std::vector<size_t> input_strides;
-	std::vector<size_t> output_strides;
-	size_t input_distance = 0;
-	size_t output_distance = 0;
-	layout::buffer_layout_t layout = layout::hermitian_planar;
-	placeness::placeness_t placeness = placeness::out_of_place;
-
-	data_pattern pattern = sawtooth;
-	precallback_complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
-}
-
-TEST_F(accuracy_test_precallback_single, pow2_large_1D_out_of_place_hermitian_planar_to_real)
-{
-	try { pow2_large_1D_out_of_place_hermitian_planar_to_real< float, cl_float, fftwf_complex >(); }
-	catch( const std::exception& err ) { handle_exception(err);	}
-}
-
-TEST_F(accuracy_test_precallback_double, pow2_large_1D_out_of_place_hermitian_planar_to_real)
-{
-	try { pow2_large_1D_out_of_place_hermitian_planar_to_real< double, cl_double, fftw_complex >(); }
-	catch( const std::exception& err ) { handle_exception(err);	}
-}
-
-#pragma endregion
-
-/**********************************************************************************************
 **************************************Complex To Complex***************************************
 **********************************************************************************************/
 #pragma region Complex_To_Complex
@@ -418,10 +312,217 @@ TEST_F(accuracy_test_precallback_single, pow2_normal_1D_forward_in_place_complex
 #pragma endregion
 
 /**********************************************************************************************
+**************************************Complex To Real***************************************
+**********************************************************************************************/
+#pragma region Complex_To_Real
+
+template< typename T, typename cl_T, typename fftw_T >
+void mixed_radix_hermitian_to_real( size_t problem_size )
+{
+	try
+	{
+		if(verbose) std::cout << "Now testing problem size " << problem_size << std::endl;
+
+		std::vector<size_t> lengths;
+		lengths.push_back( problem_size );
+		size_t batch = 1;
+
+		std::vector<size_t> input_strides;
+		std::vector<size_t> output_strides;
+
+		size_t input_distance = 0;
+		size_t output_distance = 0;
+
+		layout::buffer_layout_t layout = layout::hermitian_interleaved;
+
+		placeness::placeness_t placeness = placeness::in_place;
+
+		data_pattern pattern = sawtooth;
+		precallback_complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+	}
+	catch( const std::exception& err ) {
+		handle_exception(err);
+	}
+}
+
+TEST_P( mixed_radix_precallback, single_precision_hermitian_to_real_auto_generated ) {
+	size_t problem_size = GetParam();
+	RecordProperty("problem_size", (int)problem_size);
+	mixed_radix_hermitian_to_real<float, cl_float, fftwf_complex>(problem_size);
+}
+
+TEST_P( mixed_radix_precallback, double_precision_hermitian_to_real_auto_generated ) {
+	size_t problem_size = GetParam();
+	RecordProperty("problem_size", (int)problem_size);
+	mixed_radix_hermitian_to_real<double, cl_double, fftw_complex>(problem_size);
+}
+
+template< class T, class cl_T, class fftw_T >
+void pow2_normal_1D_in_place_hermitian_interleaved_to_real()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+	placeness::placeness_t placeness = placeness::in_place;
+
+	data_pattern pattern = sawtooth;
+	precallback_complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_precallback_single, pow2_normal_1D_in_place_hermitian_interleaved_to_real)
+{
+	try { pow2_normal_1D_in_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_precallback_double, pow2_normal_1D_in_place_hermitian_interleaved_to_real)
+{
+	try { pow2_normal_1D_in_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+template< class T, class cl_T, class fftw_T >
+void pow2_normal_1D_non_unit_stride_and_distance_hermitian_to_real()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( normal2 );
+	size_t batch = 2;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	input_strides.push_back( 42 );
+	output_strides.push_back( 42 );
+	size_t input_distance = lengths[lengths.size()-1] * input_strides[input_strides.size()-1] + 14;
+	size_t output_distance = lengths[lengths.size()-1] * output_strides[output_strides.size()-1] + 14;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+	placeness::placeness_t placeness = placeness::out_of_place;
+
+	data_pattern pattern = sawtooth;
+	precallback_complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_precallback_single, pow2_normal_1D_non_unit_stride_and_distance_hermitian_to_real)
+{
+	try { pow2_normal_1D_non_unit_stride_and_distance_hermitian_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_precallback_double, pow2_normal_1D_non_unit_stride_and_distance_hermitian_to_real)
+{
+	try { pow2_normal_1D_non_unit_stride_and_distance_hermitian_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+template< class T, class cl_T, class fftw_T >
+void pow2_large_1D_in_place_hermitian_interleaved_to_real()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( large2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t layout = layout::hermitian_interleaved;
+	placeness::placeness_t placeness = placeness::in_place;
+
+	data_pattern pattern = sawtooth;
+	precallback_complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_precallback_single, pow2_large_1D_in_place_hermitian_interleaved_to_real)
+{
+	try { pow2_large_1D_in_place_hermitian_interleaved_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_precallback_double, pow2_large_1D_in_place_hermitian_interleaved_to_real)
+{
+	try { pow2_large_1D_in_place_hermitian_interleaved_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+template< class T, class cl_T, class fftw_T >
+void pow2_large_1D_out_of_place_hermitian_planar_to_real()
+{
+	std::vector<size_t> lengths;
+	lengths.push_back( large2 );
+	size_t batch = 1;
+	std::vector<size_t> input_strides;
+	std::vector<size_t> output_strides;
+	size_t input_distance = 0;
+	size_t output_distance = 0;
+	layout::buffer_layout_t layout = layout::hermitian_planar;
+	placeness::placeness_t placeness = placeness::out_of_place;
+
+	data_pattern pattern = sawtooth;
+	precallback_complex_to_real<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+}
+
+TEST_F(accuracy_test_precallback_single, pow2_large_1D_out_of_place_hermitian_planar_to_real)
+{
+	try { pow2_large_1D_out_of_place_hermitian_planar_to_real< float, cl_float, fftwf_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+TEST_F(accuracy_test_precallback_double, pow2_large_1D_out_of_place_hermitian_planar_to_real)
+{
+	try { pow2_large_1D_out_of_place_hermitian_planar_to_real< double, cl_double, fftw_complex >(); }
+	catch( const std::exception& err ) { handle_exception(err);	}
+}
+
+#pragma endregion
+
+/**********************************************************************************************
 **************************************Real To Complex***************************************
 **********************************************************************************************/
 #pragma region Real_To_Complex
 
+template< typename T, typename cl_T, typename fftw_T >
+void mixed_radix_real_to_hermitian( size_t problem_size )
+{
+	try
+	{
+		if(verbose) std::cout << "Now testing problem size " << problem_size << std::endl;
+
+		std::vector<size_t> lengths;
+		lengths.push_back( problem_size );
+		size_t batch = 1;
+
+		std::vector<size_t> input_strides;
+		std::vector<size_t> output_strides;
+
+		size_t input_distance = 0;
+		size_t output_distance = 0;
+
+		layout::buffer_layout_t layout = layout::hermitian_interleaved;
+
+		placeness::placeness_t placeness = placeness::in_place;
+
+		data_pattern pattern = sawtooth;
+		precallback_real_to_complex<T, cl_T, fftw_T>( pattern, lengths, batch, input_strides, output_strides, input_distance, output_distance, layout, placeness );
+	}
+	catch( const std::exception& err ) {
+		handle_exception(err);
+	}
+}
+
+TEST_P( mixed_radix_precallback, single_precision_real_to_hermitian_auto_generated ) {
+	size_t problem_size = GetParam();
+	RecordProperty("problem_size", (int)problem_size);
+	mixed_radix_real_to_hermitian<float, cl_float, fftwf_complex>(problem_size);
+}
+
+TEST_P( mixed_radix_precallback, double_precision_real_to_hermitian_auto_generated ) {
+	size_t problem_size = GetParam();
+	RecordProperty("problem_size", (int)problem_size);
+	mixed_radix_real_to_hermitian<double, cl_double, fftw_complex>(problem_size);
+}
+
 template< class T, class cl_T, class fftw_T >
 void pow2_normal_1D_forward_in_place_real_to_hermitian_interleaved()
 {

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list