[clfft] 10/32: added 64 test cases related to 1:3 and 1:5 inplace transpose. inplace transpose now can be done in recusive layer. still need to modify swap line kernel in the case the each line is bigger than LDS can hold.

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Tue Apr 26 08:34:08 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository clfft.

commit b7f933ca81893c09e6864c15485d444e898b4c77
Author: unknown <timmy.liu at amd.com>
Date:   Thu Mar 17 16:54:36 2016 -0500

    added 64 test cases related to 1:3 and 1:5 inplace transpose. inplace transpose now can be done in recusive layer. still need to modify swap line kernel in the case the each line is bigger than LDS can hold.
---
 src/library/generator.transpose.cpp | 106 +++++++++++++++----
 src/library/plan.cpp                |  42 ++++++--
 src/tests/accuracy_test_pow3.cpp    | 162 ++++++----------------------
 src/tests/accuracy_test_pow5.cpp    | 203 ++++++++++++++++++++++++++++++++++++
 4 files changed, 351 insertions(+), 162 deletions(-)

diff --git a/src/library/generator.transpose.cpp b/src/library/generator.transpose.cpp
index 654dd29..600b71f 100644
--- a/src/library/generator.transpose.cpp
+++ b/src/library/generator.transpose.cpp
@@ -1010,7 +1010,7 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
 	//if pre-callback is set for the plan
 	//if post-callback is set for the plan
 
-	//twiddle in swap kernel
+	//twiddle in swap kernel (for now, swap with twiddle seems to always be the second kernel after transpose)
 	bool twiddleSwapKernel = params.fft_3StepTwiddle && (dim_ratio > 1);
 
 	//generate the swap_table
@@ -1019,7 +1019,7 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
 
 	clKernWrite(transKernel, 0) << "__constant int swap_table["<< permutationTable.size()+2 <<"][1] = {" << std::endl;
 	clKernWrite(transKernel, 0) << "{0}," << std::endl;
-	clKernWrite(transKernel, 0) << "{"<< smaller_dim * dim_ratio - 1 <<"}," << std::endl;
+	clKernWrite(transKernel, 0) << "{"<< smaller_dim * dim_ratio - 1 <<"}," << std::endl;// add the first and last row to the swap table. needed for twiddling
 	for (std::vector<std::vector<size_t> >::iterator itor = permutationTable.begin(); itor != permutationTable.end(); itor++)
 	{
 		clKernWrite(transKernel, 0) << "{" << (*itor)[0] << "}";
@@ -1193,21 +1193,17 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
         if (params.fft_N[0] > params.fft_N[1])//decides whether we have a tall or wide rectangle
         {
             clKernWrite(transKernel, 6) << "next = (prev*" << smaller_dim << ")%" << smaller_dim*dim_ratio - 1 << ";" << std::endl;
-            //ugly
+            //takes care the last row
             clKernWrite(transKernel, 6) << "if (prev == " << smaller_dim * dim_ratio - 1 << ")" << std::endl;
             clKernWrite(transKernel, 9) << "next = " << smaller_dim * dim_ratio - 1 << ";" << std::endl;
 
             clKernWrite(transKernel, 6) << "group_offset = (next/" << dim_ratio << ")*" << smaller_dim << "*" << dim_ratio
                 << " + (next%" << dim_ratio << ")*" << smaller_dim << ";" << std::endl; //might look like: group_offset = (next/3)*729*3 + (next%3)*729;
-            if (twiddleSwapKernel)
-            {
-                //TODO
-            }
         }
         else
         {
             clKernWrite(transKernel, 6) << "next = (prev*" << dim_ratio << ")%" << smaller_dim*dim_ratio - 1 << ";" << std::endl;
-            //ugly
+            //takes care the last row
             clKernWrite(transKernel, 6) << "if (prev == " << smaller_dim * dim_ratio - 1 << ")" << std::endl;
             clKernWrite(transKernel, 9) << "next = " << smaller_dim * dim_ratio - 1 << ";" << std::endl;
 
@@ -1267,19 +1263,93 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
         clKernWrite(transKernel, 3) << std::endl;
         switch (params.fft_inputLayout)
         {
-        case CLFFT_REAL:
+        case CLFFT_REAL:// for real case this is different
         case CLFFT_COMPLEX_INTERLEAVED:
         {
-            for (int i = 0; i < smaller_dim; i = i + 256)
+            if (twiddleSwapKernel)
             {
-                if (i + 256 < smaller_dim)
-                    clKernWrite(transKernel, 6) << "inputA[group_offset+idx+" << i << "] = prevValue[idx+" << i << "];" << std::endl;
-                else
+                clKernWrite(transKernel, 6) << "size_t p;" << std::endl;
+                clKernWrite(transKernel, 6) << "size_t q;" << std::endl;
+                clKernWrite(transKernel, 6) << dtComplex << " twiddle_factor;" << std::endl;
+
+                for (int i = 0; i < smaller_dim; i = i + 256)
                 {
-                    // need to handle boundary
-                    clKernWrite(transKernel, 6) << "if(idx+" << i << "<" << smaller_dim << "){" << std::endl;
-                    clKernWrite(transKernel, 9) << "inputA[group_offset+idx+" << i << "] = prevValue[idx+" << i << "];" << std::endl;
-                    clKernWrite(transKernel, 6) << "}" << std::endl;
+                    if (i + 256 < smaller_dim)
+                    {
+                        if (params.fft_N[0] > params.fft_N[1])//decides whether we have a tall or wide rectangle
+                        {
+                            //input is wide; output is tall
+                            clKernWrite(transKernel, 6) << "p = (group_offset+idx+" << i << ")/" << smaller_dim << ";" << std::endl;
+                            clKernWrite(transKernel, 6) << "q = (group_offset+idx+" << i << ")%" << smaller_dim << ";" << std::endl;
+                        }
+                        else
+                        {
+                            //input is tall; output is wide
+                            clKernWrite(transKernel, 6) << "p = (group_offset+idx+" << i << ")/" << bigger_dim << ";" << std::endl;
+                            clKernWrite(transKernel, 6) << "q = (group_offset+idx+" << i << ")%" << bigger_dim << ";" << std::endl;
+                        }
+                        clKernWrite(transKernel, 6) << "twiddle_factor = TW3step(p*q);" << std::endl;
+                        if (fwd)
+                        {
+                            //forward
+                            clKernWrite(transKernel, 6) << "inputA[group_offset+idx+" << i << "].x = prevValue[idx+" << i << "].x * twiddle_factor.x - prevValue[idx+" << i << "].y * twiddle_factor.y;" << std::endl;
+                            clKernWrite(transKernel, 6) << "inputA[group_offset+idx+" << i << "].y = prevValue[idx+" << i << "].x * twiddle_factor.y + prevValue[idx+" << i << "].y * twiddle_factor.x;" << std::endl;
+                        }
+                        else
+                        {
+                            //backward
+                            clKernWrite(transKernel, 6) << "inputA[group_offset+idx+" << i << "].x = prevValue[idx+" << i << "].x * twiddle_factor.x + prevValue[idx+" << i << "].y * twiddle_factor.y;" << std::endl;
+                            clKernWrite(transKernel, 6) << "inputA[group_offset+idx+" << i << "].y = prevValue[idx+" << i << "].y * twiddle_factor.x - prevValue[idx+" << i << "].x * twiddle_factor.y;" << std::endl;
+                        }
+                        //clKernWrite(transKernel, 6) << "inputA[group_offset+idx+" << i << "] = prevValue[idx+" << i << "];" << std::endl;
+                    }
+                    else
+                    {
+                        // need to handle boundary
+                        clKernWrite(transKernel, 6) << "if(idx+" << i << "<" << smaller_dim << "){" << std::endl;
+                        if (params.fft_N[0] > params.fft_N[1])//decides whether we have a tall or wide rectangle
+                        {
+                            //input is wide; output is tall
+                            clKernWrite(transKernel, 6) << "p = (group_offset+idx+" << i << ")/" << smaller_dim << ";" << std::endl;
+                            clKernWrite(transKernel, 6) << "q = (group_offset+idx+" << i << ")%" << smaller_dim << ";" << std::endl;
+                        }
+                        else
+                        {
+                            //input is tall; output is wide
+                            clKernWrite(transKernel, 6) << "p = (group_offset+idx+" << i << ")/" << bigger_dim << ";" << std::endl;
+                            clKernWrite(transKernel, 6) << "q = (group_offset+idx+" << i << ")%" << bigger_dim << ";" << std::endl;
+                        }
+                        clKernWrite(transKernel, 6) << "twiddle_factor = TW3step(p*q);" << std::endl;
+                        if (fwd)
+                        {
+                            //forward
+                            clKernWrite(transKernel, 6) << "inputA[group_offset+idx+" << i << "].x = prevValue[idx+" << i << "].x * twiddle_factor.x - prevValue[idx+" << i << "].y * twiddle_factor.y;" << std::endl;
+                            clKernWrite(transKernel, 6) << "inputA[group_offset+idx+" << i << "].y = prevValue[idx+" << i << "].x * twiddle_factor.y + prevValue[idx+" << i << "].y * twiddle_factor.x;" << std::endl;
+                        }
+                        else
+                        {
+                            //backward
+                            clKernWrite(transKernel, 6) << "inputA[group_offset+idx+" << i << "].x = prevValue[idx+" << i << "].x * twiddle_factor.x + prevValue[idx+" << i << "].y * twiddle_factor.y;" << std::endl;
+                            clKernWrite(transKernel, 6) << "inputA[group_offset+idx+" << i << "].y = prevValue[idx+" << i << "].y * twiddle_factor.x - prevValue[idx+" << i << "].x * twiddle_factor.y;" << std::endl;
+                        }
+                        //clKernWrite(transKernel, 9) << "inputA[group_offset+idx+" << i << "] = prevValue[idx+" << i << "];" << std::endl;
+                        clKernWrite(transKernel, 6) << "}" << std::endl;
+                    }
+                }
+            }
+            else
+            {
+                for (int i = 0; i < smaller_dim; i = i + 256)
+                {
+                    if (i + 256 < smaller_dim)
+                        clKernWrite(transKernel, 6) << "inputA[group_offset+idx+" << i << "] = prevValue[idx+" << i << "];" << std::endl;
+                    else
+                    {
+                        // need to handle boundary
+                        clKernWrite(transKernel, 6) << "if(idx+" << i << "<" << smaller_dim << "){" << std::endl;
+                        clKernWrite(transKernel, 9) << "inputA[group_offset+idx+" << i << "] = prevValue[idx+" << i << "];" << std::endl;
+                        clKernWrite(transKernel, 6) << "}" << std::endl;
+                    }
                 }
             }
             break;
@@ -1420,7 +1490,7 @@ clfftStatus genSwapKernelGeneral(const FFTGeneratedTransposeNonSquareAction::Sig
         clKernWrite(transKernel, 0) << "}" << std::endl;//end of kernel
 
         if (!twiddleSwapKernel)
-            break; // break for bothDir
+            break; // break for bothDir only need one kernel if twiddle is not done here
 
     }//end of for (size_t bothDir = 0; bothDir < 2; bothDir++)
 	
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index 391246a..71185cf 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -698,8 +698,15 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 					for (size_t index = 1; index < fftPlan->length.size(); index++)
 					{
 						//trans1Plan->length.push_back(fftPlan->length[index]);
-						trans1Plan->batchsize = trans1Plan->batchsize * fftPlan->length[index];//Timmy
-						trans1Plan->iDist = trans1Plan->iDist / fftPlan->length[index];//Timmy
+                        /*
+                        replacing the line above with the two lines below since:
+                        fftPlan is still 1D, thus the broken down transpose should be 2D not 3D
+                        the batchSize for the transpose should increase accordingly. 
+                        the iDist should decrease accordingly. Push back to length will cause a 3D transpose 
+                        */
+						trans1Plan->batchsize = trans1Plan->batchsize * fftPlan->length[index];
+						trans1Plan->iDist = trans1Plan->iDist / fftPlan->length[index];
+
 						trans1Plan->inStride.push_back(fftPlan->inStride[index]);
 						trans1Plan->outStride.push_back(trans1Plan->oDist);
 						trans1Plan->oDist *= fftPlan->length[index];
@@ -785,7 +792,7 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 					trans2Plan->oDist         = clLengths[1] * trans2Plan->outStride[1];
                     trans2Plan->gen           = transGen;
 
-					//if (transGen != Transpose_NONSQUARE)//TIMMY twiddle
+					//if (transGen != Transpose_NONSQUARE)//twiddle
 						trans2Plan->large1D		  = fftPlan->length[0];
 
 					trans2Plan->transflag     = true;
@@ -793,8 +800,14 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 					for (size_t index = 1; index < fftPlan->length.size(); index++)
 					{
 						//trans2Plan->length.push_back(fftPlan->length[index]);
-						trans2Plan->batchsize = trans2Plan->batchsize * fftPlan->length[index];//Timmy
-						trans2Plan->iDist = trans2Plan->iDist / fftPlan->length[index];//Timmy
+                        /*
+                        replacing the line above with the two lines below since:
+                        fftPlan is still 1D, thus the broken down transpose should be 2D not 3D
+                        the batchSize for the transpose should increase accordingly.
+                        the iDist should decrease accordingly. Push back to length will cause a 3D transpose
+                        */
+						trans2Plan->batchsize = trans2Plan->batchsize * fftPlan->length[index];
+						trans2Plan->iDist = trans2Plan->iDist / fftPlan->length[index];
 						trans2Plan->inStride.push_back(fftPlan->outStride[index]);
 						trans2Plan->outStride.push_back(trans2Plan->oDist);
 						trans2Plan->oDist *= fftPlan->length[index];
@@ -843,7 +856,7 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 						row2Plan->oDist *= fftPlan->length[index];
 					}
 					
-					//if (transGen != Transpose_NONSQUARE)//TIMMY twiddle in transform
+					//if (transGen != Transpose_NONSQUARE)//twiddle in transform
 					//{
 					//	row2Plan->large1D = fftPlan->length[0];
 					//	row2Plan->twiddleFront = true;
@@ -881,8 +894,14 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 					for (size_t index = 1; index < fftPlan->length.size(); index++)
 					{
 						//trans3Plan->length.push_back(fftPlan->length[index]);
-						trans3Plan->batchsize = trans3Plan->batchsize * fftPlan->length[index];//Timmy for 2D
-						trans3Plan->iDist = trans3Plan->iDist / fftPlan->length[index];//Timmy
+                        /*
+                        replacing the line above with the two lines below since:
+                        fftPlan is still 1D, thus the broken down transpose should be 2D not 3D
+                        the batchSize for the transpose should increase accordingly.
+                        the iDist should decrease accordingly. Push back to length will cause a 3D transpose
+                        */
+						trans3Plan->batchsize = trans3Plan->batchsize * fftPlan->length[index];
+						trans3Plan->iDist = trans3Plan->iDist / fftPlan->length[index];
 						trans3Plan->inStride.push_back(trans3Plan->iDist);
 						trans3Plan->iDist *= fftPlan->length[index];
 						trans3Plan->outStride.push_back(fftPlan->outStride[index]);
@@ -1993,20 +2012,21 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 						NON_SQUARE_KERNEL_ORDER currKernelOrder;
 						// controling the transpose and swap kernel order
 						// if leading dim is larger than the other dim it makes sense to swap and transpose
-						if (clLengths[0] > clLengths[1])
+						if (clLengths[0] > clLengths[1] && fftPlan->large1D == 0)
 						{
+                            //twidding can be done in swap when swap is the second kernel for now
 							currKernelOrder = SWAP_AND_TRANSPOSE;
 						}
 						else
 						{
 							if (fftPlan->large1D != 0 && 0)
 							{
-								//currently tranpose twiddling is only supported in below case
-								//TODO support tranpose twiddling for all cases.
+                                //this is not going to happen
 								currKernelOrder = TRANSPOSE_LEADING_AND_SWAP;
 							}
 							else
 							{
+                                //twiddling can be done in swap
 								currKernelOrder = TRANSPOSE_AND_SWAP;
 							}
 						}
diff --git a/src/tests/accuracy_test_pow3.cpp b/src/tests/accuracy_test_pow3.cpp
index 7034e03..f1369db 100644
--- a/src/tests/accuracy_test_pow3.cpp
+++ b/src/tests/accuracy_test_pow3.cpp
@@ -1922,96 +1922,7 @@ TEST_F(accuracy_test_pow3_double, huge_1D_backward_in_place_complex_interleaved_
     catch (const std::exception& err) { handle_exception(err); }
 }
 
-//78125 = 125 * 125 * 5, backward and forward, planar and interleaved, single and double, batch size 1 and 3
-TEST_F(accuracy_test_pow3_single, huge_1D_forward_in_place_complex_planar_to_complex_planar_78125_1)
-{
-	try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(78125, 1, layout::complex_planar, direction::forward); }
-	catch (const std::exception& err) { handle_exception(err); }
-}
-TEST_F(accuracy_test_pow3_single, huge_1D_backward_in_place_complex_planar_to_complex_planar_78125_1)
-{
-	try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(78125, 1, layout::complex_planar, direction::backward); }
-	catch (const std::exception& err) { handle_exception(err); }
-}
-
-TEST_F(accuracy_test_pow3_single, huge_1D_forward_in_place_complex_planar_to_complex_planar_78125_3)
-{
-	try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(78125, 3, layout::complex_planar, direction::forward); }
-	catch (const std::exception& err) { handle_exception(err); }
-}
-TEST_F(accuracy_test_pow3_single, huge_1D_backward_in_place_complex_planar_to_complex_planar_78125_3)
-{
-	try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(78125, 3, layout::complex_planar, direction::backward); }
-	catch (const std::exception& err) { handle_exception(err); }
-}
-
-TEST_F(accuracy_test_pow3_double, huge_1D_forward_in_place_complex_planar_to_complex_planar_78125_1)
-{
-	try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(78125, 1, layout::complex_planar, direction::forward); }
-	catch (const std::exception& err) { handle_exception(err); }
-}
-TEST_F(accuracy_test_pow3_double, huge_1D_backward_in_place_complex_planar_to_complex_planar_78125_1)
-{
-	try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(78125, 1, layout::complex_planar, direction::backward); }
-	catch (const std::exception& err) { handle_exception(err); }
-}
-
-TEST_F(accuracy_test_pow3_double, huge_1D_forward_in_place_complex_planar_to_complex_planar_78125_3)
-{
-	try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(78125, 3, layout::complex_planar, direction::forward); }
-	catch (const std::exception& err) { handle_exception(err); }
-}
-TEST_F(accuracy_test_pow3_double, huge_1D_backward_in_place_complex_planar_to_complex_planar_78125_3)
-{
-	try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(78125, 3, layout::complex_planar, direction::backward); }
-	catch (const std::exception& err) { handle_exception(err); }
-}
-//interleaved
-TEST_F(accuracy_test_pow3_single, huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved_78125_1)
-{
-	try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(78125, 1, layout::complex_interleaved, direction::forward); }
-	catch (const std::exception& err) { handle_exception(err); }
-}
-TEST_F(accuracy_test_pow3_single, huge_1D_backward_in_place_complex_interleaved_to_complex_interleaved_78125_1)
-{
-	try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(78125, 1, layout::complex_interleaved, direction::backward); }
-	catch (const std::exception& err) { handle_exception(err); }
-}
-
-TEST_F(accuracy_test_pow3_single, huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved_78125_3)
-{
-	try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(78125, 3, layout::complex_interleaved, direction::forward); }
-	catch (const std::exception& err) { handle_exception(err); }
-}
-TEST_F(accuracy_test_pow3_single, huge_1D_backward_in_place_complex_interleaved_to_complex_interleaved_78125_3)
-{
-	try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(78125, 3, layout::complex_interleaved, direction::backward); }
-	catch (const std::exception& err) { handle_exception(err); }
-}
-
-TEST_F(accuracy_test_pow3_double, huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved_78125_1)
-{
-	try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(78125, 1, layout::complex_interleaved, direction::forward); }
-	catch (const std::exception& err) { handle_exception(err); }
-}
-TEST_F(accuracy_test_pow3_double, huge_1D_backward_in_place_complex_interleaved_to_complex_interleaved_78125_1)
-{
-	try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(78125, 1, layout::complex_interleaved, direction::backward); }
-	catch (const std::exception& err) { handle_exception(err); }
-}
-
-TEST_F(accuracy_test_pow3_double, huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved_78125_3)
-{
-	try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(78125, 3, layout::complex_interleaved, direction::forward); }
-	catch (const std::exception& err) { handle_exception(err); }
-}
-TEST_F(accuracy_test_pow3_double, huge_1D_backward_in_place_complex_interleaved_to_complex_interleaved_78125_3)
-{
-	try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(78125, 3, layout::complex_interleaved, direction::backward); }
-	catch (const std::exception& err) { handle_exception(err); }
-}
-
-//1594323 = 729 * 729 * 3 backward and forward, planar and interleaved, single only, double would require 3d transpose (not supported), batch size 1 and 3
+//1594323 = 729 * 729 * 3 backward and forward, planar and interleaved, single and double, batch size 1 and 3
 TEST_F(accuracy_test_pow3_single, huge_1D_forward_in_place_complex_planar_to_complex_planar_1594323_1)
 {
 	try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(1594323, 1, layout::complex_planar, direction::forward); }
@@ -2038,16 +1949,21 @@ TEST_F(accuracy_test_pow3_double, huge_1D_forward_in_place_complex_planar_to_com
 	try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(1594323, 1, layout::complex_planar, direction::forward); }
 	catch (const std::exception& err) { handle_exception(err); }
 }
-TEST_F(accuracy_test_pow3_double, huge_1D_forward_in_place_complex_planar_to_complex_planar_1594323_2)
+TEST_F(accuracy_test_pow3_double, huge_1D_backward_in_place_complex_planar_to_complex_planar_1594323_1)
 {
-	try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(1594323, 2, layout::complex_planar, direction::forward); }
-	catch (const std::exception& err) { handle_exception(err); }
+    try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(1594323, 1, layout::complex_planar, direction::backward); }
+    catch (const std::exception& err) { handle_exception(err); }
 }
-TEST_F(accuracy_test_pow3_double, huge_1D_forward_in_place_complex_planar_to_complex_planar_2187_1)
+TEST_F(accuracy_test_pow3_double, huge_1D_forward_in_place_complex_planar_to_complex_planar_1594323_3)
 {
-	try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(2187, 1, layout::complex_planar, direction::forward); }
+	try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(1594323, 3, layout::complex_planar, direction::forward); }
 	catch (const std::exception& err) { handle_exception(err); }
 }
+TEST_F(accuracy_test_pow3_double, huge_1D_backward_in_place_complex_planar_to_complex_planar_1594323_3)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(1594323, 3, layout::complex_planar, direction::backward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
 
 //interleaved
 TEST_F(accuracy_test_pow3_single, huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved_1594323_1)
@@ -2060,7 +1976,6 @@ TEST_F(accuracy_test_pow3_single, huge_1D_backward_in_place_complex_interleaved_
 	try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(1594323, 1, layout::complex_interleaved, direction::backward); }
 	catch (const std::exception& err) { handle_exception(err); }
 }
-
 TEST_F(accuracy_test_pow3_single, huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved_1594323_3)
 {
 	try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(1594323, 3, layout::complex_interleaved, direction::forward); }
@@ -2071,54 +1986,35 @@ TEST_F(accuracy_test_pow3_single, huge_1D_backward_in_place_complex_interleaved_
 	try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(1594323, 3, layout::complex_interleaved, direction::backward); }
 	catch (const std::exception& err) { handle_exception(err); }
 }
-
-//1953125 = 625 * 625 * 5 backward and forward, planar and interleaved, single only, double would require 3d transpose (not supported), batch size 1 and 3
-TEST_F(accuracy_test_pow3_single, huge_1D_forward_in_place_complex_planar_to_complex_planar_1953125_1)
-{
-	try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(1953125, 1, layout::complex_planar, direction::forward); }
-	catch (const std::exception& err) { handle_exception(err); }
-}
-TEST_F(accuracy_test_pow3_single, huge_1D_backward_in_place_complex_planar_to_complex_planar_1953125_1)
+TEST_F(accuracy_test_pow3_double, huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved_1594323_1)
 {
-	try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(1953125, 1, layout::complex_planar, direction::backward); }
-	catch (const std::exception& err) { handle_exception(err); }
-}
-
-TEST_F(accuracy_test_pow3_single, huge_1D_forward_in_place_complex_planar_to_complex_planar_1953125_3)
-{
-	try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(1953125, 3, layout::complex_planar, direction::forward); }
-	catch (const std::exception& err) { handle_exception(err); }
+    try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(1594323, 1, layout::complex_interleaved, direction::forward); }
+    catch (const std::exception& err) { handle_exception(err); }
 }
-TEST_F(accuracy_test_pow3_single, huge_1D_backward_in_place_complex_planar_to_complex_planar_1953125_3)
+TEST_F(accuracy_test_pow3_double, huge_1D_backward_in_place_complex_interleaved_to_complex_interleaved_1594323_1)
 {
-	try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(1953125, 3, layout::complex_planar, direction::backward); }
-	catch (const std::exception& err) { handle_exception(err); }
+    try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(1594323, 1, layout::complex_interleaved, direction::backward); }
+    catch (const std::exception& err) { handle_exception(err); }
 }
-
-//interleaved
-TEST_F(accuracy_test_pow3_single, huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved_1953125_1)
+TEST_F(accuracy_test_pow3_double, huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved_1594323_3)
 {
-	try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(1953125, 1, layout::complex_interleaved, direction::forward); }
-	catch (const std::exception& err) { handle_exception(err); }
+    try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(1594323, 3, layout::complex_interleaved, direction::forward); }
+    catch (const std::exception& err) { handle_exception(err); }
 }
-TEST_F(accuracy_test_pow3_single, huge_1D_backward_in_place_complex_interleaved_to_complex_interleaved_1953125_1)
+TEST_F(accuracy_test_pow3_double, huge_1D_backward_in_place_complex_interleaved_to_complex_interleaved_1594323_3)
 {
-	try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(1953125, 1, layout::complex_interleaved, direction::backward); }
-	catch (const std::exception& err) { handle_exception(err); }
+    try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(1594323, 3, layout::complex_interleaved, direction::backward); }
+    catch (const std::exception& err) { handle_exception(err); }
 }
 
-TEST_F(accuracy_test_pow3_single, huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved_1953125_3)
-{
-	try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(1953125, 3, layout::complex_interleaved, direction::forward); }
-	catch (const std::exception& err) { handle_exception(err); }
-}
-TEST_F(accuracy_test_pow3_single, huge_1D_backward_in_place_complex_interleaved_to_complex_interleaved_1953125_3)
+//14348907 = 2187 * 2187 * 3 backward and forward, planar and interleaved, single and double, batch size 1 and 3
+/*
+TEST_F(accuracy_test_pow3_single, huge_1D_forward_in_place_complex_planar_to_complex_planar_14348907_1)
 {
-	try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(1953125, 3, layout::complex_interleaved, direction::backward); }
-	catch (const std::exception& err) { handle_exception(err); }
+    try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(14348907, 1, layout::complex_planar, direction::forward); }
+    catch (const std::exception& err) { handle_exception(err); }
 }
-
-
+*/
 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
 // ^^^^^^^^^^^^^^^^^^^^^^^ normal 2D ^^^^^^^^^^^^^^^^^^^^^^ //
 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
diff --git a/src/tests/accuracy_test_pow5.cpp b/src/tests/accuracy_test_pow5.cpp
index 0861c76..c4ab62e 100644
--- a/src/tests/accuracy_test_pow5.cpp
+++ b/src/tests/accuracy_test_pow5.cpp
@@ -1809,6 +1809,209 @@ TEST_F(accuracy_test_pow5_double, large_1D_out_of_place_hermitian_planar_to_real
 }
 
 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^ huge 1D ^^^^^^^^^^^^^^^^^^^^^^ //
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
+
+// *****************************************************
+// *****************************************************
+
+template< class T, class cl_T, class fftw_T>
+void huge_1D_forward_in_place_complex_to_complex(size_t lenSize, size_t batchSize, layout::buffer_layout_t layoutType, direction::direction_t direction_type)
+{
+    std::vector<size_t> lengths;
+    lengths.push_back(lenSize);
+    size_t batch = batchSize;
+    std::vector<size_t> input_strides;
+    std::vector<size_t> output_strides;
+    size_t input_distance = 0;
+    size_t output_distance = 0;
+    layout::buffer_layout_t in_layout = layoutType;
+    layout::buffer_layout_t out_layout = layoutType;
+    placeness::placeness_t placeness = placeness::in_place;
+    direction::direction_t direction = direction_type;
+
+    data_pattern pattern = sawtooth;
+    complex_to_complex<T, cl_T, fftw_T>(pattern, direction, lengths, batch, input_strides, output_strides, input_distance, output_distance, in_layout, out_layout, placeness);
+}
+
+//78125 = 125 * 125 * 5, backward and forward, planar and interleaved, single and double, batch size 1 and 3
+TEST_F(accuracy_test_pow5_single, huge_1D_forward_in_place_complex_planar_to_complex_planar_78125_1)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(78125, 1, layout::complex_planar, direction::forward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow5_single, huge_1D_backward_in_place_complex_planar_to_complex_planar_78125_1)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(78125, 1, layout::complex_planar, direction::backward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_pow5_single, huge_1D_forward_in_place_complex_planar_to_complex_planar_78125_3)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(78125, 3, layout::complex_planar, direction::forward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow5_single, huge_1D_backward_in_place_complex_planar_to_complex_planar_78125_3)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(78125, 3, layout::complex_planar, direction::backward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_pow5_double, huge_1D_forward_in_place_complex_planar_to_complex_planar_78125_1)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(78125, 1, layout::complex_planar, direction::forward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow5_double, huge_1D_backward_in_place_complex_planar_to_complex_planar_78125_1)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(78125, 1, layout::complex_planar, direction::backward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_pow5_double, huge_1D_forward_in_place_complex_planar_to_complex_planar_78125_3)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(78125, 3, layout::complex_planar, direction::forward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow5_double, huge_1D_backward_in_place_complex_planar_to_complex_planar_78125_3)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(78125, 3, layout::complex_planar, direction::backward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+//interleaved
+TEST_F(accuracy_test_pow5_single, huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved_78125_1)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(78125, 1, layout::complex_interleaved, direction::forward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow5_single, huge_1D_backward_in_place_complex_interleaved_to_complex_interleaved_78125_1)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(78125, 1, layout::complex_interleaved, direction::backward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_pow5_single, huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved_78125_3)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(78125, 3, layout::complex_interleaved, direction::forward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow5_single, huge_1D_backward_in_place_complex_interleaved_to_complex_interleaved_78125_3)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(78125, 3, layout::complex_interleaved, direction::backward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_pow5_double, huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved_78125_1)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(78125, 1, layout::complex_interleaved, direction::forward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow5_double, huge_1D_backward_in_place_complex_interleaved_to_complex_interleaved_78125_1)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(78125, 1, layout::complex_interleaved, direction::backward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_pow5_double, huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved_78125_3)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(78125, 3, layout::complex_interleaved, direction::forward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow5_double, huge_1D_backward_in_place_complex_interleaved_to_complex_interleaved_78125_3)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(78125, 3, layout::complex_interleaved, direction::backward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+
+//1953125 = 625 * 625 * 5 backward and forward, planar and interleaved, single and double, batch size 1 and 3
+TEST_F(accuracy_test_pow5_single, huge_1D_forward_in_place_complex_planar_to_complex_planar_1953125_1)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(1953125, 1, layout::complex_planar, direction::forward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow5_single, huge_1D_backward_in_place_complex_planar_to_complex_planar_1953125_1)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(1953125, 1, layout::complex_planar, direction::backward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_pow5_single, huge_1D_forward_in_place_complex_planar_to_complex_planar_1953125_3)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(1953125, 3, layout::complex_planar, direction::forward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow5_single, huge_1D_backward_in_place_complex_planar_to_complex_planar_1953125_3)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(1953125, 3, layout::complex_planar, direction::backward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow5_double, huge_1D_forward_in_place_complex_planar_to_complex_planar_1953125_1)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(1953125, 1, layout::complex_planar, direction::forward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow5_double, huge_1D_backward_in_place_complex_planar_to_complex_planar_1953125_1)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(1953125, 1, layout::complex_planar, direction::backward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_pow5_double, huge_1D_forward_in_place_complex_planar_to_complex_planar_1953125_3)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(1953125, 3, layout::complex_planar, direction::forward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow5_double, huge_1D_backward_in_place_complex_planar_to_complex_planar_1953125_3)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(1953125, 3, layout::complex_planar, direction::backward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+
+//interleaved
+TEST_F(accuracy_test_pow5_single, huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved_1953125_1)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(1953125, 1, layout::complex_interleaved, direction::forward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow5_single, huge_1D_backward_in_place_complex_interleaved_to_complex_interleaved_1953125_1)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(1953125, 1, layout::complex_interleaved, direction::backward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_pow5_single, huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved_1953125_3)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(1953125, 3, layout::complex_interleaved, direction::forward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow5_single, huge_1D_backward_in_place_complex_interleaved_to_complex_interleaved_1953125_3)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< float, cl_float, fftwf_complex >(1953125, 3, layout::complex_interleaved, direction::backward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow5_double, huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved_1953125_1)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(1953125, 1, layout::complex_interleaved, direction::forward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow5_double, huge_1D_backward_in_place_complex_interleaved_to_complex_interleaved_1953125_1)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(1953125, 1, layout::complex_interleaved, direction::backward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+
+TEST_F(accuracy_test_pow5_double, huge_1D_forward_in_place_complex_interleaved_to_complex_interleaved_1953125_3)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(1953125, 3, layout::complex_interleaved, direction::forward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+TEST_F(accuracy_test_pow5_double, huge_1D_backward_in_place_complex_interleaved_to_complex_interleaved_1953125_3)
+{
+    try { huge_1D_forward_in_place_complex_to_complex< double, cl_double, fftw_complex >(1953125, 3, layout::complex_interleaved, direction::backward); }
+    catch (const std::exception& err) { handle_exception(err); }
+}
+
+// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
 // ^^^^^^^^^^^^^^^^^^^^^^^ normal 2D ^^^^^^^^^^^^^^^^^^^^^^ //
 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ //
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list