[clfft] 17/74: version having the initial full kernel generator.

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Thu Jan 14 19:52:13 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository clfft.

commit fab8c74479201d7bdc4153f5b200b99110e6f99f
Author: santanu-thangaraj <t.santanu at gmail.com>
Date:   Tue Nov 24 14:35:07 2015 +0530

    version having the initial full kernel generator.
---
 src/library/generator.transpose.nonsquare.cpp | 133 ++++++++++++++++++++++++--
 1 file changed, 126 insertions(+), 7 deletions(-)

diff --git a/src/library/generator.transpose.nonsquare.cpp b/src/library/generator.transpose.nonsquare.cpp
index e96770a..252059c 100644
--- a/src/library/generator.transpose.nonsquare.cpp
+++ b/src/library/generator.transpose.nonsquare.cpp
@@ -968,7 +968,7 @@ clKernWrite(transKernel, 3) << "}" << std::endl; // end else
         }
 
         std::string funcName;
-        funcName = "swap_nonsquare";
+
 
         size_t local_work_size = num_lines_loaded << 4;
         local_work_size = (local_work_size > 256) ? 256 : local_work_size;
@@ -982,24 +982,127 @@ clKernWrite(transKernel, 3) << "}" << std::endl; // end else
 
         clKernWrite(transKernel, 0) << std::endl;
 
-        clKernWrite(transKernel, 0) << "__constant int swap_table["<< cycle_map[0] <<"][5] = {" << std::endl;
+        //"<< cycle_map[0] <<"
+        clKernWrite(transKernel, 0) << "__constant int swap_table[][3] = {" << std::endl;
 
-        int inx = 0, start_inx, swap_inx = 0;
+        int inx = 0, start_inx, swap_inx = 0, num_swaps = 0;
         for (int i = 0; i < cycle_map[0]; i++)
         {
             start_inx = cycle_map[++inx];
-            std::cout << "\nCycle:" << (i + 1) << ">\t" << "(" << start_inx << "," << cycle_map[inx + 1] << ")";
+
+            clKernWrite(transKernel, 0) << "{  " << start_inx << ",  " << cycle_map[inx + 1] << ",  0}," << std::endl;
+            num_swaps++;
+          //  std::cout << "\nCycle:" << (i + 1) << ">\t" << "(" << start_inx << "," << cycle_map[inx + 1] << ")";
 
             while (start_inx != cycle_map[++inx])
             {
-                swap_inx = 1 - swap_inx;
-                std::cout << "\t" << "(" << cycle_map[inx] << "," << cycle_map[inx + 1] << ")";
+                        
+              //  std::cout << "\t" << "(" << cycle_map[inx] << "," << cycle_map[inx + 1] << ")";
                 int action_var = (cycle_map[inx + 1] == start_inx) ? 2 : 1;
+                clKernWrite(transKernel, 0) << "{  " << cycle_map[inx] << ",  " << cycle_map[inx + 1] << ",  " << action_var << "}," << std::endl;
+                num_swaps++;
             }
         }
         clKernWrite(transKernel, 0) << "};" << std::endl;
 
         clKernWrite(transKernel, 0) << std::endl;
+
+        switch (params.fft_inputLayout)
+        {
+        case CLFFT_COMPLEX_INTERLEAVED:
+            clKernWrite(transKernel, 0) << "void swap(global " << dtComplex << "* inputA, "<< "__local " << dtComplex << "* Ls, __local " << dtComplex << "* Ld, int is, int id, int pos){" << std::endl;
+            break;
+        case CLFFT_COMPLEX_PLANAR:
+            clKernWrite(transKernel, 0) << "void swap(global " << dtPlanar << "* inputA_R, global " << dtPlanar << "* inputA_I, " << "__local " << dtComplex << "* Ls, __local " << dtComplex << "* Ld, int is, int id, int pos){" << std::endl;
+            break;
+        case CLFFT_HERMITIAN_INTERLEAVED:
+        case CLFFT_HERMITIAN_PLANAR:
+            return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
+        case CLFFT_REAL:
+            clKernWrite(transKernel, 0) << "void swap(global " << dtPlanar << "* inputA, " << "__local " << dtPlanar << "* Ls, __local " << dtPlanar << "* Ld, int is, int id, int pos){" << std::endl;
+            break;
+        default:
+            return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
+        }
+      /*  for (int p = 0; p < num_lines_loaded; p++)
+        {
+            for (int j = 0; j < small_dim; j++)
+            {
+                if (pos == 0)
+                {
+                    ts[p*small_dim + j] = z[is*num_lines_loaded*small_dim + p*small_dim + j];
+                    td[p*small_dim + j] = z[id*num_lines_loaded*small_dim + p*small_dim + j];
+                    z[id*num_lines_loaded*small_dim + p*small_dim + j] = ts[p*small_dim + j];
+                }
+                else if (pos == 1)
+                {
+                    td[p*small_dim + j] = z[id*num_lines_loaded*small_dim + p*small_dim + j];
+                    z[id*num_lines_loaded*small_dim + p*small_dim + j] = ts[p*small_dim + j];
+                }
+                else
+                {
+                    z[id*num_lines_loaded*small_dim + p*small_dim + j] = ts[p*small_dim + j];
+                }
+            }
+        }*/
+
+        clKernWrite(transKernel, 3) << "for (int p = get_local_id(0) / 16; p < " << num_lines_loaded << "; p += " << local_work_size / 16 << "){" << std::endl;
+        clKernWrite(transKernel, 6) << "for (int j = get_local_id(0) % 16; j < " << smaller_dim << "; j += " << 16 << "){" << std::endl;
+
+        switch (params.fft_inputLayout)
+        {
+        case CLFFT_REAL:
+        case CLFFT_COMPLEX_INTERLEAVED:
+
+            clKernWrite(transKernel, 9) << "if (pos == 0){" << std::endl;
+            clKernWrite(transKernel, 12) << "Ls[p*" << smaller_dim << " + j] = inputA[is*" << num_lines_loaded << "*" << smaller_dim << " + p*" << smaller_dim << " + j];" << std::endl;
+            clKernWrite(transKernel, 12) << "Ld[p*" << smaller_dim << " + j] = inputA[id*" << num_lines_loaded << "*" << smaller_dim << " + p*" << smaller_dim << " + j];" << std::endl;
+            clKernWrite(transKernel, 12) << "inputA[id*" << num_lines_loaded << "*" << smaller_dim << " + p*" << smaller_dim << " + j] = Ls[p*" << smaller_dim << " + j];" << std::endl;
+            clKernWrite(transKernel, 9) << "}" << std::endl;
+
+            clKernWrite(transKernel, 9) << "else if (pos == 1){" << std::endl;
+            clKernWrite(transKernel, 12) << "Ld[p*" << smaller_dim << " + j] = inputA[id*" << num_lines_loaded << "*" << smaller_dim << " + p*" << smaller_dim << " + j];" << std::endl;
+            clKernWrite(transKernel, 12) << "inputA[id*" << num_lines_loaded << "*" << smaller_dim << " + p*" << smaller_dim << " + j] = Ls[p*" << smaller_dim << " + j];" << std::endl;
+            clKernWrite(transKernel, 9) << "}" << std::endl;
+
+            clKernWrite(transKernel, 9) << "else{" << std::endl;
+            clKernWrite(transKernel, 12) << "inputA[id*" << num_lines_loaded << "*" << smaller_dim << " + p*" << smaller_dim << " + j] = Ls[p*" << smaller_dim << " + j];" << std::endl;
+            clKernWrite(transKernel, 9) << "}" << std::endl;
+            break;
+        case CLFFT_HERMITIAN_INTERLEAVED:
+        case CLFFT_HERMITIAN_PLANAR:
+            return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
+        case CLFFT_COMPLEX_PLANAR:
+            clKernWrite(transKernel, 9) << "if (pos == 0){" << std::endl;
+            clKernWrite(transKernel, 12) << "Ls[p*" << smaller_dim << " + j].x = inputA_R[is*" << num_lines_loaded << "*" << smaller_dim << " + p*" << smaller_dim << " + j];" << std::endl;
+            clKernWrite(transKernel, 12) << "Ls[p*" << smaller_dim << " + j].y = inputA_I[is*" << num_lines_loaded << "*" << smaller_dim << " + p*" << smaller_dim << " + j];" << std::endl;
+            clKernWrite(transKernel, 12) << "Ld[p*" << smaller_dim << " + j].x = inputA_R[id*" << num_lines_loaded << "*" << smaller_dim << " + p*" << smaller_dim << " + j];" << std::endl;
+            clKernWrite(transKernel, 12) << "Ld[p*" << smaller_dim << " + j].y = inputA_I[id*" << num_lines_loaded << "*" << smaller_dim << " + p*" << smaller_dim << " + j];" << std::endl;
+            clKernWrite(transKernel, 12) << "inputA_R[id*" << num_lines_loaded << "*" << smaller_dim << " + p*" << smaller_dim << " + j] = Ls[p*" << smaller_dim << " + j].x;" << std::endl;
+            clKernWrite(transKernel, 12) << "inputA_I[id*" << num_lines_loaded << "*" << smaller_dim << " + p*" << smaller_dim << " + j] = Ls[p*" << smaller_dim << " + j].y;" << std::endl;
+            clKernWrite(transKernel, 9) << "}" << std::endl;
+
+            clKernWrite(transKernel, 9) << "else if (pos == 1){" << std::endl;
+            clKernWrite(transKernel, 12) << "Ld[p*" << smaller_dim << " + j].x = inputA_R[id*" << num_lines_loaded << "*" << smaller_dim << " + p*" << smaller_dim << " + j];" << std::endl;
+            clKernWrite(transKernel, 12) << "Ld[p*" << smaller_dim << " + j].y = inputA_I[id*" << num_lines_loaded << "*" << smaller_dim << " + p*" << smaller_dim << " + j];" << std::endl;
+            clKernWrite(transKernel, 12) << "inputA_R[id*" << num_lines_loaded << "*" << smaller_dim << " + p*" << smaller_dim << " + j] = Ls[p*" << smaller_dim << " + j].x;" << std::endl;
+            clKernWrite(transKernel, 12) << "inputA_I[id*" << num_lines_loaded << "*" << smaller_dim << " + p*" << smaller_dim << " + j] = Ls[p*" << smaller_dim << " + j].y;" << std::endl;
+            clKernWrite(transKernel, 9) << "}" << std::endl;
+
+            clKernWrite(transKernel, 9) << "else{" << std::endl;
+            clKernWrite(transKernel, 12) << "inputA_R[id*" << num_lines_loaded << "*" << smaller_dim << " + p*" << smaller_dim << " + j] = Ls[p*" << smaller_dim << " + j].x;" << std::endl;
+            clKernWrite(transKernel, 12) << "inputA_I[id*" << num_lines_loaded << "*" << smaller_dim << " + p*" << smaller_dim << " + j] = Ls[p*" << smaller_dim << " + j].y;" << std::endl;
+            clKernWrite(transKernel, 9) << "}" << std::endl;
+            break;
+        default:
+            return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
+        }
+        clKernWrite(transKernel, 6) << "}" << std::endl;
+        clKernWrite(transKernel, 3) << "}" << std::endl;
+
+        clKernWrite(transKernel, 0) << "}" << std::endl << std::endl;
+
+        funcName = "swap_nonsquare";
         // Generate kernel API
         genTransposePrototype(params, local_work_size, dtPlanar, dtComplex, funcName, transKernel, dtInput, dtOutput);
 
@@ -1134,10 +1237,26 @@ clKernWrite(transKernel, 3) << "}" << std::endl; // end else
             }
         }
 
-        clKernWrite(transKernel, 3) << "__local " << dtComplex << " **loc_swap_ptr[2];" << std::endl;
+        clKernWrite(transKernel, 3) << "__local " << dtComplex << " *loc_swap_ptr[2];" << std::endl;
         clKernWrite(transKernel, 3) << "loc_swap_ptr[0] = te;" << std::endl;
         clKernWrite(transKernel, 3) << "loc_swap_ptr[1] = to;" << std::endl;
 
+        clKernWrite(transKernel, 3) << "int swap_inx;" << std::endl;
+        clKernWrite(transKernel, 3) << "for (int loop = 0; loop < " << num_swaps << "; loop ++){" << std::endl;
+        clKernWrite(transKernel, 6) << "swap_inx = 1 - swap_inx;" << std::endl;
+        switch (params.fft_inputLayout)
+        {
+        case CLFFT_COMPLEX_INTERLEAVED:
+        case CLFFT_REAL:   
+            clKernWrite(transKernel, 6) << "swap(inputA, loc_swap_ptr[swap_inx], loc_swap_ptr[1 - swap_inx], swap_table[0], swap_table[1], swap_table[2]);" << std::endl;
+            break;
+        case CLFFT_COMPLEX_PLANAR:
+            clKernWrite(transKernel, 6) << "swap(inputA_R, inputA_I, loc_swap_ptr[swap_inx], loc_swap_ptr[1 - swap_inx], swap_table[0], swap_table[1], swap_table[2]);" << std::endl;
+            break;
+        
+        }
+        
+        clKernWrite(transKernel, 3) << "}" << std::endl;
 
         clKernWrite(transKernel, 0) << "}" << std::endl;
         strKernel = transKernel.str();

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list