[clfft] 08/21: fix some test fails related to pre callback
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Wed Mar 16 13:14:03 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository clfft.
commit 4a832d60350009b497dbb5118dd75dcdf7206b21
Author: Timmy <timmy.liu at amd.com>
Date: Thu Feb 25 13:26:20 2016 -0600
fix some test fails related to pre callback
---
src/library/generator.transpose.cpp | 122 ++++++++++++++++++++++++++++++------
1 file changed, 104 insertions(+), 18 deletions(-)
diff --git a/src/library/generator.transpose.cpp b/src/library/generator.transpose.cpp
index 319e207..d702771 100644
--- a/src/library/generator.transpose.cpp
+++ b/src/library/generator.transpose.cpp
@@ -481,6 +481,14 @@ clfftStatus genSwapKernel(const FFTGeneratedTransposeNonSquareAction::Signature
clKernWrite(transKernel, 0) << params.fft_postCallback.funcstring << std::endl;
clKernWrite(transKernel, 0) << std::endl;
}
+ //If pre-callback is set for the plan
+ if (params.fft_hasPreCallback)
+ {
+ //we have already checked available LDS for pre callback
+ //Insert callback function code at the beginning
+ clKernWrite(transKernel, 0) << params.fft_preCallback.funcstring << std::endl;
+ clKernWrite(transKernel, 0) << std::endl;
+ }
/*Generating the swapping logic*/
{
@@ -576,6 +584,15 @@ clfftStatus genSwapKernel(const FFTGeneratedTransposeNonSquareAction::Signature
}
}
+ if (params.fft_hasPreCallback)
+ {
+ clKernWrite(transKernel, 0) << ", size_t iOffset, __global void* pre_userdata";
+ if (params.fft_preCallback.localMemSize > 0)
+ {
+ clKernWrite(transKernel, 0) << ", __local void* localmem";
+ }
+ }
+
clKernWrite(transKernel, 0) << "){" << std::endl;
clKernWrite(transKernel, 3) << "for (int j = get_local_id(0); j < end_indx; j += " << local_work_size_swap << "){" << std::endl;
@@ -585,15 +602,45 @@ clfftStatus genSwapKernel(const FFTGeneratedTransposeNonSquareAction::Signature
case CLFFT_REAL:
case CLFFT_COMPLEX_INTERLEAVED:
+ if (params.fft_hasPreCallback)
+ {
+ clKernWrite(transKernel, 6) << "if (pos == 0){" << std::endl;
- clKernWrite(transKernel, 6) << "if (pos == 0){" << std::endl;
- clKernWrite(transKernel, 9) << "Ls[j] = inputA[is *" << smaller_dim << " + " << num_elements_loaded << " * work_id + j];" << std::endl;
- clKernWrite(transKernel, 9) << "Ld[j] = inputA[id *" << smaller_dim << " + " << num_elements_loaded << " * work_id + j];" << std::endl;
- clKernWrite(transKernel, 6) << "}" << std::endl;
+ clKernWrite(transKernel, 9) << "Ls[j] = " << params.fft_preCallback.funcname << "(inputA, ( is *" << smaller_dim << " + " << num_elements_loaded << " * work_id + j + iOffset), pre_userdata";
+ if (params.fft_preCallback.localMemSize > 0)
+ {
+ clKernWrite(transKernel, 0) << ", localmem";
+ }
+ clKernWrite(transKernel, 0) << ");" << std::endl;
- clKernWrite(transKernel, 6) << "else if (pos == 1){" << std::endl;
- clKernWrite(transKernel, 9) << "Ld[j] = inputA[id *" << smaller_dim << " + " << num_elements_loaded << " * work_id + j];" << std::endl;
- clKernWrite(transKernel, 6) << "}" << std::endl;
+ clKernWrite(transKernel, 9) << "Ld[j] = " << params.fft_preCallback.funcname << "(inputA, ( id *" << smaller_dim << " + " << num_elements_loaded << " * work_id + j + iOffset), pre_userdata";
+ if (params.fft_preCallback.localMemSize > 0)
+ {
+ clKernWrite(transKernel, 0) << ", localmem";
+ }
+ clKernWrite(transKernel, 0) << ");" << std::endl;
+ clKernWrite(transKernel, 6) << "}" << std::endl;
+
+ clKernWrite(transKernel, 6) << "else if (pos == 1){" << std::endl;
+ clKernWrite(transKernel, 9) << "Ld[j] = " << params.fft_preCallback.funcname << "(inputA, ( id *" << smaller_dim << " + " << num_elements_loaded << " * work_id + j + iOffset), pre_userdata";
+ if (params.fft_preCallback.localMemSize > 0)
+ {
+ clKernWrite(transKernel, 0) << ", localmem";
+ }
+ clKernWrite(transKernel, 0) << ");" << std::endl;
+ clKernWrite(transKernel, 6) << "}" << std::endl;
+ }
+ else
+ {
+ clKernWrite(transKernel, 6) << "if (pos == 0){" << std::endl;
+ clKernWrite(transKernel, 9) << "Ls[j] = inputA[is *" << smaller_dim << " + " << num_elements_loaded << " * work_id + j];" << std::endl;
+ clKernWrite(transKernel, 9) << "Ld[j] = inputA[id *" << smaller_dim << " + " << num_elements_loaded << " * work_id + j];" << std::endl;
+ clKernWrite(transKernel, 6) << "}" << std::endl;
+
+ clKernWrite(transKernel, 6) << "else if (pos == 1){" << std::endl;
+ clKernWrite(transKernel, 9) << "Ld[j] = inputA[id *" << smaller_dim << " + " << num_elements_loaded << " * work_id + j];" << std::endl;
+ clKernWrite(transKernel, 6) << "}" << std::endl;
+ }
if (params.fft_hasPostCallback)
{
@@ -613,19 +660,50 @@ clfftStatus genSwapKernel(const FFTGeneratedTransposeNonSquareAction::Signature
case CLFFT_HERMITIAN_PLANAR:
return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
case CLFFT_COMPLEX_PLANAR:
+ if (params.fft_hasPreCallback)
+ {
+ clKernWrite(transKernel, 6) << "if (pos == 0){" << std::endl;
+ clKernWrite(transKernel, 9) << "Ls[j] = " << params.fft_preCallback.funcname << "(inputA_R, inputA_I, (is * " << smaller_dim << " + " << num_elements_loaded << " * work_id + j + iOffset), pre_userdata";
+ if (params.fft_preCallback.localMemSize > 0)
+ {
+ clKernWrite(transKernel, 0) << ", localmem";
+ }
+ clKernWrite(transKernel, 0) << ");" << std::endl;
- clKernWrite(transKernel, 6) << "if (pos == 0){" << std::endl;
- clKernWrite(transKernel, 9) << "Ls[j].x = inputA_R[is*" << smaller_dim << " + " << num_elements_loaded << " * work_id + j];" << std::endl;
- clKernWrite(transKernel, 9) << "Ls[j].y = inputA_I[is*" << smaller_dim << " + " << num_elements_loaded << " * work_id + j];" << std::endl;
- clKernWrite(transKernel, 9) << "Ld[j].x = inputA_R[id*" << smaller_dim << " + " << num_elements_loaded << " * work_id + j];" << std::endl;
- clKernWrite(transKernel, 9) << "Ld[j].y = inputA_I[id*" << smaller_dim << " + " << num_elements_loaded << " * work_id + j];" << std::endl;
- clKernWrite(transKernel, 6) << "}" << std::endl;
+ clKernWrite(transKernel, 9) << "Ld[j] = " << params.fft_preCallback.funcname << "(inputA_R, inputA_I, (id * " << smaller_dim << " + " << num_elements_loaded << " * work_id + j + iOffset), pre_userdata";
+ if (params.fft_preCallback.localMemSize > 0)
+ {
+ clKernWrite(transKernel, 0) << ", localmem";
+ }
+ clKernWrite(transKernel, 0) << ");" << std::endl;
- clKernWrite(transKernel, 6) << "else if (pos == 1){" << std::endl;
- clKernWrite(transKernel, 9) << "Ld[j].x = inputA_R[id*" << smaller_dim << " + " << num_elements_loaded << " * work_id + j];" << std::endl;
- clKernWrite(transKernel, 9) << "Ld[j].y = inputA_I[id*" << smaller_dim << " + " << num_elements_loaded << " * work_id + j];" << std::endl;
- clKernWrite(transKernel, 6) << "}" << std::endl;
+ clKernWrite(transKernel, 6) << "}" << std::endl;
+ clKernWrite(transKernel, 6) << "else if (pos == 1){" << std::endl;
+
+ clKernWrite(transKernel, 9) << "Ld[j] = " << params.fft_preCallback.funcname << "(inputA_R, inputA_I, (id * " << smaller_dim << " + " << num_elements_loaded << " * work_id + j + iOffset), pre_userdata";
+ if (params.fft_preCallback.localMemSize > 0)
+ {
+ clKernWrite(transKernel, 0) << ", localmem";
+ }
+ clKernWrite(transKernel, 0) << ");" << std::endl;
+
+ clKernWrite(transKernel, 6) << "}" << std::endl;
+ }
+ else
+ {
+ clKernWrite(transKernel, 6) << "if (pos == 0){" << std::endl;
+ clKernWrite(transKernel, 9) << "Ls[j].x = inputA_R[is*" << smaller_dim << " + " << num_elements_loaded << " * work_id + j];" << std::endl;
+ clKernWrite(transKernel, 9) << "Ls[j].y = inputA_I[is*" << smaller_dim << " + " << num_elements_loaded << " * work_id + j];" << std::endl;
+ clKernWrite(transKernel, 9) << "Ld[j].x = inputA_R[id*" << smaller_dim << " + " << num_elements_loaded << " * work_id + j];" << std::endl;
+ clKernWrite(transKernel, 9) << "Ld[j].y = inputA_I[id*" << smaller_dim << " + " << num_elements_loaded << " * work_id + j];" << std::endl;
+ clKernWrite(transKernel, 6) << "}" << std::endl;
+
+ clKernWrite(transKernel, 6) << "else if (pos == 1){" << std::endl;
+ clKernWrite(transKernel, 9) << "Ld[j].x = inputA_R[id*" << smaller_dim << " + " << num_elements_loaded << " * work_id + j];" << std::endl;
+ clKernWrite(transKernel, 9) << "Ld[j].y = inputA_I[id*" << smaller_dim << " + " << num_elements_loaded << " * work_id + j];" << std::endl;
+ clKernWrite(transKernel, 6) << "}" << std::endl;
+ }
if (params.fft_hasPostCallback)
{
clKernWrite(transKernel, 6) << params.fft_postCallback.funcname << "(inputA_R, inputA_I, (iOffset + id*" << smaller_dim << " + " << num_elements_loaded << " * work_id + j), post_userdata, Ls[j].x, Ls[j].y";
@@ -679,7 +757,7 @@ clfftStatus genSwapKernel(const FFTGeneratedTransposeNonSquareAction::Signature
clKernWrite(transKernel, 3) << tmpBuffType << " " << dtInput << " *to = (tmp_tot_mem + " << num_elements_loaded << ");" << std::endl;
//Do not advance offset when postcallback is set as the starting address of global buffer is needed
- if (!params.fft_hasPostCallback)
+ if (!params.fft_hasPostCallback && !params.fft_hasPreCallback)
clKernWrite(transKernel, 3) << "inputA += iOffset;" << std::endl; // Set A ptr to the start of each slice
break;
case CLFFT_COMPLEX_PLANAR:
@@ -749,6 +827,14 @@ clfftStatus genSwapKernel(const FFTGeneratedTransposeNonSquareAction::Signature
clKernWrite(transKernel, 0) << ", localmem";
}
}
+ if (params.fft_hasPreCallback)
+ {
+ clKernWrite(transKernel, 0) << ", iOffset, pre_userdata";
+ if (params.fft_preCallback.localMemSize > 0)
+ {
+ clKernWrite(transKernel, 0) << ", localmem";
+ }
+ }
clKernWrite(transKernel, 0) << ");" << std::endl;
clKernWrite(transKernel, 3) << "}" << std::endl;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits mailing list