[clfft] 15/74: back up before swap logic is added.
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Jan 14 19:52:12 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/sid
in repository clfft.
commit 56add45d15e335b8007c20bdcc09b7ec5bd74c09
Author: santanu-thangaraj <t.santanu at gmail.com>
Date: Mon Nov 23 10:11:08 2015 +0530
back up before swap logic is added.
---
src/library/generator.transpose.nonsquare.cpp | 152 +++++++++++++-------------
src/library/generator.transpose.nonsquare.h | 1 +
src/library/plan.cpp | 4 +-
3 files changed, 81 insertions(+), 76 deletions(-)
diff --git a/src/library/generator.transpose.nonsquare.cpp b/src/library/generator.transpose.nonsquare.cpp
index 5151bf1..2ebe702 100644
--- a/src/library/generator.transpose.nonsquare.cpp
+++ b/src/library/generator.transpose.nonsquare.cpp
@@ -740,93 +740,93 @@ static clfftStatus genTransposeKernel(const FFTGeneratedTransposeNonSquareAction
}
- // If requested, generate the Twiddle math to multiply constant values
- if (params.fft_3StepTwiddle)
- genTwiddleMath(params, transKernel, dtComplex, fwd);
-
- clKernWrite(transKernel, 9) << "xy_s[index] = tmpm;" << std::endl;
- clKernWrite(transKernel, 9) << "yx_s[index] = tmpt;" << std::endl;
-
- clKernWrite(transKernel, 9) << "}" << std::endl;
- clKernWrite(transKernel, 3) << "}" << std::endl;
-
- clKernWrite(transKernel, 3) << "" << std::endl;
- clKernWrite(transKernel, 3) << "barrier(CLK_LOCAL_MEM_FENCE);" << std::endl;
- clKernWrite(transKernel, 3) << "" << std::endl;
-
- // Step2: Write from shared to global
-
- clKernWrite(transKernel, 3) << "if (" << smaller_dim << " - (t_gx_p + 1) *" << 16 * reShapeFactor << ">0){" << std::endl;
- clKernWrite(transKernel, 6) << "for (int loop = 0; loop<" << reShapeFactor*reShapeFactor << "; ++loop){" << std::endl;
- clKernWrite(transKernel, 9) << "index = lidx*" << 16 * reShapeFactor << " + lidy + " << 16 / reShapeFactor << "*loop ;" << std::endl;
+// If requested, generate the Twiddle math to multiply constant values
+if (params.fft_3StepTwiddle)
+genTwiddleMath(params, transKernel, dtComplex, fwd);
- // Handle planar and interleaved right here
- switch (params.fft_outputLayout)
- {
- case CLFFT_COMPLEX_INTERLEAVED:
- clKernWrite(transKernel, 9) << "outputA[(idy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + idx] = yx_s[index];" << std::endl;
- clKernWrite(transKernel, 9) << "outputA[(lidy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + lidx + starting_index_yx] = xy_s[index]; " << std::endl;
+clKernWrite(transKernel, 9) << "xy_s[index] = tmpm;" << std::endl;
+clKernWrite(transKernel, 9) << "yx_s[index] = tmpt;" << std::endl;
- break;
- case CLFFT_COMPLEX_PLANAR:
- clKernWrite(transKernel, 9) << "outputA_R[(idy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + idx] = yx_s[index].x;" << std::endl;
- clKernWrite(transKernel, 9) << "outputA_I[(idy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + idx] = yx_s[index].y;" << std::endl;
- clKernWrite(transKernel, 9) << "outputA_R[(lidy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + lidx + starting_index_yx] = xy_s[index].x; " << std::endl;
- clKernWrite(transKernel, 9) << "outputA_I[(lidy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + lidx + starting_index_yx] = xy_s[index].y; " << std::endl;
+clKernWrite(transKernel, 9) << "}" << std::endl;
+clKernWrite(transKernel, 3) << "}" << std::endl;
+clKernWrite(transKernel, 3) << "" << std::endl;
+clKernWrite(transKernel, 3) << "barrier(CLK_LOCAL_MEM_FENCE);" << std::endl;
+clKernWrite(transKernel, 3) << "" << std::endl;
+// Step2: Write from shared to global
- break;
- case CLFFT_HERMITIAN_INTERLEAVED:
- case CLFFT_HERMITIAN_PLANAR:
- return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
- case CLFFT_REAL:
- break;
- default:
- return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
- }
-
-
- clKernWrite(transKernel, 6) << "}" << std::endl;
- clKernWrite(transKernel, 3) << "}" << std::endl;
+clKernWrite(transKernel, 3) << "if (" << smaller_dim << " - (t_gx_p + 1) *" << 16 * reShapeFactor << ">0){" << std::endl;
+clKernWrite(transKernel, 6) << "for (int loop = 0; loop<" << reShapeFactor*reShapeFactor << "; ++loop){" << std::endl;
+clKernWrite(transKernel, 9) << "index = lidx*" << 16 * reShapeFactor << " + lidy + " << 16 / reShapeFactor << "*loop ;" << std::endl;
- clKernWrite(transKernel, 3) << "else{" << std::endl;
- clKernWrite(transKernel, 6) << "for (int loop = 0; loop<" << reShapeFactor*reShapeFactor << "; ++loop){" << std::endl;
+// Handle planar and interleaved right here
+switch (params.fft_outputLayout)
+{
+case CLFFT_COMPLEX_INTERLEAVED:
+ clKernWrite(transKernel, 9) << "outputA[(idy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + idx] = yx_s[index];" << std::endl;
+ clKernWrite(transKernel, 9) << "outputA[(lidy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + lidx + starting_index_yx] = xy_s[index]; " << std::endl;
+
+ break;
+case CLFFT_COMPLEX_PLANAR:
+ clKernWrite(transKernel, 9) << "outputA_R[(idy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + idx] = yx_s[index].x;" << std::endl;
+ clKernWrite(transKernel, 9) << "outputA_I[(idy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + idx] = yx_s[index].y;" << std::endl;
+ clKernWrite(transKernel, 9) << "outputA_R[(lidy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + lidx + starting_index_yx] = xy_s[index].x; " << std::endl;
+ clKernWrite(transKernel, 9) << "outputA_I[(lidy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + lidx + starting_index_yx] = xy_s[index].y; " << std::endl;
+
+
+
+ break;
+case CLFFT_HERMITIAN_INTERLEAVED:
+case CLFFT_HERMITIAN_PLANAR:
+ return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
+case CLFFT_REAL:
+ break;
+default:
+ return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
+}
- clKernWrite(transKernel, 9) << "index = lidx*" << 16 * reShapeFactor << " + lidy + " << 16 / reShapeFactor << "*loop;" << std::endl;
- // Handle planar and interleaved right here
- switch (params.fft_outputLayout)
- {
- case CLFFT_COMPLEX_INTERLEAVED:
- clKernWrite(transKernel, 9) << "if ((idy + loop*" << 16 / reShapeFactor << ")<" << params.fft_N[0] << " && idx<" << params.fft_N[0] << ")" << std::endl;
- clKernWrite(transKernel, 12) << "outputA[(idy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + idx] = yx_s[index]; " << std::endl;
- clKernWrite(transKernel, 9) << "if ((t_gy_p * " << 16 * reShapeFactor << " + lidx)<" << params.fft_N[0] << " && (t_gx_p * " << 16 * reShapeFactor << " + lidy + loop*" << 16 / reShapeFactor << ")<" << params.fft_N[0] << ")" << std::endl;
- clKernWrite(transKernel, 12) << "outputA[(lidy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + lidx + starting_index_yx] = xy_s[index];" << std::endl;
+clKernWrite(transKernel, 6) << "}" << std::endl;
+clKernWrite(transKernel, 3) << "}" << std::endl;
- break;
- case CLFFT_COMPLEX_PLANAR:
- clKernWrite(transKernel, 9) << "if ((idy + loop*" << 16 / reShapeFactor << ")<" << params.fft_N[0] << " && idx<" << params.fft_N[0] << ") {" << std::endl;
- clKernWrite(transKernel, 12) << "outputA_R[(idy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + idx] = yx_s[index].x; " << std::endl;
- clKernWrite(transKernel, 12) << "outputA_I[(idy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + idx] = yx_s[index].y; }" << std::endl;
- clKernWrite(transKernel, 9) << "if ((t_gy_p * " << 16 * reShapeFactor << " + lidx)<" << params.fft_N[0] << " && (t_gx_p * " << 16 * reShapeFactor << " + lidy + loop*" << 16 / reShapeFactor << ")<" << params.fft_N[0] << ") {" << std::endl;
- clKernWrite(transKernel, 12) << "outputA_R[(lidy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + lidx + starting_index_yx] = xy_s[index].x;" << std::endl;
- clKernWrite(transKernel, 12) << "outputA_I[(lidy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + lidx + starting_index_yx] = xy_s[index].y; }" << std::endl;
+clKernWrite(transKernel, 3) << "else{" << std::endl;
+clKernWrite(transKernel, 6) << "for (int loop = 0; loop<" << reShapeFactor*reShapeFactor << "; ++loop){" << std::endl;
+clKernWrite(transKernel, 9) << "index = lidx*" << 16 * reShapeFactor << " + lidy + " << 16 / reShapeFactor << "*loop;" << std::endl;
- break;
- case CLFFT_HERMITIAN_INTERLEAVED:
- case CLFFT_HERMITIAN_PLANAR:
- return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
- case CLFFT_REAL:
- break;
- default:
- return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
- }
+// Handle planar and interleaved right here
+switch (params.fft_outputLayout)
+{
+case CLFFT_COMPLEX_INTERLEAVED:
+ clKernWrite(transKernel, 9) << "if ((idy + loop*" << 16 / reShapeFactor << ")<" << params.fft_N[0] << " && idx<" << params.fft_N[0] << ")" << std::endl;
+ clKernWrite(transKernel, 12) << "outputA[(idy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + idx] = yx_s[index]; " << std::endl;
+ clKernWrite(transKernel, 9) << "if ((t_gy_p * " << 16 * reShapeFactor << " + lidx)<" << params.fft_N[0] << " && (t_gx_p * " << 16 * reShapeFactor << " + lidy + loop*" << 16 / reShapeFactor << ")<" << params.fft_N[0] << ")" << std::endl;
+ clKernWrite(transKernel, 12) << "outputA[(lidy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + lidx + starting_index_yx] = xy_s[index];" << std::endl;
+
+ break;
+case CLFFT_COMPLEX_PLANAR:
+ clKernWrite(transKernel, 9) << "if ((idy + loop*" << 16 / reShapeFactor << ")<" << params.fft_N[0] << " && idx<" << params.fft_N[0] << ") {" << std::endl;
+ clKernWrite(transKernel, 12) << "outputA_R[(idy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + idx] = yx_s[index].x; " << std::endl;
+ clKernWrite(transKernel, 12) << "outputA_I[(idy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + idx] = yx_s[index].y; }" << std::endl;
+ clKernWrite(transKernel, 9) << "if ((t_gy_p * " << 16 * reShapeFactor << " + lidx)<" << params.fft_N[0] << " && (t_gx_p * " << 16 * reShapeFactor << " + lidy + loop*" << 16 / reShapeFactor << ")<" << params.fft_N[0] << ") {" << std::endl;
+ clKernWrite(transKernel, 12) << "outputA_R[(lidy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + lidx + starting_index_yx] = xy_s[index].x;" << std::endl;
+ clKernWrite(transKernel, 12) << "outputA_I[(lidy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + lidx + starting_index_yx] = xy_s[index].y; }" << std::endl;
+
+
+ break;
+case CLFFT_HERMITIAN_INTERLEAVED:
+case CLFFT_HERMITIAN_PLANAR:
+ return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
+case CLFFT_REAL:
+ break;
+default:
+ return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
+}
- clKernWrite(transKernel, 6) << "}" << std::endl; // end for
- clKernWrite(transKernel, 3) << "}" << std::endl; // end else
+clKernWrite(transKernel, 6) << "}" << std::endl; // end for
+clKernWrite(transKernel, 3) << "}" << std::endl; // end else
}
@@ -838,6 +838,10 @@ static clfftStatus genTransposeKernel(const FFTGeneratedTransposeNonSquareAction
break;
}
+ /*Generating the swapping logic*/
+ {
+
+ }
return CLFFT_SUCCESS;
}
diff --git a/src/library/generator.transpose.nonsquare.h b/src/library/generator.transpose.nonsquare.h
index 559ee90..8b9e809 100644
--- a/src/library/generator.transpose.nonsquare.h
+++ b/src/library/generator.transpose.nonsquare.h
@@ -21,6 +21,7 @@
#include "repo.h"
#include "plan.h"
+#define AVAIL_MEM_SIZE 32768
#endif
#pragma once
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index 5f2b0a8..75abcf3 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -1886,8 +1886,8 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
if (!test_performed)
{
//test_performed = 1;
- fftPlan->length[0] = 64;// fftPlan->length[1];
- fftPlan->length[1] = fftPlan->length[0] * 2;
+ fftPlan->length[1] = 64;// fftPlan->length[1];
+ fftPlan->length[0] = fftPlan->length[1] * 2;
fftPlan->action = new FFTGeneratedTransposeNonSquareAction(plHandle, fftPlan, *commQueueFFT, err);
OPENCL_V(err, "FFTGeneratedTransposeNonSquareAction() failed");
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits mailing list