[clfft] 21/74: bug fixes.
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Thu Jan 14 19:52:13 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch debian/sid
in repository clfft.
commit 727542b3a458e35d17fee021072fd721cff5adb9
Author: santanu-thangaraj <t.santanu at gmail.com>
Date: Thu Nov 26 20:42:55 2015 +0530
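bug fixes. [Editorial summary of the diff below: frees the is_swapped and cycle_map arrays with delete[]; uses smaller_dim instead of params.fft_N[0] in the generated kernel's bounds checks; returns CLFFT_TRANSPOSED_NOTIMPLEMENTED instead of calling exit(1) when the in-place transpose does not fit the memory constraints; removes commented-out code; comments out one ARG_CHECK in initParams and the hard-coded layout/precision/length[1] overrides in plan.cpp.]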
---
src/library/generator.transpose.nonsquare.cpp | 46 +++++++--------------------
src/library/plan.cpp | 6 ++--
2 files changed, 15 insertions(+), 37 deletions(-)
diff --git a/src/library/generator.transpose.nonsquare.cpp b/src/library/generator.transpose.nonsquare.cpp
index 36991b8..3627b20 100644
--- a/src/library/generator.transpose.nonsquare.cpp
+++ b/src/library/generator.transpose.nonsquare.cpp
@@ -305,6 +305,7 @@ static void get_cycles(size_t *cycle_map, int num_reduced_row, int num_reduced_c
}
}
cycle_map[0] = num_cycles;
+ delete[] is_swapped;
}
@@ -765,7 +766,7 @@ static clfftStatus genTransposeKernel(const FFTGeneratedTransposeNonSquareAction
{
case CLFFT_COMPLEX_INTERLEAVED:
case CLFFT_REAL:
- clKernWrite(transKernel, 9) << "if ((idy + loop*" << 16 / reShapeFactor << ")<" << params.fft_N[0] << "&& idx<" << params.fft_N[0] << ")" << std::endl;
+ clKernWrite(transKernel, 9) << "if ((idy + loop*" << 16 / reShapeFactor << ")<" << smaller_dim << "&& idx<" << smaller_dim << ")" << std::endl;
if (params.fft_hasPreCallback)
{
if (params.fft_preCallback.localMemSize > 0)
@@ -884,17 +885,17 @@ static clfftStatus genTransposeKernel(const FFTGeneratedTransposeNonSquareAction
{
case CLFFT_COMPLEX_INTERLEAVED:
case CLFFT_REAL:
- clKernWrite(transKernel, 9) << "if ((idy + loop*" << 16 / reShapeFactor << ")<" << params.fft_N[0] << " && idx<" << params.fft_N[0] << ")" << std::endl;
+ clKernWrite(transKernel, 9) << "if ((idy + loop*" << 16 / reShapeFactor << ")<" << smaller_dim << " && idx<" << smaller_dim << ")" << std::endl;
clKernWrite(transKernel, 12) << "outputA[(idy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + idx] = yx_s[index]; " << std::endl;
- clKernWrite(transKernel, 9) << "if ((t_gy_p * " << 16 * reShapeFactor << " + lidx)<" << params.fft_N[0] << " && (t_gx_p * " << 16 * reShapeFactor << " + lidy + loop*" << 16 / reShapeFactor << ")<" << params.fft_N[0] << ")" << std::endl;
+ clKernWrite(transKernel, 9) << "if ((t_gy_p * " << 16 * reShapeFactor << " + lidx)<" << smaller_dim << " && (t_gx_p * " << 16 * reShapeFactor << " + lidy + loop*" << 16 / reShapeFactor << ")<" << smaller_dim << ")" << std::endl;
clKernWrite(transKernel, 12) << "outputA[(lidy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + lidx + starting_index_yx] = xy_s[index];" << std::endl;
break;
case CLFFT_COMPLEX_PLANAR:
- clKernWrite(transKernel, 9) << "if ((idy + loop*" << 16 / reShapeFactor << ")<" << params.fft_N[0] << " && idx<" << params.fft_N[0] << ") {" << std::endl;
+ clKernWrite(transKernel, 9) << "if ((idy + loop*" << 16 / reShapeFactor << ")<" << smaller_dim << " && idx<" << smaller_dim << ") {" << std::endl;
clKernWrite(transKernel, 12) << "outputA_R[(idy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + idx] = yx_s[index].x; " << std::endl;
clKernWrite(transKernel, 12) << "outputA_I[(idy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + idx] = yx_s[index].y; }" << std::endl;
- clKernWrite(transKernel, 9) << "if ((t_gy_p * " << 16 * reShapeFactor << " + lidx)<" << params.fft_N[0] << " && (t_gx_p * " << 16 * reShapeFactor << " + lidy + loop*" << 16 / reShapeFactor << ")<" << params.fft_N[0] << ") {" << std::endl;
+ clKernWrite(transKernel, 9) << "if ((t_gy_p * " << 16 * reShapeFactor << " + lidx)<" << smaller_dim << " && (t_gx_p * " << 16 * reShapeFactor << " + lidy + loop*" << 16 / reShapeFactor << ")<" << smaller_dim << ") {" << std::endl;
clKernWrite(transKernel, 12) << "outputA_R[(lidy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + lidx + starting_index_yx] = xy_s[index].x;" << std::endl;
clKernWrite(transKernel, 12) << "outputA_I[(lidy + loop*" << 16 / reShapeFactor << ")*" << params.fft_N[0] << " + lidx + starting_index_yx] = xy_s[index].y; }" << std::endl;
@@ -953,7 +954,7 @@ static clfftStatus genTransposeKernel(const FFTGeneratedTransposeNonSquareAction
if (max_capacity <= 0)
{
std::cout << "\nIn-place transpose cannot be performed within specified memory constraints.\n";
- exit(1);
+ return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
}
int num_lines_loaded = get_num_lines_to_be_loaded(max_capacity, smaller_dim);
int num_reduced_row;
@@ -985,27 +986,24 @@ static clfftStatus genTransposeKernel(const FFTGeneratedTransposeNonSquareAction
clKernWrite(transKernel, 0) << std::endl;
- //"<< cycle_map[0] <<"
clKernWrite(transKernel, 0) << "__constant int swap_table[][3] = {" << std::endl;
int inx = 0, start_inx, swap_inx = 0, num_swaps = 0;
for (int i = 0; i < cycle_map[0]; i++)
{
start_inx = cycle_map[++inx];
-
clKernWrite(transKernel, 0) << "{ " << start_inx << ", " << cycle_map[inx + 1] << ", 0}," << std::endl;
num_swaps++;
- // std::cout << "\nCycle:" << (i + 1) << ">\t" << "(" << start_inx << "," << cycle_map[inx + 1] << ")";
while (start_inx != cycle_map[++inx])
- {
-
- // std::cout << "\t" << "(" << cycle_map[inx] << "," << cycle_map[inx + 1] << ")";
+ {
int action_var = (cycle_map[inx + 1] == start_inx) ? 2 : 1;
clKernWrite(transKernel, 0) << "{ " << cycle_map[inx] << ", " << cycle_map[inx + 1] << ", " << action_var << "}," << std::endl;
num_swaps++;
}
}
+
+ delete[] cycle_map;
clKernWrite(transKernel, 0) << "};" << std::endl;
clKernWrite(transKernel, 0) << std::endl;
@@ -1027,27 +1025,6 @@ static clfftStatus genTransposeKernel(const FFTGeneratedTransposeNonSquareAction
default:
return CLFFT_TRANSPOSED_NOTIMPLEMENTED;
}
- /* for (int p = 0; p < num_lines_loaded; p++)
- {
- for (int j = 0; j < small_dim; j++)
- {
- if (pos == 0)
- {
- ts[p*small_dim + j] = z[is*num_lines_loaded*small_dim + p*small_dim + j];
- td[p*small_dim + j] = z[id*num_lines_loaded*small_dim + p*small_dim + j];
- z[id*num_lines_loaded*small_dim + p*small_dim + j] = ts[p*small_dim + j];
- }
- else if (pos == 1)
- {
- td[p*small_dim + j] = z[id*num_lines_loaded*small_dim + p*small_dim + j];
- z[id*num_lines_loaded*small_dim + p*small_dim + j] = ts[p*small_dim + j];
- }
- else
- {
- z[id*num_lines_loaded*small_dim + p*small_dim + j] = ts[p*small_dim + j];
- }
- }
- }*/
clKernWrite(transKernel, 3) << "for (int p = get_local_id(0) / 16; p < " << num_lines_loaded << "; p += " << local_work_size / 16 << "){" << std::endl;
clKernWrite(transKernel, 6) << "for (int j = get_local_id(0) % 16; j < " << smaller_dim << "; j += " << 16 << "){" << std::endl;
@@ -1316,7 +1293,8 @@ clfftStatus FFTGeneratedTransposeNonSquareAction::initParams()
if (this->plan->large1D != 0) {
ARG_CHECK(this->signature.fft_N[0] != 0)
- ARG_CHECK((this->plan->large1D % this->signature.fft_N[0]) == 0)
+ //ToDo:ENABLE ASSERT
+ // ARG_CHECK((this->plan->large1D % this->signature.fft_N[0]) == 0)
this->signature.fft_3StepTwiddle = true;
//ToDo:ENABLE ASSERT
// ARG_CHECK(this->plan->large1D == (this->signature.fft_N[1] * this->signature.fft_N[0]));
diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index 2ea16c5..35bd0ab 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -1891,13 +1891,13 @@ clfftStatus clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
if (!test_performed)
{
//CLFFT_COMPLEX_PLANAR
- fftPlan->inputLayout = CLFFT_COMPLEX_PLANAR;
+ /* fftPlan->inputLayout = CLFFT_COMPLEX_PLANAR;
fftPlan->outputLayout = CLFFT_COMPLEX_PLANAR;
if(fftPlan->inputLayout == CLFFT_REAL)
test_performed = 1;
- fftPlan->precision = CLFFT_DOUBLE;
- fftPlan->length[1] = 64;// fftPlan->length[1];
+ fftPlan->precision = CLFFT_DOUBLE;*/
+ //fftPlan->length[1] = 49;// fftPlan->length[1];
fftPlan->length[0] = fftPlan->length[1] * 2;
fftPlan->action = new FFTGeneratedTransposeNonSquareAction(plHandle, fftPlan, *commQueueFFT, err);
OPENCL_V(err, "FFTGeneratedTransposeNonSquareAction() failed");
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits mailing list