[clfft] 07/10: fixing bugs in r2c transforms, fixes for long delay in test launch
Ghislain Vaillant
ghisvail-guest at moszumanska.debian.org
Wed Mar 30 15:49:30 UTC 2016
This is an automated email from the git hooks/post-receive script.
ghisvail-guest pushed a commit to branch master
in repository clfft.
commit 625d3595579949f706125d92a82eeb402d64f46d
Author: bragadeesh <bragadeesh.natarajan at amd>
Date: Thu Mar 24 17:02:41 2016 -0700
fixing bugs in r2c transforms, fixes for long delay in test launch
---
src/library/generator.stockham.cpp | 2 +-
src/library/generator.transpose.gcn.cpp | 7 +++--
src/tests/buffer.h | 2 +-
src/tests/test_constants.cpp | 50 +++++++++++++++++++--------------
4 files changed, 35 insertions(+), 26 deletions(-)
diff --git a/src/library/generator.stockham.cpp b/src/library/generator.stockham.cpp
index e69a25e..a2ddfa9 100644
--- a/src/library/generator.stockham.cpp
+++ b/src/library/generator.stockham.cpp
@@ -4286,7 +4286,7 @@ namespace StockhamGenerator
{
size_t Nt = 1 + length/2;
str += "\n\t\tif( (bt == 0) || (2*bt == ";
- str += SztToStr(params.fft_realSpecial_Nr); str += ") ) break;\n";
+ str += SztToStr(params.fft_realSpecial_Nr); str += ") ) { rw = 0; }\n";
str += "\t\tlwbOut += ("; str += SztToStr(params.fft_realSpecial_Nr);
str += " - 2*bt)*"; str += SztToStr(Nt); str += ";\n";
diff --git a/src/library/generator.transpose.gcn.cpp b/src/library/generator.transpose.gcn.cpp
index 1f1f205..808bdf7 100644
--- a/src/library/generator.transpose.gcn.cpp
+++ b/src/library/generator.transpose.gcn.cpp
@@ -456,7 +456,6 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
clKernWrite( transKernel, 3 ) << "const size_t reShapeFactor = " << reShapeFactor << ";" << std::endl;
clKernWrite( transKernel, 3 ) << "const size_t wgUnroll = " << loopCount << ";" << std::endl;
clKernWrite( transKernel, 3 ) << "const Tile wgTileExtent = { localExtent.x * reShapeFactor, localExtent.y / reShapeFactor };" << std::endl;
- clKernWrite( transKernel, 3 ) << "const size_t tileSizeinUnits = wgTileExtent.x * wgTileExtent.y * wgUnroll;" << std::endl << std::endl;
// This is the size of a matrix in the y dimension in units of group size; used to calculate stride[2] indexing
@@ -906,6 +905,8 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
}
else if(branchingInAny)
{
+ std::string limitToWGForRealSpecial = params.transOutHorizontal ? "groupIndex.x" : "currDimIndex";
+
if(i == 0)
{
if(branchingInGroupX)
@@ -914,7 +915,7 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
if(params.fft_realSpecial)
{
clKernWrite( transKernel, 9 ) << "if( ((" << wIndexY << " == " << wIndexXEnd - 1 << ") && (" <<
- wIndexX << " < 1)) ";
+ wIndexX << " < 1) && (" << limitToWGForRealSpecial << " == " << cornerGroupX << ")) ";
if(wIndexXEnd > 1)
{
clKernWrite( transKernel, 0 ) << "|| (" << wIndexY << " < " << wIndexXEnd - 1 << ") )" << std::endl;
@@ -936,7 +937,7 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
if(params.fft_realSpecial)
{
clKernWrite( transKernel, 9 ) << "if( ((" << wIndexX << " == " << wIndexYEnd - 1 << ") && (" <<
- wIndexY << " < 1)) ";
+ wIndexY << " < 1) && (" << limitToWGForRealSpecial << " == " << cornerGroupY << ")) ";
if(wIndexYEnd > 1)
{
clKernWrite( transKernel, 0 ) << "|| (" << wIndexX << " < " << wIndexYEnd - 1 << ") )" << std::endl;
diff --git a/src/tests/buffer.h b/src/tests/buffer.h
index cdaa2e8..4ca2722 100644
--- a/src/tests/buffer.h
+++ b/src/tests/buffer.h
@@ -453,7 +453,7 @@ private:
const size_t max_mismatches_output = default_number_of_mismatches_to_output;
if( mismatched_point_indices.size() != 0 && max_mismatches_output != 0 && suppress_output == false) {
- std::cout << std::endl << std::dec << mismatched_point_indices.size() << " of " << number_of_data_points_single_batch()
+ std::cout << std::endl << std::dec << mismatched_point_indices.size() << " of " << batch_size() * number_of_data_points_single_batch()
<<" data points did not match. The first " << max_mismatches_output << " (max) mismatching points follow:" << std::endl;
std::cout << std::endl << "(array index)(index) ";
diff --git a/src/tests/test_constants.cpp b/src/tests/test_constants.cpp
index 17d04cd..1a69d1f 100644
--- a/src/tests/test_constants.cpp
+++ b/src/tests/test_constants.cpp
@@ -84,29 +84,37 @@ void handle_exception( const std::exception& except )
/*****************************************************/
size_t max_mem_available_on_cl_device(size_t device_index) {
- std::vector< cl_device_id > device_id;
- cl_context tempContext = NULL;
- device_id = initializeCL(
- g_device_type,
- (cl_int)device_index,
- g_platform_id,
- tempContext,
- false
- );
+ static size_t g_device_max_mem_size = 0;
- cl_ulong device_max_to_allocate = 0;
- if( device_id.size() == 0 || device_index > device_id.size() )
+ // This cache is not thread-safe (it uses an unsynchronized static); it is just a quick fix for now. TODO: proper fix.
+ if (g_device_max_mem_size == 0)
{
- }
- else
- {
- OPENCL_V_THROW( ::clGetDeviceInfo( device_id[device_index], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( cl_ulong ), &device_max_to_allocate, NULL ),
- "Getting CL_DEVICE_MAX_MEM_ALLOC_SIZE device info ( ::clGetDeviceInfo() )" );
- }
+ std::vector< cl_device_id > device_id;
+ cl_context tempContext = NULL;
+ device_id = initializeCL(
+ g_device_type,
+ (cl_int)device_index,
+ g_platform_id,
+ tempContext,
+ false
+ );
- cl_command_queue tempQueue = NULL;
- cl_event tempEvent = NULL;
- ::cleanupCL( &tempContext, &tempQueue, 0, NULL, 0, NULL, &tempEvent );
+ cl_ulong device_max_to_allocate = 0;
+ if (device_id.size() == 0 || device_index > device_id.size())
+ {
+ }
+ else
+ {
+ OPENCL_V_THROW(::clGetDeviceInfo(device_id[device_index], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &device_max_to_allocate, NULL),
+ "Getting CL_DEVICE_MAX_MEM_ALLOC_SIZE device info ( ::clGetDeviceInfo() )");
+ }
+
+ cl_command_queue tempQueue = NULL;
+ cl_event tempEvent = NULL;
+ ::cleanupCL(&tempContext, &tempQueue, 0, NULL, 0, NULL, &tempEvent);
+
+ g_device_max_mem_size = static_cast<size_t>(device_max_to_allocate);
+ }
- return static_cast<size_t>(device_max_to_allocate);
+ return g_device_max_mem_size;
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git
More information about the debian-science-commits
mailing list