[clfft] 07/10: fixing bugs in r2c transforms, fixes for long delay in test launch

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Wed Mar 30 15:49:30 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch master
in repository clfft.

commit 625d3595579949f706125d92a82eeb402d64f46d
Author: bragadeesh <bragadeesh.natarajan at amd>
Date:   Thu Mar 24 17:02:41 2016 -0700

    fixing bugs in r2c transforms, fixes for long delay in test launch
---
 src/library/generator.stockham.cpp      |  2 +-
 src/library/generator.transpose.gcn.cpp |  7 +++--
 src/tests/buffer.h                      |  2 +-
 src/tests/test_constants.cpp            | 50 +++++++++++++++++++--------------
 4 files changed, 35 insertions(+), 26 deletions(-)

diff --git a/src/library/generator.stockham.cpp b/src/library/generator.stockham.cpp
index e69a25e..a2ddfa9 100644
--- a/src/library/generator.stockham.cpp
+++ b/src/library/generator.stockham.cpp
@@ -4286,7 +4286,7 @@ namespace StockhamGenerator
 				{
 					size_t Nt = 1 + length/2;
 					str += 	"\n\t\tif( (bt == 0) || (2*bt == ";
-					str += SztToStr(params.fft_realSpecial_Nr); str += ") ) break;\n";
+					str += SztToStr(params.fft_realSpecial_Nr); str += ") ) { rw = 0; }\n";
 
 					str += "\t\tlwbOut += ("; str += SztToStr(params.fft_realSpecial_Nr);
 					str += " - 2*bt)*"; str += SztToStr(Nt); str += ";\n";
diff --git a/src/library/generator.transpose.gcn.cpp b/src/library/generator.transpose.gcn.cpp
index 1f1f205..808bdf7 100644
--- a/src/library/generator.transpose.gcn.cpp
+++ b/src/library/generator.transpose.gcn.cpp
@@ -456,7 +456,6 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 		clKernWrite( transKernel, 3 ) << "const size_t reShapeFactor = " << reShapeFactor << ";" << std::endl;
 		clKernWrite( transKernel, 3 ) << "const size_t wgUnroll = " << loopCount << ";" << std::endl;
 		clKernWrite( transKernel, 3 ) << "const Tile wgTileExtent = { localExtent.x * reShapeFactor, localExtent.y / reShapeFactor };" << std::endl;
-		clKernWrite( transKernel, 3 ) << "const size_t tileSizeinUnits = wgTileExtent.x * wgTileExtent.y * wgUnroll;" << std::endl << std::endl;
 
 
 		// This is the size of a matrix in the y dimension in units of group size; used to calculate stride[2] indexing
@@ -906,6 +905,8 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 			}
 			else if(branchingInAny)
 			{
+				std::string limitToWGForRealSpecial = params.transOutHorizontal ? "groupIndex.x" : "currDimIndex";
+
 				if(i == 0)
 				{
 					if(branchingInGroupX)
@@ -914,7 +915,7 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 						if(params.fft_realSpecial)
 						{
 							clKernWrite( transKernel, 9 ) << "if( ((" << wIndexY << " == " << wIndexXEnd - 1 << ") && (" <<
-								wIndexX << " < 1)) ";
+								wIndexX << " < 1) && (" << limitToWGForRealSpecial << " == " << cornerGroupX << ")) ";
 							if(wIndexXEnd > 1)
 							{
 								clKernWrite( transKernel, 0 ) << "|| (" << wIndexY << " < " << wIndexXEnd - 1 << ") )" << std::endl;
@@ -936,7 +937,7 @@ static clfftStatus genTransposeKernel( const FFTGeneratedTransposeGCNAction::Sig
 						if(params.fft_realSpecial)
 						{
 							clKernWrite( transKernel, 9 ) << "if( ((" << wIndexX << " == " << wIndexYEnd - 1 << ") && (" <<
-								wIndexY << " < 1)) ";
+								wIndexY << " < 1) && (" << limitToWGForRealSpecial << " == " << cornerGroupY << ")) ";
 							if(wIndexYEnd > 1)
 							{
 								clKernWrite( transKernel, 0 ) << "|| (" << wIndexX << " < " << wIndexYEnd - 1 << ") )" << std::endl;
diff --git a/src/tests/buffer.h b/src/tests/buffer.h
index cdaa2e8..4ca2722 100644
--- a/src/tests/buffer.h
+++ b/src/tests/buffer.h
@@ -453,7 +453,7 @@ private:
 			const size_t max_mismatches_output = default_number_of_mismatches_to_output;
 
 			if( mismatched_point_indices.size() != 0 && max_mismatches_output != 0 && suppress_output == false) {
-				std::cout << std::endl << std::dec << mismatched_point_indices.size() << " of " << number_of_data_points_single_batch()
+				std::cout << std::endl << std::dec << mismatched_point_indices.size() << " of " << batch_size() * number_of_data_points_single_batch()
 					<<" data points did not match.  The first " << max_mismatches_output << " (max) mismatching points follow:" << std::endl;
 
 				std::cout << std::endl << "(array index)(index) ";
diff --git a/src/tests/test_constants.cpp b/src/tests/test_constants.cpp
index 17d04cd..1a69d1f 100644
--- a/src/tests/test_constants.cpp
+++ b/src/tests/test_constants.cpp
@@ -84,29 +84,37 @@ void handle_exception( const std::exception& except )
 /*****************************************************/
 size_t max_mem_available_on_cl_device(size_t device_index) {
 
-	std::vector< cl_device_id >	device_id;
-	cl_context tempContext = NULL;
-	device_id = initializeCL(
-		g_device_type,
-		(cl_int)device_index,
-		g_platform_id,
-		tempContext,
-		false
-		);
+	static size_t g_device_max_mem_size  = 0;
 
-	cl_ulong device_max_to_allocate = 0;
-	if( device_id.size() == 0 || device_index > device_id.size() )
+	// this is not thread-safe using globals, it is just quick fix for now, todo proper fix
+	if (g_device_max_mem_size == 0)
 	{
-	}
-	else
-	{
-		OPENCL_V_THROW( ::clGetDeviceInfo( device_id[device_index], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( cl_ulong ), &device_max_to_allocate, NULL ),
-			"Getting CL_DEVICE_MAX_MEM_ALLOC_SIZE device info ( ::clGetDeviceInfo() )" );
-	}
+		std::vector< cl_device_id >	device_id;
+		cl_context tempContext = NULL;
+		device_id = initializeCL(
+			g_device_type,
+			(cl_int)device_index,
+			g_platform_id,
+			tempContext,
+			false
+			);
 
-	cl_command_queue tempQueue = NULL;
-	cl_event tempEvent = NULL;
-	::cleanupCL( &tempContext, &tempQueue, 0, NULL, 0, NULL, &tempEvent );
+		cl_ulong device_max_to_allocate = 0;
+		if (device_id.size() == 0 || device_index > device_id.size())
+		{
+		}
+		else
+		{
+			OPENCL_V_THROW(::clGetDeviceInfo(device_id[device_index], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &device_max_to_allocate, NULL),
+				"Getting CL_DEVICE_MAX_MEM_ALLOC_SIZE device info ( ::clGetDeviceInfo() )");
+		}
+
+		cl_command_queue tempQueue = NULL;
+		cl_event tempEvent = NULL;
+		::cleanupCL(&tempContext, &tempQueue, 0, NULL, 0, NULL, &tempEvent);
+
+		g_device_max_mem_size = static_cast<size_t>(device_max_to_allocate);
+	}
 
-	return static_cast<size_t>(device_max_to_allocate);
+	return g_device_max_mem_size;
 }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list