[clfft] 09/13: do not split 1d if size<threshold

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Sun Jun 5 01:29:39 UTC 2016


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/master
in repository clfft.

commit dd116f5de98cb3d9760f9562a59ee0f8de0fca3d
Author: Timmy <timmy.liu at amd.com>
Date:   Thu May 26 13:22:23 2016 -0500

    do not split 1d if size<threshold
---
 src/library/plan.cpp | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/src/library/plan.cpp b/src/library/plan.cpp
index e532663..19e50dc 100644
--- a/src/library/plan.cpp
+++ b/src/library/plan.cpp
@@ -66,7 +66,7 @@ static bool pow235(size_t num, size_t &pow2, size_t &pow3, size_t &pow5)
 	return true;
 }
 
-static bool split1D_for_inplace(size_t num, vector<vector<size_t> > &splitNums, clfftPrecision precision)
+static bool split1D_for_inplace(size_t num, vector<vector<size_t> > &splitNums, clfftPrecision precision, size_t threshold)
 {
 	/* a helper function to split big 1D to friendly 2D sizes for inplace transpose kernels
 	   currently only radix 2, 3 and 5 are supported
@@ -74,9 +74,6 @@ static bool split1D_for_inplace(size_t num, vector<vector<size_t> > &splitNums,
 	   And this mupliple is radix2, 3 or 5.
 	   each splited dimentsion should be further splited until that it is smaller than 4096
 	*/
-	size_t threshold = 4096;
-	if (precision == CLFFT_DOUBLE)
-		threshold = 2048;
 	if (num <= threshold)
 		return true;
 	if (num % 2 != 0 && num % 3 != 0 && num % 5 != 0)
@@ -174,8 +171,8 @@ static bool split1D_for_inplace(size_t num, vector<vector<size_t> > &splitNums,
 	splitVec.push_back(temp);
 	splitNums.push_back(splitVec);
 
-	status = status && split1D_for_inplace(temp*divide_factor, splitNums, precision);
-	status = status && split1D_for_inplace(temp, splitNums, precision);
+	status = status && split1D_for_inplace(temp*divide_factor, splitNums, precision, threshold);
+	status = status && split1D_for_inplace(temp, splitNums, precision, threshold);
 	return status;
 
 }
@@ -794,13 +791,17 @@ clfftStatus	clfftBakePlan( clfftPlanHandle plHandle, cl_uint numQueues, cl_comma
 				if (fftPlan->length[0] == 354294)
 					clLengths[1] = 243;
 				*/
+				size_t threshold = 4096;
+				if (fftPlan->precision == CLFFT_DOUBLE)
+					threshold = 2048;
 				if (clfftGetRequestLibNoMemAlloc() &&
 					fftPlan->placeness == CLFFT_INPLACE &&
-					(fftPlan->inputLayout == fftPlan->outputLayout) )
+					(fftPlan->inputLayout == fftPlan->outputLayout)
+					&& fftPlan->length[0] > threshold)
 				{
 					//for inplace fft with inplace transpose, the split logic is different
 					vector<vector<size_t> > splitNums;
-					bool implemented = split1D_for_inplace(fftPlan->length[0], splitNums, fftPlan->precision);
+					bool implemented = split1D_for_inplace(fftPlan->length[0], splitNums, fftPlan->precision, threshold);
 					if (implemented)
 						clLengths[1] = splitNums[0][0];
 				}

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/clfft.git



More information about the debian-science-commits mailing list