[beignet] 06/07: Fix excessive rounding error in tgamma
Rebecca Palmer
rnpalmer-guest at moszumanska.debian.org
Fri Apr 24 21:56:27 UTC 2015
This is an automated email from the git hooks/post-receive script.
rnpalmer-guest pushed a commit to branch master
in repository beignet.
commit 1712ccfdb8bd87874971b11ad5ae3b9ab3d69f47
Author: Rebecca N. Palmer <rebecca_palmer at zoho.com>
Date: Fri Apr 24 21:31:57 2015 +0100
Fix excessive rounding error in tgamma
---
debian/README.Debian | 3 -
debian/changelog | 1 +
debian/patches/series | 1 +
debian/patches/tgamma-accuracy.patch | 119 +++++++++++++++++++++++++++++++++++
4 files changed, 121 insertions(+), 3 deletions(-)
diff --git a/debian/README.Debian b/debian/README.Debian
index ab09048..71d581a 100644
--- a/debian/README.Debian
+++ b/debian/README.Debian
@@ -9,6 +9,3 @@ This slows down some of the math functions as they cannot use the
(lower-precision) native instructions: most only moderately (eg. ~30%
for sin/cos) but ~10-fold for pow and rootn (though not pown or sqrt).
Applications that prefer speed to precision may use the native_* functions.
-
-The tgamma function is implemented as exp(lgamma) and is hence
-precision non-compliant for large outputs.
diff --git a/debian/changelog b/debian/changelog
index 4967630..c91d939 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -3,6 +3,7 @@ beignet (1.0.3-1) UNRELEASED; urgency=medium
* New upstream release.
* Drop patches applied upstream, refresh others.
* Fix bug in builtin_pow test.
+ * Fix excessive rounding error in tgamma.
-- Rebecca N. Palmer <rebecca_palmer at zoho.com> Fri, 24 Apr 2015 21:16:12 +0100
diff --git a/debian/patches/series b/debian/patches/series
index d7373e1..071a992 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -7,3 +7,4 @@ reduce-notfound-output.patch
default-to-full-precision.patch
shared-llvm.patch
builtin_pow-fix-spurious-failure.patch
+tgamma-accuracy.patch
diff --git a/debian/patches/tgamma-accuracy.patch b/debian/patches/tgamma-accuracy.patch
new file mode 100644
index 0000000..f9b9b03
--- /dev/null
+++ b/debian/patches/tgamma-accuracy.patch
@@ -0,0 +1,119 @@
+Description: Make tgamma meet the OpenCL accuracy standard
+
+tgamma=exp(lgamma) was not accurate enough for approx. x>8
+
+Author: Rebecca N. Palmer <rebecca_palmer at zoho.com>
+Forwarded: http://lists.freedesktop.org/archives/beignet/2015-April/005548.html
+
+--- beignet-1.0.2.orig/backend/src/libocl/tmpl/ocl_math.tmpl.cl
++++ beignet-1.0.2/backend/src/libocl/tmpl/ocl_math.tmpl.cl
+@@ -1746,12 +1746,6 @@ OVERLOADABLE float __gen_ocl_internal_ex
+ }
+ }
+
+-INLINE_OVERLOADABLE float tgamma(float x) {
+- float y;
+- int s;
+- y=lgamma_r(x,&s);
+- return __gen_ocl_internal_exp(y)*s;
+-}
+
+ /* erf,erfc from glibc s_erff.c -- float version of s_erf.c.
+ * Conversion to float by Ian Lance Taylor, Cygnus Support, ian at cygnus.com.
+@@ -3167,6 +3167,95 @@ OVERLOADABLE float __gen_ocl_internal_po
+ return sn*z;
+ }
+
++OVERLOADABLE float tgamma (float x)
++{/*based on glibc tgammaf*/
++ unsigned int hx;
++
++ GEN_OCL_GET_FLOAT_WORD(hx,x);
++
++ if (hx == 0xff800000)
++ {
++ /* x == -Inf. According to ISO this is NaN. */
++ return NAN;
++ }
++ if ((hx & 0x7f800000) == 0x7f800000)
++ {
++ /* Positive infinity (return positive infinity) or NaN (return
++ NaN). */
++ return x;
++ }
++ if (x < 0.0f && __gen_ocl_internal_floor (x) == x)
++ {
++ /* integer x < 0 */
++ return NAN;
++ }
++
++ if (x >= 36.0f)
++ {
++ /* Overflow. */
++ return INFINITY;
++ }
++ else if (x <= 0.0f && x >= -FLT_EPSILON / 4.0f)
++ {
++ return 1.0f / x;
++ }
++ else
++ {
++ float sinpix = __gen_ocl_internal_sinpi(x);
++ if (x <= -42.0f)
++ /* Underflow. */
++ {return 0.0f * sinpix /*for sign*/;}
++ int exp2_adj = 0;
++ float x_abs = __gen_ocl_fabs(x);
++ float gam0;
++
++ if (x_abs < 4.0f) {
++ /* gamma = exp(lgamma) is only accurate for small lgamma */
++ float prod,x_adj;
++ if (x_abs < 0.5f) {
++ prod = 1.0f / x_abs;
++ x_adj = x_abs + 1.0f;
++ } else if (x_abs <= 1.5f) {
++ prod = 1.0f;
++ x_adj = x_abs;
++ } else if (x_abs < 2.5f) {
++ x_adj = x_abs - 1.0f;
++ prod = x_adj;
++ } else {
++ x_adj = x_abs - 2.0f;
++ prod = x_adj * (x_abs - 1.0f);
++ }
++ gam0 = __gen_ocl_internal_exp (lgamma (x_adj)) * prod;
++ }
++ else {
++ /* Compute gamma (X) using Stirling's approximation,
++ starting by computing pow (X, X) with a power of 2
++ factored out to avoid intermediate overflow. */
++ float x_int = __gen_ocl_internal_round (x_abs);
++ float x_frac = x_abs - x_int;
++ int x_log2;
++ float x_mant = frexp (x_abs, &x_log2);
++ if (x_mant < M_SQRT1_2_F)
++ {
++ x_log2--;
++ x_mant *= 2.0f;
++ }
++ exp2_adj = x_log2 * (int) x_int;
++ float ret = (__gen_ocl_internal_pow(x_mant, x_abs)
++ * exp2 (x_log2 * x_frac)
++ * __gen_ocl_internal_exp (-x_abs)
++ * sqrt (2.0f * M_PI_F / x_abs) );
++
++ float x2 = x_abs * x_abs;
++ float bsum = (0x3.403404p-12f / x2 -0xb.60b61p-12f) / x2 + 0x1.555556p-4f;
++ gam0 = ret + ret * __gen_ocl_internal_expm1 (bsum / x_abs);
++ }
++ if (x > 0.0f) {return __gen_ocl_internal_ldexp (gam0, exp2_adj);}
++ float gam1 = M_PI_F / (-x * sinpix * gam0);
++ return __gen_ocl_internal_ldexp (gam1, -exp2_adj);
++ }
++}
++
+ OVERLOADABLE float hypot(float x, float y) {
+ if (__ocl_math_fastpath_flag)
+ return __gen_ocl_internal_fastpath_hypot(x, y);
+
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-opencl/beignet.git
More information about the Pkg-opencl-commits
mailing list