[beignet] 06/07: Fix excessive rounding error in tgamma

Fri Apr 24 21:56:27 UTC 2015

This is an automated email from the git hooks/post-receive script.

rnpalmer-guest pushed a commit to branch master
in repository beignet.

commit 1712ccfdb8bd87874971b11ad5ae3b9ab3d69f47
Author: Rebecca N. Palmer <rebecca_palmer at zoho.com>
Date:   Fri Apr 24 21:31:57 2015 +0100

    Fix excessive rounding error in tgamma
---
 debian/README.Debian                 |   3 -
 debian/changelog                     |   1 +
 debian/patches/series                |   1 +
 debian/patches/tgamma-accuracy.patch | 119 +++++++++++++++++++++++++++++++++++
 4 files changed, 121 insertions(+), 3 deletions(-)

diff --git a/debian/README.Debian b/debian/README.Debian
index ab09048..71d581a 100644
--- a/debian/README.Debian
+++ b/debian/README.Debian
@@ -9,6 +9,3 @@ This slows down some of the math functions as they cannot use the
 (lower-precision) native instructions: most only moderately (eg. ~30%
 for sin/cos) but ~10-fold for pow and rootn (though not pown or sqrt).
 Applications that prefer speed to precision may use the native_* functions.
-
-The tgamma function is implemented as exp(lgamma) and is hence
-precision non-compliant for large outputs.
diff --git a/debian/changelog b/debian/changelog
index 4967630..c91d939 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -3,6 +3,7 @@ beignet (1.0.3-1) UNRELEASED; urgency=medium
   * New upstream release.
   * Drop patches applied upstream, refresh others.
   * Fix bug in builtin_pow test.
+  * Fix excessive rounding error in tgamma.
 
  -- Rebecca N. Palmer <rebecca_palmer at zoho.com>  Fri, 24 Apr 2015 21:16:12 +0100
 
diff --git a/debian/patches/series b/debian/patches/series
index d7373e1..071a992 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -7,3 +7,4 @@ reduce-notfound-output.patch
 default-to-full-precision.patch
 shared-llvm.patch
 builtin_pow-fix-spurious-failure.patch
+tgamma-accuracy.patch
diff --git a/debian/patches/tgamma-accuracy.patch b/debian/patches/tgamma-accuracy.patch
new file mode 100644
index 0000000..f9b9b03
--- /dev/null
+++ b/debian/patches/tgamma-accuracy.patch
@@ -0,0 +1,119 @@
+Description: Make tgamma meet the OpenCL accuracy standard
+
+tgamma=exp(lgamma) was not accurate enough for approx. x>8
+
+Author: Rebecca N. Palmer <rebecca_palmer at zoho.com>
+Forwarded: http://lists.freedesktop.org/archives/beignet/2015-April/005548.html
+
+--- beignet-1.0.2.orig/backend/src/libocl/tmpl/ocl_math.tmpl.cl
++++ beignet-1.0.2/backend/src/libocl/tmpl/ocl_math.tmpl.cl
+@@ -1746,12 +1746,6 @@ OVERLOADABLE float __gen_ocl_internal_ex
+   }
+ }
+ 
+-INLINE_OVERLOADABLE float tgamma(float x) {
+-  float y;
+-  int s;
+-  y=lgamma_r(x,&s);
+-  return __gen_ocl_internal_exp(y)*s;
+-}
+ 
+ /* erf,erfc from glibc s_erff.c -- float version of s_erf.c.
+  * Conversion to float by Ian Lance Taylor, Cygnus Support, ian at cygnus.com.
+@@ -3167,6 +3167,95 @@ OVERLOADABLE float __gen_ocl_internal_po
+   return sn*z;
+ }
+ 
++OVERLOADABLE float tgamma (float x)
++{/*based on glibc tgammaf*/
++  unsigned int hx;
++
++  GEN_OCL_GET_FLOAT_WORD(hx,x);
++
++  if (hx == 0xff800000)
++    {
++      /* x == -Inf.  According to ISO this is NaN.  */
++      return NAN;
++    }
++  if ((hx & 0x7f800000) == 0x7f800000)
++    {
++      /* Positive infinity (return positive infinity) or NaN (return
++	 NaN).  */
++      return x;
++    }
++  if (x < 0.0f && __gen_ocl_internal_floor (x) == x)
++    {
++      /* integer x < 0 */
++      return NAN;
++    }
++
++  if (x >= 36.0f)
++    {
++      /* Overflow.  */
++      return INFINITY;
++    }
++  else if (x <= 0.0f && x >= -FLT_EPSILON / 4.0f)
++    {
++      return 1.0f / x;
++    }
++  else
++    {
++      float sinpix = __gen_ocl_internal_sinpi(x);
++      if (x <= -42.0f)
++	/* Underflow.  */
++	{return 0.0f * sinpix /*for sign*/;}
++      int exp2_adj = 0;
++      float x_abs = __gen_ocl_fabs(x);
++      float gam0;
++      
++      if (x_abs < 4.0f) {
++        /* gamma = exp(lgamma) is only accurate for small lgamma */
++        float prod,x_adj;
++        if (x_abs < 0.5f) {
++          prod = 1.0f / x_abs;
++          x_adj = x_abs + 1.0f;
++        } else if (x_abs <= 1.5f) {
++          prod = 1.0f;
++          x_adj = x_abs;
++        } else if (x_abs < 2.5f) {
++          x_adj = x_abs - 1.0f;
++          prod = x_adj;
++        } else {
++          x_adj = x_abs - 2.0f;
++          prod = x_adj * (x_abs - 1.0f);
++        }
++        gam0 = __gen_ocl_internal_exp (lgamma (x_adj)) * prod;
++      }
++      else {
++        /* Compute gamma (X) using Stirling's approximation,
++  	 starting by computing pow (X, X) with a power of 2
++  	 factored out to avoid intermediate overflow.  */
++        float x_int = __gen_ocl_internal_round (x_abs);
++        float x_frac = x_abs - x_int;
++        int x_log2;
++        float x_mant = frexp (x_abs, &x_log2);
++        if (x_mant < M_SQRT1_2_F)
++          {
++          x_log2--;
++          x_mant *= 2.0f;
++          }
++        exp2_adj = x_log2 * (int) x_int;
++        float ret = (__gen_ocl_internal_pow(x_mant, x_abs)
++  		   * exp2 (x_log2 * x_frac)
++  		   * __gen_ocl_internal_exp (-x_abs)
++  		   * sqrt (2.0f * M_PI_F / x_abs) );
++  
++        float x2 = x_abs * x_abs;
++        float bsum = (0x3.403404p-12f / x2 -0xb.60b61p-12f) / x2 + 0x1.555556p-4f;
++        gam0 = ret + ret * __gen_ocl_internal_expm1 (bsum / x_abs);
++      }
++      if (x > 0.0f) {return __gen_ocl_internal_ldexp (gam0, exp2_adj);}
++      float gam1 = M_PI_F / (-x * sinpix * gam0);
++      return __gen_ocl_internal_ldexp (gam1, -exp2_adj);
++    }
++}
++
+ OVERLOADABLE float hypot(float x, float y) {
+   if (__ocl_math_fastpath_flag)
+     return __gen_ocl_internal_fastpath_hypot(x, y);
+

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-opencl/beignet.git