gonzui


Format: Advanced Search

t2ex/bsd_source/lib/libc/src_bsd/math/s_fma.cbare sourcepermlink (0.04 seconds)

Search this content:

    1: /*      $OpenBSD: s_fma.c,v 1.1 2011/07/06 00:02:42 martynas Exp $   */
    2: 
    3: /*-
    4:  * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG>
    5:  * All rights reserved.
    6:  *
    7:  * Redistribution and use in source and binary forms, with or without
    8:  * modification, are permitted provided that the following conditions
    9:  * are met:
   10:  * 1. Redistributions of source code must retain the above copyright
   11:  *    notice, this list of conditions and the following disclaimer.
   12:  * 2. Redistributions in binary form must reproduce the above copyright
   13:  *    notice, this list of conditions and the following disclaimer in the
   14:  *    documentation and/or other materials provided with the distribution.
   15:  *
   16:  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   17:  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18:  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19:  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
   20:  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21:  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22:  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23:  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24:  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25:  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26:  * SUCH DAMAGE.
   27:  */
   28: 
   29: /* LINTLIBRARY */
   30: 
   31: #include <sys/cdefs.h>
   32: #if 0
   33: __FBSDID("$FreeBSD: src/lib/msun/src/s_fma.c,v 1.5 2008/04/03 06:14:51 das Exp $");
   34: #endif
   35: 
   36: #include <fenv.h>
   37: #include <float.h>
   38: #include <math.h>
   39: 
   40: /*
   41:  * Fused multiply-add: Compute x * y + z with a single rounding error.
   42:  *
   43:  * We use scaling to avoid overflow/underflow, along with the
   44:  * canonical precision-doubling technique adapted from:
   45:  *
   46:  *      Dekker, T.  A Floating-Point Technique for Extending the
   47:  *      Available Precision.  Numer. Math. 18, 224-242 (1971).
   48:  *
   49:  * This algorithm is sensitive to the rounding precision.  FPUs such
   50:  * as the i387 must be set in double-precision mode if variables are
   51:  * to be stored in FP registers in order to avoid incorrect results.
   52:  * This is the default on FreeBSD, but not on many other systems.
   53:  *
   54:  * Hardware instructions should be used on architectures that support it,
   55:  * since this implementation will likely be several times slower.
   56:  */
   57: #if LDBL_MANT_DIG != 113
   58: double
   59: fma(double x, double y, double z)
   60: {
   61:         static const double split = 0x1p27 + 1.0;
   62:         double xs, ys, zs;
   63:         double c, cc, hx, hy, p, q, tx, ty;
   64:         double r, rr, s;
   65:         int oround;
   66:         int ex, ey, ez;
   67:         int spread;
   68: 
   69:         /*
   70:          * Handle special cases. The order of operations and the particular
   71:          * return values here are crucial in handling special cases involving
   72:          * infinities, NaNs, overflows, and signed zeroes correctly.
   73:          */
   74:         if (x == 0.0 || y == 0.0)
   75:                 return (x * y + z);
   76:         if (z == 0.0)
   77:                 return (x * y);
   78:         if (!isfinite(x) || !isfinite(y))
   79:                 return (x * y + z);
   80:         if (!isfinite(z))
   81:                 return (z);
   82: 
   83:         xs = frexp(x, &ex);
   84:         ys = frexp(y, &ey);
   85:         zs = frexp(z, &ez);
   86:         oround = /*fegetround()*/FE_TONEAREST;
   87:         spread = ex + ey - ez;
   88: 
   89:         /*
   90:          * If x * y and z are many orders of magnitude apart, the scaling
   91:          * will overflow, so we handle these cases specially.  Rounding
   92:          * modes other than FE_TONEAREST are painful.
   93:          */
   94:         if (spread > DBL_MANT_DIG * 2) {
   95:                 fenv_t env;
   96:                 /*feraiseexcept(FE_INEXACT)*/;
   97:                 switch(oround) {
   98:                 case FE_TONEAREST:
   99:                         return (x * y);
  100:                 case FE_TOWARDZERO:
  101:                         if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
  102:                                 return (x * y);
  103:                         /*feholdexcept(&env)*/;
  104:                         r = x * y;
  105:                         if (!/*fetestexcept(FE_INEXACT)*/0)
  106:                                 r = nextafter(r, 0);
  107:                         /*feupdateenv(&env)*/;
  108:                         return (r);
  109:                 case FE_DOWNWARD:
  110:                         if (z > 0.0)
  111:                                 return (x * y);
  112:                         /*feholdexcept(&env)*/;
  113:                         r = x * y;
  114:                         if (!/*fetestexcept(FE_INEXACT)*/0)
  115:                                 r = nextafter(r, -INFINITY);
  116:                         /*feupdateenv(&env)*/;
  117:                         return (r);
  118:                 default:      /* FE_UPWARD */
  119:                         if (z < 0.0)
  120:                                 return (x * y);
  121:                         /*feholdexcept(&env)*/;
  122:                         r = x * y;
  123:                         if (!/*fetestexcept(FE_INEXACT)*/0)
  124:                                 r = nextafter(r, INFINITY);
  125:                         /*feupdateenv(&env)*/;
  126:                         return (r);
  127:                 }
  128:         }
  129:         if (spread < -DBL_MANT_DIG) {
  130:                 /*feraiseexcept(FE_INEXACT)*/;
  131:                 if (!isnormal(z))
  132:                         /*feraiseexcept(FE_UNDERFLOW)*/;
  133:                 switch (oround) {
  134:                 case FE_TONEAREST:
  135:                         return (z);
  136:                 case FE_TOWARDZERO:
  137:                         if (x > 0.0 ^ y < 0.0 ^ z < 0.0)
  138:                                 return (z);
  139:                         else
  140:                                 return (nextafter(z, 0));
  141:                 case FE_DOWNWARD:
  142:                         if (x > 0.0 ^ y < 0.0)
  143:                                 return (z);
  144:                         else
  145:                                 return (nextafter(z, -INFINITY));
  146:                 default:      /* FE_UPWARD */
  147:                         if (x > 0.0 ^ y < 0.0)
  148:                                 return (nextafter(z, INFINITY));
  149:                         else
  150:                                 return (z);
  151:                 }
  152:         }
  153: 
  154:         /*
  155:          * Use Dekker's algorithm to perform the multiplication and
  156:          * subsequent addition in twice the machine precision.
  157:          * Arrange so that x * y = c + cc, and x * y + z = r + rr.
  158:          */
  159:         /*fesetround(FE_TONEAREST)*/;
  160: 
  161:         p = xs * split;
  162:         hx = xs - p;
  163:         hx += p;
  164:         tx = xs - hx;
  165: 
  166:         p = ys * split;
  167:         hy = ys - p;
  168:         hy += p;
  169:         ty = ys - hy;
  170: 
  171:         p = hx * hy;
  172:         q = hx * ty + tx * hy;
  173:         c = p + q;
  174:         cc = p - c + q + tx * ty;
  175: 
  176:         zs = ldexp(zs, -spread);
  177:         r = c + zs;
  178:         s = r - c;
  179:         rr = (c - (r - s)) + (zs - s) + cc;
  180: 
  181:         spread = ex + ey;
  182:         if (spread + ilogb(r) > -1023) {
  183:                 /*fesetround(oround)*/;
  184:                 r = r + rr;
  185:         } else {
  186:                 /*
  187:                  * The result is subnormal, so we round before scaling to
  188:                  * avoid double rounding.
  189:                  */
  190:                 p = ldexp(copysign(0x1p-1022, r), -spread);
  191:                 c = r + p;
  192:                 s = c - r;
  193:                 cc = (r - (c - s)) + (p - s) + rr;
  194:                 /*fesetround(oround)*/;
  195:                 r = (c + cc) - p;
  196:         }
  197:         return (ldexp(r, spread));
  198: }
  199: #else   /* LDBL_MANT_DIG == 113 */
  200: /*
  201:  * 113 bits of precision is more than twice the precision of a double,
  202:  * so it is enough to represent the intermediate product exactly.
  203:  */
  204: double
  205: fma(double x, double y, double z)
  206: {
  207:         return ((long double)x * y + z);
  208: }
  209: #endif  /* LDBL_MANT_DIG != 113 */
  210: 
  211: #if     LDBL_MANT_DIG == 53
  212: #ifdef  lint
  213: /* PROTOLIB1 */
  214: long double fmal(long double, long double, long double);
  215: #else   /* lint */
  216: __weak_alias(fmal, fma);
  217: #endif  /* lint */
  218: #endif  /* LDBL_MANT_DIG == 53 */