t2ex/bsd_source/lib/libc/src_bsd/math/s_fma.c | bare source | permlink (0.01 seconds) |
1: /* $OpenBSD: s_fma.c,v 1.1 2011/07/06 00:02:42 martynas Exp $ */ 2: 3: /*- 4: * Copyright (c) 2005 David Schultz <das@FreeBSD.ORG> 5: * All rights reserved. 6: * 7: * Redistribution and use in source and binary forms, with or without 8: * modification, are permitted provided that the following conditions 9: * are met: 10: * 1. Redistributions of source code must retain the above copyright 11: * notice, this list of conditions and the following disclaimer. 12: * 2. Redistributions in binary form must reproduce the above copyright 13: * notice, this list of conditions and the following disclaimer in the 14: * documentation and/or other materials provided with the distribution. 15: * 16: * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19: * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26: * SUCH DAMAGE. 27: */ 28: 29: /* LINTLIBRARY */ 30: 31: #include <sys/cdefs.h> 32: #if 0 33: __FBSDID("$FreeBSD: src/lib/msun/src/s_fma.c,v 1.5 2008/04/03 06:14:51 das Exp $"); 34: #endif 35: 36: #include <fenv.h> 37: #include <float.h> 38: #include <math.h> 39: 40: /* 41: * Fused multiply-add: Compute x * y + z with a single rounding error. 42: * 43: * We use scaling to avoid overflow/underflow, along with the 44: * canonical precision-doubling technique adapted from: 45: * 46: * Dekker, T. A Floating-Point Technique for Extending the 47: * Available Precision. Numer. Math. 18, 224-242 (1971). 48: * 49: * This algorithm is sensitive to the rounding precision. FPUs such 50: * as the i387 must be set in double-precision mode if variables are 51: * to be stored in FP registers in order to avoid incorrect results. 52: * This is the default on FreeBSD, but not on many other systems. 53: * 54: * Hardware instructions should be used on architectures that support it, 55: * since this implementation will likely be several times slower. 56: */ 57: #if LDBL_MANT_DIG != 113 58: double 59: fma(double x, double y, double z) 60: { 61: static const double split = 0x1p27 + 1.0; 62: double xs, ys, zs; 63: double c, cc, hx, hy, p, q, tx, ty; 64: double r, rr, s; 65: int oround; 66: int ex, ey, ez; 67: int spread; 68: 69: /* 70: * Handle special cases. The order of operations and the particular 71: * return values here are crucial in handling special cases involving 72: * infinities, NaNs, overflows, and signed zeroes correctly. 73: */ 74: if (x == 0.0 || y == 0.0) 75: return (x * y + z); 76: if (z == 0.0) 77: return (x * y); 78: if (!isfinite(x) || !isfinite(y)) 79: return (x * y + z); 80: if (!isfinite(z)) 81: return (z); 82: 83: xs = frexp(x, &ex); 84: ys = frexp(y, &ey); 85: zs = frexp(z, &ez); 86: oround = /*fegetround()*/FE_TONEAREST; 87: spread = ex + ey - ez; 88: 89: /* 90: * If x * y and z are many orders of magnitude apart, the scaling 91: * will overflow, so we handle these cases specially. Rounding 92: * modes other than FE_TONEAREST are painful. 93: */ 94: if (spread > DBL_MANT_DIG * 2) { 95: fenv_t env; 96: /*feraiseexcept(FE_INEXACT)*/; 97: switch(oround) { 98: case FE_TONEAREST: 99: return (x * y); 100: case FE_TOWARDZERO: 101: if (x > 0.0 ^ y < 0.0 ^ z < 0.0) 102: return (x * y); 103: /*feholdexcept(&env)*/; 104: r = x * y; 105: if (!/*fetestexcept(FE_INEXACT)*/0) 106: r = nextafter(r, 0); 107: /*feupdateenv(&env)*/; 108: return (r); 109: case FE_DOWNWARD: 110: if (z > 0.0) 111: return (x * y); 112: /*feholdexcept(&env)*/; 113: r = x * y; 114: if (!/*fetestexcept(FE_INEXACT)*/0) 115: r = nextafter(r, -INFINITY); 116: /*feupdateenv(&env)*/; 117: return (r); 118: default: /* FE_UPWARD */ 119: if (z < 0.0) 120: return (x * y); 121: /*feholdexcept(&env)*/; 122: r = x * y; 123: if (!/*fetestexcept(FE_INEXACT)*/0) 124: r = nextafter(r, INFINITY); 125: /*feupdateenv(&env)*/; 126: return (r); 127: } 128: } 129: if (spread < -DBL_MANT_DIG) { 130: /*feraiseexcept(FE_INEXACT)*/; 131: if (!isnormal(z)) 132: /*feraiseexcept(FE_UNDERFLOW)*/; 133: switch (oround) { 134: case FE_TONEAREST: 135: return (z); 136: case FE_TOWARDZERO: 137: if (x > 0.0 ^ y < 0.0 ^ z < 0.0) 138: return (z); 139: else 140: return (nextafter(z, 0)); 141: case FE_DOWNWARD: 142: if (x > 0.0 ^ y < 0.0) 143: return (z); 144: else 145: return (nextafter(z, -INFINITY)); 146: default: /* FE_UPWARD */ 147: if (x > 0.0 ^ y < 0.0) 148: return (nextafter(z, INFINITY)); 149: else 150: return (z); 151: } 152: } 153: 154: /* 155: * Use Dekker's algorithm to perform the multiplication and 156: * subsequent addition in twice the machine precision. 157: * Arrange so that x * y = c + cc, and x * y + z = r + rr. 158: */ 159: /*fesetround(FE_TONEAREST)*/; 160: 161: p = xs * split; 162: hx = xs - p; 163: hx += p; 164: tx = xs - hx; 165: 166: p = ys * split; 167: hy = ys - p; 168: hy += p; 169: ty = ys - hy; 170: 171: p = hx * hy; 172: q = hx * ty + tx * hy; 173: c = p + q; 174: cc = p - c + q + tx * ty; 175: 176: zs = ldexp(zs, -spread); 177: r = c + zs; 178: s = r - c; 179: rr = (c - (r - s)) + (zs - s) + cc; 180: 181: spread = ex + ey; 182: if (spread + ilogb(r) > -1023) { 183: /*fesetround(oround)*/; 184: r = r + rr; 185: } else { 186: /* 187: * The result is subnormal, so we round before scaling to 188: * avoid double rounding. 189: */ 190: p = ldexp(copysign(0x1p-1022, r), -spread); 191: c = r + p; 192: s = c - r; 193: cc = (r - (c - s)) + (p - s) + rr; 194: /*fesetround(oround)*/; 195: r = (c + cc) - p; 196: } 197: return (ldexp(r, spread)); 198: } 199: #else /* LDBL_MANT_DIG == 113 */ 200: /* 201: * 113 bits of precision is more than twice the precision of a double, 202: * so it is enough to represent the intermediate product exactly. 203: */ 204: double 205: fma(double x, double y, double z) 206: { 207: return ((long double)x * y + z); 208: } 209: #endif /* LDBL_MANT_DIG != 113 */ 210: 211: #if LDBL_MANT_DIG == 53 212: #ifdef lint 213: /* PROTOLIB1 */ 214: long double fmal(long double, long double, long double); 215: #else /* lint */ 216: __weak_alias(fmal, fma); 217: #endif /* lint */ 218: #endif /* LDBL_MANT_DIG == 53 */