gonzui


Format: Advanced Search

t2ex/bsd_source/lib/libc/src_bsd/math/e_sqrt.cbare sourcepermlink (0.04 seconds)

Search this content:

    1: /* @(#)e_sqrt.c 5.1 93/09/24 */
    2: /*
    3:  * ====================================================
    4:  * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
    5:  *
    6:  * Developed at SunPro, a Sun Microsystems, Inc. business.
    7:  * Permission to use, copy, modify, and distribute this
    8:  * software is freely granted, provided that this notice 
    9:  * is preserved.
   10:  * ====================================================
   11:  */
   12: 
   13: /* sqrt(x)
   14:  * Return correctly rounded sqrt.
   15:  *           ------------------------------------------
   16:  *           |  Use the hardware sqrt if you have one |
   17:  *           ------------------------------------------
   18:  * Method: 
   19:  *   Bit by bit method using integer arithmetic. (Slow, but portable) 
   20:  *   1. Normalization
   21:  *      Scale x to y in [1,4) with even powers of 2: 
   22:  *      find an integer k such that  1 <= (y=x*2^(2k)) < 4, then
   23:  *              sqrt(x) = 2^k * sqrt(y)
   24:  *   2. Bit by bit computation
   25:  *      Let q  = sqrt(y) truncated to i bit after binary point (q = 1),
   26:  *           i                                                        0
   27:  *                                     i+1         2
   28:  *          s  = 2*q , and   y  =  2   * ( y - q  ).           (1)
   29:  *           i      i            i                 i
   30:  *                                                        
   31:  *      To compute q    from q , one checks whether 
   32:  *                  i+1       i                       
   33:  *
   34:  *                            -(i+1) 2
   35:  *                      (q + 2      ) <= y.                        (2)
   36:  *                        i
   37:  *                                                            -(i+1)
   38:  *      If (2) is false, then q   = q ; otherwise q   = q  + 2      .
   39:  *                         i+1   i             i+1   i
   40:  *
   41:  *      With some algebric manipulation, it is not difficult to see
   42:  *      that (2) is equivalent to 
   43:  *                             -(i+1)
   44:  *                      s  +  2       <= y                 (3)
   45:  *                       i                i
   46:  *
   47:  *      The advantage of (3) is that s  and y  can be computed by 
   48:  *                                    i      i
   49:  *      the following recurrence formula:
   50:  *          if (3) is false
   51:  *
   52:  *          s     =  s  ,    y    = y   ;                       (4)
   53:  *           i+1      i               i+1    i
   54:  *
   55:  *          otherwise,
   56:  *                         -i                     -(i+1)
   57:  *          s          =  s  + 2  ,  y    = y  -  s  - 2            (5)
   58:  *           i+1      i          i+1    i     i
   59:  *                              
   60:  *      One may easily use induction to prove (4) and (5). 
   61:  *      Note. Since the left hand side of (3) contain only i+2 bits,
   62:  *            it does not necessary to do a full (53-bit) comparison 
   63:  *            in (3).
   64:  *   3. Final rounding
   65:  *      After generating the 53 bits result, we compute one more bit.
   66:  *      Together with the remainder, we can decide whether the
   67:  *      result is exact, bigger than 1/2ulp, or less than 1/2ulp
   68:  *      (it will never equal to 1/2ulp).
   69:  *      The rounding mode can be detected by checking whether
   70:  *      huge + tiny is equal to huge, and whether huge - tiny is
   71:  *      equal to huge for some floating point number "huge" and "tiny".
   72:  *              
   73:  * Special cases:
   74:  *      sqrt(+-0) = +-0      ... exact
   75:  *      sqrt(inf) = inf
   76:  *      sqrt(-ve) = NaN              ... with invalid signal
   77:  *      sqrt(NaN) = NaN              ... with invalid signal for signaling NaN
   78:  *
   79:  * Other methods : see the appended file at the end of the program below.
   80:  *---------------
   81:  */
   82: 
   83: /* LINTLIBRARY */
   84: 
   85: #include <sys/cdefs.h>
   86: #include <float.h>
   87: #include <math.h>
   88: 
   89: #include "math_private.h"
   90: 
   91: static  const double     one = 1.0, tiny=1.0e-300;
   92: 
   93: double
   94: sqrt(double x)
   95: {
   96:         double z;
   97:         int32_t sign = (int)0x80000000; 
   98:         int32_t ix0,s0,q,m,t,i;
   99:         u_int32_t r,t1,s1,ix1,q1;
  100: 
  101:         EXTRACT_WORDS(ix0,ix1,x);
  102: 
  103:     /* take care of Inf and NaN */
  104:         if((ix0&0x7ff00000)==0x7ff00000) {                     
  105:             return x*x+x;              /* sqrt(NaN)=NaN, sqrt(+inf)=+inf
  106:                                            sqrt(-inf)=sNaN */
  107:         } 
  108:     /* take care of zero */
  109:         if(ix0<=0) {
  110:             if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */
  111:             else if(ix0<0)
  112:                 return (x-x)/(x-x);           /* sqrt(-ve) = sNaN */
  113:         }
  114:     /* normalize x */
  115:         m = (ix0>>20);
  116:         if(m==0) {                             /* subnormal x */
  117:             while(ix0==0) {
  118:                 m -= 21;
  119:                 ix0 |= (ix1>>11); ix1 <<= 21;
  120:             }
  121:             for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1;
  122:             m -= i-1;
  123:             ix0 |= (ix1>>(32-i));
  124:             ix1 <<= i;
  125:         }
  126:         m -= 1023;     /* unbias exponent */
  127:         ix0 = (ix0&0x000fffff)|0x00100000;
  128:         if(m&1){       /* odd m, double x to make it even */
  129:             ix0 += ix0 + ((ix1&sign)>>31);
  130:             ix1 += ix1;
  131:         }
  132:         m >>= 1;       /* m = [m/2] */
  133: 
  134:     /* generate sqrt(x) bit by bit */
  135:         ix0 += ix0 + ((ix1&sign)>>31);
  136:         ix1 += ix1;
  137:         q = q1 = s0 = s1 = 0;  /* [q,q1] = sqrt(x) */
  138:         r = 0x00200000;                /* r = moving bit from right to left */
  139: 
  140:         while(r!=0) {
  141:             t = s0+r; 
  142:             if(t<=ix0) { 
  143:                 s0   = t+r; 
  144:                 ix0 -= t; 
  145:                 q   += r; 
  146:             } 
  147:             ix0 += ix0 + ((ix1&sign)>>31);
  148:             ix1 += ix1;
  149:             r>>=1;
  150:         }
  151: 
  152:         r = sign;
  153:         while(r!=0) {
  154:             t1 = s1+r; 
  155:             t  = s0;
  156:             if((t<ix0)||((t==ix0)&&(t1<=ix1))) { 
  157:                 s1  = t1+r;
  158:                 if(((t1&sign)==sign)&&(s1&sign)==0) s0 += 1;
  159:                 ix0 -= t;
  160:                 if (ix1 < t1) ix0 -= 1;
  161:                 ix1 -= t1;
  162:                 q1  += r;
  163:             }
  164:             ix0 += ix0 + ((ix1&sign)>>31);
  165:             ix1 += ix1;
  166:             r>>=1;
  167:         }
  168: 
  169:     /* use floating add to find out rounding direction */
  170:         if((ix0|ix1)!=0) {
  171:             z = one-tiny; /* trigger inexact flag */
  172:             if (z>=one) {
  173:                 z = one+tiny;
  174:                 if (q1==(u_int32_t)0xffffffff) { q1=0; q += 1;}
  175:                 else if (z>one) {
  176:                     if (q1==(u_int32_t)0xfffffffe) q+=1;
  177:                     q1+=2; 
  178:                 } else
  179:                     q1 += (q1&1);
  180:             }
  181:         }
  182:         ix0 = (q>>1)+0x3fe00000;
  183:         ix1 =  q1>>1;
  184:         if ((q&1)==1) ix1 |= sign;
  185:         ix0 += (m <<20);
  186:         INSERT_WORDS(z,ix0,ix1);
  187:         return z;
  188: }
  189: 
  190: /*
  191: Other methods  (use floating-point arithmetic)
  192: -------------
  193: (This is a copy of a drafted paper by Prof W. Kahan 
  194: and K.C. Ng, written in May, 1986)
  195: 
  196:         Two algorithms are given here to implement sqrt(x) 
  197:         (IEEE double precision arithmetic) in software.
  198:         Both supply sqrt(x) correctly rounded. The first algorithm (in
  199:         Section A) uses newton iterations and involves four divisions.
  200:         The second one uses reciproot iterations to avoid division, but
  201:         requires more multiplications. Both algorithms need the ability
  202:         to chop results of arithmetic operations instead of round them, 
  203:         and the INEXACT flag to indicate when an arithmetic operation
  204:         is executed exactly with no roundoff error, all part of the 
  205:         standard (IEEE 754-1985). The ability to perform shift, add,
  206:         subtract and logical AND operations upon 32-bit words is needed
  207:         too, though not part of the standard.
  208: 
  209: A.  sqrt(x) by Newton Iteration
  210: 
  211:    (1)  Initial approximation
  212: 
  213:         Let x0 and x1 be the leading and the trailing 32-bit words of
  214:         a floating point number x (in IEEE double format) respectively 
  215: 
  216:             1    11                 52                             ...widths
  217:            ------------------------------------------------------
  218:         x: |s|   e     |             f                               |
  219:            ------------------------------------------------------
  220:               msb    lsb  msb                                lsb ...order
  221: 
  222:  
  223:              ------------------------               ------------------------
  224:         x0:  |s|   e    |    f1     |   x1: |          f2           |
  225:              ------------------------               ------------------------
  226: 
  227:         By performing shifts and subtracts on x0 and x1 (both regarded
  228:         as integers), we obtain an 8-bit approximation of sqrt(x) as
  229:         follows.
  230: 
  231:                 k  := (x0>>1) + 0x1ff80000;
  232:                 y0 := k - T1[31&(k>>15)].     ... y ~ sqrt(x) to 8 bits
  233:         Here k is a 32-bit integer and T1[] is an integer array containing
  234:         correction terms. Now magically the floating value of y (y's
  235:         leading 32-bit word is y0, the value of its trailing word is 0)
  236:         approximates sqrt(x) to almost 8-bit.
  237: 
  238:         Value of T1:
  239:         static int T1[32]= {
  240:         0,     1024,       3062, 5746,   9193,     13348,      18162,       23592,
  241:         29598, 36145,  43202,   50740,    58733,     67158,      75992,       85215,
  242:         83599, 71378,  60428,   50647,    41945,     34246,      27478,       21581,
  243:         16499, 12183,  8588,    5674,      3403,        1742,  661,     130,};
  244: 
  245:     (2) Iterative refinement
  246: 
  247:         Apply Heron's rule three times to y, we have y approximates 
  248:         sqrt(x) to within 1 ulp (Unit in the Last Place):
  249: 
  250:                 y := (y+x/y)/2                ... almost 17 sig. bits
  251:                 y := (y+x/y)/2                ... almost 35 sig. bits
  252:                 y := y-(y-x/y)/2      ... within 1 ulp
  253: 
  254: 
  255:         Remark 1.
  256:             Another way to improve y to within 1 ulp is:
  257: 
  258:                 y := (y+x/y)          ... almost 17 sig. bits to 2*sqrt(x)
  259:                 y := y - 0x00100006   ... almost 18 sig. bits to sqrt(x)
  260: 
  261:                                 2
  262:                             (x-y )*y
  263:                 y := y + 2* ----------        ...within 1 ulp
  264:                                2
  265:                              3y  + x
  266: 
  267: 
  268:         This formula has one division fewer than the one above; however,
  269:         it requires more multiplications and additions. Also x must be
  270:         scaled in advance to avoid spurious overflow in evaluating the
  271:         expression 3y*y+x. Hence it is not recommended uless division
  272:         is slow. If division is very slow, then one should use the 
  273:         reciproot algorithm given in section B.
  274: 
  275:     (3) Final adjustment
  276: 
  277:         By twiddling y's last bit it is possible to force y to be 
  278:         correctly rounded according to the prevailing rounding mode
  279:         as follows. Let r and i be copies of the rounding mode and
  280:         inexact flag before entering the square root program. Also we
  281:         use the expression y+-ulp for the next representable floating
  282:         numbers (up and down) of y. Note that y+-ulp = either fixed
  283:         point y+-1, or multiply y by nextafter(1,+-inf) in chopped
  284:         mode.
  285: 
  286:                 I := FALSE;   ... reset INEXACT flag I
  287:                 R := RZ;      ... set rounding mode to round-toward-zero
  288:                 z := x/y;     ... chopped quotient, possibly inexact
  289:                 If(not I) then {      ... if the quotient is exact
  290:                     if(z=y) {
  291:                         I := i;        ... restore inexact flag
  292:                         R := r;  ... restore rounded mode
  293:                         return sqrt(x):=y.
  294:                     } else {
  295:                         z := z - ulp;        ... special rounding
  296:                     }
  297:                 }
  298:                 i := TRUE;            ... sqrt(x) is inexact
  299:                 If (r=RN) then z=z+ulp        ... rounded-to-nearest
  300:                 If (r=RP) then {      ... round-toward-+inf
  301:                     y = y+ulp; z=z+ulp;
  302:                 }
  303:                 y := y+z;             ... chopped sum
  304:                 y0:=y0-0x00100000;    ... y := y/2 is correctly rounded.
  305:                 I := i;                       ... restore inexact flag
  306:                 R := r;                ... restore rounded mode
  307:                 return sqrt(x):=y.
  308:                     
  309:     (4) Special cases
  310: 
  311:         Square root of +inf, +-0, or NaN is itself;
  312:         Square root of a negative number is NaN with invalid signal.
  313: 
  314: 
  315: B.  sqrt(x) by Reciproot Iteration
  316: 
  317:    (1)  Initial approximation
  318: 
  319:         Let x0 and x1 be the leading and the trailing 32-bit words of
  320:         a floating point number x (in IEEE double format) respectively
  321:         (see section A). By performing shifs and subtracts on x0 and y0,
  322:         we obtain a 7.8-bit approximation of 1/sqrt(x) as follows.
  323: 
  324:             k := 0x5fe80000 - (x0>>1);
  325:             y0:= k - T2[63&(k>>14)].   ... y ~ 1/sqrt(x) to 7.8 bits
  326: 
  327:         Here k is a 32-bit integer and T2[] is an integer array 
  328:         containing correction terms. Now magically the floating
  329:         value of y (y's leading 32-bit word is y0, the value of
  330:         its trailing word y1 is set to zero) approximates 1/sqrt(x)
  331:         to almost 7.8-bit.
  332: 
  333:         Value of T2:
  334:         static int T2[64]= {
  335:         0x1500,        0x2ef8,        0x4d67,        0x6b02,        0x87be,        0xa395,        0xbe7a,        0xd866,
  336:         0xf14a,        0x1091b,0x11fcd,0x13552,0x14999,0x15c98,0x16e34,0x17e5f,
  337:         0x18d03,0x19a01,0x1a545,0x1ae8a,0x1b5c4,0x1bb01,0x1bfde,0x1c28d,
  338:         0x1c2de,0x1c0db,0x1ba73,0x1b11c,0x1a4b5,0x1953d,0x18266,0x16be0,
  339:         0x1683e,0x179d8,0x18a4d,0x19992,0x1a789,0x1b445,0x1bf61,0x1c989,
  340:         0x1d16d,0x1d77b,0x1dddf,0x1e2ad,0x1e5bf,0x1e6e8,0x1e654,0x1e3cd,
  341:         0x1df2a,0x1d635,0x1cb16,0x1be2c,0x1ae4e,0x19bde,0x1868e,0x16e2e,
  342:         0x1527f,0x1334a,0x11051,0xe951,        0xbe01,        0x8e0d,        0x5924,        0x1edd,};
  343: 
  344:     (2) Iterative refinement
  345: 
  346:         Apply Reciproot iteration three times to y and multiply the
  347:         result by x to get an approximation z that matches sqrt(x)
  348:         to about 1 ulp. To be exact, we will have 
  349:                 -1ulp < sqrt(x)-z<1.0625ulp.
  350:         
  351:         ... set rounding mode to Round-to-nearest
  352:            y := y*(1.5-0.5*x*y*y)      ... almost 15 sig. bits to 1/sqrt(x)
  353:            y := y*((1.5-2^-30)+0.5*x*y*y)... about 29 sig. bits to 1/sqrt(x)
  354:         ... special arrangement for better accuracy
  355:            z := x*y                    ... 29 bits to sqrt(x), with z*y<1
  356:            z := z + 0.5*z*(1-z*y)      ... about 1 ulp to sqrt(x)
  357: 
  358:         Remark 2. The constant 1.5-2^-30 is chosen to bias the error so that
  359:         (a) the term z*y in the final iteration is always less than 1; 
  360:         (b) the error in the final result is biased upward so that
  361:                 -1 ulp < sqrt(x) - z < 1.0625 ulp
  362:             instead of |sqrt(x)-z|<1.03125ulp.
  363: 
  364:     (3) Final adjustment
  365: 
  366:         By twiddling y's last bit it is possible to force y to be 
  367:         correctly rounded according to the prevailing rounding mode
  368:         as follows. Let r and i be copies of the rounding mode and
  369:         inexact flag before entering the square root program. Also we
  370:         use the expression y+-ulp for the next representable floating
  371:         numbers (up and down) of y. Note that y+-ulp = either fixed
  372:         point y+-1, or multiply y by nextafter(1,+-inf) in chopped
  373:         mode.
  374: 
  375:         R := RZ;               ... set rounding mode to round-toward-zero
  376:         switch(r) {
  377:             case RN:           ... round-to-nearest
  378:                if(x<= z*(z-ulp)...chopped) z = z - ulp; else
  379:                if(x<= z*(z+ulp)...chopped) z = z; else z = z+ulp;
  380:                break;
  381:             case RZ:case RM:   ... round-to-zero or round-to--inf
  382:                R:=RP;          ... reset rounding mod to round-to-+inf
  383:                if(x<z*z ... rounded up) z = z - ulp; else
  384:                if(x>=(z+ulp)*(z+ulp) ...rounded up) z = z+ulp;
  385:                break;
  386:             case RP:           ... round-to-+inf
  387:                if(x>(z+ulp)*(z+ulp)...chopped) z = z+2*ulp; else
  388:                if(x>z*z ...chopped) z = z+ulp;
  389:                break;
  390:         }
  391: 
  392:         Remark 3. The above comparisons can be done in fixed point. For
  393:         example, to compare x and w=z*z chopped, it suffices to compare
  394:         x1 and w1 (the trailing parts of x and w), regarding them as
  395:         two's complement integers.
  396: 
  397:         ...Is z an exact square root?
  398:         To determine whether z is an exact square root of x, let z1 be the
  399:         trailing part of z, and also let x0 and x1 be the leading and
  400:         trailing parts of x.
  401: 
  402:         If ((z1&0x03ffffff)!=0)        ... not exact if trailing 26 bits of z!=0
  403:             I := 1;            ... Raise Inexact flag: z is not exact
  404:         else {
  405:             j := 1 - [(x0>>20)&1]      ... j = logb(x) mod 2
  406:             k := z1 >> 26;             ... get z's 25-th and 26-th 
  407:                                             fraction bits
  408:             I := i or (k&j) or ((k&(j+j+1))!=(x1&3));
  409:         }
  410:         R:= r          ... restore rounded mode
  411:         return sqrt(x):=z.
  412: 
  413:         If multiplication is cheaper then the foregoing red tape, the 
  414:         Inexact flag can be evaluated by
  415: 
  416:             I := i;
  417:             I := (z*z!=x) or I.
  418: 
  419:         Note that z*z can overwrite I; this value must be sensed if it is 
  420:         True.
  421: 
  422:         Remark 4. If z*z = x exactly, then bit 25 to bit 0 of z1 must be
  423:         zero.
  424: 
  425:                     --------------------
  426:                 z1: |        f2        | 
  427:                     --------------------
  428:                 bit 31                   bit 0
  429: 
  430:         Further more, bit 27 and 26 of z1, bit 0 and 1 of x1, and the odd
  431:         or even of logb(x) have the following relations:
  432: 
  433:         -------------------------------------------------
  434:         bit 27,26 of z1                bit 1,0 of x1 logb(x)
  435:         -------------------------------------------------
  436:         00                     00                odd and even
  437:         01                     01                even
  438:         10                     10                odd
  439:         10                     00                even
  440:         11                     01                even
  441:         -------------------------------------------------
  442: 
  443:     (4) Special cases (see (4) of Section A).   
  444:  
  445:  */
  446: 
  447: #if     LDBL_MANT_DIG == 53
  448: #ifdef  lint
  449: /* PROTOLIB1 */
  450: long double sqrtl(long double);
  451: #else   /* lint */
  452: __weak_alias(sqrtl, sqrt);
  453: #endif  /* lint */
  454: #endif  /* LDBL_MANT_DIG == 53 */