1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 #include <sys/isa_defs.h>
  31 #include "libm_synonyms.h"
  32 #include "libm_inlines.h"
  33 
  34 #ifdef _LITTLE_ENDIAN
  35 #define HI(x)   *(1+(int*)x)
  36 #define LO(x)   *(unsigned*)x
  37 #else
  38 #define HI(x)   *(int*)x
  39 #define LO(x)   *(1+(unsigned*)x)
  40 #endif
  41 
  42 #ifdef __RESTRICT
  43 #define restrict _Restrict
  44 #else
  45 #define restrict
  46 #endif
  47 
/* double hypot(double x, double y)
 *
 * Method :
 *	1. Special cases:
 *		x or y is +Inf or -Inf				=> +Inf
 *		x or y is NaN					=> QNaN
 *	2. Compute hypot(x,y):
 *		hypot(x,y) = m * sqrt(xnm * xnm + ynm * ynm)
 *	Where:
 *		m = max(|x|,|y|)
 *		xnm = x * (1/m)
 *		ynm = y * (1/m)
 *
 *	Compute xnm * xnm + ynm * ynm by simulating
 *	multi-precision arithmetic.
 *
 * Accuracy:
 *	Maximum error observed: less than 0.872 ulp after 16.777.216.000
 *	results.
 */
  68 
  69 #define sqrt __sqrt
  70 
  71 extern double sqrt(double);
  72 extern double fabs(double);
  73 
  74 static const unsigned long long LCONST[] = {
  75 0x41b0000000000000ULL,  /* D2ON28 = 2 ** 28             */
  76 0x0010000000000000ULL,  /* D2ONM1022 = 2 ** -1022       */
  77 0x7fd0000000000000ULL   /* D2ONP1022 = 2 **  1022       */
  78 };
  79 
  80 static void
  81 __vhypot_n(int n, double * restrict px, int stridex, double * restrict py,
  82         int stridey, double * restrict pz, int stridez);
  83 
  84 #pragma no_inline(__vhypot_n)
  85 
  86 #define RETURN(ret)                                             \
  87 {                                                               \
  88         *pz = (ret);                                            \
  89         py += stridey;                                          \
  90         pz += stridez;                                          \
  91         if (n_n == 0)                                           \
  92         {                                                       \
  93                 hx0 = HI(px);                                   \
  94                 hy0 = HI(py);                                   \
  95                 spx = px; spy = py; spz = pz;                   \
  96                 continue;                                       \
  97         }                                                       \
  98         n--;                                                    \
  99         break;                                                  \
 100 }
 101 
 102 void
 103 __vhypot(int n, double * restrict px, int stridex, double * restrict py,
 104         int stridey, double * restrict pz, int stridez)
 105 {
 106         int             hx0, hx1, hy0, j0, diff;
 107         double          x_hi, x_lo, y_hi, y_lo;
 108         double          scl = 0;
 109         double          x, y, res;
 110         double          *spx, *spy, *spz;
 111         int             n_n;
 112         double          D2ON28 = ((double*)LCONST)[0];          /* 2 ** 28      */
 113         double          D2ONM1022 = ((double*)LCONST)[1];       /* 2 **-1022    */
 114         double          D2ONP1022 = ((double*)LCONST)[2];       /* 2 ** 1022    */
 115 
 116         while (n > 1)
 117         {
 118                 n_n = 0;
 119                 spx = px;
 120                 spy = py;
 121                 spz = pz;
 122                 hx0 = HI(px);
 123                 hy0 = HI(py);
 124                 for (; n > 1 ; n--)
 125                 {
 126                         px += stridex;
 127                         hx0 &= 0x7fffffff;
 128                         hy0 &= 0x7fffffff;
 129 
 130                         if (hx0 >= 0x7fe00000)       /* |X| >= 2**1023 or Inf or NaN */
 131                         {
 132                                 diff = hy0 - hx0;
 133                                 j0 = diff >> 31;
 134                                 j0 = hy0 - (diff & j0);
 135                                 j0 &= 0x7ff00000;
 136                                 x = *(px - stridex);
 137                                 y = *py;
 138                                 x = fabs(x);
 139                                 y = fabs(y);
 140                                 if (j0 >= 0x7ff00000)        /* |X| or |Y| = Inf or NaN */
 141                                 {
 142                                         int lx = LO((px - stridex));
 143                                         int ly = LO(py);
 144                                         if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x;
 145                                         else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y;
 146                                         else res = x + y;
 147                                         RETURN (res)
 148                                 }
 149                                 else
 150                                 {
 151                                         j0 = diff >> 31;
 152                                         if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
 153                                         {
 154                                                 x *= D2ONM1022;
 155                                                 y *= D2ONM1022;
 156 
 157                                                 x_hi = (x + D2ON28) - D2ON28;
 158                                                 x_lo = x - x_hi;
 159                                                 y_hi = (y + D2ON28) - D2ON28;
 160                                                 y_lo = y - y_hi;
 161                                                 res = (x_hi * x_hi + y_hi * y_hi);
 162                                                 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
 163 
 164                                                 res = sqrt (res);
 165 
 166                                                 res = D2ONP1022 * res;
 167                                                 RETURN (res)
 168                                         }
 169                                         else RETURN (x + y)
 170                                 }
 171                         }
 172                         if (hy0 >= 0x7fe00000)       /* |Y| >= 2**1023 or Inf or NaN */
 173                         {
 174                                 diff = hy0 - hx0;
 175                                 j0 = diff >> 31;
 176                                 j0 = hy0 - (diff & j0);
 177                                 j0 &= 0x7ff00000;
 178                                 x = *(px - stridex);
 179                                 y = *py;
 180                                 x = fabs(x);
 181                                 y = fabs(y);
 182                                 if (j0 >= 0x7ff00000)        /* |X| or |Y| = Inf or NaN */
 183                                 {
 184                                         int lx = LO((px - stridex));
 185                                         int ly = LO(py);
 186                                         if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x;
 187                                         else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y;
 188                                         else res = x + y;
 189                                         RETURN (res)
 190                                 }
 191                                 else
 192                                 {
 193                                         j0 = diff >> 31;
 194                                         if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
 195                                         {
 196                                                 x *= D2ONM1022;
 197                                                 y *= D2ONM1022;
 198 
 199                                                 x_hi = (x + D2ON28) - D2ON28;
 200                                                 x_lo = x - x_hi;
 201                                                 y_hi = (y + D2ON28) - D2ON28;
 202                                                 y_lo = y - y_hi;
 203                                                 res = (x_hi * x_hi + y_hi * y_hi);
 204                                                 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
 205 
 206                                                 res = sqrt (res);
 207 
 208                                                 res = D2ONP1022 * res;
 209                                                 RETURN (res)
 210                                         }
 211                                         else RETURN (x + y)
 212                                 }
 213                         }
 214 
 215                         hx1 = HI(px);
 216 
 217                         if (hx0 < 0x00100000 && hy0 < 0x00100000) /* X and Y are subnormal */
 218                         {
 219                                 x = *(px - stridex);
 220                                 y = *py;
 221 
 222                                 x *= D2ONP1022;
 223                                 y *= D2ONP1022;
 224 
 225                                 x_hi = (x + D2ON28) - D2ON28;
 226                                 x_lo = x - x_hi;
 227                                 y_hi = (y + D2ON28) - D2ON28;
 228                                 y_lo = y - y_hi;
 229                                 res = (x_hi * x_hi + y_hi * y_hi);
 230                                 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
 231 
 232                                 res = sqrt(res);
 233 
 234                                 res = D2ONM1022 * res;
 235                                 RETURN (res)
 236                         }
 237 
 238                         hx0 = hx1;
 239                         py += stridey;
 240                         pz += stridez;
 241                         n_n++;
 242                         hy0 = HI(py);
 243                 }
 244                 if (n_n > 0)
 245                         __vhypot_n (n_n, spx, stridex, spy, stridey, spz, stridez);
 246         }
 247 
 248         if (n > 0)
 249         {
 250                 x = *px;
 251                 y = *py;
 252                 hx0 = HI(px);
 253                 hy0 = HI(py);
 254 
 255                 hx0 &= 0x7fffffff;
 256                 hy0 &= 0x7fffffff;
 257 
 258                 diff = hy0 - hx0;
 259                 j0 = diff >> 31;
 260                 j0 = hy0 - (diff & j0);
 261                 j0 &= 0x7ff00000;
 262 
 263                 if (j0 >= 0x7fe00000)        /* max(|X|,|Y|) >= 2**1023 or X or Y = Inf or NaN */
 264                 {
 265                         x = fabs(x);
 266                         y = fabs(y);
 267                         if (j0 >= 0x7ff00000)        /* |X| or |Y| = Inf or NaN */
 268                         {
 269                                 int lx = LO(px);
 270                                 int ly = LO(py);
 271                                 if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x;
 272                                 else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y;
 273                                 else res = x + y;
 274                                 *pz = res;
 275                                 return;
 276                         }
 277                         else
 278                         {
 279                                 j0 = diff >> 31;
 280                                 if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
 281                                 {
 282                                         x *= D2ONM1022;
 283                                         y *= D2ONM1022;
 284 
 285                                         x_hi = (x + D2ON28) - D2ON28;
 286                                         x_lo = x - x_hi;
 287                                         y_hi = (y + D2ON28) - D2ON28;
 288                                         y_lo = y - y_hi;
 289                                         res = (x_hi * x_hi + y_hi * y_hi);
 290                                         res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
 291 
 292                                         res = sqrt (res);
 293 
 294                                         res = D2ONP1022 * res;
 295                                         *pz = res;
 296                                         return;
 297                                 }
 298                                 else
 299                                 {
 300                                         *pz = x + y;
 301                                         return;
 302                                 }
 303                         }
 304                 }
 305 
 306                 if (j0 < 0x00100000) /* X and Y are subnormal */
 307                 {
 308                         x *= D2ONP1022;
 309                         y *= D2ONP1022;
 310 
 311                         x_hi = (x + D2ON28) - D2ON28;
 312                         x_lo = x - x_hi;
 313                         y_hi = (y + D2ON28) - D2ON28;
 314                         y_lo = y - y_hi;
 315                         res = (x_hi * x_hi + y_hi * y_hi);
 316                         res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
 317 
 318                         res = sqrt(res);
 319 
 320                         res = D2ONM1022 * res;
 321                         *pz = res;
 322                         return;
 323                 }
 324 
 325                 HI(&scl) = (0x7fe00000 - j0);
 326 
 327                 x *= scl;
 328                 y *= scl;
 329 
 330                 x_hi = (x + D2ON28) - D2ON28;
 331                 y_hi = (y + D2ON28) - D2ON28;
 332                 x_lo = x - x_hi;
 333                 y_lo = y - y_hi;
 334 
 335                 res = (x_hi * x_hi + y_hi * y_hi);
 336                 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
 337 
 338                 res = sqrt(res);
 339 
 340                 HI(&scl) = j0;
 341 
 342                 res = scl * res;
 343                 *pz = res;
 344         }
 345 }
 346 
 347 static void
 348 __vhypot_n(int n, double * restrict px, int stridex, double * restrict py,
 349         int stridey, double * restrict pz, int stridez)
 350 {
 351         int             hx0, hy0, j0, diff0;
 352         double          x_hi0, x_lo0, y_hi0, y_lo0, scl0 = 0;
 353         double          x0, y0, res0;
 354         double          D2ON28 = ((double*)LCONST)[0];          /* 2 ** 28      */
 355 
 356         for(; n > 0 ; n--)
 357         {
 358                 x0 = *px;
 359                 y0 = *py;
 360                 hx0 = HI(px);
 361                 hy0 = HI(py);
 362 
 363                 hx0 &= 0x7fffffff;
 364                 hy0 &= 0x7fffffff;
 365 
 366                 diff0 = hy0 - hx0;
 367                 j0 = diff0 >> 31;
 368                 j0 = hy0 - (diff0 & j0);
 369                 j0 &= 0x7ff00000;
 370 
 371                 px += stridex;
 372                 py += stridey;
 373 
 374                 HI(&scl0) = (0x7fe00000 - j0);
 375 
 376                 x0 *= scl0;
 377                 y0 *= scl0;
 378 
 379                 x_hi0 = (x0 + D2ON28) - D2ON28;
 380                 y_hi0 = (y0 + D2ON28) - D2ON28;
 381                 x_lo0 = x0 - x_hi0;
 382                 y_lo0 = y0 - y_hi0;
 383 
 384                 res0 = (x_hi0 * x_hi0 + y_hi0 * y_hi0);
 385                 res0 += ((x0 + x_hi0) * x_lo0 + (y0 + y_hi0) * y_lo0);
 386 
 387                 res0 = sqrt(res0);
 388 
 389                 HI(&scl0) = j0;
 390 
 391                 res0 = scl0 * res0;
 392                 *pz = res0;
 393 
 394                 pz += stridez;
 395         }
 396 }
 397