1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  23  */
  24 /*
  25  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  26  * Use is subject to license terms.
  27  */
  28 
  29 /*
  30  * __vsincosf: single precision vector sincos
  31  *
  32  * Algorithm:
  33  *
  34  * For |x| < pi/4, approximate sin(x) by a polynomial x+x*z*(S0+
  35  * z*(S1+z*S2)) and cos(x) by a polynomial 1+z*(-1/2+z*(C0+z*(C1+
  36  * z*C2))), where z = x*x, all evaluated in double precision.
  37  *
  38  * Accuracy:
  39  *
  40  * The largest error is less than 0.6 ulps.
  41  */
  42 
  43 #include <sys/isa_defs.h>
  44 
  45 #ifdef _LITTLE_ENDIAN
  46 #define HI(x)   *(1+(int *)&x)
  47 #define LO(x)   *(unsigned *)&x
  48 #else
  49 #define HI(x)   *(int *)&x
  50 #define LO(x)   *(1+(unsigned *)&x)
  51 #endif
  52 
  53 #ifdef __RESTRICT
  54 #define restrict _Restrict
  55 #else
  56 #define restrict
  57 #endif
  58 
  59 extern int __vlibm_rem_pio2m(double *, double *, int, int, int);
  60 
  61 static const double C[] = {
  62         -1.66666552424430847168e-01,    /* 2^ -3 * -1.5555460000000 */
  63         8.33219196647405624390e-03,     /* 2^ -7 *  1.11077E0000000 */
  64         -1.95187909412197768688e-04,    /* 2^-13 * -1.9956B60000000 */
  65         1.0,
  66         -0.5,
  67         4.16666455566883087158e-02,     /* 2^ -5 *  1.55554A0000000 */
  68         -1.38873036485165357590e-03,    /* 2^-10 * -1.6C0C1E0000000 */
  69         2.44309903791872784495e-05,     /* 2^-16 *  1.99E24E0000000 */
  70         0.636619772367581343075535,     /* 2^ -1  * 1.45F306DC9C883 */
  71         6755399441055744.0,             /* 2^ 52  * 1.8000000000000 */
  72         1.570796326734125614166,        /* 2^  0  * 1.921FB54400000 */
  73         6.077100506506192601475e-11,    /* 2^-34  * 1.0B4611A626331 */
  74 };
  75 
  76 #define S0      C[0]
  77 #define S1      C[1]
  78 #define S2      C[2]
  79 #define one     C[3]
  80 #define mhalf   C[4]
  81 #define C0      C[5]
  82 #define C1      C[6]
  83 #define C2      C[7]
  84 #define invpio2 C[8]
  85 #define c3two51 C[9]
  86 #define pio2_1  C[10]
  87 #define pio2_t  C[11]
  88 
  89 #define PREPROCESS(N, sindex, cindex, label)                            \
  90         hx = *(int *)x;                                                 \
  91         ix = hx & 0x7fffffff;                                               \
  92         t = *x;                                                         \
  93         x += stridex;                                                   \
  94         if (ix <= 0x3f490fdb) { /* |x| < pi/4 */                  \
  95                 if (ix == 0) {                                          \
  96                         s[sindex] = t;                                  \
  97                         c[cindex] = one;                                \
  98                         goto label;                                     \
  99                 }                                                       \
 100                 y##N = (double)t;                                       \
 101                 n##N = 0;                                               \
 102         } else if (ix <= 0x49c90fdb) { /* |x| < 2^19*pi */                \
 103                 y##N = (double)t;                                       \
 104                 medium = 1;                                             \
 105         } else {                                                        \
 106                 if (ix >= 0x7f800000) { /* inf or nan */             \
 107                         s[sindex] = c[cindex] = t / t;                  \
 108                         goto label;                                     \
 109                 }                                                       \
 110                 z##N = y##N = (double)t;                                \
 111                 hx = HI(y##N);                                          \
 112                 n##N = ((hx >> 20) & 0x7ff) - 1046;                   \
 113                 HI(z##N) = (hx & 0xfffff) | 0x41600000;                     \
 114                 n##N = __vlibm_rem_pio2m(&z##N, &y##N, n##N, 1, 0);     \
 115                 if (hx < 0) {                                                \
 116                         y##N = -y##N;                                   \
 117                         n##N = -n##N;                                   \
 118                 }                                                       \
 119                 z##N = y##N * y##N;                                     \
 120                 f##N = (float)(y##N + y##N * z##N * (S0 + z##N *        \
 121                     (S1 + z##N * S2)));                                 \
 122                 g##N = (float)(one + z##N * (mhalf + z##N * (C0 +       \
 123                     z##N * (C1 + z##N * C2))));                         \
 124                 if (n##N & 2) {                                             \
 125                         f##N = -f##N;                                   \
 126                         g##N = -g##N;                                   \
 127                 }                                                       \
 128                 if (n##N & 1) {                                             \
 129                         s[sindex] = g##N;                               \
 130                         c[cindex] = -f##N;                              \
 131                 } else {                                                \
 132                         s[sindex] = f##N;                               \
 133                         c[cindex] = g##N;                               \
 134                 }                                                       \
 135                 goto label;                                             \
 136         }
 137 
 138 #define PROCESS(N)                                                      \
 139         if (medium) {                                                   \
 140                 z##N = y##N * invpio2 + c3two51;                        \
 141                 n##N = LO(z##N);                                        \
 142                 z##N -= c3two51;                                        \
 143                 y##N = (y##N - z##N * pio2_1) - z##N * pio2_t;          \
 144         }                                                               \
 145         z##N = y##N * y##N;                                             \
 146         f##N = (float)(y##N + y##N * z##N * (S0 + z##N * (S1 + z##N * S2)));\
 147         g##N = (float)(one + z##N * (mhalf + z##N * (C0 + z##N *        \
 148             (C1 + z##N * C2))));                                        \
 149         if (n##N & 2) {                                                     \
 150                 f##N = -f##N;                                           \
 151                 g##N = -g##N;                                           \
 152         }                                                               \
 153         if (n##N & 1) {                                                     \
 154                 *s = g##N;                                              \
 155                 *c = -f##N;                                             \
 156         } else {                                                        \
 157                 *s = f##N;                                              \
 158                 *c = g##N;                                              \
 159         }                                                               \
 160         s += strides;                                                   \
 161         c += stridec
 162 
 163 void
 164 __vsincosf(int n, float *restrict x, int stridex,
 165     float *restrict s, int strides, float *restrict c, int stridec)
 166 {
 167         double          y0, y1, y2, y3;
 168         double          z0, z1, z2, z3;
 169         float           f0, f1, f2, f3, t;
 170         float           g0, g1, g2, g3;
 171         int             n0 = 0, n1 = 0, n2 = 0, n3, hx, ix, medium;
 172 
 173         s -= strides;
 174         c -= stridec;
 175 
 176         for (;;) {
 177 begin:
 178                 s += strides;
 179                 c += stridec;
 180 
 181                 if (--n < 0)
 182                         break;
 183 
 184                 medium = 0;
 185                 PREPROCESS(0, 0, 0, begin);
 186 
 187                 if (--n < 0)
 188                         goto process1;
 189 
 190                 PREPROCESS(1, strides, stridec, process1);
 191 
 192                 if (--n < 0)
 193                         goto process2;
 194 
 195                 PREPROCESS(2, (strides << 1), (stridec << 1), process2);
 196 
 197                 if (--n < 0)
 198                         goto process3;
 199 
 200                 PREPROCESS(3, (strides << 1) + strides,
 201                     (stridec << 1) + stridec, process3);
 202 
 203                 if (medium) {
 204                         z0 = y0 * invpio2 + c3two51;
 205                         z1 = y1 * invpio2 + c3two51;
 206                         z2 = y2 * invpio2 + c3two51;
 207                         z3 = y3 * invpio2 + c3two51;
 208 
 209                         n0 = LO(z0);
 210                         n1 = LO(z1);
 211                         n2 = LO(z2);
 212                         n3 = LO(z3);
 213 
 214                         z0 -= c3two51;
 215                         z1 -= c3two51;
 216                         z2 -= c3two51;
 217                         z3 -= c3two51;
 218 
 219                         y0 = (y0 - z0 * pio2_1) - z0 * pio2_t;
 220                         y1 = (y1 - z1 * pio2_1) - z1 * pio2_t;
 221                         y2 = (y2 - z2 * pio2_1) - z2 * pio2_t;
 222                         y3 = (y3 - z3 * pio2_1) - z3 * pio2_t;
 223                 }
 224 
 225                 z0 = y0 * y0;
 226                 z1 = y1 * y1;
 227                 z2 = y2 * y2;
 228                 z3 = y3 * y3;
 229 
 230                 f0 = (float)(y0 + y0 * z0 * (S0 + z0 * (S1 + z0 * S2)));
 231                 f1 = (float)(y1 + y1 * z1 * (S0 + z1 * (S1 + z1 * S2)));
 232                 f2 = (float)(y2 + y2 * z2 * (S0 + z2 * (S1 + z2 * S2)));
 233                 f3 = (float)(y3 + y3 * z3 * (S0 + z3 * (S1 + z3 * S2)));
 234 
 235                 g0 = (float)(one + z0 * (mhalf + z0 * (C0 + z0 *
 236                     (C1 + z0 * C2))));
 237                 g1 = (float)(one + z1 * (mhalf + z1 * (C0 + z1 *
 238                     (C1 + z1 * C2))));
 239                 g2 = (float)(one + z2 * (mhalf + z2 * (C0 + z2 *
 240                     (C1 + z2 * C2))));
 241                 g3 = (float)(one + z3 * (mhalf + z3 * (C0 + z3 *
 242                     (C1 + z3 * C2))));
 243 
 244                 if (n0 & 2) {
 245                         f0 = -f0;
 246                         g0 = -g0;
 247                 }
 248                 if (n1 & 2) {
 249                         f1 = -f1;
 250                         g1 = -g1;
 251                 }
 252                 if (n2 & 2) {
 253                         f2 = -f2;
 254                         g2 = -g2;
 255                 }
 256                 if (n3 & 2) {
 257                         f3 = -f3;
 258                         g3 = -g3;
 259                 }
 260 
 261                 if (n0 & 1) {
 262                         *s = g0;
 263                         *c = -f0;
 264                 } else {
 265                         *s = f0;
 266                         *c = g0;
 267                 }
 268                 s += strides;
 269                 c += stridec;
 270 
 271                 if (n1 & 1) {
 272                         *s = g1;
 273                         *c = -f1;
 274                 } else {
 275                         *s = f1;
 276                         *c = g1;
 277                 }
 278                 s += strides;
 279                 c += stridec;
 280 
 281                 if (n2 & 1) {
 282                         *s = g2;
 283                         *c = -f2;
 284                 } else {
 285                         *s = f2;
 286                         *c = g2;
 287                 }
 288                 s += strides;
 289                 c += stridec;
 290 
 291                 if (n3 & 1) {
 292                         *s = g3;
 293                         *c = -f3;
 294                 } else {
 295                         *s = f3;
 296                         *c = g3;
 297                 }
 298                 continue;
 299 
 300 process1:
 301                 PROCESS(0);
 302                 continue;
 303 
 304 process2:
 305                 PROCESS(0);
 306                 PROCESS(1);
 307                 continue;
 308 
 309 process3:
 310                 PROCESS(0);
 311                 PROCESS(1);
 312                 PROCESS(2);
 313         }
 314 }