webrev_693-libm-all-changes-since-original-2012-v02 Sdiff usr/src/lib/libmvec/common/_

Print this page

  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 #include <sys/isa_defs.h>

  31 
  32 #ifdef _LITTLE_ENDIAN
  33 #define HI(x)   *(1+(int*)x)
  34 #define LO(x)   *(unsigned*)x
  35 #else
  36 #define HI(x)   *(int*)x
  37 #define LO(x)   *(1+(unsigned*)x)
  38 #endif
  39 
  40 #ifdef __RESTRICT
  41 #define restrict _Restrict
  42 #else
  43 #define restrict
  44 #endif
  45 
  46 /*
  47  * vcos.1.c
  48  *
  49  * Vector cosine function.  Just slight modifications to vsin.8.c, mainly
  50  * in the primary range part.

  81 static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 };
  82 
  83 /* Don't __ the following; acomp will handle it */
  84 extern double fabs( double );
  85 extern void __vlibm_vcos_big( int, double *, int, double *, int, int );
  86 
  87 /*
  88  * y[i*stridey] := cos( x[i*stridex] ), for i = 0..n.
  89  *
  90  * Calls __vlibm_vcos_big to handle all elts which have abs >~ 1.647e+06.
  91  * Argument reduction is done here for elts pi/4 < arg < 1.647e+06.
  92  *
  93  * elts < 2^-27 use the approximation 1.0 ~ cos(x).
  94  */
  95 void
  96 __vcos( int n, double * restrict x, int stridex, double * restrict y,
  97         int stridey )
  98 {
  99         double          x0_or_one[4], x1_or_one[4], x2_or_one[4];
 100         double          y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
 101         double          x0, x1, x2, *py0, *py1, *py2, *xsave, *ysave;
 102         unsigned        hx0, hx1, hx2, xsb0, xsb1, xsb2;
 103         int                     i, biguns, nsave, sxsave, sysave;
 104 
 105         nsave = n;
 106         xsave = x;
 107         sxsave = stridex;
 108         ysave = y;
 109         sysave = stridey;
 110         biguns = 0;
 111 
 112         do /* MAIN LOOP */
 113         {
 114                 /* Gotos here so _break_ exits MAIN LOOP. */
 115 LOOP0:  /* Find first arg in right range. */
 116                 xsb0 = HI(x); /* get most significant word */
 117                 hx0 = xsb0 & ~0x80000000; /* mask off sign bit */
 118                 if ( hx0 > 0x3fe921fb ) {
 119                         /* Too big: arg reduction needed, so leave for second part */
 120                         biguns = 1;
 121                         goto MEDIUM;
 122                 }
 123                 if ( hx0 < 0x3e400000 ) {
 124                         /* Too small.  cos x ~ 1. */
 125                         volatile int v = *x;
 126                         *y = 1.0;
 127                         x += stridex;
 128                         y += stridey;
 129                         i = 0;
 130                         if ( --n <= 0 )
 131                                 break;
 132                         goto LOOP0;
 133                 }
 134                 x0 = *x;
 135                 py0 = y;
 136                 x += stridex;
 137                 y += stridey;
 138                 i = 1;
 139                 if ( --n <= 0 )
 140                         break;
 141 
 142 LOOP1: /* Get second arg, same as above. */
 143                 xsb1 = HI(x);
 144                 hx1 = xsb1 & ~0x80000000;
 145                 if ( hx1 > 0x3fe921fb )
 146                 {
 147                         biguns = 2;
 148                         goto MEDIUM;
 149                 }
 150                 if ( hx1 < 0x3e400000 )
 151                 {
 152                         volatile int v = *x;
 153                         *y = 1.0;
 154                         x += stridex;
 155                         y += stridey;
 156                         i = 1;
 157                         if ( --n <= 0 )
 158                                 break;
 159                         goto LOOP1;
 160                 }
 161                 x1 = *x;
 162                 py1 = y;
 163                 x += stridex;
 164                 y += stridey;
 165                 i = 2;
 166                 if ( --n <= 0 )
 167                         break;
 168 
 169 LOOP2: /* Get third arg, same as above. */
 170                 xsb2 = HI(x);
 171                 hx2 = xsb2 & ~0x80000000;
 172                 if ( hx2 > 0x3fe921fb )
 173                 {
 174                         biguns = 3;
 175                         goto MEDIUM;
 176                 }
 177                 if ( hx2 < 0x3e400000 )
 178                 {
 179                         volatile int v = *x;
 180                         *y = 1.0;
 181                         x += stridex;
 182                         y += stridey;
 183                         i = 2;
 184                         if ( --n <= 0 )
 185                                 break;
 186                         goto LOOP2;
 187                 }
 188                 x2 = *x;
 189                 py2 = y;
 190 
 191                 /*
 192                  * 0x3fc40000 = 5/32 ~ 0.15625
 193                  * Get msb after subtraction.  Will be 1 only if
 194                  * hx0 - 5/32 is negative.
 195                  */
 196                 i = ( hx0 - 0x3fc40000 ) >> 31;
 197                 i |= ( ( hx1 - 0x3fc40000 ) >> 30 ) & 2;
 198                 i |= ( ( hx2 - 0x3fc40000 ) >> 29 ) & 4;
 199                 switch ( i )

  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 #include <sys/isa_defs.h>
  31 #include <sys/ccompile.h>
  32 
  33 #ifdef _LITTLE_ENDIAN
  34 #define HI(x)   *(1+(int*)x)
  35 #define LO(x)   *(unsigned*)x
  36 #else
  37 #define HI(x)   *(int*)x
  38 #define LO(x)   *(1+(unsigned*)x)
  39 #endif
  40 
  41 #ifdef __RESTRICT
  42 #define restrict _Restrict
  43 #else
  44 #define restrict
  45 #endif
  46 
  47 /*
  48  * vcos.1.c
  49  *
  50  * Vector cosine function.  Just slight modifications to vsin.8.c, mainly
  51  * in the primary range part.

  82 static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 };
  83 
  84 /* Don't __ the following; acomp will handle it */
  85 extern double fabs( double );
  86 extern void __vlibm_vcos_big( int, double *, int, double *, int, int );
  87 
  88 /*
  89  * y[i*stridey] := cos( x[i*stridex] ), for i = 0..n.
  90  *
  91  * Calls __vlibm_vcos_big to handle all elts which have abs >~ 1.647e+06.
  92  * Argument reduction is done here for elts pi/4 < arg < 1.647e+06.
  93  *
  94  * elts < 2^-27 use the approximation 1.0 ~ cos(x).
  95  */
  96 void
  97 __vcos( int n, double * restrict x, int stridex, double * restrict y,
  98         int stridey )
  99 {
 100         double          x0_or_one[4], x1_or_one[4], x2_or_one[4];
 101         double          y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
 102         double          x0, x1, x2, *py0 = 0, *py1 = 0, *py2, *xsave, *ysave;
 103         unsigned        hx0, hx1, hx2, xsb0, xsb1 = 0, xsb2;
 104         int             i, biguns, nsave, sxsave, sysave;

 105         nsave = n;
 106         xsave = x;
 107         sxsave = stridex;
 108         ysave = y;
 109         sysave = stridey;
 110         biguns = 0;
 111 
 112         do /* MAIN LOOP */
 113         {
 114                 /* Gotos here so _break_ exits MAIN LOOP. */
 115 LOOP0:  /* Find first arg in right range. */
 116                 xsb0 = HI(x); /* get most significant word */
 117                 hx0 = xsb0 & ~0x80000000; /* mask off sign bit */
 118                 if ( hx0 > 0x3fe921fb ) {
 119                         /* Too big: arg reduction needed, so leave for second part */
 120                         biguns = 1;
 121                         goto MEDIUM;
 122                 }
 123                 if ( hx0 < 0x3e400000 ) {
 124                         /* Too small.  cos x ~ 1. */

 125                         *y = 1.0;
 126                         x += stridex;
 127                         y += stridey;
 128                         i = 0;
 129                         if ( --n <= 0 )
 130                                 break;
 131                         goto LOOP0;
 132                 }
 133                 x0 = *x;
 134                 py0 = y;
 135                 x += stridex;
 136                 y += stridey;
 137                 i = 1;
 138                 if ( --n <= 0 )
 139                         break;
 140 
 141 LOOP1: /* Get second arg, same as above. */
 142                 xsb1 = HI(x);
 143                 hx1 = xsb1 & ~0x80000000;
 144                 if ( hx1 > 0x3fe921fb )
 145                 {
 146                         biguns = 2;
 147                         goto MEDIUM;
 148                 }
 149                 if ( hx1 < 0x3e400000 )
 150                 {

 151                         *y = 1.0;
 152                         x += stridex;
 153                         y += stridey;
 154                         i = 1;
 155                         if ( --n <= 0 )
 156                                 break;
 157                         goto LOOP1;
 158                 }
 159                 x1 = *x;
 160                 py1 = y;
 161                 x += stridex;
 162                 y += stridey;
 163                 i = 2;
 164                 if ( --n <= 0 )
 165                         break;
 166 
 167 LOOP2: /* Get third arg, same as above. */
 168                 xsb2 = HI(x);
 169                 hx2 = xsb2 & ~0x80000000;
 170                 if ( hx2 > 0x3fe921fb )
 171                 {
 172                         biguns = 3;
 173                         goto MEDIUM;
 174                 }
 175                 if ( hx2 < 0x3e400000 )
 176                 {

 177                         *y = 1.0;
 178                         x += stridex;
 179                         y += stridey;
 180                         i = 2;
 181                         if ( --n <= 0 )
 182                                 break;
 183                         goto LOOP2;
 184                 }
 185                 x2 = *x;
 186                 py2 = y;
 187 
 188                 /*
 189                  * 0x3fc40000 = 5/32 ~ 0.15625
 190                  * Get msb after subtraction.  Will be 1 only if
 191                  * hx0 - 5/32 is negative.
 192                  */
 193                 i = ( hx0 - 0x3fc40000 ) >> 31;
 194                 i |= ( ( hx1 - 0x3fc40000 ) >> 30 ) & 2;
 195                 i |= ( ( hx2 - 0x3fc40000 ) >> 29 ) & 4;
 196                 switch ( i )