Print this page




  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 #include <sys/isa_defs.h>

  31 
  32 #ifdef _LITTLE_ENDIAN
  33 #define HI(x)   *(1+(int*)x)
  34 #define LO(x)   *(unsigned*)x
  35 #else
  36 #define HI(x)   *(int*)x
  37 #define LO(x)   *(1+(unsigned*)x)
  38 #endif
  39 
  40 #ifdef __RESTRICT
  41 #define restrict _Restrict
  42 #else
  43 #define restrict
  44 #endif
  45 
  46 /*
  47  * vsincos.c
  48  *
  49  * Vector sine and cosine function.  Just slight modifications to vcos.c.
  50  */


  81  * c[i*stridec] := cos( x[i*stridex] ), for i = 0..n.
  82  *
  83  * Calls __vlibm_vsincos_big to handle all elts which have abs >~ 1.647e+06.
  84  * Argument reduction is done here for elts pi/4 < arg < 1.647e+06.
  85  *
  86  * elts < 2^-27 use the approximation 1.0 ~ cos(x).
  87  */
  88 void
  89 __vsincos( int n, double * restrict x, int stridex,
  90                                 double * restrict y, int stridey,
  91                                 double * restrict c, int stridec )
  92 {
  93         double          x0_or_one[4], x1_or_one[4], x2_or_one[4];
  94         double          y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
  95         double          x0, x1, x2,
  96                                 *py0, *py1, *py2,
  97                                 *pc0, *pc1, *pc2,
  98                                 *xsave, *ysave, *csave;
  99         unsigned        hx0, hx1, hx2, xsb0, xsb1, xsb2;
 100         int                     i, biguns, nsave, sxsave, sysave, scsave;
 101 
 102         nsave = n;
 103         xsave = x;
 104         sxsave = stridex;
 105         ysave = y;
 106         sysave = stridey;
 107         csave = c;
 108         scsave = stridec;
 109         biguns = 0;
 110 
 111         do /* MAIN LOOP */
 112         {
 113 
 114                 /* Gotos here so _break_ exits MAIN LOOP. */
 115 LOOP0:  /* Find first arg in right range. */
 116                 xsb0 = HI(x); /* get most significant word */
 117                 hx0 = xsb0 & ~0x80000000; /* mask off sign bit */
 118                 if ( hx0 > 0x3fe921fb ) {
 119                         /* Too big: arg reduction needed, so leave for second part */
 120                         biguns = 1;
 121                         x += stridex;
 122                         y += stridey;
 123                         c += stridec;
 124                         i = 0;
 125                         if ( --n <= 0 )
 126                                 break;
 127                         goto LOOP0;
 128                 }
 129                 if ( hx0 < 0x3e400000 ) {
 130                         /* Too small.  cos x ~ 1, sin x ~ x. */
 131                         volatile int v = *x;
 132                         *c = 1.0;
 133                         *y = *x;
 134                         x += stridex;
 135                         y += stridey;
 136                         c += stridec;
 137                         i = 0;
 138                         if ( --n <= 0 )
 139                                 break;
 140                         goto LOOP0;
 141                 }
 142                 x0 = *x;
 143                 py0 = y;
 144                 pc0 = c;
 145                 x += stridex;
 146                 y += stridey;
 147                 c += stridec;
 148                 i = 1;
 149                 if ( --n <= 0 )
 150                         break;
 151 
 152 LOOP1: /* Get second arg, same as above. */
 153                 xsb1 = HI(x);
 154                 hx1 = xsb1 & ~0x80000000;
 155                 if ( hx1 > 0x3fe921fb )
 156                 {
 157                         biguns = 1;
 158                         x += stridex;
 159                         y += stridey;
 160                         c += stridec;
 161                         i = 1;
 162                         if ( --n <= 0 )
 163                                 break;
 164                         goto LOOP1;
 165                 }
 166                 if ( hx1 < 0x3e400000 )
 167                 {
 168                         volatile int v = *x;
 169                         *c = 1.0;
 170                         *y = *x;
 171                         x += stridex;
 172                         y += stridey;
 173                         c += stridec;
 174                         i = 1;
 175                         if ( --n <= 0 )
 176                                 break;
 177                         goto LOOP1;
 178                 }
 179                 x1 = *x;
 180                 py1 = y;
 181                 pc1 = c;
 182                 x += stridex;
 183                 y += stridey;
 184                 c += stridec;
 185                 i = 2;
 186                 if ( --n <= 0 )
 187                         break;
 188 
 189 LOOP2: /* Get third arg, same as above. */
 190                 xsb2 = HI(x);
 191                 hx2 = xsb2 & ~0x80000000;
 192                 if ( hx2 > 0x3fe921fb )
 193                 {
 194                         biguns = 1;
 195                         x += stridex;
 196                         y += stridey;
 197                         c += stridec;
 198                         i = 2;
 199                         if ( --n <= 0 )
 200                                 break;
 201                         goto LOOP2;
 202                 }
 203                 if ( hx2 < 0x3e400000 )
 204                 {
 205                         volatile int v = *x;
 206                         *c = 1.0;
 207                         *y = *x;
 208                         x += stridex;
 209                         y += stridey;
 210                         c += stridec;
 211                         i = 2;
 212                         if ( --n <= 0 )
 213                                 break;
 214                         goto LOOP2;
 215                 }
 216                 x2 = *x;
 217                 py2 = y;
 218                 pc2 = c;
 219 
 220                 /*
 221                  * 0x3fc40000 = 5/32 ~ 0.15625
 222                  * Get msb after subtraction.  Will be 1 only if
 223                  * hx0 - 5/32 is negative.
 224                  */
 225                 i = ( hx2 - 0x3fc40000 ) >> 31;




  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 #include <sys/isa_defs.h>
  31 #include <sys/ccompile.h>
  32 
  33 #ifdef _LITTLE_ENDIAN
  34 #define HI(x)   *(1+(int*)x)
  35 #define LO(x)   *(unsigned*)x
  36 #else
  37 #define HI(x)   *(int*)x
  38 #define LO(x)   *(1+(unsigned*)x)
  39 #endif
  40 
  41 #ifdef __RESTRICT
  42 #define restrict _Restrict
  43 #else
  44 #define restrict
  45 #endif
  46 
  47 /*
  48  * vsincos.c
  49  *
  50  * Vector sine and cosine function.  Just slight modifications to vcos.c.
  51  */


  82  * c[i*stridec] := cos( x[i*stridex] ), for i = 0..n.
  83  *
  84  * Calls __vlibm_vsincos_big to handle all elts which have abs >~ 1.647e+06.
  85  * Argument reduction is done here for elts pi/4 < arg < 1.647e+06.
  86  *
  87  * elts < 2^-27 use the approximation 1.0 ~ cos(x).
  88  */
  89 void
  90 __vsincos( int n, double * restrict x, int stridex,
  91                                 double * restrict y, int stridey,
  92                                 double * restrict c, int stridec )
  93 {
  94         double          x0_or_one[4], x1_or_one[4], x2_or_one[4];
  95         double          y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
  96         double          x0, x1, x2,
  97                         *py0, *py1, *py2,
  98                         *pc0, *pc1, *pc2,
  99                         *xsave, *ysave, *csave;
 100         unsigned        hx0, hx1, hx2, xsb0, xsb1, xsb2;
 101         int             i, biguns, nsave, sxsave, sysave, scsave;

 102         nsave = n;
 103         xsave = x;
 104         sxsave = stridex;
 105         ysave = y;
 106         sysave = stridey;
 107         csave = c;
 108         scsave = stridec;
 109         biguns = 0;
 110 
 111         do /* MAIN LOOP */
 112         {
 113 
 114                 /* Gotos here so _break_ exits MAIN LOOP. */
 115 LOOP0:  /* Find first arg in right range. */
 116                 xsb0 = HI(x); /* get most significant word */
 117                 hx0 = xsb0 & ~0x80000000; /* mask off sign bit */
 118                 if ( hx0 > 0x3fe921fb ) {
 119                         /* Too big: arg reduction needed, so leave for second part */
 120                         biguns = 1;
 121                         x += stridex;
 122                         y += stridey;
 123                         c += stridec;
 124                         i = 0;
 125                         if ( --n <= 0 )
 126                                 break;
 127                         goto LOOP0;
 128                 }
 129                 if ( hx0 < 0x3e400000 ) {
 130                         /* Too small.  cos x ~ 1, sin x ~ x. */

 131                         *c = 1.0;
 132                         *y = *x;
 133                         x += stridex;
 134                         y += stridey;
 135                         c += stridec;
 136                         i = 0;
 137                         if ( --n <= 0 )
 138                                 break;
 139                         goto LOOP0;
 140                 }
 141                 x0 = *x;
 142                 py0 = y;
 143                 pc0 = c;
 144                 x += stridex;
 145                 y += stridey;
 146                 c += stridec;
 147                 i = 1;
 148                 if ( --n <= 0 )
 149                         break;
 150 
 151 LOOP1: /* Get second arg, same as above. */
 152                 xsb1 = HI(x);
 153                 hx1 = xsb1 & ~0x80000000;
 154                 if ( hx1 > 0x3fe921fb )
 155                 {
 156                         biguns = 1;
 157                         x += stridex;
 158                         y += stridey;
 159                         c += stridec;
 160                         i = 1;
 161                         if ( --n <= 0 )
 162                                 break;
 163                         goto LOOP1;
 164                 }
 165                 if ( hx1 < 0x3e400000 )
 166                 {

 167                         *c = 1.0;
 168                         *y = *x;
 169                         x += stridex;
 170                         y += stridey;
 171                         c += stridec;
 172                         i = 1;
 173                         if ( --n <= 0 )
 174                                 break;
 175                         goto LOOP1;
 176                 }
 177                 x1 = *x;
 178                 py1 = y;
 179                 pc1 = c;
 180                 x += stridex;
 181                 y += stridey;
 182                 c += stridec;
 183                 i = 2;
 184                 if ( --n <= 0 )
 185                         break;
 186 
 187 LOOP2: /* Get third arg, same as above. */
 188                 xsb2 = HI(x);
 189                 hx2 = xsb2 & ~0x80000000;
 190                 if ( hx2 > 0x3fe921fb )
 191                 {
 192                         biguns = 1;
 193                         x += stridex;
 194                         y += stridey;
 195                         c += stridec;
 196                         i = 2;
 197                         if ( --n <= 0 )
 198                                 break;
 199                         goto LOOP2;
 200                 }
 201                 if ( hx2 < 0x3e400000 )
 202                 {

 203                         *c = 1.0;
 204                         *y = *x;
 205                         x += stridex;
 206                         y += stridey;
 207                         c += stridec;
 208                         i = 2;
 209                         if ( --n <= 0 )
 210                                 break;
 211                         goto LOOP2;
 212                 }
 213                 x2 = *x;
 214                 py2 = y;
 215                 pc2 = c;
 216 
 217                 /*
 218                  * 0x3fc40000 = 5/32 ~ 0.15625
 219                  * Get msb after subtraction.  Will be 1 only if
 220                  * hx0 - 5/32 is negative.
 221                  */
 222                 i = ( hx2 - 0x3fc40000 ) >> 31;