1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  23  */
  24 /*
  25  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  26  * Use is subject to license terms.
  27  */
  28 
  29         .file   "__vexpf.S"
  30 
  31 #include "libm.h"
  32 
  33         RO_DATA
  34         .align  64
  35 !!  2^(i/256) - ((i & 0xf0) << 44), i = [0, 255]
     !!
     !!  Layout, as byte offsets from .CONST_TBL (these are the offsets the
     !!  code below uses when loading from the table base register):
     !!     0 .. 2047   256 doubles, one per i, each written as two 32-bit
     !!                 words with the high-order word first
     !!     2048, 2052  1.0e30f and 1.0e-30f; selected by the sign of X and
     !!                 squared to produce the overflow / underflow result
     !!     2056        KA2        (double)
     !!     2064        KA1        (double)
     !!     2072        K256ONLN2  (double)
     !!     2080        THRESHOLD  (float bit pattern, compared as an integer)
     !!     2084        THRESHOLDL (float)
  36 .CONST_TBL:
  37         .word   0x3ff00000, 0x00000000, 0x3ff00b1a, 0xfa5abcbf
  38         .word   0x3ff0163d, 0xa9fb3335, 0x3ff02168, 0x143b0281
  39         .word   0x3ff02c9a, 0x3e778061, 0x3ff037d4, 0x2e11bbcc
  40         .word   0x3ff04315, 0xe86e7f85, 0x3ff04e5f, 0x72f654b1
  41         .word   0x3ff059b0, 0xd3158574, 0x3ff0650a, 0x0e3c1f89
  42         .word   0x3ff0706b, 0x29ddf6de, 0x3ff07bd4, 0x2b72a836
  43         .word   0x3ff08745, 0x18759bc8, 0x3ff092bd, 0xf66607e0
  44         .word   0x3ff09e3e, 0xcac6f383, 0x3ff0a9c7, 0x9b1f3919
  45         .word   0x3fefb558, 0x6cf9890f, 0x3fefc0f1, 0x45e46c85
  46         .word   0x3fefcc92, 0x2b7247f7, 0x3fefd83b, 0x23395dec
  47         .word   0x3fefe3ec, 0x32d3d1a2, 0x3fefefa5, 0x5fdfa9c5
  48         .word   0x3feffb66, 0xaffed31b, 0x3ff00730, 0x28d7233e
  49         .word   0x3ff01301, 0xd0125b51, 0x3ff01edb, 0xab5e2ab6
  50         .word   0x3ff02abd, 0xc06c31cc, 0x3ff036a8, 0x14f204ab
  51         .word   0x3ff0429a, 0xaea92de0, 0x3ff04e95, 0x934f312e
  52         .word   0x3ff05a98, 0xc8a58e51, 0x3ff066a4, 0x5471c3c2
  53         .word   0x3fef72b8, 0x3c7d517b, 0x3fef7ed4, 0x8695bbc0
  54         .word   0x3fef8af9, 0x388c8dea, 0x3fef9726, 0x58375d2f
  55         .word   0x3fefa35b, 0xeb6fcb75, 0x3fefaf99, 0xf8138a1c
  56         .word   0x3fefbbe0, 0x84045cd4, 0x3fefc82f, 0x95281c6b
  57         .word   0x3fefd487, 0x3168b9aa, 0x3fefe0e7, 0x5eb44027
  58         .word   0x3fefed50, 0x22fcd91d, 0x3feff9c1, 0x8438ce4d
  59         .word   0x3ff0063b, 0x88628cd6, 0x3ff012be, 0x3578a819
  60         .word   0x3ff01f49, 0x917ddc96, 0x3ff02bdd, 0xa27912d1
  61         .word   0x3fef387a, 0x6e756238, 0x3fef451f, 0xfb82140a
  62         .word   0x3fef51ce, 0x4fb2a63f, 0x3fef5e85, 0x711ece75
  63         .word   0x3fef6b45, 0x65e27cdd, 0x3fef780e, 0x341ddf29
  64         .word   0x3fef84df, 0xe1f56381, 0x3fef91ba, 0x7591bb70
  65         .word   0x3fef9e9d, 0xf51fdee1, 0x3fefab8a, 0x66d10f13
  66         .word   0x3fefb87f, 0xd0dad990, 0x3fefc57e, 0x39771b2f
  67         .word   0x3fefd285, 0xa6e4030b, 0x3fefdf96, 0x1f641589
  68         .word   0x3fefecaf, 0xa93e2f56, 0x3feff9d2, 0x4abd886b
  69         .word   0x3fef06fe, 0x0a31b715, 0x3fef1432, 0xedeeb2fd
  70         .word   0x3fef2170, 0xfc4cd831, 0x3fef2eb8, 0x3ba8ea32
  71         .word   0x3fef3c08, 0xb26416ff, 0x3fef4962, 0x66e3fa2d
  72         .word   0x3fef56c5, 0x5f929ff1, 0x3fef6431, 0xa2de883b
  73         .word   0x3fef71a7, 0x373aa9cb, 0x3fef7f26, 0x231e754a
  74         .word   0x3fef8cae, 0x6d05d866, 0x3fef9a40, 0x1b7140ef
  75         .word   0x3fefa7db, 0x34e59ff7, 0x3fefb57f, 0xbfec6cf4
  76         .word   0x3fefc32d, 0xc313a8e5, 0x3fefd0e5, 0x44ede173
  77         .word   0x3feedea6, 0x4c123422, 0x3feeec70, 0xdf1c5175
  78         .word   0x3feefa45, 0x04ac801c, 0x3fef0822, 0xc367a024
  79         .word   0x3fef160a, 0x21f72e2a, 0x3fef23fb, 0x2709468a
  80         .word   0x3fef31f5, 0xd950a897, 0x3fef3ffa, 0x3f84b9d4
  81         .word   0x3fef4e08, 0x6061892d, 0x3fef5c20, 0x42a7d232
  82         .word   0x3fef6a41, 0xed1d0057, 0x3fef786d, 0x668b3237
  83         .word   0x3fef86a2, 0xb5c13cd0, 0x3fef94e1, 0xe192aed2
  84         .word   0x3fefa32a, 0xf0d7d3de, 0x3fefb17d, 0xea6db7d7
  85         .word   0x3feebfda, 0xd5362a27, 0x3feece41, 0xb817c114
  86         .word   0x3feedcb2, 0x99fddd0d, 0x3feeeb2d, 0x81d8abff
  87         .word   0x3feef9b2, 0x769d2ca7, 0x3fef0841, 0x7f4531ee
  88         .word   0x3fef16da, 0xa2cf6642, 0x3fef257d, 0xe83f4eef
  89         .word   0x3fef342b, 0x569d4f82, 0x3fef42e2, 0xf4f6ad27
  90         .word   0x3fef51a4, 0xca5d920f, 0x3fef6070, 0xdde910d2
  91         .word   0x3fef6f47, 0x36b527da, 0x3fef7e27, 0xdbe2c4cf
  92         .word   0x3fef8d12, 0xd497c7fd, 0x3fef9c08, 0x27ff07cc
  93         .word   0x3feeab07, 0xdd485429, 0x3feeba11, 0xfba87a03
  94         .word   0x3feec926, 0x8a5946b7, 0x3feed845, 0x90998b93
  95         .word   0x3feee76f, 0x15ad2148, 0x3feef6a3, 0x20dceb71
  96         .word   0x3fef05e1, 0xb976dc09, 0x3fef152a, 0xe6cdf6f4
  97         .word   0x3fef247e, 0xb03a5585, 0x3fef33dd, 0x1d1929fd
  98         .word   0x3fef4346, 0x34ccc320, 0x3fef52b9, 0xfebc8fb7
  99         .word   0x3fef6238, 0x82552225, 0x3fef71c1, 0xc70833f6
 100         .word   0x3fef8155, 0xd44ca973, 0x3fef90f4, 0xb19e9538
 101         .word   0x3feea09e, 0x667f3bcd, 0x3feeb052, 0xfa75173e
 102         .word   0x3feec012, 0x750bdabf, 0x3feecfdc, 0xddd47645
 103         .word   0x3feedfb2, 0x3c651a2f, 0x3feeef92, 0x98593ae5
 104         .word   0x3feeff7d, 0xf9519484, 0x3fef0f74, 0x66f42e87
 105         .word   0x3fef1f75, 0xe8ec5f74, 0x3fef2f82, 0x86ead08a
 106         .word   0x3fef3f9a, 0x48a58174, 0x3fef4fbd, 0x35d7cbfd
 107         .word   0x3fef5feb, 0x564267c9, 0x3fef7024, 0xb1ab6e09
 108         .word   0x3fef8069, 0x4fde5d3f, 0x3fef90b9, 0x38ac1cf6
 109         .word   0x3feea114, 0x73eb0187, 0x3feeb17b, 0x0976cfdb
 110         .word   0x3feec1ed, 0x0130c132, 0x3feed26a, 0x62ff86f0
 111         .word   0x3feee2f3, 0x36cf4e62, 0x3feef387, 0x8491c491
 112         .word   0x3fef0427, 0x543e1a12, 0x3fef14d2, 0xadd106d9
 113         .word   0x3fef2589, 0x994cce13, 0x3fef364c, 0x1eb941f7
 114         .word   0x3fef471a, 0x4623c7ad, 0x3fef57f4, 0x179f5b21
 115         .word   0x3fef68d9, 0x9b4492ed, 0x3fef79ca, 0xd931a436
 116         .word   0x3fef8ac7, 0xd98a6699, 0x3fef9bd0, 0xa478580f
 117         .word   0x3feeace5, 0x422aa0db, 0x3feebe05, 0xbad61778
 118         .word   0x3feecf32, 0x16b5448c, 0x3feee06a, 0x5e0866d9
 119         .word   0x3feef1ae, 0x99157736, 0x3fef02fe, 0xd0282c8a
 120         .word   0x3fef145b, 0x0b91ffc6, 0x3fef25c3, 0x53aa2fe2
 121         .word   0x3fef3737, 0xb0cdc5e5, 0x3fef48b8, 0x2b5f98e5
 122         .word   0x3fef5a44, 0xcbc8520f, 0x3fef6bdd, 0x9a7670b3
 123         .word   0x3fef7d82, 0x9fde4e50, 0x3fef8f33, 0xe47a22a2
 124         .word   0x3fefa0f1, 0x70ca07ba, 0x3fefb2bb, 0x4d53fe0d
 125         .word   0x3feec491, 0x82a3f090, 0x3feed674, 0x194bb8d5
 126         .word   0x3feee863, 0x19e32323, 0x3feefa5e, 0x8d07f29e
 127         .word   0x3fef0c66, 0x7b5de565, 0x3fef1e7a, 0xed8eb8bb
 128         .word   0x3fef309b, 0xec4a2d33, 0x3fef42c9, 0x80460ad8
 129         .word   0x3fef5503, 0xb23e255d, 0x3fef674a, 0x8af46052
 130         .word   0x3fef799e, 0x1330b358, 0x3fef8bfe, 0x53c12e59
 131         .word   0x3fef9e6b, 0x5579fdbf, 0x3fefb0e5, 0x21356eba
 132         .word   0x3fefc36b, 0xbfd3f37a, 0x3fefd5ff, 0x3a3c2774
 133         .word   0x3feee89f, 0x995ad3ad, 0x3feefb4c, 0xe622f2ff
 134         .word   0x3fef0e07, 0x298db666, 0x3fef20ce, 0x6c9a8952
 135         .word   0x3fef33a2, 0xb84f15fb, 0x3fef4684, 0x15b749b1
 136         .word   0x3fef5972, 0x8de5593a, 0x3fef6c6e, 0x29f1c52a
 137         .word   0x3fef7f76, 0xf2fb5e47, 0x3fef928c, 0xf22749e4
 138         .word   0x3fefa5b0, 0x30a1064a, 0x3fefb8e0, 0xb79a6f1f
 139         .word   0x3fefcc1e, 0x904bc1d2, 0x3fefdf69, 0xc3f3a207
 140         .word   0x3feff2c2, 0x5bd71e09, 0x3ff00628, 0x6141b33d
 141         .word   0x3fef199b, 0xdd85529c, 0x3fef2d1c, 0xd9fa652c
 142         .word   0x3fef40ab, 0x5fffd07a, 0x3fef5447, 0x78fafb22
 143         .word   0x3fef67f1, 0x2e57d14b, 0x3fef7ba8, 0x8988c933
 144         .word   0x3fef8f6d, 0x9406e7b5, 0x3fefa340, 0x5751c4db
 145         .word   0x3fefb720, 0xdcef9069, 0x3fefcb0f, 0x2e6d1675
 146         .word   0x3fefdf0b, 0x555dc3fa, 0x3feff315, 0x5b5bab74
 147         .word   0x3ff0072d, 0x4a07897c, 0x3ff01b53, 0x2b08c968
 148         .word   0x3ff02f87, 0x080d89f2, 0x3ff043c8, 0xeacaa1d6
 149         .word   0x3fef5818, 0xdcfba487, 0x3fef6c76, 0xe862e6d3
 150         .word   0x3fef80e3, 0x16c98398, 0x3fef955d, 0x71ff6075
 151         .word   0x3fefa9e6, 0x03db3285, 0x3fefbe7c, 0xd63a8315
 152         .word   0x3fefd321, 0xf301b460, 0x3fefe7d5, 0x641c0658
 153         .word   0x3feffc97, 0x337b9b5f, 0x3ff01167, 0x6b197d17
 154         .word   0x3ff02646, 0x14f5a129, 0x3ff03b33, 0x3b16ee12
 155         .word   0x3ff0502e, 0xe78b3ff6, 0x3ff06539, 0x24676d76
 156         .word   0x3ff07a51, 0xfbc74c83, 0x3ff08f79, 0x77cdb740
 157         .word   0x3fefa4af, 0xa2a490da, 0x3fefb9f4, 0x867cca6e
 158         .word   0x3fefcf48, 0x2d8e67f1, 0x3fefe4aa, 0xa2188510
 159         .word   0x3feffa1b, 0xee615a27, 0x3ff00f9c, 0x1cb6412a
 160         .word   0x3ff0252b, 0x376bba97, 0x3ff03ac9, 0x48dd7274
 161         .word   0x3ff05076, 0x5b6e4540, 0x3ff06632, 0x798844f8
 162         .word   0x3ff07bfd, 0xad9cbe14, 0x3ff091d8, 0x02243c89
 163         .word   0x3ff0a7c1, 0x819e90d8, 0x3ff0bdba, 0x3692d514
 164         .word   0x3ff0d3c2, 0x2b8f71f1, 0x3ff0e9d9, 0x6b2a23d9
 165 
     ! Scalar constants that follow the 256-entry table (offset 2048 onward).
 166         .word   0x7149f2ca, 0x0da24260  ! +2048: 1.0e30f, +2052: 1.0e-30f
 167         .word   0x3ecebfbe, 0x9d182250  ! +2056: KA2 = 3.66556671660783833261e-06
 168         .word   0x3f662e43, 0xe2528362  ! +2064: KA1 = 2.70760782821392980564e-03
 169         .word   0x40771547, 0x652b82fe  ! +2072: K256ONLN2 = 369.3299304675746271
 170         .word   0x42aeac4f, 0x42b17218  ! +2080: THRESHOLD = 87.3365402f
 171                                         ! +2084: THRESHOLDL = 88.7228394f
 172 ! local storage indices
     ! Eight 4-byte scratch slots addressed relative to %fp.  The code stores
     ! each truncated k from a floating-point register into one of these slots
     ! and reloads it into an integer register to build the table index.
 173 
 174 #define tmp0            STACK_BIAS-32
 175 #define tmp1            STACK_BIAS-28
 176 #define tmp2            STACK_BIAS-24
 177 #define tmp3            STACK_BIAS-20
 178 #define tmp4            STACK_BIAS-16
 179 #define tmp5            STACK_BIAS-12
 180 #define tmp6            STACK_BIAS-8
 181 #define tmp7            STACK_BIAS-4
 182 
 183 ! sizeof temp storage - must be a multiple of 16 for V9
     ! (8 slots * 4 bytes = 0x20; subtracted from %sp by the save at entry)
 184 #define tmps            0x20
 185 
     ! Register aliases.
     ! NOTE: the digit in the two global-register alias names below does not
     ! match the register each one expands to:
     !   the CONST_TBL alias is %g5 (table base address, set up by PIC_SET)
     !   the CONST alias is %g1     (the 0x7fffffff mask used to form ax)
     ! The threshold alias in %i5 holds the integer bit pattern of 87.3365402f.
 186 #define I5_THRESHOLD    %i5
 187 #define G1_CONST_TBL    %g5
 188 #define G5_CONST        %g1
 189 
     ! Double-precision polynomial / scaling constants, loaded once at entry.
 190 #define F62_K256ONLN2   %f62
 191 #define F60_KA2         %f60
 192 #define F58_KA1         %f58
 193 
     ! Single-precision upper bound (88.7228394f) used by the out-of-range test.
 194 #define THRESHOLDL      %f0
 195 
 196 ! register use
 197 ! i0  n
 198 ! i1  x
 199 ! i2  stridex
 200 ! i3  y
 201 ! i4  stridey
 202 
 203 ! i5  0x42aeac4f (87.3365402f)
 204 
 205 ! g1  0x7fffffff
 206 ! g5  CONST_TBL
 207 
 208 ! f62 K256ONLN2 = 369.3299304675746271
 209 ! f60 KA2 = 3.66556671660783833261e-06
 210 ! f58 KA1 = 2.70760782821392980564e-03
 211 
 212 
 213 !               !!!!!  Algorithm  !!!!!
 214 !
 215 !  double y, dtmp, drez;
 216 !  int k, sign, Xi;
 217 !  float X, Y;
 218 !  int THRESHOLD = 0x42aeac4f; /* 87.3365402f */
 219 !  float THRESHOLDL = 88.7228394f;
 220 !  double KA2 = 3.66556671660783833261e-06;
 221 !  double KA1 = 2.70760782821392980564e-03;
 222 !  double K256ONLN2 = 369.3299304675746271;
 223 !  char *CONST_TBL;
 224 !
 225 !  X  = px[0];
 226 !  Xi = ((int*)px)[0];
 227 !  ax = Xi & 0x7fffffff;
 228 !
 229 !  if (ax > THRESHOLD) {
 230 !    sign = ((unsigned)Xi >> 29) & 4;
 231 !    if (ax >= 0x7f800000) {      /* Inf or NaN */
 232 !      if (ax > 0x7f800000) {     /* NaN */
 233 !        Y = X * X;               /* NaN -> NaN */
 234 !        return Y;
 235 !      }
 236 !      Y = (sign) ? zero : X;     /* +Inf -> +Inf , -Inf -> zero */
 237 !      return Y;
 238 !    }
 239 !
 240 !    if ( X < 0.0f || X >= THRESHOLDL ) {
 241 !      Y = ((float*)(CONST_TBL + 2048 + sign))[0];
 242 !         /* X >= THRESHOLDL : Y = 1.0e+30f */
 243 !         /* X < -THRESHOLD  : Y = 1.0e-30f */
 244 !      Y =  Y * Y;
 245 !         /* X >= THRESHOLDL : +Inf + overflow  */
 246 !         /* X < -THRESHOLD  : +0 + underflow */
 247 !      return Y;
 248 !    }
 249 !  }
 250 !  vis_write_gsr(12 << 3);
 251 !  y = (double) X;
 252 !  y = K256ONLN2 * y;
 253 !  k = (int) y;
 254 !  dtmp = (double) k;
 255 !  y -= dtmp;
 256 !  dtmp = y * KA2;
 257 !  dtmp += KA1;
 258 !  y *= dtmp;
 259 !  /* net effect of the three lines above: y = (y * KA2 + KA1) * y, applied once */
 260 !  ((int*)&drez)[0] = k;
 261 !  ((int*)&drez)[1] = 0;
 262 !  ((float*)&drez)[0] = vis_fpackfix(drez);
 263 !  k &= 255;
 264 !  k <<= 3;
 265 !  dtmp = ((double*)(CONST_TBL + k))[0];
 266 !  drez = vis_fpadd32(drez,dtmp);
 267 !  y *= drez;
 268 !  y += drez;
 269 !  Y = (float) y;
 270 !
 271 !
 272 !  fstod %f16,%f40                      ! y = (double) X
 273 !  fmuld F62_K256ONLN2,%f40,%f40        ! y *= K256ONLN2
 274 !  fdtoi %f40,%f16                      ! k = (int) y
 275 !  st  %f16,[%fp+tmp0]                  ! store k
 276 !  fitod %f16,%f34                      ! dtmp = (double) k
 277 !  fpackfix  %f16,%f16                  ! ((float*)&drez)[0] = vis_fpackfix(drez)
 278 !  fsubd %f40,%f34,%f40                 ! y -= dtmp
 279 !  fmuld F60_KA2,%f40,%f34              ! dtmp = y * KA2
 280 !  faddd F58_KA1,%f34,%f34              ! dtmp += KA1
 281 !  ld  [%fp+tmp0],%o0                   ! load k
 282 !  fmuld %f34,%f40,%f40                 ! y *= dtmp
 283 !  and %o0,255,%o0                      ! k &= 255
 284 !  sll  %o0,3,%o0                       ! k <<= 3
 285 !  ldd [G1_CONST_TBL+%o0],%f34          ! dtmp = ((double*)(CONST_TBL + k))[0]
 286 !  fpadd32 %f16,%f34,%f34               ! drez = vis_fpadd32(drez,dtmp)
 287 !  fmuld %f34,%f40,%f40                 ! y *= drez
 288 !  faddd %f34,%f40,%f40                 ! y += drez
 289 !  fdtos %f40,%f26                      ! (float) y
 290 !--------------------------------------------------------------------
 291 
 292         ENTRY(__vexpf)
 293         save    %sp,-SA(MINFRAME)-tmps,%sp
 294         PIC_SETUP(l7)
 295         PIC_SET(l7,.CONST_TBL,g5)
 296 
 297         wr      %g0,0x82,%asi           ! set %asi for non-faulting loads
 298         wr      %g0,0x60,%gsr
 299 
 300         sll     %i2,2,%i2
 301         sll     %i4,2,%i4
 302 
 303         ldd     [G1_CONST_TBL+2056],F60_KA2
 304         sethi   %hi(0x7ffffc00),G5_CONST
 305         ldd     [G1_CONST_TBL+2064],F58_KA1
 306         add     G5_CONST,1023,G5_CONST
 307         ldd     [G1_CONST_TBL+2072],F62_K256ONLN2
 308         ld      [G1_CONST_TBL+2080],I5_THRESHOLD
 309         ld      [G1_CONST_TBL+2084],THRESHOLDL
 310 
 311         subcc   %i0,8,%i0
 312         bneg,pn %icc,.tail
 313         fzeros  %f3
 314 
 315 .main_loop_preload:
 316 
 317 ! preload 8 elements and get absolute values
 318         ld      [%i1],%l0               ! (0) Xi = ((int*)px)[0]
 319         fzeros  %f5
 320         ld      [%i1],%f16              ! (0) X = px[0]
 321         fzeros  %f7
 322         add     %i1,%i2,%o5             ! px += stridex
 323         ld      [%o5],%l1               ! (1) Xi = ((int*)px)[0]
 324         and     %l0,G5_CONST,%l0        ! (0) ax = Xi & 0x7fffffff
 325         fzeros  %f9
 326         ld      [%o5],%f2               ! (1) X = px[0]
 327         fzeros  %f11
 328         add     %o5,%i2,%i1             ! px += stridex
 329         ld      [%i1],%l2               ! (2) Xi = ((int*)px)[0]
 330         and     %l1,G5_CONST,%l1        ! (1) ax = Xi & 0x7fffffff
 331         fzeros  %f13
 332         ld      [%i1],%f4               ! (2) X = px[0]
 333         fzeros  %f15
 334         add     %i1,%i2,%o5             ! px += stridex
 335         ld      [%o5],%l3               ! (3) Xi = ((int*)px)[0]
 336         and     %l2,G5_CONST,%l2        ! (2) ax = Xi & 0x7fffffff
 337         fzeros  %f17
 338         ld      [%o5],%f6               ! (3) X = px[0]
 339         add     %o5,%i2,%o0             ! px += stridex
 340         ld      [%o0],%l4               ! (4) Xi = ((int*)px)[0]
 341         and     %l3,G5_CONST,%l3        ! (3) ax = Xi & 0x7fffffff
 342         add     %o0,%i2,%o1             ! px += stridex
 343         ld      [%o1],%l5               ! (5) Xi = ((int*)px)[0]
 344         add     %o1,%i2,%o2             ! px += stridex
 345         ld      [%o2],%l6               ! (6) Xi = ((int*)px)[0]
 346         and     %l4,G5_CONST,%l4        ! (4) ax = Xi & 0x7fffffff
 347         add     %o2,%i2,%o3             ! px += stridex
 348         ld      [%o3],%l7               ! (7) Xi = ((int*)px)[0]
 349         add     %o3,%i2,%i1             ! px += stridex
 350         and     %l5,G5_CONST,%l5        ! (5) ax = Xi & 0x7fffffff
 351         and     %l6,G5_CONST,%l6        ! (6) ax = Xi & 0x7fffffff
 352         ba      .main_loop
 353         and     %l7,G5_CONST,%l7        ! (7) ax = Xi & 0x7fffffff
 354 
 355         .align  16
 356 .main_loop:
 357         cmp     %l0,I5_THRESHOLD
 358         bg,pn   %icc,.spec0             ! (0) if (ax > THRESHOLD)
 359         lda     [%o0]%asi,%f8           ! (4) X = px[0]
 360         fstod   %f16,%f40               ! (0) y = (double) X
 361 .spec0_cont:
 362         cmp     %l1,I5_THRESHOLD
 363         bg,pn   %icc,.spec1             ! (1) if (ax > THRESHOLD)
 364         lda     [%o1]%asi,%f10          ! (5) X = px[0]
 365         fstod   %f2,%f42                ! (1) y = (double) X
 366 .spec1_cont:
 367         cmp     %l2,I5_THRESHOLD
 368         bg,pn   %icc,.spec2             ! (2) if (ax > THRESHOLD)
 369         lda     [%o2]%asi,%f12          ! (6) X = px[0]
 370         fstod   %f4,%f44                ! (2) y = (double) X
 371 .spec2_cont:
 372         cmp     %l3,I5_THRESHOLD
 373         bg,pn   %icc,.spec3             ! (3) if (ax > THRESHOLD)
 374         lda     [%o3]%asi,%f14          ! (7) X = px[0]
 375         fstod   %f6,%f46                ! (3) y = (double) X
 376 .spec3_cont:
 377         cmp     %l4,I5_THRESHOLD
 378         bg,pn   %icc,.spec4             ! (4) if (ax > THRESHOLD)
 379         fmuld   F62_K256ONLN2,%f40,%f40 ! (0) y *= K256ONLN2
 380         fstod   %f8,%f48                ! (4) y = (double) X
 381 .spec4_cont:
 382         cmp     %l5,I5_THRESHOLD
 383         bg,pn   %icc,.spec5             ! (5) if (ax > THRESHOLD)
 384         fmuld   F62_K256ONLN2,%f42,%f42 ! (1) y *= K256ONLN2
 385         fstod   %f10,%f50               ! (5) y = (double) X
 386 .spec5_cont:
 387         cmp     %l6,I5_THRESHOLD
 388         bg,pn   %icc,.spec6             ! (6) if (ax > THRESHOLD)
 389         fmuld   F62_K256ONLN2,%f44,%f44 ! (2) y *= K256ONLN2
 390         fstod   %f12,%f52               ! (6) y = (double) X
 391 .spec6_cont:
 392         cmp     %l7,I5_THRESHOLD
 393         bg,pn   %icc,.spec7             ! (7) if (ax > THRESHOLD)
 394         fmuld   F62_K256ONLN2,%f46,%f46 ! (3) y *= K256ONLN2
 395         fstod   %f14,%f54               ! (7) y = (double) X
 396 .spec7_cont:
 397         fdtoi   %f40,%f16               ! (0) k = (int) y
 398         st      %f16,[%fp+tmp0]
 399         fmuld   F62_K256ONLN2,%f48,%f48 ! (4) y *= K256ONLN2
 400 
 401         fdtoi   %f42,%f2                ! (1) k = (int) y
 402         st      %f2,[%fp+tmp1]
 403         fmuld   F62_K256ONLN2,%f50,%f50 ! (5) y *= K256ONLN2
 404 
 405         fdtoi   %f44,%f4                ! (2) k = (int) y
 406         st      %f4,[%fp+tmp2]
 407         fmuld   F62_K256ONLN2,%f52,%f52 ! (6) y *= K256ONLN2
 408 
 409         fdtoi   %f46,%f6                ! (3) k = (int) y
 410         st      %f6,[%fp+tmp3]
 411         fmuld   F62_K256ONLN2,%f54,%f54 ! (7) y *= K256ONLN2
 412 
 413         fdtoi   %f48,%f8                ! (4) k = (int) y
 414         st      %f8,[%fp+tmp4]
 415 
 416         fdtoi   %f50,%f10               ! (5) k = (int) y
 417         st      %f10,[%fp+tmp5]
 418 
 419         fitod   %f16,%f34               ! (0) dtmp = (double) k
 420         fpackfix        %f16,%f16       ! (0) ((float*)&drez)[0] = vis_fpackfix(drez)
 421         nop
 422         nop
 423 
 424         fdtoi   %f52,%f12               ! (6) k = (int) y
 425         st      %f12,[%fp+tmp6]
 426 
 427         fdtoi   %f54,%f14               ! (7) k = (int) y
 428         st      %f14,[%fp+tmp7]
 429 
 430         lda     [%i1]%asi,%l0           ! (8) Xi = ((int*)px)[0]
 431         add     %i1,%i2,%o5             ! px += stridex
 432         fitod   %f2,%f18                ! (1) dtmp = (double) k
 433         fpackfix        %f2,%f2         ! (1) ((float*)&drez)[0] = vis_fpackfix(drez)
 434 
 435         lda     [%o5]%asi,%l1           ! (9) Xi = ((int*)px)[0]
 436         add     %o5,%i2,%i1             ! px += stridex
 437         fitod   %f4,%f20                ! (2) dtmp = (double) k
 438         fpackfix        %f4,%f4         ! (2) ((float*)&drez)[0] = vis_fpackfix(drez)
 439 
 440         lda     [%i1]%asi,%l2           ! (10) Xi = ((int*)px)[0]
 441         add     %i1,%i2,%o5             ! px += stridex
 442         fitod   %f6,%f22                ! (3) dtmp = (double) k
 443         fpackfix        %f6,%f6         ! (3) ((float*)&drez)[0] = vis_fpackfix(drez)
 444 
 445         lda     [%o5]%asi,%l3           ! (11) Xi = ((int*)px)[0]
 446         add     %o5,%i2,%i1             ! px += stridex
 447         fitod   %f8,%f24                ! (4) dtmp = (double) k
 448         fpackfix        %f8,%f8         ! (4) ((float*)&drez)[0] = vis_fpackfix(drez)
 449 
 450         fitod   %f10,%f26               ! (5) dtmp = (double) k
 451         fpackfix        %f10,%f10       ! (5) ((float*)&drez)[0] = vis_fpackfix(drez)
 452 
 453         fitod   %f12,%f28               ! (6) dtmp = (double) k
 454         fpackfix        %f12,%f12       ! (6) ((float*)&drez)[0] = vis_fpackfix(drez)
 455 
 456         fitod   %f14,%f30               ! (7) dtmp = (double) k
 457         fpackfix        %f14,%f14       ! (7) ((float*)&drez)[0] = vis_fpackfix(drez)
 458 
 459         ld      [%fp+tmp0],%o0          ! (0) load k
 460         and     %l0,G5_CONST,%l0        ! (8) ax = Xi & 0x7fffffff
 461         fsubd   %f40,%f34,%f40          ! (0) y -= dtmp
 462 
 463         ld      [%fp+tmp1],%o1          ! (1) load k
 464         and     %l1,G5_CONST,%l1        ! (9) ax = Xi & 0x7fffffff
 465         fsubd   %f42,%f18,%f42          ! (1) y -= dtmp
 466 
 467         ld      [%fp+tmp2],%o2          ! (2) load k
 468         and     %l2,G5_CONST,%l2        ! (10) ax = Xi & 0x7fffffff
 469         and     %o0,255,%o0             ! (0) k &= 255
 470         fsubd   %f44,%f20,%f44          ! (2) y -= dtmp
 471 
 472         ld      [%fp+tmp3],%o3          ! (3) load k
 473         and     %o1,255,%o1             ! (1) k &= 255
 474         fsubd   %f46,%f22,%f46          ! (3) y -= dtmp
 475 
 476         sll     %o0,3,%o0               ! (0) k <<= 3
 477         sll     %o1,3,%o1               ! (1) k <<= 3
 478         fmuld   F60_KA2,%f40,%f34       ! (0) dtmp = y * KA2
 479         fsubd   %f48,%f24,%f48          ! (4) y -= dtmp
 480 
 481         and     %l3,G5_CONST,%l3        ! (11) ax = Xi & 0x7fffffff
 482         and     %o2,255,%o2             ! (2) k &= 255
 483         fmuld   F60_KA2,%f42,%f18       ! (1) dtmp = y * KA2
 484         fsubd   %f50,%f26,%f50          ! (5) y -= dtmp
 485 
 486         sll     %o2,3,%o2               ! (2) k <<= 3
 487         fmuld   F60_KA2,%f44,%f20       ! (2) dtmp = y * KA2
 488         fsubd   %f52,%f28,%f52          ! (6) y -= dtmp
 489 
 490         ld      [%fp+tmp4],%o4          ! (4) load k
 491         and     %o3,255,%o3             ! (3) k &= 255
 492         fmuld   F60_KA2,%f46,%f22       ! (3) dtmp = y * KA2
 493         fsubd   %f54,%f30,%f54          ! (7) y -= dtmp
 494 
 495         ld      [%fp+tmp5],%o5          ! (5) load k
 496         sll     %o3,3,%o3               ! (3) k <<= 3
 497         fmuld   F60_KA2,%f48,%f24       ! (4) dtmp = y * KA2
 498         faddd   F58_KA1,%f34,%f34       ! (0) dtmp += KA1
 499 
 500         ld      [%fp+tmp6],%o7          ! (6) load k
 501         and     %o4,255,%o4             ! (4) k &= 255
 502         fmuld   F60_KA2,%f50,%f26       ! (5) dtmp = y * KA2
 503         faddd   F58_KA1,%f18,%f18       ! (1) dtmp += KA1
 504 
 505         ld      [%fp+tmp7],%l4          ! (7) load k
 506         and     %o5,255,%o5             ! (5) k &= 255
 507         fmuld   F60_KA2,%f52,%f28       ! (6) dtmp = y * KA2
 508         faddd   F58_KA1,%f20,%f20       ! (2) dtmp += KA1
 509 
 510         sll     %o5,3,%o5               ! (5) k <<= 3
 511         fmuld   F60_KA2,%f54,%f30       ! (7) dtmp = y * KA2
 512         faddd   F58_KA1,%f22,%f22       ! (3) dtmp += KA1
 513 
 514         fmuld   %f34,%f40,%f40          ! (0) y *= dtmp
 515         ldd     [G1_CONST_TBL+%o0],%f34 ! (0) dtmp = ((double*)(CONST_TBL + k))[0]
 516         and     %l4,255,%l4             ! (7) k &= 255
 517         faddd   F58_KA1,%f24,%f24       ! (4) dtmp += KA1
 518 
 519         fmuld   %f18,%f42,%f42          ! (1) y *= dtmp
 520         ldd     [G1_CONST_TBL+%o1],%f18 ! (1) dtmp = ((double*)(CONST_TBL + k))[0]
 521         sll     %l4,3,%l4               ! (7) k <<= 3
 522         faddd   F58_KA1,%f26,%f26       ! (5) dtmp += KA1
 523 
 524         fmuld   %f20,%f44,%f44          ! (2) y *= dtmp
 525         ldd     [G1_CONST_TBL+%o2],%f20 ! (2) dtmp = ((double*)(CONST_TBL + k))[0]
 526         faddd   F58_KA1,%f28,%f28       ! (6) dtmp += KA1
 527 
 528         fmuld   %f22,%f46,%f46          ! (3) y *= dtmp
 529         ldd     [G1_CONST_TBL+%o3],%f22 ! (3) dtmp = ((double*)(CONST_TBL + k))[0]
 530         sll     %o4,3,%o4               ! (4) k <<= 3
 531         faddd   F58_KA1,%f30,%f30       ! (7) dtmp += KA1
 532 
 533         fmuld   %f24,%f48,%f48          ! (4) y *= dtmp
 534         ldd     [G1_CONST_TBL+%o4],%f24 ! (4) dtmp = ((double*)(CONST_TBL + k))[0]
 535         and     %o7,255,%o7             ! (6) k &= 255
 536         fpadd32 %f16,%f34,%f34          ! (0) drez = vis_fpadd32(drez,dtmp)
 537 
 538         fmuld   %f26,%f50,%f50          ! (5) y *= dtmp
 539         ldd     [G1_CONST_TBL+%o5],%f26 ! (5) dtmp = ((double*)(CONST_TBL + k))[0]
 540         sll     %o7,3,%o7               ! (6) k <<= 3
 541         fpadd32 %f2,%f18,%f18           ! (1) drez = vis_fpadd32(drez,dtmp)
 542 
 543         fmuld   %f28,%f52,%f52          ! (6) y *= dtmp
 544         ldd     [G1_CONST_TBL+%o7],%f28 ! (6) dtmp = ((double*)(CONST_TBL + k))[0]
 545         sll     %i2,2,%o0
 546         fpadd32 %f4,%f20,%f20           ! (2) drez = vis_fpadd32(drez,dtmp)
 547 
 548         fmuld   %f30,%f54,%f54          ! (7) y *= dtmp
 549         ldd     [G1_CONST_TBL+%l4],%f30 ! (7) dtmp = ((double*)(CONST_TBL + k))[0]
 550         sub     %i1,%o0,%o0
 551         fpadd32 %f6,%f22,%f22           ! (3) drez = vis_fpadd32(drez,dtmp)
 552 
 553         lda     [%i1]%asi,%l4           ! (12) Xi = ((int*)px)[0]
 554         add     %i1,%i2,%o1             ! px += stridex
 555         fpadd32 %f8,%f24,%f24           ! (4) drez = vis_fpadd32(drez,dtmp)
 556         fmuld   %f34,%f40,%f40          ! (0) y *= drez
 557 
 558         lda     [%o1]%asi,%l5           ! (13) Xi = ((int*)px)[0]
 559         add     %o1,%i2,%o2             ! px += stridex
 560         fpadd32 %f10,%f26,%f26          ! (5)  drez = vis_fpadd32(drez,dtmp)
 561         fmuld   %f18,%f42,%f42          ! (1)  y *= drez
 562 
 563         lda     [%o2]%asi,%l6           ! (14) Xi = ((int*)px)[0]
 564         add     %o2,%i2,%o3             ! px += stridex
 565         fpadd32 %f12,%f28,%f28          ! (6)  drez = vis_fpadd32(drez,dtmp)
 566         fmuld   %f20,%f44,%f44          ! (2)  y *= drez
 567 
 568         lda     [%o3]%asi,%l7           ! (15) Xi = ((int*)px)[0]
 569         add     %o3,%i2,%i1             ! px += stridex
 570         fpadd32 %f14,%f30,%f30          ! (7)  drez = vis_fpadd32(drez,dtmp)
 571         fmuld   %f22,%f46,%f46          ! (3)  y *= drez
 572 
 573         lda     [%o0]%asi,%f16          ! (8)  X = px[0]
 574         add     %o0,%i2,%o5
 575         fmuld   %f24,%f48,%f48          ! (4)  y *= drez
 576         faddd   %f34,%f40,%f40          ! (0)  y += drez
 577 
 578         lda     [%o5]%asi,%f2           ! (9)  X = px[0]
 579         add     %o5,%i2,%o0
 580         fmuld   %f26,%f50,%f50          ! (5)  y *= drez
 581         faddd   %f18,%f42,%f42          ! (1)  y += drez
 582 
 583         lda     [%o0]%asi,%f4           ! (10) X = px[0]
 584         add     %o0,%i2,%o5
 585         fmuld   %f28,%f52,%f52          ! (6)  y *= drez
 586         faddd   %f20,%f44,%f44          ! (2)  y += drez
 587 
 588         lda     [%o5]%asi,%f6           ! (11) X = px[0]
 589         add     %o5,%i2,%o0
 590         fmuld   %f30,%f54,%f54          ! (7)  y *= drez
 591         faddd   %f22,%f46,%f46          ! (3)  y += drez
 592 
 593         and     %l4,G5_CONST,%l4        ! (12) ax = Xi & 0x7fffffff
 594         faddd   %f24,%f48,%f48          ! (4)  y += drez
 595 
 596         and     %l5,G5_CONST,%l5        ! (13) ax = Xi & 0x7fffffff
 597         faddd   %f26,%f50,%f50          ! (5)  y += drez
 598 
 599         and     %l6,G5_CONST,%l6        ! (14) ax = Xi & 0x7fffffff
 600         faddd   %f28,%f52,%f52          ! (6)  y += drez
 601 
 602         and     %l7,G5_CONST,%l7        ! (15) ax = Xi & 0x7fffffff
 603         faddd   %f30,%f54,%f54          ! (7)  y += drez
 604 
 605         fdtos   %f40,%f26               ! (0) (float) y
 606         st      %f26,[%i3]
 607         add     %i3,%i4,%o4             ! py += stridey
 608 
 609         fdtos   %f42,%f18               ! (1) (float) y
 610         st      %f18,[%o4]
 611         add     %o4,%i4,%i3             ! py += stridey
 612 
 613         fdtos   %f44,%f20               ! (2) (float) y
 614         st      %f20,[%i3]
 615         add     %i3,%i4,%o4             ! py += stridey
 616 
 617         fdtos   %f46,%f22               ! (3) (float) y
 618         st      %f22,[%o4]
 619         add     %o4,%i4,%i3             ! py += stridey
 620 
 621         fdtos   %f48,%f24               ! (4) (float) y
 622         st      %f24,[%i3]
 623         subcc   %i0,8,%i0
 624         add     %i3,%i4,%o4             ! py += stridey
 625 
 626         fdtos   %f50,%f26               ! (5) (float) y
 627         st      %f26,[%o4]
 628         add     %o4,%i4,%o5             ! py += stridey
 629         add     %i4,%i4,%o7
 630 
 631         fdtos   %f52,%f28               ! (6) (float) y
 632         st      %f28,[%o5]
 633         add     %o5,%i4,%o4             ! py += stridey
 634         add     %o5,%o7,%i3             ! py += stridey
 635 
 636         fdtos   %f54,%f30               ! (7) (float) y
 637         st      %f30,[%o4]
 638         bpos,pt %icc,.main_loop
 639         nop
 640 .after_main_loop:
 641         sll     %i2,3,%o2
 642         sub     %i1,%o2,%i1
 643 
 644 .tail:
 645         add     %i0,8,%i0
 646         subcc   %i0,1,%i0
 647         bneg,pn %icc,.exit
 648 
 649         ld      [%i1],%l0
 650         ld      [%i1],%f2
 651         add     %i1,%i2,%i1
 652 
 653 .tail_loop:
 654         and     %l0,G5_CONST,%l1
 655         cmp     %l1,I5_THRESHOLD
 656         bg,pn   %icc,.tail_spec
 657         nop
 658 .tail_spec_cont:
 659         fstod   %f2,%f40
 660         fmuld   F62_K256ONLN2,%f40,%f40
 661         fdtoi   %f40,%f2
 662         st      %f2,[%fp+tmp0]
 663         fitod   %f2,%f16
 664         fpackfix        %f2,%f2
 665         fsubd   %f40,%f16,%f40
 666         fmuld   F60_KA2,%f40,%f16
 667         faddd   F58_KA1,%f16,%f16
 668         ld      [%fp+tmp0],%o0
 669         fmuld   %f16,%f40,%f40
 670         and     %o0,255,%o0
 671         sll     %o0,3,%o0
 672         ldd     [G1_CONST_TBL+%o0],%f16
 673         fpadd32 %f2,%f16,%f16
 674         lda     [%i1]%asi,%l0
 675         fmuld   %f16,%f40,%f40
 676         lda     [%i1]%asi,%f2
 677         faddd   %f16,%f40,%f40
 678         add     %i1,%i2,%i1
 679         fdtos   %f40,%f16
 680         st      %f16,[%i3]
 681         add     %i3,%i4,%i3
 682         subcc   %i0,1,%i0
 683         bpos,pt %icc,.tail_loop
 684         nop
 685 
 686 .exit:
 687         ret
 688         restore
 689 
! -----------------------------------------------------------------------
! .tail_spec: slow path of the scalar tail loop, entered from .tail_loop
! when (X & G5_CONST) > I5_THRESHOLD.
!
! On entry: %l0 = raw bits of X, %l1 = %l0 & G5_CONST, %f2 = X,
!           %f3 = the 0.0f operand of the comparison below, %i3 = py.
! Result stored to *py:
!   NaN                           -> X * X
!   Inf, sign bit clear           -> X
!   Inf, sign bit set             -> %f3
!   finite, not below 0.0f and
!   below THRESHOLDL              -> nothing here; the normal computation
!                                    resumes at .tail_spec_cont
!   any other finite X            -> square of the CONST_TBL word at byte
!                                    offset 2048 (sign clear) or 2052
!                                    (sign set)
! NOTE(review): the two words at 2048 / 2052 are outside this view;
! presumably a huge and a tiny constant so that the square overflows or
! underflows - confirm against the table.
! -----------------------------------------------------------------------
.tail_spec:
        sethi   %hi(0x7f800000),%o4     ! bit pattern of +Inf
        cmp     %l1,%o4
        bl,pt   %icc,.tail_spec_out_of_range    ! below Inf: finite
        nop

        srl     %l0,29,%l0              ! sign bit of X moves to bit 2
        ble,pn  %icc,.tail_spec_inf     ! flags still from cmp: equal means Inf
        andcc   %l0,4,%g0               ! delay slot: Z set iff sign bit clear

! NaN -> NaN

        fmuls   %f2,%f2,%f2
        ba      .tail_spec_exit
        st      %f2,[%i3]               ! delay slot: *py = X * X

.tail_spec_inf:
        be,a,pn %icc,.tail_spec_exit    ! sign clear
        st      %f2,[%i3]               ! annulled slot, runs only if taken: *py = X

        ba      .tail_spec_exit
        st      %f3,[%i3]               ! sign set: *py = %f3

.tail_spec_out_of_range:
        fcmpes  %fcc0,%f2,%f3
        fcmpes  %fcc1,%f2,THRESHOLDL
        fbl,pn  %fcc0,1f                ! if ( X < 0.0f )
        nop
        fbl,pt  %fcc1,.tail_spec_cont   ! if ( X < THRESHOLDL )
        nop
1:
        srl     %l0,29,%l0              ! sign bit of X moves to bit 2
        and     %l0,4,%l0               ! 0 (sign clear) or 4 (sign set)
        add     %l0,2048,%l0            ! byte offset 2048 or 2052
        ld      [G1_CONST_TBL+%l0],%f2  ! constant selected by the sign
        fmuls   %f2,%f2,%f2             ! result = constant * constant
        st      %f2,[%i3]

.tail_spec_exit:
        lda     [%i1]%asi,%l0           ! read ahead: raw bits of the next X
        lda     [%i1]%asi,%f2           ! read ahead: next X
        add     %i1,%i2,%i1             ! px += input step

        subcc   %i0,1,%i0               ! claim the next element
        bpos,pt %icc,.tail_loop
        add     %i3,%i4,%i3             ! delay slot (always runs): py += stridey
        ba      .exit
        nop
 738 
 739         .align  16
! -----------------------------------------------------------------------
! .spec0: side exit of the main loop for element (0) of the current group
! of eight (the test that branches here is outside this part of the file).
!
! On entry: %l0 = ax of element (0), %f16 = its X, %f3 = the 0.0f operand
! of the comparison below, %i3 = py for element (0).  The raw value is
! re-read from %i1 - 8 * %i2 when the sign is needed.
!
!   NaN                      -> *py = X * X
!   Inf, sign bit clear      -> *py = X
!   Inf, sign bit set        -> *py = %f3
!   finite, not below 0.0f
!   and below THRESHOLDL     -> y = (double) X, back to .spec0_cont
!   any other finite X       -> *py = square of the CONST_TBL word at
!                               byte offset 2048 (sign clear) or 2052
!
! .spec0_exit then drops element (0): every remaining element moves down
! one slot, one new element is read ahead, and the loop restarts.
! -----------------------------------------------------------------------
.spec0:
        sethi   %hi(0x7f800000),%o5     ! bit pattern of +Inf
        cmp     %l0,%o5
        bl,pt   %icc,.spec0_out_of_range        ! below Inf: finite
        sll     %i2,3,%o4               ! delay slot: 8 input steps, in bytes

        ble,pn  %icc,.spec0_inf         ! flags still from cmp: equal means Inf
        sub     %i1,%o4,%o4             ! delay slot: address of element (0)

! NaN -> NaN

        fmuls   %f16,%f16,%f16
        ba      .spec0_exit
        st      %f16,[%i3]              ! delay slot: *py = X * X

.spec0_inf:
        ld      [%o4],%l0               ! raw bits of X, sign included
        srl     %l0,29,%l0              ! sign bit moves to bit 2
        andcc   %l0,4,%l0
        be,a,pn %icc,.spec0_exit        ! sign clear
        st      %f16,[%i3]              ! annulled slot, runs only if taken: *py = X

        ba      .spec0_exit
        st      %f3,[%i3]               ! sign set: *py = %f3

.spec0_out_of_range:
        fcmpes  %fcc0,%f16,%f3
        fcmpes  %fcc1,%f16,THRESHOLDL
        fbl,a,pn        %fcc0,1f                ! if ( X < 0.0f )
        fstod   %f16,%f40                       ! (0) y = (double) X
        fbl,a,pt        %fcc1,.spec0_cont       ! if ( X < THRESHOLDL )
        fstod   %f16,%f40                       ! (0) y = (double) X
1:
        sub     %i1,%o4,%o4             ! address of element (0); %o4 was 8 steps
        ld      [%o4],%l0               ! raw bits of X
        srl     %l0,29,%l0              ! sign bit moves to bit 2
        and     %l0,4,%l0               ! 0 (sign clear) or 4 (sign set)
        add     %l0,2048,%l0            ! byte offset 2048 or 2052
        ld      [G1_CONST_TBL+%l0],%f16 ! constant selected by the sign
        fmuls   %f16,%f16,%f16          ! result = constant * constant
        st      %f16,[%i3]

.spec0_exit:
        ! old elements (1)..(7) become (0)..(6)
        fmovs   %f2,%f16
        mov     %l1,%l0
        fmovs   %f4,%f2
        mov     %l2,%l1
        fmovs   %f6,%f4
        mov     %l3,%l2
        fmovs   %f8,%f6
        mov     %l4,%l3
        mov     %l5,%l4
        mov     %l6,%l5
        mov     %l7,%l6
        lda     [%i1]%asi,%l7           ! read ahead: raw bits of the new (7)
        add     %i1,%i2,%i1             ! px += input step
        mov     %o1,%o0                 ! %o0..%o3 slide down the same way
        mov     %o2,%o1
        mov     %o3,%o2
        and     %l7,G5_CONST,%l7        ! (7) ax = Xi & 0x7fffffff
        add     %o2,%i2,%o3             ! %o3 = %o2 + input step

        subcc   %i0,1,%i0               ! one element consumed
        bpos,pt %icc,.main_loop
        add     %i3,%i4,%i3             ! delay slot (always runs): py += stridey
        ba      .after_main_loop        ! %i0 went negative: leave the main loop
        nop
 807 
 808         .align  16
! -----------------------------------------------------------------------
! .spec1: side exit of the main loop for element (1) of the current group
! of eight (the test that branches here is outside this part of the file).
!
! On entry: %l1 = ax of element (1), %f2 = its X, %f40 = y of element (0)
! already converted to double, %f3 = the 0.0f operand of the comparison.
!
!   1. A finite X that is not below 0.0f and is below THRESHOLDL is
!      converted to double and handed back to the main loop (.spec1_cont).
!   2. Otherwise element (0) is computed to completion and stored.
!   3. Element (1) gets: NaN -> X * X, Inf with sign clear -> X, Inf with
!      sign set -> %f3, anything else -> square of the CONST_TBL word at
!      byte offset 2048 (sign clear) or 2052 (sign set).  Its raw bits are
!      re-read from %i1 - 8 * %i2 + %i2.
!   4. .spec1_exit drops elements (0) and (1), reads two new elements
!      ahead and restarts the loop.
! -----------------------------------------------------------------------
.spec1:
        sethi   %hi(0x7f800000),%o5     ! bit pattern of +Inf
        cmp     %l1,%o5
        bge,pn  %icc,1f                 ! Inf or NaN: no float compare needed
        nop
        fcmpes  %fcc0,%f2,%f3
        fcmpes  %fcc1,%f2,THRESHOLDL
        fbl,a,pn        %fcc0,1f                ! if ( X < 0.0f )
        fstod   %f2,%f42                        ! (1) y = (double) X
        fbl,a,pt        %fcc1,.spec1_cont       ! if ( X < THRESHOLDL )
        fstod   %f2,%f42                        ! (1) y = (double) X
1:
        ! finish element (0)
        fmuld   F62_K256ONLN2,%f40,%f40 ! (0) y *= F62_K256ONLN2
        fdtoi   %f40,%f16               ! (0) k = (int) y
        st      %f16,[%fp+tmp0]         ! spill k so the integer unit can read it
        fitod   %f16,%f34               ! (0) (double) k
        fpackfix        %f16,%f16       ! (0) packed image of k
        fsubd   %f40,%f34,%f40          ! (0) r = y - k
        fmuld   F60_KA2,%f40,%f34       ! (0) F60_KA2 * r
        faddd   F58_KA1,%f34,%f34       ! (0) F58_KA1 + F60_KA2 * r
        ld      [%fp+tmp0],%o0          ! (0) k
        fmuld   %f34,%f40,%f40          ! (0) p = (F58_KA1 + F60_KA2 * r) * r
        and     %o0,255,%o0             ! (0) table index = k & 255
        sll     %o0,3,%o0               ! table entries are 8 bytes wide
        ldd     [G1_CONST_TBL+%o0],%f34 ! (0) T
        fpadd32 %f16,%f34,%f34          ! (0) T += packed k (32-bit lanes)
        fmuld   %f34,%f40,%f40          ! (0) T * p
        faddd   %f34,%f40,%f40          ! (0) T + T * p
        fdtos   %f40,%f26               ! (0) (float) y
        st      %f26,[%i3]
        add     %i3,%i4,%i3             ! py += stridey

        ! classify element (1)
        cmp     %l1,%o5
        bl,pt   %icc,.spec1_out_of_range        ! below Inf: finite
        sll     %i2,3,%o4               ! delay slot: 8 input steps, in bytes

        ble,pn  %icc,.spec1_inf         ! flags still from cmp: equal means Inf
        sub     %i1,%o4,%o4             ! delay slot: address of element (0)

! NaN -> NaN

        fmuls   %f2,%f2,%f2
        ba      .spec1_exit
        st      %f2,[%i3]               ! delay slot: *py = X * X

.spec1_inf:
        add     %o4,%i2,%o4             ! address of element (1)
        ld      [%o4],%l0               ! raw bits of X, sign included
        srl     %l0,29,%l0              ! sign bit moves to bit 2
        andcc   %l0,4,%l0
        be,a,pn %icc,.spec1_exit        ! sign clear
        st      %f2,[%i3]               ! annulled slot, runs only if taken: *py = X

        ba      .spec1_exit
        st      %f3,[%i3]               ! sign set: *py = %f3

.spec1_out_of_range:
        sub     %i1,%o4,%o4             ! address of element (0); %o4 was 8 steps
        add     %o4,%i2,%o4             ! address of element (1)
        ld      [%o4],%l0               ! raw bits of X
        srl     %l0,29,%l0              ! sign bit moves to bit 2
        and     %l0,4,%l0               ! 0 (sign clear) or 4 (sign set)
        add     %l0,2048,%l0            ! byte offset 2048 or 2052
        ld      [G1_CONST_TBL+%l0],%f2  ! constant selected by the sign
        fmuls   %f2,%f2,%f2             ! result = constant * constant
        st      %f2,[%i3]

.spec1_exit:
        ! old elements (2)..(7) become (0)..(5)
        fmovs   %f4,%f16
        mov     %l2,%l0
        fmovs   %f6,%f2
        mov     %l3,%l1
        fmovs   %f8,%f4
        mov     %l4,%l2
        fmovs   %f10,%f6
        mov     %l5,%l3
        mov     %l6,%l4
        mov     %l7,%l5
        lda     [%i1]%asi,%l6           ! read ahead: raw bits of the new (6)
        add     %i1,%i2,%i1
        lda     [%i1]%asi,%l7           ! read ahead: raw bits of the new (7)
        add     %i1,%i2,%i1
        and     %l6,G5_CONST,%l6        ! (6) ax = Xi & 0x7fffffff
        and     %l7,G5_CONST,%l7        ! (7) ax = Xi & 0x7fffffff
        mov     %o2,%o0                 ! %o0..%o3 slide down by two
        mov     %o3,%o1
        add     %o1,%i2,%o2
        add     %o2,%i2,%o3

        subcc   %i0,2,%i0               ! two elements consumed
        bpos,pt %icc,.main_loop
        add     %i3,%i4,%i3             ! delay slot (always runs): py += stridey
        ba      .after_main_loop        ! %i0 went negative: leave the main loop
        nop
 903 
 904         .align  16
! -----------------------------------------------------------------------
! .spec2: side exit of the main loop for element (2) of the current group
! of eight (the test that branches here is outside this part of the file).
!
! On entry: %l2 = ax of element (2), %f4 = its X, %f40 / %f42 = y of
! elements (0) / (1) already converted to double, %f3 = the 0.0f operand
! of the comparison.
!
!   1. A finite X that is not below 0.0f and is below THRESHOLDL is
!      converted to double and handed back to the main loop (.spec2_cont).
!   2. Otherwise elements (0) and (1) are computed to completion, two
!      interleaved instruction streams, and stored.
!   3. Element (2) gets: NaN -> X * X, Inf with sign clear -> X, Inf with
!      sign set -> %f3, anything else -> square of the CONST_TBL word at
!      byte offset 2048 (sign clear) or 2052 (sign set).  Its raw bits are
!      re-read from %i1 - 6 * %i2.
!   4. .spec2_exit drops elements (0)..(2), reads three new elements ahead
!      and restarts the loop.
! -----------------------------------------------------------------------
.spec2:
        sethi   %hi(0x7f800000),%o5     ! bit pattern of +Inf
        cmp     %l2,%o5
        bge,pn  %icc,1f                 ! Inf or NaN: no float compare needed
        nop
        fcmpes  %fcc0,%f4,%f3
        fcmpes  %fcc1,%f4,THRESHOLDL
        fbl,a,pn        %fcc0,1f                ! if ( X < 0.0f )
        fstod   %f4,%f44                        ! (2) y = (double) X
        fbl,a,pt        %fcc1,.spec2_cont       ! if ( X < THRESHOLDL )
        fstod   %f4,%f44                        ! (2) y = (double) X
1:
        fmuld   F62_K256ONLN2,%f40,%f40 ! (0) y *= F62_K256ONLN2

        fmuld   F62_K256ONLN2,%f42,%f42 ! (1) y *= F62_K256ONLN2

        fdtoi   %f40,%f16               ! (0) k = (int) y
        st      %f16,[%fp+tmp0]         ! spill k so the integer unit can read it

        fdtoi   %f42,%f2                ! (1) k = (int) y
        st      %f2,[%fp+tmp1]

        fitod   %f16,%f34               ! (0) (double) k
        fpackfix        %f16,%f16       ! (0) packed image of k

        fitod   %f2,%f18                ! (1) (double) k
        fpackfix        %f2,%f2         ! (1) packed image of k

        fsubd   %f40,%f34,%f40          ! (0) r = y - k

        fsubd   %f42,%f18,%f42          ! (1) r = y - k

        fmuld   F60_KA2,%f40,%f34       ! (0) F60_KA2 * r

        fmuld   F60_KA2,%f42,%f18       ! (1) F60_KA2 * r

        faddd   F58_KA1,%f34,%f34       ! (0) F58_KA1 + F60_KA2 * r

        faddd   F58_KA1,%f18,%f18       ! (1) F58_KA1 + F60_KA2 * r

        ld      [%fp+tmp0],%o0          ! (0) k
        fmuld   %f34,%f40,%f40          ! (0) p = (F58_KA1 + F60_KA2 * r) * r

        ld      [%fp+tmp1],%o1          ! (1) k
        fmuld   %f18,%f42,%f42          ! (1) p = (F58_KA1 + F60_KA2 * r) * r

        and     %o0,255,%o0             ! (0) table index = k & 255

        and     %o1,255,%o1             ! (1) table index = k & 255

        sll     %o0,3,%o0               ! table entries are 8 bytes wide

        sll     %o1,3,%o1

        ldd     [G1_CONST_TBL+%o0],%f34 ! (0) T

        ldd     [G1_CONST_TBL+%o1],%f18 ! (1) T

        fpadd32 %f16,%f34,%f34          ! (0) T += packed k (32-bit lanes)

        fpadd32 %f2,%f18,%f18           ! (1) T += packed k

        fmuld   %f34,%f40,%f40          ! (0) T * p

        fmuld   %f18,%f42,%f42          ! (1) T * p

        faddd   %f34,%f40,%f40          ! (0) T + T * p

        faddd   %f18,%f42,%f42          ! (1) T + T * p

        fdtos   %f40,%f26               ! (0) (float) y
        st      %f26,[%i3]
        add     %i3,%i4,%o4             ! py += stridey

        fdtos   %f42,%f18               ! (1) (float) y
        st      %f18,[%o4]
        add     %o4,%i4,%i3             ! py += stridey

        ! classify element (2)
        cmp     %l2,%o5
        sll     %i2,1,%o5               ! 2 input steps (sll leaves icc alone)
        bl,pt   %icc,.spec2_out_of_range        ! below Inf: finite
        sll     %i2,2,%o4               ! delay slot: 4 input steps

        ble,pn  %icc,.spec2_inf         ! flags still from cmp: equal means Inf
        add     %o4,%o5,%o4             ! delay slot: 6 input steps

! NaN -> NaN

        fmuls   %f4,%f4,%f4
        ba      .spec2_exit
        st      %f4,[%i3]               ! delay slot: *py = X * X

.spec2_inf:
        sub     %i1,%o4,%o4             ! address of element (2)
        ld      [%o4],%l0               ! raw bits of X, sign included
        srl     %l0,29,%l0              ! sign bit moves to bit 2
        andcc   %l0,4,%l0
        be,a,pn %icc,.spec2_exit        ! sign clear
        st      %f4,[%i3]               ! annulled slot, runs only if taken: *py = X

        ba      .spec2_exit
        st      %f3,[%i3]               ! sign set: *py = %f3

.spec2_out_of_range:
        add     %o4,%o5,%o4             ! 4 + 2 = 6 input steps
        sub     %i1,%o4,%o4             ! address of element (2)
        ld      [%o4],%l0               ! raw bits of X
        srl     %l0,29,%l0              ! sign bit moves to bit 2
        and     %l0,4,%l0               ! 0 (sign clear) or 4 (sign set)
        add     %l0,2048,%l0            ! byte offset 2048 or 2052
        ld      [G1_CONST_TBL+%l0],%f2  ! constant selected by the sign
        fmuls   %f2,%f2,%f2             ! result = constant * constant
        st      %f2,[%i3]

.spec2_exit:
        ! old elements (3)..(7) become (0)..(4)
        fmovs   %f6,%f16
        mov     %l3,%l0
        mov     %o3,%o0                 ! %o0..%o3 are rebuilt from the old %o3
        fmovs   %f8,%f2
        mov     %l4,%l1
        add     %o0,%i2,%o1
        fmovs   %f10,%f4
        mov     %l5,%l2
        add     %o1,%i2,%o2
        fmovs   %f12,%f6
        mov     %l6,%l3
        mov     %l7,%l4
        lda     [%i1]%asi,%l5           ! read ahead: raw bits of the new (5)
        add     %i1,%i2,%i1
        add     %o2,%i2,%o3
        lda     [%i1]%asi,%l6           ! read ahead: raw bits of the new (6)
        add     %i1,%i2,%i1
        lda     [%i1]%asi,%l7           ! read ahead: raw bits of the new (7)
        add     %i1,%i2,%i1
        and     %l5,G5_CONST,%l5        ! (5) ax = Xi & 0x7fffffff
        and     %l6,G5_CONST,%l6        ! (6) ax = Xi & 0x7fffffff
        and     %l7,G5_CONST,%l7        ! (7) ax = Xi & 0x7fffffff

        subcc   %i0,3,%i0               ! three elements consumed
        bpos,pt %icc,.main_loop
        add     %i3,%i4,%i3             ! delay slot (always runs): py += stridey
        ba      .after_main_loop        ! %i0 went negative: leave the main loop
        nop
! -----------------------------------------------------------------------
! .spec3: side exit of the main loop for element (3) of the current group
! of eight (the test that branches here is outside this part of the file).
!
! On entry: %l3 = ax of element (3), %f6 = its X, %f40 / %f42 / %f44 = y
! of elements (0)..(2) already converted to double, %f3 = the 0.0f operand
! of the comparison.
!
!   1. A finite X that is not below 0.0f and is below THRESHOLDL is
!      converted to double and handed back to the main loop (.spec3_cont).
!   2. Otherwise elements (0)..(2) are computed to completion, three
!      interleaved instruction streams, and stored.
!   3. Element (3) gets: NaN -> X * X, Inf with sign clear -> X, Inf with
!      sign set -> %f3, anything else -> square of the CONST_TBL word at
!      byte offset 2048 (sign clear) or 2052 (sign set).  Its raw bits are
!      re-read from %i1 - 5 * %i2.
!   4. .spec3_exit drops elements (0)..(3), reads four new elements ahead
!      and restarts the loop.
! -----------------------------------------------------------------------
.spec3:
        sethi   %hi(0x7f800000),%o5     ! bit pattern of +Inf
        cmp     %l3,%o5
        bge,pn  %icc,1f                 ! Inf or NaN: no float compare needed
        nop
        fcmpes  %fcc0,%f6,%f3
        fcmpes  %fcc1,%f6,THRESHOLDL
        fbl,a,pn        %fcc0,1f                ! if ( X < 0.0f )
        fstod   %f6,%f46                        ! (3) y = (double) X
        fbl,a,pt        %fcc1,.spec3_cont       ! if ( X < THRESHOLDL )
        fstod   %f6,%f46                        ! (3) y = (double) X
1:
        fmuld   F62_K256ONLN2,%f40,%f40 ! (0) y *= F62_K256ONLN2

        fmuld   F62_K256ONLN2,%f42,%f42 ! (1) y *= F62_K256ONLN2

        fmuld   F62_K256ONLN2,%f44,%f44 ! (2) y *= F62_K256ONLN2

        fdtoi   %f40,%f16               ! (0) k = (int) y
        st      %f16,[%fp+tmp0]         ! spill k so the integer unit can read it

        fdtoi   %f42,%f2                ! (1) k = (int) y
        st      %f2,[%fp+tmp1]

        fdtoi   %f44,%f4                ! (2) k = (int) y
        st      %f4,[%fp+tmp2]

        fitod   %f16,%f34               ! (0) (double) k
        fpackfix        %f16,%f16       ! (0) packed image of k

        fitod   %f2,%f18                ! (1) (double) k
        fpackfix        %f2,%f2         ! (1) packed image of k

        fitod   %f4,%f20                ! (2) (double) k
        fpackfix        %f4,%f4         ! (2) packed image of k

        fsubd   %f40,%f34,%f40          ! (0) r = y - k

        fsubd   %f42,%f18,%f42          ! (1) r = y - k

        fsubd   %f44,%f20,%f44          ! (2) r = y - k

        fmuld   F60_KA2,%f40,%f34       ! (0) F60_KA2 * r

        fmuld   F60_KA2,%f42,%f18       ! (1) F60_KA2 * r

        fmuld   F60_KA2,%f44,%f20       ! (2) F60_KA2 * r

        faddd   F58_KA1,%f34,%f34       ! (0) F58_KA1 + F60_KA2 * r

        faddd   F58_KA1,%f18,%f18       ! (1) F58_KA1 + F60_KA2 * r

        faddd   F58_KA1,%f20,%f20       ! (2) F58_KA1 + F60_KA2 * r

        ld      [%fp+tmp0],%o0          ! (0) k
        fmuld   %f34,%f40,%f40          ! (0) p = (F58_KA1 + F60_KA2 * r) * r

        ld      [%fp+tmp1],%o1          ! (1) k
        fmuld   %f18,%f42,%f42          ! (1) p = (F58_KA1 + F60_KA2 * r) * r

        ld      [%fp+tmp2],%o2          ! (2) k
        fmuld   %f20,%f44,%f44          ! (2) p = (F58_KA1 + F60_KA2 * r) * r

        and     %o0,255,%o0             ! (0) table index = k & 255
        and     %o1,255,%o1             ! (1) table index = k & 255

        and     %o2,255,%o2             ! (2) table index = k & 255
        sll     %o0,3,%o0               ! table entries are 8 bytes wide

        sll     %o1,3,%o1
        sll     %o2,3,%o2

        ldd     [G1_CONST_TBL+%o0],%f34 ! (0) T

        ldd     [G1_CONST_TBL+%o1],%f18 ! (1) T

        ldd     [G1_CONST_TBL+%o2],%f20 ! (2) T

        fpadd32 %f16,%f34,%f34          ! (0) T += packed k (32-bit lanes)

        fpadd32 %f2,%f18,%f18           ! (1) T += packed k

        fpadd32 %f4,%f20,%f20           ! (2) T += packed k

        fmuld   %f34,%f40,%f40          ! (0) T * p

        fmuld   %f18,%f42,%f42          ! (1) T * p

        fmuld   %f20,%f44,%f44          ! (2) T * p

        faddd   %f34,%f40,%f40          ! (0) T + T * p

        faddd   %f18,%f42,%f42          ! (1) T + T * p

        faddd   %f20,%f44,%f44          ! (2) T + T * p

        fdtos   %f40,%f26               ! (0) (float) y
        st      %f26,[%i3]
        add     %i3,%i4,%o4             ! py += stridey

        fdtos   %f42,%f18               ! (1) (float) y
        st      %f18,[%o4]
        add     %o4,%i4,%i3             ! py += stridey

        fdtos   %f44,%f20               ! (2) (float) y
        st      %f20,[%i3]
        add     %i3,%i4,%i3             ! py += stridey

        ! classify element (3)
        cmp     %l3,%o5
        bl,pt   %icc,.spec3_out_of_range        ! below Inf: finite
        sll     %i2,2,%o4               ! delay slot: 4 input steps

        ble,pn  %icc,.spec3_inf         ! flags still from cmp: equal means Inf
        add     %o4,%i2,%o4             ! delay slot: 5 input steps

! NaN -> NaN

        fmuls   %f6,%f6,%f6
        ba      .spec3_exit
        st      %f6,[%i3]               ! delay slot: *py = X * X

.spec3_inf:
        sub     %i1,%o4,%o4             ! address of element (3)
        ld      [%o4],%l0               ! raw bits of X, sign included
        srl     %l0,29,%l0              ! sign bit moves to bit 2
        andcc   %l0,4,%l0
        be,a,pn %icc,.spec3_exit        ! sign clear
        st      %f6,[%i3]               ! annulled slot, runs only if taken: *py = X

        ba      .spec3_exit
        st      %f3,[%i3]               ! sign set: *py = %f3

.spec3_out_of_range:
        add     %o4,%i2,%o4             ! 4 + 1 = 5 input steps
        sub     %i1,%o4,%o4             ! address of element (3)
        ld      [%o4],%l0               ! raw bits of X
        srl     %l0,29,%l0              ! sign bit moves to bit 2
        and     %l0,4,%l0               ! 0 (sign clear) or 4 (sign set)
        add     %l0,2048,%l0            ! byte offset 2048 or 2052
        ld      [G1_CONST_TBL+%l0],%f2  ! constant selected by the sign
        fmuls   %f2,%f2,%f2             ! result = constant * constant
        st      %f2,[%i3]

.spec3_exit:
        ! old elements (4)..(7) become (0)..(3)
        fmovs   %f8,%f16
        mov     %l4,%l0
        fmovs   %f10,%f2
        mov     %l5,%l1
        fmovs   %f12,%f4
        mov     %l6,%l2
        fmovs   %f14,%f6
        mov     %l7,%l3
        ! new (4)..(7) are read ahead; %o0..%o3 keep their addresses
        mov     %i1,%o0
        lda     [%o0]%asi,%l4
        add     %o0,%i2,%o1
        lda     [%o1]%asi,%l5
        add     %o1,%i2,%o2
        lda     [%o2]%asi,%l6
        add     %o2,%i2,%o3
        lda     [%o3]%asi,%l7
        add     %o3,%i2,%i1             ! px = one step past the new (7)
        and     %l4,G5_CONST,%l4        ! (4) ax = Xi & 0x7fffffff
        and     %l5,G5_CONST,%l5        ! (5) ax = Xi & 0x7fffffff
        and     %l6,G5_CONST,%l6        ! (6) ax = Xi & 0x7fffffff
        and     %l7,G5_CONST,%l7        ! (7) ax = Xi & 0x7fffffff

        subcc   %i0,4,%i0               ! four elements consumed
        bpos,pt %icc,.main_loop
        add     %i3,%i4,%i3             ! delay slot (always runs): py += stridey
        ba      .after_main_loop        ! %i0 went negative: leave the main loop
        nop
1219 
1220         .align  16
! -----------------------------------------------------------------------
! .spec4: side exit of the main loop for element (4) of the current group
! of eight (the test that branches here is outside this part of the file).
!
! On entry: %l4 = ax of element (4), %f8 = its X, %f40..%f46 hold
! elements (0)..(3) in flight, %f3 = the 0.0f operand of the comparison.
! NOTE(review): unlike .spec1-.spec3 there is no F62_K256ONLN2 multiply
! for %f40 here; presumably the main loop has already applied it to
! element (0) before this exit can be taken - confirm against .main_loop.
!
!   1. A finite X that is not below 0.0f and is below THRESHOLDL is
!      converted to double and handed back to the main loop (.spec4_cont).
!   2. Otherwise elements (0)..(3) are computed to completion, four
!      interleaved instruction streams, and stored.
!   3. Element (4) gets: NaN -> X * X, Inf with sign clear -> X, Inf with
!      sign set -> %f3, anything else -> square of the CONST_TBL word at
!      byte offset 2048 (sign clear) or 2052 (sign set).  Its raw bits are
!      re-read from %i1 - 4 * %i2.
!   4. .spec4_exit drops elements (0)..(4), reads five new elements ahead
!      and restarts the loop.
! -----------------------------------------------------------------------
.spec4:
        sethi   %hi(0x7f800000),%o5     ! bit pattern of +Inf
        cmp     %l4,%o5
        bge,pn  %icc,1f                 ! Inf or NaN: no float compare needed
        nop
        fcmpes  %fcc0,%f8,%f3
        fcmpes  %fcc1,%f8,THRESHOLDL
        fbl,a,pn        %fcc0,1f                ! if ( X < 0.0f )
        fstod   %f8,%f48                        ! (4) y = (double) X
        fbl,a,pt        %fcc1,.spec4_cont       ! if ( X < THRESHOLDL )
        fstod   %f8,%f48                        ! (4) y = (double) X
1:
        fmuld   F62_K256ONLN2,%f42,%f42 ! (1) y *= F62_K256ONLN2

        fmuld   F62_K256ONLN2,%f44,%f44 ! (2) y *= F62_K256ONLN2

        fmuld   F62_K256ONLN2,%f46,%f46 ! (3) y *= F62_K256ONLN2

        fdtoi   %f40,%f16               ! (0) k = (int) y
        st      %f16,[%fp+tmp0]         ! spill k so the integer unit can read it

        fdtoi   %f42,%f2                ! (1) k = (int) y
        st      %f2,[%fp+tmp1]

        fdtoi   %f44,%f4                ! (2) k = (int) y
        st      %f4,[%fp+tmp2]

        fdtoi   %f46,%f6                ! (3) k = (int) y
        st      %f6,[%fp+tmp3]

        fitod   %f16,%f34               ! (0) (double) k
        fpackfix        %f16,%f16       ! (0) packed image of k

        fitod   %f2,%f18                ! (1) (double) k
        fpackfix        %f2,%f2         ! (1) packed image of k

        fitod   %f4,%f20                ! (2) (double) k
        fpackfix        %f4,%f4         ! (2) packed image of k

        fitod   %f6,%f22                ! (3) (double) k
        fpackfix        %f6,%f6         ! (3) packed image of k

        fsubd   %f40,%f34,%f40          ! (0) r = y - k

        fsubd   %f42,%f18,%f42          ! (1) r = y - k

        fsubd   %f44,%f20,%f44          ! (2) r = y - k

        fsubd   %f46,%f22,%f46          ! (3) r = y - k

        fmuld   F60_KA2,%f40,%f34       ! (0) F60_KA2 * r

        fmuld   F60_KA2,%f42,%f18       ! (1) F60_KA2 * r

        fmuld   F60_KA2,%f44,%f20       ! (2) F60_KA2 * r

        fmuld   F60_KA2,%f46,%f22       ! (3) F60_KA2 * r

        faddd   F58_KA1,%f34,%f34       ! (0) F58_KA1 + F60_KA2 * r

        faddd   F58_KA1,%f18,%f18       ! (1) F58_KA1 + F60_KA2 * r

        faddd   F58_KA1,%f20,%f20       ! (2) F58_KA1 + F60_KA2 * r

        faddd   F58_KA1,%f22,%f22       ! (3) F58_KA1 + F60_KA2 * r

        ld      [%fp+tmp0],%o0          ! (0) k
        fmuld   %f34,%f40,%f40          ! (0) p = (F58_KA1 + F60_KA2 * r) * r

        ld      [%fp+tmp1],%o1          ! (1) k
        fmuld   %f18,%f42,%f42          ! (1) p = (F58_KA1 + F60_KA2 * r) * r

        ld      [%fp+tmp2],%o2          ! (2) k
        fmuld   %f20,%f44,%f44          ! (2) p = (F58_KA1 + F60_KA2 * r) * r

        ld      [%fp+tmp3],%o3          ! (3) k
        fmuld   %f22,%f46,%f46          ! (3) p = (F58_KA1 + F60_KA2 * r) * r

        and     %o0,255,%o0             ! (0) table index = k & 255
        and     %o1,255,%o1             ! (1) table index = k & 255

        and     %o2,255,%o2             ! (2) table index = k & 255
        and     %o3,255,%o3             ! (3) table index = k & 255

        sll     %o0,3,%o0               ! table entries are 8 bytes wide
        sll     %o1,3,%o1

        sll     %o2,3,%o2
        sll     %o3,3,%o3

        ldd     [G1_CONST_TBL+%o0],%f34 ! (0) T

        ldd     [G1_CONST_TBL+%o1],%f18 ! (1) T

        ldd     [G1_CONST_TBL+%o2],%f20 ! (2) T

        ldd     [G1_CONST_TBL+%o3],%f22 ! (3) T

        fpadd32 %f16,%f34,%f34          ! (0) T += packed k (32-bit lanes)

        fpadd32 %f2,%f18,%f18           ! (1) T += packed k

        fpadd32 %f4,%f20,%f20           ! (2) T += packed k

        fpadd32 %f6,%f22,%f22           ! (3) T += packed k

        fmuld   %f34,%f40,%f40          ! (0) T * p

        fmuld   %f18,%f42,%f42          ! (1) T * p

        fmuld   %f20,%f44,%f44          ! (2) T * p

        fmuld   %f22,%f46,%f46          ! (3) T * p

        faddd   %f34,%f40,%f40          ! (0) T + T * p

        faddd   %f18,%f42,%f42          ! (1) T + T * p

        faddd   %f20,%f44,%f44          ! (2) T + T * p

        faddd   %f22,%f46,%f46          ! (3) T + T * p

        fdtos   %f40,%f26               ! (0) (float) y
        st      %f26,[%i3]
        add     %i3,%i4,%o4             ! py += stridey

        fdtos   %f42,%f18               ! (1) (float) y
        st      %f18,[%o4]
        add     %o4,%i4,%i3             ! py += stridey

        fdtos   %f44,%f20               ! (2) (float) y
        st      %f20,[%i3]
        add     %i3,%i4,%o4             ! py += stridey

        fdtos   %f46,%f22               ! (3) (float) y
        st      %f22,[%o4]
        add     %o4,%i4,%i3             ! py += stridey

        ! classify element (4)
        cmp     %l4,%o5
        bl,pt   %icc,.spec4_out_of_range        ! below Inf: finite
        sll     %i2,2,%o4               ! delay slot: 4 input steps

        ble,pn  %icc,.spec4_inf         ! flags still from cmp: equal means Inf
        sub     %i1,%o4,%o4             ! delay slot: address of element (4)

! NaN -> NaN

        fmuls   %f8,%f8,%f8
        ba      .spec4_exit
        st      %f8,[%i3]               ! delay slot: *py = X * X

.spec4_inf:
        ld      [%o4],%l0               ! raw bits of X, sign included
        srl     %l0,29,%l0              ! sign bit moves to bit 2
        andcc   %l0,4,%l0
        be,a,pn %icc,.spec4_exit        ! sign clear
        st      %f8,[%i3]               ! annulled slot, runs only if taken: *py = X

        ba      .spec4_exit
        st      %f3,[%i3]               ! sign set: *py = %f3

.spec4_out_of_range:
        sub     %i1,%o4,%o4             ! address of element (4); %o4 was 4 steps
        ld      [%o4],%l0               ! raw bits of X
        srl     %l0,29,%l0              ! sign bit moves to bit 2
        and     %l0,4,%l0               ! 0 (sign clear) or 4 (sign set)
        add     %l0,2048,%l0            ! byte offset 2048 or 2052
        ld      [G1_CONST_TBL+%l0],%f2  ! constant selected by the sign
        fmuls   %f2,%f2,%f2             ! result = constant * constant
        st      %f2,[%i3]

.spec4_exit:
        ! old elements (5)..(7) become (0)..(2)
        fmovs   %f10,%f16
        mov     %l5,%l0
        fmovs   %f12,%f2
        mov     %l6,%l1
        fmovs   %f14,%f4
        mov     %l7,%l2
        ! new (3): both the raw bits and the float are read ahead
        lda     [%i1]%asi,%l3
        lda     [%i1]%asi,%f6
        ! new (4)..(7): raw bits only, %o0..%o3 keep their addresses
        add     %i1,%i2,%o0
        lda     [%o0]%asi,%l4
        add     %o0,%i2,%o1
        lda     [%o1]%asi,%l5
        add     %o1,%i2,%o2
        lda     [%o2]%asi,%l6
        add     %o2,%i2,%o3
        lda     [%o3]%asi,%l7
        add     %o3,%i2,%i1             ! px = one step past the new (7)
        and     %l3,G5_CONST,%l3        ! (3) ax = Xi & 0x7fffffff
        and     %l4,G5_CONST,%l4        ! (4) ax = Xi & 0x7fffffff
        and     %l5,G5_CONST,%l5        ! (5) ax = Xi & 0x7fffffff
        and     %l6,G5_CONST,%l6        ! (6) ax = Xi & 0x7fffffff
        and     %l7,G5_CONST,%l7        ! (7) ax = Xi & 0x7fffffff

        subcc   %i0,5,%i0               ! five elements consumed
        bpos,pt %icc,.main_loop
        add     %i3,%i4,%i3             ! delay slot (always runs): py += stridey
        ba      .after_main_loop        ! %i0 went negative: leave the main loop
        nop
1421 
1422         .align 16
! -----------------------------------------------------------------------
! .spec5: side exit of the main loop for element (5) of the current group
! of eight (the test that branches here is outside this part of the file).
!
! On entry: %l5 = ax of element (5), %f10 = its X, %f40..%f48 hold
! elements (0)..(4) in flight, %f3 = the 0.0f operand of the comparison.
! NOTE(review): there is no F62_K256ONLN2 multiply for %f40 and %f42
! here; presumably the main loop has already applied it to elements (0)
! and (1) before this exit can be taken - confirm against .main_loop.
!
!   1. A finite X that is not below 0.0f and is below THRESHOLDL is
!      converted to double and handed back to the main loop (.spec5_cont).
!   2. Otherwise elements (0)..(4) are computed to completion, five
!      interleaved instruction streams, and stored.  %o0..%o4 carry the
!      table offsets.
!   3. Element (5) gets: NaN -> X * X, Inf with sign clear -> X, Inf with
!      sign set -> %f3, anything else -> square of the CONST_TBL word at
!      byte offset 2048 (sign clear) or 2052 (sign set).  Its raw bits are
!      re-read from %i1 - 3 * %i2.
!   4. .spec5_exit drops elements (0)..(5), reads six new elements ahead
!      and restarts the loop.
! -----------------------------------------------------------------------
.spec5:
        sethi   %hi(0x7f800000),%o5     ! bit pattern of +Inf
        cmp     %l5,%o5
        bge,pn  %icc,1f                 ! Inf or NaN: no float compare needed
        nop
        fcmpes  %fcc0,%f10,%f3
        fcmpes  %fcc1,%f10,THRESHOLDL
        fbl,a,pn        %fcc0,1f                ! if ( X < 0.0f )
        fstod   %f10,%f50                       ! (5) y = (double) X
        fbl,a,pt        %fcc1,.spec5_cont       ! if ( X < THRESHOLDL )
        fstod   %f10,%f50                       ! (5) y = (double) X
1:
        fmuld   F62_K256ONLN2,%f44,%f44 ! (2) y *= F62_K256ONLN2

        fmuld   F62_K256ONLN2,%f46,%f46 ! (3) y *= F62_K256ONLN2

        fdtoi   %f40,%f16               ! (0) k = (int) y
        st      %f16,[%fp+tmp0]         ! spill k so the integer unit can read it
        fmuld   F62_K256ONLN2,%f48,%f48 ! (4) y *= F62_K256ONLN2

        fdtoi   %f42,%f2                ! (1) k = (int) y
        st      %f2,[%fp+tmp1]

        fdtoi   %f44,%f4                ! (2) k = (int) y
        st      %f4,[%fp+tmp2]

        fdtoi   %f46,%f6                ! (3) k = (int) y
        st      %f6,[%fp+tmp3]

        fdtoi   %f48,%f8                ! (4) k = (int) y
        st      %f8,[%fp+tmp4]

        fitod   %f16,%f34               ! (0) (double) k
        fpackfix        %f16,%f16       ! (0) packed image of k

        fitod   %f2,%f18                ! (1) (double) k
        fpackfix        %f2,%f2         ! (1) packed image of k

        fitod   %f4,%f20                ! (2) (double) k
        fpackfix        %f4,%f4         ! (2) packed image of k

        fitod   %f6,%f22                ! (3) (double) k
        fpackfix        %f6,%f6         ! (3) packed image of k

        fitod   %f8,%f24                ! (4) (double) k
        fpackfix        %f8,%f8         ! (4) packed image of k

        ld      [%fp+tmp0],%o0          ! (0) k
        fsubd   %f40,%f34,%f40          ! (0) r = y - k

        ld      [%fp+tmp1],%o1          ! (1) k
        fsubd   %f42,%f18,%f42          ! (1) r = y - k

        ld      [%fp+tmp2],%o2          ! (2) k
        and     %o0,255,%o0             ! (0) table index = k & 255
        fsubd   %f44,%f20,%f44          ! (2) r = y - k

        ld      [%fp+tmp3],%o3          ! (3) k
        and     %o1,255,%o1             ! (1) table index = k & 255
        fsubd   %f46,%f22,%f46          ! (3) r = y - k

        sll     %o0,3,%o0               ! table entries are 8 bytes wide
        sll     %o1,3,%o1
        fmuld   F60_KA2,%f40,%f34       ! (0) F60_KA2 * r
        fsubd   %f48,%f24,%f48          ! (4) r = y - k

        and     %o2,255,%o2             ! (2) table index = k & 255
        fmuld   F60_KA2,%f42,%f18       ! (1) F60_KA2 * r

        sll     %o2,3,%o2
        fmuld   F60_KA2,%f44,%f20       ! (2) F60_KA2 * r

        ld      [%fp+tmp4],%o4          ! (4) k
        and     %o3,255,%o3             ! (3) table index = k & 255
        fmuld   F60_KA2,%f46,%f22       ! (3) F60_KA2 * r

        sll     %o3,3,%o3
        fmuld   F60_KA2,%f48,%f24       ! (4) F60_KA2 * r
        faddd   F58_KA1,%f34,%f34       ! (0) F58_KA1 + F60_KA2 * r

        and     %o4,255,%o4             ! (4) table index = k & 255
        faddd   F58_KA1,%f18,%f18       ! (1) F58_KA1 + F60_KA2 * r

        faddd   F58_KA1,%f20,%f20       ! (2) F58_KA1 + F60_KA2 * r

        faddd   F58_KA1,%f22,%f22       ! (3) F58_KA1 + F60_KA2 * r

        fmuld   %f34,%f40,%f40          ! (0) p = (F58_KA1 + F60_KA2 * r) * r
        ldd     [G1_CONST_TBL+%o0],%f34 ! (0) T
        faddd   F58_KA1,%f24,%f24       ! (4) F58_KA1 + F60_KA2 * r

        fmuld   %f18,%f42,%f42          ! (1) p
        ldd     [G1_CONST_TBL+%o1],%f18 ! (1) T

        fmuld   %f20,%f44,%f44          ! (2) p
        ldd     [G1_CONST_TBL+%o2],%f20 ! (2) T

        fmuld   %f22,%f46,%f46          ! (3) p
        ldd     [G1_CONST_TBL+%o3],%f22 ! (3) T
        sll     %o4,3,%o4

        fmuld   %f24,%f48,%f48          ! (4) p
        ldd     [G1_CONST_TBL+%o4],%f24 ! (4) T
        fpadd32 %f16,%f34,%f34          ! (0) T += packed k (32-bit lanes)

        fpadd32 %f2,%f18,%f18           ! (1) T += packed k

        fpadd32 %f4,%f20,%f20           ! (2) T += packed k

        fpadd32 %f6,%f22,%f22           ! (3) T += packed k

        fpadd32 %f8,%f24,%f24           ! (4) T += packed k
        fmuld   %f34,%f40,%f40          ! (0) T * p

        fmuld   %f18,%f42,%f42          ! (1) T * p

        fmuld   %f20,%f44,%f44          ! (2) T * p

        fmuld   %f22,%f46,%f46          ! (3) T * p

        fmuld   %f24,%f48,%f48          ! (4) T * p
        faddd   %f34,%f40,%f40          ! (0) T + T * p

        faddd   %f18,%f42,%f42          ! (1) T + T * p

        faddd   %f20,%f44,%f44          ! (2) T + T * p

        faddd   %f22,%f46,%f46          ! (3) T + T * p

        faddd   %f24,%f48,%f48          ! (4) T + T * p

        fdtos   %f40,%f26               ! (0) (float) y
        st      %f26,[%i3]
        add     %i3,%i4,%o4             ! py += stridey

        fdtos   %f42,%f18               ! (1) (float) y
        st      %f18,[%o4]
        add     %o4,%i4,%i3             ! py += stridey

        fdtos   %f44,%f20               ! (2) (float) y
        st      %f20,[%i3]
        add     %i3,%i4,%o4             ! py += stridey

        fdtos   %f46,%f22               ! (3) (float) y
        st      %f22,[%o4]
        add     %o4,%i4,%i3             ! py += stridey

        fdtos   %f48,%f24               ! (4) (float) y
        st      %f24,[%i3]
        add     %i3,%i4,%i3             ! py += stridey

        ! classify element (5)
        cmp     %l5,%o5
        bl,pt   %icc,.spec5_out_of_range        ! below Inf: finite
        sll     %i2,2,%o4               ! delay slot: 4 input steps

        ble,pn  %icc,.spec5_inf         ! flags still from cmp: equal means Inf
        sub     %o4,%i2,%o4             ! delay slot: 3 input steps

! NaN -> NaN

        fmuls   %f10,%f10,%f10
        ba      .spec5_exit
        st      %f10,[%i3]              ! delay slot: *py = X * X

.spec5_inf:
        sub     %i1,%o4,%o4             ! address of element (5)
        ld      [%o4],%l0               ! raw bits of X, sign included
        srl     %l0,29,%l0              ! sign bit moves to bit 2
        andcc   %l0,4,%l0
        be,a,pn %icc,.spec5_exit        ! sign clear
        st      %f10,[%i3]              ! annulled slot, runs only if taken: *py = X

        ba      .spec5_exit
        st      %f3,[%i3]               ! sign set: *py = %f3

.spec5_out_of_range:
        sub     %o4,%i2,%o4             ! 4 - 1 = 3 input steps
        sub     %i1,%o4,%o4             ! address of element (5)
        ld      [%o4],%l0               ! raw bits of X
        srl     %l0,29,%l0              ! sign bit moves to bit 2
        and     %l0,4,%l0               ! 0 (sign clear) or 4 (sign set)
        add     %l0,2048,%l0            ! byte offset 2048 or 2052
        ld      [G1_CONST_TBL+%l0],%f2  ! constant selected by the sign
        fmuls   %f2,%f2,%f2             ! result = constant * constant
        st      %f2,[%i3]

.spec5_exit:
        ! old elements (6), (7) become (0), (1)
        fmovs   %f12,%f16
        mov     %l6,%l0
        fmovs   %f14,%f2
        mov     %l7,%l1
        ! new (2), (3): both the raw bits and the float are read ahead
        lda     [%i1]%asi,%l2
        lda     [%i1]%asi,%f4
        add     %i1,%i2,%i1
        lda     [%i1]%asi,%l3
        lda     [%i1]%asi,%f6
        ! new (4)..(7): raw bits only, %o0..%o3 keep their addresses
        add     %i1,%i2,%o0
        lda     [%o0]%asi,%l4
        add     %o0,%i2,%o1
        lda     [%o1]%asi,%l5
        add     %o1,%i2,%o2
        lda     [%o2]%asi,%l6
        add     %o2,%i2,%o3
        lda     [%o3]%asi,%l7
        add     %o3,%i2,%i1             ! px = one step past the new (7)
        and     %l2,G5_CONST,%l2        ! (2) ax = Xi & 0x7fffffff
        and     %l3,G5_CONST,%l3        ! (3) ax = Xi & 0x7fffffff
        and     %l4,G5_CONST,%l4        ! (4) ax = Xi & 0x7fffffff
        and     %l5,G5_CONST,%l5        ! (5) ax = Xi & 0x7fffffff
        and     %l6,G5_CONST,%l6        ! (6) ax = Xi & 0x7fffffff
        and     %l7,G5_CONST,%l7        ! (7) ax = Xi & 0x7fffffff

        subcc   %i0,6,%i0               ! six elements consumed
        bpos,pt %icc,.main_loop
        add     %i3,%i4,%i3             ! delay slot (always runs): py += stridey
        ba      .after_main_loop        ! %i0 went negative: leave the main loop
        nop
1640 .spec6:
1641         sethi   %hi(0x7f800000),%o5
1642         cmp     %l6,%o5
1643         bge,pn  %icc,1f
1644         nop
1645         fcmpes  %fcc0,%f12,%f3
1646         fcmpes  %fcc1,%f12,THRESHOLDL
1647         fbl,a,pn        %fcc0,1f                ! if ( X < 0.0f )
1648         fstod   %f12,%f52                       ! (6) y = (double) X
1649         fbl,a,pt        %fcc1,.spec6_cont       ! if ( X < THRESHOLDL )
1650         fstod   %f12,%f52                       ! (6) y = (double) X
1651 1:
1652         fmuld   F62_K256ONLN2,%f46,%f46
1653 
1654         fdtoi   %f40,%f16
1655         st      %f16,[%fp+tmp0]
1656         fmuld   F62_K256ONLN2,%f48,%f48
1657 
1658         fdtoi   %f42,%f2
1659         st      %f2,[%fp+tmp1]
1660         fmuld   F62_K256ONLN2,%f50,%f50
1661 
1662         fdtoi   %f44,%f4
1663         st      %f4,[%fp+tmp2]
1664 
1665         fdtoi   %f46,%f6
1666         st      %f6,[%fp+tmp3]
1667 
1668         fdtoi   %f48,%f8
1669         st      %f8,[%fp+tmp4]
1670 
1671         fdtoi   %f50,%f10
1672         st      %f10,[%fp+tmp5]
1673 
1674         fitod   %f16,%f34
1675         fpackfix        %f16,%f16
1676 
1677         fitod   %f2,%f18
1678         fpackfix        %f2,%f2
1679 
1680         fitod   %f4,%f20
1681         fpackfix        %f4,%f4
1682 
1683         fitod   %f6,%f22
1684         fpackfix        %f6,%f6
1685 
1686         fitod   %f8,%f24
1687         fpackfix        %f8,%f8
1688 
1689         fitod   %f10,%f26
1690         fpackfix        %f10,%f10
1691 
1692         ld      [%fp+tmp0],%o0
1693         fsubd   %f40,%f34,%f40
1694 
1695         ld      [%fp+tmp1],%o1
1696         fsubd   %f42,%f18,%f42
1697 
1698         ld      [%fp+tmp2],%o2
1699         and     %o0,255,%o0
1700         fsubd   %f44,%f20,%f44
1701 
1702         ld      [%fp+tmp3],%o3
1703         and     %o1,255,%o1
1704         fsubd   %f46,%f22,%f46
1705 
1706         sll     %o0,3,%o0
1707         sll     %o1,3,%o1
1708         fmuld   F60_KA2,%f40,%f34
1709         fsubd   %f48,%f24,%f48
1710 
1711         and     %o2,255,%o2
1712         fmuld   F60_KA2,%f42,%f18
1713         fsubd   %f50,%f26,%f50
1714 
1715         sll     %o2,3,%o2
1716         fmuld   F60_KA2,%f44,%f20
1717 
1718         ld      [%fp+tmp4],%o4
1719         and     %o3,255,%o3
1720         fmuld   F60_KA2,%f46,%f22
1721 
1722         ld      [%fp+tmp5],%o5
1723         sll     %o3,3,%o3
1724         fmuld   F60_KA2,%f48,%f24
1725         faddd   F58_KA1,%f34,%f34
1726 
1727         and     %o4,255,%o4
1728         fmuld   F60_KA2,%f50,%f26
1729         faddd   F58_KA1,%f18,%f18
1730 
1731         and     %o5,255,%o5
1732         faddd   F58_KA1,%f20,%f20
1733 
1734         sll     %o5,3,%o5
1735         faddd   F58_KA1,%f22,%f22
1736 
1737         fmuld   %f34,%f40,%f40
1738         ldd     [G1_CONST_TBL+%o0],%f34
1739         faddd   F58_KA1,%f24,%f24
1740 
1741         fmuld   %f18,%f42,%f42
1742         ldd     [G1_CONST_TBL+%o1],%f18
1743         faddd   F58_KA1,%f26,%f26
1744 
1745         fmuld   %f20,%f44,%f44
1746         ldd     [G1_CONST_TBL+%o2],%f20
1747 
1748         fmuld   %f22,%f46,%f46
1749         ldd     [G1_CONST_TBL+%o3],%f22
1750         sll     %o4,3,%o4
1751 
1752         fmuld   %f24,%f48,%f48
1753         ldd     [G1_CONST_TBL+%o4],%f24
1754         fpadd32 %f16,%f34,%f34
1755 
1756         fmuld   %f26,%f50,%f50
1757         ldd     [G1_CONST_TBL+%o5],%f26
1758         fpadd32 %f2,%f18,%f18
1759 
1760         fpadd32 %f4,%f20,%f20
1761 
1762         fpadd32 %f6,%f22,%f22
1763 
1764         fpadd32 %f8,%f24,%f24
1765         fmuld   %f34,%f40,%f40
1766 
1767         fpadd32 %f10,%f26,%f26
1768         fmuld   %f18,%f42,%f42
1769 
1770         fmuld   %f20,%f44,%f44
1771 
1772         fmuld   %f22,%f46,%f46
1773 
1774         fmuld   %f24,%f48,%f48
1775         faddd   %f34,%f40,%f40
1776 
1777         fmuld   %f26,%f50,%f50
1778         faddd   %f18,%f42,%f42
1779 
1780         faddd   %f20,%f44,%f44
1781 
1782         faddd   %f22,%f46,%f46
1783 
1784         faddd   %f24,%f48,%f48
1785 
1786         faddd   %f26,%f50,%f50
1787 
1788         fdtos   %f40,%f26
1789         st      %f26,[%i3]
1790         add     %i3,%i4,%o4
1791 
1792         fdtos   %f42,%f18
1793         st      %f18,[%o4]
1794         add     %o4,%i4,%i3
1795 
1796         fdtos   %f44,%f20
1797         st      %f20,[%i3]
1798         add     %i3,%i4,%o4
1799 
1800         fdtos   %f46,%f22
1801         st      %f22,[%o4]
1802         add     %o4,%i4,%i3
1803 
1804         fdtos   %f48,%f24
1805         st      %f24,[%i3]
1806         add     %i3,%i4,%o4
1807 
1808         fdtos   %f50,%f26
1809         st      %f26,[%o4]
1810         add     %o4,%i4,%i3
1811 
1812         sethi   %hi(0x7f800000),%o5
1813         cmp     %l6,%o5
1814         bl,pt   %icc,.spec6_out_of_range
1815         sll     %i2,1,%o4
1816 
1817         ble,pn  %icc,.spec6_inf
1818         sub     %i1,%o4,%o4
1819 
1820 ! NaN -> NaN
1821 
1822         fmuls   %f12,%f12,%f12
1823         ba      .spec6_exit
1824         st      %f12,[%i3]
1825 
1826 .spec6_inf:
     ! Reached through the ble that follows "cmp %l6,%o5" with %o5 = 0x7f800000,
     ! i.e. when %l6 is not greater than 0x7f800000 and the preceding bl was not taken.
     ! %o4 = %i1 - 2*%i2 at this point (sll in the bl delay slot, sub in the ble
     ! delay slot); presumably the address of the element being handled - confirm.
     ! Stores %f12 when bit 31 of the reloaded word is clear, %f3 when it is set.
1827         ld      [%o4],%l0                       ! reload the raw 32-bit word
1828         srl     %l0,29,%l0                      ! bit 31 moves down to bit 2
1829         andcc   %l0,4,%l0                       ! keep only that bit and set the condition codes
1830         be,a,pn %icc,.spec6_exit                ! bit clear: leave through the annulled slot below
1831         st      %f12,[%i3]                      ! runs only when the be is taken; %f12 presumably holds the input - confirm
1832
1833         ba      .spec6_exit
1834         st      %f3,[%i3]                       ! bit set: store %f3 (presumably 0.0f, see the X < 0.0f compare in .spec7)
1835 
1836 .spec6_out_of_range:
     ! Reached through the bl that follows "cmp %l6,%o5" with %o5 = 0x7f800000.
     ! The bl delay slot left %o4 = 2*%i2. The result is the square of one of two
     ! single-precision words at byte offsets 2048 and 2052 from G1_CONST_TBL,
     ! selected by bit 31 of the reloaded word. Falls through into .spec6_exit.
     ! NOTE(review): the two words are presumably a huge and a tiny constant so the
     ! square overflows or underflows; their values are not visible here - confirm.
1837         sub     %i1,%o4,%o4                     ! %o4 = %i1 - 2*%i2
1838         ld      [%o4],%l0                       ! reload the raw 32-bit word
1839         srl     %l0,29,%l0                      ! bit 31 moves down to bit 2
1840         and     %l0,4,%l0                       ! 0 when bit 31 is clear, 4 when it is set
1841         add     %l0,2048,%l0                    ! 2048 = 256 entries * 8 bytes, the size given by the table header comment
1842         ld      [G1_CONST_TBL+%l0],%f2          ! fetch the selected constant
1843         fmuls   %f2,%f2,%f2                     ! square it
1844         st      %f2,[%i3]                       ! store as the result for this element
1845 
1846 .spec6_exit:
     ! Common exit of the .spec6 paths. Carries %f14 / %l7 over into %f16 / %l0,
     ! loads seven more words starting at %i1 with stride %i2, masks them, and
     ! either re-enters .main_loop or leaves for .after_main_loop.
1847         fmovs   %f14,%f16                       ! carry the value in %f14 over as the new %f16
1848         mov     %l7,%l0                         ! and its integer companion %l7 as the new %l0
1849         lda     [%i1]%asi,%l1                   ! first three words are loaded twice:
1850         lda     [%i1]%asi,%f2                   ! once into an integer and once into an FP register
1851         add     %i1,%i2,%i1
1852         lda     [%i1]%asi,%l2
1853         lda     [%i1]%asi,%f4
1854         add     %i1,%i2,%i1
1855         lda     [%i1]%asi,%l3
1856         lda     [%i1]%asi,%f6
1857         add     %i1,%i2,%o0                     ! remaining four words go to integer registers only;
1858         lda     [%o0]%asi,%l4                   ! their addresses are kept in %o0..%o3
1859         add     %o0,%i2,%o1
1860         lda     [%o1]%asi,%l5
1861         add     %o1,%i2,%o2
1862         lda     [%o2]%asi,%l6
1863         add     %o2,%i2,%o3
1864         lda     [%o3]%asi,%l7
1865         add     %o3,%i2,%i1                     ! %i1 = one stride past the last word loaded
1866         and     %l1,G5_CONST,%l1                ! mask every loaded word with G5_CONST
1867         and     %l2,G5_CONST,%l2                ! NOTE(review): presumably 0x7fffffff (clears the sign bit),
1868         and     %l3,G5_CONST,%l3                ! since the masked values are compared with 0x7f800000 - confirm
1869         and     %l4,G5_CONST,%l4
1870         and     %l5,G5_CONST,%l5
1871         and     %l6,G5_CONST,%l6
1872         and     %l7,G5_CONST,%l7
1873
1874         subcc   %i0,7,%i0                       ! seven elements were finished on this path
1875         bpos,pt %icc,.main_loop                 ! counter still non-negative: back to the main loop
1876         add     %i3,%i4,%i3                     ! delay slot (not annulled, always runs): step the output pointer
1877         ba      .after_main_loop
1878         nop
1879 
1880         .align  16
1881 .spec7:
     ! Path for an iteration whose eighth element (X in %f14, masked bits in %l7)
     ! may need special handling. It first re-checks X; if X is acceptable it
     ! resumes at .spec7_cont (outside this view). Otherwise it finishes the seven
     ! lanes already in flight in %f40..%f52, stores them, and then handles the
     ! eighth element by class: NaN here, Inf and out-of-range at the labels below.
     !
     ! Per lane, with y the incoming double and k = (int) y, the visible code computes
     !   T = table[k & 255]  adjusted by fpadd32 with the fpackfix result of k
     !   result = (float) (T + T * (y - k) * (KA1 + KA2 * (y - k)))
     ! NOTE(review): the fpackfix / fpadd32 pair presumably applies the power-of-two
     ! scaling for the upper bits of k; it depends on GSR state set outside this view - confirm.
     !
     ! Lane registers (y / k / T / table offset):
     !   0: %f40 %f16 %f34 %o0     1: %f42 %f2  %f18 %o1     2: %f44 %f4  %f20 %o2
     !   3: %f46 %f6  %f22 %o3     4: %f48 %f8  %f24 %o4     5: %f50 %f10 %f26 %o5
     !   6: %f52 %f12 %f28 %o7
1882         sethi   %hi(0x7f800000),%o5
1883         cmp     %l7,%o5
1884         bge,pn  %icc,1f                         ! masked bits >= 0x7f800000: take the special path
1885         nop
1886         fcmpes  %fcc0,%f14,%f3
1887         fcmpes  %fcc1,%f14,THRESHOLDL
1888         fbl,a,pn        %fcc0,1f                ! if ( X < 0.0f )
1889         fstod   %f14,%f54                       ! (7) y = (double) X
1890         fbl,a,pt        %fcc1,.spec7_cont       ! if ( X < THRESHOLDL )
1891         fstod   %f14,%f54                       ! (7) y = (double) X
1892 1:
1893         fdtoi   %f40,%f16                       ! k = (int) y for each of the seven lanes,
1894         st      %f16,[%fp+tmp0]                 ! spilled to tmp0..tmp6 to reach the integer registers
1895         fmuld   F62_K256ONLN2,%f48,%f48         ! lanes 4..6 are still multiplied by K256ONLN2 here
1896
1897         fdtoi   %f42,%f2
1898         st      %f2,[%fp+tmp1]
1899         fmuld   F62_K256ONLN2,%f50,%f50
1900
1901         fdtoi   %f44,%f4
1902         st      %f4,[%fp+tmp2]
1903         fmuld   F62_K256ONLN2,%f52,%f52
1904
1905         fdtoi   %f46,%f6
1906         st      %f6,[%fp+tmp3]
1907
1908         fdtoi   %f48,%f8
1909         st      %f8,[%fp+tmp4]
1910
1911         fdtoi   %f50,%f10
1912         st      %f10,[%fp+tmp5]
1913
1914         fdtoi   %f52,%f12
1915         st      %f12,[%fp+tmp6]
1916
1917         fitod   %f16,%f34                       ! (double) k, subtracted from y further down
1918         fpackfix        %f16,%f16               ! VIS pack of k; consumed by the fpadd32 below
1919
1920         fitod   %f2,%f18
1921         fpackfix        %f2,%f2
1922
1923         fitod   %f4,%f20
1924         fpackfix        %f4,%f4
1925
1926         fitod   %f6,%f22
1927         fpackfix        %f6,%f6
1928
1929         fitod   %f8,%f24
1930         fpackfix        %f8,%f8
1931
1932         fitod   %f10,%f26
1933         fpackfix        %f10,%f26
1934
1935         fitod   %f12,%f28
1936         fpackfix        %f12,%f12
1937
1938         ld      [%fp+tmp0],%o0                  ! integer copy of k
1939         fsubd   %f40,%f34,%f40                  ! y = y - (double) k
1940
1941         ld      [%fp+tmp1],%o1
1942         fsubd   %f42,%f18,%f42
1943
1944         ld      [%fp+tmp2],%o2
1945         and     %o0,255,%o0                     ! table index = k & 255
1946         fsubd   %f44,%f20,%f44
1947
1948         ld      [%fp+tmp3],%o3
1949         and     %o1,255,%o1
1950         fsubd   %f46,%f22,%f46
1951
1952         sll     %o0,3,%o0                       ! times 8: byte offset of an 8-byte table entry
1953         sll     %o1,3,%o1
1954         fmuld   F60_KA2,%f40,%f34               ! KA2 * y
1955         fsubd   %f48,%f24,%f48
1956
1957         and     %o2,255,%o2
1958         fmuld   F60_KA2,%f42,%f18
1959         fsubd   %f50,%f26,%f50
1960
1961         sll     %o2,3,%o2
1962         fmuld   F60_KA2,%f44,%f20
1963         fsubd   %f52,%f28,%f52
1964
1965         ld      [%fp+tmp4],%o4
1966         and     %o3,255,%o3
1967         fmuld   F60_KA2,%f46,%f22
1968
1969         ld      [%fp+tmp5],%o5
1970         sll     %o3,3,%o3
1971         fmuld   F60_KA2,%f48,%f24
1972         faddd   F58_KA1,%f34,%f34               ! KA1 + KA2 * y
1973
1974         ld      [%fp+tmp6],%o7                  ! %o7 serves as the seventh index register
1975         and     %o4,255,%o4
1976         fmuld   F60_KA2,%f50,%f26
1977         faddd   F58_KA1,%f18,%f18
1978
1979         and     %o5,255,%o5
1980         fmuld   F60_KA2,%f52,%f28
1981         faddd   F58_KA1,%f20,%f20
1982
1983         sll     %o5,3,%o5
1984         faddd   F58_KA1,%f22,%f22
1985
1986         fmuld   %f34,%f40,%f40                  ! p = y * (KA1 + KA2 * y)
1987         ldd     [G1_CONST_TBL+%o0],%f34         ! T = table entry; reuses the coefficient register
1988         faddd   F58_KA1,%f24,%f24
1989
1990         fmuld   %f18,%f42,%f42
1991         ldd     [G1_CONST_TBL+%o1],%f18
1992         faddd   F58_KA1,%f26,%f26
1993
1994         fmuld   %f20,%f44,%f44
1995         ldd     [G1_CONST_TBL+%o2],%f20
1996         faddd   F58_KA1,%f28,%f28
1997
1998         fmuld   %f22,%f46,%f46
1999         ldd     [G1_CONST_TBL+%o3],%f22
2000         sll     %o4,3,%o4
2001
2002         fmuld   %f24,%f48,%f48
2003         ldd     [G1_CONST_TBL+%o4],%f24
2004         and     %o7,255,%o7
2005         fpadd32 %f16,%f34,%f34                  ! add the packed k into T (32-bit partitioned add)
2006
2007         fmuld   %f26,%f50,%f50
2008         ldd     [G1_CONST_TBL+%o5],%f26
2009         sll     %o7,3,%o7
2010         fpadd32 %f2,%f18,%f18
2011
2012         fmuld   %f28,%f52,%f52
2013         ldd     [G1_CONST_TBL+%o7],%f28
2014         fpadd32 %f4,%f20,%f20
2015
2016         fpadd32 %f6,%f22,%f22
2017
2018         fpadd32 %f8,%f24,%f24
2019         fmuld   %f34,%f40,%f40                  ! T * p
2020
2021         fpadd32 %f10,%f26,%f26
2022         fmuld   %f18,%f42,%f42
2023
2024         fpadd32 %f12,%f28,%f28
2025         fmuld   %f20,%f44,%f44
2026
2027         fmuld   %f22,%f46,%f46
2028
2029         fmuld   %f24,%f48,%f48
2030         faddd   %f34,%f40,%f40                  ! T + T * p
2031
2032         fmuld   %f26,%f50,%f50
2033         faddd   %f18,%f42,%f42
2034
2035         fmuld   %f28,%f52,%f52
2036         faddd   %f20,%f44,%f44
2037
2038         faddd   %f22,%f46,%f46
2039
2040         faddd   %f24,%f48,%f48
2041
2042         faddd   %f26,%f50,%f50
2043
2044         faddd   %f28,%f52,%f52
2045
2046         fdtos   %f40,%f26                       ! round to single; %f26 is free again after the faddd above
2047         st      %f26,[%i3]                      ! stores alternate between %i3 and %o4,
2048         add     %i3,%i4,%o4                     ! each one %i4 bytes after the previous
2049
2050         fdtos   %f42,%f18
2051         st      %f18,[%o4]
2052         add     %o4,%i4,%i3
2053
2054         fdtos   %f44,%f20
2055         st      %f20,[%i3]
2056         add     %i3,%i4,%o4
2057
2058         fdtos   %f46,%f22
2059         st      %f22,[%o4]
2060         add     %o4,%i4,%i3
2061
2062         fdtos   %f48,%f24
2063         st      %f24,[%i3]
2064         add     %i3,%i4,%o4
2065
2066         fdtos   %f50,%f26
2067         st      %f26,[%o4]
2068         add     %o4,%i4,%i3
2069
2070         fdtos   %f52,%f28
2071         st      %f28,[%i3]
2072         add     %i3,%i4,%i3                     ! %i3 now addresses the slot of the eighth result
2073
2074         sethi   %hi(0x7f800000),%o5
2075         cmp     %l7,%o5                         ! classify the eighth element by its masked bits
2076         bl,pt   %icc,.spec7_out_of_range        ! below 0x7f800000
2077         sub     %i1,%i2,%o4                     ! delay slot, always runs: %o4 = %i1 - %i2
2078
2079         ble,pn  %icc,.spec7_inf                 ! equal to 0x7f800000
2080         ld      [%o4],%l0                       ! delay slot, always runs: reload the raw word for .spec7_inf
2081
2082 ! NaN -> NaN
2083
2084         fmuls   %f14,%f14,%f14                  ! result is X * X
2085         ba      .spec7_exit
2086         st      %f14,[%i3]
2087 
2088 .spec7_inf:
     ! Reached through the ble that follows "cmp %l7,%o5" with %o5 = 0x7f800000.
     ! %l0 already holds the word loaded from [%o4] in that branch delay slot.
     ! Stores %f14 when bit 31 of that word is clear, %f3 when it is set.
2089         srl     %l0,29,%l0                      ! bit 31 moves down to bit 2
2090         andcc   %l0,4,%l0                       ! keep only that bit and set the condition codes
2091         be,a,pn %icc,.spec7_exit                ! bit clear: leave through the annulled slot below
2092         st      %f14,[%i3]                      ! runs only when the be is taken: store X unchanged
2093
2094         ba      .spec7_exit
2095         st      %f3,[%i3]                       ! bit set: store %f3 (presumably 0.0f, see the X < 0.0f compare in .spec7)
2096 
2097 .spec7_out_of_range:
     ! Reached through the bl that follows "cmp %l7,%o5" with %o5 = 0x7f800000.
     ! The bl delay slot left %o4 = %i1 - %i2. The result is the square of one of
     ! two single-precision words at byte offsets 2048 and 2052 from G1_CONST_TBL,
     ! selected by bit 31 of the reloaded word. Falls through into .spec7_exit.
     ! NOTE(review): the two words are presumably a huge and a tiny constant so the
     ! square overflows or underflows; their values are not visible here - confirm.
2098         ld      [%o4],%l0                       ! reload the raw 32-bit word
2099         srl     %l0,29,%l0                      ! bit 31 moves down to bit 2
2100         and     %l0,4,%l0                       ! 0 when bit 31 is clear, 4 when it is set
2101         add     %l0,2048,%l0                    ! 2048 = 256 entries * 8 bytes, the size given by the table header comment
2102         ld      [G1_CONST_TBL+%l0],%f2          ! fetch the selected constant
2103         fmuls   %f2,%f2,%f2                     ! square it
2104         st      %f2,[%i3]                       ! store as the eighth result
2105 
2106 .spec7_exit:
     ! Common exit of the .spec7 paths: all eight results of this iteration are stored.
2107         subcc   %i0,8,%i0                       ! eight elements were finished on this path
2108         bpos,pt %icc,.main_loop_preload         ! counter still non-negative: continue at .main_loop_preload
2109         add     %i3,%i4,%i3                     ! delay slot (not annulled, always runs): step the output pointer
2110
2111         ba      .tail                           ! otherwise continue at .tail
2112         nop
2113         SET_SIZE(__vexpf)
2114