1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
23 */
24 /*
25 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
26 * Use is subject to license terms.
27 */
28
29 .file "__vexpf.S"
30
31 #include "libm.h"
32
33 RO_DATA
34 .align 64
35 !! 2^(i/256) - ((i & 0xf0) << 44), i = [0, 255]
36 .CONST_TBL:
37 .word 0x3ff00000, 0x00000000, 0x3ff00b1a, 0xfa5abcbf
38 .word 0x3ff0163d, 0xa9fb3335, 0x3ff02168, 0x143b0281
39 .word 0x3ff02c9a, 0x3e778061, 0x3ff037d4, 0x2e11bbcc
40 .word 0x3ff04315, 0xe86e7f85, 0x3ff04e5f, 0x72f654b1
41 .word 0x3ff059b0, 0xd3158574, 0x3ff0650a, 0x0e3c1f89
42 .word 0x3ff0706b, 0x29ddf6de, 0x3ff07bd4, 0x2b72a836
43 .word 0x3ff08745, 0x18759bc8, 0x3ff092bd, 0xf66607e0
44 .word 0x3ff09e3e, 0xcac6f383, 0x3ff0a9c7, 0x9b1f3919
45 .word 0x3fefb558, 0x6cf9890f, 0x3fefc0f1, 0x45e46c85
46 .word 0x3fefcc92, 0x2b7247f7, 0x3fefd83b, 0x23395dec
47 .word 0x3fefe3ec, 0x32d3d1a2, 0x3fefefa5, 0x5fdfa9c5
48 .word 0x3feffb66, 0xaffed31b, 0x3ff00730, 0x28d7233e
49 .word 0x3ff01301, 0xd0125b51, 0x3ff01edb, 0xab5e2ab6
50 .word 0x3ff02abd, 0xc06c31cc, 0x3ff036a8, 0x14f204ab
51 .word 0x3ff0429a, 0xaea92de0, 0x3ff04e95, 0x934f312e
52 .word 0x3ff05a98, 0xc8a58e51, 0x3ff066a4, 0x5471c3c2
53 .word 0x3fef72b8, 0x3c7d517b, 0x3fef7ed4, 0x8695bbc0
54 .word 0x3fef8af9, 0x388c8dea, 0x3fef9726, 0x58375d2f
55 .word 0x3fefa35b, 0xeb6fcb75, 0x3fefaf99, 0xf8138a1c
56 .word 0x3fefbbe0, 0x84045cd4, 0x3fefc82f, 0x95281c6b
57 .word 0x3fefd487, 0x3168b9aa, 0x3fefe0e7, 0x5eb44027
58 .word 0x3fefed50, 0x22fcd91d, 0x3feff9c1, 0x8438ce4d
59 .word 0x3ff0063b, 0x88628cd6, 0x3ff012be, 0x3578a819
60 .word 0x3ff01f49, 0x917ddc96, 0x3ff02bdd, 0xa27912d1
61 .word 0x3fef387a, 0x6e756238, 0x3fef451f, 0xfb82140a
62 .word 0x3fef51ce, 0x4fb2a63f, 0x3fef5e85, 0x711ece75
63 .word 0x3fef6b45, 0x65e27cdd, 0x3fef780e, 0x341ddf29
64 .word 0x3fef84df, 0xe1f56381, 0x3fef91ba, 0x7591bb70
65 .word 0x3fef9e9d, 0xf51fdee1, 0x3fefab8a, 0x66d10f13
66 .word 0x3fefb87f, 0xd0dad990, 0x3fefc57e, 0x39771b2f
67 .word 0x3fefd285, 0xa6e4030b, 0x3fefdf96, 0x1f641589
68 .word 0x3fefecaf, 0xa93e2f56, 0x3feff9d2, 0x4abd886b
69 .word 0x3fef06fe, 0x0a31b715, 0x3fef1432, 0xedeeb2fd
70 .word 0x3fef2170, 0xfc4cd831, 0x3fef2eb8, 0x3ba8ea32
71 .word 0x3fef3c08, 0xb26416ff, 0x3fef4962, 0x66e3fa2d
72 .word 0x3fef56c5, 0x5f929ff1, 0x3fef6431, 0xa2de883b
73 .word 0x3fef71a7, 0x373aa9cb, 0x3fef7f26, 0x231e754a
74 .word 0x3fef8cae, 0x6d05d866, 0x3fef9a40, 0x1b7140ef
75 .word 0x3fefa7db, 0x34e59ff7, 0x3fefb57f, 0xbfec6cf4
76 .word 0x3fefc32d, 0xc313a8e5, 0x3fefd0e5, 0x44ede173
77 .word 0x3feedea6, 0x4c123422, 0x3feeec70, 0xdf1c5175
78 .word 0x3feefa45, 0x04ac801c, 0x3fef0822, 0xc367a024
79 .word 0x3fef160a, 0x21f72e2a, 0x3fef23fb, 0x2709468a
80 .word 0x3fef31f5, 0xd950a897, 0x3fef3ffa, 0x3f84b9d4
81 .word 0x3fef4e08, 0x6061892d, 0x3fef5c20, 0x42a7d232
82 .word 0x3fef6a41, 0xed1d0057, 0x3fef786d, 0x668b3237
83 .word 0x3fef86a2, 0xb5c13cd0, 0x3fef94e1, 0xe192aed2
84 .word 0x3fefa32a, 0xf0d7d3de, 0x3fefb17d, 0xea6db7d7
85 .word 0x3feebfda, 0xd5362a27, 0x3feece41, 0xb817c114
86 .word 0x3feedcb2, 0x99fddd0d, 0x3feeeb2d, 0x81d8abff
87 .word 0x3feef9b2, 0x769d2ca7, 0x3fef0841, 0x7f4531ee
88 .word 0x3fef16da, 0xa2cf6642, 0x3fef257d, 0xe83f4eef
89 .word 0x3fef342b, 0x569d4f82, 0x3fef42e2, 0xf4f6ad27
90 .word 0x3fef51a4, 0xca5d920f, 0x3fef6070, 0xdde910d2
91 .word 0x3fef6f47, 0x36b527da, 0x3fef7e27, 0xdbe2c4cf
92 .word 0x3fef8d12, 0xd497c7fd, 0x3fef9c08, 0x27ff07cc
93 .word 0x3feeab07, 0xdd485429, 0x3feeba11, 0xfba87a03
94 .word 0x3feec926, 0x8a5946b7, 0x3feed845, 0x90998b93
95 .word 0x3feee76f, 0x15ad2148, 0x3feef6a3, 0x20dceb71
96 .word 0x3fef05e1, 0xb976dc09, 0x3fef152a, 0xe6cdf6f4
97 .word 0x3fef247e, 0xb03a5585, 0x3fef33dd, 0x1d1929fd
98 .word 0x3fef4346, 0x34ccc320, 0x3fef52b9, 0xfebc8fb7
99 .word 0x3fef6238, 0x82552225, 0x3fef71c1, 0xc70833f6
100 .word 0x3fef8155, 0xd44ca973, 0x3fef90f4, 0xb19e9538
101 .word 0x3feea09e, 0x667f3bcd, 0x3feeb052, 0xfa75173e
102 .word 0x3feec012, 0x750bdabf, 0x3feecfdc, 0xddd47645
103 .word 0x3feedfb2, 0x3c651a2f, 0x3feeef92, 0x98593ae5
104 .word 0x3feeff7d, 0xf9519484, 0x3fef0f74, 0x66f42e87
105 .word 0x3fef1f75, 0xe8ec5f74, 0x3fef2f82, 0x86ead08a
106 .word 0x3fef3f9a, 0x48a58174, 0x3fef4fbd, 0x35d7cbfd
107 .word 0x3fef5feb, 0x564267c9, 0x3fef7024, 0xb1ab6e09
108 .word 0x3fef8069, 0x4fde5d3f, 0x3fef90b9, 0x38ac1cf6
109 .word 0x3feea114, 0x73eb0187, 0x3feeb17b, 0x0976cfdb
110 .word 0x3feec1ed, 0x0130c132, 0x3feed26a, 0x62ff86f0
111 .word 0x3feee2f3, 0x36cf4e62, 0x3feef387, 0x8491c491
112 .word 0x3fef0427, 0x543e1a12, 0x3fef14d2, 0xadd106d9
113 .word 0x3fef2589, 0x994cce13, 0x3fef364c, 0x1eb941f7
114 .word 0x3fef471a, 0x4623c7ad, 0x3fef57f4, 0x179f5b21
115 .word 0x3fef68d9, 0x9b4492ed, 0x3fef79ca, 0xd931a436
116 .word 0x3fef8ac7, 0xd98a6699, 0x3fef9bd0, 0xa478580f
117 .word 0x3feeace5, 0x422aa0db, 0x3feebe05, 0xbad61778
118 .word 0x3feecf32, 0x16b5448c, 0x3feee06a, 0x5e0866d9
119 .word 0x3feef1ae, 0x99157736, 0x3fef02fe, 0xd0282c8a
120 .word 0x3fef145b, 0x0b91ffc6, 0x3fef25c3, 0x53aa2fe2
121 .word 0x3fef3737, 0xb0cdc5e5, 0x3fef48b8, 0x2b5f98e5
122 .word 0x3fef5a44, 0xcbc8520f, 0x3fef6bdd, 0x9a7670b3
123 .word 0x3fef7d82, 0x9fde4e50, 0x3fef8f33, 0xe47a22a2
124 .word 0x3fefa0f1, 0x70ca07ba, 0x3fefb2bb, 0x4d53fe0d
125 .word 0x3feec491, 0x82a3f090, 0x3feed674, 0x194bb8d5
126 .word 0x3feee863, 0x19e32323, 0x3feefa5e, 0x8d07f29e
127 .word 0x3fef0c66, 0x7b5de565, 0x3fef1e7a, 0xed8eb8bb
128 .word 0x3fef309b, 0xec4a2d33, 0x3fef42c9, 0x80460ad8
129 .word 0x3fef5503, 0xb23e255d, 0x3fef674a, 0x8af46052
130 .word 0x3fef799e, 0x1330b358, 0x3fef8bfe, 0x53c12e59
131 .word 0x3fef9e6b, 0x5579fdbf, 0x3fefb0e5, 0x21356eba
132 .word 0x3fefc36b, 0xbfd3f37a, 0x3fefd5ff, 0x3a3c2774
133 .word 0x3feee89f, 0x995ad3ad, 0x3feefb4c, 0xe622f2ff
134 .word 0x3fef0e07, 0x298db666, 0x3fef20ce, 0x6c9a8952
135 .word 0x3fef33a2, 0xb84f15fb, 0x3fef4684, 0x15b749b1
136 .word 0x3fef5972, 0x8de5593a, 0x3fef6c6e, 0x29f1c52a
137 .word 0x3fef7f76, 0xf2fb5e47, 0x3fef928c, 0xf22749e4
138 .word 0x3fefa5b0, 0x30a1064a, 0x3fefb8e0, 0xb79a6f1f
139 .word 0x3fefcc1e, 0x904bc1d2, 0x3fefdf69, 0xc3f3a207
140 .word 0x3feff2c2, 0x5bd71e09, 0x3ff00628, 0x6141b33d
141 .word 0x3fef199b, 0xdd85529c, 0x3fef2d1c, 0xd9fa652c
142 .word 0x3fef40ab, 0x5fffd07a, 0x3fef5447, 0x78fafb22
143 .word 0x3fef67f1, 0x2e57d14b, 0x3fef7ba8, 0x8988c933
144 .word 0x3fef8f6d, 0x9406e7b5, 0x3fefa340, 0x5751c4db
145 .word 0x3fefb720, 0xdcef9069, 0x3fefcb0f, 0x2e6d1675
146 .word 0x3fefdf0b, 0x555dc3fa, 0x3feff315, 0x5b5bab74
147 .word 0x3ff0072d, 0x4a07897c, 0x3ff01b53, 0x2b08c968
148 .word 0x3ff02f87, 0x080d89f2, 0x3ff043c8, 0xeacaa1d6
149 .word 0x3fef5818, 0xdcfba487, 0x3fef6c76, 0xe862e6d3
150 .word 0x3fef80e3, 0x16c98398, 0x3fef955d, 0x71ff6075
151 .word 0x3fefa9e6, 0x03db3285, 0x3fefbe7c, 0xd63a8315
152 .word 0x3fefd321, 0xf301b460, 0x3fefe7d5, 0x641c0658
153 .word 0x3feffc97, 0x337b9b5f, 0x3ff01167, 0x6b197d17
154 .word 0x3ff02646, 0x14f5a129, 0x3ff03b33, 0x3b16ee12
155 .word 0x3ff0502e, 0xe78b3ff6, 0x3ff06539, 0x24676d76
156 .word 0x3ff07a51, 0xfbc74c83, 0x3ff08f79, 0x77cdb740
157 .word 0x3fefa4af, 0xa2a490da, 0x3fefb9f4, 0x867cca6e
158 .word 0x3fefcf48, 0x2d8e67f1, 0x3fefe4aa, 0xa2188510
159 .word 0x3feffa1b, 0xee615a27, 0x3ff00f9c, 0x1cb6412a
160 .word 0x3ff0252b, 0x376bba97, 0x3ff03ac9, 0x48dd7274
161 .word 0x3ff05076, 0x5b6e4540, 0x3ff06632, 0x798844f8
162 .word 0x3ff07bfd, 0xad9cbe14, 0x3ff091d8, 0x02243c89
163 .word 0x3ff0a7c1, 0x819e90d8, 0x3ff0bdba, 0x3692d514
164 .word 0x3ff0d3c2, 0x2b8f71f1, 0x3ff0e9d9, 0x6b2a23d9
165
166 .word 0x7149f2ca, 0x0da24260 ! 1.0e30f, 1.0e-30f
167 .word 0x3ecebfbe, 0x9d182250 ! KA2 = 3.66556671660783833261e-06
168 .word 0x3f662e43, 0xe2528362 ! KA1 = 2.70760782821392980564e-03
169 .word 0x40771547, 0x652b82fe ! K256ONLN2 = 369.3299304675746271
170 .word 0x42aeac4f, 0x42b17218 ! THRESHOLD = 87.3365402f
171 ! THRESHOLDL = 88.7228394f
172 ! local storage indices
173
174 #define tmp0 STACK_BIAS-32
175 #define tmp1 STACK_BIAS-28
176 #define tmp2 STACK_BIAS-24
177 #define tmp3 STACK_BIAS-20
178 #define tmp4 STACK_BIAS-16
179 #define tmp5 STACK_BIAS-12
180 #define tmp6 STACK_BIAS-8
181 #define tmp7 STACK_BIAS-4
182
183 ! sizeof temp storage - must be a multiple of 16 for V9
184 #define tmps 0x20
185
186 #define I5_THRESHOLD %i5
187 #define G1_CONST_TBL %g5
188 #define G5_CONST %g1
189
190 #define F62_K256ONLN2 %f62
191 #define F60_KA2 %f60
192 #define F58_KA1 %f58
193
194 #define THRESHOLDL %f0
195
196 ! register use
197 ! i0 n
198 ! i1 x
199 ! i2 stridex
200 ! i3 y
201 ! i4 stridey
202
203 ! i5 0x42aeac4f (87.3365402f)
204
205 ! g5 CONST_TBL (macro G1_CONST_TBL)
206 ! g1 0x7fffffff (macro G5_CONST)
207
208 ! f62 K256ONLN2 = 369.3299304675746271
209 ! f60 KA2 = 3.66556671660783833261e-06
210 ! f58 KA1 = 2.70760782821392980564e-03
211
212
213 ! !!!!! Algorithm !!!!!
214 !
215 ! double y, dtmp, drez;
216 ! int k, sign, Xi;
217 ! float X, Y;
218 ! int THRESHOLD = 0x42aeac4f; /* 87.3365402f */
219 ! float THRESHOLDL = 88.7228394f;
220 ! double KA2 = 3.66556671660783833261e-06;
221 ! double KA1 = 2.70760782821392980564e-03;
222 ! double K256ONLN2 = 369.3299304675746271;
223 ! char *CONST_TBL;
224 !
225 ! X = px[0];
226 ! Xi = ((int*)px)[0];
227 ! ax = Xi & 0x7fffffff;
228 !
229 ! if (ax > THRESHOLD) {
230 ! sign = ((unsigned)Xi >> 29) & 4;
231 ! if (ax >= 0x7f800000) { /* Inf or NaN */
232 ! if (ax > 0x7f800000) { /* NaN */
233 ! Y = X * X; /* NaN -> NaN */
234 ! return Y;
235 ! }
236 ! Y = (sign) ? zero : X; /* +Inf -> +Inf , -Inf -> zero */
237 ! return Y;
238 ! }
239 !
240 ! if ( X < 0.0f || X >= THRESHOLDL ) {
241 ! Y = ((float*)(CONST_TBL + 2048 + sign))[0];
242 ! /* Xi >= THRESHOLDL : Y = 1.0e+30f */
243 ! /* Xi < -THRESHOLD : Y = 1.0e-30f */
244 ! Y = Y * Y;
245 ! /* Xi >= THRESHOLDL : +Inf + overflow */
246 ! /* Xi < -THRESHOLD : +0 + underflow */
247 ! return Y;
248 ! }
249 ! }
250 ! vis_write_gsr(12 << 3);
251 ! y = (double) X;
252 ! y = K256ONLN2 * y;
253 ! k = (int) y;
254 ! dtmp = (double) k;
255 ! y -= dtmp;
256 ! dtmp = y * KA2;
257 ! dtmp += KA1;
258 ! y *= dtmp;
259 ! (the three steps above compute y = (y * KA2 + KA1) * y)
260 ! ((int*)&drez)[0] = k;
261 ! ((int*)&drez)[1] = 0;
262 ! ((float*)&drez)[0] = vis_fpackfix(drez);
263 ! k &= 255;
264 ! k <<= 3;
265 ! dtmp = ((double*)(CONST_TBL + k))[0];
266 ! drez = vis_fpadd32(drez,dtmp);
267 ! y *= drez;
268 ! y += drez;
269 ! Y = (float) y;
270 !
271 !
272 ! fstod %f16,%f40 ! y = (double) X
273 ! fmuld F62_K256ONLN2,%f40,%f40 ! y *= K256ONLN2
274 ! fdtoi %f40,%f16 ! k = (int) y
275 ! st %f16,[%fp+tmp0] ! store k
276 ! fitod %f16,%f34 ! dtmp = (double) k
277 ! fpackfix %f16,%f16 ! ((float*)&drez)[0] = vis_fpackfix(drez)
278 ! fsubd %f40,%f34,%f40 ! y -= dtmp
279 ! fmuld F60_KA2,%f40,%f34 ! dtmp = y * KA2
280 ! faddd F58_KA1,%f34,%f34 ! dtmp += KA1
281 ! ld [%fp+tmp0],%o0 ! load k
282 ! fmuld %f34,%f40,%f40 ! y *= dtmp
283 ! and %o0,255,%o0 ! k &= 255
284 ! sll %o0,3,%o0 ! k <<= 3
285 ! ldd [G1_CONST_TBL+%o0],%f34 ! dtmp = ((double*)(CONST_TBL + k))[0]
286 ! fpadd32 %f16,%f34,%f34 ! drez = vis_fpadd32(drez,dtmp)
287 ! fmuld %f34,%f40,%f40 ! y *= drez
288 ! faddd %f34,%f40,%f40 ! y += drez
289 ! fdtos %f40,%f26 ! (float) y
290 !--------------------------------------------------------------------
291
292 ENTRY(__vexpf)
293 save %sp,-SA(MINFRAME)-tmps,%sp ! new register window; frame includes tmps bytes used as tmp0..tmp7
294 PIC_SETUP(l7)
295 PIC_SET(l7,.CONST_TBL,g5) ! G1_CONST_TBL (%g5) <- .CONST_TBL (macro comes from libm.h)
296
297 wr %g0,0x82,%asi ! set %asi for non-faulting loads
298 wr %g0,0x60,%gsr ! vis_write_gsr(12 << 3)
299
300 sll %i2,2,%i2 ! stridex *= 4
301 sll %i4,2,%i4 ! stridey *= 4
302
303 ldd [G1_CONST_TBL+2056],F60_KA2 ! KA2
304 sethi %hi(0x7ffffc00),G5_CONST ! upper bits of 0x7fffffff
305 ldd [G1_CONST_TBL+2064],F58_KA1 ! KA1
306 add G5_CONST,1023,G5_CONST ! G5_CONST (%g1) = 0x7fffffff
307 ldd [G1_CONST_TBL+2072],F62_K256ONLN2 ! K256ONLN2
308 ld [G1_CONST_TBL+2080],I5_THRESHOLD ! THRESHOLD = 0x42aeac4f
309 ld [G1_CONST_TBL+2084],THRESHOLDL ! THRESHOLDL = 88.7228394f
310
311 subcc %i0,8,%i0 ! n -= 8
312 bneg,pn %icc,.tail ! fewer than 8 elements: go straight to the one-at-a-time tail
313 fzeros %f3 ! (delay slot) %f3 = 0.0f, the zero used by the special-case paths
314
315 .main_loop_preload:
316
317 ! preload 8 elements and get absolute values
318 ld [%i1],%l0 ! (0) Xi = ((int*)px)[0]
319 fzeros %f5
320 ld [%i1],%f16 ! (0) X = px[0]
321 fzeros %f7
322 add %i1,%i2,%o5 ! px += stridex
323 ld [%o5],%l1 ! (1) Xi = ((int*)px)[0]
324 and %l0,G5_CONST,%l0 ! (0) ax = Xi & 0x7fffffff
325 fzeros %f9
326 ld [%o5],%f2 ! (1) X = px[0]
327 fzeros %f11
328 add %o5,%i2,%i1 ! px += stridex
329 ld [%i1],%l2 ! (2) Xi = ((int*)px)[0]
330 and %l1,G5_CONST,%l1 ! (1) ax = Xi & 0x7fffffff
331 fzeros %f13
332 ld [%i1],%f4 ! (2) X = px[0]
333 fzeros %f15
334 add %i1,%i2,%o5 ! px += stridex
335 ld [%o5],%l3 ! (3) Xi = ((int*)px)[0]
336 and %l2,G5_CONST,%l2 ! (2) ax = Xi & 0x7fffffff
337 fzeros %f17
338 ld [%o5],%f6 ! (3) X = px[0]
339 add %o5,%i2,%o0 ! px += stridex
340 ld [%o0],%l4 ! (4) Xi = ((int*)px)[0]
341 and %l3,G5_CONST,%l3 ! (3) ax = Xi & 0x7fffffff
342 add %o0,%i2,%o1 ! px += stridex
343 ld [%o1],%l5 ! (5) Xi = ((int*)px)[0]
344 add %o1,%i2,%o2 ! px += stridex
345 ld [%o2],%l6 ! (6) Xi = ((int*)px)[0]
346 and %l4,G5_CONST,%l4 ! (4) ax = Xi & 0x7fffffff
347 add %o2,%i2,%o3 ! px += stridex
348 ld [%o3],%l7 ! (7) Xi = ((int*)px)[0]
349 add %o3,%i2,%i1 ! px += stridex
350 and %l5,G5_CONST,%l5 ! (5) ax = Xi & 0x7fffffff
351 and %l6,G5_CONST,%l6 ! (6) ax = Xi & 0x7fffffff
352 ba .main_loop
353 and %l7,G5_CONST,%l7 ! (7) ax = Xi & 0x7fffffff
354
355 .align 16
356 .main_loop:
357 cmp %l0,I5_THRESHOLD
358 bg,pn %icc,.spec0 ! (0) if (ax > THRESHOLD)
359 lda [%o0]%asi,%f8 ! (4) X = px[0]
360 fstod %f16,%f40 ! (0) y = (double) X
361 .spec0_cont:
362 cmp %l1,I5_THRESHOLD
363 bg,pn %icc,.spec1 ! (1) if (ax > THRESHOLD)
364 lda [%o1]%asi,%f10 ! (5) X = px[0]
365 fstod %f2,%f42 ! (1) y = (double) X
366 .spec1_cont:
367 cmp %l2,I5_THRESHOLD
368 bg,pn %icc,.spec2 ! (2) if (ax > THRESHOLD)
369 lda [%o2]%asi,%f12 ! (6) X = px[0]
370 fstod %f4,%f44 ! (2) y = (double) X
371 .spec2_cont:
372 cmp %l3,I5_THRESHOLD
373 bg,pn %icc,.spec3 ! (3) if (ax > THRESHOLD)
374 lda [%o3]%asi,%f14 ! (7) X = px[0]
375 fstod %f6,%f46 ! (3) y = (double) X
376 .spec3_cont:
377 cmp %l4,I5_THRESHOLD
378 bg,pn %icc,.spec4 ! (4) if (ax > THRESHOLD)
379 fmuld F62_K256ONLN2,%f40,%f40 ! (0) y *= K256ONLN2
380 fstod %f8,%f48 ! (4) y = (double) X
381 .spec4_cont:
382 cmp %l5,I5_THRESHOLD
383 bg,pn %icc,.spec5 ! (5) if (ax > THRESHOLD)
384 fmuld F62_K256ONLN2,%f42,%f42 ! (1) y *= K256ONLN2
385 fstod %f10,%f50 ! (5) y = (double) X
386 .spec5_cont:
387 cmp %l6,I5_THRESHOLD
388 bg,pn %icc,.spec6 ! (6) if (ax > THRESHOLD)
389 fmuld F62_K256ONLN2,%f44,%f44 ! (2) y *= K256ONLN2
390 fstod %f12,%f52 ! (6) y = (double) X
391 .spec6_cont:
392 cmp %l7,I5_THRESHOLD
393 bg,pn %icc,.spec7 ! (7) if (ax > THRESHOLD)
394 fmuld F62_K256ONLN2,%f46,%f46 ! (3) y *= K256ONLN2
395 fstod %f14,%f54 ! (7) y = (double) X
396 .spec7_cont:
397 fdtoi %f40,%f16 ! (0) k = (int) y
398 st %f16,[%fp+tmp0]
399 fmuld F62_K256ONLN2,%f48,%f48 ! (4) y *= K256ONLN2
400
401 fdtoi %f42,%f2 ! (1) k = (int) y
402 st %f2,[%fp+tmp1]
403 fmuld F62_K256ONLN2,%f50,%f50 ! (5) y *= K256ONLN2
404
405 fdtoi %f44,%f4 ! (2) k = (int) y
406 st %f4,[%fp+tmp2]
407 fmuld F62_K256ONLN2,%f52,%f52 ! (6) y *= K256ONLN2
408
409 fdtoi %f46,%f6 ! (3) k = (int) y
410 st %f6,[%fp+tmp3]
411 fmuld F62_K256ONLN2,%f54,%f54 ! (7) y *= K256ONLN2
412
413 fdtoi %f48,%f8 ! (4) k = (int) y
414 st %f8,[%fp+tmp4]
415
416 fdtoi %f50,%f10 ! (5) k = (int) y
417 st %f10,[%fp+tmp5]
418
419 fitod %f16,%f34 ! (0) dtmp = (double) k
420 fpackfix %f16,%f16 ! (0) ((float*)&drez)[0] = vis_fpackfix(drez)
421 nop
422 nop
423
424 fdtoi %f52,%f12 ! (6) k = (int) y
425 st %f12,[%fp+tmp6]
426
427 fdtoi %f54,%f14 ! (7) k = (int) y
428 st %f14,[%fp+tmp7]
429
430 lda [%i1]%asi,%l0 ! (8) Xi = ((int*)px)[0]
431 add %i1,%i2,%o5 ! px += stridex
432 fitod %f2,%f18 ! (1) dtmp = (double) k
433 fpackfix %f2,%f2 ! (1) ((float*)&drez)[0] = vis_fpackfix(drez)
434
435 lda [%o5]%asi,%l1 ! (9) Xi = ((int*)px)[0]
436 add %o5,%i2,%i1 ! px += stridex
437 fitod %f4,%f20 ! (2) dtmp = (double) k
438 fpackfix %f4,%f4 ! (2) ((float*)&drez)[0] = vis_fpackfix(drez)
439
440 lda [%i1]%asi,%l2 ! (10) Xi = ((int*)px)[0]
441 add %i1,%i2,%o5 ! px += stridex
442 fitod %f6,%f22 ! (3) dtmp = (double) k
443 fpackfix %f6,%f6 ! (3) ((float*)&drez)[0] = vis_fpackfix(drez)
444
445 lda [%o5]%asi,%l3 ! (11) Xi = ((int*)px)[0]
446 add %o5,%i2,%i1 ! px += stridex
447 fitod %f8,%f24 ! (4) dtmp = (double) k
448 fpackfix %f8,%f8 ! (4) ((float*)&drez)[0] = vis_fpackfix(drez)
449
450 fitod %f10,%f26 ! (5) dtmp = (double) k
451 fpackfix %f10,%f10 ! (5) ((float*)&drez)[0] = vis_fpackfix(drez)
452
453 fitod %f12,%f28 ! (6) dtmp = (double) k
454 fpackfix %f12,%f12 ! (6) ((float*)&drez)[0] = vis_fpackfix(drez)
455
456 fitod %f14,%f30 ! (7) dtmp = (double) k
457 fpackfix %f14,%f14 ! (7) ((float*)&drez)[0] = vis_fpackfix(drez)
458
459 ld [%fp+tmp0],%o0 ! (0) load k
460 and %l0,G5_CONST,%l0 ! (8) ax = Xi & 0x7fffffff
461 fsubd %f40,%f34,%f40 ! (0) y -= dtmp
462
463 ld [%fp+tmp1],%o1 ! (1) load k
464 and %l1,G5_CONST,%l1 ! (9) ax = Xi & 0x7fffffff
465 fsubd %f42,%f18,%f42 ! (1) y -= dtmp
466
467 ld [%fp+tmp2],%o2 ! (2) load k
468 and %l2,G5_CONST,%l2 ! (10) ax = Xi & 0x7fffffff
469 and %o0,255,%o0 ! (0) k &= 255
470 fsubd %f44,%f20,%f44 ! (2) y -= dtmp
471
472 ld [%fp+tmp3],%o3 ! (3) load k
473 and %o1,255,%o1 ! (1) k &= 255
474 fsubd %f46,%f22,%f46 ! (3) y -= dtmp
475
476 sll %o0,3,%o0 ! (0) k <<= 3
477 sll %o1,3,%o1 ! (1) k <<= 3
478 fmuld F60_KA2,%f40,%f34 ! (0) dtmp = y * KA2
479 fsubd %f48,%f24,%f48 ! (4) y -= dtmp
480
481 and %l3,G5_CONST,%l3 ! (11) ax = Xi & 0x7fffffff
482 and %o2,255,%o2 ! (2) k &= 255
483 fmuld F60_KA2,%f42,%f18 ! (1) dtmp = y * KA2
484 fsubd %f50,%f26,%f50 ! (5) y -= dtmp
485
486 sll %o2,3,%o2 ! (2) k <<= 3
487 fmuld F60_KA2,%f44,%f20 ! (2) dtmp = y * KA2
488 fsubd %f52,%f28,%f52 ! (6) y -= dtmp
489
490 ld [%fp+tmp4],%o4 ! (4) load k
491 and %o3,255,%o3 ! (3) k &= 255
492 fmuld F60_KA2,%f46,%f22 ! (3) dtmp = y * KA2
493 fsubd %f54,%f30,%f54 ! (7) y -= dtmp
494
495 ld [%fp+tmp5],%o5 ! (5) load k
496 sll %o3,3,%o3 ! (3) k <<= 3
497 fmuld F60_KA2,%f48,%f24 ! (4) dtmp = y * KA2
498 faddd F58_KA1,%f34,%f34 ! (0) dtmp += KA1
499
500 ld [%fp+tmp6],%o7 ! (6) load k
501 and %o4,255,%o4 ! (4) k &= 255
502 fmuld F60_KA2,%f50,%f26 ! (5) dtmp = y * KA2
503 faddd F58_KA1,%f18,%f18 ! (1) dtmp += KA1
504
505 ld [%fp+tmp7],%l4 ! (7) load k
506 and %o5,255,%o5 ! (5) k &= 255
507 fmuld F60_KA2,%f52,%f28 ! (6) dtmp = y * KA2
508 faddd F58_KA1,%f20,%f20 ! (2) dtmp += KA1
509
510 sll %o5,3,%o5 ! (5) k <<= 3
511 fmuld F60_KA2,%f54,%f30 ! (7) dtmp = y * KA2
512 faddd F58_KA1,%f22,%f22 ! (3) dtmp += KA1
513
514 fmuld %f34,%f40,%f40 ! (0) y *= dtmp
515 ldd [G1_CONST_TBL+%o0],%f34 ! (0) dtmp = ((double*)(CONST_TBL + k))[0]
516 and %l4,255,%l4 ! (7) k &= 255
517 faddd F58_KA1,%f24,%f24 ! (4) dtmp += KA1
518
519 fmuld %f18,%f42,%f42 ! (1) y *= dtmp
520 ldd [G1_CONST_TBL+%o1],%f18 ! (1) dtmp = ((double*)(CONST_TBL + k))[0]
521 sll %l4,3,%l4 ! (7) k <<= 3
522 faddd F58_KA1,%f26,%f26 ! (5) dtmp += KA1
523
524 fmuld %f20,%f44,%f44 ! (2) y *= dtmp
525 ldd [G1_CONST_TBL+%o2],%f20 ! (2) dtmp = ((double*)(CONST_TBL + k))[0]
526 faddd F58_KA1,%f28,%f28 ! (6) dtmp += KA1
527
528 fmuld %f22,%f46,%f46 ! (3) y *= dtmp
529 ldd [G1_CONST_TBL+%o3],%f22 ! (3) dtmp = ((double*)(CONST_TBL + k))[0]
530 sll %o4,3,%o4 ! (4) k <<= 3
531 faddd F58_KA1,%f30,%f30 ! (7) dtmp += KA1
532
533 fmuld %f24,%f48,%f48 ! (4) y *= dtmp
534 ldd [G1_CONST_TBL+%o4],%f24 ! (4) dtmp = ((double*)(CONST_TBL + k))[0]
535 and %o7,255,%o7 ! (6) k &= 255
536 fpadd32 %f16,%f34,%f34 ! (0) drez = vis_fpadd32(drez,dtmp)
537
538 fmuld %f26,%f50,%f50 ! (5) y *= dtmp
539 ldd [G1_CONST_TBL+%o5],%f26 ! (5) dtmp = ((double*)(CONST_TBL + k))[0]
540 sll %o7,3,%o7 ! (6) k <<= 3
541 fpadd32 %f2,%f18,%f18 ! (1) drez = vis_fpadd32(drez,dtmp)
542
543 fmuld %f28,%f52,%f52 ! (6) y *= dtmp
544 ldd [G1_CONST_TBL+%o7],%f28 ! (6) dtmp = ((double*)(CONST_TBL + k))[0]
545 sll %i2,2,%o0
546 fpadd32 %f4,%f20,%f20 ! (2) drez = vis_fpadd32(drez,dtmp)
547
548 fmuld %f30,%f54,%f54 ! (7) y *= dtmp
549 ldd [G1_CONST_TBL+%l4],%f30 ! (7) dtmp = ((double*)(CONST_TBL + k))[0]
550 sub %i1,%o0,%o0
551 fpadd32 %f6,%f22,%f22 ! (3) drez = vis_fpadd32(drez,dtmp)
552
553 lda [%i1]%asi,%l4 ! (12) Xi = ((int*)px)[0]
554 add %i1,%i2,%o1 ! px += stridex
555 fpadd32 %f8,%f24,%f24 ! (4) drez = vis_fpadd32(drez,dtmp)
556 fmuld %f34,%f40,%f40 ! (0) y *= drez
557
558 lda [%o1]%asi,%l5 ! (13) Xi = ((int*)px)[0]
559 add %o1,%i2,%o2 ! px += stridex
560 fpadd32 %f10,%f26,%f26 ! (5) drez = vis_fpadd32(drez,dtmp)
561 fmuld %f18,%f42,%f42 ! (1) y *= drez
562
563 lda [%o2]%asi,%l6 ! (14) Xi = ((int*)px)[0]
564 add %o2,%i2,%o3 ! px += stridex
565 fpadd32 %f12,%f28,%f28 ! (6) drez = vis_fpadd32(drez,dtmp)
566 fmuld %f20,%f44,%f44 ! (2) y *= drez
567
568 lda [%o3]%asi,%l7 ! (15) Xi = ((int*)px)[0]
569 add %o3,%i2,%i1 ! px += stridex
570 fpadd32 %f14,%f30,%f30 ! (7) drez = vis_fpadd32(drez,dtmp)
571 fmuld %f22,%f46,%f46 ! (3) y *= drez
572
573 lda [%o0]%asi,%f16 ! (8) X = px[0]
574 add %o0,%i2,%o5
575 fmuld %f24,%f48,%f48 ! (4) y *= drez
576 faddd %f34,%f40,%f40 ! (0) y += drez
577
578 lda [%o5]%asi,%f2 ! (9) X = px[0]
579 add %o5,%i2,%o0
580 fmuld %f26,%f50,%f50 ! (5) y *= drez
581 faddd %f18,%f42,%f42 ! (1) y += drez
582
583 lda [%o0]%asi,%f4 ! (10) X = px[0]
584 add %o0,%i2,%o5
585 fmuld %f28,%f52,%f52 ! (6) y *= drez
586 faddd %f20,%f44,%f44 ! (2) y += drez
587
588 lda [%o5]%asi,%f6 ! (11) X = px[0]
589 add %o5,%i2,%o0
590 fmuld %f30,%f54,%f54 ! (7) y *= drez
591 faddd %f22,%f46,%f46 ! (3) y += drez
592
593 and %l4,G5_CONST,%l4 ! (12) ax = Xi & 0x7fffffff
594 faddd %f24,%f48,%f48 ! (4) y += drez
595
596 and %l5,G5_CONST,%l5 ! (13) ax = Xi & 0x7fffffff
597 faddd %f26,%f50,%f50 ! (5) y += drez
598
599 and %l6,G5_CONST,%l6 ! (14) ax = Xi & 0x7fffffff
600 faddd %f28,%f52,%f52 ! (6) y += drez
601
602 and %l7,G5_CONST,%l7 ! (15) ax = Xi & 0x7fffffff
603 faddd %f30,%f54,%f54 ! (7) y += drez
604
605 fdtos %f40,%f26 ! (0) (float) y
606 st %f26,[%i3]
607 add %i3,%i4,%o4 ! py += stridey
608
609 fdtos %f42,%f18 ! (1) (float) y
610 st %f18,[%o4]
611 add %o4,%i4,%i3 ! py += stridey
612
613 fdtos %f44,%f20 ! (2) (float) y
614 st %f20,[%i3]
615 add %i3,%i4,%o4 ! py += stridey
616
617 fdtos %f46,%f22 ! (3) (float) y
618 st %f22,[%o4]
619 add %o4,%i4,%i3 ! py += stridey
620
621 fdtos %f48,%f24 ! (4) (float) y
622 st %f24,[%i3]
623 subcc %i0,8,%i0
624 add %i3,%i4,%o4 ! py += stridey
625
626 fdtos %f50,%f26 ! (5) (float) y
627 st %f26,[%o4]
628 add %o4,%i4,%o5 ! py += stridey
629 add %i4,%i4,%o7
630
631 fdtos %f52,%f28 ! (6) (float) y
632 st %f28,[%o5]
633 add %o5,%i4,%o4 ! py += stridey
634 add %o5,%o7,%i3 ! py += stridey
635
636 fdtos %f54,%f30 ! (7) (float) y
637 st %f30,[%o4]
638 bpos,pt %icc,.main_loop
639 nop
640 .after_main_loop:
641 sll %i2,3,%o2
642 sub %i1,%o2,%i1
643
644 .tail: ! process the remaining elements one at a time
645 add %i0,8,%i0 ! undo the -8 bias on the element count
646 subcc %i0,1,%i0 ! n -= 1
647 bneg,pn %icc,.exit ! nothing left
648
649 ld [%i1],%l0 ! (delay slot of the bneg above, always executed) Xi = ((int*)px)[0]
650 ld [%i1],%f2 ! X = px[0]
651 add %i1,%i2,%i1 ! px += stridex
652
653 .tail_loop:
654 and %l0,G5_CONST,%l1 ! ax = Xi & 0x7fffffff
655 cmp %l1,I5_THRESHOLD
656 bg,pn %icc,.tail_spec ! if (ax > THRESHOLD)
657 nop
658 .tail_spec_cont:
659 fstod %f2,%f40 ! y = (double) X
660 fmuld F62_K256ONLN2,%f40,%f40 ! y *= K256ONLN2
661 fdtoi %f40,%f2 ! k = (int) y
662 st %f2,[%fp+tmp0] ! store k
663 fitod %f2,%f16 ! dtmp = (double) k
664 fpackfix %f2,%f2 ! ((float*)&drez)[0] = vis_fpackfix(drez)
665 fsubd %f40,%f16,%f40 ! y -= dtmp
666 fmuld F60_KA2,%f40,%f16 ! dtmp = y * KA2
667 faddd F58_KA1,%f16,%f16 ! dtmp += KA1
668 ld [%fp+tmp0],%o0 ! load k
669 fmuld %f16,%f40,%f40 ! y *= dtmp
670 and %o0,255,%o0 ! k &= 255
671 sll %o0,3,%o0 ! k <<= 3
672 ldd [G1_CONST_TBL+%o0],%f16 ! dtmp = ((double*)(CONST_TBL + k))[0]
673 fpadd32 %f2,%f16,%f16 ! drez = vis_fpadd32(drez,dtmp)
674 lda [%i1]%asi,%l0 ! next Xi, loaded through %asi (non-faulting)
675 fmuld %f16,%f40,%f40 ! y *= drez
676 lda [%i1]%asi,%f2 ! next X, loaded through %asi (non-faulting)
677 faddd %f16,%f40,%f40 ! y += drez
678 add %i1,%i2,%i1 ! px += stridex
679 fdtos %f40,%f16 ! (float) y
680 st %f16,[%i3] ! py[0] = Y
681 add %i3,%i4,%i3 ! py += stridey
682 subcc %i0,1,%i0 ! n -= 1
683 bpos,pt %icc,.tail_loop ! loop while the count is non-negative
684 nop
685
686 .exit:
687 ret
688 restore
689
690 .tail_spec: ! tail element with ax > THRESHOLD
691 sethi %hi(0x7f800000),%o4
692 cmp %l1,%o4 ! ax vs 0x7f800000
693 bl,pt %icc,.tail_spec_out_of_range ! ax < 0x7f800000: finite
694 nop
695
696 srl %l0,29,%l0 ! Xi >> 29 (srl leaves %icc from the cmp intact)
697 ble,pn %icc,.tail_spec_inf ! ax == 0x7f800000: Inf
698 andcc %l0,4,%g0 ! (delay slot) test sign = (Xi >> 29) & 4
699
700 ! NaN -> NaN
701
702 fmuls %f2,%f2,%f2 ! Y = X * X
703 ba .tail_spec_exit
704 st %f2,[%i3] ! (delay slot) py[0] = Y
705
706 .tail_spec_inf:
707 be,a,pn %icc,.tail_spec_exit ! sign == 0: +Inf -> +Inf
708 st %f2,[%i3] ! (delay slot, executed only if taken) py[0] = X
709
710 ba .tail_spec_exit
711 st %f3,[%i3] ! (delay slot) -Inf -> zero
712
713 .tail_spec_out_of_range:
714 fcmpes %fcc0,%f2,%f3 ! compare X with 0.0f
715 fcmpes %fcc1,%f2,THRESHOLDL ! compare X with THRESHOLDL
716 fbl,pn %fcc0,1f ! if ( X < 0.0f )
717 nop
718 fbl,pt %fcc1,.tail_spec_cont ! if ( X < THRESHOLDL )
719 nop
720 1:
721 srl %l0,29,%l0
722 and %l0,4,%l0 ! sign = (Xi >> 29) & 4
723 add %l0,2048,%l0 ! offset 2048 holds 1.0e30f, offset 2052 holds 1.0e-30f
724 ld [G1_CONST_TBL+%l0],%f2 ! Y = ((float*)(CONST_TBL + 2048 + sign))[0]
725 fmuls %f2,%f2,%f2 ! Y = Y * Y: +Inf with overflow, or +0 with underflow
726 st %f2,[%i3] ! py[0] = Y
727
728 .tail_spec_exit:
729 lda [%i1]%asi,%l0 ! next Xi, loaded through %asi (non-faulting)
730 lda [%i1]%asi,%f2 ! next X, loaded through %asi (non-faulting)
731 add %i1,%i2,%i1 ! px += stridex
732
733 subcc %i0,1,%i0 ! n -= 1
734 bpos,pt %icc,.tail_loop ! loop while the count is non-negative
735 add %i3,%i4,%i3 ! (delay slot) py += stridey
736 ba .exit
737 nop
738
739 .align 16
740 .spec0: ! element (0) of the 8-wide window has ax > THRESHOLD
741 sethi %hi(0x7f800000),%o5
742 cmp %l0,%o5 ! ax vs 0x7f800000
743 bl,pt %icc,.spec0_out_of_range ! ax < 0x7f800000: finite
744 sll %i2,3,%o4 ! (delay slot) %o4 = 8 * stridex
745
746 ble,pn %icc,.spec0_inf ! ax == 0x7f800000: Inf
747 sub %i1,%o4,%o4 ! (delay slot) %o4 = px - 8 * stridex, the address of element (0)
748
749 ! NaN -> NaN
750
751 fmuls %f16,%f16,%f16 ! Y = X * X
752 ba .spec0_exit
753 st %f16,[%i3] ! (delay slot) py[0] = Y
754
755 .spec0_inf:
756 ld [%o4],%l0 ! reload Xi (%l0 only held ax)
757 srl %l0,29,%l0
758 andcc %l0,4,%l0 ! sign = (Xi >> 29) & 4
759 be,a,pn %icc,.spec0_exit ! sign == 0: +Inf -> +Inf
760 st %f16,[%i3] ! (delay slot, executed only if taken) py[0] = X
761
762 ba .spec0_exit
763 st %f3,[%i3] ! (delay slot) -Inf -> zero
764
765 .spec0_out_of_range:
766 fcmpes %fcc0,%f16,%f3 ! compare X with 0.0f
767 fcmpes %fcc1,%f16,THRESHOLDL ! compare X with THRESHOLDL
768 fbl,a,pn %fcc0,1f ! if ( X < 0.0f )
769 fstod %f16,%f40 ! (0) y = (double) X
770 fbl,a,pt %fcc1,.spec0_cont ! if ( X < THRESHOLDL )
771 fstod %f16,%f40 ! (0) y = (double) X
772 1:
773 sub %i1,%o4,%o4 ! %o4 = px - 8 * stridex, the address of element (0)
774 ld [%o4],%l0 ! reload Xi (%l0 only held ax)
775 srl %l0,29,%l0
776 and %l0,4,%l0 ! sign = (Xi >> 29) & 4
777 add %l0,2048,%l0 ! offset 2048 holds 1.0e30f, offset 2052 holds 1.0e-30f
778 ld [G1_CONST_TBL+%l0],%f16 ! Y = ((float*)(CONST_TBL + 2048 + sign))[0]
779 fmuls %f16,%f16,%f16 ! Y = Y * Y: +Inf with overflow, or +0 with underflow
780 st %f16,[%i3] ! py[0] = Y
781
782 .spec0_exit: ! element (0) is done: slide the window down by one element
783 fmovs %f2,%f16 ! X(1) -> X(0)
784 mov %l1,%l0 ! ax(1) -> ax(0)
785 fmovs %f4,%f2 ! X(2) -> X(1)
786 mov %l2,%l1 ! ax(2) -> ax(1)
787 fmovs %f6,%f4 ! X(3) -> X(2)
788 mov %l3,%l2 ! ax(3) -> ax(2)
789 fmovs %f8,%f6 ! X(4) -> X(3); %f8 was loaded in the delay slot of the branch to .spec0
790 mov %l4,%l3 ! ax(4) -> ax(3)
791 mov %l5,%l4 ! ax(5) -> ax(4)
792 mov %l6,%l5 ! ax(6) -> ax(5)
793 mov %l7,%l6 ! ax(7) -> ax(6)
794 lda [%i1]%asi,%l7 ! Xi for the new element (7)
795 add %i1,%i2,%i1 ! px += stridex
796 mov %o1,%o0 ! address of old element (5) is now the address of element (4)
797 mov %o2,%o1 ! likewise for element (5)
798 mov %o3,%o2 ! likewise for element (6)
799 and %l7,G5_CONST,%l7 ! (7) ax = Xi & 0x7fffffff
800 add %o2,%i2,%o3 ! address of the new element (7)
801
802 subcc %i0,1,%i0 ! one element consumed
803 bpos,pt %icc,.main_loop ! count still non-negative: resume the 8-wide loop
804 add %i3,%i4,%i3 ! (delay slot) py += stridey
805 ba .after_main_loop
806 nop
807
808 .align 16
809 .spec1: ! element (1) of the 8-wide window has ax > THRESHOLD
810 sethi %hi(0x7f800000),%o5
811 cmp %l1,%o5 ! ax vs 0x7f800000
812 bge,pn %icc,1f ! Inf or NaN: skip the float compares
813 nop
814 fcmpes %fcc0,%f2,%f3 ! compare X with 0.0f
815 fcmpes %fcc1,%f2,THRESHOLDL ! compare X with THRESHOLDL
816 fbl,a,pn %fcc0,1f ! if ( X < 0.0f )
817 fstod %f2,%f42 ! (1) y = (double) X
818 fbl,a,pt %fcc1,.spec1_cont ! if ( X < THRESHOLDL )
819 fstod %f2,%f42 ! (1) y = (double) X
820 1: ! element (1) is special: first finish element (0) with the regular algorithm
821 fmuld F62_K256ONLN2,%f40,%f40 ! (0) y *= K256ONLN2
822 fdtoi %f40,%f16 ! (0) k = (int) y
823 st %f16,[%fp+tmp0] ! (0) store k
824 fitod %f16,%f34 ! (0) dtmp = (double) k
825 fpackfix %f16,%f16 ! (0) ((float*)&drez)[0] = vis_fpackfix(drez)
826 fsubd %f40,%f34,%f40 ! (0) y -= dtmp
827 fmuld F60_KA2,%f40,%f34 ! (0) dtmp = y * KA2
828 faddd F58_KA1,%f34,%f34 ! (0) dtmp += KA1
829 ld [%fp+tmp0],%o0 ! (0) load k
830 fmuld %f34,%f40,%f40 ! (0) y *= dtmp
831 and %o0,255,%o0 ! (0) k &= 255
832 sll %o0,3,%o0 ! (0) k <<= 3
833 ldd [G1_CONST_TBL+%o0],%f34 ! (0) dtmp = ((double*)(CONST_TBL + k))[0]
834 fpadd32 %f16,%f34,%f34 ! (0) drez = vis_fpadd32(drez,dtmp)
835 fmuld %f34,%f40,%f40 ! (0) y *= drez
836 faddd %f34,%f40,%f40 ! (0) y += drez
837 fdtos %f40,%f26 ! (0) (float) y
838 st %f26,[%i3] ! (0) py[0] = Y
839 add %i3,%i4,%i3 ! py += stridey
840
841 cmp %l1,%o5 ! now classify element (1): ax vs 0x7f800000
842 bl,pt %icc,.spec1_out_of_range ! ax < 0x7f800000: finite
843 sll %i2,3,%o4 ! (delay slot) %o4 = 8 * stridex
844
845 ble,pn %icc,.spec1_inf ! ax == 0x7f800000: Inf
846 sub %i1,%o4,%o4 ! (delay slot) %o4 = px - 8 * stridex, the address of element (0)
847
848 ! NaN -> NaN
849
850 fmuls %f2,%f2,%f2 ! Y = X * X
851 ba .spec1_exit
852 st %f2,[%i3] ! (delay slot) py[0] = Y
853
854 .spec1_inf:
855 add %o4,%i2,%o4 ! address of element (1)
856 ld [%o4],%l0 ! reload Xi
857 srl %l0,29,%l0
858 andcc %l0,4,%l0 ! sign = (Xi >> 29) & 4
859 be,a,pn %icc,.spec1_exit ! sign == 0: +Inf -> +Inf
860 st %f2,[%i3] ! (delay slot, executed only if taken) py[0] = X
861
862 ba .spec1_exit
863 st %f3,[%i3] ! (delay slot) -Inf -> zero
864
865 .spec1_out_of_range:
866 sub %i1,%o4,%o4 ! px - 8 * stridex
867 add %o4,%i2,%o4 ! address of element (1)
868 ld [%o4],%l0 ! reload Xi
869 srl %l0,29,%l0
870 and %l0,4,%l0 ! sign = (Xi >> 29) & 4
871 add %l0,2048,%l0 ! offset 2048 holds 1.0e30f, offset 2052 holds 1.0e-30f
872 ld [G1_CONST_TBL+%l0],%f2 ! Y = ((float*)(CONST_TBL + 2048 + sign))[0]
873 fmuls %f2,%f2,%f2 ! Y = Y * Y: +Inf with overflow, or +0 with underflow
874 st %f2,[%i3] ! py[0] = Y
875
876 .spec1_exit: ! elements (0) and (1) are done: slide the window down by two elements
877 fmovs %f4,%f16 ! X(2) -> X(0)
878 mov %l2,%l0 ! ax(2) -> ax(0)
879 fmovs %f6,%f2 ! X(3) -> X(1)
880 mov %l3,%l1 ! ax(3) -> ax(1)
881 fmovs %f8,%f4 ! X(4) -> X(2)
882 mov %l4,%l2 ! ax(4) -> ax(2)
883 fmovs %f10,%f6 ! X(5) -> X(3); %f10 was loaded in the delay slot of the branch to .spec1
884 mov %l5,%l3 ! ax(5) -> ax(3)
885 mov %l6,%l4 ! ax(6) -> ax(4)
886 mov %l7,%l5 ! ax(7) -> ax(5)
887 lda [%i1]%asi,%l6 ! Xi for the new element (6)
888 add %i1,%i2,%i1 ! px += stridex
889 lda [%i1]%asi,%l7 ! Xi for the new element (7)
890 add %i1,%i2,%i1 ! px += stridex
891 and %l6,G5_CONST,%l6 ! (6) ax = Xi & 0x7fffffff
892 and %l7,G5_CONST,%l7 ! (7) ax = Xi & 0x7fffffff
893 mov %o2,%o0 ! address of old element (6) is now the address of element (4)
894 mov %o3,%o1 ! address of old element (7) is now the address of element (5)
895 add %o1,%i2,%o2 ! address of the new element (6)
896 add %o2,%i2,%o3 ! address of the new element (7)
897
898 subcc %i0,2,%i0 ! two elements consumed
899 bpos,pt %icc,.main_loop ! count still non-negative: resume the 8-wide loop
900 add %i3,%i4,%i3 ! (delay slot) py += stridey
901 ba .after_main_loop
902 nop
903
904 .align 16
905 .spec2: ! element (2) of the 8-wide window has ax > THRESHOLD
906 sethi %hi(0x7f800000),%o5
907 cmp %l2,%o5 ! ax vs 0x7f800000
908 bge,pn %icc,1f ! Inf or NaN: skip the float compares
909 nop
910 fcmpes %fcc0,%f4,%f3 ! compare X with 0.0f
911 fcmpes %fcc1,%f4,THRESHOLDL ! compare X with THRESHOLDL
912 fbl,a,pn %fcc0,1f ! if ( X < 0.0f )
913 fstod %f4,%f44 ! (2) y = (double) X
914 fbl,a,pt %fcc1,.spec2_cont ! if ( X < THRESHOLDL )
915 fstod %f4,%f44 ! (2) y = (double) X
916 1: ! element (2) is special: first finish elements (0) and (1) with the regular algorithm
917 fmuld F62_K256ONLN2,%f40,%f40 ! (0) y *= K256ONLN2
918
919 fmuld F62_K256ONLN2,%f42,%f42 ! (1) y *= K256ONLN2
920
921 fdtoi %f40,%f16 ! (0) k = (int) y
922 st %f16,[%fp+tmp0] ! (0) store k
923
924 fdtoi %f42,%f2 ! (1) k = (int) y
925 st %f2,[%fp+tmp1] ! (1) store k
926
927 fitod %f16,%f34 ! (0) dtmp = (double) k
928 fpackfix %f16,%f16 ! (0) ((float*)&drez)[0] = vis_fpackfix(drez)
929
930 fitod %f2,%f18 ! (1) dtmp = (double) k
931 fpackfix %f2,%f2 ! (1) ((float*)&drez)[0] = vis_fpackfix(drez)
932
933 fsubd %f40,%f34,%f40 ! (0) y -= dtmp
934
935 fsubd %f42,%f18,%f42 ! (1) y -= dtmp
936
937 fmuld F60_KA2,%f40,%f34 ! (0) dtmp = y * KA2
938
939 fmuld F60_KA2,%f42,%f18 ! (1) dtmp = y * KA2
940
941 faddd F58_KA1,%f34,%f34 ! (0) dtmp += KA1
942
943 faddd F58_KA1,%f18,%f18 ! (1) dtmp += KA1
944
945 ld [%fp+tmp0],%o0 ! (0) load k
946 fmuld %f34,%f40,%f40 ! (0) y *= dtmp
947
948 ld [%fp+tmp1],%o1 ! (1) load k
949 fmuld %f18,%f42,%f42 ! (1) y *= dtmp
950
951 and %o0,255,%o0 ! (0) k &= 255
952
953 and %o1,255,%o1 ! (1) k &= 255
954
955 sll %o0,3,%o0 ! (0) k <<= 3
956
957 sll %o1,3,%o1 ! (1) k <<= 3
958
959 ldd [G1_CONST_TBL+%o0],%f34 ! (0) dtmp = ((double*)(CONST_TBL + k))[0]
960
961 ldd [G1_CONST_TBL+%o1],%f18 ! (1) dtmp = ((double*)(CONST_TBL + k))[0]
962
963 fpadd32 %f16,%f34,%f34 ! (0) drez = vis_fpadd32(drez,dtmp)
964
965 fpadd32 %f2,%f18,%f18 ! (1) drez = vis_fpadd32(drez,dtmp)
966
967 fmuld %f34,%f40,%f40 ! (0) y *= drez
968
969 fmuld %f18,%f42,%f42 ! (1) y *= drez
970
971 faddd %f34,%f40,%f40 ! (0) y += drez
972
973 faddd %f18,%f42,%f42 ! (1) y += drez
974
975 fdtos %f40,%f26 ! (0) (float) y
976 st %f26,[%i3] ! (0) py[0] = Y
977 add %i3,%i4,%o4 ! py += stridey
978
979 fdtos %f42,%f18 ! (1) (float) y
980 st %f18,[%o4] ! (1) py[0] = Y
981 add %o4,%i4,%i3 ! py += stridey
982
983 cmp %l2,%o5 ! now classify element (2): ax vs 0x7f800000
984 sll %i2,1,%o5 ! %o5 = 2 * stridex (sll leaves %icc intact)
985 bl,pt %icc,.spec2_out_of_range ! ax < 0x7f800000: finite
986 sll %i2,2,%o4 ! (delay slot) %o4 = 4 * stridex
987
988 ble,pn %icc,.spec2_inf ! ax == 0x7f800000: Inf
989 add %o4,%o5,%o4 ! (delay slot) %o4 = 6 * stridex
990
991 ! NaN -> NaN
992
993 fmuls %f4,%f4,%f4 ! Y = X * X
994 ba .spec2_exit
995 st %f4,[%i3] ! (delay slot) py[0] = Y
996
997 .spec2_inf:
998 sub %i1,%o4,%o4 ! px - 6 * stridex, the address of element (2)
999 ld [%o4],%l0 ! reload Xi
1000 srl %l0,29,%l0
1001 andcc %l0,4,%l0 ! sign = (Xi >> 29) & 4
1002 be,a,pn %icc,.spec2_exit ! sign == 0: +Inf -> +Inf
1003 st %f4,[%i3] ! (delay slot, executed only if taken) py[0] = X
1004
1005 ba .spec2_exit
1006 st %f3,[%i3] ! (delay slot) -Inf -> zero
1007
1008 .spec2_out_of_range:
1009 add %o4,%o5,%o4 ! %o4 = 6 * stridex
1010 sub %i1,%o4,%o4 ! px - 6 * stridex, the address of element (2)
1011 ld [%o4],%l0 ! reload Xi
1012 srl %l0,29,%l0
1013 and %l0,4,%l0 ! sign = (Xi >> 29) & 4
1014 add %l0,2048,%l0 ! offset 2048 holds 1.0e30f, offset 2052 holds 1.0e-30f
1015 ld [G1_CONST_TBL+%l0],%f2 ! Y = ((float*)(CONST_TBL + 2048 + sign))[0]; %f2 is free scratch here
1016 fmuls %f2,%f2,%f2 ! Y = Y * Y: +Inf with overflow, or +0 with underflow
1017 st %f2,[%i3] ! py[0] = Y
1018
1019 .spec2_exit: ! elements (0)..(2) are done: slide the window down by three elements
1020 fmovs %f6,%f16 ! X(3) -> X(0)
1021 mov %l3,%l0 ! ax(3) -> ax(0)
1022 mov %o3,%o0 ! address of old element (7) is now the address of element (4)
1023 fmovs %f8,%f2 ! X(4) -> X(1)
1024 mov %l4,%l1 ! ax(4) -> ax(1)
1025 add %o0,%i2,%o1 ! address of the new element (5)
1026 fmovs %f10,%f4 ! X(5) -> X(2)
1027 mov %l5,%l2 ! ax(5) -> ax(2)
1028 add %o1,%i2,%o2 ! address of the new element (6)
1029 fmovs %f12,%f6 ! X(6) -> X(3); %f12 was loaded in the delay slot of the branch to .spec2
1030 mov %l6,%l3 ! ax(6) -> ax(3)
1031 mov %l7,%l4 ! ax(7) -> ax(4)
1032 lda [%i1]%asi,%l5 ! Xi for the new element (5)
1033 add %i1,%i2,%i1 ! px += stridex
1034 add %o2,%i2,%o3 ! address of the new element (7)
1035 lda [%i1]%asi,%l6 ! Xi for the new element (6)
1036 add %i1,%i2,%i1 ! px += stridex
1037 lda [%i1]%asi,%l7 ! Xi for the new element (7)
1038 add %i1,%i2,%i1 ! px += stridex
1039 and %l5,G5_CONST,%l5 ! (5) ax = Xi & 0x7fffffff
1040 and %l6,G5_CONST,%l6 ! (6) ax = Xi & 0x7fffffff
1041 and %l7,G5_CONST,%l7 ! (7) ax = Xi & 0x7fffffff
1042
1043 subcc %i0,3,%i0 ! three elements consumed
1044 bpos,pt %icc,.main_loop ! count still non-negative: resume the 8-wide loop
1045 add %i3,%i4,%i3 ! (delay slot) py += stridey
1046 ba .after_main_loop
1047 nop
1048 .spec3: ! element (3) needs special handling: X is in %f6, its masked integer word in %l3
1049 sethi %hi(0x7f800000),%o5 ! %o5 = 0x7f800000, the single-precision +Inf bit pattern
1050 cmp %l3,%o5
1051 bge,pn %icc,1f ! not below the Inf pattern (Inf or NaN): go to 1:
1052 nop
1053 fcmpes %fcc0,%f6,%f3
1054 fcmpes %fcc1,%f6,THRESHOLDL
1055 fbl,a,pn %fcc0,1f ! if ( X < 0.0f )
1056 fstod %f6,%f46 ! (3) y = (double) X
1057 fbl,a,pt %fcc1,.spec3_cont ! if ( X < THRESHOLDL )
1058 fstod %f6,%f46 ! (3) y = (double) X
1059 1: ! finish elements (0)-(2), held as doubles in %f40/%f42/%f44, before dealing with (3)
1060 fmuld F62_K256ONLN2,%f40,%f40 ! y *= F62_K256ONLN2 (presumably 256/ln2; defined outside this view)
1061
1062 fmuld F62_K256ONLN2,%f42,%f42
1063
1064 fmuld F62_K256ONLN2,%f44,%f44
1065 ! k = (int) y, spilled to the stack so it can be reloaded into an integer register
1066 fdtoi %f40,%f16
1067 st %f16,[%fp+tmp0]
1068
1069 fdtoi %f42,%f2
1070 st %f2,[%fp+tmp1]
1071
1072 fdtoi %f44,%f4
1073 st %f4,[%fp+tmp2]
1074 ! (double) k for the fraction, plus a VIS-packed copy of k that fpadd32 later adds to the table entry
1075 fitod %f16,%f34
1076 fpackfix %f16,%f16 ! NOTE(review): result depends on the GSR scale, which is set outside this view
1077
1078 fitod %f2,%f18
1079 fpackfix %f2,%f2
1080
1081 fitod %f4,%f20
1082 fpackfix %f4,%f4
1083 ! r = y - k
1084 fsubd %f40,%f34,%f40
1085
1086 fsubd %f42,%f18,%f42
1087
1088 fsubd %f44,%f20,%f44
1089 ! p = r * (F58_KA1 + F60_KA2 * r)
1090 fmuld F60_KA2,%f40,%f34
1091
1092 fmuld F60_KA2,%f42,%f18
1093
1094 fmuld F60_KA2,%f44,%f20
1095
1096 faddd F58_KA1,%f34,%f34
1097
1098 faddd F58_KA1,%f18,%f18
1099
1100 faddd F58_KA1,%f20,%f20
1101
1102 ld [%fp+tmp0],%o0 ! reload k into an integer register
1103 fmuld %f34,%f40,%f40
1104
1105 ld [%fp+tmp1],%o1
1106 fmuld %f18,%f42,%f42
1107
1108 ld [%fp+tmp2],%o2
1109 fmuld %f20,%f44,%f44
1110 ! table byte offset = (k & 255) * 8
1111 and %o0,255,%o0
1112 and %o1,255,%o1
1113
1114 and %o2,255,%o2
1115 sll %o0,3,%o0
1116
1117 sll %o1,3,%o1
1118 sll %o2,3,%o2
1119 ! load the 8-byte table entry T for each element (G1_CONST_TBL presumably holds the .CONST_TBL base)
1120 ldd [G1_CONST_TBL+%o0],%f34
1121
1122 ldd [G1_CONST_TBL+%o1],%f18
1123
1124 ldd [G1_CONST_TBL+%o2],%f20
1125 ! T += packed k as two 32-bit adds (NOTE(review): presumably adjusts the exponent field - confirm)
1126 fpadd32 %f16,%f34,%f34
1127
1128 fpadd32 %f2,%f18,%f18
1129
1130 fpadd32 %f4,%f20,%f20
1131 ! result = T + T * p
1132 fmuld %f34,%f40,%f40
1133
1134 fmuld %f18,%f42,%f42
1135
1136 fmuld %f20,%f44,%f44
1137
1138 faddd %f34,%f40,%f40
1139
1140 faddd %f18,%f42,%f42
1141
1142 faddd %f20,%f44,%f44
1143 ! narrow to single and store; the output pointer %i3 advances by the stride in %i4
1144 fdtos %f40,%f26
1145 st %f26,[%i3]
1146 add %i3,%i4,%o4
1147
1148 fdtos %f42,%f18
1149 st %f18,[%o4]
1150 add %o4,%i4,%i3
1151
1152 fdtos %f44,%f20
1153 st %f20,[%i3]
1154 add %i3,%i4,%i3 ! %i3 now addresses the output slot of element (3)
1155 ! classify element (3): below the Inf pattern = finite, equal = Inf, above = NaN
1156 cmp %l3,%o5
1157 bl,pt %icc,.spec3_out_of_range
1158 sll %i2,2,%o4 ! delay slot: %o4 = 4 * input stride
1159
1160 ble,pn %icc,.spec3_inf ! less-than was handled above, so this is the equal case
1161 add %o4,%i2,%o4 ! delay slot: %o4 = 5 * input stride
1162
1163 ! NaN -> NaN
1164
1165 fmuls %f6,%f6,%f6
1166 ba .spec3_exit
1167 st %f6,[%i3] ! delay slot: store X * X
1168
1169 .spec3_inf: ! re-read the input word of element (3), 5 strides before %i1, to test its sign
1170 sub %i1,%o4,%o4
1171 ld [%o4],%l0
1172 srl %l0,29,%l0
1173 andcc %l0,4,%l0 ! keeps only the sign bit (bit 31 moved to bit 2)
1174 be,a,pn %icc,.spec3_exit ! sign clear (+Inf): the result is X itself
1175 st %f6,[%i3] ! annulling branch: this store runs only when the branch is taken
1176
1177 ba .spec3_exit ! sign set (-Inf): the result is %f3, the 0.0f used in the X < 0.0f test
1178 st %f3,[%i3]
1179
1180 .spec3_out_of_range: ! finite X that is negative or not below THRESHOLDL
1181 add %o4,%i2,%o4 ! %o4 = 5 * input stride (4 * stride came from the bl delay slot)
1182 sub %i1,%o4,%o4
1183 ld [%o4],%l0
1184 srl %l0,29,%l0
1185 and %l0,4,%l0 ! 0 when the sign bit is clear, 4 when it is set
1186 add %l0,2048,%l0 ! byte offset 2048 or 2052 from the table base
1187 ld [G1_CONST_TBL+%l0],%f2 ! sign-selected single constant (its value is not visible in this view)
1188 fmuls %f2,%f2,%f2 ! result = constant squared - presumably forces overflow or underflow; confirm
1189 st %f2,[%i3]
1190
1191 .spec3_exit: ! slide elements (4)-(7) down to slots (0)-(3), then fetch four new inputs
1192 fmovs %f8,%f16
1193 mov %l4,%l0
1194 fmovs %f10,%f2
1195 mov %l5,%l1
1196 fmovs %f12,%f4
1197 mov %l6,%l2
1198 fmovs %f14,%f6
1199 mov %l7,%l3
1200 mov %i1,%o0 ! %o0..%o3 = addresses of the four new inputs, one input stride (%i2) apart
1201 lda [%o0]%asi,%l4
1202 add %o0,%i2,%o1
1203 lda [%o1]%asi,%l5
1204 add %o1,%i2,%o2
1205 lda [%o2]%asi,%l6
1206 add %o2,%i2,%o3
1207 lda [%o3]%asi,%l7
1208 add %o3,%i2,%i1 ! %i1 = one stride past the last input loaded
1209 and %l4,G5_CONST,%l4 ! mask the new words with G5_CONST (presumably clears the sign bit; confirm)
1210 and %l5,G5_CONST,%l5
1211 and %l6,G5_CONST,%l6
1212 and %l7,G5_CONST,%l7
1213
1214 subcc %i0,4,%i0 ! four results were stored on this path
1215 bpos,pt %icc,.main_loop ! continue while the remaining count is non-negative
1216 add %i3,%i4,%i3 ! delay slot: step the output pointer past element (3)
1217 ba .after_main_loop
1218 nop
1219
1220 .align 16
1221 .spec4: ! element (4) needs special handling: X is in %f8, its masked integer word in %l4
1222 sethi %hi(0x7f800000),%o5 ! %o5 = 0x7f800000, the single-precision +Inf bit pattern
1223 cmp %l4,%o5
1224 bge,pn %icc,1f ! not below the Inf pattern (Inf or NaN): go to 1:
1225 nop
1226 fcmpes %fcc0,%f8,%f3
1227 fcmpes %fcc1,%f8,THRESHOLDL
1228 fbl,a,pn %fcc0,1f ! if ( X < 0.0f )
1229 fstod %f8,%f48 ! (4) y = (double) X
1230 fbl,a,pt %fcc1,.spec4_cont ! if ( X < THRESHOLDL )
1231 fstod %f8,%f48 ! (4) y = (double) X
1232 1: ! finish elements (0)-(3), held as doubles in %f40..%f46, before dealing with (4)
1233 fmuld F62_K256ONLN2,%f42,%f42 ! only (1)-(3) are scaled here; (0) goes straight to fdtoi - presumably scaled before entry
1234
1235 fmuld F62_K256ONLN2,%f44,%f44
1236
1237 fmuld F62_K256ONLN2,%f46,%f46
1238 ! k = (int) y, spilled to the stack so it can be reloaded into an integer register
1239 fdtoi %f40,%f16
1240 st %f16,[%fp+tmp0]
1241
1242 fdtoi %f42,%f2
1243 st %f2,[%fp+tmp1]
1244
1245 fdtoi %f44,%f4
1246 st %f4,[%fp+tmp2]
1247
1248 fdtoi %f46,%f6
1249 st %f6,[%fp+tmp3]
1250 ! (double) k for the fraction, plus a VIS-packed copy of k that fpadd32 later adds to the table entry
1251 fitod %f16,%f34
1252 fpackfix %f16,%f16 ! NOTE(review): result depends on the GSR scale, which is set outside this view
1253
1254 fitod %f2,%f18
1255 fpackfix %f2,%f2
1256
1257 fitod %f4,%f20
1258 fpackfix %f4,%f4
1259
1260 fitod %f6,%f22
1261 fpackfix %f6,%f6
1262 ! r = y - k
1263 fsubd %f40,%f34,%f40
1264
1265 fsubd %f42,%f18,%f42
1266
1267 fsubd %f44,%f20,%f44
1268
1269 fsubd %f46,%f22,%f46
1270 ! p = r * (F58_KA1 + F60_KA2 * r)
1271 fmuld F60_KA2,%f40,%f34
1272
1273 fmuld F60_KA2,%f42,%f18
1274
1275 fmuld F60_KA2,%f44,%f20
1276
1277 fmuld F60_KA2,%f46,%f22
1278
1279 faddd F58_KA1,%f34,%f34
1280
1281 faddd F58_KA1,%f18,%f18
1282
1283 faddd F58_KA1,%f20,%f20
1284
1285 faddd F58_KA1,%f22,%f22
1286
1287 ld [%fp+tmp0],%o0 ! reload k into an integer register
1288 fmuld %f34,%f40,%f40
1289
1290 ld [%fp+tmp1],%o1
1291 fmuld %f18,%f42,%f42
1292
1293 ld [%fp+tmp2],%o2
1294 fmuld %f20,%f44,%f44
1295
1296 ld [%fp+tmp3],%o3
1297 fmuld %f22,%f46,%f46
1298 ! table byte offset = (k & 255) * 8
1299 and %o0,255,%o0
1300 and %o1,255,%o1
1301
1302 and %o2,255,%o2
1303 and %o3,255,%o3
1304
1305 sll %o0,3,%o0
1306 sll %o1,3,%o1
1307
1308 sll %o2,3,%o2
1309 sll %o3,3,%o3
1310 ! load the 8-byte table entry T for each element (G1_CONST_TBL presumably holds the .CONST_TBL base)
1311 ldd [G1_CONST_TBL+%o0],%f34
1312
1313 ldd [G1_CONST_TBL+%o1],%f18
1314
1315 ldd [G1_CONST_TBL+%o2],%f20
1316
1317 ldd [G1_CONST_TBL+%o3],%f22
1318 ! T += packed k as two 32-bit adds (NOTE(review): presumably adjusts the exponent field - confirm)
1319 fpadd32 %f16,%f34,%f34
1320
1321 fpadd32 %f2,%f18,%f18
1322
1323 fpadd32 %f4,%f20,%f20
1324
1325 fpadd32 %f6,%f22,%f22
1326 ! result = T + T * p
1327 fmuld %f34,%f40,%f40
1328
1329 fmuld %f18,%f42,%f42
1330
1331 fmuld %f20,%f44,%f44
1332
1333 fmuld %f22,%f46,%f46
1334
1335 faddd %f34,%f40,%f40
1336
1337 faddd %f18,%f42,%f42
1338
1339 faddd %f20,%f44,%f44
1340
1341 faddd %f22,%f46,%f46
1342 ! narrow to single and store; the output pointer alternates between %i3 and %o4, stepping by %i4
1343 fdtos %f40,%f26
1344 st %f26,[%i3]
1345 add %i3,%i4,%o4
1346
1347 fdtos %f42,%f18
1348 st %f18,[%o4]
1349 add %o4,%i4,%i3
1350
1351 fdtos %f44,%f20
1352 st %f20,[%i3]
1353 add %i3,%i4,%o4
1354
1355 fdtos %f46,%f22
1356 st %f22,[%o4]
1357 add %o4,%i4,%i3 ! %i3 now addresses the output slot of element (4)
1358 ! classify element (4): below the Inf pattern = finite, equal = Inf, above = NaN
1359 cmp %l4,%o5
1360 bl,pt %icc,.spec4_out_of_range
1361 sll %i2,2,%o4 ! delay slot: %o4 = 4 * input stride
1362
1363 ble,pn %icc,.spec4_inf ! less-than was handled above, so this is the equal case
1364 sub %i1,%o4,%o4 ! delay slot: %o4 = %i1 - 4 * stride, the address of input element (4)
1365
1366 ! NaN -> NaN
1367
1368 fmuls %f8,%f8,%f8
1369 ba .spec4_exit
1370 st %f8,[%i3] ! delay slot: store X * X
1371
1372 .spec4_inf: ! re-read the input word of element (4) to test its sign
1373 ld [%o4],%l0
1374 srl %l0,29,%l0
1375 andcc %l0,4,%l0 ! keeps only the sign bit (bit 31 moved to bit 2)
1376 be,a,pn %icc,.spec4_exit ! sign clear (+Inf): the result is X itself
1377 st %f8,[%i3] ! annulling branch: this store runs only when the branch is taken
1378
1379 ba .spec4_exit ! sign set (-Inf): the result is %f3, the 0.0f used in the X < 0.0f test
1380 st %f3,[%i3]
1381
1382 .spec4_out_of_range: ! finite X that is negative or not below THRESHOLDL
1383 sub %i1,%o4,%o4 ! %o4 = %i1 - 4 * stride (4 * stride came from the bl delay slot)
1384 ld [%o4],%l0
1385 srl %l0,29,%l0
1386 and %l0,4,%l0 ! 0 when the sign bit is clear, 4 when it is set
1387 add %l0,2048,%l0 ! byte offset 2048 or 2052 from the table base
1388 ld [G1_CONST_TBL+%l0],%f2 ! sign-selected single constant (its value is not visible in this view)
1389 fmuls %f2,%f2,%f2 ! result = constant squared - presumably forces overflow or underflow; confirm
1390 st %f2,[%i3]
1391
1392 .spec4_exit: ! slide elements (5)-(7) down to slots (0)-(2), then fetch five new inputs
1393 fmovs %f10,%f16
1394 mov %l5,%l0
1395 fmovs %f12,%f2
1396 mov %l6,%l1
1397 fmovs %f14,%f4
1398 mov %l7,%l2
1399 lda [%i1]%asi,%l3 ! new element (3): the word goes to an integer register ...
1400 lda [%i1]%asi,%f6 ! ... and the same word to a float register
1401 add %i1,%i2,%o0 ! %o0..%o3 = addresses of the next four inputs, one input stride (%i2) apart
1402 lda [%o0]%asi,%l4
1403 add %o0,%i2,%o1
1404 lda [%o1]%asi,%l5
1405 add %o1,%i2,%o2
1406 lda [%o2]%asi,%l6
1407 add %o2,%i2,%o3
1408 lda [%o3]%asi,%l7
1409 add %o3,%i2,%i1 ! %i1 = one stride past the last input loaded
1410 and %l3,G5_CONST,%l3 ! mask the new words with G5_CONST (presumably clears the sign bit; confirm)
1411 and %l4,G5_CONST,%l4
1412 and %l5,G5_CONST,%l5
1413 and %l6,G5_CONST,%l6
1414 and %l7,G5_CONST,%l7
1415
1416 subcc %i0,5,%i0 ! five results were stored on this path
1417 bpos,pt %icc,.main_loop ! continue while the remaining count is non-negative
1418 add %i3,%i4,%i3 ! delay slot: step the output pointer past element (4)
1419 ba .after_main_loop
1420 nop
1421
1422 .align 16
1423 .spec5: ! element (5) needs special handling: X is in %f10, its masked integer word in %l5
1424 sethi %hi(0x7f800000),%o5 ! %o5 = 0x7f800000, the single-precision +Inf bit pattern
1425 cmp %l5,%o5
1426 bge,pn %icc,1f ! not below the Inf pattern (Inf or NaN): go to 1:
1427 nop
1428 fcmpes %fcc0,%f10,%f3
1429 fcmpes %fcc1,%f10,THRESHOLDL
1430 fbl,a,pn %fcc0,1f ! if ( X < 0.0f )
1431 fstod %f10,%f50 ! (5) y = (double) X
1432 fbl,a,pt %fcc1,.spec5_cont ! if ( X < THRESHOLDL )
1433 fstod %f10,%f50 ! (5) y = (double) X
1434 1: ! finish elements (0)-(4), held as doubles in %f40..%f48, before dealing with (5)
1435 fmuld F62_K256ONLN2,%f44,%f44 ! only (2)-(4) are scaled here; (0)-(1) go straight to fdtoi - presumably scaled before entry
1436
1437 fmuld F62_K256ONLN2,%f46,%f46
1438 ! k = (int) y, spilled to the stack so it can be reloaded into an integer register
1439 fdtoi %f40,%f16
1440 st %f16,[%fp+tmp0]
1441 fmuld F62_K256ONLN2,%f48,%f48
1442
1443 fdtoi %f42,%f2
1444 st %f2,[%fp+tmp1]
1445
1446 fdtoi %f44,%f4
1447 st %f4,[%fp+tmp2]
1448
1449 fdtoi %f46,%f6
1450 st %f6,[%fp+tmp3]
1451
1452 fdtoi %f48,%f8
1453 st %f8,[%fp+tmp4]
1454 ! (double) k for the fraction, plus a VIS-packed copy of k that fpadd32 later adds to the table entry
1455 fitod %f16,%f34
1456 fpackfix %f16,%f16 ! NOTE(review): result depends on the GSR scale, which is set outside this view
1457
1458 fitod %f2,%f18
1459 fpackfix %f2,%f2
1460
1461 fitod %f4,%f20
1462 fpackfix %f4,%f4
1463
1464 fitod %f6,%f22
1465 fpackfix %f6,%f6
1466
1467 fitod %f8,%f24
1468 fpackfix %f8,%f8
1469 ! from here the steps for the five elements are interleaved: r = y - k, table offset = (k & 255) * 8
1470 ld [%fp+tmp0],%o0 ! reload k into an integer register
1471 fsubd %f40,%f34,%f40
1472
1473 ld [%fp+tmp1],%o1
1474 fsubd %f42,%f18,%f42
1475
1476 ld [%fp+tmp2],%o2
1477 and %o0,255,%o0
1478 fsubd %f44,%f20,%f44
1479
1480 ld [%fp+tmp3],%o3
1481 and %o1,255,%o1
1482 fsubd %f46,%f22,%f46
1483
1484 sll %o0,3,%o0
1485 sll %o1,3,%o1
1486 fmuld F60_KA2,%f40,%f34 ! start of p = r * (F58_KA1 + F60_KA2 * r)
1487 fsubd %f48,%f24,%f48
1488
1489 and %o2,255,%o2
1490 fmuld F60_KA2,%f42,%f18
1491
1492 sll %o2,3,%o2
1493 fmuld F60_KA2,%f44,%f20
1494
1495 ld [%fp+tmp4],%o4
1496 and %o3,255,%o3
1497 fmuld F60_KA2,%f46,%f22
1498
1499 sll %o3,3,%o3
1500 fmuld F60_KA2,%f48,%f24
1501 faddd F58_KA1,%f34,%f34
1502
1503 and %o4,255,%o4
1504 faddd F58_KA1,%f18,%f18
1505
1506 faddd F58_KA1,%f20,%f20
1507
1508 faddd F58_KA1,%f22,%f22
1509 ! finish p and load the 8-byte table entry T (G1_CONST_TBL presumably holds the .CONST_TBL base)
1510 fmuld %f34,%f40,%f40
1511 ldd [G1_CONST_TBL+%o0],%f34
1512 faddd F58_KA1,%f24,%f24
1513
1514 fmuld %f18,%f42,%f42
1515 ldd [G1_CONST_TBL+%o1],%f18
1516
1517 fmuld %f20,%f44,%f44
1518 ldd [G1_CONST_TBL+%o2],%f20
1519
1520 fmuld %f22,%f46,%f46
1521 ldd [G1_CONST_TBL+%o3],%f22
1522 sll %o4,3,%o4
1523
1524 fmuld %f24,%f48,%f48
1525 ldd [G1_CONST_TBL+%o4],%f24
1526 fpadd32 %f16,%f34,%f34 ! T += packed k as two 32-bit adds (presumably adjusts the exponent field - confirm)
1527
1528 fpadd32 %f2,%f18,%f18
1529
1530 fpadd32 %f4,%f20,%f20
1531
1532 fpadd32 %f6,%f22,%f22
1533
1534 fpadd32 %f8,%f24,%f24
1535 fmuld %f34,%f40,%f40 ! result = T + T * p
1536
1537 fmuld %f18,%f42,%f42
1538
1539 fmuld %f20,%f44,%f44
1540
1541 fmuld %f22,%f46,%f46
1542
1543 fmuld %f24,%f48,%f48
1544 faddd %f34,%f40,%f40
1545
1546 faddd %f18,%f42,%f42
1547
1548 faddd %f20,%f44,%f44
1549
1550 faddd %f22,%f46,%f46
1551
1552 faddd %f24,%f48,%f48
1553 ! narrow to single and store; the output pointer alternates between %i3 and %o4, stepping by %i4
1554 fdtos %f40,%f26
1555 st %f26,[%i3]
1556 add %i3,%i4,%o4
1557
1558 fdtos %f42,%f18
1559 st %f18,[%o4]
1560 add %o4,%i4,%i3
1561
1562 fdtos %f44,%f20
1563 st %f20,[%i3]
1564 add %i3,%i4,%o4
1565
1566 fdtos %f46,%f22
1567 st %f22,[%o4]
1568 add %o4,%i4,%i3
1569
1570 fdtos %f48,%f24
1571 st %f24,[%i3]
1572 add %i3,%i4,%i3 ! %i3 now addresses the output slot of element (5)
1573 ! classify element (5): below the Inf pattern = finite, equal = Inf, above = NaN
1574 cmp %l5,%o5
1575 bl,pt %icc,.spec5_out_of_range
1576 sll %i2,2,%o4 ! delay slot: %o4 = 4 * input stride
1577
1578 ble,pn %icc,.spec5_inf ! less-than was handled above, so this is the equal case
1579 sub %o4,%i2,%o4 ! delay slot: %o4 = 3 * input stride
1580
1581 ! NaN -> NaN
1582
1583 fmuls %f10,%f10,%f10
1584 ba .spec5_exit
1585 st %f10,[%i3] ! delay slot: store X * X
1586
1587 .spec5_inf: ! re-read the input word of element (5), 3 strides before %i1, to test its sign
1588 sub %i1,%o4,%o4
1589 ld [%o4],%l0
1590 srl %l0,29,%l0
1591 andcc %l0,4,%l0 ! keeps only the sign bit (bit 31 moved to bit 2)
1592 be,a,pn %icc,.spec5_exit ! sign clear (+Inf): the result is X itself
1593 st %f10,[%i3] ! annulling branch: this store runs only when the branch is taken
1594
1595 ba .spec5_exit ! sign set (-Inf): the result is %f3, the 0.0f used in the X < 0.0f test
1596 st %f3,[%i3]
1597
1598 .spec5_out_of_range: ! finite X that is negative or not below THRESHOLDL
1599 sub %o4,%i2,%o4 ! %o4 = 3 * input stride (4 * stride came from the bl delay slot)
1600 sub %i1,%o4,%o4
1601 ld [%o4],%l0
1602 srl %l0,29,%l0
1603 and %l0,4,%l0 ! 0 when the sign bit is clear, 4 when it is set
1604 add %l0,2048,%l0 ! byte offset 2048 or 2052 from the table base
1605 ld [G1_CONST_TBL+%l0],%f2 ! sign-selected single constant (its value is not visible in this view)
1606 fmuls %f2,%f2,%f2 ! result = constant squared - presumably forces overflow or underflow; confirm
1607 st %f2,[%i3]
1608
1609 .spec5_exit: ! slide elements (6)-(7) down to slots (0)-(1), then fetch six new inputs
1610 fmovs %f12,%f16
1611 mov %l6,%l0
1612 fmovs %f14,%f2
1613 mov %l7,%l1
1614 lda [%i1]%asi,%l2 ! new element (2): the word goes to an integer register ...
1615 lda [%i1]%asi,%f4 ! ... and the same word to a float register
1616 add %i1,%i2,%i1
1617 lda [%i1]%asi,%l3 ! new element (3), loaded the same way
1618 lda [%i1]%asi,%f6
1619 add %i1,%i2,%o0 ! %o0..%o3 = addresses of the next four inputs, one input stride (%i2) apart
1620 lda [%o0]%asi,%l4
1621 add %o0,%i2,%o1
1622 lda [%o1]%asi,%l5
1623 add %o1,%i2,%o2
1624 lda [%o2]%asi,%l6
1625 add %o2,%i2,%o3
1626 lda [%o3]%asi,%l7
1627 add %o3,%i2,%i1 ! %i1 = one stride past the last input loaded
1628 and %l2,G5_CONST,%l2 ! mask the new words with G5_CONST (presumably clears the sign bit; confirm)
1629 and %l3,G5_CONST,%l3
1630 and %l4,G5_CONST,%l4
1631 and %l5,G5_CONST,%l5
1632 and %l6,G5_CONST,%l6
1633 and %l7,G5_CONST,%l7
1634
1635 subcc %i0,6,%i0 ! six results were stored on this path
1636 bpos,pt %icc,.main_loop ! continue while the remaining count is non-negative
1637 add %i3,%i4,%i3 ! delay slot: step the output pointer past element (5)
1638 ba .after_main_loop
1639 nop
1640 .spec6: ! element (6) needs special handling: X is in %f12, its masked integer word in %l6
1641 sethi %hi(0x7f800000),%o5 ! %o5 = 0x7f800000, the single-precision +Inf bit pattern
1642 cmp %l6,%o5
1643 bge,pn %icc,1f ! not below the Inf pattern (Inf or NaN): go to 1:
1644 nop
1645 fcmpes %fcc0,%f12,%f3
1646 fcmpes %fcc1,%f12,THRESHOLDL
1647 fbl,a,pn %fcc0,1f ! if ( X < 0.0f )
1648 fstod %f12,%f52 ! (6) y = (double) X
1649 fbl,a,pt %fcc1,.spec6_cont ! if ( X < THRESHOLDL )
1650 fstod %f12,%f52 ! (6) y = (double) X
1651 1: ! finish elements (0)-(5), held as doubles in %f40..%f50, before dealing with (6)
1652 fmuld F62_K256ONLN2,%f46,%f46 ! only (3)-(5) are scaled here; (0)-(2) go straight to fdtoi - presumably scaled before entry
1653 ! k = (int) y, spilled to the stack so it can be reloaded into an integer register
1654 fdtoi %f40,%f16
1655 st %f16,[%fp+tmp0]
1656 fmuld F62_K256ONLN2,%f48,%f48
1657
1658 fdtoi %f42,%f2
1659 st %f2,[%fp+tmp1]
1660 fmuld F62_K256ONLN2,%f50,%f50
1661
1662 fdtoi %f44,%f4
1663 st %f4,[%fp+tmp2]
1664
1665 fdtoi %f46,%f6
1666 st %f6,[%fp+tmp3]
1667
1668 fdtoi %f48,%f8
1669 st %f8,[%fp+tmp4]
1670
1671 fdtoi %f50,%f10
1672 st %f10,[%fp+tmp5]
1673 ! (double) k for the fraction, plus a VIS-packed copy of k that fpadd32 later adds to the table entry
1674 fitod %f16,%f34
1675 fpackfix %f16,%f16 ! NOTE(review): result depends on the GSR scale, which is set outside this view
1676
1677 fitod %f2,%f18
1678 fpackfix %f2,%f2
1679
1680 fitod %f4,%f20
1681 fpackfix %f4,%f4
1682
1683 fitod %f6,%f22
1684 fpackfix %f6,%f6
1685
1686 fitod %f8,%f24
1687 fpackfix %f8,%f8
1688
1689 fitod %f10,%f26
1690 fpackfix %f10,%f10
1691 ! from here the steps for the six elements are interleaved: r = y - k, table offset = (k & 255) * 8
1692 ld [%fp+tmp0],%o0 ! reload k into an integer register
1693 fsubd %f40,%f34,%f40
1694
1695 ld [%fp+tmp1],%o1
1696 fsubd %f42,%f18,%f42
1697
1698 ld [%fp+tmp2],%o2
1699 and %o0,255,%o0
1700 fsubd %f44,%f20,%f44
1701
1702 ld [%fp+tmp3],%o3
1703 and %o1,255,%o1
1704 fsubd %f46,%f22,%f46
1705
1706 sll %o0,3,%o0
1707 sll %o1,3,%o1
1708 fmuld F60_KA2,%f40,%f34 ! start of p = r * (F58_KA1 + F60_KA2 * r)
1709 fsubd %f48,%f24,%f48
1710
1711 and %o2,255,%o2
1712 fmuld F60_KA2,%f42,%f18
1713 fsubd %f50,%f26,%f50
1714
1715 sll %o2,3,%o2
1716 fmuld F60_KA2,%f44,%f20
1717
1718 ld [%fp+tmp4],%o4
1719 and %o3,255,%o3
1720 fmuld F60_KA2,%f46,%f22
1721
1722 ld [%fp+tmp5],%o5 ! overwrites the Inf pattern in %o5; it is rebuilt before the final classification
1723 sll %o3,3,%o3
1724 fmuld F60_KA2,%f48,%f24
1725 faddd F58_KA1,%f34,%f34
1726
1727 and %o4,255,%o4
1728 fmuld F60_KA2,%f50,%f26
1729 faddd F58_KA1,%f18,%f18
1730
1731 and %o5,255,%o5
1732 faddd F58_KA1,%f20,%f20
1733
1734 sll %o5,3,%o5
1735 faddd F58_KA1,%f22,%f22
1736 ! finish p and load the 8-byte table entry T (G1_CONST_TBL presumably holds the .CONST_TBL base)
1737 fmuld %f34,%f40,%f40
1738 ldd [G1_CONST_TBL+%o0],%f34
1739 faddd F58_KA1,%f24,%f24
1740
1741 fmuld %f18,%f42,%f42
1742 ldd [G1_CONST_TBL+%o1],%f18
1743 faddd F58_KA1,%f26,%f26
1744
1745 fmuld %f20,%f44,%f44
1746 ldd [G1_CONST_TBL+%o2],%f20
1747
1748 fmuld %f22,%f46,%f46
1749 ldd [G1_CONST_TBL+%o3],%f22
1750 sll %o4,3,%o4
1751
1752 fmuld %f24,%f48,%f48
1753 ldd [G1_CONST_TBL+%o4],%f24
1754 fpadd32 %f16,%f34,%f34 ! T += packed k as two 32-bit adds (presumably adjusts the exponent field - confirm)
1755
1756 fmuld %f26,%f50,%f50
1757 ldd [G1_CONST_TBL+%o5],%f26
1758 fpadd32 %f2,%f18,%f18
1759
1760 fpadd32 %f4,%f20,%f20
1761
1762 fpadd32 %f6,%f22,%f22
1763
1764 fpadd32 %f8,%f24,%f24
1765 fmuld %f34,%f40,%f40 ! result = T + T * p
1766
1767 fpadd32 %f10,%f26,%f26
1768 fmuld %f18,%f42,%f42
1769
1770 fmuld %f20,%f44,%f44
1771
1772 fmuld %f22,%f46,%f46
1773
1774 fmuld %f24,%f48,%f48
1775 faddd %f34,%f40,%f40
1776
1777 fmuld %f26,%f50,%f50
1778 faddd %f18,%f42,%f42
1779
1780 faddd %f20,%f44,%f44
1781
1782 faddd %f22,%f46,%f46
1783
1784 faddd %f24,%f48,%f48
1785
1786 faddd %f26,%f50,%f50
1787 ! narrow to single and store; the output pointer alternates between %i3 and %o4, stepping by %i4
1788 fdtos %f40,%f26
1789 st %f26,[%i3]
1790 add %i3,%i4,%o4
1791
1792 fdtos %f42,%f18
1793 st %f18,[%o4]
1794 add %o4,%i4,%i3
1795
1796 fdtos %f44,%f20
1797 st %f20,[%i3]
1798 add %i3,%i4,%o4
1799
1800 fdtos %f46,%f22
1801 st %f22,[%o4]
1802 add %o4,%i4,%i3
1803
1804 fdtos %f48,%f24
1805 st %f24,[%i3]
1806 add %i3,%i4,%o4
1807
1808 fdtos %f50,%f26
1809 st %f26,[%o4]
1810 add %o4,%i4,%i3 ! %i3 now addresses the output slot of element (6)
1811 ! classify element (6): below the Inf pattern = finite, equal = Inf, above = NaN
1812 sethi %hi(0x7f800000),%o5 ! rebuild the Inf pattern, since %o5 was reused as a table offset above
1813 cmp %l6,%o5
1814 bl,pt %icc,.spec6_out_of_range
1815 sll %i2,1,%o4 ! delay slot: %o4 = 2 * input stride
1816
1817 ble,pn %icc,.spec6_inf ! less-than was handled above, so this is the equal case
1818 sub %i1,%o4,%o4 ! delay slot: %o4 = %i1 - 2 * stride, the address of input element (6)
1819
1820 ! NaN -> NaN
1821
1822 fmuls %f12,%f12,%f12
1823 ba .spec6_exit
1824 st %f12,[%i3] ! delay slot: store X * X
1825
1826 .spec6_inf: ! re-read the input word of element (6) to test its sign
1827 ld [%o4],%l0
1828 srl %l0,29,%l0
1829 andcc %l0,4,%l0 ! keeps only the sign bit (bit 31 moved to bit 2)
1830 be,a,pn %icc,.spec6_exit ! sign clear (+Inf): the result is X itself
1831 st %f12,[%i3] ! annulling branch: this store runs only when the branch is taken
1832
1833 ba .spec6_exit ! sign set (-Inf): the result is %f3, the 0.0f used in the X < 0.0f test
1834 st %f3,[%i3]
1835
1836 .spec6_out_of_range: ! finite X that is negative or not below THRESHOLDL
1837 sub %i1,%o4,%o4 ! %o4 = %i1 - 2 * stride (2 * stride came from the bl delay slot)
1838 ld [%o4],%l0
1839 srl %l0,29,%l0
1840 and %l0,4,%l0 ! 0 when the sign bit is clear, 4 when it is set
1841 add %l0,2048,%l0 ! byte offset 2048 or 2052 from the table base
1842 ld [G1_CONST_TBL+%l0],%f2 ! sign-selected single constant (its value is not visible in this view)
1843 fmuls %f2,%f2,%f2 ! result = constant squared - presumably forces overflow or underflow; confirm
1844 st %f2,[%i3]
1845
1846 .spec6_exit: ! slide element (7) down to slot (0), then fetch seven new inputs
1847 fmovs %f14,%f16
1848 mov %l7,%l0
1849 lda [%i1]%asi,%l1 ! new element (1): the word goes to an integer register ...
1850 lda [%i1]%asi,%f2 ! ... and the same word to a float register
1851 add %i1,%i2,%i1
1852 lda [%i1]%asi,%l2 ! new element (2), loaded the same way
1853 lda [%i1]%asi,%f4
1854 add %i1,%i2,%i1
1855 lda [%i1]%asi,%l3 ! new element (3), loaded the same way
1856 lda [%i1]%asi,%f6
1857 add %i1,%i2,%o0 ! %o0..%o3 = addresses of the next four inputs, one input stride (%i2) apart
1858 lda [%o0]%asi,%l4
1859 add %o0,%i2,%o1
1860 lda [%o1]%asi,%l5
1861 add %o1,%i2,%o2
1862 lda [%o2]%asi,%l6
1863 add %o2,%i2,%o3
1864 lda [%o3]%asi,%l7
1865 add %o3,%i2,%i1 ! %i1 = one stride past the last input loaded
1866 and %l1,G5_CONST,%l1 ! mask the new words with G5_CONST (presumably clears the sign bit; confirm)
1867 and %l2,G5_CONST,%l2
1868 and %l3,G5_CONST,%l3
1869 and %l4,G5_CONST,%l4
1870 and %l5,G5_CONST,%l5
1871 and %l6,G5_CONST,%l6
1872 and %l7,G5_CONST,%l7
1873
1874 subcc %i0,7,%i0 ! seven results were stored on this path
1875 bpos,pt %icc,.main_loop ! continue while the remaining count is non-negative
1876 add %i3,%i4,%i3 ! delay slot: step the output pointer past element (6)
1877 ba .after_main_loop
1878 nop
1879
1880 .align 16
1881 .spec7: ! element (7) needs special handling: X is in %f14, its masked integer word in %l7
1882 sethi %hi(0x7f800000),%o5 ! %o5 = 0x7f800000, the single-precision +Inf bit pattern
1883 cmp %l7,%o5
1884 bge,pn %icc,1f ! not below the Inf pattern (Inf or NaN): go to 1:
1885 nop
1886 fcmpes %fcc0,%f14,%f3
1887 fcmpes %fcc1,%f14,THRESHOLDL
1888 fbl,a,pn %fcc0,1f ! if ( X < 0.0f )
1889 fstod %f14,%f54 ! (7) y = (double) X
1890 fbl,a,pt %fcc1,.spec7_cont ! if ( X < THRESHOLDL )
1891 fstod %f14,%f54 ! (7) y = (double) X
1892 1: ! finish elements (0)-(6), held as doubles in %f40..%f52, before dealing with (7)
1893 fdtoi %f40,%f16 ! k = (int) y, spilled to the stack so it can be reloaded into an integer register
1894 st %f16,[%fp+tmp0]
1895 fmuld F62_K256ONLN2,%f48,%f48 ! only (4)-(6) are scaled here; (0)-(3) go straight to fdtoi - presumably scaled before entry
1896
1897 fdtoi %f42,%f2
1898 st %f2,[%fp+tmp1]
1899 fmuld F62_K256ONLN2,%f50,%f50
1900
1901 fdtoi %f44,%f4
1902 st %f4,[%fp+tmp2]
1903 fmuld F62_K256ONLN2,%f52,%f52
1904
1905 fdtoi %f46,%f6
1906 st %f6,[%fp+tmp3]
1907
1908 fdtoi %f48,%f8
1909 st %f8,[%fp+tmp4]
1910
1911 fdtoi %f50,%f10
1912 st %f10,[%fp+tmp5]
1913
1914 fdtoi %f52,%f12
1915 st %f12,[%fp+tmp6]
1916 ! (double) k for the fraction, plus a VIS-packed copy of k that fpadd32 later adds to the table entry
1917 fitod %f16,%f34
1918 fpackfix %f16,%f16 ! NOTE(review): result depends on the GSR scale, which is set outside this view
1919
1920 fitod %f2,%f18
1921 fpackfix %f2,%f2
1922
1923 fitod %f4,%f20
1924 fpackfix %f4,%f4
1925
1926 fitod %f6,%f22
1927 fpackfix %f6,%f6
1928
1929 fitod %f8,%f24
1930 fpackfix %f8,%f8
1931
1932 fitod %f10,%f26
1933 fpackfix %f10,%f10
1934
1935 fitod %f12,%f28
1936 fpackfix %f12,%f12
1937 ! from here the steps for the seven elements are interleaved: r = y - k, table offset = (k & 255) * 8
1938 ld [%fp+tmp0],%o0 ! reload k into an integer register
1939 fsubd %f40,%f34,%f40
1940
1941 ld [%fp+tmp1],%o1
1942 fsubd %f42,%f18,%f42
1943
1944 ld [%fp+tmp2],%o2
1945 and %o0,255,%o0
1946 fsubd %f44,%f20,%f44
1947
1948 ld [%fp+tmp3],%o3
1949 and %o1,255,%o1
1950 fsubd %f46,%f22,%f46
1951
1952 sll %o0,3,%o0
1953 sll %o1,3,%o1
1954 fmuld F60_KA2,%f40,%f34 ! start of p = r * (F58_KA1 + F60_KA2 * r)
1955 fsubd %f48,%f24,%f48
1956
1957 and %o2,255,%o2
1958 fmuld F60_KA2,%f42,%f18
1959 fsubd %f50,%f26,%f50
1960
1961 sll %o2,3,%o2
1962 fmuld F60_KA2,%f44,%f20
1963 fsubd %f52,%f28,%f52
1964
1965 ld [%fp+tmp4],%o4
1966 and %o3,255,%o3
1967 fmuld F60_KA2,%f46,%f22
1968
1969 ld [%fp+tmp5],%o5 ! overwrites the Inf pattern in %o5; it is rebuilt before the final classification
1970 sll %o3,3,%o3
1971 fmuld F60_KA2,%f48,%f24
1972 faddd F58_KA1,%f34,%f34
1973
1974 ld [%fp+tmp6],%o7 ! %o7 serves as a scratch register for the seventh k
1975 and %o4,255,%o4
1976 fmuld F60_KA2,%f50,%f26
1977 faddd F58_KA1,%f18,%f18
1978
1979 and %o5,255,%o5
1980 fmuld F60_KA2,%f52,%f28
1981 faddd F58_KA1,%f20,%f20
1982
1983 sll %o5,3,%o5
1984 faddd F58_KA1,%f22,%f22
1985 ! finish p and load the 8-byte table entry T (G1_CONST_TBL presumably holds the .CONST_TBL base)
1986 fmuld %f34,%f40,%f40
1987 ldd [G1_CONST_TBL+%o0],%f34
1988 faddd F58_KA1,%f24,%f24
1989
1990 fmuld %f18,%f42,%f42
1991 ldd [G1_CONST_TBL+%o1],%f18
1992 faddd F58_KA1,%f26,%f26
1993
1994 fmuld %f20,%f44,%f44
1995 ldd [G1_CONST_TBL+%o2],%f20
1996 faddd F58_KA1,%f28,%f28
1997
1998 fmuld %f22,%f46,%f46
1999 ldd [G1_CONST_TBL+%o3],%f22
2000 sll %o4,3,%o4
2001
2002 fmuld %f24,%f48,%f48
2003 ldd [G1_CONST_TBL+%o4],%f24
2004 and %o7,255,%o7
2005 fpadd32 %f16,%f34,%f34 ! T += packed k as two 32-bit adds (presumably adjusts the exponent field - confirm)
2006
2007 fmuld %f26,%f50,%f50
2008 ldd [G1_CONST_TBL+%o5],%f26
2009 sll %o7,3,%o7
2010 fpadd32 %f2,%f18,%f18
2011
2012 fmuld %f28,%f52,%f52
2013 ldd [G1_CONST_TBL+%o7],%f28
2014 fpadd32 %f4,%f20,%f20
2015
2016 fpadd32 %f6,%f22,%f22
2017
2018 fpadd32 %f8,%f24,%f24
2019 fmuld %f34,%f40,%f40 ! result = T + T * p
2020
2021 fpadd32 %f10,%f26,%f26
2022 fmuld %f18,%f42,%f42
2023
2024 fpadd32 %f12,%f28,%f28
2025 fmuld %f20,%f44,%f44
2026
2027 fmuld %f22,%f46,%f46
2028
2029 fmuld %f24,%f48,%f48
2030 faddd %f34,%f40,%f40
2031
2032 fmuld %f26,%f50,%f50
2033 faddd %f18,%f42,%f42
2034
2035 fmuld %f28,%f52,%f52
2036 faddd %f20,%f44,%f44
2037
2038 faddd %f22,%f46,%f46
2039
2040 faddd %f24,%f48,%f48
2041
2042 faddd %f26,%f50,%f50
2043
2044 faddd %f28,%f52,%f52
2045 ! narrow to single and store; the output pointer alternates between %i3 and %o4, stepping by %i4
2046 fdtos %f40,%f26
2047 st %f26,[%i3]
2048 add %i3,%i4,%o4
2049
2050 fdtos %f42,%f18
2051 st %f18,[%o4]
2052 add %o4,%i4,%i3
2053
2054 fdtos %f44,%f20
2055 st %f20,[%i3]
2056 add %i3,%i4,%o4
2057
2058 fdtos %f46,%f22
2059 st %f22,[%o4]
2060 add %o4,%i4,%i3
2061
2062 fdtos %f48,%f24
2063 st %f24,[%i3]
2064 add %i3,%i4,%o4
2065
2066 fdtos %f50,%f26
2067 st %f26,[%o4]
2068 add %o4,%i4,%i3
2069
2070 fdtos %f52,%f28
2071 st %f28,[%i3]
2072 add %i3,%i4,%i3 ! %i3 now addresses the output slot of element (7)
2073 ! classify element (7): below the Inf pattern = finite, equal = Inf, above = NaN
2074 sethi %hi(0x7f800000),%o5 ! rebuild the Inf pattern, since %o5 was reused as a table offset above
2075 cmp %l7,%o5
2076 bl,pt %icc,.spec7_out_of_range
2077 sub %i1,%i2,%o4 ! delay slot: %o4 = %i1 - stride, the address of input element (7)
2078
2079 ble,pn %icc,.spec7_inf ! less-than was handled above, so this is the equal case
2080 ld [%o4],%l0 ! delay slot (not annulled): the word is loaded on the NaN fall-through as well
2081
2082 ! NaN -> NaN
2083
2084 fmuls %f14,%f14,%f14
2085 ba .spec7_exit
2086 st %f14,[%i3] ! delay slot: store X * X
2087
2088 .spec7_inf: ! %l0 already holds the input word of element (7); test its sign
2089 srl %l0,29,%l0
2090 andcc %l0,4,%l0 ! keeps only the sign bit (bit 31 moved to bit 2)
2091 be,a,pn %icc,.spec7_exit ! sign clear (+Inf): the result is X itself
2092 st %f14,[%i3] ! annulling branch: this store runs only when the branch is taken
2093
2094 ba .spec7_exit ! sign set (-Inf): the result is %f3, the 0.0f used in the X < 0.0f test
2095 st %f3,[%i3]
2096
2097 .spec7_out_of_range: ! finite X that is negative or not below THRESHOLDL
2098 ld [%o4],%l0
2099 srl %l0,29,%l0
2100 and %l0,4,%l0 ! 0 when the sign bit is clear, 4 when it is set
2101 add %l0,2048,%l0 ! byte offset 2048 or 2052 from the table base
2102 ld [G1_CONST_TBL+%l0],%f2 ! sign-selected single constant (its value is not visible in this view)
2103 fmuls %f2,%f2,%f2 ! result = constant squared - presumably forces overflow or underflow; confirm
2104 st %f2,[%i3]
2105
2106 .spec7_exit: ! all eight elements of the group are done, so no registers need to be shifted down
2107 subcc %i0,8,%i0 ! eight results were stored on this path
2108 bpos,pt %icc,.main_loop_preload ! continue while the remaining count is non-negative
2109 add %i3,%i4,%i3 ! delay slot: step the output pointer past element (7)
2110
2111 ba .tail
2112 nop
2113 SET_SIZE(__vexpf)
2114