libmvec: further unifdef'ing (LIBMVEC_SO_BUILD)
--- old/usr/src/lib/libmvec/common/vis/__vsin_ultra3.S
+++ new/usr/src/lib/libmvec/common/vis/__vsin_ultra3.S
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
23 23 */
24 24 /*
25 25 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
26 26 * Use is subject to license terms.
27 27 */
28 28
29 29 .file "__vsin_ultra3.S"
30 30
31 31 #include "libm.h"
32 -#if defined(LIBMVEC_SO_BUILD)
33 32 .weak __vsin
34 33 .type __vsin,#function
35 34 __vsin = __vsin_ultra3
36 -#endif
37 35
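With the LIBMVEC_SO_BUILD guard removed, the weak alias above is emitted unconditionally. As a C-level sketch of what the .weak/.type/= triple means (the argument list is inferred from the register-use comments further down; the assembly is the authoritative spelling):

    /* hedged sketch: weak alias, names as in the diff */
    extern void __vsin_ultra3(int n, double *x, int stridex,
        double *y, int stridey);
    #pragma weak __vsin = __vsin_ultra3
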
38 36 RO_DATA
39 37 .align 64
40 38 constants:
41 39 .word 0x42c80000,0x00000000 ! 3 * 2^44
42 40 .word 0x43380000,0x00000000 ! 3 * 2^51
43 41 .word 0x3fe45f30,0x6dc9c883 ! invpio2
44 42 .word 0x3ff921fb,0x54442c00 ! pio2_1
45 43 .word 0x3d318469,0x898cc400 ! pio2_2
46 44 .word 0x3a71701b,0x839a2520 ! pio2_3
47 45 .word 0xbfc55555,0x55555533 ! pp1
48 46 .word 0x3f811111,0x10e7d53b ! pp2
49 47 .word 0xbf2a0167,0xe6b3cf9b ! pp3
50 48 .word 0xbfdfffff,0xffffff65 ! qq1
51 49 .word 0x3fa55555,0x54f88ed0 ! qq2
52 50 .word 0xbf56c12c,0xdd185f60 ! qq3
53 51
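Each .word hi,lo pair in the table above is the big-endian bit image of one IEEE-754 double. A minimal, self-contained C check that decodes a few of them (values copied from the table; the helper name is mine):

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    /* reassemble a ".word hi,lo" pair into the double it encodes */
    static double
    pair_to_double(uint32_t hi, uint32_t lo)
    {
        uint64_t bits = ((uint64_t)hi << 32) | lo;
        double d;

        memcpy(&d, &bits, sizeof (d));
        return (d);
    }

    int
    main(void)
    {
        printf("c3two44 = %g\n",    pair_to_double(0x42c80000, 0x00000000));
        printf("invpio2 = %.17g\n", pair_to_double(0x3fe45f30, 0x6dc9c883));
        printf("pio2_1  = %.17g\n", pair_to_double(0x3ff921fb, 0x54442c00));
        return (0);
    }

pio2_1 through pio2_3 carry pi/2 split into three pieces of decreasing magnitude, which is what the medium-range argument reduction below depends on.
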
54 52 ! local storage indices
55 53
56 54 #define xsave STACK_BIAS-0x8
57 55 #define ysave STACK_BIAS-0x10
58 56 #define nsave STACK_BIAS-0x14
59 57 #define sxsave STACK_BIAS-0x18
60 58 #define sysave STACK_BIAS-0x1c
61 59 #define biguns STACK_BIAS-0x20
62 60 #define nk3 STACK_BIAS-0x24
63 61 #define nk2 STACK_BIAS-0x28
64 62 #define nk1 STACK_BIAS-0x2c
65 63 #define nk0 STACK_BIAS-0x30
66 64 #define junk STACK_BIAS-0x38
67 65 ! sizeof temp storage - must be a multiple of 16 for V9
68 66 #define tmps 0x40
69 67
70 68 ! register use
71 69
72 70 ! i0 n
73 71 ! i1 x
74 72 ! i2 stridex
75 73 ! i3 y
76 74 ! i4 stridey
77 75 ! i5 0x80000000
78 76
79 77 ! l0 hx0
80 78 ! l1 hx1
81 79 ! l2 hx2
82 80 ! l3 hx3
83 81 ! l4 k0
84 82 ! l5 k1
85 83 ! l6 k2
86 84 ! l7 k3
87 85
88 86 ! the following are 64-bit registers in both V8+ and V9
89 87
90 88 ! g1 __vlibm_TBL_sincos2
91 89 ! g5 scratch
92 90
93 91 ! o0 py0
94 92 ! o1 py1
95 93 ! o2 py2
96 94 ! o3 py3
97 95 ! o4 0x3e400000
98 96 ! o5 0x3fe921fb,0x4099251e
99 97 ! o7 scratch
100 98
101 99 ! f0 hx0
102 100 ! f2
103 101 ! f4
104 102 ! f6
105 103 ! f8 hx1
106 104 ! f10
107 105 ! f12
108 106 ! f14
109 107 ! f16 hx2
110 108 ! f18
111 109 ! f20
112 110 ! f22
113 111 ! f24 hx3
114 112 ! f26
115 113 ! f28
116 114 ! f30
117 115 ! f32
118 116 ! f34
119 117 ! f36
120 118 ! f38
121 119
122 120 #define c3two44 %f40
123 121 #define c3two51 %f42
124 122 #define invpio2 %f44
125 123 #define pio2_1 %f46
126 124 #define pio2_2 %f48
127 125 #define pio2_3 %f50
128 126 #define pp1 %f52
129 127 #define pp2 %f54
130 128 #define pp3 %f56
131 129 #define qq1 %f58
132 130 #define qq2 %f60
133 131 #define qq3 %f62
134 132
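The ENTRY below implements the strided libmvec interface documented by the register-use comments above (i0 = n, i1 = x, i2 = stridex, i3 = y, i4 = stridey; the sll ...,3 instructions scale the strides, so they are counted in elements). A scalar reference model of the semantics only; the real code processes four elements per loop trip:

    #include <math.h>

    /* hedged reference model; argument order per the register comments */
    void
    vsin_ref(int n, const double *x, int stridex, double *y, int stridey)
    {
        int i;

        for (i = 0; i < n; i++) {
            *y = sin(*x);    /* one element per iteration */
            x += stridex;    /* element strides, per the sll ...,3 scaling */
            y += stridey;
        }
    }
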
135 133 ENTRY(__vsin_ultra3)
136 134 save %sp,-SA(MINFRAME)-tmps,%sp
137 135 PIC_SETUP(l7)
138 136 PIC_SET(l7,constants,o0)
139 137 PIC_SET(l7,__vlibm_TBL_sincos2,o1)
140 138 mov %o1,%g1
141 139 wr %g0,0x82,%asi ! set %asi for non-faulting loads
142 140 #ifdef __sparcv9
143 141 stx %i1,[%fp+xsave] ! save arguments
144 142 stx %i3,[%fp+ysave]
145 143 #else
146 144 st %i1,[%fp+xsave] ! save arguments
147 145 st %i3,[%fp+ysave]
148 146 #endif
149 147 st %i0,[%fp+nsave]
150 148 st %i2,[%fp+sxsave]
151 149 st %i4,[%fp+sysave]
152 150 st %g0,[%fp+biguns] ! biguns = 0
153 151 ldd [%o0+0x00],c3two44 ! load/set up constants
154 152 ldd [%o0+0x08],c3two51
155 153 ldd [%o0+0x10],invpio2
156 154 ldd [%o0+0x18],pio2_1
157 155 ldd [%o0+0x20],pio2_2
158 156 ldd [%o0+0x28],pio2_3
159 157 ldd [%o0+0x30],pp1
160 158 ldd [%o0+0x38],pp2
161 159 ldd [%o0+0x40],pp3
162 160 ldd [%o0+0x48],qq1
163 161 ldd [%o0+0x50],qq2
164 162 ldd [%o0+0x58],qq3
165 163 sethi %hi(0x80000000),%i5
166 164 sethi %hi(0x3e400000),%o4
167 165 sethi %hi(0x3fe921fb),%o5
168 166 or %o5,%lo(0x3fe921fb),%o5
169 167 sllx %o5,32,%o5
170 168 sethi %hi(0x4099251e),%o7
171 169 or %o7,%lo(0x4099251e),%o7
172 170 or %o5,%o7,%o5
173 171 sll %i2,3,%i2 ! scale strides
174 172 sll %i4,3,%i4
175 173 add %fp,junk,%o1 ! loop prologue
176 174 add %fp,junk,%o2
177 175 add %fp,junk,%o3
178 176 ld [%i1],%l0 ! *x
179 177 ld [%i1],%f0
180 178 ld [%i1+4],%f3
181 179 andn %l0,%i5,%l0 ! mask off sign
182 180 ba .loop0
183 181 add %i1,%i2,%i1 ! x += stridex
184 182
185 183 ! 16-byte aligned
186 184 .align 16
187 185 .loop0:
188 186 lda [%i1]%asi,%l1 ! preload next argument
189 187 sub %l0,%o4,%g5
190 188 sub %o5,%l0,%o7
191 189 fabss %f0,%f2
192 190
193 191 lda [%i1]%asi,%f8
194 192 orcc %o7,%g5,%g0
195 193 mov %i3,%o0 ! py0 = y
196 194 bl,pn %icc,.range0 ! hx < 0x3e400000 or hx > 0x4099251e
197 195
198 196 ! delay slot
199 197 lda [%i1+4]%asi,%f11
200 198 addcc %i0,-1,%i0
201 199 add %i3,%i4,%i3 ! y += stridey
202 200 ble,pn %icc,.last1
203 201
204 202 ! delay slot
205 203 andn %l1,%i5,%l1
206 204 add %i1,%i2,%i1 ! x += stridex
207 205 faddd %f2,c3two44,%f4
208 206 st %f15,[%o1+4]
209 207
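The sub/sub/orcc triple at the top of each loop stage is a two-sided range check folded into one branch: each difference goes negative only on its side of the supported interval, so a single bl,pn catches arguments that are too small for the polynomial path and too large for the medium-range reduction at once. A hedged C rendering (hx is the sign-masked high word of the argument; the decimal bounds are approximate):

    #include <stdint.h>

    /* sketch of the "bl,pn %icc,.rangeN" test */
    static int
    needs_special(uint32_t hx)
    {
        int32_t lo = (int32_t)(hx - 0x3e400000U);  /* < 0 iff |x| < ~2^-27 */
        int32_t hi = (int32_t)(0x4099251eU - hx);  /* < 0 iff |x| > ~1.6e3 */

        return ((lo | hi) < 0);    /* either sign bit set => .rangeN */
    }
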
210 208 .loop1:
211 209 lda [%i1]%asi,%l2 ! preload next argument
212 210 sub %l1,%o4,%g5
213 211 sub %o5,%l1,%o7
214 212 fabss %f8,%f10
215 213
216 214 lda [%i1]%asi,%f16
217 215 orcc %o7,%g5,%g0
218 216 mov %i3,%o1 ! py1 = y
219 217 bl,pn %icc,.range1 ! hx < 0x3e400000 or hx > 0x4099251e
220 218
221 219 ! delay slot
222 220 lda [%i1+4]%asi,%f19
223 221 addcc %i0,-1,%i0
224 222 add %i3,%i4,%i3 ! y += stridey
225 223 ble,pn %icc,.last2
226 224
227 225 ! delay slot
228 226 andn %l2,%i5,%l2
229 227 add %i1,%i2,%i1 ! x += stridex
230 228 faddd %f10,c3two44,%f12
231 229 st %f23,[%o2+4]
232 230
233 231 .loop2:
234 232 lda [%i1]%asi,%l3 ! preload next argument
235 233 sub %l2,%o4,%g5
236 234 sub %o5,%l2,%o7
237 235 fabss %f16,%f18
238 236
239 237 lda [%i1]%asi,%f24
240 238 orcc %o7,%g5,%g0
241 239 mov %i3,%o2 ! py2 = y
242 240 bl,pn %icc,.range2 ! hx < 0x3e400000 or hx > 0x4099251e
243 241
244 242 ! delay slot
245 243 lda [%i1+4]%asi,%f27
246 244 addcc %i0,-1,%i0
247 245 add %i3,%i4,%i3 ! y += stridey
248 246 ble,pn %icc,.last3
249 247
250 248 ! delay slot
251 249 andn %l3,%i5,%l3
252 250 add %i1,%i2,%i1 ! x += stridex
253 251 faddd %f18,c3two44,%f20
254 252 st %f31,[%o3+4]
255 253
256 254 .loop3:
257 255 sub %l3,%o4,%g5
258 256 sub %o5,%l3,%o7
259 257 fabss %f24,%f26
260 258 st %f5,[%fp+nk0]
261 259
262 260 orcc %o7,%g5,%g0
263 261 mov %i3,%o3 ! py3 = y
264 262 bl,pn %icc,.range3 ! hx < 0x3e400000 or hx > 0x4099251e
265 263 ! delay slot
266 264 st %f13,[%fp+nk1]
267 265
268 266 !!! DONE?
269 267 .cont:
270 268 srlx %o5,32,%o7
271 269 add %i3,%i4,%i3 ! y += stridey
272 270 fmovs %f3,%f1
273 271 st %f21,[%fp+nk2]
274 272
275 273 sub %o7,%l0,%l0
276 274 sub %o7,%l1,%l1
277 275 faddd %f26,c3two44,%f28
278 276 st %f29,[%fp+nk3]
279 277
280 278 sub %o7,%l2,%l2
281 279 sub %o7,%l3,%l3
282 280 fmovs %f11,%f9
283 281
284 282 or %l0,%l1,%l0
285 283 or %l2,%l3,%l2
286 284 fmovs %f19,%f17
287 285
288 286 fmovs %f27,%f25
289 287 fmuld %f0,invpio2,%f6 ! x * invpio2, for medium range
290 288
291 289 fmuld %f8,invpio2,%f14
292 290 ld [%fp+nk0],%l4
293 291
294 292 fmuld %f16,invpio2,%f22
295 293 ld [%fp+nk1],%l5
296 294
297 295 orcc %l0,%l2,%g0
298 296 bl,pn %icc,.medium
299 297 ! delay slot
300 298 fmuld %f24,invpio2,%f30
301 299 ld [%fp+nk2],%l6
302 300
303 301 ld [%fp+nk3],%l7
304 302 sll %l4,5,%l4 ! k
305 303 fcmpd %fcc0,%f0,pio2_3 ! x < pio2_3 iff x < 0
306 304
307 305 sll %l5,5,%l5
308 306 ldd [%l4+%g1],%f4
309 307 fcmpd %fcc1,%f8,pio2_3
310 308
311 309 sll %l6,5,%l6
312 310 ldd [%l5+%g1],%f12
313 311 fcmpd %fcc2,%f16,pio2_3
314 312
315 313 sll %l7,5,%l7
316 314 ldd [%l6+%g1],%f20
317 315 fcmpd %fcc3,%f24,pio2_3
318 316
319 317 ldd [%l7+%g1],%f28
320 318 fsubd %f2,%f4,%f2 ! x -= __vlibm_TBL_sincos2[k]
321 319
322 320 fsubd %f10,%f12,%f10
323 321
324 322 fsubd %f18,%f20,%f18
325 323
326 324 fsubd %f26,%f28,%f26
327 325
328 326 fmuld %f2,%f2,%f0 ! z = x * x
329 327
330 328 fmuld %f10,%f10,%f8
331 329
332 330 fmuld %f18,%f18,%f16
333 331
334 332 fmuld %f26,%f26,%f24
335 333
336 334 fmuld %f0,pp3,%f6
337 335
338 336 fmuld %f8,pp3,%f14
339 337
340 338 fmuld %f16,pp3,%f22
341 339
342 340 fmuld %f24,pp3,%f30
343 341
344 342 faddd %f6,pp2,%f6
345 343 fmuld %f0,qq2,%f4
346 344
347 345 faddd %f14,pp2,%f14
348 346 fmuld %f8,qq2,%f12
349 347
350 348 faddd %f22,pp2,%f22
351 349 fmuld %f16,qq2,%f20
352 350
353 351 faddd %f30,pp2,%f30
354 352 fmuld %f24,qq2,%f28
355 353
356 354 fmuld %f0,%f6,%f6
357 355 faddd %f4,qq1,%f4
358 356
359 357 fmuld %f8,%f14,%f14
360 358 faddd %f12,qq1,%f12
361 359
362 360 fmuld %f16,%f22,%f22
363 361 faddd %f20,qq1,%f20
364 362
365 363 fmuld %f24,%f30,%f30
366 364 faddd %f28,qq1,%f28
367 365
368 366 faddd %f6,pp1,%f6
369 367 fmuld %f0,%f4,%f4
370 368 add %l4,%g1,%l4
371 369
372 370 faddd %f14,pp1,%f14
373 371 fmuld %f8,%f12,%f12
374 372 add %l5,%g1,%l5
375 373
376 374 faddd %f22,pp1,%f22
377 375 fmuld %f16,%f20,%f20
378 376 add %l6,%g1,%l6
379 377
380 378 faddd %f30,pp1,%f30
381 379 fmuld %f24,%f28,%f28
382 380 add %l7,%g1,%l7
383 381
384 382 fmuld %f0,%f6,%f6
385 383 ldd [%l4+8],%f0
386 384
387 385 fmuld %f8,%f14,%f14
388 386 ldd [%l5+8],%f8
389 387
390 388 fmuld %f16,%f22,%f22
391 389 ldd [%l6+8],%f16
392 390
393 391 fmuld %f24,%f30,%f30
394 392 ldd [%l7+8],%f24
395 393
396 394 fmuld %f2,%f6,%f6
397 395
398 396 fmuld %f10,%f14,%f14
399 397
400 398 fmuld %f18,%f22,%f22
401 399
402 400 fmuld %f26,%f30,%f30
403 401
404 402 faddd %f6,%f2,%f6
405 403 fmuld %f0,%f4,%f4
406 404 ldd [%l4+16],%f2
407 405
408 406 faddd %f14,%f10,%f14
409 407 fmuld %f8,%f12,%f12
410 408 ldd [%l5+16],%f10
411 409
412 410 faddd %f22,%f18,%f22
413 411 fmuld %f16,%f20,%f20
414 412 ldd [%l6+16],%f18
415 413
416 414 faddd %f30,%f26,%f30
417 415 fmuld %f24,%f28,%f28
418 416 ldd [%l7+16],%f26
419 417
420 418 fmuld %f2,%f6,%f6
421 419
422 420 fmuld %f10,%f14,%f14
423 421
424 422 fmuld %f18,%f22,%f22
425 423
426 424 fmuld %f26,%f30,%f30
427 425
428 426 faddd %f6,%f4,%f6
429 427
430 428 faddd %f14,%f12,%f14
431 429
432 430 faddd %f22,%f20,%f22
433 431
434 432 faddd %f30,%f28,%f30
435 433
436 434 faddd %f6,%f0,%f6
437 435
438 436 faddd %f14,%f8,%f14
439 437
440 438 faddd %f22,%f16,%f22
441 439
442 440 faddd %f30,%f24,%f30
443 441
444 442 fnegd %f6,%f4
445 443 lda [%i1]%asi,%l0 ! preload next argument
446 444
447 445 fnegd %f14,%f12
448 446 lda [%i1]%asi,%f0
449 447
450 448 fnegd %f22,%f20
451 449 lda [%i1+4]%asi,%f3
452 450
453 451 fnegd %f30,%f28
454 452 andn %l0,%i5,%l0
455 453 add %i1,%i2,%i1
456 454
457 455 fmovdl %fcc0,%f4,%f6 ! (hx < -0)? -s : s
458 456 st %f6,[%o0]
459 457
460 458 fmovdl %fcc1,%f12,%f14
461 459 st %f14,[%o1]
462 460
463 461 fmovdl %fcc2,%f20,%f22
464 462 st %f22,[%o2]
465 463
466 464 fmovdl %fcc3,%f28,%f30
467 465 st %f30,[%o3]
468 466 addcc %i0,-1,%i0
469 467
470 468 bg,pt %icc,.loop0
471 469 ! delay slot
472 470 st %f7,[%o0+4]
473 471
474 472 ba,pt %icc,.end
475 473 ! delay slot
476 474 nop
477 475
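The straight-line block above is the primary-range kernel, four lanes wide: each |x| is split into a table breakpoint plus a small remainder, and the result is rebuilt from the angle-addition formula with short polynomials. A scalar sketch of one lane; the 32-byte entry layout {breakpoint, sin, cos, pad} is an assumption inferred from the sll ...,5 index scaling and the +8/+16 loads, the table name is a hypothetical C view, and the coefficients are the pp/qq constants decoded to hex floats (the rounding order is not bit-faithful to the assembly):

    /* hypothetical C view of __vlibm_TBL_sincos2 */
    struct sc_entry { double v, s, c, pad; };
    extern const struct sc_entry vlibm_tbl_sincos2[];

    #define PP1 -0x1.5555555555533p-3   /* ~ -1/6 */
    #define PP2  0x1.1111110e7d53bp-7   /* ~  1/120 */
    #define PP3 -0x1.a0167e6b3cf9bp-13  /* ~ -1/5040 */
    #define QQ1 -0x1.fffffffffff65p-2   /* ~ -1/2 */
    #define QQ2  0x1.5555554f88ed0p-5   /* ~  1/24 */

    static double
    sin_primary(double x, int k)    /* 0 <= x <= ~pi/4, k from top bits */
    {
        const struct sc_entry *e = &vlibm_tbl_sincos2[k];
        double dx = x - e->v;                          /* x -= TBL[k] */
        double z  = dx * dx;                           /* z = x * x */
        double sp = z * (PP1 + z * (PP2 + z * PP3));   /* sin(dx) ~ dx + dx*sp */
        double cp = z * (QQ1 + z * QQ2);               /* cos(dx) ~ 1 + cp */

        /* sin(v + dx) = sin v * cos dx + cos v * sin dx */
        return (e->s + (e->s * cp + e->c * (dx + dx * sp)));
    }
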
478 476
479 477 .align 16
480 478 .medium:
481 479 faddd %f6,c3two51,%f4
482 480 st %f5,[%fp+nk0]
483 481
484 482 faddd %f14,c3two51,%f12
485 483 st %f13,[%fp+nk1]
486 484
487 485 faddd %f22,c3two51,%f20
488 486 st %f21,[%fp+nk2]
489 487
490 488 faddd %f30,c3two51,%f28
491 489 st %f29,[%fp+nk3]
492 490
493 491 fsubd %f4,c3two51,%f6
494 492
495 493 fsubd %f12,c3two51,%f14
496 494
497 495 fsubd %f20,c3two51,%f22
498 496
499 497 fsubd %f28,c3two51,%f30
500 498
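The faddd/fsubd pairs with c3two51 (3 * 2^51 = 1.5 * 2^52) are the classic round-to-nearest-integer trick: adding the constant forces the integer part of x * invpio2 into the low fraction bits, so the st/ld pairs through [%fp+nkN] can read n straight out of the low word, while subtracting the constant back recovers n as a double. A hedged C model (assumes round-to-nearest mode and |n| far below 2^31; the helper name is mine):

    #include <stdint.h>
    #include <string.h>

    static double
    round_and_extract(double v, int32_t *n)
    {
        const double big = 3.0 * 0x1p51;   /* c3two51 */
        double t = v + big;                /* low 32 bits of t now hold n */
        uint64_t bits;

        memcpy(&bits, &t, sizeof (bits));
        *n = (int32_t)bits;                /* what the asm reads via [%fp+nkN] */
        return (t - big);                  /* nearest integer, as a double */
    }

The sketch relies on strict IEEE semantics, so it must be compiled without value-changing floating-point optimizations.
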
501 499 fmuld %f6,pio2_1,%f2
502 500 ld [%fp+nk0],%l0 ! n
503 501
504 502 fmuld %f14,pio2_1,%f10
505 503 ld [%fp+nk1],%l1
506 504
507 505 fmuld %f22,pio2_1,%f18
508 506 ld [%fp+nk2],%l2
509 507
510 508 fmuld %f30,pio2_1,%f26
511 509 ld [%fp+nk3],%l3
512 510
513 511 fsubd %f0,%f2,%f0
514 512 fmuld %f6,pio2_2,%f4
515 513
516 514 fsubd %f8,%f10,%f8
517 515 fmuld %f14,pio2_2,%f12
518 516
519 517 fsubd %f16,%f18,%f16
520 518 fmuld %f22,pio2_2,%f20
521 519
522 520 fsubd %f24,%f26,%f24
523 521 fmuld %f30,pio2_2,%f28
524 522
525 523 fsubd %f0,%f4,%f32
526 524
527 525 fsubd %f8,%f12,%f34
528 526
529 527 fsubd %f16,%f20,%f36
530 528
531 529 fsubd %f24,%f28,%f38
532 530
533 531 fsubd %f0,%f32,%f0
534 532 fcmple32 %f32,pio2_3,%l4 ! x <= pio2_3 iff x < 0
535 533
536 534 fsubd %f8,%f34,%f8
537 535 fcmple32 %f34,pio2_3,%l5
538 536
539 537 fsubd %f16,%f36,%f16
540 538 fcmple32 %f36,pio2_3,%l6
541 539
542 540 fsubd %f24,%f38,%f24
543 541 fcmple32 %f38,pio2_3,%l7
544 542
545 543 fsubd %f0,%f4,%f0
546 544 fmuld %f6,pio2_3,%f6
547 545 sll %l4,30,%l4 ! if (x < 0) n = -n ^ 2
548 546
549 547 fsubd %f8,%f12,%f8
550 548 fmuld %f14,pio2_3,%f14
551 549 sll %l5,30,%l5
552 550
553 551 fsubd %f16,%f20,%f16
554 552 fmuld %f22,pio2_3,%f22
555 553 sll %l6,30,%l6
556 554
557 555 fsubd %f24,%f28,%f24
558 556 fmuld %f30,pio2_3,%f30
559 557 sll %l7,30,%l7
560 558
561 559 fsubd %f6,%f0,%f6
562 560 sra %l4,31,%l4
563 561
564 562 fsubd %f14,%f8,%f14
565 563 sra %l5,31,%l5
566 564
567 565 fsubd %f22,%f16,%f22
568 566 sra %l6,31,%l6
569 567
570 568 fsubd %f30,%f24,%f30
571 569 sra %l7,31,%l7
572 570
573 571 fsubd %f32,%f6,%f0 ! reduced x
574 572 xor %l0,%l4,%l0
575 573
576 574 fsubd %f34,%f14,%f8
577 575 xor %l1,%l5,%l1
578 576
579 577 fsubd %f36,%f22,%f16
580 578 xor %l2,%l6,%l2
581 579
582 580 fsubd %f38,%f30,%f24
583 581 xor %l3,%l7,%l3
584 582
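The reduction above is a three-term Cody-Waite scheme: n * pi/2 is subtracted as the pio2_1/pio2_2/pio2_3 pieces from the constants table, each small enough that the products lose little precision, and the fcmple32/sll/sra/xor lines fold the sign of x into the quadrant number. A simplified scalar sketch reusing round_and_extract from the previous snippet; the assembly additionally tracks the f32..f38 correction terms, which this drops:

    static double
    reduce_medium(double x, int32_t *n)    /* returns reduced arg, |r| <~ pi/4 */
    {
        const double invpio2 = 0x1.45f306dc9c883p-1;  /* 2/pi */
        const double pio2_1  = 0x1.921fb54442cp+0;    /* head of pi/2 */
        const double pio2_2  = 0x1.8469898cc4p-44;    /* middle bits */
        const double pio2_3  = 0x1.1701b839a252p-88;  /* tail */
        double dn = round_and_extract(x * invpio2, n);
        double r  = (x - dn * pio2_1) - dn * pio2_2;

        return (r - dn * pio2_3);
    }
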
585 583 fabsd %f0,%f2
586 584 sub %l0,%l4,%l0
587 585
588 586 fabsd %f8,%f10
589 587 sub %l1,%l5,%l1
590 588
591 589 fabsd %f16,%f18
592 590 sub %l2,%l6,%l2
593 591
594 592 fabsd %f24,%f26
595 593 sub %l3,%l7,%l3
596 594
597 595 faddd %f2,c3two44,%f4
598 596 st %f5,[%fp+nk0]
599 597 and %l4,2,%l4
600 598
601 599 faddd %f10,c3two44,%f12
602 600 st %f13,[%fp+nk1]
603 601 and %l5,2,%l5
604 602
605 603 faddd %f18,c3two44,%f20
606 604 st %f21,[%fp+nk2]
607 605 and %l6,2,%l6
608 606
609 607 faddd %f26,c3two44,%f28
610 608 st %f29,[%fp+nk3]
611 609 and %l7,2,%l7
612 610
613 611 fsubd %f32,%f0,%f4
614 612 xor %l0,%l4,%l0
615 613
616 614 fsubd %f34,%f8,%f12
617 615 xor %l1,%l5,%l1
618 616
619 617 fsubd %f36,%f16,%f20
620 618 xor %l2,%l6,%l2
621 619
622 620 fsubd %f38,%f24,%f28
623 621 xor %l3,%l7,%l3
624 622
625 623 fzero %f38
626 624 ld [%fp+nk0],%l4
627 625
628 626 fsubd %f4,%f6,%f6 ! w
629 627 ld [%fp+nk1],%l5
630 628
631 629 fsubd %f12,%f14,%f14
632 630 ld [%fp+nk2],%l6
633 631
634 632 fnegd %f38,%f38
635 633 ld [%fp+nk3],%l7
636 634 sll %l4,5,%l4 ! k
637 635
638 636 fsubd %f20,%f22,%f22
639 637 sll %l5,5,%l5
640 638
641 639 fsubd %f28,%f30,%f30
642 640 sll %l6,5,%l6
643 641
644 642 fand %f0,%f38,%f32 ! sign bit of x
645 643 ldd [%l4+%g1],%f4
646 644 sll %l7,5,%l7
647 645
648 646 fand %f8,%f38,%f34
649 647 ldd [%l5+%g1],%f12
650 648
651 649 fand %f16,%f38,%f36
652 650 ldd [%l6+%g1],%f20
653 651
654 652 fand %f24,%f38,%f38
655 653 ldd [%l7+%g1],%f28
656 654
657 655 fsubd %f2,%f4,%f2 ! x -= __vlibm_TBL_sincos2[k]
658 656
659 657 fsubd %f10,%f12,%f10
660 658
661 659 fsubd %f18,%f20,%f18
662 660 nop
663 661
664 662 fsubd %f26,%f28,%f26
665 663 nop
666 664
667 665 ! 16-byte aligned
668 666 fmuld %f2,%f2,%f0 ! z = x * x
669 667 andcc %l0,1,%g0
670 668 bz,pn %icc,.case8
671 669 ! delay slot
672 670 fxor %f6,%f32,%f32
673 671
674 672 fmuld %f10,%f10,%f8
675 673 andcc %l1,1,%g0
676 674 bz,pn %icc,.case4
677 675 ! delay slot
678 676 fxor %f14,%f34,%f34
679 677
680 678 fmuld %f18,%f18,%f16
681 679 andcc %l2,1,%g0
682 680 bz,pn %icc,.case2
683 681 ! delay slot
684 682 fxor %f22,%f36,%f36
685 683
686 684 fmuld %f26,%f26,%f24
687 685 andcc %l3,1,%g0
688 686 bz,pn %icc,.case1
689 687 ! delay slot
690 688 fxor %f30,%f38,%f38
691 689
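The andcc/bz ladder above dispatches on the low quadrant bit of each of the four lanes, choosing one unrolled case block per combination (sin kernel for even n, cos kernel for odd n), and the andcc ...,2 / fmovdnz pairs in every block's epilogue apply the quadrant sign. Per lane the logic reduces to:

    /* sketch of the per-lane quadrant selection, sin(x + n*pi/2) */
    static double
    apply_quadrant(int n, double sin_r, double cos_r)
    {
        double r = (n & 1) ? cos_r : sin_r;   /* odd quadrants use cosine */

        return ((n & 2) ? -r : r);            /* quadrants 2, 3 negate */
    }
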
692 690 !.case0:
693 691 fmuld %f0,qq3,%f6 ! cos(x0)
694 692
695 693 fmuld %f8,qq3,%f14 ! cos(x1)
696 694
697 695 fmuld %f16,qq3,%f22 ! cos(x2)
698 696
699 697 fmuld %f24,qq3,%f30 ! cos(x3)
700 698
701 699 faddd %f6,qq2,%f6
702 700 fmuld %f0,pp2,%f4
703 701
704 702 faddd %f14,qq2,%f14
705 703 fmuld %f8,pp2,%f12
706 704
707 705 faddd %f22,qq2,%f22
708 706 fmuld %f16,pp2,%f20
709 707
710 708 faddd %f30,qq2,%f30
711 709 fmuld %f24,pp2,%f28
712 710
713 711 fmuld %f0,%f6,%f6
714 712 faddd %f4,pp1,%f4
715 713
716 714 fmuld %f8,%f14,%f14
717 715 faddd %f12,pp1,%f12
718 716
719 717 fmuld %f16,%f22,%f22
720 718 faddd %f20,pp1,%f20
721 719
722 720 fmuld %f24,%f30,%f30
723 721 faddd %f28,pp1,%f28
724 722
725 723 faddd %f6,qq1,%f6
726 724 fmuld %f0,%f4,%f4
727 725 add %l4,%g1,%l4
728 726
729 727 faddd %f14,qq1,%f14
730 728 fmuld %f8,%f12,%f12
731 729 add %l5,%g1,%l5
732 730
733 731 faddd %f22,qq1,%f22
734 732 fmuld %f16,%f20,%f20
735 733 add %l6,%g1,%l6
736 734
737 735 faddd %f30,qq1,%f30
738 736 fmuld %f24,%f28,%f28
739 737 add %l7,%g1,%l7
740 738
741 739 fmuld %f2,%f4,%f4
742 740
743 741 fmuld %f10,%f12,%f12
744 742
745 743 fmuld %f18,%f20,%f20
746 744
747 745 fmuld %f26,%f28,%f28
748 746
749 747 fmuld %f0,%f6,%f6
750 748 faddd %f4,%f32,%f4
751 749 ldd [%l4+16],%f0
752 750
753 751 fmuld %f8,%f14,%f14
754 752 faddd %f12,%f34,%f12
755 753 ldd [%l5+16],%f8
756 754
757 755 fmuld %f16,%f22,%f22
758 756 faddd %f20,%f36,%f20
759 757 ldd [%l6+16],%f16
760 758
761 759 fmuld %f24,%f30,%f30
762 760 faddd %f28,%f38,%f28
763 761 ldd [%l7+16],%f24
764 762
765 763 fmuld %f0,%f6,%f6
766 764 faddd %f4,%f2,%f4
767 765 ldd [%l4+8],%f32
768 766
769 767 fmuld %f8,%f14,%f14
770 768 faddd %f12,%f10,%f12
771 769 ldd [%l5+8],%f34
772 770
773 771 fmuld %f16,%f22,%f22
774 772 faddd %f20,%f18,%f20
775 773 ldd [%l6+8],%f36
776 774
777 775 fmuld %f24,%f30,%f30
778 776 faddd %f28,%f26,%f28
779 777 ldd [%l7+8],%f38
780 778
781 779 fmuld %f32,%f4,%f4
782 780
783 781 fmuld %f34,%f12,%f12
784 782
785 783 fmuld %f36,%f20,%f20
786 784
787 785 fmuld %f38,%f28,%f28
788 786
789 787 fsubd %f6,%f4,%f6
790 788
791 789 fsubd %f14,%f12,%f14
792 790
793 791 fsubd %f22,%f20,%f22
794 792
795 793 fsubd %f30,%f28,%f30
796 794
797 795 faddd %f6,%f0,%f6
798 796
799 797 faddd %f14,%f8,%f14
800 798
801 799 faddd %f22,%f16,%f22
802 800
803 801 faddd %f30,%f24,%f30
804 802 mov %l0,%l4
805 803
806 804 fnegd %f6,%f4
807 805 lda [%i1]%asi,%l0 ! preload next argument
808 806
809 807 fnegd %f14,%f12
810 808 lda [%i1]%asi,%f0
811 809
812 810 fnegd %f22,%f20
813 811 lda [%i1+4]%asi,%f3
814 812
815 813 fnegd %f30,%f28
816 814 andn %l0,%i5,%l0
817 815 add %i1,%i2,%i1
818 816
819 817 andcc %l4,2,%g0
820 818 fmovdnz %icc,%f4,%f6
821 819 st %f6,[%o0]
822 820
823 821 andcc %l1,2,%g0
824 822 fmovdnz %icc,%f12,%f14
825 823 st %f14,[%o1]
826 824
827 825 andcc %l2,2,%g0
828 826 fmovdnz %icc,%f20,%f22
829 827 st %f22,[%o2]
830 828
831 829 andcc %l3,2,%g0
832 830 fmovdnz %icc,%f28,%f30
833 831 st %f30,[%o3]
834 832
835 833 addcc %i0,-1,%i0
836 834 bg,pt %icc,.loop0
837 835 ! delay slot
838 836 st %f7,[%o0+4]
839 837
840 838 ba,pt %icc,.end
841 839 ! delay slot
842 840 nop
843 841
844 842 .align 16
845 843 .case1:
846 844 fmuld %f24,pp3,%f30 ! sin(x3)
847 845
848 846 fmuld %f0,qq3,%f6 ! cos(x0)
849 847
850 848 fmuld %f8,qq3,%f14 ! cos(x1)
851 849
852 850 fmuld %f16,qq3,%f22 ! cos(x2)
853 851
854 852 faddd %f30,pp2,%f30
855 853 fmuld %f24,qq2,%f28
856 854
857 855 faddd %f6,qq2,%f6
858 856 fmuld %f0,pp2,%f4
859 857
860 858 faddd %f14,qq2,%f14
861 859 fmuld %f8,pp2,%f12
862 860
863 861 faddd %f22,qq2,%f22
864 862 fmuld %f16,pp2,%f20
865 863
866 864 fmuld %f24,%f30,%f30
867 865 faddd %f28,qq1,%f28
868 866
869 867 fmuld %f0,%f6,%f6
870 868 faddd %f4,pp1,%f4
871 869
872 870 fmuld %f8,%f14,%f14
873 871 faddd %f12,pp1,%f12
874 872
875 873 fmuld %f16,%f22,%f22
876 874 faddd %f20,pp1,%f20
877 875
878 876 faddd %f30,pp1,%f30
879 877 fmuld %f24,%f28,%f28
880 878 add %l7,%g1,%l7
881 879
882 880 faddd %f6,qq1,%f6
883 881 fmuld %f0,%f4,%f4
884 882 add %l4,%g1,%l4
885 883
886 884 faddd %f14,qq1,%f14
887 885 fmuld %f8,%f12,%f12
888 886 add %l5,%g1,%l5
889 887
890 888 faddd %f22,qq1,%f22
891 889 fmuld %f16,%f20,%f20
892 890 add %l6,%g1,%l6
893 891
894 892 fmuld %f24,%f30,%f30
895 893
896 894 fmuld %f2,%f4,%f4
897 895
898 896 fmuld %f10,%f12,%f12
899 897
900 898 fmuld %f18,%f20,%f20
901 899
902 900 fmuld %f26,%f30,%f30
903 901 ldd [%l7+8],%f24
904 902
905 903 fmuld %f0,%f6,%f6
906 904 faddd %f4,%f32,%f4
907 905 ldd [%l4+16],%f0
908 906
909 907 fmuld %f8,%f14,%f14
910 908 faddd %f12,%f34,%f12
911 909 ldd [%l5+16],%f8
912 910
913 911 fmuld %f16,%f22,%f22
914 912 faddd %f20,%f36,%f20
915 913 ldd [%l6+16],%f16
916 914
917 915 fmuld %f24,%f28,%f28
918 916 faddd %f38,%f30,%f30
919 917
920 918 fmuld %f0,%f6,%f6
921 919 faddd %f4,%f2,%f4
922 920 ldd [%l4+8],%f32
923 921
924 922 fmuld %f8,%f14,%f14
925 923 faddd %f12,%f10,%f12
926 924 ldd [%l5+8],%f34
927 925
928 926 fmuld %f16,%f22,%f22
929 927 faddd %f20,%f18,%f20
930 928 ldd [%l6+8],%f36
931 929
932 930 faddd %f26,%f30,%f30
933 931 ldd [%l7+16],%f38
934 932
935 933 fmuld %f32,%f4,%f4
936 934
937 935 fmuld %f34,%f12,%f12
938 936
939 937 fmuld %f36,%f20,%f20
940 938
941 939 fmuld %f38,%f30,%f30
942 940
943 941 fsubd %f6,%f4,%f6
944 942
945 943 fsubd %f14,%f12,%f14
946 944
947 945 fsubd %f22,%f20,%f22
948 946
949 947 faddd %f30,%f28,%f30
950 948
951 949 faddd %f6,%f0,%f6
952 950
953 951 faddd %f14,%f8,%f14
954 952
955 953 faddd %f22,%f16,%f22
956 954
957 955 faddd %f30,%f24,%f30
958 956 mov %l0,%l4
959 957
960 958 fnegd %f6,%f4
961 959 lda [%i1]%asi,%l0 ! preload next argument
962 960
963 961 fnegd %f14,%f12
964 962 lda [%i1]%asi,%f0
965 963
966 964 fnegd %f22,%f20
967 965 lda [%i1+4]%asi,%f3
968 966
969 967 fnegd %f30,%f28
970 968 andn %l0,%i5,%l0
971 969 add %i1,%i2,%i1
972 970
973 971 andcc %l4,2,%g0
974 972 fmovdnz %icc,%f4,%f6
975 973 st %f6,[%o0]
976 974
977 975 andcc %l1,2,%g0
978 976 fmovdnz %icc,%f12,%f14
979 977 st %f14,[%o1]
980 978
981 979 andcc %l2,2,%g0
982 980 fmovdnz %icc,%f20,%f22
983 981 st %f22,[%o2]
984 982
985 983 andcc %l3,2,%g0
986 984 fmovdnz %icc,%f28,%f30
987 985 st %f30,[%o3]
988 986
989 987 addcc %i0,-1,%i0
990 988 bg,pt %icc,.loop0
991 989 ! delay slot
992 990 st %f7,[%o0+4]
993 991
994 992 ba,pt %icc,.end
995 993 ! delay slot
996 994 nop
997 995
998 996 .align 16
999 997 .case2:
1000 998 fmuld %f26,%f26,%f24
1001 999 andcc %l3,1,%g0
1002 1000 bz,pn %icc,.case3
1003 1001 ! delay slot
1004 1002 fxor %f30,%f38,%f38
1005 1003
1006 1004 fmuld %f16,pp3,%f22 ! sin(x2)
1007 1005
1008 1006 fmuld %f0,qq3,%f6 ! cos(x0)
1009 1007
1010 1008 fmuld %f8,qq3,%f14 ! cos(x1)
1011 1009
1012 1010 faddd %f22,pp2,%f22
1013 1011 fmuld %f16,qq2,%f20
1014 1012
1015 1013 fmuld %f24,qq3,%f30 ! cos(x3)
1016 1014
1017 1015 faddd %f6,qq2,%f6
1018 1016 fmuld %f0,pp2,%f4
1019 1017
1020 1018 faddd %f14,qq2,%f14
1021 1019 fmuld %f8,pp2,%f12
1022 1020
1023 1021 fmuld %f16,%f22,%f22
1024 1022 faddd %f20,qq1,%f20
1025 1023
1026 1024 faddd %f30,qq2,%f30
1027 1025 fmuld %f24,pp2,%f28
1028 1026
1029 1027 fmuld %f0,%f6,%f6
1030 1028 faddd %f4,pp1,%f4
1031 1029
1032 1030 fmuld %f8,%f14,%f14
1033 1031 faddd %f12,pp1,%f12
1034 1032
1035 1033 faddd %f22,pp1,%f22
1036 1034 fmuld %f16,%f20,%f20
1037 1035 add %l6,%g1,%l6
1038 1036
1039 1037 fmuld %f24,%f30,%f30
1040 1038 faddd %f28,pp1,%f28
1041 1039
1042 1040 faddd %f6,qq1,%f6
1043 1041 fmuld %f0,%f4,%f4
1044 1042 add %l4,%g1,%l4
1045 1043
1046 1044 faddd %f14,qq1,%f14
1047 1045 fmuld %f8,%f12,%f12
1048 1046 add %l5,%g1,%l5
1049 1047
1050 1048 fmuld %f16,%f22,%f22
1051 1049
1052 1050 faddd %f30,qq1,%f30
1053 1051 fmuld %f24,%f28,%f28
1054 1052 add %l7,%g1,%l7
1055 1053
1056 1054 fmuld %f2,%f4,%f4
1057 1055
1058 1056 fmuld %f10,%f12,%f12
1059 1057
1060 1058 fmuld %f18,%f22,%f22
1061 1059 ldd [%l6+8],%f16
1062 1060
1063 1061 fmuld %f26,%f28,%f28
1064 1062
1065 1063 fmuld %f0,%f6,%f6
1066 1064 faddd %f4,%f32,%f4
1067 1065 ldd [%l4+16],%f0
1068 1066
1069 1067 fmuld %f8,%f14,%f14
1070 1068 faddd %f12,%f34,%f12
1071 1069 ldd [%l5+16],%f8
1072 1070
1073 1071 fmuld %f16,%f20,%f20
1074 1072 faddd %f36,%f22,%f22
1075 1073
1076 1074 fmuld %f24,%f30,%f30
1077 1075 faddd %f28,%f38,%f28
1078 1076 ldd [%l7+16],%f24
1079 1077
1080 1078 fmuld %f0,%f6,%f6
1081 1079 faddd %f4,%f2,%f4
1082 1080 ldd [%l4+8],%f32
1083 1081
1084 1082 fmuld %f8,%f14,%f14
1085 1083 faddd %f12,%f10,%f12
1086 1084 ldd [%l5+8],%f34
1087 1085
1088 1086 faddd %f18,%f22,%f22
1089 1087 ldd [%l6+16],%f36
1090 1088
1091 1089 fmuld %f24,%f30,%f30
1092 1090 faddd %f28,%f26,%f28
1093 1091 ldd [%l7+8],%f38
1094 1092
1095 1093 fmuld %f32,%f4,%f4
1096 1094
1097 1095 fmuld %f34,%f12,%f12
1098 1096
1099 1097 fmuld %f36,%f22,%f22
1100 1098
1101 1099 fmuld %f38,%f28,%f28
1102 1100
1103 1101 fsubd %f6,%f4,%f6
1104 1102
1105 1103 fsubd %f14,%f12,%f14
1106 1104
1107 1105 faddd %f22,%f20,%f22
1108 1106
1109 1107 fsubd %f30,%f28,%f30
1110 1108
1111 1109 faddd %f6,%f0,%f6
1112 1110
1113 1111 faddd %f14,%f8,%f14
1114 1112
1115 1113 faddd %f22,%f16,%f22
1116 1114
1117 1115 faddd %f30,%f24,%f30
1118 1116 mov %l0,%l4
1119 1117
1120 1118 fnegd %f6,%f4
1121 1119 lda [%i1]%asi,%l0 ! preload next argument
1122 1120
1123 1121 fnegd %f14,%f12
1124 1122 lda [%i1]%asi,%f0
1125 1123
1126 1124 fnegd %f22,%f20
1127 1125 lda [%i1+4]%asi,%f3
1128 1126
1129 1127 fnegd %f30,%f28
1130 1128 andn %l0,%i5,%l0
1131 1129 add %i1,%i2,%i1
1132 1130
1133 1131 andcc %l4,2,%g0
1134 1132 fmovdnz %icc,%f4,%f6
1135 1133 st %f6,[%o0]
1136 1134
1137 1135 andcc %l1,2,%g0
1138 1136 fmovdnz %icc,%f12,%f14
1139 1137 st %f14,[%o1]
1140 1138
1141 1139 andcc %l2,2,%g0
1142 1140 fmovdnz %icc,%f20,%f22
1143 1141 st %f22,[%o2]
1144 1142
1145 1143 andcc %l3,2,%g0
1146 1144 fmovdnz %icc,%f28,%f30
1147 1145 st %f30,[%o3]
1148 1146
1149 1147 addcc %i0,-1,%i0
1150 1148 bg,pt %icc,.loop0
1151 1149 ! delay slot
1152 1150 st %f7,[%o0+4]
1153 1151
1154 1152 ba,pt %icc,.end
1155 1153 ! delay slot
1156 1154 nop
1157 1155
1158 1156 .align 16
1159 1157 .case3:
1160 1158 fmuld %f16,pp3,%f22 ! sin(x2)
1161 1159
1162 1160 fmuld %f24,pp3,%f30 ! sin(x3)
1163 1161
1164 1162 fmuld %f0,qq3,%f6 ! cos(x0)
1165 1163
1166 1164 fmuld %f8,qq3,%f14 ! cos(x1)
1167 1165
1168 1166 faddd %f22,pp2,%f22
1169 1167 fmuld %f16,qq2,%f20
1170 1168
1171 1169 faddd %f30,pp2,%f30
1172 1170 fmuld %f24,qq2,%f28
1173 1171
1174 1172 faddd %f6,qq2,%f6
1175 1173 fmuld %f0,pp2,%f4
1176 1174
1177 1175 faddd %f14,qq2,%f14
1178 1176 fmuld %f8,pp2,%f12
1179 1177
1180 1178 fmuld %f16,%f22,%f22
1181 1179 faddd %f20,qq1,%f20
1182 1180
1183 1181 fmuld %f24,%f30,%f30
1184 1182 faddd %f28,qq1,%f28
1185 1183
1186 1184 fmuld %f0,%f6,%f6
1187 1185 faddd %f4,pp1,%f4
1188 1186
1189 1187 fmuld %f8,%f14,%f14
1190 1188 faddd %f12,pp1,%f12
1191 1189
1192 1190 faddd %f22,pp1,%f22
1193 1191 fmuld %f16,%f20,%f20
1194 1192 add %l6,%g1,%l6
1195 1193
1196 1194 faddd %f30,pp1,%f30
1197 1195 fmuld %f24,%f28,%f28
1198 1196 add %l7,%g1,%l7
1199 1197
1200 1198 faddd %f6,qq1,%f6
1201 1199 fmuld %f0,%f4,%f4
1202 1200 add %l4,%g1,%l4
1203 1201
1204 1202 faddd %f14,qq1,%f14
1205 1203 fmuld %f8,%f12,%f12
1206 1204 add %l5,%g1,%l5
1207 1205
1208 1206 fmuld %f16,%f22,%f22
1209 1207
1210 1208 fmuld %f24,%f30,%f30
1211 1209
1212 1210 fmuld %f2,%f4,%f4
1213 1211
1214 1212 fmuld %f10,%f12,%f12
1215 1213
1216 1214 fmuld %f18,%f22,%f22
1217 1215 ldd [%l6+8],%f16
1218 1216
1219 1217 fmuld %f26,%f30,%f30
1220 1218 ldd [%l7+8],%f24
1221 1219
1222 1220 fmuld %f0,%f6,%f6
1223 1221 faddd %f4,%f32,%f4
1224 1222 ldd [%l4+16],%f0
1225 1223
1226 1224 fmuld %f8,%f14,%f14
1227 1225 faddd %f12,%f34,%f12
1228 1226 ldd [%l5+16],%f8
1229 1227
1230 1228 fmuld %f16,%f20,%f20
1231 1229 faddd %f36,%f22,%f22
1232 1230
1233 1231 fmuld %f24,%f28,%f28
1234 1232 faddd %f38,%f30,%f30
1235 1233
1236 1234 fmuld %f0,%f6,%f6
1237 1235 faddd %f4,%f2,%f4
1238 1236 ldd [%l4+8],%f32
1239 1237
1240 1238 fmuld %f8,%f14,%f14
1241 1239 faddd %f12,%f10,%f12
1242 1240 ldd [%l5+8],%f34
1243 1241
1244 1242 faddd %f18,%f22,%f22
1245 1243 ldd [%l6+16],%f36
1246 1244
1247 1245 faddd %f26,%f30,%f30
1248 1246 ldd [%l7+16],%f38
1249 1247
1250 1248 fmuld %f32,%f4,%f4
1251 1249
1252 1250 fmuld %f34,%f12,%f12
1253 1251
1254 1252 fmuld %f36,%f22,%f22
1255 1253
1256 1254 fmuld %f38,%f30,%f30
1257 1255
1258 1256 fsubd %f6,%f4,%f6
1259 1257
1260 1258 fsubd %f14,%f12,%f14
1261 1259
1262 1260 faddd %f22,%f20,%f22
1263 1261
1264 1262 faddd %f30,%f28,%f30
1265 1263
1266 1264 faddd %f6,%f0,%f6
1267 1265
1268 1266 faddd %f14,%f8,%f14
1269 1267
1270 1268 faddd %f22,%f16,%f22
1271 1269
1272 1270 faddd %f30,%f24,%f30
1273 1271 mov %l0,%l4
1274 1272
1275 1273 fnegd %f6,%f4
1276 1274 lda [%i1]%asi,%l0 ! preload next argument
1277 1275
1278 1276 fnegd %f14,%f12
1279 1277 lda [%i1]%asi,%f0
1280 1278
1281 1279 fnegd %f22,%f20
1282 1280 lda [%i1+4]%asi,%f3
1283 1281
1284 1282 fnegd %f30,%f28
1285 1283 andn %l0,%i5,%l0
1286 1284 add %i1,%i2,%i1
1287 1285
1288 1286 andcc %l4,2,%g0
1289 1287 fmovdnz %icc,%f4,%f6
1290 1288 st %f6,[%o0]
1291 1289
1292 1290 andcc %l1,2,%g0
1293 1291 fmovdnz %icc,%f12,%f14
1294 1292 st %f14,[%o1]
1295 1293
1296 1294 andcc %l2,2,%g0
1297 1295 fmovdnz %icc,%f20,%f22
1298 1296 st %f22,[%o2]
1299 1297
1300 1298 andcc %l3,2,%g0
1301 1299 fmovdnz %icc,%f28,%f30
1302 1300 st %f30,[%o3]
1303 1301
1304 1302 addcc %i0,-1,%i0
1305 1303 bg,pt %icc,.loop0
1306 1304 ! delay slot
1307 1305 st %f7,[%o0+4]
1308 1306
1309 1307 ba,pt %icc,.end
1310 1308 ! delay slot
1311 1309 nop
1312 1310
1313 1311 .align 16
1314 1312 .case4:
1315 1313 fmuld %f18,%f18,%f16
1316 1314 andcc %l2,1,%g0
1317 1315 bz,pn %icc,.case6
1318 1316 ! delay slot
1319 1317 fxor %f22,%f36,%f36
1320 1318
1321 1319 fmuld %f26,%f26,%f24
1322 1320 andcc %l3,1,%g0
1323 1321 bz,pn %icc,.case5
1324 1322 ! delay slot
1325 1323 fxor %f30,%f38,%f38
1326 1324
1327 1325 fmuld %f8,pp3,%f14 ! sin(x1)
1328 1326
1329 1327 fmuld %f0,qq3,%f6 ! cos(x0)
1330 1328
1331 1329 faddd %f14,pp2,%f14
1332 1330 fmuld %f8,qq2,%f12
1333 1331
1334 1332 fmuld %f16,qq3,%f22 ! cos(x2)
1335 1333
1336 1334 fmuld %f24,qq3,%f30 ! cos(x3)
1337 1335
1338 1336 faddd %f6,qq2,%f6
1339 1337 fmuld %f0,pp2,%f4
1340 1338
1341 1339 fmuld %f8,%f14,%f14
1342 1340 faddd %f12,qq1,%f12
1343 1341
1344 1342 faddd %f22,qq2,%f22
1345 1343 fmuld %f16,pp2,%f20
1346 1344
1347 1345 faddd %f30,qq2,%f30
1348 1346 fmuld %f24,pp2,%f28
1349 1347
1350 1348 fmuld %f0,%f6,%f6
1351 1349 faddd %f4,pp1,%f4
1352 1350
1353 1351 faddd %f14,pp1,%f14
1354 1352 fmuld %f8,%f12,%f12
1355 1353 add %l5,%g1,%l5
1356 1354
1357 1355 fmuld %f16,%f22,%f22
1358 1356 faddd %f20,pp1,%f20
1359 1357
1360 1358 fmuld %f24,%f30,%f30
1361 1359 faddd %f28,pp1,%f28
1362 1360
1363 1361 faddd %f6,qq1,%f6
1364 1362 fmuld %f0,%f4,%f4
1365 1363 add %l4,%g1,%l4
1366 1364
1367 1365 fmuld %f8,%f14,%f14
1368 1366
1369 1367 faddd %f22,qq1,%f22
1370 1368 fmuld %f16,%f20,%f20
1371 1369 add %l6,%g1,%l6
1372 1370
1373 1371 faddd %f30,qq1,%f30
1374 1372 fmuld %f24,%f28,%f28
1375 1373 add %l7,%g1,%l7
1376 1374
1377 1375 fmuld %f2,%f4,%f4
1378 1376
1379 1377 fmuld %f10,%f14,%f14
1380 1378 ldd [%l5+8],%f8
1381 1379
1382 1380 fmuld %f18,%f20,%f20
1383 1381
1384 1382 fmuld %f26,%f28,%f28
1385 1383
1386 1384 fmuld %f0,%f6,%f6
1387 1385 faddd %f4,%f32,%f4
1388 1386 ldd [%l4+16],%f0
1389 1387
1390 1388 fmuld %f8,%f12,%f12
1391 1389 faddd %f34,%f14,%f14
1392 1390
1393 1391 fmuld %f16,%f22,%f22
1394 1392 faddd %f20,%f36,%f20
1395 1393 ldd [%l6+16],%f16
1396 1394
1397 1395 fmuld %f24,%f30,%f30
1398 1396 faddd %f28,%f38,%f28
1399 1397 ldd [%l7+16],%f24
1400 1398
1401 1399 fmuld %f0,%f6,%f6
1402 1400 faddd %f4,%f2,%f4
1403 1401 ldd [%l4+8],%f32
1404 1402
1405 1403 faddd %f10,%f14,%f14
1406 1404 ldd [%l5+16],%f34
1407 1405
1408 1406 fmuld %f16,%f22,%f22
1409 1407 faddd %f20,%f18,%f20
1410 1408 ldd [%l6+8],%f36
1411 1409
1412 1410 fmuld %f24,%f30,%f30
1413 1411 faddd %f28,%f26,%f28
1414 1412 ldd [%l7+8],%f38
1415 1413
1416 1414 fmuld %f32,%f4,%f4
1417 1415
1418 1416 fmuld %f34,%f14,%f14
1419 1417
1420 1418 fmuld %f36,%f20,%f20
1421 1419
1422 1420 fmuld %f38,%f28,%f28
1423 1421
1424 1422 fsubd %f6,%f4,%f6
1425 1423
1426 1424 faddd %f14,%f12,%f14
1427 1425
1428 1426 fsubd %f22,%f20,%f22
1429 1427
1430 1428 fsubd %f30,%f28,%f30
1431 1429
1432 1430 faddd %f6,%f0,%f6
1433 1431
1434 1432 faddd %f14,%f8,%f14
1435 1433
1436 1434 faddd %f22,%f16,%f22
1437 1435
1438 1436 faddd %f30,%f24,%f30
1439 1437 mov %l0,%l4
1440 1438
1441 1439 fnegd %f6,%f4
1442 1440 lda [%i1]%asi,%l0 ! preload next argument
1443 1441
1444 1442 fnegd %f14,%f12
1445 1443 lda [%i1]%asi,%f0
1446 1444
1447 1445 fnegd %f22,%f20
1448 1446 lda [%i1+4]%asi,%f3
1449 1447
1450 1448 fnegd %f30,%f28
1451 1449 andn %l0,%i5,%l0
1452 1450 add %i1,%i2,%i1
1453 1451
1454 1452 andcc %l4,2,%g0
1455 1453 fmovdnz %icc,%f4,%f6
1456 1454 st %f6,[%o0]
1457 1455
1458 1456 andcc %l1,2,%g0
1459 1457 fmovdnz %icc,%f12,%f14
1460 1458 st %f14,[%o1]
1461 1459
1462 1460 andcc %l2,2,%g0
1463 1461 fmovdnz %icc,%f20,%f22
1464 1462 st %f22,[%o2]
1465 1463
1466 1464 andcc %l3,2,%g0
1467 1465 fmovdnz %icc,%f28,%f30
1468 1466 st %f30,[%o3]
1469 1467
1470 1468 addcc %i0,-1,%i0
1471 1469 bg,pt %icc,.loop0
1472 1470 ! delay slot
1473 1471 st %f7,[%o0+4]
1474 1472
1475 1473 ba,pt %icc,.end
1476 1474 ! delay slot
1477 1475 nop
1478 1476
1479 1477 .align 16
1480 1478 .case5:
1481 1479 fmuld %f8,pp3,%f14 ! sin(x1)
1482 1480
1483 1481 fmuld %f24,pp3,%f30 ! sin(x3)
1484 1482
1485 1483 fmuld %f0,qq3,%f6 ! cos(x0)
1486 1484
1487 1485 faddd %f14,pp2,%f14
1488 1486 fmuld %f8,qq2,%f12
1489 1487
1490 1488 fmuld %f16,qq3,%f22 ! cos(x2)
1491 1489
1492 1490 faddd %f30,pp2,%f30
1493 1491 fmuld %f24,qq2,%f28
1494 1492
1495 1493 faddd %f6,qq2,%f6
1496 1494 fmuld %f0,pp2,%f4
1497 1495
1498 1496 fmuld %f8,%f14,%f14
1499 1497 faddd %f12,qq1,%f12
1500 1498
1501 1499 faddd %f22,qq2,%f22
1502 1500 fmuld %f16,pp2,%f20
1503 1501
1504 1502 fmuld %f24,%f30,%f30
1505 1503 faddd %f28,qq1,%f28
1506 1504
1507 1505 fmuld %f0,%f6,%f6
1508 1506 faddd %f4,pp1,%f4
1509 1507
1510 1508 faddd %f14,pp1,%f14
1511 1509 fmuld %f8,%f12,%f12
1512 1510 add %l5,%g1,%l5
1513 1511
1514 1512 fmuld %f16,%f22,%f22
1515 1513 faddd %f20,pp1,%f20
1516 1514
1517 1515 faddd %f30,pp1,%f30
1518 1516 fmuld %f24,%f28,%f28
1519 1517 add %l7,%g1,%l7
1520 1518
1521 1519 faddd %f6,qq1,%f6
1522 1520 fmuld %f0,%f4,%f4
1523 1521 add %l4,%g1,%l4
1524 1522
1525 1523 fmuld %f8,%f14,%f14
1526 1524
1527 1525 faddd %f22,qq1,%f22
1528 1526 fmuld %f16,%f20,%f20
1529 1527 add %l6,%g1,%l6
1530 1528
1531 1529 fmuld %f24,%f30,%f30
1532 1530
1533 1531 fmuld %f2,%f4,%f4
1534 1532
1535 1533 fmuld %f10,%f14,%f14
1536 1534 ldd [%l5+8],%f8
1537 1535
1538 1536 fmuld %f18,%f20,%f20
1539 1537
1540 1538 fmuld %f26,%f30,%f30
1541 1539 ldd [%l7+8],%f24
1542 1540
1543 1541 fmuld %f0,%f6,%f6
1544 1542 faddd %f4,%f32,%f4
1545 1543 ldd [%l4+16],%f0
1546 1544
1547 1545 fmuld %f8,%f12,%f12
1548 1546 faddd %f34,%f14,%f14
1549 1547
1550 1548 fmuld %f16,%f22,%f22
1551 1549 faddd %f20,%f36,%f20
1552 1550 ldd [%l6+16],%f16
1553 1551
1554 1552 fmuld %f24,%f28,%f28
1555 1553 faddd %f38,%f30,%f30
1556 1554
1557 1555 fmuld %f0,%f6,%f6
1558 1556 faddd %f4,%f2,%f4
1559 1557 ldd [%l4+8],%f32
1560 1558
1561 1559 faddd %f10,%f14,%f14
1562 1560 ldd [%l5+16],%f34
1563 1561
1564 1562 fmuld %f16,%f22,%f22
1565 1563 faddd %f20,%f18,%f20
1566 1564 ldd [%l6+8],%f36
1567 1565
1568 1566 faddd %f26,%f30,%f30
1569 1567 ldd [%l7+16],%f38
1570 1568
1571 1569 fmuld %f32,%f4,%f4
1572 1570
1573 1571 fmuld %f34,%f14,%f14
1574 1572
1575 1573 fmuld %f36,%f20,%f20
1576 1574
1577 1575 fmuld %f38,%f30,%f30
1578 1576
1579 1577 fsubd %f6,%f4,%f6
1580 1578
1581 1579 faddd %f14,%f12,%f14
1582 1580
1583 1581 fsubd %f22,%f20,%f22
1584 1582
1585 1583 faddd %f30,%f28,%f30
1586 1584
1587 1585 faddd %f6,%f0,%f6
1588 1586
1589 1587 faddd %f14,%f8,%f14
1590 1588
1591 1589 faddd %f22,%f16,%f22
1592 1590
1593 1591 faddd %f30,%f24,%f30
1594 1592 mov %l0,%l4
1595 1593
1596 1594 fnegd %f6,%f4
1597 1595 lda [%i1]%asi,%l0 ! preload next argument
1598 1596
1599 1597 fnegd %f14,%f12
1600 1598 lda [%i1]%asi,%f0
1601 1599
1602 1600 fnegd %f22,%f20
1603 1601 lda [%i1+4]%asi,%f3
1604 1602
1605 1603 fnegd %f30,%f28
1606 1604 andn %l0,%i5,%l0
1607 1605 add %i1,%i2,%i1
1608 1606
1609 1607 andcc %l4,2,%g0
1610 1608 fmovdnz %icc,%f4,%f6
1611 1609 st %f6,[%o0]
1612 1610
1613 1611 andcc %l1,2,%g0
1614 1612 fmovdnz %icc,%f12,%f14
1615 1613 st %f14,[%o1]
1616 1614
1617 1615 andcc %l2,2,%g0
1618 1616 fmovdnz %icc,%f20,%f22
1619 1617 st %f22,[%o2]
1620 1618
1621 1619 andcc %l3,2,%g0
1622 1620 fmovdnz %icc,%f28,%f30
1623 1621 st %f30,[%o3]
1624 1622
1625 1623 addcc %i0,-1,%i0
1626 1624 bg,pt %icc,.loop0
1627 1625 ! delay slot
1628 1626 st %f7,[%o0+4]
1629 1627
1630 1628 ba,pt %icc,.end
1631 1629 ! delay slot
1632 1630 nop
1633 1631
1634 1632 .align 16
1635 1633 .case6:
1636 1634 fmuld %f26,%f26,%f24
1637 1635 andcc %l3,1,%g0
1638 1636 bz,pn %icc,.case7
1639 1637 ! delay slot
1640 1638 fxor %f30,%f38,%f38
1641 1639
1642 1640 fmuld %f8,pp3,%f14 ! sin(x1)
1643 1641
1644 1642 fmuld %f16,pp3,%f22 ! sin(x2)
1645 1643
1646 1644 fmuld %f0,qq3,%f6 ! cos(x0)
1647 1645
1648 1646 faddd %f14,pp2,%f14
1649 1647 fmuld %f8,qq2,%f12
1650 1648
1651 1649 faddd %f22,pp2,%f22
1652 1650 fmuld %f16,qq2,%f20
1653 1651
1654 1652 fmuld %f24,qq3,%f30 ! cos(x3)
1655 1653
1656 1654 faddd %f6,qq2,%f6
1657 1655 fmuld %f0,pp2,%f4
1658 1656
1659 1657 fmuld %f8,%f14,%f14
1660 1658 faddd %f12,qq1,%f12
1661 1659
1662 1660 fmuld %f16,%f22,%f22
1663 1661 faddd %f20,qq1,%f20
1664 1662
1665 1663 faddd %f30,qq2,%f30
1666 1664 fmuld %f24,pp2,%f28
1667 1665
1668 1666 fmuld %f0,%f6,%f6
1669 1667 faddd %f4,pp1,%f4
1670 1668
1671 1669 faddd %f14,pp1,%f14
1672 1670 fmuld %f8,%f12,%f12
1673 1671 add %l5,%g1,%l5
1674 1672
1675 1673 faddd %f22,pp1,%f22
1676 1674 fmuld %f16,%f20,%f20
1677 1675 add %l6,%g1,%l6
1678 1676
1679 1677 fmuld %f24,%f30,%f30
1680 1678 faddd %f28,pp1,%f28
1681 1679
1682 1680 faddd %f6,qq1,%f6
1683 1681 fmuld %f0,%f4,%f4
1684 1682 add %l4,%g1,%l4
1685 1683
1686 1684 fmuld %f8,%f14,%f14
1687 1685
1688 1686 fmuld %f16,%f22,%f22
1689 1687
1690 1688 faddd %f30,qq1,%f30
1691 1689 fmuld %f24,%f28,%f28
1692 1690 add %l7,%g1,%l7
1693 1691
1694 1692 fmuld %f2,%f4,%f4
1695 1693
1696 1694 fmuld %f10,%f14,%f14
1697 1695 ldd [%l5+8],%f8
1698 1696
1699 1697 fmuld %f18,%f22,%f22
1700 1698 ldd [%l6+8],%f16
1701 1699
1702 1700 fmuld %f26,%f28,%f28
1703 1701
1704 1702 fmuld %f0,%f6,%f6
1705 1703 faddd %f4,%f32,%f4
1706 1704 ldd [%l4+16],%f0
1707 1705
1708 1706 fmuld %f8,%f12,%f12
1709 1707 faddd %f34,%f14,%f14
1710 1708
1711 1709 fmuld %f16,%f20,%f20
1712 1710 faddd %f36,%f22,%f22
1713 1711
1714 1712 fmuld %f24,%f30,%f30
1715 1713 faddd %f28,%f38,%f28
1716 1714 ldd [%l7+16],%f24
1717 1715
1718 1716 fmuld %f0,%f6,%f6
1719 1717 faddd %f4,%f2,%f4
1720 1718 ldd [%l4+8],%f32
1721 1719
1722 1720 faddd %f10,%f14,%f14
1723 1721 ldd [%l5+16],%f34
1724 1722
1725 1723 faddd %f18,%f22,%f22
1726 1724 ldd [%l6+16],%f36
1727 1725
1728 1726 fmuld %f24,%f30,%f30
1729 1727 faddd %f28,%f26,%f28
1730 1728 ldd [%l7+8],%f38
1731 1729
1732 1730 fmuld %f32,%f4,%f4
1733 1731
1734 1732 fmuld %f34,%f14,%f14
1735 1733
1736 1734 fmuld %f36,%f22,%f22
1737 1735
1738 1736 fmuld %f38,%f28,%f28
1739 1737
1740 1738 fsubd %f6,%f4,%f6
1741 1739
1742 1740 faddd %f14,%f12,%f14
1743 1741
1744 1742 faddd %f22,%f20,%f22
1745 1743
1746 1744 fsubd %f30,%f28,%f30
1747 1745
1748 1746 faddd %f6,%f0,%f6
1749 1747
1750 1748 faddd %f14,%f8,%f14
1751 1749
1752 1750 faddd %f22,%f16,%f22
1753 1751
1754 1752 faddd %f30,%f24,%f30
1755 1753 mov %l0,%l4
1756 1754
1757 1755 fnegd %f6,%f4
1758 1756 lda [%i1]%asi,%l0 ! preload next argument
1759 1757
1760 1758 fnegd %f14,%f12
1761 1759 lda [%i1]%asi,%f0
1762 1760
1763 1761 fnegd %f22,%f20
1764 1762 lda [%i1+4]%asi,%f3
1765 1763
1766 1764 fnegd %f30,%f28
1767 1765 andn %l0,%i5,%l0
1768 1766 add %i1,%i2,%i1
1769 1767
1770 1768 andcc %l4,2,%g0
1771 1769 fmovdnz %icc,%f4,%f6
1772 1770 st %f6,[%o0]
1773 1771
1774 1772 andcc %l1,2,%g0
1775 1773 fmovdnz %icc,%f12,%f14
1776 1774 st %f14,[%o1]
1777 1775
1778 1776 andcc %l2,2,%g0
1779 1777 fmovdnz %icc,%f20,%f22
1780 1778 st %f22,[%o2]
1781 1779
1782 1780 andcc %l3,2,%g0
1783 1781 fmovdnz %icc,%f28,%f30
1784 1782 st %f30,[%o3]
1785 1783
1786 1784 addcc %i0,-1,%i0
1787 1785 bg,pt %icc,.loop0
1788 1786 ! delay slot
1789 1787 st %f7,[%o0+4]
1790 1788
1791 1789 ba,pt %icc,.end
1792 1790 ! delay slot
1793 1791 nop
1794 1792
1795 1793 .align 16
1796 1794 .case7:
1797 1795 fmuld %f8,pp3,%f14 ! sin(x1)
1798 1796
1799 1797 fmuld %f16,pp3,%f22 ! sin(x2)
1800 1798
1801 1799 fmuld %f24,pp3,%f30 ! sin(x3)
1802 1800
1803 1801 fmuld %f0,qq3,%f6 ! cos(x0)
1804 1802
1805 1803 faddd %f14,pp2,%f14
1806 1804 fmuld %f8,qq2,%f12
1807 1805
1808 1806 faddd %f22,pp2,%f22
1809 1807 fmuld %f16,qq2,%f20
1810 1808
1811 1809 faddd %f30,pp2,%f30
1812 1810 fmuld %f24,qq2,%f28
1813 1811
1814 1812 faddd %f6,qq2,%f6
1815 1813 fmuld %f0,pp2,%f4
1816 1814
1817 1815 fmuld %f8,%f14,%f14
1818 1816 faddd %f12,qq1,%f12
1819 1817
1820 1818 fmuld %f16,%f22,%f22
1821 1819 faddd %f20,qq1,%f20
1822 1820
1823 1821 fmuld %f24,%f30,%f30
1824 1822 faddd %f28,qq1,%f28
1825 1823
1826 1824 fmuld %f0,%f6,%f6
1827 1825 faddd %f4,pp1,%f4
1828 1826
1829 1827 faddd %f14,pp1,%f14
1830 1828 fmuld %f8,%f12,%f12
1831 1829 add %l5,%g1,%l5
1832 1830
1833 1831 faddd %f22,pp1,%f22
1834 1832 fmuld %f16,%f20,%f20
1835 1833 add %l6,%g1,%l6
1836 1834
1837 1835 faddd %f30,pp1,%f30
1838 1836 fmuld %f24,%f28,%f28
1839 1837 add %l7,%g1,%l7
1840 1838
1841 1839 faddd %f6,qq1,%f6
1842 1840 fmuld %f0,%f4,%f4
1843 1841 add %l4,%g1,%l4
1844 1842
1845 1843 fmuld %f8,%f14,%f14
1846 1844
1847 1845 fmuld %f16,%f22,%f22
1848 1846
1849 1847 fmuld %f24,%f30,%f30
1850 1848
1851 1849 fmuld %f2,%f4,%f4
1852 1850
1853 1851 fmuld %f10,%f14,%f14
1854 1852 ldd [%l5+8],%f8
1855 1853
1856 1854 fmuld %f18,%f22,%f22
1857 1855 ldd [%l6+8],%f16
1858 1856
1859 1857 fmuld %f26,%f30,%f30
1860 1858 ldd [%l7+8],%f24
1861 1859
1862 1860 fmuld %f0,%f6,%f6
1863 1861 faddd %f4,%f32,%f4
1864 1862 ldd [%l4+16],%f0
1865 1863
1866 1864 fmuld %f8,%f12,%f12
1867 1865 faddd %f34,%f14,%f14
1868 1866
1869 1867 fmuld %f16,%f20,%f20
1870 1868 faddd %f36,%f22,%f22
1871 1869
1872 1870 fmuld %f24,%f28,%f28
1873 1871 faddd %f38,%f30,%f30
1874 1872
1875 1873 fmuld %f0,%f6,%f6
1876 1874 faddd %f4,%f2,%f4
1877 1875 ldd [%l4+8],%f32
1878 1876
1879 1877 faddd %f10,%f14,%f14
1880 1878 ldd [%l5+16],%f34
1881 1879
1882 1880 faddd %f18,%f22,%f22
1883 1881 ldd [%l6+16],%f36
1884 1882
1885 1883 faddd %f26,%f30,%f30
1886 1884 ldd [%l7+16],%f38
1887 1885
1888 1886 fmuld %f32,%f4,%f4
1889 1887
1890 1888 fmuld %f34,%f14,%f14
1891 1889
1892 1890 fmuld %f36,%f22,%f22
1893 1891
1894 1892 fmuld %f38,%f30,%f30
1895 1893
1896 1894 fsubd %f6,%f4,%f6
1897 1895
1898 1896 faddd %f14,%f12,%f14
1899 1897
1900 1898 faddd %f22,%f20,%f22
1901 1899
1902 1900 faddd %f30,%f28,%f30
1903 1901
1904 1902 faddd %f6,%f0,%f6
1905 1903
1906 1904 faddd %f14,%f8,%f14
1907 1905
1908 1906 faddd %f22,%f16,%f22
1909 1907
1910 1908 faddd %f30,%f24,%f30
1911 1909 mov %l0,%l4
1912 1910
1913 1911 fnegd %f6,%f4
1914 1912 lda [%i1]%asi,%l0 ! preload next argument
1915 1913
1916 1914 fnegd %f14,%f12
1917 1915 lda [%i1]%asi,%f0
1918 1916
1919 1917 fnegd %f22,%f20
1920 1918 lda [%i1+4]%asi,%f3
1921 1919
1922 1920 fnegd %f30,%f28
1923 1921 andn %l0,%i5,%l0
1924 1922 add %i1,%i2,%i1
1925 1923
1926 1924 andcc %l4,2,%g0
1927 1925 fmovdnz %icc,%f4,%f6
1928 1926 st %f6,[%o0]
1929 1927
1930 1928 andcc %l1,2,%g0
1931 1929 fmovdnz %icc,%f12,%f14
1932 1930 st %f14,[%o1]
1933 1931
1934 1932 andcc %l2,2,%g0
1935 1933 fmovdnz %icc,%f20,%f22
1936 1934 st %f22,[%o2]
1937 1935
1938 1936 andcc %l3,2,%g0
1939 1937 fmovdnz %icc,%f28,%f30
1940 1938 st %f30,[%o3]
1941 1939
1942 1940 addcc %i0,-1,%i0
1943 1941 bg,pt %icc,.loop0
1944 1942 ! delay slot
1945 1943 st %f7,[%o0+4]
1946 1944
1947 1945 ba,pt %icc,.end
1948 1946 ! delay slot
1949 1947 nop
1950 1948
1951 1949 .align 16
1952 1950 .case8:
1953 1951 fmuld %f10,%f10,%f8
1954 1952 andcc %l1,1,%g0
1955 1953 bz,pn %icc,.case12
1956 1954 ! delay slot
1957 1955 fxor %f14,%f34,%f34
1958 1956
1959 1957 fmuld %f18,%f18,%f16
1960 1958 andcc %l2,1,%g0
1961 1959 bz,pn %icc,.case10
1962 1960 ! delay slot
1963 1961 fxor %f22,%f36,%f36
1964 1962
1965 1963 fmuld %f26,%f26,%f24
1966 1964 andcc %l3,1,%g0
1967 1965 bz,pn %icc,.case9
1968 1966 ! delay slot
1969 1967 fxor %f30,%f38,%f38
1970 1968
1971 1969 fmuld %f0,pp3,%f6 ! sin(x0)
1972 1970
1973 1971 faddd %f6,pp2,%f6
1974 1972 fmuld %f0,qq2,%f4
1975 1973
1976 1974 fmuld %f8,qq3,%f14 ! cos(x1)
1977 1975
1978 1976 fmuld %f16,qq3,%f22 ! cos(x2)
1979 1977
1980 1978 fmuld %f24,qq3,%f30 ! cos(x3)
1981 1979
1982 1980 fmuld %f0,%f6,%f6
1983 1981 faddd %f4,qq1,%f4
1984 1982
1985 1983 faddd %f14,qq2,%f14
1986 1984 fmuld %f8,pp2,%f12
1987 1985
1988 1986 faddd %f22,qq2,%f22
1989 1987 fmuld %f16,pp2,%f20
1990 1988
1991 1989 faddd %f30,qq2,%f30
1992 1990 fmuld %f24,pp2,%f28
1993 1991
1994 1992 faddd %f6,pp1,%f6
1995 1993 fmuld %f0,%f4,%f4
1996 1994 add %l4,%g1,%l4
1997 1995
1998 1996 fmuld %f8,%f14,%f14
1999 1997 faddd %f12,pp1,%f12
2000 1998
2001 1999 fmuld %f16,%f22,%f22
2002 2000 faddd %f20,pp1,%f20
2003 2001
2004 2002 fmuld %f24,%f30,%f30
2005 2003 faddd %f28,pp1,%f28
2006 2004
2007 2005 fmuld %f0,%f6,%f6
2008 2006
2009 2007 faddd %f14,qq1,%f14
2010 2008 fmuld %f8,%f12,%f12
2011 2009 add %l5,%g1,%l5
2012 2010
2013 2011 faddd %f22,qq1,%f22
2014 2012 fmuld %f16,%f20,%f20
2015 2013 add %l6,%g1,%l6
2016 2014
2017 2015 faddd %f30,qq1,%f30
2018 2016 fmuld %f24,%f28,%f28
2019 2017 add %l7,%g1,%l7
2020 2018
2021 2019 fmuld %f2,%f6,%f6
2022 2020 ldd [%l4+8],%f0
2023 2021
2024 2022 fmuld %f10,%f12,%f12
2025 2023
2026 2024 fmuld %f18,%f20,%f20
2027 2025
2028 2026 fmuld %f26,%f28,%f28
2029 2027
2030 2028 fmuld %f0,%f4,%f4
2031 2029 faddd %f32,%f6,%f6
2032 2030
2033 2031 fmuld %f8,%f14,%f14
2034 2032 faddd %f12,%f34,%f12
2035 2033 ldd [%l5+16],%f8
2036 2034
2037 2035 fmuld %f16,%f22,%f22
2038 2036 faddd %f20,%f36,%f20
2039 2037 ldd [%l6+16],%f16
2040 2038
2041 2039 fmuld %f24,%f30,%f30
2042 2040 faddd %f28,%f38,%f28
2043 2041 ldd [%l7+16],%f24
2044 2042
2045 2043 faddd %f2,%f6,%f6
2046 2044 ldd [%l4+16],%f32
2047 2045
2048 2046 fmuld %f8,%f14,%f14
2049 2047 faddd %f12,%f10,%f12
2050 2048 ldd [%l5+8],%f34
2051 2049
2052 2050 fmuld %f16,%f22,%f22
2053 2051 faddd %f20,%f18,%f20
2054 2052 ldd [%l6+8],%f36
2055 2053
2056 2054 fmuld %f24,%f30,%f30
2057 2055 faddd %f28,%f26,%f28
2058 2056 ldd [%l7+8],%f38
2059 2057
2060 2058 fmuld %f32,%f6,%f6
2061 2059
2062 2060 fmuld %f34,%f12,%f12
2063 2061
2064 2062 fmuld %f36,%f20,%f20
2065 2063
2066 2064 fmuld %f38,%f28,%f28
2067 2065
2068 2066 faddd %f6,%f4,%f6
2069 2067
2070 2068 fsubd %f14,%f12,%f14
2071 2069
2072 2070 fsubd %f22,%f20,%f22
2073 2071
2074 2072 fsubd %f30,%f28,%f30
2075 2073
2076 2074 faddd %f6,%f0,%f6
2077 2075
2078 2076 faddd %f14,%f8,%f14
2079 2077
2080 2078 faddd %f22,%f16,%f22
2081 2079
2082 2080 faddd %f30,%f24,%f30
2083 2081 mov %l0,%l4
2084 2082
2085 2083 fnegd %f6,%f4
2086 2084 lda [%i1]%asi,%l0 ! preload next argument
2087 2085
2088 2086 fnegd %f14,%f12
2089 2087 lda [%i1]%asi,%f0
2090 2088
2091 2089 fnegd %f22,%f20
2092 2090 lda [%i1+4]%asi,%f3
2093 2091
2094 2092 fnegd %f30,%f28
2095 2093 andn %l0,%i5,%l0
2096 2094 add %i1,%i2,%i1
2097 2095
2098 2096 andcc %l4,2,%g0
2099 2097 fmovdnz %icc,%f4,%f6
2100 2098 st %f6,[%o0]
2101 2099
2102 2100 andcc %l1,2,%g0
2103 2101 fmovdnz %icc,%f12,%f14
2104 2102 st %f14,[%o1]
2105 2103
2106 2104 andcc %l2,2,%g0
2107 2105 fmovdnz %icc,%f20,%f22
2108 2106 st %f22,[%o2]
2109 2107
2110 2108 andcc %l3,2,%g0
2111 2109 fmovdnz %icc,%f28,%f30
2112 2110 st %f30,[%o3]
2113 2111
2114 2112 addcc %i0,-1,%i0
2115 2113 bg,pt %icc,.loop0
2116 2114 ! delay slot
2117 2115 st %f7,[%o0+4]
2118 2116
2119 2117 ba,pt %icc,.end
2120 2118 ! delay slot
2121 2119 nop
2122 2120
2123 2121 .align 16
2124 2122 .case9:
2125 2123 fmuld %f0,pp3,%f6 ! sin(x0)
2126 2124
2127 2125 fmuld %f24,pp3,%f30 ! sin(x3)
2128 2126
2129 2127 faddd %f6,pp2,%f6
2130 2128 fmuld %f0,qq2,%f4
2131 2129
2132 2130 fmuld %f8,qq3,%f14 ! cos(x1)
2133 2131
2134 2132 fmuld %f16,qq3,%f22 ! cos(x2)
2135 2133
2136 2134 faddd %f30,pp2,%f30
2137 2135 fmuld %f24,qq2,%f28
2138 2136
2139 2137 fmuld %f0,%f6,%f6
2140 2138 faddd %f4,qq1,%f4
2141 2139
2142 2140 faddd %f14,qq2,%f14
2143 2141 fmuld %f8,pp2,%f12
2144 2142
2145 2143 faddd %f22,qq2,%f22
2146 2144 fmuld %f16,pp2,%f20
2147 2145
2148 2146 fmuld %f24,%f30,%f30
2149 2147 faddd %f28,qq1,%f28
2150 2148
2151 2149 faddd %f6,pp1,%f6
2152 2150 fmuld %f0,%f4,%f4
2153 2151 add %l4,%g1,%l4
2154 2152
2155 2153 fmuld %f8,%f14,%f14
2156 2154 faddd %f12,pp1,%f12
2157 2155
2158 2156 fmuld %f16,%f22,%f22
2159 2157 faddd %f20,pp1,%f20
2160 2158
2161 2159 faddd %f30,pp1,%f30
2162 2160 fmuld %f24,%f28,%f28
2163 2161 add %l7,%g1,%l7
2164 2162
2165 2163 fmuld %f0,%f6,%f6
2166 2164
2167 2165 faddd %f14,qq1,%f14
2168 2166 fmuld %f8,%f12,%f12
2169 2167 add %l5,%g1,%l5
2170 2168
2171 2169 faddd %f22,qq1,%f22
2172 2170 fmuld %f16,%f20,%f20
2173 2171 add %l6,%g1,%l6
2174 2172
2175 2173 fmuld %f24,%f30,%f30
2176 2174
2177 2175 fmuld %f2,%f6,%f6
2178 2176 ldd [%l4+8],%f0
2179 2177
2180 2178 fmuld %f10,%f12,%f12
2181 2179
2182 2180 fmuld %f18,%f20,%f20
2183 2181
2184 2182 fmuld %f26,%f30,%f30
2185 2183 ldd [%l7+8],%f24
2186 2184
2187 2185 fmuld %f0,%f4,%f4
2188 2186 faddd %f32,%f6,%f6
2189 2187
2190 2188 fmuld %f8,%f14,%f14
2191 2189 faddd %f12,%f34,%f12
2192 2190 ldd [%l5+16],%f8
2193 2191
2194 2192 fmuld %f16,%f22,%f22
2195 2193 faddd %f20,%f36,%f20
2196 2194 ldd [%l6+16],%f16
2197 2195
2198 2196 fmuld %f24,%f28,%f28
2199 2197 faddd %f38,%f30,%f30
2200 2198
2201 2199 faddd %f2,%f6,%f6
2202 2200 ldd [%l4+16],%f32
2203 2201
2204 2202 fmuld %f8,%f14,%f14
2205 2203 faddd %f12,%f10,%f12
2206 2204 ldd [%l5+8],%f34
2207 2205
2208 2206 fmuld %f16,%f22,%f22
2209 2207 faddd %f20,%f18,%f20
2210 2208 ldd [%l6+8],%f36
2211 2209
2212 2210 faddd %f26,%f30,%f30
2213 2211 ldd [%l7+16],%f38
2214 2212
2215 2213 fmuld %f32,%f6,%f6
2216 2214
2217 2215 fmuld %f34,%f12,%f12
2218 2216
2219 2217 fmuld %f36,%f20,%f20
2220 2218
2221 2219 fmuld %f38,%f30,%f30
2222 2220
2223 2221 faddd %f6,%f4,%f6
2224 2222
2225 2223 fsubd %f14,%f12,%f14
2226 2224
2227 2225 fsubd %f22,%f20,%f22
2228 2226
2229 2227 faddd %f30,%f28,%f30
2230 2228
2231 2229 faddd %f6,%f0,%f6
2232 2230
2233 2231 faddd %f14,%f8,%f14
2234 2232
2235 2233 faddd %f22,%f16,%f22
2236 2234
2237 2235 faddd %f30,%f24,%f30
2238 2236 mov %l0,%l4
2239 2237
2240 2238 fnegd %f6,%f4
2241 2239 lda [%i1]%asi,%l0 ! preload next argument
2242 2240
2243 2241 fnegd %f14,%f12
2244 2242 lda [%i1]%asi,%f0
2245 2243
2246 2244 fnegd %f22,%f20
2247 2245 lda [%i1+4]%asi,%f3
2248 2246
2249 2247 fnegd %f30,%f28
2250 2248 andn %l0,%i5,%l0
2251 2249 add %i1,%i2,%i1
2252 2250
2253 2251 andcc %l4,2,%g0
2254 2252 fmovdnz %icc,%f4,%f6
2255 2253 st %f6,[%o0]
2256 2254
2257 2255 andcc %l1,2,%g0
2258 2256 fmovdnz %icc,%f12,%f14
2259 2257 st %f14,[%o1]
2260 2258
2261 2259 andcc %l2,2,%g0
2262 2260 fmovdnz %icc,%f20,%f22
2263 2261 st %f22,[%o2]
2264 2262
2265 2263 andcc %l3,2,%g0
2266 2264 fmovdnz %icc,%f28,%f30
2267 2265 st %f30,[%o3]
2268 2266
2269 2267 addcc %i0,-1,%i0
2270 2268 bg,pt %icc,.loop0
2271 2269 ! delay slot
2272 2270 st %f7,[%o0+4]
2273 2271
2274 2272 ba,pt %icc,.end
2275 2273 ! delay slot
2276 2274 nop
2277 2275
2278 2276 .align 16
2279 2277 .case10:
2280 2278 fmuld %f26,%f26,%f24
2281 2279 andcc %l3,1,%g0
2282 2280 bz,pn %icc,.case11
2283 2281 ! delay slot
2284 2282 fxor %f30,%f38,%f38
2285 2283
2286 2284 fmuld %f0,pp3,%f6 ! sin(x0)
2287 2285
2288 2286 fmuld %f16,pp3,%f22 ! sin(x2)
2289 2287
2290 2288 faddd %f6,pp2,%f6
2291 2289 fmuld %f0,qq2,%f4
2292 2290
2293 2291 fmuld %f8,qq3,%f14 ! cos(x1)
2294 2292
2295 2293 faddd %f22,pp2,%f22
2296 2294 fmuld %f16,qq2,%f20
2297 2295
2298 2296 fmuld %f24,qq3,%f30 ! cos(x3)
2299 2297
2300 2298 fmuld %f0,%f6,%f6
2301 2299 faddd %f4,qq1,%f4
2302 2300
2303 2301 faddd %f14,qq2,%f14
2304 2302 fmuld %f8,pp2,%f12
2305 2303
2306 2304 fmuld %f16,%f22,%f22
2307 2305 faddd %f20,qq1,%f20
2308 2306
2309 2307 faddd %f30,qq2,%f30
2310 2308 fmuld %f24,pp2,%f28
2311 2309
2312 2310 faddd %f6,pp1,%f6
2313 2311 fmuld %f0,%f4,%f4
2314 2312 add %l4,%g1,%l4
2315 2313
2316 2314 fmuld %f8,%f14,%f14
2317 2315 faddd %f12,pp1,%f12
2318 2316
2319 2317 faddd %f22,pp1,%f22
2320 2318 fmuld %f16,%f20,%f20
2321 2319 add %l6,%g1,%l6
2322 2320
2323 2321 fmuld %f24,%f30,%f30
2324 2322 faddd %f28,pp1,%f28
2325 2323
2326 2324 fmuld %f0,%f6,%f6
2327 2325
2328 2326 faddd %f14,qq1,%f14
2329 2327 fmuld %f8,%f12,%f12
2330 2328 add %l5,%g1,%l5
2331 2329
2332 2330 fmuld %f16,%f22,%f22
2333 2331
2334 2332 faddd %f30,qq1,%f30
2335 2333 fmuld %f24,%f28,%f28
2336 2334 add %l7,%g1,%l7
2337 2335
2338 2336 fmuld %f2,%f6,%f6
2339 2337 ldd [%l4+8],%f0
2340 2338
2341 2339 fmuld %f10,%f12,%f12
2342 2340
2343 2341 fmuld %f18,%f22,%f22
2344 2342 ldd [%l6+8],%f16
2345 2343
2346 2344 fmuld %f26,%f28,%f28
2347 2345
2348 2346 fmuld %f0,%f4,%f4
2349 2347 faddd %f32,%f6,%f6
2350 2348
2351 2349 fmuld %f8,%f14,%f14
2352 2350 faddd %f12,%f34,%f12
2353 2351 ldd [%l5+16],%f8
2354 2352
2355 2353 fmuld %f16,%f20,%f20
2356 2354 faddd %f36,%f22,%f22
2357 2355
2358 2356 fmuld %f24,%f30,%f30
2359 2357 faddd %f28,%f38,%f28
2360 2358 ldd [%l7+16],%f24
2361 2359
2362 2360 faddd %f2,%f6,%f6
2363 2361 ldd [%l4+16],%f32
2364 2362
2365 2363 fmuld %f8,%f14,%f14
2366 2364 faddd %f12,%f10,%f12
2367 2365 ldd [%l5+8],%f34
2368 2366
2369 2367 faddd %f18,%f22,%f22
2370 2368 ldd [%l6+16],%f36
2371 2369
2372 2370 fmuld %f24,%f30,%f30
2373 2371 faddd %f28,%f26,%f28
2374 2372 ldd [%l7+8],%f38
2375 2373
2376 2374 fmuld %f32,%f6,%f6
2377 2375
2378 2376 fmuld %f34,%f12,%f12
2379 2377
2380 2378 fmuld %f36,%f22,%f22
2381 2379
2382 2380 fmuld %f38,%f28,%f28
2383 2381
2384 2382 faddd %f6,%f4,%f6
2385 2383
2386 2384 fsubd %f14,%f12,%f14
2387 2385
2388 2386 faddd %f22,%f20,%f22
2389 2387
2390 2388 fsubd %f30,%f28,%f30
2391 2389
2392 2390 faddd %f6,%f0,%f6
2393 2391
2394 2392 faddd %f14,%f8,%f14
2395 2393
2396 2394 faddd %f22,%f16,%f22
2397 2395
2398 2396 faddd %f30,%f24,%f30
2399 2397 mov %l0,%l4
2400 2398
2401 2399 fnegd %f6,%f4
2402 2400 lda [%i1]%asi,%l0 ! preload next argument
2403 2401
2404 2402 fnegd %f14,%f12
2405 2403 lda [%i1]%asi,%f0
2406 2404
2407 2405 fnegd %f22,%f20
2408 2406 lda [%i1+4]%asi,%f3
2409 2407
2410 2408 fnegd %f30,%f28
2411 2409 andn %l0,%i5,%l0
2412 2410 add %i1,%i2,%i1
2413 2411
2414 2412 andcc %l4,2,%g0
2415 2413 fmovdnz %icc,%f4,%f6
2416 2414 st %f6,[%o0]
2417 2415
2418 2416 andcc %l1,2,%g0
2419 2417 fmovdnz %icc,%f12,%f14
2420 2418 st %f14,[%o1]
2421 2419
2422 2420 andcc %l2,2,%g0
2423 2421 fmovdnz %icc,%f20,%f22
2424 2422 st %f22,[%o2]
2425 2423
2426 2424 andcc %l3,2,%g0
2427 2425 fmovdnz %icc,%f28,%f30
2428 2426 st %f30,[%o3]
2429 2427
2430 2428 addcc %i0,-1,%i0
2431 2429 bg,pt %icc,.loop0
2432 2430 ! delay slot
2433 2431 st %f7,[%o0+4]
2434 2432
2435 2433 ba,pt %icc,.end
2436 2434 ! delay slot
2437 2435 nop
2438 2436
2439 2437 .align 16
2440 2438 .case11:
2441 2439 fmuld %f0,pp3,%f6 ! sin(x0)
2442 2440
2443 2441 fmuld %f16,pp3,%f22 ! sin(x2)
2444 2442
2445 2443 fmuld %f24,pp3,%f30 ! sin(x3)
2446 2444
2447 2445 faddd %f6,pp2,%f6
2448 2446 fmuld %f0,qq2,%f4
2449 2447
2450 2448 fmuld %f8,qq3,%f14 ! cos(x1)
2451 2449
2452 2450 faddd %f22,pp2,%f22
2453 2451 fmuld %f16,qq2,%f20
2454 2452
2455 2453 faddd %f30,pp2,%f30
2456 2454 fmuld %f24,qq2,%f28
2457 2455
2458 2456 fmuld %f0,%f6,%f6
2459 2457 faddd %f4,qq1,%f4
2460 2458
2461 2459 faddd %f14,qq2,%f14
2462 2460 fmuld %f8,pp2,%f12
2463 2461
2464 2462 fmuld %f16,%f22,%f22
2465 2463 faddd %f20,qq1,%f20
2466 2464
2467 2465 fmuld %f24,%f30,%f30
2468 2466 faddd %f28,qq1,%f28
2469 2467
2470 2468 faddd %f6,pp1,%f6
2471 2469 fmuld %f0,%f4,%f4
2472 2470 add %l4,%g1,%l4
2473 2471
2474 2472 fmuld %f8,%f14,%f14
2475 2473 faddd %f12,pp1,%f12
2476 2474
2477 2475 faddd %f22,pp1,%f22
2478 2476 fmuld %f16,%f20,%f20
2479 2477 add %l6,%g1,%l6
2480 2478
2481 2479 faddd %f30,pp1,%f30
2482 2480 fmuld %f24,%f28,%f28
2483 2481 add %l7,%g1,%l7
2484 2482
2485 2483 fmuld %f0,%f6,%f6
2486 2484
2487 2485 faddd %f14,qq1,%f14
2488 2486 fmuld %f8,%f12,%f12
2489 2487 add %l5,%g1,%l5
2490 2488
2491 2489 fmuld %f16,%f22,%f22
2492 2490
2493 2491 fmuld %f24,%f30,%f30
2494 2492
2495 2493 fmuld %f2,%f6,%f6
2496 2494 ldd [%l4+8],%f0
2497 2495
2498 2496 fmuld %f10,%f12,%f12
2499 2497
2500 2498 fmuld %f18,%f22,%f22
2501 2499 ldd [%l6+8],%f16
2502 2500
2503 2501 fmuld %f26,%f30,%f30
2504 2502 ldd [%l7+8],%f24
2505 2503
2506 2504 fmuld %f0,%f4,%f4
2507 2505 faddd %f32,%f6,%f6
2508 2506
2509 2507 fmuld %f8,%f14,%f14
2510 2508 faddd %f12,%f34,%f12
2511 2509 ldd [%l5+16],%f8
2512 2510
2513 2511 fmuld %f16,%f20,%f20
2514 2512 faddd %f36,%f22,%f22
2515 2513
2516 2514 fmuld %f24,%f28,%f28
2517 2515 faddd %f38,%f30,%f30
2518 2516
2519 2517 faddd %f2,%f6,%f6
2520 2518 ldd [%l4+16],%f32
2521 2519
2522 2520 fmuld %f8,%f14,%f14
2523 2521 faddd %f12,%f10,%f12
2524 2522 ldd [%l5+8],%f34
2525 2523
2526 2524 faddd %f18,%f22,%f22
2527 2525 ldd [%l6+16],%f36
2528 2526
2529 2527 faddd %f26,%f30,%f30
2530 2528 ldd [%l7+16],%f38
2531 2529
2532 2530 fmuld %f32,%f6,%f6
2533 2531
2534 2532 fmuld %f34,%f12,%f12
2535 2533
2536 2534 fmuld %f36,%f22,%f22
2537 2535
2538 2536 fmuld %f38,%f30,%f30
2539 2537
2540 2538 faddd %f6,%f4,%f6
2541 2539
2542 2540 fsubd %f14,%f12,%f14
2543 2541
2544 2542 faddd %f22,%f20,%f22
2545 2543
2546 2544 faddd %f30,%f28,%f30
2547 2545
2548 2546 faddd %f6,%f0,%f6
2549 2547
2550 2548 faddd %f14,%f8,%f14
2551 2549
2552 2550 faddd %f22,%f16,%f22
2553 2551
2554 2552 faddd %f30,%f24,%f30
2555 2553 mov %l0,%l4
2556 2554
2557 2555 fnegd %f6,%f4
2558 2556 lda [%i1]%asi,%l0 ! preload next argument
2559 2557
2560 2558 fnegd %f14,%f12
2561 2559 lda [%i1]%asi,%f0
2562 2560
2563 2561 fnegd %f22,%f20
2564 2562 lda [%i1+4]%asi,%f3
2565 2563
2566 2564 fnegd %f30,%f28
2567 2565 andn %l0,%i5,%l0
2568 2566 add %i1,%i2,%i1
2569 2567
2570 2568 andcc %l4,2,%g0
2571 2569 fmovdnz %icc,%f4,%f6
2572 2570 st %f6,[%o0]
2573 2571
2574 2572 andcc %l1,2,%g0
2575 2573 fmovdnz %icc,%f12,%f14
2576 2574 st %f14,[%o1]
2577 2575
2578 2576 andcc %l2,2,%g0
2579 2577 fmovdnz %icc,%f20,%f22
2580 2578 st %f22,[%o2]
2581 2579
2582 2580 andcc %l3,2,%g0
2583 2581 fmovdnz %icc,%f28,%f30
2584 2582 st %f30,[%o3]
2585 2583
2586 2584 addcc %i0,-1,%i0
2587 2585 bg,pt %icc,.loop0
2588 2586 ! delay slot
2589 2587 st %f7,[%o0+4]
2590 2588
2591 2589 ba,pt %icc,.end
2592 2590 ! delay slot
2593 2591 nop
2594 2592
2595 2593 .align 16
2596 2594 .case12:
2597 2595 fmuld %f18,%f18,%f16
2598 2596 andcc %l2,1,%g0
2599 2597 bz,pn %icc,.case14
2600 2598 ! delay slot
2601 2599 fxor %f22,%f36,%f36
2602 2600
2603 2601 fmuld %f26,%f26,%f24
2604 2602 andcc %l3,1,%g0
2605 2603 bz,pn %icc,.case13
2606 2604 ! delay slot
2607 2605 fxor %f30,%f38,%f38
2608 2606
2609 2607 fmuld %f0,pp3,%f6 ! sin(x0)
2610 2608
2611 2609 fmuld %f8,pp3,%f14 ! sin(x1)
2612 2610
2613 2611 faddd %f6,pp2,%f6
2614 2612 fmuld %f0,qq2,%f4
2615 2613
2616 2614 faddd %f14,pp2,%f14
2617 2615 fmuld %f8,qq2,%f12
2618 2616
2619 2617 fmuld %f16,qq3,%f22 ! cos(x2)
2620 2618
2621 2619 fmuld %f24,qq3,%f30 ! cos(x3)
2622 2620
2623 2621 fmuld %f0,%f6,%f6
2624 2622 faddd %f4,qq1,%f4
2625 2623
2626 2624 fmuld %f8,%f14,%f14
2627 2625 faddd %f12,qq1,%f12
2628 2626
2629 2627 faddd %f22,qq2,%f22
2630 2628 fmuld %f16,pp2,%f20
2631 2629
2632 2630 faddd %f30,qq2,%f30
2633 2631 fmuld %f24,pp2,%f28
2634 2632
2635 2633 faddd %f6,pp1,%f6
2636 2634 fmuld %f0,%f4,%f4
2637 2635 add %l4,%g1,%l4
2638 2636
2639 2637 faddd %f14,pp1,%f14
2640 2638 fmuld %f8,%f12,%f12
2641 2639 add %l5,%g1,%l5
2642 2640
2643 2641 fmuld %f16,%f22,%f22
2644 2642 faddd %f20,pp1,%f20
2645 2643
2646 2644 fmuld %f24,%f30,%f30
2647 2645 faddd %f28,pp1,%f28
2648 2646
2649 2647 fmuld %f0,%f6,%f6
2650 2648
2651 2649 fmuld %f8,%f14,%f14
2652 2650
2653 2651 faddd %f22,qq1,%f22
2654 2652 fmuld %f16,%f20,%f20
2655 2653 add %l6,%g1,%l6
2656 2654
2657 2655 faddd %f30,qq1,%f30
2658 2656 fmuld %f24,%f28,%f28
2659 2657 add %l7,%g1,%l7
2660 2658
2661 2659 fmuld %f2,%f6,%f6
2662 2660 ldd [%l4+8],%f0
2663 2661
2664 2662 fmuld %f10,%f14,%f14
2665 2663 ldd [%l5+8],%f8
2666 2664
2667 2665 fmuld %f18,%f20,%f20
2668 2666
2669 2667 fmuld %f26,%f28,%f28
2670 2668
2671 2669 fmuld %f0,%f4,%f4
2672 2670 faddd %f32,%f6,%f6
2673 2671
2674 2672 fmuld %f8,%f12,%f12
2675 2673 faddd %f34,%f14,%f14
2676 2674
2677 2675 fmuld %f16,%f22,%f22
2678 2676 faddd %f20,%f36,%f20
2679 2677 ldd [%l6+16],%f16
2680 2678
2681 2679 fmuld %f24,%f30,%f30
2682 2680 faddd %f28,%f38,%f28
2683 2681 ldd [%l7+16],%f24
2684 2682
2685 2683 faddd %f2,%f6,%f6
2686 2684 ldd [%l4+16],%f32
2687 2685
2688 2686 faddd %f10,%f14,%f14
2689 2687 ldd [%l5+16],%f34
2690 2688
2691 2689 fmuld %f16,%f22,%f22
2692 2690 faddd %f20,%f18,%f20
2693 2691 ldd [%l6+8],%f36
2694 2692
2695 2693 fmuld %f24,%f30,%f30
2696 2694 faddd %f28,%f26,%f28
2697 2695 ldd [%l7+8],%f38
2698 2696
2699 2697 fmuld %f32,%f6,%f6
2700 2698
2701 2699 fmuld %f34,%f14,%f14
2702 2700
2703 2701 fmuld %f36,%f20,%f20
2704 2702
2705 2703 fmuld %f38,%f28,%f28
2706 2704
2707 2705 faddd %f6,%f4,%f6
2708 2706
2709 2707 faddd %f14,%f12,%f14
2710 2708
2711 2709 fsubd %f22,%f20,%f22
2712 2710
2713 2711 fsubd %f30,%f28,%f30
2714 2712
2715 2713 faddd %f6,%f0,%f6
2716 2714
2717 2715 faddd %f14,%f8,%f14
2718 2716
2719 2717 faddd %f22,%f16,%f22
2720 2718
2721 2719 faddd %f30,%f24,%f30
2722 2720 mov %l0,%l4
2723 2721
2724 2722 fnegd %f6,%f4
2725 2723 lda [%i1]%asi,%l0 ! preload next argument
2726 2724
2727 2725 fnegd %f14,%f12
2728 2726 lda [%i1]%asi,%f0
2729 2727
2730 2728 fnegd %f22,%f20
2731 2729 lda [%i1+4]%asi,%f3
2732 2730
2733 2731 fnegd %f30,%f28
2734 2732 andn %l0,%i5,%l0
2735 2733 add %i1,%i2,%i1
2736 2734
2737 2735 andcc %l4,2,%g0
2738 2736 fmovdnz %icc,%f4,%f6
2739 2737 st %f6,[%o0]
2740 2738
2741 2739 andcc %l1,2,%g0
2742 2740 fmovdnz %icc,%f12,%f14
2743 2741 st %f14,[%o1]
2744 2742
2745 2743 andcc %l2,2,%g0
2746 2744 fmovdnz %icc,%f20,%f22
2747 2745 st %f22,[%o2]
2748 2746
2749 2747 andcc %l3,2,%g0
2750 2748 fmovdnz %icc,%f28,%f30
2751 2749 st %f30,[%o3]
2752 2750
2753 2751 addcc %i0,-1,%i0
2754 2752 bg,pt %icc,.loop0
2755 2753 ! delay slot
2756 2754 st %f7,[%o0+4]
2757 2755
2758 2756 ba,pt %icc,.end
2759 2757 ! delay slot
2760 2758 nop
2761 2759
2762 2760 .align 16
2763 2761 .case13:
2764 2762 fmuld %f0,pp3,%f6 ! sin(x0)
2765 2763
2766 2764 fmuld %f8,pp3,%f14 ! sin(x1)
2767 2765
2768 2766 fmuld %f24,pp3,%f30 ! sin(x3)
2769 2767
2770 2768 faddd %f6,pp2,%f6
2771 2769 fmuld %f0,qq2,%f4
2772 2770
2773 2771 faddd %f14,pp2,%f14
2774 2772 fmuld %f8,qq2,%f12
2775 2773
2776 2774 fmuld %f16,qq3,%f22 ! cos(x2)
2777 2775
2778 2776 faddd %f30,pp2,%f30
2779 2777 fmuld %f24,qq2,%f28
2780 2778
2781 2779 fmuld %f0,%f6,%f6
2782 2780 faddd %f4,qq1,%f4
2783 2781
2784 2782 fmuld %f8,%f14,%f14
2785 2783 faddd %f12,qq1,%f12
2786 2784
2787 2785 faddd %f22,qq2,%f22
2788 2786 fmuld %f16,pp2,%f20
2789 2787
2790 2788 fmuld %f24,%f30,%f30
2791 2789 faddd %f28,qq1,%f28
2792 2790
2793 2791 faddd %f6,pp1,%f6
2794 2792 fmuld %f0,%f4,%f4
2795 2793 add %l4,%g1,%l4
2796 2794
2797 2795 faddd %f14,pp1,%f14
2798 2796 fmuld %f8,%f12,%f12
2799 2797 add %l5,%g1,%l5
2800 2798
2801 2799 fmuld %f16,%f22,%f22
2802 2800 faddd %f20,pp1,%f20
2803 2801
2804 2802 faddd %f30,pp1,%f30
2805 2803 fmuld %f24,%f28,%f28
2806 2804 add %l7,%g1,%l7
2807 2805
2808 2806 fmuld %f0,%f6,%f6
2809 2807
2810 2808 fmuld %f8,%f14,%f14
2811 2809
2812 2810 faddd %f22,qq1,%f22
2813 2811 fmuld %f16,%f20,%f20
2814 2812 add %l6,%g1,%l6
2815 2813
2816 2814 fmuld %f24,%f30,%f30
2817 2815
2818 2816 fmuld %f2,%f6,%f6
2819 2817 ldd [%l4+8],%f0
2820 2818
2821 2819 fmuld %f10,%f14,%f14
2822 2820 ldd [%l5+8],%f8
2823 2821
2824 2822 fmuld %f18,%f20,%f20
2825 2823
2826 2824 fmuld %f26,%f30,%f30
2827 2825 ldd [%l7+8],%f24
2828 2826
2829 2827 fmuld %f0,%f4,%f4
2830 2828 faddd %f32,%f6,%f6
2831 2829
2832 2830 fmuld %f8,%f12,%f12
2833 2831 faddd %f34,%f14,%f14
2834 2832
2835 2833 fmuld %f16,%f22,%f22
2836 2834 faddd %f20,%f36,%f20
2837 2835 ldd [%l6+16],%f16
2838 2836
2839 2837 fmuld %f24,%f28,%f28
2840 2838 faddd %f38,%f30,%f30
2841 2839
2842 2840 faddd %f2,%f6,%f6
2843 2841 ldd [%l4+16],%f32
2844 2842
2845 2843 faddd %f10,%f14,%f14
2846 2844 ldd [%l5+16],%f34
2847 2845
2848 2846 fmuld %f16,%f22,%f22
2849 2847 faddd %f20,%f18,%f20
2850 2848 ldd [%l6+8],%f36
2851 2849
2852 2850 faddd %f26,%f30,%f30
2853 2851 ldd [%l7+16],%f38
2854 2852
2855 2853 fmuld %f32,%f6,%f6
2856 2854
2857 2855 fmuld %f34,%f14,%f14
2858 2856
2859 2857 fmuld %f36,%f20,%f20
2860 2858
2861 2859 fmuld %f38,%f30,%f30
2862 2860
2863 2861 faddd %f6,%f4,%f6
2864 2862
2865 2863 faddd %f14,%f12,%f14
2866 2864
2867 2865 fsubd %f22,%f20,%f22
2868 2866
2869 2867 faddd %f30,%f28,%f30
2870 2868
2871 2869 faddd %f6,%f0,%f6
2872 2870
2873 2871 faddd %f14,%f8,%f14
2874 2872
2875 2873 faddd %f22,%f16,%f22
2876 2874
2877 2875 faddd %f30,%f24,%f30
2878 2876 mov %l0,%l4
2879 2877
2880 2878 fnegd %f6,%f4
2881 2879 lda [%i1]%asi,%l0 ! preload next argument
2882 2880
2883 2881 fnegd %f14,%f12
2884 2882 lda [%i1]%asi,%f0
2885 2883
2886 2884 fnegd %f22,%f20
2887 2885 lda [%i1+4]%asi,%f3
2888 2886
2889 2887 fnegd %f30,%f28
2890 2888 andn %l0,%i5,%l0
2891 2889 add %i1,%i2,%i1
2892 2890
2893 2891 andcc %l4,2,%g0
2894 2892 fmovdnz %icc,%f4,%f6
2895 2893 st %f6,[%o0]
2896 2894
2897 2895 andcc %l1,2,%g0
2898 2896 fmovdnz %icc,%f12,%f14
2899 2897 st %f14,[%o1]
2900 2898
2901 2899 andcc %l2,2,%g0
2902 2900 fmovdnz %icc,%f20,%f22
2903 2901 st %f22,[%o2]
2904 2902
2905 2903 andcc %l3,2,%g0
2906 2904 fmovdnz %icc,%f28,%f30
2907 2905 st %f30,[%o3]
2908 2906
2909 2907 addcc %i0,-1,%i0
2910 2908 bg,pt %icc,.loop0
2911 2909 ! delay slot
2912 2910 st %f7,[%o0+4]
2913 2911
2914 2912 ba,pt %icc,.end
2915 2913 ! delay slot
2916 2914 nop
2917 2915
2918 2916 .align 16
2919 2917 .case14:
2920 2918 fmuld %f26,%f26,%f24
2921 2919 andcc %l3,1,%g0
2922 2920 bz,pn %icc,.case15
2923 2921 ! delay slot
2924 2922 fxor %f30,%f38,%f38
2925 2923
2926 2924 fmuld %f0,pp3,%f6 ! sin(x0)
2927 2925
2928 2926 fmuld %f8,pp3,%f14 ! sin(x1)
2929 2927
2930 2928 fmuld %f16,pp3,%f22 ! sin(x2)
2931 2929
2932 2930 faddd %f6,pp2,%f6
2933 2931 fmuld %f0,qq2,%f4
2934 2932
2935 2933 faddd %f14,pp2,%f14
2936 2934 fmuld %f8,qq2,%f12
2937 2935
2938 2936 faddd %f22,pp2,%f22
2939 2937 fmuld %f16,qq2,%f20
2940 2938
2941 2939 fmuld %f24,qq3,%f30 ! cos(x3)
2942 2940
2943 2941 fmuld %f0,%f6,%f6
2944 2942 faddd %f4,qq1,%f4
2945 2943
2946 2944 fmuld %f8,%f14,%f14
2947 2945 faddd %f12,qq1,%f12
2948 2946
2949 2947 fmuld %f16,%f22,%f22
2950 2948 faddd %f20,qq1,%f20
2951 2949
2952 2950 faddd %f30,qq2,%f30
2953 2951 fmuld %f24,pp2,%f28
2954 2952
2955 2953 faddd %f6,pp1,%f6
2956 2954 fmuld %f0,%f4,%f4
2957 2955 add %l4,%g1,%l4
2958 2956
2959 2957 faddd %f14,pp1,%f14
2960 2958 fmuld %f8,%f12,%f12
2961 2959 add %l5,%g1,%l5
2962 2960
2963 2961 faddd %f22,pp1,%f22
2964 2962 fmuld %f16,%f20,%f20
2965 2963 add %l6,%g1,%l6
2966 2964
2967 2965 fmuld %f24,%f30,%f30
2968 2966 faddd %f28,pp1,%f28
2969 2967
2970 2968 fmuld %f0,%f6,%f6
2971 2969
2972 2970 fmuld %f8,%f14,%f14
2973 2971
2974 2972 fmuld %f16,%f22,%f22
2975 2973
2976 2974 faddd %f30,qq1,%f30
2977 2975 fmuld %f24,%f28,%f28
2978 2976 add %l7,%g1,%l7
2979 2977
2980 2978 fmuld %f2,%f6,%f6
2981 2979 ldd [%l4+8],%f0
2982 2980
2983 2981 fmuld %f10,%f14,%f14
2984 2982 ldd [%l5+8],%f8
2985 2983
2986 2984 fmuld %f18,%f22,%f22
2987 2985 ldd [%l6+8],%f16
2988 2986
2989 2987 fmuld %f26,%f28,%f28
2990 2988
2991 2989 fmuld %f0,%f4,%f4
2992 2990 faddd %f32,%f6,%f6
2993 2991
2994 2992 fmuld %f8,%f12,%f12
2995 2993 faddd %f34,%f14,%f14
2996 2994
2997 2995 fmuld %f16,%f20,%f20
2998 2996 faddd %f36,%f22,%f22
2999 2997
3000 2998 fmuld %f24,%f30,%f30
3001 2999 faddd %f28,%f38,%f28
3002 3000 ldd [%l7+16],%f24
3003 3001
3004 3002 faddd %f2,%f6,%f6
3005 3003 ldd [%l4+16],%f32
3006 3004
3007 3005 faddd %f10,%f14,%f14
3008 3006 ldd [%l5+16],%f34
3009 3007
3010 3008 faddd %f18,%f22,%f22
3011 3009 ldd [%l6+16],%f36
3012 3010
3013 3011 fmuld %f24,%f30,%f30
3014 3012 faddd %f28,%f26,%f28
3015 3013 ldd [%l7+8],%f38
3016 3014
3017 3015 fmuld %f32,%f6,%f6
3018 3016
3019 3017 fmuld %f34,%f14,%f14
3020 3018
3021 3019 fmuld %f36,%f22,%f22
3022 3020
3023 3021 fmuld %f38,%f28,%f28
3024 3022
3025 3023 faddd %f6,%f4,%f6
3026 3024
3027 3025 faddd %f14,%f12,%f14
3028 3026
3029 3027 faddd %f22,%f20,%f22
3030 3028
3031 3029 fsubd %f30,%f28,%f30
3032 3030
3033 3031 faddd %f6,%f0,%f6
3034 3032
3035 3033 faddd %f14,%f8,%f14
3036 3034
3037 3035 faddd %f22,%f16,%f22
3038 3036
3039 3037 faddd %f30,%f24,%f30
3040 3038 mov %l0,%l4
3041 3039
3042 3040 fnegd %f6,%f4
3043 3041 lda [%i1]%asi,%l0 ! preload next argument
3044 3042
3045 3043 fnegd %f14,%f12
3046 3044 lda [%i1]%asi,%f0
3047 3045
3048 3046 fnegd %f22,%f20
3049 3047 lda [%i1+4]%asi,%f3
3050 3048
3051 3049 fnegd %f30,%f28
3052 3050 andn %l0,%i5,%l0
3053 3051 add %i1,%i2,%i1
3054 3052
3055 3053 andcc %l4,2,%g0
3056 3054 fmovdnz %icc,%f4,%f6
3057 3055 st %f6,[%o0]
3058 3056
3059 3057 andcc %l1,2,%g0
3060 3058 fmovdnz %icc,%f12,%f14
3061 3059 st %f14,[%o1]
3062 3060
3063 3061 andcc %l2,2,%g0
3064 3062 fmovdnz %icc,%f20,%f22
3065 3063 st %f22,[%o2]
3066 3064
3067 3065 andcc %l3,2,%g0
3068 3066 fmovdnz %icc,%f28,%f30
3069 3067 st %f30,[%o3]
3070 3068
3071 3069 addcc %i0,-1,%i0
3072 3070 bg,pt %icc,.loop0
3073 3071 ! delay slot
3074 3072 st %f7,[%o0+4]
3075 3073
3076 3074 ba,pt %icc,.end
3077 3075 ! delay slot
3078 3076 nop
3079 3077
3080 3078 .align 16
3081 3079 .case15:
3082 3080 fmuld %f0,pp3,%f6 ! sin(x0)
3083 3081
3084 3082 fmuld %f8,pp3,%f14 ! sin(x1)
3085 3083
3086 3084 fmuld %f16,pp3,%f22 ! sin(x2)
3087 3085
3088 3086 fmuld %f24,pp3,%f30 ! sin(x3)
3089 3087
3090 3088 faddd %f6,pp2,%f6
3091 3089 fmuld %f0,qq2,%f4
3092 3090
3093 3091 faddd %f14,pp2,%f14
3094 3092 fmuld %f8,qq2,%f12
3095 3093
3096 3094 faddd %f22,pp2,%f22
3097 3095 fmuld %f16,qq2,%f20
3098 3096
3099 3097 faddd %f30,pp2,%f30
3100 3098 fmuld %f24,qq2,%f28
3101 3099
3102 3100 fmuld %f0,%f6,%f6
3103 3101 faddd %f4,qq1,%f4
3104 3102
3105 3103 fmuld %f8,%f14,%f14
3106 3104 faddd %f12,qq1,%f12
3107 3105
3108 3106 fmuld %f16,%f22,%f22
3109 3107 faddd %f20,qq1,%f20
3110 3108
3111 3109 fmuld %f24,%f30,%f30
3112 3110 faddd %f28,qq1,%f28
3113 3111
3114 3112 faddd %f6,pp1,%f6
3115 3113 fmuld %f0,%f4,%f4
3116 3114 add %l4,%g1,%l4
3117 3115
3118 3116 faddd %f14,pp1,%f14
3119 3117 fmuld %f8,%f12,%f12
3120 3118 add %l5,%g1,%l5
3121 3119
3122 3120 faddd %f22,pp1,%f22
3123 3121 fmuld %f16,%f20,%f20
3124 3122 add %l6,%g1,%l6
3125 3123
3126 3124 faddd %f30,pp1,%f30
3127 3125 fmuld %f24,%f28,%f28
3128 3126 add %l7,%g1,%l7
3129 3127
3130 3128 fmuld %f0,%f6,%f6
3131 3129
3132 3130 fmuld %f8,%f14,%f14
3133 3131
3134 3132 fmuld %f16,%f22,%f22
3135 3133
3136 3134 fmuld %f24,%f30,%f30
3137 3135
3138 3136 fmuld %f2,%f6,%f6
3139 3137 ldd [%l4+8],%f0
3140 3138
3141 3139 fmuld %f10,%f14,%f14
3142 3140 ldd [%l5+8],%f8
3143 3141
3144 3142 fmuld %f18,%f22,%f22
3145 3143 ldd [%l6+8],%f16
3146 3144
3147 3145 fmuld %f26,%f30,%f30
3148 3146 ldd [%l7+8],%f24
3149 3147
3150 3148 fmuld %f0,%f4,%f4
3151 3149 faddd %f32,%f6,%f6
3152 3150
3153 3151 fmuld %f8,%f12,%f12
3154 3152 faddd %f34,%f14,%f14
3155 3153
3156 3154 fmuld %f16,%f20,%f20
3157 3155 faddd %f36,%f22,%f22
3158 3156
3159 3157 fmuld %f24,%f28,%f28
3160 3158 faddd %f38,%f30,%f30
3161 3159
3162 3160 faddd %f2,%f6,%f6
3163 3161 ldd [%l4+16],%f32
3164 3162
3165 3163 faddd %f10,%f14,%f14
3166 3164 ldd [%l5+16],%f34
3167 3165
3168 3166 faddd %f18,%f22,%f22
3169 3167 ldd [%l6+16],%f36
3170 3168
3171 3169 faddd %f26,%f30,%f30
3172 3170 ldd [%l7+16],%f38
3173 3171
3174 3172 fmuld %f32,%f6,%f6
3175 3173
3176 3174 fmuld %f34,%f14,%f14
3177 3175
3178 3176 fmuld %f36,%f22,%f22
3179 3177
3180 3178 fmuld %f38,%f30,%f30
3181 3179
3182 3180 faddd %f6,%f4,%f6
3183 3181
3184 3182 faddd %f14,%f12,%f14
3185 3183
3186 3184 faddd %f22,%f20,%f22
3187 3185
3188 3186 faddd %f30,%f28,%f30
3189 3187
3190 3188 faddd %f6,%f0,%f6
3191 3189
3192 3190 faddd %f14,%f8,%f14
3193 3191
3194 3192 faddd %f22,%f16,%f22
3195 3193
3196 3194 faddd %f30,%f24,%f30
3197 3195 mov %l0,%l4
3198 3196
3199 3197 fnegd %f6,%f4
3200 3198 lda [%i1]%asi,%l0 ! preload next argument
3201 3199
3202 3200 fnegd %f14,%f12
3203 3201 lda [%i1]%asi,%f0
3204 3202
3205 3203 fnegd %f22,%f20
3206 3204 lda [%i1+4]%asi,%f3
3207 3205
3208 3206 fnegd %f30,%f28
3209 3207 andn %l0,%i5,%l0
3210 3208 add %i1,%i2,%i1
3211 3209
3212 3210 andcc %l4,2,%g0
3213 3211 fmovdnz %icc,%f4,%f6
3214 3212 st %f6,[%o0]
3215 3213
3216 3214 andcc %l1,2,%g0
3217 3215 fmovdnz %icc,%f12,%f14
3218 3216 st %f14,[%o1]
3219 3217
3220 3218 andcc %l2,2,%g0
3221 3219 fmovdnz %icc,%f20,%f22
3222 3220 st %f22,[%o2]
3223 3221
3224 3222 andcc %l3,2,%g0
3225 3223 fmovdnz %icc,%f28,%f30
3226 3224 st %f30,[%o3]
3227 3225
3228 3226 addcc %i0,-1,%i0
3229 3227 bg,pt %icc,.loop0
3230 3228 ! delay slot
3231 3229 st %f7,[%o0+4]
3232 3230
3233 3231 ba,pt %icc,.end
3234 3232 ! delay slot
3235 3233 nop
3236 3234
3237 3235
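Editorial note: the pp/qq ladders repeated throughout the .case blocks above are Horner evaluations of short sine and cosine cores in w = z*z. The following is a hedged C model only: sin_core and cos_core are illustrative names, and the Taylor coefficients below are stand-ins for the minimax-adjusted pp1..pp3 and qq1..qq3 constants this file actually loads from its table.

/*
 * Hedged C model of the per-lane polynomial cores evaluated in the
 * .case blocks.  Coefficients are Taylor stand-ins, not the exact
 * constants from this file's RO_DATA table.
 */
static const double pp1 = -1.0 / 6.0;
static const double pp2 =  1.0 / 120.0;
static const double pp3 = -1.0 / 5040.0;
static const double qq1 = -0.5;
static const double qq2 =  1.0 / 24.0;
static const double qq3 = -1.0 / 720.0;

static double sin_core(double z)
{
	double w = z * z;		/* the f0/f8/f16/f24 squares */
	return z + z * w * (pp1 + w * (pp2 + w * pp3));
}

static double cos_core(double z)
{
	double w = z * z;
	return 1.0 + w * (qq1 + w * (qq2 + w * qq3));
}

In the assembly these cores are interleaved four lanes at a time and folded into the __vlibm_TBL_sincos2 table reconstruction, so no lane ever computes sin or cos in isolation; lanes on the sin path also use a truncated qq ladder (and vice versa) for the cross term.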
3238 3236 .align 16
3239 3237 .end:
3240 3238 st %f15,[%o1+4]
3241 3239 st %f23,[%o2+4]
3242 3240 st %f31,[%o3+4]
3243 3241 ld [%fp+biguns],%i5
3244 3242 tst %i5 ! check for huge arguments remaining
3245 3243 be,pt %icc,.exit
3246 3244 ! delay slot
3247 3245 nop
3248 3246 #ifdef __sparcv9
3249 3247 ldx [%fp+xsave],%o1
3250 3248 ldx [%fp+ysave],%o3
3251 3249 #else
3252 3250 ld [%fp+xsave],%o1
3253 3251 ld [%fp+ysave],%o3
3254 3252 #endif
3255 3253 ld [%fp+nsave],%o0
3256 3254 ld [%fp+sxsave],%o2
3257 3255 ld [%fp+sysave],%o4
3258 3256 sra %o2,0,%o2 ! sign-extend for V9
3259 3257 sra %o4,0,%o4
3260 3258 call __vlibm_vsin_big_ultra3
3261 3259 sra %o5,0,%o5 ! delay slot
3262 3260
3263 3261 .exit:
3264 3262 ret
3265 3263 restore
3266 3264
3267 3265
3268 3266 .align 16
3269 3267 .last1:
3270 3268 faddd %f2,c3two44,%f4
3271 3269 st %f15,[%o1+4]
3272 3270 .last1_from_range1:
3273 3271 mov 0,%l1
3274 3272 fzeros %f8
3275 3273 fzero %f10
3276 3274 add %fp,junk,%o1
3277 3275 .last2:
3278 3276 faddd %f10,c3two44,%f12
3279 3277 st %f23,[%o2+4]
3280 3278 .last2_from_range2:
3281 3279 mov 0,%l2
3282 3280 fzeros %f16
3283 3281 fzero %f18
3284 3282 add %fp,junk,%o2
3285 3283 .last3:
3286 3284 faddd %f18,c3two44,%f20
3287 3285 st %f31,[%o3+4]
3288 3286 st %f5,[%fp+nk0]
3289 3287 st %f13,[%fp+nk1]
3290 3288 .last3_from_range3:
3291 3289 mov 0,%l3
3292 3290 fzeros %f24
3293 3291 fzero %f26
3294 3292 ba,pt %icc,.cont
3295 3293 ! delay slot
3296 3294 add %fp,junk,%o3
3297 3295
3298 3296
3299 3297 .align 16
3300 3298 .range0:
3301 3299 cmp %l0,%o4
3302 3300 bl,pt %icc,1f ! hx < 0x3e400000
3303 3301 ! delay slot, harmless if branch taken
3304 3302 sethi %hi(0x7ff00000),%o7
3305 3303 cmp %l0,%o7
3306 3304 bl,a,pt %icc,2f ! branch if finite
3307 3305 ! delay slot, squashed if branch not taken
3308 3306 st %o4,[%fp+biguns] ! set biguns
3309 3307 fzero %f0
3310 3308 fmuld %f2,%f0,%f2
3311 3309 st %f2,[%o0]
3312 3310 ba,pt %icc,2f
3313 3311 ! delay slot
3314 3312 st %f3,[%o0+4]
3315 3313 1:
3316 3314 fdtoi %f2,%f4 ! raise inexact if not zero
3317 3315 st %f0,[%o0]
3318 3316 st %f3,[%o0+4]
3319 3317 2:
3320 3318 addcc %i0,-1,%i0
3321 3319 ble,pn %icc,.end
3322 3320 ! delay slot, harmless if branch taken
3323 3321 add %i3,%i4,%i3 ! y += stridey
3324 3322 andn %l1,%i5,%l0 ! hx &= ~0x80000000
3325 3323 fmovs %f8,%f0
3326 3324 fmovs %f11,%f3
3327 3325 ba,pt %icc,.loop0
3328 3326 ! delay slot
3329 3327 add %i1,%i2,%i1 ! x += stridex
3330 3328
3331 3329
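Before .range1 through .range3 repeat the same pattern for the other lanes, it may help to see the classification .range0 applies to a single out-of-range argument. A minimal C sketch, assuming IEEE 754 doubles; classify_arg and the enum names are hypothetical, while the thresholds are the 0x3e400000 and 0x7ff00000 constants tested above.

/*
 * Hedged model of the .range0 classification.  Only arguments the
 * main loop could not handle reach this point.
 */
#include <stdint.h>
#include <string.h>

enum vsin_class { VSIN_TINY, VSIN_NONFINITE, VSIN_BIG };

static enum vsin_class
classify_arg(double x)
{
	uint64_t bits;

	memcpy(&bits, &x, sizeof (bits));
	/* high word with the sign bit dropped, like andn %l0,%i5,%l0 */
	uint32_t hx = (uint32_t)(bits >> 32) & 0x7fffffff;

	if (hx < 0x3e400000)		/* |x| < 2^-27 */
		return (VSIN_TINY);
	if (hx >= 0x7ff00000)		/* Inf or NaN */
		return (VSIN_NONFINITE);
	return (VSIN_BIG);
}

Tiny arguments return x itself, with fdtoi used only to raise the inexact exception when x is nonzero; infinities and NaNs go through x*0 to produce a NaN; everything else sets biguns so the .end code can hand the stragglers to __vlibm_vsin_big_ultra3 in a second pass over the saved argument vector.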
3332 3330 .align 16
3333 3331 .range1:
3334 3332 cmp %l1,%o4
3335 3333 bl,pt %icc,1f ! hx < 0x3e400000
3336 3334 ! delay slot, harmless if branch taken
3337 3335 sethi %hi(0x7ff00000),%o7
3338 3336 cmp %l1,%o7
3339 3337 bl,a,pt %icc,2f ! branch if finite
3340 3338 ! delay slot, squashed if branch not taken
3341 3339 st %o4,[%fp+biguns] ! set biguns
3342 3340 fzero %f8
3343 3341 fmuld %f10,%f8,%f10
3344 3342 st %f10,[%o1]
3345 3343 ba,pt %icc,2f
3346 3344 ! delay slot
3347 3345 st %f11,[%o1+4]
3348 3346 1:
3349 3347 fdtoi %f10,%f12 ! raise inexact if not zero
3350 3348 st %f8,[%o1]
3351 3349 st %f11,[%o1+4]
3352 3350 2:
3353 3351 addcc %i0,-1,%i0
3354 3352 ble,pn %icc,.last1_from_range1
3355 3353 ! delay slot, harmless if branch taken
3356 3354 add %i3,%i4,%i3 ! y += stridey
3357 3355 andn %l2,%i5,%l1 ! hx &= ~0x80000000
3358 3356 fmovs %f16,%f8
3359 3357 fmovs %f19,%f11
3360 3358 ba,pt %icc,.loop1
3361 3359 ! delay slot
3362 3360 add %i1,%i2,%i1 ! x += stridex
3363 3361
3364 3362
3365 3363 .align 16
3366 3364 .range2:
3367 3365 cmp %l2,%o4
3368 3366 bl,pt %icc,1f ! hx < 0x3e400000
3369 3367 ! delay slot, harmless if branch taken
3370 3368 sethi %hi(0x7ff00000),%o7
3371 3369 cmp %l2,%o7
3372 3370 bl,a,pt %icc,2f ! branch if finite
3373 3371 ! delay slot, squashed if branch not taken
3374 3372 st %o4,[%fp+biguns] ! set biguns
3375 3373 fzero %f16
3376 3374 fmuld %f18,%f16,%f18
3377 3375 st %f18,[%o2]
3378 3376 ba,pt %icc,2f
3379 3377 ! delay slot
3380 3378 st %f19,[%o2+4]
3381 3379 1:
3382 3380 fdtoi %f18,%f20 ! raise inexact if not zero
3383 3381 st %f16,[%o2]
3384 3382 st %f19,[%o2+4]
3385 3383 2:
3386 3384 addcc %i0,-1,%i0
3387 3385 ble,pn %icc,.last2_from_range2
3388 3386 ! delay slot, harmless if branch taken
3389 3387 add %i3,%i4,%i3 ! y += stridey
3390 3388 andn %l3,%i5,%l2 ! hx &= ~0x80000000
3391 3389 fmovs %f24,%f16
3392 3390 fmovs %f27,%f19
3393 3391 ba,pt %icc,.loop2
3394 3392 ! delay slot
3395 3393 add %i1,%i2,%i1 ! x += stridex
3396 3394
3397 3395
3398 3396 .align 16
3399 3397 .range3:
3400 3398 cmp %l3,%o4
3401 3399 bl,pt %icc,1f ! hx < 0x3e400000
3402 3400 ! delay slot, harmless if branch taken
3403 3401 sethi %hi(0x7ff00000),%o7
3404 3402 cmp %l3,%o7
3405 3403 bl,a,pt %icc,2f ! branch if finite
3406 3404 ! delay slot, squashed if branch not taken
3407 3405 st %o4,[%fp+biguns] ! set biguns
3408 3406 fzero %f24
3409 3407 fmuld %f26,%f24,%f26
3410 3408 st %f26,[%o3]
3411 3409 ba,pt %icc,2f
3412 3410 ! delay slot
3413 3411 st %f27,[%o3+4]
3414 3412 1:
3415 3413 fdtoi %f26,%f28 ! raise inexact if not zero
3416 3414 st %f24,[%o3]
3417 3415 st %f27,[%o3+4]
3418 3416 2:
3419 3417 addcc %i0,-1,%i0
3420 3418 ble,pn %icc,.last3_from_range3
3421 3419 ! delay slot, harmless if branch taken
3422 3420 add %i3,%i4,%i3 ! y += stridey
3423 3421 ld [%i1],%l3
3424 3422 ld [%i1],%f24
3425 3423 ld [%i1+4],%f27
3426 3424 andn %l3,%i5,%l3 ! hx &= ~0x80000000
3427 3425 ba,pt %icc,.loop3
3428 3426 ! delay slot
3429 3427 add %i1,%i2,%i1 ! x += stridex
3430 3428
3431 3429 SET_SIZE(__vsin_ultra3)
3432 3430
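For readers following the diff without the rest of libmvec: behaviorally, the routine implements a strided vector sine. A scalar model follows, assuming the conventional libmvec signature; treat the prototype as an assumption rather than a quote of the headers.

/*
 * Behavioral sketch only: y[i*stridey] = sin(x[i*stridex]) for
 * i in [0, n).  The real routine processes four elements per trip
 * through .loop0 and defers huge arguments to
 * __vlibm_vsin_big_ultra3.
 */
#include <math.h>

void
vsin_model(int n, const double *x, int stridex, double *y, int stridey)
{
	for (int i = 0; i < n; i++)
		y[i * stridey] = sin(x[i * stridex]);
}

The four-way unrolling of .loop0 is also why the tail and special-case paths (.last1 through .last3 and .range0 through .range3) exist at all: they drain partially filled groups and re-synchronize the loop after an out-of-range lane.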