Print this page
5262 libm needs to be carefully unifdef'd
5268 libm doesn't need to hide symbols which are already local
Reviewed by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Reviewed by: Igor Kozhukhov <ikozhukhov@gmail.com>
Reviewed by: Gordon Ross <gwr@nexenta.com>
Approved by: Gordon Ross <gwr@nexenta.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libmvec/common/vis/__vcos_ultra3.S
+++ new/usr/src/lib/libmvec/common/vis/__vcos_ultra3.S
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
↓ open down ↓ |
21 lines elided |
↑ open up ↑ |
22 22 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
23 23 */
24 24 /*
25 25 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
26 26 * Use is subject to license terms.
27 27 */
28 28
29 29 .file "__vcos_ultra3.S"
30 30
31 31 #include "libm.h"
32 -#if defined(LIBMVEC_SO_BUILD)
33 32 .weak __vcos ! export __vcos as a weak symbol
34 33 .type __vcos,#function
35 34 __vcos = __vcos_ultra3 ! __vcos is an alias for the __vcos_ultra3 entry point below
36 -#endif
37 35
38 36 RO_DATA
39 37 .align 64
40 38 constants: ! twelve doubles (high word, low word); __vcos_ultra3 loads them with ldd from offsets 0x00-0x58
41 39 .word 0x42c80000,0x00000000 ! 3 * 2^44 (c3two44): added to |x| so the table index k lands in the low word
42 40 .word 0x43380000,0x00000000 ! 3 * 2^51 (c3two51): added then subtracted to round x * invpio2 to the integer n
43 41 .word 0x3fe45f30,0x6dc9c883 ! invpio2 (2/pi)
44 42 .word 0x3ff921fb,0x54442c00 ! pio2_1: leading part of pi/2, first term subtracted in the medium-range reduction
45 43 .word 0x3d318469,0x898cc400 ! pio2_2: second, smaller part of pi/2
46 44 .word 0x3a71701b,0x839a2520 ! pio2_3: third, smallest part of pi/2; also the comparand for the sign tests
47 45 .word 0xbfc55555,0x55555533 ! pp1: sine polynomial coefficient (approx. -1/6)
48 46 .word 0x3f811111,0x10e7d53b ! pp2: sine polynomial coefficient (approx. 1/120)
49 47 .word 0xbf2a0167,0xe6b3cf9b ! pp3: sine polynomial coefficient (approx. -1/5040)
50 48 .word 0xbfdfffff,0xffffff65 ! qq1: cosine polynomial coefficient (approx. -1/2)
51 49 .word 0x3fa55555,0x54f88ed0 ! qq2: cosine polynomial coefficient (approx. 1/24)
52 50 .word 0xbf56c12c,0xdd185f60 ! qq3: cosine polynomial coefficient (approx. -1/720)
53 51
54 52 ! local storage indices
55 53
56 54 #define xsave STACK_BIAS-0x8 /* saved x pointer (%i1); used as [%fp+xsave] */
57 55 #define ysave STACK_BIAS-0x10 /* saved y pointer (%i3) */
58 56 #define nsave STACK_BIAS-0x14 /* saved element count n (%i0) */
59 57 #define sxsave STACK_BIAS-0x18 /* saved stridex (%i2), stored before it is scaled by 8 */
60 58 #define sysave STACK_BIAS-0x1c /* saved stridey (%i4), stored before it is scaled by 8 */
61 59 #define biguns STACK_BIAS-0x20 /* word cleared to 0 at entry; NOTE(review): presumably flags huge arguments - confirm in the .range handlers */
62 60 #define nk3 STACK_BIAS-0x24 /* lane 3 scratch word: FP low word is stored here, then reloaded into an integer register */
63 61 #define nk2 STACK_BIAS-0x28 /* lane 2 scratch word, same use as nk3 */
64 62 #define nk1 STACK_BIAS-0x2c /* lane 1 scratch word, same use as nk3 */
65 63 #define nk0 STACK_BIAS-0x30 /* lane 0 scratch word, same use as nk3 */
66 64 #define junk STACK_BIAS-0x38 /* dummy slot: py1-py3 point here in the loop prologue */
67 65 ! sizeof temp storage - must be a multiple of 16 for V9
68 66 #define tmps 0x40 /* extra bytes added to the frame size in the save at entry */
69 67
70 68 ! register use
71 69
72 70 ! i0 n
73 71 ! i1 x
74 72 ! i2 stridex
75 73 ! i3 y
76 74 ! i4 stridey
77 75 ! i5 0x80000000
78 76
79 77 ! l0 hx0
80 78 ! l1 hx1
81 79 ! l2 hx2
82 80 ! l3 hx3
83 81 ! l4 k0
84 82 ! l5 k1
85 83 ! l6 k2
86 84 ! l7 k3
87 85
88 86 ! the following are 64-bit registers in both V8+ and V9
89 87
90 88 ! g1 __vlibm_TBL_sincos2
91 89 ! g5 scratch
92 90
93 91 ! o0 py0
94 92 ! o1 py1
95 93 ! o2 py2
96 94 ! o3 py3
97 95 ! o4 0x3e400000
98 96 ! o5 0x3fe921fb,0x4099251e
99 97 ! o7 scratch
100 98
101 99 ! f0 hx0
102 100 ! f2
103 101 ! f4
104 102 ! f6
105 103 ! f8 hx1
106 104 ! f10
107 105 ! f12
108 106 ! f14
109 107 ! f16 hx2
110 108 ! f18
111 109 ! f20
112 110 ! f22
113 111 ! f24 hx3
114 112 ! f26
115 113 ! f28
116 114 ! f30
117 115 ! f32
118 116 ! f34
119 117 ! f36
120 118 ! f38
121 119
122 120 #define c3two44 %f40 /* loaded from constants+0x00 */
123 121 #define c3two51 %f42 /* loaded from constants+0x08 */
124 122 #define invpio2 %f44 /* loaded from constants+0x10 */
125 123 #define pio2_1 %f46 /* loaded from constants+0x18 */
126 124 #define pio2_2 %f48 /* loaded from constants+0x20 */
127 125 #define pio2_3 %f50 /* loaded from constants+0x28 */
128 126 #define pp1 %f52 /* loaded from constants+0x30 */
129 127 #define pp2 %f54 /* loaded from constants+0x38 */
130 128 #define pp3 %f56 /* loaded from constants+0x40 */
131 129 #define qq1 %f58 /* loaded from constants+0x48 */
132 130 #define qq2 %f60 /* loaded from constants+0x50 */
133 131 #define qq3 %f62 /* loaded from constants+0x58 */
134 132
135 133 ENTRY(__vcos_ultra3)
136 134 save %sp,-SA(MINFRAME)-tmps,%sp
137 135 PIC_SETUP(l7)
138 136 PIC_SET(l7,constants,o0)
139 137 PIC_SET(l7,__vlibm_TBL_sincos2,o1)
140 138 mov %o1,%g1
141 139 wr %g0,0x82,%asi ! set %asi for non-faulting loads
142 140 #ifdef __sparcv9
143 141 stx %i1,[%fp+xsave] ! save arguments
144 142 stx %i3,[%fp+ysave]
145 143 #else
146 144 st %i1,[%fp+xsave] ! save arguments
147 145 st %i3,[%fp+ysave]
148 146 #endif
149 147 st %i0,[%fp+nsave]
150 148 st %i2,[%fp+sxsave]
151 149 st %i4,[%fp+sysave]
152 150 st %g0,[%fp+biguns] ! biguns = 0
153 151 ldd [%o0+0x00],c3two44 ! load/set up constants
154 152 ldd [%o0+0x08],c3two51
155 153 ldd [%o0+0x10],invpio2
156 154 ldd [%o0+0x18],pio2_1
157 155 ldd [%o0+0x20],pio2_2
158 156 ldd [%o0+0x28],pio2_3
159 157 ldd [%o0+0x30],pp1
160 158 ldd [%o0+0x38],pp2
161 159 ldd [%o0+0x40],pp3
162 160 ldd [%o0+0x48],qq1
163 161 ldd [%o0+0x50],qq2
164 162 ldd [%o0+0x58],qq3
165 163 sethi %hi(0x80000000),%i5
166 164 sethi %hi(0x3e400000),%o4
167 165 sethi %hi(0x3fe921fb),%o5
168 166 or %o5,%lo(0x3fe921fb),%o5
169 167 sllx %o5,32,%o5
170 168 sethi %hi(0x4099251e),%o7
171 169 or %o7,%lo(0x4099251e),%o7
172 170 or %o5,%o7,%o5
173 171 sll %i2,3,%i2 ! scale strides
174 172 sll %i4,3,%i4
175 173 add %fp,junk,%o1 ! loop prologue
176 174 add %fp,junk,%o2
177 175 add %fp,junk,%o3
178 176 ld [%i1],%l0 ! *x
179 177 ld [%i1],%f0
180 178 ld [%i1+4],%f3
181 179 andn %l0,%i5,%l0 ! mask off sign
182 180 add %i1,%i2,%i1 ! x += stridex
183 181 ba .loop0
184 182 nop
185 183
186 184 ! 16-byte aligned
187 185 .align 16
188 186 .loop0:
189 187 lda [%i1]%asi,%l1 ! preload next argument
190 188 sub %l0,%o4,%g5
191 189 sub %o5,%l0,%o7
192 190 fabss %f0,%f2
193 191
194 192 lda [%i1]%asi,%f8
195 193 orcc %o7,%g5,%g0
196 194 mov %i3,%o0 ! py0 = y
197 195 bl,pn %icc,.range0 ! hx < 0x3e400000 or hx > 0x4099251e
198 196
199 197 ! delay slot
200 198 lda [%i1+4]%asi,%f11
201 199 addcc %i0,-1,%i0
202 200 add %i3,%i4,%i3 ! y += stridey
203 201 ble,pn %icc,.last1
204 202
205 203 ! delay slot
206 204 andn %l1,%i5,%l1
207 205 add %i1,%i2,%i1 ! x += stridex
208 206 faddd %f2,c3two44,%f4
209 207 st %f15,[%o1+4]
210 208
211 209 .loop1:
212 210 lda [%i1]%asi,%l2 ! preload next argument
213 211 sub %l1,%o4,%g5
214 212 sub %o5,%l1,%o7
215 213 fabss %f8,%f10
216 214
217 215 lda [%i1]%asi,%f16
218 216 orcc %o7,%g5,%g0
219 217 mov %i3,%o1 ! py1 = y
220 218 bl,pn %icc,.range1 ! hx < 0x3e400000 or hx > 0x4099251e
221 219
222 220 ! delay slot
223 221 lda [%i1+4]%asi,%f19
224 222 addcc %i0,-1,%i0
225 223 add %i3,%i4,%i3 ! y += stridey
226 224 ble,pn %icc,.last2
227 225
228 226 ! delay slot
229 227 andn %l2,%i5,%l2
230 228 add %i1,%i2,%i1 ! x += stridex
231 229 faddd %f10,c3two44,%f12
232 230 st %f23,[%o2+4]
233 231
234 232 .loop2:
235 233 lda [%i1]%asi,%l3 ! preload next argument
236 234 sub %l2,%o4,%g5
237 235 sub %o5,%l2,%o7
238 236 fabss %f16,%f18
239 237
240 238 lda [%i1]%asi,%f24
241 239 orcc %o7,%g5,%g0
242 240 mov %i3,%o2 ! py2 = y
243 241 bl,pn %icc,.range2 ! hx < 0x3e400000 or hx > 0x4099251e
244 242
245 243 ! delay slot
246 244 lda [%i1+4]%asi,%f27
247 245 addcc %i0,-1,%i0
248 246 add %i3,%i4,%i3 ! y += stridey
249 247 ble,pn %icc,.last3
250 248
251 249 ! delay slot
252 250 andn %l3,%i5,%l3
253 251 add %i1,%i2,%i1 ! x += stridex
254 252 faddd %f18,c3two44,%f20
255 253 st %f31,[%o3+4]
256 254
257 255 .loop3:
258 256 sub %l3,%o4,%g5
259 257 sub %o5,%l3,%o7
260 258 fabss %f24,%f26
261 259 st %f5,[%fp+nk0]
262 260
263 261 orcc %o7,%g5,%g0
264 262 mov %i3,%o3 ! py3 = y
265 263 bl,pn %icc,.range3 ! hx < 0x3e400000 or hx > 0x4099251e
266 264 ! delay slot
267 265 st %f13,[%fp+nk1]
268 266
269 267 !!! DONE?
270 268 .cont:
271 269 srlx %o5,32,%o7
272 270 add %i3,%i4,%i3 ! y += stridey
273 271 fmovs %f3,%f1
274 272 st %f21,[%fp+nk2]
275 273
276 274 sub %o7,%l0,%l0
277 275 sub %o7,%l1,%l1
278 276 faddd %f26,c3two44,%f28
279 277 st %f29,[%fp+nk3]
280 278
281 279 sub %o7,%l2,%l2
282 280 sub %o7,%l3,%l3
283 281 fmovs %f11,%f9
284 282
285 283 or %l0,%l1,%l0
286 284 or %l2,%l3,%l2
287 285 fmovs %f19,%f17
288 286
289 287 fmovs %f27,%f25
290 288 fmuld %f0,invpio2,%f6 ! x * invpio2, for medium range
291 289
292 290 fmuld %f8,invpio2,%f14
293 291 ld [%fp+nk0],%l4
294 292
295 293 fmuld %f16,invpio2,%f22
296 294 ld [%fp+nk1],%l5
297 295
298 296 orcc %l0,%l2,%g0
299 297 bl,pn %icc,.medium
300 298 ! delay slot
301 299 fmuld %f24,invpio2,%f30
302 300 ld [%fp+nk2],%l6
303 301
304 302 ld [%fp+nk3],%l7
305 303 sll %l4,5,%l4 ! k
306 304 fcmpd %fcc0,%f0,pio2_3 ! x < pio2_3 iff x < 0
307 305
308 306 sll %l5,5,%l5
309 307 ldd [%l4+%g1],%f4
310 308 fcmpd %fcc1,%f8,pio2_3
311 309
312 310 sll %l6,5,%l6
313 311 ldd [%l5+%g1],%f12
314 312 fcmpd %fcc2,%f16,pio2_3
315 313
316 314 sll %l7,5,%l7
317 315 ldd [%l6+%g1],%f20
318 316 fcmpd %fcc3,%f24,pio2_3
319 317
320 318 ldd [%l7+%g1],%f28
321 319 fsubd %f2,%f4,%f2 ! x -= __vlibm_TBL_sincos2[k]
322 320
323 321 fsubd %f10,%f12,%f10
324 322
325 323 fsubd %f18,%f20,%f18
326 324
327 325 fsubd %f26,%f28,%f26
328 326
329 327 fmuld %f2,%f2,%f0 ! z = x * x
330 328
331 329 fmuld %f10,%f10,%f8
332 330
333 331 fmuld %f18,%f18,%f16
334 332
335 333 fmuld %f26,%f26,%f24
336 334
337 335 fmuld %f0,qq3,%f6
338 336
339 337 fmuld %f8,qq3,%f14
340 338
341 339 fmuld %f16,qq3,%f22
342 340
343 341 fmuld %f24,qq3,%f30
344 342
345 343 faddd %f6,qq2,%f6
346 344 fmuld %f0,pp2,%f4
347 345
348 346 faddd %f14,qq2,%f14
349 347 fmuld %f8,pp2,%f12
350 348
351 349 faddd %f22,qq2,%f22
352 350 fmuld %f16,pp2,%f20
353 351
354 352 faddd %f30,qq2,%f30
355 353 fmuld %f24,pp2,%f28
356 354
357 355 fmuld %f0,%f6,%f6
358 356 faddd %f4,pp1,%f4
359 357
360 358 fmuld %f8,%f14,%f14
361 359 faddd %f12,pp1,%f12
362 360
363 361 fmuld %f16,%f22,%f22
364 362 faddd %f20,pp1,%f20
365 363
366 364 fmuld %f24,%f30,%f30
367 365 faddd %f28,pp1,%f28
368 366
369 367 faddd %f6,qq1,%f6
370 368 fmuld %f0,%f4,%f4
371 369 add %l4,%g1,%l4
372 370
373 371 faddd %f14,qq1,%f14
374 372 fmuld %f8,%f12,%f12
375 373 add %l5,%g1,%l5
376 374
377 375 faddd %f22,qq1,%f22
378 376 fmuld %f16,%f20,%f20
379 377 add %l6,%g1,%l6
380 378
381 379 faddd %f30,qq1,%f30
382 380 fmuld %f24,%f28,%f28
383 381 add %l7,%g1,%l7
384 382
385 383 fmuld %f2,%f4,%f4
386 384
387 385 fmuld %f10,%f12,%f12
388 386
389 387 fmuld %f18,%f20,%f20
390 388
391 389 fmuld %f26,%f28,%f28
392 390
393 391 fmuld %f0,%f6,%f6
394 392 faddd %f4,%f2,%f4
395 393 ldd [%l4+16],%f32
396 394
397 395 fmuld %f8,%f14,%f14
398 396 faddd %f12,%f10,%f12
399 397 ldd [%l5+16],%f34
400 398
401 399 fmuld %f16,%f22,%f22
402 400 faddd %f20,%f18,%f20
403 401 ldd [%l6+16],%f36
404 402
405 403 fmuld %f24,%f30,%f30
406 404 faddd %f28,%f26,%f28
407 405 ldd [%l7+16],%f38
408 406
409 407 fmuld %f32,%f6,%f6
410 408 ldd [%l4+8],%f2
411 409
412 410 fmuld %f34,%f14,%f14
413 411 ldd [%l5+8],%f10
414 412
415 413 fmuld %f36,%f22,%f22
416 414 ldd [%l6+8],%f18
417 415
418 416 fmuld %f38,%f30,%f30
419 417 ldd [%l7+8],%f26
420 418
421 419 fmuld %f2,%f4,%f4
422 420
423 421 fmuld %f10,%f12,%f12
424 422
425 423 fmuld %f18,%f20,%f20
426 424
427 425 fmuld %f26,%f28,%f28
428 426
429 427 fsubd %f6,%f4,%f6
430 428 lda [%i1]%asi,%l0 ! preload next argument
431 429
432 430 fsubd %f14,%f12,%f14
433 431 lda [%i1]%asi,%f0
434 432
435 433 fsubd %f22,%f20,%f22
436 434 lda [%i1+4]%asi,%f3
437 435
438 436 fsubd %f30,%f28,%f30
439 437 andn %l0,%i5,%l0
440 438 add %i1,%i2,%i1
441 439
442 440 faddd %f6,%f32,%f6
443 441 st %f6,[%o0]
444 442
445 443 faddd %f14,%f34,%f14
446 444 st %f14,[%o1]
447 445
448 446 faddd %f22,%f36,%f22
449 447 st %f22,[%o2]
450 448
451 449 faddd %f30,%f38,%f30
452 450 st %f30,[%o3]
453 451 addcc %i0,-1,%i0
454 452
455 453 bg,pt %icc,.loop0
456 454 ! delay slot
457 455 st %f7,[%o0+4]
458 456
459 457 ba,pt %icc,.end
460 458 ! delay slot
461 459 nop
462 460
463 461
464 462 .align 16
465 463 .medium:
466 464 faddd %f6,c3two51,%f4
467 465 st %f5,[%fp+nk0]
468 466
469 467 faddd %f14,c3two51,%f12
470 468 st %f13,[%fp+nk1]
471 469
472 470 faddd %f22,c3two51,%f20
473 471 st %f21,[%fp+nk2]
474 472
475 473 faddd %f30,c3two51,%f28
476 474 st %f29,[%fp+nk3]
477 475
478 476 fsubd %f4,c3two51,%f6
479 477
480 478 fsubd %f12,c3two51,%f14
481 479
482 480 fsubd %f20,c3two51,%f22
483 481
484 482 fsubd %f28,c3two51,%f30
485 483
486 484 fmuld %f6,pio2_1,%f2
487 485 ld [%fp+nk0],%l0 ! n
488 486
489 487 fmuld %f14,pio2_1,%f10
490 488 ld [%fp+nk1],%l1
491 489
492 490 fmuld %f22,pio2_1,%f18
493 491 ld [%fp+nk2],%l2
494 492
495 493 fmuld %f30,pio2_1,%f26
496 494 ld [%fp+nk3],%l3
497 495
498 496 fsubd %f0,%f2,%f0
499 497 fmuld %f6,pio2_2,%f4
500 498 add %l0,1,%l0
501 499
502 500 fsubd %f8,%f10,%f8
503 501 fmuld %f14,pio2_2,%f12
504 502 add %l1,1,%l1
505 503
506 504 fsubd %f16,%f18,%f16
507 505 fmuld %f22,pio2_2,%f20
508 506 add %l2,1,%l2
509 507
510 508 fsubd %f24,%f26,%f24
511 509 fmuld %f30,pio2_2,%f28
512 510 add %l3,1,%l3
513 511
514 512 fsubd %f0,%f4,%f32
515 513
516 514 fsubd %f8,%f12,%f34
517 515
518 516 fsubd %f16,%f20,%f36
519 517
520 518 fsubd %f24,%f28,%f38
521 519
522 520 fsubd %f0,%f32,%f0
523 521 fcmple32 %f32,pio2_3,%l4 ! x <= pio2_3 iff x < 0
524 522
525 523 fsubd %f8,%f34,%f8
526 524 fcmple32 %f34,pio2_3,%l5
527 525
528 526 fsubd %f16,%f36,%f16
529 527 fcmple32 %f36,pio2_3,%l6
530 528
531 529 fsubd %f24,%f38,%f24
532 530 fcmple32 %f38,pio2_3,%l7
533 531
534 532 fsubd %f0,%f4,%f0
535 533 fmuld %f6,pio2_3,%f6
536 534 sll %l4,30,%l4 ! if (x < 0) n = -n ^ 2
537 535
538 536 fsubd %f8,%f12,%f8
539 537 fmuld %f14,pio2_3,%f14
540 538 sll %l5,30,%l5
541 539
542 540 fsubd %f16,%f20,%f16
543 541 fmuld %f22,pio2_3,%f22
544 542 sll %l6,30,%l6
545 543
546 544 fsubd %f24,%f28,%f24
547 545 fmuld %f30,pio2_3,%f30
548 546 sll %l7,30,%l7
549 547
550 548 fsubd %f6,%f0,%f6
551 549 sra %l4,31,%l4
552 550
553 551 fsubd %f14,%f8,%f14
554 552 sra %l5,31,%l5
555 553
556 554 fsubd %f22,%f16,%f22
557 555 sra %l6,31,%l6
558 556
559 557 fsubd %f30,%f24,%f30
560 558 sra %l7,31,%l7
561 559
562 560 fsubd %f32,%f6,%f0 ! reduced x
563 561 xor %l0,%l4,%l0
564 562
565 563 fsubd %f34,%f14,%f8
566 564 xor %l1,%l5,%l1
567 565
568 566 fsubd %f36,%f22,%f16
569 567 xor %l2,%l6,%l2
570 568
571 569 fsubd %f38,%f30,%f24
572 570 xor %l3,%l7,%l3
573 571
574 572 fabsd %f0,%f2
575 573 sub %l0,%l4,%l0
576 574
577 575 fabsd %f8,%f10
578 576 sub %l1,%l5,%l1
579 577
580 578 fabsd %f16,%f18
581 579 sub %l2,%l6,%l2
582 580
583 581 fabsd %f24,%f26
584 582 sub %l3,%l7,%l3
585 583
586 584 faddd %f2,c3two44,%f4
587 585 st %f5,[%fp+nk0]
588 586 and %l4,2,%l4
589 587
590 588 faddd %f10,c3two44,%f12
591 589 st %f13,[%fp+nk1]
592 590 and %l5,2,%l5
593 591
594 592 faddd %f18,c3two44,%f20
595 593 st %f21,[%fp+nk2]
596 594 and %l6,2,%l6
597 595
598 596 faddd %f26,c3two44,%f28
599 597 st %f29,[%fp+nk3]
600 598 and %l7,2,%l7
601 599
602 600 fsubd %f32,%f0,%f4
603 601 xor %l0,%l4,%l0
604 602
605 603 fsubd %f34,%f8,%f12
606 604 xor %l1,%l5,%l1
607 605
608 606 fsubd %f36,%f16,%f20
609 607 xor %l2,%l6,%l2
610 608
611 609 fsubd %f38,%f24,%f28
612 610 xor %l3,%l7,%l3
613 611
614 612 fzero %f38
615 613 ld [%fp+nk0],%l4
616 614
617 615 fsubd %f4,%f6,%f6 ! w
618 616 ld [%fp+nk1],%l5
619 617
620 618 fsubd %f12,%f14,%f14
621 619 ld [%fp+nk2],%l6
622 620
623 621 fnegd %f38,%f38
624 622 ld [%fp+nk3],%l7
625 623 sll %l4,5,%l4 ! k
626 624
627 625 fsubd %f20,%f22,%f22
628 626 sll %l5,5,%l5
629 627
630 628 fsubd %f28,%f30,%f30
631 629 sll %l6,5,%l6
632 630
633 631 fand %f0,%f38,%f32 ! sign bit of x
634 632 ldd [%l4+%g1],%f4
635 633 sll %l7,5,%l7
636 634
637 635 fand %f8,%f38,%f34
638 636 ldd [%l5+%g1],%f12
639 637
640 638 fand %f16,%f38,%f36
641 639 ldd [%l6+%g1],%f20
642 640
643 641 fand %f24,%f38,%f38
644 642 ldd [%l7+%g1],%f28
645 643
646 644 fsubd %f2,%f4,%f2 ! x -= __vlibm_TBL_sincos2[k]
647 645
648 646 fsubd %f10,%f12,%f10
649 647
650 648 fsubd %f18,%f20,%f18
651 649 nop
652 650
653 651 fsubd %f26,%f28,%f26
654 652 nop
655 653
656 654 ! 16-byte aligned
657 655 fmuld %f2,%f2,%f0 ! z = x * x
658 656 andcc %l0,1,%g0
659 657 bz,pn %icc,.case8
660 658 ! delay slot
661 659 fxor %f6,%f32,%f32
662 660
663 661 fmuld %f10,%f10,%f8
664 662 andcc %l1,1,%g0
665 663 bz,pn %icc,.case4
666 664 ! delay slot
667 665 fxor %f14,%f34,%f34
668 666
669 667 fmuld %f18,%f18,%f16
670 668 andcc %l2,1,%g0
671 669 bz,pn %icc,.case2
672 670 ! delay slot
673 671 fxor %f22,%f36,%f36
674 672
675 673 fmuld %f26,%f26,%f24
676 674 andcc %l3,1,%g0
677 675 bz,pn %icc,.case1
678 676 ! delay slot
679 677 fxor %f30,%f38,%f38
680 678
681 679 !.case0:
682 680 fmuld %f0,qq3,%f6 ! cos(x0)
683 681
684 682 fmuld %f8,qq3,%f14 ! cos(x1)
685 683
686 684 fmuld %f16,qq3,%f22 ! cos(x2)
687 685
688 686 fmuld %f24,qq3,%f30 ! cos(x3)
689 687
690 688 faddd %f6,qq2,%f6
691 689 fmuld %f0,pp2,%f4
692 690
693 691 faddd %f14,qq2,%f14
694 692 fmuld %f8,pp2,%f12
695 693
696 694 faddd %f22,qq2,%f22
697 695 fmuld %f16,pp2,%f20
698 696
699 697 faddd %f30,qq2,%f30
700 698 fmuld %f24,pp2,%f28
701 699
702 700 fmuld %f0,%f6,%f6
703 701 faddd %f4,pp1,%f4
704 702
705 703 fmuld %f8,%f14,%f14
706 704 faddd %f12,pp1,%f12
707 705
708 706 fmuld %f16,%f22,%f22
709 707 faddd %f20,pp1,%f20
710 708
711 709 fmuld %f24,%f30,%f30
712 710 faddd %f28,pp1,%f28
713 711
714 712 faddd %f6,qq1,%f6
715 713 fmuld %f0,%f4,%f4
716 714 add %l4,%g1,%l4
717 715
718 716 faddd %f14,qq1,%f14
719 717 fmuld %f8,%f12,%f12
720 718 add %l5,%g1,%l5
721 719
722 720 faddd %f22,qq1,%f22
723 721 fmuld %f16,%f20,%f20
724 722 add %l6,%g1,%l6
725 723
726 724 faddd %f30,qq1,%f30
727 725 fmuld %f24,%f28,%f28
728 726 add %l7,%g1,%l7
729 727
730 728 fmuld %f2,%f4,%f4
731 729
732 730 fmuld %f10,%f12,%f12
733 731
734 732 fmuld %f18,%f20,%f20
735 733
736 734 fmuld %f26,%f28,%f28
737 735
738 736 fmuld %f0,%f6,%f6
739 737 faddd %f4,%f32,%f4
740 738 ldd [%l4+16],%f0
741 739
742 740 fmuld %f8,%f14,%f14
743 741 faddd %f12,%f34,%f12
744 742 ldd [%l5+16],%f8
745 743
746 744 fmuld %f16,%f22,%f22
747 745 faddd %f20,%f36,%f20
748 746 ldd [%l6+16],%f16
749 747
750 748 fmuld %f24,%f30,%f30
751 749 faddd %f28,%f38,%f28
752 750 ldd [%l7+16],%f24
753 751
754 752 fmuld %f0,%f6,%f6
755 753 faddd %f4,%f2,%f4
756 754 ldd [%l4+8],%f32
757 755
758 756 fmuld %f8,%f14,%f14
759 757 faddd %f12,%f10,%f12
760 758 ldd [%l5+8],%f34
761 759
762 760 fmuld %f16,%f22,%f22
763 761 faddd %f20,%f18,%f20
764 762 ldd [%l6+8],%f36
765 763
766 764 fmuld %f24,%f30,%f30
767 765 faddd %f28,%f26,%f28
768 766 ldd [%l7+8],%f38
769 767
770 768 fmuld %f32,%f4,%f4
771 769
772 770 fmuld %f34,%f12,%f12
773 771
774 772 fmuld %f36,%f20,%f20
775 773
776 774 fmuld %f38,%f28,%f28
777 775
778 776 fsubd %f6,%f4,%f6
779 777
780 778 fsubd %f14,%f12,%f14
781 779
782 780 fsubd %f22,%f20,%f22
783 781
784 782 fsubd %f30,%f28,%f30
785 783
786 784 faddd %f6,%f0,%f6
787 785
788 786 faddd %f14,%f8,%f14
789 787
790 788 faddd %f22,%f16,%f22
791 789
792 790 faddd %f30,%f24,%f30
793 791 mov %l0,%l4
794 792
795 793 fnegd %f6,%f4
796 794 lda [%i1]%asi,%l0 ! preload next argument
797 795
798 796 fnegd %f14,%f12
799 797 lda [%i1]%asi,%f0
800 798
801 799 fnegd %f22,%f20
802 800 lda [%i1+4]%asi,%f3
803 801
804 802 fnegd %f30,%f28
805 803 andn %l0,%i5,%l0
806 804 add %i1,%i2,%i1
807 805
808 806 andcc %l4,2,%g0
809 807 fmovdnz %icc,%f4,%f6
810 808 st %f6,[%o0]
811 809
812 810 andcc %l1,2,%g0
813 811 fmovdnz %icc,%f12,%f14
814 812 st %f14,[%o1]
815 813
816 814 andcc %l2,2,%g0
817 815 fmovdnz %icc,%f20,%f22
818 816 st %f22,[%o2]
819 817
820 818 andcc %l3,2,%g0
821 819 fmovdnz %icc,%f28,%f30
822 820 st %f30,[%o3]
823 821
824 822 addcc %i0,-1,%i0
825 823 bg,pt %icc,.loop0
826 824 ! delay slot
827 825 st %f7,[%o0+4]
828 826
829 827 ba,pt %icc,.end
830 828 ! delay slot
831 829 nop
832 830
833 831 .align 16
834 832 .case1:
835 833 fmuld %f24,pp3,%f30 ! sin(x3)
836 834
837 835 fmuld %f0,qq3,%f6 ! cos(x0)
838 836
839 837 fmuld %f8,qq3,%f14 ! cos(x1)
840 838
841 839 fmuld %f16,qq3,%f22 ! cos(x2)
842 840
843 841 faddd %f30,pp2,%f30
844 842 fmuld %f24,qq2,%f28
845 843
846 844 faddd %f6,qq2,%f6
847 845 fmuld %f0,pp2,%f4
848 846
849 847 faddd %f14,qq2,%f14
850 848 fmuld %f8,pp2,%f12
851 849
852 850 faddd %f22,qq2,%f22
853 851 fmuld %f16,pp2,%f20
854 852
855 853 fmuld %f24,%f30,%f30
856 854 faddd %f28,qq1,%f28
857 855
858 856 fmuld %f0,%f6,%f6
859 857 faddd %f4,pp1,%f4
860 858
861 859 fmuld %f8,%f14,%f14
862 860 faddd %f12,pp1,%f12
863 861
864 862 fmuld %f16,%f22,%f22
865 863 faddd %f20,pp1,%f20
866 864
867 865 faddd %f30,pp1,%f30
868 866 fmuld %f24,%f28,%f28
869 867 add %l7,%g1,%l7
870 868
871 869 faddd %f6,qq1,%f6
872 870 fmuld %f0,%f4,%f4
873 871 add %l4,%g1,%l4
874 872
875 873 faddd %f14,qq1,%f14
876 874 fmuld %f8,%f12,%f12
877 875 add %l5,%g1,%l5
878 876
879 877 faddd %f22,qq1,%f22
880 878 fmuld %f16,%f20,%f20
881 879 add %l6,%g1,%l6
882 880
883 881 fmuld %f24,%f30,%f30
884 882
885 883 fmuld %f2,%f4,%f4
886 884
887 885 fmuld %f10,%f12,%f12
888 886
889 887 fmuld %f18,%f20,%f20
890 888
891 889 fmuld %f26,%f30,%f30
892 890 ldd [%l7+8],%f24
893 891
894 892 fmuld %f0,%f6,%f6
895 893 faddd %f4,%f32,%f4
896 894 ldd [%l4+16],%f0
897 895
898 896 fmuld %f8,%f14,%f14
899 897 faddd %f12,%f34,%f12
900 898 ldd [%l5+16],%f8
901 899
902 900 fmuld %f16,%f22,%f22
903 901 faddd %f20,%f36,%f20
904 902 ldd [%l6+16],%f16
905 903
906 904 fmuld %f24,%f28,%f28
907 905 faddd %f38,%f30,%f30
908 906
909 907 fmuld %f0,%f6,%f6
910 908 faddd %f4,%f2,%f4
911 909 ldd [%l4+8],%f32
912 910
913 911 fmuld %f8,%f14,%f14
914 912 faddd %f12,%f10,%f12
915 913 ldd [%l5+8],%f34
916 914
917 915 fmuld %f16,%f22,%f22
918 916 faddd %f20,%f18,%f20
919 917 ldd [%l6+8],%f36
920 918
921 919 faddd %f26,%f30,%f30
922 920 ldd [%l7+16],%f38
923 921
924 922 fmuld %f32,%f4,%f4
925 923
926 924 fmuld %f34,%f12,%f12
927 925
928 926 fmuld %f36,%f20,%f20
929 927
930 928 fmuld %f38,%f30,%f30
931 929
932 930 fsubd %f6,%f4,%f6
933 931
934 932 fsubd %f14,%f12,%f14
935 933
936 934 fsubd %f22,%f20,%f22
937 935
938 936 faddd %f30,%f28,%f30
939 937
940 938 faddd %f6,%f0,%f6
941 939
942 940 faddd %f14,%f8,%f14
943 941
944 942 faddd %f22,%f16,%f22
945 943
946 944 faddd %f30,%f24,%f30
947 945 mov %l0,%l4
948 946
949 947 fnegd %f6,%f4
950 948 lda [%i1]%asi,%l0 ! preload next argument
951 949
952 950 fnegd %f14,%f12
953 951 lda [%i1]%asi,%f0
954 952
955 953 fnegd %f22,%f20
956 954 lda [%i1+4]%asi,%f3
957 955
958 956 fnegd %f30,%f28
959 957 andn %l0,%i5,%l0
960 958 add %i1,%i2,%i1
961 959
962 960 andcc %l4,2,%g0
963 961 fmovdnz %icc,%f4,%f6
964 962 st %f6,[%o0]
965 963
966 964 andcc %l1,2,%g0
967 965 fmovdnz %icc,%f12,%f14
968 966 st %f14,[%o1]
969 967
970 968 andcc %l2,2,%g0
971 969 fmovdnz %icc,%f20,%f22
972 970 st %f22,[%o2]
973 971
974 972 andcc %l3,2,%g0
975 973 fmovdnz %icc,%f28,%f30
976 974 st %f30,[%o3]
977 975
978 976 addcc %i0,-1,%i0
979 977 bg,pt %icc,.loop0
980 978 ! delay slot
981 979 st %f7,[%o0+4]
982 980
983 981 ba,pt %icc,.end
984 982 ! delay slot
985 983 nop
986 984
987 985 .align 16
988 986 .case2:
989 987 fmuld %f26,%f26,%f24
990 988 andcc %l3,1,%g0
991 989 bz,pn %icc,.case3
992 990 ! delay slot
993 991 fxor %f30,%f38,%f38
994 992
995 993 fmuld %f16,pp3,%f22 ! sin(x2)
996 994
997 995 fmuld %f0,qq3,%f6 ! cos(x0)
998 996
999 997 fmuld %f8,qq3,%f14 ! cos(x1)
1000 998
1001 999 faddd %f22,pp2,%f22
1002 1000 fmuld %f16,qq2,%f20
1003 1001
1004 1002 fmuld %f24,qq3,%f30 ! cos(x3)
1005 1003
1006 1004 faddd %f6,qq2,%f6
1007 1005 fmuld %f0,pp2,%f4
1008 1006
1009 1007 faddd %f14,qq2,%f14
1010 1008 fmuld %f8,pp2,%f12
1011 1009
1012 1010 fmuld %f16,%f22,%f22
1013 1011 faddd %f20,qq1,%f20
1014 1012
1015 1013 faddd %f30,qq2,%f30
1016 1014 fmuld %f24,pp2,%f28
1017 1015
1018 1016 fmuld %f0,%f6,%f6
1019 1017 faddd %f4,pp1,%f4
1020 1018
1021 1019 fmuld %f8,%f14,%f14
1022 1020 faddd %f12,pp1,%f12
1023 1021
1024 1022 faddd %f22,pp1,%f22
1025 1023 fmuld %f16,%f20,%f20
1026 1024 add %l6,%g1,%l6
1027 1025
1028 1026 fmuld %f24,%f30,%f30
1029 1027 faddd %f28,pp1,%f28
1030 1028
1031 1029 faddd %f6,qq1,%f6
1032 1030 fmuld %f0,%f4,%f4
1033 1031 add %l4,%g1,%l4
1034 1032
1035 1033 faddd %f14,qq1,%f14
1036 1034 fmuld %f8,%f12,%f12
1037 1035 add %l5,%g1,%l5
1038 1036
1039 1037 fmuld %f16,%f22,%f22
1040 1038
1041 1039 faddd %f30,qq1,%f30
1042 1040 fmuld %f24,%f28,%f28
1043 1041 add %l7,%g1,%l7
1044 1042
1045 1043 fmuld %f2,%f4,%f4
1046 1044
1047 1045 fmuld %f10,%f12,%f12
1048 1046
1049 1047 fmuld %f18,%f22,%f22
1050 1048 ldd [%l6+8],%f16
1051 1049
1052 1050 fmuld %f26,%f28,%f28
1053 1051
1054 1052 fmuld %f0,%f6,%f6
1055 1053 faddd %f4,%f32,%f4
1056 1054 ldd [%l4+16],%f0
1057 1055
1058 1056 fmuld %f8,%f14,%f14
1059 1057 faddd %f12,%f34,%f12
1060 1058 ldd [%l5+16],%f8
1061 1059
1062 1060 fmuld %f16,%f20,%f20
1063 1061 faddd %f36,%f22,%f22
1064 1062
1065 1063 fmuld %f24,%f30,%f30
1066 1064 faddd %f28,%f38,%f28
1067 1065 ldd [%l7+16],%f24
1068 1066
1069 1067 fmuld %f0,%f6,%f6
1070 1068 faddd %f4,%f2,%f4
1071 1069 ldd [%l4+8],%f32
1072 1070
1073 1071 fmuld %f8,%f14,%f14
1074 1072 faddd %f12,%f10,%f12
1075 1073 ldd [%l5+8],%f34
1076 1074
1077 1075 faddd %f18,%f22,%f22
1078 1076 ldd [%l6+16],%f36
1079 1077
1080 1078 fmuld %f24,%f30,%f30
1081 1079 faddd %f28,%f26,%f28
1082 1080 ldd [%l7+8],%f38
1083 1081
1084 1082 fmuld %f32,%f4,%f4
1085 1083
1086 1084 fmuld %f34,%f12,%f12
1087 1085
1088 1086 fmuld %f36,%f22,%f22
1089 1087
1090 1088 fmuld %f38,%f28,%f28
1091 1089
1092 1090 fsubd %f6,%f4,%f6
1093 1091
1094 1092 fsubd %f14,%f12,%f14
1095 1093
1096 1094 faddd %f22,%f20,%f22
1097 1095
1098 1096 fsubd %f30,%f28,%f30
1099 1097
1100 1098 faddd %f6,%f0,%f6
1101 1099
1102 1100 faddd %f14,%f8,%f14
1103 1101
1104 1102 faddd %f22,%f16,%f22
1105 1103
1106 1104 faddd %f30,%f24,%f30
1107 1105 mov %l0,%l4
1108 1106
1109 1107 fnegd %f6,%f4
1110 1108 lda [%i1]%asi,%l0 ! preload next argument
1111 1109
1112 1110 fnegd %f14,%f12
1113 1111 lda [%i1]%asi,%f0
1114 1112
1115 1113 fnegd %f22,%f20
1116 1114 lda [%i1+4]%asi,%f3
1117 1115
1118 1116 fnegd %f30,%f28
1119 1117 andn %l0,%i5,%l0
1120 1118 add %i1,%i2,%i1
1121 1119
1122 1120 andcc %l4,2,%g0
1123 1121 fmovdnz %icc,%f4,%f6
1124 1122 st %f6,[%o0]
1125 1123
1126 1124 andcc %l1,2,%g0
1127 1125 fmovdnz %icc,%f12,%f14
1128 1126 st %f14,[%o1]
1129 1127
1130 1128 andcc %l2,2,%g0
1131 1129 fmovdnz %icc,%f20,%f22
1132 1130 st %f22,[%o2]
1133 1131
1134 1132 andcc %l3,2,%g0
1135 1133 fmovdnz %icc,%f28,%f30
1136 1134 st %f30,[%o3]
1137 1135
1138 1136 addcc %i0,-1,%i0
1139 1137 bg,pt %icc,.loop0
1140 1138 ! delay slot
1141 1139 st %f7,[%o0+4]
1142 1140
1143 1141 ba,pt %icc,.end
1144 1142 ! delay slot
1145 1143 nop
1146 1144
1147 1145 .align 16
1148 1146 .case3:
1149 1147 fmuld %f16,pp3,%f22 ! sin(x2)
1150 1148
1151 1149 fmuld %f24,pp3,%f30 ! sin(x3)
1152 1150
1153 1151 fmuld %f0,qq3,%f6 ! cos(x0)
1154 1152
1155 1153 fmuld %f8,qq3,%f14 ! cos(x1)
1156 1154
1157 1155 faddd %f22,pp2,%f22
1158 1156 fmuld %f16,qq2,%f20
1159 1157
1160 1158 faddd %f30,pp2,%f30
1161 1159 fmuld %f24,qq2,%f28
1162 1160
1163 1161 faddd %f6,qq2,%f6
1164 1162 fmuld %f0,pp2,%f4
1165 1163
1166 1164 faddd %f14,qq2,%f14
1167 1165 fmuld %f8,pp2,%f12
1168 1166
1169 1167 fmuld %f16,%f22,%f22
1170 1168 faddd %f20,qq1,%f20
1171 1169
1172 1170 fmuld %f24,%f30,%f30
1173 1171 faddd %f28,qq1,%f28
1174 1172
1175 1173 fmuld %f0,%f6,%f6
1176 1174 faddd %f4,pp1,%f4
1177 1175
1178 1176 fmuld %f8,%f14,%f14
1179 1177 faddd %f12,pp1,%f12
1180 1178
1181 1179 faddd %f22,pp1,%f22
1182 1180 fmuld %f16,%f20,%f20
1183 1181 add %l6,%g1,%l6
1184 1182
1185 1183 faddd %f30,pp1,%f30
1186 1184 fmuld %f24,%f28,%f28
1187 1185 add %l7,%g1,%l7
1188 1186
1189 1187 faddd %f6,qq1,%f6
1190 1188 fmuld %f0,%f4,%f4
1191 1189 add %l4,%g1,%l4
1192 1190
1193 1191 faddd %f14,qq1,%f14
1194 1192 fmuld %f8,%f12,%f12
1195 1193 add %l5,%g1,%l5
1196 1194
1197 1195 fmuld %f16,%f22,%f22
1198 1196
1199 1197 fmuld %f24,%f30,%f30
1200 1198
1201 1199 fmuld %f2,%f4,%f4
1202 1200
1203 1201 fmuld %f10,%f12,%f12
1204 1202
1205 1203 fmuld %f18,%f22,%f22
1206 1204 ldd [%l6+8],%f16
1207 1205
1208 1206 fmuld %f26,%f30,%f30
1209 1207 ldd [%l7+8],%f24
1210 1208
1211 1209 fmuld %f0,%f6,%f6
1212 1210 faddd %f4,%f32,%f4
1213 1211 ldd [%l4+16],%f0
1214 1212
1215 1213 fmuld %f8,%f14,%f14
1216 1214 faddd %f12,%f34,%f12
1217 1215 ldd [%l5+16],%f8
1218 1216
1219 1217 fmuld %f16,%f20,%f20
1220 1218 faddd %f36,%f22,%f22
1221 1219
1222 1220 fmuld %f24,%f28,%f28
1223 1221 faddd %f38,%f30,%f30
1224 1222
1225 1223 fmuld %f0,%f6,%f6
1226 1224 faddd %f4,%f2,%f4
1227 1225 ldd [%l4+8],%f32
1228 1226
1229 1227 fmuld %f8,%f14,%f14
1230 1228 faddd %f12,%f10,%f12
1231 1229 ldd [%l5+8],%f34
1232 1230
1233 1231 faddd %f18,%f22,%f22
1234 1232 ldd [%l6+16],%f36
1235 1233
1236 1234 faddd %f26,%f30,%f30
1237 1235 ldd [%l7+16],%f38
1238 1236
1239 1237 fmuld %f32,%f4,%f4
1240 1238
1241 1239 fmuld %f34,%f12,%f12
1242 1240
1243 1241 fmuld %f36,%f22,%f22
1244 1242
1245 1243 fmuld %f38,%f30,%f30
1246 1244
1247 1245 fsubd %f6,%f4,%f6
1248 1246
1249 1247 fsubd %f14,%f12,%f14
1250 1248
1251 1249 faddd %f22,%f20,%f22
1252 1250
1253 1251 faddd %f30,%f28,%f30
1254 1252
1255 1253 faddd %f6,%f0,%f6
1256 1254
1257 1255 faddd %f14,%f8,%f14
1258 1256
1259 1257 faddd %f22,%f16,%f22
1260 1258
1261 1259 faddd %f30,%f24,%f30
1262 1260 mov %l0,%l4
1263 1261
1264 1262 fnegd %f6,%f4
1265 1263 lda [%i1]%asi,%l0 ! preload next argument
1266 1264
1267 1265 fnegd %f14,%f12
1268 1266 lda [%i1]%asi,%f0
1269 1267
1270 1268 fnegd %f22,%f20
1271 1269 lda [%i1+4]%asi,%f3
1272 1270
1273 1271 fnegd %f30,%f28
1274 1272 andn %l0,%i5,%l0
1275 1273 add %i1,%i2,%i1
1276 1274
1277 1275 andcc %l4,2,%g0
1278 1276 fmovdnz %icc,%f4,%f6
1279 1277 st %f6,[%o0]
1280 1278
1281 1279 andcc %l1,2,%g0
1282 1280 fmovdnz %icc,%f12,%f14
1283 1281 st %f14,[%o1]
1284 1282
1285 1283 andcc %l2,2,%g0
1286 1284 fmovdnz %icc,%f20,%f22
1287 1285 st %f22,[%o2]
1288 1286
1289 1287 andcc %l3,2,%g0
1290 1288 fmovdnz %icc,%f28,%f30
1291 1289 st %f30,[%o3]
1292 1290
1293 1291 addcc %i0,-1,%i0
1294 1292 bg,pt %icc,.loop0
1295 1293 ! delay slot
1296 1294 st %f7,[%o0+4]
1297 1295
1298 1296 ba,pt %icc,.end
1299 1297 ! delay slot
1300 1298 nop
1301 1299
1302 1300 .align 16
1303 1301 .case4:
1304 1302 fmuld %f18,%f18,%f16
1305 1303 andcc %l2,1,%g0
1306 1304 bz,pn %icc,.case6
1307 1305 ! delay slot
1308 1306 fxor %f22,%f36,%f36
1309 1307
1310 1308 fmuld %f26,%f26,%f24
1311 1309 andcc %l3,1,%g0
1312 1310 bz,pn %icc,.case5
1313 1311 ! delay slot
1314 1312 fxor %f30,%f38,%f38
1315 1313
1316 1314 fmuld %f8,pp3,%f14 ! sin(x1)
1317 1315
1318 1316 fmuld %f0,qq3,%f6 ! cos(x0)
1319 1317
1320 1318 faddd %f14,pp2,%f14
1321 1319 fmuld %f8,qq2,%f12
1322 1320
1323 1321 fmuld %f16,qq3,%f22 ! cos(x2)
1324 1322
1325 1323 fmuld %f24,qq3,%f30 ! cos(x3)
1326 1324
1327 1325 faddd %f6,qq2,%f6
1328 1326 fmuld %f0,pp2,%f4
1329 1327
1330 1328 fmuld %f8,%f14,%f14
1331 1329 faddd %f12,qq1,%f12
1332 1330
1333 1331 faddd %f22,qq2,%f22
1334 1332 fmuld %f16,pp2,%f20
1335 1333
1336 1334 faddd %f30,qq2,%f30
1337 1335 fmuld %f24,pp2,%f28
1338 1336
1339 1337 fmuld %f0,%f6,%f6
1340 1338 faddd %f4,pp1,%f4
1341 1339
1342 1340 faddd %f14,pp1,%f14
1343 1341 fmuld %f8,%f12,%f12
1344 1342 add %l5,%g1,%l5
1345 1343
1346 1344 fmuld %f16,%f22,%f22
1347 1345 faddd %f20,pp1,%f20
1348 1346
1349 1347 fmuld %f24,%f30,%f30
1350 1348 faddd %f28,pp1,%f28
1351 1349
1352 1350 faddd %f6,qq1,%f6
1353 1351 fmuld %f0,%f4,%f4
1354 1352 add %l4,%g1,%l4
1355 1353
1356 1354 fmuld %f8,%f14,%f14
1357 1355
1358 1356 faddd %f22,qq1,%f22
1359 1357 fmuld %f16,%f20,%f20
1360 1358 add %l6,%g1,%l6
1361 1359
1362 1360 faddd %f30,qq1,%f30
1363 1361 fmuld %f24,%f28,%f28
1364 1362 add %l7,%g1,%l7
1365 1363
1366 1364 fmuld %f2,%f4,%f4
1367 1365
1368 1366 fmuld %f10,%f14,%f14
1369 1367 ldd [%l5+8],%f8
1370 1368
1371 1369 fmuld %f18,%f20,%f20
1372 1370
1373 1371 fmuld %f26,%f28,%f28
1374 1372
1375 1373 fmuld %f0,%f6,%f6
1376 1374 faddd %f4,%f32,%f4
1377 1375 ldd [%l4+16],%f0
1378 1376
1379 1377 fmuld %f8,%f12,%f12
1380 1378 faddd %f34,%f14,%f14
1381 1379
1382 1380 fmuld %f16,%f22,%f22
1383 1381 faddd %f20,%f36,%f20
1384 1382 ldd [%l6+16],%f16
1385 1383
1386 1384 fmuld %f24,%f30,%f30
1387 1385 faddd %f28,%f38,%f28
1388 1386 ldd [%l7+16],%f24
1389 1387
1390 1388 fmuld %f0,%f6,%f6
1391 1389 faddd %f4,%f2,%f4
1392 1390 ldd [%l4+8],%f32
1393 1391
1394 1392 faddd %f10,%f14,%f14
1395 1393 ldd [%l5+16],%f34
1396 1394
1397 1395 fmuld %f16,%f22,%f22
1398 1396 faddd %f20,%f18,%f20
1399 1397 ldd [%l6+8],%f36
1400 1398
1401 1399 fmuld %f24,%f30,%f30
1402 1400 faddd %f28,%f26,%f28
1403 1401 ldd [%l7+8],%f38
1404 1402
1405 1403 fmuld %f32,%f4,%f4
1406 1404
1407 1405 fmuld %f34,%f14,%f14
1408 1406
1409 1407 fmuld %f36,%f20,%f20
1410 1408
1411 1409 fmuld %f38,%f28,%f28
1412 1410
1413 1411 fsubd %f6,%f4,%f6
1414 1412
1415 1413 faddd %f14,%f12,%f14
1416 1414
1417 1415 fsubd %f22,%f20,%f22
1418 1416
1419 1417 fsubd %f30,%f28,%f30
1420 1418
1421 1419 faddd %f6,%f0,%f6
1422 1420
1423 1421 faddd %f14,%f8,%f14
1424 1422
1425 1423 faddd %f22,%f16,%f22
1426 1424
1427 1425 faddd %f30,%f24,%f30
1428 1426 mov %l0,%l4
1429 1427
1430 1428 fnegd %f6,%f4
1431 1429 lda [%i1]%asi,%l0 ! preload next argument
1432 1430
1433 1431 fnegd %f14,%f12
1434 1432 lda [%i1]%asi,%f0
1435 1433
1436 1434 fnegd %f22,%f20
1437 1435 lda [%i1+4]%asi,%f3
1438 1436
1439 1437 fnegd %f30,%f28
1440 1438 andn %l0,%i5,%l0
1441 1439 add %i1,%i2,%i1
1442 1440
1443 1441 andcc %l4,2,%g0
1444 1442 fmovdnz %icc,%f4,%f6
1445 1443 st %f6,[%o0]
1446 1444
1447 1445 andcc %l1,2,%g0
1448 1446 fmovdnz %icc,%f12,%f14
1449 1447 st %f14,[%o1]
1450 1448
1451 1449 andcc %l2,2,%g0
1452 1450 fmovdnz %icc,%f20,%f22
1453 1451 st %f22,[%o2]
1454 1452
1455 1453 andcc %l3,2,%g0
1456 1454 fmovdnz %icc,%f28,%f30
1457 1455 st %f30,[%o3]
1458 1456
1459 1457 addcc %i0,-1,%i0
1460 1458 bg,pt %icc,.loop0
1461 1459 ! delay slot
1462 1460 st %f7,[%o0+4]
1463 1461
1464 1462 ba,pt %icc,.end
1465 1463 ! delay slot
1466 1464 nop
1467 1465
1468 1466 .align 16
/*
 * .case5: four-lane evaluation with cos(x0), sin(x1), cos(x2), sin(x3),
 * as annotated on the leading multiplies below.  The code just above
 * this label branches here when bit 0 of %l3 is clear.
 *
 * Lane k (k = 0..3) works in %f(8k)..%f(8k+7) plus %f(32+2k), forms its
 * load address in %l(4+k) by adding %g1, and stores through %o(k).
 * A cos lane starts from qq3 and is finished with fsubd; a sin lane
 * starts from pp3 and is finished with faddd.  pp1-pp3 and qq1-qq3 are
 * defined outside this block (presumably coefficient register aliases).
 *
 * The four lanes are interleaved instruction by instruction and reuse
 * registers (e.g. %f8 is reloaded with ldd part-way through and then
 * used again as a multiplicand), so statement order must not change.
 *
 * After the combine, each lane's result is replaced by its negation
 * when bit 1 of %l4 (a copy of the old %l0), %l1, %l2 or %l3 is set,
 * and the upper word is stored.  Only lane 0's lower word (%f7) is
 * stored here, in the loop branch's delay slot; no lower-word store
 * for lanes 1-3 appears in this block.
 */
1469 1467 .case5:
1470 1468 fmuld %f8,pp3,%f14 ! sin(x1)
1471 1469
1472 1470 fmuld %f24,pp3,%f30 ! sin(x3)
1473 1471
1474 1472 fmuld %f0,qq3,%f6 ! cos(x0)
1475 1473
1476 1474 faddd %f14,pp2,%f14
1477 1475 fmuld %f8,qq2,%f12
1478 1476
1479 1477 fmuld %f16,qq3,%f22 ! cos(x2)
1480 1478
1481 1479 faddd %f30,pp2,%f30
1482 1480 fmuld %f24,qq2,%f28
1483 1481
1484 1482 faddd %f6,qq2,%f6
1485 1483 fmuld %f0,pp2,%f4
1486 1484
1487 1485 fmuld %f8,%f14,%f14
1488 1486 faddd %f12,qq1,%f12
1489 1487
1490 1488 faddd %f22,qq2,%f22
1491 1489 fmuld %f16,pp2,%f20
1492 1490
1493 1491 fmuld %f24,%f30,%f30
1494 1492 faddd %f28,qq1,%f28
1495 1493
1496 1494 fmuld %f0,%f6,%f6
1497 1495 faddd %f4,pp1,%f4
1498 1496
1499 1497 faddd %f14,pp1,%f14
1500 1498 fmuld %f8,%f12,%f12
1501 1499 add %l5,%g1,%l5
1502 1500
1503 1501 fmuld %f16,%f22,%f22
1504 1502 faddd %f20,pp1,%f20
1505 1503
1506 1504 faddd %f30,pp1,%f30
1507 1505 fmuld %f24,%f28,%f28
1508 1506 add %l7,%g1,%l7
1509 1507
1510 1508 faddd %f6,qq1,%f6
1511 1509 fmuld %f0,%f4,%f4
1512 1510 add %l4,%g1,%l4
1513 1511
1514 1512 fmuld %f8,%f14,%f14
1515 1513
1516 1514 faddd %f22,qq1,%f22
1517 1515 fmuld %f16,%f20,%f20
1518 1516 add %l6,%g1,%l6
1519 1517
1520 1518 fmuld %f24,%f30,%f30
1521 1519
1522 1520 fmuld %f2,%f4,%f4
1523 1521
1524 1522 fmuld %f10,%f14,%f14
1525 1523 ldd [%l5+8],%f8
1526 1524
1527 1525 fmuld %f18,%f20,%f20
1528 1526
1529 1527 fmuld %f26,%f30,%f30
1530 1528 ldd [%l7+8],%f24
1531 1529
1532 1530 fmuld %f0,%f6,%f6
1533 1531 faddd %f4,%f32,%f4
1534 1532 ldd [%l4+16],%f0
1535 1533
1536 1534 fmuld %f8,%f12,%f12
1537 1535 faddd %f34,%f14,%f14
1538 1536
1539 1537 fmuld %f16,%f22,%f22
1540 1538 faddd %f20,%f36,%f20
1541 1539 ldd [%l6+16],%f16
1542 1540
1543 1541 fmuld %f24,%f28,%f28
1544 1542 faddd %f38,%f30,%f30
1545 1543
1546 1544 fmuld %f0,%f6,%f6
1547 1545 faddd %f4,%f2,%f4
1548 1546 ldd [%l4+8],%f32
1549 1547
1550 1548 faddd %f10,%f14,%f14
1551 1549 ldd [%l5+16],%f34
1552 1550
1553 1551 fmuld %f16,%f22,%f22
1554 1552 faddd %f20,%f18,%f20
1555 1553 ldd [%l6+8],%f36
1556 1554
1557 1555 faddd %f26,%f30,%f30
1558 1556 ldd [%l7+16],%f38
1559 1557
1560 1558 fmuld %f32,%f4,%f4
1561 1559
1562 1560 fmuld %f34,%f14,%f14
1563 1561
1564 1562 fmuld %f36,%f20,%f20
1565 1563
1566 1564 fmuld %f38,%f30,%f30
1567 1565
/* combine: fsubd finishes the cos lanes, faddd the sin lanes */
1568 1566 fsubd %f6,%f4,%f6
1569 1567
1570 1568 faddd %f14,%f12,%f14
1571 1569
1572 1570 fsubd %f22,%f20,%f22
1573 1571
1574 1572 faddd %f30,%f28,%f30
1575 1573
1576 1574 faddd %f6,%f0,%f6
1577 1575
1578 1576 faddd %f14,%f8,%f14
1579 1577
1580 1578 faddd %f22,%f16,%f22
1581 1579
1582 1580 faddd %f30,%f24,%f30
1583 1581 mov %l0,%l4
1584 1582
/* build negated copies; loads of the next argument are interleaved */
1585 1583 fnegd %f6,%f4
1586 1584 lda [%i1]%asi,%l0 ! preload next argument
1587 1585
1588 1586 fnegd %f14,%f12
1589 1587 lda [%i1]%asi,%f0
1590 1588
1591 1589 fnegd %f22,%f20
1592 1590 lda [%i1+4]%asi,%f3
1593 1591
1594 1592 fnegd %f30,%f28
1595 1593 andn %l0,%i5,%l0
1596 1594 add %i1,%i2,%i1
1597 1595
/* per lane: bit 1 set -> keep the negated copy; store the upper word */
1598 1596 andcc %l4,2,%g0
1599 1597 fmovdnz %icc,%f4,%f6
1600 1598 st %f6,[%o0]
1601 1599
1602 1600 andcc %l1,2,%g0
1603 1601 fmovdnz %icc,%f12,%f14
1604 1602 st %f14,[%o1]
1605 1603
1606 1604 andcc %l2,2,%g0
1607 1605 fmovdnz %icc,%f20,%f22
1608 1606 st %f22,[%o2]
1609 1607
1610 1608 andcc %l3,2,%g0
1611 1609 fmovdnz %icc,%f28,%f30
1612 1610 st %f30,[%o3]
1613 1611
/* decrement %i0 and loop while it stays positive; the delay slot stores lane 0's lower word */
1614 1612 addcc %i0,-1,%i0
1615 1613 bg,pt %icc,.loop0
1616 1614 ! delay slot
1617 1615 st %f7,[%o0+4]
1618 1616
1619 1617 ba,pt %icc,.end
1620 1618 ! delay slot
1621 1619 nop
1622 1620
1623 1621 .align 16
/*
 * .case6: four-lane evaluation with cos(x0), sin(x1), sin(x2), cos(x3),
 * as annotated on the leading multiplies below.
 *
 * Entry first squares %f26 into %f24 and tests bit 0 of %l3: clear
 * branches to .case7 (which evaluates sin(x3)), set falls through to
 * the body here (cos(x3)).  The fxor in the delay slot runs on both
 * paths because the branch is not annulled.
 *
 * Lane k (k = 0..3) works in %f(8k)..%f(8k+7) plus %f(32+2k), forms its
 * load address in %l(4+k) by adding %g1, and stores through %o(k).
 * A cos lane starts from qq3 and is finished with fsubd; a sin lane
 * starts from pp3 and is finished with faddd.  pp1-pp3 and qq1-qq3 are
 * defined outside this block (presumably coefficient register aliases).
 *
 * The four lanes are interleaved instruction by instruction and reuse
 * registers (e.g. %f8 is reloaded with ldd part-way through and then
 * used again as a multiplicand), so statement order must not change.
 *
 * After the combine, each lane's result is replaced by its negation
 * when bit 1 of %l4 (a copy of the old %l0), %l1, %l2 or %l3 is set,
 * and the upper word is stored.  Only lane 0's lower word (%f7) is
 * stored here, in the loop branch's delay slot; no lower-word store
 * for lanes 1-3 appears in this block.
 */
1624 1622 .case6:
1625 1623 fmuld %f26,%f26,%f24
1626 1624 andcc %l3,1,%g0
1627 1625 bz,pn %icc,.case7
1628 1626 ! delay slot
1629 1627 fxor %f30,%f38,%f38
1630 1628
1631 1629 fmuld %f8,pp3,%f14 ! sin(x1)
1632 1630
1633 1631 fmuld %f16,pp3,%f22 ! sin(x2)
1634 1632
1635 1633 fmuld %f0,qq3,%f6 ! cos(x0)
1636 1634
1637 1635 faddd %f14,pp2,%f14
1638 1636 fmuld %f8,qq2,%f12
1639 1637
1640 1638 faddd %f22,pp2,%f22
1641 1639 fmuld %f16,qq2,%f20
1642 1640
1643 1641 fmuld %f24,qq3,%f30 ! cos(x3)
1644 1642
1645 1643 faddd %f6,qq2,%f6
1646 1644 fmuld %f0,pp2,%f4
1647 1645
1648 1646 fmuld %f8,%f14,%f14
1649 1647 faddd %f12,qq1,%f12
1650 1648
1651 1649 fmuld %f16,%f22,%f22
1652 1650 faddd %f20,qq1,%f20
1653 1651
1654 1652 faddd %f30,qq2,%f30
1655 1653 fmuld %f24,pp2,%f28
1656 1654
1657 1655 fmuld %f0,%f6,%f6
1658 1656 faddd %f4,pp1,%f4
1659 1657
1660 1658 faddd %f14,pp1,%f14
1661 1659 fmuld %f8,%f12,%f12
1662 1660 add %l5,%g1,%l5
1663 1661
1664 1662 faddd %f22,pp1,%f22
1665 1663 fmuld %f16,%f20,%f20
1666 1664 add %l6,%g1,%l6
1667 1665
1668 1666 fmuld %f24,%f30,%f30
1669 1667 faddd %f28,pp1,%f28
1670 1668
1671 1669 faddd %f6,qq1,%f6
1672 1670 fmuld %f0,%f4,%f4
1673 1671 add %l4,%g1,%l4
1674 1672
1675 1673 fmuld %f8,%f14,%f14
1676 1674
1677 1675 fmuld %f16,%f22,%f22
1678 1676
1679 1677 faddd %f30,qq1,%f30
1680 1678 fmuld %f24,%f28,%f28
1681 1679 add %l7,%g1,%l7
1682 1680
1683 1681 fmuld %f2,%f4,%f4
1684 1682
1685 1683 fmuld %f10,%f14,%f14
1686 1684 ldd [%l5+8],%f8
1687 1685
1688 1686 fmuld %f18,%f22,%f22
1689 1687 ldd [%l6+8],%f16
1690 1688
1691 1689 fmuld %f26,%f28,%f28
1692 1690
1693 1691 fmuld %f0,%f6,%f6
1694 1692 faddd %f4,%f32,%f4
1695 1693 ldd [%l4+16],%f0
1696 1694
1697 1695 fmuld %f8,%f12,%f12
1698 1696 faddd %f34,%f14,%f14
1699 1697
1700 1698 fmuld %f16,%f20,%f20
1701 1699 faddd %f36,%f22,%f22
1702 1700
1703 1701 fmuld %f24,%f30,%f30
1704 1702 faddd %f28,%f38,%f28
1705 1703 ldd [%l7+16],%f24
1706 1704
1707 1705 fmuld %f0,%f6,%f6
1708 1706 faddd %f4,%f2,%f4
1709 1707 ldd [%l4+8],%f32
1710 1708
1711 1709 faddd %f10,%f14,%f14
1712 1710 ldd [%l5+16],%f34
1713 1711
1714 1712 faddd %f18,%f22,%f22
1715 1713 ldd [%l6+16],%f36
1716 1714
1717 1715 fmuld %f24,%f30,%f30
1718 1716 faddd %f28,%f26,%f28
1719 1717 ldd [%l7+8],%f38
1720 1718
1721 1719 fmuld %f32,%f4,%f4
1722 1720
1723 1721 fmuld %f34,%f14,%f14
1724 1722
1725 1723 fmuld %f36,%f22,%f22
1726 1724
1727 1725 fmuld %f38,%f28,%f28
1728 1726
/* combine: fsubd finishes the cos lanes, faddd the sin lanes */
1729 1727 fsubd %f6,%f4,%f6
1730 1728
1731 1729 faddd %f14,%f12,%f14
1732 1730
1733 1731 faddd %f22,%f20,%f22
1734 1732
1735 1733 fsubd %f30,%f28,%f30
1736 1734
1737 1735 faddd %f6,%f0,%f6
1738 1736
1739 1737 faddd %f14,%f8,%f14
1740 1738
1741 1739 faddd %f22,%f16,%f22
1742 1740
1743 1741 faddd %f30,%f24,%f30
1744 1742 mov %l0,%l4
1745 1743
/* build negated copies; loads of the next argument are interleaved */
1746 1744 fnegd %f6,%f4
1747 1745 lda [%i1]%asi,%l0 ! preload next argument
1748 1746
1749 1747 fnegd %f14,%f12
1750 1748 lda [%i1]%asi,%f0
1751 1749
1752 1750 fnegd %f22,%f20
1753 1751 lda [%i1+4]%asi,%f3
1754 1752
1755 1753 fnegd %f30,%f28
1756 1754 andn %l0,%i5,%l0
1757 1755 add %i1,%i2,%i1
1758 1756
/* per lane: bit 1 set -> keep the negated copy; store the upper word */
1759 1757 andcc %l4,2,%g0
1760 1758 fmovdnz %icc,%f4,%f6
1761 1759 st %f6,[%o0]
1762 1760
1763 1761 andcc %l1,2,%g0
1764 1762 fmovdnz %icc,%f12,%f14
1765 1763 st %f14,[%o1]
1766 1764
1767 1765 andcc %l2,2,%g0
1768 1766 fmovdnz %icc,%f20,%f22
1769 1767 st %f22,[%o2]
1770 1768
1771 1769 andcc %l3,2,%g0
1772 1770 fmovdnz %icc,%f28,%f30
1773 1771 st %f30,[%o3]
1774 1772
/* decrement %i0 and loop while it stays positive; the delay slot stores lane 0's lower word */
1775 1773 addcc %i0,-1,%i0
1776 1774 bg,pt %icc,.loop0
1777 1775 ! delay slot
1778 1776 st %f7,[%o0+4]
1779 1777
1780 1778 ba,pt %icc,.end
1781 1779 ! delay slot
1782 1780 nop
1783 1781
1784 1782 .align 16
/*
 * .case7: four-lane evaluation with cos(x0), sin(x1), sin(x2), sin(x3),
 * as annotated on the leading multiplies below.  .case6 branches here
 * when bit 0 of %l3 is clear.
 *
 * Lane k (k = 0..3) works in %f(8k)..%f(8k+7) plus %f(32+2k), forms its
 * load address in %l(4+k) by adding %g1, and stores through %o(k).
 * A cos lane starts from qq3 and is finished with fsubd; a sin lane
 * starts from pp3 and is finished with faddd.  pp1-pp3 and qq1-qq3 are
 * defined outside this block (presumably coefficient register aliases).
 *
 * The four lanes are interleaved instruction by instruction and reuse
 * registers (e.g. %f8 is reloaded with ldd part-way through and then
 * used again as a multiplicand), so statement order must not change.
 *
 * After the combine, each lane's result is replaced by its negation
 * when bit 1 of %l4 (a copy of the old %l0), %l1, %l2 or %l3 is set,
 * and the upper word is stored.  Only lane 0's lower word (%f7) is
 * stored here, in the loop branch's delay slot; no lower-word store
 * for lanes 1-3 appears in this block.
 */
1785 1783 .case7:
1786 1784 fmuld %f8,pp3,%f14 ! sin(x1)
1787 1785
1788 1786 fmuld %f16,pp3,%f22 ! sin(x2)
1789 1787
1790 1788 fmuld %f24,pp3,%f30 ! sin(x3)
1791 1789
1792 1790 fmuld %f0,qq3,%f6 ! cos(x0)
1793 1791
1794 1792 faddd %f14,pp2,%f14
1795 1793 fmuld %f8,qq2,%f12
1796 1794
1797 1795 faddd %f22,pp2,%f22
1798 1796 fmuld %f16,qq2,%f20
1799 1797
1800 1798 faddd %f30,pp2,%f30
1801 1799 fmuld %f24,qq2,%f28
1802 1800
1803 1801 faddd %f6,qq2,%f6
1804 1802 fmuld %f0,pp2,%f4
1805 1803
1806 1804 fmuld %f8,%f14,%f14
1807 1805 faddd %f12,qq1,%f12
1808 1806
1809 1807 fmuld %f16,%f22,%f22
1810 1808 faddd %f20,qq1,%f20
1811 1809
1812 1810 fmuld %f24,%f30,%f30
1813 1811 faddd %f28,qq1,%f28
1814 1812
1815 1813 fmuld %f0,%f6,%f6
1816 1814 faddd %f4,pp1,%f4
1817 1815
1818 1816 faddd %f14,pp1,%f14
1819 1817 fmuld %f8,%f12,%f12
1820 1818 add %l5,%g1,%l5
1821 1819
1822 1820 faddd %f22,pp1,%f22
1823 1821 fmuld %f16,%f20,%f20
1824 1822 add %l6,%g1,%l6
1825 1823
1826 1824 faddd %f30,pp1,%f30
1827 1825 fmuld %f24,%f28,%f28
1828 1826 add %l7,%g1,%l7
1829 1827
1830 1828 faddd %f6,qq1,%f6
1831 1829 fmuld %f0,%f4,%f4
1832 1830 add %l4,%g1,%l4
1833 1831
1834 1832 fmuld %f8,%f14,%f14
1835 1833
1836 1834 fmuld %f16,%f22,%f22
1837 1835
1838 1836 fmuld %f24,%f30,%f30
1839 1837
1840 1838 fmuld %f2,%f4,%f4
1841 1839
1842 1840 fmuld %f10,%f14,%f14
1843 1841 ldd [%l5+8],%f8
1844 1842
1845 1843 fmuld %f18,%f22,%f22
1846 1844 ldd [%l6+8],%f16
1847 1845
1848 1846 fmuld %f26,%f30,%f30
1849 1847 ldd [%l7+8],%f24
1850 1848
1851 1849 fmuld %f0,%f6,%f6
1852 1850 faddd %f4,%f32,%f4
1853 1851 ldd [%l4+16],%f0
1854 1852
1855 1853 fmuld %f8,%f12,%f12
1856 1854 faddd %f34,%f14,%f14
1857 1855
1858 1856 fmuld %f16,%f20,%f20
1859 1857 faddd %f36,%f22,%f22
1860 1858
1861 1859 fmuld %f24,%f28,%f28
1862 1860 faddd %f38,%f30,%f30
1863 1861
1864 1862 fmuld %f0,%f6,%f6
1865 1863 faddd %f4,%f2,%f4
1866 1864 ldd [%l4+8],%f32
1867 1865
1868 1866 faddd %f10,%f14,%f14
1869 1867 ldd [%l5+16],%f34
1870 1868
1871 1869 faddd %f18,%f22,%f22
1872 1870 ldd [%l6+16],%f36
1873 1871
1874 1872 faddd %f26,%f30,%f30
1875 1873 ldd [%l7+16],%f38
1876 1874
1877 1875 fmuld %f32,%f4,%f4
1878 1876
1879 1877 fmuld %f34,%f14,%f14
1880 1878
1881 1879 fmuld %f36,%f22,%f22
1882 1880
1883 1881 fmuld %f38,%f30,%f30
1884 1882
/* combine: fsubd finishes the cos lane, faddd the sin lanes */
1885 1883 fsubd %f6,%f4,%f6
1886 1884
1887 1885 faddd %f14,%f12,%f14
1888 1886
1889 1887 faddd %f22,%f20,%f22
1890 1888
1891 1889 faddd %f30,%f28,%f30
1892 1890
1893 1891 faddd %f6,%f0,%f6
1894 1892
1895 1893 faddd %f14,%f8,%f14
1896 1894
1897 1895 faddd %f22,%f16,%f22
1898 1896
1899 1897 faddd %f30,%f24,%f30
1900 1898 mov %l0,%l4
1901 1899
/* build negated copies; loads of the next argument are interleaved */
1902 1900 fnegd %f6,%f4
1903 1901 lda [%i1]%asi,%l0 ! preload next argument
1904 1902
1905 1903 fnegd %f14,%f12
1906 1904 lda [%i1]%asi,%f0
1907 1905
1908 1906 fnegd %f22,%f20
1909 1907 lda [%i1+4]%asi,%f3
1910 1908
1911 1909 fnegd %f30,%f28
1912 1910 andn %l0,%i5,%l0
1913 1911 add %i1,%i2,%i1
1914 1912
/* per lane: bit 1 set -> keep the negated copy; store the upper word */
1915 1913 andcc %l4,2,%g0
1916 1914 fmovdnz %icc,%f4,%f6
1917 1915 st %f6,[%o0]
1918 1916
1919 1917 andcc %l1,2,%g0
1920 1918 fmovdnz %icc,%f12,%f14
1921 1919 st %f14,[%o1]
1922 1920
1923 1921 andcc %l2,2,%g0
1924 1922 fmovdnz %icc,%f20,%f22
1925 1923 st %f22,[%o2]
1926 1924
1927 1925 andcc %l3,2,%g0
1928 1926 fmovdnz %icc,%f28,%f30
1929 1927 st %f30,[%o3]
1930 1928
/* decrement %i0 and loop while it stays positive; the delay slot stores lane 0's lower word */
1931 1929 addcc %i0,-1,%i0
1932 1930 bg,pt %icc,.loop0
1933 1931 ! delay slot
1934 1932 st %f7,[%o0+4]
1935 1933
1936 1934 ba,pt %icc,.end
1937 1935 ! delay slot
1938 1936 nop
1939 1937
1940 1938 .align 16
/*
 * .case8: four-lane evaluation with sin(x0), cos(x1), cos(x2), cos(x3),
 * as annotated on the leading multiplies below.
 *
 * Entry squares %f10, %f18 and %f26 (into %f8, %f16, %f24) in turn and
 * tests bit 0 of %l1, %l2 and %l3: a clear bit branches to .case12,
 * .case10 or .case9 respectively; with all three bits set execution
 * falls through to the body here.  Each fxor in a delay slot runs on
 * both paths because the branches are not annulled.
 *
 * Lane k (k = 0..3) works in %f(8k)..%f(8k+7) plus %f(32+2k), forms its
 * load address in %l(4+k) by adding %g1, and stores through %o(k).
 * A cos lane starts from qq3 and is finished with fsubd; a sin lane
 * starts from pp3 and is finished with faddd.  pp1-pp3 and qq1-qq3 are
 * defined outside this block (presumably coefficient register aliases).
 *
 * The four lanes are interleaved instruction by instruction and reuse
 * registers (e.g. %f0 is reloaded with ldd part-way through and then
 * used again as a multiplicand), so statement order must not change.
 *
 * After the combine, each lane's result is replaced by its negation
 * when bit 1 of %l4 (a copy of the old %l0), %l1, %l2 or %l3 is set,
 * and the upper word is stored.  Only lane 0's lower word (%f7) is
 * stored here, in the loop branch's delay slot; no lower-word store
 * for lanes 1-3 appears in this block.
 */
1941 1939 .case8:
1942 1940 fmuld %f10,%f10,%f8
1943 1941 andcc %l1,1,%g0
1944 1942 bz,pn %icc,.case12
1945 1943 ! delay slot
1946 1944 fxor %f14,%f34,%f34
1947 1945
1948 1946 fmuld %f18,%f18,%f16
1949 1947 andcc %l2,1,%g0
1950 1948 bz,pn %icc,.case10
1951 1949 ! delay slot
1952 1950 fxor %f22,%f36,%f36
1953 1951
1954 1952 fmuld %f26,%f26,%f24
1955 1953 andcc %l3,1,%g0
1956 1954 bz,pn %icc,.case9
1957 1955 ! delay slot
1958 1956 fxor %f30,%f38,%f38
1959 1957
1960 1958 fmuld %f0,pp3,%f6 ! sin(x0)
1961 1959
1962 1960 faddd %f6,pp2,%f6
1963 1961 fmuld %f0,qq2,%f4
1964 1962
1965 1963 fmuld %f8,qq3,%f14 ! cos(x1)
1966 1964
1967 1965 fmuld %f16,qq3,%f22 ! cos(x2)
1968 1966
1969 1967 fmuld %f24,qq3,%f30 ! cos(x3)
1970 1968
1971 1969 fmuld %f0,%f6,%f6
1972 1970 faddd %f4,qq1,%f4
1973 1971
1974 1972 faddd %f14,qq2,%f14
1975 1973 fmuld %f8,pp2,%f12
1976 1974
1977 1975 faddd %f22,qq2,%f22
1978 1976 fmuld %f16,pp2,%f20
1979 1977
1980 1978 faddd %f30,qq2,%f30
1981 1979 fmuld %f24,pp2,%f28
1982 1980
1983 1981 faddd %f6,pp1,%f6
1984 1982 fmuld %f0,%f4,%f4
1985 1983 add %l4,%g1,%l4
1986 1984
1987 1985 fmuld %f8,%f14,%f14
1988 1986 faddd %f12,pp1,%f12
1989 1987
1990 1988 fmuld %f16,%f22,%f22
1991 1989 faddd %f20,pp1,%f20
1992 1990
1993 1991 fmuld %f24,%f30,%f30
1994 1992 faddd %f28,pp1,%f28
1995 1993
1996 1994 fmuld %f0,%f6,%f6
1997 1995
1998 1996 faddd %f14,qq1,%f14
1999 1997 fmuld %f8,%f12,%f12
2000 1998 add %l5,%g1,%l5
2001 1999
2002 2000 faddd %f22,qq1,%f22
2003 2001 fmuld %f16,%f20,%f20
2004 2002 add %l6,%g1,%l6
2005 2003
2006 2004 faddd %f30,qq1,%f30
2007 2005 fmuld %f24,%f28,%f28
2008 2006 add %l7,%g1,%l7
2009 2007
2010 2008 fmuld %f2,%f6,%f6
2011 2009 ldd [%l4+8],%f0
2012 2010
2013 2011 fmuld %f10,%f12,%f12
2014 2012
2015 2013 fmuld %f18,%f20,%f20
2016 2014
2017 2015 fmuld %f26,%f28,%f28
2018 2016
2019 2017 fmuld %f0,%f4,%f4
2020 2018 faddd %f32,%f6,%f6
2021 2019
2022 2020 fmuld %f8,%f14,%f14
2023 2021 faddd %f12,%f34,%f12
2024 2022 ldd [%l5+16],%f8
2025 2023
2026 2024 fmuld %f16,%f22,%f22
2027 2025 faddd %f20,%f36,%f20
2028 2026 ldd [%l6+16],%f16
2029 2027
2030 2028 fmuld %f24,%f30,%f30
2031 2029 faddd %f28,%f38,%f28
2032 2030 ldd [%l7+16],%f24
2033 2031
2034 2032 faddd %f2,%f6,%f6
2035 2033 ldd [%l4+16],%f32
2036 2034
2037 2035 fmuld %f8,%f14,%f14
2038 2036 faddd %f12,%f10,%f12
2039 2037 ldd [%l5+8],%f34
2040 2038
2041 2039 fmuld %f16,%f22,%f22
2042 2040 faddd %f20,%f18,%f20
2043 2041 ldd [%l6+8],%f36
2044 2042
2045 2043 fmuld %f24,%f30,%f30
2046 2044 faddd %f28,%f26,%f28
2047 2045 ldd [%l7+8],%f38
2048 2046
2049 2047 fmuld %f32,%f6,%f6
2050 2048
2051 2049 fmuld %f34,%f12,%f12
2052 2050
2053 2051 fmuld %f36,%f20,%f20
2054 2052
2055 2053 fmuld %f38,%f28,%f28
2056 2054
/* combine: faddd finishes the sin lane, fsubd the cos lanes */
2057 2055 faddd %f6,%f4,%f6
2058 2056
2059 2057 fsubd %f14,%f12,%f14
2060 2058
2061 2059 fsubd %f22,%f20,%f22
2062 2060
2063 2061 fsubd %f30,%f28,%f30
2064 2062
2065 2063 faddd %f6,%f0,%f6
2066 2064
2067 2065 faddd %f14,%f8,%f14
2068 2066
2069 2067 faddd %f22,%f16,%f22
2070 2068
2071 2069 faddd %f30,%f24,%f30
2072 2070 mov %l0,%l4
2073 2071
/* build negated copies; loads of the next argument are interleaved */
2074 2072 fnegd %f6,%f4
2075 2073 lda [%i1]%asi,%l0 ! preload next argument
2076 2074
2077 2075 fnegd %f14,%f12
2078 2076 lda [%i1]%asi,%f0
2079 2077
2080 2078 fnegd %f22,%f20
2081 2079 lda [%i1+4]%asi,%f3
2082 2080
2083 2081 fnegd %f30,%f28
2084 2082 andn %l0,%i5,%l0
2085 2083 add %i1,%i2,%i1
2086 2084
/* per lane: bit 1 set -> keep the negated copy; store the upper word */
2087 2085 andcc %l4,2,%g0
2088 2086 fmovdnz %icc,%f4,%f6
2089 2087 st %f6,[%o0]
2090 2088
2091 2089 andcc %l1,2,%g0
2092 2090 fmovdnz %icc,%f12,%f14
2093 2091 st %f14,[%o1]
2094 2092
2095 2093 andcc %l2,2,%g0
2096 2094 fmovdnz %icc,%f20,%f22
2097 2095 st %f22,[%o2]
2098 2096
2099 2097 andcc %l3,2,%g0
2100 2098 fmovdnz %icc,%f28,%f30
2101 2099 st %f30,[%o3]
2102 2100
/* decrement %i0 and loop while it stays positive; the delay slot stores lane 0's lower word */
2103 2101 addcc %i0,-1,%i0
2104 2102 bg,pt %icc,.loop0
2105 2103 ! delay slot
2106 2104 st %f7,[%o0+4]
2107 2105
2108 2106 ba,pt %icc,.end
2109 2107 ! delay slot
2110 2108 nop
2111 2109
2112 2110 .align 16
/*
 * .case9: four-lane evaluation with sin(x0), cos(x1), cos(x2), sin(x3),
 * as annotated on the leading multiplies below.  .case8 branches here
 * when bit 0 of %l3 is clear.
 *
 * Lane k (k = 0..3) works in %f(8k)..%f(8k+7) plus %f(32+2k), forms its
 * load address in %l(4+k) by adding %g1, and stores through %o(k).
 * A cos lane starts from qq3 and is finished with fsubd; a sin lane
 * starts from pp3 and is finished with faddd.  pp1-pp3 and qq1-qq3 are
 * defined outside this block (presumably coefficient register aliases).
 *
 * The four lanes are interleaved instruction by instruction and reuse
 * registers (e.g. %f0 is reloaded with ldd part-way through and then
 * used again as a multiplicand), so statement order must not change.
 *
 * After the combine, each lane's result is replaced by its negation
 * when bit 1 of %l4 (a copy of the old %l0), %l1, %l2 or %l3 is set,
 * and the upper word is stored.  Only lane 0's lower word (%f7) is
 * stored here, in the loop branch's delay slot; no lower-word store
 * for lanes 1-3 appears in this block.
 */
2113 2111 .case9:
2114 2112 fmuld %f0,pp3,%f6 ! sin(x0)
2115 2113
2116 2114 fmuld %f24,pp3,%f30 ! sin(x3)
2117 2115
2118 2116 faddd %f6,pp2,%f6
2119 2117 fmuld %f0,qq2,%f4
2120 2118
2121 2119 fmuld %f8,qq3,%f14 ! cos(x1)
2122 2120
2123 2121 fmuld %f16,qq3,%f22 ! cos(x2)
2124 2122
2125 2123 faddd %f30,pp2,%f30
2126 2124 fmuld %f24,qq2,%f28
2127 2125
2128 2126 fmuld %f0,%f6,%f6
2129 2127 faddd %f4,qq1,%f4
2130 2128
2131 2129 faddd %f14,qq2,%f14
2132 2130 fmuld %f8,pp2,%f12
2133 2131
2134 2132 faddd %f22,qq2,%f22
2135 2133 fmuld %f16,pp2,%f20
2136 2134
2137 2135 fmuld %f24,%f30,%f30
2138 2136 faddd %f28,qq1,%f28
2139 2137
2140 2138 faddd %f6,pp1,%f6
2141 2139 fmuld %f0,%f4,%f4
2142 2140 add %l4,%g1,%l4
2143 2141
2144 2142 fmuld %f8,%f14,%f14
2145 2143 faddd %f12,pp1,%f12
2146 2144
2147 2145 fmuld %f16,%f22,%f22
2148 2146 faddd %f20,pp1,%f20
2149 2147
2150 2148 faddd %f30,pp1,%f30
2151 2149 fmuld %f24,%f28,%f28
2152 2150 add %l7,%g1,%l7
2153 2151
2154 2152 fmuld %f0,%f6,%f6
2155 2153
2156 2154 faddd %f14,qq1,%f14
2157 2155 fmuld %f8,%f12,%f12
2158 2156 add %l5,%g1,%l5
2159 2157
2160 2158 faddd %f22,qq1,%f22
2161 2159 fmuld %f16,%f20,%f20
2162 2160 add %l6,%g1,%l6
2163 2161
2164 2162 fmuld %f24,%f30,%f30
2165 2163
2166 2164 fmuld %f2,%f6,%f6
2167 2165 ldd [%l4+8],%f0
2168 2166
2169 2167 fmuld %f10,%f12,%f12
2170 2168
2171 2169 fmuld %f18,%f20,%f20
2172 2170
2173 2171 fmuld %f26,%f30,%f30
2174 2172 ldd [%l7+8],%f24
2175 2173
2176 2174 fmuld %f0,%f4,%f4
2177 2175 faddd %f32,%f6,%f6
2178 2176
2179 2177 fmuld %f8,%f14,%f14
2180 2178 faddd %f12,%f34,%f12
2181 2179 ldd [%l5+16],%f8
2182 2180
2183 2181 fmuld %f16,%f22,%f22
2184 2182 faddd %f20,%f36,%f20
2185 2183 ldd [%l6+16],%f16
2186 2184
2187 2185 fmuld %f24,%f28,%f28
2188 2186 faddd %f38,%f30,%f30
2189 2187
2190 2188 faddd %f2,%f6,%f6
2191 2189 ldd [%l4+16],%f32
2192 2190
2193 2191 fmuld %f8,%f14,%f14
2194 2192 faddd %f12,%f10,%f12
2195 2193 ldd [%l5+8],%f34
2196 2194
2197 2195 fmuld %f16,%f22,%f22
2198 2196 faddd %f20,%f18,%f20
2199 2197 ldd [%l6+8],%f36
2200 2198
2201 2199 faddd %f26,%f30,%f30
2202 2200 ldd [%l7+16],%f38
2203 2201
2204 2202 fmuld %f32,%f6,%f6
2205 2203
2206 2204 fmuld %f34,%f12,%f12
2207 2205
2208 2206 fmuld %f36,%f20,%f20
2209 2207
2210 2208 fmuld %f38,%f30,%f30
2211 2209
/* combine: faddd finishes the sin lanes, fsubd the cos lanes */
2212 2210 faddd %f6,%f4,%f6
2213 2211
2214 2212 fsubd %f14,%f12,%f14
2215 2213
2216 2214 fsubd %f22,%f20,%f22
2217 2215
2218 2216 faddd %f30,%f28,%f30
2219 2217
2220 2218 faddd %f6,%f0,%f6
2221 2219
2222 2220 faddd %f14,%f8,%f14
2223 2221
2224 2222 faddd %f22,%f16,%f22
2225 2223
2226 2224 faddd %f30,%f24,%f30
2227 2225 mov %l0,%l4
2228 2226
/* build negated copies; loads of the next argument are interleaved */
2229 2227 fnegd %f6,%f4
2230 2228 lda [%i1]%asi,%l0 ! preload next argument
2231 2229
2232 2230 fnegd %f14,%f12
2233 2231 lda [%i1]%asi,%f0
2234 2232
2235 2233 fnegd %f22,%f20
2236 2234 lda [%i1+4]%asi,%f3
2237 2235
2238 2236 fnegd %f30,%f28
2239 2237 andn %l0,%i5,%l0
2240 2238 add %i1,%i2,%i1
2241 2239
/* per lane: bit 1 set -> keep the negated copy; store the upper word */
2242 2240 andcc %l4,2,%g0
2243 2241 fmovdnz %icc,%f4,%f6
2244 2242 st %f6,[%o0]
2245 2243
2246 2244 andcc %l1,2,%g0
2247 2245 fmovdnz %icc,%f12,%f14
2248 2246 st %f14,[%o1]
2249 2247
2250 2248 andcc %l2,2,%g0
2251 2249 fmovdnz %icc,%f20,%f22
2252 2250 st %f22,[%o2]
2253 2251
2254 2252 andcc %l3,2,%g0
2255 2253 fmovdnz %icc,%f28,%f30
2256 2254 st %f30,[%o3]
2257 2255
/* decrement %i0 and loop while it stays positive; the delay slot stores lane 0's lower word */
2258 2256 addcc %i0,-1,%i0
2259 2257 bg,pt %icc,.loop0
2260 2258 ! delay slot
2261 2259 st %f7,[%o0+4]
2262 2260
2263 2261 ba,pt %icc,.end
2264 2262 ! delay slot
2265 2263 nop
2266 2264
2267 2265 .align 16
/*
 * .case10: four-lane evaluation with sin(x0), cos(x1), sin(x2), cos(x3),
 * as annotated on the leading multiplies below.  .case8 branches here
 * when bit 0 of %l2 is clear.
 *
 * Entry first squares %f26 into %f24 and tests bit 0 of %l3: clear
 * branches to .case11 (which evaluates sin(x3)), set falls through to
 * the body here (cos(x3)).  The fxor in the delay slot runs on both
 * paths because the branch is not annulled.
 *
 * Lane k (k = 0..3) works in %f(8k)..%f(8k+7) plus %f(32+2k), forms its
 * load address in %l(4+k) by adding %g1, and stores through %o(k).
 * A cos lane starts from qq3 and is finished with fsubd; a sin lane
 * starts from pp3 and is finished with faddd.  pp1-pp3 and qq1-qq3 are
 * defined outside this block (presumably coefficient register aliases).
 *
 * The four lanes are interleaved instruction by instruction and reuse
 * registers (e.g. %f0 is reloaded with ldd part-way through and then
 * used again as a multiplicand), so statement order must not change.
 *
 * After the combine, each lane's result is replaced by its negation
 * when bit 1 of %l4 (a copy of the old %l0), %l1, %l2 or %l3 is set,
 * and the upper word is stored.  Only lane 0's lower word (%f7) is
 * stored here, in the loop branch's delay slot; no lower-word store
 * for lanes 1-3 appears in this block.
 */
2268 2266 .case10:
2269 2267 fmuld %f26,%f26,%f24
2270 2268 andcc %l3,1,%g0
2271 2269 bz,pn %icc,.case11
2272 2270 ! delay slot
2273 2271 fxor %f30,%f38,%f38
2274 2272
2275 2273 fmuld %f0,pp3,%f6 ! sin(x0)
2276 2274
2277 2275 fmuld %f16,pp3,%f22 ! sin(x2)
2278 2276
2279 2277 faddd %f6,pp2,%f6
2280 2278 fmuld %f0,qq2,%f4
2281 2279
2282 2280 fmuld %f8,qq3,%f14 ! cos(x1)
2283 2281
2284 2282 faddd %f22,pp2,%f22
2285 2283 fmuld %f16,qq2,%f20
2286 2284
2287 2285 fmuld %f24,qq3,%f30 ! cos(x3)
2288 2286
2289 2287 fmuld %f0,%f6,%f6
2290 2288 faddd %f4,qq1,%f4
2291 2289
2292 2290 faddd %f14,qq2,%f14
2293 2291 fmuld %f8,pp2,%f12
2294 2292
2295 2293 fmuld %f16,%f22,%f22
2296 2294 faddd %f20,qq1,%f20
2297 2295
2298 2296 faddd %f30,qq2,%f30
2299 2297 fmuld %f24,pp2,%f28
2300 2298
2301 2299 faddd %f6,pp1,%f6
2302 2300 fmuld %f0,%f4,%f4
2303 2301 add %l4,%g1,%l4
2304 2302
2305 2303 fmuld %f8,%f14,%f14
2306 2304 faddd %f12,pp1,%f12
2307 2305
2308 2306 faddd %f22,pp1,%f22
2309 2307 fmuld %f16,%f20,%f20
2310 2308 add %l6,%g1,%l6
2311 2309
2312 2310 fmuld %f24,%f30,%f30
2313 2311 faddd %f28,pp1,%f28
2314 2312
2315 2313 fmuld %f0,%f6,%f6
2316 2314
2317 2315 faddd %f14,qq1,%f14
2318 2316 fmuld %f8,%f12,%f12
2319 2317 add %l5,%g1,%l5
2320 2318
2321 2319 fmuld %f16,%f22,%f22
2322 2320
2323 2321 faddd %f30,qq1,%f30
2324 2322 fmuld %f24,%f28,%f28
2325 2323 add %l7,%g1,%l7
2326 2324
2327 2325 fmuld %f2,%f6,%f6
2328 2326 ldd [%l4+8],%f0
2329 2327
2330 2328 fmuld %f10,%f12,%f12
2331 2329
2332 2330 fmuld %f18,%f22,%f22
2333 2331 ldd [%l6+8],%f16
2334 2332
2335 2333 fmuld %f26,%f28,%f28
2336 2334
2337 2335 fmuld %f0,%f4,%f4
2338 2336 faddd %f32,%f6,%f6
2339 2337
2340 2338 fmuld %f8,%f14,%f14
2341 2339 faddd %f12,%f34,%f12
2342 2340 ldd [%l5+16],%f8
2343 2341
2344 2342 fmuld %f16,%f20,%f20
2345 2343 faddd %f36,%f22,%f22
2346 2344
2347 2345 fmuld %f24,%f30,%f30
2348 2346 faddd %f28,%f38,%f28
2349 2347 ldd [%l7+16],%f24
2350 2348
2351 2349 faddd %f2,%f6,%f6
2352 2350 ldd [%l4+16],%f32
2353 2351
2354 2352 fmuld %f8,%f14,%f14
2355 2353 faddd %f12,%f10,%f12
2356 2354 ldd [%l5+8],%f34
2357 2355
2358 2356 faddd %f18,%f22,%f22
2359 2357 ldd [%l6+16],%f36
2360 2358
2361 2359 fmuld %f24,%f30,%f30
2362 2360 faddd %f28,%f26,%f28
2363 2361 ldd [%l7+8],%f38
2364 2362
2365 2363 fmuld %f32,%f6,%f6
2366 2364
2367 2365 fmuld %f34,%f12,%f12
2368 2366
2369 2367 fmuld %f36,%f22,%f22
2370 2368
2371 2369 fmuld %f38,%f28,%f28
2372 2370
/* combine: faddd finishes the sin lanes, fsubd the cos lanes */
2373 2371 faddd %f6,%f4,%f6
2374 2372
2375 2373 fsubd %f14,%f12,%f14
2376 2374
2377 2375 faddd %f22,%f20,%f22
2378 2376
2379 2377 fsubd %f30,%f28,%f30
2380 2378
2381 2379 faddd %f6,%f0,%f6
2382 2380
2383 2381 faddd %f14,%f8,%f14
2384 2382
2385 2383 faddd %f22,%f16,%f22
2386 2384
2387 2385 faddd %f30,%f24,%f30
2388 2386 mov %l0,%l4
2389 2387
/* build negated copies; loads of the next argument are interleaved */
2390 2388 fnegd %f6,%f4
2391 2389 lda [%i1]%asi,%l0 ! preload next argument
2392 2390
2393 2391 fnegd %f14,%f12
2394 2392 lda [%i1]%asi,%f0
2395 2393
2396 2394 fnegd %f22,%f20
2397 2395 lda [%i1+4]%asi,%f3
2398 2396
2399 2397 fnegd %f30,%f28
2400 2398 andn %l0,%i5,%l0
2401 2399 add %i1,%i2,%i1
2402 2400
/* per lane: bit 1 set -> keep the negated copy; store the upper word */
2403 2401 andcc %l4,2,%g0
2404 2402 fmovdnz %icc,%f4,%f6
2405 2403 st %f6,[%o0]
2406 2404
2407 2405 andcc %l1,2,%g0
2408 2406 fmovdnz %icc,%f12,%f14
2409 2407 st %f14,[%o1]
2410 2408
2411 2409 andcc %l2,2,%g0
2412 2410 fmovdnz %icc,%f20,%f22
2413 2411 st %f22,[%o2]
2414 2412
2415 2413 andcc %l3,2,%g0
2416 2414 fmovdnz %icc,%f28,%f30
2417 2415 st %f30,[%o3]
2418 2416
/* decrement %i0 and loop while it stays positive; the delay slot stores lane 0's lower word */
2419 2417 addcc %i0,-1,%i0
2420 2418 bg,pt %icc,.loop0
2421 2419 ! delay slot
2422 2420 st %f7,[%o0+4]
2423 2421
2424 2422 ba,pt %icc,.end
2425 2423 ! delay slot
2426 2424 nop
2427 2425
2428 2426 .align 16
/*
 * .case11: four-lane evaluation with sin(x0), cos(x1), sin(x2), sin(x3),
 * as annotated on the leading multiplies below.  .case10 branches here
 * when bit 0 of %l3 is clear.
 *
 * Lane k (k = 0..3) works in %f(8k)..%f(8k+7) plus %f(32+2k), forms its
 * load address in %l(4+k) by adding %g1, and stores through %o(k).
 * A cos lane starts from qq3 and is finished with fsubd; a sin lane
 * starts from pp3 and is finished with faddd.  pp1-pp3 and qq1-qq3 are
 * defined outside this block (presumably coefficient register aliases).
 *
 * The four lanes are interleaved instruction by instruction and reuse
 * registers (e.g. %f0 is reloaded with ldd part-way through and then
 * used again as a multiplicand), so statement order must not change.
 *
 * After the combine, each lane's result is replaced by its negation
 * when bit 1 of %l4 (a copy of the old %l0), %l1, %l2 or %l3 is set,
 * and the upper word is stored.  Only lane 0's lower word (%f7) is
 * stored here, in the loop branch's delay slot; no lower-word store
 * for lanes 1-3 appears in this block.
 */
2429 2427 .case11:
2430 2428 fmuld %f0,pp3,%f6 ! sin(x0)
2431 2429
2432 2430 fmuld %f16,pp3,%f22 ! sin(x2)
2433 2431
2434 2432 fmuld %f24,pp3,%f30 ! sin(x3)
2435 2433
2436 2434 faddd %f6,pp2,%f6
2437 2435 fmuld %f0,qq2,%f4
2438 2436
2439 2437 fmuld %f8,qq3,%f14 ! cos(x1)
2440 2438
2441 2439 faddd %f22,pp2,%f22
2442 2440 fmuld %f16,qq2,%f20
2443 2441
2444 2442 faddd %f30,pp2,%f30
2445 2443 fmuld %f24,qq2,%f28
2446 2444
2447 2445 fmuld %f0,%f6,%f6
2448 2446 faddd %f4,qq1,%f4
2449 2447
2450 2448 faddd %f14,qq2,%f14
2451 2449 fmuld %f8,pp2,%f12
2452 2450
2453 2451 fmuld %f16,%f22,%f22
2454 2452 faddd %f20,qq1,%f20
2455 2453
2456 2454 fmuld %f24,%f30,%f30
2457 2455 faddd %f28,qq1,%f28
2458 2456
2459 2457 faddd %f6,pp1,%f6
2460 2458 fmuld %f0,%f4,%f4
2461 2459 add %l4,%g1,%l4
2462 2460
2463 2461 fmuld %f8,%f14,%f14
2464 2462 faddd %f12,pp1,%f12
2465 2463
2466 2464 faddd %f22,pp1,%f22
2467 2465 fmuld %f16,%f20,%f20
2468 2466 add %l6,%g1,%l6
2469 2467
2470 2468 faddd %f30,pp1,%f30
2471 2469 fmuld %f24,%f28,%f28
2472 2470 add %l7,%g1,%l7
2473 2471
2474 2472 fmuld %f0,%f6,%f6
2475 2473
2476 2474 faddd %f14,qq1,%f14
2477 2475 fmuld %f8,%f12,%f12
2478 2476 add %l5,%g1,%l5
2479 2477
2480 2478 fmuld %f16,%f22,%f22
2481 2479
2482 2480 fmuld %f24,%f30,%f30
2483 2481
2484 2482 fmuld %f2,%f6,%f6
2485 2483 ldd [%l4+8],%f0
2486 2484
2487 2485 fmuld %f10,%f12,%f12
2488 2486
2489 2487 fmuld %f18,%f22,%f22
2490 2488 ldd [%l6+8],%f16
2491 2489
2492 2490 fmuld %f26,%f30,%f30
2493 2491 ldd [%l7+8],%f24
2494 2492
2495 2493 fmuld %f0,%f4,%f4
2496 2494 faddd %f32,%f6,%f6
2497 2495
2498 2496 fmuld %f8,%f14,%f14
2499 2497 faddd %f12,%f34,%f12
2500 2498 ldd [%l5+16],%f8
2501 2499
2502 2500 fmuld %f16,%f20,%f20
2503 2501 faddd %f36,%f22,%f22
2504 2502
2505 2503 fmuld %f24,%f28,%f28
2506 2504 faddd %f38,%f30,%f30
2507 2505
2508 2506 faddd %f2,%f6,%f6
2509 2507 ldd [%l4+16],%f32
2510 2508
2511 2509 fmuld %f8,%f14,%f14
2512 2510 faddd %f12,%f10,%f12
2513 2511 ldd [%l5+8],%f34
2514 2512
2515 2513 faddd %f18,%f22,%f22
2516 2514 ldd [%l6+16],%f36
2517 2515
2518 2516 faddd %f26,%f30,%f30
2519 2517 ldd [%l7+16],%f38
2520 2518
2521 2519 fmuld %f32,%f6,%f6
2522 2520
2523 2521 fmuld %f34,%f12,%f12
2524 2522
2525 2523 fmuld %f36,%f22,%f22
2526 2524
2527 2525 fmuld %f38,%f30,%f30
2528 2526
/* combine: faddd finishes the sin lanes, fsubd the cos lane */
2529 2527 faddd %f6,%f4,%f6
2530 2528
2531 2529 fsubd %f14,%f12,%f14
2532 2530
2533 2531 faddd %f22,%f20,%f22
2534 2532
2535 2533 faddd %f30,%f28,%f30
2536 2534
2537 2535 faddd %f6,%f0,%f6
2538 2536
2539 2537 faddd %f14,%f8,%f14
2540 2538
2541 2539 faddd %f22,%f16,%f22
2542 2540
2543 2541 faddd %f30,%f24,%f30
2544 2542 mov %l0,%l4
2545 2543
/* build negated copies; loads of the next argument are interleaved */
2546 2544 fnegd %f6,%f4
2547 2545 lda [%i1]%asi,%l0 ! preload next argument
2548 2546
2549 2547 fnegd %f14,%f12
2550 2548 lda [%i1]%asi,%f0
2551 2549
2552 2550 fnegd %f22,%f20
2553 2551 lda [%i1+4]%asi,%f3
2554 2552
2555 2553 fnegd %f30,%f28
2556 2554 andn %l0,%i5,%l0
2557 2555 add %i1,%i2,%i1
2558 2556
/* per lane: bit 1 set -> keep the negated copy; store the upper word */
2559 2557 andcc %l4,2,%g0
2560 2558 fmovdnz %icc,%f4,%f6
2561 2559 st %f6,[%o0]
2562 2560
2563 2561 andcc %l1,2,%g0
2564 2562 fmovdnz %icc,%f12,%f14
2565 2563 st %f14,[%o1]
2566 2564
2567 2565 andcc %l2,2,%g0
2568 2566 fmovdnz %icc,%f20,%f22
2569 2567 st %f22,[%o2]
2570 2568
2571 2569 andcc %l3,2,%g0
2572 2570 fmovdnz %icc,%f28,%f30
2573 2571 st %f30,[%o3]
2574 2572
/* decrement %i0 and loop while it stays positive; the delay slot stores lane 0's lower word */
2575 2573 addcc %i0,-1,%i0
2576 2574 bg,pt %icc,.loop0
2577 2575 ! delay slot
2578 2576 st %f7,[%o0+4]
2579 2577
2580 2578 ba,pt %icc,.end
2581 2579 ! delay slot
2582 2580 nop
2583 2581
2584 2582 .align 16
/*
 * .case12: four-lane evaluation with sin(x0), sin(x1), cos(x2), cos(x3),
 * as annotated on the leading multiplies below.  .case8 branches here
 * when bit 0 of %l1 is clear.
 *
 * Entry squares %f18 and %f26 (into %f16, %f24) in turn and tests bit 0
 * of %l2 and %l3: a clear bit branches to .case14 or .case13
 * respectively; with both bits set execution falls through to the
 * body here.  Each fxor in a delay slot runs on both paths because the
 * branches are not annulled.
 *
 * Lane k (k = 0..3) works in %f(8k)..%f(8k+7) plus %f(32+2k), forms its
 * load address in %l(4+k) by adding %g1, and stores through %o(k).
 * A cos lane starts from qq3 and is finished with fsubd; a sin lane
 * starts from pp3 and is finished with faddd.  pp1-pp3 and qq1-qq3 are
 * defined outside this block (presumably coefficient register aliases).
 *
 * The four lanes are interleaved instruction by instruction and reuse
 * registers (e.g. %f0 is reloaded with ldd part-way through and then
 * used again as a multiplicand), so statement order must not change.
 *
 * After the combine, each lane's result is replaced by its negation
 * when bit 1 of %l4 (a copy of the old %l0), %l1, %l2 or %l3 is set,
 * and the upper word is stored.  Only lane 0's lower word (%f7) is
 * stored here, in the loop branch's delay slot; no lower-word store
 * for lanes 1-3 appears in this block.
 */
2585 2583 .case12:
2586 2584 fmuld %f18,%f18,%f16
2587 2585 andcc %l2,1,%g0
2588 2586 bz,pn %icc,.case14
2589 2587 ! delay slot
2590 2588 fxor %f22,%f36,%f36
2591 2589
2592 2590 fmuld %f26,%f26,%f24
2593 2591 andcc %l3,1,%g0
2594 2592 bz,pn %icc,.case13
2595 2593 ! delay slot
2596 2594 fxor %f30,%f38,%f38
2597 2595
2598 2596 fmuld %f0,pp3,%f6 ! sin(x0)
2599 2597
2600 2598 fmuld %f8,pp3,%f14 ! sin(x1)
2601 2599
2602 2600 faddd %f6,pp2,%f6
2603 2601 fmuld %f0,qq2,%f4
2604 2602
2605 2603 faddd %f14,pp2,%f14
2606 2604 fmuld %f8,qq2,%f12
2607 2605
2608 2606 fmuld %f16,qq3,%f22 ! cos(x2)
2609 2607
2610 2608 fmuld %f24,qq3,%f30 ! cos(x3)
2611 2609
2612 2610 fmuld %f0,%f6,%f6
2613 2611 faddd %f4,qq1,%f4
2614 2612
2615 2613 fmuld %f8,%f14,%f14
2616 2614 faddd %f12,qq1,%f12
2617 2615
2618 2616 faddd %f22,qq2,%f22
2619 2617 fmuld %f16,pp2,%f20
2620 2618
2621 2619 faddd %f30,qq2,%f30
2622 2620 fmuld %f24,pp2,%f28
2623 2621
2624 2622 faddd %f6,pp1,%f6
2625 2623 fmuld %f0,%f4,%f4
2626 2624 add %l4,%g1,%l4
2627 2625
2628 2626 faddd %f14,pp1,%f14
2629 2627 fmuld %f8,%f12,%f12
2630 2628 add %l5,%g1,%l5
2631 2629
2632 2630 fmuld %f16,%f22,%f22
2633 2631 faddd %f20,pp1,%f20
2634 2632
2635 2633 fmuld %f24,%f30,%f30
2636 2634 faddd %f28,pp1,%f28
2637 2635
2638 2636 fmuld %f0,%f6,%f6
2639 2637
2640 2638 fmuld %f8,%f14,%f14
2641 2639
2642 2640 faddd %f22,qq1,%f22
2643 2641 fmuld %f16,%f20,%f20
2644 2642 add %l6,%g1,%l6
2645 2643
2646 2644 faddd %f30,qq1,%f30
2647 2645 fmuld %f24,%f28,%f28
2648 2646 add %l7,%g1,%l7
2649 2647
2650 2648 fmuld %f2,%f6,%f6
2651 2649 ldd [%l4+8],%f0
2652 2650
2653 2651 fmuld %f10,%f14,%f14
2654 2652 ldd [%l5+8],%f8
2655 2653
2656 2654 fmuld %f18,%f20,%f20
2657 2655
2658 2656 fmuld %f26,%f28,%f28
2659 2657
2660 2658 fmuld %f0,%f4,%f4
2661 2659 faddd %f32,%f6,%f6
2662 2660
2663 2661 fmuld %f8,%f12,%f12
2664 2662 faddd %f34,%f14,%f14
2665 2663
2666 2664 fmuld %f16,%f22,%f22
2667 2665 faddd %f20,%f36,%f20
2668 2666 ldd [%l6+16],%f16
2669 2667
2670 2668 fmuld %f24,%f30,%f30
2671 2669 faddd %f28,%f38,%f28
2672 2670 ldd [%l7+16],%f24
2673 2671
2674 2672 faddd %f2,%f6,%f6
2675 2673 ldd [%l4+16],%f32
2676 2674
2677 2675 faddd %f10,%f14,%f14
2678 2676 ldd [%l5+16],%f34
2679 2677
2680 2678 fmuld %f16,%f22,%f22
2681 2679 faddd %f20,%f18,%f20
2682 2680 ldd [%l6+8],%f36
2683 2681
2684 2682 fmuld %f24,%f30,%f30
2685 2683 faddd %f28,%f26,%f28
2686 2684 ldd [%l7+8],%f38
2687 2685
2688 2686 fmuld %f32,%f6,%f6
2689 2687
2690 2688 fmuld %f34,%f14,%f14
2691 2689
2692 2690 fmuld %f36,%f20,%f20
2693 2691
2694 2692 fmuld %f38,%f28,%f28
2695 2693
/* combine: faddd finishes the sin lanes, fsubd the cos lanes */
2696 2694 faddd %f6,%f4,%f6
2697 2695
2698 2696 faddd %f14,%f12,%f14
2699 2697
2700 2698 fsubd %f22,%f20,%f22
2701 2699
2702 2700 fsubd %f30,%f28,%f30
2703 2701
2704 2702 faddd %f6,%f0,%f6
2705 2703
2706 2704 faddd %f14,%f8,%f14
2707 2705
2708 2706 faddd %f22,%f16,%f22
2709 2707
2710 2708 faddd %f30,%f24,%f30
2711 2709 mov %l0,%l4
2712 2710
/* build negated copies; loads of the next argument are interleaved */
2713 2711 fnegd %f6,%f4
2714 2712 lda [%i1]%asi,%l0 ! preload next argument
2715 2713
2716 2714 fnegd %f14,%f12
2717 2715 lda [%i1]%asi,%f0
2718 2716
2719 2717 fnegd %f22,%f20
2720 2718 lda [%i1+4]%asi,%f3
2721 2719
2722 2720 fnegd %f30,%f28
2723 2721 andn %l0,%i5,%l0
2724 2722 add %i1,%i2,%i1
2725 2723
/* per lane: bit 1 set -> keep the negated copy; store the upper word */
2726 2724 andcc %l4,2,%g0
2727 2725 fmovdnz %icc,%f4,%f6
2728 2726 st %f6,[%o0]
2729 2727
2730 2728 andcc %l1,2,%g0
2731 2729 fmovdnz %icc,%f12,%f14
2732 2730 st %f14,[%o1]
2733 2731
2734 2732 andcc %l2,2,%g0
2735 2733 fmovdnz %icc,%f20,%f22
2736 2734 st %f22,[%o2]
2737 2735
2738 2736 andcc %l3,2,%g0
2739 2737 fmovdnz %icc,%f28,%f30
2740 2738 st %f30,[%o3]
2741 2739
/* decrement %i0 and loop while it stays positive; the delay slot stores lane 0's lower word */
2742 2740 addcc %i0,-1,%i0
2743 2741 bg,pt %icc,.loop0
2744 2742 ! delay slot
2745 2743 st %f7,[%o0+4]
2746 2744
2747 2745 ba,pt %icc,.end
2748 2746 ! delay slot
2749 2747 nop
2750 2748
2751 2749 .align 16
! .case13: evaluate four lanes at once; lanes 0, 1 and 3 take the "sin"
! polynomial path and lane 2 takes the "cos" path (see the per-lane tags).
!
! Per-lane registers, as used below:
!   lane 0: x=%f2  z=%f0  c=%f32 table ptr=%l4 result=%f6  out=%o0
!   lane 1: x=%f10 z=%f8  c=%f34 table ptr=%l5 result=%f14 out=%o1
!   lane 2: x=%f18 z=%f16 c=%f36 table ptr=%l6 result=%f22 out=%o2
!   lane 3: x=%f26 z=%f24 c=%f38 table ptr=%l7 result=%f30 out=%o3
! z is presumably x*x and c a correction term, both set before this label
! (not visible in this chunk) -- TODO confirm against the loop body.
!
! With T8 = [ptr+8] and T16 = [ptr+16] after ptr += %g1:
!   sin lane: T8  + T8 *z*(qq1 + z*qq2)          + T16*(x + c + x*z*(pp1 + z*(pp2 + z*pp3)))
!   cos lane: T16 + T16*z*(qq1 + z*(qq2 + z*qq3)) - T8 *(x + c + x*z*(pp1 + z*pp2))
! NOTE(review): this matches the angle-addition identities if T8/T16 are
! sin/cos of a table breakpoint -- verify against the table definition.
2752 2750 .case13:
2753 2751 fmuld %f0,pp3,%f6 ! sin(x0)
2754 2752
2755 2753 fmuld %f8,pp3,%f14 ! sin(x1)
2756 2754
2757 2755 fmuld %f24,pp3,%f30 ! sin(x3)
2758 2756
2759 2757 faddd %f6,pp2,%f6
2760 2758 fmuld %f0,qq2,%f4
2761 2759
2762 2760 faddd %f14,pp2,%f14
2763 2761 fmuld %f8,qq2,%f12
2764 2762
2765 2763 fmuld %f16,qq3,%f22 ! cos(x2)
2766 2764
2767 2765 faddd %f30,pp2,%f30
2768 2766 fmuld %f24,qq2,%f28
2769 2767
2770 2768 fmuld %f0,%f6,%f6
2771 2769 faddd %f4,qq1,%f4
2772 2770
2773 2771 fmuld %f8,%f14,%f14
2774 2772 faddd %f12,qq1,%f12
2775 2773
2776 2774 faddd %f22,qq2,%f22
2777 2775 fmuld %f16,pp2,%f20
2778 2776
2779 2777 fmuld %f24,%f30,%f30
2780 2778 faddd %f28,qq1,%f28
2781 2779
2782 2780 faddd %f6,pp1,%f6
2783 2781 fmuld %f0,%f4,%f4
2784 2782 add %l4,%g1,%l4 ! lane 0 table pointer += %g1
2785 2783
2786 2784 faddd %f14,pp1,%f14
2787 2785 fmuld %f8,%f12,%f12
2788 2786 add %l5,%g1,%l5 ! lane 1 table pointer += %g1
2789 2787
2790 2788 fmuld %f16,%f22,%f22
2791 2789 faddd %f20,pp1,%f20
2792 2790
2793 2791 faddd %f30,pp1,%f30
2794 2792 fmuld %f24,%f28,%f28
2795 2793 add %l7,%g1,%l7 ! lane 3 table pointer += %g1
2796 2794
2797 2795 fmuld %f0,%f6,%f6
2798 2796
2799 2797 fmuld %f8,%f14,%f14
2800 2798
2801 2799 faddd %f22,qq1,%f22
2802 2800 fmuld %f16,%f20,%f20
2803 2801 add %l6,%g1,%l6 ! lane 2 table pointer += %g1
2804 2802
2805 2803 fmuld %f24,%f30,%f30
2806 2804
2807 2805 fmuld %f2,%f6,%f6
2808 2806 ldd [%l4+8],%f0 ! z0 is dead; %f0 now holds lane 0 T8
2809 2807
2810 2808 fmuld %f10,%f14,%f14
2811 2809 ldd [%l5+8],%f8 ! %f8 now holds lane 1 T8
2812 2810
2813 2811 fmuld %f18,%f20,%f20
2814 2812
2815 2813 fmuld %f26,%f30,%f30
2816 2814 ldd [%l7+8],%f24 ! %f24 now holds lane 3 T8
2817 2815
2818 2816 fmuld %f0,%f4,%f4
2819 2817 faddd %f32,%f6,%f6
2820 2818
2821 2819 fmuld %f8,%f12,%f12
2822 2820 faddd %f34,%f14,%f14
2823 2821
2824 2822 fmuld %f16,%f22,%f22
2825 2823 faddd %f20,%f36,%f20
2826 2824 ldd [%l6+16],%f16 ! cos lane: leading term comes from offset 16
2827 2825
2828 2826 fmuld %f24,%f28,%f28
2829 2827 faddd %f38,%f30,%f30
2830 2828
2831 2829 faddd %f2,%f6,%f6
2832 2830 ldd [%l4+16],%f32 ! c0 is consumed; %f32 now holds lane 0 T16
2833 2831
2834 2832 faddd %f10,%f14,%f14
2835 2833 ldd [%l5+16],%f34 ! %f34 now holds lane 1 T16
2836 2834
2837 2835 fmuld %f16,%f22,%f22
2838 2836 faddd %f20,%f18,%f20
2839 2837 ldd [%l6+8],%f36 ! cos lane: multiplier comes from offset 8
2840 2838
2841 2839 faddd %f26,%f30,%f30
2842 2840 ldd [%l7+16],%f38 ! %f38 now holds lane 3 T16
2843 2841
2844 2842 fmuld %f32,%f6,%f6
2845 2843
2846 2844 fmuld %f34,%f14,%f14
2847 2845
2848 2846 fmuld %f36,%f20,%f20
2849 2847
2850 2848 fmuld %f38,%f30,%f30
2851 2849
2852 2850 faddd %f6,%f4,%f6
2853 2851
2854 2852 faddd %f14,%f12,%f14
2855 2853
2856 2854 fsubd %f22,%f20,%f22 ! cos lane subtracts where the sin lanes add
2857 2855
2858 2856 faddd %f30,%f28,%f30
2859 2857
2860 2858 faddd %f6,%f0,%f6
2861 2859
2862 2860 faddd %f14,%f8,%f14
2863 2861
2864 2862 faddd %f22,%f16,%f22
2865 2863
2866 2864 faddd %f30,%f24,%f30
2867 2865 mov %l0,%l4 ! keep %l0: the preload below overwrites it
2868 2866
2869 2867 fnegd %f6,%f4 ! negated copies for the sign selection below
2870 2868 lda [%i1]%asi,%l0 ! preload next argument
2871 2869
2872 2870 fnegd %f14,%f12
2873 2871 lda [%i1]%asi,%f0 ! same word again, into the FP file
2874 2872
2875 2873 fnegd %f22,%f20
2876 2874 lda [%i1+4]%asi,%f3 ! second word of the next argument
2877 2875
2878 2876 fnegd %f30,%f28
2879 2877 andn %l0,%i5,%l0 ! clear the bits in %i5 (sign mask per the .rangeN comments)
2880 2878 add %i1,%i2,%i1 ! advance x by %i2
2881 2879
2882 2880 andcc %l4,2,%g0 ! bit 1 of the saved %l0 set -> store the negated result
2883 2881 fmovdnz %icc,%f4,%f6
2884 2882 st %f6,[%o0] ! first word of result 0
2885 2883
2886 2884 andcc %l1,2,%g0
2887 2885 fmovdnz %icc,%f12,%f14
2888 2886 st %f14,[%o1] ! first word of result 1; %f15 is stored elsewhere
2889 2887
2890 2888 andcc %l2,2,%g0
2891 2889 fmovdnz %icc,%f20,%f22
2892 2890 st %f22,[%o2] ! first word of result 2; %f23 is stored elsewhere
2893 2891
2894 2892 andcc %l3,2,%g0
2895 2893 fmovdnz %icc,%f28,%f30
2896 2894 st %f30,[%o3] ! first word of result 3; %f31 is stored elsewhere
2897 2895
2898 2896 addcc %i0,-1,%i0 ! one fewer element remaining
2899 2897 bg,pt %icc,.loop0 ! more to do -> back to the loop
2900 2898 ! delay slot
2901 2899 st %f7,[%o0+4] ! second word of result 0 (executes on both paths)
2902 2900
2903 2901 ba,pt %icc,.end
2904 2902 ! delay slot
2905 2903 nop
2906 2904
2907 2905 .align 16
! .case14: lanes 0-2 take the "sin" path. Lane 3 is decided here: if
! bit 0 of %l3 is clear control goes to .case15 (all four lanes "sin"),
! otherwise it falls through and lane 3 takes the "cos" path.
!
! Per-lane registers, as used below:
!   lane 0: x=%f2  z=%f0  c=%f32 table ptr=%l4 result=%f6  out=%o0
!   lane 1: x=%f10 z=%f8  c=%f34 table ptr=%l5 result=%f14 out=%o1
!   lane 2: x=%f18 z=%f16 c=%f36 table ptr=%l6 result=%f22 out=%o2
!   lane 3: x=%f26 z=%f24 c=%f38 table ptr=%l7 result=%f30 out=%o3
! z3 = x3*x3 is computed by the first instruction; z0..z2 and the c terms
! are presumably prepared before this label -- TODO confirm.
!
! With T8 = [ptr+8] and T16 = [ptr+16] after ptr += %g1:
!   sin lane: T8  + T8 *z*(qq1 + z*qq2)          + T16*(x + c + x*z*(pp1 + z*(pp2 + z*pp3)))
!   cos lane: T16 + T16*z*(qq1 + z*(qq2 + z*qq3)) - T8 *(x + c + x*z*(pp1 + z*pp2))
2908 2906 .case14:
2909 2907 fmuld %f26,%f26,%f24 ! z3 = x3 * x3
2910 2908 andcc %l3,1,%g0
2911 2909 bz,pn %icc,.case15
2912 2910 ! delay slot
2913 2911 fxor %f30,%f38,%f38 ! not annulled: runs whether or not the branch is taken
2914 2912
2915 2913 fmuld %f0,pp3,%f6 ! sin(x0)
2916 2914
2917 2915 fmuld %f8,pp3,%f14 ! sin(x1)
2918 2916
2919 2917 fmuld %f16,pp3,%f22 ! sin(x2)
2920 2918
2921 2919 faddd %f6,pp2,%f6
2922 2920 fmuld %f0,qq2,%f4
2923 2921
2924 2922 faddd %f14,pp2,%f14
2925 2923 fmuld %f8,qq2,%f12
2926 2924
2927 2925 faddd %f22,pp2,%f22
2928 2926 fmuld %f16,qq2,%f20
2929 2927
2930 2928 fmuld %f24,qq3,%f30 ! cos(x3)
2931 2929
2932 2930 fmuld %f0,%f6,%f6
2933 2931 faddd %f4,qq1,%f4
2934 2932
2935 2933 fmuld %f8,%f14,%f14
2936 2934 faddd %f12,qq1,%f12
2937 2935
2938 2936 fmuld %f16,%f22,%f22
2939 2937 faddd %f20,qq1,%f20
2940 2938
2941 2939 faddd %f30,qq2,%f30
2942 2940 fmuld %f24,pp2,%f28
2943 2941
2944 2942 faddd %f6,pp1,%f6
2945 2943 fmuld %f0,%f4,%f4
2946 2944 add %l4,%g1,%l4 ! lane 0 table pointer += %g1
2947 2945
2948 2946 faddd %f14,pp1,%f14
2949 2947 fmuld %f8,%f12,%f12
2950 2948 add %l5,%g1,%l5 ! lane 1 table pointer += %g1
2951 2949
2952 2950 faddd %f22,pp1,%f22
2953 2951 fmuld %f16,%f20,%f20
2954 2952 add %l6,%g1,%l6 ! lane 2 table pointer += %g1
2955 2953
2956 2954 fmuld %f24,%f30,%f30
2957 2955 faddd %f28,pp1,%f28
2958 2956
2959 2957 fmuld %f0,%f6,%f6
2960 2958
2961 2959 fmuld %f8,%f14,%f14
2962 2960
2963 2961 fmuld %f16,%f22,%f22
2964 2962
2965 2963 faddd %f30,qq1,%f30
2966 2964 fmuld %f24,%f28,%f28
2967 2965 add %l7,%g1,%l7 ! lane 3 table pointer += %g1
2968 2966
2969 2967 fmuld %f2,%f6,%f6
2970 2968 ldd [%l4+8],%f0 ! z0 is dead; %f0 now holds lane 0 T8
2971 2969
2972 2970 fmuld %f10,%f14,%f14
2973 2971 ldd [%l5+8],%f8 ! %f8 now holds lane 1 T8
2974 2972
2975 2973 fmuld %f18,%f22,%f22
2976 2974 ldd [%l6+8],%f16 ! %f16 now holds lane 2 T8
2977 2975
2978 2976 fmuld %f26,%f28,%f28
2979 2977
2980 2978 fmuld %f0,%f4,%f4
2981 2979 faddd %f32,%f6,%f6
2982 2980
2983 2981 fmuld %f8,%f12,%f12
2984 2982 faddd %f34,%f14,%f14
2985 2983
2986 2984 fmuld %f16,%f20,%f20
2987 2985 faddd %f36,%f22,%f22
2988 2986
2989 2987 fmuld %f24,%f30,%f30
2990 2988 faddd %f28,%f38,%f28
2991 2989 ldd [%l7+16],%f24 ! cos lane: leading term comes from offset 16
2992 2990
2993 2991 faddd %f2,%f6,%f6
2994 2992 ldd [%l4+16],%f32 ! c0 is consumed; %f32 now holds lane 0 T16
2995 2993
2996 2994 faddd %f10,%f14,%f14
2997 2995 ldd [%l5+16],%f34 ! %f34 now holds lane 1 T16
2998 2996
2999 2997 faddd %f18,%f22,%f22
3000 2998 ldd [%l6+16],%f36 ! %f36 now holds lane 2 T16
3001 2999
3002 3000 fmuld %f24,%f30,%f30
3003 3001 faddd %f28,%f26,%f28
3004 3002 ldd [%l7+8],%f38 ! cos lane: multiplier comes from offset 8
3005 3003
3006 3004 fmuld %f32,%f6,%f6
3007 3005
3008 3006 fmuld %f34,%f14,%f14
3009 3007
3010 3008 fmuld %f36,%f22,%f22
3011 3009
3012 3010 fmuld %f38,%f28,%f28
3013 3011
3014 3012 faddd %f6,%f4,%f6
3015 3013
3016 3014 faddd %f14,%f12,%f14
3017 3015
3018 3016 faddd %f22,%f20,%f22
3019 3017
3020 3018 fsubd %f30,%f28,%f30 ! cos lane subtracts where the sin lanes add
3021 3019
3022 3020 faddd %f6,%f0,%f6
3023 3021
3024 3022 faddd %f14,%f8,%f14
3025 3023
3026 3024 faddd %f22,%f16,%f22
3027 3025
3028 3026 faddd %f30,%f24,%f30
3029 3027 mov %l0,%l4 ! keep %l0: the preload below overwrites it
3030 3028
3031 3029 fnegd %f6,%f4 ! negated copies for the sign selection below
3032 3030 lda [%i1]%asi,%l0 ! preload next argument
3033 3031
3034 3032 fnegd %f14,%f12
3035 3033 lda [%i1]%asi,%f0 ! same word again, into the FP file
3036 3034
3037 3035 fnegd %f22,%f20
3038 3036 lda [%i1+4]%asi,%f3 ! second word of the next argument
3039 3037
3040 3038 fnegd %f30,%f28
3041 3039 andn %l0,%i5,%l0 ! clear the bits in %i5 (sign mask per the .rangeN comments)
3042 3040 add %i1,%i2,%i1 ! advance x by %i2
3043 3041
3044 3042 andcc %l4,2,%g0 ! bit 1 of the saved %l0 set -> store the negated result
3045 3043 fmovdnz %icc,%f4,%f6
3046 3044 st %f6,[%o0] ! first word of result 0
3047 3045
3048 3046 andcc %l1,2,%g0
3049 3047 fmovdnz %icc,%f12,%f14
3050 3048 st %f14,[%o1] ! first word of result 1; %f15 is stored elsewhere
3051 3049
3052 3050 andcc %l2,2,%g0
3053 3051 fmovdnz %icc,%f20,%f22
3054 3052 st %f22,[%o2] ! first word of result 2; %f23 is stored elsewhere
3055 3053
3056 3054 andcc %l3,2,%g0
3057 3055 fmovdnz %icc,%f28,%f30
3058 3056 st %f30,[%o3] ! first word of result 3; %f31 is stored elsewhere
3059 3057
3060 3058 addcc %i0,-1,%i0 ! one fewer element remaining
3061 3059 bg,pt %icc,.loop0 ! more to do -> back to the loop
3062 3060 ! delay slot
3063 3061 st %f7,[%o0+4] ! second word of result 0 (executes on both paths)
3064 3062
3065 3063 ba,pt %icc,.end
3066 3064 ! delay slot
3067 3065 nop
3068 3066
3069 3067 .align 16
! .case15: all four lanes take the "sin" polynomial path. Reached from
! .case14 when bit 0 of %l3 is clear; other entry paths, if any, are
! outside this chunk.
!
! Per-lane registers, as used below:
!   lane 0: x=%f2  z=%f0  c=%f32 table ptr=%l4 result=%f6  out=%o0
!   lane 1: x=%f10 z=%f8  c=%f34 table ptr=%l5 result=%f14 out=%o1
!   lane 2: x=%f18 z=%f16 c=%f36 table ptr=%l6 result=%f22 out=%o2
!   lane 3: x=%f26 z=%f24 c=%f38 table ptr=%l7 result=%f30 out=%o3
! z is presumably x*x and c a correction term prepared before this label
! -- TODO confirm against the loop body.
!
! With T8 = [ptr+8] and T16 = [ptr+16] after ptr += %g1, every lane computes
!   T8 + T8*z*(qq1 + z*qq2) + T16*(x + c + x*z*(pp1 + z*(pp2 + z*pp3)))
3070 3068 .case15:
3071 3069 fmuld %f0,pp3,%f6 ! sin(x0)
3072 3070
3073 3071 fmuld %f8,pp3,%f14 ! sin(x1)
3074 3072
3075 3073 fmuld %f16,pp3,%f22 ! sin(x2)
3076 3074
3077 3075 fmuld %f24,pp3,%f30 ! sin(x3)
3078 3076
3079 3077 faddd %f6,pp2,%f6
3080 3078 fmuld %f0,qq2,%f4
3081 3079
3082 3080 faddd %f14,pp2,%f14
3083 3081 fmuld %f8,qq2,%f12
3084 3082
3085 3083 faddd %f22,pp2,%f22
3086 3084 fmuld %f16,qq2,%f20
3087 3085
3088 3086 faddd %f30,pp2,%f30
3089 3087 fmuld %f24,qq2,%f28
3090 3088
3091 3089 fmuld %f0,%f6,%f6
3092 3090 faddd %f4,qq1,%f4
3093 3091
3094 3092 fmuld %f8,%f14,%f14
3095 3093 faddd %f12,qq1,%f12
3096 3094
3097 3095 fmuld %f16,%f22,%f22
3098 3096 faddd %f20,qq1,%f20
3099 3097
3100 3098 fmuld %f24,%f30,%f30
3101 3099 faddd %f28,qq1,%f28
3102 3100
3103 3101 faddd %f6,pp1,%f6
3104 3102 fmuld %f0,%f4,%f4
3105 3103 add %l4,%g1,%l4 ! lane 0 table pointer += %g1
3106 3104
3107 3105 faddd %f14,pp1,%f14
3108 3106 fmuld %f8,%f12,%f12
3109 3107 add %l5,%g1,%l5 ! lane 1 table pointer += %g1
3110 3108
3111 3109 faddd %f22,pp1,%f22
3112 3110 fmuld %f16,%f20,%f20
3113 3111 add %l6,%g1,%l6 ! lane 2 table pointer += %g1
3114 3112
3115 3113 faddd %f30,pp1,%f30
3116 3114 fmuld %f24,%f28,%f28
3117 3115 add %l7,%g1,%l7 ! lane 3 table pointer += %g1
3118 3116
3119 3117 fmuld %f0,%f6,%f6
3120 3118
3121 3119 fmuld %f8,%f14,%f14
3122 3120
3123 3121 fmuld %f16,%f22,%f22
3124 3122
3125 3123 fmuld %f24,%f30,%f30
3126 3124
3127 3125 fmuld %f2,%f6,%f6
3128 3126 ldd [%l4+8],%f0 ! z0 is dead; %f0 now holds lane 0 T8
3129 3127
3130 3128 fmuld %f10,%f14,%f14
3131 3129 ldd [%l5+8],%f8 ! %f8 now holds lane 1 T8
3132 3130
3133 3131 fmuld %f18,%f22,%f22
3134 3132 ldd [%l6+8],%f16 ! %f16 now holds lane 2 T8
3135 3133
3136 3134 fmuld %f26,%f30,%f30
3137 3135 ldd [%l7+8],%f24 ! %f24 now holds lane 3 T8
3138 3136
3139 3137 fmuld %f0,%f4,%f4
3140 3138 faddd %f32,%f6,%f6
3141 3139
3142 3140 fmuld %f8,%f12,%f12
3143 3141 faddd %f34,%f14,%f14
3144 3142
3145 3143 fmuld %f16,%f20,%f20
3146 3144 faddd %f36,%f22,%f22
3147 3145
3148 3146 fmuld %f24,%f28,%f28
3149 3147 faddd %f38,%f30,%f30
3150 3148
3151 3149 faddd %f2,%f6,%f6
3152 3150 ldd [%l4+16],%f32 ! c0 is consumed; %f32 now holds lane 0 T16
3153 3151
3154 3152 faddd %f10,%f14,%f14
3155 3153 ldd [%l5+16],%f34 ! %f34 now holds lane 1 T16
3156 3154
3157 3155 faddd %f18,%f22,%f22
3158 3156 ldd [%l6+16],%f36 ! %f36 now holds lane 2 T16
3159 3157
3160 3158 faddd %f26,%f30,%f30
3161 3159 ldd [%l7+16],%f38 ! %f38 now holds lane 3 T16
3162 3160
3163 3161 fmuld %f32,%f6,%f6
3164 3162
3165 3163 fmuld %f34,%f14,%f14
3166 3164
3167 3165 fmuld %f36,%f22,%f22
3168 3166
3169 3167 fmuld %f38,%f30,%f30
3170 3168
3171 3169 faddd %f6,%f4,%f6
3172 3170
3173 3171 faddd %f14,%f12,%f14
3174 3172
3175 3173 faddd %f22,%f20,%f22
3176 3174
3177 3175 faddd %f30,%f28,%f30
3178 3176
3179 3177 faddd %f6,%f0,%f6
3180 3178
3181 3179 faddd %f14,%f8,%f14
3182 3180
3183 3181 faddd %f22,%f16,%f22
3184 3182
3185 3183 faddd %f30,%f24,%f30
3186 3184 mov %l0,%l4 ! keep %l0: the preload below overwrites it
3187 3185
3188 3186 fnegd %f6,%f4 ! negated copies for the sign selection below
3189 3187 lda [%i1]%asi,%l0 ! preload next argument
3190 3188
3191 3189 fnegd %f14,%f12
3192 3190 lda [%i1]%asi,%f0 ! same word again, into the FP file
3193 3191
3194 3192 fnegd %f22,%f20
3195 3193 lda [%i1+4]%asi,%f3 ! second word of the next argument
3196 3194
3197 3195 fnegd %f30,%f28
3198 3196 andn %l0,%i5,%l0 ! clear the bits in %i5 (sign mask per the .rangeN comments)
3199 3197 add %i1,%i2,%i1 ! advance x by %i2
3200 3198
3201 3199 andcc %l4,2,%g0 ! bit 1 of the saved %l0 set -> store the negated result
3202 3200 fmovdnz %icc,%f4,%f6
3203 3201 st %f6,[%o0] ! first word of result 0
3204 3202
3205 3203 andcc %l1,2,%g0
3206 3204 fmovdnz %icc,%f12,%f14
3207 3205 st %f14,[%o1] ! first word of result 1; %f15 is stored elsewhere
3208 3206
3209 3207 andcc %l2,2,%g0
3210 3208 fmovdnz %icc,%f20,%f22
3211 3209 st %f22,[%o2] ! first word of result 2; %f23 is stored elsewhere
3212 3210
3213 3211 andcc %l3,2,%g0
3214 3212 fmovdnz %icc,%f28,%f30
3215 3213 st %f30,[%o3] ! first word of result 3; %f31 is stored elsewhere
3216 3214
3217 3215 addcc %i0,-1,%i0 ! one fewer element remaining
3218 3216 bg,pt %icc,.loop0 ! more to do -> back to the loop
3219 3217 ! delay slot
3220 3218 st %f7,[%o0+4] ! second word of result 0 (executes on both paths)
3221 3219
3222 3220 ba,pt %icc,.end
3223 3221 ! delay slot
3224 3222 nop
3225 3223
3226 3224
3227 3225 .align 16
! .end: common exit path once the element count is exhausted.
!   1. Store the second words of results 1-3, which the case blocks leave
!      pending (they only store %f14/%f22/%f30).
!   2. If the biguns flag on the stack is zero, return.
!   3. Otherwise reload the saved call arguments and call
!      __vlibm_vcos_big_ultra3 with
!        %o0 = nsave, %o1 = xsave, %o2 = sxsave, %o3 = ysave, %o4 = sysave.
!      %o5 is not loaded here; it is passed with whatever value it already
!      holds (set outside this chunk) -- NOTE(review): confirm its meaning
!      against the callee's signature.
3228 3226 .end:
3229 3227 st %f15,[%o1+4]
3230 3228 st %f23,[%o2+4]
3231 3229 st %f31,[%o3+4]
3232 3230 ld [%fp+biguns],%i5 ! %i5 is reused as scratch from here on
3233 3231 tst %i5 ! check for huge arguments remaining
3234 3232 be,pt %icc,.exit
3235 3233 ! delay slot
3236 3234 nop
3237 3235 #ifdef __sparcv9
3238 3236 ldx [%fp+xsave],%o1 ! saved pointers are 64-bit on V9
3239 3237 ldx [%fp+ysave],%o3
3240 3238 #else
3241 3239 ld [%fp+xsave],%o1
3242 3240 ld [%fp+ysave],%o3
3243 3241 #endif
3244 3242 ld [%fp+nsave],%o0
3245 3243 ld [%fp+sxsave],%o2
3246 3244 ld [%fp+sysave],%o4
3247 3245 sra %o2,0,%o2 ! sign-extend for V9
3248 3246 sra %o4,0,%o4
3249 3247 call __vlibm_vcos_big_ultra3
3250 3248 sra %o5,0,%o5 ! delay slot
3251 3249
3252 3250 .exit:
3253 3251 ret
3254 3252 restore ! delay slot of ret: pop the register window
3255 3253
3256 3254
3257 3255 .align 16
! .last1 / .last2 / .last3: fall-through chain that pads the lanes which
! have no element left. Entering at .lastN pads lanes N..3: each padded
! lane gets %lN = 0, a zeroed argument register pair, and an output pointer
! aimed at the "junk" stack slot so its stores are discarded. Control then
! rejoins the main path at .cont.
! The .lastN_from_rangeN entry points skip the faddd/st pair in front of
! them; they are the targets used by the .rangeN handlers.
! NOTE(review): the faddd ...,c3two44 and st instructions presumably
! duplicate work the main loop would otherwise have done before .cont --
! confirm against the loop body, which is outside this chunk.
3258 3256 .last1:
3259 3257 faddd %f2,c3two44,%f4
3260 3258 st %f15,[%o1+4] ! flush pending second word of result 1
3261 3259 .last1_from_range1:
3262 3260 mov 0,%l1
3263 3261 fzeros %f8
3264 3262 fzero %f10
3265 3263 add %fp,junk,%o1 ! lane 1 output -> junk slot
3266 3264 .last2:
3267 3265 faddd %f10,c3two44,%f12
3268 3266 st %f23,[%o2+4] ! flush pending second word of result 2
3269 3267 .last2_from_range2:
3270 3268 mov 0,%l2
3271 3269 fzeros %f16
3272 3270 fzero %f18
3273 3271 add %fp,junk,%o2 ! lane 2 output -> junk slot
3274 3272 .last3:
3275 3273 faddd %f18,c3two44,%f20
3276 3274 st %f31,[%o3+4] ! flush pending second word of result 3
3277 3275 st %f5,[%fp+nk0] ! second word of %f4 -> nk0 stack slot
3278 3276 st %f13,[%fp+nk1] ! second word of %f12 -> nk1 stack slot
3279 3277 .last3_from_range3:
3280 3278 mov 0,%l3
3281 3279 fzeros %f24
3282 3280 fzero %f26
3283 3281 ba,pt %icc,.cont
3284 3282 ! delay slot
3285 3283 add %fp,junk,%o3 ! lane 3 output -> junk slot
3286 3284
3287 3285
3288 3286 .align 16
! .range0: lane 0's argument is outside the range the main path handles.
! %l0 holds its high word with the sign cleared; %f2 holds the argument.
!   hx < %o4 (0x3e400000 per the comment below): result is exactly 1.0;
!       fdtoi is issued only for its inexact side effect.
!   %o4 <= hx < 0x7ff00000 (finite): nothing is stored now; biguns is set
!       to a nonzero value so that .end defers to the big-argument routine.
!   otherwise (Inf or NaN): store x * 0.
! Afterwards the element count and y advance. If elements remain, lane 1's
! already-loaded argument is moved into lane 0 and control re-enters at
! .loop0; otherwise control goes to .end.
3289 3287 .range0:
3290 3288 cmp %l0,%o4
3291 3289 bl,pt %icc,1f ! hx < 0x3e400000
3292 3290 ! delay slot, harmless if branch taken
3293 3291 sethi %hi(0x7ff00000),%o7
3294 3292 cmp %l0,%o7
3295 3293 bl,a,pt %icc,2f ! branch if finite
3296 3294 ! delay slot, squashed if branch not taken
3297 3295 st %o4,[%fp+biguns] ! set biguns
3298 3296 fzero %f0
3299 3297 fmuld %f2,%f0,%f2 ! x * 0
3300 3298 st %f2,[%o0]
3301 3299 ba,pt %icc,2f
3302 3300 ! delay slot
3303 3301 st %f3,[%o0+4]
3304 3302 1:
3305 3303 fdtoi %f2,%f4 ! raise inexact if not zero
3306 3304 sethi %hi(0x3ff00000),%o7 ! high word of 1.0
3307 3305 st %o7,[%o0]
3308 3306 st %g0,[%o0+4] ! low word of 1.0 is zero
3309 3307 2:
3310 3308 addcc %i0,-1,%i0
3311 3309 ble,pn %icc,.end
3312 3310 ! delay slot, harmless if branch taken
3313 3311 add %i3,%i4,%i3 ! y += stridey
3314 3312 andn %l1,%i5,%l0 ! hx &= ~0x80000000
3315 3313 fmovs %f8,%f0 ! lane 1's loaded words become lane 0's
3316 3314 fmovs %f11,%f3
3317 3315 ba,pt %icc,.loop0
3318 3316 ! delay slot
3319 3317 add %i1,%i2,%i1 ! x += stridex
3320 3318
3321 3319
3322 3320 .align 16
! .range1: lane 1's argument is outside the range the main path handles.
! %l1 holds its high word with the sign cleared; %f10 holds the argument.
!   hx < %o4 (0x3e400000 per the comment below): result is exactly 1.0;
!       fdtoi is issued only for its inexact side effect.
!   %o4 <= hx < 0x7ff00000 (finite): nothing is stored now; biguns is set
!       to a nonzero value so that .end defers to the big-argument routine.
!   otherwise (Inf or NaN): store x * 0.
! Afterwards the element count and y advance. If elements remain, lane 2's
! already-loaded argument is moved into lane 1 and control re-enters at
! .loop1; otherwise control goes to .last1_from_range1.
3323 3321 .range1:
3324 3322 cmp %l1,%o4
3325 3323 bl,pt %icc,1f ! hx < 0x3e400000
3326 3324 ! delay slot, harmless if branch taken
3327 3325 sethi %hi(0x7ff00000),%o7
3328 3326 cmp %l1,%o7
3329 3327 bl,a,pt %icc,2f ! branch if finite
3330 3328 ! delay slot, squashed if branch not taken
3331 3329 st %o4,[%fp+biguns] ! set biguns
3332 3330 fzero %f8
3333 3331 fmuld %f10,%f8,%f10 ! x * 0
3334 3332 st %f10,[%o1]
3335 3333 ba,pt %icc,2f
3336 3334 ! delay slot
3337 3335 st %f11,[%o1+4]
3338 3336 1:
3339 3337 fdtoi %f10,%f12 ! raise inexact if not zero
3340 3338 sethi %hi(0x3ff00000),%o7 ! high word of 1.0
3341 3339 st %o7,[%o1]
3342 3340 st %g0,[%o1+4] ! low word of 1.0 is zero
3343 3341 2:
3344 3342 addcc %i0,-1,%i0
3345 3343 ble,pn %icc,.last1_from_range1
3346 3344 ! delay slot, harmless if branch taken
3347 3345 add %i3,%i4,%i3 ! y += stridey
3348 3346 andn %l2,%i5,%l1 ! hx &= ~0x80000000
3349 3347 fmovs %f16,%f8 ! lane 2's loaded words become lane 1's
3350 3348 fmovs %f19,%f11
3351 3349 ba,pt %icc,.loop1
3352 3350 ! delay slot
3353 3351 add %i1,%i2,%i1 ! x += stridex
3354 3352
3355 3353
3356 3354 .align 16
! .range2: lane 2's argument is outside the range the main path handles.
! %l2 holds its high word with the sign cleared; %f18 holds the argument.
!   hx < %o4 (0x3e400000 per the comment below): result is exactly 1.0;
!       fdtoi is issued only for its inexact side effect.
!   %o4 <= hx < 0x7ff00000 (finite): nothing is stored now; biguns is set
!       to a nonzero value so that .end defers to the big-argument routine.
!   otherwise (Inf or NaN): store x * 0.
! Afterwards the element count and y advance. If elements remain, lane 3's
! already-loaded argument is moved into lane 2 and control re-enters at
! .loop2; otherwise control goes to .last2_from_range2.
3357 3355 .range2:
3358 3356 cmp %l2,%o4
3359 3357 bl,pt %icc,1f ! hx < 0x3e400000
3360 3358 ! delay slot, harmless if branch taken
3361 3359 sethi %hi(0x7ff00000),%o7
3362 3360 cmp %l2,%o7
3363 3361 bl,a,pt %icc,2f ! branch if finite
3364 3362 ! delay slot, squashed if branch not taken
3365 3363 st %o4,[%fp+biguns] ! set biguns
3366 3364 fzero %f16
3367 3365 fmuld %f18,%f16,%f18 ! x * 0
3368 3366 st %f18,[%o2]
3369 3367 ba,pt %icc,2f
3370 3368 ! delay slot
3371 3369 st %f19,[%o2+4]
3372 3370 1:
3373 3371 fdtoi %f18,%f20 ! raise inexact if not zero
3374 3372 sethi %hi(0x3ff00000),%o7 ! high word of 1.0
3375 3373 st %o7,[%o2]
3376 3374 st %g0,[%o2+4] ! low word of 1.0 is zero
3377 3375 2:
3378 3376 addcc %i0,-1,%i0
3379 3377 ble,pn %icc,.last2_from_range2
3380 3378 ! delay slot, harmless if branch taken
3381 3379 add %i3,%i4,%i3 ! y += stridey
3382 3380 andn %l3,%i5,%l2 ! hx &= ~0x80000000
3383 3381 fmovs %f24,%f16 ! lane 3's loaded words become lane 2's
3384 3382 fmovs %f27,%f19
3385 3383 ba,pt %icc,.loop2
3386 3384 ! delay slot
3387 3385 add %i1,%i2,%i1 ! x += stridex
3388 3386
3389 3387
3390 3388 .align 16
! .range3: lane 3's argument is outside the range the main path handles.
! %l3 holds its high word with the sign cleared; %f26 holds the argument.
!   hx < %o4 (0x3e400000 per the comment below): result is exactly 1.0;
!       fdtoi is issued only for its inexact side effect.
!   %o4 <= hx < 0x7ff00000 (finite): nothing is stored now; biguns is set
!       to a nonzero value so that .end defers to the big-argument routine.
!   otherwise (Inf or NaN): store x * 0.
! Afterwards the element count and y advance. Unlike .range0-.range2 there
! is no following lane to take an argument from, so when elements remain
! the next argument is loaded from [%i1] with plain ld (not lda) before
! re-entering at .loop3; otherwise control goes to .last3_from_range3.
3391 3389 .range3:
3392 3390 cmp %l3,%o4
3393 3391 bl,pt %icc,1f ! hx < 0x3e400000
3394 3392 ! delay slot, harmless if branch taken
3395 3393 sethi %hi(0x7ff00000),%o7
3396 3394 cmp %l3,%o7
3397 3395 bl,a,pt %icc,2f ! branch if finite
3398 3396 ! delay slot, squashed if branch not taken
3399 3397 st %o4,[%fp+biguns] ! set biguns
3400 3398 fzero %f24
3401 3399 fmuld %f26,%f24,%f26 ! x * 0
3402 3400 st %f26,[%o3]
3403 3401 ba,pt %icc,2f
3404 3402 ! delay slot
3405 3403 st %f27,[%o3+4]
3406 3404 1:
3407 3405 fdtoi %f26,%f28 ! raise inexact if not zero
3408 3406 sethi %hi(0x3ff00000),%o7 ! high word of 1.0
3409 3407 st %o7,[%o3]
3410 3408 st %g0,[%o3+4] ! low word of 1.0 is zero
3411 3409 2:
3412 3410 addcc %i0,-1,%i0
3413 3411 ble,pn %icc,.last3_from_range3
3414 3412 ! delay slot, harmless if branch taken
3415 3413 add %i3,%i4,%i3 ! y += stridey
3416 3414 ld [%i1],%l3 ! first word of the next argument, integer copy
3417 3415 ld [%i1],%f24 ! same word, FP copy
3418 3416 ld [%i1+4],%f27 ! second word of the next argument
3419 3417 andn %l3,%i5,%l3 ! hx &= ~0x80000000
3420 3418 ba,pt %icc,.loop3
3421 3419 ! delay slot
3422 3420 add %i1,%i2,%i1 ! x += stridex
3423 3421
3424 3422 SET_SIZE(__vcos_ultra3)
3425 3423
↓ open down ↓ |
3379 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX