/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

	.file "__vsin_ultra3.S"

#include "libm.h"
! Export __vsin as a weak alias for this UltraSPARC-III-tuned implementation,
! so the generic entry point resolves here unless a stronger definition exists.
	.weak __vsin
	.type __vsin,#function
__vsin = __vsin_ultra3

! Read-only constant pool for the vectorized sine kernel.  Each .word pair is
! the big-endian hi/lo half of one IEEE double.  The entry code loads these
! sequentially at offsets 0x00..0x58 (ldd [%o0+0x00] .. [%o0+0x58]), so the
! order below must match those fixed offsets exactly.
!   3*2^44, 3*2^51      round-to-int shifter constants
!   invpio2             2/pi, for medium-range argument reduction
!   pio2_1..pio2_3      pi/2 split into three parts for exact reduction
!   pp1..pp3            polynomial coefficients (sine-style expansion)
!   qq1..qq3            polynomial coefficients (cosine-style expansion)
	RO_DATA
	.align 64
constants:
	.word 0x42c80000,0x00000000 ! 3 * 2^44
	.word 0x43380000,0x00000000 ! 3 * 2^51
	.word 0x3fe45f30,0x6dc9c883 ! invpio2
	.word 0x3ff921fb,0x54442c00 ! pio2_1
	.word 0x3d318469,0x898cc400 ! pio2_2
	.word 0x3a71701b,0x839a2520 ! pio2_3
	.word 0xbfc55555,0x55555533 ! pp1
	.word 0x3f811111,0x10e7d53b ! pp2
	.word 0xbf2a0167,0xe6b3cf9b ! pp3
	.word 0xbfdfffff,0xffffff65 ! qq1
	.word 0x3fa55555,0x54f88ed0 ! qq2
	.word 0xbf56c12c,0xdd185f60 ! qq3

! 
local storage indices 53 54 #define xsave STACK_BIAS-0x8 55 #define ysave STACK_BIAS-0x10 56 #define nsave STACK_BIAS-0x14 57 #define sxsave STACK_BIAS-0x18 58 #define sysave STACK_BIAS-0x1c 59 #define biguns STACK_BIAS-0x20 60 #define nk3 STACK_BIAS-0x24 61 #define nk2 STACK_BIAS-0x28 62 #define nk1 STACK_BIAS-0x2c 63 #define nk0 STACK_BIAS-0x30 64 #define junk STACK_BIAS-0x38 65 ! sizeof temp storage - must be a multiple of 16 for V9 66 #define tmps 0x40 67 68 ! register use 69 70 ! i0 n 71 ! i1 x 72 ! i2 stridex 73 ! i3 y 74 ! i4 stridey 75 ! i5 0x80000000 76 77 ! l0 hx0 78 ! l1 hx1 79 ! l2 hx2 80 ! l3 hx3 81 ! l4 k0 82 ! l5 k1 83 ! l6 k2 84 ! l7 k3 85 86 ! the following are 64-bit registers in both V8+ and V9 87 88 ! g1 __vlibm_TBL_sincos2 89 ! g5 scratch 90 91 ! o0 py0 92 ! o1 py1 93 ! o2 py2 94 ! o3 py3 95 ! o4 0x3e400000 96 ! o5 0x3fe921fb,0x4099251e 97 ! o7 scratch 98 99 ! f0 hx0 100 ! f2 101 ! f4 102 ! f6 103 ! f8 hx1 104 ! f10 105 ! f12 106 ! f14 107 ! f16 hx2 108 ! f18 109 ! f20 110 ! f22 111 ! f24 hx3 112 ! f26 113 ! f28 114 ! f30 115 ! f32 116 ! f34 117 ! f36 118 ! f38 119 120 #define c3two44 %f40 121 #define c3two51 %f42 122 #define invpio2 %f44 123 #define pio2_1 %f46 124 #define pio2_2 %f48 125 #define pio2_3 %f50 126 #define pp1 %f52 127 #define pp2 %f54 128 #define pp3 %f56 129 #define qq1 %f58 130 #define qq2 %f60 131 #define qq3 %f62 132 133 ENTRY(__vsin_ultra3) 134 save %sp,-SA(MINFRAME)-tmps,%sp 135 PIC_SETUP(l7) 136 PIC_SET(l7,constants,o0) 137 PIC_SET(l7,__vlibm_TBL_sincos2,o1) 138 mov %o1,%g1 139 wr %g0,0x82,%asi ! set %asi for non-faulting loads 140 #ifdef __sparcv9 141 stx %i1,[%fp+xsave] ! save arguments 142 stx %i3,[%fp+ysave] 143 #else 144 st %i1,[%fp+xsave] ! save arguments 145 st %i3,[%fp+ysave] 146 #endif 147 st %i0,[%fp+nsave] 148 st %i2,[%fp+sxsave] 149 st %i4,[%fp+sysave] 150 st %g0,[%fp+biguns] ! biguns = 0 151 ldd [%o0+0x00],c3two44 ! 
load/set up constants 152 ldd [%o0+0x08],c3two51 153 ldd [%o0+0x10],invpio2 154 ldd [%o0+0x18],pio2_1 155 ldd [%o0+0x20],pio2_2 156 ldd [%o0+0x28],pio2_3 157 ldd [%o0+0x30],pp1 158 ldd [%o0+0x38],pp2 159 ldd [%o0+0x40],pp3 160 ldd [%o0+0x48],qq1 161 ldd [%o0+0x50],qq2 162 ldd [%o0+0x58],qq3 163 sethi %hi(0x80000000),%i5 164 sethi %hi(0x3e400000),%o4 165 sethi %hi(0x3fe921fb),%o5 166 or %o5,%lo(0x3fe921fb),%o5 167 sllx %o5,32,%o5 168 sethi %hi(0x4099251e),%o7 169 or %o7,%lo(0x4099251e),%o7 170 or %o5,%o7,%o5 171 sll %i2,3,%i2 ! scale strides 172 sll %i4,3,%i4 173 add %fp,junk,%o1 ! loop prologue 174 add %fp,junk,%o2 175 add %fp,junk,%o3 176 ld [%i1],%l0 ! *x 177 ld [%i1],%f0 178 ld [%i1+4],%f3 179 andn %l0,%i5,%l0 ! mask off sign 180 ba .loop0 181 add %i1,%i2,%i1 ! x += stridex 182 183 ! 16-byte aligned 184 .align 16 185 .loop0: 186 lda [%i1]%asi,%l1 ! preload next argument 187 sub %l0,%o4,%g5 188 sub %o5,%l0,%o7 189 fabss %f0,%f2 190 191 lda [%i1]%asi,%f8 192 orcc %o7,%g5,%g0 193 mov %i3,%o0 ! py0 = y 194 bl,pn %icc,.range0 ! hx < 0x3e400000 or hx > 0x4099251e 195 196 ! delay slot 197 lda [%i1+4]%asi,%f11 198 addcc %i0,-1,%i0 199 add %i3,%i4,%i3 ! y += stridey 200 ble,pn %icc,.last1 201 202 ! delay slot 203 andn %l1,%i5,%l1 204 add %i1,%i2,%i1 ! x += stridex 205 faddd %f2,c3two44,%f4 206 st %f15,[%o1+4] 207 208 .loop1: 209 lda [%i1]%asi,%l2 ! preload next argument 210 sub %l1,%o4,%g5 211 sub %o5,%l1,%o7 212 fabss %f8,%f10 213 214 lda [%i1]%asi,%f16 215 orcc %o7,%g5,%g0 216 mov %i3,%o1 ! py1 = y 217 bl,pn %icc,.range1 ! hx < 0x3e400000 or hx > 0x4099251e 218 219 ! delay slot 220 lda [%i1+4]%asi,%f19 221 addcc %i0,-1,%i0 222 add %i3,%i4,%i3 ! y += stridey 223 ble,pn %icc,.last2 224 225 ! delay slot 226 andn %l2,%i5,%l2 227 add %i1,%i2,%i1 ! x += stridex 228 faddd %f10,c3two44,%f12 229 st %f23,[%o2+4] 230 231 .loop2: 232 lda [%i1]%asi,%l3 ! 
preload next argument 233 sub %l2,%o4,%g5 234 sub %o5,%l2,%o7 235 fabss %f16,%f18 236 237 lda [%i1]%asi,%f24 238 orcc %o7,%g5,%g0 239 mov %i3,%o2 ! py2 = y 240 bl,pn %icc,.range2 ! hx < 0x3e400000 or hx > 0x4099251e 241 242 ! delay slot 243 lda [%i1+4]%asi,%f27 244 addcc %i0,-1,%i0 245 add %i3,%i4,%i3 ! y += stridey 246 ble,pn %icc,.last3 247 248 ! delay slot 249 andn %l3,%i5,%l3 250 add %i1,%i2,%i1 ! x += stridex 251 faddd %f18,c3two44,%f20 252 st %f31,[%o3+4] 253 254 .loop3: 255 sub %l3,%o4,%g5 256 sub %o5,%l3,%o7 257 fabss %f24,%f26 258 st %f5,[%fp+nk0] 259 260 orcc %o7,%g5,%g0 261 mov %i3,%o3 ! py3 = y 262 bl,pn %icc,.range3 ! hx < 0x3e400000 or > hx 0x4099251e 263 ! delay slot 264 st %f13,[%fp+nk1] 265 266 !!! DONE? 267 .cont: 268 srlx %o5,32,%o7 269 add %i3,%i4,%i3 ! y += stridey 270 fmovs %f3,%f1 271 st %f21,[%fp+nk2] 272 273 sub %o7,%l0,%l0 274 sub %o7,%l1,%l1 275 faddd %f26,c3two44,%f28 276 st %f29,[%fp+nk3] 277 278 sub %o7,%l2,%l2 279 sub %o7,%l3,%l3 280 fmovs %f11,%f9 281 282 or %l0,%l1,%l0 283 or %l2,%l3,%l2 284 fmovs %f19,%f17 285 286 fmovs %f27,%f25 287 fmuld %f0,invpio2,%f6 ! x * invpio2, for medium range 288 289 fmuld %f8,invpio2,%f14 290 ld [%fp+nk0],%l4 291 292 fmuld %f16,invpio2,%f22 293 ld [%fp+nk1],%l5 294 295 orcc %l0,%l2,%g0 296 bl,pn %icc,.medium 297 ! delay slot 298 fmuld %f24,invpio2,%f30 299 ld [%fp+nk2],%l6 300 301 ld [%fp+nk3],%l7 302 sll %l4,5,%l4 ! k 303 fcmpd %fcc0,%f0,pio2_3 ! x < pio2_3 iff x < 0 304 305 sll %l5,5,%l5 306 ldd [%l4+%g1],%f4 307 fcmpd %fcc1,%f8,pio2_3 308 309 sll %l6,5,%l6 310 ldd [%l5+%g1],%f12 311 fcmpd %fcc2,%f16,pio2_3 312 313 sll %l7,5,%l7 314 ldd [%l6+%g1],%f20 315 fcmpd %fcc3,%f24,pio2_3 316 317 ldd [%l7+%g1],%f28 318 fsubd %f2,%f4,%f2 ! x -= __vlibm_TBL_sincos2[k] 319 320 fsubd %f10,%f12,%f10 321 322 fsubd %f18,%f20,%f18 323 324 fsubd %f26,%f28,%f26 325 326 fmuld %f2,%f2,%f0 ! 
z = x * x 327 328 fmuld %f10,%f10,%f8 329 330 fmuld %f18,%f18,%f16 331 332 fmuld %f26,%f26,%f24 333 334 fmuld %f0,pp3,%f6 335 336 fmuld %f8,pp3,%f14 337 338 fmuld %f16,pp3,%f22 339 340 fmuld %f24,pp3,%f30 341 342 faddd %f6,pp2,%f6 343 fmuld %f0,qq2,%f4 344 345 faddd %f14,pp2,%f14 346 fmuld %f8,qq2,%f12 347 348 faddd %f22,pp2,%f22 349 fmuld %f16,qq2,%f20 350 351 faddd %f30,pp2,%f30 352 fmuld %f24,qq2,%f28 353 354 fmuld %f0,%f6,%f6 355 faddd %f4,qq1,%f4 356 357 fmuld %f8,%f14,%f14 358 faddd %f12,qq1,%f12 359 360 fmuld %f16,%f22,%f22 361 faddd %f20,qq1,%f20 362 363 fmuld %f24,%f30,%f30 364 faddd %f28,qq1,%f28 365 366 faddd %f6,pp1,%f6 367 fmuld %f0,%f4,%f4 368 add %l4,%g1,%l4 369 370 faddd %f14,pp1,%f14 371 fmuld %f8,%f12,%f12 372 add %l5,%g1,%l5 373 374 faddd %f22,pp1,%f22 375 fmuld %f16,%f20,%f20 376 add %l6,%g1,%l6 377 378 faddd %f30,pp1,%f30 379 fmuld %f24,%f28,%f28 380 add %l7,%g1,%l7 381 382 fmuld %f0,%f6,%f6 383 ldd [%l4+8],%f0 384 385 fmuld %f8,%f14,%f14 386 ldd [%l5+8],%f8 387 388 fmuld %f16,%f22,%f22 389 ldd [%l6+8],%f16 390 391 fmuld %f24,%f30,%f30 392 ldd [%l7+8],%f24 393 394 fmuld %f2,%f6,%f6 395 396 fmuld %f10,%f14,%f14 397 398 fmuld %f18,%f22,%f22 399 400 fmuld %f26,%f30,%f30 401 402 faddd %f6,%f2,%f6 403 fmuld %f0,%f4,%f4 404 ldd [%l4+16],%f2 405 406 faddd %f14,%f10,%f14 407 fmuld %f8,%f12,%f12 408 ldd [%l5+16],%f10 409 410 faddd %f22,%f18,%f22 411 fmuld %f16,%f20,%f20 412 ldd [%l6+16],%f18 413 414 faddd %f30,%f26,%f30 415 fmuld %f24,%f28,%f28 416 ldd [%l7+16],%f26 417 418 fmuld %f2,%f6,%f6 419 420 fmuld %f10,%f14,%f14 421 422 fmuld %f18,%f22,%f22 423 424 fmuld %f26,%f30,%f30 425 426 faddd %f6,%f4,%f6 427 428 faddd %f14,%f12,%f14 429 430 faddd %f22,%f20,%f22 431 432 faddd %f30,%f28,%f30 433 434 faddd %f6,%f0,%f6 435 436 faddd %f14,%f8,%f14 437 438 faddd %f22,%f16,%f22 439 440 faddd %f30,%f24,%f30 441 442 fnegd %f6,%f4 443 lda [%i1]%asi,%l0 ! 
preload next argument 444 445 fnegd %f14,%f12 446 lda [%i1]%asi,%f0 447 448 fnegd %f22,%f20 449 lda [%i1+4]%asi,%f3 450 451 fnegd %f30,%f28 452 andn %l0,%i5,%l0 453 add %i1,%i2,%i1 454 455 fmovdl %fcc0,%f4,%f6 ! (hx < -0)? -s : s 456 st %f6,[%o0] 457 458 fmovdl %fcc1,%f12,%f14 459 st %f14,[%o1] 460 461 fmovdl %fcc2,%f20,%f22 462 st %f22,[%o2] 463 464 fmovdl %fcc3,%f28,%f30 465 st %f30,[%o3] 466 addcc %i0,-1,%i0 467 468 bg,pt %icc,.loop0 469 ! delay slot 470 st %f7,[%o0+4] 471 472 ba,pt %icc,.end 473 ! delay slot 474 nop 475 476 477 .align 16 478 .medium: 479 faddd %f6,c3two51,%f4 480 st %f5,[%fp+nk0] 481 482 faddd %f14,c3two51,%f12 483 st %f13,[%fp+nk1] 484 485 faddd %f22,c3two51,%f20 486 st %f21,[%fp+nk2] 487 488 faddd %f30,c3two51,%f28 489 st %f29,[%fp+nk3] 490 491 fsubd %f4,c3two51,%f6 492 493 fsubd %f12,c3two51,%f14 494 495 fsubd %f20,c3two51,%f22 496 497 fsubd %f28,c3two51,%f30 498 499 fmuld %f6,pio2_1,%f2 500 ld [%fp+nk0],%l0 ! n 501 502 fmuld %f14,pio2_1,%f10 503 ld [%fp+nk1],%l1 504 505 fmuld %f22,pio2_1,%f18 506 ld [%fp+nk2],%l2 507 508 fmuld %f30,pio2_1,%f26 509 ld [%fp+nk3],%l3 510 511 fsubd %f0,%f2,%f0 512 fmuld %f6,pio2_2,%f4 513 514 fsubd %f8,%f10,%f8 515 fmuld %f14,pio2_2,%f12 516 517 fsubd %f16,%f18,%f16 518 fmuld %f22,pio2_2,%f20 519 520 fsubd %f24,%f26,%f24 521 fmuld %f30,pio2_2,%f28 522 523 fsubd %f0,%f4,%f32 524 525 fsubd %f8,%f12,%f34 526 527 fsubd %f16,%f20,%f36 528 529 fsubd %f24,%f28,%f38 530 531 fsubd %f0,%f32,%f0 532 fcmple32 %f32,pio2_3,%l4 ! x <= pio2_3 iff x < 0 533 534 fsubd %f8,%f34,%f8 535 fcmple32 %f34,pio2_3,%l5 536 537 fsubd %f16,%f36,%f16 538 fcmple32 %f36,pio2_3,%l6 539 540 fsubd %f24,%f38,%f24 541 fcmple32 %f38,pio2_3,%l7 542 543 fsubd %f0,%f4,%f0 544 fmuld %f6,pio2_3,%f6 545 sll %l4,30,%l4 ! 
if (x < 0) n = -n ^ 2 546 547 fsubd %f8,%f12,%f8 548 fmuld %f14,pio2_3,%f14 549 sll %l5,30,%l5 550 551 fsubd %f16,%f20,%f16 552 fmuld %f22,pio2_3,%f22 553 sll %l6,30,%l6 554 555 fsubd %f24,%f28,%f24 556 fmuld %f30,pio2_3,%f30 557 sll %l7,30,%l7 558 559 fsubd %f6,%f0,%f6 560 sra %l4,31,%l4 561 562 fsubd %f14,%f8,%f14 563 sra %l5,31,%l5 564 565 fsubd %f22,%f16,%f22 566 sra %l6,31,%l6 567 568 fsubd %f30,%f24,%f30 569 sra %l7,31,%l7 570 571 fsubd %f32,%f6,%f0 ! reduced x 572 xor %l0,%l4,%l0 573 574 fsubd %f34,%f14,%f8 575 xor %l1,%l5,%l1 576 577 fsubd %f36,%f22,%f16 578 xor %l2,%l6,%l2 579 580 fsubd %f38,%f30,%f24 581 xor %l3,%l7,%l3 582 583 fabsd %f0,%f2 584 sub %l0,%l4,%l0 585 586 fabsd %f8,%f10 587 sub %l1,%l5,%l1 588 589 fabsd %f16,%f18 590 sub %l2,%l6,%l2 591 592 fabsd %f24,%f26 593 sub %l3,%l7,%l3 594 595 faddd %f2,c3two44,%f4 596 st %f5,[%fp+nk0] 597 and %l4,2,%l4 598 599 faddd %f10,c3two44,%f12 600 st %f13,[%fp+nk1] 601 and %l5,2,%l5 602 603 faddd %f18,c3two44,%f20 604 st %f21,[%fp+nk2] 605 and %l6,2,%l6 606 607 faddd %f26,c3two44,%f28 608 st %f29,[%fp+nk3] 609 and %l7,2,%l7 610 611 fsubd %f32,%f0,%f4 612 xor %l0,%l4,%l0 613 614 fsubd %f34,%f8,%f12 615 xor %l1,%l5,%l1 616 617 fsubd %f36,%f16,%f20 618 xor %l2,%l6,%l2 619 620 fsubd %f38,%f24,%f28 621 xor %l3,%l7,%l3 622 623 fzero %f38 624 ld [%fp+nk0],%l4 625 626 fsubd %f4,%f6,%f6 ! w 627 ld [%fp+nk1],%l5 628 629 fsubd %f12,%f14,%f14 630 ld [%fp+nk2],%l6 631 632 fnegd %f38,%f38 633 ld [%fp+nk3],%l7 634 sll %l4,5,%l4 ! k 635 636 fsubd %f20,%f22,%f22 637 sll %l5,5,%l5 638 639 fsubd %f28,%f30,%f30 640 sll %l6,5,%l6 641 642 fand %f0,%f38,%f32 ! sign bit of x 643 ldd [%l4+%g1],%f4 644 sll %l7,5,%l7 645 646 fand %f8,%f38,%f34 647 ldd [%l5+%g1],%f12 648 649 fand %f16,%f38,%f36 650 ldd [%l6+%g1],%f20 651 652 fand %f24,%f38,%f38 653 ldd [%l7+%g1],%f28 654 655 fsubd %f2,%f4,%f2 ! x -= __vlibm_TBL_sincos2[k] 656 657 fsubd %f10,%f12,%f10 658 659 fsubd %f18,%f20,%f18 660 nop 661 662 fsubd %f26,%f28,%f26 663 nop 664 665 ! 
16-byte aligned 666 fmuld %f2,%f2,%f0 ! z = x * x 667 andcc %l0,1,%g0 668 bz,pn %icc,.case8 669 ! delay slot 670 fxor %f6,%f32,%f32 671 672 fmuld %f10,%f10,%f8 673 andcc %l1,1,%g0 674 bz,pn %icc,.case4 675 ! delay slot 676 fxor %f14,%f34,%f34 677 678 fmuld %f18,%f18,%f16 679 andcc %l2,1,%g0 680 bz,pn %icc,.case2 681 ! delay slot 682 fxor %f22,%f36,%f36 683 684 fmuld %f26,%f26,%f24 685 andcc %l3,1,%g0 686 bz,pn %icc,.case1 687 ! delay slot 688 fxor %f30,%f38,%f38 689 690 !.case0: 691 fmuld %f0,qq3,%f6 ! cos(x0) 692 693 fmuld %f8,qq3,%f14 ! cos(x1) 694 695 fmuld %f16,qq3,%f22 ! cos(x2) 696 697 fmuld %f24,qq3,%f30 ! cos(x3) 698 699 faddd %f6,qq2,%f6 700 fmuld %f0,pp2,%f4 701 702 faddd %f14,qq2,%f14 703 fmuld %f8,pp2,%f12 704 705 faddd %f22,qq2,%f22 706 fmuld %f16,pp2,%f20 707 708 faddd %f30,qq2,%f30 709 fmuld %f24,pp2,%f28 710 711 fmuld %f0,%f6,%f6 712 faddd %f4,pp1,%f4 713 714 fmuld %f8,%f14,%f14 715 faddd %f12,pp1,%f12 716 717 fmuld %f16,%f22,%f22 718 faddd %f20,pp1,%f20 719 720 fmuld %f24,%f30,%f30 721 faddd %f28,pp1,%f28 722 723 faddd %f6,qq1,%f6 724 fmuld %f0,%f4,%f4 725 add %l4,%g1,%l4 726 727 faddd %f14,qq1,%f14 728 fmuld %f8,%f12,%f12 729 add %l5,%g1,%l5 730 731 faddd %f22,qq1,%f22 732 fmuld %f16,%f20,%f20 733 add %l6,%g1,%l6 734 735 faddd %f30,qq1,%f30 736 fmuld %f24,%f28,%f28 737 add %l7,%g1,%l7 738 739 fmuld %f2,%f4,%f4 740 741 fmuld %f10,%f12,%f12 742 743 fmuld %f18,%f20,%f20 744 745 fmuld %f26,%f28,%f28 746 747 fmuld %f0,%f6,%f6 748 faddd %f4,%f32,%f4 749 ldd [%l4+16],%f0 750 751 fmuld %f8,%f14,%f14 752 faddd %f12,%f34,%f12 753 ldd [%l5+16],%f8 754 755 fmuld %f16,%f22,%f22 756 faddd %f20,%f36,%f20 757 ldd [%l6+16],%f16 758 759 fmuld %f24,%f30,%f30 760 faddd %f28,%f38,%f28 761 ldd [%l7+16],%f24 762 763 fmuld %f0,%f6,%f6 764 faddd %f4,%f2,%f4 765 ldd [%l4+8],%f32 766 767 fmuld %f8,%f14,%f14 768 faddd %f12,%f10,%f12 769 ldd [%l5+8],%f34 770 771 fmuld %f16,%f22,%f22 772 faddd %f20,%f18,%f20 773 ldd [%l6+8],%f36 774 775 fmuld %f24,%f30,%f30 776 faddd 
%f28,%f26,%f28 777 ldd [%l7+8],%f38 778 779 fmuld %f32,%f4,%f4 780 781 fmuld %f34,%f12,%f12 782 783 fmuld %f36,%f20,%f20 784 785 fmuld %f38,%f28,%f28 786 787 fsubd %f6,%f4,%f6 788 789 fsubd %f14,%f12,%f14 790 791 fsubd %f22,%f20,%f22 792 793 fsubd %f30,%f28,%f30 794 795 faddd %f6,%f0,%f6 796 797 faddd %f14,%f8,%f14 798 799 faddd %f22,%f16,%f22 800 801 faddd %f30,%f24,%f30 802 mov %l0,%l4 803 804 fnegd %f6,%f4 805 lda [%i1]%asi,%l0 ! preload next argument 806 807 fnegd %f14,%f12 808 lda [%i1]%asi,%f0 809 810 fnegd %f22,%f20 811 lda [%i1+4]%asi,%f3 812 813 fnegd %f30,%f28 814 andn %l0,%i5,%l0 815 add %i1,%i2,%i1 816 817 andcc %l4,2,%g0 818 fmovdnz %icc,%f4,%f6 819 st %f6,[%o0] 820 821 andcc %l1,2,%g0 822 fmovdnz %icc,%f12,%f14 823 st %f14,[%o1] 824 825 andcc %l2,2,%g0 826 fmovdnz %icc,%f20,%f22 827 st %f22,[%o2] 828 829 andcc %l3,2,%g0 830 fmovdnz %icc,%f28,%f30 831 st %f30,[%o3] 832 833 addcc %i0,-1,%i0 834 bg,pt %icc,.loop0 835 ! delay slot 836 st %f7,[%o0+4] 837 838 ba,pt %icc,.end 839 ! delay slot 840 nop 841 842 .align 16 843 .case1: 844 fmuld %f24,pp3,%f30 ! sin(x3) 845 846 fmuld %f0,qq3,%f6 ! cos(x0) 847 848 fmuld %f8,qq3,%f14 ! cos(x1) 849 850 fmuld %f16,qq3,%f22 ! 
cos(x2) 851 852 faddd %f30,pp2,%f30 853 fmuld %f24,qq2,%f28 854 855 faddd %f6,qq2,%f6 856 fmuld %f0,pp2,%f4 857 858 faddd %f14,qq2,%f14 859 fmuld %f8,pp2,%f12 860 861 faddd %f22,qq2,%f22 862 fmuld %f16,pp2,%f20 863 864 fmuld %f24,%f30,%f30 865 faddd %f28,qq1,%f28 866 867 fmuld %f0,%f6,%f6 868 faddd %f4,pp1,%f4 869 870 fmuld %f8,%f14,%f14 871 faddd %f12,pp1,%f12 872 873 fmuld %f16,%f22,%f22 874 faddd %f20,pp1,%f20 875 876 faddd %f30,pp1,%f30 877 fmuld %f24,%f28,%f28 878 add %l7,%g1,%l7 879 880 faddd %f6,qq1,%f6 881 fmuld %f0,%f4,%f4 882 add %l4,%g1,%l4 883 884 faddd %f14,qq1,%f14 885 fmuld %f8,%f12,%f12 886 add %l5,%g1,%l5 887 888 faddd %f22,qq1,%f22 889 fmuld %f16,%f20,%f20 890 add %l6,%g1,%l6 891 892 fmuld %f24,%f30,%f30 893 894 fmuld %f2,%f4,%f4 895 896 fmuld %f10,%f12,%f12 897 898 fmuld %f18,%f20,%f20 899 900 fmuld %f26,%f30,%f30 901 ldd [%l7+8],%f24 902 903 fmuld %f0,%f6,%f6 904 faddd %f4,%f32,%f4 905 ldd [%l4+16],%f0 906 907 fmuld %f8,%f14,%f14 908 faddd %f12,%f34,%f12 909 ldd [%l5+16],%f8 910 911 fmuld %f16,%f22,%f22 912 faddd %f20,%f36,%f20 913 ldd [%l6+16],%f16 914 915 fmuld %f24,%f28,%f28 916 faddd %f38,%f30,%f30 917 918 fmuld %f0,%f6,%f6 919 faddd %f4,%f2,%f4 920 ldd [%l4+8],%f32 921 922 fmuld %f8,%f14,%f14 923 faddd %f12,%f10,%f12 924 ldd [%l5+8],%f34 925 926 fmuld %f16,%f22,%f22 927 faddd %f20,%f18,%f20 928 ldd [%l6+8],%f36 929 930 faddd %f26,%f30,%f30 931 ldd [%l7+16],%f38 932 933 fmuld %f32,%f4,%f4 934 935 fmuld %f34,%f12,%f12 936 937 fmuld %f36,%f20,%f20 938 939 fmuld %f38,%f30,%f30 940 941 fsubd %f6,%f4,%f6 942 943 fsubd %f14,%f12,%f14 944 945 fsubd %f22,%f20,%f22 946 947 faddd %f30,%f28,%f30 948 949 faddd %f6,%f0,%f6 950 951 faddd %f14,%f8,%f14 952 953 faddd %f22,%f16,%f22 954 955 faddd %f30,%f24,%f30 956 mov %l0,%l4 957 958 fnegd %f6,%f4 959 lda [%i1]%asi,%l0 ! 
preload next argument 960 961 fnegd %f14,%f12 962 lda [%i1]%asi,%f0 963 964 fnegd %f22,%f20 965 lda [%i1+4]%asi,%f3 966 967 fnegd %f30,%f28 968 andn %l0,%i5,%l0 969 add %i1,%i2,%i1 970 971 andcc %l4,2,%g0 972 fmovdnz %icc,%f4,%f6 973 st %f6,[%o0] 974 975 andcc %l1,2,%g0 976 fmovdnz %icc,%f12,%f14 977 st %f14,[%o1] 978 979 andcc %l2,2,%g0 980 fmovdnz %icc,%f20,%f22 981 st %f22,[%o2] 982 983 andcc %l3,2,%g0 984 fmovdnz %icc,%f28,%f30 985 st %f30,[%o3] 986 987 addcc %i0,-1,%i0 988 bg,pt %icc,.loop0 989 ! delay slot 990 st %f7,[%o0+4] 991 992 ba,pt %icc,.end 993 ! delay slot 994 nop 995 996 .align 16 997 .case2: 998 fmuld %f26,%f26,%f24 999 andcc %l3,1,%g0 1000 bz,pn %icc,.case3 1001 ! delay slot 1002 fxor %f30,%f38,%f38 1003 1004 fmuld %f16,pp3,%f22 ! sin(x2) 1005 1006 fmuld %f0,qq3,%f6 ! cos(x0) 1007 1008 fmuld %f8,qq3,%f14 ! cos(x1) 1009 1010 faddd %f22,pp2,%f22 1011 fmuld %f16,qq2,%f20 1012 1013 fmuld %f24,qq3,%f30 ! cos(x3) 1014 1015 faddd %f6,qq2,%f6 1016 fmuld %f0,pp2,%f4 1017 1018 faddd %f14,qq2,%f14 1019 fmuld %f8,pp2,%f12 1020 1021 fmuld %f16,%f22,%f22 1022 faddd %f20,qq1,%f20 1023 1024 faddd %f30,qq2,%f30 1025 fmuld %f24,pp2,%f28 1026 1027 fmuld %f0,%f6,%f6 1028 faddd %f4,pp1,%f4 1029 1030 fmuld %f8,%f14,%f14 1031 faddd %f12,pp1,%f12 1032 1033 faddd %f22,pp1,%f22 1034 fmuld %f16,%f20,%f20 1035 add %l6,%g1,%l6 1036 1037 fmuld %f24,%f30,%f30 1038 faddd %f28,pp1,%f28 1039 1040 faddd %f6,qq1,%f6 1041 fmuld %f0,%f4,%f4 1042 add %l4,%g1,%l4 1043 1044 faddd %f14,qq1,%f14 1045 fmuld %f8,%f12,%f12 1046 add %l5,%g1,%l5 1047 1048 fmuld %f16,%f22,%f22 1049 1050 faddd %f30,qq1,%f30 1051 fmuld %f24,%f28,%f28 1052 add %l7,%g1,%l7 1053 1054 fmuld %f2,%f4,%f4 1055 1056 fmuld %f10,%f12,%f12 1057 1058 fmuld %f18,%f22,%f22 1059 ldd [%l6+8],%f16 1060 1061 fmuld %f26,%f28,%f28 1062 1063 fmuld %f0,%f6,%f6 1064 faddd %f4,%f32,%f4 1065 ldd [%l4+16],%f0 1066 1067 fmuld %f8,%f14,%f14 1068 faddd %f12,%f34,%f12 1069 ldd [%l5+16],%f8 1070 1071 fmuld %f16,%f20,%f20 1072 faddd 
%f36,%f22,%f22 1073 1074 fmuld %f24,%f30,%f30 1075 faddd %f28,%f38,%f28 1076 ldd [%l7+16],%f24 1077 1078 fmuld %f0,%f6,%f6 1079 faddd %f4,%f2,%f4 1080 ldd [%l4+8],%f32 1081 1082 fmuld %f8,%f14,%f14 1083 faddd %f12,%f10,%f12 1084 ldd [%l5+8],%f34 1085 1086 faddd %f18,%f22,%f22 1087 ldd [%l6+16],%f36 1088 1089 fmuld %f24,%f30,%f30 1090 faddd %f28,%f26,%f28 1091 ldd [%l7+8],%f38 1092 1093 fmuld %f32,%f4,%f4 1094 1095 fmuld %f34,%f12,%f12 1096 1097 fmuld %f36,%f22,%f22 1098 1099 fmuld %f38,%f28,%f28 1100 1101 fsubd %f6,%f4,%f6 1102 1103 fsubd %f14,%f12,%f14 1104 1105 faddd %f22,%f20,%f22 1106 1107 fsubd %f30,%f28,%f30 1108 1109 faddd %f6,%f0,%f6 1110 1111 faddd %f14,%f8,%f14 1112 1113 faddd %f22,%f16,%f22 1114 1115 faddd %f30,%f24,%f30 1116 mov %l0,%l4 1117 1118 fnegd %f6,%f4 1119 lda [%i1]%asi,%l0 ! preload next argument 1120 1121 fnegd %f14,%f12 1122 lda [%i1]%asi,%f0 1123 1124 fnegd %f22,%f20 1125 lda [%i1+4]%asi,%f3 1126 1127 fnegd %f30,%f28 1128 andn %l0,%i5,%l0 1129 add %i1,%i2,%i1 1130 1131 andcc %l4,2,%g0 1132 fmovdnz %icc,%f4,%f6 1133 st %f6,[%o0] 1134 1135 andcc %l1,2,%g0 1136 fmovdnz %icc,%f12,%f14 1137 st %f14,[%o1] 1138 1139 andcc %l2,2,%g0 1140 fmovdnz %icc,%f20,%f22 1141 st %f22,[%o2] 1142 1143 andcc %l3,2,%g0 1144 fmovdnz %icc,%f28,%f30 1145 st %f30,[%o3] 1146 1147 addcc %i0,-1,%i0 1148 bg,pt %icc,.loop0 1149 ! delay slot 1150 st %f7,[%o0+4] 1151 1152 ba,pt %icc,.end 1153 ! delay slot 1154 nop 1155 1156 .align 16 1157 .case3: 1158 fmuld %f16,pp3,%f22 ! sin(x2) 1159 1160 fmuld %f24,pp3,%f30 ! sin(x3) 1161 1162 fmuld %f0,qq3,%f6 ! cos(x0) 1163 1164 fmuld %f8,qq3,%f14 ! 
cos(x1) 1165 1166 faddd %f22,pp2,%f22 1167 fmuld %f16,qq2,%f20 1168 1169 faddd %f30,pp2,%f30 1170 fmuld %f24,qq2,%f28 1171 1172 faddd %f6,qq2,%f6 1173 fmuld %f0,pp2,%f4 1174 1175 faddd %f14,qq2,%f14 1176 fmuld %f8,pp2,%f12 1177 1178 fmuld %f16,%f22,%f22 1179 faddd %f20,qq1,%f20 1180 1181 fmuld %f24,%f30,%f30 1182 faddd %f28,qq1,%f28 1183 1184 fmuld %f0,%f6,%f6 1185 faddd %f4,pp1,%f4 1186 1187 fmuld %f8,%f14,%f14 1188 faddd %f12,pp1,%f12 1189 1190 faddd %f22,pp1,%f22 1191 fmuld %f16,%f20,%f20 1192 add %l6,%g1,%l6 1193 1194 faddd %f30,pp1,%f30 1195 fmuld %f24,%f28,%f28 1196 add %l7,%g1,%l7 1197 1198 faddd %f6,qq1,%f6 1199 fmuld %f0,%f4,%f4 1200 add %l4,%g1,%l4 1201 1202 faddd %f14,qq1,%f14 1203 fmuld %f8,%f12,%f12 1204 add %l5,%g1,%l5 1205 1206 fmuld %f16,%f22,%f22 1207 1208 fmuld %f24,%f30,%f30 1209 1210 fmuld %f2,%f4,%f4 1211 1212 fmuld %f10,%f12,%f12 1213 1214 fmuld %f18,%f22,%f22 1215 ldd [%l6+8],%f16 1216 1217 fmuld %f26,%f30,%f30 1218 ldd [%l7+8],%f24 1219 1220 fmuld %f0,%f6,%f6 1221 faddd %f4,%f32,%f4 1222 ldd [%l4+16],%f0 1223 1224 fmuld %f8,%f14,%f14 1225 faddd %f12,%f34,%f12 1226 ldd [%l5+16],%f8 1227 1228 fmuld %f16,%f20,%f20 1229 faddd %f36,%f22,%f22 1230 1231 fmuld %f24,%f28,%f28 1232 faddd %f38,%f30,%f30 1233 1234 fmuld %f0,%f6,%f6 1235 faddd %f4,%f2,%f4 1236 ldd [%l4+8],%f32 1237 1238 fmuld %f8,%f14,%f14 1239 faddd %f12,%f10,%f12 1240 ldd [%l5+8],%f34 1241 1242 faddd %f18,%f22,%f22 1243 ldd [%l6+16],%f36 1244 1245 faddd %f26,%f30,%f30 1246 ldd [%l7+16],%f38 1247 1248 fmuld %f32,%f4,%f4 1249 1250 fmuld %f34,%f12,%f12 1251 1252 fmuld %f36,%f22,%f22 1253 1254 fmuld %f38,%f30,%f30 1255 1256 fsubd %f6,%f4,%f6 1257 1258 fsubd %f14,%f12,%f14 1259 1260 faddd %f22,%f20,%f22 1261 1262 faddd %f30,%f28,%f30 1263 1264 faddd %f6,%f0,%f6 1265 1266 faddd %f14,%f8,%f14 1267 1268 faddd %f22,%f16,%f22 1269 1270 faddd %f30,%f24,%f30 1271 mov %l0,%l4 1272 1273 fnegd %f6,%f4 1274 lda [%i1]%asi,%l0 ! 
preload next argument 1275 1276 fnegd %f14,%f12 1277 lda [%i1]%asi,%f0 1278 1279 fnegd %f22,%f20 1280 lda [%i1+4]%asi,%f3 1281 1282 fnegd %f30,%f28 1283 andn %l0,%i5,%l0 1284 add %i1,%i2,%i1 1285 1286 andcc %l4,2,%g0 1287 fmovdnz %icc,%f4,%f6 1288 st %f6,[%o0] 1289 1290 andcc %l1,2,%g0 1291 fmovdnz %icc,%f12,%f14 1292 st %f14,[%o1] 1293 1294 andcc %l2,2,%g0 1295 fmovdnz %icc,%f20,%f22 1296 st %f22,[%o2] 1297 1298 andcc %l3,2,%g0 1299 fmovdnz %icc,%f28,%f30 1300 st %f30,[%o3] 1301 1302 addcc %i0,-1,%i0 1303 bg,pt %icc,.loop0 1304 ! delay slot 1305 st %f7,[%o0+4] 1306 1307 ba,pt %icc,.end 1308 ! delay slot 1309 nop 1310 1311 .align 16 1312 .case4: 1313 fmuld %f18,%f18,%f16 1314 andcc %l2,1,%g0 1315 bz,pn %icc,.case6 1316 ! delay slot 1317 fxor %f22,%f36,%f36 1318 1319 fmuld %f26,%f26,%f24 1320 andcc %l3,1,%g0 1321 bz,pn %icc,.case5 1322 ! delay slot 1323 fxor %f30,%f38,%f38 1324 1325 fmuld %f8,pp3,%f14 ! sin(x1) 1326 1327 fmuld %f0,qq3,%f6 ! cos(x0) 1328 1329 faddd %f14,pp2,%f14 1330 fmuld %f8,qq2,%f12 1331 1332 fmuld %f16,qq3,%f22 ! cos(x2) 1333 1334 fmuld %f24,qq3,%f30 ! 
cos(x3) 1335 1336 faddd %f6,qq2,%f6 1337 fmuld %f0,pp2,%f4 1338 1339 fmuld %f8,%f14,%f14 1340 faddd %f12,qq1,%f12 1341 1342 faddd %f22,qq2,%f22 1343 fmuld %f16,pp2,%f20 1344 1345 faddd %f30,qq2,%f30 1346 fmuld %f24,pp2,%f28 1347 1348 fmuld %f0,%f6,%f6 1349 faddd %f4,pp1,%f4 1350 1351 faddd %f14,pp1,%f14 1352 fmuld %f8,%f12,%f12 1353 add %l5,%g1,%l5 1354 1355 fmuld %f16,%f22,%f22 1356 faddd %f20,pp1,%f20 1357 1358 fmuld %f24,%f30,%f30 1359 faddd %f28,pp1,%f28 1360 1361 faddd %f6,qq1,%f6 1362 fmuld %f0,%f4,%f4 1363 add %l4,%g1,%l4 1364 1365 fmuld %f8,%f14,%f14 1366 1367 faddd %f22,qq1,%f22 1368 fmuld %f16,%f20,%f20 1369 add %l6,%g1,%l6 1370 1371 faddd %f30,qq1,%f30 1372 fmuld %f24,%f28,%f28 1373 add %l7,%g1,%l7 1374 1375 fmuld %f2,%f4,%f4 1376 1377 fmuld %f10,%f14,%f14 1378 ldd [%l5+8],%f8 1379 1380 fmuld %f18,%f20,%f20 1381 1382 fmuld %f26,%f28,%f28 1383 1384 fmuld %f0,%f6,%f6 1385 faddd %f4,%f32,%f4 1386 ldd [%l4+16],%f0 1387 1388 fmuld %f8,%f12,%f12 1389 faddd %f34,%f14,%f14 1390 1391 fmuld %f16,%f22,%f22 1392 faddd %f20,%f36,%f20 1393 ldd [%l6+16],%f16 1394 1395 fmuld %f24,%f30,%f30 1396 faddd %f28,%f38,%f28 1397 ldd [%l7+16],%f24 1398 1399 fmuld %f0,%f6,%f6 1400 faddd %f4,%f2,%f4 1401 ldd [%l4+8],%f32 1402 1403 faddd %f10,%f14,%f14 1404 ldd [%l5+16],%f34 1405 1406 fmuld %f16,%f22,%f22 1407 faddd %f20,%f18,%f20 1408 ldd [%l6+8],%f36 1409 1410 fmuld %f24,%f30,%f30 1411 faddd %f28,%f26,%f28 1412 ldd [%l7+8],%f38 1413 1414 fmuld %f32,%f4,%f4 1415 1416 fmuld %f34,%f14,%f14 1417 1418 fmuld %f36,%f20,%f20 1419 1420 fmuld %f38,%f28,%f28 1421 1422 fsubd %f6,%f4,%f6 1423 1424 faddd %f14,%f12,%f14 1425 1426 fsubd %f22,%f20,%f22 1427 1428 fsubd %f30,%f28,%f30 1429 1430 faddd %f6,%f0,%f6 1431 1432 faddd %f14,%f8,%f14 1433 1434 faddd %f22,%f16,%f22 1435 1436 faddd %f30,%f24,%f30 1437 mov %l0,%l4 1438 1439 fnegd %f6,%f4 1440 lda [%i1]%asi,%l0 ! 
preload next argument 1441 1442 fnegd %f14,%f12 1443 lda [%i1]%asi,%f0 1444 1445 fnegd %f22,%f20 1446 lda [%i1+4]%asi,%f3 1447 1448 fnegd %f30,%f28 1449 andn %l0,%i5,%l0 1450 add %i1,%i2,%i1 1451 1452 andcc %l4,2,%g0 1453 fmovdnz %icc,%f4,%f6 1454 st %f6,[%o0] 1455 1456 andcc %l1,2,%g0 1457 fmovdnz %icc,%f12,%f14 1458 st %f14,[%o1] 1459 1460 andcc %l2,2,%g0 1461 fmovdnz %icc,%f20,%f22 1462 st %f22,[%o2] 1463 1464 andcc %l3,2,%g0 1465 fmovdnz %icc,%f28,%f30 1466 st %f30,[%o3] 1467 1468 addcc %i0,-1,%i0 1469 bg,pt %icc,.loop0 1470 ! delay slot 1471 st %f7,[%o0+4] 1472 1473 ba,pt %icc,.end 1474 ! delay slot 1475 nop 1476 1477 .align 16 1478 .case5: 1479 fmuld %f8,pp3,%f14 ! sin(x1) 1480 1481 fmuld %f24,pp3,%f30 ! sin(x3) 1482 1483 fmuld %f0,qq3,%f6 ! cos(x0) 1484 1485 faddd %f14,pp2,%f14 1486 fmuld %f8,qq2,%f12 1487 1488 fmuld %f16,qq3,%f22 ! cos(x2) 1489 1490 faddd %f30,pp2,%f30 1491 fmuld %f24,qq2,%f28 1492 1493 faddd %f6,qq2,%f6 1494 fmuld %f0,pp2,%f4 1495 1496 fmuld %f8,%f14,%f14 1497 faddd %f12,qq1,%f12 1498 1499 faddd %f22,qq2,%f22 1500 fmuld %f16,pp2,%f20 1501 1502 fmuld %f24,%f30,%f30 1503 faddd %f28,qq1,%f28 1504 1505 fmuld %f0,%f6,%f6 1506 faddd %f4,pp1,%f4 1507 1508 faddd %f14,pp1,%f14 1509 fmuld %f8,%f12,%f12 1510 add %l5,%g1,%l5 1511 1512 fmuld %f16,%f22,%f22 1513 faddd %f20,pp1,%f20 1514 1515 faddd %f30,pp1,%f30 1516 fmuld %f24,%f28,%f28 1517 add %l7,%g1,%l7 1518 1519 faddd %f6,qq1,%f6 1520 fmuld %f0,%f4,%f4 1521 add %l4,%g1,%l4 1522 1523 fmuld %f8,%f14,%f14 1524 1525 faddd %f22,qq1,%f22 1526 fmuld %f16,%f20,%f20 1527 add %l6,%g1,%l6 1528 1529 fmuld %f24,%f30,%f30 1530 1531 fmuld %f2,%f4,%f4 1532 1533 fmuld %f10,%f14,%f14 1534 ldd [%l5+8],%f8 1535 1536 fmuld %f18,%f20,%f20 1537 1538 fmuld %f26,%f30,%f30 1539 ldd [%l7+8],%f24 1540 1541 fmuld %f0,%f6,%f6 1542 faddd %f4,%f32,%f4 1543 ldd [%l4+16],%f0 1544 1545 fmuld %f8,%f12,%f12 1546 faddd %f34,%f14,%f14 1547 1548 fmuld %f16,%f22,%f22 1549 faddd %f20,%f36,%f20 1550 ldd [%l6+16],%f16 1551 1552 fmuld 
%f24,%f28,%f28 1553 faddd %f38,%f30,%f30 1554 1555 fmuld %f0,%f6,%f6 1556 faddd %f4,%f2,%f4 1557 ldd [%l4+8],%f32 1558 1559 faddd %f10,%f14,%f14 1560 ldd [%l5+16],%f34 1561 1562 fmuld %f16,%f22,%f22 1563 faddd %f20,%f18,%f20 1564 ldd [%l6+8],%f36 1565 1566 faddd %f26,%f30,%f30 1567 ldd [%l7+16],%f38 1568 1569 fmuld %f32,%f4,%f4 1570 1571 fmuld %f34,%f14,%f14 1572 1573 fmuld %f36,%f20,%f20 1574 1575 fmuld %f38,%f30,%f30 1576 1577 fsubd %f6,%f4,%f6 1578 1579 faddd %f14,%f12,%f14 1580 1581 fsubd %f22,%f20,%f22 1582 1583 faddd %f30,%f28,%f30 1584 1585 faddd %f6,%f0,%f6 1586 1587 faddd %f14,%f8,%f14 1588 1589 faddd %f22,%f16,%f22 1590 1591 faddd %f30,%f24,%f30 1592 mov %l0,%l4 1593 1594 fnegd %f6,%f4 1595 lda [%i1]%asi,%l0 ! preload next argument 1596 1597 fnegd %f14,%f12 1598 lda [%i1]%asi,%f0 1599 1600 fnegd %f22,%f20 1601 lda [%i1+4]%asi,%f3 1602 1603 fnegd %f30,%f28 1604 andn %l0,%i5,%l0 1605 add %i1,%i2,%i1 1606 1607 andcc %l4,2,%g0 1608 fmovdnz %icc,%f4,%f6 1609 st %f6,[%o0] 1610 1611 andcc %l1,2,%g0 1612 fmovdnz %icc,%f12,%f14 1613 st %f14,[%o1] 1614 1615 andcc %l2,2,%g0 1616 fmovdnz %icc,%f20,%f22 1617 st %f22,[%o2] 1618 1619 andcc %l3,2,%g0 1620 fmovdnz %icc,%f28,%f30 1621 st %f30,[%o3] 1622 1623 addcc %i0,-1,%i0 1624 bg,pt %icc,.loop0 1625 ! delay slot 1626 st %f7,[%o0+4] 1627 1628 ba,pt %icc,.end 1629 ! delay slot 1630 nop 1631 1632 .align 16 1633 .case6: 1634 fmuld %f26,%f26,%f24 1635 andcc %l3,1,%g0 1636 bz,pn %icc,.case7 1637 ! delay slot 1638 fxor %f30,%f38,%f38 1639 1640 fmuld %f8,pp3,%f14 ! sin(x1) 1641 1642 fmuld %f16,pp3,%f22 ! sin(x2) 1643 1644 fmuld %f0,qq3,%f6 ! cos(x0) 1645 1646 faddd %f14,pp2,%f14 1647 fmuld %f8,qq2,%f12 1648 1649 faddd %f22,pp2,%f22 1650 fmuld %f16,qq2,%f20 1651 1652 fmuld %f24,qq3,%f30 ! 
cos(x3) 1653 1654 faddd %f6,qq2,%f6 1655 fmuld %f0,pp2,%f4 1656 1657 fmuld %f8,%f14,%f14 1658 faddd %f12,qq1,%f12 1659 1660 fmuld %f16,%f22,%f22 1661 faddd %f20,qq1,%f20 1662 1663 faddd %f30,qq2,%f30 1664 fmuld %f24,pp2,%f28 1665 1666 fmuld %f0,%f6,%f6 1667 faddd %f4,pp1,%f4 1668 1669 faddd %f14,pp1,%f14 1670 fmuld %f8,%f12,%f12 1671 add %l5,%g1,%l5 1672 1673 faddd %f22,pp1,%f22 1674 fmuld %f16,%f20,%f20 1675 add %l6,%g1,%l6 1676 1677 fmuld %f24,%f30,%f30 1678 faddd %f28,pp1,%f28 1679 1680 faddd %f6,qq1,%f6 1681 fmuld %f0,%f4,%f4 1682 add %l4,%g1,%l4 1683 1684 fmuld %f8,%f14,%f14 1685 1686 fmuld %f16,%f22,%f22 1687 1688 faddd %f30,qq1,%f30 1689 fmuld %f24,%f28,%f28 1690 add %l7,%g1,%l7 1691 1692 fmuld %f2,%f4,%f4 1693 1694 fmuld %f10,%f14,%f14 1695 ldd [%l5+8],%f8 1696 1697 fmuld %f18,%f22,%f22 1698 ldd [%l6+8],%f16 1699 1700 fmuld %f26,%f28,%f28 1701 1702 fmuld %f0,%f6,%f6 1703 faddd %f4,%f32,%f4 1704 ldd [%l4+16],%f0 1705 1706 fmuld %f8,%f12,%f12 1707 faddd %f34,%f14,%f14 1708 1709 fmuld %f16,%f20,%f20 1710 faddd %f36,%f22,%f22 1711 1712 fmuld %f24,%f30,%f30 1713 faddd %f28,%f38,%f28 1714 ldd [%l7+16],%f24 1715 1716 fmuld %f0,%f6,%f6 1717 faddd %f4,%f2,%f4 1718 ldd [%l4+8],%f32 1719 1720 faddd %f10,%f14,%f14 1721 ldd [%l5+16],%f34 1722 1723 faddd %f18,%f22,%f22 1724 ldd [%l6+16],%f36 1725 1726 fmuld %f24,%f30,%f30 1727 faddd %f28,%f26,%f28 1728 ldd [%l7+8],%f38 1729 1730 fmuld %f32,%f4,%f4 1731 1732 fmuld %f34,%f14,%f14 1733 1734 fmuld %f36,%f22,%f22 1735 1736 fmuld %f38,%f28,%f28 1737 1738 fsubd %f6,%f4,%f6 1739 1740 faddd %f14,%f12,%f14 1741 1742 faddd %f22,%f20,%f22 1743 1744 fsubd %f30,%f28,%f30 1745 1746 faddd %f6,%f0,%f6 1747 1748 faddd %f14,%f8,%f14 1749 1750 faddd %f22,%f16,%f22 1751 1752 faddd %f30,%f24,%f30 1753 mov %l0,%l4 1754 1755 fnegd %f6,%f4 1756 lda [%i1]%asi,%l0 ! 
preload next argument 1757 1758 fnegd %f14,%f12 1759 lda [%i1]%asi,%f0 1760 1761 fnegd %f22,%f20 1762 lda [%i1+4]%asi,%f3 1763 1764 fnegd %f30,%f28 1765 andn %l0,%i5,%l0 1766 add %i1,%i2,%i1 1767 1768 andcc %l4,2,%g0 1769 fmovdnz %icc,%f4,%f6 1770 st %f6,[%o0] 1771 1772 andcc %l1,2,%g0 1773 fmovdnz %icc,%f12,%f14 1774 st %f14,[%o1] 1775 1776 andcc %l2,2,%g0 1777 fmovdnz %icc,%f20,%f22 1778 st %f22,[%o2] 1779 1780 andcc %l3,2,%g0 1781 fmovdnz %icc,%f28,%f30 1782 st %f30,[%o3] 1783 1784 addcc %i0,-1,%i0 1785 bg,pt %icc,.loop0 1786 ! delay slot 1787 st %f7,[%o0+4] 1788 1789 ba,pt %icc,.end 1790 ! delay slot 1791 nop 1792 1793 .align 16 1794 .case7: 1795 fmuld %f8,pp3,%f14 ! sin(x1) 1796 1797 fmuld %f16,pp3,%f22 ! sin(x2) 1798 1799 fmuld %f24,pp3,%f30 ! sin(x3) 1800 1801 fmuld %f0,qq3,%f6 ! cos(x0) 1802 1803 faddd %f14,pp2,%f14 1804 fmuld %f8,qq2,%f12 1805 1806 faddd %f22,pp2,%f22 1807 fmuld %f16,qq2,%f20 1808 1809 faddd %f30,pp2,%f30 1810 fmuld %f24,qq2,%f28 1811 1812 faddd %f6,qq2,%f6 1813 fmuld %f0,pp2,%f4 1814 1815 fmuld %f8,%f14,%f14 1816 faddd %f12,qq1,%f12 1817 1818 fmuld %f16,%f22,%f22 1819 faddd %f20,qq1,%f20 1820 1821 fmuld %f24,%f30,%f30 1822 faddd %f28,qq1,%f28 1823 1824 fmuld %f0,%f6,%f6 1825 faddd %f4,pp1,%f4 1826 1827 faddd %f14,pp1,%f14 1828 fmuld %f8,%f12,%f12 1829 add %l5,%g1,%l5 1830 1831 faddd %f22,pp1,%f22 1832 fmuld %f16,%f20,%f20 1833 add %l6,%g1,%l6 1834 1835 faddd %f30,pp1,%f30 1836 fmuld %f24,%f28,%f28 1837 add %l7,%g1,%l7 1838 1839 faddd %f6,qq1,%f6 1840 fmuld %f0,%f4,%f4 1841 add %l4,%g1,%l4 1842 1843 fmuld %f8,%f14,%f14 1844 1845 fmuld %f16,%f22,%f22 1846 1847 fmuld %f24,%f30,%f30 1848 1849 fmuld %f2,%f4,%f4 1850 1851 fmuld %f10,%f14,%f14 1852 ldd [%l5+8],%f8 1853 1854 fmuld %f18,%f22,%f22 1855 ldd [%l6+8],%f16 1856 1857 fmuld %f26,%f30,%f30 1858 ldd [%l7+8],%f24 1859 1860 fmuld %f0,%f6,%f6 1861 faddd %f4,%f32,%f4 1862 ldd [%l4+16],%f0 1863 1864 fmuld %f8,%f12,%f12 1865 faddd %f34,%f14,%f14 1866 1867 fmuld %f16,%f20,%f20 1868 faddd 
%f36,%f22,%f22 1869 1870 fmuld %f24,%f28,%f28 1871 faddd %f38,%f30,%f30 1872 1873 fmuld %f0,%f6,%f6 1874 faddd %f4,%f2,%f4 1875 ldd [%l4+8],%f32 1876 1877 faddd %f10,%f14,%f14 1878 ldd [%l5+16],%f34 1879 1880 faddd %f18,%f22,%f22 1881 ldd [%l6+16],%f36 1882 1883 faddd %f26,%f30,%f30 1884 ldd [%l7+16],%f38 1885 1886 fmuld %f32,%f4,%f4 1887 1888 fmuld %f34,%f14,%f14 1889 1890 fmuld %f36,%f22,%f22 1891 1892 fmuld %f38,%f30,%f30 1893 1894 fsubd %f6,%f4,%f6 1895 1896 faddd %f14,%f12,%f14 1897 1898 faddd %f22,%f20,%f22 1899 1900 faddd %f30,%f28,%f30 1901 1902 faddd %f6,%f0,%f6 1903 1904 faddd %f14,%f8,%f14 1905 1906 faddd %f22,%f16,%f22 1907 1908 faddd %f30,%f24,%f30 1909 mov %l0,%l4 1910 1911 fnegd %f6,%f4 1912 lda [%i1]%asi,%l0 ! preload next argument 1913 1914 fnegd %f14,%f12 1915 lda [%i1]%asi,%f0 1916 1917 fnegd %f22,%f20 1918 lda [%i1+4]%asi,%f3 1919 1920 fnegd %f30,%f28 1921 andn %l0,%i5,%l0 1922 add %i1,%i2,%i1 1923 1924 andcc %l4,2,%g0 1925 fmovdnz %icc,%f4,%f6 1926 st %f6,[%o0] 1927 1928 andcc %l1,2,%g0 1929 fmovdnz %icc,%f12,%f14 1930 st %f14,[%o1] 1931 1932 andcc %l2,2,%g0 1933 fmovdnz %icc,%f20,%f22 1934 st %f22,[%o2] 1935 1936 andcc %l3,2,%g0 1937 fmovdnz %icc,%f28,%f30 1938 st %f30,[%o3] 1939 1940 addcc %i0,-1,%i0 1941 bg,pt %icc,.loop0 1942 ! delay slot 1943 st %f7,[%o0+4] 1944 1945 ba,pt %icc,.end 1946 ! delay slot 1947 nop 1948 1949 .align 16 1950 .case8: 1951 fmuld %f10,%f10,%f8 1952 andcc %l1,1,%g0 1953 bz,pn %icc,.case12 1954 ! delay slot 1955 fxor %f14,%f34,%f34 1956 1957 fmuld %f18,%f18,%f16 1958 andcc %l2,1,%g0 1959 bz,pn %icc,.case10 1960 ! delay slot 1961 fxor %f22,%f36,%f36 1962 1963 fmuld %f26,%f26,%f24 1964 andcc %l3,1,%g0 1965 bz,pn %icc,.case9 1966 ! delay slot 1967 fxor %f30,%f38,%f38 1968 1969 fmuld %f0,pp3,%f6 ! sin(x0) 1970 1971 faddd %f6,pp2,%f6 1972 fmuld %f0,qq2,%f4 1973 1974 fmuld %f8,qq3,%f14 ! cos(x1) 1975 1976 fmuld %f16,qq3,%f22 ! cos(x2) 1977 1978 fmuld %f24,qq3,%f30 ! 
cos(x3) 1979 1980 fmuld %f0,%f6,%f6 1981 faddd %f4,qq1,%f4 1982 1983 faddd %f14,qq2,%f14 1984 fmuld %f8,pp2,%f12 1985 1986 faddd %f22,qq2,%f22 1987 fmuld %f16,pp2,%f20 1988 1989 faddd %f30,qq2,%f30 1990 fmuld %f24,pp2,%f28 1991 1992 faddd %f6,pp1,%f6 1993 fmuld %f0,%f4,%f4 1994 add %l4,%g1,%l4 1995 1996 fmuld %f8,%f14,%f14 1997 faddd %f12,pp1,%f12 1998 1999 fmuld %f16,%f22,%f22 2000 faddd %f20,pp1,%f20 2001 2002 fmuld %f24,%f30,%f30 2003 faddd %f28,pp1,%f28 2004 2005 fmuld %f0,%f6,%f6 2006 2007 faddd %f14,qq1,%f14 2008 fmuld %f8,%f12,%f12 2009 add %l5,%g1,%l5 2010 2011 faddd %f22,qq1,%f22 2012 fmuld %f16,%f20,%f20 2013 add %l6,%g1,%l6 2014 2015 faddd %f30,qq1,%f30 2016 fmuld %f24,%f28,%f28 2017 add %l7,%g1,%l7 2018 2019 fmuld %f2,%f6,%f6 2020 ldd [%l4+8],%f0 2021 2022 fmuld %f10,%f12,%f12 2023 2024 fmuld %f18,%f20,%f20 2025 2026 fmuld %f26,%f28,%f28 2027 2028 fmuld %f0,%f4,%f4 2029 faddd %f32,%f6,%f6 2030 2031 fmuld %f8,%f14,%f14 2032 faddd %f12,%f34,%f12 2033 ldd [%l5+16],%f8 2034 2035 fmuld %f16,%f22,%f22 2036 faddd %f20,%f36,%f20 2037 ldd [%l6+16],%f16 2038 2039 fmuld %f24,%f30,%f30 2040 faddd %f28,%f38,%f28 2041 ldd [%l7+16],%f24 2042 2043 faddd %f2,%f6,%f6 2044 ldd [%l4+16],%f32 2045 2046 fmuld %f8,%f14,%f14 2047 faddd %f12,%f10,%f12 2048 ldd [%l5+8],%f34 2049 2050 fmuld %f16,%f22,%f22 2051 faddd %f20,%f18,%f20 2052 ldd [%l6+8],%f36 2053 2054 fmuld %f24,%f30,%f30 2055 faddd %f28,%f26,%f28 2056 ldd [%l7+8],%f38 2057 2058 fmuld %f32,%f6,%f6 2059 2060 fmuld %f34,%f12,%f12 2061 2062 fmuld %f36,%f20,%f20 2063 2064 fmuld %f38,%f28,%f28 2065 2066 faddd %f6,%f4,%f6 2067 2068 fsubd %f14,%f12,%f14 2069 2070 fsubd %f22,%f20,%f22 2071 2072 fsubd %f30,%f28,%f30 2073 2074 faddd %f6,%f0,%f6 2075 2076 faddd %f14,%f8,%f14 2077 2078 faddd %f22,%f16,%f22 2079 2080 faddd %f30,%f24,%f30 2081 mov %l0,%l4 2082 2083 fnegd %f6,%f4 2084 lda [%i1]%asi,%l0 ! 
preload next argument 2085 2086 fnegd %f14,%f12 2087 lda [%i1]%asi,%f0 2088 2089 fnegd %f22,%f20 2090 lda [%i1+4]%asi,%f3 2091 2092 fnegd %f30,%f28 2093 andn %l0,%i5,%l0 2094 add %i1,%i2,%i1 2095 2096 andcc %l4,2,%g0 2097 fmovdnz %icc,%f4,%f6 2098 st %f6,[%o0] 2099 2100 andcc %l1,2,%g0 2101 fmovdnz %icc,%f12,%f14 2102 st %f14,[%o1] 2103 2104 andcc %l2,2,%g0 2105 fmovdnz %icc,%f20,%f22 2106 st %f22,[%o2] 2107 2108 andcc %l3,2,%g0 2109 fmovdnz %icc,%f28,%f30 2110 st %f30,[%o3] 2111 2112 addcc %i0,-1,%i0 2113 bg,pt %icc,.loop0 2114 ! delay slot 2115 st %f7,[%o0+4] 2116 2117 ba,pt %icc,.end 2118 ! delay slot 2119 nop 2120 2121 .align 16 2122 .case9: 2123 fmuld %f0,pp3,%f6 ! sin(x0) 2124 2125 fmuld %f24,pp3,%f30 ! sin(x3) 2126 2127 faddd %f6,pp2,%f6 2128 fmuld %f0,qq2,%f4 2129 2130 fmuld %f8,qq3,%f14 ! cos(x1) 2131 2132 fmuld %f16,qq3,%f22 ! cos(x2) 2133 2134 faddd %f30,pp2,%f30 2135 fmuld %f24,qq2,%f28 2136 2137 fmuld %f0,%f6,%f6 2138 faddd %f4,qq1,%f4 2139 2140 faddd %f14,qq2,%f14 2141 fmuld %f8,pp2,%f12 2142 2143 faddd %f22,qq2,%f22 2144 fmuld %f16,pp2,%f20 2145 2146 fmuld %f24,%f30,%f30 2147 faddd %f28,qq1,%f28 2148 2149 faddd %f6,pp1,%f6 2150 fmuld %f0,%f4,%f4 2151 add %l4,%g1,%l4 2152 2153 fmuld %f8,%f14,%f14 2154 faddd %f12,pp1,%f12 2155 2156 fmuld %f16,%f22,%f22 2157 faddd %f20,pp1,%f20 2158 2159 faddd %f30,pp1,%f30 2160 fmuld %f24,%f28,%f28 2161 add %l7,%g1,%l7 2162 2163 fmuld %f0,%f6,%f6 2164 2165 faddd %f14,qq1,%f14 2166 fmuld %f8,%f12,%f12 2167 add %l5,%g1,%l5 2168 2169 faddd %f22,qq1,%f22 2170 fmuld %f16,%f20,%f20 2171 add %l6,%g1,%l6 2172 2173 fmuld %f24,%f30,%f30 2174 2175 fmuld %f2,%f6,%f6 2176 ldd [%l4+8],%f0 2177 2178 fmuld %f10,%f12,%f12 2179 2180 fmuld %f18,%f20,%f20 2181 2182 fmuld %f26,%f30,%f30 2183 ldd [%l7+8],%f24 2184 2185 fmuld %f0,%f4,%f4 2186 faddd %f32,%f6,%f6 2187 2188 fmuld %f8,%f14,%f14 2189 faddd %f12,%f34,%f12 2190 ldd [%l5+16],%f8 2191 2192 fmuld %f16,%f22,%f22 2193 faddd %f20,%f36,%f20 2194 ldd [%l6+16],%f16 2195 2196 fmuld 
%f24,%f28,%f28 2197 faddd %f38,%f30,%f30 2198 2199 faddd %f2,%f6,%f6 2200 ldd [%l4+16],%f32 2201 2202 fmuld %f8,%f14,%f14 2203 faddd %f12,%f10,%f12 2204 ldd [%l5+8],%f34 2205 2206 fmuld %f16,%f22,%f22 2207 faddd %f20,%f18,%f20 2208 ldd [%l6+8],%f36 2209 2210 faddd %f26,%f30,%f30 2211 ldd [%l7+16],%f38 2212 2213 fmuld %f32,%f6,%f6 2214 2215 fmuld %f34,%f12,%f12 2216 2217 fmuld %f36,%f20,%f20 2218 2219 fmuld %f38,%f30,%f30 2220 2221 faddd %f6,%f4,%f6 2222 2223 fsubd %f14,%f12,%f14 2224 2225 fsubd %f22,%f20,%f22 2226 2227 faddd %f30,%f28,%f30 2228 2229 faddd %f6,%f0,%f6 2230 2231 faddd %f14,%f8,%f14 2232 2233 faddd %f22,%f16,%f22 2234 2235 faddd %f30,%f24,%f30 2236 mov %l0,%l4 2237 2238 fnegd %f6,%f4 2239 lda [%i1]%asi,%l0 ! preload next argument 2240 2241 fnegd %f14,%f12 2242 lda [%i1]%asi,%f0 2243 2244 fnegd %f22,%f20 2245 lda [%i1+4]%asi,%f3 2246 2247 fnegd %f30,%f28 2248 andn %l0,%i5,%l0 2249 add %i1,%i2,%i1 2250 2251 andcc %l4,2,%g0 2252 fmovdnz %icc,%f4,%f6 2253 st %f6,[%o0] 2254 2255 andcc %l1,2,%g0 2256 fmovdnz %icc,%f12,%f14 2257 st %f14,[%o1] 2258 2259 andcc %l2,2,%g0 2260 fmovdnz %icc,%f20,%f22 2261 st %f22,[%o2] 2262 2263 andcc %l3,2,%g0 2264 fmovdnz %icc,%f28,%f30 2265 st %f30,[%o3] 2266 2267 addcc %i0,-1,%i0 2268 bg,pt %icc,.loop0 2269 ! delay slot 2270 st %f7,[%o0+4] 2271 2272 ba,pt %icc,.end 2273 ! delay slot 2274 nop 2275 2276 .align 16 2277 .case10: 2278 fmuld %f26,%f26,%f24 2279 andcc %l3,1,%g0 2280 bz,pn %icc,.case11 2281 ! delay slot 2282 fxor %f30,%f38,%f38 2283 2284 fmuld %f0,pp3,%f6 ! sin(x0) 2285 2286 fmuld %f16,pp3,%f22 ! sin(x2) 2287 2288 faddd %f6,pp2,%f6 2289 fmuld %f0,qq2,%f4 2290 2291 fmuld %f8,qq3,%f14 ! cos(x1) 2292 2293 faddd %f22,pp2,%f22 2294 fmuld %f16,qq2,%f20 2295 2296 fmuld %f24,qq3,%f30 ! 
cos(x3) 2297 2298 fmuld %f0,%f6,%f6 2299 faddd %f4,qq1,%f4 2300 2301 faddd %f14,qq2,%f14 2302 fmuld %f8,pp2,%f12 2303 2304 fmuld %f16,%f22,%f22 2305 faddd %f20,qq1,%f20 2306 2307 faddd %f30,qq2,%f30 2308 fmuld %f24,pp2,%f28 2309 2310 faddd %f6,pp1,%f6 2311 fmuld %f0,%f4,%f4 2312 add %l4,%g1,%l4 2313 2314 fmuld %f8,%f14,%f14 2315 faddd %f12,pp1,%f12 2316 2317 faddd %f22,pp1,%f22 2318 fmuld %f16,%f20,%f20 2319 add %l6,%g1,%l6 2320 2321 fmuld %f24,%f30,%f30 2322 faddd %f28,pp1,%f28 2323 2324 fmuld %f0,%f6,%f6 2325 2326 faddd %f14,qq1,%f14 2327 fmuld %f8,%f12,%f12 2328 add %l5,%g1,%l5 2329 2330 fmuld %f16,%f22,%f22 2331 2332 faddd %f30,qq1,%f30 2333 fmuld %f24,%f28,%f28 2334 add %l7,%g1,%l7 2335 2336 fmuld %f2,%f6,%f6 2337 ldd [%l4+8],%f0 2338 2339 fmuld %f10,%f12,%f12 2340 2341 fmuld %f18,%f22,%f22 2342 ldd [%l6+8],%f16 2343 2344 fmuld %f26,%f28,%f28 2345 2346 fmuld %f0,%f4,%f4 2347 faddd %f32,%f6,%f6 2348 2349 fmuld %f8,%f14,%f14 2350 faddd %f12,%f34,%f12 2351 ldd [%l5+16],%f8 2352 2353 fmuld %f16,%f20,%f20 2354 faddd %f36,%f22,%f22 2355 2356 fmuld %f24,%f30,%f30 2357 faddd %f28,%f38,%f28 2358 ldd [%l7+16],%f24 2359 2360 faddd %f2,%f6,%f6 2361 ldd [%l4+16],%f32 2362 2363 fmuld %f8,%f14,%f14 2364 faddd %f12,%f10,%f12 2365 ldd [%l5+8],%f34 2366 2367 faddd %f18,%f22,%f22 2368 ldd [%l6+16],%f36 2369 2370 fmuld %f24,%f30,%f30 2371 faddd %f28,%f26,%f28 2372 ldd [%l7+8],%f38 2373 2374 fmuld %f32,%f6,%f6 2375 2376 fmuld %f34,%f12,%f12 2377 2378 fmuld %f36,%f22,%f22 2379 2380 fmuld %f38,%f28,%f28 2381 2382 faddd %f6,%f4,%f6 2383 2384 fsubd %f14,%f12,%f14 2385 2386 faddd %f22,%f20,%f22 2387 2388 fsubd %f30,%f28,%f30 2389 2390 faddd %f6,%f0,%f6 2391 2392 faddd %f14,%f8,%f14 2393 2394 faddd %f22,%f16,%f22 2395 2396 faddd %f30,%f24,%f30 2397 mov %l0,%l4 2398 2399 fnegd %f6,%f4 2400 lda [%i1]%asi,%l0 ! 
preload next argument 2401 2402 fnegd %f14,%f12 2403 lda [%i1]%asi,%f0 2404 2405 fnegd %f22,%f20 2406 lda [%i1+4]%asi,%f3 2407 2408 fnegd %f30,%f28 2409 andn %l0,%i5,%l0 2410 add %i1,%i2,%i1 2411 2412 andcc %l4,2,%g0 2413 fmovdnz %icc,%f4,%f6 2414 st %f6,[%o0] 2415 2416 andcc %l1,2,%g0 2417 fmovdnz %icc,%f12,%f14 2418 st %f14,[%o1] 2419 2420 andcc %l2,2,%g0 2421 fmovdnz %icc,%f20,%f22 2422 st %f22,[%o2] 2423 2424 andcc %l3,2,%g0 2425 fmovdnz %icc,%f28,%f30 2426 st %f30,[%o3] 2427 2428 addcc %i0,-1,%i0 2429 bg,pt %icc,.loop0 2430 ! delay slot 2431 st %f7,[%o0+4] 2432 2433 ba,pt %icc,.end 2434 ! delay slot 2435 nop 2436 2437 .align 16 2438 .case11: 2439 fmuld %f0,pp3,%f6 ! sin(x0) 2440 2441 fmuld %f16,pp3,%f22 ! sin(x2) 2442 2443 fmuld %f24,pp3,%f30 ! sin(x3) 2444 2445 faddd %f6,pp2,%f6 2446 fmuld %f0,qq2,%f4 2447 2448 fmuld %f8,qq3,%f14 ! cos(x1) 2449 2450 faddd %f22,pp2,%f22 2451 fmuld %f16,qq2,%f20 2452 2453 faddd %f30,pp2,%f30 2454 fmuld %f24,qq2,%f28 2455 2456 fmuld %f0,%f6,%f6 2457 faddd %f4,qq1,%f4 2458 2459 faddd %f14,qq2,%f14 2460 fmuld %f8,pp2,%f12 2461 2462 fmuld %f16,%f22,%f22 2463 faddd %f20,qq1,%f20 2464 2465 fmuld %f24,%f30,%f30 2466 faddd %f28,qq1,%f28 2467 2468 faddd %f6,pp1,%f6 2469 fmuld %f0,%f4,%f4 2470 add %l4,%g1,%l4 2471 2472 fmuld %f8,%f14,%f14 2473 faddd %f12,pp1,%f12 2474 2475 faddd %f22,pp1,%f22 2476 fmuld %f16,%f20,%f20 2477 add %l6,%g1,%l6 2478 2479 faddd %f30,pp1,%f30 2480 fmuld %f24,%f28,%f28 2481 add %l7,%g1,%l7 2482 2483 fmuld %f0,%f6,%f6 2484 2485 faddd %f14,qq1,%f14 2486 fmuld %f8,%f12,%f12 2487 add %l5,%g1,%l5 2488 2489 fmuld %f16,%f22,%f22 2490 2491 fmuld %f24,%f30,%f30 2492 2493 fmuld %f2,%f6,%f6 2494 ldd [%l4+8],%f0 2495 2496 fmuld %f10,%f12,%f12 2497 2498 fmuld %f18,%f22,%f22 2499 ldd [%l6+8],%f16 2500 2501 fmuld %f26,%f30,%f30 2502 ldd [%l7+8],%f24 2503 2504 fmuld %f0,%f4,%f4 2505 faddd %f32,%f6,%f6 2506 2507 fmuld %f8,%f14,%f14 2508 faddd %f12,%f34,%f12 2509 ldd [%l5+16],%f8 2510 2511 fmuld %f16,%f20,%f20 2512 faddd 
%f36,%f22,%f22 2513 2514 fmuld %f24,%f28,%f28 2515 faddd %f38,%f30,%f30 2516 2517 faddd %f2,%f6,%f6 2518 ldd [%l4+16],%f32 2519 2520 fmuld %f8,%f14,%f14 2521 faddd %f12,%f10,%f12 2522 ldd [%l5+8],%f34 2523 2524 faddd %f18,%f22,%f22 2525 ldd [%l6+16],%f36 2526 2527 faddd %f26,%f30,%f30 2528 ldd [%l7+16],%f38 2529 2530 fmuld %f32,%f6,%f6 2531 2532 fmuld %f34,%f12,%f12 2533 2534 fmuld %f36,%f22,%f22 2535 2536 fmuld %f38,%f30,%f30 2537 2538 faddd %f6,%f4,%f6 2539 2540 fsubd %f14,%f12,%f14 2541 2542 faddd %f22,%f20,%f22 2543 2544 faddd %f30,%f28,%f30 2545 2546 faddd %f6,%f0,%f6 2547 2548 faddd %f14,%f8,%f14 2549 2550 faddd %f22,%f16,%f22 2551 2552 faddd %f30,%f24,%f30 2553 mov %l0,%l4 2554 2555 fnegd %f6,%f4 2556 lda [%i1]%asi,%l0 ! preload next argument 2557 2558 fnegd %f14,%f12 2559 lda [%i1]%asi,%f0 2560 2561 fnegd %f22,%f20 2562 lda [%i1+4]%asi,%f3 2563 2564 fnegd %f30,%f28 2565 andn %l0,%i5,%l0 2566 add %i1,%i2,%i1 2567 2568 andcc %l4,2,%g0 2569 fmovdnz %icc,%f4,%f6 2570 st %f6,[%o0] 2571 2572 andcc %l1,2,%g0 2573 fmovdnz %icc,%f12,%f14 2574 st %f14,[%o1] 2575 2576 andcc %l2,2,%g0 2577 fmovdnz %icc,%f20,%f22 2578 st %f22,[%o2] 2579 2580 andcc %l3,2,%g0 2581 fmovdnz %icc,%f28,%f30 2582 st %f30,[%o3] 2583 2584 addcc %i0,-1,%i0 2585 bg,pt %icc,.loop0 2586 ! delay slot 2587 st %f7,[%o0+4] 2588 2589 ba,pt %icc,.end 2590 ! delay slot 2591 nop 2592 2593 .align 16 2594 .case12: 2595 fmuld %f18,%f18,%f16 2596 andcc %l2,1,%g0 2597 bz,pn %icc,.case14 2598 ! delay slot 2599 fxor %f22,%f36,%f36 2600 2601 fmuld %f26,%f26,%f24 2602 andcc %l3,1,%g0 2603 bz,pn %icc,.case13 2604 ! delay slot 2605 fxor %f30,%f38,%f38 2606 2607 fmuld %f0,pp3,%f6 ! sin(x0) 2608 2609 fmuld %f8,pp3,%f14 ! sin(x1) 2610 2611 faddd %f6,pp2,%f6 2612 fmuld %f0,qq2,%f4 2613 2614 faddd %f14,pp2,%f14 2615 fmuld %f8,qq2,%f12 2616 2617 fmuld %f16,qq3,%f22 ! cos(x2) 2618 2619 fmuld %f24,qq3,%f30 ! 
cos(x3) 2620 2621 fmuld %f0,%f6,%f6 2622 faddd %f4,qq1,%f4 2623 2624 fmuld %f8,%f14,%f14 2625 faddd %f12,qq1,%f12 2626 2627 faddd %f22,qq2,%f22 2628 fmuld %f16,pp2,%f20 2629 2630 faddd %f30,qq2,%f30 2631 fmuld %f24,pp2,%f28 2632 2633 faddd %f6,pp1,%f6 2634 fmuld %f0,%f4,%f4 2635 add %l4,%g1,%l4 2636 2637 faddd %f14,pp1,%f14 2638 fmuld %f8,%f12,%f12 2639 add %l5,%g1,%l5 2640 2641 fmuld %f16,%f22,%f22 2642 faddd %f20,pp1,%f20 2643 2644 fmuld %f24,%f30,%f30 2645 faddd %f28,pp1,%f28 2646 2647 fmuld %f0,%f6,%f6 2648 2649 fmuld %f8,%f14,%f14 2650 2651 faddd %f22,qq1,%f22 2652 fmuld %f16,%f20,%f20 2653 add %l6,%g1,%l6 2654 2655 faddd %f30,qq1,%f30 2656 fmuld %f24,%f28,%f28 2657 add %l7,%g1,%l7 2658 2659 fmuld %f2,%f6,%f6 2660 ldd [%l4+8],%f0 2661 2662 fmuld %f10,%f14,%f14 2663 ldd [%l5+8],%f8 2664 2665 fmuld %f18,%f20,%f20 2666 2667 fmuld %f26,%f28,%f28 2668 2669 fmuld %f0,%f4,%f4 2670 faddd %f32,%f6,%f6 2671 2672 fmuld %f8,%f12,%f12 2673 faddd %f34,%f14,%f14 2674 2675 fmuld %f16,%f22,%f22 2676 faddd %f20,%f36,%f20 2677 ldd [%l6+16],%f16 2678 2679 fmuld %f24,%f30,%f30 2680 faddd %f28,%f38,%f28 2681 ldd [%l7+16],%f24 2682 2683 faddd %f2,%f6,%f6 2684 ldd [%l4+16],%f32 2685 2686 faddd %f10,%f14,%f14 2687 ldd [%l5+16],%f34 2688 2689 fmuld %f16,%f22,%f22 2690 faddd %f20,%f18,%f20 2691 ldd [%l6+8],%f36 2692 2693 fmuld %f24,%f30,%f30 2694 faddd %f28,%f26,%f28 2695 ldd [%l7+8],%f38 2696 2697 fmuld %f32,%f6,%f6 2698 2699 fmuld %f34,%f14,%f14 2700 2701 fmuld %f36,%f20,%f20 2702 2703 fmuld %f38,%f28,%f28 2704 2705 faddd %f6,%f4,%f6 2706 2707 faddd %f14,%f12,%f14 2708 2709 fsubd %f22,%f20,%f22 2710 2711 fsubd %f30,%f28,%f30 2712 2713 faddd %f6,%f0,%f6 2714 2715 faddd %f14,%f8,%f14 2716 2717 faddd %f22,%f16,%f22 2718 2719 faddd %f30,%f24,%f30 2720 mov %l0,%l4 2721 2722 fnegd %f6,%f4 2723 lda [%i1]%asi,%l0 ! 
preload next argument 2724 2725 fnegd %f14,%f12 2726 lda [%i1]%asi,%f0 2727 2728 fnegd %f22,%f20 2729 lda [%i1+4]%asi,%f3 2730 2731 fnegd %f30,%f28 2732 andn %l0,%i5,%l0 2733 add %i1,%i2,%i1 2734 2735 andcc %l4,2,%g0 2736 fmovdnz %icc,%f4,%f6 2737 st %f6,[%o0] 2738 2739 andcc %l1,2,%g0 2740 fmovdnz %icc,%f12,%f14 2741 st %f14,[%o1] 2742 2743 andcc %l2,2,%g0 2744 fmovdnz %icc,%f20,%f22 2745 st %f22,[%o2] 2746 2747 andcc %l3,2,%g0 2748 fmovdnz %icc,%f28,%f30 2749 st %f30,[%o3] 2750 2751 addcc %i0,-1,%i0 2752 bg,pt %icc,.loop0 2753 ! delay slot 2754 st %f7,[%o0+4] 2755 2756 ba,pt %icc,.end 2757 ! delay slot 2758 nop 2759 2760 .align 16 2761 .case13: 2762 fmuld %f0,pp3,%f6 ! sin(x0) 2763 2764 fmuld %f8,pp3,%f14 ! sin(x1) 2765 2766 fmuld %f24,pp3,%f30 ! sin(x3) 2767 2768 faddd %f6,pp2,%f6 2769 fmuld %f0,qq2,%f4 2770 2771 faddd %f14,pp2,%f14 2772 fmuld %f8,qq2,%f12 2773 2774 fmuld %f16,qq3,%f22 ! cos(x2) 2775 2776 faddd %f30,pp2,%f30 2777 fmuld %f24,qq2,%f28 2778 2779 fmuld %f0,%f6,%f6 2780 faddd %f4,qq1,%f4 2781 2782 fmuld %f8,%f14,%f14 2783 faddd %f12,qq1,%f12 2784 2785 faddd %f22,qq2,%f22 2786 fmuld %f16,pp2,%f20 2787 2788 fmuld %f24,%f30,%f30 2789 faddd %f28,qq1,%f28 2790 2791 faddd %f6,pp1,%f6 2792 fmuld %f0,%f4,%f4 2793 add %l4,%g1,%l4 2794 2795 faddd %f14,pp1,%f14 2796 fmuld %f8,%f12,%f12 2797 add %l5,%g1,%l5 2798 2799 fmuld %f16,%f22,%f22 2800 faddd %f20,pp1,%f20 2801 2802 faddd %f30,pp1,%f30 2803 fmuld %f24,%f28,%f28 2804 add %l7,%g1,%l7 2805 2806 fmuld %f0,%f6,%f6 2807 2808 fmuld %f8,%f14,%f14 2809 2810 faddd %f22,qq1,%f22 2811 fmuld %f16,%f20,%f20 2812 add %l6,%g1,%l6 2813 2814 fmuld %f24,%f30,%f30 2815 2816 fmuld %f2,%f6,%f6 2817 ldd [%l4+8],%f0 2818 2819 fmuld %f10,%f14,%f14 2820 ldd [%l5+8],%f8 2821 2822 fmuld %f18,%f20,%f20 2823 2824 fmuld %f26,%f30,%f30 2825 ldd [%l7+8],%f24 2826 2827 fmuld %f0,%f4,%f4 2828 faddd %f32,%f6,%f6 2829 2830 fmuld %f8,%f12,%f12 2831 faddd %f34,%f14,%f14 2832 2833 fmuld %f16,%f22,%f22 2834 faddd %f20,%f36,%f20 2835 ldd 
[%l6+16],%f16 2836 2837 fmuld %f24,%f28,%f28 2838 faddd %f38,%f30,%f30 2839 2840 faddd %f2,%f6,%f6 2841 ldd [%l4+16],%f32 2842 2843 faddd %f10,%f14,%f14 2844 ldd [%l5+16],%f34 2845 2846 fmuld %f16,%f22,%f22 2847 faddd %f20,%f18,%f20 2848 ldd [%l6+8],%f36 2849 2850 faddd %f26,%f30,%f30 2851 ldd [%l7+16],%f38 2852 2853 fmuld %f32,%f6,%f6 2854 2855 fmuld %f34,%f14,%f14 2856 2857 fmuld %f36,%f20,%f20 2858 2859 fmuld %f38,%f30,%f30 2860 2861 faddd %f6,%f4,%f6 2862 2863 faddd %f14,%f12,%f14 2864 2865 fsubd %f22,%f20,%f22 2866 2867 faddd %f30,%f28,%f30 2868 2869 faddd %f6,%f0,%f6 2870 2871 faddd %f14,%f8,%f14 2872 2873 faddd %f22,%f16,%f22 2874 2875 faddd %f30,%f24,%f30 2876 mov %l0,%l4 2877 2878 fnegd %f6,%f4 2879 lda [%i1]%asi,%l0 ! preload next argument 2880 2881 fnegd %f14,%f12 2882 lda [%i1]%asi,%f0 2883 2884 fnegd %f22,%f20 2885 lda [%i1+4]%asi,%f3 2886 2887 fnegd %f30,%f28 2888 andn %l0,%i5,%l0 2889 add %i1,%i2,%i1 2890 2891 andcc %l4,2,%g0 2892 fmovdnz %icc,%f4,%f6 2893 st %f6,[%o0] 2894 2895 andcc %l1,2,%g0 2896 fmovdnz %icc,%f12,%f14 2897 st %f14,[%o1] 2898 2899 andcc %l2,2,%g0 2900 fmovdnz %icc,%f20,%f22 2901 st %f22,[%o2] 2902 2903 andcc %l3,2,%g0 2904 fmovdnz %icc,%f28,%f30 2905 st %f30,[%o3] 2906 2907 addcc %i0,-1,%i0 2908 bg,pt %icc,.loop0 2909 ! delay slot 2910 st %f7,[%o0+4] 2911 2912 ba,pt %icc,.end 2913 ! delay slot 2914 nop 2915 2916 .align 16 2917 .case14: 2918 fmuld %f26,%f26,%f24 2919 andcc %l3,1,%g0 2920 bz,pn %icc,.case15 2921 ! delay slot 2922 fxor %f30,%f38,%f38 2923 2924 fmuld %f0,pp3,%f6 ! sin(x0) 2925 2926 fmuld %f8,pp3,%f14 ! sin(x1) 2927 2928 fmuld %f16,pp3,%f22 ! sin(x2) 2929 2930 faddd %f6,pp2,%f6 2931 fmuld %f0,qq2,%f4 2932 2933 faddd %f14,pp2,%f14 2934 fmuld %f8,qq2,%f12 2935 2936 faddd %f22,pp2,%f22 2937 fmuld %f16,qq2,%f20 2938 2939 fmuld %f24,qq3,%f30 ! 
cos(x3) 2940 2941 fmuld %f0,%f6,%f6 2942 faddd %f4,qq1,%f4 2943 2944 fmuld %f8,%f14,%f14 2945 faddd %f12,qq1,%f12 2946 2947 fmuld %f16,%f22,%f22 2948 faddd %f20,qq1,%f20 2949 2950 faddd %f30,qq2,%f30 2951 fmuld %f24,pp2,%f28 2952 2953 faddd %f6,pp1,%f6 2954 fmuld %f0,%f4,%f4 2955 add %l4,%g1,%l4 2956 2957 faddd %f14,pp1,%f14 2958 fmuld %f8,%f12,%f12 2959 add %l5,%g1,%l5 2960 2961 faddd %f22,pp1,%f22 2962 fmuld %f16,%f20,%f20 2963 add %l6,%g1,%l6 2964 2965 fmuld %f24,%f30,%f30 2966 faddd %f28,pp1,%f28 2967 2968 fmuld %f0,%f6,%f6 2969 2970 fmuld %f8,%f14,%f14 2971 2972 fmuld %f16,%f22,%f22 2973 2974 faddd %f30,qq1,%f30 2975 fmuld %f24,%f28,%f28 2976 add %l7,%g1,%l7 2977 2978 fmuld %f2,%f6,%f6 2979 ldd [%l4+8],%f0 2980 2981 fmuld %f10,%f14,%f14 2982 ldd [%l5+8],%f8 2983 2984 fmuld %f18,%f22,%f22 2985 ldd [%l6+8],%f16 2986 2987 fmuld %f26,%f28,%f28 2988 2989 fmuld %f0,%f4,%f4 2990 faddd %f32,%f6,%f6 2991 2992 fmuld %f8,%f12,%f12 2993 faddd %f34,%f14,%f14 2994 2995 fmuld %f16,%f20,%f20 2996 faddd %f36,%f22,%f22 2997 2998 fmuld %f24,%f30,%f30 2999 faddd %f28,%f38,%f28 3000 ldd [%l7+16],%f24 3001 3002 faddd %f2,%f6,%f6 3003 ldd [%l4+16],%f32 3004 3005 faddd %f10,%f14,%f14 3006 ldd [%l5+16],%f34 3007 3008 faddd %f18,%f22,%f22 3009 ldd [%l6+16],%f36 3010 3011 fmuld %f24,%f30,%f30 3012 faddd %f28,%f26,%f28 3013 ldd [%l7+8],%f38 3014 3015 fmuld %f32,%f6,%f6 3016 3017 fmuld %f34,%f14,%f14 3018 3019 fmuld %f36,%f22,%f22 3020 3021 fmuld %f38,%f28,%f28 3022 3023 faddd %f6,%f4,%f6 3024 3025 faddd %f14,%f12,%f14 3026 3027 faddd %f22,%f20,%f22 3028 3029 fsubd %f30,%f28,%f30 3030 3031 faddd %f6,%f0,%f6 3032 3033 faddd %f14,%f8,%f14 3034 3035 faddd %f22,%f16,%f22 3036 3037 faddd %f30,%f24,%f30 3038 mov %l0,%l4 3039 3040 fnegd %f6,%f4 3041 lda [%i1]%asi,%l0 ! 
preload next argument 3042 3043 fnegd %f14,%f12 3044 lda [%i1]%asi,%f0 3045 3046 fnegd %f22,%f20 3047 lda [%i1+4]%asi,%f3 3048 3049 fnegd %f30,%f28 3050 andn %l0,%i5,%l0 3051 add %i1,%i2,%i1 3052 3053 andcc %l4,2,%g0 3054 fmovdnz %icc,%f4,%f6 3055 st %f6,[%o0] 3056 3057 andcc %l1,2,%g0 3058 fmovdnz %icc,%f12,%f14 3059 st %f14,[%o1] 3060 3061 andcc %l2,2,%g0 3062 fmovdnz %icc,%f20,%f22 3063 st %f22,[%o2] 3064 3065 andcc %l3,2,%g0 3066 fmovdnz %icc,%f28,%f30 3067 st %f30,[%o3] 3068 3069 addcc %i0,-1,%i0 3070 bg,pt %icc,.loop0 3071 ! delay slot 3072 st %f7,[%o0+4] 3073 3074 ba,pt %icc,.end 3075 ! delay slot 3076 nop 3077 3078 .align 16 3079 .case15: 3080 fmuld %f0,pp3,%f6 ! sin(x0) 3081 3082 fmuld %f8,pp3,%f14 ! sin(x1) 3083 3084 fmuld %f16,pp3,%f22 ! sin(x2) 3085 3086 fmuld %f24,pp3,%f30 ! sin(x3) 3087 3088 faddd %f6,pp2,%f6 3089 fmuld %f0,qq2,%f4 3090 3091 faddd %f14,pp2,%f14 3092 fmuld %f8,qq2,%f12 3093 3094 faddd %f22,pp2,%f22 3095 fmuld %f16,qq2,%f20 3096 3097 faddd %f30,pp2,%f30 3098 fmuld %f24,qq2,%f28 3099 3100 fmuld %f0,%f6,%f6 3101 faddd %f4,qq1,%f4 3102 3103 fmuld %f8,%f14,%f14 3104 faddd %f12,qq1,%f12 3105 3106 fmuld %f16,%f22,%f22 3107 faddd %f20,qq1,%f20 3108 3109 fmuld %f24,%f30,%f30 3110 faddd %f28,qq1,%f28 3111 3112 faddd %f6,pp1,%f6 3113 fmuld %f0,%f4,%f4 3114 add %l4,%g1,%l4 3115 3116 faddd %f14,pp1,%f14 3117 fmuld %f8,%f12,%f12 3118 add %l5,%g1,%l5 3119 3120 faddd %f22,pp1,%f22 3121 fmuld %f16,%f20,%f20 3122 add %l6,%g1,%l6 3123 3124 faddd %f30,pp1,%f30 3125 fmuld %f24,%f28,%f28 3126 add %l7,%g1,%l7 3127 3128 fmuld %f0,%f6,%f6 3129 3130 fmuld %f8,%f14,%f14 3131 3132 fmuld %f16,%f22,%f22 3133 3134 fmuld %f24,%f30,%f30 3135 3136 fmuld %f2,%f6,%f6 3137 ldd [%l4+8],%f0 3138 3139 fmuld %f10,%f14,%f14 3140 ldd [%l5+8],%f8 3141 3142 fmuld %f18,%f22,%f22 3143 ldd [%l6+8],%f16 3144 3145 fmuld %f26,%f30,%f30 3146 ldd [%l7+8],%f24 3147 3148 fmuld %f0,%f4,%f4 3149 faddd %f32,%f6,%f6 3150 3151 fmuld %f8,%f12,%f12 3152 faddd %f34,%f14,%f14 3153 3154 fmuld 
%f16,%f20,%f20 3155 faddd %f36,%f22,%f22 3156 3157 fmuld %f24,%f28,%f28 3158 faddd %f38,%f30,%f30 3159 3160 faddd %f2,%f6,%f6 3161 ldd [%l4+16],%f32 3162 3163 faddd %f10,%f14,%f14 3164 ldd [%l5+16],%f34 3165 3166 faddd %f18,%f22,%f22 3167 ldd [%l6+16],%f36 3168 3169 faddd %f26,%f30,%f30 3170 ldd [%l7+16],%f38 3171 3172 fmuld %f32,%f6,%f6 3173 3174 fmuld %f34,%f14,%f14 3175 3176 fmuld %f36,%f22,%f22 3177 3178 fmuld %f38,%f30,%f30 3179 3180 faddd %f6,%f4,%f6 3181 3182 faddd %f14,%f12,%f14 3183 3184 faddd %f22,%f20,%f22 3185 3186 faddd %f30,%f28,%f30 3187 3188 faddd %f6,%f0,%f6 3189 3190 faddd %f14,%f8,%f14 3191 3192 faddd %f22,%f16,%f22 3193 3194 faddd %f30,%f24,%f30 3195 mov %l0,%l4 3196 3197 fnegd %f6,%f4 3198 lda [%i1]%asi,%l0 ! preload next argument 3199 3200 fnegd %f14,%f12 3201 lda [%i1]%asi,%f0 3202 3203 fnegd %f22,%f20 3204 lda [%i1+4]%asi,%f3 3205 3206 fnegd %f30,%f28 3207 andn %l0,%i5,%l0 3208 add %i1,%i2,%i1 3209 3210 andcc %l4,2,%g0 3211 fmovdnz %icc,%f4,%f6 3212 st %f6,[%o0] 3213 3214 andcc %l1,2,%g0 3215 fmovdnz %icc,%f12,%f14 3216 st %f14,[%o1] 3217 3218 andcc %l2,2,%g0 3219 fmovdnz %icc,%f20,%f22 3220 st %f22,[%o2] 3221 3222 andcc %l3,2,%g0 3223 fmovdnz %icc,%f28,%f30 3224 st %f30,[%o3] 3225 3226 addcc %i0,-1,%i0 3227 bg,pt %icc,.loop0 3228 ! delay slot 3229 st %f7,[%o0+4] 3230 3231 ba,pt %icc,.end 3232 ! delay slot 3233 nop 3234 3235 3236 .align 16 3237 .end: 3238 st %f15,[%o1+4] 3239 st %f23,[%o2+4] 3240 st %f31,[%o3+4] 3241 ld [%fp+biguns],%i5 3242 tst %i5 ! check for huge arguments remaining 3243 be,pt %icc,.exit 3244 ! delay slot 3245 nop 3246 #ifdef __sparcv9 3247 ldx [%fp+xsave],%o1 3248 ldx [%fp+ysave],%o3 3249 #else 3250 ld [%fp+xsave],%o1 3251 ld [%fp+ysave],%o3 3252 #endif 3253 ld [%fp+nsave],%o0 3254 ld [%fp+sxsave],%o2 3255 ld [%fp+sysave],%o4 3256 sra %o2,0,%o2 ! sign-extend for V9 3257 sra %o4,0,%o4 3258 call __vlibm_vsin_big_ultra3 3259 sra %o5,0,%o5 ! 
delay slot 3260 3261 .exit: 3262 ret 3263 restore 3264 3265 3266 .align 16 3267 .last1: 3268 faddd %f2,c3two44,%f4 3269 st %f15,[%o1+4] 3270 .last1_from_range1: 3271 mov 0,%l1 3272 fzeros %f8 3273 fzero %f10 3274 add %fp,junk,%o1 3275 .last2: 3276 faddd %f10,c3two44,%f12 3277 st %f23,[%o2+4] 3278 .last2_from_range2: 3279 mov 0,%l2 3280 fzeros %f16 3281 fzero %f18 3282 add %fp,junk,%o2 3283 .last3: 3284 faddd %f18,c3two44,%f20 3285 st %f31,[%o3+4] 3286 st %f5,[%fp+nk0] 3287 st %f13,[%fp+nk1] 3288 .last3_from_range3: 3289 mov 0,%l3 3290 fzeros %f24 3291 fzero %f26 3292 ba,pt %icc,.cont 3293 ! delay slot 3294 add %fp,junk,%o3 3295 3296 3297 .align 16 3298 .range0: 3299 cmp %l0,%o4 3300 bl,pt %icc,1f ! hx < 0x3e400000 3301 ! delay slot, harmless if branch taken 3302 sethi %hi(0x7ff00000),%o7 3303 cmp %l0,%o7 3304 bl,a,pt %icc,2f ! branch if finite 3305 ! delay slot, squashed if branch not taken 3306 st %o4,[%fp+biguns] ! set biguns 3307 fzero %f0 3308 fmuld %f2,%f0,%f2 3309 st %f2,[%o0] 3310 ba,pt %icc,2f 3311 ! delay slot 3312 st %f3,[%o0+4] 3313 1: 3314 fdtoi %f2,%f4 ! raise inexact if not zero 3315 st %f0,[%o0] 3316 st %f3,[%o0+4] 3317 2: 3318 addcc %i0,-1,%i0 3319 ble,pn %icc,.end 3320 ! delay slot, harmless if branch taken 3321 add %i3,%i4,%i3 ! y += stridey 3322 andn %l1,%i5,%l0 ! hx &= ~0x80000000 3323 fmovs %f8,%f0 3324 fmovs %f11,%f3 3325 ba,pt %icc,.loop0 3326 ! delay slot 3327 add %i1,%i2,%i1 ! x += stridex 3328 3329 3330 .align 16 3331 .range1: 3332 cmp %l1,%o4 3333 bl,pt %icc,1f ! hx < 0x3e400000 3334 ! delay slot, harmless if branch taken 3335 sethi %hi(0x7ff00000),%o7 3336 cmp %l1,%o7 3337 bl,a,pt %icc,2f ! branch if finite 3338 ! delay slot, squashed if branch not taken 3339 st %o4,[%fp+biguns] ! set biguns 3340 fzero %f8 3341 fmuld %f10,%f8,%f10 3342 st %f10,[%o1] 3343 ba,pt %icc,2f 3344 ! delay slot 3345 st %f11,[%o1+4] 3346 1: 3347 fdtoi %f10,%f12 ! 
raise inexact if not zero 3348 st %f8,[%o1] 3349 st %f11,[%o1+4] 3350 2: 3351 addcc %i0,-1,%i0 3352 ble,pn %icc,.last1_from_range1 3353 ! delay slot, harmless if branch taken 3354 add %i3,%i4,%i3 ! y += stridey 3355 andn %l2,%i5,%l1 ! hx &= ~0x80000000 3356 fmovs %f16,%f8 3357 fmovs %f19,%f11 3358 ba,pt %icc,.loop1 3359 ! delay slot 3360 add %i1,%i2,%i1 ! x += stridex 3361 3362 3363 .align 16 3364 .range2: 3365 cmp %l2,%o4 3366 bl,pt %icc,1f ! hx < 0x3e400000 3367 ! delay slot, harmless if branch taken 3368 sethi %hi(0x7ff00000),%o7 3369 cmp %l2,%o7 3370 bl,a,pt %icc,2f ! branch if finite 3371 ! delay slot, squashed if branch not taken 3372 st %o4,[%fp+biguns] ! set biguns 3373 fzero %f16 3374 fmuld %f18,%f16,%f18 3375 st %f18,[%o2] 3376 ba,pt %icc,2f 3377 ! delay slot 3378 st %f19,[%o2+4] 3379 1: 3380 fdtoi %f18,%f20 ! raise inexact if not zero 3381 st %f16,[%o2] 3382 st %f19,[%o2+4] 3383 2: 3384 addcc %i0,-1,%i0 3385 ble,pn %icc,.last2_from_range2 3386 ! delay slot, harmless if branch taken 3387 add %i3,%i4,%i3 ! y += stridey 3388 andn %l3,%i5,%l2 ! hx &= ~0x80000000 3389 fmovs %f24,%f16 3390 fmovs %f27,%f19 3391 ba,pt %icc,.loop2 3392 ! delay slot 3393 add %i1,%i2,%i1 ! x += stridex 3394 3395 3396 .align 16 3397 .range3: 3398 cmp %l3,%o4 3399 bl,pt %icc,1f ! hx < 0x3e400000 3400 ! delay slot, harmless if branch taken 3401 sethi %hi(0x7ff00000),%o7 3402 cmp %l3,%o7 3403 bl,a,pt %icc,2f ! branch if finite 3404 ! delay slot, squashed if branch not taken 3405 st %o4,[%fp+biguns] ! set biguns 3406 fzero %f24 3407 fmuld %f26,%f24,%f26 3408 st %f26,[%o3] 3409 ba,pt %icc,2f 3410 ! delay slot 3411 st %f27,[%o3+4] 3412 1: 3413 fdtoi %f26,%f28 ! raise inexact if not zero 3414 st %f24,[%o3] 3415 st %f27,[%o3+4] 3416 2: 3417 addcc %i0,-1,%i0 3418 ble,pn %icc,.last3_from_range3 3419 ! delay slot, harmless if branch taken 3420 add %i3,%i4,%i3 ! y += stridey 3421 ld [%i1],%l3 3422 ld [%i1],%f24 3423 ld [%i1+4],%f27 3424 andn %l3,%i5,%l3 ! 
hx &= ~0x80000000 3425 ba,pt %icc,.loop3 3426 ! delay slot 3427 add %i1,%i2,%i1 ! x += stridex 3428 3429 SET_SIZE(__vsin_ultra3) 3430