/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

	.file	"__vsincos.S"

#include "libm.h"

/*
 * Double-precision constants, each written as a high-word/low-word
 * pair.  The prologue of __vsincos loads them with ldd from offsets
 * 0x00 through 0x58 relative to this label, in exactly this order, so
 * entries must not be reordered without updating those offsets.
 */
	RO_DATA
	.align	64
constants:
	.word	0x42c80000,0x00000000	! 3 * 2^44
	.word	0x43380000,0x00000000	! 3 * 2^51
	.word	0x3fe45f30,0x6dc9c883	! invpio2
	.word	0x3ff921fb,0x54442c00	! pio2_1
	.word	0x3d318469,0x898cc400	! pio2_2
	.word	0x3a71701b,0x839a2520	! pio2_3
	.word	0xbfc55555,0x55555533	! pp1
	.word	0x3f811111,0x10e7d53b	! pp2
	.word	0xbf2a0167,0xe6b3cf9b	! pp3
	.word	0xbfdfffff,0xffffff65	! qq1
	.word	0x3fa55555,0x54f88ed0	! qq2
	.word	0xbf56c12c,0xdd185f60	! qq3

/*
 * Offsets from %fp of the temporaries kept in this routine's frame:
 *   xsave, ssave, csave, nsave, sxsave, sssave
 *		copies of the incoming arguments, reloaded at .end when
 *		__vlibm_vsincos_big has to be called;
 *   biguns	flag word, cleared in the prologue and set by the .rangeN
 *		code when a finite argument above the upper threshold
 *		is seen;
 *   junk	8-byte scratch slot that the output pointers are aimed
 *		at whenever a lane has no real element to store;
 *   nk0..nk2	one word per lane, used to move the low word of a
 *		floating-point register into an integer register.
 */
! local storage indices

#define xsave		STACK_BIAS-0x8
#define ssave		STACK_BIAS-0x10
#define csave		STACK_BIAS-0x18
#define nsave		STACK_BIAS-0x1c
#define sxsave		STACK_BIAS-0x20
#define sssave		STACK_BIAS-0x24
#define biguns		STACK_BIAS-0x28
#define junk		STACK_BIAS-0x30
#define nk2		STACK_BIAS-0x38
#define nk1		STACK_BIAS-0x3c
#define nk0		STACK_BIAS-0x40
! sizeof temp storage - must be a multiple of 16 for V9
#define tmps		0x40

! register use

! i0	n
! i1	x
! i2	stridex
! i3	s
! i4	strides
! i5	0x80000000,n0

! l0	hx0,k0
! l1	hx1,k1
! l2	hx2,k2
! l3	c
! l4	pc0
! l5	pc1
! l6	pc2
! l7	stridec

! the following are 64-bit registers in both V8+ and V9

! g1	__vlibm_TBL_sincos2
! g5	scratch,n1

! o0	ps0
! o1	ps1
! o2	ps2
! o3	0x3fe921fb
! o4	0x3e400000
! o5	0x4099251e
! o7	scratch,n2

! f0	x0,z0
! f2	abs(x0)
! f4
! f6
! f8
! f10	x1,z1
! f12	abs(x1)
! f14
! f16
! f18
! f20	x2,z2
! f22	abs(x2)
! f24
! f26
! f28
! f30
! f32
! f34
! f36
! f38

/*
 * The twelve constants from the table above stay resident in the
 * upper floating-point registers for the whole routine.
 */
#define c3two44	%f40
#define c3two51	%f42
#define invpio2	%f44
#define pio2_1	%f46
#define pio2_2	%f48
#define pio2_3	%f50
#define pp1	%f52
#define pp2	%f54
#define pp3	%f56
#define qq1	%f58
#define qq2	%f60
#define qq3	%f62

/*
 * __vsincos - strided vector sine and cosine of doubles.
 *
 * Incoming arguments (see the register-use table above):
 *   %i0 = n, %i1 = x, %i2 = stridex, %i3 = s, %i4 = strides, %i5 = c,
 *   and stridec, which is read from the caller's stack argument area
 *   ([%fp+STACK_BIAS+0xb0] on V9, [%fp+0x5c] otherwise).
 * NOTE(review): this presumably corresponds to a C prototype along the
 * lines of (int n, double *x, int stridex, double *s, int strides,
 * double *c, int stridec) - confirm against the C callers.
 *
 * All three strides are multiplied by 8 (sll by 3) in the prologue,
 * so they are counted in 8-byte elements.
 *
 * Structure:
 *   - Elements are processed three at a time ("lanes" 0, 1, 2).
 *     .loop0/.loop1/.loop2 stage one element each and .cont computes
 *     all three together.
 *   - hx below is the high word of x with the sign bit cleared.
 *     An element with hx < 0x3e400000 or hx > 0x4099251e is diverted
 *     to .range0/.range1/.range2, handled on its own, and the next
 *     element then takes over its lane.
 *   - At .cont, if every lane has hx <= 0x3fe921fb the primary path
 *     (table lookup plus polynomials) falls through; otherwise all
 *     three lanes go through .medium, which first reduces x using
 *     invpio2 and pio2_1..pio2_3.
 *   - When n runs out in mid-group, .last1/.last2 fill the unused
 *     lanes with zero inputs and aim their output pointers at junk.
 *   - At .end, if biguns is non-zero, __vlibm_vsincos_big is called
 *     with the original arguments saved in the frame.
 *
 * Deferred stores: at the end of a pass only the high words of the s
 * results for lanes 1 and 2 (%f16, %f26) are stored; the low words
 * (%f17, %f27) are written through ps1/ps2 at the start of the next
 * pass, in .last1/.last2, or at .end.  The prologue points ps0..ps2 at
 * junk so that the first such stores land in scratch storage.
 */
	ENTRY(__vsincos)
	save	%sp,-SA(MINFRAME)-tmps,%sp
	PIC_SETUP(l7)
	PIC_SET(l7,constants,o0)
	PIC_SET(l7,__vlibm_TBL_sincos2,o1)
	mov	%o1,%g1
	wr	%g0,0x82,%asi		! set %asi for non-faulting loads
#ifdef __sparcv9
	stx	%i1,[%fp+xsave]		! save arguments
	stx	%i3,[%fp+ssave]
	stx	%i5,[%fp+csave]
	ldx	[%fp+STACK_BIAS+0xb0],%l7
#else
	st	%i1,[%fp+xsave]		! save arguments
	st	%i3,[%fp+ssave]
	st	%i5,[%fp+csave]
	ld	[%fp+0x5c],%l7
#endif
	st	%i0,[%fp+nsave]
	st	%i2,[%fp+sxsave]
	st	%i4,[%fp+sssave]
	mov	%i5,%l3
	st	%g0,[%fp+biguns]	! biguns = 0
	ldd	[%o0+0x00],c3two44	! load/set up constants
	ldd	[%o0+0x08],c3two51
	ldd	[%o0+0x10],invpio2
	ldd	[%o0+0x18],pio2_1
	ldd	[%o0+0x20],pio2_2
	ldd	[%o0+0x28],pio2_3
	ldd	[%o0+0x30],pp1
	ldd	[%o0+0x38],pp2
	ldd	[%o0+0x40],pp3
	ldd	[%o0+0x48],qq1
	ldd	[%o0+0x50],qq2
	ldd	[%o0+0x58],qq3
	sethi	%hi(0x80000000),%i5
	sethi	%hi(0x3e400000),%o4
	sethi	%hi(0x3fe921fb),%o3
	or	%o3,%lo(0x3fe921fb),%o3
	sethi	%hi(0x4099251e),%o5
	or	%o5,%lo(0x4099251e),%o5
	sll	%i2,3,%i2		! scale strides
	sll	%i4,3,%i4
	sll	%l7,3,%l7
	add	%fp,junk,%o0		! loop prologue
	add	%fp,junk,%o1
	add	%fp,junk,%o2
	ld	[%i1],%l0		! *x
	ld	[%i1],%f0
	ld	[%i1+4],%f3
	andn	%l0,%i5,%l0		! mask off sign
	ba	.loop0
	add	%i1,%i2,%i1		! x += stridex

/*
 * Lane 0 staging.  On entry %l0 = hx0 and x0 is split across %f0 (high
 * word) and %f3 (low word); fabss into %f2 makes %f2:%f3 = |x0|.
 * The next element is preloaded through %asi (set up above for
 * non-faulting loads) before it is known whether one exists.
 * orcc of (hx - 0x3e400000) and (0x4099251e - hx) is negative when hx
 * is outside [0x3e400000, 0x4099251e].
 * Adding 3 * 2^44 to |x| leaves the table index in the low word of the
 * sum (%f5), which is later passed through nk0 into %l0.
 */
! 16-byte aligned
	.align	16
.loop0:
	lda	[%i1]%asi,%l1		! preload next argument
	sub	%l0,%o4,%g5
	sub	%o5,%l0,%o7
	fabss	%f0,%f2

	lda	[%i1]%asi,%f10
	orcc	%o7,%g5,%g0
	mov	%i3,%o0			! ps0 = s
	bl,pn	%icc,.range0		! hx < 0x3e400000 or hx > 0x4099251e

! delay slot
	lda	[%i1+4]%asi,%f13
	addcc	%i0,-1,%i0
	add	%i3,%i4,%i3		! s += strides

	mov	%l3,%l4			! pc0 = c
	add	%l3,%l7,%l3		! c += stridec
	ble,pn	%icc,.last1

! delay slot
	andn	%l1,%i5,%l1
	add	%i1,%i2,%i1		! x += stridex
	faddd	%f2,c3two44,%f4
	st	%f17,[%o1+4]

/* Lane 1 staging; same pattern as lane 0 using %l1, %f10..%f14. */
.loop1:
	lda	[%i1]%asi,%l2		! preload next argument
	sub	%l1,%o4,%g5
	sub	%o5,%l1,%o7
	fabss	%f10,%f12

	lda	[%i1]%asi,%f20
	orcc	%o7,%g5,%g0
	mov	%i3,%o1			! ps1 = s
	bl,pn	%icc,.range1		! hx < 0x3e400000 or hx > 0x4099251e

! delay slot
	lda	[%i1+4]%asi,%f23
	addcc	%i0,-1,%i0
	add	%i3,%i4,%i3		! s += strides

	mov	%l3,%l5			! pc1 = c
	add	%l3,%l7,%l3		! c += stridec
	ble,pn	%icc,.last2

! delay slot
	andn	%l2,%i5,%l2
	add	%i1,%i2,%i1		! x += stridex
	faddd	%f12,c3two44,%f14
	st	%f27,[%o2+4]

/*
 * Lane 2 staging.  No preload and no count decrement happen here; the
 * count is decremented for this element at the bottom of the pass.
 */
.loop2:
	sub	%l2,%o4,%g5
	sub	%o5,%l2,%o7
	fabss	%f20,%f22
	st	%f5,[%fp+nk0]

	orcc	%o7,%g5,%g0
	mov	%i3,%o2			! ps2 = s
	bl,pn	%icc,.range2		! hx < 0x3e400000 or hx > 0x4099251e
! delay slot
	st	%f15,[%fp+nk1]

	mov	%l3,%l6			! pc2 = c

/*
 * All three lanes are staged.  (0x3fe921fb - hx) is computed per lane
 * and the three differences are or-ed together; the result is negative
 * when at least one lane has hx > 0x3fe921fb, in which case the whole
 * group takes .medium.  The fmovs instructions complete the full
 * double x in %f0:%f1, %f10:%f11 and %f20:%f21, and x * invpio2 is
 * started early in case the medium path is needed.
 */
.cont:
	add	%i3,%i4,%i3		! s += strides
	add	%l3,%l7,%l3		! c += stridec
	faddd	%f22,c3two44,%f24
	st	%f25,[%fp+nk2]

	sub	%o3,%l0,%l0
	sub	%o3,%l1,%l1
	fmovs	%f3,%f1

	sub	%o3,%l2,%l2
	fmovs	%f13,%f11

	or	%l0,%l1,%l0
	orcc	%l0,%l2,%g0
	fmovs	%f23,%f21

	fmuld	%f0,invpio2,%f6		! x * invpio2, for medium range

	fmuld	%f10,invpio2,%f16
	ld	[%fp+nk0],%l0

	fmuld	%f20,invpio2,%f26
	ld	[%fp+nk1],%l1

	bl,pn	%icc,.medium
! delay slot
	ld	[%fp+nk2],%l2

/*
 * Primary path.  k << 5 indexes 32-byte entries of
 * __vlibm_TBL_sincos2: the double at offset 0 is subtracted from |x|,
 * and the doubles at offsets 8 and 16 are the factors labelled "s" and
 * "c" in the comments below.
 * NOTE(review): presumably offset 0 is a breakpoint near |x| and
 * offsets 8/16 are its sine and cosine - confirm against the table
 * source.
 * The fcmpd results in %fcc0..%fcc2 record the sign of each original x
 * and are consumed by the fmovdl instructions at the end.
 * pp1..pp3 feed the polynomial labelled spoly and qq1..qq3 the one
 * labelled cpoly, both evaluated in z = x * x.
 */
	sll	%l0,5,%l0		! k
	fcmpd	%fcc0,%f0,pio2_3	! x < pio2_3 iff x < 0

	sll	%l1,5,%l1
	ldd	[%l0+%g1],%f4
	fcmpd	%fcc1,%f10,pio2_3

	sll	%l2,5,%l2
	ldd	[%l1+%g1],%f14
	fcmpd	%fcc2,%f20,pio2_3

	ldd	[%l2+%g1],%f24

	fsubd	%f2,%f4,%f2		! x -= __vlibm_TBL_sincos2[k]

	fsubd	%f12,%f14,%f12

	fsubd	%f22,%f24,%f22

	fmuld	%f2,%f2,%f0		! z = x * x

	fmuld	%f12,%f12,%f10

	fmuld	%f22,%f22,%f20

	fmuld	%f0,pp3,%f6

	fmuld	%f10,pp3,%f16

	fmuld	%f20,pp3,%f26

	faddd	%f6,pp2,%f6
	fmuld	%f0,qq3,%f4

	faddd	%f16,pp2,%f16
	fmuld	%f10,qq3,%f14

	faddd	%f26,pp2,%f26
	fmuld	%f20,qq3,%f24

	fmuld	%f0,%f6,%f6
	faddd	%f4,qq2,%f4

	fmuld	%f10,%f16,%f16
	faddd	%f14,qq2,%f14

	fmuld	%f20,%f26,%f26
	faddd	%f24,qq2,%f24

	faddd	%f6,pp1,%f6
	fmuld	%f0,%f4,%f4
	add	%l0,%g1,%l0

	faddd	%f16,pp1,%f16
	fmuld	%f10,%f14,%f14
	add	%l1,%g1,%l1

	faddd	%f26,pp1,%f26
	fmuld	%f20,%f24,%f24
	add	%l2,%g1,%l2

	fmuld	%f0,%f6,%f6
	faddd	%f4,qq1,%f4

	fmuld	%f10,%f16,%f16
	faddd	%f14,qq1,%f14

	fmuld	%f20,%f26,%f26
	faddd	%f24,qq1,%f24

	fmuld	%f2,%f6,%f6
	ldd	[%l0+8],%f8

	fmuld	%f12,%f16,%f16
	ldd	[%l1+8],%f18

	fmuld	%f22,%f26,%f26
	ldd	[%l2+8],%f28

	faddd	%f6,%f2,%f6
	fmuld	%f0,%f4,%f4
	ldd	[%l0+16],%f30

	faddd	%f16,%f12,%f16
	fmuld	%f10,%f14,%f14
	ldd	[%l1+16],%f32

	faddd	%f26,%f22,%f26
	fmuld	%f20,%f24,%f24
	ldd	[%l2+16],%f34

	fmuld	%f8,%f6,%f0		! s * spoly

	fmuld	%f18,%f16,%f10

	fmuld	%f28,%f26,%f20

	fmuld	%f30,%f4,%f2		! c * cpoly

	fmuld	%f32,%f14,%f12

	fmuld	%f34,%f24,%f22

	fmuld	%f30,%f6,%f6		! c * spoly
	fsubd	%f2,%f0,%f2

	fmuld	%f32,%f16,%f16
	fsubd	%f12,%f10,%f12

	fmuld	%f34,%f26,%f26
	fsubd	%f22,%f20,%f22

/*
 * The cosine results c + (c * cpoly - s * spoly) are stored through
 * pc0..pc2 as they complete; the sine results
 * s + (c * spoly + s * cpoly) are negated where x was negative and
 * stored through ps0..ps2.  The first element of the next group is
 * preloaded in between.
 */
	fmuld	%f8,%f4,%f4		! s * cpoly
	faddd	%f2,%f30,%f2
	st	%f2,[%l4]

	fmuld	%f18,%f14,%f14
	faddd	%f12,%f32,%f12
	st	%f3,[%l4+4]

	fmuld	%f28,%f24,%f24
	faddd	%f22,%f34,%f22
	st	%f12,[%l5]

	faddd	%f6,%f4,%f6
	st	%f13,[%l5+4]

	faddd	%f16,%f14,%f16
	st	%f22,[%l6]

	faddd	%f26,%f24,%f26
	st	%f23,[%l6+4]

	faddd	%f6,%f8,%f6

	faddd	%f16,%f18,%f16

	faddd	%f26,%f28,%f26

	fnegd	%f6,%f4
	lda	[%i1]%asi,%l0		! preload next argument

	fnegd	%f16,%f14
	lda	[%i1]%asi,%f0

	fnegd	%f26,%f24
	lda	[%i1+4]%asi,%f3
	andn	%l0,%i5,%l0
	add	%i1,%i2,%i1

	fmovdl	%fcc0,%f4,%f6		! (hx < -0)? -s : s
	st	%f6,[%o0]

	fmovdl	%fcc1,%f14,%f16
	st	%f16,[%o1]

	fmovdl	%fcc2,%f24,%f26
	st	%f26,[%o2]
	addcc	%i0,-1,%i0

	bg,pt	%icc,.loop0
! delay slot
	st	%f7,[%o0+4]

	ba,pt	%icc,.end
! delay slot
	nop


/*
 * Medium path, taken for the whole group when any lane has
 * hx > 0x3fe921fb.
 *   - x * invpio2 + 3 * 2^51 leaves an integer n in the low word of
 *     the sum; subtracting 3 * 2^51 again gives n as a double.
 *   - n times pio2_1, pio2_2 and pio2_3 is subtracted from x in
 *     stages, producing the "reduced x" and the correction "w"
 *     marked below.
 *   - If n is odd the s and c output pointers of that lane are
 *     swapped.  n is then recombined with the sign test of the
 *     reduced argument (fcmple32) so that afterwards bit 0 selects
 *     negating c and bit 1 selects negating s.
 *   - The table/polynomial evaluation then mirrors the primary path
 *     on |reduced x|, with w (given the sign of reduced x) added into
 *     the sine-side polynomial term.
 * %i5 and %o4 are used as scratch here (n0 and a temporary); both are
 * restored to 0x80000000 and 0x3e400000 before returning to .loop0.
 */
	.align	16
.medium:
	faddd	%f6,c3two51,%f4
	st	%f5,[%fp+nk0]

	faddd	%f16,c3two51,%f14
	st	%f15,[%fp+nk1]

	faddd	%f26,c3two51,%f24
	st	%f25,[%fp+nk2]

	fsubd	%f4,c3two51,%f6

	fsubd	%f14,c3two51,%f16

	fsubd	%f24,c3two51,%f26

	fmuld	%f6,pio2_1,%f2
	ld	[%fp+nk0],%i5		! n

	fmuld	%f16,pio2_1,%f12
	ld	[%fp+nk1],%g5

	fmuld	%f26,pio2_1,%f22
	ld	[%fp+nk2],%o7

	fsubd	%f0,%f2,%f0
	fmuld	%f6,pio2_2,%f4
	mov	%o0,%o4			! if (n & 1) swap ps, pc
	andcc	%i5,1,%g0

	fsubd	%f10,%f12,%f10
	fmuld	%f16,pio2_2,%f14
	movnz	%icc,%l4,%o0
	and	%i5,3,%i5

	fsubd	%f20,%f22,%f20
	fmuld	%f26,pio2_2,%f24
	movnz	%icc,%o4,%l4

	fsubd	%f0,%f4,%f30
	mov	%o1,%o4
	andcc	%g5,1,%g0

	fsubd	%f10,%f14,%f32
	movnz	%icc,%l5,%o1
	and	%g5,3,%g5

	fsubd	%f20,%f24,%f34
	movnz	%icc,%o4,%l5

	fsubd	%f0,%f30,%f0
	fcmple32 %f30,pio2_3,%l0	! x <= pio2_3 iff x < 0
	mov	%o2,%o4
	andcc	%o7,1,%g0

	fsubd	%f10,%f32,%f10
	fcmple32 %f32,pio2_3,%l1
	movnz	%icc,%l6,%o2
	and	%o7,3,%o7

	fsubd	%f20,%f34,%f20
	fcmple32 %f34,pio2_3,%l2
	movnz	%icc,%o4,%l6

	fsubd	%f0,%f4,%f0
	fmuld	%f6,pio2_3,%f6
	add	%i5,1,%o4		! n = (n >> 1) | (((n + 1) ^ l) & 2)
	srl	%i5,1,%i5

	fsubd	%f10,%f14,%f10
	fmuld	%f16,pio2_3,%f16
	xor	%o4,%l0,%o4

	fsubd	%f20,%f24,%f20
	fmuld	%f26,pio2_3,%f26
	and	%o4,2,%o4

	fsubd	%f6,%f0,%f6
	or	%i5,%o4,%i5

	fsubd	%f16,%f10,%f16
	add	%g5,1,%o4
	srl	%g5,1,%g5

	fsubd	%f26,%f20,%f26
	xor	%o4,%l1,%o4

	fsubd	%f30,%f6,%f0		! reduced x
	and	%o4,2,%o4

	fsubd	%f32,%f16,%f10
	or	%g5,%o4,%g5

	fsubd	%f34,%f26,%f20
	add	%o7,1,%o4
	srl	%o7,1,%o7

	fzero	%f38
	xor	%o4,%l2,%o4

	fabsd	%f0,%f2
	and	%o4,2,%o4

	fabsd	%f10,%f12
	or	%o7,%o4,%o7

	fabsd	%f20,%f22
	sethi	%hi(0x3e400000),%o4

	fnegd	%f38,%f38

	faddd	%f2,c3two44,%f4
	st	%f5,[%fp+nk0]

	faddd	%f12,c3two44,%f14
	st	%f15,[%fp+nk1]

	faddd	%f22,c3two44,%f24
	st	%f25,[%fp+nk2]

	fsubd	%f30,%f0,%f4

	fsubd	%f32,%f10,%f14

	fsubd	%f34,%f20,%f24

	fsubd	%f4,%f6,%f6		! w
	ld	[%fp+nk0],%l0

	fsubd	%f14,%f16,%f16
	ld	[%fp+nk1],%l1

	fsubd	%f24,%f26,%f26
	ld	[%fp+nk2],%l2
	sll	%l0,5,%l0		! k

	fand	%f0,%f38,%f30		! sign bit of x
	ldd	[%l0+%g1],%f4
	sll	%l1,5,%l1

	fand	%f10,%f38,%f32
	ldd	[%l1+%g1],%f14
	sll	%l2,5,%l2

	fand	%f20,%f38,%f34
	ldd	[%l2+%g1],%f24

	fsubd	%f2,%f4,%f2		! x -= __vlibm_TBL_sincos2[k]

	fsubd	%f12,%f14,%f12

	fsubd	%f22,%f24,%f22

	fmuld	%f2,%f2,%f0		! z = x * x
	fxor	%f6,%f30,%f30

	fmuld	%f12,%f12,%f10
	fxor	%f16,%f32,%f32

	fmuld	%f22,%f22,%f20
	fxor	%f26,%f34,%f34

	fmuld	%f0,pp3,%f6

	fmuld	%f10,pp3,%f16

	fmuld	%f20,pp3,%f26

	faddd	%f6,pp2,%f6
	fmuld	%f0,qq3,%f4

	faddd	%f16,pp2,%f16
	fmuld	%f10,qq3,%f14

	faddd	%f26,pp2,%f26
	fmuld	%f20,qq3,%f24

	fmuld	%f0,%f6,%f6
	faddd	%f4,qq2,%f4

	fmuld	%f10,%f16,%f16
	faddd	%f14,qq2,%f14

	fmuld	%f20,%f26,%f26
	faddd	%f24,qq2,%f24

	faddd	%f6,pp1,%f6
	fmuld	%f0,%f4,%f4
	add	%l0,%g1,%l0

	faddd	%f16,pp1,%f16
	fmuld	%f10,%f14,%f14
	add	%l1,%g1,%l1

	faddd	%f26,pp1,%f26
	fmuld	%f20,%f24,%f24
	add	%l2,%g1,%l2

	fmuld	%f0,%f6,%f6
	faddd	%f4,qq1,%f4

	fmuld	%f10,%f16,%f16
	faddd	%f14,qq1,%f14

	fmuld	%f20,%f26,%f26
	faddd	%f24,qq1,%f24

	fmuld	%f2,%f6,%f6
	ldd	[%l0+16],%f8

	fmuld	%f12,%f16,%f16
	ldd	[%l1+16],%f18

	fmuld	%f22,%f26,%f26
	ldd	[%l2+16],%f28

	faddd	%f6,%f30,%f6
	fmuld	%f0,%f4,%f4
	ldd	[%l0+8],%f30

	faddd	%f16,%f32,%f16
	fmuld	%f10,%f14,%f14
	ldd	[%l1+8],%f32

	faddd	%f26,%f34,%f26
	fmuld	%f20,%f24,%f24
	ldd	[%l2+8],%f34

	fmuld	%f8,%f4,%f0		! c * cpoly
	faddd	%f6,%f2,%f6

	fmuld	%f18,%f14,%f10
	faddd	%f16,%f12,%f16

	fmuld	%f28,%f24,%f20
	faddd	%f26,%f22,%f26

	fmuld	%f30,%f6,%f2		! s * spoly

	fmuld	%f32,%f16,%f12

	fmuld	%f34,%f26,%f22

	fmuld	%f8,%f6,%f6		! c * spoly
	fsubd	%f0,%f2,%f2

	fmuld	%f18,%f16,%f16
	fsubd	%f10,%f12,%f12

	fmuld	%f28,%f26,%f26
	fsubd	%f20,%f22,%f22

	fmuld	%f30,%f4,%f4		! s * cpoly
	faddd	%f8,%f2,%f8

	fmuld	%f32,%f14,%f14
	faddd	%f18,%f12,%f18

	fmuld	%f34,%f24,%f24
	faddd	%f28,%f22,%f28

	faddd	%f4,%f6,%f6

	faddd	%f14,%f16,%f16

	faddd	%f24,%f26,%f26

	faddd	%f30,%f6,%f6		! now %f6 = sin |x|, %f8 = cos |x|

	faddd	%f32,%f16,%f16

	faddd	%f34,%f26,%f26

/*
 * Apply the signs selected by n, store the results, and preload the
 * first element of the next group.  n0 is moved from %i5 to %l1 so
 * that %i5 can be reset to the sign mask 0x80000000.
 */
	fnegd	%f8,%f4			! if (n & 1) c = -c
	lda	[%i1]%asi,%l0		! preload next argument
	mov	%i5,%l1

	fnegd	%f18,%f14
	lda	[%i1]%asi,%f0
	sethi	%hi(0x80000000),%i5

	fnegd	%f28,%f24
	lda	[%i1+4]%asi,%f3

	andcc	%l1,1,%g0
	fmovdnz	%icc,%f4,%f8
	st	%f8,[%l4]

	andcc	%g5,1,%g0
	fmovdnz	%icc,%f14,%f18
	st	%f9,[%l4+4]

	andcc	%o7,1,%g0
	fmovdnz	%icc,%f24,%f28
	st	%f18,[%l5]

	fnegd	%f6,%f4			! if (n & 2) s = -s
	st	%f19,[%l5+4]
	andn	%l0,%i5,%l0

	fnegd	%f16,%f14
	st	%f28,[%l6]
	add	%i1,%i2,%i1

	fnegd	%f26,%f24
	st	%f29,[%l6+4]

	andcc	%l1,2,%g0
	fmovdnz	%icc,%f4,%f6
	st	%f6,[%o0]

	andcc	%g5,2,%g0
	fmovdnz	%icc,%f14,%f16
	st	%f16,[%o1]

	andcc	%o7,2,%g0
	fmovdnz	%icc,%f24,%f26
	st	%f26,[%o2]

	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop0
! delay slot
	st	%f7,[%o0+4]

	ba,pt	%icc,.end
! delay slot
	nop


/*
 * Common exit.  Flush the deferred low-word stores for lanes 1 and 2,
 * then, if biguns was set by any .rangeN block, call
 * __vlibm_vsincos_big with the original n, x, stridex, s, strides and
 * c reloaded from the frame.  Before %o5 is reloaded with c, its
 * current value (still the 0x4099251e threshold set in the prologue)
 * is written to an outgoing stack slot, and stridec is copied from the
 * incoming stack slot to the outgoing one.
 * NOTE(review): presumably these two stack words are the seventh and
 * eighth arguments of __vlibm_vsincos_big - confirm against its
 * definition.
 */
	.align	16
.end:
	st	%f17,[%o1+4]
	st	%f27,[%o2+4]
	ld	[%fp+biguns],%i5
	tst	%i5			! check for huge arguments remaining
	be,pt	%icc,.exit
! delay slot
	nop
#ifdef __sparcv9
	stx	%o5,[%sp+STACK_BIAS+0xb8]
	ldx	[%fp+xsave],%o1
	ldx	[%fp+ssave],%o3
	ldx	[%fp+csave],%o5
	ldx	[%fp+STACK_BIAS+0xb0],%i5
	stx	%i5,[%sp+STACK_BIAS+0xb0]
#else
	st	%o5,[%sp+0x60]
	ld	[%fp+xsave],%o1
	ld	[%fp+ssave],%o3
	ld	[%fp+csave],%o5
	ld	[%fp+0x5c],%i5
	st	%i5,[%sp+0x5c]
#endif
	ld	[%fp+nsave],%o0
	ld	[%fp+sxsave],%o2
	ld	[%fp+sssave],%o4
	sra	%o2,0,%o2		! sign-extend for V9
	call	__vlibm_vsincos_big
	sra	%o4,0,%o4		! delay slot

.exit:
	ret
	restore


/*
 * Short final group.  .last1 is entered when only lane 0 holds a real
 * element and .last2 when lanes 0 and 1 do; each first performs the
 * instructions that the taken branch skipped in .loop0/.loop1.  The
 * unused lanes get hx = 0 and zeroed inputs, and both of their output
 * pointers are aimed at junk, so .cont can run the normal three-lane
 * computation.  The _from_rangeN labels are the entry points used by
 * .range1/.range2 when the count runs out there.
 */
	.align	16
.last1:
	faddd	%f2,c3two44,%f4
	st	%f17,[%o1+4]
.last1_from_range1:
	mov	0,%l1
	fzeros	%f10
	fzero	%f12
	add	%fp,junk,%o1
	add	%fp,junk,%l5
.last2:
	faddd	%f12,c3two44,%f14
	st	%f27,[%o2+4]
	st	%f5,[%fp+nk0]
	st	%f15,[%fp+nk1]
.last2_from_range2:
	mov	0,%l2
	fzeros	%f20
	fzero	%f22
	add	%fp,junk,%o2
	ba,pt	%icc,.cont
! delay slot
	add	%fp,junk,%l6


/*
 * Out-of-range handling for lane 0 (.range1 and .range2 below follow
 * the same pattern for lanes 1 and 2).  Three cases:
 *   hx < 0x3e400000 (label 1):  s = x and c = 1.0 are stored directly;
 *	fdtoi is executed only for its inexact side effect.
 *   hx >= 0x7ff00000:  |x| * 0 is stored to both s and c.
 *   otherwise (finite, above the upper threshold):  nothing is stored
 *	here; the annulled delay slot writes a non-zero value to biguns
 *	so that .end calls __vlibm_vsincos_big.
 * At label 2 the element is counted off, s and c are advanced, and the
 * already preloaded next element is moved into this lane before
 * re-entering the loop.
 */
	.align	16
.range0:
	cmp	%l0,%o4
	bl,pt	%icc,1f			! hx < 0x3e400000
! delay slot, harmless if branch taken
	sethi	%hi(0x7ff00000),%o7
	cmp	%l0,%o7
	bl,a,pt	%icc,2f			! branch if finite
! delay slot, squashed if branch not taken
	st	%o4,[%fp+biguns]	! set biguns
	fzero	%f0
	fmuld	%f2,%f0,%f2
	st	%f2,[%o0]
	st	%f3,[%o0+4]
	st	%f2,[%l3]
	ba,pt	%icc,2f
! delay slot
	st	%f3,[%l3+4]
1:
	fdtoi	%f2,%f4			! raise inexact if not zero
	st	%f0,[%o0]
	st	%f3,[%o0+4]
	sethi	%hi(0x3ff00000),%g5
	st	%g5,[%l3]
	st	%g0,[%l3+4]
2:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.end
! delay slot, harmless if branch taken
	add	%i3,%i4,%i3		! s += strides
	add	%l3,%l7,%l3		! c += stridec
	andn	%l1,%i5,%l0		! hx &= ~0x80000000
	fmovs	%f10,%f0
	fmovs	%f13,%f3
	ba,pt	%icc,.loop0
! delay slot
	add	%i1,%i2,%i1		! x += stridex


/* Lane 1 counterpart of .range0; exits through .last1_from_range1. */
	.align	16
.range1:
	cmp	%l1,%o4
	bl,pt	%icc,1f			! hx < 0x3e400000
! delay slot, harmless if branch taken
	sethi	%hi(0x7ff00000),%o7
	cmp	%l1,%o7
	bl,a,pt	%icc,2f			! branch if finite
! delay slot, squashed if branch not taken
	st	%o4,[%fp+biguns]	! set biguns
	fzero	%f10
	fmuld	%f12,%f10,%f12
	st	%f12,[%o1]
	st	%f13,[%o1+4]
	st	%f12,[%l3]
	ba,pt	%icc,2f
! delay slot
	st	%f13,[%l3+4]
1:
	fdtoi	%f12,%f14		! raise inexact if not zero
	st	%f10,[%o1]
	st	%f13,[%o1+4]
	sethi	%hi(0x3ff00000),%g5
	st	%g5,[%l3]
	st	%g0,[%l3+4]
2:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last1_from_range1
! delay slot, harmless if branch taken
	add	%i3,%i4,%i3		! s += strides
	add	%l3,%l7,%l3		! c += stridec
	andn	%l2,%i5,%l1		! hx &= ~0x80000000
	fmovs	%f20,%f10
	fmovs	%f23,%f13
	ba,pt	%icc,.loop1
! delay slot
	add	%i1,%i2,%i1		! x += stridex


/*
 * Lane 2 counterpart of .range0; exits through .last2_from_range2.
 * Because .loop2 does no preload, the next element is loaded here with
 * ordinary loads, after the count check has shown that one exists.
 */
	.align	16
.range2:
	cmp	%l2,%o4
	bl,pt	%icc,1f			! hx < 0x3e400000
! delay slot, harmless if branch taken
	sethi	%hi(0x7ff00000),%o7
	cmp	%l2,%o7
	bl,a,pt	%icc,2f			! branch if finite
! delay slot, squashed if branch not taken
	st	%o4,[%fp+biguns]	! set biguns
	fzero	%f20
	fmuld	%f22,%f20,%f22
	st	%f22,[%o2]
	st	%f23,[%o2+4]
	st	%f22,[%l3]
	ba,pt	%icc,2f
! delay slot
	st	%f23,[%l3+4]
1:
	fdtoi	%f22,%f24		! raise inexact if not zero
	st	%f20,[%o2]
	st	%f23,[%o2+4]
	sethi	%hi(0x3ff00000),%g5
	st	%g5,[%l3]
	st	%g0,[%l3+4]
2:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last2_from_range2
! delay slot, harmless if branch taken
	add	%i3,%i4,%i3		! s += strides
	add	%l3,%l7,%l3		! c += stridec
	ld	[%i1],%l2
	ld	[%i1],%f20
	ld	[%i1+4],%f23
	andn	%l2,%i5,%l2		! hx &= ~0x80000000
	ba,pt	%icc,.loop2
! delay slot
	add	%i1,%i2,%i1		! x += stridex

	SET_SIZE(__vsincos)