/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

	.file	"__vatan2.S"

#include "libm.h"

! Read-only double precision constants, each stored as a
! (high word, low word) pair.  The entries at offsets 0x00-0x30 are
! loaded into FP registers when the function is entered; the entries at
! offsets 0x38-0x48 are copied into the stack frame instead (see the
! local storage indices below).

	RO_DATA
	.align	64
constants:
	.word	0x3ff921fb,0x54442d18	! pio2     (pi/2, leading part)
	.word	0x3c91a626,0x33145c07	! pio2_lo  (trailing part of pi/2)
	.word	0xbfd55555,0x555554ee	! p1       (approximately -1/3)
	.word	0x3fc99999,0x997a1559	! p2       (approximately  1/5)
	.word	0xbfc24923,0x158dfe02	! p3       (approximately -1/7)
	.word	0x3fbc639d,0x0ed1347b	! p4       (approximately  1/9)
	.word	0xffffffff,0x00000000	! mask     (keeps the high word only)
	.word	0x3fc00000,0x00000000	! twom3    (2**-3)
	.word	0x46d00000,0x00000000	! two110   (2**110)
	.word	0x3fe921fb,0x54442d18	! pio4     (pi/4)

! local storage indices

! Offsets from %fp of the 8-byte temporaries in this frame:
!   xscl, yscl  - used to move the high word of a rescaled x or y from
!                 an FP register to an integer register (st then ld)
!   twom3, two110, pio4 - stack copies of the rarely used constants
!   junk        - dummy destination for results that must be discarded
! Do not append comments to the define lines themselves; the text
! would become part of the macro body.

#define xscl		STACK_BIAS-0x8
#define yscl		STACK_BIAS-0x10
#define twom3		STACK_BIAS-0x18
#define two110		STACK_BIAS-0x20
#define pio4		STACK_BIAS-0x28
#define junk		STACK_BIAS-0x30
! sizeof temp storage - must be a multiple of 16 for V9
#define tmps		0x30

! register use

! i0	n
! i1	y
! i2	stridey
! i3	x
! i4	stridex
! i5	z

! l0	k0
! l1	k1
! l2	k2
! l3	hx
! l4	pz0
! l5	pz1
! l6	pz2
! l7	stridez

! the following are 64-bit registers in both V8+ and V9

! g1	__vlibm_TBL_atan2
! g5

! o0	hy
! o1	0x00004000
! o2	0x1420
! o3	0x7fe00000
! o4	0x03600000
! o5	0x00100000
! o7

! f0	y0
! f2	x0
! f4	t0
! f6	ah0
! f8	al0
! f10	y1
! f12	x1
! f14	t1
! f16	ah1
! f18	al1
! f20	y2
! f22	x2
! f24	t2
! f26	ah2
! f28	al2
! f30
! f32
! f34
! f36	sx0
! f38	sx1
! f40	sx2
! f42	sy0
! f44	sy1
! f46	sy2

! FP registers that hold constants for the whole call.  Do not append
! comments to the define lines; the text would become part of the
! macro body.

#define mask		%f48
#define signbit		%f50
#define pio2		%f52
#define pio2_lo		%f54
#define p1		%f56
#define p2		%f58
#define p3		%f60
#define p4		%f62

!
! __vatan2(n, y, stridey, x, stridex, z, stridez)
!
! Vector atan2 kernel.  For each element it reads one double from y
! and one from x and stores one double to z.  The first six arguments
! arrive in i0-i5 as listed under register use; stridez is fetched
! from the argument area of the caller frame.  All three strides are
! shifted left by 3 on entry, i.e. they are counts of doubles.
!
! Per element:
!   - the operands are replaced by their absolute values and swapped
!     so that the smaller magnitude is the numerator (the comparison
!     result is kept in fcc0/fcc1/fcc2 for elements 0/1/2);
!   - k is formed from the difference of the two high words and is
!     turned into a byte offset into __vlibm_TBL_atan2 (32-byte
!     spacing, clamped to 0x1420);
!   - with c the table double at offset 0x10 the reduced argument is
!     t = (y - x*c)/(x + y*c), where x*c is computed from the high
!     and low halves of x separately;
!   - the result is the sum of t + p1*t^3 + p2*t^5 + p3*t^7 + p4*t^9,
!     the table doubles at offsets 0 and 8, and the ah/al terms built
!     from pio2 and pio2_lo, with the sign applied last by fxor.
!   NOTE(review): the table doubles at offsets 0 and 8 are presumably
!   the high and low parts of atan(c) - confirm against the table.
!
! The main loop is software pipelined over three elements per trip.
! The last three additions and the store for element 2 are performed
! at the top of the following trip (or in the epilogue after the
! loop), which is why the prologue zeroes f20-f26 and f46 and points
! pz2 at the junk slot.  When n runs out in the middle of a trip the
! .lastN paths substitute dummy operands and the junk slot.
!
! NaN operands, very large or very small operands and large ratios
! leave the pipeline through .nanN/.bigN/.smallN, which either rescale
! and rejoin at .contN or store a directly computed result through
! .return_ahN/.specialN.
!
! NOTE(review): the prologue loads *y and *x with ordinary faulting
! loads before n is ever tested, so the routine appears to assume
! n >= 1 - confirm that callers guarantee this.
!

	ENTRY(__vatan2)
	save	%sp,-SA(MINFRAME)-tmps,%sp
	PIC_SETUP(l7)
	PIC_SET(l7,constants,o0)
	PIC_SET(l7,__vlibm_TBL_atan2,o1)
	wr	%g0,0x82,%asi		! set %asi for non-faulting loads
	mov	%o1, %g1
! stridez is the seventh argument and is read from the caller frame
#ifdef __sparcv9
	ldx	[%fp+STACK_BIAS+0xb0],%l7
#else
	ld	[%fp+0x5c],%l7
#endif
	ldd	[%o0+0x00],pio2		! load/set up constants
	ldd	[%o0+0x08],pio2_lo
	ldd	[%o0+0x10],p1
	ldd	[%o0+0x18],p2
	ldd	[%o0+0x20],p3
	ldd	[%o0+0x28],p4
	ldd	[%o0+0x30],mask
	fzero	signbit
	fnegd	signbit,signbit		! signbit = -0.0
	sethi	%hi(0x00004000),%o1
	sethi	%hi(0x1420),%o2
	or	%o2,%lo(0x1420),%o2
	sethi	%hi(0x7fe00000),%o3
	sethi	%hi(0x03600000),%o4
	sethi	%hi(0x00100000),%o5
	ldd	[%o0+0x38],%f0		! copy rarely used constants to stack
	ldd	[%o0+0x40],%f2
	ldd	[%o0+0x48],%f4
	std	%f0,[%fp+twom3]
	std	%f2,[%fp+two110]
	std	%f4,[%fp+pio4]
	sll	%i2,3,%i2		! scale strides
	sll	%i4,3,%i4
	sll	%l7,3,%l7
	fzero	%f20			! loop prologue
	fzero	%f22
	fzero	%f24
	fzero	%f26
	fzero	%f46
	add	%fp,junk,%l6		! first pz2 store is discarded
	ld	[%i1],%f0		! *y
	ld	[%i1+4],%f1
	ld	[%i3],%f8		! *x
	ld	[%i3+4],%f9
	ld	[%i1],%o0		! hy
	ba	.loop
	ld	[%i3],%l3		! hx

! 16-byte aligned
	.align	16
.loop:
	fabsd	%f0,%f4			! |y0|
	mov	%i5,%l4			! pz0 = z
	add	%i1,%i2,%i1		! y += stridey

	fabsd	%f8,%f2			! |x0|
	add	%i3,%i4,%i3		! x += stridex
	add	%i5,%l7,%i5		! z += stridez

	fand	%f0,signbit,%f42	! sy0
	sethi	%hi(0x80000000),%g5

	fand	%f8,signbit,%f36	! sx0
	andn	%o0,%g5,%o0		! clear sign bit of hy
	andn	%l3,%g5,%l3		! clear sign bit of hx

	fcmpd	%fcc0,%f4,%f2

	fmovd	%f4,%f0

	fmovdg	%fcc0,%f2,%f0		! swap if |y| > |x|

	fmovdg	%fcc0,%f4,%f2
	mov	%o0,%o7
	lda	[%i1]%asi,%f10		! preload next argument

	faddd	%f26,%f20,%f26		! previous element 2: final sums
	lda	[%i1+4]%asi,%f11

	faddd	%f22,%f24,%f22
	movg	%fcc0,%l3,%o0		! swap hx and hy to match

	movg	%fcc0,%o7,%l3

	fbu,pn	%fcc0,.nan0		! if x or y is nan
! delay slot
	lda	[%i3]%asi,%f18

	sub	%l3,%o0,%l0		! hx - hy
	sub	%l3,%o3,%g5		! hx - 0x7fe00000
	fabsd	%f10,%f14
	lda	[%i3+4]%asi,%f19

	sub	%l0,%o4,%o7		! (hx - hy) - 0x03600000
	faddd	%f22,%f26,%f26

	andcc	%g5,%o7,%g0
	bge,pn	%icc,.big0		! if |x| or |x/y| is big
! delay slot
	nop

	fabsd	%f18,%f12
	cmp	%o0,%o5
	bl,pn	%icc,.small0		! if |y| is small
! delay slot
	lda	[%i1]%asi,%o0

	add	%l0,%o1,%l0		! k
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last1
! delay slot
	lda	[%i3]%asi,%l3

! element 1 classification interleaved with element 0 reduction
.cont1:
	srl	%l0,10,%l0
	mov	%i5,%l5			! pz1 = z
	fxor	%f26,%f46,%f26		! previous element 2: apply sign
	st	%f26,[%l6]		! ...and store (junk on first trip)

	fand	%f10,signbit,%f44	! sy1
	andn	%l0,0x1f,%l0		! 32-byte table spacing
	add	%i1,%i2,%i1
	st	%f27,[%l6+4]

	fand	%f18,signbit,%f38	! sx1
	cmp	%l0,%o2
	movg	%icc,%o2,%l0		! clamp table offset to 0x1420

	fcmpd	%fcc1,%f14,%f12
	add	%i3,%i4,%i3
	add	%i5,%l7,%i5

	fmovd	%f14,%f10
	add	%l0,%g1,%l0		! address of table entry 0
	sethi	%hi(0x80000000),%g5

	ldd	[%l0+0x10],%f4		! c0
	fand	%f2,mask,%f6		! high half of x0
	andn	%o0,%g5,%o0
	andn	%l3,%g5,%l3

	fmovdg	%fcc1,%f12,%f10

	fmovdg	%fcc1,%f14,%f12
	mov	%o0,%o7
	lda	[%i1]%asi,%f20

	fsubd	%f2,%f6,%f30		! low half of x0
	fmuld	%f6,%f4,%f6
	movg	%fcc1,%l3,%o0

	fmuld	%f0,%f4,%f8		! y0*c0
	movg	%fcc1,%o7,%l3

	lda	[%i1+4]%asi,%f21
	fbu,pn	%fcc1,.nan1
! delay slot
	nop

	lda	[%i3]%asi,%f28
	sub	%l3,%o0,%l1
	sub	%l3,%o3,%g5

	lda	[%i3+4]%asi,%f29
	fmuld	%f30,%f4,%f30
	fsubd	%f0,%f6,%f4
	sub	%l1,%o4,%o7

	fabsd	%f20,%f24
	andcc	%g5,%o7,%g0
	bge,pn	%icc,.big1
! delay slot
	nop

	faddd	%f2,%f8,%f8		! x0 + y0*c0
	cmp	%o0,%o5
	bl,pn	%icc,.small1
! delay slot
	lda	[%i1]%asi,%o0

	fabsd	%f28,%f22
	add	%l1,%o1,%l1
	addcc	%i0,-1,%i0
	lda	[%i3]%asi,%l3

	fsubd	%f4,%f30,%f4		! y0 - x0*c0
	srl	%l1,10,%l1
	ble,pn	%icc,.last2
! delay slot
	mov	%i5,%l6			! pz2 = z

! element 2 classification interleaved with element 1 reduction
.cont2:
	fand	%f20,signbit,%f46	! sy2
	andn	%l1,0x1f,%l1
	add	%i1,%i2,%i1

	fand	%f28,signbit,%f40	! sx2
	cmp	%l1,%o2
	movg	%icc,%o2,%l1

	fcmpd	%fcc2,%f24,%f22
	add	%i3,%i4,%i3
	add	%i5,%l7,%i5

	fdivd	%f4,%f8,%f4		! t0
	fmovd	%f24,%f20
	add	%l1,%g1,%l1
	sethi	%hi(0x80000000),%g5

	ldd	[%l1+0x10],%f14
	fand	%f12,mask,%f16
	andn	%o0,%g5,%o0
	andn	%l3,%g5,%l3

	fmovdg	%fcc2,%f22,%f20

	fmovdg	%fcc2,%f24,%f22
	mov	%o0,%o7

	fsubd	%f12,%f16,%f32
	fmuld	%f16,%f14,%f16
	movg	%fcc2,%l3,%o0

	fnegd	pio2_lo,%f8		! al
	fmuld	%f10,%f14,%f18
	movg	%fcc2,%o7,%l3

	fzero	%f0
	fbu,pn	%fcc2,.nan2
! delay slot
	nop

	fmovdg	%fcc0,signbit,%f0
	sub	%l3,%o0,%l2
	sub	%l3,%o3,%g5

	fmuld	%f32,%f14,%f32
	fsubd	%f10,%f16,%f14
	sub	%l2,%o4,%o7

	faddd	%f12,%f18,%f18
	andcc	%g5,%o7,%g0
	bge,pn	%icc,.big2
! delay slot
	nop

	fxor	%f36,%f0,%f36
	cmp	%o0,%o5
	bl,pn	%icc,.small2
! delay slot
	nop

! From here to the loop branch the three elements are interleaved:
! polynomial and ah/al sums for elements 0 and 1, division and
! polynomial for element 2, preload of the next element 0 operands,
! and the stores through pz0 and pz1.
.cont3:
	fmovdg	%fcc0,signbit,%f8
	add	%l2,%o1,%l2

	fsubd	%f14,%f32,%f14
	srl	%l2,10,%l2

	fxor	%f36,pio2_lo,%f30	! al
	andn	%l2,0x1f,%l2

	fxor	%f36,pio2,%f0		! ah
	cmp	%l2,%o2
	movg	%icc,%o2,%l2

	fxor	%f42,%f36,%f42		! sy

	faddd	%f8,%f30,%f8
	ldd	[%l0+0x8],%f30
	add	%l2,%g1,%l2

	fdivd	%f14,%f18,%f14		! t1
	fzero	%f10

	ldd	[%l2+0x10],%f24
	fand	%f22,mask,%f26

	fmovdg	%fcc1,signbit,%f10

	fmuld	%f4,%f4,%f36
	faddd	%f8,%f30,%f8

	fsubd	%f22,%f26,%f34
	fmuld	%f26,%f24,%f26

	fmuld	%f20,%f24,%f28
	fxor	%f38,%f10,%f38

	fmuld	%f4,p3,%f6
	fnegd	pio2_lo,%f18

	fmuld	%f36,p2,%f2
	fmovdg	%fcc1,signbit,%f18

	fmuld	%f36,%f4,%f36
	fxor	%f38,pio2,%f10

	fmuld	%f34,%f24,%f34
	fsubd	%f20,%f26,%f24

	faddd	%f22,%f28,%f28

	faddd	%f2,p1,%f2

	fmuld	%f36,p4,%f30
	fxor	%f38,pio2_lo,%f32

	fsubd	%f24,%f34,%f24

	fxor	%f44,%f38,%f44

	fmuld	%f36,%f2,%f2
	faddd	%f18,%f32,%f18
	ldd	[%l1+0x8],%f32

	fmuld	%f36,%f36,%f36
	faddd	%f6,%f30,%f30

	fdivd	%f24,%f28,%f24		! t2
	fzero	%f20

	fmovdg	%fcc2,signbit,%f20

	faddd	%f2,%f8,%f2

	fmuld	%f14,%f14,%f38
	faddd	%f18,%f32,%f18

	fmuld	%f36,%f30,%f36
	fxor	%f40,%f20,%f40

	fnegd	pio2,%f6		! ah
	fmuld	%f14,p3,%f16

	fmovdg	%fcc0,signbit,%f6

	fmuld	%f38,p2,%f12
	fnegd	pio2_lo,%f28

	faddd	%f2,%f36,%f2
	fmuld	%f38,%f14,%f38

	faddd	%f6,%f0,%f6
	ldd	[%l0],%f0

	fmovdg	%fcc2,signbit,%f28

	faddd	%f12,p1,%f12

	fmuld	%f38,p4,%f32
	fxor	%f40,pio2_lo,%f34

	fxor	%f40,pio2,%f20

	faddd	%f2,%f4,%f2

	fmuld	%f38,%f12,%f12
	fxor	%f46,%f40,%f46

	fmuld	%f38,%f38,%f38
	faddd	%f16,%f32,%f32

	faddd	%f28,%f34,%f28
	ldd	[%l2+0x8],%f34

	faddd	%f6,%f0,%f6
	lda	[%i1]%asi,%f0		! preload next argument

	faddd	%f12,%f18,%f12
	lda	[%i1+4]%asi,%f1

	fmuld	%f24,%f24,%f40
	lda	[%i3]%asi,%f8

	fmuld	%f38,%f32,%f38
	faddd	%f28,%f34,%f28
	lda	[%i3+4]%asi,%f9

	fnegd	pio2,%f16
	fmuld	%f24,p3,%f26
	lda	[%i1]%asi,%o0

	fmovdg	%fcc1,signbit,%f16
	lda	[%i3]%asi,%l3

	fmuld	%f40,p2,%f22

	faddd	%f12,%f38,%f12
	fmuld	%f40,%f24,%f40

	faddd	%f2,%f6,%f6

	faddd	%f16,%f10,%f16
	ldd	[%l1],%f10

	faddd	%f22,p1,%f22

	faddd	%f12,%f14,%f12
	fmuld	%f40,p4,%f34

	fxor	%f6,%f42,%f6		! element 0: apply sign
	st	%f6,[%l4]		! ...and store through pz0

	faddd	%f16,%f10,%f16
	st	%f7,[%l4+4]

	fmuld	%f40,%f22,%f22

	fmuld	%f40,%f40,%f40
	faddd	%f26,%f34,%f34

	fnegd	pio2,%f26

	faddd	%f12,%f16,%f16

	faddd	%f22,%f28,%f22

	fmuld	%f40,%f34,%f40
	fmovdg	%fcc2,signbit,%f26

! -

	fxor	%f16,%f44,%f16		! element 1: apply sign
	st	%f16,[%l5]		! ...and store through pz1

	faddd	%f26,%f20,%f26
	st	%f17,[%l5+4]
	addcc	%i0,-1,%i0

	faddd	%f22,%f40,%f22
	bg,pt	%icc,.loop
! delay slot
	ldd	[%l2],%f20


! loop epilogue: the same three additions and the store that the next
! trip would have performed for element 2
	faddd	%f26,%f20,%f26
	faddd	%f22,%f24,%f22
	faddd	%f22,%f26,%f26
.done_from_special0:
	fxor	%f26,%f46,%f26
	st	%f26,[%l6]
	st	%f27,[%l6+4]
	ret
	restore



! n ran out before element 1: continue the trip with dummy operands
! (both set to pio2) and send later stores to the junk slot
	.align	16
.last1:
	fmovd	pio2,%f10		! set up dummy arguments
	fmovd	pio2,%f18
	fabsd	%f10,%f14
	fabsd	%f18,%f12
	sethi	%hi(0x3ff921fb),%o0
	or	%o0,%lo(0x3ff921fb),%o0
	mov	%o0,%l3
	ba,pt	%icc,.cont1
! delay slot
	add	%fp,junk,%i5



! n ran out before element 2: dummy operands, pz2 points at junk
	.align	16
.last2:
	fmovd	pio2,%f20
	fmovd	pio2,%f28
	fabsd	%f20,%f24
	fabsd	%f28,%f22
	sethi	%hi(0x3ff921fb),%o0
	or	%o0,%lo(0x3ff921fb),%o0
	mov	%o0,%l3
	ba,pt	%icc,.cont2
! delay slot
	add	%fp,junk,%l6



! element 0 has a NaN operand: finish the work the main path would
! have done for the other elements, then store the product of the two
! operands as the result
	.align	16
.nan0:
	faddd	%f22,%f26,%f26
.nan0_from_special0:
	fabsd	%f10,%f14
	lda	[%i3+4]%asi,%f19
	fabsd	%f18,%f12
	lda	[%i1]%asi,%o0
	lda	[%i3]%asi,%l3
	ba,pt	%icc,.special0
! delay slot
	fmuld	%f0,%f2,%f6


! element 0: the larger operand or the ratio is big.  Either return ah
! alone, return the plain quotient, or rescale both operands by twom3
! and rejoin the pipeline.
	.align	16
.big0:
	fabsd	%f18,%f12
	lda	[%i1]%asi,%o0
	lda	[%i3]%asi,%l3
	cmp	%g5,%o5
	bge,pn	%icc,.return_ah0	! if hx >= 0x7ff00000
! delay slot
	nop
	cmp	%l0,%o4
	bge,pn	%icc,1f			! if hx - hy >= 0x03600000
! delay slot
	nop
	ldd	[%fp+twom3],%f6
	fmuld	%f0,%f6,%f0
	fmuld	%f2,%f6,%f2
	add	%l0,%o1,%l0
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last1
! delay slot
	nop
	ba,pt	%icc,.cont1
! delay slot
	nop
1:
	fbg,pn	%fcc0,.return_ah0	! operands were swapped
! delay slot
	nop
	fcmpd	%fcc3,%f8,signbit
	fbl,pn	%fcc3,.return_ah0	! original x is negative
! delay slot
	nop
	ba,pt	%icc,.special0
! delay slot
	fdivd	%f0,%f2,%f6


! element 0: the smaller operand has a high word below 0x00100000.
! Zero returns ah alone; otherwise both operands are scaled by two110
! and k is recomputed from the scaled high words via the stack.
	.align	16
.small0:
	lda	[%i3]%asi,%l3
	fcmpd	%fcc3,%f0,signbit
	fbe,pt	%fcc3,.return_ah0
! delay slot
	nop
	ldd	[%fp+two110],%f6
	fmuld	%f0,%f6,%f0
	fmuld	%f2,%f6,%f2
	st	%f0,[%fp+yscl]
	ld	[%fp+yscl],%o7
	st	%f2,[%fp+xscl]
	ld	[%fp+xscl],%l0
	sub	%l0,%o7,%l0
	add	%l0,%o1,%l0
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last1
! delay slot
	nop
	ba,pt	%icc,.cont1
! delay slot
	nop


! element 0 result is ah alone:
!   ah = (swapped ? -0.0 : -pio2) + (pio2 with the sign held in f36)
! plus pio4 when hy >= 0x7ff00000.  The final sign is applied at
! .special0.
	.align	16
.return_ah0:
	fzero	%f0
	fmovdg	%fcc0,signbit,%f0
	fxor	%f36,%f0,%f36
	fxor	%f36,pio2,%f0
	fxor	%f42,%f36,%f42
	fnegd	pio2,%f6
	fmovdg	%fcc0,signbit,%f6
	faddd	%f6,%f0,%f6
	sub	%g5,%l0,%o7		! hy - 0x7fe00000
	cmp	%o7,%o5
	bl,pt	%icc,1f			! if hy < 0x7ff00000
! delay slot
	nop
	ldd	[%fp+pio4],%f0
	faddd	%f6,%f0,%f6
1:
! NOTE(review): f4 is overwritten before it is read again; this
! conversion is presumably issued only to raise inexact - confirm
	fdtoi	%f6,%f4
! store the element 0 result, then restart element 0 classification
! with the operands that had been preloaded for element 1
.special0:
	fxor	%f6,%f42,%f6
	st	%f6,[%l4]
	st	%f7,[%l4+4]
	addcc	%i0,-1,%i0
	ble,pn	%icc,.done_from_special0
! delay slot
	nop
	fmovd	%f10,%f0
	fmovd	%f18,%f8
	fmovd	%f14,%f4
	fmovd	%f12,%f2
	mov	%i5,%l4
	add	%i1,%i2,%i1
	add	%i3,%i4,%i3
	add	%i5,%l7,%i5
	fand	%f0,signbit,%f42
	sethi	%hi(0x80000000),%g5
	fand	%f8,signbit,%f36
	andn	%o0,%g5,%o0
	andn	%l3,%g5,%l3
	fcmpd	%fcc0,%f4,%f2
	fmovd	%f4,%f0
	fmovdg	%fcc0,%f2,%f0
	fmovdg	%fcc0,%f4,%f2
	mov	%o0,%o7
	movg	%fcc0,%l3,%o0
	movg	%fcc0,%o7,%l3
	lda	[%i1]%asi,%f10
	lda	[%i1+4]%asi,%f11
	fbu,pn	%fcc0,.nan0_from_special0
! delay slot
	lda	[%i3]%asi,%f18
	fabsd	%f10,%f14
	lda	[%i3+4]%asi,%f19
	sub	%l3,%o0,%l0
	sub	%l3,%o3,%g5
	sub	%l0,%o4,%o7
	andcc	%g5,%o7,%g0
	bge,pn	%icc,.big0
! delay slot
	nop
	fabsd	%f18,%f12
	cmp	%o0,%o5
	bl,pn	%icc,.small0
! delay slot
	lda	[%i1]%asi,%o0
	add	%l0,%o1,%l0
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last1
! delay slot
	lda	[%i3]%asi,%l3
	ba,pt	%icc,.cont1
! delay slot
	nop



! element 1 has a NaN operand: first complete the element 0 work that
! the main path interleaves after the branch, then store the product
! of the two operands as the element 1 result
	.align	16
.nan1:
	fmuld	%f30,%f4,%f30
	fsubd	%f0,%f6,%f4
	faddd	%f2,%f8,%f8
	fsubd	%f4,%f30,%f4
.nan1_from_special1:
	lda	[%i3]%asi,%f28
	lda	[%i3+4]%asi,%f29
	fabsd	%f20,%f24
	lda	[%i1]%asi,%o0
	fabsd	%f28,%f22
	lda	[%i3]%asi,%l3
	mov	%i5,%l6
	ba,pt	%icc,.special1
! delay slot
	fmuld	%f10,%f12,%f16


! element 1 counterpart of .big0; the first two instructions complete
! pending element 0 work
	.align	16
.big1:
	faddd	%f2,%f8,%f8
	fsubd	%f4,%f30,%f4
.big1_from_special1:
	lda	[%i1]%asi,%o0
	fabsd	%f28,%f22
	lda	[%i3]%asi,%l3
	mov	%i5,%l6
	cmp	%g5,%o5
	bge,pn	%icc,.return_ah1
! delay slot
	nop
	cmp	%l1,%o4
	bge,pn	%icc,1f
! delay slot
	nop
	ldd	[%fp+twom3],%f16
	fmuld	%f10,%f16,%f10
	fmuld	%f12,%f16,%f12
	add	%l1,%o1,%l1
	srl	%l1,10,%l1
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last2
! delay slot
	nop
	ba,pt	%icc,.cont2
! delay slot
	nop
1:
	fbg,pn	%fcc1,.return_ah1
! delay slot
	nop
	fcmpd	%fcc3,%f18,signbit
	fbl,pn	%fcc3,.return_ah1
! delay slot
	nop
	ba,pt	%icc,.special1
! delay slot
	fdivd	%f10,%f12,%f16


! element 1 counterpart of .small0; the first instruction completes
! pending element 0 work
	.align	16
.small1:
	fsubd	%f4,%f30,%f4
.small1_from_special1:
	fabsd	%f28,%f22
	lda	[%i3]%asi,%l3
	mov	%i5,%l6
	fcmpd	%fcc3,%f10,signbit
	fbe,pt	%fcc3,.return_ah1
! delay slot
	nop
	ldd	[%fp+two110],%f16
	fmuld	%f10,%f16,%f10
	fmuld	%f12,%f16,%f12
	st	%f10,[%fp+yscl]
	ld	[%fp+yscl],%o7
	st	%f12,[%fp+xscl]
	ld	[%fp+xscl],%l1
	sub	%l1,%o7,%l1
	add	%l1,%o1,%l1
	srl	%l1,10,%l1
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last2
! delay slot
	nop
	ba,pt	%icc,.cont2
! delay slot
	nop


! element 1 counterpart of .return_ah0
	.align	16
.return_ah1:
	fzero	%f10
	fmovdg	%fcc1,signbit,%f10
	fxor	%f38,%f10,%f38
	fxor	%f38,pio2,%f10
	fxor	%f44,%f38,%f44
	fnegd	pio2,%f16
	fmovdg	%fcc1,signbit,%f16
	faddd	%f16,%f10,%f16
	sub	%g5,%l1,%o7
	cmp	%o7,%o5
	bl,pt	%icc,1f
! delay slot
	nop
	ldd	[%fp+pio4],%f10
	faddd	%f16,%f10,%f16
1:
! NOTE(review): f14 is overwritten before it is read again; presumably
! issued only to raise inexact - confirm
	fdtoi	%f16,%f14
! store the element 1 result, then restart element 1 classification
! with the operands that had been preloaded for element 2 (or with
! dummy operands when n has run out)
.special1:
	fxor	%f16,%f44,%f16
	st	%f16,[%l5]
	st	%f17,[%l5+4]
	addcc	%i0,-1,%i0
	bg,pn	%icc,1f
! delay slot
	nop
	fmovd	pio2,%f20		! set up dummy argument
	fmovd	pio2,%f28
	fabsd	%f20,%f24
	fabsd	%f28,%f22
	sethi	%hi(0x3ff921fb),%o0
	or	%o0,%lo(0x3ff921fb),%o0
	mov	%o0,%l3
	add	%fp,junk,%i5
1:
	fmovd	%f20,%f10
	fmovd	%f28,%f18
	fmovd	%f24,%f14
	fmovd	%f22,%f12
	mov	%i5,%l5
	add	%i1,%i2,%i1
	add	%i3,%i4,%i3
	add	%i5,%l7,%i5
	fand	%f10,signbit,%f44
	sethi	%hi(0x80000000),%g5
	fand	%f18,signbit,%f38
	andn	%o0,%g5,%o0
	andn	%l3,%g5,%l3
	fcmpd	%fcc1,%f14,%f12
	fmovd	%f14,%f10
	fmovdg	%fcc1,%f12,%f10
	fmovdg	%fcc1,%f14,%f12
	mov	%o0,%o7
	movg	%fcc1,%l3,%o0
	movg	%fcc1,%o7,%l3
	lda	[%i1]%asi,%f20
	lda	[%i1+4]%asi,%f21
	fbu,pn	%fcc1,.nan1_from_special1
! delay slot
	nop
	lda	[%i3]%asi,%f28
	lda	[%i3+4]%asi,%f29
	fabsd	%f20,%f24
	sub	%l3,%o0,%l1
	sub	%l3,%o3,%g5
	sub	%l1,%o4,%o7
	andcc	%g5,%o7,%g0
	bge,pn	%icc,.big1_from_special1
! delay slot
	nop
	cmp	%o0,%o5
	bl,pn	%icc,.small1_from_special1
! delay slot
	lda	[%i1]%asi,%o0
	fabsd	%f28,%f22
	lda	[%i3]%asi,%l3
	add	%l1,%o1,%l1
	srl	%l1,10,%l1
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last2
! delay slot
	mov	%i5,%l6
	ba,pt	%icc,.cont2
! delay slot
	nop



! element 2 has a NaN operand: first complete the element 0 and
! element 1 work that the main path interleaves after the branch, then
! store the product of the two operands as the element 2 result
	.align	16
.nan2:
	fmovdg	%fcc0,signbit,%f0
	fmuld	%f32,%f14,%f32
	fsubd	%f10,%f16,%f14
	faddd	%f12,%f18,%f18
	fxor	%f36,%f0,%f36
.nan2_from_special2:
	ba,pt	%icc,.special2
! delay slot
	fmuld	%f20,%f22,%f26


! element 2 counterpart of .big0; the first instruction completes
! pending element 0 work
	.align	16
.big2:
	fxor	%f36,%f0,%f36
.big2_from_special2:
	cmp	%g5,%o5
	bge,pn	%icc,.return_ah2
! delay slot
	nop
	cmp	%l2,%o4
	bge,pn	%icc,1f
! delay slot
	nop
	ldd	[%fp+twom3],%f26
	fmuld	%f20,%f26,%f20
	fmuld	%f22,%f26,%f22
	ba,pt	%icc,.cont3
! delay slot
	nop
1:
	fbg,pn	%fcc2,.return_ah2
! delay slot
	nop
	fcmpd	%fcc3,%f28,signbit
	fbl,pn	%fcc3,.return_ah2
! delay slot
	nop
	ba,pt	%icc,.special2
! delay slot
	fdivd	%f20,%f22,%f26


! element 2 counterpart of .small0
	.align	16
.small2:
	fcmpd	%fcc3,%f20,signbit
	fbe,pt	%fcc3,.return_ah2
! delay slot
	nop
	ldd	[%fp+two110],%f26
	fmuld	%f20,%f26,%f20
	fmuld	%f22,%f26,%f22
	st	%f20,[%fp+yscl]
	ld	[%fp+yscl],%o7
	st	%f22,[%fp+xscl]
	ld	[%fp+xscl],%l2
	sub	%l2,%o7,%l2
	ba,pt	%icc,.cont3
! delay slot
	nop


! element 2 counterpart of .return_ah0
	.align	16
.return_ah2:
	fzero	%f20
	fmovdg	%fcc2,signbit,%f20
	fxor	%f40,%f20,%f40
	fxor	%f40,pio2,%f20
	fxor	%f46,%f40,%f46
	fnegd	pio2,%f26
	fmovdg	%fcc2,signbit,%f26
	faddd	%f26,%f20,%f26
	sub	%g5,%l2,%o7
	cmp	%o7,%o5
	bl,pt	%icc,1f
! delay slot
	nop
	ldd	[%fp+pio4],%f20
	faddd	%f26,%f20,%f26
1:
! NOTE(review): f24 is overwritten before it is read again; presumably
! issued only to raise inexact - confirm
	fdtoi	%f26,%f24
! store the element 2 result, then load and classify a fresh element 2
! (or fall into .cont3 with dummy operands when n has run out)
.special2:
	fxor	%f26,%f46,%f26
	st	%f26,[%l6]
	st	%f27,[%l6+4]
	addcc	%i0,-1,%i0
	bg,pn	%icc,1f
! delay slot
	nop
	fmovd	pio2,%f20		! set up dummy argument
	fmovd	pio2,%f22
	fzero	%f40
	fzero	%f46
	mov	0,%l2
	ba,pt	%icc,.cont3
! delay slot
	add	%fp,junk,%l6
1:
	lda	[%i1]%asi,%f20
	lda	[%i1+4]%asi,%f21
	lda	[%i3]%asi,%f28
	lda	[%i3+4]%asi,%f29
	fabsd	%f20,%f24
	lda	[%i1]%asi,%o0
	fabsd	%f28,%f22
	lda	[%i3]%asi,%l3
	mov	%i5,%l6
	fand	%f20,signbit,%f46
	add	%i1,%i2,%i1
	fand	%f28,signbit,%f40
	fcmpd	%fcc2,%f24,%f22
	add	%i3,%i4,%i3
	add	%i5,%l7,%i5
	fmovd	%f24,%f20
	sethi	%hi(0x80000000),%g5
	andn	%o0,%g5,%o0
	andn	%l3,%g5,%l3
	fmovdg	%fcc2,%f22,%f20
	fmovdg	%fcc2,%f24,%f22
	mov	%o0,%o7
	movg	%fcc2,%l3,%o0
	movg	%fcc2,%o7,%l3
	fbu,pn	%fcc2,.nan2_from_special2
! delay slot
	nop
	sub	%l3,%o0,%l2
	sub	%l3,%o3,%g5
	sub	%l2,%o4,%o7
	andcc	%g5,%o7,%g0
	bge,pn	%icc,.big2_from_special2
! delay slot
	nop
	cmp	%o0,%o5
	bl,pn	%icc,.small2
! delay slot
	nop
	ba,pt	%icc,.cont3
! delay slot
	nop

	SET_SIZE(__vatan2)