1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  23  */
  24 /*
  25  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  26  * Use is subject to license terms.
  27  */
  28 
  29         .file   "__vatan2.S"
  30 
  31 #include "libm.h"
  32 
  /*
   * Read-only constant pool.  Each entry is one IEEE-754 double written as
   * two 32-bit words, high word first.  The prologue of __vatan2 reads the
   * entries by fixed byte offset from the label, so neither the order nor
   * the size of the entries may change.
   *
   *   0x00 pio2     pi/2
   *   0x08 pio2_lo  small low-order term that accompanies pio2
   *   0x10 p1       close to -1/3
   *   0x18 p2       close to  1/5
   *   0x20 p3       close to -1/7
   *   0x28 p4       close to  1/9
   *   0x30 mask     all ones in the upper 32 bits, zero in the lower 32 bits
   *   0x38 twom3    2**-3
   *   0x40 two110   2**110
   *   0x48 pio4     pi/4
   */
  33         RO_DATA
  34         .align  64
  35 constants:
  36         .word   0x3ff921fb,0x54442d18   ! pio2
  37         .word   0x3c91a626,0x33145c07   ! pio2_lo
  38         .word   0xbfd55555,0x555554ee   ! p1
  39         .word   0x3fc99999,0x997a1559   ! p2
  40         .word   0xbfc24923,0x158dfe02   ! p3
  41         .word   0x3fbc639d,0x0ed1347b   ! p4
  42         .word   0xffffffff,0x00000000   ! mask
  43         .word   0x3fc00000,0x00000000   ! twom3
  44         .word   0x46d00000,0x00000000   ! two110
  45         .word   0x3fe921fb,0x54442d18   ! pio4
  46 
  47 ! local storage indices
  48 
  /*
   * Offsets of the temporary stack slots, used as [%fp+name].  The save
   * instruction in __vatan2 reserves tmps bytes for them below the frame
   * pointer.  Each slot is 8 bytes wide.
   *
   *   xscl, yscl          scratch used to copy the high word of a rescaled
   *                       operand from a floating point register to an
   *                       integer register (st followed by ld)
   *   twom3, two110, pio4 stack copies of the constants of the same name
   *   junk                dump slot that receives the results computed for
   *                       dummy elements
   */
  49 #define xscl            STACK_BIAS-0x8
  50 #define yscl            STACK_BIAS-0x10
  51 #define twom3           STACK_BIAS-0x18
  52 #define two110          STACK_BIAS-0x20
  53 #define pio4            STACK_BIAS-0x28
  54 #define junk            STACK_BIAS-0x30
  55 ! sizeof temp storage - must be a multiple of 16 for V9
  56 #define tmps            0x30
  57 
  58 ! register use
  59 
  60 ! i0  n
  61 ! i1  y
  62 ! i2  stridey
  63 ! i3  x
  64 ! i4  stridex
  65 ! i5  z
  66 
  67 ! l0  k0
  68 ! l1  k1
  69 ! l2  k2
  70 ! l3  hx
  71 ! l4  pz0
  72 ! l5  pz1
  73 ! l6  pz2
  74 ! l7  stridez
  75 
  76 ! the following are 64-bit registers in both V8+ and V9
  77 
  78 ! g1  __vlibm_TBL_atan2
  79 ! g5  scratch: 0x80000000 sign mask, then hx - 0x7fe00000
  80 
  81 ! o0  hy
  82 ! o1  0x00004000
  83 ! o2  0x1420
  84 ! o3  0x7fe00000
  85 ! o4  0x03600000
  86 ! o5  0x00100000
  87 ! o7  scratch: copy of hy during the swap, then hx - hy - 0x03600000
  88 
  89 ! f0  y0
  90 ! f2  x0
  91 ! f4  t0
  92 ! f6  ah0
  93 ! f8  al0
  94 ! f10 y1
  95 ! f12 x1
  96 ! f14 t1
  97 ! f16 ah1
  98 ! f18 al1
  99 ! f20 y2
 100 ! f22 x2
 101 ! f24 t2
 102 ! f26 ah2
 103 ! f28 al2
 104 ! f30 scratch for element 0
 105 ! f32 scratch for element 1
 106 ! f34 scratch for element 2
 107 ! f36 sx0
 108 ! f38 sx1
 109 ! f40 sx2
 110 ! f42 sy0
 111 ! f44 sy1
 112 ! f46 sy2
 113 
 /*
  * Names for the floating point registers that hold the same value for the
  * whole call.  All except signbit are loaded from the constants table in
  * the prologue of __vatan2.  signbit is built there with fzero followed by
  * fnegd, which leaves only the sign bit set.
  */
 114 #define mask    %f48
 115 #define signbit %f50
 116 #define pio2    %f52
 117 #define pio2_lo %f54
 118 #define p1      %f56
 119 #define p2      %f58
 120 #define p3      %f60
 121 #define p4      %f62
 122 
 /*
  * __vatan2 - vector atan2, going by the symbol name and the table it uses.
  *
  * Arguments as listed in the register use comments of this file:
  *   %i0 n        number of elements
  *   %i1 y        pointer to the first y operand
  *   %i2 stridey  distance between y operands, in doubles
  *   %i3 x        pointer to the first x operand
  *   %i4 stridex  distance between x operands, in doubles
  *   %i5 z        pointer to the first result
  *   stridez      distance between results, in doubles.  It is read from
  *                the caller frame into %l7 rather than from a register.
  * NOTE(review): presumably the C prototype is
  *   void __vatan2(int n, double *y, int stridey, double *x, int stridex,
  *                 double *z, int stridez)
  * confirm against the header that declares it.
  *
  * The prologue below loads the constants, builds the integer thresholds in
  * %o1-%o5, converts the strides to byte counts and primes the loop with the
  * first element.
  */
 123         ENTRY(__vatan2)
 /* new register window with the minimum frame plus tmps bytes of scratch */
 124         save    %sp,-SA(MINFRAME)-tmps,%sp
 /*
  * PIC_SETUP and PIC_SET are macros that are not defined in this file
  * (presumably they come from libm.h).  The code that follows expects the
  * address of constants in %o0 and the address of __vlibm_TBL_atan2 in %o1.
  */
 125         PIC_SETUP(l7)
 126         PIC_SET(l7,constants,o0)
 127         PIC_SET(l7,__vlibm_TBL_atan2,o1)
 128         wr      %g0,0x82,%asi           ! set %asi for non-faulting loads
 /* keep the table base in %g1, since %o1 is reused for a constant below */
 129         mov     %o1, %g1
 /* stridez: the frame offset differs between the 64-bit and 32-bit builds */
 130 #ifdef __sparcv9
 131         ldx     [%fp+STACK_BIAS+0xb0],%l7
 132 #else
 133         ld      [%fp+0x5c],%l7
 134 #endif
 135         ldd     [%o0+0x00],pio2         ! load/set up constants
 136         ldd     [%o0+0x08],pio2_lo
 137         ldd     [%o0+0x10],p1
 138         ldd     [%o0+0x18],p2
 139         ldd     [%o0+0x20],p3
 140         ldd     [%o0+0x28],p4
 141         ldd     [%o0+0x30],mask
 /* signbit = -0.0, that is, a double with only the sign bit set */
 142         fzero   signbit
 143         fnegd   signbit,signbit
 /*
  * Integer constants compared against the high words of the operands:
  *   %o1 0x00004000  added to hx - hy before the shift right by 10
  *   %o2 0x1420      upper limit for the table byte offset
  *   %o3 0x7fe00000  threshold for a very large high word
  *   %o4 0x03600000  threshold for a very large hx - hy
  *   %o5 0x00100000  smallest high word that is not treated as small
  */
 144         sethi   %hi(0x00004000),%o1
 145         sethi   %hi(0x1420),%o2
 146         or      %o2,%lo(0x1420),%o2
 147         sethi   %hi(0x7fe00000),%o3
 148         sethi   %hi(0x03600000),%o4
 149         sethi   %hi(0x00100000),%o5
 150         ldd     [%o0+0x38],%f0          ! copy rarely used constants to stack
 151         ldd     [%o0+0x40],%f2
 152         ldd     [%o0+0x48],%f4
 153         std     %f0,[%fp+twom3]
 154         std     %f2,[%fp+two110]
 155         std     %f4,[%fp+pio4]
 /* strides are given in doubles, so multiply by 8 to get byte counts */
 156         sll     %i2,3,%i2               ! scale strides
 157         sll     %i4,3,%i4
 158         sll     %l7,3,%l7
 /*
  * The top of the loop finishes and stores the element 2 result of the
  * previous pass.  Zero the registers involved and aim %l6 at the junk slot
  * so that the first pass stores a harmless value there.
  */
 159         fzero   %f20                    ! loop prologue
 160         fzero   %f22
 161         fzero   %f24
 162         fzero   %f26
 163         fzero   %f46
 164         add     %fp,junk,%l6    
 /* first element: y in %f0, x in %f8, and their high words in %o0 and %l3 */
 165         ld      [%i1],%f0               ! *y
 166         ld      [%i1+4],%f1
 167         ld      [%i3],%f8               ! *x
 168         ld      [%i3+4],%f9
 169         ld      [%i1],%o0               ! hy
 170         ba      .loop
 171         ld      [%i3],%l3               ! hx
 172 
 /*
  * Main loop.  Each pass handles up to three elements whose instructions are
  * interleaved (presumably to hide floating point latency).  The three
  * indentation depths of the instructions follow the three elements:
  *   element 0: operands in %f0 and %f2,   %fcc0, offset %l0, result via %l4
  *   element 1: operands in %f10 and %f12, %fcc1, offset %l1, result via %l5
  *   element 2: operands in %f20 and %f22, %fcc2, offset %l2, result via %l6
  * The last three additions and the store for element 2 are carried over to
  * the top of the next pass, or to the code that follows the loop.
  *
  * Per element: the magnitudes of y and x are ordered so that the smaller
  * one ends up as the numerator, a table entry is chosen from the difference
  * of the two high words, the quotient (y - x*t)/(x + y*t) is formed with t
  * taken from the table entry, and an odd polynomial with the coefficients
  * p1 to p4 is evaluated on that quotient.  The result is the sum of the
  * polynomial, the remaining two doubles of the table entry and a multiple
  * of pio2 chosen by XOR operations on the sign bits, with the sign applied
  * last.  Unordered, very large and very small operands leave the loop for
  * the .nanN, .bigN and .smallN paths.
  */
 173 ! 16-byte aligned
 174         .align  16
 175 .loop:
 /* element 0: magnitudes into %f4 and %f2, signs into %f42 (y) and %f36 (x) */
 176         fabsd   %f0,%f4
 177         mov     %i5,%l4
 178         add     %i1,%i2,%i1             ! y += stridey
 179 
 180         fabsd   %f8,%f2
 181         add     %i3,%i4,%i3             ! x += stridex
 182         add     %i5,%l7,%i5             ! z += stridez
 183 
 184         fand    %f0,signbit,%f42
 185         sethi   %hi(0x80000000),%g5
 186 
 187         fand    %f8,signbit,%f36
 188         andn    %o0,%g5,%o0
 189         andn    %l3,%g5,%l3
 190 
 191         fcmpd   %fcc0,%f4,%f2
 192 
 193         fmovd   %f4,%f0
 194 
 195         fmovdg  %fcc0,%f2,%f0           ! swap if |y| > |x|
 196 
 197         fmovdg  %fcc0,%f4,%f2
 198         mov     %o0,%o7
 199          lda    [%i1]%asi,%f10          ! preload next argument
 200 
 /* first two of the three carried-over additions for element 2 */
 201           faddd %f26,%f20,%f26
 202          lda    [%i1+4]%asi,%f11
 203 
 204           faddd %f22,%f24,%f22
 /* swap the integer copies of the high words the same way */
 205         movg    %fcc0,%l3,%o0
 206 
 207         movg    %fcc0,%o7,%l3
 208 
 209         fbu,pn  %fcc0,.nan0             ! if x or y is nan
 210 ! delay slot
 211          lda    [%i3]%asi,%f18
 212 
 /* from here on hx is the larger and hy the smaller of the two high words */
 213         sub     %l3,%o0,%l0             ! hx - hy
 214         sub     %l3,%o3,%g5
 215          fabsd  %f10,%f14
 216          lda    [%i3+4]%asi,%f19
 217 
 218         sub     %l0,%o4,%o7
 /* third carried-over addition for element 2 */
 219           faddd %f22,%f26,%f26
 220 
 /*
  * The branch is taken unless both hx - 0x7fe00000 and
  * hx - hy - 0x03600000 are negative.
  */
 221         andcc   %g5,%o7,%g0
 222         bge,pn  %icc,.big0              ! if |x| or |x/y| is big
 223 ! delay slot
 224         nop
 225 
 226          fabsd  %f18,%f12
 227         cmp     %o0,%o5
 228         bl,pn   %icc,.small0            ! if |y| is small
 229 ! delay slot
 230          lda    [%i1]%asi,%o0
 231 
 /* count element 0.  With none left, .last1 supplies a dummy element 1 */
 232         add     %l0,%o1,%l0             ! k
 233         addcc   %i0,-1,%i0
 234         ble,pn  %icc,.last1
 235 ! delay slot
 236          lda    [%i3]%asi,%l3
 237 
 /*
  * Table byte offset for element 0: (hx - hy + 0x4000) shifted right by 10,
  * low five bits cleared, limited to 0x1420.  The same stretch of code gives
  * the carried-over element 2 result its sign and stores it through %l6, and
  * starts on element 1 the way the top of the loop did for element 0.
  */
 238 .cont1:
 239         srl     %l0,10,%l0
 240          mov    %i5,%l5
 241           fxor  %f26,%f46,%f26
 242           st    %f26,[%l6]
 243 
 244          fand   %f10,signbit,%f44
 245         andn    %l0,0x1f,%l0
 246          add    %i1,%i2,%i1
 247           st    %f27,[%l6+4]
 248 
 249          fand   %f18,signbit,%f38
 250         cmp     %l0,%o2
 251         movg    %icc,%o2,%l0
 252 
 253          fcmpd  %fcc1,%f14,%f12
 254          add    %i3,%i4,%i3
 255          add    %i5,%l7,%i5
 256 
 257          fmovd  %f14,%f10
 258         add     %l0,%g1,%l0
 259          sethi  %hi(0x80000000),%g5
 260 
 /*
  * Element 0 quotient.  t is the double at offset 0x10 of the table entry.
  * x is split with mask into its upper 32 bits and the remainder, so that
  * y - x*t is formed as (y - xhi*t) - (x - xhi)*t.
  */
 261         ldd     [%l0+0x10],%f4
 262         fand    %f2,mask,%f6
 263          andn   %o0,%g5,%o0
 264          andn   %l3,%g5,%l3
 265 
 266          fmovdg %fcc1,%f12,%f10
 267 
 268          fmovdg %fcc1,%f14,%f12
 269          mov    %o0,%o7
 270           lda   [%i1]%asi,%f20
 271 
 272         fsubd   %f2,%f6,%f30
 273         fmuld   %f6,%f4,%f6
 274          movg   %fcc1,%l3,%o0
 275 
 276         fmuld   %f0,%f4,%f8
 277          movg   %fcc1,%o7,%l3
 278 
 279           lda   [%i1+4]%asi,%f21
 280          fbu,pn %fcc1,.nan1
 281 ! delay slot
 282          nop
 283 
 284           lda   [%i3]%asi,%f28
 285          sub    %l3,%o0,%l1
 286          sub    %l3,%o3,%g5
 287 
 288           lda   [%i3+4]%asi,%f29
 289         fmuld   %f30,%f4,%f30
 290         fsubd   %f0,%f6,%f4
 291          sub    %l1,%o4,%o7
 292 
 293           fabsd %f20,%f24
 294          andcc  %g5,%o7,%g0
 295          bge,pn %icc,.big1
 296 ! delay slot
 297          nop
 298 
 299         faddd   %f2,%f8,%f8
 300          cmp    %o0,%o5
 301          bl,pn  %icc,.small1
 302 ! delay slot
 303           lda   [%i1]%asi,%o0
 304 
 305           fabsd %f28,%f22
 306          add    %l1,%o1,%l1
 307          addcc  %i0,-1,%i0
 308           lda   [%i3]%asi,%l3
 309 
 310         fsubd   %f4,%f30,%f4
 311          srl    %l1,10,%l1
 312          ble,pn %icc,.last2
 313 ! delay slot
 314           mov   %i5,%l6
 315 
 /*
  * Element 1 table offset is finished here in the same way as for element 0,
  * element 2 is split into magnitudes and signs, and the element 0 divide is
  * issued.
  */
 316 .cont2:
 317           fand  %f20,signbit,%f46
 318          andn   %l1,0x1f,%l1
 319           add   %i1,%i2,%i1
 320 
 321           fand  %f28,signbit,%f40
 322          cmp    %l1,%o2
 323          movg   %icc,%o2,%l1
 324 
 325           fcmpd %fcc2,%f24,%f22
 326           add   %i3,%i4,%i3
 327           add   %i5,%l7,%i5
 328 
 329         fdivd   %f4,%f8,%f4
 330           fmovd %f24,%f20
 331          add    %l1,%g1,%l1
 332           sethi %hi(0x80000000),%g5
 333 
 334          ldd    [%l1+0x10],%f14
 335          fand   %f12,mask,%f16
 336           andn  %o0,%g5,%o0
 337           andn  %l3,%g5,%l3
 338 
 339           fmovdg %fcc2,%f22,%f20
 340 
 341           fmovdg %fcc2,%f24,%f22
 342           mov   %o0,%o7
 343 
 344          fsubd  %f12,%f16,%f32
 345          fmuld  %f16,%f14,%f16
 346           movg  %fcc2,%l3,%o0
 347 
 348         fnegd   pio2_lo,%f8             ! al
 349          fmuld  %f10,%f14,%f18
 350           movg  %fcc2,%o7,%l3
 351 
 352         fzero   %f0
 353           fbu,pn %fcc2,.nan2
 354 ! delay slot
 355           nop
 356 
 /* %f0 becomes the sign bit if element 0 was swapped, zero otherwise */
 357         fmovdg  %fcc0,signbit,%f0
 358           sub   %l3,%o0,%l2
 359           sub   %l3,%o3,%g5
 360 
 361          fmuld  %f32,%f14,%f32
 362          fsubd  %f10,%f16,%f14
 363           sub   %l2,%o4,%o7
 364 
 365          faddd  %f12,%f18,%f18
 366           andcc %g5,%o7,%g0
 367           bge,pn %icc,.big2
 368 ! delay slot
 369           nop
 370 
 371         fxor    %f36,%f0,%f36
 372           cmp   %o0,%o5
 373           bl,pn %icc,.small2
 374 ! delay slot
 375           nop
 376 
 /*
  * From here to the end of the loop body there are no branches except the
  * closing one.  The three polynomial evaluations and the three result sums
  * are interleaved.  With z the quotient of an element, the polynomial part
  * is z + p1*z**3 + p2*z**5 + p3*z**7 + p4*z**9, assembled from partial
  * products.
  */
 377 .cont3:
 378         fmovdg  %fcc0,signbit,%f8
 379           add   %l2,%o1,%l2
 380 
 381          fsubd  %f14,%f32,%f14
 382           srl   %l2,10,%l2
 383 
 384         fxor    %f36,pio2_lo,%f30       ! al
 385           andn  %l2,0x1f,%l2
 386 
 387         fxor    %f36,pio2,%f0           ! ah
 388           cmp   %l2,%o2
 389           movg  %icc,%o2,%l2
 390 
 391         fxor    %f42,%f36,%f42          ! sy
 392 
 393         faddd   %f8,%f30,%f8
 /* the double at offset 8 of the table entry joins the low-order sum */
 394         ldd     [%l0+0x8],%f30
 395           add   %l2,%g1,%l2
 396 
 397          fdivd  %f14,%f18,%f14
 398          fzero  %f10
 399 
 400           ldd   [%l2+0x10],%f24
 401           fand  %f22,mask,%f26
 402 
 403          fmovdg %fcc1,signbit,%f10
 404 
 405         fmuld   %f4,%f4,%f36
 406         faddd   %f8,%f30,%f8
 407 
 408           fsubd %f22,%f26,%f34
 409           fmuld %f26,%f24,%f26
 410 
 411           fmuld %f20,%f24,%f28
 412          fxor   %f38,%f10,%f38
 413 
 414         fmuld   %f4,p3,%f6
 415          fnegd  pio2_lo,%f18
 416 
 417         fmuld   %f36,p2,%f2
 418          fmovdg %fcc1,signbit,%f18
 419 
 420         fmuld   %f36,%f4,%f36
 421          fxor   %f38,pio2,%f10
 422 
 423           fmuld %f34,%f24,%f34
 424           fsubd %f20,%f26,%f24
 425 
 426           faddd %f22,%f28,%f28
 427 
 428         faddd   %f2,p1,%f2
 429 
 430         fmuld   %f36,p4,%f30
 431          fxor   %f38,pio2_lo,%f32
 432 
 433           fsubd %f24,%f34,%f24
 434 
 435          fxor   %f44,%f38,%f44
 436 
 437         fmuld   %f36,%f2,%f2
 438          faddd  %f18,%f32,%f18
 439          ldd    [%l1+0x8],%f32
 440 
 441         fmuld   %f36,%f36,%f36
 442         faddd   %f6,%f30,%f30
 443 
 444           fdivd %f24,%f28,%f24
 445           fzero %f20
 446 
 447           fmovdg %fcc2,signbit,%f20
 448 
 449         faddd   %f2,%f8,%f2
 450 
 451          fmuld  %f14,%f14,%f38
 452          faddd  %f18,%f32,%f18
 453 
 454         fmuld   %f36,%f30,%f36
 455           fxor  %f40,%f20,%f40
 456 
 457         fnegd   pio2,%f6                ! ah
 458          fmuld  %f14,p3,%f16
 459 
 460         fmovdg  %fcc0,signbit,%f6
 461 
 462          fmuld  %f38,p2,%f12
 463           fnegd pio2_lo,%f28
 464 
 465         faddd   %f2,%f36,%f2
 466          fmuld  %f38,%f14,%f38
 467 
 468         faddd   %f6,%f0,%f6
 /* the first double of the table entry joins the high-order sum */
 469         ldd     [%l0],%f0
 470 
 471           fmovdg %fcc2,signbit,%f28
 472 
 473          faddd  %f12,p1,%f12
 474 
 475          fmuld  %f38,p4,%f32
 476           fxor  %f40,pio2_lo,%f34
 477 
 478           fxor  %f40,pio2,%f20
 479 
 480         faddd   %f2,%f4,%f2
 481 
 482          fmuld  %f38,%f12,%f12
 483           fxor  %f46,%f40,%f46
 484 
 485          fmuld  %f38,%f38,%f38
 486          faddd  %f16,%f32,%f32
 487 
 488           faddd %f28,%f34,%f28
 489           ldd   [%l2+0x8],%f34
 490 
 /*
  * The loads below fetch the element 0 operands of the next pass.  They go
  * through %asi, which the prologue set up for non-faulting loads, because
  * the pointers may already be past the last element.
  */
 491         faddd   %f6,%f0,%f6
 492         lda     [%i1]%asi,%f0           ! preload next argument
 493 
 494          faddd  %f12,%f18,%f12
 495         lda     [%i1+4]%asi,%f1
 496 
 497           fmuld %f24,%f24,%f40
 498         lda     [%i3]%asi,%f8
 499 
 500          fmuld  %f38,%f32,%f38
 501           faddd %f28,%f34,%f28
 502         lda     [%i3+4]%asi,%f9
 503 
 504          fnegd  pio2,%f16
 505           fmuld %f24,p3,%f26
 506         lda     [%i1]%asi,%o0
 507 
 508          fmovdg %fcc1,signbit,%f16
 509         lda     [%i3]%asi,%l3
 510 
 511           fmuld %f40,p2,%f22
 512 
 513          faddd  %f12,%f38,%f12
 514           fmuld %f40,%f24,%f40
 515 
 516         faddd   %f2,%f6,%f6
 517 
 518          faddd  %f16,%f10,%f16
 519          ldd    [%l1],%f10
 520 
 521           faddd %f22,p1,%f22
 522 
 523          faddd  %f12,%f14,%f12
 524           fmuld %f40,p4,%f34
 525 
 /* element 0 result: apply the sign and store both halves through %l4 */
 526         fxor    %f6,%f42,%f6
 527         st      %f6,[%l4]
 528 
 529          faddd  %f16,%f10,%f16
 530         st      %f7,[%l4+4]
 531 
 532           fmuld %f40,%f22,%f22
 533 
 534           fmuld %f40,%f40,%f40
 535           faddd %f26,%f34,%f34
 536 
 537           fnegd pio2,%f26
 538 
 539          faddd  %f12,%f16,%f16
 540 
 541           faddd %f22,%f28,%f22
 542 
 543           fmuld %f40,%f34,%f40
 544           fmovdg %fcc2,signbit,%f26
 545 
 546 ! -
 547 
 /* element 1 result: apply the sign and store both halves through %l5 */
 548          fxor   %f16,%f44,%f16
 549          st     %f16,[%l5]
 550 
 551           faddd %f26,%f20,%f26
 552          st     %f17,[%l5+4]
 /* count element 2 and go around again while elements remain */
 553         addcc   %i0,-1,%i0
 554 
 555           faddd %f22,%f40,%f22
 556         bg,pt   %icc,.loop
 557 ! delay slot
 558           ldd   [%l2],%f20
 559 
 560 
 /*
  * Loop exit.  Perform the three additions that the top of the loop would
  * have done for element 2, apply the sign, store through %l6 and return.
  * .done_from_special0 is entered from .special0 when no elements remain,
  * at which point %f26 already holds the finished sum.
  */
 561           faddd %f26,%f20,%f26
 562           faddd %f22,%f24,%f22
 563           faddd %f22,%f26,%f26
 564 .done_from_special0:
 565           fxor  %f26,%f46,%f26
 566           st    %f26,[%l6]
 567           st    %f27,[%l6+4]
 568           ret
 569           restore
 570 
 571 
 572 
 /*
  * .last1 - reached when the element count runs out after element 0.
  * Element 1 is replaced by a dummy whose y and x are both pio2.  %o0 and
  * %l3 receive 0x3ff921fb, the high word of pio2, in place of the preloaded
  * high words.  The delay slot points %i5 at the junk slot.  .cont1 copies
  * %i5 to %l5, so the dummy result for element 1 lands in the junk slot.
  */
 573         .align  16
 574 .last1:
 575         fmovd   pio2,%f10               ! set up dummy arguments
 576         fmovd   pio2,%f18
 577         fabsd   %f10,%f14
 578         fabsd   %f18,%f12
 579         sethi   %hi(0x3ff921fb),%o0
 580         or      %o0,%lo(0x3ff921fb),%o0
 581         mov     %o0,%l3
 582         ba,pt   %icc,.cont1
 583 ! delay slot
 584         add     %fp,junk,%i5
 585 
 586 
 587 
 /*
  * .last2 - reached when the element count runs out after element 1.
  * Element 2 is replaced by a dummy whose y and x are both pio2, with
  * 0x3ff921fb, the high word of pio2, in %o0 and %l3.  The delay slot points
  * %l6, the result pointer of element 2, at the junk slot so that the dummy
  * result is discarded.
  */
 588         .align  16
 589 .last2:
 590         fmovd   pio2,%f20
 591         fmovd   pio2,%f28
 592         fabsd   %f20,%f24
 593         fabsd   %f28,%f22
 594         sethi   %hi(0x3ff921fb),%o0
 595         or      %o0,%lo(0x3ff921fb),%o0
 596         mov     %o0,%l3
 597         ba,pt   %icc,.cont2
 598 ! delay slot
 599         add     %fp,junk,%l6
 600 
 601 
 602 
 /*
  * Out-of-line paths for element 0.  They are entered from the main loop
  * and from the reclassification code at the end of .special0.  On entry
  * %f0 and %f2 hold the smaller and larger magnitude, %f8 the original x,
  * %f36 and %f42 the signs of x and y, %l0 the difference hx - hy of the
  * ordered high words and %g5 the value hx - 0x7fe00000.
  *
  * .nan0 - the compare was unordered.  The first instruction is the third
  * carried-over addition for element 2, which the main loop performs after
  * its branch to here.  The entry .nan0_from_special0 skips it because that
  * sum is already complete on that route.  The rest completes the preload of
  * the next element.  The product formed in the delay slot is the result.
  */
 603         .align  16
 604 .nan0:
 605           faddd %f22,%f26,%f26
 606 .nan0_from_special0:
 607          fabsd  %f10,%f14
 608          lda    [%i3+4]%asi,%f19
 609          fabsd  %f18,%f12
 610          lda    [%i1]%asi,%o0
 611          lda    [%i3]%asi,%l3
 612         ba,pt   %icc,.special0
 613 ! delay slot
 614         fmuld   %f0,%f2,%f6
 615 
 616 
 /*
  * .big0 - the larger high word is at least 0x7fe00000, or the difference
  * of the high words is at least 0x03600000.
  *   larger high word at least 0x7ff00000: go to .return_ah0
  *   difference below 0x03600000: multiply both magnitudes by twom3 and
  *     rejoin the main path at .cont1, or at .last1 if the count ran out
  *   otherwise, at the local label 1: go to .return_ah0 if the operands were
  *     swapped or if x compares below signbit.  If neither holds the result
  *     is the quotient formed in the delay slot of the branch to .special0.
  */
 617         .align  16
 618 .big0:
 619          fabsd  %f18,%f12
 620          lda    [%i1]%asi,%o0
 621          lda    [%i3]%asi,%l3
 622         cmp     %g5,%o5 
 623         bge,pn  %icc,.return_ah0        ! if hx >= 0x7ff00000
 624 ! delay slot
 625         nop
 626         cmp     %l0,%o4
 627         bge,pn  %icc,1f                 ! if hx - hy >= 0x03600000
 628 ! delay slot
 629         nop
 630         ldd     [%fp+twom3],%f6
 631         fmuld   %f0,%f6,%f0
 632         fmuld   %f2,%f6,%f2
 633         add     %l0,%o1,%l0
 634         addcc   %i0,-1,%i0
 635         ble,pn  %icc,.last1
 636 ! delay slot
 637         nop
 638         ba,pt   %icc,.cont1
 639 ! delay slot
 640         nop
 641 1:
 642         fbg,pn  %fcc0,.return_ah0
 643 ! delay slot
 644         nop
 645         fcmpd   %fcc3,%f8,signbit
 646         fbl,pn  %fcc3,.return_ah0
 647 ! delay slot
 648         nop
 649         ba,pt   %icc,.special0
 650 ! delay slot
 651         fdivd   %f0,%f2,%f6
 652 
 653 
 /*
  * .small0 - the smaller high word is below 0x00100000.  If the smaller
  * magnitude compares equal to signbit, that is, if it is zero, go to
  * .return_ah0.  Otherwise multiply both magnitudes by two110, pass the high
  * words of the scaled values through the yscl and xscl stack slots into
  * integer registers, recompute the rounded difference in %l0 and rejoin the
  * main path.
  */
 654         .align  16
 655 .small0:
 656          lda    [%i3]%asi,%l3
 657         fcmpd   %fcc3,%f0,signbit
 658         fbe,pt  %fcc3,.return_ah0
 659 ! delay slot
 660         nop
 661         ldd     [%fp+two110],%f6
 662         fmuld   %f0,%f6,%f0
 663         fmuld   %f2,%f6,%f2
 664         st      %f0,[%fp+yscl]
 665         ld      [%fp+yscl],%o7
 666         st      %f2,[%fp+xscl]
 667         ld      [%fp+xscl],%l0
 668         sub     %l0,%o7,%l0
 669         add     %l0,%o1,%l0
 670         addcc   %i0,-1,%i0
 671         ble,pn  %icc,.last1
 672 ! delay slot
 673         nop
 674         ba,pt   %icc,.cont1
 675 ! delay slot
 676         nop
 677 
 678 
 /*
  * .return_ah0 - the result is built from pio2 alone, with no table lookup
  * and no polynomial.  The first eight instructions are the same sign and
  * pio2 selection that the main path uses for its high-order term.
  * %g5 - %l0 equals hy - 0x7fe00000, so the test below adds pio4 only when
  * the smaller high word is at least 0x7ff00000 as well.
  */
 679         .align  16
 680 .return_ah0:
 681         fzero   %f0
 682         fmovdg  %fcc0,signbit,%f0
 683         fxor    %f36,%f0,%f36
 684         fxor    %f36,pio2,%f0
 685         fxor    %f42,%f36,%f42
 686         fnegd   pio2,%f6
 687         fmovdg  %fcc0,signbit,%f6
 688         faddd   %f6,%f0,%f6
 689         sub     %g5,%l0,%o7
 690         cmp     %o7,%o5
 691         bl,pt   %icc,1f                 ! if hy < 0x7ff00000
 692 ! delay slot
 693         nop
 694         ldd     [%fp+pio4],%f0
 695         faddd   %f6,%f0,%f6
 696 1:
 /*
  * NOTE(review): %f4 is overwritten below before it is read, so this
  * conversion is presumably done only for its floating point exception
  * side effect.  Confirm.
  */
 697         fdtoi   %f6,%f4
 /*
  * .special0 - apply the sign to the result in %f6, store it through %l4
  * and count the element.  If none remain, finish at .done_from_special0.
  * Otherwise the preloaded next element moves from the element 1 registers
  * into the element 0 registers and is classified again.  That code repeats
  * the top of the main loop without the carried-over element 2 additions.
  */
 698 .special0:
 699         fxor    %f6,%f42,%f6
 700         st      %f6,[%l4]
 701         st      %f7,[%l4+4]
 702         addcc   %i0,-1,%i0
 703         ble,pn  %icc,.done_from_special0
 704 ! delay slot
 705         nop
 706         fmovd   %f10,%f0
 707         fmovd   %f18,%f8
 708         fmovd   %f14,%f4
 709         fmovd   %f12,%f2
 710         mov     %i5,%l4
 711         add     %i1,%i2,%i1
 712         add     %i3,%i4,%i3
 713         add     %i5,%l7,%i5
 714         fand    %f0,signbit,%f42
 715         sethi   %hi(0x80000000),%g5
 716         fand    %f8,signbit,%f36
 717         andn    %o0,%g5,%o0
 718         andn    %l3,%g5,%l3
 719         fcmpd   %fcc0,%f4,%f2
 720         fmovd   %f4,%f0
 721         fmovdg  %fcc0,%f2,%f0
 722         fmovdg  %fcc0,%f4,%f2
 723         mov     %o0,%o7
 724         movg    %fcc0,%l3,%o0
 725         movg    %fcc0,%o7,%l3
 726          lda    [%i1]%asi,%f10
 727          lda    [%i1+4]%asi,%f11
 728         fbu,pn  %fcc0,.nan0_from_special0
 729 ! delay slot
 730          lda    [%i3]%asi,%f18
 731          fabsd  %f10,%f14
 732          lda    [%i3+4]%asi,%f19
 733         sub     %l3,%o0,%l0
 734         sub     %l3,%o3,%g5
 735         sub     %l0,%o4,%o7
 736         andcc   %g5,%o7,%g0
 737         bge,pn  %icc,.big0
 738 ! delay slot
 739         nop
 740          fabsd  %f18,%f12
 741         cmp     %o0,%o5
 742         bl,pn   %icc,.small0
 743 ! delay slot
 744          lda    [%i1]%asi,%o0
 745         add     %l0,%o1,%l0
 746         addcc   %i0,-1,%i0
 747         ble,pn  %icc,.last1
 748 ! delay slot
 749          lda    [%i3]%asi,%l3
 750         ba,pt   %icc,.cont1
 751 ! delay slot
 752         nop
 753 
 754 
 755 
 /*
  * Out-of-line paths for element 1.  They mirror the element 0 paths, using
  * %f10 and %f12 for the ordered magnitudes, %f18 for the original x, %f38
  * and %f44 for the signs, %fcc1, %l1 and the result pointer %l5.
  *
  * Because the main loop interleaves element 0 arithmetic with the
  * element 1 classification, each entry from the main loop first performs
  * the element 0 instructions that the main path would have executed after
  * the branch point.  The _from_special1 entries skip them because the
  * reclassification code at the end of .special1 has no such work pending.
  * Each path also completes the preload of the element 2 operands.
  *
  * .nan1 - the compare was unordered.  The product formed in the delay slot
  * is the result.
  */
 756         .align  16
 757 .nan1:
 758         fmuld   %f30,%f4,%f30
 759         fsubd   %f0,%f6,%f4
 760         faddd   %f2,%f8,%f8
 761         fsubd   %f4,%f30,%f4
 762 .nan1_from_special1:
 763          lda    [%i3]%asi,%f28
 764          lda    [%i3+4]%asi,%f29
 765          fabsd  %f20,%f24
 766          lda    [%i1]%asi,%o0
 767          fabsd  %f28,%f22
 768          lda    [%i3]%asi,%l3
 769          mov    %i5,%l6
 770         ba,pt   %icc,.special1
 771 ! delay slot
 772         fmuld   %f10,%f12,%f16
 773 
 774 
 /*
  * .big1 - same three-way decision as .big0.  On the rescaling route the
  * offset in %l1 is shifted right by 10 here, because .cont2 and .last2 are
  * entered after the point where the main path does that shift.
  */
 775         .align  16
 776 .big1:
 777         faddd   %f2,%f8,%f8
 778         fsubd   %f4,%f30,%f4
 779 .big1_from_special1:
 780          lda    [%i1]%asi,%o0
 781          fabsd  %f28,%f22
 782          lda    [%i3]%asi,%l3
 783          mov    %i5,%l6
 784         cmp     %g5,%o5 
 785         bge,pn  %icc,.return_ah1
 786 ! delay slot
 787         nop
 788         cmp     %l1,%o4
 789         bge,pn  %icc,1f
 790 ! delay slot
 791         nop
 792         ldd     [%fp+twom3],%f16
 793         fmuld   %f10,%f16,%f10
 794         fmuld   %f12,%f16,%f12
 795         add     %l1,%o1,%l1
 796         srl     %l1,10,%l1
 797         addcc   %i0,-1,%i0
 798         ble,pn  %icc,.last2
 799 ! delay slot
 800         nop
 801         ba,pt   %icc,.cont2
 802 ! delay slot
 803         nop
 804 1:
 805         fbg,pn  %fcc1,.return_ah1
 806 ! delay slot
 807         nop
 808         fcmpd   %fcc3,%f18,signbit
 809         fbl,pn  %fcc3,.return_ah1
 810 ! delay slot
 811         nop
 812         ba,pt   %icc,.special1
 813 ! delay slot
 814         fdivd   %f10,%f12,%f16
 815 
 816 
 /*
  * .small1 - same as .small0: a zero smaller magnitude goes to .return_ah1.
  * Anything else is multiplied by two110 and the rounded, shifted difference
  * of the scaled high words is recomputed in %l1.
  */
 817         .align  16
 818 .small1:
 819         fsubd   %f4,%f30,%f4
 820 .small1_from_special1:
 821          fabsd  %f28,%f22
 822          lda    [%i3]%asi,%l3
 823          mov    %i5,%l6
 824         fcmpd   %fcc3,%f10,signbit
 825         fbe,pt  %fcc3,.return_ah1
 826 ! delay slot
 827         nop
 828         ldd     [%fp+two110],%f16
 829         fmuld   %f10,%f16,%f10
 830         fmuld   %f12,%f16,%f12
 831         st      %f10,[%fp+yscl]
 832         ld      [%fp+yscl],%o7
 833         st      %f12,[%fp+xscl]
 834         ld      [%fp+xscl],%l1
 835         sub     %l1,%o7,%l1
 836         add     %l1,%o1,%l1
 837         srl     %l1,10,%l1
 838         addcc   %i0,-1,%i0
 839         ble,pn  %icc,.last2
 840 ! delay slot
 841         nop
 842         ba,pt   %icc,.cont2
 843 ! delay slot
 844         nop
 845 
 846 
 /*
  * .return_ah1 - the result is built from pio2 alone, as in .return_ah0.
  * %g5 - %l1 equals hy - 0x7fe00000, so pio4 is added only when the smaller
  * high word is at least 0x7ff00000 as well.
  */
 847         .align  16
 848 .return_ah1:
 849         fzero   %f10
 850         fmovdg  %fcc1,signbit,%f10
 851         fxor    %f38,%f10,%f38
 852         fxor    %f38,pio2,%f10
 853         fxor    %f44,%f38,%f44
 854         fnegd   pio2,%f16
 855         fmovdg  %fcc1,signbit,%f16
 856         faddd   %f16,%f10,%f16
 857         sub     %g5,%l1,%o7
 858         cmp     %o7,%o5
 859         bl,pt   %icc,1f 
 860 ! delay slot
 861         nop
 862         ldd     [%fp+pio4],%f10
 863         faddd   %f16,%f10,%f16
 864 1:
 /*
  * NOTE(review): %f14 is overwritten below before it is read, so this
  * conversion is presumably done only for its floating point exception
  * side effect.  Confirm.
  */
 865         fdtoi   %f16,%f14
 /*
  * .special1 - apply the sign to the result in %f16, store it through %l5
  * and count the element.  If none remain, a dummy element with y and x
  * both pio2 is set up and %i5 is pointed at the junk slot.  Either way the
  * element 2 registers are then moved into the element 1 registers and
  * classified again with %fcc1.
  */
 866 .special1:
 867         fxor    %f16,%f44,%f16
 868         st      %f16,[%l5]
 869         st      %f17,[%l5+4]
 870         addcc   %i0,-1,%i0
 871         bg,pn   %icc,1f
 872 ! delay slot
 873         nop
 874         fmovd   pio2,%f20               ! set up dummy argument
 875         fmovd   pio2,%f28
 876         fabsd   %f20,%f24
 877         fabsd   %f28,%f22
 878         sethi   %hi(0x3ff921fb),%o0
 879         or      %o0,%lo(0x3ff921fb),%o0
 880         mov     %o0,%l3
 881         add     %fp,junk,%i5
 882 1:
 883         fmovd   %f20,%f10
 884         fmovd   %f28,%f18
 885         fmovd   %f24,%f14
 886         fmovd   %f22,%f12
 887         mov     %i5,%l5
 888         add     %i1,%i2,%i1
 889         add     %i3,%i4,%i3
 890         add     %i5,%l7,%i5
 891         fand    %f10,signbit,%f44
 892         sethi   %hi(0x80000000),%g5
 893         fand    %f18,signbit,%f38
 894         andn    %o0,%g5,%o0
 895         andn    %l3,%g5,%l3
 896         fcmpd   %fcc1,%f14,%f12
 897         fmovd   %f14,%f10
 898         fmovdg  %fcc1,%f12,%f10
 899         fmovdg  %fcc1,%f14,%f12
 900         mov     %o0,%o7
 901         movg    %fcc1,%l3,%o0
 902         movg    %fcc1,%o7,%l3
 903          lda    [%i1]%asi,%f20
 904          lda    [%i1+4]%asi,%f21
 905         fbu,pn  %fcc1,.nan1_from_special1
 906 ! delay slot
 907         nop
 908          lda    [%i3]%asi,%f28
 909          lda    [%i3+4]%asi,%f29
 910          fabsd  %f20,%f24
 911         sub     %l3,%o0,%l1
 912         sub     %l3,%o3,%g5
 913         sub     %l1,%o4,%o7
 914         andcc   %g5,%o7,%g0
 915         bge,pn  %icc,.big1_from_special1
 916 ! delay slot
 917         nop
 918         cmp     %o0,%o5
 919         bl,pn   %icc,.small1_from_special1
 920 ! delay slot
 921          lda    [%i1]%asi,%o0
 922          fabsd  %f28,%f22
 923          lda    [%i3]%asi,%l3
 924         add     %l1,%o1,%l1
 925         srl     %l1,10,%l1
 926         addcc   %i0,-1,%i0
 927         ble,pn  %icc,.last2
 928 ! delay slot
 929          mov    %i5,%l6
 930         ba,pt   %icc,.cont2
 931 ! delay slot
 932         nop
 933 
 934 
 935 
 /*
  * Out-of-line paths for element 2.  They mirror the element 0 and element 1
  * paths, using %f20 and %f22 for the ordered magnitudes, %f28 for the
  * original x, %f40 and %f46 for the signs, %fcc2, %l2 and the result
  * pointer %l6.  The paths that continue with the normal computation rejoin
  * the main loop at .cont3, which adds the rounding constant to %l2 and
  * shifts it.
  *
  * .nan2 - the compare was unordered.  The first five instructions are the
  * element 0 and element 1 work that the main path performs after the
  * branch point.  .nan2_from_special2 skips them.  The product formed in the
  * delay slot is the result.
  */
 936         .align  16
 937 .nan2:
 938         fmovdg  %fcc0,signbit,%f0
 939          fmuld  %f32,%f14,%f32
 940          fsubd  %f10,%f16,%f14
 941          faddd  %f12,%f18,%f18
 942         fxor    %f36,%f0,%f36
 943 .nan2_from_special2:
 944         ba,pt   %icc,.special2
 945 ! delay slot
 946         fmuld   %f20,%f22,%f26
 947 
 948 
 /*
  * .big2 - same three-way decision as .big0.  The leading fxor is the
  * element 0 instruction that the main path executes after the branch
  * point, and .big2_from_special2 skips it.  On the rescaling route %l2 is
  * left as it is, because .cont3 rounds and shifts it.
  */
 949         .align  16
 950 .big2:
 951         fxor    %f36,%f0,%f36
 952 .big2_from_special2:
 953         cmp     %g5,%o5 
 954         bge,pn  %icc,.return_ah2
 955 ! delay slot
 956         nop
 957         cmp     %l2,%o4
 958         bge,pn  %icc,1f
 959 ! delay slot
 960         nop
 961         ldd     [%fp+twom3],%f26
 962         fmuld   %f20,%f26,%f20
 963         fmuld   %f22,%f26,%f22
 964         ba,pt   %icc,.cont3
 965 ! delay slot
 966         nop
 967 1:
 968         fbg,pn  %fcc2,.return_ah2
 969 ! delay slot
 970         nop
 971         fcmpd   %fcc3,%f28,signbit
 972         fbl,pn  %fcc3,.return_ah2
 973 ! delay slot
 974         nop
 975         ba,pt   %icc,.special2
 976 ! delay slot
 977         fdivd   %f20,%f22,%f26
 978 
 979 
 /*
  * .small2 - same as .small0: a zero smaller magnitude goes to .return_ah2.
  * Anything else is multiplied by two110 and the difference of the scaled
  * high words is recomputed in %l2 for .cont3.
  */
 980         .align  16
 981 .small2:
 982         fcmpd   %fcc3,%f20,signbit
 983         fbe,pt  %fcc3,.return_ah2
 984 ! delay slot
 985         nop
 986         ldd     [%fp+two110],%f26
 987         fmuld   %f20,%f26,%f20
 988         fmuld   %f22,%f26,%f22
 989         st      %f20,[%fp+yscl]
 990         ld      [%fp+yscl],%o7
 991         st      %f22,[%fp+xscl]
 992         ld      [%fp+xscl],%l2
 993         sub     %l2,%o7,%l2
 994         ba,pt   %icc,.cont3
 995 ! delay slot
 996         nop
 997 
 998 
 /*
  * .return_ah2 - the result is built from pio2 alone, as in .return_ah0.
  * %g5 - %l2 equals hy - 0x7fe00000, so pio4 is added only when the smaller
  * high word is at least 0x7ff00000 as well.
  */
 999         .align  16
/*
 * NOTE(review): %f24 is rewritten on both routes out of .special2 before it
 * is read, so the fdtoi below is presumably done only for its floating
 * point exception side effect.  Confirm.
 *
 * .special2 - apply the sign to the result in %f26, store it through %l6
 * and count the element.  If none remain, element 2 becomes a dummy with y
 * and x both pio2, zero signs, table offset zero and the junk slot as its
 * destination, and control returns to .cont3.  Otherwise the next element
 * is loaded straight into the element 2 registers and classified with
 * %fcc2.
 */
1000 .return_ah2:
1001         fzero   %f20
1002         fmovdg  %fcc2,signbit,%f20
1003         fxor    %f40,%f20,%f40
1004         fxor    %f40,pio2,%f20
1005         fxor    %f46,%f40,%f46
1006         fnegd   pio2,%f26
1007         fmovdg  %fcc2,signbit,%f26
1008         faddd   %f26,%f20,%f26
1009         sub     %g5,%l2,%o7
1010         cmp     %o7,%o5
1011         bl,pt   %icc,1f 
1012 ! delay slot
1013         nop
1014         ldd     [%fp+pio4],%f20
1015         faddd   %f26,%f20,%f26
1016 1:
1017         fdtoi   %f26,%f24
1018 .special2:
1019         fxor    %f26,%f46,%f26
1020         st      %f26,[%l6]
1021         st      %f27,[%l6+4]
1022         addcc   %i0,-1,%i0
1023         bg,pn   %icc,1f
1024 ! delay slot
1025         nop
1026         fmovd   pio2,%f20               ! set up dummy argument
1027         fmovd   pio2,%f22
1028         fzero   %f40
1029         fzero   %f46
1030         mov     0,%l2
1031         ba,pt   %icc,.cont3
1032 ! delay slot
1033         add     %fp,junk,%l6
1034 1:
1035         lda     [%i1]%asi,%f20
1036         lda     [%i1+4]%asi,%f21
1037         lda     [%i3]%asi,%f28
1038         lda     [%i3+4]%asi,%f29
1039         fabsd   %f20,%f24
1040         lda     [%i1]%asi,%o0
1041         fabsd   %f28,%f22
1042         lda     [%i3]%asi,%l3
1043         mov     %i5,%l6
1044         fand    %f20,signbit,%f46
1045         add     %i1,%i2,%i1
1046         fand    %f28,signbit,%f40
1047         fcmpd   %fcc2,%f24,%f22
1048         add     %i3,%i4,%i3
1049         add     %i5,%l7,%i5
1050         fmovd   %f24,%f20
1051         sethi   %hi(0x80000000),%g5
1052         andn    %o0,%g5,%o0
1053         andn    %l3,%g5,%l3
1054         fmovdg  %fcc2,%f22,%f20
1055         fmovdg  %fcc2,%f24,%f22
1056         mov     %o0,%o7
1057         movg    %fcc2,%l3,%o0
1058         movg    %fcc2,%o7,%l3
1059         fbu,pn  %fcc2,.nan2_from_special2
1060 ! delay slot
1061         nop
1062         sub     %l3,%o0,%l2
1063         sub     %l3,%o3,%g5
1064         sub     %l2,%o4,%o7
1065         andcc   %g5,%o7,%g0
1066         bge,pn  %icc,.big2_from_special2
1067 ! delay slot
1068         nop
1069         cmp     %o0,%o5
1070         bl,pn   %icc,.small2
1071 ! delay slot
1072         nop
1073         ba,pt   %icc,.cont3
1074 ! delay slot
1075         nop
1076 
1077         SET_SIZE(__vatan2)
1078