1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  23  */
  24 /*
  25  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  26  * Use is subject to license terms.
  27  */
  28 
  29         .file   "__vsin.S"
  30 
  31 #include "libm.h"
  32 
  33         RO_DATA
  34         .align  64
  35 constants:   ! read-only doubles (hi word, lo word) addressed as [%g1+offset]; offsets are named by the defines that follow
  36         .word   0x3ec718e3,0xa6972785   ! 0x00 p4       ~ +2.75e-6; with p1..p3: x + x*z*(p1 + z*(p2 + z*(p3 + z*p4))), z = x*x
  37         .word   0x3ef9fd39,0x94293940   ! 0x08 q4       ~ +2.48e-5
  38         .word   0xbf2a019f,0x75ee4be1   ! 0x10 p3       ~ -1.98e-4
  39         .word   0xbf56c16b,0xba552569   ! 0x18 q3       ~ -1.39e-3
  40         .word   0x3f811111,0x1108c703   ! 0x20 p2       ~ +8.33e-3
  41         .word   0x3fa55555,0x554f5b35   ! 0x28 q2       ~ +4.17e-2
  42         .word   0xbfc55555,0x555554d0   ! 0x30 p1       ~ -1/6
  43         .word   0xbfdfffff,0xffffff85   ! 0x38 q1       ~ -1/2
  44         .word   0x3ff00000,0x00000000   ! 0x40 one      = 1.0
  45         .word   0xbfc55555,0x5551fc28   ! 0x48 pp1      ~ -1/6;     with pp2: x*(one + z*(pp1 + z*pp2)) in the table-driven path
  46         .word   0x3f811107,0x62eacc9d   ! 0x50 pp2      ~ +8.33e-3
  47         .word   0xbfdfffff,0xffff6328   ! 0x58 qq1      ~ -1/2;     with qq2: z*(qq1 + z*qq2) in the table-driven path
  48         .word   0x3fa55551,0x5f7acf0c   ! 0x60 qq2      ~ +4.17e-2
  49         .word   0x3fe45f30,0x6dc9c883   ! 0x68 invpio2  ~ 2/pi
  50         .word   0x43380000,0x00000000   ! 0x70 round    = 1.5 * 2^52
  51         .word   0x3ff921fb,0x54400000   ! 0x78 pio2_1   ~ pi/2 (leading bits only; low mantissa bits are zero)
  52         .word   0x3dd0b461,0x1a600000   ! 0x80 pio2_2   ~ 6.08e-11
  53         .word   0x3ba3198a,0x2e000000   ! 0x88 pio2_3   ~ 2.02e-21
  54         .word   0x397b839a,0x252049c1   ! 0x90 pio2_3t  ~ 8.48e-32
  55         .word   0x80000000,0x00004000   ! 0x98 f30val   loaded as %f30 = 0x80000000 (sign bit), %f31 = 0x4000
  56         .word   0xffff8000,0x00000000   ! N.B.: low-order words used
  57         .word   0x3fc90000,0x80000000   ! for sign bit hacking; see
  58         .word   0x3fc40000,0x00000000   ! references to "thresh" below
  59 
  60 #define p4              0x0
  61 #define q4              0x08
  62 #define p3              0x10
  63 #define q3              0x18
  64 #define p2              0x20
  65 #define q2              0x28
  66 #define p1              0x30
  67 #define q1              0x38
  68 #define one             0x40
  69 #define pp1             0x48
  70 #define pp2             0x50
  71 #define qq1             0x58
  72 #define qq2             0x60
  73 #define invpio2         0x68
  74 #define round           0x70
  75 #define pio2_1          0x78
  76 #define pio2_2          0x80
  77 #define pio2_3          0x88
  78 #define pio2_3t         0x90
  79 #define f30val          0x98
  80 #define mask            0xa0
  81 #define thresh          0xa8
  82 
  83 ! local storage indices: offsets from %fp into the tmps scratch area reserved by the save at entry
  84 
  85 #define xsave           STACK_BIAS-0x8
  86 #define ysave           STACK_BIAS-0x10
  87 #define nsave           STACK_BIAS-0x14
  88 #define sxsave          STACK_BIAS-0x18
  89 #define sysave          STACK_BIAS-0x1c
  90 #define biguns          STACK_BIAS-0x20
  91 #define n2              STACK_BIAS-0x24
  92 #define n1              STACK_BIAS-0x28
  93 #define n0              STACK_BIAS-0x2c
  94 #define x2_1            STACK_BIAS-0x40
  95 #define x1_1            STACK_BIAS-0x50
  96 #define x0_1            STACK_BIAS-0x60
  97 #define y2_0            STACK_BIAS-0x70
  98 #define y1_0            STACK_BIAS-0x80
  99 #define y0_0            STACK_BIAS-0x90
 100 ! sizeof temp storage - must be a multiple of 16 for V9
 101 #define tmps            0x90
 102 
 103 !--------------------------------------------------------------
 104 !       Some defines to keep code more readable
 105 #define LIM_l6          %l6
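 106 !       in primary range, holds 0x3e400000: hx below this leaves the main loop via .rangeN (original note: |x| upper limit when cos(x)=1; presumably sin(x)=x for this routine - TODO confirm).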
 107 !       in transferring to medium range, denotes what loop was active.
 108 !--------------------------------------------------------------
 109 
 110         ENTRY(__vsin)
 111         save    %sp,-SA(MINFRAME)-tmps,%sp
 112         PIC_SETUP(g5)
 113         PIC_SET(g5,__vlibm_TBL_sincos_hi,l3)
 114         PIC_SET(g5,__vlibm_TBL_sincos_lo,l4)
 115         PIC_SET(g5,constants,l5)
 116         mov     %l5,%g1
 117         wr      %g0,0x82,%asi           ! set %asi for non-faulting loads
 118 
 119 ! ========== primary range ==========
 120 
 121 ! register use
 122 
 123 ! i0  n
 124 ! i1  x
 125 ! i2  stridex
 126 ! i3  y
 127 ! i4  stridey
 128 ! i5  0x80000000
 129 
 130 ! l0  hx0
 131 ! l1  hx1
 132 ! l2  hx2
 133 ! l3  __vlibm_TBL_sincos_hi
 134 ! l4  __vlibm_TBL_sincos_lo
 135 ! l5  0x3fc90000
 136 ! l6  0x3e400000
 137 ! l7  0x3fe921fb
 138 
 139 ! the following are 64-bit registers in both V8+ and V9
 140 
 141 ! g1  scratch
 142 ! g5  
 143 
 144 ! o0  py0
 145 ! o1  py1
 146 ! o2  py2
 147 ! o3  oy0
 148 ! o4  oy1
 149 ! o5  oy2
 150 ! o7  scratch
 151 
 152 ! f0  x0
 153 ! f2  
 154 ! f4  
 155 ! f6  
 156 ! f8  scratch for table base
 157 ! f9  signbit0
 158 ! f10 x1
 159 ! f12 
 160 ! f14 
 161 ! f16 
 162 ! f18 scratch for table base
 163 ! f19 signbit1
 164 ! f20 x2
 165 ! f22 
 166 ! f24 
 167 ! f26 
 168 ! f28 scratch for table base
 169 ! f29 signbit2
 170 ! f30 0x80000000
 171 ! f31 0x4000
 172 ! f32 
 173 ! f34 
 174 ! f36 
 175 ! f38 
 176 ! f40 
 177 ! f42 
 178 ! f44 0xffff800000000000
 179 ! f46 p1
 180 ! f48 p2
 181 ! f50 p3
 182 ! f52 p4
 183 ! f54 one
 184 ! f56 pp1
 185 ! f58 pp2
 186 ! f60 qq1
 187 ! f62 qq2
 188 
 189 #ifdef __sparcv9
 190         stx     %i1,[%fp+xsave]         ! save arguments
 191         stx     %i3,[%fp+ysave]
 192 #else
 193         st      %i1,[%fp+xsave]         ! save arguments
 194         st      %i3,[%fp+ysave]
 195 #endif
 196         st      %i0,[%fp+nsave]
 197         st      %i2,[%fp+sxsave]
 198         st      %i4,[%fp+sysave]
 199         sethi   %hi(0x80000000),%i5     ! load/set up constants
 200         sethi   %hi(0x3fc90000),%l5
 201         sethi   %hi(0x3e400000),LIM_l6
 202         sethi   %hi(0x3fe921fb),%l7
 203         or      %l7,%lo(0x3fe921fb),%l7
 204         ldd     [%g1+f30val],%f30
 205         ldd     [%g1+mask],%f44
 206         ldd     [%g1+p1],%f46
 207         ldd     [%g1+p2],%f48
 208         ldd     [%g1+p3],%f50
 209         ldd     [%g1+p4],%f52
 210         ldd     [%g1+one],%f54
 211         ldd     [%g1+pp1],%f56
 212         ldd     [%g1+pp2],%f58
 213         ldd     [%g1+qq1],%f60
 214         ldd     [%g1+qq2],%f62
 215         sll     %i2,3,%i2               ! scale strides
 216         sll     %i4,3,%i4
 217         add     %fp,x0_1,%o3            ! precondition loop
 218         add     %fp,x0_1,%o4
 219         add     %fp,x0_1,%o5
 220         ld      [%i1],%l0               ! hx = *x
 221         ld      [%i1],%f0
 222         ld      [%i1+4],%f1
 223         andn    %l0,%i5,%l0             ! hx &= ~0x80000000
 224         add     %i1,%i2,%i1             ! x += stridex
 225 
 226         ba,pt   %icc,.loop0
 227 ! delay slot
 228         nop
 229 
 230         .align 32
 231 .loop0:
 232         lda     [%i1]%asi,%l1           ! preload next argument
 233         sub     %l0,LIM_l6,%g1
 234         sub     %l7,%l0,%o7
 235         fands   %f0,%f30,%f9            ! save signbit
 236 
 237         lda     [%i1]%asi,%f10
 238         orcc    %o7,%g1,%g0
 239         mov     %i3,%o0                 ! py0 = y
 240         bl,pn   %icc,.range0            ! if hx < 0x3e400000 or > 0x3fe921fb
 241 
 242 ! delay slot
 243         lda     [%i1+4]%asi,%f11
 244         addcc   %i0,-1,%i0
 245         add     %i3,%i4,%i3             ! y += stridey
 246         ble,pn  %icc,.endloop1
 247 
 248 ! delay slot
 249         andn    %l1,%i5,%l1
 250         add     %i1,%i2,%i1             ! x += stridex
 251         fabsd   %f0,%f0
 252         fmuld   %f54,%f54,%f54          ! one*one; a nop for alignment only
 253 
 254 .loop1:
 255         lda     [%i1]%asi,%l2           ! preload next argument
 256         sub     %l1,LIM_l6,%g1
 257         sub     %l7,%l1,%o7
 258         fands   %f10,%f30,%f19          ! save signbit
 259 
 260         lda     [%i1]%asi,%f20
 261         orcc    %o7,%g1,%g0
 262         mov     %i3,%o1                 ! py1 = y
 263         bl,pn   %icc,.range1            ! if hx < 0x3e400000 or > 0x3fe921fb
 264 
 265 ! delay slot
 266         lda     [%i1+4]%asi,%f21
 267         addcc   %i0,-1,%i0
 268         add     %i3,%i4,%i3             ! y += stridey
 269         ble,pn  %icc,.endloop2
 270 
 271 ! delay slot
 272         andn    %l2,%i5,%l2
 273         add     %i1,%i2,%i1             ! x += stridex
 274         fabsd   %f10,%f10
 275         fmuld   %f54,%f54,%f54          ! one*one; a nop for alignment only
 276 
 277 .loop2:
 278         st      %f6,[%o3]
 279         sub     %l2,LIM_l6,%g1
 280         sub     %l7,%l2,%o7
 281         fands   %f20,%f30,%f29          ! save signbit
 282 
 283         st      %f7,[%o3+4]
 284         orcc    %g1,%o7,%g0
 285         mov     %i3,%o2                 ! py2 = y
 286         bl,pn   %icc,.range2            ! if hx < 0x3e400000 or > 0x3fe921fb
 287 
 288 ! delay slot
 289         add     %i3,%i4,%i3             ! y += stridey
 290         cmp     %l0,%l5
 291         fabsd   %f20,%f20
 292         bl,pn   %icc,.case4
 293 
 294 ! delay slot
 295         st      %f16,[%o4]
 296         cmp     %l1,%l5
 297         fpadd32s %f0,%f31,%f8
 298         bl,pn   %icc,.case2
 299 
 300 ! delay slot
 301         st      %f17,[%o4+4]
 302         cmp     %l2,%l5
 303         fpadd32s %f10,%f31,%f18
 304         bl,pn   %icc,.case1
 305 
 306 ! delay slot
 307         st      %f26,[%o5]
 308         mov     %o0,%o3
 309         sethi   %hi(0x3fc3c000),%o7
 310         fpadd32s %f20,%f31,%f28
 311 
 312         st      %f27,[%o5+4]
 313         fand    %f8,%f44,%f2
 314         mov     %o1,%o4
 315 
 316         fand    %f18,%f44,%f12
 317         mov     %o2,%o5
 318         sub     %l0,%o7,%l0
 319 
 320         fand    %f28,%f44,%f22
 321         sub     %l1,%o7,%l1
 322         sub     %l2,%o7,%l2
 323 
 324         fsubd   %f0,%f2,%f0
 325         srl     %l0,10,%l0
 326         add     %l3,8,%g1
 327 
 328         fsubd   %f10,%f12,%f10
 329         srl     %l1,10,%l1
 330 
 331         fsubd   %f20,%f22,%f20
 332         srl     %l2,10,%l2
 333 
 334         fmuld   %f0,%f0,%f2
 335         andn    %l0,0x1f,%l0
 336 
 337         fmuld   %f10,%f10,%f12
 338         andn    %l1,0x1f,%l1
 339 
 340         fmuld   %f20,%f20,%f22
 341         andn    %l2,0x1f,%l2
 342 
 343         fmuld   %f2,%f58,%f6
 344         ldd     [%l3+%l0],%f32
 345 
 346         fmuld   %f12,%f58,%f16
 347         ldd     [%l3+%l1],%f36
 348 
 349         fmuld   %f22,%f58,%f26
 350         ldd     [%l3+%l2],%f40
 351 
 352         faddd   %f6,%f56,%f6
 353         fmuld   %f2,%f62,%f4
 354         ldd     [%g1+%l0],%f34
 355 
 356         faddd   %f16,%f56,%f16
 357         fmuld   %f12,%f62,%f14
 358         ldd     [%g1+%l1],%f38
 359 
 360         faddd   %f26,%f56,%f26
 361         fmuld   %f22,%f62,%f24
 362         ldd     [%g1+%l2],%f42
 363 
 364         fmuld   %f2,%f6,%f6
 365         faddd   %f4,%f60,%f4
 366 
 367         fmuld   %f12,%f16,%f16
 368         faddd   %f14,%f60,%f14
 369 
 370         fmuld   %f22,%f26,%f26
 371         faddd   %f24,%f60,%f24
 372 
 373         faddd   %f6,%f54,%f6
 374         fmuld   %f2,%f4,%f4
 375 
 376         faddd   %f16,%f54,%f16
 377         fmuld   %f12,%f14,%f14
 378 
 379         faddd   %f26,%f54,%f26
 380         fmuld   %f22,%f24,%f24
 381 
 382         fmuld   %f0,%f6,%f6
 383         ldd     [%l4+%l0],%f2
 384 
 385         fmuld   %f10,%f16,%f16
 386         ldd     [%l4+%l1],%f12
 387 
 388         fmuld   %f20,%f26,%f26
 389         ldd     [%l4+%l2],%f22
 390 
 391         fmuld   %f4,%f32,%f4
 392         lda     [%i1]%asi,%l0           ! preload next argument
 393 
 394         fmuld   %f14,%f36,%f14
 395         lda     [%i1]%asi,%f0
 396 
 397         fmuld   %f24,%f40,%f24
 398         lda     [%i1+4]%asi,%f1
 399 
 400         fmuld   %f6,%f34,%f6
 401         add     %i1,%i2,%i1             ! x += stridex
 402 
 403         fmuld   %f16,%f38,%f16
 404 
 405         fmuld   %f26,%f42,%f26
 406 
 407         faddd   %f6,%f4,%f6
 408 
 409         faddd   %f16,%f14,%f16
 410 
 411         faddd   %f26,%f24,%f26
 412 
 413         faddd   %f6,%f2,%f6
 414 
 415         faddd   %f16,%f12,%f16
 416 
 417         faddd   %f26,%f22,%f26
 418 
 419         faddd   %f6,%f32,%f6
 420 
 421         faddd   %f16,%f36,%f16
 422 
 423         faddd   %f26,%f40,%f26
 424         andn    %l0,%i5,%l0             ! hx &= ~0x80000000
 425 
 426         fors    %f6,%f9,%f6
 427         addcc   %i0,-1,%i0
 428 
 429         fors    %f16,%f19,%f16
 430         bg,pt   %icc,.loop0
 431 
 432 ! delay slot
 433         fors    %f26,%f29,%f26
 434 
 435         ba,pt   %icc,.endloop0
 436 ! delay slot
 437         nop
 438 
 439         .align  32
 440 .case1:
 441         st      %f27,[%o5+4]
 442         sethi   %hi(0x3fc3c000),%o7
 443         add     %l3,8,%g1
 444         fand    %f8,%f44,%f2
 445 
 446         sub     %l0,%o7,%l0
 447         sub     %l1,%o7,%l1
 448         fand    %f18,%f44,%f12
 449         fmuld   %f20,%f20,%f22
 450 
 451         fsubd   %f0,%f2,%f0
 452         srl     %l0,10,%l0
 453         mov     %o0,%o3
 454 
 455         fsubd   %f10,%f12,%f10
 456         srl     %l1,10,%l1
 457         mov     %o1,%o4
 458 
 459         fmuld   %f22,%f52,%f24
 460         mov     %o2,%o5
 461 
 462         fmuld   %f0,%f0,%f2
 463         andn    %l0,0x1f,%l0
 464 
 465         fmuld   %f10,%f10,%f12
 466         andn    %l1,0x1f,%l1
 467 
 468         faddd   %f24,%f50,%f24
 469 
 470         fmuld   %f2,%f58,%f6
 471         ldd     [%l3+%l0],%f32
 472 
 473         fmuld   %f12,%f58,%f16
 474         ldd     [%l3+%l1],%f36
 475 
 476         fmuld   %f22,%f24,%f24
 477 
 478         faddd   %f6,%f56,%f6
 479         fmuld   %f2,%f62,%f4
 480         ldd     [%g1+%l0],%f34
 481 
 482         faddd   %f16,%f56,%f16
 483         fmuld   %f12,%f62,%f14
 484         ldd     [%g1+%l1],%f38
 485 
 486         faddd   %f24,%f48,%f24
 487 
 488         fmuld   %f2,%f6,%f6
 489         faddd   %f4,%f60,%f4
 490 
 491         fmuld   %f12,%f16,%f16
 492         faddd   %f14,%f60,%f14
 493 
 494         fmuld   %f22,%f24,%f24
 495 
 496         faddd   %f6,%f54,%f6
 497         fmuld   %f2,%f4,%f4
 498 
 499         faddd   %f16,%f54,%f16
 500         fmuld   %f12,%f14,%f14
 501 
 502         faddd   %f24,%f46,%f24
 503 
 504         fmuld   %f0,%f6,%f6
 505         ldd     [%l4+%l0],%f2
 506 
 507         fmuld   %f10,%f16,%f16
 508         ldd     [%l4+%l1],%f12
 509 
 510         fmuld   %f4,%f32,%f4
 511         lda     [%i1]%asi,%l0           ! preload next argument
 512 
 513         fmuld   %f14,%f36,%f14
 514         lda     [%i1]%asi,%f0
 515 
 516         fmuld   %f6,%f34,%f6
 517         lda     [%i1+4]%asi,%f1
 518 
 519         fmuld   %f16,%f38,%f16
 520         add     %i1,%i2,%i1             ! x += stridex
 521 
 522         fmuld   %f22,%f24,%f24
 523 
 524         faddd   %f6,%f4,%f6
 525 
 526         faddd   %f16,%f14,%f16
 527 
 528         fmuld   %f20,%f24,%f24
 529 
 530         faddd   %f6,%f2,%f6
 531 
 532         faddd   %f16,%f12,%f16
 533 
 534         faddd   %f20,%f24,%f26
 535 
 536         faddd   %f6,%f32,%f6
 537 
 538         faddd   %f16,%f36,%f16
 539         andn    %l0,%i5,%l0             ! hx &= ~0x80000000
 540 
 541         fors    %f26,%f29,%f26
 542         addcc   %i0,-1,%i0
 543 
 544         fors    %f6,%f9,%f6
 545         bg,pt   %icc,.loop0
 546 
 547 ! delay slot
 548         fors    %f16,%f19,%f16
 549 
 550         ba,pt   %icc,.endloop0
 551 ! delay slot
 552         nop
 553 
 554         .align  32
 555 .case2:
 556         st      %f26,[%o5]
 557         cmp     %l2,%l5
 558         fpadd32s %f20,%f31,%f28
 559         bl,pn   %icc,.case3
 560 
 561 ! delay slot
 562         st      %f27,[%o5+4]
 563         sethi   %hi(0x3fc3c000),%o7
 564         add     %l3,8,%g1
 565         fand    %f8,%f44,%f2
 566 
 567         sub     %l0,%o7,%l0
 568         sub     %l2,%o7,%l2
 569         fand    %f28,%f44,%f22
 570         fmuld   %f10,%f10,%f12
 571 
 572         fsubd   %f0,%f2,%f0
 573         srl     %l0,10,%l0
 574         mov     %o0,%o3
 575 
 576         fsubd   %f20,%f22,%f20
 577         srl     %l2,10,%l2
 578         mov     %o2,%o5
 579 
 580         fmuld   %f12,%f52,%f14
 581         mov     %o1,%o4
 582 
 583         fmuld   %f0,%f0,%f2
 584         andn    %l0,0x1f,%l0
 585 
 586         fmuld   %f20,%f20,%f22
 587         andn    %l2,0x1f,%l2
 588 
 589         faddd   %f14,%f50,%f14
 590 
 591         fmuld   %f2,%f58,%f6
 592         ldd     [%l3+%l0],%f32
 593 
 594         fmuld   %f22,%f58,%f26
 595         ldd     [%l3+%l2],%f40
 596 
 597         fmuld   %f12,%f14,%f14
 598 
 599         faddd   %f6,%f56,%f6
 600         fmuld   %f2,%f62,%f4
 601         ldd     [%g1+%l0],%f34
 602 
 603         faddd   %f26,%f56,%f26
 604         fmuld   %f22,%f62,%f24
 605         ldd     [%g1+%l2],%f42
 606 
 607         faddd   %f14,%f48,%f14
 608 
 609         fmuld   %f2,%f6,%f6
 610         faddd   %f4,%f60,%f4
 611 
 612         fmuld   %f22,%f26,%f26
 613         faddd   %f24,%f60,%f24
 614 
 615         fmuld   %f12,%f14,%f14
 616 
 617         faddd   %f6,%f54,%f6
 618         fmuld   %f2,%f4,%f4
 619 
 620         faddd   %f26,%f54,%f26
 621         fmuld   %f22,%f24,%f24
 622 
 623         faddd   %f14,%f46,%f14
 624 
 625         fmuld   %f0,%f6,%f6
 626         ldd     [%l4+%l0],%f2
 627 
 628         fmuld   %f20,%f26,%f26
 629         ldd     [%l4+%l2],%f22
 630 
 631         fmuld   %f4,%f32,%f4
 632         lda     [%i1]%asi,%l0           ! preload next argument
 633 
 634         fmuld   %f24,%f40,%f24
 635         lda     [%i1]%asi,%f0
 636 
 637         fmuld   %f6,%f34,%f6
 638         lda     [%i1+4]%asi,%f1
 639 
 640         fmuld   %f26,%f42,%f26
 641         add     %i1,%i2,%i1             ! x += stridex
 642 
 643         fmuld   %f12,%f14,%f14
 644 
 645         faddd   %f6,%f4,%f6
 646 
 647         faddd   %f26,%f24,%f26
 648 
 649         fmuld   %f10,%f14,%f14
 650 
 651         faddd   %f6,%f2,%f6
 652 
 653         faddd   %f26,%f22,%f26
 654 
 655         faddd   %f10,%f14,%f16
 656 
 657         faddd   %f6,%f32,%f6
 658 
 659         faddd   %f26,%f40,%f26
 660         andn    %l0,%i5,%l0             ! hx &= ~0x80000000
 661 
 662         fors    %f16,%f19,%f16
 663         addcc   %i0,-1,%i0
 664 
 665         fors    %f6,%f9,%f6
 666         bg,pt   %icc,.loop0
 667 
 668 ! delay slot
 669         fors    %f26,%f29,%f26
 670 
 671         ba,pt   %icc,.endloop0
 672 ! delay slot
 673         nop
 674 
 675         .align  32
 676 .case3:
 677         sethi   %hi(0x3fc3c000),%o7
 678         add     %l3,8,%g1
 679         fand    %f8,%f44,%f2
 680         fmuld   %f10,%f10,%f12
 681 
 682         sub     %l0,%o7,%l0
 683         fmuld   %f20,%f20,%f22
 684 
 685         fsubd   %f0,%f2,%f0
 686         srl     %l0,10,%l0
 687         mov     %o0,%o3
 688 
 689         fmuld   %f12,%f52,%f14
 690         mov     %o1,%o4
 691 
 692         fmuld   %f22,%f52,%f24
 693         mov     %o2,%o5
 694 
 695         fmuld   %f0,%f0,%f2
 696         andn    %l0,0x1f,%l0
 697 
 698         faddd   %f14,%f50,%f14
 699 
 700         faddd   %f24,%f50,%f24
 701 
 702         fmuld   %f2,%f58,%f6
 703         ldd     [%l3+%l0],%f32
 704 
 705         fmuld   %f12,%f14,%f14
 706 
 707         fmuld   %f22,%f24,%f24
 708 
 709         faddd   %f6,%f56,%f6
 710         fmuld   %f2,%f62,%f4
 711         ldd     [%g1+%l0],%f34
 712 
 713         faddd   %f14,%f48,%f14
 714 
 715         faddd   %f24,%f48,%f24
 716 
 717         fmuld   %f2,%f6,%f6
 718         faddd   %f4,%f60,%f4
 719 
 720         fmuld   %f12,%f14,%f14
 721 
 722         fmuld   %f22,%f24,%f24
 723 
 724         faddd   %f6,%f54,%f6
 725         fmuld   %f2,%f4,%f4
 726 
 727         faddd   %f14,%f46,%f14
 728 
 729         faddd   %f24,%f46,%f24
 730 
 731         fmuld   %f0,%f6,%f6
 732         ldd     [%l4+%l0],%f2
 733 
 734         fmuld   %f4,%f32,%f4
 735         lda     [%i1]%asi,%l0           ! preload next argument
 736 
 737         fmuld   %f12,%f14,%f14
 738         lda     [%i1]%asi,%f0
 739 
 740         fmuld   %f6,%f34,%f6
 741         lda     [%i1+4]%asi,%f1
 742 
 743         fmuld   %f22,%f24,%f24
 744         add     %i1,%i2,%i1             ! x += stridex
 745 
 746         fmuld   %f10,%f14,%f14
 747 
 748         faddd   %f6,%f4,%f6
 749 
 750         fmuld   %f20,%f24,%f24
 751 
 752         faddd   %f10,%f14,%f16
 753 
 754         faddd   %f6,%f2,%f6
 755 
 756         faddd   %f20,%f24,%f26
 757 
 758         fors    %f16,%f19,%f16
 759         andn    %l0,%i5,%l0             ! hx &= ~0x80000000
 760 
 761         faddd   %f6,%f32,%f6
 762         addcc   %i0,-1,%i0
 763 
 764         fors    %f26,%f29,%f26
 765         bg,pt   %icc,.loop0
 766 
 767 ! delay slot
 768         fors    %f6,%f9,%f6
 769 
 770         ba,pt   %icc,.endloop0
 771 ! delay slot
 772         nop
 773 
 774         .align  32
 775 .case4:
 776         st      %f17,[%o4+4]
 777         cmp     %l1,%l5
 778         fpadd32s %f10,%f31,%f18
 779         bl,pn   %icc,.case6
 780 
 781 ! delay slot
 782         st      %f26,[%o5]
 783         cmp     %l2,%l5
 784         fpadd32s %f20,%f31,%f28
 785         bl,pn   %icc,.case5
 786 
 787 ! delay slot
 788         st      %f27,[%o5+4]
 789         sethi   %hi(0x3fc3c000),%o7
 790         add     %l3,8,%g1
 791         fand    %f18,%f44,%f12
 792 
 793         sub     %l1,%o7,%l1
 794         sub     %l2,%o7,%l2
 795         fand    %f28,%f44,%f22
 796         fmuld   %f0,%f0,%f2
 797 
 798         fsubd   %f10,%f12,%f10
 799         srl     %l1,10,%l1
 800         mov     %o1,%o4
 801 
 802         fsubd   %f20,%f22,%f20
 803         srl     %l2,10,%l2
 804         mov     %o2,%o5
 805 
 806         fmovd   %f0,%f6
 807         fmuld   %f2,%f52,%f4
 808         mov     %o0,%o3
 809 
 810         fmuld   %f10,%f10,%f12
 811         andn    %l1,0x1f,%l1
 812 
 813         fmuld   %f20,%f20,%f22
 814         andn    %l2,0x1f,%l2
 815 
 816         faddd   %f4,%f50,%f4
 817 
 818         fmuld   %f12,%f58,%f16
 819         ldd     [%l3+%l1],%f36
 820 
 821         fmuld   %f22,%f58,%f26
 822         ldd     [%l3+%l2],%f40
 823 
 824         fmuld   %f2,%f4,%f4
 825 
 826         faddd   %f16,%f56,%f16
 827         fmuld   %f12,%f62,%f14
 828         ldd     [%g1+%l1],%f38
 829 
 830         faddd   %f26,%f56,%f26
 831         fmuld   %f22,%f62,%f24
 832         ldd     [%g1+%l2],%f42
 833 
 834         faddd   %f4,%f48,%f4
 835 
 836         fmuld   %f12,%f16,%f16
 837         faddd   %f14,%f60,%f14
 838 
 839         fmuld   %f22,%f26,%f26
 840         faddd   %f24,%f60,%f24
 841 
 842         fmuld   %f2,%f4,%f4
 843 
 844         faddd   %f16,%f54,%f16
 845         fmuld   %f12,%f14,%f14
 846 
 847         faddd   %f26,%f54,%f26
 848         fmuld   %f22,%f24,%f24
 849 
 850         faddd   %f4,%f46,%f4
 851 
 852         fmuld   %f10,%f16,%f16
 853         ldd     [%l4+%l1],%f12
 854 
 855         fmuld   %f20,%f26,%f26
 856         ldd     [%l4+%l2],%f22
 857 
 858         fmuld   %f14,%f36,%f14
 859         lda     [%i1]%asi,%l0           ! preload next argument
 860 
 861         fmuld   %f24,%f40,%f24
 862         lda     [%i1]%asi,%f0
 863 
 864         fmuld   %f16,%f38,%f16
 865         lda     [%i1+4]%asi,%f1
 866 
 867         fmuld   %f26,%f42,%f26
 868         add     %i1,%i2,%i1             ! x += stridex
 869 
 870         fmuld   %f2,%f4,%f4
 871 
 872         faddd   %f16,%f14,%f16
 873 
 874         faddd   %f26,%f24,%f26
 875 
 876         fmuld   %f6,%f4,%f4
 877 
 878         faddd   %f16,%f12,%f16
 879 
 880         faddd   %f26,%f22,%f26
 881 
 882         faddd   %f6,%f4,%f6
 883 
 884         faddd   %f16,%f36,%f16
 885 
 886         faddd   %f26,%f40,%f26
 887         andn    %l0,%i5,%l0             ! hx &= ~0x80000000
 888 
 889         fors    %f6,%f9,%f6
 890         addcc   %i0,-1,%i0
 891 
 892         fors    %f16,%f19,%f16
 893         bg,pt   %icc,.loop0
 894 
 895 ! delay slot
 896         fors    %f26,%f29,%f26
 897 
 898         ba,pt   %icc,.endloop0
 899 ! delay slot
 900         nop
 901 
 902         .align  32
 903 .case5:
 904         sethi   %hi(0x3fc3c000),%o7
 905         add     %l3,8,%g1
 906         fand    %f18,%f44,%f12
 907         fmuld   %f0,%f0,%f2
 908 
 909         sub     %l1,%o7,%l1
 910         fmuld   %f20,%f20,%f22
 911 
 912         fsubd   %f10,%f12,%f10
 913         srl     %l1,10,%l1
 914         mov     %o1,%o4
 915 
 916         fmovd   %f0,%f6
 917         fmuld   %f2,%f52,%f4
 918         mov     %o0,%o3
 919 
 920         fmuld   %f22,%f52,%f24
 921         mov     %o2,%o5
 922 
 923         fmuld   %f10,%f10,%f12
 924         andn    %l1,0x1f,%l1
 925 
 926         faddd   %f4,%f50,%f4
 927 
 928         faddd   %f24,%f50,%f24
 929 
 930         fmuld   %f12,%f58,%f16
 931         ldd     [%l3+%l1],%f36
 932 
 933         fmuld   %f2,%f4,%f4
 934 
 935         fmuld   %f22,%f24,%f24
 936 
 937         faddd   %f16,%f56,%f16
 938         fmuld   %f12,%f62,%f14
 939         ldd     [%g1+%l1],%f38
 940 
 941         faddd   %f4,%f48,%f4
 942 
 943         faddd   %f24,%f48,%f24
 944 
 945         fmuld   %f12,%f16,%f16
 946         faddd   %f14,%f60,%f14
 947 
 948         fmuld   %f2,%f4,%f4
 949 
 950         fmuld   %f22,%f24,%f24
 951 
 952         faddd   %f16,%f54,%f16
 953         fmuld   %f12,%f14,%f14
 954 
 955         faddd   %f4,%f46,%f4
 956 
 957         faddd   %f24,%f46,%f24
 958 
 959         fmuld   %f10,%f16,%f16
 960         ldd     [%l4+%l1],%f12
 961 
 962         fmuld   %f14,%f36,%f14
 963         lda     [%i1]%asi,%l0           ! preload next argument
 964 
 965         fmuld   %f2,%f4,%f4
 966         lda     [%i1]%asi,%f0
 967 
 968         fmuld   %f16,%f38,%f16
 969         lda     [%i1+4]%asi,%f1
 970 
 971         fmuld   %f22,%f24,%f24
 972         add     %i1,%i2,%i1             ! x += stridex
 973 
 974         fmuld   %f6,%f4,%f4
 975 
 976         faddd   %f16,%f14,%f16
 977 
 978         fmuld   %f20,%f24,%f24
 979 
 980         faddd   %f6,%f4,%f6
 981 
 982         faddd   %f16,%f12,%f16
 983 
 984         faddd   %f20,%f24,%f26
 985 
 986         fors    %f6,%f9,%f6
 987         andn    %l0,%i5,%l0             ! hx &= ~0x80000000
 988 
 989         faddd   %f16,%f36,%f16
 990         addcc   %i0,-1,%i0
 991 
 992         fors    %f26,%f29,%f26
 993         bg,pt   %icc,.loop0
 994 
 995 ! delay slot
 996         fors    %f16,%f19,%f16
 997 
 998         ba,pt   %icc,.endloop0
 999 ! delay slot
1000         nop
1001 
1002         .align  32
1003 .case6:
1004         st      %f27,[%o5+4]
1005         cmp     %l2,%l5
1006         fpadd32s %f20,%f31,%f28
1007         bl,pn   %icc,.case7
1008 
1009 ! delay slot
1010         sethi   %hi(0x3fc3c000),%o7
1011         add     %l3,8,%g1
1012         fand    %f28,%f44,%f22
1013         fmuld   %f0,%f0,%f2
1014 
1015         sub     %l2,%o7,%l2
1016         fmuld   %f10,%f10,%f12
1017 
1018         fsubd   %f20,%f22,%f20
1019         srl     %l2,10,%l2
1020         mov     %o2,%o5
1021 
1022         fmovd   %f0,%f6
1023         fmuld   %f2,%f52,%f4
1024         mov     %o0,%o3
1025 
1026         fmuld   %f12,%f52,%f14
1027         mov     %o1,%o4
1028 
1029         fmuld   %f20,%f20,%f22
1030         andn    %l2,0x1f,%l2
1031 
1032         faddd   %f4,%f50,%f4
1033 
1034         faddd   %f14,%f50,%f14
1035 
1036         fmuld   %f22,%f58,%f26
1037         ldd     [%l3+%l2],%f40
1038 
1039         fmuld   %f2,%f4,%f4
1040 
1041         fmuld   %f12,%f14,%f14
1042 
1043         faddd   %f26,%f56,%f26
1044         fmuld   %f22,%f62,%f24
1045         ldd     [%g1+%l2],%f42
1046 
1047         faddd   %f4,%f48,%f4
1048 
1049         faddd   %f14,%f48,%f14
1050 
1051         fmuld   %f22,%f26,%f26
1052         faddd   %f24,%f60,%f24
1053 
1054         fmuld   %f2,%f4,%f4
1055 
1056         fmuld   %f12,%f14,%f14
1057 
1058         faddd   %f26,%f54,%f26
1059         fmuld   %f22,%f24,%f24
1060 
1061         faddd   %f4,%f46,%f4
1062 
1063         faddd   %f14,%f46,%f14
1064 
1065         fmuld   %f20,%f26,%f26
1066         ldd     [%l4+%l2],%f22
1067 
1068         fmuld   %f24,%f40,%f24
1069         lda     [%i1]%asi,%l0           ! preload next argument
1070 
1071         fmuld   %f2,%f4,%f4
1072         lda     [%i1]%asi,%f0
1073 
1074         fmuld   %f26,%f42,%f26
1075         lda     [%i1+4]%asi,%f1
1076 
1077         fmuld   %f12,%f14,%f14
1078         add     %i1,%i2,%i1             ! x += stridex
1079 
1080         fmuld   %f6,%f4,%f4
1081 
1082         faddd   %f26,%f24,%f26
1083 
1084         fmuld   %f10,%f14,%f14
1085 
1086         faddd   %f6,%f4,%f6
1087 
1088         faddd   %f26,%f22,%f26
1089 
1090         faddd   %f10,%f14,%f16
1091 
1092         fors    %f6,%f9,%f6
1093         andn    %l0,%i5,%l0             ! hx &= ~0x80000000
1094 
1095         faddd   %f26,%f40,%f26
1096         addcc   %i0,-1,%i0
1097 
1098         fors    %f16,%f19,%f16
1099         bg,pt   %icc,.loop0
1100 
1101 ! delay slot
1102         fors    %f26,%f29,%f26
1103 
1104         ba,pt   %icc,.endloop0
1105 ! delay slot
1106         nop
1107 
1108         .align  32
1109 .case7:
1110         fmuld   %f0,%f0,%f2
1111         fmovd   %f0,%f6
1112         mov     %o0,%o3
1113 
1114         fmuld   %f10,%f10,%f12
1115         mov     %o1,%o4
1116 
1117         fmuld   %f20,%f20,%f22
1118         mov     %o2,%o5
1119 
1120         fmuld   %f2,%f52,%f4
1121         lda     [%i1]%asi,%l0           ! preload next argument
1122 
1123         fmuld   %f12,%f52,%f14
1124         lda     [%i1]%asi,%f0
1125 
1126         fmuld   %f22,%f52,%f24
1127         lda     [%i1+4]%asi,%f1
1128 
1129         faddd   %f4,%f50,%f4
1130         add     %i1,%i2,%i1             ! x += stridex
1131 
1132         faddd   %f14,%f50,%f14
1133 
1134         faddd   %f24,%f50,%f24
1135 
1136         fmuld   %f2,%f4,%f4
1137 
1138         fmuld   %f12,%f14,%f14
1139 
1140         fmuld   %f22,%f24,%f24
1141 
1142         faddd   %f4,%f48,%f4
1143 
1144         faddd   %f14,%f48,%f14
1145 
1146         faddd   %f24,%f48,%f24
1147 
1148         fmuld   %f2,%f4,%f4
1149 
1150         fmuld   %f12,%f14,%f14
1151 
1152         fmuld   %f22,%f24,%f24
1153 
1154         faddd   %f4,%f46,%f4
1155 
1156         faddd   %f14,%f46,%f14
1157 
1158         faddd   %f24,%f46,%f24
1159 
1160         fmuld   %f2,%f4,%f4
1161 
1162         fmuld   %f12,%f14,%f14
1163 
1164         fmuld   %f22,%f24,%f24
1165 
1166         fmuld   %f6,%f4,%f4
1167 
1168         fmuld   %f10,%f14,%f14
1169 
1170         fmuld   %f20,%f24,%f24
1171 
1172         faddd   %f6,%f4,%f6
1173 
1174         faddd   %f10,%f14,%f16
1175 
1176         faddd   %f20,%f24,%f26
1177         andn    %l0,%i5,%l0             ! hx &= ~0x80000000
1178 
1179         fors    %f6,%f9,%f6
1180         addcc   %i0,-1,%i0
1181 
1182         fors    %f16,%f19,%f16
1183         bg,pt   %icc,.loop0
1184 
1185 ! delay slot
1186         fors    %f26,%f29,%f26
1187 
1188         ba,pt   %icc,.endloop0
1189 ! delay slot
1190         nop
1191 
1192 
1193         .align  32
1194 .endloop2:
     ! Tail cleanup: n ran out while arguments 0 and 1 were still pending
     ! (for example via the ble in .range2).  This part finishes argument 1
     ! (x1 in %f10, hx1 in %l1, sign word in %f19, destination %o1) and then
     ! falls through into .endloop1, which finishes argument 0.
     ! NOTE(review): %l5 is the cutoff compared against hx1; its
     ! primary-range value is set outside this view -- confirm.
1195         cmp     %l1,%l5
1196         bl,pn   %icc,1f                 ! hx1 < cutoff: polynomial path
1197 ! delay slot
1198         fabsd   %f10,%f10               ! not annulled: runs on both paths
     ! Table path.  Names pp1/pp2/qq1/qq2/one are those of the register
     ! list under .MEDIUM, which does not reload %f54-%f62.
1199         sethi   %hi(0x3fc3c000),%o7     ! bias removed from hx for indexing
1200         fpadd32s %f10,%f31,%f18         ! hi word of x plus %f31
1201         add     %l3,8,%g1               ! g1 = second double of a table entry
1202         fand    %f18,%f44,%f12          ! masked with %f44: breakpoint
1203         sub     %l1,%o7,%l1
1204         fsubd   %f10,%f12,%f10          ! x -= breakpoint
1205         srl     %l1,10,%l1
1206         fmuld   %f10,%f10,%f12          ! z = x*x
1207         andn    %l1,0x1f,%l1            ! byte offset, multiple of 32
1208         fmuld   %f12,%f58,%f20
1209         ldd     [%l3+%l1],%f36          ! table entry, first double
1210         faddd   %f20,%f56,%f20
1211         fmuld   %f12,%f62,%f14
1212         ldd     [%g1+%l1],%f38          ! table entry, second double
1213         fmuld   %f12,%f20,%f20
1214         faddd   %f14,%f60,%f14
1215         faddd   %f20,%f54,%f20          ! one + z*(pp1 + z*pp2)
1216         fmuld   %f12,%f14,%f14          ! z*(qq1 + z*qq2)
1217         fmuld   %f10,%f20,%f20          ! times x
1218         ldd     [%l4+%l1],%f12          ! entry from the table based at %l4
1219         fmuld   %f14,%f36,%f14
1220         fmuld   %f20,%f38,%f20
1221         faddd   %f20,%f14,%f20
1222         faddd   %f20,%f12,%f20
1223         ba,pt   %icc,2f
1224 ! delay slot
1225         faddd   %f20,%f36,%f20          ! add the leading table term last
1226 1:
     ! Polynomial path: x + x*z*(%f46 + z*(%f48 + z*(%f50 + z*%f52)))
1227         fmuld   %f10,%f10,%f12          ! z = x*x
1228         fmuld   %f12,%f52,%f14
1229         faddd   %f14,%f50,%f14
1230         fmuld   %f12,%f14,%f14
1231         faddd   %f14,%f48,%f14
1232         fmuld   %f12,%f14,%f14
1233         faddd   %f14,%f46,%f14
1234         fmuld   %f12,%f14,%f14
1235         fmuld   %f10,%f14,%f14
1236         faddd   %f10,%f14,%f20
1237 2:
1238         fors    %f20,%f19,%f20          ! OR the saved sign word into the result
1239         st      %f20,[%o1]              ! stored as two 32-bit words
1240         st      %f21,[%o1+4]
1241 
1242 .endloop1:
     ! Tail cleanup for argument 0 (x0 in %f0, hx0 in %l0, sign word in %f9,
     ! destination %o0).  Same computation as .endloop2 on the slot-0
     ! registers; falls through into .endloop0.
     ! NOTE(review): %l5 is the cutoff compared against hx0; its
     ! primary-range value is set outside this view -- confirm.
1243         cmp     %l0,%l5
1244         bl,pn   %icc,1f                 ! hx0 < cutoff: polynomial path
1245 ! delay slot
1246         fabsd   %f0,%f0                 ! not annulled: runs on both paths
     ! Table path.  Names pp1/pp2/qq1/qq2/one are those of the register
     ! list under .MEDIUM, which does not reload %f54-%f62.
1247         sethi   %hi(0x3fc3c000),%o7     ! bias removed from hx for indexing
1248         fpadd32s %f0,%f31,%f8           ! hi word of x plus %f31
1249         add     %l3,8,%g1               ! g1 = second double of a table entry
1250         fand    %f8,%f44,%f2            ! masked with %f44: breakpoint
1251         sub     %l0,%o7,%l0
1252         fsubd   %f0,%f2,%f0             ! x -= breakpoint
1253         srl     %l0,10,%l0
1254         fmuld   %f0,%f0,%f2             ! z = x*x
1255         andn    %l0,0x1f,%l0            ! byte offset, multiple of 32
1256         fmuld   %f2,%f58,%f20
1257         ldd     [%l3+%l0],%f32          ! table entry, first double
1258         faddd   %f20,%f56,%f20
1259         fmuld   %f2,%f62,%f4
1260         ldd     [%g1+%l0],%f34          ! table entry, second double
1261         fmuld   %f2,%f20,%f20
1262         faddd   %f4,%f60,%f4
1263         faddd   %f20,%f54,%f20          ! one + z*(pp1 + z*pp2)
1264         fmuld   %f2,%f4,%f4             ! z*(qq1 + z*qq2)
1265         fmuld   %f0,%f20,%f20           ! times x
1266         ldd     [%l4+%l0],%f2           ! entry from the table based at %l4
1267         fmuld   %f4,%f32,%f4
1268         fmuld   %f20,%f34,%f20
1269         faddd   %f20,%f4,%f20
1270         faddd   %f20,%f2,%f20
1271         ba,pt   %icc,2f
1272 ! delay slot
1273         faddd   %f20,%f32,%f20          ! add the leading table term last
1274 1:
     ! Polynomial path: x + x*z*(%f46 + z*(%f48 + z*(%f50 + z*%f52)))
1275         fmuld   %f0,%f0,%f2             ! z = x*x
1276         fmuld   %f2,%f52,%f4
1277         faddd   %f4,%f50,%f4
1278         fmuld   %f2,%f4,%f4
1279         faddd   %f4,%f48,%f4
1280         fmuld   %f2,%f4,%f4
1281         faddd   %f4,%f46,%f4
1282         fmuld   %f2,%f4,%f4
1283         fmuld   %f0,%f4,%f4
1284         faddd   %f0,%f4,%f20
1285 2:
1286         fors    %f20,%f9,%f20           ! OR the saved sign word into the result
1287         st      %f20,[%o0]              ! stored as two 32-bit words
1288         st      %f21,[%o0+4]
1289 
1290 .endloop0:
     ! Flush the three results still held in %f6, %f16 and %f26 through the
     ! pointers in %o3, %o4, %o5, then return.  %o4 and %o5 are the copies of
     ! %o1 and %o2 made in the primary loop; %o3 is presumably the matching
     ! copy of %o0 (made outside this view).
1291         st      %f6,[%o3]
1292         st      %f7,[%o3+4]
1293         st      %f16,[%o4]
1294         st      %f17,[%o4+4]
1295         st      %f26,[%o5]
1296         st      %f27,[%o5+4]
1297 
1298 ! return.  finished off with only primary range arguments.
1299 
1300         ret
1301         restore                         ! delay slot of ret: pop the register window
1302 
1303 
1304         .align  32
1305 .range0:
     ! Argument 0 (hx0 in %l0, x0 in %f0) is outside the primary range.
     ! If hx0 > LIM_l6 go to the medium-range code, recording slot 1 in
     ! LIM_l6; otherwise x0 is tiny and is returned unchanged.
1306         cmp     %l0,LIM_l6
1307         bg,a,pt %icc,.MEDIUM            ! branch if x is not tiny
1308 ! delay slot, annulled if branch not taken
1309         mov     0x1,LIM_l6              ! set "processing loop0"
1310         st      %f0,[%o0]               ! *y = *x with inexact if x nonzero
1311         st      %f1,[%o0+4]
1312         fdtoi   %f0,%f2                 ! conversion result is not used afterwards
1313         addcc   %i0,-1,%i0              ! n--
1314         ble,pn  %icc,.endloop0          ! nothing left: flush and return
1315 ! delay slot, harmless if branch taken
1316         add     %i3,%i4,%i3             ! y += stridey
     ! Promote the already preloaded next argument (slot-1 registers) to
     ! slot 0 and re-enter the primary loop.
1317         andn    %l1,%i5,%l0             ! hx &= ~0x80000000
1318         fmovd   %f10,%f0
1319         ba,pt   %icc,.loop0
1320 ! delay slot
1321         add     %i1,%i2,%i1             ! x += stridex
1322 
1323 
1324         .align  32
1325 .range1:
     ! Argument 1 (hx1 in %l1, x1 in %f10) is outside the primary range.
     ! If hx1 > LIM_l6 go to the medium-range code, recording slot 2 in
     ! LIM_l6; otherwise x1 is tiny and is returned unchanged.
1326         cmp     %l1,LIM_l6
1327         bg,a,pt %icc,.MEDIUM            ! branch if x is not tiny
1328 ! delay slot, annulled if branch not taken
1329         mov     0x2,LIM_l6              ! set "processing loop1"
1330         st      %f10,[%o1]              ! *y = *x with inexact if x nonzero
1331         st      %f11,[%o1+4]
1332         fdtoi   %f10,%f12               ! conversion result is not used afterwards
1333         addcc   %i0,-1,%i0              ! n--
1334         ble,pn  %icc,.endloop1          ! nothing left: finish argument 0
1335 ! delay slot, harmless if branch taken
1336         add     %i3,%i4,%i3             ! y += stridey
     ! Promote the already preloaded next argument (slot-2 registers) to
     ! slot 1 and re-enter the primary loop at its second stage.
1337         andn    %l2,%i5,%l1             ! hx &= ~0x80000000
1338         fmovd   %f20,%f10
1339         ba,pt   %icc,.loop1
1340 ! delay slot
1341         add     %i1,%i2,%i1             ! x += stridex
1342 
1343 
1344         .align  32
1345 .range2:
     ! Argument 2 (hx2 in %l2, x2 in %f20) is outside the primary range.
     ! If hx2 > LIM_l6 go to the medium-range code, recording slot 3 in
     ! LIM_l6; otherwise x2 is tiny and is returned unchanged.
1346         cmp     %l2,LIM_l6
1347         bg,a,pt %icc,.MEDIUM            ! branch if x is not tiny
1348 ! delay slot, annulled if branch not taken
1349         mov     0x3,LIM_l6              ! set "processing loop2"
1350         st      %f20,[%o2]              ! *y = *x with inexact if x nonzero
1351         st      %f21,[%o2+4]
1352         fdtoi   %f20,%f22               ! conversion result is not used afterwards
1353 1:
1354         addcc   %i0,-1,%i0              ! n--
1355         ble,pn  %icc,.endloop2          ! nothing left: finish arguments 1 and 0
1356 ! delay slot
1357         nop
     ! No argument has been preloaded for slot 2, so fetch the next one
     ! here before re-entering the primary loop at its third stage.
1358         ld      [%i1],%l2
1359         ld      [%i1],%f20
1360         ld      [%i1+4],%f21
1361         andn    %l2,%i5,%l2             ! hx &= ~0x80000000
1362         ba,pt   %icc,.loop2
1363 ! delay slot
1364         add     %i1,%i2,%i1             ! x += stridex
1365 
1366 
1367         .align  32
1368 .MEDIUM:
1369 
1370 ! ========== medium range ==========
1371 
1372 ! register use
1373 
1374 ! i0  n
1375 ! i1  x
1376 ! i2  stridex
1377 ! i3  y
1378 ! i4  stridey
1379 ! i5  0x80000000
1380 
1381 ! l0  hx0
1382 ! l1  hx1
1383 ! l2  hx2
1384 ! l3  __vlibm_TBL_sincos_hi
1385 ! l4  __vlibm_TBL_sincos_lo
1386 ! l5  constants
1387 ! l6  in transition from pri-range and here, use for biguns
1388 ! l7  0x413921fb
1389 
1390 ! the following are 64-bit registers in both V8+ and V9
1391 
1392 ! g1  scratch
1393 ! g5  
1394 
1395 ! o0  py0
1396 ! o1  py1
1397 ! o2  py2
1398 ! o3  n0
1399 ! o4  n1
1400 ! o5  n2
1401 ! o7  scratch
1402 
1403 ! f0  x0
1404 ! f2  n0,y0
1405 ! f4  
1406 ! f6  
1407 ! f8  scratch for table base
1408 ! f9  signbit0
1409 ! f10 x1
1410 ! f12 n1,y1
1411 ! f14 
1412 ! f16 
1413 ! f18 scratch for table base
1414 ! f19 signbit1
1415 ! f20 x2
1416 ! f22 n2,y2
1417 ! f24 
1418 ! f26 
1419 ! f28 scratch for table base
1420 ! f29 signbit2
1421 ! f30 0x80000000
1422 ! f31 0x4000
1423 ! f32 
1424 ! f34 
1425 ! f36 
1426 ! f38 
1427 ! f40 invpio2
1428 ! f42 round
1429 ! f44 0xffff800000000000
1430 ! f46 pio2_1
1431 ! f48 pio2_2
1432 ! f50 pio2_3
1433 ! f52 pio2_3t
1434 ! f54 one
1435 ! f56 pp1
1436 ! f58 pp2
1437 ! f60 qq1
1438 ! f62 qq2
1439 
     ! NOTE(review): PIC_SET is a macro defined outside this view; going by
     ! its arguments and the register list above it presumably leaves the
     ! address of "constants" in %l5 using %g5 as scratch -- confirm.
1440         PIC_SET(g5,constants,l5)
1441 
     ! Flush the results of the previous primary-range triple, which are
     ! still held in %f6, %f16 and %f26.  The sethi/or pair interleaved with
     ! the stores builds the medium-range upper bound in %l7.
1442         ! %o3,%o4,%o5 need to be stored
1443         st      %f6,[%o3]
1444         sethi   %hi(0x413921fb),%l7
1445         st      %f7,[%o3+4]
1446         or      %l7,%lo(0x413921fb),%l7
1447         st      %f16,[%o4]
1448         st      %f17,[%o4+4]
1449         st      %f26,[%o5]
1450         st      %f27,[%o5+4]
     ! %f40-%f52 are reloaded with the argument-reduction constants;
     ! %f54-%f62 are left as they were on entry.
1451         ldd     [%l5+invpio2],%f40
1452         ldd     [%l5+round],%f42
1453         ldd     [%l5+pio2_1],%f46
1454         ldd     [%l5+pio2_2],%f48
1455         ldd     [%l5+pio2_3],%f50
1456         ldd     [%l5+pio2_3t],%f52
     ! Each stack slot pair is (value, default).  The .CASEn code reads
     ! [slot + 0] or [slot + 8] depending on the parity of n, so the second
     ! half is preset to one for the x slots and to zero for the y slots.
1457         std     %f54,[%fp+x0_1+8]       ! set up stack data
1458         std     %f54,[%fp+x1_1+8]
1459         std     %f54,[%fp+x2_1+8]
1460         stx     %g0,[%fp+y0_0+8]
1461         stx     %g0,[%fp+y1_0+8]
1462         stx     %g0,[%fp+y2_0+8]
1463 
1464 !       branched here in the middle of the array.  Need to adjust 
1465 !       for the members of the triple that were selected in the primary
1466 !       loop.
1467 
     ! LIM_l6 is 1, 2 or 3 according to which of .range0/.range1/.range2
     ! branched here.  The "bz,a" delay slots run only when the branch is
     ! taken.  If no test matches, control falls through into .LOOP0.
1468 !       no adjustment since all three selected here
1469         subcc   LIM_l6,0x1,%g0          ! continue in LOOP0?
1470         bz,a    %icc,.LOOP0
1471         mov     0x0,LIM_l6              ! delay slot set biguns=0
1472 
1473 !       adjust 1st triple since 2d and 3d done here
1474         subcc   LIM_l6,0x2,%g0          ! continue in LOOP1?
1475         fors    %f0,%f9,%f0             ! restore sign bit
1476         fmuld   %f0,%f40,%f2            ! adj LOOP0
1477         bz,a    %icc,.LOOP1
1478         mov     0x0,LIM_l6              ! delay slot set biguns=0
1479 
1480 !       adjust 1st and 2d triple since 3d done here
1481         subcc   LIM_l6,0x3,%g0          ! continue in LOOP2?
1482         !done fmuld     %f0,%f40,%f2            ! adj LOOP0
1483         sub     %i3,%i4,%i3             ! adjust to not double increment
1484         fors    %f10,%f19,%f10          ! restore sign bit
1485         fmuld   %f10,%f40,%f12          ! adj LOOP1
1486         faddd   %f2,%f42,%f2            ! adj LOOP1
1487         bz,a    %icc,.LOOP2
1488         mov     0x0,LIM_l6              ! delay slot set biguns=0
1489 
1490         .align 32
1491 .LOOP0:
     ! Medium-range loop, stage 0.  x0/hx0 are already in %f0/%l0.  Record
     ! py0, start fetching argument 1, and leave for .BIG0 when hx0 is above
     ! 0x413921fb (%l7).  Falls through into .LOOP1.
1492         lda     [%i1]%asi,%l1           ! preload next argument
1493         mov     %i3,%o0                 ! py0 = y
1494         lda     [%i1]%asi,%f10
1495         cmp     %l0,%l7
1496         add     %i3,%i4,%i3             ! y += stridey
1497         bg,pn   %icc,.BIG0              ! if hx > 0x413921fb
1498 
1499 ! delay slot
1500         lda     [%i1+4]%asi,%f11        ! not annulled: runs on both paths
1501         addcc   %i0,-1,%i0              ! n--
1502         add     %i1,%i2,%i1             ! x += stridex
1503         ble,pn  %icc,.ENDLOOP1          ! no further argument to process
1504 
1505 ! delay slot
1506         andn    %l1,%i5,%l1             ! hx1 &= ~0x80000000
1507         nop
1508         fmuld   %f0,%f40,%f2            ! x0 * invpio2
1509         fabsd   %f54,%f54               ! a nop for alignment only
1510 
1511 .LOOP1:
     ! Medium-range loop, stage 1.  x1/hx1 are in %f10/%l1.  Record py1,
     ! start fetching argument 2, and leave for .BIG1 when hx1 is above
     ! 0x413921fb (%l7).  Falls through into .LOOP2.
1512         lda     [%i1]%asi,%l2           ! preload next argument
1513         mov     %i3,%o1                 ! py1 = y
1514 
1515         lda     [%i1]%asi,%f20
1516         cmp     %l1,%l7
1517         add     %i3,%i4,%i3             ! y += stridey
1518         bg,pn   %icc,.BIG1              ! if hx > 0x413921fb
1519 
1520 ! delay slot
1521         lda     [%i1+4]%asi,%f21        ! not annulled: runs on both paths
1522         addcc   %i0,-1,%i0              ! n--
1523         add     %i1,%i2,%i1             ! x += stridex
1524         ble,pn  %icc,.ENDLOOP2          ! no further argument to process
1525 
1526 ! delay slot
1527         andn    %l2,%i5,%l2             ! hx2 &= ~0x80000000
1528         nop
1529         fmuld   %f10,%f40,%f12          ! x1 * invpio2
1530         faddd   %f2,%f42,%f2            ! x0 * invpio2 + round
1531 
1532 .LOOP2:
     ! Medium-range loop, stage 2 and main body.  Three arguments are
     ! processed in lock step (slot 0: %f0-%f9, slot 1: %f10-%f19,
     ! slot 2: %f20-%f29).  Phases:
     !   1. n = x*invpio2, made integral by adding and subtracting "round";
     !      the low word of the sum is saved at [%fp+n0/n1/n2].
     !   2. reduce x by n times pio2_1, pio2_2, pio2_3, pio2_3t, keeping a
     !      head (x) and a tail (y).
     !   3. save the sign, take |x|, and compare against a threshold word
     !      selected by the parity of n; small arguments go to .CASEn.
     !   4. otherwise evaluate all three through the tables at %l3/%l4.
     ! Falls through into .FIXSIGN.
1533         st      %f3,[%fp+n0]            ! low word of x0*invpio2 + round
1534         mov     %i3,%o2                 ! py2 = y
1535 
1536         cmp     %l2,%l7
1537         add     %i3,%i4,%i3             ! y += stridey
1538         fmuld   %f20,%f40,%f22          ! x2 * invpio2
1539         bg,pn   %icc,.BIG2              ! if hx > 0x413921fb
1540 
1541 ! delay slot
1542         add     %l5,thresh+4,%o7        ! o7 = constants + thresh + 4
1543         faddd   %f12,%f42,%f12
1544         st      %f13,[%fp+n1]
1545 
1546 ! -
1547 
1548         add     %l5,thresh,%g1          ! g1 = constants + thresh
1549         faddd   %f22,%f42,%f22
1550         st      %f23,[%fp+n2]
1551 
1552         fsubd   %f2,%f42,%f2            ! n
1553 
1554         fsubd   %f12,%f42,%f12          ! n
1555 
1556         fsubd   %f22,%f42,%f22          ! n
1557 
     ! Phase 2: x - n*pio2_1 - n*pio2_2 - n*pio2_3 - n*pio2_3t, with the
     ! rounding error of each subtraction folded into the next term.
1558         fmuld   %f2,%f46,%f4
1559 
1560         fmuld   %f12,%f46,%f14
1561 
1562         fmuld   %f22,%f46,%f24
1563 
1564         fsubd   %f0,%f4,%f4
1565         fmuld   %f2,%f48,%f6
1566 
1567         fsubd   %f10,%f14,%f14
1568         fmuld   %f12,%f48,%f16
1569 
1570         fsubd   %f20,%f24,%f24
1571         fmuld   %f22,%f48,%f26
1572 
1573         fsubd   %f4,%f6,%f0
1574         ld      [%fp+n0],%o3
1575 
1576         fsubd   %f14,%f16,%f10
1577         ld      [%fp+n1],%o4
1578 
1579         fsubd   %f24,%f26,%f20
1580         ld      [%fp+n2],%o5
1581 
1582         fsubd   %f4,%f0,%f32
1583         and     %o3,1,%o3               ! parity of n0
1584 
1585         fsubd   %f14,%f10,%f34
1586         and     %o4,1,%o4               ! parity of n1
1587 
1588         fsubd   %f24,%f20,%f36
1589         and     %o5,1,%o5               ! parity of n2
1590 
1591         fsubd   %f32,%f6,%f32
1592         fmuld   %f2,%f50,%f8
1593         sll     %o3,3,%o3               ! 0 or 8: byte offset used as selector
1594 
1595         fsubd   %f34,%f16,%f34
1596         fmuld   %f12,%f50,%f18
1597         sll     %o4,3,%o4
1598 
1599         fsubd   %f36,%f26,%f36
1600         fmuld   %f22,%f50,%f28
1601         sll     %o5,3,%o5
1602 
1603         fsubd   %f8,%f32,%f8
1604         ld      [%g1+%o3],%f6           ! threshold word chosen by parity
1605 
1606         fsubd   %f18,%f34,%f18
1607         ld      [%g1+%o4],%f16
1608 
1609         fsubd   %f28,%f36,%f28
1610         ld      [%g1+%o5],%f26
1611 
1612         fsubd   %f0,%f8,%f4
1613 
1614         fsubd   %f10,%f18,%f14
1615 
1616         fsubd   %f20,%f28,%f24
1617 
1618         fsubd   %f0,%f4,%f32
1619 
1620         fsubd   %f10,%f14,%f34
1621 
1622         fsubd   %f20,%f24,%f36
1623 
1624         fsubd   %f32,%f8,%f32
1625         fmuld   %f2,%f52,%f2
1626 
1627         fsubd   %f34,%f18,%f34
1628         fmuld   %f12,%f52,%f12
1629 
1630         fsubd   %f36,%f28,%f36
1631         fmuld   %f22,%f52,%f22
1632 
1633         fsubd   %f2,%f32,%f2
1634         ld      [%o7+%o3],%f8           ! word used below to mask the sign
1635 
1636         fsubd   %f12,%f34,%f12
1637         ld      [%o7+%o4],%f18
1638 
1639         fsubd   %f22,%f36,%f22
1640         ld      [%o7+%o5],%f28
1641 
1642         fsubd   %f4,%f2,%f0             ! x
1643 
1644         fsubd   %f14,%f12,%f10          ! x
1645 
1646         fsubd   %f24,%f22,%f20          ! x
1647 
1648         fsubd   %f4,%f0,%f4
1649 
1650         fsubd   %f14,%f10,%f14
1651 
1652         fsubd   %f24,%f20,%f24
1653 
     ! Phase 3: sign handling and path selection.
1654         fands   %f0,%f30,%f9            ! save signbit
1655 
1656         fands   %f10,%f30,%f19          ! save signbit
1657 
1658         fands   %f20,%f30,%f29          ! save signbit
1659 
1660         fabsd   %f0,%f0
1661         std     %f0,[%fp+x0_1]          ! |x| kept on the stack for .CASEn
1662 
1663         fabsd   %f10,%f10
1664         std     %f10,[%fp+x1_1]
1665 
1666         fabsd   %f20,%f20
1667         std     %f20,[%fp+x2_1]
1668 
1669         fsubd   %f4,%f2,%f2             ! y
1670 
1671         fsubd   %f14,%f12,%f12          ! y
1672 
1673         fsubd   %f24,%f22,%f22          ! y
1674 
     ! Only bit 1 of each compare mask is tested below; per the VIS
     ! definition of fcmpgt32 that is the upper 32-bit word, i.e. the
     ! threshold word against the high word of |x|.
1675         fcmpgt32 %f6,%f0,%l0
1676 
1677         fcmpgt32 %f16,%f10,%l1
1678 
1679         fcmpgt32 %f26,%f20,%l2
1680 
1681 ! -- 16 byte aligned
1682         fxors   %f2,%f9,%f2             ! flip the tail by the sign removed from x
1683 
1684         fxors   %f12,%f19,%f12
1685 
1686         fxors   %f22,%f29,%f22
1687 
     ! The branch delay slots below are not annulled, so each .CASEn entry
     ! point relies on exactly the instructions executed up to its branch.
1688         fands   %f9,%f8,%f9             ! if (n & 1) clear sign bit
1689         andcc   %l0,2,%g0
1690         bne,pn  %icc,.CASE4             ! slot 0 below its threshold
1691 
1692 ! delay slot
1693         fands   %f19,%f18,%f19          ! if (n & 1) clear sign bit
1694         andcc   %l1,2,%g0
1695         bne,pn  %icc,.CASE2             ! slot 1 below its threshold
1696 
1697 ! delay slot
1698         fands   %f29,%f28,%f29          ! if (n & 1) clear sign bit
1699         andcc   %l2,2,%g0
1700         bne,pn  %icc,.CASE1             ! slot 2 below its threshold
1701 
     ! Phase 4: table evaluation for all three slots.
1702 ! delay slot
1703         fpadd32s %f0,%f31,%f8           ! hi word of |x| plus %f31
1704         sethi   %hi(0x3fc3c000),%o7     ! bias removed from hx for indexing
1705         ld      [%fp+x0_1],%l0          ! high word of |x0|
1706 
1707         fpadd32s %f10,%f31,%f18
1708         add     %l3,8,%g1               ! g1 = table base + 8
1709         ld      [%fp+x1_1],%l1
1710 
1711         fpadd32s %f20,%f31,%f28
1712         ld      [%fp+x2_1],%l2
1713 
1714         fand    %f8,%f44,%f4            ! masked with %f44: breakpoint
1715         sub     %l0,%o7,%l0
1716 
1717         fand    %f18,%f44,%f14
1718         sub     %l1,%o7,%l1
1719 
1720         fand    %f28,%f44,%f24
1721         sub     %l2,%o7,%l2
1722 
1723         fsubd   %f0,%f4,%f0             ! x -= breakpoint
1724         srl     %l0,10,%l0
1725 
1726         fsubd   %f10,%f14,%f10
1727         srl     %l1,10,%l1
1728 
1729         fsubd   %f20,%f24,%f20
1730         srl     %l2,10,%l2
1731 
1732         faddd   %f0,%f2,%f0             ! fold in the reduction tail y
1733         andn    %l0,0x1f,%l0            ! byte offset, multiple of 32
1734 
1735         faddd   %f10,%f12,%f10
1736         andn    %l1,0x1f,%l1
1737 
1738         faddd   %f20,%f22,%f20
1739         andn    %l2,0x1f,%l2
1740 
1741         fmuld   %f0,%f0,%f2             ! z = x*x
1742         add     %l0,%o3,%l0             ! plus 0 or 8 by the parity of n
1743 
1744         fmuld   %f10,%f10,%f12
1745         add     %l1,%o4,%l1
1746 
1747         fmuld   %f20,%f20,%f22
1748         add     %l2,%o5,%l2
1749 
1750         fmuld   %f2,%f58,%f6
1751         ldd     [%l3+%l0],%f32
1752 
1753         fmuld   %f12,%f58,%f16
1754         ldd     [%l3+%l1],%f34
1755 
1756         fmuld   %f22,%f58,%f26
1757         ldd     [%l3+%l2],%f36
1758 
1759         faddd   %f6,%f56,%f6
1760         fmuld   %f2,%f62,%f4
1761 
1762         faddd   %f16,%f56,%f16
1763         fmuld   %f12,%f62,%f14
1764 
1765         faddd   %f26,%f56,%f26
1766         fmuld   %f22,%f62,%f24
1767 
1768         fmuld   %f2,%f6,%f6
1769         faddd   %f4,%f60,%f4
1770 
1771         fmuld   %f12,%f16,%f16
1772         faddd   %f14,%f60,%f14
1773 
1774         fmuld   %f22,%f26,%f26
1775         faddd   %f24,%f60,%f24
1776 
1777         faddd   %f6,%f54,%f6            ! one + z*(pp1 + z*pp2)
1778         fmuld   %f2,%f4,%f4             ! z*(qq1 + z*qq2)
1779 
1780         faddd   %f16,%f54,%f16
1781         fmuld   %f12,%f14,%f14
1782 
1783         faddd   %f26,%f54,%f26
1784         fmuld   %f22,%f24,%f24
1785 
1786         fmuld   %f0,%f6,%f6             ! times x
1787         ldd     [%g1+%l0],%f2
1788 
1789         fmuld   %f10,%f16,%f16
1790         ldd     [%g1+%l1],%f12
1791 
1792         fmuld   %f20,%f26,%f26
1793         ldd     [%g1+%l2],%f22
1794 
1795         fmuld   %f4,%f32,%f4
1796         ldd     [%l4+%l0],%f0           ! entry from the table based at %l4
1797 
1798         fmuld   %f14,%f34,%f14
1799         ldd     [%l4+%l1],%f10
1800 
1801         fmuld   %f24,%f36,%f24
1802         ldd     [%l4+%l2],%f20
1803 
1804         fmuld   %f6,%f2,%f6
1805 
1806         fmuld   %f16,%f12,%f16
1807 
1808         fmuld   %f26,%f22,%f26
1809 
1810         faddd   %f6,%f4,%f6
1811 
1812         faddd   %f16,%f14,%f16
1813 
1814         faddd   %f26,%f24,%f26
1815 
1816         faddd   %f6,%f0,%f6
1817 
1818         faddd   %f16,%f10,%f16
1819 
1820         faddd   %f26,%f20,%f26
1821 
1822         faddd   %f6,%f32,%f6            ! add the leading table term last
1823 
1824         faddd   %f16,%f34,%f16
1825 
1826         faddd   %f26,%f36,%f26
1827 
1828 .FIXSIGN:
     ! Common exit of the table path and of every .CASEn path.  Results
     ! (without sign) are in %f6, %f16, %f26.  For each slot a word selected
     ! by bit 1 of n is XORed into the saved sign word, the sign is ORed into
     ! the result, and the result is stored through py0/py1/py2.  The next
     ! argument is preloaded into %l0/%f0 along the way.
     ! NOTE(review): the words at thresh-4 and thresh+4 are not visible
     ! here; presumably 0 and 0x80000000 -- confirm against the table.
1829         ld      [%fp+n0],%o3
1830         add     %l5,thresh-4,%g1
1831 
1832         ld      [%fp+n1],%o4
1833 
1834         ld      [%fp+n2],%o5
1835         and     %o3,2,%o3               ! bit 1 of n0
1836 
1837         sll     %o3,2,%o3               ! 0 or 8: byte offset
1838         and     %o4,2,%o4
1839         lda     [%i1]%asi,%l0           ! preload next argument
1840 
1841         sll     %o4,2,%o4
1842         and     %o5,2,%o5
1843         ld      [%g1+%o3],%f8
1844 
1845         sll     %o5,2,%o5
1846         ld      [%g1+%o4],%f18
1847 
1848         ld      [%g1+%o5],%f28
1849         fxors   %f9,%f8,%f9
1850 
1851         lda     [%i1]%asi,%f0
1852         fxors   %f29,%f28,%f29
1853 
1854         lda     [%i1+4]%asi,%f1
1855         fxors   %f19,%f18,%f19
1856 
1857         fors    %f6,%f9,%f6             ! tack on sign
1858         add     %i1,%i2,%i1             ! x += stridex
1859         st      %f6,[%o0]
1860 
1861         fors    %f26,%f29,%f26          ! tack on sign
1862         st      %f7,[%o0+4]
1863 
1864         fors    %f16,%f19,%f16          ! tack on sign
1865         st      %f26,[%o2]
1866 
1867         st      %f27,[%o2+4]
1868         addcc   %i0,-1,%i0              ! n--
1869 
1870         st      %f16,[%o1]
1871         andn    %l0,%i5,%l0             ! hx &= ~0x80000000
1872         bg,pt   %icc,.LOOP0             ! more arguments: next triple
1873 
1874 ! delay slot
1875         st      %f17,[%o1+4]            ! not annulled: runs on both paths
1876 
1877         ba,pt   %icc,.ENDLOOP0
1878 ! delay slot
1879         nop
1880 
1881         .align  32
1882 .CASE1:
     ! Slot 2 is below its threshold; slots 0 and 1 use the table path.
     ! On entry %f8 already holds fpadd32s(%f0,%f31), executed in the delay
     ! slot of the branch that came here.
     ! Slot 2 is evaluated as  a + (a*P(z) + b)  with z = x2*x2 and
     !   P(z) = z*(c3 + z*(c2 + z*(c1 + z*c0))),
     ! the coefficients being read from constants + %o5 at 0x10 spacing and
     ! (a, b) being (|x2|, y2) when %o5 is 0 or (one, 0) when %o5 is 8.
     ! Going by the values at the head of "constants", offset 0 holds the
     ! sine set and offset 8 the cosine set.
1883         fpadd32s %f10,%f31,%f18
1884         sethi   %hi(0x3fc3c000),%o7     ! bias removed from hx for indexing
1885         ld      [%fp+x0_1],%l0
1886 
1887         fand    %f8,%f44,%f4
1888         add     %l3,8,%g1               ! g1 = table base + 8
1889         ld      [%fp+x1_1],%l1
1890 
1891         fand    %f18,%f44,%f14
1892         sub     %l0,%o7,%l0
1893 
1894         fsubd   %f0,%f4,%f0
1895         srl     %l0,10,%l0
1896         sub     %l1,%o7,%l1
1897 
1898         fsubd   %f10,%f14,%f10
1899         srl     %l1,10,%l1
1900 
1901         fmuld   %f20,%f20,%f20          ! slot 2: z = x*x
1902         ldd     [%l5+%o5],%f36          ! c0
1903         add     %l5,%o5,%l2             ! base of the selected coefficient set
1904 
1905         faddd   %f0,%f2,%f0
1906         andn    %l0,0x1f,%l0
1907 
1908         faddd   %f10,%f12,%f10
1909         andn    %l1,0x1f,%l1
1910 
1911         fmuld   %f20,%f36,%f24
1912         ldd     [%l2+0x10],%f26         ! c1
1913         add     %fp,%o5,%o5             ! o5 now selects the stack slot half
1914 
1915         fmuld   %f0,%f0,%f2
1916         add     %l0,%o3,%l0
1917 
1918         fmuld   %f10,%f10,%f12
1919         add     %l1,%o4,%l1
1920 
1921         faddd   %f24,%f26,%f24
1922         ldd     [%l2+0x20],%f36         ! c2
1923 
1924         fmuld   %f2,%f58,%f6
1925         ldd     [%l3+%l0],%f32
1926 
1927         fmuld   %f12,%f58,%f16
1928         ldd     [%l3+%l1],%f34
1929 
1930         fmuld   %f20,%f24,%f24
1931         ldd     [%l2+0x30],%f26         ! c3
1932 
1933         faddd   %f6,%f56,%f6
1934         fmuld   %f2,%f62,%f4
1935 
1936         faddd   %f16,%f56,%f16
1937         fmuld   %f12,%f62,%f14
1938 
1939         faddd   %f24,%f36,%f24
1940         ldd     [%o5+x2_1],%f36         ! a = |x2| or one
1941 
1942         fmuld   %f2,%f6,%f6
1943         faddd   %f4,%f60,%f4
1944 
1945         fmuld   %f12,%f16,%f16
1946         faddd   %f14,%f60,%f14
1947 
1948         fmuld   %f20,%f24,%f24
1949 
1950         faddd   %f6,%f54,%f6
1951         fmuld   %f2,%f4,%f4
1952         ldd     [%g1+%l0],%f2
1953 
1954         faddd   %f16,%f54,%f16
1955         fmuld   %f12,%f14,%f14
1956         ldd     [%g1+%l1],%f12
1957 
1958         faddd   %f24,%f26,%f24
1959 
1960         fmuld   %f0,%f6,%f6
1961         ldd     [%l4+%l0],%f0
1962 
1963         fmuld   %f10,%f16,%f16
1964         ldd     [%l4+%l1],%f10
1965 
1966         fmuld   %f4,%f32,%f4
1967         std     %f22,[%fp+y2_0]         ! park y2 so it can be reselected below
1968 
1969         fmuld   %f14,%f34,%f14
1970 
1971         fmuld   %f6,%f2,%f6
1972 
1973         fmuld   %f16,%f12,%f16
1974 
1975         fmuld   %f20,%f24,%f24
1976 
1977         faddd   %f6,%f4,%f6
1978 
1979         faddd   %f16,%f14,%f16
1980 
1981         fmuld   %f36,%f24,%f24
1982         ldd     [%o5+y2_0],%f22         ! b = y2 or 0
1983 
1984         faddd   %f6,%f0,%f6
1985 
1986         faddd   %f16,%f10,%f16
1987 
1988         faddd   %f24,%f22,%f24
1989 
1990         faddd   %f6,%f32,%f6
1991 
1992         faddd   %f16,%f34,%f16
1993         ba,pt   %icc,.FIXSIGN
1994 
1995 ! delay slot
1996         faddd   %f36,%f24,%f26          ! slot 2 result: a + (a*P + b)
1997 
1998         .align  32
1999 .CASE2:
     ! Slot 1 is below its threshold.  If slot 2 is as well, continue at
     ! .CASE3 (which relies on %f8, %l0 and %o7 set up here); otherwise
     ! slots 0 and 2 use the table path.
     ! Slot 1 is evaluated as  a + (a*P(z) + b)  with z = x1*x1 and
     !   P(z) = z*(c3 + z*(c2 + z*(c1 + z*c0))),
     ! the coefficients being read from constants + %o4 at 0x10 spacing and
     ! (a, b) being (|x1|, y1) when %o4 is 0 or (one, 0) when %o4 is 8.
2000         fpadd32s %f0,%f31,%f8
2001         ld      [%fp+x0_1],%l0
2002         andcc   %l2,2,%g0
2003         bne,pn  %icc,.CASE3
2004 
2005 ! delay slot
2006         sethi   %hi(0x3fc3c000),%o7     ! not annulled: also set for .CASE3
2007         fpadd32s %f20,%f31,%f28
2008         ld      [%fp+x2_1],%l2
2009 
2010         fand    %f8,%f44,%f4
2011         sub     %l0,%o7,%l0
2012         add     %l3,8,%g1               ! g1 = table base + 8
2013 
2014         fand    %f28,%f44,%f24
2015         sub     %l2,%o7,%l2
2016 
2017         fsubd   %f0,%f4,%f0
2018         srl     %l0,10,%l0
2019 
2020         fsubd   %f20,%f24,%f20
2021         srl     %l2,10,%l2
2022 
2023         fmuld   %f10,%f10,%f10          ! slot 1: z = x*x
2024         ldd     [%l5+%o4],%f34          ! c0
2025         add     %l5,%o4,%l1             ! base of the selected coefficient set
2026 
2027         faddd   %f0,%f2,%f0
2028         andn    %l0,0x1f,%l0
2029 
2030         faddd   %f20,%f22,%f20
2031         andn    %l2,0x1f,%l2
2032 
2033         fmuld   %f10,%f34,%f14
2034         ldd     [%l1+0x10],%f16         ! c1
2035         add     %fp,%o4,%o4             ! o4 now selects the stack slot half
2036 
2037         fmuld   %f0,%f0,%f2
2038         add     %l0,%o3,%l0
2039 
2040         fmuld   %f20,%f20,%f22
2041         add     %l2,%o5,%l2
2042 
2043         faddd   %f14,%f16,%f14
2044         ldd     [%l1+0x20],%f34         ! c2
2045 
2046         fmuld   %f2,%f58,%f6
2047         ldd     [%l3+%l0],%f32
2048 
2049         fmuld   %f22,%f58,%f26
2050         ldd     [%l3+%l2],%f36
2051 
2052         fmuld   %f10,%f14,%f14
2053         ldd     [%l1+0x30],%f16         ! c3
2054 
2055         faddd   %f6,%f56,%f6
2056         fmuld   %f2,%f62,%f4
2057 
2058         faddd   %f26,%f56,%f26
2059         fmuld   %f22,%f62,%f24
2060 
2061         faddd   %f14,%f34,%f14
2062         ldd     [%o4+x1_1],%f34         ! a = |x1| or one
2063 
2064         fmuld   %f2,%f6,%f6
2065         faddd   %f4,%f60,%f4
2066 
2067         fmuld   %f22,%f26,%f26
2068         faddd   %f24,%f60,%f24
2069 
2070         fmuld   %f10,%f14,%f14
2071 
2072         faddd   %f6,%f54,%f6
2073         fmuld   %f2,%f4,%f4
2074         ldd     [%g1+%l0],%f2
2075 
2076         faddd   %f26,%f54,%f26
2077         fmuld   %f22,%f24,%f24
2078         ldd     [%g1+%l2],%f22
2079 
2080         faddd   %f14,%f16,%f14
2081 
2082         fmuld   %f0,%f6,%f6
2083         ldd     [%l4+%l0],%f0
2084 
2085         fmuld   %f20,%f26,%f26
2086         ldd     [%l4+%l2],%f20
2087 
2088         fmuld   %f4,%f32,%f4
2089         std     %f12,[%fp+y1_0]         ! park y1 so it can be reselected below
2090 
2091         fmuld   %f24,%f36,%f24
2092 
2093         fmuld   %f6,%f2,%f6
2094 
2095         fmuld   %f26,%f22,%f26
2096 
2097         fmuld   %f10,%f14,%f14
2098 
2099         faddd   %f6,%f4,%f6
2100 
2101         faddd   %f26,%f24,%f26
2102 
2103         fmuld   %f34,%f14,%f14
2104         ldd     [%o4+y1_0],%f12         ! b = y1 or 0
2105 
2106         faddd   %f6,%f0,%f6
2107 
2108         faddd   %f26,%f20,%f26
2109 
2110         faddd   %f14,%f12,%f14
2111 
2112         faddd   %f6,%f32,%f6
2113 
2114         faddd   %f26,%f36,%f26
2115         ba,pt   %icc,.FIXSIGN
2116 
2117 ! delay slot
2118         faddd   %f34,%f14,%f16          ! slot 1 result: a + (a*P + b)
2119 
2120         .align  32
2121 .CASE3:
     ! Slots 1 and 2 are below their thresholds; slot 0 uses the table path.
     ! Entered from .CASE2 with %f8 = fpadd32s(%f0,%f31), %l0 = high word of
     ! |x0| and %o7 = %hi(0x3fc3c000) already set.
     ! Slots 1 and 2 are each evaluated as  a + (a*P(z) + b)  with the
     ! coefficient set and (a, b) selected by %o4 / %o5 (0 or 8).
2122         fand    %f8,%f44,%f4
2123         add     %l3,8,%g1               ! g1 = table base + 8
2124         sub     %l0,%o7,%l0
2125 
2126         fmuld   %f10,%f10,%f10          ! slot 1: z = x*x
2127         ldd     [%l5+%o4],%f34
2128         add     %l5,%o4,%l1             ! slot 1 coefficient set
2129 
2130         fsubd   %f0,%f4,%f0
2131         srl     %l0,10,%l0
2132 
2133         fmuld   %f20,%f20,%f20          ! slot 2: z = x*x
2134         ldd     [%l5+%o5],%f36
2135         add     %l5,%o5,%l2             ! slot 2 coefficient set
2136 
2137         fmuld   %f10,%f34,%f14
2138         ldd     [%l1+0x10],%f16
2139         add     %fp,%o4,%o4             ! o4 now selects the stack slot half
2140 
2141         faddd   %f0,%f2,%f0
2142         andn    %l0,0x1f,%l0
2143 
2144         fmuld   %f20,%f36,%f24
2145         ldd     [%l2+0x10],%f26
2146         add     %fp,%o5,%o5             ! o5 now selects the stack slot half
2147 
2148         faddd   %f14,%f16,%f14
2149         ldd     [%l1+0x20],%f34
2150 
2151         fmuld   %f0,%f0,%f2
2152         add     %l0,%o3,%l0
2153 
2154         faddd   %f24,%f26,%f24
2155         ldd     [%l2+0x20],%f36
2156 
2157         fmuld   %f10,%f14,%f14
2158         ldd     [%l1+0x30],%f16
2159 
2160         fmuld   %f2,%f58,%f6
2161         ldd     [%l3+%l0],%f32
2162 
2163         fmuld   %f20,%f24,%f24
2164         ldd     [%l2+0x30],%f26
2165 
2166         faddd   %f14,%f34,%f14
2167         ldd     [%o4+x1_1],%f34         ! a1 = |x1| or one
2168 
2169         faddd   %f6,%f56,%f6
2170         fmuld   %f2,%f62,%f4
2171 
2172         faddd   %f24,%f36,%f24
2173         ldd     [%o5+x2_1],%f36         ! a2 = |x2| or one
2174 
2175         fmuld   %f10,%f14,%f14
2176         std     %f12,[%fp+y1_0]         ! park y1 so it can be reselected below
2177 
2178         fmuld   %f2,%f6,%f6
2179         faddd   %f4,%f60,%f4
2180 
2181         fmuld   %f20,%f24,%f24
2182         std     %f22,[%fp+y2_0]         ! park y2 so it can be reselected below
2183 
2184         faddd   %f14,%f16,%f14
2185 
2186         faddd   %f6,%f54,%f6
2187         fmuld   %f2,%f4,%f4
2188         ldd     [%g1+%l0],%f2
2189 
2190         faddd   %f24,%f26,%f24
2191 
2192         fmuld   %f10,%f14,%f14
2193 
2194         fmuld   %f0,%f6,%f6
2195         ldd     [%l4+%l0],%f0
2196 
2197         fmuld   %f4,%f32,%f4
2198 
2199         fmuld   %f20,%f24,%f24
2200 
2201         fmuld   %f6,%f2,%f6
2202 
2203         fmuld   %f34,%f14,%f14
2204         ldd     [%o4+y1_0],%f12         ! b1 = y1 or 0
2205 
2206         fmuld   %f36,%f24,%f24
2207         ldd     [%o5+y2_0],%f22         ! b2 = y2 or 0
2208 
2209         faddd   %f6,%f4,%f6
2210 
2211         faddd   %f14,%f12,%f14
2212 
2213         faddd   %f24,%f22,%f24
2214 
2215         faddd   %f6,%f0,%f6
2216 
2217         faddd   %f34,%f14,%f16          ! slot 1 result
2218 
2219         faddd   %f36,%f24,%f26          ! slot 2 result
2220         ba,pt   %icc,.FIXSIGN
2221 
2222 ! delay slot
2223         faddd   %f6,%f32,%f6            ! slot 0 result (table path)
2224 
2225         .align  32
2226 .CASE4:
     ! Slot 0 is below its threshold.  The sign masking of slot 2 had not
     ! been executed yet when the branch here was taken, so it is done
     ! first.  Then: slot 1 also small -> .CASE6; else slot 2 small ->
     ! .CASE5; else slots 1 and 2 use the table path.
     ! The "andcc %l2,2" in the first delay slot sets the condition codes
     ! used both by the .CASE5 branch below and by the first branch inside
     ! .CASE6; nothing in between writes %icc.
     ! Slot 0 is evaluated as  a + (a*P(z) + b)  with z = x0*x0, the
     ! coefficient set and (a, b) selected by %o3 (0 or 8).
2227         fands   %f29,%f28,%f29          ! if (n & 1) clear sign bit
2228         sethi   %hi(0x3fc3c000),%o7     ! bias removed from hx for indexing
2229         andcc   %l1,2,%g0
2230         bne,pn  %icc,.CASE6
2231 
2232 ! delay slot
2233         andcc   %l2,2,%g0
2234         fpadd32s %f10,%f31,%f18
2235         ld      [%fp+x1_1],%l1
2236         bne,pn  %icc,.CASE5
2237 
2238 ! delay slot
2239         add     %l3,8,%g1               ! not annulled: also set for .CASE5
2240         ld      [%fp+x2_1],%l2
2241         fpadd32s %f20,%f31,%f28
2242 
2243         fand    %f18,%f44,%f14
2244         sub     %l1,%o7,%l1
2245 
2246         fand    %f28,%f44,%f24
2247         sub     %l2,%o7,%l2
2248 
2249         fsubd   %f10,%f14,%f10
2250         srl     %l1,10,%l1
2251 
2252         fsubd   %f20,%f24,%f20
2253         srl     %l2,10,%l2
2254 
2255         fmuld   %f0,%f0,%f0             ! slot 0: z = x*x
2256         ldd     [%l5+%o3],%f32          ! c0
2257         add     %l5,%o3,%l0             ! base of the selected coefficient set
2258 
2259         faddd   %f10,%f12,%f10
2260         andn    %l1,0x1f,%l1
2261 
2262         faddd   %f20,%f22,%f20
2263         andn    %l2,0x1f,%l2
2264 
2265         fmuld   %f0,%f32,%f4
2266         ldd     [%l0+0x10],%f6          ! c1
2267         add     %fp,%o3,%o3             ! o3 now selects the stack slot half
2268 
2269         fmuld   %f10,%f10,%f12
2270         add     %l1,%o4,%l1
2271 
2272         fmuld   %f20,%f20,%f22
2273         add     %l2,%o5,%l2
2274 
2275         faddd   %f4,%f6,%f4
2276         ldd     [%l0+0x20],%f32         ! c2
2277 
2278         fmuld   %f12,%f58,%f16
2279         ldd     [%l3+%l1],%f34
2280 
2281         fmuld   %f22,%f58,%f26
2282         ldd     [%l3+%l2],%f36
2283 
2284         fmuld   %f0,%f4,%f4
2285         ldd     [%l0+0x30],%f6          ! c3
2286 
2287         faddd   %f16,%f56,%f16
2288         fmuld   %f12,%f62,%f14
2289 
2290         faddd   %f26,%f56,%f26
2291         fmuld   %f22,%f62,%f24
2292 
2293         faddd   %f4,%f32,%f4
2294         ldd     [%o3+x0_1],%f32         ! a = |x0| or one
2295 
2296         fmuld   %f12,%f16,%f16
2297         faddd   %f14,%f60,%f14
2298 
2299         fmuld   %f22,%f26,%f26
2300         faddd   %f24,%f60,%f24
2301 
2302         fmuld   %f0,%f4,%f4
2303 
2304         faddd   %f16,%f54,%f16
2305         fmuld   %f12,%f14,%f14
2306         ldd     [%g1+%l1],%f12
2307 
2308         faddd   %f26,%f54,%f26
2309         fmuld   %f22,%f24,%f24
2310         ldd     [%g1+%l2],%f22
2311 
2312         faddd   %f4,%f6,%f4
2313 
2314         fmuld   %f10,%f16,%f16
2315         ldd     [%l4+%l1],%f10
2316 
2317         fmuld   %f20,%f26,%f26
2318         ldd     [%l4+%l2],%f20
2319 
2320         fmuld   %f14,%f34,%f14
2321         std     %f2,[%fp+y0_0]          ! park y0 so it can be reselected below
2322 
2323         fmuld   %f24,%f36,%f24
2324 
2325         fmuld   %f0,%f4,%f4
2326 
2327         fmuld   %f16,%f12,%f16
2328 
2329         fmuld   %f26,%f22,%f26
2330 
2331         fmuld   %f32,%f4,%f4
2332         ldd     [%o3+y0_0],%f2          ! b = y0 or 0
2333 
2334         faddd   %f16,%f14,%f16
2335 
2336         faddd   %f26,%f24,%f26
2337 
2338         faddd   %f4,%f2,%f4
2339 
2340         faddd   %f16,%f10,%f16
2341 
2342         faddd   %f26,%f20,%f26
2343 
2344         faddd   %f32,%f4,%f6            ! slot 0 result: a + (a*P + b)
2345 
2346         faddd   %f16,%f34,%f16
2347         ba,pt   %icc,.FIXSIGN
2348 
2349 ! delay slot
2350         faddd   %f26,%f36,%f26
2351 
2352         .align  32
2353 .CASE5:
     ! Slots 0 and 2 are below their thresholds; slot 1 uses the table path.
     ! Entered from .CASE4 with %f18 = fpadd32s(%f10,%f31), %l1 = high word
     ! of |x1|, %o7 = %hi(0x3fc3c000) and %g1 = %l3 + 8 already set.
     ! Slots 0 and 2 are each evaluated as  a + (a*P(z) + b)  with the
     ! coefficient set and (a, b) selected by %o3 / %o5 (0 or 8).
2354         fand    %f18,%f44,%f14
2355         sub     %l1,%o7,%l1
2356 
2357         fmuld   %f0,%f0,%f0             ! slot 0: z = x*x
2358         ldd     [%l5+%o3],%f32
2359         add     %l5,%o3,%l0             ! slot 0 coefficient set
2360 
2361         fsubd   %f10,%f14,%f10
2362         srl     %l1,10,%l1
2363 
2364         fmuld   %f20,%f20,%f20          ! slot 2: z = x*x
2365         ldd     [%l5+%o5],%f36
2366         add     %l5,%o5,%l2             ! slot 2 coefficient set
2367 
2368         fmuld   %f0,%f32,%f4
2369         ldd     [%l0+0x10],%f6
2370         add     %fp,%o3,%o3             ! o3 now selects the stack slot half
2371 
2372         faddd   %f10,%f12,%f10
2373         andn    %l1,0x1f,%l1
2374 
2375         fmuld   %f20,%f36,%f24
2376         ldd     [%l2+0x10],%f26
2377         add     %fp,%o5,%o5             ! o5 now selects the stack slot half
2378 
2379         faddd   %f4,%f6,%f4
2380         ldd     [%l0+0x20],%f32
2381 
2382         fmuld   %f10,%f10,%f12
2383         add     %l1,%o4,%l1
2384 
2385         faddd   %f24,%f26,%f24
2386         ldd     [%l2+0x20],%f36
2387 
2388         fmuld   %f0,%f4,%f4
2389         ldd     [%l0+0x30],%f6
2390 
2391         fmuld   %f12,%f58,%f16
2392         ldd     [%l3+%l1],%f34
2393 
2394         fmuld   %f20,%f24,%f24
2395         ldd     [%l2+0x30],%f26
2396 
2397         faddd   %f4,%f32,%f4
2398         ldd     [%o3+x0_1],%f32         ! a0 = |x0| or one
2399 
2400         faddd   %f16,%f56,%f16
2401         fmuld   %f12,%f62,%f14
2402 
2403         faddd   %f24,%f36,%f24
2404         ldd     [%o5+x2_1],%f36         ! a2 = |x2| or one
2405 
2406         fmuld   %f0,%f4,%f4
2407         std     %f2,[%fp+y0_0]          ! park y0 so it can be reselected below
2408 
2409         fmuld   %f12,%f16,%f16
2410         faddd   %f14,%f60,%f14
2411 
2412         fmuld   %f20,%f24,%f24
2413         std     %f22,[%fp+y2_0]         ! park y2 so it can be reselected below
2414 
2415         faddd   %f4,%f6,%f4
2416 
2417         faddd   %f16,%f54,%f16
2418         fmuld   %f12,%f14,%f14
2419         ldd     [%g1+%l1],%f12
2420 
2421         faddd   %f24,%f26,%f24
2422 
2423         fmuld   %f0,%f4,%f4
2424 
2425         fmuld   %f10,%f16,%f16
2426         ldd     [%l4+%l1],%f10
2427 
2428         fmuld   %f14,%f34,%f14
2429 
2430         fmuld   %f20,%f24,%f24
2431 
2432         fmuld   %f16,%f12,%f16
2433 
2434         fmuld   %f32,%f4,%f4
2435         ldd     [%o3+y0_0],%f2          ! b0 = y0 or 0
2436 
2437         fmuld   %f36,%f24,%f24
2438         ldd     [%o5+y2_0],%f22         ! b2 = y2 or 0
2439 
2440         faddd   %f16,%f14,%f16
2441 
2442         faddd   %f4,%f2,%f4
2443 
2444         faddd   %f24,%f22,%f24
2445 
2446         faddd   %f16,%f10,%f16
2447 
2448         faddd   %f32,%f4,%f6            ! slot 0 result
2449 
2450         faddd   %f36,%f24,%f26          ! slot 2 result
2451         ba,pt   %icc,.FIXSIGN
2452 
2453 ! delay slot
2454         faddd   %f16,%f34,%f16          ! slot 1 result (table path)
2455 
! .CASE6 -- entry test, then a pipeline step with mixed paths.
!
! On entry the condition codes left by code before this label are
! tested: "not equal" goes to .CASE7.  The delay-slot fpadd32s is not
! annulled, so it executes on both paths.
! On the fall-through path elements 0 (%f0) and 1 (%f10) are squared
! and run through a four-coefficient Horner chain (coefficients at
! [%l5+%o3] / [%l5+%o4], offsets 0..0x30), while element 2 (%f20) is
! combined with three table entries addressed by %l2 (through %l3,
! %g1 = %l3+8, and %l4).
! The instruction streams are interleaved by hand; keep the order.
!
! Results on exit: %f6 (element 0), %f16 (element 1), %f26 (element 2).
! Control transfers to .FIXSIGN.
! NOTE(review): the condition codes, %o7, %f31, %f44, %o3/%o4/%o5 and
! %f54-%f62 are produced outside the visible chunk -- confirm there.
2456         .align  32
2457 .CASE6:
2458         ld      [%fp+x2_1],%l2          ! first 32-bit word of the x2_1 slot
2459         add     %l3,8,%g1               ! %g1 = second column of the %l3 table
2460         bne,pn  %icc,.CASE7
2461 ! delay slot
2462         fpadd32s %f20,%f31,%f28
2463 
2464         fand    %f28,%f44,%f24          ! masked copy of element 2
2465         ldd     [%l5+%o3],%f32
2466         add     %l5,%o3,%l0             ! %l0 = coefficient base, element 0
2467 
2468         fmuld   %f0,%f0,%f0             ! element 0: z = x*x
2469         sub     %l2,%o7,%l2             ! start forming the table index
2470 
2471         fsubd   %f20,%f24,%f20          ! element 2: x - masked value
2472         srl     %l2,10,%l2
2473 
2474         fmuld   %f10,%f10,%f10          ! element 1: z = x*x
2475         ldd     [%l5+%o4],%f34
2476         add     %l5,%o4,%l1             ! %l1 = coefficient base, element 1
2477 
2478         fmuld   %f0,%f32,%f4
2479         ldd     [%l0+0x10],%f6
2480         add     %fp,%o3,%o3             ! %o3 now a frame address (used with x0_1/y0_0)
2481 
2482         faddd   %f20,%f22,%f20          ! element 2: add %f22 back in
2483         andn    %l2,0x1f,%l2            ! index becomes a multiple of 32
2484 
2485         fmuld   %f10,%f34,%f14
2486         ldd     [%l1+0x10],%f16
2487         add     %fp,%o4,%o4             ! %o4 now a frame address (used with x1_1/y1_0)
2488 
2489         faddd   %f4,%f6,%f4
2490         ldd     [%l0+0x20],%f32
2491 
2492         fmuld   %f20,%f20,%f22          ! element 2: square of the difference
2493         add     %l2,%o5,%l2             ! add the %o5 offset to the index
2494 
2495         faddd   %f14,%f16,%f14
2496         ldd     [%l1+0x20],%f34
2497 
2498         fmuld   %f0,%f4,%f4
2499         ldd     [%l0+0x30],%f6
2500 
2501         fmuld   %f22,%f58,%f26
2502         ldd     [%l3+%l2],%f36          ! first table entry for element 2
2503 
2504         fmuld   %f10,%f14,%f14
2505         ldd     [%l1+0x30],%f16
2506 
2507         faddd   %f4,%f32,%f4
2508         ldd     [%o3+x0_1],%f32         ! final multiplier for element 0
2509 
2510         faddd   %f26,%f56,%f26
2511         fmuld   %f22,%f62,%f24
2512 
2513         faddd   %f14,%f34,%f14
2514         ldd     [%o4+x1_1],%f34         ! final multiplier for element 1
2515 
2516         fmuld   %f0,%f4,%f4
2517         std     %f2,[%fp+y0_0]          ! spill %f2; re-read through %o3 below
2518 
2519         fmuld   %f22,%f26,%f26
2520         faddd   %f24,%f60,%f24
2521 
2522         fmuld   %f10,%f14,%f14
2523         std     %f12,[%fp+y1_0]         ! spill %f12; re-read through %o4 below
2524 
2525         faddd   %f4,%f6,%f4
2526 
2527         faddd   %f26,%f54,%f26
2528         fmuld   %f22,%f24,%f24
2529         ldd     [%g1+%l2],%f22          ! second table entry for element 2
2530 
2531         faddd   %f14,%f16,%f14
2532 
2533         fmuld   %f0,%f4,%f4
2534 
2535         fmuld   %f20,%f26,%f26
2536         ldd     [%l4+%l2],%f20          ! third table entry for element 2
2537 
2538         fmuld   %f24,%f36,%f24
2539 
2540         fmuld   %f10,%f14,%f14
2541 
2542         fmuld   %f26,%f22,%f26
2543 
2544         fmuld   %f32,%f4,%f4
2545         ldd     [%o3+y0_0],%f2          ! offset by the original %o3, so possibly not the slot stored above
2546 
2547         fmuld   %f34,%f14,%f14
2548         ldd     [%o4+y1_0],%f12         ! offset by the original %o4, so possibly not the slot stored above
2549 
2550         faddd   %f26,%f24,%f26
2551 
2552         faddd   %f4,%f2,%f4
2553 
2554         faddd   %f14,%f12,%f14
2555 
2556         faddd   %f26,%f20,%f26
2557 
2558         faddd   %f32,%f4,%f6            ! element 0 result
2559 
2560         faddd   %f34,%f14,%f16          ! element 1 result
2561         ba,pt   %icc,.FIXSIGN
2562 
2563 ! delay slot
2564         faddd   %f26,%f36,%f26          ! element 2 result
2565 
! .CASE7 -- pipeline step in which all three elements use the same path.
!
! For each element k (registers %f0 / %f10 / %f20):
!   z = x*x
!   p = (((z*c0 + c1)*z + c2)*z + c3)*z   with c0..c3 read from
!       [%l5+%oN] at offsets 0, 0x10, 0x20, 0x30 (%oN = %o3/%o4/%o5)
!   result = m + (m*p + t)
! where m is loaded from [%fp+%oN+xk_1] and t from [%fp+%oN+yk_0]
! after the incoming %f2 / %f12 / %f22 have been spilled to
! [%fp+yk_0].
! The three chains are interleaved one instruction at a time; keep the
! order.
!
! Results on exit: %f6, %f16, %f26.  Control transfers to .FIXSIGN.
! NOTE(review): %o3/%o4/%o5 are set before this label, outside the
! visible chunk; in the .ENDLOOP code they are 0 or 8 -- confirm the
! same holds here.
2566         .align  32
2567 .CASE7:
2568         fmuld   %f0,%f0,%f0             ! z0 = x0*x0
2569         ldd     [%l5+%o3],%f32
2570         add     %l5,%o3,%l0             ! %l0 = coefficient base, element 0
2571 
2572         fmuld   %f10,%f10,%f10          ! z1 = x1*x1
2573         ldd     [%l5+%o4],%f34
2574         add     %l5,%o4,%l1             ! %l1 = coefficient base, element 1
2575 
2576         fmuld   %f20,%f20,%f20          ! z2 = x2*x2
2577         ldd     [%l5+%o5],%f36
2578         add     %l5,%o5,%l2             ! %l2 = coefficient base, element 2
2579 
2580         fmuld   %f0,%f32,%f4
2581         ldd     [%l0+0x10],%f6
2582         add     %fp,%o3,%o3             ! %o3/%o4/%o5 become frame addresses
2583 
2584         fmuld   %f10,%f34,%f14
2585         ldd     [%l1+0x10],%f16
2586         add     %fp,%o4,%o4
2587 
2588         fmuld   %f20,%f36,%f24
2589         ldd     [%l2+0x10],%f26
2590         add     %fp,%o5,%o5
2591 
2592         faddd   %f4,%f6,%f4
2593         ldd     [%l0+0x20],%f32
2594 
2595         faddd   %f14,%f16,%f14
2596         ldd     [%l1+0x20],%f34
2597 
2598         faddd   %f24,%f26,%f24
2599         ldd     [%l2+0x20],%f36
2600 
2601         fmuld   %f0,%f4,%f4
2602         ldd     [%l0+0x30],%f6
2603 
2604         fmuld   %f10,%f14,%f14
2605         ldd     [%l1+0x30],%f16
2606 
2607         fmuld   %f20,%f24,%f24
2608         ldd     [%l2+0x30],%f26
2609 
2610         faddd   %f4,%f32,%f4
2611         ldd     [%o3+x0_1],%f32         ! m0
2612 
2613         faddd   %f14,%f34,%f14
2614         ldd     [%o4+x1_1],%f34         ! m1
2615 
2616         faddd   %f24,%f36,%f24
2617         ldd     [%o5+x2_1],%f36         ! m2
2618 
2619         fmuld   %f0,%f4,%f4
2620         std     %f2,[%fp+y0_0]          ! spill; re-read through %o3 below
2621 
2622         fmuld   %f10,%f14,%f14
2623         std     %f12,[%fp+y1_0]         ! spill; re-read through %o4 below
2624 
2625         fmuld   %f20,%f24,%f24
2626         std     %f22,[%fp+y2_0]         ! spill; re-read through %o5 below
2627 
2628         faddd   %f4,%f6,%f4
2629 
2630         faddd   %f14,%f16,%f14
2631 
2632         faddd   %f24,%f26,%f24
2633 
2634         fmuld   %f0,%f4,%f4             ! p0
2635 
2636         fmuld   %f10,%f14,%f14          ! p1
2637 
2638         fmuld   %f20,%f24,%f24          ! p2
2639 
2640         fmuld   %f32,%f4,%f4            ! m0*p0
2641         ldd     [%o3+y0_0],%f2          ! t0
2642 
2643         fmuld   %f34,%f14,%f14          ! m1*p1
2644         ldd     [%o4+y1_0],%f12         ! t1
2645 
2646         fmuld   %f36,%f24,%f24          ! m2*p2
2647         ldd     [%o5+y2_0],%f22         ! t2
2648 
2649         faddd   %f4,%f2,%f4
2650 
2651         faddd   %f14,%f12,%f14
2652 
2653         faddd   %f24,%f22,%f24
2654 
2655         faddd   %f32,%f4,%f6            ! element 0 result
2656 
2657         faddd   %f34,%f14,%f16          ! element 1 result
2658         ba,pt   %icc,.FIXSIGN
2659 
2660 ! delay slot
2661         faddd   %f36,%f24,%f26          ! element 2 result
2662 
2663 
! .ENDLOOP2 -- drain pipeline slot 1.
!
! Reached from .SKIP2 / .BIG2 when the remaining count in %i0 drops to
! zero or below.  One element is still pending in slot 1 (value in
! %f10, result pointer %o1).  It is finished here on its own and the
! code then falls through to .ENDLOOP1 to finish slot 0.
!
! Steps:
!   1. n = x*%f40, rounded by adding and subtracting %f42; the second
!      word of the intermediate sum is saved as an integer at [%fp+n1].
!   2. x is reduced by n*%f46, n*%f48, n*%f50 and n*%f52 in turn, with
!      the rounding error of each subtraction carried along, giving the
!      pair x (%f10) / y (%f12).
!   3. The sign bit is saved in %f19 and x is replaced by |x|.
!   4. fcmpgt32 compares a threshold word chosen by (n & 1) with the
!      first word of |x| and selects the polynomial path (label 1) or
!      the table path (fall-through).
!   5. At label 2 the sign is adjusted using (n & 2) and the result is
!      stored to [%o1] as two 32-bit words.
! NOTE(review): %f40-%f52 are loaded outside the visible chunk;
! presumably they hold the 2/pi, 0x43380000... and pi/2-pieces entries
! of the constants table at the top of the file -- confirm.
2664         .align  32
2665 .ENDLOOP2:
2666         fmuld   %f10,%f40,%f12
2667         add     %l5,thresh,%g1          ! %g1 -> threshold words
2668         faddd   %f12,%f42,%f12
2669         st      %f13,[%fp+n1]           ! second word of the sum, kept as integer n
2670         fsubd   %f12,%f42,%f12          ! n
2671         fmuld   %f12,%f46,%f14
2672         fsubd   %f10,%f14,%f14          ! x - n*%f46
2673         fmuld   %f12,%f48,%f16
2674         fsubd   %f14,%f16,%f10          ! ... - n*%f48
2675         ld      [%fp+n1],%o4
2676         fsubd   %f14,%f10,%f34          ! recover the rounding error of that step
2677         and     %o4,1,%o4
2678         fsubd   %f34,%f16,%f34
2679         fmuld   %f12,%f50,%f18
2680         sll     %o4,3,%o4               ! %o4 = (n & 1) * 8
2681         fsubd   %f18,%f34,%f18
2682         ld      [%g1+%o4],%f16          ! threshold word for this parity
2683         fsubd   %f10,%f18,%f14
2684         fsubd   %f10,%f14,%f34
2685         add     %l5,thresh+4,%o7
2686         fsubd   %f34,%f18,%f34
2687         fmuld   %f12,%f52,%f12
2688         fsubd   %f12,%f34,%f12
2689         ld      [%o7+%o4],%f18          ! sign mask word for this parity
2690         fsubd   %f14,%f12,%f10          ! x
2691         fsubd   %f14,%f10,%f14
2692         fands   %f10,%f30,%f19          ! save signbit
2693         fabsd   %f10,%f10
2694         std     %f10,[%fp+x1_1]         ! |x| to the frame; first word re-read below
2695         fsubd   %f14,%f12,%f12          ! y
2696         fcmpgt32 %f16,%f10,%l1          ! per-word compare: threshold > |x|
2697         fxors   %f12,%f19,%f12          ! give y the sign that matches |x|
2698         fands   %f19,%f18,%f19          ! if (n & 1) clear sign bit
2699         andcc   %l1,2,%g0               ! look only at bit 1 of the compare result
2700         bne,pn  %icc,1f
2701 ! delay slot
2702         nop
2703         fpadd32s %f10,%f31,%f18         ! --- table path ---
2704         ld      [%fp+x1_1],%l1          ! first word of |x|
2705         fand    %f18,%f44,%f14
2706         sethi   %hi(0x3fc3c000),%o7
2707         add     %l3,8,%g1
2708         fsubd   %f10,%f14,%f10          ! x - masked value
2709         sub     %l1,%o7,%l1
2710         srl     %l1,10,%l1
2711         faddd   %f10,%f12,%f10          ! add the tail y
2712         andn    %l1,0x1f,%l1            ! table offset, multiple of 32
2713         fmuld   %f10,%f10,%f12
2714         add     %l1,%o4,%l1             ! plus (n & 1) * 8
2715         fmuld   %f12,%f58,%f16
2716         ldd     [%l3+%l1],%f34          ! first table entry
2717         faddd   %f16,%f56,%f16
2718         fmuld   %f12,%f62,%f14
2719         fmuld   %f12,%f16,%f16
2720         faddd   %f14,%f60,%f14
2721         faddd   %f16,%f54,%f16
2722         fmuld   %f12,%f14,%f14
2723         ldd     [%g1+%l1],%f12          ! second table entry
2724         fmuld   %f10,%f16,%f16
2725         ldd     [%l4+%l1],%f10          ! third table entry
2726         fmuld   %f14,%f34,%f14
2727         fmuld   %f16,%f12,%f16
2728         faddd   %f16,%f14,%f16
2729         faddd   %f16,%f10,%f16
2730         ba,pt   %icc,2f
2731         faddd   %f16,%f34,%f16          ! delay slot: finish the table-path result
2732 1:
2733         fmuld   %f10,%f10,%f10          ! --- polynomial path: z = x*x ---
2734         ldd     [%l5+%o4],%f34
2735         add     %l5,%o4,%l1
2736         fmuld   %f10,%f34,%f14
2737         ldd     [%l1+0x10],%f16
2738         add     %fp,%o4,%o4             ! frame address offset by 0 or 8
2739         faddd   %f14,%f16,%f14
2740         ldd     [%l1+0x20],%f34
2741         fmuld   %f10,%f14,%f14
2742         ldd     [%l1+0x30],%f16
2743         faddd   %f14,%f34,%f14
2744         ldd     [%o4+x1_1],%f34         ! slot x1_1 or the one 8 bytes above it
2745         fmuld   %f10,%f14,%f14
2746         std     %f12,[%fp+y1_0]
2747         faddd   %f14,%f16,%f14
2748         fmuld   %f10,%f14,%f14
2749         fmuld   %f34,%f14,%f14
2750         ldd     [%o4+y1_0],%f12         ! slot y1_0 or the one 8 bytes above it
2751         faddd   %f14,%f12,%f14
2752         faddd   %f34,%f14,%f16
2753 2:
2754         add     %l5,thresh-4,%g1
2755         ld      [%fp+n1],%o4
2756         and     %o4,2,%o4
2757         sll     %o4,2,%o4               ! %o4 = 0 or 8, from bit 1 of n
2758         ld      [%g1+%o4],%f18          ! word at thresh-4 or thresh+4
2759         fxors   %f19,%f18,%f19
2760         fors    %f16,%f19,%f16          ! tack on sign
2761         st      %f16,[%o1]
2762         st      %f17,[%o1+4]
2763 
! .ENDLOOP1 -- drain pipeline slot 0.
!
! Reached from .SKIP1 / .BIG1 when the count in %i0 drops to zero or
! below, and by fall-through from the code directly above.  The
! pending element is in %f0 and its result pointer is %o0.  The
! sequence is the slot-0 counterpart of the slot-1 drain above:
!   n = x*%f40 rounded via %f42, integer copy kept at [%fp+n0];
!   x reduced by n*%f46, n*%f48, n*%f50, n*%f52 into x (%f0) / y (%f2);
!   sign saved in %f9, x replaced by |x|;
!   fcmpgt32 against a threshold word picks the polynomial path
!   (label 1) or the table path (fall-through);
!   at label 2 the sign is adjusted using (n & 2) and the result is
!   stored to [%o0] as two 32-bit words.
! Execution then falls through to .ENDLOOP0.
! NOTE(review): %f40-%f62, %f30, %f31 and %f44 are loaded outside the
! visible chunk -- confirm their values there.
2764 .ENDLOOP1:
2765         fmuld   %f0,%f40,%f2
2766         add     %l5,thresh,%g1          ! %g1 -> threshold words
2767         faddd   %f2,%f42,%f2
2768         st      %f3,[%fp+n0]            ! second word of the sum, kept as integer n
2769         fsubd   %f2,%f42,%f2            ! n
2770         fmuld   %f2,%f46,%f4
2771         fsubd   %f0,%f4,%f4             ! x - n*%f46
2772         fmuld   %f2,%f48,%f6
2773         fsubd   %f4,%f6,%f0             ! ... - n*%f48
2774         ld      [%fp+n0],%o3
2775         fsubd   %f4,%f0,%f32            ! recover the rounding error of that step
2776         and     %o3,1,%o3
2777         fsubd   %f32,%f6,%f32
2778         fmuld   %f2,%f50,%f8
2779         sll     %o3,3,%o3               ! %o3 = (n & 1) * 8
2780         fsubd   %f8,%f32,%f8
2781         ld      [%g1+%o3],%f6           ! threshold word for this parity
2782         fsubd   %f0,%f8,%f4
2783         fsubd   %f0,%f4,%f32
2784         add     %l5,thresh+4,%o7
2785         fsubd   %f32,%f8,%f32
2786         fmuld   %f2,%f52,%f2
2787         fsubd   %f2,%f32,%f2
2788         ld      [%o7+%o3],%f8           ! sign mask word for this parity
2789         fsubd   %f4,%f2,%f0             ! x
2790         fsubd   %f4,%f0,%f4
2791         fands   %f0,%f30,%f9            ! save signbit
2792         fabsd   %f0,%f0
2793         std     %f0,[%fp+x0_1]          ! |x| to the frame; first word re-read below
2794         fsubd   %f4,%f2,%f2             ! y
2795         fcmpgt32 %f6,%f0,%l0            ! per-word compare: threshold > |x|
2796         fxors   %f2,%f9,%f2             ! give y the sign that matches |x|
2797         fands   %f9,%f8,%f9             ! if (n & 1) clear sign bit
2798         andcc   %l0,2,%g0               ! look only at bit 1 of the compare result
2799         bne,pn  %icc,1f
2800 ! delay slot
2801         nop
2802         fpadd32s %f0,%f31,%f8           ! --- table path ---
2803         ld      [%fp+x0_1],%l0          ! first word of |x|
2804         fand    %f8,%f44,%f4
2805         sethi   %hi(0x3fc3c000),%o7
2806         add     %l3,8,%g1
2807         fsubd   %f0,%f4,%f0             ! x - masked value
2808         sub     %l0,%o7,%l0
2809         srl     %l0,10,%l0
2810         faddd   %f0,%f2,%f0             ! add the tail y
2811         andn    %l0,0x1f,%l0            ! table offset, multiple of 32
2812         fmuld   %f0,%f0,%f2
2813         add     %l0,%o3,%l0             ! plus (n & 1) * 8
2814         fmuld   %f2,%f58,%f6
2815         ldd     [%l3+%l0],%f32          ! first table entry
2816         faddd   %f6,%f56,%f6
2817         fmuld   %f2,%f62,%f4
2818         fmuld   %f2,%f6,%f6
2819         faddd   %f4,%f60,%f4
2820         faddd   %f6,%f54,%f6
2821         fmuld   %f2,%f4,%f4
2822         ldd     [%g1+%l0],%f2           ! second table entry
2823         fmuld   %f0,%f6,%f6
2824         ldd     [%l4+%l0],%f0           ! third table entry
2825         fmuld   %f4,%f32,%f4
2826         fmuld   %f6,%f2,%f6
2827         faddd   %f6,%f4,%f6
2828         faddd   %f6,%f0,%f6
2829         ba,pt   %icc,2f
2830         faddd   %f6,%f32,%f6            ! delay slot: finish the table-path result
2831 1:
2832         fmuld   %f0,%f0,%f0             ! --- polynomial path: z = x*x ---
2833         ldd     [%l5+%o3],%f32
2834         add     %l5,%o3,%l0
2835         fmuld   %f0,%f32,%f4
2836         ldd     [%l0+0x10],%f6
2837         add     %fp,%o3,%o3             ! frame address offset by 0 or 8
2838         faddd   %f4,%f6,%f4
2839         ldd     [%l0+0x20],%f32
2840         fmuld   %f0,%f4,%f4
2841         ldd     [%l0+0x30],%f6
2842         faddd   %f4,%f32,%f4
2843         ldd     [%o3+x0_1],%f32         ! slot x0_1 or the one 8 bytes above it
2844         fmuld   %f0,%f4,%f4
2845         std     %f2,[%fp+y0_0]
2846         faddd   %f4,%f6,%f4
2847         fmuld   %f0,%f4,%f4
2848         fmuld   %f32,%f4,%f4
2849         ldd     [%o3+y0_0],%f2          ! slot y0_0 or the one 8 bytes above it
2850         faddd   %f4,%f2,%f4
2851         faddd   %f32,%f4,%f6
2852 2:
2853         add     %l5,thresh-4,%g1
2854         ld      [%fp+n0],%o3
2855         and     %o3,2,%o3
2856         sll     %o3,2,%o3               ! %o3 = 0 or 8, from bit 1 of n
2857         ld      [%g1+%o3],%f8           ! word at thresh-4 or thresh+4
2858         fxors   %f9,%f8,%f9
2859         fors    %f6,%f9,%f6             ! tack on sign
2860         st      %f6,[%o0]
2861         st      %f7,[%o0+4]
2862 
! .ENDLOOP0 -- common exit of the vector loop.
!
! If LIM_l6 is zero the function returns at once.  Otherwise (the
! .BIG0/.BIG1/.BIG2 handlers set it to %l7 when they meet a finite
! argument) the saved call parameters are reloaded from the frame and
! __vlibm_vsin_big is called with
!   %o0 = nsave, %o1 = xsave, %o2 = sxsave,
!   %o3 = ysave, %o4 = sysave, %o5 = %l7.
! The call returns to .exit, which immediately follows its delay slot.
2863 .ENDLOOP0:
2864 
2865 ! check for huge arguments remaining
2866 
2867         tst     LIM_l6
2868         be,pt   %icc,.exit
2869 ! delay slot
2870         nop
2871 
2872 ! ========== huge range (use C code) ==========
2873 
2874 #ifdef __sparcv9
2875         ldx     [%fp+xsave],%o1         ! 64-bit pointers on V9
2876         ldx     [%fp+ysave],%o3
2877 #else
2878         ld      [%fp+xsave],%o1         ! 32-bit pointers otherwise
2879         ld      [%fp+ysave],%o3
2880 #endif
2881         ld      [%fp+nsave],%o0
2882         ld      [%fp+sxsave],%o2
2883         ld      [%fp+sysave],%o4
2884         sra     %o2,0,%o2               ! sign-extend for V9
2885         sra     %o4,0,%o4               ! same sign extension for sysave
2886         call    __vlibm_vsin_big
2887         mov     %l7,%o5                 ! delay slot
2888 
2889 .exit:
2890         ret
2891         restore                         ! delay slot of ret: pop the register window
2892 
2893 
! .SKIP0 -- drop the element currently in pipeline slot 0.
!
! Decrements the remaining count in %i0; if nothing is left, goes to
! .ENDLOOP0.  Otherwise slot 0 is refilled from slot 1's registers:
! %l0 = %l1 with the sign bit cleared, %f0 = %f10, and %f1 is reloaded
! from [%i1+4].  The x pointer is then advanced and .LOOP0 re-entered.
! NOTE(review): the branch that reaches this label is outside the
! visible chunk, so the reason for skipping is not shown here.
2894         .align  32
2895 .SKIP0:
2896         addcc   %i0,-1,%i0              ! one fewer element to process
2897         ble,pn  %icc,.ENDLOOP0
2898 ! delay slot, harmless if branch taken
2899         add     %i3,%i4,%i3             ! y += stridey
2900         andn    %l1,%i5,%l0             ! hx &= ~0x80000000
2901         fmovs   %f10,%f0                ! first word comes from slot 1's register
2902         ld      [%i1+4],%f1             ! second word comes from memory
2903         ba,pt   %icc,.LOOP0
2904 ! delay slot
2905         add     %i1,%i2,%i1             ! x += stridex
2906 
2907 
! .SKIP1 -- drop the element currently in pipeline slot 1.
!
! Decrements the remaining count in %i0; if nothing is left, goes to
! .ENDLOOP1 so that slot 0 is still finished.  Otherwise slot 1 is
! refilled from slot 2's registers: %l1 = %l2 with the sign bit
! cleared, %f10 = %f20, and %f11 is reloaded from [%i1+4].  The x
! pointer is then advanced and .LOOP1 re-entered.
! NOTE(review): the branch that reaches this label is outside the
! visible chunk, so the reason for skipping is not shown here.
2908         .align  32
2909 .SKIP1:
2910         addcc   %i0,-1,%i0              ! one fewer element to process
2911         ble,pn  %icc,.ENDLOOP1
2912 ! delay slot, harmless if branch taken
2913         add     %i3,%i4,%i3             ! y += stridey
2914         andn    %l2,%i5,%l1             ! hx &= ~0x80000000
2915         fmovs   %f20,%f10               ! first word comes from slot 2's register
2916         ld      [%i1+4],%f11            ! second word comes from memory
2917         ba,pt   %icc,.LOOP1
2918 ! delay slot
2919         add     %i1,%i2,%i1             ! x += stridex
2920 
2921 
! .SKIP2 -- drop the element currently in pipeline slot 2.
!
! Decrements the remaining count in %i0; if nothing is left, goes to
! .ENDLOOP2 so that slots 1 and 0 are still finished.  Otherwise
! slot 2 is refilled straight from memory at %i1 (there is no later
! slot to copy from): %l2 and %f20 get the first word, %f21 the second
! word, and %l2 has its sign bit cleared.  The x pointer is then
! advanced and .LOOP2 re-entered.
! NOTE(review): the branch that reaches this label is outside the
! visible chunk, so the reason for skipping is not shown here.
2922         .align  32
2923 .SKIP2:
2924         addcc   %i0,-1,%i0              ! one fewer element to process
2925         ble,pn  %icc,.ENDLOOP2
2926 ! delay slot, harmless if branch taken
2927         add     %i3,%i4,%i3             ! y += stridey
2928         ld      [%i1],%l2               ! first word of x, integer copy
2929         ld      [%i1],%f20              ! same word into the FP register
2930         ld      [%i1+4],%f21            ! second word of x
2931         andn    %l2,%i5,%l2             ! hx &= ~0x80000000
2932         ba,pt   %icc,.LOOP2
2933 ! delay slot
2934         add     %i1,%i2,%i1             ! x += stridex
2935 
2936 
! .BIG0 -- slot 0 holds an argument that the main loop does not handle.
!
! hx (%l0) is compared with 0x7ff00000:
!   hx <  0x7ff00000: LIM_l6 is set to %l7 so that .ENDLOOP0 later
!                     calls __vlibm_vsin_big; nothing is stored now.
!   hx >= 0x7ff00000: x - x is computed and stored to [%o0] as two
!                     32-bit words.
! The "bl,a" annul bit makes the delay-slot mov execute only when the
! branch is taken.
! Afterwards the count is decremented and either .ENDLOOP0 is entered
! or slot 0 is refilled from slot 1 (%l1 / %f10) and .LOOP0 re-entered.
! NOTE(review): unlike .SKIP0 there is no "y += stridey" here;
! presumably y was already advanced before branching to this label --
! confirm against the code outside this chunk.
2937         .align  32
2938 .BIG0:
2939         sethi   %hi(0x7ff00000),%o7
2940         cmp     %l0,%o7
2941         bl,a,pt %icc,1f                 ! if hx < 0x7ff00000
2942 ! delay slot, annulled if branch not taken
2943         mov     %l7,LIM_l6              ! set biguns flag or
2944         fsubd   %f0,%f0,%f0             ! y = x - x
2945         st      %f0,[%o0]
2946         st      %f1,[%o0+4]
2947 1:
2948         addcc   %i0,-1,%i0              ! one fewer element to process
2949         ble,pn  %icc,.ENDLOOP0
2950 ! delay slot, harmless if branch taken
2951         andn    %l1,%i5,%l0             ! hx &= ~0x80000000
2952         fmovd   %f10,%f0                ! slot 1's value becomes slot 0's
2953         ba,pt   %icc,.LOOP0
2954 ! delay slot
2955         add     %i1,%i2,%i1             ! x += stridex
2956 
2957 
! .BIG1 -- slot 1 holds an argument that the main loop does not handle.
!
! hx (%l1) is compared with 0x7ff00000:
!   hx <  0x7ff00000: LIM_l6 is set to %l7 so that .ENDLOOP0 later
!                     calls __vlibm_vsin_big; nothing is stored now.
!   hx >= 0x7ff00000: x - x is computed and stored to [%o1] as two
!                     32-bit words.
! The "bl,a" annul bit makes the delay-slot mov execute only when the
! branch is taken.
! Afterwards the count is decremented and either .ENDLOOP1 is entered
! or slot 1 is refilled from slot 2 (%l2 / %f20) and .LOOP1 re-entered.
! NOTE(review): unlike .SKIP1 there is no "y += stridey" here;
! presumably y was already advanced before branching to this label --
! confirm against the code outside this chunk.
2958         .align  32
2959 .BIG1:
2960         sethi   %hi(0x7ff00000),%o7
2961         cmp     %l1,%o7
2962         bl,a,pt %icc,1f                 ! if hx < 0x7ff00000
2963 ! delay slot, annulled if branch not taken
2964         mov     %l7,LIM_l6              ! set biguns flag or
2965         fsubd   %f10,%f10,%f10          ! y = x - x
2966         st      %f10,[%o1]
2967         st      %f11,[%o1+4]
2968 1:
2969         addcc   %i0,-1,%i0              ! one fewer element to process
2970         ble,pn  %icc,.ENDLOOP1
2971 ! delay slot, harmless if branch taken
2972         andn    %l2,%i5,%l1             ! hx &= ~0x80000000
2973         fmovd   %f20,%f10               ! slot 2's value becomes slot 1's
2974         ba,pt   %icc,.LOOP1
2975 ! delay slot
2976         add     %i1,%i2,%i1             ! x += stridex
2977 
2978 
! .BIG2 -- slot 2 holds an argument that the main loop does not handle.
!
! hx (%l2) is compared with 0x7ff00000:
!   hx <  0x7ff00000: LIM_l6 is set to %l7 so that .ENDLOOP0 later
!                     calls __vlibm_vsin_big; nothing is stored now.
!   hx >= 0x7ff00000: x - x is computed and stored to [%o2] as two
!                     32-bit words.
! The "bl,a" annul bit makes the delay-slot mov execute only when the
! branch is taken.
! Afterwards the count is decremented and either .ENDLOOP2 is entered
! or slot 2 is refilled straight from memory at %i1 and .LOOP2
! re-entered.
! NOTE(review): unlike .SKIP2 there is no "y += stridey" here;
! presumably y was already advanced before branching to this label --
! confirm against the code outside this chunk.
2979         .align  32
2980 .BIG2:
2981         sethi   %hi(0x7ff00000),%o7
2982         cmp     %l2,%o7
2983         bl,a,pt %icc,1f                 ! if hx < 0x7ff00000
2984 ! delay slot, annulled if branch not taken
2985         mov     %l7,LIM_l6              ! set biguns flag or
2986         fsubd   %f20,%f20,%f20          ! y = x - x
2987         st      %f20,[%o2]
2988         st      %f21,[%o2+4]
2989 1:
2990         addcc   %i0,-1,%i0              ! one fewer element to process
2991         ble,pn  %icc,.ENDLOOP2
2992 ! delay slot
2993         nop
2994         ld      [%i1],%l2               ! first word of x, integer copy
2995         ld      [%i1],%f20              ! same word into the FP register
2996         ld      [%i1+4],%f21            ! second word of x
2997         andn    %l2,%i5,%l2             ! hx &= ~0x80000000
2998         ba,pt   %icc,.LOOP2
2999 ! delay slot
3000         add     %i1,%i2,%i1             ! x += stridex
3001 
3002         SET_SIZE(__vsin)
3003