/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

	.file	"__vsin_ultra3.S"

#include "libm.h"
	.weak	__vsin
	.type	__vsin,#function
	__vsin = __vsin_ultra3

	RO_DATA
	.align	64
constants:
	.word	0x42c80000,0x00000000	! 3 * 2^44
	.word	0x43380000,0x00000000	! 3 * 2^51
	.word	0x3fe45f30,0x6dc9c883	! invpio2
	.word	0x3ff921fb,0x54442c00	! pio2_1
	.word	0x3d318469,0x898cc400	! pio2_2
	.word	0x3a71701b,0x839a2520	! pio2_3
	.word	0xbfc55555,0x55555533	! pp1
	.word	0x3f811111,0x10e7d53b	! pp2
	.word	0xbf2a0167,0xe6b3cf9b	! pp3
	.word	0xbfdfffff,0xffffff65	! qq1
	.word	0x3fa55555,0x54f88ed0	! qq2
	.word	0xbf56c12c,0xdd185f60	! qq3
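! A reading of the constants above (inferred from their names and values;
! the original carries no prose here): invpio2 is 2/pi, and pio2_1..pio2_3
! split pi/2 into three pieces so that x - n*pi/2 can be accumulated
! without rounding error.  3*2^44 and 3*2^51 act as shifters: adding one
! and later subtracting it rounds a double to a known number of low-order
! bits.  pp1..pp3 and qq1..qq3 sit close to the Taylor values -1/6, 1/120,
! -1/5040 and -1/2, 1/24, -1/720, suggesting, with z = t*t:
!
!	sin(t) ~ t + t*z*(pp1 + z*(pp2 + z*pp3))
!	cos(t) ~ 1 + z*(qq1 + z*(qq2 + z*qq3))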

! local storage indices

#define xsave		STACK_BIAS-0x8
#define ysave		STACK_BIAS-0x10
#define nsave		STACK_BIAS-0x14
#define sxsave		STACK_BIAS-0x18
#define sysave		STACK_BIAS-0x1c
#define biguns		STACK_BIAS-0x20
#define nk3		STACK_BIAS-0x24
#define nk2		STACK_BIAS-0x28
#define nk1		STACK_BIAS-0x2c
#define nk0		STACK_BIAS-0x30
#define junk		STACK_BIAS-0x38
! sizeof temp storage - must be a multiple of 16 for V9
#define tmps		0x40

! register use

! i0  n
! i1  x
! i2  stridex
! i3  y
! i4  stridey
! i5  0x80000000

! l0  hx0
! l1  hx1
! l2  hx2
! l3  hx3
! l4  k0
! l5  k1
! l6  k2
! l7  k3

! the following are 64-bit registers in both V8+ and V9

! g1  __vlibm_TBL_sincos2
! g5  scratch

! o0  py0
! o1  py1
! o2  py2
! o3  py3
! o4  0x3e400000
! o5  0x3fe921fb,0x4099251e
! o7  scratch

! f0  hx0
! f2
! f4
! f6
! f8  hx1
! f10
! f12
! f14
! f16 hx2
! f18
! f20
! f22
! f24 hx3
! f26
! f28
! f30
! f32
! f34
! f36
! f38

#define c3two44	%f40
#define c3two51	%f42
#define invpio2	%f44
#define pio2_1	%f46
#define pio2_2	%f48
#define pio2_3	%f50
#define pp1	%f52
#define pp2	%f54
#define pp3	%f56
#define qq1	%f58
#define qq2	%f60
#define qq3	%f62

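! Overview, inferred from the code below (this file has no prose summary):
! four elements are processed per iteration.  An element whose high word
! lies outside [0x3e400000, 0x4099251e] is handed to a .range* path.
! Elements below about pi/4 take the primary path: |x| is split as b + t
! against __vlibm_TBL_sincos2 and sin(b + t) is assembled from tabulated
! sin(b), cos(b) and short polynomials in t*t.  Larger elements first go
! through .medium, which subtracts n*pi/2 (n the nearest integer to
! x*2/pi) and uses n mod 4 to select sin/cos and the sign of the result.
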
	ENTRY(__vsin_ultra3)
	save	%sp,-SA(MINFRAME)-tmps,%sp
	PIC_SETUP(l7)
	PIC_SET(l7,constants,o0)
	PIC_SET(l7,__vlibm_TBL_sincos2,o1)
	mov	%o1,%g1
	wr	%g0,0x82,%asi		! set %asi for non-faulting loads
#ifdef __sparcv9
	stx	%i1,[%fp+xsave]		! save arguments
	stx	%i3,[%fp+ysave]
#else
	st	%i1,[%fp+xsave]		! save arguments
	st	%i3,[%fp+ysave]
#endif
	st	%i0,[%fp+nsave]
	st	%i2,[%fp+sxsave]
	st	%i4,[%fp+sysave]
	st	%g0,[%fp+biguns]	! biguns = 0
	ldd	[%o0+0x00],c3two44	! load/set up constants
	ldd	[%o0+0x08],c3two51
	ldd	[%o0+0x10],invpio2
	ldd	[%o0+0x18],pio2_1
	ldd	[%o0+0x20],pio2_2
	ldd	[%o0+0x28],pio2_3
	ldd	[%o0+0x30],pp1
	ldd	[%o0+0x38],pp2
	ldd	[%o0+0x40],pp3
	ldd	[%o0+0x48],qq1
	ldd	[%o0+0x50],qq2
	ldd	[%o0+0x58],qq3
	sethi	%hi(0x80000000),%i5
	sethi	%hi(0x3e400000),%o4
	sethi	%hi(0x3fe921fb),%o5
	or	%o5,%lo(0x3fe921fb),%o5
	sllx	%o5,32,%o5
	sethi	%hi(0x4099251e),%o7
	or	%o7,%lo(0x4099251e),%o7
	or	%o5,%o7,%o5
	sll	%i2,3,%i2		! scale strides
	sll	%i4,3,%i4
	add	%fp,junk,%o1		! loop prologue
	add	%fp,junk,%o2
	add	%fp,junk,%o3
	ld	[%i1],%l0		! *x
	ld	[%i1],%f0
	ld	[%i1+4],%f3
	andn	%l0,%i5,%l0		! mask off sign
	ba	.loop0
	add	%i1,%i2,%i1		! x += stridex

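! A note on the range test in the loops below (inferred; the original
! comments only state the bounds): %o4 holds 0x3e400000 and the low half
! of %o5 holds 0x4099251e, so (hx - %o4) is negative iff hx is too small,
! and the low 32 bits of (%o5 - hx) are negative iff hx is too large;
! or-ing the two values sets the %icc sign bit iff either bound is
! violated, so one bl catches both cases.  The high half of %o5,
! 0x3fe921fb (~pi/4), is recovered later by srlx for the medium-range
! test at .cont.
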
! 16-byte aligned
	.align	16
.loop0:
	lda	[%i1]%asi,%l1		! preload next argument
	sub	%l0,%o4,%g5
	sub	%o5,%l0,%o7
	fabss	%f0,%f2

	lda	[%i1]%asi,%f8
	orcc	%o7,%g5,%g0
	mov	%i3,%o0			! py0 = y
	bl,pn	%icc,.range0		! hx < 0x3e400000 or hx > 0x4099251e

! delay slot
	lda	[%i1+4]%asi,%f11
	addcc	%i0,-1,%i0
	add	%i3,%i4,%i3		! y += stridey
	ble,pn	%icc,.last1

! delay slot
	andn	%l1,%i5,%l1
	add	%i1,%i2,%i1		! x += stridex
	faddd	%f2,c3two44,%f4
	st	%f15,[%o1+4]

.loop1:
	lda	[%i1]%asi,%l2		! preload next argument
	sub	%l1,%o4,%g5
	sub	%o5,%l1,%o7
	fabss	%f8,%f10

	lda	[%i1]%asi,%f16
	orcc	%o7,%g5,%g0
	mov	%i3,%o1			! py1 = y
	bl,pn	%icc,.range1		! hx < 0x3e400000 or hx > 0x4099251e

! delay slot
	lda	[%i1+4]%asi,%f19
	addcc	%i0,-1,%i0
	add	%i3,%i4,%i3		! y += stridey
	ble,pn	%icc,.last2

! delay slot
	andn	%l2,%i5,%l2
	add	%i1,%i2,%i1		! x += stridex
	faddd	%f10,c3two44,%f12
	st	%f23,[%o2+4]

.loop2:
	lda	[%i1]%asi,%l3		! preload next argument
	sub	%l2,%o4,%g5
	sub	%o5,%l2,%o7
	fabss	%f16,%f18

	lda	[%i1]%asi,%f24
	orcc	%o7,%g5,%g0
	mov	%i3,%o2			! py2 = y
	bl,pn	%icc,.range2		! hx < 0x3e400000 or hx > 0x4099251e

! delay slot
	lda	[%i1+4]%asi,%f27
	addcc	%i0,-1,%i0
	add	%i3,%i4,%i3		! y += stridey
	ble,pn	%icc,.last3

! delay slot
	andn	%l3,%i5,%l3
	add	%i1,%i2,%i1		! x += stridex
	faddd	%f18,c3two44,%f20
	st	%f31,[%o3+4]

.loop3:
	sub	%l3,%o4,%g5
	sub	%o5,%l3,%o7
	fabss	%f24,%f26
	st	%f5,[%fp+nk0]

	orcc	%o7,%g5,%g0
	mov	%i3,%o3			! py3 = y
	bl,pn	%icc,.range3		! hx < 0x3e400000 or hx > 0x4099251e
! delay slot
	st	%f13,[%fp+nk1]

.cont:
	srlx	%o5,32,%o7
	add	%i3,%i4,%i3		! y += stridey
	fmovs	%f3,%f1
	st	%f21,[%fp+nk2]

	sub	%o7,%l0,%l0
	sub	%o7,%l1,%l1
	faddd	%f26,c3two44,%f28
	st	%f29,[%fp+nk3]

	sub	%o7,%l2,%l2
	sub	%o7,%l3,%l3
	fmovs	%f11,%f9

	or	%l0,%l1,%l0
	or	%l2,%l3,%l2
	fmovs	%f19,%f17

	fmovs	%f27,%f25
	fmuld	%f0,invpio2,%f6		! x * invpio2, for medium range

	fmuld	%f8,invpio2,%f14
	ld	[%fp+nk0],%l4

	fmuld	%f16,invpio2,%f22
	ld	[%fp+nk1],%l5

	orcc	%l0,%l2,%g0
	bl,pn	%icc,.medium
! delay slot
	fmuld	%f24,invpio2,%f30
	ld	[%fp+nk2],%l6

	ld	[%fp+nk3],%l7
	sll	%l4,5,%l4		! k
	fcmpd	%fcc0,%f0,pio2_3	! x < pio2_3 iff x < 0

	sll	%l5,5,%l5
	ldd	[%l4+%g1],%f4
	fcmpd	%fcc1,%f8,pio2_3

	sll	%l6,5,%l6
	ldd	[%l5+%g1],%f12
	fcmpd	%fcc2,%f16,pio2_3

	sll	%l7,5,%l7
	ldd	[%l6+%g1],%f20
	fcmpd	%fcc3,%f24,pio2_3

	ldd	[%l7+%g1],%f28
	fsubd	%f2,%f4,%f2		! x -= __vlibm_TBL_sincos2[k]

	fsubd	%f10,%f12,%f10

	fsubd	%f18,%f20,%f18

	fsubd	%f26,%f28,%f26

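! How the primary path assembles its results (an inferred reading; the
! layout of __vlibm_TBL_sincos2 is not documented in this file): TBL[k]
! is a breakpoint b near |x|, and TBL[k+8], TBL[k+16] behave as sin(b)
! and cos(b).  With t = |x| - b and z = t*t, the code below forms
! cos(b)*(t + t*P(z)) + sin(b)*Q(z) + sin(b), i.e. approximately
! sin(b)*cos(t) + cos(b)*sin(t) = sin(|x|), the sign of x being restored
! by the fmovdl just before the stores.
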
	fmuld	%f2,%f2,%f0		! z = x * x

	fmuld	%f10,%f10,%f8

	fmuld	%f18,%f18,%f16

	fmuld	%f26,%f26,%f24

	fmuld	%f0,pp3,%f6

	fmuld	%f8,pp3,%f14

	fmuld	%f16,pp3,%f22

	fmuld	%f24,pp3,%f30

	faddd	%f6,pp2,%f6
	fmuld	%f0,qq2,%f4

	faddd	%f14,pp2,%f14
	fmuld	%f8,qq2,%f12

	faddd	%f22,pp2,%f22
	fmuld	%f16,qq2,%f20

	faddd	%f30,pp2,%f30
	fmuld	%f24,qq2,%f28

	fmuld	%f0,%f6,%f6
	faddd	%f4,qq1,%f4

	fmuld	%f8,%f14,%f14
	faddd	%f12,qq1,%f12

	fmuld	%f16,%f22,%f22
	faddd	%f20,qq1,%f20

	fmuld	%f24,%f30,%f30
	faddd	%f28,qq1,%f28

	faddd	%f6,pp1,%f6
	fmuld	%f0,%f4,%f4
	add	%l4,%g1,%l4

	faddd	%f14,pp1,%f14
	fmuld	%f8,%f12,%f12
	add	%l5,%g1,%l5

	faddd	%f22,pp1,%f22
	fmuld	%f16,%f20,%f20
	add	%l6,%g1,%l6

	faddd	%f30,pp1,%f30
	fmuld	%f24,%f28,%f28
	add	%l7,%g1,%l7

	fmuld	%f0,%f6,%f6
	ldd	[%l4+8],%f0

	fmuld	%f8,%f14,%f14
	ldd	[%l5+8],%f8

	fmuld	%f16,%f22,%f22
	ldd	[%l6+8],%f16

	fmuld	%f24,%f30,%f30
	ldd	[%l7+8],%f24

	fmuld	%f2,%f6,%f6

	fmuld	%f10,%f14,%f14

	fmuld	%f18,%f22,%f22

	fmuld	%f26,%f30,%f30

	faddd	%f6,%f2,%f6
	fmuld	%f0,%f4,%f4
	ldd	[%l4+16],%f2

	faddd	%f14,%f10,%f14
	fmuld	%f8,%f12,%f12
	ldd	[%l5+16],%f10

	faddd	%f22,%f18,%f22
	fmuld	%f16,%f20,%f20
	ldd	[%l6+16],%f18

	faddd	%f30,%f26,%f30
	fmuld	%f24,%f28,%f28
	ldd	[%l7+16],%f26

	fmuld	%f2,%f6,%f6

	fmuld	%f10,%f14,%f14

	fmuld	%f18,%f22,%f22

	fmuld	%f26,%f30,%f30

	faddd	%f6,%f4,%f6

	faddd	%f14,%f12,%f14

	faddd	%f22,%f20,%f22

	faddd	%f30,%f28,%f30

	faddd	%f6,%f0,%f6

	faddd	%f14,%f8,%f14

	faddd	%f22,%f16,%f22

	faddd	%f30,%f24,%f30

	fnegd	%f6,%f4
	lda	[%i1]%asi,%l0		! preload next argument

	fnegd	%f14,%f12
	lda	[%i1]%asi,%f0

	fnegd	%f22,%f20
	lda	[%i1+4]%asi,%f3

	fnegd	%f30,%f28
	andn	%l0,%i5,%l0
	add	%i1,%i2,%i1

	fmovdl	%fcc0,%f4,%f6		! (x < 0)? -s : s
	st	%f6,[%o0]

	fmovdl	%fcc1,%f12,%f14
	st	%f14,[%o1]

	fmovdl	%fcc2,%f20,%f22
	st	%f22,[%o2]

	fmovdl	%fcc3,%f28,%f30
	st	%f30,[%o3]
	addcc	%i0,-1,%i0

	bg,pt	%icc,.loop0
! delay slot
	st	%f7,[%o0+4]

	ba,pt	%icc,.end
! delay slot
	nop


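! .medium: reduction for the larger range.  A sketch of what the sequence
! below computes, inferred from the code and its comments (names here are
! descriptive only):
!
!	n = nearest integer to x*invpio2, obtained by adding 3*2^51 and
!	    subtracting it again; the integer lands in the stored low
!	    word (nk0..nk3)
!	x = ((x - n*pio2_1) - n*pio2_2) - n*pio2_3	! three-step reduction
!	if (x < 0) n = -n ^ 2				! per the original
!							! comment at the sll/sra
!
! Bits 0 and 1 of each element's n then select among the sin/cos/sign
! cases that follow.
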
	.align	16
.medium:
	faddd	%f6,c3two51,%f4
	st	%f5,[%fp+nk0]

	faddd	%f14,c3two51,%f12
	st	%f13,[%fp+nk1]

	faddd	%f22,c3two51,%f20
	st	%f21,[%fp+nk2]

	faddd	%f30,c3two51,%f28
	st	%f29,[%fp+nk3]

	fsubd	%f4,c3two51,%f6

	fsubd	%f12,c3two51,%f14

	fsubd	%f20,c3two51,%f22

	fsubd	%f28,c3two51,%f30

	fmuld	%f6,pio2_1,%f2
	ld	[%fp+nk0],%l0		! n

	fmuld	%f14,pio2_1,%f10
	ld	[%fp+nk1],%l1

	fmuld	%f22,pio2_1,%f18
	ld	[%fp+nk2],%l2

	fmuld	%f30,pio2_1,%f26
	ld	[%fp+nk3],%l3

	fsubd	%f0,%f2,%f0
	fmuld	%f6,pio2_2,%f4

	fsubd	%f8,%f10,%f8
	fmuld	%f14,pio2_2,%f12

	fsubd	%f16,%f18,%f16
	fmuld	%f22,pio2_2,%f20

	fsubd	%f24,%f26,%f24
	fmuld	%f30,pio2_2,%f28

	fsubd	%f0,%f4,%f32

	fsubd	%f8,%f12,%f34

	fsubd	%f16,%f20,%f36

	fsubd	%f24,%f28,%f38

	fsubd	%f0,%f32,%f0
	fcmple32 %f32,pio2_3,%l4	! x <= pio2_3 iff x < 0

	fsubd	%f8,%f34,%f8
	fcmple32 %f34,pio2_3,%l5

	fsubd	%f16,%f36,%f16
	fcmple32 %f36,pio2_3,%l6

	fsubd	%f24,%f38,%f24
	fcmple32 %f38,pio2_3,%l7

	fsubd	%f0,%f4,%f0
	fmuld	%f6,pio2_3,%f6
	sll	%l4,30,%l4		! if (x < 0) n = -n ^ 2

	fsubd	%f8,%f12,%f8
	fmuld	%f14,pio2_3,%f14
	sll	%l5,30,%l5

	fsubd	%f16,%f20,%f16
	fmuld	%f22,pio2_3,%f22
	sll	%l6,30,%l6

	fsubd	%f24,%f28,%f24
	fmuld	%f30,pio2_3,%f30
	sll	%l7,30,%l7

	fsubd	%f6,%f0,%f6
	sra	%l4,31,%l4

	fsubd	%f14,%f8,%f14
	sra	%l5,31,%l5

	fsubd	%f22,%f16,%f22
	sra	%l6,31,%l6

	fsubd	%f30,%f24,%f30
	sra	%l7,31,%l7

	fsubd	%f32,%f6,%f0		! reduced x
	xor	%l0,%l4,%l0

	fsubd	%f34,%f14,%f8
	xor	%l1,%l5,%l1

	fsubd	%f36,%f22,%f16
	xor	%l2,%l6,%l2

	fsubd	%f38,%f30,%f24
	xor	%l3,%l7,%l3

	fabsd	%f0,%f2
	sub	%l0,%l4,%l0

	fabsd	%f8,%f10
	sub	%l1,%l5,%l1

	fabsd	%f16,%f18
	sub	%l2,%l6,%l2

	fabsd	%f24,%f26
	sub	%l3,%l7,%l3

	faddd	%f2,c3two44,%f4
	st	%f5,[%fp+nk0]
	and	%l4,2,%l4

	faddd	%f10,c3two44,%f12
	st	%f13,[%fp+nk1]
	and	%l5,2,%l5

	faddd	%f18,c3two44,%f20
	st	%f21,[%fp+nk2]
	and	%l6,2,%l6

	faddd	%f26,c3two44,%f28
	st	%f29,[%fp+nk3]
	and	%l7,2,%l7

	fsubd	%f32,%f0,%f4
	xor	%l0,%l4,%l0

	fsubd	%f34,%f8,%f12
	xor	%l1,%l5,%l1

	fsubd	%f36,%f16,%f20
	xor	%l2,%l6,%l2

	fsubd	%f38,%f24,%f28
	xor	%l3,%l7,%l3

	fzero	%f38
	ld	[%fp+nk0],%l4

	fsubd	%f4,%f6,%f6		! w
	ld	[%fp+nk1],%l5

	fsubd	%f12,%f14,%f14
	ld	[%fp+nk2],%l6

	fnegd	%f38,%f38
	ld	[%fp+nk3],%l7
	sll	%l4,5,%l4		! k

	fsubd	%f20,%f22,%f22
	sll	%l5,5,%l5

	fsubd	%f28,%f30,%f30
	sll	%l6,5,%l6

	fand	%f0,%f38,%f32		! sign bit of x
	ldd	[%l4+%g1],%f4
	sll	%l7,5,%l7

	fand	%f8,%f38,%f34
	ldd	[%l5+%g1],%f12

	fand	%f16,%f38,%f36
	ldd	[%l6+%g1],%f20

	fand	%f24,%f38,%f38
	ldd	[%l7+%g1],%f28

	fsubd	%f2,%f4,%f2		! x -= __vlibm_TBL_sincos2[k]

	fsubd	%f10,%f12,%f10

	fsubd	%f18,%f20,%f18
	nop

	fsubd	%f26,%f28,%f26
	nop

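! Case dispatch, inferred from the branch tree below: bit 0 of each
! element's n decides whether that element evaluates the "sin" or the
! "cos" polynomial form (sin(x + n*pi/2) is +-sin(x) for even n and
! +-cos(x) for odd n), and the sixteen odd/even combinations are
! unrolled as .case0 (the fall-through) through .case15.  Bit 1 of n
! supplies the final sign through the fmovdnz at the end of each case.
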
! 16-byte aligned
	fmuld	%f2,%f2,%f0		! z = x * x
	andcc	%l0,1,%g0
	bz,pn	%icc,.case8
! delay slot
	fxor	%f6,%f32,%f32

	fmuld	%f10,%f10,%f8
	andcc	%l1,1,%g0
	bz,pn	%icc,.case4
! delay slot
	fxor	%f14,%f34,%f34

	fmuld	%f18,%f18,%f16
	andcc	%l2,1,%g0
	bz,pn	%icc,.case2
! delay slot
	fxor	%f22,%f36,%f36

	fmuld	%f26,%f26,%f24
	andcc	%l3,1,%g0
	bz,pn	%icc,.case1
! delay slot
	fxor	%f30,%f38,%f38

!.case0:
	fmuld	%f0,qq3,%f6		! cos(x0)

	fmuld	%f8,qq3,%f14		! cos(x1)

	fmuld	%f16,qq3,%f22		! cos(x2)

	fmuld	%f24,qq3,%f30		! cos(x3)

	faddd	%f6,qq2,%f6
	fmuld	%f0,pp2,%f4

	faddd	%f14,qq2,%f14
	fmuld	%f8,pp2,%f12

	faddd	%f22,qq2,%f22
	fmuld	%f16,pp2,%f20

	faddd	%f30,qq2,%f30
	fmuld	%f24,pp2,%f28

	fmuld	%f0,%f6,%f6
	faddd	%f4,pp1,%f4

	fmuld	%f8,%f14,%f14
	faddd	%f12,pp1,%f12

	fmuld	%f16,%f22,%f22
	faddd	%f20,pp1,%f20

	fmuld	%f24,%f30,%f30
	faddd	%f28,pp1,%f28

	faddd	%f6,qq1,%f6
	fmuld	%f0,%f4,%f4
	add	%l4,%g1,%l4

	faddd	%f14,qq1,%f14
	fmuld	%f8,%f12,%f12
	add	%l5,%g1,%l5

	faddd	%f22,qq1,%f22
	fmuld	%f16,%f20,%f20
	add	%l6,%g1,%l6

	faddd	%f30,qq1,%f30
	fmuld	%f24,%f28,%f28
	add	%l7,%g1,%l7

	fmuld	%f2,%f4,%f4

	fmuld	%f10,%f12,%f12

	fmuld	%f18,%f20,%f20

	fmuld	%f26,%f28,%f28

	fmuld	%f0,%f6,%f6
	faddd	%f4,%f32,%f4
	ldd	[%l4+16],%f0

	fmuld	%f8,%f14,%f14
	faddd	%f12,%f34,%f12
	ldd	[%l5+16],%f8

	fmuld	%f16,%f22,%f22
	faddd	%f20,%f36,%f20
	ldd	[%l6+16],%f16

	fmuld	%f24,%f30,%f30
	faddd	%f28,%f38,%f28
	ldd	[%l7+16],%f24

	fmuld	%f0,%f6,%f6
	faddd	%f4,%f2,%f4
	ldd	[%l4+8],%f32

	fmuld	%f8,%f14,%f14
	faddd	%f12,%f10,%f12
	ldd	[%l5+8],%f34

	fmuld	%f16,%f22,%f22
	faddd	%f20,%f18,%f20
	ldd	[%l6+8],%f36

	fmuld	%f24,%f30,%f30
	faddd	%f28,%f26,%f28
	ldd	[%l7+8],%f38

	fmuld	%f32,%f4,%f4

	fmuld	%f34,%f12,%f12

	fmuld	%f36,%f20,%f20

	fmuld	%f38,%f28,%f28

	fsubd	%f6,%f4,%f6

	fsubd	%f14,%f12,%f14

	fsubd	%f22,%f20,%f22

	fsubd	%f30,%f28,%f30

	faddd	%f6,%f0,%f6

	faddd	%f14,%f8,%f14

	faddd	%f22,%f16,%f22

	faddd	%f30,%f24,%f30
	mov	%l0,%l4

	fnegd	%f6,%f4
	lda	[%i1]%asi,%l0		! preload next argument

	fnegd	%f14,%f12
	lda	[%i1]%asi,%f0

	fnegd	%f22,%f20
	lda	[%i1+4]%asi,%f3

	fnegd	%f30,%f28
	andn	%l0,%i5,%l0
	add	%i1,%i2,%i1

	andcc	%l4,2,%g0
	fmovdnz	%icc,%f4,%f6
	st	%f6,[%o0]

	andcc	%l1,2,%g0
	fmovdnz	%icc,%f12,%f14
	st	%f14,[%o1]

	andcc	%l2,2,%g0
	fmovdnz	%icc,%f20,%f22
	st	%f22,[%o2]

	andcc	%l3,2,%g0
	fmovdnz	%icc,%f28,%f30
	st	%f30,[%o3]

	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop0
! delay slot
	st	%f7,[%o0+4]

	ba,pt	%icc,.end
! delay slot
	nop

	.align	16
.case1:
	fmuld	%f24,pp3,%f30		! sin(x3)

	fmuld	%f0,qq3,%f6		! cos(x0)

	fmuld	%f8,qq3,%f14		! cos(x1)

	fmuld	%f16,qq3,%f22		! cos(x2)

	faddd	%f30,pp2,%f30
	fmuld	%f24,qq2,%f28

	faddd	%f6,qq2,%f6
	fmuld	%f0,pp2,%f4

	faddd	%f14,qq2,%f14
	fmuld	%f8,pp2,%f12

	faddd	%f22,qq2,%f22
	fmuld	%f16,pp2,%f20

	fmuld	%f24,%f30,%f30
	faddd	%f28,qq1,%f28

	fmuld	%f0,%f6,%f6
	faddd	%f4,pp1,%f4

	fmuld	%f8,%f14,%f14
	faddd	%f12,pp1,%f12

	fmuld	%f16,%f22,%f22
	faddd	%f20,pp1,%f20

	faddd	%f30,pp1,%f30
	fmuld	%f24,%f28,%f28
	add	%l7,%g1,%l7

	faddd	%f6,qq1,%f6
	fmuld	%f0,%f4,%f4
	add	%l4,%g1,%l4

	faddd	%f14,qq1,%f14
	fmuld	%f8,%f12,%f12
	add	%l5,%g1,%l5

	faddd	%f22,qq1,%f22
	fmuld	%f16,%f20,%f20
	add	%l6,%g1,%l6

	fmuld	%f24,%f30,%f30

	fmuld	%f2,%f4,%f4

	fmuld	%f10,%f12,%f12

	fmuld	%f18,%f20,%f20

	fmuld	%f26,%f30,%f30
	ldd	[%l7+8],%f24

	fmuld	%f0,%f6,%f6
	faddd	%f4,%f32,%f4
	ldd	[%l4+16],%f0

	fmuld	%f8,%f14,%f14
	faddd	%f12,%f34,%f12
	ldd	[%l5+16],%f8

	fmuld	%f16,%f22,%f22
	faddd	%f20,%f36,%f20
	ldd	[%l6+16],%f16

	fmuld	%f24,%f28,%f28
	faddd	%f38,%f30,%f30

	fmuld	%f0,%f6,%f6
	faddd	%f4,%f2,%f4
	ldd	[%l4+8],%f32

	fmuld	%f8,%f14,%f14
	faddd	%f12,%f10,%f12
	ldd	[%l5+8],%f34

	fmuld	%f16,%f22,%f22
	faddd	%f20,%f18,%f20
	ldd	[%l6+8],%f36

	faddd	%f26,%f30,%f30
	ldd	[%l7+16],%f38

	fmuld	%f32,%f4,%f4

	fmuld	%f34,%f12,%f12

	fmuld	%f36,%f20,%f20

	fmuld	%f38,%f30,%f30

	fsubd	%f6,%f4,%f6

	fsubd	%f14,%f12,%f14

	fsubd	%f22,%f20,%f22

	faddd	%f30,%f28,%f30

	faddd	%f6,%f0,%f6

	faddd	%f14,%f8,%f14

	faddd	%f22,%f16,%f22

	faddd	%f30,%f24,%f30
	mov	%l0,%l4

	fnegd	%f6,%f4
	lda	[%i1]%asi,%l0		! preload next argument

	fnegd	%f14,%f12
	lda	[%i1]%asi,%f0

	fnegd	%f22,%f20
	lda	[%i1+4]%asi,%f3

	fnegd	%f30,%f28
	andn	%l0,%i5,%l0
	add	%i1,%i2,%i1

	andcc	%l4,2,%g0
	fmovdnz	%icc,%f4,%f6
	st	%f6,[%o0]

	andcc	%l1,2,%g0
	fmovdnz	%icc,%f12,%f14
	st	%f14,[%o1]

	andcc	%l2,2,%g0
	fmovdnz	%icc,%f20,%f22
	st	%f22,[%o2]

	andcc	%l3,2,%g0
	fmovdnz	%icc,%f28,%f30
	st	%f30,[%o3]

	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop0
! delay slot
	st	%f7,[%o0+4]

	ba,pt	%icc,.end
! delay slot
	nop

	.align	16
.case2:
	fmuld	%f26,%f26,%f24
	andcc	%l3,1,%g0
	bz,pn	%icc,.case3
! delay slot
	fxor	%f30,%f38,%f38

	fmuld	%f16,pp3,%f22		! sin(x2)

	fmuld	%f0,qq3,%f6		! cos(x0)

	fmuld	%f8,qq3,%f14		! cos(x1)

	faddd	%f22,pp2,%f22
	fmuld	%f16,qq2,%f20

	fmuld	%f24,qq3,%f30		! cos(x3)

	faddd	%f6,qq2,%f6
	fmuld	%f0,pp2,%f4

	faddd	%f14,qq2,%f14
	fmuld	%f8,pp2,%f12

	fmuld	%f16,%f22,%f22
	faddd	%f20,qq1,%f20

	faddd	%f30,qq2,%f30
	fmuld	%f24,pp2,%f28

	fmuld	%f0,%f6,%f6
	faddd	%f4,pp1,%f4

	fmuld	%f8,%f14,%f14
	faddd	%f12,pp1,%f12

	faddd	%f22,pp1,%f22
	fmuld	%f16,%f20,%f20
	add	%l6,%g1,%l6

	fmuld	%f24,%f30,%f30
	faddd	%f28,pp1,%f28

	faddd	%f6,qq1,%f6
	fmuld	%f0,%f4,%f4
	add	%l4,%g1,%l4

	faddd	%f14,qq1,%f14
	fmuld	%f8,%f12,%f12
	add	%l5,%g1,%l5

	fmuld	%f16,%f22,%f22

	faddd	%f30,qq1,%f30
	fmuld	%f24,%f28,%f28
	add	%l7,%g1,%l7

	fmuld	%f2,%f4,%f4

	fmuld	%f10,%f12,%f12

	fmuld	%f18,%f22,%f22
	ldd	[%l6+8],%f16

	fmuld	%f26,%f28,%f28

	fmuld	%f0,%f6,%f6
	faddd	%f4,%f32,%f4
	ldd	[%l4+16],%f0

	fmuld	%f8,%f14,%f14
	faddd	%f12,%f34,%f12
	ldd	[%l5+16],%f8

	fmuld	%f16,%f20,%f20
	faddd	%f36,%f22,%f22

	fmuld	%f24,%f30,%f30
	faddd	%f28,%f38,%f28
	ldd	[%l7+16],%f24

	fmuld	%f0,%f6,%f6
	faddd	%f4,%f2,%f4
	ldd	[%l4+8],%f32

	fmuld	%f8,%f14,%f14
	faddd	%f12,%f10,%f12
	ldd	[%l5+8],%f34

	faddd	%f18,%f22,%f22
	ldd	[%l6+16],%f36

	fmuld	%f24,%f30,%f30
	faddd	%f28,%f26,%f28
	ldd	[%l7+8],%f38

	fmuld	%f32,%f4,%f4

	fmuld	%f34,%f12,%f12

	fmuld	%f36,%f22,%f22

	fmuld	%f38,%f28,%f28

	fsubd	%f6,%f4,%f6

	fsubd	%f14,%f12,%f14

	faddd	%f22,%f20,%f22

	fsubd	%f30,%f28,%f30

	faddd	%f6,%f0,%f6

	faddd	%f14,%f8,%f14

	faddd	%f22,%f16,%f22

	faddd	%f30,%f24,%f30
	mov	%l0,%l4

	fnegd	%f6,%f4
	lda	[%i1]%asi,%l0		! preload next argument

	fnegd	%f14,%f12
	lda	[%i1]%asi,%f0

	fnegd	%f22,%f20
	lda	[%i1+4]%asi,%f3

	fnegd	%f30,%f28
	andn	%l0,%i5,%l0
	add	%i1,%i2,%i1

	andcc	%l4,2,%g0
	fmovdnz	%icc,%f4,%f6
	st	%f6,[%o0]

	andcc	%l1,2,%g0
	fmovdnz	%icc,%f12,%f14
	st	%f14,[%o1]

	andcc	%l2,2,%g0
	fmovdnz	%icc,%f20,%f22
	st	%f22,[%o2]

	andcc	%l3,2,%g0
	fmovdnz	%icc,%f28,%f30
	st	%f30,[%o3]

	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop0
! delay slot
	st	%f7,[%o0+4]

	ba,pt	%icc,.end
! delay slot
	nop

	.align	16
.case3:
	fmuld	%f16,pp3,%f22		! sin(x2)

	fmuld	%f24,pp3,%f30		! sin(x3)

	fmuld	%f0,qq3,%f6		! cos(x0)

	fmuld	%f8,qq3,%f14		! cos(x1)

	faddd	%f22,pp2,%f22
	fmuld	%f16,qq2,%f20

	faddd	%f30,pp2,%f30
	fmuld	%f24,qq2,%f28

	faddd	%f6,qq2,%f6
	fmuld	%f0,pp2,%f4

	faddd	%f14,qq2,%f14
	fmuld	%f8,pp2,%f12

	fmuld	%f16,%f22,%f22
	faddd	%f20,qq1,%f20

	fmuld	%f24,%f30,%f30
	faddd	%f28,qq1,%f28

	fmuld	%f0,%f6,%f6
	faddd	%f4,pp1,%f4

	fmuld	%f8,%f14,%f14
	faddd	%f12,pp1,%f12

	faddd	%f22,pp1,%f22
	fmuld	%f16,%f20,%f20
	add	%l6,%g1,%l6

	faddd	%f30,pp1,%f30
	fmuld	%f24,%f28,%f28
	add	%l7,%g1,%l7

	faddd	%f6,qq1,%f6
	fmuld	%f0,%f4,%f4
	add	%l4,%g1,%l4

	faddd	%f14,qq1,%f14
	fmuld	%f8,%f12,%f12
	add	%l5,%g1,%l5

	fmuld	%f16,%f22,%f22

	fmuld	%f24,%f30,%f30

	fmuld	%f2,%f4,%f4

	fmuld	%f10,%f12,%f12

	fmuld	%f18,%f22,%f22
	ldd	[%l6+8],%f16

	fmuld	%f26,%f30,%f30
	ldd	[%l7+8],%f24

	fmuld	%f0,%f6,%f6
	faddd	%f4,%f32,%f4
	ldd	[%l4+16],%f0

	fmuld	%f8,%f14,%f14
	faddd	%f12,%f34,%f12
	ldd	[%l5+16],%f8

	fmuld	%f16,%f20,%f20
	faddd	%f36,%f22,%f22

	fmuld	%f24,%f28,%f28
	faddd	%f38,%f30,%f30

	fmuld	%f0,%f6,%f6
	faddd	%f4,%f2,%f4
	ldd	[%l4+8],%f32

	fmuld	%f8,%f14,%f14
	faddd	%f12,%f10,%f12
	ldd	[%l5+8],%f34

	faddd	%f18,%f22,%f22
	ldd	[%l6+16],%f36

	faddd	%f26,%f30,%f30
	ldd	[%l7+16],%f38

	fmuld	%f32,%f4,%f4

	fmuld	%f34,%f12,%f12

	fmuld	%f36,%f22,%f22

	fmuld	%f38,%f30,%f30

	fsubd	%f6,%f4,%f6

	fsubd	%f14,%f12,%f14

	faddd	%f22,%f20,%f22

	faddd	%f30,%f28,%f30

	faddd	%f6,%f0,%f6

	faddd	%f14,%f8,%f14

	faddd	%f22,%f16,%f22

	faddd	%f30,%f24,%f30
	mov	%l0,%l4

	fnegd	%f6,%f4
	lda	[%i1]%asi,%l0		! preload next argument

	fnegd	%f14,%f12
	lda	[%i1]%asi,%f0

	fnegd	%f22,%f20
	lda	[%i1+4]%asi,%f3

	fnegd	%f30,%f28
	andn	%l0,%i5,%l0
	add	%i1,%i2,%i1

	andcc	%l4,2,%g0
	fmovdnz	%icc,%f4,%f6
	st	%f6,[%o0]

	andcc	%l1,2,%g0
	fmovdnz	%icc,%f12,%f14
	st	%f14,[%o1]

	andcc	%l2,2,%g0
	fmovdnz	%icc,%f20,%f22
	st	%f22,[%o2]

	andcc	%l3,2,%g0
	fmovdnz	%icc,%f28,%f30
	st	%f30,[%o3]

	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop0
! delay slot
	st	%f7,[%o0+4]

	ba,pt	%icc,.end
! delay slot
	nop

	.align	16
.case4:
	fmuld	%f18,%f18,%f16
	andcc	%l2,1,%g0
	bz,pn	%icc,.case6
! delay slot
	fxor	%f22,%f36,%f36

	fmuld	%f26,%f26,%f24
	andcc	%l3,1,%g0
	bz,pn	%icc,.case5
! delay slot
	fxor	%f30,%f38,%f38

	fmuld	%f8,pp3,%f14		! sin(x1)

	fmuld	%f0,qq3,%f6		! cos(x0)

	faddd	%f14,pp2,%f14
	fmuld	%f8,qq2,%f12

	fmuld	%f16,qq3,%f22		! cos(x2)

	fmuld	%f24,qq3,%f30		! cos(x3)

	faddd	%f6,qq2,%f6
	fmuld	%f0,pp2,%f4

	fmuld	%f8,%f14,%f14
	faddd	%f12,qq1,%f12

	faddd	%f22,qq2,%f22
	fmuld	%f16,pp2,%f20

	faddd	%f30,qq2,%f30
	fmuld	%f24,pp2,%f28

	fmuld	%f0,%f6,%f6
	faddd	%f4,pp1,%f4

	faddd	%f14,pp1,%f14
	fmuld	%f8,%f12,%f12
	add	%l5,%g1,%l5

	fmuld	%f16,%f22,%f22
	faddd	%f20,pp1,%f20

	fmuld	%f24,%f30,%f30
	faddd	%f28,pp1,%f28

	faddd	%f6,qq1,%f6
	fmuld	%f0,%f4,%f4
	add	%l4,%g1,%l4

	fmuld	%f8,%f14,%f14

	faddd	%f22,qq1,%f22
	fmuld	%f16,%f20,%f20
	add	%l6,%g1,%l6

	faddd	%f30,qq1,%f30
	fmuld	%f24,%f28,%f28
	add	%l7,%g1,%l7

	fmuld	%f2,%f4,%f4

	fmuld	%f10,%f14,%f14
	ldd	[%l5+8],%f8

	fmuld	%f18,%f20,%f20

	fmuld	%f26,%f28,%f28

	fmuld	%f0,%f6,%f6
	faddd	%f4,%f32,%f4
	ldd	[%l4+16],%f0

	fmuld	%f8,%f12,%f12
	faddd	%f34,%f14,%f14

	fmuld	%f16,%f22,%f22
	faddd	%f20,%f36,%f20
	ldd	[%l6+16],%f16

	fmuld	%f24,%f30,%f30
	faddd	%f28,%f38,%f28
	ldd	[%l7+16],%f24

	fmuld	%f0,%f6,%f6
	faddd	%f4,%f2,%f4
	ldd	[%l4+8],%f32

	faddd	%f10,%f14,%f14
	ldd	[%l5+16],%f34

	fmuld	%f16,%f22,%f22
	faddd	%f20,%f18,%f20
	ldd	[%l6+8],%f36

	fmuld	%f24,%f30,%f30
	faddd	%f28,%f26,%f28
	ldd	[%l7+8],%f38

	fmuld	%f32,%f4,%f4

	fmuld	%f34,%f14,%f14

	fmuld	%f36,%f20,%f20

	fmuld	%f38,%f28,%f28

	fsubd	%f6,%f4,%f6

	faddd	%f14,%f12,%f14

	fsubd	%f22,%f20,%f22

	fsubd	%f30,%f28,%f30

	faddd	%f6,%f0,%f6

	faddd	%f14,%f8,%f14

	faddd	%f22,%f16,%f22

	faddd	%f30,%f24,%f30
	mov	%l0,%l4

	fnegd	%f6,%f4
	lda	[%i1]%asi,%l0		! preload next argument

	fnegd	%f14,%f12
	lda	[%i1]%asi,%f0

	fnegd	%f22,%f20
	lda	[%i1+4]%asi,%f3

	fnegd	%f30,%f28
	andn	%l0,%i5,%l0
	add	%i1,%i2,%i1

	andcc	%l4,2,%g0
	fmovdnz	%icc,%f4,%f6
	st	%f6,[%o0]

	andcc	%l1,2,%g0
	fmovdnz	%icc,%f12,%f14
	st	%f14,[%o1]

	andcc	%l2,2,%g0
	fmovdnz	%icc,%f20,%f22
	st	%f22,[%o2]

	andcc	%l3,2,%g0
	fmovdnz	%icc,%f28,%f30
	st	%f30,[%o3]

	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop0
! delay slot
	st	%f7,[%o0+4]

	ba,pt	%icc,.end
! delay slot
	nop

	.align	16
.case5:
	fmuld	%f8,pp3,%f14		! sin(x1)

	fmuld	%f24,pp3,%f30		! sin(x3)

	fmuld	%f0,qq3,%f6		! cos(x0)

	faddd	%f14,pp2,%f14
	fmuld	%f8,qq2,%f12

	fmuld	%f16,qq3,%f22		! cos(x2)

	faddd	%f30,pp2,%f30
	fmuld	%f24,qq2,%f28

	faddd	%f6,qq2,%f6
	fmuld	%f0,pp2,%f4

	fmuld	%f8,%f14,%f14
	faddd	%f12,qq1,%f12

	faddd	%f22,qq2,%f22
	fmuld	%f16,pp2,%f20

	fmuld	%f24,%f30,%f30
	faddd	%f28,qq1,%f28

	fmuld	%f0,%f6,%f6
	faddd	%f4,pp1,%f4

	faddd	%f14,pp1,%f14
	fmuld	%f8,%f12,%f12
	add	%l5,%g1,%l5

	fmuld	%f16,%f22,%f22
	faddd	%f20,pp1,%f20

	faddd	%f30,pp1,%f30
	fmuld	%f24,%f28,%f28
	add	%l7,%g1,%l7

	faddd	%f6,qq1,%f6
	fmuld	%f0,%f4,%f4
	add	%l4,%g1,%l4

	fmuld	%f8,%f14,%f14

	faddd	%f22,qq1,%f22
	fmuld	%f16,%f20,%f20
	add	%l6,%g1,%l6

	fmuld	%f24,%f30,%f30

	fmuld	%f2,%f4,%f4

	fmuld	%f10,%f14,%f14
	ldd	[%l5+8],%f8

	fmuld	%f18,%f20,%f20

	fmuld	%f26,%f30,%f30
	ldd	[%l7+8],%f24

	fmuld	%f0,%f6,%f6
	faddd	%f4,%f32,%f4
	ldd	[%l4+16],%f0

	fmuld	%f8,%f12,%f12
	faddd	%f34,%f14,%f14

	fmuld	%f16,%f22,%f22
	faddd	%f20,%f36,%f20
	ldd	[%l6+16],%f16

	fmuld	%f24,%f28,%f28
	faddd	%f38,%f30,%f30

	fmuld	%f0,%f6,%f6
	faddd	%f4,%f2,%f4
	ldd	[%l4+8],%f32

	faddd	%f10,%f14,%f14
	ldd	[%l5+16],%f34

	fmuld	%f16,%f22,%f22
	faddd	%f20,%f18,%f20
	ldd	[%l6+8],%f36

	faddd	%f26,%f30,%f30
	ldd	[%l7+16],%f38

	fmuld	%f32,%f4,%f4

	fmuld	%f34,%f14,%f14

	fmuld	%f36,%f20,%f20

	fmuld	%f38,%f30,%f30

	fsubd	%f6,%f4,%f6

	faddd	%f14,%f12,%f14

	fsubd	%f22,%f20,%f22

	faddd	%f30,%f28,%f30

	faddd	%f6,%f0,%f6

	faddd	%f14,%f8,%f14

	faddd	%f22,%f16,%f22

	faddd	%f30,%f24,%f30
	mov	%l0,%l4

	fnegd	%f6,%f4
	lda	[%i1]%asi,%l0		! preload next argument

	fnegd	%f14,%f12
	lda	[%i1]%asi,%f0

	fnegd	%f22,%f20
	lda	[%i1+4]%asi,%f3

	fnegd	%f30,%f28
	andn	%l0,%i5,%l0
	add	%i1,%i2,%i1

	andcc	%l4,2,%g0
	fmovdnz	%icc,%f4,%f6
	st	%f6,[%o0]

	andcc	%l1,2,%g0
	fmovdnz	%icc,%f12,%f14
	st	%f14,[%o1]

	andcc	%l2,2,%g0
	fmovdnz	%icc,%f20,%f22
	st	%f22,[%o2]

	andcc	%l3,2,%g0
	fmovdnz	%icc,%f28,%f30
	st	%f30,[%o3]

	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop0
! delay slot
	st	%f7,[%o0+4]

	ba,pt	%icc,.end
! delay slot
	nop

	.align	16
.case6:
	fmuld	%f26,%f26,%f24
	andcc	%l3,1,%g0
	bz,pn	%icc,.case7
! delay slot
	fxor	%f30,%f38,%f38

	fmuld	%f8,pp3,%f14		! sin(x1)

	fmuld	%f16,pp3,%f22		! sin(x2)

	fmuld	%f0,qq3,%f6		! cos(x0)

	faddd	%f14,pp2,%f14
	fmuld	%f8,qq2,%f12

	faddd	%f22,pp2,%f22
	fmuld	%f16,qq2,%f20

	fmuld	%f24,qq3,%f30		! cos(x3)

	faddd	%f6,qq2,%f6
	fmuld	%f0,pp2,%f4

	fmuld	%f8,%f14,%f14
	faddd	%f12,qq1,%f12

	fmuld	%f16,%f22,%f22
	faddd	%f20,qq1,%f20

	faddd	%f30,qq2,%f30
	fmuld	%f24,pp2,%f28

	fmuld	%f0,%f6,%f6
	faddd	%f4,pp1,%f4

	faddd	%f14,pp1,%f14
	fmuld	%f8,%f12,%f12
	add	%l5,%g1,%l5

	faddd	%f22,pp1,%f22
	fmuld	%f16,%f20,%f20
	add	%l6,%g1,%l6

	fmuld	%f24,%f30,%f30
	faddd	%f28,pp1,%f28

	faddd	%f6,qq1,%f6
	fmuld	%f0,%f4,%f4
	add	%l4,%g1,%l4

	fmuld	%f8,%f14,%f14

	fmuld	%f16,%f22,%f22

	faddd	%f30,qq1,%f30
	fmuld	%f24,%f28,%f28
	add	%l7,%g1,%l7

	fmuld	%f2,%f4,%f4

	fmuld	%f10,%f14,%f14
	ldd	[%l5+8],%f8

	fmuld	%f18,%f22,%f22
	ldd	[%l6+8],%f16

	fmuld	%f26,%f28,%f28

	fmuld	%f0,%f6,%f6
	faddd	%f4,%f32,%f4
	ldd	[%l4+16],%f0

	fmuld	%f8,%f12,%f12
	faddd	%f34,%f14,%f14

	fmuld	%f16,%f20,%f20
	faddd	%f36,%f22,%f22

	fmuld	%f24,%f30,%f30
	faddd	%f28,%f38,%f28
	ldd	[%l7+16],%f24

	fmuld	%f0,%f6,%f6
	faddd	%f4,%f2,%f4
	ldd	[%l4+8],%f32

	faddd	%f10,%f14,%f14
	ldd	[%l5+16],%f34

	faddd	%f18,%f22,%f22
	ldd	[%l6+16],%f36

	fmuld	%f24,%f30,%f30
	faddd	%f28,%f26,%f28
	ldd	[%l7+8],%f38

	fmuld	%f32,%f4,%f4

	fmuld	%f34,%f14,%f14

	fmuld	%f36,%f22,%f22

	fmuld	%f38,%f28,%f28

	fsubd	%f6,%f4,%f6

	faddd	%f14,%f12,%f14

	faddd	%f22,%f20,%f22

	fsubd	%f30,%f28,%f30

	faddd	%f6,%f0,%f6

	faddd	%f14,%f8,%f14

	faddd	%f22,%f16,%f22

	faddd	%f30,%f24,%f30
	mov	%l0,%l4

	fnegd	%f6,%f4
	lda	[%i1]%asi,%l0		! preload next argument

	fnegd	%f14,%f12
	lda	[%i1]%asi,%f0

	fnegd	%f22,%f20
	lda	[%i1+4]%asi,%f3

	fnegd	%f30,%f28
	andn	%l0,%i5,%l0
	add	%i1,%i2,%i1

	andcc	%l4,2,%g0
	fmovdnz	%icc,%f4,%f6
	st	%f6,[%o0]

	andcc	%l1,2,%g0
	fmovdnz	%icc,%f12,%f14
	st	%f14,[%o1]

	andcc	%l2,2,%g0
	fmovdnz	%icc,%f20,%f22
	st	%f22,[%o2]

	andcc	%l3,2,%g0
	fmovdnz	%icc,%f28,%f30
	st	%f30,[%o3]

	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop0
! delay slot
	st	%f7,[%o0+4]

	ba,pt	%icc,.end
! delay slot
	nop

	.align	16
.case7:
	fmuld	%f8,pp3,%f14		! sin(x1)

	fmuld	%f16,pp3,%f22		! sin(x2)

	fmuld	%f24,pp3,%f30		! sin(x3)

	fmuld	%f0,qq3,%f6		! cos(x0)

	faddd	%f14,pp2,%f14
	fmuld	%f8,qq2,%f12

	faddd	%f22,pp2,%f22
	fmuld	%f16,qq2,%f20

	faddd	%f30,pp2,%f30
	fmuld	%f24,qq2,%f28

	faddd	%f6,qq2,%f6
	fmuld	%f0,pp2,%f4

	fmuld	%f8,%f14,%f14
	faddd	%f12,qq1,%f12

	fmuld	%f16,%f22,%f22
	faddd	%f20,qq1,%f20

	fmuld	%f24,%f30,%f30
	faddd	%f28,qq1,%f28

	fmuld	%f0,%f6,%f6
	faddd	%f4,pp1,%f4

	faddd	%f14,pp1,%f14
	fmuld	%f8,%f12,%f12
	add	%l5,%g1,%l5

	faddd	%f22,pp1,%f22
	fmuld	%f16,%f20,%f20
	add	%l6,%g1,%l6

	faddd	%f30,pp1,%f30
	fmuld	%f24,%f28,%f28
	add	%l7,%g1,%l7

	faddd	%f6,qq1,%f6
	fmuld	%f0,%f4,%f4
	add	%l4,%g1,%l4

	fmuld	%f8,%f14,%f14

	fmuld	%f16,%f22,%f22

	fmuld	%f24,%f30,%f30

	fmuld	%f2,%f4,%f4

	fmuld	%f10,%f14,%f14
	ldd	[%l5+8],%f8

	fmuld	%f18,%f22,%f22
	ldd	[%l6+8],%f16

	fmuld	%f26,%f30,%f30
	ldd	[%l7+8],%f24

	fmuld	%f0,%f6,%f6
	faddd	%f4,%f32,%f4
	ldd	[%l4+16],%f0

	fmuld	%f8,%f12,%f12
	faddd	%f34,%f14,%f14

	fmuld	%f16,%f20,%f20
	faddd	%f36,%f22,%f22

	fmuld	%f24,%f28,%f28
	faddd	%f38,%f30,%f30

	fmuld	%f0,%f6,%f6
	faddd	%f4,%f2,%f4
	ldd	[%l4+8],%f32

	faddd	%f10,%f14,%f14
	ldd	[%l5+16],%f34

	faddd	%f18,%f22,%f22
	ldd	[%l6+16],%f36

	faddd	%f26,%f30,%f30
	ldd	[%l7+16],%f38

	fmuld	%f32,%f4,%f4

	fmuld	%f34,%f14,%f14

	fmuld	%f36,%f22,%f22

	fmuld	%f38,%f30,%f30

	fsubd	%f6,%f4,%f6

	faddd	%f14,%f12,%f14

	faddd	%f22,%f20,%f22

	faddd	%f30,%f28,%f30

	faddd	%f6,%f0,%f6

	faddd	%f14,%f8,%f14

	faddd	%f22,%f16,%f22

	faddd	%f30,%f24,%f30
	mov	%l0,%l4

	fnegd	%f6,%f4
	lda	[%i1]%asi,%l0		! preload next argument

	fnegd	%f14,%f12
	lda	[%i1]%asi,%f0

	fnegd	%f22,%f20
	lda	[%i1+4]%asi,%f3

	fnegd	%f30,%f28
	andn	%l0,%i5,%l0
	add	%i1,%i2,%i1

	andcc	%l4,2,%g0
	fmovdnz	%icc,%f4,%f6
	st	%f6,[%o0]

	andcc	%l1,2,%g0
	fmovdnz	%icc,%f12,%f14
	st	%f14,[%o1]

	andcc	%l2,2,%g0
	fmovdnz	%icc,%f20,%f22
	st	%f22,[%o2]

	andcc	%l3,2,%g0
	fmovdnz	%icc,%f28,%f30
	st	%f30,[%o3]

	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop0
! delay slot
	st	%f7,[%o0+4]

	ba,pt	%icc,.end
! delay slot
	nop

	.align	16
.case8:
	fmuld	%f10,%f10,%f8
	andcc	%l1,1,%g0
	bz,pn	%icc,.case12
! delay slot
	fxor	%f14,%f34,%f34

	fmuld	%f18,%f18,%f16
	andcc	%l2,1,%g0
	bz,pn	%icc,.case10
! delay slot
	fxor	%f22,%f36,%f36

	fmuld	%f26,%f26,%f24
	andcc	%l3,1,%g0
	bz,pn	%icc,.case9
! delay slot
	fxor	%f30,%f38,%f38

	fmuld	%f0,pp3,%f6		! sin(x0)

	faddd	%f6,pp2,%f6
	fmuld	%f0,qq2,%f4

	fmuld	%f8,qq3,%f14		! cos(x1)

	fmuld	%f16,qq3,%f22		! cos(x2)

	fmuld	%f24,qq3,%f30		! cos(x3)

	fmuld	%f0,%f6,%f6
	faddd	%f4,qq1,%f4

	faddd	%f14,qq2,%f14
	fmuld	%f8,pp2,%f12

	faddd	%f22,qq2,%f22
	fmuld	%f16,pp2,%f20

	faddd	%f30,qq2,%f30
	fmuld	%f24,pp2,%f28

	faddd	%f6,pp1,%f6
	fmuld	%f0,%f4,%f4
	add	%l4,%g1,%l4

	fmuld	%f8,%f14,%f14
	faddd	%f12,pp1,%f12

	fmuld	%f16,%f22,%f22
	faddd	%f20,pp1,%f20

	fmuld	%f24,%f30,%f30
	faddd	%f28,pp1,%f28

	fmuld	%f0,%f6,%f6

	faddd	%f14,qq1,%f14
	fmuld	%f8,%f12,%f12
	add	%l5,%g1,%l5

	faddd	%f22,qq1,%f22
	fmuld	%f16,%f20,%f20
	add	%l6,%g1,%l6

	faddd	%f30,qq1,%f30
	fmuld	%f24,%f28,%f28
	add	%l7,%g1,%l7

	fmuld	%f2,%f6,%f6
	ldd	[%l4+8],%f0

	fmuld	%f10,%f12,%f12

	fmuld	%f18,%f20,%f20

	fmuld	%f26,%f28,%f28

	fmuld	%f0,%f4,%f4
	faddd	%f32,%f6,%f6

	fmuld	%f8,%f14,%f14
	faddd	%f12,%f34,%f12
	ldd	[%l5+16],%f8

	fmuld	%f16,%f22,%f22
	faddd	%f20,%f36,%f20
	ldd	[%l6+16],%f16

	fmuld	%f24,%f30,%f30
	faddd	%f28,%f38,%f28
	ldd	[%l7+16],%f24

	faddd	%f2,%f6,%f6
	ldd	[%l4+16],%f32

	fmuld	%f8,%f14,%f14
	faddd	%f12,%f10,%f12
	ldd	[%l5+8],%f34

	fmuld	%f16,%f22,%f22
	faddd	%f20,%f18,%f20
	ldd	[%l6+8],%f36

	fmuld	%f24,%f30,%f30
	faddd	%f28,%f26,%f28
	ldd	[%l7+8],%f38

	fmuld	%f32,%f6,%f6

	fmuld	%f34,%f12,%f12

	fmuld	%f36,%f20,%f20

	fmuld	%f38,%f28,%f28

	faddd	%f6,%f4,%f6

	fsubd	%f14,%f12,%f14

	fsubd	%f22,%f20,%f22

	fsubd	%f30,%f28,%f30

	faddd	%f6,%f0,%f6

	faddd	%f14,%f8,%f14

	faddd	%f22,%f16,%f22

	faddd	%f30,%f24,%f30
	mov	%l0,%l4

	fnegd	%f6,%f4
	lda	[%i1]%asi,%l0		! preload next argument

	fnegd	%f14,%f12
	lda	[%i1]%asi,%f0

	fnegd	%f22,%f20
	lda	[%i1+4]%asi,%f3

	fnegd	%f30,%f28
	andn	%l0,%i5,%l0
	add	%i1,%i2,%i1

	andcc	%l4,2,%g0
	fmovdnz	%icc,%f4,%f6
	st	%f6,[%o0]

	andcc	%l1,2,%g0
	fmovdnz	%icc,%f12,%f14
	st	%f14,[%o1]

	andcc	%l2,2,%g0
	fmovdnz	%icc,%f20,%f22
	st	%f22,[%o2]

	andcc	%l3,2,%g0
	fmovdnz	%icc,%f28,%f30
	st	%f30,[%o3]

	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop0
! delay slot
	st	%f7,[%o0+4]

	ba,pt	%icc,.end
! delay slot
	nop

2121         .align  16
2122 .case9:
2123         fmuld   %f0,pp3,%f6             ! sin(x0)
2124 
2125         fmuld   %f24,pp3,%f30           ! sin(x3)
2126 
2127         faddd   %f6,pp2,%f6
2128         fmuld   %f0,qq2,%f4
2129 
2130         fmuld   %f8,qq3,%f14            ! cos(x1)
2131 
2132         fmuld   %f16,qq3,%f22           ! cos(x2)
2133 
2134         faddd   %f30,pp2,%f30
2135         fmuld   %f24,qq2,%f28
2136 
2137         fmuld   %f0,%f6,%f6
2138         faddd   %f4,qq1,%f4
2139 
2140         faddd   %f14,qq2,%f14
2141         fmuld   %f8,pp2,%f12
2142 
2143         faddd   %f22,qq2,%f22
2144         fmuld   %f16,pp2,%f20
2145 
2146         fmuld   %f24,%f30,%f30
2147         faddd   %f28,qq1,%f28
2148 
2149         faddd   %f6,pp1,%f6
2150         fmuld   %f0,%f4,%f4
2151         add     %l4,%g1,%l4
2152 
2153         fmuld   %f8,%f14,%f14
2154         faddd   %f12,pp1,%f12
2155 
2156         fmuld   %f16,%f22,%f22
2157         faddd   %f20,pp1,%f20
2158 
2159         faddd   %f30,pp1,%f30
2160         fmuld   %f24,%f28,%f28
2161         add     %l7,%g1,%l7
2162 
2163         fmuld   %f0,%f6,%f6
2164 
2165         faddd   %f14,qq1,%f14
2166         fmuld   %f8,%f12,%f12
2167         add     %l5,%g1,%l5
2168 
2169         faddd   %f22,qq1,%f22
2170         fmuld   %f16,%f20,%f20
2171         add     %l6,%g1,%l6
2172 
2173         fmuld   %f24,%f30,%f30
2174 
2175         fmuld   %f2,%f6,%f6
2176         ldd     [%l4+8],%f0
2177 
2178         fmuld   %f10,%f12,%f12
2179 
2180         fmuld   %f18,%f20,%f20
2181 
2182         fmuld   %f26,%f30,%f30
2183         ldd     [%l7+8],%f24
2184 
2185         fmuld   %f0,%f4,%f4
2186         faddd   %f32,%f6,%f6
2187 
2188         fmuld   %f8,%f14,%f14
2189         faddd   %f12,%f34,%f12
2190         ldd     [%l5+16],%f8
2191 
2192         fmuld   %f16,%f22,%f22
2193         faddd   %f20,%f36,%f20
2194         ldd     [%l6+16],%f16
2195 
2196         fmuld   %f24,%f28,%f28
2197         faddd   %f38,%f30,%f30
2198 
2199         faddd   %f2,%f6,%f6
2200         ldd     [%l4+16],%f32
2201 
2202         fmuld   %f8,%f14,%f14
2203         faddd   %f12,%f10,%f12
2204         ldd     [%l5+8],%f34
2205 
2206         fmuld   %f16,%f22,%f22
2207         faddd   %f20,%f18,%f20
2208         ldd     [%l6+8],%f36
2209 
2210         faddd   %f26,%f30,%f30
2211         ldd     [%l7+16],%f38
2212 
2213         fmuld   %f32,%f6,%f6
2214 
2215         fmuld   %f34,%f12,%f12
2216 
2217         fmuld   %f36,%f20,%f20
2218 
2219         fmuld   %f38,%f30,%f30
2220 
2221         faddd   %f6,%f4,%f6
2222 
2223         fsubd   %f14,%f12,%f14
2224 
2225         fsubd   %f22,%f20,%f22
2226 
2227         faddd   %f30,%f28,%f30
2228 
2229         faddd   %f6,%f0,%f6
2230 
2231         faddd   %f14,%f8,%f14
2232 
2233         faddd   %f22,%f16,%f22
2234 
2235         faddd   %f30,%f24,%f30
2236         mov     %l0,%l4
2237 
2238         fnegd   %f6,%f4
2239         lda     [%i1]%asi,%l0           ! preload next argument
2240 
2241         fnegd   %f14,%f12
2242         lda     [%i1]%asi,%f0
2243 
2244         fnegd   %f22,%f20
2245         lda     [%i1+4]%asi,%f3
2246 
2247         fnegd   %f30,%f28
2248         andn    %l0,%i5,%l0
2249         add     %i1,%i2,%i1
2250 
2251         andcc   %l4,2,%g0
2252         fmovdnz %icc,%f4,%f6
2253         st      %f6,[%o0]
2254 
2255         andcc   %l1,2,%g0
2256         fmovdnz %icc,%f12,%f14
2257         st      %f14,[%o1]
2258 
2259         andcc   %l2,2,%g0
2260         fmovdnz %icc,%f20,%f22
2261         st      %f22,[%o2]
2262 
2263         andcc   %l3,2,%g0
2264         fmovdnz %icc,%f28,%f30
2265         st      %f30,[%o3]
2266 
2267         addcc   %i0,-1,%i0
2268         bg,pt   %icc,.loop0
2269 ! delay slot
2270         st      %f7,[%o0+4]
2271 
2272         ba,pt   %icc,.end
2273 ! delay slot
2274         nop
2275 
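! case 10: n2 even; test the low bit of n3 and fall through to
! sin(x0), cos(x1), sin(x2), cos(x3) when n3 is odd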
2276         .align  16
2277 .case10:
2278         fmuld   %f26,%f26,%f24
2279         andcc   %l3,1,%g0
2280         bz,pn   %icc,.case11
2281 ! delay slot
2282         fxor    %f30,%f38,%f38
2283 
2284         fmuld   %f0,pp3,%f6             ! sin(x0)
2285 
2286         fmuld   %f16,pp3,%f22           ! sin(x2)
2287 
2288         faddd   %f6,pp2,%f6
2289         fmuld   %f0,qq2,%f4
2290 
2291         fmuld   %f8,qq3,%f14            ! cos(x1)
2292 
2293         faddd   %f22,pp2,%f22
2294         fmuld   %f16,qq2,%f20
2295 
2296         fmuld   %f24,qq3,%f30           ! cos(x3)
2297 
2298         fmuld   %f0,%f6,%f6
2299         faddd   %f4,qq1,%f4
2300 
2301         faddd   %f14,qq2,%f14
2302         fmuld   %f8,pp2,%f12
2303 
2304         fmuld   %f16,%f22,%f22
2305         faddd   %f20,qq1,%f20
2306 
2307         faddd   %f30,qq2,%f30
2308         fmuld   %f24,pp2,%f28
2309 
2310         faddd   %f6,pp1,%f6
2311         fmuld   %f0,%f4,%f4
2312         add     %l4,%g1,%l4
2313 
2314         fmuld   %f8,%f14,%f14
2315         faddd   %f12,pp1,%f12
2316 
2317         faddd   %f22,pp1,%f22
2318         fmuld   %f16,%f20,%f20
2319         add     %l6,%g1,%l6
2320 
2321         fmuld   %f24,%f30,%f30
2322         faddd   %f28,pp1,%f28
2323 
2324         fmuld   %f0,%f6,%f6
2325 
2326         faddd   %f14,qq1,%f14
2327         fmuld   %f8,%f12,%f12
2328         add     %l5,%g1,%l5
2329 
2330         fmuld   %f16,%f22,%f22
2331 
2332         faddd   %f30,qq1,%f30
2333         fmuld   %f24,%f28,%f28
2334         add     %l7,%g1,%l7
2335 
2336         fmuld   %f2,%f6,%f6
2337         ldd     [%l4+8],%f0
2338 
2339         fmuld   %f10,%f12,%f12
2340 
2341         fmuld   %f18,%f22,%f22
2342         ldd     [%l6+8],%f16
2343 
2344         fmuld   %f26,%f28,%f28
2345 
2346         fmuld   %f0,%f4,%f4
2347         faddd   %f32,%f6,%f6
2348 
2349         fmuld   %f8,%f14,%f14
2350         faddd   %f12,%f34,%f12
2351         ldd     [%l5+16],%f8
2352 
2353         fmuld   %f16,%f20,%f20
2354         faddd   %f36,%f22,%f22
2355 
2356         fmuld   %f24,%f30,%f30
2357         faddd   %f28,%f38,%f28
2358         ldd     [%l7+16],%f24
2359 
2360         faddd   %f2,%f6,%f6
2361         ldd     [%l4+16],%f32
2362 
2363         fmuld   %f8,%f14,%f14
2364         faddd   %f12,%f10,%f12
2365         ldd     [%l5+8],%f34
2366 
2367         faddd   %f18,%f22,%f22
2368         ldd     [%l6+16],%f36
2369 
2370         fmuld   %f24,%f30,%f30
2371         faddd   %f28,%f26,%f28
2372         ldd     [%l7+8],%f38
2373 
2374         fmuld   %f32,%f6,%f6
2375 
2376         fmuld   %f34,%f12,%f12
2377 
2378         fmuld   %f36,%f22,%f22
2379 
2380         fmuld   %f38,%f28,%f28
2381 
2382         faddd   %f6,%f4,%f6
2383 
2384         fsubd   %f14,%f12,%f14
2385 
2386         faddd   %f22,%f20,%f22
2387 
2388         fsubd   %f30,%f28,%f30
2389 
2390         faddd   %f6,%f0,%f6
2391 
2392         faddd   %f14,%f8,%f14
2393 
2394         faddd   %f22,%f16,%f22
2395 
2396         faddd   %f30,%f24,%f30
2397         mov     %l0,%l4
2398 
2399         fnegd   %f6,%f4
2400         lda     [%i1]%asi,%l0           ! preload next argument
2401 
2402         fnegd   %f14,%f12
2403         lda     [%i1]%asi,%f0
2404 
2405         fnegd   %f22,%f20
2406         lda     [%i1+4]%asi,%f3
2407 
2408         fnegd   %f30,%f28
2409         andn    %l0,%i5,%l0
2410         add     %i1,%i2,%i1
2411 
2412         andcc   %l4,2,%g0
2413         fmovdnz %icc,%f4,%f6
2414         st      %f6,[%o0]
2415 
2416         andcc   %l1,2,%g0
2417         fmovdnz %icc,%f12,%f14
2418         st      %f14,[%o1]
2419 
2420         andcc   %l2,2,%g0
2421         fmovdnz %icc,%f20,%f22
2422         st      %f22,[%o2]
2423 
2424         andcc   %l3,2,%g0
2425         fmovdnz %icc,%f28,%f30
2426         st      %f30,[%o3]
2427 
2428         addcc   %i0,-1,%i0
2429         bg,pt   %icc,.loop0
2430 ! delay slot
2431         st      %f7,[%o0+4]
2432 
2433         ba,pt   %icc,.end
2434 ! delay slot
2435         nop
2436 
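! case 11: sin(x0), cos(x1), sin(x2), sin(x3)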
2437         .align  16
2438 .case11:
2439         fmuld   %f0,pp3,%f6             ! sin(x0)
2440 
2441         fmuld   %f16,pp3,%f22           ! sin(x2)
2442 
2443         fmuld   %f24,pp3,%f30           ! sin(x3)
2444 
2445         faddd   %f6,pp2,%f6
2446         fmuld   %f0,qq2,%f4
2447 
2448         fmuld   %f8,qq3,%f14            ! cos(x1)
2449 
2450         faddd   %f22,pp2,%f22
2451         fmuld   %f16,qq2,%f20
2452 
2453         faddd   %f30,pp2,%f30
2454         fmuld   %f24,qq2,%f28
2455 
2456         fmuld   %f0,%f6,%f6
2457         faddd   %f4,qq1,%f4
2458 
2459         faddd   %f14,qq2,%f14
2460         fmuld   %f8,pp2,%f12
2461 
2462         fmuld   %f16,%f22,%f22
2463         faddd   %f20,qq1,%f20
2464 
2465         fmuld   %f24,%f30,%f30
2466         faddd   %f28,qq1,%f28
2467 
2468         faddd   %f6,pp1,%f6
2469         fmuld   %f0,%f4,%f4
2470         add     %l4,%g1,%l4
2471 
2472         fmuld   %f8,%f14,%f14
2473         faddd   %f12,pp1,%f12
2474 
2475         faddd   %f22,pp1,%f22
2476         fmuld   %f16,%f20,%f20
2477         add     %l6,%g1,%l6
2478 
2479         faddd   %f30,pp1,%f30
2480         fmuld   %f24,%f28,%f28
2481         add     %l7,%g1,%l7
2482 
2483         fmuld   %f0,%f6,%f6
2484 
2485         faddd   %f14,qq1,%f14
2486         fmuld   %f8,%f12,%f12
2487         add     %l5,%g1,%l5
2488 
2489         fmuld   %f16,%f22,%f22
2490 
2491         fmuld   %f24,%f30,%f30
2492 
2493         fmuld   %f2,%f6,%f6
2494         ldd     [%l4+8],%f0
2495 
2496         fmuld   %f10,%f12,%f12
2497 
2498         fmuld   %f18,%f22,%f22
2499         ldd     [%l6+8],%f16
2500 
2501         fmuld   %f26,%f30,%f30
2502         ldd     [%l7+8],%f24
2503 
2504         fmuld   %f0,%f4,%f4
2505         faddd   %f32,%f6,%f6
2506 
2507         fmuld   %f8,%f14,%f14
2508         faddd   %f12,%f34,%f12
2509         ldd     [%l5+16],%f8
2510 
2511         fmuld   %f16,%f20,%f20
2512         faddd   %f36,%f22,%f22
2513 
2514         fmuld   %f24,%f28,%f28
2515         faddd   %f38,%f30,%f30
2516 
2517         faddd   %f2,%f6,%f6
2518         ldd     [%l4+16],%f32
2519 
2520         fmuld   %f8,%f14,%f14
2521         faddd   %f12,%f10,%f12
2522         ldd     [%l5+8],%f34
2523 
2524         faddd   %f18,%f22,%f22
2525         ldd     [%l6+16],%f36
2526 
2527         faddd   %f26,%f30,%f30
2528         ldd     [%l7+16],%f38
2529 
2530         fmuld   %f32,%f6,%f6
2531 
2532         fmuld   %f34,%f12,%f12
2533 
2534         fmuld   %f36,%f22,%f22
2535 
2536         fmuld   %f38,%f30,%f30
2537 
2538         faddd   %f6,%f4,%f6
2539 
2540         fsubd   %f14,%f12,%f14
2541 
2542         faddd   %f22,%f20,%f22
2543 
2544         faddd   %f30,%f28,%f30
2545 
2546         faddd   %f6,%f0,%f6
2547 
2548         faddd   %f14,%f8,%f14
2549 
2550         faddd   %f22,%f16,%f22
2551 
2552         faddd   %f30,%f24,%f30
2553         mov     %l0,%l4
2554 
2555         fnegd   %f6,%f4
2556         lda     [%i1]%asi,%l0           ! preload next argument
2557 
2558         fnegd   %f14,%f12
2559         lda     [%i1]%asi,%f0
2560 
2561         fnegd   %f22,%f20
2562         lda     [%i1+4]%asi,%f3
2563 
2564         fnegd   %f30,%f28
2565         andn    %l0,%i5,%l0
2566         add     %i1,%i2,%i1
2567 
2568         andcc   %l4,2,%g0
2569         fmovdnz %icc,%f4,%f6
2570         st      %f6,[%o0]
2571 
2572         andcc   %l1,2,%g0
2573         fmovdnz %icc,%f12,%f14
2574         st      %f14,[%o1]
2575 
2576         andcc   %l2,2,%g0
2577         fmovdnz %icc,%f20,%f22
2578         st      %f22,[%o2]
2579 
2580         andcc   %l3,2,%g0
2581         fmovdnz %icc,%f28,%f30
2582         st      %f30,[%o3]
2583 
2584         addcc   %i0,-1,%i0
2585         bg,pt   %icc,.loop0
2586 ! delay slot
2587         st      %f7,[%o0+4]
2588 
2589         ba,pt   %icc,.end
2590 ! delay slot
2591         nop
2592 
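! case 12: n1 even; test the low bits of n2 and n3, falling through
! to sin(x0), sin(x1), cos(x2), cos(x3) when both are odd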
2593         .align  16
2594 .case12:
2595         fmuld   %f18,%f18,%f16
2596         andcc   %l2,1,%g0
2597         bz,pn   %icc,.case14
2598 ! delay slot
2599         fxor    %f22,%f36,%f36
2600 
2601         fmuld   %f26,%f26,%f24
2602         andcc   %l3,1,%g0
2603         bz,pn   %icc,.case13
2604 ! delay slot
2605         fxor    %f30,%f38,%f38
2606 
2607         fmuld   %f0,pp3,%f6             ! sin(x0)
2608 
2609         fmuld   %f8,pp3,%f14            ! sin(x1)
2610 
2611         faddd   %f6,pp2,%f6
2612         fmuld   %f0,qq2,%f4
2613 
2614         faddd   %f14,pp2,%f14
2615         fmuld   %f8,qq2,%f12
2616 
2617         fmuld   %f16,qq3,%f22           ! cos(x2)
2618 
2619         fmuld   %f24,qq3,%f30           ! cos(x3)
2620 
2621         fmuld   %f0,%f6,%f6
2622         faddd   %f4,qq1,%f4
2623 
2624         fmuld   %f8,%f14,%f14
2625         faddd   %f12,qq1,%f12
2626 
2627         faddd   %f22,qq2,%f22
2628         fmuld   %f16,pp2,%f20
2629 
2630         faddd   %f30,qq2,%f30
2631         fmuld   %f24,pp2,%f28
2632 
2633         faddd   %f6,pp1,%f6
2634         fmuld   %f0,%f4,%f4
2635         add     %l4,%g1,%l4
2636 
2637         faddd   %f14,pp1,%f14
2638         fmuld   %f8,%f12,%f12
2639         add     %l5,%g1,%l5
2640 
2641         fmuld   %f16,%f22,%f22
2642         faddd   %f20,pp1,%f20
2643 
2644         fmuld   %f24,%f30,%f30
2645         faddd   %f28,pp1,%f28
2646 
2647         fmuld   %f0,%f6,%f6
2648 
2649         fmuld   %f8,%f14,%f14
2650 
2651         faddd   %f22,qq1,%f22
2652         fmuld   %f16,%f20,%f20
2653         add     %l6,%g1,%l6
2654 
2655         faddd   %f30,qq1,%f30
2656         fmuld   %f24,%f28,%f28
2657         add     %l7,%g1,%l7
2658 
2659         fmuld   %f2,%f6,%f6
2660         ldd     [%l4+8],%f0
2661 
2662         fmuld   %f10,%f14,%f14
2663         ldd     [%l5+8],%f8
2664 
2665         fmuld   %f18,%f20,%f20
2666 
2667         fmuld   %f26,%f28,%f28
2668 
2669         fmuld   %f0,%f4,%f4
2670         faddd   %f32,%f6,%f6
2671 
2672         fmuld   %f8,%f12,%f12
2673         faddd   %f34,%f14,%f14
2674 
2675         fmuld   %f16,%f22,%f22
2676         faddd   %f20,%f36,%f20
2677         ldd     [%l6+16],%f16
2678 
2679         fmuld   %f24,%f30,%f30
2680         faddd   %f28,%f38,%f28
2681         ldd     [%l7+16],%f24
2682 
2683         faddd   %f2,%f6,%f6
2684         ldd     [%l4+16],%f32
2685 
2686         faddd   %f10,%f14,%f14
2687         ldd     [%l5+16],%f34
2688 
2689         fmuld   %f16,%f22,%f22
2690         faddd   %f20,%f18,%f20
2691         ldd     [%l6+8],%f36
2692 
2693         fmuld   %f24,%f30,%f30
2694         faddd   %f28,%f26,%f28
2695         ldd     [%l7+8],%f38
2696 
2697         fmuld   %f32,%f6,%f6
2698 
2699         fmuld   %f34,%f14,%f14
2700 
2701         fmuld   %f36,%f20,%f20
2702 
2703         fmuld   %f38,%f28,%f28
2704 
2705         faddd   %f6,%f4,%f6
2706 
2707         faddd   %f14,%f12,%f14
2708 
2709         fsubd   %f22,%f20,%f22
2710 
2711         fsubd   %f30,%f28,%f30
2712 
2713         faddd   %f6,%f0,%f6
2714 
2715         faddd   %f14,%f8,%f14
2716 
2717         faddd   %f22,%f16,%f22
2718 
2719         faddd   %f30,%f24,%f30
2720         mov     %l0,%l4
2721 
2722         fnegd   %f6,%f4
2723         lda     [%i1]%asi,%l0           ! preload next argument
2724 
2725         fnegd   %f14,%f12
2726         lda     [%i1]%asi,%f0
2727 
2728         fnegd   %f22,%f20
2729         lda     [%i1+4]%asi,%f3
2730 
2731         fnegd   %f30,%f28
2732         andn    %l0,%i5,%l0
2733         add     %i1,%i2,%i1
2734 
2735         andcc   %l4,2,%g0
2736         fmovdnz %icc,%f4,%f6
2737         st      %f6,[%o0]
2738 
2739         andcc   %l1,2,%g0
2740         fmovdnz %icc,%f12,%f14
2741         st      %f14,[%o1]
2742 
2743         andcc   %l2,2,%g0
2744         fmovdnz %icc,%f20,%f22
2745         st      %f22,[%o2]
2746 
2747         andcc   %l3,2,%g0
2748         fmovdnz %icc,%f28,%f30
2749         st      %f30,[%o3]
2750 
2751         addcc   %i0,-1,%i0
2752         bg,pt   %icc,.loop0
2753 ! delay slot
2754         st      %f7,[%o0+4]
2755 
2756         ba,pt   %icc,.end
2757 ! delay slot
2758         nop
2759 
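! case 13: sin(x0), sin(x1), cos(x2), sin(x3)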
2760         .align  16
2761 .case13:
2762         fmuld   %f0,pp3,%f6             ! sin(x0)
2763 
2764         fmuld   %f8,pp3,%f14            ! sin(x1)
2765 
2766         fmuld   %f24,pp3,%f30           ! sin(x3)
2767 
2768         faddd   %f6,pp2,%f6
2769         fmuld   %f0,qq2,%f4
2770 
2771         faddd   %f14,pp2,%f14
2772         fmuld   %f8,qq2,%f12
2773 
2774         fmuld   %f16,qq3,%f22           ! cos(x2)
2775 
2776         faddd   %f30,pp2,%f30
2777         fmuld   %f24,qq2,%f28
2778 
2779         fmuld   %f0,%f6,%f6
2780         faddd   %f4,qq1,%f4
2781 
2782         fmuld   %f8,%f14,%f14
2783         faddd   %f12,qq1,%f12
2784 
2785         faddd   %f22,qq2,%f22
2786         fmuld   %f16,pp2,%f20
2787 
2788         fmuld   %f24,%f30,%f30
2789         faddd   %f28,qq1,%f28
2790 
2791         faddd   %f6,pp1,%f6
2792         fmuld   %f0,%f4,%f4
2793         add     %l4,%g1,%l4
2794 
2795         faddd   %f14,pp1,%f14
2796         fmuld   %f8,%f12,%f12
2797         add     %l5,%g1,%l5
2798 
2799         fmuld   %f16,%f22,%f22
2800         faddd   %f20,pp1,%f20
2801 
2802         faddd   %f30,pp1,%f30
2803         fmuld   %f24,%f28,%f28
2804         add     %l7,%g1,%l7
2805 
2806         fmuld   %f0,%f6,%f6
2807 
2808         fmuld   %f8,%f14,%f14
2809 
2810         faddd   %f22,qq1,%f22
2811         fmuld   %f16,%f20,%f20
2812         add     %l6,%g1,%l6
2813 
2814         fmuld   %f24,%f30,%f30
2815 
2816         fmuld   %f2,%f6,%f6
2817         ldd     [%l4+8],%f0
2818 
2819         fmuld   %f10,%f14,%f14
2820         ldd     [%l5+8],%f8
2821 
2822         fmuld   %f18,%f20,%f20
2823 
2824         fmuld   %f26,%f30,%f30
2825         ldd     [%l7+8],%f24
2826 
2827         fmuld   %f0,%f4,%f4
2828         faddd   %f32,%f6,%f6
2829 
2830         fmuld   %f8,%f12,%f12
2831         faddd   %f34,%f14,%f14
2832 
2833         fmuld   %f16,%f22,%f22
2834         faddd   %f20,%f36,%f20
2835         ldd     [%l6+16],%f16
2836 
2837         fmuld   %f24,%f28,%f28
2838         faddd   %f38,%f30,%f30
2839 
2840         faddd   %f2,%f6,%f6
2841         ldd     [%l4+16],%f32
2842 
2843         faddd   %f10,%f14,%f14
2844         ldd     [%l5+16],%f34
2845 
2846         fmuld   %f16,%f22,%f22
2847         faddd   %f20,%f18,%f20
2848         ldd     [%l6+8],%f36
2849 
2850         faddd   %f26,%f30,%f30
2851         ldd     [%l7+16],%f38
2852 
2853         fmuld   %f32,%f6,%f6
2854 
2855         fmuld   %f34,%f14,%f14
2856 
2857         fmuld   %f36,%f20,%f20
2858 
2859         fmuld   %f38,%f30,%f30
2860 
2861         faddd   %f6,%f4,%f6
2862 
2863         faddd   %f14,%f12,%f14
2864 
2865         fsubd   %f22,%f20,%f22
2866 
2867         faddd   %f30,%f28,%f30
2868 
2869         faddd   %f6,%f0,%f6
2870 
2871         faddd   %f14,%f8,%f14
2872 
2873         faddd   %f22,%f16,%f22
2874 
2875         faddd   %f30,%f24,%f30
2876         mov     %l0,%l4
2877 
2878         fnegd   %f6,%f4
2879         lda     [%i1]%asi,%l0           ! preload next argument
2880 
2881         fnegd   %f14,%f12
2882         lda     [%i1]%asi,%f0
2883 
2884         fnegd   %f22,%f20
2885         lda     [%i1+4]%asi,%f3
2886 
2887         fnegd   %f30,%f28
2888         andn    %l0,%i5,%l0
2889         add     %i1,%i2,%i1
2890 
2891         andcc   %l4,2,%g0
2892         fmovdnz %icc,%f4,%f6
2893         st      %f6,[%o0]
2894 
2895         andcc   %l1,2,%g0
2896         fmovdnz %icc,%f12,%f14
2897         st      %f14,[%o1]
2898 
2899         andcc   %l2,2,%g0
2900         fmovdnz %icc,%f20,%f22
2901         st      %f22,[%o2]
2902 
2903         andcc   %l3,2,%g0
2904         fmovdnz %icc,%f28,%f30
2905         st      %f30,[%o3]
2906 
2907         addcc   %i0,-1,%i0
2908         bg,pt   %icc,.loop0
2909 ! delay slot
2910         st      %f7,[%o0+4]
2911 
2912         ba,pt   %icc,.end
2913 ! delay slot
2914         nop
2915 
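! case 14: n1 and n2 even; test the low bit of n3 and fall through
! to sin(x0), sin(x1), sin(x2), cos(x3) when n3 is odd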
2916         .align  16
2917 .case14:
2918         fmuld   %f26,%f26,%f24
2919         andcc   %l3,1,%g0
2920         bz,pn   %icc,.case15
2921 ! delay slot
2922         fxor    %f30,%f38,%f38
2923 
2924         fmuld   %f0,pp3,%f6             ! sin(x0)
2925 
2926         fmuld   %f8,pp3,%f14            ! sin(x1)
2927 
2928         fmuld   %f16,pp3,%f22           ! sin(x2)
2929 
2930         faddd   %f6,pp2,%f6
2931         fmuld   %f0,qq2,%f4
2932 
2933         faddd   %f14,pp2,%f14
2934         fmuld   %f8,qq2,%f12
2935 
2936         faddd   %f22,pp2,%f22
2937         fmuld   %f16,qq2,%f20
2938 
2939         fmuld   %f24,qq3,%f30           ! cos(x3)
2940 
2941         fmuld   %f0,%f6,%f6
2942         faddd   %f4,qq1,%f4
2943 
2944         fmuld   %f8,%f14,%f14
2945         faddd   %f12,qq1,%f12
2946 
2947         fmuld   %f16,%f22,%f22
2948         faddd   %f20,qq1,%f20
2949 
2950         faddd   %f30,qq2,%f30
2951         fmuld   %f24,pp2,%f28
2952 
2953         faddd   %f6,pp1,%f6
2954         fmuld   %f0,%f4,%f4
2955         add     %l4,%g1,%l4
2956 
2957         faddd   %f14,pp1,%f14
2958         fmuld   %f8,%f12,%f12
2959         add     %l5,%g1,%l5
2960 
2961         faddd   %f22,pp1,%f22
2962         fmuld   %f16,%f20,%f20
2963         add     %l6,%g1,%l6
2964 
2965         fmuld   %f24,%f30,%f30
2966         faddd   %f28,pp1,%f28
2967 
2968         fmuld   %f0,%f6,%f6
2969 
2970         fmuld   %f8,%f14,%f14
2971 
2972         fmuld   %f16,%f22,%f22
2973 
2974         faddd   %f30,qq1,%f30
2975         fmuld   %f24,%f28,%f28
2976         add     %l7,%g1,%l7
2977 
2978         fmuld   %f2,%f6,%f6
2979         ldd     [%l4+8],%f0
2980 
2981         fmuld   %f10,%f14,%f14
2982         ldd     [%l5+8],%f8
2983 
2984         fmuld   %f18,%f22,%f22
2985         ldd     [%l6+8],%f16
2986 
2987         fmuld   %f26,%f28,%f28
2988 
2989         fmuld   %f0,%f4,%f4
2990         faddd   %f32,%f6,%f6
2991 
2992         fmuld   %f8,%f12,%f12
2993         faddd   %f34,%f14,%f14
2994 
2995         fmuld   %f16,%f20,%f20
2996         faddd   %f36,%f22,%f22
2997 
2998         fmuld   %f24,%f30,%f30
2999         faddd   %f28,%f38,%f28
3000         ldd     [%l7+16],%f24
3001 
3002         faddd   %f2,%f6,%f6
3003         ldd     [%l4+16],%f32
3004 
3005         faddd   %f10,%f14,%f14
3006         ldd     [%l5+16],%f34
3007 
3008         faddd   %f18,%f22,%f22
3009         ldd     [%l6+16],%f36
3010 
3011         fmuld   %f24,%f30,%f30
3012         faddd   %f28,%f26,%f28
3013         ldd     [%l7+8],%f38
3014 
3015         fmuld   %f32,%f6,%f6
3016 
3017         fmuld   %f34,%f14,%f14
3018 
3019         fmuld   %f36,%f22,%f22
3020 
3021         fmuld   %f38,%f28,%f28
3022 
3023         faddd   %f6,%f4,%f6
3024 
3025         faddd   %f14,%f12,%f14
3026 
3027         faddd   %f22,%f20,%f22
3028 
3029         fsubd   %f30,%f28,%f30
3030 
3031         faddd   %f6,%f0,%f6
3032 
3033         faddd   %f14,%f8,%f14
3034 
3035         faddd   %f22,%f16,%f22
3036 
3037         faddd   %f30,%f24,%f30
3038         mov     %l0,%l4
3039 
3040         fnegd   %f6,%f4
3041         lda     [%i1]%asi,%l0           ! preload next argument
3042 
3043         fnegd   %f14,%f12
3044         lda     [%i1]%asi,%f0
3045 
3046         fnegd   %f22,%f20
3047         lda     [%i1+4]%asi,%f3
3048 
3049         fnegd   %f30,%f28
3050         andn    %l0,%i5,%l0
3051         add     %i1,%i2,%i1
3052 
3053         andcc   %l4,2,%g0
3054         fmovdnz %icc,%f4,%f6
3055         st      %f6,[%o0]
3056 
3057         andcc   %l1,2,%g0
3058         fmovdnz %icc,%f12,%f14
3059         st      %f14,[%o1]
3060 
3061         andcc   %l2,2,%g0
3062         fmovdnz %icc,%f20,%f22
3063         st      %f22,[%o2]
3064 
3065         andcc   %l3,2,%g0
3066         fmovdnz %icc,%f28,%f30
3067         st      %f30,[%o3]
3068 
3069         addcc   %i0,-1,%i0
3070         bg,pt   %icc,.loop0
3071 ! delay slot
3072         st      %f7,[%o0+4]
3073 
3074         ba,pt   %icc,.end
3075 ! delay slot
3076         nop
3077 
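! case 15: n0-n3 all even: sin(x0), sin(x1), sin(x2), sin(x3)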
3078         .align  16
3079 .case15:
3080         fmuld   %f0,pp3,%f6             ! sin(x0)
3081 
3082         fmuld   %f8,pp3,%f14            ! sin(x1)
3083 
3084         fmuld   %f16,pp3,%f22           ! sin(x2)
3085 
3086         fmuld   %f24,pp3,%f30           ! sin(x3)
3087 
3088         faddd   %f6,pp2,%f6
3089         fmuld   %f0,qq2,%f4
3090 
3091         faddd   %f14,pp2,%f14
3092         fmuld   %f8,qq2,%f12
3093 
3094         faddd   %f22,pp2,%f22
3095         fmuld   %f16,qq2,%f20
3096 
3097         faddd   %f30,pp2,%f30
3098         fmuld   %f24,qq2,%f28
3099 
3100         fmuld   %f0,%f6,%f6
3101         faddd   %f4,qq1,%f4
3102 
3103         fmuld   %f8,%f14,%f14
3104         faddd   %f12,qq1,%f12
3105 
3106         fmuld   %f16,%f22,%f22
3107         faddd   %f20,qq1,%f20
3108 
3109         fmuld   %f24,%f30,%f30
3110         faddd   %f28,qq1,%f28
3111 
3112         faddd   %f6,pp1,%f6
3113         fmuld   %f0,%f4,%f4
3114         add     %l4,%g1,%l4
3115 
3116         faddd   %f14,pp1,%f14
3117         fmuld   %f8,%f12,%f12
3118         add     %l5,%g1,%l5
3119 
3120         faddd   %f22,pp1,%f22
3121         fmuld   %f16,%f20,%f20
3122         add     %l6,%g1,%l6
3123 
3124         faddd   %f30,pp1,%f30
3125         fmuld   %f24,%f28,%f28
3126         add     %l7,%g1,%l7
3127 
3128         fmuld   %f0,%f6,%f6
3129 
3130         fmuld   %f8,%f14,%f14
3131 
3132         fmuld   %f16,%f22,%f22
3133 
3134         fmuld   %f24,%f30,%f30
3135 
3136         fmuld   %f2,%f6,%f6
3137         ldd     [%l4+8],%f0
3138 
3139         fmuld   %f10,%f14,%f14
3140         ldd     [%l5+8],%f8
3141 
3142         fmuld   %f18,%f22,%f22
3143         ldd     [%l6+8],%f16
3144 
3145         fmuld   %f26,%f30,%f30
3146         ldd     [%l7+8],%f24
3147 
3148         fmuld   %f0,%f4,%f4
3149         faddd   %f32,%f6,%f6
3150 
3151         fmuld   %f8,%f12,%f12
3152         faddd   %f34,%f14,%f14
3153 
3154         fmuld   %f16,%f20,%f20
3155         faddd   %f36,%f22,%f22
3156 
3157         fmuld   %f24,%f28,%f28
3158         faddd   %f38,%f30,%f30
3159 
3160         faddd   %f2,%f6,%f6
3161         ldd     [%l4+16],%f32
3162 
3163         faddd   %f10,%f14,%f14
3164         ldd     [%l5+16],%f34
3165 
3166         faddd   %f18,%f22,%f22
3167         ldd     [%l6+16],%f36
3168 
3169         faddd   %f26,%f30,%f30
3170         ldd     [%l7+16],%f38
3171 
3172         fmuld   %f32,%f6,%f6
3173 
3174         fmuld   %f34,%f14,%f14
3175 
3176         fmuld   %f36,%f22,%f22
3177 
3178         fmuld   %f38,%f30,%f30
3179 
3180         faddd   %f6,%f4,%f6
3181 
3182         faddd   %f14,%f12,%f14
3183 
3184         faddd   %f22,%f20,%f22
3185 
3186         faddd   %f30,%f28,%f30
3187 
3188         faddd   %f6,%f0,%f6
3189 
3190         faddd   %f14,%f8,%f14
3191 
3192         faddd   %f22,%f16,%f22
3193 
3194         faddd   %f30,%f24,%f30
3195         mov     %l0,%l4
3196 
3197         fnegd   %f6,%f4
3198         lda     [%i1]%asi,%l0           ! preload next argument
3199 
3200         fnegd   %f14,%f12
3201         lda     [%i1]%asi,%f0
3202 
3203         fnegd   %f22,%f20
3204         lda     [%i1+4]%asi,%f3
3205 
3206         fnegd   %f30,%f28
3207         andn    %l0,%i5,%l0
3208         add     %i1,%i2,%i1
3209 
3210         andcc   %l4,2,%g0
3211         fmovdnz %icc,%f4,%f6
3212         st      %f6,[%o0]
3213 
3214         andcc   %l1,2,%g0
3215         fmovdnz %icc,%f12,%f14
3216         st      %f14,[%o1]
3217 
3218         andcc   %l2,2,%g0
3219         fmovdnz %icc,%f20,%f22
3220         st      %f22,[%o2]
3221 
3222         andcc   %l3,2,%g0
3223         fmovdnz %icc,%f28,%f30
3224         st      %f30,[%o3]
3225 
3226         addcc   %i0,-1,%i0
3227         bg,pt   %icc,.loop0
3228 ! delay slot
3229         st      %f7,[%o0+4]
3230 
3231         ba,pt   %icc,.end
3232 ! delay slot
3233         nop
3234 
3235 
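! common exit: flush the low words of the last three results, then,
! if any huge arguments were flagged in biguns, reload the saved
! n, x, y, and strides and hand them to the big-argument routine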
3236         .align  16
3237 .end:
3238         st      %f15,[%o1+4]
3239         st      %f23,[%o2+4]
3240         st      %f31,[%o3+4]
3241         ld      [%fp+biguns],%i5
3242         tst     %i5                     ! check for huge arguments remaining
3243         be,pt   %icc,.exit
3244 ! delay slot
3245         nop
3246 #ifdef __sparcv9
3247         ldx     [%fp+xsave],%o1
3248         ldx     [%fp+ysave],%o3
3249 #else
3250         ld      [%fp+xsave],%o1
3251         ld      [%fp+ysave],%o3
3252 #endif
3253         ld      [%fp+nsave],%o0
3254         ld      [%fp+sxsave],%o2
3255         ld      [%fp+sysave],%o4
3256         sra     %o2,0,%o2               ! sign-extend for V9
3257         sra     %o4,0,%o4
3258         call    __vlibm_vsin_big_ultra3
3259         sra     %o5,0,%o5               ! delay slot
3260 
3261 .exit:
3262         ret
3263         restore
3264 
3265 
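! fewer than four elements remain: pad the last quadruple with zero
! arguments, aim the unused result pointers at the junk slot in the
! frame, and rejoin the main computation at .cont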
3266         .align  16
3267 .last1:
3268         faddd   %f2,c3two44,%f4
3269         st      %f15,[%o1+4]
3270 .last1_from_range1:
3271         mov     0,%l1
3272         fzeros  %f8
3273         fzero   %f10
3274         add     %fp,junk,%o1
3275 .last2:
3276         faddd   %f10,c3two44,%f12
3277         st      %f23,[%o2+4]
3278 .last2_from_range2:
3279         mov     0,%l2
3280         fzeros  %f16
3281         fzero   %f18
3282         add     %fp,junk,%o2
3283 .last3:
3284         faddd   %f18,c3two44,%f20
3285         st      %f31,[%o3+4]
3286         st      %f5,[%fp+nk0]
3287         st      %f13,[%fp+nk1]
3288 .last3_from_range3:
3289         mov     0,%l3
3290         fzeros  %f24
3291         fzero   %f26
3292         ba,pt   %icc,.cont
3293 ! delay slot
3294         add     %fp,junk,%o3
3295 
3296 
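! x0 is outside the primary range: if tiny (hx < 0x3e400000) the
! result is x0 itself, with fdtoi raising inexact when x0 is nonzero;
! if inf/NaN the result is x0*0; otherwise x0 is huge, so just set
! biguns and let .end invoke the big-argument routine.  then shift
! lane 1's argument down and resume at .loop0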
3297         .align  16
3298 .range0:
3299         cmp     %l0,%o4
3300         bl,pt   %icc,1f                 ! hx < 0x3e400000
3301 ! delay slot, harmless if branch taken
3302         sethi   %hi(0x7ff00000),%o7
3303         cmp     %l0,%o7
3304         bl,a,pt %icc,2f                 ! branch if finite
3305 ! delay slot, squashed if branch not taken
3306         st      %o4,[%fp+biguns]        ! set biguns
3307         fzero   %f0
3308         fmuld   %f2,%f0,%f2
3309         st      %f2,[%o0]
3310         ba,pt   %icc,2f
3311 ! delay slot
3312         st      %f3,[%o0+4]
3313 1:
3314         fdtoi   %f2,%f4                 ! raise inexact if not zero
3315         st      %f0,[%o0]
3316         st      %f3,[%o0+4]
3317 2:
3318         addcc   %i0,-1,%i0
3319         ble,pn  %icc,.end
3320 ! delay slot, harmless if branch taken
3321         add     %i3,%i4,%i3             ! y += stridey
3322         andn    %l1,%i5,%l0             ! hx &= ~0x80000000
3323         fmovs   %f8,%f0
3324         fmovs   %f11,%f3
3325         ba,pt   %icc,.loop0
3326 ! delay slot
3327         add     %i1,%i2,%i1             ! x += stridex
3328 
3329 
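! same out-of-range handling for x1; if this was the last element,
! pad the remaining lanes via .last1_from_range1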
3330         .align  16
3331 .range1:
3332         cmp     %l1,%o4
3333         bl,pt   %icc,1f                 ! hx < 0x3e400000
3334 ! delay slot, harmless if branch taken
3335         sethi   %hi(0x7ff00000),%o7
3336         cmp     %l1,%o7
3337         bl,a,pt %icc,2f                 ! branch if finite
3338 ! delay slot, squashed if branch not taken
3339         st      %o4,[%fp+biguns]        ! set biguns
3340         fzero   %f8
3341         fmuld   %f10,%f8,%f10
3342         st      %f10,[%o1]
3343         ba,pt   %icc,2f
3344 ! delay slot
3345         st      %f11,[%o1+4]
3346 1:
3347         fdtoi   %f10,%f12               ! raise inexact if not zero
3348         st      %f8,[%o1]
3349         st      %f11,[%o1+4]
3350 2:
3351         addcc   %i0,-1,%i0
3352         ble,pn  %icc,.last1_from_range1
3353 ! delay slot, harmless if branch taken
3354         add     %i3,%i4,%i3             ! y += stridey
3355         andn    %l2,%i5,%l1             ! hx &= ~0x80000000
3356         fmovs   %f16,%f8
3357         fmovs   %f19,%f11
3358         ba,pt   %icc,.loop1
3359 ! delay slot
3360         add     %i1,%i2,%i1             ! x += stridex
3361 
3362 
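! same out-of-range handling for x2; if this was the last element,
! pad the remaining lanes via .last2_from_range2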
3363         .align  16
3364 .range2:
3365         cmp     %l2,%o4
3366         bl,pt   %icc,1f                 ! hx < 0x3e400000
3367 ! delay slot, harmless if branch taken
3368         sethi   %hi(0x7ff00000),%o7
3369         cmp     %l2,%o7
3370         bl,a,pt %icc,2f                 ! branch if finite
3371 ! delay slot, squashed if branch not taken
3372         st      %o4,[%fp+biguns]        ! set biguns
3373         fzero   %f16
3374         fmuld   %f18,%f16,%f18
3375         st      %f18,[%o2]
3376         ba,pt   %icc,2f
3377 ! delay slot
3378         st      %f19,[%o2+4]
3379 1:
3380         fdtoi   %f18,%f20               ! raise inexact if not zero
3381         st      %f16,[%o2]
3382         st      %f19,[%o2+4]
3383 2:
3384         addcc   %i0,-1,%i0
3385         ble,pn  %icc,.last2_from_range2
3386 ! delay slot, harmless if branch taken
3387         add     %i3,%i4,%i3             ! y += stridey
3388         andn    %l3,%i5,%l2             ! hx &= ~0x80000000
3389         fmovs   %f24,%f16
3390         fmovs   %f27,%f19
3391         ba,pt   %icc,.loop2
3392 ! delay slot
3393         add     %i1,%i2,%i1             ! x += stridex
3394 
3395 
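! same out-of-range handling for x3; lane 3 has no later lane to
! shift down, so reload a fresh argument before re-entering .loop3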
3396         .align  16
3397 .range3:
3398         cmp     %l3,%o4
3399         bl,pt   %icc,1f                 ! hx < 0x3e400000
3400 ! delay slot, harmless if branch taken
3401         sethi   %hi(0x7ff00000),%o7
3402         cmp     %l3,%o7
3403         bl,a,pt %icc,2f                 ! branch if finite
3404 ! delay slot, squashed if branch not taken
3405         st      %o4,[%fp+biguns]        ! set biguns
3406         fzero   %f24
3407         fmuld   %f26,%f24,%f26
3408         st      %f26,[%o3]
3409         ba,pt   %icc,2f
3410 ! delay slot
3411         st      %f27,[%o3+4]
3412 1:
3413         fdtoi   %f26,%f28               ! raise inexact if not zero
3414         st      %f24,[%o3]
3415         st      %f27,[%o3+4]
3416 2:
3417         addcc   %i0,-1,%i0
3418         ble,pn  %icc,.last3_from_range3
3419 ! delay slot, harmless if branch taken
3420         add     %i3,%i4,%i3             ! y += stridey
3421         ld      [%i1],%l3
3422         ld      [%i1],%f24
3423         ld      [%i1+4],%f27
3424         andn    %l3,%i5,%l3             ! hx &= ~0x80000000
3425         ba,pt   %icc,.loop3
3426 ! delay slot
3427         add     %i1,%i2,%i1             ! x += stridex
3428 
3429         SET_SIZE(__vsin_ultra3)
3430