/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

        .file   "__vsin_ultra3.S"

#include "libm.h"
#if defined(LIBMVEC_SO_BUILD)
        .weak   __vsin
        .type   __vsin,#function
        __vsin = __vsin_ultra3
#endif

        RO_DATA
        .align  64
constants:
        .word   0x42c80000,0x00000000   ! 3 * 2^44
        .word   0x43380000,0x00000000   ! 3 * 2^51
        .word   0x3fe45f30,0x6dc9c883   ! invpio2
        .word   0x3ff921fb,0x54442c00   ! pio2_1
        .word   0x3d318469,0x898cc400   ! pio2_2
        .word   0x3a71701b,0x839a2520   ! pio2_3
        .word   0xbfc55555,0x55555533   ! pp1
        .word   0x3f811111,0x10e7d53b   ! pp2
        .word   0xbf2a0167,0xe6b3cf9b   ! pp3
        .word   0xbfdfffff,0xffffff65   ! qq1
        .word   0x3fa55555,0x54f88ed0   ! qq2
        .word   0xbf56c12c,0xdd185f60   ! qq3
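
! The first two constants are "big constant" rounding anchors: adding
! 3*2^44 (or 3*2^51) to a much smaller nonnegative double forces the
! interesting low-order bits -- a table index, or the nearest integer n --
! into the stored low word, where integer code can pick them up.  invpio2
! is 2/pi, and pio2_1/2/3 split pi/2 into three progressively smaller
! pieces for Cody-Waite style argument reduction.  pp1..pp3 and qq1..qq3
! appear to be minimax polynomial coefficients for sin and cos on the
! reduced interval.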

! local storage indices

#define xsave           STACK_BIAS-0x8
#define ysave           STACK_BIAS-0x10
#define nsave           STACK_BIAS-0x14
#define sxsave          STACK_BIAS-0x18
#define sysave          STACK_BIAS-0x1c
#define biguns          STACK_BIAS-0x20
#define nk3             STACK_BIAS-0x24
#define nk2             STACK_BIAS-0x28
#define nk1             STACK_BIAS-0x2c
#define nk0             STACK_BIAS-0x30
#define junk            STACK_BIAS-0x38
! sizeof temp storage - must be a multiple of 16 for V9
#define tmps            0x40

! register use

! i0  n
! i1  x
! i2  stridex
! i3  y
! i4  stridey
! i5  0x80000000

! l0  hx0
! l1  hx1
! l2  hx2
! l3  hx3
! l4  k0
! l5  k1
! l6  k2
! l7  k3

! the following are 64-bit registers in both V8+ and V9

! g1  __vlibm_TBL_sincos2
! g5  scratch

! o0  py0
! o1  py1
! o2  py2
! o3  py3
! o4  0x3e400000
! o5  0x3fe921fb,0x4099251e
! o7  scratch

! f0  hx0
! f2
! f4
! f6
! f8  hx1
! f10
! f12
! f14
! f16 hx2
! f18
! f20
! f22
! f24 hx3
! f26
! f28
! f30
! f32
! f34
! f36
! f38

#define c3two44 %f40
#define c3two51 %f42
#define invpio2 %f44
#define pio2_1  %f46
#define pio2_2  %f48
#define pio2_3  %f50
#define pp1     %f52
#define pp2     %f54
#define pp3     %f56
#define qq1     %f58
#define qq2     %f60
#define qq3     %f62

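! Per-element algorithm, as a hedged C sketch (HI, nearest_int, range_path
! and table_and_poly are illustrative names only, inferred from the code
! below rather than taken from any header):
!
!	double vsin_elem(double x) {
!		unsigned hx = HI(x) & ~0x80000000;	/* |x| high word */
!		if (hx < 0x3e400000 || hx > 0x4099251e)
!			return range_path(x);	/* tiny, huge, or NaN/Inf */
!		if (hx > 0x3fe921fb) {		/* |x| > ~pi/4: medium range */
!			double n = nearest_int(x * invpio2);
!			x = ((x - n * pio2_1) - n * pio2_2) - n * pio2_3;
!			/* low bits of n select sin/cos and sign below */
!		}
!		/* primary: split x = t + d with t a breakpoint from
!		 * __vlibm_TBL_sincos2, then reconstruct
!		 * sin(t + d) = sin(t)*cos(d) + cos(t)*sin(d)
!		 * using short polynomials in d*d for sin(d) and cos(d) */
!		return table_and_poly(x);
!	}
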
        ENTRY(__vsin_ultra3)
        save    %sp,-SA(MINFRAME)-tmps,%sp
        PIC_SETUP(l7)
        PIC_SET(l7,constants,o0)
        PIC_SET(l7,__vlibm_TBL_sincos2,o1)
        mov     %o1,%g1
        wr      %g0,0x82,%asi           ! set %asi for non-faulting loads
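! (0x82 is ASI_PRIMARY_NOFAULT, so the speculative argument preloads done
! before the trip count is checked cannot fault past the end of x.)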
#ifdef __sparcv9
        stx     %i1,[%fp+xsave]         ! save arguments
        stx     %i3,[%fp+ysave]
#else
        st      %i1,[%fp+xsave]         ! save arguments
        st      %i3,[%fp+ysave]
#endif
        st      %i0,[%fp+nsave]
        st      %i2,[%fp+sxsave]
        st      %i4,[%fp+sysave]
        st      %g0,[%fp+biguns]        ! biguns = 0
        ldd     [%o0+0x00],c3two44      ! load/set up constants
        ldd     [%o0+0x08],c3two51
        ldd     [%o0+0x10],invpio2
        ldd     [%o0+0x18],pio2_1
        ldd     [%o0+0x20],pio2_2
        ldd     [%o0+0x28],pio2_3
        ldd     [%o0+0x30],pp1
        ldd     [%o0+0x38],pp2
        ldd     [%o0+0x40],pp3
        ldd     [%o0+0x48],qq1
        ldd     [%o0+0x50],qq2
        ldd     [%o0+0x58],qq3
        sethi   %hi(0x80000000),%i5
        sethi   %hi(0x3e400000),%o4
        sethi   %hi(0x3fe921fb),%o5
        or      %o5,%lo(0x3fe921fb),%o5
        sllx    %o5,32,%o5
        sethi   %hi(0x4099251e),%o7
        or      %o7,%lo(0x4099251e),%o7
        or      %o5,%o7,%o5
        sll     %i2,3,%i2               ! scale strides
        sll     %i4,3,%i4
        add     %fp,junk,%o1            ! loop prologue
        add     %fp,junk,%o2
        add     %fp,junk,%o3
        ld      [%i1],%l0               ! *x
        ld      [%i1],%f0
        ld      [%i1+4],%f3
        andn    %l0,%i5,%l0             ! mask off sign
        ba      .loop0
        add     %i1,%i2,%i1             ! x += stridex

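! The range test in each .loopN group is a single compare-and-branch:
! %g5 = hx - 0x3e400000 and %o7 = 0x4099251e - hx (low 32 bits of the
! subtract from %o5) are both nonnegative exactly when hx lies in
! [0x3e400000, 0x4099251e], so or-ing them and branching on the sign bit
! catches tiny, huge, and NaN arguments in one test.
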
! 16-byte aligned
        .align  16
.loop0:
        lda     [%i1]%asi,%l1           ! preload next argument
        sub     %l0,%o4,%g5
        sub     %o5,%l0,%o7
        fabss   %f0,%f2

        lda     [%i1]%asi,%f8
        orcc    %o7,%g5,%g0
        mov     %i3,%o0                 ! py0 = y
        bl,pn   %icc,.range0            ! hx < 0x3e400000 or hx > 0x4099251e

! delay slot
        lda     [%i1+4]%asi,%f11
        addcc   %i0,-1,%i0
        add     %i3,%i4,%i3             ! y += stridey
        ble,pn  %icc,.last1

! delay slot
        andn    %l1,%i5,%l1
        add     %i1,%i2,%i1             ! x += stridex
        faddd   %f2,c3two44,%f4
        st      %f15,[%o1+4]

.loop1:
        lda     [%i1]%asi,%l2           ! preload next argument
        sub     %l1,%o4,%g5
        sub     %o5,%l1,%o7
        fabss   %f8,%f10

        lda     [%i1]%asi,%f16
        orcc    %o7,%g5,%g0
        mov     %i3,%o1                 ! py1 = y
        bl,pn   %icc,.range1            ! hx < 0x3e400000 or hx > 0x4099251e

! delay slot
        lda     [%i1+4]%asi,%f19
        addcc   %i0,-1,%i0
        add     %i3,%i4,%i3             ! y += stridey
        ble,pn  %icc,.last2

! delay slot
        andn    %l2,%i5,%l2
        add     %i1,%i2,%i1             ! x += stridex
        faddd   %f10,c3two44,%f12
        st      %f23,[%o2+4]

.loop2:
        lda     [%i1]%asi,%l3           ! preload next argument
        sub     %l2,%o4,%g5
        sub     %o5,%l2,%o7
        fabss   %f16,%f18

        lda     [%i1]%asi,%f24
        orcc    %o7,%g5,%g0
        mov     %i3,%o2                 ! py2 = y
        bl,pn   %icc,.range2            ! hx < 0x3e400000 or hx > 0x4099251e

! delay slot
        lda     [%i1+4]%asi,%f27
        addcc   %i0,-1,%i0
        add     %i3,%i4,%i3             ! y += stridey
        ble,pn  %icc,.last3

! delay slot
        andn    %l3,%i5,%l3
        add     %i1,%i2,%i1             ! x += stridex
        faddd   %f18,c3two44,%f20
        st      %f31,[%o3+4]

.loop3:
        sub     %l3,%o4,%g5
        sub     %o5,%l3,%o7
        fabss   %f24,%f26
        st      %f5,[%fp+nk0]

        orcc    %o7,%g5,%g0
        mov     %i3,%o3                 ! py3 = y
        bl,pn   %icc,.range3            ! hx < 0x3e400000 or hx > 0x4099251e
! delay slot
        st      %f13,[%fp+nk1]

!!! DONE?
.cont:
        srlx    %o5,32,%o7
        add     %i3,%i4,%i3             ! y += stridey
        fmovs   %f3,%f1
        st      %f21,[%fp+nk2]

        sub     %o7,%l0,%l0
        sub     %o7,%l1,%l1
        faddd   %f26,c3two44,%f28
        st      %f29,[%fp+nk3]

        sub     %o7,%l2,%l2
        sub     %o7,%l3,%l3
        fmovs   %f11,%f9

        or      %l0,%l1,%l0
        or      %l2,%l3,%l2
        fmovs   %f19,%f17

        fmovs   %f27,%f25
        fmuld   %f0,invpio2,%f6         ! x * invpio2, for medium range

        fmuld   %f8,invpio2,%f14
        ld      [%fp+nk0],%l4

        fmuld   %f16,invpio2,%f22
        ld      [%fp+nk1],%l5

        orcc    %l0,%l2,%g0
        bl,pn   %icc,.medium
! delay slot
        fmuld   %f24,invpio2,%f30
        ld      [%fp+nk2],%l6

        ld      [%fp+nk3],%l7
        sll     %l4,5,%l4               ! k
        fcmpd   %fcc0,%f0,pio2_3        ! x < pio2_3 iff x < 0

        sll     %l5,5,%l5
        ldd     [%l4+%g1],%f4
        fcmpd   %fcc1,%f8,pio2_3

        sll     %l6,5,%l6
        ldd     [%l5+%g1],%f12
        fcmpd   %fcc2,%f16,pio2_3

        sll     %l7,5,%l7
        ldd     [%l6+%g1],%f20
        fcmpd   %fcc3,%f24,pio2_3

        ldd     [%l7+%g1],%f28
        fsubd   %f2,%f4,%f2             ! x -= __vlibm_TBL_sincos2[k]

        fsubd   %f10,%f12,%f10

        fsubd   %f18,%f20,%f18

        fsubd   %f26,%f28,%f26

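! Primary-range core: each lane is now split as x = t + d, where t is the
! breakpoint picked by the 32-byte index k into __vlibm_TBL_sincos2 and d
! is the small residual in %f2/%f10/%f18/%f26.  The code below evaluates
! short even polynomials in z = d*d for sin(d) and cos(d)-1, then
! recombines them with the tabulated values (sin(t) at [k+8], cos(t) at
! [k+16], evidently) as sin(t+d) = sin(t)*cos(d) + cos(t)*sin(d), four
! lanes interleaved to hide floating-point latency.
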
        fmuld   %f2,%f2,%f0             ! z = x * x

        fmuld   %f10,%f10,%f8

        fmuld   %f18,%f18,%f16

        fmuld   %f26,%f26,%f24

        fmuld   %f0,pp3,%f6

        fmuld   %f8,pp3,%f14

        fmuld   %f16,pp3,%f22

        fmuld   %f24,pp3,%f30

        faddd   %f6,pp2,%f6
        fmuld   %f0,qq2,%f4

        faddd   %f14,pp2,%f14
        fmuld   %f8,qq2,%f12

        faddd   %f22,pp2,%f22
        fmuld   %f16,qq2,%f20

        faddd   %f30,pp2,%f30
        fmuld   %f24,qq2,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,qq1,%f4

        fmuld   %f8,%f14,%f14
        faddd   %f12,qq1,%f12

        fmuld   %f16,%f22,%f22
        faddd   %f20,qq1,%f20

        fmuld   %f24,%f30,%f30
        faddd   %f28,qq1,%f28

        faddd   %f6,pp1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        faddd   %f14,pp1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        faddd   %f22,pp1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        faddd   %f30,pp1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        fmuld   %f0,%f6,%f6
        ldd     [%l4+8],%f0

        fmuld   %f8,%f14,%f14
        ldd     [%l5+8],%f8

        fmuld   %f16,%f22,%f22
        ldd     [%l6+8],%f16

        fmuld   %f24,%f30,%f30
        ldd     [%l7+8],%f24

        fmuld   %f2,%f6,%f6

        fmuld   %f10,%f14,%f14

        fmuld   %f18,%f22,%f22

        fmuld   %f26,%f30,%f30

        faddd   %f6,%f2,%f6
        fmuld   %f0,%f4,%f4
        ldd     [%l4+16],%f2

        faddd   %f14,%f10,%f14
        fmuld   %f8,%f12,%f12
        ldd     [%l5+16],%f10

        faddd   %f22,%f18,%f22
        fmuld   %f16,%f20,%f20
        ldd     [%l6+16],%f18

        faddd   %f30,%f26,%f30
        fmuld   %f24,%f28,%f28
        ldd     [%l7+16],%f26

        fmuld   %f2,%f6,%f6

        fmuld   %f10,%f14,%f14

        fmuld   %f18,%f22,%f22

        fmuld   %f26,%f30,%f30

        faddd   %f6,%f4,%f6

        faddd   %f14,%f12,%f14

        faddd   %f22,%f20,%f22

        faddd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        fmovdl  %fcc0,%f4,%f6           ! (x < 0)? -s : s
        st      %f6,[%o0]

        fmovdl  %fcc1,%f12,%f14
        st      %f14,[%o1]

        fmovdl  %fcc2,%f20,%f22
        st      %f22,[%o2]

        fmovdl  %fcc3,%f28,%f30
        st      %f30,[%o3]
        addcc   %i0,-1,%i0

        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop


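! Medium range: at least one lane has |x| > ~pi/4.  n = round(x * 2/pi) is
! formed by adding and subtracting 3*2^51, which also deposits the low bits
! of n in the stored low word (picked up through nk0..nk3); x is then
! reduced by n*pi/2 in three Cody-Waite steps (pio2_1, pio2_2, pio2_3) so
! each subtraction stays exact.  The low two bits of n give the quadrant:
! bit 0 selects the sin or cos polynomial, bit 1 the sign of the result.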
        .align  16
.medium:
        faddd   %f6,c3two51,%f4
        st      %f5,[%fp+nk0]

        faddd   %f14,c3two51,%f12
        st      %f13,[%fp+nk1]

        faddd   %f22,c3two51,%f20
        st      %f21,[%fp+nk2]

        faddd   %f30,c3two51,%f28
        st      %f29,[%fp+nk3]

        fsubd   %f4,c3two51,%f6

        fsubd   %f12,c3two51,%f14

        fsubd   %f20,c3two51,%f22

        fsubd   %f28,c3two51,%f30

        fmuld   %f6,pio2_1,%f2
        ld      [%fp+nk0],%l0           ! n

        fmuld   %f14,pio2_1,%f10
        ld      [%fp+nk1],%l1

        fmuld   %f22,pio2_1,%f18
        ld      [%fp+nk2],%l2

        fmuld   %f30,pio2_1,%f26
        ld      [%fp+nk3],%l3

        fsubd   %f0,%f2,%f0
        fmuld   %f6,pio2_2,%f4

        fsubd   %f8,%f10,%f8
        fmuld   %f14,pio2_2,%f12

        fsubd   %f16,%f18,%f16
        fmuld   %f22,pio2_2,%f20

        fsubd   %f24,%f26,%f24
        fmuld   %f30,pio2_2,%f28

        fsubd   %f0,%f4,%f32

        fsubd   %f8,%f12,%f34

        fsubd   %f16,%f20,%f36

        fsubd   %f24,%f28,%f38

        fsubd   %f0,%f32,%f0
        fcmple32 %f32,pio2_3,%l4        ! x <= pio2_3 iff x < 0

        fsubd   %f8,%f34,%f8
        fcmple32 %f34,pio2_3,%l5

        fsubd   %f16,%f36,%f16
        fcmple32 %f36,pio2_3,%l6

        fsubd   %f24,%f38,%f24
        fcmple32 %f38,pio2_3,%l7

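! Sign handling: fcmple32 leaves a small bit mask in %l4..%l7; the sll/sra
! pair below turns it into 0 or -1, and the xor-then-subtract negates n
! exactly when x < 0.  The later "and ...,2 / xor" flips the quadrant sign
! bit, so a negative lane effectively uses n = (-n) ^ 2 while fabsd makes
! the reduced argument positive and fxor re-applies its sign to the
! polynomial term.
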
        fsubd   %f0,%f4,%f0
        fmuld   %f6,pio2_3,%f6
        sll     %l4,30,%l4              ! if (x < 0) n = -n ^ 2

        fsubd   %f8,%f12,%f8
        fmuld   %f14,pio2_3,%f14
        sll     %l5,30,%l5

        fsubd   %f16,%f20,%f16
        fmuld   %f22,pio2_3,%f22
        sll     %l6,30,%l6

        fsubd   %f24,%f28,%f24
        fmuld   %f30,pio2_3,%f30
        sll     %l7,30,%l7

        fsubd   %f6,%f0,%f6
        sra     %l4,31,%l4

        fsubd   %f14,%f8,%f14
        sra     %l5,31,%l5

        fsubd   %f22,%f16,%f22
        sra     %l6,31,%l6

        fsubd   %f30,%f24,%f30
        sra     %l7,31,%l7

        fsubd   %f32,%f6,%f0            ! reduced x
        xor     %l0,%l4,%l0

        fsubd   %f34,%f14,%f8
        xor     %l1,%l5,%l1

        fsubd   %f36,%f22,%f16
        xor     %l2,%l6,%l2

        fsubd   %f38,%f30,%f24
        xor     %l3,%l7,%l3

        fabsd   %f0,%f2
        sub     %l0,%l4,%l0

        fabsd   %f8,%f10
        sub     %l1,%l5,%l1

        fabsd   %f16,%f18
        sub     %l2,%l6,%l2

        fabsd   %f24,%f26
        sub     %l3,%l7,%l3

        faddd   %f2,c3two44,%f4
        st      %f5,[%fp+nk0]
        and     %l4,2,%l4

        faddd   %f10,c3two44,%f12
        st      %f13,[%fp+nk1]
        and     %l5,2,%l5

        faddd   %f18,c3two44,%f20
        st      %f21,[%fp+nk2]
        and     %l6,2,%l6

        faddd   %f26,c3two44,%f28
        st      %f29,[%fp+nk3]
        and     %l7,2,%l7

        fsubd   %f32,%f0,%f4
        xor     %l0,%l4,%l0

        fsubd   %f34,%f8,%f12
        xor     %l1,%l5,%l1

        fsubd   %f36,%f16,%f20
        xor     %l2,%l6,%l2

        fsubd   %f38,%f24,%f28
        xor     %l3,%l7,%l3

        fzero   %f38
        ld      [%fp+nk0],%l4

        fsubd   %f4,%f6,%f6             ! w
        ld      [%fp+nk1],%l5

        fsubd   %f12,%f14,%f14
        ld      [%fp+nk2],%l6

        fnegd   %f38,%f38
        ld      [%fp+nk3],%l7
        sll     %l4,5,%l4               ! k

        fsubd   %f20,%f22,%f22
        sll     %l5,5,%l5

        fsubd   %f28,%f30,%f30
        sll     %l6,5,%l6

        fand    %f0,%f38,%f32           ! sign bit of x
        ldd     [%l4+%g1],%f4
        sll     %l7,5,%l7

        fand    %f8,%f38,%f34
        ldd     [%l5+%g1],%f12

        fand    %f16,%f38,%f36
        ldd     [%l6+%g1],%f20

        fand    %f24,%f38,%f38
        ldd     [%l7+%g1],%f28

        fsubd   %f2,%f4,%f2             ! x -= __vlibm_TBL_sincos2[k]

        fsubd   %f10,%f12,%f10

        fsubd   %f18,%f20,%f18
        nop

        fsubd   %f26,%f28,%f26
        nop

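! Quadrant dispatch: per lane, bit 0 of n picks the polynomial (n even ->
! sin(d), n odd -> cos(d)); the .caseN blocks below unroll the possible
! combinations for the four lanes.  Bit 1 of n, tested with "andcc ...,2"
! at the bottom of every case, conditionally negates the stored result via
! fmovdnz.
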
! 16-byte aligned
        fmuld   %f2,%f2,%f0             ! z = x * x
        andcc   %l0,1,%g0
        bz,pn   %icc,.case8
! delay slot
        fxor    %f6,%f32,%f32

        fmuld   %f10,%f10,%f8
        andcc   %l1,1,%g0
        bz,pn   %icc,.case4
! delay slot
        fxor    %f14,%f34,%f34

        fmuld   %f18,%f18,%f16
        andcc   %l2,1,%g0
        bz,pn   %icc,.case2
! delay slot
        fxor    %f22,%f36,%f36

        fmuld   %f26,%f26,%f24
        andcc   %l3,1,%g0
        bz,pn   %icc,.case1
! delay slot
        fxor    %f30,%f38,%f38

!.case0:
        fmuld   %f0,qq3,%f6             ! cos(x0)

        fmuld   %f8,qq3,%f14            ! cos(x1)

        fmuld   %f16,qq3,%f22           ! cos(x2)

        fmuld   %f24,qq3,%f30           ! cos(x3)

        faddd   %f6,qq2,%f6
        fmuld   %f0,pp2,%f4

        faddd   %f14,qq2,%f14
        fmuld   %f8,pp2,%f12

        faddd   %f22,qq2,%f22
        fmuld   %f16,pp2,%f20

        faddd   %f30,qq2,%f30
        fmuld   %f24,pp2,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,pp1,%f4

        fmuld   %f8,%f14,%f14
        faddd   %f12,pp1,%f12

        fmuld   %f16,%f22,%f22
        faddd   %f20,pp1,%f20

        fmuld   %f24,%f30,%f30
        faddd   %f28,pp1,%f28

        faddd   %f6,qq1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        faddd   %f14,qq1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        faddd   %f22,qq1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        faddd   %f30,qq1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        fmuld   %f2,%f4,%f4

        fmuld   %f10,%f12,%f12

        fmuld   %f18,%f20,%f20

        fmuld   %f26,%f28,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f32,%f4
        ldd     [%l4+16],%f0

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f34,%f12
        ldd     [%l5+16],%f8

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f36,%f20
        ldd     [%l6+16],%f16

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f38,%f28
        ldd     [%l7+16],%f24

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f2,%f4
        ldd     [%l4+8],%f32

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f10,%f12
        ldd     [%l5+8],%f34

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f18,%f20
        ldd     [%l6+8],%f36

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f26,%f28
        ldd     [%l7+8],%f38

        fmuld   %f32,%f4,%f4

        fmuld   %f34,%f12,%f12

        fmuld   %f36,%f20,%f20

        fmuld   %f38,%f28,%f28

        fsubd   %f6,%f4,%f6

        fsubd   %f14,%f12,%f14

        fsubd   %f22,%f20,%f22

        fsubd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

        .align  16
.case1:
        fmuld   %f24,pp3,%f30           ! sin(x3)

        fmuld   %f0,qq3,%f6             ! cos(x0)

        fmuld   %f8,qq3,%f14            ! cos(x1)

        fmuld   %f16,qq3,%f22           ! cos(x2)

        faddd   %f30,pp2,%f30
        fmuld   %f24,qq2,%f28

        faddd   %f6,qq2,%f6
        fmuld   %f0,pp2,%f4

        faddd   %f14,qq2,%f14
        fmuld   %f8,pp2,%f12

        faddd   %f22,qq2,%f22
        fmuld   %f16,pp2,%f20

        fmuld   %f24,%f30,%f30
        faddd   %f28,qq1,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,pp1,%f4

        fmuld   %f8,%f14,%f14
        faddd   %f12,pp1,%f12

        fmuld   %f16,%f22,%f22
        faddd   %f20,pp1,%f20

        faddd   %f30,pp1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        faddd   %f6,qq1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        faddd   %f14,qq1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        faddd   %f22,qq1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        fmuld   %f24,%f30,%f30

        fmuld   %f2,%f4,%f4

        fmuld   %f10,%f12,%f12

        fmuld   %f18,%f20,%f20

        fmuld   %f26,%f30,%f30
        ldd     [%l7+8],%f24

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f32,%f4
        ldd     [%l4+16],%f0

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f34,%f12
        ldd     [%l5+16],%f8

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f36,%f20
        ldd     [%l6+16],%f16

        fmuld   %f24,%f28,%f28
        faddd   %f38,%f30,%f30

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f2,%f4
        ldd     [%l4+8],%f32

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f10,%f12
        ldd     [%l5+8],%f34

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f18,%f20
        ldd     [%l6+8],%f36

        faddd   %f26,%f30,%f30
        ldd     [%l7+16],%f38

        fmuld   %f32,%f4,%f4

        fmuld   %f34,%f12,%f12

        fmuld   %f36,%f20,%f20

        fmuld   %f38,%f30,%f30

        fsubd   %f6,%f4,%f6

        fsubd   %f14,%f12,%f14

        fsubd   %f22,%f20,%f22

        faddd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

        .align  16
.case2:
        fmuld   %f26,%f26,%f24
        andcc   %l3,1,%g0
        bz,pn   %icc,.case3
! delay slot
        fxor    %f30,%f38,%f38

        fmuld   %f16,pp3,%f22           ! sin(x2)

        fmuld   %f0,qq3,%f6             ! cos(x0)

        fmuld   %f8,qq3,%f14            ! cos(x1)

        faddd   %f22,pp2,%f22
        fmuld   %f16,qq2,%f20

        fmuld   %f24,qq3,%f30           ! cos(x3)

        faddd   %f6,qq2,%f6
        fmuld   %f0,pp2,%f4

        faddd   %f14,qq2,%f14
        fmuld   %f8,pp2,%f12

        fmuld   %f16,%f22,%f22
        faddd   %f20,qq1,%f20

        faddd   %f30,qq2,%f30
        fmuld   %f24,pp2,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,pp1,%f4

        fmuld   %f8,%f14,%f14
        faddd   %f12,pp1,%f12

        faddd   %f22,pp1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        fmuld   %f24,%f30,%f30
        faddd   %f28,pp1,%f28

        faddd   %f6,qq1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        faddd   %f14,qq1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        fmuld   %f16,%f22,%f22

        faddd   %f30,qq1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        fmuld   %f2,%f4,%f4

        fmuld   %f10,%f12,%f12

        fmuld   %f18,%f22,%f22
        ldd     [%l6+8],%f16

        fmuld   %f26,%f28,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f32,%f4
        ldd     [%l4+16],%f0

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f34,%f12
        ldd     [%l5+16],%f8

        fmuld   %f16,%f20,%f20
        faddd   %f36,%f22,%f22

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f38,%f28
        ldd     [%l7+16],%f24

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f2,%f4
        ldd     [%l4+8],%f32

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f10,%f12
        ldd     [%l5+8],%f34

        faddd   %f18,%f22,%f22
        ldd     [%l6+16],%f36

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f26,%f28
        ldd     [%l7+8],%f38

        fmuld   %f32,%f4,%f4

        fmuld   %f34,%f12,%f12

        fmuld   %f36,%f22,%f22

        fmuld   %f38,%f28,%f28

        fsubd   %f6,%f4,%f6

        fsubd   %f14,%f12,%f14

        faddd   %f22,%f20,%f22

        fsubd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

        .align  16
.case3:
        fmuld   %f16,pp3,%f22           ! sin(x2)

        fmuld   %f24,pp3,%f30           ! sin(x3)

        fmuld   %f0,qq3,%f6             ! cos(x0)

        fmuld   %f8,qq3,%f14            ! cos(x1)

        faddd   %f22,pp2,%f22
        fmuld   %f16,qq2,%f20

        faddd   %f30,pp2,%f30
        fmuld   %f24,qq2,%f28

        faddd   %f6,qq2,%f6
        fmuld   %f0,pp2,%f4

        faddd   %f14,qq2,%f14
        fmuld   %f8,pp2,%f12

        fmuld   %f16,%f22,%f22
        faddd   %f20,qq1,%f20

        fmuld   %f24,%f30,%f30
        faddd   %f28,qq1,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,pp1,%f4

        fmuld   %f8,%f14,%f14
        faddd   %f12,pp1,%f12

        faddd   %f22,pp1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        faddd   %f30,pp1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        faddd   %f6,qq1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        faddd   %f14,qq1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        fmuld   %f16,%f22,%f22

        fmuld   %f24,%f30,%f30

        fmuld   %f2,%f4,%f4

        fmuld   %f10,%f12,%f12

        fmuld   %f18,%f22,%f22
        ldd     [%l6+8],%f16

        fmuld   %f26,%f30,%f30
        ldd     [%l7+8],%f24

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f32,%f4
        ldd     [%l4+16],%f0

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f34,%f12
        ldd     [%l5+16],%f8

        fmuld   %f16,%f20,%f20
        faddd   %f36,%f22,%f22

        fmuld   %f24,%f28,%f28
        faddd   %f38,%f30,%f30

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f2,%f4
        ldd     [%l4+8],%f32

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f10,%f12
        ldd     [%l5+8],%f34

        faddd   %f18,%f22,%f22
        ldd     [%l6+16],%f36

        faddd   %f26,%f30,%f30
        ldd     [%l7+16],%f38

        fmuld   %f32,%f4,%f4

        fmuld   %f34,%f12,%f12

        fmuld   %f36,%f22,%f22

        fmuld   %f38,%f30,%f30

        fsubd   %f6,%f4,%f6

        fsubd   %f14,%f12,%f14

        faddd   %f22,%f20,%f22

        faddd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

        .align  16
.case4:
        fmuld   %f18,%f18,%f16
        andcc   %l2,1,%g0
        bz,pn   %icc,.case6
! delay slot
        fxor    %f22,%f36,%f36

        fmuld   %f26,%f26,%f24
        andcc   %l3,1,%g0
        bz,pn   %icc,.case5
! delay slot
        fxor    %f30,%f38,%f38

        fmuld   %f8,pp3,%f14            ! sin(x1)

        fmuld   %f0,qq3,%f6             ! cos(x0)

        faddd   %f14,pp2,%f14
        fmuld   %f8,qq2,%f12

        fmuld   %f16,qq3,%f22           ! cos(x2)

        fmuld   %f24,qq3,%f30           ! cos(x3)

        faddd   %f6,qq2,%f6
        fmuld   %f0,pp2,%f4

        fmuld   %f8,%f14,%f14
        faddd   %f12,qq1,%f12

        faddd   %f22,qq2,%f22
        fmuld   %f16,pp2,%f20

        faddd   %f30,qq2,%f30
        fmuld   %f24,pp2,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,pp1,%f4

        faddd   %f14,pp1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        fmuld   %f16,%f22,%f22
        faddd   %f20,pp1,%f20

        fmuld   %f24,%f30,%f30
        faddd   %f28,pp1,%f28

        faddd   %f6,qq1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        fmuld   %f8,%f14,%f14

        faddd   %f22,qq1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        faddd   %f30,qq1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        fmuld   %f2,%f4,%f4

        fmuld   %f10,%f14,%f14
        ldd     [%l5+8],%f8

        fmuld   %f18,%f20,%f20

        fmuld   %f26,%f28,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f32,%f4
        ldd     [%l4+16],%f0

        fmuld   %f8,%f12,%f12
        faddd   %f34,%f14,%f14

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f36,%f20
        ldd     [%l6+16],%f16

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f38,%f28
        ldd     [%l7+16],%f24

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f2,%f4
        ldd     [%l4+8],%f32

        faddd   %f10,%f14,%f14
        ldd     [%l5+16],%f34

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f18,%f20
        ldd     [%l6+8],%f36

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f26,%f28
        ldd     [%l7+8],%f38

        fmuld   %f32,%f4,%f4

        fmuld   %f34,%f14,%f14

        fmuld   %f36,%f20,%f20

        fmuld   %f38,%f28,%f28

        fsubd   %f6,%f4,%f6

        faddd   %f14,%f12,%f14

        fsubd   %f22,%f20,%f22

        fsubd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

        .align  16
.case5:
        fmuld   %f8,pp3,%f14            ! sin(x1)

        fmuld   %f24,pp3,%f30           ! sin(x3)

        fmuld   %f0,qq3,%f6             ! cos(x0)

        faddd   %f14,pp2,%f14
        fmuld   %f8,qq2,%f12

        fmuld   %f16,qq3,%f22           ! cos(x2)

        faddd   %f30,pp2,%f30
        fmuld   %f24,qq2,%f28

        faddd   %f6,qq2,%f6
        fmuld   %f0,pp2,%f4

        fmuld   %f8,%f14,%f14
        faddd   %f12,qq1,%f12

        faddd   %f22,qq2,%f22
        fmuld   %f16,pp2,%f20

        fmuld   %f24,%f30,%f30
        faddd   %f28,qq1,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,pp1,%f4

        faddd   %f14,pp1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        fmuld   %f16,%f22,%f22
        faddd   %f20,pp1,%f20

        faddd   %f30,pp1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        faddd   %f6,qq1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        fmuld   %f8,%f14,%f14

        faddd   %f22,qq1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        fmuld   %f24,%f30,%f30

        fmuld   %f2,%f4,%f4

        fmuld   %f10,%f14,%f14
        ldd     [%l5+8],%f8

        fmuld   %f18,%f20,%f20

        fmuld   %f26,%f30,%f30
        ldd     [%l7+8],%f24

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f32,%f4
        ldd     [%l4+16],%f0

        fmuld   %f8,%f12,%f12
        faddd   %f34,%f14,%f14

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f36,%f20
        ldd     [%l6+16],%f16

        fmuld   %f24,%f28,%f28
        faddd   %f38,%f30,%f30

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f2,%f4
        ldd     [%l4+8],%f32

        faddd   %f10,%f14,%f14
        ldd     [%l5+16],%f34

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f18,%f20
        ldd     [%l6+8],%f36

        faddd   %f26,%f30,%f30
        ldd     [%l7+16],%f38

        fmuld   %f32,%f4,%f4

        fmuld   %f34,%f14,%f14

        fmuld   %f36,%f20,%f20

        fmuld   %f38,%f30,%f30

        fsubd   %f6,%f4,%f6

        faddd   %f14,%f12,%f14

        fsubd   %f22,%f20,%f22

        faddd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

        .align  16
.case6:
        fmuld   %f26,%f26,%f24
        andcc   %l3,1,%g0
        bz,pn   %icc,.case7
! delay slot
        fxor    %f30,%f38,%f38

        fmuld   %f8,pp3,%f14            ! sin(x1)

        fmuld   %f16,pp3,%f22           ! sin(x2)

        fmuld   %f0,qq3,%f6             ! cos(x0)

        faddd   %f14,pp2,%f14
        fmuld   %f8,qq2,%f12

        faddd   %f22,pp2,%f22
        fmuld   %f16,qq2,%f20

        fmuld   %f24,qq3,%f30           ! cos(x3)

        faddd   %f6,qq2,%f6
        fmuld   %f0,pp2,%f4

        fmuld   %f8,%f14,%f14
        faddd   %f12,qq1,%f12

        fmuld   %f16,%f22,%f22
        faddd   %f20,qq1,%f20

        faddd   %f30,qq2,%f30
        fmuld   %f24,pp2,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,pp1,%f4

        faddd   %f14,pp1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        faddd   %f22,pp1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        fmuld   %f24,%f30,%f30
        faddd   %f28,pp1,%f28

        faddd   %f6,qq1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        fmuld   %f8,%f14,%f14

        fmuld   %f16,%f22,%f22

        faddd   %f30,qq1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        fmuld   %f2,%f4,%f4

        fmuld   %f10,%f14,%f14
        ldd     [%l5+8],%f8

        fmuld   %f18,%f22,%f22
        ldd     [%l6+8],%f16

        fmuld   %f26,%f28,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f32,%f4
        ldd     [%l4+16],%f0

        fmuld   %f8,%f12,%f12
        faddd   %f34,%f14,%f14

        fmuld   %f16,%f20,%f20
        faddd   %f36,%f22,%f22

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f38,%f28
        ldd     [%l7+16],%f24

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f2,%f4
        ldd     [%l4+8],%f32

        faddd   %f10,%f14,%f14
        ldd     [%l5+16],%f34

        faddd   %f18,%f22,%f22
        ldd     [%l6+16],%f36

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f26,%f28
        ldd     [%l7+8],%f38

        fmuld   %f32,%f4,%f4

        fmuld   %f34,%f14,%f14

        fmuld   %f36,%f22,%f22

        fmuld   %f38,%f28,%f28

        fsubd   %f6,%f4,%f6

        faddd   %f14,%f12,%f14

        faddd   %f22,%f20,%f22

        fsubd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

        .align  16
.case7:
        fmuld   %f8,pp3,%f14            ! sin(x1)

        fmuld   %f16,pp3,%f22           ! sin(x2)

        fmuld   %f24,pp3,%f30           ! sin(x3)

        fmuld   %f0,qq3,%f6             ! cos(x0)

        faddd   %f14,pp2,%f14
        fmuld   %f8,qq2,%f12

        faddd   %f22,pp2,%f22
        fmuld   %f16,qq2,%f20

        faddd   %f30,pp2,%f30
        fmuld   %f24,qq2,%f28

        faddd   %f6,qq2,%f6
        fmuld   %f0,pp2,%f4

        fmuld   %f8,%f14,%f14
        faddd   %f12,qq1,%f12

        fmuld   %f16,%f22,%f22
        faddd   %f20,qq1,%f20

        fmuld   %f24,%f30,%f30
        faddd   %f28,qq1,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,pp1,%f4

        faddd   %f14,pp1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        faddd   %f22,pp1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        faddd   %f30,pp1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        faddd   %f6,qq1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        fmuld   %f8,%f14,%f14

        fmuld   %f16,%f22,%f22

        fmuld   %f24,%f30,%f30

        fmuld   %f2,%f4,%f4

        fmuld   %f10,%f14,%f14
        ldd     [%l5+8],%f8

        fmuld   %f18,%f22,%f22
        ldd     [%l6+8],%f16

        fmuld   %f26,%f30,%f30
        ldd     [%l7+8],%f24

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f32,%f4
        ldd     [%l4+16],%f0

        fmuld   %f8,%f12,%f12
        faddd   %f34,%f14,%f14

        fmuld   %f16,%f20,%f20
        faddd   %f36,%f22,%f22

        fmuld   %f24,%f28,%f28
        faddd   %f38,%f30,%f30

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f2,%f4
        ldd     [%l4+8],%f32

        faddd   %f10,%f14,%f14
        ldd     [%l5+16],%f34

        faddd   %f18,%f22,%f22
        ldd     [%l6+16],%f36

        faddd   %f26,%f30,%f30
        ldd     [%l7+16],%f38

        fmuld   %f32,%f4,%f4

        fmuld   %f34,%f14,%f14

        fmuld   %f36,%f22,%f22

        fmuld   %f38,%f30,%f30

        fsubd   %f6,%f4,%f6

        faddd   %f14,%f12,%f14

        faddd   %f22,%f20,%f22

        faddd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

        .align  16
.case8:
        fmuld   %f10,%f10,%f8
        andcc   %l1,1,%g0
        bz,pn   %icc,.case12
! delay slot
        fxor    %f14,%f34,%f34

        fmuld   %f18,%f18,%f16
        andcc   %l2,1,%g0
        bz,pn   %icc,.case10
! delay slot
        fxor    %f22,%f36,%f36

        fmuld   %f26,%f26,%f24
        andcc   %l3,1,%g0
        bz,pn   %icc,.case9
! delay slot
        fxor    %f30,%f38,%f38

        fmuld   %f0,pp3,%f6             ! sin(x0)

        faddd   %f6,pp2,%f6
        fmuld   %f0,qq2,%f4

        fmuld   %f8,qq3,%f14            ! cos(x1)

        fmuld   %f16,qq3,%f22           ! cos(x2)

        fmuld   %f24,qq3,%f30           ! cos(x3)

        fmuld   %f0,%f6,%f6
        faddd   %f4,qq1,%f4

        faddd   %f14,qq2,%f14
        fmuld   %f8,pp2,%f12

        faddd   %f22,qq2,%f22
        fmuld   %f16,pp2,%f20

        faddd   %f30,qq2,%f30
        fmuld   %f24,pp2,%f28

        faddd   %f6,pp1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        fmuld   %f8,%f14,%f14
        faddd   %f12,pp1,%f12

        fmuld   %f16,%f22,%f22
        faddd   %f20,pp1,%f20

        fmuld   %f24,%f30,%f30
        faddd   %f28,pp1,%f28

        fmuld   %f0,%f6,%f6

        faddd   %f14,qq1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        faddd   %f22,qq1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        faddd   %f30,qq1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        fmuld   %f2,%f6,%f6
        ldd     [%l4+8],%f0

        fmuld   %f10,%f12,%f12

        fmuld   %f18,%f20,%f20

        fmuld   %f26,%f28,%f28

        fmuld   %f0,%f4,%f4
        faddd   %f32,%f6,%f6

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f34,%f12
        ldd     [%l5+16],%f8

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f36,%f20
        ldd     [%l6+16],%f16

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f38,%f28
        ldd     [%l7+16],%f24

        faddd   %f2,%f6,%f6
        ldd     [%l4+16],%f32

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f10,%f12
        ldd     [%l5+8],%f34

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f18,%f20
        ldd     [%l6+8],%f36

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f26,%f28
        ldd     [%l7+8],%f38

        fmuld   %f32,%f6,%f6

        fmuld   %f34,%f12,%f12

        fmuld   %f36,%f20,%f20

        fmuld   %f38,%f28,%f28

        faddd   %f6,%f4,%f6

        fsubd   %f14,%f12,%f14

        fsubd   %f22,%f20,%f22

        fsubd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

        .align  16
.case9:
        fmuld   %f0,pp3,%f6             ! sin(x0)

        fmuld   %f24,pp3,%f30           ! sin(x3)

        faddd   %f6,pp2,%f6
        fmuld   %f0,qq2,%f4

        fmuld   %f8,qq3,%f14            ! cos(x1)

        fmuld   %f16,qq3,%f22           ! cos(x2)

        faddd   %f30,pp2,%f30
        fmuld   %f24,qq2,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,qq1,%f4

        faddd   %f14,qq2,%f14
        fmuld   %f8,pp2,%f12

        faddd   %f22,qq2,%f22
        fmuld   %f16,pp2,%f20

        fmuld   %f24,%f30,%f30
        faddd   %f28,qq1,%f28

        faddd   %f6,pp1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        fmuld   %f8,%f14,%f14
        faddd   %f12,pp1,%f12

        fmuld   %f16,%f22,%f22
        faddd   %f20,pp1,%f20

        faddd   %f30,pp1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        fmuld   %f0,%f6,%f6

        faddd   %f14,qq1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        faddd   %f22,qq1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        fmuld   %f24,%f30,%f30

        fmuld   %f2,%f6,%f6
        ldd     [%l4+8],%f0

        fmuld   %f10,%f12,%f12

        fmuld   %f18,%f20,%f20

        fmuld   %f26,%f30,%f30
        ldd     [%l7+8],%f24

        fmuld   %f0,%f4,%f4
        faddd   %f32,%f6,%f6

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f34,%f12
        ldd     [%l5+16],%f8

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f36,%f20
        ldd     [%l6+16],%f16

        fmuld   %f24,%f28,%f28
        faddd   %f38,%f30,%f30

        faddd   %f2,%f6,%f6
        ldd     [%l4+16],%f32

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f10,%f12
        ldd     [%l5+8],%f34

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f18,%f20
        ldd     [%l6+8],%f36

        faddd   %f26,%f30,%f30
        ldd     [%l7+16],%f38

        fmuld   %f32,%f6,%f6

        fmuld   %f34,%f12,%f12

        fmuld   %f36,%f20,%f20

        fmuld   %f38,%f30,%f30

        faddd   %f6,%f4,%f6

        fsubd   %f14,%f12,%f14

        fsubd   %f22,%f20,%f22

        faddd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

2278         .align  16
2279 .case10:
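! n0 even, n1 odd, n2 even; square x3 and dispatch on n3: fall
! through with cos(x3) when n3 is odd, else .case11 uses sin(x3).
! The fxor in the always-executed delay slot appears to fold a
! precomputed sign into the x3 table term in %f38.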
2280         fmuld   %f26,%f26,%f24
2281         andcc   %l3,1,%g0
2282         bz,pn   %icc,.case11
2283 ! delay slot
2284         fxor    %f30,%f38,%f38
2285 
2286         fmuld   %f0,pp3,%f6             ! sin(x0)
2287 
2288         fmuld   %f16,pp3,%f22           ! sin(x2)
2289 
2290         faddd   %f6,pp2,%f6
2291         fmuld   %f0,qq2,%f4
2292 
2293         fmuld   %f8,qq3,%f14            ! cos(x1)
2294 
2295         faddd   %f22,pp2,%f22
2296         fmuld   %f16,qq2,%f20
2297 
2298         fmuld   %f24,qq3,%f30           ! cos(x3)
2299 
2300         fmuld   %f0,%f6,%f6
2301         faddd   %f4,qq1,%f4
2302 
2303         faddd   %f14,qq2,%f14
2304         fmuld   %f8,pp2,%f12
2305 
2306         fmuld   %f16,%f22,%f22
2307         faddd   %f20,qq1,%f20
2308 
2309         faddd   %f30,qq2,%f30
2310         fmuld   %f24,pp2,%f28
2311 
2312         faddd   %f6,pp1,%f6
2313         fmuld   %f0,%f4,%f4
2314         add     %l4,%g1,%l4
2315 
2316         fmuld   %f8,%f14,%f14
2317         faddd   %f12,pp1,%f12
2318 
2319         faddd   %f22,pp1,%f22
2320         fmuld   %f16,%f20,%f20
2321         add     %l6,%g1,%l6
2322 
2323         fmuld   %f24,%f30,%f30
2324         faddd   %f28,pp1,%f28
2325 
2326         fmuld   %f0,%f6,%f6
2327 
2328         faddd   %f14,qq1,%f14
2329         fmuld   %f8,%f12,%f12
2330         add     %l5,%g1,%l5
2331 
2332         fmuld   %f16,%f22,%f22
2333 
2334         faddd   %f30,qq1,%f30
2335         fmuld   %f24,%f28,%f28
2336         add     %l7,%g1,%l7
2337 
2338         fmuld   %f2,%f6,%f6
2339         ldd     [%l4+8],%f0
2340 
2341         fmuld   %f10,%f12,%f12
2342 
2343         fmuld   %f18,%f22,%f22
2344         ldd     [%l6+8],%f16
2345 
2346         fmuld   %f26,%f28,%f28
2347 
2348         fmuld   %f0,%f4,%f4
2349         faddd   %f32,%f6,%f6
2350 
2351         fmuld   %f8,%f14,%f14
2352         faddd   %f12,%f34,%f12
2353         ldd     [%l5+16],%f8
2354 
2355         fmuld   %f16,%f20,%f20
2356         faddd   %f36,%f22,%f22
2357 
2358         fmuld   %f24,%f30,%f30
2359         faddd   %f28,%f38,%f28
2360         ldd     [%l7+16],%f24
2361 
2362         faddd   %f2,%f6,%f6
2363         ldd     [%l4+16],%f32
2364 
2365         fmuld   %f8,%f14,%f14
2366         faddd   %f12,%f10,%f12
2367         ldd     [%l5+8],%f34
2368 
2369         faddd   %f18,%f22,%f22
2370         ldd     [%l6+16],%f36
2371 
2372         fmuld   %f24,%f30,%f30
2373         faddd   %f28,%f26,%f28
2374         ldd     [%l7+8],%f38
2375 
2376         fmuld   %f32,%f6,%f6
2377 
2378         fmuld   %f34,%f12,%f12
2379 
2380         fmuld   %f36,%f22,%f22
2381 
2382         fmuld   %f38,%f28,%f28
2383 
2384         faddd   %f6,%f4,%f6
2385 
2386         fsubd   %f14,%f12,%f14
2387 
2388         faddd   %f22,%f20,%f22
2389 
2390         fsubd   %f30,%f28,%f30
2391 
2392         faddd   %f6,%f0,%f6
2393 
2394         faddd   %f14,%f8,%f14
2395 
2396         faddd   %f22,%f16,%f22
2397 
2398         faddd   %f30,%f24,%f30
2399         mov     %l0,%l4
2400 
2401         fnegd   %f6,%f4
2402         lda     [%i1]%asi,%l0           ! preload next argument
2403 
2404         fnegd   %f14,%f12
2405         lda     [%i1]%asi,%f0
2406 
2407         fnegd   %f22,%f20
2408         lda     [%i1+4]%asi,%f3
2409 
2410         fnegd   %f30,%f28
2411         andn    %l0,%i5,%l0
2412         add     %i1,%i2,%i1
2413 
2414         andcc   %l4,2,%g0
2415         fmovdnz %icc,%f4,%f6
2416         st      %f6,[%o0]
2417 
2418         andcc   %l1,2,%g0
2419         fmovdnz %icc,%f12,%f14
2420         st      %f14,[%o1]
2421 
2422         andcc   %l2,2,%g0
2423         fmovdnz %icc,%f20,%f22
2424         st      %f22,[%o2]
2425 
2426         andcc   %l3,2,%g0
2427         fmovdnz %icc,%f28,%f30
2428         st      %f30,[%o3]
2429 
2430         addcc   %i0,-1,%i0
2431         bg,pt   %icc,.loop0
2432 ! delay slot
2433         st      %f7,[%o0+4]
2434 
2435         ba,pt   %icc,.end
2436 ! delay slot
2437         nop
2438 
2439         .align  16
2440 .case11:
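! only n1 is odd: cos polynomial for x1, sin polynomial for the rest.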
2441         fmuld   %f0,pp3,%f6             ! sin(x0)
2442 
2443         fmuld   %f16,pp3,%f22           ! sin(x2)
2444 
2445         fmuld   %f24,pp3,%f30           ! sin(x3)
2446 
2447         faddd   %f6,pp2,%f6
2448         fmuld   %f0,qq2,%f4
2449 
2450         fmuld   %f8,qq3,%f14            ! cos(x1)
2451 
2452         faddd   %f22,pp2,%f22
2453         fmuld   %f16,qq2,%f20
2454 
2455         faddd   %f30,pp2,%f30
2456         fmuld   %f24,qq2,%f28
2457 
2458         fmuld   %f0,%f6,%f6
2459         faddd   %f4,qq1,%f4
2460 
2461         faddd   %f14,qq2,%f14
2462         fmuld   %f8,pp2,%f12
2463 
2464         fmuld   %f16,%f22,%f22
2465         faddd   %f20,qq1,%f20
2466 
2467         fmuld   %f24,%f30,%f30
2468         faddd   %f28,qq1,%f28
2469 
2470         faddd   %f6,pp1,%f6
2471         fmuld   %f0,%f4,%f4
2472         add     %l4,%g1,%l4
2473 
2474         fmuld   %f8,%f14,%f14
2475         faddd   %f12,pp1,%f12
2476 
2477         faddd   %f22,pp1,%f22
2478         fmuld   %f16,%f20,%f20
2479         add     %l6,%g1,%l6
2480 
2481         faddd   %f30,pp1,%f30
2482         fmuld   %f24,%f28,%f28
2483         add     %l7,%g1,%l7
2484 
2485         fmuld   %f0,%f6,%f6
2486 
2487         faddd   %f14,qq1,%f14
2488         fmuld   %f8,%f12,%f12
2489         add     %l5,%g1,%l5
2490 
2491         fmuld   %f16,%f22,%f22
2492 
2493         fmuld   %f24,%f30,%f30
2494 
2495         fmuld   %f2,%f6,%f6
2496         ldd     [%l4+8],%f0
2497 
2498         fmuld   %f10,%f12,%f12
2499 
2500         fmuld   %f18,%f22,%f22
2501         ldd     [%l6+8],%f16
2502 
2503         fmuld   %f26,%f30,%f30
2504         ldd     [%l7+8],%f24
2505 
2506         fmuld   %f0,%f4,%f4
2507         faddd   %f32,%f6,%f6
2508 
2509         fmuld   %f8,%f14,%f14
2510         faddd   %f12,%f34,%f12
2511         ldd     [%l5+16],%f8
2512 
2513         fmuld   %f16,%f20,%f20
2514         faddd   %f36,%f22,%f22
2515 
2516         fmuld   %f24,%f28,%f28
2517         faddd   %f38,%f30,%f30
2518 
2519         faddd   %f2,%f6,%f6
2520         ldd     [%l4+16],%f32
2521 
2522         fmuld   %f8,%f14,%f14
2523         faddd   %f12,%f10,%f12
2524         ldd     [%l5+8],%f34
2525 
2526         faddd   %f18,%f22,%f22
2527         ldd     [%l6+16],%f36
2528 
2529         faddd   %f26,%f30,%f30
2530         ldd     [%l7+16],%f38
2531 
2532         fmuld   %f32,%f6,%f6
2533 
2534         fmuld   %f34,%f12,%f12
2535 
2536         fmuld   %f36,%f22,%f22
2537 
2538         fmuld   %f38,%f30,%f30
2539 
2540         faddd   %f6,%f4,%f6
2541 
2542         fsubd   %f14,%f12,%f14
2543 
2544         faddd   %f22,%f20,%f22
2545 
2546         faddd   %f30,%f28,%f30
2547 
2548         faddd   %f6,%f0,%f6
2549 
2550         faddd   %f14,%f8,%f14
2551 
2552         faddd   %f22,%f16,%f22
2553 
2554         faddd   %f30,%f24,%f30
2555         mov     %l0,%l4
2556 
2557         fnegd   %f6,%f4
2558         lda     [%i1]%asi,%l0           ! preload next argument
2559 
2560         fnegd   %f14,%f12
2561         lda     [%i1]%asi,%f0
2562 
2563         fnegd   %f22,%f20
2564         lda     [%i1+4]%asi,%f3
2565 
2566         fnegd   %f30,%f28
2567         andn    %l0,%i5,%l0
2568         add     %i1,%i2,%i1
2569 
2570         andcc   %l4,2,%g0
2571         fmovdnz %icc,%f4,%f6
2572         st      %f6,[%o0]
2573 
2574         andcc   %l1,2,%g0
2575         fmovdnz %icc,%f12,%f14
2576         st      %f14,[%o1]
2577 
2578         andcc   %l2,2,%g0
2579         fmovdnz %icc,%f20,%f22
2580         st      %f22,[%o2]
2581 
2582         andcc   %l3,2,%g0
2583         fmovdnz %icc,%f28,%f30
2584         st      %f30,[%o3]
2585 
2586         addcc   %i0,-1,%i0
2587         bg,pt   %icc,.loop0
2588 ! delay slot
2589         st      %f7,[%o0+4]
2590 
2591         ba,pt   %icc,.end
2592 ! delay slot
2593         nop
2594 
2595         .align  16
2596 .case12:
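! n0 and n1 even: square x2 and test n2 (branching to .case14 when it
! is even), then square x3 and test n3 (branching to .case13 when it
! is even); fall through with cos for both x2 and x3 when n2 and n3
! are both odd.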
2597         fmuld   %f18,%f18,%f16
2598         andcc   %l2,1,%g0
2599         bz,pn   %icc,.case14
2600 ! delay slot
2601         fxor    %f22,%f36,%f36
2602 
2603         fmuld   %f26,%f26,%f24
2604         andcc   %l3,1,%g0
2605         bz,pn   %icc,.case13
2606 ! delay slot
2607         fxor    %f30,%f38,%f38
2608 
2609         fmuld   %f0,pp3,%f6             ! sin(x0)
2610 
2611         fmuld   %f8,pp3,%f14            ! sin(x1)
2612 
2613         faddd   %f6,pp2,%f6
2614         fmuld   %f0,qq2,%f4
2615 
2616         faddd   %f14,pp2,%f14
2617         fmuld   %f8,qq2,%f12
2618 
2619         fmuld   %f16,qq3,%f22           ! cos(x2)
2620 
2621         fmuld   %f24,qq3,%f30           ! cos(x3)
2622 
2623         fmuld   %f0,%f6,%f6
2624         faddd   %f4,qq1,%f4
2625 
2626         fmuld   %f8,%f14,%f14
2627         faddd   %f12,qq1,%f12
2628 
2629         faddd   %f22,qq2,%f22
2630         fmuld   %f16,pp2,%f20
2631 
2632         faddd   %f30,qq2,%f30
2633         fmuld   %f24,pp2,%f28
2634 
2635         faddd   %f6,pp1,%f6
2636         fmuld   %f0,%f4,%f4
2637         add     %l4,%g1,%l4
2638 
2639         faddd   %f14,pp1,%f14
2640         fmuld   %f8,%f12,%f12
2641         add     %l5,%g1,%l5
2642 
2643         fmuld   %f16,%f22,%f22
2644         faddd   %f20,pp1,%f20
2645 
2646         fmuld   %f24,%f30,%f30
2647         faddd   %f28,pp1,%f28
2648 
2649         fmuld   %f0,%f6,%f6
2650 
2651         fmuld   %f8,%f14,%f14
2652 
2653         faddd   %f22,qq1,%f22
2654         fmuld   %f16,%f20,%f20
2655         add     %l6,%g1,%l6
2656 
2657         faddd   %f30,qq1,%f30
2658         fmuld   %f24,%f28,%f28
2659         add     %l7,%g1,%l7
2660 
2661         fmuld   %f2,%f6,%f6
2662         ldd     [%l4+8],%f0
2663 
2664         fmuld   %f10,%f14,%f14
2665         ldd     [%l5+8],%f8
2666 
2667         fmuld   %f18,%f20,%f20
2668 
2669         fmuld   %f26,%f28,%f28
2670 
2671         fmuld   %f0,%f4,%f4
2672         faddd   %f32,%f6,%f6
2673 
2674         fmuld   %f8,%f12,%f12
2675         faddd   %f34,%f14,%f14
2676 
2677         fmuld   %f16,%f22,%f22
2678         faddd   %f20,%f36,%f20
2679         ldd     [%l6+16],%f16
2680 
2681         fmuld   %f24,%f30,%f30
2682         faddd   %f28,%f38,%f28
2683         ldd     [%l7+16],%f24
2684 
2685         faddd   %f2,%f6,%f6
2686         ldd     [%l4+16],%f32
2687 
2688         faddd   %f10,%f14,%f14
2689         ldd     [%l5+16],%f34
2690 
2691         fmuld   %f16,%f22,%f22
2692         faddd   %f20,%f18,%f20
2693         ldd     [%l6+8],%f36
2694 
2695         fmuld   %f24,%f30,%f30
2696         faddd   %f28,%f26,%f28
2697         ldd     [%l7+8],%f38
2698 
2699         fmuld   %f32,%f6,%f6
2700 
2701         fmuld   %f34,%f14,%f14
2702 
2703         fmuld   %f36,%f20,%f20
2704 
2705         fmuld   %f38,%f28,%f28
2706 
2707         faddd   %f6,%f4,%f6
2708 
2709         faddd   %f14,%f12,%f14
2710 
2711         fsubd   %f22,%f20,%f22
2712 
2713         fsubd   %f30,%f28,%f30
2714 
2715         faddd   %f6,%f0,%f6
2716 
2717         faddd   %f14,%f8,%f14
2718 
2719         faddd   %f22,%f16,%f22
2720 
2721         faddd   %f30,%f24,%f30
2722         mov     %l0,%l4
2723 
2724         fnegd   %f6,%f4
2725         lda     [%i1]%asi,%l0           ! preload next argument
2726 
2727         fnegd   %f14,%f12
2728         lda     [%i1]%asi,%f0
2729 
2730         fnegd   %f22,%f20
2731         lda     [%i1+4]%asi,%f3
2732 
2733         fnegd   %f30,%f28
2734         andn    %l0,%i5,%l0
2735         add     %i1,%i2,%i1
2736 
2737         andcc   %l4,2,%g0
2738         fmovdnz %icc,%f4,%f6
2739         st      %f6,[%o0]
2740 
2741         andcc   %l1,2,%g0
2742         fmovdnz %icc,%f12,%f14
2743         st      %f14,[%o1]
2744 
2745         andcc   %l2,2,%g0
2746         fmovdnz %icc,%f20,%f22
2747         st      %f22,[%o2]
2748 
2749         andcc   %l3,2,%g0
2750         fmovdnz %icc,%f28,%f30
2751         st      %f30,[%o3]
2752 
2753         addcc   %i0,-1,%i0
2754         bg,pt   %icc,.loop0
2755 ! delay slot
2756         st      %f7,[%o0+4]
2757 
2758         ba,pt   %icc,.end
2759 ! delay slot
2760         nop
2761 
2762         .align  16
2763 .case13:
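! only n2 is odd: cos polynomial for x2, sin polynomial for the rest.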
2764         fmuld   %f0,pp3,%f6             ! sin(x0)
2765 
2766         fmuld   %f8,pp3,%f14            ! sin(x1)
2767 
2768         fmuld   %f24,pp3,%f30           ! sin(x3)
2769 
2770         faddd   %f6,pp2,%f6
2771         fmuld   %f0,qq2,%f4
2772 
2773         faddd   %f14,pp2,%f14
2774         fmuld   %f8,qq2,%f12
2775 
2776         fmuld   %f16,qq3,%f22           ! cos(x2)
2777 
2778         faddd   %f30,pp2,%f30
2779         fmuld   %f24,qq2,%f28
2780 
2781         fmuld   %f0,%f6,%f6
2782         faddd   %f4,qq1,%f4
2783 
2784         fmuld   %f8,%f14,%f14
2785         faddd   %f12,qq1,%f12
2786 
2787         faddd   %f22,qq2,%f22
2788         fmuld   %f16,pp2,%f20
2789 
2790         fmuld   %f24,%f30,%f30
2791         faddd   %f28,qq1,%f28
2792 
2793         faddd   %f6,pp1,%f6
2794         fmuld   %f0,%f4,%f4
2795         add     %l4,%g1,%l4
2796 
2797         faddd   %f14,pp1,%f14
2798         fmuld   %f8,%f12,%f12
2799         add     %l5,%g1,%l5
2800 
2801         fmuld   %f16,%f22,%f22
2802         faddd   %f20,pp1,%f20
2803 
2804         faddd   %f30,pp1,%f30
2805         fmuld   %f24,%f28,%f28
2806         add     %l7,%g1,%l7
2807 
2808         fmuld   %f0,%f6,%f6
2809 
2810         fmuld   %f8,%f14,%f14
2811 
2812         faddd   %f22,qq1,%f22
2813         fmuld   %f16,%f20,%f20
2814         add     %l6,%g1,%l6
2815 
2816         fmuld   %f24,%f30,%f30
2817 
2818         fmuld   %f2,%f6,%f6
2819         ldd     [%l4+8],%f0
2820 
2821         fmuld   %f10,%f14,%f14
2822         ldd     [%l5+8],%f8
2823 
2824         fmuld   %f18,%f20,%f20
2825 
2826         fmuld   %f26,%f30,%f30
2827         ldd     [%l7+8],%f24
2828 
2829         fmuld   %f0,%f4,%f4
2830         faddd   %f32,%f6,%f6
2831 
2832         fmuld   %f8,%f12,%f12
2833         faddd   %f34,%f14,%f14
2834 
2835         fmuld   %f16,%f22,%f22
2836         faddd   %f20,%f36,%f20
2837         ldd     [%l6+16],%f16
2838 
2839         fmuld   %f24,%f28,%f28
2840         faddd   %f38,%f30,%f30
2841 
2842         faddd   %f2,%f6,%f6
2843         ldd     [%l4+16],%f32
2844 
2845         faddd   %f10,%f14,%f14
2846         ldd     [%l5+16],%f34
2847 
2848         fmuld   %f16,%f22,%f22
2849         faddd   %f20,%f18,%f20
2850         ldd     [%l6+8],%f36
2851 
2852         faddd   %f26,%f30,%f30
2853         ldd     [%l7+16],%f38
2854 
2855         fmuld   %f32,%f6,%f6
2856 
2857         fmuld   %f34,%f14,%f14
2858 
2859         fmuld   %f36,%f20,%f20
2860 
2861         fmuld   %f38,%f30,%f30
2862 
2863         faddd   %f6,%f4,%f6
2864 
2865         faddd   %f14,%f12,%f14
2866 
2867         fsubd   %f22,%f20,%f22
2868 
2869         faddd   %f30,%f28,%f30
2870 
2871         faddd   %f6,%f0,%f6
2872 
2873         faddd   %f14,%f8,%f14
2874 
2875         faddd   %f22,%f16,%f22
2876 
2877         faddd   %f30,%f24,%f30
2878         mov     %l0,%l4
2879 
2880         fnegd   %f6,%f4
2881         lda     [%i1]%asi,%l0           ! preload next argument
2882 
2883         fnegd   %f14,%f12
2884         lda     [%i1]%asi,%f0
2885 
2886         fnegd   %f22,%f20
2887         lda     [%i1+4]%asi,%f3
2888 
2889         fnegd   %f30,%f28
2890         andn    %l0,%i5,%l0
2891         add     %i1,%i2,%i1
2892 
2893         andcc   %l4,2,%g0
2894         fmovdnz %icc,%f4,%f6
2895         st      %f6,[%o0]
2896 
2897         andcc   %l1,2,%g0
2898         fmovdnz %icc,%f12,%f14
2899         st      %f14,[%o1]
2900 
2901         andcc   %l2,2,%g0
2902         fmovdnz %icc,%f20,%f22
2903         st      %f22,[%o2]
2904 
2905         andcc   %l3,2,%g0
2906         fmovdnz %icc,%f28,%f30
2907         st      %f30,[%o3]
2908 
2909         addcc   %i0,-1,%i0
2910         bg,pt   %icc,.loop0
2911 ! delay slot
2912         st      %f7,[%o0+4]
2913 
2914         ba,pt   %icc,.end
2915 ! delay slot
2916         nop
2917 
2918         .align  16
2919 .case14:
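! n0..n2 even: square x3 and test n3; fall through with cos(x3) when
! n3 is odd, else .case15 handles the all-even pattern.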
2920         fmuld   %f26,%f26,%f24
2921         andcc   %l3,1,%g0
2922         bz,pn   %icc,.case15
2923 ! delay slot
2924         fxor    %f30,%f38,%f38
2925 
2926         fmuld   %f0,pp3,%f6             ! sin(x0)
2927 
2928         fmuld   %f8,pp3,%f14            ! sin(x1)
2929 
2930         fmuld   %f16,pp3,%f22           ! sin(x2)
2931 
2932         faddd   %f6,pp2,%f6
2933         fmuld   %f0,qq2,%f4
2934 
2935         faddd   %f14,pp2,%f14
2936         fmuld   %f8,qq2,%f12
2937 
2938         faddd   %f22,pp2,%f22
2939         fmuld   %f16,qq2,%f20
2940 
2941         fmuld   %f24,qq3,%f30           ! cos(x3)
2942 
2943         fmuld   %f0,%f6,%f6
2944         faddd   %f4,qq1,%f4
2945 
2946         fmuld   %f8,%f14,%f14
2947         faddd   %f12,qq1,%f12
2948 
2949         fmuld   %f16,%f22,%f22
2950         faddd   %f20,qq1,%f20
2951 
2952         faddd   %f30,qq2,%f30
2953         fmuld   %f24,pp2,%f28
2954 
2955         faddd   %f6,pp1,%f6
2956         fmuld   %f0,%f4,%f4
2957         add     %l4,%g1,%l4
2958 
2959         faddd   %f14,pp1,%f14
2960         fmuld   %f8,%f12,%f12
2961         add     %l5,%g1,%l5
2962 
2963         faddd   %f22,pp1,%f22
2964         fmuld   %f16,%f20,%f20
2965         add     %l6,%g1,%l6
2966 
2967         fmuld   %f24,%f30,%f30
2968         faddd   %f28,pp1,%f28
2969 
2970         fmuld   %f0,%f6,%f6
2971 
2972         fmuld   %f8,%f14,%f14
2973 
2974         fmuld   %f16,%f22,%f22
2975 
2976         faddd   %f30,qq1,%f30
2977         fmuld   %f24,%f28,%f28
2978         add     %l7,%g1,%l7
2979 
2980         fmuld   %f2,%f6,%f6
2981         ldd     [%l4+8],%f0
2982 
2983         fmuld   %f10,%f14,%f14
2984         ldd     [%l5+8],%f8
2985 
2986         fmuld   %f18,%f22,%f22
2987         ldd     [%l6+8],%f16
2988 
2989         fmuld   %f26,%f28,%f28
2990 
2991         fmuld   %f0,%f4,%f4
2992         faddd   %f32,%f6,%f6
2993 
2994         fmuld   %f8,%f12,%f12
2995         faddd   %f34,%f14,%f14
2996 
2997         fmuld   %f16,%f20,%f20
2998         faddd   %f36,%f22,%f22
2999 
3000         fmuld   %f24,%f30,%f30
3001         faddd   %f28,%f38,%f28
3002         ldd     [%l7+16],%f24
3003 
3004         faddd   %f2,%f6,%f6
3005         ldd     [%l4+16],%f32
3006 
3007         faddd   %f10,%f14,%f14
3008         ldd     [%l5+16],%f34
3009 
3010         faddd   %f18,%f22,%f22
3011         ldd     [%l6+16],%f36
3012 
3013         fmuld   %f24,%f30,%f30
3014         faddd   %f28,%f26,%f28
3015         ldd     [%l7+8],%f38
3016 
3017         fmuld   %f32,%f6,%f6
3018 
3019         fmuld   %f34,%f14,%f14
3020 
3021         fmuld   %f36,%f22,%f22
3022 
3023         fmuld   %f38,%f28,%f28
3024 
3025         faddd   %f6,%f4,%f6
3026 
3027         faddd   %f14,%f12,%f14
3028 
3029         faddd   %f22,%f20,%f22
3030 
3031         fsubd   %f30,%f28,%f30
3032 
3033         faddd   %f6,%f0,%f6
3034 
3035         faddd   %f14,%f8,%f14
3036 
3037         faddd   %f22,%f16,%f22
3038 
3039         faddd   %f30,%f24,%f30
3040         mov     %l0,%l4
3041 
3042         fnegd   %f6,%f4
3043         lda     [%i1]%asi,%l0           ! preload next argument
3044 
3045         fnegd   %f14,%f12
3046         lda     [%i1]%asi,%f0
3047 
3048         fnegd   %f22,%f20
3049         lda     [%i1+4]%asi,%f3
3050 
3051         fnegd   %f30,%f28
3052         andn    %l0,%i5,%l0
3053         add     %i1,%i2,%i1
3054 
3055         andcc   %l4,2,%g0
3056         fmovdnz %icc,%f4,%f6
3057         st      %f6,[%o0]
3058 
3059         andcc   %l1,2,%g0
3060         fmovdnz %icc,%f12,%f14
3061         st      %f14,[%o1]
3062 
3063         andcc   %l2,2,%g0
3064         fmovdnz %icc,%f20,%f22
3065         st      %f22,[%o2]
3066 
3067         andcc   %l3,2,%g0
3068         fmovdnz %icc,%f28,%f30
3069         st      %f30,[%o3]
3070 
3071         addcc   %i0,-1,%i0
3072         bg,pt   %icc,.loop0
3073 ! delay slot
3074         st      %f7,[%o0+4]
3075 
3076         ba,pt   %icc,.end
3077 ! delay slot
3078         nop
3079 
3080         .align  16
3081 .case15:
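! n0..n3 all even: sin polynomial for all four lanes.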
3082         fmuld   %f0,pp3,%f6             ! sin(x0)
3083 
3084         fmuld   %f8,pp3,%f14            ! sin(x1)
3085 
3086         fmuld   %f16,pp3,%f22           ! sin(x2)
3087 
3088         fmuld   %f24,pp3,%f30           ! sin(x3)
3089 
3090         faddd   %f6,pp2,%f6
3091         fmuld   %f0,qq2,%f4
3092 
3093         faddd   %f14,pp2,%f14
3094         fmuld   %f8,qq2,%f12
3095 
3096         faddd   %f22,pp2,%f22
3097         fmuld   %f16,qq2,%f20
3098 
3099         faddd   %f30,pp2,%f30
3100         fmuld   %f24,qq2,%f28
3101 
3102         fmuld   %f0,%f6,%f6
3103         faddd   %f4,qq1,%f4
3104 
3105         fmuld   %f8,%f14,%f14
3106         faddd   %f12,qq1,%f12
3107 
3108         fmuld   %f16,%f22,%f22
3109         faddd   %f20,qq1,%f20
3110 
3111         fmuld   %f24,%f30,%f30
3112         faddd   %f28,qq1,%f28
3113 
3114         faddd   %f6,pp1,%f6
3115         fmuld   %f0,%f4,%f4
3116         add     %l4,%g1,%l4
3117 
3118         faddd   %f14,pp1,%f14
3119         fmuld   %f8,%f12,%f12
3120         add     %l5,%g1,%l5
3121 
3122         faddd   %f22,pp1,%f22
3123         fmuld   %f16,%f20,%f20
3124         add     %l6,%g1,%l6
3125 
3126         faddd   %f30,pp1,%f30
3127         fmuld   %f24,%f28,%f28
3128         add     %l7,%g1,%l7
3129 
3130         fmuld   %f0,%f6,%f6
3131 
3132         fmuld   %f8,%f14,%f14
3133 
3134         fmuld   %f16,%f22,%f22
3135 
3136         fmuld   %f24,%f30,%f30
3137 
3138         fmuld   %f2,%f6,%f6
3139         ldd     [%l4+8],%f0
3140 
3141         fmuld   %f10,%f14,%f14
3142         ldd     [%l5+8],%f8
3143 
3144         fmuld   %f18,%f22,%f22
3145         ldd     [%l6+8],%f16
3146 
3147         fmuld   %f26,%f30,%f30
3148         ldd     [%l7+8],%f24
3149 
3150         fmuld   %f0,%f4,%f4
3151         faddd   %f32,%f6,%f6
3152 
3153         fmuld   %f8,%f12,%f12
3154         faddd   %f34,%f14,%f14
3155 
3156         fmuld   %f16,%f20,%f20
3157         faddd   %f36,%f22,%f22
3158 
3159         fmuld   %f24,%f28,%f28
3160         faddd   %f38,%f30,%f30
3161 
3162         faddd   %f2,%f6,%f6
3163         ldd     [%l4+16],%f32
3164 
3165         faddd   %f10,%f14,%f14
3166         ldd     [%l5+16],%f34
3167 
3168         faddd   %f18,%f22,%f22
3169         ldd     [%l6+16],%f36
3170 
3171         faddd   %f26,%f30,%f30
3172         ldd     [%l7+16],%f38
3173 
3174         fmuld   %f32,%f6,%f6
3175 
3176         fmuld   %f34,%f14,%f14
3177 
3178         fmuld   %f36,%f22,%f22
3179 
3180         fmuld   %f38,%f30,%f30
3181 
3182         faddd   %f6,%f4,%f6
3183 
3184         faddd   %f14,%f12,%f14
3185 
3186         faddd   %f22,%f20,%f22
3187 
3188         faddd   %f30,%f28,%f30
3189 
3190         faddd   %f6,%f0,%f6
3191 
3192         faddd   %f14,%f8,%f14
3193 
3194         faddd   %f22,%f16,%f22
3195 
3196         faddd   %f30,%f24,%f30
3197         mov     %l0,%l4
3198 
3199         fnegd   %f6,%f4
3200         lda     [%i1]%asi,%l0           ! preload next argument
3201 
3202         fnegd   %f14,%f12
3203         lda     [%i1]%asi,%f0
3204 
3205         fnegd   %f22,%f20
3206         lda     [%i1+4]%asi,%f3
3207 
3208         fnegd   %f30,%f28
3209         andn    %l0,%i5,%l0
3210         add     %i1,%i2,%i1
3211 
3212         andcc   %l4,2,%g0
3213         fmovdnz %icc,%f4,%f6
3214         st      %f6,[%o0]
3215 
3216         andcc   %l1,2,%g0
3217         fmovdnz %icc,%f12,%f14
3218         st      %f14,[%o1]
3219 
3220         andcc   %l2,2,%g0
3221         fmovdnz %icc,%f20,%f22
3222         st      %f22,[%o2]
3223 
3224         andcc   %l3,2,%g0
3225         fmovdnz %icc,%f28,%f30
3226         st      %f30,[%o3]
3227 
3228         addcc   %i0,-1,%i0
3229         bg,pt   %icc,.loop0
3230 ! delay slot
3231         st      %f7,[%o0+4]
3232 
3233         ba,pt   %icc,.end
3234 ! delay slot
3235         nop
3236 
3237 
3238         .align  16
3239 .end:
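! store the low words still pending for lanes 1..3, then check
! whether any huge arguments were bypassed; if so, hand the saved
! n, x, stridex, y, stridey values to __vlibm_vsin_big_ultra3 to
! produce the results the fast path skipped.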
3240         st      %f15,[%o1+4]
3241         st      %f23,[%o2+4]
3242         st      %f31,[%o3+4]
3243         ld      [%fp+biguns],%i5
3244         tst     %i5                     ! check for huge arguments remaining
3245         be,pt   %icc,.exit
3246 ! delay slot
3247         nop
3248 #ifdef __sparcv9
3249         ldx     [%fp+xsave],%o1
3250         ldx     [%fp+ysave],%o3
3251 #else
3252         ld      [%fp+xsave],%o1
3253         ld      [%fp+ysave],%o3
3254 #endif
3255         ld      [%fp+nsave],%o0
3256         ld      [%fp+sxsave],%o2
3257         ld      [%fp+sysave],%o4
3258         sra     %o2,0,%o2               ! sign-extend for V9
3259         sra     %o4,0,%o4
3260         call    __vlibm_vsin_big_ultra3
3261         sra     %o5,0,%o5               ! delay slot
3262 
3263 .exit:
3264         ret
3265         restore
3266 
3267 
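! the argument vector ended in mid-group: .last1/.last2/.last3 plug
! the unused lanes with zero arguments, aim the matching result
! pointers at the scratch slot (junk), and rejoin the common code
! at .cont to finish the final group.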
3268         .align  16
3269 .last1:
3270         faddd   %f2,c3two44,%f4
3271         st      %f15,[%o1+4]
3272 .last1_from_range1:
3273         mov     0,%l1
3274         fzeros  %f8
3275         fzero   %f10
3276         add     %fp,junk,%o1
3277 .last2:
3278         faddd   %f10,c3two44,%f12
3279         st      %f23,[%o2+4]
3280 .last2_from_range2:
3281         mov     0,%l2
3282         fzeros  %f16
3283         fzero   %f18
3284         add     %fp,junk,%o2
3285 .last3:
3286         faddd   %f18,c3two44,%f20
3287         st      %f31,[%o3+4]
3288         st      %f5,[%fp+nk0]
3289         st      %f13,[%fp+nk1]
3290 .last3_from_range3:
3291         mov     0,%l3
3292         fzeros  %f24
3293         fzero   %f26
3294         ba,pt   %icc,.cont
3295 ! delay slot
3296         add     %fp,junk,%o3
3297 
3298 
3299         .align  16
3300 .range0:
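! x0 fell outside the fast path's range.  If hx < 0x3e400000, |x| is
! tiny and sin(x) is x to working precision: store x, with fdtoi
! raising inexact when x is nonzero.  If hx >= 0x7ff00000, x is inf
! or nan: store x * 0.  Otherwise x is a huge finite argument: just
! set biguns so .end will call __vlibm_vsin_big_ultra3.  Unless the
! vector is exhausted, the argument preloaded for lane 1 then drops
! to lane 0 and the loop re-enters at .loop0.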
3301         cmp     %l0,%o4
3302         bl,pt   %icc,1f                 ! hx < 0x3e400000
3303 ! delay slot, harmless if branch taken
3304         sethi   %hi(0x7ff00000),%o7
3305         cmp     %l0,%o7
3306         bl,a,pt %icc,2f                 ! branch if finite
3307 ! delay slot, squashed if branch not taken
3308         st      %o4,[%fp+biguns]        ! set biguns
3309         fzero   %f0                     ! x0 is inf or nan:
3310         fmuld   %f2,%f0,%f2             ! store x * 0 (nan)
3311         st      %f2,[%o0]
3312         ba,pt   %icc,2f
3313 ! delay slot
3314         st      %f3,[%o0+4]
3315 1:
3316         fdtoi   %f2,%f4                 ! raise inexact if not zero
3317         st      %f0,[%o0]
3318         st      %f3,[%o0+4]
3319 2:
3320         addcc   %i0,-1,%i0
3321         ble,pn  %icc,.end
3322 ! delay slot, harmless if branch taken
3323         add     %i3,%i4,%i3             ! y += stridey
3324         andn    %l1,%i5,%l0             ! hx &= ~0x80000000
3325         fmovs   %f8,%f0                 ! drop the lane 1 preload
3326         fmovs   %f11,%f3                ! down to lane 0
3327         ba,pt   %icc,.loop0
3328 ! delay slot
3329         add     %i1,%i2,%i1             ! x += stridex
3330 
3331 
3332         .align  16
3333 .range1:
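! same classification as .range0, for lane 1; on resume the lane 2
! preload drops to lane 1 and the loop re-enters at .loop1.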
3334         cmp     %l1,%o4
3335         bl,pt   %icc,1f                 ! hx < 0x3e400000
3336 ! delay slot, harmless if branch taken
3337         sethi   %hi(0x7ff00000),%o7
3338         cmp     %l1,%o7
3339         bl,a,pt %icc,2f                 ! branch if finite
3340 ! delay slot, squashed if branch not taken
3341         st      %o4,[%fp+biguns]        ! set biguns
3342         fzero   %f8
3343         fmuld   %f10,%f8,%f10
3344         st      %f10,[%o1]
3345         ba,pt   %icc,2f
3346 ! delay slot
3347         st      %f11,[%o1+4]
3348 1:
3349         fdtoi   %f10,%f12               ! raise inexact if not zero
3350         st      %f8,[%o1]
3351         st      %f11,[%o1+4]
3352 2:
3353         addcc   %i0,-1,%i0
3354         ble,pn  %icc,.last1_from_range1
3355 ! delay slot, harmless if branch taken
3356         add     %i3,%i4,%i3             ! y += stridey
3357         andn    %l2,%i5,%l1             ! hx &= ~0x80000000
3358         fmovs   %f16,%f8
3359         fmovs   %f19,%f11
3360         ba,pt   %icc,.loop1
3361 ! delay slot
3362         add     %i1,%i2,%i1             ! x += stridex
3363 
3364 
3365         .align  16
3366 .range2:
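! same classification as .range0, for lane 2; on resume the lane 3
! preload drops to lane 2 and the loop re-enters at .loop2.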
3367         cmp     %l2,%o4
3368         bl,pt   %icc,1f                 ! hx < 0x3e400000
3369 ! delay slot, harmless if branch taken
3370         sethi   %hi(0x7ff00000),%o7
3371         cmp     %l2,%o7
3372         bl,a,pt %icc,2f                 ! branch if finite
3373 ! delay slot, squashed if branch not taken
3374         st      %o4,[%fp+biguns]        ! set biguns
3375         fzero   %f16
3376         fmuld   %f18,%f16,%f18
3377         st      %f18,[%o2]
3378         ba,pt   %icc,2f
3379 ! delay slot
3380         st      %f19,[%o2+4]
3381 1:
3382         fdtoi   %f18,%f20               ! raise inexact if not zero
3383         st      %f16,[%o2]
3384         st      %f19,[%o2+4]
3385 2:
3386         addcc   %i0,-1,%i0
3387         ble,pn  %icc,.last2_from_range2
3388 ! delay slot, harmless if branch taken
3389         add     %i3,%i4,%i3             ! y += stridey
3390         andn    %l3,%i5,%l2             ! hx &= ~0x80000000
3391         fmovs   %f24,%f16
3392         fmovs   %f27,%f19
3393         ba,pt   %icc,.loop2
3394 ! delay slot
3395         add     %i1,%i2,%i1             ! x += stridex
3396 
3397 
3398         .align  16
3399 .range3:
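! same classification as .range0, for lane 3; there is no preload
! past lane 3, so a fresh argument is loaded directly before the
! loop re-enters at .loop3.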
3400         cmp     %l3,%o4
3401         bl,pt   %icc,1f                 ! hx < 0x3e400000
3402 ! delay slot, harmless if branch taken
3403         sethi   %hi(0x7ff00000),%o7
3404         cmp     %l3,%o7
3405         bl,a,pt %icc,2f                 ! branch if finite
3406 ! delay slot, squashed if branch not taken
3407         st      %o4,[%fp+biguns]        ! set biguns
3408         fzero   %f24
3409         fmuld   %f26,%f24,%f26
3410         st      %f26,[%o3]
3411         ba,pt   %icc,2f
3412 ! delay slot
3413         st      %f27,[%o3+4]
3414 1:
3415         fdtoi   %f26,%f28               ! raise inexact if not zero
3416         st      %f24,[%o3]
3417         st      %f27,[%o3+4]
3418 2:
3419         addcc   %i0,-1,%i0
3420         ble,pn  %icc,.last3_from_range3
3421 ! delay slot, harmless if branch taken
3422         add     %i3,%i4,%i3             ! y += stridey
3423         ld      [%i1],%l3
3424         ld      [%i1],%f24
3425         ld      [%i1+4],%f27
3426         andn    %l3,%i5,%l3             ! hx &= ~0x80000000
3427         ba,pt   %icc,.loop3
3428 ! delay slot
3429         add     %i1,%i2,%i1             ! x += stridex
3430 
3431         SET_SIZE(__vsin_ultra3)
3432