/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

        .file   "__vcos_ultra3.S"

#include "libm.h"
#if defined(LIBMVEC_SO_BUILD)
        .weak   __vcos
        .type   __vcos,#function
        __vcos = __vcos_ultra3
#endif

        RO_DATA
        .align  64
constants:
        .word   0x42c80000,0x00000000   ! 3 * 2^44
        .word   0x43380000,0x00000000   ! 3 * 2^51
        .word   0x3fe45f30,0x6dc9c883   ! invpio2
        .word   0x3ff921fb,0x54442c00   ! pio2_1
        .word   0x3d318469,0x898cc400   ! pio2_2
        .word   0x3a71701b,0x839a2520   ! pio2_3
        .word   0xbfc55555,0x55555533   ! pp1
        .word   0x3f811111,0x10e7d53b   ! pp2
        .word   0xbf2a0167,0xe6b3cf9b   ! pp3
        .word   0xbfdfffff,0xffffff65   ! qq1
        .word   0x3fa55555,0x54f88ed0   ! qq2
        .word   0xbf56c12c,0xdd185f60   ! qq3

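! The constants above drive a table-plus-polynomial scheme: pp1..pp3 are
! sine coefficients, qq1..qq3 cosine coefficients, and __vlibm_TBL_sincos2
! supplies (x_k, sin(x_k), cos(x_k)) reference points.  A rough C sketch
! of the per-element evaluation (hedged; x_k/s_k/c_k are illustrative
! names, and the main loop actually drops the pp3 term, since |dx| is
! tiny there):
!
!       /* dx = |x| - x_k, with x_k the nearest table point */
!       double z = dx * dx;
!       double p = dx + dx * z * (pp1 + z * (pp2 + z * pp3)); /* ~ sin(dx) */
!       double q = z * (qq1 + z * (qq2 + z * qq3));       /* ~ cos(dx) - 1 */
!       double r = c_k + (c_k * q - s_k * p);  /* c_k cos(dx) - s_k sin(dx) */
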
! local storage indices

#define xsave           STACK_BIAS-0x8
#define ysave           STACK_BIAS-0x10
#define nsave           STACK_BIAS-0x14
#define sxsave          STACK_BIAS-0x18
#define sysave          STACK_BIAS-0x1c
#define biguns          STACK_BIAS-0x20
#define nk3             STACK_BIAS-0x24
#define nk2             STACK_BIAS-0x28
#define nk1             STACK_BIAS-0x2c
#define nk0             STACK_BIAS-0x30
#define junk            STACK_BIAS-0x38
! sizeof temp storage - must be a multiple of 16 for V9
#define tmps            0x40

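! nk0..nk3 hold, per pipelined element, the integer that becomes the
! table index.  A hedged C sketch of the trick (assumes round-to-nearest
! and SPARC's big-endian double layout; lo32() is illustrative, not libm):
!
!       #include <math.h>
!       #include <stdint.h>
!       #include <string.h>
!       static uint32_t lo32(double d) {        /* low-order word */
!               uint64_t u; memcpy(&u, &d, 8); return (uint32_t)u;
!       }
!       /* c3two44 = 3*2^44 forces ulp = 2^-7, so the low word of the
!        * sum is |x| rounded onto the 2^-7 table grid */
!       uint32_t k = lo32(fabs(x) + c3two44) << 5;  /* * 32 bytes/entry */
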
! register use

! i0  n
! i1  x
! i2  stridex
! i3  y
! i4  stridey
! i5  0x80000000

! l0  hx0
! l1  hx1
! l2  hx2
! l3  hx3
! l4  k0
! l5  k1
! l6  k2
! l7  k3

! the following are 64-bit registers in both V8+ and V9

! g1  __vlibm_TBL_sincos2
! g5  scratch

! o0  py0
! o1  py1
! o2  py2
! o3  py3
! o4  0x3e400000 (lower bound of the range check; see the sketch below)
! o5  0x3fe921fb,0x4099251e (pi/4 high word and upper bound; see below)
! o7  scratch

! f0  hx0
! f2
! f4
! f6
! f8  hx1
! f10
! f12
! f14
! f16 hx2
! f18
! f20
! f22
! f24 hx3
! f26
! f28
! f30
! f32
! f34
! f36
! f38

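! A hedged C sketch of the two range tests built from %o4/%o5 (hx is the
! sign-masked high word of x; the cutoffs are exactly the ones above):
!
!       if ((int32_t)((hx - 0x3e400000) | (0x4099251e - hx)) < 0) {
!               /* |x| < 2^-27 or |x| > ~1.6e3 (incl. Inf/NaN): .rangeN */
!       } else if ((int32_t)(0x3fe921fb - hx) < 0) {
!               /* pi/4 < |x|: .medium argument reduction */
!       } else {
!               /* |x| <= pi/4: direct table + polynomial evaluation */
!       }
!
! Either subtraction going negative sets the 32-bit sign bit, so a
! single orcc/bl tests both bounds at once.
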
#define c3two44 %f40
#define c3two51 %f42
#define invpio2 %f44
#define pio2_1  %f46
#define pio2_2  %f48
#define pio2_3  %f50
#define pp1     %f52
#define pp2     %f54
#define pp3     %f56
#define qq1     %f58
#define qq2     %f60
#define qq3     %f62

        ENTRY(__vcos_ultra3)
        save    %sp,-SA(MINFRAME)-tmps,%sp
        PIC_SETUP(l7)
        PIC_SET(l7,constants,o0)
        PIC_SET(l7,__vlibm_TBL_sincos2,o1)
        mov     %o1,%g1
        wr      %g0,0x82,%asi           ! set %asi for non-faulting loads
#ifdef __sparcv9
        stx     %i1,[%fp+xsave]         ! save arguments
        stx     %i3,[%fp+ysave]
#else
        st      %i1,[%fp+xsave]         ! save arguments
        st      %i3,[%fp+ysave]
#endif
        st      %i0,[%fp+nsave]
        st      %i2,[%fp+sxsave]
        st      %i4,[%fp+sysave]
        st      %g0,[%fp+biguns]        ! biguns = 0
        ldd     [%o0+0x00],c3two44      ! load/set up constants
        ldd     [%o0+0x08],c3two51
        ldd     [%o0+0x10],invpio2
        ldd     [%o0+0x18],pio2_1
        ldd     [%o0+0x20],pio2_2
        ldd     [%o0+0x28],pio2_3
        ldd     [%o0+0x30],pp1
        ldd     [%o0+0x38],pp2
        ldd     [%o0+0x40],pp3
        ldd     [%o0+0x48],qq1
        ldd     [%o0+0x50],qq2
        ldd     [%o0+0x58],qq3
        sethi   %hi(0x80000000),%i5
        sethi   %hi(0x3e400000),%o4
        sethi   %hi(0x3fe921fb),%o5
        or      %o5,%lo(0x3fe921fb),%o5
        sllx    %o5,32,%o5
        sethi   %hi(0x4099251e),%o7
        or      %o7,%lo(0x4099251e),%o7
        or      %o5,%o7,%o5
        sll     %i2,3,%i2               ! scale strides
        sll     %i4,3,%i4
        add     %fp,junk,%o1            ! loop prologue
        add     %fp,junk,%o2
        add     %fp,junk,%o3
        ld      [%i1],%l0               ! *x
        ld      [%i1],%f0
        ld      [%i1+4],%f3
        andn    %l0,%i5,%l0             ! mask off sign
        add     %i1,%i2,%i1             ! x += stridex
        ba      .loop0
        nop

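! The four-stage loop below (.loop0 .. .loop3) keeps four elements in
! flight.  A hedged C sketch of the strided driver it implements (the
! libmvec vector API; modulo the unrolling and non-faulting preloads):
!
!       void __vcos(int n, double *x, int stridex, double *y, int stridey)
!       {
!               for (; n > 0; n--, x += stridex, y += stridey)
!                       *y = cos(*x);
!       }
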
! 16-byte aligned
        .align  16
.loop0:
        lda     [%i1]%asi,%l1           ! preload next argument
        sub     %l0,%o4,%g5
        sub     %o5,%l0,%o7
        fabss   %f0,%f2

        lda     [%i1]%asi,%f8
        orcc    %o7,%g5,%g0
        mov     %i3,%o0                 ! py0 = y
        bl,pn   %icc,.range0            ! hx < 0x3e400000 or hx > 0x4099251e

! delay slot
        lda     [%i1+4]%asi,%f11
        addcc   %i0,-1,%i0
        add     %i3,%i4,%i3             ! y += stridey
        ble,pn  %icc,.last1

! delay slot
        andn    %l1,%i5,%l1
        add     %i1,%i2,%i1             ! x += stridex
        faddd   %f2,c3two44,%f4
        st      %f15,[%o1+4]

.loop1:
        lda     [%i1]%asi,%l2           ! preload next argument
        sub     %l1,%o4,%g5
        sub     %o5,%l1,%o7
        fabss   %f8,%f10

        lda     [%i1]%asi,%f16
        orcc    %o7,%g5,%g0
        mov     %i3,%o1                 ! py1 = y
        bl,pn   %icc,.range1            ! hx < 0x3e400000 or hx > 0x4099251e

! delay slot
        lda     [%i1+4]%asi,%f19
        addcc   %i0,-1,%i0
        add     %i3,%i4,%i3             ! y += stridey
        ble,pn  %icc,.last2

! delay slot
        andn    %l2,%i5,%l2
        add     %i1,%i2,%i1             ! x += stridex
        faddd   %f10,c3two44,%f12
        st      %f23,[%o2+4]

.loop2:
        lda     [%i1]%asi,%l3           ! preload next argument
        sub     %l2,%o4,%g5
        sub     %o5,%l2,%o7
        fabss   %f16,%f18

        lda     [%i1]%asi,%f24
        orcc    %o7,%g5,%g0
        mov     %i3,%o2                 ! py2 = y
        bl,pn   %icc,.range2            ! hx < 0x3e400000 or hx > 0x4099251e

! delay slot
        lda     [%i1+4]%asi,%f27
        addcc   %i0,-1,%i0
        add     %i3,%i4,%i3             ! y += stridey
        ble,pn  %icc,.last3

! delay slot
        andn    %l3,%i5,%l3
        add     %i1,%i2,%i1             ! x += stridex
        faddd   %f18,c3two44,%f20
        st      %f31,[%o3+4]

.loop3:
        sub     %l3,%o4,%g5
        sub     %o5,%l3,%o7
        fabss   %f24,%f26
        st      %f5,[%fp+nk0]

        orcc    %o7,%g5,%g0
        mov     %i3,%o3                 ! py3 = y
        bl,pn   %icc,.range3            ! hx < 0x3e400000 or hx > 0x4099251e
! delay slot
        st      %f13,[%fp+nk1]

!!! DONE?
.cont:
        srlx    %o5,32,%o7
        add     %i3,%i4,%i3             ! y += stridey
        fmovs   %f3,%f1
        st      %f21,[%fp+nk2]

        sub     %o7,%l0,%l0
        sub     %o7,%l1,%l1
        faddd   %f26,c3two44,%f28
        st      %f29,[%fp+nk3]

        sub     %o7,%l2,%l2
        sub     %o7,%l3,%l3
        fmovs   %f11,%f9

        or      %l0,%l1,%l0
        or      %l2,%l3,%l2
        fmovs   %f19,%f17

        fmovs   %f27,%f25
        fmuld   %f0,invpio2,%f6         ! x * invpio2, for medium range

        fmuld   %f8,invpio2,%f14
        ld      [%fp+nk0],%l4

        fmuld   %f16,invpio2,%f22
        ld      [%fp+nk1],%l5

        orcc    %l0,%l2,%g0
        bl,pn   %icc,.medium
! delay slot
        fmuld   %f24,invpio2,%f30
        ld      [%fp+nk2],%l6

        ld      [%fp+nk3],%l7
        sll     %l4,5,%l4               ! k
        fcmpd   %fcc0,%f0,pio2_3        ! x < pio2_3 iff x < 0

        sll     %l5,5,%l5
        ldd     [%l4+%g1],%f4
        fcmpd   %fcc1,%f8,pio2_3

        sll     %l6,5,%l6
        ldd     [%l5+%g1],%f12
        fcmpd   %fcc2,%f16,pio2_3

        sll     %l7,5,%l7
        ldd     [%l6+%g1],%f20
        fcmpd   %fcc3,%f24,pio2_3

        ldd     [%l7+%g1],%f28
        fsubd   %f2,%f4,%f2             ! x -= __vlibm_TBL_sincos2[k]

        fsubd   %f10,%f12,%f10

        fsubd   %f18,%f20,%f18

        fsubd   %f26,%f28,%f26

        fmuld   %f2,%f2,%f0             ! z = x * x

        fmuld   %f10,%f10,%f8

        fmuld   %f18,%f18,%f16

        fmuld   %f26,%f26,%f24

        fmuld   %f0,qq3,%f6

        fmuld   %f8,qq3,%f14

        fmuld   %f16,qq3,%f22

        fmuld   %f24,qq3,%f30

        faddd   %f6,qq2,%f6
        fmuld   %f0,pp2,%f4

        faddd   %f14,qq2,%f14
        fmuld   %f8,pp2,%f12

        faddd   %f22,qq2,%f22
        fmuld   %f16,pp2,%f20

        faddd   %f30,qq2,%f30
        fmuld   %f24,pp2,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,pp1,%f4

        fmuld   %f8,%f14,%f14
        faddd   %f12,pp1,%f12

        fmuld   %f16,%f22,%f22
        faddd   %f20,pp1,%f20

        fmuld   %f24,%f30,%f30
        faddd   %f28,pp1,%f28

        faddd   %f6,qq1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        faddd   %f14,qq1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        faddd   %f22,qq1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        faddd   %f30,qq1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        fmuld   %f2,%f4,%f4

        fmuld   %f10,%f12,%f12

        fmuld   %f18,%f20,%f20

        fmuld   %f26,%f28,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f2,%f4
        ldd     [%l4+16],%f32

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f10,%f12
        ldd     [%l5+16],%f34

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f18,%f20
        ldd     [%l6+16],%f36

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f26,%f28
        ldd     [%l7+16],%f38

        fmuld   %f32,%f6,%f6
        ldd     [%l4+8],%f2

        fmuld   %f34,%f14,%f14
        ldd     [%l5+8],%f10

        fmuld   %f36,%f22,%f22
        ldd     [%l6+8],%f18

        fmuld   %f38,%f30,%f30
        ldd     [%l7+8],%f26

        fmuld   %f2,%f4,%f4

        fmuld   %f10,%f12,%f12

        fmuld   %f18,%f20,%f20

        fmuld   %f26,%f28,%f28

        fsubd   %f6,%f4,%f6
        lda     [%i1]%asi,%l0           ! preload next argument

        fsubd   %f14,%f12,%f14
        lda     [%i1]%asi,%f0

        fsubd   %f22,%f20,%f22
        lda     [%i1+4]%asi,%f3

        fsubd   %f30,%f28,%f30
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        faddd   %f6,%f32,%f6
        st      %f6,[%o0]

        faddd   %f14,%f34,%f14
        st      %f14,[%o1]

        faddd   %f22,%f36,%f22
        st      %f22,[%o2]

        faddd   %f30,%f38,%f30
        st      %f30,[%o3]
        addcc   %i0,-1,%i0

        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

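! .medium reduces pi/4 < |x| <= ~1.6e3 modulo pi/2, with pi/2 split into
! pio2_1 + pio2_2 + pio2_3 so the rounding error of each step can be
! recovered.  A hedged per-element C sketch (fn/n/t/w are illustrative
! names; lo32() as in the earlier note):
!
!       double fn = (x * invpio2 + c3two51) - c3two51; /* round(x/(pi/2)) */
!       int    n  = (int)lo32(x * invpio2 + c3two51) + 1; /* cos(x) = sin(x+pi/2) */
!       double t  = x - fn * pio2_1;
!       double y  = t - fn * pio2_2;
!       double w  = (t - y) - fn * pio2_2;      /* error of the second step */
!       double r  = y - (fn * pio2_3 - w);      /* reduced arg, |r| <~ pi/4 */
!
! When the reduced argument is negative, the fcmple32/sll/sra/xor/sub
! sequence applies the "n = -n ^ 2" fixup so the quadrant bits match |r|.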
        .align  16
.medium:
        faddd   %f6,c3two51,%f4
        st      %f5,[%fp+nk0]

        faddd   %f14,c3two51,%f12
        st      %f13,[%fp+nk1]

        faddd   %f22,c3two51,%f20
        st      %f21,[%fp+nk2]

        faddd   %f30,c3two51,%f28
        st      %f29,[%fp+nk3]

        fsubd   %f4,c3two51,%f6

        fsubd   %f12,c3two51,%f14

        fsubd   %f20,c3two51,%f22

        fsubd   %f28,c3two51,%f30

        fmuld   %f6,pio2_1,%f2
        ld      [%fp+nk0],%l0           ! n

        fmuld   %f14,pio2_1,%f10
        ld      [%fp+nk1],%l1

        fmuld   %f22,pio2_1,%f18
        ld      [%fp+nk2],%l2

        fmuld   %f30,pio2_1,%f26
        ld      [%fp+nk3],%l3

        fsubd   %f0,%f2,%f0
        fmuld   %f6,pio2_2,%f4
        add     %l0,1,%l0

        fsubd   %f8,%f10,%f8
        fmuld   %f14,pio2_2,%f12
        add     %l1,1,%l1

        fsubd   %f16,%f18,%f16
        fmuld   %f22,pio2_2,%f20
        add     %l2,1,%l2

        fsubd   %f24,%f26,%f24
        fmuld   %f30,pio2_2,%f28
        add     %l3,1,%l3

        fsubd   %f0,%f4,%f32

        fsubd   %f8,%f12,%f34

        fsubd   %f16,%f20,%f36

        fsubd   %f24,%f28,%f38

        fsubd   %f0,%f32,%f0
        fcmple32 %f32,pio2_3,%l4        ! x <= pio2_3 iff x < 0

        fsubd   %f8,%f34,%f8
        fcmple32 %f34,pio2_3,%l5

        fsubd   %f16,%f36,%f16
        fcmple32 %f36,pio2_3,%l6

        fsubd   %f24,%f38,%f24
        fcmple32 %f38,pio2_3,%l7

        fsubd   %f0,%f4,%f0
        fmuld   %f6,pio2_3,%f6
        sll     %l4,30,%l4              ! if (x < 0) n = -n ^ 2

        fsubd   %f8,%f12,%f8
        fmuld   %f14,pio2_3,%f14
        sll     %l5,30,%l5

        fsubd   %f16,%f20,%f16
        fmuld   %f22,pio2_3,%f22
        sll     %l6,30,%l6

        fsubd   %f24,%f28,%f24
        fmuld   %f30,pio2_3,%f30
        sll     %l7,30,%l7

        fsubd   %f6,%f0,%f6
        sra     %l4,31,%l4

        fsubd   %f14,%f8,%f14
        sra     %l5,31,%l5

        fsubd   %f22,%f16,%f22
        sra     %l6,31,%l6

        fsubd   %f30,%f24,%f30
        sra     %l7,31,%l7

        fsubd   %f32,%f6,%f0            ! reduced x
        xor     %l0,%l4,%l0

        fsubd   %f34,%f14,%f8
        xor     %l1,%l5,%l1

        fsubd   %f36,%f22,%f16
        xor     %l2,%l6,%l2

        fsubd   %f38,%f30,%f24
        xor     %l3,%l7,%l3

        fabsd   %f0,%f2
        sub     %l0,%l4,%l0

        fabsd   %f8,%f10
        sub     %l1,%l5,%l1

        fabsd   %f16,%f18
        sub     %l2,%l6,%l2

        fabsd   %f24,%f26
        sub     %l3,%l7,%l3

        faddd   %f2,c3two44,%f4
        st      %f5,[%fp+nk0]
        and     %l4,2,%l4

        faddd   %f10,c3two44,%f12
        st      %f13,[%fp+nk1]
        and     %l5,2,%l5

        faddd   %f18,c3two44,%f20
        st      %f21,[%fp+nk2]
        and     %l6,2,%l6

        faddd   %f26,c3two44,%f28
        st      %f29,[%fp+nk3]
        and     %l7,2,%l7

        fsubd   %f32,%f0,%f4
        xor     %l0,%l4,%l0

        fsubd   %f34,%f8,%f12
        xor     %l1,%l5,%l1

        fsubd   %f36,%f16,%f20
        xor     %l2,%l6,%l2

        fsubd   %f38,%f24,%f28
        xor     %l3,%l7,%l3

        fzero   %f38
        ld      [%fp+nk0],%l4

        fsubd   %f4,%f6,%f6             ! w
        ld      [%fp+nk1],%l5

        fsubd   %f12,%f14,%f14
        ld      [%fp+nk2],%l6

        fnegd   %f38,%f38
        ld      [%fp+nk3],%l7
        sll     %l4,5,%l4               ! k

        fsubd   %f20,%f22,%f22
        sll     %l5,5,%l5

        fsubd   %f28,%f30,%f30
        sll     %l6,5,%l6

        fand    %f0,%f38,%f32           ! sign bit of x
        ldd     [%l4+%g1],%f4
        sll     %l7,5,%l7

        fand    %f8,%f38,%f34
        ldd     [%l5+%g1],%f12

        fand    %f16,%f38,%f36
        ldd     [%l6+%g1],%f20

        fand    %f24,%f38,%f38
        ldd     [%l7+%g1],%f28

        fsubd   %f2,%f4,%f2             ! x -= __vlibm_TBL_sincos2[k]

        fsubd   %f10,%f12,%f10

        fsubd   %f18,%f20,%f18
        nop

        fsubd   %f26,%f28,%f26
        nop

! 16-byte aligned
        fmuld   %f2,%f2,%f0             ! z = x * x
        andcc   %l0,1,%g0
        bz,pn   %icc,.case8
! delay slot
        fxor    %f6,%f32,%f32

        fmuld   %f10,%f10,%f8
        andcc   %l1,1,%g0
        bz,pn   %icc,.case4
! delay slot
        fxor    %f14,%f34,%f34

        fmuld   %f18,%f18,%f16
        andcc   %l2,1,%g0
        bz,pn   %icc,.case2
! delay slot
        fxor    %f22,%f36,%f36

        fmuld   %f26,%f26,%f24
        andcc   %l3,1,%g0
        bz,pn   %icc,.case1
! delay slot
        fxor    %f30,%f38,%f38

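! The .caseN blocks handle the 16 combinations of the low two bits of n
! across the four lanes: bit 0 picks the kernel, bit 1 the sign.  Per
! lane, a hedged C sketch (sin_kernel/cos_kernel stand for the pp*/qq*
! polynomials above):
!
!       double t = (n & 1) ? cos_kernel(r) : sin_kernel(r);
!       *py = (n & 2) ? -t : t;         /* fnegd + fmovdnz below */
!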
 681 !.case0:
 682         fmuld   %f0,qq3,%f6             ! cos(x0)
 683 
 684         fmuld   %f8,qq3,%f14            ! cos(x1)
 685 
 686         fmuld   %f16,qq3,%f22           ! cos(x2)
 687 
 688         fmuld   %f24,qq3,%f30           ! cos(x3)
 689 
 690         faddd   %f6,qq2,%f6
 691         fmuld   %f0,pp2,%f4
 692 
 693         faddd   %f14,qq2,%f14
 694         fmuld   %f8,pp2,%f12
 695 
 696         faddd   %f22,qq2,%f22
 697         fmuld   %f16,pp2,%f20
 698 
 699         faddd   %f30,qq2,%f30
 700         fmuld   %f24,pp2,%f28
 701 
 702         fmuld   %f0,%f6,%f6
 703         faddd   %f4,pp1,%f4
 704 
 705         fmuld   %f8,%f14,%f14
 706         faddd   %f12,pp1,%f12
 707 
 708         fmuld   %f16,%f22,%f22
 709         faddd   %f20,pp1,%f20
 710 
 711         fmuld   %f24,%f30,%f30
 712         faddd   %f28,pp1,%f28
 713 
 714         faddd   %f6,qq1,%f6
 715         fmuld   %f0,%f4,%f4
 716         add     %l4,%g1,%l4
 717 
 718         faddd   %f14,qq1,%f14
 719         fmuld   %f8,%f12,%f12
 720         add     %l5,%g1,%l5
 721 
 722         faddd   %f22,qq1,%f22
 723         fmuld   %f16,%f20,%f20
 724         add     %l6,%g1,%l6
 725 
 726         faddd   %f30,qq1,%f30
 727         fmuld   %f24,%f28,%f28
 728         add     %l7,%g1,%l7
 729 
 730         fmuld   %f2,%f4,%f4
 731 
 732         fmuld   %f10,%f12,%f12
 733 
 734         fmuld   %f18,%f20,%f20
 735 
 736         fmuld   %f26,%f28,%f28
 737 
 738         fmuld   %f0,%f6,%f6
 739         faddd   %f4,%f32,%f4
 740         ldd     [%l4+16],%f0
 741 
 742         fmuld   %f8,%f14,%f14
 743         faddd   %f12,%f34,%f12
 744         ldd     [%l5+16],%f8
 745 
 746         fmuld   %f16,%f22,%f22
 747         faddd   %f20,%f36,%f20
 748         ldd     [%l6+16],%f16
 749 
 750         fmuld   %f24,%f30,%f30
 751         faddd   %f28,%f38,%f28
 752         ldd     [%l7+16],%f24
 753 
 754         fmuld   %f0,%f6,%f6
 755         faddd   %f4,%f2,%f4
 756         ldd     [%l4+8],%f32
 757 
 758         fmuld   %f8,%f14,%f14
 759         faddd   %f12,%f10,%f12
 760         ldd     [%l5+8],%f34
 761 
 762         fmuld   %f16,%f22,%f22
 763         faddd   %f20,%f18,%f20
 764         ldd     [%l6+8],%f36
 765 
 766         fmuld   %f24,%f30,%f30
 767         faddd   %f28,%f26,%f28
 768         ldd     [%l7+8],%f38
 769 
 770         fmuld   %f32,%f4,%f4
 771 
 772         fmuld   %f34,%f12,%f12
 773 
 774         fmuld   %f36,%f20,%f20
 775 
 776         fmuld   %f38,%f28,%f28
 777 
 778         fsubd   %f6,%f4,%f6
 779 
 780         fsubd   %f14,%f12,%f14
 781 
 782         fsubd   %f22,%f20,%f22
 783 
 784         fsubd   %f30,%f28,%f30
 785 
 786         faddd   %f6,%f0,%f6
 787 
 788         faddd   %f14,%f8,%f14
 789 
 790         faddd   %f22,%f16,%f22
 791 
 792         faddd   %f30,%f24,%f30
 793         mov     %l0,%l4
 794 
 795         fnegd   %f6,%f4
 796         lda     [%i1]%asi,%l0           ! preload next argument
 797 
 798         fnegd   %f14,%f12
 799         lda     [%i1]%asi,%f0
 800 
 801         fnegd   %f22,%f20
 802         lda     [%i1+4]%asi,%f3
 803 
 804         fnegd   %f30,%f28
 805         andn    %l0,%i5,%l0
 806         add     %i1,%i2,%i1
 807 
 808         andcc   %l4,2,%g0
 809         fmovdnz %icc,%f4,%f6
 810         st      %f6,[%o0]
 811 
 812         andcc   %l1,2,%g0
 813         fmovdnz %icc,%f12,%f14
 814         st      %f14,[%o1]
 815 
 816         andcc   %l2,2,%g0
 817         fmovdnz %icc,%f20,%f22
 818         st      %f22,[%o2]
 819 
 820         andcc   %l3,2,%g0
 821         fmovdnz %icc,%f28,%f30
 822         st      %f30,[%o3]
 823 
 824         addcc   %i0,-1,%i0
 825         bg,pt   %icc,.loop0
 826 ! delay slot
 827         st      %f7,[%o0+4]
 828 
 829         ba,pt   %icc,.end
 830 ! delay slot
 831         nop
 832 
 833         .align  16
 834 .case1:
 835         fmuld   %f24,pp3,%f30           ! sin(x3)
 836 
 837         fmuld   %f0,qq3,%f6             ! cos(x0)
 838 
 839         fmuld   %f8,qq3,%f14            ! cos(x1)
 840 
 841         fmuld   %f16,qq3,%f22           ! cos(x2)
 842 
 843         faddd   %f30,pp2,%f30
 844         fmuld   %f24,qq2,%f28
 845 
 846         faddd   %f6,qq2,%f6
 847         fmuld   %f0,pp2,%f4
 848 
 849         faddd   %f14,qq2,%f14
 850         fmuld   %f8,pp2,%f12
 851 
 852         faddd   %f22,qq2,%f22
 853         fmuld   %f16,pp2,%f20
 854 
 855         fmuld   %f24,%f30,%f30
 856         faddd   %f28,qq1,%f28
 857 
 858         fmuld   %f0,%f6,%f6
 859         faddd   %f4,pp1,%f4
 860 
 861         fmuld   %f8,%f14,%f14
 862         faddd   %f12,pp1,%f12
 863 
 864         fmuld   %f16,%f22,%f22
 865         faddd   %f20,pp1,%f20
 866 
 867         faddd   %f30,pp1,%f30
 868         fmuld   %f24,%f28,%f28
 869         add     %l7,%g1,%l7
 870 
 871         faddd   %f6,qq1,%f6
 872         fmuld   %f0,%f4,%f4
 873         add     %l4,%g1,%l4
 874 
 875         faddd   %f14,qq1,%f14
 876         fmuld   %f8,%f12,%f12
 877         add     %l5,%g1,%l5
 878 
 879         faddd   %f22,qq1,%f22
 880         fmuld   %f16,%f20,%f20
 881         add     %l6,%g1,%l6
 882 
 883         fmuld   %f24,%f30,%f30
 884 
 885         fmuld   %f2,%f4,%f4
 886 
 887         fmuld   %f10,%f12,%f12
 888 
 889         fmuld   %f18,%f20,%f20
 890 
 891         fmuld   %f26,%f30,%f30
 892         ldd     [%l7+8],%f24
 893 
 894         fmuld   %f0,%f6,%f6
 895         faddd   %f4,%f32,%f4
 896         ldd     [%l4+16],%f0
 897 
 898         fmuld   %f8,%f14,%f14
 899         faddd   %f12,%f34,%f12
 900         ldd     [%l5+16],%f8
 901 
 902         fmuld   %f16,%f22,%f22
 903         faddd   %f20,%f36,%f20
 904         ldd     [%l6+16],%f16
 905 
 906         fmuld   %f24,%f28,%f28
 907         faddd   %f38,%f30,%f30
 908 
 909         fmuld   %f0,%f6,%f6
 910         faddd   %f4,%f2,%f4
 911         ldd     [%l4+8],%f32
 912 
 913         fmuld   %f8,%f14,%f14
 914         faddd   %f12,%f10,%f12
 915         ldd     [%l5+8],%f34
 916 
 917         fmuld   %f16,%f22,%f22
 918         faddd   %f20,%f18,%f20
 919         ldd     [%l6+8],%f36
 920 
 921         faddd   %f26,%f30,%f30
 922         ldd     [%l7+16],%f38
 923 
 924         fmuld   %f32,%f4,%f4
 925 
 926         fmuld   %f34,%f12,%f12
 927 
 928         fmuld   %f36,%f20,%f20
 929 
 930         fmuld   %f38,%f30,%f30
 931 
 932         fsubd   %f6,%f4,%f6
 933 
 934         fsubd   %f14,%f12,%f14
 935 
 936         fsubd   %f22,%f20,%f22
 937 
 938         faddd   %f30,%f28,%f30
 939 
 940         faddd   %f6,%f0,%f6
 941 
 942         faddd   %f14,%f8,%f14
 943 
 944         faddd   %f22,%f16,%f22
 945 
 946         faddd   %f30,%f24,%f30
 947         mov     %l0,%l4
 948 
 949         fnegd   %f6,%f4
 950         lda     [%i1]%asi,%l0           ! preload next argument
 951 
 952         fnegd   %f14,%f12
 953         lda     [%i1]%asi,%f0
 954 
 955         fnegd   %f22,%f20
 956         lda     [%i1+4]%asi,%f3
 957 
 958         fnegd   %f30,%f28
 959         andn    %l0,%i5,%l0
 960         add     %i1,%i2,%i1
 961 
 962         andcc   %l4,2,%g0
 963         fmovdnz %icc,%f4,%f6
 964         st      %f6,[%o0]
 965 
 966         andcc   %l1,2,%g0
 967         fmovdnz %icc,%f12,%f14
 968         st      %f14,[%o1]
 969 
 970         andcc   %l2,2,%g0
 971         fmovdnz %icc,%f20,%f22
 972         st      %f22,[%o2]
 973 
 974         andcc   %l3,2,%g0
 975         fmovdnz %icc,%f28,%f30
 976         st      %f30,[%o3]
 977 
 978         addcc   %i0,-1,%i0
 979         bg,pt   %icc,.loop0
 980 ! delay slot
 981         st      %f7,[%o0+4]
 982 
 983         ba,pt   %icc,.end
 984 ! delay slot
 985         nop
 986 
 987         .align  16
 988 .case2:
 989         fmuld   %f26,%f26,%f24
 990         andcc   %l3,1,%g0
 991         bz,pn   %icc,.case3
 992 ! delay slot
 993         fxor    %f30,%f38,%f38
 994 
 995         fmuld   %f16,pp3,%f22           ! sin(x2)
 996 
 997         fmuld   %f0,qq3,%f6             ! cos(x0)
 998 
 999         fmuld   %f8,qq3,%f14            ! cos(x1)
1000 
1001         faddd   %f22,pp2,%f22
1002         fmuld   %f16,qq2,%f20
1003 
1004         fmuld   %f24,qq3,%f30           ! cos(x3)
1005 
1006         faddd   %f6,qq2,%f6
1007         fmuld   %f0,pp2,%f4
1008 
1009         faddd   %f14,qq2,%f14
1010         fmuld   %f8,pp2,%f12
1011 
1012         fmuld   %f16,%f22,%f22
1013         faddd   %f20,qq1,%f20
1014 
1015         faddd   %f30,qq2,%f30
1016         fmuld   %f24,pp2,%f28
1017 
1018         fmuld   %f0,%f6,%f6
1019         faddd   %f4,pp1,%f4
1020 
1021         fmuld   %f8,%f14,%f14
1022         faddd   %f12,pp1,%f12
1023 
1024         faddd   %f22,pp1,%f22
1025         fmuld   %f16,%f20,%f20
1026         add     %l6,%g1,%l6
1027 
1028         fmuld   %f24,%f30,%f30
1029         faddd   %f28,pp1,%f28
1030 
1031         faddd   %f6,qq1,%f6
1032         fmuld   %f0,%f4,%f4
1033         add     %l4,%g1,%l4
1034 
1035         faddd   %f14,qq1,%f14
1036         fmuld   %f8,%f12,%f12
1037         add     %l5,%g1,%l5
1038 
1039         fmuld   %f16,%f22,%f22
1040 
1041         faddd   %f30,qq1,%f30
1042         fmuld   %f24,%f28,%f28
1043         add     %l7,%g1,%l7
1044 
1045         fmuld   %f2,%f4,%f4
1046 
1047         fmuld   %f10,%f12,%f12
1048 
1049         fmuld   %f18,%f22,%f22
1050         ldd     [%l6+8],%f16
1051 
1052         fmuld   %f26,%f28,%f28
1053 
1054         fmuld   %f0,%f6,%f6
1055         faddd   %f4,%f32,%f4
1056         ldd     [%l4+16],%f0
1057 
1058         fmuld   %f8,%f14,%f14
1059         faddd   %f12,%f34,%f12
1060         ldd     [%l5+16],%f8
1061 
1062         fmuld   %f16,%f20,%f20
1063         faddd   %f36,%f22,%f22
1064 
1065         fmuld   %f24,%f30,%f30
1066         faddd   %f28,%f38,%f28
1067         ldd     [%l7+16],%f24
1068 
1069         fmuld   %f0,%f6,%f6
1070         faddd   %f4,%f2,%f4
1071         ldd     [%l4+8],%f32
1072 
1073         fmuld   %f8,%f14,%f14
1074         faddd   %f12,%f10,%f12
1075         ldd     [%l5+8],%f34
1076 
1077         faddd   %f18,%f22,%f22
1078         ldd     [%l6+16],%f36
1079 
1080         fmuld   %f24,%f30,%f30
1081         faddd   %f28,%f26,%f28
1082         ldd     [%l7+8],%f38
1083 
1084         fmuld   %f32,%f4,%f4
1085 
1086         fmuld   %f34,%f12,%f12
1087 
1088         fmuld   %f36,%f22,%f22
1089 
1090         fmuld   %f38,%f28,%f28
1091 
1092         fsubd   %f6,%f4,%f6
1093 
1094         fsubd   %f14,%f12,%f14
1095 
1096         faddd   %f22,%f20,%f22
1097 
1098         fsubd   %f30,%f28,%f30
1099 
1100         faddd   %f6,%f0,%f6
1101 
1102         faddd   %f14,%f8,%f14
1103 
1104         faddd   %f22,%f16,%f22
1105 
1106         faddd   %f30,%f24,%f30
1107         mov     %l0,%l4
1108 
1109         fnegd   %f6,%f4
1110         lda     [%i1]%asi,%l0           ! preload next argument
1111 
1112         fnegd   %f14,%f12
1113         lda     [%i1]%asi,%f0
1114 
1115         fnegd   %f22,%f20
1116         lda     [%i1+4]%asi,%f3
1117 
1118         fnegd   %f30,%f28
1119         andn    %l0,%i5,%l0
1120         add     %i1,%i2,%i1
1121 
1122         andcc   %l4,2,%g0
1123         fmovdnz %icc,%f4,%f6
1124         st      %f6,[%o0]
1125 
1126         andcc   %l1,2,%g0
1127         fmovdnz %icc,%f12,%f14
1128         st      %f14,[%o1]
1129 
1130         andcc   %l2,2,%g0
1131         fmovdnz %icc,%f20,%f22
1132         st      %f22,[%o2]
1133 
1134         andcc   %l3,2,%g0
1135         fmovdnz %icc,%f28,%f30
1136         st      %f30,[%o3]
1137 
1138         addcc   %i0,-1,%i0
1139         bg,pt   %icc,.loop0
1140 ! delay slot
1141         st      %f7,[%o0+4]
1142 
1143         ba,pt   %icc,.end
1144 ! delay slot
1145         nop
1146 
1147         .align  16
1148 .case3:
1149         fmuld   %f16,pp3,%f22           ! sin(x2)
1150 
1151         fmuld   %f24,pp3,%f30           ! sin(x3)
1152 
1153         fmuld   %f0,qq3,%f6             ! cos(x0)
1154 
1155         fmuld   %f8,qq3,%f14            ! cos(x1)
1156 
1157         faddd   %f22,pp2,%f22
1158         fmuld   %f16,qq2,%f20
1159 
1160         faddd   %f30,pp2,%f30
1161         fmuld   %f24,qq2,%f28
1162 
1163         faddd   %f6,qq2,%f6
1164         fmuld   %f0,pp2,%f4
1165 
1166         faddd   %f14,qq2,%f14
1167         fmuld   %f8,pp2,%f12
1168 
1169         fmuld   %f16,%f22,%f22
1170         faddd   %f20,qq1,%f20
1171 
1172         fmuld   %f24,%f30,%f30
1173         faddd   %f28,qq1,%f28
1174 
1175         fmuld   %f0,%f6,%f6
1176         faddd   %f4,pp1,%f4
1177 
1178         fmuld   %f8,%f14,%f14
1179         faddd   %f12,pp1,%f12
1180 
1181         faddd   %f22,pp1,%f22
1182         fmuld   %f16,%f20,%f20
1183         add     %l6,%g1,%l6
1184 
1185         faddd   %f30,pp1,%f30
1186         fmuld   %f24,%f28,%f28
1187         add     %l7,%g1,%l7
1188 
1189         faddd   %f6,qq1,%f6
1190         fmuld   %f0,%f4,%f4
1191         add     %l4,%g1,%l4
1192 
1193         faddd   %f14,qq1,%f14
1194         fmuld   %f8,%f12,%f12
1195         add     %l5,%g1,%l5
1196 
1197         fmuld   %f16,%f22,%f22
1198 
1199         fmuld   %f24,%f30,%f30
1200 
1201         fmuld   %f2,%f4,%f4
1202 
1203         fmuld   %f10,%f12,%f12
1204 
1205         fmuld   %f18,%f22,%f22
1206         ldd     [%l6+8],%f16
1207 
1208         fmuld   %f26,%f30,%f30
1209         ldd     [%l7+8],%f24
1210 
1211         fmuld   %f0,%f6,%f6
1212         faddd   %f4,%f32,%f4
1213         ldd     [%l4+16],%f0
1214 
1215         fmuld   %f8,%f14,%f14
1216         faddd   %f12,%f34,%f12
1217         ldd     [%l5+16],%f8
1218 
1219         fmuld   %f16,%f20,%f20
1220         faddd   %f36,%f22,%f22
1221 
1222         fmuld   %f24,%f28,%f28
1223         faddd   %f38,%f30,%f30
1224 
1225         fmuld   %f0,%f6,%f6
1226         faddd   %f4,%f2,%f4
1227         ldd     [%l4+8],%f32
1228 
1229         fmuld   %f8,%f14,%f14
1230         faddd   %f12,%f10,%f12
1231         ldd     [%l5+8],%f34
1232 
1233         faddd   %f18,%f22,%f22
1234         ldd     [%l6+16],%f36
1235 
1236         faddd   %f26,%f30,%f30
1237         ldd     [%l7+16],%f38
1238 
1239         fmuld   %f32,%f4,%f4
1240 
1241         fmuld   %f34,%f12,%f12
1242 
1243         fmuld   %f36,%f22,%f22
1244 
1245         fmuld   %f38,%f30,%f30
1246 
1247         fsubd   %f6,%f4,%f6
1248 
1249         fsubd   %f14,%f12,%f14
1250 
1251         faddd   %f22,%f20,%f22
1252 
1253         faddd   %f30,%f28,%f30
1254 
1255         faddd   %f6,%f0,%f6
1256 
1257         faddd   %f14,%f8,%f14
1258 
1259         faddd   %f22,%f16,%f22
1260 
1261         faddd   %f30,%f24,%f30
1262         mov     %l0,%l4
1263 
1264         fnegd   %f6,%f4
1265         lda     [%i1]%asi,%l0           ! preload next argument
1266 
1267         fnegd   %f14,%f12
1268         lda     [%i1]%asi,%f0
1269 
1270         fnegd   %f22,%f20
1271         lda     [%i1+4]%asi,%f3
1272 
1273         fnegd   %f30,%f28
1274         andn    %l0,%i5,%l0
1275         add     %i1,%i2,%i1
1276 
1277         andcc   %l4,2,%g0
1278         fmovdnz %icc,%f4,%f6
1279         st      %f6,[%o0]
1280 
1281         andcc   %l1,2,%g0
1282         fmovdnz %icc,%f12,%f14
1283         st      %f14,[%o1]
1284 
1285         andcc   %l2,2,%g0
1286         fmovdnz %icc,%f20,%f22
1287         st      %f22,[%o2]
1288 
1289         andcc   %l3,2,%g0
1290         fmovdnz %icc,%f28,%f30
1291         st      %f30,[%o3]
1292 
1293         addcc   %i0,-1,%i0
1294         bg,pt   %icc,.loop0
1295 ! delay slot
1296         st      %f7,[%o0+4]
1297 
1298         ba,pt   %icc,.end
1299 ! delay slot
1300         nop
1301 
1302         .align  16
1303 .case4:
1304         fmuld   %f18,%f18,%f16
1305         andcc   %l2,1,%g0
1306         bz,pn   %icc,.case6
1307 ! delay slot
1308         fxor    %f22,%f36,%f36
1309 
1310         fmuld   %f26,%f26,%f24
1311         andcc   %l3,1,%g0
1312         bz,pn   %icc,.case5
1313 ! delay slot
1314         fxor    %f30,%f38,%f38
1315 
1316         fmuld   %f8,pp3,%f14            ! sin(x1)
1317 
1318         fmuld   %f0,qq3,%f6             ! cos(x0)
1319 
1320         faddd   %f14,pp2,%f14
1321         fmuld   %f8,qq2,%f12
1322 
1323         fmuld   %f16,qq3,%f22           ! cos(x2)
1324 
1325         fmuld   %f24,qq3,%f30           ! cos(x3)
1326 
1327         faddd   %f6,qq2,%f6
1328         fmuld   %f0,pp2,%f4
1329 
1330         fmuld   %f8,%f14,%f14
1331         faddd   %f12,qq1,%f12
1332 
1333         faddd   %f22,qq2,%f22
1334         fmuld   %f16,pp2,%f20
1335 
1336         faddd   %f30,qq2,%f30
1337         fmuld   %f24,pp2,%f28
1338 
1339         fmuld   %f0,%f6,%f6
1340         faddd   %f4,pp1,%f4
1341 
1342         faddd   %f14,pp1,%f14
1343         fmuld   %f8,%f12,%f12
1344         add     %l5,%g1,%l5
1345 
1346         fmuld   %f16,%f22,%f22
1347         faddd   %f20,pp1,%f20
1348 
1349         fmuld   %f24,%f30,%f30
1350         faddd   %f28,pp1,%f28
1351 
1352         faddd   %f6,qq1,%f6
1353         fmuld   %f0,%f4,%f4
1354         add     %l4,%g1,%l4
1355 
1356         fmuld   %f8,%f14,%f14
1357 
1358         faddd   %f22,qq1,%f22
1359         fmuld   %f16,%f20,%f20
1360         add     %l6,%g1,%l6
1361 
1362         faddd   %f30,qq1,%f30
1363         fmuld   %f24,%f28,%f28
1364         add     %l7,%g1,%l7
1365 
1366         fmuld   %f2,%f4,%f4
1367 
1368         fmuld   %f10,%f14,%f14
1369         ldd     [%l5+8],%f8
1370 
1371         fmuld   %f18,%f20,%f20
1372 
1373         fmuld   %f26,%f28,%f28
1374 
1375         fmuld   %f0,%f6,%f6
1376         faddd   %f4,%f32,%f4
1377         ldd     [%l4+16],%f0
1378 
1379         fmuld   %f8,%f12,%f12
1380         faddd   %f34,%f14,%f14
1381 
1382         fmuld   %f16,%f22,%f22
1383         faddd   %f20,%f36,%f20
1384         ldd     [%l6+16],%f16
1385 
1386         fmuld   %f24,%f30,%f30
1387         faddd   %f28,%f38,%f28
1388         ldd     [%l7+16],%f24
1389 
1390         fmuld   %f0,%f6,%f6
1391         faddd   %f4,%f2,%f4
1392         ldd     [%l4+8],%f32
1393 
1394         faddd   %f10,%f14,%f14
1395         ldd     [%l5+16],%f34
1396 
1397         fmuld   %f16,%f22,%f22
1398         faddd   %f20,%f18,%f20
1399         ldd     [%l6+8],%f36
1400 
1401         fmuld   %f24,%f30,%f30
1402         faddd   %f28,%f26,%f28
1403         ldd     [%l7+8],%f38
1404 
1405         fmuld   %f32,%f4,%f4
1406 
1407         fmuld   %f34,%f14,%f14
1408 
1409         fmuld   %f36,%f20,%f20
1410 
1411         fmuld   %f38,%f28,%f28
1412 
1413         fsubd   %f6,%f4,%f6
1414 
1415         faddd   %f14,%f12,%f14
1416 
1417         fsubd   %f22,%f20,%f22
1418 
1419         fsubd   %f30,%f28,%f30
1420 
1421         faddd   %f6,%f0,%f6
1422 
1423         faddd   %f14,%f8,%f14
1424 
1425         faddd   %f22,%f16,%f22
1426 
1427         faddd   %f30,%f24,%f30
1428         mov     %l0,%l4
1429 
1430         fnegd   %f6,%f4
1431         lda     [%i1]%asi,%l0           ! preload next argument
1432 
1433         fnegd   %f14,%f12
1434         lda     [%i1]%asi,%f0
1435 
1436         fnegd   %f22,%f20
1437         lda     [%i1+4]%asi,%f3
1438 
1439         fnegd   %f30,%f28
1440         andn    %l0,%i5,%l0
1441         add     %i1,%i2,%i1
1442 
1443         andcc   %l4,2,%g0
1444         fmovdnz %icc,%f4,%f6
1445         st      %f6,[%o0]
1446 
1447         andcc   %l1,2,%g0
1448         fmovdnz %icc,%f12,%f14
1449         st      %f14,[%o1]
1450 
1451         andcc   %l2,2,%g0
1452         fmovdnz %icc,%f20,%f22
1453         st      %f22,[%o2]
1454 
1455         andcc   %l3,2,%g0
1456         fmovdnz %icc,%f28,%f30
1457         st      %f30,[%o3]
1458 
1459         addcc   %i0,-1,%i0
1460         bg,pt   %icc,.loop0
1461 ! delay slot
1462         st      %f7,[%o0+4]
1463 
1464         ba,pt   %icc,.end
1465 ! delay slot
1466         nop
1467 
1468         .align  16
1469 .case5:
1470         fmuld   %f8,pp3,%f14            ! sin(x1)
1471 
1472         fmuld   %f24,pp3,%f30           ! sin(x3)
1473 
1474         fmuld   %f0,qq3,%f6             ! cos(x0)
1475 
1476         faddd   %f14,pp2,%f14
1477         fmuld   %f8,qq2,%f12
1478 
1479         fmuld   %f16,qq3,%f22           ! cos(x2)
1480 
1481         faddd   %f30,pp2,%f30
1482         fmuld   %f24,qq2,%f28
1483 
1484         faddd   %f6,qq2,%f6
1485         fmuld   %f0,pp2,%f4
1486 
1487         fmuld   %f8,%f14,%f14
1488         faddd   %f12,qq1,%f12
1489 
1490         faddd   %f22,qq2,%f22
1491         fmuld   %f16,pp2,%f20
1492 
1493         fmuld   %f24,%f30,%f30
1494         faddd   %f28,qq1,%f28
1495 
1496         fmuld   %f0,%f6,%f6
1497         faddd   %f4,pp1,%f4
1498 
1499         faddd   %f14,pp1,%f14
1500         fmuld   %f8,%f12,%f12
1501         add     %l5,%g1,%l5
1502 
1503         fmuld   %f16,%f22,%f22
1504         faddd   %f20,pp1,%f20
1505 
1506         faddd   %f30,pp1,%f30
1507         fmuld   %f24,%f28,%f28
1508         add     %l7,%g1,%l7
1509 
1510         faddd   %f6,qq1,%f6
1511         fmuld   %f0,%f4,%f4
1512         add     %l4,%g1,%l4
1513 
1514         fmuld   %f8,%f14,%f14
1515 
1516         faddd   %f22,qq1,%f22
1517         fmuld   %f16,%f20,%f20
1518         add     %l6,%g1,%l6
1519 
1520         fmuld   %f24,%f30,%f30
1521 
1522         fmuld   %f2,%f4,%f4
1523 
1524         fmuld   %f10,%f14,%f14
1525         ldd     [%l5+8],%f8
1526 
1527         fmuld   %f18,%f20,%f20
1528 
1529         fmuld   %f26,%f30,%f30
1530         ldd     [%l7+8],%f24
1531 
1532         fmuld   %f0,%f6,%f6
1533         faddd   %f4,%f32,%f4
1534         ldd     [%l4+16],%f0
1535 
1536         fmuld   %f8,%f12,%f12
1537         faddd   %f34,%f14,%f14
1538 
1539         fmuld   %f16,%f22,%f22
1540         faddd   %f20,%f36,%f20
1541         ldd     [%l6+16],%f16
1542 
1543         fmuld   %f24,%f28,%f28
1544         faddd   %f38,%f30,%f30
1545 
1546         fmuld   %f0,%f6,%f6
1547         faddd   %f4,%f2,%f4
1548         ldd     [%l4+8],%f32
1549 
1550         faddd   %f10,%f14,%f14
1551         ldd     [%l5+16],%f34
1552 
1553         fmuld   %f16,%f22,%f22
1554         faddd   %f20,%f18,%f20
1555         ldd     [%l6+8],%f36
1556 
1557         faddd   %f26,%f30,%f30
1558         ldd     [%l7+16],%f38
1559 
1560         fmuld   %f32,%f4,%f4
1561 
1562         fmuld   %f34,%f14,%f14
1563 
1564         fmuld   %f36,%f20,%f20
1565 
1566         fmuld   %f38,%f30,%f30
1567 
1568         fsubd   %f6,%f4,%f6
1569 
1570         faddd   %f14,%f12,%f14
1571 
1572         fsubd   %f22,%f20,%f22
1573 
1574         faddd   %f30,%f28,%f30
1575 
1576         faddd   %f6,%f0,%f6
1577 
1578         faddd   %f14,%f8,%f14
1579 
1580         faddd   %f22,%f16,%f22
1581 
1582         faddd   %f30,%f24,%f30
1583         mov     %l0,%l4
1584 
1585         fnegd   %f6,%f4
1586         lda     [%i1]%asi,%l0           ! preload next argument
1587 
1588         fnegd   %f14,%f12
1589         lda     [%i1]%asi,%f0
1590 
1591         fnegd   %f22,%f20
1592         lda     [%i1+4]%asi,%f3
1593 
1594         fnegd   %f30,%f28
1595         andn    %l0,%i5,%l0
1596         add     %i1,%i2,%i1
1597 
1598         andcc   %l4,2,%g0
1599         fmovdnz %icc,%f4,%f6
1600         st      %f6,[%o0]
1601 
1602         andcc   %l1,2,%g0
1603         fmovdnz %icc,%f12,%f14
1604         st      %f14,[%o1]
1605 
1606         andcc   %l2,2,%g0
1607         fmovdnz %icc,%f20,%f22
1608         st      %f22,[%o2]
1609 
1610         andcc   %l3,2,%g0
1611         fmovdnz %icc,%f28,%f30
1612         st      %f30,[%o3]
1613 
1614         addcc   %i0,-1,%i0
1615         bg,pt   %icc,.loop0
1616 ! delay slot
1617         st      %f7,[%o0+4]
1618 
1619         ba,pt   %icc,.end
1620 ! delay slot
1621         nop
1622 
1623         .align  16
1624 .case6:
1625         fmuld   %f26,%f26,%f24
1626         andcc   %l3,1,%g0
1627         bz,pn   %icc,.case7
1628 ! delay slot
1629         fxor    %f30,%f38,%f38
1630 
1631         fmuld   %f8,pp3,%f14            ! sin(x1)
1632 
1633         fmuld   %f16,pp3,%f22           ! sin(x2)
1634 
1635         fmuld   %f0,qq3,%f6             ! cos(x0)
1636 
1637         faddd   %f14,pp2,%f14
1638         fmuld   %f8,qq2,%f12
1639 
1640         faddd   %f22,pp2,%f22
1641         fmuld   %f16,qq2,%f20
1642 
1643         fmuld   %f24,qq3,%f30           ! cos(x3)
1644 
1645         faddd   %f6,qq2,%f6
1646         fmuld   %f0,pp2,%f4
1647 
1648         fmuld   %f8,%f14,%f14
1649         faddd   %f12,qq1,%f12
1650 
1651         fmuld   %f16,%f22,%f22
1652         faddd   %f20,qq1,%f20
1653 
1654         faddd   %f30,qq2,%f30
1655         fmuld   %f24,pp2,%f28
1656 
1657         fmuld   %f0,%f6,%f6
1658         faddd   %f4,pp1,%f4
1659 
1660         faddd   %f14,pp1,%f14
1661         fmuld   %f8,%f12,%f12
1662         add     %l5,%g1,%l5
1663 
1664         faddd   %f22,pp1,%f22
1665         fmuld   %f16,%f20,%f20
1666         add     %l6,%g1,%l6
1667 
1668         fmuld   %f24,%f30,%f30
1669         faddd   %f28,pp1,%f28
1670 
1671         faddd   %f6,qq1,%f6
1672         fmuld   %f0,%f4,%f4
1673         add     %l4,%g1,%l4
1674 
1675         fmuld   %f8,%f14,%f14
1676 
1677         fmuld   %f16,%f22,%f22
1678 
1679         faddd   %f30,qq1,%f30
1680         fmuld   %f24,%f28,%f28
1681         add     %l7,%g1,%l7
1682 
1683         fmuld   %f2,%f4,%f4
1684 
1685         fmuld   %f10,%f14,%f14
1686         ldd     [%l5+8],%f8
1687 
1688         fmuld   %f18,%f22,%f22
1689         ldd     [%l6+8],%f16
1690 
1691         fmuld   %f26,%f28,%f28
1692 
1693         fmuld   %f0,%f6,%f6
1694         faddd   %f4,%f32,%f4
1695         ldd     [%l4+16],%f0
1696 
1697         fmuld   %f8,%f12,%f12
1698         faddd   %f34,%f14,%f14
1699 
1700         fmuld   %f16,%f20,%f20
1701         faddd   %f36,%f22,%f22
1702 
1703         fmuld   %f24,%f30,%f30
1704         faddd   %f28,%f38,%f28
1705         ldd     [%l7+16],%f24
1706 
1707         fmuld   %f0,%f6,%f6
1708         faddd   %f4,%f2,%f4
1709         ldd     [%l4+8],%f32
1710 
1711         faddd   %f10,%f14,%f14
1712         ldd     [%l5+16],%f34
1713 
1714         faddd   %f18,%f22,%f22
1715         ldd     [%l6+16],%f36
1716 
1717         fmuld   %f24,%f30,%f30
1718         faddd   %f28,%f26,%f28
1719         ldd     [%l7+8],%f38
1720 
1721         fmuld   %f32,%f4,%f4
1722 
1723         fmuld   %f34,%f14,%f14
1724 
1725         fmuld   %f36,%f22,%f22
1726 
1727         fmuld   %f38,%f28,%f28
1728 
1729         fsubd   %f6,%f4,%f6
1730 
1731         faddd   %f14,%f12,%f14
1732 
1733         faddd   %f22,%f20,%f22
1734 
1735         fsubd   %f30,%f28,%f30
1736 
1737         faddd   %f6,%f0,%f6
1738 
1739         faddd   %f14,%f8,%f14
1740 
1741         faddd   %f22,%f16,%f22
1742 
1743         faddd   %f30,%f24,%f30
1744         mov     %l0,%l4
1745 
1746         fnegd   %f6,%f4
1747         lda     [%i1]%asi,%l0           ! preload next argument
1748 
1749         fnegd   %f14,%f12
1750         lda     [%i1]%asi,%f0
1751 
1752         fnegd   %f22,%f20
1753         lda     [%i1+4]%asi,%f3
1754 
1755         fnegd   %f30,%f28
1756         andn    %l0,%i5,%l0
1757         add     %i1,%i2,%i1
1758 
1759         andcc   %l4,2,%g0
1760         fmovdnz %icc,%f4,%f6
1761         st      %f6,[%o0]
1762 
1763         andcc   %l1,2,%g0
1764         fmovdnz %icc,%f12,%f14
1765         st      %f14,[%o1]
1766 
1767         andcc   %l2,2,%g0
1768         fmovdnz %icc,%f20,%f22
1769         st      %f22,[%o2]
1770 
1771         andcc   %l3,2,%g0
1772         fmovdnz %icc,%f28,%f30
1773         st      %f30,[%o3]
1774 
1775         addcc   %i0,-1,%i0
1776         bg,pt   %icc,.loop0
1777 ! delay slot
1778         st      %f7,[%o0+4]
1779 
1780         ba,pt   %icc,.end
1781 ! delay slot
1782         nop
1783 
1784         .align  16
1785 .case7:
1786         fmuld   %f8,pp3,%f14            ! sin(x1)
1787 
1788         fmuld   %f16,pp3,%f22           ! sin(x2)
1789 
1790         fmuld   %f24,pp3,%f30           ! sin(x3)
1791 
1792         fmuld   %f0,qq3,%f6             ! cos(x0)
1793 
1794         faddd   %f14,pp2,%f14
1795         fmuld   %f8,qq2,%f12
1796 
1797         faddd   %f22,pp2,%f22
1798         fmuld   %f16,qq2,%f20
1799 
1800         faddd   %f30,pp2,%f30
1801         fmuld   %f24,qq2,%f28
1802 
1803         faddd   %f6,qq2,%f6
1804         fmuld   %f0,pp2,%f4
1805 
1806         fmuld   %f8,%f14,%f14
1807         faddd   %f12,qq1,%f12
1808 
1809         fmuld   %f16,%f22,%f22
1810         faddd   %f20,qq1,%f20
1811 
1812         fmuld   %f24,%f30,%f30
1813         faddd   %f28,qq1,%f28
1814 
1815         fmuld   %f0,%f6,%f6
1816         faddd   %f4,pp1,%f4
1817 
1818         faddd   %f14,pp1,%f14
1819         fmuld   %f8,%f12,%f12
1820         add     %l5,%g1,%l5
1821 
1822         faddd   %f22,pp1,%f22
1823         fmuld   %f16,%f20,%f20
1824         add     %l6,%g1,%l6
1825 
1826         faddd   %f30,pp1,%f30
1827         fmuld   %f24,%f28,%f28
1828         add     %l7,%g1,%l7
1829 
1830         faddd   %f6,qq1,%f6
1831         fmuld   %f0,%f4,%f4
1832         add     %l4,%g1,%l4
1833 
1834         fmuld   %f8,%f14,%f14
1835 
1836         fmuld   %f16,%f22,%f22
1837 
1838         fmuld   %f24,%f30,%f30
1839 
1840         fmuld   %f2,%f4,%f4
1841 
1842         fmuld   %f10,%f14,%f14
1843         ldd     [%l5+8],%f8
1844 
1845         fmuld   %f18,%f22,%f22
1846         ldd     [%l6+8],%f16
1847 
1848         fmuld   %f26,%f30,%f30
1849         ldd     [%l7+8],%f24
1850 
1851         fmuld   %f0,%f6,%f6
1852         faddd   %f4,%f32,%f4
1853         ldd     [%l4+16],%f0
1854 
1855         fmuld   %f8,%f12,%f12
1856         faddd   %f34,%f14,%f14
1857 
1858         fmuld   %f16,%f20,%f20
1859         faddd   %f36,%f22,%f22
1860 
1861         fmuld   %f24,%f28,%f28
1862         faddd   %f38,%f30,%f30
1863 
1864         fmuld   %f0,%f6,%f6
1865         faddd   %f4,%f2,%f4
1866         ldd     [%l4+8],%f32
1867 
1868         faddd   %f10,%f14,%f14
1869         ldd     [%l5+16],%f34
1870 
1871         faddd   %f18,%f22,%f22
1872         ldd     [%l6+16],%f36
1873 
1874         faddd   %f26,%f30,%f30
1875         ldd     [%l7+16],%f38
1876 
1877         fmuld   %f32,%f4,%f4
1878 
1879         fmuld   %f34,%f14,%f14
1880 
1881         fmuld   %f36,%f22,%f22
1882 
1883         fmuld   %f38,%f30,%f30
1884 
1885         fsubd   %f6,%f4,%f6
1886 
1887         faddd   %f14,%f12,%f14
1888 
1889         faddd   %f22,%f20,%f22
1890 
1891         faddd   %f30,%f28,%f30
1892 
1893         faddd   %f6,%f0,%f6
1894 
1895         faddd   %f14,%f8,%f14
1896 
1897         faddd   %f22,%f16,%f22
1898 
1899         faddd   %f30,%f24,%f30
1900         mov     %l0,%l4
1901 
1902         fnegd   %f6,%f4
1903         lda     [%i1]%asi,%l0           ! preload next argument
1904 
1905         fnegd   %f14,%f12
1906         lda     [%i1]%asi,%f0
1907 
1908         fnegd   %f22,%f20
1909         lda     [%i1+4]%asi,%f3
1910 
1911         fnegd   %f30,%f28
1912         andn    %l0,%i5,%l0
1913         add     %i1,%i2,%i1
1914 
1915         andcc   %l4,2,%g0
1916         fmovdnz %icc,%f4,%f6
1917         st      %f6,[%o0]
1918 
1919         andcc   %l1,2,%g0
1920         fmovdnz %icc,%f12,%f14
1921         st      %f14,[%o1]
1922 
1923         andcc   %l2,2,%g0
1924         fmovdnz %icc,%f20,%f22
1925         st      %f22,[%o2]
1926 
1927         andcc   %l3,2,%g0
1928         fmovdnz %icc,%f28,%f30
1929         st      %f30,[%o3]
1930 
1931         addcc   %i0,-1,%i0
1932         bg,pt   %icc,.loop0
1933 ! delay slot
1934         st      %f7,[%o0+4]
1935 
1936         ba,pt   %icc,.end
1937 ! delay slot
1938         nop
1939 
1940         .align  16
1941 .case8:
1942         fmuld   %f10,%f10,%f8
1943         andcc   %l1,1,%g0
1944         bz,pn   %icc,.case12
1945 ! delay slot
1946         fxor    %f14,%f34,%f34
1947 
1948         fmuld   %f18,%f18,%f16
1949         andcc   %l2,1,%g0
1950         bz,pn   %icc,.case10
1951 ! delay slot
1952         fxor    %f22,%f36,%f36
1953 
1954         fmuld   %f26,%f26,%f24
1955         andcc   %l3,1,%g0
1956         bz,pn   %icc,.case9
1957 ! delay slot
1958         fxor    %f30,%f38,%f38
1959 
1960         fmuld   %f0,pp3,%f6             ! sin(x0)
1961 
1962         faddd   %f6,pp2,%f6
1963         fmuld   %f0,qq2,%f4
1964 
1965         fmuld   %f8,qq3,%f14            ! cos(x1)
1966 
1967         fmuld   %f16,qq3,%f22           ! cos(x2)
1968 
1969         fmuld   %f24,qq3,%f30           ! cos(x3)
1970 
1971         fmuld   %f0,%f6,%f6
1972         faddd   %f4,qq1,%f4
1973 
1974         faddd   %f14,qq2,%f14
1975         fmuld   %f8,pp2,%f12
1976 
1977         faddd   %f22,qq2,%f22
1978         fmuld   %f16,pp2,%f20
1979 
1980         faddd   %f30,qq2,%f30
1981         fmuld   %f24,pp2,%f28
1982 
1983         faddd   %f6,pp1,%f6
1984         fmuld   %f0,%f4,%f4
1985         add     %l4,%g1,%l4
1986 
1987         fmuld   %f8,%f14,%f14
1988         faddd   %f12,pp1,%f12
1989 
1990         fmuld   %f16,%f22,%f22
1991         faddd   %f20,pp1,%f20
1992 
1993         fmuld   %f24,%f30,%f30
1994         faddd   %f28,pp1,%f28
1995 
1996         fmuld   %f0,%f6,%f6
1997 
1998         faddd   %f14,qq1,%f14
1999         fmuld   %f8,%f12,%f12
2000         add     %l5,%g1,%l5
2001 
2002         faddd   %f22,qq1,%f22
2003         fmuld   %f16,%f20,%f20
2004         add     %l6,%g1,%l6
2005 
2006         faddd   %f30,qq1,%f30
2007         fmuld   %f24,%f28,%f28
2008         add     %l7,%g1,%l7
2009 
2010         fmuld   %f2,%f6,%f6
2011         ldd     [%l4+8],%f0
2012 
2013         fmuld   %f10,%f12,%f12
2014 
2015         fmuld   %f18,%f20,%f20
2016 
2017         fmuld   %f26,%f28,%f28
2018 
2019         fmuld   %f0,%f4,%f4
2020         faddd   %f32,%f6,%f6
2021 
2022         fmuld   %f8,%f14,%f14
2023         faddd   %f12,%f34,%f12
2024         ldd     [%l5+16],%f8
2025 
2026         fmuld   %f16,%f22,%f22
2027         faddd   %f20,%f36,%f20
2028         ldd     [%l6+16],%f16
2029 
2030         fmuld   %f24,%f30,%f30
2031         faddd   %f28,%f38,%f28
2032         ldd     [%l7+16],%f24
2033 
2034         faddd   %f2,%f6,%f6
2035         ldd     [%l4+16],%f32
2036 
2037         fmuld   %f8,%f14,%f14
2038         faddd   %f12,%f10,%f12
2039         ldd     [%l5+8],%f34
2040 
2041         fmuld   %f16,%f22,%f22
2042         faddd   %f20,%f18,%f20
2043         ldd     [%l6+8],%f36
2044 
2045         fmuld   %f24,%f30,%f30
2046         faddd   %f28,%f26,%f28
2047         ldd     [%l7+8],%f38
2048 
2049         fmuld   %f32,%f6,%f6
2050 
2051         fmuld   %f34,%f12,%f12
2052 
2053         fmuld   %f36,%f20,%f20
2054 
2055         fmuld   %f38,%f28,%f28
2056 
2057         faddd   %f6,%f4,%f6
2058 
2059         fsubd   %f14,%f12,%f14
2060 
2061         fsubd   %f22,%f20,%f22
2062 
2063         fsubd   %f30,%f28,%f30
2064 
2065         faddd   %f6,%f0,%f6
2066 
2067         faddd   %f14,%f8,%f14
2068 
2069         faddd   %f22,%f16,%f22
2070 
2071         faddd   %f30,%f24,%f30
2072         mov     %l0,%l4
2073 
2074         fnegd   %f6,%f4
2075         lda     [%i1]%asi,%l0           ! preload next argument
2076 
2077         fnegd   %f14,%f12
2078         lda     [%i1]%asi,%f0
2079 
2080         fnegd   %f22,%f20
2081         lda     [%i1+4]%asi,%f3
2082 
2083         fnegd   %f30,%f28
2084         andn    %l0,%i5,%l0
2085         add     %i1,%i2,%i1
2086 
2087         andcc   %l4,2,%g0
2088         fmovdnz %icc,%f4,%f6
2089         st      %f6,[%o0]
2090 
2091         andcc   %l1,2,%g0
2092         fmovdnz %icc,%f12,%f14
2093         st      %f14,[%o1]
2094 
2095         andcc   %l2,2,%g0
2096         fmovdnz %icc,%f20,%f22
2097         st      %f22,[%o2]
2098 
2099         andcc   %l3,2,%g0
2100         fmovdnz %icc,%f28,%f30
2101         st      %f30,[%o3]
2102 
2103         addcc   %i0,-1,%i0
2104         bg,pt   %icc,.loop0
2105 ! delay slot
2106         st      %f7,[%o0+4]
2107 
2108         ba,pt   %icc,.end
2109 ! delay slot
2110         nop
2111 
2112         .align  16
.case9:
        fmuld   %f0,pp3,%f6             ! sin(x0)

        fmuld   %f24,pp3,%f30           ! sin(x3)

        faddd   %f6,pp2,%f6
        fmuld   %f0,qq2,%f4

        fmuld   %f8,qq3,%f14            ! cos(x1)

        fmuld   %f16,qq3,%f22           ! cos(x2)

        faddd   %f30,pp2,%f30
        fmuld   %f24,qq2,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,qq1,%f4

        faddd   %f14,qq2,%f14
        fmuld   %f8,pp2,%f12

        faddd   %f22,qq2,%f22
        fmuld   %f16,pp2,%f20

        fmuld   %f24,%f30,%f30
        faddd   %f28,qq1,%f28

        faddd   %f6,pp1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        fmuld   %f8,%f14,%f14
        faddd   %f12,pp1,%f12

        fmuld   %f16,%f22,%f22
        faddd   %f20,pp1,%f20

        faddd   %f30,pp1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        fmuld   %f0,%f6,%f6

        faddd   %f14,qq1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        faddd   %f22,qq1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        fmuld   %f24,%f30,%f30

        fmuld   %f2,%f6,%f6
        ldd     [%l4+8],%f0

        fmuld   %f10,%f12,%f12

        fmuld   %f18,%f20,%f20

        fmuld   %f26,%f30,%f30
        ldd     [%l7+8],%f24

        fmuld   %f0,%f4,%f4
        faddd   %f32,%f6,%f6

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f34,%f12
        ldd     [%l5+16],%f8

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f36,%f20
        ldd     [%l6+16],%f16

        fmuld   %f24,%f28,%f28
        faddd   %f38,%f30,%f30

        faddd   %f2,%f6,%f6
        ldd     [%l4+16],%f32

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f10,%f12
        ldd     [%l5+8],%f34

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f18,%f20
        ldd     [%l6+8],%f36

        faddd   %f26,%f30,%f30
        ldd     [%l7+16],%f38

        fmuld   %f32,%f6,%f6

        fmuld   %f34,%f12,%f12

        fmuld   %f36,%f20,%f20

        fmuld   %f38,%f30,%f30

        faddd   %f6,%f4,%f6

        fsubd   %f14,%f12,%f14

        fsubd   %f22,%f20,%f22

        faddd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

        .align  16
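! .case10 is entered with lanes 0-2 already resolved (lanes 0 and 2
! sine, lane 1 cosine); bit 0 of %l3 decides the last lane, branching to
! .case11 when clear.  The fall-through body handles lane 3 on the
! cosine path; the fxor in the delay slot appears to fold a sign into
! lane 3's tail word before reconstruction (inferred, not documented).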
.case10:
        fmuld   %f26,%f26,%f24
        andcc   %l3,1,%g0
        bz,pn   %icc,.case11
! delay slot
        fxor    %f30,%f38,%f38

        fmuld   %f0,pp3,%f6             ! sin(x0)

        fmuld   %f16,pp3,%f22           ! sin(x2)

        faddd   %f6,pp2,%f6
        fmuld   %f0,qq2,%f4

        fmuld   %f8,qq3,%f14            ! cos(x1)

        faddd   %f22,pp2,%f22
        fmuld   %f16,qq2,%f20

        fmuld   %f24,qq3,%f30           ! cos(x3)

        fmuld   %f0,%f6,%f6
        faddd   %f4,qq1,%f4

        faddd   %f14,qq2,%f14
        fmuld   %f8,pp2,%f12

        fmuld   %f16,%f22,%f22
        faddd   %f20,qq1,%f20

        faddd   %f30,qq2,%f30
        fmuld   %f24,pp2,%f28

        faddd   %f6,pp1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        fmuld   %f8,%f14,%f14
        faddd   %f12,pp1,%f12

        faddd   %f22,pp1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        fmuld   %f24,%f30,%f30
        faddd   %f28,pp1,%f28

        fmuld   %f0,%f6,%f6

        faddd   %f14,qq1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        fmuld   %f16,%f22,%f22

        faddd   %f30,qq1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        fmuld   %f2,%f6,%f6
        ldd     [%l4+8],%f0

        fmuld   %f10,%f12,%f12

        fmuld   %f18,%f22,%f22
        ldd     [%l6+8],%f16

        fmuld   %f26,%f28,%f28

        fmuld   %f0,%f4,%f4
        faddd   %f32,%f6,%f6

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f34,%f12
        ldd     [%l5+16],%f8

        fmuld   %f16,%f20,%f20
        faddd   %f36,%f22,%f22

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f38,%f28
        ldd     [%l7+16],%f24

        faddd   %f2,%f6,%f6
        ldd     [%l4+16],%f32

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f10,%f12
        ldd     [%l5+8],%f34

        faddd   %f18,%f22,%f22
        ldd     [%l6+16],%f36

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f26,%f28
        ldd     [%l7+8],%f38

        fmuld   %f32,%f6,%f6

        fmuld   %f34,%f12,%f12

        fmuld   %f36,%f22,%f22

        fmuld   %f38,%f28,%f28

        faddd   %f6,%f4,%f6

        fsubd   %f14,%f12,%f14

        faddd   %f22,%f20,%f22

        fsubd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

        .align  16
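! .case11: lanes 0, 2 and 3 take the sine path; lane 1 the cosine path.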
.case11:
        fmuld   %f0,pp3,%f6             ! sin(x0)

        fmuld   %f16,pp3,%f22           ! sin(x2)

        fmuld   %f24,pp3,%f30           ! sin(x3)

        faddd   %f6,pp2,%f6
        fmuld   %f0,qq2,%f4

        fmuld   %f8,qq3,%f14            ! cos(x1)

        faddd   %f22,pp2,%f22
        fmuld   %f16,qq2,%f20

        faddd   %f30,pp2,%f30
        fmuld   %f24,qq2,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,qq1,%f4

        faddd   %f14,qq2,%f14
        fmuld   %f8,pp2,%f12

        fmuld   %f16,%f22,%f22
        faddd   %f20,qq1,%f20

        fmuld   %f24,%f30,%f30
        faddd   %f28,qq1,%f28

        faddd   %f6,pp1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        fmuld   %f8,%f14,%f14
        faddd   %f12,pp1,%f12

        faddd   %f22,pp1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        faddd   %f30,pp1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        fmuld   %f0,%f6,%f6

        faddd   %f14,qq1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        fmuld   %f16,%f22,%f22

        fmuld   %f24,%f30,%f30

        fmuld   %f2,%f6,%f6
        ldd     [%l4+8],%f0

        fmuld   %f10,%f12,%f12

        fmuld   %f18,%f22,%f22
        ldd     [%l6+8],%f16

        fmuld   %f26,%f30,%f30
        ldd     [%l7+8],%f24

        fmuld   %f0,%f4,%f4
        faddd   %f32,%f6,%f6

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f34,%f12
        ldd     [%l5+16],%f8

        fmuld   %f16,%f20,%f20
        faddd   %f36,%f22,%f22

        fmuld   %f24,%f28,%f28
        faddd   %f38,%f30,%f30

        faddd   %f2,%f6,%f6
        ldd     [%l4+16],%f32

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f10,%f12
        ldd     [%l5+8],%f34

        faddd   %f18,%f22,%f22
        ldd     [%l6+16],%f36

        faddd   %f26,%f30,%f30
        ldd     [%l7+16],%f38

        fmuld   %f32,%f6,%f6

        fmuld   %f34,%f12,%f12

        fmuld   %f36,%f22,%f22

        fmuld   %f38,%f30,%f30

        faddd   %f6,%f4,%f6

        fsubd   %f14,%f12,%f14

        faddd   %f22,%f20,%f22

        faddd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

        .align  16
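! .case12 first resolves the two remaining lanes: a clear bit 0 in %l2
! sends lane 2 down the sine path (.case14 subtree), then a clear bit 0
! in %l3 sends lane 3 down the sine path (.case13).  The fall-through
! body is .case12 proper: lanes 0 and 1 sine, lanes 2 and 3 cosine.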
.case12:
        fmuld   %f18,%f18,%f16
        andcc   %l2,1,%g0
        bz,pn   %icc,.case14
! delay slot
        fxor    %f22,%f36,%f36

        fmuld   %f26,%f26,%f24
        andcc   %l3,1,%g0
        bz,pn   %icc,.case13
! delay slot
        fxor    %f30,%f38,%f38

        fmuld   %f0,pp3,%f6             ! sin(x0)

        fmuld   %f8,pp3,%f14            ! sin(x1)

        faddd   %f6,pp2,%f6
        fmuld   %f0,qq2,%f4

        faddd   %f14,pp2,%f14
        fmuld   %f8,qq2,%f12

        fmuld   %f16,qq3,%f22           ! cos(x2)

        fmuld   %f24,qq3,%f30           ! cos(x3)

        fmuld   %f0,%f6,%f6
        faddd   %f4,qq1,%f4

        fmuld   %f8,%f14,%f14
        faddd   %f12,qq1,%f12

        faddd   %f22,qq2,%f22
        fmuld   %f16,pp2,%f20

        faddd   %f30,qq2,%f30
        fmuld   %f24,pp2,%f28

        faddd   %f6,pp1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        faddd   %f14,pp1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        fmuld   %f16,%f22,%f22
        faddd   %f20,pp1,%f20

        fmuld   %f24,%f30,%f30
        faddd   %f28,pp1,%f28

        fmuld   %f0,%f6,%f6

        fmuld   %f8,%f14,%f14

        faddd   %f22,qq1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        faddd   %f30,qq1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        fmuld   %f2,%f6,%f6
        ldd     [%l4+8],%f0

        fmuld   %f10,%f14,%f14
        ldd     [%l5+8],%f8

        fmuld   %f18,%f20,%f20

        fmuld   %f26,%f28,%f28

        fmuld   %f0,%f4,%f4
        faddd   %f32,%f6,%f6

        fmuld   %f8,%f12,%f12
        faddd   %f34,%f14,%f14

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f36,%f20
        ldd     [%l6+16],%f16

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f38,%f28
        ldd     [%l7+16],%f24

        faddd   %f2,%f6,%f6
        ldd     [%l4+16],%f32

        faddd   %f10,%f14,%f14
        ldd     [%l5+16],%f34

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f18,%f20
        ldd     [%l6+8],%f36

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f26,%f28
        ldd     [%l7+8],%f38

        fmuld   %f32,%f6,%f6

        fmuld   %f34,%f14,%f14

        fmuld   %f36,%f20,%f20

        fmuld   %f38,%f28,%f28

        faddd   %f6,%f4,%f6

        faddd   %f14,%f12,%f14

        fsubd   %f22,%f20,%f22

        fsubd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

        .align  16
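! .case13: lanes 0, 1 and 3 take the sine path; lane 2 the cosine path.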
.case13:
        fmuld   %f0,pp3,%f6             ! sin(x0)

        fmuld   %f8,pp3,%f14            ! sin(x1)

        fmuld   %f24,pp3,%f30           ! sin(x3)

        faddd   %f6,pp2,%f6
        fmuld   %f0,qq2,%f4

        faddd   %f14,pp2,%f14
        fmuld   %f8,qq2,%f12

        fmuld   %f16,qq3,%f22           ! cos(x2)

        faddd   %f30,pp2,%f30
        fmuld   %f24,qq2,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,qq1,%f4

        fmuld   %f8,%f14,%f14
        faddd   %f12,qq1,%f12

        faddd   %f22,qq2,%f22
        fmuld   %f16,pp2,%f20

        fmuld   %f24,%f30,%f30
        faddd   %f28,qq1,%f28

        faddd   %f6,pp1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        faddd   %f14,pp1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        fmuld   %f16,%f22,%f22
        faddd   %f20,pp1,%f20

        faddd   %f30,pp1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        fmuld   %f0,%f6,%f6

        fmuld   %f8,%f14,%f14

        faddd   %f22,qq1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        fmuld   %f24,%f30,%f30

        fmuld   %f2,%f6,%f6
        ldd     [%l4+8],%f0

        fmuld   %f10,%f14,%f14
        ldd     [%l5+8],%f8

        fmuld   %f18,%f20,%f20

        fmuld   %f26,%f30,%f30
        ldd     [%l7+8],%f24

        fmuld   %f0,%f4,%f4
        faddd   %f32,%f6,%f6

        fmuld   %f8,%f12,%f12
        faddd   %f34,%f14,%f14

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f36,%f20
        ldd     [%l6+16],%f16

        fmuld   %f24,%f28,%f28
        faddd   %f38,%f30,%f30

        faddd   %f2,%f6,%f6
        ldd     [%l4+16],%f32

        faddd   %f10,%f14,%f14
        ldd     [%l5+16],%f34

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f18,%f20
        ldd     [%l6+8],%f36

        faddd   %f26,%f30,%f30
        ldd     [%l7+16],%f38

        fmuld   %f32,%f6,%f6

        fmuld   %f34,%f14,%f14

        fmuld   %f36,%f20,%f20

        fmuld   %f38,%f30,%f30

        faddd   %f6,%f4,%f6

        faddd   %f14,%f12,%f14

        fsubd   %f22,%f20,%f22

        faddd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

        .align  16
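! .case14: a clear bit 0 in %l3 means all four lanes are sine (.case15);
! otherwise lanes 0-2 take the sine path and lane 3 the cosine path.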
.case14:
        fmuld   %f26,%f26,%f24
        andcc   %l3,1,%g0
        bz,pn   %icc,.case15
! delay slot
        fxor    %f30,%f38,%f38

        fmuld   %f0,pp3,%f6             ! sin(x0)

        fmuld   %f8,pp3,%f14            ! sin(x1)

        fmuld   %f16,pp3,%f22           ! sin(x2)

        faddd   %f6,pp2,%f6
        fmuld   %f0,qq2,%f4

        faddd   %f14,pp2,%f14
        fmuld   %f8,qq2,%f12

        faddd   %f22,pp2,%f22
        fmuld   %f16,qq2,%f20

        fmuld   %f24,qq3,%f30           ! cos(x3)

        fmuld   %f0,%f6,%f6
        faddd   %f4,qq1,%f4

        fmuld   %f8,%f14,%f14
        faddd   %f12,qq1,%f12

        fmuld   %f16,%f22,%f22
        faddd   %f20,qq1,%f20

        faddd   %f30,qq2,%f30
        fmuld   %f24,pp2,%f28

        faddd   %f6,pp1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        faddd   %f14,pp1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        faddd   %f22,pp1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        fmuld   %f24,%f30,%f30
        faddd   %f28,pp1,%f28

        fmuld   %f0,%f6,%f6

        fmuld   %f8,%f14,%f14

        fmuld   %f16,%f22,%f22

        faddd   %f30,qq1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        fmuld   %f2,%f6,%f6
        ldd     [%l4+8],%f0

        fmuld   %f10,%f14,%f14
        ldd     [%l5+8],%f8

        fmuld   %f18,%f22,%f22
        ldd     [%l6+8],%f16

        fmuld   %f26,%f28,%f28

        fmuld   %f0,%f4,%f4
        faddd   %f32,%f6,%f6

        fmuld   %f8,%f12,%f12
        faddd   %f34,%f14,%f14

        fmuld   %f16,%f20,%f20
        faddd   %f36,%f22,%f22

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f38,%f28
        ldd     [%l7+16],%f24

        faddd   %f2,%f6,%f6
        ldd     [%l4+16],%f32

        faddd   %f10,%f14,%f14
        ldd     [%l5+16],%f34

        faddd   %f18,%f22,%f22
        ldd     [%l6+16],%f36

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f26,%f28
        ldd     [%l7+8],%f38

        fmuld   %f32,%f6,%f6

        fmuld   %f34,%f14,%f14

        fmuld   %f36,%f22,%f22

        fmuld   %f38,%f28,%f28

        faddd   %f6,%f4,%f6

        faddd   %f14,%f12,%f14

        faddd   %f22,%f20,%f22

        fsubd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

        .align  16
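! .case15: all four lanes take the sine path.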
.case15:
        fmuld   %f0,pp3,%f6             ! sin(x0)

        fmuld   %f8,pp3,%f14            ! sin(x1)

        fmuld   %f16,pp3,%f22           ! sin(x2)

        fmuld   %f24,pp3,%f30           ! sin(x3)

        faddd   %f6,pp2,%f6
        fmuld   %f0,qq2,%f4

        faddd   %f14,pp2,%f14
        fmuld   %f8,qq2,%f12

        faddd   %f22,pp2,%f22
        fmuld   %f16,qq2,%f20

        faddd   %f30,pp2,%f30
        fmuld   %f24,qq2,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,qq1,%f4

        fmuld   %f8,%f14,%f14
        faddd   %f12,qq1,%f12

        fmuld   %f16,%f22,%f22
        faddd   %f20,qq1,%f20

        fmuld   %f24,%f30,%f30
        faddd   %f28,qq1,%f28

        faddd   %f6,pp1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        faddd   %f14,pp1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        faddd   %f22,pp1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        faddd   %f30,pp1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        fmuld   %f0,%f6,%f6

        fmuld   %f8,%f14,%f14

        fmuld   %f16,%f22,%f22

        fmuld   %f24,%f30,%f30

        fmuld   %f2,%f6,%f6
        ldd     [%l4+8],%f0

        fmuld   %f10,%f14,%f14
        ldd     [%l5+8],%f8

        fmuld   %f18,%f22,%f22
        ldd     [%l6+8],%f16

        fmuld   %f26,%f30,%f30
        ldd     [%l7+8],%f24

        fmuld   %f0,%f4,%f4
        faddd   %f32,%f6,%f6

        fmuld   %f8,%f12,%f12
        faddd   %f34,%f14,%f14

        fmuld   %f16,%f20,%f20
        faddd   %f36,%f22,%f22

        fmuld   %f24,%f28,%f28
        faddd   %f38,%f30,%f30

        faddd   %f2,%f6,%f6
        ldd     [%l4+16],%f32

        faddd   %f10,%f14,%f14
        ldd     [%l5+16],%f34

        faddd   %f18,%f22,%f22
        ldd     [%l6+16],%f36

        faddd   %f26,%f30,%f30
        ldd     [%l7+16],%f38

        fmuld   %f32,%f6,%f6

        fmuld   %f34,%f14,%f14

        fmuld   %f36,%f22,%f22

        fmuld   %f38,%f30,%f30

        faddd   %f6,%f4,%f6

        faddd   %f14,%f12,%f14

        faddd   %f22,%f20,%f22

        faddd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop


        .align  16
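! Annotation (added): .end completes the stores for lanes 1-3 (lane 0's
! low word was stored in the loop branch's delay slot), then, if any
! huge but finite argument was seen, restores the saved parameters and
! calls __vlibm_vcos_big_ultra3, which presumably rescans the vector and
! performs the full-precision reduction for the elements the fast path
! left undone.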
.end:
        st      %f15,[%o1+4]
        st      %f23,[%o2+4]
        st      %f31,[%o3+4]
        ld      [%fp+biguns],%i5
        tst     %i5                     ! check for huge arguments remaining
        be,pt   %icc,.exit
! delay slot
        nop
#ifdef __sparcv9
        ldx     [%fp+xsave],%o1
        ldx     [%fp+ysave],%o3
#else
        ld      [%fp+xsave],%o1
        ld      [%fp+ysave],%o3
#endif
        ld      [%fp+nsave],%o0
        ld      [%fp+sxsave],%o2
        ld      [%fp+sysave],%o4
        sra     %o2,0,%o2               ! sign-extend for V9
        sra     %o4,0,%o4
        call    __vlibm_vcos_big_ultra3
        sra     %o5,0,%o5               ! delay slot

.exit:
        ret
        restore


        .align  16
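! Annotation (added): .last1/.last2/.last3 pad out the final, partial
! group of arguments: each unused lane is given a zero argument and its
! result store is redirected to the scratch slot at [%fp+junk], so the
! common four-lane code reached through .cont can finish unchanged.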
.last1:
        faddd   %f2,c3two44,%f4
        st      %f15,[%o1+4]
.last1_from_range1:
        mov     0,%l1
        fzeros  %f8
        fzero   %f10
        add     %fp,junk,%o1
.last2:
        faddd   %f10,c3two44,%f12
        st      %f23,[%o2+4]
.last2_from_range2:
        mov     0,%l2
        fzeros  %f16
        fzero   %f18
        add     %fp,junk,%o2
.last3:
        faddd   %f18,c3two44,%f20
        st      %f31,[%o3+4]
        st      %f5,[%fp+nk0]
        st      %f13,[%fp+nk1]
.last3_from_range3:
        mov     0,%l3
        fzeros  %f24
        fzero   %f26
        ba,pt   %icc,.cont
! delay slot
        add     %fp,junk,%o3


        .align  16
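! Annotation (added): .range0 handles a lane 0 argument outside the fast
! path's range.  Tiny arguments (hx < 0x3e400000, i.e. |x| < 2^-27)
! return 1.0, using fdtoi only to raise the inexact exception when x is
! nonzero; a huge finite argument sets biguns, storing no result yet so
! .end can take the slow path; inf/NaN stores x*0 (NaN).  The lane is
! then refilled from the already preloaded next argument and control
! rejoins .loop0.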
.range0:
        cmp     %l0,%o4
        bl,pt   %icc,1f                 ! hx < 0x3e400000
! delay slot, harmless if branch taken
        sethi   %hi(0x7ff00000),%o7
        cmp     %l0,%o7
        bl,a,pt %icc,2f                 ! branch if finite
! delay slot, squashed if branch not taken
        st      %o4,[%fp+biguns]        ! set biguns
        fzero   %f0
        fmuld   %f2,%f0,%f2
        st      %f2,[%o0]
        ba,pt   %icc,2f
! delay slot
        st      %f3,[%o0+4]
1:
        fdtoi   %f2,%f4                 ! raise inexact if not zero
        sethi   %hi(0x3ff00000),%o7
        st      %o7,[%o0]
        st      %g0,[%o0+4]
2:
        addcc   %i0,-1,%i0
        ble,pn  %icc,.end
! delay slot, harmless if branch taken
        add     %i3,%i4,%i3             ! y += stridey
        andn    %l1,%i5,%l0             ! hx &= ~0x80000000
        fmovs   %f8,%f0
        fmovs   %f11,%f3
        ba,pt   %icc,.loop0
! delay slot
        add     %i1,%i2,%i1             ! x += stridex


        .align  16
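! .range1: as .range0, for lane 1; if the count runs out it finishes the
! group through .last1_from_range1.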
.range1:
        cmp     %l1,%o4
        bl,pt   %icc,1f                 ! hx < 0x3e400000
! delay slot, harmless if branch taken
        sethi   %hi(0x7ff00000),%o7
        cmp     %l1,%o7
        bl,a,pt %icc,2f                 ! branch if finite
! delay slot, squashed if branch not taken
        st      %o4,[%fp+biguns]        ! set biguns
        fzero   %f8
        fmuld   %f10,%f8,%f10
        st      %f10,[%o1]
        ba,pt   %icc,2f
! delay slot
        st      %f11,[%o1+4]
1:
        fdtoi   %f10,%f12               ! raise inexact if not zero
        sethi   %hi(0x3ff00000),%o7
        st      %o7,[%o1]
        st      %g0,[%o1+4]
2:
        addcc   %i0,-1,%i0
        ble,pn  %icc,.last1_from_range1
! delay slot, harmless if branch taken
        add     %i3,%i4,%i3             ! y += stridey
        andn    %l2,%i5,%l1             ! hx &= ~0x80000000
        fmovs   %f16,%f8
        fmovs   %f19,%f11
        ba,pt   %icc,.loop1
! delay slot
        add     %i1,%i2,%i1             ! x += stridex


        .align  16
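! .range2: as .range0, for lane 2; exhaustion exits via .last2_from_range2.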
.range2:
        cmp     %l2,%o4
        bl,pt   %icc,1f                 ! hx < 0x3e400000
! delay slot, harmless if branch taken
        sethi   %hi(0x7ff00000),%o7
        cmp     %l2,%o7
        bl,a,pt %icc,2f                 ! branch if finite
! delay slot, squashed if branch not taken
        st      %o4,[%fp+biguns]        ! set biguns
        fzero   %f16
        fmuld   %f18,%f16,%f18
        st      %f18,[%o2]
        ba,pt   %icc,2f
! delay slot
        st      %f19,[%o2+4]
1:
        fdtoi   %f18,%f20               ! raise inexact if not zero
        sethi   %hi(0x3ff00000),%o7
        st      %o7,[%o2]
        st      %g0,[%o2+4]
2:
        addcc   %i0,-1,%i0
        ble,pn  %icc,.last2_from_range2
! delay slot, harmless if branch taken
        add     %i3,%i4,%i3             ! y += stridey
        andn    %l3,%i5,%l2             ! hx &= ~0x80000000
        fmovs   %f24,%f16
        fmovs   %f27,%f19
        ba,pt   %icc,.loop2
! delay slot
        add     %i1,%i2,%i1             ! x += stridex


        .align  16
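! .range3: as .range0, for lane 3.  No lookahead argument has been
! preloaded at this point, so it reloads hx and x directly from [%i1]
! before rejoining .loop3.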
.range3:
        cmp     %l3,%o4
        bl,pt   %icc,1f                 ! hx < 0x3e400000
! delay slot, harmless if branch taken
        sethi   %hi(0x7ff00000),%o7
        cmp     %l3,%o7
        bl,a,pt %icc,2f                 ! branch if finite
! delay slot, squashed if branch not taken
        st      %o4,[%fp+biguns]        ! set biguns
        fzero   %f24
        fmuld   %f26,%f24,%f26
        st      %f26,[%o3]
        ba,pt   %icc,2f
! delay slot
        st      %f27,[%o3+4]
1:
        fdtoi   %f26,%f28               ! raise inexact if not zero
        sethi   %hi(0x3ff00000),%o7
        st      %o7,[%o3]
        st      %g0,[%o3+4]
2:
        addcc   %i0,-1,%i0
        ble,pn  %icc,.last3_from_range3
! delay slot, harmless if branch taken
        add     %i3,%i4,%i3             ! y += stridey
        ld      [%i1],%l3
        ld      [%i1],%f24
        ld      [%i1+4],%f27
        andn    %l3,%i5,%l3             ! hx &= ~0x80000000
        ba,pt   %icc,.loop3
! delay slot
        add     %i1,%i2,%i1             ! x += stridex

        SET_SIZE(__vcos_ultra3)