/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

        .file   "__vcos_ultra3.S"

#include "libm.h"
        .weak   __vcos
        .type   __vcos,#function
        __vcos = __vcos_ultra3

        RO_DATA
        .align  64
constants:
        .word   0x42c80000,0x00000000   ! 3 * 2^44
        .word   0x43380000,0x00000000   ! 3 * 2^51
        .word   0x3fe45f30,0x6dc9c883   ! invpio2
        .word   0x3ff921fb,0x54442c00   ! pio2_1
        .word   0x3d318469,0x898cc400   ! pio2_2
        .word   0x3a71701b,0x839a2520   ! pio2_3
        .word   0xbfc55555,0x55555533   ! pp1
        .word   0x3f811111,0x10e7d53b   ! pp2
        .word   0xbf2a0167,0xe6b3cf9b   ! pp3
        .word   0xbfdfffff,0xffffff65   ! qq1
        .word   0x3fa55555,0x54f88ed0   ! qq2
        .word   0xbf56c12c,0xdd185f60   ! qq3
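
! What these constants are for (a sketch, inferred from the usual
! table-driven sin/cos construction; the file itself does not spell it out):
!   3*2^44, 3*2^51  "magic" addends: adding 3*2^44 to |x| (or 3*2^51 to
!                   x*invpio2 in the medium-range path) pushes the integer
!                   of interest into the low word of the double
!   invpio2         ~ 2/pi
!   pio2_1..pio2_3  pi/2 split into three pieces, the high bits first, so
!                   n*pio2_1 is exact for the n that occur here;
!                   pio2_1 + pio2_2 + pio2_3 ~ pi/2
!   pp1..pp3        sin polynomial: sin(d) ~ d + d*(pp1*d^2 + pp2*d^4 + pp3*d^6)
!   qq1..qq3        cos polynomial: cos(d) ~ 1 + qq1*d^2 + qq2*d^4 + qq3*d^6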

! local storage indices

#define xsave           STACK_BIAS-0x8
#define ysave           STACK_BIAS-0x10
#define nsave           STACK_BIAS-0x14
#define sxsave          STACK_BIAS-0x18
#define sysave          STACK_BIAS-0x1c
#define biguns          STACK_BIAS-0x20
#define nk3             STACK_BIAS-0x24
#define nk2             STACK_BIAS-0x28
#define nk1             STACK_BIAS-0x2c
#define nk0             STACK_BIAS-0x30
#define junk            STACK_BIAS-0x38
! sizeof temp storage - must be a multiple of 16 for V9
#define tmps            0x40

! register use

! i0  n
! i1  x
! i2  stridex
! i3  y
! i4  stridey
! i5  0x80000000

! l0  hx0
! l1  hx1
! l2  hx2
! l3  hx3
! l4  k0
! l5  k1
! l6  k2
! l7  k3

! the following are 64-bit registers in both V8+ and V9

! g1  __vlibm_TBL_sincos2
! g5  scratch

! o0  py0
! o1  py1
! o2  py2
! o3  py3
! o4  0x3e400000
! o5  0x3fe921fb,0x4099251e
! o7  scratch

! f0  hx0
! f2
! f4
! f6
! f8  hx1
! f10
! f12
! f14
! f16 hx2
! f18
! f20
! f22
! f24 hx3
! f26
! f28
! f30
! f32
! f34
! f36
! f38

#define c3two44 %f40
#define c3two51 %f42
#define invpio2 %f44
#define pio2_1  %f46
#define pio2_2  %f48
#define pio2_3  %f50
#define pp1     %f52
#define pp2     %f54
#define pp3     %f56
#define qq1     %f58
#define qq2     %f60
#define qq3     %f62

        ENTRY(__vcos_ultra3)
        save    %sp,-SA(MINFRAME)-tmps,%sp
        PIC_SETUP(l7)
        PIC_SET(l7,constants,o0)
        PIC_SET(l7,__vlibm_TBL_sincos2,o1)
        mov     %o1,%g1
        wr      %g0,0x82,%asi           ! set %asi for non-faulting loads
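! (0x82 is ASI_PRIMARY_NOFAULT: the speculative "lda [...]%asi" preloads
! below may read one element past the end of the caller's array without
! faulting)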
#ifdef __sparcv9
        stx     %i1,[%fp+xsave]         ! save arguments
        stx     %i3,[%fp+ysave]
#else
        st      %i1,[%fp+xsave]         ! save arguments
        st      %i3,[%fp+ysave]
#endif
        st      %i0,[%fp+nsave]
        st      %i2,[%fp+sxsave]
        st      %i4,[%fp+sysave]
        st      %g0,[%fp+biguns]        ! biguns = 0
        ldd     [%o0+0x00],c3two44      ! load/set up constants
        ldd     [%o0+0x08],c3two51
        ldd     [%o0+0x10],invpio2
        ldd     [%o0+0x18],pio2_1
        ldd     [%o0+0x20],pio2_2
        ldd     [%o0+0x28],pio2_3
        ldd     [%o0+0x30],pp1
        ldd     [%o0+0x38],pp2
        ldd     [%o0+0x40],pp3
        ldd     [%o0+0x48],qq1
        ldd     [%o0+0x50],qq2
        ldd     [%o0+0x58],qq3
        sethi   %hi(0x80000000),%i5
        sethi   %hi(0x3e400000),%o4
        sethi   %hi(0x3fe921fb),%o5
        or      %o5,%lo(0x3fe921fb),%o5
        sllx    %o5,32,%o5
        sethi   %hi(0x4099251e),%o7
        or      %o7,%lo(0x4099251e),%o7
        or      %o5,%o7,%o5
        sll     %i2,3,%i2               ! scale strides
        sll     %i4,3,%i4
        add     %fp,junk,%o1            ! loop prologue
        add     %fp,junk,%o2
        add     %fp,junk,%o3
        ld      [%i1],%l0               ! *x
        ld      [%i1],%f0
        ld      [%i1+4],%f3
        andn    %l0,%i5,%l0             ! mask off sign
        add     %i1,%i2,%i1             ! x += stridex
        ba      .loop0
        nop

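! The main loop is software pipelined: .loop0-.loop3 each pick up one new
! argument, so four elements are in flight per pass, with stores retiring
! one group behind (the prologue points %o1-%o3 at the junk slot so the
! first pass's leftover stores are harmless).
!
! Range-check sketch: with the sign masked off, hx - 0x3e400000 is
! negative iff hx < 0x3e400000, and 0x4099251e - hx is negative iff
! hx > 0x4099251e (which also catches NaN/Inf), so or-ing the two and
! branching on the sign tests both bounds at once.
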
! 16-byte aligned
        .align  16
.loop0:
        lda     [%i1]%asi,%l1           ! preload next argument
        sub     %l0,%o4,%g5
        sub     %o5,%l0,%o7
        fabss   %f0,%f2

        lda     [%i1]%asi,%f8
        orcc    %o7,%g5,%g0
        mov     %i3,%o0                 ! py0 = y
        bl,pn   %icc,.range0            ! hx < 0x3e400000 or hx > 0x4099251e

! delay slot
        lda     [%i1+4]%asi,%f11
        addcc   %i0,-1,%i0
        add     %i3,%i4,%i3             ! y += stridey
        ble,pn  %icc,.last1

! delay slot
        andn    %l1,%i5,%l1
        add     %i1,%i2,%i1             ! x += stridex
        faddd   %f2,c3two44,%f4
        st      %f15,[%o1+4]

.loop1:
        lda     [%i1]%asi,%l2           ! preload next argument
        sub     %l1,%o4,%g5
        sub     %o5,%l1,%o7
        fabss   %f8,%f10

        lda     [%i1]%asi,%f16
        orcc    %o7,%g5,%g0
        mov     %i3,%o1                 ! py1 = y
        bl,pn   %icc,.range1            ! hx < 0x3e400000 or hx > 0x4099251e

! delay slot
        lda     [%i1+4]%asi,%f19
        addcc   %i0,-1,%i0
        add     %i3,%i4,%i3             ! y += stridey
        ble,pn  %icc,.last2

! delay slot
        andn    %l2,%i5,%l2
        add     %i1,%i2,%i1             ! x += stridex
        faddd   %f10,c3two44,%f12
        st      %f23,[%o2+4]

.loop2:
        lda     [%i1]%asi,%l3           ! preload next argument
        sub     %l2,%o4,%g5
        sub     %o5,%l2,%o7
        fabss   %f16,%f18

        lda     [%i1]%asi,%f24
        orcc    %o7,%g5,%g0
        mov     %i3,%o2                 ! py2 = y
        bl,pn   %icc,.range2            ! hx < 0x3e400000 or hx > 0x4099251e

! delay slot
        lda     [%i1+4]%asi,%f27
        addcc   %i0,-1,%i0
        add     %i3,%i4,%i3             ! y += stridey
        ble,pn  %icc,.last3

! delay slot
        andn    %l3,%i5,%l3
        add     %i1,%i2,%i1             ! x += stridex
        faddd   %f18,c3two44,%f20
        st      %f31,[%o3+4]

.loop3:
        sub     %l3,%o4,%g5
        sub     %o5,%l3,%o7
        fabss   %f24,%f26
        st      %f5,[%fp+nk0]

        orcc    %o7,%g5,%g0
        mov     %i3,%o3                 ! py3 = y
        bl,pn   %icc,.range3            ! hx < 0x3e400000 or hx > 0x4099251e
! delay slot
        st      %f13,[%fp+nk1]

.cont:
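! The code below first tests whether any lane has hx > 0x3fe921fb
! (|x| >~ pi/4) and branches to .medium if so; otherwise all four lanes
! are in the primary range and are handled by table lookup.  The low word
! of |x| + 3*2^44, saved at nk0-nk3, supplies the table index; k is
! index << 5 since entries of __vlibm_TBL_sincos2 are 32 bytes, laid out
! (as assumed from the +0/+8/+16 offsets used here) as
! { x_k, sin(x_k), cos(x_k) }.  With d = x - x_k, the result follows the
! angle-sum identity
!     cos(x_k + d) = cos(x_k)*cos(d) - sin(x_k)*sin(d),
! sin(d) and cos(d) coming from the pp/qq polynomials.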
        srlx    %o5,32,%o7
        add     %i3,%i4,%i3             ! y += stridey
        fmovs   %f3,%f1
        st      %f21,[%fp+nk2]

        sub     %o7,%l0,%l0
        sub     %o7,%l1,%l1
        faddd   %f26,c3two44,%f28
        st      %f29,[%fp+nk3]

        sub     %o7,%l2,%l2
        sub     %o7,%l3,%l3
        fmovs   %f11,%f9

        or      %l0,%l1,%l0
        or      %l2,%l3,%l2
        fmovs   %f19,%f17

        fmovs   %f27,%f25
        fmuld   %f0,invpio2,%f6         ! x * invpio2, for medium range

        fmuld   %f8,invpio2,%f14
        ld      [%fp+nk0],%l4

        fmuld   %f16,invpio2,%f22
        ld      [%fp+nk1],%l5

        orcc    %l0,%l2,%g0
        bl,pn   %icc,.medium
! delay slot
        fmuld   %f24,invpio2,%f30
        ld      [%fp+nk2],%l6

        ld      [%fp+nk3],%l7
        sll     %l4,5,%l4               ! k
        fcmpd   %fcc0,%f0,pio2_3        ! x < pio2_3 iff x < 0

        sll     %l5,5,%l5
        ldd     [%l4+%g1],%f4
        fcmpd   %fcc1,%f8,pio2_3

        sll     %l6,5,%l6
        ldd     [%l5+%g1],%f12
        fcmpd   %fcc2,%f16,pio2_3

        sll     %l7,5,%l7
        ldd     [%l6+%g1],%f20
        fcmpd   %fcc3,%f24,pio2_3

        ldd     [%l7+%g1],%f28
        fsubd   %f2,%f4,%f2             ! x -= __vlibm_TBL_sincos2[k]

        fsubd   %f10,%f12,%f10

        fsubd   %f18,%f20,%f18

        fsubd   %f26,%f28,%f26

        fmuld   %f2,%f2,%f0             ! z = x * x

        fmuld   %f10,%f10,%f8

        fmuld   %f18,%f18,%f16

        fmuld   %f26,%f26,%f24

        fmuld   %f0,qq3,%f6

        fmuld   %f8,qq3,%f14

        fmuld   %f16,qq3,%f22

        fmuld   %f24,qq3,%f30

        faddd   %f6,qq2,%f6
        fmuld   %f0,pp2,%f4

        faddd   %f14,qq2,%f14
        fmuld   %f8,pp2,%f12

        faddd   %f22,qq2,%f22
        fmuld   %f16,pp2,%f20

        faddd   %f30,qq2,%f30
        fmuld   %f24,pp2,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,pp1,%f4

        fmuld   %f8,%f14,%f14
        faddd   %f12,pp1,%f12

        fmuld   %f16,%f22,%f22
        faddd   %f20,pp1,%f20

        fmuld   %f24,%f30,%f30
        faddd   %f28,pp1,%f28

        faddd   %f6,qq1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        faddd   %f14,qq1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        faddd   %f22,qq1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        faddd   %f30,qq1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        fmuld   %f2,%f4,%f4

        fmuld   %f10,%f12,%f12

        fmuld   %f18,%f20,%f20

        fmuld   %f26,%f28,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f2,%f4
        ldd     [%l4+16],%f32

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f10,%f12
        ldd     [%l5+16],%f34

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f18,%f20
        ldd     [%l6+16],%f36

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f26,%f28
        ldd     [%l7+16],%f38

        fmuld   %f32,%f6,%f6
        ldd     [%l4+8],%f2

        fmuld   %f34,%f14,%f14
        ldd     [%l5+8],%f10

        fmuld   %f36,%f22,%f22
        ldd     [%l6+8],%f18

        fmuld   %f38,%f30,%f30
        ldd     [%l7+8],%f26

        fmuld   %f2,%f4,%f4

        fmuld   %f10,%f12,%f12

        fmuld   %f18,%f20,%f20

        fmuld   %f26,%f28,%f28

        fsubd   %f6,%f4,%f6
        lda     [%i1]%asi,%l0           ! preload next argument

        fsubd   %f14,%f12,%f14
        lda     [%i1]%asi,%f0

        fsubd   %f22,%f20,%f22
        lda     [%i1+4]%asi,%f3

        fsubd   %f30,%f28,%f30
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        faddd   %f6,%f32,%f6
        st      %f6,[%o0]

        faddd   %f14,%f34,%f14
        st      %f14,[%o1]

        faddd   %f22,%f36,%f22
        st      %f22,[%o2]

        faddd   %f30,%f38,%f30
        st      %f30,[%o3]
        addcc   %i0,-1,%i0

        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop


        .align  16
.medium:
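! Medium range: hx <= 0x4099251e but some lane has |x| >~ pi/4.  Compute
! n = round(x * 2/pi) via the 3*2^51 trick (n appears in the low word,
! saved at nk0-nk3), then reduce in three steps,
!     r = x - n*pio2_1 - n*pio2_2 - n*pio2_3,
! carrying a correction term w for the bits that do not fit.  Note n is
! incremented by one below: cos(x) = sin(x + pi/2), so the octant logic
! can then proceed exactly as for sine on n mod 4 (bit 0 picking the
! sin/cos polynomial form, bit 1 the sign).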
        faddd   %f6,c3two51,%f4
        st      %f5,[%fp+nk0]

        faddd   %f14,c3two51,%f12
        st      %f13,[%fp+nk1]

        faddd   %f22,c3two51,%f20
        st      %f21,[%fp+nk2]

        faddd   %f30,c3two51,%f28
        st      %f29,[%fp+nk3]

        fsubd   %f4,c3two51,%f6

        fsubd   %f12,c3two51,%f14

        fsubd   %f20,c3two51,%f22

        fsubd   %f28,c3two51,%f30

        fmuld   %f6,pio2_1,%f2
        ld      [%fp+nk0],%l0           ! n

        fmuld   %f14,pio2_1,%f10
        ld      [%fp+nk1],%l1

        fmuld   %f22,pio2_1,%f18
        ld      [%fp+nk2],%l2

        fmuld   %f30,pio2_1,%f26
        ld      [%fp+nk3],%l3

        fsubd   %f0,%f2,%f0
        fmuld   %f6,pio2_2,%f4
        add     %l0,1,%l0

        fsubd   %f8,%f10,%f8
        fmuld   %f14,pio2_2,%f12
        add     %l1,1,%l1

        fsubd   %f16,%f18,%f16
        fmuld   %f22,pio2_2,%f20
        add     %l2,1,%l2

        fsubd   %f24,%f26,%f24
        fmuld   %f30,pio2_2,%f28
        add     %l3,1,%l3

        fsubd   %f0,%f4,%f32

        fsubd   %f8,%f12,%f34

        fsubd   %f16,%f20,%f36

        fsubd   %f24,%f28,%f38

        fsubd   %f0,%f32,%f0
        fcmple32 %f32,pio2_3,%l4        ! x <= pio2_3 iff x < 0

        fsubd   %f8,%f34,%f8
        fcmple32 %f34,pio2_3,%l5

        fsubd   %f16,%f36,%f16
        fcmple32 %f36,pio2_3,%l6

        fsubd   %f24,%f38,%f24
        fcmple32 %f38,pio2_3,%l7

        fsubd   %f0,%f4,%f0
        fmuld   %f6,pio2_3,%f6
        sll     %l4,30,%l4              ! if (x < 0) n = -n ^ 2

        fsubd   %f8,%f12,%f8
        fmuld   %f14,pio2_3,%f14
        sll     %l5,30,%l5

        fsubd   %f16,%f20,%f16
        fmuld   %f22,pio2_3,%f22
        sll     %l6,30,%l6

        fsubd   %f24,%f28,%f24
        fmuld   %f30,pio2_3,%f30
        sll     %l7,30,%l7

        fsubd   %f6,%f0,%f6
        sra     %l4,31,%l4

        fsubd   %f14,%f8,%f14
        sra     %l5,31,%l5

        fsubd   %f22,%f16,%f22
        sra     %l6,31,%l6

        fsubd   %f30,%f24,%f30
        sra     %l7,31,%l7
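
! fcmple32 deposits a 2-bit compare result; the sll/sra pairs above turn
! it into all-zeros (x >= 0) or all-ones (x < 0) in %l4-%l7.  The xor/sub
! pairs below compute n = -n for negative x, and the later "and ...,2" /
! xor steps fold in the ^2, implementing "if (x < 0) n = -n ^ 2".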

        fsubd   %f32,%f6,%f0            ! reduced x
        xor     %l0,%l4,%l0

        fsubd   %f34,%f14,%f8
        xor     %l1,%l5,%l1

        fsubd   %f36,%f22,%f16
        xor     %l2,%l6,%l2

        fsubd   %f38,%f30,%f24
        xor     %l3,%l7,%l3

        fabsd   %f0,%f2
        sub     %l0,%l4,%l0

        fabsd   %f8,%f10
        sub     %l1,%l5,%l1

        fabsd   %f16,%f18
        sub     %l2,%l6,%l2

        fabsd   %f24,%f26
        sub     %l3,%l7,%l3

        faddd   %f2,c3two44,%f4
        st      %f5,[%fp+nk0]
        and     %l4,2,%l4

        faddd   %f10,c3two44,%f12
        st      %f13,[%fp+nk1]
        and     %l5,2,%l5

        faddd   %f18,c3two44,%f20
        st      %f21,[%fp+nk2]
        and     %l6,2,%l6

        faddd   %f26,c3two44,%f28
        st      %f29,[%fp+nk3]
        and     %l7,2,%l7

        fsubd   %f32,%f0,%f4
        xor     %l0,%l4,%l0

        fsubd   %f34,%f8,%f12
        xor     %l1,%l5,%l1

        fsubd   %f36,%f16,%f20
        xor     %l2,%l6,%l2

        fsubd   %f38,%f24,%f28
        xor     %l3,%l7,%l3

        fzero   %f38
        ld      [%fp+nk0],%l4

        fsubd   %f4,%f6,%f6             ! w
        ld      [%fp+nk1],%l5

        fsubd   %f12,%f14,%f14
        ld      [%fp+nk2],%l6

        fnegd   %f38,%f38
        ld      [%fp+nk3],%l7
        sll     %l4,5,%l4               ! k

        fsubd   %f20,%f22,%f22
        sll     %l5,5,%l5

        fsubd   %f28,%f30,%f30
        sll     %l6,5,%l6

        fand    %f0,%f38,%f32           ! sign bit of x
        ldd     [%l4+%g1],%f4
        sll     %l7,5,%l7

        fand    %f8,%f38,%f34
        ldd     [%l5+%g1],%f12

        fand    %f16,%f38,%f36
        ldd     [%l6+%g1],%f20

        fand    %f24,%f38,%f38
        ldd     [%l7+%g1],%f28

        fsubd   %f2,%f4,%f2             ! x -= __vlibm_TBL_sincos2[k]

        fsubd   %f10,%f12,%f10

        fsubd   %f18,%f20,%f18
        nop

        fsubd   %f26,%f28,%f26
        nop

! 16-byte aligned
        fmuld   %f2,%f2,%f0             ! z = x * x
        andcc   %l0,1,%g0
        bz,pn   %icc,.case8
! delay slot
        fxor    %f6,%f32,%f32

        fmuld   %f10,%f10,%f8
        andcc   %l1,1,%g0
        bz,pn   %icc,.case4
! delay slot
        fxor    %f14,%f34,%f34

        fmuld   %f18,%f18,%f16
        andcc   %l2,1,%g0
        bz,pn   %icc,.case2
! delay slot
        fxor    %f22,%f36,%f36

        fmuld   %f26,%f26,%f24
        andcc   %l3,1,%g0
        bz,pn   %icc,.case1
! delay slot
        fxor    %f30,%f38,%f38

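! The four (n & 1) tests above dispatch to one of 16 specializations:
! bit 0 of each lane's n selects whether that lane evaluates the cos-form
! polynomial (bit = 1, as in .case0 below) or the sin-form, the sign of
! the reduced argument having already been folded in by the fxor's.
! Each case then computes the result and its negation, and bit 1 of n
! picks between them via fmovdnz at the bottom.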
!.case0:
        fmuld   %f0,qq3,%f6             ! cos(x0)

        fmuld   %f8,qq3,%f14            ! cos(x1)

        fmuld   %f16,qq3,%f22           ! cos(x2)

        fmuld   %f24,qq3,%f30           ! cos(x3)

        faddd   %f6,qq2,%f6
        fmuld   %f0,pp2,%f4

        faddd   %f14,qq2,%f14
        fmuld   %f8,pp2,%f12

        faddd   %f22,qq2,%f22
        fmuld   %f16,pp2,%f20

        faddd   %f30,qq2,%f30
        fmuld   %f24,pp2,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,pp1,%f4

        fmuld   %f8,%f14,%f14
        faddd   %f12,pp1,%f12

        fmuld   %f16,%f22,%f22
        faddd   %f20,pp1,%f20

        fmuld   %f24,%f30,%f30
        faddd   %f28,pp1,%f28

        faddd   %f6,qq1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        faddd   %f14,qq1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        faddd   %f22,qq1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        faddd   %f30,qq1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        fmuld   %f2,%f4,%f4

        fmuld   %f10,%f12,%f12

        fmuld   %f18,%f20,%f20

        fmuld   %f26,%f28,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f32,%f4
        ldd     [%l4+16],%f0

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f34,%f12
        ldd     [%l5+16],%f8

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f36,%f20
        ldd     [%l6+16],%f16

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f38,%f28
        ldd     [%l7+16],%f24

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f2,%f4
        ldd     [%l4+8],%f32

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f10,%f12
        ldd     [%l5+8],%f34

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f18,%f20
        ldd     [%l6+8],%f36

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f26,%f28
        ldd     [%l7+8],%f38

        fmuld   %f32,%f4,%f4

        fmuld   %f34,%f12,%f12

        fmuld   %f36,%f20,%f20

        fmuld   %f38,%f28,%f28

        fsubd   %f6,%f4,%f6

        fsubd   %f14,%f12,%f14

        fsubd   %f22,%f20,%f22

        fsubd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

        .align  16
.case1:
        fmuld   %f24,pp3,%f30           ! sin(x3)

        fmuld   %f0,qq3,%f6             ! cos(x0)

        fmuld   %f8,qq3,%f14            ! cos(x1)

        fmuld   %f16,qq3,%f22           ! cos(x2)

        faddd   %f30,pp2,%f30
        fmuld   %f24,qq2,%f28

        faddd   %f6,qq2,%f6
        fmuld   %f0,pp2,%f4

        faddd   %f14,qq2,%f14
        fmuld   %f8,pp2,%f12

        faddd   %f22,qq2,%f22
        fmuld   %f16,pp2,%f20

        fmuld   %f24,%f30,%f30
        faddd   %f28,qq1,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,pp1,%f4

        fmuld   %f8,%f14,%f14
        faddd   %f12,pp1,%f12

        fmuld   %f16,%f22,%f22
        faddd   %f20,pp1,%f20

        faddd   %f30,pp1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        faddd   %f6,qq1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        faddd   %f14,qq1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        faddd   %f22,qq1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        fmuld   %f24,%f30,%f30

        fmuld   %f2,%f4,%f4

        fmuld   %f10,%f12,%f12

        fmuld   %f18,%f20,%f20

        fmuld   %f26,%f30,%f30
        ldd     [%l7+8],%f24

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f32,%f4
        ldd     [%l4+16],%f0

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f34,%f12
        ldd     [%l5+16],%f8

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f36,%f20
        ldd     [%l6+16],%f16

        fmuld   %f24,%f28,%f28
        faddd   %f38,%f30,%f30

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f2,%f4
        ldd     [%l4+8],%f32

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f10,%f12
        ldd     [%l5+8],%f34

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f18,%f20
        ldd     [%l6+8],%f36

        faddd   %f26,%f30,%f30
        ldd     [%l7+16],%f38

        fmuld   %f32,%f4,%f4

        fmuld   %f34,%f12,%f12

        fmuld   %f36,%f20,%f20

        fmuld   %f38,%f30,%f30

        fsubd   %f6,%f4,%f6

        fsubd   %f14,%f12,%f14

        fsubd   %f22,%f20,%f22

        faddd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

        .align  16
.case2:
        fmuld   %f26,%f26,%f24
        andcc   %l3,1,%g0
        bz,pn   %icc,.case3
! delay slot
        fxor    %f30,%f38,%f38

        fmuld   %f16,pp3,%f22           ! sin(x2)

        fmuld   %f0,qq3,%f6             ! cos(x0)

        fmuld   %f8,qq3,%f14            ! cos(x1)

        faddd   %f22,pp2,%f22
        fmuld   %f16,qq2,%f20

        fmuld   %f24,qq3,%f30           ! cos(x3)

        faddd   %f6,qq2,%f6
        fmuld   %f0,pp2,%f4

        faddd   %f14,qq2,%f14
        fmuld   %f8,pp2,%f12

        fmuld   %f16,%f22,%f22
        faddd   %f20,qq1,%f20

        faddd   %f30,qq2,%f30
        fmuld   %f24,pp2,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,pp1,%f4

        fmuld   %f8,%f14,%f14
        faddd   %f12,pp1,%f12

        faddd   %f22,pp1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        fmuld   %f24,%f30,%f30
        faddd   %f28,pp1,%f28

        faddd   %f6,qq1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        faddd   %f14,qq1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        fmuld   %f16,%f22,%f22

        faddd   %f30,qq1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        fmuld   %f2,%f4,%f4

        fmuld   %f10,%f12,%f12

        fmuld   %f18,%f22,%f22
        ldd     [%l6+8],%f16

        fmuld   %f26,%f28,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f32,%f4
        ldd     [%l4+16],%f0

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f34,%f12
        ldd     [%l5+16],%f8

        fmuld   %f16,%f20,%f20
        faddd   %f36,%f22,%f22

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f38,%f28
        ldd     [%l7+16],%f24

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f2,%f4
        ldd     [%l4+8],%f32

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f10,%f12
        ldd     [%l5+8],%f34

        faddd   %f18,%f22,%f22
        ldd     [%l6+16],%f36

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f26,%f28
        ldd     [%l7+8],%f38

        fmuld   %f32,%f4,%f4

        fmuld   %f34,%f12,%f12

        fmuld   %f36,%f22,%f22

        fmuld   %f38,%f28,%f28

        fsubd   %f6,%f4,%f6

        fsubd   %f14,%f12,%f14

        faddd   %f22,%f20,%f22

        fsubd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

        .align  16
.case3:
        fmuld   %f16,pp3,%f22           ! sin(x2)

        fmuld   %f24,pp3,%f30           ! sin(x3)

        fmuld   %f0,qq3,%f6             ! cos(x0)

        fmuld   %f8,qq3,%f14            ! cos(x1)

        faddd   %f22,pp2,%f22
        fmuld   %f16,qq2,%f20

        faddd   %f30,pp2,%f30
        fmuld   %f24,qq2,%f28

        faddd   %f6,qq2,%f6
        fmuld   %f0,pp2,%f4

        faddd   %f14,qq2,%f14
        fmuld   %f8,pp2,%f12

        fmuld   %f16,%f22,%f22
        faddd   %f20,qq1,%f20

        fmuld   %f24,%f30,%f30
        faddd   %f28,qq1,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,pp1,%f4

        fmuld   %f8,%f14,%f14
        faddd   %f12,pp1,%f12

        faddd   %f22,pp1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        faddd   %f30,pp1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        faddd   %f6,qq1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        faddd   %f14,qq1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        fmuld   %f16,%f22,%f22

        fmuld   %f24,%f30,%f30

        fmuld   %f2,%f4,%f4

        fmuld   %f10,%f12,%f12

        fmuld   %f18,%f22,%f22
        ldd     [%l6+8],%f16

        fmuld   %f26,%f30,%f30
        ldd     [%l7+8],%f24

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f32,%f4
        ldd     [%l4+16],%f0

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f34,%f12
        ldd     [%l5+16],%f8

        fmuld   %f16,%f20,%f20
        faddd   %f36,%f22,%f22

        fmuld   %f24,%f28,%f28
        faddd   %f38,%f30,%f30

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f2,%f4
        ldd     [%l4+8],%f32

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f10,%f12
        ldd     [%l5+8],%f34

        faddd   %f18,%f22,%f22
        ldd     [%l6+16],%f36

        faddd   %f26,%f30,%f30
        ldd     [%l7+16],%f38

        fmuld   %f32,%f4,%f4

        fmuld   %f34,%f12,%f12

        fmuld   %f36,%f22,%f22

        fmuld   %f38,%f30,%f30

        fsubd   %f6,%f4,%f6

        fsubd   %f14,%f12,%f14

        faddd   %f22,%f20,%f22

        faddd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

        .align  16
.case4:
        fmuld   %f18,%f18,%f16
        andcc   %l2,1,%g0
        bz,pn   %icc,.case6
! delay slot
        fxor    %f22,%f36,%f36

        fmuld   %f26,%f26,%f24
        andcc   %l3,1,%g0
        bz,pn   %icc,.case5
! delay slot
        fxor    %f30,%f38,%f38

        fmuld   %f8,pp3,%f14            ! sin(x1)

        fmuld   %f0,qq3,%f6             ! cos(x0)

        faddd   %f14,pp2,%f14
        fmuld   %f8,qq2,%f12

        fmuld   %f16,qq3,%f22           ! cos(x2)

        fmuld   %f24,qq3,%f30           ! cos(x3)

        faddd   %f6,qq2,%f6
        fmuld   %f0,pp2,%f4

        fmuld   %f8,%f14,%f14
        faddd   %f12,qq1,%f12

        faddd   %f22,qq2,%f22
        fmuld   %f16,pp2,%f20

        faddd   %f30,qq2,%f30
        fmuld   %f24,pp2,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,pp1,%f4

        faddd   %f14,pp1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        fmuld   %f16,%f22,%f22
        faddd   %f20,pp1,%f20

        fmuld   %f24,%f30,%f30
        faddd   %f28,pp1,%f28

        faddd   %f6,qq1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        fmuld   %f8,%f14,%f14

        faddd   %f22,qq1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        faddd   %f30,qq1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        fmuld   %f2,%f4,%f4

        fmuld   %f10,%f14,%f14
        ldd     [%l5+8],%f8

        fmuld   %f18,%f20,%f20

        fmuld   %f26,%f28,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f32,%f4
        ldd     [%l4+16],%f0

        fmuld   %f8,%f12,%f12
        faddd   %f34,%f14,%f14

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f36,%f20
        ldd     [%l6+16],%f16

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f38,%f28
        ldd     [%l7+16],%f24

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f2,%f4
        ldd     [%l4+8],%f32

        faddd   %f10,%f14,%f14
        ldd     [%l5+16],%f34

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f18,%f20
        ldd     [%l6+8],%f36

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f26,%f28
        ldd     [%l7+8],%f38

        fmuld   %f32,%f4,%f4

        fmuld   %f34,%f14,%f14

        fmuld   %f36,%f20,%f20

        fmuld   %f38,%f28,%f28

        fsubd   %f6,%f4,%f6

        faddd   %f14,%f12,%f14

        fsubd   %f22,%f20,%f22

        fsubd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

        .align  16
.case5:
        fmuld   %f8,pp3,%f14            ! sin(x1)

        fmuld   %f24,pp3,%f30           ! sin(x3)

        fmuld   %f0,qq3,%f6             ! cos(x0)

        faddd   %f14,pp2,%f14
        fmuld   %f8,qq2,%f12

        fmuld   %f16,qq3,%f22           ! cos(x2)

        faddd   %f30,pp2,%f30
        fmuld   %f24,qq2,%f28

        faddd   %f6,qq2,%f6
        fmuld   %f0,pp2,%f4

        fmuld   %f8,%f14,%f14
        faddd   %f12,qq1,%f12

        faddd   %f22,qq2,%f22
        fmuld   %f16,pp2,%f20

        fmuld   %f24,%f30,%f30
        faddd   %f28,qq1,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,pp1,%f4

        faddd   %f14,pp1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        fmuld   %f16,%f22,%f22
        faddd   %f20,pp1,%f20

        faddd   %f30,pp1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        faddd   %f6,qq1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        fmuld   %f8,%f14,%f14

        faddd   %f22,qq1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        fmuld   %f24,%f30,%f30

        fmuld   %f2,%f4,%f4

        fmuld   %f10,%f14,%f14
        ldd     [%l5+8],%f8

        fmuld   %f18,%f20,%f20

        fmuld   %f26,%f30,%f30
        ldd     [%l7+8],%f24

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f32,%f4
        ldd     [%l4+16],%f0

        fmuld   %f8,%f12,%f12
        faddd   %f34,%f14,%f14

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f36,%f20
        ldd     [%l6+16],%f16

        fmuld   %f24,%f28,%f28
        faddd   %f38,%f30,%f30

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f2,%f4
        ldd     [%l4+8],%f32

        faddd   %f10,%f14,%f14
        ldd     [%l5+16],%f34

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f18,%f20
        ldd     [%l6+8],%f36

        faddd   %f26,%f30,%f30
        ldd     [%l7+16],%f38

        fmuld   %f32,%f4,%f4

        fmuld   %f34,%f14,%f14

        fmuld   %f36,%f20,%f20

        fmuld   %f38,%f30,%f30

        fsubd   %f6,%f4,%f6

        faddd   %f14,%f12,%f14

        fsubd   %f22,%f20,%f22

        faddd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

        .align  16
.case6:
        fmuld   %f26,%f26,%f24
        andcc   %l3,1,%g0
        bz,pn   %icc,.case7
! delay slot
        fxor    %f30,%f38,%f38

        fmuld   %f8,pp3,%f14            ! sin(x1)

        fmuld   %f16,pp3,%f22           ! sin(x2)

        fmuld   %f0,qq3,%f6             ! cos(x0)

        faddd   %f14,pp2,%f14
        fmuld   %f8,qq2,%f12

        faddd   %f22,pp2,%f22
        fmuld   %f16,qq2,%f20

        fmuld   %f24,qq3,%f30           ! cos(x3)

        faddd   %f6,qq2,%f6
        fmuld   %f0,pp2,%f4

        fmuld   %f8,%f14,%f14
        faddd   %f12,qq1,%f12

        fmuld   %f16,%f22,%f22
        faddd   %f20,qq1,%f20

        faddd   %f30,qq2,%f30
        fmuld   %f24,pp2,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,pp1,%f4

        faddd   %f14,pp1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        faddd   %f22,pp1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        fmuld   %f24,%f30,%f30
        faddd   %f28,pp1,%f28

        faddd   %f6,qq1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        fmuld   %f8,%f14,%f14

        fmuld   %f16,%f22,%f22

        faddd   %f30,qq1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        fmuld   %f2,%f4,%f4

        fmuld   %f10,%f14,%f14
        ldd     [%l5+8],%f8

        fmuld   %f18,%f22,%f22
        ldd     [%l6+8],%f16

        fmuld   %f26,%f28,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f32,%f4
        ldd     [%l4+16],%f0

        fmuld   %f8,%f12,%f12
        faddd   %f34,%f14,%f14

        fmuld   %f16,%f20,%f20
        faddd   %f36,%f22,%f22

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f38,%f28
        ldd     [%l7+16],%f24

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f2,%f4
        ldd     [%l4+8],%f32

        faddd   %f10,%f14,%f14
        ldd     [%l5+16],%f34

        faddd   %f18,%f22,%f22
        ldd     [%l6+16],%f36

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f26,%f28
        ldd     [%l7+8],%f38

        fmuld   %f32,%f4,%f4

        fmuld   %f34,%f14,%f14

        fmuld   %f36,%f22,%f22

        fmuld   %f38,%f28,%f28

        fsubd   %f6,%f4,%f6

        faddd   %f14,%f12,%f14

        faddd   %f22,%f20,%f22

        fsubd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

        .align  16
.case7:
        fmuld   %f8,pp3,%f14            ! sin(x1)

        fmuld   %f16,pp3,%f22           ! sin(x2)

        fmuld   %f24,pp3,%f30           ! sin(x3)

        fmuld   %f0,qq3,%f6             ! cos(x0)

        faddd   %f14,pp2,%f14
        fmuld   %f8,qq2,%f12

        faddd   %f22,pp2,%f22
        fmuld   %f16,qq2,%f20

        faddd   %f30,pp2,%f30
        fmuld   %f24,qq2,%f28

        faddd   %f6,qq2,%f6
        fmuld   %f0,pp2,%f4

        fmuld   %f8,%f14,%f14
        faddd   %f12,qq1,%f12

        fmuld   %f16,%f22,%f22
        faddd   %f20,qq1,%f20

        fmuld   %f24,%f30,%f30
        faddd   %f28,qq1,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,pp1,%f4

        faddd   %f14,pp1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        faddd   %f22,pp1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        faddd   %f30,pp1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        faddd   %f6,qq1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        fmuld   %f8,%f14,%f14

        fmuld   %f16,%f22,%f22

        fmuld   %f24,%f30,%f30

        fmuld   %f2,%f4,%f4

        fmuld   %f10,%f14,%f14
        ldd     [%l5+8],%f8

        fmuld   %f18,%f22,%f22
        ldd     [%l6+8],%f16

        fmuld   %f26,%f30,%f30
        ldd     [%l7+8],%f24

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f32,%f4
        ldd     [%l4+16],%f0

        fmuld   %f8,%f12,%f12
        faddd   %f34,%f14,%f14

        fmuld   %f16,%f20,%f20
        faddd   %f36,%f22,%f22

        fmuld   %f24,%f28,%f28
        faddd   %f38,%f30,%f30

        fmuld   %f0,%f6,%f6
        faddd   %f4,%f2,%f4
        ldd     [%l4+8],%f32

        faddd   %f10,%f14,%f14
        ldd     [%l5+16],%f34

        faddd   %f18,%f22,%f22
        ldd     [%l6+16],%f36

        faddd   %f26,%f30,%f30
        ldd     [%l7+16],%f38

        fmuld   %f32,%f4,%f4

        fmuld   %f34,%f14,%f14

        fmuld   %f36,%f22,%f22

        fmuld   %f38,%f30,%f30

        fsubd   %f6,%f4,%f6

        faddd   %f14,%f12,%f14

        faddd   %f22,%f20,%f22

        faddd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

        .align  16
.case8:
        fmuld   %f10,%f10,%f8
        andcc   %l1,1,%g0
        bz,pn   %icc,.case12
! delay slot
        fxor    %f14,%f34,%f34

        fmuld   %f18,%f18,%f16
        andcc   %l2,1,%g0
        bz,pn   %icc,.case10
! delay slot
        fxor    %f22,%f36,%f36

        fmuld   %f26,%f26,%f24
        andcc   %l3,1,%g0
        bz,pn   %icc,.case9
! delay slot
        fxor    %f30,%f38,%f38

        fmuld   %f0,pp3,%f6             ! sin(x0)

        faddd   %f6,pp2,%f6
        fmuld   %f0,qq2,%f4

        fmuld   %f8,qq3,%f14            ! cos(x1)

        fmuld   %f16,qq3,%f22           ! cos(x2)

        fmuld   %f24,qq3,%f30           ! cos(x3)

        fmuld   %f0,%f6,%f6
        faddd   %f4,qq1,%f4

        faddd   %f14,qq2,%f14
        fmuld   %f8,pp2,%f12

        faddd   %f22,qq2,%f22
        fmuld   %f16,pp2,%f20

        faddd   %f30,qq2,%f30
        fmuld   %f24,pp2,%f28

        faddd   %f6,pp1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        fmuld   %f8,%f14,%f14
        faddd   %f12,pp1,%f12

        fmuld   %f16,%f22,%f22
        faddd   %f20,pp1,%f20

        fmuld   %f24,%f30,%f30
        faddd   %f28,pp1,%f28

        fmuld   %f0,%f6,%f6

        faddd   %f14,qq1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        faddd   %f22,qq1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        faddd   %f30,qq1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        fmuld   %f2,%f6,%f6
        ldd     [%l4+8],%f0

        fmuld   %f10,%f12,%f12

        fmuld   %f18,%f20,%f20

        fmuld   %f26,%f28,%f28

        fmuld   %f0,%f4,%f4
        faddd   %f32,%f6,%f6

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f34,%f12
        ldd     [%l5+16],%f8

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f36,%f20
        ldd     [%l6+16],%f16

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f38,%f28
        ldd     [%l7+16],%f24

        faddd   %f2,%f6,%f6
        ldd     [%l4+16],%f32

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f10,%f12
        ldd     [%l5+8],%f34

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f18,%f20
        ldd     [%l6+8],%f36

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f26,%f28
        ldd     [%l7+8],%f38

        fmuld   %f32,%f6,%f6

        fmuld   %f34,%f12,%f12

        fmuld   %f36,%f20,%f20

        fmuld   %f38,%f28,%f28

        faddd   %f6,%f4,%f6

        fsubd   %f14,%f12,%f14

        fsubd   %f22,%f20,%f22

        fsubd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

        .align  16
.case9:
        fmuld   %f0,pp3,%f6             ! sin(x0)

        fmuld   %f24,pp3,%f30           ! sin(x3)

        faddd   %f6,pp2,%f6
        fmuld   %f0,qq2,%f4

        fmuld   %f8,qq3,%f14            ! cos(x1)

        fmuld   %f16,qq3,%f22           ! cos(x2)

        faddd   %f30,pp2,%f30
        fmuld   %f24,qq2,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,qq1,%f4

        faddd   %f14,qq2,%f14
        fmuld   %f8,pp2,%f12

        faddd   %f22,qq2,%f22
        fmuld   %f16,pp2,%f20

        fmuld   %f24,%f30,%f30
        faddd   %f28,qq1,%f28

        faddd   %f6,pp1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        fmuld   %f8,%f14,%f14
        faddd   %f12,pp1,%f12

        fmuld   %f16,%f22,%f22
        faddd   %f20,pp1,%f20

        faddd   %f30,pp1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        fmuld   %f0,%f6,%f6

        faddd   %f14,qq1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        faddd   %f22,qq1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        fmuld   %f24,%f30,%f30

        fmuld   %f2,%f6,%f6
        ldd     [%l4+8],%f0

        fmuld   %f10,%f12,%f12

        fmuld   %f18,%f20,%f20

        fmuld   %f26,%f30,%f30
        ldd     [%l7+8],%f24

        fmuld   %f0,%f4,%f4
        faddd   %f32,%f6,%f6

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f34,%f12
        ldd     [%l5+16],%f8

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f36,%f20
        ldd     [%l6+16],%f16

        fmuld   %f24,%f28,%f28
        faddd   %f38,%f30,%f30

        faddd   %f2,%f6,%f6
        ldd     [%l4+16],%f32

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f10,%f12
        ldd     [%l5+8],%f34

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f18,%f20
        ldd     [%l6+8],%f36

        faddd   %f26,%f30,%f30
        ldd     [%l7+16],%f38

        fmuld   %f32,%f6,%f6

        fmuld   %f34,%f12,%f12

        fmuld   %f36,%f20,%f20

        fmuld   %f38,%f30,%f30

        faddd   %f6,%f4,%f6

        fsubd   %f14,%f12,%f14

        fsubd   %f22,%f20,%f22

        faddd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

2265         .align  16
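! .case10: dispatch on the low bit for lane 3 (clear falls through to
! .case11); here x0 and x2 take the sine (pp) path and x1 and x3 the
! cosine (qq) path.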
.case10:
        fmuld   %f26,%f26,%f24
        andcc   %l3,1,%g0
        bz,pn   %icc,.case11
! delay slot
        fxor    %f30,%f38,%f38

        fmuld   %f0,pp3,%f6             ! sin(x0)

        fmuld   %f16,pp3,%f22           ! sin(x2)

        faddd   %f6,pp2,%f6
        fmuld   %f0,qq2,%f4

        fmuld   %f8,qq3,%f14            ! cos(x1)

        faddd   %f22,pp2,%f22
        fmuld   %f16,qq2,%f20

        fmuld   %f24,qq3,%f30           ! cos(x3)

        fmuld   %f0,%f6,%f6
        faddd   %f4,qq1,%f4

        faddd   %f14,qq2,%f14
        fmuld   %f8,pp2,%f12

        fmuld   %f16,%f22,%f22
        faddd   %f20,qq1,%f20

        faddd   %f30,qq2,%f30
        fmuld   %f24,pp2,%f28

        faddd   %f6,pp1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        fmuld   %f8,%f14,%f14
        faddd   %f12,pp1,%f12

        faddd   %f22,pp1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        fmuld   %f24,%f30,%f30
        faddd   %f28,pp1,%f28

        fmuld   %f0,%f6,%f6

        faddd   %f14,qq1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        fmuld   %f16,%f22,%f22

        faddd   %f30,qq1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        fmuld   %f2,%f6,%f6
        ldd     [%l4+8],%f0

        fmuld   %f10,%f12,%f12

        fmuld   %f18,%f22,%f22
        ldd     [%l6+8],%f16

        fmuld   %f26,%f28,%f28

        fmuld   %f0,%f4,%f4
        faddd   %f32,%f6,%f6

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f34,%f12
        ldd     [%l5+16],%f8

        fmuld   %f16,%f20,%f20
        faddd   %f36,%f22,%f22

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f38,%f28
        ldd     [%l7+16],%f24

        faddd   %f2,%f6,%f6
        ldd     [%l4+16],%f32

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f10,%f12
        ldd     [%l5+8],%f34

        faddd   %f18,%f22,%f22
        ldd     [%l6+16],%f36

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f26,%f28
        ldd     [%l7+8],%f38

        fmuld   %f32,%f6,%f6

        fmuld   %f34,%f12,%f12

        fmuld   %f36,%f22,%f22

        fmuld   %f38,%f28,%f28

        faddd   %f6,%f4,%f6

        fsubd   %f14,%f12,%f14

        faddd   %f22,%f20,%f22

        fsubd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

        .align  16
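! .case11: x1 takes the cosine (qq) path; x0, x2 and x3 take the
! sine (pp) path.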
.case11:
        fmuld   %f0,pp3,%f6             ! sin(x0)

        fmuld   %f16,pp3,%f22           ! sin(x2)

        fmuld   %f24,pp3,%f30           ! sin(x3)

        faddd   %f6,pp2,%f6
        fmuld   %f0,qq2,%f4

        fmuld   %f8,qq3,%f14            ! cos(x1)

        faddd   %f22,pp2,%f22
        fmuld   %f16,qq2,%f20

        faddd   %f30,pp2,%f30
        fmuld   %f24,qq2,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,qq1,%f4

        faddd   %f14,qq2,%f14
        fmuld   %f8,pp2,%f12

        fmuld   %f16,%f22,%f22
        faddd   %f20,qq1,%f20

        fmuld   %f24,%f30,%f30
        faddd   %f28,qq1,%f28

        faddd   %f6,pp1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        fmuld   %f8,%f14,%f14
        faddd   %f12,pp1,%f12

        faddd   %f22,pp1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        faddd   %f30,pp1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        fmuld   %f0,%f6,%f6

        faddd   %f14,qq1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        fmuld   %f16,%f22,%f22

        fmuld   %f24,%f30,%f30

        fmuld   %f2,%f6,%f6
        ldd     [%l4+8],%f0

        fmuld   %f10,%f12,%f12

        fmuld   %f18,%f22,%f22
        ldd     [%l6+8],%f16

        fmuld   %f26,%f30,%f30
        ldd     [%l7+8],%f24

        fmuld   %f0,%f4,%f4
        faddd   %f32,%f6,%f6

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f34,%f12
        ldd     [%l5+16],%f8

        fmuld   %f16,%f20,%f20
        faddd   %f36,%f22,%f22

        fmuld   %f24,%f28,%f28
        faddd   %f38,%f30,%f30

        faddd   %f2,%f6,%f6
        ldd     [%l4+16],%f32

        fmuld   %f8,%f14,%f14
        faddd   %f12,%f10,%f12
        ldd     [%l5+8],%f34

        faddd   %f18,%f22,%f22
        ldd     [%l6+16],%f36

        faddd   %f26,%f30,%f30
        ldd     [%l7+16],%f38

        fmuld   %f32,%f6,%f6

        fmuld   %f34,%f12,%f12

        fmuld   %f36,%f22,%f22

        fmuld   %f38,%f30,%f30

        faddd   %f6,%f4,%f6

        fsubd   %f14,%f12,%f14

        faddd   %f22,%f20,%f22

        faddd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

        .align  16
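! .case12: dispatch on the low bits for lanes 2 and 3 (.case14 and
! .case13 handle the clear-bit variants); here x0 and x1 take the
! sine (pp) path and x2 and x3 the cosine (qq) path.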
.case12:
        fmuld   %f18,%f18,%f16
        andcc   %l2,1,%g0
        bz,pn   %icc,.case14
! delay slot
        fxor    %f22,%f36,%f36

        fmuld   %f26,%f26,%f24
        andcc   %l3,1,%g0
        bz,pn   %icc,.case13
! delay slot
        fxor    %f30,%f38,%f38

        fmuld   %f0,pp3,%f6             ! sin(x0)

        fmuld   %f8,pp3,%f14            ! sin(x1)

        faddd   %f6,pp2,%f6
        fmuld   %f0,qq2,%f4

        faddd   %f14,pp2,%f14
        fmuld   %f8,qq2,%f12

        fmuld   %f16,qq3,%f22           ! cos(x2)

        fmuld   %f24,qq3,%f30           ! cos(x3)

        fmuld   %f0,%f6,%f6
        faddd   %f4,qq1,%f4

        fmuld   %f8,%f14,%f14
        faddd   %f12,qq1,%f12

        faddd   %f22,qq2,%f22
        fmuld   %f16,pp2,%f20

        faddd   %f30,qq2,%f30
        fmuld   %f24,pp2,%f28

        faddd   %f6,pp1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        faddd   %f14,pp1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        fmuld   %f16,%f22,%f22
        faddd   %f20,pp1,%f20

        fmuld   %f24,%f30,%f30
        faddd   %f28,pp1,%f28

        fmuld   %f0,%f6,%f6

        fmuld   %f8,%f14,%f14

        faddd   %f22,qq1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        faddd   %f30,qq1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        fmuld   %f2,%f6,%f6
        ldd     [%l4+8],%f0

        fmuld   %f10,%f14,%f14
        ldd     [%l5+8],%f8

        fmuld   %f18,%f20,%f20

        fmuld   %f26,%f28,%f28

        fmuld   %f0,%f4,%f4
        faddd   %f32,%f6,%f6

        fmuld   %f8,%f12,%f12
        faddd   %f34,%f14,%f14

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f36,%f20
        ldd     [%l6+16],%f16

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f38,%f28
        ldd     [%l7+16],%f24

        faddd   %f2,%f6,%f6
        ldd     [%l4+16],%f32

        faddd   %f10,%f14,%f14
        ldd     [%l5+16],%f34

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f18,%f20
        ldd     [%l6+8],%f36

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f26,%f28
        ldd     [%l7+8],%f38

        fmuld   %f32,%f6,%f6

        fmuld   %f34,%f14,%f14

        fmuld   %f36,%f20,%f20

        fmuld   %f38,%f28,%f28

        faddd   %f6,%f4,%f6

        faddd   %f14,%f12,%f14

        fsubd   %f22,%f20,%f22

        fsubd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

        .align  16
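! .case13: x2 takes the cosine (qq) path; x0, x1 and x3 take the
! sine (pp) path.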
.case13:
        fmuld   %f0,pp3,%f6             ! sin(x0)

        fmuld   %f8,pp3,%f14            ! sin(x1)

        fmuld   %f24,pp3,%f30           ! sin(x3)

        faddd   %f6,pp2,%f6
        fmuld   %f0,qq2,%f4

        faddd   %f14,pp2,%f14
        fmuld   %f8,qq2,%f12

        fmuld   %f16,qq3,%f22           ! cos(x2)

        faddd   %f30,pp2,%f30
        fmuld   %f24,qq2,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,qq1,%f4

        fmuld   %f8,%f14,%f14
        faddd   %f12,qq1,%f12

        faddd   %f22,qq2,%f22
        fmuld   %f16,pp2,%f20

        fmuld   %f24,%f30,%f30
        faddd   %f28,qq1,%f28

        faddd   %f6,pp1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        faddd   %f14,pp1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        fmuld   %f16,%f22,%f22
        faddd   %f20,pp1,%f20

        faddd   %f30,pp1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        fmuld   %f0,%f6,%f6

        fmuld   %f8,%f14,%f14

        faddd   %f22,qq1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        fmuld   %f24,%f30,%f30

        fmuld   %f2,%f6,%f6
        ldd     [%l4+8],%f0

        fmuld   %f10,%f14,%f14
        ldd     [%l5+8],%f8

        fmuld   %f18,%f20,%f20

        fmuld   %f26,%f30,%f30
        ldd     [%l7+8],%f24

        fmuld   %f0,%f4,%f4
        faddd   %f32,%f6,%f6

        fmuld   %f8,%f12,%f12
        faddd   %f34,%f14,%f14

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f36,%f20
        ldd     [%l6+16],%f16

        fmuld   %f24,%f28,%f28
        faddd   %f38,%f30,%f30

        faddd   %f2,%f6,%f6
        ldd     [%l4+16],%f32

        faddd   %f10,%f14,%f14
        ldd     [%l5+16],%f34

        fmuld   %f16,%f22,%f22
        faddd   %f20,%f18,%f20
        ldd     [%l6+8],%f36

        faddd   %f26,%f30,%f30
        ldd     [%l7+16],%f38

        fmuld   %f32,%f6,%f6

        fmuld   %f34,%f14,%f14

        fmuld   %f36,%f20,%f20

        fmuld   %f38,%f30,%f30

        faddd   %f6,%f4,%f6

        faddd   %f14,%f12,%f14

        fsubd   %f22,%f20,%f22

        faddd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

        .align  16
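! .case14: dispatch on the low bit for lane 3 (clear goes to .case15);
! here x3 takes the cosine (qq) path and x0, x1, x2 the sine (pp) path.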
.case14:
        fmuld   %f26,%f26,%f24
        andcc   %l3,1,%g0
        bz,pn   %icc,.case15
! delay slot
        fxor    %f30,%f38,%f38

        fmuld   %f0,pp3,%f6             ! sin(x0)

        fmuld   %f8,pp3,%f14            ! sin(x1)

        fmuld   %f16,pp3,%f22           ! sin(x2)

        faddd   %f6,pp2,%f6
        fmuld   %f0,qq2,%f4

        faddd   %f14,pp2,%f14
        fmuld   %f8,qq2,%f12

        faddd   %f22,pp2,%f22
        fmuld   %f16,qq2,%f20

        fmuld   %f24,qq3,%f30           ! cos(x3)

        fmuld   %f0,%f6,%f6
        faddd   %f4,qq1,%f4

        fmuld   %f8,%f14,%f14
        faddd   %f12,qq1,%f12

        fmuld   %f16,%f22,%f22
        faddd   %f20,qq1,%f20

        faddd   %f30,qq2,%f30
        fmuld   %f24,pp2,%f28

        faddd   %f6,pp1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        faddd   %f14,pp1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        faddd   %f22,pp1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        fmuld   %f24,%f30,%f30
        faddd   %f28,pp1,%f28

        fmuld   %f0,%f6,%f6

        fmuld   %f8,%f14,%f14

        fmuld   %f16,%f22,%f22

        faddd   %f30,qq1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        fmuld   %f2,%f6,%f6
        ldd     [%l4+8],%f0

        fmuld   %f10,%f14,%f14
        ldd     [%l5+8],%f8

        fmuld   %f18,%f22,%f22
        ldd     [%l6+8],%f16

        fmuld   %f26,%f28,%f28

        fmuld   %f0,%f4,%f4
        faddd   %f32,%f6,%f6

        fmuld   %f8,%f12,%f12
        faddd   %f34,%f14,%f14

        fmuld   %f16,%f20,%f20
        faddd   %f36,%f22,%f22

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f38,%f28
        ldd     [%l7+16],%f24

        faddd   %f2,%f6,%f6
        ldd     [%l4+16],%f32

        faddd   %f10,%f14,%f14
        ldd     [%l5+16],%f34

        faddd   %f18,%f22,%f22
        ldd     [%l6+16],%f36

        fmuld   %f24,%f30,%f30
        faddd   %f28,%f26,%f28
        ldd     [%l7+8],%f38

        fmuld   %f32,%f6,%f6

        fmuld   %f34,%f14,%f14

        fmuld   %f36,%f22,%f22

        fmuld   %f38,%f28,%f28

        faddd   %f6,%f4,%f6

        faddd   %f14,%f12,%f14

        faddd   %f22,%f20,%f22

        fsubd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop

        .align  16
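! .case15: all four lanes take the sine-polynomial (pp) path.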
.case15:
        fmuld   %f0,pp3,%f6             ! sin(x0)

        fmuld   %f8,pp3,%f14            ! sin(x1)

        fmuld   %f16,pp3,%f22           ! sin(x2)

        fmuld   %f24,pp3,%f30           ! sin(x3)

        faddd   %f6,pp2,%f6
        fmuld   %f0,qq2,%f4

        faddd   %f14,pp2,%f14
        fmuld   %f8,qq2,%f12

        faddd   %f22,pp2,%f22
        fmuld   %f16,qq2,%f20

        faddd   %f30,pp2,%f30
        fmuld   %f24,qq2,%f28

        fmuld   %f0,%f6,%f6
        faddd   %f4,qq1,%f4

        fmuld   %f8,%f14,%f14
        faddd   %f12,qq1,%f12

        fmuld   %f16,%f22,%f22
        faddd   %f20,qq1,%f20

        fmuld   %f24,%f30,%f30
        faddd   %f28,qq1,%f28

        faddd   %f6,pp1,%f6
        fmuld   %f0,%f4,%f4
        add     %l4,%g1,%l4

        faddd   %f14,pp1,%f14
        fmuld   %f8,%f12,%f12
        add     %l5,%g1,%l5

        faddd   %f22,pp1,%f22
        fmuld   %f16,%f20,%f20
        add     %l6,%g1,%l6

        faddd   %f30,pp1,%f30
        fmuld   %f24,%f28,%f28
        add     %l7,%g1,%l7

        fmuld   %f0,%f6,%f6

        fmuld   %f8,%f14,%f14

        fmuld   %f16,%f22,%f22

        fmuld   %f24,%f30,%f30

        fmuld   %f2,%f6,%f6
        ldd     [%l4+8],%f0

        fmuld   %f10,%f14,%f14
        ldd     [%l5+8],%f8

        fmuld   %f18,%f22,%f22
        ldd     [%l6+8],%f16

        fmuld   %f26,%f30,%f30
        ldd     [%l7+8],%f24

        fmuld   %f0,%f4,%f4
        faddd   %f32,%f6,%f6

        fmuld   %f8,%f12,%f12
        faddd   %f34,%f14,%f14

        fmuld   %f16,%f20,%f20
        faddd   %f36,%f22,%f22

        fmuld   %f24,%f28,%f28
        faddd   %f38,%f30,%f30

        faddd   %f2,%f6,%f6
        ldd     [%l4+16],%f32

        faddd   %f10,%f14,%f14
        ldd     [%l5+16],%f34

        faddd   %f18,%f22,%f22
        ldd     [%l6+16],%f36

        faddd   %f26,%f30,%f30
        ldd     [%l7+16],%f38

        fmuld   %f32,%f6,%f6

        fmuld   %f34,%f14,%f14

        fmuld   %f36,%f22,%f22

        fmuld   %f38,%f30,%f30

        faddd   %f6,%f4,%f6

        faddd   %f14,%f12,%f14

        faddd   %f22,%f20,%f22

        faddd   %f30,%f28,%f30

        faddd   %f6,%f0,%f6

        faddd   %f14,%f8,%f14

        faddd   %f22,%f16,%f22

        faddd   %f30,%f24,%f30
        mov     %l0,%l4

        fnegd   %f6,%f4
        lda     [%i1]%asi,%l0           ! preload next argument

        fnegd   %f14,%f12
        lda     [%i1]%asi,%f0

        fnegd   %f22,%f20
        lda     [%i1+4]%asi,%f3

        fnegd   %f30,%f28
        andn    %l0,%i5,%l0
        add     %i1,%i2,%i1

        andcc   %l4,2,%g0
        fmovdnz %icc,%f4,%f6
        st      %f6,[%o0]

        andcc   %l1,2,%g0
        fmovdnz %icc,%f12,%f14
        st      %f14,[%o1]

        andcc   %l2,2,%g0
        fmovdnz %icc,%f20,%f22
        st      %f22,[%o2]

        andcc   %l3,2,%g0
        fmovdnz %icc,%f28,%f30
        st      %f30,[%o3]

        addcc   %i0,-1,%i0
        bg,pt   %icc,.loop0
! delay slot
        st      %f7,[%o0+4]

        ba,pt   %icc,.end
! delay slot
        nop


        .align  16
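! .end: store the second word of the last three results, then, if any
! huge argument was flagged in biguns, reload the saved parameters and
! call __vlibm_vcos_big_ultra3 to process those elements.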
.end:
        st      %f15,[%o1+4]
        st      %f23,[%o2+4]
        st      %f31,[%o3+4]
        ld      [%fp+biguns],%i5
        tst     %i5                     ! check for huge arguments remaining
        be,pt   %icc,.exit
! delay slot
        nop
#ifdef __sparcv9
        ldx     [%fp+xsave],%o1
        ldx     [%fp+ysave],%o3
#else
        ld      [%fp+xsave],%o1
        ld      [%fp+ysave],%o3
#endif
        ld      [%fp+nsave],%o0
        ld      [%fp+sxsave],%o2
        ld      [%fp+sysave],%o4
        sra     %o2,0,%o2               ! sign-extend for V9
        sra     %o4,0,%o4
        call    __vlibm_vcos_big_ultra3
        sra     %o5,0,%o5               ! delay slot

.exit:
        ret
        restore


        .align  16
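! .last1..last3: pad out the final group when fewer than four elements
! remain: substitute zero arguments for the missing lanes, aim their
! stores at the scratch slot (junk), and rejoin the main body at .cont.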
.last1:
        faddd   %f2,c3two44,%f4
        st      %f15,[%o1+4]
.last1_from_range1:
        mov     0,%l1
        fzeros  %f8
        fzero   %f10
        add     %fp,junk,%o1
.last2:
        faddd   %f10,c3two44,%f12
        st      %f23,[%o2+4]
.last2_from_range2:
        mov     0,%l2
        fzeros  %f16
        fzero   %f18
        add     %fp,junk,%o2
.last3:
        faddd   %f18,c3two44,%f20
        st      %f31,[%o3+4]
        st      %f5,[%fp+nk0]
        st      %f13,[%fp+nk1]
.last3_from_range3:
        mov     0,%l3
        fzeros  %f24
        fzero   %f26
        ba,pt   %icc,.cont
! delay slot
        add     %fp,junk,%o3


        .align  16
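! .range0: lane 0 argument outside the primary range.  Tiny arguments
! (hx < 0x3e400000) produce cos(x) = 1.0, with inexact raised by the
! fdtoi when x is nonzero; infinities and NaNs produce x*0; finite but
! huge arguments only set biguns here, and .end later hands them to the
! large-argument routine.  Control then refills lane 0 and re-enters
! .loop0, or goes to .end when no elements remain.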
.range0:
        cmp     %l0,%o4
        bl,pt   %icc,1f                 ! hx < 0x3e400000
! delay slot, harmless if branch taken
        sethi   %hi(0x7ff00000),%o7
        cmp     %l0,%o7
        bl,a,pt %icc,2f                 ! branch if finite
! delay slot, squashed if branch not taken
        st      %o4,[%fp+biguns]        ! set biguns
        fzero   %f0
        fmuld   %f2,%f0,%f2
        st      %f2,[%o0]
        ba,pt   %icc,2f
! delay slot
        st      %f3,[%o0+4]
1:
        fdtoi   %f2,%f4                 ! raise inexact if not zero
        sethi   %hi(0x3ff00000),%o7
        st      %o7,[%o0]
        st      %g0,[%o0+4]
2:
        addcc   %i0,-1,%i0
        ble,pn  %icc,.end
! delay slot, harmless if branch taken
        add     %i3,%i4,%i3             ! y += stridey
        andn    %l1,%i5,%l0             ! hx &= ~0x80000000
        fmovs   %f8,%f0
        fmovs   %f11,%f3
        ba,pt   %icc,.loop0
! delay slot
        add     %i1,%i2,%i1             ! x += stridex


        .align  16
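! .range1: as .range0, for lane 1; resumes at .loop1, or pads the
! remaining lanes via .last1_from_range1 when the vector is exhausted.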
.range1:
        cmp     %l1,%o4
        bl,pt   %icc,1f                 ! hx < 0x3e400000
! delay slot, harmless if branch taken
        sethi   %hi(0x7ff00000),%o7
        cmp     %l1,%o7
        bl,a,pt %icc,2f                 ! branch if finite
! delay slot, squashed if branch not taken
        st      %o4,[%fp+biguns]        ! set biguns
        fzero   %f8
        fmuld   %f10,%f8,%f10
        st      %f10,[%o1]
        ba,pt   %icc,2f
! delay slot
        st      %f11,[%o1+4]
1:
        fdtoi   %f10,%f12               ! raise inexact if not zero
        sethi   %hi(0x3ff00000),%o7
        st      %o7,[%o1]
        st      %g0,[%o1+4]
2:
        addcc   %i0,-1,%i0
        ble,pn  %icc,.last1_from_range1
! delay slot, harmless if branch taken
        add     %i3,%i4,%i3             ! y += stridey
        andn    %l2,%i5,%l1             ! hx &= ~0x80000000
        fmovs   %f16,%f8
        fmovs   %f19,%f11
        ba,pt   %icc,.loop1
! delay slot
        add     %i1,%i2,%i1             ! x += stridex


        .align  16
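! .range2: as .range0, for lane 2; resumes at .loop2 or exits via
! .last2_from_range2.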
.range2:
        cmp     %l2,%o4
        bl,pt   %icc,1f                 ! hx < 0x3e400000
! delay slot, harmless if branch taken
        sethi   %hi(0x7ff00000),%o7
        cmp     %l2,%o7
        bl,a,pt %icc,2f                 ! branch if finite
! delay slot, squashed if branch not taken
        st      %o4,[%fp+biguns]        ! set biguns
        fzero   %f16
        fmuld   %f18,%f16,%f18
        st      %f18,[%o2]
        ba,pt   %icc,2f
! delay slot
        st      %f19,[%o2+4]
1:
        fdtoi   %f18,%f20               ! raise inexact if not zero
        sethi   %hi(0x3ff00000),%o7
        st      %o7,[%o2]
        st      %g0,[%o2+4]
2:
        addcc   %i0,-1,%i0
        ble,pn  %icc,.last2_from_range2
! delay slot, harmless if branch taken
        add     %i3,%i4,%i3             ! y += stridey
        andn    %l3,%i5,%l2             ! hx &= ~0x80000000
        fmovs   %f24,%f16
        fmovs   %f27,%f19
        ba,pt   %icc,.loop2
! delay slot
        add     %i1,%i2,%i1             ! x += stridex


        .align  16
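! .range3: as .range0, for lane 3, except the next argument is loaded
! directly from memory rather than copied from preloaded registers;
! resumes at .loop3 or exits via .last3_from_range3.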
.range3:
        cmp     %l3,%o4
        bl,pt   %icc,1f                 ! hx < 0x3e400000
! delay slot, harmless if branch taken
        sethi   %hi(0x7ff00000),%o7
        cmp     %l3,%o7
        bl,a,pt %icc,2f                 ! branch if finite
! delay slot, squashed if branch not taken
        st      %o4,[%fp+biguns]        ! set biguns
        fzero   %f24
        fmuld   %f26,%f24,%f26
        st      %f26,[%o3]
        ba,pt   %icc,2f
! delay slot
        st      %f27,[%o3+4]
1:
        fdtoi   %f26,%f28               ! raise inexact if not zero
        sethi   %hi(0x3ff00000),%o7
        st      %o7,[%o3]
        st      %g0,[%o3+4]
2:
        addcc   %i0,-1,%i0
        ble,pn  %icc,.last3_from_range3
! delay slot, harmless if branch taken
        add     %i3,%i4,%i3             ! y += stridey
        ld      [%i1],%l3
        ld      [%i1],%f24
        ld      [%i1+4],%f27
        andn    %l3,%i5,%l3             ! hx &= ~0x80000000
        ba,pt   %icc,.loop3
! delay slot
        add     %i1,%i2,%i1             ! x += stridex

        SET_SIZE(__vcos_ultra3)