1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  23  */
  24 /*
  25  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  26  * Use is subject to license terms.
  27  */
  28 
  29         .file   "__vsincos.S"
  30 
  31 #include "libm.h"
  32 
	RO_DATA
	.align	64
! Constant pool: each entry is one IEEE double-precision value written as
! two 32-bit words, high word first (big-endian layout).  pp1..pp3 are the
! polynomial coefficients used on the sine path, qq1..qq3 those used on the
! cosine path (see the spoly/cpoly comments in the code below).
constants:
	.word	0x42c80000,0x00000000	! 3 * 2^44 (round-to-nearest trick const)
	.word	0x43380000,0x00000000	! 3 * 2^51 (round-to-nearest trick const)
	.word	0x3fe45f30,0x6dc9c883	! invpio2 = 2/pi
	.word	0x3ff921fb,0x54442c00	! pio2_1 \
	.word	0x3d318469,0x898cc400	! pio2_2  > three-part split of pi/2
	.word	0x3a71701b,0x839a2520	! pio2_3 /
	.word	0xbfc55555,0x55555533	! pp1
	.word	0x3f811111,0x10e7d53b	! pp2
	.word	0xbf2a0167,0xe6b3cf9b	! pp3
	.word	0xbfdfffff,0xffffff65	! qq1
	.word	0x3fa55555,0x54f88ed0	! qq2
	.word	0xbf56c12c,0xdd185f60	! qq3
  48 
  49 ! local storage indices
  50 
/*
 * Frame-local temporary slots, addressed off %fp.  C-style comments here
 * are stripped by cpp before assembly, so they never leak into the macro
 * expansions (a trailing "!" comment would).
 */
#define xsave		STACK_BIAS-0x8	/* saved x pointer (argument) */
#define ssave		STACK_BIAS-0x10	/* saved s pointer (argument) */
#define csave		STACK_BIAS-0x18	/* saved c pointer (argument) */
#define nsave		STACK_BIAS-0x1c	/* saved n (argument) */
#define sxsave		STACK_BIAS-0x20	/* saved stridex (argument) */
#define sssave		STACK_BIAS-0x24	/* saved strides (argument) */
#define biguns		STACK_BIAS-0x28	/* flag: huge args seen, call big path */
#define junk		STACK_BIAS-0x30	/* scratch sink for pipeline dummy stores */
#define nk2		STACK_BIAS-0x38	/* spill slot: n/k for pipeline slot 2 */
#define nk1		STACK_BIAS-0x3c	/* spill slot: n/k for pipeline slot 1 */
#define nk0		STACK_BIAS-0x40	/* spill slot: n/k for pipeline slot 0 */
! sizeof temp storage - must be a multiple of 16 for V9
#define tmps		0x40
  64 
  65 ! register use
  66 
  67 ! i0  n
  68 ! i1  x
  69 ! i2  stridex
  70 ! i3  s
  71 ! i4  strides
  72 ! i5  0x80000000,n0
  73 
  74 ! l0  hx0,k0
  75 ! l1  hx1,k1
  76 ! l2  hx2,k2
  77 ! l3  c
  78 ! l4  pc0
  79 ! l5  pc1
  80 ! l6  pc2
  81 ! l7  stridec
  82 
  83 ! the following are 64-bit registers in both V8+ and V9
  84 
  85 ! g1  __vlibm_TBL_sincos2
  86 ! g5  scratch,n1
  87 
  88 ! o0  ps0
  89 ! o1  ps1
  90 ! o2  ps2
  91 ! o3  0x3fe921fb
  92 ! o4  0x3e400000
  93 ! o5  0x4099251e
  94 ! o7  scratch,n2
  95 
  96 ! f0  x0,z0
  97 ! f2  abs(x0)
  98 ! f4  
  99 ! f6  
 100 ! f8  
 101 ! f10 x1,z1
 102 ! f12 abs(x1)
 103 ! f14 
 104 ! f16 
 105 ! f18 
 106 ! f20 x2,z2
 107 ! f22 abs(x2)
 108 ! f24 
 109 ! f26 
 110 ! f28 
 111 ! f30 
 112 ! f32 
 113 ! f34 
 114 ! f36
 115 ! f38
 116 
#define c3two44	%f40	/* 3 * 2^44 */
#define c3two51	%f42	/* 3 * 2^51 */
#define invpio2	%f44	/* 2/pi */
#define pio2_1	%f46	/* pi/2, leading part */
#define pio2_2	%f48	/* pi/2, middle part */
#define pio2_3	%f50	/* pi/2, trailing part */
#define pp1	%f52	/* sine-path poly coefficients */
#define pp2	%f54
#define pp3	%f56
#define qq1	%f58	/* cosine-path poly coefficients */
#define qq2	%f60
#define qq3	%f62
 129 
	ENTRY(__vsincos)
!
! Presumed C signature (confirm against the libm header/caller):
!   void __vsincos(int n, double *x, int stridex, double *s, int strides,
!                  double *c, int stridec);
! For each of n strided inputs, computes both sin(*x) -> *s and
! cos(*x) -> *c simultaneously.  The main loop is software-pipelined
! three elements deep: .loop0/.loop1/.loop2 each issue one element's
! front end, then .cont runs the shared back end for all three.
! Arguments whose |x| lies outside [0x3e400000, 0x4099251e] in the high
! word (roughly 2^-27 .. ~1.6e3 -- TODO confirm exact thresholds) are
! diverted to .range0/.range1/.range2; huge ones set "biguns" and are
! finished later by __vlibm_vsincos_big.
!
	save	%sp,-SA(MINFRAME)-tmps,%sp
	PIC_SETUP(l7)
	PIC_SET(l7,constants,o0)
	PIC_SET(l7,__vlibm_TBL_sincos2,o1)
	mov	%o1,%g1
	wr	%g0,0x82,%asi		! set %asi for non-faulting loads
#ifdef __sparcv9
	stx	%i1,[%fp+xsave]		! save arguments
	stx	%i3,[%fp+ssave]
	stx	%i5,[%fp+csave]
	ldx	[%fp+STACK_BIAS+0xb0],%l7	! 7th arg (stridec) from caller frame
#else
	st	%i1,[%fp+xsave]		! save arguments
	st	%i3,[%fp+ssave]
	st	%i5,[%fp+csave]
	ld	[%fp+0x5c],%l7		! 7th arg (stridec) from caller frame
#endif
	st	%i0,[%fp+nsave]
	st	%i2,[%fp+sxsave]
	st	%i4,[%fp+sssave]
	mov	%i5,%l3			! l3 = c (running cosine output ptr)
	st	%g0,[%fp+biguns]	! biguns = 0
	ldd	[%o0+0x00],c3two44	! load/set up constants
	ldd	[%o0+0x08],c3two51
	ldd	[%o0+0x10],invpio2
	ldd	[%o0+0x18],pio2_1
	ldd	[%o0+0x20],pio2_2
	ldd	[%o0+0x28],pio2_3
	ldd	[%o0+0x30],pp1
	ldd	[%o0+0x38],pp2
	ldd	[%o0+0x40],pp3
	ldd	[%o0+0x48],qq1
	ldd	[%o0+0x50],qq2
	ldd	[%o0+0x58],qq3
	sethi	%hi(0x80000000),%i5	! sign-bit mask
	sethi	%hi(0x3e400000),%o4	! low |x| cutoff (hi word)
	sethi	%hi(0x3fe921fb),%o3	! hi word of ~pi/4; primary-range test
	or	%o3,%lo(0x3fe921fb),%o3
	sethi	%hi(0x4099251e),%o5	! high |x| cutoff (hi word)
	or	%o5,%lo(0x4099251e),%o5
	sll	%i2,3,%i2		! scale strides
	sll	%i4,3,%i4
	sll	%l7,3,%l7
	add	%fp,junk,%o0		! loop prologue
	add	%fp,junk,%o1		! (first iterations store to the junk
	add	%fp,junk,%o2		!  slot until the pipeline fills)
	ld	[%i1],%l0		! *x
	ld	[%i1],%f0
	ld	[%i1+4],%f3
	andn	%l0,%i5,%l0		! mask off sign
	ba	.loop0
	add	%i1,%i2,%i1		! x += stridex

! 16-byte aligned
	.align	16
! Pipeline stage 0: issue element 0; %l0 holds hx0 (sign-stripped hi word).
.loop0:
	lda	[%i1]%asi,%l1		! preload next argument
	sub	%l0,%o4,%g5
	sub	%o5,%l0,%o7
	fabss	%f0,%f2

	lda	[%i1]%asi,%f10
	orcc	%o7,%g5,%g0		! sign bit set iff hx0 out of range
	mov	%i3,%o0			! ps0 = s
	bl,pn	%icc,.range0		! hx < 0x3e400000 or hx > 0x4099251e

! delay slot
	lda	[%i1+4]%asi,%f13
	addcc	%i0,-1,%i0
	add	%i3,%i4,%i3		! s += strides

	mov	%l3,%l4			! pc0 = c
	add	%l3,%l7,%l3		! c += stridec
	ble,pn	%icc,.last1		! no more inputs: pad pipeline

! delay slot
	andn	%l1,%i5,%l1
	add	%i1,%i2,%i1		! x += stridex
	faddd	%f2,c3two44,%f4		! |x0|+3*2^44: low word becomes index
	st	%f17,[%o1+4]		! flush previous iteration's sin1 lo half

! Pipeline stage 1: issue element 1.
.loop1:
	lda	[%i1]%asi,%l2		! preload next argument
	sub	%l1,%o4,%g5
	sub	%o5,%l1,%o7
	fabss	%f10,%f12

	lda	[%i1]%asi,%f20
	orcc	%o7,%g5,%g0
	mov	%i3,%o1			! ps1 = s
	bl,pn	%icc,.range1		! hx < 0x3e400000 or hx > 0x4099251e

! delay slot
	lda	[%i1+4]%asi,%f23
	addcc	%i0,-1,%i0
	add	%i3,%i4,%i3		! s += strides

	mov	%l3,%l5			! pc1 = c
	add	%l3,%l7,%l3		! c += stridec
	ble,pn	%icc,.last2		! no more inputs: pad pipeline

! delay slot
	andn	%l2,%i5,%l2
	add	%i1,%i2,%i1		! x += stridex
	faddd	%f12,c3two44,%f14
	st	%f27,[%o2+4]		! flush previous iteration's sin2 lo half

! Pipeline stage 2: issue element 2, then fall into the shared back end.
.loop2:
	sub	%l2,%o4,%g5
	sub	%o5,%l2,%o7
	fabss	%f20,%f22
	st	%f5,[%fp+nk0]		! spill low word of |x0|+3*2^44 (= k0)

	orcc	%o7,%g5,%g0
	mov	%i3,%o2			! ps2 = s
	bl,pn	%icc,.range2		! hx < 0x3e400000 or hx > 0x4099251e
! delay slot
	st	%f15,[%fp+nk1]

	mov	%l3,%l6			! pc2 = c

! Shared back end for all three pipeline slots.
.cont:
	add	%i3,%i4,%i3		! s += strides
	add	%l3,%l7,%l3		! c += stridec
	faddd	%f22,c3two44,%f24
	st	%f25,[%fp+nk2]

	sub	%o3,%l0,%l0		! hx - ~pi/4 hi word, per element
	sub	%o3,%l1,%l1
	fmovs	%f3,%f1

	sub	%o3,%l2,%l2
	fmovs	%f13,%f11

	or	%l0,%l1,%l0
	orcc	%l0,%l2,%g0		! negative iff any |x| > ~pi/4
	fmovs	%f23,%f21

	fmuld	%f0,invpio2,%f6		! x * invpio2, for medium range

	fmuld	%f10,invpio2,%f16
	ld	[%fp+nk0],%l0

	fmuld	%f20,invpio2,%f26
	ld	[%fp+nk1],%l1

	bl,pn	%icc,.medium		! some element needs pi/2 reduction
! delay slot
	ld	[%fp+nk2],%l2

! Primary range (all |x| <= ~pi/4): table lookup + polynomial, no reduction.
	sll	%l0,5,%l0		! k (32-byte table entries)
	fcmpd	%fcc0,%f0,pio2_3	! x < pio2_3 iff x < 0

	sll	%l1,5,%l1
	ldd	[%l0+%g1],%f4
	fcmpd	%fcc1,%f10,pio2_3

	sll	%l2,5,%l2
	ldd	[%l1+%g1],%f14
	fcmpd	%fcc2,%f20,pio2_3

	ldd	[%l2+%g1],%f24

	fsubd	%f2,%f4,%f2		! x -= __vlibm_TBL_sincos2[k]

	fsubd	%f12,%f14,%f12

	fsubd	%f22,%f24,%f22

	fmuld	%f2,%f2,%f0		! z = x * x

	fmuld	%f12,%f12,%f10

	fmuld	%f22,%f22,%f20

! Evaluate spoly (pp) and cpoly (qq) in z by Horner's rule, interleaved
! three-wide for latency hiding.
	fmuld	%f0,pp3,%f6

	fmuld	%f10,pp3,%f16

	fmuld	%f20,pp3,%f26

	faddd	%f6,pp2,%f6
	fmuld	%f0,qq3,%f4

	faddd	%f16,pp2,%f16
	fmuld	%f10,qq3,%f14

	faddd	%f26,pp2,%f26
	fmuld	%f20,qq3,%f24

	fmuld	%f0,%f6,%f6
	faddd	%f4,qq2,%f4

	fmuld	%f10,%f16,%f16
	faddd	%f14,qq2,%f14

	fmuld	%f20,%f26,%f26
	faddd	%f24,qq2,%f24

	faddd	%f6,pp1,%f6
	fmuld	%f0,%f4,%f4
	add	%l0,%g1,%l0		! &table[k]

	faddd	%f16,pp1,%f16
	fmuld	%f10,%f14,%f14
	add	%l1,%g1,%l1

	faddd	%f26,pp1,%f26
	fmuld	%f20,%f24,%f24
	add	%l2,%g1,%l2

	fmuld	%f0,%f6,%f6
	faddd	%f4,qq1,%f4

	fmuld	%f10,%f16,%f16
	faddd	%f14,qq1,%f14

	fmuld	%f20,%f26,%f26
	faddd	%f24,qq1,%f24

	fmuld	%f2,%f6,%f6
	ldd	[%l0+8],%f8		! table sin entry (presumed layout)

	fmuld	%f12,%f16,%f16
	ldd	[%l1+8],%f18

	fmuld	%f22,%f26,%f26
	ldd	[%l2+8],%f28

	faddd	%f6,%f2,%f6
	fmuld	%f0,%f4,%f4
	ldd	[%l0+16],%f30		! table cos entry (presumed layout)

	faddd	%f16,%f12,%f16
	fmuld	%f10,%f14,%f14
	ldd	[%l1+16],%f32

	faddd	%f26,%f22,%f26
	fmuld	%f20,%f24,%f24
	ldd	[%l2+16],%f34

! Angle-sum recombination: sin(a+d)=s*cpoly'+c*spoly', cos(a+d)=c*cpoly'-s*spoly'.
	fmuld	%f8,%f6,%f0		! s * spoly

	fmuld	%f18,%f16,%f10

	fmuld	%f28,%f26,%f20

	fmuld	%f30,%f4,%f2		! c * cpoly

	fmuld	%f32,%f14,%f12

	fmuld	%f34,%f24,%f22

	fmuld	%f30,%f6,%f6		! c * spoly
	fsubd	%f2,%f0,%f2

	fmuld	%f32,%f16,%f16
	fsubd	%f12,%f10,%f12

	fmuld	%f34,%f26,%f26
	fsubd	%f22,%f20,%f22

	fmuld	%f8,%f4,%f4		! s * cpoly
	faddd	%f2,%f30,%f2
	st	%f2,[%l4]		! cos0 -> *pc0 (hi word)

	fmuld	%f18,%f14,%f14
	faddd	%f12,%f32,%f12
	st	%f3,[%l4+4]		! cos0 lo word

	fmuld	%f28,%f24,%f24
	faddd	%f22,%f34,%f22
	st	%f12,[%l5]		! cos1

	faddd	%f6,%f4,%f6
	st	%f13,[%l5+4]

	faddd	%f16,%f14,%f16
	st	%f22,[%l6]		! cos2

	faddd	%f26,%f24,%f26
	st	%f23,[%l6+4]

	faddd	%f6,%f8,%f6

	faddd	%f16,%f18,%f16

	faddd	%f26,%f28,%f26

! sin is odd: negate result when the (signed) input was negative.
	fnegd	%f6,%f4
	lda	[%i1]%asi,%l0		! preload next argument

	fnegd	%f16,%f14
	lda	[%i1]%asi,%f0

	fnegd	%f26,%f24
	lda	[%i1+4]%asi,%f3
	andn	%l0,%i5,%l0
	add	%i1,%i2,%i1

	fmovdl	%fcc0,%f4,%f6		! (hx < -0)? -s : s
	st	%f6,[%o0]		! sin0 -> *ps0

	fmovdl	%fcc1,%f14,%f16
	st	%f16,[%o1]		! sin1

	fmovdl	%fcc2,%f24,%f26
	st	%f26,[%o2]		! sin2
	addcc	%i0,-1,%i0

	bg,pt	%icc,.loop0
! delay slot
	st	%f7,[%o0+4]

	ba,pt	%icc,.end
! delay slot
	nop


	.align	16
! Medium range: at least one |x| > ~pi/4.  Three-part Cody-Waite style
! reduction by pi/2 (pio2_1/2/3); n mod 4 selects quadrant, swapping the
! sin/cos output pointers for odd n and negating per quadrant below.
.medium:
	faddd	%f6,c3two51,%f4		! round x*2/pi to nearest int n
	st	%f5,[%fp+nk0]

	faddd	%f16,c3two51,%f14
	st	%f15,[%fp+nk1]

	faddd	%f26,c3two51,%f24
	st	%f25,[%fp+nk2]

	fsubd	%f4,c3two51,%f6		! (double)n

	fsubd	%f14,c3two51,%f16

	fsubd	%f24,c3two51,%f26

	fmuld	%f6,pio2_1,%f2
	ld	[%fp+nk0],%i5		! n

	fmuld	%f16,pio2_1,%f12
	ld	[%fp+nk1],%g5

	fmuld	%f26,pio2_1,%f22
	ld	[%fp+nk2],%o7

	fsubd	%f0,%f2,%f0		! x - n*pio2_1 (exact)
	fmuld	%f6,pio2_2,%f4
	mov	%o0,%o4			! if (n & 1) swap ps, pc
	andcc	%i5,1,%g0

	fsubd	%f10,%f12,%f10
	fmuld	%f16,pio2_2,%f14
	movnz	%icc,%l4,%o0
	and	%i5,3,%i5

	fsubd	%f20,%f22,%f20
	fmuld	%f26,pio2_2,%f24
	movnz	%icc,%o4,%l4

	fsubd	%f0,%f4,%f30
	mov	%o1,%o4
	andcc	%g5,1,%g0

	fsubd	%f10,%f14,%f32
	movnz	%icc,%l5,%o1
	and	%g5,3,%g5

	fsubd	%f20,%f24,%f34
	movnz	%icc,%o4,%l5

	fsubd	%f0,%f30,%f0
	fcmple32 %f30,pio2_3,%l0	! x <= pio2_3 iff x < 0
	mov	%o2,%o4
	andcc	%o7,1,%g0

	fsubd	%f10,%f32,%f10
	fcmple32 %f32,pio2_3,%l1
	movnz	%icc,%l6,%o2
	and	%o7,3,%o7

	fsubd	%f20,%f34,%f20
	fcmple32 %f34,pio2_3,%l2
	movnz	%icc,%o4,%l6

	fsubd	%f0,%f4,%f0
	fmuld	%f6,pio2_3,%f6
	add	%i5,1,%o4		! n = (n >> 1) | (((n + 1) ^ l) & 2)
	srl	%i5,1,%i5

	fsubd	%f10,%f14,%f10
	fmuld	%f16,pio2_3,%f16
	xor	%o4,%l0,%o4

	fsubd	%f20,%f24,%f20
	fmuld	%f26,pio2_3,%f26
	and	%o4,2,%o4

	fsubd	%f6,%f0,%f6
	or	%i5,%o4,%i5

	fsubd	%f16,%f10,%f16
	add	%g5,1,%o4
	srl	%g5,1,%g5

	fsubd	%f26,%f20,%f26
	xor	%o4,%l1,%o4

	fsubd	%f30,%f6,%f0		! reduced x
	and	%o4,2,%o4

	fsubd	%f32,%f16,%f10
	or	%g5,%o4,%g5

	fsubd	%f34,%f26,%f20
	add	%o7,1,%o4
	srl	%o7,1,%o7

	fzero	%f38
	xor	%o4,%l2,%o4

	fabsd	%f0,%f2
	and	%o4,2,%o4

	fabsd	%f10,%f12
	or	%o7,%o4,%o7

	fabsd	%f20,%f22
	sethi	%hi(0x3e400000),%o4	! restore clobbered low-cutoff constant

	fnegd	%f38,%f38		! %f38 = -0.0, i.e. a sign-bit mask

	faddd	%f2,c3two44,%f4		! table index from reduced |x|
	st	%f5,[%fp+nk0]

	faddd	%f12,c3two44,%f14
	st	%f15,[%fp+nk1]

	faddd	%f22,c3two44,%f24
	st	%f25,[%fp+nk2]

	fsubd	%f30,%f0,%f4

	fsubd	%f32,%f10,%f14

	fsubd	%f34,%f20,%f24

	fsubd	%f4,%f6,%f6		! w (reduction tail / correction term)
	ld	[%fp+nk0],%l0

	fsubd	%f14,%f16,%f16
	ld	[%fp+nk1],%l1

	fsubd	%f24,%f26,%f26
	ld	[%fp+nk2],%l2
	sll	%l0,5,%l0		! k

	fand	%f0,%f38,%f30		! sign bit of x
	ldd	[%l0+%g1],%f4
	sll	%l1,5,%l1

	fand	%f10,%f38,%f32
	ldd	[%l1+%g1],%f14
	sll	%l2,5,%l2

	fand	%f20,%f38,%f34
	ldd	[%l2+%g1],%f24

	fsubd	%f2,%f4,%f2		! x -= __vlibm_TBL_sincos2[k]

	fsubd	%f12,%f14,%f12

	fsubd	%f22,%f24,%f22

	fmuld	%f2,%f2,%f0		! z = x * x
	fxor	%f6,%f30,%f30		! apply x's sign to the tail term

	fmuld	%f12,%f12,%f10
	fxor	%f16,%f32,%f32

	fmuld	%f22,%f22,%f20
	fxor	%f26,%f34,%f34

! Same interleaved Horner evaluation as the primary-range path above.
	fmuld	%f0,pp3,%f6

	fmuld	%f10,pp3,%f16

	fmuld	%f20,pp3,%f26

	faddd	%f6,pp2,%f6
	fmuld	%f0,qq3,%f4

	faddd	%f16,pp2,%f16
	fmuld	%f10,qq3,%f14

	faddd	%f26,pp2,%f26
	fmuld	%f20,qq3,%f24

	fmuld	%f0,%f6,%f6
	faddd	%f4,qq2,%f4

	fmuld	%f10,%f16,%f16
	faddd	%f14,qq2,%f14

	fmuld	%f20,%f26,%f26
	faddd	%f24,qq2,%f24

	faddd	%f6,pp1,%f6
	fmuld	%f0,%f4,%f4
	add	%l0,%g1,%l0

	faddd	%f16,pp1,%f16
	fmuld	%f10,%f14,%f14
	add	%l1,%g1,%l1

	faddd	%f26,pp1,%f26
	fmuld	%f20,%f24,%f24
	add	%l2,%g1,%l2

	fmuld	%f0,%f6,%f6
	faddd	%f4,qq1,%f4

	fmuld	%f10,%f16,%f16
	faddd	%f14,qq1,%f14

	fmuld	%f20,%f26,%f26
	faddd	%f24,qq1,%f24

	fmuld	%f2,%f6,%f6
	ldd	[%l0+16],%f8		! note: cos entry here, sin entry below
	
	fmuld	%f12,%f16,%f16
	ldd	[%l1+16],%f18

	fmuld	%f22,%f26,%f26
	ldd	[%l2+16],%f28

	faddd	%f6,%f30,%f6
	fmuld	%f0,%f4,%f4
	ldd	[%l0+8],%f30

	faddd	%f16,%f32,%f16
	fmuld	%f10,%f14,%f14
	ldd	[%l1+8],%f32

	faddd	%f26,%f34,%f26
	fmuld	%f20,%f24,%f24
	ldd	[%l2+8],%f34

	fmuld	%f8,%f4,%f0		! c * cpoly
	faddd	%f6,%f2,%f6

	fmuld	%f18,%f14,%f10
	faddd	%f16,%f12,%f16

	fmuld	%f28,%f24,%f20
	faddd	%f26,%f22,%f26

	fmuld	%f30,%f6,%f2		! s * spoly

	fmuld	%f32,%f16,%f12

	fmuld	%f34,%f26,%f22

	fmuld	%f8,%f6,%f6		! c * spoly
	fsubd	%f0,%f2,%f2

	fmuld	%f18,%f16,%f16
	fsubd	%f10,%f12,%f12

	fmuld	%f28,%f26,%f26
	fsubd	%f20,%f22,%f22

	fmuld	%f30,%f4,%f4		! s * cpoly
	faddd	%f8,%f2,%f8

	fmuld	%f32,%f14,%f14
	faddd	%f18,%f12,%f18

	fmuld	%f34,%f24,%f24
	faddd	%f28,%f22,%f28

	faddd	%f4,%f6,%f6

	faddd	%f14,%f16,%f16

	faddd	%f24,%f26,%f26

	faddd	%f30,%f6,%f6		! now %f6 = sin |x|, %f8 = cos |x|

	faddd	%f32,%f16,%f16

	faddd	%f34,%f26,%f26

! Quadrant fixups: odd n already swapped ps/pc; now flip signs per n mod 4.
	fnegd	%f8,%f4			! if (n & 1) c = -c
	lda	[%i1]%asi,%l0		! preload next argument
	mov	%i5,%l1			! free %i5 so the sign mask can return

	fnegd	%f18,%f14
	lda	[%i1]%asi,%f0
	sethi	%hi(0x80000000),%i5

	fnegd	%f28,%f24
	lda	[%i1+4]%asi,%f3

	andcc	%l1,1,%g0
	fmovdnz	%icc,%f4,%f8
	st	%f8,[%l4]

	andcc	%g5,1,%g0
	fmovdnz	%icc,%f14,%f18
	st	%f9,[%l4+4]

	andcc	%o7,1,%g0
	fmovdnz	%icc,%f24,%f28
	st	%f18,[%l5]

	fnegd	%f6,%f4			! if (n & 2) s = -s
	st	%f19,[%l5+4]
	andn	%l0,%i5,%l0

	fnegd	%f16,%f14
	st	%f28,[%l6]
	add	%i1,%i2,%i1

	fnegd	%f26,%f24
	st	%f29,[%l6+4]

	andcc	%l1,2,%g0
	fmovdnz	%icc,%f4,%f6
	st	%f6,[%o0]

	andcc	%g5,2,%g0
	fmovdnz	%icc,%f14,%f16
	st	%f16,[%o1]

	andcc	%o7,2,%g0
	fmovdnz	%icc,%f24,%f26
	st	%f26,[%o2]

	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop0
! delay slot
	st	%f7,[%o0+4]

	ba,pt	%icc,.end
! delay slot
	nop


	.align	16
! Drain: flush the last two pending low-word stores, then dispatch any
! deferred huge arguments to __vlibm_vsincos_big with the saved arguments.
.end:
	st	%f17,[%o1+4]
	st	%f27,[%o2+4]
	ld	[%fp+biguns],%i5
	tst	%i5			! check for huge arguments remaining
	be,pt	%icc,.exit
! delay slot
	nop
#ifdef __sparcv9
	stx	%o5,[%sp+STACK_BIAS+0xb8]	! pass c, stridec on the stack
	ldx	[%fp+xsave],%o1
	ldx	[%fp+ssave],%o3
	ldx	[%fp+csave],%o5
	ldx	[%fp+STACK_BIAS+0xb0],%i5
	stx	%i5,[%sp+STACK_BIAS+0xb0]
#else
	st	%o5,[%sp+0x60]
	ld	[%fp+xsave],%o1
	ld	[%fp+ssave],%o3
	ld	[%fp+csave],%o5
	ld	[%fp+0x5c],%i5
	st	%i5,[%sp+0x5c]
#endif
	ld	[%fp+nsave],%o0
	ld	[%fp+sxsave],%o2
	ld	[%fp+sssave],%o4
	sra	%o2,0,%o2		! sign-extend for V9
	call	__vlibm_vsincos_big
	sra	%o4,0,%o4		! delay slot

.exit:
	ret
	restore


	.align	16
! Fewer than three elements remained: pad the empty pipeline slots with
! zero dummies whose results go to the junk slot, then run the back end.
.last1:
	faddd	%f2,c3two44,%f4
	st	%f17,[%o1+4]
.last1_from_range1:
	mov	0,%l1
	fzeros	%f10
	fzero	%f12
	add	%fp,junk,%o1
	add	%fp,junk,%l5
.last2:
	faddd	%f12,c3two44,%f14
	st	%f27,[%o2+4]
	st	%f5,[%fp+nk0]
	st	%f15,[%fp+nk1]
.last2_from_range2:
	mov	0,%l2
	fzeros	%f20
	fzero	%f22
	add	%fp,junk,%o2
	ba,pt	%icc,.cont
! delay slot
	add	%fp,junk,%l6


	.align	16
! Element 0 out of range.  Tiny |x|: sin(x)=x (with inexact raised) and
! cos(x)=1.  Huge or non-finite: if finite, flag for the big path;
! if Inf/NaN, store x*0 (= NaN) to both outputs.
.range0:
	cmp	%l0,%o4
	bl,pt	%icc,1f			! hx < 0x3e400000
! delay slot, harmless if branch taken
	sethi	%hi(0x7ff00000),%o7
	cmp	%l0,%o7
	bl,a,pt	%icc,2f			! branch if finite
! delay slot, squashed if branch not taken
	st	%o4,[%fp+biguns]	! set biguns
	fzero	%f0
	fmuld	%f2,%f0,%f2
	st	%f2,[%o0]
	st	%f3,[%o0+4]
	st	%f2,[%l3]
	ba,pt	%icc,2f
! delay slot
	st	%f3,[%l3+4]
1:
	fdtoi	%f2,%f4			! raise inexact if not zero
	st	%f0,[%o0]
	st	%f3,[%o0+4]
	sethi	%hi(0x3ff00000),%g5
	st	%g5,[%l3]		! cos = 1.0
	st	%g0,[%l3+4]
2:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.end
! delay slot, harmless if branch taken
	add	%i3,%i4,%i3		! s += strides
	add	%l3,%l7,%l3		! c += stridec
	andn	%l1,%i5,%l0		! hx &= ~0x80000000
	fmovs	%f10,%f0
	fmovs	%f13,%f3
	ba,pt	%icc,.loop0
! delay slot
	add	%i1,%i2,%i1		! x += stridex


	.align	16
! Element 1 out of range; same handling as .range0.
.range1:
	cmp	%l1,%o4
	bl,pt	%icc,1f			! hx < 0x3e400000
! delay slot, harmless if branch taken
	sethi	%hi(0x7ff00000),%o7
	cmp	%l1,%o7
	bl,a,pt	%icc,2f			! branch if finite
! delay slot, squashed if branch not taken
	st	%o4,[%fp+biguns]	! set biguns
	fzero	%f10
	fmuld	%f12,%f10,%f12
	st	%f12,[%o1]
	st	%f13,[%o1+4]
	st	%f12,[%l3]
	ba,pt	%icc,2f
! delay slot
	st	%f13,[%l3+4]
1:
	fdtoi	%f12,%f14		! raise inexact if not zero
	st	%f10,[%o1]
	st	%f13,[%o1+4]
	sethi	%hi(0x3ff00000),%g5
	st	%g5,[%l3]		! cos = 1.0
	st	%g0,[%l3+4]
2:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last1_from_range1
! delay slot, harmless if branch taken
	add	%i3,%i4,%i3		! s += strides
	add	%l3,%l7,%l3		! c += stridec
	andn	%l2,%i5,%l1		! hx &= ~0x80000000
	fmovs	%f20,%f10
	fmovs	%f23,%f13
	ba,pt	%icc,.loop1
! delay slot
	add	%i1,%i2,%i1		! x += stridex


	.align	16
! Element 2 out of range; same handling as .range0, but the refill must
! load the next argument itself since slot 2 has no preloaded successor.
.range2:
	cmp	%l2,%o4
	bl,pt	%icc,1f			! hx < 0x3e400000
! delay slot, harmless if branch taken
	sethi	%hi(0x7ff00000),%o7
	cmp	%l2,%o7
	bl,a,pt	%icc,2f			! branch if finite
! delay slot, squashed if branch not taken
	st	%o4,[%fp+biguns]	! set biguns
	fzero	%f20
	fmuld	%f22,%f20,%f22
	st	%f22,[%o2]
	st	%f23,[%o2+4]
	st	%f22,[%l3]
	ba,pt	%icc,2f
! delay slot
	st	%f23,[%l3+4]
1:
	fdtoi	%f22,%f24		! raise inexact if not zero
	st	%f20,[%o2]
	st	%f23,[%o2+4]
	sethi	%hi(0x3ff00000),%g5
	st	%g5,[%l3]		! cos = 1.0
	st	%g0,[%l3+4]
2:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.last2_from_range2
! delay slot, harmless if branch taken
	add	%i3,%i4,%i3		! s += strides
	add	%l3,%l7,%l3		! c += stridec
	ld	[%i1],%l2
	ld	[%i1],%f20
	ld	[%i1+4],%f23
	andn	%l2,%i5,%l2		! hx &= ~0x80000000
	ba,pt	%icc,.loop2
! delay slot
	add	%i1,%i2,%i1		! x += stridex

	SET_SIZE(__vsincos)
 959