1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  23  */
  24 /*
  25  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  26  * Use is subject to license terms.
  27  */
  28 
  29         .file   "__vsincosf.S"
  30 
  31 #include "libm.h"
  32 
  33         RO_DATA
     /*
      * Constant table: sixteen 8-byte entries, loaded with ldd in the
      * function prologue using the byte offsets #defined after the table.
      * The first twelve entries are IEEE double bit patterns (high word
      * first).  The last four carry a single-precision bit pattern in the
      * high word and zero in the low word.  thresh1, thresh2 and inf are
      * only used as operands of the 32-bit compares (fcmple32/fcmpgt32);
      * signbit is loaded into %f28 but not otherwise referenced in this
      * file.
      */
  34         .align  64
  35 constants:
  36         .word   0xbfc55554,0x60000000   /* S0: sine coefficient */
  37         .word   0x3f811077,0xe0000000   /* S1: sine coefficient */
  38         .word   0xbf29956b,0x60000000   /* S2: sine coefficient */
  39         .word   0x3ff00000,0x00000000   /* one: 1.0 */
  40         .word   0xbfe00000,0x00000000   /* mhalf: -0.5 */
  41         .word   0x3fa55554,0xa0000000   /* C0: cosine coefficient */
  42         .word   0xbf56c0c1,0xe0000000   /* C1: cosine coefficient */
  43         .word   0x3ef99e24,0xe0000000   /* C2: cosine coefficient */
  44         .word   0x3fe45f30,0x6dc9c883   /* invpio2: 2/pi */
  45         .word   0x43380000,0x00000000   /* round: 1.5 * 2^52 */
  46         .word   0x3ff921fb,0x54400000   /* pio2_1: leading bits of pi/2 */
  47         .word   0x3dd0b461,0x1a626331   /* pio2_t: pi/2 - pio2_1 */
  48         .word   0x3f490fdb,0            /* thresh1: (float) pi/4 */
  49         .word   0x49c90fdb,0            /* thresh2: (float) 2^19 * pi */
  50         .word   0x7f800000,0            /* inf: (float) +infinity */
  51         .word   0x80000000,0            /* signbit: sign bit of a float */
  52 
     /* Byte offsets of the entries above, relative to `constants`. */
  53 #define S0              0x0
  54 #define S1              0x08
  55 #define S2              0x10
  56 #define one             0x18
  57 #define mhalf           0x20
  58 #define C0              0x28
  59 #define C1              0x30
  60 #define C2              0x38
  61 #define invpio2         0x40
  62 #define round           0x48
  63 #define pio2_1          0x50
  64 #define pio2_t          0x58
  65 #define thresh1         0x60
  66 #define thresh2         0x68
  67 #define inf             0x70
  68 #define signbit         0x78
  69 
  70 ! local storage indices
  71 
     /*
      * Offsets from %fp of the temporaries that live in the `tmps` bytes
      * reserved by the save instruction at function entry.
      *
      *   xsave, ssave, csave    saved x, s and c pointers (stored with
      *                          stx on V9, st on V8), reloaded only for
      *                          the call to __vlibm_vsincos_bigf
      *   nsave, sxsave, sssave  saved n, stridex and strides (32-bit st)
      *   junk                   4-byte scratch word; output pointers of
      *                          lanes that hold no real element point
      *                          here so their stores are harmless
      *   n0 .. n3               one 8-byte slot per lane, used by .medium
      *                          to move the quadrant number from the FP
      *                          registers to the integer registers and a
      *                          sign mask back again
      */
  72 #define xsave           STACK_BIAS-0x8
  73 #define ssave           STACK_BIAS-0x10
  74 #define csave           STACK_BIAS-0x18
  75 #define nsave           STACK_BIAS-0x1c
  76 #define sxsave          STACK_BIAS-0x20
  77 #define sssave          STACK_BIAS-0x24
  78 #define junk            STACK_BIAS-0x28
  79 #define n3              STACK_BIAS-0x38
  80 #define n2              STACK_BIAS-0x40
  81 #define n1              STACK_BIAS-0x48
  82 #define n0              STACK_BIAS-0x50
  83 ! sizeof temp storage - must be a multiple of 16 for V9
  84 #define tmps            0x50
  85 
  86 ! register use
  87 
  88 ! i0  n
  89 ! i1  x
  90 ! i2  stridex
  91 ! i3  s
  92 ! i4  strides
  93 ! i5  c on entry (copied to o4), then biguns
  94 
  95 ! l0  ps0
  96 ! l1  ps1
  97 ! l2  ps2
  98 ! l3  ps3
  99 ! l4  pc0
 100 ! l5  pc1
 101 ! l6  pc2
 102 ! l7  pc3
 103 
 104 ! the following are 64-bit registers in both V8+ and V9
 105 
 106 ! g1  address of constants during setup, then scratch
 107 ! g5  not referenced directly by the code below
 108 
 109 ! o0  n0
 110 ! o1  n1
 111 ! o2  n2
 112 ! o3  n3
 113 ! o4  c
 114 ! o5  stridec
 115 ! o7  AND of the four pi/4 range-check masks (.checkprimary)
 116 
 117 ! f0  x0
 118 ! f2  x1
 119 ! f4  x2
 120 ! f6  x3
 121 ! f8  thresh1 (pi/4)
 122 ! f10 s0
 123 ! f12 s1
 124 ! f14 s2
 125 ! f16 s3
 126 ! f18 thresh2 (2^19 pi)
 127 ! f20 c0
 128 ! f22 c1
 129 ! f24 c2
 130 ! f26 c3
 131 ! f28 signbit
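 132 ! f30 |x0| for range checks, then z0 = x0*x0 (scratch in .medium)
 133 ! f32 |x1| for range checks, then z1 = x1*x1 (scratch in .medium)
 134 ! f34 |x2| for range checks, then z2 = x2*x2 (scratch in .medium)
 135 ! f36 |x3| for range checks, then z3 = x3*x3 (scratch in .medium)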
 136 ! f38 inf
 137 ! f40 S0
 138 ! f42 S1
 139 ! f44 S2
 140 ! f46 one
 141 ! f48 mhalf
 142 ! f50 C0
 143 ! f52 C1
 144 ! f54 C2
 145 ! f56 invpio2
 146 ! f58 round
 147 ! f60 pio2_1
 148 ! f62 pio2_t
 149 
 150         ENTRY(__vsincosf)
     /*
      * __vsincosf: vector single-precision sine and cosine.
      *
      * Arguments as used by the code below (the C prototype itself is
      * not visible in this file):
      *   %i0  n        element count (32-bit int)
      *   %i1  x        input vector of floats
      *   %i2  stridex  input stride, in elements
      *   %i3  s        sine output vector
      *   %i4  strides  sine output stride, in elements
      *   %i5  c        cosine output vector
      *   seventh argument slot of the caller's frame: stridec, the
      *                 cosine output stride, in elements
      *
      * Elements are processed four at a time ("lanes" 0-3) in a
      * software-pipelined loop starting at .start.  Finite arguments
      * larger than thresh2 are skipped here and only flagged in %i5
      * (biguns); .end then hands the whole vector to
      * __vlibm_vsincos_bigf.
      *
      * This prologue saves the arguments, loads the constant table into
      * %f8, %f18, %f28 and %f38-%f62, scales the three strides from
      * elements to bytes, and primes the pipeline: the eight result
      * registers are zeroed and all eight output pointers (%l0-%l7) are
      * aimed at the `junk` stack word, so that the "store previous
      * results" half of the first pass through .start is harmless.
      */
 151         save    %sp,-SA(MINFRAME)-tmps,%sp
             cmp     %i0,0                   ! n <= 0: return immediately;
             ble,pn  %icc,.exit              ! otherwise .start would still
             nop                             ! read x[0] and write s[0], c[0]
     /*
      * PIC_SETUP/PIC_SET are macros defined outside this file; as used
      * here they leave the address of `constants` in %o0.
      */
 152         PIC_SETUP(l7)
 153         PIC_SET(l7,constants,o0)
 154         mov     %o0,%g1
 155 
     /* Pointer-sized saves differ between V9 and V8; %o5 = stridec. */
 156 #ifdef __sparcv9
 157         stx     %i1,[%fp+xsave]         ! save arguments
 158         stx     %i3,[%fp+ssave]
 159         stx     %i5,[%fp+csave]
 160         ldx     [%fp+STACK_BIAS+0xb0],%o5
 161 #else
 162         st      %i1,[%fp+xsave]         ! save arguments
 163         st      %i3,[%fp+ssave]
 164         st      %i5,[%fp+csave]
 165         ld      [%fp+0x5c],%o5
 166 #endif
 167         st      %i0,[%fp+nsave]
 168         st      %i2,[%fp+sxsave]
 169         st      %i4,[%fp+sssave]
     /* c moves to %o4 so that %i5 can serve as the biguns flag. */
 170         mov     %i5,%o4
 171         mov     0,%i5                   ! biguns = 0
 172         ldd     [%g1+S0],%f40           ! load constants
 173         ldd     [%g1+S1],%f42
 174         ldd     [%g1+S2],%f44
 175         ldd     [%g1+one],%f46
 176         ldd     [%g1+mhalf],%f48
 177         ldd     [%g1+C0],%f50
 178         ldd     [%g1+C1],%f52
 179         ldd     [%g1+C2],%f54
 180         ldd     [%g1+invpio2],%f56
 181         ldd     [%g1+round],%f58
 182         ldd     [%g1+pio2_1],%f60
 183         ldd     [%g1+pio2_t],%f62
 184         ldd     [%g1+thresh1],%f8
 185         ldd     [%g1+thresh2],%f18
 186         ldd     [%g1+inf],%f38
 187         ldd     [%g1+signbit],%f28
     /* Strides arrive in elements; shift by 2 to get byte strides. */
 188         sll     %i2,2,%i2               ! scale strides
 189         sll     %i4,2,%i4
 190         sll     %o5,2,%o5
 191         nop
 192         fzero   %f10                    ! loop prologue
 193         add     %fp,junk,%l0
 194         fzero   %f20
 195         add     %fp,junk,%l4
 196         fzero   %f12
 197         add     %fp,junk,%l1
 198         fzero   %f22
 199         add     %fp,junk,%l5
 200         fzero   %f14
 201         add     %fp,junk,%l2
 202         fzero   %f24
 203         add     %fp,junk,%l6
 204         fzero   %f16
 205         add     %fp,junk,%l3
 206         fzero   %f26
 207         ba      .start
 208         add     %fp,junk,%l7
 209 
 210 ! 16-byte aligned
 211         .align  16
     /*
      * Top of the main loop; each pass handles up to four elements
      * (lanes 0-3).  For every lane, in order:
      *   - load the next x into the lane's single register
      *     (%f0/%f2/%f4/%f6) and advance x;
      *   - convert the double results left in the lane's sine and cosine
      *     registers by the previous pass to single, and store them
      *     through the lane's ps/pc pointers (these point at `junk` on
      *     the first pass);
      *   - record the current s and c as the lane's new ps/pc and
      *     advance s and c.
      * After lanes 0, 1 and 2 the count in %i0 is decremented; if it is
      * then <= 0 there is nothing left for the following lanes and
      * control goes to .last1/.last2/.last3.  Lane 3 has a nop in place
      * of the decrement: its decrement is the addcc just before the
      * "bg .start" at the bottom of the loop.
      */
 212 .start:
 213         ld      [%i1],%f0               ! *x
 214         add     %i1,%i2,%i1             ! x += stridex
 215         addcc   %i0,-1,%i0
 216         fdtos   %f10,%f10
 217 
 218         st      %f10,[%l0]
 219         mov     %i3,%l0                 ! ps0 = s
 220         add     %i3,%i4,%i3             ! s += strides
 221         fdtos   %f20,%f20
 222 
 223         st      %f20,[%l4]
 224         mov     %o4,%l4                 ! pc0 = c
 225         ble,pn  %icc,.last1
 226 ! delay slot
 227         add     %o4,%o5,%o4             ! c += stridec
 228 
 229         ld      [%i1],%f2               ! *x
 230         add     %i1,%i2,%i1             ! x += stridex
 231         addcc   %i0,-1,%i0
 232         fdtos   %f12,%f12
 233 
 234         st      %f12,[%l1]
 235         mov     %i3,%l1                 ! ps1 = s
 236         add     %i3,%i4,%i3             ! s += strides
 237         fdtos   %f22,%f22
 238 
 239         st      %f22,[%l5]
 240         mov     %o4,%l5                 ! pc1 = c
 241         ble,pn  %icc,.last2
 242 ! delay slot
 243         add     %o4,%o5,%o4             ! c += stridec
 244 
 245         ld      [%i1],%f4               ! *x
 246         add     %i1,%i2,%i1             ! x += stridex
 247         addcc   %i0,-1,%i0
 248         fdtos   %f14,%f14
 249 
 250         st      %f14,[%l2]
 251         mov     %i3,%l2                 ! ps2 = s
 252         add     %i3,%i4,%i3             ! s += strides
 253         fdtos   %f24,%f24
 254 
 255         st      %f24,[%l6]
 256         mov     %o4,%l6                 ! pc2 = c
 257         ble,pn  %icc,.last3
 258 ! delay slot
 259         add     %o4,%o5,%o4             ! c += stridec
 260 
 261         ld      [%i1],%f6               ! *x
 262         add     %i1,%i2,%i1             ! x += stridex
 263         nop
 264         fdtos   %f16,%f16
 265 
 266         st      %f16,[%l3]
 267         mov     %i3,%l3                 ! ps3 = s
 268         add     %i3,%i4,%i3             ! s += strides
 269         fdtos   %f26,%f26
 270 
 271         st      %f26,[%l7]
 272         mov     %o4,%l7                 ! pc3 = c
 273         add     %o4,%o5,%o4             ! c += stridec
 274 .cont:
     /*
      * Range classification for the four lanes.
      *
      * fabsd clears the sign bit of each lane's register pair, so the
      * high word of %f30/%f32/%f34/%f36 holds the bit pattern of |x|
      * (x was loaded into the even, i.e. high, single register).  The
      * fcmple32 results are tested with mask 2.
      * NOTE(review): mask 2 is taken to be the result bit for the upper
      * 32-bit word -- confirm against the VIS specification.
      *
      * A lane whose |x| is not <= thresh2 leaves for .rangeN.  Each
      * branch's (non-annulled) delay slot replaces the lane's mask with
      * the |x| <= thresh1 comparison that .checkprimary consumes.
      * .check1-.check3 are also the re-entry points used by .range0-2.
      */
 275         fabsd   %f0,%f30
 276 
 277         fabsd   %f2,%f32
 278 
 279         fabsd   %f4,%f34
 280 
 281         fabsd   %f6,%f36
 282         fcmple32 %f30,%f18,%o0
 283 
 284         fcmple32 %f32,%f18,%o1
 285 
 286         fcmple32 %f34,%f18,%o2
 287 
 288         fcmple32 %f36,%f18,%o3
 289         nop
 290 
 291 ! 16-byte aligned
 292         andcc   %o0,2,%g0
 293         bz,pn   %icc,.range0            ! branch if > 2^19 pi
 294 ! delay slot
 295         fcmple32 %f30,%f8,%o0
 296 
 297 .check1:
 298         andcc   %o1,2,%g0
 299         bz,pn   %icc,.range1            ! branch if > 2^19 pi
 300 ! delay slot
 301         fcmple32 %f32,%f8,%o1
 302 
 303 .check2:
 304         andcc   %o2,2,%g0
 305         bz,pn   %icc,.range2            ! branch if > 2^19 pi
 306 ! delay slot
 307         fcmple32 %f34,%f8,%o2
 308 
 309 .check3:
 310         andcc   %o3,2,%g0
 311         bz,pn   %icc,.range3            ! branch if > 2^19 pi
 312 ! delay slot
 313         fcmple32 %f36,%f8,%o3
 314 
 315 .checkprimary:
     /*
      * Primary-range evaluation (all four |x| <= thresh1).
      *
      * For each lane, z = x*x is formed in double precision (fsmuld)
      * and x itself is widened to double (fstod).  The four thresh1
      * masks are ANDed into %o7; if any lane failed the test the whole
      * group goes to .medium instead.  The fmuld issued just before
      * that branch is harmless on the .medium path, which overwrites
      * %f20 and recomputes the product.
      *
      * Otherwise both polynomials are evaluated per lane, interleaved
      * across lanes and across sine/cosine:
      *   sine   (%f10-%f16) = x * (one + z*(S0 + z*(S1 + z*S2)))
      *   cosine (%f20-%f26) = one + z*(mhalf + z*(C0 + z*(C1 + z*C2)))
      * The results stay in double precision; they are converted and
      * stored by the next pass through .start, or by .end.
      */
 316         fsmuld  %f0,%f0,%f30
 317         fstod   %f0,%f0
 318 
 319         fsmuld  %f2,%f2,%f32
 320         fstod   %f2,%f2
 321         and     %o0,%o1,%o7
 322 
 323         fsmuld  %f4,%f4,%f34
 324         fstod   %f4,%f4
 325         and     %o2,%o7,%o7
 326 
 327         fsmuld  %f6,%f6,%f36
 328         fstod   %f6,%f6
 329         and     %o3,%o7,%o7
 330 
 331         fmuld   %f30,%f54,%f20
 332         andcc   %o7,2,%g0
 333         bz,pn   %icc,.medium            ! branch if any argument is > pi/4
 334 ! delay slot
 335         nop
 336 
 337         fmuld   %f32,%f54,%f22
 338 
 339         fmuld   %f34,%f54,%f24
 340 
 341         fmuld   %f36,%f54,%f26
 342 
 343         faddd   %f20,%f52,%f20
 344         fmuld   %f30,%f44,%f10
 345 
 346         faddd   %f22,%f52,%f22
 347         fmuld   %f32,%f44,%f12
 348 
 349         faddd   %f24,%f52,%f24
 350         fmuld   %f34,%f44,%f14
 351 
 352         faddd   %f26,%f52,%f26
 353         fmuld   %f36,%f44,%f16
 354 
 355         fmuld   %f30,%f20,%f20
 356         faddd   %f10,%f42,%f10
 357 
 358         fmuld   %f32,%f22,%f22
 359         faddd   %f12,%f42,%f12
 360 
 361         fmuld   %f34,%f24,%f24
 362         faddd   %f14,%f42,%f14
 363 
 364         fmuld   %f36,%f26,%f26
 365         faddd   %f16,%f42,%f16
 366 
 367         faddd   %f20,%f50,%f20
 368         fmuld   %f30,%f10,%f10
 369 
 370         faddd   %f22,%f50,%f22
 371         fmuld   %f32,%f12,%f12
 372 
 373         faddd   %f24,%f50,%f24
 374         fmuld   %f34,%f14,%f14
 375 
 376         faddd   %f26,%f50,%f26
 377         fmuld   %f36,%f16,%f16
 378 
 379         fmuld   %f30,%f20,%f20
 380         faddd   %f10,%f40,%f10
 381 
 382         fmuld   %f32,%f22,%f22
 383         faddd   %f12,%f40,%f12
 384 
 385         fmuld   %f34,%f24,%f24
 386         faddd   %f14,%f40,%f14
 387 
 388         fmuld   %f36,%f26,%f26
 389         faddd   %f16,%f40,%f16
 390 
 391         faddd   %f20,%f48,%f20
 392         fmuld   %f30,%f10,%f10
 393 
 394         faddd   %f22,%f48,%f22
 395         fmuld   %f32,%f12,%f12
 396 
 397         faddd   %f24,%f48,%f24
 398         fmuld   %f34,%f14,%f14
 399 
 400         faddd   %f26,%f48,%f26
 401         fmuld   %f36,%f16,%f16
 402 
 403         fmuld   %f30,%f20,%f20
 404         faddd   %f10,%f46,%f10
 405 
 406         fmuld   %f32,%f22,%f22
 407         faddd   %f12,%f46,%f12
 408 
 409         fmuld   %f34,%f24,%f24
 410         faddd   %f14,%f46,%f14
 411 
 412         fmuld   %f36,%f26,%f26
 413         faddd   %f16,%f46,%f16
 414 
 415         faddd   %f20,%f46,%f20
 416         fmuld   %f0,%f10,%f10
 417 
 418         faddd   %f22,%f46,%f22
 419         fmuld   %f2,%f12,%f12
 420 
 421         faddd   %f24,%f46,%f24
 422         fmuld   %f4,%f14,%f14
     /* Deferred count decrement for lane 3; loop while elements remain. */
 423         addcc   %i0,-1,%i0
 424 
 425         faddd   %f26,%f46,%f26
 426         bg,pt   %icc,.start
 427 ! delay slot
 428         fmuld   %f6,%f16,%f16
 429 
 430         ba,pt   %icc,.end
 431 ! delay slot
 432         nop
 433 
 434 
 435         .align  16
     /*
      * Medium-range evaluation: taken from .checkprimary when at least
      * one lane has |x| > thresh1 (all lanes are already known to be
      * <= thresh2).  All four lanes go through the reduction.
      *
      * Per lane, with x already widened to double:
      *   t = x * invpio2 + round    the low word of t (odd register
      *                              %f11/%f13/%f15/%f17) is stored to
      *                              n0-n3 and later read back as the
      *                              integer quadrant number n
      *   fn = t - round             n as a double
      *   x  = (x - fn*pio2_1) - fn*pio2_t
      * NOTE(review): this is the add-a-large-constant rounding idiom;
      * it presumably relies on the default round-to-nearest mode --
      * confirm.
      *
      * Quadrant fix-ups, carried out on the integer side in parallel:
      *   n & 1   swap the lane's ps and pc pointers (via %g1) and
      *           replace x by -x
      *   n & 2   shifted up to bit 63, written back to n0-n3 as a
      *           64-bit mask, reloaded into %f30-%f36 and XORed into
      *           both results to flip their signs
      *
      * The polynomials are the same as on the primary path:
      *   sine   = x * (one + z*(S0 + z*(S1 + z*S2)))
      *   cosine = one + z*(mhalf + z*(C0 + z*(C1 + z*C2)))
      */
 436 .medium:
 437         fmuld   %f0,%f56,%f10
 438 
 439         fmuld   %f2,%f56,%f12
 440 
 441         fmuld   %f4,%f56,%f14
 442 
 443         fmuld   %f6,%f56,%f16
 444 
 445         faddd   %f10,%f58,%f10
 446         st      %f11,[%fp+n0]
 447 
 448         faddd   %f12,%f58,%f12
 449         st      %f13,[%fp+n1]
 450 
 451         faddd   %f14,%f58,%f14
 452         st      %f15,[%fp+n2]
 453 
 454         faddd   %f16,%f58,%f16
 455         st      %f17,[%fp+n3]
 456 
 457         fsubd   %f10,%f58,%f10
 458 
 459         fsubd   %f12,%f58,%f12
 460 
 461         fsubd   %f14,%f58,%f14
 462 
 463         fsubd   %f16,%f58,%f16
 464 
 465         fmuld   %f10,%f60,%f20
 466         ld      [%fp+n0],%o0
 467 
 468         fmuld   %f12,%f60,%f22
 469         ld      [%fp+n1],%o1
 470 
 471         fmuld   %f14,%f60,%f24
 472         ld      [%fp+n2],%o2
 473 
 474         fmuld   %f16,%f60,%f26
 475         ld      [%fp+n3],%o3
 476 
 477         fsubd   %f0,%f20,%f0
 478         fmuld   %f10,%f62,%f30
 479         and     %o0,1,%o0
 480         mov     %l0,%g1
 481 
 482         fsubd   %f2,%f22,%f2
 483         fmuld   %f12,%f62,%f32
 484         and     %o1,1,%o1
 485         movrnz  %o0,%l4,%l0             ! if (n & 1) exchange ps and pc
 486 
 487         fsubd   %f4,%f24,%f4
 488         fmuld   %f14,%f62,%f34
 489         and     %o2,1,%o2
 490         movrnz  %o0,%g1,%l4
 491 
 492         fsubd   %f6,%f26,%f6
 493         fmuld   %f16,%f62,%f36
 494         and     %o3,1,%o3
 495         mov     %l1,%g1
 496 
 497         fsubd   %f0,%f30,%f0
 498         movrnz  %o1,%l5,%l1
 499 
 500         fsubd   %f2,%f32,%f2
 501         movrnz  %o1,%g1,%l5
 502 
 503         fsubd   %f4,%f34,%f4
 504         mov     %l2,%g1
 505 
 506         fsubd   %f6,%f36,%f6
 507         movrnz  %o2,%l6,%l2
 508 
     /* z = x*x on the reduced argument; -x is staged in %f10-%f16. */
 509         fmuld   %f0,%f0,%f30
 510         fnegd   %f0,%f10
 511         movrnz  %o2,%g1,%l6
 512 
 513         fmuld   %f2,%f2,%f32
 514         fnegd   %f2,%f12
 515         mov     %l3,%g1
 516 
 517         fmuld   %f4,%f4,%f34
 518         fnegd   %f4,%f14
 519         movrnz  %o3,%l7,%l3
 520 
 521         fmuld   %f6,%f6,%f36
 522         fnegd   %f6,%f16
 523         movrnz  %o3,%g1,%l7
 524 
 525         fmuld   %f30,%f54,%f20
 526         fmovrdnz %o0,%f10,%f0           ! if (n & 1) x = -x
 527 
 528         fmuld   %f32,%f54,%f22
 529         fmovrdnz %o1,%f12,%f2
 530 
 531         fmuld   %f34,%f54,%f24
 532         fmovrdnz %o2,%f14,%f4
 533 
 534         fmuld   %f36,%f54,%f26
 535         fmovrdnz %o3,%f16,%f6
 536 
     /*
      * From here the polynomial steps are interleaved with building the
      * per-lane sign masks: reload n, keep bit 1, shift it to bit 63 and
      * store the 64-bit mask back into the lane's slot.
      */
 537         faddd   %f20,%f52,%f20
 538         fmuld   %f30,%f44,%f10
 539         ld      [%fp+n0],%o0
 540 
 541         faddd   %f22,%f52,%f22
 542         fmuld   %f32,%f44,%f12
 543         and     %o0,2,%o0
 544 
 545         faddd   %f24,%f52,%f24
 546         fmuld   %f34,%f44,%f14
 547         sllx    %o0,62,%g1
 548         stx     %g1,[%fp+n0]
 549 
 550         faddd   %f26,%f52,%f26
 551         fmuld   %f36,%f44,%f16
 552         ld      [%fp+n1],%o1
 553 
 554         fmuld   %f30,%f20,%f20
 555         faddd   %f10,%f42,%f10
 556         and     %o1,2,%o1
 557 
 558         fmuld   %f32,%f22,%f22
 559         faddd   %f12,%f42,%f12
 560         sllx    %o1,62,%g1
 561         stx     %g1,[%fp+n1]
 562 
 563         fmuld   %f34,%f24,%f24
 564         faddd   %f14,%f42,%f14
 565         ld      [%fp+n2],%o2
 566 
 567         fmuld   %f36,%f26,%f26
 568         faddd   %f16,%f42,%f16
 569         and     %o2,2,%o2
 570 
 571         faddd   %f20,%f50,%f20
 572         fmuld   %f30,%f10,%f10
 573         sllx    %o2,62,%g1
 574         stx     %g1,[%fp+n2]
 575 
 576         faddd   %f22,%f50,%f22
 577         fmuld   %f32,%f12,%f12
 578         ld      [%fp+n3],%o3
 579 
 580         faddd   %f24,%f50,%f24
 581         fmuld   %f34,%f14,%f14
 582         and     %o3,2,%o3
 583 
 584         faddd   %f26,%f50,%f26
 585         fmuld   %f36,%f16,%f16
 586         sllx    %o3,62,%g1
 587         stx     %g1,[%fp+n3]
 588 
 589         fmuld   %f30,%f20,%f20
 590         faddd   %f10,%f40,%f10
 591 
 592         fmuld   %f32,%f22,%f22
 593         faddd   %f12,%f40,%f12
 594 
 595         fmuld   %f34,%f24,%f24
 596         faddd   %f14,%f40,%f14
 597 
 598         fmuld   %f36,%f26,%f26
 599         faddd   %f16,%f40,%f16
 600 
 601         faddd   %f20,%f48,%f20
 602         fmuld   %f30,%f10,%f10
 603 
 604         faddd   %f22,%f48,%f22
 605         fmuld   %f32,%f12,%f12
 606 
 607         faddd   %f24,%f48,%f24
 608         fmuld   %f34,%f14,%f14
 609 
 610         faddd   %f26,%f48,%f26
 611         fmuld   %f36,%f16,%f16
 612 
 613         fmuld   %f30,%f20,%f20
 614         faddd   %f10,%f46,%f10
 615 
 616         fmuld   %f32,%f22,%f22
 617         faddd   %f12,%f46,%f12
 618 
 619         fmuld   %f34,%f24,%f24
 620         faddd   %f14,%f46,%f14
 621 
 622         fmuld   %f36,%f26,%f26
 623         faddd   %f16,%f46,%f16
 624 
     /* z is no longer needed: %f30-%f36 are reused for the sign masks. */
 625         faddd   %f20,%f46,%f20
 626         fmuld   %f0,%f10,%f10
 627         ldd     [%fp+n0],%f30
 628 
 629         faddd   %f22,%f46,%f22
 630         fmuld   %f2,%f12,%f12
 631         ldd     [%fp+n1],%f32
 632 
 633         faddd   %f24,%f46,%f24
 634         fmuld   %f4,%f14,%f14
 635         ldd     [%fp+n2],%f34
 636 
 637         faddd   %f26,%f46,%f26
 638         fmuld   %f6,%f16,%f16
 639         ldd     [%fp+n3],%f36
 640 
 641         fxor    %f10,%f30,%f10          ! if (n & 2) negate s, c
 642 
 643         fxor    %f12,%f32,%f12
 644 
 645         fxor    %f14,%f34,%f14
 646 
 647         fxor    %f16,%f36,%f16
 648 
 649         fxor    %f20,%f30,%f20
 650 
 651         fxor    %f22,%f32,%f22
 652 
 653         fxor    %f24,%f34,%f24
 654 
     /* Deferred count decrement for lane 3; loop while elements remain. */
 655         addcc   %i0,-1,%i0
 656         bg,pt   %icc,.start
 657 ! delay slot
 658         fxor    %f26,%f36,%f26
 659 
 660         ba,pt   %icc,.end
 661 ! delay slot
 662         nop
 663 
 664 
 665         .align  32
     /*
      * Loop exit.  Converts the final pass's eight double results to
      * single and stores them through the lane pointers (lanes that
      * held no real element point at `junk`).
      *
      * If biguns (%i5) is nonzero, at least one finite argument larger
      * than thresh2 was skipped by .rangeN.  The original arguments are
      * then reloaded from the stack and passed to __vlibm_vsincos_bigf
      * in the same order they arrived: n, x, stridex, s, strides, c in
      * %o0-%o5, with stridec copied from the incoming seventh-argument
      * slot to the outgoing one.  stridex and strides were saved as
      * 32-bit words, hence the explicit sign extension.
      */
 666 .end:
 667         fdtos   %f10,%f10
 668         st      %f10,[%l0]
 669         fdtos   %f20,%f20
 670         st      %f20,[%l4]
 671         fdtos   %f12,%f12
 672         st      %f12,[%l1]
 673         fdtos   %f22,%f22
 674         st      %f22,[%l5]
 675         fdtos   %f14,%f14
 676         st      %f14,[%l2]
 677         fdtos   %f24,%f24
 678         st      %f24,[%l6]
 679         fdtos   %f16,%f16
 680         st      %f16,[%l3]
 681         fdtos   %f26,%f26
 682         tst     %i5                     ! check for huge arguments remaining
 683         be,pt   %icc,.exit
 684 ! delay slot
 685         st      %f26,[%l7]
 686 #ifdef __sparcv9
 687         ldx     [%fp+xsave],%o1
 688         ldx     [%fp+ssave],%o3
 689         ldx     [%fp+csave],%o5
 690         ldx     [%fp+STACK_BIAS+0xb0],%i5
 691         stx     %i5,[%sp+STACK_BIAS+0xb0]
 692 #else
 693         ld      [%fp+xsave],%o1
 694         ld      [%fp+ssave],%o3
 695         ld      [%fp+csave],%o5
 696         ld      [%fp+0x5c],%i5
 697         st      %i5,[%sp+0x5c]
 698 #endif
 699         ld      [%fp+nsave],%o0
 700         ld      [%fp+sxsave],%o2
 701         ld      [%fp+sssave],%o4
 702         sra     %o2,0,%o2               ! sign-extend for V9
 703         call    __vlibm_vsincos_bigf
 704         sra     %o4,0,%o4               ! delay slot
 705 
 706 .exit:
 707         ret
 708         restore
 709 
 710 
 711         .align  32
     /*
      * Tail handling, entered from .start when the count runs out after
      * lane 0 (.last1), lane 1 (.last2) or lane 2 (.last3).  Each entry
      * falls through into the next.  For every lane that will not
      * receive a new element this pass, the results still pending from
      * the previous pass are converted and stored, the lane's x is set
      * to zero as a dummy argument, and its ps/pc pointers are aimed at
      * `junk` so that later stores for the lane are discarded.  Control
      * then rejoins the normal path at .cont.
      */
 712 .last1:
 713         fdtos   %f12,%f12
 714         st      %f12,[%l1]
 715         nop
 716         fdtos   %f22,%f22
 717         st      %f22,[%l5]
 718         fzeros  %f2
 719         add     %fp,junk,%l5
 720         add     %fp,junk,%l1
 721 .last2:
 722         fdtos   %f14,%f14
 723         st      %f14,[%l2]
 724         nop
 725         fdtos   %f24,%f24
 726         st      %f24,[%l6]
 727         fzeros  %f4
 728         add     %fp,junk,%l2
 729         add     %fp,junk,%l6
 730 .last3:
 731         fdtos   %f16,%f16
 732         st      %f16,[%l3]
 733         fdtos   %f26,%f26
 734         st      %f26,[%l7]
 735         fzeros  %f6
 736         add     %fp,junk,%l3
 737         ba,pt   %icc,.cont
 738 ! delay slot
 739         add     %fp,junk,%l7
 740 
 741 
 742         .align  16
     /*
      * Lane 0 holds an argument whose |x| is not <= thresh2.
      *
      * If |x| compares below the inf pattern (finite), the annulled
      * branch sets biguns in its delay slot and the element is skipped;
      * .end later calls __vlibm_vsincos_bigf.  Otherwise (infinity or
      * NaN) x * 0.0f is computed and stored directly to both outputs;
      * under IEEE arithmetic that product is a NaN.
      *
      * Either way the lane is then refilled: if elements remain, the
      * next x is loaded, ps0/pc0 and the running pointers are updated,
      * and the thresh2 test is repeated (looping back here on another
      * large value); the pi/4 mask is recomputed in the delay slot on
      * the way to .check1.  If none remain, the lane gets a zero dummy
      * argument, pointers to `junk`, and a mask value of 2 so that it
      * passes the primary-range test.
      */
 743 .range0:
 744         fcmpgt32 %f38,%f30,%o0
 745         andcc   %o0,2,%g0
 746         bnz,a,pt %icc,1f                ! branch if finite
 747 ! delay slot, squashed if branch not taken
 748         mov     1,%i5                   ! set biguns
 749         fzeros  %f1
 750         fmuls   %f0,%f1,%f0
 751         st      %f0,[%l0]
 752         st      %f0,[%l4]
 753 1:
 754         addcc   %i0,-1,%i0
 755         ble,pn  %icc,1f
 756 ! delay slot
 757         nop
 758         ld      [%i1],%f0
 759         add     %i1,%i2,%i1
 760         mov     %i3,%l0
 761         add     %i3,%i4,%i3
 762         fabsd   %f0,%f30
 763         mov     %o4,%l4
 764         add     %o4,%o5,%o4
 765         fcmple32 %f30,%f18,%o0
 766         andcc   %o0,2,%g0
 767         bz,pn   %icc,.range0
 768 ! delay slot
 769         nop
 770         ba,pt   %icc,.check1
 771 ! delay slot
 772         fcmple32 %f30,%f8,%o0
 773 1:
 774         fzero   %f0                     ! set up dummy argument
 775         add     %fp,junk,%l0
 776         add     %fp,junk,%l4
 777         mov     2,%o0
 778         ba,pt   %icc,.check1
 779 ! delay slot
 780         fzero   %f30
 781 
 782 
 783         .align  16
     /*
      * Lane 1 holds an argument whose |x| is not <= thresh2.
      *
      * Finite: set biguns (annulled delay slot) and skip the element.
      * Infinity or NaN: store x * 0.0f to both outputs.  Then refill
      * the lane with the next element and retest it, or, if no
      * elements remain, install a zero dummy argument with pointers to
      * `junk` and a passing mask value of 2.  Continues at .check2.
      */
 784 .range1:
 785         fcmpgt32 %f38,%f32,%o1
 786         andcc   %o1,2,%g0
 787         bnz,a,pt %icc,1f                ! branch if finite
 788 ! delay slot, squashed if branch not taken
 789         mov     1,%i5                   ! set biguns
 790         fzeros  %f3
 791         fmuls   %f2,%f3,%f2
 792         st      %f2,[%l1]
 793         st      %f2,[%l5]
 794 1:
 795         addcc   %i0,-1,%i0
 796         ble,pn  %icc,1f
 797 ! delay slot
 798         nop
 799         ld      [%i1],%f2
 800         add     %i1,%i2,%i1
 801         mov     %i3,%l1
 802         add     %i3,%i4,%i3
 803         fabsd   %f2,%f32
 804         mov     %o4,%l5
 805         add     %o4,%o5,%o4
 806         fcmple32 %f32,%f18,%o1
 807         andcc   %o1,2,%g0
 808         bz,pn   %icc,.range1
 809 ! delay slot
 810         nop
 811         ba,pt   %icc,.check2
 812 ! delay slot
 813         fcmple32 %f32,%f8,%o1
 814 1:
 815         fzero   %f2                     ! set up dummy argument
 816         add     %fp,junk,%l1
 817         add     %fp,junk,%l5
 818         mov     2,%o1
 819         ba,pt   %icc,.check2
 820 ! delay slot
 821         fzero   %f32
 822 
 823 
 824         .align  16
     /*
      * Lane 2 holds an argument whose |x| is not <= thresh2.
      *
      * Finite: set biguns (annulled delay slot) and skip the element.
      * Infinity or NaN: store x * 0.0f to both outputs.  Then refill
      * the lane with the next element and retest it, or, if no
      * elements remain, install a zero dummy argument with pointers to
      * `junk` and a passing mask value of 2.  Continues at .check3.
      */
 825 .range2:
 826         fcmpgt32 %f38,%f34,%o2
 827         andcc   %o2,2,%g0
 828         bnz,a,pt %icc,1f                ! branch if finite
 829 ! delay slot, squashed if branch not taken
 830         mov     1,%i5                   ! set biguns
 831         fzeros  %f5
 832         fmuls   %f4,%f5,%f4
 833         st      %f4,[%l2]
 834         st      %f4,[%l6]
 835 1:
 836         addcc   %i0,-1,%i0
 837         ble,pn  %icc,1f
 838 ! delay slot
 839         nop
 840         ld      [%i1],%f4
 841         add     %i1,%i2,%i1
 842         mov     %i3,%l2
 843         add     %i3,%i4,%i3
 844         fabsd   %f4,%f34
 845         mov     %o4,%l6
 846         add     %o4,%o5,%o4
 847         fcmple32 %f34,%f18,%o2
 848         andcc   %o2,2,%g0
 849         bz,pn   %icc,.range2
 850 ! delay slot
 851         nop
 852         ba,pt   %icc,.check3
 853 ! delay slot
 854         fcmple32 %f34,%f8,%o2
 855 1:
 856         fzero   %f4                     ! set up dummy argument
 857         add     %fp,junk,%l2
 858         add     %fp,junk,%l6
 859         mov     2,%o2
 860         ba,pt   %icc,.check3
 861 ! delay slot
 862         fzero   %f34
 863 
 864 
 865         .align  16
     /*
      * Lane 3 holds an argument whose |x| is not <= thresh2.
      *
      * Finite: set biguns (annulled delay slot) and skip the element.
      * Infinity or NaN: store x * 0.0f to both outputs.  Then refill
      * the lane with the next element and retest it, or, if no
      * elements remain, install a zero dummy argument with pointers to
      * `junk` and a passing mask value of 2.  As the last lane, it
      * continues at .checkprimary.
      */
 866 .range3:
 867         fcmpgt32 %f38,%f36,%o3
 868         andcc   %o3,2,%g0
 869         bnz,a,pt %icc,1f                ! branch if finite
 870 ! delay slot, squashed if branch not taken
 871         mov     1,%i5                   ! set biguns
 872         fzeros  %f7
 873         fmuls   %f6,%f7,%f6
 874         st      %f6,[%l3]
 875         st      %f6,[%l7]
 876 1:
 877         addcc   %i0,-1,%i0
 878         ble,pn  %icc,1f
 879 ! delay slot
 880         nop
 881         ld      [%i1],%f6
 882         add     %i1,%i2,%i1
 883         mov     %i3,%l3
 884         add     %i3,%i4,%i3
 885         fabsd   %f6,%f36
 886         mov     %o4,%l7
 887         add     %o4,%o5,%o4
 888         fcmple32 %f36,%f18,%o3
 889         andcc   %o3,2,%g0
 890         bz,pn   %icc,.range3
 891 ! delay slot
 892         nop
 893         ba,pt   %icc,.checkprimary
 894 ! delay slot
 895         fcmple32 %f36,%f8,%o3
 896 1:
 897         fzero   %f6                     ! set up dummy argument
 898         add     %fp,junk,%l3
 899         add     %fp,junk,%l7
 900         mov     2,%o3
 901         ba,pt   %icc,.checkprimary
 902 ! delay slot
 903         fzero   %f36
 904 
 905         SET_SIZE(__vsincosf)
 906