1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  23  */
  24 /*
  25  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  26  * Use is subject to license terms.
  27  */
  28 
  29         .file   "__vlog.S"
  30 
  31 #include "libm.h"
  32 
  33         RO_DATA
  34         .align  32
     ! TBL holds 32 entries of 16 bytes each (two doubles per entry, 0x200
     ! bytes in all), followed by the constants that the code addresses
     ! through the byte offsets two, A1, ... ox4000.
     ! The loop selects an entry with the byte offset
     !       j = ((ix + 0x94000) >> 11) & 0x1f0
     ! and adds the first double of the entry into h and the second into t.
     ! NOTE(review): the pairs look like ln(breakpoint) split into a leading
     ! part and a small correction - confirm against the table generator.
  35 TBL:
  36         .word   0xbfd522ae, 0x0738a000
  37         .word   0xbd2ebe70, 0x8164c759
  38         .word   0xbfd3c252, 0x77333000
  39         .word   0xbd183b54, 0xb606bd5c
  40         .word   0xbfd26962, 0x1134e000
  41         .word   0x3d31b61f, 0x10522625
  42         .word   0xbfd1178e, 0x8227e000
  43         .word   0xbd31ef78, 0xce2d07f2
  44         .word   0xbfcf991c, 0x6cb3c000
  45         .word   0x3d390d04, 0xcd7cc834
  46         .word   0xbfcd1037, 0xf2656000
  47         .word   0x3d084a7e, 0x75b6f6e4
  48         .word   0xbfca93ed, 0x3c8ae000
  49         .word   0x3d287243, 0x50562169
  50         .word   0xbfc823c1, 0x6551a000
  51         .word   0xbd1e0ddb, 0x9a631e83
  52         .word   0xbfc5bf40, 0x6b544000
  53         .word   0x3d127023, 0xeb68981c
  54         .word   0xbfc365fc, 0xb015a000
  55         .word   0x3d3fd3a0, 0xafb9691b
  56         .word   0xbfc1178e, 0x8227e000
  57         .word   0xbd21ef78, 0xce2d07f2
  58         .word   0xbfbda727, 0x63844000
  59         .word   0xbd1a8940, 0x1fa71733
  60         .word   0xbfb9335e, 0x5d594000
  61         .word   0xbd23115c, 0x3abd47da
  62         .word   0xbfb4d311, 0x5d208000
  63         .word   0x3cf53a25, 0x82f4e1ef
  64         .word   0xbfb08598, 0xb59e4000
  65         .word   0x3d17e5dd, 0x7009902c
  66         .word   0xbfa894aa, 0x149f8000
  67         .word   0xbd39a19a, 0x8be97661
  68         .word   0xbfa0415d, 0x89e78000
  69         .word   0x3d3dddc7, 0xf461c516
  70         .word   0xbf902056, 0x58930000
  71         .word   0xbd3611d2, 0x7c8e8417
     ! entry 18 (offset 0x120) is all zero; it is the entry selected when
     ! the high word of the argument is 0x3ff00000
  72         .word   0x00000000, 0x00000000
  73         .word   0x00000000, 0x00000000
  74         .word   0x3f9f829b, 0x0e780000
  75         .word   0x3d298026, 0x7c7e09e4
  76         .word   0x3faf0a30, 0xc0110000
  77         .word   0x3d48a998, 0x5f325c5c
  78         .word   0x3fb6f0d2, 0x8ae58000
  79         .word   0xbd34b464, 0x1b664613
  80         .word   0x3fbe2707, 0x6e2b0000
  81         .word   0xbd2a342c, 0x2af0003c
  82         .word   0x3fc29552, 0xf8200000
  83         .word   0xbd35b967, 0xf4471dfc
  84         .word   0x3fc5ff30, 0x70a78000
  85         .word   0x3d43d3c8, 0x73e20a07
  86         .word   0x3fc9525a, 0x9cf44000
  87         .word   0x3d46b476, 0x41307539
  88         .word   0x3fcc8ff7, 0xc79a8000
  89         .word   0x3d4a21ac, 0x25d81ef3
  90         .word   0x3fcfb918, 0x6d5e4000
  91         .word   0xbd0d572a, 0xab993c87
  92         .word   0x3fd1675c, 0xababa000
  93         .word   0x3d38380e, 0x731f55c4
  94         .word   0x3fd2e8e2, 0xbae12000
  95         .word   0xbd267b1e, 0x99b72bd8
  96         .word   0x3fd4618b, 0xc21c6000
  97         .word   0xbd13d82f, 0x484c84cc
  98         .word   0x3fd5d1bd, 0xbf580000
  99         .word   0x3d4394a1, 0x1b1c1ee4
 100 ! constants (start at TBL+0x200; order must match the offset defines):
 101         .word   0x40000000,0x00000000   ! two (2.0)
 102         .word   0x3fe55555,0x555571da   ! A1, polynomial coefficient
 103         .word   0x3fd99999,0x8702be3a   ! A2, polynomial coefficient
 104         .word   0x3fd24af7,0x3f4569b1   ! A3, polynomial coefficient
 105         .word   0x3ea62e42,0xfee00000   ! ln2hi, scaled by 2**-20
 106         .word   0x3caa39ef,0x35793c76   ! ln2lo, scaled by 2**-20
 107         .word   0xffff8000,0x00000000   ! mask applied to u to form v
 108         .word   0x43200000              ! ox43200000 (single word)
 109         .word   0xfff00000              ! oxfff00000 (single word)
 110         .word   0xc0194000              ! oxc0194000 (single word)
 111         .word   0x4000                  ! ox4000 (single word)
 112 
     ! Byte offsets, relative to TBL, of the constants stored after the
     ! 0x200-byte table.  The first seven are doubles (loaded with ldd),
     ! the last four are single words (loaded with ld).
 113 #define two             0x200
 114 #define A1              0x208
 115 #define A2              0x210
 116 #define A3              0x218
 117 #define ln2hi           0x220
 118 #define ln2lo           0x228
 119 #define mask            0x230
 120 #define ox43200000      0x238
 121 #define oxfff00000      0x23c
 122 #define oxc0194000      0x240
 123 #define ox4000          0x244
 124 
 125 ! local storage indices
     ! (offsets from %fp)
     ! jnk      scratch target for the stores issued before the pipeline
     !          has produced a real result
     ! tmp0-2   one slot per in-flight element: holds (two - v.d) - u.d
     !          between stages; the .range paths also use the slot to move
     !          the high word of a rescaled argument into an integer register
 126 
 127 #define jnk             STACK_BIAS-0x8
 128 #define tmp2            STACK_BIAS-0x10
 129 #define tmp1            STACK_BIAS-0x18
 130 #define tmp0            STACK_BIAS-0x20
 131 ! sizeof temp storage - must be a multiple of 16 for V9
 132 #define tmps            0x20
 133 
 134 ! register use
 135 
 136 ! i0  n
 137 ! i1  x
 138 ! i2  stridex
 139 ! i3  y
 140 ! i4  stridey
 141 ! i5  
 142 
 143 ! g1  TBL
 144 
 145 ! l0  j0
 146 ! l1  j1
 147 ! l2  j2
 148 ! l3  j+8 (offset of TBL[j+1])
 149 ! l4  0x94000
 150 ! l5  
 151 ! l6  0x000fffff
 152 ! l7  0x7ff00000
 153 
 154 ! o0  py0
 155 ! o1  py1
 156 ! o2  py2
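 157 ! o3  ix - 0x7ff00000 (range check scratch)
 158 ! o4  0x000fffff - ix (range check scratch)
 159 ! o5  low word of x (special-case paths only)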
 160 ! o7  
 161 
 162 ! f0  u0,q0
 163 ! f2  v0,(two-v0)-u0,z0
 164 ! f4  n0,f0,q0
 165 ! f6  s0
 166 ! f8  q
 167 ! f10 u1,q1
 168 ! f12 v1,(two-v1)-u1,z1
 169 ! f14 n1,f1,q1
 170 ! f16 s1
 171 ! f18 t
 172 ! f20 u2,q2
 173 ! f22 v2,(two-v2)-u2,z2
 174 ! f24 n2,f2,q2
 175 ! f26 s2
 176 ! f28 0xfff00000
 177 ! f29 0x43200000
 178 ! f30 0x4000
 179 ! f31 0xc0194000
 180 ! f32 t0
 181 ! f34 h0,f0-(c0-h0)
 182 ! f36 c0
 183 ! f38 A1
 184 ! f40 two
 185 ! f42 t1
 186 ! f44 h1,f1-(c1-h1)
 187 ! f46 c1
 188 ! f48 A2
 189 ! f50 0xffff8000...
 190 ! f52 t2
 191 ! f54 h2,f2-(c2-h2)
 192 ! f56 c2
 193 ! f58 A3
 194 ! f60 ln2hi
 195 ! f62 ln2lo
 196 
     ! __vlog: vector logarithm.  For each of n elements it reads a double
     ! from x and stores the result to y (presumably the natural log, given
     ! the ln2hi/ln2lo constants).
     !
     ! In:   %i0 = n        element count
     !       %i1 = x        argument vector
     !       %i2 = stridex  distance between arguments, in doubles
     !       %i3 = y        result vector
     !       %i4 = stridey  distance between results, in doubles
     !
     ! This section sets up the constant registers, preconditions the
     ! three-stage software pipeline and loads the first argument before
     ! branching to .loop0.  %asi is changed and not restored.
     !
     ! n <= 0 returns immediately without reading x or writing y.  Without
     ! the check the first argument was always loaded with a faulting load,
     ! and a zero, negative, infinite or NaN first argument caused a store
     ! to y[0] through the .range0 path even for an empty vector.
 197         ENTRY(__vlog)
 198         save    %sp,-SA(MINFRAME)-tmps,%sp
             cmp     %i0,0                   ! empty or negative count?
             ble,pn  %icc,.done              ! yes: nothing to do
     ! delay slot
             nop
 199         PIC_SETUP(l7)
 200         PIC_SET(l7,TBL,o0)
 201         mov     %o0,%g1                 ! g1 = address of TBL
 202         wr      %g0,0x82,%asi           ! set %asi for non-faulting loads
 203         sethi   %hi(0x94000),%l4        ! l4 = 0x94000
 204         sethi   %hi(0x000fffff),%l6
 205         or      %l6,%lo(0x000fffff),%l6 ! l6 = 0x000fffff
 206         sethi   %hi(0x7ff00000),%l7     ! l7 = 0x7ff00000
 207         ldd     [%g1+two],%f40          ! load the loop-invariant constants
 208         ldd     [%g1+A1],%f38
 209         ldd     [%g1+A2],%f48
 210         ldd     [%g1+A3],%f58
 211         ldd     [%g1+ln2hi],%f60
 212         ldd     [%g1+ln2lo],%f62
 213         ldd     [%g1+mask],%f50
 214         ld      [%g1+ox43200000],%f29
 215         ld      [%g1+oxfff00000],%f28
 216         ld      [%g1+oxc0194000],%f31
 217         ld      [%g1+ox4000],%f30
 218         sll     %i2,3,%i2               ! scale strides
 219         sll     %i4,3,%i4
 220         add     %fp,jnk,%o0             ! precondition loop
 221         add     %fp,jnk,%o1             ! the first store of each stage
 222         add     %fp,jnk,%o2             ! goes to the jnk scratch slot
 223         fzero   %f2                     ! zero the registers that the
 224         fzero   %f6                     ! not-yet-started pipeline
 225         fzero   %f18                    ! stages read on their first pass
 226         fzero   %f36
 227         fzero   %f12
 228         fzero   %f14
 229         fzero   %f16
 230         fzero   %f42
 231         fzero   %f44
 232         fzero   %f46
 233         std     %f46,[%fp+tmp1]         ! tmp1 = 0
 234         fzero   %f24
 235         fzero   %f26
 236         fzero   %f52
 237         fzero   %f54
 238         std     %f54,[%fp+tmp2]         ! tmp2 = 0
 239         sub     %i3,%i4,%i3             ! y -= stridey (stages pre-increment y)
 240         ld      [%i1],%l0               ! ix
 241         ld      [%i1],%f0               ! u.l[0] = *x
 242         ba      .loop0
 243         ld      [%i1+4],%f1             ! u.l[1] = *(1+x)

     ! early exit for n <= 0 (not reachable by fall-through)
     .done:
             ret
             restore
 244 
 245         .align  16
 246 ! -- 16 byte aligned
     ! First of three rotated copies of the loop body.  On entry %l0 holds
     ! the high word of the argument (ix) and %f0:%f1 the argument itself.
     ! One pass
     !   - starts a new element in f0-f6/f32-f36,
     !   - finishes the element started on the previous pass through .loop0
     !     and stores it to [%o0] (lines tagged "previous iteration" or [0]),
     !   - advances the element started in .loop1 (tagged [1]; f10-f16,
     !     f42-f46) and the one started in .loop2 (tagged [2]; f20-f26,
     !     f52-f56),
     !   - preloads the argument for .loop1 into %l1 and %f10:%f11.
 247 .loop0:
 248         sub     %l0,%l7,%o3             ! o3 = ix - 0x7ff00000
 249         sub     %l6,%l0,%o4             ! o4 = 0x000fffff - ix
 250         fpadd32s %f0,%f31,%f4           ! n = (ix + 0xc0194000) & 0xfff00000
 251         fmuld   %f6,%f2,%f8             ! (previous iteration) q = s * (...)
 252 
 253         andcc   %o3,%o4,%o4             ! sign set only if both are negative
 254         bge,pn  %icc,.range0            ! ix <= 0x000fffff or >= 0x7ff00000
 255 ! delay slot
 256         fands   %f4,%f28,%f4            ! n &= 0xfff00000 (runs on both paths)
 257 
 258         add     %i1,%i2,%i1             ! x += stridex
 259         add     %i3,%i4,%i3             ! y += stridey
 260         fpsub32s %f0,%f4,%f0            ! u.l[0] -= n
 261 
 262 .cont0:
 263         lda     [%i1]%asi,%l1           ! preload next argument
 264         add     %l0,%l4,%l0             ! j = ix + 0x94000
 265         fpadd32s %f0,%f30,%f2           ! v.l[0] = u.l[0] + 0x4000
 266 
 267         lda     [%i1]%asi,%f10          ! next u.l[0]
 268         srl     %l0,11,%l0              ! j = (j >> 11) & 0x1f0
 269         fand    %f2,%f50,%f2            ! v.l &= 0xffff8000...
 270 
 271         lda     [%i1+4]%asi,%f11        ! next u.l[1]
 272         and     %l0,0x1f0,%l0           ! j is now a byte offset into TBL
 273         fitod   %f4,%f32                ! (double) n
 274 
 275         add     %l0,8,%l3               ! l3 = offset of TBL[j+1]
 276         fsubd   %f0,%f2,%f4             ! f = u.d - v.d
 277 
 278         faddd   %f0,%f2,%f6             ! s = f / (u.d + v.d)
 279 
 280         fsubd   %f40,%f2,%f2            ! two - v.d
 281         fmuld   %f32,%f60,%f34          ! h = n * ln2hi + TBL[j]
 282 
 283         faddd   %f8,%f18,%f8            ! y = c + (t + q)
 284         fmuld   %f32,%f62,%f32          ! t = n * ln2lo + TBL[j+1]
 285 
 286         fdivd   %f4,%f6,%f6             ! s: the divide itself
 287 
 288         faddd   %f54,%f24,%f56          ! c = h + f
 289         fmuld   %f26,%f26,%f22          ! z = s * s
 290 
 291         faddd   %f8,%f36,%f8            ! [0] y = c + (t + q), final add
 292         st      %f8,[%o0]               ! [0] *py0 = y, high word
 293 
 294         st      %f9,[%o0+4]             ! [0] low word
 295         mov     %i3,%o0                 ! py0 = y for the element started here
 296         faddd   %f14,%f38,%f14          ! [1] q += A1
 297 
 298         fsubd   %f56,%f54,%f54          ! t += f - (c - h)
 299         fmuld   %f22,%f58,%f20          ! q = ...
 300 
 301         fsubd   %f2,%f0,%f2             ! (two - v.d) - u.d
 302         ldd     [%g1+%l0],%f36          ! TBL[j]
 303 
 304         faddd   %f42,%f44,%f18          ! [1] t = t1 + (f1 - (c1 - h1))
 305         fmuld   %f12,%f14,%f14          ! [1] q *= z
 306         ldd     [%fp+tmp1],%f12         ! [1] reload (two - v.d) - u.d
 307 
 308         faddd   %f20,%f48,%f20          ! [2] q += A2
 309         nop                             ! NOTE(review): presumably scheduling filler
 310 
 311         faddd   %f34,%f36,%f34          ! h += TBL[j]
 312         ldd     [%g1+%l3],%f0           ! TBL[j+1]
 313 
 314         faddd   %f14,%f12,%f12          ! [1] ((two - v.d) - u.d) + q
 315 
 316         fsubd   %f24,%f54,%f54          ! [2] f - (c - h)
 317         fmuld   %f22,%f20,%f24          ! [2] q *= z
 318 
 319         std     %f2,[%fp+tmp0]          ! save (two - v.d) - u.d; f2 is reused
 320         addcc   %i0,-1,%i0              ! one fewer argument left
 321         ble,pn  %icc,.endloop0          ! none left: drain the pipeline
 322 ! delay slot
 323         faddd   %f32,%f0,%f32           ! t += TBL[j+1]
 324 
 325 ! -- 16 byte aligned
     ! Second of three rotated copies of the loop body.  On entry %l1 holds
     ! the high word of the argument (ix) and %f10:%f11 the argument itself.
     ! One pass
     !   - starts a new element in f10-f16/f42-f46,
     !   - finishes the element started on the previous pass through .loop1
     !     and stores it to [%o1] (lines tagged "previous iteration" or [1]),
     !   - advances the element started in .loop0 (tagged [0]; f0-f6,
     !     f32-f36) and the one started in .loop2 (tagged [2]; f20-f26,
     !     f52-f56),
     !   - preloads the argument for .loop2 into %l2 and %f20:%f21.
 326 .loop1:
 327         sub     %l1,%l7,%o3             ! o3 = ix - 0x7ff00000
 328         sub     %l6,%l1,%o4             ! o4 = 0x000fffff - ix
 329         fpadd32s %f10,%f31,%f14         ! n = (ix + 0xc0194000) & 0xfff00000
 330         fmuld   %f16,%f12,%f8           ! (previous iteration) q = s * (...)
 331 
 332         andcc   %o3,%o4,%o4             ! sign set only if both are negative
 333         bge,pn  %icc,.range1            ! ix <= 0x000fffff or >= 0x7ff00000
 334 ! delay slot
 335         fands   %f14,%f28,%f14          ! n &= 0xfff00000 (runs on both paths)
 336 
 337         add     %i1,%i2,%i1             ! x += stridex
 338         add     %i3,%i4,%i3             ! y += stridey
 339         fpsub32s %f10,%f14,%f10         ! u.l[0] -= n
 340 
 341 .cont1:
 342         lda     [%i1]%asi,%l2           ! preload next argument
 343         add     %l1,%l4,%l1             ! j = ix + 0x94000
 344         fpadd32s %f10,%f30,%f12         ! v.l[0] = u.l[0] + 0x4000
 345 
 346         lda     [%i1]%asi,%f20          ! next u.l[0]
 347         srl     %l1,11,%l1              ! j = (j >> 11) & 0x1f0
 348         fand    %f12,%f50,%f12          ! v.l &= 0xffff8000...
 349 
 350         lda     [%i1+4]%asi,%f21        ! next u.l[1]
 351         and     %l1,0x1f0,%l1           ! j is now a byte offset into TBL
 352         fitod   %f14,%f42               ! (double) n
 353 
 354         add     %l1,8,%l3               ! l3 = offset of TBL[j+1]
 355         fsubd   %f10,%f12,%f14          ! f = u.d - v.d
 356 
 357         faddd   %f10,%f12,%f16          ! s = f / (u.d + v.d)
 358 
 359         fsubd   %f40,%f12,%f12          ! two - v.d
 360         fmuld   %f42,%f60,%f44          ! h = n * ln2hi + TBL[j]
 361 
 362         faddd   %f8,%f18,%f8            ! y = c + (t + q)
 363         fmuld   %f42,%f62,%f42          ! t = n * ln2lo + TBL[j+1]
 364 
 365         fdivd   %f14,%f16,%f16          ! s: the divide itself
 366 
 367         faddd   %f34,%f4,%f36           ! c = h + f
 368         fmuld   %f6,%f6,%f2             ! z = s * s
 369 
 370         faddd   %f8,%f46,%f8            ! [1] y = c + (t + q), final add
 371         st      %f8,[%o1]               ! [1] *py1 = y, high word
 372 
 373         st      %f9,[%o1+4]             ! [1] low word
 374         mov     %i3,%o1                 ! py1 = y for the element started here
 375         faddd   %f24,%f38,%f24          ! [2] q += A1
 376 
 377         fsubd   %f36,%f34,%f34          ! t += f - (c - h)
 378         fmuld   %f2,%f58,%f0            ! q = ...
 379 
 380         fsubd   %f12,%f10,%f12          ! (two - v.d) - u.d
 381         ldd     [%g1+%l1],%f46          ! TBL[j]
 382 
 383         faddd   %f52,%f54,%f18          ! [2] t = t2 + (f2 - (c2 - h2))
 384         fmuld   %f22,%f24,%f24          ! [2] q *= z
 385         ldd     [%fp+tmp2],%f22         ! [2] reload (two - v.d) - u.d
 386 
 387         faddd   %f0,%f48,%f0            ! [0] q += A2
 388         nop                             ! NOTE(review): presumably scheduling filler
 389 
 390         faddd   %f44,%f46,%f44          ! h += TBL[j]
 391         ldd     [%g1+%l3],%f10          ! TBL[j+1]
 392 
 393         faddd   %f24,%f22,%f22          ! [2] ((two - v.d) - u.d) + q
 394 
 395         fsubd   %f4,%f34,%f34           ! [0] f - (c - h)
 396         fmuld   %f2,%f0,%f4             ! [0] q *= z
 397 
 398         std     %f12,[%fp+tmp1]         ! save (two - v.d) - u.d; f12 is reused
 399         addcc   %i0,-1,%i0              ! one fewer argument left
 400         ble,pn  %icc,.endloop1          ! none left: drain the pipeline
 401 ! delay slot
 402         faddd   %f42,%f10,%f42          ! t += TBL[j+1]
 403 
 404 ! -- 16 byte aligned
     ! Third of three rotated copies of the loop body.  On entry %l2 holds
     ! the high word of the argument (ix) and %f20:%f21 the argument itself.
     ! One pass
     !   - starts a new element in f20-f26/f52-f56,
     !   - finishes the element started on the previous pass through .loop2
     !     and stores it to [%o2] (lines tagged "previous iteration" or [2]),
     !   - advances the element started in .loop1 (tagged [1]; f10-f16,
     !     f42-f46) and the one started in .loop0 (tagged [0]; f0-f6,
     !     f32-f36),
     !   - preloads the argument for .loop0 into %l0 and %f0:%f1.
     ! The closing branch goes back to .loop0 while arguments remain and
     ! otherwise falls through to .endloop2.
 405 .loop2:
 406         sub     %l2,%l7,%o3             ! o3 = ix - 0x7ff00000
 407         sub     %l6,%l2,%o4             ! o4 = 0x000fffff - ix
 408         fpadd32s %f20,%f31,%f24         ! n = (ix + 0xc0194000) & 0xfff00000
 409         fmuld   %f26,%f22,%f8           ! (previous iteration) q = s * (...)
 410 
 411         andcc   %o3,%o4,%o4             ! sign set only if both are negative
 412         bge,pn  %icc,.range2            ! ix <= 0x000fffff or >= 0x7ff00000
 413 ! delay slot
 414         fands   %f24,%f28,%f24          ! n &= 0xfff00000 (runs on both paths)
 415 
 416         add     %i1,%i2,%i1             ! x += stridex
 417         add     %i3,%i4,%i3             ! y += stridey
 418         fpsub32s %f20,%f24,%f20         ! u.l[0] -= n
 419 
 420 .cont2:
 421         lda     [%i1]%asi,%l0           ! preload next argument
 422         add     %l2,%l4,%l2             ! j = ix + 0x94000
 423         fpadd32s %f20,%f30,%f22         ! v.l[0] = u.l[0] + 0x4000
 424 
 425         lda     [%i1]%asi,%f0           ! next u.l[0]
 426         srl     %l2,11,%l2              ! j = (j >> 11) & 0x1f0
 427         fand    %f22,%f50,%f22          ! v.l &= 0xffff8000...
 428 
 429         lda     [%i1+4]%asi,%f1         ! next u.l[1]
 430         and     %l2,0x1f0,%l2           ! j is now a byte offset into TBL
 431         fitod   %f24,%f52               ! (double) n
 432 
 433         add     %l2,8,%l3               ! l3 = offset of TBL[j+1]
 434         fsubd   %f20,%f22,%f24          ! f = u.d - v.d
 435 
 436         faddd   %f20,%f22,%f26          ! s = f / (u.d + v.d)
 437 
 438         fsubd   %f40,%f22,%f22          ! two - v.d
 439         fmuld   %f52,%f60,%f54          ! h = n * ln2hi + TBL[j]
 440 
 441         faddd   %f8,%f18,%f8            ! y = c + (t + q)
 442         fmuld   %f52,%f62,%f52          ! t = n * ln2lo + TBL[j+1]
 443 
 444         fdivd   %f24,%f26,%f26          ! s: the divide itself
 445 
 446         faddd   %f44,%f14,%f46          ! c = h + f
 447         fmuld   %f16,%f16,%f12          ! z = s * s
 448 
 449         faddd   %f8,%f56,%f8            ! [2] y = c + (t + q), final add
 450         st      %f8,[%o2]               ! [2] *py2 = y, high word
 451 
 452         st      %f9,[%o2+4]             ! [2] low word
 453         mov     %i3,%o2                 ! py2 = y for the element started here
 454         faddd   %f4,%f38,%f4            ! [0] q += A1
 455 
 456         fsubd   %f46,%f44,%f44          ! t += f - (c - h)
 457         fmuld   %f12,%f58,%f10          ! q = ...
 458 
 459         fsubd   %f22,%f20,%f22          ! (two - v.d) - u.d
 460         ldd     [%g1+%l2],%f56          ! TBL[j]
 461 
 462         faddd   %f32,%f34,%f18          ! [0] t = t0 + (f0 - (c0 - h0))
 463         fmuld   %f2,%f4,%f4             ! [0] q *= z
 464         ldd     [%fp+tmp0],%f2          ! [0] reload (two - v.d) - u.d
 465 
 466         faddd   %f10,%f48,%f10          ! [1] q += A2
 467         nop                             ! NOTE(review): presumably scheduling filler
 468 
 469         faddd   %f54,%f56,%f54          ! h += TBL[j]
 470         ldd     [%g1+%l3],%f20          ! TBL[j+1]
 471 
 472         faddd   %f4,%f2,%f2             ! [0] ((two - v.d) - u.d) + q
 473 
 474         fsubd   %f14,%f44,%f44          ! [1] f - (c - h)
 475         fmuld   %f12,%f10,%f14          ! [1] q *= z
 476 
 477         std     %f22,[%fp+tmp2]         ! save (two - v.d) - u.d; f22 is reused
 478         addcc   %i0,-1,%i0              ! one fewer argument left
 479         bg,pt   %icc,.loop0             ! more to do: next pass of .loop0
 480 ! delay slot
 481         faddd   %f52,%f20,%f52          ! t += TBL[j+1]
 482 
 483 
 484 ! Once we get to the last element, we loop three more times to finish
 485 ! the computations in progress.  This means we will load past the end
 486 ! of the argument vector, but since we use non-faulting loads and never
 487 ! use the data, the only potential problem is cache miss.  (Note that
 488 ! when the argument is 2, the only exception that occurs in the compu-
 489 ! tation is an inexact result in the final addition, and we break out
 490 ! of the "extra" iterations before then.)
     !
     ! Each .endloopN feeds the constant two to the next loop copy as a dummy
     ! argument.  %i0 keeps counting down through 0, -1 and -2 during these
     ! extra passes; the routine returns once it reaches -3.  The fmovd sits
     ! in an annulled delay slot, so it executes only when the branch is taken.
 491 .endloop2:
 492         sethi   %hi(0x40000000),%l0     ! "next argument" = two
 493         cmp     %i0,-3
 494         bg,a,pt %icc,.loop0             ! still draining: run .loop0 again
 495 ! delay slot
 496         fmovd   %f40,%f0                ! u = two (branch taken only)
 497         ret
 498         restore
 499 
 500         .align  16
 501 .endloop0:
 502         sethi   %hi(0x40000000),%l1     ! "next argument" = two
 503         cmp     %i0,-3
 504         bg,a,pt %icc,.loop1             ! still draining: run .loop1 again
 505 ! delay slot
 506         fmovd   %f40,%f10               ! u = two (branch taken only)
 507         ret
 508         restore
 509 
 510         .align  16
 511 .endloop1:
 512         sethi   %hi(0x40000000),%l2     ! "next argument" = two
 513         cmp     %i0,-3
 514         bg,a,pt %icc,.loop2             ! still draining: run .loop2 again
 515 ! delay slot
 516         fmovd   %f40,%f20               ! u = two (branch taken only)
 517         ret
 518         restore
 519 
 520 
 521         .align  16
     ! Out-of-range argument in the .loop0 slot (ix <= 0x000fffff or
     ! ix >= 0x7ff00000 as a signed word).  Three outcomes:
     !   - nonzero argument with ix <= 0x000fffff: rescale it, correct n and
     !     rejoin the pipeline at .cont0;
     !   - +0 or -0 (label 1): raise div-by-zero and store -inf;
     !   - everything else (label 2): store (x + |x|) * inf.
     ! The last two store their result at once, bypassing the pipeline, and
     ! then restart .loop0 with the next argument (label 3), or go to
     ! .endloop2 when none is left.
 522 .range0:
 523         cmp     %l0,%l7
 524         bgeu,pn %icc,2f                 ! if (unsigned) ix >= 0x7ff00000
 525 ! delay slot
 526         ld      [%i1+4],%o5             ! o5 = low word of x
 527         fxtod   %f0,%f0                 ! scale by 2**1074 w/o trapping
 528         st      %f0,[%fp+tmp0]          ! pass the new high word through memory
 529         add     %i1,%i2,%i1             ! x += stridex
 530         orcc    %l0,%o5,%g0             ! all bits of x zero?
 531         be,pn   %icc,1f                 ! if x == 0
 532 ! delay slot
 533         add     %i3,%i4,%i3             ! y += stridey
 534         fpadd32s %f0,%f31,%f4           ! n = (ix + 0xc0194000) & 0xfff00000
 535         fands   %f4,%f28,%f4
 536         fpsub32s %f0,%f4,%f0            ! u.l[0] -= n
 537         ld      [%fp+tmp0],%l0          ! ix of the scaled argument
 538         ba,pt   %icc,.cont0             ! rejoin the normal path
 539 ! delay slot
 540         fpsub32s %f4,%f29,%f4           ! n -= 0x43200000
 541 1:
 542         fdivs   %f29,%f1,%f4            ! raise div-by-zero
 543         ba,pt   %icc,3f
 544 ! delay slot
 545         st      %f28,[%i3]              ! store -inf
 546 2:
 547         sll     %l0,1,%l0               ! lop off sign bit
 548         add     %i1,%i2,%i1             ! x += stridex
 549         orcc    %l0,%o5,%g0             ! all remaining bits zero?
 550         be,pn   %icc,1b                 ! if x == -0
 551 ! delay slot
 552         add     %i3,%i4,%i3             ! y += stridey
 553         fabsd   %f0,%f4                 ! *y = (x + |x|) * inf
 554         faddd   %f0,%f4,%f0             ! x + |x|
 555         fand    %f28,%f50,%f4           ! f28:f29 & mask = 0xfff00000 00000000
 556         fnegd   %f4,%f4                 ! +inf
 557         fmuld   %f0,%f4,%f0             ! (x + |x|) * inf
 558         st      %f0,[%i3]               ! high word of the result
 559 3:
 560         addcc   %i0,-1,%i0              ! one fewer argument left
 561         ble,pn  %icc,.endloop2          ! none left: drain the pipeline
 562 ! delay slot
 563         st      %f1,[%i3+4]             ! low word of the result
 564         ld      [%i1],%l0               ! get next argument
 565         ld      [%i1],%f0
 566         ba,pt   %icc,.loop0
 567 ! delay slot
 568         ld      [%i1+4],%f1
 569 
 570 
 571         .align  16
     ! Out-of-range argument in the .loop1 slot (ix <= 0x000fffff or
     ! ix >= 0x7ff00000 as a signed word).  Three outcomes:
     !   - nonzero argument with ix <= 0x000fffff: rescale it, correct n and
     !     rejoin the pipeline at .cont1;
     !   - +0 or -0 (label 1): raise div-by-zero and store -inf;
     !   - everything else (label 2): store (x + |x|) * inf.
     ! The last two store their result at once, bypassing the pipeline, and
     ! then restart .loop1 with the next argument (label 3), or go to
     ! .endloop0 when none is left.
 572 .range1:
 573         cmp     %l1,%l7
 574         bgeu,pn %icc,2f                 ! if (unsigned) ix >= 0x7ff00000
 575 ! delay slot
 576         ld      [%i1+4],%o5             ! o5 = low word of x
 577         fxtod   %f10,%f10               ! scale by 2**1074 w/o trapping
 578         st      %f10,[%fp+tmp1]         ! pass the new high word through memory
 579         add     %i1,%i2,%i1             ! x += stridex
 580         orcc    %l1,%o5,%g0             ! all bits of x zero?
 581         be,pn   %icc,1f                 ! if x == 0
 582 ! delay slot
 583         add     %i3,%i4,%i3             ! y += stridey
 584         fpadd32s %f10,%f31,%f14         ! n = (ix + 0xc0194000) & 0xfff00000
 585         fands   %f14,%f28,%f14
 586         fpsub32s %f10,%f14,%f10         ! u.l[0] -= n
 587         ld      [%fp+tmp1],%l1          ! ix of the scaled argument
 588         ba,pt   %icc,.cont1             ! rejoin the normal path
 589 ! delay slot
 590         fpsub32s %f14,%f29,%f14         ! n -= 0x43200000
 591 1:
 592         fdivs   %f29,%f11,%f14          ! raise div-by-zero
 593         ba,pt   %icc,3f
 594 ! delay slot
 595         st      %f28,[%i3]              ! store -inf
 596 2:
 597         sll     %l1,1,%l1               ! lop off sign bit
 598         add     %i1,%i2,%i1             ! x += stridex
 599         orcc    %l1,%o5,%g0             ! all remaining bits zero?
 600         be,pn   %icc,1b                 ! if x == -0
 601 ! delay slot
 602         add     %i3,%i4,%i3             ! y += stridey
 603         fabsd   %f10,%f14               ! *y = (x + |x|) * inf
 604         faddd   %f10,%f14,%f10          ! x + |x|
 605         fand    %f28,%f50,%f14          ! f28:f29 & mask = 0xfff00000 00000000
 606         fnegd   %f14,%f14               ! +inf
 607         fmuld   %f10,%f14,%f10          ! (x + |x|) * inf
 608         st      %f10,[%i3]              ! high word of the result
 609 3:
 610         addcc   %i0,-1,%i0              ! one fewer argument left
 611         ble,pn  %icc,.endloop0          ! none left: drain the pipeline
 612 ! delay slot
 613         st      %f11,[%i3+4]            ! low word of the result
 614         ld      [%i1],%l1               ! get next argument
 615         ld      [%i1],%f10
 616         ba,pt   %icc,.loop1
 617 ! delay slot
 618         ld      [%i1+4],%f11
 619 
 620 
 621         .align  16
     ! Out-of-range argument in the .loop2 slot (ix <= 0x000fffff or
     ! ix >= 0x7ff00000 as a signed word).  Three outcomes:
     !   - nonzero argument with ix <= 0x000fffff: rescale it, correct n and
     !     rejoin the pipeline at .cont2;
     !   - +0 or -0 (label 1): raise div-by-zero and store -inf;
     !   - everything else (label 2): store (x + |x|) * inf.
     ! The last two store their result at once, bypassing the pipeline, and
     ! then restart .loop2 with the next argument (label 3), or go to
     ! .endloop1 when none is left.
 622 .range2:
 623         cmp     %l2,%l7
 624         bgeu,pn %icc,2f                 ! if (unsigned) ix >= 0x7ff00000
 625 ! delay slot
 626         ld      [%i1+4],%o5             ! o5 = low word of x
 627         fxtod   %f20,%f20               ! scale by 2**1074 w/o trapping
 628         st      %f20,[%fp+tmp2]         ! pass the new high word through memory
 629         add     %i1,%i2,%i1             ! x += stridex
 630         orcc    %l2,%o5,%g0             ! all bits of x zero?
 631         be,pn   %icc,1f                 ! if x == 0
 632 ! delay slot
 633         add     %i3,%i4,%i3             ! y += stridey
 634         fpadd32s %f20,%f31,%f24         ! n = (ix + 0xc0194000) & 0xfff00000
 635         fands   %f24,%f28,%f24
 636         fpsub32s %f20,%f24,%f20         ! u.l[0] -= n
 637         ld      [%fp+tmp2],%l2          ! ix of the scaled argument
 638         ba,pt   %icc,.cont2             ! rejoin the normal path
 639 ! delay slot
 640         fpsub32s %f24,%f29,%f24         ! n -= 0x43200000
 641 1:
 642         fdivs   %f29,%f21,%f24          ! raise div-by-zero
 643         ba,pt   %icc,3f
 644 ! delay slot
 645         st      %f28,[%i3]              ! store -inf
 646 2:
 647         sll     %l2,1,%l2               ! lop off sign bit
 648         add     %i1,%i2,%i1             ! x += stridex
 649         orcc    %l2,%o5,%g0             ! all remaining bits zero?
 650         be,pn   %icc,1b                 ! if x == -0
 651 ! delay slot
 652         add     %i3,%i4,%i3             ! y += stridey
 653         fabsd   %f20,%f24               ! *y = (x + |x|) * inf
 654         faddd   %f20,%f24,%f20          ! x + |x|
 655         fand    %f28,%f50,%f24          ! f28:f29 & mask = 0xfff00000 00000000
 656         fnegd   %f24,%f24               ! +inf
 657         fmuld   %f20,%f24,%f20          ! (x + |x|) * inf
 658         st      %f20,[%i3]              ! high word of the result
 659 3:
 660         addcc   %i0,-1,%i0              ! one fewer argument left
 661         ble,pn  %icc,.endloop1          ! none left: drain the pipeline
 662 ! delay slot
 663         st      %f21,[%i3+4]            ! low word of the result
 664         ld      [%i1],%l2               ! get next argument
 665         ld      [%i1],%f20
 666         ba,pt   %icc,.loop2
 667 ! delay slot
 668         ld      [%i1+4],%f21
 669         
 670         SET_SIZE(__vlog)
 671