/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

	.file	"__vlog.S"

#include "libm.h"

! Read-only lookup table followed by the scalar constants used by the
! kernel below.
!
! The table holds 32 entries of 16 bytes (two doubles each, emitted as
! four 32-bit words), i.e. 0x200 bytes, which is why the constant
! offsets defined further down start at 0x200.  The code reads the
! first double of an entry at byte offset j and the second at j+8,
! where j is a multiple of 16 in the range 0..0x1f0 derived from the
! leading bits of the argument.  The first double is added to
! n * ln2hi and the second to n * ln2lo, so each entry presumably
! holds a high part and a low-order correction of one tabulated
! logarithm value (NOTE(review): confirm against the generator of
! this table).  Entry 18 (offset 0x120) is exactly zero.

	RO_DATA
	.align	32
TBL:
	.word	0xbfd522ae, 0x0738a000
	.word	0xbd2ebe70, 0x8164c759
	.word	0xbfd3c252, 0x77333000
	.word	0xbd183b54, 0xb606bd5c
	.word	0xbfd26962, 0x1134e000
	.word	0x3d31b61f, 0x10522625
	.word	0xbfd1178e, 0x8227e000
	.word	0xbd31ef78, 0xce2d07f2
	.word	0xbfcf991c, 0x6cb3c000
	.word	0x3d390d04, 0xcd7cc834
	.word	0xbfcd1037, 0xf2656000
	.word	0x3d084a7e, 0x75b6f6e4
	.word	0xbfca93ed, 0x3c8ae000
	.word	0x3d287243, 0x50562169
	.word	0xbfc823c1, 0x6551a000
	.word	0xbd1e0ddb, 0x9a631e83
	.word	0xbfc5bf40, 0x6b544000
	.word	0x3d127023, 0xeb68981c
	.word	0xbfc365fc, 0xb015a000
	.word	0x3d3fd3a0, 0xafb9691b
	.word	0xbfc1178e, 0x8227e000
	.word	0xbd21ef78, 0xce2d07f2
	.word	0xbfbda727, 0x63844000
	.word	0xbd1a8940, 0x1fa71733
	.word	0xbfb9335e, 0x5d594000
	.word	0xbd23115c, 0x3abd47da
	.word	0xbfb4d311, 0x5d208000
	.word	0x3cf53a25, 0x82f4e1ef
	.word	0xbfb08598, 0xb59e4000
	.word	0x3d17e5dd, 0x7009902c
	.word	0xbfa894aa, 0x149f8000
	.word	0xbd39a19a, 0x8be97661
	.word	0xbfa0415d, 0x89e78000
	.word	0x3d3dddc7, 0xf461c516
	.word	0xbf902056, 0x58930000
	.word	0xbd3611d2, 0x7c8e8417
	.word	0x00000000, 0x00000000
	.word	0x00000000, 0x00000000
	.word	0x3f9f829b, 0x0e780000
	.word	0x3d298026, 0x7c7e09e4
	.word	0x3faf0a30, 0xc0110000
	.word	0x3d48a998, 0x5f325c5c
	.word	0x3fb6f0d2, 0x8ae58000
	.word	0xbd34b464, 0x1b664613
	.word	0x3fbe2707, 0x6e2b0000
	.word	0xbd2a342c, 0x2af0003c
	.word	0x3fc29552, 0xf8200000
	.word	0xbd35b967, 0xf4471dfc
	.word	0x3fc5ff30, 0x70a78000
	.word	0x3d43d3c8, 0x73e20a07
	.word	0x3fc9525a, 0x9cf44000
	.word	0x3d46b476, 0x41307539
	.word	0x3fcc8ff7, 0xc79a8000
	.word	0x3d4a21ac, 0x25d81ef3
	.word	0x3fcfb918, 0x6d5e4000
	.word	0xbd0d572a, 0xab993c87
	.word	0x3fd1675c, 0xababa000
	.word	0x3d38380e, 0x731f55c4
	.word	0x3fd2e8e2, 0xbae12000
	.word	0xbd267b1e, 0x99b72bd8
	.word	0x3fd4618b, 0xc21c6000
	.word	0xbd13d82f, 0x484c84cc
	.word	0x3fd5d1bd, 0xbf580000
	.word	0x3d4394a1, 0x1b1c1ee4
! constants:
! Seven doubles (2.0, three polynomial coefficients, the two halves of
! ln 2 and a bit mask) followed by four single 32-bit words.  Their
! order must match the byte offsets defined immediately below.
	.word	0x40000000,0x00000000
	.word	0x3fe55555,0x555571da
	.word	0x3fd99999,0x8702be3a
	.word	0x3fd24af7,0x3f4569b1
	.word	0x3ea62e42,0xfee00000	! scaled by 2**-20
	.word	0x3caa39ef,0x35793c76	! scaled by 2**-20
	.word	0xffff8000,0x00000000
	.word	0x43200000
	.word	0xfff00000
	.word	0xc0194000
	.word	0x4000

! Byte offsets of the constants above, relative to TBL.
#define two		0x200
#define A1		0x208
#define A2		0x210
#define A3		0x218
#define ln2hi		0x220
#define ln2lo		0x228
#define mask		0x230
#define ox43200000	0x238
#define oxfff00000	0x23c
#define oxc0194000	0x240
#define ox4000		0x244

! local storage indices
! (offsets from %fp; jnk is a scratch doubleword that absorbs the
! stores issued before any real result is ready, tmp0..tmp2 carry one
! intermediate value per pipeline slot from one stage to a later one)

#define jnk		STACK_BIAS-0x8
#define tmp2		STACK_BIAS-0x10
#define tmp1		STACK_BIAS-0x18
#define tmp0		STACK_BIAS-0x20
! sizeof temp storage - must be a multiple of 16 for V9
#define tmps		0x20

! register use

! i0  n
! i1  x
! i2  stridex
! i3  y
! i4  stridey
! i5

! g1  TBL

! l0  j0
! l1  j1
! l2  j2
! l3  j+8 (byte offset of the second double of the table entry)
! l4  0x94000
! l5
! l6  0x000fffff
! l7  0x7ff00000

! o0  py0
! o1  py1
! o2  py2
! o3  scratch for the argument range test
! o4  scratch for the argument range test
! o5  low word of x in the .range handlers
! o7

! f0  u0,q0
! f2  v0,(two-v0)-u0,z0
! f4  n0,f0,q0
! f6  s0
! f8  q (result being assembled for the pending store)
! f10 u1,q1
! f12 v1,(two-v1)-u1,z1
! f14 n1,f1,q1
! f16 s1
! f18 t
! f20 u2,q2
! f22 v2,(two-v2)-u2,z2
! f24 n2,f2,q2
! f26 s2
! f28 0xfff00000
! f29 0x43200000
! f30 0x4000
! f31 0xc0194000
! f32 t0
! f34 h0,f0-(c0-h0)
! f36 c0
! f38 A1
! f40 two
! f42 t1
! f44 h1,f1-(c1-h1)
! f46 c1
! f48 A2
! f50 0xffff8000...
! f52 t2
! f54 h2,f2-(c2-h2)
! f56 c2
! f58 A3
! f60 ln2hi
! f62 ln2lo

! __vlog: vector logarithm kernel.
!
! Arguments (see the register-use table above): %i0 = n, %i1 = x,
! %i2 = stridex, %i3 = y, %i4 = stridey.  Both strides are shifted left
! by 3 on entry, so they are counted in 8-byte (double) elements.  Each
! input element is read as two 32-bit words from x and each result is
! written as two 32-bit words to y.  Judging by the ln2hi/ln2lo
! constants this is presumably the natural logarithm; the C prototype
! is not visible in this file.
!
! Structure: the main loop is unrolled three times (.loop0, .loop1,
! .loop2), one stage per pipeline slot.  Each stage starts the
! computation for a new element in its own register set, advances the
! two elements already in flight, and stores the finished result of the
! element that the same stage started on its previous visit.  Before
! the first real result exists, the store pointers py0..py2 point at
! the jnk scratch slot.
!
! Arguments whose high word ix satisfies (signed) ix <= 0x000fffff or
! ix >= 0x7ff00000 leave the pipeline through .range0/.range1/.range2.
! These cover zeros, subnormals, negative values, infinities and NaNs.
!
! NOTE(review): the first element is loaded with ordinary (faulting)
! loads before %i0 is ever tested, so callers presumably guarantee
! n >= 1.

	ENTRY(__vlog)
	save	%sp,-SA(MINFRAME)-tmps,%sp
	PIC_SETUP(l7)
	PIC_SET(l7,TBL,o0)
	mov	%o0,%g1
	wr	%g0,0x82,%asi		! set %asi for non-faulting loads
	sethi	%hi(0x94000),%l4
	sethi	%hi(0x000fffff),%l6
	or	%l6,%lo(0x000fffff),%l6
	sethi	%hi(0x7ff00000),%l7
	ldd	[%g1+two],%f40
	ldd	[%g1+A1],%f38
	ldd	[%g1+A2],%f48
	ldd	[%g1+A3],%f58
	ldd	[%g1+ln2hi],%f60
	ldd	[%g1+ln2lo],%f62
	ldd	[%g1+mask],%f50
	ld	[%g1+ox43200000],%f29
	ld	[%g1+oxfff00000],%f28
	ld	[%g1+oxc0194000],%f31
	ld	[%g1+ox4000],%f30
	sll	%i2,3,%i2		! scale strides
	sll	%i4,3,%i4
	add	%fp,jnk,%o0		! precondition loop
	add	%fp,jnk,%o1
	add	%fp,jnk,%o2
	! Zero every register and stack slot that the first pass through
	! the three stages reads as "previous iteration" state, so the
	! early junk computations operate on zeros.
	fzero	%f2
	fzero	%f6
	fzero	%f18
	fzero	%f36
	fzero	%f12
	fzero	%f14
	fzero	%f16
	fzero	%f42
	fzero	%f44
	fzero	%f46
	std	%f46,[%fp+tmp1]
	fzero	%f24
	fzero	%f26
	fzero	%f52
	fzero	%f54
	std	%f54,[%fp+tmp2]
	sub	%i3,%i4,%i3		! pre-decrement y; each stage adds stridey before saving py
	ld	[%i1],%l0		! ix
	ld	[%i1],%f0		! u.l[0] = *x
	ba	.loop0
	ld	[%i1+4],%f1		! u.l[1] = *(1+x)

	.align	16
! -- 16 byte aligned
! Stage 0: starts the element held in %l0 / %f0:%f1, stores the
! previously finished slot-0 result through %o0 and preloads the
! argument for stage 1 into %l1 / %f10:%f11.
.loop0:
	! Range test: %o3 and %o4 are both negative only when
	! 0x000fffff < ix < 0x7ff00000 (signed), so a non-negative AND
	! means the argument needs special handling.
	sub	%l0,%l7,%o3
	sub	%l6,%l0,%o4
	fpadd32s %f0,%f31,%f4		! n = (ix + 0xc0194000) & 0xfff00000
	fmuld	%f6,%f2,%f8		! (previous iteration)

	andcc	%o3,%o4,%o4
	bge,pn	%icc,.range0		! ix <= 0x000fffff or >= 0x7ff00000
! delay slot
	fands	%f4,%f28,%f4

	add	%i1,%i2,%i1		! x += stridex
	add	%i3,%i4,%i3		! y += stridey
	fpsub32s %f0,%f4,%f0		! u.l[0] -= n

! .range0 re-enters here after rescaling a subnormal argument.
.cont0:
	lda	[%i1]%asi,%l1		! preload next argument
	add	%l0,%l4,%l0		! j = ix + 0x94000
	fpadd32s %f0,%f30,%f2		! v.l[0] = u.l[0] + 0x4000

	lda	[%i1]%asi,%f10
	srl	%l0,11,%l0		! j = (j >> 11) & 0x1f0
	fand	%f2,%f50,%f2		! v.l &= 0xffff8000...

	lda	[%i1+4]%asi,%f11
	and	%l0,0x1f0,%l0
	fitod	%f4,%f32		! (double) n

	add	%l0,8,%l3		! offset of the second double of the table entry
	fsubd	%f0,%f2,%f4		! f = u.d - v.d

	faddd	%f0,%f2,%f6		! s = f / (u.d + v.d)

	fsubd	%f40,%f2,%f2		! two - v.d
	fmuld	%f32,%f60,%f34		! h = n * ln2hi + TBL[j]

	faddd	%f8,%f18,%f8		! y = c + (t + q)
	fmuld	%f32,%f62,%f32		! t = n * ln2lo + TBL[j+1]

	fdivd	%f4,%f6,%f6

	faddd	%f54,%f24,%f56		! c = h + f
	fmuld	%f26,%f26,%f22		! z = s * s

	faddd	%f8,%f36,%f8
	st	%f8,[%o0]		! store the finished result, high word first

	st	%f9,[%o0+4]
	mov	%i3,%o0			! remember where this element's result goes
	faddd	%f14,%f38,%f14

	fsubd	%f56,%f54,%f54		! t += f - (c - h)
	fmuld	%f22,%f58,%f20		! q = ...

	fsubd	%f2,%f0,%f2		! (two - v.d) - u.d
	ldd	[%g1+%l0],%f36

	faddd	%f42,%f44,%f18
	fmuld	%f12,%f14,%f14
	ldd	[%fp+tmp1],%f12

	faddd	%f20,%f48,%f20
	nop

	faddd	%f34,%f36,%f34
	ldd	[%g1+%l3],%f0

	faddd	%f14,%f12,%f12

	fsubd	%f24,%f54,%f54
	fmuld	%f22,%f20,%f24

	std	%f2,[%fp+tmp0]		! saved for stage 2, which reloads it
	addcc	%i0,-1,%i0
	ble,pn	%icc,.endloop0
! delay slot
	faddd	%f32,%f0,%f32

! -- 16 byte aligned
! Stage 1: same schedule as stage 0 with the register roles rotated;
! starts the element in %l1 / %f10:%f11, stores through %o1 and
! preloads the argument for stage 2 into %l2 / %f20:%f21.
.loop1:
	sub	%l1,%l7,%o3
	sub	%l6,%l1,%o4
	fpadd32s %f10,%f31,%f14		! n = (ix + 0xc0194000) & 0xfff00000
	fmuld	%f16,%f12,%f8		! (previous iteration)

	andcc	%o3,%o4,%o4
	bge,pn	%icc,.range1		! ix <= 0x000fffff or >= 0x7ff00000
! delay slot
	fands	%f14,%f28,%f14

	add	%i1,%i2,%i1		! x += stridex
	add	%i3,%i4,%i3		! y += stridey
	fpsub32s %f10,%f14,%f10		! u.l[0] -= n

! .range1 re-enters here after rescaling a subnormal argument.
.cont1:
	lda	[%i1]%asi,%l2		! preload next argument
	add	%l1,%l4,%l1		! j = ix + 0x94000
	fpadd32s %f10,%f30,%f12		! v.l[0] = u.l[0] + 0x4000

	lda	[%i1]%asi,%f20
	srl	%l1,11,%l1		! j = (j >> 11) & 0x1f0
	fand	%f12,%f50,%f12		! v.l &= 0xffff8000...

	lda	[%i1+4]%asi,%f21
	and	%l1,0x1f0,%l1
	fitod	%f14,%f42		! (double) n

	add	%l1,8,%l3		! offset of the second double of the table entry
	fsubd	%f10,%f12,%f14		! f = u.d - v.d

	faddd	%f10,%f12,%f16		! s = f / (u.d + v.d)

	fsubd	%f40,%f12,%f12		! two - v.d
	fmuld	%f42,%f60,%f44		! h = n * ln2hi + TBL[j]

	faddd	%f8,%f18,%f8		! y = c + (t + q)
	fmuld	%f42,%f62,%f42		! t = n * ln2lo + TBL[j+1]

	fdivd	%f14,%f16,%f16

	faddd	%f34,%f4,%f36		! c = h + f
	fmuld	%f6,%f6,%f2		! z = s * s

	faddd	%f8,%f46,%f8
	st	%f8,[%o1]		! store the finished result, high word first

	st	%f9,[%o1+4]
	mov	%i3,%o1			! remember where this element's result goes
	faddd	%f24,%f38,%f24

	fsubd	%f36,%f34,%f34		! t += f - (c - h)
	fmuld	%f2,%f58,%f0		! q = ...

	fsubd	%f12,%f10,%f12		! (two - v.d) - u.d
	ldd	[%g1+%l1],%f46

	faddd	%f52,%f54,%f18
	fmuld	%f22,%f24,%f24
	ldd	[%fp+tmp2],%f22

	faddd	%f0,%f48,%f0
	nop

	faddd	%f44,%f46,%f44
	ldd	[%g1+%l3],%f10

	faddd	%f24,%f22,%f22

	fsubd	%f4,%f34,%f34
	fmuld	%f2,%f0,%f4

	std	%f12,[%fp+tmp1]		! saved for stage 0, which reloads it
	addcc	%i0,-1,%i0
	ble,pn	%icc,.endloop1
! delay slot
	faddd	%f42,%f10,%f42

! -- 16 byte aligned
! Stage 2: same schedule again; starts the element in %l2 / %f20:%f21,
! stores through %o2 and preloads the argument for stage 0 into
! %l0 / %f0:%f1.
.loop2:
	sub	%l2,%l7,%o3
	sub	%l6,%l2,%o4
	fpadd32s %f20,%f31,%f24		! n = (ix + 0xc0194000) & 0xfff00000
	fmuld	%f26,%f22,%f8		! (previous iteration)

	andcc	%o3,%o4,%o4
	bge,pn	%icc,.range2		! ix <= 0x000fffff or >= 0x7ff00000
! delay slot
	fands	%f24,%f28,%f24

	add	%i1,%i2,%i1		! x += stridex
	add	%i3,%i4,%i3		! y += stridey
	fpsub32s %f20,%f24,%f20		! u.l[0] -= n

! .range2 re-enters here after rescaling a subnormal argument.
.cont2:
	lda	[%i1]%asi,%l0		! preload next argument
	add	%l2,%l4,%l2		! j = ix + 0x94000
	fpadd32s %f20,%f30,%f22		! v.l[0] = u.l[0] + 0x4000

	lda	[%i1]%asi,%f0
	srl	%l2,11,%l2		! j = (j >> 11) & 0x1f0
	fand	%f22,%f50,%f22		! v.l &= 0xffff8000...

	lda	[%i1+4]%asi,%f1
	and	%l2,0x1f0,%l2
	fitod	%f24,%f52		! (double) n

	add	%l2,8,%l3		! offset of the second double of the table entry
	fsubd	%f20,%f22,%f24		! f = u.d - v.d

	faddd	%f20,%f22,%f26		! s = f / (u.d + v.d)

	fsubd	%f40,%f22,%f22		! two - v.d
	fmuld	%f52,%f60,%f54		! h = n * ln2hi + TBL[j]

	faddd	%f8,%f18,%f8		! y = c + (t + q)
	fmuld	%f52,%f62,%f52		! t = n * ln2lo + TBL[j+1]

	fdivd	%f24,%f26,%f26

	faddd	%f44,%f14,%f46		! c = h + f
	fmuld	%f16,%f16,%f12		! z = s * s

	faddd	%f8,%f56,%f8
	st	%f8,[%o2]		! store the finished result, high word first

	st	%f9,[%o2+4]
	mov	%i3,%o2			! remember where this element's result goes
	faddd	%f4,%f38,%f4

	fsubd	%f46,%f44,%f44		! t += f - (c - h)
	fmuld	%f12,%f58,%f10		! q = ...

	fsubd	%f22,%f20,%f22		! (two - v.d) - u.d
	ldd	[%g1+%l2],%f56

	faddd	%f32,%f34,%f18
	fmuld	%f2,%f4,%f4
	ldd	[%fp+tmp0],%f2

	faddd	%f10,%f48,%f10
	nop

	faddd	%f54,%f56,%f54
	ldd	[%g1+%l3],%f20

	faddd	%f4,%f2,%f2

	fsubd	%f14,%f44,%f44
	fmuld	%f12,%f10,%f14

	std	%f22,[%fp+tmp2]		! saved for stage 1, which reloads it
	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop0
! delay slot
	faddd	%f52,%f20,%f52


! Once we get to the last element, we loop three more times to finish
! the computations in progress.  This means we will load past the end
! of the argument vector, but since we use non-faulting loads and never
! use the data, the only potential problem is cache miss.  (Note that
! when the argument is 2, the only exception that occurs in the compu-
! tation is an inexact result in the final addition, and we break out
! of the "extra" iterations before then.)
!
! Each .endloopN substitutes the constant two (high word 0x40000000 in
! the integer register, the double from %f40 in the FP pair) for the
! next argument and re-enters the following stage while the count in
! %i0 is still greater than -3.  The fmovd sits in an annulled delay
! slot, so it only executes when the branch is taken.
.endloop2:
	sethi	%hi(0x40000000),%l0	! "next argument" = two
	cmp	%i0,-3
	bg,a,pt	%icc,.loop0
! delay slot
	fmovd	%f40,%f0
	ret
	restore

	.align	16
.endloop0:
	sethi	%hi(0x40000000),%l1	! "next argument" = two
	cmp	%i0,-3
	bg,a,pt	%icc,.loop1
! delay slot
	fmovd	%f40,%f10
	ret
	restore

	.align	16
.endloop1:
	sethi	%hi(0x40000000),%l2	! "next argument" = two
	cmp	%i0,-3
	bg,a,pt	%icc,.loop2
! delay slot
	fmovd	%f40,%f20
	ret
	restore


! Special-argument handler for stage 0 (.range1 and .range2 below are
! the same code with the slot-1 and slot-2 registers).
!
!   ix < 0x7ff00000 (unsigned), i.e. zero or subnormal:
!     zero      -> label 1: raise division-by-zero, store -inf
!     subnormal -> convert the raw 64-bit pattern to double (fxtod),
!                  split off n as usual, then subtract 0x43200000
!                  (1074 << 20) from n to undo the implied scaling and
!                  rejoin the pipeline at .cont0
!   ix >= 0x7ff00000 (unsigned), i.e. negative, infinite or NaN:
!     -0        -> label 1 as for +0
!     otherwise -> store (x + |x|) * inf directly
!
! Results stored here bypass the pipeline; label 3 then fetches the
! next argument with ordinary loads and restarts the same stage.
	.align	16
.range0:
	cmp	%l0,%l7
	bgeu,pn	%icc,2f			! if (unsigned) ix >= 0x7ff00000
! delay slot
	ld	[%i1+4],%o5		! low word of x, used by the zero tests
	fxtod	%f0,%f0			! scale by 2**1074 w/o trapping
	st	%f0,[%fp+tmp0]		! spill the new high word; reloaded into %l0 below
	add	%i1,%i2,%i1		! x += stridex
	orcc	%l0,%o5,%g0
	be,pn	%icc,1f			! if x == 0
! delay slot
	add	%i3,%i4,%i3		! y += stridey
	fpadd32s %f0,%f31,%f4		! n = (ix + 0xc0194000) & 0xfff00000
	fands	%f4,%f28,%f4
	fpsub32s %f0,%f4,%f0		! u.l[0] -= n
	ld	[%fp+tmp0],%l0		! ix of the rescaled argument
	ba,pt	%icc,.cont0
! delay slot
	fpsub32s %f4,%f29,%f4		! n -= 0x43200000
1:
	fdivs	%f29,%f1,%f4		! raise div-by-zero
	ba,pt	%icc,3f
! delay slot
	st	%f28,[%i3]		! store -inf
2:
	sll	%l0,1,%l0		! lop off sign bit
	add	%i1,%i2,%i1		! x += stridex
	orcc	%l0,%o5,%g0
	be,pn	%icc,1b			! if x == -0
! delay slot
	add	%i3,%i4,%i3		! y += stridey
	fabsd	%f0,%f4			! *y = (x + |x|) * inf
	faddd	%f0,%f4,%f0
	fand	%f28,%f50,%f4		! 0xfff00000:0x43200000 & mask = -inf
	fnegd	%f4,%f4			! +inf
	fmuld	%f0,%f4,%f0
	st	%f0,[%i3]
3:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.endloop2
! delay slot
	st	%f1,[%i3+4]		! low word of the result
	ld	[%i1],%l0		! get next argument
	ld	[%i1],%f0
	ba,pt	%icc,.loop0
! delay slot
	ld	[%i1+4],%f1


! Special-argument handler for stage 1; see the description above
! .range0.
	.align	16
.range1:
	cmp	%l1,%l7
	bgeu,pn	%icc,2f			! if (unsigned) ix >= 0x7ff00000
! delay slot
	ld	[%i1+4],%o5		! low word of x, used by the zero tests
	fxtod	%f10,%f10		! scale by 2**1074 w/o trapping
	st	%f10,[%fp+tmp1]		! spill the new high word; reloaded into %l1 below
	add	%i1,%i2,%i1		! x += stridex
	orcc	%l1,%o5,%g0
	be,pn	%icc,1f			! if x == 0
! delay slot
	add	%i3,%i4,%i3		! y += stridey
	fpadd32s %f10,%f31,%f14		! n = (ix + 0xc0194000) & 0xfff00000
	fands	%f14,%f28,%f14
	fpsub32s %f10,%f14,%f10		! u.l[0] -= n
	ld	[%fp+tmp1],%l1		! ix of the rescaled argument
	ba,pt	%icc,.cont1
! delay slot
	fpsub32s %f14,%f29,%f14		! n -= 0x43200000
1:
	fdivs	%f29,%f11,%f14		! raise div-by-zero
	ba,pt	%icc,3f
! delay slot
	st	%f28,[%i3]		! store -inf
2:
	sll	%l1,1,%l1		! lop off sign bit
	add	%i1,%i2,%i1		! x += stridex
	orcc	%l1,%o5,%g0
	be,pn	%icc,1b			! if x == -0
! delay slot
	add	%i3,%i4,%i3		! y += stridey
	fabsd	%f10,%f14		! *y = (x + |x|) * inf
	faddd	%f10,%f14,%f10
	fand	%f28,%f50,%f14		! 0xfff00000:0x43200000 & mask = -inf
	fnegd	%f14,%f14		! +inf
	fmuld	%f10,%f14,%f10
	st	%f10,[%i3]
3:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.endloop0
! delay slot
	st	%f11,[%i3+4]		! low word of the result
	ld	[%i1],%l1		! get next argument
	ld	[%i1],%f10
	ba,pt	%icc,.loop1
! delay slot
	ld	[%i1+4],%f11


! Special-argument handler for stage 2; see the description above
! .range0.
	.align	16
.range2:
	cmp	%l2,%l7
	bgeu,pn	%icc,2f			! if (unsigned) ix >= 0x7ff00000
! delay slot
	ld	[%i1+4],%o5		! low word of x, used by the zero tests
	fxtod	%f20,%f20		! scale by 2**1074 w/o trapping
	st	%f20,[%fp+tmp2]		! spill the new high word; reloaded into %l2 below
	add	%i1,%i2,%i1		! x += stridex
	orcc	%l2,%o5,%g0
	be,pn	%icc,1f			! if x == 0
! delay slot
	add	%i3,%i4,%i3		! y += stridey
	fpadd32s %f20,%f31,%f24		! n = (ix + 0xc0194000) & 0xfff00000
	fands	%f24,%f28,%f24
	fpsub32s %f20,%f24,%f20		! u.l[0] -= n
	ld	[%fp+tmp2],%l2		! ix of the rescaled argument
	ba,pt	%icc,.cont2
! delay slot
	fpsub32s %f24,%f29,%f24		! n -= 0x43200000
1:
	fdivs	%f29,%f21,%f24		! raise div-by-zero
	ba,pt	%icc,3f
! delay slot
	st	%f28,[%i3]		! store -inf
2:
	sll	%l2,1,%l2		! lop off sign bit
	add	%i1,%i2,%i1		! x += stridex
	orcc	%l2,%o5,%g0
	be,pn	%icc,1b			! if x == -0
! delay slot
	add	%i3,%i4,%i3		! y += stridey
	fabsd	%f20,%f24		! *y = (x + |x|) * inf
	faddd	%f20,%f24,%f20
	fand	%f28,%f50,%f24		! 0xfff00000:0x43200000 & mask = -inf
	fnegd	%f24,%f24		! +inf
	fmuld	%f20,%f24,%f20
	st	%f20,[%i3]
3:
	addcc	%i0,-1,%i0
	ble,pn	%icc,.endloop1
! delay slot
	st	%f21,[%i3+4]		! low word of the result
	ld	[%i1],%l2		! get next argument
	ld	[%i1],%f20
	ba,pt	%icc,.loop2
! delay slot
	ld	[%i1+4],%f21

	SET_SIZE(__vlog)