1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 23 */ 24 /* 25 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 26 * Use is subject to license terms. 27 */ 28 29 /* 30 * __vexp: double precision vector exp 31 * 32 * Algorithm: 33 * 34 * Write x = (k + j/256)ln2 + r, where k and j are integers, j >= 0, 35 * and |r| <= ln2/512. Then exp(x) = 2^k * 2^(j/256) * exp(r). 36 * Compute exp(r) by a polynomial approximation exp(r) ~ 1 + p(r) 37 * where p(r) := r*(1+r*(B1+r*(B2+r*B3))). From a table, obtain 38 * h and l such that h ~ 2^(j/256) to double precision and h+l 39 * ~ 2^(j/256) to well more than double precision. Then exp(x) 40 * ~ 2^k * (h + (l + h * p(r))) to about double precision. Note 41 * that the multiplication by 2^k requires some finagling when 42 * the result might be subnormal. 43 * 44 * Accuracy: 45 * 46 * For normal results, the largest error observed is less than 47 * 0.6 ulps. For subnormal results, the largest error observed 48 * is 0.737 ulps. 49 */ 50 51 #include <sys/isa_defs.h> 52 53 #ifdef _LITTLE_ENDIAN 54 #define HI(x) *(1+(int *)&x) 55 #define LO(x) *(unsigned *)&x 56 #define DBLWORD(x, y) y, x 57 #else 58 #define HI(x) *(int *)&x 59 #define LO(x) *(1+(unsigned *)&x) 60 #define DBLWORD(x, y) x, y 61 #endif 62 63 #ifdef __RESTRICT 64 #define restrict _Restrict 65 #else 66 #define restrict 67 #endif 68 69 static const double TBL[] = { 70 1.00000000000000000000e+00, 0.00000000000000000000e+00, 71 1.00271127505020252180e+00, -3.63661592869226394432e-17, 72 1.00542990111280272636e+00, 9.49918653545503175702e-17, 73 1.00815589811841754830e+00, -3.25205875608430806089e-17, 74 1.01088928605170047526e+00, -1.52347786033685771763e-17, 75 1.01363008495148942956e+00, 9.28359976818356758749e-18, 76 1.01637831491095309566e+00, -5.77217007319966002766e-17, 77 1.01913399607773791367e+00, 3.60190498225966110587e-17, 78 1.02189714865411662714e+00, 5.10922502897344389359e-17, 79 1.02466779289713572076e+00, -7.56160786848777820704e-17, 80 1.02744594911876374610e+00, -4.95607417464536982418e-17, 81 1.03023163768604097967e+00, 3.31983004108081294377e-17, 82 1.03302487902122841490e+00, 7.60083887402708848935e-18, 83 1.03582569360195719810e+00, -7.80678239133763616702e-17, 84 1.03863410196137873065e+00, 5.99627378885251061843e-17, 85 1.04145012468831610342e+00, 3.78483048028757620966e-17, 86 1.04427378242741375480e+00, 8.55188970553796365958e-17, 87 1.04710509587928979336e+00, 7.27707724310431474861e-17, 88 1.04994408580068721015e+00, 5.59293784812700258637e-17, 89 1.05279077300462642341e+00, -9.62948289902693573942e-17, 90 1.05564517836055715705e+00, 1.75932573877209198414e-18, 91 1.05850732279451276163e+00, -7.15265185663778073796e-17, 92 1.06137722728926209292e+00, -1.19735370853656575649e-17, 93 1.06425491288446449900e+00, 5.07875419861123039357e-17, 94 1.06714040067682369717e+00, -7.89985396684158212226e-17, 95 1.07003371182024187291e+00, -9.93716271128891938112e-17, 96 1.07293486752597555522e+00, -3.83966884335882380671e-18, 97 1.07584388906279104781e+00, -1.00027161511441361125e-17, 98 1.07876079775711986031e+00, -6.65666043605659260344e-17, 99 1.08168561499321524977e+00, -4.78262390299708626556e-17, 100 1.08461836221330920615e+00, 3.16615284581634611576e-17, 101 1.08755906091776965994e+00, 5.40934930782029075923e-18, 102 1.09050773266525768967e+00, -3.04678207981247114697e-17, 103 1.09346439907288583981e+00, 1.44139581472692093420e-17, 104 1.09642908181637688259e+00, -5.91993348444931582405e-17, 105 1.09940180263022191376e+00, 7.17045959970192322483e-17, 106 1.10238258330784089090e+00, 5.26603687157069438656e-17, 107 1.10537144570174117320e+00, 8.23928876050021358995e-17, 108 1.10836841172367872588e+00, -8.78681384518052661558e-17, 109 1.11137350334481754821e+00, 5.56394502666969764311e-17, 110 1.11438674259589243221e+00, 1.04102784568455709549e-16, 111 1.11740815156736927882e+00, -7.97680590262822045601e-17, 112 1.12043775240960674644e+00, -6.20108590655417874998e-17, 113 1.12347556733301989773e+00, -9.69973758898704299544e-17, 114 1.12652161860824184814e+00, 5.16585675879545612073e-17, 115 1.12957592856628807887e+00, 6.71280585872625658758e-17, 116 1.13263851959871919561e+00, 3.23735616673800026374e-17, 117 1.13570941415780546357e+00, 5.06659992612615524241e-17, 118 1.13878863475669156458e+00, 8.91281267602540777782e-17, 119 1.14187620396956157620e+00, 4.65109117753141238741e-17, 120 1.14497214443180417298e+00, 4.64128989217001065651e-17, 121 1.14807647884017893780e+00, 6.89774023662719177044e-17, 122 1.15118922995298267331e+00, 3.25071021886382721198e-17, 123 1.15431042059021593538e+00, 1.04171289462732661865e-16, 124 1.15744007363375112085e+00, -9.12387123113440028710e-17, 125 1.16057821202749877898e+00, -3.26104020541739310553e-17, 126 1.16372485877757747552e+00, 3.82920483692409349872e-17, 127 1.16688003695248165847e+00, -8.79187957999916974198e-17, 128 1.17004376968325018993e+00, -1.84774420179000469438e-18, 129 1.17321608016363732041e+00, -7.28756258658499447915e-17, 130 1.17639699165028122074e+00, 5.55420325421807896277e-17, 131 1.17958652746287584456e+00, 1.00923127751003904354e-16, 132 1.18278471098434101449e+00, 1.54297543007907605845e-17, 133 1.18599156566099384058e+00, -9.20950683529310590495e-18, 134 1.18920711500272102690e+00, 3.98201523146564611098e-17, 135 1.19243138258315117817e+00, 4.39755141560972082715e-17, 136 1.19566439203982732842e+00, 4.61660367048148139743e-17, 137 1.19890616707438057986e+00, -9.80919335600842311848e-17, 138 1.20215673145270307565e+00, 6.64498149925230124489e-17, 139 1.20541610900512385918e+00, -3.35727219326752963448e-17, 140 1.20868432362658162482e+00, -4.74672594522898409739e-17, 141 1.21196139927680124337e+00, -4.89061107752111835732e-17, 142 1.21524735998046895524e+00, -7.71263069268148813091e-17, 143 1.21854222982740845183e+00, -9.00672695836383767487e-17, 144 1.22184603297275762301e+00, -1.06110212114026911612e-16, 145 1.22515879363714552674e+00, -8.90353381426998342947e-17, 146 1.22848053610687002468e+00, -1.89878163130252995312e-17, 147 1.23181128473407586199e+00, 7.38938247161005024655e-17, 148 1.23515106393693341325e+00, -1.07552443443078413783e-16, 149 1.23849989819981654016e+00, 2.76770205557396742995e-17, 150 1.24185781207348400201e+00, 4.65802759183693679123e-17, 151 1.24522483017525797955e+00, -4.67724044984672750044e-17, 152 1.24860097718920481924e+00, -8.26181099902196355046e-17, 153 1.25198627786631622172e+00, 4.83416715246989759959e-17, 154 1.25538075702469109629e+00, -6.71138982129687841853e-18, 155 1.25878443954971652730e+00, -8.42178258773059935677e-17, 156 1.26219735039425073886e+00, -3.08446488747384584900e-17, 157 1.26561951457880628169e+00, 4.25057700345086802072e-17, 158 1.26905095719173321989e+00, 2.66793213134218609523e-18, 159 1.27249170338940276181e+00, -1.05779162672124210291e-17, 160 1.27594177839639200123e+00, 9.91543024421429032951e-17, 161 1.27940120750566932450e+00, -9.75909500835606221035e-17, 162 1.28287001607877826359e+00, 1.71359491824356096814e-17, 163 1.28634822954602556777e+00, -3.41695570693618197638e-17, 164 1.28983587340666572274e+00, 8.94925753089759172195e-17, 165 1.29333297322908946647e+00, -2.97459044313275164581e-17, 166 1.29683955465100964055e+00, 2.53825027948883149593e-17, 167 1.30035564337965059423e+00, 5.67872810280221742200e-17, 168 1.30388126519193581210e+00, 8.64767559826787117946e-17, 169 1.30741644593467731816e+00, -7.33664565287886889230e-17, 170 1.31096121152476441374e+00, -7.18153613551945385697e-17, 171 1.31451558794935463581e+00, 2.26754331510458564505e-17, 172 1.31807960126606404927e+00, -5.45795582714915288619e-17, 173 1.32165327760315753913e+00, -2.48063824591302174150e-17, 174 1.32523664315974132322e+00, -2.85873121003886075697e-17, 175 1.32882972420595435459e+00, 4.08908622391016005195e-17, 176 1.33243254708316150037e+00, -5.10158663091674334319e-17, 177 1.33604513820414583236e+00, -5.89186635638880135250e-17, 178 1.33966752405330291609e+00, 8.92728259483173198426e-17, 179 1.34329973118683532185e+00, -5.80258089020143775130e-17, 180 1.34694178623294580355e+00, 3.22406510125467916913e-17, 181 1.35059371589203447428e+00, -8.28711038146241653260e-17, 182 1.35425554693689265129e+00, 7.70094837980298946162e-17, 183 1.35792730621290114179e+00, -9.52963574482518886709e-17, 184 1.36160902063822475405e+00, 1.53378766127066804593e-18, 185 1.36530071720401191548e+00, -1.00053631259747639350e-16, 186 1.36900242297459051599e+00, 9.59379791911884877256e-17, 187 1.37271416508766841424e+00, -4.49596059523484126201e-17, 188 1.37643597075453016920e+00, -6.89858893587180104162e-17, 189 1.38016786726023799048e+00, 1.05103145799699839462e-16, 190 1.38390988196383202258e+00, -6.77051165879478628716e-17, 191 1.38766204229852907481e+00, 8.42298427487541531762e-17, 192 1.39142437577192623621e+00, -4.90617486528898870821e-17, 193 1.39519690996620027157e+00, -9.32933622422549531960e-17, 194 1.39897967253831123635e+00, -9.61421320905132307233e-17, 195 1.40277269122020475933e+00, -5.29578324940798922316e-17, 196 1.40657599381901543545e+00, 7.03491481213642218800e-18, 197 1.41038960821727066275e+00, 4.16654872843506164270e-17, 198 1.41421356237309514547e+00, -9.66729331345291345105e-17, 199 1.41804788432041517510e+00, 2.27443854218552945230e-17, 200 1.42189260216916557589e+00, -1.60778289158902441338e-17, 201 1.42574774410549420800e+00, 9.88069075850060728430e-17, 202 1.42961333839197002327e+00, -1.20316424890536551792e-17, 203 1.43348941336778890054e+00, -5.80245424392682610310e-17, 204 1.43737599744898236764e+00, -4.20403401646755661225e-17, 205 1.44127311912862565713e+00, 5.60250365087898567501e-18, 206 1.44518080697704665027e+00, -3.02375813499398731940e-17, 207 1.44909908964203504311e+00, -6.25940500081930925441e-17, 208 1.45302799584905262265e+00, -5.77994860939610610226e-17, 209 1.45696755440144376514e+00, 5.64867945387699814049e-17, 210 1.46091779418064704466e+00, -5.60037718607521580013e-17, 211 1.46487874414640573129e+00, 9.53076754358715731900e-17, 212 1.46885043333698184220e+00, 8.46588275653362637570e-17, 213 1.47283289086936752810e+00, 6.69177408194058937165e-17, 214 1.47682614593949934623e+00, -3.48399455689279579579e-17, 215 1.48083022782247186733e+00, -9.68695210263061857841e-17, 216 1.48484516587275239274e+00, 1.07800867644074807559e-16, 217 1.48887098952439700383e+00, 6.15536715774287133031e-17, 218 1.49290772829126483501e+00, 1.41929201542840357707e-17, 219 1.49695541176723545540e+00, -2.86166325389915821109e-17, 220 1.50101406962642558440e+00, -6.41376727579023503859e-17, 221 1.50508373162340647333e+00, 7.07471061358284636429e-17, 222 1.50916442759342284141e+00, -1.01645532775429503911e-16, 223 1.51325618745260981335e+00, 8.88449785133871209093e-17, 224 1.51735904119821474190e+00, -4.30869947204334080070e-17, 225 1.52147301890881458952e+00, -5.99638767594568341985e-18, 226 1.52559815074453819506e+00, 1.11795187801605698722e-16, 227 1.52973446694728698603e+00, 3.78579211515721903683e-17, 228 1.53388199784095591305e+00, 8.87522684443844614135e-17, 229 1.53804077383165682669e+00, 1.01746723511613580618e-16, 230 1.54221082540794074411e+00, 7.94983480969762085616e-17, 231 1.54639218314102144802e+00, 1.06839600056572198028e-16, 232 1.55058487768499997372e+00, -1.46007065906893851791e-17, 233 1.55478893977708865215e+00, -8.00316135011603564104e-17, 234 1.55900440023783692922e+00, 3.78120705335752750188e-17, 235 1.56323128997135762930e+00, 7.48477764559073438896e-17, 236 1.56746963996555299659e+00, -1.03520617688497219883e-16, 237 1.57171948129234140268e+00, -3.34298400468720006928e-17, 238 1.57598084510788649659e+00, -1.01369164712783039808e-17, 239 1.58025376265282457844e+00, -5.16340292955446806159e-17, 240 1.58453826525249374946e+00, -1.93377170345857029304e-17, 241 1.58883438431716395023e+00, -5.99495011882447940052e-18, 242 1.59314215134226699888e+00, -1.00944065423119624890e-16, 243 1.59746159790862707339e+00, 2.48683927962209992069e-17, 244 1.60179275568269341434e+00, -6.05491745352778434252e-17, 245 1.60613565641677102924e+00, -1.03545452880599952591e-16, 246 1.61049033194925428347e+00, 2.47071925697978878522e-17, 247 1.61485681420486071325e+00, -7.31666339912512326264e-17, 248 1.61923513519486372836e+00, 2.09413341542290924068e-17, 249 1.62362532701732886764e+00, -3.58451285141447470996e-17, 250 1.62802742185734783398e+00, -6.71295508470708408630e-17, 251 1.63244145198727497181e+00, 9.85281923042999296414e-17, 252 1.63686744976696441078e+00, 7.69832507131987557450e-17, 253 1.64130544764400632118e+00, -9.24756873764070550805e-17, 254 1.64575547815396494578e+00, -1.01256799136747726038e-16, 255 1.65021757392061774183e+00, 9.13327958872990419009e-18, 256 1.65469176765619430114e+00, 9.64329430319602742879e-17, 257 1.65917809216161615815e+00, -7.27554555082304942180e-17, 258 1.66367658032673637614e+00, 5.89099269671309967045e-17, 259 1.66818726513058246397e+00, 4.26917801957061447430e-17, 260 1.67271017964159662839e+00, -5.47671596459956307616e-17, 261 1.67724535701787846875e+00, 8.30394950995073155275e-17, 262 1.68179283050742900407e+00, 8.19901002058149652013e-17, 263 1.68635263344839336774e+00, -7.18146327835800944212e-17, 264 1.69092479926930527867e+00, -9.66967147439488016590e-17, 265 1.69550936148933262260e+00, 7.23841687284516664081e-17, 266 1.70010635371852347753e+00, -8.02371937039770024589e-18, 267 1.70471580965805125096e+00, -2.72888328479728156257e-17, 268 1.70933776310046292579e+00, -9.86877945663293107628e-17, 269 1.71397224792992597386e+00, 6.47397510775336706412e-17, 270 1.71861929812247793414e+00, -1.85138041826311098821e-17, 271 1.72327894774627399244e+00, -9.52212380039379996275e-17, 272 1.72795123096183766975e+00, -1.07509818612046424459e-16, 273 1.73263618202231106658e+00, -1.69805107431541549407e-18, 274 1.73733383527370621735e+00, 3.16438929929295694659e-17, 275 1.74204422515515644498e+00, -1.52595911895078879236e-18, 276 1.74676738619916904760e+00, -1.07522904835075145042e-16, 277 1.75150335303187820735e+00, -5.12445042059672465939e-17, 278 1.75625216037329945351e+00, 2.96014069544887330703e-17, 279 1.76101384303758390359e+00, -7.94325312503922771057e-17, 280 1.76578843593327272643e+00, 9.46131501808326786660e-17, 281 1.77057597406355471392e+00, 5.96179451004055584767e-17, 282 1.77537649252652118825e+00, 6.42973179655657203396e-17, 283 1.78019002651542446181e+00, -5.28462728909161736517e-17, 284 1.78501661131893496481e+00, 1.53304001210313138184e-17, 285 1.78985628232140103755e+00, -4.15435466068334977098e-17, 286 1.79470907500310716820e+00, 1.82274584279120867698e-17, 287 1.79957502494053511732e+00, -2.52688923335889795224e-17, 288 1.80445416780662393208e+00, -5.17722240879331788328e-17, 289 1.80934653937103195886e+00, -9.03264140245002968190e-17, 290 1.81425217550039885595e+00, -9.96953153892034881983e-17, 291 1.81917111215860849427e+00, 7.40267690114583888997e-17, 292 1.82410338540705341259e+00, -1.01596278622770830650e-16, 293 1.82904903140489727420e+00, 6.88919290883569563697e-17, 294 1.83400808640934243066e+00, 3.28310722424562658722e-17, 295 1.83898058677589371079e+00, 6.91896974027251194233e-18, 296 1.84396656895862598446e+00, -5.93974202694996455028e-17, 297 1.84896606951045083811e+00, 9.02758044626108928816e-17, 298 1.85397912508338547077e+00, 9.76188749072759353840e-17, 299 1.85900577242882047990e+00, -9.52870546198994068663e-17, 300 1.86404604839778897940e+00, 6.54091268062057047791e-17, 301 1.86909998994123860427e+00, -9.93850521425506708290e-17, 302 1.87416763411029996256e+00, -6.12276341300414256164e-17, 303 1.87924901805656019427e+00, -1.62263155578358447799e-17, 304 1.88434417903233453195e+00, -8.22659312553371090551e-17, 305 1.88945315439093919352e+00, -9.00516828505912548531e-17, 306 1.89457598158696560731e+00, 3.40340353521652967060e-17, 307 1.89971269817655530332e+00, -3.85973976937851370678e-17, 308 1.90486334181767413831e+00, 6.53385751471827862895e-17, 309 1.91002795027038985154e+00, -5.90968800674406023686e-17, 310 1.91520656139714740007e+00, -1.06199460561959626376e-16, 311 1.92039921316304740273e+00, 7.11668154063031418621e-17, 312 1.92560594363612502811e+00, -9.91496376969374092749e-17, 313 1.93082679098762710623e+00, 6.16714970616910955284e-17, 314 1.93606179349229434727e+00, 1.03323859606763257448e-16, 315 1.94131098952864045160e+00, -6.63802989162148798984e-17, 316 1.94657441757923321823e+00, 6.81102234953387718436e-17, 317 1.95185211623097831790e+00, -2.19901696997935108603e-17, 318 1.95714412417540017941e+00, 8.96076779103666776760e-17, 319 1.96245048020892731699e+00, 1.09768440009135469493e-16, 320 1.96777122323317588126e+00, -1.03149280115311315109e-16, 321 1.97310639225523432039e+00, -7.45161786395603748608e-18, 322 1.97845602638795092787e+00, 4.03887531092781665750e-17, 323 1.98382016485021939189e+00, -2.20345441239106265716e-17, 324 1.98919884696726634310e+00, 8.20513263836919941553e-18, 325 1.99459211217094023461e+00, 1.79097103520026450854e-17 326 }; 327 328 static const union { 329 unsigned i[2]; 330 double d; 331 } C[] = { 332 { DBLWORD(0x43380000, 0x00000000) }, 333 { DBLWORD(0x40771547, 0x652b82fe) }, 334 { DBLWORD(0x3f662e42, 0xfee00000) }, 335 { DBLWORD(0x3d6a39ef, 0x35793c76) }, 336 { DBLWORD(0x3ff00000, 0x00000000) }, 337 { DBLWORD(0x3fdfffff, 0xfffffff6) }, 338 { DBLWORD(0x3fc55555, 0x721a1d14) }, 339 { DBLWORD(0x3fa55555, 0x6e0896af) }, 340 { DBLWORD(0x01000000, 0x00000000) }, 341 { DBLWORD(0x7f000000, 0x00000000) }, 342 { DBLWORD(0x40862e42, 0xfefa39ef) }, 343 { DBLWORD(0xc0874910, 0xd52d3051) }, 344 { DBLWORD(0xfff00000, 0x00000000) }, 345 { DBLWORD(0x00000000, 0x00000000) } 346 }; 347 348 #define round C[0].d 349 #define invln2_256 C[1].d 350 #define ln2_256h C[2].d 351 #define ln2_256l C[3].d 352 #define one C[4].d 353 #define B1 C[5].d 354 #define B2 C[6].d 355 #define B3 C[7].d 356 #define tiny C[8].d 357 #define huge C[9].d 358 #define othresh C[10].d 359 #define uthresh C[11].d 360 #define neginf C[12].d 361 #define zero C[13].d 362 363 #define PROCESS(N) \ 364 y##N = (x##N * invln2_256) + round; \ 365 j##N = LO(y##N); \ 366 y##N -= round; \ 367 k##N = j##N >> 8; \ 368 j##N = (j##N & 0xff) << 1; \ 369 x##N = (x##N - y##N * ln2_256h) - y##N * ln2_256l; \ 370 y##N = x##N * (one + x##N * (B1 + x##N * (B2 + x##N * B3))); \ 371 t##N = TBL[j##N]; \ 372 y##N = t##N + (TBL[j##N + 1] + t##N * y##N); \ 373 if (k##N < -1021) { \ 374 HI(y##N) += (k##N + 0x3ef) << 20; \ 375 y##N *= tiny; \ 376 } else { \ 377 HI(y##N) += k##N << 20; \ 378 } \ 379 *y = y##N; \ 380 y += stridey 381 382 #define PREPROCESS(N, index, label) \ 383 hx = HI(x[0]); \ 384 ix = hx & ~0x80000000; \ 385 x##N = *x; \ 386 x += stridex; \ 387 if (ix >= 0x40862e42) { \ 388 if (ix >= 0x7ff00000) { /* x is inf or nan */ \ 389 y[index] = (x##N == neginf)? zero : \ 390 x##N * x##N; \ 391 goto label; \ 392 } \ 393 if (x##N > othresh) { \ 394 y[index] = huge * huge; \ 395 goto label; \ 396 } \ 397 if (x##N < uthresh) { \ 398 y[index] = tiny * tiny; \ 399 goto label; \ 400 } \ 401 } else if (ix < 0x3e300000) { /* |x| < 2^-28 */ \ 402 y[index] = one + x##N; \ 403 goto label; \ 404 } 405 406 void 407 __vexp(int n, double *restrict x, int stridex, double *restrict y, 408 int stridey) 409 { 410 double x0, x1, x2, x3, x4, x5; 411 double y0, y1, y2, y3, y4, y5; 412 double t0, t1, t2, t3, t4, t5; 413 int k0, k1, k2, k3, k4, k5; 414 int j0, j1, j2, j3, j4, j5; 415 int hx, ix; 416 417 y -= stridey; 418 419 for (;;) { 420 begin: 421 if (--n < 0) 422 break; 423 y += stridey; 424 425 PREPROCESS(0, 0, begin); 426 427 if (--n < 0) 428 goto process1; 429 430 PREPROCESS(1, stridey, process1); 431 432 if (--n < 0) 433 goto process2; 434 435 PREPROCESS(2, stridey << 1, process2); 436 437 if (--n < 0) 438 goto process3; 439 440 PREPROCESS(3, (stridey << 1) + stridey, process3); 441 442 if (--n < 0) 443 goto process4; 444 445 PREPROCESS(4, stridey << 2, process4); 446 447 if (--n < 0) 448 goto process5; 449 450 PREPROCESS(5, (stridey << 2) + stridey, process5); 451 452 y0 = (x0 * invln2_256) + round; 453 y1 = (x1 * invln2_256) + round; 454 y2 = (x2 * invln2_256) + round; 455 y3 = (x3 * invln2_256) + round; 456 y4 = (x4 * invln2_256) + round; 457 y5 = (x5 * invln2_256) + round; 458 459 j0 = LO(y0); 460 j1 = LO(y1); 461 j2 = LO(y2); 462 j3 = LO(y3); 463 j4 = LO(y4); 464 j5 = LO(y5); 465 466 y0 -= round; 467 y1 -= round; 468 y2 -= round; 469 y3 -= round; 470 y4 -= round; 471 y5 -= round; 472 473 k0 = j0 >> 8; 474 k1 = j1 >> 8; 475 k2 = j2 >> 8; 476 k3 = j3 >> 8; 477 k4 = j4 >> 8; 478 k5 = j5 >> 8; 479 480 j0 = (j0 & 0xff) << 1; 481 j1 = (j1 & 0xff) << 1; 482 j2 = (j2 & 0xff) << 1; 483 j3 = (j3 & 0xff) << 1; 484 j4 = (j4 & 0xff) << 1; 485 j5 = (j5 & 0xff) << 1; 486 487 x0 = (x0 - y0 * ln2_256h) - y0 * ln2_256l; 488 x1 = (x1 - y1 * ln2_256h) - y1 * ln2_256l; 489 x2 = (x2 - y2 * ln2_256h) - y2 * ln2_256l; 490 x3 = (x3 - y3 * ln2_256h) - y3 * ln2_256l; 491 x4 = (x4 - y4 * ln2_256h) - y4 * ln2_256l; 492 x5 = (x5 - y5 * ln2_256h) - y5 * ln2_256l; 493 494 y0 = x0 * (one + x0 * (B1 + x0 * (B2 + x0 * B3))); 495 y1 = x1 * (one + x1 * (B1 + x1 * (B2 + x1 * B3))); 496 y2 = x2 * (one + x2 * (B1 + x2 * (B2 + x2 * B3))); 497 y3 = x3 * (one + x3 * (B1 + x3 * (B2 + x3 * B3))); 498 y4 = x4 * (one + x4 * (B1 + x4 * (B2 + x4 * B3))); 499 y5 = x5 * (one + x5 * (B1 + x5 * (B2 + x5 * B3))); 500 501 t0 = TBL[j0]; 502 t1 = TBL[j1]; 503 t2 = TBL[j2]; 504 t3 = TBL[j3]; 505 t4 = TBL[j4]; 506 t5 = TBL[j5]; 507 508 y0 = t0 + (TBL[j0 + 1] + t0 * y0); 509 y1 = t1 + (TBL[j1 + 1] + t1 * y1); 510 y2 = t2 + (TBL[j2 + 1] + t2 * y2); 511 y3 = t3 + (TBL[j3 + 1] + t3 * y3); 512 y4 = t4 + (TBL[j4 + 1] + t4 * y4); 513 y5 = t5 + (TBL[j5 + 1] + t5 * y5); 514 515 if (k0 < -1021) { 516 HI(y0) += (k0 + 0x3ef) << 20; 517 y0 *= tiny; 518 } else { 519 HI(y0) += k0 << 20; 520 } 521 if (k1 < -1021) { 522 HI(y1) += (k1 + 0x3ef) << 20; 523 y1 *= tiny; 524 } else { 525 HI(y1) += k1 << 20; 526 } 527 if (k2 < -1021) { 528 HI(y2) += (k2 + 0x3ef) << 20; 529 y2 *= tiny; 530 } else { 531 HI(y2) += k2 << 20; 532 } 533 if (k3 < -1021) { 534 HI(y3) += (k3 + 0x3ef) << 20; 535 y3 *= tiny; 536 } else { 537 HI(y3) += k3 << 20; 538 } 539 if (k4 < -1021) { 540 HI(y4) += (k4 + 0x3ef) << 20; 541 y4 *= tiny; 542 } else { 543 HI(y4) += k4 << 20; 544 } 545 if (k5 < -1021) { 546 HI(y5) += (k5 + 0x3ef) << 20; 547 y5 *= tiny; 548 } else { 549 HI(y5) += k5 << 20; 550 } 551 552 y[0] = y0; 553 y[stridey] = y1; 554 y[stridey << 1] = y2; 555 y[(stridey << 1) + stridey] = y3; 556 y[stridey << 2] = y4; 557 y[(stridey << 2) + stridey] = y5; 558 y += (stridey << 2) + stridey; 559 continue; 560 561 process1: 562 PROCESS(0); 563 continue; 564 565 process2: 566 PROCESS(0); 567 PROCESS(1); 568 continue; 569 570 process3: 571 PROCESS(0); 572 PROCESS(1); 573 PROCESS(2); 574 continue; 575 576 process4: 577 PROCESS(0); 578 PROCESS(1); 579 PROCESS(2); 580 PROCESS(3); 581 continue; 582 583 process5: 584 PROCESS(0); 585 PROCESS(1); 586 PROCESS(2); 587 PROCESS(3); 588 PROCESS(4); 589 } 590 }