Print this page
11210 libm should be cstyle(1ONBLD) clean
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libm/common/m9x/tgamma.c
+++ new/usr/src/lib/libm/common/m9x/tgamma.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
↓ open down ↓ |
14 lines elided |
↑ open up ↑ |
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 24 */
25 +
25 26 /*
26 27 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 28 * Use is subject to license terms.
28 29 */
29 30
30 31 #pragma weak __tgamma = tgamma
31 32
32 -/* INDENT OFF */
33 +/* BEGIN CSTYLED */
33 34 /*
34 35 * True gamma function
35 36 * double tgamma(double x)
36 37 *
37 38 * Error:
38 39 * ------
39 - * Less that one ulp for both positive and negative arguments.
40 + * Less that one ulp for both positive and negative arguments.
40 41 *
41 42 * Algorithm:
42 43 * ---------
43 44 * A: For negative argument
44 45 * (1) gamma(-n or -inf) is NaN
45 46 * (2) Underflow Threshold
46 47 * (3) Reduction to gamma(1+x)
47 48 * B: For x between 1 and 2
48 - * C: For x between 0 and 1
49 + * C: For x between 0 and 1
49 50 * D: For x between 2 and 8
50 51 * E: Overflow thresold {see over.c}
51 52 * F: For overflow_threshold >= x >= 8
52 53 *
53 54 * Implementation details
54 55 * -----------------------
55 56 * -pi
56 57 * (A) For negative argument, use gamma(-x) = ------------------------.
57 58 * (sin(pi*x)*gamma(1+x))
58 59 *
59 60 * (1) gamma(-n or -inf) is NaN with invalid signal by SUSv3 spec.
60 61 * (Ideally, gamma(-n) = 1/sinpi(n) = (-1)**(n+1) * inf.)
61 62 *
62 63 * (2) Underflow Threshold. For each precision, there is a value T
63 64 * such that when x>T and when x is not an integer, gamma(-x) will
64 65 * always underflow. A table of the underflow threshold value is given
65 66 * below. For proof, see file "under.c".
66 67 *
67 68 * Precision underflow threshold T =
68 69 * ----------------------------------------------------------------------
69 70 * single 41.000041962 = 41 + 11 ULP
70 71 * (machine format) 4224000B
71 72 * double 183.000000000000312639 = 183 + 11 ULP
72 73 * (machine format) 4066E000 0000000B
73 74 * quad 1774.0000000000000000000000000000017749370 = 1774 + 9 ULP
74 75 * (machine format) 4009BB80000000000000000000000009
75 76 * ----------------------------------------------------------------------
76 77 *
77 78 * (3) Reduction to gamma(1+x).
78 79 * Because of (1) and (2), we need only consider non-integral x
79 80 * such that 0<x<T. Let k = [x] and z = x-[x]. Define
80 81 * sin(x*pi) cos(x*pi)
81 82 * kpsin(x) = --------- and kpcos(x) = --------- . Then
82 83 * pi pi
83 84 * 1
84 85 * gamma(-x) = --------------------.
85 86 * -kpsin(x)*gamma(1+x)
↓ open down ↓ |
27 lines elided |
↑ open up ↑ |
86 87 * Since x = k+z,
87 88 * k+1
88 89 * -sin(x*pi) = -sin(k*pi+z*pi) = (-1) *sin(z*pi),
89 90 * k+1
90 91 * we have -kpsin(x) = (-1) * kpsin(z). We can further
91 92 * reduce z to t by
92 93 * (I) t = z when 0.00000 <= z < 0.31830...
93 94 * (II) t = 0.5-z when 0.31830... <= z < 0.681690...
94 95 * (III) t = 1-z when 0.681690... <= z < 1.00000
95 96 * and correspondingly
96 - * (I) kpsin(z) = kpsin(t) ... 0<= z < 0.3184
97 - * (II) kpsin(z) = kpcos(t) ... |t| < 0.182
98 - * (III) kpsin(z) = kpsin(t) ... 0<= t < 0.3184
97 + * (I) kpsin(z) = kpsin(t) ... 0<= z < 0.3184
98 + * (II) kpsin(z) = kpcos(t) ... |t| < 0.182
99 + * (III) kpsin(z) = kpsin(t) ... 0<= t < 0.3184
99 100 *
100 101 * Using a special Remez algorithm, we obtain the following polynomial
101 102 * approximation for kpsin(t) for 0<=t<0.3184:
102 103 *
103 104 * Computation note: in simulating higher precision arithmetic, kcpsin
104 105 * return head = t and tail = ks[0]*t^3 + (...) to maintain extra bits.
105 106 *
106 107 * Quad precision, remez error <= 2**(-129.74)
107 108 * 3 5 27
108 109 * kpsin(t) = t + ks[0] * t + ks[1] * t + ... + ks[12] * t
109 110 *
110 111 * ks[ 0] = -1.64493406684822643647241516664602518705158902870e+0000
111 112 * ks[ 1] = 8.11742425283353643637002772405874238094995726160e-0001
112 113 * ks[ 2] = -1.90751824122084213696472111835337366232282723933e-0001
113 114 * ks[ 3] = 2.61478478176548005046532613563241288115395517084e-0002
114 115 * ks[ 4] = -2.34608103545582363750893072647117829448016479971e-0003
115 116 * ks[ 5] = 1.48428793031071003684606647212534027556262040158e-0004
116 117 * ks[ 6] = -6.97587366165638046518462722252768122615952898698e-0006
117 118 * ks[ 7] = 2.53121740413702536928659271747187500934840057929e-0007
118 119 * ks[ 8] = -7.30471182221385990397683641695766121301933621956e-0009
119 120 * ks[ 9] = 1.71653847451163495739958249695549313987973589884e-0010
120 121 * ks[10] = -3.34813314714560776122245796929054813458341420565e-0012
121 122 * ks[11] = 5.50724992262622033449487808306969135431411753047e-0014
122 123 * ks[12] = -7.67678132753577998601234393215802221104236979928e-0016
123 124 *
124 125 * Double precision, Remez error <= 2**(-62.9)
125 126 * 3 5 15
126 127 * kpsin(t) = t + ks[0] * t + ks[1] * t + ... + ks[6] * t
127 128 *
128 129 * ks[0] = -1.644934066848226406065691 (0x3ffa51a6 625307d3)
129 130 * ks[1] = 8.11742425283341655883668741874008920850698590621e-0001
130 131 * ks[2] = -1.90751824120862873825597279118304943994042258291e-0001
131 132 * ks[3] = 2.61478477632554278317289628332654539353521911570e-0002
132 133 * ks[4] = -2.34607978510202710377617190278735525354347705866e-0003
133 134 * ks[5] = 1.48413292290051695897242899977121846763824221705e-0004
134 135 * ks[6] = -6.87730769637543488108688726777687262485357072242e-0006
135 136 *
136 137 * Single precision, Remez error <= 2**(-34.09)
↓ open down ↓ |
28 lines elided |
↑ open up ↑ |
137 138 * 3 5 9
138 139 * kpsin(t) = t + ks[0] * t + ks[1] * t + ... + ks[3] * t
139 140 *
140 141 * ks[0] = -1.64493404985645811354476665052005342839447790544e+0000
141 142 * ks[1] = 8.11740794458351064092797249069438269367389272270e-0001
142 143 * ks[2] = -1.90703144603551216933075809162889536878854055202e-0001
143 144 * ks[3] = 2.55742333994264563281155312271481108635575331201e-0002
144 145 *
145 146 * Computation note: in simulating higher precision arithmetic, kcpsin
146 147 * return head = t and tail = kc[0]*t^3 + (...) to maintain extra bits
147 - * precision.
148 + * precision.
148 149 *
149 150 * And for kpcos(t) for |t|< 0.183:
150 151 *
151 152 * Quad precision, remez <= 2**(-122.48)
152 153 * 2 4 22
153 154 * kpcos(t) = 1/pi + pi/2 * t + kc[2] * t + ... + kc[11] * t
154 155 *
155 156 * kc[2] = 1.29192819501249250731151312779548918765320728489e+0000
156 157 * kc[3] = -4.25027339979557573976029596929319207009444090366e-0001
157 158 * kc[4] = 7.49080661650990096109672954618317623888421628613e-0002
158 159 * kc[5] = -8.21458866111282287985539464173976555436050215120e-0003
159 160 * kc[6] = 6.14202578809529228503205255165761204750211603402e-0004
160 161 * kc[7] = -3.33073432691149607007217330302595267179545908740e-0005
161 162 * kc[8] = 1.36970959047832085796809745461530865597993680204e-0006
162 163 * kc[9] = -4.41780774262583514450246512727201806217271097336e-0008
163 164 * kc[10]= 1.14741409212381858820016567664488123478660705759e-0009
164 165 * kc[11]= -2.44261236114707374558437500654381006300502749632e-0011
165 166 *
166 167 * Double precision, remez < 2**(61.91)
167 168 * 2 4 12
168 169 * kpcos(t) = 1/pi + pi/2 *t + kc[2] * t + ... + kc[6] * t
169 170 *
170 171 * kc[2] = 1.29192819501230224953283586722575766189551966008e+0000
171 172 * kc[3] = -4.25027339940149518500158850753393173519732149213e-0001
172 173 * kc[4] = 7.49080625187015312373925142219429422375556727752e-0002
173 174 * kc[5] = -8.21442040906099210866977352284054849051348692715e-0003
174 175 * kc[6] = 6.10411356829515414575566564733632532333904115968e-0004
175 176 *
176 177 * Single precision, remez < 2**(-30.13)
177 178 * 2 6
178 179 * kpcos(t) = kc[0] + kc[1] * t + ... + kc[3] * t
179 180 *
180 181 * kc[0] = 3.18309886183790671537767526745028724068919291480e-0001
181 182 * kc[1] = -1.57079581447762568199467875065854538626594937791e+0000
182 183 * kc[2] = 1.29183528092558692844073004029568674027807393862e+0000
183 184 * kc[3] = -4.20232949771307685981015914425195471602739075537e-0001
184 185 *
↓ open down ↓ |
27 lines elided |
↑ open up ↑ |
185 186 * Computation note: in simulating higher precision arithmetic, kcpcos
186 187 * return head = 1/pi chopped, and tail = pi/2 *t^2 + (tail part of 1/pi
187 188 * + ...) to maintain extra bits precision. In particular, pi/2 * t^2
188 189 * is calculated with great care.
189 190 *
190 191 * Thus, the computation of gamma(-x), x>0, is:
191 192 * Let k = int(x), z = x-k.
192 193 * For z in (I)
193 194 * k+1
194 195 * (-1)
195 - * gamma(-x) = ------------------- ;
196 + * gamma(-x) = ------------------- ;
196 197 * kpsin(z)*gamma(1+x)
197 198 *
198 199 * otherwise, for z in (II),
199 200 * k+1
200 201 * (-1)
201 - * gamma(-x) = ----------------------- ;
202 + * gamma(-x) = ----------------------- ;
202 203 * kpcos(0.5-z)*gamma(1+x)
203 204 *
204 205 * otherwise, for z in (III),
205 206 * k+1
206 207 * (-1)
207 - * gamma(-x) = --------------------- .
208 + * gamma(-x) = --------------------- .
208 209 * kpsin(1-z)*gamma(1+x)
209 210 *
210 211 * Thus, the computation of gamma(-x) reduced to the computation of
211 212 * gamma(1+x) and kpsin(), kpcos().
212 213 *
213 214 * (B) For x between 1 and 2. We break [1,2] into three parts:
214 215 * GT1 = [1.0000, 1.2845]
215 - * GT2 = [1.2844, 1.6374]
216 - * GT3 = [1.6373, 2.0000]
216 + * GT2 = [1.2844, 1.6374]
217 + * GT3 = [1.6373, 2.0000]
217 218 *
218 219 * For x in GTi, i=1,2,3, let
219 - * z1 = 1.134861805732790769689793935774652917006
220 + * z1 = 1.134861805732790769689793935774652917006
220 221 * gz1 = gamma(z1) = 0.9382046279096824494097535615803269576988
221 222 * tz1 = gamma'(z1) = -0.3517214357852935791015625000000000000000
222 223 *
223 224 * z2 = 1.461632144968362341262659542325721328468e+0000
224 225 * gz2 = gamma(z2) = 0.8856031944108887002788159005825887332080
225 226 * tz2 = gamma'(z2) = 0.00
226 227 *
227 228 * z3 = 1.819773101100500601787868704921606996312e+0000
228 229 * gz3 = gamma(z3) = 0.9367814114636523216188468970808378497426
229 230 * tz3 = gamma'(z3) = 0.2805306315422058105468750000000000000000
230 231 *
231 232 * and
232 233 * y = x-zi ... for extra precision, write y = y.h + y.l
233 234 * Then
234 235 * gamma(x) = gzi + tzi*(y.h+y.l) + y*y*Ri(y),
235 236 * = gzi.h + (tzi*y.h + ((tzi*y.l+gzi.l) + y*y*Ri(y)))
236 237 * = gy.h + gy.l
237 238 * where
238 239 * (I) For double precision
239 240 *
240 241 * Ri(y) = Pi(y)/Qi(y), i=1,2,3;
241 242 *
242 243 * P1(y) = p1[0] + p1[1]*y + ... + p1[4]*y^4
243 244 * Q1(y) = q1[0] + q1[1]*y + ... + q1[5]*y^5
244 245 *
245 246 * P2(y) = p2[0] + p2[1]*y + ... + p2[3]*y^3
246 247 * Q2(y) = q2[0] + q2[1]*y + ... + q2[6]*y^6
247 248 *
248 249 * P3(y) = p3[0] + p3[1]*y + ... + p3[4]*y^4
249 250 * Q3(y) = q3[0] + q3[1]*y + ... + q3[5]*y^5
250 251 *
251 252 * Remez precision of Ri(y):
252 253 * |gamma(x)-(gzi+tzi*y) - y*y*Ri(y)| <= 2**-62.3 ... for i = 1
253 254 * <= 2**-59.4 ... for i = 2
254 255 * <= 2**-62.1 ... for i = 3
255 256 *
256 257 * (II) For quad precision
257 258 *
258 259 * Ri(y) = Pi(y)/Qi(y), i=1,2,3;
259 260 *
260 261 * P1(y) = p1[0] + p1[1]*y + ... + p1[9]*y^9
261 262 * Q1(y) = q1[0] + q1[1]*y + ... + q1[8]*y^8
262 263 *
263 264 * P2(y) = p2[0] + p2[1]*y + ... + p2[9]*y^9
264 265 * Q2(y) = q2[0] + q2[1]*y + ... + q2[9]*y^9
265 266 *
266 267 * P3(y) = p3[0] + p3[1]*y + ... + p3[9]*y^9
267 268 * Q3(y) = q3[0] + q3[1]*y + ... + q3[9]*y^9
268 269 *
269 270 * Remez precision of Ri(y):
270 271 * |gamma(x)-(gzi+tzi*y) - y*y*Ri(y)| <= 2**-118.2 ... for i = 1
271 272 * <= 2**-126.8 ... for i = 2
272 273 * <= 2**-119.5 ... for i = 3
273 274 *
274 275 * (III) For single precision
275 276 *
276 277 * Ri(y) = Pi(y), i=1,2,3;
277 278 *
278 279 * P1(y) = p1[0] + p1[1]*y + ... + p1[5]*y^5
279 280 *
280 281 * P2(y) = p2[0] + p2[1]*y + ... + p2[5]*y^5
281 282 *
282 283 * P3(y) = p3[0] + p3[1]*y + ... + p3[4]*y^4
283 284 *
284 285 * Remez precision of Ri(y):
285 286 * |gamma(x)-(gzi+tzi*y) - y*y*Ri(y)| <= 2**-30.8 ... for i = 1
286 287 * <= 2**-31.6 ... for i = 2
287 288 * <= 2**-29.5 ... for i = 3
288 289 *
289 290 * Notes. (1) GTi and zi are choosen to balance the interval width and
290 291 * minimize the distant between gamma(x) and the tangent line at
291 292 * zi. In particular, we have
292 293 * |gamma(x)-(gzi+tzi*(x-zi))| <= 0.01436... for x in [1,z2]
293 294 * <= 0.01265... for x in [z2,2]
294 295 *
295 296 * (2) zi are slightly adjusted so that tzi=gamma'(zi) is very
296 297 * close to a single precision value.
297 298 *
298 299 * Coefficents: Single precision
299 300 * i= 1:
300 301 * P1[0] = 7.09087253435088360271451613398019280077561279443e-0001
301 302 * P1[1] = -5.17229560788652108545141978238701790105241761089e-0001
302 303 * P1[2] = 5.23403394528150789405825222323770647162337764327e-0001
303 304 * P1[3] = -4.54586308717075010784041566069480411732634814899e-0001
304 305 * P1[4] = 4.20596490915239085459964590559256913498190955233e-0001
305 306 * P1[5] = -3.57307589712377520978332185838241458642142185789e-0001
306 307 *
307 308 * i = 2:
308 309 * p2[0] = 4.28486983980295198166056119223984284434264344578e-0001
309 310 * p2[1] = -1.30704539487709138528680121627899735386650103914e-0001
310 311 * p2[2] = 1.60856285038051955072861219352655851542955430871e-0001
311 312 * p2[3] = -9.22285161346010583774458802067371182158937943507e-0002
312 313 * p2[4] = 7.19240511767225260740890292605070595560626179357e-0002
↓ open down ↓ |
83 lines elided |
↑ open up ↑ |
313 314 * p2[5] = -4.88158265593355093703112238534484636193260459574e-0002
314 315 *
315 316 * i = 3
316 317 * p3[0] = 3.82409531118807759081121479786092134814808872880e-0001
317 318 * p3[1] = 2.65309888180188647956400403013495759365167853426e-0002
318 319 * p3[2] = 8.06815109775079171923561169415370309376296739835e-0002
319 320 * p3[3] = -1.54821591666137613928840890835174351674007764799e-0002
320 321 * p3[4] = 1.76308239242717268530498313416899188157165183405e-0002
321 322 *
322 323 * Coefficents: Double precision
323 - * i = 1:
324 + * i = 1:
324 325 * p1[0] = 0.70908683619977797008004927192814648151397705078125000
325 326 * p1[1] = 1.71987061393048558089579513384356441668351720061e-0001
326 327 * p1[2] = -3.19273345791990970293320316122813960527705450671e-0002
327 328 * p1[3] = 8.36172645419110036267169600390549973563534476989e-0003
328 329 * p1[4] = 1.13745336648572838333152213474277971244629758101e-0003
329 330 * q1[0] = 1.0
330 331 * q1[1] = 9.71980217826032937526460731778472389791321968082e-0001
331 332 * q1[2] = -7.43576743326756176594084137256042653497087666030e-0002
332 333 * q1[3] = -1.19345944932265559769719470515102012246995255372e-0001
333 334 * q1[4] = 1.59913445751425002620935120470781382215050284762e-0002
334 335 * q1[5] = 1.12601136853374984566572691306402321911547550783e-0003
335 - * i = 2:
336 + * i = 2:
336 337 * p2[0] = 0.42848681585558601181418225678498856723308563232421875
337 338 * p2[1] = 6.53596762668970816023718845105667418483122103629e-0002
338 339 * p2[2] = -6.97280829631212931321050770925128264272768936731e-0003
339 340 * p2[3] = 6.46342359021981718947208605674813260166116632899e-0003
340 341 * q2[0] = 1.0
341 342 * q2[1] = 4.57572620560506047062553957454062012327519313936e-0001
342 343 * q2[2] = -2.52182594886075452859655003407796103083422572036e-0001
343 344 * q2[3] = -1.82970945407778594681348166040103197178711552827e-0002
344 345 * q2[4] = 2.43574726993169566475227642128830141304953840502e-0002
345 346 * q2[5] = -5.20390406466942525358645957564897411258667085501e-0003
346 347 * q2[6] = 4.79520251383279837635552431988023256031951133885e-0004
347 - * i = 3:
348 + * i = 3:
348 349 * p3[0] = 0.382409479734567459008331979930517263710498809814453125
349 350 * p3[1] = 1.42876048697668161599069814043449301572928034140e-0001
350 351 * p3[2] = 3.42157571052250536817923866013561760785748899071e-0003
351 352 * p3[3] = -5.01542621710067521405087887856991700987709272937e-0004
352 353 * p3[4] = 8.89285814866740910123834688163838287618332122670e-0004
353 354 * q3[0] = 1.0
354 355 * q3[1] = 3.04253086629444201002215640948957897906299633168e-0001
355 356 * q3[2] = -2.23162407379999477282555672834881213873185520006e-0001
356 357 * q3[3] = -1.05060867741952065921809811933670131427552903636e-0002
357 358 * q3[4] = 1.70511763916186982473301861980856352005926669320e-0002
358 359 * q3[5] = -2.12950201683609187927899416700094630764182477464e-0003
359 360 *
360 361 * Note that all pi0 are exact in double, which is obtained by a
361 362 * special Remez Algorithm.
362 363 *
363 364 * Coefficents: Quad precision
364 - * i = 1:
365 + * i = 1:
365 366 * p1[0] = 0.709086836199777919037185741507610124611513720557
366 367 * p1[1] = 4.45754781206489035827915969367354835667391606951e-0001
367 368 * p1[2] = 3.21049298735832382311662273882632210062918153852e-0002
368 369 * p1[3] = -5.71296796342106617651765245858289197369688864350e-0003
369 370 * p1[4] = 6.04666892891998977081619174969855831606965352773e-0003
370 371 * p1[5] = 8.99106186996888711939627812174765258822658645168e-0004
371 372 * p1[6] = -6.96496846144407741431207008527018441810175568949e-0005
372 373 * p1[7] = 1.52597046118984020814225409300131445070213882429e-0005
373 374 * p1[8] = 5.68521076168495673844711465407432189190681541547e-0007
374 375 * p1[9] = 3.30749673519634895220582062520286565610418952979e-0008
375 376 * q1[0] = 1.0+0000
376 377 * q1[1] = 1.35806511721671070408570853537257079579490650668e+0000
377 378 * q1[2] = 2.97567810153429553405327140096063086994072952961e-0001
378 379 * q1[3] = -1.52956835982588571502954372821681851681118097870e-0001
379 380 * q1[4] = -2.88248519561420109768781615289082053597954521218e-0002
380 381 * q1[5] = 1.03475311719937405219789948456313936302378395955e-0002
381 382 * q1[6] = 4.12310203243891222368965360124391297374822742313e-0004
382 383 * q1[7] = -3.12653708152290867248931925120380729518332507388e-0004
383 384 * q1[8] = 2.36672170850409745237358105667757760527014332458e-0005
384 385 *
385 - * i = 2:
386 + * i = 2:
386 387 * p2[0] = 0.428486815855585429730209907810650616737756697477
387 388 * p2[1] = 2.63622124067885222919192651151581541943362617352e-0001
388 389 * p2[2] = 3.85520683670028865731877276741390421744971446855e-0002
389 390 * p2[3] = 3.05065978278128549958897133190295325258023525862e-0003
390 391 * p2[4] = 2.48232934951723128892080415054084339152450445081e-0003
391 392 * p2[5] = 3.67092777065632360693313762221411547741550105407e-0004
392 393 * p2[6] = 3.81228045616085789674530902563145250532194518946e-0006
393 394 * p2[7] = 4.61677225867087554059531455133839175822537617677e-0006
394 395 * p2[8] = 2.18209052385703200438239200991201916609364872993e-0007
395 396 * p2[9] = 1.00490538985245846460006244065624754421022542454e-0008
396 397 * q2[0] = 1.0
397 398 * q2[1] = 9.20276350207639290567783725273128544224570775056e-0001
398 399 * q2[2] = -4.79533683654165107448020515733883781138947771495e-0003
399 400 * q2[3] = -1.24538337585899300494444600248687901947684291683e-0001
400 401 * q2[4] = 4.49866050763472358547524708431719114204535491412e-0003
401 402 * q2[5] = 7.20715455697920560621638325356292640604078591907e-0003
402 403 * q2[6] = -8.68513169029126780280798337091982780598228096116e-0004
403 404 * q2[7] = -1.25104431629401181525027098222745544809974229874e-0004
404 405 * q2[8] = 3.10558344839000038489191304550998047521253437464e-0005
405 406 * q2[9] = -1.76829227852852176018537139573609433652506765712e-0006
406 407 *
407 408 * i = 3
408 409 * p3[0] = 0.3824094797345675048502747661075355640070439388902
409 410 * p3[1] = 3.42198093076618495415854906335908427159833377774e-0001
410 411 * p3[2] = 9.63828189500585568303961406863153237440702754858e-0002
411 412 * p3[3] = 8.76069421042696384852462044188520252156846768667e-0003
412 413 * p3[4] = 1.86477890389161491224872014149309015261897537488e-0003
413 414 * p3[5] = 8.16871354540309895879974742853701311541286944191e-0004
414 415 * p3[6] = 6.83783483674600322518695090864659381650125625216e-0005
415 416 * p3[7] = -1.10168269719261574708565935172719209272190828456e-0006
416 417 * p3[8] = 9.66243228508380420159234853278906717065629721016e-0007
417 418 * p3[9] = 2.31858885579177250541163820671121664974334728142e-0008
418 419 * q3[0] = 1.0
419 420 * q3[1] = 8.25479821168813634632437430090376252512793067339e-0001
420 421 * q3[2] = -1.62251363073937769739639623669295110346015576320e-0002
421 422 * q3[3] = -1.10621286905916732758745130629426559691187579852e-0001
422 423 * q3[4] = 3.48309693970985612644446415789230015515365291459e-0003
423 424 * q3[5] = 6.73553737487488333032431261131289672347043401328e-0003
424 425 * q3[6] = -7.63222008393372630162743587811004613050245128051e-0004
425 426 * q3[7] = -1.35792670669190631476784768961953711773073251336e-0004
426 427 * q3[8] = 3.19610150954223587006220730065608156460205690618e-0005
427 428 * q3[9] = -1.82096553862822346610109522015129585693354348322e-0006
428 429 *
429 430 * (C) For x between 0 and 1.
430 431 * Let P stand for the number of significant bits in the working precision.
431 432 * -P 1
432 433 * (1)For 0 <= x <= 2 , gamma(x) is computed by --- rounded to nearest.
433 434 * x
434 435 * The error is bound by 0.739 ulp(gamma(x)) in IEEE double precision.
435 436 * Proof.
↓ open down ↓ |
40 lines elided |
↑ open up ↑ |
436 437 * 1 2
437 438 * Since -------- ~ x + 0.577...*x - ..., we have, for small x,
438 439 * gamma(x)
439 440 * 1 1
440 441 * ----------- < gamma(x) < --- and
441 442 * x(1+0.578x) x
442 443 * 1 1 1
443 444 * 0 < --- - gamma(x) <= --- - ----------- < 0.578
444 445 * x x x(1+0.578x)
445 446 * 1 1 -P
446 - * The error is thus bounded by --- ulp(---) + 0.578. Since x <= 2 ,
447 + * The error is thus bounded by --- ulp(---) + 0.578. Since x <= 2 ,
447 448 * 2 x
448 449 * 1 P 1 P 1
449 450 * --- >= 2 , ulp(---) >= ulp(2 ) >= 2. Thus 0.578=0.289*2<=0.289ulp(-)
450 451 * x x x
451 452 * Thus
452 453 * 1 1
453 454 * | gamma(x) - [---] rounded | <= (0.5+0.289)*ulp(---).
454 455 * x x
455 456 * -P 1
456 457 * Note that for x<= 2 , it is easy to see that ulp(---)=ulp(gamma(x))
457 458 * x
458 459 * n 1
459 460 * except only when x = 2 , (n<= -53). In such cases, --- is exact
460 461 * x
461 - * and therefore the error is bounded by
462 + * and therefore the error is bounded by
462 463 * 1
463 464 * 0.298*ulp(---) = 0.298*2*ulp(gamma(x)) = 0.578ulp(gamma(x)).
464 465 * x
465 466 * Thus we conclude that the error in gamma is less than 0.739 ulp.
466 467 *
467 468 * (2)Otherwise, for x in GTi-1 (see B), let y = x-(zi-1). From (B) we obtain
468 469 * gamma(1+x)
469 470 * gamma(1+x) = gy.h + gy.l, then compute gamma(x) by -----------.
470 471 * x
471 472 * gy.h
472 473 * Implementaion note. Write x = x.h+x.l, and Let th = ----- chopped to
473 474 * x
474 475 * 20 bits, then
475 476 * gy.h+gy.l
476 477 * gamma(x) = th + (---------- - th )
477 478 * x
478 479 * 1
479 480 * = th + ---*(gy.h-th*x.h+gy.l-th*x.l)
480 481 * x
481 482 *
482 483 * (D) For x between 2 and 8. Let n = 1+x chopped to an integer. Then
483 484 *
484 485 * gamma(x)=(x-1)*(x-2)*...*(x-n)*gamma(x-n)
485 486 *
486 487 * Since x-n is between 1 and 2, we can apply (B) to compute gamma(x).
487 488 *
488 489 * Implementation detail. The computation of (x-1)(x-2)...(x-n) in simulated
489 490 * higher precision arithmetic can be somewhat optimized. For example, in
490 491 * computing (x-1)*(x-2)*(x-3)*(x-4), if we compute (x-1)*(x-4) = z.h+z.l,
491 492 * then (x-2)(x-3) = z.h+2+z.l readily. In below, we list the expression
492 493 * of the formula to compute gamma(x).
493 494 *
494 495 * Assume x-n is in GTi (i=1,2, or 3, see B for detail). Let y = x - n - zi.
495 496 * By (B) we have gamma(x-n) = gy.h+gy.l. If x = x.h+x.l, then we have
496 497 * n=1 (x in [2,3]):
497 498 * gamma(x) = (x-1)*gamma(x-1) = (x-1)*(gy.h+gy.l)
498 499 * = [(x.h-1)+x.l]*(gy.h+gy.l)
499 500 * n=2 (x in [3,4]):
500 501 * gamma(x) = (x-1)(x-2)*gamma(x-2) = (x-1)*(x-2)*(gy.h+gy.l)
501 502 * = ((x.h-2)+x.l)*((x.h-1)+x.l)*(gy.h+gy.l)
502 503 * = [x.h*(x.h-3)+2+x.l*(x+(x.h-3))]*(gy.h+gy.l)
503 504 * n=3 (x in [4,5])
504 505 * gamma(x) = (x-1)(x-2)(x-3)*(gy.h+gy.l)
505 506 * = (x.h*(x.h-3)+2+x.l*(x+(x.h-3)))*[((x.h-3)+x.l)(gy.h+gy.l)]
506 507 * n=4 (x in [5,6])
507 508 * gamma(x) = [(x-1)(x-4)]*[(x-2)(x-3)]*(gy.h+gy.l)
508 509 * = [(x.h*(x.h-5)+4+x.l(x+(x.h-5)))]*[(x-2)*(x-3)]*(gy.h+gy.l)
509 510 * = (y.h+y.l)*(y.h+1+y.l)*(gy.h+gy.l)
510 511 * n=5 (x in [6,7])
511 512 * gamma(x) = [(x-1)(x-4)]*[(x-2)(x-3)]*[(x-5)*(gy.h+gy.l)]
512 513 * n=6 (x in [7,8])
513 514 * gamma(x) = [(x-1)(x-6)]*[(x-2)(x-5)]*[(x-3)(x-4)]*(gy.h+gy.l)]
514 515 * = [(y.h+y.l)(y.h+4+y.l)][(y.h+6+y.l)(gy.h+gy.l)]
515 516 *
516 517 * (E)Overflow Thresold. For x > Overflow thresold of gamma,
517 518 * return huge*huge (overflow).
518 519 *
519 520 * By checking whether lgamma(x) >= 2**{128,1024,16384}, one can
520 521 * determine the overflow threshold for x in single, double, and
521 522 * quad precision. See over.c for details.
522 523 *
523 524 * The overflow threshold of gamma(x) are
524 525 *
525 526 * single: x = 3.5040096283e+01
526 527 * = 0x420C290F (IEEE single)
527 528 * double: x = 1.71624376956302711505e+02
528 529 * = 0x406573FAE561F647 (IEEE double)
529 530 * quad: x = 1.7555483429044629170038892160702032034177e+03
530 531 * = 0x4009B6E3180CD66A5C4206F128BA77F4 (quad)
531 532 *
532 533 * (F)For overflow_threshold >= x >= 8, we use asymptotic approximation.
533 534 * (1) Stirling's formula
534 535 *
↓ open down ↓ |
63 lines elided |
↑ open up ↑ |
535 536 * log(G(x)) ~= (x-.5)*(log(x)-1) + .5(log(2*pi)-1) + (1/x)*P(1/(x*x))
536 537 * = L1 + L2 + L3,
537 538 * where
538 539 * L1(x) = (x-.5)*(log(x)-1),
539 540 * L2 = .5(log(2pi)-1) = 0.41893853....,
540 541 * L3(x) = (1/x)P(1/(x*x)),
541 542 *
542 543 * The range of L1,L2, and L3 are as follows:
543 544 *
544 545 * ------------------------------------------------------------------
545 - * Range(L1) = (single) [8.09..,88.30..] =[2** 3.01..,2** 6.46..]
546 + * Range(L1) = (single) [8.09..,88.30..] =[2** 3.01..,2** 6.46..]
546 547 * (double) [8.09..,709.3..] =[2** 3.01..,2** 9.47..]
547 548 * (quad) [8.09..,11356.10..]=[2** 3.01..,2** 13.47..]
548 - * Range(L2) = 0.41893853.....
549 + * Range(L2) = 0.41893853.....
549 550 * Range(L3) = [0.0104...., 0.00048....] =[2**-6.58..,2**-11.02..]
550 551 * ------------------------------------------------------------------
551 552 *
552 553 * Gamma(x) is then computed by exp(L1+L2+L3).
553 554 *
554 555 * (2) Error analysis of (F):
555 556 * --------------------------
556 557 * The error in Gamma(x) depends on the error inherited in the computation
557 558 * of L= L1+L2+L3. Let L' be the computed value of L. The absolute error
558 559 * in L' is t = L-L'. Since exp(L') = exp(L-t) = exp(L)*exp(t) ~
559 560 * (1+t)*exp(L), the relative error in exp(L') is approximately t.
560 561 *
561 562 * To guarantee the relatively accuracy in exp(L'), we would like
562 563 * |t| < 2**(-P-5) where P denotes for the number of significant bits
563 564 * of the working precision. Consequently, each of the L1,L2, and L3
564 565 * must be computed with absolute error bounded by 2**(-P-5) in absolute
565 566 * value.
566 567 *
567 568 * Since L2 is a constant, it can be pre-computed to the desired accuracy.
568 569 * Also |L3| < 2**-6; therefore, it suffices to compute L3 with the
569 570 * working precision. That is,
570 571 * L3(x) approxmiate log(G(x))-(x-.5)(log(x)-1)-.5(log(2pi)-1)
571 572 * to a precision bounded by 2**(-P-5).
572 573 *
573 574 * 2**(-6)
574 575 * _________V___________________
575 576 * L1(x): |_________|___________________|
576 577 * __ ________________________
577 578 * L2: |__|________________________|
578 579 * __________________________
579 580 * + L3(x): |__________________________|
580 581 * -------------------------------------------
581 582 * [leading] + [Trailing]
582 583 *
583 584 * For L1(x)=(x-0.5)*(log(x)-1), we need ilogb(L1(x))+5 extra bits for
584 585 * both multiplicants to guarantee L1(x)'s absolute error is bounded by
585 586 * 2**(-P-5) in absolute value. Here ilogb(y) is defined to be the unbias
586 587 * binary exponent of y in IEEE format. We can get x-0.5 to the desire
587 588 * accuracy easily. It remains to compute log(x)-1 with ilogb(L1(x))+5
588 589 * extra bits accracy. Note that the range of L1 is 88.30.., 709.3.., and
589 590 * 11356.10... for single, double, and quadruple precision, we have
590 591 *
591 592 * single double quadruple
592 593 * ------------------------------------
593 594 * ilogb(L1(x))+5 <= 11 14 18
594 595 * ------------------------------------
595 596 *
596 597 * (3) Table Driven Method for log(x)-1:
597 598 * --------------------------------------
598 599 * Let x = 2**n * y, where 1 <= y < 2. Let Z={z(i),i=1,...,m}
599 600 * be a set of predetermined evenly distributed floating point numbers
600 601 * in [1, 2]. Let z(j) be the closest one to y, then
601 602 * log(x)-1 = n*log(2)-1 + log(y)
602 603 * = n*log(2)-1 + log(z(j)*y/z(j))
603 604 * = n*log(2)-1 + log(z(j)) + log(y/z(j))
604 605 * = T1(n) + T2(j) + T3,
↓ open down ↓ |
46 lines elided |
↑ open up ↑ |
605 606 *
606 607 * where T1(n) = n*log(2)-1 and T2(j) = log(z(j)). Both T1 and T2 can be
607 608 * pre-calculated and be looked-up in a table. Note that 8 <= x < 1756
608 609 * implies 3<=n<=10 implies 1.079.. < T1(n) < 6.931.
609 610 *
610 611 *
611 612 * y-z(i) y 1+s
612 613 * For T3, let s = --------; then ----- = ----- and
613 614 * y+z(i) z(i) 1-s
614 615 * 1+s 2 3 2 5
615 - * T3 = log(-----) = 2s + --- s + --- s + ....
616 + * T3 = log(-----) = 2s + --- s + --- s + ....
616 617 * 1-s 3 5
617 618 *
618 619 * Suppose the first term 2s is compute in extra precision. The
619 620 * dominating error in T3 would then be the rounding error of the
620 621 * second term 2/3*s**3. To force the rounding bounded by
621 622 * the required accuracy, we have
622 623 * single: |2/3*s**3| < 2**-11 == > |s|<0.09014...
623 624 * double: |2/3*s**3| < 2**-14 == > |s|<0.04507...
624 625 * quad : |2/3*s**3| < 2**-18 == > |s|<0.01788... = 2**(-5.80..)
625 626 *
626 627 * Base on this analysis, we choose Z = {z(i)|z(i)=1+i/64+1/128, 0<=i<=63}.
627 628 * For any y in [1,2), let j = [64*y] chopped to integer, then z(j) is
628 629 * the closest to y, and it is not difficult to see that |s| < 2**(-8).
629 630 * Please note that the polynomial approximation of T3 must be accurate
630 631 * -24-11 -35 -53-14 -67 -113-18 -131
631 632 * to 2 =2 , 2 = 2 , and 2 =2
632 633 * for single, double, and quadruple precision respectively.
633 634 *
634 635 * Inplementation notes.
635 636 * (1) Table look-up entries for T1(n) and T2(j), as well as the calculation
636 637 * of the leading term 2s in T3, are broken up into leading and trailing
637 638 * part such that (leading part)* 2**24 will always be an integer. That
638 639 * will guarantee the addition of the leading parts will be exact.
639 640 *
640 641 * 2**(-24)
641 642 * _________V___________________
642 643 * T1(n): |_________|___________________|
643 644 * _______ ______________________
644 645 * T2(j): |_______|______________________|
645 646 * ____ _______________________
646 647 * 2s: |____|_______________________|
647 648 * __________________________
648 649 * + T3(s)-2s: |__________________________|
649 650 * -------------------------------------------
650 651 * [leading] + [Trailing]
651 652 *
652 653 * (2) How to compute 2s accurately.
653 654 * (A) Compute v = 2s to the working precision. If |v| < 2**(-18),
654 655 * stop.
655 656 * (B) chopped v to 2**(-24): v = ((int)(v*2**24))/2**24
656 657 * (C) 2s = v + (2s - v), where
657 658 * 1
658 659 * 2s - v = --- * (2(y-z) - v*(y+z) )
659 660 * y+z
660 661 * 1
661 662 * = --- * ( [2(y-z) - v*(y+z)_h ] - v*(y+z)_l )
662 663 * y+z
663 664 * where (y+z)_h = (y+z) rounded to 24 bits by (double)(float),
664 665 * and (y+z)_l = ((z+z)-(y+z)_h)+(y-z). Note the the quantity
665 666 * in [] is exact.
666 667 * 2 4
↓ open down ↓ |
41 lines elided |
↑ open up ↑ |
667 668 * (3) Remez approximation for (T3(s)-2s)/s = T3[0]*s + T3[1]*s + ...:
668 669 * Single precision: 1 term (compute in double precision arithmetic)
669 670 * T3(s) = 2s + S1*s^3, S1 = 0.6666717231848518054693623697539230
670 671 * Remez error: |T3(s)/s - (2s+S1*s^3)| < 2**(-35.87)
671 672 * Double precision: 3 terms, Remez error is bounded by 2**(-72.40),
672 673 * see "tgamma_log"
673 674 * Quad precision: 7 terms, Remez error is bounded by 2**(-136.54),
674 675 * see "tgammal_log"
675 676 *
676 677 * The computation of 0.5*(ln(2pi)-1):
677 - * 0.5*(ln(2pi)-1) = 0.4189385332046727417803297364056176398614...
678 + * 0.5*(ln(2pi)-1) = 0.4189385332046727417803297364056176398614...
678 679 * split 0.5*(ln(2pi)-1) to hln2pi_h + hln2pi_l, where hln2pi_h is the
679 680 * leading 21 bits of the constant.
680 681 * hln2pi_h= 0.4189383983612060546875
681 682 * hln2pi_l= 1.348434666870928297364056176398612173648e-07
682 683 *
683 684 * The computation of 1/x*P(1/x^2) = log(G(x))-(x-.5)(ln(x)-1)-(.5ln(2pi)-1):
684 685 * Let s = 1/x <= 1/8 < 0.125. We have
685 686 * quad precision
686 687 * |GP(s) - s*P(s^2)| <= 2**(-120.6), where
687 688 * 3 5 39
688 689 * GP(s) = GP0*s+GP1*s +GP2*s +... +GP19*s ,
689 690 * GP0 = 0.083333333333333333333333333333333172839171301
690 691 * hex 0x3ffe5555 55555555 55555555 55555548
691 692 * GP1 = -2.77777777777777777777777777492501211999399424104e-0003
692 693 * GP2 = 7.93650793650793650793635650541638236350020883243e-0004
693 694 * GP3 = -5.95238095238095238057299772679324503339241961704e-0004
694 695 * GP4 = 8.41750841750841696138422987977683524926142600321e-0004
695 696 * GP5 = -1.91752691752686682825032547823699662178842123308e-0003
696 697 * GP6 = 6.41025641022403480921891559356473451161279359322e-0003
697 698 * GP7 = -2.95506535798414019189819587455577003732808185071e-0002
698 699 * GP8 = 1.79644367229970031486079180060923073476568732136e-0001
699 700 * GP9 = -1.39243086487274662174562872567057200255649290646e+0000
700 701 * GP10 = 1.34025874044417962188677816477842265259608269775e+0001
701 702 * GP11 = -1.56803713480127469414495545399982508700748274318e+0002
702 703 * GP12 = 2.18739841656201561694927630335099313968924493891e+0003
703 704 * GP13 = -3.55249848644100338419187038090925410976237921269e+0004
704 705 * GP14 = 6.43464880437835286216768959439484376449179576452e+0005
705 706 * GP15 = -1.20459154385577014992600342782821389605893904624e+0007
706 707 * GP16 = 2.09263249637351298563934942349749718491071093210e+0008
707 708 * GP17 = -2.96247483183169219343745316433899599834685703457e+0009
708 709 * GP18 = 2.88984933605896033154727626086506756972327292981e+0010
709 710 * GP19 = -1.40960434146030007732838382416230610302678063984e+0011
710 711 *
711 712 * double precision
712 713 * |GP(s) - s*P(s^2)| <= 2**(-63.5), where
713 714 * 3 5 7 9 11 13 15
714 715 * GP(s) = GP0*s+GP1*s +GP2*s +GP3*s +GP4*s +GP5*s +GP6*s +GP7*s ,
715 716 *
716 717 * GP0= 0.0833333333333333287074040640618477 (3FB55555 55555555)
717 718 * GP1= -2.77777777776649355200565611114627670089130772843e-0003
718 719 * GP2= 7.93650787486083724805476194170211775784158551509e-0004
719 720 * GP3= -5.95236628558314928757811419580281294593903582971e-0004
720 721 * GP4= 8.41566473999853451983137162780427812781178932540e-0004
721 722 * GP5= -1.90424776670441373564512942038926168175921303212e-0003
722 723 * GP6= 5.84933161530949666312333949534482303007354299178e-0003
723 724 * GP7= -1.59453228931082030262124832506144392496561694550e-0002
724 725 * single precision
725 726 * |GP(s) - s*P(s^2)| <= 2**(-37.78), where
726 727 * 3 5
727 728 * GP(s) = GP0*s+GP1*s +GP2*s
728 729 * GP0 = 8.33333330959694065245736888749042811909994573178e-0002
↓ open down ↓ |
41 lines elided |
↑ open up ↑ |
729 730 * GP1 = -2.77765545601667179767706600890361535225507762168e-0003
730 731 * GP2 = 7.77830853479775281781085278324621033523037489883e-0004
731 732 *
732 733 *
733 734 * Implementation note:
734 735 * z = (1/x), z2 = z*z, z4 = z2*z2;
735 736 * p = z*(GP0+z2*(GP1+....+z2*GP7))
736 737 * = z*(GP0+(z4*(GP2+z4*(GP4+z4*GP6))+z2*(GP1+z4*(GP3+z4*(GP5+z4*GP7)))))
737 738 *
738 739 * Adding everything up:
739 - * t = rr.h*ww.h+hln2pi_h ... exact
740 + * t = rr.h*ww.h+hln2pi_h ... exact
740 741 * w = (hln2pi_l + ((x-0.5)*ww.l+rr.l*ww.h)) + p
741 742 *
742 743 * Computing exp(t+w):
743 744 * s = t+w; write s = (n+j/32)*ln2+r, |r|<=(1/64)*ln2, then
744 745 * exp(s) = 2**n * (2**(j/32) + 2**(j/32)*expm1(r)), where
745 746 * expm1(r) = r + Et1*r^2 + Et2*r^3 + ... + Et5*r^6, and
746 747 * 2**(j/32) is obtained by table look-up S[j]+S_trail[j].
747 748 * Remez error bound:
748 749 * |exp(r) - (1+r+Et1*r^2+...+Et5*r^6)| <= 2^(-63).
749 750 */
751 +/* END CSTYLED */
750 752
751 753 #include "libm.h"
752 754
753 -#define __HI(x) ((int *) &x)[HIWORD]
754 -#define __LO(x) ((unsigned *) &x)[LOWORD]
755 +#define __HI(x) ((int *)&x)[HIWORD]
756 +#define __LO(x) ((unsigned *)&x)[LOWORD]
755 757
756 758 struct Double {
757 759 double h;
758 760 double l;
759 761 };
760 762
761 763 /* Hex value of GP0 shoule be 3FB55555 55555555 */
762 764 static const double c[] = {
763 765 +1.0,
764 766 +2.0,
765 767 +0.5,
766 768 +1.0e-300,
767 - +6.66666666666666740682e-01, /* A1=T3[0] */
768 - +3.99999999955626478023093908674902212920e-01, /* A2=T3[1] */
769 - +2.85720221533145659809237398709372330980e-01, /* A3=T3[2] */
770 - +0.0833333333333333287074040640618477, /* GP[0] */
769 + +6.66666666666666740682e-01, /* A1=T3[0] */
770 + +3.99999999955626478023093908674902212920e-01, /* A2=T3[1] */
771 + +2.85720221533145659809237398709372330980e-01, /* A3=T3[2] */
772 + +0.0833333333333333287074040640618477, /* GP[0] */
771 773 -2.77777777776649355200565611114627670089130772843e-03,
772 774 +7.93650787486083724805476194170211775784158551509e-04,
773 775 -5.95236628558314928757811419580281294593903582971e-04,
774 776 +8.41566473999853451983137162780427812781178932540e-04,
775 777 -1.90424776670441373564512942038926168175921303212e-03,
776 778 +5.84933161530949666312333949534482303007354299178e-03,
777 779 -1.59453228931082030262124832506144392496561694550e-02,
778 780 +4.18937683105468750000e-01, /* hln2pi_h */
779 781 +8.50099203991780279640e-07, /* hln2pi_l */
780 782 +4.18938533204672741744150788368695779923320328369e-01, /* hln2pi */
781 783 +2.16608493865351192653e-02, /* ln2_32hi */
782 784 +5.96317165397058656257e-12, /* ln2_32lo */
783 785 +4.61662413084468283841e+01, /* invln2_32 */
784 786 +5.0000000000000000000e-1, /* Et1 */
785 787 +1.66666666665223585560605991943703896196054020060e-01, /* Et2 */
786 788 +4.16666666665895103520154073534275286743788421687e-02, /* Et3 */
787 789 +8.33336844093536520775865096538773197505523826029e-03, /* Et4 */
788 790 +1.38889201930843436040204096950052984793587640227e-03, /* Et5 */
789 791 };
790 792
791 -#define one c[0]
792 -#define two c[1]
793 -#define half c[2]
794 -#define tiny c[3]
795 -#define A1 c[4]
796 -#define A2 c[5]
797 -#define A3 c[6]
798 -#define GP0 c[7]
799 -#define GP1 c[8]
800 -#define GP2 c[9]
801 -#define GP3 c[10]
802 -#define GP4 c[11]
803 -#define GP5 c[12]
804 -#define GP6 c[13]
805 -#define GP7 c[14]
806 -#define hln2pi_h c[15]
807 -#define hln2pi_l c[16]
808 -#define hln2pi c[17]
809 -#define ln2_32hi c[18]
810 -#define ln2_32lo c[19]
811 -#define invln2_32 c[20]
812 -#define Et1 c[21]
813 -#define Et2 c[22]
814 -#define Et3 c[23]
815 -#define Et4 c[24]
816 -#define Et5 c[25]
793 +#define one c[0]
794 +#define two c[1]
795 +#define half c[2]
796 +#define tiny c[3]
797 +#define A1 c[4]
798 +#define A2 c[5]
799 +#define A3 c[6]
800 +#define GP0 c[7]
801 +#define GP1 c[8]
802 +#define GP2 c[9]
803 +#define GP3 c[10]
804 +#define GP4 c[11]
805 +#define GP5 c[12]
806 +#define GP6 c[13]
807 +#define GP7 c[14]
808 +#define hln2pi_h c[15]
809 +#define hln2pi_l c[16]
810 +#define hln2pi c[17]
811 +#define ln2_32hi c[18]
812 +#define ln2_32lo c[19]
813 +#define invln2_32 c[20]
814 +#define Et1 c[21]
815 +#define Et2 c[22]
816 +#define Et3 c[23]
817 +#define Et4 c[24]
818 +#define Et5 c[25]
817 819
818 820 /*
819 821 * double precision coefficients for computing log(x)-1 in tgamma.
820 822 * See "algorithm" for details
821 823 *
822 824 * log(x) - 1 = T1(n) + T2(j) + T3(s), where x = 2**n * y, 1<=y<2,
823 825 * j=[64*y], z[j]=1+j/64+1/128, s = (y-z[j])/(y+z[j]), and
824 826 * T1(n) = T1[2n,2n+1] = n*log(2)-1,
825 827 * T2(j) = T2[2j,2j+1] = log(z[j]),
826 828 * T3(s) = 2s + T3[0]s^3 + T3[1]s^5 + T3[2]s^7
827 829 * = 2s + A1*s^3 + A2*s^5 + A3*s^7 (see const A1,A2,A3)
828 830 * Note
829 831 * (1) the leading entries are truncated to 24 binary point.
830 832 * See Remezpak/sun/tgamma_log_64.c
831 833 * (2) Remez error for T3(s) is bounded by 2**(-72.4)
832 834 * See mpremez/work/Log/tgamma_log_4_outr2
833 835 */
834 836
835 837 static const double T1[] = {
836 838 -1.00000000000000000000e+00, /* 0xBFF00000 0x00000000 */
837 839 +0.00000000000000000000e+00, /* 0x00000000 0x00000000 */
838 840 -3.06852817535400390625e-01, /* 0xBFD3A37A 0x00000000 */
839 841 -1.90465429995776763166e-09, /* 0xBE205C61 0x0CA86C38 */
840 842 +3.86294305324554443359e-01, /* 0x3FD8B90B 0xC0000000 */
841 843 +5.57953361754750897367e-08, /* 0x3E6DF473 0xDE6AF279 */
842 844 +1.07944148778915405273e+00, /* 0x3FF14564 0x70000000 */
843 845 +5.38906818755173187963e-08, /* 0x3E6CEEAD 0xCDA06BB5 */
844 846 +1.77258867025375366211e+00, /* 0x3FFC5C85 0xF0000000 */
845 847 +5.19860275755595544734e-08, /* 0x3E6BE8E7 0xBCD5E4F2 */
846 848 +2.46573585271835327148e+00, /* 0x4003B9D3 0xB8000000 */
847 849 +5.00813732756017835330e-08, /* 0x3E6AE321 0xAC0B5E2E */
848 850 +3.15888303518295288086e+00, /* 0x40094564 0x78000000 */
849 851 +4.81767189756440192100e-08, /* 0x3E69DD5B 0x9B40D76B */
850 852 +3.85203021764755249023e+00, /* 0x400ED0F5 0x38000000 */
851 853 +4.62720646756862482697e-08, /* 0x3E68D795 0x8A7650A7 */
852 854 +4.54517740011215209961e+00, /* 0x40122E42 0xFC000000 */
853 855 +4.43674103757284839467e-08, /* 0x3E67D1CF 0x79ABC9E4 */
854 856 +5.23832458257675170898e+00, /* 0x4014F40B 0x5C000000 */
855 857 +4.24627560757707130063e-08, /* 0x3E66CC09 0x68E14320 */
856 858 +5.93147176504135131836e+00, /* 0x4017B9D3 0xBC000000 */
857 859 +4.05581017758129486834e-08, /* 0x3E65C643 0x5816BC5D */
858 860 };
859 861
860 862 static const double T2[] = {
861 863 +7.78210163116455078125e-03, /* 0x3F7FE020 0x00000000 */
862 864 +3.88108903981662140884e-08, /* 0x3E64D620 0xCF11F86F */
863 865 +2.31670141220092773438e-02, /* 0x3F97B918 0x00000000 */
864 866 +4.51595251008850513740e-08, /* 0x3E683EAD 0x88D54940 */
865 867 +3.83188128471374511719e-02, /* 0x3FA39E86 0x00000000 */
866 868 +5.14549991480218823411e-08, /* 0x3E6B9FEB 0xD5FA9016 */
867 869 +5.32444715499877929688e-02, /* 0x3FAB42DC 0x00000000 */
868 870 +4.29688244898971182165e-08, /* 0x3E671197 0x1BEC28D1 */
869 871 +6.79506063461303710938e-02, /* 0x3FB16536 0x00000000 */
870 872 +5.55623773783008185114e-08, /* 0x3E6DD46F 0x5C1D0C4C */
871 873 +8.24436545372009277344e-02, /* 0x3FB51B07 0x00000000 */
872 874 +1.46738736635337847313e-08, /* 0x3E4F830C 0x1FB493C7 */
873 875 +9.67295765876770019531e-02, /* 0x3FB8C345 0x00000000 */
874 876 +4.98708741103424492282e-08, /* 0x3E6AC633 0x641EB597 */
875 877 +1.10814332962036132812e-01, /* 0x3FBC5E54 0x00000000 */
876 878 +3.33782539813823062226e-08, /* 0x3E61EB78 0xE862BAC3 */
877 879 +1.24703466892242431641e-01, /* 0x3FBFEC91 0x00000000 */
878 880 +1.16087148042227818450e-08, /* 0x3E48EDF5 0x5D551729 */
879 881 +1.38402283191680908203e-01, /* 0x3FC1B72A 0x80000000 */
880 882 +3.96674382274822001957e-08, /* 0x3E654BD9 0xE80A4181 */
881 883 +1.51916027069091796875e-01, /* 0x3FC371FC 0x00000000 */
882 884 +1.49567501781968021494e-08, /* 0x3E500F47 0xBA1DE6CB */
883 885 +1.65249526500701904297e-01, /* 0x3FC526E5 0x80000000 */
884 886 +4.63946052585787334062e-08, /* 0x3E68E86D 0x0DE8B900 */
885 887 +1.78407609462738037109e-01, /* 0x3FC6D60F 0x80000000 */
886 888 +4.80100802600100279538e-08, /* 0x3E69C674 0x8723551E */
887 889 +1.91394805908203125000e-01, /* 0x3FC87FA0 0x00000000 */
888 890 +4.70914263296092971436e-08, /* 0x3E694832 0x44240802 */
889 891 +2.04215526580810546875e-01, /* 0x3FCA23BC 0x00000000 */
890 892 +1.48478803446288209001e-08, /* 0x3E4FE2B5 0x63193712 */
891 893 +2.16873884201049804688e-01, /* 0x3FCBC286 0x00000000 */
892 894 +5.40995645549315919488e-08, /* 0x3E6D0B63 0x358A7E74 */
893 895 +2.29374051094055175781e-01, /* 0x3FCD5C21 0x00000000 */
894 896 +4.99707906542102284117e-08, /* 0x3E6AD3EE 0xE456E443 */
895 897 +2.41719901561737060547e-01, /* 0x3FCEF0AD 0x80000000 */
896 898 +3.53254081075974352804e-08, /* 0x3E62F716 0x4D948638 */
897 899 +2.53915190696716308594e-01, /* 0x3FD04025 0x80000000 */
898 900 +1.92842471355435739091e-08, /* 0x3E54B4D0 0x40DAE27C */
899 901 +2.65963494777679443359e-01, /* 0x3FD1058B 0xC0000000 */
900 902 +5.37194584979797487125e-08, /* 0x3E6CD725 0x6A8C4FD0 */
901 903 +2.77868449687957763672e-01, /* 0x3FD1C898 0xC0000000 */
902 904 +1.31549854251447496506e-09, /* 0x3E16999F 0xAFBC68E7 */
903 905 +2.89633274078369140625e-01, /* 0x3FD2895A 0x00000000 */
904 906 +1.85046735362538929911e-08, /* 0x3E53DE86 0xA35EB493 */
905 907 +3.01261305809020996094e-01, /* 0x3FD347DD 0x80000000 */
906 908 +2.47691407849191245052e-08, /* 0x3E5A987D 0x54D64567 */
907 909 +3.12755703926086425781e-01, /* 0x3FD40430 0x80000000 */
908 910 +6.07781046260499658610e-09, /* 0x3E3A1A9F 0x8EF4304A */
909 911 +3.24119448661804199219e-01, /* 0x3FD4BE5F 0x80000000 */
910 912 +1.99924077768719198045e-08, /* 0x3E557778 0xA0DB4C99 */
911 913 +3.35355520248413085938e-01, /* 0x3FD57677 0x00000000 */
912 914 +2.16727247443196802771e-08, /* 0x3E57455A 0x6C549AB7 */
913 915 +3.46466720104217529297e-01, /* 0x3FD62C82 0xC0000000 */
914 916 +4.72419910516215900493e-08, /* 0x3E695CE3 0xCA97B7B0 */
915 917 +3.57455849647521972656e-01, /* 0x3FD6E08E 0x80000000 */
916 918 +3.92742818015697624778e-08, /* 0x3E6515D0 0xF1C609CA */
917 919 +3.68325531482696533203e-01, /* 0x3FD792A5 0x40000000 */
918 920 +2.96760111198451042238e-08, /* 0x3E5FDD47 0xA27C15DA */
919 921 +3.79078328609466552734e-01, /* 0x3FD842D1 0xC0000000 */
920 922 +2.43255029056564770289e-08, /* 0x3E5A1E8B 0x17493B14 */
921 923 +3.89716744422912597656e-01, /* 0x3FD8F11E 0x80000000 */
922 924 +6.71711261571421332726e-09, /* 0x3E3CD98B 0x1DF85DA7 */
923 925 +4.00243163108825683594e-01, /* 0x3FD99D95 0x80000000 */
924 926 +1.01818702333557515008e-09, /* 0x3E117E08 0xACBA92EF */
925 927 +4.10659909248352050781e-01, /* 0x3FDA4840 0x80000000 */
926 928 +1.57369163351530571459e-08, /* 0x3E50E5BB 0x0A2BFCA7 */
927 929 +4.20969247817993164062e-01, /* 0x3FDAF129 0x00000000 */
928 930 +4.68261364720663662040e-08, /* 0x3E6923BC 0x358899C2 */
929 931 +4.31173443794250488281e-01, /* 0x3FDB9858 0x80000000 */
930 932 +2.10241208525779214510e-08, /* 0x3E569310 0xFB598FB1 */
931 933 +4.41274523735046386719e-01, /* 0x3FDC3DD7 0x80000000 */
932 934 +3.70698288427707487748e-08, /* 0x3E63E6D6 0xA6B9D9E1 */
933 935 +4.51274633407592773438e-01, /* 0x3FDCE1AF 0x00000000 */
934 936 +1.07318658117071930723e-08, /* 0x3E470BE7 0xD6F6FA58 */
935 937 +4.61175680160522460938e-01, /* 0x3FDD83E7 0x00000000 */
936 938 +3.49616477054305011286e-08, /* 0x3E62C517 0x9F2828AE */
937 939 +4.70979690551757812500e-01, /* 0x3FDE2488 0x00000000 */
938 940 +2.46670332000468969567e-08, /* 0x3E5A7C6C 0x261CBD8F */
939 941 +4.80688512325286865234e-01, /* 0x3FDEC399 0xC0000000 */
940 942 +1.70204650424422423704e-08, /* 0x3E52468C 0xC0175CEE */
941 943 +4.90303933620452880859e-01, /* 0x3FDF6123 0xC0000000 */
942 944 +5.44247409572909703749e-08, /* 0x3E6D3814 0x5630A2B6 */
943 945 +4.99827861785888671875e-01, /* 0x3FDFFD2E 0x00000000 */
944 946 +7.77056065794633071345e-09, /* 0x3E40AFE9 0x30AB2FA0 */
945 947 +5.09261846542358398438e-01, /* 0x3FE04BDF 0x80000000 */
946 948 +5.52474495483665749052e-08, /* 0x3E6DA926 0xD265FCC1 */
947 949 +5.18607735633850097656e-01, /* 0x3FE0986F 0x40000000 */
948 950 +2.85741955344967264536e-08, /* 0x3E5EAE6A 0x41723FB5 */
949 951 +5.27867078781127929688e-01, /* 0x3FE0E449 0x80000000 */
950 952 +1.08397144554263914271e-08, /* 0x3E474732 0x2FDBAB97 */
951 953 +5.37041425704956054688e-01, /* 0x3FE12F71 0x80000000 */
952 954 +4.01919275998792285777e-08, /* 0x3E6593EF 0xBC530123 */
953 955 +5.46132385730743408203e-01, /* 0x3FE179EA 0xA0000000 */
954 956 +5.18673922421792693237e-08, /* 0x3E6BD899 0xA0BFC60E */
955 957 +5.55141448974609375000e-01, /* 0x3FE1C3B8 0x00000000 */
956 958 +5.85658922177154808539e-08, /* 0x3E6F713C 0x24BC94F9 */
957 959 +5.64070105552673339844e-01, /* 0x3FE20CDC 0xC0000000 */
958 960 +3.27321296262276338905e-08, /* 0x3E6192AB 0x6D93503D */
959 961 +5.72919726371765136719e-01, /* 0x3FE2555B 0xC0000000 */
960 962 +2.71900203723740076878e-08, /* 0x3E5D31EF 0x96780876 */
961 963 +5.81691682338714599609e-01, /* 0x3FE29D37 0xE0000000 */
962 964 +5.72959078829112371070e-08, /* 0x3E6EC2B0 0x8AC85CD7 */
963 965 +5.90387403964996337891e-01, /* 0x3FE2E474 0x20000000 */
964 966 +4.26371800367512948470e-08, /* 0x3E66E402 0x68405422 */
965 967 +5.99008142948150634766e-01, /* 0x3FE32B13 0x20000000 */
966 968 +4.66979327646159769249e-08, /* 0x3E69121D 0x71320557 */
967 969 +6.07555210590362548828e-01, /* 0x3FE37117 0xA0000000 */
968 970 +3.96341792466729582847e-08, /* 0x3E654747 0xB5C5DD02 */
969 971 +6.16029858589172363281e-01, /* 0x3FE3B684 0x40000000 */
970 972 +1.86263416563663175432e-08, /* 0x3E53FFF8 0x455F1DBE */
971 973 +6.24433279037475585938e-01, /* 0x3FE3FB5B 0x80000000 */
972 974 +8.97441791510503832111e-09, /* 0x3E4345BD 0x096D3A75 */
973 975 +6.32766664028167724609e-01, /* 0x3FE43F9F 0xE0000000 */
974 976 +5.54287010493641158796e-09, /* 0x3E37CE73 0x3BD393DD */
975 977 +6.41031146049499511719e-01, /* 0x3FE48353 0xC0000000 */
976 978 +3.33714317793368531132e-08, /* 0x3E61EA88 0xDF73D5E9 */
977 979 +6.49227917194366455078e-01, /* 0x3FE4C679 0xA0000000 */
978 980 +2.94307433638127158696e-08, /* 0x3E5F99DC 0x7362D1DA */
979 981 +6.57358050346374511719e-01, /* 0x3FE50913 0xC0000000 */
980 982 +2.23619855184231409785e-08, /* 0x3E5802D0 0xD6979675 */
981 983 +6.65422618389129638672e-01, /* 0x3FE54B24 0x60000000 */
982 984 +1.41559608102782173188e-08, /* 0x3E4E6652 0x5EA4550A */
↓ open down ↓ |
156 lines elided |
↑ open up ↑ |
983 985 +6.73422634601593017578e-01, /* 0x3FE58CAD 0xA0000000 */
984 986 +4.06105737027198329700e-08, /* 0x3E65CD79 0x893092F2 */
985 987 +6.81359171867370605469e-01, /* 0x3FE5CDB1 0xC0000000 */
986 988 +5.29405324634793230630e-08, /* 0x3E6C6C17 0x648CF6E4 */
987 989 +6.89233243465423583984e-01, /* 0x3FE60E32 0xE0000000 */
988 990 +3.77733853963405370102e-08, /* 0x3E644788 0xD8CA7C89 */
989 991 };
990 992
991 993 /* S[j],S_trail[j] = 2**(j/32.) for the final computation of exp(t+w) */
992 994 static const double S[] = {
993 - +1.00000000000000000000e+00, /* 3FF0000000000000 */
994 - +1.02189714865411662714e+00, /* 3FF059B0D3158574 */
995 - +1.04427378242741375480e+00, /* 3FF0B5586CF9890F */
996 - +1.06714040067682369717e+00, /* 3FF11301D0125B51 */
997 - +1.09050773266525768967e+00, /* 3FF172B83C7D517B */
998 - +1.11438674259589243221e+00, /* 3FF1D4873168B9AA */
999 - +1.13878863475669156458e+00, /* 3FF2387A6E756238 */
1000 - +1.16372485877757747552e+00, /* 3FF29E9DF51FDEE1 */
1001 - +1.18920711500272102690e+00, /* 3FF306FE0A31B715 */
1002 - +1.21524735998046895524e+00, /* 3FF371A7373AA9CB */
1003 - +1.24185781207348400201e+00, /* 3FF3DEA64C123422 */
1004 - +1.26905095719173321989e+00, /* 3FF44E086061892D */
1005 - +1.29683955465100964055e+00, /* 3FF4BFDAD5362A27 */
1006 - +1.32523664315974132322e+00, /* 3FF5342B569D4F82 */
1007 - +1.35425554693689265129e+00, /* 3FF5AB07DD485429 */
1008 - +1.38390988196383202258e+00, /* 3FF6247EB03A5585 */
1009 - +1.41421356237309514547e+00, /* 3FF6A09E667F3BCD */
1010 - +1.44518080697704665027e+00, /* 3FF71F75E8EC5F74 */
1011 - +1.47682614593949934623e+00, /* 3FF7A11473EB0187 */
1012 - +1.50916442759342284141e+00, /* 3FF82589994CCE13 */
1013 - +1.54221082540794074411e+00, /* 3FF8ACE5422AA0DB */
1014 - +1.57598084510788649659e+00, /* 3FF93737B0CDC5E5 */
1015 - +1.61049033194925428347e+00, /* 3FF9C49182A3F090 */
1016 - +1.64575547815396494578e+00, /* 3FFA5503B23E255D */
1017 - +1.68179283050742900407e+00, /* 3FFAE89F995AD3AD */
1018 - +1.71861929812247793414e+00, /* 3FFB7F76F2FB5E47 */
1019 - +1.75625216037329945351e+00, /* 3FFC199BDD85529C */
1020 - +1.79470907500310716820e+00, /* 3FFCB720DCEF9069 */
1021 - +1.83400808640934243066e+00, /* 3FFD5818DCFBA487 */
1022 - +1.87416763411029996256e+00, /* 3FFDFC97337B9B5F */
1023 - +1.91520656139714740007e+00, /* 3FFEA4AFA2A490DA */
1024 - +1.95714412417540017941e+00, /* 3FFF50765B6E4540 */
995 + +1.00000000000000000000e+00, /* 3FF0000000000000 */
996 + +1.02189714865411662714e+00, /* 3FF059B0D3158574 */
997 + +1.04427378242741375480e+00, /* 3FF0B5586CF9890F */
998 + +1.06714040067682369717e+00, /* 3FF11301D0125B51 */
999 + +1.09050773266525768967e+00, /* 3FF172B83C7D517B */
1000 + +1.11438674259589243221e+00, /* 3FF1D4873168B9AA */
1001 + +1.13878863475669156458e+00, /* 3FF2387A6E756238 */
1002 + +1.16372485877757747552e+00, /* 3FF29E9DF51FDEE1 */
1003 + +1.18920711500272102690e+00, /* 3FF306FE0A31B715 */
1004 + +1.21524735998046895524e+00, /* 3FF371A7373AA9CB */
1005 + +1.24185781207348400201e+00, /* 3FF3DEA64C123422 */
1006 + +1.26905095719173321989e+00, /* 3FF44E086061892D */
1007 + +1.29683955465100964055e+00, /* 3FF4BFDAD5362A27 */
1008 + +1.32523664315974132322e+00, /* 3FF5342B569D4F82 */
1009 + +1.35425554693689265129e+00, /* 3FF5AB07DD485429 */
1010 + +1.38390988196383202258e+00, /* 3FF6247EB03A5585 */
1011 + +1.41421356237309514547e+00, /* 3FF6A09E667F3BCD */
1012 + +1.44518080697704665027e+00, /* 3FF71F75E8EC5F74 */
1013 + +1.47682614593949934623e+00, /* 3FF7A11473EB0187 */
1014 + +1.50916442759342284141e+00, /* 3FF82589994CCE13 */
1015 + +1.54221082540794074411e+00, /* 3FF8ACE5422AA0DB */
1016 + +1.57598084510788649659e+00, /* 3FF93737B0CDC5E5 */
1017 + +1.61049033194925428347e+00, /* 3FF9C49182A3F090 */
1018 + +1.64575547815396494578e+00, /* 3FFA5503B23E255D */
1019 + +1.68179283050742900407e+00, /* 3FFAE89F995AD3AD */
1020 + +1.71861929812247793414e+00, /* 3FFB7F76F2FB5E47 */
1021 + +1.75625216037329945351e+00, /* 3FFC199BDD85529C */
1022 + +1.79470907500310716820e+00, /* 3FFCB720DCEF9069 */
1023 + +1.83400808640934243066e+00, /* 3FFD5818DCFBA487 */
1024 + +1.87416763411029996256e+00, /* 3FFDFC97337B9B5F */
1025 + +1.91520656139714740007e+00, /* 3FFEA4AFA2A490DA */
1026 + +1.95714412417540017941e+00, /* 3FFF50765B6E4540 */
1025 1027 };
1026 1028
1027 1029 static const double S_trail[] = {
1028 1030 +0.00000000000000000000e+00,
1029 - +5.10922502897344389359e-17, /* 3C8D73E2A475B465 */
1030 - +8.55188970553796365958e-17, /* 3C98A62E4ADC610A */
1031 - -7.89985396684158212226e-17, /* BC96C51039449B3A */
1032 - -3.04678207981247114697e-17, /* BC819041B9D78A76 */
1033 - +1.04102784568455709549e-16, /* 3C9E016E00A2643C */
1034 - +8.91281267602540777782e-17, /* 3C99B07EB6C70573 */
1035 - +3.82920483692409349872e-17, /* 3C8612E8AFAD1255 */
1036 - +3.98201523146564611098e-17, /* 3C86F46AD23182E4 */
1037 - -7.71263069268148813091e-17, /* BC963AEABF42EAE2 */
1038 - +4.65802759183693679123e-17, /* 3C8ADA0911F09EBC */
1039 - +2.66793213134218609523e-18, /* 3C489B7A04EF80D0 */
1040 - +2.53825027948883149593e-17, /* 3C7D4397AFEC42E2 */
1041 - -2.85873121003886075697e-17, /* BC807ABE1DB13CAC */
1042 - +7.70094837980298946162e-17, /* 3C96324C054647AD */
1043 - -6.77051165879478628716e-17, /* BC9383C17E40B497 */
1044 - -9.66729331345291345105e-17, /* BC9BDD3413B26456 */
1045 - -3.02375813499398731940e-17, /* BC816E4786887A99 */
1046 - -3.48399455689279579579e-17, /* BC841577EE04992F */
1047 - -1.01645532775429503911e-16, /* BC9D4C1DD41532D8 */
1048 - +7.94983480969762085616e-17, /* 3C96E9F156864B27 */
1049 - -1.01369164712783039808e-17, /* BC675FC781B57EBC */
1050 - +2.47071925697978878522e-17, /* 3C7C7C46B071F2BE */
1051 - -1.01256799136747726038e-16, /* BC9D2F6EDB8D41E1 */
1052 - +8.19901002058149652013e-17, /* 3C97A1CD345DCC81 */
1053 - -1.85138041826311098821e-17, /* BC75584F7E54AC3B */
1054 - +2.96014069544887330703e-17, /* 3C811065895048DD */
1055 - +1.82274584279120867698e-17, /* 3C7503CBD1E949DB */
1056 - +3.28310722424562658722e-17, /* 3C82ED02D75B3706 */
1057 - -6.12276341300414256164e-17, /* BC91A5CD4F184B5C */
1058 - -1.06199460561959626376e-16, /* BC9E9C23179C2893 */
1059 - +8.96076779103666776760e-17, /* 3C99D3E12DD8A18B */
1031 + +5.10922502897344389359e-17, /* 3C8D73E2A475B465 */
1032 + +8.55188970553796365958e-17, /* 3C98A62E4ADC610A */
1033 + -7.89985396684158212226e-17, /* BC96C51039449B3A */
1034 + -3.04678207981247114697e-17, /* BC819041B9D78A76 */
1035 + +1.04102784568455709549e-16, /* 3C9E016E00A2643C */
1036 + +8.91281267602540777782e-17, /* 3C99B07EB6C70573 */
1037 + +3.82920483692409349872e-17, /* 3C8612E8AFAD1255 */
1038 + +3.98201523146564611098e-17, /* 3C86F46AD23182E4 */
1039 + -7.71263069268148813091e-17, /* BC963AEABF42EAE2 */
1040 + +4.65802759183693679123e-17, /* 3C8ADA0911F09EBC */
1041 + +2.66793213134218609523e-18, /* 3C489B7A04EF80D0 */
1042 + +2.53825027948883149593e-17, /* 3C7D4397AFEC42E2 */
1043 + -2.85873121003886075697e-17, /* BC807ABE1DB13CAC */
1044 + +7.70094837980298946162e-17, /* 3C96324C054647AD */
1045 + -6.77051165879478628716e-17, /* BC9383C17E40B497 */
1046 + -9.66729331345291345105e-17, /* BC9BDD3413B26456 */
1047 + -3.02375813499398731940e-17, /* BC816E4786887A99 */
1048 + -3.48399455689279579579e-17, /* BC841577EE04992F */
1049 + -1.01645532775429503911e-16, /* BC9D4C1DD41532D8 */
1050 + +7.94983480969762085616e-17, /* 3C96E9F156864B27 */
1051 + -1.01369164712783039808e-17, /* BC675FC781B57EBC */
1052 + +2.47071925697978878522e-17, /* 3C7C7C46B071F2BE */
1053 + -1.01256799136747726038e-16, /* BC9D2F6EDB8D41E1 */
1054 + +8.19901002058149652013e-17, /* 3C97A1CD345DCC81 */
1055 + -1.85138041826311098821e-17, /* BC75584F7E54AC3B */
1056 + +2.96014069544887330703e-17, /* 3C811065895048DD */
1057 + +1.82274584279120867698e-17, /* 3C7503CBD1E949DB */
1058 + +3.28310722424562658722e-17, /* 3C82ED02D75B3706 */
1059 + -6.12276341300414256164e-17, /* BC91A5CD4F184B5C */
1060 + -1.06199460561959626376e-16, /* BC9E9C23179C2893 */
1061 + +8.96076779103666776760e-17, /* 3C99D3E12DD8A18B */
1060 1062 };
1061 1063
1062 1064 /* Primary interval GTi() */
1063 1065 static const double cr[] = {
1064 1066 /* p1, q1 */
1065 1067 +0.70908683619977797008004927192814648151397705078125000,
1066 1068 +1.71987061393048558089579513384356441668351720061e-0001,
1067 1069 -3.19273345791990970293320316122813960527705450671e-0002,
1068 1070 +8.36172645419110036267169600390549973563534476989e-0003,
1069 1071 +1.13745336648572838333152213474277971244629758101e-0003,
1070 1072 +1.0,
1071 1073 +9.71980217826032937526460731778472389791321968082e-0001,
1072 1074 -7.43576743326756176594084137256042653497087666030e-0002,
1073 1075 -1.19345944932265559769719470515102012246995255372e-0001,
1074 1076 +1.59913445751425002620935120470781382215050284762e-0002,
1075 1077 +1.12601136853374984566572691306402321911547550783e-0003,
1076 1078 /* p2, q2 */
1077 1079 +0.42848681585558601181418225678498856723308563232421875,
1078 1080 +6.53596762668970816023718845105667418483122103629e-0002,
1079 1081 -6.97280829631212931321050770925128264272768936731e-0003,
1080 1082 +6.46342359021981718947208605674813260166116632899e-0003,
1081 1083 +1.0,
1082 1084 +4.57572620560506047062553957454062012327519313936e-0001,
1083 1085 -2.52182594886075452859655003407796103083422572036e-0001,
1084 1086 -1.82970945407778594681348166040103197178711552827e-0002,
1085 1087 +2.43574726993169566475227642128830141304953840502e-0002,
1086 1088 -5.20390406466942525358645957564897411258667085501e-0003,
1087 1089 +4.79520251383279837635552431988023256031951133885e-0004,
1088 1090 /* p3, q3 */
1089 1091 +0.382409479734567459008331979930517263710498809814453125,
1090 1092 +1.42876048697668161599069814043449301572928034140e-0001,
1091 1093 +3.42157571052250536817923866013561760785748899071e-0003,
↓ open down ↓ |
22 lines elided |
↑ open up ↑ |
1092 1094 -5.01542621710067521405087887856991700987709272937e-0004,
1093 1095 +8.89285814866740910123834688163838287618332122670e-0004,
1094 1096 +1.0,
1095 1097 +3.04253086629444201002215640948957897906299633168e-0001,
1096 1098 -2.23162407379999477282555672834881213873185520006e-0001,
1097 1099 -1.05060867741952065921809811933670131427552903636e-0002,
1098 1100 +1.70511763916186982473301861980856352005926669320e-0002,
1099 1101 -2.12950201683609187927899416700094630764182477464e-0003,
1100 1102 };
1101 1103
1102 -#define P10 cr[0]
1103 -#define P11 cr[1]
1104 -#define P12 cr[2]
1105 -#define P13 cr[3]
1106 -#define P14 cr[4]
1107 -#define Q10 cr[5]
1108 -#define Q11 cr[6]
1109 -#define Q12 cr[7]
1110 -#define Q13 cr[8]
1111 -#define Q14 cr[9]
1112 -#define Q15 cr[10]
1113 -#define P20 cr[11]
1114 -#define P21 cr[12]
1115 -#define P22 cr[13]
1116 -#define P23 cr[14]
1117 -#define Q20 cr[15]
1118 -#define Q21 cr[16]
1119 -#define Q22 cr[17]
1120 -#define Q23 cr[18]
1121 -#define Q24 cr[19]
1122 -#define Q25 cr[20]
1123 -#define Q26 cr[21]
1124 -#define P30 cr[22]
1125 -#define P31 cr[23]
1126 -#define P32 cr[24]
1127 -#define P33 cr[25]
1128 -#define P34 cr[26]
1129 -#define Q30 cr[27]
1130 -#define Q31 cr[28]
1131 -#define Q32 cr[29]
1132 -#define Q33 cr[30]
1133 -#define Q34 cr[31]
1134 -#define Q35 cr[32]
1104 +#define P10 cr[0]
1105 +#define P11 cr[1]
1106 +#define P12 cr[2]
1107 +#define P13 cr[3]
1108 +#define P14 cr[4]
1109 +#define Q10 cr[5]
1110 +#define Q11 cr[6]
1111 +#define Q12 cr[7]
1112 +#define Q13 cr[8]
1113 +#define Q14 cr[9]
1114 +#define Q15 cr[10]
1115 +#define P20 cr[11]
1116 +#define P21 cr[12]
1117 +#define P22 cr[13]
1118 +#define P23 cr[14]
1119 +#define Q20 cr[15]
1120 +#define Q21 cr[16]
1121 +#define Q22 cr[17]
1122 +#define Q23 cr[18]
1123 +#define Q24 cr[19]
1124 +#define Q25 cr[20]
1125 +#define Q26 cr[21]
1126 +#define P30 cr[22]
1127 +#define P31 cr[23]
1128 +#define P32 cr[24]
1129 +#define P33 cr[25]
1130 +#define P34 cr[26]
1131 +#define Q30 cr[27]
1132 +#define Q31 cr[28]
1133 +#define Q32 cr[29]
1134 +#define Q33 cr[30]
1135 +#define Q34 cr[31]
1136 +#define Q35 cr[32]
1135 1137
1136 -static const double
1137 - GZ1_h = +0.938204627909682398190,
1138 +static const double GZ1_h = +0.938204627909682398190,
1138 1139 GZ1_l = +5.121952600248205157935e-17,
1139 1140 GZ2_h = +0.885603194410888749921,
1140 1141 GZ2_l = -4.964236872556339810692e-17,
1141 1142 GZ3_h = +0.936781411463652347038,
1142 1143 GZ3_l = -2.541923110834479415023e-17,
1143 1144 TZ1 = -0.3517214357852935791015625,
1144 1145 TZ3 = +0.280530631542205810546875;
1145 -/* INDENT ON */
1146 1146
1147 -/* compute gamma(y=yh+yl) for y in GT1 = [1.0000, 1.2845] */
1148 -/* assume yh got 20 significant bits */
1147 +/*
1148 + * compute gamma(y=yh+yl) for y in GT1 = [1.0000, 1.2845]
1149 + * assume yh got 20 significant bits
1150 + */
1149 1151 static struct Double
1150 -GT1(double yh, double yl) {
1152 +GT1(double yh, double yl)
1153 +{
1151 1154 double t3, t4, y, z;
1152 1155 struct Double r;
1153 1156
1154 1157 y = yh + yl;
1155 1158 z = y * y;
1156 - t3 = (z * (P10 + y * ((P11 + y * P12) + z * (P13 + y * P14)))) /
1157 - (Q10 + y * ((Q11 + y * Q12) + z * ((Q13 + Q14 * y) + z * Q15)));
1159 + t3 = (z * (P10 + y * ((P11 + y * P12) + z * (P13 + y * P14)))) / (Q10 +
1160 + y * ((Q11 + y * Q12) + z * ((Q13 + Q14 * y) + z * Q15)));
1158 1161 t3 += (TZ1 * yl + GZ1_l);
1159 1162 t4 = TZ1 * yh;
1160 - r.h = (double) ((float) (t4 + GZ1_h + t3));
1163 + r.h = (double)((float)(t4 + GZ1_h + t3));
1161 1164 t3 += (t4 - (r.h - GZ1_h));
1162 1165 r.l = t3;
1163 1166 return (r);
1164 1167 }
1165 1168
1166 -/* compute gamma(y=yh+yl) for y in GT2 = [1.2844, 1.6374] */
1167 -/* assume yh got 20 significant bits */
1169 +/*
1170 + * compute gamma(y=yh+yl) for y in GT2 = [1.2844, 1.6374]
1171 + * assume yh got 20 significant bits
1172 + */
1168 1173 static struct Double
1169 -GT2(double yh, double yl) {
1174 +GT2(double yh, double yl)
1175 +{
1170 1176 double t3, y, z;
1171 1177 struct Double r;
1172 1178
1173 1179 y = yh + yl;
1174 1180 z = y * y;
1175 - t3 = (z * (P20 + y * P21 + z * (P22 + y * P23))) /
1176 - (Q20 + (y * ((Q21 + Q22 * y) + z * Q23) +
1177 - (z * z) * ((Q24 + Q25 * y) + z * Q26))) + GZ2_l;
1178 - r.h = (double) ((float) (GZ2_h + t3));
1181 + t3 = (z * (P20 + y * P21 + z * (P22 + y * P23))) / (Q20 + (y * ((Q21 +
1182 + Q22 * y) + z * Q23) + (z * z) * ((Q24 + Q25 * y) + z * Q26))) +
1183 + GZ2_l;
1184 + r.h = (double)((float)(GZ2_h + t3));
1179 1185 r.l = t3 - (r.h - GZ2_h);
1180 1186 return (r);
1181 1187 }
1182 1188
1183 -/* compute gamma(y=yh+yl) for y in GT3 = [1.6373, 2.0000] */
1184 -/* assume yh got 20 significant bits */
1189 +/*
1190 + * compute gamma(y=yh+yl) for y in GT3 = [1.6373, 2.0000]
1191 + * assume yh got 20 significant bits
1192 + */
1185 1193 static struct Double
1186 -GT3(double yh, double yl) {
1194 +GT3(double yh, double yl)
1195 +{
1187 1196 double t3, t4, y, z;
1188 1197 struct Double r;
1189 1198
1190 1199 y = yh + yl;
1191 1200 z = y * y;
1192 - t3 = (z * (P30 + y * ((P31 + y * P32) + z * (P33 + y * P34)))) /
1193 - (Q30 + y * ((Q31 + y * Q32) + z * ((Q33 + Q34 * y) + z * Q35)));
1201 + t3 = (z * (P30 + y * ((P31 + y * P32) + z * (P33 + y * P34)))) / (Q30 +
1202 + y * ((Q31 + y * Q32) + z * ((Q33 + Q34 * y) + z * Q35)));
1194 1203 t3 += (TZ3 * yl + GZ3_l);
1195 1204 t4 = TZ3 * yh;
1196 - r.h = (double) ((float) (t4 + GZ3_h + t3));
1205 + r.h = (double)((float)(t4 + GZ3_h + t3));
1197 1206 t3 += (t4 - (r.h - GZ3_h));
1198 1207 r.l = t3;
1199 1208 return (r);
1200 1209 }
1201 1210
1202 -/* INDENT OFF */
1211 +
1203 1212 /*
1204 1213 * return tgamma(x) scaled by 2**-m for 8<x<=171.62... using Stirling's formula
1205 1214 * log(G(x)) ~= (x-.5)*(log(x)-1) + .5(log(2*pi)-1) + (1/x)*P(1/(x*x))
1206 1215 * = L1 + L2 + L3,
1207 1216 */
1208 -/* INDENT ON */
1209 1217 static struct Double
1210 -large_gam(double x, int *m) {
1211 - double z, t1, t2, t3, z2, t5, w, y, u, r, z4, v, t24 = 16777216.0,
1212 - p24 = 1.0 / 16777216.0;
1218 +large_gam(double x, int *m)
1219 +{
1220 + double z, t1, t2, t3, z2, t5, w, y, u, r, z4, v, t24 = 16777216.0, p24 =
1221 + 1.0 / 16777216.0;
1213 1222 int n2, j2, k, ix, j;
1214 1223 unsigned lx;
1215 1224 struct Double zz;
1216 1225 double u2, ss_h, ss_l, r_h, w_h, w_l, t4;
1217 1226
1218 -/* INDENT OFF */
1227 +
1219 1228 /*
1220 1229 * compute ss = ss.h+ss.l = log(x)-1 (see tgamma_log.h for details)
1221 1230 *
1222 1231 * log(x) - 1 = T1(n) + T2(j) + T3(s), where x = 2**n * y, 1<=y<2,
1223 1232 * j=[64*y], z[j]=1+j/64+1/128, s = (y-z[j])/(y+z[j]), and
1224 1233 * T1(n) = T1[2n,2n+1] = n*log(2)-1,
1225 1234 * T2(j) = T2[2j,2j+1] = log(z[j]),
1226 1235 * T3(s) = 2s + A1[0]s^3 + A2[1]s^5 + A3[2]s^7
1227 1236 * Note
1228 1237 * (1) the leading entries are truncated to 24 binary point.
1229 1238 * (2) Remez error for T3(s) is bounded by 2**(-72.4)
1230 1239 * 2**(-24)
1231 1240 * _________V___________________
↓ open down ↓ |
3 lines elided |
↑ open up ↑ |
1232 1241 * T1(n): |_________|___________________|
1233 1242 * _______ ______________________
1234 1243 * T2(j): |_______|______________________|
1235 1244 * ____ _______________________
1236 1245 * 2s: |____|_______________________|
1237 1246 * __________________________
1238 1247 * + T3(s)-2s: |__________________________|
1239 1248 * -------------------------------------------
1240 1249 * [leading] + [Trailing]
1241 1250 */
1242 -/* INDENT ON */
1243 1251 ix = __HI(x);
1244 1252 lx = __LO(x);
1245 - n2 = (ix >> 20) - 0x3ff; /* exponent of x, range:3-7 */
1246 - n2 += n2; /* 2n */
1253 + n2 = (ix >> 20) - 0x3ff; /* exponent of x, range:3-7 */
1254 + n2 += n2; /* 2n */
1247 1255 ix = (ix & 0x000fffff) | 0x3ff00000; /* y = scale x to [1,2] */
1248 1256 __HI(y) = ix;
1249 1257 __LO(y) = lx;
1250 1258 __HI(z) = (ix & 0xffffc000) | 0x2000; /* z[j]=1+j/64+1/128 */
1251 1259 __LO(z) = 0;
1252 - j2 = (ix >> 13) & 0x7e; /* 2j */
1260 + j2 = (ix >> 13) & 0x7e; /* 2j */
1253 1261 t1 = y + z;
1254 1262 t2 = y - z;
1255 1263 r = one / t1;
1256 - t1 = (double) ((float) t1);
1257 - u = r * t2; /* u = (y-z)/(y+z) */
1264 + t1 = (double)((float)t1);
1265 + u = r * t2; /* u = (y-z)/(y+z) */
1258 1266 t4 = T2[j2 + 1] + T1[n2 + 1];
1259 1267 z2 = u * u;
1260 1268 k = __HI(u) & 0x7fffffff;
1261 1269 t3 = T2[j2] + T1[n2];
1270 +
1262 1271 if ((k >> 20) < 0x3ec) { /* |u|<2**-19 */
1263 1272 t2 = t4 + u * ((two + z2 * A1) + (z2 * z2) * (A2 + z2 * A3));
1264 1273 } else {
1265 1274 t5 = t4 + u * (z2 * A1 + (z2 * z2) * (A2 + z2 * A3));
1266 1275 u2 = u + u;
1267 - v = (double) ((int) (u2 * t24)) * p24;
1276 + v = (double)((int)(u2 * t24)) * p24;
1268 1277 t2 = t5 + r * ((two * t2 - v * t1) - v * (y - (t1 - z)));
1269 1278 t3 += v;
1270 1279 }
1271 - ss_h = (double) ((float) (t2 + t3));
1280 +
1281 + ss_h = (double)((float)(t2 + t3));
1272 1282 ss_l = t2 - (ss_h - t3);
1273 1283
1274 1284 /*
1275 1285 * compute ww = (x-.5)*(log(x)-1) + .5*(log(2pi)-1) + 1/x*(P(1/x^2)))
1276 1286 * where ss = log(x) - 1 in already in extra precision
1277 1287 */
1278 1288 z = one / x;
1279 1289 r = x - half;
1280 - r_h = (double) ((float) r);
1290 + r_h = (double)((float)r);
1281 1291 w_h = r_h * ss_h + hln2pi_h;
1282 1292 z2 = z * z;
1283 1293 w = (r - r_h) * ss_h + r * ss_l;
1284 1294 z4 = z2 * z2;
1285 1295 t1 = z2 * (GP1 + z4 * (GP3 + z4 * (GP5 + z4 * GP7)));
1286 1296 t2 = z4 * (GP2 + z4 * (GP4 + z4 * GP6));
1287 1297 t1 += t2;
1288 1298 w += hln2pi_l;
1289 1299 w_l = z * (GP0 + t1) + w;
1290 - k = (int) ((w_h + w_l) * invln2_32 + half);
1300 + k = (int)((w_h + w_l) * invln2_32 + half);
1291 1301
1292 1302 /* compute the exponential of w_h+w_l */
1293 1303 j = k & 0x1f;
1294 1304 *m = (k >> 5);
1295 - t3 = (double) k;
1305 + t3 = (double)k;
1296 1306
1297 1307 /* perform w - k*ln2_32 (represent as w_h - w_l) */
1298 1308 t1 = w_h - t3 * ln2_32hi;
1299 1309 t2 = t3 * ln2_32lo;
1300 1310 w = w_l - t2;
1301 1311 w_h = t1 + w_l;
1302 1312 w_l = t2 - (w_l - (w_h - t1));
1303 1313
1304 1314 /* compute exp(w_h+w_l) */
1305 1315 z = w_h - w_l;
1306 1316 z2 = z * z;
1307 1317 t1 = z2 * (Et1 + z2 * (Et3 + z2 * Et5));
1308 1318 t2 = z2 * (Et2 + z2 * Et4);
1309 1319 t3 = w_h - (w_l - (t1 + z * t2));
1310 1320 zz.l = S_trail[j] * (one + t3) + S[j] * t3;
1311 1321 zz.h = S[j];
1312 1322 return (zz);
1313 1323 }
1314 1324
1315 -/* INDENT OFF */
1325 +
1316 1326 /*
1317 1327 * kpsin(x)= sin(pi*x)/pi
1318 1328 * 3 5 7 9 11 13 15
1319 1329 * = x+ks[0]*x +ks[1]*x +ks[2]*x +ks[3]*x +ks[4]*x +ks[5]*x +ks[6]*x
1320 1330 */
1321 1331 static const double ks[] = {
1322 1332 -1.64493406684822640606569,
1323 1333 +8.11742425283341655883668741874008920850698590621e-0001,
1324 1334 -1.90751824120862873825597279118304943994042258291e-0001,
1325 1335 +2.61478477632554278317289628332654539353521911570e-0002,
1326 1336 -2.34607978510202710377617190278735525354347705866e-0003,
1327 1337 +1.48413292290051695897242899977121846763824221705e-0004,
1328 1338 -6.87730769637543488108688726777687262485357072242e-0006,
1329 1339 };
1330 -/* INDENT ON */
1331 1340
1332 1341 /* assume x is not tiny and positive */
1333 1342 static struct Double
1334 -kpsin(double x) {
1343 +kpsin(double x)
1344 +{
1335 1345 double z, t1, t2, t3, t4;
1336 1346 struct Double xx;
1337 1347
1338 1348 z = x * x;
1339 1349 xx.h = x;
1340 1350 t1 = z * x;
1341 1351 t2 = z * z;
1342 1352 t4 = t1 * ks[0];
1343 - t3 = (t1 * z) * ((ks[1] + z * ks[2] + t2 * ks[3]) + (z * t2) *
1344 - (ks[4] + z * ks[5] + t2 * ks[6]));
1353 + t3 = (t1 * z) * ((ks[1] + z * ks[2] + t2 * ks[3]) + (z * t2) * (ks[4] +
1354 + z * ks[5] + t2 * ks[6]));
1345 1355 xx.l = t4 + t3;
1346 1356 return (xx);
1347 1357 }
1348 1358
1349 -/* INDENT OFF */
1359 +
1350 1360 /*
1351 1361 * kpcos(x)= cos(pi*x)/pi
1352 1362 * 2 4 6 8 10 12
1353 1363 * = 1/pi +kc[0]*x +kc[1]*x +kc[2]*x +kc[3]*x +kc[4]*x +kc[5]*x
1354 1364 */
1355 1365
1356 1366 static const double one_pi_h = 0.318309886183790635705292970,
1357 - one_pi_l = 3.583247455607534006714276420e-17;
1367 + one_pi_l = 3.583247455607534006714276420e-17;
1358 1368 static const double npi_2_h = -1.5625,
1359 - npi_2_l = -0.00829632679489661923132169163975055099555883223;
1369 + npi_2_l = -0.00829632679489661923132169163975055099555883223;
1360 1370 static const double kc[] = {
1361 1371 -1.57079632679489661923132169163975055099555883223e+0000,
1362 1372 +1.29192819501230224953283586722575766189551966008e+0000,
1363 1373 -4.25027339940149518500158850753393173519732149213e-0001,
1364 1374 +7.49080625187015312373925142219429422375556727752e-0002,
1365 1375 -8.21442040906099210866977352284054849051348692715e-0003,
1366 1376 +6.10411356829515414575566564733632532333904115968e-0004,
1367 1377 };
1368 -/* INDENT ON */
1369 1378
1370 1379 /* assume x is not tiny and positive */
1371 1380 static struct Double
1372 -kpcos(double x) {
1381 +kpcos(double x)
1382 +{
1373 1383 double z, t1, t2, t3, t4, x4, x8;
1374 1384 struct Double xx;
1375 1385
1376 1386 z = x * x;
1377 1387 xx.h = one_pi_h;
1378 - t1 = (double) ((float) x);
1388 + t1 = (double)((float)x);
1379 1389 x4 = z * z;
1380 1390 t2 = npi_2_l * z + npi_2_h * (x + t1) * (x - t1);
1381 - t3 = one_pi_l + x4 * ((kc[1] + z * kc[2]) + x4 * (kc[3] + z *
1382 - kc[4] + x4 * kc[5]));
1383 - t4 = t1 * t1; /* 48 bits mantissa */
1391 + t3 = one_pi_l + x4 * ((kc[1] + z * kc[2]) + x4 * (kc[3] + z * kc[4] +
1392 + x4 * kc[5]));
1393 + t4 = t1 * t1; /* 48 bits mantissa */
1384 1394 x8 = t2 + t3;
1385 - t4 *= npi_2_h; /* npi_2_h is 5 bits const. The product is exact */
1386 - xx.l = x8 + t4; /* that will minimized the rounding error in xx.l */
1395 + t4 *= npi_2_h; /* npi_2_h is 5 bits const. The product is exact */
1396 + xx.l = x8 + t4; /* that will minimized the rounding error in xx.l */
1387 1397 return (xx);
1388 1398 }
1389 1399
1390 -/* INDENT OFF */
1391 1400 static const double
1392 - /* 0.134861805732790769689793935774652917006 */
1393 - t0z1 = 0.1348618057327907737708,
1401 +/* 0.134861805732790769689793935774652917006 */
1402 + t0z1 = 0.1348618057327907737708,
1394 1403 t0z1_l = -4.0810077708578299022531e-18,
1395 - /* 0.461632144968362341262659542325721328468 */
1396 - t0z2 = 0.4616321449683623567850,
1404 +/* 0.461632144968362341262659542325721328468 */
1405 + t0z2 = 0.4616321449683623567850,
1397 1406 t0z2_l = -1.5522348162858676890521e-17,
1398 - /* 0.819773101100500601787868704921606996312 */
1399 - t0z3 = 0.8197731011005006118708,
1407 +/* 0.819773101100500601787868704921606996312 */
1408 + t0z3 = 0.8197731011005006118708,
1400 1409 t0z3_l = -1.0082945122487103498325e-17;
1401 - /* 1.134861805732790769689793935774652917006 */
1402 -/* INDENT ON */
1410 +
1411 +/*
1412 + * 1.134861805732790769689793935774652917006
1413 + */
1403 1414
1404 1415 /* gamma(x+i) for 0 <= x < 1 */
1405 1416 static struct Double
1406 -gam_n(int i, double x) {
1407 - struct Double rr = {0.0L, 0.0L}, yy;
1417 +gam_n(int i, double x)
1418 +{
1419 + struct Double rr = { 0.0L, 0.0L }, yy;
1408 1420 double r1, r2, t2, z, xh, xl, yh, yl, zh, z1, z2, zl, x5, wh, wl;
1409 1421
1410 1422 /* compute yy = gamma(x+1) */
1411 1423 if (x > 0.2845) {
1412 1424 if (x > 0.6374) {
1413 1425 r1 = x - t0z3;
1414 - r2 = (double) ((float) (r1 - t0z3_l));
1426 + r2 = (double)((float)(r1 - t0z3_l));
1415 1427 t2 = r1 - r2;
1416 1428 yy = GT3(r2, t2 - t0z3_l);
1417 1429 } else {
1418 1430 r1 = x - t0z2;
1419 - r2 = (double) ((float) (r1 - t0z2_l));
1431 + r2 = (double)((float)(r1 - t0z2_l));
1420 1432 t2 = r1 - r2;
1421 1433 yy = GT2(r2, t2 - t0z2_l);
1422 1434 }
1423 1435 } else {
1424 1436 r1 = x - t0z1;
1425 - r2 = (double) ((float) (r1 - t0z1_l));
1437 + r2 = (double)((float)(r1 - t0z1_l));
1426 1438 t2 = r1 - r2;
1427 1439 yy = GT1(r2, t2 - t0z1_l);
1428 1440 }
1429 1441
1430 1442 /* compute gamma(x+i) = (x+i-1)*...*(x+1)*yy, 0<i<8 */
1431 1443 switch (i) {
1432 - case 0: /* yy/x */
1444 + case 0: /* yy/x */
1433 1445 r1 = one / x;
1434 - xh = (double) ((float) x); /* x is not tiny */
1435 - rr.h = (double) ((float) ((yy.h + yy.l) * r1));
1436 - rr.l = r1 * (yy.h - rr.h * xh) -
1437 - ((r1 * rr.h) * (x - xh) - r1 * yy.l);
1446 + xh = (double)((float)x); /* x is not tiny */
1447 + rr.h = (double)((float)((yy.h + yy.l) * r1));
1448 + rr.l = r1 * (yy.h - rr.h * xh) - ((r1 * rr.h) * (x - xh) - r1 *
1449 + yy.l);
1438 1450 break;
1439 - case 1: /* yy */
1451 + case 1: /* yy */
1440 1452 rr.h = yy.h;
1441 1453 rr.l = yy.l;
1442 1454 break;
1443 - case 2: /* (x+1)*yy */
1444 - z = x + one; /* may not be exact */
1445 - zh = (double) ((float) z);
1455 + case 2: /* (x+1)*yy */
1456 + z = x + one; /* may not be exact */
1457 + zh = (double)((float)z);
1446 1458 rr.h = zh * yy.h;
1447 1459 rr.l = z * yy.l + (x - (zh - one)) * yy.h;
1448 1460 break;
1449 - case 3: /* (x+2)*(x+1)*yy */
1461 + case 3: /* (x+2)*(x+1)*yy */
1450 1462 z1 = x + one;
1451 1463 z2 = x + 2.0;
1452 1464 z = z1 * z2;
1453 - xh = (double) ((float) z);
1454 - zh = (double) ((float) z1);
1465 + xh = (double)((float)z);
1466 + zh = (double)((float)z1);
1455 1467 xl = (x - (zh - one)) * (z2 + zh) - (xh - zh * (zh + one));
1456 1468 rr.h = xh * yy.h;
1457 1469 rr.l = z * yy.l + xl * yy.h;
1458 1470 break;
1459 1471
1460 - case 4: /* (x+1)*(x+3)*(x+2)*yy */
1472 + case 4: /* (x+1)*(x+3)*(x+2)*yy */
1461 1473 z1 = x + 2.0;
1462 1474 z2 = (x + one) * (x + 3.0);
1463 1475 zh = z1;
1464 1476 __LO(zh) = 0;
1465 1477 __HI(zh) &= 0xfffffff8; /* zh 18 bits mantissa */
1466 1478 zl = x - (zh - 2.0);
1467 1479 z = z1 * z2;
1468 - xh = (double) ((float) z);
1480 + xh = (double)((float)z);
1469 1481 xl = zl * (z2 + zh * (z1 + zh)) - (xh - zh * (zh * zh - one));
1470 1482 rr.h = xh * yy.h;
1471 1483 rr.l = z * yy.l + xl * yy.h;
1472 1484 break;
1473 - case 5: /* ((x+1)*(x+4)*(x+2)*(x+3))*yy */
1485 + case 5: /* ((x+1)*(x+4)*(x+2)*(x+3))*yy */
1474 1486 z1 = x + 2.0;
1475 1487 z2 = x + 3.0;
1476 1488 z = z1 * z2;
1477 - zh = (double) ((float) z1);
1478 - yh = (double) ((float) z);
1489 + zh = (double)((float)z1);
1490 + yh = (double)((float)z);
1479 1491 yl = (x - (zh - 2.0)) * (z2 + zh) - (yh - zh * (zh + one));
1480 1492 z2 = z - 2.0;
1481 1493 z *= z2;
1482 - xh = (double) ((float) z);
1494 + xh = (double)((float)z);
1483 1495 xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0));
1484 1496 rr.h = xh * yy.h;
1485 1497 rr.l = z * yy.l + xl * yy.h;
1486 1498 break;
1487 - case 6: /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5))*yy */
1499 + case 6: /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5))*yy */
1488 1500 z1 = x + 2.0;
1489 1501 z2 = x + 3.0;
1490 1502 z = z1 * z2;
1491 - zh = (double) ((float) z1);
1492 - yh = (double) ((float) z);
1503 + zh = (double)((float)z1);
1504 + yh = (double)((float)z);
1493 1505 z1 = x - (zh - 2.0);
1494 1506 yl = z1 * (z2 + zh) - (yh - zh * (zh + one));
1495 1507 z2 = z - 2.0;
1496 1508 x5 = x + 5.0;
1497 1509 z *= z2;
1498 - xh = (double) ((float) z);
1510 + xh = (double)((float)z);
1499 1511 zh += 3.0;
1500 1512 xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0));
1501 - /* xh+xl=(x+1)*...*(x+4) */
1502 - /* wh+wl=(x+5)*yy */
1503 - wh = (double) ((float) (x5 * (yy.h + yy.l)));
1513 +
1514 + /*
1515 + * xh+xl=(x+1)*...*(x+4)
1516 + * wh+wl=(x+5)*yy
1517 + */
1518 + wh = (double)((float)(x5 * (yy.h + yy.l)));
1504 1519 wl = (z1 * yy.h + x5 * yy.l) - (wh - zh * yy.h);
1505 1520 rr.h = wh * xh;
1506 1521 rr.l = z * wl + xl * wh;
1507 1522 break;
1508 - case 7: /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5)*(x+6))*yy */
1523 + case 7: /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5)*(x+6))*yy */
1509 1524 z1 = x + 3.0;
1510 1525 z2 = x + 4.0;
1511 1526 z = z2 * z1;
1512 - zh = (double) ((float) z1);
1513 - yh = (double) ((float) z); /* yh+yl = (x+3)(x+4) */
1527 + zh = (double)((float)z1);
1528 + yh = (double)((float)z); /* yh+yl = (x+3)(x+4) */
1514 1529 yl = (x - (zh - 3.0)) * (z2 + zh) - (yh - (zh * (zh + one)));
1515 1530 z1 = x + 6.0;
1516 - z2 = z - 2.0; /* z2 = (x+2)*(x+5) */
1531 + z2 = z - 2.0; /* z2 = (x+2)*(x+5) */
1517 1532 z *= z2;
1518 - xh = (double) ((float) z);
1533 + xh = (double)((float)z);
1519 1534 xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0));
1520 - /* xh+xl=(x+2)*...*(x+5) */
1521 - /* wh+wl=(x+1)(x+6)*yy */
1522 - z2 -= 4.0; /* z2 = (x+1)(x+6) */
1523 - wh = (double) ((float) (z2 * (yy.h + yy.l)));
1535 +
1536 + /*
1537 + * xh+xl=(x+2)*...*(x+5)
1538 + * wh+wl=(x+1)(x+6)*yy
1539 + */
1540 + z2 -= 4.0; /* z2 = (x+1)(x+6) */
1541 + wh = (double)((float)(z2 * (yy.h + yy.l)));
1524 1542 wl = (z2 * yy.l + yl * yy.h) - (wh - (yh - 6.0) * yy.h);
1525 1543 rr.h = wh * xh;
1526 1544 rr.l = z * wl + xl * wh;
1527 1545 }
1546 +
1528 1547 return (rr);
1529 1548 }
1530 1549
1531 1550 double
1532 -tgamma(double x) {
1551 +tgamma(double x)
1552 +{
1533 1553 struct Double ss, ww;
1534 1554 double t, t1, t2, t3, t4, t5, w, y, z, z1, z2, z3, z5;
1535 1555 int i, j, k, m, ix, hx, xk;
1536 1556 unsigned lx;
1537 1557
1538 1558 hx = __HI(x);
1539 1559 lx = __LO(x);
1540 1560 ix = hx & 0x7fffffff;
1541 1561 y = x;
1542 1562
1543 1563 if (ix < 0x3ca00000)
1544 1564 return (one / x); /* |x| < 2**-53 */
1565 +
1545 1566 if (ix >= 0x7ff00000)
1546 - /* +Inf -> +Inf, -Inf or NaN -> NaN */
1547 - return (x * ((hx < 0)? 0.0 : x));
1548 - if (hx > 0x406573fa || /* x > 171.62... overflow to +inf */
1567 + /* +Inf -> +Inf, -Inf or NaN -> NaN */
1568 + return (x * ((hx < 0) ? 0.0 : x));
1569 +
1570 + if (hx > 0x406573fa || /* x > 171.62... overflow to +inf */
1549 1571 (hx == 0x406573fa && lx > 0xE561F647)) {
1550 1572 z = x / tiny;
1551 1573 return (z * z);
1552 1574 }
1553 - if (hx >= 0x40200000) { /* x >= 8 */
1575 +
1576 + if (hx >= 0x40200000) { /* x >= 8 */
1554 1577 ww = large_gam(x, &m);
1555 1578 w = ww.h + ww.l;
1556 1579 __HI(w) += m << 20;
1557 1580 return (w);
1558 1581 }
1559 - if (hx > 0) { /* 0 < x < 8 */
1560 - i = (int) x;
1561 - ww = gam_n(i, x - (double) i);
1582 +
1583 + if (hx > 0) { /* 0 < x < 8 */
1584 + i = (int)x;
1585 + ww = gam_n(i, x - (double)i);
1562 1586 return (ww.h + ww.l);
1563 1587 }
1564 1588
1565 - /* negative x */
1566 - /* INDENT OFF */
1589 + /*
1590 + * negative x
1591 + */
1592 +
1567 1593 /*
1568 1594 * compute: xk =
1569 1595 * -2 ... x is an even int (-inf is even)
1570 1596 * -1 ... x is an odd int
1571 1597 * +0 ... x is not an int but chopped to an even int
1572 1598 * +1 ... x is not an int but chopped to an odd int
1573 1599 */
1574 - /* INDENT ON */
1575 1600 xk = 0;
1601 +
1576 1602 if (ix >= 0x43300000) {
1577 1603 if (ix >= 0x43400000)
1578 1604 xk = -2;
1579 1605 else
1580 1606 xk = -2 + (lx & 1);
1581 1607 } else if (ix >= 0x3ff00000) {
1582 1608 k = (ix >> 20) - 0x3ff;
1609 +
1583 1610 if (k > 20) {
1584 1611 j = lx >> (52 - k);
1612 +
1585 1613 if ((j << (52 - k)) == lx)
1586 1614 xk = -2 + (j & 1);
1587 1615 else
1588 1616 xk = j & 1;
1589 1617 } else {
1590 1618 j = ix >> (20 - k);
1619 +
1591 1620 if ((j << (20 - k)) == ix && lx == 0)
1592 1621 xk = -2 + (j & 1);
1593 1622 else
1594 1623 xk = j & 1;
1595 1624 }
1596 1625 }
1626 +
1597 1627 if (xk < 0)
1598 1628 /* ideally gamma(-n)= (-1)**(n+1) * inf, but c99 expect NaN */
1599 - return ((x - x) / (x - x)); /* 0/0 = NaN */
1600 -
1629 + return ((x - x) / (x - x)); /* 0/0 = NaN */
1601 1630
1602 1631 /* negative underflow thresold */
1603 1632 if (ix > 0x4066e000 || (ix == 0x4066e000 && lx > 11)) {
1604 1633 /* x < -183.0 - 11ulp */
1605 1634 z = tiny / x;
1635 +
1606 1636 if (xk == 1)
1607 1637 z = -z;
1638 +
1608 1639 return (z * tiny);
1609 1640 }
1610 1641
1611 1642 /* now compute gamma(x) by -1/((sin(pi*y)/pi)*gamma(1+y)), y = -x */
1612 1643
1613 1644 /*
1614 1645 * First compute ss = -sin(pi*y)/pi , so that
1615 1646 * gamma(x) = 1/(ss*gamma(1+y))
1616 1647 */
1617 1648 y = -x;
1618 - j = (int) y;
1619 - z = y - (double) j;
1620 - if (z > 0.3183098861837906715377675)
1649 + j = (int)y;
1650 + z = y - (double)j;
1651 +
1652 + if (z > 0.3183098861837906715377675) {
1621 1653 if (z > 0.6816901138162093284622325)
1622 1654 ss = kpsin(one - z);
1623 1655 else
1624 1656 ss = kpcos(0.5 - z);
1625 - else
1657 + } else {
1626 1658 ss = kpsin(z);
1659 + }
1660 +
1627 1661 if (xk == 0) {
1628 1662 ss.h = -ss.h;
1629 1663 ss.l = -ss.l;
1630 1664 }
1631 1665
1632 1666 /* Then compute ww = gamma(1+y), note that result scale to 2**m */
1633 1667 m = 0;
1668 +
1634 1669 if (j < 7) {
1635 1670 ww = gam_n(j + 1, z);
1636 1671 } else {
1637 1672 w = y + one;
1673 +
1638 1674 if ((lx & 1) == 0) { /* y+1 exact (note that y<184) */
1639 1675 ww = large_gam(w, &m);
1640 1676 } else {
1641 1677 t = w - one;
1678 +
1642 1679 if (t == y) { /* y+one exact */
1643 1680 ww = large_gam(w, &m);
1644 1681 } else { /* use y*gamma(y) */
1645 1682 if (j == 7)
1646 1683 ww = gam_n(j, z);
1647 1684 else
1648 1685 ww = large_gam(y, &m);
1686 +
1649 1687 t4 = ww.h + ww.l;
1650 - t1 = (double) ((float) y);
1651 - t2 = (double) ((float) t4);
1652 - /* t4 will not be too large */
1688 + t1 = (double)((float)y);
1689 + t2 = (double)((float)t4);
1690 + /* t4 will not be too large */
1653 1691 ww.l = y * (ww.l - (t2 - ww.h)) + (y - t1) * t2;
1654 1692 ww.h = t1 * t2;
1655 1693 }
1656 1694 }
1657 1695 }
1658 1696
1659 1697 /* compute 1/(ss*ww) */
1660 1698 t3 = ss.h + ss.l;
1661 1699 t4 = ww.h + ww.l;
1662 - t1 = (double) ((float) t3);
1663 - t2 = (double) ((float) t4);
1700 + t1 = (double)((float)t3);
1701 + t2 = (double)((float)t4);
1664 1702 z1 = ss.l - (t1 - ss.h); /* (t1,z1) = ss */
1665 1703 z2 = ww.l - (t2 - ww.h); /* (t2,z2) = ww */
1666 1704 t3 = t3 * t4; /* t3 = ss*ww */
1667 1705 z3 = one / t3; /* z3 = 1/(ss*ww) */
1668 1706 t5 = t1 * t2;
1669 1707 z5 = z1 * t4 + t1 * z2; /* (t5,z5) = ss*ww */
1670 - t1 = (double) ((float) t3); /* (t1,z1) = ss*ww */
1708 + t1 = (double)((float)t3); /* (t1,z1) = ss*ww */
1671 1709 z1 = z5 - (t1 - t5);
1672 - t2 = (double) ((float) z3); /* leading 1/(ss*ww) */
1710 + t2 = (double)((float)z3); /* leading 1/(ss*ww) */
1673 1711 z2 = z3 * (t2 * z1 - (one - t2 * t1));
1674 1712 z = t2 - z2;
1675 1713
1676 1714 /* check whether z*2**-m underflow */
1677 1715 if (m != 0) {
1678 1716 hx = __HI(z);
1679 1717 i = hx & 0x80000000;
1680 1718 ix = hx ^ i;
1681 1719 j = ix >> 20;
1720 +
1682 1721 if (j > m) {
1683 1722 ix -= m << 20;
1684 1723 __HI(z) = ix ^ i;
1685 1724 } else if ((m - j) > 52) {
1686 1725 /* underflow */
1687 1726 if (xk == 0)
1688 1727 z = -tiny * tiny;
1689 1728 else
1690 1729 z = tiny * tiny;
1691 1730 } else {
1692 1731 /* subnormal */
1693 1732 m -= 60;
1694 1733 t = one;
1695 1734 __HI(t) -= 60 << 20;
1696 1735 ix -= m << 20;
1697 1736 __HI(z) = ix ^ i;
1698 1737 z *= t;
1699 1738 }
1700 1739 }
1740 +
1701 1741 return (z);
1702 1742 }
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX