Print this page
4853 illumos-gate is not lint-clean when built with openssl 1.0
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/openssl/libsunw_crypto/bn/x86_64-gcc.c
+++ new/usr/src/lib/openssl/libsunw_crypto/bn/x86_64-gcc.c
1 -#include "../bn_lcl.h"
1 +#include <bn_lcl.h>
2 2 #if !(defined(__GNUC__) && __GNUC__>=2)
3 -# include "../bn_asm.c" /* kind of dirty hack for Sun Studio */
3 +# include "bn_asm.c" /* kind of dirty hack for Sun Studio */
4 4 #else
5 5 /*
6 6 * x86_64 BIGNUM accelerator version 0.1, December 2002.
7 7 *
8 8 * Implemented by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
9 9 * project.
10 10 *
11 11 * Rights for redistribution and usage in source and binary forms are
12 12 * granted according to the OpenSSL license. Warranty of any kind is
13 13 * disclaimed.
14 14 *
15 15 * Q. Version 0.1? It doesn't sound like Andy, he used to assign real
16 16 * versions, like 1.0...
17 17 * A. Well, that's because this code is basically a quick-n-dirty
18 18 * proof-of-concept hack. As you can see it's implemented with
19 19 * inline assembler, which means that you're bound to GCC and that
20 20 * there might be enough room for further improvement.
21 21 *
22 22 * Q. Why inline assembler?
23 23 * A. x86_64 has its own ABI, which I'm not familiar with. This is
24 24 * why I decided to let the compiler take care of subroutine
25 25 * prologue/epilogue as well as register allocation. For reference,
26 26 * Win64 implements an ABI for AMD64 that differs from the Linux one.
27 27 *
28 28 * Q. How much faster does it get?
29 29 * A. 'apps/openssl speed rsa dsa' output with no-asm:
30 30 *
31 31 * sign verify sign/s verify/s
32 32 * rsa 512 bits 0.0006s 0.0001s 1683.8 18456.2
33 33 * rsa 1024 bits 0.0028s 0.0002s 356.0 6407.0
34 34 * rsa 2048 bits 0.0172s 0.0005s 58.0 1957.8
35 35 * rsa 4096 bits 0.1155s 0.0018s 8.7 555.6
36 36 * sign verify sign/s verify/s
37 37 * dsa 512 bits 0.0005s 0.0006s 2100.8 1768.3
38 38 * dsa 1024 bits 0.0014s 0.0018s 692.3 559.2
39 39 * dsa 2048 bits 0.0049s 0.0061s 204.7 165.0
40 40 *
41 41 * 'apps/openssl speed rsa dsa' output with this module:
42 42 *
43 43 * sign verify sign/s verify/s
44 44 * rsa 512 bits 0.0004s 0.0000s 2767.1 33297.9
45 45 * rsa 1024 bits 0.0012s 0.0001s 867.4 14674.7
46 46 * rsa 2048 bits 0.0061s 0.0002s 164.0 5270.0
47 47 * rsa 4096 bits 0.0384s 0.0006s 26.1 1650.8
48 48 * sign verify sign/s verify/s
49 49 * dsa 512 bits 0.0002s 0.0003s 4442.2 3786.3
50 50 * dsa 1024 bits 0.0005s 0.0007s 1835.1 1497.4
51 51 * dsa 2048 bits 0.0016s 0.0020s 620.4 504.6
52 52 *
53 53 * For reference, the IA-32 assembler implementation performs
54 54 * very much like 64-bit code compiled with no-asm on the same
55 55 * machine.
56 56 */
57 57
/*
 * Word type used by every routine in this file: unsigned long long when
 * _WIN64 is defined, unsigned long otherwise.
 * NOTE(review): presumably chosen so the word is 64 bits wide under both
 * ABIs, matching the 64-bit mulq/divq/addq instructions used below --
 * confirm against the target data models.
 */
58 58 #ifdef _WIN64
59 59 #define BN_ULONG unsigned long long
60 60 #else
61 61 #define BN_ULONG unsigned long
62 62 #endif
63 63
64 64 #undef mul
↓ open down ↓ |
51 lines elided |
↑ open up ↑ |
65 65 #undef mul_add
66 66 #undef sqr
67 67
68 68 /*
69 69 * "m"(a), "+m"(r) is the way to favor DirectPath µ-code;
70 70 * "g"(0) lets the compiler decide where it
71 71 * wants to keep the value of zero;
72 72 */
/*
 * mul_add(r,a,word,carry): r += a*word + carry, leaving the new high word
 * in carry.
 *
 * Three asm statements are chained:
 *   1. mulq multiplies word (in %rax) by the memory operand a; the product
 *      comes back as low (%rax) and high (%rdx).
 *   2. low is added into carry, and the resulting carry flag is folded
 *      into high via adcq with a zero operand.
 *   3. carry is added into r (a read-write memory operand), and the carry
 *      flag is again folded into high.
 * Finally carry is overwritten with high.
 *
 * r must be an lvalue in memory ("+m") and a must be a memory operand
 * ("m"); carry must be a modifiable lvalue. The temporaries high/low are
 * local to the do/while scope.
 *
 * In this diff view each asm statement appears twice: the '-' row is the
 * old `asm` spelling, the '+' row the new `__asm__` spelling. Operands and
 * constraints are unchanged by the diff.
 */
73 73 #define mul_add(r,a,word,carry) do { \
74 74 register BN_ULONG high,low; \
75 - asm ("mulq %3" \
75 + __asm__ ("mulq %3" \
76 76 : "=a"(low),"=d"(high) \
77 77 : "a"(word),"m"(a) \
78 78 : "cc"); \
79 - asm ("addq %2,%0; adcq %3,%1" \
79 + __asm__ ("addq %2,%0; adcq %3,%1" \
80 80 : "+r"(carry),"+d"(high)\
81 81 : "a"(low),"g"(0) \
82 82 : "cc"); \
83 - asm ("addq %2,%0; adcq %3,%1" \
83 + __asm__ ("addq %2,%0; adcq %3,%1" \
84 84 : "+m"(r),"+d"(high) \
85 85 : "r"(carry),"g"(0) \
86 86 : "cc"); \
87 87 carry=high; \
88 88 } while (0)
89 89
/*
 * mul(r,a,word,carry): r = low word of (a*word + carry), carry = high word.
 *
 * mulq multiplies word (in %rax) by a, giving low (%rax) and high (%rdx).
 * low is then added into carry and the carry flag folded into high. The
 * sum (now held in carry) is stored to r, and carry is replaced by high.
 *
 * Unlike mul_add, r is only written, never read, and a uses the general
 * "g" constraint rather than "m".
 * NOTE(review): "g" also admits an immediate operand, which mulq cannot
 * encode; all uses visible here pass array elements, but confirm no caller
 * passes a constant.
 *
 * The '-'/'+' row pairs are the diff's old `asm` / new `__asm__` spellings.
 */
90 90 #define mul(r,a,word,carry) do { \
91 91 register BN_ULONG high,low; \
92 - asm ("mulq %3" \
92 + __asm__ ("mulq %3" \
93 93 : "=a"(low),"=d"(high) \
94 94 : "a"(word),"g"(a) \
95 95 : "cc"); \
96 - asm ("addq %2,%0; adcq %3,%1" \
96 + __asm__ ("addq %2,%0; adcq %3,%1" \
97 97 : "+r"(carry),"+d"(high)\
98 98 : "a"(low),"g"(0) \
99 99 : "cc"); \
100 100 (r)=carry, carry=high; \
101 101 } while (0)
102 102
/*
 * sqr(r0,r1,a): square a, storing the low word of the result in r0 and the
 * high word in r1.
 *
 * a is loaded into %rax and is also operand %2 of mulq, so the instruction
 * multiplies %rax by itself; the product is returned in %rax (r0) and
 * %rdx (r1).
 *
 * Note that, unlike mul/mul_add, this macro is not wrapped in
 * do { } while (0) and already ends in ';', so `sqr(...);` expands to the
 * asm statement followed by an empty statement.
 *
 * The '-'/'+' row pair is the diff's old `asm` / new `__asm__` spelling.
 */
103 103 #define sqr(r0,r1,a) \
104 - asm ("mulq %2" \
104 + __asm__ ("mulq %2" \
105 105 : "=a"(r0),"=d"(r1) \
106 106 : "a"(a) \
107 107 : "cc");
108 108
/*
 * bn_mul_add_words: for each of the num words, rp[i] += ap[i]*w, with the
 * carry (c1) propagated from one word to the next through mul_add.
 *
 * rp   destination words, read and updated in place
 * ap   source words
 * num  number of words to process; num <= 0 does nothing
 * w    single-word multiplier
 *
 * Returns the carry left over after the last word (0 when num <= 0).
 */
109 109 BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
110 110 {
111 111 BN_ULONG c1=0;
112 112
113 113 if (num <= 0) return(c1);
114 114
/* Main loop: num&~3 is non-zero while at least four words remain. */
115 115 while (num&~3)
116 116 {
117 117 mul_add(rp[0],ap[0],w,c1);
118 118 mul_add(rp[1],ap[1],w,c1);
119 119 mul_add(rp[2],ap[2],w,c1);
120 120 mul_add(rp[3],ap[3],w,c1);
121 121 ap+=4; rp+=4; num-=4;
122 122 }
/* Tail: one to three leftover words, returning as soon as num hits 0. */
123 123 if (num)
124 124 {
125 125 mul_add(rp[0],ap[0],w,c1); if (--num==0) return c1;
126 126 mul_add(rp[1],ap[1],w,c1); if (--num==0) return c1;
127 127 mul_add(rp[2],ap[2],w,c1); return c1;
128 128 }
129 129
130 130 return(c1);
131 131 }
132 132
/*
 * bn_mul_words: for each of the num words, rp[i] receives the low word of
 * ap[i]*w plus the carry from the previous word; the high word becomes the
 * next carry (see the mul macro). Existing contents of rp are overwritten,
 * not accumulated.
 *
 * rp   destination words
 * ap   source words
 * num  number of words to process; num <= 0 does nothing
 * w    single-word multiplier
 *
 * Returns the carry left over after the last word (0 when num <= 0).
 */
133 133 BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w)
134 134 {
135 135 BN_ULONG c1=0;
136 136
137 137 if (num <= 0) return(c1);
138 138
/* Main loop: four words per iteration while at least four remain. */
139 139 while (num&~3)
140 140 {
141 141 mul(rp[0],ap[0],w,c1);
142 142 mul(rp[1],ap[1],w,c1);
143 143 mul(rp[2],ap[2],w,c1);
144 144 mul(rp[3],ap[3],w,c1);
145 145 ap+=4; rp+=4; num-=4;
146 146 }
/* Tail: one to three leftover words. */
147 147 if (num)
148 148 {
149 149 mul(rp[0],ap[0],w,c1); if (--num == 0) return c1;
150 150 mul(rp[1],ap[1],w,c1); if (--num == 0) return c1;
151 151 mul(rp[2],ap[2],w,c1);
152 152 }
153 153 return(c1);
154 154 }
155 155
/*
 * bn_sqr_words: square each of the n input words independently. For input
 * word a[i], the low word of a[i]*a[i] is written to r[2*i] and the high
 * word to r[2*i+1], so r receives 2*n words. No carry is passed between
 * words.
 *
 * r  destination, two words written per input word
 * a  source words
 * n  number of input words; n <= 0 does nothing
 *
 * The "open down / lines elided / open up" rows inside the body are webrev
 * navigation markers from the diff page, not part of the C source.
 */
156 156 void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n)
157 157 {
158 158 if (n <= 0) return;
159 159
/* Main loop: four input words (eight output words) per iteration. */
160 160 while (n&~3)
161 161 {
162 162 sqr(r[0],r[1],a[0]);
163 163 sqr(r[2],r[3],a[1]);
164 164 sqr(r[4],r[5],a[2]);
165 165 sqr(r[6],r[7],a[3]);
166 166 a+=4; r+=8; n-=4;
167 167 }
/* Tail: one to three leftover input words. */
168 168 if (n)
↓ open down ↓ |
54 lines elided |
↑ open up ↑ |
169 169 {
170 170 sqr(r[0],r[1],a[0]); if (--n == 0) return;
171 171 sqr(r[2],r[3],a[1]); if (--n == 0) return;
172 172 sqr(r[4],r[5],a[2]);
173 173 }
174 174 }
175 175
/*
 * bn_div_words: divide the two-word value h:l (h in %rdx, l in %rax) by d
 * with a single divq and return the quotient.
 *
 * h  high word of the dividend
 * l  low word of the dividend
 * d  divisor
 *
 * divq leaves the quotient in %rax (ret) and the remainder in %rdx; the
 * remainder is captured in `waste` only so the compiler knows %rdx is
 * overwritten, and is otherwise discarded.
 *
 * NOTE(review): nothing here checks d != 0 or that the quotient fits in one
 * word (h < d); presumably callers guarantee both -- confirm, since divq
 * raises a divide error otherwise.
 *
 * The '-'/'+' row pair is the diff's old `asm` / new `__asm__` spelling.
 */
176 176 BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d)
177 177 { BN_ULONG ret,waste;
178 178
179 - asm ("divq %4"
179 + __asm__ ("divq %4"
180 180 : "=a"(ret),"=d"(waste)
181 181 : "a"(l),"d"(h),"g"(d)
182 182 : "cc");
183 183
184 184 return ret;
185 185 }
186 186
/*
 * bn_add_words: rp[i] = ap[i] + bp[i] + carry for i in [0, n), implemented
 * as one inline-asm loop. Returns the final carry (0 or 1); returns 0
 * immediately when n <= 0.
 *
 * rp  destination words
 * ap  first addend
 * bp  second addend
 * n   number of words
 *
 * Asm walkthrough (operands: %0=ret, %1=n in %rcx, %2=i, %3=rp, %4=ap,
 * %5=bp):
 *   subq %2,%2      zero the index i, which also clears the carry flag
 *   1: movq/adcq    load ap[i], add bp[i] plus carry
 *      movq         store the sum to rp[i]
 *      leaq 1(%2)   i++ (presumably leaq rather than inc/add so the carry
 *                   flag from adcq survives into the next iteration)
 *      loop 1b      decrement the count register and repeat while non-zero
 *   sbbq %0,%0      ret = 0 - carry, i.e. all ones if carry set, else 0
 * The C code then masks ret down to its low bit.
 *
 * The C initialisers ret=0,i=0 are overwritten: both are pure outputs of
 * the asm ("=&a", "=&r").
 *
 * NOTE(review): the asm loads from ap/bp and stores to rp, yet it declares
 * neither memory operands nor a "memory" clobber and is not volatile, so
 * the compiler is not told that memory is read or written here -- confirm
 * this is safe with the compilers in use.
 * NOTE(review): n is an int bound to "+c" while `loop` counts with the full
 * count register; this relies on the upper half being zero -- confirm.
 *
 * The '-'/'+' row pair is the diff's old `asm` / new `__asm__` spelling;
 * the "open down / lines elided / open up" rows are webrev navigation
 * markers, not C source.
 */
187 187 BN_ULONG bn_add_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int n)
188 188 { BN_ULONG ret=0,i=0;
189 189
190 190 if (n <= 0) return 0;
191 191
192 - asm (
192 + __asm__ (
193 193 " subq %2,%2 \n"
194 194 ".p2align 4 \n"
195 195 "1: movq (%4,%2,8),%0 \n"
196 196 " adcq (%5,%2,8),%0 \n"
197 197 " movq %0,(%3,%2,8) \n"
198 198 " leaq 1(%2),%2 \n"
199 199 " loop 1b \n"
200 200 " sbbq %0,%0 \n"
201 201 : "=&a"(ret),"+c"(n),"=&r"(i)
202 202 : "r"(rp),"r"(ap),"r"(bp)
203 203 : "cc"
204 204 );
↓ open down ↓ |
2 lines elided |
↑ open up ↑ |
205 205
206 206 return ret&1;
207 207 }
208 208
209 209 #ifndef SIMICS
/*
 * bn_sub_words (inline-asm variant, compiled when SIMICS is not defined):
 * rp[i] = ap[i] - bp[i] - borrow for i in [0, n). Returns the final borrow
 * (0 or 1); returns 0 immediately when n <= 0.
 *
 * rp  destination words
 * ap  minuend
 * bp  subtrahend
 * n   number of words
 *
 * Asm walkthrough (operands: %0=ret, %1=n in %rcx, %2=i, %3=rp, %4=ap,
 * %5=bp):
 *   subq %2,%2      zero the index i, which also clears the carry flag
 *   1: movq/sbbq    load ap[i], subtract bp[i] and the incoming borrow
 *      movq         store the difference to rp[i]
 *      leaq 1(%2)   i++ (presumably leaq so the borrow flag is preserved)
 *      loop 1b      decrement the count register and repeat while non-zero
 *   sbbq %0,%0      ret = all ones if a borrow is pending, else 0
 * The C code then masks ret down to its low bit.
 *
 * NOTE(review): as in bn_add_words, memory is read and written inside the
 * asm without memory operands, a "memory" clobber, or volatile -- confirm
 * this is safe with the compilers in use.
 *
 * The '-'/'+' row pair is the diff's old `asm` / new `__asm__` spelling.
 */
210 210 BN_ULONG bn_sub_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int n)
211 211 { BN_ULONG ret=0,i=0;
212 212
213 213 if (n <= 0) return 0;
214 214
215 - asm (
215 + __asm__ (
216 216 " subq %2,%2 \n"
217 217 ".p2align 4 \n"
218 218 "1: movq (%4,%2,8),%0 \n"
219 219 " sbbq (%5,%2,8),%0 \n"
220 220 " movq %0,(%3,%2,8) \n"
221 221 " leaq 1(%2),%2 \n"
222 222 " loop 1b \n"
223 223 " sbbq %0,%0 \n"
224 224 : "=&a"(ret),"+c"(n),"=&r"(i)
225 225 : "r"(rp),"r"(ap),"r"(bp)
226 226 : "cc"
227 227 );
228 228
229 229 return ret&1;
230 230 }
231 231 #else
/*
 * bn_sub_words (portable C variant, compiled only when SIMICS is defined):
 * r[i] = a[i] - b[i] - borrow for i in [0, n), returning the final borrow.
 * Returns 0 immediately when n <= 0.
 *
 * The loop body is written out four times; each copy handles one word and
 * leaves the loop as soon as n reaches zero, otherwise the pointers are
 * advanced by four and the pattern repeats.
 *
 * Borrow handling: c is recomputed as (t1 < t2) only when the two words
 * differ. When they are equal the previous borrow is left as is, because
 * t1 - t2 - c then borrows exactly when c was already set.
 *
 * Unlike the asm variant, a and b are not const-qualified here.
 */
232 232 /* Simics 1.4<7 has buggy sbbq:-( */
233 233 #define BN_MASK2 0xffffffffffffffffL
234 234 BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
235 235 {
236 236 BN_ULONG t1,t2;
237 237 int c=0;
238 238
239 239 if (n <= 0) return((BN_ULONG)0);
240 240
241 241 for (;;)
242 242 {
243 243 t1=a[0]; t2=b[0];
244 244 r[0]=(t1-t2-c)&BN_MASK2;
245 245 if (t1 != t2) c=(t1 < t2);
246 246 if (--n <= 0) break;
247 247
248 248 t1=a[1]; t2=b[1];
249 249 r[1]=(t1-t2-c)&BN_MASK2;
250 250 if (t1 != t2) c=(t1 < t2);
251 251 if (--n <= 0) break;
252 252
253 253 t1=a[2]; t2=b[2];
254 254 r[2]=(t1-t2-c)&BN_MASK2;
255 255 if (t1 != t2) c=(t1 < t2);
256 256 if (--n <= 0) break;
257 257
258 258 t1=a[3]; t2=b[3];
259 259 r[3]=(t1-t2-c)&BN_MASK2;
260 260 if (t1 != t2) c=(t1 < t2);
261 261 if (--n <= 0) break;
262 262
263 263 a+=4;
264 264 b+=4;
265 265 r+=4;
266 266 }
267 267 return(c);
268 268 }
269 269 #endif
270 270
271 271 /* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */
272 272 /* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
273 273 /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
274 274 /* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
275 275
276 276 #if 0
277 277 /* original macros are kept for reference purposes */
/*
 * Reference (plain C) mul_add_c, disabled by the enclosing #if 0:
 * adds the two-word product a*b into the three-word accumulator
 * (c2,c1,c0).
 *
 * t1 gets the low word of the product and t2 the high word (via
 * BN_UMULT_HIGH, which is not defined in this file). Each `x += y;
 * z += (x<y)?1:0` pair detects unsigned wrap-around of the addition and
 * carries it into the next word. t1 and t2 are not declared here; they are
 * expected in the scope where the macro is expanded.
 */
278 278 #define mul_add_c(a,b,c0,c1,c2) { \
279 279 BN_ULONG ta=(a),tb=(b); \
280 280 t1 = ta * tb; \
281 281 t2 = BN_UMULT_HIGH(ta,tb); \
282 282 c0 += t1; t2 += (c0<t1)?1:0; \
283 283 c1 += t2; c2 += (c1<t2)?1:0; \
284 284 }
285 285
/*
 * Reference (plain C) mul_add_c2, disabled by the enclosing #if 0:
 * adds twice the two-word product a*b into the three-word accumulator
 * (c2,c1,c0).
 *
 * The high word (t1) and low word (t0) of the product are each doubled by
 * adding them to themselves; wrap-around of the high doubling is carried
 * into c2 and wrap-around of the low doubling into t2. The doubled value
 * is then accumulated exactly as in mul_add_c. t1 and t2 are expected in
 * the scope where the macro is expanded.
 *
 * The "open down / lines elided / open up" rows are webrev navigation
 * markers from the diff page, not part of the macro.
 */
286 286 #define mul_add_c2(a,b,c0,c1,c2) { \
↓ open down ↓ |
61 lines elided |
↑ open up ↑ |
287 287 BN_ULONG ta=(a),tb=(b),t0; \
288 288 t1 = BN_UMULT_HIGH(ta,tb); \
289 289 t0 = ta * tb; \
290 290 t2 = t1+t1; c2 += (t2<t1)?1:0; \
291 291 t1 = t0+t0; t2 += (t1<t0)?1:0; \
292 292 c0 += t1; t2 += (c0<t1)?1:0; \
293 293 c1 += t2; c2 += (c1<t2)?1:0; \
294 294 }
295 295 #else
/*
 * mul_add_c(a,b,c0,c1,c2): (c2,c1,c0) += a*b, inline-asm version.
 *
 *   1. mulq: a (in %rax) times the memory operand b; low word -> t1,
 *      high word -> t2.
 *   2. c0 += t1, with the carry flag folded into t2.
 *   3. c1 += t2, with the carry flag folded into c2.
 *
 * t1 and t2 are scratch variables that must be declared (as BN_ULONG) in
 * the scope where the macro is expanded. b is constrained to memory ("m"),
 * so it must be an addressable operand such as an array element.
 *
 * The '-'/'+' row pairs are the diff's old `asm` / new `__asm__` spellings.
 */
296 296 #define mul_add_c(a,b,c0,c1,c2) do { \
297 - asm ("mulq %3" \
297 + __asm__ ("mulq %3" \
298 298 : "=a"(t1),"=d"(t2) \
299 299 : "a"(a),"m"(b) \
300 300 : "cc"); \
301 - asm ("addq %2,%0; adcq %3,%1" \
301 + __asm__ ("addq %2,%0; adcq %3,%1" \
302 302 : "+r"(c0),"+d"(t2) \
303 303 : "a"(t1),"g"(0) \
304 304 : "cc"); \
305 - asm ("addq %2,%0; adcq %3,%1" \
305 + __asm__ ("addq %2,%0; adcq %3,%1" \
306 306 : "+r"(c1),"+r"(c2) \
307 307 : "d"(t2),"g"(0) \
308 308 : "cc"); \
309 309 } while (0)
310 310
/*
 * sqr_add_c(a,i,c0,c1,c2): (c2,c1,c0) += a[i]*a[i], inline-asm version.
 *
 *   1. mulq: a[i] is loaded into %rax and multiplied by itself; low word
 *      -> t1, high word -> t2.
 *   2. c0 += t1, with the carry flag folded into t2.
 *   3. c1 += t2, with the carry flag folded into c2.
 *
 * t1 and t2 are scratch variables that must be declared in the scope where
 * the macro is expanded. Note that a is indexed as a[i] without
 * parentheses around the macro argument.
 *
 * The '-'/'+' row pairs are the diff's old `asm` / new `__asm__` spellings.
 */
311 311 #define sqr_add_c(a,i,c0,c1,c2) do { \
312 - asm ("mulq %2" \
312 + __asm__ ("mulq %2" \
313 313 : "=a"(t1),"=d"(t2) \
314 314 : "a"(a[i]) \
315 315 : "cc"); \
316 - asm ("addq %2,%0; adcq %3,%1" \
316 + __asm__ ("addq %2,%0; adcq %3,%1" \
317 317 : "+r"(c0),"+d"(t2) \
318 318 : "a"(t1),"g"(0) \
319 319 : "cc"); \
320 - asm ("addq %2,%0; adcq %3,%1" \
320 + __asm__ ("addq %2,%0; adcq %3,%1" \
321 321 : "+r"(c1),"+r"(c2) \
322 322 : "d"(t2),"g"(0) \
323 323 : "cc"); \
324 324 } while (0)
325 325
/*
 * mul_add_c2(a,b,c0,c1,c2): (c2,c1,c0) += 2*a*b, inline-asm version.
 *
 *   1. mulq: a (in %rax) times the memory operand b; low word -> t1,
 *      high word -> t2.
 *   2. t2 += t2 (double the high word); the carry out goes into c2.
 *   3. t1 += t1 (double the low word); the carry out goes into t2.
 *   4. c0 += t1, with the carry flag folded into t2.
 *   5. c1 += t2, with the carry flag folded into c2.
 *
 * The high word is doubled before the low word so that the carry from
 * doubling the low word can be added to the already-doubled high word.
 *
 * t1 and t2 are scratch variables that must be declared in the scope where
 * the macro is expanded; b must be a memory operand ("m").
 *
 * The '-'/'+' row pairs are the diff's old `asm` / new `__asm__` spellings.
 */
326 326 #define mul_add_c2(a,b,c0,c1,c2) do { \
327 - asm ("mulq %3" \
327 + __asm__ ("mulq %3" \
328 328 : "=a"(t1),"=d"(t2) \
329 329 : "a"(a),"m"(b) \
330 330 : "cc"); \
331 - asm ("addq %0,%0; adcq %2,%1" \
331 + __asm__ ("addq %0,%0; adcq %2,%1" \
332 332 : "+d"(t2),"+r"(c2) \
333 333 : "g"(0) \
334 334 : "cc"); \
335 - asm ("addq %0,%0; adcq %2,%1" \
335 + __asm__ ("addq %0,%0; adcq %2,%1" \
336 336 : "+a"(t1),"+d"(t2) \
337 337 : "g"(0) \
338 338 : "cc"); \
339 - asm ("addq %2,%0; adcq %3,%1" \
339 + __asm__ ("addq %2,%0; adcq %3,%1" \
340 340 : "+r"(c0),"+d"(t2) \
341 341 : "a"(t1),"g"(0) \
342 342 : "cc"); \
343 - asm ("addq %2,%0; adcq %3,%1" \
343 + __asm__ ("addq %2,%0; adcq %3,%1" \
344 344 : "+r"(c1),"+r"(c2) \
345 345 : "d"(t2),"g"(0) \
346 346 : "cc"); \
347 347 } while (0)
348 348 #endif
349 349
/*
 * sqr_add_c2(a,i,j,c0,c1,c2): (c2,c1,c0) += 2*a[i]*a[j].
 * Thin wrapper that forwards the two array elements to mul_add_c2; it is
 * defined outside the #if/#else so it works with whichever mul_add_c2
 * variant is compiled in.
 */
350 350 #define sqr_add_c2(a,i,j,c0,c1,c2) \
351 351 mul_add_c2((a)[i],(a)[j],c0,c1,c2)
352 352
/*
 * bn_mul_comba8: full 8x8-word multiplication, r[0..15] = a[0..7] * b[0..7],
 * computed one result word (column) at a time.
 *
 * For result word k, every product a[i]*b[j] with i+j == k is accumulated
 * with mul_add_c into a three-word accumulator. Once a column is complete
 * its lowest accumulator word is stored to r[k] and reset to zero, and the
 * three variables c1,c2,c3 rotate roles (low, middle, high) for the next
 * column -- which is why the argument order of c1/c2/c3 changes from one
 * group of calls to the next.
 *
 * r  destination, 16 words written
 * a  first operand, 8 words read
 * b  second operand, 8 words read
 *
 * t1 and t2 are not used directly; they are the scratch variables that the
 * mul_add_c macro expects in the enclosing scope.
 */
353 353 void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
354 354 {
355 355 BN_ULONG t1,t2;
356 356 BN_ULONG c1,c2,c3;
357 357
358 358 c1=0;
359 359 c2=0;
360 360 c3=0;
361 361 mul_add_c(a[0],b[0],c1,c2,c3);
362 362 r[0]=c1;
363 363 c1=0;
364 364 mul_add_c(a[0],b[1],c2,c3,c1);
365 365 mul_add_c(a[1],b[0],c2,c3,c1);
366 366 r[1]=c2;
367 367 c2=0;
368 368 mul_add_c(a[2],b[0],c3,c1,c2);
369 369 mul_add_c(a[1],b[1],c3,c1,c2);
370 370 mul_add_c(a[0],b[2],c3,c1,c2);
371 371 r[2]=c3;
372 372 c3=0;
373 373 mul_add_c(a[0],b[3],c1,c2,c3);
374 374 mul_add_c(a[1],b[2],c1,c2,c3);
375 375 mul_add_c(a[2],b[1],c1,c2,c3);
376 376 mul_add_c(a[3],b[0],c1,c2,c3);
377 377 r[3]=c1;
378 378 c1=0;
379 379 mul_add_c(a[4],b[0],c2,c3,c1);
380 380 mul_add_c(a[3],b[1],c2,c3,c1);
381 381 mul_add_c(a[2],b[2],c2,c3,c1);
382 382 mul_add_c(a[1],b[3],c2,c3,c1);
383 383 mul_add_c(a[0],b[4],c2,c3,c1);
384 384 r[4]=c2;
385 385 c2=0;
386 386 mul_add_c(a[0],b[5],c3,c1,c2);
387 387 mul_add_c(a[1],b[4],c3,c1,c2);
388 388 mul_add_c(a[2],b[3],c3,c1,c2);
389 389 mul_add_c(a[3],b[2],c3,c1,c2);
390 390 mul_add_c(a[4],b[1],c3,c1,c2);
391 391 mul_add_c(a[5],b[0],c3,c1,c2);
392 392 r[5]=c3;
393 393 c3=0;
394 394 mul_add_c(a[6],b[0],c1,c2,c3);
395 395 mul_add_c(a[5],b[1],c1,c2,c3);
396 396 mul_add_c(a[4],b[2],c1,c2,c3);
397 397 mul_add_c(a[3],b[3],c1,c2,c3);
398 398 mul_add_c(a[2],b[4],c1,c2,c3);
399 399 mul_add_c(a[1],b[5],c1,c2,c3);
400 400 mul_add_c(a[0],b[6],c1,c2,c3);
401 401 r[6]=c1;
402 402 c1=0;
403 403 mul_add_c(a[0],b[7],c2,c3,c1);
404 404 mul_add_c(a[1],b[6],c2,c3,c1);
405 405 mul_add_c(a[2],b[5],c2,c3,c1);
406 406 mul_add_c(a[3],b[4],c2,c3,c1);
407 407 mul_add_c(a[4],b[3],c2,c3,c1);
408 408 mul_add_c(a[5],b[2],c2,c3,c1);
409 409 mul_add_c(a[6],b[1],c2,c3,c1);
410 410 mul_add_c(a[7],b[0],c2,c3,c1);
411 411 r[7]=c2;
412 412 c2=0;
413 413 mul_add_c(a[7],b[1],c3,c1,c2);
414 414 mul_add_c(a[6],b[2],c3,c1,c2);
415 415 mul_add_c(a[5],b[3],c3,c1,c2);
416 416 mul_add_c(a[4],b[4],c3,c1,c2);
417 417 mul_add_c(a[3],b[5],c3,c1,c2);
418 418 mul_add_c(a[2],b[6],c3,c1,c2);
419 419 mul_add_c(a[1],b[7],c3,c1,c2);
420 420 r[8]=c3;
421 421 c3=0;
422 422 mul_add_c(a[2],b[7],c1,c2,c3);
423 423 mul_add_c(a[3],b[6],c1,c2,c3);
424 424 mul_add_c(a[4],b[5],c1,c2,c3);
425 425 mul_add_c(a[5],b[4],c1,c2,c3);
426 426 mul_add_c(a[6],b[3],c1,c2,c3);
427 427 mul_add_c(a[7],b[2],c1,c2,c3);
428 428 r[9]=c1;
429 429 c1=0;
430 430 mul_add_c(a[7],b[3],c2,c3,c1);
431 431 mul_add_c(a[6],b[4],c2,c3,c1);
432 432 mul_add_c(a[5],b[5],c2,c3,c1);
433 433 mul_add_c(a[4],b[6],c2,c3,c1);
434 434 mul_add_c(a[3],b[7],c2,c3,c1);
435 435 r[10]=c2;
436 436 c2=0;
437 437 mul_add_c(a[4],b[7],c3,c1,c2);
438 438 mul_add_c(a[5],b[6],c3,c1,c2);
439 439 mul_add_c(a[6],b[5],c3,c1,c2);
440 440 mul_add_c(a[7],b[4],c3,c1,c2);
441 441 r[11]=c3;
442 442 c3=0;
443 443 mul_add_c(a[7],b[5],c1,c2,c3);
444 444 mul_add_c(a[6],b[6],c1,c2,c3);
445 445 mul_add_c(a[5],b[7],c1,c2,c3);
446 446 r[12]=c1;
447 447 c1=0;
448 448 mul_add_c(a[6],b[7],c2,c3,c1);
449 449 mul_add_c(a[7],b[6],c2,c3,c1);
450 450 r[13]=c2;
451 451 c2=0;
452 452 mul_add_c(a[7],b[7],c3,c1,c2);
453 453 r[14]=c3;
454 454 r[15]=c1;
455 455 }
456 456
/*
 * bn_mul_comba4: full 4x4-word multiplication, r[0..7] = a[0..3] * b[0..3],
 * computed one result word (column) at a time.
 *
 * For result word k, every product a[i]*b[j] with i+j == k is accumulated
 * with mul_add_c into a three-word accumulator. After each column the
 * lowest accumulator word is stored to r[k] and zeroed, and c1,c2,c3
 * rotate roles for the next column. The last column (a[3]*b[3]) yields
 * both r[6] and r[7].
 *
 * r  destination, 8 words written
 * a  first operand, 4 words read
 * b  second operand, 4 words read
 *
 * t1 and t2 are the scratch variables that the mul_add_c macro expects in
 * the enclosing scope.
 */
457 457 void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b)
458 458 {
459 459 BN_ULONG t1,t2;
460 460 BN_ULONG c1,c2,c3;
461 461
462 462 c1=0;
463 463 c2=0;
464 464 c3=0;
465 465 mul_add_c(a[0],b[0],c1,c2,c3);
466 466 r[0]=c1;
467 467 c1=0;
468 468 mul_add_c(a[0],b[1],c2,c3,c1);
469 469 mul_add_c(a[1],b[0],c2,c3,c1);
470 470 r[1]=c2;
471 471 c2=0;
472 472 mul_add_c(a[2],b[0],c3,c1,c2);
473 473 mul_add_c(a[1],b[1],c3,c1,c2);
474 474 mul_add_c(a[0],b[2],c3,c1,c2);
475 475 r[2]=c3;
476 476 c3=0;
477 477 mul_add_c(a[0],b[3],c1,c2,c3);
478 478 mul_add_c(a[1],b[2],c1,c2,c3);
479 479 mul_add_c(a[2],b[1],c1,c2,c3);
480 480 mul_add_c(a[3],b[0],c1,c2,c3);
481 481 r[3]=c1;
482 482 c1=0;
483 483 mul_add_c(a[3],b[1],c2,c3,c1);
484 484 mul_add_c(a[2],b[2],c2,c3,c1);
485 485 mul_add_c(a[1],b[3],c2,c3,c1);
486 486 r[4]=c2;
487 487 c2=0;
488 488 mul_add_c(a[2],b[3],c3,c1,c2);
489 489 mul_add_c(a[3],b[2],c3,c1,c2);
490 490 r[5]=c3;
491 491 c3=0;
492 492 mul_add_c(a[3],b[3],c1,c2,c3);
493 493 r[6]=c1;
494 494 r[7]=c2;
495 495 }
496 496
/*
 * bn_sqr_comba8: full 8-word squaring, r[0..15] = a[0..7] * a[0..7],
 * computed one result word (column) at a time.
 *
 * For result word k, the diagonal term a[k/2]^2 (present only for even k)
 * is added with sqr_add_c, and each off-diagonal pair a[i]*a[j] with
 * i+j == k and i > j is added once, doubled, with sqr_add_c2 -- so every
 * symmetric pair of products costs a single multiplication. After each
 * column the lowest accumulator word is stored to r[k] and zeroed, and
 * c1,c2,c3 rotate roles for the next column.
 *
 * r  destination, 16 words written
 * a  operand, 8 words read
 *
 * t1 and t2 are the scratch variables that the sqr_add_c / mul_add_c2
 * macros expect in the enclosing scope.
 */
497 497 void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a)
498 498 {
499 499 BN_ULONG t1,t2;
500 500 BN_ULONG c1,c2,c3;
501 501
502 502 c1=0;
503 503 c2=0;
504 504 c3=0;
505 505 sqr_add_c(a,0,c1,c2,c3);
506 506 r[0]=c1;
507 507 c1=0;
508 508 sqr_add_c2(a,1,0,c2,c3,c1);
509 509 r[1]=c2;
510 510 c2=0;
511 511 sqr_add_c(a,1,c3,c1,c2);
512 512 sqr_add_c2(a,2,0,c3,c1,c2);
513 513 r[2]=c3;
514 514 c3=0;
515 515 sqr_add_c2(a,3,0,c1,c2,c3);
516 516 sqr_add_c2(a,2,1,c1,c2,c3);
517 517 r[3]=c1;
518 518 c1=0;
519 519 sqr_add_c(a,2,c2,c3,c1);
520 520 sqr_add_c2(a,3,1,c2,c3,c1);
521 521 sqr_add_c2(a,4,0,c2,c3,c1);
522 522 r[4]=c2;
523 523 c2=0;
524 524 sqr_add_c2(a,5,0,c3,c1,c2);
525 525 sqr_add_c2(a,4,1,c3,c1,c2);
526 526 sqr_add_c2(a,3,2,c3,c1,c2);
527 527 r[5]=c3;
528 528 c3=0;
529 529 sqr_add_c(a,3,c1,c2,c3);
530 530 sqr_add_c2(a,4,2,c1,c2,c3);
531 531 sqr_add_c2(a,5,1,c1,c2,c3);
532 532 sqr_add_c2(a,6,0,c1,c2,c3);
533 533 r[6]=c1;
534 534 c1=0;
535 535 sqr_add_c2(a,7,0,c2,c3,c1);
536 536 sqr_add_c2(a,6,1,c2,c3,c1);
537 537 sqr_add_c2(a,5,2,c2,c3,c1);
538 538 sqr_add_c2(a,4,3,c2,c3,c1);
539 539 r[7]=c2;
540 540 c2=0;
541 541 sqr_add_c(a,4,c3,c1,c2);
542 542 sqr_add_c2(a,5,3,c3,c1,c2);
543 543 sqr_add_c2(a,6,2,c3,c1,c2);
544 544 sqr_add_c2(a,7,1,c3,c1,c2);
545 545 r[8]=c3;
546 546 c3=0;
547 547 sqr_add_c2(a,7,2,c1,c2,c3);
548 548 sqr_add_c2(a,6,3,c1,c2,c3);
549 549 sqr_add_c2(a,5,4,c1,c2,c3);
550 550 r[9]=c1;
551 551 c1=0;
552 552 sqr_add_c(a,5,c2,c3,c1);
553 553 sqr_add_c2(a,6,4,c2,c3,c1);
554 554 sqr_add_c2(a,7,3,c2,c3,c1);
555 555 r[10]=c2;
556 556 c2=0;
557 557 sqr_add_c2(a,7,4,c3,c1,c2);
558 558 sqr_add_c2(a,6,5,c3,c1,c2);
559 559 r[11]=c3;
560 560 c3=0;
561 561 sqr_add_c(a,6,c1,c2,c3);
562 562 sqr_add_c2(a,7,5,c1,c2,c3);
563 563 r[12]=c1;
564 564 c1=0;
565 565 sqr_add_c2(a,7,6,c2,c3,c1);
566 566 r[13]=c2;
567 567 c2=0;
568 568 sqr_add_c(a,7,c3,c1,c2);
569 569 r[14]=c3;
570 570 r[15]=c1;
571 571 }
572 572
/*
 * bn_sqr_comba4: full 4-word squaring, r[0..7] = a[0..3] * a[0..3],
 * computed one result word (column) at a time.
 *
 * For result word k, the diagonal term a[k/2]^2 (even k only) is added
 * with sqr_add_c, and each off-diagonal pair a[i]*a[j] with i+j == k and
 * i > j is added once, doubled, with sqr_add_c2. After each column the
 * lowest accumulator word is stored to r[k] and zeroed, and c1,c2,c3
 * rotate roles for the next column. The last column (a[3]^2) yields both
 * r[6] and r[7].
 *
 * r  destination, 8 words written
 * a  operand, 4 words read
 *
 * t1 and t2 are the scratch variables that the sqr_add_c / mul_add_c2
 * macros expect in the enclosing scope.
 */
573 573 void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
574 574 {
575 575 BN_ULONG t1,t2;
576 576 BN_ULONG c1,c2,c3;
577 577
578 578 c1=0;
579 579 c2=0;
580 580 c3=0;
581 581 sqr_add_c(a,0,c1,c2,c3);
582 582 r[0]=c1;
583 583 c1=0;
584 584 sqr_add_c2(a,1,0,c2,c3,c1);
585 585 r[1]=c2;
586 586 c2=0;
587 587 sqr_add_c(a,1,c3,c1,c2);
588 588 sqr_add_c2(a,2,0,c3,c1,c2);
589 589 r[2]=c3;
590 590 c3=0;
591 591 sqr_add_c2(a,3,0,c1,c2,c3);
592 592 sqr_add_c2(a,2,1,c1,c2,c3);
593 593 r[3]=c1;
594 594 c1=0;
595 595 sqr_add_c(a,2,c2,c3,c1);
596 596 sqr_add_c2(a,3,1,c2,c3,c1);
597 597 r[4]=c2;
598 598 c2=0;
599 599 sqr_add_c2(a,3,2,c3,c1,c2);
600 600 r[5]=c3;
601 601 c3=0;
602 602 sqr_add_c(a,3,c1,c2,c3);
603 603 r[6]=c1;
604 604 r[7]=c2;
605 605 }
606 606 #endif
↓ open down ↓ |
253 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX