# if defined(_MSC_VER) || defined(__ICC)
#  define ROTATE(a,n)   _lrotl(a,n)
# elif defined(__MWERKS__)
#  if defined(__POWERPC__)
#   define ROTATE(a,n)  __rlwinm(a,n,0,31)
#  elif defined(__MC68K__)
    /* Motorola specific tweak. <appro@fy.chalmers.se> */
#   define ROTATE(a,n)  ( n<24 ? __rol(a,n) : __ror(a,32-n) )
#  else
#   define ROTATE(a,n)  __rol(a,n)
#  endif
# elif defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
  /*
   * Some GNU C inline assembler templates. Note that these are
   * rotates by a *constant* number of bits! But that's exactly
   * what we need here...
   *                                    <appro@fy.chalmers.se>
   */
#  if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
#   define ROTATE(a,n)  ({ register unsigned int ret;   \
                                __asm__ (               \
                                "roll %1,%0"            \
                                : "=r"(ret)             \
                                : "I"(n), "0"((unsigned int)(a))        \
                                : "cc");                \
                           ret;                         \
                        })
#  elif defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \
        defined(__powerpc) || defined(__ppc__) || defined(__powerpc64__)
#   define ROTATE(a,n)  ({ register unsigned int ret;   \
                                __asm__ (               \
                                "rlwinm %0,%1,%2,0,31"  \
                                : "=r"(ret)             \
                                : "r"(a), "I"(n));      \
                           ret;                         \
                        })
#  elif defined(__s390x__)
#   define ROTATE(a,n)  ({ register unsigned int ret;   \
                                __asm__ ("rll %0,%1,%2" \
                                : "=r"(ret)             \
                                : "r"(a), "I"(n));      \
                           ret;                         \
                        })
#  endif
# endif
#endif                          /* PEDANTIC */

#ifndef ROTATE
#define ROTATE(a,n)     (((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
#endif
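/*
 * All ROTATE() variants above compute the same thing: a rotation of a
 * 32-bit value left by n bits.  For example, with a 32-bit unsigned int
 * operand,
 *
 *      ROTATE(0x12345678, 8) == 0x34567812
 *
 * In the portable fallback the & 0xffffffff mask keeps any dirty bits
 * above bit 31 (possible when the operand type is wider than 32 bits)
 * from leaking into the rotated result.
 */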

#if defined(DATA_ORDER_IS_BIG_ENDIAN)

#ifndef PEDANTIC
# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#  if ((defined(__i386) || defined(__i386__)) && !defined(I386_ONLY)) || \
      (defined(__x86_64) || defined(__x86_64__))
#   if !defined(B_ENDIAN)
    /*
     * This gives a ~30-40% performance improvement in SHA-256 compiled
     * with gcc on P4 (strictly speaking, the first macro accounts for
     * most of it). We can pull this trick only on x86* platforms,
     * because these CPUs can fetch unaligned data without raising an
     * exception.
     */
#    define HOST_c2l(c,l)  ({ unsigned int r=*((const unsigned int *)(c)); \
                              __asm__ ("bswapl %0":"=r"(r):"0"(r));        \
                              (c)+=4; (l)=r;                             })
#    define HOST_l2c(l,c)  ({ unsigned int r=(l);                          \
                              __asm__ ("bswapl %0":"=r"(r):"0"(r));        \
                              *((unsigned int *)(c))=r; (c)+=4; r;       })
#   endif
#  endif
# endif
#endif
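/*
 * The bswapl-based macros above load or store a whole 32-bit word and
 * reverse its byte order in a single instruction, instead of assembling
 * the value one byte at a time as the generic fallbacks further down do.
 * Reading the big-endian bytes 01 02 03 04 this way gives l == 0x01020304.
 */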
#if defined(__s390__) || defined(__s390x__)
# define HOST_c2l(c,l) ((l)=*((const unsigned int *)(c)), (c)+=4, (l))
# define HOST_l2c(l,c) (*((unsigned int *)(c))=(l), (c)+=4, (l))
#endif
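/*
 * s390 is natively big-endian, so the plain word load/store above already
 * matches DATA_ORDER_IS_BIG_ENDIAN.  The generic fallbacks below build the
 * word one byte at a time, most-significant byte first, and advance the
 * byte pointer c by 4: the bytes 0x01 0x02 0x03 0x04 become l == 0x01020304.
 */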

#ifndef HOST_c2l
#define HOST_c2l(c,l)   (l =(((unsigned long)(*((c)++)))<<24),          \
                         l|=(((unsigned long)(*((c)++)))<<16),          \
                         l|=(((unsigned long)(*((c)++)))<< 8),          \
                         l|=(((unsigned long)(*((c)++)))    ),          \
                         l)
#endif
#ifndef HOST_l2c
#define HOST_l2c(l,c)   (*((c)++)=(unsigned char)(((l)>>24)&0xff),      \
                         *((c)++)=(unsigned char)(((l)>>16)&0xff),      \
                         *((c)++)=(unsigned char)(((l)>> 8)&0xff),      \
                         *((c)++)=(unsigned char)(((l)    )&0xff),      \
                         l)
#endif

#elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)

#ifndef PEDANTIC
# if defined(__GNUC__) && __GNUC__>=2 && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM)
#  if defined(__s390x__)
#   define HOST_c2l(c,l)  ({ __asm__ ("lrv %0,%1"                          \
                             :"=d"(l) :"m"(*(const unsigned int *)(c)));   \
                             (c)+=4; (l);                                })
#   define HOST_l2c(l,c)  ({ __asm__ ("strv %1,%0"                         \
                             :"=m"(*(unsigned int *)(c)) :"d"(l));         \
                             (c)+=4; (l);                                })
#  endif
# endif
#endif
#if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
# ifndef B_ENDIAN
  /* See comment in DATA_ORDER_IS_BIG_ENDIAN section. */
#  define HOST_c2l(c,l) ((l)=*((const unsigned int *)(c)), (c)+=4, l)
#  define HOST_l2c(l,c) (*((unsigned int *)(c))=(l), (c)+=4, l)
# endif
#endif
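/*
 * For DATA_ORDER_IS_LITTLE_ENDIAN the least-significant byte comes first,
 * so the bytes 0x01 0x02 0x03 0x04 become l == 0x04030201.  On
 * little-endian x86 that is just an ordinary 32-bit load or store, which
 * is why the macros above need no byte swap, only tolerance of unaligned
 * pointers.
 */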

#ifndef HOST_c2l
#define HOST_c2l(c,l)   (l =(((unsigned long)(*((c)++)))    ),          \
                         l|=(((unsigned long)(*((c)++)))<< 8),          \
                         l|=(((unsigned long)(*((c)++)))<<16),          \
                         l|=(((unsigned long)(*((c)++)))<<24),          \
                         l)
#endif