4853 illumos-gate is not lint-clean when built with openssl 1.0

Old version:

 220  * support 2*N integer type, then you have to replace every N*N
 221  * multiplication with 4 (N/2)*(N/2) accompanied by some shifts
 222  * and additions which unavoidably results in severe performance
 223  * penalties. Of course provided that the hardware is capable of
 224  * producing 2*N result... That's when you normally start
 225  * considering assembler implementation. However! It should be
 226  * pointed out that some CPUs (most notably Alpha, PowerPC and
 227  * upcoming IA-64 family:-) provide *separate* instruction
 228  * calculating the upper half of the product placing the result
 229  * into a general purpose register. Now *if* the compiler supports
 230  * inline assembler, then it's not impossible to implement the
 231  * "bignum" routines (and have the compiler optimize 'em)
 232  * exhibiting "native" performance in C. That's what BN_UMULT_HIGH
 233  * macro is about:-)
 234  *
 235  *                                      <appro@fy.chalmers.se>
 236  */
 237 # if defined(__alpha) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
 238 #  if defined(__DECC)
 239 #   include <c_asm.h>
 240 #   define BN_UMULT_HIGH(a,b)   (BN_ULONG)asm("umulh %a0,%a1,%v0",(a),(b))
 241 #  elif defined(__GNUC__) && __GNUC__>=2
 242 #   define BN_UMULT_HIGH(a,b)   ({      \
 243         register BN_ULONG ret;          \
 244         asm ("umulh     %1,%2,%0"       \
 245              : "=r"(ret)                \
 246              : "r"(a), "r"(b));         \
 247         ret;                    })
 248 #  endif        /* compiler */
 249 # elif defined(_ARCH_PPC) && defined(__64BIT__) && defined(SIXTY_FOUR_BIT_LONG)
 250 #  if defined(__GNUC__) && __GNUC__>=2
 251 #   define BN_UMULT_HIGH(a,b)   ({      \
 252         register BN_ULONG ret;          \
 253         asm ("mulhdu    %0,%1,%2"       \
 254              : "=r"(ret)                \
 255              : "r"(a), "r"(b));         \
 256         ret;                    })
 257 #  endif        /* compiler */
 258 # elif (defined(__x86_64) || defined(__x86_64__)) && \
 259        (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
 260 #  if defined(__GNUC__) && __GNUC__>=2
 261 #   define BN_UMULT_HIGH(a,b)   ({      \
 262         register BN_ULONG ret,discard;  \
 263         asm ("mulq      %3"             \
 264              : "=a"(discard),"=d"(ret)  \
 265              : "a"(a), "g"(b)           \
 266              : "cc");                   \
 267         ret;                    })
 268 #   define BN_UMULT_LOHI(low,high,a,b)  \
 269         asm ("mulq      %3"             \
 270                 : "=a"(low),"=d"(high)  \
 271                 : "a"(a),"g"(b)         \
 272                 : "cc");
 273 #  endif
 274 # elif (defined(_M_AMD64) || defined(_M_X64)) && defined(SIXTY_FOUR_BIT)
 275 #  if defined(_MSC_VER) && _MSC_VER>=1400
 276     unsigned __int64 __umulh    (unsigned __int64 a,unsigned __int64 b);
 277     unsigned __int64 _umul128   (unsigned __int64 a,unsigned __int64 b,
 278                                  unsigned __int64 *h);
 279 #   pragma intrinsic(__umulh,_umul128)
 280 #   define BN_UMULT_HIGH(a,b)           __umulh((a),(b))
 281 #   define BN_UMULT_LOHI(low,high,a,b)  ((low)=_umul128((a),(b),&(high)))
 282 #  endif
 283 # elif defined(__mips) && (defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG))
 284 #  if defined(__GNUC__) && __GNUC__>=2
 285 #   if __GNUC__>=4 && __GNUC_MINOR__>=4 /* "h" constraint is no more since 4.4 */
 286 #     define BN_UMULT_HIGH(a,b)          (((__uint128_t)(a)*(b))>>64)
 287 #     define BN_UMULT_LOHI(low,high,a,b) ({     \
 288         __uint128_t ret=(__uint128_t)(a)*(b);   \
 289         (high)=ret>>64; (low)=ret;         })
 290 #   else
 291 #     define BN_UMULT_HIGH(a,b) ({      \
 292         register BN_ULONG ret;          \
 293         asm ("dmultu    %1,%2"          \
 294              : "=h"(ret)                \
 295              : "r"(a), "r"(b) : "l");   \
 296         ret;                    })
 297 #     define BN_UMULT_LOHI(low,high,a,b)\
 298         asm ("dmultu    %2,%3"          \
 299              : "=l"(low),"=h"(high)     \
 300              : "r"(a), "r"(b));
 301 #    endif
 302 #  endif
 303 # endif         /* cpu */
 304 #endif          /* OPENSSL_NO_ASM */
 305 
 306 /*************************************************************
 307  * Using the long long type
 308  */
 309 #define Lw(t)    (((BN_ULONG)(t))&BN_MASK2)
 310 #define Hw(t)    (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2)
 311 
 312 #ifdef BN_DEBUG_RAND
 313 #define bn_clear_top2max(a) \
 314         { \
 315         int      ind = (a)->dmax - (a)->top; \
 316         BN_ULONG *ftl = &(a)->d[(a)->top-1]; \
 317         for (; ind != 0; ind--) \
 318                 *(++ftl) = 0x0; \

New version (asm replaced with the lint-clean __asm__ spelling):

 220  * support 2*N integer type, then you have to replace every N*N
 221  * multiplication with 4 (N/2)*(N/2) accompanied by some shifts
 222  * and additions which unavoidably results in severe performance
 223  * penalties. Of course provided that the hardware is capable of
 224  * producing 2*N result... That's when you normally start
 225  * considering assembler implementation. However! It should be
 226  * pointed out that some CPUs (most notably Alpha, PowerPC and
 227  * upcoming IA-64 family:-) provide *separate* instruction
 228  * calculating the upper half of the product placing the result
 229  * into a general purpose register. Now *if* the compiler supports
 230  * inline assembler, then it's not impossible to implement the
 231  * "bignum" routines (and have the compiler optimize 'em)
 232  * exhibiting "native" performance in C. That's what BN_UMULT_HIGH
 233  * macro is about:-)
 234  *
 235  *                                      <appro@fy.chalmers.se>
 236  */
 237 # if defined(__alpha) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
 238 #  if defined(__DECC)
 239 #   include <c_asm.h>
 240 #   define BN_UMULT_HIGH(a,b)   (BN_ULONG)__asm__("umulh %a0,%a1,%v0",(a),(b))
 241 #  elif defined(__GNUC__) && __GNUC__>=2
 242 #   define BN_UMULT_HIGH(a,b)   ({      \
 243         register BN_ULONG ret;          \
 244         __asm__ ("umulh %1,%2,%0"       \
 245              : "=r"(ret)                \
 246              : "r"(a), "r"(b));         \
 247         ret;                    })
 248 #  endif        /* compiler */
 249 # elif defined(_ARCH_PPC) && defined(__64BIT__) && defined(SIXTY_FOUR_BIT_LONG)
 250 #  if defined(__GNUC__) && __GNUC__>=2
 251 #   define BN_UMULT_HIGH(a,b)   ({      \
 252         register BN_ULONG ret;          \
 253         __asm__ ("mulhdu        %0,%1,%2"       \
 254              : "=r"(ret)                \
 255              : "r"(a), "r"(b));         \
 256         ret;                    })
 257 #  endif        /* compiler */
 258 # elif (defined(__x86_64) || defined(__x86_64__)) && \
 259        (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
 260 #  if defined(__GNUC__) && __GNUC__>=2
 261 #   define BN_UMULT_HIGH(a,b)   ({      \
 262         register BN_ULONG ret,discard;  \
 263         __asm__ ("mulq  %3"             \
 264              : "=a"(discard),"=d"(ret)  \
 265              : "a"(a), "g"(b)           \
 266              : "cc");                   \
 267         ret;                    })
 268 #   define BN_UMULT_LOHI(low,high,a,b)  \
 269         __asm__ ("mulq  %3"             \
 270                 : "=a"(low),"=d"(high)  \
 271                 : "a"(a),"g"(b)         \
 272                 : "cc");
 273 #  endif
 274 # elif (defined(_M_AMD64) || defined(_M_X64)) && defined(SIXTY_FOUR_BIT)
 275 #  if defined(_MSC_VER) && _MSC_VER>=1400
 276     unsigned __int64 __umulh    (unsigned __int64 a,unsigned __int64 b);
 277     unsigned __int64 _umul128   (unsigned __int64 a,unsigned __int64 b,
 278                                  unsigned __int64 *h);
 279 #   pragma intrinsic(__umulh,_umul128)
 280 #   define BN_UMULT_HIGH(a,b)           __umulh((a),(b))
 281 #   define BN_UMULT_LOHI(low,high,a,b)  ((low)=_umul128((a),(b),&(high)))
 282 #  endif
 283 # elif defined(__mips) && (defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG))
 284 #  if defined(__GNUC__) && __GNUC__>=2
 285 #   if __GNUC__>=4 && __GNUC_MINOR__>=4 /* "h" constraint is no more since 4.4 */
 286 #     define BN_UMULT_HIGH(a,b)          (((__uint128_t)(a)*(b))>>64)
 287 #     define BN_UMULT_LOHI(low,high,a,b) ({     \
 288         __uint128_t ret=(__uint128_t)(a)*(b);   \
 289         (high)=ret>>64; (low)=ret;         })
 290 #   else
 291 #     define BN_UMULT_HIGH(a,b) ({      \
 292         register BN_ULONG ret;          \
 293         __asm__ ("dmultu        %1,%2"          \
 294              : "=h"(ret)                \
 295              : "r"(a), "r"(b) : "l");   \
 296         ret;                    })
 297 #     define BN_UMULT_LOHI(low,high,a,b)\
 298         __asm__ ("dmultu        %2,%3"          \
 299              : "=l"(low),"=h"(high)     \
 300              : "r"(a), "r"(b));
 301 #    endif
 302 #  endif
 303 # endif         /* cpu */
 304 #endif          /* OPENSSL_NO_ASM */
 305 
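The fallback this machinery avoids is worth seeing concretely. The following
is a minimal, illustrative C sketch (not part of this change; uint64_t stands
in for a 64-bit BN_ULONG) of the portable path the comment describes: building
the high half of a 64x64 multiply out of four 32x32 products plus shifts and
additions.

    #include <stdint.h>

    /*
     * Illustrative only: the "4 (N/2)*(N/2)" software technique the
     * comment above describes. Builds matching one of the CPU/compiler
     * pairs above get a single-instruction BN_UMULT_HIGH instead.
     */
    static uint64_t
    umult_high_portable(uint64_t a, uint64_t b)
    {
            uint64_t a_lo = a & 0xffffffffU, a_hi = a >> 32;
            uint64_t b_lo = b & 0xffffffffU, b_hi = b >> 32;
            uint64_t lo = a_lo * b_lo;      /* bits   0..63  of a*b */
            uint64_t m1 = a_hi * b_lo;      /* bits  32..95  */
            uint64_t m2 = a_lo * b_hi;      /* bits  32..95  */
            uint64_t hi = a_hi * b_hi;      /* bits  64..127 */
            /* carry out of the low 64 bits once the middle terms land */
            uint64_t carry = ((lo >> 32) + (m1 & 0xffffffffU) +
                (m2 & 0xffffffffU)) >> 32;

            return (hi + (m1 >> 32) + (m2 >> 32) + carry);
    }

On hardware with a umulh/mulhdu/mulq-style instruction, the macros above
collapse all of this into a single instruction, which is the performance point
the original comment is making.
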
 306 /*************************************************************
 307  * Using the long long type
 308  */
 309 #define Lw(t)    (((BN_ULONG)(t))&BN_MASK2)
 310 #define Hw(t)    (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2)
 311 
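Lw() and Hw() split a double-width BN_ULLONG value into its low and high
BN_ULONG words. A hedged sketch of the accumulate-and-carry pattern they exist
to support on the long-long configuration (the macro name here is
illustrative, not from this file):

    /*
     * Illustrative sketch: multiply a word by w, add the incoming carry,
     * then split the double-width result -- the low word is the output
     * digit, the high word becomes the carry into the next position.
     */
    #define MUL_STEP_SKETCH(r, a, w, c) { \
            BN_ULLONG t = (BN_ULLONG)(w) * (a) + (c); \
            (r) = Lw(t); \
            (c) = Hw(t); \
    }
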
 312 #ifdef BN_DEBUG_RAND
 313 #define bn_clear_top2max(a) \
 314         { \
 315         int      ind = (a)->dmax - (a)->top; \
 316         BN_ULONG *ftl = &(a)->d[(a)->top-1]; \
 317         for (; ind != 0; ind--) \
 318                 *(++ftl) = 0x0; \
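
For context, bn_clear_top2max() is a BN_DEBUG_RAND-only helper: it scrubs the
allocated but unused words of a BIGNUM so that debug builds catch code
depending on stale data beyond ->top. A function-form sketch of the same
behavior (illustrative; d, top and dmax are the usual BIGNUM fields from
<openssl/bn.h>):

    #include <openssl/bn.h>

    /* Illustrative equivalent of bn_clear_top2max() under BN_DEBUG_RAND. */
    static void
    clear_top2max_sketch(BIGNUM *a)
    {
            int i;

            for (i = a->top; i < a->dmax; i++)
                    a->d[i] = 0;    /* zero words above the significant top */
    }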