#!/usr/bin/env perl
#
# Perlasm generator for x86_64 CPU-identification and utility routines.
#
# Command line: [flavour] [output]
#   flavour - assembler flavour handed on to x86_64-xlate.pl
#   output  - output file name handed on to x86_64-xlate.pl
# Everything this script prints is piped through x86_64-xlate.pl.

$flavour = shift;
$output  = shift;
# A first argument containing a dot is taken to be the output file name,
# i.e. the flavour was omitted.
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# The Win64 calling convention is selected by a nasm/masm/mingw64 flavour
# or by an output file name ending in ".asm".
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Look for the translator next to this script, then in a perlasm/
# sub-directory of the script's directory.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";

# Pipe all subsequent output through the translator.  The translator path is
# quoted so that a directory name containing spaces survives the shell, and
# a failure to start the pipe is fatal instead of being silently ignored.
# $output is intentionally left unquoted: when it is empty no argument at
# all must be passed on.
open OUT,"| \"$^X\" \"$xlate\" $flavour $output"
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Registers carrying the first four integer arguments in the selected ABI.
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order
                                 ("%rdi","%rsi","%rdx","%rcx"); # Unix order
  20 print<<___;
  21 .extern         OPENSSL_cpuid_setup
  22 .hidden         OPENSSL_cpuid_setup
  23 .section        .init
  24         call    OPENSSL_cpuid_setup
  26 .hidden OPENSSL_ia32cap_P
  27 .comm   OPENSSL_ia32cap_P,8,4
  29 .text
  31 .globl  OPENSSL_atomic_add
  32 .type   OPENSSL_atomic_add,\@abi-omnipotent
  33 .align  16
  34 OPENSSL_atomic_add:
  35         movl    ($arg1),%eax
  36 .Lspin: leaq    ($arg2,%rax),%r8
  37         .byte   0xf0            # lock
  38         cmpxchgl        %r8d,($arg1)
  39         jne     .Lspin
  40         movl    %r8d,%eax
  41         .byte   0x48,0x98       # cltq/cdqe
  42         ret
  43 .size   OPENSSL_atomic_add,.-OPENSSL_atomic_add
  45 .globl  OPENSSL_rdtsc
  46 .type   OPENSSL_rdtsc,\@abi-omnipotent
  47 .align  16
  48 OPENSSL_rdtsc:
  49         rdtsc
  50         shl     \$32,%rdx
  51         or      %rdx,%rax
  52         ret
  53 .size   OPENSSL_rdtsc,.-OPENSSL_rdtsc
  55 .globl  OPENSSL_ia32_cpuid
  56 .type   OPENSSL_ia32_cpuid,\@abi-omnipotent
  57 .align  16
  58 OPENSSL_ia32_cpuid:
  59         mov     %rbx,%r8                # save %rbx
  61         xor     %eax,%eax
  62         cpuid
  63         mov     %eax,%r11d              # max value for standard query level
  65         xor     %eax,%eax
  66         cmp     \$0x756e6547,%ebx       # "Genu"
  67         setne   %al
  68         mov     %eax,%r9d
  69         cmp     \$0x49656e69,%edx       # "ineI"
  70         setne   %al
  71         or      %eax,%r9d
  72         cmp     \$0x6c65746e,%ecx       # "ntel"
  73         setne   %al
  74         or      %eax,%r9d               # 0 indicates Intel CPU
  75         jz      .Lintel
  77         cmp     \$0x68747541,%ebx       # "Auth"
  78         setne   %al
  79         mov     %eax,%r10d
  80         cmp     \$0x69746E65,%edx       # "enti"
  81         setne   %al
  82         or      %eax,%r10d
  83         cmp     \$0x444D4163,%ecx       # "cAMD"
  84         setne   %al
  85         or      %eax,%r10d              # 0 indicates AMD CPU
  86         jnz     .Lintel
  88         # AMD specific
  89         mov     \$0x80000000,%eax
  90         cpuid
  91         cmp     \$0x80000001,%eax
  92         jb      .Lintel
  93         mov     %eax,%r10d
  94         mov     \$0x80000001,%eax
  95         cpuid
  96         or      %ecx,%r9d
  97         and     \$0x00000801,%r9d       # isolate AMD XOP bit, 1<<11
  99         cmp     \$0x80000008,%r10d
 100         jb      .Lintel
 102         mov     \$0x80000008,%eax
 103         cpuid
 104         movzb   %cl,%r10                # number of cores - 1
 105         inc     %r10                    # number of cores
 107         mov     \$1,%eax
 108         cpuid
 109         bt      \$28,%edx               # test hyper-threading bit
 110         jnc     .Lgeneric
 111         shr     \$16,%ebx               # number of logical processors
 112         cmp     %r10b,%bl
 113         ja      .Lgeneric
 114         and     \$0xefffffff,%edx       # ~(1<<28)
 115         jmp     .Lgeneric
 117 .Lintel:
 118         cmp     \$4,%r11d
 119         mov     \$-1,%r10d
 120         jb      .Lnocacheinfo
 122         mov     \$4,%eax
 123         mov     \$0,%ecx                # query L1D
 124         cpuid
 125         mov     %eax,%r10d
 126         shr     \$14,%r10d
 127         and     \$0xfff,%r10d           # number of cores -1 per L1D
 129 .Lnocacheinfo:
 130         mov     \$1,%eax
 131         cpuid
 132         and     \$0xbfefffff,%edx       # force reserved bits to 0
 133         cmp     \$0,%r9d
 134         jne     .Lnotintel
 135         or      \$0x40000000,%edx       # set reserved bit#30 on Intel CPUs
 136         and     \$15,%ah
 137         cmp     \$15,%ah                # examine Family ID
 138         jne     .Lnotintel
 139         or      \$0x00100000,%edx       # set reserved bit#20 to engage RC4_CHAR
 140 .Lnotintel:
 141         bt      \$28,%edx               # test hyper-threading bit
 142         jnc     .Lgeneric
 143         and     \$0xefffffff,%edx       # ~(1<<28)
 144         cmp     \$0,%r10d
 145         je      .Lgeneric
 147         or      \$0x10000000,%edx       # 1<<28
 148         shr     \$16,%ebx
 149         cmp     \$1,%bl                 # see if cache is shared
 150         ja      .Lgeneric
 151         and     \$0xefffffff,%edx       # ~(1<<28)
 152 .Lgeneric:
 153         and     \$0x00000800,%r9d       # isolate AMD XOP flag
 154         and     \$0xfffff7ff,%ecx
 155         or      %ecx,%r9d               # merge AMD XOP flag
 157         mov     %edx,%r10d              # %r9d:%r10d is copy of %ecx:%edx
 158         bt      \$27,%r9d               # check OSXSAVE bit
 159         jnc     .Lclear_avx
 160         xor     %ecx,%ecx               # XCR0
 161         .byte   0x0f,0x01,0xd0          # xgetbv
 162         and     \$6,%eax                # isolate XMM and YMM state support
 163         cmp     \$6,%eax
 164         je      .Ldone
 165 .Lclear_avx:
 166         mov     \$0xefffe7ff,%eax       # ~(1<<28|1<<12|1<<11)
 167         and     %eax,%r9d               # clear AVX, FMA and AMD XOP bits
 168 .Ldone:
 169         shl     \$32,%r9
 170         mov     %r10d,%eax
 171         mov     %r8,%rbx                # restore %rbx
 172         or      %r9,%rax
 173         ret
 174 .size   OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
 176 .globl  OPENSSL_cleanse
 177 .type   OPENSSL_cleanse,\@abi-omnipotent
 178 .align  16
 179 OPENSSL_cleanse:
 180         xor     %rax,%rax
 181         cmp     \$15,$arg2
 182         jae     .Lot
 183         cmp     \$0,$arg2
 184         je      .Lret
 185 .Little:
 186         mov     %al,($arg1)
 187         sub     \$1,$arg2
 188         lea     1($arg1),$arg1
 189         jnz     .Little
 190 .Lret:
 191         ret
 192 .align  16
 193 .Lot:
 194         test    \$7,$arg1
 195         jz      .Laligned
 196         mov     %al,($arg1)
 197         lea     -1($arg2),$arg2
 198         lea     1($arg1),$arg1
 199         jmp     .Lot
 200 .Laligned:
 201         mov     %rax,($arg1)
 202         lea     -8($arg2),$arg2
 203         test    \$-8,$arg2
 204         lea     8($arg1),$arg1
 205         jnz     .Laligned
 206         cmp     \$0,$arg2
 207         jne     .Little
 208         ret
 209 .size   OPENSSL_cleanse,.-OPENSSL_cleanse
 210 ___
 212 print<<___ if (!$win64);
 213 .globl  OPENSSL_wipe_cpu
 214 .type   OPENSSL_wipe_cpu,\@abi-omnipotent
 215 .align  16
 216 OPENSSL_wipe_cpu:
 217         pxor    %xmm0,%xmm0
 218         pxor    %xmm1,%xmm1
 219         pxor    %xmm2,%xmm2
 220         pxor    %xmm3,%xmm3
 221         pxor    %xmm4,%xmm4
 222         pxor    %xmm5,%xmm5
 223         pxor    %xmm6,%xmm6
 224         pxor    %xmm7,%xmm7
 225         pxor    %xmm8,%xmm8
 226         pxor    %xmm9,%xmm9
 227         pxor    %xmm10,%xmm10
 228         pxor    %xmm11,%xmm11
 229         pxor    %xmm12,%xmm12
 230         pxor    %xmm13,%xmm13
 231         pxor    %xmm14,%xmm14
 232         pxor    %xmm15,%xmm15
 233         xorq    %rcx,%rcx
 234         xorq    %rdx,%rdx
 235         xorq    %rsi,%rsi
 236         xorq    %rdi,%rdi
 237         xorq    %r8,%r8
 238         xorq    %r9,%r9
 239         xorq    %r10,%r10
 240         xorq    %r11,%r11
 241         leaq    8(%rsp),%rax
 242         ret
 243 .size   OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
 244 ___
 245 print<<___ if ($win64);
 246 .globl  OPENSSL_wipe_cpu
 247 .type   OPENSSL_wipe_cpu,\@abi-omnipotent
 248 .align  16
 249 OPENSSL_wipe_cpu:
 250         pxor    %xmm0,%xmm0
 251         pxor    %xmm1,%xmm1
 252         pxor    %xmm2,%xmm2
 253         pxor    %xmm3,%xmm3
 254         pxor    %xmm4,%xmm4
 255         pxor    %xmm5,%xmm5
 256         xorq    %rcx,%rcx
 257         xorq    %rdx,%rdx
 258         xorq    %r8,%r8
 259         xorq    %r9,%r9
 260         xorq    %r10,%r10
 261         xorq    %r11,%r11
 262         leaq    8(%rsp),%rax
 263         ret
 264 .size   OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
 265 ___
 267 print<<___;
 268 .globl  OPENSSL_ia32_rdrand
 269 .type   OPENSSL_ia32_rdrand,\@abi-omnipotent
 270 .align  16
 271 OPENSSL_ia32_rdrand:
 272         mov     \$8,%ecx
 273 .Loop_rdrand:
 274         rdrand  %rax
 275         jc      .Lbreak_rdrand
 276         loop    .Loop_rdrand
 277 .Lbreak_rdrand:
 278         cmp     \$0,%rax
 279         cmove   %rcx,%rax
 280         ret
 281 .size   OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand
 282 ___
 284 close STDOUT;   # flush