1 #!/usr/bin/env perl
   2 
   3 $flavour = shift;
   4 $output  = shift;
   5 if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
   6 
   7 $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
   8 
   9 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
  10 ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
  11 ( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
  12 die "can't locate x86_64-xlate.pl";
  13 
  14 open OUT,"| \"$^X\" $xlate $flavour $output";
  15 *STDOUT=*OUT;
  16 
  17 ($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order
  18                                  ("%rdi","%rsi","%rdx","%rcx"); # Unix order
  19 
  20 print<<___;
# Both setup routines are defined elsewhere; they are declared external
# with hidden visibility so the .init fragment below can call them.
.extern         OPENSSL_cpuid_setup
.hidden         OPENSSL_cpuid_setup
.extern         illumos_locking_setup
.hidden         illumos_locking_setup
# Startup hook: the two calls are emitted into the .init section,
# illumos_locking_setup first, then OPENSSL_cpuid_setup.
.section        .init
        call    illumos_locking_setup
        call    OPENSSL_cpuid_setup

# Capability vector: an 8-byte common symbol with hidden visibility.
# NOTE(review): presumably filled in by OPENSSL_cpuid_setup from the value
# returned by OPENSSL_ia32_cpuid - confirm against the C side.
.hidden OPENSSL_ia32cap_P
.comm   OPENSSL_ia32cap_P,8,4

.text
  33 
# OPENSSL_atomic_add: atomically add the value in the second argument
# register to the 32-bit word addressed by the first argument register.
# The update is retried with a locked compare-and-exchange until it
# succeeds.  Returns the new value in %rax, sign-extended from 32 bits.
.globl  OPENSSL_atomic_add
.type   OPENSSL_atomic_add,\@abi-omnipotent
.align  16
OPENSSL_atomic_add:
        # %eax = value currently stored at the target address
        movl    ($arg1),%eax
        # %r8 = candidate new value (current value + addend)
.Lspin: leaq    ($arg2,%rax),%r8
        .byte   0xf0            # lock
        # store %r8d only if memory still equals %eax; on mismatch the
        # instruction reloads %eax from memory and the loop retries
        cmpxchgl        %r8d,($arg1)
        jne     .Lspin
        movl    %r8d,%eax
        .byte   0x48,0x98       # cltq/cdqe
        ret
.size   OPENSSL_atomic_add,.-OPENSSL_atomic_add
  47 
# OPENSSL_rdtsc: return the time-stamp counter as one 64-bit value in %rax.
# rdtsc leaves the counter split across %edx (high half) and %eax (low
# half); the two halves are merged here.
.globl  OPENSSL_rdtsc
.type   OPENSSL_rdtsc,\@abi-omnipotent
.align  16
OPENSSL_rdtsc:
        rdtsc
        shl     \$32,%rdx               # move the high half into bits 32-63
        or      %rdx,%rax               # combine with the low half
        ret
.size   OPENSSL_rdtsc,.-OPENSSL_rdtsc
  57 
# OPENSSL_ia32_cpuid: probe the processor with cpuid and return a 64-bit
# capability word in %rax.  The low 32 bits are the adjusted %edx flags
# of cpuid leaf 1; the high 32 bits are the adjusted %ecx flags of the
# same leaf with the AMD XOP flag merged into bit 11.
# Takes no arguments.  %rbx is preserved by parking it in %r8; %rcx, %rdx
# and %r9-%r11 are used as scratch.
.globl  OPENSSL_ia32_cpuid
.type   OPENSSL_ia32_cpuid,\@abi-omnipotent
.align  16
OPENSSL_ia32_cpuid:
        mov     %rbx,%r8                # save %rbx

        # Leaf 0: highest standard leaf in %eax, vendor string in
        # %ebx, %edx, %ecx.
        xor     %eax,%eax
        cpuid
        mov     %eax,%r11d              # max value for standard query level

        # Accumulate vendor-string mismatches in %r9d.
        xor     %eax,%eax
        cmp     \$0x756e6547,%ebx       # "Genu"
        setne   %al
        mov     %eax,%r9d
        cmp     \$0x49656e69,%edx       # "ineI"
        setne   %al
        or      %eax,%r9d
        cmp     \$0x6c65746e,%ecx       # "ntel"
        setne   %al
        or      %eax,%r9d               # 0 indicates Intel CPU
        jz      .Lintel

        # Not Intel: accumulate AMD vendor-string mismatches in %r10d.
        cmp     \$0x68747541,%ebx       # "Auth"
        setne   %al
        mov     %eax,%r10d
        cmp     \$0x69746E65,%edx       # "enti"
        setne   %al
        or      %eax,%r10d
        cmp     \$0x444D4163,%ecx       # "cAMD"
        setne   %al
        or      %eax,%r10d              # 0 indicates AMD CPU
        # Neither Intel nor AMD: take the common path at .Lintel.
        jnz     .Lintel

        # AMD specific
        mov     \$0x80000000,%eax
        cpuid
        # Skip the extended queries when leaf 0x80000001 is not available.
        cmp     \$0x80000001,%eax
        jb      .Lintel
        mov     %eax,%r10d
        mov     \$0x80000001,%eax
        cpuid
        or      %ecx,%r9d
        and     \$0x00000801,%r9d       # isolate AMD XOP bit, 1<<11

        cmp     \$0x80000008,%r10d
        jb      .Lintel

        mov     \$0x80000008,%eax
        cpuid
        movzb   %cl,%r10                # number of cores - 1
        inc     %r10                    # number of cores

        mov     \$1,%eax
        cpuid
        bt      \$28,%edx               # test hyper-threading bit
        jnc     .Lgeneric
        shr     \$16,%ebx               # number of logical processors
        cmp     %r10b,%bl
        ja      .Lgeneric
        and     \$0xefffffff,%edx       # ~(1<<28)
        jmp     .Lgeneric

.Lintel:
        # mov does not modify the flags, so jb still acts on the cmp.
        # %r10d stays at -1 when leaf 4 cannot be queried.
        cmp     \$4,%r11d
        mov     \$-1,%r10d
        jb      .Lnocacheinfo

        mov     \$4,%eax
        mov     \$0,%ecx                # query L1D
        cpuid
        mov     %eax,%r10d
        shr     \$14,%r10d
        and     \$0xfff,%r10d           # number of cores -1 per L1D

.Lnocacheinfo:
        mov     \$1,%eax
        cpuid
        and     \$0xbfefffff,%edx       # force reserved bits to 0
        cmp     \$0,%r9d
        jne     .Lnotintel
        or      \$0x40000000,%edx       # set reserved bit#30 on Intel CPUs
        and     \$15,%ah
        cmp     \$15,%ah                # examine Family ID
        jne     .Lnotintel
        or      \$0x00100000,%edx       # set reserved bit#20 to engage RC4_CHAR
.Lnotintel:
        bt      \$28,%edx               # test hyper-threading bit
        jnc     .Lgeneric
        and     \$0xefffffff,%edx       # ~(1<<28)
        cmp     \$0,%r10d
        je      .Lgeneric

        or      \$0x10000000,%edx       # 1<<28
        shr     \$16,%ebx
        cmp     \$1,%bl                 # see if cache is shared
        ja      .Lgeneric
        and     \$0xefffffff,%edx       # ~(1<<28)
.Lgeneric:
        and     \$0x00000800,%r9d       # isolate AMD XOP flag
        and     \$0xfffff7ff,%ecx
        or      %ecx,%r9d               # merge AMD XOP flag

        mov     %edx,%r10d              # %r9d:%r10d is copy of %ecx:%edx
        bt      \$27,%r9d               # check OSXSAVE bit
        jnc     .Lclear_avx
        xor     %ecx,%ecx               # XCR0
        .byte   0x0f,0x01,0xd0          # xgetbv
        and     \$6,%eax                # isolate XMM and YMM state support
        cmp     \$6,%eax
        je      .Ldone
.Lclear_avx:
        mov     \$0xefffe7ff,%eax       # ~(1<<28|1<<12|1<<11)
        and     %eax,%r9d               # clear AVX, FMA and AMD XOP bits
.Ldone:
        # Assemble the result: %r9d (copy of %ecx) becomes the high half,
        # %r10d (copy of %edx) the low half of %rax.
        shl     \$32,%r9
        mov     %r10d,%eax
        mov     %r8,%rbx                # restore %rbx
        or      %r9,%rax
        ret
.size   OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
 178 
# OPENSSL_cleanse: overwrite a buffer with zero bytes.
#   first argument register  - address of the buffer
#   second argument register - length in bytes
# Lengths below 15 are cleared one byte at a time (.Little).  Longer
# buffers are first advanced byte by byte to an 8-byte boundary (.Lot),
# then cleared 8 bytes per iteration (.Laligned); a remaining tail of
# fewer than 8 bytes is finished by the byte loop.
.globl  OPENSSL_cleanse
.type   OPENSSL_cleanse,\@abi-omnipotent
.align  16
OPENSSL_cleanse:
        xor     %rax,%rax               # %rax = 0, the fill value
        cmp     \$15,$arg2
        jae     .Lot
        cmp     \$0,$arg2
        je      .Lret                   # zero length: nothing to do
.Little:
        mov     %al,($arg1)
        sub     \$1,$arg2
        # lea leaves the flags alone, so jnz still tests the sub result
        lea     1($arg1),$arg1
        jnz     .Little
.Lret:
        ret
.align  16
.Lot:
        test    \$7,$arg1               # already 8-byte aligned?
        jz      .Laligned
        mov     %al,($arg1)
        lea     -1($arg2),$arg2
        lea     1($arg1),$arg1
        jmp     .Lot
.Laligned:
        mov     %rax,($arg1)
        lea     -8($arg2),$arg2
        test    \$-8,$arg2              # at least 8 bytes still to clear?
        # lea leaves the flags alone, so jnz still tests the test result
        lea     8($arg1),$arg1
        jnz     .Laligned
        cmp     \$0,$arg2
        jne     .Little                 # clear the remaining tail bytes
        ret
.size   OPENSSL_cleanse,.-OPENSSL_cleanse
 213 ___
 214 
# OPENSSL_wipe_cpu, emitted when $win64 is false.
# Zeroes %xmm0-%xmm15 and the general-purpose registers %rcx, %rdx, %rsi,
# %rdi and %r8-%r11, then returns the address 8(%rsp) in %rax, i.e. the
# location just above the return-address slot on entry.
print<<___ if (!$win64);
.globl  OPENSSL_wipe_cpu
.type   OPENSSL_wipe_cpu,\@abi-omnipotent
.align  16
OPENSSL_wipe_cpu:
        pxor    %xmm0,%xmm0
        pxor    %xmm1,%xmm1
        pxor    %xmm2,%xmm2
        pxor    %xmm3,%xmm3
        pxor    %xmm4,%xmm4
        pxor    %xmm5,%xmm5
        pxor    %xmm6,%xmm6
        pxor    %xmm7,%xmm7
        pxor    %xmm8,%xmm8
        pxor    %xmm9,%xmm9
        pxor    %xmm10,%xmm10
        pxor    %xmm11,%xmm11
        pxor    %xmm12,%xmm12
        pxor    %xmm13,%xmm13
        pxor    %xmm14,%xmm14
        pxor    %xmm15,%xmm15
        xorq    %rcx,%rcx
        xorq    %rdx,%rdx
        xorq    %rsi,%rsi
        xorq    %rdi,%rdi
        xorq    %r8,%r8
        xorq    %r9,%r9
        xorq    %r10,%r10
        xorq    %r11,%r11
        leaq    8(%rsp),%rax
        ret
.size   OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
# OPENSSL_wipe_cpu, emitted when $win64 is true.
# Zeroes only %xmm0-%xmm5 and %rcx, %rdx, %r8-%r11, then returns the address
# 8(%rsp) in %rax, as the non-Win64 variant does.
# NOTE(review): the smaller register set presumably reflects which registers
# are callee-saved under the Win64 ABI (%xmm6-%xmm15, %rsi, %rdi) - confirm.
print<<___ if ($win64);
.globl  OPENSSL_wipe_cpu
.type   OPENSSL_wipe_cpu,\@abi-omnipotent
.align  16
OPENSSL_wipe_cpu:
        pxor    %xmm0,%xmm0
        pxor    %xmm1,%xmm1
        pxor    %xmm2,%xmm2
        pxor    %xmm3,%xmm3
        pxor    %xmm4,%xmm4
        pxor    %xmm5,%xmm5
        xorq    %rcx,%rcx
        xorq    %rdx,%rdx
        xorq    %r8,%r8
        xorq    %r9,%r9
        xorq    %r10,%r10
        xorq    %r11,%r11
        leaq    8(%rsp),%rax
        ret
.size   OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
 269 
# OPENSSL_ia32_rdrand: try the rdrand instruction up to 8 times and return
# the first successfully generated 64-bit value in %rax.
# The retry counter lives in %rcx; the loop exits early once rdrand sets the
# carry flag.  If %rax is zero at the exit, %rcx is returned in its place:
# that is 0 once all 8 attempts are used up, and the non-zero remaining count
# if a successful draw happened to be zero.
# NOTE(review): this relies on rdrand leaving 0 in the destination when it
# fails, so that a zero return means "no random value" - confirm against the
# instruction reference.
print<<___;
.globl  OPENSSL_ia32_rdrand
.type   OPENSSL_ia32_rdrand,\@abi-omnipotent
.align  16
OPENSSL_ia32_rdrand:
        mov     \$8,%ecx
.Loop_rdrand:
        rdrand  %rax
        jc      .Lbreak_rdrand
        loop    .Loop_rdrand
.Lbreak_rdrand:
        cmp     \$0,%rax
        cmove   %rcx,%rax
        ret
.size   OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand
___
 286 
 287 close STDOUT;   # flush