#!/usr/bin/env perl

# Generator preamble.
#
# Command line: [flavour] [output]
#   flavour - assembler flavour passed through to x86_64-xlate.pl
#   output  - output file name passed through to x86_64-xlate.pl
# When the first argument contains a dot it is taken to be the output
# file and no flavour is used.
#
# Side effects: locates x86_64-xlate.pl next to this script (or under
# perlasm/), starts it under the running perl interpreter, and aliases
# STDOUT to the pipe so every later print feeds the translator.
# Dies when the translator cannot be found or the pipe cannot be opened.

$flavour = shift;
$output  = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }

# Win64 is selected by a nasm/masm/mingw64 flavour or by a .asm output name.
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);

# Directory part of this script's path, including the trailing separator.
# Only read the capture when the match succeeded; a bare script name has
# no directory part and yields the empty string.
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
	die "can't locate x86_64-xlate.pl";

# Build the translator command.  The interpreter, script and output paths
# are quoted so that paths containing spaces survive the shell; absent
# arguments are left out instead of being passed as empty strings.
my $xlate_cmd = "\"$^X\" \"$xlate\"";
$xlate_cmd .= " $flavour"    if defined $flavour && length $flavour;
$xlate_cmd .= " \"$output\"" if defined $output  && length $output;

open OUT, "|-", $xlate_cmd or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Registers that carry the first four integer arguments under the selected
# calling convention.  The assembly heredocs in this file interpolate
# $arg1 and $arg2, so one text serves both conventions.
if ($win64) {
	($arg1, $arg2, $arg3, $arg4) = ("%rcx", "%rdx", "%r8", "%r9");	# Win64 order
} else {
	($arg1, $arg2, $arg3, $arg4) = ("%rdi", "%rsi", "%rdx", "%rcx");	# Unix order
}

# Main assembly stream.  Emits, in order: the .init hook, the
# OPENSSL_ia32cap_P capability word, and the routines OPENSSL_atomic_add,
# OPENSSL_rdtsc, OPENSSL_ia32_cpuid and OPENSSL_cleanse.  $arg1 and $arg2
# interpolate to the first two argument registers of the selected calling
# convention; literal dollar and at signs are backslash-escaped because
# the heredoc interpolates.
print<<___;
# Startup hook: both setup routines are called from the .init section,
# illumos_locking_setup first.
.extern	OPENSSL_cpuid_setup
.hidden	OPENSSL_cpuid_setup
.extern	illumos_locking_setup
.hidden	illumos_locking_setup
.section	.init
	call	illumos_locking_setup
	call	OPENSSL_cpuid_setup

# Capability word: hidden common symbol, 8 bytes, alignment 4.
.hidden	OPENSSL_ia32cap_P
.comm	OPENSSL_ia32cap_P,8,4

.text

# OPENSSL_atomic_add(ptr, amount)
# Adds amount to the 32-bit value at ptr using a lock-prefixed
# compare-and-exchange retry loop.  Returns the updated value,
# sign-extended to 64 bits, in %rax.
.globl	OPENSSL_atomic_add
.type	OPENSSL_atomic_add,\@abi-omnipotent
.align	16
OPENSSL_atomic_add:
	movl	($arg1),%eax		# current value, used as the expected operand
.Lspin:	leaq	($arg2,%rax),%r8	# candidate = amount + expected
	.byte	0xf0		# lock
	cmpxchgl	%r8d,($arg1)	# store candidate if memory still equals %eax
	jne	.Lspin			# memory changed: %eax now holds it, retry
	movl	%r8d,%eax
	.byte	0x48,0x98	# cltq/cdqe
	ret
.size	OPENSSL_atomic_add,.-OPENSSL_atomic_add

# OPENSSL_rdtsc()
# Returns the rdtsc result as one 64-bit value in %rax
# (%edx supplies the upper half, %eax the lower half).
.globl	OPENSSL_rdtsc
.type	OPENSSL_rdtsc,\@abi-omnipotent
.align	16
OPENSSL_rdtsc:
	rdtsc
	shl	\$32,%rdx
	or	%rdx,%rax
	ret
.size	OPENSSL_rdtsc,.-OPENSSL_rdtsc

# OPENSSL_ia32_cpuid()
# Queries cpuid and returns a 64-bit capability word in %rax: the low
# half is the adjusted %edx of leaf 1, the high half the adjusted %ecx
# of leaf 1.  Adjustments made below:
#   - %edx bit 30 is set on Intel CPUs, bit 20 on Intel family 15
#   - %edx bit 28 (hyper-threading) is cleared when the core and
#     logical-processor counts indicate no sharing
#   - %ecx bit 11 is replaced by the AMD XOP flag
#   - AVX, FMA and XOP bits are cleared unless OSXSAVE is set and XCR0
#     reports both XMM and YMM state
# %rbx is preserved by parking it in %r8.
.globl	OPENSSL_ia32_cpuid
.type	OPENSSL_ia32_cpuid,\@abi-omnipotent
.align	16
OPENSSL_ia32_cpuid:
	mov	%rbx,%r8		# save %rbx

	xor	%eax,%eax
	cpuid
	mov	%eax,%r11d		# max value for standard query level

	xor	%eax,%eax
	cmp	\$0x756e6547,%ebx	# "Genu"
	setne	%al
	mov	%eax,%r9d
	cmp	\$0x49656e69,%edx	# "ineI"
	setne	%al
	or	%eax,%r9d
	cmp	\$0x6c65746e,%ecx	# "ntel"
	setne	%al
	or	%eax,%r9d		# 0 indicates Intel CPU
	jz	.Lintel

	cmp	\$0x68747541,%ebx	# "Auth"
	setne	%al
	mov	%eax,%r10d
	cmp	\$0x69746E65,%edx	# "enti"
	setne	%al
	or	%eax,%r10d
	cmp	\$0x444D4163,%ecx	# "cAMD"
	setne	%al
	or	%eax,%r10d		# 0 indicates AMD CPU
	jnz	.Lintel

	# AMD specific
	mov	\$0x80000000,%eax
	cpuid
	cmp	\$0x80000001,%eax
	jb	.Lintel			# extended leaf 0x80000001 not available
	mov	%eax,%r10d		# max extended query level
	mov	\$0x80000001,%eax
	cpuid
	or	%ecx,%r9d
	and	\$0x00000801,%r9d	# isolate AMD XOP bit, 1<<11

	cmp	\$0x80000008,%r10d
	jb	.Lintel			# extended leaf 0x80000008 not available

	mov	\$0x80000008,%eax
	cpuid
	movzb	%cl,%r10		# number of cores - 1
	inc	%r10			# number of cores

	mov	\$1,%eax
	cpuid
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	shr	\$16,%ebx		# number of logical processors
	cmp	%r10b,%bl
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	jmp	.Lgeneric

# Reached for Intel, for vendors that are neither Intel nor AMD, and for
# AMD parts that lack the extended leaves probed above.
.Lintel:
	cmp	\$4,%r11d
	mov	\$-1,%r10d
	jb	.Lnocacheinfo

	mov	\$4,%eax
	mov	\$0,%ecx		# query L1D
	cpuid
	mov	%eax,%r10d
	shr	\$14,%r10d
	and	\$0xfff,%r10d		# number of cores -1 per L1D

.Lnocacheinfo:
	mov	\$1,%eax
	cpuid
	and	\$0xbfefffff,%edx	# force reserved bits to 0
	cmp	\$0,%r9d
	jne	.Lnotintel
	or	\$0x40000000,%edx	# set reserved bit#30 on Intel CPUs
	and	\$15,%ah
	cmp	\$15,%ah		# examine Family ID
	jne	.Lnotintel
	or	\$0x00100000,%edx	# set reserved bit#20 to engage RC4_CHAR
.Lnotintel:
	bt	\$28,%edx		# test hyper-threading bit
	jnc	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
	cmp	\$0,%r10d
	je	.Lgeneric

	or	\$0x10000000,%edx	# 1<<28
	shr	\$16,%ebx
	cmp	\$1,%bl			# see if cache is shared
	ja	.Lgeneric
	and	\$0xefffffff,%edx	# ~(1<<28)
.Lgeneric:
	and	\$0x00000800,%r9d	# isolate AMD XOP flag
	and	\$0xfffff7ff,%ecx
	or	%ecx,%r9d		# merge AMD XOP flag

	mov	%edx,%r10d		# %r9d:%r10d is copy of %ecx:%edx
	bt	\$27,%r9d		# check OSXSAVE bit
	jnc	.Lclear_avx
	xor	%ecx,%ecx		# XCR0
	.byte	0x0f,0x01,0xd0		# xgetbv
	and	\$6,%eax		# isolate XMM and YMM state support
	cmp	\$6,%eax
	je	.Ldone
.Lclear_avx:
	mov	\$0xefffe7ff,%eax	# ~(1<<28|1<<12|1<<11)
	and	%eax,%r9d		# clear AVX, FMA and AMD XOP bits
.Ldone:
	shl	\$32,%r9
	mov	%r10d,%eax
	mov	%r8,%rbx		# restore %rbx
	or	%r9,%rax
	ret
.size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid

# OPENSSL_cleanse(ptr, len)
# Stores zero over len bytes starting at ptr.  Lengths below 15 use the
# byte loop.  Longer buffers are advanced byte-wise to an 8-byte
# boundary, cleared in 8-byte words, and any tail is finished by the
# byte loop.
.globl	OPENSSL_cleanse
.type	OPENSSL_cleanse,\@abi-omnipotent
.align	16
OPENSSL_cleanse:
	xor	%rax,%rax		# zero source for every store
	cmp	\$15,$arg2
	jae	.Lot			# 15 bytes or more: word path
	cmp	\$0,$arg2
	je	.Lret			# nothing to clear
# byte-at-a-time loop
.Little:
	mov	%al,($arg1)
	sub	\$1,$arg2
	lea	1($arg1),$arg1		# lea leaves the flags from sub intact
	jnz	.Little
.Lret:
	ret
.align	16
# Byte stores until ptr is 8-byte aligned.  At most 7 bytes are used up
# here, so at least 8 remain for the first word store.
.Lot:
	test	\$7,$arg1
	jz	.Laligned
	mov	%al,($arg1)
	lea	-1($arg2),$arg2
	lea	1($arg1),$arg1
	jmp	.Lot
# 8-byte stores while 8 or more bytes remain
.Laligned:
	mov	%rax,($arg1)
	lea	-8($arg2),$arg2
	test	\$-8,$arg2		# any bit at or above 8 still set in len
	lea	8($arg1),$arg1		# lea leaves the flags from test intact
	jnz	.Laligned
	cmp	\$0,$arg2
	jne	.Little			# 1 to 7 tail bytes
	ret
.size	OPENSSL_cleanse,.-OPENSSL_cleanse
___

# OPENSSL_wipe_cpu, variant emitted when $win64 is false.
# Zeroes %xmm0-%xmm15 and the integer registers %rcx, %rdx, %rsi, %rdi,
# %r8-%r11, then returns %rsp+8 in %rax.
# NOTE(review): %rsp+8 is presumably the caller's stack pointer as it was
# before the call; the intended use of that value is not visible here.
print<<___ if (!$win64);
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	pxor	%xmm10,%xmm10
	pxor	%xmm11,%xmm11
	pxor	%xmm12,%xmm12
	pxor	%xmm13,%xmm13
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%rsi,%rsi
	xorq	%rdi,%rdi
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
# OPENSSL_wipe_cpu, variant emitted when $win64 is true.
# Zeroes %xmm0-%xmm5 and the integer registers %rcx, %rdx, %r8-%r11, then
# returns %rsp+8 in %rax.  Unlike the non-Win64 variant it leaves
# %xmm6-%xmm15, %rsi and %rdi untouched.
# NOTE(review): presumably because the Win64 convention treats those
# registers as callee-saved; confirm against the ABI document.
print<<___ if ($win64);
.globl	OPENSSL_wipe_cpu
.type	OPENSSL_wipe_cpu,\@abi-omnipotent
.align	16
OPENSSL_wipe_cpu:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	xorq	%rcx,%rcx
	xorq	%rdx,%rdx
	xorq	%r8,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	leaq	8(%rsp),%rax
	ret
.size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___

# OPENSSL_ia32_rdrand: tries the rdrand instruction up to 8 times and
# returns the first value delivered with the carry flag set.
# Return convention in %rax: if %rax is zero after the loop, the
# remaining loop counter from %rcx is returned instead.
# NOTE(review): this assumes rdrand leaves zero in its destination when it
# fails, so that zero is returned only after all 8 attempts failed, while a
# successful draw of zero comes back as the non-zero counter; confirm
# against the instruction reference.
print<<___;
.globl	OPENSSL_ia32_rdrand
.type	OPENSSL_ia32_rdrand,\@abi-omnipotent
.align	16
OPENSSL_ia32_rdrand:
	mov	\$8,%ecx		# retry budget
.Loop_rdrand:
	rdrand	%rax
	jc	.Lbreak_rdrand		# carry set: value delivered
	loop	.Loop_rdrand		# decrements the counter, retries while non-zero
.Lbreak_rdrand:
	cmp	\$0,%rax
	cmove	%rcx,%rax		# zero result: hand back the counter
	ret
.size	OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand
___

# Flush and close the pipe to x86_64-xlate.pl.  Closing a piped handle
# waits for the child, so a failed write or a translator that exited with
# a non-zero status shows up here; report it instead of silently leaving
# truncated output behind.
close STDOUT
	or die "error closing STDOUT: "
	     . ($! ? "$!" : "x86_64-xlate.pl exited with status $?");	# flush