/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#ifndef _SYS_MACHPRIVREGS_H
#define _SYS_MACHPRIVREGS_H

#include <sys/hypervisor.h>

/*
 * Platform dependent instruction sequences for manipulating
 * privileged state
 */

#ifdef __cplusplus
extern "C" {
#endif

/*
 * CLI and STI are quite complex to virtualize!
 */

#if defined(__amd64)

#define CURVCPU(r) \
        movq    %gs:CPU_VCPU_INFO, r

#define CURTHREAD(r) \
        movq    %gs:CPU_THREAD, r

#elif defined(__i386)

#define CURVCPU(r) \
        movl    %gs:CPU_VCPU_INFO, r

#define CURTHREAD(r) \
        movl    %gs:CPU_THREAD, r

#endif  /* __i386 */

#define XEN_TEST_EVENT_PENDING(r) \
        testb   $0xff, VCPU_INFO_EVTCHN_UPCALL_PENDING(r)

#define XEN_SET_UPCALL_MASK(r) \
        movb    $1, VCPU_INFO_EVTCHN_UPCALL_MASK(r)

#define XEN_GET_UPCALL_MASK(r, mask) \
        movb    VCPU_INFO_EVTCHN_UPCALL_MASK(r), mask

#define XEN_TEST_UPCALL_MASK(r) \
        testb   $1, VCPU_INFO_EVTCHN_UPCALL_MASK(r)

#define XEN_CLEAR_UPCALL_MASK(r) \
        ASSERT_UPCALL_MASK_IS_SET; \
        movb    $0, VCPU_INFO_EVTCHN_UPCALL_MASK(r)
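
/*
 * For orientation: the fields touched above live in the per-CPU vcpu_info
 * structure shared with the hypervisor.  A rough sketch of the relevant
 * members (see xen/include/public/xen.h for the authoritative layout;
 * the excerpt below is illustrative, not a redefinition):
 *
 *      struct vcpu_info {
 *              uint8_t evtchn_upcall_pending;  // an event wants delivery
 *              uint8_t evtchn_upcall_mask;     // 1 == "interrupts disabled"
 *              ...
 *      };
 *
 * Setting evtchn_upcall_mask is the virtual equivalent of CLI; clearing
 * it (and then checking evtchn_upcall_pending) is the virtual STI.
 */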

#ifdef DEBUG

/*
 * Much logic depends on the upcall mask being set at
 * various points in the code; use this macro to validate.
 *
 * Need to use CURVCPU(r) to establish the vcpu pointer.
 */
#if defined(__amd64)

#define ASSERT_UPCALL_MASK_IS_SET \
        pushq   %r11; \
        CURVCPU(%r11); \
        XEN_TEST_UPCALL_MASK(%r11); \
        jne     6f; \
        cmpl    $0, stistipanic(%rip); \
        jle     6f; \
        movl    $-1, stistipanic(%rip); \
        movq    stistimsg(%rip), %rdi; \
        xorl    %eax, %eax; \
        call    panic; \
6:      pushq   %rax; \
        pushq   %rbx; \
        movl    %gs:CPU_ID, %eax; \
        leaq    .+0(%rip), %r11; \
        leaq    laststi(%rip), %rbx; \
        movq    %r11, (%rbx, %rax, 8); \
        popq    %rbx; \
        popq    %rax; \
        popq    %r11

#define SAVE_CLI_LOCATION \
        pushq   %rax; \
        pushq   %rbx; \
        pushq   %rcx; \
        movl    %gs:CPU_ID, %eax; \
        leaq    .+0(%rip), %rcx; \
        leaq    lastcli(%rip), %rbx; \
        movq    %rcx, (%rbx, %rax, 8); \
        popq    %rcx; \
        popq    %rbx; \
        popq    %rax

#elif defined(__i386)

#define ASSERT_UPCALL_MASK_IS_SET \
        pushl   %ecx; \
        CURVCPU(%ecx); \
        XEN_TEST_UPCALL_MASK(%ecx); \
        jne     6f; \
        cmpl    $0, stistipanic; \
        jle     6f; \
        movl    $-1, stistipanic; \
        movl    stistimsg, %ecx; \
        pushl   %ecx; \
        call    panic; \
6:      pushl   %eax; \
        pushl   %ebx; \
        movl    %gs:CPU_ID, %eax; \
        leal    .+0, %ecx; \
        leal    laststi, %ebx; \
        movl    %ecx, (%ebx, %eax, 4); \
        popl    %ebx; \
        popl    %eax; \
        popl    %ecx

#define SAVE_CLI_LOCATION \
        pushl   %eax; \
        pushl   %ebx; \
        pushl   %ecx; \
        movl    %gs:CPU_ID, %eax; \
        leal    .+0, %ecx; \
        leal    lastcli, %ebx; \
        movl    %ecx, (%ebx, %eax, 4); \
        popl    %ecx; \
        popl    %ebx; \
        popl    %eax

#endif  /* __i386 */

#else   /* DEBUG */

#define ASSERT_UPCALL_MASK_IS_SET       /* empty */
#define SAVE_CLI_LOCATION               /* empty */

#endif  /* DEBUG */

#define KPREEMPT_DISABLE(t) \
        addb    $1, T_PREEMPT(t)

#define KPREEMPT_ENABLE_NOKP(t) \
        subb    $1, T_PREEMPT(t)

#define CLI(r) \
        CURTHREAD(r); \
        KPREEMPT_DISABLE(r); \
        CURVCPU(r); \
        XEN_SET_UPCALL_MASK(r); \
        SAVE_CLI_LOCATION; \
        CURTHREAD(r); \
        KPREEMPT_ENABLE_NOKP(r)

#define CLIRET(r, ret) \
        CURTHREAD(r); \
        KPREEMPT_DISABLE(r); \
        CURVCPU(r); \
        XEN_GET_UPCALL_MASK(r, ret); \
        XEN_SET_UPCALL_MASK(r); \
        SAVE_CLI_LOCATION; \
        CURTHREAD(r); \
        KPREEMPT_ENABLE_NOKP(r)
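
/*
 * Illustrative only: a hedged sketch of how an assembly routine might use
 * the macros above (the routine name is hypothetical).  CLI(r) clobbers
 * the register handed to it; CLIRET(r, ret) additionally returns the
 * previous upcall mask in `ret' so the caller can restore the old state.
 *
 *      ENTRY(xpv_example_critical)
 *              CLI(%r11)       / virtual "cli": mask event upcalls
 *              ... short critical section ...
 *              STI             / virtual "sti": unmask, and take any
 *                              / pending event via the block trick below
 *              ret
 *      SET_SIZE(xpv_example_critical)
 */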

/*
 * We rely on the fact that HYPERVISOR_block clears the upcall mask for us
 * and then delivers an upcall if there is a pending event.  This gets a
 * callback onto this CPU without the danger of being preempted and
 * migrating to another CPU between enabling upcalls and delivering the
 * callback.
 */
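
/*
 * In C terms, the STI sequence below is roughly (a sketch for
 * illustration only; the packed-word and helper names are made up):
 *
 *      // Atomically: if (pending == 0 && mask == 1), clear the mask.
 *      if (cmpxchg16(&vcpu->pending_and_mask, 0x0100, 0x0000) != 0x0100) {
 *              // An event was already pending: let the hypervisor clear
 *              // the mask and deliver the upcall to this CPU for us.
 *              (void) HYPERVISOR_sched_op(SCHEDOP_block, NULL);
 *      }
 *
 * The 16-bit lock cmpxchg works because the pending and mask bytes are
 * adjacent in the vcpu_info structure; the sched_op/SCHEDOP_block
 * hypercall is what the assembly issues via TRAP_INSTR below.
 */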
#if defined(__amd64)

#define STI_CLOBBER     /* clobbers %rax, %rdi, %r11 */ \
        CURVCPU(%r11); \
        ASSERT_UPCALL_MASK_IS_SET; \
        movw    $0x100, %ax;    /* assume mask set, pending clear */ \
        movw    $0, %di;        /* clear mask and pending */ \
        lock; \
        cmpxchgw %di, VCPU_INFO_EVTCHN_UPCALL_PENDING(%r11); \
        jz      7f;             /* xchg worked, we're done */ \
        movl    $__HYPERVISOR_sched_op, %eax;   /* have pending upcall */ \
        movl    $SCHEDOP_block, %edi; \
        pushq   %rsi;   /* hypercall clobbers C param regs plus r10 */ \
        pushq   %rcx; \
        pushq   %rdx; \
        pushq   %r8; \
        pushq   %r9; \
        pushq   %r10; \
        TRAP_INSTR;     /* clear upcall mask, force upcall */ \
        popq    %r10; \
        popq    %r9; \
        popq    %r8; \
        popq    %rdx; \
        popq    %rcx; \
        popq    %rsi; \
7:

#define STI \
        pushq   %r11; \
        pushq   %rdi; \
        pushq   %rax; \
        STI_CLOBBER;    /* clobbers %r11, %rax, %rdi */ \
        popq    %rax; \
        popq    %rdi; \
        popq    %r11

#elif defined(__i386)

#define STI_CLOBBER     /* clobbers %eax, %ebx, %ecx */ \
        CURVCPU(%ecx); \
        ASSERT_UPCALL_MASK_IS_SET; \
        movw    $0x100, %ax;    /* assume mask set, pending clear */ \
        movw    $0, %bx;        /* clear mask and pending */ \
        lock; \
        cmpxchgw %bx, VCPU_INFO_EVTCHN_UPCALL_PENDING(%ecx); \
        jz      7f;             /* xchg worked, we're done */ \
        movl    $__HYPERVISOR_sched_op, %eax;   /* have pending upcall */ \
        movl    $SCHEDOP_block, %ebx; \
        TRAP_INSTR;     /* clear upcall mask, force upcall */ \
7:

#define STI \
        pushl   %eax; \
        pushl   %ebx; \
        pushl   %ecx; \
        STI_CLOBBER;    /* clobbers %eax, %ebx, %ecx */ \
        popl    %ecx; \
        popl    %ebx; \
        popl    %eax

#endif  /* __i386 */

/*
 * Map the PS_IE bit to the hypervisor's event mask bit.
 * To -set- the event mask, we have to do a CLI; to -clear- the
 * event mask, we have to do an STI (with all the accompanying
 * preemption and callbacks, ick).
 *
 * And vice versa.
 */

#if defined(__amd64)

#define IE_TO_EVENT_MASK(rtmp, rfl) \
        testq   $PS_IE, rfl; \
        jnz     4f; \
        CLI(rtmp); \
        jmp     5f; \
4:      STI; \
5:

#define EVENT_MASK_TO_IE(rtmp, rfl) \
        andq    $_BITNOT(PS_IE), rfl; \
        CURVCPU(rtmp); \
        XEN_TEST_UPCALL_MASK(rtmp); \
        jnz     1f; \
        orq     $PS_IE, rfl; \
1:

#elif defined(__i386)

#define IE_TO_EVENT_MASK(rtmp, rfl) \
        testl   $PS_IE, rfl; \
        jnz     4f; \
        CLI(rtmp); \
        jmp     5f; \
4:      STI; \
5:

#define EVENT_MASK_TO_IE(rtmp, rfl) \
        andl    $_BITNOT(PS_IE), rfl; \
        CURVCPU(rtmp); \
        XEN_TEST_UPCALL_MASK(rtmp); \
        jnz     1f; \
        orl     $PS_IE, rfl; \
1:

#endif  /* __i386 */
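
/*
 * A hedged usage sketch (the routine name is hypothetical): a
 * getflags-style routine folds the virtual interrupt state into the
 * flags value it returns, so callers see PS_IE reflecting the upcall
 * mask rather than the meaningless hardware IF bit:
 *
 *      ENTRY(xpv_example_getflags)
 *              pushfq
 *              popq    %rax
 *              EVENT_MASK_TO_IE(%rdi, %rax)    / PS_IE := !upcall_mask
 *              ret
 *      SET_SIZE(xpv_example_getflags)
 *
 * IE_TO_EVENT_MASK(rtmp, rfl) is the inverse: it performs a virtual CLI
 * or STI based on PS_IE in rfl, since popf cannot be trusted to change
 * the interrupt state of a paravirtualized guest.
 */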

/*
 * Used to re-enable interrupts in the body of exception handlers
 */

#if defined(__amd64)

#define ENABLE_INTR_FLAGS \
        pushq   $F_ON; \
        popfq; \
        STI

#elif defined(__i386)

#define ENABLE_INTR_FLAGS \
        pushl   $F_ON; \
        popfl; \
        STI

#endif  /* __i386 */

/*
 * Virtualize IRET and SYSRET
 */

#if defined(__amd64)

#if defined(DEBUG)

/*
 * Die nastily with a #ud trap if we are about to switch to user
 * mode in HYPERVISOR_IRET and RUPDATE_PENDING is set.
 */
#define __ASSERT_NO_RUPDATE_PENDING \
        pushq   %r15; \
        cmpw    $KCS_SEL, 0x10(%rsp); \
        je      1f; \
        movq    %gs:CPU_THREAD, %r15; \
        movq    T_LWP(%r15), %r15; \
        testb   $0x1, PCB_RUPDATE(%r15); \
        je      1f; \
        ud2; \
1:      popq    %r15

#else   /* DEBUG */

#define __ASSERT_NO_RUPDATE_PENDING

#endif  /* DEBUG */

/*
 * Switching from guest kernel to user mode.
 * flag == VGCF_IN_SYSCALL => return via sysret
 * flag == 0 => return via iretq
 *
 * See definition in public/arch-x86_64.h. Stack going in must be:
 * rax, r11, rcx, flags, rip, cs, rflags, rsp, ss.
 */
#define HYPERVISOR_IRET(flag) \
        __ASSERT_NO_RUPDATE_PENDING; \
        pushq   $flag; \
        pushq   %rcx; \
        pushq   %r11; \
        pushq   %rax; \
        movl    $__HYPERVISOR_iret, %eax; \
        syscall; \
        ud2     /* die nastily if we return! */

#define IRET    HYPERVISOR_IRET(0)
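
/*
 * For illustration, the frame HYPERVISOR_IRET(flag) hands to the iret
 * hypercall, from %rsp upward at the point of the syscall, is:
 *
 *      %rsp -> rax     pushed by the macro
 *              r11     pushed by the macro
 *              rcx     pushed by the macro
 *              flags   pushed by the macro ($flag argument)
 *              rip     --+
 *              cs        |
 *              rflags    +-  must already be on the stack, laid out
 *              rsp       |   exactly as a native iretq frame would be
 *              ss      --+
 *
 * i.e., the caller supplies (or preserves) the ordinary iretq frame and
 * the macro prepends the extra words the hypercall consumes.
 */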

/*
 * XXPV: Normally we would expect to use sysret to return from kernel to
 * user mode when using the syscall instruction.  The iret hypercall
 * does support both iret and sysret semantics.  For us to use sysret-
 * style returns we would have to use the hypervisor's private descriptors
 * that obey the syscall instruction's imposed segment selector ordering.
 * With iret we can use whatever %cs value we choose.  We should fix
 * this to use sysret one day.
 */
#define SYSRETQ HYPERVISOR_IRET(0)
#define SYSRETL ud2             /* 32-bit syscall/sysret not supported */
#define SWAPGS  /* empty - handled in hypervisor */

#elif defined(__i386)

/*
 * Switching from guest kernel to user mode.
 * See definition in public/arch-x86_32.h. Stack going in must be:
 * eax, flags, eip, cs, eflags, esp, ss.
 */
#define HYPERVISOR_IRET \
        pushl   %eax; \
        movl    $__HYPERVISOR_iret, %eax; \
        int     $0x82; \
        ud2     /* die nastily if we return! */

#define IRET    HYPERVISOR_IRET
#define SYSRET  ud2             /* 32-bit syscall/sysret not supported */

#endif  /* __i386 */


/*
 * Xen 3.x wedges the current value of upcall_mask into an unused byte of
 * the saved %cs on the stack at the time of passing through a trap or
 * interrupt gate.  Since Xen also updates PS_IE in %[e,r]flags, we always
 * mask off the saved upcall mask so the kernel and/or tools like debuggers
 * will not be confused by bits set in the reserved portion of the %cs slot.
 *
 * See xen/include/public/arch-x86_[32,64].h:cpu_user_regs_t for details.
 */
#if defined(__amd64)

#define CLEAN_CS        movb $0, REGOFF_CS+4(%rsp)

#elif defined(__i386)

#define CLEAN_CS        movb $0, REGOFF_CS+2(%esp)

#endif  /* __i386 */
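
/*
 * Why the offsets differ: in the Xen 3.x public headers the saved %cs
 * slot of cpu_user_regs_t is padded, and saved_upcall_mask sits in one
 * of the pad bytes -- at offset 4 from %cs for 64-bit guests and offset
 * 2 for 32-bit guests.  A rough excerpt of the 64-bit layout (shown for
 * illustration; consult the Xen headers for the authoritative definition):
 *
 *      uint16_t cs, _pad0;
 *      uint8_t  saved_upcall_mask;     // REGOFF_CS + 4
 *      uint8_t  _pad1[3];
 *
 * CLEAN_CS simply zeroes that byte so nothing further up the trap path
 * ever sees it.
 */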

/*
 * All exceptions for amd64 have %r11 and %rcx on the stack.
 * Just pop them back into their appropriate registers and let the
 * frame be saved just as it is when running native.
 */
#if defined(__amd64)

#define XPV_TRAP_POP \
        popq    %rcx; \
        popq    %r11

#define XPV_TRAP_PUSH \
        pushq   %r11; \
        pushq   %rcx

#endif  /* __amd64 */
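
/*
 * Illustrative only (macro usage sketched, not prescribed): a trap entry
 * point would typically begin with
 *
 *      XPV_TRAP_POP            / pop the %rcx/%r11 the hypervisor pushed
 *                              / back into their registers, leaving a
 *                              / native-looking frame
 *
 * before saving the rest of the register state the usual way.
 * XPV_TRAP_PUSH is the inverse, for paths that must reconstruct the
 * hypervisor's frame before returning through it.
 */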

/*
 * Macros for saving the original segment registers and restoring them
 * for fast traps.
 */
#if defined(__amd64)

/*
 * Smaller versions of INTR_PUSH and INTR_POP for fast traps.
 * The following registers have been pushed onto the stack by
 * hardware at this point:
 *
 *      greg_t  r_rip;
 *      greg_t  r_cs;
 *      greg_t  r_rfl;
 *      greg_t  r_rsp;
 *      greg_t  r_ss;
 *
 * This handler is executed by both 32-bit and 64-bit applications.
 * 64-bit applications allow us to treat the set (%rdi, %rsi, %rdx,
 * %rcx, %r8, %r9, %r10, %r11, %rax) as volatile across function calls.
 * However, 32-bit applications only expect (%eax, %edx, %ecx) to be
 * volatile across a function call -- in particular, %esi and %edi MUST
 * be saved!
 *
 * We could do this differently by making a FAST_INTR_PUSH32 for 32-bit
 * programs and FAST_INTR_PUSH for 64-bit programs, but it doesn't seem
 * particularly worth it.
 */
#define FAST_INTR_PUSH \
        INTGATE_INIT_KERNEL_FLAGS; \
        popq    %rcx; \
        popq    %r11; \
        subq    $REGOFF_RIP, %rsp; \
        movq    %rsi, REGOFF_RSI(%rsp); \
        movq    %rdi, REGOFF_RDI(%rsp); \
        CLEAN_CS

#define FAST_INTR_POP \
        movq    REGOFF_RSI(%rsp), %rsi; \
        movq    REGOFF_RDI(%rsp), %rdi; \
        addq    $REGOFF_RIP, %rsp

#define FAST_INTR_RETURN \
        ASSERT_UPCALL_MASK_IS_SET; \
        HYPERVISOR_IRET(0)

#elif defined(__i386)

#define FAST_INTR_PUSH \
        cld; \
        __SEGREGS_PUSH \
        __SEGREGS_LOAD_KERNEL

#define FAST_INTR_POP \
        __SEGREGS_POP

#define FAST_INTR_RETURN \
        IRET

#endif  /* __i386 */
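
/*
 * A hedged sketch of the intended pattern (the handler name is made up):
 *
 *      ENTRY_NP(fasttrap_example)
 *              FAST_INTR_PUSH          / save only what a fast trap needs
 *              ... do the (short) work, honoring the 32-bit caller-saved
 *              ... register rules described above
 *              FAST_INTR_POP           / restore and unwind the frame
 *              FAST_INTR_RETURN        / iret hypercall back to the caller
 *      SET_SIZE(fasttrap_example)
 */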

/*
 * Handling the CR0.TS bit for floating point support.
 *
 * When the TS bit is *set*, attempts to touch the floating
 * point hardware will result in a #nm trap.
 */
#if defined(__amd64)

#define STTS(rtmp) \
        pushq   %rdi; \
        movl    $1, %edi; \
        call    HYPERVISOR_fpu_taskswitch; \
        popq    %rdi

#define CLTS \
        pushq   %rdi; \
        xorl    %edi, %edi; \
        call    HYPERVISOR_fpu_taskswitch; \
        popq    %rdi

#elif defined(__i386)

#define STTS(r) \
        pushl   $1; \
        call    HYPERVISOR_fpu_taskswitch; \
        addl    $4, %esp

#define CLTS \
        pushl   $0; \
        call    HYPERVISOR_fpu_taskswitch; \
        addl    $4, %esp

#endif  /* __i386 */
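
/*
 * A paravirtualized guest cannot manipulate %cr0 directly (clts and
 * mov-to-%cr0 are privileged), so both macros above go through the
 * fpu_taskswitch hypercall.  In C the equivalent calls would be,
 * roughly (sketch only):
 *
 *      HYPERVISOR_fpu_taskswitch(1);   // STTS: set CR0.TS
 *      HYPERVISOR_fpu_taskswitch(0);   // CLTS: clear CR0.TS
 *
 * which is what the assembly does, modulo preserving or passing the
 * argument registers around the call.
 */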

#ifdef __cplusplus
}
#endif

#endif  /* _SYS_MACHPRIVREGS_H */