/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#ifndef _SYS_MACHPRIVREGS_H
#define _SYS_MACHPRIVREGS_H

#include <sys/hypervisor.h>

/*
 * Platform dependent instruction sequences for manipulating
 * privileged state
 */

#ifdef __cplusplus
extern "C" {
#endif

/*
 * CLI and STI are quite complex to virtualize!
 */
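/*
 * Background (informal): a paravirtualized guest cannot execute the real
 * CLI/STI instructions, since interrupt delivery is owned by the
 * hypervisor.  Xen instead exposes a per-vcpu "upcall pending" and
 * "upcall mask" byte in the shared vcpu_info structure; masking events
 * is the PV analogue of CLI, and unmasking (plus processing any pending
 * event) is the analogue of STI.  The macros below implement that model.
 */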

#if defined(__amd64)

#define CURVCPU(r)                                      \
        movq    %gs:CPU_VCPU_INFO, r

#define CURTHREAD(r)                                    \
        movq    %gs:CPU_THREAD, r

#elif defined(__i386)

#define CURVCPU(r)                                      \
        movl    %gs:CPU_VCPU_INFO, r

#define CURTHREAD(r)                                    \
        movl    %gs:CPU_THREAD, r

#endif  /* __i386 */

#define XEN_TEST_EVENT_PENDING(r)                       \
        testb   $0xff, VCPU_INFO_EVTCHN_UPCALL_PENDING(r)

#define XEN_SET_UPCALL_MASK(r)                          \
        movb    $1, VCPU_INFO_EVTCHN_UPCALL_MASK(r)

#define XEN_GET_UPCALL_MASK(r, mask)                    \
        movb    VCPU_INFO_EVTCHN_UPCALL_MASK(r), mask

#define XEN_TEST_UPCALL_MASK(r)                         \
        testb   $1, VCPU_INFO_EVTCHN_UPCALL_MASK(r)

#define XEN_CLEAR_UPCALL_MASK(r)                        \
        ASSERT_UPCALL_MASK_IS_SET;                      \
        movb    $0, VCPU_INFO_EVTCHN_UPCALL_MASK(r)

#ifdef DEBUG

/*
 * Much logic depends on the upcall mask being set at
 * various points in the code; use this macro to validate that.
 *
 * CURVCPU(r) is used internally to establish the vcpu pointer.
 */
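/*
 * Note: stistipanic, stistimsg, laststi and lastcli referenced below are
 * presumably provided by the platform's CLI/STI debug support; they are
 * not defined in this header.  The macro records the address of the most
 * recent STI-like operation for this CPU (via "leaq .+0(%rip)") in
 * laststi[CPU_ID], and panics with stistimsg when the upcall mask is
 * found clear while stistipanic is positive.
 */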
#if defined(__amd64)

#define ASSERT_UPCALL_MASK_IS_SET                       \
        pushq   %r11;                                   \
        CURVCPU(%r11);                                  \
        XEN_TEST_UPCALL_MASK(%r11);                     \
        jne     6f;                                     \
        cmpl    $0, stistipanic(%rip);                  \
        jle     6f;                                     \
        movl    $-1, stistipanic(%rip);                 \
        movq    stistimsg(%rip), %rdi;                  \
        xorl    %eax, %eax;                             \
        call    panic;                                  \
6:      pushq   %rax;                                   \
        pushq   %rbx;                                   \
        movl    %gs:CPU_ID, %eax;                       \
        leaq    .+0(%rip), %r11;                        \
        leaq    laststi(%rip), %rbx;                    \
        movq    %r11, (%rbx, %rax, 8);                  \
        popq    %rbx;                                   \
        popq    %rax;                                   \
        popq    %r11

#define SAVE_CLI_LOCATION                               \
        pushq   %rax;                                   \
        pushq   %rbx;                                   \
        pushq   %rcx;                                   \
        movl    %gs:CPU_ID, %eax;                       \
        leaq    .+0(%rip), %rcx;                        \
        leaq    lastcli(%rip), %rbx;                    \
        movq    %rcx, (%rbx, %rax, 8);                  \
        popq    %rcx;                                   \
        popq    %rbx;                                   \
        popq    %rax;                                   \

#elif defined(__i386)

#define ASSERT_UPCALL_MASK_IS_SET                       \
        pushl   %ecx;                                   \
        CURVCPU(%ecx);                                  \
        XEN_TEST_UPCALL_MASK(%ecx);                     \
        jne     6f;                                     \
        cmpl    $0, stistipanic;                        \
        jle     6f;                                     \
        movl    $-1, stistipanic;                       \
        movl    stistimsg, %ecx;                        \
        pushl   %ecx;                                   \
        call    panic;                                  \
6:      pushl   %eax;                                   \
        pushl   %ebx;                                   \
        movl    %gs:CPU_ID, %eax;                       \
        leal    .+0, %ecx;                              \
        leal    laststi, %ebx;                          \
        movl    %ecx, (%ebx, %eax, 4);                  \
        popl    %ebx;                                   \
        popl    %eax;                                   \
        popl    %ecx

#define SAVE_CLI_LOCATION                               \
        pushl   %eax;                                   \
        pushl   %ebx;                                   \
        pushl   %ecx;                                   \
        movl    %gs:CPU_ID, %eax;                       \
        leal    .+0, %ecx;                              \
        leal    lastcli, %ebx;                          \
        movl    %ecx, (%ebx, %eax, 4);                  \
        popl    %ecx;                                   \
        popl    %ebx;                                   \
        popl    %eax;                                   \

#endif  /* __i386 */

#else   /* DEBUG */

#define ASSERT_UPCALL_MASK_IS_SET       /* empty */
#define SAVE_CLI_LOCATION               /* empty */

#endif  /* DEBUG */

#define KPREEMPT_DISABLE(t)                             \
        addb    $1, T_PREEMPT(t)

#define KPREEMPT_ENABLE_NOKP(t)                         \
        subb    $1, T_PREEMPT(t)

#define CLI(r)                                          \
        CURTHREAD(r);                                   \
        KPREEMPT_DISABLE(r);                            \
        CURVCPU(r);                                     \
        XEN_SET_UPCALL_MASK(r);                         \
        SAVE_CLI_LOCATION;                              \
        CURTHREAD(r);                                   \
        KPREEMPT_ENABLE_NOKP(r)

#define CLIRET(r, ret)                                  \
        CURTHREAD(r);                                   \
        KPREEMPT_DISABLE(r);                            \
        CURVCPU(r);                                     \
        XEN_GET_UPCALL_MASK(r, ret);                    \
        XEN_SET_UPCALL_MASK(r);                         \
        SAVE_CLI_LOCATION;                              \
        CURTHREAD(r);                                   \
        KPREEMPT_ENABLE_NOKP(r)
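
/*
 * Informal usage note: CLI(r) and CLIRET(r, ret) clobber the register
 * passed as "r" (it is loaded in turn with the thread pointer and the
 * vcpu_info pointer); CLIRET additionally returns the previous upcall
 * mask in "ret".  Preemption is briefly disabled so that we cannot
 * migrate to another cpu while updating this cpu's vcpu_info.
 */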

/*
 * We rely on the fact that HYPERVISOR_block clears the upcall mask
 * for us and then immediately delivers an upcall if an event is
 * pending.  That gets us a callback on this cpu without the danger
 * of being preempted and migrating to another cpu between enabling
 * upcalls and the delivery of the callback.
 */
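/*
 * Informally, STI below is equivalent to the following C-like sketch
 * ("vcpu" stands for this CPU's vcpu_info; atomic_cas_16() is just a
 * stand-in for the lock/cmpxchgw pair, not a call made by this code):
 *
 *      old = atomic_cas_16(&vcpu->evtchn_upcall_pending, 0x0100, 0);
 *      if (old != 0x0100)      // an event was already pending
 *              (void) HYPERVISOR_sched_op(SCHEDOP_block, 0);
 *
 * where 0x0100 encodes "upcall mask set, upcall pending clear" across
 * the adjacent evtchn_upcall_pending/evtchn_upcall_mask bytes, and the
 * SCHEDOP_block hypercall clears the mask and delivers the pending
 * upcall for us.
 */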
#if defined(__amd64)

#define STI_CLOBBER             /* clobbers %rax, %rdi, %r11 */         \
        CURVCPU(%r11);                                                  \
        ASSERT_UPCALL_MASK_IS_SET;                                      \
        movw    $0x100, %ax;    /* assume mask set, pending clear */    \
        movw    $0, %di;        /* clear mask and pending */            \
        lock;                                                           \
        cmpxchgw %di, VCPU_INFO_EVTCHN_UPCALL_PENDING(%r11);            \
        jz      7f;             /* xchg worked, we're done */           \
        movl    $__HYPERVISOR_sched_op, %eax; /* have pending upcall */ \
        movl    $SCHEDOP_block, %edi;                                   \
        pushq   %rsi;   /* hypercall clobbers C param regs plus r10 */  \
        pushq   %rcx;                                                   \
        pushq   %rdx;                                                   \
        pushq   %r8;                                                    \
        pushq   %r9;                                                    \
        pushq   %r10;                                                   \
        TRAP_INSTR;     /* clear upcall mask, force upcall */           \
        popq    %r10;                                                   \
        popq    %r9;                                                    \
        popq    %r8;                                                    \
        popq    %rdx;                                                   \
        popq    %rcx;                                                   \
        popq    %rsi;                                                   \
7:

#define STI                                                             \
        pushq   %r11;                                                   \
        pushq   %rdi;                                                   \
        pushq   %rax;                                                   \
        STI_CLOBBER;    /* clobbers %r11, %rax, %rdi */                 \
        popq    %rax;                                                   \
        popq    %rdi;                                                   \
        popq    %r11

#elif defined(__i386)

#define STI_CLOBBER             /* clobbers %eax, %ebx, %ecx */         \
        CURVCPU(%ecx);                                                  \
        ASSERT_UPCALL_MASK_IS_SET;                                      \
        movw    $0x100, %ax;    /* assume mask set, pending clear */    \
        movw    $0, %bx;        /* clear mask and pending */            \
        lock;                                                           \
        cmpxchgw %bx, VCPU_INFO_EVTCHN_UPCALL_PENDING(%ecx);            \
        jz      7f;             /* xchg worked, we're done */           \
        movl    $__HYPERVISOR_sched_op, %eax; /* have pending upcall */ \
        movl    $SCHEDOP_block, %ebx;                                   \
        TRAP_INSTR;             /* clear upcall mask, force upcall */   \
7:

#define STI                                             \
        pushl   %eax;                                   \
        pushl   %ebx;                                   \
        pushl   %ecx;                                   \
        STI_CLOBBER;    /* clobbers %eax, %ebx, %ecx */ \
        popl    %ecx;                                   \
        popl    %ebx;                                   \
        popl    %eax

#endif  /* __i386 */

/*
 * Map the PS_IE bit to the hypervisor's event mask bit (and vice versa).
 *
 * To -set- the event mask, we have to do a CLI; to -clear- it, we have
 * to do an STI (with all the accompanying preemption handling and
 * callbacks, ick).
 */
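/*
 * In other words (informal summary):
 *
 *      PS_IE set in rfl        ->  upcall mask clear (events deliverable)
 *      PS_IE clear in rfl      ->  upcall mask set   (events blocked)
 */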

#if defined(__amd64)

#define IE_TO_EVENT_MASK(rtmp, rfl)             \
        testq   $PS_IE, rfl;                    \
        jnz     4f;                             \
        CLI(rtmp);                              \
        jmp     5f;                             \
4:      STI;                                    \
5:

#define EVENT_MASK_TO_IE(rtmp, rfl)             \
        andq    $_BITNOT(PS_IE), rfl;           \
        CURVCPU(rtmp);                          \
        XEN_TEST_UPCALL_MASK(rtmp);             \
        jnz     1f;                             \
        orq     $PS_IE, rfl;                    \
1:

#elif defined(__i386)

#define IE_TO_EVENT_MASK(rtmp, rfl)             \
        testl   $PS_IE, rfl;                    \
        jnz     4f;                             \
        CLI(rtmp);                              \
        jmp     5f;                             \
4:      STI;                                    \
5:

#define EVENT_MASK_TO_IE(rtmp, rfl)             \
        andl    $_BITNOT(PS_IE), rfl;           \
        CURVCPU(rtmp);                          \
        XEN_TEST_UPCALL_MASK(rtmp);             \
        jnz     1f;                             \
        orl     $PS_IE, rfl;                    \
1:

#endif  /* __i386 */

/*
 * Used to re-enable interrupts in the body of exception handlers
 */

#if defined(__amd64)

#define ENABLE_INTR_FLAGS               \
        pushq   $F_ON;                  \
        popfq;                          \
        STI

#elif defined(__i386)

#define ENABLE_INTR_FLAGS               \
        pushl   $F_ON;                  \
        popfl;                          \
        STI

#endif  /* __i386 */

/*
 * Virtualize IRET and SYSRET
 */

#if defined(__amd64)

#if defined(DEBUG)

/*
 * Die nastily with a #ud trap if we are about to switch to user
 * mode in HYPERVISOR_IRET and RUPDATE_PENDING is set.
 */
#define __ASSERT_NO_RUPDATE_PENDING                     \
        pushq   %r15;                                   \
        cmpw    $KCS_SEL, 0x10(%rsp);                   \
        je      1f;                                     \
        movq    %gs:CPU_THREAD, %r15;                   \
        movq    T_LWP(%r15), %r15;                      \
        testb   $0x1, PCB_RUPDATE(%r15);                \
        je      1f;                                     \
        ud2;                                            \
1:      popq    %r15

#else   /* DEBUG */

#define __ASSERT_NO_RUPDATE_PENDING

#endif  /* DEBUG */

/*
 * Switching from guest kernel to user mode.
 * flag == VGCF_IN_SYSCALL => return via sysret
 * flag == 0 => return via iretq
 *
 * See definition in public/arch-x86_64.h. Stack going in must be:
 * rax, r11, rcx, flags, rip, cs, rflags, rsp, ss.
 */
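/*
 * Informal picture of the stack immediately before the iret hypercall
 * below is issued, top of stack first:
 *
 *      rax, r11, rcx, flag      <- pushed by the macro
 *                                  (flag is VGCF_IN_SYSCALL or 0)
 *      rip, cs, rflags, rsp, ss <- the original return frame
 */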
#define HYPERVISOR_IRET(flag)                   \
        __ASSERT_NO_RUPDATE_PENDING;            \
        pushq   $flag;                          \
        pushq   %rcx;                           \
        pushq   %r11;                           \
        pushq   %rax;                           \
        movl    $__HYPERVISOR_iret, %eax;       \
        syscall;                                \
        ud2     /* die nastily if we return! */

#define IRET    HYPERVISOR_IRET(0)

/*
 * XXPV: Normally we would expect to use sysret to return from kernel to
 *       user mode when using the syscall instruction.  The iret hypercall
 *       supports both iret and sysret semantics.  For us to use the sysret
 *       style would require that we use the hypervisor's private descriptors,
 *       which obey the segment selector ordering imposed by the syscall
 *       instruction.  With iret we can use whatever %cs value we choose.
 *       We should fix this to use sysret one day.
 */
#define SYSRETQ HYPERVISOR_IRET(0)
#define SYSRETL ud2             /* 32-bit syscall/sysret not supported */
#define SWAPGS  /* empty - handled in hypervisor */

#elif defined(__i386)

/*
 * Switching from guest kernel to user mode.
 * See definition in public/arch-x86_32.h. Stack going in must be:
 * eax, flags, eip, cs, eflags, esp, ss.
 */
#define HYPERVISOR_IRET                         \
        pushl   %eax;                           \
        movl    $__HYPERVISOR_iret, %eax;       \
        int     $0x82;                          \
        ud2     /* die nastily if we return! */

#define IRET    HYPERVISOR_IRET
#define SYSRET  ud2             /* 32-bit syscall/sysret not supported */

#endif  /* __i386 */



/*
 * Xen 3.x wedges the current value of upcall_mask into an unused byte of
 * the saved %cs on the stack at the time of passing through a trap or
 * interrupt gate.  Since Xen also updates PS_IE in %[e,r]flags, we always
 * mask off the saved upcall mask so that the kernel and/or tools like
 * debuggers will not be confused by bits set in reserved portions of the
 * %cs slot.
 *
 * See xen/include/public/arch-x86_[32,64].h:cpu_user_regs_t for details.
 */
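/*
 * The byte offsets below (+4 on amd64, +2 on i386) should correspond to
 * the saved_upcall_mask field that sits alongside cs in cpu_user_regs_t;
 * REGOFF_CS is the offset of the cs slot within the saved register frame.
 */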
#if defined(__amd64)

#define CLEAN_CS        movb    $0, REGOFF_CS+4(%rsp)

#elif defined(__i386)

#define CLEAN_CS        movb    $0, REGOFF_CS+2(%esp)

#endif  /* __i386 */

/*
 * All exceptions for amd64 have %r11 and %rcx on the stack.
 * Just pop them back into their appropriate registers and let
 * the frame get saved as it would be when running native.
 */
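/*
 * (As we understand it, Xen's 64-bit PV exception bounce leaves %rcx and
 * %r11 on the guest stack above the usual iret frame because its
 * return-to-guest path is syscall/sysret-like, which is why they must be
 * popped here and re-pushed wherever a native-style frame is wanted.)
 */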
#if defined(__amd64)

#define XPV_TRAP_POP    \
        popq    %rcx;   \
        popq    %r11

#define XPV_TRAP_PUSH   \
        pushq   %r11;   \
        pushq   %rcx

#endif  /* __amd64 */


/*
 * Macros for saving the original segment registers and restoring them
 * for fast traps.
 */
#if defined(__amd64)

/*
 * Smaller versions of INTR_PUSH and INTR_POP for fast traps.
 * The following registers have been pushed onto the stack by
 * hardware at this point:
 *
 *      greg_t  r_rip;
 *      greg_t  r_cs;
 *      greg_t  r_rfl;
 *      greg_t  r_rsp;
 *      greg_t  r_ss;
 *
 * This handler is executed by both 32-bit and 64-bit applications.
 * 64-bit applications allow us to treat the set (%rdi, %rsi, %rdx,
 * %rcx, %r8, %r9, %r10, %r11, %rax) as volatile across function calls.
 * However, 32-bit applications only expect (%eax, %edx, %ecx) to be volatile
 * across a function call -- in particular, %esi and %edi MUST be saved!
 *
 * We could do this differently by making a FAST_INTR_PUSH32 for 32-bit
 * programs and a FAST_INTR_PUSH for 64-bit programs, but it doesn't seem
 * particularly worth it.
 */
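/*
 * Informal note: FAST_INTR_PUSH first pops the %rcx/%r11 pair that the
 * hypervisor left on the stack (see XPV_TRAP_POP above), then opens a
 * partial register frame by dropping %rsp to the r_rip slot and saving
 * only %rsi and %rdi into it, and finally scrubs the upcall mask byte
 * out of the saved %cs via CLEAN_CS.
 */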
#define FAST_INTR_PUSH                  \
        INTGATE_INIT_KERNEL_FLAGS;      \
        popq    %rcx;                   \
        popq    %r11;                   \
        subq    $REGOFF_RIP, %rsp;      \
        movq    %rsi, REGOFF_RSI(%rsp); \
        movq    %rdi, REGOFF_RDI(%rsp); \
        CLEAN_CS

#define FAST_INTR_POP                   \
        movq    REGOFF_RSI(%rsp), %rsi; \
        movq    REGOFF_RDI(%rsp), %rdi; \
        addq    $REGOFF_RIP, %rsp

#define FAST_INTR_RETURN                \
        ASSERT_UPCALL_MASK_IS_SET;      \
        HYPERVISOR_IRET(0)

#elif defined(__i386)

#define FAST_INTR_PUSH                  \
        cld;                            \
        __SEGREGS_PUSH                  \
        __SEGREGS_LOAD_KERNEL           \

#define FAST_INTR_POP                   \
        __SEGREGS_POP

#define FAST_INTR_RETURN                \
        IRET

#endif  /* __i386 */

/*
 * Handling the CR0.TS bit for floating point support.
 *
 * When the TS bit is *set*, attempts to touch the floating
 * point hardware will result in a #nm trap.
 */
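/*
 * Under the hypervisor the guest cannot write CR0.TS directly, so both
 * STTS and CLTS are implemented as HYPERVISOR_fpu_taskswitch(1) and
 * HYPERVISOR_fpu_taskswitch(0) calls respectively.  Note that the
 * register argument to STTS is unused here; it appears to be kept for
 * interface parity with the native version of the macro.
 */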
#if defined(__amd64)

#define STTS(rtmp)                              \
        pushq   %rdi;                           \
        movl    $1, %edi;                       \
        call    HYPERVISOR_fpu_taskswitch;      \
        popq    %rdi

#define CLTS                                    \
        pushq   %rdi;                           \
        xorl    %edi, %edi;                     \
        call    HYPERVISOR_fpu_taskswitch;      \
        popq    %rdi

#elif defined(__i386)

#define STTS(r)                                 \
        pushl   $1;                             \
        call    HYPERVISOR_fpu_taskswitch;      \
        addl    $4, %esp

#define CLTS                                    \
        pushl   $0;                             \
        call    HYPERVISOR_fpu_taskswitch;      \
        addl    $4, %esp

#endif  /* __i386 */

#ifdef __cplusplus
}
#endif

#endif  /* _SYS_MACHPRIVREGS_H */