Print this page
11859 need swapgs mitigation
Reviewed by: Robert Mustacchi <rm@fingolfin.org>
Reviewed by: Dan McDonald <danmcd@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@fingolfin.org>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/i86pc/ml/kpti_trampolines.s
          +++ new/usr/src/uts/i86pc/ml/kpti_trampolines.s
↓ open down ↓ 1 lines elided ↑ open up ↑
   2    2   * This file and its contents are supplied under the terms of the
   3    3   * Common Development and Distribution License ("CDDL"), version 1.0.
   4    4   * You may only use this file in accordance with the terms of version
   5    5   * 1.0 of the CDDL.
   6    6   *
   7    7   * A full copy of the text of the CDDL should have accompanied this
   8    8   * source.  A copy of the CDDL is also available via the Internet at
   9    9   * http://www.illumos.org/license/CDDL.
  10   10   */
  11   11  /*
  12      - * Copyright 2018 Joyent, Inc.
       12 + * Copyright 2019 Joyent, Inc.
  13   13   */
  14   14  
  15   15  /*
  16   16   * This file contains the trampolines that are used by KPTI in order to be
  17   17   * able to take interrupts/trap/etc while on the "user" page table.
  18   18   *
  19   19   * We don't map the full kernel text into the user page table: instead we
  20   20   * map this one small section of trampolines (which compiles to ~13 pages).
  21   21   * These trampolines are set in the IDT always (so they will run no matter
  22   22   * whether we're on the kernel or user page table), and their primary job is to
↓ open down ↓ 58 lines elided ↑ open up ↑
  81   81   * corrupt!)
  82   82   *
  83   83   * Finally, for these "special" interrupts (NMI/MCE/double fault) we use a
  84   84   * special %cr3 value we stash here in the text (kpti_safe_cr3). We set this to
  85   85   * point at the PML4 for kas early in boot and never touch it again. Hopefully
  86   86   * it survives whatever corruption brings down the rest of the kernel!
  87   87   *
  88   88   * Syscalls are different to interrupts (at least in the SYSENTER/SYSCALL64
  89   89   * cases) in that they do not push an interrupt frame (and also have some other
  90   90   * effects). In the syscall trampolines, we assume that we can only be taking
  91      - * the call from userland and use SWAPGS and an unconditional overwrite of %cr3.
       91 + * the call from userland and use swapgs and an unconditional overwrite of %cr3.
  92   92   * We do not do any stack pivoting for syscalls (and we leave SYSENTER's
  93   93   * existing %rsp pivot untouched) -- instead we spill registers into
  94   94   * %gs:CPU_KPTI_* as we need to.
  95   95   *
  96   96   * Note that the normal %cr3 values do not cause invalidations with PCIDE - see
  97   97   * hat_switch().
  98   98   */
  99   99  
 100  100  /*
 101  101   * The macros here mostly line up with what's in kdi_idthdl.s, too, so if you
↓ open down ↓ 394 lines elided ↑ open up ↑
 496  496          mov     %gs:CPU_THREAD, %r13;           \
 497  497          mov     T_STACK(%r13), %r13;            \
 498  498          addq    $REGSIZE+MINFRAME, %r13;        \
 499  499          mov     %r13, %rsp;                     \
 500  500          pushq   %gs:CPU_KPTI_SS;                \
 501  501          pushq   %gs:CPU_KPTI_RSP;               \
 502  502          pushq   %gs:CPU_KPTI_RFLAGS;            \
 503  503          pushq   %gs:CPU_KPTI_CS;                \
 504  504          pushq   %gs:CPU_KPTI_RIP;               \
 505  505          mov     %gs:CPU_KPTI_R13, %r13;         \
 506      -        SWAPGS;                                 \
      506 +        swapgs;                                 \
 507  507          jmp     isr;                            \
 508  508          SET_SIZE(tr_/**/isr)
 509  509  
 510  510          MK_SYSCALL_INT_TRAMPOLINE(brand_sys_syscall_int)
 511  511          MK_SYSCALL_INT_TRAMPOLINE(sys_syscall_int)
 512  512  
 513  513          /*
 514  514           * Interrupt/trap return trampolines
 515  515           */
 516  516  
↓ open down ↓ 12 lines elided ↑ open up ↑
 529  529          /*
 530  530           * Yes, this does nothing extra. But this way we know if we see iret
 531  531           * elsewhere, then we've failed to properly consider trampolines there.
 532  532           */
 533  533          iretq
 534  534          SET_SIZE(tr_iret_kernel)
 535  535  
 536  536          ENTRY_NP(tr_iret_user)
 537  537  #if DEBUG
 538  538          /*
 539      -         * Ensure that we return to user land with CR0.TS clear. We do this
 540      -         * before we trampoline back and pivot the stack and %cr3. This way
 541      -         * we're still on the kernel stack and kernel %cr3, though we are on the
 542      -         * user GSBASE.
      539 +         * Panic if we find CR0.TS set. We're still on the kernel stack and
      540 +         * %cr3, but we do need to swap back to the kernel gs. (We don't worry
      541 +         * about swapgs speculation here.)
 543  542           */
 544  543          pushq   %rax
 545  544          mov     %cr0, %rax
 546  545          testq   $CR0_TS, %rax
 547  546          jz      1f
 548  547          swapgs
 549  548          popq    %rax
 550  549          leaq    _bad_ts_panic_msg(%rip), %rdi
 551  550          xorl    %eax, %eax
 552  551          pushq   %rbp
 553  552          movq    %rsp, %rbp
 554  553          call    panic
 555  554  1:
 556  555          popq    %rax
 557  556  #endif
 558  557  
 559  558          cmpq    $1, kpti_enable
 560  559          jne     1f
 561  560  
      561 +        /*
      562 +         * KPTI enabled: we're on the user gsbase at this point, so we
      563 +         * need to swap back so we can pivot stacks.
      564 +         *
      565 +         * The swapgs lfence mitigation is probably not needed here
      566 +         * since a mis-speculation of the above branch would imply KPTI
      567 +         * is disabled, but we'll add the lfence anyway.
      568 +         */
 562  569          swapgs
      570 +        lfence
 563  571          mov     %r13, %gs:CPU_KPTI_R13
 564  572          PIVOT_KPTI_STK(%r13)
 565  573          SET_USER_CR3(%r13)
 566  574          mov     %gs:CPU_KPTI_R13, %r13
 567      -        /* Zero these to make sure they didn't leak from a kernel trap */
      575 +        /* Zero these to make sure they didn't leak from a kernel trap. */
 568  576          movq    $0, %gs:CPU_KPTI_R13
 569  577          movq    $0, %gs:CPU_KPTI_R14
      578 +        /* And back to user gsbase again. */
 570  579          swapgs
 571  580  1:
 572  581          iretq
 573  582          SET_SIZE(tr_iret_user)
 574  583  
 575  584          /*
 576  585           * This special return trampoline is for KDI's use only (with kmdb).
 577  586           *
 578  587           * KDI/kmdb do not use swapgs -- they directly write the GSBASE MSR
 579  588           * instead. This trampoline runs after GSBASE has already been changed
↓ open down ↓ 233 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX