9059 Simplify SMAP relocations with krtld
Portions contributed by: John Levon <john.levon@joyent.com>


  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Copyright (c) 2009, Intel Corporation
  28  * All rights reserved.
  29  */
  30 
  31 /*       Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.        */
  32 /*       Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T              */
  33 /*         All Rights Reserved                                          */
  34 
  35 /*       Copyright (c) 1987, 1988 Microsoft Corporation                 */
  36 /*         All Rights Reserved                                          */
  37 
  38 /*
  39  * Copyright 2019 Joyent, Inc.
  40  */
  41 
  42 #include <sys/errno.h>
  43 #include <sys/asm_linkage.h>
  44 
  45 #include "assym.h"
  46 
  47 #define KCOPY_MIN_SIZE  128     /* Must be >= 16 bytes */
  48 #define XCOPY_MIN_SIZE  128     /* Must be >= 16 bytes */
  49 /*
  50  * Non-temporal access (NTA) alignment requirement
  51  */
  52 #define NTA_ALIGN_SIZE  4       /* Must be at least 4-byte aligned */
  53 #define NTA_ALIGN_MASK  _CONST(NTA_ALIGN_SIZE-1)
  54 #define COUNT_ALIGN_SIZE        16      /* Must be at least 16-byte aligned */
  55 #define COUNT_ALIGN_MASK        _CONST(COUNT_ALIGN_SIZE-1)
  56 
  57 /*
  58  * With the introduction of Broadwell, Intel has introduced supervisor mode
  59  * access protection -- SMAP. SMAP forces the kernel to set certain bits to
  60  * enable access of user pages (AC in rflags, defined as PS_ACHK in
  61  * <sys/psw.h>). One of the challenges is that the implementations of many of the
  62  * userland copy routines directly use the kernel ones. For example, copyin and
  63  * copyout simply go and jump to the do_copy_fault label and traditionally let
  64  * those deal with the return for them. In fact, changing that is a can of frame
  65  * pointers.
  66  *
  67  * Rules and Constraints:
  68  *
  69  * 1. For anything that's not in copy.s, we have it do explicit calls to the
  70  * smap related code. It usually is in a position where it is able to. This is
  71  * restricted to the following three places: DTrace, resume() in swtch.s and
  72  * on_fault/no_fault. If you want to add it somewhere else, we should be
  73  * thinking twice.
  74  *
  75  * 2. We try to toggle this at the smallest window possible. This means that if
  76  * we take a fault, or need to try to use a copyop in copyin() or copyout(), or any
  77  * other function, we will always leave with SMAP enabled (the kernel cannot
  78  * access user pages).
  79  *
  80  * 3. None of the *_noerr() or ucopy/uzero routines should toggle SMAP. They are
  81  * explicitly only allowed to be called while in an on_fault()/no_fault() handler,
  82  * which already takes care of ensuring that SMAP is enabled and disabled. Note
  83  * this means that when under an on_fault()/no_fault() handler, one must not
  84  * call the non-*_noerr() routines.
  85  *
  86  * 4. The first thing we should do after coming out of an lofault handler is to
  87  * make sure that we call smap_enable again to ensure that we are safely
  88  * protected, as more often than not, we will have disabled smap to get there.
  89  *
  90  * 5. The SMAP functions, smap_enable and smap_disable may not touch any
  91  * registers beyond those done by the call and ret. These routines may be called
  92  * from arbitrary contexts in copy.s where we have slightly more special ABIs in
  93  * place.
  94  *
  95  * 6. For any inline user of SMAP, the appropriate SMAP_ENABLE_INSTR and
  96  * SMAP_DISABLE_INSTR macro should be used (except for smap_enable() and
  97  * smap_disable()). If the number of these is changed, you must update the
  98  * constants SMAP_ENABLE_COUNT and SMAP_DISABLE_COUNT below.
  99  *
 100  * 7. Note, at this time SMAP is not implemented for the 32-bit kernel. There is
 101  * no known technical reason preventing it from being enabled.
 102  *
 103  * 8. Generally this .s file is processed by a K&R style cpp. This means that it
 104  * really has a lot of feelings about whitespace. In particular, if you have a
 105  * macro FOO with the arguments FOO(1, 3), the second argument is in fact ' 3'.
 106  *
 107  * 9. The smap_enable and smap_disable functions should not generally be called.
 108  * They exist such that DTrace and on_trap() may use them; that's it.
 109  *
 110  * 10. In general, the kernel has its own value for rflags that gets used. This
 111  * is maintained in a few different places which vary based on how the thread
 112  * comes into existence and whether it's a user thread. In general, when the
 113  * kernel takes a trap, it will always set rflags to a known set of flags,
 114  * mainly as part of ENABLE_INTR_FLAGS and F_OFF and F_ON. These ensure that
 115  * PS_ACHK is cleared for us. In addition, when using the sysenter instruction,
 116  * we mask PS_ACHK off via the AMD_SFMASK MSR. See init_cpu_syscall() for
 117  * where that gets masked off.
 118  */
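To make rule 3 concrete, here is a minimal sketch, not taken from this file, of how a consumer is expected to use a *_noerr() routine under an on_fault()/no_fault() handler, which per rules 1 and 3 is where the SMAP toggling happens on the caller's behalf. The wrapper function and its arguments are hypothetical; on_fault(), no_fault() and copyin_noerr() are the existing kernel interfaces.

    #include <sys/types.h>
    #include <sys/systm.h>
    #include <sys/errno.h>

    /*
     * Hypothetical consumer: copy 'len' bytes from user address 'uaddr' into
     * the kernel buffer 'kaddr'.  on_fault() arms the lofault handler (and,
     * per the comment above, takes care of SMAP), so the _noerr routine
     * itself must not toggle SMAP.
     */
    static int
    fetch_user_bytes(const void *uaddr, void *kaddr, size_t len)
    {
            label_t ljb;

            if (on_fault(&ljb)) {
                    no_fault();
                    return (EFAULT);
            }
            copyin_noerr(uaddr, kaddr, len);
            no_fault();
            return (0);
    }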
 119 
 120 /*
 121  * The optimal 64-bit bcopy and kcopy for modern x86 processors use
 122  * "rep smovq" for large sizes. Performance data shows that many calls to
 123  * bcopy/kcopy/bzero/kzero operate on small buffers. For best performance at
 124  * these small sizes, unrolled code is used. For medium sizes, loops writing
 125  * 64 bytes per iteration are used. Transition points were determined experimentally.
 126  */
 127 #define BZERO_USE_REP   (1024)
 128 #define BCOPY_DFLT_REP  (128)
 129 #define BCOPY_NHM_REP   (768)
 130 
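As a rough C rendering of the size-based strategy the comment above describes (illustrative only: the real implementation is the hand-written assembly later in this file, and the actual cut-over points such as BCOPY_DFLT_REP and BCOPY_NHM_REP are selected per CPU):

    #include <stddef.h>
    #include <string.h>

    /* Illustrative threshold only; the real values are chosen per CPU. */
    #define REP_THRESHOLD   128

    static void
    bcopy_sketch(const void *from, void *to, size_t n)
    {
            const char *s = from;
            char *d = to;

            if (n < REP_THRESHOLD) {
                    /* small/medium: stands in for the unrolled and 64-byte-per-pass loops */
                    while (n-- != 0)
                            *d++ = *s++;
            } else {
                    /* large: memcpy() stands in for "rep smovq" */
                    (void) memcpy(d, s, n);
            }
    }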


1885 .copyin_panic_msg:
1886         .string "copyin: kaddr argument below kernelbase"
1887 .xcopyin_panic_msg:
1888         .string "xcopyin: kaddr argument below kernelbase"
1889 .copyout_panic_msg:
1890         .string "copyout: kaddr argument below kernelbase"
1891 .xcopyout_panic_msg:
1892         .string "xcopyout: kaddr argument below kernelbase"
1893 .copystr_panic_msg:
1894         .string "copystr: arguments in user space"
1895 .copyinstr_panic_msg:
1896         .string "copyinstr: kaddr argument not in kernel address space"
1897 .copyoutstr_panic_msg:
1898         .string "copyoutstr: kaddr argument not in kernel address space"
1899 .cpyin_ne_pmsg:
1900         .string "copyin_noerr: argument not in kernel address space"
1901 .cpyout_ne_pmsg:
1902         .string "copyout_noerr: argument not in kernel address space"
1903 #endif
1904 
1905 /*
1906  * These functions are used for SMAP, supervisor mode access protection. They
1907  * are hotpatched to become real instructions when the system starts up, which is
1908  * done in mlsetup() as part of enabling the other CR4-related features.
1909  *
1910  * Generally speaking, smap_disable() is a stac instruction and smap_enable is a
1911  * clac instruction. It's safe to call these any number of times, and in fact,
1912  * out of paranoia, the kernel will likely call them at several points.
1913  */
1914 
1915         ENTRY(smap_disable)
1916         nop
1917         nop
1918         nop
1919         ret
1920         SET_SIZE(smap_disable)
1921 
1922         ENTRY(smap_enable)
1923         nop
1924         nop
1925         nop
1926         ret
1927         SET_SIZE(smap_enable)
1928 
1929 .data
1930 .align  4
1931 .globl  _smap_enable_patch_count
1932 .type   _smap_enable_patch_count,@object
1933 .size   _smap_enable_patch_count, 4
1934 _smap_enable_patch_count:
1935         .long   SMAP_ENABLE_COUNT
1936 
1937 .globl  _smap_disable_patch_count
1938 .type   _smap_disable_patch_count,@object
1939 .size   _smap_disable_patch_count, 4
1940 _smap_disable_patch_count:
1941         .long SMAP_DISABLE_COUNT
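For context, a hypothetical sketch of the boot-time patching that these nop stubs and the _smap_*_patch_count symbols exist to support: once CR4.SMAP is enabled, each recorded patch point has its three nops overwritten with the clac or stac encoding. The patch-point structure and smap_text_write() below are illustrative stand-ins, not the actual startup/mlsetup code.

    #include <sys/types.h>

    /* clac == 0f 01 ca (used for smap_enable), stac == 0f 01 cb (smap_disable) */
    static const uint8_t smap_clac[3] = { 0x0f, 0x01, 0xca };
    static const uint8_t smap_stac[3] = { 0x0f, 0x01, 0xcb };

    typedef struct smap_patch_point {
            caddr_t         spp_addr;       /* address of a 3-byte nop window */
            boolean_t       spp_enable;     /* B_TRUE: patch in clac (smap_enable) */
    } smap_patch_point_t;

    /* Hypothetical text-patching helper standing in for the real interface. */
    extern void smap_text_write(caddr_t, const uint8_t *, size_t);

    static void
    smap_patch_all(const smap_patch_point_t *pts, uint_t npts)
    {
            for (uint_t i = 0; i < npts; i++) {
                    smap_text_write(pts[i].spp_addr,
                        pts[i].spp_enable ? smap_clac : smap_stac, 3);
            }
    }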


  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * Copyright (c) 2009, Intel Corporation
  28  * All rights reserved.
  29  */
  30 
  31 /*       Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.        */
  32 /*       Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T              */
  33 /*         All Rights Reserved                                          */
  34 
  35 /*       Copyright (c) 1987, 1988 Microsoft Corporation                 */
  36 /*         All Rights Reserved                                          */
  37 
  38 /*
  39  * Copyright 2020 Joyent, Inc.
  40  */
  41 
  42 #include <sys/errno.h>
  43 #include <sys/asm_linkage.h>
  44 
  45 #include "assym.h"
  46 
  47 #define KCOPY_MIN_SIZE  128     /* Must be >= 16 bytes */
  48 #define XCOPY_MIN_SIZE  128     /* Must be >= 16 bytes */
  49 /*
  50  * Non-temporal access (NTA) alignment requirement
  51  */
  52 #define NTA_ALIGN_SIZE  4       /* Must be at least 4-byte aligned */
  53 #define NTA_ALIGN_MASK  _CONST(NTA_ALIGN_SIZE-1)
  54 #define COUNT_ALIGN_SIZE        16      /* Must be at least 16-byte aligned */
  55 #define COUNT_ALIGN_MASK        _CONST(COUNT_ALIGN_SIZE-1)
  56 
  57 /*
  58  * With the introduction of Broadwell, Intel has introduced supervisor mode
  59  * access protection -- SMAP. SMAP forces the kernel to set certain bits to
  60  * enable access of user pages (AC in rflags, defined as PS_ACHK in
  61  * <sys/psw.h>). One of the challenges is that the implementations of many of the
  62  * userland copy routines directly use the kernel ones. For example, copyin and
  63  * copyout simply go and jump to the do_copy_fault label and traditionally let
  64  * those deal with the return for them. In fact, changing that is a can of frame
  65  * pointers.
  66  *
  67  * Rules and Constraints:
  68  *
  69  * 1. For anything that's not in copy.s, we have it do explicit smap_disable()
  70  * or smap_enable() calls.  This is restricted to the following three places:
  71  * DTrace, resume() in swtch.s and on_fault/no_fault. If you want to add it
  72  * somewhere else, we should be thinking twice.
  73  *
  74  * 2. We try to toggle this at the smallest window possible. This means that if
  75  * we take a fault, or need to try to use a copyop in copyin() or copyout(), or any
  76  * other function, we will always leave with SMAP enabled (the kernel cannot
  77  * access user pages).
  78  *
  79  * 3. None of the *_noerr() or ucopy/uzero routines should toggle SMAP. They are
  80  * explicitly only allowed to be called while in an on_fault()/no_fault() handler,
  81  * which already takes care of ensuring that SMAP is enabled and disabled. Note
  82  * this means that when under an on_fault()/no_fault() handler, one must not
  83  * call the non-*_noerr() routines.
  84  *
  85  * 4. The first thing we should do after coming out of an lofault handler is to
  86  * make sure that we call smap_enable() again to ensure that we are safely
  87  * protected, as more often than not, we will have disabled smap to get there.
  88  *
  89  * 5. smap_enable() and smap_disable() don't exist: calls to these functions
  90  * generate runtime relocations that are then processed into the necessary
  91  * clac/stac, via the krtld hotinlines mechanism and hotinline_smap().
  92  *
  93  * 6. For any inline user of SMAP, the appropriate SMAP_ENABLE_INSTR and
  94  * SMAP_DISABLE_INSTR macro should be used. If the number of these is changed,
  95  * you must update the constants SMAP_ENABLE_COUNT and SMAP_DISABLE_COUNT below.
  96  *
  97  * 7. Generally this .s file is processed by a K&R style cpp. This means that it
  98  * really has a lot of feelings about whitespace. In particular, if you have a
  99  * macro FOO with the arguments FOO(1, 3), the second argument is in fact ' 3'.
 100  *
 101  * 8. In general, the kernel has its own value for rflags that gets used. This
 102  * is maintained in a few different places which vary based on how the thread
 103  * comes into existence and whether it's a user thread. In general, when the
 104  * kernel takes a trap, it will always set rflags to a known set of flags,
 105  * mainly as part of ENABLE_INTR_FLAGS and F_OFF and F_ON. These ensure that
 106  * PS_ACHK is cleared for us. In addition, when using the sysenter instruction,
 107  * we mask PS_ACHK off via the AMD_SFMASK MSR. See init_cpu_syscall() for
 108  * where that gets masked off.
 109  */
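Rule 5 refers to the krtld hotinlines mechanism and hotinline_smap(). Conceptually, each call to smap_enable()/smap_disable() is left as a 5-byte e8 rel32 call in the object, and the hotinline step rewrites it in place as the 3-byte clac or stac encoding padded with nops. The sketch below is illustrative; only hotinline_smap() itself is named by the comment above, and the real krtld code differs.

    #include <sys/types.h>

    static void
    hotinline_smap_sketch(uint8_t *call_site, boolean_t is_smap_enable)
    {
            /* clac == 0f 01 ca; stac == 0f 01 cb */
            call_site[0] = 0x0f;
            call_site[1] = 0x01;
            call_site[2] = is_smap_enable ? 0xca : 0xcb;
            call_site[3] = 0x90;    /* nop: pad out the rest of the original call */
            call_site[4] = 0x90;
    }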
 110 
 111 /*
 112  * The optimal 64-bit bcopy and kcopy for modern x86 processors use
 113  * "rep smovq" for large sizes. Performance data shows that many calls to
 114  * bcopy/kcopy/bzero/kzero operate on small buffers. For best performance at
 115  * these small sizes, unrolled code is used. For medium sizes, loops writing
 116  * 64 bytes per iteration are used. Transition points were determined experimentally.
 117  */
 118 #define BZERO_USE_REP   (1024)
 119 #define BCOPY_DFLT_REP  (128)
 120 #define BCOPY_NHM_REP   (768)
 121 


1876 .copyin_panic_msg:
1877         .string "copyin: kaddr argument below kernelbase"
1878 .xcopyin_panic_msg:
1879         .string "xcopyin: kaddr argument below kernelbase"
1880 .copyout_panic_msg:
1881         .string "copyout: kaddr argument below kernelbase"
1882 .xcopyout_panic_msg:
1883         .string "xcopyout: kaddr argument below kernelbase"
1884 .copystr_panic_msg:
1885         .string "copystr: arguments in user space"
1886 .copyinstr_panic_msg:
1887         .string "copyinstr: kaddr argument not in kernel address space"
1888 .copyoutstr_panic_msg:
1889         .string "copyoutstr: kaddr argument not in kernel address space"
1890 .cpyin_ne_pmsg:
1891         .string "copyin_noerr: argument not in kernel address space"
1892 .cpyout_ne_pmsg:
1893         .string "copyout_noerr: argument not in kernel address space"
1894 #endif
1895 
1896 .data
1897 .align  4
1898 .globl  _smap_enable_patch_count
1899 .type   _smap_enable_patch_count,@object
1900 .size   _smap_enable_patch_count, 4
1901 _smap_enable_patch_count:
1902         .long   SMAP_ENABLE_COUNT
1903 
1904 .globl  _smap_disable_patch_count
1905 .type   _smap_disable_patch_count,@object
1906 .size   _smap_disable_patch_count, 4
1907 _smap_disable_patch_count:
1908         .long SMAP_DISABLE_COUNT