Print this page
9600 LDT still not happy under KPTI

@@ -282,13 +282,16 @@
 
         USEGD_SETBASE(usd, ssd->bo);
         USEGD_SETLIMIT(usd, ssd->ls);
 
         /*
-         * set type, dpl and present bits.
+         * Set type, dpl and present bits.
+         *
+         * Force the "accessed" bit to on so that we don't run afoul of
+         * KPTI.
          */
-        usd->usd_type = ssd->acc1;
+        usd->usd_type = ssd->acc1 | SDT_A;
         usd->usd_dpl = ssd->acc1 >> 5;
         usd->usd_p = ssd->acc1 >> (5 + 2);
 
         ASSERT(usd->usd_type >= SDT_MEMRO);
         ASSERT(usd->usd_dpl == SEL_UPL);

@@ -343,12 +346,11 @@
  */
 static void
 ldt_load(void)
 {
 #if defined(__xpv)
-        xen_set_ldt(get_ssd_base(&curproc->p_ldt_desc),
-            curproc->p_ldtlimit + 1);
+        xen_set_ldt(curproc->p_ldt, curproc->p_ldtlimit + 1);
 #else
         size_t len;
         system_desc_t desc;
 
         /*

@@ -410,44 +412,48 @@
          */
         reset_sregs();
 #endif
 
         ldt_unload();
-        cpu_fast_syscall_enable(NULL);
+        cpu_fast_syscall_enable();
 }
 
 static void
 ldt_restorectx(proc_t *p)
 {
         ASSERT(p->p_ldt != NULL);
         ASSERT(p == curproc);
 
         ldt_load();
-        cpu_fast_syscall_disable(NULL);
+        cpu_fast_syscall_disable();
 }
 
 /*
- * When a process with a private LDT execs, fast syscalls must be enabled for
- * the new process image.
+ * At exec time, we need to clear up our LDT context and re-enable fast syscalls
+ * for the new process image.
+ *
+ * The same is true for the other case, where we have:
+ *
+ * proc_exit()
+ *  ->exitpctx()->ldt_savectx()
+ *  ->freepctx()->ldt_freectx()
+ *
+ * Because pre-emption is not prevented between the two callbacks, we could have
+ * come off CPU, and brought back LDT context when coming back on CPU via
+ * ldt_restorectx().
  */
 /* ARGSUSED */
 static void
 ldt_freectx(proc_t *p, int isexec)
 {
-        ASSERT(p->p_ldt);
+        ASSERT(p->p_ldt != NULL);
+        ASSERT(p == curproc);
 
-        if (isexec) {
                 kpreempt_disable();
-                cpu_fast_syscall_enable(NULL);
-                kpreempt_enable();
-        }
-
-        /*
-         * ldt_free() will free the memory used by the private LDT, reset the
-         * process's descriptor, and re-program the LDTR.
-         */
         ldt_free(p);
+        cpu_fast_syscall_enable();
+        kpreempt_enable();
 }
 
 /*
  * Install ctx op that ensures syscall/sysenter are disabled.
  * See comments below.

@@ -498,11 +504,11 @@
 setdscr(struct ssd *ssd)
 {
         ushort_t seli;          /* selector index */
         user_desc_t *ldp;       /* descriptor pointer */
         user_desc_t ndesc;      /* new descriptor */
-        proc_t  *pp = ttoproc(curthread);
+        proc_t  *pp = curproc;
         int     rc = 0;
 
         /*
          * LDT segments: executable and data at DPL 3 only.
          */

@@ -539,15 +545,16 @@
                  * thread to take the slow path (which doesn't make use
                  * of sysenter or sysexit) back out.
                  */
                 kpreempt_disable();
                 ldt_installctx(pp, NULL);
-                cpu_fast_syscall_disable(NULL);
+                cpu_fast_syscall_disable();
                 ASSERT(curthread->t_post_sys != 0);
                 kpreempt_enable();
 
         } else if (seli > pp->p_ldtlimit) {
+                ASSERT(pp->p_pctx != NULL);
 
                 /*
                  * Increase size of ldt to include seli.
                  */
                 ldt_grow(pp, seli);

@@ -645,14 +652,18 @@
                         return (EBUSY);
                 }
         }
 
         /*
-         * If acc1 is zero, clear the descriptor (including the 'present' bit)
+         * If acc1 is zero, clear the descriptor (including the 'present' bit).
+         * Make sure we update the CPU-private copy of the LDT.
          */
         if (ssd->acc1 == 0) {
                 rc  = ldt_update_segd(ldp, &null_udesc);
+                kpreempt_disable();
+                ldt_load();
+                kpreempt_enable();
                 mutex_exit(&pp->p_ldtlock);
                 return (rc);
         }
 
         /*

@@ -662,64 +673,47 @@
         if (SI86SSD_DPL(ssd) != SEL_UPL) {
                 mutex_exit(&pp->p_ldtlock);
                 return (EINVAL);
         }
 
-#if defined(__amd64)
         /*
          * Do not allow 32-bit applications to create 64-bit mode code
          * segments.
          */
         if (SI86SSD_ISUSEG(ssd) && ((SI86SSD_TYPE(ssd) >> 3) & 1) == 1 &&
             SI86SSD_ISLONG(ssd)) {
                 mutex_exit(&pp->p_ldtlock);
                 return (EINVAL);
         }
-#endif /* __amd64 */
 
         /*
-         * Set up a code or data user segment descriptor.
+         * Set up a code or data user segment descriptor, making sure to update
+         * the CPU-private copy of the LDT.
          */
         if (SI86SSD_ISUSEG(ssd)) {
                 ssd_to_usd(ssd, &ndesc);
                 rc = ldt_update_segd(ldp, &ndesc);
+                kpreempt_disable();
+                ldt_load();
+                kpreempt_enable();
                 mutex_exit(&pp->p_ldtlock);
                 return (rc);
         }
 
-#if defined(__i386)
-        /*
-         * Allow a call gate only if the destination is in the LDT
-         * and the system is running in 32-bit legacy mode.
-         *
-         * In long mode 32-bit call gates are redefined as 64-bit call
-         * gates and the hw enforces that the target code selector
-         * of the call gate must be 64-bit selector. A #gp fault is
-         * generated if otherwise. Since we do not allow 32-bit processes
-         * to switch themselves to 64-bits we never allow call gates
-         * on 64-bit system system.
-         */
-        if (SI86SSD_TYPE(ssd) == SDT_SYSCGT && SELISLDT(ssd->ls)) {
-
-
-                ssd_to_sgd(ssd, (gate_desc_t *)&ndesc);
-                rc = ldt_update_segd(ldp, &ndesc);
                 mutex_exit(&pp->p_ldtlock);
-                return (rc);
-        }
-#endif  /* __i386 */
-
-        mutex_exit(&pp->p_ldtlock);
         return (EINVAL);
 }
 
 /*
- * Allocate new LDT for process just large enough to contain seli.
- * Note we allocate and grow LDT in PAGESIZE chunks. We do this
- * to simplify the implementation and because on the hypervisor it's
- * required, since the LDT must live on pages that have PROT_WRITE
- * removed and which are given to the hypervisor.
+ * Allocate new LDT for process just large enough to contain seli.  Note we
+ * allocate and grow LDT in PAGESIZE chunks. We do this to simplify the
+ * implementation and because on the hypervisor it's required, since the LDT
+ * must live on pages that have PROT_WRITE removed and which are given to the
+ * hypervisor.
+ *
+ * Note that we don't actually load the LDT into the current CPU here: it's done
+ * later by our caller.
  */
 static void
 ldt_alloc(proc_t *pp, uint_t seli)
 {
         user_desc_t     *ldt;

@@ -746,17 +740,10 @@
                 panic("ldt_alloc:xen_ldt_setprot(PROT_READ) failed");
 #endif
 
         pp->p_ldt = ldt;
         pp->p_ldtlimit = nsels - 1;
-        set_syssegd(&pp->p_ldt_desc, ldt, ldtsz - 1, SDT_SYSLDT, SEL_KPL);
-
-        if (pp == curproc) {
-                kpreempt_disable();
-                ldt_load();
-                kpreempt_enable();
-        }
 }
 
 static void
 ldt_free(proc_t *pp)
 {

@@ -771,11 +758,10 @@
 
         ASSERT(IS_P2ALIGNED(ldtsz, PAGESIZE));
 
         pp->p_ldt = NULL;
         pp->p_ldtlimit = 0;
-        pp->p_ldt_desc = null_sdesc;
         mutex_exit(&pp->p_ldtlock);
 
         if (pp == curproc) {
                 kpreempt_disable();
                 ldt_unload();

@@ -836,10 +822,18 @@
         mutex_exit(&cp->p_ldtlock);
         mutex_exit(&pp->p_ldtlock);
 
 }
 
+/*
+ * Note that we don't actually load the LDT into the current CPU here: it's done
+ * later by our caller - unless we take an error.  This works out because
+ * ldt_load() does a copy of ->p_ldt instead of directly loading it into the GDT
+ * (and therefore can't be using the freed old LDT), and by definition if the
+ * new entry didn't pass validation, then the proc shouldn't be referencing an
+ * entry in the extended region.
+ */
 static void
 ldt_grow(proc_t *pp, uint_t seli)
 {
         user_desc_t     *oldt, *nldt;
         uint_t          nsels;

@@ -886,19 +880,7 @@
 #endif
 
         pp->p_ldt = nldt;
         pp->p_ldtlimit = nsels - 1;
 
-        /*
-         * write new ldt segment descriptor.
-         */
-        set_syssegd(&pp->p_ldt_desc, nldt, nldtsz - 1, SDT_SYSLDT, SEL_KPL);
-
-        /*
-         * load the new ldt.
-         */
-        kpreempt_disable();
-        ldt_load();
-        kpreempt_enable();
-
         kmem_free(oldt, oldtsz);
 }