Print this page
9600 LDT still not happy under KPTI

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/intel/ia32/os/sysi86.c
          +++ new/usr/src/uts/intel/ia32/os/sysi86.c
↓ open down ↓ 276 lines elided ↑ open up ↑
 277  277  static void
 278  278  ssd_to_usd(struct ssd *ssd, user_desc_t *usd)
 279  279  {
 280  280  
 281  281          ASSERT(bcmp(usd, &null_udesc, sizeof (*usd)) == 0);
 282  282  
 283  283          USEGD_SETBASE(usd, ssd->bo);
 284  284          USEGD_SETLIMIT(usd, ssd->ls);
 285  285  
 286  286          /*
 287      -         * set type, dpl and present bits.
      287 +         * Set type, dpl and present bits.
      288 +         *
      289 +         * Force the "accessed" bit to on so that we don't run afoul of
      290 +         * KPTI.
 288  291           */
 289      -        usd->usd_type = ssd->acc1;
      292 +        usd->usd_type = ssd->acc1 | SDT_A;
 290  293          usd->usd_dpl = ssd->acc1 >> 5;
 291  294          usd->usd_p = ssd->acc1 >> (5 + 2);
 292  295  
 293  296          ASSERT(usd->usd_type >= SDT_MEMRO);
 294  297          ASSERT(usd->usd_dpl == SEL_UPL);
 295  298  
 296  299          /*
 297  300           * 64-bit code selectors are never allowed in the LDT.
 298  301           * Reserved bit is always 0 on 32-bit systems.
 299  302           */
↓ open down ↓ 38 lines elided ↑ open up ↑
 338  341  
 339  342  #endif  /* __i386 */
 340  343  
 341  344  /*
 342  345   * Load LDT register with the current process's LDT.
 343  346   */
 344  347  static void
 345  348  ldt_load(void)
 346  349  {
 347  350  #if defined(__xpv)
 348      -        xen_set_ldt(get_ssd_base(&curproc->p_ldt_desc),
 349      -            curproc->p_ldtlimit + 1);
      351 +        xen_set_ldt(curproc->p_ldt, curproc->p_ldtlimit + 1);
 350  352  #else
 351  353          size_t len;
 352  354          system_desc_t desc;
 353  355  
 354  356          /*
 355  357           * Before we can use the LDT on this CPU, we must install the LDT in the
 356  358           * user mapping table.
 357  359           */
 358  360          len = (curproc->p_ldtlimit + 1) * sizeof (user_desc_t);
 359  361          bcopy(curproc->p_ldt, CPU->cpu_m.mcpu_ldt, len);
↓ open down ↓ 45 lines elided ↑ open up ↑
 405  407           *      When we continue and resume from kmdb we will #gp
 406  408           *      fault since kmdb will have saved the stale ldt selector
 407  409           *      from wine and will try to restore it but we are no longer in
 408  410           *      the context of the wine process and do not have our
 409  411           *      ldtr register pointing to the private ldt.
 410  412           */
 411  413          reset_sregs();
 412  414  #endif
 413  415  
 414  416          ldt_unload();
 415      -        cpu_fast_syscall_enable(NULL);
      417 +        cpu_fast_syscall_enable();
 416  418  }
 417  419  
 418  420  static void
 419  421  ldt_restorectx(proc_t *p)
 420  422  {
 421  423          ASSERT(p->p_ldt != NULL);
 422  424          ASSERT(p == curproc);
 423  425  
 424  426          ldt_load();
 425      -        cpu_fast_syscall_disable(NULL);
      427 +        cpu_fast_syscall_disable();
 426  428  }
 427  429  
 428  430  /*
 429      - * When a process with a private LDT execs, fast syscalls must be enabled for
 430      - * the new process image.
      431 + * At exec time, we need to clear up our LDT context and re-enable fast syscalls
      432 + * for the new process image.
      433 + *
      434 + * The same is true for the other case, where we have:
      435 + *
      436 + * proc_exit()
      437 + *  ->exitpctx()->ldt_savectx()
      438 + *  ->freepctx()->ldt_freectx()
      439 + *
      440 + * Because pre-emption is not prevented between the two callbacks, we could have
      441 + * come off CPU, and brought back LDT context when coming back on CPU via
      442 + * ldt_restorectx().
 431  443   */
 432  444  /* ARGSUSED */
 433  445  static void
 434  446  ldt_freectx(proc_t *p, int isexec)
 435  447  {
 436      -        ASSERT(p->p_ldt);
      448 +        ASSERT(p->p_ldt != NULL);
      449 +        ASSERT(p == curproc);
 437  450  
 438      -        if (isexec) {
 439      -                kpreempt_disable();
 440      -                cpu_fast_syscall_enable(NULL);
 441      -                kpreempt_enable();
 442      -        }
 443      -
 444      -        /*
 445      -         * ldt_free() will free the memory used by the private LDT, reset the
 446      -         * process's descriptor, and re-program the LDTR.
 447      -         */
      451 +        kpreempt_disable();
 448  452          ldt_free(p);
      453 +        cpu_fast_syscall_enable();
      454 +        kpreempt_enable();
 449  455  }
 450  456  
 451  457  /*
 452  458   * Install ctx op that ensures syscall/sysenter are disabled.
 453  459   * See comments below.
 454  460   *
 455  461   * When a thread with a private LDT forks, the new process
 456  462   * must have the LDT context ops installed.
 457  463   */
 458  464  /* ARGSUSED */
↓ open down ↓ 31 lines elided ↑ open up ↑
 490  496          t = targ->p_tlist;
 491  497          do {
 492  498                  t->t_post_sys = 1;
 493  499          } while ((t = t->t_forw) != targ->p_tlist);
 494  500          mutex_exit(&targ->p_lock);
 495  501  }
 496  502  
 497  503  int
 498  504  setdscr(struct ssd *ssd)
 499  505  {
 500      -        ushort_t seli;          /* selector index */
      506 +        ushort_t seli;          /* selector index */
 501  507          user_desc_t *ldp;       /* descriptor pointer */
 502  508          user_desc_t ndesc;      /* new descriptor */
 503      -        proc_t  *pp = ttoproc(curthread);
      509 +        proc_t  *pp = curproc;
 504  510          int     rc = 0;
 505  511  
 506  512          /*
 507  513           * LDT segments: executable and data at DPL 3 only.
 508  514           */
 509  515          if (!SELISLDT(ssd->sel) || !SELISUPL(ssd->sel))
 510  516                  return (EINVAL);
 511  517  
 512  518          /*
 513  519           * check the selector index.
↓ open down ↓ 20 lines elided ↑ open up ↑
 534  540                   *
 535  541                   * Explicitly disable them here and add a context handler
 536  542                   * to the process. Note that disabling
 537  543                   * them here means we can't use sysret or sysexit on
 538  544                   * the way out of this system call - so we force this
 539  545                   * thread to take the slow path (which doesn't make use
 540  546                   * of sysenter or sysexit) back out.
 541  547                   */
 542  548                  kpreempt_disable();
 543  549                  ldt_installctx(pp, NULL);
 544      -                cpu_fast_syscall_disable(NULL);
      550 +                cpu_fast_syscall_disable();
 545  551                  ASSERT(curthread->t_post_sys != 0);
 546  552                  kpreempt_enable();
 547  553  
 548  554          } else if (seli > pp->p_ldtlimit) {
      555 +                ASSERT(pp->p_pctx != NULL);
 549  556  
 550  557                  /*
 551  558                   * Increase size of ldt to include seli.
 552  559                   */
 553  560                  ldt_grow(pp, seli);
 554  561          }
 555  562  
 556  563          ASSERT(seli <= pp->p_ldtlimit);
 557  564          ldp = &pp->p_ldt[seli];
 558  565  
↓ open down ↓ 81 lines elided ↑ open up ↑
 640  647                  } while ((t = t->t_forw) != pp->p_tlist);
 641  648                  mutex_exit(&pp->p_lock);
 642  649  
 643  650                  if (bad) {
 644  651                          mutex_exit(&pp->p_ldtlock);
 645  652                          return (EBUSY);
 646  653                  }
 647  654          }
 648  655  
 649  656          /*
 650      -         * If acc1 is zero, clear the descriptor (including the 'present' bit)
      657 +         * If acc1 is zero, clear the descriptor (including the 'present' bit).
      658 +         * Make sure we update the CPU-private copy of the LDT.
 651  659           */
 652  660          if (ssd->acc1 == 0) {
 653  661                  rc  = ldt_update_segd(ldp, &null_udesc);
      662 +                kpreempt_disable();
      663 +                ldt_load();
      664 +                kpreempt_enable();
 654  665                  mutex_exit(&pp->p_ldtlock);
 655  666                  return (rc);
 656  667          }
 657  668  
 658  669          /*
 659  670           * Check segment type, allow segment not present and
 660  671           * only user DPL (3).
 661  672           */
 662  673          if (SI86SSD_DPL(ssd) != SEL_UPL) {
 663  674                  mutex_exit(&pp->p_ldtlock);
 664  675                  return (EINVAL);
 665  676          }
 666  677  
 667      -#if defined(__amd64)
 668  678          /*
 669  679           * Do not allow 32-bit applications to create 64-bit mode code
 670  680           * segments.
 671  681           */
 672  682          if (SI86SSD_ISUSEG(ssd) && ((SI86SSD_TYPE(ssd) >> 3) & 1) == 1 &&
 673  683              SI86SSD_ISLONG(ssd)) {
 674  684                  mutex_exit(&pp->p_ldtlock);
 675  685                  return (EINVAL);
 676  686          }
 677      -#endif /* __amd64 */
 678  687  
 679  688          /*
 680      -         * Set up a code or data user segment descriptor.
      689 +         * Set up a code or data user segment descriptor, making sure to update
      690 +         * the CPU-private copy of the LDT.
 681  691           */
 682  692          if (SI86SSD_ISUSEG(ssd)) {
 683  693                  ssd_to_usd(ssd, &ndesc);
 684  694                  rc = ldt_update_segd(ldp, &ndesc);
      695 +                kpreempt_disable();
      696 +                ldt_load();
      697 +                kpreempt_enable();
 685  698                  mutex_exit(&pp->p_ldtlock);
 686  699                  return (rc);
 687  700          }
 688  701  
 689      -#if defined(__i386)
 690      -        /*
 691      -         * Allow a call gate only if the destination is in the LDT
 692      -         * and the system is running in 32-bit legacy mode.
 693      -         *
 694      -         * In long mode 32-bit call gates are redefined as 64-bit call
 695      -         * gates and the hw enforces that the target code selector
 696      -         * of the call gate must be 64-bit selector. A #gp fault is
 697      -         * generated if otherwise. Since we do not allow 32-bit processes
 698      -         * to switch themselves to 64-bits we never allow call gates
 699      -         * on 64-bit system system.
 700      -         */
 701      -        if (SI86SSD_TYPE(ssd) == SDT_SYSCGT && SELISLDT(ssd->ls)) {
 702      -
 703      -
 704      -                ssd_to_sgd(ssd, (gate_desc_t *)&ndesc);
 705      -                rc = ldt_update_segd(ldp, &ndesc);
 706      -                mutex_exit(&pp->p_ldtlock);
 707      -                return (rc);
 708      -        }
 709      -#endif  /* __i386 */
 710      -
 711  702          mutex_exit(&pp->p_ldtlock);
 712  703          return (EINVAL);
 713  704  }
 714  705  
 715  706  /*
 716      - * Allocate new LDT for process just large enough to contain seli.
 717      - * Note we allocate and grow LDT in PAGESIZE chunks. We do this
 718      - * to simplify the implementation and because on the hypervisor it's
 719      - * required, since the LDT must live on pages that have PROT_WRITE
 720      - * removed and which are given to the hypervisor.
      707 + * Allocate new LDT for process just large enough to contain seli.  Note we
      708 + * allocate and grow LDT in PAGESIZE chunks. We do this to simplify the
      709 + * implementation and because on the hypervisor it's required, since the LDT
      710 + * must live on pages that have PROT_WRITE removed and which are given to the
      711 + * hypervisor.
      712 + *
      713 + * Note that we don't actually load the LDT into the current CPU here: it's done
      714 + * later by our caller.
 721  715   */
 722  716  static void
 723  717  ldt_alloc(proc_t *pp, uint_t seli)
 724  718  {
 725  719          user_desc_t     *ldt;
 726  720          size_t          ldtsz;
 727  721          uint_t          nsels;
 728  722  
 729  723          ASSERT(MUTEX_HELD(&pp->p_ldtlock));
 730  724          ASSERT(pp->p_ldt == NULL);
↓ open down ↓ 10 lines elided ↑ open up ↑
 741  735          ldt = kmem_zalloc(ldtsz, KM_SLEEP);
 742  736          ASSERT(IS_P2ALIGNED(ldt, PAGESIZE));
 743  737  
 744  738  #if defined(__xpv)
 745  739          if (xen_ldt_setprot(ldt, ldtsz, PROT_READ))
 746  740                  panic("ldt_alloc:xen_ldt_setprot(PROT_READ) failed");
 747  741  #endif
 748  742  
 749  743          pp->p_ldt = ldt;
 750  744          pp->p_ldtlimit = nsels - 1;
 751      -        set_syssegd(&pp->p_ldt_desc, ldt, ldtsz - 1, SDT_SYSLDT, SEL_KPL);
 752      -
 753      -        if (pp == curproc) {
 754      -                kpreempt_disable();
 755      -                ldt_load();
 756      -                kpreempt_enable();
 757      -        }
 758  745  }
 759  746  
 760  747  static void
 761  748  ldt_free(proc_t *pp)
 762  749  {
 763  750          user_desc_t     *ldt;
 764  751          size_t          ldtsz;
 765  752  
 766  753          ASSERT(pp->p_ldt != NULL);
 767  754  
 768  755          mutex_enter(&pp->p_ldtlock);
 769  756          ldt = pp->p_ldt;
 770  757          ldtsz = (pp->p_ldtlimit + 1) * sizeof (user_desc_t);
 771  758  
 772  759          ASSERT(IS_P2ALIGNED(ldtsz, PAGESIZE));
 773  760  
 774  761          pp->p_ldt = NULL;
 775  762          pp->p_ldtlimit = 0;
 776      -        pp->p_ldt_desc = null_sdesc;
 777  763          mutex_exit(&pp->p_ldtlock);
 778  764  
 779  765          if (pp == curproc) {
 780  766                  kpreempt_disable();
 781  767                  ldt_unload();
 782  768                  kpreempt_enable();
 783  769          }
 784  770  
 785  771  #if defined(__xpv)
 786  772          /*
↓ open down ↓ 44 lines elided ↑ open up ↑
 831  817  
 832  818  #if defined(__xpv)
 833  819          if (xen_ldt_setprot(cp->p_ldt, ldtsz, PROT_READ))
 834  820                  panic("ldt_dup:xen_ldt_setprot(PROT_READ) failed");
 835  821  #endif
 836  822          mutex_exit(&cp->p_ldtlock);
 837  823          mutex_exit(&pp->p_ldtlock);
 838  824  
 839  825  }
 840  826  
      827 +/*
      828 + * Note that we don't actually load the LDT into the current CPU here: it's done
      829 + * later by our caller - unless we take an error.  This works out because
      830 + * ldt_load() does a copy of ->p_ldt instead of directly loading it into the GDT
      831 + * (and therefore can't be using the freed old LDT), and by definition if the
      832 + * new entry didn't pass validation, then the proc shouldn't be referencing an
      833 + * entry in the extended region.
      834 + */
 841  835  static void
 842  836  ldt_grow(proc_t *pp, uint_t seli)
 843  837  {
 844  838          user_desc_t     *oldt, *nldt;
 845  839          uint_t          nsels;
 846  840          size_t          oldtsz, nldtsz;
 847  841  
 848  842          ASSERT(MUTEX_HELD(&pp->p_ldtlock));
 849  843          ASSERT(pp->p_ldt != NULL);
 850  844          ASSERT(pp->p_ldtlimit != 0);
↓ open down ↓ 30 lines elided ↑ open up ↑
 881  875          if (xen_ldt_setprot(oldt, oldtsz, PROT_READ | PROT_WRITE))
 882  876                  panic("ldt_grow:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed");
 883  877  
 884  878          if (xen_ldt_setprot(nldt, nldtsz, PROT_READ))
 885  879                  panic("ldt_grow:xen_ldt_setprot(PROT_READ) failed");
 886  880  #endif
 887  881  
 888  882          pp->p_ldt = nldt;
 889  883          pp->p_ldtlimit = nsels - 1;
 890  884  
 891      -        /*
 892      -         * write new ldt segment descriptor.
 893      -         */
 894      -        set_syssegd(&pp->p_ldt_desc, nldt, nldtsz - 1, SDT_SYSLDT, SEL_KPL);
 895      -
 896      -        /*
 897      -         * load the new ldt.
 898      -         */
 899      -        kpreempt_disable();
 900      -        ldt_load();
 901      -        kpreempt_enable();
 902      -
 903  885          kmem_free(oldt, oldtsz);
 904  886  }
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX