#if defined(__amd64)
        ssd->acc2 |= usd->usd_long << 1;
#else
        ssd->acc2 |= usd->usd_reserved << 1;
#endif

        ssd->acc2 |= usd->usd_def32 << (1 + 1);
        ssd->acc2 |= usd->usd_gran << (1 + 1 + 1);
}
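
/*
 * For reference, the acc1/acc2 encoding implied by the shifts above and
 * in ssd_to_usd() below (a summary of this file's own packing, not an
 * external specification):
 *
 *	acc1:	bit  7		present (p)
 *		bits 6:5	dpl
 *		bits 4:0	type
 *
 *	acc2:	bit  3		granularity (gran)
 *		bit  2		default operand size (def32/DB)
 *		bit  1		long (amd64) or reserved (i386)
 *		bit  0		avl
 */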

static void
ssd_to_usd(struct ssd *ssd, user_desc_t *usd)
{

        ASSERT(bcmp(usd, &null_udesc, sizeof (*usd)) == 0);

        USEGD_SETBASE(usd, ssd->bo);
        USEGD_SETLIMIT(usd, ssd->ls);

        /*
         * set type, dpl and present bits.
         */
        usd->usd_type = ssd->acc1;
        usd->usd_dpl = ssd->acc1 >> 5;
        usd->usd_p = ssd->acc1 >> (5 + 2);

        ASSERT(usd->usd_type >= SDT_MEMRO);
        ASSERT(usd->usd_dpl == SEL_UPL);

        /*
         * 64-bit code selectors are never allowed in the LDT.
         * Reserved bit is always 0 on 32-bit systems.
         */
#if defined(__amd64)
        usd->usd_long = 0;
#else
        usd->usd_reserved = 0;
#endif

        /*
         * set avl, DB and granularity bits.
         */
        usd->usd_avl = ssd->acc2;
        /*
         * set type, dpl and present bits.
         */
        sgd->sgd_type = ssd->acc1;
        sgd->sgd_dpl = ssd->acc1 >> 5;
        sgd->sgd_p = ssd->acc1 >> 7;
        ASSERT(sgd->sgd_type == SDT_SYSCGT);
        ASSERT(sgd->sgd_dpl == SEL_UPL);
        sgd->sgd_stkcpy = 0;
}

#endif	/* __i386 */

/*
 * Load LDT register with the current process's LDT.
 */
static void
ldt_load(void)
{
#if defined(__xpv)
        xen_set_ldt(get_ssd_base(&curproc->p_ldt_desc),
            curproc->p_ldtlimit + 1);
#else
        size_t len;
        system_desc_t desc;

        /*
         * Before we can use the LDT on this CPU, we must install the LDT in the
         * user mapping table.
         */
        len = (curproc->p_ldtlimit + 1) * sizeof (user_desc_t);
        bcopy(curproc->p_ldt, CPU->cpu_m.mcpu_ldt, len);
        CPU->cpu_m.mcpu_ldt_len = len;

        /*
         * Point this CPU's GDT LDT slot at the per-CPU copy, then reload
         * LDTR so the new mapping takes effect.
         */
        set_syssegd(&desc, CPU->cpu_m.mcpu_ldt, len - 1, SDT_SYSLDT, SEL_KPL);
        *((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = desc;

        wr_ldtr(ULDT_SEL);
#endif
}

/*
 * Store a NULL selector in the LDTR. All subsequent illegal references to
 * the LDT will cause a #gp fault.
 */
        /*
         * The 64-bit kernel must be sure to clear any stale ldt
         * selectors when context switching away from a process that
         * has a private ldt. Consider the following example:
         *
         * Wine creates an ldt descriptor and points a segment register
         * to it.
         *
         * We then context switch away from the wine lwp to a kernel
         * thread and hit a breakpoint in the kernel with kmdb.
         *
         * When we continue and resume from kmdb we will #gp
         * fault since kmdb will have saved the stale ldt selector
         * from wine and will try to restore it, but we are no longer in
         * the context of the wine process and do not have our
         * ldtr register pointing to the private ldt.
         */
        reset_sregs();
#endif

        ldt_unload();
        cpu_fast_syscall_enable(NULL);
}

static void
ldt_restorectx(proc_t *p)
{
        ASSERT(p->p_ldt != NULL);
        ASSERT(p == curproc);

        ldt_load();
        cpu_fast_syscall_disable(NULL);
}

/*
 * When a process with a private LDT execs, fast syscalls must be enabled for
 * the new process image.
 */
/* ARGSUSED */
static void
ldt_freectx(proc_t *p, int isexec)
{
        ASSERT(p->p_ldt);

        if (isexec) {
                kpreempt_disable();
                cpu_fast_syscall_enable(NULL);
                kpreempt_enable();
        }

        /*
         * ldt_free() will free the memory used by the private LDT, reset the
         * process's descriptor, and re-program the LDTR.
         */
        ldt_free(p);
}

/*
 * Install ctx op that ensures syscall/sysenter are disabled.
 * See comments below.
 *
 * When a thread with a private LDT forks, the new process
 * must have the LDT context ops installed.
 */
/* ARGSUSED */
static void
ldt_installctx(proc_t *p, proc_t *cp)
{
        proc_t *targ = p;
        kthread_t *t;

        /*
         * If this is a fork, operate on the child process.
         */
        if (cp != NULL) {
        /*
         * We've just disabled fast system call and return instructions; take
         * the slow path out to make sure we don't try to use one to return
         * back to user. We must set t_post_sys for every thread in the
         * process to make sure none of them escape out via fast return.
         */

        mutex_enter(&targ->p_lock);
        t = targ->p_tlist;
        do {
                t->t_post_sys = 1;
        } while ((t = t->t_forw) != targ->p_tlist);
        mutex_exit(&targ->p_lock);
}

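/*
 * For illustration only: a minimal sketch of how a user program might
 * reach setdscr(), via sysi86(SI86DSCR). This assumes the usual
 * <sys/sysi86.h> interface and <sys/segments.h> selector macros; the
 * field values below are hypothetical, not taken from this file.
 *
 *	#include <sys/sysi86.h>
 *	#include <sys/segments.h>
 *
 *	struct ssd ssd;
 *
 *	ssd.sel = SEL_LDT(LDT_UDBASE);	// an LDT selector at DPL 3
 *	ssd.bo = (unsigned int)base;	// hypothetical segment base
 *	ssd.ls = 0xfffff;		// segment limit
 *	ssd.acc1 = 0xf2;		// present, dpl 3, writable data
 *	ssd.acc2 = 0xc;			// page granularity, 32-bit default
 *	if (sysi86(SI86DSCR, &ssd) == -1)
 *		perror("sysi86(SI86DSCR)");
 */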
int
setdscr(struct ssd *ssd)
{
        ushort_t seli;		/* selector index */
        user_desc_t *ldp;	/* descriptor pointer */
        user_desc_t ndesc;	/* new descriptor */
        proc_t *pp = ttoproc(curthread);
        int rc = 0;

        /*
         * LDT segments: executable and data at DPL 3 only.
         */
        if (!SELISLDT(ssd->sel) || !SELISUPL(ssd->sel))
                return (EINVAL);

        /*
         * check the selector index.
         */
        seli = SELTOIDX(ssd->sel);
        if (seli >= MAXNLDT || seli < LDT_UDBASE)
                return (EINVAL);

        ndesc = null_udesc;
        mutex_enter(&pp->p_ldtlock);

        /*
         * If this is the first time for this process then set up a
         * private LDT for it.
         */
        if (pp->p_ldt == NULL) {
                ldt_alloc(pp, seli);

                /*
                 * Now that this process has a private LDT, the use of
                 * the syscall/sysret and sysenter/sysexit instructions
                 * is forbidden for this process because they destroy
                 * the contents of the %cs and %ss segment registers.
                 *
                 * Explicitly disable them here and add a context handler
                 * to the process. Note that disabling them here means we
                 * can't use sysret or sysexit on the way out of this
                 * system call - so we force this thread to take the slow
                 * path (which doesn't make use of sysenter or sysexit)
                 * back out.
                 */
                kpreempt_disable();
                ldt_installctx(pp, NULL);
                cpu_fast_syscall_disable(NULL);
                ASSERT(curthread->t_post_sys != 0);
                kpreempt_enable();

        } else if (seli > pp->p_ldtlimit) {

                /*
                 * Increase size of ldt to include seli.
                 */
                ldt_grow(pp, seli);
        }

        ASSERT(seli <= pp->p_ldtlimit);
        ldp = &pp->p_ldt[seli];

        /*
         * On the 64-bit kernel, this is where things get more subtle.
         * Recall that in the 64-bit kernel, when we enter the kernel we
         * deliberately -don't- reload the segment selectors we came in on
         * for %ds, %es, %fs or %gs. Messing with selectors is expensive,
         * and the underlying descriptors are essentially ignored by the
         * hardware in long mode - except for the base that we override with
         * the gsbase MSRs.
         *
         * However, there's one unfortunate issue with this rosy picture --
                        {
                                if (ssd->sel == rp->r_ds ||
                                    ssd->sel == rp->r_es ||
                                    ssd->sel == rp->r_fs ||
                                    ssd->sel == rp->r_gs) {
                                        bad = 1;
                                        break;
                                }
                        }

                } while ((t = t->t_forw) != pp->p_tlist);
                mutex_exit(&pp->p_lock);

                if (bad) {
                        mutex_exit(&pp->p_ldtlock);
                        return (EBUSY);
                }
        }

        /*
         * If acc1 is zero, clear the descriptor (including the 'present' bit).
         */
        if (ssd->acc1 == 0) {
                rc = ldt_update_segd(ldp, &null_udesc);
                mutex_exit(&pp->p_ldtlock);
                return (rc);
        }

        /*
         * Check segment type: allow a not-present segment, and user
         * DPL (3) only.
         */
        if (SI86SSD_DPL(ssd) != SEL_UPL) {
                mutex_exit(&pp->p_ldtlock);
                return (EINVAL);
        }

#if defined(__amd64)
        /*
         * Do not allow 32-bit applications to create 64-bit mode code
         * segments (bit 3 of the descriptor type distinguishes code
         * from data).
         */
        if (SI86SSD_ISUSEG(ssd) && ((SI86SSD_TYPE(ssd) >> 3) & 1) == 1 &&
            SI86SSD_ISLONG(ssd)) {
                mutex_exit(&pp->p_ldtlock);
                return (EINVAL);
        }
#endif /* __amd64 */

        /*
         * Set up a code or data user segment descriptor.
         */
        if (SI86SSD_ISUSEG(ssd)) {
                ssd_to_usd(ssd, &ndesc);
                rc = ldt_update_segd(ldp, &ndesc);
                mutex_exit(&pp->p_ldtlock);
                return (rc);
        }

#if defined(__i386)
        /*
         * Allow a call gate only if the destination is in the LDT
         * and the system is running in 32-bit legacy mode.
         *
         * In long mode 32-bit call gates are redefined as 64-bit call
         * gates and the hardware enforces that the target code selector
         * of the call gate must be a 64-bit selector; otherwise a #gp
         * fault is generated. Since we do not allow 32-bit processes to
         * switch themselves to 64-bit mode, we never allow call gates
         * on 64-bit systems.
         */
        if (SI86SSD_TYPE(ssd) == SDT_SYSCGT && SELISLDT(ssd->ls)) {
                ssd_to_sgd(ssd, (gate_desc_t *)&ndesc);
                rc = ldt_update_segd(ldp, &ndesc);
                mutex_exit(&pp->p_ldtlock);
                return (rc);
        }
#endif	/* __i386 */

        mutex_exit(&pp->p_ldtlock);
        return (EINVAL);
}

/*
 * Allocate new LDT for process just large enough to contain seli.
 * Note we allocate and grow LDT in PAGESIZE chunks. We do this
 * to simplify the implementation and because on the hypervisor it's
 * required, since the LDT must live on pages that have PROT_WRITE
 * removed and which are given to the hypervisor.
 */
static void
ldt_alloc(proc_t *pp, uint_t seli)
{
        user_desc_t *ldt;
        size_t ldtsz;
        uint_t nsels;

        ASSERT(MUTEX_HELD(&pp->p_ldtlock));
        ASSERT(pp->p_ldt == NULL);
        ASSERT(pp->p_ldtlimit == 0);

        /*
         * Allocate new LDT just large enough to contain seli. The LDT must
         * always be allocated in units of pages for KPTI.
         */
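        /*
         * For example, assuming the usual 8-byte user_desc_t and 4K
         * PAGESIZE: seli == 5 needs (5 + 1) * 8 == 48 bytes, which
         * rounds up to one page, so nsels == 512; seli == 600 needs
         * 4808 bytes, which rounds up to two pages, so nsels == 1024.
         */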
        ldtsz = P2ROUNDUP((seli + 1) * sizeof (user_desc_t), PAGESIZE);
        nsels = ldtsz / sizeof (user_desc_t);
        ASSERT(nsels >= MINNLDT && nsels <= MAXNLDT);

        ldt = kmem_zalloc(ldtsz, KM_SLEEP);
        ASSERT(IS_P2ALIGNED(ldt, PAGESIZE));

#if defined(__xpv)
        if (xen_ldt_setprot(ldt, ldtsz, PROT_READ))
                panic("ldt_alloc:xen_ldt_setprot(PROT_READ) failed");
#endif

        pp->p_ldt = ldt;
        pp->p_ldtlimit = nsels - 1;
        set_syssegd(&pp->p_ldt_desc, ldt, ldtsz - 1, SDT_SYSLDT, SEL_KPL);

        if (pp == curproc) {
                kpreempt_disable();
                ldt_load();
                kpreempt_enable();
        }
}

static void
ldt_free(proc_t *pp)
{
        user_desc_t *ldt;
        size_t ldtsz;

        ASSERT(pp->p_ldt != NULL);

        mutex_enter(&pp->p_ldtlock);
        ldt = pp->p_ldt;
        ldtsz = (pp->p_ldtlimit + 1) * sizeof (user_desc_t);

        ASSERT(IS_P2ALIGNED(ldtsz, PAGESIZE));

        pp->p_ldt = NULL;
        pp->p_ldtlimit = 0;
        pp->p_ldt_desc = null_sdesc;
        mutex_exit(&pp->p_ldtlock);

        if (pp == curproc) {
                kpreempt_disable();
                ldt_unload();
                kpreempt_enable();
        }

#if defined(__xpv)
        /*
         * We are not allowed to make the ldt writable until after
         * we tell the hypervisor to unload it.
         */
        if (xen_ldt_setprot(ldt, ldtsz, PROT_READ | PROT_WRITE))
                panic("ldt_free:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed");
#endif

        kmem_free(ldt, ldtsz);
}

         * parent's ldt. This works since ldt_alloc() above did not load
         * the ldt, since it's for the child process. If we tried to make
         * an LDT writable that is loaded in hardware, the setprot
         * operation would fail.
         */
        if (xen_ldt_setprot(cp->p_ldt, ldtsz, PROT_READ | PROT_WRITE))
                panic("ldt_dup:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed");
#endif

        bcopy(pp->p_ldt, cp->p_ldt, ldtsz);

#if defined(__xpv)
        if (xen_ldt_setprot(cp->p_ldt, ldtsz, PROT_READ))
                panic("ldt_dup:xen_ldt_setprot(PROT_READ) failed");
#endif
        mutex_exit(&cp->p_ldtlock);
        mutex_exit(&pp->p_ldtlock);
}

static void
ldt_grow(proc_t *pp, uint_t seli)
{
        user_desc_t *oldt, *nldt;
        uint_t nsels;
        size_t oldtsz, nldtsz;

        ASSERT(MUTEX_HELD(&pp->p_ldtlock));
        ASSERT(pp->p_ldt != NULL);
        ASSERT(pp->p_ldtlimit != 0);

        /*
         * Allocate larger LDT just large enough to contain seli. The LDT must
         * always be allocated in units of pages for KPTI.
         */
        nldtsz = P2ROUNDUP((seli + 1) * sizeof (user_desc_t), PAGESIZE);
        nsels = nldtsz / sizeof (user_desc_t);
        ASSERT(nsels >= MINNLDT && nsels <= MAXNLDT);
        ASSERT(nsels > pp->p_ldtlimit);

         */
        kpreempt_disable();
        ldt_unload();
        kpreempt_enable();

#if defined(__xpv)

        /*
         * Make old ldt writable and new ldt read only.
         */
        if (xen_ldt_setprot(oldt, oldtsz, PROT_READ | PROT_WRITE))
                panic("ldt_grow:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed");

        if (xen_ldt_setprot(nldt, nldtsz, PROT_READ))
                panic("ldt_grow:xen_ldt_setprot(PROT_READ) failed");
#endif

        pp->p_ldt = nldt;
        pp->p_ldtlimit = nsels - 1;

        /*
         * write new ldt segment descriptor.
         */
        set_syssegd(&pp->p_ldt_desc, nldt, nldtsz - 1, SDT_SYSLDT, SEL_KPL);

        /*
         * load the new ldt.
         */
        kpreempt_disable();
        ldt_load();
        kpreempt_enable();

        kmem_free(oldt, oldtsz);
}
#if defined(__amd64)
        ssd->acc2 |= usd->usd_long << 1;
#else
        ssd->acc2 |= usd->usd_reserved << 1;
#endif

        ssd->acc2 |= usd->usd_def32 << (1 + 1);
        ssd->acc2 |= usd->usd_gran << (1 + 1 + 1);
}

static void
ssd_to_usd(struct ssd *ssd, user_desc_t *usd)
{

        ASSERT(bcmp(usd, &null_udesc, sizeof (*usd)) == 0);

        USEGD_SETBASE(usd, ssd->bo);
        USEGD_SETLIMIT(usd, ssd->ls);

        /*
         * Set type, dpl and present bits.
         *
         * Force the "accessed" bit to on so that we don't run afoul of
         * KPTI.
         */
        usd->usd_type = ssd->acc1 | SDT_A;
        usd->usd_dpl = ssd->acc1 >> 5;
        usd->usd_p = ssd->acc1 >> (5 + 2);

        ASSERT(usd->usd_type >= SDT_MEMRO);
        ASSERT(usd->usd_dpl == SEL_UPL);

        /*
         * 64-bit code selectors are never allowed in the LDT.
         * Reserved bit is always 0 on 32-bit systems.
         */
#if defined(__amd64)
        usd->usd_long = 0;
#else
        usd->usd_reserved = 0;
#endif

        /*
         * set avl, DB and granularity bits.
         */
        usd->usd_avl = ssd->acc2;
        /*
         * set type, dpl and present bits.
         */
        sgd->sgd_type = ssd->acc1;
        sgd->sgd_dpl = ssd->acc1 >> 5;
        sgd->sgd_p = ssd->acc1 >> 7;
        ASSERT(sgd->sgd_type == SDT_SYSCGT);
        ASSERT(sgd->sgd_dpl == SEL_UPL);
        sgd->sgd_stkcpy = 0;
}

#endif	/* __i386 */

/*
 * Load LDT register with the current process's LDT.
 */
static void
ldt_load(void)
{
#if defined(__xpv)
        xen_set_ldt(curproc->p_ldt, curproc->p_ldtlimit + 1);
#else
        size_t len;
        system_desc_t desc;

        /*
         * Before we can use the LDT on this CPU, we must install the LDT in the
         * user mapping table.
         */
        len = (curproc->p_ldtlimit + 1) * sizeof (user_desc_t);
        bcopy(curproc->p_ldt, CPU->cpu_m.mcpu_ldt, len);
        CPU->cpu_m.mcpu_ldt_len = len;
        set_syssegd(&desc, CPU->cpu_m.mcpu_ldt, len - 1, SDT_SYSLDT, SEL_KPL);
        *((system_desc_t *)&CPU->cpu_gdt[GDT_LDT]) = desc;

        wr_ldtr(ULDT_SEL);
#endif
}

/*
 * Store a NULL selector in the LDTR. All subsequent illegal references to
 * the LDT will cause a #gp fault.
 */
        /*
         * The 64-bit kernel must be sure to clear any stale ldt
         * selectors when context switching away from a process that
         * has a private ldt. Consider the following example:
         *
         * Wine creates an ldt descriptor and points a segment register
         * to it.
         *
         * We then context switch away from the wine lwp to a kernel
         * thread and hit a breakpoint in the kernel with kmdb.
         *
         * When we continue and resume from kmdb we will #gp
         * fault since kmdb will have saved the stale ldt selector
         * from wine and will try to restore it, but we are no longer in
         * the context of the wine process and do not have our
         * ldtr register pointing to the private ldt.
         */
        reset_sregs();
#endif

        ldt_unload();
        cpu_fast_syscall_enable();
}

static void
ldt_restorectx(proc_t *p)
{
        ASSERT(p->p_ldt != NULL);
        ASSERT(p == curproc);

        ldt_load();
        cpu_fast_syscall_disable();
}

/*
 * At exec time, we need to clear up our LDT context and re-enable fast syscalls
 * for the new process image.
 *
 * The same is true for the other case, where we have:
 *
 * proc_exit()
 *  ->exitpctx()->ldt_savectx()
 *  ->freepctx()->ldt_freectx()
 *
 * Because pre-emption is not prevented between the two callbacks, we could have
 * come off CPU, and brought back LDT context when coming back on CPU via
 * ldt_restorectx().
 */
/* ARGSUSED */
static void
ldt_freectx(proc_t *p, int isexec)
{
        ASSERT(p->p_ldt != NULL);
        ASSERT(p == curproc);

        kpreempt_disable();
        ldt_free(p);
        cpu_fast_syscall_enable();
        kpreempt_enable();
}

/*
 * Install ctx op that ensures syscall/sysenter are disabled.
 * See comments below.
 *
 * When a thread with a private LDT forks, the new process
 * must have the LDT context ops installed.
 */
/* ARGSUSED */
static void
ldt_installctx(proc_t *p, proc_t *cp)
{
        proc_t *targ = p;
        kthread_t *t;

        /*
         * If this is a fork, operate on the child process.
         */
        if (cp != NULL) {
        /*
         * We've just disabled fast system call and return instructions; take
         * the slow path out to make sure we don't try to use one to return
         * back to user. We must set t_post_sys for every thread in the
         * process to make sure none of them escape out via fast return.
         */

        mutex_enter(&targ->p_lock);
        t = targ->p_tlist;
        do {
                t->t_post_sys = 1;
        } while ((t = t->t_forw) != targ->p_tlist);
        mutex_exit(&targ->p_lock);
}

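/*
 * Overview of setdscr(), derived from the code below: validate that the
 * caller passed an LDT selector at user DPL with an index in range;
 * allocate or grow the process's private LDT as needed; refuse (EBUSY)
 * to modify an entry that some lwp's segment registers still reference;
 * then install either the null descriptor (ssd->acc1 == 0) or a user
 * code/data descriptor, reloading the per-CPU LDT copy afterwards.
 */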
int
setdscr(struct ssd *ssd)
{
        ushort_t seli;		/* selector index */
        user_desc_t *ldp;	/* descriptor pointer */
        user_desc_t ndesc;	/* new descriptor */
        proc_t *pp = curproc;
        int rc = 0;

        /*
         * LDT segments: executable and data at DPL 3 only.
         */
        if (!SELISLDT(ssd->sel) || !SELISUPL(ssd->sel))
                return (EINVAL);

        /*
         * check the selector index.
         */
        seli = SELTOIDX(ssd->sel);
        if (seli >= MAXNLDT || seli < LDT_UDBASE)
                return (EINVAL);

        ndesc = null_udesc;
        mutex_enter(&pp->p_ldtlock);

        /*
         * If this is the first time for this process then set up a
         * private LDT for it.
         */
        if (pp->p_ldt == NULL) {
                ldt_alloc(pp, seli);

                /*
                 * Now that this process has a private LDT, the use of
                 * the syscall/sysret and sysenter/sysexit instructions
                 * is forbidden for this process because they destroy
                 * the contents of the %cs and %ss segment registers.
                 *
                 * Explicitly disable them here and add a context handler
                 * to the process. Note that disabling them here means we
                 * can't use sysret or sysexit on the way out of this
                 * system call - so we force this thread to take the slow
                 * path (which doesn't make use of sysenter or sysexit)
                 * back out.
                 */
                kpreempt_disable();
                ldt_installctx(pp, NULL);
                cpu_fast_syscall_disable();
                ASSERT(curthread->t_post_sys != 0);
                kpreempt_enable();

        } else if (seli > pp->p_ldtlimit) {
                ASSERT(pp->p_pctx != NULL);

                /*
                 * Increase size of ldt to include seli.
                 */
                ldt_grow(pp, seli);
        }

        ASSERT(seli <= pp->p_ldtlimit);
        ldp = &pp->p_ldt[seli];

        /*
         * On the 64-bit kernel, this is where things get more subtle.
         * Recall that in the 64-bit kernel, when we enter the kernel we
         * deliberately -don't- reload the segment selectors we came in on
         * for %ds, %es, %fs or %gs. Messing with selectors is expensive,
         * and the underlying descriptors are essentially ignored by the
         * hardware in long mode - except for the base that we override with
         * the gsbase MSRs.
         *
         * However, there's one unfortunate issue with this rosy picture --
                        {
                                if (ssd->sel == rp->r_ds ||
                                    ssd->sel == rp->r_es ||
                                    ssd->sel == rp->r_fs ||
                                    ssd->sel == rp->r_gs) {
                                        bad = 1;
                                        break;
                                }
                        }

                } while ((t = t->t_forw) != pp->p_tlist);
                mutex_exit(&pp->p_lock);

                if (bad) {
                        mutex_exit(&pp->p_ldtlock);
                        return (EBUSY);
                }
        }

        /*
         * If acc1 is zero, clear the descriptor (including the 'present' bit).
         * Make sure we update the CPU-private copy of the LDT.
         */
        if (ssd->acc1 == 0) {
                rc = ldt_update_segd(ldp, &null_udesc);
                kpreempt_disable();
                ldt_load();
                kpreempt_enable();
                mutex_exit(&pp->p_ldtlock);
                return (rc);
        }

        /*
         * Check segment type: allow a not-present segment, and user
         * DPL (3) only.
         */
        if (SI86SSD_DPL(ssd) != SEL_UPL) {
                mutex_exit(&pp->p_ldtlock);
                return (EINVAL);
        }

        /*
         * Do not allow 32-bit applications to create 64-bit mode code
         * segments (bit 3 of the descriptor type distinguishes code
         * from data).
         */
        if (SI86SSD_ISUSEG(ssd) && ((SI86SSD_TYPE(ssd) >> 3) & 1) == 1 &&
            SI86SSD_ISLONG(ssd)) {
                mutex_exit(&pp->p_ldtlock);
                return (EINVAL);
        }

        /*
         * Set up a code or data user segment descriptor, making sure to update
         * the CPU-private copy of the LDT.
         */
        if (SI86SSD_ISUSEG(ssd)) {
                ssd_to_usd(ssd, &ndesc);
                rc = ldt_update_segd(ldp, &ndesc);
                kpreempt_disable();
                ldt_load();
                kpreempt_enable();
                mutex_exit(&pp->p_ldtlock);
                return (rc);
        }

        mutex_exit(&pp->p_ldtlock);
        return (EINVAL);
}

/*
 * Allocate new LDT for process just large enough to contain seli. Note we
 * allocate and grow LDT in PAGESIZE chunks. We do this to simplify the
 * implementation and because on the hypervisor it's required, since the LDT
 * must live on pages that have PROT_WRITE removed and which are given to the
 * hypervisor.
 *
 * Note that we don't actually load the LDT into the current CPU here: it's
 * done later by our caller.
 */
static void
ldt_alloc(proc_t *pp, uint_t seli)
{
        user_desc_t *ldt;
        size_t ldtsz;
        uint_t nsels;

        ASSERT(MUTEX_HELD(&pp->p_ldtlock));
        ASSERT(pp->p_ldt == NULL);
        ASSERT(pp->p_ldtlimit == 0);

        /*
         * Allocate new LDT just large enough to contain seli. The LDT must
         * always be allocated in units of pages for KPTI.
         */
        ldtsz = P2ROUNDUP((seli + 1) * sizeof (user_desc_t), PAGESIZE);
        nsels = ldtsz / sizeof (user_desc_t);
        ASSERT(nsels >= MINNLDT && nsels <= MAXNLDT);

        ldt = kmem_zalloc(ldtsz, KM_SLEEP);
        ASSERT(IS_P2ALIGNED(ldt, PAGESIZE));

#if defined(__xpv)
        if (xen_ldt_setprot(ldt, ldtsz, PROT_READ))
                panic("ldt_alloc:xen_ldt_setprot(PROT_READ) failed");
#endif

        pp->p_ldt = ldt;
        pp->p_ldtlimit = nsels - 1;
}

static void
ldt_free(proc_t *pp)
{
        user_desc_t *ldt;
        size_t ldtsz;

        ASSERT(pp->p_ldt != NULL);

        mutex_enter(&pp->p_ldtlock);
        ldt = pp->p_ldt;
        ldtsz = (pp->p_ldtlimit + 1) * sizeof (user_desc_t);

        ASSERT(IS_P2ALIGNED(ldtsz, PAGESIZE));

        pp->p_ldt = NULL;
        pp->p_ldtlimit = 0;
        mutex_exit(&pp->p_ldtlock);

        if (pp == curproc) {
                kpreempt_disable();
                ldt_unload();
                kpreempt_enable();
        }

#if defined(__xpv)
        /*
         * We are not allowed to make the ldt writable until after
         * we tell the hypervisor to unload it.
         */
        if (xen_ldt_setprot(ldt, ldtsz, PROT_READ | PROT_WRITE))
                panic("ldt_free:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed");
#endif

        kmem_free(ldt, ldtsz);
}

         * parent's ldt. This works since ldt_alloc() above did not load
         * the ldt, since it's for the child process. If we tried to make
         * an LDT writable that is loaded in hardware, the setprot
         * operation would fail.
         */
        if (xen_ldt_setprot(cp->p_ldt, ldtsz, PROT_READ | PROT_WRITE))
                panic("ldt_dup:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed");
#endif

        bcopy(pp->p_ldt, cp->p_ldt, ldtsz);

#if defined(__xpv)
        if (xen_ldt_setprot(cp->p_ldt, ldtsz, PROT_READ))
                panic("ldt_dup:xen_ldt_setprot(PROT_READ) failed");
#endif
        mutex_exit(&cp->p_ldtlock);
        mutex_exit(&pp->p_ldtlock);
}

/*
 * Note that we don't actually load the LDT into the current CPU here: it's
 * done later by our caller - unless we take an error. This works out because
 * ldt_load() does a copy of ->p_ldt instead of directly loading it into the
 * GDT (and therefore can't be using the freed old LDT), and by definition if
 * the new entry didn't pass validation, then the proc shouldn't be
 * referencing an entry in the extended region.
 */
static void
ldt_grow(proc_t *pp, uint_t seli)
{
        user_desc_t *oldt, *nldt;
        uint_t nsels;
        size_t oldtsz, nldtsz;

        ASSERT(MUTEX_HELD(&pp->p_ldtlock));
        ASSERT(pp->p_ldt != NULL);
        ASSERT(pp->p_ldtlimit != 0);

        /*
         * Allocate larger LDT just large enough to contain seli. The LDT must
         * always be allocated in units of pages for KPTI.
         */
        nldtsz = P2ROUNDUP((seli + 1) * sizeof (user_desc_t), PAGESIZE);
        nsels = nldtsz / sizeof (user_desc_t);
        ASSERT(nsels >= MINNLDT && nsels <= MAXNLDT);
        ASSERT(nsels > pp->p_ldtlimit);

         */
        kpreempt_disable();
        ldt_unload();
        kpreempt_enable();

#if defined(__xpv)

        /*
         * Make old ldt writable and new ldt read only.
         */
        if (xen_ldt_setprot(oldt, oldtsz, PROT_READ | PROT_WRITE))
                panic("ldt_grow:xen_ldt_setprot(PROT_READ|PROT_WRITE) failed");

        if (xen_ldt_setprot(nldt, nldtsz, PROT_READ))
                panic("ldt_grow:xen_ldt_setprot(PROT_READ) failed");
#endif

        pp->p_ldt = nldt;
        pp->p_ldtlimit = nsels - 1;

        kmem_free(oldt, oldtsz);
}