Print this page
9600 LDT still not happy under KPTI

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/intel/ia32/os/desctbls.c
          +++ new/usr/src/uts/intel/ia32/os/desctbls.c
↓ open down ↓ 170 lines elided ↑ open up ↑
 171  171  
 172  172  /*
 173  173   * software prototypes for default local descriptor table
 174  174   */
 175  175  
 176  176  /*
 177  177   * Routines for loading segment descriptors in a format the hardware
 178  178   * can understand.
 179  179   */
 180  180  
 181      -#if defined(__amd64)
 182      -
 183  181  /*
 184  182   * In long mode we have the new L or long mode attribute bit
 185  183   * for code segments. Only the conforming bit in type is used along
 186  184   * with descriptor priority and present bits. Default operand size must
 187  185   * be zero when in long mode. In 32-bit compatibility mode all fields
 188  186   * are treated as in legacy mode. For data segments while in long mode
 189  187   * only the present bit is loaded.
 190  188   */
 191  189  void
 192  190  set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size,
 193  191      uint_t type, uint_t dpl, uint_t gran, uint_t defopsz)
 194  192  {
 195  193          ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG);
      194 +        /* This should never be a "system" segment. */
      195 +        ASSERT3U(type & SDT_S, !=, 0);
 196  196  
 197  197          /*
 198  198           * 64-bit long mode.
 199  199           */
 200  200          if (lmode == SDP_LONG)
 201  201                  dp->usd_def32 = 0;              /* 32-bit operands only */
 202  202          else
 203  203                  /*
 204  204                   * 32-bit compatibility mode.
 205  205                   */
 206  206                  dp->usd_def32 = defopsz;        /* 0 = 16, 1 = 32-bit ops */
 207  207  
      208 +        /*
      209 +         * We should always set the "accessed" bit (SDT_A), otherwise the CPU
      210 +         * will write to the GDT whenever we change segment registers around.
      211 +         * With KPTI on, the GDT is read-only in the user page table, which
      212 +         * causes crashes if we don't set this.
      213 +         */
      214 +        ASSERT3U(type & SDT_A, !=, 0);
      215 +
 208  216          dp->usd_long = lmode;   /* 64-bit mode */
 209  217          dp->usd_type = type;
 210  218          dp->usd_dpl = dpl;
 211  219          dp->usd_p = 1;
 212  220          dp->usd_gran = gran;            /* 0 = bytes, 1 = pages */
 213  221  
 214  222          dp->usd_lobase = (uintptr_t)base;
 215  223          dp->usd_midbase = (uintptr_t)base >> 16;
 216  224          dp->usd_hibase = (uintptr_t)base >> (16 + 8);
 217  225          dp->usd_lolimit = size;
 218  226          dp->usd_hilimit = (uintptr_t)size >> 16;
 219  227  }
 220  228  
 221      -#elif defined(__i386)
 222      -
 223  229  /*
 224      - * Install user segment descriptor for code and data.
 225      - */
 226      -void
 227      -set_usegd(user_desc_t *dp, void *base, size_t size, uint_t type,
 228      -    uint_t dpl, uint_t gran, uint_t defopsz)
 229      -{
 230      -        dp->usd_lolimit = size;
 231      -        dp->usd_hilimit = (uintptr_t)size >> 16;
 232      -
 233      -        dp->usd_lobase = (uintptr_t)base;
 234      -        dp->usd_midbase = (uintptr_t)base >> 16;
 235      -        dp->usd_hibase = (uintptr_t)base >> (16 + 8);
 236      -
 237      -        dp->usd_type = type;
 238      -        dp->usd_dpl = dpl;
 239      -        dp->usd_p = 1;
 240      -        dp->usd_def32 = defopsz;        /* 0 = 16, 1 = 32 bit operands */
 241      -        dp->usd_gran = gran;            /* 0 = bytes, 1 = pages */
 242      -}
 243      -
 244      -#endif  /* __i386 */
 245      -
 246      -/*
 247  230   * Install system segment descriptor for LDT and TSS segments.
 248  231   */
 249  232  
 250      -#if defined(__amd64)
 251      -
 252  233  void
 253  234  set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
 254  235      uint_t dpl)
 255  236  {
 256  237          dp->ssd_lolimit = size;
 257  238          dp->ssd_hilimit = (uintptr_t)size >> 16;
 258  239  
 259  240          dp->ssd_lobase = (uintptr_t)base;
 260  241          dp->ssd_midbase = (uintptr_t)base >> 16;
 261  242          dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
↓ open down ↓ 12 lines elided ↑ open up ↑
 274  255  {
 275  256          uintptr_t       base;
 276  257  
 277  258          base = (uintptr_t)dp->ssd_lobase |
 278  259              (uintptr_t)dp->ssd_midbase << 16 |
 279  260              (uintptr_t)dp->ssd_hibase << (16 + 8) |
 280  261              (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8);
 281  262          return ((void *)base);
 282  263  }
 283  264  
 284      -#elif defined(__i386)
 285      -
 286      -void
 287      -set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
 288      -    uint_t dpl)
 289      -{
 290      -        dp->ssd_lolimit = size;
 291      -        dp->ssd_hilimit = (uintptr_t)size >> 16;
 292      -
 293      -        dp->ssd_lobase = (uintptr_t)base;
 294      -        dp->ssd_midbase = (uintptr_t)base >> 16;
 295      -        dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
 296      -
 297      -        dp->ssd_type = type;
 298      -        dp->ssd_zero = 0;       /* must be zero */
 299      -        dp->ssd_dpl = dpl;
 300      -        dp->ssd_p = 1;
 301      -        dp->ssd_gran = 0;       /* force byte units */
 302      -}
 303      -
 304      -void *
 305      -get_ssd_base(system_desc_t *dp)
 306      -{
 307      -        uintptr_t       base;
 308      -
 309      -        base = (uintptr_t)dp->ssd_lobase |
 310      -            (uintptr_t)dp->ssd_midbase << 16 |
 311      -            (uintptr_t)dp->ssd_hibase << (16 + 8);
 312      -        return ((void *)base);
 313      -}
 314      -
 315      -#endif  /* __i386 */
 316      -
 317  265  /*
 318  266   * Install gate segment descriptor for interrupt, trap, call and task gates.
 319  267   *
 320  268   * For 64 bit native if we have KPTI enabled, we use the IST stack mechanism on
 321  269   * all interrupts.  We have different ISTs for each class of exceptions that are
 322  270   * most likely to occur while handling an existing exception; while many of
 323  271   * these are just going to panic, it's nice not to trample on the existing
 324  272   * exception state for debugging purposes.
 325  273   *
 326  274   * Normal interrupts are all redirected unconditionally to the KPTI trampoline
↓ open down ↓ 57 lines elided ↑ open up ↑
 384  332  }
 385  333  
 386  334  /*
 387  335   * Updates a single user descriptor in the GDT of the current cpu.
 388  336   * Caller is responsible for preventing cpu migration.
 389  337   */
 390  338  
 391  339  void
 392  340  gdt_update_usegd(uint_t sidx, user_desc_t *udp)
 393  341  {
 394      -#if defined(__xpv)
      342 +#if defined(DEBUG)
      343 +        /* This should never be a "system" segment, but it might be null. */
      344 +        if (udp->usd_p != 0 || udp->usd_type != 0) {
      345 +                ASSERT3U(udp->usd_type & SDT_S, !=, 0);
      346 +        }
      347 +        /*
      348 +         * We should always set the "accessed" bit (SDT_A), otherwise the CPU
      349 +         * will write to the GDT whenever we change segment registers around.
      350 +         * With KPTI on, the GDT is read-only in the user page table, which
      351 +         * causes crashes if we don't set this.
      352 +         */
      353 +        if (udp->usd_p != 0 || udp->usd_type != 0) {
      354 +                ASSERT3U(udp->usd_type & SDT_A, !=, 0);
      355 +        }
      356 +#endif
 395  357  
      358 +#if defined(__xpv)
 396  359          uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx;
 397  360  
 398  361          if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp))
 399  362                  panic("gdt_update_usegd: HYPERVISOR_update_descriptor");
 400  363  
 401  364  #else   /* __xpv */
 402      -
 403  365          CPU->cpu_gdt[sidx] = *udp;
 404      -
 405  366  #endif  /* __xpv */
 406  367  }
 407  368  
 408  369  /*
 409  370   * Writes a single descriptor pointed to by udp into a process's
 410  371   * LDT entry pointed to by ldp.
 411  372   */
 412  373  int
 413  374  ldt_update_segd(user_desc_t *ldp, user_desc_t *udp)
 414  375  {
 415      -#if defined(__xpv)
      376 +#if defined(DEBUG)
      377 +        /* This should never be a "system" segment, but it might be null. */
      378 +        if (udp->usd_p != 0 || udp->usd_type != 0) {
      379 +                ASSERT3U(udp->usd_type & SDT_S, !=, 0);
      380 +        }
      381 +        /*
      382 +         * We should always set the "accessed" bit (SDT_A), otherwise the CPU
      383 +         * will write to the LDT whenever we change segment registers around.
      384 +         * With KPTI on, the LDT is read-only in the user page table, which
      385 +         * causes crashes if we don't set this.
      386 +         */
      387 +        if (udp->usd_p != 0 || udp->usd_type != 0) {
      388 +                ASSERT3U(udp->usd_type & SDT_A, !=, 0);
      389 +        }
      390 +#endif
 416  391  
      392 +#if defined(__xpv)
 417  393          uint64_t dpa;
 418  394  
 419  395          dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) |
 420  396              ((uintptr_t)ldp & PAGEOFFSET);
 421  397  
 422  398          /*
 423  399           * The hypervisor is a little more restrictive about what it
 424  400           * supports in the LDT.
 425  401           */
 426  402          if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0)
 427  403                  return (EINVAL);
 428  404  
 429  405  #else   /* __xpv */
 430      -
 431  406          *ldp = *udp;
 432  407  
 433  408  #endif  /* __xpv */
 434  409          return (0);
 435  410  }
 436  411  
 437  412  #if defined(__xpv)
 438  413  
 439  414  /*
 440  415   * Converts hw format gate descriptor into pseudo-IDT format for the hypervisor.
↓ open down ↓ 1008 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX