1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 
  26 /*
  27  * KVM backend for hypervisor domain dumps.  We don't use libkvm for
  28  * such dumps, since they do not have a namelist file or the typical
  29  * dump structures we expect to aid bootstrapping.  Instead, we
  30  * bootstrap based upon a debug_info structure at a known VA, using the
  31  * guest's own page tables to resolve to physical addresses, and
  32  * construct the namelist in a manner similar to ksyms_snapshot().
  33  *
  34  * Note that there are two formats understood by this module: the older,
  35  * ad hoc format, which we call 'core' within this file, and an
  36  * ELF-based format, known as 'elf'.
  37  *
 * Of the older format, we only support dumps generated by a Solaris
 * dom0: before we fixed it there, core dump files were broken whenever
 * a PFN didn't map a real MFN (!).
  41  */
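/*
 * In outline, the bootstrap implemented below (see xkb_open()) is:
 *
 *	xkb_open_elf() or xkb_open_core()  - parse headers, VCPUs, p2m
 *	xkb_init_mmu()                     - describe the guest's paging
 *	xkb_build_m2p()                    - invert the p2m table
 *	xkb_read() of debug_info           - locate modules, hat offsets
 *	xkb_build_ksyms()                  - fabricate the namelist
 *
 * after which reads are satisfied by walking the guest's page tables
 * (xkb_va_to_mfn()) and mapping the corresponding dump pages.
 */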
  42 
  43 #include <strings.h>
  44 #include <stdio.h>
  45 #include <stdlib.h>
  46 #include <stddef.h>
  47 #include <stdarg.h>
  48 #include <unistd.h>
  49 #include <fcntl.h>
  50 #include <gelf.h>
  51 #include <errno.h>
  52 
  53 #include <sys/mman.h>
  54 #include <sys/stat.h>
  55 #include <sys/debug_info.h>
  56 #include <sys/xen_mmu.h>
  57 #include <sys/elf.h>
  58 #include <sys/machelf.h>
  59 #include <sys/modctl.h>
  60 #include <sys/kobj.h>
  61 #include <sys/kobj_impl.h>
  62 #include <sys/sysmacros.h>
  63 #include <sys/privmregs.h>
  64 #include <vm/as.h>
  65 
  66 #include <mdb/mdb_io.h>
  67 #include <mdb/mdb_kb.h>
  68 #include <mdb/mdb_target_impl.h>
  69 
  70 #include <xen/public/xen.h>
  71 #include <xen/public/version.h>
  72 #include <xen/public/elfnote.h>
  73 
  74 #define XKB_SHDR_NULL 0
  75 #define XKB_SHDR_SYMTAB 1
  76 #define XKB_SHDR_STRTAB 2
  77 #define XKB_SHDR_SHSTRTAB 3
  78 #define XKB_SHDR_NUM 4
  79 
  80 #define XKB_WALK_LOCAL 0x1
  81 #define XKB_WALK_GLOBAL 0x2
  82 #define XKB_WALK_STR 0x4
  83 #define XKB_WALK_ALL (XKB_WALK_LOCAL | XKB_WALK_GLOBAL | XKB_WALK_STR)
  84 
  85 #if defined(__i386)
  86 #define DEBUG_INFO 0xf4bff000
  87 #define DEBUG_INFO_HVM 0xfe7ff000
  88 #elif defined(__amd64)
  89 #define DEBUG_INFO 0xfffffffffb7ff000
  90 #define DEBUG_INFO_HVM 0xfffffffffb7ff000
  91 #endif
  92 
  93 #define PAGE_SIZE 0x1000
  94 #define PAGE_SHIFT 12
  95 #define PAGE_OFFSET(a) ((a) & (PAGE_SIZE - 1))
  96 #define PAGE_MASK(a) ((a) & ~(PAGE_SIZE - 1))
  97 #define PAGE_ALIGNED(a) (((a) & (PAGE_SIZE -1)) == 0)
  98 #define PT_PADDR_LGPG 0x000fffffffffe000ull
  99 #define PT_PADDR 0x000ffffffffff000ull
 100 #define PT_VALID 0x1
 101 #define PT_PAGESIZE 0x080
 102 #define PTE_IS_LGPG(p, l) ((l) > 0 && ((p) & PT_PAGESIZE))
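/*
 * For example, a (hypothetical) level-1 PTE of 0x80000000dee001e3 has
 * both PT_VALID and PT_PAGESIZE set, so it maps a large page, and its
 * base MFN is recovered with:
 *
 *	(0x80000000dee001e3 & PT_PADDR_LGPG) >> PAGE_SHIFT == 0xdee00
 *
 * xkb_pte_to_base_mfn() below does exactly this.
 */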
 103 
 104 #define XC_CORE_MAGIC 0xF00FEBED
 105 #define XC_CORE_MAGIC_HVM 0xF00FEBEE
 106 
 107 #define VGCF_HVM_GUEST (1<<1)
 108 
 109 typedef struct xc_core_header {
 110         unsigned int xch_magic;
 111         unsigned int xch_nr_vcpus;
 112         unsigned int xch_nr_pages;
 113         unsigned int xch_ctxt_offset;
 114         unsigned int xch_index_offset;
 115         unsigned int xch_pages_offset;
 116 } xc_core_header_t;
 117 
 118 struct xc_elf_header {
 119         uint64_t xeh_magic;
 120         uint64_t xeh_nr_vcpus;
 121         uint64_t xeh_nr_pages;
 122         uint64_t xeh_page_size;
 123 };
 124 
 125 struct xc_elf_version {
 126         uint64_t xev_major;
 127         uint64_t xev_minor;
 128         xen_extraversion_t xev_extra;
 129         xen_compile_info_t xev_compile_info;
 130         xen_capabilities_info_t xev_capabilities;
 131         xen_changeset_info_t xev_changeset;
 132         xen_platform_parameters_t xev_platform_parameters;
 133         uint64_t xev_pagesize;
 134 };
 135 
 136 /*
 137  * Either an old-style (3.0.4) core format, or the ELF format.
 138  */
 139 typedef enum {
 140         XKB_FORMAT_UNKNOWN = 0,
 141         XKB_FORMAT_CORE = 1,
 142         XKB_FORMAT_ELF = 2
 143 } xkb_type_t;
 144 
 145 typedef struct mfn_map {
 146         mfn_t mm_mfn;
 147         char *mm_map;
 148 } mfn_map_t;
 149 
 150 typedef struct mmu_info {
 151         size_t mi_max;
 152         size_t mi_shift[4];
 153         size_t mi_ptes;
 154         size_t mi_ptesize;
 155 } mmu_info_t;
 156 
 157 typedef struct xkb_core {
 158         xc_core_header_t xc_hdr;
 159         void *xc_p2m_buf;
 160 } xkb_core_t;
 161 
 162 typedef struct xkb_elf {
 163         mdb_gelf_file_t *xe_gelf;
 164         size_t *xe_off;
 165         struct xc_elf_header xe_hdr;
 166         struct xc_elf_version xe_version;
 167 } xkb_elf_t;
 168 
 169 typedef struct xkb {
 170         char *xkb_path;
 171         int xkb_fd;
 172         int xkb_is_hvm;
 173 
 174         xkb_type_t xkb_type;
 175         xkb_core_t xkb_core;
 176         xkb_elf_t xkb_elf;
 177 
 178         size_t xkb_nr_vcpus;
 179         size_t xkb_nr_pages;
 180         size_t xkb_pages_off;
 181         xen_pfn_t xkb_max_pfn;
 182         mfn_t xkb_max_mfn;
 183         int xkb_is_pae;
 184 
 185         mmu_info_t xkb_mmu;
 186         debug_info_t xkb_info;
 187 
 188         void *xkb_vcpu_data;
 189         size_t xkb_vcpu_data_sz;
 190         struct vcpu_guest_context **xkb_vcpus;
 191 
 192         char *xkb_pages;
 193         mfn_t *xkb_p2m;
 194         xen_pfn_t *xkb_m2p;
 195         mfn_map_t xkb_pt_map[4];
 196         mfn_map_t xkb_map;
 197 
 198         char *xkb_namelist;
 199         size_t xkb_namesize;
 200 } xkb_t;
 201 
 202 static const char xkb_shstrtab[] = "\0.symtab\0.strtab\0.shstrtab\0";
 203 
 204 typedef struct xkb_namelist {
 205         Ehdr    kh_elf_hdr;
 206         Phdr    kh_text_phdr;
 207         Phdr    kh_data_phdr;
 208         Shdr    kh_shdr[XKB_SHDR_NUM];
 209         char    shstrings[sizeof (xkb_shstrtab)];
 210 } xkb_namelist_t;
 211 
 212 static int xkb_build_ksyms(xkb_t *);
 213 static offset_t xkb_mfn_to_offset(xkb_t *, mfn_t);
 214 static mfn_t xkb_va_to_mfn(xkb_t *, uintptr_t, mfn_t);
 215 static ssize_t xkb_read(xkb_t *, uintptr_t, void *, size_t);
 216 static int xkb_read_word(xkb_t *, uintptr_t, uintptr_t *);
 217 static char *xkb_map_mfn(xkb_t *, mfn_t, mfn_map_t *);
 218 static int xkb_close(xkb_t *);
 219 
/*
 * Jump through the hoops we need to correctly identify a core file
 * of either the old or new format.
 */
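/*
 * In short: if the file starts with an old-style xc_core_header
 * (XC_CORE_MAGIC), the word size is inferred from the size of the
 * saved VCPU contexts; otherwise we look for an ELF file with a
 * .note.Xen section and derive the word size from the hypervisor's
 * capabilities string and the ELF machine type.
 */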
 224 int
 225 xkb_identify(const char *file, int *longmode)
 226 {
 227         xc_core_header_t header;
 228         mdb_gelf_file_t *gf = NULL;
 229         mdb_gelf_sect_t *sect = NULL;
 230         mdb_io_t *io = NULL;
 231         char *notes = NULL;
 232         char *pos;
 233         int ret = 0;
 234         size_t sz;
 235         int fd;
 236 
 237         if ((fd = open64(file, O_RDONLY)) == -1)
 238                 return (-1);
 239 
 240         if (pread64(fd, &header, sizeof (header), 0) != sizeof (header)) {
 241                 (void) close(fd);
 242                 return (0);
 243         }
 244 
 245         (void) close(fd);
 246 
 247         if (header.xch_magic == XC_CORE_MAGIC) {
 248                 *longmode = 0;
 249 
                /*
                 * An old-style dump.  Infer the dump's word size from
                 * the size of the saved VCPU contexts: the gap between
                 * the context and index offsets holds xch_nr_vcpus
                 * vcpu_guest_context structures, whose size differs
                 * between 32-bit and 64-bit dumps.
                 */
 253                 sz = header.xch_index_offset - header.xch_ctxt_offset;
 254 #ifdef _LP64
 255                 if (sizeof (struct vcpu_guest_context) *
 256                     header.xch_nr_vcpus == sz)
 257                         *longmode = 1;
 258 #else
 259                 if (sizeof (struct vcpu_guest_context) *
 260                     header.xch_nr_vcpus != sz)
 261                         *longmode = 1;
 262 #endif /* _LP64 */
 263 
 264                 return (1);
 265         }
 266 
 267         if ((io = mdb_fdio_create_path(NULL, file, O_RDONLY, 0)) == NULL)
 268                 return (-1);
 269 
 270         if ((gf = mdb_gelf_create(io, ET_NONE, GF_FILE)) == NULL)
 271                 goto out;
 272 
 273         if ((sect = mdb_gelf_sect_by_name(gf, ".note.Xen")) == NULL)
 274                 goto out;
 275 
 276         if ((notes = mdb_gelf_sect_load(gf, sect)) == NULL)
 277                 goto out;
 278 
 279         for (pos = notes; pos < notes + sect->gs_shdr.sh_size; ) {
 280                 struct xc_elf_version *vers;
 281                 /* LINTED - alignment */
 282                 Elf64_Nhdr *nhdr = (Elf64_Nhdr *)pos;
 283                 char *desc;
 284                 char *name;
 285 
 286                 name = pos + sizeof (*nhdr);
 287                 desc = (char *)P2ROUNDUP((uintptr_t)name + nhdr->n_namesz, 4);
 288 
 289                 pos = desc + nhdr->n_descsz;
 290 
 291                 if (nhdr->n_type != XEN_ELFNOTE_DUMPCORE_XEN_VERSION)
 292                         continue;
 293 
 294                 /*
 295                  * The contents of this struct differ between 32 and 64
 296                  * bit; however, not until past the 'xev_capabilities'
 297                  * member, so we can just about get away with this.
 298                  */
 299 
 300                 /* LINTED - alignment */
 301                 vers = (struct xc_elf_version *)desc;
 302 
 303                 if (strstr(vers->xev_capabilities, "x86_64")) {
 304                         /*
 305                          * 64-bit hypervisor, but it can still be
 306                          * a 32-bit domain core. 32-bit domain cores
 307                          * are also dumped in Elf64 format, but they
 308                          * have e_machine set to EM_386, not EM_AMD64.
 309                          */
 310                         if (gf->gf_ehdr.e_machine == EM_386)
 311                                 *longmode = 0;
 312                         else
 313                                 *longmode = 1;
 314                 } else if (strstr(vers->xev_capabilities, "x86_32") ||
 315                     strstr(vers->xev_capabilities, "x86_32p")) {
 316                         /*
 317                          * 32-bit hypervisor, can only be a 32-bit core.
 318                          */
 319                         *longmode = 0;
 320                 } else {
 321                         mdb_warn("couldn't derive word size of dump; "
 322                             "assuming 64-bit");
 323                         *longmode = 1;
 324                 }
 325         }
 326 
 327         ret = 1;
 328 
 329 out:
 330         if (gf != NULL)
 331                 mdb_gelf_destroy(gf);
 332         else if (io != NULL)
 333                 mdb_io_destroy(io);
 334         return (ret);
 335 }
 336 
 337 static void *
 338 xkb_fail(xkb_t *xkb, const char *msg, ...)
 339 {
 340         va_list args;
 341 
 342         va_start(args, msg);
 343         if (xkb != NULL)
 344                 (void) fprintf(stderr, "%s: ", xkb->xkb_path);
 345         (void) vfprintf(stderr, msg, args);
 346         (void) fprintf(stderr, "\n");
 347         va_end(args);
 348         if (xkb != NULL)
 349                 (void) xkb_close(xkb);
 350 
 351         errno = ENOEXEC;
 352 
 353         return (NULL);
 354 }
 355 
 356 static int
 357 xkb_build_m2p(xkb_t *xkb)
 358 {
 359         size_t i;
 360 
 361         for (i = 0; i <= xkb->xkb_max_pfn; i++) {
 362                 if (xkb->xkb_p2m[i] != MFN_INVALID &&
 363                     xkb->xkb_p2m[i] > xkb->xkb_max_mfn)
 364                         xkb->xkb_max_mfn = xkb->xkb_p2m[i];
 365         }
 366 
 367         xkb->xkb_m2p = mdb_alloc((xkb->xkb_max_mfn + 1) * sizeof (xen_pfn_t),
 368             UM_SLEEP);
 369 
 370         for (i = 0; i <= xkb->xkb_max_mfn; i++)
 371                 xkb->xkb_m2p[i] = PFN_INVALID;
 372 
 373         for (i = 0; i <= xkb->xkb_max_pfn; i++) {
 374                 if (xkb->xkb_p2m[i] != MFN_INVALID)
 375                         xkb->xkb_m2p[xkb->xkb_p2m[i]] = i;
 376         }
 377 
 378         return (1);
 379 }
 380 
 381 /*
 382  * With FORMAT_CORE, we can use the table in the dump file directly.
 383  * Just to make things fun, they've not page-aligned the p2m table.
 384  */
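/*
 * For example (hypothetical offset), with xch_index_offset == 0x11234
 * we mmap() from file offset PAGE_MASK(0x11234) == 0x11000 and point
 * xkb_p2m at the mapping plus PAGE_OFFSET(0x11234) == 0x234; the two
 * extra pages added to the mapping size below absorb the rounding at
 * both ends.
 */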
 385 static int
 386 xkb_map_p2m(xkb_t *xkb)
 387 {
 388         offset_t off;
 389         size_t size;
 390         xkb_core_t *xc = &xkb->xkb_core;
 391         size_t count = xkb->xkb_nr_pages;
 392         size_t boff = xc->xc_hdr.xch_index_offset;
 393 
 394         size = (sizeof (mfn_t) * count) + (PAGE_SIZE * 2);
 395         size = PAGE_MASK(size);
 396         off = PAGE_MASK(boff);
 397 
 398         /* LINTED - alignment */
 399         xc->xc_p2m_buf = (mfn_t *)mmap(NULL, size, PROT_READ,
 400             MAP_SHARED, xkb->xkb_fd, off);
 401 
 402         if (xc->xc_p2m_buf == (xen_pfn_t *)MAP_FAILED) {
 403                 (void) xkb_fail(xkb, "cannot map p2m table");
 404                 return (0);
 405         }
 406 
 407         /* LINTED - alignment */
 408         xkb->xkb_p2m = (mfn_t *)((char *)xc->xc_p2m_buf +
 409             PAGE_OFFSET(boff));
 410 
 411         return (1);
 412 }
 413 
 414 /*
 415  * With FORMAT_ELF, we have a set of <pfn,mfn> pairs, which we convert
 416  * into a linear array indexed by pfn for convenience.  We also need to
 417  * track the mapping between mfn and the offset in the file: a pfn with
 418  * no mfn will not appear in the core file.
 419  */
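/*
 * As a hypothetical illustration: if .xen_p2m contained the pairs
 * { <pfn 0, mfn 0x3800>, <pfn 2, mfn 0x100> }, then after this runs,
 * xkb_max_pfn is 2, xkb_p2m[] is { 0x3800, PFN_INVALID, 0x100 }, and
 * xe_off[] is { 0, -1, 1 }; i.e. pfn 2's data lives in the second page
 * of .xen_pages.
 */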
 420 static int
 421 xkb_build_p2m(xkb_t *xkb)
 422 {
 423         xkb_elf_t *xe = &xkb->xkb_elf;
 424         mdb_gelf_sect_t *sect;
 425         size_t size;
 426         size_t i;
 427 
 428         struct elf_p2m {
 429                 uint64_t pfn;
 430                 uint64_t gmfn;
 431         } *p2m;
 432 
 433         sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_p2m");
 434 
 435         if (sect == NULL) {
 436                 (void) xkb_fail(xkb, "cannot find section .xen_p2m");
 437                 return (0);
 438         }
 439 
 440         if ((p2m = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL) {
 441                 (void) xkb_fail(xkb, "couldn't read .xen_p2m");
 442                 return (0);
 443         }
 444 
 445         for (i = 0; i < xkb->xkb_nr_pages; i++) {
 446                 if (p2m[i].pfn > xkb->xkb_max_pfn)
 447                         xkb->xkb_max_pfn = p2m[i].pfn;
 448         }
 449 
 450         size = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1);
 451         xkb->xkb_p2m = mdb_alloc(size, UM_SLEEP);
 452         size = sizeof (size_t) * (xkb->xkb_max_pfn + 1);
 453         xe->xe_off = mdb_alloc(size, UM_SLEEP);
 454 
 455         for (i = 0; i <= xkb->xkb_max_pfn; i++) {
 456                 xkb->xkb_p2m[i] = PFN_INVALID;
 457                 xe->xe_off[i] = (size_t)-1;
 458         }
 459 
 460         for (i = 0; i < xkb->xkb_nr_pages; i++) {
 461                 xkb->xkb_p2m[p2m[i].pfn] = p2m[i].gmfn;
 462                 xe->xe_off[p2m[i].pfn] = i;
 463         }
 464 
 465         return (1);
 466 }
 467 
 468 /*
 469  * For HVM images, we don't have the corresponding MFN list; the table
 470  * is just a mapping from page index in the dump to the corresponding
 471  * PFN.  To simplify the other code, we'll pretend that these PFNs are
 472  * really MFNs as well, by populating xkb_p2m.
 473  */
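/*
 * E.g. (hypothetical contents) if .xen_pfn listed { 0x100, 0x101,
 * PFN_INVALID, 0x200 }, we would end up with xkb_p2m[0x100] == 0x100,
 * xkb_p2m[0x200] == 0x200, etc., and xe_off mapping those "MFNs" back
 * to dump pages 0, 1 and 3.
 */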
 474 static int
 475 xkb_build_fake_p2m(xkb_t *xkb)
 476 {
 477         xkb_elf_t *xe = &xkb->xkb_elf;
 478         mdb_gelf_sect_t *sect;
 479         size_t size;
 480         size_t i;
 481 
 482         uint64_t *p2pfn;
 483 
 484         sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_pfn");
 485 
 486         if (sect == NULL) {
 487                 (void) xkb_fail(xkb, "cannot find section .xen_pfn");
 488                 return (0);
 489         }
 490 
 491         if ((p2pfn = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL) {
 492                 (void) xkb_fail(xkb, "couldn't read .xen_pfn");
 493                 return (0);
 494         }
 495 
 496         for (i = 0; i < xkb->xkb_nr_pages; i++) {
 497                 if (p2pfn[i] != PFN_INVALID && p2pfn[i] > xkb->xkb_max_pfn)
 498                         xkb->xkb_max_pfn = p2pfn[i];
 499         }
 500 
 501         size = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1);
 502         xkb->xkb_p2m = mdb_alloc(size, UM_SLEEP);
 503 
 504         size = sizeof (size_t) * (xkb->xkb_max_pfn + 1);
 505         xe->xe_off = mdb_alloc(size, UM_SLEEP);
 506 
 507         for (i = 0; i <= xkb->xkb_max_pfn; i++) {
 508                 xkb->xkb_p2m[i] = PFN_INVALID;
 509                 xe->xe_off[i] = (size_t)-1;
 510         }
 511 
 512         for (i = 0; i < xkb->xkb_nr_pages; i++) {
 513                 if (p2pfn[i] == PFN_INVALID)
 514                         continue;
 515                 xkb->xkb_p2m[p2pfn[i]] = p2pfn[i];
 516                 xe->xe_off[p2pfn[i]] = i;
 517         }
 518 
 519         return (1);
 520 }
 521 
 522 /*
 523  * Return the MFN of the top-level page table for the given as.
 524  */
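/*
 * That is, we chase as->a_hat, then the hat's top-level htable (at
 * di_hat_htable_off), then that htable's pfn (at di_ht_pfn_off), and
 * finally translate the PFN to an MFN via xkb_p2m[].
 */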
 525 static mfn_t
 526 xkb_as_to_mfn(xkb_t *xkb, struct as *as)
 527 {
 528         uintptr_t asp = (uintptr_t)as;
 529         uintptr_t hatp;
 530         uintptr_t htablep;
 531         uintptr_t pfn;
 532 
 533         if (!xkb_read_word(xkb, asp + offsetof(struct as, a_hat), &hatp))
 534                 return (MFN_INVALID);
 535         if (!xkb_read_word(xkb, hatp + xkb->xkb_info.di_hat_htable_off,
 536             &htablep))
 537                 return (MFN_INVALID);
 538         if (!xkb_read_word(xkb, htablep + xkb->xkb_info.di_ht_pfn_off,
 539             &pfn))
 540                 return (MFN_INVALID);
 541 
 542         if (pfn > xkb->xkb_max_pfn)
 543                 return (MFN_INVALID);
 544 
 545         return (xkb->xkb_p2m[pfn]);
 546 }
 547 
 548 static mfn_t
 549 xkb_cr3_to_pfn(xkb_t *xkb)
 550 {
 551         uint64_t cr3 = xkb->xkb_vcpus[0]->ctrlreg[3];
 552         if (xkb->xkb_is_hvm)
 553                 return (cr3 >> PAGE_SHIFT);
 554         return (xen_cr3_to_pfn(cr3));
 555 }
 556 
 557 static ssize_t
 558 xkb_read_helper(xkb_t *xkb, struct as *as, int phys, uint64_t addr,
 559     void *buf, size_t size)
 560 {
 561         size_t left = size;
 562         int windowed = (xkb->xkb_pages == NULL);
 563         mfn_t tlmfn = xkb_cr3_to_pfn(xkb);
 564 
 565         if (as != NULL && (tlmfn = xkb_as_to_mfn(xkb, as)) == MFN_INVALID)
 566                 return (-1);
 567 
 568         while (left) {
 569                 uint64_t pos = addr + (size - left);
 570                 char *outpos = (char *)buf + (size - left);
 571                 size_t pageoff = PAGE_OFFSET(pos);
 572                 size_t sz = MIN(left, PAGE_SIZE - pageoff);
 573                 mfn_t mfn;
 574 
 575                 if (!phys) {
 576                         mfn = xkb_va_to_mfn(xkb, pos, tlmfn);
 577                         if (mfn == MFN_INVALID)
 578                                 return (-1);
 579                 } else {
 580                         xen_pfn_t pfn = pos >> PAGE_SHIFT;
 581                         if (pfn > xkb->xkb_max_pfn)
 582                                 return (-1);
 583                         mfn = xkb->xkb_p2m[pfn];
 584                         if (mfn == MFN_INVALID)
 585                                 return (-1);
 586                 }
 587 
 588                 /*
 589                  * If we're windowed then pread() is much faster.
 590                  */
 591                 if (windowed) {
 592                         offset_t off = xkb_mfn_to_offset(xkb, mfn);
 593                         int ret;
 594 
                        if (off == (-1ULL))
 596                                 return (-1);
 597 
 598                         off += pageoff;
 599 
 600                         ret = pread64(xkb->xkb_fd, outpos, sz, off);
 601                         if (ret == -1)
 602                                 return (-1);
 603                         if (ret != sz)
 604                                 return ((size - left) + ret);
 605 
 606                         left -= ret;
 607                 } else {
 608                         if (xkb_map_mfn(xkb, mfn, &xkb->xkb_map) == NULL)
 609                                 return (-1);
 610 
 611                         bcopy(xkb->xkb_map.mm_map + pageoff, outpos, sz);
 612 
 613                         left -= sz;
 614                 }
 615         }
 616 
 617         return (size);
 618 }
 619 
 620 static ssize_t
 621 xkb_pread(xkb_t *xkb, uint64_t addr, void *buf, size_t size)
 622 {
 623         return (xkb_read_helper(xkb, NULL, 1, addr, buf, size));
 624 }
 625 
 626 static ssize_t
 627 xkb_aread(xkb_t *xkb, uintptr_t addr, void *buf, size_t size, struct as *as)
 628 {
 629         return (xkb_read_helper(xkb, as, 0, addr, buf, size));
 630 }
 631 
 632 static ssize_t
 633 xkb_read(xkb_t *xkb, uintptr_t addr, void *buf, size_t size)
 634 {
 635         return (xkb_aread(xkb, addr, buf, size, NULL));
 636 }
 637 
 638 static int
 639 xkb_read_word(xkb_t *xkb, uintptr_t addr, uintptr_t *buf)
 640 {
 641         if (xkb_read(xkb, addr, buf, sizeof (uintptr_t)) !=
 642             sizeof (uintptr_t))
 643                 return (0);
 644         return (1);
 645 }
 646 
 647 static char *
 648 xkb_readstr(xkb_t *xkb, uintptr_t addr)
 649 {
 650         char *str = mdb_alloc(1024, UM_SLEEP);
 651         size_t i;
 652 
 653         for (i = 0; i < 1024; i++) {
 654                 if (xkb_read(xkb, addr + i, &str[i], 1) != 1) {
 655                         mdb_free(str, 1024);
 656                         return (NULL);
 657                 }
 658 
 659                 if (str[i] == '\0')
 660                         break;
 661         }
 662 
 663         if (i == 1024) {
 664                 mdb_free(str, 1024);
 665                 return (NULL);
 666         }
 667 
 668         return (str);
 669 }
 670 
 671 static offset_t
 672 xkb_pfn_to_off(xkb_t *xkb, xen_pfn_t pfn)
 673 {
 674         if (pfn == PFN_INVALID || pfn > xkb->xkb_max_pfn)
 675                 return (-1ULL);
 676 
 677         if (xkb->xkb_type == XKB_FORMAT_CORE)
 678                 return (PAGE_SIZE * pfn);
 679 
 680         return (PAGE_SIZE * (xkb->xkb_elf.xe_off[pfn]));
 681 }
 682 
 683 static offset_t
 684 xkb_mfn_to_offset(xkb_t *xkb, mfn_t mfn)
 685 {
 686         xen_pfn_t pfn;
 687 
 688         if (mfn > xkb->xkb_max_mfn)
 689                 return (-1ULL);
 690 
 691         pfn = xkb->xkb_m2p[mfn];
 692 
 693         if (pfn == PFN_INVALID)
 694                 return (-1ULL);
 695 
 696         return (xkb->xkb_pages_off + xkb_pfn_to_off(xkb, pfn));
 697 }
 698 
 699 static char *
 700 xkb_map_mfn(xkb_t *xkb, mfn_t mfn, mfn_map_t *mm)
 701 {
 702         int windowed = (xkb->xkb_pages == NULL);
 703         offset_t off;
 704 
 705         if (mm->mm_mfn == mfn)
 706                 return (mm->mm_map);
 707 
 708         mm->mm_mfn = mfn;
 709 
 710         if (windowed) {
 711                 if (mm->mm_map != (char *)MAP_FAILED) {
 712                         (void) munmap(mm->mm_map, PAGE_SIZE);
 713                         mm->mm_map = (void *)MAP_FAILED;
 714                 }
 715 
 716                 if ((off = xkb_mfn_to_offset(xkb, mfn)) == (-1ULL))
 717                         return (NULL);
 718 
 719                 mm->mm_map = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED,
 720                     xkb->xkb_fd, off);
 721 
 722                 if (mm->mm_map == (char *)MAP_FAILED)
 723                         return (NULL);
 724         } else {
 725                 xen_pfn_t pfn;
 726 
 727                 mm->mm_map = NULL;
 728 
 729                 if (mfn > xkb->xkb_max_mfn)
 730                         return (NULL);
 731 
 732                 pfn = xkb->xkb_m2p[mfn];
 733 
 734                 if (pfn == PFN_INVALID)
 735                         return (NULL);
 736 
 737                 mm->mm_map = xkb->xkb_pages + xkb_pfn_to_off(xkb, pfn);
 738         }
 739 
 740         return (mm->mm_map);
 741 }
 742 
 743 static uint64_t
 744 xkb_get_pte(mmu_info_t *mmu, char *ptep)
 745 {
 746         uint64_t pte = 0;
 747 
 748         if (mmu->mi_ptesize == 8) {
 749                 /* LINTED - alignment */
 750                 pte = *((uint64_t *)ptep);
 751         } else {
 752                 /* LINTED - alignment */
 753                 pte = *((uint32_t *)ptep);
 754         }
 755 
 756         return (pte);
 757 }
 758 
 759 static mfn_t
 760 xkb_pte_to_base_mfn(uint64_t pte, size_t level)
 761 {
 762         if (PTE_IS_LGPG(pte, level)) {
 763                 pte &= PT_PADDR_LGPG;
 764         } else {
 765                 pte &= PT_PADDR;
 766         }
 767 
 768         return (pte >> PAGE_SHIFT);
 769 }
 770 
 771 /*
 772  * Resolve the given VA into an MFN, using the provided mfn as a top-level page
 773  * table.
 774  */
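/*
 * For example, on amd64 the debug_info address (DEBUG_INFO,
 * 0xfffffffffb7ff000) decomposes into the following page-table
 * indexes, using the mi_shift[] values set up in xkb_init_mmu():
 *
 *	level 3: (va >> 39) & 511 == 511
 *	level 2: (va >> 30) & 511 == 511
 *	level 1: (va >> 21) & 511 == 475
 *	level 0: (va >> 12) & 511 == 511
 *
 * The walk below stops early at any level whose PTE maps a large page.
 */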
 775 static mfn_t
 776 xkb_va_to_mfn(xkb_t *xkb, uintptr_t va, mfn_t mfn)
 777 {
 778         mmu_info_t *mmu = &xkb->xkb_mmu;
 779         uint64_t pte;
 780         size_t level;
 781 
 782         for (level = mmu->mi_max; ; --level) {
 783                 size_t entry;
 784 
 785                 if (xkb_map_mfn(xkb, mfn, &xkb->xkb_pt_map[level]) == NULL)
 786                         return (MFN_INVALID);
 787 
 788                 entry = (va >> mmu->mi_shift[level]) & (mmu->mi_ptes - 1);
 789 
 790                 pte = xkb_get_pte(mmu, (char *)xkb->xkb_pt_map[level].mm_map +
 791                     entry * mmu->mi_ptesize);
 792 
 793                 if ((mfn = xkb_pte_to_base_mfn(pte, level)) == MFN_INVALID)
 794                         return (MFN_INVALID);
 795 
 796                 if (level == 0)
 797                         break;
 798 
 799                 /*
 800                  * Currently 'mfn' refers to the base MFN of the
 801                  * large-page mapping.  Add on the 4K-sized index into
 802                  * the large-page mapping to get the right MFN within
 803                  * the mapping.
 804                  */
 805                 if (PTE_IS_LGPG(pte, level)) {
 806                         mfn += (va & ((1 << mmu->mi_shift[level]) - 1)) >>
 807                             PAGE_SHIFT;
 808                         break;
 809                 }
 810         }
 811 
 812         return (mfn);
 813 }
 814 
 815 static int
 816 xkb_read_module(xkb_t *xkb, uintptr_t modulep, struct module *module,
 817     uintptr_t *sym_addr, uintptr_t *sym_count, uintptr_t *str_addr)
 818 {
 819         if (xkb_read(xkb, modulep, module, sizeof (struct module)) !=
 820             sizeof (struct module))
 821                 return (0);
 822 
 823         if (!xkb_read_word(xkb, (uintptr_t)module->symhdr +
 824             offsetof(Shdr, sh_addr), sym_addr))
 825                 return (0);
 826 
 827         if (!xkb_read_word(xkb, (uintptr_t)module->strhdr +
 828             offsetof(Shdr, sh_addr), str_addr))
 829                 return (0);
 830 
 831         if (!xkb_read_word(xkb, (uintptr_t)module->symhdr +
 832             offsetof(Shdr, sh_size), sym_count))
 833                 return (0);
 834         *sym_count /= sizeof (Sym);
 835 
 836         return (1);
 837 }
 838 
 839 static int
 840 xkb_read_modsyms(xkb_t *xkb, char **buf, size_t *sizes, int types,
 841     uintptr_t sym_addr, uintptr_t str_addr, uintptr_t sym_count)
 842 {
 843         size_t i;
 844 
 845         for (i = 0; i < sym_count; i++) {
 846                 Sym sym;
 847                 char *name;
 848                 size_t sz;
 849                 int type = XKB_WALK_GLOBAL;
 850 
 851                 if (xkb_read(xkb, sym_addr + i * sizeof (sym), &sym,
 852                     sizeof (sym)) != sizeof (sym))
 853                         return (0);
 854 
 855                 if (GELF_ST_BIND(sym.st_info) == STB_LOCAL)
 856                         type = XKB_WALK_LOCAL;
 857 
                name = xkb_readstr(xkb, str_addr + sym.st_name);
                if (name == NULL)
                        return (0);
 859 
 860                 sym.st_shndx = SHN_ABS;
 861                 sym.st_name = sizes[XKB_WALK_STR];
 862 
 863                 sizes[type] += sizeof (sym);
 864                 sz = strlen(name) + 1;
 865                 sizes[XKB_WALK_STR] += sz;
 866 
 867                 if (buf != NULL) {
 868                         if (types & type) {
 869                                 bcopy(&sym, *buf, sizeof (sym));
 870                                 *buf += sizeof (sym);
 871                         }
 872                         if (types & XKB_WALK_STR) {
 873                                 bcopy(name, *buf, sz);
 874                                 *buf += sz;
 875                         }
 876                 }
 877 
 878                 mdb_free(name, 1024);
 879         }
 880 
 881         return (1);
 882 }
 883 
 884 static int
 885 xkb_walk_syms(xkb_t *xkb, uintptr_t modhead, char **buf,
 886     size_t *sizes, int types)
 887 {
 888         uintptr_t modctl = modhead;
 889         uintptr_t modulep;
 890         struct module module;
 891         uintptr_t sym_count;
 892         uintptr_t sym_addr;
 893         uintptr_t str_addr;
 894         size_t max_iter = 500;
 895 
 896         bzero(sizes, sizeof (*sizes) * (XKB_WALK_STR + 1));
 897 
 898         /*
 899          * empty first symbol
 900          */
 901         sizes[XKB_WALK_LOCAL] += sizeof (Sym);
 902         sizes[XKB_WALK_STR] += 1;
 903 
 904         if (buf != NULL) {
 905                 if (types & XKB_WALK_LOCAL) {
 906                         Sym tmp;
 907                         bzero(&tmp, sizeof (tmp));
 908                         bcopy(&tmp, *buf, sizeof (tmp));
 909                         *buf += sizeof (tmp);
 910                 }
 911                 if (types & XKB_WALK_STR) {
 912                         **buf = '\0';
 913                         (*buf)++;
 914                 }
 915         }
 916 
 917         for (;;) {
 918                 if (!xkb_read_word(xkb,
 919                     modctl + offsetof(struct modctl, mod_mp), &modulep))
 920                         return (0);
 921 
 922                 if (modulep == NULL)
 923                         goto next;
 924 
 925                 if (!xkb_read_module(xkb, modulep, &module, &sym_addr,
 926                     &sym_count, &str_addr))
 927                         return (0);
 928 
 929                 if ((module.flags & KOBJ_NOKSYMS))
 930                         goto next;
 931 
 932                 if (!xkb_read_modsyms(xkb, buf, sizes, types, sym_addr,
 933                     str_addr, sym_count))
 934                         return (0);
 935 
 936 next:
 937                 if (!xkb_read_word(xkb,
 938                     modctl + offsetof(struct modctl, mod_next), &modctl))
 939                         return (0);
 940 
 941                 if (modctl == modhead)
 942                         break;
                /*
                 * Try to prevent looping forever if the module list is
                 * broken.
                 */
 946                 if (--max_iter == 0)
 947                         break;
 948         }
 949 
 950         return (1);
 951 }
 952 
 953 /*
 954  * Userspace equivalent of ksyms_snapshot().  Since we don't have a namelist
 955  * file for hypervisor images, we fabricate one here using code similar
 956  * to that of /dev/ksyms.
 957  */
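/*
 * The fabricated image is laid out as:
 *
 *	Ehdr | text Phdr | data Phdr | Shdr[4] | .shstrtab |
 *	    .symtab (locals, then globals) | .strtab
 *
 * i.e. an xkb_namelist_t header followed by the symbol and string
 * tables that the xkb_walk_syms() passes below fill in.
 */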
 958 static int
 959 xkb_build_ksyms(xkb_t *xkb)
 960 {
 961         debug_info_t *info = &xkb->xkb_info;
 962         size_t sizes[XKB_WALK_STR + 1];
 963         xkb_namelist_t *hdr;
 964         char *buf;
 965         struct modctl modules;
 966         uintptr_t module;
 967         Shdr *shp;
 968 
 969         if (xkb_read(xkb, info->di_modules, &modules,
 970             sizeof (struct modctl)) != sizeof (struct modctl))
 971                 return (0);
 972 
 973         module = (uintptr_t)modules.mod_mp;
 974 
 975         if (!xkb_walk_syms(xkb, info->di_modules, NULL, sizes,
 976             XKB_WALK_LOCAL | XKB_WALK_GLOBAL | XKB_WALK_STR))
 977                 return (0);
 978 
 979         xkb->xkb_namesize = sizeof (xkb_namelist_t);
 980         xkb->xkb_namesize += sizes[XKB_WALK_LOCAL];
 981         xkb->xkb_namesize += sizes[XKB_WALK_GLOBAL];
 982         xkb->xkb_namesize += sizes[XKB_WALK_STR];
 983 
 984         if ((xkb->xkb_namelist = mdb_zalloc(xkb->xkb_namesize, UM_SLEEP))
 985             == NULL)
 986                 return (0);
 987 
 988         /* LINTED - alignment */
 989         hdr = (xkb_namelist_t *)xkb->xkb_namelist;
 990 
 991         if (xkb_read(xkb, module + offsetof(struct module, hdr),
 992             &hdr->kh_elf_hdr, sizeof (Ehdr)) != sizeof (Ehdr))
 993                 return (0);
 994 
 995         hdr->kh_elf_hdr.e_phoff = offsetof(xkb_namelist_t, kh_text_phdr);
 996         hdr->kh_elf_hdr.e_shoff = offsetof(xkb_namelist_t, kh_shdr);
 997         hdr->kh_elf_hdr.e_phnum = 2;
 998         hdr->kh_elf_hdr.e_shnum = XKB_SHDR_NUM;
 999         hdr->kh_elf_hdr.e_shstrndx = XKB_SHDR_SHSTRTAB;
1000 
1001         hdr->kh_text_phdr.p_type = PT_LOAD;
1002         hdr->kh_text_phdr.p_vaddr = (Addr)info->di_s_text;
1003         hdr->kh_text_phdr.p_memsz = (Word)(info->di_e_text - info->di_s_text);
1004         hdr->kh_text_phdr.p_flags = PF_R | PF_X;
1005 
1006         hdr->kh_data_phdr.p_type = PT_LOAD;
1007         hdr->kh_data_phdr.p_vaddr = (Addr)info->di_s_data;
1008         hdr->kh_data_phdr.p_memsz = (Word)(info->di_e_data - info->di_s_data);
1009         hdr->kh_data_phdr.p_flags = PF_R | PF_W | PF_X;
1010 
1011         shp = &hdr->kh_shdr[XKB_SHDR_SYMTAB];
1012         shp->sh_name = 1;    /* xkb_shstrtab[1] = ".symtab" */
1013         shp->sh_type = SHT_SYMTAB;
1014         shp->sh_offset = sizeof (xkb_namelist_t);
1015         shp->sh_size = sizes[XKB_WALK_LOCAL] + sizes[XKB_WALK_GLOBAL];
1016         shp->sh_link = XKB_SHDR_STRTAB;
1017         shp->sh_info = sizes[XKB_WALK_LOCAL] / sizeof (Sym);
1018         shp->sh_addralign = sizeof (Addr);
1019         shp->sh_entsize = sizeof (Sym);
1020         shp->sh_addr = (Addr)(xkb->xkb_namelist + shp->sh_offset);
1021 
1022 
1023         shp = &hdr->kh_shdr[XKB_SHDR_STRTAB];
1024         shp->sh_name = 9;    /* xkb_shstrtab[9] = ".strtab" */
1025         shp->sh_type = SHT_STRTAB;
1026         shp->sh_offset = sizeof (xkb_namelist_t) +
1027             sizes[XKB_WALK_LOCAL] + sizes[XKB_WALK_GLOBAL];
1028         shp->sh_size = sizes[XKB_WALK_STR];
1029         shp->sh_addralign = 1;
1030         shp->sh_addr = (Addr)(xkb->xkb_namelist + shp->sh_offset);
1031 
1032 
1033         shp = &hdr->kh_shdr[XKB_SHDR_SHSTRTAB];
1034         shp->sh_name = 17;   /* xkb_shstrtab[17] = ".shstrtab" */
1035         shp->sh_type = SHT_STRTAB;
1036         shp->sh_offset = offsetof(xkb_namelist_t, shstrings);
1037         shp->sh_size = sizeof (xkb_shstrtab);
1038         shp->sh_addralign = 1;
1039         shp->sh_addr = (Addr)(xkb->xkb_namelist + shp->sh_offset);
1040 
1041         bcopy(xkb_shstrtab, hdr->shstrings, sizeof (xkb_shstrtab));
1042 
1043         buf = xkb->xkb_namelist + sizeof (xkb_namelist_t);
1044 
1045         if (!xkb_walk_syms(xkb, info->di_modules, &buf, sizes,
1046             XKB_WALK_LOCAL))
1047                 return (0);
1048         if (!xkb_walk_syms(xkb, info->di_modules, &buf, sizes,
1049             XKB_WALK_GLOBAL))
1050                 return (0);
1051         if (!xkb_walk_syms(xkb, info->di_modules, &buf, sizes,
1052             XKB_WALK_STR))
1053                 return (0);
1054 
1055         return (1);
1056 }
1057 
1058 static xkb_t *
1059 xkb_open_core(xkb_t *xkb)
1060 {
1061         xkb_core_t *xc = &xkb->xkb_core;
1062         size_t sz;
1063         int i;
1064         struct vcpu_guest_context *vcp;
1065 
1066         xkb->xkb_type = XKB_FORMAT_CORE;
1067 
1068         if ((xkb->xkb_fd = open64(xkb->xkb_path, O_RDONLY)) == -1)
1069                 return (xkb_fail(xkb, "cannot open %s", xkb->xkb_path));
1070 
1071         if (pread64(xkb->xkb_fd, &xc->xc_hdr, sizeof (xc->xc_hdr), 0) !=
1072             sizeof (xc->xc_hdr))
1073                 return (xkb_fail(xkb, "invalid dump file"));
1074 
1075         if (xc->xc_hdr.xch_magic == XC_CORE_MAGIC_HVM)
1076                 return (xkb_fail(xkb, "cannot process HVM images"));
1077 
1078         if (xc->xc_hdr.xch_magic != XC_CORE_MAGIC) {
1079                 return (xkb_fail(xkb, "invalid magic %d",
1080                     xc->xc_hdr.xch_magic));
1081         }
1082 
1083         /*
1084          * With FORMAT_CORE, all pages are in the dump (non-existing
1085          * ones are zeroed out).
1086          */
1087         xkb->xkb_nr_pages = xc->xc_hdr.xch_nr_pages;
1088         xkb->xkb_pages_off = xc->xc_hdr.xch_pages_offset;
1089         xkb->xkb_max_pfn = xc->xc_hdr.xch_nr_pages - 1;
1090         xkb->xkb_nr_vcpus = xc->xc_hdr.xch_nr_vcpus;
1091 
1092         sz = xkb->xkb_nr_vcpus * sizeof (struct vcpu_guest_context);
1093         xkb->xkb_vcpu_data_sz = sz;
1094         xkb->xkb_vcpu_data = mdb_alloc(sz, UM_SLEEP);
1095 
1096         if (pread64(xkb->xkb_fd, xkb->xkb_vcpu_data, sz,
1097             xc->xc_hdr.xch_ctxt_offset) != sz)
1098                 return (xkb_fail(xkb, "cannot read VCPU contexts"));
1099 
1100         sz = xkb->xkb_nr_vcpus * sizeof (struct vcpu_guest_context *);
1101         xkb->xkb_vcpus = mdb_alloc(sz, UM_SLEEP);
1102 
1103         vcp = xkb->xkb_vcpu_data;
1104         for (i = 0; i < xkb->xkb_nr_vcpus; i++)
1105                 xkb->xkb_vcpus[i] = &vcp[i];
1106 
1107         /*
1108          * Try to map all the data pages. If we can't, fall back to the
1109          * window/pread() approach, which is significantly slower.
1110          */
1111         xkb->xkb_pages = mmap(NULL, PAGE_SIZE * xkb->xkb_nr_pages,
1112             PROT_READ, MAP_SHARED, xkb->xkb_fd, xc->xc_hdr.xch_pages_offset);
1113 
1114         if (xkb->xkb_pages == (char *)MAP_FAILED)
1115                 xkb->xkb_pages = NULL;
1116 
        /*
         * We would like to get this right, but there is no way to
         * detect a PAE guest here, since %cr4 writes are disallowed
         * under Xen.  Assume PAE.
         */
        xkb->xkb_is_pae = 1;
1122 
1123         if (!xkb_map_p2m(xkb))
1124                 return (NULL);
1125 
1126         return (xkb);
1127 }
1128 
1129 static xkb_t *
1130 xkb_open_elf(xkb_t *xkb)
1131 {
1132         xkb_elf_t *xe = &xkb->xkb_elf;
1133         mdb_gelf_sect_t *sect;
1134         char *notes;
1135         char *pos;
1136         mdb_io_t *io;
1137         size_t sz;
1138         int i;
1139         void *dp;
1140 
1141         if ((io = mdb_fdio_create_path(NULL, xkb->xkb_path,
1142             O_RDONLY, 0)) == NULL)
1143                 return (xkb_fail(xkb, "failed to open"));
1144 
1145         xe->xe_gelf = mdb_gelf_create(io, ET_NONE, GF_FILE);
1146 
1147         if (xe->xe_gelf == NULL) {
1148                 mdb_io_destroy(io);
1149                 return (xkb);
1150         }
1151 
1152         xkb->xkb_fd = mdb_fdio_fileno(io);
1153 
1154         sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".note.Xen");
1155 
1156         if (sect == NULL)
1157                 return (xkb);
1158 
1159         if ((notes = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL)
1160                 return (xkb);
1161 
1162         /*
1163          * Now we know this is indeed a hypervisor core dump, even if
1164          * it's corrupted.
1165          */
1166         xkb->xkb_type = XKB_FORMAT_ELF;
1167 
1168         for (pos = notes; pos < notes + sect->gs_shdr.sh_size; ) {
1169                 /* LINTED - alignment */
1170                 Elf64_Nhdr *nhdr = (Elf64_Nhdr *)pos;
1171                 uint64_t vers;
1172                 char *desc;
1173                 char *name;
1174 
1175                 name = pos + sizeof (*nhdr);
1176                 desc = (char *)P2ROUNDUP((uintptr_t)name + nhdr->n_namesz, 4);
1177 
1178                 pos = desc + nhdr->n_descsz;
1179 
1180                 switch (nhdr->n_type) {
1181                 case XEN_ELFNOTE_DUMPCORE_NONE:
1182                         break;
1183 
1184                 case XEN_ELFNOTE_DUMPCORE_HEADER:
1185                         if (nhdr->n_descsz != sizeof (struct xc_elf_header)) {
1186                                 return (xkb_fail(xkb, "invalid ELF note "
1187                                     "XEN_ELFNOTE_DUMPCORE_HEADER\n"));
1188                         }
1189 
1190                         bcopy(desc, &xe->xe_hdr,
1191                             sizeof (struct xc_elf_header));
1192                         break;
1193 
1194                 case XEN_ELFNOTE_DUMPCORE_XEN_VERSION:
1195                         if (nhdr->n_descsz < sizeof (struct xc_elf_version)) {
1196                                 return (xkb_fail(xkb, "invalid ELF note "
1197                                     "XEN_ELFNOTE_DUMPCORE_XEN_VERSION\n"));
1198                         }
1199 
1200                         bcopy(desc, &xe->xe_version,
1201                             sizeof (struct xc_elf_version));
1202                         break;
1203 
1204                 case XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION:
1205                         /* LINTED - alignment */
1206                         vers = *((uint64_t *)desc);
1207                         if ((vers >> 32) != 0) {
1208                                 return (xkb_fail(xkb, "unknown major "
1209                                     "version %d (expected 0)\n",
1210                                     (int)(vers >> 32)));
1211                         }
1212 
1213                         if ((vers & 0xffffffff) != 1) {
                                mdb_warn("unexpected dump format minor "
                                    "version %d (expected 1)\n",
                                    (int)(vers & 0xffffffff));
1217                         }
1218                         break;
1219 
1220                 default:
1221                         mdb_warn("unknown ELF note %d(%s)\n",
1222                             nhdr->n_type, name);
1223                         break;
1224                 }
1225         }
1226 
1227         xkb->xkb_is_hvm = xe->xe_hdr.xeh_magic == XC_CORE_MAGIC_HVM;
1228 
1229         if (xe->xe_hdr.xeh_magic != XC_CORE_MAGIC &&
1230             xe->xe_hdr.xeh_magic != XC_CORE_MAGIC_HVM) {
1231                 return (xkb_fail(xkb, "invalid magic %d",
1232                     xe->xe_hdr.xeh_magic));
1233         }
1234 
1235         xkb->xkb_nr_pages = xe->xe_hdr.xeh_nr_pages;
1236         xkb->xkb_is_pae = (strstr(xe->xe_version.xev_capabilities,
1237             "x86_32p") != NULL);
1238 
1239         sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_prstatus");
1240 
1241         if (sect == NULL)
1242                 return (xkb_fail(xkb, "cannot find section .xen_prstatus"));
1243 
1244         if (sect->gs_shdr.sh_entsize < sizeof (vcpu_guest_context_t))
1245                 return (xkb_fail(xkb, "invalid section .xen_prstatus"));
1246 
1247         xkb->xkb_nr_vcpus = sect->gs_shdr.sh_size / sect->gs_shdr.sh_entsize;
1248 
1249         xkb->xkb_vcpu_data = mdb_gelf_sect_load(xe->xe_gelf, sect);
1250         if (xkb->xkb_vcpu_data == NULL)
1251                 return (xkb_fail(xkb, "cannot load section .xen_prstatus"));
1252         xkb->xkb_vcpu_data_sz = sect->gs_shdr.sh_size;
1253 
1254         /*
1255          * The vcpu_guest_context structures saved in the core file
1256          * are actually unions of the 64-bit and 32-bit versions.
1257          * Don't rely on the entry size to match the size of
1258          * the structure, but set up an array of pointers.
1259          */
1260         sz = xkb->xkb_nr_vcpus * sizeof (struct vcpu_guest_context *);
1261         xkb->xkb_vcpus = mdb_alloc(sz, UM_SLEEP);
1262         for (i = 0; i < xkb->xkb_nr_vcpus; i++) {
1263                 dp = ((char *)xkb->xkb_vcpu_data +
1264                     i * sect->gs_shdr.sh_entsize);
1265                 xkb->xkb_vcpus[i] = dp;
1266         }
1267 
1268         sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_pages");
1269 
1270         if (sect == NULL)
1271                 return (xkb_fail(xkb, "cannot find section .xen_pages"));
1272 
1273         if (!PAGE_ALIGNED(sect->gs_shdr.sh_offset))
1274                 return (xkb_fail(xkb, ".xen_pages is not page aligned"));
1275 
1276         if (sect->gs_shdr.sh_entsize != PAGE_SIZE)
1277                 return (xkb_fail(xkb, "invalid section .xen_pages"));
1278 
1279         xkb->xkb_pages_off = sect->gs_shdr.sh_offset;
1280 
1281         /*
1282          * Try to map all the data pages. If we can't, fall back to the
1283          * window/pread() approach, which is significantly slower.
1284          */
1285         xkb->xkb_pages = mmap(NULL, PAGE_SIZE * xkb->xkb_nr_pages,
1286             PROT_READ, MAP_SHARED, xkb->xkb_fd, xkb->xkb_pages_off);
1287 
1288         if (xkb->xkb_pages == (char *)MAP_FAILED)
1289                 xkb->xkb_pages = NULL;
1290 
1291         if (xkb->xkb_is_hvm) {
1292                 if (!xkb_build_fake_p2m(xkb))
1293                         return (NULL);
1294         } else {
1295                 if (!xkb_build_p2m(xkb))
1296                         return (NULL);
1297         }
1298 
1299         return (xkb);
1300 }
1301 
1302 static void
1303 xkb_init_mmu(xkb_t *xkb)
1304 {
1305 #if defined(__amd64)
1306         xkb->xkb_mmu.mi_max = 3;
1307         xkb->xkb_mmu.mi_shift[0] = 12;
1308         xkb->xkb_mmu.mi_shift[1] = 21;
1309         xkb->xkb_mmu.mi_shift[2] = 30;
1310         xkb->xkb_mmu.mi_shift[3] = 39;
1311         xkb->xkb_mmu.mi_ptes = 512;
1312         xkb->xkb_mmu.mi_ptesize = 8;
1313 #elif defined(__i386)
1314         if (xkb->xkb_is_pae) {
1315                 xkb->xkb_mmu.mi_max = 2;
1316                 xkb->xkb_mmu.mi_shift[0] = 12;
1317                 xkb->xkb_mmu.mi_shift[1] = 21;
1318                 xkb->xkb_mmu.mi_shift[2] = 30;
1319                 xkb->xkb_mmu.mi_ptes = 512;
1320                 xkb->xkb_mmu.mi_ptesize = 8;
1321         } else {
1322                 xkb->xkb_mmu.mi_max = 1;
1323                 xkb->xkb_mmu.mi_shift[0] = 12;
1324                 xkb->xkb_mmu.mi_shift[1] = 22;
1325                 xkb->xkb_mmu.mi_ptes = 1024;
1326                 xkb->xkb_mmu.mi_ptesize = 4;
1327         }
1328 #endif
1329 }
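/*
 * The mi_shift[] values above are the virtual-address bit positions at
 * which each level's page-table index begins; e.g. with the amd64
 * four-level layout a level-1 PTE covers 1 << 21 bytes (2MB), which is
 * why xkb_va_to_mfn() folds the remaining low bits back in for
 * large-page mappings.
 */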
1330 
1331 /*ARGSUSED*/
1332 xkb_t *
1333 xkb_open(const char *namelist, const char *corefile, const char *swapfile,
1334     int flag, const char *err)
1335 {
1336         uintptr_t debug_info = DEBUG_INFO;
1337         struct stat64 corestat;
1338         xkb_t *xkb = NULL;
1339         size_t i;
1340 
1341         if (stat64(corefile, &corestat) == -1)
1342                 return (xkb_fail(xkb, "cannot stat %s", corefile));
1343 
1344         if (flag != O_RDONLY)
1345                 return (xkb_fail(xkb, "invalid open flags"));
1346 
1347         xkb = mdb_zalloc(sizeof (*xkb), UM_SLEEP);
1348 
1349         for (i = 0; i < 4; i++) {
1350                 xkb->xkb_pt_map[i].mm_mfn = MFN_INVALID;
1351                 xkb->xkb_pt_map[i].mm_map = (char *)MAP_FAILED;
1352         }
1353 
1354         xkb->xkb_type = XKB_FORMAT_UNKNOWN;
1355         xkb->xkb_map.mm_mfn = MFN_INVALID;
1356         xkb->xkb_map.mm_map = (char *)MAP_FAILED;
1357         xkb->xkb_core.xc_p2m_buf = (char *)MAP_FAILED;
1358         xkb->xkb_fd = -1;
1359 
1360         xkb->xkb_path = strdup(corefile);
1361 
1362         if ((xkb = xkb_open_elf(xkb)) == NULL)
1363                 return (NULL);
1364 
1365         if (xkb->xkb_type == XKB_FORMAT_UNKNOWN) {
1366                 if (!xkb_open_core(xkb))
1367                         return (NULL);
1368         }
1369 
1370         xkb_init_mmu(xkb);
1371 
1372         if (!xkb_build_m2p(xkb))
1373                 return (NULL);
1374 
1375         if (xkb->xkb_is_hvm)
1376                 debug_info = DEBUG_INFO_HVM;
1377 
1378         if (xkb_read(xkb, debug_info, &xkb->xkb_info,
1379             sizeof (xkb->xkb_info)) != sizeof (xkb->xkb_info))
1380                 return (xkb_fail(xkb, "cannot read debug_info"));
1381 
1382         if (xkb->xkb_info.di_magic != DEBUG_INFO_MAGIC) {
1383                 return (xkb_fail(xkb, "invalid debug info magic %d",
1384                     xkb->xkb_info.di_magic));
1385         }
1386 
1387         if (xkb->xkb_info.di_version != DEBUG_INFO_VERSION) {
1388                 return (xkb_fail(xkb, "unknown debug info version %d",
1389                     xkb->xkb_info.di_version));
1390         }
1391 
1392         if (!xkb_build_ksyms(xkb))
1393                 return (xkb_fail(xkb, "cannot construct namelist"));
1394 
1395         return (xkb);
1396 }
1397 
1398 int
1399 xkb_close(xkb_t *xkb)
1400 {
1401         size_t i, sz;
1402 
1403         if (xkb == NULL)
1404                 return (0);
1405 
1406         if (xkb->xkb_m2p != NULL) {
1407                 mdb_free(xkb->xkb_m2p,
1408                     (xkb->xkb_max_mfn + 1) * sizeof (xen_pfn_t));
1409         }
1410 
1411         if (xkb->xkb_pages != NULL) {
1412                 (void) munmap((void *)xkb->xkb_pages,
1413                     PAGE_SIZE * xkb->xkb_nr_pages);
1414         } else {
1415                 for (i = 0; i < 4; i++) {
1416                         char *addr = xkb->xkb_pt_map[i].mm_map;
1417                         if (addr != (char *)MAP_FAILED)
1418                                 (void) munmap((void *)addr, PAGE_SIZE);
1419                 }
1420                 if (xkb->xkb_map.mm_map != (char *)MAP_FAILED) {
1421                         (void) munmap((void *)xkb->xkb_map.mm_map,
1422                             PAGE_SIZE);
1423                 }
1424         }
1425 
1426         if (xkb->xkb_namelist != NULL)
1427                 mdb_free(xkb->xkb_namelist, xkb->xkb_namesize);
1428 
1429         if (xkb->xkb_type == XKB_FORMAT_ELF) {
1430                 xkb_elf_t *xe = &xkb->xkb_elf;
1431 
1432                 if (xe->xe_gelf != NULL)
1433                         mdb_gelf_destroy(xe->xe_gelf);
1434 
1435                 sz = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1);
1436 
1437                 if (xkb->xkb_p2m != NULL)
1438                         mdb_free(xkb->xkb_p2m, sz);
1439 
1440                 sz = sizeof (size_t) * (xkb->xkb_max_pfn + 1);
1441 
1442                 if (xe->xe_off != NULL)
1443                         mdb_free(xe->xe_off, sz);
1444 
1445         } else if (xkb->xkb_type == XKB_FORMAT_CORE) {
1446                 xkb_core_t *xc = &xkb->xkb_core;
1447 
1448                 if (xkb->xkb_fd != -1)
1449                         (void) close(xkb->xkb_fd);
1450 
1451                 sz = (xkb->xkb_nr_pages * sizeof (mfn_t)) + (PAGE_SIZE * 2);
1452                 sz = PAGE_MASK(sz);
1453 
1454                 if (xc->xc_p2m_buf != (xen_pfn_t *)MAP_FAILED)
1455                         (void) munmap(xc->xc_p2m_buf, sz);
1456 
1457                 if (xkb->xkb_vcpu_data != NULL)
1458                         mdb_free(xkb->xkb_vcpu_data, xkb->xkb_vcpu_data_sz);
1459         }
1460 
1461         if (xkb->xkb_vcpus != NULL) {
1462                 sz = sizeof (struct vcpu_guest_context *) *
1463                     xkb->xkb_nr_vcpus;
1464                 mdb_free(xkb->xkb_vcpus, sz);
1465         }
1466 
1467         free(xkb->xkb_path);
1468 
1469         mdb_free(xkb, sizeof (*xkb));
1470         return (0);
1471 }
1472 
1473 /*ARGSUSED*/
1474 static mdb_io_t *
1475 xkb_sym_io(xkb_t *xkb, const char *symfile)
1476 {
1477         mdb_io_t *io = mdb_memio_create(xkb->xkb_namelist, xkb->xkb_namesize);
1478 
1479         if (io == NULL)
1480                 mdb_warn("failed to create namelist from %s", xkb->xkb_path);
1481 
1482         return (io);
1483 }
1484 
1485 uint64_t
1486 xkb_vtop(xkb_t *xkb, struct as *as, uintptr_t addr)
1487 {
1488         mfn_t tlmfn = xkb_cr3_to_pfn(xkb);
1489         mfn_t mfn;
1490 
1491         if (as != NULL && (tlmfn = xkb_as_to_mfn(xkb, as)) == MFN_INVALID)
1492                 return (-1ULL);
1493 
1494         mfn = xkb_va_to_mfn(xkb, addr, tlmfn);
1495 
1496         if (mfn == MFN_INVALID || mfn > xkb->xkb_max_mfn)
1497                 return (-1ULL);
1498 
1499         return (((uint64_t)xkb->xkb_m2p[mfn] << PAGE_SHIFT)
1500             | PAGE_OFFSET(addr));
1501 }
1502 
1503 static int
1504 xkb_getmregs(xkb_t *xkb, uint_t cpu, struct privmregs *mregs)
1505 {
1506         struct vcpu_guest_context *vcpu;
1507         struct cpu_user_regs *ur;
1508         struct regs *regs;
1509 
1510         if (cpu >= xkb->xkb_nr_vcpus) {
1511                 errno = EINVAL;
1512                 return (-1);
1513         }
1514 
1515         bzero(mregs, sizeof (*mregs));
1516 
1517         vcpu = xkb->xkb_vcpus[cpu];
1518         ur = &vcpu->user_regs;
1519         regs = &mregs->pm_gregs;
1520 
1521         regs->r_ss = ur->ss;
1522         regs->r_cs = ur->cs;
1523         regs->r_ds = ur->ds;
1524         regs->r_es = ur->es;
1525         regs->r_fs = ur->fs;
1526         regs->r_gs = ur->gs;
1527         regs->r_trapno = ur->entry_vector;
1528         regs->r_err = ur->error_code;
1529 #ifdef __amd64
1530         regs->r_savfp = ur->rbp;
1531         regs->r_savpc = ur->rip;
1532         regs->r_rdi = ur->rdi;
1533         regs->r_rsi = ur->rsi;
1534         regs->r_rdx = ur->rdx;
1535         regs->r_rcx = ur->rcx;
1536         regs->r_r8 = ur->r8;
1537         regs->r_r9 = ur->r9;
1538         regs->r_rax = ur->rax;
1539         regs->r_rbx = ur->rbx;
1540         regs->r_rbp = ur->rbp;
1541         regs->r_r10 = ur->r10;
1542         regs->r_r11 = ur->r11;
1543         regs->r_r12 = ur->r12;
1544         regs->r_r13 = ur->r13;
1545         regs->r_r14 = ur->r14;
1546         regs->r_r15 = ur->r15;
1547         regs->r_rip = ur->rip;
1548         regs->r_rfl = ur->rflags;
1549         regs->r_rsp = ur->rsp;
1550 #else
1551         regs->r_savfp = ur->ebp;
1552         regs->r_savpc = ur->eip;
1553         regs->r_edi = ur->edi;
1554         regs->r_esi = ur->esi;
1555         regs->r_ebp = ur->ebp;
1556         regs->r_esp = ur->esp;
1557         regs->r_ebx = ur->ebx;
1558         regs->r_edx = ur->edx;
1559         regs->r_ecx = ur->ecx;
1560         regs->r_eax = ur->eax;
1561         regs->r_eip = ur->eip;
1562         regs->r_efl = ur->eflags;
1563         regs->r_uesp = 0;
1564 #endif
1565 
1566         bcopy(&vcpu->ctrlreg, &mregs->pm_cr, 8 * sizeof (ulong_t));
1567         bcopy(&vcpu->debugreg, &mregs->pm_dr, 8 * sizeof (ulong_t));
1568 
1569         mregs->pm_flags = PM_GREGS | PM_CRREGS | PM_DRREGS;
1570 
1571         return (0);
1572 }
1573 
1574 static mdb_kb_ops_t xpv_kb_ops = {
1575         .kb_open = (void *(*)())xkb_open,
1576         .kb_close = (int (*)())xkb_close,
1577         .kb_sym_io = (mdb_io_t *(*)())xkb_sym_io,
1578         .kb_kread = (ssize_t (*)())xkb_read,
1579         .kb_kwrite = (ssize_t (*)())mdb_tgt_notsup,
1580         .kb_aread = (ssize_t (*)())xkb_aread,
1581         .kb_awrite = (ssize_t (*)())mdb_tgt_notsup,
1582         .kb_pread = (ssize_t (*)())xkb_pread,
1583         .kb_pwrite = (ssize_t (*)())mdb_tgt_notsup,
1584         .kb_vtop = (uint64_t (*)())xkb_vtop,
1585         .kb_getmregs = (int (*)())xkb_getmregs
1586 };
1587 
1588 mdb_kb_ops_t *
1589 mdb_kb_ops(void)
1590 {
1591         return (&xpv_kb_ops);
1592 }
1593 
1594 static const mdb_dcmd_t dcmds[] = { NULL, };
1595 static const mdb_walker_t walkers[] = { NULL, };
1596 static const mdb_modinfo_t modinfo = { MDB_API_VERSION, dcmds, walkers };
1597 
1598 const mdb_modinfo_t *
1599 _mdb_init(void)
1600 {
1601         return (&modinfo);
1602 }
1603 
1604 void
1605 _mdb_fini(void)
1606 {
1607 }