1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * driver for accessing kernel devinfo tree.
  28  */
  29 #include <sys/types.h>
  30 #include <sys/pathname.h>
  31 #include <sys/debug.h>
  32 #include <sys/autoconf.h>
  33 #include <sys/vmsystm.h>
  34 #include <sys/conf.h>
  35 #include <sys/file.h>
  36 #include <sys/kmem.h>
  37 #include <sys/modctl.h>
  38 #include <sys/stat.h>
  39 #include <sys/ddi.h>
  40 #include <sys/sunddi.h>
  41 #include <sys/sunldi_impl.h>
  42 #include <sys/sunndi.h>
  43 #include <sys/esunddi.h>
  44 #include <sys/sunmdi.h>
  45 #include <sys/ddi_impldefs.h>
  46 #include <sys/ndi_impldefs.h>
  47 #include <sys/mdi_impldefs.h>
  48 #include <sys/devinfo_impl.h>
  49 #include <sys/thread.h>
  50 #include <sys/modhash.h>
  51 #include <sys/bitmap.h>
  52 #include <util/qsort.h>
  53 #include <sys/disp.h>
  54 #include <sys/kobj.h>
  55 #include <sys/crc32.h>
  56 #include <sys/ddi_hp.h>
  57 #include <sys/ddi_hp_impl.h>
  58 #include <sys/sysmacros.h>
  59 #include <sys/list.h>
  60 
  61 
  62 #ifdef DEBUG
  63 static int di_debug;
  64 #define dcmn_err(args) if (di_debug >= 1) cmn_err args
  65 #define dcmn_err2(args) if (di_debug >= 2) cmn_err args
  66 #define dcmn_err3(args) if (di_debug >= 3) cmn_err args
  67 #else
  68 #define dcmn_err(args) /* nothing */
  69 #define dcmn_err2(args) /* nothing */
  70 #define dcmn_err3(args) /* nothing */
  71 #endif
  72 
/*
 * We partition the space of devinfo minor nodes equally between the full and
 * unprivileged versions of the driver.  The even-numbered minor nodes are the
 * full version, while the odd-numbered ones are the read-only version.
 */
static int di_max_opens = 32;           /* number of slots in di_states[] */

static int di_prop_dyn = 1;             /* enable dynamic property support */

/*
 * Minor numbers of the two nodes created by di_attach().  di_open() accepts
 * only these two and hands back a per-open minor number instead.
 */
#define DI_FULL_PARENT          0
#define DI_READONLY_PARENT      1
/*
 * Number of node kinds.  di_open() uses it both as the stride between
 * di_states[] slots of the same kind and as the offset between a slot index
 * and the per-open minor number.
 */
#define DI_NODE_SPECIES         2
/* True when x is odd, i.e. belongs to the read-only half of the space. */
#define DI_UNPRIVILEGED_NODE(x) (((x) % 2) != 0)

/* Snapshot ioctl states, stored in di_state.di_iocstate. */
#define IOC_IDLE        0       /* snapshot ioctl states */
#define IOC_SNAP        1       /* snapshot in progress */
#define IOC_DONE        2       /* snapshot done, but not copied out */
#define IOC_COPY        3       /* copyout in progress */
  91 
  92 /*
  93  * Keep max alignment so we can move snapshot to different platforms.
  94  *
  95  * NOTE: Most callers should rely on the di_checkmem return value
  96  * being aligned, and reestablish *off_p with aligned value, instead
  97  * of trying to align size of their allocations: this approach will
  98  * minimize memory use.
  99  */
 100 #define DI_ALIGN(addr)  ((addr + 7l) & ~7l)
 101 
/*
 * To avoid wasting memory, make a linked list of memory chunks.
 * Size of each chunk is buf_size.  di_allocmem() creates the chunks and
 * appends each new one to the tail of the list.
 */
struct di_mem {
        struct di_mem   *next;          /* link to next chunk */
        char            *buf;           /* contiguous kernel memory */
        size_t          buf_size;       /* size of buf in bytes */
        devmap_cookie_t cook;           /* cookie from ddi_umem_alloc */
};
 112 
/*
 * This is a stack for walking the tree without using recursion.
 * When the devinfo tree height is above some small size, one
 * gets watchdog resets on sun4m.
 *
 * The three arrays are parallel: entry i describes the node at depth i + 1.
 * The stack is manipulated only through the PUSH_STACK/POP_STACK family of
 * macros.
 */
struct di_stack {
        void            *offset[MAX_TREE_DEPTH];  /* read back as di_off_t * */
        struct dev_info *dip[MAX_TREE_DEPTH];     /* node entered at level */
        int             circ[MAX_TREE_DEPTH];     /* ndi_devi_enter() value */
        int             depth;  /* depth of current node to be copied */
};
 124 
 125 #define TOP_OFFSET(stack)       \
 126         ((di_off_t *)(stack)->offset[(stack)->depth - 1])
 127 #define TOP_NODE(stack)         \
 128         ((stack)->dip[(stack)->depth - 1])
 129 #define PARENT_OFFSET(stack)    \
 130         ((di_off_t *)(stack)->offset[(stack)->depth - 2])
 131 #define EMPTY_STACK(stack)      ((stack)->depth == 0)
 132 #define POP_STACK(stack)        { \
 133         ndi_devi_exit((dev_info_t *)TOP_NODE(stack), \
 134                 (stack)->circ[(stack)->depth - 1]); \
 135         ((stack)->depth--); \
 136 }
 137 #define PUSH_STACK(stack, node, off_p)  { \
 138         ASSERT(node != NULL); \
 139         ndi_devi_enter((dev_info_t *)node, &(stack)->circ[(stack)->depth]); \
 140         (stack)->dip[(stack)->depth] = (node); \
 141         (stack)->offset[(stack)->depth] = (void *)(off_p); \
 142         ((stack)->depth)++; \
 143 }
 144 
/*
 * Address of offset 0 of the snapshot memory of state s, converted by DI_ALL.
 * di_ioctl() uses the result as the struct di_all header of the snapshot.
 */
#define DI_ALL_PTR(s)   DI_ALL(di_mem_addr((s), 0))
 146 
 147 /*
 148  * With devfs, the device tree has no global locks. The device tree is
 149  * dynamic and dips may come and go if they are not locked locally. Under
 150  * these conditions, pointers are no longer reliable as unique IDs.
 151  * Specifically, these pointers cannot be used as keys for hash tables
 152  * as the same devinfo structure may be freed in one part of the tree only
 153  * to be allocated as the structure for a different device in another
 154  * part of the tree. This can happen if DR and the snapshot are
 155  * happening concurrently.
 156  * The following data structures act as keys for devinfo nodes and
 157  * pathinfo nodes.
 158  */
 159 
/* Discriminator for the union in struct di_key. */
enum di_ktype {
        DI_DKEY = 1,    /* presumably selects k_u.dkey (devinfo node key) */
        DI_PKEY = 2     /* presumably selects k_u.pkey (pathinfo node key) */
};

/*
 * Key for a devinfo node.  The pointer alone is not a reliable identity,
 * so it is combined with further attributes of the node.
 */
struct di_dkey {
        dev_info_t      *dk_dip;
        major_t         dk_major;
        int             dk_inst;
        pnode_t         dk_nodeid;
};

/*
 * Key for a pathinfo node.  As with di_dkey, the pointer is combined with
 * further attributes.
 * NOTE(review): where these fields are filled in is not visible here.
 */
struct di_pkey {
        mdi_pathinfo_t  *pk_pip;
        char            *pk_path_addr;
        dev_info_t      *pk_client;
        dev_info_t      *pk_phci;
};

/* Tagged union holding either kind of key; k_type says which. */
struct di_key {
        enum di_ktype   k_type;
        union {
                struct di_dkey dkey;
                struct di_pkey pkey;
        } k_u;
};
 186 
 187 
struct i_lnode;

/*
 * In-kernel record of one link between a source and a target i_lnode.
 * Each i_link sits on two singly linked chains: one of links sharing its
 * source i_lnode and one of links sharing its target i_lnode.
 */
typedef struct i_link {
        /*
         * If a di_link struct representing this i_link struct makes it
         * into the snapshot, then self will point to the offset of
         * the di_link struct in the snapshot
         */
        di_off_t        self;

        int             spec_type;      /* block or char access type */
        struct i_lnode  *src_lnode;     /* src i_lnode */
        struct i_lnode  *tgt_lnode;     /* tgt i_lnode */
        struct i_link   *src_link_next; /* next src i_link w/ same i_lnode */
        struct i_link   *tgt_link_next; /* next tgt i_link w/ same i_lnode */
} i_link_t;

/*
 * In-kernel record of one link endpoint, with the heads of the chains of
 * links that arrive at it and depart from it.
 */
typedef struct i_lnode {
        /*
         * If a di_lnode struct representing this i_lnode struct makes it
         * into the snapshot, then self will point to the offset of
         * the di_lnode struct in the snapshot
         */
        di_off_t        self;

        /*
         * used for hashing and comparing i_lnodes
         */
        int             modid;

        /*
         * public information describing a link endpoint
         */
        struct di_node  *di_node;       /* di_node in snapshot */
        dev_t           devt;           /* devt */

        /*
         * i_link ptr to links coming into this i_lnode node
         * (this i_lnode is the target of these i_links)
         */
        i_link_t        *link_in;

        /*
         * i_link ptr to links going out of this i_lnode node
         * (this i_lnode is the source of these i_links)
         */
        i_link_t        *link_out;
} i_lnode_t;

/*
 * Element of a list_t (linked through hp_link) that pairs a di_hp_t in the
 * snapshot with a child devinfo node.
 */
typedef struct i_hp {
        di_off_t        hp_off;         /* Offset of di_hp_t in snapshot */
        dev_info_t      *hp_child;      /* Child devinfo node of the di_hp_t */
        list_node_t     hp_link;        /* List linkage */
} i_hp_t;
 242 
/*
 * Soft state associated with each instance of driver open.
 *
 * di_states is a table of di_max_opens pointers allocated by di_attach();
 * di_open() fills a free slot with a zeroed di_state and di_close() frees
 * the state and clears the slot.
 */
static struct di_state {
        di_off_t        mem_size;       /* total # bytes in memlist */
        struct di_mem   *memlist;       /* head of memlist */
        uint_t          command;        /* command from ioctl */
        int             di_iocstate;    /* snapshot ioctl state */
        /*
         * NOTE(review): presumably the tables behind di_register_dip() and
         * di_register_pip(); their use is not visible in this part of the
         * file.
         */
        mod_hash_t      *reg_dip_hash;
        mod_hash_t      *reg_pip_hash;
        /* presumably counts of i_lnode_t and i_link_t records - confirm */
        int             lnode_count;
        int             link_count;

        /* presumably hashes of i_lnode_t and i_link_t records - confirm */
        mod_hash_t      *lnode_hash;
        mod_hash_t      *link_hash;

        /* presumably a list of i_hp_t linked through hp_link - confirm */
        list_t          hp_list;
} **di_states;
 261 
/*
 * Protects the assignment and release of di_states[] slots in di_open()
 * and di_close().
 */
static kmutex_t di_lock;        /* serialize instance assignment */

/*
 * Message levels for cache debugging; this is the type of the msglevel
 * argument of di_cache_print().
 */
typedef enum {
        DI_QUIET = 0,   /* DI_QUIET must always be 0 */
        DI_ERR,
        DI_INFO,
        DI_TRACE,
        DI_TRACE1,
        DI_TRACE2
} di_cache_debug_t;

static uint_t   di_chunk = 32;          /* I/O chunk size in pages */

/*
 * Lock helpers for a cache object.  c is the object itself (not a pointer)
 * and must have a cache_lock mutex member.
 */
#define DI_CACHE_LOCK(c)        (mutex_enter(&(c).cache_lock))
#define DI_CACHE_UNLOCK(c)      (mutex_exit(&(c).cache_lock))
#define DI_CACHE_LOCKED(c)      (mutex_owned(&(c).cache_lock))
 278 
 279 /*
 280  * Check that whole device tree is being configured as a pre-condition for
 281  * cleaning up /etc/devices files.
 282  */
 283 #define DEVICES_FILES_CLEANABLE(st)     \
 284         (((st)->command & DINFOSUBTREE) && ((st)->command & DINFOFORCE) && \
 285         strcmp(DI_ALL_PTR(st)->root_path, "/") == 0)
 286 
 287 #define CACHE_DEBUG(args)       \
 288         { if (di_cache_debug != DI_QUIET) di_cache_print args; }
 289 
/*
 * Pairs a snapshot offset with the snapshot state.
 * NOTE(review): presumably handed through the "void *arg" parameter of the
 * pHCI walk callback (build_phci_list); the consumer is not visible here.
 */
typedef struct phci_walk_arg {
        di_off_t        off;
        struct di_state *st;
} phci_walk_arg_t;
 294 
/* Driver entry points referenced from di_cb_ops and di_ops. */
static int di_open(dev_t *, int, int, cred_t *);
static int di_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int di_close(dev_t, int, int, cred_t *);
static int di_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int di_attach(dev_info_t *, ddi_attach_cmd_t);
static int di_detach(dev_info_t *, ddi_detach_cmd_t);

/* Snapshot construction. */
static di_off_t di_copyformat(di_off_t, struct di_state *, intptr_t, int);
static di_off_t di_snapshot_and_clean(struct di_state *);
static di_off_t di_copydevnm(di_off_t *, struct di_state *);
static di_off_t di_copytree(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_copynode(struct dev_info *, struct di_stack *,
    struct di_state *);
static di_off_t di_getmdata(struct ddi_minor_data *, di_off_t *, di_off_t,
    struct di_state *);
static di_off_t di_getppdata(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_getdpdata(struct dev_info *, di_off_t *, struct di_state *);
static di_off_t di_gethpdata(ddi_hp_cn_handle_t *, di_off_t *,
    struct di_state *);
static di_off_t di_getprop(int, struct ddi_prop **, di_off_t *,
    struct di_state *, struct dev_info *);

/* Snapshot memory (struct di_mem list) and ioctl state management. */
static void di_allocmem(struct di_state *, size_t);
static void di_freemem(struct di_state *);
static void di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz);
static di_off_t di_checkmem(struct di_state *, di_off_t, size_t);
static void *di_mem_addr(struct di_state *, di_off_t);
static int di_setstate(struct di_state *, int);

/* Registration and lookup of nodes, paths and links. */
static void di_register_dip(struct di_state *, dev_info_t *, di_off_t);
static void di_register_pip(struct di_state *, mdi_pathinfo_t *, di_off_t);
static di_off_t di_getpath_data(dev_info_t *, di_off_t *, di_off_t,
    struct di_state *, int);
static di_off_t di_getlink_data(di_off_t, struct di_state *);
static int di_dip_find(struct di_state *st, dev_info_t *node, di_off_t *off_p);

/* Snapshot cache, vHCI/pHCI lists and hotplug. */
static int cache_args_valid(struct di_state *st, int *error);
static int snapshot_is_cacheable(struct di_state *st);
static int di_cache_lookup(struct di_state *st);
static int di_cache_update(struct di_state *st);
static void di_cache_print(di_cache_debug_t msglevel, char *fmt, ...);
static int build_vhci_list(dev_info_t *vh_devinfo, void *arg);
static int build_phci_list(dev_info_t *ph_devinfo, void *arg);
static void di_hotplug_children(struct di_state *st);

/* Symbols defined outside this file. */
extern int modrootloaded;
extern void mdi_walk_vhcis(int (*)(dev_info_t *, void *), void *);
extern void mdi_vhci_walk_phcis(dev_info_t *,
        int (*)(dev_info_t *, void *), void *);
 342 
 343 
/*
 * Character/block entry points.  Only open, close and ioctl are implemented
 * by this driver; the remaining slots use the system-provided nodev,
 * nochpoll and ddi_prop_op routines.
 */
static struct cb_ops di_cb_ops = {
        di_open,                /* open */
        di_close,               /* close */
        nodev,                  /* strategy */
        nodev,                  /* print */
        nodev,                  /* dump */
        nodev,                  /* read */
        nodev,                  /* write */
        di_ioctl,               /* ioctl */
        nodev,                  /* devmap */
        nodev,                  /* mmap */
        nodev,                  /* segmap */
        nochpoll,               /* poll */
        ddi_prop_op,            /* prop_op */
        NULL,                   /* streamtab  */
        D_NEW | D_MP            /* Driver compatibility flag */
};

/*
 * Device operations: getinfo, attach and detach are implemented here and
 * di_cb_ops supplies the character entry points.  There are no bus
 * operations.
 */
static struct dev_ops di_ops = {
        DEVO_REV,               /* devo_rev, */
        0,                      /* refcnt  */
        di_info,                /* info */
        nulldev,                /* identify */
        nulldev,                /* probe */
        di_attach,              /* attach */
        di_detach,              /* detach */
        nodev,                  /* reset */
        &di_cb_ops,         /* driver operations */
        NULL                    /* bus operations */
};
 374 
/*
 * Module linkage information for the kernel.
 * modldrv describes this module as a driver built on di_ops; modlinkage is
 * what _init(), _info() and _fini() pass to mod_install(), mod_info() and
 * mod_remove().
 */
static struct modldrv modldrv = {
        &mod_driverops,
        "DEVINFO Driver",
        &di_ops
};

static struct modlinkage modlinkage = {
        MODREV_1,
        { &modldrv, NULL }
};
 388 
 389 int
 390 _init(void)
 391 {
 392         int     error;
 393 
 394         mutex_init(&di_lock, NULL, MUTEX_DRIVER, NULL);
 395 
 396         error = mod_install(&modlinkage);
 397         if (error != 0) {
 398                 mutex_destroy(&di_lock);
 399                 return (error);
 400         }
 401 
 402         return (0);
 403 }
 404 
 405 int
 406 _info(struct modinfo *modinfop)
 407 {
 408         return (mod_info(&modlinkage, modinfop));
 409 }
 410 
 411 int
 412 _fini(void)
 413 {
 414         int     error;
 415 
 416         error = mod_remove(&modlinkage);
 417         if (error != 0) {
 418                 return (error);
 419         }
 420 
 421         mutex_destroy(&di_lock);
 422         return (0);
 423 }
 424 
 425 static dev_info_t *di_dip;
 426 
 427 /*ARGSUSED*/
 428 static int
 429 di_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
 430 {
 431         int     error = DDI_FAILURE;
 432 
 433         switch (infocmd) {
 434         case DDI_INFO_DEVT2DEVINFO:
 435                 *result = (void *)di_dip;
 436                 error = DDI_SUCCESS;
 437                 break;
 438         case DDI_INFO_DEVT2INSTANCE:
 439                 /*
 440                  * All dev_t's map to the same, single instance.
 441                  */
 442                 *result = (void *)0;
 443                 error = DDI_SUCCESS;
 444                 break;
 445         default:
 446                 break;
 447         }
 448 
 449         return (error);
 450 }
 451 
 452 static int
 453 di_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 454 {
 455         int     error = DDI_FAILURE;
 456 
 457         switch (cmd) {
 458         case DDI_ATTACH:
 459                 di_states = kmem_zalloc(
 460                     di_max_opens * sizeof (struct di_state *), KM_SLEEP);
 461 
 462                 if (ddi_create_minor_node(dip, "devinfo", S_IFCHR,
 463                     DI_FULL_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE ||
 464                     ddi_create_minor_node(dip, "devinfo,ro", S_IFCHR,
 465                     DI_READONLY_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE) {
 466                         kmem_free(di_states,
 467                             di_max_opens * sizeof (struct di_state *));
 468                         ddi_remove_minor_node(dip, NULL);
 469                         error = DDI_FAILURE;
 470                 } else {
 471                         di_dip = dip;
 472                         ddi_report_dev(dip);
 473 
 474                         error = DDI_SUCCESS;
 475                 }
 476                 break;
 477         default:
 478                 error = DDI_FAILURE;
 479                 break;
 480         }
 481 
 482         return (error);
 483 }
 484 
 485 static int
 486 di_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 487 {
 488         int     error = DDI_FAILURE;
 489 
 490         switch (cmd) {
 491         case DDI_DETACH:
 492                 ddi_remove_minor_node(dip, NULL);
 493                 di_dip = NULL;
 494                 kmem_free(di_states, di_max_opens * sizeof (struct di_state *));
 495 
 496                 error = DDI_SUCCESS;
 497                 break;
 498         default:
 499                 error = DDI_FAILURE;
 500                 break;
 501         }
 502 
 503         return (error);
 504 }
 505 
 506 /*
 507  * Allow multiple opens by tweaking the dev_t such that it looks like each
 508  * open is getting a different minor device.  Each minor gets a separate
 509  * entry in the di_states[] table.  Based on the original minor number, we
 510  * discriminate opens of the full and read-only nodes.  If all of the instances
 511  * of the selected minor node are currently open, we return EAGAIN.
 512  */
 513 /*ARGSUSED*/
 514 static int
 515 di_open(dev_t *devp, int flag, int otyp, cred_t *credp)
 516 {
 517         int     m;
 518         minor_t minor_parent = getminor(*devp);
 519 
 520         if (minor_parent != DI_FULL_PARENT &&
 521             minor_parent != DI_READONLY_PARENT)
 522                 return (ENXIO);
 523 
 524         mutex_enter(&di_lock);
 525 
 526         for (m = minor_parent; m < di_max_opens; m += DI_NODE_SPECIES) {
 527                 if (di_states[m] != NULL)
 528                         continue;
 529 
 530                 di_states[m] = kmem_zalloc(sizeof (struct di_state), KM_SLEEP);
 531                 break;  /* It's ours. */
 532         }
 533 
 534         if (m >= di_max_opens) {
 535                 /*
 536                  * maximum open instance for device reached
 537                  */
 538                 mutex_exit(&di_lock);
 539                 dcmn_err((CE_WARN, "devinfo: maximum devinfo open reached"));
 540                 return (EAGAIN);
 541         }
 542         mutex_exit(&di_lock);
 543 
 544         ASSERT(m < di_max_opens);
 545         *devp = makedevice(getmajor(*devp), (minor_t)(m + DI_NODE_SPECIES));
 546 
 547         dcmn_err((CE_CONT, "di_open: thread = %p, assigned minor = %d\n",
 548             (void *)curthread, m + DI_NODE_SPECIES));
 549 
 550         return (0);
 551 }
 552 
 553 /*ARGSUSED*/
 554 static int
 555 di_close(dev_t dev, int flag, int otype, cred_t *cred_p)
 556 {
 557         struct di_state *st;
 558         int             m = (int)getminor(dev) - DI_NODE_SPECIES;
 559 
 560         if (m < 0) {
 561                 cmn_err(CE_WARN, "closing non-existent devinfo minor %d",
 562                     m + DI_NODE_SPECIES);
 563                 return (ENXIO);
 564         }
 565 
 566         st = di_states[m];
 567         ASSERT(m < di_max_opens && st != NULL);
 568 
 569         di_freemem(st);
 570         kmem_free(st, sizeof (struct di_state));
 571 
 572         /*
 573          * empty slot in state table
 574          */
 575         mutex_enter(&di_lock);
 576         di_states[m] = NULL;
 577         dcmn_err((CE_CONT, "di_close: thread = %p, assigned minor = %d\n",
 578             (void *)curthread, m + DI_NODE_SPECIES));
 579         mutex_exit(&di_lock);
 580 
 581         return (0);
 582 }
 583 
 584 
 585 /*ARGSUSED*/
 586 static int
 587 di_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
 588 {
 589         int             rv, error;
 590         di_off_t        off;
 591         struct di_all   *all;
 592         struct di_state *st;
 593         int             m = (int)getminor(dev) - DI_NODE_SPECIES;
 594         major_t         i;
 595         char            *drv_name;
 596         size_t          map_size, size;
 597         struct di_mem   *dcp;
 598         int             ndi_flags;
 599 
 600         if (m < 0 || m >= di_max_opens) {
 601                 return (ENXIO);
 602         }
 603 
 604         st = di_states[m];
 605         ASSERT(st != NULL);
 606 
 607         dcmn_err2((CE_CONT, "di_ioctl: mode = %x, cmd = %x\n", mode, cmd));
 608 
 609         switch (cmd) {
 610         case DINFOIDENT:
 611                 /*
 612                  * This is called from di_init to verify that the driver
 613                  * opened is indeed devinfo. The purpose is to guard against
 614                  * sending ioctl to an unknown driver in case of an
 615                  * unresolved major number conflict during bfu.
 616                  */
 617                 *rvalp = DI_MAGIC;
 618                 return (0);
 619 
 620         case DINFOLODRV:
 621                 /*
 622                  * Hold an installed driver and return the result
 623                  */
 624                 if (DI_UNPRIVILEGED_NODE(m)) {
 625                         /*
 626                          * Only the fully enabled instances may issue
 627                          * DINFOLDDRV.
 628                          */
 629                         return (EACCES);
 630                 }
 631 
 632                 drv_name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
 633                 if (ddi_copyin((void *)arg, drv_name, MAXNAMELEN, mode) != 0) {
 634                         kmem_free(drv_name, MAXNAMELEN);
 635                         return (EFAULT);
 636                 }
 637 
 638                 /*
 639                  * Some 3rd party driver's _init() walks the device tree,
 640                  * so we load the driver module before configuring driver.
 641                  */
 642                 i = ddi_name_to_major(drv_name);
 643                 if (ddi_hold_driver(i) == NULL) {
 644                         kmem_free(drv_name, MAXNAMELEN);
 645                         return (ENXIO);
 646                 }
 647 
 648                 ndi_flags = NDI_DEVI_PERSIST | NDI_CONFIG | NDI_NO_EVENT;
 649 
 650                 /*
 651                  * i_ddi_load_drvconf() below will trigger a reprobe
 652                  * via reset_nexus_flags(). NDI_DRV_CONF_REPROBE isn't
 653                  * needed here.
 654                  */
 655                 modunload_disable();
 656                 (void) i_ddi_load_drvconf(i);
 657                 (void) ndi_devi_config_driver(ddi_root_node(), ndi_flags, i);
 658                 kmem_free(drv_name, MAXNAMELEN);
 659                 ddi_rele_driver(i);
 660                 rv = i_ddi_devs_attached(i);
 661                 modunload_enable();
 662 
 663                 i_ddi_di_cache_invalidate();
 664 
 665                 return ((rv == DDI_SUCCESS)? 0 : ENXIO);
 666 
 667         case DINFOUSRLD:
 668                 /*
 669                  * The case for copying snapshot to userland
 670                  */
 671                 if (di_setstate(st, IOC_COPY) == -1)
 672                         return (EBUSY);
 673 
 674                 map_size = DI_ALL_PTR(st)->map_size;
 675                 if (map_size == 0) {
 676                         (void) di_setstate(st, IOC_DONE);
 677                         return (EFAULT);
 678                 }
 679 
 680                 /*
 681                  * copyout the snapshot
 682                  */
 683                 map_size = (map_size + PAGEOFFSET) & PAGEMASK;
 684 
 685                 /*
 686                  * Return the map size, so caller may do a sanity
 687                  * check against the return value of snapshot ioctl()
 688                  */
 689                 *rvalp = (int)map_size;
 690 
 691                 /*
 692                  * Copy one chunk at a time
 693                  */
 694                 off = 0;
 695                 dcp = st->memlist;
 696                 while (map_size) {
 697                         size = dcp->buf_size;
 698                         if (map_size <= size) {
 699                                 size = map_size;
 700                         }
 701 
 702                         if (ddi_copyout(di_mem_addr(st, off),
 703                             (void *)(arg + off), size, mode) != 0) {
 704                                 (void) di_setstate(st, IOC_DONE);
 705                                 return (EFAULT);
 706                         }
 707 
 708                         map_size -= size;
 709                         off += size;
 710                         dcp = dcp->next;
 711                 }
 712 
 713                 di_freemem(st);
 714                 (void) di_setstate(st, IOC_IDLE);
 715                 return (0);
 716 
 717         default:
 718                 if ((cmd & ~DIIOC_MASK) != DIIOC) {
 719                         /*
 720                          * Invalid ioctl command
 721                          */
 722                         return (ENOTTY);
 723                 }
 724                 /*
 725                  * take a snapshot
 726                  */
 727                 st->command = cmd & DIIOC_MASK;
 728                 /*FALLTHROUGH*/
 729         }
 730 
 731         /*
 732          * Obtain enough memory to hold header + rootpath.  We prevent kernel
 733          * memory exhaustion by freeing any previously allocated snapshot and
 734          * refusing the operation; otherwise we would be allowing ioctl(),
 735          * ioctl(), ioctl(), ..., panic.
 736          */
 737         if (di_setstate(st, IOC_SNAP) == -1)
 738                 return (EBUSY);
 739 
 740         /*
 741          * Initial memlist always holds di_all and the root_path - and
 742          * is at least a page and size.
 743          */
 744         size = sizeof (struct di_all) +
 745             sizeof (((struct dinfo_io *)(NULL))->root_path);
 746         if (size < PAGESIZE)
 747                 size = PAGESIZE;
 748         off = di_checkmem(st, 0, size);
 749         all = DI_ALL_PTR(st);
 750         off += sizeof (struct di_all);          /* real length of di_all */
 751 
 752         all->devcnt = devcnt;
 753         all->command = st->command;
 754         all->version = DI_SNAPSHOT_VERSION;
 755         all->top_vhci_devinfo = 0;           /* filled by build_vhci_list. */
 756 
 757         /*
 758          * Note the endianness in case we need to transport snapshot
 759          * over the network.
 760          */
 761 #if defined(_LITTLE_ENDIAN)
 762         all->endianness = DI_LITTLE_ENDIAN;
 763 #else
 764         all->endianness = DI_BIG_ENDIAN;
 765 #endif
 766 
 767         /* Copyin ioctl args, store in the snapshot. */
 768         if (copyinstr((void *)arg, all->req_path,
 769             sizeof (((struct dinfo_io *)(NULL))->root_path), &size) != 0) {
 770                 di_freemem(st);
 771                 (void) di_setstate(st, IOC_IDLE);
 772                 return (EFAULT);
 773         }
 774         (void) strcpy(all->root_path, all->req_path);
 775         off += size;                            /* real length of root_path */
 776 
 777         if ((st->command & DINFOCLEANUP) && !DEVICES_FILES_CLEANABLE(st)) {
 778                 di_freemem(st);
 779                 (void) di_setstate(st, IOC_IDLE);
 780                 return (EINVAL);
 781         }
 782 
 783         error = 0;
 784         if ((st->command & DINFOCACHE) && !cache_args_valid(st, &error)) {
 785                 di_freemem(st);
 786                 (void) di_setstate(st, IOC_IDLE);
 787                 return (error);
 788         }
 789 
 790         /*
 791          * Only the fully enabled version may force load drivers or read
 792          * the parent private data from a driver.
 793          */
 794         if ((st->command & (DINFOPRIVDATA | DINFOFORCE)) != 0 &&
 795             DI_UNPRIVILEGED_NODE(m)) {
 796                 di_freemem(st);
 797                 (void) di_setstate(st, IOC_IDLE);
 798                 return (EACCES);
 799         }
 800 
 801         /* Do we need private data? */
 802         if (st->command & DINFOPRIVDATA) {
 803                 arg += sizeof (((struct dinfo_io *)(NULL))->root_path);
 804 
 805 #ifdef _MULTI_DATAMODEL
 806                 switch (ddi_model_convert_from(mode & FMODELS)) {
 807                 case DDI_MODEL_ILP32: {
 808                         /*
 809                          * Cannot copy private data from 64-bit kernel
 810                          * to 32-bit app
 811                          */
 812                         di_freemem(st);
 813                         (void) di_setstate(st, IOC_IDLE);
 814                         return (EINVAL);
 815                 }
 816                 case DDI_MODEL_NONE:
 817                         if ((off = di_copyformat(off, st, arg, mode)) == 0) {
 818                                 di_freemem(st);
 819                                 (void) di_setstate(st, IOC_IDLE);
 820                                 return (EFAULT);
 821                         }
 822                         break;
 823                 }
 824 #else /* !_MULTI_DATAMODEL */
 825                 if ((off = di_copyformat(off, st, arg, mode)) == 0) {
 826                         di_freemem(st);
 827                         (void) di_setstate(st, IOC_IDLE);
 828                         return (EFAULT);
 829                 }
 830 #endif /* _MULTI_DATAMODEL */
 831         }
 832 
 833         all->top_devinfo = DI_ALIGN(off);
 834 
 835         /*
 836          * For cache lookups we reallocate memory from scratch,
 837          * so the value of "all" is no longer valid.
 838          */
 839         all = NULL;
 840 
 841         if (st->command & DINFOCACHE) {
 842                 *rvalp = di_cache_lookup(st);
 843         } else if (snapshot_is_cacheable(st)) {
 844                 DI_CACHE_LOCK(di_cache);
 845                 *rvalp = di_cache_update(st);
 846                 DI_CACHE_UNLOCK(di_cache);
 847         } else
 848                 *rvalp = di_snapshot_and_clean(st);
 849 
 850         if (*rvalp) {
 851                 DI_ALL_PTR(st)->map_size = *rvalp;
 852                 (void) di_setstate(st, IOC_DONE);
 853         } else {
 854                 di_freemem(st);
 855                 (void) di_setstate(st, IOC_IDLE);
 856         }
 857 
 858         return (0);
 859 }
 860 
 861 /*
 862  * Get a chunk of memory >= size, for the snapshot
 863  */
 864 static void
 865 di_allocmem(struct di_state *st, size_t size)
 866 {
 867         struct di_mem   *mem = kmem_zalloc(sizeof (struct di_mem), KM_SLEEP);
 868 
 869         /*
 870          * Round up size to nearest power of 2. If it is less
 871          * than st->mem_size, set it to st->mem_size (i.e.,
 872          * the mem_size is doubled every time) to reduce the
 873          * number of memory allocations.
 874          */
 875         size_t tmp = 1;
 876         while (tmp < size) {
 877                 tmp <<= 1;
 878         }
 879         size = (tmp > st->mem_size) ? tmp : st->mem_size;
 880 
 881         mem->buf = ddi_umem_alloc(size, DDI_UMEM_SLEEP, &mem->cook);
 882         mem->buf_size = size;
 883 
 884         dcmn_err2((CE_CONT, "di_allocmem: mem_size=%x\n", st->mem_size));
 885 
 886         if (st->mem_size == 0) {     /* first chunk */
 887                 st->memlist = mem;
 888         } else {
 889                 /*
 890                  * locate end of linked list and add a chunk at the end
 891                  */
 892                 struct di_mem *dcp = st->memlist;
 893                 while (dcp->next != NULL) {
 894                         dcp = dcp->next;
 895                 }
 896 
 897                 dcp->next = mem;
 898         }
 899 
 900         st->mem_size += size;
 901 }
 902 
 903 /*
 904  * Copy upto bufsiz bytes of the memlist to buf
 905  */
 906 static void
 907 di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz)
 908 {
 909         struct di_mem   *dcp;
 910         size_t          copysz;
 911 
 912         if (st->mem_size == 0) {
 913                 ASSERT(st->memlist == NULL);
 914                 return;
 915         }
 916 
 917         copysz = 0;
 918         for (dcp = st->memlist; dcp; dcp = dcp->next) {
 919 
 920                 ASSERT(bufsiz > 0);
 921 
 922                 if (bufsiz <= dcp->buf_size)
 923                         copysz = bufsiz;
 924                 else
 925                         copysz = dcp->buf_size;
 926 
 927                 bcopy(dcp->buf, buf, copysz);
 928 
 929                 buf += copysz;
 930                 bufsiz -= copysz;
 931 
 932                 if (bufsiz == 0)
 933                         break;
 934         }
 935 }
 936 
 937 /*
 938  * Free all memory for the snapshot
 939  */
 940 static void
 941 di_freemem(struct di_state *st)
 942 {
 943         struct di_mem   *dcp, *tmp;
 944 
 945         dcmn_err2((CE_CONT, "di_freemem\n"));
 946 
 947         if (st->mem_size) {
 948                 dcp = st->memlist;
 949                 while (dcp) {   /* traverse the linked list */
 950                         tmp = dcp;
 951                         dcp = dcp->next;
 952                         ddi_umem_free(tmp->cook);
 953                         kmem_free(tmp, sizeof (struct di_mem));
 954                 }
 955                 st->mem_size = 0;
 956                 st->memlist = NULL;
 957         }
 958 
 959         ASSERT(st->mem_size == 0);
 960         ASSERT(st->memlist == NULL);
 961 }
 962 
 963 /*
 964  * Copies cached data to the di_state structure.
 965  * Returns:
 966  *      - size of data copied, on SUCCESS
 967  *      - 0 on failure
 968  */
 969 static int
 970 di_cache2mem(struct di_cache *cache, struct di_state *st)
 971 {
 972         caddr_t pa;
 973 
 974         ASSERT(st->mem_size == 0);
 975         ASSERT(st->memlist == NULL);
 976         ASSERT(!servicing_interrupt());
 977         ASSERT(DI_CACHE_LOCKED(*cache));
 978 
 979         if (cache->cache_size == 0) {
 980                 ASSERT(cache->cache_data == NULL);
 981                 CACHE_DEBUG((DI_ERR, "Empty cache. Skipping copy"));
 982                 return (0);
 983         }
 984 
 985         ASSERT(cache->cache_data);
 986 
 987         di_allocmem(st, cache->cache_size);
 988 
 989         pa = di_mem_addr(st, 0);
 990 
 991         ASSERT(pa);
 992 
 993         /*
 994          * Verify that di_allocmem() allocates contiguous memory,
 995          * so that it is safe to do straight bcopy()
 996          */
 997         ASSERT(st->memlist != NULL);
 998         ASSERT(st->memlist->next == NULL);
 999         bcopy(cache->cache_data, pa, cache->cache_size);
1000 
1001         return (cache->cache_size);
1002 }
1003 
1004 /*
1005  * Copies a snapshot from di_state to the cache
1006  * Returns:
1007  *      - 0 on failure
1008  *      - size of copied data on success
1009  */
1010 static size_t
1011 di_mem2cache(struct di_state *st, struct di_cache *cache)
1012 {
1013         size_t  map_size;
1014 
1015         ASSERT(cache->cache_size == 0);
1016         ASSERT(cache->cache_data == NULL);
1017         ASSERT(!servicing_interrupt());
1018         ASSERT(DI_CACHE_LOCKED(*cache));
1019 
1020         if (st->mem_size == 0) {
1021                 ASSERT(st->memlist == NULL);
1022                 CACHE_DEBUG((DI_ERR, "Empty memlist. Skipping copy"));
1023                 return (0);
1024         }
1025 
1026         ASSERT(st->memlist);
1027 
1028         /*
1029          * The size of the memory list may be much larger than the
1030          * size of valid data (map_size). Cache only the valid data
1031          */
1032         map_size = DI_ALL_PTR(st)->map_size;
1033         if (map_size == 0 || map_size < sizeof (struct di_all) ||
1034             map_size > st->mem_size) {
1035                 CACHE_DEBUG((DI_ERR, "cannot cache: bad size: 0x%x", map_size));
1036                 return (0);
1037         }
1038 
1039         cache->cache_data = kmem_alloc(map_size, KM_SLEEP);
1040         cache->cache_size = map_size;
1041         di_copymem(st, cache->cache_data, cache->cache_size);
1042 
1043         return (map_size);
1044 }
1045 
1046 /*
1047  * Make sure there is at least "size" bytes memory left before
1048  * going on. Otherwise, start on a new chunk.
1049  */
1050 static di_off_t
1051 di_checkmem(struct di_state *st, di_off_t off, size_t size)
1052 {
1053         dcmn_err3((CE_CONT, "di_checkmem: off=%x size=%x\n",
1054             off, (int)size));
1055 
1056         /*
1057          * di_checkmem() shouldn't be called with a size of zero.
1058          * But in case it is, we want to make sure we return a valid
1059          * offset within the memlist and not an offset that points us
1060          * at the end of the memlist.
1061          */
1062         if (size == 0) {
1063                 dcmn_err((CE_WARN, "di_checkmem: invalid zero size used"));
1064                 size = 1;
1065         }
1066 
1067         off = DI_ALIGN(off);
1068         if ((st->mem_size - off) < size) {
1069                 off = st->mem_size;
1070                 di_allocmem(st, size);
1071         }
1072 
1073         /* verify that return value is aligned */
1074         ASSERT(off == DI_ALIGN(off));
1075         return (off);
1076 }
1077 
1078 /*
1079  * Copy the private data format from ioctl arg.
1080  * On success, the ending offset is returned. On error 0 is returned.
1081  */
1082 static di_off_t
1083 di_copyformat(di_off_t off, struct di_state *st, intptr_t arg, int mode)
1084 {
1085         di_off_t                size;
1086         struct di_priv_data     *priv;
1087         struct di_all           *all = DI_ALL_PTR(st);
1088 
1089         dcmn_err2((CE_CONT, "di_copyformat: off=%x, arg=%p mode=%x\n",
1090             off, (void *)arg, mode));
1091 
1092         /*
1093          * Copyin data and check version.
1094          * We only handle private data version 0.
1095          */
1096         priv = kmem_alloc(sizeof (struct di_priv_data), KM_SLEEP);
1097         if ((ddi_copyin((void *)arg, priv, sizeof (struct di_priv_data),
1098             mode) != 0) || (priv->version != DI_PRIVDATA_VERSION_0)) {
1099                 kmem_free(priv, sizeof (struct di_priv_data));
1100                 return (0);
1101         }
1102 
1103         /*
1104          * Save di_priv_data copied from userland in snapshot.
1105          */
1106         all->pd_version = priv->version;
1107         all->n_ppdata = priv->n_parent;
1108         all->n_dpdata = priv->n_driver;
1109 
1110         /*
1111          * copyin private data format, modify offset accordingly
1112          */
1113         if (all->n_ppdata) { /* parent private data format */
1114                 /*
1115                  * check memory
1116                  */
1117                 size = all->n_ppdata * sizeof (struct di_priv_format);
1118                 all->ppdata_format = off = di_checkmem(st, off, size);
1119                 if (ddi_copyin(priv->parent, di_mem_addr(st, off), size,
1120                     mode) != 0) {
1121                         kmem_free(priv, sizeof (struct di_priv_data));
1122                         return (0);
1123                 }
1124 
1125                 off += size;
1126         }
1127 
1128         if (all->n_dpdata) { /* driver private data format */
1129                 /*
1130                  * check memory
1131                  */
1132                 size = all->n_dpdata * sizeof (struct di_priv_format);
1133                 all->dpdata_format = off = di_checkmem(st, off, size);
1134                 if (ddi_copyin(priv->driver, di_mem_addr(st, off), size,
1135                     mode) != 0) {
1136                         kmem_free(priv, sizeof (struct di_priv_data));
1137                         return (0);
1138                 }
1139 
1140                 off += size;
1141         }
1142 
1143         kmem_free(priv, sizeof (struct di_priv_data));
1144         return (off);
1145 }
1146 
1147 /*
1148  * Return the real address based on the offset (off) within snapshot
1149  */
1150 static void *
1151 di_mem_addr(struct di_state *st, di_off_t off)
1152 {
1153         struct di_mem   *dcp = st->memlist;
1154 
1155         dcmn_err3((CE_CONT, "di_mem_addr: dcp=%p off=%x\n",
1156             (void *)dcp, off));
1157 
1158         ASSERT(off < st->mem_size);
1159 
1160         while (off >= dcp->buf_size) {
1161                 off -= dcp->buf_size;
1162                 dcp = dcp->next;
1163         }
1164 
1165         dcmn_err3((CE_CONT, "di_mem_addr: new off=%x, return = %p\n",
1166             off, (void *)(dcp->buf + off)));
1167 
1168         return (dcp->buf + off);
1169 }
1170 
1171 /*
1172  * Ideally we would use the whole key to derive the hash
1173  * value. However, the probability that two keys will
1174  * have the same dip (or pip) is very low, so
1175  * hashing by dip (or pip) pointer should suffice.
1176  */
1177 static uint_t
1178 di_hash_byptr(void *arg, mod_hash_key_t key)
1179 {
1180         struct di_key   *dik = key;
1181         size_t          rshift;
1182         void            *ptr;
1183 
1184         ASSERT(arg == NULL);
1185 
1186         switch (dik->k_type) {
1187         case DI_DKEY:
1188                 ptr = dik->k_u.dkey.dk_dip;
1189                 rshift = highbit(sizeof (struct dev_info));
1190                 break;
1191         case DI_PKEY:
1192                 ptr = dik->k_u.pkey.pk_pip;
1193                 rshift = highbit(sizeof (struct mdi_pathinfo));
1194                 break;
1195         default:
1196                 panic("devinfo: unknown key type");
1197                 /*NOTREACHED*/
1198         }
1199         return (mod_hash_byptr((void *)rshift, ptr));
1200 }
1201 
1202 static void
1203 di_key_dtor(mod_hash_key_t key)
1204 {
1205         char            *path_addr;
1206         struct di_key   *dik = key;
1207 
1208         switch (dik->k_type) {
1209         case DI_DKEY:
1210                 break;
1211         case DI_PKEY:
1212                 path_addr = dik->k_u.pkey.pk_path_addr;
1213                 if (path_addr)
1214                         kmem_free(path_addr, strlen(path_addr) + 1);
1215                 break;
1216         default:
1217                 panic("devinfo: unknown key type");
1218                 /*NOTREACHED*/
1219         }
1220 
1221         kmem_free(dik, sizeof (struct di_key));
1222 }
1223 
1224 static int
1225 di_dkey_cmp(struct di_dkey *dk1, struct di_dkey *dk2)
1226 {
1227         if (dk1->dk_dip !=  dk2->dk_dip)
1228                 return (dk1->dk_dip > dk2->dk_dip ? 1 : -1);
1229 
1230         if (dk1->dk_major != DDI_MAJOR_T_NONE &&
1231             dk2->dk_major != DDI_MAJOR_T_NONE) {
1232                 if (dk1->dk_major !=  dk2->dk_major)
1233                         return (dk1->dk_major > dk2->dk_major ? 1 : -1);
1234 
1235                 if (dk1->dk_inst !=  dk2->dk_inst)
1236                         return (dk1->dk_inst > dk2->dk_inst ? 1 : -1);
1237         }
1238 
1239         if (dk1->dk_nodeid != dk2->dk_nodeid)
1240                 return (dk1->dk_nodeid > dk2->dk_nodeid ? 1 : -1);
1241 
1242         return (0);
1243 }
1244 
1245 static int
1246 di_pkey_cmp(struct di_pkey *pk1, struct di_pkey *pk2)
1247 {
1248         char    *p1, *p2;
1249         int     rv;
1250 
1251         if (pk1->pk_pip !=  pk2->pk_pip)
1252                 return (pk1->pk_pip > pk2->pk_pip ? 1 : -1);
1253 
1254         p1 = pk1->pk_path_addr;
1255         p2 = pk2->pk_path_addr;
1256 
1257         p1 = p1 ? p1 : "";
1258         p2 = p2 ? p2 : "";
1259 
1260         rv = strcmp(p1, p2);
1261         if (rv)
1262                 return (rv > 0  ? 1 : -1);
1263 
1264         if (pk1->pk_client !=  pk2->pk_client)
1265                 return (pk1->pk_client > pk2->pk_client ? 1 : -1);
1266 
1267         if (pk1->pk_phci !=  pk2->pk_phci)
1268                 return (pk1->pk_phci > pk2->pk_phci ? 1 : -1);
1269 
1270         return (0);
1271 }
1272 
1273 static int
1274 di_key_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
1275 {
1276         struct di_key   *dik1, *dik2;
1277 
1278         dik1 = key1;
1279         dik2 = key2;
1280 
1281         if (dik1->k_type != dik2->k_type) {
1282                 panic("devinfo: mismatched keys");
1283                 /*NOTREACHED*/
1284         }
1285 
1286         switch (dik1->k_type) {
1287         case DI_DKEY:
1288                 return (di_dkey_cmp(&(dik1->k_u.dkey), &(dik2->k_u.dkey)));
1289         case DI_PKEY:
1290                 return (di_pkey_cmp(&(dik1->k_u.pkey), &(dik2->k_u.pkey)));
1291         default:
1292                 panic("devinfo: unknown key type");
1293                 /*NOTREACHED*/
1294         }
1295 }
1296 
1297 static void
1298 di_copy_aliases(struct di_state *st, alias_pair_t *apair, di_off_t *offp)
1299 {
1300         di_off_t                off;
1301         struct di_all           *all = DI_ALL_PTR(st);
1302         struct di_alias         *di_alias;
1303         di_off_t                curroff;
1304         dev_info_t              *currdip;
1305         size_t                  size;
1306 
1307         currdip = NULL;
1308         if (resolve_pathname(apair->pair_alias, &currdip, NULL, NULL) != 0) {
1309                 return;
1310         }
1311 
1312         if (di_dip_find(st, currdip, &curroff) != 0) {
1313                 ndi_rele_devi(currdip);
1314                 return;
1315         }
1316         ndi_rele_devi(currdip);
1317 
1318         off = *offp;
1319         size = sizeof (struct di_alias);
1320         size += strlen(apair->pair_alias) + 1;
1321         off = di_checkmem(st, off, size);
1322         di_alias = DI_ALIAS(di_mem_addr(st, off));
1323 
1324         di_alias->self = off;
1325         di_alias->next = all->aliases;
1326         all->aliases = off;
1327         (void) strcpy(di_alias->alias, apair->pair_alias);
1328         di_alias->curroff = curroff;
1329 
1330         off += size;
1331 
1332         *offp = off;
1333 }
1334 
1335 /*
1336  * This is the main function that takes a snapshot
1337  */
1338 static di_off_t
1339 di_snapshot(struct di_state *st)
1340 {
1341         di_off_t        off;
1342         struct di_all   *all;
1343         dev_info_t      *rootnode;
1344         char            buf[80];
1345         int             plen;
1346         char            *path;
1347         vnode_t         *vp;
1348         int             i;
1349 
1350         all = DI_ALL_PTR(st);
1351         dcmn_err((CE_CONT, "Taking a snapshot of devinfo tree...\n"));
1352 
1353         /*
1354          * Translate requested root path if an alias and snap-root != "/"
1355          */
1356         if (ddi_aliases_present == B_TRUE && strcmp(all->root_path, "/") != 0) {
1357                 /* If there is no redirected alias, use root_path as is */
1358                 rootnode = ddi_alias_redirect(all->root_path);
1359                 if (rootnode) {
1360                         (void) ddi_pathname(rootnode, all->root_path);
1361                         goto got_root;
1362                 }
1363         }
1364 
1365         /*
1366          * Verify path before entrusting it to e_ddi_hold_devi_by_path because
1367          * some platforms have OBP bugs where executing the NDI_PROMNAME code
1368          * path against an invalid path results in panic.  The lookupnameat
1369          * is done relative to rootdir without a leading '/' on "devices/"
1370          * to force the lookup to occur in the global zone.
1371          */
1372         plen = strlen("devices/") + strlen(all->root_path) + 1;
1373         path = kmem_alloc(plen, KM_SLEEP);
1374         (void) snprintf(path, plen, "devices/%s", all->root_path);
1375         if (lookupnameat(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp, rootdir)) {
1376                 dcmn_err((CE_CONT, "Devinfo node %s not found\n",
1377                     all->root_path));
1378                 kmem_free(path, plen);
1379                 return (0);
1380         }
1381         kmem_free(path, plen);
1382         VN_RELE(vp);
1383 
1384         /*
1385          * Hold the devinfo node referred by the path.
1386          */
1387         rootnode = e_ddi_hold_devi_by_path(all->root_path, 0);
1388         if (rootnode == NULL) {
1389                 dcmn_err((CE_CONT, "Devinfo node %s not found\n",
1390                     all->root_path));
1391                 return (0);
1392         }
1393 
1394 got_root:
1395         (void) snprintf(buf, sizeof (buf),
1396             "devinfo registered dips (statep=%p)", (void *)st);
1397 
1398         st->reg_dip_hash = mod_hash_create_extended(buf, 64,
1399             di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
1400             NULL, di_key_cmp, KM_SLEEP);
1401 
1402 
1403         (void) snprintf(buf, sizeof (buf),
1404             "devinfo registered pips (statep=%p)", (void *)st);
1405 
1406         st->reg_pip_hash = mod_hash_create_extended(buf, 64,
1407             di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
1408             NULL, di_key_cmp, KM_SLEEP);
1409 
1410         if (DINFOHP & st->command) {
1411                 list_create(&st->hp_list, sizeof (i_hp_t),
1412                     offsetof(i_hp_t, hp_link));
1413         }
1414 
1415         /*
1416          * copy the device tree
1417          */
1418         off = di_copytree(DEVI(rootnode), &all->top_devinfo, st);
1419 
1420         if (DINFOPATH & st->command) {
1421                 mdi_walk_vhcis(build_vhci_list, st);
1422         }
1423 
1424         if (DINFOHP & st->command) {
1425                 di_hotplug_children(st);
1426         }
1427 
1428         ddi_release_devi(rootnode);
1429 
1430         /*
1431          * copy the devnames array
1432          */
1433         all->devnames = off;
1434         off = di_copydevnm(&all->devnames, st);
1435 
1436 
1437         /* initialize the hash tables */
1438         st->lnode_count = 0;
1439         st->link_count = 0;
1440 
1441         if (DINFOLYR & st->command) {
1442                 off = di_getlink_data(off, st);
1443         }
1444 
1445         all->aliases = 0;
1446         if (ddi_aliases_present == B_FALSE)
1447                 goto done;
1448 
1449         for (i = 0; i < ddi_aliases.dali_num_pairs; i++) {
1450                 di_copy_aliases(st, &(ddi_aliases.dali_alias_pairs[i]), &off);
1451         }
1452 
1453 done:
1454         /*
1455          * Free up hash tables
1456          */
1457         mod_hash_destroy_hash(st->reg_dip_hash);
1458         mod_hash_destroy_hash(st->reg_pip_hash);
1459 
1460         /*
1461          * Record the timestamp now that we are done with snapshot.
1462          *
1463          * We compute the checksum later and then only if we cache
1464          * the snapshot, since checksumming adds some overhead.
1465          * The checksum is checked later if we read the cache file.
1466          * from disk.
1467          *
1468          * Set checksum field to 0 as CRC is calculated with that
1469          * field set to 0.
1470          */
1471         all->snapshot_time = ddi_get_time();
1472         all->cache_checksum = 0;
1473 
1474         ASSERT(all->snapshot_time != 0);
1475 
1476         return (off);
1477 }
1478 
1479 /*
1480  * Take a snapshot and clean /etc/devices files if DINFOCLEANUP is set
1481  */
1482 static di_off_t
1483 di_snapshot_and_clean(struct di_state *st)
1484 {
1485         di_off_t        off;
1486 
1487         modunload_disable();
1488         off = di_snapshot(st);
1489         if (off != 0 && (st->command & DINFOCLEANUP)) {
1490                 ASSERT(DEVICES_FILES_CLEANABLE(st));
1491                 /*
1492                  * Cleanup /etc/devices files:
1493                  * In order to accurately account for the system configuration
1494                  * in /etc/devices files, the appropriate drivers must be
1495                  * fully configured before the cleanup starts.
1496                  * So enable modunload only after the cleanup.
1497                  */
1498                 i_ddi_clean_devices_files();
1499                 /*
1500                  * Remove backing store nodes for unused devices,
1501                  * which retain past permissions customizations
1502                  * and may be undesired for newly configured devices.
1503                  */
1504                 dev_devices_cleanup();
1505         }
1506         modunload_enable();
1507 
1508         return (off);
1509 }
1510 
1511 /*
1512  * construct vhci linkage in the snapshot.
1513  */
1514 static int
1515 build_vhci_list(dev_info_t *vh_devinfo, void *arg)
1516 {
1517         struct di_all   *all;
1518         struct di_node  *me;
1519         struct di_state *st;
1520         di_off_t        off;
1521         phci_walk_arg_t pwa;
1522 
1523         dcmn_err3((CE_CONT, "build_vhci list\n"));
1524 
1525         dcmn_err3((CE_CONT, "vhci node %s%d\n",
1526             ddi_driver_name(vh_devinfo), ddi_get_instance(vh_devinfo)));
1527 
1528         st = (struct di_state *)arg;
1529         if (di_dip_find(st, vh_devinfo, &off) != 0) {
1530                 dcmn_err((CE_WARN, "di_dip_find error for the given node\n"));
1531                 return (DDI_WALK_TERMINATE);
1532         }
1533 
1534         dcmn_err3((CE_CONT, "st->mem_size: %d vh_devinfo off: 0x%x\n",
1535             st->mem_size, off));
1536 
1537         all = DI_ALL_PTR(st);
1538         if (all->top_vhci_devinfo == 0) {
1539                 all->top_vhci_devinfo = off;
1540         } else {
1541                 me = DI_NODE(di_mem_addr(st, all->top_vhci_devinfo));
1542 
1543                 while (me->next_vhci != 0) {
1544                         me = DI_NODE(di_mem_addr(st, me->next_vhci));
1545                 }
1546 
1547                 me->next_vhci = off;
1548         }
1549 
1550         pwa.off = off;
1551         pwa.st = st;
1552         mdi_vhci_walk_phcis(vh_devinfo, build_phci_list, &pwa);
1553 
1554         return (DDI_WALK_CONTINUE);
1555 }
1556 
1557 /*
1558  * construct phci linkage for the given vhci in the snapshot.
1559  */
1560 static int
1561 build_phci_list(dev_info_t *ph_devinfo, void *arg)
1562 {
1563         struct di_node  *vh_di_node;
1564         struct di_node  *me;
1565         phci_walk_arg_t *pwa;
1566         di_off_t        off;
1567 
1568         pwa = (phci_walk_arg_t *)arg;
1569 
1570         dcmn_err3((CE_CONT, "build_phci list for vhci at offset: 0x%x\n",
1571             pwa->off));
1572 
1573         vh_di_node = DI_NODE(di_mem_addr(pwa->st, pwa->off));
1574         if (di_dip_find(pwa->st, ph_devinfo, &off) != 0) {
1575                 dcmn_err((CE_WARN, "di_dip_find error for the given node\n"));
1576                 return (DDI_WALK_TERMINATE);
1577         }
1578 
1579         dcmn_err3((CE_CONT, "phci node %s%d, at offset 0x%x\n",
1580             ddi_driver_name(ph_devinfo), ddi_get_instance(ph_devinfo), off));
1581 
1582         if (vh_di_node->top_phci == 0) {
1583                 vh_di_node->top_phci = off;
1584                 return (DDI_WALK_CONTINUE);
1585         }
1586 
1587         me = DI_NODE(di_mem_addr(pwa->st, vh_di_node->top_phci));
1588 
1589         while (me->next_phci != 0) {
1590                 me = DI_NODE(di_mem_addr(pwa->st, me->next_phci));
1591         }
1592         me->next_phci = off;
1593 
1594         return (DDI_WALK_CONTINUE);
1595 }
1596 
1597 /*
1598  * Assumes all devinfo nodes in device tree have been snapshotted
1599  */
1600 static void
1601 snap_driver_list(struct di_state *st, struct devnames *dnp, di_off_t *off_p)
1602 {
1603         struct dev_info *node;
1604         struct di_node  *me;
1605         di_off_t        off;
1606 
1607         ASSERT(mutex_owned(&dnp->dn_lock));
1608 
1609         node = DEVI(dnp->dn_head);
1610         for (; node; node = node->devi_next) {
1611                 if (di_dip_find(st, (dev_info_t *)node, &off) != 0)
1612                         continue;
1613 
1614                 ASSERT(off > 0);
1615                 me = DI_NODE(di_mem_addr(st, off));
1616                 ASSERT(me->next == 0 || me->next == -1);
1617                 /*
1618                  * Only nodes which were BOUND when they were
1619                  * snapshotted will be added to per-driver list.
1620                  */
1621                 if (me->next != -1)
1622                         continue;
1623 
1624                 *off_p = off;
1625                 off_p = &me->next;
1626         }
1627 
1628         *off_p = 0;
1629 }
1630 
1631 /*
1632  * Copy the devnames array, so we have a list of drivers in the snapshot.
1633  * Also makes it possible to locate the per-driver devinfo nodes.
1634  */
1635 static di_off_t
1636 di_copydevnm(di_off_t *off_p, struct di_state *st)
1637 {
1638         int             i;
1639         di_off_t        off;
1640         size_t          size;
1641         struct di_devnm *dnp;
1642 
1643         dcmn_err2((CE_CONT, "di_copydevnm: *off_p = %p\n", (void *)off_p));
1644 
1645         /*
1646          * make sure there is some allocated memory
1647          */
1648         size = devcnt * sizeof (struct di_devnm);
1649         *off_p = off = di_checkmem(st, *off_p, size);
1650         dnp = DI_DEVNM(di_mem_addr(st, off));
1651         off += size;
1652 
1653         dcmn_err((CE_CONT, "Start copying devnamesp[%d] at offset 0x%x\n",
1654             devcnt, off));
1655 
1656         for (i = 0; i < devcnt; i++) {
1657                 if (devnamesp[i].dn_name == NULL) {
1658                         continue;
1659                 }
1660 
1661                 /*
1662                  * dn_name is not freed during driver unload or removal.
1663                  *
1664                  * There is a race condition when make_devname() changes
1665                  * dn_name during our strcpy. This should be rare since
1666                  * only add_drv does this. At any rate, we never had a
1667                  * problem with ddi_name_to_major(), which should have
1668                  * the same problem.
1669                  */
1670                 dcmn_err2((CE_CONT, "di_copydevnm: %s%d, off=%x\n",
1671                     devnamesp[i].dn_name, devnamesp[i].dn_instance, off));
1672 
1673                 size = strlen(devnamesp[i].dn_name) + 1;
1674                 dnp[i].name = off = di_checkmem(st, off, size);
1675                 (void) strcpy((char *)di_mem_addr(st, off),
1676                     devnamesp[i].dn_name);
1677                 off += size;
1678 
1679                 mutex_enter(&devnamesp[i].dn_lock);
1680 
1681                 /*
1682                  * Snapshot per-driver node list
1683                  */
1684                 snap_driver_list(st, &devnamesp[i], &dnp[i].head);
1685 
1686                 /*
1687                  * This is not used by libdevinfo, leave it for now
1688                  */
1689                 dnp[i].flags = devnamesp[i].dn_flags;
1690                 dnp[i].instance = devnamesp[i].dn_instance;
1691 
1692                 /*
1693                  * get global properties
1694                  */
1695                 if ((DINFOPROP & st->command) &&
1696                     devnamesp[i].dn_global_prop_ptr) {
1697                         dnp[i].global_prop = off;
1698                         off = di_getprop(DI_PROP_GLB_LIST,
1699                             &devnamesp[i].dn_global_prop_ptr->prop_list,
1700                             &dnp[i].global_prop, st, NULL);
1701                 }
1702 
1703                 /*
1704                  * Bit encode driver ops: & bus_ops, cb_ops, & cb_ops->cb_str
1705                  */
1706                 if (CB_DRV_INSTALLED(devopsp[i])) {
1707                         if (devopsp[i]->devo_cb_ops) {
1708                                 dnp[i].ops |= DI_CB_OPS;
1709                                 if (devopsp[i]->devo_cb_ops->cb_str)
1710                                         dnp[i].ops |= DI_STREAM_OPS;
1711                         }
1712                         if (NEXUS_DRV(devopsp[i])) {
1713                                 dnp[i].ops |= DI_BUS_OPS;
1714                         }
1715                 }
1716 
1717                 mutex_exit(&devnamesp[i].dn_lock);
1718         }
1719 
1720         dcmn_err((CE_CONT, "End copying devnamesp at offset 0x%x\n", off));
1721 
1722         return (off);
1723 }
1724 
1725 /*
1726  * Copy the kernel devinfo tree. The tree and the devnames array forms
1727  * the entire snapshot (see also di_copydevnm).
1728  */
1729 static di_off_t
1730 di_copytree(struct dev_info *root, di_off_t *off_p, struct di_state *st)
1731 {
1732         di_off_t        off;
1733         struct dev_info *node;
1734         struct di_stack *dsp = kmem_zalloc(sizeof (struct di_stack), KM_SLEEP);
1735 
1736         dcmn_err((CE_CONT, "di_copytree: root = %p, *off_p = %x\n",
1737             (void *)root, *off_p));
1738 
1739         /* force attach drivers */
1740         if (i_ddi_devi_attached((dev_info_t *)root) &&
1741             (st->command & DINFOSUBTREE) && (st->command & DINFOFORCE)) {
1742                 (void) ndi_devi_config((dev_info_t *)root,
1743                     NDI_CONFIG | NDI_DEVI_PERSIST | NDI_NO_EVENT |
1744                     NDI_DRV_CONF_REPROBE);
1745         }
1746 
1747         /*
1748          * Push top_devinfo onto a stack
1749          *
1750          * The stack is necessary to avoid recursion, which can overrun
1751          * the kernel stack.
1752          */
1753         PUSH_STACK(dsp, root, off_p);
1754 
1755         /*
1756          * As long as there is a node on the stack, copy the node.
1757          * di_copynode() is responsible for pushing and popping
1758          * child and sibling nodes on the stack.
1759          */
1760         while (!EMPTY_STACK(dsp)) {
1761                 node = TOP_NODE(dsp);
1762                 off = di_copynode(node, dsp, st);
1763         }
1764 
1765         /*
1766          * Free the stack structure
1767          */
1768         kmem_free(dsp, sizeof (struct di_stack));
1769 
1770         return (off);
1771 }
1772 
1773 /*
1774  * This is the core function, which copies all data associated with a single
1775  * node into the snapshot. The amount of information is determined by the
1776  * ioctl command.
1777  */
1778 static di_off_t
1779 di_copynode(struct dev_info *node, struct di_stack *dsp, struct di_state *st)
1780 {
1781         di_off_t        off;
1782         struct di_node  *me;
1783         size_t          size;
1784         struct dev_info *n;
1785 
1786         dcmn_err2((CE_CONT, "di_copynode: depth = %x\n", dsp->depth));
1787         ASSERT((node != NULL) && (node == TOP_NODE(dsp)));
1788 
1789         /*
1790          * check memory usage, and fix offsets accordingly.
1791          */
1792         size = sizeof (struct di_node);
1793         *(TOP_OFFSET(dsp)) = off = di_checkmem(st, *(TOP_OFFSET(dsp)), size);
1794         me = DI_NODE(di_mem_addr(st, off));
1795         me->self = off;
1796         off += size;
1797 
1798         dcmn_err((CE_CONT, "copy node %s, instance #%d, at offset 0x%x\n",
1799             node->devi_node_name, node->devi_instance, off));
1800 
1801         /*
1802          * Node parameters:
1803          * self         -- offset of current node within snapshot
1804          * nodeid       -- pointer to PROM node (tri-valued)
1805          * state        -- hot plugging device state
1806          * node_state   -- devinfo node state
1807          */
1808         me->instance = node->devi_instance;
1809         me->nodeid = node->devi_nodeid;
1810         me->node_class = node->devi_node_class;
1811         me->attributes = node->devi_node_attributes;
1812         me->state = node->devi_state;
1813         me->flags = node->devi_flags;
1814         me->node_state = node->devi_node_state;
1815         me->next_vhci = 0;           /* Filled up by build_vhci_list. */
1816         me->top_phci = 0;            /* Filled up by build_phci_list. */
1817         me->next_phci = 0;           /* Filled up by build_phci_list. */
1818         me->multipath_component = MULTIPATH_COMPONENT_NONE; /* set default. */
1819         me->user_private_data = NULL;
1820 
1821         /*
1822          * Get parent's offset in snapshot from the stack
1823          * and store it in the current node
1824          */
1825         if (dsp->depth > 1) {
1826                 me->parent = *(PARENT_OFFSET(dsp));
1827         }
1828 
1829         /*
1830          * Save the offset of this di_node in a hash table.
1831          * This is used later to resolve references to this
1832          * dip from other parts of the tree (per-driver list,
1833          * multipathing linkages, layered usage linkages).
1834          * The key used for the hash table is derived from
1835          * information in the dip.
1836          */
1837         di_register_dip(st, (dev_info_t *)node, me->self);
1838 
1839 #ifdef  DEVID_COMPATIBILITY
1840         /* check for devid as property marker */
1841         if (node->devi_devid_str) {
1842                 ddi_devid_t     devid;
1843 
1844                 /*
1845                  * The devid is now represented as a property. For
1846                  * compatibility with di_devid() interface in libdevinfo we
1847                  * must return it as a binary structure in the snapshot. When
1848                  * (if) di_devid() is removed from libdevinfo then the code
1849                  * related to DEVID_COMPATIBILITY can be removed.
1850                  */
1851                 if (ddi_devid_str_decode(node->devi_devid_str, &devid, NULL) ==
1852                     DDI_SUCCESS) {
1853                         size = ddi_devid_sizeof(devid);
1854                         off = di_checkmem(st, off, size);
1855                         me->devid = off;
1856                         bcopy(devid, di_mem_addr(st, off), size);
1857                         off += size;
1858                         ddi_devid_free(devid);
1859                 }
1860         }
1861 #endif  /* DEVID_COMPATIBILITY */
1862 
1863         if (node->devi_node_name) {
1864                 size = strlen(node->devi_node_name) + 1;
1865                 me->node_name = off = di_checkmem(st, off, size);
1866                 (void) strcpy(di_mem_addr(st, off), node->devi_node_name);
1867                 off += size;
1868         }
1869 
1870         if (node->devi_compat_names && (node->devi_compat_length > 1)) {
1871                 size = node->devi_compat_length;
1872                 me->compat_names = off = di_checkmem(st, off, size);
1873                 me->compat_length = (int)size;
1874                 bcopy(node->devi_compat_names, di_mem_addr(st, off), size);
1875                 off += size;
1876         }
1877 
1878         if (node->devi_addr) {
1879                 size = strlen(node->devi_addr) + 1;
1880                 me->address = off = di_checkmem(st, off, size);
1881                 (void) strcpy(di_mem_addr(st, off), node->devi_addr);
1882                 off += size;
1883         }
1884 
1885         if (node->devi_binding_name) {
1886                 size = strlen(node->devi_binding_name) + 1;
1887                 me->bind_name = off = di_checkmem(st, off, size);
1888                 (void) strcpy(di_mem_addr(st, off), node->devi_binding_name);
1889                 off += size;
1890         }
1891 
1892         me->drv_major = node->devi_major;
1893 
1894         /*
1895          * If the dip is BOUND, set the next pointer of the
1896          * per-instance list to -1, indicating that it is yet to be resolved.
1897          * This will be resolved later in snap_driver_list().
1898          */
1899         if (me->drv_major != -1) {
1900                 me->next = -1;
1901         } else {
1902                 me->next = 0;
1903         }
1904 
1905         /*
1906          * An optimization to skip mutex_enter when not needed.
1907          */
1908         if (!((DINFOMINOR | DINFOPROP | DINFOPATH | DINFOHP) & st->command)) {
1909                 goto priv_data;
1910         }
1911 
1912         /*
1913          * LOCKING: We already have an active ndi_devi_enter to gather the
1914          * minor data, and we will take devi_lock to gather properties as
1915          * needed off di_getprop.
1916          */
1917         if (!(DINFOMINOR & st->command)) {
1918                 goto path;
1919         }
1920 
1921         ASSERT(DEVI_BUSY_OWNED(node));
1922         if (node->devi_minor) {              /* minor data */
1923                 me->minor_data = off;
1924                 off = di_getmdata(node->devi_minor, &me->minor_data,
1925                     me->self, st);
1926         }
1927 
1928 path:
1929         if (!(DINFOPATH & st->command)) {
1930                 goto property;
1931         }
1932 
1933         if (MDI_VHCI(node)) {
1934                 me->multipath_component = MULTIPATH_COMPONENT_VHCI;
1935         }
1936 
1937         if (MDI_CLIENT(node)) {
1938                 me->multipath_component = MULTIPATH_COMPONENT_CLIENT;
1939                 me->multipath_client = off;
1940                 off = di_getpath_data((dev_info_t *)node, &me->multipath_client,
1941                     me->self, st, 1);
1942                 dcmn_err((CE_WARN, "me->multipath_client = %x for node %p "
1943                     "component type = %d.  off=%d",
1944                     me->multipath_client,
1945                     (void *)node, node->devi_mdi_component, off));
1946         }
1947 
1948         if (MDI_PHCI(node)) {
1949                 me->multipath_component = MULTIPATH_COMPONENT_PHCI;
1950                 me->multipath_phci = off;
1951                 off = di_getpath_data((dev_info_t *)node, &me->multipath_phci,
1952                     me->self, st, 0);
1953                 dcmn_err((CE_WARN, "me->multipath_phci = %x for node %p "
1954                     "component type = %d.  off=%d",
1955                     me->multipath_phci,
1956                     (void *)node, node->devi_mdi_component, off));
1957         }
1958 
1959 property:
1960         if (!(DINFOPROP & st->command)) {
1961                 goto hotplug_data;
1962         }
1963 
1964         if (node->devi_drv_prop_ptr) {       /* driver property list */
1965                 me->drv_prop = off;
1966                 off = di_getprop(DI_PROP_DRV_LIST, &node->devi_drv_prop_ptr,
1967                     &me->drv_prop, st, node);
1968         }
1969 
1970         if (node->devi_sys_prop_ptr) {       /* system property list */
1971                 me->sys_prop = off;
1972                 off = di_getprop(DI_PROP_SYS_LIST, &node->devi_sys_prop_ptr,
1973                     &me->sys_prop, st, node);
1974         }
1975 
1976         if (node->devi_hw_prop_ptr) {        /* hardware property list */
1977                 me->hw_prop = off;
1978                 off = di_getprop(DI_PROP_HW_LIST, &node->devi_hw_prop_ptr,
1979                     &me->hw_prop, st, node);
1980         }
1981 
1982         if (node->devi_global_prop_list == NULL) {
1983                 me->glob_prop = (di_off_t)-1;        /* not global property */
1984         } else {
1985                 /*
1986                  * Make copy of global property list if this devinfo refers
1987                  * global properties different from what's on the devnames
1988                  * array. It can happen if there has been a forced
1989                  * driver.conf update. See mod_drv(1M).
1990                  */
1991                 ASSERT(me->drv_major != -1);
1992                 if (node->devi_global_prop_list !=
1993                     devnamesp[me->drv_major].dn_global_prop_ptr) {
1994                         me->glob_prop = off;
1995                         off = di_getprop(DI_PROP_GLB_LIST,
1996                             &node->devi_global_prop_list->prop_list,
1997                             &me->glob_prop, st, node);
1998                 }
1999         }
2000 
2001 hotplug_data:
2002         if (!(DINFOHP & st->command)) {
2003                 goto priv_data;
2004         }
2005 
2006         if (node->devi_hp_hdlp) {    /* hotplug data */
2007                 me->hp_data = off;
2008                 off = di_gethpdata(node->devi_hp_hdlp, &me->hp_data, st);
2009         }
2010 
2011 priv_data:
2012         if (!(DINFOPRIVDATA & st->command)) {
2013                 goto pm_info;
2014         }
2015 
2016         if (ddi_get_parent_data((dev_info_t *)node) != NULL) {
2017                 me->parent_data = off;
2018                 off = di_getppdata(node, &me->parent_data, st);
2019         }
2020 
2021         if (ddi_get_driver_private((dev_info_t *)node) != NULL) {
2022                 me->driver_data = off;
2023                 off = di_getdpdata(node, &me->driver_data, st);
2024         }
2025 
2026 pm_info: /* NOT implemented */
2027 
2028 subtree:
2029         /* keep the stack aligned */
2030         off = DI_ALIGN(off);
2031 
2032         if (!(DINFOSUBTREE & st->command)) {
2033                 POP_STACK(dsp);
2034                 return (off);
2035         }
2036 
2037 child:
2038         /*
2039          * If there is a visible child--push child onto stack.
2040          * Hold the parent (me) busy while doing so.
2041          */
2042         if ((n = node->devi_child) != NULL) {
2043                 /* skip hidden nodes */
2044                 while (n && ndi_dev_is_hidden_node((dev_info_t *)n))
2045                         n = n->devi_sibling;
2046                 if (n) {
2047                         me->child = off;
2048                         PUSH_STACK(dsp, n, &me->child);
2049                         return (me->child);
2050                 }
2051         }
2052 
2053 sibling:
2054         /*
2055          * Done with any child nodes, unroll the stack till a visible
2056          * sibling of a parent node is found or root node is reached.
2057          */
2058         POP_STACK(dsp);
2059         while (!EMPTY_STACK(dsp)) {
2060                 if ((n = node->devi_sibling) != NULL) {
2061                         /* skip hidden nodes */
2062                         while (n && ndi_dev_is_hidden_node((dev_info_t *)n))
2063                                 n = n->devi_sibling;
2064                         if (n) {
2065                                 me->sibling = DI_ALIGN(off);
2066                                 PUSH_STACK(dsp, n, &me->sibling);
2067                                 return (me->sibling);
2068                         }
2069                 }
2070                 node = TOP_NODE(dsp);
2071                 me = DI_NODE(di_mem_addr(st, *(TOP_OFFSET(dsp))));
2072                 POP_STACK(dsp);
2073         }
2074 
2075         /*
2076          * DONE with all nodes
2077          */
2078         return (off);
2079 }
2080 
2081 static i_lnode_t *
2082 i_lnode_alloc(int modid)
2083 {
2084         i_lnode_t       *i_lnode;
2085 
2086         i_lnode = kmem_zalloc(sizeof (i_lnode_t), KM_SLEEP);
2087 
2088         ASSERT(modid != -1);
2089         i_lnode->modid = modid;
2090 
2091         return (i_lnode);
2092 }
2093 
2094 static void
2095 i_lnode_free(i_lnode_t *i_lnode)
2096 {
2097         kmem_free(i_lnode, sizeof (i_lnode_t));
2098 }
2099 
2100 static void
2101 i_lnode_check_free(i_lnode_t *i_lnode)
2102 {
2103         /* This lnode and its dip must have been snapshotted */
2104         ASSERT(i_lnode->self > 0);
2105         ASSERT(i_lnode->di_node->self > 0);
2106 
2107         /* at least 1 link (in or out) must exist for this lnode */
2108         ASSERT(i_lnode->link_in || i_lnode->link_out);
2109 
2110         i_lnode_free(i_lnode);
2111 }
2112 
2113 static i_link_t *
2114 i_link_alloc(int spec_type)
2115 {
2116         i_link_t        *i_link;
2117 
2118         i_link = kmem_zalloc(sizeof (i_link_t), KM_SLEEP);
2119         i_link->spec_type = spec_type;
2120 
2121         return (i_link);
2122 }
2123 
2124 static void
2125 i_link_check_free(i_link_t *i_link)
2126 {
2127         /* This link must have been snapshotted */
2128         ASSERT(i_link->self > 0);
2129 
2130         /* Both endpoint lnodes must exist for this link */
2131         ASSERT(i_link->src_lnode);
2132         ASSERT(i_link->tgt_lnode);
2133 
2134         kmem_free(i_link, sizeof (i_link_t));
2135 }
2136 
2137 /*ARGSUSED*/
2138 static uint_t
2139 i_lnode_hashfunc(void *arg, mod_hash_key_t key)
2140 {
2141         i_lnode_t       *i_lnode = (i_lnode_t *)key;
2142         struct di_node  *ptr;
2143         dev_t           dev;
2144 
2145         dev = i_lnode->devt;
2146         if (dev != DDI_DEV_T_NONE)
2147                 return (i_lnode->modid + getminor(dev) + getmajor(dev));
2148 
2149         ptr = i_lnode->di_node;
2150         ASSERT(ptr->self > 0);
2151         if (ptr) {
2152                 uintptr_t k = (uintptr_t)ptr;
2153                 k >>= (int)highbit(sizeof (struct di_node));
2154                 return ((uint_t)k);
2155         }
2156 
2157         return (i_lnode->modid);
2158 }
2159 
2160 static int
2161 i_lnode_cmp(void *arg1, void *arg2)
2162 {
2163         i_lnode_t       *i_lnode1 = (i_lnode_t *)arg1;
2164         i_lnode_t       *i_lnode2 = (i_lnode_t *)arg2;
2165 
2166         if (i_lnode1->modid != i_lnode2->modid) {
2167                 return ((i_lnode1->modid < i_lnode2->modid) ? -1 : 1);
2168         }
2169 
2170         if (i_lnode1->di_node != i_lnode2->di_node)
2171                 return ((i_lnode1->di_node < i_lnode2->di_node) ? -1 : 1);
2172 
2173         if (i_lnode1->devt != i_lnode2->devt)
2174                 return ((i_lnode1->devt < i_lnode2->devt) ? -1 : 1);
2175 
2176         return (0);
2177 }
2178 
2179 /*
2180  * An lnode represents a {dip, dev_t} tuple. A link represents a
2181  * {src_lnode, tgt_lnode, spec_type} tuple.
2182  * The following callback assumes that LDI framework ref-counts the
2183  * src_dip and tgt_dip while invoking this callback.
2184  */
2185 static int
2186 di_ldi_callback(const ldi_usage_t *ldi_usage, void *arg)
2187 {
2188         struct di_state *st = (struct di_state *)arg;
2189         i_lnode_t       *src_lnode, *tgt_lnode, *i_lnode;
2190         i_link_t        **i_link_next, *i_link;
2191         di_off_t        soff, toff;
2192         mod_hash_val_t  nodep = NULL;
2193         int             res;
2194 
2195         /*
2196          * if the source or target of this device usage information doesn't
2197          * correspond to a device node then we don't report it via
2198          * libdevinfo so return.
2199          */
2200         if ((ldi_usage->src_dip == NULL) || (ldi_usage->tgt_dip == NULL))
2201                 return (LDI_USAGE_CONTINUE);
2202 
2203         ASSERT(e_ddi_devi_holdcnt(ldi_usage->src_dip));
2204         ASSERT(e_ddi_devi_holdcnt(ldi_usage->tgt_dip));
2205 
2206         /*
2207          * Skip the ldi_usage if either src or tgt dip is not in the
2208          * snapshot. This saves us from pruning bad lnodes/links later.
2209          */
2210         if (di_dip_find(st, ldi_usage->src_dip, &soff) != 0)
2211                 return (LDI_USAGE_CONTINUE);
2212         if (di_dip_find(st, ldi_usage->tgt_dip, &toff) != 0)
2213                 return (LDI_USAGE_CONTINUE);
2214 
2215         ASSERT(soff > 0);
2216         ASSERT(toff > 0);
2217 
2218         /*
2219          * allocate an i_lnode and add it to the lnode hash
2220          * if it is not already present. For this particular
2221          * link the lnode is a source, but it may
2222          * participate as tgt or src in any number of layered
2223          * operations - so it may already be in the hash.
2224          */
2225         i_lnode = i_lnode_alloc(ldi_usage->src_modid);
2226         i_lnode->di_node = DI_NODE(di_mem_addr(st, soff));
2227         i_lnode->devt = ldi_usage->src_devt;
2228 
2229         res = mod_hash_find(st->lnode_hash, i_lnode, &nodep);
2230         if (res == MH_ERR_NOTFOUND) {
2231                 /*
2232                  * new i_lnode
2233                  * add it to the hash and increment the lnode count
2234                  */
2235                 res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode);
2236                 ASSERT(res == 0);
2237                 st->lnode_count++;
2238                 src_lnode = i_lnode;
2239         } else {
2240                 /* this i_lnode already exists in the lnode_hash */
2241                 i_lnode_free(i_lnode);
2242                 src_lnode = (i_lnode_t *)nodep;
2243         }
2244 
2245         /*
2246          * allocate a tgt i_lnode and add it to the lnode hash
2247          */
2248         i_lnode = i_lnode_alloc(ldi_usage->tgt_modid);
2249         i_lnode->di_node = DI_NODE(di_mem_addr(st, toff));
2250         i_lnode->devt = ldi_usage->tgt_devt;
2251 
2252         res = mod_hash_find(st->lnode_hash, i_lnode, &nodep);
2253         if (res == MH_ERR_NOTFOUND) {
2254                 /*
2255                  * new i_lnode
2256                  * add it to the hash and increment the lnode count
2257                  */
2258                 res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode);
2259                 ASSERT(res == 0);
2260                 st->lnode_count++;
2261                 tgt_lnode = i_lnode;
2262         } else {
2263                 /* this i_lnode already exists in the lnode_hash */
2264                 i_lnode_free(i_lnode);
2265                 tgt_lnode = (i_lnode_t *)nodep;
2266         }
2267 
2268         /*
2269          * allocate a i_link
2270          */
2271         i_link = i_link_alloc(ldi_usage->tgt_spec_type);
2272         i_link->src_lnode = src_lnode;
2273         i_link->tgt_lnode = tgt_lnode;
2274 
2275         /*
2276          * add this link onto the src i_lnodes outbound i_link list
2277          */
2278         i_link_next = &(src_lnode->link_out);
2279         while (*i_link_next != NULL) {
2280                 if ((i_lnode_cmp(tgt_lnode, (*i_link_next)->tgt_lnode) == 0) &&
2281                     (i_link->spec_type == (*i_link_next)->spec_type)) {
2282                         /* this link already exists */
2283                         kmem_free(i_link, sizeof (i_link_t));
2284                         return (LDI_USAGE_CONTINUE);
2285                 }
2286                 i_link_next = &((*i_link_next)->src_link_next);
2287         }
2288         *i_link_next = i_link;
2289 
2290         /*
2291          * add this link onto the tgt i_lnodes inbound i_link list
2292          */
2293         i_link_next = &(tgt_lnode->link_in);
2294         while (*i_link_next != NULL) {
2295                 ASSERT(i_lnode_cmp(src_lnode, (*i_link_next)->src_lnode) != 0);
2296                 i_link_next = &((*i_link_next)->tgt_link_next);
2297         }
2298         *i_link_next = i_link;
2299 
2300         /*
2301          * add this i_link to the link hash
2302          */
2303         res = mod_hash_insert(st->link_hash, i_link, i_link);
2304         ASSERT(res == 0);
2305         st->link_count++;
2306 
2307         return (LDI_USAGE_CONTINUE);
2308 }
2309 
2310 struct i_layer_data {
2311         struct di_state *st;
2312         int             lnode_count;
2313         int             link_count;
2314         di_off_t        lnode_off;
2315         di_off_t        link_off;
2316 };
2317 
2318 /*ARGSUSED*/
2319 static uint_t
2320 i_link_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
2321 {
2322         i_link_t                *i_link  = (i_link_t *)key;
2323         struct i_layer_data     *data = arg;
2324         struct di_link          *me;
2325         struct di_lnode         *melnode;
2326         struct di_node          *medinode;
2327 
2328         ASSERT(i_link->self == 0);
2329 
2330         i_link->self = data->link_off +
2331             (data->link_count * sizeof (struct di_link));
2332         data->link_count++;
2333 
2334         ASSERT(data->link_off > 0 && data->link_count > 0);
2335         ASSERT(data->lnode_count == data->st->lnode_count); /* lnodes done */
2336         ASSERT(data->link_count <= data->st->link_count);
2337 
2338         /* fill in fields for the di_link snapshot */
2339         me = DI_LINK(di_mem_addr(data->st, i_link->self));
2340         me->self = i_link->self;
2341         me->spec_type = i_link->spec_type;
2342 
2343         /*
2344          * The src_lnode and tgt_lnode i_lnode_t for this i_link_t
2345          * are created during the LDI table walk. Since we are
2346          * walking the link hash, the lnode hash has already been
2347          * walked and the lnodes have been snapshotted. Save lnode
2348          * offsets.
2349          */
2350         me->src_lnode = i_link->src_lnode->self;
2351         me->tgt_lnode = i_link->tgt_lnode->self;
2352 
2353         /*
2354          * Save this link's offset in the src_lnode snapshot's link_out
2355          * field
2356          */
2357         melnode = DI_LNODE(di_mem_addr(data->st, me->src_lnode));
2358         me->src_link_next = melnode->link_out;
2359         melnode->link_out = me->self;
2360 
2361         /*
2362          * Put this link on the tgt_lnode's link_in field
2363          */
2364         melnode = DI_LNODE(di_mem_addr(data->st, me->tgt_lnode));
2365         me->tgt_link_next = melnode->link_in;
2366         melnode->link_in = me->self;
2367 
2368         /*
2369          * An i_lnode_t is only created if the corresponding dip exists
2370          * in the snapshot. A pointer to the di_node is saved in the
2371          * i_lnode_t when it is allocated. For this link, get the di_node
2372          * for the source lnode. Then put the link on the di_node's list
2373          * of src links
2374          */
2375         medinode = i_link->src_lnode->di_node;
2376         me->src_node_next = medinode->src_links;
2377         medinode->src_links = me->self;
2378 
2379         /*
2380          * Put this link on the tgt_links list of the target
2381          * dip.
2382          */
2383         medinode = i_link->tgt_lnode->di_node;
2384         me->tgt_node_next = medinode->tgt_links;
2385         medinode->tgt_links = me->self;
2386 
2387         return (MH_WALK_CONTINUE);
2388 }
2389 
2390 /*ARGSUSED*/
2391 static uint_t
2392 i_lnode_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
2393 {
2394         i_lnode_t               *i_lnode = (i_lnode_t *)key;
2395         struct i_layer_data     *data = arg;
2396         struct di_lnode         *me;
2397         struct di_node          *medinode;
2398 
2399         ASSERT(i_lnode->self == 0);
2400 
2401         i_lnode->self = data->lnode_off +
2402             (data->lnode_count * sizeof (struct di_lnode));
2403         data->lnode_count++;
2404 
2405         ASSERT(data->lnode_off > 0 && data->lnode_count > 0);
2406         ASSERT(data->link_count == 0); /* links not done yet */
2407         ASSERT(data->lnode_count <= data->st->lnode_count);
2408 
2409         /* fill in fields for the di_lnode snapshot */
2410         me = DI_LNODE(di_mem_addr(data->st, i_lnode->self));
2411         me->self = i_lnode->self;
2412 
2413         if (i_lnode->devt == DDI_DEV_T_NONE) {
2414                 me->dev_major = DDI_MAJOR_T_NONE;
2415                 me->dev_minor = DDI_MAJOR_T_NONE;
2416         } else {
2417                 me->dev_major = getmajor(i_lnode->devt);
2418                 me->dev_minor = getminor(i_lnode->devt);
2419         }
2420 
2421         /*
2422          * The dip corresponding to this lnode must exist in
2423          * the snapshot or we wouldn't have created the i_lnode_t
2424          * during LDI walk. Save the offset of the dip.
2425          */
2426         ASSERT(i_lnode->di_node && i_lnode->di_node->self > 0);
2427         me->node = i_lnode->di_node->self;
2428 
2429         /*
2430          * There must be at least one link in or out of this lnode
2431          * or we wouldn't have created it. These fields will be set
2432          * during the link hash walk.
2433          */
2434         ASSERT((i_lnode->link_in != NULL) || (i_lnode->link_out != NULL));
2435 
2436         /*
2437          * set the offset of the devinfo node associated with this
2438          * lnode. Also update the node_next next pointer.  this pointer
2439          * is set if there are multiple lnodes associated with the same
2440          * devinfo node.  (could occure when multiple minor nodes
2441          * are open for one device, etc.)
2442          */
2443         medinode = i_lnode->di_node;
2444         me->node_next = medinode->lnodes;
2445         medinode->lnodes = me->self;
2446 
2447         return (MH_WALK_CONTINUE);
2448 }
2449 
2450 static di_off_t
2451 di_getlink_data(di_off_t off, struct di_state *st)
2452 {
2453         struct i_layer_data     data = {0};
2454         size_t                  size;
2455 
2456         dcmn_err2((CE_CONT, "di_copylyr: off = %x\n", off));
2457 
2458         st->lnode_hash = mod_hash_create_extended("di_lnode_hash", 32,
2459             mod_hash_null_keydtor, (void (*)(mod_hash_val_t))i_lnode_check_free,
2460             i_lnode_hashfunc, NULL, i_lnode_cmp, KM_SLEEP);
2461 
2462         st->link_hash = mod_hash_create_ptrhash("di_link_hash", 32,
2463             (void (*)(mod_hash_val_t))i_link_check_free, sizeof (i_link_t));
2464 
2465         /* get driver layering information */
2466         (void) ldi_usage_walker(st, di_ldi_callback);
2467 
2468         /* check if there is any link data to include in the snapshot */
2469         if (st->lnode_count == 0) {
2470                 ASSERT(st->link_count == 0);
2471                 goto out;
2472         }
2473 
2474         ASSERT(st->link_count != 0);
2475 
2476         /* get a pointer to snapshot memory for all the di_lnodes */
2477         size = sizeof (struct di_lnode) * st->lnode_count;
2478         data.lnode_off = off = di_checkmem(st, off, size);
2479         off += size;
2480 
2481         /* get a pointer to snapshot memory for all the di_links */
2482         size = sizeof (struct di_link) * st->link_count;
2483         data.link_off = off = di_checkmem(st, off, size);
2484         off += size;
2485 
2486         data.lnode_count = data.link_count = 0;
2487         data.st = st;
2488 
2489         /*
2490          * We have lnodes and links that will go into the
2491          * snapshot, so let's walk the respective hashes
2492          * and snapshot them. The various linkages are
2493          * also set up during the walk.
2494          */
2495         mod_hash_walk(st->lnode_hash, i_lnode_walker, (void *)&data);
2496         ASSERT(data.lnode_count == st->lnode_count);
2497 
2498         mod_hash_walk(st->link_hash, i_link_walker, (void *)&data);
2499         ASSERT(data.link_count == st->link_count);
2500 
2501 out:
2502         /* free up the i_lnodes and i_links used to create the snapshot */
2503         mod_hash_destroy_hash(st->lnode_hash);
2504         mod_hash_destroy_hash(st->link_hash);
2505         st->lnode_count = 0;
2506         st->link_count = 0;
2507 
2508         return (off);
2509 }
2510 
2511 
2512 /*
2513  * Copy all minor data nodes attached to a devinfo node into the snapshot.
2514  * It is called from di_copynode with active ndi_devi_enter to protect
2515  * the list of minor nodes.
2516  */
2517 static di_off_t
2518 di_getmdata(struct ddi_minor_data *mnode, di_off_t *off_p, di_off_t node,
2519         struct di_state *st)
2520 {
2521         di_off_t        off;
2522         struct di_minor *me;
2523         size_t          size;
2524 
2525         dcmn_err2((CE_CONT, "di_getmdata:\n"));
2526 
2527         /*
2528          * check memory first
2529          */
2530         off = di_checkmem(st, *off_p, sizeof (struct di_minor));
2531         *off_p = off;
2532 
2533         do {
2534                 me = DI_MINOR(di_mem_addr(st, off));
2535                 me->self = off;
2536                 me->type = mnode->type;
2537                 me->node = node;
2538                 me->user_private_data = NULL;
2539 
2540                 off += sizeof (struct di_minor);
2541 
2542                 /*
2543                  * Split dev_t to major/minor, so it works for
2544                  * both ILP32 and LP64 model
2545                  */
2546                 me->dev_major = getmajor(mnode->ddm_dev);
2547                 me->dev_minor = getminor(mnode->ddm_dev);
2548                 me->spec_type = mnode->ddm_spec_type;
2549 
2550                 if (mnode->ddm_name) {
2551                         size = strlen(mnode->ddm_name) + 1;
2552                         me->name = off = di_checkmem(st, off, size);
2553                         (void) strcpy(di_mem_addr(st, off), mnode->ddm_name);
2554                         off += size;
2555                 }
2556 
2557                 if (mnode->ddm_node_type) {
2558                         size = strlen(mnode->ddm_node_type) + 1;
2559                         me->node_type = off = di_checkmem(st, off, size);
2560                         (void) strcpy(di_mem_addr(st, off),
2561                             mnode->ddm_node_type);
2562                         off += size;
2563                 }
2564 
2565                 off = di_checkmem(st, off, sizeof (struct di_minor));
2566                 me->next = off;
2567                 mnode = mnode->next;
2568         } while (mnode);
2569 
2570         me->next = 0;
2571 
2572         return (off);
2573 }
2574 
2575 /*
2576  * di_register_dip(), di_find_dip(): The dip must be protected
2577  * from deallocation when using these routines - this can either
2578  * be a reference count, a busy hold or a per-driver lock.
2579  */
2580 
2581 static void
2582 di_register_dip(struct di_state *st, dev_info_t *dip, di_off_t off)
2583 {
2584         struct dev_info *node = DEVI(dip);
2585         struct di_key   *key = kmem_zalloc(sizeof (*key), KM_SLEEP);
2586         struct di_dkey  *dk;
2587 
2588         ASSERT(dip);
2589         ASSERT(off > 0);
2590 
2591         key->k_type = DI_DKEY;
2592         dk = &(key->k_u.dkey);
2593 
2594         dk->dk_dip = dip;
2595         dk->dk_major = node->devi_major;
2596         dk->dk_inst = node->devi_instance;
2597         dk->dk_nodeid = node->devi_nodeid;
2598 
2599         if (mod_hash_insert(st->reg_dip_hash, (mod_hash_key_t)key,
2600             (mod_hash_val_t)(uintptr_t)off) != 0) {
2601                 panic(
2602                     "duplicate devinfo (%p) registered during device "
2603                     "tree walk", (void *)dip);
2604         }
2605 }
2606 
2607 
2608 static int
2609 di_dip_find(struct di_state *st, dev_info_t *dip, di_off_t *off_p)
2610 {
2611         /*
2612          * uintptr_t must be used because it matches the size of void *;
2613          * mod_hash expects clients to place results into pointer-size
2614          * containers; since di_off_t is always a 32-bit offset, alignment
2615          * would otherwise be broken on 64-bit kernels.
2616          */
2617         uintptr_t       offset;
2618         struct          di_key key = {0};
2619         struct          di_dkey *dk;
2620 
2621         ASSERT(st->reg_dip_hash);
2622         ASSERT(dip);
2623         ASSERT(off_p);
2624 
2625 
2626         key.k_type = DI_DKEY;
2627         dk = &(key.k_u.dkey);
2628 
2629         dk->dk_dip = dip;
2630         dk->dk_major = DEVI(dip)->devi_major;
2631         dk->dk_inst = DEVI(dip)->devi_instance;
2632         dk->dk_nodeid = DEVI(dip)->devi_nodeid;
2633 
2634         if (mod_hash_find(st->reg_dip_hash, (mod_hash_key_t)&key,
2635             (mod_hash_val_t *)&offset) == 0) {
2636                 *off_p = (di_off_t)offset;
2637                 return (0);
2638         } else {
2639                 return (-1);
2640         }
2641 }
2642 
/*
 * di_register_pip(), di_pip_find(): The pip must be protected from deallocation
 * when using these routines. The caller must do this by protecting the
 * client (or phci) <-> pip linkage while traversing the list and then holding
 * the pip when it is found in the list.
 */
2649 
2650 static void
2651 di_register_pip(struct di_state *st, mdi_pathinfo_t *pip, di_off_t off)
2652 {
2653         struct di_key   *key = kmem_zalloc(sizeof (*key), KM_SLEEP);
2654         char            *path_addr;
2655         struct di_pkey  *pk;
2656 
2657         ASSERT(pip);
2658         ASSERT(off > 0);
2659 
2660         key->k_type = DI_PKEY;
2661         pk = &(key->k_u.pkey);
2662 
2663         pk->pk_pip = pip;
2664         path_addr = mdi_pi_get_addr(pip);
2665         if (path_addr)
2666                 pk->pk_path_addr = i_ddi_strdup(path_addr, KM_SLEEP);
2667         pk->pk_client = mdi_pi_get_client(pip);
2668         pk->pk_phci = mdi_pi_get_phci(pip);
2669 
2670         if (mod_hash_insert(st->reg_pip_hash, (mod_hash_key_t)key,
2671             (mod_hash_val_t)(uintptr_t)off) != 0) {
2672                 panic(
2673                     "duplicate pathinfo (%p) registered during device "
2674                     "tree walk", (void *)pip);
2675         }
2676 }
2677 
2678 /*
2679  * As with di_register_pip, the caller must hold or lock the pip
2680  */
2681 static int
2682 di_pip_find(struct di_state *st, mdi_pathinfo_t *pip, di_off_t *off_p)
2683 {
2684         /*
2685          * uintptr_t must be used because it matches the size of void *;
2686          * mod_hash expects clients to place results into pointer-size
2687          * containers; since di_off_t is always a 32-bit offset, alignment
2688          * would otherwise be broken on 64-bit kernels.
2689          */
2690         uintptr_t       offset;
2691         struct di_key   key = {0};
2692         struct di_pkey  *pk;
2693 
2694         ASSERT(st->reg_pip_hash);
2695         ASSERT(off_p);
2696 
2697         if (pip == NULL) {
2698                 *off_p = 0;
2699                 return (0);
2700         }
2701 
2702         key.k_type = DI_PKEY;
2703         pk = &(key.k_u.pkey);
2704 
2705         pk->pk_pip = pip;
2706         pk->pk_path_addr = mdi_pi_get_addr(pip);
2707         pk->pk_client = mdi_pi_get_client(pip);
2708         pk->pk_phci = mdi_pi_get_phci(pip);
2709 
2710         if (mod_hash_find(st->reg_pip_hash, (mod_hash_key_t)&key,
2711             (mod_hash_val_t *)&offset) == 0) {
2712                 *off_p = (di_off_t)offset;
2713                 return (0);
2714         } else {
2715                 return (-1);
2716         }
2717 }
2718 
2719 static di_path_state_t
2720 path_state_convert(mdi_pathinfo_state_t st)
2721 {
2722         switch (st) {
2723         case MDI_PATHINFO_STATE_ONLINE:
2724                 return (DI_PATH_STATE_ONLINE);
2725         case MDI_PATHINFO_STATE_STANDBY:
2726                 return (DI_PATH_STATE_STANDBY);
2727         case MDI_PATHINFO_STATE_OFFLINE:
2728                 return (DI_PATH_STATE_OFFLINE);
2729         case MDI_PATHINFO_STATE_FAULT:
2730                 return (DI_PATH_STATE_FAULT);
2731         default:
2732                 return (DI_PATH_STATE_UNKNOWN);
2733         }
2734 }
2735 
2736 static uint_t
2737 path_flags_convert(uint_t pi_path_flags)
2738 {
2739         uint_t  di_path_flags = 0;
2740 
2741         /* MDI_PATHINFO_FLAGS_HIDDEN nodes not in snapshot */
2742 
2743         if (pi_path_flags & MDI_PATHINFO_FLAGS_DEVICE_REMOVED)
2744                 di_path_flags |= DI_PATH_FLAGS_DEVICE_REMOVED;
2745 
2746         return (di_path_flags);
2747 }
2748 
2749 
2750 static di_off_t
2751 di_path_getprop(mdi_pathinfo_t *pip, di_off_t *off_p,
2752     struct di_state *st)
2753 {
2754         nvpair_t                *prop = NULL;
2755         struct di_path_prop     *me;
2756         int                     off;
2757         size_t                  size;
2758         char                    *str;
2759         uchar_t                 *buf;
2760         uint_t                  nelems;
2761 
2762         off = *off_p;
2763         if (mdi_pi_get_next_prop(pip, NULL) == NULL) {
2764                 *off_p = 0;
2765                 return (off);
2766         }
2767 
2768         off = di_checkmem(st, off, sizeof (struct di_path_prop));
2769         *off_p = off;
2770 
2771         while (prop = mdi_pi_get_next_prop(pip, prop)) {
2772                 me = DI_PATHPROP(di_mem_addr(st, off));
2773                 me->self = off;
2774                 off += sizeof (struct di_path_prop);
2775 
2776                 /*
2777                  * property name
2778                  */
2779                 size = strlen(nvpair_name(prop)) + 1;
2780                 me->prop_name = off = di_checkmem(st, off, size);
2781                 (void) strcpy(di_mem_addr(st, off), nvpair_name(prop));
2782                 off += size;
2783 
2784                 switch (nvpair_type(prop)) {
2785                 case DATA_TYPE_BYTE:
2786                 case DATA_TYPE_INT16:
2787                 case DATA_TYPE_UINT16:
2788                 case DATA_TYPE_INT32:
2789                 case DATA_TYPE_UINT32:
2790                         me->prop_type = DDI_PROP_TYPE_INT;
2791                         size = sizeof (int32_t);
2792                         off = di_checkmem(st, off, size);
2793                         (void) nvpair_value_int32(prop,
2794                             (int32_t *)di_mem_addr(st, off));
2795                         break;
2796 
2797                 case DATA_TYPE_INT64:
2798                 case DATA_TYPE_UINT64:
2799                         me->prop_type = DDI_PROP_TYPE_INT64;
2800                         size = sizeof (int64_t);
2801                         off = di_checkmem(st, off, size);
2802                         (void) nvpair_value_int64(prop,
2803                             (int64_t *)di_mem_addr(st, off));
2804                         break;
2805 
2806                 case DATA_TYPE_STRING:
2807                         me->prop_type = DDI_PROP_TYPE_STRING;
2808                         (void) nvpair_value_string(prop, &str);
2809                         size = strlen(str) + 1;
2810                         off = di_checkmem(st, off, size);
2811                         (void) strcpy(di_mem_addr(st, off), str);
2812                         break;
2813 
2814                 case DATA_TYPE_BYTE_ARRAY:
2815                 case DATA_TYPE_INT16_ARRAY:
2816                 case DATA_TYPE_UINT16_ARRAY:
2817                 case DATA_TYPE_INT32_ARRAY:
2818                 case DATA_TYPE_UINT32_ARRAY:
2819                 case DATA_TYPE_INT64_ARRAY:
2820                 case DATA_TYPE_UINT64_ARRAY:
2821                         me->prop_type = DDI_PROP_TYPE_BYTE;
2822                         (void) nvpair_value_byte_array(prop, &buf, &nelems);
2823                         size = nelems;
2824                         if (nelems != 0) {
2825                                 off = di_checkmem(st, off, size);
2826                                 bcopy(buf, di_mem_addr(st, off), size);
2827                         }
2828                         break;
2829 
2830                 default:        /* Unknown or unhandled type; skip it */
2831                         size = 0;
2832                         break;
2833                 }
2834 
2835                 if (size > 0) {
2836                         me->prop_data = off;
2837                 }
2838 
2839                 me->prop_len = (int)size;
2840                 off += size;
2841 
2842                 off = di_checkmem(st, off, sizeof (struct di_path_prop));
2843                 me->prop_next = off;
2844         }
2845 
2846         me->prop_next = 0;
2847         return (off);
2848 }
2849 
2850 
2851 static void
2852 di_path_one_endpoint(struct di_path *me, di_off_t noff, di_off_t **off_pp,
2853     int get_client)
2854 {
2855         if (get_client) {
2856                 ASSERT(me->path_client == 0);
2857                 me->path_client = noff;
2858                 ASSERT(me->path_c_link == 0);
2859                 *off_pp = &me->path_c_link;
2860                 me->path_snap_state &=
2861                     ~(DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOCLINK);
2862         } else {
2863                 ASSERT(me->path_phci == 0);
2864                 me->path_phci = noff;
2865                 ASSERT(me->path_p_link == 0);
2866                 *off_pp = &me->path_p_link;
2867                 me->path_snap_state &=
2868                     ~(DI_PATH_SNAP_NOPHCI | DI_PATH_SNAP_NOPLINK);
2869         }
2870 }
2871 
2872 /*
2873  * off_p: pointer to the linkage field. This links pips along the client|phci
2874  *         linkage list.
2875  * noff  : Offset for the endpoint dip snapshot.
2876  */
2877 static di_off_t
2878 di_getpath_data(dev_info_t *dip, di_off_t *off_p, di_off_t noff,
2879     struct di_state *st, int get_client)
2880 {
2881         di_off_t        off;
2882         mdi_pathinfo_t  *pip;
2883         struct di_path  *me;
2884         mdi_pathinfo_t  *(*next_pip)(dev_info_t *, mdi_pathinfo_t *);
2885         size_t          size;
2886 
2887         dcmn_err2((CE_WARN, "di_getpath_data: client = %d", get_client));
2888 
2889         /*
2890          * The naming of the following mdi_xyz() is unfortunately
2891          * non-intuitive. mdi_get_next_phci_path() follows the
2892          * client_link i.e. the list of pip's belonging to the
2893          * given client dip.
2894          */
2895         if (get_client)
2896                 next_pip = &mdi_get_next_phci_path;
2897         else
2898                 next_pip = &mdi_get_next_client_path;
2899 
2900         off = *off_p;
2901 
2902         pip = NULL;
2903         while (pip = (*next_pip)(dip, pip)) {
2904                 di_off_t stored_offset;
2905 
2906                 dcmn_err((CE_WARN, "marshalling pip = %p", (void *)pip));
2907 
2908                 mdi_pi_lock(pip);
2909 
2910                 /* We don't represent hidden paths in the snapshot */
2911                 if (mdi_pi_ishidden(pip)) {
2912                         dcmn_err((CE_WARN, "hidden, skip"));
2913                         mdi_pi_unlock(pip);
2914                         continue;
2915                 }
2916 
2917                 if (di_pip_find(st, pip, &stored_offset) != -1) {
2918                         /*
2919                          * We've already seen this pathinfo node so we need to
2920                          * take care not to snap it again; However, one endpoint
2921                          * and linkage will be set here. The other endpoint
2922                          * and linkage has already been set when the pip was
2923                          * first snapshotted i.e. when the other endpoint dip
2924                          * was snapshotted.
2925                          */
2926                         me = DI_PATH(di_mem_addr(st, stored_offset));
2927                         *off_p = stored_offset;
2928 
2929                         di_path_one_endpoint(me, noff, &off_p, get_client);
2930 
2931                         /*
2932                          * The other endpoint and linkage were set when this
2933                          * pip was snapshotted. So we are done with both
2934                          * endpoints and linkages.
2935                          */
2936                         ASSERT(!(me->path_snap_state &
2937                             (DI_PATH_SNAP_NOCLIENT|DI_PATH_SNAP_NOPHCI)));
2938                         ASSERT(!(me->path_snap_state &
2939                             (DI_PATH_SNAP_NOCLINK|DI_PATH_SNAP_NOPLINK)));
2940 
2941                         mdi_pi_unlock(pip);
2942                         continue;
2943                 }
2944 
2945                 /*
2946                  * Now that we need to snapshot this pip, check memory
2947                  */
2948                 size = sizeof (struct di_path);
2949                 *off_p = off = di_checkmem(st, off, size);
2950                 me = DI_PATH(di_mem_addr(st, off));
2951                 me->self = off;
2952                 off += size;
2953 
2954                 me->path_snap_state =
2955                     DI_PATH_SNAP_NOCLINK | DI_PATH_SNAP_NOPLINK;
2956                 me->path_snap_state |=
2957                     DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOPHCI;
2958 
2959                 /*
2960                  * Zero out fields as di_checkmem() doesn't guarantee
2961                  * zero-filled memory
2962                  */
2963                 me->path_client = me->path_phci = 0;
2964                 me->path_c_link = me->path_p_link = 0;
2965 
2966                 di_path_one_endpoint(me, noff, &off_p, get_client);
2967 
2968                 /*
2969                  * Note the existence of this pathinfo
2970                  */
2971                 di_register_pip(st, pip, me->self);
2972 
2973                 me->path_state = path_state_convert(mdi_pi_get_state(pip));
2974                 me->path_flags = path_flags_convert(mdi_pi_get_flags(pip));
2975 
2976                 me->path_instance = mdi_pi_get_path_instance(pip);
2977 
2978                 /*
2979                  * Get intermediate addressing info.
2980                  */
2981                 size = strlen(mdi_pi_get_addr(pip)) + 1;
2982                 me->path_addr = off = di_checkmem(st, off, size);
2983                 (void) strcpy(di_mem_addr(st, off), mdi_pi_get_addr(pip));
2984                 off += size;
2985 
2986                 /*
2987                  * Get path properties if props are to be included in the
2988                  * snapshot
2989                  */
2990                 if (DINFOPROP & st->command) {
2991                         me->path_prop = off;
2992                         off = di_path_getprop(pip, &me->path_prop, st);
2993                 } else {
2994                         me->path_prop = 0;
2995                 }
2996 
2997                 mdi_pi_unlock(pip);
2998         }
2999 
3000         *off_p = 0;
3001         return (off);
3002 }
3003 
3004 /*
3005  * Return driver prop_op entry point for the specified devinfo node.
3006  *
3007  * To return a non-NULL value:
3008  * - driver must be attached and held:
3009  *   If driver is not attached we ignore the driver property list.
3010  *   No one should rely on such properties.
3011  * - driver "cb_prop_op != ddi_prop_op":
3012  *   If "cb_prop_op == ddi_prop_op", framework does not need to call driver.
3013  *   XXX or parent's bus_prop_op != ddi_bus_prop_op
3014  */
3015 static int
3016 (*di_getprop_prop_op(struct dev_info *dip))
3017         (dev_t, dev_info_t *, ddi_prop_op_t, int, char *, caddr_t, int *)
3018 {
3019         struct dev_ops  *ops;
3020 
3021         /* If driver is not attached we ignore the driver property list. */
3022         if ((dip == NULL) || !i_ddi_devi_attached((dev_info_t *)dip))
3023                 return (NULL);
3024 
3025         /*
3026          * Some nexus drivers incorrectly set cb_prop_op to nodev, nulldev,
3027          * or even NULL.
3028          */
3029         ops = dip->devi_ops;
3030         if (ops && ops->devo_cb_ops &&
3031             (ops->devo_cb_ops->cb_prop_op != ddi_prop_op) &&
3032             (ops->devo_cb_ops->cb_prop_op != nodev) &&
3033             (ops->devo_cb_ops->cb_prop_op != nulldev) &&
3034             (ops->devo_cb_ops->cb_prop_op != NULL))
3035                 return (ops->devo_cb_ops->cb_prop_op);
3036         return (NULL);
3037 }
3038 
3039 static di_off_t
3040 di_getprop_add(int list, int dyn, struct di_state *st, struct dev_info *dip,
3041     int (*prop_op)(),
3042     char *name, dev_t devt, int aflags, int alen, caddr_t aval,
3043     di_off_t off, di_off_t **off_pp)
3044 {
3045         int             need_free = 0;
3046         dev_t           pdevt;
3047         int             pflags;
3048         int             rv;
3049         caddr_t         val;
3050         int             len;
3051         size_t          size;
3052         struct di_prop  *pp;
3053 
3054         /* If we have prop_op function, ask driver for latest value */
3055         if (prop_op) {
3056                 ASSERT(dip);
3057 
3058                 /* Must search DDI_DEV_T_NONE with DDI_DEV_T_ANY */
3059                 pdevt = (devt == DDI_DEV_T_NONE) ? DDI_DEV_T_ANY : devt;
3060 
3061                 /*
3062                  * We have type information in flags, but are invoking an
3063                  * old non-typed prop_op(9E) interface. Since not all types are
3064                  * part of DDI_PROP_TYPE_ANY (example is DDI_PROP_TYPE_INT64),
3065                  * we set DDI_PROP_CONSUMER_TYPED - causing the framework to
3066                  * expand type bits beyond DDI_PROP_TYPE_ANY.  This allows us
3067                  * to use the legacy prop_op(9E) interface to obtain updates
3068                  * non-DDI_PROP_TYPE_ANY dynamic properties.
3069                  */
3070                 pflags = aflags & ~DDI_PROP_TYPE_MASK;
3071                 pflags |= DDI_PROP_DONTPASS | DDI_PROP_NOTPROM |
3072                     DDI_PROP_CONSUMER_TYPED;
3073 
3074                 /*
3075                  * Hold and exit across prop_op(9E) to avoid lock order
3076                  * issues between
3077                  *   [ndi_devi_enter() ..prop_op(9E).. driver-lock]
3078                  * .vs.
3079                  *   [..ioctl(9E).. driver-lock ..ddi_remove_minor_node(9F)..
3080                  *   ndi_devi_enter()]
3081                  * ordering.
3082                  */
3083                 ndi_hold_devi((dev_info_t *)dip);
3084                 ndi_devi_exit((dev_info_t *)dip, dip->devi_circular);
3085                 rv = (*prop_op)(pdevt, (dev_info_t *)dip,
3086                     PROP_LEN_AND_VAL_ALLOC, pflags, name, &val, &len);
3087                 ndi_devi_enter((dev_info_t *)dip, &dip->devi_circular);
3088                 ndi_rele_devi((dev_info_t *)dip);
3089 
3090                 if (rv == DDI_PROP_SUCCESS) {
3091                         need_free = 1;          /* dynamic prop obtained */
3092                 } else if (dyn) {
3093                         /*
3094                          * A dynamic property must succeed prop_op(9E) to show
3095                          * up in the snapshot - that is the only source of its
3096                          * value.
3097                          */
3098                         return (off);           /* dynamic prop not supported */
3099                 } else {
3100                         /*
3101                          * In case calling the driver caused an update off
3102                          * prop_op(9E) of a non-dynamic property (code leading
3103                          * to ddi_prop_change), we defer picking up val and
3104                          * len informatiojn until after prop_op(9E) to ensure
3105                          * that we snapshot the latest value.
3106                          */
3107                         val = aval;
3108                         len = alen;
3109 
3110                 }
3111         } else {
3112                 val = aval;
3113                 len = alen;
3114         }
3115 
3116         dcmn_err((CE_CONT, "di_getprop_add: list %d %s len %d val %p\n",
3117             list, name ? name : "NULL", len, (void *)val));
3118 
3119         size = sizeof (struct di_prop);
3120         **off_pp = off = di_checkmem(st, off, size);
3121         pp = DI_PROP(di_mem_addr(st, off));
3122         pp->self = off;
3123         off += size;
3124 
3125         pp->dev_major = getmajor(devt);
3126         pp->dev_minor = getminor(devt);
3127         pp->prop_flags = aflags;
3128         pp->prop_list = list;
3129 
3130         /* property name */
3131         if (name) {
3132                 size = strlen(name) + 1;
3133                 pp->prop_name = off = di_checkmem(st, off, size);
3134                 (void) strcpy(di_mem_addr(st, off), name);
3135                 off += size;
3136         } else {
3137                 pp->prop_name = -1;
3138         }
3139 
3140         pp->prop_len = len;
3141         if (val == NULL) {
3142                 pp->prop_data = -1;
3143         } else if (len != 0) {
3144                 size = len;
3145                 pp->prop_data = off = di_checkmem(st, off, size);
3146                 bcopy(val, di_mem_addr(st, off), size);
3147                 off += size;
3148         }
3149 
3150         pp->next = 0;                        /* assume tail for now */
3151         *off_pp = &pp->next;             /* return pointer to our next */
3152 
3153         if (need_free)                  /* free PROP_LEN_AND_VAL_ALLOC alloc */
3154                 kmem_free(val, len);
3155         return (off);
3156 }
3157 
3158 
3159 /*
3160  * Copy a list of properties attached to a devinfo node. Called from
3161  * di_copynode with active ndi_devi_enter. The major number is passed in case
3162  * we need to call driver's prop_op entry. The value of list indicates
3163  * which list we are copying. Possible values are:
3164  * DI_PROP_DRV_LIST, DI_PROP_SYS_LIST, DI_PROP_GLB_LIST, DI_PROP_HW_LIST
3165  */
3166 static di_off_t
3167 di_getprop(int list, struct ddi_prop **pprop, di_off_t *off_p,
3168     struct di_state *st, struct dev_info *dip)
3169 {
3170         struct ddi_prop         *prop;
3171         int                     (*prop_op)();
3172         int                     off;
3173         struct ddi_minor_data   *mn;
3174         i_ddi_prop_dyn_t        *dp;
3175         struct plist {
3176                 struct plist    *pl_next;
3177                 char            *pl_name;
3178                 int             pl_flags;
3179                 dev_t           pl_dev;
3180                 int             pl_len;
3181                 caddr_t         pl_val;
3182         }                       *pl, *pl0, **plp;
3183 
3184         ASSERT(st != NULL);
3185 
3186         off = *off_p;
3187         *off_p = 0;
3188         dcmn_err((CE_CONT, "di_getprop: copy property list %d at addr %p\n",
3189             list, (void *)*pprop));
3190 
3191         /* get pointer to driver's prop_op(9E) implementation if DRV_LIST */
3192         prop_op = (list == DI_PROP_DRV_LIST) ? di_getprop_prop_op(dip) : NULL;
3193 
3194         /*
3195          * Form private list of properties, holding devi_lock for properties
3196          * that hang off the dip.
3197          */
3198         if (dip)
3199                 mutex_enter(&(dip->devi_lock));
3200         for (pl0 = NULL, plp = &pl0, prop = *pprop;
3201             prop; plp = &pl->pl_next, prop = prop->prop_next) {
3202                 pl = kmem_alloc(sizeof (*pl), KM_SLEEP);
3203                 *plp = pl;
3204                 pl->pl_next = NULL;
3205                 if (prop->prop_name)
3206                         pl->pl_name = i_ddi_strdup(prop->prop_name, KM_SLEEP);
3207                 else
3208                         pl->pl_name = NULL;
3209                 pl->pl_flags = prop->prop_flags;
3210                 pl->pl_dev = prop->prop_dev;
3211                 if (prop->prop_len) {
3212                         pl->pl_len = prop->prop_len;
3213                         pl->pl_val = kmem_alloc(pl->pl_len, KM_SLEEP);
3214                         bcopy(prop->prop_val, pl->pl_val, pl->pl_len);
3215                 } else {
3216                         pl->pl_len = 0;
3217                         pl->pl_val = NULL;
3218                 }
3219         }
3220         if (dip)
3221                 mutex_exit(&(dip->devi_lock));
3222 
3223         /*
3224          * Now that we have dropped devi_lock, perform a second-pass to
3225          * add properties to the snapshot.  We do this as a second pass
3226          * because we may need to call prop_op(9E) and we can't hold
3227          * devi_lock across that call.
3228          */
3229         for (pl = pl0; pl; pl = pl0) {
3230                 pl0 = pl->pl_next;
3231                 off = di_getprop_add(list, 0, st, dip, prop_op, pl->pl_name,
3232                     pl->pl_dev, pl->pl_flags, pl->pl_len, pl->pl_val,
3233                     off, &off_p);
3234                 if (pl->pl_val)
3235                         kmem_free(pl->pl_val, pl->pl_len);
3236                 if (pl->pl_name)
3237                         kmem_free(pl->pl_name, strlen(pl->pl_name) + 1);
3238                 kmem_free(pl, sizeof (*pl));
3239         }
3240 
3241         /*
3242          * If there is no prop_op or dynamic property support has been
3243          * disabled, we are done.
3244          */
3245         if ((prop_op == NULL) || (di_prop_dyn == 0)) {
3246                 *off_p = 0;
3247                 return (off);
3248         }
3249 
3250         /* Add dynamic driver properties to snapshot */
3251         for (dp = i_ddi_prop_dyn_driver_get((dev_info_t *)dip);
3252             dp && dp->dp_name; dp++) {
3253                 if (dp->dp_spec_type) {
3254                         /* if spec_type, property of matching minor */
3255                         ASSERT(DEVI_BUSY_OWNED(dip));
3256                         for (mn = dip->devi_minor; mn; mn = mn->next) {
3257                                 if (mn->ddm_spec_type != dp->dp_spec_type)
3258                                         continue;
3259                                 off = di_getprop_add(list, 1, st, dip, prop_op,
3260                                     dp->dp_name, mn->ddm_dev, dp->dp_type,
3261                                     0, NULL, off, &off_p);
3262                         }
3263                 } else {
3264                         /* property of devinfo node */
3265                         off = di_getprop_add(list, 1, st, dip, prop_op,
3266                             dp->dp_name, DDI_DEV_T_NONE, dp->dp_type,
3267                             0, NULL, off, &off_p);
3268                 }
3269         }
3270 
3271         /* Add dynamic parent properties to snapshot */
3272         for (dp = i_ddi_prop_dyn_parent_get((dev_info_t *)dip);
3273             dp && dp->dp_name; dp++) {
3274                 if (dp->dp_spec_type) {
3275                         /* if spec_type, property of matching minor */
3276                         ASSERT(DEVI_BUSY_OWNED(dip));
3277                         for (mn = dip->devi_minor; mn; mn = mn->next) {
3278                                 if (mn->ddm_spec_type != dp->dp_spec_type)
3279                                         continue;
3280                                 off = di_getprop_add(list, 1, st, dip, prop_op,
3281                                     dp->dp_name, mn->ddm_dev, dp->dp_type,
3282                                     0, NULL, off, &off_p);
3283                         }
3284                 } else {
3285                         /* property of devinfo node */
3286                         off = di_getprop_add(list, 1, st, dip, prop_op,
3287                             dp->dp_name, DDI_DEV_T_NONE, dp->dp_type,
3288                             0, NULL, off, &off_p);
3289                 }
3290         }
3291 
3292         *off_p = 0;
3293         return (off);
3294 }
3295 
3296 /*
3297  * find private data format attached to a dip
3298  * parent = 1 to match driver name of parent dip (for parent private data)
3299  *      0 to match driver name of current dip (for driver private data)
3300  */
3301 #define DI_MATCH_DRIVER 0
3302 #define DI_MATCH_PARENT 1
3303 
3304 struct di_priv_format *
3305 di_match_drv_name(struct dev_info *node, struct di_state *st, int match)
3306 {
3307         int                     i, count, len;
3308         char                    *drv_name;
3309         major_t                 major;
3310         struct di_all           *all;
3311         struct di_priv_format   *form;
3312 
3313         dcmn_err2((CE_CONT, "di_match_drv_name: node = %s, match = %x\n",
3314             node->devi_node_name, match));
3315 
3316         if (match == DI_MATCH_PARENT) {
3317                 node = DEVI(node->devi_parent);
3318         }
3319 
3320         if (node == NULL) {
3321                 return (NULL);
3322         }
3323 
3324         major = node->devi_major;
3325         if (major == (major_t)(-1)) {
3326                 return (NULL);
3327         }
3328 
3329         /*
3330          * Match the driver name.
3331          */
3332         drv_name = ddi_major_to_name(major);
3333         if ((drv_name == NULL) || *drv_name == '\0') {
3334                 return (NULL);
3335         }
3336 
3337         /* Now get the di_priv_format array */
3338         all = DI_ALL_PTR(st);
3339         if (match == DI_MATCH_PARENT) {
3340                 count = all->n_ppdata;
3341                 form = DI_PRIV_FORMAT(di_mem_addr(st, all->ppdata_format));
3342         } else {
3343                 count = all->n_dpdata;
3344                 form = DI_PRIV_FORMAT(di_mem_addr(st, all->dpdata_format));
3345         }
3346 
3347         len = strlen(drv_name);
3348         for (i = 0; i < count; i++) {
3349                 char *tmp;
3350 
3351                 tmp = form[i].drv_name;
3352                 while (tmp && (*tmp != '\0')) {
3353                         if (strncmp(drv_name, tmp, len) == 0) {
3354                                 return (&form[i]);
3355                         }
3356                         /*
3357                          * Move to next driver name, skipping a white space
3358                          */
3359                         if (tmp = strchr(tmp, ' ')) {
3360                                 tmp++;
3361                         }
3362                 }
3363         }
3364 
3365         return (NULL);
3366 }
3367 
/*
 * The following functions copy data as specified by the format passed in.
 * To prevent an invalid format from panicking the system, we call on_fault().
 * A return value of 0 indicates an error. Otherwise, the total offset
 * is returned.
 */
3374 #define DI_MAX_PRIVDATA (PAGESIZE >> 1)   /* max private data size */
3375 
3376 static di_off_t
3377 di_getprvdata(struct di_priv_format *pdp, struct dev_info *node,
3378     void *data, di_off_t *off_p, struct di_state *st)
3379 {
3380         caddr_t         pa;
3381         void            *ptr;
3382         int             i, size, repeat;
3383         di_off_t        off, off0, *tmp;
3384         char            *path;
3385         label_t         ljb;
3386 
3387         dcmn_err2((CE_CONT, "di_getprvdata:\n"));
3388 
3389         /*
3390          * check memory availability. Private data size is
3391          * limited to DI_MAX_PRIVDATA.
3392          */
3393         off = di_checkmem(st, *off_p, DI_MAX_PRIVDATA);
3394         *off_p = off;
3395 
3396         if ((pdp->bytes == 0) || pdp->bytes > DI_MAX_PRIVDATA) {
3397                 goto failure;
3398         }
3399 
3400         if (!on_fault(&ljb)) {
3401                 /* copy the struct */
3402                 bcopy(data, di_mem_addr(st, off), pdp->bytes);
3403                 off0 = DI_ALIGN(pdp->bytes); /* XXX remove DI_ALIGN */
3404 
3405                 /* dereferencing pointers */
3406                 for (i = 0; i < MAX_PTR_IN_PRV; i++) {
3407 
3408                         if (pdp->ptr[i].size == 0) {
3409                                 goto success;   /* no more ptrs */
3410                         }
3411 
3412                         /*
3413                          * first, get the pointer content
3414                          */
3415                         if ((pdp->ptr[i].offset < 0) ||
3416                             (pdp->ptr[i].offset > pdp->bytes - sizeof (char *)))
3417                                 goto failure;   /* wrong offset */
3418 
3419                         pa = di_mem_addr(st, off + pdp->ptr[i].offset);
3420 
3421                         /* save a tmp ptr to store off_t later */
3422                         tmp = (di_off_t *)(intptr_t)pa;
3423 
3424                         /* get pointer value, if NULL continue */
3425                         ptr = *((void **) (intptr_t)pa);
3426                         if (ptr == NULL) {
3427                                 continue;
3428                         }
3429 
3430                         /*
3431                          * next, find the repeat count (array dimension)
3432                          */
3433                         repeat = pdp->ptr[i].len_offset;
3434 
3435                         /*
3436                          * Positive value indicates a fixed sized array.
3437                          * 0 or negative value indicates variable sized array.
3438                          *
3439                          * For variable sized array, the variable must be
3440                          * an int member of the structure, with an offset
3441                          * equal to the absolution value of struct member.
3442                          */
3443                         if (repeat > pdp->bytes - sizeof (int)) {
3444                                 goto failure;   /* wrong offset */
3445                         }
3446 
3447                         if (repeat >= 0) {
3448                                 repeat = *((int *)
3449                                     (intptr_t)((caddr_t)data + repeat));
3450                         } else {
3451                                 repeat = -repeat;
3452                         }
3453 
3454                         /*
3455                          * next, get the size of the object to be copied
3456                          */
3457                         size = pdp->ptr[i].size * repeat;
3458 
3459                         /*
3460                          * Arbitrarily limit the total size of object to be
3461                          * copied (1 byte to 1/4 page).
3462                          */
3463                         if ((size <= 0) || (size > (DI_MAX_PRIVDATA - off0))) {
3464                                 goto failure;   /* wrong size or too big */
3465                         }
3466 
3467                         /*
3468                          * Now copy the data
3469                          */
3470                         *tmp = off0;
3471                         bcopy(ptr, di_mem_addr(st, off + off0), size);
3472                         off0 += DI_ALIGN(size); /* XXX remove DI_ALIGN */
3473                 }
3474         } else {
3475                 goto failure;
3476         }
3477 
3478 success:
3479         /*
3480          * success if reached here
3481          */
3482         no_fault();
3483         return (off + off0);
3484         /*NOTREACHED*/
3485 
3486 failure:
3487         /*
3488          * fault occurred
3489          */
3490         no_fault();
3491         path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3492         cmn_err(CE_WARN, "devinfo: fault on private data for '%s' at %p",
3493             ddi_pathname((dev_info_t *)node, path), data);
3494         kmem_free(path, MAXPATHLEN);
3495         *off_p = -1;    /* set private data to indicate error */
3496 
3497         return (off);
3498 }
3499 
3500 /*
3501  * get parent private data; on error, returns original offset
3502  */
3503 static di_off_t
3504 di_getppdata(struct dev_info *node, di_off_t *off_p, struct di_state *st)
3505 {
3506         int                     off;
3507         struct di_priv_format   *ppdp;
3508 
3509         dcmn_err2((CE_CONT, "di_getppdata:\n"));
3510 
3511         /* find the parent data format */
3512         if ((ppdp = di_match_drv_name(node, st, DI_MATCH_PARENT)) == NULL) {
3513                 off = *off_p;
3514                 *off_p = 0;     /* set parent data to none */
3515                 return (off);
3516         }
3517 
3518         return (di_getprvdata(ppdp, node,
3519             ddi_get_parent_data((dev_info_t *)node), off_p, st));
3520 }
3521 
3522 /*
3523  * get parent private data; returns original offset
3524  */
3525 static di_off_t
3526 di_getdpdata(struct dev_info *node, di_off_t *off_p, struct di_state *st)
3527 {
3528         int                     off;
3529         struct di_priv_format   *dpdp;
3530 
3531         dcmn_err2((CE_CONT, "di_getdpdata:"));
3532 
3533         /* find the parent data format */
3534         if ((dpdp = di_match_drv_name(node, st, DI_MATCH_DRIVER)) == NULL) {
3535                 off = *off_p;
3536                 *off_p = 0;     /* set driver data to none */
3537                 return (off);
3538         }
3539 
3540         return (di_getprvdata(dpdp, node,
3541             ddi_get_driver_private((dev_info_t *)node), off_p, st));
3542 }
3543 
3544 /*
3545  * Copy hotplug data associated with a devinfo node into the snapshot.
3546  */
3547 static di_off_t
3548 di_gethpdata(ddi_hp_cn_handle_t *hp_hdl, di_off_t *off_p,
3549     struct di_state *st)
3550 {
3551         struct i_hp     *hp;
3552         struct di_hp    *me;
3553         size_t          size;
3554         di_off_t        off;
3555 
3556         dcmn_err2((CE_CONT, "di_gethpdata:\n"));
3557 
3558         /*
3559          * check memory first
3560          */
3561         off = di_checkmem(st, *off_p, sizeof (struct di_hp));
3562         *off_p = off;
3563 
3564         do {
3565                 me = DI_HP(di_mem_addr(st, off));
3566                 me->self = off;
3567                 me->hp_name = 0;
3568                 me->hp_connection = (int)hp_hdl->cn_info.cn_num;
3569                 me->hp_depends_on = (int)hp_hdl->cn_info.cn_num_dpd_on;
3570                 (void) ddihp_cn_getstate(hp_hdl);
3571                 me->hp_state = (int)hp_hdl->cn_info.cn_state;
3572                 me->hp_type = (int)hp_hdl->cn_info.cn_type;
3573                 me->hp_type_str = 0;
3574                 me->hp_last_change = (uint32_t)hp_hdl->cn_info.cn_last_change;
3575                 me->hp_child = 0;
3576 
3577                 /*
3578                  * Child links are resolved later by di_hotplug_children().
3579                  * Store a reference to this di_hp_t in the list used later
3580                  * by di_hotplug_children().
3581                  */
3582                 hp = kmem_zalloc(sizeof (i_hp_t), KM_SLEEP);
3583                 hp->hp_off = off;
3584                 hp->hp_child = hp_hdl->cn_info.cn_child;
3585                 list_insert_tail(&st->hp_list, hp);
3586 
3587                 off += sizeof (struct di_hp);
3588 
3589                 /* Add name of this di_hp_t to the snapshot */
3590                 if (hp_hdl->cn_info.cn_name) {
3591                         size = strlen(hp_hdl->cn_info.cn_name) + 1;
3592                         me->hp_name = off = di_checkmem(st, off, size);
3593                         (void) strcpy(di_mem_addr(st, off),
3594                             hp_hdl->cn_info.cn_name);
3595                         off += size;
3596                 }
3597 
3598                 /* Add type description of this di_hp_t to the snapshot */
3599                 if (hp_hdl->cn_info.cn_type_str) {
3600                         size = strlen(hp_hdl->cn_info.cn_type_str) + 1;
3601                         me->hp_type_str = off = di_checkmem(st, off, size);
3602                         (void) strcpy(di_mem_addr(st, off),
3603                             hp_hdl->cn_info.cn_type_str);
3604                         off += size;
3605                 }
3606 
3607                 /*
3608                  * Set link to next in the chain of di_hp_t nodes,
3609                  * or terminate the chain when processing the last node.
3610                  */
3611                 if (hp_hdl->next != NULL) {
3612                         off = di_checkmem(st, off, sizeof (struct di_hp));
3613                         me->next = off;
3614                 } else {
3615                         me->next = 0;
3616                 }
3617 
3618                 /* Update pointer to next in the chain */
3619                 hp_hdl = hp_hdl->next;
3620 
3621         } while (hp_hdl);
3622 
3623         return (off);
3624 }
3625 
3626 /*
3627  * The driver is stateful across DINFOCPYALL and DINFOUSRLD.
3628  * This function encapsulates the state machine:
3629  *
3630  *      -> IOC_IDLE -> IOC_SNAP -> IOC_DONE -> IOC_COPY ->
3631  *      |               SNAPSHOT                USRLD    |
3632  *      --------------------------------------------------
3633  *
3634  * Returns 0 on success and -1 on failure
3635  */
3636 static int
3637 di_setstate(struct di_state *st, int new_state)
3638 {
3639         int     ret = 0;
3640 
3641         mutex_enter(&di_lock);
3642         switch (new_state) {
3643         case IOC_IDLE:
3644         case IOC_DONE:
3645                 break;
3646         case IOC_SNAP:
3647                 if (st->di_iocstate != IOC_IDLE)
3648                         ret = -1;
3649                 break;
3650         case IOC_COPY:
3651                 if (st->di_iocstate != IOC_DONE)
3652                         ret = -1;
3653                 break;
3654         default:
3655                 ret = -1;
3656         }
3657 
3658         if (ret == 0)
3659                 st->di_iocstate = new_state;
3660         else
3661                 cmn_err(CE_NOTE, "incorrect state transition from %d to %d",
3662                     st->di_iocstate, new_state);
3663         mutex_exit(&di_lock);
3664         return (ret);
3665 }
3666 
3667 /*
3668  * We cannot assume the presence of the entire
3669  * snapshot in this routine. All we are guaranteed
3670  * is the di_all struct + 1 byte (for root_path)
3671  */
3672 static int
3673 header_plus_one_ok(struct di_all *all)
3674 {
3675         /*
3676          * Refuse to read old versions
3677          */
3678         if (all->version != DI_SNAPSHOT_VERSION) {
3679                 CACHE_DEBUG((DI_ERR, "bad version: 0x%x", all->version));
3680                 return (0);
3681         }
3682 
3683         if (all->cache_magic != DI_CACHE_MAGIC) {
3684                 CACHE_DEBUG((DI_ERR, "bad magic #: 0x%x", all->cache_magic));
3685                 return (0);
3686         }
3687 
3688         if (all->snapshot_time == 0) {
3689                 CACHE_DEBUG((DI_ERR, "bad timestamp: %ld", all->snapshot_time));
3690                 return (0);
3691         }
3692 
3693         if (all->top_devinfo == 0) {
3694                 CACHE_DEBUG((DI_ERR, "NULL top devinfo"));
3695                 return (0);
3696         }
3697 
3698         if (all->map_size < sizeof (*all) + 1) {
3699                 CACHE_DEBUG((DI_ERR, "bad map size: %u", all->map_size));
3700                 return (0);
3701         }
3702 
3703         if (all->root_path[0] != '/' || all->root_path[1] != '\0') {
3704                 CACHE_DEBUG((DI_ERR, "bad rootpath: %c%c",
3705                     all->root_path[0], all->root_path[1]));
3706                 return (0);
3707         }
3708 
3709         /*
3710          * We can't check checksum here as we just have the header
3711          */
3712 
3713         return (1);
3714 }
3715 
3716 static int
3717 chunk_write(struct vnode *vp, offset_t off, caddr_t buf, size_t len)
3718 {
3719         rlim64_t        rlimit;
3720         ssize_t         resid;
3721         int             error = 0;
3722 
3723 
3724         rlimit = RLIM64_INFINITY;
3725 
3726         while (len) {
3727                 resid = 0;
3728                 error = vn_rdwr(UIO_WRITE, vp, buf, len, off,
3729                     UIO_SYSSPACE, FSYNC, rlimit, kcred, &resid);
3730 
3731                 if (error || resid < 0) {
3732                         error = error ? error : EIO;
3733                         CACHE_DEBUG((DI_ERR, "write error: %d", error));
3734                         break;
3735                 }
3736 
3737                 /*
3738                  * Check if we are making progress
3739                  */
3740                 if (resid >= len) {
3741                         error = ENOSPC;
3742                         break;
3743                 }
3744                 buf += len - resid;
3745                 off += len - resid;
3746                 len = resid;
3747         }
3748 
3749         return (error);
3750 }
3751 
3752 static void
3753 di_cache_write(struct di_cache *cache)
3754 {
3755         struct di_all   *all;
3756         struct vnode    *vp;
3757         int             oflags;
3758         size_t          map_size;
3759         size_t          chunk;
3760         offset_t        off;
3761         int             error;
3762         char            *buf;
3763 
3764         ASSERT(DI_CACHE_LOCKED(*cache));
3765         ASSERT(!servicing_interrupt());
3766 
3767         if (cache->cache_size == 0) {
3768                 ASSERT(cache->cache_data == NULL);
3769                 CACHE_DEBUG((DI_ERR, "Empty cache. Skipping write"));
3770                 return;
3771         }
3772 
3773         ASSERT(cache->cache_size > 0);
3774         ASSERT(cache->cache_data);
3775 
3776         if (!modrootloaded || rootvp == NULL || vn_is_readonly(rootvp)) {
3777                 CACHE_DEBUG((DI_ERR, "Can't write to rootFS. Skipping write"));
3778                 return;
3779         }
3780 
3781         all = (struct di_all *)cache->cache_data;
3782 
3783         if (!header_plus_one_ok(all)) {
3784                 CACHE_DEBUG((DI_ERR, "Invalid header. Skipping write"));
3785                 return;
3786         }
3787 
3788         ASSERT(strcmp(all->root_path, "/") == 0);
3789 
3790         /*
3791          * The cache_size is the total allocated memory for the cache.
3792          * The map_size is the actual size of valid data in the cache.
3793          * map_size may be smaller than cache_size but cannot exceed
3794          * cache_size.
3795          */
3796         if (all->map_size > cache->cache_size) {
3797                 CACHE_DEBUG((DI_ERR, "map_size (0x%x) > cache_size (0x%x)."
3798                     " Skipping write", all->map_size, cache->cache_size));
3799                 return;
3800         }
3801 
3802         /*
3803          * First unlink the temp file
3804          */
3805         error = vn_remove(DI_CACHE_TEMP, UIO_SYSSPACE, RMFILE);
3806         if (error && error != ENOENT) {
3807                 CACHE_DEBUG((DI_ERR, "%s: unlink failed: %d",
3808                     DI_CACHE_TEMP, error));
3809         }
3810 
3811         if (error == EROFS) {
3812                 CACHE_DEBUG((DI_ERR, "RDONLY FS. Skipping write"));
3813                 return;
3814         }
3815 
3816         vp = NULL;
3817         oflags = (FCREAT|FWRITE);
3818         if (error = vn_open(DI_CACHE_TEMP, UIO_SYSSPACE, oflags,
3819             DI_CACHE_PERMS, &vp, CRCREAT, 0)) {
3820                 CACHE_DEBUG((DI_ERR, "%s: create failed: %d",
3821                     DI_CACHE_TEMP, error));
3822                 return;
3823         }
3824 
3825         ASSERT(vp);
3826 
3827         /*
3828          * Paranoid: Check if the file is on a read-only FS
3829          */
3830         if (vn_is_readonly(vp)) {
3831                 CACHE_DEBUG((DI_ERR, "cannot write: readonly FS"));
3832                 goto fail;
3833         }
3834 
3835         /*
3836          * Note that we only write map_size bytes to disk - this saves
3837          * space as the actual cache size may be larger than size of
3838          * valid data in the cache.
3839          * Another advantage is that it makes verification of size
3840          * easier when the file is read later.
3841          */
3842         map_size = all->map_size;
3843         off = 0;
3844         buf = cache->cache_data;
3845 
3846         while (map_size) {
3847                 ASSERT(map_size > 0);
3848                 /*
3849                  * Write in chunks so that VM system
3850                  * is not overwhelmed
3851                  */
3852                 if (map_size > di_chunk * PAGESIZE)
3853                         chunk = di_chunk * PAGESIZE;
3854                 else
3855                         chunk = map_size;
3856 
3857                 error = chunk_write(vp, off, buf, chunk);
3858                 if (error) {
3859                         CACHE_DEBUG((DI_ERR, "write failed: off=0x%x: %d",
3860                             off, error));
3861                         goto fail;
3862                 }
3863 
3864                 off += chunk;
3865                 buf += chunk;
3866                 map_size -= chunk;
3867 
3868                 /* If low on memory, give pageout a chance to run */
3869                 if (freemem < desfree)
3870                         delay(1);
3871         }
3872 
3873         /*
3874          * Now sync the file and close it
3875          */
3876         if (error = VOP_FSYNC(vp, FSYNC, kcred, NULL)) {
3877                 CACHE_DEBUG((DI_ERR, "FSYNC failed: %d", error));
3878         }
3879 
3880         if (error = VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred, NULL)) {
3881                 CACHE_DEBUG((DI_ERR, "close() failed: %d", error));
3882                 VN_RELE(vp);
3883                 return;
3884         }
3885 
3886         VN_RELE(vp);
3887 
3888         /*
3889          * Now do the rename
3890          */
3891         if (error = vn_rename(DI_CACHE_TEMP, DI_CACHE_FILE, UIO_SYSSPACE)) {
3892                 CACHE_DEBUG((DI_ERR, "rename failed: %d", error));
3893                 return;
3894         }
3895 
3896         CACHE_DEBUG((DI_INFO, "Cache write successful."));
3897 
3898         return;
3899 
3900 fail:
3901         (void) VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred, NULL);
3902         VN_RELE(vp);
3903 }
3904 
3905 
3906 /*
3907  * Since we could be called early in boot,
3908  * use kobj_read_file()
3909  */
3910 static void
3911 di_cache_read(struct di_cache *cache)
3912 {
3913         struct _buf     *file;
3914         struct di_all   *all;
3915         int             n;
3916         size_t          map_size, sz, chunk;
3917         offset_t        off;
3918         caddr_t         buf;
3919         uint32_t        saved_crc, crc;
3920 
3921         ASSERT(modrootloaded);
3922         ASSERT(DI_CACHE_LOCKED(*cache));
3923         ASSERT(cache->cache_data == NULL);
3924         ASSERT(cache->cache_size == 0);
3925         ASSERT(!servicing_interrupt());
3926 
3927         file = kobj_open_file(DI_CACHE_FILE);
3928         if (file == (struct _buf *)-1) {
3929                 CACHE_DEBUG((DI_ERR, "%s: open failed: %d",
3930                     DI_CACHE_FILE, ENOENT));
3931                 return;
3932         }
3933 
3934         /*
3935          * Read in the header+root_path first. The root_path must be "/"
3936          */
3937         all = kmem_zalloc(sizeof (*all) + 1, KM_SLEEP);
3938         n = kobj_read_file(file, (caddr_t)all, sizeof (*all) + 1, 0);
3939 
3940         if ((n != sizeof (*all) + 1) || !header_plus_one_ok(all)) {
3941                 kmem_free(all, sizeof (*all) + 1);
3942                 kobj_close_file(file);
3943                 CACHE_DEBUG((DI_ERR, "cache header: read error or invalid"));
3944                 return;
3945         }
3946 
3947         map_size = all->map_size;
3948 
3949         kmem_free(all, sizeof (*all) + 1);
3950 
3951         ASSERT(map_size >= sizeof (*all) + 1);
3952 
3953         buf = di_cache.cache_data = kmem_alloc(map_size, KM_SLEEP);
3954         sz = map_size;
3955         off = 0;
3956         while (sz) {
3957                 /* Don't overload VM with large reads */
3958                 chunk = (sz > di_chunk * PAGESIZE) ? di_chunk * PAGESIZE : sz;
3959                 n = kobj_read_file(file, buf, chunk, off);
3960                 if (n != chunk) {
3961                         CACHE_DEBUG((DI_ERR, "%s: read error at offset: %lld",
3962                             DI_CACHE_FILE, off));
3963                         goto fail;
3964                 }
3965                 off += chunk;
3966                 buf += chunk;
3967                 sz -= chunk;
3968         }
3969 
3970         ASSERT(off == map_size);
3971 
3972         /*
3973          * Read past expected EOF to verify size.
3974          */
3975         if (kobj_read_file(file, (caddr_t)&sz, 1, off) > 0) {
3976                 CACHE_DEBUG((DI_ERR, "%s: file size changed", DI_CACHE_FILE));
3977                 goto fail;
3978         }
3979 
3980         all = (struct di_all *)di_cache.cache_data;
3981         if (!header_plus_one_ok(all)) {
3982                 CACHE_DEBUG((DI_ERR, "%s: file header changed", DI_CACHE_FILE));
3983                 goto fail;
3984         }
3985 
3986         /*
3987          * Compute CRC with checksum field in the cache data set to 0
3988          */
3989         saved_crc = all->cache_checksum;
3990         all->cache_checksum = 0;
3991         CRC32(crc, di_cache.cache_data, map_size, -1U, crc32_table);
3992         all->cache_checksum = saved_crc;
3993 
3994         if (crc != all->cache_checksum) {
3995                 CACHE_DEBUG((DI_ERR,
3996                     "%s: checksum error: expected=0x%x actual=0x%x",
3997                     DI_CACHE_FILE, all->cache_checksum, crc));
3998                 goto fail;
3999         }
4000 
4001         if (all->map_size != map_size) {
4002                 CACHE_DEBUG((DI_ERR, "%s: map size changed", DI_CACHE_FILE));
4003                 goto fail;
4004         }
4005 
4006         kobj_close_file(file);
4007 
4008         di_cache.cache_size = map_size;
4009 
4010         return;
4011 
4012 fail:
4013         kmem_free(di_cache.cache_data, map_size);
4014         kobj_close_file(file);
4015         di_cache.cache_data = NULL;
4016         di_cache.cache_size = 0;
4017 }
4018 
4019 
4020 /*
4021  * Checks if arguments are valid for using the cache.
4022  */
4023 static int
4024 cache_args_valid(struct di_state *st, int *error)
4025 {
4026         ASSERT(error);
4027         ASSERT(st->mem_size > 0);
4028         ASSERT(st->memlist != NULL);
4029 
4030         if (!modrootloaded || !i_ddi_io_initialized()) {
4031                 CACHE_DEBUG((DI_ERR,
4032                     "cache lookup failure: I/O subsystem not inited"));
4033                 *error = ENOTACTIVE;
4034                 return (0);
4035         }
4036 
4037         /*
4038          * No other flags allowed with DINFOCACHE
4039          */
4040         if (st->command != (DINFOCACHE & DIIOC_MASK)) {
4041                 CACHE_DEBUG((DI_ERR,
4042                     "cache lookup failure: bad flags: 0x%x",
4043                     st->command));
4044                 *error = EINVAL;
4045                 return (0);
4046         }
4047 
4048         if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) {
4049                 CACHE_DEBUG((DI_ERR,
4050                     "cache lookup failure: bad root: %s",
4051                     DI_ALL_PTR(st)->root_path));
4052                 *error = EINVAL;
4053                 return (0);
4054         }
4055 
4056         CACHE_DEBUG((DI_INFO, "cache lookup args ok: 0x%x", st->command));
4057 
4058         *error = 0;
4059 
4060         return (1);
4061 }
4062 
4063 static int
4064 snapshot_is_cacheable(struct di_state *st)
4065 {
4066         ASSERT(st->mem_size > 0);
4067         ASSERT(st->memlist != NULL);
4068 
4069         if ((st->command & DI_CACHE_SNAPSHOT_FLAGS) !=
4070             (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK)) {
4071                 CACHE_DEBUG((DI_INFO,
4072                     "not cacheable: incompatible flags: 0x%x",
4073                     st->command));
4074                 return (0);
4075         }
4076 
4077         if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) {
4078                 CACHE_DEBUG((DI_INFO,
4079                     "not cacheable: incompatible root path: %s",
4080                     DI_ALL_PTR(st)->root_path));
4081                 return (0);
4082         }
4083 
4084         CACHE_DEBUG((DI_INFO, "cacheable snapshot request: 0x%x", st->command));
4085 
4086         return (1);
4087 }
4088 
4089 static int
4090 di_cache_lookup(struct di_state *st)
4091 {
4092         size_t  rval;
4093         int     cache_valid;
4094 
4095         ASSERT(cache_args_valid(st, &cache_valid));
4096         ASSERT(modrootloaded);
4097 
4098         DI_CACHE_LOCK(di_cache);
4099 
4100         /*
4101          * The following assignment determines the validity
4102          * of the cache as far as this snapshot is concerned.
4103          */
4104         cache_valid = di_cache.cache_valid;
4105 
4106         if (cache_valid && di_cache.cache_data == NULL) {
4107                 di_cache_read(&di_cache);
4108                 /* check for read or file error */
4109                 if (di_cache.cache_data == NULL)
4110                         cache_valid = 0;
4111         }
4112 
4113         if (cache_valid) {
4114                 /*
4115                  * Ok, the cache was valid as of this particular
4116                  * snapshot. Copy the cached snapshot. This is safe
4117                  * to do as the cache cannot be freed (we hold the
4118                  * cache lock). Free the memory allocated in di_state
4119                  * up until this point - we will simply copy everything
4120                  * in the cache.
4121                  */
4122 
4123                 ASSERT(di_cache.cache_data != NULL);
4124                 ASSERT(di_cache.cache_size > 0);
4125 
4126                 di_freemem(st);
4127 
4128                 rval = 0;
4129                 if (di_cache2mem(&di_cache, st) > 0) {
4130                         /*
4131                          * map_size is size of valid data in the
4132                          * cached snapshot and may be less than
4133                          * size of the cache.
4134                          */
4135                         ASSERT(DI_ALL_PTR(st));
4136                         rval = DI_ALL_PTR(st)->map_size;
4137 
4138                         ASSERT(rval >= sizeof (struct di_all));
4139                         ASSERT(rval <= di_cache.cache_size);
4140                 }
4141         } else {
4142                 /*
4143                  * The cache isn't valid, we need to take a snapshot.
4144                  * Set the command flags appropriately
4145                  */
4146                 ASSERT(st->command == (DINFOCACHE & DIIOC_MASK));
4147                 st->command = (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK);
4148                 rval = di_cache_update(st);
4149                 st->command = (DINFOCACHE & DIIOC_MASK);
4150         }
4151 
4152         DI_CACHE_UNLOCK(di_cache);
4153 
4154         /*
4155          * For cached snapshots, the devinfo driver always returns
4156          * a snapshot rooted at "/".
4157          */
4158         ASSERT(rval == 0 || strcmp(DI_ALL_PTR(st)->root_path, "/") == 0);
4159 
4160         return ((int)rval);
4161 }
4162 
4163 /*
4164  * This is a forced update of the cache  - the previous state of the cache
4165  * may be:
4166  *      - unpopulated
4167  *      - populated and invalid
4168  *      - populated and valid
4169  */
4170 static int
4171 di_cache_update(struct di_state *st)
4172 {
4173         int             rval;
4174         uint32_t        crc;
4175         struct di_all   *all;
4176 
4177         ASSERT(DI_CACHE_LOCKED(di_cache));
4178         ASSERT(snapshot_is_cacheable(st));
4179 
4180         /*
4181          * Free the in-core cache and the on-disk file (if they exist)
4182          */
4183         i_ddi_di_cache_free(&di_cache);
4184 
4185         /*
4186          * Set valid flag before taking the snapshot,
4187          * so that any invalidations that arrive
4188          * during or after the snapshot are not
4189          * removed by us.
4190          */
4191         atomic_or_32(&di_cache.cache_valid, 1);
4192 
4193         rval = di_snapshot_and_clean(st);
4194 
4195         if (rval == 0) {
4196                 CACHE_DEBUG((DI_ERR, "can't update cache: bad snapshot"));
4197                 return (0);
4198         }
4199 
4200         DI_ALL_PTR(st)->map_size = rval;
4201         if (di_mem2cache(st, &di_cache) == 0) {
4202                 CACHE_DEBUG((DI_ERR, "can't update cache: copy failed"));
4203                 return (0);
4204         }
4205 
4206         ASSERT(di_cache.cache_data);
4207         ASSERT(di_cache.cache_size > 0);
4208 
4209         /*
4210          * Now that we have cached the snapshot, compute its checksum.
4211          * The checksum is only computed over the valid data in the
4212          * cache, not the entire cache.
4213          * Also, set all the fields (except checksum) before computing
4214          * checksum.
4215          */
4216         all = (struct di_all *)di_cache.cache_data;
4217         all->cache_magic = DI_CACHE_MAGIC;
4218         all->map_size = rval;
4219 
4220         ASSERT(all->cache_checksum == 0);
4221         CRC32(crc, di_cache.cache_data, all->map_size, -1U, crc32_table);
4222         all->cache_checksum = crc;
4223 
4224         di_cache_write(&di_cache);
4225 
4226         return (rval);
4227 }
4228 
4229 static void
4230 di_cache_print(di_cache_debug_t msglevel, char *fmt, ...)
4231 {
4232         va_list ap;
4233 
4234         if (di_cache_debug <= DI_QUIET)
4235                 return;
4236 
4237         if (di_cache_debug < msglevel)
4238                 return;
4239 
4240         switch (msglevel) {
4241                 case DI_ERR:
4242                         msglevel = CE_WARN;
4243                         break;
4244                 case DI_INFO:
4245                 case DI_TRACE:
4246                 default:
4247                         msglevel = CE_NOTE;
4248                         break;
4249         }
4250 
4251         va_start(ap, fmt);
4252         vcmn_err(msglevel, fmt, ap);
4253         va_end(ap);
4254 }
4255 
4256 static void
4257 di_hotplug_children(struct di_state *st)
4258 {
4259         di_off_t        off;
4260         struct di_hp    *hp;
4261         struct i_hp     *hp_list_node;
4262 
4263         while (hp_list_node = (struct i_hp *)list_remove_head(&st->hp_list)) {
4264 
4265                 if ((hp_list_node->hp_child != NULL) &&
4266                     (di_dip_find(st, hp_list_node->hp_child, &off) == 0)) {
4267                         hp = DI_HP(di_mem_addr(st, hp_list_node->hp_off));
4268                         hp->hp_child = off;
4269                 }
4270 
4271                 kmem_free(hp_list_node, sizeof (i_hp_t));
4272         }
4273 
4274         list_destroy(&st->hp_list);
4275 }