1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * driver for accessing kernel devinfo tree.
  28  */
  29 #include <sys/types.h>
  30 #include <sys/pathname.h>
  31 #include <sys/debug.h>
  32 #include <sys/autoconf.h>
  33 #include <sys/vmsystm.h>
  34 #include <sys/conf.h>
  35 #include <sys/file.h>
  36 #include <sys/kmem.h>
  37 #include <sys/modctl.h>
  38 #include <sys/stat.h>
  39 #include <sys/ddi.h>
  40 #include <sys/sunddi.h>
  41 #include <sys/sunldi_impl.h>
  42 #include <sys/sunndi.h>
  43 #include <sys/esunddi.h>
  44 #include <sys/sunmdi.h>
  45 #include <sys/ddi_impldefs.h>
  46 #include <sys/ndi_impldefs.h>
  47 #include <sys/mdi_impldefs.h>
  48 #include <sys/devinfo_impl.h>
  49 #include <sys/thread.h>
  50 #include <sys/modhash.h>
  51 #include <sys/bitmap.h>
  52 #include <util/qsort.h>
  53 #include <sys/disp.h>
  54 #include <sys/kobj.h>
  55 #include <sys/crc32.h>
  56 #include <sys/ddi_hp.h>
  57 #include <sys/ddi_hp_impl.h>
  58 #include <sys/sysmacros.h>
  59 #include <sys/list.h>
  60 
  61 
  62 #ifdef DEBUG
  63 static int di_debug;
  64 #define dcmn_err(args) if (di_debug >= 1) cmn_err args
  65 #define dcmn_err2(args) if (di_debug >= 2) cmn_err args
  66 #define dcmn_err3(args) if (di_debug >= 3) cmn_err args
  67 #else
  68 #define dcmn_err(args) /* nothing */
  69 #define dcmn_err2(args) /* nothing */
  70 #define dcmn_err3(args) /* nothing */
  71 #endif
  72 
  73 /*
  74  * We partition the space of devinfo minor nodes equally between the full and
  75  * unprivileged versions of the driver.  The even-numbered minor nodes are the
  76  * full version, while the odd-numbered ones are the read-only version.
  77  */
  78 static int di_max_opens = 32;
  79 
  80 static int di_prop_dyn = 1;             /* enable dynamic property support */
  81 
  82 #define DI_FULL_PARENT          0
  83 #define DI_READONLY_PARENT      1
  84 #define DI_NODE_SPECIES         2
  85 #define DI_UNPRIVILEGED_NODE(x) (((x) % 2) != 0)
  86 
  87 #define IOC_IDLE        0       /* snapshot ioctl states */
  88 #define IOC_SNAP        1       /* snapshot in progress */
  89 #define IOC_DONE        2       /* snapshot done, but not copied out */
  90 #define IOC_COPY        3       /* copyout in progress */
  91 
  92 /*
  93  * Keep max alignment so we can move snapshot to different platforms.
  94  *
  95  * NOTE: Most callers should rely on the di_checkmem return value
  96  * being aligned, and reestablish *off_p with aligned value, instead
  97  * of trying to align size of their allocations: this approach will
  98  * minimize memory use.
  99  */
 100 #define DI_ALIGN(addr)  ((addr + 7l) & ~7l)
 101 
 102 /*
 103  * To avoid wasting memory, make a linked list of memory chunks.
 104  * Size of each chunk is buf_size.
 105  */
 106 struct di_mem {
 107         struct di_mem   *next;          /* link to next chunk */
 108         char            *buf;           /* contiguous kernel memory */
 109         size_t          buf_size;       /* size of buf in bytes */
 110         devmap_cookie_t cook;           /* cookie from ddi_umem_alloc */
 111 };
 112 
 113 /*
 114  * This is a stack for walking the tree without using recursion.
 115  * When the devinfo tree height is above some small size, one
 116  * gets watchdog resets on sun4m.
 117  */
 118 struct di_stack {
 119         void            *offset[MAX_TREE_DEPTH];
 120         struct dev_info *dip[MAX_TREE_DEPTH];
 121         int             circ[MAX_TREE_DEPTH];
 122         int             depth;  /* depth of current node to be copied */
 123 };
 124 
 125 #define TOP_OFFSET(stack)       \
 126         ((di_off_t *)(stack)->offset[(stack)->depth - 1])
 127 #define TOP_NODE(stack)         \
 128         ((stack)->dip[(stack)->depth - 1])
 129 #define PARENT_OFFSET(stack)    \
 130         ((di_off_t *)(stack)->offset[(stack)->depth - 2])
 131 #define EMPTY_STACK(stack)      ((stack)->depth == 0)
 132 #define POP_STACK(stack)        { \
 133         ndi_devi_exit((dev_info_t *)TOP_NODE(stack), \
 134                 (stack)->circ[(stack)->depth - 1]); \
 135         ((stack)->depth--); \
 136 }
 137 #define PUSH_STACK(stack, node, off_p)  { \
 138         ASSERT(node != NULL); \
 139         ndi_devi_enter((dev_info_t *)node, &(stack)->circ[(stack)->depth]); \
 140         (stack)->dip[(stack)->depth] = (node); \
 141         (stack)->offset[(stack)->depth] = (void *)(off_p); \
 142         ((stack)->depth)++; \
 143 }
 144 
 145 #define DI_ALL_PTR(s)   DI_ALL(di_mem_addr((s), 0))
 146 
 147 /*
 148  * With devfs, the device tree has no global locks. The device tree is
 149  * dynamic and dips may come and go if they are not locked locally. Under
 150  * these conditions, pointers are no longer reliable as unique IDs.
 151  * Specifically, these pointers cannot be used as keys for hash tables
 152  * as the same devinfo structure may be freed in one part of the tree only
 153  * to be allocated as the structure for a different device in another
 154  * part of the tree. This can happen if DR and the snapshot are
 155  * happening concurrently.
 156  * The following data structures act as keys for devinfo nodes and
 157  * pathinfo nodes.
 158  */
 159 
 160 enum di_ktype {
 161         DI_DKEY = 1,
 162         DI_PKEY = 2
 163 };
 164 
 165 struct di_dkey {
 166         dev_info_t      *dk_dip;
 167         major_t         dk_major;
 168         int             dk_inst;
 169         pnode_t         dk_nodeid;
 170 };
 171 
 172 struct di_pkey {
 173         mdi_pathinfo_t  *pk_pip;
 174         char            *pk_path_addr;
 175         dev_info_t      *pk_client;
 176         dev_info_t      *pk_phci;
 177 };
 178 
 179 struct di_key {
 180         enum di_ktype   k_type;
 181         union {
 182                 struct di_dkey dkey;
 183                 struct di_pkey pkey;
 184         } k_u;
 185 };
 186 
 187 
 188 struct i_lnode;
 189 
 190 typedef struct i_link {
 191         /*
 192          * If a di_link struct representing this i_link struct makes it
 193          * into the snapshot, then self will point to the offset of
 194          * the di_link struct in the snapshot
 195          */
 196         di_off_t        self;
 197 
 198         int             spec_type;      /* block or char access type */
 199         struct i_lnode  *src_lnode;     /* src i_lnode */
 200         struct i_lnode  *tgt_lnode;     /* tgt i_lnode */
 201         struct i_link   *src_link_next; /* next src i_link /w same i_lnode */
 202         struct i_link   *tgt_link_next; /* next tgt i_link /w same i_lnode */
 203 } i_link_t;
 204 
 205 typedef struct i_lnode {
 206         /*
 207          * If a di_lnode struct representing this i_lnode struct makes it
 208          * into the snapshot, then self will point to the offset of
 209          * the di_lnode struct in the snapshot
 210          */
 211         di_off_t        self;
 212 
 213         /*
 214          * used for hashing and comparing i_lnodes
 215          */
 216         int             modid;
 217 
 218         /*
 219          * public information describing a link endpoint
 220          */
 221         struct di_node  *di_node;       /* di_node in snapshot */
 222         dev_t           devt;           /* devt */
 223 
 224         /*
 225          * i_link ptr to links coming into this i_lnode node
 226          * (this i_lnode is the target of these i_links)
 227          */
 228         i_link_t        *link_in;
 229 
 230         /*
 231          * i_link ptr to links going out of this i_lnode node
 232          * (this i_lnode is the source of these i_links)
 233          */
 234         i_link_t        *link_out;
 235 } i_lnode_t;
 236 
 237 typedef struct i_hp {
 238         di_off_t        hp_off;         /* Offset of di_hp_t in snapshot */
 239         dev_info_t      *hp_child;      /* Child devinfo node of the di_hp_t */
 240         list_node_t     hp_link;        /* List linkage */
 241 } i_hp_t;
 242 
 243 /*
 244  * Soft state associated with each instance of driver open.
 245  */
 246 static struct di_state {
 247         di_off_t        mem_size;       /* total # bytes in memlist */
 248         struct di_mem   *memlist;       /* head of memlist */
 249         uint_t          command;        /* command from ioctl */
 250         int             di_iocstate;    /* snapshot ioctl state */
 251         mod_hash_t      *reg_dip_hash;
 252         mod_hash_t      *reg_pip_hash;
 253         int             lnode_count;
 254         int             link_count;
 255 
 256         mod_hash_t      *lnode_hash;
 257         mod_hash_t      *link_hash;
 258 
 259         list_t          hp_list;
 260 } **di_states;
 261 
 262 static kmutex_t di_lock;        /* serialize instance assignment */
 263 
 264 typedef enum {
 265         DI_QUIET = 0,   /* DI_QUIET must always be 0 */
 266         DI_ERR,
 267         DI_INFO,
 268         DI_TRACE,
 269         DI_TRACE1,
 270         DI_TRACE2
 271 } di_cache_debug_t;
 272 
 273 static uint_t   di_chunk = 32;          /* I/O chunk size in pages */
 274 
 275 #define DI_CACHE_LOCK(c)        (mutex_enter(&(c).cache_lock))
 276 #define DI_CACHE_UNLOCK(c)      (mutex_exit(&(c).cache_lock))
 277 #define DI_CACHE_LOCKED(c)      (mutex_owned(&(c).cache_lock))
 278 
 279 /*
 280  * Check that whole device tree is being configured as a pre-condition for
 281  * cleaning up /etc/devices files.
 282  */
 283 #define DEVICES_FILES_CLEANABLE(st)     \
 284         (((st)->command & DINFOSUBTREE) && ((st)->command & DINFOFORCE) && \
 285         strcmp(DI_ALL_PTR(st)->root_path, "/") == 0)
 286 
 287 #define CACHE_DEBUG(args)       \
 288         { if (di_cache_debug != DI_QUIET) di_cache_print args; }
 289 
 290 typedef struct phci_walk_arg {
 291         di_off_t        off;
 292         struct di_state *st;
 293 } phci_walk_arg_t;
 294 
 295 static int di_open(dev_t *, int, int, cred_t *);
 296 static int di_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
 297 static int di_close(dev_t, int, int, cred_t *);
 298 static int di_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
 299 static int di_attach(dev_info_t *, ddi_attach_cmd_t);
 300 static int di_detach(dev_info_t *, ddi_detach_cmd_t);
 301 
 302 static di_off_t di_copyformat(di_off_t, struct di_state *, intptr_t, int);
 303 static di_off_t di_snapshot_and_clean(struct di_state *);
 304 static di_off_t di_copydevnm(di_off_t *, struct di_state *);
 305 static di_off_t di_copytree(struct dev_info *, di_off_t *, struct di_state *);
 306 static di_off_t di_copynode(struct dev_info *, struct di_stack *,
 307     struct di_state *);
 308 static di_off_t di_getmdata(struct ddi_minor_data *, di_off_t *, di_off_t,
 309     struct di_state *);
 310 static di_off_t di_getppdata(struct dev_info *, di_off_t *, struct di_state *);
 311 static di_off_t di_getdpdata(struct dev_info *, di_off_t *, struct di_state *);
 312 static di_off_t di_gethpdata(ddi_hp_cn_handle_t *, di_off_t *,
 313     struct di_state *);
 314 static di_off_t di_getprop(int, struct ddi_prop **, di_off_t *,
 315     struct di_state *, struct dev_info *);
 316 static void di_allocmem(struct di_state *, size_t);
 317 static void di_freemem(struct di_state *);
 318 static void di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz);
 319 static di_off_t di_checkmem(struct di_state *, di_off_t, size_t);
 320 static void *di_mem_addr(struct di_state *, di_off_t);
 321 static int di_setstate(struct di_state *, int);
 322 static void di_register_dip(struct di_state *, dev_info_t *, di_off_t);
 323 static void di_register_pip(struct di_state *, mdi_pathinfo_t *, di_off_t);
 324 static di_off_t di_getpath_data(dev_info_t *, di_off_t *, di_off_t,
 325     struct di_state *, int);
 326 static di_off_t di_getlink_data(di_off_t, struct di_state *);
 327 static int di_dip_find(struct di_state *st, dev_info_t *node, di_off_t *off_p);
 328 
 329 static int cache_args_valid(struct di_state *st, int *error);
 330 static int snapshot_is_cacheable(struct di_state *st);
 331 static int di_cache_lookup(struct di_state *st);
 332 static int di_cache_update(struct di_state *st);
 333 static void di_cache_print(di_cache_debug_t msglevel, char *fmt, ...);
 334 static int build_vhci_list(dev_info_t *vh_devinfo, void *arg);
 335 static int build_phci_list(dev_info_t *ph_devinfo, void *arg);
 336 static void di_hotplug_children(struct di_state *st);
 337 
 338 extern int modrootloaded;
 339 extern void mdi_walk_vhcis(int (*)(dev_info_t *, void *), void *);
 340 extern void mdi_vhci_walk_phcis(dev_info_t *,
 341         int (*)(dev_info_t *, void *), void *);
 342 
 343 
 344 static struct cb_ops di_cb_ops = {
 345         di_open,                /* open */
 346         di_close,               /* close */
 347         nodev,                  /* strategy */
 348         nodev,                  /* print */
 349         nodev,                  /* dump */
 350         nodev,                  /* read */
 351         nodev,                  /* write */
 352         di_ioctl,               /* ioctl */
 353         nodev,                  /* devmap */
 354         nodev,                  /* mmap */
 355         nodev,                  /* segmap */
 356         nochpoll,               /* poll */
 357         ddi_prop_op,            /* prop_op */
 358         NULL,                   /* streamtab  */
 359         D_NEW | D_MP            /* Driver compatibility flag */
 360 };
 361 
 362 static struct dev_ops di_ops = {
 363         DEVO_REV,               /* devo_rev, */
 364         0,                      /* refcnt  */
 365         di_info,                /* info */
 366         nulldev,                /* identify */
 367         nulldev,                /* probe */
 368         di_attach,              /* attach */
 369         di_detach,              /* detach */
 370         nodev,                  /* reset */
 371         &di_cb_ops,         /* driver operations */
 372         NULL                    /* bus operations */
 373 };
 374 
 375 /*
 376  * Module linkage information for the kernel.
 377  */
 378 static struct modldrv modldrv = {
 379         &mod_driverops,
 380         "DEVINFO Driver",
 381         &di_ops
 382 };
 383 
 384 static struct modlinkage modlinkage = {
 385         MODREV_1,
 386         &modldrv,
 387         NULL
 388 };
 389 
 390 int
 391 _init(void)
 392 {
 393         int     error;
 394 
 395         mutex_init(&di_lock, NULL, MUTEX_DRIVER, NULL);
 396 
 397         error = mod_install(&modlinkage);
 398         if (error != 0) {
 399                 mutex_destroy(&di_lock);
 400                 return (error);
 401         }
 402 
 403         return (0);
 404 }
 405 
 406 int
 407 _info(struct modinfo *modinfop)
 408 {
 409         return (mod_info(&modlinkage, modinfop));
 410 }
 411 
 412 int
 413 _fini(void)
 414 {
 415         int     error;
 416 
 417         error = mod_remove(&modlinkage);
 418         if (error != 0) {
 419                 return (error);
 420         }
 421 
 422         mutex_destroy(&di_lock);
 423         return (0);
 424 }
 425 
 426 static dev_info_t *di_dip;
 427 
 428 /*ARGSUSED*/
 429 static int
 430 di_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
 431 {
 432         int     error = DDI_FAILURE;
 433 
 434         switch (infocmd) {
 435         case DDI_INFO_DEVT2DEVINFO:
 436                 *result = (void *)di_dip;
 437                 error = DDI_SUCCESS;
 438                 break;
 439         case DDI_INFO_DEVT2INSTANCE:
 440                 /*
 441                  * All dev_t's map to the same, single instance.
 442                  */
 443                 *result = (void *)0;
 444                 error = DDI_SUCCESS;
 445                 break;
 446         default:
 447                 break;
 448         }
 449 
 450         return (error);
 451 }
 452 
 453 static int
 454 di_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 455 {
 456         int     error = DDI_FAILURE;
 457 
 458         switch (cmd) {
 459         case DDI_ATTACH:
 460                 di_states = kmem_zalloc(
 461                     di_max_opens * sizeof (struct di_state *), KM_SLEEP);
 462 
 463                 if (ddi_create_minor_node(dip, "devinfo", S_IFCHR,
 464                     DI_FULL_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE ||
 465                     ddi_create_minor_node(dip, "devinfo,ro", S_IFCHR,
 466                     DI_READONLY_PARENT, DDI_PSEUDO, NULL) == DDI_FAILURE) {
 467                         kmem_free(di_states,
 468                             di_max_opens * sizeof (struct di_state *));
 469                         ddi_remove_minor_node(dip, NULL);
 470                         error = DDI_FAILURE;
 471                 } else {
 472                         di_dip = dip;
 473                         ddi_report_dev(dip);
 474 
 475                         error = DDI_SUCCESS;
 476                 }
 477                 break;
 478         default:
 479                 error = DDI_FAILURE;
 480                 break;
 481         }
 482 
 483         return (error);
 484 }
 485 
 486 static int
 487 di_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 488 {
 489         int     error = DDI_FAILURE;
 490 
 491         switch (cmd) {
 492         case DDI_DETACH:
 493                 ddi_remove_minor_node(dip, NULL);
 494                 di_dip = NULL;
 495                 kmem_free(di_states, di_max_opens * sizeof (struct di_state *));
 496 
 497                 error = DDI_SUCCESS;
 498                 break;
 499         default:
 500                 error = DDI_FAILURE;
 501                 break;
 502         }
 503 
 504         return (error);
 505 }
 506 
 507 /*
 508  * Allow multiple opens by tweaking the dev_t such that it looks like each
 509  * open is getting a different minor device.  Each minor gets a separate
 510  * entry in the di_states[] table.  Based on the original minor number, we
 511  * discriminate opens of the full and read-only nodes.  If all of the instances
 512  * of the selected minor node are currently open, we return EAGAIN.
 513  */
 514 /*ARGSUSED*/
 515 static int
 516 di_open(dev_t *devp, int flag, int otyp, cred_t *credp)
 517 {
 518         int     m;
 519         minor_t minor_parent = getminor(*devp);
 520 
 521         if (minor_parent != DI_FULL_PARENT &&
 522             minor_parent != DI_READONLY_PARENT)
 523                 return (ENXIO);
 524 
 525         mutex_enter(&di_lock);
 526 
 527         for (m = minor_parent; m < di_max_opens; m += DI_NODE_SPECIES) {
 528                 if (di_states[m] != NULL)
 529                         continue;
 530 
 531                 di_states[m] = kmem_zalloc(sizeof (struct di_state), KM_SLEEP);
 532                 break;  /* It's ours. */
 533         }
 534 
 535         if (m >= di_max_opens) {
 536                 /*
 537                  * maximum open instance for device reached
 538                  */
 539                 mutex_exit(&di_lock);
 540                 dcmn_err((CE_WARN, "devinfo: maximum devinfo open reached"));
 541                 return (EAGAIN);
 542         }
 543         mutex_exit(&di_lock);
 544 
 545         ASSERT(m < di_max_opens);
 546         *devp = makedevice(getmajor(*devp), (minor_t)(m + DI_NODE_SPECIES));
 547 
 548         dcmn_err((CE_CONT, "di_open: thread = %p, assigned minor = %d\n",
 549             (void *)curthread, m + DI_NODE_SPECIES));
 550 
 551         return (0);
 552 }
 553 
 554 /*ARGSUSED*/
 555 static int
 556 di_close(dev_t dev, int flag, int otype, cred_t *cred_p)
 557 {
 558         struct di_state *st;
 559         int             m = (int)getminor(dev) - DI_NODE_SPECIES;
 560 
 561         if (m < 0) {
 562                 cmn_err(CE_WARN, "closing non-existent devinfo minor %d",
 563                     m + DI_NODE_SPECIES);
 564                 return (ENXIO);
 565         }
 566 
 567         st = di_states[m];
 568         ASSERT(m < di_max_opens && st != NULL);
 569 
 570         di_freemem(st);
 571         kmem_free(st, sizeof (struct di_state));
 572 
 573         /*
 574          * empty slot in state table
 575          */
 576         mutex_enter(&di_lock);
 577         di_states[m] = NULL;
 578         dcmn_err((CE_CONT, "di_close: thread = %p, assigned minor = %d\n",
 579             (void *)curthread, m + DI_NODE_SPECIES));
 580         mutex_exit(&di_lock);
 581 
 582         return (0);
 583 }
 584 
 585 
 586 /*ARGSUSED*/
 587 static int
 588 di_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
 589 {
 590         int             rv, error;
 591         di_off_t        off;
 592         struct di_all   *all;
 593         struct di_state *st;
 594         int             m = (int)getminor(dev) - DI_NODE_SPECIES;
 595         major_t         i;
 596         char            *drv_name;
 597         size_t          map_size, size;
 598         struct di_mem   *dcp;
 599         int             ndi_flags;
 600 
 601         if (m < 0 || m >= di_max_opens) {
 602                 return (ENXIO);
 603         }
 604 
 605         st = di_states[m];
 606         ASSERT(st != NULL);
 607 
 608         dcmn_err2((CE_CONT, "di_ioctl: mode = %x, cmd = %x\n", mode, cmd));
 609 
 610         switch (cmd) {
 611         case DINFOIDENT:
 612                 /*
 613                  * This is called from di_init to verify that the driver
 614                  * opened is indeed devinfo. The purpose is to guard against
 615                  * sending ioctl to an unknown driver in case of an
 616                  * unresolved major number conflict during bfu.
 617                  */
 618                 *rvalp = DI_MAGIC;
 619                 return (0);
 620 
 621         case DINFOLODRV:
 622                 /*
 623                  * Hold an installed driver and return the result
 624                  */
 625                 if (DI_UNPRIVILEGED_NODE(m)) {
 626                         /*
 627                          * Only the fully enabled instances may issue
 628                          * DINFOLDDRV.
 629                          */
 630                         return (EACCES);
 631                 }
 632 
 633                 drv_name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
 634                 if (ddi_copyin((void *)arg, drv_name, MAXNAMELEN, mode) != 0) {
 635                         kmem_free(drv_name, MAXNAMELEN);
 636                         return (EFAULT);
 637                 }
 638 
 639                 /*
 640                  * Some 3rd party driver's _init() walks the device tree,
 641                  * so we load the driver module before configuring driver.
 642                  */
 643                 i = ddi_name_to_major(drv_name);
 644                 if (ddi_hold_driver(i) == NULL) {
 645                         kmem_free(drv_name, MAXNAMELEN);
 646                         return (ENXIO);
 647                 }
 648 
 649                 ndi_flags = NDI_DEVI_PERSIST | NDI_CONFIG | NDI_NO_EVENT;
 650 
 651                 /*
 652                  * i_ddi_load_drvconf() below will trigger a reprobe
 653                  * via reset_nexus_flags(). NDI_DRV_CONF_REPROBE isn't
 654                  * needed here.
 655                  */
 656                 modunload_disable();
 657                 (void) i_ddi_load_drvconf(i);
 658                 (void) ndi_devi_config_driver(ddi_root_node(), ndi_flags, i);
 659                 kmem_free(drv_name, MAXNAMELEN);
 660                 ddi_rele_driver(i);
 661                 rv = i_ddi_devs_attached(i);
 662                 modunload_enable();
 663 
 664                 i_ddi_di_cache_invalidate();
 665 
 666                 return ((rv == DDI_SUCCESS)? 0 : ENXIO);
 667 
 668         case DINFOUSRLD:
 669                 /*
 670                  * The case for copying snapshot to userland
 671                  */
 672                 if (di_setstate(st, IOC_COPY) == -1)
 673                         return (EBUSY);
 674 
 675                 map_size = DI_ALL_PTR(st)->map_size;
 676                 if (map_size == 0) {
 677                         (void) di_setstate(st, IOC_DONE);
 678                         return (EFAULT);
 679                 }
 680 
 681                 /*
 682                  * copyout the snapshot
 683                  */
 684                 map_size = (map_size + PAGEOFFSET) & PAGEMASK;
 685 
 686                 /*
 687                  * Return the map size, so caller may do a sanity
 688                  * check against the return value of snapshot ioctl()
 689                  */
 690                 *rvalp = (int)map_size;
 691 
 692                 /*
 693                  * Copy one chunk at a time
 694                  */
 695                 off = 0;
 696                 dcp = st->memlist;
 697                 while (map_size) {
 698                         size = dcp->buf_size;
 699                         if (map_size <= size) {
 700                                 size = map_size;
 701                         }
 702 
 703                         if (ddi_copyout(di_mem_addr(st, off),
 704                             (void *)(arg + off), size, mode) != 0) {
 705                                 (void) di_setstate(st, IOC_DONE);
 706                                 return (EFAULT);
 707                         }
 708 
 709                         map_size -= size;
 710                         off += size;
 711                         dcp = dcp->next;
 712                 }
 713 
 714                 di_freemem(st);
 715                 (void) di_setstate(st, IOC_IDLE);
 716                 return (0);
 717 
 718         default:
 719                 if ((cmd & ~DIIOC_MASK) != DIIOC) {
 720                         /*
 721                          * Invalid ioctl command
 722                          */
 723                         return (ENOTTY);
 724                 }
 725                 /*
 726                  * take a snapshot
 727                  */
 728                 st->command = cmd & DIIOC_MASK;
 729                 /*FALLTHROUGH*/
 730         }
 731 
 732         /*
 733          * Obtain enough memory to hold header + rootpath.  We prevent kernel
 734          * memory exhaustion by freeing any previously allocated snapshot and
 735          * refusing the operation; otherwise we would be allowing ioctl(),
 736          * ioctl(), ioctl(), ..., panic.
 737          */
 738         if (di_setstate(st, IOC_SNAP) == -1)
 739                 return (EBUSY);
 740 
 741         /*
 742          * Initial memlist always holds di_all and the root_path - and
 743          * is at least a page and size.
 744          */
 745         size = sizeof (struct di_all) +
 746             sizeof (((struct dinfo_io *)(NULL))->root_path);
 747         if (size < PAGESIZE)
 748                 size = PAGESIZE;
 749         off = di_checkmem(st, 0, size);
 750         all = DI_ALL_PTR(st);
 751         off += sizeof (struct di_all);          /* real length of di_all */
 752 
 753         all->devcnt = devcnt;
 754         all->command = st->command;
 755         all->version = DI_SNAPSHOT_VERSION;
 756         all->top_vhci_devinfo = 0;           /* filled by build_vhci_list. */
 757 
 758         /*
 759          * Note the endianness in case we need to transport snapshot
 760          * over the network.
 761          */
 762 #if defined(_LITTLE_ENDIAN)
 763         all->endianness = DI_LITTLE_ENDIAN;
 764 #else
 765         all->endianness = DI_BIG_ENDIAN;
 766 #endif
 767 
 768         /* Copyin ioctl args, store in the snapshot. */
 769         if (copyinstr((void *)arg, all->req_path,
 770             sizeof (((struct dinfo_io *)(NULL))->root_path), &size) != 0) {
 771                 di_freemem(st);
 772                 (void) di_setstate(st, IOC_IDLE);
 773                 return (EFAULT);
 774         }
 775         (void) strcpy(all->root_path, all->req_path);
 776         off += size;                            /* real length of root_path */
 777 
 778         if ((st->command & DINFOCLEANUP) && !DEVICES_FILES_CLEANABLE(st)) {
 779                 di_freemem(st);
 780                 (void) di_setstate(st, IOC_IDLE);
 781                 return (EINVAL);
 782         }
 783 
 784         error = 0;
 785         if ((st->command & DINFOCACHE) && !cache_args_valid(st, &error)) {
 786                 di_freemem(st);
 787                 (void) di_setstate(st, IOC_IDLE);
 788                 return (error);
 789         }
 790 
 791         /*
 792          * Only the fully enabled version may force load drivers or read
 793          * the parent private data from a driver.
 794          */
 795         if ((st->command & (DINFOPRIVDATA | DINFOFORCE)) != 0 &&
 796             DI_UNPRIVILEGED_NODE(m)) {
 797                 di_freemem(st);
 798                 (void) di_setstate(st, IOC_IDLE);
 799                 return (EACCES);
 800         }
 801 
 802         /* Do we need private data? */
 803         if (st->command & DINFOPRIVDATA) {
 804                 arg += sizeof (((struct dinfo_io *)(NULL))->root_path);
 805 
 806 #ifdef _MULTI_DATAMODEL
 807                 switch (ddi_model_convert_from(mode & FMODELS)) {
 808                 case DDI_MODEL_ILP32: {
 809                         /*
 810                          * Cannot copy private data from 64-bit kernel
 811                          * to 32-bit app
 812                          */
 813                         di_freemem(st);
 814                         (void) di_setstate(st, IOC_IDLE);
 815                         return (EINVAL);
 816                 }
 817                 case DDI_MODEL_NONE:
 818                         if ((off = di_copyformat(off, st, arg, mode)) == 0) {
 819                                 di_freemem(st);
 820                                 (void) di_setstate(st, IOC_IDLE);
 821                                 return (EFAULT);
 822                         }
 823                         break;
 824                 }
 825 #else /* !_MULTI_DATAMODEL */
 826                 if ((off = di_copyformat(off, st, arg, mode)) == 0) {
 827                         di_freemem(st);
 828                         (void) di_setstate(st, IOC_IDLE);
 829                         return (EFAULT);
 830                 }
 831 #endif /* _MULTI_DATAMODEL */
 832         }
 833 
 834         all->top_devinfo = DI_ALIGN(off);
 835 
 836         /*
 837          * For cache lookups we reallocate memory from scratch,
 838          * so the value of "all" is no longer valid.
 839          */
 840         all = NULL;
 841 
 842         if (st->command & DINFOCACHE) {
 843                 *rvalp = di_cache_lookup(st);
 844         } else if (snapshot_is_cacheable(st)) {
 845                 DI_CACHE_LOCK(di_cache);
 846                 *rvalp = di_cache_update(st);
 847                 DI_CACHE_UNLOCK(di_cache);
 848         } else
 849                 *rvalp = di_snapshot_and_clean(st);
 850 
 851         if (*rvalp) {
 852                 DI_ALL_PTR(st)->map_size = *rvalp;
 853                 (void) di_setstate(st, IOC_DONE);
 854         } else {
 855                 di_freemem(st);
 856                 (void) di_setstate(st, IOC_IDLE);
 857         }
 858 
 859         return (0);
 860 }
 861 
 862 /*
 863  * Get a chunk of memory >= size, for the snapshot
 864  */
 865 static void
 866 di_allocmem(struct di_state *st, size_t size)
 867 {
 868         struct di_mem   *mem = kmem_zalloc(sizeof (struct di_mem), KM_SLEEP);
 869 
 870         /*
 871          * Round up size to nearest power of 2. If it is less
 872          * than st->mem_size, set it to st->mem_size (i.e.,
 873          * the mem_size is doubled every time) to reduce the
 874          * number of memory allocations.
 875          */
 876         size_t tmp = 1;
 877         while (tmp < size) {
 878                 tmp <<= 1;
 879         }
 880         size = (tmp > st->mem_size) ? tmp : st->mem_size;
 881 
 882         mem->buf = ddi_umem_alloc(size, DDI_UMEM_SLEEP, &mem->cook);
 883         mem->buf_size = size;
 884 
 885         dcmn_err2((CE_CONT, "di_allocmem: mem_size=%x\n", st->mem_size));
 886 
 887         if (st->mem_size == 0) {     /* first chunk */
 888                 st->memlist = mem;
 889         } else {
 890                 /*
 891                  * locate end of linked list and add a chunk at the end
 892                  */
 893                 struct di_mem *dcp = st->memlist;
 894                 while (dcp->next != NULL) {
 895                         dcp = dcp->next;
 896                 }
 897 
 898                 dcp->next = mem;
 899         }
 900 
 901         st->mem_size += size;
 902 }
 903 
 904 /*
 905  * Copy upto bufsiz bytes of the memlist to buf
 906  */
 907 static void
 908 di_copymem(struct di_state *st, caddr_t buf, size_t bufsiz)
 909 {
 910         struct di_mem   *dcp;
 911         size_t          copysz;
 912 
 913         if (st->mem_size == 0) {
 914                 ASSERT(st->memlist == NULL);
 915                 return;
 916         }
 917 
 918         copysz = 0;
 919         for (dcp = st->memlist; dcp; dcp = dcp->next) {
 920 
 921                 ASSERT(bufsiz > 0);
 922 
 923                 if (bufsiz <= dcp->buf_size)
 924                         copysz = bufsiz;
 925                 else
 926                         copysz = dcp->buf_size;
 927 
 928                 bcopy(dcp->buf, buf, copysz);
 929 
 930                 buf += copysz;
 931                 bufsiz -= copysz;
 932 
 933                 if (bufsiz == 0)
 934                         break;
 935         }
 936 }
 937 
 938 /*
 939  * Free all memory for the snapshot
 940  */
 941 static void
 942 di_freemem(struct di_state *st)
 943 {
 944         struct di_mem   *dcp, *tmp;
 945 
 946         dcmn_err2((CE_CONT, "di_freemem\n"));
 947 
 948         if (st->mem_size) {
 949                 dcp = st->memlist;
 950                 while (dcp) {   /* traverse the linked list */
 951                         tmp = dcp;
 952                         dcp = dcp->next;
 953                         ddi_umem_free(tmp->cook);
 954                         kmem_free(tmp, sizeof (struct di_mem));
 955                 }
 956                 st->mem_size = 0;
 957                 st->memlist = NULL;
 958         }
 959 
 960         ASSERT(st->mem_size == 0);
 961         ASSERT(st->memlist == NULL);
 962 }
 963 
 964 /*
 965  * Copies cached data to the di_state structure.
 966  * Returns:
 967  *      - size of data copied, on SUCCESS
 968  *      - 0 on failure
 969  */
 970 static int
 971 di_cache2mem(struct di_cache *cache, struct di_state *st)
 972 {
 973         caddr_t pa;
 974 
 975         ASSERT(st->mem_size == 0);
 976         ASSERT(st->memlist == NULL);
 977         ASSERT(!servicing_interrupt());
 978         ASSERT(DI_CACHE_LOCKED(*cache));
 979 
 980         if (cache->cache_size == 0) {
 981                 ASSERT(cache->cache_data == NULL);
 982                 CACHE_DEBUG((DI_ERR, "Empty cache. Skipping copy"));
 983                 return (0);
 984         }
 985 
 986         ASSERT(cache->cache_data);
 987 
 988         di_allocmem(st, cache->cache_size);
 989 
 990         pa = di_mem_addr(st, 0);
 991 
 992         ASSERT(pa);
 993 
 994         /*
 995          * Verify that di_allocmem() allocates contiguous memory,
 996          * so that it is safe to do straight bcopy()
 997          */
 998         ASSERT(st->memlist != NULL);
 999         ASSERT(st->memlist->next == NULL);
1000         bcopy(cache->cache_data, pa, cache->cache_size);
1001 
1002         return (cache->cache_size);
1003 }
1004 
1005 /*
1006  * Copies a snapshot from di_state to the cache
1007  * Returns:
1008  *      - 0 on failure
1009  *      - size of copied data on success
1010  */
1011 static size_t
1012 di_mem2cache(struct di_state *st, struct di_cache *cache)
1013 {
1014         size_t  map_size;
1015 
1016         ASSERT(cache->cache_size == 0);
1017         ASSERT(cache->cache_data == NULL);
1018         ASSERT(!servicing_interrupt());
1019         ASSERT(DI_CACHE_LOCKED(*cache));
1020 
1021         if (st->mem_size == 0) {
1022                 ASSERT(st->memlist == NULL);
1023                 CACHE_DEBUG((DI_ERR, "Empty memlist. Skipping copy"));
1024                 return (0);
1025         }
1026 
1027         ASSERT(st->memlist);
1028 
1029         /*
1030          * The size of the memory list may be much larger than the
1031          * size of valid data (map_size). Cache only the valid data
1032          */
1033         map_size = DI_ALL_PTR(st)->map_size;
1034         if (map_size == 0 || map_size < sizeof (struct di_all) ||
1035             map_size > st->mem_size) {
1036                 CACHE_DEBUG((DI_ERR, "cannot cache: bad size: 0x%x", map_size));
1037                 return (0);
1038         }
1039 
1040         cache->cache_data = kmem_alloc(map_size, KM_SLEEP);
1041         cache->cache_size = map_size;
1042         di_copymem(st, cache->cache_data, cache->cache_size);
1043 
1044         return (map_size);
1045 }
1046 
1047 /*
1048  * Make sure there is at least "size" bytes memory left before
1049  * going on. Otherwise, start on a new chunk.
1050  */
1051 static di_off_t
1052 di_checkmem(struct di_state *st, di_off_t off, size_t size)
1053 {
1054         dcmn_err3((CE_CONT, "di_checkmem: off=%x size=%x\n",
1055             off, (int)size));
1056 
1057         /*
1058          * di_checkmem() shouldn't be called with a size of zero.
1059          * But in case it is, we want to make sure we return a valid
1060          * offset within the memlist and not an offset that points us
1061          * at the end of the memlist.
1062          */
1063         if (size == 0) {
1064                 dcmn_err((CE_WARN, "di_checkmem: invalid zero size used"));
1065                 size = 1;
1066         }
1067 
1068         off = DI_ALIGN(off);
1069         if ((st->mem_size - off) < size) {
1070                 off = st->mem_size;
1071                 di_allocmem(st, size);
1072         }
1073 
1074         /* verify that return value is aligned */
1075         ASSERT(off == DI_ALIGN(off));
1076         return (off);
1077 }
1078 
1079 /*
1080  * Copy the private data format from ioctl arg.
1081  * On success, the ending offset is returned. On error 0 is returned.
1082  */
1083 static di_off_t
1084 di_copyformat(di_off_t off, struct di_state *st, intptr_t arg, int mode)
1085 {
1086         di_off_t                size;
1087         struct di_priv_data     *priv;
1088         struct di_all           *all = DI_ALL_PTR(st);
1089 
1090         dcmn_err2((CE_CONT, "di_copyformat: off=%x, arg=%p mode=%x\n",
1091             off, (void *)arg, mode));
1092 
1093         /*
1094          * Copyin data and check version.
1095          * We only handle private data version 0.
1096          */
1097         priv = kmem_alloc(sizeof (struct di_priv_data), KM_SLEEP);
1098         if ((ddi_copyin((void *)arg, priv, sizeof (struct di_priv_data),
1099             mode) != 0) || (priv->version != DI_PRIVDATA_VERSION_0)) {
1100                 kmem_free(priv, sizeof (struct di_priv_data));
1101                 return (0);
1102         }
1103 
1104         /*
1105          * Save di_priv_data copied from userland in snapshot.
1106          */
1107         all->pd_version = priv->version;
1108         all->n_ppdata = priv->n_parent;
1109         all->n_dpdata = priv->n_driver;
1110 
1111         /*
1112          * copyin private data format, modify offset accordingly
1113          */
1114         if (all->n_ppdata) { /* parent private data format */
1115                 /*
1116                  * check memory
1117                  */
1118                 size = all->n_ppdata * sizeof (struct di_priv_format);
1119                 all->ppdata_format = off = di_checkmem(st, off, size);
1120                 if (ddi_copyin(priv->parent, di_mem_addr(st, off), size,
1121                     mode) != 0) {
1122                         kmem_free(priv, sizeof (struct di_priv_data));
1123                         return (0);
1124                 }
1125 
1126                 off += size;
1127         }
1128 
1129         if (all->n_dpdata) { /* driver private data format */
1130                 /*
1131                  * check memory
1132                  */
1133                 size = all->n_dpdata * sizeof (struct di_priv_format);
1134                 all->dpdata_format = off = di_checkmem(st, off, size);
1135                 if (ddi_copyin(priv->driver, di_mem_addr(st, off), size,
1136                     mode) != 0) {
1137                         kmem_free(priv, sizeof (struct di_priv_data));
1138                         return (0);
1139                 }
1140 
1141                 off += size;
1142         }
1143 
1144         kmem_free(priv, sizeof (struct di_priv_data));
1145         return (off);
1146 }
1147 
1148 /*
1149  * Return the real address based on the offset (off) within snapshot
1150  */
1151 static void *
1152 di_mem_addr(struct di_state *st, di_off_t off)
1153 {
1154         struct di_mem   *dcp = st->memlist;
1155 
1156         dcmn_err3((CE_CONT, "di_mem_addr: dcp=%p off=%x\n",
1157             (void *)dcp, off));
1158 
1159         ASSERT(off < st->mem_size);
1160 
1161         while (off >= dcp->buf_size) {
1162                 off -= dcp->buf_size;
1163                 dcp = dcp->next;
1164         }
1165 
1166         dcmn_err3((CE_CONT, "di_mem_addr: new off=%x, return = %p\n",
1167             off, (void *)(dcp->buf + off)));
1168 
1169         return (dcp->buf + off);
1170 }
1171 
1172 /*
1173  * Ideally we would use the whole key to derive the hash
1174  * value. However, the probability that two keys will
1175  * have the same dip (or pip) is very low, so
1176  * hashing by dip (or pip) pointer should suffice.
1177  */
1178 static uint_t
1179 di_hash_byptr(void *arg, mod_hash_key_t key)
1180 {
1181         struct di_key   *dik = key;
1182         size_t          rshift;
1183         void            *ptr;
1184 
1185         ASSERT(arg == NULL);
1186 
1187         switch (dik->k_type) {
1188         case DI_DKEY:
1189                 ptr = dik->k_u.dkey.dk_dip;
1190                 rshift = highbit(sizeof (struct dev_info));
1191                 break;
1192         case DI_PKEY:
1193                 ptr = dik->k_u.pkey.pk_pip;
1194                 rshift = highbit(sizeof (struct mdi_pathinfo));
1195                 break;
1196         default:
1197                 panic("devinfo: unknown key type");
1198                 /*NOTREACHED*/
1199         }
1200         return (mod_hash_byptr((void *)rshift, ptr));
1201 }
1202 
1203 static void
1204 di_key_dtor(mod_hash_key_t key)
1205 {
1206         char            *path_addr;
1207         struct di_key   *dik = key;
1208 
1209         switch (dik->k_type) {
1210         case DI_DKEY:
1211                 break;
1212         case DI_PKEY:
1213                 path_addr = dik->k_u.pkey.pk_path_addr;
1214                 if (path_addr)
1215                         kmem_free(path_addr, strlen(path_addr) + 1);
1216                 break;
1217         default:
1218                 panic("devinfo: unknown key type");
1219                 /*NOTREACHED*/
1220         }
1221 
1222         kmem_free(dik, sizeof (struct di_key));
1223 }
1224 
1225 static int
1226 di_dkey_cmp(struct di_dkey *dk1, struct di_dkey *dk2)
1227 {
1228         if (dk1->dk_dip !=  dk2->dk_dip)
1229                 return (dk1->dk_dip > dk2->dk_dip ? 1 : -1);
1230 
1231         if (dk1->dk_major != DDI_MAJOR_T_NONE &&
1232             dk2->dk_major != DDI_MAJOR_T_NONE) {
1233                 if (dk1->dk_major !=  dk2->dk_major)
1234                         return (dk1->dk_major > dk2->dk_major ? 1 : -1);
1235 
1236                 if (dk1->dk_inst !=  dk2->dk_inst)
1237                         return (dk1->dk_inst > dk2->dk_inst ? 1 : -1);
1238         }
1239 
1240         if (dk1->dk_nodeid != dk2->dk_nodeid)
1241                 return (dk1->dk_nodeid > dk2->dk_nodeid ? 1 : -1);
1242 
1243         return (0);
1244 }
1245 
1246 static int
1247 di_pkey_cmp(struct di_pkey *pk1, struct di_pkey *pk2)
1248 {
1249         char    *p1, *p2;
1250         int     rv;
1251 
1252         if (pk1->pk_pip !=  pk2->pk_pip)
1253                 return (pk1->pk_pip > pk2->pk_pip ? 1 : -1);
1254 
1255         p1 = pk1->pk_path_addr;
1256         p2 = pk2->pk_path_addr;
1257 
1258         p1 = p1 ? p1 : "";
1259         p2 = p2 ? p2 : "";
1260 
1261         rv = strcmp(p1, p2);
1262         if (rv)
1263                 return (rv > 0  ? 1 : -1);
1264 
1265         if (pk1->pk_client !=  pk2->pk_client)
1266                 return (pk1->pk_client > pk2->pk_client ? 1 : -1);
1267 
1268         if (pk1->pk_phci !=  pk2->pk_phci)
1269                 return (pk1->pk_phci > pk2->pk_phci ? 1 : -1);
1270 
1271         return (0);
1272 }
1273 
1274 static int
1275 di_key_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
1276 {
1277         struct di_key   *dik1, *dik2;
1278 
1279         dik1 = key1;
1280         dik2 = key2;
1281 
1282         if (dik1->k_type != dik2->k_type) {
1283                 panic("devinfo: mismatched keys");
1284                 /*NOTREACHED*/
1285         }
1286 
1287         switch (dik1->k_type) {
1288         case DI_DKEY:
1289                 return (di_dkey_cmp(&(dik1->k_u.dkey), &(dik2->k_u.dkey)));
1290         case DI_PKEY:
1291                 return (di_pkey_cmp(&(dik1->k_u.pkey), &(dik2->k_u.pkey)));
1292         default:
1293                 panic("devinfo: unknown key type");
1294                 /*NOTREACHED*/
1295         }
1296 }
1297 
1298 static void
1299 di_copy_aliases(struct di_state *st, alias_pair_t *apair, di_off_t *offp)
1300 {
1301         di_off_t                off;
1302         struct di_all           *all = DI_ALL_PTR(st);
1303         struct di_alias         *di_alias;
1304         di_off_t                curroff;
1305         dev_info_t              *currdip;
1306         size_t                  size;
1307 
1308         currdip = NULL;
1309         if (resolve_pathname(apair->pair_alias, &currdip, NULL, NULL) != 0) {
1310                 return;
1311         }
1312 
1313         if (di_dip_find(st, currdip, &curroff) != 0) {
1314                 ndi_rele_devi(currdip);
1315                 return;
1316         }
1317         ndi_rele_devi(currdip);
1318 
1319         off = *offp;
1320         size = sizeof (struct di_alias);
1321         size += strlen(apair->pair_alias) + 1;
1322         off = di_checkmem(st, off, size);
1323         di_alias = DI_ALIAS(di_mem_addr(st, off));
1324 
1325         di_alias->self = off;
1326         di_alias->next = all->aliases;
1327         all->aliases = off;
1328         (void) strcpy(di_alias->alias, apair->pair_alias);
1329         di_alias->curroff = curroff;
1330 
1331         off += size;
1332 
1333         *offp = off;
1334 }
1335 
1336 /*
1337  * This is the main function that takes a snapshot
1338  */
1339 static di_off_t
1340 di_snapshot(struct di_state *st)
1341 {
1342         di_off_t        off;
1343         struct di_all   *all;
1344         dev_info_t      *rootnode;
1345         char            buf[80];
1346         int             plen;
1347         char            *path;
1348         vnode_t         *vp;
1349         int             i;
1350 
1351         all = DI_ALL_PTR(st);
1352         dcmn_err((CE_CONT, "Taking a snapshot of devinfo tree...\n"));
1353 
1354         /*
1355          * Translate requested root path if an alias and snap-root != "/"
1356          */
1357         if (ddi_aliases_present == B_TRUE && strcmp(all->root_path, "/") != 0) {
1358                 /* If there is no redirected alias, use root_path as is */
1359                 rootnode = ddi_alias_redirect(all->root_path);
1360                 if (rootnode) {
1361                         (void) ddi_pathname(rootnode, all->root_path);
1362                         goto got_root;
1363                 }
1364         }
1365 
1366         /*
1367          * Verify path before entrusting it to e_ddi_hold_devi_by_path because
1368          * some platforms have OBP bugs where executing the NDI_PROMNAME code
1369          * path against an invalid path results in panic.  The lookupnameat
1370          * is done relative to rootdir without a leading '/' on "devices/"
1371          * to force the lookup to occur in the global zone.
1372          */
1373         plen = strlen("devices/") + strlen(all->root_path) + 1;
1374         path = kmem_alloc(plen, KM_SLEEP);
1375         (void) snprintf(path, plen, "devices/%s", all->root_path);
1376         if (lookupnameat(path, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp, rootdir)) {
1377                 dcmn_err((CE_CONT, "Devinfo node %s not found\n",
1378                     all->root_path));
1379                 kmem_free(path, plen);
1380                 return (0);
1381         }
1382         kmem_free(path, plen);
1383         VN_RELE(vp);
1384 
1385         /*
1386          * Hold the devinfo node referred by the path.
1387          */
1388         rootnode = e_ddi_hold_devi_by_path(all->root_path, 0);
1389         if (rootnode == NULL) {
1390                 dcmn_err((CE_CONT, "Devinfo node %s not found\n",
1391                     all->root_path));
1392                 return (0);
1393         }
1394 
1395 got_root:
1396         (void) snprintf(buf, sizeof (buf),
1397             "devinfo registered dips (statep=%p)", (void *)st);
1398 
1399         st->reg_dip_hash = mod_hash_create_extended(buf, 64,
1400             di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
1401             NULL, di_key_cmp, KM_SLEEP);
1402 
1403 
1404         (void) snprintf(buf, sizeof (buf),
1405             "devinfo registered pips (statep=%p)", (void *)st);
1406 
1407         st->reg_pip_hash = mod_hash_create_extended(buf, 64,
1408             di_key_dtor, mod_hash_null_valdtor, di_hash_byptr,
1409             NULL, di_key_cmp, KM_SLEEP);
1410 
1411         if (DINFOHP & st->command) {
1412                 list_create(&st->hp_list, sizeof (i_hp_t),
1413                     offsetof(i_hp_t, hp_link));
1414         }
1415 
1416         /*
1417          * copy the device tree
1418          */
1419         off = di_copytree(DEVI(rootnode), &all->top_devinfo, st);
1420 
1421         if (DINFOPATH & st->command) {
1422                 mdi_walk_vhcis(build_vhci_list, st);
1423         }
1424 
1425         if (DINFOHP & st->command) {
1426                 di_hotplug_children(st);
1427         }
1428 
1429         ddi_release_devi(rootnode);
1430 
1431         /*
1432          * copy the devnames array
1433          */
1434         all->devnames = off;
1435         off = di_copydevnm(&all->devnames, st);
1436 
1437 
1438         /* initialize the hash tables */
1439         st->lnode_count = 0;
1440         st->link_count = 0;
1441 
1442         if (DINFOLYR & st->command) {
1443                 off = di_getlink_data(off, st);
1444         }
1445 
1446         all->aliases = 0;
1447         if (ddi_aliases_present == B_FALSE)
1448                 goto done;
1449 
1450         for (i = 0; i < ddi_aliases.dali_num_pairs; i++) {
1451                 di_copy_aliases(st, &(ddi_aliases.dali_alias_pairs[i]), &off);
1452         }
1453 
1454 done:
1455         /*
1456          * Free up hash tables
1457          */
1458         mod_hash_destroy_hash(st->reg_dip_hash);
1459         mod_hash_destroy_hash(st->reg_pip_hash);
1460 
1461         /*
1462          * Record the timestamp now that we are done with snapshot.
1463          *
1464          * We compute the checksum later and then only if we cache
1465          * the snapshot, since checksumming adds some overhead.
1466          * The checksum is checked later if we read the cache file.
1467          * from disk.
1468          *
1469          * Set checksum field to 0 as CRC is calculated with that
1470          * field set to 0.
1471          */
1472         all->snapshot_time = ddi_get_time();
1473         all->cache_checksum = 0;
1474 
1475         ASSERT(all->snapshot_time != 0);
1476 
1477         return (off);
1478 }
1479 
1480 /*
1481  * Take a snapshot and clean /etc/devices files if DINFOCLEANUP is set
1482  */
1483 static di_off_t
1484 di_snapshot_and_clean(struct di_state *st)
1485 {
1486         di_off_t        off;
1487 
1488         modunload_disable();
1489         off = di_snapshot(st);
1490         if (off != 0 && (st->command & DINFOCLEANUP)) {
1491                 ASSERT(DEVICES_FILES_CLEANABLE(st));
1492                 /*
1493                  * Cleanup /etc/devices files:
1494                  * In order to accurately account for the system configuration
1495                  * in /etc/devices files, the appropriate drivers must be
1496                  * fully configured before the cleanup starts.
1497                  * So enable modunload only after the cleanup.
1498                  */
1499                 i_ddi_clean_devices_files();
1500                 /*
1501                  * Remove backing store nodes for unused devices,
1502                  * which retain past permissions customizations
1503                  * and may be undesired for newly configured devices.
1504                  */
1505                 dev_devices_cleanup();
1506         }
1507         modunload_enable();
1508 
1509         return (off);
1510 }
1511 
1512 /*
1513  * construct vhci linkage in the snapshot.
1514  */
1515 static int
1516 build_vhci_list(dev_info_t *vh_devinfo, void *arg)
1517 {
1518         struct di_all   *all;
1519         struct di_node  *me;
1520         struct di_state *st;
1521         di_off_t        off;
1522         phci_walk_arg_t pwa;
1523 
1524         dcmn_err3((CE_CONT, "build_vhci list\n"));
1525 
1526         dcmn_err3((CE_CONT, "vhci node %s%d\n",
1527             ddi_driver_name(vh_devinfo), ddi_get_instance(vh_devinfo)));
1528 
1529         st = (struct di_state *)arg;
1530         if (di_dip_find(st, vh_devinfo, &off) != 0) {
1531                 dcmn_err((CE_WARN, "di_dip_find error for the given node\n"));
1532                 return (DDI_WALK_TERMINATE);
1533         }
1534 
1535         dcmn_err3((CE_CONT, "st->mem_size: %d vh_devinfo off: 0x%x\n",
1536             st->mem_size, off));
1537 
1538         all = DI_ALL_PTR(st);
1539         if (all->top_vhci_devinfo == 0) {
1540                 all->top_vhci_devinfo = off;
1541         } else {
1542                 me = DI_NODE(di_mem_addr(st, all->top_vhci_devinfo));
1543 
1544                 while (me->next_vhci != 0) {
1545                         me = DI_NODE(di_mem_addr(st, me->next_vhci));
1546                 }
1547 
1548                 me->next_vhci = off;
1549         }
1550 
1551         pwa.off = off;
1552         pwa.st = st;
1553         mdi_vhci_walk_phcis(vh_devinfo, build_phci_list, &pwa);
1554 
1555         return (DDI_WALK_CONTINUE);
1556 }
1557 
1558 /*
1559  * construct phci linkage for the given vhci in the snapshot.
1560  */
1561 static int
1562 build_phci_list(dev_info_t *ph_devinfo, void *arg)
1563 {
1564         struct di_node  *vh_di_node;
1565         struct di_node  *me;
1566         phci_walk_arg_t *pwa;
1567         di_off_t        off;
1568 
1569         pwa = (phci_walk_arg_t *)arg;
1570 
1571         dcmn_err3((CE_CONT, "build_phci list for vhci at offset: 0x%x\n",
1572             pwa->off));
1573 
1574         vh_di_node = DI_NODE(di_mem_addr(pwa->st, pwa->off));
1575         if (di_dip_find(pwa->st, ph_devinfo, &off) != 0) {
1576                 dcmn_err((CE_WARN, "di_dip_find error for the given node\n"));
1577                 return (DDI_WALK_TERMINATE);
1578         }
1579 
1580         dcmn_err3((CE_CONT, "phci node %s%d, at offset 0x%x\n",
1581             ddi_driver_name(ph_devinfo), ddi_get_instance(ph_devinfo), off));
1582 
1583         if (vh_di_node->top_phci == 0) {
1584                 vh_di_node->top_phci = off;
1585                 return (DDI_WALK_CONTINUE);
1586         }
1587 
1588         me = DI_NODE(di_mem_addr(pwa->st, vh_di_node->top_phci));
1589 
1590         while (me->next_phci != 0) {
1591                 me = DI_NODE(di_mem_addr(pwa->st, me->next_phci));
1592         }
1593         me->next_phci = off;
1594 
1595         return (DDI_WALK_CONTINUE);
1596 }
1597 
1598 /*
1599  * Assumes all devinfo nodes in device tree have been snapshotted
1600  */
1601 static void
1602 snap_driver_list(struct di_state *st, struct devnames *dnp, di_off_t *off_p)
1603 {
1604         struct dev_info *node;
1605         struct di_node  *me;
1606         di_off_t        off;
1607 
1608         ASSERT(mutex_owned(&dnp->dn_lock));
1609 
1610         node = DEVI(dnp->dn_head);
1611         for (; node; node = node->devi_next) {
1612                 if (di_dip_find(st, (dev_info_t *)node, &off) != 0)
1613                         continue;
1614 
1615                 ASSERT(off > 0);
1616                 me = DI_NODE(di_mem_addr(st, off));
1617                 ASSERT(me->next == 0 || me->next == -1);
1618                 /*
1619                  * Only nodes which were BOUND when they were
1620                  * snapshotted will be added to per-driver list.
1621                  */
1622                 if (me->next != -1)
1623                         continue;
1624 
1625                 *off_p = off;
1626                 off_p = &me->next;
1627         }
1628 
1629         *off_p = 0;
1630 }
1631 
1632 /*
1633  * Copy the devnames array, so we have a list of drivers in the snapshot.
1634  * Also makes it possible to locate the per-driver devinfo nodes.
1635  */
1636 static di_off_t
1637 di_copydevnm(di_off_t *off_p, struct di_state *st)
1638 {
1639         int             i;
1640         di_off_t        off;
1641         size_t          size;
1642         struct di_devnm *dnp;
1643 
1644         dcmn_err2((CE_CONT, "di_copydevnm: *off_p = %p\n", (void *)off_p));
1645 
1646         /*
1647          * make sure there is some allocated memory
1648          */
1649         size = devcnt * sizeof (struct di_devnm);
1650         *off_p = off = di_checkmem(st, *off_p, size);
1651         dnp = DI_DEVNM(di_mem_addr(st, off));
1652         off += size;
1653 
1654         dcmn_err((CE_CONT, "Start copying devnamesp[%d] at offset 0x%x\n",
1655             devcnt, off));
1656 
1657         for (i = 0; i < devcnt; i++) {
1658                 if (devnamesp[i].dn_name == NULL) {
1659                         continue;
1660                 }
1661 
1662                 /*
1663                  * dn_name is not freed during driver unload or removal.
1664                  *
1665                  * There is a race condition when make_devname() changes
1666                  * dn_name during our strcpy. This should be rare since
1667                  * only add_drv does this. At any rate, we never had a
1668                  * problem with ddi_name_to_major(), which should have
1669                  * the same problem.
1670                  */
1671                 dcmn_err2((CE_CONT, "di_copydevnm: %s%d, off=%x\n",
1672                     devnamesp[i].dn_name, devnamesp[i].dn_instance, off));
1673 
1674                 size = strlen(devnamesp[i].dn_name) + 1;
1675                 dnp[i].name = off = di_checkmem(st, off, size);
1676                 (void) strcpy((char *)di_mem_addr(st, off),
1677                     devnamesp[i].dn_name);
1678                 off += size;
1679 
1680                 mutex_enter(&devnamesp[i].dn_lock);
1681 
1682                 /*
1683                  * Snapshot per-driver node list
1684                  */
1685                 snap_driver_list(st, &devnamesp[i], &dnp[i].head);
1686 
1687                 /*
1688                  * This is not used by libdevinfo, leave it for now
1689                  */
1690                 dnp[i].flags = devnamesp[i].dn_flags;
1691                 dnp[i].instance = devnamesp[i].dn_instance;
1692 
1693                 /*
1694                  * get global properties
1695                  */
1696                 if ((DINFOPROP & st->command) &&
1697                     devnamesp[i].dn_global_prop_ptr) {
1698                         dnp[i].global_prop = off;
1699                         off = di_getprop(DI_PROP_GLB_LIST,
1700                             &devnamesp[i].dn_global_prop_ptr->prop_list,
1701                             &dnp[i].global_prop, st, NULL);
1702                 }
1703 
1704                 /*
1705                  * Bit encode driver ops: & bus_ops, cb_ops, & cb_ops->cb_str
1706                  */
1707                 if (CB_DRV_INSTALLED(devopsp[i])) {
1708                         if (devopsp[i]->devo_cb_ops) {
1709                                 dnp[i].ops |= DI_CB_OPS;
1710                                 if (devopsp[i]->devo_cb_ops->cb_str)
1711                                         dnp[i].ops |= DI_STREAM_OPS;
1712                         }
1713                         if (NEXUS_DRV(devopsp[i])) {
1714                                 dnp[i].ops |= DI_BUS_OPS;
1715                         }
1716                 }
1717 
1718                 mutex_exit(&devnamesp[i].dn_lock);
1719         }
1720 
1721         dcmn_err((CE_CONT, "End copying devnamesp at offset 0x%x\n", off));
1722 
1723         return (off);
1724 }
1725 
1726 /*
1727  * Copy the kernel devinfo tree. The tree and the devnames array forms
1728  * the entire snapshot (see also di_copydevnm).
1729  */
1730 static di_off_t
1731 di_copytree(struct dev_info *root, di_off_t *off_p, struct di_state *st)
1732 {
1733         di_off_t        off;
1734         struct dev_info *node;
1735         struct di_stack *dsp = kmem_zalloc(sizeof (struct di_stack), KM_SLEEP);
1736 
1737         dcmn_err((CE_CONT, "di_copytree: root = %p, *off_p = %x\n",
1738             (void *)root, *off_p));
1739 
1740         /* force attach drivers */
1741         if (i_ddi_devi_attached((dev_info_t *)root) &&
1742             (st->command & DINFOSUBTREE) && (st->command & DINFOFORCE)) {
1743                 (void) ndi_devi_config((dev_info_t *)root,
1744                     NDI_CONFIG | NDI_DEVI_PERSIST | NDI_NO_EVENT |
1745                     NDI_DRV_CONF_REPROBE);
1746         }
1747 
1748         /*
1749          * Push top_devinfo onto a stack
1750          *
1751          * The stack is necessary to avoid recursion, which can overrun
1752          * the kernel stack.
1753          */
1754         PUSH_STACK(dsp, root, off_p);
1755 
1756         /*
1757          * As long as there is a node on the stack, copy the node.
1758          * di_copynode() is responsible for pushing and popping
1759          * child and sibling nodes on the stack.
1760          */
1761         while (!EMPTY_STACK(dsp)) {
1762                 node = TOP_NODE(dsp);
1763                 off = di_copynode(node, dsp, st);
1764         }
1765 
1766         /*
1767          * Free the stack structure
1768          */
1769         kmem_free(dsp, sizeof (struct di_stack));
1770 
1771         return (off);
1772 }
1773 
1774 /*
1775  * This is the core function, which copies all data associated with a single
1776  * node into the snapshot. The amount of information is determined by the
1777  * ioctl command.
1778  */
1779 static di_off_t
1780 di_copynode(struct dev_info *node, struct di_stack *dsp, struct di_state *st)
1781 {
1782         di_off_t        off;
1783         struct di_node  *me;
1784         size_t          size;
1785         struct dev_info *n;
1786 
1787         dcmn_err2((CE_CONT, "di_copynode: depth = %x\n", dsp->depth));
1788         ASSERT((node != NULL) && (node == TOP_NODE(dsp)));
1789 
1790         /*
1791          * check memory usage, and fix offsets accordingly.
1792          */
1793         size = sizeof (struct di_node);
1794         *(TOP_OFFSET(dsp)) = off = di_checkmem(st, *(TOP_OFFSET(dsp)), size);
1795         me = DI_NODE(di_mem_addr(st, off));
1796         me->self = off;
1797         off += size;
1798 
1799         dcmn_err((CE_CONT, "copy node %s, instance #%d, at offset 0x%x\n",
1800             node->devi_node_name, node->devi_instance, off));
1801 
1802         /*
1803          * Node parameters:
1804          * self         -- offset of current node within snapshot
1805          * nodeid       -- pointer to PROM node (tri-valued)
1806          * state        -- hot plugging device state
1807          * node_state   -- devinfo node state
1808          */
1809         me->instance = node->devi_instance;
1810         me->nodeid = node->devi_nodeid;
1811         me->node_class = node->devi_node_class;
1812         me->attributes = node->devi_node_attributes;
1813         me->state = node->devi_state;
1814         me->flags = node->devi_flags;
1815         me->node_state = node->devi_node_state;
1816         me->next_vhci = 0;           /* Filled up by build_vhci_list. */
1817         me->top_phci = 0;            /* Filled up by build_phci_list. */
1818         me->next_phci = 0;           /* Filled up by build_phci_list. */
1819         me->multipath_component = MULTIPATH_COMPONENT_NONE; /* set default. */
1820         me->user_private_data = NULL;
1821 
1822         /*
1823          * Get parent's offset in snapshot from the stack
1824          * and store it in the current node
1825          */
1826         if (dsp->depth > 1) {
1827                 me->parent = *(PARENT_OFFSET(dsp));
1828         }
1829 
1830         /*
1831          * Save the offset of this di_node in a hash table.
1832          * This is used later to resolve references to this
1833          * dip from other parts of the tree (per-driver list,
1834          * multipathing linkages, layered usage linkages).
1835          * The key used for the hash table is derived from
1836          * information in the dip.
1837          */
1838         di_register_dip(st, (dev_info_t *)node, me->self);
1839 
1840 #ifdef  DEVID_COMPATIBILITY
1841         /* check for devid as property marker */
1842         if (node->devi_devid_str) {
1843                 ddi_devid_t     devid;
1844 
1845                 /*
1846                  * The devid is now represented as a property. For
1847                  * compatibility with di_devid() interface in libdevinfo we
1848                  * must return it as a binary structure in the snapshot. When
1849                  * (if) di_devid() is removed from libdevinfo then the code
1850                  * related to DEVID_COMPATIBILITY can be removed.
1851                  */
1852                 if (ddi_devid_str_decode(node->devi_devid_str, &devid, NULL) ==
1853                     DDI_SUCCESS) {
1854                         size = ddi_devid_sizeof(devid);
1855                         off = di_checkmem(st, off, size);
1856                         me->devid = off;
1857                         bcopy(devid, di_mem_addr(st, off), size);
1858                         off += size;
1859                         ddi_devid_free(devid);
1860                 }
1861         }
1862 #endif  /* DEVID_COMPATIBILITY */
1863 
1864         if (node->devi_node_name) {
1865                 size = strlen(node->devi_node_name) + 1;
1866                 me->node_name = off = di_checkmem(st, off, size);
1867                 (void) strcpy(di_mem_addr(st, off), node->devi_node_name);
1868                 off += size;
1869         }
1870 
1871         if (node->devi_compat_names && (node->devi_compat_length > 1)) {
1872                 size = node->devi_compat_length;
1873                 me->compat_names = off = di_checkmem(st, off, size);
1874                 me->compat_length = (int)size;
1875                 bcopy(node->devi_compat_names, di_mem_addr(st, off), size);
1876                 off += size;
1877         }
1878 
1879         if (node->devi_addr) {
1880                 size = strlen(node->devi_addr) + 1;
1881                 me->address = off = di_checkmem(st, off, size);
1882                 (void) strcpy(di_mem_addr(st, off), node->devi_addr);
1883                 off += size;
1884         }
1885 
1886         if (node->devi_binding_name) {
1887                 size = strlen(node->devi_binding_name) + 1;
1888                 me->bind_name = off = di_checkmem(st, off, size);
1889                 (void) strcpy(di_mem_addr(st, off), node->devi_binding_name);
1890                 off += size;
1891         }
1892 
1893         me->drv_major = node->devi_major;
1894 
1895         /*
1896          * If the dip is BOUND, set the next pointer of the
1897          * per-instance list to -1, indicating that it is yet to be resolved.
1898          * This will be resolved later in snap_driver_list().
1899          */
1900         if (me->drv_major != -1) {
1901                 me->next = -1;
1902         } else {
1903                 me->next = 0;
1904         }
1905 
1906         /*
1907          * An optimization to skip mutex_enter when not needed.
1908          */
1909         if (!((DINFOMINOR | DINFOPROP | DINFOPATH | DINFOHP) & st->command)) {
1910                 goto priv_data;
1911         }
1912 
1913         /*
1914          * LOCKING: We already have an active ndi_devi_enter to gather the
1915          * minor data, and we will take devi_lock to gather properties as
1916          * needed off di_getprop.
1917          */
1918         if (!(DINFOMINOR & st->command)) {
1919                 goto path;
1920         }
1921 
1922         ASSERT(DEVI_BUSY_OWNED(node));
1923         if (node->devi_minor) {              /* minor data */
1924                 me->minor_data = off;
1925                 off = di_getmdata(node->devi_minor, &me->minor_data,
1926                     me->self, st);
1927         }
1928 
1929 path:
1930         if (!(DINFOPATH & st->command)) {
1931                 goto property;
1932         }
1933 
1934         if (MDI_VHCI(node)) {
1935                 me->multipath_component = MULTIPATH_COMPONENT_VHCI;
1936         }
1937 
1938         if (MDI_CLIENT(node)) {
1939                 me->multipath_component = MULTIPATH_COMPONENT_CLIENT;
1940                 me->multipath_client = off;
1941                 off = di_getpath_data((dev_info_t *)node, &me->multipath_client,
1942                     me->self, st, 1);
1943                 dcmn_err((CE_WARN, "me->multipath_client = %x for node %p "
1944                     "component type = %d.  off=%d",
1945                     me->multipath_client,
1946                     (void *)node, node->devi_mdi_component, off));
1947         }
1948 
1949         if (MDI_PHCI(node)) {
1950                 me->multipath_component = MULTIPATH_COMPONENT_PHCI;
1951                 me->multipath_phci = off;
1952                 off = di_getpath_data((dev_info_t *)node, &me->multipath_phci,
1953                     me->self, st, 0);
1954                 dcmn_err((CE_WARN, "me->multipath_phci = %x for node %p "
1955                     "component type = %d.  off=%d",
1956                     me->multipath_phci,
1957                     (void *)node, node->devi_mdi_component, off));
1958         }
1959 
1960 property:
1961         if (!(DINFOPROP & st->command)) {
1962                 goto hotplug_data;
1963         }
1964 
1965         if (node->devi_drv_prop_ptr) {       /* driver property list */
1966                 me->drv_prop = off;
1967                 off = di_getprop(DI_PROP_DRV_LIST, &node->devi_drv_prop_ptr,
1968                     &me->drv_prop, st, node);
1969         }
1970 
1971         if (node->devi_sys_prop_ptr) {       /* system property list */
1972                 me->sys_prop = off;
1973                 off = di_getprop(DI_PROP_SYS_LIST, &node->devi_sys_prop_ptr,
1974                     &me->sys_prop, st, node);
1975         }
1976 
1977         if (node->devi_hw_prop_ptr) {        /* hardware property list */
1978                 me->hw_prop = off;
1979                 off = di_getprop(DI_PROP_HW_LIST, &node->devi_hw_prop_ptr,
1980                     &me->hw_prop, st, node);
1981         }
1982 
1983         if (node->devi_global_prop_list == NULL) {
1984                 me->glob_prop = (di_off_t)-1;        /* not global property */
1985         } else {
1986                 /*
1987                  * Make copy of global property list if this devinfo refers
1988                  * global properties different from what's on the devnames
1989                  * array. It can happen if there has been a forced
1990                  * driver.conf update. See mod_drv(1M).
1991                  */
1992                 ASSERT(me->drv_major != -1);
1993                 if (node->devi_global_prop_list !=
1994                     devnamesp[me->drv_major].dn_global_prop_ptr) {
1995                         me->glob_prop = off;
1996                         off = di_getprop(DI_PROP_GLB_LIST,
1997                             &node->devi_global_prop_list->prop_list,
1998                             &me->glob_prop, st, node);
1999                 }
2000         }
2001 
2002 hotplug_data:
2003         if (!(DINFOHP & st->command)) {
2004                 goto priv_data;
2005         }
2006 
2007         if (node->devi_hp_hdlp) {    /* hotplug data */
2008                 me->hp_data = off;
2009                 off = di_gethpdata(node->devi_hp_hdlp, &me->hp_data, st);
2010         }
2011 
2012 priv_data:
2013         if (!(DINFOPRIVDATA & st->command)) {
2014                 goto pm_info;
2015         }
2016 
2017         if (ddi_get_parent_data((dev_info_t *)node) != NULL) {
2018                 me->parent_data = off;
2019                 off = di_getppdata(node, &me->parent_data, st);
2020         }
2021 
2022         if (ddi_get_driver_private((dev_info_t *)node) != NULL) {
2023                 me->driver_data = off;
2024                 off = di_getdpdata(node, &me->driver_data, st);
2025         }
2026 
2027 pm_info: /* NOT implemented */
2028 
2029 subtree:
2030         /* keep the stack aligned */
2031         off = DI_ALIGN(off);
2032 
2033         if (!(DINFOSUBTREE & st->command)) {
2034                 POP_STACK(dsp);
2035                 return (off);
2036         }
2037 
2038 child:
2039         /*
2040          * If there is a visible child--push child onto stack.
2041          * Hold the parent (me) busy while doing so.
2042          */
2043         if ((n = node->devi_child) != NULL) {
2044                 /* skip hidden nodes */
2045                 while (n && ndi_dev_is_hidden_node((dev_info_t *)n))
2046                         n = n->devi_sibling;
2047                 if (n) {
2048                         me->child = off;
2049                         PUSH_STACK(dsp, n, &me->child);
2050                         return (me->child);
2051                 }
2052         }
2053 
2054 sibling:
2055         /*
2056          * Done with any child nodes, unroll the stack till a visible
2057          * sibling of a parent node is found or root node is reached.
2058          */
2059         POP_STACK(dsp);
2060         while (!EMPTY_STACK(dsp)) {
2061                 if ((n = node->devi_sibling) != NULL) {
2062                         /* skip hidden nodes */
2063                         while (n && ndi_dev_is_hidden_node((dev_info_t *)n))
2064                                 n = n->devi_sibling;
2065                         if (n) {
2066                                 me->sibling = DI_ALIGN(off);
2067                                 PUSH_STACK(dsp, n, &me->sibling);
2068                                 return (me->sibling);
2069                         }
2070                 }
2071                 node = TOP_NODE(dsp);
2072                 me = DI_NODE(di_mem_addr(st, *(TOP_OFFSET(dsp))));
2073                 POP_STACK(dsp);
2074         }
2075 
2076         /*
2077          * DONE with all nodes
2078          */
2079         return (off);
2080 }
2081 
2082 static i_lnode_t *
2083 i_lnode_alloc(int modid)
2084 {
2085         i_lnode_t       *i_lnode;
2086 
2087         i_lnode = kmem_zalloc(sizeof (i_lnode_t), KM_SLEEP);
2088 
2089         ASSERT(modid != -1);
2090         i_lnode->modid = modid;
2091 
2092         return (i_lnode);
2093 }
2094 
2095 static void
2096 i_lnode_free(i_lnode_t *i_lnode)
2097 {
2098         kmem_free(i_lnode, sizeof (i_lnode_t));
2099 }
2100 
2101 static void
2102 i_lnode_check_free(i_lnode_t *i_lnode)
2103 {
2104         /* This lnode and its dip must have been snapshotted */
2105         ASSERT(i_lnode->self > 0);
2106         ASSERT(i_lnode->di_node->self > 0);
2107 
2108         /* at least 1 link (in or out) must exist for this lnode */
2109         ASSERT(i_lnode->link_in || i_lnode->link_out);
2110 
2111         i_lnode_free(i_lnode);
2112 }
2113 
2114 static i_link_t *
2115 i_link_alloc(int spec_type)
2116 {
2117         i_link_t        *i_link;
2118 
2119         i_link = kmem_zalloc(sizeof (i_link_t), KM_SLEEP);
2120         i_link->spec_type = spec_type;
2121 
2122         return (i_link);
2123 }
2124 
2125 static void
2126 i_link_check_free(i_link_t *i_link)
2127 {
2128         /* This link must have been snapshotted */
2129         ASSERT(i_link->self > 0);
2130 
2131         /* Both endpoint lnodes must exist for this link */
2132         ASSERT(i_link->src_lnode);
2133         ASSERT(i_link->tgt_lnode);
2134 
2135         kmem_free(i_link, sizeof (i_link_t));
2136 }
2137 
2138 /*ARGSUSED*/
2139 static uint_t
2140 i_lnode_hashfunc(void *arg, mod_hash_key_t key)
2141 {
2142         i_lnode_t       *i_lnode = (i_lnode_t *)key;
2143         struct di_node  *ptr;
2144         dev_t           dev;
2145 
2146         dev = i_lnode->devt;
2147         if (dev != DDI_DEV_T_NONE)
2148                 return (i_lnode->modid + getminor(dev) + getmajor(dev));
2149 
2150         ptr = i_lnode->di_node;
2151         ASSERT(ptr->self > 0);
2152         if (ptr) {
2153                 uintptr_t k = (uintptr_t)ptr;
2154                 k >>= (int)highbit(sizeof (struct di_node));
2155                 return ((uint_t)k);
2156         }
2157 
2158         return (i_lnode->modid);
2159 }
2160 
2161 static int
2162 i_lnode_cmp(void *arg1, void *arg2)
2163 {
2164         i_lnode_t       *i_lnode1 = (i_lnode_t *)arg1;
2165         i_lnode_t       *i_lnode2 = (i_lnode_t *)arg2;
2166 
2167         if (i_lnode1->modid != i_lnode2->modid) {
2168                 return ((i_lnode1->modid < i_lnode2->modid) ? -1 : 1);
2169         }
2170 
2171         if (i_lnode1->di_node != i_lnode2->di_node)
2172                 return ((i_lnode1->di_node < i_lnode2->di_node) ? -1 : 1);
2173 
2174         if (i_lnode1->devt != i_lnode2->devt)
2175                 return ((i_lnode1->devt < i_lnode2->devt) ? -1 : 1);
2176 
2177         return (0);
2178 }
2179 
2180 /*
2181  * An lnode represents a {dip, dev_t} tuple. A link represents a
2182  * {src_lnode, tgt_lnode, spec_type} tuple.
2183  * The following callback assumes that LDI framework ref-counts the
2184  * src_dip and tgt_dip while invoking this callback.
2185  */
2186 static int
2187 di_ldi_callback(const ldi_usage_t *ldi_usage, void *arg)
2188 {
2189         struct di_state *st = (struct di_state *)arg;
2190         i_lnode_t       *src_lnode, *tgt_lnode, *i_lnode;
2191         i_link_t        **i_link_next, *i_link;
2192         di_off_t        soff, toff;
2193         mod_hash_val_t  nodep = NULL;
2194         int             res;
2195 
2196         /*
2197          * if the source or target of this device usage information doesn't
2198          * correspond to a device node then we don't report it via
2199          * libdevinfo so return.
2200          */
2201         if ((ldi_usage->src_dip == NULL) || (ldi_usage->tgt_dip == NULL))
2202                 return (LDI_USAGE_CONTINUE);
2203 
2204         ASSERT(e_ddi_devi_holdcnt(ldi_usage->src_dip));
2205         ASSERT(e_ddi_devi_holdcnt(ldi_usage->tgt_dip));
2206 
2207         /*
2208          * Skip the ldi_usage if either src or tgt dip is not in the
2209          * snapshot. This saves us from pruning bad lnodes/links later.
2210          */
2211         if (di_dip_find(st, ldi_usage->src_dip, &soff) != 0)
2212                 return (LDI_USAGE_CONTINUE);
2213         if (di_dip_find(st, ldi_usage->tgt_dip, &toff) != 0)
2214                 return (LDI_USAGE_CONTINUE);
2215 
2216         ASSERT(soff > 0);
2217         ASSERT(toff > 0);
2218 
2219         /*
2220          * allocate an i_lnode and add it to the lnode hash
2221          * if it is not already present. For this particular
2222          * link the lnode is a source, but it may
2223          * participate as tgt or src in any number of layered
2224          * operations - so it may already be in the hash.
2225          */
2226         i_lnode = i_lnode_alloc(ldi_usage->src_modid);
2227         i_lnode->di_node = DI_NODE(di_mem_addr(st, soff));
2228         i_lnode->devt = ldi_usage->src_devt;
2229 
2230         res = mod_hash_find(st->lnode_hash, i_lnode, &nodep);
2231         if (res == MH_ERR_NOTFOUND) {
2232                 /*
2233                  * new i_lnode
2234                  * add it to the hash and increment the lnode count
2235                  */
2236                 res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode);
2237                 ASSERT(res == 0);
2238                 st->lnode_count++;
2239                 src_lnode = i_lnode;
2240         } else {
2241                 /* this i_lnode already exists in the lnode_hash */
2242                 i_lnode_free(i_lnode);
2243                 src_lnode = (i_lnode_t *)nodep;
2244         }
2245 
2246         /*
2247          * allocate a tgt i_lnode and add it to the lnode hash
2248          */
2249         i_lnode = i_lnode_alloc(ldi_usage->tgt_modid);
2250         i_lnode->di_node = DI_NODE(di_mem_addr(st, toff));
2251         i_lnode->devt = ldi_usage->tgt_devt;
2252 
2253         res = mod_hash_find(st->lnode_hash, i_lnode, &nodep);
2254         if (res == MH_ERR_NOTFOUND) {
2255                 /*
2256                  * new i_lnode
2257                  * add it to the hash and increment the lnode count
2258                  */
2259                 res = mod_hash_insert(st->lnode_hash, i_lnode, i_lnode);
2260                 ASSERT(res == 0);
2261                 st->lnode_count++;
2262                 tgt_lnode = i_lnode;
2263         } else {
2264                 /* this i_lnode already exists in the lnode_hash */
2265                 i_lnode_free(i_lnode);
2266                 tgt_lnode = (i_lnode_t *)nodep;
2267         }
2268 
2269         /*
2270          * allocate a i_link
2271          */
2272         i_link = i_link_alloc(ldi_usage->tgt_spec_type);
2273         i_link->src_lnode = src_lnode;
2274         i_link->tgt_lnode = tgt_lnode;
2275 
2276         /*
2277          * add this link onto the src i_lnodes outbound i_link list
2278          */
2279         i_link_next = &(src_lnode->link_out);
2280         while (*i_link_next != NULL) {
2281                 if ((i_lnode_cmp(tgt_lnode, (*i_link_next)->tgt_lnode) == 0) &&
2282                     (i_link->spec_type == (*i_link_next)->spec_type)) {
2283                         /* this link already exists */
2284                         kmem_free(i_link, sizeof (i_link_t));
2285                         return (LDI_USAGE_CONTINUE);
2286                 }
2287                 i_link_next = &((*i_link_next)->src_link_next);
2288         }
2289         *i_link_next = i_link;
2290 
2291         /*
2292          * add this link onto the tgt i_lnodes inbound i_link list
2293          */
2294         i_link_next = &(tgt_lnode->link_in);
2295         while (*i_link_next != NULL) {
2296                 ASSERT(i_lnode_cmp(src_lnode, (*i_link_next)->src_lnode) != 0);
2297                 i_link_next = &((*i_link_next)->tgt_link_next);
2298         }
2299         *i_link_next = i_link;
2300 
2301         /*
2302          * add this i_link to the link hash
2303          */
2304         res = mod_hash_insert(st->link_hash, i_link, i_link);
2305         ASSERT(res == 0);
2306         st->link_count++;
2307 
2308         return (LDI_USAGE_CONTINUE);
2309 }
2310 
2311 struct i_layer_data {
2312         struct di_state *st;
2313         int             lnode_count;
2314         int             link_count;
2315         di_off_t        lnode_off;
2316         di_off_t        link_off;
2317 };
2318 
2319 /*ARGSUSED*/
2320 static uint_t
2321 i_link_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
2322 {
2323         i_link_t                *i_link  = (i_link_t *)key;
2324         struct i_layer_data     *data = arg;
2325         struct di_link          *me;
2326         struct di_lnode         *melnode;
2327         struct di_node          *medinode;
2328 
2329         ASSERT(i_link->self == 0);
2330 
2331         i_link->self = data->link_off +
2332             (data->link_count * sizeof (struct di_link));
2333         data->link_count++;
2334 
2335         ASSERT(data->link_off > 0 && data->link_count > 0);
2336         ASSERT(data->lnode_count == data->st->lnode_count); /* lnodes done */
2337         ASSERT(data->link_count <= data->st->link_count);
2338 
2339         /* fill in fields for the di_link snapshot */
2340         me = DI_LINK(di_mem_addr(data->st, i_link->self));
2341         me->self = i_link->self;
2342         me->spec_type = i_link->spec_type;
2343 
2344         /*
2345          * The src_lnode and tgt_lnode i_lnode_t for this i_link_t
2346          * are created during the LDI table walk. Since we are
2347          * walking the link hash, the lnode hash has already been
2348          * walked and the lnodes have been snapshotted. Save lnode
2349          * offsets.
2350          */
2351         me->src_lnode = i_link->src_lnode->self;
2352         me->tgt_lnode = i_link->tgt_lnode->self;
2353 
2354         /*
2355          * Save this link's offset in the src_lnode snapshot's link_out
2356          * field
2357          */
2358         melnode = DI_LNODE(di_mem_addr(data->st, me->src_lnode));
2359         me->src_link_next = melnode->link_out;
2360         melnode->link_out = me->self;
2361 
2362         /*
2363          * Put this link on the tgt_lnode's link_in field
2364          */
2365         melnode = DI_LNODE(di_mem_addr(data->st, me->tgt_lnode));
2366         me->tgt_link_next = melnode->link_in;
2367         melnode->link_in = me->self;
2368 
2369         /*
2370          * An i_lnode_t is only created if the corresponding dip exists
2371          * in the snapshot. A pointer to the di_node is saved in the
2372          * i_lnode_t when it is allocated. For this link, get the di_node
2373          * for the source lnode. Then put the link on the di_node's list
2374          * of src links
2375          */
2376         medinode = i_link->src_lnode->di_node;
2377         me->src_node_next = medinode->src_links;
2378         medinode->src_links = me->self;
2379 
2380         /*
2381          * Put this link on the tgt_links list of the target
2382          * dip.
2383          */
2384         medinode = i_link->tgt_lnode->di_node;
2385         me->tgt_node_next = medinode->tgt_links;
2386         medinode->tgt_links = me->self;
2387 
2388         return (MH_WALK_CONTINUE);
2389 }
2390 
2391 /*ARGSUSED*/
2392 static uint_t
2393 i_lnode_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
2394 {
2395         i_lnode_t               *i_lnode = (i_lnode_t *)key;
2396         struct i_layer_data     *data = arg;
2397         struct di_lnode         *me;
2398         struct di_node          *medinode;
2399 
2400         ASSERT(i_lnode->self == 0);
2401 
2402         i_lnode->self = data->lnode_off +
2403             (data->lnode_count * sizeof (struct di_lnode));
2404         data->lnode_count++;
2405 
2406         ASSERT(data->lnode_off > 0 && data->lnode_count > 0);
2407         ASSERT(data->link_count == 0); /* links not done yet */
2408         ASSERT(data->lnode_count <= data->st->lnode_count);
2409 
2410         /* fill in fields for the di_lnode snapshot */
2411         me = DI_LNODE(di_mem_addr(data->st, i_lnode->self));
2412         me->self = i_lnode->self;
2413 
2414         if (i_lnode->devt == DDI_DEV_T_NONE) {
2415                 me->dev_major = DDI_MAJOR_T_NONE;
2416                 me->dev_minor = DDI_MAJOR_T_NONE;
2417         } else {
2418                 me->dev_major = getmajor(i_lnode->devt);
2419                 me->dev_minor = getminor(i_lnode->devt);
2420         }
2421 
2422         /*
2423          * The dip corresponding to this lnode must exist in
2424          * the snapshot or we wouldn't have created the i_lnode_t
2425          * during LDI walk. Save the offset of the dip.
2426          */
2427         ASSERT(i_lnode->di_node && i_lnode->di_node->self > 0);
2428         me->node = i_lnode->di_node->self;
2429 
2430         /*
2431          * There must be at least one link in or out of this lnode
2432          * or we wouldn't have created it. These fields will be set
2433          * during the link hash walk.
2434          */
2435         ASSERT((i_lnode->link_in != NULL) || (i_lnode->link_out != NULL));
2436 
2437         /*
2438          * set the offset of the devinfo node associated with this
2439          * lnode. Also update the node_next next pointer.  this pointer
2440          * is set if there are multiple lnodes associated with the same
2441          * devinfo node.  (could occure when multiple minor nodes
2442          * are open for one device, etc.)
2443          */
2444         medinode = i_lnode->di_node;
2445         me->node_next = medinode->lnodes;
2446         medinode->lnodes = me->self;
2447 
2448         return (MH_WALK_CONTINUE);
2449 }
2450 
2451 static di_off_t
2452 di_getlink_data(di_off_t off, struct di_state *st)
2453 {
2454         struct i_layer_data     data = {0};
2455         size_t                  size;
2456 
2457         dcmn_err2((CE_CONT, "di_copylyr: off = %x\n", off));
2458 
2459         st->lnode_hash = mod_hash_create_extended("di_lnode_hash", 32,
2460             mod_hash_null_keydtor, (void (*)(mod_hash_val_t))i_lnode_check_free,
2461             i_lnode_hashfunc, NULL, i_lnode_cmp, KM_SLEEP);
2462 
2463         st->link_hash = mod_hash_create_ptrhash("di_link_hash", 32,
2464             (void (*)(mod_hash_val_t))i_link_check_free, sizeof (i_link_t));
2465 
2466         /* get driver layering information */
2467         (void) ldi_usage_walker(st, di_ldi_callback);
2468 
2469         /* check if there is any link data to include in the snapshot */
2470         if (st->lnode_count == 0) {
2471                 ASSERT(st->link_count == 0);
2472                 goto out;
2473         }
2474 
2475         ASSERT(st->link_count != 0);
2476 
2477         /* get a pointer to snapshot memory for all the di_lnodes */
2478         size = sizeof (struct di_lnode) * st->lnode_count;
2479         data.lnode_off = off = di_checkmem(st, off, size);
2480         off += size;
2481 
2482         /* get a pointer to snapshot memory for all the di_links */
2483         size = sizeof (struct di_link) * st->link_count;
2484         data.link_off = off = di_checkmem(st, off, size);
2485         off += size;
2486 
2487         data.lnode_count = data.link_count = 0;
2488         data.st = st;
2489 
2490         /*
2491          * We have lnodes and links that will go into the
2492          * snapshot, so let's walk the respective hashes
2493          * and snapshot them. The various linkages are
2494          * also set up during the walk.
2495          */
2496         mod_hash_walk(st->lnode_hash, i_lnode_walker, (void *)&data);
2497         ASSERT(data.lnode_count == st->lnode_count);
2498 
2499         mod_hash_walk(st->link_hash, i_link_walker, (void *)&data);
2500         ASSERT(data.link_count == st->link_count);
2501 
2502 out:
2503         /* free up the i_lnodes and i_links used to create the snapshot */
2504         mod_hash_destroy_hash(st->lnode_hash);
2505         mod_hash_destroy_hash(st->link_hash);
2506         st->lnode_count = 0;
2507         st->link_count = 0;
2508 
2509         return (off);
2510 }
2511 
2512 
2513 /*
2514  * Copy all minor data nodes attached to a devinfo node into the snapshot.
2515  * It is called from di_copynode with active ndi_devi_enter to protect
2516  * the list of minor nodes.
2517  */
2518 static di_off_t
2519 di_getmdata(struct ddi_minor_data *mnode, di_off_t *off_p, di_off_t node,
2520         struct di_state *st)
2521 {
2522         di_off_t        off;
2523         struct di_minor *me;
2524         size_t          size;
2525 
2526         dcmn_err2((CE_CONT, "di_getmdata:\n"));
2527 
2528         /*
2529          * check memory first
2530          */
2531         off = di_checkmem(st, *off_p, sizeof (struct di_minor));
2532         *off_p = off;
2533 
2534         do {
2535                 me = DI_MINOR(di_mem_addr(st, off));
2536                 me->self = off;
2537                 me->type = mnode->type;
2538                 me->node = node;
2539                 me->user_private_data = NULL;
2540 
2541                 off += sizeof (struct di_minor);
2542 
2543                 /*
2544                  * Split dev_t to major/minor, so it works for
2545                  * both ILP32 and LP64 model
2546                  */
2547                 me->dev_major = getmajor(mnode->ddm_dev);
2548                 me->dev_minor = getminor(mnode->ddm_dev);
2549                 me->spec_type = mnode->ddm_spec_type;
2550 
2551                 if (mnode->ddm_name) {
2552                         size = strlen(mnode->ddm_name) + 1;
2553                         me->name = off = di_checkmem(st, off, size);
2554                         (void) strcpy(di_mem_addr(st, off), mnode->ddm_name);
2555                         off += size;
2556                 }
2557 
2558                 if (mnode->ddm_node_type) {
2559                         size = strlen(mnode->ddm_node_type) + 1;
2560                         me->node_type = off = di_checkmem(st, off, size);
2561                         (void) strcpy(di_mem_addr(st, off),
2562                             mnode->ddm_node_type);
2563                         off += size;
2564                 }
2565 
2566                 off = di_checkmem(st, off, sizeof (struct di_minor));
2567                 me->next = off;
2568                 mnode = mnode->next;
2569         } while (mnode);
2570 
2571         me->next = 0;
2572 
2573         return (off);
2574 }
2575 
2576 /*
2577  * di_register_dip(), di_find_dip(): The dip must be protected
2578  * from deallocation when using these routines - this can either
2579  * be a reference count, a busy hold or a per-driver lock.
2580  */
2581 
2582 static void
2583 di_register_dip(struct di_state *st, dev_info_t *dip, di_off_t off)
2584 {
2585         struct dev_info *node = DEVI(dip);
2586         struct di_key   *key = kmem_zalloc(sizeof (*key), KM_SLEEP);
2587         struct di_dkey  *dk;
2588 
2589         ASSERT(dip);
2590         ASSERT(off > 0);
2591 
2592         key->k_type = DI_DKEY;
2593         dk = &(key->k_u.dkey);
2594 
2595         dk->dk_dip = dip;
2596         dk->dk_major = node->devi_major;
2597         dk->dk_inst = node->devi_instance;
2598         dk->dk_nodeid = node->devi_nodeid;
2599 
2600         if (mod_hash_insert(st->reg_dip_hash, (mod_hash_key_t)key,
2601             (mod_hash_val_t)(uintptr_t)off) != 0) {
2602                 panic(
2603                     "duplicate devinfo (%p) registered during device "
2604                     "tree walk", (void *)dip);
2605         }
2606 }
2607 
2608 
2609 static int
2610 di_dip_find(struct di_state *st, dev_info_t *dip, di_off_t *off_p)
2611 {
2612         /*
2613          * uintptr_t must be used because it matches the size of void *;
2614          * mod_hash expects clients to place results into pointer-size
2615          * containers; since di_off_t is always a 32-bit offset, alignment
2616          * would otherwise be broken on 64-bit kernels.
2617          */
2618         uintptr_t       offset;
2619         struct          di_key key = {0};
2620         struct          di_dkey *dk;
2621 
2622         ASSERT(st->reg_dip_hash);
2623         ASSERT(dip);
2624         ASSERT(off_p);
2625 
2626 
2627         key.k_type = DI_DKEY;
2628         dk = &(key.k_u.dkey);
2629 
2630         dk->dk_dip = dip;
2631         dk->dk_major = DEVI(dip)->devi_major;
2632         dk->dk_inst = DEVI(dip)->devi_instance;
2633         dk->dk_nodeid = DEVI(dip)->devi_nodeid;
2634 
2635         if (mod_hash_find(st->reg_dip_hash, (mod_hash_key_t)&key,
2636             (mod_hash_val_t *)&offset) == 0) {
2637                 *off_p = (di_off_t)offset;
2638                 return (0);
2639         } else {
2640                 return (-1);
2641         }
2642 }
2643 
2644 /*
2645  * di_register_pip(), di_find_pip(): The pip must be protected from deallocation
2646  * when using these routines. The caller must do this by protecting the
2647  * client(or phci)<->pip linkage while traversing the list and then holding the
2648  * pip when it is found in the list.
2649  */
2650 
2651 static void
2652 di_register_pip(struct di_state *st, mdi_pathinfo_t *pip, di_off_t off)
2653 {
2654         struct di_key   *key = kmem_zalloc(sizeof (*key), KM_SLEEP);
2655         char            *path_addr;
2656         struct di_pkey  *pk;
2657 
2658         ASSERT(pip);
2659         ASSERT(off > 0);
2660 
2661         key->k_type = DI_PKEY;
2662         pk = &(key->k_u.pkey);
2663 
2664         pk->pk_pip = pip;
2665         path_addr = mdi_pi_get_addr(pip);
2666         if (path_addr)
2667                 pk->pk_path_addr = i_ddi_strdup(path_addr, KM_SLEEP);
2668         pk->pk_client = mdi_pi_get_client(pip);
2669         pk->pk_phci = mdi_pi_get_phci(pip);
2670 
2671         if (mod_hash_insert(st->reg_pip_hash, (mod_hash_key_t)key,
2672             (mod_hash_val_t)(uintptr_t)off) != 0) {
2673                 panic(
2674                     "duplicate pathinfo (%p) registered during device "
2675                     "tree walk", (void *)pip);
2676         }
2677 }
2678 
2679 /*
2680  * As with di_register_pip, the caller must hold or lock the pip
2681  */
2682 static int
2683 di_pip_find(struct di_state *st, mdi_pathinfo_t *pip, di_off_t *off_p)
2684 {
2685         /*
2686          * uintptr_t must be used because it matches the size of void *;
2687          * mod_hash expects clients to place results into pointer-size
2688          * containers; since di_off_t is always a 32-bit offset, alignment
2689          * would otherwise be broken on 64-bit kernels.
2690          */
2691         uintptr_t       offset;
2692         struct di_key   key = {0};
2693         struct di_pkey  *pk;
2694 
2695         ASSERT(st->reg_pip_hash);
2696         ASSERT(off_p);
2697 
2698         if (pip == NULL) {
2699                 *off_p = 0;
2700                 return (0);
2701         }
2702 
2703         key.k_type = DI_PKEY;
2704         pk = &(key.k_u.pkey);
2705 
2706         pk->pk_pip = pip;
2707         pk->pk_path_addr = mdi_pi_get_addr(pip);
2708         pk->pk_client = mdi_pi_get_client(pip);
2709         pk->pk_phci = mdi_pi_get_phci(pip);
2710 
2711         if (mod_hash_find(st->reg_pip_hash, (mod_hash_key_t)&key,
2712             (mod_hash_val_t *)&offset) == 0) {
2713                 *off_p = (di_off_t)offset;
2714                 return (0);
2715         } else {
2716                 return (-1);
2717         }
2718 }
2719 
2720 static di_path_state_t
2721 path_state_convert(mdi_pathinfo_state_t st)
2722 {
2723         switch (st) {
2724         case MDI_PATHINFO_STATE_ONLINE:
2725                 return (DI_PATH_STATE_ONLINE);
2726         case MDI_PATHINFO_STATE_STANDBY:
2727                 return (DI_PATH_STATE_STANDBY);
2728         case MDI_PATHINFO_STATE_OFFLINE:
2729                 return (DI_PATH_STATE_OFFLINE);
2730         case MDI_PATHINFO_STATE_FAULT:
2731                 return (DI_PATH_STATE_FAULT);
2732         default:
2733                 return (DI_PATH_STATE_UNKNOWN);
2734         }
2735 }
2736 
2737 static uint_t
2738 path_flags_convert(uint_t pi_path_flags)
2739 {
2740         uint_t  di_path_flags = 0;
2741 
2742         /* MDI_PATHINFO_FLAGS_HIDDEN nodes not in snapshot */
2743 
2744         if (pi_path_flags & MDI_PATHINFO_FLAGS_DEVICE_REMOVED)
2745                 di_path_flags |= DI_PATH_FLAGS_DEVICE_REMOVED;
2746 
2747         return (di_path_flags);
2748 }
2749 
2750 
2751 static di_off_t
2752 di_path_getprop(mdi_pathinfo_t *pip, di_off_t *off_p,
2753     struct di_state *st)
2754 {
2755         nvpair_t                *prop = NULL;
2756         struct di_path_prop     *me;
2757         int                     off;
2758         size_t                  size;
2759         char                    *str;
2760         uchar_t                 *buf;
2761         uint_t                  nelems;
2762 
2763         off = *off_p;
2764         if (mdi_pi_get_next_prop(pip, NULL) == NULL) {
2765                 *off_p = 0;
2766                 return (off);
2767         }
2768 
2769         off = di_checkmem(st, off, sizeof (struct di_path_prop));
2770         *off_p = off;
2771 
2772         while (prop = mdi_pi_get_next_prop(pip, prop)) {
2773                 me = DI_PATHPROP(di_mem_addr(st, off));
2774                 me->self = off;
2775                 off += sizeof (struct di_path_prop);
2776 
2777                 /*
2778                  * property name
2779                  */
2780                 size = strlen(nvpair_name(prop)) + 1;
2781                 me->prop_name = off = di_checkmem(st, off, size);
2782                 (void) strcpy(di_mem_addr(st, off), nvpair_name(prop));
2783                 off += size;
2784 
2785                 switch (nvpair_type(prop)) {
2786                 case DATA_TYPE_BYTE:
2787                 case DATA_TYPE_INT16:
2788                 case DATA_TYPE_UINT16:
2789                 case DATA_TYPE_INT32:
2790                 case DATA_TYPE_UINT32:
2791                         me->prop_type = DDI_PROP_TYPE_INT;
2792                         size = sizeof (int32_t);
2793                         off = di_checkmem(st, off, size);
2794                         (void) nvpair_value_int32(prop,
2795                             (int32_t *)di_mem_addr(st, off));
2796                         break;
2797 
2798                 case DATA_TYPE_INT64:
2799                 case DATA_TYPE_UINT64:
2800                         me->prop_type = DDI_PROP_TYPE_INT64;
2801                         size = sizeof (int64_t);
2802                         off = di_checkmem(st, off, size);
2803                         (void) nvpair_value_int64(prop,
2804                             (int64_t *)di_mem_addr(st, off));
2805                         break;
2806 
2807                 case DATA_TYPE_STRING:
2808                         me->prop_type = DDI_PROP_TYPE_STRING;
2809                         (void) nvpair_value_string(prop, &str);
2810                         size = strlen(str) + 1;
2811                         off = di_checkmem(st, off, size);
2812                         (void) strcpy(di_mem_addr(st, off), str);
2813                         break;
2814 
2815                 case DATA_TYPE_BYTE_ARRAY:
2816                 case DATA_TYPE_INT16_ARRAY:
2817                 case DATA_TYPE_UINT16_ARRAY:
2818                 case DATA_TYPE_INT32_ARRAY:
2819                 case DATA_TYPE_UINT32_ARRAY:
2820                 case DATA_TYPE_INT64_ARRAY:
2821                 case DATA_TYPE_UINT64_ARRAY:
2822                         me->prop_type = DDI_PROP_TYPE_BYTE;
2823                         (void) nvpair_value_byte_array(prop, &buf, &nelems);
2824                         size = nelems;
2825                         if (nelems != 0) {
2826                                 off = di_checkmem(st, off, size);
2827                                 bcopy(buf, di_mem_addr(st, off), size);
2828                         }
2829                         break;
2830 
2831                 default:        /* Unknown or unhandled type; skip it */
2832                         size = 0;
2833                         break;
2834                 }
2835 
2836                 if (size > 0) {
2837                         me->prop_data = off;
2838                 }
2839 
2840                 me->prop_len = (int)size;
2841                 off += size;
2842 
2843                 off = di_checkmem(st, off, sizeof (struct di_path_prop));
2844                 me->prop_next = off;
2845         }
2846 
2847         me->prop_next = 0;
2848         return (off);
2849 }
2850 
2851 
2852 static void
2853 di_path_one_endpoint(struct di_path *me, di_off_t noff, di_off_t **off_pp,
2854     int get_client)
2855 {
2856         if (get_client) {
2857                 ASSERT(me->path_client == 0);
2858                 me->path_client = noff;
2859                 ASSERT(me->path_c_link == 0);
2860                 *off_pp = &me->path_c_link;
2861                 me->path_snap_state &=
2862                     ~(DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOCLINK);
2863         } else {
2864                 ASSERT(me->path_phci == 0);
2865                 me->path_phci = noff;
2866                 ASSERT(me->path_p_link == 0);
2867                 *off_pp = &me->path_p_link;
2868                 me->path_snap_state &=
2869                     ~(DI_PATH_SNAP_NOPHCI | DI_PATH_SNAP_NOPLINK);
2870         }
2871 }
2872 
2873 /*
2874  * off_p: pointer to the linkage field. This links pips along the client|phci
2875  *         linkage list.
2876  * noff  : Offset for the endpoint dip snapshot.
2877  */
2878 static di_off_t
2879 di_getpath_data(dev_info_t *dip, di_off_t *off_p, di_off_t noff,
2880     struct di_state *st, int get_client)
2881 {
2882         di_off_t        off;
2883         mdi_pathinfo_t  *pip;
2884         struct di_path  *me;
2885         mdi_pathinfo_t  *(*next_pip)(dev_info_t *, mdi_pathinfo_t *);
2886         size_t          size;
2887 
2888         dcmn_err2((CE_WARN, "di_getpath_data: client = %d", get_client));
2889 
2890         /*
2891          * The naming of the following mdi_xyz() is unfortunately
2892          * non-intuitive. mdi_get_next_phci_path() follows the
2893          * client_link i.e. the list of pip's belonging to the
2894          * given client dip.
2895          */
2896         if (get_client)
2897                 next_pip = &mdi_get_next_phci_path;
2898         else
2899                 next_pip = &mdi_get_next_client_path;
2900 
2901         off = *off_p;
2902 
2903         pip = NULL;
2904         while (pip = (*next_pip)(dip, pip)) {
2905                 di_off_t stored_offset;
2906 
2907                 dcmn_err((CE_WARN, "marshalling pip = %p", (void *)pip));
2908 
2909                 mdi_pi_lock(pip);
2910 
2911                 /* We don't represent hidden paths in the snapshot */
2912                 if (mdi_pi_ishidden(pip)) {
2913                         dcmn_err((CE_WARN, "hidden, skip"));
2914                         mdi_pi_unlock(pip);
2915                         continue;
2916                 }
2917 
2918                 if (di_pip_find(st, pip, &stored_offset) != -1) {
2919                         /*
2920                          * We've already seen this pathinfo node so we need to
2921                          * take care not to snap it again; However, one endpoint
2922                          * and linkage will be set here. The other endpoint
2923                          * and linkage has already been set when the pip was
2924                          * first snapshotted i.e. when the other endpoint dip
2925                          * was snapshotted.
2926                          */
2927                         me = DI_PATH(di_mem_addr(st, stored_offset));
2928                         *off_p = stored_offset;
2929 
2930                         di_path_one_endpoint(me, noff, &off_p, get_client);
2931 
2932                         /*
2933                          * The other endpoint and linkage were set when this
2934                          * pip was snapshotted. So we are done with both
2935                          * endpoints and linkages.
2936                          */
2937                         ASSERT(!(me->path_snap_state &
2938                             (DI_PATH_SNAP_NOCLIENT|DI_PATH_SNAP_NOPHCI)));
2939                         ASSERT(!(me->path_snap_state &
2940                             (DI_PATH_SNAP_NOCLINK|DI_PATH_SNAP_NOPLINK)));
2941 
2942                         mdi_pi_unlock(pip);
2943                         continue;
2944                 }
2945 
2946                 /*
2947                  * Now that we need to snapshot this pip, check memory
2948                  */
2949                 size = sizeof (struct di_path);
2950                 *off_p = off = di_checkmem(st, off, size);
2951                 me = DI_PATH(di_mem_addr(st, off));
2952                 me->self = off;
2953                 off += size;
2954 
2955                 me->path_snap_state =
2956                     DI_PATH_SNAP_NOCLINK | DI_PATH_SNAP_NOPLINK;
2957                 me->path_snap_state |=
2958                     DI_PATH_SNAP_NOCLIENT | DI_PATH_SNAP_NOPHCI;
2959 
2960                 /*
2961                  * Zero out fields as di_checkmem() doesn't guarantee
2962                  * zero-filled memory
2963                  */
2964                 me->path_client = me->path_phci = 0;
2965                 me->path_c_link = me->path_p_link = 0;
2966 
2967                 di_path_one_endpoint(me, noff, &off_p, get_client);
2968 
2969                 /*
2970                  * Note the existence of this pathinfo
2971                  */
2972                 di_register_pip(st, pip, me->self);
2973 
2974                 me->path_state = path_state_convert(mdi_pi_get_state(pip));
2975                 me->path_flags = path_flags_convert(mdi_pi_get_flags(pip));
2976 
2977                 me->path_instance = mdi_pi_get_path_instance(pip);
2978 
2979                 /*
2980                  * Get intermediate addressing info.
2981                  */
2982                 size = strlen(mdi_pi_get_addr(pip)) + 1;
2983                 me->path_addr = off = di_checkmem(st, off, size);
2984                 (void) strcpy(di_mem_addr(st, off), mdi_pi_get_addr(pip));
2985                 off += size;
2986 
2987                 /*
2988                  * Get path properties if props are to be included in the
2989                  * snapshot
2990                  */
2991                 if (DINFOPROP & st->command) {
2992                         me->path_prop = off;
2993                         off = di_path_getprop(pip, &me->path_prop, st);
2994                 } else {
2995                         me->path_prop = 0;
2996                 }
2997 
2998                 mdi_pi_unlock(pip);
2999         }
3000 
3001         *off_p = 0;
3002         return (off);
3003 }
3004 
3005 /*
3006  * Return driver prop_op entry point for the specified devinfo node.
3007  *
3008  * To return a non-NULL value:
3009  * - driver must be attached and held:
3010  *   If driver is not attached we ignore the driver property list.
3011  *   No one should rely on such properties.
3012  * - driver "cb_prop_op != ddi_prop_op":
3013  *   If "cb_prop_op == ddi_prop_op", framework does not need to call driver.
3014  *   XXX or parent's bus_prop_op != ddi_bus_prop_op
3015  */
3016 static int
3017 (*di_getprop_prop_op(struct dev_info *dip))
3018         (dev_t, dev_info_t *, ddi_prop_op_t, int, char *, caddr_t, int *)
3019 {
3020         struct dev_ops  *ops;
3021 
3022         /* If driver is not attached we ignore the driver property list. */
3023         if ((dip == NULL) || !i_ddi_devi_attached((dev_info_t *)dip))
3024                 return (NULL);
3025 
3026         /*
3027          * Some nexus drivers incorrectly set cb_prop_op to nodev, nulldev,
3028          * or even NULL.
3029          */
3030         ops = dip->devi_ops;
3031         if (ops && ops->devo_cb_ops &&
3032             (ops->devo_cb_ops->cb_prop_op != ddi_prop_op) &&
3033             (ops->devo_cb_ops->cb_prop_op != nodev) &&
3034             (ops->devo_cb_ops->cb_prop_op != nulldev) &&
3035             (ops->devo_cb_ops->cb_prop_op != NULL))
3036                 return (ops->devo_cb_ops->cb_prop_op);
3037         return (NULL);
3038 }
3039 
3040 static di_off_t
3041 di_getprop_add(int list, int dyn, struct di_state *st, struct dev_info *dip,
3042     int (*prop_op)(),
3043     char *name, dev_t devt, int aflags, int alen, caddr_t aval,
3044     di_off_t off, di_off_t **off_pp)
3045 {
3046         int             need_free = 0;
3047         dev_t           pdevt;
3048         int             pflags;
3049         int             rv;
3050         caddr_t         val;
3051         int             len;
3052         size_t          size;
3053         struct di_prop  *pp;
3054 
3055         /* If we have prop_op function, ask driver for latest value */
3056         if (prop_op) {
3057                 ASSERT(dip);
3058 
3059                 /* Must search DDI_DEV_T_NONE with DDI_DEV_T_ANY */
3060                 pdevt = (devt == DDI_DEV_T_NONE) ? DDI_DEV_T_ANY : devt;
3061 
3062                 /*
3063                  * We have type information in flags, but are invoking an
3064                  * old non-typed prop_op(9E) interface. Since not all types are
3065                  * part of DDI_PROP_TYPE_ANY (example is DDI_PROP_TYPE_INT64),
3066                  * we set DDI_PROP_CONSUMER_TYPED - causing the framework to
3067                  * expand type bits beyond DDI_PROP_TYPE_ANY.  This allows us
3068                  * to use the legacy prop_op(9E) interface to obtain updates
3069                  * non-DDI_PROP_TYPE_ANY dynamic properties.
3070                  */
3071                 pflags = aflags & ~DDI_PROP_TYPE_MASK;
3072                 pflags |= DDI_PROP_DONTPASS | DDI_PROP_NOTPROM |
3073                     DDI_PROP_CONSUMER_TYPED;
3074 
3075                 /*
3076                  * Hold and exit across prop_op(9E) to avoid lock order
3077                  * issues between
3078                  *   [ndi_devi_enter() ..prop_op(9E).. driver-lock]
3079                  * .vs.
3080                  *   [..ioctl(9E).. driver-lock ..ddi_remove_minor_node(9F)..
3081                  *   ndi_devi_enter()]
3082                  * ordering.
3083                  */
3084                 ndi_hold_devi((dev_info_t *)dip);
3085                 ndi_devi_exit((dev_info_t *)dip, dip->devi_circular);
3086                 rv = (*prop_op)(pdevt, (dev_info_t *)dip,
3087                     PROP_LEN_AND_VAL_ALLOC, pflags, name, &val, &len);
3088                 ndi_devi_enter((dev_info_t *)dip, &dip->devi_circular);
3089                 ndi_rele_devi((dev_info_t *)dip);
3090 
3091                 if (rv == DDI_PROP_SUCCESS) {
3092                         need_free = 1;          /* dynamic prop obtained */
3093                 } else if (dyn) {
3094                         /*
3095                          * A dynamic property must succeed prop_op(9E) to show
3096                          * up in the snapshot - that is the only source of its
3097                          * value.
3098                          */
3099                         return (off);           /* dynamic prop not supported */
3100                 } else {
3101                         /*
3102                          * In case calling the driver caused an update off
3103                          * prop_op(9E) of a non-dynamic property (code leading
3104                          * to ddi_prop_change), we defer picking up val and
3105                          * len informatiojn until after prop_op(9E) to ensure
3106                          * that we snapshot the latest value.
3107                          */
3108                         val = aval;
3109                         len = alen;
3110 
3111                 }
3112         } else {
3113                 val = aval;
3114                 len = alen;
3115         }
3116 
3117         dcmn_err((CE_CONT, "di_getprop_add: list %d %s len %d val %p\n",
3118             list, name ? name : "NULL", len, (void *)val));
3119 
3120         size = sizeof (struct di_prop);
3121         **off_pp = off = di_checkmem(st, off, size);
3122         pp = DI_PROP(di_mem_addr(st, off));
3123         pp->self = off;
3124         off += size;
3125 
3126         pp->dev_major = getmajor(devt);
3127         pp->dev_minor = getminor(devt);
3128         pp->prop_flags = aflags;
3129         pp->prop_list = list;
3130 
3131         /* property name */
3132         if (name) {
3133                 size = strlen(name) + 1;
3134                 pp->prop_name = off = di_checkmem(st, off, size);
3135                 (void) strcpy(di_mem_addr(st, off), name);
3136                 off += size;
3137         } else {
3138                 pp->prop_name = -1;
3139         }
3140 
3141         pp->prop_len = len;
3142         if (val == NULL) {
3143                 pp->prop_data = -1;
3144         } else if (len != 0) {
3145                 size = len;
3146                 pp->prop_data = off = di_checkmem(st, off, size);
3147                 bcopy(val, di_mem_addr(st, off), size);
3148                 off += size;
3149         }
3150 
3151         pp->next = 0;                        /* assume tail for now */
3152         *off_pp = &pp->next;             /* return pointer to our next */
3153 
3154         if (need_free)                  /* free PROP_LEN_AND_VAL_ALLOC alloc */
3155                 kmem_free(val, len);
3156         return (off);
3157 }
3158 
3159 
3160 /*
3161  * Copy a list of properties attached to a devinfo node. Called from
3162  * di_copynode with active ndi_devi_enter. The major number is passed in case
3163  * we need to call driver's prop_op entry. The value of list indicates
3164  * which list we are copying. Possible values are:
3165  * DI_PROP_DRV_LIST, DI_PROP_SYS_LIST, DI_PROP_GLB_LIST, DI_PROP_HW_LIST
3166  */
3167 static di_off_t
3168 di_getprop(int list, struct ddi_prop **pprop, di_off_t *off_p,
3169     struct di_state *st, struct dev_info *dip)
3170 {
3171         struct ddi_prop         *prop;
3172         int                     (*prop_op)();
3173         int                     off;
3174         struct ddi_minor_data   *mn;
3175         i_ddi_prop_dyn_t        *dp;
3176         struct plist {
3177                 struct plist    *pl_next;
3178                 char            *pl_name;
3179                 int             pl_flags;
3180                 dev_t           pl_dev;
3181                 int             pl_len;
3182                 caddr_t         pl_val;
3183         }                       *pl, *pl0, **plp;
3184 
3185         ASSERT(st != NULL);
3186 
3187         off = *off_p;
3188         *off_p = 0;
3189         dcmn_err((CE_CONT, "di_getprop: copy property list %d at addr %p\n",
3190             list, (void *)*pprop));
3191 
3192         /* get pointer to driver's prop_op(9E) implementation if DRV_LIST */
3193         prop_op = (list == DI_PROP_DRV_LIST) ? di_getprop_prop_op(dip) : NULL;
3194 
3195         /*
3196          * Form private list of properties, holding devi_lock for properties
3197          * that hang off the dip.
3198          */
3199         if (dip)
3200                 mutex_enter(&(dip->devi_lock));
3201         for (pl0 = NULL, plp = &pl0, prop = *pprop;
3202             prop; plp = &pl->pl_next, prop = prop->prop_next) {
3203                 pl = kmem_alloc(sizeof (*pl), KM_SLEEP);
3204                 *plp = pl;
3205                 pl->pl_next = NULL;
3206                 if (prop->prop_name)
3207                         pl->pl_name = i_ddi_strdup(prop->prop_name, KM_SLEEP);
3208                 else
3209                         pl->pl_name = NULL;
3210                 pl->pl_flags = prop->prop_flags;
3211                 pl->pl_dev = prop->prop_dev;
3212                 if (prop->prop_len) {
3213                         pl->pl_len = prop->prop_len;
3214                         pl->pl_val = kmem_alloc(pl->pl_len, KM_SLEEP);
3215                         bcopy(prop->prop_val, pl->pl_val, pl->pl_len);
3216                 } else {
3217                         pl->pl_len = 0;
3218                         pl->pl_val = NULL;
3219                 }
3220         }
3221         if (dip)
3222                 mutex_exit(&(dip->devi_lock));
3223 
3224         /*
3225          * Now that we have dropped devi_lock, perform a second-pass to
3226          * add properties to the snapshot.  We do this as a second pass
3227          * because we may need to call prop_op(9E) and we can't hold
3228          * devi_lock across that call.
3229          */
3230         for (pl = pl0; pl; pl = pl0) {
3231                 pl0 = pl->pl_next;
3232                 off = di_getprop_add(list, 0, st, dip, prop_op, pl->pl_name,
3233                     pl->pl_dev, pl->pl_flags, pl->pl_len, pl->pl_val,
3234                     off, &off_p);
3235                 if (pl->pl_val)
3236                         kmem_free(pl->pl_val, pl->pl_len);
3237                 if (pl->pl_name)
3238                         kmem_free(pl->pl_name, strlen(pl->pl_name) + 1);
3239                 kmem_free(pl, sizeof (*pl));
3240         }
3241 
3242         /*
3243          * If there is no prop_op or dynamic property support has been
3244          * disabled, we are done.
3245          */
3246         if ((prop_op == NULL) || (di_prop_dyn == 0)) {
3247                 *off_p = 0;
3248                 return (off);
3249         }
3250 
3251         /* Add dynamic driver properties to snapshot */
3252         for (dp = i_ddi_prop_dyn_driver_get((dev_info_t *)dip);
3253             dp && dp->dp_name; dp++) {
3254                 if (dp->dp_spec_type) {
3255                         /* if spec_type, property of matching minor */
3256                         ASSERT(DEVI_BUSY_OWNED(dip));
3257                         for (mn = dip->devi_minor; mn; mn = mn->next) {
3258                                 if (mn->ddm_spec_type != dp->dp_spec_type)
3259                                         continue;
3260                                 off = di_getprop_add(list, 1, st, dip, prop_op,
3261                                     dp->dp_name, mn->ddm_dev, dp->dp_type,
3262                                     0, NULL, off, &off_p);
3263                         }
3264                 } else {
3265                         /* property of devinfo node */
3266                         off = di_getprop_add(list, 1, st, dip, prop_op,
3267                             dp->dp_name, DDI_DEV_T_NONE, dp->dp_type,
3268                             0, NULL, off, &off_p);
3269                 }
3270         }
3271 
3272         /* Add dynamic parent properties to snapshot */
3273         for (dp = i_ddi_prop_dyn_parent_get((dev_info_t *)dip);
3274             dp && dp->dp_name; dp++) {
3275                 if (dp->dp_spec_type) {
3276                         /* if spec_type, property of matching minor */
3277                         ASSERT(DEVI_BUSY_OWNED(dip));
3278                         for (mn = dip->devi_minor; mn; mn = mn->next) {
3279                                 if (mn->ddm_spec_type != dp->dp_spec_type)
3280                                         continue;
3281                                 off = di_getprop_add(list, 1, st, dip, prop_op,
3282                                     dp->dp_name, mn->ddm_dev, dp->dp_type,
3283                                     0, NULL, off, &off_p);
3284                         }
3285                 } else {
3286                         /* property of devinfo node */
3287                         off = di_getprop_add(list, 1, st, dip, prop_op,
3288                             dp->dp_name, DDI_DEV_T_NONE, dp->dp_type,
3289                             0, NULL, off, &off_p);
3290                 }
3291         }
3292 
3293         *off_p = 0;
3294         return (off);
3295 }
3296 
3297 /*
3298  * find private data format attached to a dip
3299  * parent = 1 to match driver name of parent dip (for parent private data)
3300  *      0 to match driver name of current dip (for driver private data)
3301  */
3302 #define DI_MATCH_DRIVER 0
3303 #define DI_MATCH_PARENT 1
3304 
3305 struct di_priv_format *
3306 di_match_drv_name(struct dev_info *node, struct di_state *st, int match)
3307 {
3308         int                     i, count, len;
3309         char                    *drv_name;
3310         major_t                 major;
3311         struct di_all           *all;
3312         struct di_priv_format   *form;
3313 
3314         dcmn_err2((CE_CONT, "di_match_drv_name: node = %s, match = %x\n",
3315             node->devi_node_name, match));
3316 
3317         if (match == DI_MATCH_PARENT) {
3318                 node = DEVI(node->devi_parent);
3319         }
3320 
3321         if (node == NULL) {
3322                 return (NULL);
3323         }
3324 
3325         major = node->devi_major;
3326         if (major == (major_t)(-1)) {
3327                 return (NULL);
3328         }
3329 
3330         /*
3331          * Match the driver name.
3332          */
3333         drv_name = ddi_major_to_name(major);
3334         if ((drv_name == NULL) || *drv_name == '\0') {
3335                 return (NULL);
3336         }
3337 
3338         /* Now get the di_priv_format array */
3339         all = DI_ALL_PTR(st);
3340         if (match == DI_MATCH_PARENT) {
3341                 count = all->n_ppdata;
3342                 form = DI_PRIV_FORMAT(di_mem_addr(st, all->ppdata_format));
3343         } else {
3344                 count = all->n_dpdata;
3345                 form = DI_PRIV_FORMAT(di_mem_addr(st, all->dpdata_format));
3346         }
3347 
3348         len = strlen(drv_name);
3349         for (i = 0; i < count; i++) {
3350                 char *tmp;
3351 
3352                 tmp = form[i].drv_name;
3353                 while (tmp && (*tmp != '\0')) {
3354                         if (strncmp(drv_name, tmp, len) == 0) {
3355                                 return (&form[i]);
3356                         }
3357                         /*
3358                          * Move to next driver name, skipping a white space
3359                          */
3360                         if (tmp = strchr(tmp, ' ')) {
3361                                 tmp++;
3362                         }
3363                 }
3364         }
3365 
3366         return (NULL);
3367 }
3368 
3369 /*
3370  * The following functions copy data as specified by the format passed in.
3371  * To prevent invalid format from panicing the system, we call on_fault().
3372  * A return value of 0 indicates an error. Otherwise, the total offset
3373  * is returned.
3374  */
3375 #define DI_MAX_PRIVDATA (PAGESIZE >> 1)   /* max private data size */
3376 
3377 static di_off_t
3378 di_getprvdata(struct di_priv_format *pdp, struct dev_info *node,
3379     void *data, di_off_t *off_p, struct di_state *st)
3380 {
3381         caddr_t         pa;
3382         void            *ptr;
3383         int             i, size, repeat;
3384         di_off_t        off, off0, *tmp;
3385         char            *path;
3386         label_t         ljb;
3387 
3388         dcmn_err2((CE_CONT, "di_getprvdata:\n"));
3389 
3390         /*
3391          * check memory availability. Private data size is
3392          * limited to DI_MAX_PRIVDATA.
3393          */
3394         off = di_checkmem(st, *off_p, DI_MAX_PRIVDATA);
3395         *off_p = off;
3396 
3397         if ((pdp->bytes == 0) || pdp->bytes > DI_MAX_PRIVDATA) {
3398                 goto failure;
3399         }
3400 
3401         if (!on_fault(&ljb)) {
3402                 /* copy the struct */
3403                 bcopy(data, di_mem_addr(st, off), pdp->bytes);
3404                 off0 = DI_ALIGN(pdp->bytes); /* XXX remove DI_ALIGN */
3405 
3406                 /* dereferencing pointers */
3407                 for (i = 0; i < MAX_PTR_IN_PRV; i++) {
3408 
3409                         if (pdp->ptr[i].size == 0) {
3410                                 goto success;   /* no more ptrs */
3411                         }
3412 
3413                         /*
3414                          * first, get the pointer content
3415                          */
3416                         if ((pdp->ptr[i].offset < 0) ||
3417                             (pdp->ptr[i].offset > pdp->bytes - sizeof (char *)))
3418                                 goto failure;   /* wrong offset */
3419 
3420                         pa = di_mem_addr(st, off + pdp->ptr[i].offset);
3421 
3422                         /* save a tmp ptr to store off_t later */
3423                         tmp = (di_off_t *)(intptr_t)pa;
3424 
3425                         /* get pointer value, if NULL continue */
3426                         ptr = *((void **) (intptr_t)pa);
3427                         if (ptr == NULL) {
3428                                 continue;
3429                         }
3430 
3431                         /*
3432                          * next, find the repeat count (array dimension)
3433                          */
3434                         repeat = pdp->ptr[i].len_offset;
3435 
3436                         /*
3437                          * Positive value indicates a fixed sized array.
3438                          * 0 or negative value indicates variable sized array.
3439                          *
3440                          * For variable sized array, the variable must be
3441                          * an int member of the structure, with an offset
3442                          * equal to the absolution value of struct member.
3443                          */
3444                         if (repeat > pdp->bytes - sizeof (int)) {
3445                                 goto failure;   /* wrong offset */
3446                         }
3447 
3448                         if (repeat >= 0) {
3449                                 repeat = *((int *)
3450                                     (intptr_t)((caddr_t)data + repeat));
3451                         } else {
3452                                 repeat = -repeat;
3453                         }
3454 
3455                         /*
3456                          * next, get the size of the object to be copied
3457                          */
3458                         size = pdp->ptr[i].size * repeat;
3459 
3460                         /*
3461                          * Arbitrarily limit the total size of object to be
3462                          * copied (1 byte to 1/4 page).
3463                          */
3464                         if ((size <= 0) || (size > (DI_MAX_PRIVDATA - off0))) {
3465                                 goto failure;   /* wrong size or too big */
3466                         }
3467 
3468                         /*
3469                          * Now copy the data
3470                          */
3471                         *tmp = off0;
3472                         bcopy(ptr, di_mem_addr(st, off + off0), size);
3473                         off0 += DI_ALIGN(size); /* XXX remove DI_ALIGN */
3474                 }
3475         } else {
3476                 goto failure;
3477         }
3478 
3479 success:
3480         /*
3481          * success if reached here
3482          */
3483         no_fault();
3484         return (off + off0);
3485         /*NOTREACHED*/
3486 
3487 failure:
3488         /*
3489          * fault occurred
3490          */
3491         no_fault();
3492         path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3493         cmn_err(CE_WARN, "devinfo: fault on private data for '%s' at %p",
3494             ddi_pathname((dev_info_t *)node, path), data);
3495         kmem_free(path, MAXPATHLEN);
3496         *off_p = -1;    /* set private data to indicate error */
3497 
3498         return (off);
3499 }
3500 
3501 /*
3502  * get parent private data; on error, returns original offset
3503  */
3504 static di_off_t
3505 di_getppdata(struct dev_info *node, di_off_t *off_p, struct di_state *st)
3506 {
3507         int                     off;
3508         struct di_priv_format   *ppdp;
3509 
3510         dcmn_err2((CE_CONT, "di_getppdata:\n"));
3511 
3512         /* find the parent data format */
3513         if ((ppdp = di_match_drv_name(node, st, DI_MATCH_PARENT)) == NULL) {
3514                 off = *off_p;
3515                 *off_p = 0;     /* set parent data to none */
3516                 return (off);
3517         }
3518 
3519         return (di_getprvdata(ppdp, node,
3520             ddi_get_parent_data((dev_info_t *)node), off_p, st));
3521 }
3522 
3523 /*
3524  * get parent private data; returns original offset
3525  */
3526 static di_off_t
3527 di_getdpdata(struct dev_info *node, di_off_t *off_p, struct di_state *st)
3528 {
3529         int                     off;
3530         struct di_priv_format   *dpdp;
3531 
3532         dcmn_err2((CE_CONT, "di_getdpdata:"));
3533 
3534         /* find the parent data format */
3535         if ((dpdp = di_match_drv_name(node, st, DI_MATCH_DRIVER)) == NULL) {
3536                 off = *off_p;
3537                 *off_p = 0;     /* set driver data to none */
3538                 return (off);
3539         }
3540 
3541         return (di_getprvdata(dpdp, node,
3542             ddi_get_driver_private((dev_info_t *)node), off_p, st));
3543 }
3544 
3545 /*
3546  * Copy hotplug data associated with a devinfo node into the snapshot.
3547  */
3548 static di_off_t
3549 di_gethpdata(ddi_hp_cn_handle_t *hp_hdl, di_off_t *off_p,
3550     struct di_state *st)
3551 {
3552         struct i_hp     *hp;
3553         struct di_hp    *me;
3554         size_t          size;
3555         di_off_t        off;
3556 
3557         dcmn_err2((CE_CONT, "di_gethpdata:\n"));
3558 
3559         /*
3560          * check memory first
3561          */
3562         off = di_checkmem(st, *off_p, sizeof (struct di_hp));
3563         *off_p = off;
3564 
3565         do {
3566                 me = DI_HP(di_mem_addr(st, off));
3567                 me->self = off;
3568                 me->hp_name = 0;
3569                 me->hp_connection = (int)hp_hdl->cn_info.cn_num;
3570                 me->hp_depends_on = (int)hp_hdl->cn_info.cn_num_dpd_on;
3571                 (void) ddihp_cn_getstate(hp_hdl);
3572                 me->hp_state = (int)hp_hdl->cn_info.cn_state;
3573                 me->hp_type = (int)hp_hdl->cn_info.cn_type;
3574                 me->hp_type_str = 0;
3575                 me->hp_last_change = (uint32_t)hp_hdl->cn_info.cn_last_change;
3576                 me->hp_child = 0;
3577 
3578                 /*
3579                  * Child links are resolved later by di_hotplug_children().
3580                  * Store a reference to this di_hp_t in the list used later
3581                  * by di_hotplug_children().
3582                  */
3583                 hp = kmem_zalloc(sizeof (i_hp_t), KM_SLEEP);
3584                 hp->hp_off = off;
3585                 hp->hp_child = hp_hdl->cn_info.cn_child;
3586                 list_insert_tail(&st->hp_list, hp);
3587 
3588                 off += sizeof (struct di_hp);
3589 
3590                 /* Add name of this di_hp_t to the snapshot */
3591                 if (hp_hdl->cn_info.cn_name) {
3592                         size = strlen(hp_hdl->cn_info.cn_name) + 1;
3593                         me->hp_name = off = di_checkmem(st, off, size);
3594                         (void) strcpy(di_mem_addr(st, off),
3595                             hp_hdl->cn_info.cn_name);
3596                         off += size;
3597                 }
3598 
3599                 /* Add type description of this di_hp_t to the snapshot */
3600                 if (hp_hdl->cn_info.cn_type_str) {
3601                         size = strlen(hp_hdl->cn_info.cn_type_str) + 1;
3602                         me->hp_type_str = off = di_checkmem(st, off, size);
3603                         (void) strcpy(di_mem_addr(st, off),
3604                             hp_hdl->cn_info.cn_type_str);
3605                         off += size;
3606                 }
3607 
3608                 /*
3609                  * Set link to next in the chain of di_hp_t nodes,
3610                  * or terminate the chain when processing the last node.
3611                  */
3612                 if (hp_hdl->next != NULL) {
3613                         off = di_checkmem(st, off, sizeof (struct di_hp));
3614                         me->next = off;
3615                 } else {
3616                         me->next = 0;
3617                 }
3618 
3619                 /* Update pointer to next in the chain */
3620                 hp_hdl = hp_hdl->next;
3621 
3622         } while (hp_hdl);
3623 
3624         return (off);
3625 }
3626 
3627 /*
3628  * The driver is stateful across DINFOCPYALL and DINFOUSRLD.
3629  * This function encapsulates the state machine:
3630  *
3631  *      -> IOC_IDLE -> IOC_SNAP -> IOC_DONE -> IOC_COPY ->
3632  *      |               SNAPSHOT                USRLD    |
3633  *      --------------------------------------------------
3634  *
3635  * Returns 0 on success and -1 on failure
3636  */
3637 static int
3638 di_setstate(struct di_state *st, int new_state)
3639 {
3640         int     ret = 0;
3641 
3642         mutex_enter(&di_lock);
3643         switch (new_state) {
3644         case IOC_IDLE:
3645         case IOC_DONE:
3646                 break;
3647         case IOC_SNAP:
3648                 if (st->di_iocstate != IOC_IDLE)
3649                         ret = -1;
3650                 break;
3651         case IOC_COPY:
3652                 if (st->di_iocstate != IOC_DONE)
3653                         ret = -1;
3654                 break;
3655         default:
3656                 ret = -1;
3657         }
3658 
3659         if (ret == 0)
3660                 st->di_iocstate = new_state;
3661         else
3662                 cmn_err(CE_NOTE, "incorrect state transition from %d to %d",
3663                     st->di_iocstate, new_state);
3664         mutex_exit(&di_lock);
3665         return (ret);
3666 }
3667 
3668 /*
3669  * We cannot assume the presence of the entire
3670  * snapshot in this routine. All we are guaranteed
3671  * is the di_all struct + 1 byte (for root_path)
3672  */
3673 static int
3674 header_plus_one_ok(struct di_all *all)
3675 {
3676         /*
3677          * Refuse to read old versions
3678          */
3679         if (all->version != DI_SNAPSHOT_VERSION) {
3680                 CACHE_DEBUG((DI_ERR, "bad version: 0x%x", all->version));
3681                 return (0);
3682         }
3683 
3684         if (all->cache_magic != DI_CACHE_MAGIC) {
3685                 CACHE_DEBUG((DI_ERR, "bad magic #: 0x%x", all->cache_magic));
3686                 return (0);
3687         }
3688 
3689         if (all->snapshot_time == 0) {
3690                 CACHE_DEBUG((DI_ERR, "bad timestamp: %ld", all->snapshot_time));
3691                 return (0);
3692         }
3693 
3694         if (all->top_devinfo == 0) {
3695                 CACHE_DEBUG((DI_ERR, "NULL top devinfo"));
3696                 return (0);
3697         }
3698 
3699         if (all->map_size < sizeof (*all) + 1) {
3700                 CACHE_DEBUG((DI_ERR, "bad map size: %u", all->map_size));
3701                 return (0);
3702         }
3703 
3704         if (all->root_path[0] != '/' || all->root_path[1] != '\0') {
3705                 CACHE_DEBUG((DI_ERR, "bad rootpath: %c%c",
3706                     all->root_path[0], all->root_path[1]));
3707                 return (0);
3708         }
3709 
3710         /*
3711          * We can't check checksum here as we just have the header
3712          */
3713 
3714         return (1);
3715 }
3716 
3717 static int
3718 chunk_write(struct vnode *vp, offset_t off, caddr_t buf, size_t len)
3719 {
3720         rlim64_t        rlimit;
3721         ssize_t         resid;
3722         int             error = 0;
3723 
3724 
3725         rlimit = RLIM64_INFINITY;
3726 
3727         while (len) {
3728                 resid = 0;
3729                 error = vn_rdwr(UIO_WRITE, vp, buf, len, off,
3730                     UIO_SYSSPACE, FSYNC, rlimit, kcred, &resid);
3731 
3732                 if (error || resid < 0) {
3733                         error = error ? error : EIO;
3734                         CACHE_DEBUG((DI_ERR, "write error: %d", error));
3735                         break;
3736                 }
3737 
3738                 /*
3739                  * Check if we are making progress
3740                  */
3741                 if (resid >= len) {
3742                         error = ENOSPC;
3743                         break;
3744                 }
3745                 buf += len - resid;
3746                 off += len - resid;
3747                 len = resid;
3748         }
3749 
3750         return (error);
3751 }
3752 
3753 static void
3754 di_cache_write(struct di_cache *cache)
3755 {
3756         struct di_all   *all;
3757         struct vnode    *vp;
3758         int             oflags;
3759         size_t          map_size;
3760         size_t          chunk;
3761         offset_t        off;
3762         int             error;
3763         char            *buf;
3764 
3765         ASSERT(DI_CACHE_LOCKED(*cache));
3766         ASSERT(!servicing_interrupt());
3767 
3768         if (cache->cache_size == 0) {
3769                 ASSERT(cache->cache_data == NULL);
3770                 CACHE_DEBUG((DI_ERR, "Empty cache. Skipping write"));
3771                 return;
3772         }
3773 
3774         ASSERT(cache->cache_size > 0);
3775         ASSERT(cache->cache_data);
3776 
3777         if (!modrootloaded || rootvp == NULL || vn_is_readonly(rootvp)) {
3778                 CACHE_DEBUG((DI_ERR, "Can't write to rootFS. Skipping write"));
3779                 return;
3780         }
3781 
3782         all = (struct di_all *)cache->cache_data;
3783 
3784         if (!header_plus_one_ok(all)) {
3785                 CACHE_DEBUG((DI_ERR, "Invalid header. Skipping write"));
3786                 return;
3787         }
3788 
3789         ASSERT(strcmp(all->root_path, "/") == 0);
3790 
3791         /*
3792          * The cache_size is the total allocated memory for the cache.
3793          * The map_size is the actual size of valid data in the cache.
3794          * map_size may be smaller than cache_size but cannot exceed
3795          * cache_size.
3796          */
3797         if (all->map_size > cache->cache_size) {
3798                 CACHE_DEBUG((DI_ERR, "map_size (0x%x) > cache_size (0x%x)."
3799                     " Skipping write", all->map_size, cache->cache_size));
3800                 return;
3801         }
3802 
3803         /*
3804          * First unlink the temp file
3805          */
3806         error = vn_remove(DI_CACHE_TEMP, UIO_SYSSPACE, RMFILE);
3807         if (error && error != ENOENT) {
3808                 CACHE_DEBUG((DI_ERR, "%s: unlink failed: %d",
3809                     DI_CACHE_TEMP, error));
3810         }
3811 
3812         if (error == EROFS) {
3813                 CACHE_DEBUG((DI_ERR, "RDONLY FS. Skipping write"));
3814                 return;
3815         }
3816 
3817         vp = NULL;
3818         oflags = (FCREAT|FWRITE);
3819         if (error = vn_open(DI_CACHE_TEMP, UIO_SYSSPACE, oflags,
3820             DI_CACHE_PERMS, &vp, CRCREAT, 0)) {
3821                 CACHE_DEBUG((DI_ERR, "%s: create failed: %d",
3822                     DI_CACHE_TEMP, error));
3823                 return;
3824         }
3825 
3826         ASSERT(vp);
3827 
3828         /*
3829          * Paranoid: Check if the file is on a read-only FS
3830          */
3831         if (vn_is_readonly(vp)) {
3832                 CACHE_DEBUG((DI_ERR, "cannot write: readonly FS"));
3833                 goto fail;
3834         }
3835 
3836         /*
3837          * Note that we only write map_size bytes to disk - this saves
3838          * space as the actual cache size may be larger than size of
3839          * valid data in the cache.
3840          * Another advantage is that it makes verification of size
3841          * easier when the file is read later.
3842          */
3843         map_size = all->map_size;
3844         off = 0;
3845         buf = cache->cache_data;
3846 
3847         while (map_size) {
3848                 ASSERT(map_size > 0);
3849                 /*
3850                  * Write in chunks so that VM system
3851                  * is not overwhelmed
3852                  */
3853                 if (map_size > di_chunk * PAGESIZE)
3854                         chunk = di_chunk * PAGESIZE;
3855                 else
3856                         chunk = map_size;
3857 
3858                 error = chunk_write(vp, off, buf, chunk);
3859                 if (error) {
3860                         CACHE_DEBUG((DI_ERR, "write failed: off=0x%x: %d",
3861                             off, error));
3862                         goto fail;
3863                 }
3864 
3865                 off += chunk;
3866                 buf += chunk;
3867                 map_size -= chunk;
3868 
3869                 /* If low on memory, give pageout a chance to run */
3870                 if (freemem < desfree)
3871                         delay(1);
3872         }
3873 
3874         /*
3875          * Now sync the file and close it
3876          */
3877         if (error = VOP_FSYNC(vp, FSYNC, kcred, NULL)) {
3878                 CACHE_DEBUG((DI_ERR, "FSYNC failed: %d", error));
3879         }
3880 
3881         if (error = VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred, NULL)) {
3882                 CACHE_DEBUG((DI_ERR, "close() failed: %d", error));
3883                 VN_RELE(vp);
3884                 return;
3885         }
3886 
3887         VN_RELE(vp);
3888 
3889         /*
3890          * Now do the rename
3891          */
3892         if (error = vn_rename(DI_CACHE_TEMP, DI_CACHE_FILE, UIO_SYSSPACE)) {
3893                 CACHE_DEBUG((DI_ERR, "rename failed: %d", error));
3894                 return;
3895         }
3896 
3897         CACHE_DEBUG((DI_INFO, "Cache write successful."));
3898 
3899         return;
3900 
3901 fail:
3902         (void) VOP_CLOSE(vp, oflags, 1, (offset_t)0, kcred, NULL);
3903         VN_RELE(vp);
3904 }
3905 
3906 
3907 /*
3908  * Since we could be called early in boot,
3909  * use kobj_read_file()
3910  */
3911 static void
3912 di_cache_read(struct di_cache *cache)
3913 {
3914         struct _buf     *file;
3915         struct di_all   *all;
3916         int             n;
3917         size_t          map_size, sz, chunk;
3918         offset_t        off;
3919         caddr_t         buf;
3920         uint32_t        saved_crc, crc;
3921 
3922         ASSERT(modrootloaded);
3923         ASSERT(DI_CACHE_LOCKED(*cache));
3924         ASSERT(cache->cache_data == NULL);
3925         ASSERT(cache->cache_size == 0);
3926         ASSERT(!servicing_interrupt());
3927 
3928         file = kobj_open_file(DI_CACHE_FILE);
3929         if (file == (struct _buf *)-1) {
3930                 CACHE_DEBUG((DI_ERR, "%s: open failed: %d",
3931                     DI_CACHE_FILE, ENOENT));
3932                 return;
3933         }
3934 
3935         /*
3936          * Read in the header+root_path first. The root_path must be "/"
3937          */
3938         all = kmem_zalloc(sizeof (*all) + 1, KM_SLEEP);
3939         n = kobj_read_file(file, (caddr_t)all, sizeof (*all) + 1, 0);
3940 
3941         if ((n != sizeof (*all) + 1) || !header_plus_one_ok(all)) {
3942                 kmem_free(all, sizeof (*all) + 1);
3943                 kobj_close_file(file);
3944                 CACHE_DEBUG((DI_ERR, "cache header: read error or invalid"));
3945                 return;
3946         }
3947 
3948         map_size = all->map_size;
3949 
3950         kmem_free(all, sizeof (*all) + 1);
3951 
3952         ASSERT(map_size >= sizeof (*all) + 1);
3953 
3954         buf = di_cache.cache_data = kmem_alloc(map_size, KM_SLEEP);
3955         sz = map_size;
3956         off = 0;
3957         while (sz) {
3958                 /* Don't overload VM with large reads */
3959                 chunk = (sz > di_chunk * PAGESIZE) ? di_chunk * PAGESIZE : sz;
3960                 n = kobj_read_file(file, buf, chunk, off);
3961                 if (n != chunk) {
3962                         CACHE_DEBUG((DI_ERR, "%s: read error at offset: %lld",
3963                             DI_CACHE_FILE, off));
3964                         goto fail;
3965                 }
3966                 off += chunk;
3967                 buf += chunk;
3968                 sz -= chunk;
3969         }
3970 
3971         ASSERT(off == map_size);
3972 
3973         /*
3974          * Read past expected EOF to verify size.
3975          */
3976         if (kobj_read_file(file, (caddr_t)&sz, 1, off) > 0) {
3977                 CACHE_DEBUG((DI_ERR, "%s: file size changed", DI_CACHE_FILE));
3978                 goto fail;
3979         }
3980 
3981         all = (struct di_all *)di_cache.cache_data;
3982         if (!header_plus_one_ok(all)) {
3983                 CACHE_DEBUG((DI_ERR, "%s: file header changed", DI_CACHE_FILE));
3984                 goto fail;
3985         }
3986 
3987         /*
3988          * Compute CRC with checksum field in the cache data set to 0
3989          */
3990         saved_crc = all->cache_checksum;
3991         all->cache_checksum = 0;
3992         CRC32(crc, di_cache.cache_data, map_size, -1U, crc32_table);
3993         all->cache_checksum = saved_crc;
3994 
3995         if (crc != all->cache_checksum) {
3996                 CACHE_DEBUG((DI_ERR,
3997                     "%s: checksum error: expected=0x%x actual=0x%x",
3998                     DI_CACHE_FILE, all->cache_checksum, crc));
3999                 goto fail;
4000         }
4001 
4002         if (all->map_size != map_size) {
4003                 CACHE_DEBUG((DI_ERR, "%s: map size changed", DI_CACHE_FILE));
4004                 goto fail;
4005         }
4006 
4007         kobj_close_file(file);
4008 
4009         di_cache.cache_size = map_size;
4010 
4011         return;
4012 
4013 fail:
4014         kmem_free(di_cache.cache_data, map_size);
4015         kobj_close_file(file);
4016         di_cache.cache_data = NULL;
4017         di_cache.cache_size = 0;
4018 }
4019 
4020 
4021 /*
4022  * Checks if arguments are valid for using the cache.
4023  */
4024 static int
4025 cache_args_valid(struct di_state *st, int *error)
4026 {
4027         ASSERT(error);
4028         ASSERT(st->mem_size > 0);
4029         ASSERT(st->memlist != NULL);
4030 
4031         if (!modrootloaded || !i_ddi_io_initialized()) {
4032                 CACHE_DEBUG((DI_ERR,
4033                     "cache lookup failure: I/O subsystem not inited"));
4034                 *error = ENOTACTIVE;
4035                 return (0);
4036         }
4037 
4038         /*
4039          * No other flags allowed with DINFOCACHE
4040          */
4041         if (st->command != (DINFOCACHE & DIIOC_MASK)) {
4042                 CACHE_DEBUG((DI_ERR,
4043                     "cache lookup failure: bad flags: 0x%x",
4044                     st->command));
4045                 *error = EINVAL;
4046                 return (0);
4047         }
4048 
4049         if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) {
4050                 CACHE_DEBUG((DI_ERR,
4051                     "cache lookup failure: bad root: %s",
4052                     DI_ALL_PTR(st)->root_path));
4053                 *error = EINVAL;
4054                 return (0);
4055         }
4056 
4057         CACHE_DEBUG((DI_INFO, "cache lookup args ok: 0x%x", st->command));
4058 
4059         *error = 0;
4060 
4061         return (1);
4062 }
4063 
4064 static int
4065 snapshot_is_cacheable(struct di_state *st)
4066 {
4067         ASSERT(st->mem_size > 0);
4068         ASSERT(st->memlist != NULL);
4069 
4070         if ((st->command & DI_CACHE_SNAPSHOT_FLAGS) !=
4071             (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK)) {
4072                 CACHE_DEBUG((DI_INFO,
4073                     "not cacheable: incompatible flags: 0x%x",
4074                     st->command));
4075                 return (0);
4076         }
4077 
4078         if (strcmp(DI_ALL_PTR(st)->root_path, "/") != 0) {
4079                 CACHE_DEBUG((DI_INFO,
4080                     "not cacheable: incompatible root path: %s",
4081                     DI_ALL_PTR(st)->root_path));
4082                 return (0);
4083         }
4084 
4085         CACHE_DEBUG((DI_INFO, "cacheable snapshot request: 0x%x", st->command));
4086 
4087         return (1);
4088 }
4089 
4090 static int
4091 di_cache_lookup(struct di_state *st)
4092 {
4093         size_t  rval;
4094         int     cache_valid;
4095 
4096         ASSERT(cache_args_valid(st, &cache_valid));
4097         ASSERT(modrootloaded);
4098 
4099         DI_CACHE_LOCK(di_cache);
4100 
4101         /*
4102          * The following assignment determines the validity
4103          * of the cache as far as this snapshot is concerned.
4104          */
4105         cache_valid = di_cache.cache_valid;
4106 
4107         if (cache_valid && di_cache.cache_data == NULL) {
4108                 di_cache_read(&di_cache);
4109                 /* check for read or file error */
4110                 if (di_cache.cache_data == NULL)
4111                         cache_valid = 0;
4112         }
4113 
4114         if (cache_valid) {
4115                 /*
4116                  * Ok, the cache was valid as of this particular
4117                  * snapshot. Copy the cached snapshot. This is safe
4118                  * to do as the cache cannot be freed (we hold the
4119                  * cache lock). Free the memory allocated in di_state
4120                  * up until this point - we will simply copy everything
4121                  * in the cache.
4122                  */
4123 
4124                 ASSERT(di_cache.cache_data != NULL);
4125                 ASSERT(di_cache.cache_size > 0);
4126 
4127                 di_freemem(st);
4128 
4129                 rval = 0;
4130                 if (di_cache2mem(&di_cache, st) > 0) {
4131                         /*
4132                          * map_size is size of valid data in the
4133                          * cached snapshot and may be less than
4134                          * size of the cache.
4135                          */
4136                         ASSERT(DI_ALL_PTR(st));
4137                         rval = DI_ALL_PTR(st)->map_size;
4138 
4139                         ASSERT(rval >= sizeof (struct di_all));
4140                         ASSERT(rval <= di_cache.cache_size);
4141                 }
4142         } else {
4143                 /*
4144                  * The cache isn't valid, we need to take a snapshot.
4145                  * Set the command flags appropriately
4146                  */
4147                 ASSERT(st->command == (DINFOCACHE & DIIOC_MASK));
4148                 st->command = (DI_CACHE_SNAPSHOT_FLAGS & DIIOC_MASK);
4149                 rval = di_cache_update(st);
4150                 st->command = (DINFOCACHE & DIIOC_MASK);
4151         }
4152 
4153         DI_CACHE_UNLOCK(di_cache);
4154 
4155         /*
4156          * For cached snapshots, the devinfo driver always returns
4157          * a snapshot rooted at "/".
4158          */
4159         ASSERT(rval == 0 || strcmp(DI_ALL_PTR(st)->root_path, "/") == 0);
4160 
4161         return ((int)rval);
4162 }
4163 
4164 /*
4165  * This is a forced update of the cache  - the previous state of the cache
4166  * may be:
4167  *      - unpopulated
4168  *      - populated and invalid
4169  *      - populated and valid
4170  */
4171 static int
4172 di_cache_update(struct di_state *st)
4173 {
4174         int             rval;
4175         uint32_t        crc;
4176         struct di_all   *all;
4177 
4178         ASSERT(DI_CACHE_LOCKED(di_cache));
4179         ASSERT(snapshot_is_cacheable(st));
4180 
4181         /*
4182          * Free the in-core cache and the on-disk file (if they exist)
4183          */
4184         i_ddi_di_cache_free(&di_cache);
4185 
4186         /*
4187          * Set valid flag before taking the snapshot,
4188          * so that any invalidations that arrive
4189          * during or after the snapshot are not
4190          * removed by us.
4191          */
4192         atomic_or_32(&di_cache.cache_valid, 1);
4193 
4194         rval = di_snapshot_and_clean(st);
4195 
4196         if (rval == 0) {
4197                 CACHE_DEBUG((DI_ERR, "can't update cache: bad snapshot"));
4198                 return (0);
4199         }
4200 
4201         DI_ALL_PTR(st)->map_size = rval;
4202         if (di_mem2cache(st, &di_cache) == 0) {
4203                 CACHE_DEBUG((DI_ERR, "can't update cache: copy failed"));
4204                 return (0);
4205         }
4206 
4207         ASSERT(di_cache.cache_data);
4208         ASSERT(di_cache.cache_size > 0);
4209 
4210         /*
4211          * Now that we have cached the snapshot, compute its checksum.
4212          * The checksum is only computed over the valid data in the
4213          * cache, not the entire cache.
4214          * Also, set all the fields (except checksum) before computing
4215          * checksum.
4216          */
4217         all = (struct di_all *)di_cache.cache_data;
4218         all->cache_magic = DI_CACHE_MAGIC;
4219         all->map_size = rval;
4220 
4221         ASSERT(all->cache_checksum == 0);
4222         CRC32(crc, di_cache.cache_data, all->map_size, -1U, crc32_table);
4223         all->cache_checksum = crc;
4224 
4225         di_cache_write(&di_cache);
4226 
4227         return (rval);
4228 }
4229 
4230 static void
4231 di_cache_print(di_cache_debug_t msglevel, char *fmt, ...)
4232 {
4233         va_list ap;
4234 
4235         if (di_cache_debug <= DI_QUIET)
4236                 return;
4237 
4238         if (di_cache_debug < msglevel)
4239                 return;
4240 
4241         switch (msglevel) {
4242                 case DI_ERR:
4243                         msglevel = CE_WARN;
4244                         break;
4245                 case DI_INFO:
4246                 case DI_TRACE:
4247                 default:
4248                         msglevel = CE_NOTE;
4249                         break;
4250         }
4251 
4252         va_start(ap, fmt);
4253         vcmn_err(msglevel, fmt, ap);
4254         va_end(ap);
4255 }
4256 
4257 static void
4258 di_hotplug_children(struct di_state *st)
4259 {
4260         di_off_t        off;
4261         struct di_hp    *hp;
4262         struct i_hp     *hp_list_node;
4263 
4264         while (hp_list_node = (struct i_hp *)list_remove_head(&st->hp_list)) {
4265 
4266                 if ((hp_list_node->hp_child != NULL) &&
4267                     (di_dip_find(st, hp_list_node->hp_child, &off) == 0)) {
4268                         hp = DI_HP(di_mem_addr(st, hp_list_node->hp_off));
4269                         hp->hp_child = off;
4270                 }
4271 
4272                 kmem_free(hp_list_node, sizeof (i_hp_t));
4273         }
4274 
4275         list_destroy(&st->hp_list);
4276 }