1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 /*
  26  * miscellaneous routines for the devfs
  27  */
  28 
  29 #include <sys/types.h>
  30 #include <sys/param.h>
  31 #include <sys/t_lock.h>
  32 #include <sys/systm.h>
  33 #include <sys/sysmacros.h>
  34 #include <sys/user.h>
  35 #include <sys/time.h>
  36 #include <sys/vfs.h>
  37 #include <sys/vnode.h>
  38 #include <sys/file.h>
  39 #include <sys/fcntl.h>
  40 #include <sys/flock.h>
  41 #include <sys/kmem.h>
  42 #include <sys/uio.h>
  43 #include <sys/errno.h>
  44 #include <sys/stat.h>
  45 #include <sys/cred.h>
  46 #include <sys/dirent.h>
  47 #include <sys/pathname.h>
  48 #include <sys/cmn_err.h>
  49 #include <sys/debug.h>
  50 #include <sys/modctl.h>
  51 #include <fs/fs_subr.h>
  52 #include <sys/fs/dv_node.h>
  53 #include <sys/fs/snode.h>
  54 #include <sys/sunndi.h>
  55 #include <sys/sunmdi.h>
  56 #include <sys/conf.h>
  57 
/* Debug control word; only present in DEBUG builds. */
#ifdef DEBUG
int devfs_debug = 0x0;
#endif

/* File system name, used as the "%s" prefix in cmn_err() messages below. */
const char      dvnm[] = "devfs";

/*
 * kmem cache of dv_nodes.  Created by dv_node_cache_init() and torn down
 * by dv_node_cache_fini(); NULL whenever the cache does not exist.
 */
kmem_cache_t    *dv_node_cache; /* dv_node cache */

/*
 * The devfs_clean_key is taken during a devfs_clean operation: it is used to
 * prevent unnecessary code execution and for detection of potential deadlocks.
 * The key itself is a thread-specific-data key, created with tsd_create()
 * in dv_node_cache_init() and destroyed in dv_node_cache_fini().
 */
uint_t          devfs_clean_key;

/* Root dv_node of the file system; set by dv_mkroot(). */
struct dv_node *dvroot;
  72 
/*
 * Prototype in-memory vattrs.
 *
 * dv_vattr_dir is the template devfs_get_defattr() copies out for VDIR
 * dv_nodes.  va_mask names the fields the template supplies (type, mode,
 * uid, gid); every other field is left zero.
 */
vattr_t dv_vattr_dir = {
        AT_TYPE|AT_MODE|AT_UID|AT_GID,          /* va_mask */
        VDIR,                                   /* va_type */
        DV_DIRMODE_DEFAULT,                     /* va_mode */
        DV_UID_DEFAULT,                         /* va_uid */
        DV_GID_DEFAULT,                         /* va_gid */
        0,                                      /* va_fsid */
        0,                                      /* va_nodeid */
        0,                                      /* va_nlink */
        0,                                      /* va_size */
        {0},                                    /* va_atime */
        {0},                                    /* va_mtime */
        {0},                                    /* va_ctime */
        0,                                      /* va_rdev */
        0,                                      /* va_blksize */
        0,                                      /* va_nblocks */
        0,                                      /* va_seq */
};
  92 
/*
 * Template for ordinary device (leaf) nodes.  devfs_get_defattr() copies it
 * out for vnodes that are not dv_nodes and for minors without DV_NO_FSPERM.
 * va_type is left 0 here; dv_vattr_merge() fills in va_type and va_rdev
 * from the vnode for non-directory nodes.
 */
vattr_t dv_vattr_file = {
        AT_TYPE|AT_MODE|AT_SIZE|AT_UID|AT_GID|AT_RDEV,  /* va_mask */
        0,                                      /* va_type */
        DV_DEVMODE_DEFAULT,                     /* va_mode */
        DV_UID_DEFAULT,                         /* va_uid */
        DV_GID_DEFAULT,                         /* va_gid */
        0,                                      /* va_fsid */
        0,                                      /* va_nodeid */
        0,                                      /* va_nlink */
        0,                                      /* va_size */
        {0},                                    /* va_atime */
        {0},                                    /* va_mtime */
        {0},                                    /* va_ctime */
        0,                                      /* va_rdev */
        0,                                      /* va_blksize */
        0,                                      /* va_nblocks */
        0,                                      /* va_seq */
};
 111 
/*
 * Template for device nodes flagged DV_NO_FSPERM; devfs_get_defattr() copies
 * it out for those nodes.  It differs from dv_vattr_file only in the mode
 * (DV_DEVMODE_PRIV instead of DV_DEVMODE_DEFAULT).
 */
vattr_t dv_vattr_priv = {
        AT_TYPE|AT_MODE|AT_SIZE|AT_UID|AT_GID|AT_RDEV,  /* va_mask */
        0,                                      /* va_type */
        DV_DEVMODE_PRIV,                        /* va_mode */
        DV_UID_DEFAULT,                         /* va_uid */
        DV_GID_DEFAULT,                         /* va_gid */
        0,                                      /* va_fsid */
        0,                                      /* va_nodeid */
        0,                                      /* va_nlink */
        0,                                      /* va_size */
        {0},                                    /* va_atime */
        {0},                                    /* va_mtime */
        {0},                                    /* va_ctime */
        0,                                      /* va_rdev */
        0,                                      /* va_blksize */
        0,                                      /* va_nblocks */
        0,                                      /* va_seq */
};
 130 
/*
 * Symbols defined elsewhere in the kernel and declared here directly
 * rather than through a header.
 * NOTE(review): presumably consumed by the clone-node handling later in
 * this file -- confirm against dv_clone_mknod().
 */
extern dev_info_t       *clone_dip;
extern major_t          clone_major;
extern struct dev_ops   *ddi_hold_driver(major_t);
 134 
 135 /* dv_node node constructor for kmem cache */
 136 static int
 137 i_dv_node_ctor(void *buf, void *cfarg, int flag)
 138 {
 139         _NOTE(ARGUNUSED(cfarg, flag))
 140         struct dv_node  *dv = (struct dv_node *)buf;
 141         struct vnode    *vp;
 142 
 143         bzero(buf, sizeof (struct dv_node));
 144         vp = dv->dv_vnode = vn_alloc(flag);
 145         if (vp == NULL) {
 146                 return (-1);
 147         }
 148         vp->v_data = dv;
 149         rw_init(&dv->dv_contents, NULL, RW_DEFAULT, NULL);
 150         return (0);
 151 }
 152 
 153 /* dv_node node destructor for kmem cache */
 154 static void
 155 i_dv_node_dtor(void *buf, void *arg)
 156 {
 157         _NOTE(ARGUNUSED(arg))
 158         struct dv_node  *dv = (struct dv_node *)buf;
 159         struct vnode    *vp = DVTOV(dv);
 160 
 161         rw_destroy(&dv->dv_contents);
 162         vn_invalid(vp);
 163         vn_free(vp);
 164 }
 165 
 166 
 167 /* initialize dv_node node cache */
 168 void
 169 dv_node_cache_init()
 170 {
 171         ASSERT(dv_node_cache == NULL);
 172         dv_node_cache = kmem_cache_create("dv_node_cache",
 173             sizeof (struct dv_node), 0, i_dv_node_ctor, i_dv_node_dtor,
 174             NULL, NULL, NULL, 0);
 175 
 176         tsd_create(&devfs_clean_key, NULL);
 177 }
 178 
 179 /* destroy dv_node node cache */
 180 void
 181 dv_node_cache_fini()
 182 {
 183         ASSERT(dv_node_cache != NULL);
 184         kmem_cache_destroy(dv_node_cache);
 185         dv_node_cache = NULL;
 186 
 187         tsd_destroy(&devfs_clean_key);
 188 }
 189 
 190 /*
 191  * dv_mkino - Generate a unique inode number for devfs nodes.
 192  *
 193  * Although ino_t is 64 bits, the inode number is truncated to 32 bits for 32
 194  * bit non-LARGEFILE applications. This means that there is a requirement to
 195  * maintain the inode number as a 32 bit value or applications will have
 196  * stat(2) calls fail with EOVERFLOW.  We form a 32 bit inode number from the
 197  * dev_t. but if the minor number is larger than L_MAXMIN32 we fold extra minor
 198  *
 199  * To generate inode numbers for directories, we assume that we will never use
 200  * more than half the major space - this allows for ~8190 drivers. We use this
 201  * upper major number space to allocate inode numbers for directories by
 202  * encoding the major and instance into this space.
 203  *
 204  * We also skew the result so that inode 2 is reserved for the root of the file
 205  * system.
 206  *
 207  * As part of the future support for 64-bit dev_t APIs, the upper minor bits
 208  * should be folded into the high inode bits by adding the following code
 209  * after "ino |= 1":
 210  *
 211  * #if (L_BITSMINOR32 != L_BITSMINOR)
 212  *              |* fold overflow minor bits into high bits of inode number *|
 213  *              ino |= ((ino_t)(minor >> L_BITSMINOR32)) << L_BITSMINOR;
 214  * #endif |* (L_BITSMINOR32 != L_BITSMINOR) *|
 215  *
 216  * This way only applications that use devices that overflow their minor
 217  * space will have an application level impact.
 218  */
 219 static ino_t
 220 dv_mkino(dev_info_t *devi, vtype_t typ, dev_t dev)
 221 {
 222         major_t         major;
 223         minor_t         minor;
 224         ino_t           ino;
 225         static int      warn;
 226 
 227         if (typ == VDIR) {
 228                 major = ((L_MAXMAJ32 + 1) >> 1) + DEVI(devi)->devi_major;
 229                 minor = ddi_get_instance(devi);
 230 
 231                 /* makedevice32 in high half of major number space */
 232                 ino = (ino_t)((major << L_BITSMINOR32) | (minor & L_MAXMIN32));
 233 
 234                 major = DEVI(devi)->devi_major;
 235         } else {
 236                 major = getmajor(dev);
 237                 minor = getminor(dev);
 238 
 239                 /* makedevice32 */
 240                 ino = (ino_t)((major << L_BITSMINOR32) | (minor & L_MAXMIN32));
 241 
 242                 /* make ino for VCHR different than VBLK */
 243                 ino <<= 1;
 244                 if (typ == VCHR)
 245                         ino |= 1;
 246         }
 247 
 248         ino += DV_ROOTINO + 1;          /* skew */
 249 
 250         /*
 251          * diagnose things a little early because adding the skew to a large
 252          * minor number could roll over the major.
 253          */
 254         if ((major >= (L_MAXMAJ32 >> 1)) && (warn == 0)) {
 255                 warn = 1;
 256                 cmn_err(CE_WARN, "%s: inode numbers are not unique", dvnm);
 257         }
 258 
 259         return (ino);
 260 }
 261 
 262 /*
 263  * Compare two nodes lexographically to balance avl tree
 264  */
 265 static int
 266 dv_compare_nodes(const struct dv_node *dv1, const struct dv_node *dv2)
 267 {
 268         int     rv;
 269 
 270         if ((rv = strcmp(dv1->dv_name, dv2->dv_name)) == 0)
 271                 return (0);
 272         return ((rv < 0) ? -1 : 1);
 273 }
 274 
 275 /*
 276  * dv_mkroot
 277  *
 278  * Build the first VDIR dv_node.
 279  */
 280 struct dv_node *
 281 dv_mkroot(struct vfs *vfsp, dev_t devfsdev)
 282 {
 283         struct dv_node  *dv;
 284         struct vnode    *vp;
 285 
 286         ASSERT(ddi_root_node() != NULL);
 287         ASSERT(dv_node_cache != NULL);
 288 
 289         dcmn_err3(("dv_mkroot\n"));
 290         dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP);
 291         vp = DVTOV(dv);
 292         vn_reinit(vp);
 293         vp->v_flag = VROOT;
 294         vp->v_vfsp = vfsp;
 295         vp->v_type = VDIR;
 296         vp->v_rdev = devfsdev;
 297         vn_setops(vp, dv_vnodeops);
 298         vn_exists(vp);
 299 
 300         dvroot = dv;
 301 
 302         dv->dv_name = NULL;          /* not needed */
 303         dv->dv_namelen = 0;
 304 
 305         dv->dv_devi = ddi_root_node();
 306 
 307         dv->dv_ino = DV_ROOTINO;
 308         dv->dv_nlink = 2;            /* name + . (no dv_insert) */
 309         dv->dv_dotdot = dv;          /* .. == self */
 310         dv->dv_attrvp = NULLVP;
 311         dv->dv_attr = NULL;
 312         dv->dv_flags = DV_BUILD;
 313         dv->dv_priv = NULL;
 314         dv->dv_busy = 0;
 315         dv->dv_dflt_mode = 0;
 316 
 317         avl_create(&dv->dv_entries,
 318             (int (*)(const void *, const void *))dv_compare_nodes,
 319             sizeof (struct dv_node), offsetof(struct dv_node, dv_avllink));
 320 
 321         return (dv);
 322 }
 323 
 324 /*
 325  * dv_mkdir
 326  *
 327  * Given an probed or attached nexus node, create a VDIR dv_node.
 328  * No dv_attrvp is created at this point.
 329  */
 330 struct dv_node *
 331 dv_mkdir(struct dv_node *ddv, dev_info_t *devi, char *nm)
 332 {
 333         struct dv_node  *dv;
 334         struct vnode    *vp;
 335         size_t          nmlen;
 336 
 337         ASSERT((devi));
 338         dcmn_err4(("dv_mkdir: %s\n", nm));
 339 
 340         dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP);
 341         nmlen = strlen(nm) + 1;
 342         dv->dv_name = kmem_alloc(nmlen, KM_SLEEP);
 343         bcopy(nm, dv->dv_name, nmlen);
 344         dv->dv_namelen = nmlen - 1;  /* '\0' not included */
 345 
 346         vp = DVTOV(dv);
 347         vn_reinit(vp);
 348         vp->v_flag = 0;
 349         vp->v_vfsp = DVTOV(ddv)->v_vfsp;
 350         vp->v_type = VDIR;
 351         vp->v_rdev = DVTOV(ddv)->v_rdev;
 352         vn_setops(vp, vn_getops(DVTOV(ddv)));
 353         vn_exists(vp);
 354 
 355         dv->dv_devi = devi;
 356         ndi_hold_devi(devi);
 357 
 358         dv->dv_ino = dv_mkino(devi, VDIR, NODEV);
 359         dv->dv_nlink = 0;            /* updated on insert */
 360         dv->dv_dotdot = ddv;
 361         dv->dv_attrvp = NULLVP;
 362         dv->dv_attr = NULL;
 363         dv->dv_flags = DV_BUILD;
 364         dv->dv_priv = NULL;
 365         dv->dv_busy = 0;
 366         dv->dv_dflt_mode = 0;
 367 
 368         avl_create(&dv->dv_entries,
 369             (int (*)(const void *, const void *))dv_compare_nodes,
 370             sizeof (struct dv_node), offsetof(struct dv_node, dv_avllink));
 371 
 372         return (dv);
 373 }
 374 
 375 /*
 376  * dv_mknod
 377  *
 378  * Given a minor node, create a VCHR or VBLK dv_node.
 379  * No dv_attrvp is created at this point.
 380  */
 381 static struct dv_node *
 382 dv_mknod(struct dv_node *ddv, dev_info_t *devi, char *nm,
 383         struct ddi_minor_data *dmd)
 384 {
 385         struct dv_node  *dv;
 386         struct vnode    *vp;
 387         size_t          nmlen;
 388 
 389         dcmn_err4(("dv_mknod: %s\n", nm));
 390 
 391         dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP);
 392         nmlen = strlen(nm) + 1;
 393         dv->dv_name = kmem_alloc(nmlen, KM_SLEEP);
 394         bcopy(nm, dv->dv_name, nmlen);
 395         dv->dv_namelen = nmlen - 1;  /* no '\0' */
 396 
 397         vp = DVTOV(dv);
 398         vn_reinit(vp);
 399         vp->v_flag = 0;
 400         vp->v_vfsp = DVTOV(ddv)->v_vfsp;
 401         vp->v_type = dmd->ddm_spec_type == S_IFCHR ? VCHR : VBLK;
 402         vp->v_rdev = dmd->ddm_dev;
 403         vn_setops(vp, vn_getops(DVTOV(ddv)));
 404         vn_exists(vp);
 405 
 406         /* increment dev_ref with devi_lock held */
 407         ASSERT(DEVI_BUSY_OWNED(devi));
 408         mutex_enter(&DEVI(devi)->devi_lock);
 409         dv->dv_devi = devi;
 410         DEVI(devi)->devi_ref++;              /* ndi_hold_devi(dip) */
 411         mutex_exit(&DEVI(devi)->devi_lock);
 412 
 413         dv->dv_ino = dv_mkino(devi, vp->v_type, vp->v_rdev);
 414         dv->dv_nlink = 0;            /* updated on insert */
 415         dv->dv_dotdot = ddv;
 416         dv->dv_attrvp = NULLVP;
 417         dv->dv_attr = NULL;
 418         dv->dv_flags = 0;
 419 
 420         if (dmd->type == DDM_INTERNAL_PATH)
 421                 dv->dv_flags |= DV_INTERNAL;
 422         if (dmd->ddm_flags & DM_NO_FSPERM)
 423                 dv->dv_flags |= DV_NO_FSPERM;
 424 
 425         dv->dv_priv = dmd->ddm_node_priv;
 426         if (dv->dv_priv)
 427                 dphold(dv->dv_priv);
 428 
 429         /*
 430          * Minors created with ddi_create_priv_minor_node can specify
 431          * a default mode permission other than the devfs default.
 432          */
 433         if (dv->dv_priv || dv->dv_flags & DV_NO_FSPERM) {
 434                 dcmn_err5(("%s: dv_mknod default priv mode 0%o\n",
 435                     dv->dv_name, dmd->ddm_priv_mode));
 436                 dv->dv_flags |= DV_DFLT_MODE;
 437                 dv->dv_dflt_mode = dmd->ddm_priv_mode & S_IAMB;
 438         }
 439 
 440         return (dv);
 441 }
 442 
 443 /*
 444  * dv_destroy
 445  *
 446  * Destroy what we created in dv_mkdir or dv_mknod.
 447  * In the case of a *referenced* directory, do nothing.
 448  */
 449 void
 450 dv_destroy(struct dv_node *dv, uint_t flags)
 451 {
 452         vnode_t *vp = DVTOV(dv);
 453         ASSERT(dv->dv_nlink == 0);           /* no references */
 454 
 455         dcmn_err4(("dv_destroy: %s\n", dv->dv_name));
 456 
 457         /*
 458          * We may be asked to unlink referenced directories.
 459          * In this case, there is nothing to be done.
 460          * The eventual memory free will be done in
 461          * devfs_inactive.
 462          */
 463         if (vp->v_count != 0) {
 464                 ASSERT(vp->v_type == VDIR);
 465                 ASSERT(flags & DV_CLEAN_FORCE);
 466                 ASSERT(DV_STALE(dv));
 467                 return;
 468         }
 469 
 470         if (vp->v_type == VDIR) {
 471                 ASSERT(DV_FIRST_ENTRY(dv) == NULL);
 472                 avl_destroy(&dv->dv_entries);
 473         }
 474 
 475         if (dv->dv_attrvp != NULLVP)
 476                 VN_RELE(dv->dv_attrvp);
 477         if (dv->dv_attr != NULL)
 478                 kmem_free(dv->dv_attr, sizeof (struct vattr));
 479         if (dv->dv_name != NULL)
 480                 kmem_free(dv->dv_name, dv->dv_namelen + 1);
 481         if (dv->dv_devi != NULL) {
 482                 ndi_rele_devi(dv->dv_devi);
 483         }
 484         if (dv->dv_priv != NULL) {
 485                 dpfree(dv->dv_priv);
 486         }
 487 
 488         kmem_cache_free(dv_node_cache, dv);
 489 }
 490 
 491 /*
 492  * Find and hold dv_node by name
 493  */
 494 static struct dv_node *
 495 dv_findbyname(struct dv_node *ddv, char *nm)
 496 {
 497         struct dv_node  *dv;
 498         avl_index_t     where;
 499         struct dv_node  dvtmp;
 500 
 501         ASSERT(RW_LOCK_HELD(&ddv->dv_contents));
 502         dcmn_err3(("dv_findbyname: %s\n", nm));
 503 
 504         dvtmp.dv_name = nm;
 505         dv = avl_find(&ddv->dv_entries, &dvtmp, &where);
 506         if (dv) {
 507                 ASSERT(dv->dv_dotdot == ddv);
 508                 ASSERT(strcmp(dv->dv_name, nm) == 0);
 509                 VN_HOLD(DVTOV(dv));
 510                 return (dv);
 511         }
 512         return (NULL);
 513 }
 514 
 515 /*
 516  * Inserts a new dv_node in a parent directory
 517  */
 518 void
 519 dv_insert(struct dv_node *ddv, struct dv_node *dv)
 520 {
 521         avl_index_t     where;
 522 
 523         ASSERT(RW_WRITE_HELD(&ddv->dv_contents));
 524         ASSERT(DVTOV(ddv)->v_type == VDIR);
 525         ASSERT(ddv->dv_nlink >= 2);
 526         ASSERT(dv->dv_nlink == 0);
 527 
 528         dcmn_err3(("dv_insert: %s\n", dv->dv_name));
 529 
 530         dv->dv_dotdot = ddv;
 531         if (DVTOV(dv)->v_type == VDIR) {
 532                 ddv->dv_nlink++;     /* .. to containing directory */
 533                 dv->dv_nlink = 2;    /* name + . */
 534         } else {
 535                 dv->dv_nlink = 1;    /* name */
 536         }
 537 
 538         /* enter node in the avl tree */
 539         VERIFY(avl_find(&ddv->dv_entries, dv, &where) == NULL);
 540         avl_insert(&ddv->dv_entries, dv, where);
 541 }
 542 
 543 /*
 544  * Unlink a dv_node from a perent directory
 545  */
 546 void
 547 dv_unlink(struct dv_node *ddv, struct dv_node *dv)
 548 {
 549         /* verify linkage of arguments */
 550         ASSERT(ddv && dv);
 551         ASSERT(dv->dv_dotdot == ddv);
 552         ASSERT(RW_WRITE_HELD(&ddv->dv_contents));
 553         ASSERT(DVTOV(ddv)->v_type == VDIR);
 554 
 555         dcmn_err3(("dv_unlink: %s\n", dv->dv_name));
 556 
 557         if (DVTOV(dv)->v_type == VDIR) {
 558                 ddv->dv_nlink--;     /* .. to containing directory */
 559                 dv->dv_nlink -= 2;   /* name + . */
 560         } else {
 561                 dv->dv_nlink -= 1;   /* name */
 562         }
 563         ASSERT(ddv->dv_nlink >= 2);
 564         ASSERT(dv->dv_nlink == 0);
 565 
 566         dv->dv_dotdot = NULL;
 567 
 568         /* remove from avl tree */
 569         avl_remove(&ddv->dv_entries, dv);
 570 }
 571 
 572 /*
 573  * Merge devfs node specific information into an attribute structure.
 574  *
 575  * NOTE: specfs provides ATIME,MTIME,CTIME,SIZE,BLKSIZE,NBLOCKS on leaf node.
 576  */
 577 void
 578 dv_vattr_merge(struct dv_node *dv, struct vattr *vap)
 579 {
 580         struct vnode    *vp = DVTOV(dv);
 581 
 582         vap->va_nodeid = dv->dv_ino;
 583         vap->va_nlink = dv->dv_nlink;
 584 
 585         if (vp->v_type == VDIR) {
 586                 vap->va_rdev = 0;
 587                 vap->va_fsid = vp->v_rdev;
 588         } else {
 589                 vap->va_rdev = vp->v_rdev;
 590                 vap->va_fsid = DVTOV(dv->dv_dotdot)->v_rdev;
 591                 vap->va_type = vp->v_type;
 592                 /* don't trust the shadow file type */
 593                 vap->va_mode &= ~S_IFMT;
 594                 if (vap->va_type == VCHR)
 595                         vap->va_mode |= S_IFCHR;
 596                 else
 597                         vap->va_mode |= S_IFBLK;
 598         }
 599 }
 600 
 601 /*
 602  * Get default device permission by consulting rules in
 603  * privilege specification in minor node and /etc/minor_perm.
 604  *
 605  * This function is called from the devname filesystem to get default
 606  * permissions for a device exported to a non-global zone.
 607  */
 608 void
 609 devfs_get_defattr(struct vnode *vp, struct vattr *vap, int *no_fs_perm)
 610 {
 611         mperm_t         mp;
 612         struct dv_node  *dv;
 613 
 614         /* If vp isn't a dv_node, return something sensible */
 615         if (!vn_matchops(vp, dv_vnodeops)) {
 616                 if (no_fs_perm)
 617                         *no_fs_perm = 0;
 618                 *vap = dv_vattr_file;
 619                 return;
 620         }
 621 
 622         /*
 623          * For minors not created by ddi_create_priv_minor_node(),
 624          * use devfs defaults.
 625          */
 626         dv = VTODV(vp);
 627         if (vp->v_type == VDIR) {
 628                 *vap = dv_vattr_dir;
 629         } else if (dv->dv_flags & DV_NO_FSPERM) {
 630                 if (no_fs_perm)
 631                         *no_fs_perm = 1;
 632                 *vap = dv_vattr_priv;
 633         } else {
 634                 /*
 635                  * look up perm bits from minor_perm
 636                  */
 637                 *vap = dv_vattr_file;
 638                 if (dev_minorperm(dv->dv_devi, dv->dv_name, &mp) == 0) {
 639                         VATTR_MP_MERGE((*vap), mp);
 640                         dcmn_err5(("%s: minor perm mode 0%o\n",
 641                             dv->dv_name, vap->va_mode));
 642                 } else if (dv->dv_flags & DV_DFLT_MODE) {
 643                         ASSERT((dv->dv_dflt_mode & ~S_IAMB) == 0);
 644                         vap->va_mode &= ~S_IAMB;
 645                         vap->va_mode |= dv->dv_dflt_mode;
 646                         dcmn_err5(("%s: priv mode 0%o\n",
 647                             dv->dv_name, vap->va_mode));
 648                 }
 649         }
 650 }
 651 
 652 /*
 653  * dv_shadow_node
 654  *
 655  * Given a VDIR dv_node, find/create the associated VDIR
 656  * node in the shadow attribute filesystem.
 657  *
 658  * Given a VCHR/VBLK dv_node, find the associated VREG
 659  * node in the shadow attribute filesystem.  These nodes
 660  * are only created to persist non-default attributes.
 661  * Lack of such a node implies the default permissions
 662  * are sufficient.
 663  *
 664  * Managing the attribute file entries is slightly tricky (mostly
 665  * because we can't intercept VN_HOLD and VN_RELE except on the last
 666  * release).
 667  *
 668  * We assert that if the dv_attrvp pointer is non-NULL, it points
 669  * to a singly-held (by us) vnode that represents the shadow entry
 670  * in the underlying filesystem.  To avoid store-ordering issues,
 671  * we assert that the pointer can only be tested under the dv_contents
 672  * READERS lock.
 673  */
 674 
 675 void
 676 dv_shadow_node(
 677         struct vnode *dvp,      /* devfs parent directory vnode */
 678         char *nm,               /* name component */
 679         struct vnode *vp,       /* devfs vnode */
 680         struct pathname *pnp,   /* the path .. */
 681         struct vnode *rdir,     /* the root .. */
 682         struct cred *cred,      /* who's asking? */
 683         int flags)              /* optionally create shadow node */
 684 {
 685         struct dv_node  *dv;    /* dv_node of named directory */
 686         struct vnode    *rdvp;  /* shadow parent directory vnode */
 687         struct vnode    *rvp;   /* shadow vnode */
 688         struct vnode    *rrvp;  /* realvp of shadow vnode */
 689         struct vattr    vattr;
 690         int             create_tried;
 691         int             error;
 692 
 693         ASSERT(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK);
 694         dv = VTODV(vp);
 695         dcmn_err3(("dv_shadow_node: name %s attr %p\n",
 696             nm, (void *)dv->dv_attrvp));
 697 
 698         if ((flags & DV_SHADOW_WRITE_HELD) == 0) {
 699                 ASSERT(RW_READ_HELD(&dv->dv_contents));
 700                 if (dv->dv_attrvp != NULLVP)
 701                         return;
 702                 if (!rw_tryupgrade(&dv->dv_contents)) {
 703                         rw_exit(&dv->dv_contents);
 704                         rw_enter(&dv->dv_contents, RW_WRITER);
 705                         if (dv->dv_attrvp != NULLVP) {
 706                                 rw_downgrade(&dv->dv_contents);
 707                                 return;
 708                         }
 709                 }
 710         } else {
 711                 ASSERT(RW_WRITE_HELD(&dv->dv_contents));
 712                 if (dv->dv_attrvp != NULLVP)
 713                         return;
 714         }
 715 
 716         ASSERT(RW_WRITE_HELD(&dv->dv_contents) && dv->dv_attrvp == NULL);
 717 
 718         rdvp = VTODV(dvp)->dv_attrvp;
 719         create_tried = 0;
 720 lookup:
 721         if (rdvp && (dv->dv_flags & DV_NO_FSPERM) == 0) {
 722                 error = VOP_LOOKUP(rdvp, nm, &rvp, pnp, LOOKUP_DIR, rdir, cred,
 723                     NULL, NULL, NULL);
 724 
 725                 /* factor out the snode since we only want the attribute node */
 726                 if ((error == 0) && (VOP_REALVP(rvp, &rrvp, NULL) == 0)) {
 727                         VN_HOLD(rrvp);
 728                         VN_RELE(rvp);
 729                         rvp = rrvp;
 730                 }
 731         } else
 732                 error = EROFS;          /* no parent, no entry */
 733 
 734         /*
 735          * All we want is the permissions (and maybe ACLs and
 736          * extended attributes), and we want to perform lookups
 737          * by name.  Drivers occasionally change their minor
 738          * number space.  If something changes, there's no
 739          * much we can do about it here.
 740          */
 741 
 742         /* The shadow node checks out. We are done */
 743         if (error == 0) {
 744                 dv->dv_attrvp = rvp; /* with one hold */
 745 
 746                 /*
 747                  * Determine if we have non-trivial ACLs on this node.
 748                  * It is not necessary to VOP_RWLOCK since fs_acl_nontrivial
 749                  * only does VOP_GETSECATTR.
 750                  */
 751                 dv->dv_flags &= ~DV_ACL;
 752 
 753                 if (fs_acl_nontrivial(rvp, cred))
 754                         dv->dv_flags |= DV_ACL;
 755 
 756                 /*
 757                  * If we have synced out the memory attributes, free
 758                  * them and switch back to using the persistent store.
 759                  */
 760                 if (rvp && dv->dv_attr) {
 761                         kmem_free(dv->dv_attr, sizeof (struct vattr));
 762                         dv->dv_attr = NULL;
 763                 }
 764                 if ((flags & DV_SHADOW_WRITE_HELD) == 0)
 765                         rw_downgrade(&dv->dv_contents);
 766                 ASSERT(RW_LOCK_HELD(&dv->dv_contents));
 767                 return;
 768         }
 769 
 770         /*
 771          * Failed to find attribute in persistent backing store,
 772          * get default permission bits.
 773          */
 774         devfs_get_defattr(vp, &vattr, NULL);
 775 
 776         dv_vattr_merge(dv, &vattr);
 777         gethrestime(&vattr.va_atime);
 778         vattr.va_mtime = vattr.va_atime;
 779         vattr.va_ctime = vattr.va_atime;
 780 
 781         /*
 782          * Try to create shadow dir. This is necessary in case
 783          * we need to create a shadow leaf node later, when user
 784          * executes chmod.
 785          */
 786         if ((error == ENOENT) && !create_tried) {
 787                 switch (vp->v_type) {
 788                 case VDIR:
 789                         error = VOP_MKDIR(rdvp, nm, &vattr, &rvp, kcred,
 790                             NULL, 0, NULL);
 791                         dsysdebug(error, ("vop_mkdir %s %s %d\n",
 792                             VTODV(dvp)->dv_name, nm, error));
 793                         create_tried = 1;
 794                         break;
 795 
 796                 case VCHR:
 797                 case VBLK:
 798                         /*
 799                          * Shadow nodes are only created on demand
 800                          */
 801                         if (flags & DV_SHADOW_CREATE) {
 802                                 error = VOP_CREATE(rdvp, nm, &vattr, NONEXCL,
 803                                     VREAD|VWRITE, &rvp, kcred, 0, NULL, NULL);
 804                                 dsysdebug(error, ("vop_create %s %s %d\n",
 805                                     VTODV(dvp)->dv_name, nm, error));
 806                                 create_tried = 1;
 807                         }
 808                         break;
 809 
 810                 default:
 811                         cmn_err(CE_PANIC, "devfs: %s: create", dvnm);
 812                         /*NOTREACHED*/
 813                 }
 814 
 815                 if (create_tried &&
 816                     (error == 0) || (error == EEXIST)) {
 817                         VN_RELE(rvp);
 818                         goto lookup;
 819                 }
 820         }
 821 
 822         /* Store attribute in memory */
 823         if (dv->dv_attr == NULL) {
 824                 dv->dv_attr = kmem_alloc(sizeof (struct vattr), KM_SLEEP);
 825                 *(dv->dv_attr) = vattr;
 826         }
 827 
 828         if ((flags & DV_SHADOW_WRITE_HELD) == 0)
 829                 rw_downgrade(&dv->dv_contents);
 830         ASSERT(RW_LOCK_HELD(&dv->dv_contents));
 831 }
 832 
 833 /*
 834  * Given a devinfo node, and a name, returns the appropriate
 835  * minor information for that named node, if it exists.
 836  */
 837 static int
 838 dv_find_leafnode(dev_info_t *devi, char *minor_nm, struct ddi_minor_data *r_mi)
 839 {
 840         struct ddi_minor_data   *dmd;
 841 
 842         ASSERT(i_ddi_devi_attached(devi));
 843 
 844         dcmn_err3(("dv_find_leafnode: %s\n", minor_nm));
 845         ASSERT(DEVI_BUSY_OWNED(devi));
 846         for (dmd = DEVI(devi)->devi_minor; dmd; dmd = dmd->next) {
 847 
 848                 /*
 849                  * Skip alias nodes and nodes without a name.
 850                  */
 851                 if ((dmd->type == DDM_ALIAS) || (dmd->ddm_name == NULL))
 852                         continue;
 853 
 854                 dcmn_err4(("dv_find_leafnode: (%s,%s)\n",
 855                     minor_nm, dmd->ddm_name));
 856                 if (strcmp(minor_nm, dmd->ddm_name) == 0) {
 857                         r_mi->ddm_dev = dmd->ddm_dev;
 858                         r_mi->ddm_spec_type = dmd->ddm_spec_type;
 859                         r_mi->type = dmd->type;
 860                         r_mi->ddm_flags = dmd->ddm_flags;
 861                         r_mi->ddm_node_priv = dmd->ddm_node_priv;
 862                         r_mi->ddm_priv_mode = dmd->ddm_priv_mode;
 863                         if (r_mi->ddm_node_priv)
 864                                 dphold(r_mi->ddm_node_priv);
 865                         return (0);
 866                 }
 867         }
 868 
 869         dcmn_err3(("dv_find_leafnode: %s: ENOENT\n", minor_nm));
 870         return (ENOENT);
 871 }
 872 
 873 /*
 874  * Special handling for clone node:
 875  *      Clone minor name is a driver name, the minor number will
 876  *      be the major number of the driver. There is no minor
 877  *      node under the clone driver, so we'll manufacture the
 878  *      dev_t.
 879  */
 880 static struct dv_node *
 881 dv_clone_mknod(struct dv_node *ddv, char *drvname)
 882 {
 883         major_t                 major;
 884         struct dv_node          *dvp;
 885         char                    *devnm;
 886         struct ddi_minor_data   *dmd;
 887 
 888         /*
 889          * Make sure drvname is a STREAMS driver. We load the driver,
 890          * but don't attach to any instances. This makes stat(2)
 891          * relatively cheap.
 892          */
 893         major = ddi_name_to_major(drvname);
 894         if (major == DDI_MAJOR_T_NONE)
 895                 return (NULL);
 896 
 897         if (ddi_hold_driver(major) == NULL)
 898                 return (NULL);
 899 
 900         if (STREAMSTAB(major) == NULL) {
 901                 ddi_rele_driver(major);
 902                 return (NULL);
 903         }
 904 
 905         ddi_rele_driver(major);
 906         devnm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
 907         (void) snprintf(devnm, MAXNAMELEN, "clone@0:%s", drvname);
 908         dmd = kmem_zalloc(sizeof (*dmd), KM_SLEEP);
 909         dmd->ddm_dev = makedevice(clone_major, (minor_t)major);
 910         dmd->ddm_spec_type = S_IFCHR;
 911         dvp = dv_mknod(ddv, clone_dip, devnm, dmd);
 912         kmem_free(dmd, sizeof (*dmd));
 913         kmem_free(devnm, MAXNAMELEN);
 914         return (dvp);
 915 }
 916 
 917 /*
 918  * Given the parent directory node, and a name in it, returns the
 919  * named dv_node to the caller (as a vnode).
 920  *
 921  * (We need pnp and rdir for doing shadow lookups; they can be NULL)
 922  */
 923 int
 924 dv_find(struct dv_node *ddv, char *nm, struct vnode **vpp, struct pathname *pnp,
 925         struct vnode *rdir, struct cred *cred, uint_t ndi_flags)
 926 {
 927         extern int isminiroot;  /* see modctl.c */
 928 
 929         int                     circ;
 930         int                     rv = 0, was_busy = 0, nmlen, write_held = 0;
 931         struct vnode            *vp;
 932         struct dv_node          *dv, *dup;
 933         dev_info_t              *pdevi, *devi = NULL;
 934         char                    *mnm;
 935         struct ddi_minor_data   *dmd;
 936 
 937         dcmn_err3(("dv_find %s\n", nm));
 938 
 939         if (!rw_tryenter(&ddv->dv_contents, RW_READER)) {
 940                 if (tsd_get(devfs_clean_key))
 941                         return (EBUSY);
 942                 rw_enter(&ddv->dv_contents, RW_READER);
 943         }
 944 start:
 945         if (DV_STALE(ddv)) {
 946                 rw_exit(&ddv->dv_contents);
 947                 return (ESTALE);
 948         }
 949 
 950         /*
 951          * Empty name or ., return node itself.
 952          */
 953         nmlen = strlen(nm);
 954         if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) {
 955                 *vpp = DVTOV(ddv);
 956                 rw_exit(&ddv->dv_contents);
 957                 VN_HOLD(*vpp);
 958                 return (0);
 959         }
 960 
 961         /*
 962          * .., return the parent directory
 963          */
 964         if ((nmlen == 2) && (strcmp(nm, "..") == 0)) {
 965                 *vpp = DVTOV(ddv->dv_dotdot);
 966                 rw_exit(&ddv->dv_contents);
 967                 VN_HOLD(*vpp);
 968                 return (0);
 969         }
 970 
 971         /*
 972          * Fail anything without a valid device name component
 973          */
 974         if (nm[0] == '@' || nm[0] == ':') {
 975                 dcmn_err3(("devfs: no driver '%s'\n", nm));
 976                 rw_exit(&ddv->dv_contents);
 977                 return (ENOENT);
 978         }
 979 
 980         /*
 981          * So, now we have to deal with the trickier stuff.
 982          *
 983          * (a) search the existing list of dv_nodes on this directory
 984          */
 985         if ((dv = dv_findbyname(ddv, nm)) != NULL) {
 986 founddv:
 987                 ASSERT(RW_LOCK_HELD(&ddv->dv_contents));
 988 
 989                 if (!rw_tryenter(&dv->dv_contents, RW_READER)) {
 990                         if (tsd_get(devfs_clean_key)) {
 991                                 VN_RELE(DVTOV(dv));
 992                                 rw_exit(&ddv->dv_contents);
 993                                 return (EBUSY);
 994                         }
 995                         rw_enter(&dv->dv_contents, RW_READER);
 996                 }
 997 
 998                 vp = DVTOV(dv);
 999                 if ((dv->dv_attrvp != NULLVP) ||
1000                     (vp->v_type != VDIR && dv->dv_attr != NULL)) {
1001                         /*
1002                          * Common case - we already have attributes
1003                          */
1004                         rw_exit(&dv->dv_contents);
1005                         rw_exit(&ddv->dv_contents);
1006                         goto found;
1007                 }
1008 
1009                 /*
1010                  * No attribute vp, try and build one.
1011                  *
1012                  * dv_shadow_node() can briefly drop &dv->dv_contents lock
1013                  * if it is unable to upgrade it to a write lock. If the
1014                  * current thread has come in through the bottom-up device
1015                  * configuration devfs_clean() path, we may deadlock against
1016                  * a thread performing top-down device configuration if it
1017                  * grabs the contents lock. To avoid this, when we are on the
1018                  * devfs_clean() path we attempt to upgrade the dv_contents
1019                  * lock before we call dv_shadow_node().
1020                  */
1021                 if (tsd_get(devfs_clean_key)) {
1022                         if (!rw_tryupgrade(&dv->dv_contents)) {
1023                                 VN_RELE(DVTOV(dv));
1024                                 rw_exit(&dv->dv_contents);
1025                                 rw_exit(&ddv->dv_contents);
1026                                 return (EBUSY);
1027                         }
1028 
1029                         write_held = DV_SHADOW_WRITE_HELD;
1030                 }
1031 
1032                 dv_shadow_node(DVTOV(ddv), nm, vp, pnp, rdir, cred,
1033                     write_held);
1034 
1035                 rw_exit(&dv->dv_contents);
1036                 rw_exit(&ddv->dv_contents);
1037                 goto found;
1038         }
1039 
1040         /*
1041          * (b) Search the child devinfo nodes of our parent directory,
1042          * looking for the named node.  If we find it, build a new
1043          * node, then grab the writers lock, search the directory
1044          * if it's still not there, then insert it.
1045          *
1046          * We drop the devfs locks before accessing the device tree.
1047          * Take care to mark the node BUSY so that a forced devfs_clean
1048          * doesn't mark the directory node stale.
1049          *
1050          * Also, check if we are called as part of devfs_clean or
1051          * reset_perm. If so, simply return not found because there
1052          * is nothing to clean.
1053          */
1054         if (tsd_get(devfs_clean_key)) {
1055                 rw_exit(&ddv->dv_contents);
1056                 return (ENOENT);
1057         }
1058 
1059         /*
1060          * We could be either READ or WRITE locked at
1061          * this point. Upgrade if we are read locked.
1062          */
1063         ASSERT(RW_LOCK_HELD(&ddv->dv_contents));
1064         if (rw_read_locked(&ddv->dv_contents) &&
1065             !rw_tryupgrade(&ddv->dv_contents)) {
1066                 rw_exit(&ddv->dv_contents);
1067                 rw_enter(&ddv->dv_contents, RW_WRITER);
1068                 /*
1069                  * Things may have changed when we dropped
1070                  * the contents lock, so start from top again
1071                  */
1072                 goto start;
1073         }
1074         ddv->dv_busy++;              /* mark busy before dropping lock */
1075         was_busy++;
1076         rw_exit(&ddv->dv_contents);
1077 
1078         pdevi = ddv->dv_devi;
1079         ASSERT(pdevi != NULL);
1080 
1081         mnm = strchr(nm, ':');
1082         if (mnm)
1083                 *mnm = (char)0;
1084 
1085         /*
1086          * Configure one nexus child, will call nexus's bus_ops
1087          * If successful, devi is held upon returning.
1088          * Note: devfs lookup should not be configuring grandchildren.
1089          */
1090         ASSERT((ndi_flags & NDI_CONFIG) == 0);
1091 
1092         rv = ndi_devi_config_one(pdevi, nm, &devi, ndi_flags | NDI_NO_EVENT);
1093         if (mnm)
1094                 *mnm = ':';
1095         if (rv != NDI_SUCCESS) {
1096                 rv = ENOENT;
1097                 goto notfound;
1098         }
1099 
1100         ASSERT(devi);
1101 
1102         /* Check if this is a path alias */
1103         if (ddi_aliases_present == B_TRUE && ddi_get_parent(devi) != pdevi) {
1104                 char *curr = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1105 
1106                 (void) ddi_pathname(devi, curr);
1107 
1108                 vp = NULL;
1109                 if (devfs_lookupname(curr, NULL, &vp) == 0 && vp) {
1110                         dv = VTODV(vp);
1111                         kmem_free(curr, MAXPATHLEN);
1112                         goto found;
1113                 }
1114                 kmem_free(curr, MAXPATHLEN);
1115         }
1116 
1117         /*
1118          * If we configured a hidden node, consider it notfound.
1119          */
1120         if (ndi_dev_is_hidden_node(devi)) {
1121                 ndi_rele_devi(devi);
1122                 rv = ENOENT;
1123                 goto notfound;
1124         }
1125 
1126         /*
1127          * Don't make vhci clients visible under phci, unless we
1128          * are in miniroot.
1129          */
1130         if (isminiroot == 0 && ddi_get_parent(devi) != pdevi) {
1131                 ndi_rele_devi(devi);
1132                 rv = ENOENT;
1133                 goto notfound;
1134         }
1135 
1136         ASSERT(devi && i_ddi_devi_attached(devi));
1137 
1138         /*
1139          * Invalidate cache to notice newly created minor nodes.
1140          */
1141         rw_enter(&ddv->dv_contents, RW_WRITER);
1142         ddv->dv_flags |= DV_BUILD;
1143         rw_exit(&ddv->dv_contents);
1144 
1145         /*
1146          * mkdir for nexus drivers and leaf nodes as well.  If we are racing
1147          * and create a duplicate, the duplicate will be destroyed below.
1148          */
1149         if (mnm == NULL) {
1150                 dv = dv_mkdir(ddv, devi, nm);
1151         } else {
1152                 /*
1153                  * Allocate dmd first to avoid KM_SLEEP with active
1154                  * ndi_devi_enter.
1155                  */
1156                 dmd = kmem_zalloc(sizeof (*dmd), KM_SLEEP);
1157                 ndi_devi_enter(devi, &circ);
1158                 if (devi == clone_dip) {
1159                         /*
1160                          * For clone minors, load the driver indicated by
1161                          * minor name.
1162                          */
1163                         dv = dv_clone_mknod(ddv, mnm + 1);
1164                 } else {
1165                         /*
1166                          * Find minor node and make a dv_node
1167                          */
1168                         if (dv_find_leafnode(devi, mnm + 1, dmd) == 0) {
1169                                 dv = dv_mknod(ddv, devi, nm, dmd);
1170                                 if (dmd->ddm_node_priv)
1171                                         dpfree(dmd->ddm_node_priv);
1172                         }
1173                 }
1174                 ndi_devi_exit(devi, circ);
1175                 kmem_free(dmd, sizeof (*dmd));
1176         }
1177         /*
1178          * Release hold from ndi_devi_config_one()
1179          */
1180         ndi_rele_devi(devi);
1181 
1182         if (dv == NULL) {
1183                 rv = ENOENT;
1184                 goto notfound;
1185         }
1186 
1187         /*
1188          * We have released the dv_contents lock, need to check
1189          * if another thread already created a duplicate node
1190          */
1191         rw_enter(&ddv->dv_contents, RW_WRITER);
1192         if ((dup = dv_findbyname(ddv, nm)) == NULL) {
1193                 dv_insert(ddv, dv);
1194         } else {
1195                 /*
1196                  * Duplicate found, use the existing node
1197                  */
1198                 VN_RELE(DVTOV(dv));
1199                 dv_destroy(dv, 0);
1200                 dv = dup;
1201         }
1202         goto founddv;
1203         /*NOTREACHED*/
1204 
1205 found:
1206         /*
1207          * Fail lookup of device that has now become hidden (typically via
1208          * hot removal of open device).
1209          */
1210         if (dv->dv_devi && ndi_dev_is_hidden_node(dv->dv_devi)) {
1211                 dcmn_err2(("dv_find: nm %s failed: hidden/removed\n", nm));
1212                 VN_RELE(vp);
1213                 rv = ENOENT;
1214                 goto notfound;
1215         }
1216 
1217         /*
1218          * Skip non-kernel lookups of internal nodes.
1219          * This use of kcred to distinguish between user and
1220          * internal kernel lookups is unfortunate.  The information
1221          * provided by the seg argument to lookupnameat should
1222          * evolve into a lookup flag for filesystems that need
1223          * this distinction.
1224          */
1225         if ((dv->dv_flags & DV_INTERNAL) && (cred != kcred)) {
1226                 dcmn_err2(("dv_find: nm %s failed: internal\n", nm));
1227                 VN_RELE(vp);
1228                 rv = ENOENT;
1229                 goto notfound;
1230         }
1231 
1232         dcmn_err2(("dv_find: returning vp for nm %s\n", nm));
1233         if (vp->v_type == VCHR || vp->v_type == VBLK) {
1234                 /*
1235                  * If vnode is a device, return special vnode instead
1236                  * (though it knows all about -us- via sp->s_realvp,
1237                  * sp->s_devvp, and sp->s_dip)
1238                  */
1239                 *vpp = specvp_devfs(vp, vp->v_rdev, vp->v_type, cred,
1240                     dv->dv_devi);
1241                 VN_RELE(vp);
1242                 if (*vpp == NULLVP)
1243                         rv = ENOSYS;
1244         } else
1245                 *vpp = vp;
1246 
1247 notfound:
1248         if (was_busy) {
1249                 /*
1250                  * Non-zero was_busy tells us that we are not in the
1251                  * devfs_clean() path which in turn means that we can afford
1252                  * to take the contents lock unconditionally.
1253                  */
1254                 rw_enter(&ddv->dv_contents, RW_WRITER);
1255                 ddv->dv_busy--;
1256                 rw_exit(&ddv->dv_contents);
1257         }
1258         return (rv);
1259 }
1260 
1261 /*
1262  * The given directory node is out-of-date; that is, it has been
1263  * marked as needing to be rebuilt, possibly because some new devinfo
1264  * node has come into existence, or possibly because this is the first
1265  * time we've been here.
1266  */
1267 void
1268 dv_filldir(struct dv_node *ddv)
1269 {
1270         struct dv_node          *dv;
1271         dev_info_t              *devi, *pdevi;
1272         struct ddi_minor_data   *dmd;
1273         char                    devnm[MAXNAMELEN];
1274         int                     circ, ccirc;
1275 
1276         ASSERT(DVTOV(ddv)->v_type == VDIR);
1277         ASSERT(RW_WRITE_HELD(&ddv->dv_contents));
1278         ASSERT(ddv->dv_flags & DV_BUILD);
1279 
1280         dcmn_err3(("dv_filldir: %s\n", ddv->dv_name));
1281         if (DV_STALE(ddv))
1282                 return;
1283         pdevi = ddv->dv_devi;
1284 
1285         if (ndi_devi_config(pdevi, NDI_NO_EVENT) != NDI_SUCCESS) {
1286                 dcmn_err3(("dv_filldir: config error %s\n", ddv->dv_name));
1287         }
1288 
1289         ndi_devi_enter(pdevi, &circ);
1290         for (devi = ddi_get_child(pdevi); devi;
1291             devi = ddi_get_next_sibling(devi)) {
1292                 /*
1293                  * While we know enough to create a directory at DS_INITIALIZED,
1294                  * the directory will be empty until DS_ATTACHED. The existence
1295                  * of an empty directory dv_node will cause a devi_ref, which
1296                  * has caused problems for existing code paths doing offline/DR
1297                  * type operations - making devfs_clean coordination even more
1298                  * sensitive and error prone. Given this, the 'continue' below
1299                  * is checking for DS_ATTACHED instead of DS_INITIALIZED.
1300                  */
1301                 if (i_ddi_node_state(devi) < DS_ATTACHED)
1302                         continue;
1303 
1304                 /* skip hidden nodes */
1305                 if (ndi_dev_is_hidden_node(devi))
1306                         continue;
1307 
1308                 dcmn_err3(("dv_filldir: node %s\n", ddi_node_name(devi)));
1309 
1310                 ndi_devi_enter(devi, &ccirc);
1311                 for (dmd = DEVI(devi)->devi_minor; dmd; dmd = dmd->next) {
1312                         char *addr;
1313 
1314                         /*
1315                          * Skip alias nodes, internal nodes, and nodes
1316                          * without a name.  We allow DDM_DEFAULT nodes
1317                          * to appear in readdir.
1318                          */
1319                         if ((dmd->type == DDM_ALIAS) ||
1320                             (dmd->type == DDM_INTERNAL_PATH) ||
1321                             (dmd->ddm_name == NULL))
1322                                 continue;
1323 
1324                         addr = ddi_get_name_addr(devi);
1325                         if (addr && *addr)
1326                                 (void) sprintf(devnm, "%s@%s:%s",
1327                                     ddi_node_name(devi), addr, dmd->ddm_name);
1328                         else
1329                                 (void) sprintf(devnm, "%s:%s",
1330                                     ddi_node_name(devi), dmd->ddm_name);
1331 
1332                         if ((dv = dv_findbyname(ddv, devnm)) != NULL) {
1333                                 /* dv_node already exists */
1334                                 VN_RELE(DVTOV(dv));
1335                                 continue;
1336                         }
1337 
1338                         dv = dv_mknod(ddv, devi, devnm, dmd);
1339                         dv_insert(ddv, dv);
1340                         VN_RELE(DVTOV(dv));
1341                 }
1342                 ndi_devi_exit(devi, ccirc);
1343 
1344                 (void) ddi_deviname(devi, devnm);
1345                 if ((dv = dv_findbyname(ddv, devnm + 1)) == NULL) {
1346                         /* directory doesn't exist */
1347                         dv = dv_mkdir(ddv, devi, devnm + 1);
1348                         dv_insert(ddv, dv);
1349                 }
1350                 VN_RELE(DVTOV(dv));
1351         }
1352         ndi_devi_exit(pdevi, circ);
1353 
1354         ddv->dv_flags &= ~DV_BUILD;
1355 }
1356 
1357 /*
1358  * Given a directory node, clean out all the nodes beneath.
1359  *
1360  * VDIR:        Reinvoke to clean them, then delete the directory.
1361  * VCHR, VBLK:  Just blow them away.
1362  *
1363  * Mark the directories touched as in need of a rebuild, in case
1364  * we fall over part way through. When DV_CLEAN_FORCE is specified,
1365  * we mark referenced empty directories as stale to facilitate DR.
1366  */
1367 int
1368 dv_cleandir(struct dv_node *ddv, char *devnm, uint_t flags)
1369 {
1370         struct dv_node  *dv;
1371         struct dv_node  *next;
1372         struct vnode    *vp;
1373         int             busy = 0;
1374 
1375         /*
1376          * We should always be holding the tsd_clean_key here: dv_cleandir()
1377          * will be called as a result of a devfs_clean request and the
1378          * tsd_clean_key will be set in either in devfs_clean() itself or in
1379          * devfs_clean_vhci().
1380          *
1381          * Since we are on the devfs_clean path, we return EBUSY if we cannot
1382          * get the contents lock: if we blocked here we might deadlock against
1383          * a thread performing top-down device configuration.
1384          */
1385         ASSERT(tsd_get(devfs_clean_key));
1386 
1387         dcmn_err3(("dv_cleandir: %s\n", ddv->dv_name));
1388 
1389         if (!(flags & DV_CLEANDIR_LCK) &&
1390             !rw_tryenter(&ddv->dv_contents, RW_WRITER))
1391                 return (EBUSY);
1392 
1393         for (dv = DV_FIRST_ENTRY(ddv); dv; dv = next) {
1394                 next = DV_NEXT_ENTRY(ddv, dv);
1395 
1396                 /*
1397                  * If devnm is specified, the non-minor portion of the
1398                  * name must match devnm.
1399                  */
1400                 if (devnm &&
1401                     (strncmp(devnm, dv->dv_name, strlen(devnm)) ||
1402                     (dv->dv_name[strlen(devnm)] != ':' &&
1403                     dv->dv_name[strlen(devnm)] != '\0')))
1404                         continue;
1405 
1406                 /* check type of what we are cleaning */
1407                 vp = DVTOV(dv);
1408                 if (vp->v_type == VDIR) {
1409                         /* recurse on directories */
1410                         rw_enter(&dv->dv_contents, RW_WRITER);
1411                         if (dv_cleandir(dv, NULL,
1412                             flags | DV_CLEANDIR_LCK) == EBUSY) {
1413                                 rw_exit(&dv->dv_contents);
1414                                 goto set_busy;
1415                         }
1416 
1417                         /* A clean directory is an empty directory... */
1418                         ASSERT(dv->dv_nlink == 2);
1419                         mutex_enter(&vp->v_lock);
1420                         if (vp->v_count > 0) {
1421                                 /*
1422                                  * ... but an empty directory can still have
1423                                  * references to it. If we have dv_busy or
1424                                  * DV_CLEAN_FORCE is *not* specified then a
1425                                  * referenced directory is considered busy.
1426                                  */
1427                                 if (dv->dv_busy || !(flags & DV_CLEAN_FORCE)) {
1428                                         mutex_exit(&vp->v_lock);
1429                                         rw_exit(&dv->dv_contents);
1430                                         goto set_busy;
1431                                 }
1432 
1433                                 /*
1434                                  * Mark referenced directory stale so that DR
1435                                  * will succeed even if a shell has
1436                                  * /devices/xxx as current directory (causing
1437                                  * VN_HOLD reference to an empty directory).
1438                                  */
1439                                 ASSERT(!DV_STALE(dv));
1440                                 ndi_rele_devi(dv->dv_devi);
1441                                 dv->dv_devi = NULL;  /* mark DV_STALE */
1442                         }
1443                 } else {
1444                         ASSERT((vp->v_type == VCHR) || (vp->v_type == VBLK));
1445                         ASSERT(dv->dv_nlink == 1);   /* no hard links */
1446                         mutex_enter(&vp->v_lock);
1447                         if (vp->v_count > 0) {
1448                                 mutex_exit(&vp->v_lock);
1449                                 goto set_busy;
1450                         }
1451                 }
1452 
1453                 /* unlink from directory */
1454                 dv_unlink(ddv, dv);
1455 
1456                 /* drop locks */
1457                 mutex_exit(&vp->v_lock);
1458                 if (vp->v_type == VDIR)
1459                         rw_exit(&dv->dv_contents);
1460 
1461                 /* destroy vnode if ref count is zero */
1462                 if (vp->v_count == 0)
1463                         dv_destroy(dv, flags);
1464 
1465                 continue;
1466 
1467                 /*
1468                  * If devnm is not NULL we return immediately on busy,
1469                  * otherwise we continue destroying unused dv_node's.
1470                  */
1471 set_busy:       busy++;
1472                 if (devnm)
1473                         break;
1474         }
1475 
1476         /*
1477          * This code may be invoked to inform devfs that a new node has
1478          * been created in the kernel device tree. So we always set
1479          * the DV_BUILD flag to allow the next dv_filldir() to pick
1480          * the new devinfo nodes.
1481          */
1482         ddv->dv_flags |= DV_BUILD;
1483 
1484         if (!(flags & DV_CLEANDIR_LCK))
1485                 rw_exit(&ddv->dv_contents);
1486 
1487         return (busy ? EBUSY : 0);
1488 }
1489 
1490 /*
1491  * Walk through the devfs hierarchy, correcting the permissions of
1492  * devices with default permissions that do not match those specified
1493  * by minor perm.  This can only be done for all drivers for now.
1494  */
1495 static int
1496 dv_reset_perm_dir(struct dv_node *ddv, uint_t flags)
1497 {
1498         struct dv_node  *dv;
1499         struct vnode    *vp;
1500         int             retval = 0;
1501         struct vattr    *attrp;
1502         mperm_t         mp;
1503         char            *nm;
1504         uid_t           old_uid;
1505         gid_t           old_gid;
1506         mode_t          old_mode;
1507 
1508         rw_enter(&ddv->dv_contents, RW_WRITER);
1509         for (dv = DV_FIRST_ENTRY(ddv); dv; dv = DV_NEXT_ENTRY(ddv, dv)) {
1510                 int error = 0;
1511                 nm = dv->dv_name;
1512 
1513                 rw_enter(&dv->dv_contents, RW_READER);
1514                 vp = DVTOV(dv);
1515                 if (vp->v_type == VDIR) {
1516                         rw_exit(&dv->dv_contents);
1517                         if (dv_reset_perm_dir(dv, flags) != 0) {
1518                                 error = EBUSY;
1519                         }
1520                 } else {
1521                         ASSERT(vp->v_type == VCHR || vp->v_type == VBLK);
1522 
1523                         /*
1524                          * Check for permissions from minor_perm
1525                          * If there are none, we're done
1526                          */
1527                         rw_exit(&dv->dv_contents);
1528                         if (dev_minorperm(dv->dv_devi, nm, &mp) != 0)
1529                                 continue;
1530 
1531                         rw_enter(&dv->dv_contents, RW_READER);
1532 
1533                         /*
1534                          * Allow a node's permissions to be altered
1535                          * permanently from the defaults by chmod,
1536                          * using the shadow node as backing store.
1537                          * Otherwise, update node to minor_perm permissions.
1538                          */
1539                         if (dv->dv_attrvp == NULLVP) {
1540                                 /*
1541                                  * No attribute vp, try to find one.
1542                                  */
1543                                 dv_shadow_node(DVTOV(ddv), nm, vp,
1544                                     NULL, NULLVP, kcred, 0);
1545                         }
1546                         if (dv->dv_attrvp != NULLVP || dv->dv_attr == NULL) {
1547                                 rw_exit(&dv->dv_contents);
1548                                 continue;
1549                         }
1550 
1551                         attrp = dv->dv_attr;
1552 
1553                         if (VATTRP_MP_CMP(attrp, mp) == 0) {
1554                                 dcmn_err5(("%s: no perm change: "
1555                                     "%d %d 0%o\n", nm, attrp->va_uid,
1556                                     attrp->va_gid, attrp->va_mode));
1557                                 rw_exit(&dv->dv_contents);
1558                                 continue;
1559                         }
1560 
1561                         old_uid = attrp->va_uid;
1562                         old_gid = attrp->va_gid;
1563                         old_mode = attrp->va_mode;
1564 
1565                         VATTRP_MP_MERGE(attrp, mp);
1566                         mutex_enter(&vp->v_lock);
1567                         if (vp->v_count > 0) {
1568                                 error = EBUSY;
1569                         }
1570                         mutex_exit(&vp->v_lock);
1571 
1572                         dcmn_err5(("%s: perm %d/%d/0%o -> %d/%d/0%o (%d)\n",
1573                             nm, old_uid, old_gid, old_mode, attrp->va_uid,
1574                             attrp->va_gid, attrp->va_mode, error));
1575 
1576                         rw_exit(&dv->dv_contents);
1577                 }
1578 
1579                 if (error != 0) {
1580                         retval = error;
1581                 }
1582         }
1583 
1584         ddv->dv_flags |= DV_BUILD;
1585 
1586         rw_exit(&ddv->dv_contents);
1587 
1588         return (retval);
1589 }
1590 
1591 int
1592 devfs_reset_perm(uint_t flags)
1593 {
1594         struct dv_node  *dvp;
1595         int             rval;
1596 
1597         if ((dvp = devfs_dip_to_dvnode(ddi_root_node())) == NULL)
1598                 return (0);
1599 
1600         VN_HOLD(DVTOV(dvp));
1601         rval = dv_reset_perm_dir(dvp, flags);
1602         VN_RELE(DVTOV(dvp));
1603         return (rval);
1604 }
1605 
1606 /*
1607  * Clean up dangling devfs shadow nodes for removed
1608  * drivers so that, in the event the driver is re-added
1609  * to the system, newly created nodes won't incorrectly
1610  * pick up these stale shadow node permissions.
1611  *
1612  * This is accomplished by walking down the pathname
1613  * to the directory, starting at the root's attribute
1614  * node, then removing all minors matching the specified
1615  * node name.  Care must be taken to remove all entries
1616  * in a directory before the directory itself, so that
1617  * the clean-up associated with rem_drv'ing a nexus driver
1618  * does not inadvertently result in an inconsistent
1619  * filesystem underlying devfs.
1620  */
1621 
1622 static int
1623 devfs_remdrv_rmdir(vnode_t *dirvp, const char *dir, vnode_t *rvp)
1624 {
1625         int             error;
1626         vnode_t         *vp;
1627         int             eof;
1628         struct iovec    iov;
1629         struct uio      uio;
1630         struct dirent64 *dp;
1631         dirent64_t      *dbuf;
1632         size_t          dlen;
1633         size_t          dbuflen;
1634         int             ndirents = 64;
1635         char            *nm;
1636 
1637         VN_HOLD(dirvp);
1638 
1639         dlen = ndirents * (sizeof (*dbuf));
1640         dbuf = kmem_alloc(dlen, KM_SLEEP);
1641 
1642         uio.uio_iov = &iov;
1643         uio.uio_iovcnt = 1;
1644         uio.uio_segflg = UIO_SYSSPACE;
1645         uio.uio_fmode = 0;
1646         uio.uio_extflg = UIO_COPY_CACHED;
1647         uio.uio_loffset = 0;
1648         uio.uio_llimit = MAXOFFSET_T;
1649 
1650         eof = 0;
1651         error = 0;
1652         while (!error && !eof) {
1653                 uio.uio_resid = dlen;
1654                 iov.iov_base = (char *)dbuf;
1655                 iov.iov_len = dlen;
1656 
1657                 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1658                 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
1659                 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1660 
1661                 dbuflen = dlen - uio.uio_resid;
1662 
1663                 if (error || dbuflen == 0)
1664                         break;
1665 
1666                 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
1667                     dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1668 
1669                         nm = dp->d_name;
1670 
1671                         if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
1672                                 continue;
1673 
1674                         error = VOP_LOOKUP(dirvp, nm,
1675                             &vp, NULL, 0, NULL, kcred, NULL, NULL, NULL);
1676 
1677                         dsysdebug(error,
1678                             ("rem_drv %s/%s lookup (%d)\n",
1679                             dir, nm, error));
1680 
1681                         if (error)
1682                                 continue;
1683 
1684                         ASSERT(vp->v_type == VDIR ||
1685                             vp->v_type == VCHR || vp->v_type == VBLK);
1686 
1687                         if (vp->v_type == VDIR) {
1688                                 error = devfs_remdrv_rmdir(vp, nm, rvp);
1689                                 if (error == 0) {
1690                                         error = VOP_RMDIR(dirvp,
1691                                             (char *)nm, rvp, kcred, NULL, 0);
1692                                         dsysdebug(error,
1693                                             ("rem_drv %s/%s rmdir (%d)\n",
1694                                             dir, nm, error));
1695                                 }
1696                         } else {
1697                                 error = VOP_REMOVE(dirvp, (char *)nm, kcred,
1698                                     NULL, 0);
1699                                 dsysdebug(error,
1700                                     ("rem_drv %s/%s remove (%d)\n",
1701                                     dir, nm, error));
1702                         }
1703 
1704                         VN_RELE(vp);
1705                         if (error) {
1706                                 goto exit;
1707                         }
1708                 }
1709         }
1710 
1711 exit:
1712         VN_RELE(dirvp);
1713         kmem_free(dbuf, dlen);
1714 
1715         return (error);
1716 }
1717 
1718 int
1719 devfs_remdrv_cleanup(const char *dir, const char *nodename)
1720 {
1721         int             error;
1722         vnode_t         *vp;
1723         vnode_t         *dirvp;
1724         int             eof;
1725         struct iovec    iov;
1726         struct uio      uio;
1727         struct dirent64 *dp;
1728         dirent64_t      *dbuf;
1729         size_t          dlen;
1730         size_t          dbuflen;
1731         int             ndirents = 64;
1732         int             nodenamelen = strlen(nodename);
1733         char            *nm;
1734         struct pathname pn;
1735         vnode_t         *rvp;   /* root node of the underlying attribute fs */
1736 
1737         dcmn_err5(("devfs_remdrv_cleanup: %s %s\n", dir, nodename));
1738 
1739         if (error = pn_get((char *)dir, UIO_SYSSPACE, &pn))
1740                 return (0);
1741 
1742         rvp = dvroot->dv_attrvp;
1743         ASSERT(rvp != NULL);
1744         VN_HOLD(rvp);
1745 
1746         pn_skipslash(&pn);
1747         dirvp = rvp;
1748         VN_HOLD(dirvp);
1749 
1750         nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
1751 
1752         while (pn_pathleft(&pn)) {
1753                 ASSERT(dirvp->v_type == VDIR);
1754                 (void) pn_getcomponent(&pn, nm);
1755                 ASSERT((strcmp(nm, ".") != 0) && (strcmp(nm, "..") != 0));
1756                 error = VOP_LOOKUP(dirvp, nm, &vp, NULL, 0, rvp, kcred,
1757                     NULL, NULL, NULL);
1758                 if (error) {
1759                         dcmn_err5(("remdrv_cleanup %s lookup error %d\n",
1760                             nm, error));
1761                         VN_RELE(dirvp);
1762                         if (dirvp != rvp)
1763                                 VN_RELE(rvp);
1764                         pn_free(&pn);
1765                         kmem_free(nm, MAXNAMELEN);
1766                         return (0);
1767                 }
1768                 VN_RELE(dirvp);
1769                 dirvp = vp;
1770                 pn_skipslash(&pn);
1771         }
1772 
1773         ASSERT(dirvp->v_type == VDIR);
1774         if (dirvp != rvp)
1775                 VN_RELE(rvp);
1776         pn_free(&pn);
1777         kmem_free(nm, MAXNAMELEN);
1778 
1779         dlen = ndirents * (sizeof (*dbuf));
1780         dbuf = kmem_alloc(dlen, KM_SLEEP);
1781 
1782         uio.uio_iov = &iov;
1783         uio.uio_iovcnt = 1;
1784         uio.uio_segflg = UIO_SYSSPACE;
1785         uio.uio_fmode = 0;
1786         uio.uio_extflg = UIO_COPY_CACHED;
1787         uio.uio_loffset = 0;
1788         uio.uio_llimit = MAXOFFSET_T;
1789 
1790         eof = 0;
1791         error = 0;
1792         while (!error && !eof) {
1793                 uio.uio_resid = dlen;
1794                 iov.iov_base = (char *)dbuf;
1795                 iov.iov_len = dlen;
1796 
1797                 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1798                 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
1799                 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1800 
1801                 dbuflen = dlen - uio.uio_resid;
1802 
1803                 if (error || dbuflen == 0)
1804                         break;
1805 
1806                 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
1807                     dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1808 
1809                         nm = dp->d_name;
1810 
1811                         if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
1812                                 continue;
1813 
1814                         if (strncmp(nm, nodename, nodenamelen) != 0)
1815                                 continue;
1816 
1817                         error = VOP_LOOKUP(dirvp, nm, &vp,
1818                             NULL, 0, NULL, kcred, NULL, NULL, NULL);
1819 
1820                         dsysdebug(error,
1821                             ("rem_drv %s/%s lookup (%d)\n",
1822                             dir, nm, error));
1823 
1824                         if (error)
1825                                 continue;
1826 
1827                         ASSERT(vp->v_type == VDIR ||
1828                             vp->v_type == VCHR || vp->v_type == VBLK);
1829 
1830                         if (vp->v_type == VDIR) {
1831                                 error = devfs_remdrv_rmdir(vp, nm, rvp);
1832                                 if (error == 0) {
1833                                         error = VOP_RMDIR(dirvp, (char *)nm,
1834                                             rvp, kcred, NULL, 0);
1835                                         dsysdebug(error,
1836                                             ("rem_drv %s/%s rmdir (%d)\n",
1837                                             dir, nm, error));
1838                                 }
1839                         } else {
1840                                 error = VOP_REMOVE(dirvp, (char *)nm, kcred,
1841                                     NULL, 0);
1842                                 dsysdebug(error,
1843                                     ("rem_drv %s/%s remove (%d)\n",
1844                                     dir, nm, error));
1845                         }
1846 
1847                         VN_RELE(vp);
1848                         if (error)
1849                                 goto exit;
1850                 }
1851         }
1852 
1853 exit:
1854         VN_RELE(dirvp);
1855 
1856         kmem_free(dbuf, dlen);
1857 
1858         return (0);
1859 }
1860 
/*
 * Element of the singly linked list on which dv_walk() queues the
 * directory dv_nodes it still has to descend into.
 */
struct dv_list {
        struct dv_node  *dv;            /* queued directory node */
        struct dv_list  *next;          /* following element, NULL at tail */
};
1865 
1866 void
1867 dv_walk(
1868         struct dv_node  *ddv,
1869         char            *devnm,
1870         void            (*callback)(struct dv_node *, void *),
1871         void            *arg)
1872 {
1873         struct vnode    *dvp;
1874         struct dv_node  *dv;
1875         struct dv_list  *head, *tail, *next;
1876         int             len;
1877 
1878         dcmn_err3(("dv_walk: ddv = %s, devnm = %s\n",
1879             ddv->dv_name, devnm ? devnm : "<null>"));
1880 
1881         dvp = DVTOV(ddv);
1882 
1883         ASSERT(dvp->v_type == VDIR);
1884 
1885         head = tail = next = NULL;
1886 
1887         rw_enter(&ddv->dv_contents, RW_READER);
1888         mutex_enter(&dvp->v_lock);
1889         for (dv = DV_FIRST_ENTRY(ddv); dv; dv = DV_NEXT_ENTRY(ddv, dv)) {
1890                 /*
1891                  * If devnm is not NULL and is not the empty string,
1892                  * select only dv_nodes with matching non-minor name
1893                  */
1894                 if (devnm && (len = strlen(devnm)) &&
1895                     (strncmp(devnm, dv->dv_name, len) ||
1896                     (dv->dv_name[len] != ':' && dv->dv_name[len] != '\0')))
1897                         continue;
1898 
1899                 callback(dv, arg);
1900 
1901                 if (DVTOV(dv)->v_type != VDIR)
1902                         continue;
1903 
1904                 next = kmem_zalloc(sizeof (*next), KM_SLEEP);
1905                 next->dv = dv;
1906 
1907                 if (tail)
1908                         tail->next = next;
1909                 else
1910                         head = next;
1911 
1912                 tail = next;
1913         }
1914 
1915         while (head) {
1916                 dv_walk(head->dv, NULL, callback, arg);
1917                 next = head->next;
1918                 kmem_free(head, sizeof (*head));
1919                 head = next;
1920         }
1921         rw_exit(&ddv->dv_contents);
1922         mutex_exit(&dvp->v_lock);
1923 }