1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
  24  */
  25 
  26 /*
  27  * Copyright 2018 Nexenta Systems, Inc.
  28  */
  29 
  30 /*
  31  * miscellaneous routines for the devfs
  32  */
  33 
  34 #include <sys/types.h>
  35 #include <sys/param.h>
  36 #include <sys/t_lock.h>
  37 #include <sys/systm.h>
  38 #include <sys/sysmacros.h>
  39 #include <sys/user.h>
  40 #include <sys/time.h>
  41 #include <sys/vfs.h>
  42 #include <sys/vnode.h>
  43 #include <sys/file.h>
  44 #include <sys/fcntl.h>
  45 #include <sys/flock.h>
  46 #include <sys/kmem.h>
  47 #include <sys/uio.h>
  48 #include <sys/errno.h>
  49 #include <sys/stat.h>
  50 #include <sys/cred.h>
  51 #include <sys/dirent.h>
  52 #include <sys/pathname.h>
  53 #include <sys/cmn_err.h>
  54 #include <sys/debug.h>
  55 #include <sys/modctl.h>
  56 #include <fs/fs_subr.h>
  57 #include <sys/fs/dv_node.h>
  58 #include <sys/fs/snode.h>
  59 #include <sys/sunndi.h>
  60 #include <sys/sunmdi.h>
  61 #include <sys/conf.h>
  62 
  63 #ifdef DEBUG
  64 int devfs_debug = 0x0;
  65 #endif
  66 
  67 const char      dvnm[] = "devfs";
  68 kmem_cache_t    *dv_node_cache; /* dv_node cache */
  69 
  70 /*
  71  * The devfs_clean_key is taken during a devfs_clean operation: it is used to
  72  * prevent unnecessary code execution and for detection of potential deadlocks.
  73  */
  74 uint_t          devfs_clean_key;
  75 
  76 struct dv_node *dvroot;
  77 
  78 /* prototype memory vattrs */
  79 vattr_t dv_vattr_dir = {
  80         AT_TYPE|AT_MODE|AT_UID|AT_GID,          /* va_mask */
  81         VDIR,                                   /* va_type */
  82         DV_DIRMODE_DEFAULT,                     /* va_mode */
  83         DV_UID_DEFAULT,                         /* va_uid */
  84         DV_GID_DEFAULT,                         /* va_gid */
  85         0,                                      /* va_fsid; */
  86         0,                                      /* va_nodeid; */
  87         0,                                      /* va_nlink; */
  88         0,                                      /* va_size; */
  89         0,                                      /* va_atime; */
  90         0,                                      /* va_mtime; */
  91         0,                                      /* va_ctime; */
  92         0,                                      /* va_rdev; */
  93         0,                                      /* va_blksize; */
  94         0,                                      /* va_nblocks; */
  95         0,                                      /* va_seq; */
  96 };
  97 
  98 vattr_t dv_vattr_file = {
  99         AT_TYPE|AT_MODE|AT_SIZE|AT_UID|AT_GID|AT_RDEV,  /* va_mask */
 100         0,                                      /* va_type */
 101         DV_DEVMODE_DEFAULT,                     /* va_mode */
 102         DV_UID_DEFAULT,                         /* va_uid */
 103         DV_GID_DEFAULT,                         /* va_gid */
 104         0,                                      /* va_fsid; */
 105         0,                                      /* va_nodeid; */
 106         0,                                      /* va_nlink; */
 107         0,                                      /* va_size; */
 108         0,                                      /* va_atime; */
 109         0,                                      /* va_mtime; */
 110         0,                                      /* va_ctime; */
 111         0,                                      /* va_rdev; */
 112         0,                                      /* va_blksize; */
 113         0,                                      /* va_nblocks; */
 114         0,                                      /* va_seq; */
 115 };
 116 
 117 vattr_t dv_vattr_priv = {
 118         AT_TYPE|AT_MODE|AT_SIZE|AT_UID|AT_GID|AT_RDEV,  /* va_mask */
 119         0,                                      /* va_type */
 120         DV_DEVMODE_PRIV,                        /* va_mode */
 121         DV_UID_DEFAULT,                         /* va_uid */
 122         DV_GID_DEFAULT,                         /* va_gid */
 123         0,                                      /* va_fsid; */
 124         0,                                      /* va_nodeid; */
 125         0,                                      /* va_nlink; */
 126         0,                                      /* va_size; */
 127         0,                                      /* va_atime; */
 128         0,                                      /* va_mtime; */
 129         0,                                      /* va_ctime; */
 130         0,                                      /* va_rdev; */
 131         0,                                      /* va_blksize; */
 132         0,                                      /* va_nblocks; */
 133         0,                                      /* va_seq; */
 134 };
 135 
 136 extern dev_info_t       *clone_dip;
 137 extern major_t          clone_major;
 138 extern struct dev_ops   *ddi_hold_driver(major_t);
 139 
 140 /* dv_node node constructor for kmem cache */
 141 static int
 142 i_dv_node_ctor(void *buf, void *cfarg, int flag)
 143 {
 144         _NOTE(ARGUNUSED(cfarg, flag))
 145         struct dv_node  *dv = (struct dv_node *)buf;
 146         struct vnode    *vp;
 147 
 148         bzero(buf, sizeof (struct dv_node));
 149         vp = dv->dv_vnode = vn_alloc(flag);
 150         if (vp == NULL) {
 151                 return (-1);
 152         }
 153         vp->v_data = dv;
 154         rw_init(&dv->dv_contents, NULL, RW_DEFAULT, NULL);
 155         return (0);
 156 }
 157 
 158 /* dv_node node destructor for kmem cache */
 159 static void
 160 i_dv_node_dtor(void *buf, void *arg)
 161 {
 162         _NOTE(ARGUNUSED(arg))
 163         struct dv_node  *dv = (struct dv_node *)buf;
 164         struct vnode    *vp = DVTOV(dv);
 165 
 166         rw_destroy(&dv->dv_contents);
 167         vn_invalid(vp);
 168         vn_free(vp);
 169 }
 170 
 171 
 172 /* initialize dv_node node cache */
 173 void
 174 dv_node_cache_init()
 175 {
 176         ASSERT(dv_node_cache == NULL);
 177         dv_node_cache = kmem_cache_create("dv_node_cache",
 178             sizeof (struct dv_node), 0, i_dv_node_ctor, i_dv_node_dtor,
 179             NULL, NULL, NULL, 0);
 180 
 181         tsd_create(&devfs_clean_key, NULL);
 182 }
 183 
 184 /* destroy dv_node node cache */
 185 void
 186 dv_node_cache_fini()
 187 {
 188         ASSERT(dv_node_cache != NULL);
 189         kmem_cache_destroy(dv_node_cache);
 190         dv_node_cache = NULL;
 191 
 192         tsd_destroy(&devfs_clean_key);
 193 }
 194 
 195 /*
 196  * dv_mkino - Generate a unique inode number for devfs nodes.
 197  *
 198  * Although ino_t is 64 bits, the inode number is truncated to 32 bits for 32
 199  * bit non-LARGEFILE applications. This means that there is a requirement to
 200  * maintain the inode number as a 32 bit value or applications will have
 201  * stat(2) calls fail with EOVERFLOW.  We form a 32 bit inode number from the
 202  * dev_t. but if the minor number is larger than L_MAXMIN32 we fold extra minor
 203  *
 204  * To generate inode numbers for directories, we assume that we will never use
 205  * more than half the major space - this allows for ~8190 drivers. We use this
 206  * upper major number space to allocate inode numbers for directories by
 207  * encoding the major and instance into this space.
 208  *
 209  * We also skew the result so that inode 2 is reserved for the root of the file
 210  * system.
 211  *
 212  * As part of the future support for 64-bit dev_t APIs, the upper minor bits
 213  * should be folded into the high inode bits by adding the following code
 214  * after "ino |= 1":
 215  *
 216  * #if (L_BITSMINOR32 != L_BITSMINOR)
 217  *              |* fold overflow minor bits into high bits of inode number *|
 218  *              ino |= ((ino_t)(minor >> L_BITSMINOR32)) << L_BITSMINOR;
 219  * #endif |* (L_BITSMINOR32 != L_BITSMINOR) *|
 220  *
 221  * This way only applications that use devices that overflow their minor
 222  * space will have an application level impact.
 223  */
 224 static ino_t
 225 dv_mkino(dev_info_t *devi, vtype_t typ, dev_t dev)
 226 {
 227         major_t         major;
 228         minor_t         minor;
 229         ino_t           ino;
 230         static int      warn;
 231 
 232         if (typ == VDIR) {
 233                 major = ((L_MAXMAJ32 + 1) >> 1) + DEVI(devi)->devi_major;
 234                 minor = ddi_get_instance(devi);
 235 
 236                 /* makedevice32 in high half of major number space */
 237                 ino = (ino_t)((major << L_BITSMINOR32) | (minor & L_MAXMIN32));
 238 
 239                 major = DEVI(devi)->devi_major;
 240         } else {
 241                 major = getmajor(dev);
 242                 minor = getminor(dev);
 243 
 244                 /* makedevice32 */
 245                 ino = (ino_t)((major << L_BITSMINOR32) | (minor & L_MAXMIN32));
 246 
 247                 /* make ino for VCHR different than VBLK */
 248                 ino <<= 1;
 249                 if (typ == VCHR)
 250                         ino |= 1;
 251         }
 252 
 253         ino += DV_ROOTINO + 1;          /* skew */
 254 
 255         /*
 256          * diagnose things a little early because adding the skew to a large
 257          * minor number could roll over the major.
 258          */
 259         if ((major >= (L_MAXMAJ32 >> 1)) && (warn == 0)) {
 260                 warn = 1;
 261                 cmn_err(CE_WARN, "%s: inode numbers are not unique", dvnm);
 262         }
 263 
 264         return (ino);
 265 }
 266 
 267 /*
 268  * Compare two nodes lexographically to balance avl tree
 269  */
 270 static int
 271 dv_compare_nodes(const struct dv_node *dv1, const struct dv_node *dv2)
 272 {
 273         int     rv;
 274 
 275         if ((rv = strcmp(dv1->dv_name, dv2->dv_name)) == 0)
 276                 return (0);
 277         return ((rv < 0) ? -1 : 1);
 278 }
 279 
 280 /*
 281  * dv_mkroot
 282  *
 283  * Build the first VDIR dv_node.
 284  */
 285 struct dv_node *
 286 dv_mkroot(struct vfs *vfsp, dev_t devfsdev)
 287 {
 288         struct dv_node  *dv;
 289         struct vnode    *vp;
 290 
 291         ASSERT(ddi_root_node() != NULL);
 292         ASSERT(dv_node_cache != NULL);
 293 
 294         dcmn_err3(("dv_mkroot\n"));
 295         dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP);
 296         vp = DVTOV(dv);
 297         vn_reinit(vp);
 298         vp->v_flag = VROOT;
 299         vp->v_vfsp = vfsp;
 300         vp->v_type = VDIR;
 301         vp->v_rdev = devfsdev;
 302         vn_setops(vp, dv_vnodeops);
 303         vn_exists(vp);
 304 
 305         dvroot = dv;
 306 
 307         dv->dv_name = NULL;          /* not needed */
 308         dv->dv_namelen = 0;
 309 
 310         dv->dv_devi = ddi_root_node();
 311 
 312         dv->dv_ino = DV_ROOTINO;
 313         dv->dv_nlink = 2;            /* name + . (no dv_insert) */
 314         dv->dv_dotdot = dv;          /* .. == self */
 315         dv->dv_attrvp = NULLVP;
 316         dv->dv_attr = NULL;
 317         dv->dv_flags = DV_BUILD;
 318         dv->dv_priv = NULL;
 319         dv->dv_busy = 0;
 320         dv->dv_dflt_mode = 0;
 321 
 322         avl_create(&dv->dv_entries,
 323             (int (*)(const void *, const void *))dv_compare_nodes,
 324             sizeof (struct dv_node), offsetof(struct dv_node, dv_avllink));
 325 
 326         return (dv);
 327 }
 328 
 329 /*
 330  * dv_mkdir
 331  *
 332  * Given an probed or attached nexus node, create a VDIR dv_node.
 333  * No dv_attrvp is created at this point.
 334  */
 335 struct dv_node *
 336 dv_mkdir(struct dv_node *ddv, dev_info_t *devi, char *nm)
 337 {
 338         struct dv_node  *dv;
 339         struct vnode    *vp;
 340         size_t          nmlen;
 341 
 342         ASSERT((devi));
 343         dcmn_err4(("dv_mkdir: %s\n", nm));
 344 
 345         dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP);
 346         nmlen = strlen(nm) + 1;
 347         dv->dv_name = kmem_alloc(nmlen, KM_SLEEP);
 348         bcopy(nm, dv->dv_name, nmlen);
 349         dv->dv_namelen = nmlen - 1;  /* '\0' not included */
 350 
 351         vp = DVTOV(dv);
 352         vn_reinit(vp);
 353         vp->v_flag = 0;
 354         vp->v_vfsp = DVTOV(ddv)->v_vfsp;
 355         vp->v_type = VDIR;
 356         vp->v_rdev = DVTOV(ddv)->v_rdev;
 357         vn_setops(vp, vn_getops(DVTOV(ddv)));
 358         vn_exists(vp);
 359 
 360         dv->dv_devi = devi;
 361         ndi_hold_devi(devi);
 362 
 363         dv->dv_ino = dv_mkino(devi, VDIR, NODEV);
 364         dv->dv_nlink = 0;            /* updated on insert */
 365         dv->dv_dotdot = ddv;
 366         dv->dv_attrvp = NULLVP;
 367         dv->dv_attr = NULL;
 368         dv->dv_flags = DV_BUILD;
 369         dv->dv_priv = NULL;
 370         dv->dv_busy = 0;
 371         dv->dv_dflt_mode = 0;
 372 
 373         avl_create(&dv->dv_entries,
 374             (int (*)(const void *, const void *))dv_compare_nodes,
 375             sizeof (struct dv_node), offsetof(struct dv_node, dv_avllink));
 376 
 377         return (dv);
 378 }
 379 
 380 /*
 381  * dv_mknod
 382  *
 383  * Given a minor node, create a VCHR or VBLK dv_node.
 384  * No dv_attrvp is created at this point.
 385  */
 386 static struct dv_node *
 387 dv_mknod(struct dv_node *ddv, dev_info_t *devi, char *nm,
 388     struct ddi_minor_data *dmd)
 389 {
 390         struct dv_node  *dv;
 391         struct vnode    *vp;
 392         size_t          nmlen;
 393 
 394         dcmn_err4(("dv_mknod: %s\n", nm));
 395 
 396         dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP);
 397         nmlen = strlen(nm) + 1;
 398         dv->dv_name = kmem_alloc(nmlen, KM_SLEEP);
 399         bcopy(nm, dv->dv_name, nmlen);
 400         dv->dv_namelen = nmlen - 1;  /* no '\0' */
 401 
 402         vp = DVTOV(dv);
 403         vn_reinit(vp);
 404         vp->v_flag = 0;
 405         vp->v_vfsp = DVTOV(ddv)->v_vfsp;
 406         vp->v_type = dmd->ddm_spec_type == S_IFCHR ? VCHR : VBLK;
 407         vp->v_rdev = dmd->ddm_dev;
 408         vn_setops(vp, vn_getops(DVTOV(ddv)));
 409         vn_exists(vp);
 410 
 411         ASSERT(DEVI_BUSY_OWNED(devi));
 412         ndi_hold_devi(devi);
 413 
 414         dv->dv_devi = devi;
 415         dv->dv_ino = dv_mkino(devi, vp->v_type, vp->v_rdev);
 416         dv->dv_nlink = 0;            /* updated on insert */
 417         dv->dv_dotdot = ddv;
 418         dv->dv_attrvp = NULLVP;
 419         dv->dv_attr = NULL;
 420         dv->dv_flags = 0;
 421 
 422         if (dmd->type == DDM_INTERNAL_PATH)
 423                 dv->dv_flags |= DV_INTERNAL;
 424         if (dmd->ddm_flags & DM_NO_FSPERM)
 425                 dv->dv_flags |= DV_NO_FSPERM;
 426 
 427         dv->dv_priv = dmd->ddm_node_priv;
 428         if (dv->dv_priv)
 429                 dphold(dv->dv_priv);
 430 
 431         /*
 432          * Minors created with ddi_create_priv_minor_node can specify
 433          * a default mode permission other than the devfs default.
 434          */
 435         if (dv->dv_priv || dv->dv_flags & DV_NO_FSPERM) {
 436                 dcmn_err5(("%s: dv_mknod default priv mode 0%o\n",
 437                     dv->dv_name, dmd->ddm_priv_mode));
 438                 dv->dv_flags |= DV_DFLT_MODE;
 439                 dv->dv_dflt_mode = dmd->ddm_priv_mode & S_IAMB;
 440         }
 441 
 442         return (dv);
 443 }
 444 
 445 /*
 446  * dv_destroy
 447  *
 448  * Destroy what we created in dv_mkdir or dv_mknod.
 449  * In the case of a *referenced* directory, do nothing.
 450  */
 451 void
 452 dv_destroy(struct dv_node *dv, uint_t flags)
 453 {
 454         vnode_t *vp = DVTOV(dv);
 455         ASSERT(dv->dv_nlink == 0);           /* no references */
 456 
 457         dcmn_err4(("dv_destroy: %s\n", dv->dv_name));
 458 
 459         /*
 460          * We may be asked to unlink referenced directories.
 461          * In this case, there is nothing to be done.
 462          * The eventual memory free will be done in
 463          * devfs_inactive.
 464          */
 465         if (vp->v_count != 0) {
 466                 ASSERT(vp->v_type == VDIR);
 467                 ASSERT(flags & DV_CLEAN_FORCE);
 468                 ASSERT(DV_STALE(dv));
 469                 return;
 470         }
 471 
 472         if (vp->v_type == VDIR) {
 473                 ASSERT(DV_FIRST_ENTRY(dv) == NULL);
 474                 avl_destroy(&dv->dv_entries);
 475         }
 476 
 477         if (dv->dv_attrvp != NULLVP)
 478                 VN_RELE(dv->dv_attrvp);
 479         if (dv->dv_attr != NULL)
 480                 kmem_free(dv->dv_attr, sizeof (struct vattr));
 481         if (dv->dv_name != NULL)
 482                 kmem_free(dv->dv_name, dv->dv_namelen + 1);
 483         if (dv->dv_devi != NULL) {
 484                 ndi_rele_devi(dv->dv_devi);
 485         }
 486         if (dv->dv_priv != NULL) {
 487                 dpfree(dv->dv_priv);
 488         }
 489 
 490         kmem_cache_free(dv_node_cache, dv);
 491 }
 492 
 493 /*
 494  * Find and hold dv_node by name
 495  */
 496 static struct dv_node *
 497 dv_findbyname(struct dv_node *ddv, char *nm)
 498 {
 499         struct dv_node  *dv;
 500         avl_index_t     where;
 501         struct dv_node  dvtmp;
 502 
 503         ASSERT(RW_LOCK_HELD(&ddv->dv_contents));
 504         dcmn_err3(("dv_findbyname: %s\n", nm));
 505 
 506         dvtmp.dv_name = nm;
 507         dv = avl_find(&ddv->dv_entries, &dvtmp, &where);
 508         if (dv) {
 509                 ASSERT(dv->dv_dotdot == ddv);
 510                 ASSERT(strcmp(dv->dv_name, nm) == 0);
 511                 VN_HOLD(DVTOV(dv));
 512                 return (dv);
 513         }
 514         return (NULL);
 515 }
 516 
 517 /*
 518  * Inserts a new dv_node in a parent directory
 519  */
 520 void
 521 dv_insert(struct dv_node *ddv, struct dv_node *dv)
 522 {
 523         avl_index_t     where;
 524 
 525         ASSERT(RW_WRITE_HELD(&ddv->dv_contents));
 526         ASSERT(DVTOV(ddv)->v_type == VDIR);
 527         ASSERT(ddv->dv_nlink >= 2);
 528         ASSERT(dv->dv_nlink == 0);
 529 
 530         dcmn_err3(("dv_insert: %s\n", dv->dv_name));
 531 
 532         dv->dv_dotdot = ddv;
 533         if (DVTOV(dv)->v_type == VDIR) {
 534                 ddv->dv_nlink++;     /* .. to containing directory */
 535                 dv->dv_nlink = 2;    /* name + . */
 536         } else {
 537                 dv->dv_nlink = 1;    /* name */
 538         }
 539 
 540         /* enter node in the avl tree */
 541         VERIFY(avl_find(&ddv->dv_entries, dv, &where) == NULL);
 542         avl_insert(&ddv->dv_entries, dv, where);
 543 }
 544 
 545 /*
 546  * Unlink a dv_node from a perent directory
 547  */
 548 void
 549 dv_unlink(struct dv_node *ddv, struct dv_node *dv)
 550 {
 551         /* verify linkage of arguments */
 552         ASSERT(ddv && dv);
 553         ASSERT(dv->dv_dotdot == ddv);
 554         ASSERT(RW_WRITE_HELD(&ddv->dv_contents));
 555         ASSERT(DVTOV(ddv)->v_type == VDIR);
 556 
 557         dcmn_err3(("dv_unlink: %s\n", dv->dv_name));
 558 
 559         if (DVTOV(dv)->v_type == VDIR) {
 560                 ddv->dv_nlink--;     /* .. to containing directory */
 561                 dv->dv_nlink -= 2;   /* name + . */
 562         } else {
 563                 dv->dv_nlink -= 1;   /* name */
 564         }
 565         ASSERT(ddv->dv_nlink >= 2);
 566         ASSERT(dv->dv_nlink == 0);
 567 
 568         dv->dv_dotdot = NULL;
 569 
 570         /* remove from avl tree */
 571         avl_remove(&ddv->dv_entries, dv);
 572 }
 573 
 574 /*
 575  * Merge devfs node specific information into an attribute structure.
 576  *
 577  * NOTE: specfs provides ATIME,MTIME,CTIME,SIZE,BLKSIZE,NBLOCKS on leaf node.
 578  */
 579 void
 580 dv_vattr_merge(struct dv_node *dv, struct vattr *vap)
 581 {
 582         struct vnode    *vp = DVTOV(dv);
 583 
 584         vap->va_nodeid = dv->dv_ino;
 585         vap->va_nlink = dv->dv_nlink;
 586 
 587         if (vp->v_type == VDIR) {
 588                 vap->va_rdev = 0;
 589                 vap->va_fsid = vp->v_rdev;
 590         } else {
 591                 vap->va_rdev = vp->v_rdev;
 592                 vap->va_fsid = DVTOV(dv->dv_dotdot)->v_rdev;
 593                 vap->va_type = vp->v_type;
 594                 /* don't trust the shadow file type */
 595                 vap->va_mode &= ~S_IFMT;
 596                 if (vap->va_type == VCHR)
 597                         vap->va_mode |= S_IFCHR;
 598                 else
 599                         vap->va_mode |= S_IFBLK;
 600         }
 601 }
 602 
 603 /*
 604  * Get default device permission by consulting rules in
 605  * privilege specification in minor node and /etc/minor_perm.
 606  *
 607  * This function is called from the devname filesystem to get default
 608  * permissions for a device exported to a non-global zone.
 609  */
 610 void
 611 devfs_get_defattr(struct vnode *vp, struct vattr *vap, int *no_fs_perm)
 612 {
 613         mperm_t         mp;
 614         struct dv_node  *dv;
 615 
 616         /* If vp isn't a dv_node, return something sensible */
 617         if (!vn_matchops(vp, dv_vnodeops)) {
 618                 if (no_fs_perm)
 619                         *no_fs_perm = 0;
 620                 *vap = dv_vattr_file;
 621                 return;
 622         }
 623 
 624         /*
 625          * For minors not created by ddi_create_priv_minor_node(),
 626          * use devfs defaults.
 627          */
 628         dv = VTODV(vp);
 629         if (vp->v_type == VDIR) {
 630                 *vap = dv_vattr_dir;
 631         } else if (dv->dv_flags & DV_NO_FSPERM) {
 632                 if (no_fs_perm)
 633                         *no_fs_perm = 1;
 634                 *vap = dv_vattr_priv;
 635         } else {
 636                 /*
 637                  * look up perm bits from minor_perm
 638                  */
 639                 *vap = dv_vattr_file;
 640                 if (dev_minorperm(dv->dv_devi, dv->dv_name, &mp) == 0) {
 641                         VATTR_MP_MERGE((*vap), mp);
 642                         dcmn_err5(("%s: minor perm mode 0%o\n",
 643                             dv->dv_name, vap->va_mode));
 644                 } else if (dv->dv_flags & DV_DFLT_MODE) {
 645                         ASSERT((dv->dv_dflt_mode & ~S_IAMB) == 0);
 646                         vap->va_mode &= ~S_IAMB;
 647                         vap->va_mode |= dv->dv_dflt_mode;
 648                         dcmn_err5(("%s: priv mode 0%o\n",
 649                             dv->dv_name, vap->va_mode));
 650                 }
 651         }
 652 }
 653 
 654 /*
 655  * dv_shadow_node
 656  *
 657  * Given a VDIR dv_node, find/create the associated VDIR
 658  * node in the shadow attribute filesystem.
 659  *
 660  * Given a VCHR/VBLK dv_node, find the associated VREG
 661  * node in the shadow attribute filesystem.  These nodes
 662  * are only created to persist non-default attributes.
 663  * Lack of such a node implies the default permissions
 664  * are sufficient.
 665  *
 666  * Managing the attribute file entries is slightly tricky (mostly
 667  * because we can't intercept VN_HOLD and VN_RELE except on the last
 668  * release).
 669  *
 670  * We assert that if the dv_attrvp pointer is non-NULL, it points
 671  * to a singly-held (by us) vnode that represents the shadow entry
 672  * in the underlying filesystem.  To avoid store-ordering issues,
 673  * we assert that the pointer can only be tested under the dv_contents
 674  * READERS lock.
 675  */
 676 
 677 void
 678 dv_shadow_node(
 679         struct vnode *dvp,      /* devfs parent directory vnode */
 680         char *nm,               /* name component */
 681         struct vnode *vp,       /* devfs vnode */
 682         struct pathname *pnp,   /* the path .. */
 683         struct vnode *rdir,     /* the root .. */
 684         struct cred *cred,      /* who's asking? */
 685         int flags)              /* optionally create shadow node */
 686 {
 687         struct dv_node  *dv;    /* dv_node of named directory */
 688         struct vnode    *rdvp;  /* shadow parent directory vnode */
 689         struct vnode    *rvp;   /* shadow vnode */
 690         struct vnode    *rrvp;  /* realvp of shadow vnode */
 691         struct vattr    vattr;
 692         int             create_tried;
 693         int             error;
 694 
 695         ASSERT(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK);
 696         dv = VTODV(vp);
 697         dcmn_err3(("dv_shadow_node: name %s attr %p\n",
 698             nm, (void *)dv->dv_attrvp));
 699 
 700         if ((flags & DV_SHADOW_WRITE_HELD) == 0) {
 701                 ASSERT(RW_READ_HELD(&dv->dv_contents));
 702                 if (dv->dv_attrvp != NULLVP)
 703                         return;
 704                 if (!rw_tryupgrade(&dv->dv_contents)) {
 705                         rw_exit(&dv->dv_contents);
 706                         rw_enter(&dv->dv_contents, RW_WRITER);
 707                         if (dv->dv_attrvp != NULLVP) {
 708                                 rw_downgrade(&dv->dv_contents);
 709                                 return;
 710                         }
 711                 }
 712         } else {
 713                 ASSERT(RW_WRITE_HELD(&dv->dv_contents));
 714                 if (dv->dv_attrvp != NULLVP)
 715                         return;
 716         }
 717 
 718         ASSERT(RW_WRITE_HELD(&dv->dv_contents) && dv->dv_attrvp == NULL);
 719 
 720         rdvp = VTODV(dvp)->dv_attrvp;
 721         create_tried = 0;
 722 lookup:
 723         if (rdvp && (dv->dv_flags & DV_NO_FSPERM) == 0) {
 724                 error = VOP_LOOKUP(rdvp, nm, &rvp, pnp, LOOKUP_DIR, rdir, cred,
 725                     NULL, NULL, NULL);
 726 
 727                 /* factor out the snode since we only want the attribute node */
 728                 if ((error == 0) && (VOP_REALVP(rvp, &rrvp, NULL) == 0)) {
 729                         VN_HOLD(rrvp);
 730                         VN_RELE(rvp);
 731                         rvp = rrvp;
 732                 }
 733         } else
 734                 error = EROFS;          /* no parent, no entry */
 735 
 736         /*
 737          * All we want is the permissions (and maybe ACLs and
 738          * extended attributes), and we want to perform lookups
 739          * by name.  Drivers occasionally change their minor
 740          * number space.  If something changes, there's no
 741          * much we can do about it here.
 742          */
 743 
 744         /* The shadow node checks out. We are done */
 745         if (error == 0) {
 746                 dv->dv_attrvp = rvp; /* with one hold */
 747 
 748                 /*
 749                  * Determine if we have non-trivial ACLs on this node.
 750                  * It is not necessary to VOP_RWLOCK since fs_acl_nontrivial
 751                  * only does VOP_GETSECATTR.
 752                  */
 753                 dv->dv_flags &= ~DV_ACL;
 754 
 755                 if (fs_acl_nontrivial(rvp, cred))
 756                         dv->dv_flags |= DV_ACL;
 757 
 758                 /*
 759                  * If we have synced out the memory attributes, free
 760                  * them and switch back to using the persistent store.
 761                  */
 762                 if (rvp && dv->dv_attr) {
 763                         kmem_free(dv->dv_attr, sizeof (struct vattr));
 764                         dv->dv_attr = NULL;
 765                 }
 766                 if ((flags & DV_SHADOW_WRITE_HELD) == 0)
 767                         rw_downgrade(&dv->dv_contents);
 768                 ASSERT(RW_LOCK_HELD(&dv->dv_contents));
 769                 return;
 770         }
 771 
 772         /*
 773          * Failed to find attribute in persistent backing store,
 774          * get default permission bits.
 775          */
 776         devfs_get_defattr(vp, &vattr, NULL);
 777 
 778         dv_vattr_merge(dv, &vattr);
 779         gethrestime(&vattr.va_atime);
 780         vattr.va_mtime = vattr.va_atime;
 781         vattr.va_ctime = vattr.va_atime;
 782 
 783         /*
 784          * Try to create shadow dir. This is necessary in case
 785          * we need to create a shadow leaf node later, when user
 786          * executes chmod.
 787          */
 788         if ((error == ENOENT) && !create_tried) {
 789                 switch (vp->v_type) {
 790                 case VDIR:
 791                         error = VOP_MKDIR(rdvp, nm, &vattr, &rvp, kcred,
 792                             NULL, 0, NULL);
 793                         dsysdebug(error, ("vop_mkdir %s %s %d\n",
 794                             VTODV(dvp)->dv_name, nm, error));
 795                         create_tried = 1;
 796                         break;
 797 
 798                 case VCHR:
 799                 case VBLK:
 800                         /*
 801                          * Shadow nodes are only created on demand
 802                          */
 803                         if (flags & DV_SHADOW_CREATE) {
 804                                 error = VOP_CREATE(rdvp, nm, &vattr, NONEXCL,
 805                                     VREAD|VWRITE, &rvp, kcred, 0, NULL, NULL);
 806                                 dsysdebug(error, ("vop_create %s %s %d\n",
 807                                     VTODV(dvp)->dv_name, nm, error));
 808                                 create_tried = 1;
 809                         }
 810                         break;
 811 
 812                 default:
 813                         cmn_err(CE_PANIC, "devfs: %s: create", dvnm);
 814                         /*NOTREACHED*/
 815                 }
 816 
 817                 if (create_tried &&
 818                     (error == 0) || (error == EEXIST)) {
 819                         VN_RELE(rvp);
 820                         goto lookup;
 821                 }
 822         }
 823 
 824         /* Store attribute in memory */
 825         if (dv->dv_attr == NULL) {
 826                 dv->dv_attr = kmem_alloc(sizeof (struct vattr), KM_SLEEP);
 827                 *(dv->dv_attr) = vattr;
 828         }
 829 
 830         if ((flags & DV_SHADOW_WRITE_HELD) == 0)
 831                 rw_downgrade(&dv->dv_contents);
 832         ASSERT(RW_LOCK_HELD(&dv->dv_contents));
 833 }
 834 
 835 /*
 836  * Given a devinfo node, and a name, returns the appropriate
 837  * minor information for that named node, if it exists.
 838  */
 839 static int
 840 dv_find_leafnode(dev_info_t *devi, char *minor_nm, struct ddi_minor_data *r_mi)
 841 {
 842         struct ddi_minor_data   *dmd;
 843 
 844         ASSERT(i_ddi_devi_attached(devi));
 845 
 846         dcmn_err3(("dv_find_leafnode: %s\n", minor_nm));
 847         ASSERT(DEVI_BUSY_OWNED(devi));
 848         for (dmd = DEVI(devi)->devi_minor; dmd; dmd = dmd->next) {
 849 
 850                 /*
 851                  * Skip alias nodes and nodes without a name.
 852                  */
 853                 if ((dmd->type == DDM_ALIAS) || (dmd->ddm_name == NULL))
 854                         continue;
 855 
 856                 dcmn_err4(("dv_find_leafnode: (%s,%s)\n",
 857                     minor_nm, dmd->ddm_name));
 858                 if (strcmp(minor_nm, dmd->ddm_name) == 0) {
 859                         r_mi->ddm_dev = dmd->ddm_dev;
 860                         r_mi->ddm_spec_type = dmd->ddm_spec_type;
 861                         r_mi->type = dmd->type;
 862                         r_mi->ddm_flags = dmd->ddm_flags;
 863                         r_mi->ddm_node_priv = dmd->ddm_node_priv;
 864                         r_mi->ddm_priv_mode = dmd->ddm_priv_mode;
 865                         if (r_mi->ddm_node_priv)
 866                                 dphold(r_mi->ddm_node_priv);
 867                         return (0);
 868                 }
 869         }
 870 
 871         dcmn_err3(("dv_find_leafnode: %s: ENOENT\n", minor_nm));
 872         return (ENOENT);
 873 }
 874 
 875 /*
 876  * Special handling for clone node:
 877  *      Clone minor name is a driver name, the minor number will
 878  *      be the major number of the driver. There is no minor
 879  *      node under the clone driver, so we'll manufacture the
 880  *      dev_t.
 881  */
 882 static struct dv_node *
 883 dv_clone_mknod(struct dv_node *ddv, char *drvname)
 884 {
 885         major_t                 major;
 886         struct dv_node          *dvp;
 887         char                    *devnm;
 888         struct ddi_minor_data   *dmd;
 889 
 890         /*
 891          * Make sure drvname is a STREAMS driver. We load the driver,
 892          * but don't attach to any instances. This makes stat(2)
 893          * relatively cheap.
 894          */
 895         major = ddi_name_to_major(drvname);
 896         if (major == DDI_MAJOR_T_NONE)
 897                 return (NULL);
 898 
 899         if (ddi_hold_driver(major) == NULL)
 900                 return (NULL);
 901 
 902         if (STREAMSTAB(major) == NULL) {
 903                 ddi_rele_driver(major);
 904                 return (NULL);
 905         }
 906 
 907         ddi_rele_driver(major);
 908         devnm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
 909         (void) snprintf(devnm, MAXNAMELEN, "clone@0:%s", drvname);
 910         dmd = kmem_zalloc(sizeof (*dmd), KM_SLEEP);
 911         dmd->ddm_dev = makedevice(clone_major, (minor_t)major);
 912         dmd->ddm_spec_type = S_IFCHR;
 913         dvp = dv_mknod(ddv, clone_dip, devnm, dmd);
 914         kmem_free(dmd, sizeof (*dmd));
 915         kmem_free(devnm, MAXNAMELEN);
 916         return (dvp);
 917 }
 918 
/*
 * Given the parent directory node, and a name in it, returns the
 * named dv_node to the caller (as a vnode).
 *
 * (We need pnp and rdir for doing shadow lookups; they can be NULL)
 *
 * Arguments:
 *      ddv             directory node in which nm is looked up
 *      nm              component name.  "" and "." yield ddv itself and
 *                      ".." yields ddv->dv_dotdot.  A ':' in nm is
 *                      overwritten with a NUL while the child is being
 *                      configured and restored afterwards, so the buffer
 *                      must be writable.
 *      vpp             receives the resulting vnode; for VCHR and VBLK
 *                      nodes this is whatever specvp_devfs() returns
 *      pnp, rdir       handed to dv_shadow_node() unchanged
 *      cred            handed to dv_shadow_node() and specvp_devfs();
 *                      DV_INTERNAL nodes are only visible to kcred
 *      ndi_flags       handed to ndi_devi_config_one() with NDI_NO_EVENT
 *                      added; must not contain NDI_CONFIG (asserted)
 *
 * Returns 0 on success, otherwise:
 *      EBUSY   devfs_clean_key is set for this thread and a contents
 *              lock could not be obtained (or upgraded) without blocking
 *      ESTALE  ddv is stale
 *      ENOENT  the name is invalid, could not be configured, resolved to
 *              a hidden or internal node, or no dv_node could be built
 *      ENOSYS  specvp_devfs() returned NULLVP
 *
 * ddv->dv_contents is not held on return.
 */
int
dv_find(struct dv_node *ddv, char *nm, struct vnode **vpp, struct pathname *pnp,
    struct vnode *rdir, struct cred *cred, uint_t ndi_flags)
{
        extern int isminiroot;  /* see modctl.c */

        int                     circ;
        int                     rv = 0, was_busy = 0, nmlen, write_held = 0;
        struct vnode            *vp;
        struct dv_node          *dv, *dup;
        dev_info_t              *pdevi, *devi = NULL;
        char                    *mnm;
        struct ddi_minor_data   *dmd;

        dcmn_err3(("dv_find %s\n", nm));

        /*
         * Never block on the contents lock when devfs_clean_key is set
         * for this thread; report EBUSY instead.
         */
        if (!rw_tryenter(&ddv->dv_contents, RW_READER)) {
                if (tsd_get(devfs_clean_key))
                        return (EBUSY);
                rw_enter(&ddv->dv_contents, RW_READER);
        }
start:
        /*
         * Re-entered from below with the writer lock held when the
         * upgrade attempt had to drop and re-take the lock.
         */
        if (DV_STALE(ddv)) {
                rw_exit(&ddv->dv_contents);
                return (ESTALE);
        }

        /*
         * Empty name or ., return node itself.
         */
        nmlen = strlen(nm);
        if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) {
                *vpp = DVTOV(ddv);
                rw_exit(&ddv->dv_contents);
                VN_HOLD(*vpp);
                return (0);
        }

        /*
         * .., return the parent directory
         */
        if ((nmlen == 2) && (strcmp(nm, "..") == 0)) {
                *vpp = DVTOV(ddv->dv_dotdot);
                rw_exit(&ddv->dv_contents);
                VN_HOLD(*vpp);
                return (0);
        }

        /*
         * Fail anything without a valid device name component
         */
        if (nm[0] == '@' || nm[0] == ':') {
                dcmn_err3(("devfs: no driver '%s'\n", nm));
                rw_exit(&ddv->dv_contents);
                return (ENOENT);
        }

        /*
         * So, now we have to deal with the trickier stuff.
         *
         * (a) search the existing list of dv_nodes on this directory
         */
        if ((dv = dv_findbyname(ddv, nm)) != NULL) {
founddv:
                /*
                 * Also reached from the end of (b), after a node has
                 * been inserted (or a duplicate found) with the writer
                 * lock on ddv held.
                 */
                ASSERT(RW_LOCK_HELD(&ddv->dv_contents));

                if (!rw_tryenter(&dv->dv_contents, RW_READER)) {
                        if (tsd_get(devfs_clean_key)) {
                                VN_RELE(DVTOV(dv));
                                rw_exit(&ddv->dv_contents);
                                return (EBUSY);
                        }
                        rw_enter(&dv->dv_contents, RW_READER);
                }

                vp = DVTOV(dv);
                if ((dv->dv_attrvp != NULLVP) ||
                    (vp->v_type != VDIR && dv->dv_attr != NULL)) {
                        /*
                         * Common case - we already have attributes
                         */
                        rw_exit(&dv->dv_contents);
                        rw_exit(&ddv->dv_contents);
                        goto found;
                }

                /*
                 * No attribute vp, try and build one.
                 *
                 * dv_shadow_node() can briefly drop &dv->dv_contents lock
                 * if it is unable to upgrade it to a write lock. If the
                 * current thread has come in through the bottom-up device
                 * configuration devfs_clean() path, we may deadlock against
                 * a thread performing top-down device configuration if it
                 * grabs the contents lock. To avoid this, when we are on the
                 * devfs_clean() path we attempt to upgrade the dv_contents
                 * lock before we call dv_shadow_node().
                 */
                if (tsd_get(devfs_clean_key)) {
                        if (!rw_tryupgrade(&dv->dv_contents)) {
                                VN_RELE(DVTOV(dv));
                                rw_exit(&dv->dv_contents);
                                rw_exit(&ddv->dv_contents);
                                return (EBUSY);
                        }

                        write_held = DV_SHADOW_WRITE_HELD;
                }

                dv_shadow_node(DVTOV(ddv), nm, vp, pnp, rdir, cred,
                    write_held);

                rw_exit(&dv->dv_contents);
                rw_exit(&ddv->dv_contents);
                goto found;
        }

        /*
         * (b) Search the child devinfo nodes of our parent directory,
         * looking for the named node.  If we find it, build a new
         * node, then grab the writers lock, search the directory
         * if it's still not there, then insert it.
         *
         * We drop the devfs locks before accessing the device tree.
         * Take care to mark the node BUSY so that a forced devfs_clean
         * doesn't mark the directory node stale.
         *
         * Also, check if we are called as part of devfs_clean or
         * reset_perm. If so, simply return not found because there
         * is nothing to clean.
         */
        if (tsd_get(devfs_clean_key)) {
                rw_exit(&ddv->dv_contents);
                return (ENOENT);
        }

        /*
         * We could be either READ or WRITE locked at
         * this point. Upgrade if we are read locked.
         */
        ASSERT(RW_LOCK_HELD(&ddv->dv_contents));
        if (rw_read_locked(&ddv->dv_contents) &&
            !rw_tryupgrade(&ddv->dv_contents)) {
                rw_exit(&ddv->dv_contents);
                rw_enter(&ddv->dv_contents, RW_WRITER);
                /*
                 * Things may have changed when we dropped
                 * the contents lock, so start from top again
                 */
                goto start;
        }
        ddv->dv_busy++;              /* mark busy before dropping lock */
        was_busy++;
        rw_exit(&ddv->dv_contents);

        pdevi = ddv->dv_devi;
        ASSERT(pdevi != NULL);

        /*
         * Temporarily cut nm at the ':' so that only the part before
         * the minor name is passed to ndi_devi_config_one(); the ':' is
         * put back right after the call.
         */
        mnm = strchr(nm, ':');
        if (mnm)
                *mnm = (char)0;

        /*
         * Configure one nexus child, will call nexus's bus_ops
         * If successful, devi is held upon returning.
         * Note: devfs lookup should not be configuring grandchildren.
         */
        ASSERT((ndi_flags & NDI_CONFIG) == 0);

        rv = ndi_devi_config_one(pdevi, nm, &devi, ndi_flags | NDI_NO_EVENT);
        if (mnm)
                *mnm = ':';
        if (rv != NDI_SUCCESS) {
                rv = ENOENT;
                goto notfound;
        }

        ASSERT(devi);

        /*
         * Check if this is a path alias
         *
         * NOTE(review): the "goto found" below bypasses the
         * ndi_rele_devi(devi) that releases the hold taken by
         * ndi_devi_config_one(); confirm whether that hold is dropped
         * elsewhere or is leaked on this path.
         */
        if (ddi_aliases_present == B_TRUE && ddi_get_parent(devi) != pdevi) {
                char *curr = kmem_alloc(MAXPATHLEN, KM_SLEEP);

                (void) ddi_pathname(devi, curr);

                vp = NULL;
                if (devfs_lookupname(curr, NULL, &vp) == 0 && vp) {
                        dv = VTODV(vp);
                        kmem_free(curr, MAXPATHLEN);
                        goto found;
                }
                kmem_free(curr, MAXPATHLEN);
        }

        /*
         * If we configured a hidden node, consider it notfound.
         */
        if (ndi_dev_is_hidden_node(devi)) {
                ndi_rele_devi(devi);
                rv = ENOENT;
                goto notfound;
        }

        /*
         * Don't make vhci clients visible under phci, unless we
         * are in miniroot.
         */
        if (isminiroot == 0 && ddi_get_parent(devi) != pdevi) {
                ndi_rele_devi(devi);
                rv = ENOENT;
                goto notfound;
        }

        ASSERT(devi && i_ddi_devi_attached(devi));

        /*
         * Invalidate cache to notice newly created minor nodes.
         */
        rw_enter(&ddv->dv_contents, RW_WRITER);
        ddv->dv_flags |= DV_BUILD;
        rw_exit(&ddv->dv_contents);

        /*
         * mkdir for nexus drivers and leaf nodes as well.  If we are racing
         * and create a duplicate, the duplicate will be destroyed below.
         *
         * dv is still NULL here (the dv_findbyname() in (a) failed), so
         * it stays NULL if no minor is found below.
         */
        if (mnm == NULL) {
                dv = dv_mkdir(ddv, devi, nm);
        } else {
                /*
                 * Allocate dmd first to avoid KM_SLEEP with active
                 * ndi_devi_enter.
                 */
                dmd = kmem_zalloc(sizeof (*dmd), KM_SLEEP);
                ndi_devi_enter(devi, &circ);
                if (devi == clone_dip) {
                        /*
                         * For clone minors, load the driver indicated by
                         * minor name.
                         */
                        dv = dv_clone_mknod(ddv, mnm + 1);
                } else {
                        /*
                         * Find minor node and make a dv_node
                         */
                        if (dv_find_leafnode(devi, mnm + 1, dmd) == 0) {
                                dv = dv_mknod(ddv, devi, nm, dmd);
                                /*
                                 * Drop the hold dv_find_leafnode() took
                                 * on the privilege data.
                                 */
                                if (dmd->ddm_node_priv)
                                        dpfree(dmd->ddm_node_priv);
                        }
                }
                ndi_devi_exit(devi, circ);
                kmem_free(dmd, sizeof (*dmd));
        }
        /*
         * Release hold from ndi_devi_config_one()
         */
        ndi_rele_devi(devi);

        if (dv == NULL) {
                rv = ENOENT;
                goto notfound;
        }

        /*
         * We have released the dv_contents lock, need to check
         * if another thread already created a duplicate node
         */
        rw_enter(&ddv->dv_contents, RW_WRITER);
        if ((dup = dv_findbyname(ddv, nm)) == NULL) {
                dv_insert(ddv, dv);
        } else {
                /*
                 * Duplicate found, use the existing node
                 */
                VN_RELE(DVTOV(dv));
                dv_destroy(dv, 0);
                dv = dup;
        }
        goto founddv;
        /*NOTREACHED*/

found:
        /*
         * No devfs contents locks are held from here on.
         *
         * Fail lookup of device that has now become hidden (typically via
         * hot removal of open device).
         */
        if (dv->dv_devi && ndi_dev_is_hidden_node(dv->dv_devi)) {
                dcmn_err2(("dv_find: nm %s failed: hidden/removed\n", nm));
                VN_RELE(vp);
                rv = ENOENT;
                goto notfound;
        }

        /*
         * Skip non-kernel lookups of internal nodes.
         * This use of kcred to distinguish between user and
         * internal kernel lookups is unfortunate.  The information
         * provided by the seg argument to lookupnameat should
         * evolve into a lookup flag for filesystems that need
         * this distinction.
         */
        if ((dv->dv_flags & DV_INTERNAL) && (cred != kcred)) {
                dcmn_err2(("dv_find: nm %s failed: internal\n", nm));
                VN_RELE(vp);
                rv = ENOENT;
                goto notfound;
        }

        dcmn_err2(("dv_find: returning vp for nm %s\n", nm));
        if (vp->v_type == VCHR || vp->v_type == VBLK) {
                /*
                 * If vnode is a device, return special vnode instead
                 * (though it knows all about -us- via sp->s_realvp,
                 * sp->s_devvp, and sp->s_dip)
                 */
                *vpp = specvp_devfs(vp, vp->v_rdev, vp->v_type, cred,
                    dv->dv_devi);
                VN_RELE(vp);
                if (*vpp == NULLVP)
                        rv = ENOSYS;
        } else
                *vpp = vp;

notfound:
        /*
         * Common exit for the success path and the failures in (b):
         * undo the dv_busy++ done before the contents lock was dropped.
         */
        if (was_busy) {
                /*
                 * Non-zero was_busy tells us that we are not in the
                 * devfs_clean() path which in turn means that we can afford
                 * to take the contents lock unconditionally.
                 */
                rw_enter(&ddv->dv_contents, RW_WRITER);
                ddv->dv_busy--;
                rw_exit(&ddv->dv_contents);
        }
        return (rv);
}
1262 
1263 /*
1264  * The given directory node is out-of-date; that is, it has been
1265  * marked as needing to be rebuilt, possibly because some new devinfo
1266  * node has come into existence, or possibly because this is the first
1267  * time we've been here.
1268  */
1269 void
1270 dv_filldir(struct dv_node *ddv)
1271 {
1272         struct dv_node          *dv;
1273         dev_info_t              *devi, *pdevi;
1274         struct ddi_minor_data   *dmd;
1275         char                    devnm[MAXNAMELEN];
1276         int                     circ, ccirc;
1277 
1278         ASSERT(DVTOV(ddv)->v_type == VDIR);
1279         ASSERT(RW_WRITE_HELD(&ddv->dv_contents));
1280         ASSERT(ddv->dv_flags & DV_BUILD);
1281 
1282         dcmn_err3(("dv_filldir: %s\n", ddv->dv_name));
1283         if (DV_STALE(ddv))
1284                 return;
1285         pdevi = ddv->dv_devi;
1286 
1287         if (ndi_devi_config(pdevi, NDI_NO_EVENT) != NDI_SUCCESS) {
1288                 dcmn_err3(("dv_filldir: config error %s\n", ddv->dv_name));
1289         }
1290 
1291         ndi_devi_enter(pdevi, &circ);
1292         for (devi = ddi_get_child(pdevi); devi;
1293             devi = ddi_get_next_sibling(devi)) {
1294                 /*
1295                  * While we know enough to create a directory at DS_INITIALIZED,
1296                  * the directory will be empty until DS_ATTACHED. The existence
1297                  * of an empty directory dv_node will cause a devi_ref, which
1298                  * has caused problems for existing code paths doing offline/DR
1299                  * type operations - making devfs_clean coordination even more
1300                  * sensitive and error prone. Given this, the 'continue' below
1301                  * is checking for DS_ATTACHED instead of DS_INITIALIZED.
1302                  */
1303                 if (i_ddi_node_state(devi) < DS_ATTACHED)
1304                         continue;
1305 
1306                 /* skip hidden nodes */
1307                 if (ndi_dev_is_hidden_node(devi))
1308                         continue;
1309 
1310                 dcmn_err3(("dv_filldir: node %s\n", ddi_node_name(devi)));
1311 
1312                 ndi_devi_enter(devi, &ccirc);
1313                 for (dmd = DEVI(devi)->devi_minor; dmd; dmd = dmd->next) {
1314                         char *addr;
1315 
1316                         /*
1317                          * Skip alias nodes, internal nodes, and nodes
1318                          * without a name.  We allow DDM_DEFAULT nodes
1319                          * to appear in readdir.
1320                          */
1321                         if ((dmd->type == DDM_ALIAS) ||
1322                             (dmd->type == DDM_INTERNAL_PATH) ||
1323                             (dmd->ddm_name == NULL))
1324                                 continue;
1325 
1326                         addr = ddi_get_name_addr(devi);
1327                         if (addr && *addr)
1328                                 (void) sprintf(devnm, "%s@%s:%s",
1329                                     ddi_node_name(devi), addr, dmd->ddm_name);
1330                         else
1331                                 (void) sprintf(devnm, "%s:%s",
1332                                     ddi_node_name(devi), dmd->ddm_name);
1333 
1334                         if ((dv = dv_findbyname(ddv, devnm)) != NULL) {
1335                                 /* dv_node already exists */
1336                                 VN_RELE(DVTOV(dv));
1337                                 continue;
1338                         }
1339 
1340                         dv = dv_mknod(ddv, devi, devnm, dmd);
1341                         dv_insert(ddv, dv);
1342                         VN_RELE(DVTOV(dv));
1343                 }
1344                 ndi_devi_exit(devi, ccirc);
1345 
1346                 (void) ddi_deviname(devi, devnm);
1347                 if ((dv = dv_findbyname(ddv, devnm + 1)) == NULL) {
1348                         /* directory doesn't exist */
1349                         dv = dv_mkdir(ddv, devi, devnm + 1);
1350                         dv_insert(ddv, dv);
1351                 }
1352                 VN_RELE(DVTOV(dv));
1353         }
1354         ndi_devi_exit(pdevi, circ);
1355 
1356         ddv->dv_flags &= ~DV_BUILD;
1357 }
1358 
1359 /*
1360  * Given a directory node, clean out all the nodes beneath.
1361  *
1362  * VDIR:        Reinvoke to clean them, then delete the directory.
1363  * VCHR, VBLK:  Just blow them away.
1364  *
1365  * Mark the directories touched as in need of a rebuild, in case
1366  * we fall over part way through. When DV_CLEAN_FORCE is specified,
1367  * we mark referenced empty directories as stale to facilitate DR.
1368  */
1369 int
1370 dv_cleandir(struct dv_node *ddv, char *devnm, uint_t flags)
1371 {
1372         struct dv_node  *dv;
1373         struct dv_node  *next;
1374         struct vnode    *vp;
1375         int             busy = 0;
1376 
1377         /*
1378          * We should always be holding the tsd_clean_key here: dv_cleandir()
1379          * will be called as a result of a devfs_clean request and the
1380          * tsd_clean_key will be set in either in devfs_clean() itself or in
1381          * devfs_clean_vhci().
1382          *
1383          * Since we are on the devfs_clean path, we return EBUSY if we cannot
1384          * get the contents lock: if we blocked here we might deadlock against
1385          * a thread performing top-down device configuration.
1386          */
1387         ASSERT(tsd_get(devfs_clean_key));
1388 
1389         dcmn_err3(("dv_cleandir: %s\n", ddv->dv_name));
1390 
1391         if (!(flags & DV_CLEANDIR_LCK) &&
1392             !rw_tryenter(&ddv->dv_contents, RW_WRITER))
1393                 return (EBUSY);
1394 
1395         for (dv = DV_FIRST_ENTRY(ddv); dv; dv = next) {
1396                 next = DV_NEXT_ENTRY(ddv, dv);
1397 
1398                 /*
1399                  * If devnm is specified, the non-minor portion of the
1400                  * name must match devnm.
1401                  */
1402                 if (devnm &&
1403                     (strncmp(devnm, dv->dv_name, strlen(devnm)) ||
1404                     (dv->dv_name[strlen(devnm)] != ':' &&
1405                     dv->dv_name[strlen(devnm)] != '\0')))
1406                         continue;
1407 
1408                 /* check type of what we are cleaning */
1409                 vp = DVTOV(dv);
1410                 if (vp->v_type == VDIR) {
1411                         /* recurse on directories */
1412                         rw_enter(&dv->dv_contents, RW_WRITER);
1413                         if (dv_cleandir(dv, NULL,
1414                             flags | DV_CLEANDIR_LCK) == EBUSY) {
1415                                 rw_exit(&dv->dv_contents);
1416                                 goto set_busy;
1417                         }
1418 
1419                         /* A clean directory is an empty directory... */
1420                         ASSERT(dv->dv_nlink == 2);
1421                         mutex_enter(&vp->v_lock);
1422                         if (vp->v_count > 0) {
1423                                 /*
1424                                  * ... but an empty directory can still have
1425                                  * references to it. If we have dv_busy or
1426                                  * DV_CLEAN_FORCE is *not* specified then a
1427                                  * referenced directory is considered busy.
1428                                  */
1429                                 if (dv->dv_busy || !(flags & DV_CLEAN_FORCE)) {
1430                                         mutex_exit(&vp->v_lock);
1431                                         rw_exit(&dv->dv_contents);
1432                                         goto set_busy;
1433                                 }
1434 
1435                                 /*
1436                                  * Mark referenced directory stale so that DR
1437                                  * will succeed even if a shell has
1438                                  * /devices/xxx as current directory (causing
1439                                  * VN_HOLD reference to an empty directory).
1440                                  */
1441                                 ASSERT(!DV_STALE(dv));
1442                                 ndi_rele_devi(dv->dv_devi);
1443                                 dv->dv_devi = NULL;  /* mark DV_STALE */
1444                         }
1445                 } else {
1446                         ASSERT((vp->v_type == VCHR) || (vp->v_type == VBLK));
1447                         ASSERT(dv->dv_nlink == 1);   /* no hard links */
1448                         mutex_enter(&vp->v_lock);
1449                         if (vp->v_count > 0) {
1450                                 /*
1451                                  * The file still has references to it.  If
1452                                  * DV_DEVI_GONE is *not* specified then a
1453                                  * referenced file is considered busy.
1454                                  */
1455                                 if (!(flags & DV_DEVI_GONE)) {
1456                                         mutex_exit(&vp->v_lock);
1457                                         goto set_busy;
1458                                 }
1459 
1460                                 /*
1461                                  * Mark referenced file stale so that DR will
1462                                  * succeed even if there are userland opens.
1463                                  */
1464                                 ASSERT(!DV_STALE(dv));
1465                                 ndi_rele_devi(dv->dv_devi);
1466                                 DEVI(dv->dv_devi)->devi_gone = 1;
1467                                 dv->dv_devi = NULL;
1468                         }
1469                 }
1470 
1471                 /* unlink from directory */
1472                 dv_unlink(ddv, dv);
1473 
1474                 /* drop locks */
1475                 mutex_exit(&vp->v_lock);
1476                 if (vp->v_type == VDIR)
1477                         rw_exit(&dv->dv_contents);
1478 
1479                 /* destroy vnode if ref count is zero */
1480                 if (vp->v_count == 0)
1481                         dv_destroy(dv, flags);
1482 
1483                 continue;
1484 
1485                 /*
1486                  * If devnm is not NULL we return immediately on busy,
1487                  * otherwise we continue destroying unused dv_node's.
1488                  */
1489 set_busy:       busy++;
1490                 if (devnm)
1491                         break;
1492         }
1493 
1494         /*
1495          * This code may be invoked to inform devfs that a new node has
1496          * been created in the kernel device tree. So we always set
1497          * the DV_BUILD flag to allow the next dv_filldir() to pick
1498          * the new devinfo nodes.
1499          */
1500         ddv->dv_flags |= DV_BUILD;
1501 
1502         if (!(flags & DV_CLEANDIR_LCK))
1503                 rw_exit(&ddv->dv_contents);
1504 
1505         return (busy ? EBUSY : 0);
1506 }
1507 
1508 /*
1509  * Walk through the devfs hierarchy, correcting the permissions of
1510  * devices with default permissions that do not match those specified
1511  * by minor perm.  This can only be done for all drivers for now.
1512  */
1513 static int
1514 dv_reset_perm_dir(struct dv_node *ddv, uint_t flags)
1515 {
1516         struct dv_node  *dv;
1517         struct vnode    *vp;
1518         int             retval = 0;
1519         struct vattr    *attrp;
1520         mperm_t         mp;
1521         char            *nm;
1522         uid_t           old_uid;
1523         gid_t           old_gid;
1524         mode_t          old_mode;
1525 
1526         rw_enter(&ddv->dv_contents, RW_WRITER);
1527         for (dv = DV_FIRST_ENTRY(ddv); dv; dv = DV_NEXT_ENTRY(ddv, dv)) {
1528                 int error = 0;
1529                 nm = dv->dv_name;
1530 
1531                 rw_enter(&dv->dv_contents, RW_READER);
1532                 vp = DVTOV(dv);
1533                 if (vp->v_type == VDIR) {
1534                         rw_exit(&dv->dv_contents);
1535                         if (dv_reset_perm_dir(dv, flags) != 0) {
1536                                 error = EBUSY;
1537                         }
1538                 } else {
1539                         ASSERT(vp->v_type == VCHR || vp->v_type == VBLK);
1540 
1541                         /*
1542                          * Check for permissions from minor_perm
1543                          * If there are none, we're done
1544                          */
1545                         rw_exit(&dv->dv_contents);
1546                         if (dev_minorperm(dv->dv_devi, nm, &mp) != 0)
1547                                 continue;
1548 
1549                         rw_enter(&dv->dv_contents, RW_READER);
1550 
1551                         /*
1552                          * Allow a node's permissions to be altered
1553                          * permanently from the defaults by chmod,
1554                          * using the shadow node as backing store.
1555                          * Otherwise, update node to minor_perm permissions.
1556                          */
1557                         if (dv->dv_attrvp == NULLVP) {
1558                                 /*
1559                                  * No attribute vp, try to find one.
1560                                  */
1561                                 dv_shadow_node(DVTOV(ddv), nm, vp,
1562                                     NULL, NULLVP, kcred, 0);
1563                         }
1564                         if (dv->dv_attrvp != NULLVP || dv->dv_attr == NULL) {
1565                                 rw_exit(&dv->dv_contents);
1566                                 continue;
1567                         }
1568 
1569                         attrp = dv->dv_attr;
1570 
1571                         if (VATTRP_MP_CMP(attrp, mp) == 0) {
1572                                 dcmn_err5(("%s: no perm change: "
1573                                     "%d %d 0%o\n", nm, attrp->va_uid,
1574                                     attrp->va_gid, attrp->va_mode));
1575                                 rw_exit(&dv->dv_contents);
1576                                 continue;
1577                         }
1578 
1579                         old_uid = attrp->va_uid;
1580                         old_gid = attrp->va_gid;
1581                         old_mode = attrp->va_mode;
1582 
1583                         VATTRP_MP_MERGE(attrp, mp);
1584                         mutex_enter(&vp->v_lock);
1585                         if (vp->v_count > 0) {
1586                                 error = EBUSY;
1587                         }
1588                         mutex_exit(&vp->v_lock);
1589 
1590                         dcmn_err5(("%s: perm %d/%d/0%o -> %d/%d/0%o (%d)\n",
1591                             nm, old_uid, old_gid, old_mode, attrp->va_uid,
1592                             attrp->va_gid, attrp->va_mode, error));
1593 
1594                         rw_exit(&dv->dv_contents);
1595                 }
1596 
1597                 if (error != 0) {
1598                         retval = error;
1599                 }
1600         }
1601 
1602         ddv->dv_flags |= DV_BUILD;
1603 
1604         rw_exit(&ddv->dv_contents);
1605 
1606         return (retval);
1607 }
1608 
1609 int
1610 devfs_reset_perm(uint_t flags)
1611 {
1612         struct dv_node  *dvp;
1613         int             rval;
1614 
1615         if ((dvp = devfs_dip_to_dvnode(ddi_root_node())) == NULL)
1616                 return (0);
1617 
1618         VN_HOLD(DVTOV(dvp));
1619         rval = dv_reset_perm_dir(dvp, flags);
1620         VN_RELE(DVTOV(dvp));
1621         return (rval);
1622 }
1623 
1624 /*
1625  * Clean up dangling devfs shadow nodes for removed
1626  * drivers so that, in the event the driver is re-added
1627  * to the system, newly created nodes won't incorrectly
1628  * pick up these stale shadow node permissions.
1629  *
1630  * This is accomplished by walking down the pathname
1631  * to the directory, starting at the root's attribute
1632  * node, then removing all minors matching the specified
1633  * node name.  Care must be taken to remove all entries
1634  * in a directory before the directory itself, so that
1635  * the clean-up associated with rem_drv'ing a nexus driver
1636  * does not inadvertently result in an inconsistent
1637  * filesystem underlying devfs.
1638  */
1639 
1640 static int
1641 devfs_remdrv_rmdir(vnode_t *dirvp, const char *dir, vnode_t *rvp)
1642 {
1643         int             error;
1644         vnode_t         *vp;
1645         int             eof;
1646         struct iovec    iov;
1647         struct uio      uio;
1648         struct dirent64 *dp;
1649         dirent64_t      *dbuf;
1650         size_t          dlen;
1651         size_t          dbuflen;
1652         int             ndirents = 64;
1653         char            *nm;
1654 
1655         VN_HOLD(dirvp);
1656 
1657         dlen = ndirents * (sizeof (*dbuf));
1658         dbuf = kmem_alloc(dlen, KM_SLEEP);
1659 
1660         uio.uio_iov = &iov;
1661         uio.uio_iovcnt = 1;
1662         uio.uio_segflg = UIO_SYSSPACE;
1663         uio.uio_fmode = 0;
1664         uio.uio_extflg = UIO_COPY_CACHED;
1665         uio.uio_loffset = 0;
1666         uio.uio_llimit = MAXOFFSET_T;
1667 
1668         eof = 0;
1669         error = 0;
1670         while (!error && !eof) {
1671                 uio.uio_resid = dlen;
1672                 iov.iov_base = (char *)dbuf;
1673                 iov.iov_len = dlen;
1674 
1675                 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1676                 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
1677                 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1678 
1679                 dbuflen = dlen - uio.uio_resid;
1680 
1681                 if (error || dbuflen == 0)
1682                         break;
1683 
1684                 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
1685                     dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1686 
1687                         nm = dp->d_name;
1688 
1689                         if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
1690                                 continue;
1691 
1692                         error = VOP_LOOKUP(dirvp, nm,
1693                             &vp, NULL, 0, NULL, kcred, NULL, NULL, NULL);
1694 
1695                         dsysdebug(error,
1696                             ("rem_drv %s/%s lookup (%d)\n",
1697                             dir, nm, error));
1698 
1699                         if (error)
1700                                 continue;
1701 
1702                         ASSERT(vp->v_type == VDIR ||
1703                             vp->v_type == VCHR || vp->v_type == VBLK);
1704 
1705                         if (vp->v_type == VDIR) {
1706                                 error = devfs_remdrv_rmdir(vp, nm, rvp);
1707                                 if (error == 0) {
1708                                         error = VOP_RMDIR(dirvp,
1709                                             (char *)nm, rvp, kcred, NULL, 0);
1710                                         dsysdebug(error,
1711                                             ("rem_drv %s/%s rmdir (%d)\n",
1712                                             dir, nm, error));
1713                                 }
1714                         } else {
1715                                 error = VOP_REMOVE(dirvp, (char *)nm, kcred,
1716                                     NULL, 0);
1717                                 dsysdebug(error,
1718                                     ("rem_drv %s/%s remove (%d)\n",
1719                                     dir, nm, error));
1720                         }
1721 
1722                         VN_RELE(vp);
1723                         if (error) {
1724                                 goto exit;
1725                         }
1726                 }
1727         }
1728 
1729 exit:
1730         VN_RELE(dirvp);
1731         kmem_free(dbuf, dlen);
1732 
1733         return (error);
1734 }
1735 
1736 int
1737 devfs_remdrv_cleanup(const char *dir, const char *nodename)
1738 {
1739         int             error;
1740         vnode_t         *vp;
1741         vnode_t         *dirvp;
1742         int             eof;
1743         struct iovec    iov;
1744         struct uio      uio;
1745         struct dirent64 *dp;
1746         dirent64_t      *dbuf;
1747         size_t          dlen;
1748         size_t          dbuflen;
1749         int             ndirents = 64;
1750         int             nodenamelen = strlen(nodename);
1751         char            *nm;
1752         struct pathname pn;
1753         vnode_t         *rvp;   /* root node of the underlying attribute fs */
1754 
1755         dcmn_err5(("devfs_remdrv_cleanup: %s %s\n", dir, nodename));
1756 
1757         if (error = pn_get((char *)dir, UIO_SYSSPACE, &pn))
1758                 return (0);
1759 
1760         rvp = dvroot->dv_attrvp;
1761         ASSERT(rvp != NULL);
1762         VN_HOLD(rvp);
1763 
1764         pn_skipslash(&pn);
1765         dirvp = rvp;
1766         VN_HOLD(dirvp);
1767 
1768         nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
1769 
1770         while (pn_pathleft(&pn)) {
1771                 ASSERT(dirvp->v_type == VDIR);
1772                 (void) pn_getcomponent(&pn, nm);
1773                 ASSERT((strcmp(nm, ".") != 0) && (strcmp(nm, "..") != 0));
1774                 error = VOP_LOOKUP(dirvp, nm, &vp, NULL, 0, rvp, kcred,
1775                     NULL, NULL, NULL);
1776                 if (error) {
1777                         dcmn_err5(("remdrv_cleanup %s lookup error %d\n",
1778                             nm, error));
1779                         VN_RELE(dirvp);
1780                         if (dirvp != rvp)
1781                                 VN_RELE(rvp);
1782                         pn_free(&pn);
1783                         kmem_free(nm, MAXNAMELEN);
1784                         return (0);
1785                 }
1786                 VN_RELE(dirvp);
1787                 dirvp = vp;
1788                 pn_skipslash(&pn);
1789         }
1790 
1791         ASSERT(dirvp->v_type == VDIR);
1792         if (dirvp != rvp)
1793                 VN_RELE(rvp);
1794         pn_free(&pn);
1795         kmem_free(nm, MAXNAMELEN);
1796 
1797         dlen = ndirents * (sizeof (*dbuf));
1798         dbuf = kmem_alloc(dlen, KM_SLEEP);
1799 
1800         uio.uio_iov = &iov;
1801         uio.uio_iovcnt = 1;
1802         uio.uio_segflg = UIO_SYSSPACE;
1803         uio.uio_fmode = 0;
1804         uio.uio_extflg = UIO_COPY_CACHED;
1805         uio.uio_loffset = 0;
1806         uio.uio_llimit = MAXOFFSET_T;
1807 
1808         eof = 0;
1809         error = 0;
1810         while (!error && !eof) {
1811                 uio.uio_resid = dlen;
1812                 iov.iov_base = (char *)dbuf;
1813                 iov.iov_len = dlen;
1814 
1815                 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1816                 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
1817                 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
1818 
1819                 dbuflen = dlen - uio.uio_resid;
1820 
1821                 if (error || dbuflen == 0)
1822                         break;
1823 
1824                 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
1825                     dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
1826 
1827                         nm = dp->d_name;
1828 
1829                         if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
1830                                 continue;
1831 
1832                         if (strncmp(nm, nodename, nodenamelen) != 0)
1833                                 continue;
1834 
1835                         error = VOP_LOOKUP(dirvp, nm, &vp,
1836                             NULL, 0, NULL, kcred, NULL, NULL, NULL);
1837 
1838                         dsysdebug(error,
1839                             ("rem_drv %s/%s lookup (%d)\n",
1840                             dir, nm, error));
1841 
1842                         if (error)
1843                                 continue;
1844 
1845                         ASSERT(vp->v_type == VDIR ||
1846                             vp->v_type == VCHR || vp->v_type == VBLK);
1847 
1848                         if (vp->v_type == VDIR) {
1849                                 error = devfs_remdrv_rmdir(vp, nm, rvp);
1850                                 if (error == 0) {
1851                                         error = VOP_RMDIR(dirvp, (char *)nm,
1852                                             rvp, kcred, NULL, 0);
1853                                         dsysdebug(error,
1854                                             ("rem_drv %s/%s rmdir (%d)\n",
1855                                             dir, nm, error));
1856                                 }
1857                         } else {
1858                                 error = VOP_REMOVE(dirvp, (char *)nm, kcred,
1859                                     NULL, 0);
1860                                 dsysdebug(error,
1861                                     ("rem_drv %s/%s remove (%d)\n",
1862                                     dir, nm, error));
1863                         }
1864 
1865                         VN_RELE(vp);
1866                         if (error)
1867                                 goto exit;
1868                 }
1869         }
1870 
1871 exit:
1872         VN_RELE(dirvp);
1873 
1874         kmem_free(dbuf, dlen);
1875 
1876         return (0);
1877 }
1878 
1879 struct dv_list {
1880         struct dv_node  *dv;
1881         struct dv_list  *next;
1882 };
1883 
1884 void
1885 dv_walk(
1886         struct dv_node  *ddv,
1887         char            *devnm,
1888         void            (*callback)(struct dv_node *, void *),
1889         void            *arg)
1890 {
1891         struct vnode    *dvp;
1892         struct dv_node  *dv;
1893         struct dv_list  *head, *tail, *next;
1894         int             len;
1895 
1896         dcmn_err3(("dv_walk: ddv = %s, devnm = %s\n",
1897             ddv->dv_name, devnm ? devnm : "<null>"));
1898 
1899         dvp = DVTOV(ddv);
1900 
1901         ASSERT(dvp->v_type == VDIR);
1902 
1903         head = tail = next = NULL;
1904 
1905         rw_enter(&ddv->dv_contents, RW_READER);
1906         mutex_enter(&dvp->v_lock);
1907         for (dv = DV_FIRST_ENTRY(ddv); dv; dv = DV_NEXT_ENTRY(ddv, dv)) {
1908                 /*
1909                  * If devnm is not NULL and is not the empty string,
1910                  * select only dv_nodes with matching non-minor name
1911                  */
1912                 if (devnm && (len = strlen(devnm)) &&
1913                     (strncmp(devnm, dv->dv_name, len) ||
1914                     (dv->dv_name[len] != ':' && dv->dv_name[len] != '\0')))
1915                         continue;
1916 
1917                 callback(dv, arg);
1918 
1919                 if (DVTOV(dv)->v_type != VDIR)
1920                         continue;
1921 
1922                 next = kmem_zalloc(sizeof (*next), KM_SLEEP);
1923                 next->dv = dv;
1924 
1925                 if (tail)
1926                         tail->next = next;
1927                 else
1928                         head = next;
1929 
1930                 tail = next;
1931         }
1932 
1933         while (head) {
1934                 dv_walk(head->dv, NULL, callback, arg);
1935                 next = head->next;
1936                 kmem_free(head, sizeof (*head));
1937                 head = next;
1938         }
1939         rw_exit(&ddv->dv_contents);
1940         mutex_exit(&dvp->v_lock);
1941 }