1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* 27 * Copyright 2018 Nexenta Systems, Inc. 28 */ 29 30 /* 31 * miscellaneous routines for the devfs 32 */ 33 34 #include <sys/types.h> 35 #include <sys/param.h> 36 #include <sys/t_lock.h> 37 #include <sys/systm.h> 38 #include <sys/sysmacros.h> 39 #include <sys/user.h> 40 #include <sys/time.h> 41 #include <sys/vfs.h> 42 #include <sys/vnode.h> 43 #include <sys/file.h> 44 #include <sys/fcntl.h> 45 #include <sys/flock.h> 46 #include <sys/kmem.h> 47 #include <sys/uio.h> 48 #include <sys/errno.h> 49 #include <sys/stat.h> 50 #include <sys/cred.h> 51 #include <sys/dirent.h> 52 #include <sys/pathname.h> 53 #include <sys/cmn_err.h> 54 #include <sys/debug.h> 55 #include <sys/modctl.h> 56 #include <fs/fs_subr.h> 57 #include <sys/fs/dv_node.h> 58 #include <sys/fs/snode.h> 59 #include <sys/sunndi.h> 60 #include <sys/sunmdi.h> 61 #include <sys/conf.h> 62 63 #ifdef DEBUG 64 int devfs_debug = 0x0; 65 #endif 66 67 const char dvnm[] = "devfs"; 68 kmem_cache_t *dv_node_cache; /* dv_node cache */ 69 70 /* 71 * The devfs_clean_key is taken during a 
devfs_clean operation: it is used to
 * prevent unnecessary code execution and for detection of potential deadlocks.
 */
uint_t devfs_clean_key;

/* Root dv_node of the devfs tree; set once by dv_mkroot(). */
struct dv_node *dvroot;

/*
 * prototype memory vattrs
 *
 * Template attribute sets copied in place when a node has no
 * persistent (shadow) attributes.
 */
vattr_t dv_vattr_dir = {
	AT_TYPE|AT_MODE|AT_UID|AT_GID,		/* va_mask */
	VDIR,					/* va_type */
	DV_DIRMODE_DEFAULT,			/* va_mode */
	DV_UID_DEFAULT,				/* va_uid */
	DV_GID_DEFAULT,				/* va_gid */
	0,					/* va_fsid; */
	0,					/* va_nodeid; */
	0,					/* va_nlink; */
	0,					/* va_size; */
	0,					/* va_atime; */
	0,					/* va_mtime; */
	0,					/* va_ctime; */
	0,					/* va_rdev; */
	0,					/* va_blksize; */
	0,					/* va_nblocks; */
	0,					/* va_seq; */
};

vattr_t dv_vattr_file = {
	AT_TYPE|AT_MODE|AT_SIZE|AT_UID|AT_GID|AT_RDEV,	/* va_mask */
	0,					/* va_type */
	DV_DEVMODE_DEFAULT,			/* va_mode */
	DV_UID_DEFAULT,				/* va_uid */
	DV_GID_DEFAULT,				/* va_gid */
	0,					/* va_fsid; */
	0,					/* va_nodeid; */
	0,					/* va_nlink; */
	0,					/* va_size; */
	0,					/* va_atime; */
	0,					/* va_mtime; */
	0,					/* va_ctime; */
	0,					/* va_rdev; */
	0,					/* va_blksize; */
	0,					/* va_nblocks; */
	0,					/* va_seq; */
};

vattr_t dv_vattr_priv = {
	AT_TYPE|AT_MODE|AT_SIZE|AT_UID|AT_GID|AT_RDEV,	/* va_mask */
	0,					/* va_type */
	DV_DEVMODE_PRIV,			/* va_mode */
	DV_UID_DEFAULT,				/* va_uid */
	DV_GID_DEFAULT,				/* va_gid */
	0,					/* va_fsid; */
	0,					/* va_nodeid; */
	0,					/* va_nlink; */
	0,					/* va_size; */
	0,					/* va_atime; */
	0,					/* va_mtime; */
	0,					/* va_ctime; */
	0,					/* va_rdev; */
	0,					/* va_blksize; */
	0,					/* va_nblocks; */
	0,					/* va_seq; */
};

extern dev_info_t *clone_dip;
extern major_t clone_major;
extern struct dev_ops *ddi_hold_driver(major_t);

/*
 * dv_node node constructor for kmem cache.
 *
 * Zeroes the buffer, allocates the embedded vnode, and initializes
 * the dv_contents rwlock. Returns -1 on vnode allocation failure so
 * the cache allocation fails cleanly.
 */
static int
i_dv_node_ctor(void *buf, void *cfarg, int flag)
{
	_NOTE(ARGUNUSED(cfarg, flag))
	struct dv_node *dv = (struct dv_node *)buf;
	struct vnode *vp;

	bzero(buf, sizeof (struct dv_node));
	/* NOTE(review): flag is forwarded to vn_alloc() despite the _NOTE */
	vp = dv->dv_vnode = vn_alloc(flag);
	if (vp == NULL) {
		return (-1);
	}
	vp->v_data = dv;
	rw_init(&dv->dv_contents, NULL, RW_DEFAULT, NULL);
	return (0);
}

/*
 * dv_node node destructor for kmem cache.
 * Undoes the constructor: destroys the rwlock and frees the vnode.
 */
static void
i_dv_node_dtor(void *buf, void *arg)
{
	_NOTE(ARGUNUSED(arg))
	struct dv_node *dv = (struct dv_node *)buf;
	struct vnode *vp = DVTOV(dv);

	rw_destroy(&dv->dv_contents);
	vn_invalid(vp);
	vn_free(vp);
}


/* initialize dv_node node cache (and the devfs_clean TSD key) */
void
dv_node_cache_init()
{
	ASSERT(dv_node_cache == NULL);
	dv_node_cache = kmem_cache_create("dv_node_cache",
	    sizeof (struct dv_node), 0, i_dv_node_ctor, i_dv_node_dtor,
	    NULL, NULL, NULL, 0);

	tsd_create(&devfs_clean_key, NULL);
}

/* destroy dv_node node cache (and the devfs_clean TSD key) */
void
dv_node_cache_fini()
{
	ASSERT(dv_node_cache != NULL);
	kmem_cache_destroy(dv_node_cache);
	dv_node_cache = NULL;

	tsd_destroy(&devfs_clean_key);
}

/*
 * dv_mkino - Generate a unique inode number for devfs nodes.
 *
 * Although ino_t is 64 bits, the inode number is truncated to 32 bits for 32
 * bit non-LARGEFILE applications. This means that there is a requirement to
 * maintain the inode number as a 32 bit value or applications will have
 * stat(2) calls fail with EOVERFLOW. We form a 32 bit inode number from the
 * dev_t, but if the minor number is larger than L_MAXMIN32 we fold extra minor
 * bits (see the 64-bit note below).
 *
 * To generate inode numbers for directories, we assume that we will never use
 * more than half the major space - this allows for ~8190 drivers. We use this
 * upper major number space to allocate inode numbers for directories by
 * encoding the major and instance into this space.
 *
 * We also skew the result so that inode 2 is reserved for the root of the file
 * system.
 *
 * As part of the future support for 64-bit dev_t APIs, the upper minor bits
 * should be folded into the high inode bits by adding the following code
 * after "ino |= 1":
 *
 * #if (L_BITSMINOR32 != L_BITSMINOR)
 * |* fold overflow minor bits into high bits of inode number *|
 * ino |= ((ino_t)(minor >> L_BITSMINOR32)) << L_BITSMINOR;
 * #endif |* (L_BITSMINOR32 != L_BITSMINOR) *|
 *
 * This way only applications that use devices that overflow their minor
 * space will have an application level impact.
 */
static ino_t
dv_mkino(dev_info_t *devi, vtype_t typ, dev_t dev)
{
	major_t major;
	minor_t minor;
	ino_t ino;
	static int warn;	/* warn only once per boot */

	if (typ == VDIR) {
		/* directories: encode (major, instance) in upper major space */
		major = ((L_MAXMAJ32 + 1) >> 1) + DEVI(devi)->devi_major;
		minor = ddi_get_instance(devi);

		/* makedevice32 in high half of major number space */
		ino = (ino_t)((major << L_BITSMINOR32) | (minor & L_MAXMIN32));

		major = DEVI(devi)->devi_major;
	} else {
		major = getmajor(dev);
		minor = getminor(dev);

		/* makedevice32 */
		ino = (ino_t)((major << L_BITSMINOR32) | (minor & L_MAXMIN32));

		/* make ino for VCHR different than VBLK */
		ino <<= 1;
		if (typ == VCHR)
			ino |= 1;
	}

	ino += DV_ROOTINO + 1;		/* skew: inode 2 is the root */

	/*
	 * diagnose things a little early because adding the skew to a large
	 * minor number could roll over the major.
	 */
	if ((major >= (L_MAXMAJ32 >> 1)) && (warn == 0)) {
		warn = 1;
		cmn_err(CE_WARN, "%s: inode numbers are not unique", dvnm);
	}

	return (ino);
}

/*
 * Compare two nodes lexicographically (by dv_name) to balance the avl tree.
 * Returns -1/0/1 as required by avl_create().
 */
static int
dv_compare_nodes(const struct dv_node *dv1, const struct dv_node *dv2)
{
	int rv;

	if ((rv = strcmp(dv1->dv_name, dv2->dv_name)) == 0)
		return (0);
	return ((rv < 0) ? -1 : 1);
}

/*
 * dv_mkroot
 *
 * Build the first VDIR dv_node.
 */
struct dv_node *
dv_mkroot(struct vfs *vfsp, dev_t devfsdev)
{
	struct dv_node *dv;
	struct vnode *vp;

	ASSERT(ddi_root_node() != NULL);
	ASSERT(dv_node_cache != NULL);

	dcmn_err3(("dv_mkroot\n"));
	dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP);
	vp = DVTOV(dv);
	vn_reinit(vp);
	vp->v_flag = VROOT;
	vp->v_vfsp = vfsp;
	vp->v_type = VDIR;
	vp->v_rdev = devfsdev;
	vn_setops(vp, dv_vnodeops);
	vn_exists(vp);

	/* publish the root node in the file-scope global */
	dvroot = dv;

	dv->dv_name = NULL;		/* not needed */
	dv->dv_namelen = 0;

	dv->dv_devi = ddi_root_node();

	dv->dv_ino = DV_ROOTINO;
	dv->dv_nlink = 2;		/* name + . (no dv_insert) */
	dv->dv_dotdot = dv;		/* .. == self */
	dv->dv_attrvp = NULLVP;
	dv->dv_attr = NULL;
	dv->dv_flags = DV_BUILD;
	dv->dv_priv = NULL;
	dv->dv_busy = 0;
	dv->dv_dflt_mode = 0;

	/* directory contents are kept sorted by name in an avl tree */
	avl_create(&dv->dv_entries,
	    (int (*)(const void *, const void *))dv_compare_nodes,
	    sizeof (struct dv_node), offsetof(struct dv_node, dv_avllink));

	return (dv);
}

/*
 * dv_mkdir
 *
 * Given a probed or attached nexus node, create a VDIR dv_node.
 * No dv_attrvp is created at this point.
 */
struct dv_node *
dv_mkdir(struct dv_node *ddv, dev_info_t *devi, char *nm)
{
	struct dv_node *dv;
	struct vnode *vp;
	size_t nmlen;

	ASSERT((devi));
	dcmn_err4(("dv_mkdir: %s\n", nm));

	dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP);
	nmlen = strlen(nm) + 1;
	dv->dv_name = kmem_alloc(nmlen, KM_SLEEP);
	bcopy(nm, dv->dv_name, nmlen);
	dv->dv_namelen = nmlen - 1;	/* '\0' not included */

	vp = DVTOV(dv);
	vn_reinit(vp);
	vp->v_flag = 0;
	vp->v_vfsp = DVTOV(ddv)->v_vfsp;
	vp->v_type = VDIR;
	vp->v_rdev = DVTOV(ddv)->v_rdev;
	vn_setops(vp, vn_getops(DVTOV(ddv)));
	vn_exists(vp);

	/* hold the devinfo node for the life of this dv_node */
	dv->dv_devi = devi;
	ndi_hold_devi(devi);

	dv->dv_ino = dv_mkino(devi, VDIR, NODEV);
	dv->dv_nlink = 0;		/* updated on insert */
	dv->dv_dotdot = ddv;
	dv->dv_attrvp = NULLVP;
	dv->dv_attr = NULL;
	dv->dv_flags = DV_BUILD;
	dv->dv_priv = NULL;
	dv->dv_busy = 0;
	dv->dv_dflt_mode = 0;

	avl_create(&dv->dv_entries,
	    (int (*)(const void *, const void *))dv_compare_nodes,
	    sizeof (struct dv_node), offsetof(struct dv_node, dv_avllink));

	return (dv);
}

/*
 * dv_mknod
 *
 * Given a minor node, create a VCHR or VBLK dv_node.
 * No dv_attrvp is created at this point.
 */
static struct dv_node *
dv_mknod(struct dv_node *ddv, dev_info_t *devi, char *nm,
    struct ddi_minor_data *dmd)
{
	struct dv_node *dv;
	struct vnode *vp;
	size_t nmlen;

	dcmn_err4(("dv_mknod: %s\n", nm));

	dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP);
	nmlen = strlen(nm) + 1;
	dv->dv_name = kmem_alloc(nmlen, KM_SLEEP);
	bcopy(nm, dv->dv_name, nmlen);
	dv->dv_namelen = nmlen - 1;	/* no '\0' */

	vp = DVTOV(dv);
	vn_reinit(vp);
	vp->v_flag = 0;
	vp->v_vfsp = DVTOV(ddv)->v_vfsp;
	/* vnode type follows the minor's spec type */
	vp->v_type = dmd->ddm_spec_type == S_IFCHR ? VCHR : VBLK;
	vp->v_rdev = dmd->ddm_dev;
	vn_setops(vp, vn_getops(DVTOV(ddv)));
	vn_exists(vp);

	ASSERT(DEVI_BUSY_OWNED(devi));
	ndi_hold_devi(devi);

	dv->dv_devi = devi;
	dv->dv_ino = dv_mkino(devi, vp->v_type, vp->v_rdev);
	dv->dv_nlink = 0;		/* updated on insert */
	dv->dv_dotdot = ddv;
	dv->dv_attrvp = NULLVP;
	dv->dv_attr = NULL;
	dv->dv_flags = 0;

	if (dmd->type == DDM_INTERNAL_PATH)
		dv->dv_flags |= DV_INTERNAL;
	if (dmd->ddm_flags & DM_NO_FSPERM)
		dv->dv_flags |= DV_NO_FSPERM;

	/* take our own hold on any node privilege data */
	dv->dv_priv = dmd->ddm_node_priv;
	if (dv->dv_priv)
		dphold(dv->dv_priv);

	/*
	 * Minors created with ddi_create_priv_minor_node can specify
	 * a default mode permission other than the devfs default.
	 */
	if (dv->dv_priv || dv->dv_flags & DV_NO_FSPERM) {
		dcmn_err5(("%s: dv_mknod default priv mode 0%o\n",
		    dv->dv_name, dmd->ddm_priv_mode));
		dv->dv_flags |= DV_DFLT_MODE;
		dv->dv_dflt_mode = dmd->ddm_priv_mode & S_IAMB;
	}

	return (dv);
}

/*
 * dv_destroy
 *
 * Destroy what we created in dv_mkdir or dv_mknod.
 * In the case of a *referenced* directory, do nothing.
 */
void
dv_destroy(struct dv_node *dv, uint_t flags)
{
	vnode_t *vp = DVTOV(dv);
	ASSERT(dv->dv_nlink == 0);		/* no references */

	dcmn_err4(("dv_destroy: %s\n", dv->dv_name));

	/*
	 * We may be asked to unlink referenced directories.
	 * In this case, there is nothing to be done.
	 * The eventual memory free will be done in
	 * devfs_inactive.
	 */
	if (vp->v_count != 0) {
		ASSERT(vp->v_type == VDIR);
		ASSERT(flags & DV_CLEAN_FORCE);
		ASSERT(DV_STALE(dv));
		return;
	}

	if (vp->v_type == VDIR) {
		/* a directory must be empty before it can be destroyed */
		ASSERT(DV_FIRST_ENTRY(dv) == NULL);
		avl_destroy(&dv->dv_entries);
	}

	/* release every resource acquired in dv_mkdir/dv_mknod */
	if (dv->dv_attrvp != NULLVP)
		VN_RELE(dv->dv_attrvp);
	if (dv->dv_attr != NULL)
		kmem_free(dv->dv_attr, sizeof (struct vattr));
	if (dv->dv_name != NULL)
		kmem_free(dv->dv_name, dv->dv_namelen + 1);
	if (dv->dv_devi != NULL) {
		ndi_rele_devi(dv->dv_devi);
	}
	if (dv->dv_priv != NULL) {
		dpfree(dv->dv_priv);
	}

	kmem_cache_free(dv_node_cache, dv);
}

/*
 * Find and hold dv_node by name.
 * Caller must hold ddv->dv_contents; on success the returned
 * node's vnode carries an extra hold (caller must VN_RELE).
 */
static struct dv_node *
dv_findbyname(struct dv_node *ddv, char *nm)
{
	struct dv_node *dv;
	avl_index_t where;
	struct dv_node dvtmp;	/* stack key for avl_find; only dv_name used */

	ASSERT(RW_LOCK_HELD(&ddv->dv_contents));
	dcmn_err3(("dv_findbyname: %s\n", nm));

	dvtmp.dv_name = nm;
	dv = avl_find(&ddv->dv_entries, &dvtmp, &where);
	if (dv) {
		ASSERT(dv->dv_dotdot == ddv);
		ASSERT(strcmp(dv->dv_name, nm) == 0);
		VN_HOLD(DVTOV(dv));
		return (dv);
	}
	return (NULL);
}

/*
 * Inserts a new dv_node in a parent directory
 */
void
dv_insert(struct dv_node *ddv, struct dv_node *dv)
{
	avl_index_t where;

	ASSERT(RW_WRITE_HELD(&ddv->dv_contents));
	ASSERT(DVTOV(ddv)->v_type == VDIR);
	ASSERT(ddv->dv_nlink >= 2);
	ASSERT(dv->dv_nlink == 0);

	dcmn_err3(("dv_insert: %s\n", dv->dv_name));

	dv->dv_dotdot = ddv;
	if (DVTOV(dv)->v_type == VDIR) {
		ddv->dv_nlink++;	/* .. to containing directory */
		dv->dv_nlink = 2;	/* name + .
 */
	} else {
		dv->dv_nlink = 1;	/* name */
	}

	/* enter node in the avl tree */
	VERIFY(avl_find(&ddv->dv_entries, dv, &where) == NULL);
	avl_insert(&ddv->dv_entries, dv, where);
}

/*
 * Unlink a dv_node from a parent directory
 */
void
dv_unlink(struct dv_node *ddv, struct dv_node *dv)
{
	/* verify linkage of arguments */
	ASSERT(ddv && dv);
	ASSERT(dv->dv_dotdot == ddv);
	ASSERT(RW_WRITE_HELD(&ddv->dv_contents));
	ASSERT(DVTOV(ddv)->v_type == VDIR);

	dcmn_err3(("dv_unlink: %s\n", dv->dv_name));

	/* undo the link-count bookkeeping done by dv_insert */
	if (DVTOV(dv)->v_type == VDIR) {
		ddv->dv_nlink--;	/* .. to containing directory */
		dv->dv_nlink -= 2;	/* name + . */
	} else {
		dv->dv_nlink -= 1;	/* name */
	}
	ASSERT(ddv->dv_nlink >= 2);
	ASSERT(dv->dv_nlink == 0);

	dv->dv_dotdot = NULL;

	/* remove from avl tree */
	avl_remove(&ddv->dv_entries, dv);
}

/*
 * Merge devfs node specific information into an attribute structure.
 *
 * NOTE: specfs provides ATIME,MTIME,CTIME,SIZE,BLKSIZE,NBLOCKS on leaf node.
 */
void
dv_vattr_merge(struct dv_node *dv, struct vattr *vap)
{
	struct vnode *vp = DVTOV(dv);

	vap->va_nodeid = dv->dv_ino;
	vap->va_nlink = dv->dv_nlink;

	if (vp->v_type == VDIR) {
		vap->va_rdev = 0;
		vap->va_fsid = vp->v_rdev;
	} else {
		vap->va_rdev = vp->v_rdev;
		vap->va_fsid = DVTOV(dv->dv_dotdot)->v_rdev;
		vap->va_type = vp->v_type;
		/* don't trust the shadow file type */
		vap->va_mode &= ~S_IFMT;
		if (vap->va_type == VCHR)
			vap->va_mode |= S_IFCHR;
		else
			vap->va_mode |= S_IFBLK;
	}
}

/*
 * Get default device permission by consulting rules in
 * privilege specification in minor node and /etc/minor_perm.
 *
 * This function is called from the devname filesystem to get default
 * permissions for a device exported to a non-global zone.
 */
void
devfs_get_defattr(struct vnode *vp, struct vattr *vap, int *no_fs_perm)
{
	mperm_t mp;
	struct dv_node *dv;

	/* If vp isn't a dv_node, return something sensible */
	if (!vn_matchops(vp, dv_vnodeops)) {
		if (no_fs_perm)
			*no_fs_perm = 0;
		*vap = dv_vattr_file;
		return;
	}

	/*
	 * For minors not created by ddi_create_priv_minor_node(),
	 * use devfs defaults.
	 */
	dv = VTODV(vp);
	if (vp->v_type == VDIR) {
		*vap = dv_vattr_dir;
	} else if (dv->dv_flags & DV_NO_FSPERM) {
		/* private minor: report that fs permissions don't apply */
		if (no_fs_perm)
			*no_fs_perm = 1;
		*vap = dv_vattr_priv;
	} else {
		/*
		 * look up perm bits from minor_perm
		 */
		*vap = dv_vattr_file;
		if (dev_minorperm(dv->dv_devi, dv->dv_name, &mp) == 0) {
			VATTR_MP_MERGE((*vap), mp);
			dcmn_err5(("%s: minor perm mode 0%o\n",
			    dv->dv_name, vap->va_mode));
		} else if (dv->dv_flags & DV_DFLT_MODE) {
			/* fall back to the mode recorded at dv_mknod time */
			ASSERT((dv->dv_dflt_mode & ~S_IAMB) == 0);
			vap->va_mode &= ~S_IAMB;
			vap->va_mode |= dv->dv_dflt_mode;
			dcmn_err5(("%s: priv mode 0%o\n",
			    dv->dv_name, vap->va_mode));
		}
	}
}

/*
 * dv_shadow_node
 *
 * Given a VDIR dv_node, find/create the associated VDIR
 * node in the shadow attribute filesystem.
 *
 * Given a VCHR/VBLK dv_node, find the associated VREG
 * node in the shadow attribute filesystem. These nodes
 * are only created to persist non-default attributes.
 * Lack of such a node implies the default permissions
 * are sufficient.
 *
 * Managing the attribute file entries is slightly tricky (mostly
 * because we can't intercept VN_HOLD and VN_RELE except on the last
 * release).
 *
 * We assert that if the dv_attrvp pointer is non-NULL, it points
 * to a singly-held (by us) vnode that represents the shadow entry
 * in the underlying filesystem.
To avoid store-ordering issues,
 * we assert that the pointer can only be tested under the dv_contents
 * READERS lock.
 */

void
dv_shadow_node(
	struct vnode *dvp,	/* devfs parent directory vnode */
	char *nm,		/* name component */
	struct vnode *vp,	/* devfs vnode */
	struct pathname *pnp,	/* the path .. */
	struct vnode *rdir,	/* the root .. */
	struct cred *cred,	/* who's asking? */
	int flags)		/* optionally create shadow node */
{
	struct dv_node *dv;	/* dv_node of named directory */
	struct vnode *rdvp;	/* shadow parent directory vnode */
	struct vnode *rvp;	/* shadow vnode */
	struct vnode *rrvp;	/* realvp of shadow vnode */
	struct vattr vattr;
	int create_tried;
	int error;

	ASSERT(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK);
	dv = VTODV(vp);
	dcmn_err3(("dv_shadow_node: name %s attr %p\n",
	    nm, (void *)dv->dv_attrvp));

	if ((flags & DV_SHADOW_WRITE_HELD) == 0) {
		/*
		 * Caller holds dv_contents as READER; upgrade to WRITER.
		 * If the upgrade requires dropping the lock, re-test
		 * dv_attrvp because another thread may have filled it in.
		 */
		ASSERT(RW_READ_HELD(&dv->dv_contents));
		if (dv->dv_attrvp != NULLVP)
			return;
		if (!rw_tryupgrade(&dv->dv_contents)) {
			rw_exit(&dv->dv_contents);
			rw_enter(&dv->dv_contents, RW_WRITER);
			if (dv->dv_attrvp != NULLVP) {
				rw_downgrade(&dv->dv_contents);
				return;
			}
		}
	} else {
		ASSERT(RW_WRITE_HELD(&dv->dv_contents));
		if (dv->dv_attrvp != NULLVP)
			return;
	}

	ASSERT(RW_WRITE_HELD(&dv->dv_contents) && dv->dv_attrvp == NULL);

	rdvp = VTODV(dvp)->dv_attrvp;
	create_tried = 0;
lookup:
	if (rdvp && (dv->dv_flags & DV_NO_FSPERM) == 0) {
		error = VOP_LOOKUP(rdvp, nm, &rvp, pnp, LOOKUP_DIR, rdir, cred,
		    NULL, NULL, NULL);

		/* factor out the snode since we only want the attribute node */
		if ((error == 0) && (VOP_REALVP(rvp, &rrvp, NULL) == 0)) {
			VN_HOLD(rrvp);
			VN_RELE(rvp);
			rvp = rrvp;
		}
	} else
		error = EROFS;		/* no parent, no entry */

	/*
	 * All we want is the permissions (and maybe ACLs and
	 * extended attributes), and we want to perform lookups
	 * by name. Drivers occasionally change their minor
	 * number space. If something changes, there's not
	 * much we can do about it here.
	 */

	/* The shadow node checks out. We are done */
	if (error == 0) {
		dv->dv_attrvp = rvp;	/* with one hold */

		/*
		 * Determine if we have non-trivial ACLs on this node.
		 * It is not necessary to VOP_RWLOCK since fs_acl_nontrivial
		 * only does VOP_GETSECATTR.
		 */
		dv->dv_flags &= ~DV_ACL;

		if (fs_acl_nontrivial(rvp, cred))
			dv->dv_flags |= DV_ACL;

		/*
		 * If we have synced out the memory attributes, free
		 * them and switch back to using the persistent store.
		 */
		if (rvp && dv->dv_attr) {
			kmem_free(dv->dv_attr, sizeof (struct vattr));
			dv->dv_attr = NULL;
		}
		if ((flags & DV_SHADOW_WRITE_HELD) == 0)
			rw_downgrade(&dv->dv_contents);
		ASSERT(RW_LOCK_HELD(&dv->dv_contents));
		return;
	}

	/*
	 * Failed to find attribute in persistent backing store,
	 * get default permission bits.
	 */
	devfs_get_defattr(vp, &vattr, NULL);

	dv_vattr_merge(dv, &vattr);
	gethrestime(&vattr.va_atime);
	vattr.va_mtime = vattr.va_atime;
	vattr.va_ctime = vattr.va_atime;

	/*
	 * Try to create shadow dir. This is necessary in case
	 * we need to create a shadow leaf node later, when user
	 * executes chmod.
787 */ 788 if ((error == ENOENT) && !create_tried) { 789 switch (vp->v_type) { 790 case VDIR: 791 error = VOP_MKDIR(rdvp, nm, &vattr, &rvp, kcred, 792 NULL, 0, NULL); 793 dsysdebug(error, ("vop_mkdir %s %s %d\n", 794 VTODV(dvp)->dv_name, nm, error)); 795 create_tried = 1; 796 break; 797 798 case VCHR: 799 case VBLK: 800 /* 801 * Shadow nodes are only created on demand 802 */ 803 if (flags & DV_SHADOW_CREATE) { 804 error = VOP_CREATE(rdvp, nm, &vattr, NONEXCL, 805 VREAD|VWRITE, &rvp, kcred, 0, NULL, NULL); 806 dsysdebug(error, ("vop_create %s %s %d\n", 807 VTODV(dvp)->dv_name, nm, error)); 808 create_tried = 1; 809 } 810 break; 811 812 default: 813 cmn_err(CE_PANIC, "devfs: %s: create", dvnm); 814 /*NOTREACHED*/ 815 } 816 817 if (create_tried && 818 (error == 0) || (error == EEXIST)) { 819 VN_RELE(rvp); 820 goto lookup; 821 } 822 } 823 824 /* Store attribute in memory */ 825 if (dv->dv_attr == NULL) { 826 dv->dv_attr = kmem_alloc(sizeof (struct vattr), KM_SLEEP); 827 *(dv->dv_attr) = vattr; 828 } 829 830 if ((flags & DV_SHADOW_WRITE_HELD) == 0) 831 rw_downgrade(&dv->dv_contents); 832 ASSERT(RW_LOCK_HELD(&dv->dv_contents)); 833 } 834 835 /* 836 * Given a devinfo node, and a name, returns the appropriate 837 * minor information for that named node, if it exists. 838 */ 839 static int 840 dv_find_leafnode(dev_info_t *devi, char *minor_nm, struct ddi_minor_data *r_mi) 841 { 842 struct ddi_minor_data *dmd; 843 844 ASSERT(i_ddi_devi_attached(devi)); 845 846 dcmn_err3(("dv_find_leafnode: %s\n", minor_nm)); 847 ASSERT(DEVI_BUSY_OWNED(devi)); 848 for (dmd = DEVI(devi)->devi_minor; dmd; dmd = dmd->next) { 849 850 /* 851 * Skip alias nodes and nodes without a name. 
		 */
		if ((dmd->type == DDM_ALIAS) || (dmd->ddm_name == NULL))
			continue;

		dcmn_err4(("dv_find_leafnode: (%s,%s)\n",
		    minor_nm, dmd->ddm_name));
		if (strcmp(minor_nm, dmd->ddm_name) == 0) {
			/* copy out the fields the caller needs */
			r_mi->ddm_dev = dmd->ddm_dev;
			r_mi->ddm_spec_type = dmd->ddm_spec_type;
			r_mi->type = dmd->type;
			r_mi->ddm_flags = dmd->ddm_flags;
			r_mi->ddm_node_priv = dmd->ddm_node_priv;
			r_mi->ddm_priv_mode = dmd->ddm_priv_mode;
			/* take a hold on the privilege data for the caller */
			if (r_mi->ddm_node_priv)
				dphold(r_mi->ddm_node_priv);
			return (0);
		}
	}

	dcmn_err3(("dv_find_leafnode: %s: ENOENT\n", minor_nm));
	return (ENOENT);
}

/*
 * Special handling for clone node:
 * Clone minor name is a driver name, the minor number will
 * be the major number of the driver. There is no minor
 * node under the clone driver, so we'll manufacture the
 * dev_t.
 */
static struct dv_node *
dv_clone_mknod(struct dv_node *ddv, char *drvname)
{
	major_t major;
	struct dv_node *dvp;
	char *devnm;
	struct ddi_minor_data *dmd;

	/*
	 * Make sure drvname is a STREAMS driver. We load the driver,
	 * but don't attach to any instances. This makes stat(2)
	 * relatively cheap.
	 */
	major = ddi_name_to_major(drvname);
	if (major == DDI_MAJOR_T_NONE)
		return (NULL);

	if (ddi_hold_driver(major) == NULL)
		return (NULL);

	if (STREAMSTAB(major) == NULL) {
		/* not a STREAMS driver - no clone node for it */
		ddi_rele_driver(major);
		return (NULL);
	}

	ddi_rele_driver(major);
	devnm = kmem_alloc(MAXNAMELEN, KM_SLEEP);
	(void) snprintf(devnm, MAXNAMELEN, "clone@0:%s", drvname);
	dmd = kmem_zalloc(sizeof (*dmd), KM_SLEEP);
	/* manufacture the dev_t: clone major, driver major as minor */
	dmd->ddm_dev = makedevice(clone_major, (minor_t)major);
	dmd->ddm_spec_type = S_IFCHR;
	dvp = dv_mknod(ddv, clone_dip, devnm, dmd);
	kmem_free(dmd, sizeof (*dmd));
	kmem_free(devnm, MAXNAMELEN);
	return (dvp);
}

/*
 * Given the parent directory node, and a name in it, returns the
 * named dv_node to the caller (as a vnode).
 *
 * (We need pnp and rdir for doing shadow lookups; they can be NULL)
 */
int
dv_find(struct dv_node *ddv, char *nm, struct vnode **vpp, struct pathname *pnp,
    struct vnode *rdir, struct cred *cred, uint_t ndi_flags)
{
	extern int isminiroot;	/* see modctl.c */

	int circ;
	int rv = 0, was_busy = 0, nmlen, write_held = 0;
	struct vnode *vp;
	struct dv_node *dv, *dup;
	dev_info_t *pdevi, *devi = NULL;
	char *mnm;
	struct ddi_minor_data *dmd;

	dcmn_err3(("dv_find %s\n", nm));

	/*
	 * On the devfs_clean() path (TSD key set) we must not block on
	 * the contents lock - fail with EBUSY instead of deadlocking.
	 */
	if (!rw_tryenter(&ddv->dv_contents, RW_READER)) {
		if (tsd_get(devfs_clean_key))
			return (EBUSY);
		rw_enter(&ddv->dv_contents, RW_READER);
	}
start:
	if (DV_STALE(ddv)) {
		rw_exit(&ddv->dv_contents);
		return (ESTALE);
	}

	/*
	 * Empty name or ., return node itself.
	 */
	nmlen = strlen(nm);
	if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) {
		*vpp = DVTOV(ddv);
		rw_exit(&ddv->dv_contents);
		VN_HOLD(*vpp);
		return (0);
	}

	/*
	 * .., return the parent directory
	 */
	if ((nmlen == 2) && (strcmp(nm, "..") == 0)) {
		*vpp = DVTOV(ddv->dv_dotdot);
		rw_exit(&ddv->dv_contents);
		VN_HOLD(*vpp);
		return (0);
	}

	/*
	 * Fail anything without a valid device name component
	 */
	if (nm[0] == '@' || nm[0] == ':') {
		dcmn_err3(("devfs: no driver '%s'\n", nm));
		rw_exit(&ddv->dv_contents);
		return (ENOENT);
	}

	/*
	 * So, now we have to deal with the trickier stuff.
	 *
	 * (a) search the existing list of dv_nodes on this directory
	 */
	if ((dv = dv_findbyname(ddv, nm)) != NULL) {
founddv:
		ASSERT(RW_LOCK_HELD(&ddv->dv_contents));

		if (!rw_tryenter(&dv->dv_contents, RW_READER)) {
			if (tsd_get(devfs_clean_key)) {
				VN_RELE(DVTOV(dv));
				rw_exit(&ddv->dv_contents);
				return (EBUSY);
			}
			rw_enter(&dv->dv_contents, RW_READER);
		}

		vp = DVTOV(dv);
		if ((dv->dv_attrvp != NULLVP) ||
		    (vp->v_type != VDIR && dv->dv_attr != NULL)) {
			/*
			 * Common case - we already have attributes
			 */
			rw_exit(&dv->dv_contents);
			rw_exit(&ddv->dv_contents);
			goto found;
		}

		/*
		 * No attribute vp, try and build one.
		 *
		 * dv_shadow_node() can briefly drop &dv->dv_contents lock
		 * if it is unable to upgrade it to a write lock. If the
		 * current thread has come in through the bottom-up device
		 * configuration devfs_clean() path, we may deadlock against
		 * a thread performing top-down device configuration if it
		 * grabs the contents lock. To avoid this, when we are on the
		 * devfs_clean() path we attempt to upgrade the dv_contents
		 * lock before we call dv_shadow_node().
		 */
		if (tsd_get(devfs_clean_key)) {
			if (!rw_tryupgrade(&dv->dv_contents)) {
				VN_RELE(DVTOV(dv));
				rw_exit(&dv->dv_contents);
				rw_exit(&ddv->dv_contents);
				return (EBUSY);
			}

			write_held = DV_SHADOW_WRITE_HELD;
		}

		dv_shadow_node(DVTOV(ddv), nm, vp, pnp, rdir, cred,
		    write_held);

		rw_exit(&dv->dv_contents);
		rw_exit(&ddv->dv_contents);
		goto found;
	}

	/*
	 * (b) Search the child devinfo nodes of our parent directory,
	 * looking for the named node. If we find it, build a new
	 * node, then grab the writers lock, search the directory
	 * if it's still not there, then insert it.
	 *
	 * We drop the devfs locks before accessing the device tree.
	 * Take care to mark the node BUSY so that a forced devfs_clean
	 * doesn't mark the directory node stale.
	 *
	 * Also, check if we are called as part of devfs_clean or
	 * reset_perm. If so, simply return not found because there
	 * is nothing to clean.
	 */
	if (tsd_get(devfs_clean_key)) {
		rw_exit(&ddv->dv_contents);
		return (ENOENT);
	}

	/*
	 * We could be either READ or WRITE locked at
	 * this point. Upgrade if we are read locked.
	 */
	ASSERT(RW_LOCK_HELD(&ddv->dv_contents));
	if (rw_read_locked(&ddv->dv_contents) &&
	    !rw_tryupgrade(&ddv->dv_contents)) {
		rw_exit(&ddv->dv_contents);
		rw_enter(&ddv->dv_contents, RW_WRITER);
		/*
		 * Things may have changed when we dropped
		 * the contents lock, so start from top again
		 */
		goto start;
	}
	ddv->dv_busy++;		/* mark busy before dropping lock */
	was_busy++;
	rw_exit(&ddv->dv_contents);

	pdevi = ddv->dv_devi;
	ASSERT(pdevi != NULL);

	/* temporarily terminate the name at the minor separator */
	mnm = strchr(nm, ':');
	if (mnm)
		*mnm = (char)0;

	/*
	 * Configure one nexus child, will call nexus's bus_ops
	 * If successful, devi is held upon returning.
	 * Note: devfs lookup should not be configuring grandchildren.
	 */
	ASSERT((ndi_flags & NDI_CONFIG) == 0);

	rv = ndi_devi_config_one(pdevi, nm, &devi, ndi_flags | NDI_NO_EVENT);
	if (mnm)
		*mnm = ':';
	if (rv != NDI_SUCCESS) {
		rv = ENOENT;
		goto notfound;
	}

	ASSERT(devi);

	/* Check if this is a path alias */
	if (ddi_aliases_present == B_TRUE && ddi_get_parent(devi) != pdevi) {
		char *curr = kmem_alloc(MAXPATHLEN, KM_SLEEP);

		(void) ddi_pathname(devi, curr);

		vp = NULL;
		if (devfs_lookupname(curr, NULL, &vp) == 0 && vp) {
			dv = VTODV(vp);
			kmem_free(curr, MAXPATHLEN);
			goto found;
		}
		kmem_free(curr, MAXPATHLEN);
	}

	/*
	 * If we configured a hidden node, consider it notfound.
	 */
	if (ndi_dev_is_hidden_node(devi)) {
		ndi_rele_devi(devi);
		rv = ENOENT;
		goto notfound;
	}

	/*
	 * Don't make vhci clients visible under phci, unless we
	 * are in miniroot.
	 */
	if (isminiroot == 0 && ddi_get_parent(devi) != pdevi) {
		ndi_rele_devi(devi);
		rv = ENOENT;
		goto notfound;
	}

	ASSERT(devi && i_ddi_devi_attached(devi));

	/*
	 * Invalidate cache to notice newly created minor nodes.
	 */
	rw_enter(&ddv->dv_contents, RW_WRITER);
	ddv->dv_flags |= DV_BUILD;
	rw_exit(&ddv->dv_contents);

	/*
	 * mkdir for nexus drivers and leaf nodes as well. If we are racing
	 * and create a duplicate, the duplicate will be destroyed below.
	 */
	if (mnm == NULL) {
		dv = dv_mkdir(ddv, devi, nm);
	} else {
		/*
		 * Allocate dmd first to avoid KM_SLEEP with active
		 * ndi_devi_enter.
		 */
		dmd = kmem_zalloc(sizeof (*dmd), KM_SLEEP);
		ndi_devi_enter(devi, &circ);
		if (devi == clone_dip) {
			/*
			 * For clone minors, load the driver indicated by
			 * minor name.
			 */
			dv = dv_clone_mknod(ddv, mnm + 1);
		} else {
			/*
			 * Find minor node and make a dv_node
			 * (dv remains NULL from the failed dv_findbyname
			 * above if no matching minor exists)
			 */
			if (dv_find_leafnode(devi, mnm + 1, dmd) == 0) {
				dv = dv_mknod(ddv, devi, nm, dmd);
				if (dmd->ddm_node_priv)
					dpfree(dmd->ddm_node_priv);
			}
		}
		ndi_devi_exit(devi, circ);
		kmem_free(dmd, sizeof (*dmd));
	}
	/*
	 * Release hold from ndi_devi_config_one()
	 */
	ndi_rele_devi(devi);

	if (dv == NULL) {
		rv = ENOENT;
		goto notfound;
	}

	/*
	 * We have released the dv_contents lock, need to check
	 * if another thread already created a duplicate node
	 */
	rw_enter(&ddv->dv_contents, RW_WRITER);
	if ((dup = dv_findbyname(ddv, nm)) == NULL) {
		dv_insert(ddv, dv);
	} else {
		/*
		 * Duplicate found, use the existing node
		 */
		VN_RELE(DVTOV(dv));
		dv_destroy(dv, 0);
		dv = dup;
	}
	goto founddv;
	/*NOTREACHED*/

found:
	/*
	 * Fail lookup of device that has now become hidden (typically via
	 * hot removal of open device).
	 */
	if (dv->dv_devi && ndi_dev_is_hidden_node(dv->dv_devi)) {
		dcmn_err2(("dv_find: nm %s failed: hidden/removed\n", nm));
		VN_RELE(vp);
		rv = ENOENT;
		goto notfound;
	}

	/*
	 * Skip non-kernel lookups of internal nodes.
	 * This use of kcred to distinguish between user and
	 * internal kernel lookups is unfortunate. The information
	 * provided by the seg argument to lookupnameat should
	 * evolve into a lookup flag for filesystems that need
	 * this distinction.
	 */
	if ((dv->dv_flags & DV_INTERNAL) && (cred != kcred)) {
		dcmn_err2(("dv_find: nm %s failed: internal\n", nm));
		VN_RELE(vp);
		rv = ENOENT;
		goto notfound;
	}

	dcmn_err2(("dv_find: returning vp for nm %s\n", nm));
	if (vp->v_type == VCHR || vp->v_type == VBLK) {
		/*
		 * If vnode is a device, return special vnode instead
		 * (though it knows all about -us- via sp->s_realvp,
		 * sp->s_devvp, and sp->s_dip)
		 */
		*vpp = specvp_devfs(vp, vp->v_rdev, vp->v_type, cred,
		    dv->dv_devi);
		VN_RELE(vp);
		if (*vpp == NULLVP)
			rv = ENOSYS;
	} else
		*vpp = vp;

notfound:
	if (was_busy) {
		/*
		 * Non-zero was_busy tells us that we are not in the
		 * devfs_clean() path which in turn means that we can afford
		 * to take the contents lock unconditionally.
		 */
		rw_enter(&ddv->dv_contents, RW_WRITER);
		ddv->dv_busy--;
		rw_exit(&ddv->dv_contents);
	}
	return (rv);
}

/*
 * The given directory node is out-of-date; that is, it has been
 * marked as needing to be rebuilt, possibly because some new devinfo
 * node has come into existence, or possibly because this is the first
 * time we've been here.
1268 */ 1269 void 1270 dv_filldir(struct dv_node *ddv) 1271 { 1272 struct dv_node *dv; 1273 dev_info_t *devi, *pdevi; 1274 struct ddi_minor_data *dmd; 1275 char devnm[MAXNAMELEN]; 1276 int circ, ccirc; 1277 1278 ASSERT(DVTOV(ddv)->v_type == VDIR); 1279 ASSERT(RW_WRITE_HELD(&ddv->dv_contents)); 1280 ASSERT(ddv->dv_flags & DV_BUILD); 1281 1282 dcmn_err3(("dv_filldir: %s\n", ddv->dv_name)); 1283 if (DV_STALE(ddv)) 1284 return; 1285 pdevi = ddv->dv_devi; 1286 1287 if (ndi_devi_config(pdevi, NDI_NO_EVENT) != NDI_SUCCESS) { 1288 dcmn_err3(("dv_filldir: config error %s\n", ddv->dv_name)); 1289 } 1290 1291 ndi_devi_enter(pdevi, &circ); 1292 for (devi = ddi_get_child(pdevi); devi; 1293 devi = ddi_get_next_sibling(devi)) { 1294 /* 1295 * While we know enough to create a directory at DS_INITIALIZED, 1296 * the directory will be empty until DS_ATTACHED. The existence 1297 * of an empty directory dv_node will cause a devi_ref, which 1298 * has caused problems for existing code paths doing offline/DR 1299 * type operations - making devfs_clean coordination even more 1300 * sensitive and error prone. Given this, the 'continue' below 1301 * is checking for DS_ATTACHED instead of DS_INITIALIZED. 1302 */ 1303 if (i_ddi_node_state(devi) < DS_ATTACHED) 1304 continue; 1305 1306 /* skip hidden nodes */ 1307 if (ndi_dev_is_hidden_node(devi)) 1308 continue; 1309 1310 dcmn_err3(("dv_filldir: node %s\n", ddi_node_name(devi))); 1311 1312 ndi_devi_enter(devi, &ccirc); 1313 for (dmd = DEVI(devi)->devi_minor; dmd; dmd = dmd->next) { 1314 char *addr; 1315 1316 /* 1317 * Skip alias nodes, internal nodes, and nodes 1318 * without a name. We allow DDM_DEFAULT nodes 1319 * to appear in readdir. 
1320 */ 1321 if ((dmd->type == DDM_ALIAS) || 1322 (dmd->type == DDM_INTERNAL_PATH) || 1323 (dmd->ddm_name == NULL)) 1324 continue; 1325 1326 addr = ddi_get_name_addr(devi); 1327 if (addr && *addr) 1328 (void) sprintf(devnm, "%s@%s:%s", 1329 ddi_node_name(devi), addr, dmd->ddm_name); 1330 else 1331 (void) sprintf(devnm, "%s:%s", 1332 ddi_node_name(devi), dmd->ddm_name); 1333 1334 if ((dv = dv_findbyname(ddv, devnm)) != NULL) { 1335 /* dv_node already exists */ 1336 VN_RELE(DVTOV(dv)); 1337 continue; 1338 } 1339 1340 dv = dv_mknod(ddv, devi, devnm, dmd); 1341 dv_insert(ddv, dv); 1342 VN_RELE(DVTOV(dv)); 1343 } 1344 ndi_devi_exit(devi, ccirc); 1345 1346 (void) ddi_deviname(devi, devnm); 1347 if ((dv = dv_findbyname(ddv, devnm + 1)) == NULL) { 1348 /* directory doesn't exist */ 1349 dv = dv_mkdir(ddv, devi, devnm + 1); 1350 dv_insert(ddv, dv); 1351 } 1352 VN_RELE(DVTOV(dv)); 1353 } 1354 ndi_devi_exit(pdevi, circ); 1355 1356 ddv->dv_flags &= ~DV_BUILD; 1357 } 1358 1359 /* 1360 * Given a directory node, clean out all the nodes beneath. 1361 * 1362 * VDIR: Reinvoke to clean them, then delete the directory. 1363 * VCHR, VBLK: Just blow them away. 1364 * 1365 * Mark the directories touched as in need of a rebuild, in case 1366 * we fall over part way through. When DV_CLEAN_FORCE is specified, 1367 * we mark referenced empty directories as stale to facilitate DR. 1368 */ 1369 int 1370 dv_cleandir(struct dv_node *ddv, char *devnm, uint_t flags) 1371 { 1372 struct dv_node *dv; 1373 struct dv_node *next; 1374 struct vnode *vp; 1375 int busy = 0; 1376 1377 /* 1378 * We should always be holding the tsd_clean_key here: dv_cleandir() 1379 * will be called as a result of a devfs_clean request and the 1380 * tsd_clean_key will be set in either in devfs_clean() itself or in 1381 * devfs_clean_vhci(). 
1382 * 1383 * Since we are on the devfs_clean path, we return EBUSY if we cannot 1384 * get the contents lock: if we blocked here we might deadlock against 1385 * a thread performing top-down device configuration. 1386 */ 1387 ASSERT(tsd_get(devfs_clean_key)); 1388 1389 dcmn_err3(("dv_cleandir: %s\n", ddv->dv_name)); 1390 1391 if (!(flags & DV_CLEANDIR_LCK) && 1392 !rw_tryenter(&ddv->dv_contents, RW_WRITER)) 1393 return (EBUSY); 1394 1395 for (dv = DV_FIRST_ENTRY(ddv); dv; dv = next) { 1396 next = DV_NEXT_ENTRY(ddv, dv); 1397 1398 /* 1399 * If devnm is specified, the non-minor portion of the 1400 * name must match devnm. 1401 */ 1402 if (devnm && 1403 (strncmp(devnm, dv->dv_name, strlen(devnm)) || 1404 (dv->dv_name[strlen(devnm)] != ':' && 1405 dv->dv_name[strlen(devnm)] != '\0'))) 1406 continue; 1407 1408 /* check type of what we are cleaning */ 1409 vp = DVTOV(dv); 1410 if (vp->v_type == VDIR) { 1411 /* recurse on directories */ 1412 rw_enter(&dv->dv_contents, RW_WRITER); 1413 if (dv_cleandir(dv, NULL, 1414 flags | DV_CLEANDIR_LCK) == EBUSY) { 1415 rw_exit(&dv->dv_contents); 1416 goto set_busy; 1417 } 1418 1419 /* A clean directory is an empty directory... */ 1420 ASSERT(dv->dv_nlink == 2); 1421 mutex_enter(&vp->v_lock); 1422 if (vp->v_count > 0) { 1423 /* 1424 * ... but an empty directory can still have 1425 * references to it. If we have dv_busy or 1426 * DV_CLEAN_FORCE is *not* specified then a 1427 * referenced directory is considered busy. 1428 */ 1429 if (dv->dv_busy || !(flags & DV_CLEAN_FORCE)) { 1430 mutex_exit(&vp->v_lock); 1431 rw_exit(&dv->dv_contents); 1432 goto set_busy; 1433 } 1434 1435 /* 1436 * Mark referenced directory stale so that DR 1437 * will succeed even if a shell has 1438 * /devices/xxx as current directory (causing 1439 * VN_HOLD reference to an empty directory). 
1440 */ 1441 ASSERT(!DV_STALE(dv)); 1442 ndi_rele_devi(dv->dv_devi); 1443 dv->dv_devi = NULL; /* mark DV_STALE */ 1444 } 1445 } else { 1446 ASSERT((vp->v_type == VCHR) || (vp->v_type == VBLK)); 1447 ASSERT(dv->dv_nlink == 1); /* no hard links */ 1448 mutex_enter(&vp->v_lock); 1449 if (vp->v_count > 0) { 1450 /* 1451 * The file still has references to it. If 1452 * DV_DEVI_GONE is *not* specified then a 1453 * referenced file is considered busy. 1454 */ 1455 if (!(flags & DV_DEVI_GONE)) { 1456 mutex_exit(&vp->v_lock); 1457 goto set_busy; 1458 } 1459 1460 /* 1461 * Mark referenced file stale so that DR will 1462 * succeed even if there are userland opens. 1463 */ 1464 ASSERT(!DV_STALE(dv)); 1465 ndi_rele_devi(dv->dv_devi); 1466 DEVI(dv->dv_devi)->devi_gone = 1; 1467 dv->dv_devi = NULL; 1468 } 1469 } 1470 1471 /* unlink from directory */ 1472 dv_unlink(ddv, dv); 1473 1474 /* drop locks */ 1475 mutex_exit(&vp->v_lock); 1476 if (vp->v_type == VDIR) 1477 rw_exit(&dv->dv_contents); 1478 1479 /* destroy vnode if ref count is zero */ 1480 if (vp->v_count == 0) 1481 dv_destroy(dv, flags); 1482 1483 continue; 1484 1485 /* 1486 * If devnm is not NULL we return immediately on busy, 1487 * otherwise we continue destroying unused dv_node's. 1488 */ 1489 set_busy: busy++; 1490 if (devnm) 1491 break; 1492 } 1493 1494 /* 1495 * This code may be invoked to inform devfs that a new node has 1496 * been created in the kernel device tree. So we always set 1497 * the DV_BUILD flag to allow the next dv_filldir() to pick 1498 * the new devinfo nodes. 1499 */ 1500 ddv->dv_flags |= DV_BUILD; 1501 1502 if (!(flags & DV_CLEANDIR_LCK)) 1503 rw_exit(&ddv->dv_contents); 1504 1505 return (busy ? EBUSY : 0); 1506 } 1507 1508 /* 1509 * Walk through the devfs hierarchy, correcting the permissions of 1510 * devices with default permissions that do not match those specified 1511 * by minor perm. This can only be done for all drivers for now. 
 */
static int
dv_reset_perm_dir(struct dv_node *ddv, uint_t flags)
{
	struct dv_node *dv;
	struct vnode *vp;
	int retval = 0;
	struct vattr *attrp;
	mperm_t mp;
	char *nm;
	uid_t old_uid;
	gid_t old_gid;
	mode_t old_mode;

	rw_enter(&ddv->dv_contents, RW_WRITER);
	for (dv = DV_FIRST_ENTRY(ddv); dv; dv = DV_NEXT_ENTRY(ddv, dv)) {
		int error = 0;
		nm = dv->dv_name;

		rw_enter(&dv->dv_contents, RW_READER);
		vp = DVTOV(dv);
		if (vp->v_type == VDIR) {
			/* Recurse into subdirectories; EBUSY propagates up. */
			rw_exit(&dv->dv_contents);
			if (dv_reset_perm_dir(dv, flags) != 0) {
				error = EBUSY;
			}
		} else {
			ASSERT(vp->v_type == VCHR || vp->v_type == VBLK);

			/*
			 * Check for permissions from minor_perm
			 * If there are none, we're done
			 *
			 * The child's contents lock is dropped around the
			 * dev_minorperm() call and re-acquired below.
			 */
			rw_exit(&dv->dv_contents);
			if (dev_minorperm(dv->dv_devi, nm, &mp) != 0)
				continue;

			rw_enter(&dv->dv_contents, RW_READER);

			/*
			 * Allow a node's permissions to be altered
			 * permanently from the defaults by chmod,
			 * using the shadow node as backing store.
			 * Otherwise, update node to minor_perm permissions.
			 */
			if (dv->dv_attrvp == NULLVP) {
				/*
				 * No attribute vp, try to find one.
				 */
				dv_shadow_node(DVTOV(ddv), nm, vp,
				    NULL, NULLVP, kcred, 0);
			}
			/*
			 * If a shadow (attribute) vnode backs this node, or
			 * there are no cached attributes, leave it alone.
			 */
			if (dv->dv_attrvp != NULLVP || dv->dv_attr == NULL) {
				rw_exit(&dv->dv_contents);
				continue;
			}

			attrp = dv->dv_attr;

			if (VATTRP_MP_CMP(attrp, mp) == 0) {
				/* Cached attributes already match minor_perm */
				dcmn_err5(("%s: no perm change: "
				    "%d %d 0%o\n", nm, attrp->va_uid,
				    attrp->va_gid, attrp->va_mode));
				rw_exit(&dv->dv_contents);
				continue;
			}

			/* Remember the old identity for the debug message. */
			old_uid = attrp->va_uid;
			old_gid = attrp->va_gid;
			old_mode = attrp->va_mode;

			/*
			 * Merge minor_perm into the cached attributes.
			 * NOTE(review): the merge happens even when the
			 * vnode is held (error set to EBUSY below); the
			 * busy status appears to be reported, not used to
			 * skip the update — confirm this is intentional.
			 */
			VATTRP_MP_MERGE(attrp, mp);
			mutex_enter(&vp->v_lock);
			if (vp->v_count > 0) {
				error = EBUSY;
			}
			mutex_exit(&vp->v_lock);

			dcmn_err5(("%s: perm %d/%d/0%o -> %d/%d/0%o (%d)\n",
			    nm, old_uid, old_gid, old_mode, attrp->va_uid,
			    attrp->va_gid, attrp->va_mode, error));

			rw_exit(&dv->dv_contents);
		}

		/* Record the last failure; keep walking the siblings. */
		if (error != 0) {
			retval = error;
		}
	}

	/* Force the next dv_filldir() to rebuild this directory. */
	ddv->dv_flags |= DV_BUILD;

	rw_exit(&ddv->dv_contents);

	return (retval);
}

/*
 * Reset permissions across the whole /devices hierarchy, starting at
 * the root dv_node. Returns 0 if there is no root dv_node, otherwise
 * the result of the recursive dv_reset_perm_dir() walk.
 */
int
devfs_reset_perm(uint_t flags)
{
	struct dv_node *dvp;
	int rval;

	if ((dvp = devfs_dip_to_dvnode(ddi_root_node())) == NULL)
		return (0);

	VN_HOLD(DVTOV(dvp));
	rval = dv_reset_perm_dir(dvp, flags);
	VN_RELE(DVTOV(dvp));
	return (rval);
}

/*
 * Clean up dangling devfs shadow nodes for removed
 * drivers so that, in the event the driver is re-added
 * to the system, newly created nodes won't incorrectly
 * pick up these stale shadow node permissions.
 *
 * This is accomplished by walking down the pathname
 * to the directory, starting at the root's attribute
 * node, then removing all minors matching the specified
 * node name.
 * Care must be taken to remove all entries
 * in a directory before the directory itself, so that
 * the clean-up associated with rem_drv'ing a nexus driver
 * does not inadvertently result in an inconsistent
 * filesystem underlying devfs.
 */

/*
 * Recursively remove the contents of attribute-fs directory dirvp,
 * then each entry itself (children before parents, as noted above).
 * 'dir' is used only for debug messages; 'rvp' is the attribute-fs
 * root, passed through to VOP_RMDIR. Returns the first VOP error
 * encountered, or 0.
 */
static int
devfs_remdrv_rmdir(vnode_t *dirvp, const char *dir, vnode_t *rvp)
{
	int error;
	vnode_t *vp;
	int eof;
	struct iovec iov;
	struct uio uio;
	struct dirent64 *dp;
	dirent64_t *dbuf;
	size_t dlen;
	size_t dbuflen;
	int ndirents = 64;
	char *nm;

	VN_HOLD(dirvp);

	/* Buffer sized for ndirents worth of dirent64 headers. */
	dlen = ndirents * (sizeof (*dbuf));
	dbuf = kmem_alloc(dlen, KM_SLEEP);

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_fmode = 0;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = 0;
	uio.uio_llimit = MAXOFFSET_T;

	eof = 0;
	error = 0;
	while (!error && !eof) {
		/* Reset the uio for each VOP_READDIR chunk. */
		uio.uio_resid = dlen;
		iov.iov_base = (char *)dbuf;
		iov.iov_len = dlen;

		(void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
		error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
		VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);

		dbuflen = dlen - uio.uio_resid;

		if (error || dbuflen == 0)
			break;

		/* Walk the variable-length dirent64 records just read. */
		for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {

			nm = dp->d_name;

			if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
				continue;

			error = VOP_LOOKUP(dirvp, nm,
			    &vp, NULL, 0, NULL, kcred, NULL, NULL, NULL);

			dsysdebug(error,
			    ("rem_drv %s/%s lookup (%d)\n",
			    dir, nm, error));

			/* Entry vanished or is inaccessible: skip it. */
			if (error)
				continue;

			ASSERT(vp->v_type == VDIR ||
			    vp->v_type == VCHR || vp->v_type == VBLK);

			if (vp->v_type == VDIR) {
				/* Empty the subdirectory, then remove it. */
				error = devfs_remdrv_rmdir(vp, nm, rvp);
				if (error == 0) {
					error = VOP_RMDIR(dirvp,
					    (char *)nm, rvp, kcred, NULL, 0);
					dsysdebug(error,
					    ("rem_drv %s/%s rmdir (%d)\n",
					    dir, nm, error));
				}
			} else {
				error = VOP_REMOVE(dirvp, (char *)nm, kcred,
				    NULL, 0);
				dsysdebug(error,
				    ("rem_drv %s/%s remove (%d)\n",
				    dir, nm, error));
			}

			VN_RELE(vp);
			if (error) {
				goto exit;
			}
		}
	}

exit:
	VN_RELE(dirvp);
	kmem_free(dbuf, dlen);

	return (error);
}

/*
 * Remove stale shadow-node state for driver 'nodename' under
 * attribute-fs directory 'dir' (e.g. after rem_drv). Walks the
 * pathname down from the attribute-fs root, then removes every
 * entry whose name begins with nodename.
 *
 * NOTE(review): this function deliberately returns 0 on every
 * path, including lookup failures and VOP errors reached via the
 * exit label — cleanup appears to be best-effort; confirm callers
 * do not expect an error code.
 */
int
devfs_remdrv_cleanup(const char *dir, const char *nodename)
{
	int error;
	vnode_t *vp;
	vnode_t *dirvp;
	int eof;
	struct iovec iov;
	struct uio uio;
	struct dirent64 *dp;
	dirent64_t *dbuf;
	size_t dlen;
	size_t dbuflen;
	int ndirents = 64;
	int nodenamelen = strlen(nodename);
	char *nm;
	struct pathname pn;
	vnode_t *rvp;	/* root node of the underlying attribute fs */

	dcmn_err5(("devfs_remdrv_cleanup: %s %s\n", dir, nodename));

	/* Failure to parse the path is ignored: nothing to clean. */
	if (error = pn_get((char *)dir, UIO_SYSSPACE, &pn))
		return (0);

	rvp = dvroot->dv_attrvp;
	ASSERT(rvp != NULL);
	VN_HOLD(rvp);

	pn_skipslash(&pn);
	dirvp = rvp;
	VN_HOLD(dirvp);

	nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);

	/*
	 * Walk the pathname one component at a time, dropping the hold
	 * on each parent as we descend to its child.
	 */
	while (pn_pathleft(&pn)) {
		ASSERT(dirvp->v_type == VDIR);
		(void) pn_getcomponent(&pn, nm);
		ASSERT((strcmp(nm, ".") != 0) && (strcmp(nm, "..") != 0));
		error = VOP_LOOKUP(dirvp, nm, &vp, NULL, 0, rvp, kcred,
		    NULL, NULL, NULL);
		if (error) {
			/* Path component missing: nothing to clean up. */
			dcmn_err5(("remdrv_cleanup %s lookup error %d\n",
			    nm, error));
			VN_RELE(dirvp);
			if (dirvp != rvp)
				VN_RELE(rvp);
			pn_free(&pn);
			kmem_free(nm, MAXNAMELEN);
			return (0);
		}
		VN_RELE(dirvp);
		dirvp = vp;
		pn_skipslash(&pn);
	}

	ASSERT(dirvp->v_type == VDIR);
	if (dirvp != rvp)
		VN_RELE(rvp);
	pn_free(&pn);
	kmem_free(nm, MAXNAMELEN);	/* nm is re-pointed at d_name below */

	dlen = ndirents * (sizeof (*dbuf));
	dbuf = kmem_alloc(dlen, KM_SLEEP);

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_fmode = 0;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = 0;
	uio.uio_llimit = MAXOFFSET_T;

	eof = 0;
	error = 0;
	while (!error && !eof) {
		uio.uio_resid = dlen;
		iov.iov_base = (char *)dbuf;
		iov.iov_len = dlen;

		(void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
		error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
		VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);

		dbuflen = dlen - uio.uio_resid;

		if (error || dbuflen == 0)
			break;

		for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {

			nm = dp->d_name;

			if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
				continue;

			/* Only entries whose name starts with nodename. */
			if (strncmp(nm, nodename, nodenamelen) != 0)
				continue;

			error = VOP_LOOKUP(dirvp, nm, &vp,
			    NULL, 0, NULL, kcred, NULL, NULL, NULL);

			dsysdebug(error,
			    ("rem_drv %s/%s lookup (%d)\n",
			    dir, nm, error));

			if (error)
				continue;

			ASSERT(vp->v_type == VDIR ||
			    vp->v_type == VCHR || vp->v_type == VBLK);

			if (vp->v_type == VDIR) {
				/* Children first, then the directory. */
				error = devfs_remdrv_rmdir(vp, nm, rvp);
				if (error == 0) {
					error = VOP_RMDIR(dirvp, (char *)nm,
					    rvp, kcred, NULL, 0);
					dsysdebug(error,
					    ("rem_drv %s/%s rmdir (%d)\n",
					    dir, nm, error));
				}
			} else {
				error = VOP_REMOVE(dirvp, (char *)nm, kcred,
				    NULL, 0);
				dsysdebug(error,
				    ("rem_drv %s/%s remove (%d)\n",
				    dir, nm, error));
			}

			VN_RELE(vp);
			if (error)
				goto exit;
		}
	}

exit:
	VN_RELE(dirvp);

	kmem_free(dbuf, dlen);

	/* Best-effort cleanup: errors above are intentionally discarded. */
	return (0);
}

/* Singly-linked work list of directories for dv_walk() recursion. */
struct dv_list {
	struct dv_node	*dv;
	struct dv_list	*next;
};

/*
 * Depth-first walk of the dv_node tree rooted at ddv, invoking
 * callback(dv, arg) on each entry. If devnm is non-NULL and
 * non-empty, only entries whose non-minor name matches devnm are
 * visited at the top level; recursion always passes devnm == NULL.
 */
void
dv_walk(
	struct dv_node	*ddv,
	char		*devnm,
	void		(*callback)(struct dv_node *, void *),
	void		*arg)
{
	struct vnode	*dvp;
	struct dv_node	*dv;
	struct dv_list	*head, *tail, *next;
	int		len;

	dcmn_err3(("dv_walk: ddv = %s, devnm = %s\n",
	    ddv->dv_name, devnm ? devnm : "<null>"));

	dvp = DVTOV(ddv);

	ASSERT(dvp->v_type == VDIR);

	head = tail = next = NULL;

	/*
	 * Collect matching subdirectories into a list while holding the
	 * directory's contents lock and vnode lock.
	 *
	 * NOTE(review): the recursion below runs while both locks are
	 * still held (they are released only after the while loop);
	 * this relies on parent-before-child lock ordering — confirm
	 * against the rest of the devfs locking scheme.
	 */
	rw_enter(&ddv->dv_contents, RW_READER);
	mutex_enter(&dvp->v_lock);
	for (dv = DV_FIRST_ENTRY(ddv); dv; dv = DV_NEXT_ENTRY(ddv, dv)) {
		/*
		 * If devnm is not NULL and is not the empty string,
		 * select only dv_nodes with matching non-minor name
		 */
		if (devnm && (len = strlen(devnm)) &&
		    (strncmp(devnm, dv->dv_name, len) ||
		    (dv->dv_name[len] != ':' && dv->dv_name[len] != '\0')))
			continue;

		callback(dv, arg);

		if (DVTOV(dv)->v_type != VDIR)
			continue;

		/* Queue this subdirectory for the recursive pass. */
		next = kmem_zalloc(sizeof (*next), KM_SLEEP);
		next->dv = dv;

		if (tail)
			tail->next = next;
		else
			head = next;

		tail = next;
	}

	while (head) {
		dv_walk(head->dv, NULL, callback, arg);
		next = head->next;
		kmem_free(head, sizeof (*head));
		head = next;
	}
	rw_exit(&ddv->dv_contents);
	mutex_exit(&dvp->v_lock);
}