1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* 26 * miscellaneous routines for the devfs 27 */ 28 29 #include <sys/types.h> 30 #include <sys/param.h> 31 #include <sys/t_lock.h> 32 #include <sys/systm.h> 33 #include <sys/sysmacros.h> 34 #include <sys/user.h> 35 #include <sys/time.h> 36 #include <sys/vfs.h> 37 #include <sys/vnode.h> 38 #include <sys/file.h> 39 #include <sys/fcntl.h> 40 #include <sys/flock.h> 41 #include <sys/kmem.h> 42 #include <sys/uio.h> 43 #include <sys/errno.h> 44 #include <sys/stat.h> 45 #include <sys/cred.h> 46 #include <sys/dirent.h> 47 #include <sys/pathname.h> 48 #include <sys/cmn_err.h> 49 #include <sys/debug.h> 50 #include <sys/modctl.h> 51 #include <fs/fs_subr.h> 52 #include <sys/fs/dv_node.h> 53 #include <sys/fs/snode.h> 54 #include <sys/sunndi.h> 55 #include <sys/sunmdi.h> 56 #include <sys/conf.h> 57 58 #ifdef DEBUG 59 int devfs_debug = 0x0; 60 #endif 61 62 const char dvnm[] = "devfs"; 63 kmem_cache_t *dv_node_cache; /* dv_node cache */ 64 65 /* 66 * The devfs_clean_key is taken during a devfs_clean operation: it is used to 67 * prevent unnecessary code execution and for detection of potential deadlocks. 68 */ 69 uint_t devfs_clean_key; 70 71 struct dv_node *dvroot; 72 73 /* prototype memory vattrs */ 74 vattr_t dv_vattr_dir = { 75 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ 76 VDIR, /* va_type */ 77 DV_DIRMODE_DEFAULT, /* va_mode */ 78 DV_UID_DEFAULT, /* va_uid */ 79 DV_GID_DEFAULT, /* va_gid */ 80 0, /* va_fsid; */ 81 0, /* va_nodeid; */ 82 0, /* va_nlink; */ 83 0, /* va_size; */ 84 0, /* va_atime; */ 85 0, /* va_mtime; */ 86 0, /* va_ctime; */ 87 0, /* va_rdev; */ 88 0, /* va_blksize; */ 89 0, /* va_nblocks; */ 90 0, /* va_seq; */ 91 }; 92 93 vattr_t dv_vattr_file = { 94 AT_TYPE|AT_MODE|AT_SIZE|AT_UID|AT_GID|AT_RDEV, /* va_mask */ 95 0, /* va_type */ 96 DV_DEVMODE_DEFAULT, /* va_mode */ 97 DV_UID_DEFAULT, /* va_uid */ 98 DV_GID_DEFAULT, /* va_gid */ 99 0, /* va_fsid; */ 100 0, /* va_nodeid; */ 101 0, /* va_nlink; */ 102 0, /* va_size; */ 103 0, /* va_atime; */ 104 0, /* va_mtime; */ 105 0, /* va_ctime; */ 106 0, /* va_rdev; */ 107 0, /* va_blksize; */ 108 0, /* va_nblocks; */ 109 0, /* va_seq; */ 110 }; 111 112 vattr_t dv_vattr_priv = { 113 AT_TYPE|AT_MODE|AT_SIZE|AT_UID|AT_GID|AT_RDEV, /* va_mask */ 114 0, /* va_type */ 115 DV_DEVMODE_PRIV, /* va_mode */ 116 DV_UID_DEFAULT, /* va_uid */ 117 DV_GID_DEFAULT, /* va_gid */ 118 0, /* va_fsid; */ 119 0, /* va_nodeid; */ 120 0, /* va_nlink; */ 121 0, /* va_size; */ 122 0, /* va_atime; */ 123 0, /* va_mtime; */ 124 0, /* va_ctime; */ 125 0, /* va_rdev; */ 126 0, /* va_blksize; */ 127 0, /* va_nblocks; */ 128 0, /* va_seq; */ 129 }; 130 131 extern dev_info_t *clone_dip; 132 extern major_t clone_major; 133 extern struct dev_ops *ddi_hold_driver(major_t); 134 135 /* dv_node node constructor for kmem cache */ 136 static int 137 i_dv_node_ctor(void *buf, void *cfarg, int flag) 138 { 139 _NOTE(ARGUNUSED(cfarg, flag)) 140 struct dv_node *dv = (struct dv_node *)buf; 141 struct vnode *vp; 142 143 bzero(buf, sizeof (struct dv_node)); 144 vp = dv->dv_vnode = vn_alloc(flag); 145 if (vp == NULL) { 146 return (-1); 147 } 148 vp->v_data = dv; 149 rw_init(&dv->dv_contents, NULL, RW_DEFAULT, NULL); 150 return (0); 151 } 152 153 /* dv_node node destructor for kmem cache */ 154 static void 155 i_dv_node_dtor(void *buf, void *arg) 156 { 157 _NOTE(ARGUNUSED(arg)) 158 struct dv_node *dv = (struct dv_node *)buf; 159 struct vnode *vp = DVTOV(dv); 160 161 rw_destroy(&dv->dv_contents); 162 vn_invalid(vp); 163 vn_free(vp); 164 } 165 166 167 /* initialize dv_node node cache */ 168 void 169 dv_node_cache_init() 170 { 171 ASSERT(dv_node_cache == NULL); 172 dv_node_cache = kmem_cache_create("dv_node_cache", 173 sizeof (struct dv_node), 0, i_dv_node_ctor, i_dv_node_dtor, 174 NULL, NULL, NULL, 0); 175 176 tsd_create(&devfs_clean_key, NULL); 177 } 178 179 /* destroy dv_node node cache */ 180 void 181 dv_node_cache_fini() 182 { 183 ASSERT(dv_node_cache != NULL); 184 kmem_cache_destroy(dv_node_cache); 185 dv_node_cache = NULL; 186 187 tsd_destroy(&devfs_clean_key); 188 } 189 190 /* 191 * dv_mkino - Generate a unique inode number for devfs nodes. 192 * 193 * Although ino_t is 64 bits, the inode number is truncated to 32 bits for 32 194 * bit non-LARGEFILE applications. This means that there is a requirement to 195 * maintain the inode number as a 32 bit value or applications will have 196 * stat(2) calls fail with EOVERFLOW. We form a 32 bit inode number from the 197 * dev_t. but if the minor number is larger than L_MAXMIN32 we fold extra minor 198 * 199 * To generate inode numbers for directories, we assume that we will never use 200 * more than half the major space - this allows for ~8190 drivers. We use this 201 * upper major number space to allocate inode numbers for directories by 202 * encoding the major and instance into this space. 203 * 204 * We also skew the result so that inode 2 is reserved for the root of the file 205 * system. 206 * 207 * As part of the future support for 64-bit dev_t APIs, the upper minor bits 208 * should be folded into the high inode bits by adding the following code 209 * after "ino |= 1": 210 * 211 * #if (L_BITSMINOR32 != L_BITSMINOR) 212 * |* fold overflow minor bits into high bits of inode number *| 213 * ino |= ((ino_t)(minor >> L_BITSMINOR32)) << L_BITSMINOR; 214 * #endif |* (L_BITSMINOR32 != L_BITSMINOR) *| 215 * 216 * This way only applications that use devices that overflow their minor 217 * space will have an application level impact. 218 */ 219 static ino_t 220 dv_mkino(dev_info_t *devi, vtype_t typ, dev_t dev) 221 { 222 major_t major; 223 minor_t minor; 224 ino_t ino; 225 static int warn; 226 227 if (typ == VDIR) { 228 major = ((L_MAXMAJ32 + 1) >> 1) + DEVI(devi)->devi_major; 229 minor = ddi_get_instance(devi); 230 231 /* makedevice32 in high half of major number space */ 232 ino = (ino_t)((major << L_BITSMINOR32) | (minor & L_MAXMIN32)); 233 234 major = DEVI(devi)->devi_major; 235 } else { 236 major = getmajor(dev); 237 minor = getminor(dev); 238 239 /* makedevice32 */ 240 ino = (ino_t)((major << L_BITSMINOR32) | (minor & L_MAXMIN32)); 241 242 /* make ino for VCHR different than VBLK */ 243 ino <<= 1; 244 if (typ == VCHR) 245 ino |= 1; 246 } 247 248 ino += DV_ROOTINO + 1; /* skew */ 249 250 /* 251 * diagnose things a little early because adding the skew to a large 252 * minor number could roll over the major. 253 */ 254 if ((major >= (L_MAXMAJ32 >> 1)) && (warn == 0)) { 255 warn = 1; 256 cmn_err(CE_WARN, "%s: inode numbers are not unique", dvnm); 257 } 258 259 return (ino); 260 } 261 262 /* 263 * Compare two nodes lexographically to balance avl tree 264 */ 265 static int 266 dv_compare_nodes(const struct dv_node *dv1, const struct dv_node *dv2) 267 { 268 int rv; 269 270 if ((rv = strcmp(dv1->dv_name, dv2->dv_name)) == 0) 271 return (0); 272 return ((rv < 0) ? -1 : 1); 273 } 274 275 /* 276 * dv_mkroot 277 * 278 * Build the first VDIR dv_node. 279 */ 280 struct dv_node * 281 dv_mkroot(struct vfs *vfsp, dev_t devfsdev) 282 { 283 struct dv_node *dv; 284 struct vnode *vp; 285 286 ASSERT(ddi_root_node() != NULL); 287 ASSERT(dv_node_cache != NULL); 288 289 dcmn_err3(("dv_mkroot\n")); 290 dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP); 291 vp = DVTOV(dv); 292 vn_reinit(vp); 293 vp->v_flag = VROOT; 294 vp->v_vfsp = vfsp; 295 vp->v_type = VDIR; 296 vp->v_rdev = devfsdev; 297 vn_setops(vp, dv_vnodeops); 298 vn_exists(vp); 299 300 dvroot = dv; 301 302 dv->dv_name = NULL; /* not needed */ 303 dv->dv_namelen = 0; 304 305 dv->dv_devi = ddi_root_node(); 306 307 dv->dv_ino = DV_ROOTINO; 308 dv->dv_nlink = 2; /* name + . (no dv_insert) */ 309 dv->dv_dotdot = dv; /* .. == self */ 310 dv->dv_attrvp = NULLVP; 311 dv->dv_attr = NULL; 312 dv->dv_flags = DV_BUILD; 313 dv->dv_priv = NULL; 314 dv->dv_busy = 0; 315 dv->dv_dflt_mode = 0; 316 317 avl_create(&dv->dv_entries, 318 (int (*)(const void *, const void *))dv_compare_nodes, 319 sizeof (struct dv_node), offsetof(struct dv_node, dv_avllink)); 320 321 return (dv); 322 } 323 324 /* 325 * dv_mkdir 326 * 327 * Given an probed or attached nexus node, create a VDIR dv_node. 328 * No dv_attrvp is created at this point. 329 */ 330 struct dv_node * 331 dv_mkdir(struct dv_node *ddv, dev_info_t *devi, char *nm) 332 { 333 struct dv_node *dv; 334 struct vnode *vp; 335 size_t nmlen; 336 337 ASSERT((devi)); 338 dcmn_err4(("dv_mkdir: %s\n", nm)); 339 340 dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP); 341 nmlen = strlen(nm) + 1; 342 dv->dv_name = kmem_alloc(nmlen, KM_SLEEP); 343 bcopy(nm, dv->dv_name, nmlen); 344 dv->dv_namelen = nmlen - 1; /* '\0' not included */ 345 346 vp = DVTOV(dv); 347 vn_reinit(vp); 348 vp->v_flag = 0; 349 vp->v_vfsp = DVTOV(ddv)->v_vfsp; 350 vp->v_type = VDIR; 351 vp->v_rdev = DVTOV(ddv)->v_rdev; 352 vn_setops(vp, vn_getops(DVTOV(ddv))); 353 vn_exists(vp); 354 355 dv->dv_devi = devi; 356 ndi_hold_devi(devi); 357 358 dv->dv_ino = dv_mkino(devi, VDIR, NODEV); 359 dv->dv_nlink = 0; /* updated on insert */ 360 dv->dv_dotdot = ddv; 361 dv->dv_attrvp = NULLVP; 362 dv->dv_attr = NULL; 363 dv->dv_flags = DV_BUILD; 364 dv->dv_priv = NULL; 365 dv->dv_busy = 0; 366 dv->dv_dflt_mode = 0; 367 368 avl_create(&dv->dv_entries, 369 (int (*)(const void *, const void *))dv_compare_nodes, 370 sizeof (struct dv_node), offsetof(struct dv_node, dv_avllink)); 371 372 return (dv); 373 } 374 375 /* 376 * dv_mknod 377 * 378 * Given a minor node, create a VCHR or VBLK dv_node. 379 * No dv_attrvp is created at this point. 380 */ 381 static struct dv_node * 382 dv_mknod(struct dv_node *ddv, dev_info_t *devi, char *nm, 383 struct ddi_minor_data *dmd) 384 { 385 struct dv_node *dv; 386 struct vnode *vp; 387 size_t nmlen; 388 389 dcmn_err4(("dv_mknod: %s\n", nm)); 390 391 dv = kmem_cache_alloc(dv_node_cache, KM_SLEEP); 392 nmlen = strlen(nm) + 1; 393 dv->dv_name = kmem_alloc(nmlen, KM_SLEEP); 394 bcopy(nm, dv->dv_name, nmlen); 395 dv->dv_namelen = nmlen - 1; /* no '\0' */ 396 397 vp = DVTOV(dv); 398 vn_reinit(vp); 399 vp->v_flag = 0; 400 vp->v_vfsp = DVTOV(ddv)->v_vfsp; 401 vp->v_type = dmd->ddm_spec_type == S_IFCHR ? VCHR : VBLK; 402 vp->v_rdev = dmd->ddm_dev; 403 vn_setops(vp, vn_getops(DVTOV(ddv))); 404 vn_exists(vp); 405 406 /* increment dev_ref with devi_lock held */ 407 ASSERT(DEVI_BUSY_OWNED(devi)); 408 mutex_enter(&DEVI(devi)->devi_lock); 409 dv->dv_devi = devi; 410 DEVI(devi)->devi_ref++; /* ndi_hold_devi(dip) */ 411 mutex_exit(&DEVI(devi)->devi_lock); 412 413 dv->dv_ino = dv_mkino(devi, vp->v_type, vp->v_rdev); 414 dv->dv_nlink = 0; /* updated on insert */ 415 dv->dv_dotdot = ddv; 416 dv->dv_attrvp = NULLVP; 417 dv->dv_attr = NULL; 418 dv->dv_flags = 0; 419 420 if (dmd->type == DDM_INTERNAL_PATH) 421 dv->dv_flags |= DV_INTERNAL; 422 if (dmd->ddm_flags & DM_NO_FSPERM) 423 dv->dv_flags |= DV_NO_FSPERM; 424 425 dv->dv_priv = dmd->ddm_node_priv; 426 if (dv->dv_priv) 427 dphold(dv->dv_priv); 428 429 /* 430 * Minors created with ddi_create_priv_minor_node can specify 431 * a default mode permission other than the devfs default. 432 */ 433 if (dv->dv_priv || dv->dv_flags & DV_NO_FSPERM) { 434 dcmn_err5(("%s: dv_mknod default priv mode 0%o\n", 435 dv->dv_name, dmd->ddm_priv_mode)); 436 dv->dv_flags |= DV_DFLT_MODE; 437 dv->dv_dflt_mode = dmd->ddm_priv_mode & S_IAMB; 438 } 439 440 return (dv); 441 } 442 443 /* 444 * dv_destroy 445 * 446 * Destroy what we created in dv_mkdir or dv_mknod. 447 * In the case of a *referenced* directory, do nothing. 448 */ 449 void 450 dv_destroy(struct dv_node *dv, uint_t flags) 451 { 452 vnode_t *vp = DVTOV(dv); 453 ASSERT(dv->dv_nlink == 0); /* no references */ 454 455 dcmn_err4(("dv_destroy: %s\n", dv->dv_name)); 456 457 /* 458 * We may be asked to unlink referenced directories. 459 * In this case, there is nothing to be done. 460 * The eventual memory free will be done in 461 * devfs_inactive. 462 */ 463 if (vp->v_count != 0) { 464 ASSERT(vp->v_type == VDIR); 465 ASSERT(flags & DV_CLEAN_FORCE); 466 ASSERT(DV_STALE(dv)); 467 return; 468 } 469 470 if (vp->v_type == VDIR) { 471 ASSERT(DV_FIRST_ENTRY(dv) == NULL); 472 avl_destroy(&dv->dv_entries); 473 } 474 475 if (dv->dv_attrvp != NULLVP) 476 VN_RELE(dv->dv_attrvp); 477 if (dv->dv_attr != NULL) 478 kmem_free(dv->dv_attr, sizeof (struct vattr)); 479 if (dv->dv_name != NULL) 480 kmem_free(dv->dv_name, dv->dv_namelen + 1); 481 if (dv->dv_devi != NULL) { 482 ndi_rele_devi(dv->dv_devi); 483 } 484 if (dv->dv_priv != NULL) { 485 dpfree(dv->dv_priv); 486 } 487 488 kmem_cache_free(dv_node_cache, dv); 489 } 490 491 /* 492 * Find and hold dv_node by name 493 */ 494 static struct dv_node * 495 dv_findbyname(struct dv_node *ddv, char *nm) 496 { 497 struct dv_node *dv; 498 avl_index_t where; 499 struct dv_node dvtmp; 500 501 ASSERT(RW_LOCK_HELD(&ddv->dv_contents)); 502 dcmn_err3(("dv_findbyname: %s\n", nm)); 503 504 dvtmp.dv_name = nm; 505 dv = avl_find(&ddv->dv_entries, &dvtmp, &where); 506 if (dv) { 507 ASSERT(dv->dv_dotdot == ddv); 508 ASSERT(strcmp(dv->dv_name, nm) == 0); 509 VN_HOLD(DVTOV(dv)); 510 return (dv); 511 } 512 return (NULL); 513 } 514 515 /* 516 * Inserts a new dv_node in a parent directory 517 */ 518 void 519 dv_insert(struct dv_node *ddv, struct dv_node *dv) 520 { 521 avl_index_t where; 522 523 ASSERT(RW_WRITE_HELD(&ddv->dv_contents)); 524 ASSERT(DVTOV(ddv)->v_type == VDIR); 525 ASSERT(ddv->dv_nlink >= 2); 526 ASSERT(dv->dv_nlink == 0); 527 528 dcmn_err3(("dv_insert: %s\n", dv->dv_name)); 529 530 dv->dv_dotdot = ddv; 531 if (DVTOV(dv)->v_type == VDIR) { 532 ddv->dv_nlink++; /* .. to containing directory */ 533 dv->dv_nlink = 2; /* name + . */ 534 } else { 535 dv->dv_nlink = 1; /* name */ 536 } 537 538 /* enter node in the avl tree */ 539 VERIFY(avl_find(&ddv->dv_entries, dv, &where) == NULL); 540 avl_insert(&ddv->dv_entries, dv, where); 541 } 542 543 /* 544 * Unlink a dv_node from a perent directory 545 */ 546 void 547 dv_unlink(struct dv_node *ddv, struct dv_node *dv) 548 { 549 /* verify linkage of arguments */ 550 ASSERT(ddv && dv); 551 ASSERT(dv->dv_dotdot == ddv); 552 ASSERT(RW_WRITE_HELD(&ddv->dv_contents)); 553 ASSERT(DVTOV(ddv)->v_type == VDIR); 554 555 dcmn_err3(("dv_unlink: %s\n", dv->dv_name)); 556 557 if (DVTOV(dv)->v_type == VDIR) { 558 ddv->dv_nlink--; /* .. to containing directory */ 559 dv->dv_nlink -= 2; /* name + . */ 560 } else { 561 dv->dv_nlink -= 1; /* name */ 562 } 563 ASSERT(ddv->dv_nlink >= 2); 564 ASSERT(dv->dv_nlink == 0); 565 566 dv->dv_dotdot = NULL; 567 568 /* remove from avl tree */ 569 avl_remove(&ddv->dv_entries, dv); 570 } 571 572 /* 573 * Merge devfs node specific information into an attribute structure. 574 * 575 * NOTE: specfs provides ATIME,MTIME,CTIME,SIZE,BLKSIZE,NBLOCKS on leaf node. 576 */ 577 void 578 dv_vattr_merge(struct dv_node *dv, struct vattr *vap) 579 { 580 struct vnode *vp = DVTOV(dv); 581 582 vap->va_nodeid = dv->dv_ino; 583 vap->va_nlink = dv->dv_nlink; 584 585 if (vp->v_type == VDIR) { 586 vap->va_rdev = 0; 587 vap->va_fsid = vp->v_rdev; 588 } else { 589 vap->va_rdev = vp->v_rdev; 590 vap->va_fsid = DVTOV(dv->dv_dotdot)->v_rdev; 591 vap->va_type = vp->v_type; 592 /* don't trust the shadow file type */ 593 vap->va_mode &= ~S_IFMT; 594 if (vap->va_type == VCHR) 595 vap->va_mode |= S_IFCHR; 596 else 597 vap->va_mode |= S_IFBLK; 598 } 599 } 600 601 /* 602 * Get default device permission by consulting rules in 603 * privilege specification in minor node and /etc/minor_perm. 604 * 605 * This function is called from the devname filesystem to get default 606 * permissions for a device exported to a non-global zone. 607 */ 608 void 609 devfs_get_defattr(struct vnode *vp, struct vattr *vap, int *no_fs_perm) 610 { 611 mperm_t mp; 612 struct dv_node *dv; 613 614 /* If vp isn't a dv_node, return something sensible */ 615 if (!vn_matchops(vp, dv_vnodeops)) { 616 if (no_fs_perm) 617 *no_fs_perm = 0; 618 *vap = dv_vattr_file; 619 return; 620 } 621 622 /* 623 * For minors not created by ddi_create_priv_minor_node(), 624 * use devfs defaults. 625 */ 626 dv = VTODV(vp); 627 if (vp->v_type == VDIR) { 628 *vap = dv_vattr_dir; 629 } else if (dv->dv_flags & DV_NO_FSPERM) { 630 if (no_fs_perm) 631 *no_fs_perm = 1; 632 *vap = dv_vattr_priv; 633 } else { 634 /* 635 * look up perm bits from minor_perm 636 */ 637 *vap = dv_vattr_file; 638 if (dev_minorperm(dv->dv_devi, dv->dv_name, &mp) == 0) { 639 VATTR_MP_MERGE((*vap), mp); 640 dcmn_err5(("%s: minor perm mode 0%o\n", 641 dv->dv_name, vap->va_mode)); 642 } else if (dv->dv_flags & DV_DFLT_MODE) { 643 ASSERT((dv->dv_dflt_mode & ~S_IAMB) == 0); 644 vap->va_mode &= ~S_IAMB; 645 vap->va_mode |= dv->dv_dflt_mode; 646 dcmn_err5(("%s: priv mode 0%o\n", 647 dv->dv_name, vap->va_mode)); 648 } 649 } 650 } 651 652 /* 653 * dv_shadow_node 654 * 655 * Given a VDIR dv_node, find/create the associated VDIR 656 * node in the shadow attribute filesystem. 657 * 658 * Given a VCHR/VBLK dv_node, find the associated VREG 659 * node in the shadow attribute filesystem. These nodes 660 * are only created to persist non-default attributes. 661 * Lack of such a node implies the default permissions 662 * are sufficient. 663 * 664 * Managing the attribute file entries is slightly tricky (mostly 665 * because we can't intercept VN_HOLD and VN_RELE except on the last 666 * release). 667 * 668 * We assert that if the dv_attrvp pointer is non-NULL, it points 669 * to a singly-held (by us) vnode that represents the shadow entry 670 * in the underlying filesystem. To avoid store-ordering issues, 671 * we assert that the pointer can only be tested under the dv_contents 672 * READERS lock. 673 */ 674 675 void 676 dv_shadow_node( 677 struct vnode *dvp, /* devfs parent directory vnode */ 678 char *nm, /* name component */ 679 struct vnode *vp, /* devfs vnode */ 680 struct pathname *pnp, /* the path .. */ 681 struct vnode *rdir, /* the root .. */ 682 struct cred *cred, /* who's asking? */ 683 int flags) /* optionally create shadow node */ 684 { 685 struct dv_node *dv; /* dv_node of named directory */ 686 struct vnode *rdvp; /* shadow parent directory vnode */ 687 struct vnode *rvp; /* shadow vnode */ 688 struct vnode *rrvp; /* realvp of shadow vnode */ 689 struct vattr vattr; 690 int create_tried; 691 int error; 692 693 ASSERT(vp->v_type == VDIR || vp->v_type == VCHR || vp->v_type == VBLK); 694 dv = VTODV(vp); 695 dcmn_err3(("dv_shadow_node: name %s attr %p\n", 696 nm, (void *)dv->dv_attrvp)); 697 698 if ((flags & DV_SHADOW_WRITE_HELD) == 0) { 699 ASSERT(RW_READ_HELD(&dv->dv_contents)); 700 if (dv->dv_attrvp != NULLVP) 701 return; 702 if (!rw_tryupgrade(&dv->dv_contents)) { 703 rw_exit(&dv->dv_contents); 704 rw_enter(&dv->dv_contents, RW_WRITER); 705 if (dv->dv_attrvp != NULLVP) { 706 rw_downgrade(&dv->dv_contents); 707 return; 708 } 709 } 710 } else { 711 ASSERT(RW_WRITE_HELD(&dv->dv_contents)); 712 if (dv->dv_attrvp != NULLVP) 713 return; 714 } 715 716 ASSERT(RW_WRITE_HELD(&dv->dv_contents) && dv->dv_attrvp == NULL); 717 718 rdvp = VTODV(dvp)->dv_attrvp; 719 create_tried = 0; 720 lookup: 721 if (rdvp && (dv->dv_flags & DV_NO_FSPERM) == 0) { 722 error = VOP_LOOKUP(rdvp, nm, &rvp, pnp, LOOKUP_DIR, rdir, cred, 723 NULL, NULL, NULL); 724 725 /* factor out the snode since we only want the attribute node */ 726 if ((error == 0) && (VOP_REALVP(rvp, &rrvp, NULL) == 0)) { 727 VN_HOLD(rrvp); 728 VN_RELE(rvp); 729 rvp = rrvp; 730 } 731 } else 732 error = EROFS; /* no parent, no entry */ 733 734 /* 735 * All we want is the permissions (and maybe ACLs and 736 * extended attributes), and we want to perform lookups 737 * by name. Drivers occasionally change their minor 738 * number space. If something changes, there's no 739 * much we can do about it here. 740 */ 741 742 /* The shadow node checks out. We are done */ 743 if (error == 0) { 744 dv->dv_attrvp = rvp; /* with one hold */ 745 746 /* 747 * Determine if we have non-trivial ACLs on this node. 748 * It is not necessary to VOP_RWLOCK since fs_acl_nontrivial 749 * only does VOP_GETSECATTR. 750 */ 751 dv->dv_flags &= ~DV_ACL; 752 753 if (fs_acl_nontrivial(rvp, cred)) 754 dv->dv_flags |= DV_ACL; 755 756 /* 757 * If we have synced out the memory attributes, free 758 * them and switch back to using the persistent store. 759 */ 760 if (rvp && dv->dv_attr) { 761 kmem_free(dv->dv_attr, sizeof (struct vattr)); 762 dv->dv_attr = NULL; 763 } 764 if ((flags & DV_SHADOW_WRITE_HELD) == 0) 765 rw_downgrade(&dv->dv_contents); 766 ASSERT(RW_LOCK_HELD(&dv->dv_contents)); 767 return; 768 } 769 770 /* 771 * Failed to find attribute in persistent backing store, 772 * get default permission bits. 773 */ 774 devfs_get_defattr(vp, &vattr, NULL); 775 776 dv_vattr_merge(dv, &vattr); 777 gethrestime(&vattr.va_atime); 778 vattr.va_mtime = vattr.va_atime; 779 vattr.va_ctime = vattr.va_atime; 780 781 /* 782 * Try to create shadow dir. This is necessary in case 783 * we need to create a shadow leaf node later, when user 784 * executes chmod. 785 */ 786 if ((error == ENOENT) && !create_tried) { 787 switch (vp->v_type) { 788 case VDIR: 789 error = VOP_MKDIR(rdvp, nm, &vattr, &rvp, kcred, 790 NULL, 0, NULL); 791 dsysdebug(error, ("vop_mkdir %s %s %d\n", 792 VTODV(dvp)->dv_name, nm, error)); 793 create_tried = 1; 794 break; 795 796 case VCHR: 797 case VBLK: 798 /* 799 * Shadow nodes are only created on demand 800 */ 801 if (flags & DV_SHADOW_CREATE) { 802 error = VOP_CREATE(rdvp, nm, &vattr, NONEXCL, 803 VREAD|VWRITE, &rvp, kcred, 0, NULL, NULL); 804 dsysdebug(error, ("vop_create %s %s %d\n", 805 VTODV(dvp)->dv_name, nm, error)); 806 create_tried = 1; 807 } 808 break; 809 810 default: 811 cmn_err(CE_PANIC, "devfs: %s: create", dvnm); 812 /*NOTREACHED*/ 813 } 814 815 if (create_tried && 816 (error == 0) || (error == EEXIST)) { 817 VN_RELE(rvp); 818 goto lookup; 819 } 820 } 821 822 /* Store attribute in memory */ 823 if (dv->dv_attr == NULL) { 824 dv->dv_attr = kmem_alloc(sizeof (struct vattr), KM_SLEEP); 825 *(dv->dv_attr) = vattr; 826 } 827 828 if ((flags & DV_SHADOW_WRITE_HELD) == 0) 829 rw_downgrade(&dv->dv_contents); 830 ASSERT(RW_LOCK_HELD(&dv->dv_contents)); 831 } 832 833 /* 834 * Given a devinfo node, and a name, returns the appropriate 835 * minor information for that named node, if it exists. 836 */ 837 static int 838 dv_find_leafnode(dev_info_t *devi, char *minor_nm, struct ddi_minor_data *r_mi) 839 { 840 struct ddi_minor_data *dmd; 841 842 ASSERT(i_ddi_devi_attached(devi)); 843 844 dcmn_err3(("dv_find_leafnode: %s\n", minor_nm)); 845 ASSERT(DEVI_BUSY_OWNED(devi)); 846 for (dmd = DEVI(devi)->devi_minor; dmd; dmd = dmd->next) { 847 848 /* 849 * Skip alias nodes and nodes without a name. 850 */ 851 if ((dmd->type == DDM_ALIAS) || (dmd->ddm_name == NULL)) 852 continue; 853 854 dcmn_err4(("dv_find_leafnode: (%s,%s)\n", 855 minor_nm, dmd->ddm_name)); 856 if (strcmp(minor_nm, dmd->ddm_name) == 0) { 857 r_mi->ddm_dev = dmd->ddm_dev; 858 r_mi->ddm_spec_type = dmd->ddm_spec_type; 859 r_mi->type = dmd->type; 860 r_mi->ddm_flags = dmd->ddm_flags; 861 r_mi->ddm_node_priv = dmd->ddm_node_priv; 862 r_mi->ddm_priv_mode = dmd->ddm_priv_mode; 863 if (r_mi->ddm_node_priv) 864 dphold(r_mi->ddm_node_priv); 865 return (0); 866 } 867 } 868 869 dcmn_err3(("dv_find_leafnode: %s: ENOENT\n", minor_nm)); 870 return (ENOENT); 871 } 872 873 /* 874 * Special handling for clone node: 875 * Clone minor name is a driver name, the minor number will 876 * be the major number of the driver. There is no minor 877 * node under the clone driver, so we'll manufacture the 878 * dev_t. 879 */ 880 static struct dv_node * 881 dv_clone_mknod(struct dv_node *ddv, char *drvname) 882 { 883 major_t major; 884 struct dv_node *dvp; 885 char *devnm; 886 struct ddi_minor_data *dmd; 887 888 /* 889 * Make sure drvname is a STREAMS driver. We load the driver, 890 * but don't attach to any instances. This makes stat(2) 891 * relatively cheap. 892 */ 893 major = ddi_name_to_major(drvname); 894 if (major == DDI_MAJOR_T_NONE) 895 return (NULL); 896 897 if (ddi_hold_driver(major) == NULL) 898 return (NULL); 899 900 if (STREAMSTAB(major) == NULL) { 901 ddi_rele_driver(major); 902 return (NULL); 903 } 904 905 ddi_rele_driver(major); 906 devnm = kmem_alloc(MAXNAMELEN, KM_SLEEP); 907 (void) snprintf(devnm, MAXNAMELEN, "clone@0:%s", drvname); 908 dmd = kmem_zalloc(sizeof (*dmd), KM_SLEEP); 909 dmd->ddm_dev = makedevice(clone_major, (minor_t)major); 910 dmd->ddm_spec_type = S_IFCHR; 911 dvp = dv_mknod(ddv, clone_dip, devnm, dmd); 912 kmem_free(dmd, sizeof (*dmd)); 913 kmem_free(devnm, MAXNAMELEN); 914 return (dvp); 915 } 916 917 /* 918 * Given the parent directory node, and a name in it, returns the 919 * named dv_node to the caller (as a vnode). 920 * 921 * (We need pnp and rdir for doing shadow lookups; they can be NULL) 922 */ 923 int 924 dv_find(struct dv_node *ddv, char *nm, struct vnode **vpp, struct pathname *pnp, 925 struct vnode *rdir, struct cred *cred, uint_t ndi_flags) 926 { 927 extern int isminiroot; /* see modctl.c */ 928 929 int circ; 930 int rv = 0, was_busy = 0, nmlen, write_held = 0; 931 struct vnode *vp; 932 struct dv_node *dv, *dup; 933 dev_info_t *pdevi, *devi = NULL; 934 char *mnm; 935 struct ddi_minor_data *dmd; 936 937 dcmn_err3(("dv_find %s\n", nm)); 938 939 if (!rw_tryenter(&ddv->dv_contents, RW_READER)) { 940 if (tsd_get(devfs_clean_key)) 941 return (EBUSY); 942 rw_enter(&ddv->dv_contents, RW_READER); 943 } 944 start: 945 if (DV_STALE(ddv)) { 946 rw_exit(&ddv->dv_contents); 947 return (ESTALE); 948 } 949 950 /* 951 * Empty name or ., return node itself. 952 */ 953 nmlen = strlen(nm); 954 if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) { 955 *vpp = DVTOV(ddv); 956 rw_exit(&ddv->dv_contents); 957 VN_HOLD(*vpp); 958 return (0); 959 } 960 961 /* 962 * .., return the parent directory 963 */ 964 if ((nmlen == 2) && (strcmp(nm, "..") == 0)) { 965 *vpp = DVTOV(ddv->dv_dotdot); 966 rw_exit(&ddv->dv_contents); 967 VN_HOLD(*vpp); 968 return (0); 969 } 970 971 /* 972 * Fail anything without a valid device name component 973 */ 974 if (nm[0] == '@' || nm[0] == ':') { 975 dcmn_err3(("devfs: no driver '%s'\n", nm)); 976 rw_exit(&ddv->dv_contents); 977 return (ENOENT); 978 } 979 980 /* 981 * So, now we have to deal with the trickier stuff. 982 * 983 * (a) search the existing list of dv_nodes on this directory 984 */ 985 if ((dv = dv_findbyname(ddv, nm)) != NULL) { 986 founddv: 987 ASSERT(RW_LOCK_HELD(&ddv->dv_contents)); 988 989 if (!rw_tryenter(&dv->dv_contents, RW_READER)) { 990 if (tsd_get(devfs_clean_key)) { 991 VN_RELE(DVTOV(dv)); 992 rw_exit(&ddv->dv_contents); 993 return (EBUSY); 994 } 995 rw_enter(&dv->dv_contents, RW_READER); 996 } 997 998 vp = DVTOV(dv); 999 if ((dv->dv_attrvp != NULLVP) || 1000 (vp->v_type != VDIR && dv->dv_attr != NULL)) { 1001 /* 1002 * Common case - we already have attributes 1003 */ 1004 rw_exit(&dv->dv_contents); 1005 rw_exit(&ddv->dv_contents); 1006 goto found; 1007 } 1008 1009 /* 1010 * No attribute vp, try and build one. 1011 * 1012 * dv_shadow_node() can briefly drop &dv->dv_contents lock 1013 * if it is unable to upgrade it to a write lock. If the 1014 * current thread has come in through the bottom-up device 1015 * configuration devfs_clean() path, we may deadlock against 1016 * a thread performing top-down device configuration if it 1017 * grabs the contents lock. To avoid this, when we are on the 1018 * devfs_clean() path we attempt to upgrade the dv_contents 1019 * lock before we call dv_shadow_node(). 1020 */ 1021 if (tsd_get(devfs_clean_key)) { 1022 if (!rw_tryupgrade(&dv->dv_contents)) { 1023 VN_RELE(DVTOV(dv)); 1024 rw_exit(&dv->dv_contents); 1025 rw_exit(&ddv->dv_contents); 1026 return (EBUSY); 1027 } 1028 1029 write_held = DV_SHADOW_WRITE_HELD; 1030 } 1031 1032 dv_shadow_node(DVTOV(ddv), nm, vp, pnp, rdir, cred, 1033 write_held); 1034 1035 rw_exit(&dv->dv_contents); 1036 rw_exit(&ddv->dv_contents); 1037 goto found; 1038 } 1039 1040 /* 1041 * (b) Search the child devinfo nodes of our parent directory, 1042 * looking for the named node. If we find it, build a new 1043 * node, then grab the writers lock, search the directory 1044 * if it's still not there, then insert it. 1045 * 1046 * We drop the devfs locks before accessing the device tree. 1047 * Take care to mark the node BUSY so that a forced devfs_clean 1048 * doesn't mark the directory node stale. 1049 * 1050 * Also, check if we are called as part of devfs_clean or 1051 * reset_perm. If so, simply return not found because there 1052 * is nothing to clean. 1053 */ 1054 if (tsd_get(devfs_clean_key)) { 1055 rw_exit(&ddv->dv_contents); 1056 return (ENOENT); 1057 } 1058 1059 /* 1060 * We could be either READ or WRITE locked at 1061 * this point. Upgrade if we are read locked. 1062 */ 1063 ASSERT(RW_LOCK_HELD(&ddv->dv_contents)); 1064 if (rw_read_locked(&ddv->dv_contents) && 1065 !rw_tryupgrade(&ddv->dv_contents)) { 1066 rw_exit(&ddv->dv_contents); 1067 rw_enter(&ddv->dv_contents, RW_WRITER); 1068 /* 1069 * Things may have changed when we dropped 1070 * the contents lock, so start from top again 1071 */ 1072 goto start; 1073 } 1074 ddv->dv_busy++; /* mark busy before dropping lock */ 1075 was_busy++; 1076 rw_exit(&ddv->dv_contents); 1077 1078 pdevi = ddv->dv_devi; 1079 ASSERT(pdevi != NULL); 1080 1081 mnm = strchr(nm, ':'); 1082 if (mnm) 1083 *mnm = (char)0; 1084 1085 /* 1086 * Configure one nexus child, will call nexus's bus_ops 1087 * If successful, devi is held upon returning. 1088 * Note: devfs lookup should not be configuring grandchildren. 1089 */ 1090 ASSERT((ndi_flags & NDI_CONFIG) == 0); 1091 1092 rv = ndi_devi_config_one(pdevi, nm, &devi, ndi_flags | NDI_NO_EVENT); 1093 if (mnm) 1094 *mnm = ':'; 1095 if (rv != NDI_SUCCESS) { 1096 rv = ENOENT; 1097 goto notfound; 1098 } 1099 1100 ASSERT(devi); 1101 1102 /* Check if this is a path alias */ 1103 if (ddi_aliases_present == B_TRUE && ddi_get_parent(devi) != pdevi) { 1104 char *curr = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1105 1106 (void) ddi_pathname(devi, curr); 1107 1108 vp = NULL; 1109 if (devfs_lookupname(curr, NULL, &vp) == 0 && vp) { 1110 dv = VTODV(vp); 1111 kmem_free(curr, MAXPATHLEN); 1112 goto found; 1113 } 1114 kmem_free(curr, MAXPATHLEN); 1115 } 1116 1117 /* 1118 * If we configured a hidden node, consider it notfound. 1119 */ 1120 if (ndi_dev_is_hidden_node(devi)) { 1121 ndi_rele_devi(devi); 1122 rv = ENOENT; 1123 goto notfound; 1124 } 1125 1126 /* 1127 * Don't make vhci clients visible under phci, unless we 1128 * are in miniroot. 1129 */ 1130 if (isminiroot == 0 && ddi_get_parent(devi) != pdevi) { 1131 ndi_rele_devi(devi); 1132 rv = ENOENT; 1133 goto notfound; 1134 } 1135 1136 ASSERT(devi && i_ddi_devi_attached(devi)); 1137 1138 /* 1139 * Invalidate cache to notice newly created minor nodes. 1140 */ 1141 rw_enter(&ddv->dv_contents, RW_WRITER); 1142 ddv->dv_flags |= DV_BUILD; 1143 rw_exit(&ddv->dv_contents); 1144 1145 /* 1146 * mkdir for nexus drivers and leaf nodes as well. If we are racing 1147 * and create a duplicate, the duplicate will be destroyed below. 1148 */ 1149 if (mnm == NULL) { 1150 dv = dv_mkdir(ddv, devi, nm); 1151 } else { 1152 /* 1153 * Allocate dmd first to avoid KM_SLEEP with active 1154 * ndi_devi_enter. 1155 */ 1156 dmd = kmem_zalloc(sizeof (*dmd), KM_SLEEP); 1157 ndi_devi_enter(devi, &circ); 1158 if (devi == clone_dip) { 1159 /* 1160 * For clone minors, load the driver indicated by 1161 * minor name. 1162 */ 1163 dv = dv_clone_mknod(ddv, mnm + 1); 1164 } else { 1165 /* 1166 * Find minor node and make a dv_node 1167 */ 1168 if (dv_find_leafnode(devi, mnm + 1, dmd) == 0) { 1169 dv = dv_mknod(ddv, devi, nm, dmd); 1170 if (dmd->ddm_node_priv) 1171 dpfree(dmd->ddm_node_priv); 1172 } 1173 } 1174 ndi_devi_exit(devi, circ); 1175 kmem_free(dmd, sizeof (*dmd)); 1176 } 1177 /* 1178 * Release hold from ndi_devi_config_one() 1179 */ 1180 ndi_rele_devi(devi); 1181 1182 if (dv == NULL) { 1183 rv = ENOENT; 1184 goto notfound; 1185 } 1186 1187 /* 1188 * We have released the dv_contents lock, need to check 1189 * if another thread already created a duplicate node 1190 */ 1191 rw_enter(&ddv->dv_contents, RW_WRITER); 1192 if ((dup = dv_findbyname(ddv, nm)) == NULL) { 1193 dv_insert(ddv, dv); 1194 } else { 1195 /* 1196 * Duplicate found, use the existing node 1197 */ 1198 VN_RELE(DVTOV(dv)); 1199 dv_destroy(dv, 0); 1200 dv = dup; 1201 } 1202 goto founddv; 1203 /*NOTREACHED*/ 1204 1205 found: 1206 /* 1207 * Fail lookup of device that has now become hidden (typically via 1208 * hot removal of open device). 1209 */ 1210 if (dv->dv_devi && ndi_dev_is_hidden_node(dv->dv_devi)) { 1211 dcmn_err2(("dv_find: nm %s failed: hidden/removed\n", nm)); 1212 VN_RELE(vp); 1213 rv = ENOENT; 1214 goto notfound; 1215 } 1216 1217 /* 1218 * Skip non-kernel lookups of internal nodes. 1219 * This use of kcred to distinguish between user and 1220 * internal kernel lookups is unfortunate. The information 1221 * provided by the seg argument to lookupnameat should 1222 * evolve into a lookup flag for filesystems that need 1223 * this distinction. 1224 */ 1225 if ((dv->dv_flags & DV_INTERNAL) && (cred != kcred)) { 1226 dcmn_err2(("dv_find: nm %s failed: internal\n", nm)); 1227 VN_RELE(vp); 1228 rv = ENOENT; 1229 goto notfound; 1230 } 1231 1232 dcmn_err2(("dv_find: returning vp for nm %s\n", nm)); 1233 if (vp->v_type == VCHR || vp->v_type == VBLK) { 1234 /* 1235 * If vnode is a device, return special vnode instead 1236 * (though it knows all about -us- via sp->s_realvp, 1237 * sp->s_devvp, and sp->s_dip) 1238 */ 1239 *vpp = specvp_devfs(vp, vp->v_rdev, vp->v_type, cred, 1240 dv->dv_devi); 1241 VN_RELE(vp); 1242 if (*vpp == NULLVP) 1243 rv = ENOSYS; 1244 } else 1245 *vpp = vp; 1246 1247 notfound: 1248 if (was_busy) { 1249 /* 1250 * Non-zero was_busy tells us that we are not in the 1251 * devfs_clean() path which in turn means that we can afford 1252 * to take the contents lock unconditionally. 1253 */ 1254 rw_enter(&ddv->dv_contents, RW_WRITER); 1255 ddv->dv_busy--; 1256 rw_exit(&ddv->dv_contents); 1257 } 1258 return (rv); 1259 } 1260 1261 /* 1262 * The given directory node is out-of-date; that is, it has been 1263 * marked as needing to be rebuilt, possibly because some new devinfo 1264 * node has come into existence, or possibly because this is the first 1265 * time we've been here. 1266 */ 1267 void 1268 dv_filldir(struct dv_node *ddv) 1269 { 1270 struct dv_node *dv; 1271 dev_info_t *devi, *pdevi; 1272 struct ddi_minor_data *dmd; 1273 char devnm[MAXNAMELEN]; 1274 int circ, ccirc; 1275 1276 ASSERT(DVTOV(ddv)->v_type == VDIR); 1277 ASSERT(RW_WRITE_HELD(&ddv->dv_contents)); 1278 ASSERT(ddv->dv_flags & DV_BUILD); 1279 1280 dcmn_err3(("dv_filldir: %s\n", ddv->dv_name)); 1281 if (DV_STALE(ddv)) 1282 return; 1283 pdevi = ddv->dv_devi; 1284 1285 if (ndi_devi_config(pdevi, NDI_NO_EVENT) != NDI_SUCCESS) { 1286 dcmn_err3(("dv_filldir: config error %s\n", ddv->dv_name)); 1287 } 1288 1289 ndi_devi_enter(pdevi, &circ); 1290 for (devi = ddi_get_child(pdevi); devi; 1291 devi = ddi_get_next_sibling(devi)) { 1292 /* 1293 * While we know enough to create a directory at DS_INITIALIZED, 1294 * the directory will be empty until DS_ATTACHED. The existence 1295 * of an empty directory dv_node will cause a devi_ref, which 1296 * has caused problems for existing code paths doing offline/DR 1297 * type operations - making devfs_clean coordination even more 1298 * sensitive and error prone. Given this, the 'continue' below 1299 * is checking for DS_ATTACHED instead of DS_INITIALIZED. 1300 */ 1301 if (i_ddi_node_state(devi) < DS_ATTACHED) 1302 continue; 1303 1304 /* skip hidden nodes */ 1305 if (ndi_dev_is_hidden_node(devi)) 1306 continue; 1307 1308 dcmn_err3(("dv_filldir: node %s\n", ddi_node_name(devi))); 1309 1310 ndi_devi_enter(devi, &ccirc); 1311 for (dmd = DEVI(devi)->devi_minor; dmd; dmd = dmd->next) { 1312 char *addr; 1313 1314 /* 1315 * Skip alias nodes, internal nodes, and nodes 1316 * without a name. We allow DDM_DEFAULT nodes 1317 * to appear in readdir. 1318 */ 1319 if ((dmd->type == DDM_ALIAS) || 1320 (dmd->type == DDM_INTERNAL_PATH) || 1321 (dmd->ddm_name == NULL)) 1322 continue; 1323 1324 addr = ddi_get_name_addr(devi); 1325 if (addr && *addr) 1326 (void) sprintf(devnm, "%s@%s:%s", 1327 ddi_node_name(devi), addr, dmd->ddm_name); 1328 else 1329 (void) sprintf(devnm, "%s:%s", 1330 ddi_node_name(devi), dmd->ddm_name); 1331 1332 if ((dv = dv_findbyname(ddv, devnm)) != NULL) { 1333 /* dv_node already exists */ 1334 VN_RELE(DVTOV(dv)); 1335 continue; 1336 } 1337 1338 dv = dv_mknod(ddv, devi, devnm, dmd); 1339 dv_insert(ddv, dv); 1340 VN_RELE(DVTOV(dv)); 1341 } 1342 ndi_devi_exit(devi, ccirc); 1343 1344 (void) ddi_deviname(devi, devnm); 1345 if ((dv = dv_findbyname(ddv, devnm + 1)) == NULL) { 1346 /* directory doesn't exist */ 1347 dv = dv_mkdir(ddv, devi, devnm + 1); 1348 dv_insert(ddv, dv); 1349 } 1350 VN_RELE(DVTOV(dv)); 1351 } 1352 ndi_devi_exit(pdevi, circ); 1353 1354 ddv->dv_flags &= ~DV_BUILD; 1355 } 1356 1357 /* 1358 * Given a directory node, clean out all the nodes beneath. 1359 * 1360 * VDIR: Reinvoke to clean them, then delete the directory. 1361 * VCHR, VBLK: Just blow them away. 1362 * 1363 * Mark the directories touched as in need of a rebuild, in case 1364 * we fall over part way through. When DV_CLEAN_FORCE is specified, 1365 * we mark referenced empty directories as stale to facilitate DR. 1366 */ 1367 int 1368 dv_cleandir(struct dv_node *ddv, char *devnm, uint_t flags) 1369 { 1370 struct dv_node *dv; 1371 struct dv_node *next; 1372 struct vnode *vp; 1373 int busy = 0; 1374 1375 /* 1376 * We should always be holding the tsd_clean_key here: dv_cleandir() 1377 * will be called as a result of a devfs_clean request and the 1378 * tsd_clean_key will be set in either in devfs_clean() itself or in 1379 * devfs_clean_vhci(). 1380 * 1381 * Since we are on the devfs_clean path, we return EBUSY if we cannot 1382 * get the contents lock: if we blocked here we might deadlock against 1383 * a thread performing top-down device configuration. 1384 */ 1385 ASSERT(tsd_get(devfs_clean_key)); 1386 1387 dcmn_err3(("dv_cleandir: %s\n", ddv->dv_name)); 1388 1389 if (!(flags & DV_CLEANDIR_LCK) && 1390 !rw_tryenter(&ddv->dv_contents, RW_WRITER)) 1391 return (EBUSY); 1392 1393 for (dv = DV_FIRST_ENTRY(ddv); dv; dv = next) { 1394 next = DV_NEXT_ENTRY(ddv, dv); 1395 1396 /* 1397 * If devnm is specified, the non-minor portion of the 1398 * name must match devnm. 1399 */ 1400 if (devnm && 1401 (strncmp(devnm, dv->dv_name, strlen(devnm)) || 1402 (dv->dv_name[strlen(devnm)] != ':' && 1403 dv->dv_name[strlen(devnm)] != '\0'))) 1404 continue; 1405 1406 /* check type of what we are cleaning */ 1407 vp = DVTOV(dv); 1408 if (vp->v_type == VDIR) { 1409 /* recurse on directories */ 1410 rw_enter(&dv->dv_contents, RW_WRITER); 1411 if (dv_cleandir(dv, NULL, 1412 flags | DV_CLEANDIR_LCK) == EBUSY) { 1413 rw_exit(&dv->dv_contents); 1414 goto set_busy; 1415 } 1416 1417 /* A clean directory is an empty directory... */ 1418 ASSERT(dv->dv_nlink == 2); 1419 mutex_enter(&vp->v_lock); 1420 if (vp->v_count > 0) { 1421 /* 1422 * ... but an empty directory can still have 1423 * references to it. If we have dv_busy or 1424 * DV_CLEAN_FORCE is *not* specified then a 1425 * referenced directory is considered busy. 1426 */ 1427 if (dv->dv_busy || !(flags & DV_CLEAN_FORCE)) { 1428 mutex_exit(&vp->v_lock); 1429 rw_exit(&dv->dv_contents); 1430 goto set_busy; 1431 } 1432 1433 /* 1434 * Mark referenced directory stale so that DR 1435 * will succeed even if a shell has 1436 * /devices/xxx as current directory (causing 1437 * VN_HOLD reference to an empty directory). 1438 */ 1439 ASSERT(!DV_STALE(dv)); 1440 ndi_rele_devi(dv->dv_devi); 1441 dv->dv_devi = NULL; /* mark DV_STALE */ 1442 } 1443 } else { 1444 ASSERT((vp->v_type == VCHR) || (vp->v_type == VBLK)); 1445 ASSERT(dv->dv_nlink == 1); /* no hard links */ 1446 mutex_enter(&vp->v_lock); 1447 if (vp->v_count > 0) { 1448 mutex_exit(&vp->v_lock); 1449 goto set_busy; 1450 } 1451 } 1452 1453 /* unlink from directory */ 1454 dv_unlink(ddv, dv); 1455 1456 /* drop locks */ 1457 mutex_exit(&vp->v_lock); 1458 if (vp->v_type == VDIR) 1459 rw_exit(&dv->dv_contents); 1460 1461 /* destroy vnode if ref count is zero */ 1462 if (vp->v_count == 0) 1463 dv_destroy(dv, flags); 1464 1465 continue; 1466 1467 /* 1468 * If devnm is not NULL we return immediately on busy, 1469 * otherwise we continue destroying unused dv_node's. 1470 */ 1471 set_busy: busy++; 1472 if (devnm) 1473 break; 1474 } 1475 1476 /* 1477 * This code may be invoked to inform devfs that a new node has 1478 * been created in the kernel device tree. So we always set 1479 * the DV_BUILD flag to allow the next dv_filldir() to pick 1480 * the new devinfo nodes. 1481 */ 1482 ddv->dv_flags |= DV_BUILD; 1483 1484 if (!(flags & DV_CLEANDIR_LCK)) 1485 rw_exit(&ddv->dv_contents); 1486 1487 return (busy ? EBUSY : 0); 1488 } 1489 1490 /* 1491 * Walk through the devfs hierarchy, correcting the permissions of 1492 * devices with default permissions that do not match those specified 1493 * by minor perm. This can only be done for all drivers for now. 1494 */ 1495 static int 1496 dv_reset_perm_dir(struct dv_node *ddv, uint_t flags) 1497 { 1498 struct dv_node *dv; 1499 struct vnode *vp; 1500 int retval = 0; 1501 struct vattr *attrp; 1502 mperm_t mp; 1503 char *nm; 1504 uid_t old_uid; 1505 gid_t old_gid; 1506 mode_t old_mode; 1507 1508 rw_enter(&ddv->dv_contents, RW_WRITER); 1509 for (dv = DV_FIRST_ENTRY(ddv); dv; dv = DV_NEXT_ENTRY(ddv, dv)) { 1510 int error = 0; 1511 nm = dv->dv_name; 1512 1513 rw_enter(&dv->dv_contents, RW_READER); 1514 vp = DVTOV(dv); 1515 if (vp->v_type == VDIR) { 1516 rw_exit(&dv->dv_contents); 1517 if (dv_reset_perm_dir(dv, flags) != 0) { 1518 error = EBUSY; 1519 } 1520 } else { 1521 ASSERT(vp->v_type == VCHR || vp->v_type == VBLK); 1522 1523 /* 1524 * Check for permissions from minor_perm 1525 * If there are none, we're done 1526 */ 1527 rw_exit(&dv->dv_contents); 1528 if (dev_minorperm(dv->dv_devi, nm, &mp) != 0) 1529 continue; 1530 1531 rw_enter(&dv->dv_contents, RW_READER); 1532 1533 /* 1534 * Allow a node's permissions to be altered 1535 * permanently from the defaults by chmod, 1536 * using the shadow node as backing store. 1537 * Otherwise, update node to minor_perm permissions. 1538 */ 1539 if (dv->dv_attrvp == NULLVP) { 1540 /* 1541 * No attribute vp, try to find one. 1542 */ 1543 dv_shadow_node(DVTOV(ddv), nm, vp, 1544 NULL, NULLVP, kcred, 0); 1545 } 1546 if (dv->dv_attrvp != NULLVP || dv->dv_attr == NULL) { 1547 rw_exit(&dv->dv_contents); 1548 continue; 1549 } 1550 1551 attrp = dv->dv_attr; 1552 1553 if (VATTRP_MP_CMP(attrp, mp) == 0) { 1554 dcmn_err5(("%s: no perm change: " 1555 "%d %d 0%o\n", nm, attrp->va_uid, 1556 attrp->va_gid, attrp->va_mode)); 1557 rw_exit(&dv->dv_contents); 1558 continue; 1559 } 1560 1561 old_uid = attrp->va_uid; 1562 old_gid = attrp->va_gid; 1563 old_mode = attrp->va_mode; 1564 1565 VATTRP_MP_MERGE(attrp, mp); 1566 mutex_enter(&vp->v_lock); 1567 if (vp->v_count > 0) { 1568 error = EBUSY; 1569 } 1570 mutex_exit(&vp->v_lock); 1571 1572 dcmn_err5(("%s: perm %d/%d/0%o -> %d/%d/0%o (%d)\n", 1573 nm, old_uid, old_gid, old_mode, attrp->va_uid, 1574 attrp->va_gid, attrp->va_mode, error)); 1575 1576 rw_exit(&dv->dv_contents); 1577 } 1578 1579 if (error != 0) { 1580 retval = error; 1581 } 1582 } 1583 1584 ddv->dv_flags |= DV_BUILD; 1585 1586 rw_exit(&ddv->dv_contents); 1587 1588 return (retval); 1589 } 1590 1591 int 1592 devfs_reset_perm(uint_t flags) 1593 { 1594 struct dv_node *dvp; 1595 int rval; 1596 1597 if ((dvp = devfs_dip_to_dvnode(ddi_root_node())) == NULL) 1598 return (0); 1599 1600 VN_HOLD(DVTOV(dvp)); 1601 rval = dv_reset_perm_dir(dvp, flags); 1602 VN_RELE(DVTOV(dvp)); 1603 return (rval); 1604 } 1605 1606 /* 1607 * Clean up dangling devfs shadow nodes for removed 1608 * drivers so that, in the event the driver is re-added 1609 * to the system, newly created nodes won't incorrectly 1610 * pick up these stale shadow node permissions. 1611 * 1612 * This is accomplished by walking down the pathname 1613 * to the directory, starting at the root's attribute 1614 * node, then removing all minors matching the specified 1615 * node name. Care must be taken to remove all entries 1616 * in a directory before the directory itself, so that 1617 * the clean-up associated with rem_drv'ing a nexus driver 1618 * does not inadvertently result in an inconsistent 1619 * filesystem underlying devfs. 1620 */ 1621 1622 static int 1623 devfs_remdrv_rmdir(vnode_t *dirvp, const char *dir, vnode_t *rvp) 1624 { 1625 int error; 1626 vnode_t *vp; 1627 int eof; 1628 struct iovec iov; 1629 struct uio uio; 1630 struct dirent64 *dp; 1631 dirent64_t *dbuf; 1632 size_t dlen; 1633 size_t dbuflen; 1634 int ndirents = 64; 1635 char *nm; 1636 1637 VN_HOLD(dirvp); 1638 1639 dlen = ndirents * (sizeof (*dbuf)); 1640 dbuf = kmem_alloc(dlen, KM_SLEEP); 1641 1642 uio.uio_iov = &iov; 1643 uio.uio_iovcnt = 1; 1644 uio.uio_segflg = UIO_SYSSPACE; 1645 uio.uio_fmode = 0; 1646 uio.uio_extflg = UIO_COPY_CACHED; 1647 uio.uio_loffset = 0; 1648 uio.uio_llimit = MAXOFFSET_T; 1649 1650 eof = 0; 1651 error = 0; 1652 while (!error && !eof) { 1653 uio.uio_resid = dlen; 1654 iov.iov_base = (char *)dbuf; 1655 iov.iov_len = dlen; 1656 1657 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1658 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0); 1659 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1660 1661 dbuflen = dlen - uio.uio_resid; 1662 1663 if (error || dbuflen == 0) 1664 break; 1665 1666 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen); 1667 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { 1668 1669 nm = dp->d_name; 1670 1671 if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0) 1672 continue; 1673 1674 error = VOP_LOOKUP(dirvp, nm, 1675 &vp, NULL, 0, NULL, kcred, NULL, NULL, NULL); 1676 1677 dsysdebug(error, 1678 ("rem_drv %s/%s lookup (%d)\n", 1679 dir, nm, error)); 1680 1681 if (error) 1682 continue; 1683 1684 ASSERT(vp->v_type == VDIR || 1685 vp->v_type == VCHR || vp->v_type == VBLK); 1686 1687 if (vp->v_type == VDIR) { 1688 error = devfs_remdrv_rmdir(vp, nm, rvp); 1689 if (error == 0) { 1690 error = VOP_RMDIR(dirvp, 1691 (char *)nm, rvp, kcred, NULL, 0); 1692 dsysdebug(error, 1693 ("rem_drv %s/%s rmdir (%d)\n", 1694 dir, nm, error)); 1695 } 1696 } else { 1697 error = VOP_REMOVE(dirvp, (char *)nm, kcred, 1698 NULL, 0); 1699 dsysdebug(error, 1700 ("rem_drv %s/%s remove (%d)\n", 1701 dir, nm, error)); 1702 } 1703 1704 VN_RELE(vp); 1705 if (error) { 1706 goto exit; 1707 } 1708 } 1709 } 1710 1711 exit: 1712 VN_RELE(dirvp); 1713 kmem_free(dbuf, dlen); 1714 1715 return (error); 1716 } 1717 1718 int 1719 devfs_remdrv_cleanup(const char *dir, const char *nodename) 1720 { 1721 int error; 1722 vnode_t *vp; 1723 vnode_t *dirvp; 1724 int eof; 1725 struct iovec iov; 1726 struct uio uio; 1727 struct dirent64 *dp; 1728 dirent64_t *dbuf; 1729 size_t dlen; 1730 size_t dbuflen; 1731 int ndirents = 64; 1732 int nodenamelen = strlen(nodename); 1733 char *nm; 1734 struct pathname pn; 1735 vnode_t *rvp; /* root node of the underlying attribute fs */ 1736 1737 dcmn_err5(("devfs_remdrv_cleanup: %s %s\n", dir, nodename)); 1738 1739 if (error = pn_get((char *)dir, UIO_SYSSPACE, &pn)) 1740 return (0); 1741 1742 rvp = dvroot->dv_attrvp; 1743 ASSERT(rvp != NULL); 1744 VN_HOLD(rvp); 1745 1746 pn_skipslash(&pn); 1747 dirvp = rvp; 1748 VN_HOLD(dirvp); 1749 1750 nm = kmem_alloc(MAXNAMELEN, KM_SLEEP); 1751 1752 while (pn_pathleft(&pn)) { 1753 ASSERT(dirvp->v_type == VDIR); 1754 (void) pn_getcomponent(&pn, nm); 1755 ASSERT((strcmp(nm, ".") != 0) && (strcmp(nm, "..") != 0)); 1756 error = VOP_LOOKUP(dirvp, nm, &vp, NULL, 0, rvp, kcred, 1757 NULL, NULL, NULL); 1758 if (error) { 1759 dcmn_err5(("remdrv_cleanup %s lookup error %d\n", 1760 nm, error)); 1761 VN_RELE(dirvp); 1762 if (dirvp != rvp) 1763 VN_RELE(rvp); 1764 pn_free(&pn); 1765 kmem_free(nm, MAXNAMELEN); 1766 return (0); 1767 } 1768 VN_RELE(dirvp); 1769 dirvp = vp; 1770 pn_skipslash(&pn); 1771 } 1772 1773 ASSERT(dirvp->v_type == VDIR); 1774 if (dirvp != rvp) 1775 VN_RELE(rvp); 1776 pn_free(&pn); 1777 kmem_free(nm, MAXNAMELEN); 1778 1779 dlen = ndirents * (sizeof (*dbuf)); 1780 dbuf = kmem_alloc(dlen, KM_SLEEP); 1781 1782 uio.uio_iov = &iov; 1783 uio.uio_iovcnt = 1; 1784 uio.uio_segflg = UIO_SYSSPACE; 1785 uio.uio_fmode = 0; 1786 uio.uio_extflg = UIO_COPY_CACHED; 1787 uio.uio_loffset = 0; 1788 uio.uio_llimit = MAXOFFSET_T; 1789 1790 eof = 0; 1791 error = 0; 1792 while (!error && !eof) { 1793 uio.uio_resid = dlen; 1794 iov.iov_base = (char *)dbuf; 1795 iov.iov_len = dlen; 1796 1797 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1798 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0); 1799 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1800 1801 dbuflen = dlen - uio.uio_resid; 1802 1803 if (error || dbuflen == 0) 1804 break; 1805 1806 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen); 1807 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { 1808 1809 nm = dp->d_name; 1810 1811 if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0) 1812 continue; 1813 1814 if (strncmp(nm, nodename, nodenamelen) != 0) 1815 continue; 1816 1817 error = VOP_LOOKUP(dirvp, nm, &vp, 1818 NULL, 0, NULL, kcred, NULL, NULL, NULL); 1819 1820 dsysdebug(error, 1821 ("rem_drv %s/%s lookup (%d)\n", 1822 dir, nm, error)); 1823 1824 if (error) 1825 continue; 1826 1827 ASSERT(vp->v_type == VDIR || 1828 vp->v_type == VCHR || vp->v_type == VBLK); 1829 1830 if (vp->v_type == VDIR) { 1831 error = devfs_remdrv_rmdir(vp, nm, rvp); 1832 if (error == 0) { 1833 error = VOP_RMDIR(dirvp, (char *)nm, 1834 rvp, kcred, NULL, 0); 1835 dsysdebug(error, 1836 ("rem_drv %s/%s rmdir (%d)\n", 1837 dir, nm, error)); 1838 } 1839 } else { 1840 error = VOP_REMOVE(dirvp, (char *)nm, kcred, 1841 NULL, 0); 1842 dsysdebug(error, 1843 ("rem_drv %s/%s remove (%d)\n", 1844 dir, nm, error)); 1845 } 1846 1847 VN_RELE(vp); 1848 if (error) 1849 goto exit; 1850 } 1851 } 1852 1853 exit: 1854 VN_RELE(dirvp); 1855 1856 kmem_free(dbuf, dlen); 1857 1858 return (0); 1859 } 1860 1861 struct dv_list { 1862 struct dv_node *dv; 1863 struct dv_list *next; 1864 }; 1865 1866 void 1867 dv_walk( 1868 struct dv_node *ddv, 1869 char *devnm, 1870 void (*callback)(struct dv_node *, void *), 1871 void *arg) 1872 { 1873 struct vnode *dvp; 1874 struct dv_node *dv; 1875 struct dv_list *head, *tail, *next; 1876 int len; 1877 1878 dcmn_err3(("dv_walk: ddv = %s, devnm = %s\n", 1879 ddv->dv_name, devnm ? devnm : "<null>")); 1880 1881 dvp = DVTOV(ddv); 1882 1883 ASSERT(dvp->v_type == VDIR); 1884 1885 head = tail = next = NULL; 1886 1887 rw_enter(&ddv->dv_contents, RW_READER); 1888 mutex_enter(&dvp->v_lock); 1889 for (dv = DV_FIRST_ENTRY(ddv); dv; dv = DV_NEXT_ENTRY(ddv, dv)) { 1890 /* 1891 * If devnm is not NULL and is not the empty string, 1892 * select only dv_nodes with matching non-minor name 1893 */ 1894 if (devnm && (len = strlen(devnm)) && 1895 (strncmp(devnm, dv->dv_name, len) || 1896 (dv->dv_name[len] != ':' && dv->dv_name[len] != '\0'))) 1897 continue; 1898 1899 callback(dv, arg); 1900 1901 if (DVTOV(dv)->v_type != VDIR) 1902 continue; 1903 1904 next = kmem_zalloc(sizeof (*next), KM_SLEEP); 1905 next->dv = dv; 1906 1907 if (tail) 1908 tail->next = next; 1909 else 1910 head = next; 1911 1912 tail = next; 1913 } 1914 1915 while (head) { 1916 dv_walk(head->dv, NULL, callback, arg); 1917 next = head->next; 1918 kmem_free(head, sizeof (*head)); 1919 head = next; 1920 } 1921 rw_exit(&ddv->dv_contents); 1922 mutex_exit(&dvp->v_lock); 1923 }