1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * This is the device filesystem. 28 * 29 * It is a combination of a namer to drive autoconfiguration, 30 * plus the access methods for the device drivers of the system. 31 * 32 * The prototype is fairly dependent on specfs for the latter part 33 * of its implementation, though a final version would integrate the two. 
34 */ 35 #include <sys/types.h> 36 #include <sys/param.h> 37 #include <sys/sysmacros.h> 38 #include <sys/systm.h> 39 #include <sys/kmem.h> 40 #include <sys/time.h> 41 #include <sys/pathname.h> 42 #include <sys/vfs.h> 43 #include <sys/vfs_opreg.h> 44 #include <sys/vnode.h> 45 #include <sys/stat.h> 46 #include <sys/uio.h> 47 #include <sys/stat.h> 48 #include <sys/errno.h> 49 #include <sys/cmn_err.h> 50 #include <sys/cred.h> 51 #include <sys/statvfs.h> 52 #include <sys/mount.h> 53 #include <sys/debug.h> 54 #include <sys/modctl.h> 55 #include <fs/fs_subr.h> 56 #include <sys/fs/dv_node.h> 57 #include <sys/fs/snode.h> 58 #include <sys/sunndi.h> 59 #include <sys/policy.h> 60 #include <sys/sunmdi.h> 61 62 /* 63 * devfs vfs operations. 64 */ 65 static int devfs_mount(struct vfs *, struct vnode *, struct mounta *, 66 struct cred *); 67 static int devfs_unmount(struct vfs *, int, struct cred *); 68 static int devfs_root(struct vfs *, struct vnode **); 69 static int devfs_statvfs(struct vfs *, struct statvfs64 *); 70 static int devfs_mountroot(struct vfs *, enum whymountroot); 71 72 static int devfsinit(int, char *); 73 74 static vfsdef_t devfs_vfssw = { 75 VFSDEF_VERSION, 76 "devfs", /* type name string */ 77 devfsinit, /* init routine */ 78 0, /* flags */ 79 NULL /* mount options table prototype */ 80 }; 81 82 static kmutex_t devfs_lock; /* protects global data */ 83 static int devfstype; /* fstype */ 84 static dev_t devfsdev; /* the fictious 'device' we live on */ 85 static struct devfs_data *devfs_mntinfo; /* linked list of instances */ 86 87 /* 88 * Module linkage information 89 */ 90 static struct modlfs modlfs = { 91 &mod_fsops, "devices filesystem", &devfs_vfssw 92 }; 93 94 static struct modlinkage modlinkage = { 95 MODREV_1, (void *)&modlfs, NULL 96 }; 97 98 int 99 _init(void) 100 { 101 int e; 102 103 mutex_init(&devfs_lock, "devfs lock", MUTEX_DEFAULT, NULL); 104 dv_node_cache_init(); 105 if ((e = mod_install(&modlinkage)) != 0) { 106 dv_node_cache_fini(); 107 
mutex_destroy(&devfs_lock); 108 return (e); 109 } 110 dcmn_err(("devfs loaded\n")); 111 return (0); 112 } 113 114 int 115 _fini(void) 116 { 117 return (EBUSY); 118 } 119 120 int 121 _info(struct modinfo *modinfop) 122 { 123 return (mod_info(&modlinkage, modinfop)); 124 } 125 126 /*ARGSUSED1*/ 127 static int 128 devfsinit(int fstype, char *name) 129 { 130 static const fs_operation_def_t devfs_vfsops_template[] = { 131 VFSNAME_MOUNT, { .vfs_mount = devfs_mount }, 132 VFSNAME_UNMOUNT, { .vfs_unmount = devfs_unmount }, 133 VFSNAME_ROOT, { .vfs_root = devfs_root }, 134 VFSNAME_STATVFS, { .vfs_statvfs = devfs_statvfs }, 135 VFSNAME_SYNC, { .vfs_sync = fs_sync }, 136 VFSNAME_MOUNTROOT, { .vfs_mountroot = devfs_mountroot }, 137 NULL, NULL 138 }; 139 int error; 140 int dev; 141 extern major_t getudev(void); /* gack - what a function */ 142 143 devfstype = fstype; 144 /* 145 * Associate VFS ops vector with this fstype 146 */ 147 error = vfs_setfsops(fstype, devfs_vfsops_template, NULL); 148 if (error != 0) { 149 cmn_err(CE_WARN, "devfsinit: bad vfs ops template"); 150 return (error); 151 } 152 153 error = vn_make_ops("dev fs", dv_vnodeops_template, &dv_vnodeops); 154 if (error != 0) { 155 (void) vfs_freevfsops_by_type(fstype); 156 cmn_err(CE_WARN, "devfsinit: bad vnode ops template"); 157 return (error); 158 } 159 160 /* 161 * Invent a dev_t (sigh). 162 */ 163 if ((dev = getudev()) == DDI_MAJOR_T_NONE) { 164 cmn_err(CE_NOTE, "%s: can't get unique dev", devfs_vfssw.name); 165 dev = 0; 166 } 167 devfsdev = makedevice(dev, 0); 168 169 return (0); 170 } 171 172 /* 173 * The name of the mount point and the name of the attribute 174 * filesystem are passed down from userland for now. 
175 */ 176 static int 177 devfs_mount(struct vfs *vfsp, struct vnode *mvp, struct mounta *uap, 178 struct cred *cr) 179 { 180 struct devfs_data *devfs_data; 181 struct vnode *avp; 182 struct dv_node *dv; 183 struct vattr va; 184 185 dcmn_err(("devfs_mount\n")); 186 187 if (secpolicy_fs_mount(cr, mvp, vfsp) != 0) 188 return (EPERM); 189 190 /* 191 * check that the mount point is sane 192 */ 193 if (mvp->v_type != VDIR) 194 return (ENOTDIR); 195 196 ASSERT(uap->flags & MS_SYSSPACE); 197 /* 198 * Devfs can only be mounted from kernel during boot. 199 * avp is the existing /devices, the same as the mount point. 200 */ 201 avp = mvp; 202 203 /* 204 * Create and initialize the vfs-private data. 205 * This includes a hand-crafted root vnode (we build 206 * this here mostly so that traverse() doesn't sleep 207 * in VFS_ROOT()). 208 */ 209 mutex_enter(&devfs_lock); 210 ASSERT(devfs_mntinfo == NULL); 211 dv = dv_mkroot(vfsp, devfsdev); 212 dv->dv_attrvp = avp; /* attribute root vp */ 213 214 ASSERT(dv == dv->dv_dotdot); 215 216 devfs_data = kmem_zalloc(sizeof (struct devfs_data), KM_SLEEP); 217 devfs_data->devfs_vfsp = vfsp; 218 devfs_data->devfs_root = dv; 219 220 vfsp->vfs_data = (caddr_t)devfs_data; 221 vfsp->vfs_fstype = devfstype; 222 vfsp->vfs_dev = devfsdev; 223 vfsp->vfs_bsize = DEV_BSIZE; 224 vfsp->vfs_mtime = ddi_get_time(); 225 vfs_make_fsid(&vfsp->vfs_fsid, vfsp->vfs_dev, devfstype); 226 227 /* We're there. */ 228 devfs_mntinfo = devfs_data; 229 mutex_exit(&devfs_lock); 230 231 va.va_mask = AT_ATIME|AT_MTIME; 232 gethrestime(&va.va_atime); 233 gethrestime(&va.va_mtime); 234 (void) VOP_SETATTR(DVTOV(dv), &va, 0, cr, NULL); 235 return (0); 236 } 237 238 239 /* 240 * We never unmount devfs in a real production system. 
241 */ 242 /*ARGSUSED*/ 243 static int 244 devfs_unmount(struct vfs *vfsp, int flag, struct cred *cr) 245 { 246 return (EBUSY); 247 } 248 249 /* 250 * return root vnode for given vfs 251 */ 252 static int 253 devfs_root(struct vfs *vfsp, struct vnode **vpp) 254 { 255 dcmn_err(("devfs_root\n")); 256 *vpp = DVTOV(VFSTODVFS(vfsp)->devfs_root); 257 VN_HOLD(*vpp); 258 return (0); 259 } 260 261 /* 262 * return 'generic superblock' information to userland. 263 * 264 * not much that we can usefully admit to here 265 */ 266 static int 267 devfs_statvfs(struct vfs *vfsp, struct statvfs64 *sbp) 268 { 269 extern kmem_cache_t *dv_node_cache; 270 271 dev32_t d32; 272 273 dcmn_err(("devfs_statvfs\n")); 274 bzero(sbp, sizeof (*sbp)); 275 sbp->f_frsize = sbp->f_bsize = vfsp->vfs_bsize; 276 /* 277 * We could compute the number of devfsnodes here .. but since 278 * it's dynamic anyway, it's not clear how useful this is. 279 */ 280 sbp->f_files = kmem_cache_stat(dv_node_cache, "alloc"); 281 282 /* no illusions that free/avail files is relevant to devfs */ 283 sbp->f_ffree = 0; 284 sbp->f_favail = 0; 285 286 /* no illusions that blocks are relevant to devfs */ 287 sbp->f_bfree = 0; 288 sbp->f_bavail = 0; 289 sbp->f_blocks = 0; 290 291 (void) cmpldev(&d32, vfsp->vfs_dev); 292 sbp->f_fsid = d32; 293 (void) strcpy(sbp->f_basetype, vfssw[devfstype].vsw_name); 294 sbp->f_flag = vf_to_stf(vfsp->vfs_flag); 295 sbp->f_namemax = MAXNAMELEN - 1; 296 (void) strcpy(sbp->f_fstr, "devices"); 297 298 return (0); 299 } 300 301 /* 302 * devfs always mount after root is mounted, so this should never 303 * be invoked. 
304 */ 305 /*ARGSUSED*/ 306 static int 307 devfs_mountroot(struct vfs *vfsp, enum whymountroot why) 308 { 309 dcmn_err(("devfs_mountroot\n")); 310 311 return (EINVAL); 312 } 313 314 struct dv_node * 315 devfs_dip_to_dvnode(dev_info_t *dip) 316 { 317 char *dirpath; 318 struct vnode *dirvp; 319 320 ASSERT(dip != NULL); 321 322 /* no-op if devfs not mounted yet */ 323 if (devfs_mntinfo == NULL) 324 return (NULL); 325 326 /* 327 * The lookupname below only looks up cached dv_nodes 328 * because devfs_clean_key is set in thread specific data. 329 */ 330 dirpath = kmem_alloc(MAXPATHLEN, KM_SLEEP); 331 (void) ddi_pathname(dip, dirpath); 332 if (devfs_lookupname(dirpath, NULLVPP, &dirvp)) { 333 dcmn_err(("directory %s not found\n", dirpath)); 334 kmem_free(dirpath, MAXPATHLEN); 335 return (NULL); 336 } 337 338 kmem_free(dirpath, MAXPATHLEN); 339 return (VTODV(dirvp)); 340 } 341 342 /* 343 * If DV_CLEAN_FORCE devfs_clean is issued with a dip that is not the root 344 * and not a vHCI we also need to clean any vHCI branches because they 345 * may contain pHCI nodes. A detach_node() of a pHCI will fail if its 346 * mdi_devi_offline() fails, and the mdi_devi_offline() of the last 347 * pHCI will fail unless an ndi_devi_offline() of the Client nodes under 348 * the vHCI is successful - which requires a clean vHCI branch to removed 349 * the devi_refs associated with devfs vnodes. 350 */ 351 static int 352 devfs_clean_vhci(dev_info_t *dip, void *args) 353 { 354 struct dv_node *dvp; 355 uint_t flags = (uint_t)(uintptr_t)args; 356 357 (void) tsd_set(devfs_clean_key, (void *)1); 358 dvp = devfs_dip_to_dvnode(dip); 359 if (dvp) { 360 (void) dv_cleandir(dvp, NULL, flags); 361 VN_RELE(DVTOV(dvp)); 362 } 363 (void) tsd_set(devfs_clean_key, NULL); 364 return (DDI_WALK_CONTINUE); 365 } 366 367 /* 368 * devfs_clean() 369 * 370 * Destroy unreferenced dv_node's and detach devices. 371 * 372 * devfs_clean will try its best to clean up unused nodes. 
 * It is no longer valid to assume that just because devfs_clean fails,
 * the device is not removable. This is because device contracts
 * can result in userland processes releasing a device during the
 * device offline process in the kernel. Thus it is no longer
 * correct to fail an offline just because devfs_clean finds
 * referenced dv_nodes. To enforce this, devfs_clean() always
 * returns success i.e. 0.
 *
 * devfs_clean() may return before removing all possible nodes if
 * we cannot acquire locks in areas of the code where potential for
 * deadlock exists (see comments in dv_find() and dv_cleandir() for
 * examples of this).
 *
 * devfs caches unreferenced dv_nodes to speed up the performance
 * of ls, find, etc. devfs_clean() is invoked to clean up cached
 * dv_nodes to reclaim memory as well as to facilitate device
 * removal (dv_nodes reference devinfo nodes, which prevents driver
 * detach).
 *
 * If a shell parks in a /devices directory, the dv_node will be
 * held, preventing the corresponding device from being detached.
 * This would be a denial of service against DR. To prevent this,
 * DR code calls devfs_clean() with the DV_CLEAN_FORCE flag.
 * The dv_cleandir() implementation does the right thing to ensure
 * successful DR.
398 */ 399 int 400 devfs_clean(dev_info_t *dip, char *devnm, uint_t flags) 401 { 402 struct dv_node *dvp; 403 404 dcmn_err(("devfs_unconfigure: dip = 0x%p, flags = 0x%x", 405 (void *)dip, flags)); 406 407 /* avoid recursion back into the device tree */ 408 (void) tsd_set(devfs_clean_key, (void *)1); 409 dvp = devfs_dip_to_dvnode(dip); 410 if (dvp == NULL) { 411 (void) tsd_set(devfs_clean_key, NULL); 412 return (0); 413 } 414 415 (void) dv_cleandir(dvp, devnm, flags); 416 (void) tsd_set(devfs_clean_key, NULL); 417 VN_RELE(DVTOV(dvp)); 418 419 /* 420 * If we are doing a DV_CLEAN_FORCE, and we did not start at the 421 * root, and we did not start at a vHCI node then clean vHCI 422 * branches too. Failure to clean vHCI branch does not cause EBUSY. 423 * 424 * Also, to accommodate nexus callers that clean 'self' to DR 'child' 425 * (like pcihp) we clean vHCIs even when dv_cleandir() of dip branch 426 * above fails - this prevents a busy DR 'child' sibling from causing 427 * the DR of 'child' to fail because a vHCI branch was not cleaned. 428 */ 429 if ((flags & DV_CLEAN_FORCE) && (dip != ddi_root_node()) && 430 (mdi_component_is_vhci(dip, NULL) != MDI_SUCCESS)) { 431 /* 432 * NOTE: for backport the following is recommended 433 * (void) devfs_clean_vhci(scsi_vhci_dip, 434 * (void *)(uintptr_t)flags); 435 */ 436 mdi_walk_vhcis(devfs_clean_vhci, (void *)(uintptr_t)flags); 437 } 438 439 return (0); 440 } 441 442 /* 443 * lookup a devfs relative pathname, returning held vnodes for the final 444 * component and the containing directory (if requested). 445 * 446 * NOTE: We can't use lookupname because this would use the current 447 * processes credentials (CRED) in the call lookuppnvp instead 448 * of kcred. It also does not give you the flexibility so 449 * specify the directory to start the resolution in (devicesdir). 
450 */ 451 int 452 devfs_lookupname( 453 char *pathname, /* user pathname */ 454 vnode_t **dirvpp, /* ret for ptr to parent dir vnode */ 455 vnode_t **compvpp) /* ret for ptr to component vnode */ 456 { 457 struct pathname pn; 458 int error; 459 460 ASSERT(devicesdir); /* devfs must be initialized */ 461 ASSERT(pathname); /* must have some path */ 462 463 if (error = pn_get(pathname, UIO_SYSSPACE, &pn)) 464 return (error); 465 466 /* make the path relative to /devices. */ 467 pn_skipslash(&pn); 468 if (pn_pathleft(&pn) == 0) { 469 /* all we had was "\0" or "/" (which skipslash skiped) */ 470 if (dirvpp) 471 *dirvpp = NULL; 472 if (compvpp) { 473 VN_HOLD(devicesdir); 474 *compvpp = devicesdir; 475 } 476 } else { 477 /* 478 * Use devfs lookup to resolve pathname to the vnode for 479 * the device via relative lookup in devfs. Extra holds for 480 * using devicesdir as directory we are searching and for 481 * being our root without being == rootdir. 482 */ 483 VN_HOLD(devicesdir); 484 VN_HOLD(devicesdir); 485 error = lookuppnvp(&pn, NULL, FOLLOW, dirvpp, compvpp, 486 devicesdir, devicesdir, kcred); 487 } 488 pn_free(&pn); 489 490 return (error); 491 } 492 493 /* 494 * Given a devfs path (without the /devices prefix), walk 495 * the dv_node sub-tree rooted at the path. 
496 */ 497 int 498 devfs_walk( 499 char *path, 500 void (*callback)(struct dv_node *, void *), 501 void *arg) 502 { 503 char *dirpath, *devnm; 504 struct vnode *dirvp; 505 506 ASSERT(path && callback); 507 508 if (*path != '/' || devfs_mntinfo == NULL) 509 return (ENXIO); 510 511 dcmn_err(("devfs_walk: path = %s", path)); 512 513 dirpath = kmem_alloc(MAXPATHLEN, KM_SLEEP); 514 515 (void) snprintf(dirpath, MAXPATHLEN, "/devices%s", path); 516 517 devnm = strrchr(dirpath, '/'); 518 519 ASSERT(devnm); 520 521 *devnm++ = '\0'; 522 523 if (lookupname(dirpath, UIO_SYSSPACE, 0, NULL, &dirvp)) { 524 dcmn_err(("directory %s not found\n", dirpath)); 525 kmem_free(dirpath, MAXPATHLEN); 526 return (ENXIO); 527 } 528 529 /* 530 * if path == "/", visit the root dv_node 531 */ 532 if (*devnm == '\0') { 533 callback(VTODV(dirvp), arg); 534 devnm = NULL; 535 } 536 537 dv_walk(VTODV(dirvp), devnm, callback, arg); 538 539 VN_RELE(dirvp); 540 541 kmem_free(dirpath, MAXPATHLEN); 542 543 return (0); 544 } 545 546 int 547 devfs_devpolicy(vnode_t *vp, devplcy_t **dpp) 548 { 549 struct vnode *rvp; 550 struct dv_node *dvp; 551 int rval = -1; 552 553 /* fail if devfs not mounted yet */ 554 if (devfs_mntinfo == NULL) 555 return (rval); 556 557 if (VOP_REALVP(vp, &rvp, NULL) == 0 && vn_matchops(rvp, dv_vnodeops)) { 558 dvp = VTODV(rvp); 559 rw_enter(&dvp->dv_contents, RW_READER); 560 if (dvp->dv_priv) { 561 dphold(dvp->dv_priv); 562 *dpp = dvp->dv_priv; 563 rval = 0; 564 } 565 rw_exit(&dvp->dv_contents); 566 } 567 return (rval); 568 }