1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 /* 25 * Copyright (c) 2013, Joyent, Inc. All rights reserved. 26 */ 27 28 /* 29 * vnode ops for the /dev filesystem 30 * 31 * - VDIR, VCHR, CBLK, and VLNK are considered must supported files 32 * - VREG and VDOOR are used for some internal implementations in 33 * the global zone, e.g. devname and devfsadm communication 34 * - other file types are unusual in this namespace and 35 * not supported for now 36 */ 37 38 /* 39 * sdev has a few basic goals: 40 * o Provide /dev for the global zone as well as various non-global zones. 41 * o Provide the basic functionality that devfsadm might need (mknod, 42 * symlinks, etc.) 43 * o Allow persistent permissions on files in /dev. 44 * o Allow for dynamic directories and nodes for use by various services (pts, 45 * zvol, net, etc.) 46 * 47 * The sdev file system is primarily made up of sdev_node_t's which is sdev's 48 * counterpart to the vnode_t. There are two different classes of sdev_node_t's 49 * that we generally care about, dynamic and otherwise. 
50 * 51 * Persisting Information 52 * ---------------------- 53 * 54 * When sdev is mounted, it keeps track of the underlying file system it is 55 * mounted over. In certain situations, sdev will go and create entries in that 56 * underlying file system. These underlying 'back end' nodes are used as proxies 57 * for various changes in permissions. While specific sets of nodes, such as 58 * dynamic ones, are exempt, this process stores permission changes against 59 * these back end nodes. The point of all of this is to allow for these settings 60 * to persist across host and zone reboots. As an example, consider the entry 61 * /dev/dsk/c0t0d0 which is a character device and that / is in UFS. Upon 62 * changing the permissions on c0t0d0 you'd have the following logical 63 * relationships: 64 * 65 * +------------------+ sdev_vnode +--------------+ 66 * | sdev_node_t |<---------------->| vnode_t | 67 * | /dev/dsk/c0t0d0 |<---------------->| for sdev | 68 * +------------------+ +--------------+ 69 * | 70 * | sdev_attrvp 71 * | 72 * | +---------------------+ 73 * +--->| vnode_t for UFS|ZFS | 74 * | /dev/dsk/c0t0d0 | 75 * +---------------------+ 76 * 77 * sdev is generally in memory. Therefore when a lookup happens and there is no 78 * entry already inside of a directory cache, it will next check the backing 79 * store. If the backing store exists, we will reconstitute the sdev_node based 80 * on the information that we persisted. When we create the backing store node, 81 * we use the struct vattr information that we already have in sdev_node_t. 82 * Because of this, we already know if the entry was previously a symlink, 83 * directory, or some other kind of type. Note that not all types of nodes are 84 * supported. Currently only VDIR, VCHR, VBLK, VREG, VDOOR, and VLNK are 85 * eligible to be persisted. 86 * 87 * When the sdev_node is created and the lookup is done, we grab a hold on the 88 * underlying vnode as part of the call to VOP_LOOKUP. 
That reference is held 89 * until the sdev_node becomes inactive. Once its reference count reaches one 90 * and the VOP_INACTIVE callback fires leading to the destruction of the node, 91 * the reference on the underlying vnode will be released. 92 * 93 * The backing store node will be deleted only when the node itself is deleted 94 * through the means of a VOP_REMOVE, VOP_RMDIR, or similar call. 95 * 96 * Not everything can be persisted, see The Rules section for more details. 97 * 98 * Dynamic Nodes 99 * ------------- 100 * 101 * Dynamic nodes allow for specific interactions with various kernel subsystems 102 * when looking up directory entries. This allows the lookup and readdir 103 * functions to check against the kernel subsystem for validity, e.g. does a 104 * zvol or nic still exist. 105 * 106 * More specifically, when we create various directories we check if the 107 * directory name matches that of one of the names in the vtab[] (sdev_subr.c). 108 * If it does, we swap out the vnode operations into a new set which combine the 109 * normal sdev vnode operations with the dynamic set here. 110 * 111 * In addition, various dynamic nodes implement a verification entry point. This 112 * verification entry is used as a part of lookup and readdir. The goal for 113 * these dynamic nodes is to allow them to check with the underlying subsystems 114 * to ensure that these devices are still present, or if they have gone away, to 115 * remove them from the results. This is indicated by using the SDEV_VTOR flag 116 * in vtab[]. 117 * 118 * Dynamic nodes have additional restrictions placed upon them. They may only 119 * appear at the top level directory of the file system. In addition, users 120 * cannot create dirents below any level of a dynamic node aside from its special 121 * vnops. 122 * 123 * Profiles 124 * -------- 125 * 126 * Profiles exist for the purpose of non-global zones. 
They work with the zone 127 * brands and zoneadmd to set up a filter of allowed devices that can appear in 128 * a non-global zone's /dev. These are sent to sdev by means of libdevinfo and a 129 * modctl system call. Specifically it allows one to add patterns of device 130 * paths to include and exclude. It allows for a collection of symlinks to be 131 * added and it allows for remapping names. 132 * 133 * When operating in a non-global zone, several of the sdev vnops are redirected 134 * to the profile versions. These impose additional restrictions such as 135 * enforcing that a non-global zone's /dev is read only. 136 * 137 * sdev_node_t States 138 * ------------------ 139 * 140 * A given sdev_node_t has a field called the sdev_state which describes where 141 * in the sdev life cycle it is. There are three primary states: SDEV_INIT, 142 * SDEV_READY, and SDEV_ZOMBIE. 143 * 144 * SDEV_INIT: When a new /dev file is first looked up, a sdev_node 145 * is allocated, initialized and added to the directory's 146 * sdev_node cache. A node at this state will also 147 * have the SDEV_LOOKUP flag set. 148 * 149 * Other threads that are trying to look up a node at 150 * this state will be blocked until the SDEV_LOOKUP flag 151 * is cleared. 152 * 153 * When the SDEV_LOOKUP flag is cleared, the node may 154 * transition into the SDEV_READY state for a successful 155 * lookup or the node is removed from the directory cache 156 * and destroyed if the named node can not be found. 157 * An ENOENT error is returned for the second case. 158 * 159 * SDEV_READY: A /dev file has been successfully looked up and 160 * associated with a vnode. The /dev file is available 161 * for the supported /dev file system operations. 162 * 163 * SDEV_ZOMBIE: Deletion of a /dev file has been explicitly issued 164 * to an SDEV_READY node. The node is transitioned into 165 * the SDEV_ZOMBIE state if the vnode reference count 166 * is still held. 
A SDEV_ZOMBIE node does not support 167 * any of the /dev file system operations. A SDEV_ZOMBIE 168 * node is immediately removed from the directory cache 169 * and destroyed once the reference count reaches zero. 170 * 171 * Historically nodes that were marked SDEV_ZOMBIE were not removed from the 172 * underlying directory caches. This has been the source of numerous bugs and 173 * thus to better mimic what happens on a real file system, it is no longer the 174 * case. 175 * 176 * The following state machine describes the life cycle of a given node and its 177 * associated states: 178 * 179 * node is . . . . . 180 * allocated via . +-------------+ . . . . . . . vnode_t refcount 181 * sdev_nodeinit() . | Unallocated | . reaches zero and 182 * +--------*-----| Memory |<--------*---+ sdev_inactive is 183 * | +-------------+ | called. 184 * | +------------^ | called. 185 * v | | 186 * +-----------+ * . . sdev_nodeready() +-------------+ 187 * | SDEV_INIT | | or related setup | SDEV_ZOMBIE | 188 * +-----------+ | failure +-------------+ 189 * | | ^ 190 * | | +------------+ | 191 * +-*----------->| SDEV_READY |--------*-----+ 192 * . +------------+ . The node is no longer 193 * . . node successfully . . . . . valid or we've been 194 * inserted into the asked to remove it. 195 * directory cache This happens via 196 * and sdev_nodready() sdev_dirdelete(). 197 * call successful. 198 * 199 * Adding and Removing Dirents, Zombie Nodes 200 * ----------------------------------------- 201 * 202 * As part of doing a lookup, readdir, or an explicit creation operation like 203 * mkdir or create, nodes may be created. Every directory has an avl tree which 204 * contains its children, the sdev_entries tree. This is only used if the type 205 * is VDIR. Access to this is controlled by the sdev_node_t's contents_lock and 206 * it is managed through sdev_cache_update(). 207 * 208 * Every sdev_node_t has a field sdev_state, which describes the current state 209 * of the node. 
A node is generally speaking in the SDEV_READY state. When it is 210 * there, it can be looked up, accessed, and operations performed on it. When a 211 * node is going to be removed from the directory cache it is marked as a 212 * zombie. Once a node becomes a zombie, no other file system operations will 213 * succeed and it will continue to exist as a node until the vnode count on the 214 * node reaches zero. At that point, the node will be freed. However, once a 215 * node has been marked as a zombie, it will be removed immediately from the 216 * directory cache such that no one else may find it again. This means that 217 * someone else can insert a new entry into that directory with the same name 218 * and without a problem. 219 * 220 * To remove a node, see the section on that in The Rules. 221 * 222 * The Rules 223 * --------- 224 * These are the rules to live by when working in sdev. These are not 225 * exhaustive. 226 * 227 * - Set 1: Working with Backing Nodes 228 * o If there is a SDEV_READY sdev_node_t, it knows about its backing node. 229 * o If we find a backing node when looking up an sdev_node_t for the first 230 * time, we use its attributes to build our sdev_node_t. 231 * o If there is a found backing node, or we create a backing node, that's 232 * when we grab the hold on its vnode. 233 * o If we mark an sdev_node_t a ZOMBIE, we must remove its backing node from 234 * the underlying file system. It must not be searchable or findable. 235 * o We release our hold on the backing node vnode when we destroy the 236 * sdev_node_t. 237 * 238 * - Set 2: Locking rules for sdev (not exhaustive) 239 * o The majority of nodes contain an sdev_contents rw lock. You must hold it 240 * for read or write if manipulating its contents appropriately. 241 * o You must lock your parent before yourself. 242 * o If you need your vnode's v_lock and the sdev_contents rw lock, you must 243 * grab the v_lock before the sdev_contents rw_lock. 
244 * o If you release a lock on the node as a part of upgrading it, you must 245 * verify that the node has not become a zombie as a part of this process. 246 * 247 * - Set 3: Zombie Status and What it Means 248 * o If you encounter a node that is a ZOMBIE, that means that it has been 249 * unlinked from the backing store. 250 * o If you release your contents lock and acquire it again (say as part of 251 * trying to grab a write lock) you must check that the node has not become 252 * a zombie. 253 * o You should VERIFY that a looked up node is not a zombie. This follows 254 * from the following logic. To mark something as a zombie means that it is 255 * removed from the parents directory cache. To do that, you must have a 256 * write lock on the parent's sdev_contents. To lookup through that 257 * directory you must have a read lock. This then becomes a simple ordering 258 * problem. If you've been granted the lock then the other operation cannot 259 * be in progress or must have already succeeded. 260 * 261 * - Set 4: Removing Directory Entries (aka making nodes Zombies) 262 * o Write lock must be held on the directory 263 * o Write lock must be held on the node 264 * o Remove the sdev_node_t from its parent cache 265 * o Remove the corresponding backing store node, if it exists, eg. use 266 * VOP_REMOVE or VOP_RMDIR. 267 * o You must NOT make any change in the vnode reference count! Nodes should 268 * only be cleaned up through VOP_INACTIVE callbacks. 269 * o VOP_INACTIVE is the only one responsible for doing the final vn_rele of 270 * the backing store vnode that was grabbed during lookup. 
271 * 272 * - Set 5: What Nodes may be Persisted 273 * o The root, /dev is always persisted 274 * o Any node in vtab which is marked SDEV_DYNAMIC, may not be persisted 275 * unless it is also marked SDEV_PERSIST 276 * o Anything whose parent directory is marked SDEV_PERSIST will pass that 277 * along to the child as long as it does not contradict the above rules 278 */ 279 280 #include <sys/types.h> 281 #include <sys/param.h> 282 #include <sys/t_lock.h> 283 #include <sys/systm.h> 284 #include <sys/sysmacros.h> 285 #include <sys/user.h> 286 #include <sys/time.h> 287 #include <sys/vfs.h> 288 #include <sys/vnode.h> 289 #include <sys/vfs_opreg.h> 290 #include <sys/file.h> 291 #include <sys/fcntl.h> 292 #include <sys/flock.h> 293 #include <sys/kmem.h> 294 #include <sys/uio.h> 295 #include <sys/errno.h> 296 #include <sys/stat.h> 297 #include <sys/cred.h> 298 #include <sys/dirent.h> 299 #include <sys/pathname.h> 300 #include <sys/cmn_err.h> 301 #include <sys/debug.h> 302 #include <sys/policy.h> 303 #include <vm/hat.h> 304 #include <vm/seg_vn.h> 305 #include <vm/seg_map.h> 306 #include <vm/seg.h> 307 #include <vm/as.h> 308 #include <vm/page.h> 309 #include <sys/proc.h> 310 #include <sys/mode.h> 311 #include <sys/sunndi.h> 312 #include <sys/ptms.h> 313 #include <fs/fs_subr.h> 314 #include <sys/fs/dv_node.h> 315 #include <sys/fs/sdev_impl.h> 316 317 /*ARGSUSED*/ 318 static int 319 sdev_open(struct vnode **vpp, int flag, struct cred *cred, caller_context_t *ct) 320 { 321 struct sdev_node *dv = VTOSDEV(*vpp); 322 struct sdev_node *ddv = dv->sdev_dotdot; 323 int error = 0; 324 325 if ((*vpp)->v_type == VDIR) 326 return (0); 327 328 if (!SDEV_IS_GLOBAL(dv)) 329 return (ENOTSUP); 330 331 if ((*vpp)->v_type == VLNK) 332 return (ENOENT); 333 ASSERT((*vpp)->v_type == VREG); 334 if ((*vpp)->v_type != VREG) 335 return (ENOTSUP); 336 337 ASSERT(ddv); 338 rw_enter(&ddv->sdev_contents, RW_READER); 339 if (dv->sdev_attrvp == NULL) { 340 rw_exit(&ddv->sdev_contents); 341 return (ENOENT); 
342 } 343 error = VOP_OPEN(&(dv->sdev_attrvp), flag, cred, ct); 344 rw_exit(&ddv->sdev_contents); 345 return (error); 346 } 347 348 /*ARGSUSED1*/ 349 static int 350 sdev_close(struct vnode *vp, int flag, int count, 351 offset_t offset, struct cred *cred, caller_context_t *ct) 352 { 353 struct sdev_node *dv = VTOSDEV(vp); 354 355 if (vp->v_type == VDIR) { 356 cleanlocks(vp, ttoproc(curthread)->p_pid, 0); 357 cleanshares(vp, ttoproc(curthread)->p_pid); 358 return (0); 359 } 360 361 if (!SDEV_IS_GLOBAL(dv)) 362 return (ENOTSUP); 363 364 ASSERT(vp->v_type == VREG); 365 if (vp->v_type != VREG) 366 return (ENOTSUP); 367 368 ASSERT(dv->sdev_attrvp); 369 return (VOP_CLOSE(dv->sdev_attrvp, flag, count, offset, cred, ct)); 370 } 371 372 /*ARGSUSED*/ 373 static int 374 sdev_read(struct vnode *vp, struct uio *uio, int ioflag, struct cred *cred, 375 struct caller_context *ct) 376 { 377 struct sdev_node *dv = (struct sdev_node *)VTOSDEV(vp); 378 int error; 379 380 if (!SDEV_IS_GLOBAL(dv)) 381 return (EINVAL); 382 383 if (vp->v_type == VDIR) 384 return (EISDIR); 385 386 /* only supporting regular files in /dev */ 387 ASSERT(vp->v_type == VREG); 388 if (vp->v_type != VREG) 389 return (EINVAL); 390 391 ASSERT(RW_READ_HELD(&VTOSDEV(vp)->sdev_contents)); 392 ASSERT(dv->sdev_attrvp); 393 (void) VOP_RWLOCK(dv->sdev_attrvp, 0, ct); 394 error = VOP_READ(dv->sdev_attrvp, uio, ioflag, cred, ct); 395 VOP_RWUNLOCK(dv->sdev_attrvp, 0, ct); 396 return (error); 397 } 398 399 /*ARGSUSED*/ 400 static int 401 sdev_write(struct vnode *vp, struct uio *uio, int ioflag, struct cred *cred, 402 struct caller_context *ct) 403 { 404 struct sdev_node *dv = VTOSDEV(vp); 405 int error = 0; 406 407 if (!SDEV_IS_GLOBAL(dv)) 408 return (EINVAL); 409 410 if (vp->v_type == VDIR) 411 return (EISDIR); 412 413 /* only supporting regular files in /dev */ 414 ASSERT(vp->v_type == VREG); 415 if (vp->v_type != VREG) 416 return (EINVAL); 417 418 ASSERT(dv->sdev_attrvp); 419 420 (void) VOP_RWLOCK(dv->sdev_attrvp, 1, ct); 
421 error = VOP_WRITE(dv->sdev_attrvp, uio, ioflag, cred, ct); 422 VOP_RWUNLOCK(dv->sdev_attrvp, 1, ct); 423 if (error == 0) { 424 sdev_update_timestamps(dv->sdev_attrvp, kcred, 425 AT_MTIME); 426 } 427 return (error); 428 } 429 430 /*ARGSUSED*/ 431 static int 432 sdev_ioctl(struct vnode *vp, int cmd, intptr_t arg, int flag, 433 struct cred *cred, int *rvalp, caller_context_t *ct) 434 { 435 struct sdev_node *dv = VTOSDEV(vp); 436 437 if (!SDEV_IS_GLOBAL(dv) || (vp->v_type == VDIR)) 438 return (ENOTTY); 439 440 ASSERT(vp->v_type == VREG); 441 if (vp->v_type != VREG) 442 return (EINVAL); 443 444 ASSERT(dv->sdev_attrvp); 445 return (VOP_IOCTL(dv->sdev_attrvp, cmd, arg, flag, cred, rvalp, ct)); 446 } 447 448 static int 449 sdev_getattr(struct vnode *vp, struct vattr *vap, int flags, 450 struct cred *cr, caller_context_t *ct) 451 { 452 int error = 0; 453 struct sdev_node *dv = VTOSDEV(vp); 454 struct sdev_node *parent = dv->sdev_dotdot; 455 456 ASSERT(parent); 457 458 rw_enter(&parent->sdev_contents, RW_READER); 459 ASSERT(dv->sdev_attr || dv->sdev_attrvp); 460 461 /* 462 * search order: 463 * - for persistent nodes (SDEV_PERSIST): backstore 464 * - for non-persistent nodes: module ops if global, then memory 465 */ 466 if (dv->sdev_attrvp) { 467 rw_exit(&parent->sdev_contents); 468 error = VOP_GETATTR(dv->sdev_attrvp, vap, flags, cr, ct); 469 sdev_vattr_merge(dv, vap); 470 } else { 471 ASSERT(dv->sdev_attr); 472 *vap = *dv->sdev_attr; 473 sdev_vattr_merge(dv, vap); 474 rw_exit(&parent->sdev_contents); 475 } 476 477 return (error); 478 } 479 480 /*ARGSUSED4*/ 481 static int 482 sdev_setattr(struct vnode *vp, struct vattr *vap, int flags, 483 struct cred *cred, caller_context_t *ctp) 484 { 485 return (devname_setattr_func(vp, vap, flags, cred, NULL, 0)); 486 } 487 488 static int 489 sdev_getsecattr(struct vnode *vp, struct vsecattr *vsap, int flags, 490 struct cred *cr, caller_context_t *ct) 491 { 492 int error; 493 struct sdev_node *dv = VTOSDEV(vp); 494 struct vnode 
*avp = dv->sdev_attrvp; 495 496 if (avp == NULL) { 497 /* return fs_fab_acl() if flavor matches, else do nothing */ 498 if ((SDEV_ACL_FLAVOR(vp) == _ACL_ACLENT_ENABLED && 499 (vsap->vsa_mask & (VSA_ACLCNT | VSA_DFACLCNT))) || 500 (SDEV_ACL_FLAVOR(vp) == _ACL_ACE_ENABLED && 501 (vsap->vsa_mask & (VSA_ACECNT | VSA_ACE)))) 502 return (fs_fab_acl(vp, vsap, flags, cr, ct)); 503 504 return (ENOSYS); 505 } 506 507 (void) VOP_RWLOCK(avp, 1, ct); 508 error = VOP_GETSECATTR(avp, vsap, flags, cr, ct); 509 VOP_RWUNLOCK(avp, 1, ct); 510 return (error); 511 } 512 513 static int 514 sdev_setsecattr(struct vnode *vp, struct vsecattr *vsap, int flags, 515 struct cred *cr, caller_context_t *ct) 516 { 517 int error; 518 struct sdev_node *dv = VTOSDEV(vp); 519 struct vnode *avp = dv->sdev_attrvp; 520 521 if (dv->sdev_state == SDEV_ZOMBIE) 522 return (0); 523 524 if (avp == NULL) { 525 if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_PERSIST(dv)) 526 return (fs_nosys()); 527 ASSERT(dv->sdev_attr); 528 /* 529 * if coming in directly, the acl system call will 530 * have held the read-write lock via VOP_RWLOCK() 531 * If coming in via specfs, specfs will have 532 * held the rw lock on the realvp i.e. us. 533 */ 534 ASSERT(RW_WRITE_HELD(&dv->sdev_contents)); 535 sdev_vattr_merge(dv, dv->sdev_attr); 536 error = sdev_shadow_node(dv, cr); 537 if (error) { 538 return (fs_nosys()); 539 } 540 541 ASSERT(dv->sdev_attrvp); 542 /* clean out the memory copy if any */ 543 if (dv->sdev_attr) { 544 kmem_free(dv->sdev_attr, sizeof (struct vattr)); 545 dv->sdev_attr = NULL; 546 } 547 avp = dv->sdev_attrvp; 548 } 549 ASSERT(avp); 550 551 (void) VOP_RWLOCK(avp, V_WRITELOCK_TRUE, ct); 552 error = VOP_SETSECATTR(avp, vsap, flags, cr, ct); 553 VOP_RWUNLOCK(avp, V_WRITELOCK_TRUE, ct); 554 return (error); 555 } 556 557 /* 558 * There are two different unlocked routines. This one is not static as it is 559 * used as part of the secpolicy_vnode_setattr calls in sdev_subr.c. 
Because it 560 * is used in that function it has to have a specific signature. 561 */ 562 int 563 sdev_unlocked_access(void *vdv, int mode, struct cred *cr) 564 { 565 struct sdev_node *dv = vdv; 566 int shift = 0; 567 uid_t owner = dv->sdev_attr->va_uid; 568 569 if (crgetuid(cr) != owner) { 570 shift += 3; 571 if (groupmember(dv->sdev_attr->va_gid, cr) == 0) 572 shift += 3; 573 } 574 575 return (secpolicy_vnode_access2(cr, SDEVTOV(dv), owner, 576 dv->sdev_attr->va_mode << shift, mode)); 577 } 578 579 static int 580 sdev_self_access(sdev_node_t *dv, int mode, int flags, struct cred *cr, 581 caller_context_t *ct) 582 { 583 int ret; 584 585 ASSERT(dv->sdev_attr || dv->sdev_attrvp); 586 if (dv->sdev_attrvp) { 587 ret = VOP_ACCESS(dv->sdev_attrvp, mode, flags, cr, ct); 588 } else if (dv->sdev_attr) { 589 ret = sdev_unlocked_access(dv, mode, cr); 590 if (ret) 591 ret = EACCES; 592 } 593 594 return (ret); 595 } 596 597 static int 598 sdev_access(struct vnode *vp, int mode, int flags, struct cred *cr, 599 caller_context_t *ct) 600 { 601 struct sdev_node *dv = VTOSDEV(vp); 602 int ret; 603 604 rw_enter(&dv->sdev_contents, RW_READER); 605 ret = sdev_self_access(dv, mode, flags, cr, ct); 606 rw_exit(&dv->sdev_contents); 607 608 return (ret); 609 } 610 611 /* 612 * Lookup 613 */ 614 /*ARGSUSED3*/ 615 static int 616 sdev_lookup(struct vnode *dvp, char *nm, struct vnode **vpp, 617 struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred, 618 caller_context_t *ct, int *direntflags, pathname_t *realpnp) 619 { 620 struct sdev_node *parent; 621 int error; 622 623 parent = VTOSDEV(dvp); 624 ASSERT(parent); 625 626 /* execute access is required to search the directory */ 627 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0) 628 return (error); 629 630 if (!SDEV_IS_GLOBAL(parent)) 631 return (prof_lookup(dvp, nm, vpp, cred)); 632 return (devname_lookup_func(parent, nm, vpp, cred, NULL, 0)); 633 } 634 635 /*ARGSUSED2*/ 636 static int 637 sdev_create(struct vnode 
*dvp, char *nm, struct vattr *vap, vcexcl_t excl, 638 int mode, struct vnode **vpp, struct cred *cred, int flag, 639 caller_context_t *ct, vsecattr_t *vsecp) 640 { 641 struct vnode *vp = NULL; 642 struct vnode *avp; 643 struct sdev_node *parent; 644 struct sdev_node *self = NULL; 645 int error = 0; 646 vtype_t type = vap->va_type; 647 648 ASSERT(type != VNON && type != VBAD); 649 650 if ((type == VFIFO) || (type == VSOCK) || 651 (type == VPROC) || (type == VPORT)) 652 return (ENOTSUP); 653 654 parent = VTOSDEV(dvp); 655 ASSERT(parent); 656 657 rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER); 658 if (parent->sdev_state == SDEV_ZOMBIE) { 659 rw_exit(&parent->sdev_dotdot->sdev_contents); 660 return (ENOENT); 661 } 662 663 /* non-global do not allow pure node creation */ 664 if (!SDEV_IS_GLOBAL(parent)) { 665 rw_exit(&parent->sdev_dotdot->sdev_contents); 666 return (prof_lookup(dvp, nm, vpp, cred)); 667 } 668 rw_exit(&parent->sdev_dotdot->sdev_contents); 669 670 /* execute access is required to search the directory */ 671 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0) 672 return (error); 673 674 /* check existing name */ 675 /* XXXci - We may need to translate the C-I flags on VOP_LOOKUP */ 676 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cred, ct, NULL, NULL); 677 678 /* name found */ 679 if (error == 0) { 680 ASSERT(vp); 681 if (excl == EXCL) { 682 error = EEXIST; 683 } else if ((vp->v_type == VDIR) && (mode & VWRITE)) { 684 /* allowing create/read-only an existing directory */ 685 error = EISDIR; 686 } else { 687 error = VOP_ACCESS(vp, mode, 0, cred, ct); 688 } 689 690 if (error) { 691 VN_RELE(vp); 692 return (error); 693 } 694 695 /* truncation first */ 696 if ((vp->v_type == VREG) && (vap->va_mask & AT_SIZE) && 697 (vap->va_size == 0)) { 698 ASSERT(parent->sdev_attrvp); 699 error = VOP_CREATE(parent->sdev_attrvp, 700 nm, vap, excl, mode, &avp, cred, flag, ct, vsecp); 701 702 if (error) { 703 VN_RELE(vp); 704 return (error); 705 } 706 } 707 708 
sdev_update_timestamps(vp, kcred, 709 AT_CTIME|AT_MTIME|AT_ATIME); 710 *vpp = vp; 711 return (0); 712 } 713 714 /* bail out early */ 715 if (error != ENOENT) 716 return (error); 717 718 /* verify write access - compliance specifies ENXIO */ 719 if ((error = VOP_ACCESS(dvp, VEXEC|VWRITE, 0, cred, ct)) != 0) { 720 if (error == EACCES) 721 error = ENXIO; 722 return (error); 723 } 724 725 /* 726 * For memory-based (ROFS) directory: 727 * - either disallow node creation; 728 * - or implement VOP_CREATE of its own 729 */ 730 rw_enter(&parent->sdev_contents, RW_WRITER); 731 if (!SDEV_IS_PERSIST(parent)) { 732 rw_exit(&parent->sdev_contents); 733 return (ENOTSUP); 734 } 735 ASSERT(parent->sdev_attrvp); 736 error = sdev_mknode(parent, nm, &self, vap, NULL, NULL, 737 cred, SDEV_READY); 738 if (error) { 739 rw_exit(&parent->sdev_contents); 740 if (self) 741 SDEV_RELE(self); 742 return (error); 743 } 744 rw_exit(&parent->sdev_contents); 745 746 ASSERT(self); 747 /* take care the timestamps for the node and its parent */ 748 sdev_update_timestamps(SDEVTOV(self), kcred, 749 AT_CTIME|AT_MTIME|AT_ATIME); 750 sdev_update_timestamps(dvp, kcred, AT_MTIME|AT_ATIME); 751 if (SDEV_IS_GLOBAL(parent)) 752 atomic_inc_ulong(&parent->sdev_gdir_gen); 753 754 /* wake up other threads blocked on looking up this node */ 755 mutex_enter(&self->sdev_lookup_lock); 756 SDEV_UNBLOCK_OTHERS(self, SDEV_LOOKUP); 757 mutex_exit(&self->sdev_lookup_lock); 758 error = sdev_to_vp(self, vpp); 759 return (error); 760 } 761 762 static int 763 sdev_remove(struct vnode *dvp, char *nm, struct cred *cred, 764 caller_context_t *ct, int flags) 765 { 766 int error; 767 struct sdev_node *parent = (struct sdev_node *)VTOSDEV(dvp); 768 struct vnode *vp = NULL; 769 struct sdev_node *dv = NULL; 770 int len; 771 int bkstore; 772 773 /* bail out early */ 774 len = strlen(nm); 775 if (nm[0] == '.') { 776 if (len == 1) { 777 return (EINVAL); 778 } else if (len == 2 && nm[1] == '.') { 779 return (EEXIST); 780 } 781 } 782 783 
ASSERT(parent); 784 rw_enter(&parent->sdev_contents, RW_READER); 785 if (!SDEV_IS_GLOBAL(parent)) { 786 rw_exit(&parent->sdev_contents); 787 return (ENOTSUP); 788 } 789 790 /* execute access is required to search the directory */ 791 if ((error = sdev_self_access(parent, VEXEC, 0, cred, ct)) != 0) { 792 rw_exit(&parent->sdev_contents); 793 return (error); 794 } 795 796 /* check existence first */ 797 dv = sdev_cache_lookup(parent, nm); 798 if (dv == NULL) { 799 rw_exit(&parent->sdev_contents); 800 return (ENOENT); 801 } 802 803 vp = SDEVTOV(dv); 804 if ((dv->sdev_state == SDEV_INIT) || 805 (dv->sdev_state == SDEV_ZOMBIE)) { 806 rw_exit(&parent->sdev_contents); 807 VN_RELE(vp); 808 return (ENOENT); 809 } 810 811 /* write access is required to remove an entry */ 812 if ((error = sdev_self_access(parent, VWRITE, 0, cred, ct)) != 0) { 813 rw_exit(&parent->sdev_contents); 814 VN_RELE(vp); 815 return (error); 816 } 817 818 bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0; 819 if (!rw_tryupgrade(&parent->sdev_contents)) { 820 rw_exit(&parent->sdev_contents); 821 rw_enter(&parent->sdev_contents, RW_WRITER); 822 /* Make sure we didn't become a zombie */ 823 if (parent->sdev_state == SDEV_ZOMBIE) { 824 rw_exit(&parent->sdev_contents); 825 VN_RELE(vp); 826 return (ENOENT); 827 } 828 } 829 830 /* we do not support unlinking a non-empty directory */ 831 if (vp->v_type == VDIR && dv->sdev_nlink > 2) { 832 rw_exit(&parent->sdev_contents); 833 VN_RELE(vp); 834 return (EBUSY); 835 } 836 837 /* 838 * sdev_dirdelete does the real job of: 839 * - make sure no open ref count 840 * - destroying the sdev_node 841 * - releasing the hold on attrvp 842 */ 843 sdev_cache_update(parent, &dv, nm, SDEV_CACHE_DELETE); 844 VN_RELE(vp); 845 rw_exit(&parent->sdev_contents); 846 847 /* 848 * best efforts clean up the backing store 849 */ 850 if (bkstore) { 851 ASSERT(parent->sdev_attrvp); 852 error = VOP_REMOVE(parent->sdev_attrvp, nm, cred, 853 ct, flags); 854 /* 855 * do not report BUSY error 856 * because 
the backing store ref count is released 857 * when the last ref count on the sdev_node is 858 * released. 859 */ 860 if (error == EBUSY) { 861 sdcmn_err2(("sdev_remove: device %s is still on" 862 "disk %s\n", nm, parent->sdev_path)); 863 error = 0; 864 } 865 } 866 867 return (error); 868 } 869 870 /* 871 * Some restrictions for this file system: 872 * - both oldnm and newnm are in the scope of /dev file system, 873 * to simply the namespace management model. 874 */ 875 /*ARGSUSED6*/ 876 static int 877 sdev_rename(struct vnode *odvp, char *onm, struct vnode *ndvp, char *nnm, 878 struct cred *cred, caller_context_t *ct, int flags) 879 { 880 struct sdev_node *fromparent = NULL; 881 struct vattr vattr; 882 struct sdev_node *toparent; 883 struct sdev_node *fromdv = NULL; /* source node */ 884 struct vnode *ovp = NULL; /* source vnode */ 885 struct sdev_node *todv = NULL; /* destination node */ 886 struct vnode *nvp = NULL; /* destination vnode */ 887 int samedir = 0; /* set if odvp == ndvp */ 888 struct vnode *realvp; 889 int error = 0; 890 dev_t fsid; 891 int bkstore = 0; 892 vtype_t type; 893 894 /* prevent modifying "." and ".." */ 895 if ((onm[0] == '.' && 896 (onm[1] == '\0' || (onm[1] == '.' && onm[2] == '\0'))) || 897 (nnm[0] == '.' && 898 (nnm[1] == '\0' || (nnm[1] == '.' 
&& nnm[2] == '\0')))) { 899 return (EINVAL); 900 } 901 902 fromparent = VTOSDEV(odvp); 903 toparent = VTOSDEV(ndvp); 904 905 /* ZOMBIE parent doesn't allow new node creation */ 906 rw_enter(&fromparent->sdev_dotdot->sdev_contents, RW_READER); 907 if (fromparent->sdev_state == SDEV_ZOMBIE) { 908 rw_exit(&fromparent->sdev_dotdot->sdev_contents); 909 return (ENOENT); 910 } 911 912 /* renaming only supported for global device nodes */ 913 if (!SDEV_IS_GLOBAL(fromparent)) { 914 rw_exit(&fromparent->sdev_dotdot->sdev_contents); 915 return (ENOTSUP); 916 } 917 rw_exit(&fromparent->sdev_dotdot->sdev_contents); 918 919 rw_enter(&toparent->sdev_dotdot->sdev_contents, RW_READER); 920 if (toparent->sdev_state == SDEV_ZOMBIE) { 921 rw_exit(&toparent->sdev_dotdot->sdev_contents); 922 return (ENOENT); 923 } 924 rw_exit(&toparent->sdev_dotdot->sdev_contents); 925 926 /* 927 * acquire the global lock to prevent 928 * mount/unmount/other rename activities. 929 */ 930 mutex_enter(&sdev_lock); 931 932 /* check existence of the source node */ 933 /* XXXci - We may need to translate the C-I flags on VOP_LOOKUP */ 934 error = VOP_LOOKUP(odvp, onm, &ovp, NULL, 0, NULL, cred, ct, 935 NULL, NULL); 936 if (error) { 937 sdcmn_err2(("sdev_rename: the source node %s exists\n", 938 onm)); 939 mutex_exit(&sdev_lock); 940 return (error); 941 } 942 943 if (VOP_REALVP(ovp, &realvp, ct) == 0) { 944 VN_HOLD(realvp); 945 VN_RELE(ovp); 946 ovp = realvp; 947 } 948 949 /* check existence of destination */ 950 /* XXXci - We may need to translate the C-I flags on VOP_LOOKUP */ 951 error = VOP_LOOKUP(ndvp, nnm, &nvp, NULL, 0, NULL, cred, ct, 952 NULL, NULL); 953 if (error && (error != ENOENT)) { 954 mutex_exit(&sdev_lock); 955 VN_RELE(ovp); 956 return (error); 957 } 958 959 if (nvp && (VOP_REALVP(nvp, &realvp, ct) == 0)) { 960 VN_HOLD(realvp); 961 VN_RELE(nvp); 962 nvp = realvp; 963 } 964 965 /* 966 * make sure the source and the destination are 967 * in the same dev filesystem 968 */ 969 if (odvp != ndvp) { 
970 vattr.va_mask = AT_FSID; 971 if (error = VOP_GETATTR(odvp, &vattr, 0, cred, ct)) { 972 mutex_exit(&sdev_lock); 973 VN_RELE(ovp); 974 if (nvp != NULL) 975 VN_RELE(nvp); 976 return (error); 977 } 978 fsid = vattr.va_fsid; 979 vattr.va_mask = AT_FSID; 980 if (error = VOP_GETATTR(ndvp, &vattr, 0, cred, ct)) { 981 mutex_exit(&sdev_lock); 982 VN_RELE(ovp); 983 if (nvp != NULL) 984 VN_RELE(nvp); 985 return (error); 986 } 987 if (fsid != vattr.va_fsid) { 988 mutex_exit(&sdev_lock); 989 VN_RELE(ovp); 990 if (nvp != NULL) 991 VN_RELE(nvp); 992 return (EXDEV); 993 } 994 } 995 996 /* make sure the old entry can be deleted */ 997 error = VOP_ACCESS(odvp, VWRITE, 0, cred, ct); 998 if (error) { 999 mutex_exit(&sdev_lock); 1000 VN_RELE(ovp); 1001 if (nvp != NULL) 1002 VN_RELE(nvp); 1003 return (error); 1004 } 1005 1006 /* make sure the destination allows creation */ 1007 samedir = (fromparent == toparent); 1008 if (!samedir) { 1009 error = VOP_ACCESS(ndvp, VEXEC|VWRITE, 0, cred, ct); 1010 if (error) { 1011 mutex_exit(&sdev_lock); 1012 VN_RELE(ovp); 1013 if (nvp != NULL) 1014 VN_RELE(nvp); 1015 return (error); 1016 } 1017 } 1018 1019 fromdv = VTOSDEV(ovp); 1020 ASSERT(fromdv); 1021 1022 /* destination file exists */ 1023 if (nvp != NULL) { 1024 todv = VTOSDEV(nvp); 1025 ASSERT(todv); 1026 } 1027 1028 if ((fromdv->sdev_flags & SDEV_DYNAMIC) != 0 || 1029 (todv != NULL && (todv->sdev_flags & SDEV_DYNAMIC) != 0)) { 1030 mutex_exit(&sdev_lock); 1031 if (nvp != NULL) 1032 VN_RELE(nvp); 1033 VN_RELE(ovp); 1034 return (EACCES); 1035 } 1036 1037 /* 1038 * link source to new target in the memory. Regardless of failure, we 1039 * must rele our hold on nvp. 
1040 */ 1041 error = sdev_rnmnode(fromparent, fromdv, toparent, &todv, nnm, cred); 1042 if (nvp != NULL) 1043 VN_RELE(nvp); 1044 if (error) { 1045 sdcmn_err2(("sdev_rename: renaming %s to %s failed " 1046 " with error %d\n", onm, nnm, error)); 1047 mutex_exit(&sdev_lock); 1048 VN_RELE(ovp); 1049 return (error); 1050 } 1051 1052 /* 1053 * unlink from source 1054 */ 1055 rw_enter(&fromparent->sdev_contents, RW_READER); 1056 fromdv = sdev_cache_lookup(fromparent, onm); 1057 if (fromdv == NULL) { 1058 rw_exit(&fromparent->sdev_contents); 1059 mutex_exit(&sdev_lock); 1060 VN_RELE(ovp); 1061 sdcmn_err2(("sdev_rename: the source is deleted already\n")); 1062 return (0); 1063 } 1064 1065 if (fromdv->sdev_state == SDEV_ZOMBIE) { 1066 rw_exit(&fromparent->sdev_contents); 1067 mutex_exit(&sdev_lock); 1068 VN_RELE(SDEVTOV(fromdv)); 1069 VN_RELE(ovp); 1070 sdcmn_err2(("sdev_rename: the source is being deleted\n")); 1071 return (0); 1072 } 1073 rw_exit(&fromparent->sdev_contents); 1074 ASSERT(SDEVTOV(fromdv) == ovp); 1075 VN_RELE(ovp); 1076 1077 /* clean out the directory contents before it can be removed */ 1078 type = SDEVTOV(fromdv)->v_type; 1079 if (type == VDIR) { 1080 error = sdev_cleandir(fromdv, NULL, 0); 1081 sdcmn_err2(("sdev_rename: cleandir finished with %d\n", 1082 error)); 1083 if (error == EBUSY) 1084 error = 0; 1085 } 1086 1087 rw_enter(&fromparent->sdev_contents, RW_WRITER); 1088 bkstore = SDEV_IS_PERSIST(fromdv) ? 
1 : 0; 1089 sdev_cache_update(fromparent, &fromdv, onm, 1090 SDEV_CACHE_DELETE); 1091 VN_RELE(SDEVTOV(fromdv)); 1092 1093 /* best effforts clean up the backing store */ 1094 if (bkstore) { 1095 ASSERT(fromparent->sdev_attrvp); 1096 if (type != VDIR) { 1097 /* XXXci - We may need to translate the C-I flags on VOP_REMOVE */ 1098 error = VOP_REMOVE(fromparent->sdev_attrvp, 1099 onm, kcred, ct, 0); 1100 } else { 1101 /* XXXci - We may need to translate the C-I flags on VOP_RMDIR */ 1102 error = VOP_RMDIR(fromparent->sdev_attrvp, 1103 onm, fromparent->sdev_attrvp, kcred, ct, 0); 1104 } 1105 1106 if (error) { 1107 sdcmn_err2(("sdev_rename: device %s is " 1108 "still on disk %s\n", onm, 1109 fromparent->sdev_path)); 1110 error = 0; 1111 } 1112 } 1113 rw_exit(&fromparent->sdev_contents); 1114 mutex_exit(&sdev_lock); 1115 1116 /* once reached to this point, the rename is regarded successful */ 1117 return (0); 1118 } 1119 1120 /* 1121 * dev-fs version of "ln -s path dev-name" 1122 * tnm - path, e.g. /devices/... or /dev/... 
1123 * lnm - dev_name 1124 */ 1125 /*ARGSUSED6*/ 1126 static int 1127 sdev_symlink(struct vnode *dvp, char *lnm, struct vattr *tva, 1128 char *tnm, struct cred *cred, caller_context_t *ct, int flags) 1129 { 1130 int error; 1131 struct vnode *vp = NULL; 1132 struct sdev_node *parent = (struct sdev_node *)VTOSDEV(dvp); 1133 struct sdev_node *self = (struct sdev_node *)NULL; 1134 1135 ASSERT(parent); 1136 rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER); 1137 if (parent->sdev_state == SDEV_ZOMBIE) { 1138 rw_exit(&parent->sdev_dotdot->sdev_contents); 1139 sdcmn_err2(("sdev_symlink: parent %s is ZOMBIED \n", 1140 parent->sdev_name)); 1141 return (ENOENT); 1142 } 1143 1144 if (!SDEV_IS_GLOBAL(parent)) { 1145 rw_exit(&parent->sdev_dotdot->sdev_contents); 1146 return (ENOTSUP); 1147 } 1148 rw_exit(&parent->sdev_dotdot->sdev_contents); 1149 1150 /* execute access is required to search a directory */ 1151 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0) 1152 return (error); 1153 1154 /* find existing name */ 1155 /* XXXci - We may need to translate the C-I flags here */ 1156 error = VOP_LOOKUP(dvp, lnm, &vp, NULL, 0, NULL, cred, ct, NULL, NULL); 1157 if (error == 0) { 1158 ASSERT(vp); 1159 VN_RELE(vp); 1160 sdcmn_err2(("sdev_symlink: node %s already exists\n", lnm)); 1161 return (EEXIST); 1162 } 1163 if (error != ENOENT) 1164 return (error); 1165 1166 /* write access is required to create a symlink */ 1167 if ((error = VOP_ACCESS(dvp, VWRITE, 0, cred, ct)) != 0) 1168 return (error); 1169 1170 /* put it into memory cache */ 1171 rw_enter(&parent->sdev_contents, RW_WRITER); 1172 error = sdev_mknode(parent, lnm, &self, tva, NULL, (void *)tnm, 1173 cred, SDEV_READY); 1174 if (error) { 1175 rw_exit(&parent->sdev_contents); 1176 sdcmn_err2(("sdev_symlink: node %s creation failed\n", lnm)); 1177 if (self) 1178 SDEV_RELE(self); 1179 1180 return (error); 1181 } 1182 ASSERT(self && (self->sdev_state == SDEV_READY)); 1183 rw_exit(&parent->sdev_contents); 1184 1185 /* 
take care the timestamps for the node and its parent */ 1186 sdev_update_timestamps(SDEVTOV(self), kcred, 1187 AT_CTIME|AT_MTIME|AT_ATIME); 1188 sdev_update_timestamps(dvp, kcred, AT_MTIME|AT_ATIME); 1189 if (SDEV_IS_GLOBAL(parent)) 1190 atomic_inc_ulong(&parent->sdev_gdir_gen); 1191 1192 /* wake up other threads blocked on looking up this node */ 1193 mutex_enter(&self->sdev_lookup_lock); 1194 SDEV_UNBLOCK_OTHERS(self, SDEV_LOOKUP); 1195 mutex_exit(&self->sdev_lookup_lock); 1196 SDEV_RELE(self); /* don't return with vnode held */ 1197 return (0); 1198 } 1199 1200 /*ARGSUSED6*/ 1201 static int 1202 sdev_mkdir(struct vnode *dvp, char *nm, struct vattr *va, struct vnode **vpp, 1203 struct cred *cred, caller_context_t *ct, int flags, vsecattr_t *vsecp) 1204 { 1205 int error; 1206 struct sdev_node *parent = (struct sdev_node *)VTOSDEV(dvp); 1207 struct sdev_node *self = NULL; 1208 struct vnode *vp = NULL; 1209 1210 ASSERT(parent && parent->sdev_dotdot); 1211 rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER); 1212 if (parent->sdev_state == SDEV_ZOMBIE) { 1213 rw_exit(&parent->sdev_dotdot->sdev_contents); 1214 return (ENOENT); 1215 } 1216 1217 /* non-global do not allow pure directory creation */ 1218 if (!SDEV_IS_GLOBAL(parent)) { 1219 rw_exit(&parent->sdev_dotdot->sdev_contents); 1220 return (prof_lookup(dvp, nm, vpp, cred)); 1221 } 1222 rw_exit(&parent->sdev_dotdot->sdev_contents); 1223 1224 /* execute access is required to search the directory */ 1225 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0) { 1226 return (error); 1227 } 1228 1229 /* find existing name */ 1230 /* XXXci - We may need to translate the C-I flags on VOP_LOOKUP */ 1231 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cred, ct, NULL, NULL); 1232 if (error == 0) { 1233 VN_RELE(vp); 1234 return (EEXIST); 1235 } 1236 if (error != ENOENT) 1237 return (error); 1238 1239 /* require write access to create a directory */ 1240 if ((error = VOP_ACCESS(dvp, VWRITE, 0, cred, ct)) != 0) { 1241 
return (error); 1242 } 1243 1244 /* put it into memory */ 1245 rw_enter(&parent->sdev_contents, RW_WRITER); 1246 error = sdev_mknode(parent, nm, &self, 1247 va, NULL, NULL, cred, SDEV_READY); 1248 if (error) { 1249 rw_exit(&parent->sdev_contents); 1250 if (self) 1251 SDEV_RELE(self); 1252 return (error); 1253 } 1254 ASSERT(self && (self->sdev_state == SDEV_READY)); 1255 rw_exit(&parent->sdev_contents); 1256 1257 /* take care the timestamps for the node and its parent */ 1258 sdev_update_timestamps(SDEVTOV(self), kcred, 1259 AT_CTIME|AT_MTIME|AT_ATIME); 1260 sdev_update_timestamps(dvp, kcred, AT_MTIME|AT_ATIME); 1261 if (SDEV_IS_GLOBAL(parent)) 1262 atomic_inc_ulong(&parent->sdev_gdir_gen); 1263 1264 /* wake up other threads blocked on looking up this node */ 1265 mutex_enter(&self->sdev_lookup_lock); 1266 SDEV_UNBLOCK_OTHERS(self, SDEV_LOOKUP); 1267 mutex_exit(&self->sdev_lookup_lock); 1268 *vpp = SDEVTOV(self); 1269 return (0); 1270 } 1271 1272 /* 1273 * allowing removing an empty directory under /dev 1274 */ 1275 /*ARGSUSED*/ 1276 static int 1277 sdev_rmdir(struct vnode *dvp, char *nm, struct vnode *cdir, struct cred *cred, 1278 caller_context_t *ct, int flags) 1279 { 1280 int error = 0; 1281 struct sdev_node *parent = (struct sdev_node *)VTOSDEV(dvp); 1282 struct sdev_node *self = NULL; 1283 struct vnode *vp = NULL; 1284 1285 /* bail out early */ 1286 if (strcmp(nm, ".") == 0) 1287 return (EINVAL); 1288 if (strcmp(nm, "..") == 0) 1289 return (EEXIST); /* should be ENOTEMPTY */ 1290 1291 /* no destruction of non-global node */ 1292 ASSERT(parent && parent->sdev_dotdot); 1293 rw_enter(&parent->sdev_dotdot->sdev_contents, RW_READER); 1294 if (!SDEV_IS_GLOBAL(parent)) { 1295 rw_exit(&parent->sdev_dotdot->sdev_contents); 1296 return (ENOTSUP); 1297 } 1298 rw_exit(&parent->sdev_dotdot->sdev_contents); 1299 1300 /* execute access is required to search the directory */ 1301 if ((error = VOP_ACCESS(dvp, VEXEC|VWRITE, 0, cred, ct)) != 0) 1302 return (error); 1303 1304 /* 
check existing name */ 1305 rw_enter(&parent->sdev_contents, RW_WRITER); 1306 self = sdev_cache_lookup(parent, nm); 1307 if (self == NULL) { 1308 rw_exit(&parent->sdev_contents); 1309 return (ENOENT); 1310 } 1311 1312 vp = SDEVTOV(self); 1313 if ((self->sdev_state == SDEV_INIT) || 1314 (self->sdev_state == SDEV_ZOMBIE)) { 1315 rw_exit(&parent->sdev_contents); 1316 VN_RELE(vp); 1317 return (ENOENT); 1318 } 1319 1320 /* some sanity checks */ 1321 if (vp == dvp || vp == cdir) { 1322 rw_exit(&parent->sdev_contents); 1323 VN_RELE(vp); 1324 return (EINVAL); 1325 } 1326 1327 if (vp->v_type != VDIR) { 1328 rw_exit(&parent->sdev_contents); 1329 VN_RELE(vp); 1330 return (ENOTDIR); 1331 } 1332 1333 if (vn_vfswlock(vp)) { 1334 rw_exit(&parent->sdev_contents); 1335 VN_RELE(vp); 1336 return (EBUSY); 1337 } 1338 1339 if (vn_mountedvfs(vp) != NULL) { 1340 rw_exit(&parent->sdev_contents); 1341 vn_vfsunlock(vp); 1342 VN_RELE(vp); 1343 return (EBUSY); 1344 } 1345 1346 self = VTOSDEV(vp); 1347 /* bail out on a non-empty directory */ 1348 rw_enter(&self->sdev_contents, RW_READER); 1349 if (self->sdev_nlink > 2) { 1350 rw_exit(&self->sdev_contents); 1351 rw_exit(&parent->sdev_contents); 1352 vn_vfsunlock(vp); 1353 VN_RELE(vp); 1354 return (ENOTEMPTY); 1355 } 1356 rw_exit(&self->sdev_contents); 1357 1358 /* unlink it from the directory cache */ 1359 sdev_cache_update(parent, &self, nm, SDEV_CACHE_DELETE); 1360 rw_exit(&parent->sdev_contents); 1361 vn_vfsunlock(vp); 1362 VN_RELE(vp); 1363 1364 /* best effort to clean up the backing store */ 1365 if (SDEV_IS_PERSIST(parent)) { 1366 ASSERT(parent->sdev_attrvp); 1367 error = VOP_RMDIR(parent->sdev_attrvp, nm, 1368 parent->sdev_attrvp, kcred, ct, flags); 1369 1370 if (error) 1371 sdcmn_err2(("sdev_rmdir: cleaning device %s is on" 1372 " disk error %d\n", parent->sdev_path, error)); 1373 if (error == EBUSY) 1374 error = 0; 1375 1376 } 1377 1378 return (error); 1379 } 1380 1381 /* 1382 * read the contents of a symbolic link 1383 */ 1384 static 
int 1385 sdev_readlink(struct vnode *vp, struct uio *uiop, struct cred *cred, 1386 caller_context_t *ct) 1387 { 1388 struct sdev_node *dv; 1389 int error = 0; 1390 1391 ASSERT(vp->v_type == VLNK); 1392 1393 dv = VTOSDEV(vp); 1394 1395 if (dv->sdev_attrvp) { 1396 /* non-NULL attrvp implys a persisted node at READY state */ 1397 return (VOP_READLINK(dv->sdev_attrvp, uiop, cred, ct)); 1398 } else if (dv->sdev_symlink != NULL) { 1399 /* memory nodes, e.g. local nodes */ 1400 rw_enter(&dv->sdev_contents, RW_READER); 1401 sdcmn_err2(("sdev_readlink link is %s\n", dv->sdev_symlink)); 1402 error = uiomove(dv->sdev_symlink, strlen(dv->sdev_symlink), 1403 UIO_READ, uiop); 1404 rw_exit(&dv->sdev_contents); 1405 return (error); 1406 } 1407 1408 return (ENOENT); 1409 } 1410 1411 /*ARGSUSED4*/ 1412 static int 1413 sdev_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred, int *eofp, 1414 caller_context_t *ct, int flags) 1415 { 1416 struct sdev_node *parent = VTOSDEV(dvp); 1417 int error; 1418 1419 /* 1420 * We must check that we have execute access to search the directory -- 1421 * but because our sdev_contents lock is already held as a reader (the 1422 * caller must have done a VOP_RWLOCK()), we call directly into the 1423 * underlying access routine if sdev_attr is non-NULL. 
1424 */ 1425 if (parent->sdev_attr != NULL) { 1426 VERIFY(RW_READ_HELD(&parent->sdev_contents)); 1427 1428 if (sdev_unlocked_access(parent, VEXEC, cred) != 0) 1429 return (EACCES); 1430 } else { 1431 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0) 1432 return (error); 1433 } 1434 1435 ASSERT(parent); 1436 if (!SDEV_IS_GLOBAL(parent)) 1437 prof_filldir(parent); 1438 return (devname_readdir_func(dvp, uiop, cred, eofp, SDEV_BROWSE)); 1439 } 1440 1441 /*ARGSUSED1*/ 1442 static void 1443 sdev_inactive(struct vnode *vp, struct cred *cred, caller_context_t *ct) 1444 { 1445 devname_inactive_func(vp, cred, NULL); 1446 } 1447 1448 /*ARGSUSED2*/ 1449 static int 1450 sdev_fid(struct vnode *vp, struct fid *fidp, caller_context_t *ct) 1451 { 1452 struct sdev_node *dv = VTOSDEV(vp); 1453 struct sdev_fid *sdev_fid; 1454 1455 if (fidp->fid_len < (sizeof (struct sdev_fid) - sizeof (ushort_t))) { 1456 fidp->fid_len = sizeof (struct sdev_fid) - sizeof (ushort_t); 1457 return (ENOSPC); 1458 } 1459 1460 sdev_fid = (struct sdev_fid *)fidp; 1461 bzero(sdev_fid, sizeof (struct sdev_fid)); 1462 sdev_fid->sdevfid_len = 1463 (int)sizeof (struct sdev_fid) - sizeof (ushort_t); 1464 sdev_fid->sdevfid_ino = dv->sdev_ino; 1465 1466 return (0); 1467 } 1468 1469 /* 1470 * This pair of routines bracket all VOP_READ, VOP_WRITE 1471 * and VOP_READDIR requests. The contents lock stops things 1472 * moving around while we're looking at them. 1473 */ 1474 /*ARGSUSED2*/ 1475 static int 1476 sdev_rwlock(struct vnode *vp, int write_flag, caller_context_t *ctp) 1477 { 1478 rw_enter(&VTOSDEV(vp)->sdev_contents, 1479 write_flag ? RW_WRITER : RW_READER); 1480 return (write_flag ? 
V_WRITELOCK_TRUE : V_WRITELOCK_FALSE); 1481 } 1482 1483 /*ARGSUSED1*/ 1484 static void 1485 sdev_rwunlock(struct vnode *vp, int write_flag, caller_context_t *ctp) 1486 { 1487 rw_exit(&VTOSDEV(vp)->sdev_contents); 1488 } 1489 1490 /*ARGSUSED1*/ 1491 static int 1492 sdev_seek(struct vnode *vp, offset_t ooff, offset_t *noffp, 1493 caller_context_t *ct) 1494 { 1495 struct vnode *attrvp = VTOSDEV(vp)->sdev_attrvp; 1496 1497 ASSERT(vp->v_type != VCHR && 1498 vp->v_type != VBLK && vp->v_type != VLNK); 1499 1500 if (vp->v_type == VDIR) 1501 return (fs_seek(vp, ooff, noffp, ct)); 1502 1503 ASSERT(attrvp); 1504 return (VOP_SEEK(attrvp, ooff, noffp, ct)); 1505 } 1506 1507 /*ARGSUSED1*/ 1508 static int 1509 sdev_frlock(struct vnode *vp, int cmd, struct flock64 *bfp, int flag, 1510 offset_t offset, struct flk_callback *flk_cbp, struct cred *cr, 1511 caller_context_t *ct) 1512 { 1513 int error; 1514 struct sdev_node *dv = VTOSDEV(vp); 1515 1516 ASSERT(dv); 1517 ASSERT(dv->sdev_attrvp); 1518 error = VOP_FRLOCK(dv->sdev_attrvp, cmd, bfp, flag, offset, 1519 flk_cbp, cr, ct); 1520 1521 return (error); 1522 } 1523 1524 static int 1525 sdev_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr, 1526 caller_context_t *ct) 1527 { 1528 switch (cmd) { 1529 case _PC_ACL_ENABLED: 1530 *valp = SDEV_ACL_FLAVOR(vp); 1531 return (0); 1532 } 1533 1534 return (fs_pathconf(vp, cmd, valp, cr, ct)); 1535 } 1536 1537 vnodeops_t *sdev_vnodeops; 1538 1539 const fs_operation_def_t sdev_vnodeops_tbl[] = { 1540 { VOPNAME_OPEN, { .vop_open = sdev_open } }, 1541 { VOPNAME_CLOSE, { .vop_close = sdev_close } }, 1542 { VOPNAME_READ, { .vop_read = sdev_read } }, 1543 { VOPNAME_WRITE, { .vop_write = sdev_write } }, 1544 { VOPNAME_IOCTL, { .vop_ioctl = sdev_ioctl } }, 1545 { VOPNAME_GETATTR, { .vop_getattr = sdev_getattr } }, 1546 { VOPNAME_SETATTR, { .vop_setattr = sdev_setattr } }, 1547 { VOPNAME_ACCESS, { .vop_access = sdev_access } }, 1548 { VOPNAME_LOOKUP, { .vop_lookup = sdev_lookup } }, 1549 { 
VOPNAME_CREATE, { .vop_create = sdev_create } }, 1550 { VOPNAME_RENAME, { .vop_rename = sdev_rename } }, 1551 { VOPNAME_REMOVE, { .vop_remove = sdev_remove } }, 1552 { VOPNAME_MKDIR, { .vop_mkdir = sdev_mkdir } }, 1553 { VOPNAME_RMDIR, { .vop_rmdir = sdev_rmdir } }, 1554 { VOPNAME_READDIR, { .vop_readdir = sdev_readdir } }, 1555 { VOPNAME_SYMLINK, { .vop_symlink = sdev_symlink } }, 1556 { VOPNAME_READLINK, { .vop_readlink = sdev_readlink } }, 1557 { VOPNAME_INACTIVE, { .vop_inactive = sdev_inactive } }, 1558 { VOPNAME_FID, { .vop_fid = sdev_fid } }, 1559 { VOPNAME_RWLOCK, { .vop_rwlock = sdev_rwlock } }, 1560 { VOPNAME_RWUNLOCK, { .vop_rwunlock = sdev_rwunlock } }, 1561 { VOPNAME_SEEK, { .vop_seek = sdev_seek } }, 1562 { VOPNAME_FRLOCK, { .vop_frlock = sdev_frlock } }, 1563 { VOPNAME_PATHCONF, { .vop_pathconf = sdev_pathconf } }, 1564 { VOPNAME_SETSECATTR, { .vop_setsecattr = sdev_setsecattr } }, 1565 { VOPNAME_GETSECATTR, { .vop_getsecattr = sdev_getsecattr } }, 1566 { NULL, { NULL } } 1567 }; 1568 1569 int sdev_vnodeops_tbl_size = sizeof (sdev_vnodeops_tbl);