1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 #include <sys/param.h> 26 #include <sys/errno.h> 27 #include <sys/vfs.h> 28 #include <sys/vfs_opreg.h> 29 #include <sys/vnode.h> 30 #include <sys/uio.h> 31 #include <sys/pathname.h> 32 #include <sys/kmem.h> 33 #include <sys/cred.h> 34 #include <sys/statvfs.h> 35 #include <sys/fs/lofs_info.h> 36 #include <sys/fs/lofs_node.h> 37 #include <sys/mount.h> 38 #include <sys/mntent.h> 39 #include <sys/mkdev.h> 40 #include <sys/priv.h> 41 #include <sys/sysmacros.h> 42 #include <sys/systm.h> 43 #include <sys/cmn_err.h> 44 #include <sys/policy.h> 45 #include <sys/tsol/label.h> 46 #include "fs/fs_subr.h" 47 48 /* 49 * This is the loadable module wrapper. 50 */ 51 #include <sys/modctl.h> 52 53 static mntopts_t lofs_mntopts; 54 55 static int lofsinit(int, char *); 56 57 static vfsdef_t vfw = { 58 VFSDEF_VERSION, 59 "lofs", 60 lofsinit, 61 VSW_HASPROTO|VSW_STATS|VSW_ZMOUNT, 62 &lofs_mntopts 63 }; 64 65 /* 66 * LOFS mount options table 67 */ 68 static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL }; 69 static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL }; 70 static char *sub_cancel[] = { MNTOPT_LOFS_NOSUB, NULL }; 71 static char *nosub_cancel[] = { MNTOPT_LOFS_SUB, NULL }; 72 73 static mntopt_t mntopts[] = { 74 /* 75 * option name cancel option default arg flags 76 * private data 77 */ 78 { MNTOPT_XATTR, xattr_cancel, NULL, 0, 79 (void *)0 }, 80 { MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, 81 (void *)0 }, 82 { MNTOPT_LOFS_SUB, sub_cancel, NULL, 0, 83 (void *)0 }, 84 { MNTOPT_LOFS_NOSUB, nosub_cancel, NULL, 0, 85 (void *)0 }, 86 }; 87 88 static mntopts_t lofs_mntopts = { 89 sizeof (mntopts) / sizeof (mntopt_t), 90 mntopts 91 }; 92 93 /* 94 * Module linkage information for the kernel. 95 */ 96 97 static struct modlfs modlfs = { 98 &mod_fsops, "filesystem for lofs", &vfw 99 }; 100 101 static struct modlinkage modlinkage = { 102 MODREV_1, (void *)&modlfs, NULL 103 }; 104 105 /* 106 * This is the module initialization routine. 107 */ 108 109 int 110 _init(void) 111 { 112 int status; 113 114 lofs_subrinit(); 115 status = mod_install(&modlinkage); 116 if (status != 0) { 117 /* 118 * Cleanup previously initialized work. 119 */ 120 lofs_subrfini(); 121 } 122 123 return (status); 124 } 125 126 /* 127 * Don't allow the lofs module to be unloaded for now. 128 * There is a memory leak if it gets unloaded. 129 */ 130 131 int 132 _fini(void) 133 { 134 return (EBUSY); 135 } 136 137 int 138 _info(struct modinfo *modinfop) 139 { 140 return (mod_info(&modlinkage, modinfop)); 141 } 142 143 144 static int lofsfstype; 145 vfsops_t *lo_vfsops; 146 147 /* 148 * lo mount vfsop 149 * Set up mount info record and attach it to vfs struct. 150 */ 151 /*ARGSUSED*/ 152 static int 153 lo_mount(struct vfs *vfsp, 154 struct vnode *vp, 155 struct mounta *uap, 156 struct cred *cr) 157 { 158 int error; 159 struct vnode *srootvp = NULL; /* the server's root */ 160 struct vnode *realrootvp; 161 struct loinfo *li; 162 int nodev; 163 164 nodev = vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL); 165 166 if ((error = secpolicy_fs_mount(cr, vp, vfsp)) != 0) 167 return (EPERM); 168 169 /* 170 * Loopback devices which get "nodevices" added can be done without 171 * "nodevices" set because we cannot import devices into a zone 172 * with loopback. Note that we have all zone privileges when 173 * this happens; if not, we'd have gotten "nosuid". 174 */ 175 if (!nodev && vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 176 vfs_setmntopt(vfsp, MNTOPT_DEVICES, NULL, VFS_NODISPLAY); 177 178 mutex_enter(&vp->v_lock); 179 if (!(uap->flags & MS_OVERLAY) && 180 (vp->v_count != 1 || (vp->v_flag & VROOT))) { 181 mutex_exit(&vp->v_lock); 182 return (EBUSY); 183 } 184 mutex_exit(&vp->v_lock); 185 186 /* 187 * Find real root, and make vfs point to real vfs 188 */ 189 190 if (error = lookupname(uap->spec, (uap->flags & MS_SYSSPACE) ? 191 UIO_SYSSPACE : UIO_USERSPACE, FOLLOW, NULLVPP, &realrootvp)) 192 return (error); 193 194 /* 195 * Enforce MAC policy if needed. 196 * 197 * Loopback mounts must not allow writing up. The dominance test 198 * is intended to prevent a global zone caller from accidentally 199 * creating write-up conditions between two labeled zones. 200 * Local zones can't violate MAC on their own without help from 201 * the global zone because they can't name a pathname that 202 * they don't already have. 203 * 204 * The special case check for the NET_MAC_AWARE process flag is 205 * to support the case of the automounter in the global zone. We 206 * permit automounting of local zone directories such as home 207 * directories, into the global zone as required by setlabel, 208 * zonecopy, and saving of desktop sessions. Such mounts are 209 * trusted not to expose the contents of one zone's directories 210 * to another by leaking them through the global zone. 211 */ 212 if (is_system_labeled() && crgetzoneid(cr) == GLOBAL_ZONEID) { 213 char specname[MAXPATHLEN]; 214 zone_t *from_zptr; 215 zone_t *to_zptr; 216 217 if (vnodetopath(NULL, realrootvp, specname, 218 sizeof (specname), CRED()) != 0) { 219 VN_RELE(realrootvp); 220 return (EACCES); 221 } 222 223 from_zptr = zone_find_by_path(specname); 224 to_zptr = zone_find_by_path(refstr_value(vfsp->vfs_mntpt)); 225 226 /* 227 * Special case for scratch zones used for Live Upgrade: 228 * this is used to mount the zone's root from /root to /a in 229 * the scratch zone. As with the other special case, this 230 * appears to be outside of the zone because it's not under 231 * the zone rootpath, which is $ZONEPATH/lu in the scratch 232 * zone case. 233 */ 234 235 if (from_zptr != to_zptr && 236 !(to_zptr->zone_flags & ZF_IS_SCRATCH)) { 237 /* 238 * We know at this point that the labels aren't equal 239 * because the zone pointers aren't equal, and zones 240 * can't share a label. 241 * 242 * If the source is the global zone then making 243 * it available to a local zone must be done in 244 * read-only mode as the label will become admin_low. 245 * 246 * If it is a mount between local zones then if 247 * the current process is in the global zone and has 248 * the NET_MAC_AWARE flag, then regular read-write 249 * access is allowed. If it's in some other zone, but 250 * the label on the mount point dominates the original 251 * source, then allow the mount as read-only 252 * ("read-down"). 253 */ 254 if (from_zptr->zone_id == GLOBAL_ZONEID) { 255 /* make the mount read-only */ 256 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 257 } else { /* cross-zone mount */ 258 if (to_zptr->zone_id == GLOBAL_ZONEID && 259 /* LINTED: no consequent */ 260 getpflags(NET_MAC_AWARE, cr) != 0) { 261 /* Allow the mount as read-write */ 262 } else if (bldominates( 263 label2bslabel(to_zptr->zone_slabel), 264 label2bslabel(from_zptr->zone_slabel))) { 265 /* make the mount read-only */ 266 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 267 } else { 268 VN_RELE(realrootvp); 269 zone_rele(to_zptr); 270 zone_rele(from_zptr); 271 return (EACCES); 272 } 273 } 274 } 275 zone_rele(to_zptr); 276 zone_rele(from_zptr); 277 } 278 279 /* 280 * realrootvp may be an AUTOFS node, in which case we perform a 281 * VOP_ACCESS() to trigger the mount of the intended filesystem. 282 * This causes a loopback mount of the intended filesystem instead 283 * of the AUTOFS filesystem. 284 * 285 * If a lofs mount creates a mount loop (such that a lofs vfs is 286 * mounted on an autofs node and that lofs vfs points back to the 287 * autofs node which it is mounted on) then a VOP_ACCESS call will 288 * create a deadlock. Once this deadlock is released, VOP_ACCESS will 289 * return EINTR. In such a case we don't want the lofs vfs to be 290 * created as the loop could panic the system. 291 */ 292 if ((error = VOP_ACCESS(realrootvp, 0, 0, cr, NULL)) != 0) { 293 VN_RELE(realrootvp); 294 return (error); 295 } 296 297 /* 298 * We're interested in the top most filesystem. 299 * This is specially important when uap->spec is a trigger 300 * AUTOFS node, since we're really interested in mounting the 301 * filesystem AUTOFS mounted as result of the VOP_ACCESS() 302 * call not the AUTOFS node itself. 303 */ 304 if (vn_mountedvfs(realrootvp) != NULL) { 305 if (error = traverse(&realrootvp)) { 306 VN_RELE(realrootvp); 307 return (error); 308 } 309 } 310 311 /* 312 * Allocate a vfs info struct and attach it 313 */ 314 li = kmem_zalloc(sizeof (struct loinfo), KM_SLEEP); 315 li->li_realvfs = realrootvp->v_vfsp; 316 li->li_mountvfs = vfsp; 317 318 /* 319 * Set mount flags to be inherited by loopback vfs's 320 */ 321 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) { 322 li->li_mflag |= VFS_RDONLY; 323 } 324 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) { 325 li->li_mflag |= (VFS_NOSETUID|VFS_NODEVICES); 326 } 327 if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) { 328 li->li_mflag |= VFS_NODEVICES; 329 } 330 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) { 331 li->li_mflag |= VFS_NOSETUID; 332 } 333 /* 334 * Permissive flags are added to the "deny" bitmap. 335 */ 336 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) { 337 li->li_dflag |= VFS_XATTR; 338 } 339 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) { 340 li->li_dflag |= VFS_NBMAND; 341 } 342 343 /* 344 * Propagate inheritable mount flags from the real vfs. 345 */ 346 if ((li->li_realvfs->vfs_flag & VFS_RDONLY) && 347 !vfs_optionisset(vfsp, MNTOPT_RO, NULL)) 348 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 349 VFS_NODISPLAY); 350 if ((li->li_realvfs->vfs_flag & VFS_NOSETUID) && 351 !vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) 352 vfs_setmntopt(vfsp, MNTOPT_NOSETUID, NULL, 353 VFS_NODISPLAY); 354 if ((li->li_realvfs->vfs_flag & VFS_NODEVICES) && 355 !vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) 356 vfs_setmntopt(vfsp, MNTOPT_NODEVICES, NULL, 357 VFS_NODISPLAY); 358 /* 359 * Permissive flags such as VFS_XATTR, as opposed to restrictive flags 360 * such as VFS_RDONLY, are handled differently. An explicit 361 * MNTOPT_NOXATTR should override the underlying filesystem's VFS_XATTR. 362 */ 363 if ((li->li_realvfs->vfs_flag & VFS_XATTR) && 364 !vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL) && 365 !vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) 366 vfs_setmntopt(vfsp, MNTOPT_XATTR, NULL, 367 VFS_NODISPLAY); 368 if ((li->li_realvfs->vfs_flag & VFS_NBMAND) && 369 !vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL) && 370 !vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) 371 vfs_setmntopt(vfsp, MNTOPT_NBMAND, NULL, 372 VFS_NODISPLAY); 373 374 li->li_refct = 0; 375 vfsp->vfs_data = (caddr_t)li; 376 vfsp->vfs_bcount = 0; 377 vfsp->vfs_fstype = lofsfstype; 378 vfsp->vfs_bsize = li->li_realvfs->vfs_bsize; 379 380 vfsp->vfs_dev = li->li_realvfs->vfs_dev; 381 vfsp->vfs_fsid.val[0] = li->li_realvfs->vfs_fsid.val[0]; 382 vfsp->vfs_fsid.val[1] = li->li_realvfs->vfs_fsid.val[1]; 383 384 if (vfs_optionisset(vfsp, MNTOPT_LOFS_NOSUB, NULL)) { 385 li->li_flag |= LO_NOSUB; 386 } 387 388 /* 389 * Propagate any VFS features 390 */ 391 392 vfs_propagate_features(li->li_realvfs, vfsp); 393 394 /* 395 * Setup the hashtable. If the root of this mount isn't a directory, 396 * there's no point in allocating a large hashtable. A table with one 397 * bucket is sufficient. 398 */ 399 if (realrootvp->v_type != VDIR) 400 lsetup(li, 1); 401 else 402 lsetup(li, 0); 403 404 /* 405 * Make the root vnode 406 */ 407 srootvp = makelonode(realrootvp, li, 0); 408 srootvp->v_flag |= VROOT; 409 li->li_rootvp = srootvp; 410 411 #ifdef LODEBUG 412 lo_dprint(4, "lo_mount: vfs %p realvfs %p root %p realroot %p li %p\n", 413 vfsp, li->li_realvfs, srootvp, realrootvp, li); 414 #endif 415 return (0); 416 } 417 418 /* 419 * Undo loopback mount 420 */ 421 static int 422 lo_unmount(struct vfs *vfsp, int flag, struct cred *cr) 423 { 424 struct loinfo *li; 425 426 if (secpolicy_fs_unmount(cr, vfsp) != 0) 427 return (EPERM); 428 429 /* 430 * Forced unmount is not supported by this file system 431 * and thus, ENOTSUP, is being returned. 432 */ 433 if (flag & MS_FORCE) 434 return (ENOTSUP); 435 436 li = vtoli(vfsp); 437 #ifdef LODEBUG 438 lo_dprint(4, "lo_unmount(%p) li %p\n", vfsp, li); 439 #endif 440 if (li->li_refct != 1 || li->li_rootvp->v_count != 1) { 441 #ifdef LODEBUG 442 lo_dprint(4, "refct %d v_ct %d\n", li->li_refct, 443 li->li_rootvp->v_count); 444 #endif 445 return (EBUSY); 446 } 447 VN_RELE(li->li_rootvp); 448 return (0); 449 } 450 451 /* 452 * Find root of lofs mount. 453 */ 454 static int 455 lo_root(struct vfs *vfsp, struct vnode **vpp) 456 { 457 *vpp = vtoli(vfsp)->li_rootvp; 458 #ifdef LODEBUG 459 lo_dprint(4, "lo_root(0x%p) = %p\n", vfsp, *vpp); 460 #endif 461 /* 462 * If the root of the filesystem is a special file, return the specvp 463 * version of the vnode. We don't save the specvp vnode in our 464 * hashtable since that's exclusively for lnodes. 465 */ 466 if (IS_DEVVP(*vpp)) { 467 struct vnode *svp; 468 469 svp = specvp(*vpp, (*vpp)->v_rdev, (*vpp)->v_type, kcred); 470 if (svp == NULL) 471 return (ENOSYS); 472 *vpp = svp; 473 } else { 474 VN_HOLD(*vpp); 475 } 476 477 return (0); 478 } 479 480 /* 481 * Get file system statistics. 482 */ 483 static int 484 lo_statvfs(register struct vfs *vfsp, struct statvfs64 *sbp) 485 { 486 vnode_t *realrootvp; 487 488 #ifdef LODEBUG 489 lo_dprint(4, "lostatvfs %p\n", vfsp); 490 #endif 491 /* 492 * Using realrootvp->v_vfsp (instead of the realvfsp that was 493 * cached) is necessary to make lofs work woth forced UFS unmounts. 494 * In the case of a forced unmount, UFS stores a set of dummy vfsops 495 * in all the (i)vnodes in the filesystem. The dummy ops simply 496 * returns back EIO. 497 */ 498 (void) lo_realvfs(vfsp, &realrootvp); 499 if (realrootvp != NULL) 500 return (VFS_STATVFS(realrootvp->v_vfsp, sbp)); 501 else 502 return (EIO); 503 } 504 505 /* 506 * LOFS doesn't have any data or metadata to flush, pending I/O on the 507 * underlying filesystem will be flushed when such filesystem is synched. 508 */ 509 /* ARGSUSED */ 510 static int 511 lo_sync(struct vfs *vfsp, 512 short flag, 513 struct cred *cr) 514 { 515 #ifdef LODEBUG 516 lo_dprint(4, "lo_sync: %p\n", vfsp); 517 #endif 518 return (0); 519 } 520 521 /* 522 * Obtain the vnode from the underlying filesystem. 523 */ 524 static int 525 lo_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp) 526 { 527 vnode_t *realrootvp; 528 529 #ifdef LODEBUG 530 lo_dprint(4, "lo_vget: %p\n", vfsp); 531 #endif 532 (void) lo_realvfs(vfsp, &realrootvp); 533 if (realrootvp != NULL) 534 return (VFS_VGET(realrootvp->v_vfsp, vpp, fidp)); 535 else 536 return (EIO); 537 } 538 539 /* 540 * Free mount-specific data. 541 */ 542 static void 543 lo_freevfs(struct vfs *vfsp) 544 { 545 struct loinfo *li = vtoli(vfsp); 546 547 ldestroy(li); 548 kmem_free(li, sizeof (struct loinfo)); 549 } 550 551 static int 552 lofsinit(int fstyp, char *name) 553 { 554 static const fs_operation_def_t lo_vfsops_template[] = { 555 VFSNAME_MOUNT, { .vfs_mount = lo_mount }, 556 VFSNAME_UNMOUNT, { .vfs_unmount = lo_unmount }, 557 VFSNAME_ROOT, { .vfs_root = lo_root }, 558 VFSNAME_STATVFS, { .vfs_statvfs = lo_statvfs }, 559 VFSNAME_SYNC, { .vfs_sync = lo_sync }, 560 VFSNAME_VGET, { .vfs_vget = lo_vget }, 561 VFSNAME_FREEVFS, { .vfs_freevfs = lo_freevfs }, 562 NULL, NULL 563 }; 564 int error; 565 566 error = vfs_setfsops(fstyp, lo_vfsops_template, &lo_vfsops); 567 if (error != 0) { 568 cmn_err(CE_WARN, "lofsinit: bad vfs ops template"); 569 return (error); 570 } 571 572 error = vn_make_ops(name, lo_vnodeops_template, &lo_vnodeops); 573 if (error != 0) { 574 (void) vfs_freevfsops_by_type(fstyp); 575 cmn_err(CE_WARN, "lofsinit: bad vnode ops template"); 576 return (error); 577 } 578 579 lofsfstype = fstyp; 580 581 return (0); 582 }