1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 /* 30 * University Copyright- Copyright (c) 1982, 1986, 1988 31 * The Regents of the University of California 32 * All Rights Reserved 33 * 34 * University Acknowledgment- Portions of this document are derived from 35 * software developed by the University of California, Berkeley, and its 36 * contributors. 37 */ 38 39 #include <sys/types.h> 40 #include <sys/t_lock.h> 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/bitmap.h> 44 #include <sys/sysmacros.h> 45 #include <sys/kmem.h> 46 #include <sys/signal.h> 47 #include <sys/user.h> 48 #include <sys/proc.h> 49 #include <sys/disp.h> 50 #include <sys/buf.h> 51 #include <sys/pathname.h> 52 #include <sys/vfs.h> 53 #include <sys/vfs_opreg.h> 54 #include <sys/vnode.h> 55 #include <sys/file.h> 56 #include <sys/atomic.h> 57 #include <sys/uio.h> 58 #include <sys/dkio.h> 59 #include <sys/cred.h> 60 #include <sys/conf.h> 61 #include <sys/dnlc.h> 62 #include <sys/kstat.h> 63 #include <sys/acl.h> 64 #include <sys/fs/ufs_fsdir.h> 65 #include <sys/fs/ufs_fs.h> 66 #include <sys/fs/ufs_inode.h> 67 #include <sys/fs/ufs_mount.h> 68 #include <sys/fs/ufs_acl.h> 69 #include <sys/fs/ufs_panic.h> 70 #include <sys/fs/ufs_bio.h> 71 #include <sys/fs/ufs_quota.h> 72 #include <sys/fs/ufs_log.h> 73 #undef NFS 74 #include <sys/statvfs.h> 75 #include <sys/mount.h> 76 #include <sys/mntent.h> 77 #include <sys/swap.h> 78 #include <sys/errno.h> 79 #include <sys/debug.h> 80 #include "fs/fs_subr.h" 81 #include <sys/cmn_err.h> 82 #include <sys/dnlc.h> 83 #include <sys/fssnap_if.h> 84 #include <sys/sunddi.h> 85 #include <sys/bootconf.h> 86 #include <sys/policy.h> 87 #include <sys/zone.h> 88 89 /* 90 * This is the loadable module wrapper. 91 */ 92 #include <sys/modctl.h> 93 94 int ufsfstype; 95 vfsops_t *ufs_vfsops; 96 static int ufsinit(int, char *); 97 static int mountfs(); 98 extern int highbit(); 99 extern struct instats ins; 100 extern struct vnode *common_specvp(struct vnode *vp); 101 extern vfs_t EIO_vfs; 102 103 struct dquot *dquot, *dquotNDQUOT; 104 105 /* 106 * Cylinder group summary information handling tunable. 107 * This defines when these deltas get logged. 108 * If the number of cylinders in the file system is over the 109 * tunable then we log csum updates. Otherwise the updates are only 110 * done for performance on unmount. After a panic they can be 111 * quickly constructed during mounting. See ufs_construct_si() 112 * called from ufs_getsummaryinfo(). 113 * 114 * This performance feature can of course be disabled by setting 115 * ufs_ncg_log to 0, and fully enabled by setting it to 0xffffffff. 116 */ 117 #define UFS_LOG_NCG_DEFAULT 10000 118 uint32_t ufs_ncg_log = UFS_LOG_NCG_DEFAULT; 119 120 /* 121 * ufs_clean_root indicates whether the root fs went down cleanly 122 */ 123 static int ufs_clean_root = 0; 124 125 /* 126 * UFS Mount options table 127 */ 128 static char *intr_cancel[] = { MNTOPT_NOINTR, NULL }; 129 static char *nointr_cancel[] = { MNTOPT_INTR, NULL }; 130 static char *forcedirectio_cancel[] = { MNTOPT_NOFORCEDIRECTIO, NULL }; 131 static char *noforcedirectio_cancel[] = { MNTOPT_FORCEDIRECTIO, NULL }; 132 static char *largefiles_cancel[] = { MNTOPT_NOLARGEFILES, NULL }; 133 static char *nolargefiles_cancel[] = { MNTOPT_LARGEFILES, NULL }; 134 static char *logging_cancel[] = { MNTOPT_NOLOGGING, NULL }; 135 static char *nologging_cancel[] = { MNTOPT_LOGGING, NULL }; 136 static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL }; 137 static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL }; 138 static char *quota_cancel[] = { MNTOPT_NOQUOTA, NULL }; 139 static char *noquota_cancel[] = { MNTOPT_QUOTA, NULL }; 140 static char *dfratime_cancel[] = { MNTOPT_NODFRATIME, NULL }; 141 static char *nodfratime_cancel[] = { MNTOPT_DFRATIME, NULL }; 142 143 static mntopt_t mntopts[] = { 144 /* 145 * option name cancel option default arg flags 146 * ufs arg flag 147 */ 148 { MNTOPT_INTR, intr_cancel, NULL, MO_DEFAULT, 149 (void *)0 }, 150 { MNTOPT_NOINTR, nointr_cancel, NULL, 0, 151 (void *)UFSMNT_NOINTR }, 152 { MNTOPT_SYNCDIR, NULL, NULL, 0, 153 (void *)UFSMNT_SYNCDIR }, 154 { MNTOPT_FORCEDIRECTIO, forcedirectio_cancel, NULL, 0, 155 (void *)UFSMNT_FORCEDIRECTIO }, 156 { MNTOPT_NOFORCEDIRECTIO, noforcedirectio_cancel, NULL, 0, 157 (void *)UFSMNT_NOFORCEDIRECTIO }, 158 { MNTOPT_NOSETSEC, NULL, NULL, 0, 159 (void *)UFSMNT_NOSETSEC }, 160 { MNTOPT_LARGEFILES, largefiles_cancel, NULL, MO_DEFAULT, 161 (void *)UFSMNT_LARGEFILES }, 162 { MNTOPT_NOLARGEFILES, nolargefiles_cancel, NULL, 0, 163 (void *)0 }, 164 { MNTOPT_LOGGING, logging_cancel, NULL, MO_TAG, 165 (void *)UFSMNT_LOGGING }, 166 { MNTOPT_NOLOGGING, nologging_cancel, NULL, 167 MO_NODISPLAY|MO_DEFAULT|MO_TAG, (void *)0 }, 168 { MNTOPT_QUOTA, quota_cancel, NULL, MO_IGNORE, 169 (void *)0 }, 170 { MNTOPT_NOQUOTA, noquota_cancel, NULL, 171 MO_NODISPLAY|MO_DEFAULT, (void *)0 }, 172 { MNTOPT_GLOBAL, NULL, NULL, 0, 173 (void *)0 }, 174 { MNTOPT_XATTR, xattr_cancel, NULL, MO_DEFAULT, 175 (void *)0 }, 176 { MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, 177 (void *)0 }, 178 { MNTOPT_NOATIME, NULL, NULL, 0, 179 (void *)UFSMNT_NOATIME }, 180 { MNTOPT_DFRATIME, dfratime_cancel, NULL, 0, 181 (void *)0 }, 182 { MNTOPT_NODFRATIME, nodfratime_cancel, NULL, 183 MO_NODISPLAY|MO_DEFAULT, (void *)UFSMNT_NODFRATIME }, 184 { MNTOPT_ONERROR, NULL, UFSMNT_ONERROR_PANIC_STR, 185 MO_DEFAULT|MO_HASVALUE, (void *)0 }, 186 }; 187 188 static mntopts_t ufs_mntopts = { 189 sizeof (mntopts) / sizeof (mntopt_t), 190 mntopts 191 }; 192 193 static vfsdef_t vfw = { 194 VFSDEF_VERSION, 195 "ufs", 196 ufsinit, 197 VSW_HASPROTO|VSW_CANREMOUNT|VSW_STATS|VSW_CANLOFI, 198 &ufs_mntopts 199 }; 200 201 /* 202 * Module linkage information for the kernel. 203 */ 204 extern struct mod_ops mod_fsops; 205 206 static struct modlfs modlfs = { 207 &mod_fsops, "filesystem for ufs", &vfw 208 }; 209 210 static struct modlinkage modlinkage = { 211 MODREV_1, { (void *)&modlfs, NULL } 212 }; 213 214 /* 215 * An attempt has been made to make this module unloadable. In order to 216 * test it, we need a system in which the root fs is NOT ufs. THIS HAS NOT 217 * BEEN DONE 218 */ 219 220 extern kstat_t *ufs_inode_kstat; 221 extern uint_t ufs_lockfs_key; 222 extern void ufs_lockfs_tsd_destructor(void *); 223 extern uint_t bypass_snapshot_throttle_key; 224 225 int 226 _init(void) 227 { 228 /* 229 * Create an index into the per thread array so that any thread doing 230 * VOP will have a lockfs mark on it. 231 */ 232 tsd_create(&ufs_lockfs_key, ufs_lockfs_tsd_destructor); 233 tsd_create(&bypass_snapshot_throttle_key, NULL); 234 return (mod_install(&modlinkage)); 235 } 236 237 int 238 _fini(void) 239 { 240 return (EBUSY); 241 } 242 243 int 244 _info(struct modinfo *modinfop) 245 { 246 return (mod_info(&modlinkage, modinfop)); 247 } 248 249 extern struct vnode *makespecvp(dev_t dev, vtype_t type); 250 251 extern kmutex_t ufs_scan_lock; 252 253 static int mountfs(struct vfs *, enum whymountroot, struct vnode *, char *, 254 struct cred *, int, void *, int); 255 256 257 static int 258 ufs_mount(struct vfs *vfsp, struct vnode *mvp, struct mounta *uap, 259 struct cred *cr) 260 261 { 262 char *data = uap->dataptr; 263 int datalen = uap->datalen; 264 dev_t dev; 265 struct vnode *lvp = NULL; 266 struct vnode *svp = NULL; 267 struct pathname dpn; 268 int error; 269 enum whymountroot why = ROOT_INIT; 270 struct ufs_args args; 271 int oflag, aflag; 272 int fromspace = (uap->flags & MS_SYSSPACE) ? 273 UIO_SYSSPACE : UIO_USERSPACE; 274 275 if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0) 276 return (error); 277 278 if (mvp->v_type != VDIR) 279 return (ENOTDIR); 280 281 mutex_enter(&mvp->v_lock); 282 if ((uap->flags & MS_REMOUNT) == 0 && 283 (uap->flags & MS_OVERLAY) == 0 && 284 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) { 285 mutex_exit(&mvp->v_lock); 286 return (EBUSY); 287 } 288 mutex_exit(&mvp->v_lock); 289 290 /* 291 * Get arguments 292 */ 293 bzero(&args, sizeof (args)); 294 if ((uap->flags & MS_DATA) && data != NULL && datalen != 0) { 295 int copy_result = 0; 296 297 if (datalen > sizeof (args)) 298 return (EINVAL); 299 if (uap->flags & MS_SYSSPACE) 300 bcopy(data, &args, datalen); 301 else 302 copy_result = copyin(data, &args, datalen); 303 if (copy_result) 304 return (EFAULT); 305 datalen = sizeof (struct ufs_args); 306 } else { 307 datalen = 0; 308 } 309 310 if ((vfsp->vfs_flag & VFS_RDONLY) != 0 || 311 (uap->flags & MS_RDONLY) != 0) { 312 oflag = FREAD; 313 aflag = VREAD; 314 } else { 315 oflag = FREAD | FWRITE; 316 aflag = VREAD | VWRITE; 317 } 318 319 /* 320 * Read in the mount point pathname 321 * (so we can record the directory the file system was last mounted on). 322 */ 323 if (error = pn_get(uap->dir, fromspace, &dpn)) 324 return (error); 325 326 /* 327 * Resolve path name of special file being mounted. 328 */ 329 if (error = lookupname(uap->spec, fromspace, FOLLOW, NULL, &svp)) { 330 pn_free(&dpn); 331 return (error); 332 } 333 334 error = vfs_get_lofi(vfsp, &lvp); 335 336 if (error > 0) { 337 VN_RELE(svp); 338 pn_free(&dpn); 339 return (error); 340 } else if (error == 0) { 341 dev = lvp->v_rdev; 342 343 if (getmajor(dev) >= devcnt) { 344 error = ENXIO; 345 goto out; 346 } 347 } else { 348 dev = svp->v_rdev; 349 350 if (svp->v_type != VBLK) { 351 VN_RELE(svp); 352 pn_free(&dpn); 353 return (ENOTBLK); 354 } 355 356 if (getmajor(dev) >= devcnt) { 357 error = ENXIO; 358 goto out; 359 } 360 361 /* 362 * In SunCluster, requests to a global device are 363 * satisfied by a local device. We substitute the global 364 * pxfs node with a local spec node here. 365 */ 366 if (IS_PXFSVP(svp)) { 367 ASSERT(lvp == NULL); 368 VN_RELE(svp); 369 svp = makespecvp(dev, VBLK); 370 } 371 372 if ((error = secpolicy_spec_open(cr, svp, oflag)) != 0) { 373 VN_RELE(svp); 374 pn_free(&dpn); 375 return (error); 376 } 377 } 378 379 if (uap->flags & MS_REMOUNT) 380 why = ROOT_REMOUNT; 381 382 /* 383 * Open device/file mounted on. We need this to check whether 384 * the caller has sufficient rights to access the resource in 385 * question. When bio is fixed for vnodes this can all be vnode 386 * operations. 387 */ 388 if ((error = VOP_ACCESS(svp, aflag, 0, cr, NULL)) != 0) 389 goto out; 390 391 /* 392 * Ensure that this device isn't already mounted or in progress on a 393 * mount unless this is a REMOUNT request or we are told to suppress 394 * mount checks. Global mounts require special handling. 395 */ 396 if ((uap->flags & MS_NOCHECK) == 0) { 397 if ((uap->flags & MS_GLOBAL) == 0 && 398 vfs_devmounting(dev, vfsp)) { 399 error = EBUSY; 400 goto out; 401 } 402 if (vfs_devismounted(dev)) { 403 if ((uap->flags & MS_REMOUNT) == 0) { 404 error = EBUSY; 405 goto out; 406 } 407 } 408 } 409 410 /* 411 * If the device is a tape, mount it read only 412 */ 413 if (devopsp[getmajor(dev)]->devo_cb_ops->cb_flag & D_TAPE) { 414 vfsp->vfs_flag |= VFS_RDONLY; 415 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0); 416 } 417 if (uap->flags & MS_RDONLY) 418 vfsp->vfs_flag |= VFS_RDONLY; 419 420 /* 421 * Mount the filesystem, free the device vnode on error. 422 */ 423 error = mountfs(vfsp, why, lvp != NULL ? lvp : svp, 424 dpn.pn_path, cr, 0, &args, datalen); 425 426 if (error == 0) { 427 vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS); 428 429 /* 430 * If lofi, drop our reference to the original file. 431 */ 432 if (lvp != NULL) 433 VN_RELE(svp); 434 } 435 436 out: 437 pn_free(&dpn); 438 439 if (error) { 440 if (lvp != NULL) 441 VN_RELE(lvp); 442 if (svp != NULL) 443 VN_RELE(svp); 444 } 445 return (error); 446 } 447 448 /* 449 * Mount root file system. 450 * "why" is ROOT_INIT on initial call ROOT_REMOUNT if called to 451 * remount the root file system, and ROOT_UNMOUNT if called to 452 * unmount the root (e.g., as part of a system shutdown). 453 * 454 * XXX - this may be partially machine-dependent; it, along with the VFS_SWAPVP 455 * operation, goes along with auto-configuration. A mechanism should be 456 * provided by which machine-INdependent code in the kernel can say "get me the 457 * right root file system" and "get me the right initial swap area", and have 458 * that done in what may well be a machine-dependent fashion. 459 * Unfortunately, it is also file-system-type dependent (NFS gets it via 460 * bootparams calls, UFS gets it from various and sundry machine-dependent 461 * mechanisms, as SPECFS does for swap). 462 */ 463 static int 464 ufs_mountroot(struct vfs *vfsp, enum whymountroot why) 465 { 466 struct fs *fsp; 467 int error; 468 static int ufsrootdone = 0; 469 dev_t rootdev; 470 struct vnode *vp; 471 struct vnode *devvp = 0; 472 int ovflags; 473 int doclkset; 474 ufsvfs_t *ufsvfsp; 475 476 if (why == ROOT_INIT) { 477 if (ufsrootdone++) 478 return (EBUSY); 479 rootdev = getrootdev(); 480 if (rootdev == (dev_t)NODEV) 481 return (ENODEV); 482 vfsp->vfs_dev = rootdev; 483 vfsp->vfs_flag |= VFS_RDONLY; 484 } else if (why == ROOT_REMOUNT) { 485 vp = ((struct ufsvfs *)vfsp->vfs_data)->vfs_devvp; 486 (void) dnlc_purge_vfsp(vfsp, 0); 487 vp = common_specvp(vp); 488 (void) VOP_PUTPAGE(vp, (offset_t)0, (size_t)0, B_INVAL, 489 CRED(), NULL); 490 (void) bfinval(vfsp->vfs_dev, 0); 491 fsp = getfs(vfsp); 492 493 ovflags = vfsp->vfs_flag; 494 vfsp->vfs_flag &= ~VFS_RDONLY; 495 vfsp->vfs_flag |= VFS_REMOUNT; 496 rootdev = vfsp->vfs_dev; 497 } else if (why == ROOT_UNMOUNT) { 498 if (vfs_lock(vfsp) == 0) { 499 (void) ufs_flush(vfsp); 500 /* 501 * Mark the log as fully rolled 502 */ 503 ufsvfsp = (ufsvfs_t *)vfsp->vfs_data; 504 fsp = ufsvfsp->vfs_fs; 505 if (TRANS_ISTRANS(ufsvfsp) && 506 !TRANS_ISERROR(ufsvfsp) && 507 (fsp->fs_rolled == FS_NEED_ROLL)) { 508 ml_unit_t *ul = ufsvfsp->vfs_log; 509 510 error = ufs_putsummaryinfo(ul->un_dev, 511 ufsvfsp, fsp); 512 if (error == 0) { 513 fsp->fs_rolled = FS_ALL_ROLLED; 514 UFS_BWRITE2(NULL, ufsvfsp->vfs_bufp); 515 } 516 } 517 vfs_unlock(vfsp); 518 } else { 519 ufs_update(0); 520 } 521 522 vp = ((struct ufsvfs *)vfsp->vfs_data)->vfs_devvp; 523 (void) VOP_CLOSE(vp, FREAD|FWRITE, 1, 524 (offset_t)0, CRED(), NULL); 525 return (0); 526 } 527 error = vfs_lock(vfsp); 528 if (error) 529 return (error); 530 531 devvp = makespecvp(rootdev, VBLK); 532 533 /* If RO media, don't call clkset() (see below) */ 534 doclkset = 1; 535 if (why == ROOT_INIT) { 536 error = VOP_OPEN(&devvp, FREAD|FWRITE, CRED(), NULL); 537 if (error == 0) { 538 (void) VOP_CLOSE(devvp, FREAD|FWRITE, 1, 539 (offset_t)0, CRED(), NULL); 540 } else { 541 doclkset = 0; 542 } 543 } 544 545 error = mountfs(vfsp, why, devvp, "/", CRED(), 1, NULL, 0); 546 /* 547 * XXX - assumes root device is not indirect, because we don't set 548 * rootvp. Is rootvp used for anything? If so, make another arg 549 * to mountfs. 550 */ 551 if (error) { 552 vfs_unlock(vfsp); 553 if (why == ROOT_REMOUNT) 554 vfsp->vfs_flag = ovflags; 555 if (rootvp) { 556 VN_RELE(rootvp); 557 rootvp = (struct vnode *)0; 558 } 559 VN_RELE(devvp); 560 return (error); 561 } 562 if (why == ROOT_INIT) 563 vfs_add((struct vnode *)0, vfsp, 564 (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0); 565 vfs_unlock(vfsp); 566 fsp = getfs(vfsp); 567 clkset(doclkset ? fsp->fs_time : -1); 568 ufsvfsp = (ufsvfs_t *)vfsp->vfs_data; 569 if (ufsvfsp->vfs_log) { 570 vfs_setmntopt(vfsp, MNTOPT_LOGGING, NULL, 0); 571 } 572 return (0); 573 } 574 575 static int 576 remountfs(struct vfs *vfsp, dev_t dev, void *raw_argsp, int args_len) 577 { 578 struct ufsvfs *ufsvfsp = (struct ufsvfs *)vfsp->vfs_data; 579 struct ulockfs *ulp = &ufsvfsp->vfs_ulockfs; 580 struct buf *bp = ufsvfsp->vfs_bufp; 581 struct fs *fsp = (struct fs *)bp->b_un.b_addr; 582 struct fs *fspt; 583 struct buf *tpt = 0; 584 int error = 0; 585 int flags = 0; 586 587 if (args_len == sizeof (struct ufs_args) && raw_argsp) 588 flags = ((struct ufs_args *)raw_argsp)->flags; 589 590 /* cannot remount to RDONLY */ 591 if (vfsp->vfs_flag & VFS_RDONLY) 592 return (ENOTSUP); 593 594 /* whoops, wrong dev */ 595 if (vfsp->vfs_dev != dev) 596 return (EINVAL); 597 598 /* 599 * synchronize w/ufs ioctls 600 */ 601 mutex_enter(&ulp->ul_lock); 602 atomic_inc_ulong(&ufs_quiesce_pend); 603 604 /* 605 * reset options 606 */ 607 ufsvfsp->vfs_nointr = flags & UFSMNT_NOINTR; 608 ufsvfsp->vfs_syncdir = flags & UFSMNT_SYNCDIR; 609 ufsvfsp->vfs_nosetsec = flags & UFSMNT_NOSETSEC; 610 ufsvfsp->vfs_noatime = flags & UFSMNT_NOATIME; 611 if ((flags & UFSMNT_NODFRATIME) || ufsvfsp->vfs_noatime) 612 ufsvfsp->vfs_dfritime &= ~UFS_DFRATIME; 613 else /* dfratime, default behavior */ 614 ufsvfsp->vfs_dfritime |= UFS_DFRATIME; 615 if (flags & UFSMNT_FORCEDIRECTIO) 616 ufsvfsp->vfs_forcedirectio = 1; 617 else /* default is no direct I/O */ 618 ufsvfsp->vfs_forcedirectio = 0; 619 ufsvfsp->vfs_iotstamp = ddi_get_lbolt(); 620 621 /* 622 * set largefiles flag in ufsvfs equal to the 623 * value passed in by the mount command. If 624 * it is "nolargefiles", and the flag is set 625 * in the superblock, the mount fails. 626 */ 627 if (!(flags & UFSMNT_LARGEFILES)) { /* "nolargefiles" */ 628 if (fsp->fs_flags & FSLARGEFILES) { 629 error = EFBIG; 630 goto remounterr; 631 } 632 ufsvfsp->vfs_lfflags &= ~UFS_LARGEFILES; 633 } else /* "largefiles" */ 634 ufsvfsp->vfs_lfflags |= UFS_LARGEFILES; 635 /* 636 * read/write to read/write; all done 637 */ 638 if (fsp->fs_ronly == 0) 639 goto remounterr; 640 641 /* 642 * fix-on-panic assumes RO->RW remount implies system-critical fs 643 * if it is shortly after boot; so, don't attempt to lock and fix 644 * (unless the user explicitly asked for another action on error) 645 * XXX UFSMNT_ONERROR_RDONLY rather than UFSMNT_ONERROR_PANIC 646 */ 647 #define BOOT_TIME_LIMIT (180*hz) 648 if (!(flags & UFSMNT_ONERROR_FLGMASK) && 649 ddi_get_lbolt() < BOOT_TIME_LIMIT) { 650 cmn_err(CE_WARN, "%s is required to be mounted onerror=%s", 651 ufsvfsp->vfs_fs->fs_fsmnt, UFSMNT_ONERROR_PANIC_STR); 652 flags |= UFSMNT_ONERROR_PANIC; 653 } 654 655 if ((error = ufsfx_mount(ufsvfsp, flags)) != 0) 656 goto remounterr; 657 658 /* 659 * quiesce the file system 660 */ 661 error = ufs_quiesce(ulp); 662 if (error) 663 goto remounterr; 664 665 tpt = UFS_BREAD(ufsvfsp, ufsvfsp->vfs_dev, SBLOCK, SBSIZE); 666 if (tpt->b_flags & B_ERROR) { 667 error = EIO; 668 goto remounterr; 669 } 670 fspt = (struct fs *)tpt->b_un.b_addr; 671 if (((fspt->fs_magic != FS_MAGIC) && 672 (fspt->fs_magic != MTB_UFS_MAGIC)) || 673 (fspt->fs_magic == FS_MAGIC && 674 (fspt->fs_version != UFS_EFISTYLE4NONEFI_VERSION_2 && 675 fspt->fs_version != UFS_VERSION_MIN)) || 676 (fspt->fs_magic == MTB_UFS_MAGIC && 677 (fspt->fs_version > MTB_UFS_VERSION_1 || 678 fspt->fs_version < MTB_UFS_VERSION_MIN)) || 679 fspt->fs_bsize > MAXBSIZE || fspt->fs_frag > MAXFRAG || 680 fspt->fs_bsize < sizeof (struct fs) || fspt->fs_bsize < PAGESIZE) { 681 tpt->b_flags |= B_STALE | B_AGE; 682 error = EINVAL; 683 goto remounterr; 684 } 685 686 if (ufsvfsp->vfs_log && (ufsvfsp->vfs_log->un_flags & LDL_NOROLL)) { 687 ufsvfsp->vfs_log->un_flags &= ~LDL_NOROLL; 688 logmap_start_roll(ufsvfsp->vfs_log); 689 } 690 691 if (TRANS_ISERROR(ufsvfsp)) 692 goto remounterr; 693 TRANS_DOMATAMAP(ufsvfsp); 694 695 if ((fspt->fs_state + fspt->fs_time == FSOKAY) && 696 fspt->fs_clean == FSLOG && !TRANS_ISTRANS(ufsvfsp)) { 697 ufsvfsp->vfs_log = NULL; 698 ufsvfsp->vfs_domatamap = 0; 699 error = ENOSPC; 700 goto remounterr; 701 } 702 703 if (fspt->fs_state + fspt->fs_time == FSOKAY && 704 (fspt->fs_clean == FSCLEAN || 705 fspt->fs_clean == FSSTABLE || 706 fspt->fs_clean == FSLOG)) { 707 708 /* 709 * Ensure that ufs_getsummaryinfo doesn't reconstruct 710 * the summary info. 711 */ 712 error = ufs_getsummaryinfo(vfsp->vfs_dev, ufsvfsp, fspt); 713 if (error) 714 goto remounterr; 715 716 /* preserve mount name */ 717 (void) strncpy(fspt->fs_fsmnt, fsp->fs_fsmnt, MAXMNTLEN); 718 /* free the old cg space */ 719 kmem_free(fsp->fs_u.fs_csp, fsp->fs_cssize); 720 /* switch in the new superblock */ 721 fspt->fs_rolled = FS_NEED_ROLL; 722 bcopy(tpt->b_un.b_addr, bp->b_un.b_addr, fspt->fs_sbsize); 723 724 fsp->fs_clean = FSSTABLE; 725 } /* superblock updated in memory */ 726 tpt->b_flags |= B_STALE | B_AGE; 727 brelse(tpt); 728 tpt = 0; 729 730 if (fsp->fs_clean != FSSTABLE) { 731 error = ENOSPC; 732 goto remounterr; 733 } 734 735 736 if (TRANS_ISTRANS(ufsvfsp)) { 737 fsp->fs_clean = FSLOG; 738 ufsvfsp->vfs_dio = 0; 739 } else 740 if (ufsvfsp->vfs_dio) 741 fsp->fs_clean = FSSUSPEND; 742 743 TRANS_MATA_MOUNT(ufsvfsp); 744 745 fsp->fs_fmod = 0; 746 fsp->fs_ronly = 0; 747 748 atomic_dec_ulong(&ufs_quiesce_pend); 749 cv_broadcast(&ulp->ul_cv); 750 mutex_exit(&ulp->ul_lock); 751 752 if (TRANS_ISTRANS(ufsvfsp)) { 753 754 /* 755 * start the delete thread 756 */ 757 ufs_thread_start(&ufsvfsp->vfs_delete, ufs_thread_delete, vfsp); 758 759 /* 760 * start the reclaim thread 761 */ 762 if (fsp->fs_reclaim & (FS_RECLAIM|FS_RECLAIMING)) { 763 fsp->fs_reclaim &= ~FS_RECLAIM; 764 fsp->fs_reclaim |= FS_RECLAIMING; 765 ufs_thread_start(&ufsvfsp->vfs_reclaim, 766 ufs_thread_reclaim, vfsp); 767 } 768 } 769 770 TRANS_SBWRITE(ufsvfsp, TOP_MOUNT); 771 772 return (0); 773 774 remounterr: 775 if (tpt) 776 brelse(tpt); 777 atomic_dec_ulong(&ufs_quiesce_pend); 778 cv_broadcast(&ulp->ul_cv); 779 mutex_exit(&ulp->ul_lock); 780 return (error); 781 } 782 783 /* 784 * If the device maxtransfer size is not available, we use ufs_maxmaxphys 785 * along with the system value for maxphys to determine the value for 786 * maxtransfer. 787 */ 788 int ufs_maxmaxphys = (1024 * 1024); 789 790 #include <sys/ddi.h> /* for delay(9f) */ 791 792 int ufs_mount_error_delay = 20; /* default to 20ms */ 793 int ufs_mount_timeout = 60000; /* default to 1 minute */ 794 795 static int 796 mountfs(struct vfs *vfsp, enum whymountroot why, struct vnode *devvp, 797 char *path, cred_t *cr, int isroot, void *raw_argsp, int args_len) 798 { 799 dev_t dev = devvp->v_rdev; 800 struct fs *fsp; 801 struct ufsvfs *ufsvfsp = 0; 802 struct buf *bp = 0; 803 struct buf *tp = 0; 804 struct dk_cinfo ci; 805 int error = 0; 806 size_t len; 807 int needclose = 0; 808 int needtrans = 0; 809 struct inode *rip; 810 struct vnode *rvp = NULL; 811 int flags = 0; 812 kmutex_t *ihm; 813 int elapsed; 814 int status; 815 extern int maxphys; 816 817 if (args_len == sizeof (struct ufs_args) && raw_argsp) 818 flags = ((struct ufs_args *)raw_argsp)->flags; 819 820 ASSERT(vfs_lock_held(vfsp)); 821 822 if (why == ROOT_INIT) { 823 /* 824 * Open block device mounted on. 825 * When bio is fixed for vnodes this can all be vnode 826 * operations. 827 */ 828 error = VOP_OPEN(&devvp, 829 (vfsp->vfs_flag & VFS_RDONLY) ? FREAD : FREAD|FWRITE, 830 cr, NULL); 831 if (error) 832 goto out; 833 needclose = 1; 834 835 /* 836 * Refuse to go any further if this 837 * device is being used for swapping. 838 */ 839 if (IS_SWAPVP(devvp)) { 840 error = EBUSY; 841 goto out; 842 } 843 } 844 845 /* 846 * check for dev already mounted on 847 */ 848 if (vfsp->vfs_flag & VFS_REMOUNT) { 849 error = remountfs(vfsp, dev, raw_argsp, args_len); 850 if (error == 0) 851 VN_RELE(devvp); 852 return (error); 853 } 854 855 ASSERT(devvp != 0); 856 857 /* 858 * Flush back any dirty pages on the block device to 859 * try and keep the buffer cache in sync with the page 860 * cache if someone is trying to use block devices when 861 * they really should be using the raw device. 862 */ 863 (void) VOP_PUTPAGE(common_specvp(devvp), (offset_t)0, 864 (size_t)0, B_INVAL, cr, NULL); 865 866 /* 867 * read in superblock 868 */ 869 ufsvfsp = kmem_zalloc(sizeof (struct ufsvfs), KM_SLEEP); 870 tp = UFS_BREAD(ufsvfsp, dev, SBLOCK, SBSIZE); 871 if (tp->b_flags & B_ERROR) 872 goto out; 873 fsp = (struct fs *)tp->b_un.b_addr; 874 875 if ((fsp->fs_magic != FS_MAGIC) && (fsp->fs_magic != MTB_UFS_MAGIC)) { 876 cmn_err(CE_NOTE, 877 "mount: not a UFS magic number (0x%x)", fsp->fs_magic); 878 error = EINVAL; 879 goto out; 880 } 881 882 if ((fsp->fs_magic == FS_MAGIC) && 883 (fsp->fs_version != UFS_EFISTYLE4NONEFI_VERSION_2 && 884 fsp->fs_version != UFS_VERSION_MIN)) { 885 cmn_err(CE_NOTE, 886 "mount: unrecognized version of UFS on-disk format: %d", 887 fsp->fs_version); 888 error = EINVAL; 889 goto out; 890 } 891 892 if ((fsp->fs_magic == MTB_UFS_MAGIC) && 893 (fsp->fs_version > MTB_UFS_VERSION_1 || 894 fsp->fs_version < MTB_UFS_VERSION_MIN)) { 895 cmn_err(CE_NOTE, 896 "mount: unrecognized version of UFS on-disk format: %d", 897 fsp->fs_version); 898 error = EINVAL; 899 goto out; 900 } 901 902 #ifndef _LP64 903 if (fsp->fs_magic == MTB_UFS_MAGIC) { 904 /* 905 * Find the size of the device in sectors. If the 906 * the size in sectors is greater than INT_MAX, it's 907 * a multi-terabyte file system, which can't be 908 * mounted by a 32-bit kernel. We can't use the 909 * fsbtodb() macro in the next line because the macro 910 * casts the intermediate values to daddr_t, which is 911 * a 32-bit quantity in a 32-bit kernel. Here we 912 * really do need the intermediate values to be held 913 * in 64-bit quantities because we're checking for 914 * overflow of a 32-bit field. 915 */ 916 if ((((diskaddr_t)(fsp->fs_size)) << fsp->fs_fsbtodb) 917 > INT_MAX) { 918 cmn_err(CE_NOTE, 919 "mount: multi-terabyte UFS cannot be" 920 " mounted by a 32-bit kernel"); 921 error = EINVAL; 922 goto out; 923 } 924 925 } 926 #endif 927 928 if (fsp->fs_bsize > MAXBSIZE || fsp->fs_frag > MAXFRAG || 929 fsp->fs_bsize < sizeof (struct fs) || fsp->fs_bsize < PAGESIZE) { 930 error = EINVAL; /* also needs translation */ 931 goto out; 932 } 933 934 /* 935 * Allocate VFS private data. 936 */ 937 vfsp->vfs_bcount = 0; 938 vfsp->vfs_data = (caddr_t)ufsvfsp; 939 vfsp->vfs_fstype = ufsfstype; 940 vfsp->vfs_dev = dev; 941 vfsp->vfs_flag |= VFS_NOTRUNC; 942 vfs_make_fsid(&vfsp->vfs_fsid, dev, ufsfstype); 943 ufsvfsp->vfs_devvp = devvp; 944 945 /* 946 * Cross-link with vfs and add to instance list. 947 */ 948 ufsvfsp->vfs_vfs = vfsp; 949 ufs_vfs_add(ufsvfsp); 950 951 ufsvfsp->vfs_dev = dev; 952 ufsvfsp->vfs_bufp = tp; 953 954 ufsvfsp->vfs_dirsize = INODESIZE + (4 * ALLOCSIZE) + fsp->fs_fsize; 955 ufsvfsp->vfs_minfrags = 956 (int)((int64_t)fsp->fs_dsize * fsp->fs_minfree / 100); 957 /* 958 * if mount allows largefiles, indicate so in ufsvfs 959 */ 960 if (flags & UFSMNT_LARGEFILES) 961 ufsvfsp->vfs_lfflags |= UFS_LARGEFILES; 962 /* 963 * Initialize threads 964 */ 965 ufs_delete_init(ufsvfsp, 1); 966 ufs_thread_init(&ufsvfsp->vfs_reclaim, 0); 967 968 /* 969 * Chicken and egg problem. The superblock may have deltas 970 * in the log. So after the log is scanned we reread the 971 * superblock. We guarantee that the fields needed to 972 * scan the log will not be in the log. 973 */ 974 if (fsp->fs_logbno && fsp->fs_clean == FSLOG && 975 (fsp->fs_state + fsp->fs_time == FSOKAY)) { 976 error = lufs_snarf(ufsvfsp, fsp, (vfsp->vfs_flag & VFS_RDONLY)); 977 if (error) { 978 /* 979 * Allow a ro mount to continue even if the 980 * log cannot be processed - yet. 981 */ 982 if (!(vfsp->vfs_flag & VFS_RDONLY)) { 983 cmn_err(CE_WARN, "Error accessing ufs " 984 "log for %s; Please run fsck(1M)", path); 985 goto out; 986 } 987 } 988 tp->b_flags |= (B_AGE | B_STALE); 989 brelse(tp); 990 tp = UFS_BREAD(ufsvfsp, dev, SBLOCK, SBSIZE); 991 fsp = (struct fs *)tp->b_un.b_addr; 992 ufsvfsp->vfs_bufp = tp; 993 if (tp->b_flags & B_ERROR) 994 goto out; 995 } 996 997 /* 998 * Set logging mounted flag used by lockfs 999 */ 1000 ufsvfsp->vfs_validfs = UT_MOUNTED; 1001 1002 /* 1003 * Copy the super block into a buffer in its native size. 1004 * Use ngeteblk to allocate the buffer 1005 */ 1006 bp = ngeteblk(fsp->fs_bsize); 1007 ufsvfsp->vfs_bufp = bp; 1008 bp->b_edev = dev; 1009 bp->b_dev = cmpdev(dev); 1010 bp->b_blkno = SBLOCK; 1011 bp->b_bcount = fsp->fs_sbsize; 1012 bcopy(tp->b_un.b_addr, bp->b_un.b_addr, fsp->fs_sbsize); 1013 tp->b_flags |= B_STALE | B_AGE; 1014 brelse(tp); 1015 tp = 0; 1016 1017 fsp = (struct fs *)bp->b_un.b_addr; 1018 /* 1019 * Mount fails if superblock flag indicates presence of large 1020 * files and filesystem is attempted to be mounted 'nolargefiles'. 1021 * The exception is for a read only mount of root, which we 1022 * always want to succeed, so fsck can fix potential problems. 1023 * The assumption is that we will remount root at some point, 1024 * and the remount will enforce the mount option. 1025 */ 1026 if (!(isroot & (vfsp->vfs_flag & VFS_RDONLY)) && 1027 (fsp->fs_flags & FSLARGEFILES) && 1028 !(flags & UFSMNT_LARGEFILES)) { 1029 error = EFBIG; 1030 goto out; 1031 } 1032 1033 if (vfsp->vfs_flag & VFS_RDONLY) { 1034 fsp->fs_ronly = 1; 1035 fsp->fs_fmod = 0; 1036 if (((fsp->fs_state + fsp->fs_time) == FSOKAY) && 1037 ((fsp->fs_clean == FSCLEAN) || 1038 (fsp->fs_clean == FSSTABLE) || 1039 (fsp->fs_clean == FSLOG))) { 1040 if (isroot) { 1041 if (fsp->fs_clean == FSLOG) { 1042 if (fsp->fs_rolled == FS_ALL_ROLLED) { 1043 ufs_clean_root = 1; 1044 } 1045 } else { 1046 ufs_clean_root = 1; 1047 } 1048 } 1049 fsp->fs_clean = FSSTABLE; 1050 } else { 1051 fsp->fs_clean = FSBAD; 1052 } 1053 } else { 1054 1055 fsp->fs_fmod = 0; 1056 fsp->fs_ronly = 0; 1057 1058 TRANS_DOMATAMAP(ufsvfsp); 1059 1060 if ((TRANS_ISERROR(ufsvfsp)) || 1061 (((fsp->fs_state + fsp->fs_time) == FSOKAY) && 1062 fsp->fs_clean == FSLOG && !TRANS_ISTRANS(ufsvfsp))) { 1063 ufsvfsp->vfs_log = NULL; 1064 ufsvfsp->vfs_domatamap = 0; 1065 error = ENOSPC; 1066 goto out; 1067 } 1068 1069 if (((fsp->fs_state + fsp->fs_time) == FSOKAY) && 1070 (fsp->fs_clean == FSCLEAN || 1071 fsp->fs_clean == FSSTABLE || 1072 fsp->fs_clean == FSLOG)) 1073 fsp->fs_clean = FSSTABLE; 1074 else { 1075 if (isroot) { 1076 /* 1077 * allow root partition to be mounted even 1078 * when fs_state is not ok 1079 * will be fixed later by a remount root 1080 */ 1081 fsp->fs_clean = FSBAD; 1082 ufsvfsp->vfs_log = NULL; 1083 ufsvfsp->vfs_domatamap = 0; 1084 } else { 1085 error = ENOSPC; 1086 goto out; 1087 } 1088 } 1089 1090 if (fsp->fs_clean == FSSTABLE && TRANS_ISTRANS(ufsvfsp)) 1091 fsp->fs_clean = FSLOG; 1092 } 1093 TRANS_MATA_MOUNT(ufsvfsp); 1094 needtrans = 1; 1095 1096 vfsp->vfs_bsize = fsp->fs_bsize; 1097 1098 /* 1099 * Read in summary info 1100 */ 1101 if (error = ufs_getsummaryinfo(dev, ufsvfsp, fsp)) 1102 goto out; 1103 1104 /* 1105 * lastwhinetime is set to zero rather than lbolt, so that after 1106 * mounting if the filesystem is found to be full, then immediately the 1107 * "file system message" will be logged. 1108 */ 1109 ufsvfsp->vfs_lastwhinetime = 0L; 1110 1111 1112 mutex_init(&ufsvfsp->vfs_lock, NULL, MUTEX_DEFAULT, NULL); 1113 (void) copystr(path, fsp->fs_fsmnt, sizeof (fsp->fs_fsmnt) - 1, &len); 1114 bzero(fsp->fs_fsmnt + len, sizeof (fsp->fs_fsmnt) - len); 1115 1116 /* 1117 * Sanity checks for old file systems 1118 */ 1119 if (fsp->fs_postblformat == FS_42POSTBLFMT) 1120 ufsvfsp->vfs_nrpos = 8; 1121 else 1122 ufsvfsp->vfs_nrpos = fsp->fs_nrpos; 1123 1124 /* 1125 * Initialize lockfs structure to support file system locking 1126 */ 1127 bzero(&ufsvfsp->vfs_ulockfs.ul_lockfs, 1128 sizeof (struct lockfs)); 1129 ufsvfsp->vfs_ulockfs.ul_fs_lock = ULOCKFS_ULOCK; 1130 mutex_init(&ufsvfsp->vfs_ulockfs.ul_lock, NULL, 1131 MUTEX_DEFAULT, NULL); 1132 cv_init(&ufsvfsp->vfs_ulockfs.ul_cv, NULL, CV_DEFAULT, NULL); 1133 1134 /* 1135 * We don't need to grab vfs_dqrwlock for this ufs_iget() call. 1136 * We are in the process of mounting the file system so there 1137 * is no need to grab the quota lock. If a quota applies to the 1138 * root inode, then it will be updated when quotas are enabled. 1139 * 1140 * However, we have an ASSERT(RW_LOCK_HELD(&ufsvfsp->vfs_dqrwlock)) 1141 * in getinoquota() that we want to keep so grab it anyway. 1142 */ 1143 rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 1144 1145 error = ufs_iget_alloced(vfsp, UFSROOTINO, &rip, cr); 1146 1147 rw_exit(&ufsvfsp->vfs_dqrwlock); 1148 1149 if (error) 1150 goto out; 1151 1152 /* 1153 * make sure root inode is a directory. Returning ENOTDIR might 1154 * be confused with the mount point not being a directory, so 1155 * we use EIO instead. 1156 */ 1157 if ((rip->i_mode & IFMT) != IFDIR) { 1158 /* 1159 * Mark this inode as subject for cleanup 1160 * to avoid stray inodes in the cache. 1161 */ 1162 rvp = ITOV(rip); 1163 error = EIO; 1164 goto out; 1165 } 1166 1167 rvp = ITOV(rip); 1168 mutex_enter(&rvp->v_lock); 1169 rvp->v_flag |= VROOT; 1170 mutex_exit(&rvp->v_lock); 1171 ufsvfsp->vfs_root = rvp; 1172 /* The buffer for the root inode does not contain a valid b_vp */ 1173 (void) bfinval(dev, 0); 1174 1175 /* options */ 1176 ufsvfsp->vfs_nosetsec = flags & UFSMNT_NOSETSEC; 1177 ufsvfsp->vfs_nointr = flags & UFSMNT_NOINTR; 1178 ufsvfsp->vfs_syncdir = flags & UFSMNT_SYNCDIR; 1179 ufsvfsp->vfs_noatime = flags & UFSMNT_NOATIME; 1180 if ((flags & UFSMNT_NODFRATIME) || ufsvfsp->vfs_noatime) 1181 ufsvfsp->vfs_dfritime &= ~UFS_DFRATIME; 1182 else /* dfratime, default behavior */ 1183 ufsvfsp->vfs_dfritime |= UFS_DFRATIME; 1184 if (flags & UFSMNT_FORCEDIRECTIO) 1185 ufsvfsp->vfs_forcedirectio = 1; 1186 else if (flags & UFSMNT_NOFORCEDIRECTIO) 1187 ufsvfsp->vfs_forcedirectio = 0; 1188 ufsvfsp->vfs_iotstamp = ddi_get_lbolt(); 1189 1190 ufsvfsp->vfs_nindiroffset = fsp->fs_nindir - 1; 1191 ufsvfsp->vfs_nindirshift = highbit(ufsvfsp->vfs_nindiroffset); 1192 ufsvfsp->vfs_ioclustsz = fsp->fs_bsize * fsp->fs_maxcontig; 1193 1194 if (cdev_ioctl(dev, DKIOCINFO, (intptr_t)&ci, 1195 FKIOCTL|FNATIVE|FREAD, CRED(), &status) == 0) { 1196 ufsvfsp->vfs_iotransz = ci.dki_maxtransfer * DEV_BSIZE; 1197 } else { 1198 ufsvfsp->vfs_iotransz = MIN(maxphys, ufs_maxmaxphys); 1199 } 1200 1201 if (ufsvfsp->vfs_iotransz <= 0) { 1202 ufsvfsp->vfs_iotransz = MIN(maxphys, ufs_maxmaxphys); 1203 } 1204 1205 /* 1206 * When logging, used to reserve log space for writes and truncs 1207 */ 1208 ufsvfsp->vfs_avgbfree = fsp->fs_cstotal.cs_nbfree / fsp->fs_ncg; 1209 1210 /* 1211 * Determine whether to log cylinder group summary info. 1212 */ 1213 ufsvfsp->vfs_nolog_si = (fsp->fs_ncg < ufs_ncg_log); 1214 1215 if (TRANS_ISTRANS(ufsvfsp)) { 1216 /* 1217 * start the delete thread 1218 */ 1219 ufs_thread_start(&ufsvfsp->vfs_delete, ufs_thread_delete, vfsp); 1220 1221 /* 1222 * start reclaim thread if the filesystem was not mounted 1223 * read only. 1224 */ 1225 if (!fsp->fs_ronly && (fsp->fs_reclaim & 1226 (FS_RECLAIM|FS_RECLAIMING))) { 1227 fsp->fs_reclaim &= ~FS_RECLAIM; 1228 fsp->fs_reclaim |= FS_RECLAIMING; 1229 ufs_thread_start(&ufsvfsp->vfs_reclaim, 1230 ufs_thread_reclaim, vfsp); 1231 } 1232 1233 /* Mark the fs as unrolled */ 1234 fsp->fs_rolled = FS_NEED_ROLL; 1235 } else if (!fsp->fs_ronly && (fsp->fs_reclaim & 1236 (FS_RECLAIM|FS_RECLAIMING))) { 1237 /* 1238 * If a file system that is mounted nologging, after 1239 * having previously been mounted logging, becomes 1240 * unmounted whilst the reclaim thread is in the throes 1241 * of reclaiming open/deleted inodes, a subsequent mount 1242 * of such a file system with logging disabled could lead 1243 * to inodes becoming lost. So, start reclaim now, even 1244 * though logging was disabled for the previous mount, to 1245 * tidy things up. 1246 */ 1247 fsp->fs_reclaim &= ~FS_RECLAIM; 1248 fsp->fs_reclaim |= FS_RECLAIMING; 1249 ufs_thread_start(&ufsvfsp->vfs_reclaim, 1250 ufs_thread_reclaim, vfsp); 1251 } 1252 1253 if (!fsp->fs_ronly) { 1254 TRANS_SBWRITE(ufsvfsp, TOP_MOUNT); 1255 if (error = geterror(ufsvfsp->vfs_bufp)) 1256 goto out; 1257 } 1258 1259 /* fix-on-panic initialization */ 1260 if (isroot && !(flags & UFSMNT_ONERROR_FLGMASK)) 1261 flags |= UFSMNT_ONERROR_PANIC; /* XXX ..._RDONLY */ 1262 1263 if ((error = ufsfx_mount(ufsvfsp, flags)) != 0) 1264 goto out; 1265 1266 if (why == ROOT_INIT && isroot) 1267 rootvp = devvp; 1268 1269 return (0); 1270 out: 1271 if (error == 0) 1272 error = EIO; 1273 if (rvp) { 1274 /* the following sequence is similar to ufs_unmount() */ 1275 1276 /* 1277 * There's a problem that ufs_iget() puts inodes into 1278 * the inode cache before it returns them. If someone 1279 * traverses that cache and gets a reference to our 1280 * inode, there's a chance they'll still be using it 1281 * after we've destroyed it. This is a hard race to 1282 * hit, but it's happened (putting in a medium delay 1283 * here, and a large delay in ufs_scan_inodes() for 1284 * inodes on the device we're bailing out on, makes 1285 * the race easy to demonstrate). The symptom is some 1286 * other part of UFS faulting on bad inode contents, 1287 * or when grabbing one of the locks inside the inode, 1288 * etc. The usual victim is ufs_scan_inodes() or 1289 * someone called by it. 1290 */ 1291 1292 /* 1293 * First, isolate it so that no new references can be 1294 * gotten via the inode cache. 1295 */ 1296 ihm = &ih_lock[INOHASH(UFSROOTINO)]; 1297 mutex_enter(ihm); 1298 remque(rip); 1299 mutex_exit(ihm); 1300 1301 /* 1302 * Now wait for all outstanding references except our 1303 * own to drain. This could, in theory, take forever, 1304 * so don't wait *too* long. If we time out, mark 1305 * it stale and leak it, so we don't hit the problem 1306 * described above. 1307 * 1308 * Note that v_count is an int, which means we can read 1309 * it in one operation. Thus, there's no need to lock 1310 * around our tests. 1311 */ 1312 elapsed = 0; 1313 while ((rvp->v_count > 1) && (elapsed < ufs_mount_timeout)) { 1314 delay(ufs_mount_error_delay * drv_usectohz(1000)); 1315 elapsed += ufs_mount_error_delay; 1316 } 1317 1318 if (rvp->v_count > 1) { 1319 mutex_enter(&rip->i_tlock); 1320 rip->i_flag |= ISTALE; 1321 mutex_exit(&rip->i_tlock); 1322 cmn_err(CE_WARN, 1323 "Timed out while cleaning up after " 1324 "failed mount of %s", path); 1325 } else { 1326 1327 /* 1328 * Now we're the only one with a handle left, so tear 1329 * it down the rest of the way. 1330 */ 1331 if (ufs_rmidle(rip)) 1332 VN_RELE(rvp); 1333 ufs_si_del(rip); 1334 rip->i_ufsvfs = NULL; 1335 rvp->v_vfsp = NULL; 1336 rvp->v_type = VBAD; 1337 VN_RELE(rvp); 1338 } 1339 } 1340 if (needtrans) { 1341 TRANS_MATA_UMOUNT(ufsvfsp); 1342 } 1343 if (ufsvfsp) { 1344 ufs_vfs_remove(ufsvfsp); 1345 ufs_thread_exit(&ufsvfsp->vfs_delete); 1346 ufs_thread_exit(&ufsvfsp->vfs_reclaim); 1347 mutex_destroy(&ufsvfsp->vfs_lock); 1348 if (ufsvfsp->vfs_log) { 1349 lufs_unsnarf(ufsvfsp); 1350 } 1351 kmem_free(ufsvfsp, sizeof (struct ufsvfs)); 1352 } 1353 if (bp) { 1354 bp->b_flags |= (B_STALE|B_AGE); 1355 brelse(bp); 1356 } 1357 if (tp) { 1358 tp->b_flags |= (B_STALE|B_AGE); 1359 brelse(tp); 1360 } 1361 if (needclose) { 1362 (void) VOP_CLOSE(devvp, (vfsp->vfs_flag & VFS_RDONLY) ? 1363 FREAD : FREAD|FWRITE, 1, (offset_t)0, cr, NULL); 1364 bflush(dev); 1365 (void) bfinval(dev, 1); 1366 } 1367 return (error); 1368 } 1369 1370 /* 1371 * vfs operations 1372 */ 1373 static int 1374 ufs_unmount(struct vfs *vfsp, int fflag, struct cred *cr) 1375 { 1376 dev_t dev = vfsp->vfs_dev; 1377 struct ufsvfs *ufsvfsp = (struct ufsvfs *)vfsp->vfs_data; 1378 struct fs *fs = ufsvfsp->vfs_fs; 1379 struct ulockfs *ulp = &ufsvfsp->vfs_ulockfs; 1380 struct vnode *bvp, *vp; 1381 struct buf *bp; 1382 struct inode *ip, *inext, *rip; 1383 union ihead *ih; 1384 int error, flag, i; 1385 struct lockfs lockfs; 1386 int poll_events = POLLPRI; 1387 extern struct pollhead ufs_pollhd; 1388 refstr_t *mountpoint; 1389 1390 ASSERT(vfs_lock_held(vfsp)); 1391 1392 if (secpolicy_fs_unmount(cr, vfsp) != 0) 1393 return (EPERM); 1394 /* 1395 * Forced unmount is now supported through the 1396 * lockfs protocol. 1397 */ 1398 if (fflag & MS_FORCE) { 1399 /* 1400 * Mark the filesystem as being unmounted now in 1401 * case of a forcible umount before we take any 1402 * locks inside UFS to prevent racing with a VFS_VGET() 1403 * request. Throw these VFS_VGET() requests away for 1404 * the duration of the forcible umount so they won't 1405 * use stale or even freed data later on when we're done. 1406 * It may happen that the VFS has had a additional hold 1407 * placed on it by someone other than UFS and thus will 1408 * not get freed immediately once we're done with the 1409 * umount by dounmount() - use VFS_UNMOUNTED to inform 1410 * users of this still-alive VFS that its corresponding 1411 * filesystem being gone so they can detect that and error 1412 * out. 1413 */ 1414 vfsp->vfs_flag |= VFS_UNMOUNTED; 1415 1416 ufs_thread_suspend(&ufsvfsp->vfs_delete); 1417 mutex_enter(&ulp->ul_lock); 1418 /* 1419 * If file system is already hard locked, 1420 * unmount the file system, otherwise 1421 * hard lock it before unmounting. 1422 */ 1423 if (!ULOCKFS_IS_HLOCK(ulp)) { 1424 atomic_inc_ulong(&ufs_quiesce_pend); 1425 lockfs.lf_lock = LOCKFS_HLOCK; 1426 lockfs.lf_flags = 0; 1427 lockfs.lf_key = ulp->ul_lockfs.lf_key + 1; 1428 lockfs.lf_comlen = 0; 1429 lockfs.lf_comment = NULL; 1430 ufs_freeze(ulp, &lockfs); 1431 ULOCKFS_SET_BUSY(ulp); 1432 LOCKFS_SET_BUSY(&ulp->ul_lockfs); 1433 (void) ufs_quiesce(ulp); 1434 (void) ufs_flush(vfsp); 1435 (void) ufs_thaw(vfsp, ufsvfsp, ulp); 1436 atomic_dec_ulong(&ufs_quiesce_pend); 1437 ULOCKFS_CLR_BUSY(ulp); 1438 LOCKFS_CLR_BUSY(&ulp->ul_lockfs); 1439 poll_events |= POLLERR; 1440 pollwakeup(&ufs_pollhd, poll_events); 1441 } 1442 ufs_thread_continue(&ufsvfsp->vfs_delete); 1443 mutex_exit(&ulp->ul_lock); 1444 } 1445 1446 /* let all types of writes go through */ 1447 ufsvfsp->vfs_iotstamp = ddi_get_lbolt(); 1448 1449 /* coordinate with global hlock thread */ 1450 if (TRANS_ISTRANS(ufsvfsp) && (ufsvfsp->vfs_validfs == UT_HLOCKING)) { 1451 /* 1452 * last possibility for a forced umount to fail hence clear 1453 * VFS_UNMOUNTED if appropriate. 1454 */ 1455 if (fflag & MS_FORCE) 1456 vfsp->vfs_flag &= ~VFS_UNMOUNTED; 1457 return (EAGAIN); 1458 } 1459 1460 ufsvfsp->vfs_validfs = UT_UNMOUNTED; 1461 1462 /* kill the reclaim thread */ 1463 ufs_thread_exit(&ufsvfsp->vfs_reclaim); 1464 1465 /* suspend the delete thread */ 1466 ufs_thread_suspend(&ufsvfsp->vfs_delete); 1467 1468 /* 1469 * drain the delete and idle queues 1470 */ 1471 ufs_delete_drain(vfsp, -1, 1); 1472 ufs_idle_drain(vfsp); 1473 1474 /* 1475 * use the lockfs protocol to prevent new ops from starting 1476 * a forcible umount can not fail beyond this point as 1477 * we hard-locked the filesystem and drained all current consumers 1478 * before. 1479 */ 1480 mutex_enter(&ulp->ul_lock); 1481 1482 /* 1483 * if the file system is busy; return EBUSY 1484 */ 1485 if (ulp->ul_vnops_cnt || ulp->ul_falloc_cnt || ULOCKFS_IS_SLOCK(ulp)) { 1486 error = EBUSY; 1487 goto out; 1488 } 1489 1490 /* 1491 * if this is not a forced unmount (!hard/error locked), then 1492 * get rid of every inode except the root and quota inodes 1493 * also, commit any outstanding transactions 1494 */ 1495 if (!ULOCKFS_IS_HLOCK(ulp) && !ULOCKFS_IS_ELOCK(ulp)) 1496 if (error = ufs_flush(vfsp)) 1497 goto out; 1498 1499 /* 1500 * ignore inodes in the cache if fs is hard locked or error locked 1501 */ 1502 rip = VTOI(ufsvfsp->vfs_root); 1503 if (!ULOCKFS_IS_HLOCK(ulp) && !ULOCKFS_IS_ELOCK(ulp)) { 1504 /* 1505 * Otherwise, only the quota and root inodes are in the cache. 1506 * 1507 * Avoid racing with ufs_update() and ufs_sync(). 1508 */ 1509 mutex_enter(&ufs_scan_lock); 1510 1511 for (i = 0, ih = ihead; i < inohsz; i++, ih++) { 1512 mutex_enter(&ih_lock[i]); 1513 for (ip = ih->ih_chain[0]; 1514 ip != (struct inode *)ih; 1515 ip = ip->i_forw) { 1516 if (ip->i_ufsvfs != ufsvfsp) 1517 continue; 1518 if (ip == ufsvfsp->vfs_qinod) 1519 continue; 1520 if (ip == rip && ITOV(ip)->v_count == 1) 1521 continue; 1522 mutex_exit(&ih_lock[i]); 1523 mutex_exit(&ufs_scan_lock); 1524 error = EBUSY; 1525 goto out; 1526 } 1527 mutex_exit(&ih_lock[i]); 1528 } 1529 mutex_exit(&ufs_scan_lock); 1530 } 1531 1532 /* 1533 * if a snapshot exists and this is a forced unmount, then delete 1534 * the snapshot. Otherwise return EBUSY. This will insure the 1535 * snapshot always belongs to a valid file system. 1536 */ 1537 if (ufsvfsp->vfs_snapshot) { 1538 if (ULOCKFS_IS_HLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp)) { 1539 (void) fssnap_delete(&ufsvfsp->vfs_snapshot); 1540 } else { 1541 error = EBUSY; 1542 goto out; 1543 } 1544 } 1545 1546 /* 1547 * Close the quota file and invalidate anything left in the quota 1548 * cache for this file system. Pass kcred to allow all quota 1549 * manipulations. 1550 */ 1551 (void) closedq(ufsvfsp, kcred); 1552 invalidatedq(ufsvfsp); 1553 /* 1554 * drain the delete and idle queues 1555 */ 1556 ufs_delete_drain(vfsp, -1, 0); 1557 ufs_idle_drain(vfsp); 1558 1559 /* 1560 * discard the inodes for this fs (including root, shadow, and quota) 1561 */ 1562 for (i = 0, ih = ihead; i < inohsz; i++, ih++) { 1563 mutex_enter(&ih_lock[i]); 1564 for (inext = 0, ip = ih->ih_chain[0]; 1565 ip != (struct inode *)ih; 1566 ip = inext) { 1567 inext = ip->i_forw; 1568 if (ip->i_ufsvfs != ufsvfsp) 1569 continue; 1570 1571 /* 1572 * We've found the inode in the cache and as we 1573 * hold the hash mutex the inode can not 1574 * disappear from underneath us. 1575 * We also know it must have at least a vnode 1576 * reference count of 1. 1577 * We perform an additional VN_HOLD so the VN_RELE 1578 * in case we take the inode off the idle queue 1579 * can not be the last one. 1580 * It is safe to grab the writer contents lock here 1581 * to prevent a race with ufs_iinactive() putting 1582 * inodes into the idle queue while we operate on 1583 * this inode. 1584 */ 1585 rw_enter(&ip->i_contents, RW_WRITER); 1586 1587 vp = ITOV(ip); 1588 VN_HOLD(vp) 1589 remque(ip); 1590 if (ufs_rmidle(ip)) 1591 VN_RELE(vp); 1592 ufs_si_del(ip); 1593 /* 1594 * rip->i_ufsvfsp is needed by bflush() 1595 */ 1596 if (ip != rip) 1597 ip->i_ufsvfs = NULL; 1598 /* 1599 * Set vnode's vfsops to dummy ops, which return 1600 * EIO. This is needed to forced unmounts to work 1601 * with lofs/nfs properly. 1602 */ 1603 if (ULOCKFS_IS_HLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp)) 1604 vp->v_vfsp = &EIO_vfs; 1605 else 1606 vp->v_vfsp = NULL; 1607 vp->v_type = VBAD; 1608 1609 rw_exit(&ip->i_contents); 1610 1611 VN_RELE(vp); 1612 } 1613 mutex_exit(&ih_lock[i]); 1614 } 1615 ufs_si_cache_flush(dev); 1616 1617 /* 1618 * kill the delete thread and drain the idle queue 1619 */ 1620 ufs_thread_exit(&ufsvfsp->vfs_delete); 1621 ufs_idle_drain(vfsp); 1622 1623 bp = ufsvfsp->vfs_bufp; 1624 bvp = ufsvfsp->vfs_devvp; 1625 flag = !fs->fs_ronly; 1626 if (flag) { 1627 bflush(dev); 1628 if (fs->fs_clean != FSBAD) { 1629 if (fs->fs_clean == FSSTABLE) 1630 fs->fs_clean = FSCLEAN; 1631 fs->fs_reclaim &= ~FS_RECLAIM; 1632 } 1633 if (TRANS_ISTRANS(ufsvfsp) && 1634 !TRANS_ISERROR(ufsvfsp) && 1635 !ULOCKFS_IS_HLOCK(ulp) && 1636 (fs->fs_rolled == FS_NEED_ROLL)) { 1637 /* 1638 * ufs_flush() above has flushed the last Moby. 1639 * This is needed to ensure the following superblock 1640 * update really is the last metadata update 1641 */ 1642 error = ufs_putsummaryinfo(dev, ufsvfsp, fs); 1643 if (error == 0) { 1644 fs->fs_rolled = FS_ALL_ROLLED; 1645 } 1646 } 1647 TRANS_SBUPDATE(ufsvfsp, vfsp, TOP_SBUPDATE_UNMOUNT); 1648 /* 1649 * push this last transaction 1650 */ 1651 curthread->t_flag |= T_DONTBLOCK; 1652 TRANS_BEGIN_SYNC(ufsvfsp, TOP_COMMIT_UNMOUNT, TOP_COMMIT_SIZE, 1653 error); 1654 if (!error) 1655 TRANS_END_SYNC(ufsvfsp, error, TOP_COMMIT_UNMOUNT, 1656 TOP_COMMIT_SIZE); 1657 curthread->t_flag &= ~T_DONTBLOCK; 1658 } 1659 1660 TRANS_MATA_UMOUNT(ufsvfsp); 1661 lufs_unsnarf(ufsvfsp); /* Release the in-memory structs */ 1662 ufsfx_unmount(ufsvfsp); /* fix-on-panic bookkeeping */ 1663 kmem_free(fs->fs_u.fs_csp, fs->fs_cssize); 1664 1665 bp->b_flags |= B_STALE|B_AGE; 1666 ufsvfsp->vfs_bufp = NULL; /* don't point at freed buf */ 1667 brelse(bp); /* free the superblock buf */ 1668 1669 (void) VOP_PUTPAGE(common_specvp(bvp), (offset_t)0, (size_t)0, 1670 B_INVAL, cr, NULL); 1671 (void) VOP_CLOSE(bvp, flag, 1, (offset_t)0, cr, NULL); 1672 bflush(dev); 1673 (void) bfinval(dev, 1); 1674 VN_RELE(bvp); 1675 1676 /* 1677 * It is now safe to NULL out the ufsvfs pointer and discard 1678 * the root inode. 1679 */ 1680 rip->i_ufsvfs = NULL; 1681 VN_RELE(ITOV(rip)); 1682 1683 /* free up lockfs comment structure, if any */ 1684 if (ulp->ul_lockfs.lf_comlen && ulp->ul_lockfs.lf_comment) 1685 kmem_free(ulp->ul_lockfs.lf_comment, ulp->ul_lockfs.lf_comlen); 1686 1687 /* 1688 * Remove from instance list. 1689 */ 1690 ufs_vfs_remove(ufsvfsp); 1691 1692 /* 1693 * For a forcible unmount, threads may be asleep in 1694 * ufs_lockfs_begin/ufs_check_lockfs. These threads will need 1695 * the ufsvfs structure so we don't free it, yet. ufs_update 1696 * will free it up after awhile. 1697 */ 1698 if (ULOCKFS_IS_HLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp)) { 1699 extern kmutex_t ufsvfs_mutex; 1700 extern struct ufsvfs *ufsvfslist; 1701 1702 mutex_enter(&ufsvfs_mutex); 1703 ufsvfsp->vfs_dontblock = 1; 1704 ufsvfsp->vfs_next = ufsvfslist; 1705 ufsvfslist = ufsvfsp; 1706 mutex_exit(&ufsvfs_mutex); 1707 /* wakeup any suspended threads */ 1708 cv_broadcast(&ulp->ul_cv); 1709 mutex_exit(&ulp->ul_lock); 1710 } else { 1711 mutex_destroy(&ufsvfsp->vfs_lock); 1712 kmem_free(ufsvfsp, sizeof (struct ufsvfs)); 1713 } 1714 1715 /* 1716 * Now mark the filesystem as unmounted since we're done with it. 1717 */ 1718 vfsp->vfs_flag |= VFS_UNMOUNTED; 1719 1720 return (0); 1721 out: 1722 /* open the fs to new ops */ 1723 cv_broadcast(&ulp->ul_cv); 1724 mutex_exit(&ulp->ul_lock); 1725 1726 if (TRANS_ISTRANS(ufsvfsp)) { 1727 /* allow the delete thread to continue */ 1728 ufs_thread_continue(&ufsvfsp->vfs_delete); 1729 /* restart the reclaim thread */ 1730 ufs_thread_start(&ufsvfsp->vfs_reclaim, ufs_thread_reclaim, 1731 vfsp); 1732 /* coordinate with global hlock thread */ 1733 ufsvfsp->vfs_validfs = UT_MOUNTED; 1734 /* check for trans errors during umount */ 1735 ufs_trans_onerror(); 1736 1737 /* 1738 * if we have a separate /usr it will never unmount 1739 * when halting. In order to not re-read all the 1740 * cylinder group summary info on mounting after 1741 * reboot the logging of summary info is re-enabled 1742 * and the super block written out. 1743 */ 1744 mountpoint = vfs_getmntpoint(vfsp); 1745 if ((fs->fs_si == FS_SI_OK) && 1746 (strcmp("/usr", refstr_value(mountpoint)) == 0)) { 1747 ufsvfsp->vfs_nolog_si = 0; 1748 UFS_BWRITE2(NULL, ufsvfsp->vfs_bufp); 1749 } 1750 refstr_rele(mountpoint); 1751 } 1752 1753 return (error); 1754 } 1755 1756 static int 1757 ufs_root(struct vfs *vfsp, struct vnode **vpp) 1758 { 1759 struct ufsvfs *ufsvfsp; 1760 struct vnode *vp; 1761 1762 if (!vfsp) 1763 return (EIO); 1764 1765 ufsvfsp = (struct ufsvfs *)vfsp->vfs_data; 1766 if (!ufsvfsp || !ufsvfsp->vfs_root) 1767 return (EIO); /* forced unmount */ 1768 1769 vp = ufsvfsp->vfs_root; 1770 VN_HOLD(vp); 1771 *vpp = vp; 1772 return (0); 1773 } 1774 1775 /* 1776 * Get file system statistics. 1777 */ 1778 static int 1779 ufs_statvfs(struct vfs *vfsp, struct statvfs64 *sp) 1780 { 1781 struct fs *fsp; 1782 struct ufsvfs *ufsvfsp; 1783 int blk, i; 1784 long max_avail, used; 1785 dev32_t d32; 1786 1787 if (vfsp->vfs_flag & VFS_UNMOUNTED) 1788 return (EIO); 1789 1790 ufsvfsp = (struct ufsvfs *)vfsp->vfs_data; 1791 fsp = ufsvfsp->vfs_fs; 1792 if ((fsp->fs_magic != FS_MAGIC) && (fsp->fs_magic != MTB_UFS_MAGIC)) 1793 return (EINVAL); 1794 if (fsp->fs_magic == FS_MAGIC && 1795 (fsp->fs_version != UFS_EFISTYLE4NONEFI_VERSION_2 && 1796 fsp->fs_version != UFS_VERSION_MIN)) 1797 return (EINVAL); 1798 if (fsp->fs_magic == MTB_UFS_MAGIC && 1799 (fsp->fs_version > MTB_UFS_VERSION_1 || 1800 fsp->fs_version < MTB_UFS_VERSION_MIN)) 1801 return (EINVAL); 1802 1803 /* 1804 * get the basic numbers 1805 */ 1806 (void) bzero(sp, sizeof (*sp)); 1807 1808 sp->f_bsize = fsp->fs_bsize; 1809 sp->f_frsize = fsp->fs_fsize; 1810 sp->f_blocks = (fsblkcnt64_t)fsp->fs_dsize; 1811 sp->f_bfree = (fsblkcnt64_t)fsp->fs_cstotal.cs_nbfree * fsp->fs_frag + 1812 fsp->fs_cstotal.cs_nffree; 1813 1814 sp->f_files = (fsfilcnt64_t)fsp->fs_ncg * fsp->fs_ipg; 1815 sp->f_ffree = (fsfilcnt64_t)fsp->fs_cstotal.cs_nifree; 1816 1817 /* 1818 * Adjust the numbers based on things waiting to be deleted. 1819 * modifies f_bfree and f_ffree. Afterwards, everything we 1820 * come up with will be self-consistent. By definition, this 1821 * is a point-in-time snapshot, so the fact that the delete 1822 * thread's probably already invalidated the results is not a 1823 * problem. Note that if the delete thread is ever extended to 1824 * non-logging ufs, this adjustment must always be made. 1825 */ 1826 if (TRANS_ISTRANS(ufsvfsp)) 1827 ufs_delete_adjust_stats(ufsvfsp, sp); 1828 1829 /* 1830 * avail = MAX(max_avail - used, 0) 1831 */ 1832 max_avail = fsp->fs_dsize - ufsvfsp->vfs_minfrags; 1833 1834 used = (fsp->fs_dsize - sp->f_bfree); 1835 1836 if (max_avail > used) 1837 sp->f_bavail = (fsblkcnt64_t)max_avail - used; 1838 else 1839 sp->f_bavail = (fsblkcnt64_t)0; 1840 1841 sp->f_favail = sp->f_ffree; 1842 (void) cmpldev(&d32, vfsp->vfs_dev); 1843 sp->f_fsid = d32; 1844 (void) strcpy(sp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name); 1845 sp->f_flag = vf_to_stf(vfsp->vfs_flag); 1846 1847 /* keep coordinated with ufs_l_pathconf() */ 1848 sp->f_namemax = MAXNAMLEN; 1849 1850 if (fsp->fs_cpc == 0) { 1851 bzero(sp->f_fstr, 14); 1852 return (0); 1853 } 1854 blk = fsp->fs_spc * fsp->fs_cpc / NSPF(fsp); 1855 for (i = 0; i < blk; i += fsp->fs_frag) /* CSTYLED */ 1856 /* void */; 1857 i -= fsp->fs_frag; 1858 blk = i / fsp->fs_frag; 1859 bcopy(&(fs_rotbl(fsp)[blk]), sp->f_fstr, 14); 1860 return (0); 1861 } 1862 1863 /* 1864 * Flush any pending I/O to file system vfsp. 1865 * The ufs_update() routine will only flush *all* ufs files. 1866 * If vfsp is non-NULL, only sync this ufs (in preparation 1867 * for a umount). 1868 */ 1869 /*ARGSUSED*/ 1870 static int 1871 ufs_sync(struct vfs *vfsp, short flag, struct cred *cr) 1872 { 1873 struct ufsvfs *ufsvfsp; 1874 struct fs *fs; 1875 int cheap = flag & SYNC_ATTR; 1876 int error; 1877 1878 /* 1879 * SYNC_CLOSE means we're rebooting. Toss everything 1880 * on the idle queue so we don't have to slog through 1881 * a bunch of uninteresting inodes over and over again. 1882 */ 1883 if (flag & SYNC_CLOSE) 1884 ufs_idle_drain(NULL); 1885 1886 if (vfsp == NULL) { 1887 ufs_update(flag); 1888 return (0); 1889 } 1890 1891 /* Flush a single ufs */ 1892 if (!vfs_matchops(vfsp, ufs_vfsops) || vfs_lock(vfsp) != 0) 1893 return (0); 1894 1895 ufsvfsp = (struct ufsvfs *)vfsp->vfs_data; 1896 if (!ufsvfsp) 1897 return (EIO); 1898 fs = ufsvfsp->vfs_fs; 1899 mutex_enter(&ufsvfsp->vfs_lock); 1900 1901 if (ufsvfsp->vfs_dio && 1902 fs->fs_ronly == 0 && 1903 fs->fs_clean != FSBAD && 1904 fs->fs_clean != FSLOG) { 1905 /* turn off fast-io on unmount, so no fsck needed (4029401) */ 1906 ufsvfsp->vfs_dio = 0; 1907 fs->fs_clean = FSACTIVE; 1908 fs->fs_fmod = 1; 1909 } 1910 1911 /* Write back modified superblock */ 1912 if (fs->fs_fmod == 0) { 1913 mutex_exit(&ufsvfsp->vfs_lock); 1914 } else { 1915 if (fs->fs_ronly != 0) { 1916 mutex_exit(&ufsvfsp->vfs_lock); 1917 vfs_unlock(vfsp); 1918 return (ufs_fault(ufsvfsp->vfs_root, 1919 "fs = %s update: ro fs mod\n", fs->fs_fsmnt)); 1920 } 1921 fs->fs_fmod = 0; 1922 mutex_exit(&ufsvfsp->vfs_lock); 1923 1924 TRANS_SBUPDATE(ufsvfsp, vfsp, TOP_SBUPDATE_UPDATE); 1925 } 1926 vfs_unlock(vfsp); 1927 1928 /* 1929 * Avoid racing with ufs_update() and ufs_unmount(). 1930 * 1931 */ 1932 mutex_enter(&ufs_scan_lock); 1933 1934 (void) ufs_scan_inodes(1, ufs_sync_inode, 1935 (void *)(uintptr_t)cheap, ufsvfsp); 1936 1937 mutex_exit(&ufs_scan_lock); 1938 1939 bflush((dev_t)vfsp->vfs_dev); 1940 1941 /* 1942 * commit any outstanding async transactions 1943 */ 1944 curthread->t_flag |= T_DONTBLOCK; 1945 TRANS_BEGIN_SYNC(ufsvfsp, TOP_COMMIT_UPDATE, TOP_COMMIT_SIZE, error); 1946 if (!error) { 1947 TRANS_END_SYNC(ufsvfsp, error, TOP_COMMIT_UPDATE, 1948 TOP_COMMIT_SIZE); 1949 } 1950 curthread->t_flag &= ~T_DONTBLOCK; 1951 1952 return (0); 1953 } 1954 1955 1956 void 1957 sbupdate(struct vfs *vfsp) 1958 { 1959 struct ufsvfs *ufsvfsp = (struct ufsvfs *)vfsp->vfs_data; 1960 struct fs *fs = ufsvfsp->vfs_fs; 1961 struct buf *bp; 1962 int blks; 1963 caddr_t space; 1964 int i; 1965 size_t size; 1966 1967 /* 1968 * for ulockfs processing, limit the superblock writes 1969 */ 1970 if ((ufsvfsp->vfs_ulockfs.ul_sbowner) && 1971 (curthread != ufsvfsp->vfs_ulockfs.ul_sbowner)) { 1972 /* process later */ 1973 fs->fs_fmod = 1; 1974 return; 1975 } 1976 ULOCKFS_SET_MOD((&ufsvfsp->vfs_ulockfs)); 1977 1978 if (TRANS_ISTRANS(ufsvfsp)) { 1979 mutex_enter(&ufsvfsp->vfs_lock); 1980 ufs_sbwrite(ufsvfsp); 1981 mutex_exit(&ufsvfsp->vfs_lock); 1982 return; 1983 } 1984 1985 blks = howmany(fs->fs_cssize, fs->fs_fsize); 1986 space = (caddr_t)fs->fs_u.fs_csp; 1987 for (i = 0; i < blks; i += fs->fs_frag) { 1988 size = fs->fs_bsize; 1989 if (i + fs->fs_frag > blks) 1990 size = (blks - i) * fs->fs_fsize; 1991 bp = UFS_GETBLK(ufsvfsp, ufsvfsp->vfs_dev, 1992 (daddr_t)(fsbtodb(fs, fs->fs_csaddr + i)), 1993 fs->fs_bsize); 1994 bcopy(space, bp->b_un.b_addr, size); 1995 space += size; 1996 bp->b_bcount = size; 1997 UFS_BRWRITE(ufsvfsp, bp); 1998 } 1999 mutex_enter(&ufsvfsp->vfs_lock); 2000 ufs_sbwrite(ufsvfsp); 2001 mutex_exit(&ufsvfsp->vfs_lock); 2002 } 2003 2004 int ufs_vget_idle_count = 2; /* Number of inodes to idle each time */ 2005 static int 2006 ufs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp) 2007 { 2008 int error = 0; 2009 struct ufid *ufid; 2010 struct inode *ip; 2011 struct ufsvfs *ufsvfsp = (struct ufsvfs *)vfsp->vfs_data; 2012 struct ulockfs *ulp; 2013 2014 /* 2015 * Check for unmounted filesystem. 2016 */ 2017 if (vfsp->vfs_flag & VFS_UNMOUNTED) { 2018 error = EIO; 2019 goto errout; 2020 } 2021 2022 /* 2023 * Keep the idle queue from getting too long by 2024 * idling an inode before attempting to allocate another. 2025 * This operation must be performed before entering 2026 * lockfs or a transaction. 2027 */ 2028 if (ufs_idle_q.uq_ne > ufs_idle_q.uq_hiwat) 2029 if ((curthread->t_flag & T_DONTBLOCK) == 0) { 2030 ins.in_vidles.value.ul += ufs_vget_idle_count; 2031 ufs_idle_some(ufs_vget_idle_count); 2032 } 2033 2034 ufid = (struct ufid *)fidp; 2035 2036 if (error = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_VGET_MASK)) 2037 goto errout; 2038 2039 rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER); 2040 2041 error = ufs_iget(vfsp, ufid->ufid_ino, &ip, CRED()); 2042 2043 rw_exit(&ufsvfsp->vfs_dqrwlock); 2044 2045 ufs_lockfs_end(ulp); 2046 2047 if (error) 2048 goto errout; 2049 2050 /* 2051 * Check if the inode has been deleted or freed or is in transient state 2052 * since the last VFS_VGET() request for it, release it and don't return 2053 * it to the caller, presumably NFS, as it's no longer valid. 2054 */ 2055 if (ip->i_gen != ufid->ufid_gen || ip->i_mode == 0 || 2056 (ip->i_nlink <= 0)) { 2057 VN_RELE(ITOV(ip)); 2058 error = EINVAL; 2059 goto errout; 2060 } 2061 2062 *vpp = ITOV(ip); 2063 return (0); 2064 2065 errout: 2066 *vpp = NULL; 2067 return (error); 2068 } 2069 2070 static int 2071 ufsinit(int fstype, char *name) 2072 { 2073 static const fs_operation_def_t ufs_vfsops_template[] = { 2074 { VFSNAME_MOUNT, { .vfs_mount = ufs_mount } }, 2075 { VFSNAME_UNMOUNT, { .vfs_unmount = ufs_unmount } }, 2076 { VFSNAME_ROOT, { .vfs_root = ufs_root } }, 2077 { VFSNAME_STATVFS, { .vfs_statvfs = ufs_statvfs } }, 2078 { VFSNAME_SYNC, { .vfs_sync = ufs_sync } }, 2079 { VFSNAME_VGET, { .vfs_vget = ufs_vget } }, 2080 { VFSNAME_MOUNTROOT, { .vfs_mountroot = ufs_mountroot } }, 2081 { NULL, { NULL } } 2082 }; 2083 int error; 2084 2085 ufsfstype = fstype; 2086 2087 error = vfs_setfsops(fstype, ufs_vfsops_template, &ufs_vfsops); 2088 if (error != 0) { 2089 cmn_err(CE_WARN, "ufsinit: bad vfs ops template"); 2090 return (error); 2091 } 2092 2093 error = vn_make_ops(name, ufs_vnodeops_template, &ufs_vnodeops); 2094 if (error != 0) { 2095 (void) vfs_freevfsops_by_type(fstype); 2096 cmn_err(CE_WARN, "ufsinit: bad vnode ops template"); 2097 return (error); 2098 } 2099 2100 ufs_iinit(); 2101 return (0); 2102 } 2103 2104 #ifdef __sparc 2105 2106 /* 2107 * Mounting a mirrored SVM volume is only supported on ufs, 2108 * this is special-case boot code to support that configuration. 2109 * At this point, we have booted and mounted root on a 2110 * single component of the mirror. Complete the boot 2111 * by configuring SVM and converting the root to the 2112 * dev_t of the mirrored root device. This dev_t conversion 2113 * only works because the underlying device doesn't change. 2114 */ 2115 int 2116 ufs_remountroot(struct vfs *vfsp) 2117 { 2118 struct ufsvfs *ufsvfsp; 2119 struct ulockfs *ulp; 2120 dev_t new_rootdev; 2121 dev_t old_rootdev; 2122 struct vnode *old_rootvp; 2123 struct vnode *new_rootvp; 2124 int error, sberror = 0; 2125 struct inode *ip; 2126 union ihead *ih; 2127 struct buf *bp; 2128 int i; 2129 2130 old_rootdev = rootdev; 2131 old_rootvp = rootvp; 2132 2133 new_rootdev = getrootdev(); 2134 if (new_rootdev == (dev_t)NODEV) { 2135 return (ENODEV); 2136 } 2137 2138 new_rootvp = makespecvp(new_rootdev, VBLK); 2139 2140 error = VOP_OPEN(&new_rootvp, 2141 (vfsp->vfs_flag & VFS_RDONLY) ? FREAD : FREAD|FWRITE, CRED(), NULL); 2142 if (error) { 2143 cmn_err(CE_CONT, 2144 "Cannot open mirrored root device, error %d\n", error); 2145 return (error); 2146 } 2147 2148 if (vfs_lock(vfsp) != 0) { 2149 return (EBUSY); 2150 } 2151 2152 ufsvfsp = (struct ufsvfs *)vfsp->vfs_data; 2153 ulp = &ufsvfsp->vfs_ulockfs; 2154 2155 mutex_enter(&ulp->ul_lock); 2156 atomic_inc_ulong(&ufs_quiesce_pend); 2157 2158 (void) ufs_quiesce(ulp); 2159 (void) ufs_flush(vfsp); 2160 2161 /* 2162 * Convert root vfs to new dev_t, including vfs hash 2163 * table and fs id. 2164 */ 2165 vfs_root_redev(vfsp, new_rootdev, ufsfstype); 2166 2167 ufsvfsp->vfs_devvp = new_rootvp; 2168 ufsvfsp->vfs_dev = new_rootdev; 2169 2170 bp = ufsvfsp->vfs_bufp; 2171 bp->b_edev = new_rootdev; 2172 bp->b_dev = cmpdev(new_rootdev); 2173 2174 /* 2175 * The buffer for the root inode does not contain a valid b_vp 2176 */ 2177 (void) bfinval(new_rootdev, 0); 2178 2179 /* 2180 * Here we hand-craft inodes with old root device 2181 * references to refer to the new device instead. 2182 */ 2183 mutex_enter(&ufs_scan_lock); 2184 2185 for (i = 0, ih = ihead; i < inohsz; i++, ih++) { 2186 mutex_enter(&ih_lock[i]); 2187 for (ip = ih->ih_chain[0]; 2188 ip != (struct inode *)ih; 2189 ip = ip->i_forw) { 2190 if (ip->i_ufsvfs != ufsvfsp) 2191 continue; 2192 if (ip == ufsvfsp->vfs_qinod) 2193 continue; 2194 if (ip->i_dev == old_rootdev) { 2195 ip->i_dev = new_rootdev; 2196 } 2197 2198 if (ip->i_devvp == old_rootvp) { 2199 ip->i_devvp = new_rootvp; 2200 } 2201 } 2202 mutex_exit(&ih_lock[i]); 2203 } 2204 2205 mutex_exit(&ufs_scan_lock); 2206 2207 /* 2208 * Make Sure logging structures are using the new device 2209 * if logging is enabled. Also start any logging thread that 2210 * needs to write to the device and couldn't earlier. 2211 */ 2212 if (ufsvfsp->vfs_log) { 2213 buf_t *bp, *tbp; 2214 ml_unit_t *ul = ufsvfsp->vfs_log; 2215 struct fs *fsp = ufsvfsp->vfs_fs; 2216 2217 /* 2218 * Update the main logging structure. 2219 */ 2220 ul->un_dev = new_rootdev; 2221 2222 /* 2223 * Get a new bp for the on disk structures. 2224 */ 2225 bp = ul->un_bp; 2226 tbp = ngeteblk(dbtob(LS_SECTORS)); 2227 tbp->b_edev = new_rootdev; 2228 tbp->b_dev = cmpdev(new_rootdev); 2229 tbp->b_blkno = bp->b_blkno; 2230 bcopy(bp->b_un.b_addr, tbp->b_un.b_addr, DEV_BSIZE); 2231 bcopy(bp->b_un.b_addr, tbp->b_un.b_addr + DEV_BSIZE, DEV_BSIZE); 2232 bp->b_flags |= (B_STALE | B_AGE); 2233 brelse(bp); 2234 ul->un_bp = tbp; 2235 2236 /* 2237 * Allocate new circular buffers. 2238 */ 2239 alloc_rdbuf(&ul->un_rdbuf, MAPBLOCKSIZE, MAPBLOCKSIZE); 2240 alloc_wrbuf(&ul->un_wrbuf, ldl_bufsize(ul)); 2241 2242 /* 2243 * Clear the noroll bit which indicates that logging 2244 * can't roll the log yet and start the logmap roll thread 2245 * unless the filesystem is still read-only in which case 2246 * remountfs() will do it when going to read-write. 2247 */ 2248 ASSERT(ul->un_flags & LDL_NOROLL); 2249 2250 if (!fsp->fs_ronly) { 2251 ul->un_flags &= ~LDL_NOROLL; 2252 logmap_start_roll(ul); 2253 } 2254 2255 /* 2256 * Start the reclaim thread if needed. 2257 */ 2258 if (!fsp->fs_ronly && (fsp->fs_reclaim & 2259 (FS_RECLAIM|FS_RECLAIMING))) { 2260 fsp->fs_reclaim &= ~FS_RECLAIM; 2261 fsp->fs_reclaim |= FS_RECLAIMING; 2262 ufs_thread_start(&ufsvfsp->vfs_reclaim, 2263 ufs_thread_reclaim, vfsp); 2264 TRANS_SBWRITE(ufsvfsp, TOP_SBUPDATE_UPDATE); 2265 if (sberror = geterror(ufsvfsp->vfs_bufp)) { 2266 refstr_t *mntpt; 2267 mntpt = vfs_getmntpoint(vfsp); 2268 cmn_err(CE_WARN, 2269 "Remountroot failed to update Reclaim" 2270 "state for filesystem %s " 2271 "Error writing SuperBlock %d", 2272 refstr_value(mntpt), error); 2273 refstr_rele(mntpt); 2274 } 2275 } 2276 } 2277 2278 rootdev = new_rootdev; 2279 rootvp = new_rootvp; 2280 2281 atomic_dec_ulong(&ufs_quiesce_pend); 2282 cv_broadcast(&ulp->ul_cv); 2283 mutex_exit(&ulp->ul_lock); 2284 2285 vfs_unlock(vfsp); 2286 2287 error = VOP_CLOSE(old_rootvp, FREAD, 1, (offset_t)0, CRED(), NULL); 2288 if (error) { 2289 cmn_err(CE_CONT, 2290 "close of root device component failed, error %d\n", 2291 error); 2292 } 2293 VN_RELE(old_rootvp); 2294 2295 return (sberror ? sberror : error); 2296 } 2297 2298 #endif /* __sparc */