illumos-gate Wdiff usr/src/uts/common/fs/ufs/ufs_vfsops.c

Print this page

7127  remove -Wno-missing-braces from Makefile.uts

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/common/fs/ufs/ufs_vfsops.c
          +++ new/usr/src/uts/common/fs/ufs/ufs_vfsops.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   */
  25   25  
  26   26  /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T     */
  27   27  /*        All Rights Reserved   */
  28   28  
  29   29  /*
  30   30   * University Copyright- Copyright (c) 1982, 1986, 1988
  31   31   * The Regents of the University of California
  32   32   * All Rights Reserved
  33   33   *
  34   34   * University Acknowledgment- Portions of this document are derived from
  35   35   * software developed by the University of California, Berkeley, and its
  36   36   * contributors.
  37   37   */
  38   38  
  39   39  #include <sys/types.h>
  40   40  #include <sys/t_lock.h>
  41   41  #include <sys/param.h>
  42   42  #include <sys/systm.h>
  43   43  #include <sys/bitmap.h>
  44   44  #include <sys/sysmacros.h>
  45   45  #include <sys/kmem.h>
  46   46  #include <sys/signal.h>
  47   47  #include <sys/user.h>
  48   48  #include <sys/proc.h>
  49   49  #include <sys/disp.h>
  50   50  #include <sys/buf.h>
  51   51  #include <sys/pathname.h>
  52   52  #include <sys/vfs.h>
  53   53  #include <sys/vfs_opreg.h>
  54   54  #include <sys/vnode.h>
  55   55  #include <sys/file.h>
  56   56  #include <sys/atomic.h>
  57   57  #include <sys/uio.h>
  58   58  #include <sys/dkio.h>
  59   59  #include <sys/cred.h>
  60   60  #include <sys/conf.h>
  61   61  #include <sys/dnlc.h>
  62   62  #include <sys/kstat.h>
  63   63  #include <sys/acl.h>
  64   64  #include <sys/fs/ufs_fsdir.h>
  65   65  #include <sys/fs/ufs_fs.h>
  66   66  #include <sys/fs/ufs_inode.h>
  67   67  #include <sys/fs/ufs_mount.h>
  68   68  #include <sys/fs/ufs_acl.h>
  69   69  #include <sys/fs/ufs_panic.h>
  70   70  #include <sys/fs/ufs_bio.h>
  71   71  #include <sys/fs/ufs_quota.h>
  72   72  #include <sys/fs/ufs_log.h>
  73   73  #undef NFS
  74   74  #include <sys/statvfs.h>
  75   75  #include <sys/mount.h>
  76   76  #include <sys/mntent.h>
  77   77  #include <sys/swap.h>
  78   78  #include <sys/errno.h>
  79   79  #include <sys/debug.h>
  80   80  #include "fs/fs_subr.h"
  81   81  #include <sys/cmn_err.h>
  82   82  #include <sys/dnlc.h>
  83   83  #include <sys/fssnap_if.h>
  84   84  #include <sys/sunddi.h>
  85   85  #include <sys/bootconf.h>
  86   86  #include <sys/policy.h>
  87   87  #include <sys/zone.h>
  88   88  
  89   89  /*
  90   90   * This is the loadable module wrapper.
  91   91   */
  92   92  #include <sys/modctl.h>
  93   93  
  94   94  int                     ufsfstype;
  95   95  vfsops_t                *ufs_vfsops;
  96   96  static int              ufsinit(int, char *);
           /*
            * The next two declarations use an empty parameter list, so the
            * compiler does not check arguments against them.  A full prototype
            * for mountfs() appears further down in this file.
            */
  97   97  static int              mountfs();
  98   98  extern int              highbit();
  99   99  extern struct instats   ins;
 100  100  extern struct vnode *common_specvp(struct vnode *vp);
 101  101  extern vfs_t            EIO_vfs;
 102  102  
           /* NOTE(review): presumably the quota table base and end -- confirm. */
 103  103  struct  dquot *dquot, *dquotNDQUOT;
 104  104  
 105  105  /*
 106  106   * Cylinder group summary information handling tunable.
 107  107   * This defines when these deltas get logged.
 108  108   * If the number of cylinders in the file system is over the
 109  109   * tunable then we log csum updates. Otherwise the updates are only
 110  110   * done for performance on unmount. After a panic they can be
 111  111   * quickly constructed during mounting. See ufs_construct_si()
 112  112   * called from ufs_getsummaryinfo().
 113  113   *
 114  114   * This performance feature can of course be disabled by setting
 115  115   * ufs_ncg_log to 0, and fully enabled by setting it to 0xffffffff.
 116  116   */
 117  117  #define UFS_LOG_NCG_DEFAULT 10000
           /* Not static, so the tunable is visible outside this file. */
 118  118  uint32_t ufs_ncg_log = UFS_LOG_NCG_DEFAULT;
 119  119  
 120  120  /*
 121  121   * ufs_clean_root indicates whether the root fs went down cleanly
 122  122   */
 123  123  static int ufs_clean_root = 0;
 124  124  
 125  125  /*
 126  126   * UFS Mount options table
 127  127   */
           /*
            * Each *_cancel array below is a NULL-terminated list of option names.
            * It is referenced from the "cancel option" column of the entry for
            * the option it is named after, and names that option's opposite.
            */
 128  128  static char *intr_cancel[] = { MNTOPT_NOINTR, NULL };
 129  129  static char *nointr_cancel[] = { MNTOPT_INTR, NULL };
 130  130  static char *forcedirectio_cancel[] = { MNTOPT_NOFORCEDIRECTIO, NULL };
 131  131  static char *noforcedirectio_cancel[] = { MNTOPT_FORCEDIRECTIO, NULL };
 132  132  static char *largefiles_cancel[] = { MNTOPT_NOLARGEFILES, NULL };
 133  133  static char *nolargefiles_cancel[] = { MNTOPT_LARGEFILES, NULL };
 134  134  static char *logging_cancel[] = { MNTOPT_NOLOGGING, NULL };
 135  135  static char *nologging_cancel[] = { MNTOPT_LOGGING, NULL };
 136  136  static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL };
 137  137  static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL };
 138  138  static char *quota_cancel[] = { MNTOPT_NOQUOTA, NULL };
 139  139  static char *noquota_cancel[] = { MNTOPT_QUOTA, NULL };
 140  140  static char *dfratime_cancel[] = { MNTOPT_NODFRATIME, NULL };
 141  141  static char *nodfratime_cancel[] = { MNTOPT_DFRATIME, NULL };
 142  142  
           /*
            * One entry per recognised option.  The last column carries the
            * UFSMNT_* flag for the option, cast to a pointer, or 0 when the
            * option has no such flag.
            */
 143  143  static mntopt_t mntopts[] = {
 144  144  /*
 145  145   *      option name             cancel option   default arg     flags
 146  146   *              ufs arg flag
 147  147   */
 148  148          { MNTOPT_INTR,          intr_cancel,    NULL,           MO_DEFAULT,
 149  149                  (void *)0 },
 150  150          { MNTOPT_NOINTR,        nointr_cancel,  NULL,           0,
 151  151                  (void *)UFSMNT_NOINTR },
 152  152          { MNTOPT_SYNCDIR,       NULL,           NULL,           0,
 153  153                  (void *)UFSMNT_SYNCDIR },
 154  154          { MNTOPT_FORCEDIRECTIO, forcedirectio_cancel, NULL,     0,
 155  155                  (void *)UFSMNT_FORCEDIRECTIO },
 156  156          { MNTOPT_NOFORCEDIRECTIO, noforcedirectio_cancel, NULL, 0,
 157  157                  (void *)UFSMNT_NOFORCEDIRECTIO },
 158  158          { MNTOPT_NOSETSEC,      NULL,           NULL,           0,
 159  159                  (void *)UFSMNT_NOSETSEC },
 160  160          { MNTOPT_LARGEFILES,    largefiles_cancel, NULL,        MO_DEFAULT,
 161  161                  (void *)UFSMNT_LARGEFILES },
 162  162          { MNTOPT_NOLARGEFILES,  nolargefiles_cancel, NULL,      0,
 163  163                  (void *)0 },
 164  164          { MNTOPT_LOGGING,       logging_cancel, NULL,           MO_TAG,
 165  165                  (void *)UFSMNT_LOGGING },
 166  166          { MNTOPT_NOLOGGING,     nologging_cancel, NULL,
 167  167                  MO_NODISPLAY|MO_DEFAULT|MO_TAG, (void *)0 },
 168  168          { MNTOPT_QUOTA,         quota_cancel, NULL,             MO_IGNORE,
 169  169                  (void *)0 },
 170  170          { MNTOPT_NOQUOTA,       noquota_cancel, NULL,
 171  171                  MO_NODISPLAY|MO_DEFAULT, (void *)0 },
 172  172          { MNTOPT_GLOBAL,        NULL,           NULL,           0,
 173  173                  (void *)0 },
 174  174          { MNTOPT_XATTR, xattr_cancel,           NULL,           MO_DEFAULT,
 175  175                  (void *)0 },
 176  176          { MNTOPT_NOXATTR,       noxattr_cancel,         NULL,           0,
 177  177                  (void *)0 },
 178  178          { MNTOPT_NOATIME,       NULL,           NULL,           0,
 179  179                  (void *)UFSMNT_NOATIME },
 180  180          { MNTOPT_DFRATIME,      dfratime_cancel, NULL,          0,
 181  181                  (void *)0 },
 182  182          { MNTOPT_NODFRATIME,    nodfratime_cancel, NULL,
 183  183                  MO_NODISPLAY|MO_DEFAULT, (void *)UFSMNT_NODFRATIME },
                   /* The only entry with a default argument string. */
 184  184          { MNTOPT_ONERROR,       NULL,           UFSMNT_ONERROR_PANIC_STR,
 185  185                  MO_DEFAULT|MO_HASVALUE, (void *)0 },
 186  186  };
 187  187  
           /*
            * Pairs the table with its element count.  The count is computed
            * with sizeof so it follows the table when entries are added.
            */
 188  188  static mntopts_t ufs_mntopts = {
 189  189          sizeof (mntopts) / sizeof (mntopt_t),
 190  190          mntopts
 191  191  };
 192  192  
 193  193  static vfsdef_t vfw = {
                   /*
                    * In order: interface version, file system type name, init
                    * routine, VSW_* capability flags, and the mount options
                    * template defined above.
                    */
 194  194          VFSDEF_VERSION,
 195  195          "ufs",
 196  196          ufsinit,
 197  197          VSW_HASPROTO|VSW_CANREMOUNT|VSW_STATS|VSW_CANLOFI,
 198  198          &ufs_mntopts
 199  199  };
 200  200

↓ open down ↓

200 lines elided

↑ open up ↑

 201  201  /*
 202  202   * Module linkage information for the kernel.
 203  203   */
 204  204  extern struct mod_ops mod_fsops;
 205  205  
           /* Ties the file system module ops to the vfsdef_t (vfw) above. */
 206  206  static struct modlfs modlfs = {
 207  207          &mod_fsops, "filesystem for ufs", &vfw
 208  208  };
 209  209  
           /*
            * The change below wraps the trailing initializers in their own
            * braces, matching this change's stated goal of building without
            * -Wno-missing-braces.  NOTE(review): presumably the member after
            * MODREV_1 is an array or aggregate -- confirm against sys/modctl.h.
            */
 210  210  static struct modlinkage modlinkage = {
 211      -        MODREV_1, (void *)&modlfs, NULL
      211 +        MODREV_1,  { (void *)&modlfs, NULL }
 212  212  };
 213  213  
 214  214  /*
 215  215   * An attempt has been made to make this module unloadable.  In order to
 216  216   * test it, we need a system in which the root fs is NOT ufs.  THIS HAS NOT
 217  217   * BEEN DONE
 218  218   */
 219  219  
 220  220  extern kstat_t *ufs_inode_kstat;
           /* Thread-specific-data key created in _init() below. */
 221  221  extern uint_t ufs_lockfs_key;

           /* Destructor registered for ufs_lockfs_key in _init(). */
 222  222  extern void ufs_lockfs_tsd_destructor(void *);
           /* Second thread-specific-data key created in _init(). */
 223  223  extern uint_t bypass_snapshot_throttle_key;
 224  224  
 225  225  int
 226  226  _init(void)
 227  227  {
 228  228          /*
 229  229           * Create an index into the per thread array so that any thread doing
 230  230           * VOP will have a lockfs mark on it.
 231  231           */
 232  232          tsd_create(&ufs_lockfs_key, ufs_lockfs_tsd_destructor);
                   /* This key is created with no destructor. */
 233  233          tsd_create(&bypass_snapshot_throttle_key, NULL);
                   /*
                    * The mod_install() result is returned unchanged.
                    * NOTE(review): the two keys created above are not torn down
                    * here when that result is non-zero.
                    */
 234  234          return (mod_install(&modlinkage));
 235  235  }
 236  236  
 237  237  int
 238  238  _fini(void)
 239  239  {
                   /*
                    * Always reports EBUSY; no teardown is attempted.  The comment
                    * earlier in this file says unloading has not been tested.
                    */
 240  240          return (EBUSY);
 241  241  }
 242  242  
 243  243  int
 244  244  _info(struct modinfo *modinfop)
 245  245  {
                   /* Pass the request through to mod_info() for this module. */
 246  246          return (mod_info(&modlinkage, modinfop));
 247  247  }
 248  248  
 249  249  extern struct vnode *makespecvp(dev_t dev, vtype_t type);
 250  250  
 251  251  extern kmutex_t ufs_scan_lock;
 252  252  
           /*
            * Full prototype for mountfs().  The declaration near the top of the
            * file has an empty parameter list, so this is the one that gives
            * the callers below argument checking.
            */
 253  253  static int mountfs(struct vfs *, enum whymountroot, struct vnode *, char *,
 254  254                  struct cred *, int, void *, int);
 255  255  
 256  256  
 257  257  static int
 258  258  ufs_mount(struct vfs *vfsp, struct vnode *mvp, struct mounta *uap,
 259  259          struct cred *cr)
 260  260  
 261  261  {
                   /*
                    * Check the caller's privilege and the mount point, copy in the
                    * optional ufs_args, resolve the mount point path and the special
                    * file (or lofi device), then call mountfs().
                    *
                    * Returns 0 on success or an errno value on failure.
                    */
 262  262          char *data = uap->dataptr;
 263  263          int datalen = uap->datalen;
 264  264          dev_t dev;
 265  265          struct vnode *lvp = NULL;
 266  266          struct vnode *svp = NULL;
 267  267          struct pathname dpn;
 268  268          int error;
 269  269          enum whymountroot why = ROOT_INIT;
 270  270          struct ufs_args args;
 271  271          int oflag, aflag;
                   /* Selects whether uap->dir and uap->spec are kernel or user strings. */
 272  272          int fromspace = (uap->flags & MS_SYSSPACE) ?
 273  273              UIO_SYSSPACE : UIO_USERSPACE;
 274  274  
 275  275          if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
 276  276                  return (error);
 277  277  
 278  278          if (mvp->v_type != VDIR)
 279  279                  return (ENOTDIR);
 280  280  
                   /*
                    * Unless this is a remount or an overlay mount, fail with EBUSY
                    * when the mount point's v_count is not exactly 1 or it is
                    * flagged VROOT.
                    */
 281  281          mutex_enter(&mvp->v_lock);
 282  282          if ((uap->flags & MS_REMOUNT) == 0 &&
 283  283              (uap->flags & MS_OVERLAY) == 0 &&
 284  284              (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
 285  285                  mutex_exit(&mvp->v_lock);
 286  286                  return (EBUSY);
 287  287          }
 288  288          mutex_exit(&mvp->v_lock);
 289  289  
 290  290          /*
 291  291           * Get arguments
 292  292           */
 293  293          bzero(&args, sizeof (args));
 294  294          if ((uap->flags & MS_DATA) && data != NULL && datalen != 0) {
 295  295                  int copy_result = 0;
 296  296  
                           /*
                            * args was zeroed above, so copying fewer than
                            * sizeof (args) bytes leaves the remaining fields zero.
                            * NOTE(review): datalen is a signed int compared against
                            * a sizeof result; a negative value presumably converts
                            * to a large unsigned one and is rejected -- confirm.
                            */
 297  297                  if (datalen > sizeof (args))
 298  298                          return (EINVAL);
 299  299                  if (uap->flags & MS_SYSSPACE)
 300  300                          bcopy(data, &args, datalen);
 301  301                  else
 302  302                          copy_result = copyin(data, &args, datalen);
 303  303                  if (copy_result)
 304  304                          return (EFAULT);
                           /* mountfs() is always told the full structure size. */
 305  305                  datalen = sizeof (struct ufs_args);
 306  306          } else {
 307  307                  datalen = 0;
 308  308          }
 309  309  
                   /*
                    * oflag is used for the device open policy check and aflag for
                    * the VOP_ACCESS() check below; write access is requested only
                    * for a read/write mount.
                    */
 310  310          if ((vfsp->vfs_flag & VFS_RDONLY) != 0 ||
 311  311              (uap->flags & MS_RDONLY) != 0) {
 312  312                  oflag = FREAD;
 313  313                  aflag = VREAD;
 314  314          } else {
 315  315                  oflag = FREAD | FWRITE;
 316  316                  aflag = VREAD | VWRITE;
 317  317          }
 318  318  
 319  319          /*
 320  320           * Read in the mount point pathname
 321  321           * (so we can record the directory the file system was last mounted on).
 322  322           */
 323  323          if (error = pn_get(uap->dir, fromspace, &dpn))
 324  324                  return (error);
 325  325  
 326  326          /*
 327  327           * Resolve path name of special file being mounted.
 328  328           */
 329  329          if (error = lookupname(uap->spec, fromspace, FOLLOW, NULL, &svp)) {
 330  330                  pn_free(&dpn);
 331  331                  return (error);
 332  332          }
 333  333  
                   /*
                    * Three outcomes follow: a positive value is treated as a
                    * failure, zero means lvp was supplied and its device is used,
                    * and a negative value means the special file itself is used.
                    * NOTE(review): the return contract of vfs_get_lofi() is not
                    * visible here -- confirm.
                    */
 334  334          error = vfs_get_lofi(vfsp, &lvp);
 335  335  
 336  336          if (error > 0) {
 337  337                  VN_RELE(svp);
 338  338                  pn_free(&dpn);
 339  339                  return (error);
 340  340          } else if (error == 0) {
 341  341                  dev = lvp->v_rdev;
 342  342  
 343  343                  if (getmajor(dev) >= devcnt) {
 344  344                          error = ENXIO;
 345  345                          goto out;
 346  346                  }
 347  347          } else {
 348  348                  dev = svp->v_rdev;
 349  349  
                           /*
                            * Direct returns in this branch release svp and dpn by
                            * hand; failures that "goto out" rely on the common
                            * cleanup at the end of the function instead.
                            */
 350  350                  if (svp->v_type != VBLK) {
 351  351                          VN_RELE(svp);
 352  352                          pn_free(&dpn);
 353  353                          return (ENOTBLK);
 354  354                  }
 355  355  
 356  356                  if (getmajor(dev) >= devcnt) {
 357  357                          error = ENXIO;
 358  358                          goto out;
 359  359                  }
 360  360  
 361  361                  /*
 362  362                   * In SunCluster, requests to a global device are
 363  363                   * satisfied by a local device. We substitute the global
 364  364                   * pxfs node with a local spec node here.
 365  365                   */
 366  366                  if (IS_PXFSVP(svp)) {
 367  367                          ASSERT(lvp == NULL);
 368  368                          VN_RELE(svp);
 369  369                          svp = makespecvp(dev, VBLK);
 370  370                  }
 371  371  
 372  372                  if ((error = secpolicy_spec_open(cr, svp, oflag)) != 0) {
 373  373                          VN_RELE(svp);
 374  374                          pn_free(&dpn);
 375  375                          return (error);
 376  376                  }
 377  377          }
 378  378  
                   /* why stays ROOT_INIT unless the caller asked for a remount. */
 379  379          if (uap->flags & MS_REMOUNT)
 380  380                  why = ROOT_REMOUNT;
 381  381  
 382  382          /*
 383  383           * Open device/file mounted on.  We need this to check whether
 384  384           * the caller has sufficient rights to access the resource in
 385  385           * question.  When bio is fixed for vnodes this can all be vnode
 386  386           * operations.
 387  387           */
 388  388          if ((error = VOP_ACCESS(svp, aflag, 0, cr, NULL)) != 0)
 389  389                  goto out;
 390  390  
 391  391          /*
 392  392           * Ensure that this device isn't already mounted or in progress on a
 393  393           * mount unless this is a REMOUNT request or we are told to suppress
 394  394           * mount checks. Global mounts require special handling.
 395  395           */
 396  396          if ((uap->flags & MS_NOCHECK) == 0) {
 397  397                  if ((uap->flags & MS_GLOBAL) == 0 &&
 398  398                      vfs_devmounting(dev, vfsp)) {
 399  399                          error = EBUSY;
 400  400                          goto out;
 401  401                  }
 402  402                  if (vfs_devismounted(dev)) {
 403  403                          if ((uap->flags & MS_REMOUNT) == 0) {
 404  404                                  error = EBUSY;
 405  405                                  goto out;
 406  406                          }
 407  407                  }
 408  408          }
 409  409  
 410  410          /*
 411  411           * If the device is a tape, mount it read only
 412  412           */
 413  413          if (devopsp[getmajor(dev)]->devo_cb_ops->cb_flag & D_TAPE) {
 414  414                  vfsp->vfs_flag |= VFS_RDONLY;
 415  415                  vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
 416  416          }
 417  417          if (uap->flags & MS_RDONLY)
 418  418                  vfsp->vfs_flag |= VFS_RDONLY;
 419  419  
 420  420          /*
 421  421           * Mount the filesystem, free the device vnode on error.
 422  422           */
                   /* The lofi vnode, when present, is mounted in place of svp. */
 423  423          error = mountfs(vfsp, why, lvp != NULL ? lvp : svp,
 424  424              dpn.pn_path, cr, 0, &args, datalen);
 425  425  
 426  426          if (error == 0) {
 427  427                  vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS);
 428  428  
 429  429                  /*
 430  430                   * If lofi, drop our reference to the original file.
 431  431                   */
 432  432                  if (lvp != NULL)
 433  433                          VN_RELE(svp);
 434  434          }
 435  435  
 436  436  out:
 437  437          pn_free(&dpn);
 438  438  
                   /*
                    * On failure drop every vnode hold taken above.  On success the
                    * hold on the vnode that was handed to mountfs() is not released
                    * here.
                    */
 439  439          if (error) {
 440  440                  if (lvp != NULL)
 441  441                          VN_RELE(lvp);
 442  442                  if (svp != NULL)
 443  443                          VN_RELE(svp);
 444  444          }
 445  445          return (error);
 446  446  }
 447  447  
 448  448  /*
 449  449   * Mount root file system.
 450  450   * "why" is ROOT_INIT on initial call, ROOT_REMOUNT if called to
 451  451   * remount the root file system, and ROOT_UNMOUNT if called to
 452  452   * unmount the root (e.g., as part of a system shutdown).
 453  453   *
 454  454   * XXX - this may be partially machine-dependent; it, along with the VFS_SWAPVP
 455  455   * operation, goes along with auto-configuration.  A mechanism should be
 456  456   * provided by which machine-INdependent code in the kernel can say "get me the
 457  457   * right root file system" and "get me the right initial swap area", and have
 458  458   * that done in what may well be a machine-dependent fashion.
 459  459   * Unfortunately, it is also file-system-type dependent (NFS gets it via
 460  460   * bootparams calls, UFS gets it from various and sundry machine-dependent
 461  461   * mechanisms, as SPECFS does for swap).
 462  462   *
            * Returns 0 on success or an errno value.  The ROOT_UNMOUNT path
            * always returns 0.
 462  462   */
 463  463  static int
 464  464  ufs_mountroot(struct vfs *vfsp, enum whymountroot why)
 465  465  {
 466  466          struct fs *fsp;
 467  467          int error;
                   /* Static, so the count persists across calls. */
 468  468          static int ufsrootdone = 0;
 469  469          dev_t rootdev;
 470  470          struct vnode *vp;
 471  471          struct vnode *devvp = 0;
 472  472          int ovflags;
 473  473          int doclkset;
 474  474          ufsvfs_t *ufsvfsp;
 475  475  
                   /*
                    * NOTE(review): rootdev is assigned only in the ROOT_INIT and
                    * ROOT_REMOUNT branches.  A "why" value outside the three handled
                    * here would reach makespecvp() below with rootdev unset.
                    */
 476  476          if (why == ROOT_INIT) {
                           /*
                            * Only the first ROOT_INIT call gets past this test.
                            * NOTE(review): the counter is bumped before the checks
                            * below, so a first attempt that fails still causes
                            * later ROOT_INIT calls to return EBUSY.
                            */
 477  477                  if (ufsrootdone++)
 478  478                          return (EBUSY);
 479  479                  rootdev = getrootdev();
 480  480                  if (rootdev == (dev_t)NODEV)
 481  481                          return (ENODEV);
 482  482                  vfsp->vfs_dev = rootdev;
                           /* The initial root mount is flagged read-only. */
 483  483                  vfsp->vfs_flag |= VFS_RDONLY;
 484  484          } else if (why == ROOT_REMOUNT) {
 485  485                  vp = ((struct ufsvfs *)vfsp->vfs_data)->vfs_devvp;
 486  486                  (void) dnlc_purge_vfsp(vfsp, 0);
 487  487                  vp = common_specvp(vp);
 488  488                  (void) VOP_PUTPAGE(vp, (offset_t)0, (size_t)0, B_INVAL,
 489  489                      CRED(), NULL);
 490  490                  (void) bfinval(vfsp->vfs_dev, 0);
                           /*
                            * NOTE(review): this fsp value is not read in this
                            * function before fsp is reassigned after mountfs().
                            */
 491  491                  fsp = getfs(vfsp);
 492  492  
                           /* Saved so the flags can be restored if mountfs() fails. */
 493  493                  ovflags = vfsp->vfs_flag;
 494  494                  vfsp->vfs_flag &= ~VFS_RDONLY;
 495  495                  vfsp->vfs_flag |= VFS_REMOUNT;
 496  496                  rootdev = vfsp->vfs_dev;
 497  497          } else if (why == ROOT_UNMOUNT) {
                           /*
                            * With the vfs lock held, flush and possibly mark the
                            * log rolled; if the lock cannot be taken, fall back to
                            * ufs_update(0).
                            */
 498  498                  if (vfs_lock(vfsp) == 0) {
 499  499                          (void) ufs_flush(vfsp);
 500  500                          /*
 501  501                           * Mark the log as fully rolled
 502  502                           */
 503  503                          ufsvfsp = (ufsvfs_t *)vfsp->vfs_data;
 504  504                          fsp = ufsvfsp->vfs_fs;
 505  505                          if (TRANS_ISTRANS(ufsvfsp) &&
 506  506                              !TRANS_ISERROR(ufsvfsp) &&
 507  507                              (fsp->fs_rolled == FS_NEED_ROLL)) {
 508  508                                  ml_unit_t *ul = ufsvfsp->vfs_log;
 509  509  
 510  510                                  error = ufs_putsummaryinfo(ul->un_dev,
 511  511                                      ufsvfsp, fsp);
                                           /*
                                            * The superblock buffer is written only
                                            * when the summary info write succeeded;
                                            * a failure is otherwise ignored here.
                                            */
 512  512                                  if (error == 0) {
 513  513                                          fsp->fs_rolled = FS_ALL_ROLLED;
 514  514                                          UFS_BWRITE2(NULL, ufsvfsp->vfs_bufp);
 515  515                                  }
 516  516                          }
 517  517                          vfs_unlock(vfsp);
 518  518                  } else {
 519  519                          ufs_update(0);
 520  520                  }
 521  521  
 522  522                  vp = ((struct ufsvfs *)vfsp->vfs_data)->vfs_devvp;
 523  523                  (void) VOP_CLOSE(vp, FREAD|FWRITE, 1,
 524  524                      (offset_t)0, CRED(), NULL);
 525  525                  return (0);
 526  526          }
                   /* Only ROOT_INIT and ROOT_REMOUNT are expected to reach this point. */
 527  527          error = vfs_lock(vfsp);
 528  528          if (error)
 529  529                  return (error);
 530  530  
 531  531          devvp = makespecvp(rootdev, VBLK);
 532  532  
 533  533          /* If RO media, don't call clkset() (see below) */
 534  534          doclkset = 1;
 535  535          if (why == ROOT_INIT) {
                           /*
                            * Trial open for read/write: success is closed again at
                            * once, failure clears doclkset.  devvp is passed by
                            * address, so NOTE(review): VOP_OPEN() may presumably
                            * replace it -- confirm.
                            */
 536  536                  error = VOP_OPEN(&devvp, FREAD|FWRITE, CRED(), NULL);
 537  537                  if (error == 0) {
 538  538                          (void) VOP_CLOSE(devvp, FREAD|FWRITE, 1,
 539  539                              (offset_t)0, CRED(), NULL);
 540  540                  } else {
 541  541                          doclkset = 0;
 542  542                  }
 543  543          }
 544  544  
 545  545          error = mountfs(vfsp, why, devvp, "/", CRED(), 1, NULL, 0);
 546  546          /*
 547  547           * XXX - assumes root device is not indirect, because we don't set
 548  548           * rootvp.  Is rootvp used for anything?  If so, make another arg
 549  549           * to mountfs.
 550  550           */
 551  551          if (error) {
 552  552                  vfs_unlock(vfsp);
                           /* ovflags was assigned only on the ROOT_REMOUNT path. */
 553  553                  if (why == ROOT_REMOUNT)
 554  554                          vfsp->vfs_flag = ovflags;
                           /* rootvp is not declared in this function; it is released and cleared if set. */
 555  555                  if (rootvp) {
 556  556                          VN_RELE(rootvp);
 557  557                          rootvp = (struct vnode *)0;
 558  558                  }
 559  559                  VN_RELE(devvp);
 560  560                  return (error);
 561  561          }
 562  562          if (why == ROOT_INIT)
 563  563                  vfs_add((struct vnode *)0, vfsp,
 564  564                      (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0);
 565  565          vfs_unlock(vfsp);
 566  566          fsp = getfs(vfsp);
                   /* clkset() gets the superblock time, or -1 when the trial open failed. */
 567  567          clkset(doclkset ? fsp->fs_time : -1);
 568  568          ufsvfsp = (ufsvfs_t *)vfsp->vfs_data;
                   /* Record the logging mount option when a log is attached. */
 569  569          if (ufsvfsp->vfs_log) {
 570  570                  vfs_setmntopt(vfsp, MNTOPT_LOGGING, NULL, 0);
 571  571          }
 572  572          return (0);
 573  573  }
 574  574  
 575  575  static int
 576  576  remountfs(struct vfs *vfsp, dev_t dev, void *raw_argsp, int args_len)
 577  577  {
 578  578          struct ufsvfs *ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
 579  579          struct ulockfs *ulp = &ufsvfsp->vfs_ulockfs;
 580  580          struct buf *bp = ufsvfsp->vfs_bufp;
 581  581          struct fs *fsp = (struct fs *)bp->b_un.b_addr;
 582  582          struct fs *fspt;
 583  583          struct buf *tpt = 0;
 584  584          int error = 0;
 585  585          int flags = 0;
 586  586  
 587  587          if (args_len == sizeof (struct ufs_args) && raw_argsp)
 588  588                  flags = ((struct ufs_args *)raw_argsp)->flags;
 589  589  
 590  590          /* cannot remount to RDONLY */
 591  591          if (vfsp->vfs_flag & VFS_RDONLY)
 592  592                  return (ENOTSUP);
 593  593  
 594  594          /* whoops, wrong dev */
 595  595          if (vfsp->vfs_dev != dev)
 596  596                  return (EINVAL);
 597  597  
 598  598          /*
 599  599           * synchronize w/ufs ioctls
 600  600           */
 601  601          mutex_enter(&ulp->ul_lock);
 602  602          atomic_inc_ulong(&ufs_quiesce_pend);
 603  603  
 604  604          /*
 605  605           * reset options
 606  606           */
 607  607          ufsvfsp->vfs_nointr  = flags & UFSMNT_NOINTR;
 608  608          ufsvfsp->vfs_syncdir = flags & UFSMNT_SYNCDIR;
 609  609          ufsvfsp->vfs_nosetsec = flags & UFSMNT_NOSETSEC;
 610  610          ufsvfsp->vfs_noatime = flags & UFSMNT_NOATIME;
 611  611          if ((flags & UFSMNT_NODFRATIME) || ufsvfsp->vfs_noatime)
 612  612                  ufsvfsp->vfs_dfritime &= ~UFS_DFRATIME;
 613  613          else    /* dfratime, default behavior */
 614  614                  ufsvfsp->vfs_dfritime |= UFS_DFRATIME;
 615  615          if (flags & UFSMNT_FORCEDIRECTIO)
 616  616                  ufsvfsp->vfs_forcedirectio = 1;
 617  617          else    /* default is no direct I/O */
 618  618                  ufsvfsp->vfs_forcedirectio = 0;
 619  619          ufsvfsp->vfs_iotstamp = ddi_get_lbolt();
 620  620  
 621  621          /*
 622  622           * set largefiles flag in ufsvfs equal to the
 623  623           * value passed in by the mount command. If
 624  624           * it is "nolargefiles", and the flag is set
 625  625           * in the superblock, the mount fails.
 626  626           */
 627  627          if (!(flags & UFSMNT_LARGEFILES)) {  /* "nolargefiles" */
 628  628                  if (fsp->fs_flags & FSLARGEFILES) {
 629  629                          error = EFBIG;
 630  630                          goto remounterr;
 631  631                  }
 632  632                  ufsvfsp->vfs_lfflags &= ~UFS_LARGEFILES;
 633  633          } else  /* "largefiles" */
 634  634                  ufsvfsp->vfs_lfflags |= UFS_LARGEFILES;
 635  635          /*
 636  636           * read/write to read/write; all done
 637  637           */
 638  638          if (fsp->fs_ronly == 0)
 639  639                  goto remounterr;
 640  640  
 641  641          /*
 642  642           * fix-on-panic assumes RO->RW remount implies system-critical fs
 643  643           * if it is shortly after boot; so, don't attempt to lock and fix
 644  644           * (unless the user explicitly asked for another action on error)
 645  645           * XXX UFSMNT_ONERROR_RDONLY rather than UFSMNT_ONERROR_PANIC
 646  646           */
 647  647  #define BOOT_TIME_LIMIT (180*hz)
 648  648          if (!(flags & UFSMNT_ONERROR_FLGMASK) &&
 649  649              ddi_get_lbolt() < BOOT_TIME_LIMIT) {
 650  650                  cmn_err(CE_WARN, "%s is required to be mounted onerror=%s",
 651  651                      ufsvfsp->vfs_fs->fs_fsmnt, UFSMNT_ONERROR_PANIC_STR);
 652  652                  flags |= UFSMNT_ONERROR_PANIC;
 653  653          }
 654  654  
 655  655          if ((error = ufsfx_mount(ufsvfsp, flags)) != 0)
 656  656                  goto remounterr;
 657  657  
 658  658          /*
 659  659           * quiesce the file system
 660  660           */
 661  661          error = ufs_quiesce(ulp);
 662  662          if (error)
 663  663                  goto remounterr;
 664  664  
 665  665          tpt = UFS_BREAD(ufsvfsp, ufsvfsp->vfs_dev, SBLOCK, SBSIZE);
 666  666          if (tpt->b_flags & B_ERROR) {
 667  667                  error = EIO;
 668  668                  goto remounterr;
 669  669          }
 670  670          fspt = (struct fs *)tpt->b_un.b_addr;
 671  671          if (((fspt->fs_magic != FS_MAGIC) &&
 672  672              (fspt->fs_magic != MTB_UFS_MAGIC)) ||
 673  673              (fspt->fs_magic == FS_MAGIC &&
 674  674              (fspt->fs_version != UFS_EFISTYLE4NONEFI_VERSION_2 &&
 675  675              fspt->fs_version != UFS_VERSION_MIN)) ||
 676  676              (fspt->fs_magic == MTB_UFS_MAGIC &&
 677  677              (fspt->fs_version > MTB_UFS_VERSION_1 ||
 678  678              fspt->fs_version < MTB_UFS_VERSION_MIN)) ||
 679  679              fspt->fs_bsize > MAXBSIZE || fspt->fs_frag > MAXFRAG ||
 680  680              fspt->fs_bsize < sizeof (struct fs) || fspt->fs_bsize < PAGESIZE) {
 681  681                  tpt->b_flags |= B_STALE | B_AGE;
 682  682                  error = EINVAL;
 683  683                  goto remounterr;
 684  684          }
 685  685  
 686  686          if (ufsvfsp->vfs_log && (ufsvfsp->vfs_log->un_flags & LDL_NOROLL)) {
 687  687                  ufsvfsp->vfs_log->un_flags &= ~LDL_NOROLL;
 688  688                  logmap_start_roll(ufsvfsp->vfs_log);
 689  689          }
 690  690  
 691  691          if (TRANS_ISERROR(ufsvfsp))
 692  692                  goto remounterr;
 693  693          TRANS_DOMATAMAP(ufsvfsp);
 694  694  
 695  695          if ((fspt->fs_state + fspt->fs_time == FSOKAY) &&
 696  696              fspt->fs_clean == FSLOG && !TRANS_ISTRANS(ufsvfsp)) {
 697  697                  ufsvfsp->vfs_log = NULL;
 698  698                  ufsvfsp->vfs_domatamap = 0;
 699  699                  error = ENOSPC;
 700  700                  goto remounterr;
 701  701          }
 702  702  
 703  703          if (fspt->fs_state + fspt->fs_time == FSOKAY &&
 704  704              (fspt->fs_clean == FSCLEAN ||
 705  705              fspt->fs_clean == FSSTABLE ||
 706  706              fspt->fs_clean == FSLOG)) {
 707  707  
 708  708                  /*
 709  709                   * Ensure that ufs_getsummaryinfo doesn't reconstruct
 710  710                   * the summary info.
 711  711                   */
 712  712                  error = ufs_getsummaryinfo(vfsp->vfs_dev, ufsvfsp, fspt);
 713  713                  if (error)
 714  714                          goto remounterr;
 715  715  
 716  716                  /* preserve mount name */
 717  717                  (void) strncpy(fspt->fs_fsmnt, fsp->fs_fsmnt, MAXMNTLEN);
 718  718                  /* free the old cg space */
 719  719                  kmem_free(fsp->fs_u.fs_csp, fsp->fs_cssize);
 720  720                  /* switch in the new superblock */
 721  721                  fspt->fs_rolled = FS_NEED_ROLL;
 722  722                  bcopy(tpt->b_un.b_addr, bp->b_un.b_addr, fspt->fs_sbsize);
 723  723  
 724  724                  fsp->fs_clean = FSSTABLE;
 725  725          } /* superblock updated in memory */
 726  726          tpt->b_flags |= B_STALE | B_AGE;
 727  727          brelse(tpt);
 728  728          tpt = 0;
 729  729  
 730  730          if (fsp->fs_clean != FSSTABLE) {
 731  731                  error = ENOSPC;
 732  732                  goto remounterr;
 733  733          }
 734  734  
 735  735  
 736  736          if (TRANS_ISTRANS(ufsvfsp)) {
 737  737                  fsp->fs_clean = FSLOG;
 738  738                  ufsvfsp->vfs_dio = 0;
 739  739          } else
 740  740                  if (ufsvfsp->vfs_dio)
 741  741                          fsp->fs_clean = FSSUSPEND;
 742  742  
 743  743          TRANS_MATA_MOUNT(ufsvfsp);
 744  744  
 745  745          fsp->fs_fmod = 0;
 746  746          fsp->fs_ronly = 0;
 747  747  
 748  748          atomic_dec_ulong(&ufs_quiesce_pend);
 749  749          cv_broadcast(&ulp->ul_cv);
 750  750          mutex_exit(&ulp->ul_lock);
 751  751  
 752  752          if (TRANS_ISTRANS(ufsvfsp)) {
 753  753  
 754  754                  /*
 755  755                   * start the delete thread
 756  756                   */
 757  757                  ufs_thread_start(&ufsvfsp->vfs_delete, ufs_thread_delete, vfsp);
 758  758  
 759  759                  /*
 760  760                   * start the reclaim thread
 761  761                   */
 762  762                  if (fsp->fs_reclaim & (FS_RECLAIM|FS_RECLAIMING)) {
 763  763                          fsp->fs_reclaim &= ~FS_RECLAIM;
 764  764                          fsp->fs_reclaim |=  FS_RECLAIMING;
 765  765                          ufs_thread_start(&ufsvfsp->vfs_reclaim,
 766  766                              ufs_thread_reclaim, vfsp);
 767  767                  }
 768  768          }
 769  769  
 770  770          TRANS_SBWRITE(ufsvfsp, TOP_MOUNT);
 771  771  
 772  772          return (0);
 773  773  
 774  774  remounterr:
 775  775          if (tpt)
 776  776                  brelse(tpt);
 777  777          atomic_dec_ulong(&ufs_quiesce_pend);
 778  778          cv_broadcast(&ulp->ul_cv);
 779  779          mutex_exit(&ulp->ul_lock);
 780  780          return (error);
 781  781  }
 782  782  
 783  783  /*
 784  784   * If the device maxtransfer size is not available, we use ufs_maxmaxphys
 785  785   * along with the system value for maxphys to determine the value for
 786  786   * maxtransfer.
 787  787   */
 788  788  int ufs_maxmaxphys = (1024 * 1024);     /* cap applied via MIN(maxphys, ...) */
 789  789  
 790  790  #include <sys/ddi.h>            /* for delay(9f) */
 791  791  
           /*
            * Tunables for the failed-mount cleanup at the out: label of mountfs().
            * While other holds on the root vnode remain, mountfs() sleeps
            * ufs_mount_error_delay milliseconds between checks of v_count and stops
            * waiting once ufs_mount_timeout milliseconds have been accumulated.
            */
 792  792  int ufs_mount_error_delay = 20; /* default to 20ms */
 793  793  int ufs_mount_timeout = 60000;  /* default to 1 minute */
 794  794  
 795  795  static int
 796  796  mountfs(struct vfs *vfsp, enum whymountroot why, struct vnode *devvp,
 797  797          char *path, cred_t *cr, int isroot, void *raw_argsp, int args_len)
 798  798  {
                   /*
                    * Mount the UFS file system found on devvp at vfsp, or hand a
                    * remount (VFS_REMOUNT set in vfs_flag) off to remountfs().
                    *
                    *   vfsp        vfs being mounted; the caller holds the vfs lock
                    *   why         ROOT_INIT makes this routine open the device
                    *               itself (and close it again on failure)
                    *   devvp       vnode of the block device holding the file system
                    *   path        mount point name; saved in fs_fsmnt and used in
                    *               warning messages
                    *   cr          credentials for the device and inode operations
                    *   isroot      nonzero when mounting the root file system
                    *   raw_argsp,
                    *   args_len    optional struct ufs_args; its flags are honored
                    *               only when args_len equals the structure size
                    *
                    * Returns 0 on success or an errno value.  Every failure of a
                    * fresh mount leaves through the out: label, which undoes what
                    * had been set up and reports EIO when the failing step did not
                    * choose an error code of its own.
                    */
 799  799          dev_t dev = devvp->v_rdev;
 800  800          struct fs *fsp;
 801  801          struct ufsvfs *ufsvfsp = 0;
 802  802          struct buf *bp = 0;
 803  803          struct buf *tp = 0;
 804  804          struct dk_cinfo ci;
 805  805          int error = 0;
 806  806          size_t len;
 807  807          int needclose = 0;
 808  808          int needtrans = 0;
 809  809          struct inode *rip;
 810  810          struct vnode *rvp = NULL;
 811  811          int flags = 0;
 812  812          kmutex_t *ihm;
 813  813          int elapsed;
 814  814          int status;
 815  815          extern  int     maxphys;
 816  816  
 817  817          if (args_len == sizeof (struct ufs_args) && raw_argsp)
 818  818                  flags = ((struct ufs_args *)raw_argsp)->flags;
 819  819  
 820  820          ASSERT(vfs_lock_held(vfsp));
 821  821  
 822  822          if (why == ROOT_INIT) {
 823  823                  /*
 824  824                   * Open block device mounted on.
 825  825                   * When bio is fixed for vnodes this can all be vnode
 826  826                   * operations.
 827  827                   */
 828  828                  error = VOP_OPEN(&devvp,
 829  829                      (vfsp->vfs_flag & VFS_RDONLY) ? FREAD : FREAD|FWRITE,
 830  830                      cr, NULL);
 831  831                  if (error)
 832  832                          goto out;
 833  833                  needclose = 1;
 834  834  
 835  835                  /*
 836  836                   * Refuse to go any further if this
 837  837                   * device is being used for swapping.
 838  838                   */
 839  839                  if (IS_SWAPVP(devvp)) {
 840  840                          error = EBUSY;
 841  841                          goto out;
 842  842                  }
 843  843          }
 844  844  
 845  845          /*
 846  846           * check for dev already mounted on
                    *
                    * A remount is handled entirely by remountfs(); devvp is
                    * released here only when the remount succeeded.
 847  847           */
 848  848          if (vfsp->vfs_flag & VFS_REMOUNT) {
 849  849                  error = remountfs(vfsp, dev, raw_argsp, args_len);
 850  850                  if (error == 0)
 851  851                          VN_RELE(devvp);
 852  852                  return (error);
 853  853          }
 854  854  
 855  855          ASSERT(devvp != 0);
 856  856  
 857  857          /*
 858  858           * Flush back any dirty pages on the block device to
 859  859           * try and keep the buffer cache in sync with the page
 860  860           * cache if someone is trying to use block devices when
 861  861           * they really should be using the raw device.
 862  862           */
 863  863          (void) VOP_PUTPAGE(common_specvp(devvp), (offset_t)0,
 864  864              (size_t)0, B_INVAL, cr, NULL);
 865  865  
 866  866          /*
 867  867           * read in superblock
 868  868           */
 869  869          ufsvfsp = kmem_zalloc(sizeof (struct ufsvfs), KM_SLEEP);
 870  870          tp = UFS_BREAD(ufsvfsp, dev, SBLOCK, SBSIZE);
 871  871          if (tp->b_flags & B_ERROR)
 872  872                  goto out;       /* error is still 0; out: reports EIO */
 873  873          fsp = (struct fs *)tp->b_un.b_addr;
 874  874  
 875  875          if ((fsp->fs_magic != FS_MAGIC) && (fsp->fs_magic != MTB_UFS_MAGIC)) {
 876  876                  cmn_err(CE_NOTE,
 877  877                      "mount: not a UFS magic number (0x%x)", fsp->fs_magic);
 878  878                  error = EINVAL;
 879  879                  goto out;
 880  880          }
 881  881  
 882  882          if ((fsp->fs_magic == FS_MAGIC) &&
 883  883              (fsp->fs_version != UFS_EFISTYLE4NONEFI_VERSION_2 &&
 884  884              fsp->fs_version != UFS_VERSION_MIN)) {
 885  885                  cmn_err(CE_NOTE,
 886  886                      "mount: unrecognized version of UFS on-disk format: %d",
 887  887                      fsp->fs_version);
 888  888                  error = EINVAL;
 889  889                  goto out;
 890  890          }
 891  891  
 892  892          if ((fsp->fs_magic == MTB_UFS_MAGIC) &&
 893  893              (fsp->fs_version > MTB_UFS_VERSION_1 ||
 894  894              fsp->fs_version < MTB_UFS_VERSION_MIN)) {
 895  895                  cmn_err(CE_NOTE,
 896  896                      "mount: unrecognized version of UFS on-disk format: %d",
 897  897                      fsp->fs_version);
 898  898                  error = EINVAL;
 899  899                  goto out;
 900  900          }
 901  901  
 902  902  #ifndef _LP64
 903  903          if (fsp->fs_magic == MTB_UFS_MAGIC) {
 904  904                  /*
 905  905                   * Find the size of the device in sectors.  If the
 906  906                   * size in sectors is greater than INT_MAX, it's
 907  907                   * a multi-terabyte file system, which can't be
 908  908                   * mounted by a 32-bit kernel.  We can't use the
 909  909                   * fsbtodb() macro in the next line because the macro
 910  910                   * casts the intermediate values to daddr_t, which is
 911  911                   * a 32-bit quantity in a 32-bit kernel.  Here we
 912  912                   * really do need the intermediate values to be held
 913  913                   * in 64-bit quantities because we're checking for
 914  914                   * overflow of a 32-bit field.
 915  915                   */
 916  916                  if ((((diskaddr_t)(fsp->fs_size)) << fsp->fs_fsbtodb)
 917  917                      > INT_MAX) {
 918  918                          cmn_err(CE_NOTE,
 919  919                              "mount: multi-terabyte UFS cannot be"
 920  920                              " mounted by a 32-bit kernel");
 921  921                          error = EINVAL;
 922  922                          goto out;
 923  923                  }
 924  924  
 925  925          }
 926  926  #endif
 927  927  
 928  928          if (fsp->fs_bsize > MAXBSIZE || fsp->fs_frag > MAXFRAG ||
 929  929              fsp->fs_bsize < sizeof (struct fs) || fsp->fs_bsize < PAGESIZE) {
 930  930                  error = EINVAL; /* also needs translation */
 931  931                  goto out;
 932  932          }
 933  933  
 934  934          /*
 935  935           * Allocate VFS private data.
 936  936           */
 937  937          vfsp->vfs_bcount = 0;
 938  938          vfsp->vfs_data = (caddr_t)ufsvfsp;
 939  939          vfsp->vfs_fstype = ufsfstype;
 940  940          vfsp->vfs_dev = dev;
 941  941          vfsp->vfs_flag |= VFS_NOTRUNC;
 942  942          vfs_make_fsid(&vfsp->vfs_fsid, dev, ufsfstype);
 943  943          ufsvfsp->vfs_devvp = devvp;
 944  944  
 945  945          /*
 946  946           * Cross-link with vfs and add to instance list.
 947  947           */
 948  948          ufsvfsp->vfs_vfs = vfsp;
 949  949          ufs_vfs_add(ufsvfsp);
 950  950  
 951  951          ufsvfsp->vfs_dev = dev;
 952  952          ufsvfsp->vfs_bufp = tp;
 953  953  
 954  954          ufsvfsp->vfs_dirsize = INODESIZE + (4 * ALLOCSIZE) + fsp->fs_fsize;
 955  955          ufsvfsp->vfs_minfrags =
 956  956              (int)((int64_t)fsp->fs_dsize * fsp->fs_minfree / 100);
 957  957          /*
 958  958           * if mount allows largefiles, indicate so in ufsvfs
 959  959           */
 960  960          if (flags & UFSMNT_LARGEFILES)
 961  961                  ufsvfsp->vfs_lfflags |= UFS_LARGEFILES;
 962  962          /*
 963  963           * Initialize threads
 964  964           */
 965  965          ufs_delete_init(ufsvfsp, 1);
 966  966          ufs_thread_init(&ufsvfsp->vfs_reclaim, 0);
 967  967  
 968  968          /*
 969  969           * Chicken and egg problem. The superblock may have deltas
 970  970           * in the log.  So after the log is scanned we reread the
 971  971           * superblock. We guarantee that the fields needed to
 972  972           * scan the log will not be in the log.
 973  973           */
 974  974          if (fsp->fs_logbno && fsp->fs_clean == FSLOG &&
 975  975              (fsp->fs_state + fsp->fs_time == FSOKAY)) {
 976  976                  error = lufs_snarf(ufsvfsp, fsp, (vfsp->vfs_flag & VFS_RDONLY));
 977  977                  if (error) {
 978  978                          /*
 979  979                           * Allow a ro mount to continue even if the
 980  980                           * log cannot be processed - yet.
 981  981                           */
 982  982                          if (!(vfsp->vfs_flag & VFS_RDONLY)) {
 983  983                                  cmn_err(CE_WARN, "Error accessing ufs "
 984  984                                      "log for %s; Please run fsck(1M)", path);
 985  985                                  goto out;
 986  986                          }
 987  987                  }
 988  988                  tp->b_flags |= (B_AGE | B_STALE);
 989  989                  brelse(tp);
 990  990                  tp = UFS_BREAD(ufsvfsp, dev, SBLOCK, SBSIZE);
 991  991                  fsp = (struct fs *)tp->b_un.b_addr;
 992  992                  ufsvfsp->vfs_bufp = tp;
 993  993                  if (tp->b_flags & B_ERROR)
 994  994                          goto out;
 995  995          }
 996  996  
 997  997          /*
 998  998           * Set logging mounted flag used by lockfs
 999  999           */
1000 1000          ufsvfsp->vfs_validfs = UT_MOUNTED;
1001 1001  
1002 1002          /*
1003 1003           * Copy the super block into a buffer in its native size.
1004 1004           * Use ngeteblk to allocate the buffer
1005 1005           */
1006 1006          bp = ngeteblk(fsp->fs_bsize);
1007 1007          ufsvfsp->vfs_bufp = bp;
1008 1008          bp->b_edev = dev;
1009 1009          bp->b_dev = cmpdev(dev);
1010 1010          bp->b_blkno = SBLOCK;
1011 1011          bp->b_bcount = fsp->fs_sbsize;
1012 1012          bcopy(tp->b_un.b_addr, bp->b_un.b_addr, fsp->fs_sbsize);
1013 1013          tp->b_flags |= B_STALE | B_AGE;
1014 1014          brelse(tp);
1015 1015          tp = 0;
1016 1016  
1017 1017          fsp = (struct fs *)bp->b_un.b_addr;
1018 1018          /*
1019 1019           * Mount fails if superblock flag indicates presence of large
1020 1020           * files and filesystem is attempted to be mounted 'nolargefiles'.
1021 1021           * The exception is for a read only mount of root, which we
1022 1022           * always want to succeed, so fsck can fix potential problems.
1023 1023           * The assumption is that we will remount root at some point,
1024 1024           * and the remount will enforce the mount option.
                    *
                    * isroot is tested as a truth value (logical AND) so that the
                    * exception does not depend on isroot and VFS_RDONLY happening
                    * to share a set bit.
1025 1025           */
1026 1026          if (!(isroot && (vfsp->vfs_flag & VFS_RDONLY)) &&
1027 1027              (fsp->fs_flags & FSLARGEFILES) &&
1028 1028              !(flags & UFSMNT_LARGEFILES)) {
1029 1029                  error = EFBIG;
1030 1030                  goto out;
1031 1031          }
1032 1032  
1033 1033          if (vfsp->vfs_flag & VFS_RDONLY) {
1034 1034                  fsp->fs_ronly = 1;
1035 1035                  fsp->fs_fmod = 0;
1036 1036                  if (((fsp->fs_state + fsp->fs_time) == FSOKAY) &&
1037 1037                      ((fsp->fs_clean == FSCLEAN) ||
1038 1038                      (fsp->fs_clean == FSSTABLE) ||
1039 1039                      (fsp->fs_clean == FSLOG))) {
1040 1040                          if (isroot) {
1041 1041                                  if (fsp->fs_clean == FSLOG) {
1042 1042                                          if (fsp->fs_rolled == FS_ALL_ROLLED) {
1043 1043                                                  ufs_clean_root = 1;
1044 1044                                          }
1045 1045                                  } else {
1046 1046                                          ufs_clean_root = 1;
1047 1047                                  }
1048 1048                          }
1049 1049                          fsp->fs_clean = FSSTABLE;
1050 1050                  } else {
1051 1051                          fsp->fs_clean = FSBAD;
1052 1052                  }
1053 1053          } else {
1054 1054  
1055 1055                  fsp->fs_fmod = 0;
1056 1056                  fsp->fs_ronly = 0;
1057 1057  
1058 1058                  TRANS_DOMATAMAP(ufsvfsp);
1059 1059  
1060 1060                  if ((TRANS_ISERROR(ufsvfsp)) ||
1061 1061                      (((fsp->fs_state + fsp->fs_time) == FSOKAY) &&
1062 1062                      fsp->fs_clean == FSLOG && !TRANS_ISTRANS(ufsvfsp))) {
1063 1063                          ufsvfsp->vfs_log = NULL;
1064 1064                          ufsvfsp->vfs_domatamap = 0;
1065 1065                          error = ENOSPC;
1066 1066                          goto out;
1067 1067                  }
1068 1068  
1069 1069                  if (((fsp->fs_state + fsp->fs_time) == FSOKAY) &&
1070 1070                      (fsp->fs_clean == FSCLEAN ||
1071 1071                      fsp->fs_clean == FSSTABLE ||
1072 1072                      fsp->fs_clean == FSLOG))
1073 1073                          fsp->fs_clean = FSSTABLE;
1074 1074                  else {
1075 1075                          if (isroot) {
1076 1076                                  /*
1077 1077                                   * allow root partition to be mounted even
1078 1078                                   * when fs_state is not ok
1079 1079                                   * will be fixed later by a remount root
1080 1080                                   */
1081 1081                                  fsp->fs_clean = FSBAD;
1082 1082                                  ufsvfsp->vfs_log = NULL;
1083 1083                                  ufsvfsp->vfs_domatamap = 0;
1084 1084                          } else {
1085 1085                                  error = ENOSPC;
1086 1086                                  goto out;
1087 1087                          }
1088 1088                  }
1089 1089  
1090 1090                  if (fsp->fs_clean == FSSTABLE && TRANS_ISTRANS(ufsvfsp))
1091 1091                          fsp->fs_clean = FSLOG;
1092 1092          }
1093 1093          TRANS_MATA_MOUNT(ufsvfsp);
1094 1094          needtrans = 1;  /* out: must now undo this with TRANS_MATA_UMOUNT */
1095 1095  
1096 1096          vfsp->vfs_bsize = fsp->fs_bsize;
1097 1097  
1098 1098          /*
1099 1099           * Read in summary info
1100 1100           */
1101 1101          if ((error = ufs_getsummaryinfo(dev, ufsvfsp, fsp)) != 0)
1102 1102                  goto out;
1103 1103  
1104 1104          /*
1105 1105           * lastwhinetime is set to zero rather than lbolt, so that after
1106 1106           * mounting if the filesystem is found to be full, then immediately the
1107 1107           * "file system message" will be logged.
1108 1108           */
1109 1109          ufsvfsp->vfs_lastwhinetime = 0L;
1110 1110  
1111 1111  
1112 1112          mutex_init(&ufsvfsp->vfs_lock, NULL, MUTEX_DEFAULT, NULL);
                   /* Record the mount point name and zero the rest of fs_fsmnt. */
1113 1113          (void) copystr(path, fsp->fs_fsmnt, sizeof (fsp->fs_fsmnt) - 1, &len);
1114 1114          bzero(fsp->fs_fsmnt + len, sizeof (fsp->fs_fsmnt) - len);
1115 1115  
1116 1116          /*
1117 1117           * Sanity checks for old file systems
1118 1118           */
1119 1119          if (fsp->fs_postblformat == FS_42POSTBLFMT)
1120 1120                  ufsvfsp->vfs_nrpos = 8;
1121 1121          else
1122 1122                  ufsvfsp->vfs_nrpos = fsp->fs_nrpos;
1123 1123  
1124 1124          /*
1125 1125           * Initialize lockfs structure to support file system locking
1126 1126           */
1127 1127          bzero(&ufsvfsp->vfs_ulockfs.ul_lockfs,
1128 1128              sizeof (struct lockfs));
1129 1129          ufsvfsp->vfs_ulockfs.ul_fs_lock = ULOCKFS_ULOCK;
1130 1130          mutex_init(&ufsvfsp->vfs_ulockfs.ul_lock, NULL,
1131 1131              MUTEX_DEFAULT, NULL);
1132 1132          cv_init(&ufsvfsp->vfs_ulockfs.ul_cv, NULL, CV_DEFAULT, NULL);
1133 1133  
1134 1134          /*
1135 1135           * We don't need to grab vfs_dqrwlock for this ufs_iget() call.
1136 1136           * We are in the process of mounting the file system so there
1137 1137           * is no need to grab the quota lock. If a quota applies to the
1138 1138           * root inode, then it will be updated when quotas are enabled.
1139 1139           *
1140 1140           * However, we have an ASSERT(RW_LOCK_HELD(&ufsvfsp->vfs_dqrwlock))
1141 1141           * in getinoquota() that we want to keep so grab it anyway.
1142 1142           */
1143 1143          rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
1144 1144  
1145 1145          error = ufs_iget_alloced(vfsp, UFSROOTINO, &rip, cr);
1146 1146  
1147 1147          rw_exit(&ufsvfsp->vfs_dqrwlock);
1148 1148  
1149 1149          if (error)
1150 1150                  goto out;
1151 1151  
1152 1152          /*
1153 1153           * make sure root inode is a directory.  Returning ENOTDIR might
1154 1154           * be confused with the mount point not being a directory, so
1155 1155           * we use EIO instead.
1156 1156           */
1157 1157          if ((rip->i_mode & IFMT) != IFDIR) {
1158 1158                  /*
1159 1159                   * Mark this inode as subject for cleanup
1160 1160                   * to avoid stray inodes in the cache.
1161 1161                   */
1162 1162                  rvp = ITOV(rip);
1163 1163                  error = EIO;
1164 1164                  goto out;
1165 1165          }
1166 1166  
1167 1167          rvp = ITOV(rip);
1168 1168          mutex_enter(&rvp->v_lock);
1169 1169          rvp->v_flag |= VROOT;
1170 1170          mutex_exit(&rvp->v_lock);
1171 1171          ufsvfsp->vfs_root = rvp;
1172 1172          /* The buffer for the root inode does not contain a valid b_vp */
1173 1173          (void) bfinval(dev, 0);
1174 1174  
1175 1175          /* options */
1176 1176          ufsvfsp->vfs_nosetsec = flags & UFSMNT_NOSETSEC;
1177 1177          ufsvfsp->vfs_nointr  = flags & UFSMNT_NOINTR;
1178 1178          ufsvfsp->vfs_syncdir = flags & UFSMNT_SYNCDIR;
1179 1179          ufsvfsp->vfs_noatime = flags & UFSMNT_NOATIME;
1180 1180          if ((flags & UFSMNT_NODFRATIME) || ufsvfsp->vfs_noatime)
1181 1181                  ufsvfsp->vfs_dfritime &= ~UFS_DFRATIME;
1182 1182          else    /* dfratime, default behavior */
1183 1183                  ufsvfsp->vfs_dfritime |= UFS_DFRATIME;
1184 1184          if (flags & UFSMNT_FORCEDIRECTIO)
1185 1185                  ufsvfsp->vfs_forcedirectio = 1;
1186 1186          else if (flags & UFSMNT_NOFORCEDIRECTIO)
1187 1187                  ufsvfsp->vfs_forcedirectio = 0;
1188 1188          ufsvfsp->vfs_iotstamp = ddi_get_lbolt();
1189 1189  
1190 1190          ufsvfsp->vfs_nindiroffset = fsp->fs_nindir - 1;
1191 1191          ufsvfsp->vfs_nindirshift = highbit(ufsvfsp->vfs_nindiroffset);
1192 1192          ufsvfsp->vfs_ioclustsz = fsp->fs_bsize * fsp->fs_maxcontig;
1193 1193  
                   /*
                    * Take the transfer size from the driver's DKIOCINFO answer when
                    * it is available; otherwise, or when that value is not positive,
                    * fall back to the smaller of maxphys and ufs_maxmaxphys.
                    */
1194 1194          if (cdev_ioctl(dev, DKIOCINFO, (intptr_t)&ci,
1195 1195              FKIOCTL|FNATIVE|FREAD, CRED(), &status) == 0) {
1196 1196                  ufsvfsp->vfs_iotransz = ci.dki_maxtransfer * DEV_BSIZE;
1197 1197          } else {
1198 1198                  ufsvfsp->vfs_iotransz = MIN(maxphys, ufs_maxmaxphys);
1199 1199          }
1200 1200  
1201 1201          if (ufsvfsp->vfs_iotransz <= 0) {
1202 1202                  ufsvfsp->vfs_iotransz = MIN(maxphys, ufs_maxmaxphys);
1203 1203          }
1204 1204  
1205 1205          /*
1206 1206           * When logging, used to reserve log space for writes and truncs
                    *
                    * NOTE(review): this divides by fs_ncg, which is not range-checked
                    * anywhere in this function -- confirm it cannot be 0 here.
1207 1207           */
1208 1208          ufsvfsp->vfs_avgbfree = fsp->fs_cstotal.cs_nbfree / fsp->fs_ncg;
1209 1209  
1210 1210          /*
1211 1211           * Determine whether to log cylinder group summary info.
1212 1212           */
1213 1213          ufsvfsp->vfs_nolog_si = (fsp->fs_ncg < ufs_ncg_log);
1214 1214  
1215 1215          if (TRANS_ISTRANS(ufsvfsp)) {
1216 1216                  /*
1217 1217                   * start the delete thread
1218 1218                   */
1219 1219                  ufs_thread_start(&ufsvfsp->vfs_delete, ufs_thread_delete, vfsp);
1220 1220  
1221 1221                  /*
1222 1222                   * start reclaim thread if the filesystem was not mounted
1223 1223                   * read only.
1224 1224                   */
1225 1225                  if (!fsp->fs_ronly && (fsp->fs_reclaim &
1226 1226                      (FS_RECLAIM|FS_RECLAIMING))) {
1227 1227                          fsp->fs_reclaim &= ~FS_RECLAIM;
1228 1228                          fsp->fs_reclaim |=  FS_RECLAIMING;
1229 1229                          ufs_thread_start(&ufsvfsp->vfs_reclaim,
1230 1230                              ufs_thread_reclaim, vfsp);
1231 1231                  }
1232 1232  
1233 1233                  /* Mark the fs as unrolled */
1234 1234                  fsp->fs_rolled = FS_NEED_ROLL;
1235 1235          } else if (!fsp->fs_ronly && (fsp->fs_reclaim &
1236 1236              (FS_RECLAIM|FS_RECLAIMING))) {
1237 1237                  /*
1238 1238                   * If a file system that is mounted nologging, after
1239 1239                   * having previously been mounted logging, becomes
1240 1240                   * unmounted whilst the reclaim thread is in the throes
1241 1241                   * of reclaiming open/deleted inodes, a subsequent mount
1242 1242                   * of such a file system with logging disabled could lead
1243 1243                   * to inodes becoming lost.  So, start reclaim now, even
1244 1244                   * though logging was disabled for the previous mount, to
1245 1245                   * tidy things up.
1246 1246                   */
1247 1247                  fsp->fs_reclaim &= ~FS_RECLAIM;
1248 1248                  fsp->fs_reclaim |=  FS_RECLAIMING;
1249 1249                  ufs_thread_start(&ufsvfsp->vfs_reclaim,
1250 1250                      ufs_thread_reclaim, vfsp);
1251 1251          }
1252 1252  
                   /* A writable mount pushes the updated superblock to disk now. */
1253 1253          if (!fsp->fs_ronly) {
1254 1254                  TRANS_SBWRITE(ufsvfsp, TOP_MOUNT);
1255 1255                  if ((error = geterror(ufsvfsp->vfs_bufp)) != 0)
1256 1256                          goto out;
1257 1257          }
1258 1258  
1259 1259          /* fix-on-panic initialization */
1260 1260          if (isroot && !(flags & UFSMNT_ONERROR_FLGMASK))
1261 1261                  flags |= UFSMNT_ONERROR_PANIC;  /* XXX ..._RDONLY */
1262 1262  
1263 1263          if ((error = ufsfx_mount(ufsvfsp, flags)) != 0)
1264 1264                  goto out;
1265 1265  
1266 1266          if (why == ROOT_INIT && isroot)
1267 1267                  rootvp = devvp;
1268 1268  
1269 1269          return (0);
1270 1270  out:
                   /*
                    * Common failure exit.  Steps that jumped here without choosing
                    * an error code (error still 0) are reported as EIO.
                    */
1271 1271          if (error == 0)
1272 1272                  error = EIO;
1273 1273          if (rvp) {
1274 1274                  /* the following sequence is similar to ufs_unmount() */
1275 1275  
1276 1276                  /*
1277 1277                   * There's a problem that ufs_iget() puts inodes into
1278 1278                   * the inode cache before it returns them.  If someone
1279 1279                   * traverses that cache and gets a reference to our
1280 1280                   * inode, there's a chance they'll still be using it
1281 1281                   * after we've destroyed it.  This is a hard race to
1282 1282                   * hit, but it's happened (putting in a medium delay
1283 1283                   * here, and a large delay in ufs_scan_inodes() for
1284 1284                   * inodes on the device we're bailing out on, makes
1285 1285                   * the race easy to demonstrate).  The symptom is some
1286 1286                   * other part of UFS faulting on bad inode contents,
1287 1287                   * or when grabbing one of the locks inside the inode,
1288 1288                   * etc.  The usual victim is ufs_scan_inodes() or
1289 1289                   * someone called by it.
1290 1290                   */
1291 1291  
1292 1292                  /*
1293 1293                   * First, isolate it so that no new references can be
1294 1294                   * gotten via the inode cache.
1295 1295                   */
1296 1296                  ihm = &ih_lock[INOHASH(UFSROOTINO)];
1297 1297                  mutex_enter(ihm);
1298 1298                  remque(rip);
1299 1299                  mutex_exit(ihm);
1300 1300  
1301 1301                  /*
1302 1302                   * Now wait for all outstanding references except our
1303 1303                   * own to drain.  This could, in theory, take forever,
1304 1304                   * so don't wait *too* long.  If we time out, mark
1305 1305                   * it stale and leak it, so we don't hit the problem
1306 1306                   * described above.
1307 1307                   *
1308 1308                   * Note that v_count is an int, which means we can read
1309 1309                   * it in one operation.  Thus, there's no need to lock
1310 1310                   * around our tests.
                            *
                            * Both ufs_mount_error_delay and ufs_mount_timeout are
                            * in milliseconds: each pass sleeps for the former and
                            * adds it to elapsed, which is bounded by the latter.
1311 1311                   */
1312 1312                  elapsed = 0;
1313 1313                  while ((rvp->v_count > 1) && (elapsed < ufs_mount_timeout)) {
1314 1314                          delay(ufs_mount_error_delay * drv_usectohz(1000));
1315 1315                          elapsed += ufs_mount_error_delay;
1316 1316                  }
1317 1317  
1318 1318                  if (rvp->v_count > 1) {
1319 1319                          mutex_enter(&rip->i_tlock);
1320 1320                          rip->i_flag |= ISTALE;
1321 1321                          mutex_exit(&rip->i_tlock);
1322 1322                          cmn_err(CE_WARN,
1323 1323                              "Timed out while cleaning up after "
1324 1324                              "failed mount of %s", path);
1325 1325                  } else {
1326 1326  
1327 1327                          /*
1328 1328                           * Now we're the only one with a handle left, so tear
1329 1329                           * it down the rest of the way.
1330 1330                           */
1331 1331                          if (ufs_rmidle(rip))
1332 1332                                  VN_RELE(rvp);
1333 1333                          ufs_si_del(rip);
1334 1334                          rip->i_ufsvfs = NULL;
1335 1335                          rvp->v_vfsp = NULL;
1336 1336                          rvp->v_type = VBAD;
1337 1337                          VN_RELE(rvp);
1338 1338                  }
1339 1339          }
1340 1340          if (needtrans) {
1341 1341                  TRANS_MATA_UMOUNT(ufsvfsp);
1342 1342          }
                   /*
                    * NOTE(review): this teardown runs for every failure once ufsvfsp
                    * has been allocated, including failures that happen before
                    * ufs_vfs_add(), the thread initialization and the mutex_init() of
                    * vfs_lock above.  It therefore relies on these routines accepting
                    * the zero-filled structure from kmem_zalloc() -- confirm.
                    */
1343 1343          if (ufsvfsp) {
1344 1344                  ufs_vfs_remove(ufsvfsp);
1345 1345                  ufs_thread_exit(&ufsvfsp->vfs_delete);
1346 1346                  ufs_thread_exit(&ufsvfsp->vfs_reclaim);
1347 1347                  mutex_destroy(&ufsvfsp->vfs_lock);
1348 1348                  if (ufsvfsp->vfs_log) {
1349 1349                          lufs_unsnarf(ufsvfsp);
1350 1350                  }
1351 1351                  kmem_free(ufsvfsp, sizeof (struct ufsvfs));
1352 1352          }
1353 1353          if (bp) {
1354 1354                  bp->b_flags |= (B_STALE|B_AGE);
1355 1355                  brelse(bp);
1356 1356          }
1357 1357          if (tp) {
1358 1358                  tp->b_flags |= (B_STALE|B_AGE);
1359 1359                  brelse(tp);
1360 1360          }
                   /* Only a ROOT_INIT mount opened the device here, so only it closes it. */
1361 1361          if (needclose) {
1362 1362                  (void) VOP_CLOSE(devvp, (vfsp->vfs_flag & VFS_RDONLY) ?
1363 1363                      FREAD : FREAD|FWRITE, 1, (offset_t)0, cr, NULL);
1364 1364                  bflush(dev);
1365 1365                  (void) bfinval(dev, 1);
1366 1366          }
1367 1367          return (error);
1368 1368  }
1369 1369  
1370 1370  /*
1371 1371   * vfs operations
1372 1372   */
1373 1373  static int
1374 1374  ufs_unmount(struct vfs *vfsp, int fflag, struct cred *cr)
1375 1375  {
1376 1376          dev_t           dev             = vfsp->vfs_dev;
1377 1377          struct ufsvfs   *ufsvfsp        = (struct ufsvfs *)vfsp->vfs_data;
1378 1378          struct fs       *fs             = ufsvfsp->vfs_fs;
1379 1379          struct ulockfs  *ulp            = &ufsvfsp->vfs_ulockfs;
1380 1380          struct vnode    *bvp, *vp;
1381 1381          struct buf      *bp;
1382 1382          struct inode    *ip, *inext, *rip;
1383 1383          union ihead     *ih;
1384 1384          int             error, flag, i;
1385 1385          struct lockfs   lockfs;
1386 1386          int             poll_events = POLLPRI;
1387 1387          extern struct pollhead ufs_pollhd;
1388 1388          refstr_t        *mountpoint;
1389 1389  
1390 1390          ASSERT(vfs_lock_held(vfsp));
1391 1391  
1392 1392          if (secpolicy_fs_unmount(cr, vfsp) != 0)
1393 1393                  return (EPERM);
1394 1394          /*
1395 1395           * Forced unmount is now supported through the
1396 1396           * lockfs protocol.
1397 1397           */
1398 1398          if (fflag & MS_FORCE) {
1399 1399                  /*
1400 1400                   * Mark the filesystem as being unmounted now in
1401 1401                   * case of a forcible umount before we take any
1402 1402                   * locks inside UFS to prevent racing with a VFS_VGET()
1403 1403                   * request. Throw these VFS_VGET() requests away for
1404 1404                   * the duration of the forcible umount so they won't
1405 1405                   * use stale or even freed data later on when we're done.
1406 1406                   * It may happen that the VFS has had a additional hold
1407 1407                   * placed on it by someone other than UFS and thus will
1408 1408                   * not get freed immediately once we're done with the
1409 1409                   * umount by dounmount() - use VFS_UNMOUNTED to inform
1410 1410                   * users of this still-alive VFS that its corresponding
1411 1411                   * filesystem being gone so they can detect that and error
1412 1412                   * out.
1413 1413                   */
1414 1414                  vfsp->vfs_flag |= VFS_UNMOUNTED;
1415 1415  
1416 1416                  ufs_thread_suspend(&ufsvfsp->vfs_delete);
1417 1417                  mutex_enter(&ulp->ul_lock);
1418 1418                  /*
1419 1419                   * If file system is already hard locked,
1420 1420                   * unmount the file system, otherwise
1421 1421                   * hard lock it before unmounting.
1422 1422                   */
1423 1423                  if (!ULOCKFS_IS_HLOCK(ulp)) {
1424 1424                          atomic_inc_ulong(&ufs_quiesce_pend);
1425 1425                          lockfs.lf_lock = LOCKFS_HLOCK;
1426 1426                          lockfs.lf_flags = 0;
1427 1427                          lockfs.lf_key = ulp->ul_lockfs.lf_key + 1;
1428 1428                          lockfs.lf_comlen = 0;
1429 1429                          lockfs.lf_comment = NULL;
1430 1430                          ufs_freeze(ulp, &lockfs);
1431 1431                          ULOCKFS_SET_BUSY(ulp);
1432 1432                          LOCKFS_SET_BUSY(&ulp->ul_lockfs);
1433 1433                          (void) ufs_quiesce(ulp);
1434 1434                          (void) ufs_flush(vfsp);
1435 1435                          (void) ufs_thaw(vfsp, ufsvfsp, ulp);
1436 1436                          atomic_dec_ulong(&ufs_quiesce_pend);
1437 1437                          ULOCKFS_CLR_BUSY(ulp);
1438 1438                          LOCKFS_CLR_BUSY(&ulp->ul_lockfs);
1439 1439                          poll_events |= POLLERR;
1440 1440                          pollwakeup(&ufs_pollhd, poll_events);
1441 1441                  }
1442 1442                  ufs_thread_continue(&ufsvfsp->vfs_delete);
1443 1443                  mutex_exit(&ulp->ul_lock);
1444 1444          }
1445 1445  
1446 1446          /* let all types of writes go through */
1447 1447          ufsvfsp->vfs_iotstamp = ddi_get_lbolt();
1448 1448  
1449 1449          /* coordinate with global hlock thread */
1450 1450          if (TRANS_ISTRANS(ufsvfsp) && (ufsvfsp->vfs_validfs == UT_HLOCKING)) {
1451 1451                  /*
1452 1452                   * last possibility for a forced umount to fail hence clear
1453 1453                   * VFS_UNMOUNTED if appropriate.
1454 1454                   */
1455 1455                  if (fflag & MS_FORCE)
1456 1456                          vfsp->vfs_flag &= ~VFS_UNMOUNTED;
1457 1457                  return (EAGAIN);
1458 1458          }
1459 1459  
1460 1460          ufsvfsp->vfs_validfs = UT_UNMOUNTED;
1461 1461  
1462 1462          /* kill the reclaim thread */
1463 1463          ufs_thread_exit(&ufsvfsp->vfs_reclaim);
1464 1464  
1465 1465          /* suspend the delete thread */
1466 1466          ufs_thread_suspend(&ufsvfsp->vfs_delete);
1467 1467  
1468 1468          /*
1469 1469           * drain the delete and idle queues
1470 1470           */
1471 1471          ufs_delete_drain(vfsp, -1, 1);
1472 1472          ufs_idle_drain(vfsp);
1473 1473  
1474 1474          /*
1475 1475           * use the lockfs protocol to prevent new ops from starting
1476 1476           * a forcible umount can not fail beyond this point as
1477 1477           * we hard-locked the filesystem and drained all current consumers
1478 1478           * before.
1479 1479           */
1480 1480          mutex_enter(&ulp->ul_lock);
1481 1481  
1482 1482          /*
1483 1483           * if the file system is busy; return EBUSY
1484 1484           */
1485 1485          if (ulp->ul_vnops_cnt || ulp->ul_falloc_cnt || ULOCKFS_IS_SLOCK(ulp)) {
1486 1486                  error = EBUSY;
1487 1487                  goto out;
1488 1488          }
1489 1489  
1490 1490          /*
1491 1491           * if this is not a forced unmount (!hard/error locked), then
1492 1492           * get rid of every inode except the root and quota inodes
1493 1493           * also, commit any outstanding transactions
1494 1494           */
1495 1495          if (!ULOCKFS_IS_HLOCK(ulp) && !ULOCKFS_IS_ELOCK(ulp))
1496 1496                  if (error = ufs_flush(vfsp))
1497 1497                          goto out;
1498 1498  
1499 1499          /*
1500 1500           * ignore inodes in the cache if fs is hard locked or error locked
1501 1501           */
1502 1502          rip = VTOI(ufsvfsp->vfs_root);
1503 1503          if (!ULOCKFS_IS_HLOCK(ulp) && !ULOCKFS_IS_ELOCK(ulp)) {
1504 1504                  /*
1505 1505                   * Otherwise, only the quota and root inodes are in the cache.
1506 1506                   *
1507 1507                   * Avoid racing with ufs_update() and ufs_sync().
1508 1508                   */
1509 1509                  mutex_enter(&ufs_scan_lock);
1510 1510  
1511 1511                  for (i = 0, ih = ihead; i < inohsz; i++, ih++) {
1512 1512                          mutex_enter(&ih_lock[i]);
1513 1513                          for (ip = ih->ih_chain[0];
1514 1514                              ip != (struct inode *)ih;
1515 1515                              ip = ip->i_forw) {
1516 1516                                  if (ip->i_ufsvfs != ufsvfsp)
1517 1517                                          continue;
1518 1518                                  if (ip == ufsvfsp->vfs_qinod)
1519 1519                                          continue;
1520 1520                                  if (ip == rip && ITOV(ip)->v_count == 1)
1521 1521                                          continue;
1522 1522                                  mutex_exit(&ih_lock[i]);
1523 1523                                  mutex_exit(&ufs_scan_lock);
1524 1524                                  error = EBUSY;
1525 1525                                  goto out;
1526 1526                          }
1527 1527                          mutex_exit(&ih_lock[i]);
1528 1528                  }
1529 1529                  mutex_exit(&ufs_scan_lock);
1530 1530          }
1531 1531  
1532 1532          /*
1533 1533           * if a snapshot exists and this is a forced unmount, then delete
1534 1534           * the snapshot.  Otherwise return EBUSY.  This will insure the
1535 1535           * snapshot always belongs to a valid file system.
1536 1536           */
1537 1537          if (ufsvfsp->vfs_snapshot) {
1538 1538                  if (ULOCKFS_IS_HLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp)) {
1539 1539                          (void) fssnap_delete(&ufsvfsp->vfs_snapshot);
1540 1540                  } else {
1541 1541                          error = EBUSY;
1542 1542                          goto out;
1543 1543                  }
1544 1544          }
1545 1545  
1546 1546          /*
1547 1547           * Close the quota file and invalidate anything left in the quota
1548 1548           * cache for this file system.  Pass kcred to allow all quota
1549 1549           * manipulations.
1550 1550           */
1551 1551          (void) closedq(ufsvfsp, kcred);
1552 1552          invalidatedq(ufsvfsp);
1553 1553          /*
1554 1554           * drain the delete and idle queues
1555 1555           */
1556 1556          ufs_delete_drain(vfsp, -1, 0);
1557 1557          ufs_idle_drain(vfsp);
1558 1558  
1559 1559          /*
1560 1560           * discard the inodes for this fs (including root, shadow, and quota)
1561 1561           */
1562 1562          for (i = 0, ih = ihead; i < inohsz; i++, ih++) {
1563 1563                  mutex_enter(&ih_lock[i]);
1564 1564                  for (inext = 0, ip = ih->ih_chain[0];
1565 1565                      ip != (struct inode *)ih;
1566 1566                      ip = inext) {
1567 1567                          inext = ip->i_forw;
1568 1568                          if (ip->i_ufsvfs != ufsvfsp)
1569 1569                                  continue;
1570 1570  
1571 1571                          /*
1572 1572                           * We've found the inode in the cache and as we
1573 1573                           * hold the hash mutex the inode can not
1574 1574                           * disappear from underneath us.
1575 1575                           * We also know it must have at least a vnode
1576 1576                           * reference count of 1.
1577 1577                           * We perform an additional VN_HOLD so the VN_RELE
1578 1578                           * in case we take the inode off the idle queue
1579 1579                           * can not be the last one.
1580 1580                           * It is safe to grab the writer contents lock here
1581 1581                           * to prevent a race with ufs_iinactive() putting
1582 1582                           * inodes into the idle queue while we operate on
1583 1583                           * this inode.
1584 1584                           */
1585 1585                          rw_enter(&ip->i_contents, RW_WRITER);
1586 1586  
1587 1587                          vp = ITOV(ip);
1588 1588                          VN_HOLD(vp)
1589 1589                          remque(ip);
1590 1590                          if (ufs_rmidle(ip))
1591 1591                                  VN_RELE(vp);
1592 1592                          ufs_si_del(ip);
1593 1593                          /*
1594 1594                           * rip->i_ufsvfsp is needed by bflush()
1595 1595                           */
1596 1596                          if (ip != rip)
1597 1597                                  ip->i_ufsvfs = NULL;
1598 1598                          /*
1599 1599                           * Set vnode's vfsops to dummy ops, which return
1600 1600                           * EIO. This is needed to forced unmounts to work
1601 1601                           * with lofs/nfs properly.
1602 1602                           */
1603 1603                          if (ULOCKFS_IS_HLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp))
1604 1604                                  vp->v_vfsp = &EIO_vfs;
1605 1605                          else
1606 1606                                  vp->v_vfsp = NULL;
1607 1607                          vp->v_type = VBAD;
1608 1608  
1609 1609                          rw_exit(&ip->i_contents);
1610 1610  
1611 1611                          VN_RELE(vp);
1612 1612                  }
1613 1613                  mutex_exit(&ih_lock[i]);
1614 1614          }
1615 1615          ufs_si_cache_flush(dev);
1616 1616  
1617 1617          /*
1618 1618           * kill the delete thread and drain the idle queue
1619 1619           */
1620 1620          ufs_thread_exit(&ufsvfsp->vfs_delete);
1621 1621          ufs_idle_drain(vfsp);
1622 1622  
1623 1623          bp = ufsvfsp->vfs_bufp;
1624 1624          bvp = ufsvfsp->vfs_devvp;
1625 1625          flag = !fs->fs_ronly;
1626 1626          if (flag) {
1627 1627                  bflush(dev);
1628 1628                  if (fs->fs_clean != FSBAD) {
1629 1629                          if (fs->fs_clean == FSSTABLE)
1630 1630                                  fs->fs_clean = FSCLEAN;
1631 1631                          fs->fs_reclaim &= ~FS_RECLAIM;
1632 1632                  }
1633 1633                  if (TRANS_ISTRANS(ufsvfsp) &&
1634 1634                      !TRANS_ISERROR(ufsvfsp) &&
1635 1635                      !ULOCKFS_IS_HLOCK(ulp) &&
1636 1636                      (fs->fs_rolled == FS_NEED_ROLL)) {
1637 1637                          /*
1638 1638                           * ufs_flush() above has flushed the last Moby.
1639 1639                           * This is needed to ensure the following superblock
1640 1640                           * update really is the last metadata update
1641 1641                           */
1642 1642                          error = ufs_putsummaryinfo(dev, ufsvfsp, fs);
1643 1643                          if (error == 0) {
1644 1644                                  fs->fs_rolled = FS_ALL_ROLLED;
1645 1645                          }
1646 1646                  }
1647 1647                  TRANS_SBUPDATE(ufsvfsp, vfsp, TOP_SBUPDATE_UNMOUNT);
1648 1648                  /*
1649 1649                   * push this last transaction
1650 1650                   */
1651 1651                  curthread->t_flag |= T_DONTBLOCK;
1652 1652                  TRANS_BEGIN_SYNC(ufsvfsp, TOP_COMMIT_UNMOUNT, TOP_COMMIT_SIZE,
1653 1653                      error);
1654 1654                  if (!error)
1655 1655                          TRANS_END_SYNC(ufsvfsp, error, TOP_COMMIT_UNMOUNT,
1656 1656                              TOP_COMMIT_SIZE);
1657 1657                  curthread->t_flag &= ~T_DONTBLOCK;
1658 1658          }
1659 1659  
1660 1660          TRANS_MATA_UMOUNT(ufsvfsp);
1661 1661          lufs_unsnarf(ufsvfsp);          /* Release the in-memory structs */
1662 1662          ufsfx_unmount(ufsvfsp);         /* fix-on-panic bookkeeping */
1663 1663          kmem_free(fs->fs_u.fs_csp, fs->fs_cssize);
1664 1664  
1665 1665          bp->b_flags |= B_STALE|B_AGE;
1666 1666          ufsvfsp->vfs_bufp = NULL;       /* don't point at freed buf */
1667 1667          brelse(bp);                     /* free the superblock buf */
1668 1668  
1669 1669          (void) VOP_PUTPAGE(common_specvp(bvp), (offset_t)0, (size_t)0,
1670 1670              B_INVAL, cr, NULL);
1671 1671          (void) VOP_CLOSE(bvp, flag, 1, (offset_t)0, cr, NULL);
1672 1672          bflush(dev);
1673 1673          (void) bfinval(dev, 1);
1674 1674          VN_RELE(bvp);
1675 1675  
1676 1676          /*
1677 1677           * It is now safe to NULL out the ufsvfs pointer and discard
1678 1678           * the root inode.
1679 1679           */
1680 1680          rip->i_ufsvfs = NULL;
1681 1681          VN_RELE(ITOV(rip));
1682 1682  
1683 1683          /* free up lockfs comment structure, if any */
1684 1684          if (ulp->ul_lockfs.lf_comlen && ulp->ul_lockfs.lf_comment)
1685 1685                  kmem_free(ulp->ul_lockfs.lf_comment, ulp->ul_lockfs.lf_comlen);
1686 1686  
1687 1687          /*
1688 1688           * Remove from instance list.
1689 1689           */
1690 1690          ufs_vfs_remove(ufsvfsp);
1691 1691  
1692 1692          /*
1693 1693           * For a forcible unmount, threads may be asleep in
1694 1694           * ufs_lockfs_begin/ufs_check_lockfs.  These threads will need
1695 1695           * the ufsvfs structure so we don't free it, yet.  ufs_update
1696 1696           * will free it up after awhile.
1697 1697           */
1698 1698          if (ULOCKFS_IS_HLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp)) {
1699 1699                  extern kmutex_t         ufsvfs_mutex;
1700 1700                  extern struct ufsvfs    *ufsvfslist;
1701 1701  
1702 1702                  mutex_enter(&ufsvfs_mutex);
1703 1703                  ufsvfsp->vfs_dontblock = 1;
1704 1704                  ufsvfsp->vfs_next = ufsvfslist;
1705 1705                  ufsvfslist = ufsvfsp;
1706 1706                  mutex_exit(&ufsvfs_mutex);
1707 1707                  /* wakeup any suspended threads */
1708 1708                  cv_broadcast(&ulp->ul_cv);
1709 1709                  mutex_exit(&ulp->ul_lock);
1710 1710          } else {
1711 1711                  mutex_destroy(&ufsvfsp->vfs_lock);
1712 1712                  kmem_free(ufsvfsp, sizeof (struct ufsvfs));
1713 1713          }
1714 1714  
1715 1715          /*
1716 1716           * Now mark the filesystem as unmounted since we're done with it.
1717 1717           */
1718 1718          vfsp->vfs_flag |= VFS_UNMOUNTED;
1719 1719  
1720 1720          return (0);
1721 1721  out:
1722 1722          /* open the fs to new ops */
1723 1723          cv_broadcast(&ulp->ul_cv);
1724 1724          mutex_exit(&ulp->ul_lock);
1725 1725  
1726 1726          if (TRANS_ISTRANS(ufsvfsp)) {
1727 1727                  /* allow the delete thread to continue */
1728 1728                  ufs_thread_continue(&ufsvfsp->vfs_delete);
1729 1729                  /* restart the reclaim thread */
1730 1730                  ufs_thread_start(&ufsvfsp->vfs_reclaim, ufs_thread_reclaim,
1731 1731                      vfsp);
1732 1732                  /* coordinate with global hlock thread */
1733 1733                  ufsvfsp->vfs_validfs = UT_MOUNTED;
1734 1734                  /* check for trans errors during umount */
1735 1735                  ufs_trans_onerror();
1736 1736  
1737 1737                  /*
1738 1738                   * if we have a separate /usr it will never unmount
1739 1739                   * when halting. In order to not re-read all the
1740 1740                   * cylinder group summary info on mounting after
1741 1741                   * reboot the logging of summary info is re-enabled
1742 1742                   * and the super block written out.
1743 1743                   */
1744 1744                  mountpoint = vfs_getmntpoint(vfsp);
1745 1745                  if ((fs->fs_si == FS_SI_OK) &&
1746 1746                      (strcmp("/usr", refstr_value(mountpoint)) == 0)) {
1747 1747                          ufsvfsp->vfs_nolog_si = 0;
1748 1748                          UFS_BWRITE2(NULL, ufsvfsp->vfs_bufp);
1749 1749                  }
1750 1750                  refstr_rele(mountpoint);
1751 1751          }
1752 1752  
1753 1753          return (error);
1754 1754  }
1755 1755  
           /*
            * ufs_root - return the root vnode of the file system through *vpp.
            *
            * Returns EIO if vfsp is NULL, or if the vfs has no ufsvfs data or no
            * root vnode (the forced-unmount case); *vpp is not written then.
            * Otherwise a hold is placed on the root vnode, it is stored in *vpp,
            * and 0 is returned.
            */
1756 1756  static int
1757 1757  ufs_root(struct vfs *vfsp, struct vnode **vpp)
1758 1758  {
1759 1759          struct ufsvfs *ufsvfsp;
1760 1760          struct vnode *vp;
1761 1761  
1762 1762          if (!vfsp)
1763 1763                  return (EIO);
1764 1764  
1765 1765          ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
1766 1766          if (!ufsvfsp || !ufsvfsp->vfs_root)
1767 1767                  return (EIO);   /* forced unmount */
1768 1768  
1769 1769          vp = ufsvfsp->vfs_root;
                   /* the hold taken here is handed to the caller via *vpp */
1770 1770          VN_HOLD(vp);
1771 1771          *vpp = vp;
1772 1772          return (0);
1773 1773  }
1774 1774  
1775 1775  /*
1776 1776   * Get file system statistics.
            *
            * Fills in *sp from the in-core superblock of vfsp.
            *
            * Returns EIO if the vfs is flagged VFS_UNMOUNTED, EINVAL if the
            * superblock magic number or version is not one of the accepted
            * combinations checked below, and 0 otherwise.
1777 1777   */
1778 1778  static int
1779 1779  ufs_statvfs(struct vfs *vfsp, struct statvfs64 *sp)
1780 1780  {
1781 1781          struct fs *fsp;
1782 1782          struct ufsvfs *ufsvfsp;
1783 1783          int blk, i;
1784 1784          long max_avail, used;
1785 1785          dev32_t d32;
1786 1786  
1787 1787          if (vfsp->vfs_flag & VFS_UNMOUNTED)
1788 1788                  return (EIO);
1789 1789  
1790 1790          ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
1791 1791          fsp = ufsvfsp->vfs_fs;
                   /* accept only the known magic numbers and their version ranges */
1792 1792          if ((fsp->fs_magic != FS_MAGIC) && (fsp->fs_magic != MTB_UFS_MAGIC))
1793 1793                  return (EINVAL);
1794 1794          if (fsp->fs_magic == FS_MAGIC &&
1795 1795              (fsp->fs_version != UFS_EFISTYLE4NONEFI_VERSION_2 &&
1796 1796              fsp->fs_version != UFS_VERSION_MIN))
1797 1797                  return (EINVAL);
1798 1798          if (fsp->fs_magic == MTB_UFS_MAGIC &&
1799 1799              (fsp->fs_version > MTB_UFS_VERSION_1 ||
1800 1800              fsp->fs_version < MTB_UFS_VERSION_MIN))
1801 1801                  return (EINVAL);
1802 1802  
1803 1803          /*
1804 1804           * get the basic numbers
1805 1805           */
1806 1806          (void) bzero(sp, sizeof (*sp));
1807 1807  
1808 1808          sp->f_bsize = fsp->fs_bsize;
1809 1809          sp->f_frsize = fsp->fs_fsize;
1810 1810          sp->f_blocks = (fsblkcnt64_t)fsp->fs_dsize;
1811 1811          sp->f_bfree = (fsblkcnt64_t)fsp->fs_cstotal.cs_nbfree * fsp->fs_frag +
1812 1812              fsp->fs_cstotal.cs_nffree;
1813 1813  
1814 1814          sp->f_files = (fsfilcnt64_t)fsp->fs_ncg * fsp->fs_ipg;
1815 1815          sp->f_ffree = (fsfilcnt64_t)fsp->fs_cstotal.cs_nifree;
1816 1816  
1817 1817          /*
1818 1818           * Adjust the numbers based on things waiting to be deleted.
1819 1819           * modifies f_bfree and f_ffree.  Afterwards, everything we
1820 1820           * come up with will be self-consistent.  By definition, this
1821 1821           * is a point-in-time snapshot, so the fact that the delete
1822 1822           * thread's probably already invalidated the results is not a
1823 1823           * problem.  Note that if the delete thread is ever extended to
1824 1824           * non-logging ufs, this adjustment must always be made.
1825 1825           */
1826 1826          if (TRANS_ISTRANS(ufsvfsp))
1827 1827                  ufs_delete_adjust_stats(ufsvfsp, sp);
1828 1828  
1829 1829          /*
1830 1830           * avail = MAX(max_avail - used, 0)
1831 1831           */
1832 1832          max_avail = fsp->fs_dsize - ufsvfsp->vfs_minfrags;
1833 1833  
1834 1834          used = (fsp->fs_dsize - sp->f_bfree);
1835 1835  
1836 1836          if (max_avail > used)
1837 1837                  sp->f_bavail = (fsblkcnt64_t)max_avail - used;
1838 1838          else
1839 1839                  sp->f_bavail = (fsblkcnt64_t)0;
1840 1840  
1841 1841          sp->f_favail = sp->f_ffree;
1842 1842          (void) cmpldev(&d32, vfsp->vfs_dev);
1843 1843          sp->f_fsid = d32;
1844 1844          (void) strcpy(sp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name);
1845 1845          sp->f_flag = vf_to_stf(vfsp->vfs_flag);
1846 1846  
1847 1847          /* keep coordinated with ufs_l_pathconf() */
1848 1848          sp->f_namemax = MAXNAMLEN;
1849 1849  
                   /* with fs_cpc == 0 nothing is read from fs_rotbl(); zero f_fstr */
1850 1850          if (fsp->fs_cpc == 0) {
1851 1851                  bzero(sp->f_fstr, 14);
1852 1852                  return (0);
1853 1853          }
                   /*
                    * Advance i in steps of fs_frag until it reaches blk, step back
                    * one fs_frag, and use i / fs_frag as the index into fs_rotbl()
                    * from which 14 bytes are copied into f_fstr.
                    */
1854 1854          blk = fsp->fs_spc * fsp->fs_cpc / NSPF(fsp);
1855 1855          for (i = 0; i < blk; i += fsp->fs_frag) /* CSTYLED */
1856 1856                  /* void */;
1857 1857          i -= fsp->fs_frag;
1858 1858          blk = i / fsp->fs_frag;
1859 1859          bcopy(&(fs_rotbl(fsp)[blk]), sp->f_fstr, 14);
1860 1860          return (0);
1861 1861  }
1862 1862  
1863 1863  /*
1864 1864   * Flush any pending I/O to file system vfsp.
1865 1865   * The ufs_update() routine will only flush *all* ufs files.
1866 1866   * If vfsp is non-NULL, only sync this ufs (in preparation
1867 1867   * for a umount).
            *
            * flag: SYNC_CLOSE drains the whole idle queue first; SYNC_ATTR is
            *       passed on to ufs_sync_inode() as the "cheap" argument.
            *
            * Returns 0 when vfsp is NULL, when vfsp is not a ufs, when the vfs
            * lock cannot be taken, and after a normal sync.  Returns EIO if the
            * vfs has no ufsvfs data, or the value of ufs_fault() if a read-only
            * file system has a modified superblock.  The vfs lock taken here is
            * released on every return path.
1868 1868   */
1869 1869  /*ARGSUSED*/
1870 1870  static int
1871 1871  ufs_sync(struct vfs *vfsp, short flag, struct cred *cr)
1872 1872  {
1873 1873          struct ufsvfs *ufsvfsp;
1874 1874          struct fs *fs;
1875 1875          int cheap = flag & SYNC_ATTR;
1876 1876          int error;
1877 1877  
1878 1878          /*
1879 1879           * SYNC_CLOSE means we're rebooting.  Toss everything
1880 1880           * on the idle queue so we don't have to slog through
1881 1881           * a bunch of uninteresting inodes over and over again.
1882 1882           */
1883 1883          if (flag & SYNC_CLOSE)
1884 1884                  ufs_idle_drain(NULL);
1885 1885  
1886 1886          if (vfsp == NULL) {
1887 1887                  ufs_update(flag);
1888 1888                  return (0);
1889 1889          }
1890 1890  
1891 1891          /* Flush a single ufs */
1892 1892          if (!vfs_matchops(vfsp, ufs_vfsops) || vfs_lock(vfsp) != 0)
1893 1893                  return (0);
1894 1894  
1895 1895          ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
1896 1896          if (!ufsvfsp) {
                           /* don't leak the vfs lock taken by vfs_lock() above */
                           vfs_unlock(vfsp);
1897 1897                  return (EIO);
                   }
1898 1898          fs = ufsvfsp->vfs_fs;
1899 1899          mutex_enter(&ufsvfsp->vfs_lock);
1900 1900  
1901 1901          if (ufsvfsp->vfs_dio &&
1902 1902              fs->fs_ronly == 0 &&
1903 1903              fs->fs_clean != FSBAD &&
1904 1904              fs->fs_clean != FSLOG) {
1905 1905                  /* turn off fast-io on unmount, so no fsck needed (4029401) */
1906 1906                  ufsvfsp->vfs_dio = 0;
1907 1907                  fs->fs_clean = FSACTIVE;
1908 1908                  fs->fs_fmod = 1;
1909 1909          }
1910 1910  
1911 1911          /* Write back modified superblock */
1912 1912          if (fs->fs_fmod == 0) {
1913 1913                  mutex_exit(&ufsvfsp->vfs_lock);
1914 1914          } else {
1915 1915                  if (fs->fs_ronly != 0) {
1916 1916                          mutex_exit(&ufsvfsp->vfs_lock);
1917 1917                          vfs_unlock(vfsp);
1918 1918                          return (ufs_fault(ufsvfsp->vfs_root,
1919 1919                              "fs = %s update: ro fs mod\n", fs->fs_fsmnt));
1920 1920                  }
1921 1921                  fs->fs_fmod = 0;
1922 1922                  mutex_exit(&ufsvfsp->vfs_lock);
1923 1923  
1924 1924                  TRANS_SBUPDATE(ufsvfsp, vfsp, TOP_SBUPDATE_UPDATE);
1925 1925          }
1926 1926          vfs_unlock(vfsp);
1927 1927  
1928 1928          /*
1929 1929           * Avoid racing with ufs_update() and ufs_unmount().
1930 1930           *
1931 1931           */
1932 1932          mutex_enter(&ufs_scan_lock);
1933 1933  
1934 1934          (void) ufs_scan_inodes(1, ufs_sync_inode,
1935 1935              (void *)(uintptr_t)cheap, ufsvfsp);
1936 1936  
1937 1937          mutex_exit(&ufs_scan_lock);
1938 1938  
1939 1939          bflush((dev_t)vfsp->vfs_dev);
1940 1940  
1941 1941          /*
1942 1942           * commit any outstanding async transactions
1943 1943           */
1944 1944          curthread->t_flag |= T_DONTBLOCK;
1945 1945          TRANS_BEGIN_SYNC(ufsvfsp, TOP_COMMIT_UPDATE, TOP_COMMIT_SIZE, error);
1946 1946          if (!error) {
1947 1947                  TRANS_END_SYNC(ufsvfsp, error, TOP_COMMIT_UPDATE,
1948 1948                      TOP_COMMIT_SIZE);
1949 1949          }
1950 1950          curthread->t_flag &= ~T_DONTBLOCK;
1951 1951  
1952 1952          return (0);
1953 1953  }
1954 1954  
1955 1955  
           /*
            * sbupdate - write out the superblock of the ufs mounted on vfsp.
            *
            * If the ulockfs has a superblock owner and the current thread is not
            * it, the write is deferred: fs_fmod is set and the function returns.
            * For a logging (TRANS_ISTRANS) file system only ufs_sbwrite() is
            * called.  Otherwise the fs_cssize bytes at fs_u.fs_csp are first
            * written to disk starting at fs_csaddr, then ufs_sbwrite() is called.
            * ufs_sbwrite() is always called with vfs_lock held.
            */
1956 1956  void
1957 1957  sbupdate(struct vfs *vfsp)
1958 1958  {
1959 1959          struct ufsvfs *ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
1960 1960          struct fs *fs = ufsvfsp->vfs_fs;
1961 1961          struct buf *bp;
1962 1962          int blks;
1963 1963          caddr_t space;
1964 1964          int i;
1965 1965          size_t size;
1966 1966  
1967 1967          /*
1968 1968           * for ulockfs processing, limit the superblock writes
1969 1969           */
1970 1970          if ((ufsvfsp->vfs_ulockfs.ul_sbowner) &&
1971 1971              (curthread != ufsvfsp->vfs_ulockfs.ul_sbowner)) {
1972 1972                  /* process later */
1973 1973                  fs->fs_fmod = 1;
1974 1974                  return;
1975 1975          }
1976 1976          ULOCKFS_SET_MOD((&ufsvfsp->vfs_ulockfs));
1977 1977  
1978 1978          if (TRANS_ISTRANS(ufsvfsp)) {
1979 1979                  mutex_enter(&ufsvfsp->vfs_lock);
1980 1980                  ufs_sbwrite(ufsvfsp);
1981 1981                  mutex_exit(&ufsvfsp->vfs_lock);
1982 1982                  return;
1983 1983          }
1984 1984  
                   /* blks is the summary area size in fragments (fs_fsize units) */
1985 1985          blks = howmany(fs->fs_cssize, fs->fs_fsize);
1986 1986          space = (caddr_t)fs->fs_u.fs_csp;
1987 1987          for (i = 0; i < blks; i += fs->fs_frag) {
1988 1988                  size = fs->fs_bsize;
                           /* the final chunk may cover fewer than fs_frag fragments */
1989 1989                  if (i + fs->fs_frag > blks)
1990 1990                          size = (blks - i) * fs->fs_fsize;
1991 1991                  bp = UFS_GETBLK(ufsvfsp, ufsvfsp->vfs_dev,
1992 1992                      (daddr_t)(fsbtodb(fs, fs->fs_csaddr + i)),
1993 1993                      fs->fs_bsize);
1994 1994                  bcopy(space, bp->b_un.b_addr, size);
1995 1995                  space += size;
1996 1996                  bp->b_bcount = size;
1997 1997                  UFS_BRWRITE(ufsvfsp, bp);
1998 1998          }
1999 1999          mutex_enter(&ufsvfsp->vfs_lock);
2000 2000          ufs_sbwrite(ufsvfsp);
2001 2001          mutex_exit(&ufsvfsp->vfs_lock);
2002 2002  }
2003 2003  
2004 2004  int ufs_vget_idle_count = 2;    /* Number of inodes to idle each time */
           /*
            * ufs_vget - look up the vnode named by a file identifier.
            *
            * fidp is interpreted as a struct ufid.  The inode ufid_ino is fetched
            * with ufs_iget() inside a lockfs operation and with vfs_dqrwlock held
            * as reader.
            *
            * Returns 0 with the vnode in *vpp on success.  On failure *vpp is set
            * to NULL and the return value is EIO if the vfs is flagged
            * VFS_UNMOUNTED, the error from ufs_lockfs_begin() or ufs_iget(), or
            * EINVAL if the inode's generation does not match ufid_gen, its mode
            * is 0, or its link count is not positive (the inode is released with
            * VN_RELE() in that case).
            */
2005 2005  static int
2006 2006  ufs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp)
2007 2007  {
2008 2008          int error = 0;
2009 2009          struct ufid *ufid;
2010 2010          struct inode *ip;
2011 2011          struct ufsvfs *ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
2012 2012          struct ulockfs *ulp;
2013 2013  
2014 2014          /*
2015 2015           * Check for unmounted filesystem.
2016 2016           */
2017 2017          if (vfsp->vfs_flag & VFS_UNMOUNTED) {
2018 2018                  error = EIO;
2019 2019                  goto errout;
2020 2020          }
2021 2021  
2022 2022          /*
2023 2023           * Keep the idle queue from getting too long by
2024 2024           * idling an inode before attempting to allocate another.
2025 2025           *    This operation must be performed before entering
2026 2026           *    lockfs or a transaction.
2027 2027           */
2028 2028          if (ufs_idle_q.uq_ne > ufs_idle_q.uq_hiwat)
2029 2029                  if ((curthread->t_flag & T_DONTBLOCK) == 0) {
2030 2030                          ins.in_vidles.value.ul += ufs_vget_idle_count;
2031 2031                          ufs_idle_some(ufs_vget_idle_count);
2032 2032                  }
2033 2033  
2034 2034          ufid = (struct ufid *)fidp;
2035 2035  
2036 2036          if (error = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_VGET_MASK))
2037 2037                  goto errout;
2038 2038  
2039 2039          rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
2040 2040  
2041 2041          error = ufs_iget(vfsp, ufid->ufid_ino, &ip, CRED());
2042 2042  
2043 2043          rw_exit(&ufsvfsp->vfs_dqrwlock);
2044 2044  
2045 2045          ufs_lockfs_end(ulp);
2046 2046  
2047 2047          if (error)
2048 2048                  goto errout;
2049 2049  
2050 2050          /*
2051 2051           * Check if the inode has been deleted or freed or is in transient state
2052 2052           * since the last VFS_VGET() request for it, release it and don't return
2053 2053           * it to the caller, presumably NFS, as it's no longer valid.
2054 2054           */
2055 2055          if (ip->i_gen != ufid->ufid_gen || ip->i_mode == 0 ||
2056 2056              (ip->i_nlink <= 0)) {
2057 2057                  VN_RELE(ITOV(ip));
2058 2058                  error = EINVAL;
2059 2059                  goto errout;
2060 2060          }
2061 2061  
2062 2062          *vpp = ITOV(ip);
2063 2063          return (0);

↓ open down ↓

1842 lines elided

↑ open up ↑

2064 2064  
2065 2065  errout:
           /* every failure path hands back a NULL vnode pointer */
2066 2066          *vpp = NULL;
2067 2067          return (error);
2068 2068  }
2069 2069  
2070 2070  static int
2071 2071  ufsinit(int fstype, char *name)
2072 2072  {
2073 2073          static const fs_operation_def_t ufs_vfsops_template[] = {
2074      -                VFSNAME_MOUNT,          { .vfs_mount = ufs_mount },
2075      -                VFSNAME_UNMOUNT,        { .vfs_unmount = ufs_unmount },
2076      -                VFSNAME_ROOT,           { .vfs_root = ufs_root },
2077      -                VFSNAME_STATVFS,        { .vfs_statvfs = ufs_statvfs },
2078      -                VFSNAME_SYNC,           { .vfs_sync = ufs_sync },
2079      -                VFSNAME_VGET,           { .vfs_vget = ufs_vget },
2080      -                VFSNAME_MOUNTROOT,      { .vfs_mountroot = ufs_mountroot },
2081      -                NULL,                   NULL
     2074 +                { VFSNAME_MOUNT,        { .vfs_mount = ufs_mount } },
     2075 +                { VFSNAME_UNMOUNT,      { .vfs_unmount = ufs_unmount } },
     2076 +                { VFSNAME_ROOT,         { .vfs_root = ufs_root } },
     2077 +                { VFSNAME_STATVFS,      { .vfs_statvfs = ufs_statvfs } },
     2078 +                { VFSNAME_SYNC,         { .vfs_sync = ufs_sync } },
     2079 +                { VFSNAME_VGET,         { .vfs_vget = ufs_vget } },
     2080 +                { VFSNAME_MOUNTROOT,    { .vfs_mountroot = ufs_mountroot } },
     2081 +                { NULL,                 { NULL } }
2082 2082          };
2083 2083          int error;
2084 2084  
2085 2085          ufsfstype = fstype;
2086 2086  
2087 2087          error = vfs_setfsops(fstype, ufs_vfsops_template, &ufs_vfsops);
2088 2088          if (error != 0) {
2089 2089                  cmn_err(CE_WARN, "ufsinit: bad vfs ops template");
2090 2090                  return (error);
2091 2091          }

2092 2092  
2093 2093          error = vn_make_ops(name, ufs_vnodeops_template, &ufs_vnodeops);
2094 2094          if (error != 0) {
2095 2095                  (void) vfs_freevfsops_by_type(fstype);
2096 2096                  cmn_err(CE_WARN, "ufsinit: bad vnode ops template");
2097 2097                  return (error);
2098 2098          }
2099 2099  
2100 2100          ufs_iinit();
2101 2101          return (0);
2102 2102  }
2103 2103  
2104 2104  #ifdef __sparc
2105 2105  
2106 2106  /*
2107 2107   * Mounting a mirrored SVM volume is only supported on ufs,
2108 2108   * this is special-case boot code to support that configuration.
2109 2109   * At this point, we have booted and mounted root on a
2110 2110   * single component of the mirror.  Complete the boot
2111 2111   * by configuring SVM and converting the root to the
2112 2112   * dev_t of the mirrored root device.  This dev_t conversion
2113 2113   * only works because the underlying device doesn't change.
2114 2114   */
2115 2115  int
2116 2116  ufs_remountroot(struct vfs *vfsp)
2117 2117  {
2118 2118          struct ufsvfs *ufsvfsp;
2119 2119          struct ulockfs *ulp;
2120 2120          dev_t new_rootdev;
2121 2121          dev_t old_rootdev;
2122 2122          struct vnode *old_rootvp;
2123 2123          struct vnode *new_rootvp;
2124 2124          int error, sberror = 0;
2125 2125          struct inode    *ip;
2126 2126          union ihead     *ih;
2127 2127          struct buf      *bp;
2128 2128          int i;
2129 2129  
2130 2130          old_rootdev = rootdev;
2131 2131          old_rootvp = rootvp;
2132 2132  
2133 2133          new_rootdev = getrootdev();
2134 2134          if (new_rootdev == (dev_t)NODEV) {
2135 2135                  return (ENODEV);
2136 2136          }
2137 2137  
2138 2138          new_rootvp = makespecvp(new_rootdev, VBLK);
2139 2139  
2140 2140          error = VOP_OPEN(&new_rootvp,
2141 2141              (vfsp->vfs_flag & VFS_RDONLY) ? FREAD : FREAD|FWRITE, CRED(), NULL);
2142 2142          if (error) {
2143 2143                  cmn_err(CE_CONT,
2144 2144                      "Cannot open mirrored root device, error %d\n", error);
2145 2145                  return (error);
2146 2146          }
2147 2147  
2148 2148          if (vfs_lock(vfsp) != 0) {
2149 2149                  return (EBUSY);
2150 2150          }
2151 2151  
2152 2152          ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
2153 2153          ulp = &ufsvfsp->vfs_ulockfs;
2154 2154  
2155 2155          mutex_enter(&ulp->ul_lock);
2156 2156          atomic_inc_ulong(&ufs_quiesce_pend);
2157 2157  
2158 2158          (void) ufs_quiesce(ulp);
2159 2159          (void) ufs_flush(vfsp);
2160 2160  
2161 2161          /*
2162 2162           * Convert root vfs to new dev_t, including vfs hash
2163 2163           * table and fs id.
2164 2164           */
2165 2165          vfs_root_redev(vfsp, new_rootdev, ufsfstype);
2166 2166  
2167 2167          ufsvfsp->vfs_devvp = new_rootvp;
2168 2168          ufsvfsp->vfs_dev = new_rootdev;
2169 2169  
2170 2170          bp = ufsvfsp->vfs_bufp;
2171 2171          bp->b_edev = new_rootdev;
2172 2172          bp->b_dev = cmpdev(new_rootdev);
2173 2173  
2174 2174          /*
2175 2175           * The buffer for the root inode does not contain a valid b_vp
2176 2176           */
2177 2177          (void) bfinval(new_rootdev, 0);
2178 2178  
2179 2179          /*
2180 2180           * Here we hand-craft inodes with old root device
2181 2181           * references to refer to the new device instead.
2182 2182           */
2183 2183          mutex_enter(&ufs_scan_lock);
2184 2184  
2185 2185          for (i = 0, ih = ihead; i < inohsz; i++, ih++) {
2186 2186                  mutex_enter(&ih_lock[i]);
2187 2187                  for (ip = ih->ih_chain[0];
2188 2188                      ip != (struct inode *)ih;
2189 2189                      ip = ip->i_forw) {
2190 2190                          if (ip->i_ufsvfs != ufsvfsp)
2191 2191                                  continue;
2192 2192                          if (ip == ufsvfsp->vfs_qinod)
2193 2193                                  continue;
2194 2194                          if (ip->i_dev == old_rootdev) {
2195 2195                                  ip->i_dev = new_rootdev;
2196 2196                          }
2197 2197  
2198 2198                          if (ip->i_devvp == old_rootvp) {
2199 2199                                  ip->i_devvp = new_rootvp;
2200 2200                          }
2201 2201                  }
2202 2202                  mutex_exit(&ih_lock[i]);
2203 2203          }
2204 2204  
2205 2205          mutex_exit(&ufs_scan_lock);
2206 2206  
2207 2207          /*
2208 2208           * Make Sure logging structures are using the new device
2209 2209           * if logging is enabled.  Also start any logging thread that
2210 2210           * needs to write to the device and couldn't earlier.
2211 2211           */
2212 2212          if (ufsvfsp->vfs_log) {
2213 2213                  buf_t           *bp, *tbp;
2214 2214                  ml_unit_t       *ul = ufsvfsp->vfs_log;
2215 2215                  struct fs       *fsp = ufsvfsp->vfs_fs;
2216 2216  
2217 2217                  /*
2218 2218                   * Update the main logging structure.
2219 2219                   */
2220 2220                  ul->un_dev = new_rootdev;
2221 2221  
2222 2222                  /*
2223 2223                   * Get a new bp for the on disk structures.
2224 2224                   */
2225 2225                  bp = ul->un_bp;
2226 2226                  tbp = ngeteblk(dbtob(LS_SECTORS));
2227 2227                  tbp->b_edev = new_rootdev;
2228 2228                  tbp->b_dev = cmpdev(new_rootdev);
2229 2229                  tbp->b_blkno = bp->b_blkno;
2230 2230                  bcopy(bp->b_un.b_addr, tbp->b_un.b_addr, DEV_BSIZE);
2231 2231                  bcopy(bp->b_un.b_addr, tbp->b_un.b_addr + DEV_BSIZE, DEV_BSIZE);
2232 2232                  bp->b_flags |= (B_STALE | B_AGE);
2233 2233                  brelse(bp);
2234 2234                  ul->un_bp = tbp;
2235 2235  
2236 2236                  /*
2237 2237                   * Allocate new circular buffers.
2238 2238                   */
2239 2239                  alloc_rdbuf(&ul->un_rdbuf, MAPBLOCKSIZE, MAPBLOCKSIZE);
2240 2240                  alloc_wrbuf(&ul->un_wrbuf, ldl_bufsize(ul));
2241 2241  
2242 2242                  /*
2243 2243                   * Clear the noroll bit which indicates that logging
2244 2244                   * can't roll the log yet and start the logmap roll thread
2245 2245                   * unless the filesystem is still read-only in which case
2246 2246                   * remountfs() will do it when going to read-write.
2247 2247                   */
2248 2248                  ASSERT(ul->un_flags & LDL_NOROLL);
2249 2249  
2250 2250                  if (!fsp->fs_ronly) {
2251 2251                          ul->un_flags &= ~LDL_NOROLL;
2252 2252                          logmap_start_roll(ul);
2253 2253                  }
2254 2254  
2255 2255                  /*
2256 2256                   * Start the reclaim thread if needed.
2257 2257                   */
2258 2258                  if (!fsp->fs_ronly && (fsp->fs_reclaim &
2259 2259                      (FS_RECLAIM|FS_RECLAIMING))) {
2260 2260                          fsp->fs_reclaim &= ~FS_RECLAIM;
2261 2261                          fsp->fs_reclaim |= FS_RECLAIMING;
2262 2262                          ufs_thread_start(&ufsvfsp->vfs_reclaim,
2263 2263                              ufs_thread_reclaim, vfsp);
2264 2264                          TRANS_SBWRITE(ufsvfsp, TOP_SBUPDATE_UPDATE);
2265 2265                          if (sberror = geterror(ufsvfsp->vfs_bufp)) {
2266 2266                                  refstr_t        *mntpt;
2267 2267                                  mntpt = vfs_getmntpoint(vfsp);
2268 2268                                  cmn_err(CE_WARN,
2269 2269                                      "Remountroot failed to update Reclaim"
2270 2270                                      "state for filesystem %s "
2271 2271                                      "Error writing SuperBlock %d",
2272 2272                                      refstr_value(mntpt), error);
2273 2273                                  refstr_rele(mntpt);
2274 2274                          }
2275 2275                  }
2276 2276          }
2277 2277  
2278 2278          rootdev = new_rootdev;
2279 2279          rootvp = new_rootvp;
2280 2280  
2281 2281          atomic_dec_ulong(&ufs_quiesce_pend);
2282 2282          cv_broadcast(&ulp->ul_cv);
2283 2283          mutex_exit(&ulp->ul_lock);
2284 2284  
2285 2285          vfs_unlock(vfsp);
2286 2286  
2287 2287          error = VOP_CLOSE(old_rootvp, FREAD, 1, (offset_t)0, CRED(), NULL);
2288 2288          if (error) {
2289 2289                  cmn_err(CE_CONT,
2290 2290                      "close of root device component failed, error %d\n",
2291 2291                      error);
2292 2292          }
2293 2293          VN_RELE(old_rootvp);
2294 2294  
2295 2295          return (sberror ? sberror : error);
2296 2296  }
2297 2297  
2298 2298  #endif  /* __sparc */

↓ open down ↓

207 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX