illumos-gate Wdiff usr/src/uts/common/fs/ufs/ufs_vfsops.c

Print this page

8115 parallel zfs mount

Split	Close
Expand all
Collapse all

          --- old/usr/src/uts/common/fs/ufs/ufs_vfsops.c
          +++ new/usr/src/uts/common/fs/ufs/ufs_vfsops.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the

↓ open down ↓

15 lines elided

↑ open up ↑

  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24   24   * Use is subject to license terms.
  25   25   * Copyright 2016 Nexenta Systems, Inc.
       26 + * Copyright (c) 2017 by Delphix. All rights reserved.
  26   27   */
  27   28  
  28   29  /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T     */
  29   30  /*        All Rights Reserved   */
  30   31  
  31   32  /*
  32   33   * University Copyright- Copyright (c) 1982, 1986, 1988
  33   34   * The Regents of the University of California
  34   35   * All Rights Reserved
  35   36   *

  36   37   * University Acknowledgment- Portions of this document are derived from
  37   38   * software developed by the University of California, Berkeley, and its
  38   39   * contributors.
  39   40   */
  40   41  
  41   42  #include <sys/types.h>
  42   43  #include <sys/t_lock.h>
  43   44  #include <sys/param.h>
  44   45  #include <sys/systm.h>
  45   46  #include <sys/bitmap.h>
  46   47  #include <sys/sysmacros.h>
  47   48  #include <sys/kmem.h>
  48   49  #include <sys/signal.h>
  49   50  #include <sys/user.h>
  50   51  #include <sys/proc.h>
  51   52  #include <sys/disp.h>
  52   53  #include <sys/buf.h>
  53   54  #include <sys/pathname.h>
  54   55  #include <sys/vfs.h>
  55   56  #include <sys/vfs_opreg.h>
  56   57  #include <sys/vnode.h>
  57   58  #include <sys/file.h>
  58   59  #include <sys/atomic.h>
  59   60  #include <sys/uio.h>
  60   61  #include <sys/dkio.h>
  61   62  #include <sys/cred.h>
  62   63  #include <sys/conf.h>
  63   64  #include <sys/dnlc.h>
  64   65  #include <sys/kstat.h>
  65   66  #include <sys/acl.h>
  66   67  #include <sys/fs/ufs_fsdir.h>
  67   68  #include <sys/fs/ufs_fs.h>
  68   69  #include <sys/fs/ufs_inode.h>
  69   70  #include <sys/fs/ufs_mount.h>
  70   71  #include <sys/fs/ufs_acl.h>
  71   72  #include <sys/fs/ufs_panic.h>
  72   73  #include <sys/fs/ufs_bio.h>
  73   74  #include <sys/fs/ufs_quota.h>
  74   75  #include <sys/fs/ufs_log.h>
  75   76  #undef NFS
  76   77  #include <sys/statvfs.h>
  77   78  #include <sys/mount.h>
  78   79  #include <sys/mntent.h>
  79   80  #include <sys/swap.h>
  80   81  #include <sys/errno.h>
  81   82  #include <sys/debug.h>
  82   83  #include "fs/fs_subr.h"
  83   84  #include <sys/cmn_err.h>
  84   85  #include <sys/dnlc.h>
  85   86  #include <sys/fssnap_if.h>
  86   87  #include <sys/sunddi.h>
  87   88  #include <sys/bootconf.h>
  88   89  #include <sys/policy.h>
  89   90  #include <sys/zone.h>
  90   91  
  91   92  /*
  92   93   * This is the loadable module wrapper.
  93   94   */
  94   95  #include <sys/modctl.h>
  95   96  
  96   97  int                     ufsfstype;
  97   98  vfsops_t                *ufs_vfsops;
  98   99  static int              ufsinit(int, char *);   /* named in vfw below */
  99  100  static int              mountfs();      /* full prototype given later */
 100  101  extern int              highbit();
 101  102  extern struct instats   ins;
 102  103  extern struct vnode *common_specvp(struct vnode *vp);
 103  104  extern vfs_t            EIO_vfs;
 104  105  
 105  106  struct  dquot *dquot, *dquotNDQUOT;
 106  107  
 107  108  /*
 108  109   * Cylinder group summary information handling tunable.
 109  110   * This defines when these deltas get logged.
 110  111   * If the number of cylinder groups in the file system is over the
 111  112   * tunable then we log csum updates. Otherwise the updates are only
 112  113   * done for performance on unmount. After a panic they can be
 113  114   * quickly constructed during mounting. See ufs_construct_si()
 114  115   * called from ufs_getsummaryinfo().
 115  116   *
 116  117   * This performance feature can of course be disabled by setting
 117  118   * ufs_ncg_log to 0, and fully enabled by setting it to 0xffffffff.
 118  119   */
 119  120  #define UFS_LOG_NCG_DEFAULT 10000
 120  121  uint32_t ufs_ncg_log = UFS_LOG_NCG_DEFAULT;
 121  122  
 122  123  /*
 123  124   * ufs_clean_root indicates whether the root fs went down cleanly
 124  125   */
 125  126  static int ufs_clean_root = 0;
 126  127  
 127  128  /*
 128  129   * UFS Mount options table
            *
            * Each *_cancel array below is a NULL-terminated list holding the
            * name of the opposite option (for example intr_cancel holds
            * MNTOPT_NOINTR).  The arrays are referenced from the "cancel option"
            * column of the mntopts[] table.
 129  130   */
 130  131  static char *intr_cancel[] = { MNTOPT_NOINTR, NULL };
 131  132  static char *nointr_cancel[] = { MNTOPT_INTR, NULL };
 132  133  static char *forcedirectio_cancel[] = { MNTOPT_NOFORCEDIRECTIO, NULL };
 133  134  static char *noforcedirectio_cancel[] = { MNTOPT_FORCEDIRECTIO, NULL };
 134  135  static char *largefiles_cancel[] = { MNTOPT_NOLARGEFILES, NULL };
 135  136  static char *nolargefiles_cancel[] = { MNTOPT_LARGEFILES, NULL };
 136  137  static char *logging_cancel[] = { MNTOPT_NOLOGGING, NULL };
 137  138  static char *nologging_cancel[] = { MNTOPT_LOGGING, NULL };
 138  139  static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL };
 139  140  static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL };
 140  141  static char *quota_cancel[] = { MNTOPT_NOQUOTA, NULL };
 141  142  static char *noquota_cancel[] = { MNTOPT_QUOTA, NULL };
 142  143  static char *dfratime_cancel[] = { MNTOPT_NODFRATIME, NULL };
 143  144  static char *nodfratime_cancel[] = { MNTOPT_DFRATIME, NULL };
 144  145  
 145  146  static mntopt_t mntopts[] = {
 146  147  /*
 147  148   *      option name             cancel option   default arg     flags
 148  149   *              ufs arg flag
            *
            * The last field of every entry is a UFSMNT_* flag value cast to
            * void *; entries that carry no UFSMNT_* flag use (void *)0.
            * MNTOPT_ONERROR is the only entry with a default argument
            * (UFSMNT_ONERROR_PANIC_STR) and the MO_HASVALUE flag.
 149  150   */
 150  151          { MNTOPT_INTR,          intr_cancel,    NULL,           MO_DEFAULT,
 151  152                  (void *)0 },
 152  153          { MNTOPT_NOINTR,        nointr_cancel,  NULL,           0,
 153  154                  (void *)UFSMNT_NOINTR },
 154  155          { MNTOPT_SYNCDIR,       NULL,           NULL,           0,
 155  156                  (void *)UFSMNT_SYNCDIR },
 156  157          { MNTOPT_FORCEDIRECTIO, forcedirectio_cancel, NULL,     0,
 157  158                  (void *)UFSMNT_FORCEDIRECTIO },
 158  159          { MNTOPT_NOFORCEDIRECTIO, noforcedirectio_cancel, NULL, 0,
 159  160                  (void *)UFSMNT_NOFORCEDIRECTIO },
 160  161          { MNTOPT_NOSETSEC,      NULL,           NULL,           0,
 161  162                  (void *)UFSMNT_NOSETSEC },
 162  163          { MNTOPT_LARGEFILES,    largefiles_cancel, NULL,        MO_DEFAULT,
 163  164                  (void *)UFSMNT_LARGEFILES },
 164  165          { MNTOPT_NOLARGEFILES,  nolargefiles_cancel, NULL,      0,
 165  166                  (void *)0 },
 166  167          { MNTOPT_LOGGING,       logging_cancel, NULL,           MO_TAG,
 167  168                  (void *)UFSMNT_LOGGING },
 168  169          { MNTOPT_NOLOGGING,     nologging_cancel, NULL,
 169  170                  MO_NODISPLAY|MO_DEFAULT|MO_TAG, (void *)0 },
 170  171          { MNTOPT_QUOTA,         quota_cancel, NULL,             MO_IGNORE,
 171  172                  (void *)0 },
 172  173          { MNTOPT_NOQUOTA,       noquota_cancel, NULL,
 173  174                  MO_NODISPLAY|MO_DEFAULT, (void *)0 },
 174  175          { MNTOPT_GLOBAL,        NULL,           NULL,           0,
 175  176                  (void *)0 },
 176  177          { MNTOPT_XATTR, xattr_cancel,           NULL,           MO_DEFAULT,
 177  178                  (void *)0 },
 178  179          { MNTOPT_NOXATTR,       noxattr_cancel,         NULL,           0,
 179  180                  (void *)0 },
 180  181          { MNTOPT_NOATIME,       NULL,           NULL,           0,
 181  182                  (void *)UFSMNT_NOATIME },
 182  183          { MNTOPT_DFRATIME,      dfratime_cancel, NULL,          0,
 183  184                  (void *)0 },
 184  185          { MNTOPT_NODFRATIME,    nodfratime_cancel, NULL,
 185  186                  MO_NODISPLAY|MO_DEFAULT, (void *)UFSMNT_NODFRATIME },
 186  187          { MNTOPT_ONERROR,       NULL,           UFSMNT_ONERROR_PANIC_STR,
 187  188                  MO_DEFAULT|MO_HASVALUE, (void *)0 },
 188  189  };

↓ open down ↓

153 lines elided

↑ open up ↑

 189  190  
 190  191  static mntopts_t ufs_mntopts = {
                   /* number of entries in mntopts[], then the table itself */
 191  192          sizeof (mntopts) / sizeof (mntopt_t),
 192  193          mntopts
 193  194  };
 194  195  
 195  196  static vfsdef_t vfw = {
                   /*
                    * File system definition handed to the module framework through
                    * modlfs: version, type name "ufs", the ufsinit routine, the
                    * VSW_* flag word and the mount options table.
                    */
 196  197          VFSDEF_VERSION,
 197  198          "ufs",
 198  199          ufsinit,
 199      -        VSW_HASPROTO|VSW_CANREMOUNT|VSW_STATS|VSW_CANLOFI,
      200 +        VSW_HASPROTO|VSW_CANREMOUNT|VSW_STATS|VSW_CANLOFI|VSW_MOUNTDEV,
 200  201          &ufs_mntopts
 201  202  };
 202  203  
 203  204  /*
 204  205   * Module linkage information for the kernel.
            *
            * modlfs pairs mod_fsops with the vfw definition; modlinkage wraps
            * modlfs and is the object passed to mod_install() in _init() and to
            * mod_info() in _info().
 205  206   */
 206  207  extern struct mod_ops mod_fsops;
 207  208  
 208  209  static struct modlfs modlfs = {
 209  210          &mod_fsops, "filesystem for ufs", &vfw

 210  211  };
 211  212  
 212  213  static struct modlinkage modlinkage = {
 213  214          MODREV_1, (void *)&modlfs, NULL
 214  215  };
 215  216  
 216  217  /*
 217  218   * An attempt has been made to make this module unloadable.  In order to
 218  219   * test it, we need a system in which the root fs is NOT ufs.  THIS HAS NOT
 219  220   * BEEN DONE
 220  221   */
 221  222  
 222  223  extern kstat_t *ufs_inode_kstat;
 223  224  extern uint_t ufs_lockfs_key;   /* key created in _init() */
 224  225  extern void ufs_lockfs_tsd_destructor(void *);  /* its destructor */
 225  226  extern uint_t bypass_snapshot_throttle_key;     /* key created in _init() */
 226  227  
 227  228  int
 228  229  _init(void)
 229  230  {
                   /*
                    * Module load entry point.  Creates the two keys used by UFS
                    * (ufs_lockfs_key, with ufs_lockfs_tsd_destructor as destructor,
                    * and bypass_snapshot_throttle_key, with none) and then returns
                    * the result of mod_install().
                    *
                    * NOTE(review): the keys are created before mod_install() and
                    * nothing here removes them if mod_install() fails - confirm
                    * that is acceptable.
                    */
 230  231          /*
 231  232           * Create an index into the per thread array so that any thread doing
 232  233           * VOP will have a lockfs mark on it.
 233  234           */
 234  235          tsd_create(&ufs_lockfs_key, ufs_lockfs_tsd_destructor);
 235  236          tsd_create(&bypass_snapshot_throttle_key, NULL);
 236  237          return (mod_install(&modlinkage));
 237  238  }
 238  239  
 239  240  int
 240  241  _fini(void)
 241  242  {
                   /* Unload is always refused: EBUSY is returned unconditionally. */
 242  243          return (EBUSY);
 243  244  }
 244  245  
 245  246  int
 246  247  _info(struct modinfo *modinfop)
 247  248  {
                   /* Report module information by passing modlinkage to mod_info(). */
 248  249          return (mod_info(&modlinkage, modinfop));
 249  250  }
 250  251

↓ open down ↓

41 lines elided

↑ open up ↑

 251  252  extern struct vnode *makespecvp(dev_t dev, vtype_t type);
 252  253  
 253  254  extern kmutex_t ufs_scan_lock;
 254  255  
           /*
            * Common mount worker called by ufs_mount() and ufs_mountroot().  As used
            * by those callers the arguments are: vfs, reason, device vnode, mount
            * point path, credentials, an int that is 0 from ufs_mount() and 1 from
            * ufs_mountroot(), the raw ufs_args pointer and its length.
            */
 255  256  static int mountfs(struct vfs *, enum whymountroot, struct vnode *, char *,
 256  257                  struct cred *, int, void *, int);
 257  258  
 258  259  
 259  260  static int
 260  261  ufs_mount(struct vfs *vfsp, struct vnode *mvp, struct mounta *uap,
 261      -        struct cred *cr)
 262      -
      262 +    struct cred *cr)
 263  263  {
                   /*
                    * Mount entry point for UFS.  Validates the request, resolves the
                    * device to mount (a lofi vnode from vfs_get_lofi(), or the block
                    * special file named by uap->spec) and hands it to mountfs().
                    *
                    *   vfsp - vfs being mounted; VFS_RDONLY may be set on it here
                    *   mvp  - vnode of the mount point; must be a directory
                    *   uap  - mount arguments (spec, dir, flags, dataptr, datalen)
                    *   cr   - credentials used for the policy and access checks
                    *
                    * Returns 0 on success.  Otherwise returns the error reported by
                    * one of the calls made below, or one of the locally generated
                    * values ENOTDIR, EBUSY, EINVAL, EFAULT, ENOTBLK or ENXIO.
                    */
 264  264          char *data = uap->dataptr;
 265  265          int datalen = uap->datalen;
 266  266          dev_t dev;
 267  267          struct vnode *lvp = NULL;
 268  268          struct vnode *svp = NULL;
 269  269          struct pathname dpn;
 270  270          int error;
 271  271          enum whymountroot why = ROOT_INIT;
 272  272          struct ufs_args args;

 273  273          int oflag, aflag;
 274  274          int fromspace = (uap->flags & MS_SYSSPACE) ?
 275  275              UIO_SYSSPACE : UIO_USERSPACE;
 276  276  
 277  277          if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
 278  278                  return (error);
 279  279  
 280  280          if (mvp->v_type != VDIR)
 281  281                  return (ENOTDIR);
 282  282  
                   /*
                    * Unless this is a remount or an overlay mount, the mount point
                    * must have a reference count of exactly 1 and must not be a
                    * VROOT vnode.
                    */
 283  283          mutex_enter(&mvp->v_lock);
 284  284          if ((uap->flags & MS_REMOUNT) == 0 &&
 285  285              (uap->flags & MS_OVERLAY) == 0 &&
 286  286              (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
 287  287                  mutex_exit(&mvp->v_lock);
 288  288                  return (EBUSY);
 289  289          }
 290  290          mutex_exit(&mvp->v_lock);
 291  291  
 292  292          /*
 293  293           * Get arguments
 294  294           */
 295  295          bzero(&args, sizeof (args));
 296  296          if ((uap->flags & MS_DATA) && data != NULL && datalen != 0) {
 297  297                  int copy_result = 0;
 298  298  
 299  299                  if (datalen > sizeof (args))
 300  300                          return (EINVAL);
 301  301                  if (uap->flags & MS_SYSSPACE)
 302  302                          bcopy(data, &args, datalen);
 303  303                  else
 304  304                          copy_result = copyin(data, &args, datalen);
 305  305                  if (copy_result)
 306  306                          return (EFAULT);
                           /*
                            * args was zeroed above, so a caller that supplied fewer
                            * bytes leaves the remainder zero; from here on the full
                            * structure size is reported to mountfs().
                            */
 307  307                  datalen = sizeof (struct ufs_args);
 308  308          } else {
 309  309                  datalen = 0;
 310  310          }
 311  311  
                   /* oflag feeds secpolicy_spec_open(); aflag feeds VOP_ACCESS(). */
 312  312          if ((vfsp->vfs_flag & VFS_RDONLY) != 0 ||
 313  313              (uap->flags & MS_RDONLY) != 0) {
 314  314                  oflag = FREAD;
 315  315                  aflag = VREAD;
 316  316          } else {
 317  317                  oflag = FREAD | FWRITE;
 318  318                  aflag = VREAD | VWRITE;
 319  319          }
 320  320  
 321  321          /*
 322  322           * Read in the mount point pathname
 323  323           * (so we can record the directory the file system was last mounted on).
 324  324           */
 325  325          if (error = pn_get(uap->dir, fromspace, &dpn))
 326  326                  return (error);
 327  327  
 328  328          /*
 329  329           * Resolve path name of special file being mounted.
 330  330           */
 331  331          if (error = lookupname(uap->spec, fromspace, FOLLOW, NULL, &svp)) {
 332  332                  pn_free(&dpn);
 333  333                  return (error);
 334  334          }
 335  335  
                   /*
                    * Three-way result: a positive value is an error, 0 means lvp
                    * was set and is used as the device, and any other value means
                    * svp itself is the device and must be a block special file.
                    */
 336  336          error = vfs_get_lofi(vfsp, &lvp);
 337  337  
 338  338          if (error > 0) {
 339  339                  VN_RELE(svp);
 340  340                  pn_free(&dpn);
 341  341                  return (error);
 342  342          } else if (error == 0) {
 343  343                  dev = lvp->v_rdev;
 344  344  
 345  345                  if (getmajor(dev) >= devcnt) {
 346  346                          error = ENXIO;
 347  347                          goto out;
 348  348                  }
 349  349          } else {
 350  350                  dev = svp->v_rdev;
 351  351  
 352  352                  if (svp->v_type != VBLK) {
 353  353                          VN_RELE(svp);
 354  354                          pn_free(&dpn);
 355  355                          return (ENOTBLK);
 356  356                  }
 357  357  
 358  358                  if (getmajor(dev) >= devcnt) {
 359  359                          error = ENXIO;
 360  360                          goto out;
 361  361                  }
 362  362  
 363  363                  /*
 364  364                   * In SunCluster, requests to a global device are
 365  365                   * satisfied by a local device. We substitute the global
 366  366                   * pxfs node with a local spec node here.
 367  367                   */
 368  368                  if (IS_PXFSVP(svp)) {
 369  369                          ASSERT(lvp == NULL);
 370  370                          VN_RELE(svp);
 371  371                          svp = makespecvp(dev, VBLK);
 372  372                  }
 373  373  
 374  374                  if ((error = secpolicy_spec_open(cr, svp, oflag)) != 0) {
 375  375                          VN_RELE(svp);
 376  376                          pn_free(&dpn);
 377  377                          return (error);
 378  378                  }
 379  379          }
 380  380  
 381  381          if (uap->flags & MS_REMOUNT)
 382  382                  why = ROOT_REMOUNT;
 383  383  
 384  384          /*
 385  385           * Open device/file mounted on.  We need this to check whether
 386  386           * the caller has sufficient rights to access the resource in
 387  387           * question.  When bio is fixed for vnodes this can all be vnode
 388  388           * operations.
                    *
                    * NOTE(review): despite the wording above, the only call made
                    * here is VOP_ACCESS(); no open is performed in this function.
 389  389           */
 390  390          if ((error = VOP_ACCESS(svp, aflag, 0, cr, NULL)) != 0)
 391  391                  goto out;
 392  392  
 393  393          /*
 394  394           * Ensure that this device isn't already mounted or in progress on a
 395  395           * mount unless this is a REMOUNT request or we are told to suppress
 396  396           * mount checks. Global mounts require special handling.
 397  397           */
 398  398          if ((uap->flags & MS_NOCHECK) == 0) {
 399  399                  if ((uap->flags & MS_GLOBAL) == 0 &&
 400  400                      vfs_devmounting(dev, vfsp)) {
 401  401                          error = EBUSY;
 402  402                          goto out;
 403  403                  }
 404  404                  if (vfs_devismounted(dev)) {
 405  405                          if ((uap->flags & MS_REMOUNT) == 0) {
 406  406                                  error = EBUSY;
 407  407                                  goto out;
 408  408                          }
 409  409                  }
 410  410          }
 411  411  
 412  412          /*
 413  413           * If the device is a tape, mount it read only
 414  414           */
 415  415          if (devopsp[getmajor(dev)]->devo_cb_ops->cb_flag & D_TAPE) {
 416  416                  vfsp->vfs_flag |= VFS_RDONLY;
 417  417                  vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
 418  418          }
 419  419          if (uap->flags & MS_RDONLY)
 420  420                  vfsp->vfs_flag |= VFS_RDONLY;
 421  421  
 422  422          /*
 423  423           * Mount the filesystem, free the device vnode on error.
 424  424           */
 425  425          error = mountfs(vfsp, why, lvp != NULL ? lvp : svp,
 426  426              dpn.pn_path, cr, 0, &args, datalen);
 427  427  
 428  428          if (error == 0) {
 429  429                  vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS);
 430  430  
 431  431                  /*
 432  432                   * If lofi, drop our reference to the original file.
 433  433                   */
 434  434                  if (lvp != NULL)
 435  435                          VN_RELE(svp);
 436  436          }
 437  437  
 438  438  out:
 439  439          pn_free(&dpn);
 440  440  
                   /*
                    * Only the failure path releases vnodes here; on success the
                    * vnode that was passed to mountfs() is not released by this
                    * function.
                    */
 441  441          if (error) {
 442  442                  if (lvp != NULL)
 443  443                          VN_RELE(lvp);
 444  444                  if (svp != NULL)
 445  445                          VN_RELE(svp);
 446  446          }
 447  447          return (error);
 448  448  }
 449  449  
 450  450  /*
 451  451   * Mount root file system.
 452  452   * "why" is ROOT_INIT on initial call, ROOT_REMOUNT if called to
 453  453   * remount the root file system, and ROOT_UNMOUNT if called to
 454  454   * unmount the root (e.g., as part of a system shutdown).
 455  455   *
 456  456   * XXX - this may be partially machine-dependent; it, along with the VFS_SWAPVP
 457  457   * operation, goes along with auto-configuration.  A mechanism should be
 458  458   * provided by which machine-INdependent code in the kernel can say "get me the
 459  459   * right root file system" and "get me the right initial swap area", and have
 460  460   * that done in what may well be a machine-dependent fashion.
 461  461   * Unfortunately, it is also file-system-type dependent (NFS gets it via
 462  462   * bootparams calls, UFS gets it from various and sundry machine-dependent
 463  463   * mechanisms, as SPECFS does for swap).
 464  464   */
 465  465  static int
 466  466  ufs_mountroot(struct vfs *vfsp, enum whymountroot why)
 467  467  {
                   /*
                    * Behaviour by value of "why":
                    *
                    *   ROOT_INIT    - honoured once; ufsrootdone is incremented on
                    *                  every such call (including one that then
                    *                  fails) and later calls return EBUSY.  The
                    *                  device comes from getrootdev() (ENODEV if it
                    *                  is NODEV) and the vfs is marked VFS_RDONLY
                    *                  before mountfs() runs.
                    *   ROOT_REMOUNT - purges the DNLC for the vfs, invalidates the
                    *                  device's pages and buffers, then clears
                    *                  VFS_RDONLY and sets VFS_REMOUNT.  The old
                    *                  flags are saved in ovflags and restored if
                    *                  mountfs() fails.
                    *   ROOT_UNMOUNT - flushes (or calls ufs_update(0) when the vfs
                    *                  lock cannot be taken), closes the device
                    *                  vnode and returns 0 without calling
                    *                  mountfs().
                    *
                    * Returns 0 on success, otherwise EBUSY, ENODEV, or the error
                    * from vfs_lock() or mountfs().
                    *
                    * NOTE(review): rootdev is assigned only in the ROOT_INIT and
                    * ROOT_REMOUNT branches; a "why" outside the three handled values
                    * would reach makespecvp() with rootdev unset.
                    */
 468  468          struct fs *fsp;
 469  469          int error;
 470  470          static int ufsrootdone = 0;
 471  471          dev_t rootdev;
 472  472          struct vnode *vp;
 473  473          struct vnode *devvp = 0;
 474  474          int ovflags;
 475  475          int doclkset;
 476  476          ufsvfs_t *ufsvfsp;
 477  477  
 478  478          if (why == ROOT_INIT) {
 479  479                  if (ufsrootdone++)
 480  480                          return (EBUSY);
 481  481                  rootdev = getrootdev();
 482  482                  if (rootdev == (dev_t)NODEV)
 483  483                          return (ENODEV);
 484  484                  vfsp->vfs_dev = rootdev;
 485  485                  vfsp->vfs_flag |= VFS_RDONLY;
 486  486          } else if (why == ROOT_REMOUNT) {
 487  487                  vp = ((struct ufsvfs *)vfsp->vfs_data)->vfs_devvp;
 488  488                  (void) dnlc_purge_vfsp(vfsp, 0);
 489  489                  vp = common_specvp(vp);
 490  490                  (void) VOP_PUTPAGE(vp, (offset_t)0, (size_t)0, B_INVAL,
 491  491                      CRED(), NULL);
 492  492                  (void) bfinval(vfsp->vfs_dev, 0);
                           /*
                            * NOTE(review): this fsp value is not read before fsp is
                            * assigned again after mountfs() succeeds.
                            */
 493  493                  fsp = getfs(vfsp);
 494  494  
 495  495                  ovflags = vfsp->vfs_flag;
 496  496                  vfsp->vfs_flag &= ~VFS_RDONLY;
 497  497                  vfsp->vfs_flag |= VFS_REMOUNT;
 498  498                  rootdev = vfsp->vfs_dev;
 499  499          } else if (why == ROOT_UNMOUNT) {
 500  500                  if (vfs_lock(vfsp) == 0) {
 501  501                          (void) ufs_flush(vfsp);
 502  502                          /*
 503  503                           * Mark the log as fully rolled
 504  504                           */
 505  505                          ufsvfsp = (ufsvfs_t *)vfsp->vfs_data;
 506  506                          fsp = ufsvfsp->vfs_fs;
 507  507                          if (TRANS_ISTRANS(ufsvfsp) &&
 508  508                              !TRANS_ISERROR(ufsvfsp) &&
 509  509                              (fsp->fs_rolled == FS_NEED_ROLL)) {
 510  510                                  ml_unit_t *ul = ufsvfsp->vfs_log;
 511  511  
                                           /*
                                            * The superblock is only rewritten as
                                            * FS_ALL_ROLLED when the summary info
                                            * was written without error; a failure
                                            * here is otherwise not reported.
                                            */
 512  512                                  error = ufs_putsummaryinfo(ul->un_dev,
 513  513                                      ufsvfsp, fsp);
 514  514                                  if (error == 0) {
 515  515                                          fsp->fs_rolled = FS_ALL_ROLLED;
 516  516                                          UFS_BWRITE2(NULL, ufsvfsp->vfs_bufp);
 517  517                                  }
 518  518                          }
 519  519                          vfs_unlock(vfsp);
 520  520                  } else {
 521  521                          ufs_update(0);
 522  522                  }
 523  523  
 524  524                  vp = ((struct ufsvfs *)vfsp->vfs_data)->vfs_devvp;
 525  525                  (void) VOP_CLOSE(vp, FREAD|FWRITE, 1,
 526  526                      (offset_t)0, CRED(), NULL);
 527  527                  return (0);
 528  528          }
 529  529          error = vfs_lock(vfsp);
 530  530          if (error)
 531  531                  return (error);
 532  532  
 533  533          devvp = makespecvp(rootdev, VBLK);
 534  534  
 535  535          /* If RO media, don't call clkset() (see below) */
                   /*
                    * On ROOT_INIT the device is opened read/write purely as a probe:
                    * it is closed again at once on success, and on failure doclkset
                    * is cleared so that clkset() is later called with -1.
                    */
 536  536          doclkset = 1;
 537  537          if (why == ROOT_INIT) {
 538  538                  error = VOP_OPEN(&devvp, FREAD|FWRITE, CRED(), NULL);
 539  539                  if (error == 0) {
 540  540                          (void) VOP_CLOSE(devvp, FREAD|FWRITE, 1,
 541  541                              (offset_t)0, CRED(), NULL);
 542  542                  } else {
 543  543                          doclkset = 0;
 544  544                  }
 545  545          }
 546  546  
 547  547          error = mountfs(vfsp, why, devvp, "/", CRED(), 1, NULL, 0);
 548  548          /*
 549  549           * XXX - assumes root device is not indirect, because we don't set
 550  550           * rootvp.  Is rootvp used for anything?  If so, make another arg
 551  551           * to mountfs.
 552  552           */
 553  553          if (error) {
 554  554                  vfs_unlock(vfsp);
 555  555                  if (why == ROOT_REMOUNT)
 556  556                          vfsp->vfs_flag = ovflags;
 557  557                  if (rootvp) {
 558  558                          VN_RELE(rootvp);
 559  559                          rootvp = (struct vnode *)0;
 560  560                  }
 561  561                  VN_RELE(devvp);
 562  562                  return (error);
 563  563          }
 564  564          if (why == ROOT_INIT)
 565  565                  vfs_add((struct vnode *)0, vfsp,
 566  566                      (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0);
 567  567          vfs_unlock(vfsp);
 568  568          fsp = getfs(vfsp);
 569  569          clkset(doclkset ? fsp->fs_time : -1);
 570  570          ufsvfsp = (ufsvfs_t *)vfsp->vfs_data;
                   /* Record the logging mount option when the mount ended up with a log. */
 571  571          if (ufsvfsp->vfs_log) {
 572  572                  vfs_setmntopt(vfsp, MNTOPT_LOGGING, NULL, 0);
 573  573          }
 574  574          return (0);
 575  575  }
 576  576  
 577  577  static int
 578  578  remountfs(struct vfs *vfsp, dev_t dev, void *raw_argsp, int args_len)
 579  579  {
 580  580          struct ufsvfs *ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
 581  581          struct ulockfs *ulp = &ufsvfsp->vfs_ulockfs;
 582  582          struct buf *bp = ufsvfsp->vfs_bufp;
 583  583          struct fs *fsp = (struct fs *)bp->b_un.b_addr;
 584  584          struct fs *fspt;
 585  585          struct buf *tpt = 0;
 586  586          int error = 0;
 587  587          int flags = 0;
 588  588  
 589  589          if (args_len == sizeof (struct ufs_args) && raw_argsp)
 590  590                  flags = ((struct ufs_args *)raw_argsp)->flags;
 591  591  
 592  592          /* cannot remount to RDONLY */
 593  593          if (vfsp->vfs_flag & VFS_RDONLY)
 594  594                  return (ENOTSUP);
 595  595  
 596  596          /* whoops, wrong dev */
 597  597          if (vfsp->vfs_dev != dev)
 598  598                  return (EINVAL);
 599  599  
 600  600          /*
 601  601           * synchronize w/ufs ioctls
 602  602           */
 603  603          mutex_enter(&ulp->ul_lock);
 604  604          atomic_inc_ulong(&ufs_quiesce_pend);
 605  605  
 606  606          /*
 607  607           * reset options
 608  608           */
 609  609          ufsvfsp->vfs_nointr  = flags & UFSMNT_NOINTR;
 610  610          ufsvfsp->vfs_syncdir = flags & UFSMNT_SYNCDIR;
 611  611          ufsvfsp->vfs_nosetsec = flags & UFSMNT_NOSETSEC;
 612  612          ufsvfsp->vfs_noatime = flags & UFSMNT_NOATIME;
 613  613          if ((flags & UFSMNT_NODFRATIME) || ufsvfsp->vfs_noatime)
 614  614                  ufsvfsp->vfs_dfritime &= ~UFS_DFRATIME;
 615  615          else    /* dfratime, default behavior */
 616  616                  ufsvfsp->vfs_dfritime |= UFS_DFRATIME;
 617  617          if (flags & UFSMNT_FORCEDIRECTIO)
 618  618                  ufsvfsp->vfs_forcedirectio = 1;
 619  619          else    /* default is no direct I/O */
 620  620                  ufsvfsp->vfs_forcedirectio = 0;
 621  621          ufsvfsp->vfs_iotstamp = ddi_get_lbolt();
 622  622  
 623  623          /*
 624  624           * set largefiles flag in ufsvfs equal to the
 625  625           * value passed in by the mount command. If
 626  626           * it is "nolargefiles", and the flag is set
 627  627           * in the superblock, the mount fails.
 628  628           */
 629  629          if (!(flags & UFSMNT_LARGEFILES)) {  /* "nolargefiles" */
 630  630                  if (fsp->fs_flags & FSLARGEFILES) {
 631  631                          error = EFBIG;
 632  632                          goto remounterr;
 633  633                  }
 634  634                  ufsvfsp->vfs_lfflags &= ~UFS_LARGEFILES;
 635  635          } else  /* "largefiles" */
 636  636                  ufsvfsp->vfs_lfflags |= UFS_LARGEFILES;
 637  637          /*
 638  638           * read/write to read/write; all done
 639  639           */
 640  640          if (fsp->fs_ronly == 0)
 641  641                  goto remounterr;
 642  642  
 643  643          /*
 644  644           * fix-on-panic assumes RO->RW remount implies system-critical fs
 645  645           * if it is shortly after boot; so, don't attempt to lock and fix
 646  646           * (unless the user explicitly asked for another action on error)
 647  647           * XXX UFSMNT_ONERROR_RDONLY rather than UFSMNT_ONERROR_PANIC
 648  648           */
 649  649  #define BOOT_TIME_LIMIT (180*hz)
 650  650          if (!(flags & UFSMNT_ONERROR_FLGMASK) &&
 651  651              ddi_get_lbolt() < BOOT_TIME_LIMIT) {
 652  652                  cmn_err(CE_WARN, "%s is required to be mounted onerror=%s",
 653  653                      ufsvfsp->vfs_fs->fs_fsmnt, UFSMNT_ONERROR_PANIC_STR);
 654  654                  flags |= UFSMNT_ONERROR_PANIC;
 655  655          }
 656  656  
 657  657          if ((error = ufsfx_mount(ufsvfsp, flags)) != 0)
 658  658                  goto remounterr;
 659  659  
 660  660          /*
 661  661           * quiesce the file system
 662  662           */
 663  663          error = ufs_quiesce(ulp);
 664  664          if (error)
 665  665                  goto remounterr;
 666  666  
 667  667          tpt = UFS_BREAD(ufsvfsp, ufsvfsp->vfs_dev, SBLOCK, SBSIZE);
 668  668          if (tpt->b_flags & B_ERROR) {
 669  669                  error = EIO;
 670  670                  goto remounterr;
 671  671          }
 672  672          fspt = (struct fs *)tpt->b_un.b_addr;
 673  673          if (((fspt->fs_magic != FS_MAGIC) &&
 674  674              (fspt->fs_magic != MTB_UFS_MAGIC)) ||
 675  675              (fspt->fs_magic == FS_MAGIC &&
 676  676              (fspt->fs_version != UFS_EFISTYLE4NONEFI_VERSION_2 &&
 677  677              fspt->fs_version != UFS_VERSION_MIN)) ||
 678  678              (fspt->fs_magic == MTB_UFS_MAGIC &&
 679  679              (fspt->fs_version > MTB_UFS_VERSION_1 ||
 680  680              fspt->fs_version < MTB_UFS_VERSION_MIN)) ||
 681  681              fspt->fs_bsize > MAXBSIZE || fspt->fs_frag > MAXFRAG ||
 682  682              fspt->fs_bsize < sizeof (struct fs) || fspt->fs_bsize < PAGESIZE) {
 683  683                  tpt->b_flags |= B_STALE | B_AGE;
 684  684                  error = EINVAL;
 685  685                  goto remounterr;
 686  686          }
 687  687  
 688  688          if (ufsvfsp->vfs_log && (ufsvfsp->vfs_log->un_flags & LDL_NOROLL)) {
 689  689                  ufsvfsp->vfs_log->un_flags &= ~LDL_NOROLL;
 690  690                  logmap_start_roll(ufsvfsp->vfs_log);
 691  691          }
 692  692  
 693  693          if (TRANS_ISERROR(ufsvfsp))
 694  694                  goto remounterr;
 695  695          TRANS_DOMATAMAP(ufsvfsp);
 696  696  
 697  697          if ((fspt->fs_state + fspt->fs_time == FSOKAY) &&
 698  698              fspt->fs_clean == FSLOG && !TRANS_ISTRANS(ufsvfsp)) {
 699  699                  ufsvfsp->vfs_log = NULL;
 700  700                  ufsvfsp->vfs_domatamap = 0;
 701  701                  error = ENOSPC;
 702  702                  goto remounterr;
 703  703          }
 704  704  
 705  705          if (fspt->fs_state + fspt->fs_time == FSOKAY &&
 706  706              (fspt->fs_clean == FSCLEAN ||
 707  707              fspt->fs_clean == FSSTABLE ||
 708  708              fspt->fs_clean == FSLOG)) {
 709  709  
 710  710                  /*
 711  711                   * Ensure that ufs_getsummaryinfo doesn't reconstruct
 712  712                   * the summary info.
 713  713                   */
 714  714                  error = ufs_getsummaryinfo(vfsp->vfs_dev, ufsvfsp, fspt);
 715  715                  if (error)
 716  716                          goto remounterr;
 717  717  
 718  718                  /* preserve mount name */
 719  719                  (void) strncpy(fspt->fs_fsmnt, fsp->fs_fsmnt, MAXMNTLEN);
 720  720                  /* free the old cg space */
 721  721                  kmem_free(fsp->fs_u.fs_csp, fsp->fs_cssize);
 722  722                  /* switch in the new superblock */
 723  723                  fspt->fs_rolled = FS_NEED_ROLL;
 724  724                  bcopy(tpt->b_un.b_addr, bp->b_un.b_addr, fspt->fs_sbsize);
 725  725  
 726  726                  fsp->fs_clean = FSSTABLE;
 727  727          } /* superblock updated in memory */
 728  728          tpt->b_flags |= B_STALE | B_AGE;
 729  729          brelse(tpt);
 730  730          tpt = 0;
 731  731  
 732  732          if (fsp->fs_clean != FSSTABLE) {
 733  733                  error = ENOSPC;
 734  734                  goto remounterr;
 735  735          }
 736  736  
 737  737  
 738  738          if (TRANS_ISTRANS(ufsvfsp)) {
 739  739                  fsp->fs_clean = FSLOG;
 740  740                  ufsvfsp->vfs_dio = 0;
 741  741          } else
 742  742                  if (ufsvfsp->vfs_dio)
 743  743                          fsp->fs_clean = FSSUSPEND;
 744  744  
 745  745          TRANS_MATA_MOUNT(ufsvfsp);
 746  746  
 747  747          fsp->fs_fmod = 0;
 748  748          fsp->fs_ronly = 0;
 749  749  
 750  750          atomic_dec_ulong(&ufs_quiesce_pend);
 751  751          cv_broadcast(&ulp->ul_cv);
 752  752          mutex_exit(&ulp->ul_lock);
 753  753  
 754  754          if (TRANS_ISTRANS(ufsvfsp)) {
 755  755  
 756  756                  /*
 757  757                   * start the delete thread
 758  758                   */
 759  759                  ufs_thread_start(&ufsvfsp->vfs_delete, ufs_thread_delete, vfsp);
 760  760  
 761  761                  /*
 762  762                   * start the reclaim thread
 763  763                   */
 764  764                  if (fsp->fs_reclaim & (FS_RECLAIM|FS_RECLAIMING)) {
 765  765                          fsp->fs_reclaim &= ~FS_RECLAIM;
 766  766                          fsp->fs_reclaim |=  FS_RECLAIMING;
 767  767                          ufs_thread_start(&ufsvfsp->vfs_reclaim,
 768  768                              ufs_thread_reclaim, vfsp);
 769  769                  }
 770  770          }
 771  771  
 772  772          TRANS_SBWRITE(ufsvfsp, TOP_MOUNT);
 773  773  
 774  774          return (0);
 775  775  
 776  776  remounterr:
 777  777          if (tpt)
 778  778                  brelse(tpt);
 779  779          atomic_dec_ulong(&ufs_quiesce_pend);
 780  780          cv_broadcast(&ulp->ul_cv);
 781  781          mutex_exit(&ulp->ul_lock);
 782  782          return (error);
 783  783  }
 784  784  
 785  785  /*
 786  786   * If the device maxtransfer size is not available, we use ufs_maxmaxphys
 787  787   * along with the system value for maxphys to determine the value for
 788  788   * maxtransfer.

↓ open down ↓

516 lines elided

↑ open up ↑

 789  789   */
 790  790  int ufs_maxmaxphys = (1024 * 1024);
 791  791  
 792  792  #include <sys/ddi.h>            /* for delay(9f) */
 793  793  
           /*
            * Tunables for the failed-mount cleanup loop in mountfs(): while other
            * references to the root vnode remain, it sleeps ufs_mount_error_delay
            * milliseconds per iteration and stops waiting once the accumulated
            * delay reaches ufs_mount_timeout milliseconds.
            */
 794  794  int ufs_mount_error_delay = 20; /* default to 20ms */
 795  795  int ufs_mount_timeout = 60000;  /* default to 1 minute */
 796  796  
 797  797  static int
 798  798  mountfs(struct vfs *vfsp, enum whymountroot why, struct vnode *devvp,
 799      -        char *path, cred_t *cr, int isroot, void *raw_argsp, int args_len)
      799 +    char *path, cred_t *cr, int isroot, void *raw_argsp, int args_len)
 800  800  {
                   /*
                    * mountfs: mount (or, with VFS_REMOUNT set, remount) the UFS
                    * file system that lives on block device devvp, for vfsp.
                    *
                    *   vfsp      - vfs being mounted; the caller must hold the vfs
                    *               lock (asserted below).
                    *   why       - ROOT_INIT makes this routine open devvp itself
                    *               and refuse a device that is in use for swap.
                    *   devvp     - vnode of the block device; v_rdev supplies the
                    *               dev_t used throughout.
                    *   path      - mount point name; copied into fs_fsmnt and
                    *               used in warning messages.
                    *   cr        - credentials handed to VOP_OPEN, VOP_PUTPAGE,
                    *               VOP_CLOSE and ufs_iget_alloced().
                    *   isroot    - non-zero when mounting the root file system;
                    *               several checks below treat root specially.
                    *   raw_argsp,
                    *   args_len  - mount arguments; the flags are taken from them
                    *               only when args_len equals
                    *               sizeof (struct ufs_args) and raw_argsp is
                    *               non-NULL, otherwise flags stays 0.
                    *
                    * Returns 0 on success or an errno value.  Every failure except
                    * a failed remount funnels through the out: label, which undoes
                    * whatever had been set up; an error of 0 arriving there is
                    * reported as EIO.
                    */
 801  801          dev_t dev = devvp->v_rdev;
 802  802          struct fs *fsp;
 803  803          struct ufsvfs *ufsvfsp = 0;
 804  804          struct buf *bp = 0;
 805  805          struct buf *tp = 0;
 806  806          struct dk_cinfo ci;
 807  807          int error = 0;
 808  808          size_t len;
 809  809          int needclose = 0;

 810  810          int needtrans = 0;
 811  811          struct inode *rip;
 812  812          struct vnode *rvp = NULL;
 813  813          int flags = 0;
 814  814          kmutex_t *ihm;
 815  815          int elapsed;
 816  816          int status;
 817  817          extern  int     maxphys;
 818  818  
 819  819          if (args_len == sizeof (struct ufs_args) && raw_argsp)
 820  820                  flags = ((struct ufs_args *)raw_argsp)->flags;
 821  821  
 822  822          ASSERT(vfs_lock_held(vfsp));
 823  823  
 824  824          if (why == ROOT_INIT) {
 825  825                  /*
 826  826                   * Open block device mounted on.
 827  827                   * When bio is fixed for vnodes this can all be vnode
 828  828                   * operations.
 829  829                   */
 830  830                  error = VOP_OPEN(&devvp,
 831  831                      (vfsp->vfs_flag & VFS_RDONLY) ? FREAD : FREAD|FWRITE,
 832  832                      cr, NULL);
 833  833                  if (error)
 834  834                          goto out;
                           /* from here on the out: path must VOP_CLOSE the device */
 835  835                  needclose = 1;
 836  836  
 837  837                  /*
 838  838                   * Refuse to go any further if this
 839  839                   * device is being used for swapping.
 840  840                   */
 841  841                  if (IS_SWAPVP(devvp)) {
 842  842                          error = EBUSY;
 843  843                          goto out;
 844  844                  }
 845  845          }
 846  846  
 847  847          /*
 848  848           * check for dev already mounted on
 849  849           */
 850  850          if (vfsp->vfs_flag & VFS_REMOUNT) {
 851  851                  error = remountfs(vfsp, dev, raw_argsp, args_len);
                           /* a successful remount drops one hold on devvp */
 852  852                  if (error == 0)
 853  853                          VN_RELE(devvp);
 854  854                  return (error);
 855  855          }
 856  856  
 857  857          ASSERT(devvp != 0);
 858  858  
 859  859          /*
 860  860           * Flush back any dirty pages on the block device to
 861  861           * try and keep the buffer cache in sync with the page
 862  862           * cache if someone is trying to use block devices when
 863  863           * they really should be using the raw device.
 864  864           */
 865  865          (void) VOP_PUTPAGE(common_specvp(devvp), (offset_t)0,
 866  866              (size_t)0, B_INVAL, cr, NULL);
 867  867  
 868  868          /*
 869  869           * read in superblock
 870  870           */
 871  871          ufsvfsp = kmem_zalloc(sizeof (struct ufsvfs), KM_SLEEP);
 872  872          tp = UFS_BREAD(ufsvfsp, dev, SBLOCK, SBSIZE);
                   /* error is still 0 on this path; out: turns that into EIO */
 873  873          if (tp->b_flags & B_ERROR)
 874  874                  goto out;
 875  875          fsp = (struct fs *)tp->b_un.b_addr;
 876  876  
 877  877          if ((fsp->fs_magic != FS_MAGIC) && (fsp->fs_magic != MTB_UFS_MAGIC)) {
 878  878                  cmn_err(CE_NOTE,
 879  879                      "mount: not a UFS magic number (0x%x)", fsp->fs_magic);
 880  880                  error = EINVAL;
 881  881                  goto out;
 882  882          }
 883  883  
 884  884          if ((fsp->fs_magic == FS_MAGIC) &&
 885  885              (fsp->fs_version != UFS_EFISTYLE4NONEFI_VERSION_2 &&
 886  886              fsp->fs_version != UFS_VERSION_MIN)) {
 887  887                  cmn_err(CE_NOTE,
 888  888                      "mount: unrecognized version of UFS on-disk format: %d",
 889  889                      fsp->fs_version);
 890  890                  error = EINVAL;
 891  891                  goto out;
 892  892          }
 893  893  
 894  894          if ((fsp->fs_magic == MTB_UFS_MAGIC) &&
 895  895              (fsp->fs_version > MTB_UFS_VERSION_1 ||
 896  896              fsp->fs_version < MTB_UFS_VERSION_MIN)) {
 897  897                  cmn_err(CE_NOTE,
 898  898                      "mount: unrecognized version of UFS on-disk format: %d",
 899  899                      fsp->fs_version);
 900  900                  error = EINVAL;
 901  901                  goto out;
 902  902          }
 903  903  
 904  904  #ifndef _LP64
 905  905          if (fsp->fs_magic == MTB_UFS_MAGIC) {
 906  906                  /*
 907  907                   * Find the size of the device in sectors.  If the
 908  908                   * size in sectors is greater than INT_MAX, it's
 909  909                   * a multi-terabyte file system, which can't be
 910  910                   * mounted by a 32-bit kernel.  We can't use the
 911  911                   * fsbtodb() macro in the next line because the macro
 912  912                   * casts the intermediate values to daddr_t, which is
 913  913                   * a 32-bit quantity in a 32-bit kernel.  Here we
 914  914                   * really do need the intermediate values to be held
 915  915                   * in 64-bit quantities because we're checking for
 916  916                   * overflow of a 32-bit field.
 917  917                   */
 918  918                  if ((((diskaddr_t)(fsp->fs_size)) << fsp->fs_fsbtodb)
 919  919                      > INT_MAX) {
 920  920                          cmn_err(CE_NOTE,
 921  921                              "mount: multi-terabyte UFS cannot be"
 922  922                              " mounted by a 32-bit kernel");
 923  923                          error = EINVAL;
 924  924                          goto out;
 925  925                  }
 926  926  
 927  927          }
 928  928  #endif
 929  929  
                   /* reject block/fragment geometry outside the supported limits */
 930  930          if (fsp->fs_bsize > MAXBSIZE || fsp->fs_frag > MAXFRAG ||
 931  931              fsp->fs_bsize < sizeof (struct fs) || fsp->fs_bsize < PAGESIZE) {
 932  932                  error = EINVAL; /* also needs translation */
 933  933                  goto out;
 934  934          }
 935  935  
 936  936          /*
 937  937           * Allocate VFS private data.
 938  938           */
 939  939          vfsp->vfs_bcount = 0;
 940  940          vfsp->vfs_data = (caddr_t)ufsvfsp;
 941  941          vfsp->vfs_fstype = ufsfstype;
 942  942          vfsp->vfs_dev = dev;
 943  943          vfsp->vfs_flag |= VFS_NOTRUNC;
 944  944          vfs_make_fsid(&vfsp->vfs_fsid, dev, ufsfstype);
 945  945          ufsvfsp->vfs_devvp = devvp;
 946  946  
 947  947          /*
 948  948           * Cross-link with vfs and add to instance list.
 949  949           */
 950  950          ufsvfsp->vfs_vfs = vfsp;
 951  951          ufs_vfs_add(ufsvfsp);
 952  952  
 953  953          ufsvfsp->vfs_dev = dev;
 954  954          ufsvfsp->vfs_bufp = tp;
 955  955  
 956  956          ufsvfsp->vfs_dirsize = INODESIZE + (4 * ALLOCSIZE) + fsp->fs_fsize;
                   /* fs_minfree percent of fs_dsize, computed in 64 bits */
 957  957          ufsvfsp->vfs_minfrags =
 958  958              (int)((int64_t)fsp->fs_dsize * fsp->fs_minfree / 100);
 959  959          /*
 960  960           * if mount allows largefiles, indicate so in ufsvfs
 961  961           */
 962  962          if (flags & UFSMNT_LARGEFILES)
 963  963                  ufsvfsp->vfs_lfflags |= UFS_LARGEFILES;
 964  964          /*
 965  965           * Initialize threads
 966  966           */
 967  967          ufs_delete_init(ufsvfsp, 1);
 968  968          ufs_thread_init(&ufsvfsp->vfs_reclaim, 0);
 969  969  
 970  970          /*
 971  971           * Chicken and egg problem. The superblock may have deltas
 972  972           * in the log.  So after the log is scanned we reread the
 973  973           * superblock. We guarantee that the fields needed to
 974  974           * scan the log will not be in the log.
 975  975           */
 976  976          if (fsp->fs_logbno && fsp->fs_clean == FSLOG &&
 977  977              (fsp->fs_state + fsp->fs_time == FSOKAY)) {
 978  978                  error = lufs_snarf(ufsvfsp, fsp, (vfsp->vfs_flag & VFS_RDONLY));
 979  979                  if (error) {
 980  980                          /*
 981  981                           * Allow a ro mount to continue even if the
 982  982                           * log cannot be processed - yet.
 983  983                           */
 984  984                          if (!(vfsp->vfs_flag & VFS_RDONLY)) {
 985  985                                  cmn_err(CE_WARN, "Error accessing ufs "
 986  986                                      "log for %s; Please run fsck(1M)", path);
 987  987                                  goto out;
 988  988                          }
 989  989                  }
                           /* discard the first copy and reread the superblock */
 990  990                  tp->b_flags |= (B_AGE | B_STALE);
 991  991                  brelse(tp);
 992  992                  tp = UFS_BREAD(ufsvfsp, dev, SBLOCK, SBSIZE);
 993  993                  fsp = (struct fs *)tp->b_un.b_addr;
 994  994                  ufsvfsp->vfs_bufp = tp;
 995  995                  if (tp->b_flags & B_ERROR)
 996  996                          goto out;
 997  997          }
 998  998  
 999  999          /*
1000 1000           * Set logging mounted flag used by lockfs
1001 1001           */
1002 1002          ufsvfsp->vfs_validfs = UT_MOUNTED;
1003 1003  
1004 1004          /*
1005 1005           * Copy the super block into a buffer in its native size.
1006 1006           * Use ngeteblk to allocate the buffer
1007 1007           */
1008 1008          bp = ngeteblk(fsp->fs_bsize);
                   /* vfs_bufp now refers to the private copy rather than to tp */
1009 1009          ufsvfsp->vfs_bufp = bp;
1010 1010          bp->b_edev = dev;
1011 1011          bp->b_dev = cmpdev(dev);
1012 1012          bp->b_blkno = SBLOCK;
1013 1013          bp->b_bcount = fsp->fs_sbsize;
1014 1014          bcopy(tp->b_un.b_addr, bp->b_un.b_addr, fsp->fs_sbsize);
1015 1015          tp->b_flags |= B_STALE | B_AGE;
1016 1016          brelse(tp);
                   /* cleared so the out: path does not release tp a second time */
1017 1017          tp = 0;
1018 1018  
1019 1019          fsp = (struct fs *)bp->b_un.b_addr;
1020 1020          /*
1021 1021           * Mount fails if superblock flag indicates presence of large
1022 1022           * files and filesystem is attempted to be mounted 'nolargefiles'.
1023 1023           * The exception is for a read only mount of root, which we
1024 1024           * always want to succeed, so fsck can fix potential problems.
1025 1025           * The assumption is that we will remount root at some point,
1026 1026           * and the remount will enforce the mount option.
1027 1027           */
                   /*
                    * NOTE(review): the first operand combines isroot with the masked
                    * flag using bitwise '&', not '&&'; the result is non-zero only
                    * when the two values share a set bit.  Confirm that isroot is
                    * always 0/1 and that VFS_RDONLY occupies the low bit.
                    */
1028 1028          if (!(isroot & (vfsp->vfs_flag & VFS_RDONLY)) &&
1029 1029              (fsp->fs_flags & FSLARGEFILES) &&
1030 1030              !(flags & UFSMNT_LARGEFILES)) {
1031 1031                  error = EFBIG;
1032 1032                  goto out;
1033 1033          }
1034 1034  
                   /*
                    * Decide the in-core fs_clean state.  Read-only mounts never
                    * fail here; read-write mounts of a non-root file system whose
                    * state is not OK fail with ENOSPC.
                    */
1035 1035          if (vfsp->vfs_flag & VFS_RDONLY) {
1036 1036                  fsp->fs_ronly = 1;
1037 1037                  fsp->fs_fmod = 0;
1038 1038                  if (((fsp->fs_state + fsp->fs_time) == FSOKAY) &&
1039 1039                      ((fsp->fs_clean == FSCLEAN) ||
1040 1040                      (fsp->fs_clean == FSSTABLE) ||
1041 1041                      (fsp->fs_clean == FSLOG))) {
1042 1042                          if (isroot) {
                                           /*
                                            * A logging root counts as clean only
                                            * when its log is fully rolled.
                                            */
1043 1043                                  if (fsp->fs_clean == FSLOG) {
1044 1044                                          if (fsp->fs_rolled == FS_ALL_ROLLED) {
1045 1045                                                  ufs_clean_root = 1;
1046 1046                                          }
1047 1047                                  } else {
1048 1048                                          ufs_clean_root = 1;
1049 1049                                  }
1050 1050                          }
1051 1051                          fsp->fs_clean = FSSTABLE;
1052 1052                  } else {
1053 1053                          fsp->fs_clean = FSBAD;
1054 1054                  }
1055 1055          } else {
1056 1056  
1057 1057                  fsp->fs_fmod = 0;
1058 1058                  fsp->fs_ronly = 0;
1059 1059  
1060 1060                  TRANS_DOMATAMAP(ufsvfsp);
1061 1061  
                           /*
                            * Fail if the log is in error, or if the superblock says
                            * FSLOG but no log is active for this mount.
                            */
1062 1062                  if ((TRANS_ISERROR(ufsvfsp)) ||
1063 1063                      (((fsp->fs_state + fsp->fs_time) == FSOKAY) &&
1064 1064                      fsp->fs_clean == FSLOG && !TRANS_ISTRANS(ufsvfsp))) {
1065 1065                          ufsvfsp->vfs_log = NULL;
1066 1066                          ufsvfsp->vfs_domatamap = 0;
1067 1067                          error = ENOSPC;
1068 1068                          goto out;
1069 1069                  }
1070 1070  
1071 1071                  if (((fsp->fs_state + fsp->fs_time) == FSOKAY) &&
1072 1072                      (fsp->fs_clean == FSCLEAN ||
1073 1073                      fsp->fs_clean == FSSTABLE ||
1074 1074                      fsp->fs_clean == FSLOG))
1075 1075                          fsp->fs_clean = FSSTABLE;
1076 1076                  else {
1077 1077                          if (isroot) {
1078 1078                                  /*
1079 1079                                   * allow root partition to be mounted even
1080 1080                                   * when fs_state is not ok
1081 1081                                   * will be fixed later by a remount root
1082 1082                                   */
1083 1083                                  fsp->fs_clean = FSBAD;
1084 1084                                  ufsvfsp->vfs_log = NULL;
1085 1085                                  ufsvfsp->vfs_domatamap = 0;
1086 1086                          } else {
1087 1087                                  error = ENOSPC;
1088 1088                                  goto out;
1089 1089                          }
1090 1090                  }
1091 1091  
1092 1092                  if (fsp->fs_clean == FSSTABLE && TRANS_ISTRANS(ufsvfsp))
1093 1093                          fsp->fs_clean = FSLOG;
1094 1094          }
1095 1095          TRANS_MATA_MOUNT(ufsvfsp);
                   /* from here on the out: path must call TRANS_MATA_UMOUNT */
1096 1096          needtrans = 1;
1097 1097  
1098 1098          vfsp->vfs_bsize = fsp->fs_bsize;
1099 1099  
1100 1100          /*
1101 1101           * Read in summary info
1102 1102           */
1103 1103          if (error = ufs_getsummaryinfo(dev, ufsvfsp, fsp))
1104 1104                  goto out;
1105 1105  
1106 1106          /*
1107 1107           * lastwhinetime is set to zero rather than lbolt, so that after
1108 1108           * mounting if the filesystem is found to be full, then immediately the
1109 1109           * "file system message" will be logged.
1110 1110           */
1111 1111          ufsvfsp->vfs_lastwhinetime = 0L;
1112 1112  
1113 1113  
1114 1114          mutex_init(&ufsvfsp->vfs_lock, NULL, MUTEX_DEFAULT, NULL);
                   /*
                    * Record the mount point name in the superblock and zero-fill
                    * the rest of fs_fsmnt after the copied string.
                    */
1115 1115          (void) copystr(path, fsp->fs_fsmnt, sizeof (fsp->fs_fsmnt) - 1, &len);
1116 1116          bzero(fsp->fs_fsmnt + len, sizeof (fsp->fs_fsmnt) - len);
1117 1117  
1118 1118          /*
1119 1119           * Sanity checks for old file systems
1120 1120           */
1121 1121          if (fsp->fs_postblformat == FS_42POSTBLFMT)
1122 1122                  ufsvfsp->vfs_nrpos = 8;
1123 1123          else
1124 1124                  ufsvfsp->vfs_nrpos = fsp->fs_nrpos;
1125 1125  
1126 1126          /*
1127 1127           * Initialize lockfs structure to support file system locking
1128 1128           */
1129 1129          bzero(&ufsvfsp->vfs_ulockfs.ul_lockfs,
1130 1130              sizeof (struct lockfs));
1131 1131          ufsvfsp->vfs_ulockfs.ul_fs_lock = ULOCKFS_ULOCK;
1132 1132          mutex_init(&ufsvfsp->vfs_ulockfs.ul_lock, NULL,
1133 1133              MUTEX_DEFAULT, NULL);
1134 1134          cv_init(&ufsvfsp->vfs_ulockfs.ul_cv, NULL, CV_DEFAULT, NULL);
1135 1135  
1136 1136          /*
1137 1137           * We don't need to grab vfs_dqrwlock for this ufs_iget() call.
1138 1138           * We are in the process of mounting the file system so there
1139 1139           * is no need to grab the quota lock. If a quota applies to the
1140 1140           * root inode, then it will be updated when quotas are enabled.
1141 1141           *
1142 1142           * However, we have an ASSERT(RW_LOCK_HELD(&ufsvfsp->vfs_dqrwlock))
1143 1143           * in getinoquota() that we want to keep so grab it anyway.
1144 1144           */
1145 1145          rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
1146 1146  
1147 1147          error = ufs_iget_alloced(vfsp, UFSROOTINO, &rip, cr);
1148 1148  
1149 1149          rw_exit(&ufsvfsp->vfs_dqrwlock);
1150 1150  
1151 1151          if (error)
1152 1152                  goto out;
1153 1153  
1154 1154          /*
1155 1155           * make sure root inode is a directory.  Returning ENOTDIR might
1156 1156           * be confused with the mount point not being a directory, so
1157 1157           * we use EIO instead.
1158 1158           */
1159 1159          if ((rip->i_mode & IFMT) != IFDIR) {
1160 1160                  /*
1161 1161                   * Mark this inode as subject for cleanup
1162 1162                   * to avoid stray inodes in the cache.
1163 1163                   */
1164 1164                  rvp = ITOV(rip);
1165 1165                  error = EIO;
1166 1166                  goto out;
1167 1167          }
1168 1168  
                   /* a non-NULL rvp tells the out: path to tear the root inode down */
1169 1169          rvp = ITOV(rip);
1170 1170          mutex_enter(&rvp->v_lock);
1171 1171          rvp->v_flag |= VROOT;
1172 1172          mutex_exit(&rvp->v_lock);
1173 1173          ufsvfsp->vfs_root = rvp;
1174 1174          /* The buffer for the root inode does not contain a valid b_vp */
1175 1175          (void) bfinval(dev, 0);
1176 1176  
1177 1177          /* options */
1178 1178          ufsvfsp->vfs_nosetsec = flags & UFSMNT_NOSETSEC;
1179 1179          ufsvfsp->vfs_nointr  = flags & UFSMNT_NOINTR;
1180 1180          ufsvfsp->vfs_syncdir = flags & UFSMNT_SYNCDIR;
1181 1181          ufsvfsp->vfs_noatime = flags & UFSMNT_NOATIME;
1182 1182          if ((flags & UFSMNT_NODFRATIME) || ufsvfsp->vfs_noatime)
1183 1183                  ufsvfsp->vfs_dfritime &= ~UFS_DFRATIME;
1184 1184          else    /* dfratime, default behavior */
1185 1185                  ufsvfsp->vfs_dfritime |= UFS_DFRATIME;
                   /* with neither flag given, vfs_forcedirectio is left unchanged */
1186 1186          if (flags & UFSMNT_FORCEDIRECTIO)
1187 1187                  ufsvfsp->vfs_forcedirectio = 1;
1188 1188          else if (flags & UFSMNT_NOFORCEDIRECTIO)
1189 1189                  ufsvfsp->vfs_forcedirectio = 0;
1190 1190          ufsvfsp->vfs_iotstamp = ddi_get_lbolt();
1191 1191  
1192 1192          ufsvfsp->vfs_nindiroffset = fsp->fs_nindir - 1;
1193 1193          ufsvfsp->vfs_nindirshift = highbit(ufsvfsp->vfs_nindiroffset);
1194 1194          ufsvfsp->vfs_ioclustsz = fsp->fs_bsize * fsp->fs_maxcontig;
1195 1195  
                   /*
                    * Ask the driver for its maximum transfer size; fall back to
                    * MIN(maxphys, ufs_maxmaxphys) when the ioctl fails or the
                    * resulting value is not positive.
                    */
1196 1196          if (cdev_ioctl(dev, DKIOCINFO, (intptr_t)&ci,
1197 1197              FKIOCTL|FNATIVE|FREAD, CRED(), &status) == 0) {
1198 1198                  ufsvfsp->vfs_iotransz = ci.dki_maxtransfer * DEV_BSIZE;
1199 1199          } else {
1200 1200                  ufsvfsp->vfs_iotransz = MIN(maxphys, ufs_maxmaxphys);
1201 1201          }
1202 1202  
1203 1203          if (ufsvfsp->vfs_iotransz <= 0) {
1204 1204                  ufsvfsp->vfs_iotransz = MIN(maxphys, ufs_maxmaxphys);
1205 1205          }
1206 1206  
1207 1207          /*
1208 1208           * When logging, used to reserve log space for writes and truncs
1209 1209           */
1210 1210          ufsvfsp->vfs_avgbfree = fsp->fs_cstotal.cs_nbfree / fsp->fs_ncg;
1211 1211  
1212 1212          /*
1213 1213           * Determine whether to log cylinder group summary info.
1214 1214           */
1215 1215          ufsvfsp->vfs_nolog_si = (fsp->fs_ncg < ufs_ncg_log);
1216 1216  
1217 1217          if (TRANS_ISTRANS(ufsvfsp)) {
1218 1218                  /*
1219 1219                   * start the delete thread
1220 1220                   */
1221 1221                  ufs_thread_start(&ufsvfsp->vfs_delete, ufs_thread_delete, vfsp);
1222 1222  
1223 1223                  /*
1224 1224                   * start reclaim thread if the filesystem was not mounted
1225 1225                   * read only.
1226 1226                   */
1227 1227                  if (!fsp->fs_ronly && (fsp->fs_reclaim &
1228 1228                      (FS_RECLAIM|FS_RECLAIMING))) {
1229 1229                          fsp->fs_reclaim &= ~FS_RECLAIM;
1230 1230                          fsp->fs_reclaim |=  FS_RECLAIMING;
1231 1231                          ufs_thread_start(&ufsvfsp->vfs_reclaim,
1232 1232                              ufs_thread_reclaim, vfsp);
1233 1233                  }
1234 1234  
1235 1235                  /* Mark the fs as unrolled */
1236 1236                  fsp->fs_rolled = FS_NEED_ROLL;
1237 1237          } else if (!fsp->fs_ronly && (fsp->fs_reclaim &
1238 1238              (FS_RECLAIM|FS_RECLAIMING))) {
1239 1239                  /*
1240 1240                   * If a file system that is mounted nologging, after
1241 1241                   * having previously been mounted logging, becomes
1242 1242                   * unmounted whilst the reclaim thread is in the throes
1243 1243                   * of reclaiming open/deleted inodes, a subsequent mount
1244 1244                   * of such a file system with logging disabled could lead
1245 1245                   * to inodes becoming lost.  So, start reclaim now, even
1246 1246                   * though logging was disabled for the previous mount, to
1247 1247                   * tidy things up.
1248 1248                   */
1249 1249                  fsp->fs_reclaim &= ~FS_RECLAIM;
1250 1250                  fsp->fs_reclaim |=  FS_RECLAIMING;
1251 1251                  ufs_thread_start(&ufsvfsp->vfs_reclaim,
1252 1252                      ufs_thread_reclaim, vfsp);
1253 1253          }
1254 1254  
                   /* read-write mounts push the updated superblock out now */
1255 1255          if (!fsp->fs_ronly) {
1256 1256                  TRANS_SBWRITE(ufsvfsp, TOP_MOUNT);
1257 1257                  if (error = geterror(ufsvfsp->vfs_bufp))
1258 1258                          goto out;
1259 1259          }
1260 1260  
1261 1261          /* fix-on-panic initialization */
1262 1262          if (isroot && !(flags & UFSMNT_ONERROR_FLGMASK))
1263 1263                  flags |= UFSMNT_ONERROR_PANIC;  /* XXX ..._RDONLY */
1264 1264  
1265 1265          if ((error = ufsfx_mount(ufsvfsp, flags)) != 0)
1266 1266                  goto out;
1267 1267  
                   /* remember the device vnode of the root file system */
1268 1268          if (why == ROOT_INIT && isroot)
1269 1269                  rootvp = devvp;
1270 1270  
1271 1271          return (0);
1272 1272  out:
                   /*
                    * Common failure path.  Some callers jump here without setting
                    * an errno (for example after a B_ERROR buffer read); report
                    * those as EIO.
                    */
1273 1273          if (error == 0)
1274 1274                  error = EIO;
1275 1275          if (rvp) {
1276 1276                  /* the following sequence is similar to ufs_unmount() */
1277 1277  
1278 1278                  /*
1279 1279                   * There's a problem that ufs_iget() puts inodes into
1280 1280                   * the inode cache before it returns them.  If someone
1281 1281                   * traverses that cache and gets a reference to our
1282 1282                   * inode, there's a chance they'll still be using it
1283 1283                   * after we've destroyed it.  This is a hard race to
1284 1284                   * hit, but it's happened (putting in a medium delay
1285 1285                   * here, and a large delay in ufs_scan_inodes() for
1286 1286                   * inodes on the device we're bailing out on, makes
1287 1287                   * the race easy to demonstrate).  The symptom is some
1288 1288                   * other part of UFS faulting on bad inode contents,
1289 1289                   * or when grabbing one of the locks inside the inode,
1290 1290                   * etc.  The usual victim is ufs_scan_inodes() or
1291 1291                   * someone called by it.
1292 1292                   */
1293 1293  
1294 1294                  /*
1295 1295                   * First, isolate it so that no new references can be
1296 1296                   * gotten via the inode cache.
1297 1297                   */
1298 1298                  ihm = &ih_lock[INOHASH(UFSROOTINO)];
1299 1299                  mutex_enter(ihm);
1300 1300                  remque(rip);
1301 1301                  mutex_exit(ihm);
1302 1302  
1303 1303                  /*
1304 1304                   * Now wait for all outstanding references except our
1305 1305                   * own to drain.  This could, in theory, take forever,
1306 1306                   * so don't wait *too* long.  If we time out, mark
1307 1307                   * it stale and leak it, so we don't hit the problem
1308 1308                   * described above.
1309 1309                   *
1310 1310                   * Note that v_count is an int, which means we can read
1311 1311                   * it in one operation.  Thus, there's no need to lock
1312 1312                   * around our tests.
1313 1313                   */
1314 1314                  elapsed = 0;
                           /*
                            * elapsed accumulates ufs_mount_error_delay per pass and
                            * is compared against ufs_mount_timeout; each pass
                            * sleeps ufs_mount_error_delay * drv_usectohz(1000)
                            * ticks.
                            */
1315 1315                  while ((rvp->v_count > 1) && (elapsed < ufs_mount_timeout)) {
1316 1316                          delay(ufs_mount_error_delay * drv_usectohz(1000));
1317 1317                          elapsed += ufs_mount_error_delay;
1318 1318                  }
1319 1319  
1320 1320                  if (rvp->v_count > 1) {
1321 1321                          mutex_enter(&rip->i_tlock);
1322 1322                          rip->i_flag |= ISTALE;
1323 1323                          mutex_exit(&rip->i_tlock);
1324 1324                          cmn_err(CE_WARN,
1325 1325                              "Timed out while cleaning up after "
1326 1326                              "failed mount of %s", path);
1327 1327                  } else {
1328 1328  
1329 1329                          /*
1330 1330                           * Now we're the only one with a handle left, so tear
1331 1331                           * it down the rest of the way.
1332 1332                           */
1333 1333                          if (ufs_rmidle(rip))
1334 1334                                  VN_RELE(rvp);
1335 1335                          ufs_si_del(rip);
1336 1336                          rip->i_ufsvfs = NULL;
1337 1337                          rvp->v_vfsp = NULL;
1338 1338                          rvp->v_type = VBAD;
1339 1339                          VN_RELE(rvp);
1340 1340                  }
1341 1341          }
1342 1342          if (needtrans) {
1343 1343                  TRANS_MATA_UMOUNT(ufsvfsp);
1344 1344          }
                   /*
                    * NOTE(review): failures taken before ufs_vfs_add(),
                    * ufs_delete_init()/ufs_thread_init() and the vfs_lock
                    * mutex_init() still run the matching teardown calls below on
                    * a kmem_zalloc'ed ufsvfs - presumably these tolerate a zeroed
                    * structure; confirm.
                    */
1345 1345          if (ufsvfsp) {
1346 1346                  ufs_vfs_remove(ufsvfsp);
1347 1347                  ufs_thread_exit(&ufsvfsp->vfs_delete);
1348 1348                  ufs_thread_exit(&ufsvfsp->vfs_reclaim);
1349 1349                  mutex_destroy(&ufsvfsp->vfs_lock);
1350 1350                  if (ufsvfsp->vfs_log) {
1351 1351                          lufs_unsnarf(ufsvfsp);
1352 1352                  }
1353 1353                  kmem_free(ufsvfsp, sizeof (struct ufsvfs));
1354 1354          }
                   /* release whichever superblock buffers are still held */
1355 1355          if (bp) {
1356 1356                  bp->b_flags |= (B_STALE|B_AGE);
1357 1357                  brelse(bp);
1358 1358          }
1359 1359          if (tp) {
1360 1360                  tp->b_flags |= (B_STALE|B_AGE);
1361 1361                  brelse(tp);
1362 1362          }
                   /* undo the ROOT_INIT VOP_OPEN and flush/invalidate the device */
1363 1363          if (needclose) {
1364 1364                  (void) VOP_CLOSE(devvp, (vfsp->vfs_flag & VFS_RDONLY) ?
1365 1365                      FREAD : FREAD|FWRITE, 1, (offset_t)0, cr, NULL);
1366 1366                  bflush(dev);
1367 1367                  (void) bfinval(dev, 1);
1368 1368          }
1369 1369          return (error);
1370 1370  }
1371 1371  
1372 1372  /*
1373 1373   * vfs operations
1374 1374   */
1375 1375  static int
1376 1376  ufs_unmount(struct vfs *vfsp, int fflag, struct cred *cr)
1377 1377  {
1378 1378          dev_t           dev             = vfsp->vfs_dev;
1379 1379          struct ufsvfs   *ufsvfsp        = (struct ufsvfs *)vfsp->vfs_data;
1380 1380          struct fs       *fs             = ufsvfsp->vfs_fs;
1381 1381          struct ulockfs  *ulp            = &ufsvfsp->vfs_ulockfs;
1382 1382          struct vnode    *bvp, *vp;
1383 1383          struct buf      *bp;
1384 1384          struct inode    *ip, *inext, *rip;
1385 1385          union ihead     *ih;
1386 1386          int             error, flag, i;
1387 1387          struct lockfs   lockfs;
1388 1388          int             poll_events = POLLPRI;
1389 1389          extern struct pollhead ufs_pollhd;
1390 1390          refstr_t        *mountpoint;
1391 1391  
1392 1392          ASSERT(vfs_lock_held(vfsp));
1393 1393  
1394 1394          if (secpolicy_fs_unmount(cr, vfsp) != 0)
1395 1395                  return (EPERM);
1396 1396          /*
1397 1397           * Forced unmount is now supported through the
1398 1398           * lockfs protocol.
1399 1399           */
1400 1400          if (fflag & MS_FORCE) {
1401 1401                  /*
1402 1402                   * Mark the filesystem as being unmounted now in
1403 1403                   * case of a forcible umount before we take any
1404 1404                   * locks inside UFS to prevent racing with a VFS_VGET()
1405 1405                   * request. Throw these VFS_VGET() requests away for
1406 1406                   * the duration of the forcible umount so they won't
1407 1407                   * use stale or even freed data later on when we're done.
1408 1408                   * It may happen that the VFS has had a additional hold
1409 1409                   * placed on it by someone other than UFS and thus will
1410 1410                   * not get freed immediately once we're done with the
1411 1411                   * umount by dounmount() - use VFS_UNMOUNTED to inform
1412 1412                   * users of this still-alive VFS that its corresponding
1413 1413                   * filesystem being gone so they can detect that and error
1414 1414                   * out.
1415 1415                   */
1416 1416                  vfsp->vfs_flag |= VFS_UNMOUNTED;
1417 1417  
1418 1418                  ufs_thread_suspend(&ufsvfsp->vfs_delete);
1419 1419                  mutex_enter(&ulp->ul_lock);
1420 1420                  /*
1421 1421                   * If file system is already hard locked,
1422 1422                   * unmount the file system, otherwise
1423 1423                   * hard lock it before unmounting.
1424 1424                   */
1425 1425                  if (!ULOCKFS_IS_HLOCK(ulp)) {
1426 1426                          atomic_inc_ulong(&ufs_quiesce_pend);
1427 1427                          lockfs.lf_lock = LOCKFS_HLOCK;
1428 1428                          lockfs.lf_flags = 0;
1429 1429                          lockfs.lf_key = ulp->ul_lockfs.lf_key + 1;
1430 1430                          lockfs.lf_comlen = 0;
1431 1431                          lockfs.lf_comment = NULL;
1432 1432                          ufs_freeze(ulp, &lockfs);
1433 1433                          ULOCKFS_SET_BUSY(ulp);
1434 1434                          LOCKFS_SET_BUSY(&ulp->ul_lockfs);
1435 1435                          (void) ufs_quiesce(ulp);
1436 1436                          (void) ufs_flush(vfsp);
1437 1437                          (void) ufs_thaw(vfsp, ufsvfsp, ulp);
1438 1438                          atomic_dec_ulong(&ufs_quiesce_pend);
1439 1439                          ULOCKFS_CLR_BUSY(ulp);
1440 1440                          LOCKFS_CLR_BUSY(&ulp->ul_lockfs);
1441 1441                          poll_events |= POLLERR;
1442 1442                          pollwakeup(&ufs_pollhd, poll_events);
1443 1443                  }
1444 1444                  ufs_thread_continue(&ufsvfsp->vfs_delete);
1445 1445                  mutex_exit(&ulp->ul_lock);
1446 1446          }
1447 1447  
1448 1448          /* let all types of writes go through */
1449 1449          ufsvfsp->vfs_iotstamp = ddi_get_lbolt();
1450 1450  
1451 1451          /* coordinate with global hlock thread */
1452 1452          if (TRANS_ISTRANS(ufsvfsp) && (ufsvfsp->vfs_validfs == UT_HLOCKING)) {
1453 1453                  /*
1454 1454                   * last possibility for a forced umount to fail hence clear
1455 1455                   * VFS_UNMOUNTED if appropriate.
1456 1456                   */
1457 1457                  if (fflag & MS_FORCE)
1458 1458                          vfsp->vfs_flag &= ~VFS_UNMOUNTED;
1459 1459                  return (EAGAIN);
1460 1460          }
1461 1461  
1462 1462          ufsvfsp->vfs_validfs = UT_UNMOUNTED;
1463 1463  
1464 1464          /* kill the reclaim thread */
1465 1465          ufs_thread_exit(&ufsvfsp->vfs_reclaim);
1466 1466  
1467 1467          /* suspend the delete thread */
1468 1468          ufs_thread_suspend(&ufsvfsp->vfs_delete);
1469 1469  
1470 1470          /*
1471 1471           * drain the delete and idle queues
1472 1472           */
1473 1473          ufs_delete_drain(vfsp, -1, 1);
1474 1474          ufs_idle_drain(vfsp);
1475 1475  
1476 1476          /*
1477 1477           * use the lockfs protocol to prevent new ops from starting
1478 1478           * a forcible umount can not fail beyond this point as
1479 1479           * we hard-locked the filesystem and drained all current consumers
1480 1480           * before.
1481 1481           */
1482 1482          mutex_enter(&ulp->ul_lock);
1483 1483  
1484 1484          /*
1485 1485           * if the file system is busy; return EBUSY
1486 1486           */
1487 1487          if (ulp->ul_vnops_cnt || ulp->ul_falloc_cnt || ULOCKFS_IS_SLOCK(ulp)) {
1488 1488                  error = EBUSY;
1489 1489                  goto out;
1490 1490          }
1491 1491  
1492 1492          /*
1493 1493           * if this is not a forced unmount (!hard/error locked), then
1494 1494           * get rid of every inode except the root and quota inodes
1495 1495           * also, commit any outstanding transactions
1496 1496           */
1497 1497          if (!ULOCKFS_IS_HLOCK(ulp) && !ULOCKFS_IS_ELOCK(ulp))
1498 1498                  if (error = ufs_flush(vfsp))
1499 1499                          goto out;
1500 1500  
1501 1501          /*
1502 1502           * ignore inodes in the cache if fs is hard locked or error locked
1503 1503           */
1504 1504          rip = VTOI(ufsvfsp->vfs_root);
1505 1505          if (!ULOCKFS_IS_HLOCK(ulp) && !ULOCKFS_IS_ELOCK(ulp)) {
1506 1506                  /*
1507 1507                   * Otherwise, only the quota and root inodes are in the cache.
1508 1508                   *
1509 1509                   * Avoid racing with ufs_update() and ufs_sync().
1510 1510                   */
1511 1511                  mutex_enter(&ufs_scan_lock);
1512 1512  
1513 1513                  for (i = 0, ih = ihead; i < inohsz; i++, ih++) {
1514 1514                          mutex_enter(&ih_lock[i]);
1515 1515                          for (ip = ih->ih_chain[0];
1516 1516                              ip != (struct inode *)ih;
1517 1517                              ip = ip->i_forw) {
1518 1518                                  if (ip->i_ufsvfs != ufsvfsp)
1519 1519                                          continue;
1520 1520                                  if (ip == ufsvfsp->vfs_qinod)
1521 1521                                          continue;
1522 1522                                  if (ip == rip && ITOV(ip)->v_count == 1)
1523 1523                                          continue;
1524 1524                                  mutex_exit(&ih_lock[i]);
1525 1525                                  mutex_exit(&ufs_scan_lock);
1526 1526                                  error = EBUSY;
1527 1527                                  goto out;
1528 1528                          }
1529 1529                          mutex_exit(&ih_lock[i]);
1530 1530                  }
1531 1531                  mutex_exit(&ufs_scan_lock);
1532 1532          }
1533 1533  
1534 1534          /*
1535 1535           * if a snapshot exists and this is a forced unmount, then delete
1536 1536           * the snapshot.  Otherwise return EBUSY.  This will insure the
1537 1537           * snapshot always belongs to a valid file system.
1538 1538           */
1539 1539          if (ufsvfsp->vfs_snapshot) {
1540 1540                  if (ULOCKFS_IS_HLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp)) {
1541 1541                          (void) fssnap_delete(&ufsvfsp->vfs_snapshot);
1542 1542                  } else {
1543 1543                          error = EBUSY;
1544 1544                          goto out;
1545 1545                  }
1546 1546          }
1547 1547  
1548 1548          /*
1549 1549           * Close the quota file and invalidate anything left in the quota
1550 1550           * cache for this file system.  Pass kcred to allow all quota
1551 1551           * manipulations.
1552 1552           */
1553 1553          (void) closedq(ufsvfsp, kcred);
1554 1554          invalidatedq(ufsvfsp);
1555 1555          /*
1556 1556           * drain the delete and idle queues
1557 1557           */
1558 1558          ufs_delete_drain(vfsp, -1, 0);
1559 1559          ufs_idle_drain(vfsp);
1560 1560  
1561 1561          /*
1562 1562           * discard the inodes for this fs (including root, shadow, and quota)
1563 1563           */
1564 1564          for (i = 0, ih = ihead; i < inohsz; i++, ih++) {
1565 1565                  mutex_enter(&ih_lock[i]);
1566 1566                  for (inext = 0, ip = ih->ih_chain[0];
1567 1567                      ip != (struct inode *)ih;
1568 1568                      ip = inext) {
1569 1569                          inext = ip->i_forw;
1570 1570                          if (ip->i_ufsvfs != ufsvfsp)
1571 1571                                  continue;
1572 1572  
1573 1573                          /*
1574 1574                           * We've found the inode in the cache and as we
1575 1575                           * hold the hash mutex the inode can not
1576 1576                           * disappear from underneath us.
1577 1577                           * We also know it must have at least a vnode
1578 1578                           * reference count of 1.
1579 1579                           * We perform an additional VN_HOLD so the VN_RELE
1580 1580                           * in case we take the inode off the idle queue
1581 1581                           * can not be the last one.
1582 1582                           * It is safe to grab the writer contents lock here
1583 1583                           * to prevent a race with ufs_iinactive() putting
1584 1584                           * inodes into the idle queue while we operate on
1585 1585                           * this inode.
1586 1586                           */
1587 1587                          rw_enter(&ip->i_contents, RW_WRITER);
1588 1588  
1589 1589                          vp = ITOV(ip);
1590 1590                          VN_HOLD(vp)
1591 1591                          remque(ip);
1592 1592                          if (ufs_rmidle(ip))
1593 1593                                  VN_RELE(vp);
1594 1594                          ufs_si_del(ip);
1595 1595                          /*
1596 1596                           * rip->i_ufsvfsp is needed by bflush()
1597 1597                           */
1598 1598                          if (ip != rip)
1599 1599                                  ip->i_ufsvfs = NULL;
1600 1600                          /*
1601 1601                           * Set vnode's vfsops to dummy ops, which return
1602 1602                           * EIO. This is needed to forced unmounts to work
1603 1603                           * with lofs/nfs properly.
1604 1604                           */
1605 1605                          if (ULOCKFS_IS_HLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp))
1606 1606                                  vp->v_vfsp = &EIO_vfs;
1607 1607                          else
1608 1608                                  vp->v_vfsp = NULL;
1609 1609                          vp->v_type = VBAD;
1610 1610  
1611 1611                          rw_exit(&ip->i_contents);
1612 1612  
1613 1613                          VN_RELE(vp);
1614 1614                  }
1615 1615                  mutex_exit(&ih_lock[i]);
1616 1616          }
1617 1617          ufs_si_cache_flush(dev);
1618 1618  
1619 1619          /*
1620 1620           * kill the delete thread and drain the idle queue
1621 1621           */
1622 1622          ufs_thread_exit(&ufsvfsp->vfs_delete);
1623 1623          ufs_idle_drain(vfsp);
1624 1624  
1625 1625          bp = ufsvfsp->vfs_bufp;
1626 1626          bvp = ufsvfsp->vfs_devvp;
1627 1627          flag = !fs->fs_ronly;
1628 1628          if (flag) {
1629 1629                  bflush(dev);
1630 1630                  if (fs->fs_clean != FSBAD) {
1631 1631                          if (fs->fs_clean == FSSTABLE)
1632 1632                                  fs->fs_clean = FSCLEAN;
1633 1633                          fs->fs_reclaim &= ~FS_RECLAIM;
1634 1634                  }
1635 1635                  if (TRANS_ISTRANS(ufsvfsp) &&
1636 1636                      !TRANS_ISERROR(ufsvfsp) &&
1637 1637                      !ULOCKFS_IS_HLOCK(ulp) &&
1638 1638                      (fs->fs_rolled == FS_NEED_ROLL)) {
1639 1639                          /*
1640 1640                           * ufs_flush() above has flushed the last Moby.
1641 1641                           * This is needed to ensure the following superblock
1642 1642                           * update really is the last metadata update
1643 1643                           */
1644 1644                          error = ufs_putsummaryinfo(dev, ufsvfsp, fs);
1645 1645                          if (error == 0) {
1646 1646                                  fs->fs_rolled = FS_ALL_ROLLED;
1647 1647                          }
1648 1648                  }
1649 1649                  TRANS_SBUPDATE(ufsvfsp, vfsp, TOP_SBUPDATE_UNMOUNT);
1650 1650                  /*
1651 1651                   * push this last transaction
1652 1652                   */
1653 1653                  curthread->t_flag |= T_DONTBLOCK;
1654 1654                  TRANS_BEGIN_SYNC(ufsvfsp, TOP_COMMIT_UNMOUNT, TOP_COMMIT_SIZE,
1655 1655                      error);
1656 1656                  if (!error)
1657 1657                          TRANS_END_SYNC(ufsvfsp, error, TOP_COMMIT_UNMOUNT,
1658 1658                              TOP_COMMIT_SIZE);
1659 1659                  curthread->t_flag &= ~T_DONTBLOCK;
1660 1660          }
1661 1661  
1662 1662          TRANS_MATA_UMOUNT(ufsvfsp);
1663 1663          lufs_unsnarf(ufsvfsp);          /* Release the in-memory structs */
1664 1664          ufsfx_unmount(ufsvfsp);         /* fix-on-panic bookkeeping */
1665 1665          kmem_free(fs->fs_u.fs_csp, fs->fs_cssize);
1666 1666  
1667 1667          bp->b_flags |= B_STALE|B_AGE;
1668 1668          ufsvfsp->vfs_bufp = NULL;       /* don't point at freed buf */
1669 1669          brelse(bp);                     /* free the superblock buf */
1670 1670  
1671 1671          (void) VOP_PUTPAGE(common_specvp(bvp), (offset_t)0, (size_t)0,
1672 1672              B_INVAL, cr, NULL);
1673 1673          (void) VOP_CLOSE(bvp, flag, 1, (offset_t)0, cr, NULL);
1674 1674          bflush(dev);
1675 1675          (void) bfinval(dev, 1);
1676 1676          VN_RELE(bvp);
1677 1677  
1678 1678          /*
1679 1679           * It is now safe to NULL out the ufsvfs pointer and discard
1680 1680           * the root inode.
1681 1681           */
1682 1682          rip->i_ufsvfs = NULL;
1683 1683          VN_RELE(ITOV(rip));
1684 1684  
1685 1685          /* free up lockfs comment structure, if any */
1686 1686          if (ulp->ul_lockfs.lf_comlen && ulp->ul_lockfs.lf_comment)
1687 1687                  kmem_free(ulp->ul_lockfs.lf_comment, ulp->ul_lockfs.lf_comlen);
1688 1688  
1689 1689          /*
1690 1690           * Remove from instance list.
1691 1691           */
1692 1692          ufs_vfs_remove(ufsvfsp);
1693 1693  
1694 1694          /*
1695 1695           * For a forcible unmount, threads may be asleep in
1696 1696           * ufs_lockfs_begin/ufs_check_lockfs.  These threads will need
1697 1697           * the ufsvfs structure so we don't free it, yet.  ufs_update
1698 1698           * will free it up after awhile.
1699 1699           */
1700 1700          if (ULOCKFS_IS_HLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp)) {
1701 1701                  extern kmutex_t         ufsvfs_mutex;
1702 1702                  extern struct ufsvfs    *ufsvfslist;
1703 1703  
1704 1704                  mutex_enter(&ufsvfs_mutex);
1705 1705                  ufsvfsp->vfs_dontblock = 1;
1706 1706                  ufsvfsp->vfs_next = ufsvfslist;
1707 1707                  ufsvfslist = ufsvfsp;
1708 1708                  mutex_exit(&ufsvfs_mutex);
1709 1709                  /* wakeup any suspended threads */
1710 1710                  cv_broadcast(&ulp->ul_cv);
1711 1711                  mutex_exit(&ulp->ul_lock);
1712 1712          } else {
1713 1713                  mutex_destroy(&ufsvfsp->vfs_lock);
1714 1714                  kmem_free(ufsvfsp, sizeof (struct ufsvfs));
1715 1715          }
1716 1716  
1717 1717          /*
1718 1718           * Now mark the filesystem as unmounted since we're done with it.
1719 1719           */
1720 1720          vfsp->vfs_flag |= VFS_UNMOUNTED;
1721 1721  
1722 1722          return (0);
1723 1723  out:
1724 1724          /* open the fs to new ops */
1725 1725          cv_broadcast(&ulp->ul_cv);
1726 1726          mutex_exit(&ulp->ul_lock);
1727 1727  
1728 1728          if (TRANS_ISTRANS(ufsvfsp)) {
1729 1729                  /* allow the delete thread to continue */
1730 1730                  ufs_thread_continue(&ufsvfsp->vfs_delete);
1731 1731                  /* restart the reclaim thread */
1732 1732                  ufs_thread_start(&ufsvfsp->vfs_reclaim, ufs_thread_reclaim,
1733 1733                      vfsp);
1734 1734                  /* coordinate with global hlock thread */
1735 1735                  ufsvfsp->vfs_validfs = UT_MOUNTED;
1736 1736                  /* check for trans errors during umount */
1737 1737                  ufs_trans_onerror();
1738 1738  
1739 1739                  /*
1740 1740                   * if we have a separate /usr it will never unmount
1741 1741                   * when halting. In order to not re-read all the
1742 1742                   * cylinder group summary info on mounting after
1743 1743                   * reboot the logging of summary info is re-enabled
1744 1744                   * and the super block written out.
1745 1745                   */
1746 1746                  mountpoint = vfs_getmntpoint(vfsp);
1747 1747                  if ((fs->fs_si == FS_SI_OK) &&
1748 1748                      (strcmp("/usr", refstr_value(mountpoint)) == 0)) {
1749 1749                          ufsvfsp->vfs_nolog_si = 0;
1750 1750                          UFS_BWRITE2(NULL, ufsvfsp->vfs_bufp);
1751 1751                  }
1752 1752                  refstr_rele(mountpoint);
1753 1753          }
1754 1754  
1755 1755          return (error);
1756 1756  }
1757 1757  
1758 1758  static int
1759 1759  ufs_root(struct vfs *vfsp, struct vnode **vpp)
1760 1760  {
1761 1761          struct ufsvfs *ufsvfsp;
1762 1762          struct vnode *vp;
1763 1763  
1764 1764          if (!vfsp)
1765 1765                  return (EIO);
1766 1766  
1767 1767          ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
1768 1768          if (!ufsvfsp || !ufsvfsp->vfs_root)
1769 1769                  return (EIO);   /* forced unmount */
1770 1770  
1771 1771          vp = ufsvfsp->vfs_root;
1772 1772          VN_HOLD(vp);
1773 1773          *vpp = vp;
1774 1774          return (0);
1775 1775  }
1776 1776  
1777 1777  /*
1778 1778   * Get file system statistics.
1779 1779   */
1780 1780  static int
1781 1781  ufs_statvfs(struct vfs *vfsp, struct statvfs64 *sp)
1782 1782  {
1783 1783          struct fs *fsp;
1784 1784          struct ufsvfs *ufsvfsp;
1785 1785          int blk, i;
1786 1786          long max_avail, used;
1787 1787          dev32_t d32;
1788 1788  
1789 1789          if (vfsp->vfs_flag & VFS_UNMOUNTED)
1790 1790                  return (EIO);
1791 1791  
1792 1792          ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
1793 1793          fsp = ufsvfsp->vfs_fs;
1794 1794          if ((fsp->fs_magic != FS_MAGIC) && (fsp->fs_magic != MTB_UFS_MAGIC))
1795 1795                  return (EINVAL);
1796 1796          if (fsp->fs_magic == FS_MAGIC &&
1797 1797              (fsp->fs_version != UFS_EFISTYLE4NONEFI_VERSION_2 &&
1798 1798              fsp->fs_version != UFS_VERSION_MIN))
1799 1799                  return (EINVAL);
1800 1800          if (fsp->fs_magic == MTB_UFS_MAGIC &&
1801 1801              (fsp->fs_version > MTB_UFS_VERSION_1 ||
1802 1802              fsp->fs_version < MTB_UFS_VERSION_MIN))
1803 1803                  return (EINVAL);
1804 1804  
1805 1805          /*
1806 1806           * get the basic numbers
1807 1807           */
1808 1808          (void) bzero(sp, sizeof (*sp));
1809 1809  
1810 1810          sp->f_bsize = fsp->fs_bsize;
1811 1811          sp->f_frsize = fsp->fs_fsize;
1812 1812          sp->f_blocks = (fsblkcnt64_t)fsp->fs_dsize;
1813 1813          sp->f_bfree = (fsblkcnt64_t)fsp->fs_cstotal.cs_nbfree * fsp->fs_frag +
1814 1814              fsp->fs_cstotal.cs_nffree;
1815 1815  
1816 1816          sp->f_files = (fsfilcnt64_t)fsp->fs_ncg * fsp->fs_ipg;
1817 1817          sp->f_ffree = (fsfilcnt64_t)fsp->fs_cstotal.cs_nifree;
1818 1818  
1819 1819          /*
1820 1820           * Adjust the numbers based on things waiting to be deleted.
1821 1821           * modifies f_bfree and f_ffree.  Afterwards, everything we
1822 1822           * come up with will be self-consistent.  By definition, this
1823 1823           * is a point-in-time snapshot, so the fact that the delete
1824 1824           * thread's probably already invalidated the results is not a
1825 1825           * problem.  Note that if the delete thread is ever extended to
1826 1826           * non-logging ufs, this adjustment must always be made.
1827 1827           */
1828 1828          if (TRANS_ISTRANS(ufsvfsp))
1829 1829                  ufs_delete_adjust_stats(ufsvfsp, sp);
1830 1830  
1831 1831          /*
1832 1832           * avail = MAX(max_avail - used, 0)
1833 1833           */
1834 1834          max_avail = fsp->fs_dsize - ufsvfsp->vfs_minfrags;
1835 1835  
1836 1836          used = (fsp->fs_dsize - sp->f_bfree);
1837 1837  
1838 1838          if (max_avail > used)
1839 1839                  sp->f_bavail = (fsblkcnt64_t)max_avail - used;
1840 1840          else
1841 1841                  sp->f_bavail = (fsblkcnt64_t)0;
1842 1842  
1843 1843          sp->f_favail = sp->f_ffree;
1844 1844          (void) cmpldev(&d32, vfsp->vfs_dev);
1845 1845          sp->f_fsid = d32;
1846 1846          (void) strcpy(sp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name);
1847 1847          sp->f_flag = vf_to_stf(vfsp->vfs_flag);
1848 1848  
1849 1849          /* keep coordinated with ufs_l_pathconf() */
1850 1850          sp->f_namemax = MAXNAMLEN;
1851 1851  
1852 1852          if (fsp->fs_cpc == 0) {
1853 1853                  bzero(sp->f_fstr, 14);
1854 1854                  return (0);
1855 1855          }
1856 1856          blk = fsp->fs_spc * fsp->fs_cpc / NSPF(fsp);
1857 1857          for (i = 0; i < blk; i += fsp->fs_frag) /* CSTYLED */
1858 1858                  /* void */;
1859 1859          i -= fsp->fs_frag;
1860 1860          blk = i / fsp->fs_frag;
1861 1861          bcopy(&(fs_rotbl(fsp)[blk]), sp->f_fstr, 14);
1862 1862          return (0);
1863 1863  }
1864 1864  
1865 1865  /*
1866 1866   * Flush any pending I/O to file system vfsp.
1867 1867   * The ufs_update() routine will only flush *all* ufs files.
1868 1868   * If vfsp is non-NULL, only sync this ufs (in preparation
1869 1869   * for a umount).
1870 1870   */
1871 1871  /*ARGSUSED*/
1872 1872  static int
1873 1873  ufs_sync(struct vfs *vfsp, short flag, struct cred *cr)
1874 1874  {
1875 1875          struct ufsvfs *ufsvfsp;
1876 1876          struct fs *fs;
1877 1877          int cheap = flag & SYNC_ATTR;
1878 1878          int error;
1879 1879  
1880 1880          /*
1881 1881           * SYNC_CLOSE means we're rebooting.  Toss everything
1882 1882           * on the idle queue so we don't have to slog through
1883 1883           * a bunch of uninteresting inodes over and over again.
1884 1884           */
1885 1885          if (flag & SYNC_CLOSE)
1886 1886                  ufs_idle_drain(NULL);
1887 1887  
1888 1888          if (vfsp == NULL) {
1889 1889                  ufs_update(flag);
1890 1890                  return (0);
1891 1891          }
1892 1892  
1893 1893          /* Flush a single ufs */
1894 1894          if (!vfs_matchops(vfsp, ufs_vfsops) || vfs_lock(vfsp) != 0)
1895 1895                  return (0);
1896 1896  
1897 1897          ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
1898 1898          if (!ufsvfsp)
1899 1899                  return (EIO);
1900 1900          fs = ufsvfsp->vfs_fs;
1901 1901          mutex_enter(&ufsvfsp->vfs_lock);
1902 1902  
1903 1903          if (ufsvfsp->vfs_dio &&
1904 1904              fs->fs_ronly == 0 &&
1905 1905              fs->fs_clean != FSBAD &&
1906 1906              fs->fs_clean != FSLOG) {
1907 1907                  /* turn off fast-io on unmount, so no fsck needed (4029401) */
1908 1908                  ufsvfsp->vfs_dio = 0;
1909 1909                  fs->fs_clean = FSACTIVE;
1910 1910                  fs->fs_fmod = 1;
1911 1911          }
1912 1912  
1913 1913          /* Write back modified superblock */
1914 1914          if (fs->fs_fmod == 0) {
1915 1915                  mutex_exit(&ufsvfsp->vfs_lock);
1916 1916          } else {
1917 1917                  if (fs->fs_ronly != 0) {
1918 1918                          mutex_exit(&ufsvfsp->vfs_lock);
1919 1919                          vfs_unlock(vfsp);
1920 1920                          return (ufs_fault(ufsvfsp->vfs_root,
1921 1921                              "fs = %s update: ro fs mod\n", fs->fs_fsmnt));
1922 1922                  }
1923 1923                  fs->fs_fmod = 0;
1924 1924                  mutex_exit(&ufsvfsp->vfs_lock);
1925 1925  
1926 1926                  TRANS_SBUPDATE(ufsvfsp, vfsp, TOP_SBUPDATE_UPDATE);
1927 1927          }
1928 1928          vfs_unlock(vfsp);
1929 1929  
1930 1930          /*
1931 1931           * Avoid racing with ufs_update() and ufs_unmount().
1932 1932           *
1933 1933           */
1934 1934          mutex_enter(&ufs_scan_lock);
1935 1935  
1936 1936          (void) ufs_scan_inodes(1, ufs_sync_inode,
1937 1937              (void *)(uintptr_t)cheap, ufsvfsp);
1938 1938  
1939 1939          mutex_exit(&ufs_scan_lock);
1940 1940  
1941 1941          bflush((dev_t)vfsp->vfs_dev);
1942 1942  
1943 1943          /*
1944 1944           * commit any outstanding async transactions
1945 1945           */
1946 1946          curthread->t_flag |= T_DONTBLOCK;
1947 1947          TRANS_BEGIN_SYNC(ufsvfsp, TOP_COMMIT_UPDATE, TOP_COMMIT_SIZE, error);
1948 1948          if (!error) {
1949 1949                  TRANS_END_SYNC(ufsvfsp, error, TOP_COMMIT_UPDATE,
1950 1950                      TOP_COMMIT_SIZE);
1951 1951          }
1952 1952          curthread->t_flag &= ~T_DONTBLOCK;
1953 1953  
1954 1954          return (0);
1955 1955  }
1956 1956  
1957 1957  
1958 1958  void
1959 1959  sbupdate(struct vfs *vfsp)
1960 1960  {
1961 1961          struct ufsvfs *ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
1962 1962          struct fs *fs = ufsvfsp->vfs_fs;
1963 1963          struct buf *bp;
1964 1964          int blks;
1965 1965          caddr_t space;
1966 1966          int i;
1967 1967          size_t size;
1968 1968  
1969 1969          /*
1970 1970           * for ulockfs processing, limit the superblock writes
1971 1971           */
1972 1972          if ((ufsvfsp->vfs_ulockfs.ul_sbowner) &&
1973 1973              (curthread != ufsvfsp->vfs_ulockfs.ul_sbowner)) {
1974 1974                  /* process later */
1975 1975                  fs->fs_fmod = 1;
1976 1976                  return;
1977 1977          }
1978 1978          ULOCKFS_SET_MOD((&ufsvfsp->vfs_ulockfs));
1979 1979  
1980 1980          if (TRANS_ISTRANS(ufsvfsp)) {
1981 1981                  mutex_enter(&ufsvfsp->vfs_lock);
1982 1982                  ufs_sbwrite(ufsvfsp);
1983 1983                  mutex_exit(&ufsvfsp->vfs_lock);
1984 1984                  return;
1985 1985          }
1986 1986  
1987 1987          blks = howmany(fs->fs_cssize, fs->fs_fsize);
1988 1988          space = (caddr_t)fs->fs_u.fs_csp;
1989 1989          for (i = 0; i < blks; i += fs->fs_frag) {
1990 1990                  size = fs->fs_bsize;
1991 1991                  if (i + fs->fs_frag > blks)
1992 1992                          size = (blks - i) * fs->fs_fsize;
1993 1993                  bp = UFS_GETBLK(ufsvfsp, ufsvfsp->vfs_dev,
1994 1994                      (daddr_t)(fsbtodb(fs, fs->fs_csaddr + i)),
1995 1995                      fs->fs_bsize);
1996 1996                  bcopy(space, bp->b_un.b_addr, size);
1997 1997                  space += size;
1998 1998                  bp->b_bcount = size;
1999 1999                  UFS_BRWRITE(ufsvfsp, bp);
2000 2000          }
2001 2001          mutex_enter(&ufsvfsp->vfs_lock);
2002 2002          ufs_sbwrite(ufsvfsp);
2003 2003          mutex_exit(&ufsvfsp->vfs_lock);
2004 2004  }
2005 2005  
2006 2006  int ufs_vget_idle_count = 2;    /* Number of inodes to idle each time */
2007 2007  static int
2008 2008  ufs_vget(struct vfs *vfsp, struct vnode **vpp, struct fid *fidp)
2009 2009  {
2010 2010          int error = 0;
2011 2011          struct ufid *ufid;
2012 2012          struct inode *ip;
2013 2013          struct ufsvfs *ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
2014 2014          struct ulockfs *ulp;
2015 2015  
2016 2016          /*
2017 2017           * Check for unmounted filesystem.
2018 2018           */
2019 2019          if (vfsp->vfs_flag & VFS_UNMOUNTED) {
2020 2020                  error = EIO;
2021 2021                  goto errout;
2022 2022          }
2023 2023  
2024 2024          /*
2025 2025           * Keep the idle queue from getting too long by
2026 2026           * idling an inode before attempting to allocate another.
2027 2027           *    This operation must be performed before entering
2028 2028           *    lockfs or a transaction.
2029 2029           */
2030 2030          if (ufs_idle_q.uq_ne > ufs_idle_q.uq_hiwat)
2031 2031                  if ((curthread->t_flag & T_DONTBLOCK) == 0) {
2032 2032                          ins.in_vidles.value.ul += ufs_vget_idle_count;
2033 2033                          ufs_idle_some(ufs_vget_idle_count);
2034 2034                  }
2035 2035  
2036 2036          ufid = (struct ufid *)fidp;
2037 2037  
2038 2038          if (error = ufs_lockfs_begin(ufsvfsp, &ulp, ULOCKFS_VGET_MASK))
2039 2039                  goto errout;
2040 2040  
2041 2041          rw_enter(&ufsvfsp->vfs_dqrwlock, RW_READER);
2042 2042  
2043 2043          error = ufs_iget(vfsp, ufid->ufid_ino, &ip, CRED());
2044 2044  
2045 2045          rw_exit(&ufsvfsp->vfs_dqrwlock);
2046 2046  
2047 2047          ufs_lockfs_end(ulp);
2048 2048  
2049 2049          if (error)
2050 2050                  goto errout;
2051 2051  
2052 2052          /*
2053 2053           * Check if the inode has been deleted or freed or is in transient state
2054 2054           * since the last VFS_VGET() request for it, release it and don't return
2055 2055           * it to the caller, presumably NFS, as it's no longer valid.
2056 2056           */
2057 2057          if (ip->i_gen != ufid->ufid_gen || ip->i_mode == 0 ||
2058 2058              (ip->i_nlink <= 0)) {
2059 2059                  VN_RELE(ITOV(ip));
2060 2060                  error = EINVAL;
2061 2061                  goto errout;
2062 2062          }
2063 2063  
2064 2064          *vpp = ITOV(ip);
2065 2065          return (0);
2066 2066  
2067 2067  errout:
2068 2068          *vpp = NULL;
2069 2069          return (error);
2070 2070  }
2071 2071  
2072 2072  static int
2073 2073  ufsinit(int fstype, char *name)
2074 2074  {
2075 2075          static const fs_operation_def_t ufs_vfsops_template[] = {
2076 2076                  VFSNAME_MOUNT,          { .vfs_mount = ufs_mount },
2077 2077                  VFSNAME_UNMOUNT,        { .vfs_unmount = ufs_unmount },
2078 2078                  VFSNAME_ROOT,           { .vfs_root = ufs_root },
2079 2079                  VFSNAME_STATVFS,        { .vfs_statvfs = ufs_statvfs },
2080 2080                  VFSNAME_SYNC,           { .vfs_sync = ufs_sync },
2081 2081                  VFSNAME_VGET,           { .vfs_vget = ufs_vget },
2082 2082                  VFSNAME_MOUNTROOT,      { .vfs_mountroot = ufs_mountroot },
2083 2083                  NULL,                   NULL
2084 2084          };
2085 2085          int error;
2086 2086  
2087 2087          ufsfstype = fstype;
2088 2088  
2089 2089          error = vfs_setfsops(fstype, ufs_vfsops_template, &ufs_vfsops);
2090 2090          if (error != 0) {
2091 2091                  cmn_err(CE_WARN, "ufsinit: bad vfs ops template");
2092 2092                  return (error);
2093 2093          }
2094 2094  
2095 2095          error = vn_make_ops(name, ufs_vnodeops_template, &ufs_vnodeops);
2096 2096          if (error != 0) {
2097 2097                  (void) vfs_freevfsops_by_type(fstype);
2098 2098                  cmn_err(CE_WARN, "ufsinit: bad vnode ops template");
2099 2099                  return (error);
2100 2100          }
2101 2101  
2102 2102          ufs_iinit();
2103 2103          return (0);
2104 2104  }

↓ open down ↓

1295 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX