Print this page
filesystem hook framework (August 19th)


  67 #include <sys/modctl.h>
  68 #include <sys/ddi.h>
  69 #include <sys/pathname.h>
  70 #include <sys/bootconf.h>
  71 #include <sys/dumphdr.h>
  72 #include <sys/dc_ki.h>
  73 #include <sys/poll.h>
  74 #include <sys/sunddi.h>
  75 #include <sys/sysmacros.h>
  76 #include <sys/zone.h>
  77 #include <sys/policy.h>
  78 #include <sys/ctfs.h>
  79 #include <sys/objfs.h>
  80 #include <sys/console.h>
  81 #include <sys/reboot.h>
  82 #include <sys/attr.h>
  83 #include <sys/zio.h>
  84 #include <sys/spa.h>
  85 #include <sys/lofi.h>
  86 #include <sys/bootprops.h>


  87 
  88 #include <vm/page.h>
  89 
  90 #include <fs/fs_subr.h>
  91 /* Private interfaces to create vopstats-related data structures */
  92 extern void             initialize_vopstats(vopstats_t *);
  93 extern vopstats_t       *get_fstype_vopstats(struct vfs *, struct vfssw *);
  94 extern vsk_anchor_t     *get_vskstat_anchor(struct vfs *);
  95 
  96 static void vfs_clearmntopt_nolock(mntopts_t *, const char *, int);
  97 static void vfs_setmntopt_nolock(mntopts_t *, const char *,
  98     const char *, int, int);
  99 static int  vfs_optionisset_nolock(const mntopts_t *, const char *, char **);
 100 static void vfs_freemnttab(struct vfs *);
 101 static void vfs_freeopt(mntopt_t *);
 102 static void vfs_swapopttbl_nolock(mntopts_t *, mntopts_t *);
 103 static void vfs_swapopttbl(mntopts_t *, mntopts_t *);
 104 static void vfs_copyopttbl_extend(const mntopts_t *, mntopts_t *, int);
 105 static void vfs_createopttbl_extend(mntopts_t *, const char *,
 106     const mntopts_t *);


 199         { MNTOPT_NONBMAND,      nonbmand_cancel,        NULL,           0,
 200                 (void *)0 },
 201         { MNTOPT_EXEC,          exec_cancel,            NULL,           0,
 202                 (void *)0 },
 203         { MNTOPT_NOEXEC,        noexec_cancel,          NULL,           0,
 204                 (void *)0 },
 205 };
 206 
 207 const mntopts_t vfs_mntopts = {
             /*
              * Element count of the mntopts[] table.  NOTE(review): assumes
              * mntopts[] is an array of mntopt_t; its declaration lies above
              * the visible part of the file -- confirm.
              */
 208         sizeof (mntopts) / sizeof (mntopt_t),
             /* Address of the first mntopts[] entry, cast to mntopt_t *. */
 209         (mntopt_t *)&mntopts[0]
 210 };
 211 
 212 /*
 213  * File system operation dispatch functions.
 214  */
 215 
 216 int
 217 fsop_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
 218 {
             /*
              * Call the vfs_mount entry of vfsp's operations vector with the
              * caller's arguments and hand its return value straight back.
              */
 219         return (*(vfsp)->vfs_op->vfs_mount)(vfsp, mvp, uap, cr);
 220 }
 221 
 222 int
 223 fsop_unmount(vfs_t *vfsp, int flag, cred_t *cr)
 224 {
             /*
              * Call the vfs_unmount entry of vfsp's operations vector with the
              * caller's arguments and hand its return value straight back.
              */
 225         return (*(vfsp)->vfs_op->vfs_unmount)(vfsp, flag, cr);
 226 }
 227 
 228 int
 229 fsop_root(vfs_t *vfsp, vnode_t **vpp)
 230 {
 231         refstr_t *mntpt;
             /* Ask the vfs_root entry of the operations vector to fill *vpp. */
 232         int ret = (*(vfsp)->vfs_op->vfs_root)(vfsp, vpp);
 233         /*
 234          * Make sure this root has a path.  With lofs, it is possible to have
 235          * a NULL mountpoint.
              *
              * The path is only filled in when vfs_root succeeded, the vfs has
              * a mount point recorded, and the returned vnode has no v_path.
 236          */
 237         if (ret == 0 && vfsp->vfs_mntpt != NULL && (*vpp)->v_path == NULL) {
 238                 mntpt = vfs_getmntpoint(vfsp);
 239                 vn_setpath_str(*vpp, refstr_value(mntpt),
 240                     strlen(refstr_value(mntpt)));
                     /* Done with mntpt once vn_setpath_str() has returned. */
 241                 refstr_rele(mntpt);
 242         }
 243 
             /* The vfs_root result is returned whether or not a path was set. */
 244         return (ret);
 245 }


1768         if ((error == 0) && (copyout_error == 0)) {
1769                 if (!remount) {
1770                         /*
1771                          * Don't call get_vskstat_anchor() while holding
1772                          * locks since it allocates memory and calls
1773                          * VFS_STATVFS().  For NFS, the latter can generate
1774                          * an over-the-wire call.
1775                          */
1776                         vskap = get_vskstat_anchor(vfsp);
1777                         /* Only take the lock if we have something to do */
1778                         if (vskap != NULL) {
1779                                 vfs_lock_wait(vfsp);
1780                                 if (vfsp->vfs_flag & VFS_STATS) {
1781                                         vfsp->vfs_vskap = vskap;
1782                                 }
1783                                 vfs_unlock(vfsp);
1784                         }
1785                 }
1786                 /* Return vfsp to caller. */
1787                 *vfspp = vfsp;

1788         }
1789 errout:
1790         vfs_freeopttbl(&mnt_mntopts);
1791         if (resource != NULL)
1792                 kmem_free(resource, strlen(resource) + 1);
1793         if (mountpt != NULL)
1794                 kmem_free(mountpt, strlen(mountpt) + 1);
1795         /*
1796          * It is possible we errored prior to adding to mount in progress
1797          * table. Must free vnode we acquired with successful lookupname.
1798          */
1799         if (addmip)
1800                 VN_RELE(bvp);
1801         if (delmip)
1802                 vfs_delmip(vfsp);
1803         ASSERT(vswp != NULL);
1804         vfs_unrefvfssw(vswp);
1805         if (inargs != opts)
1806                 kmem_free(inargs, MAX_MNTOPT_STR);
1807         if (copyout_error) {


4218                 VFSNAME_ROOT,           { .error = vfsstray },
4219                 VFSNAME_STATVFS,        { .error = vfsstray },
4220                 VFSNAME_SYNC,           { .vfs_sync = vfsstray_sync },
4221                 VFSNAME_VGET,           { .error = vfsstray },
4222                 VFSNAME_MOUNTROOT,      { .error = vfsstray },
4223                 VFSNAME_FREEVFS,        { .error = vfsstray },
4224                 VFSNAME_VNSTATE,        { .error = vfsstray },
4225                 NULL, NULL
4226         };
4227 
4228         /* Create vfs cache */
4229         vfs_cache = kmem_cache_create("vfs_cache", sizeof (struct vfs),
4230             sizeof (uintptr_t), NULL, NULL, NULL, NULL, NULL, 0);
4231 
4232         /* Initialize the vnode cache (file systems may use it during init). */
4233         vn_create_cache();
4234 
4235         /* Setup event monitor framework */
4236         fem_init();
4237 



4238         /* Initialize the dummy stray file system type. */
4239         error = vfs_setfsops(0, stray_vfsops_template, NULL);
4240 
4241         /* Initialize the dummy EIO file system. */
4242         error = vfs_makefsops(EIO_vfsops_template, &EIO_vfsops);
4243         if (error != 0) {
4244                 cmn_err(CE_WARN, "vfsinit: bad EIO vfs ops template");
4245                 /* Shouldn't happen, but not bad enough to panic */
4246         }
4247 
4248         VFS_INIT(&EIO_vfs, EIO_vfsops, (caddr_t)NULL);
4249 
4250         /*
4251          * Default EIO_vfs.vfs_flag to VFS_UNMOUNTED so a lookup
4252          * on this vfs can immediately notice it's invalid.
4253          */
4254         EIO_vfs.vfs_flag |= VFS_UNMOUNTED;
4255 
4256         /*
4257          * Call the init routines of non-loadable filesystems only.


4302         /*
4303          * One would be tempted to assert that "vfsp->vfs_count == 0".
4304          * The problem is that this gets called out of domount() with
4305          * a partially initialized vfs and a vfs_count of 1.  This is
4306          * also called from vfs_rele() with a vfs_count of 0.  We can't
4307          * call VFS_RELE() from domount() if VFS_MOUNT() hasn't successfully
4308          * returned.  This is because VFS_MOUNT() fully initializes the
4309          * vfs structure and its associated data.  VFS_RELE() will call
4310          * VFS_FREEVFS() which may panic the system if the data structures
4311          * aren't fully initialized from a successful VFS_MOUNT().
4312          */
4313 
4314         /* If FEM was in use, make sure everything gets cleaned up */
4315         if (vfsp->vfs_femhead) {
4316                 ASSERT(vfsp->vfs_femhead->femh_list == NULL);
4317                 mutex_destroy(&vfsp->vfs_femhead->femh_lock);
4318                 kmem_free(vfsp->vfs_femhead, sizeof (*(vfsp->vfs_femhead)));
4319                 vfsp->vfs_femhead = NULL;
4320         }
4321 









4322         if (vfsp->vfs_implp)
4323                 vfsimpl_teardown(vfsp);
4324         sema_destroy(&vfsp->vfs_reflock);
4325         kmem_cache_free(vfs_cache, vfsp);
4326 }
4327 
4328 /*
4329  * Increments the vfs reference count by one atomically.
4330  */
4331 void
4332 vfs_hold(vfs_t *vfsp)
4333 {
             /* Take one more reference on vfsp. */
4334         atomic_add_32(&vfsp->vfs_count, 1);
             /* Sanity check: after taking a hold the count must not read zero. */
4335         ASSERT(vfsp->vfs_count != 0);
4336 }
4337 
4338 /*
4339  * Decrements the vfs reference count by one atomically. When
4340  * vfs reference count becomes zero, it calls the file system
4341  * specific vfs_freevfs() to free up the resources.
4342  */
4343 void
4344 vfs_rele(vfs_t *vfsp)
4345 {
             /* Releasing a vfs whose count is already zero is a caller bug. */
4346         ASSERT(vfsp->vfs_count != 0);
             /*
              * Drop one reference.  The teardown below runs only when the
              * value returned by atomic_add_32_nv() is zero.
              */
4347         if (atomic_add_32_nv(&vfsp->vfs_count, -1) == 0) {

                     /* Let the file system release its private resources. */
4348                 VFS_FREEVFS(vfsp);
4349                 lofi_remove(vfsp);
                     /* The zone reference is only dropped when vfs_zone is set. */
4350                 if (vfsp->vfs_zone)
4351                         zone_rele_ref(&vfsp->vfs_implp->vi_zone_ref,
4352                             ZONE_REF_VFS);
4353                 vfs_freemnttab(vfsp);
                     /*
                      * NOTE(review): vfs_free() presumably returns vfsp to
                      * vfs_cache, so vfsp must not be touched after this
                      * call -- confirm against vfs_free().
                      */
4354                 vfs_free(vfsp);
4355         }
4356 }
4357 
4358 /*
4359  * Generic operations vector support.
4360  *
4361  * This is used to build operations vectors for both the vfs and vnode.
4362  * It's normally called only when a file system is loaded.
4363  *
4364  * There are many possible algorithms for this, including the following:
4365  *
4366  *   (1) scan the list of known operations; for each, see if the file system
4367  *       includes an entry for it, and fill it in as appropriate.




  67 #include <sys/modctl.h>
  68 #include <sys/ddi.h>
  69 #include <sys/pathname.h>
  70 #include <sys/bootconf.h>
  71 #include <sys/dumphdr.h>
  72 #include <sys/dc_ki.h>
  73 #include <sys/poll.h>
  74 #include <sys/sunddi.h>
  75 #include <sys/sysmacros.h>
  76 #include <sys/zone.h>
  77 #include <sys/policy.h>
  78 #include <sys/ctfs.h>
  79 #include <sys/objfs.h>
  80 #include <sys/console.h>
  81 #include <sys/reboot.h>
  82 #include <sys/attr.h>
  83 #include <sys/zio.h>
  84 #include <sys/spa.h>
  85 #include <sys/lofi.h>
  86 #include <sys/bootprops.h>
  87 #include <sys/fsh.h>
  88 #include <sys/fsh_impl.h>
  89 
  90 #include <vm/page.h>
  91 
  92 #include <fs/fs_subr.h>
  93 /* Private interfaces to create vopstats-related data structures */
  94 extern void             initialize_vopstats(vopstats_t *);
  95 extern vopstats_t       *get_fstype_vopstats(struct vfs *, struct vfssw *);
  96 extern vsk_anchor_t     *get_vskstat_anchor(struct vfs *);
  97 
  98 static void vfs_clearmntopt_nolock(mntopts_t *, const char *, int);
  99 static void vfs_setmntopt_nolock(mntopts_t *, const char *,
 100     const char *, int, int);
 101 static int  vfs_optionisset_nolock(const mntopts_t *, const char *, char **);
 102 static void vfs_freemnttab(struct vfs *);
 103 static void vfs_freeopt(mntopt_t *);
 104 static void vfs_swapopttbl_nolock(mntopts_t *, mntopts_t *);
 105 static void vfs_swapopttbl(mntopts_t *, mntopts_t *);
 106 static void vfs_copyopttbl_extend(const mntopts_t *, mntopts_t *, int);
 107 static void vfs_createopttbl_extend(mntopts_t *, const char *,
 108     const mntopts_t *);


 201         { MNTOPT_NONBMAND,      nonbmand_cancel,        NULL,           0,
 202                 (void *)0 },
 203         { MNTOPT_EXEC,          exec_cancel,            NULL,           0,
 204                 (void *)0 },
 205         { MNTOPT_NOEXEC,        noexec_cancel,          NULL,           0,
 206                 (void *)0 },
 207 };
 208 
 209 const mntopts_t vfs_mntopts = {
             /*
              * Element count of the mntopts[] table.  NOTE(review): assumes
              * mntopts[] is an array of mntopt_t; its declaration lies above
              * the visible part of the file -- confirm.
              */
 210         sizeof (mntopts) / sizeof (mntopt_t),
             /* Address of the first mntopts[] entry, cast to mntopt_t *. */
 211         (mntopt_t *)&mntopts[0]
 212 };
 213 
 214 /*
 215  * File system operation dispatch functions.
 216  */
 217 
 218 int
 219 fsop_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
 220 {
             /*
              * Hand the request to fsh_mount() with the caller's arguments and
              * return its result unchanged.
              * NOTE(review): fsh_mount() is defined outside this view;
              * presumably it runs any installed filesystem hooks and then the
              * file system's own vfs_mount op -- confirm in the fsh sources.
              */
 221         return (fsh_mount(vfsp, mvp, uap, cr));
 222 }
 223 
 224 int
 225 fsop_unmount(vfs_t *vfsp, int flag, cred_t *cr)
 226 {
             /*
              * Hand the request to fsh_unmount() with the caller's arguments
              * and return its result unchanged.
              * NOTE(review): fsh_unmount() is defined outside this view;
              * presumably it runs any installed filesystem hooks and then the
              * file system's own vfs_unmount op -- confirm in the fsh sources.
              */
 227         return (fsh_unmount(vfsp, flag, cr));
 228 }
 229 
 230 int
 231 fsop_root(vfs_t *vfsp, vnode_t **vpp)
 232 {
 233         refstr_t *mntpt;
             /*
              * Ask the vfs_root entry of the operations vector to fill *vpp.
              * This is a direct call through vfs_op, not an fsh_*() wrapper.
              */
 234         int ret = (*(vfsp)->vfs_op->vfs_root)(vfsp, vpp);
 235         /*
 236          * Make sure this root has a path.  With lofs, it is possible to have
 237          * a NULL mountpoint.
              *
              * The path is only filled in when vfs_root succeeded, the vfs has
              * a mount point recorded, and the returned vnode has no v_path.
 238          */
 239         if (ret == 0 && vfsp->vfs_mntpt != NULL && (*vpp)->v_path == NULL) {
 240                 mntpt = vfs_getmntpoint(vfsp);
 241                 vn_setpath_str(*vpp, refstr_value(mntpt),
 242                     strlen(refstr_value(mntpt)));
                     /* Done with mntpt once vn_setpath_str() has returned. */
 243                 refstr_rele(mntpt);
 244         }
 245 
             /* The vfs_root result is returned whether or not a path was set. */
 246         return (ret);
 247 }


1770         if ((error == 0) && (copyout_error == 0)) {
1771                 if (!remount) {
1772                         /*
1773                          * Don't call get_vskstat_anchor() while holding
1774                          * locks since it allocates memory and calls
1775                          * VFS_STATVFS().  For NFS, the latter can generate
1776                          * an over-the-wire call.
1777                          */
1778                         vskap = get_vskstat_anchor(vfsp);
1779                         /* Only take the lock if we have something to do */
1780                         if (vskap != NULL) {
1781                                 vfs_lock_wait(vfsp);
1782                                 if (vfsp->vfs_flag & VFS_STATS) {
1783                                         vfsp->vfs_vskap = vskap;
1784                                 }
1785                                 vfs_unlock(vfsp);
1786                         }
1787                 }
1788                 /* Return vfsp to caller. */
1789                 *vfspp = vfsp;
1790                 fsh_exec_mount_callbacks(vfsp);
1791         }
1792 errout:
1793         vfs_freeopttbl(&mnt_mntopts);
1794         if (resource != NULL)
1795                 kmem_free(resource, strlen(resource) + 1);
1796         if (mountpt != NULL)
1797                 kmem_free(mountpt, strlen(mountpt) + 1);
1798         /*
1799          * It is possible we errored prior to adding to mount in progress
1800          * table. Must free vnode we acquired with successful lookupname.
1801          */
1802         if (addmip)
1803                 VN_RELE(bvp);
1804         if (delmip)
1805                 vfs_delmip(vfsp);
1806         ASSERT(vswp != NULL);
1807         vfs_unrefvfssw(vswp);
1808         if (inargs != opts)
1809                 kmem_free(inargs, MAX_MNTOPT_STR);
1810         if (copyout_error) {


4221                 VFSNAME_ROOT,           { .error = vfsstray },
4222                 VFSNAME_STATVFS,        { .error = vfsstray },
4223                 VFSNAME_SYNC,           { .vfs_sync = vfsstray_sync },
4224                 VFSNAME_VGET,           { .error = vfsstray },
4225                 VFSNAME_MOUNTROOT,      { .error = vfsstray },
4226                 VFSNAME_FREEVFS,        { .error = vfsstray },
4227                 VFSNAME_VNSTATE,        { .error = vfsstray },
4228                 NULL, NULL
4229         };
4230 
4231         /* Create vfs cache */
4232         vfs_cache = kmem_cache_create("vfs_cache", sizeof (struct vfs),
4233             sizeof (uintptr_t), NULL, NULL, NULL, NULL, NULL, 0);
4234 
4235         /* Initialize the vnode cache (file systems may use it during init). */
4236         vn_create_cache();
4237 
4238         /* Setup event monitor framework */
4239         fem_init();
4240 
4241         /* Setup filesystem hook framework */
4242         fsh_init();
4243 
4244         /* Initialize the dummy stray file system type. */
4245         error = vfs_setfsops(0, stray_vfsops_template, NULL);
4246 
4247         /* Initialize the dummy EIO file system. */
4248         error = vfs_makefsops(EIO_vfsops_template, &EIO_vfsops);
4249         if (error != 0) {
4250                 cmn_err(CE_WARN, "vfsinit: bad EIO vfs ops template");
4251                 /* Shouldn't happen, but not bad enough to panic */
4252         }
4253 
4254         VFS_INIT(&EIO_vfs, EIO_vfsops, (caddr_t)NULL);
4255 
4256         /*
4257          * Default EIO_vfs.vfs_flag to VFS_UNMOUNTED so a lookup
4258          * on this vfs can immediately notice it's invalid.
4259          */
4260         EIO_vfs.vfs_flag |= VFS_UNMOUNTED;
4261 
4262         /*
4263          * Call the init routines of non-loadable filesystems only.


4308         /*
4309          * One would be tempted to assert that "vfsp->vfs_count == 0".
4310          * The problem is that this gets called out of domount() with
4311          * a partially initialized vfs and a vfs_count of 1.  This is
4312          * also called from vfs_rele() with a vfs_count of 0.  We can't
4313          * call VFS_RELE() from domount() if VFS_MOUNT() hasn't successfully
4314          * returned.  This is because VFS_MOUNT() fully initializes the
4315          * vfs structure and its associated data.  VFS_RELE() will call
4316          * VFS_FREEVFS() which may panic the system if the data structures
4317          * aren't fully initialized from a successful VFS_MOUNT().
4318          */
4319 
4320         /* If FEM was in use, make sure everything gets cleaned up */
4321         if (vfsp->vfs_femhead) {
4322                 ASSERT(vfsp->vfs_femhead->femh_list == NULL);
4323                 mutex_destroy(&vfsp->vfs_femhead->femh_lock);
4324                 kmem_free(vfsp->vfs_femhead, sizeof (*(vfsp->vfs_femhead)));
4325                 vfsp->vfs_femhead = NULL;
4326         }
4327 
4328         /*
4329          * fsh cleanup
4330          * There's no need here to use atomic operations on vfs_fshrecord.
4331          */
4332         if (vfsp->vfs_fshrecord != NULL) {
4333                 fsh_fsrec_destroy(vfsp->vfs_fshrecord);
4334                 vfsp->vfs_fshrecord = NULL;
4335         }
4336 
4337         if (vfsp->vfs_implp)
4338                 vfsimpl_teardown(vfsp);
4339         sema_destroy(&vfsp->vfs_reflock);
4340         kmem_cache_free(vfs_cache, vfsp);
4341 }
4342 
4343 /*
4344  * Increments the vfs reference count by one atomically.
4345  */
4346 void
4347 vfs_hold(vfs_t *vfsp)
4348 {
             /* Take one more reference on vfsp. */
4349         atomic_add_32(&vfsp->vfs_count, 1);
             /* Sanity check: after taking a hold the count must not read zero. */
4350         ASSERT(vfsp->vfs_count != 0);
4351 }
4352 
4353 /*
4354  * Decrements the vfs reference count by one atomically. When
4355  * vfs reference count becomes zero, it calls the file system
4356  * specific vfs_freevfs() to free up the resources.
4357  */
4358 void
4359 vfs_rele(vfs_t *vfsp)
4360 {
             /* Releasing a vfs whose count is already zero is a caller bug. */
4361         ASSERT(vfsp->vfs_count != 0);
             /*
              * Drop one reference.  The teardown below runs only when the
              * value returned by atomic_add_32_nv() is zero.
              */
4362         if (atomic_add_32_nv(&vfsp->vfs_count, -1) == 0) {
                     /*
                      * The fsh free callbacks run first, before the file
                      * system's own freevfs op is invoked.
                      * NOTE(review): what the callbacks may touch on vfsp is
                      * defined by the fsh framework, outside this view.
                      */
4363                 fsh_exec_free_callbacks(vfsp);
                     /* Let the file system release its private resources. */
4364                 VFS_FREEVFS(vfsp);
4365                 lofi_remove(vfsp);
                     /* The zone reference is only dropped when vfs_zone is set. */
4366                 if (vfsp->vfs_zone)
4367                         zone_rele_ref(&vfsp->vfs_implp->vi_zone_ref,
4368                             ZONE_REF_VFS);
4369                 vfs_freemnttab(vfsp);
                     /*
                      * NOTE(review): vfs_free() presumably returns vfsp to
                      * vfs_cache, so vfsp must not be touched after this
                      * call -- confirm against vfs_free().
                      */
4370                 vfs_free(vfsp);
4371         }
4372 }
4373 
4374 /*
4375  * Generic operations vector support.
4376  *
4377  * This is used to build operations vectors for both the vfs and vnode.
4378  * It's normally called only when a file system is loaded.
4379  *
4380  * There are many possible algorithms for this, including the following:
4381  *
4382  *   (1) scan the list of known operations; for each, see if the file system
4383  *       includes an entry for it, and fill it in as appropriate.