67 #include <sys/modctl.h>
68 #include <sys/ddi.h>
69 #include <sys/pathname.h>
70 #include <sys/bootconf.h>
71 #include <sys/dumphdr.h>
72 #include <sys/dc_ki.h>
73 #include <sys/poll.h>
74 #include <sys/sunddi.h>
75 #include <sys/sysmacros.h>
76 #include <sys/zone.h>
77 #include <sys/policy.h>
78 #include <sys/ctfs.h>
79 #include <sys/objfs.h>
80 #include <sys/console.h>
81 #include <sys/reboot.h>
82 #include <sys/attr.h>
83 #include <sys/zio.h>
84 #include <sys/spa.h>
85 #include <sys/lofi.h>
86 #include <sys/bootprops.h>
87
88 #include <vm/page.h>
89
90 #include <fs/fs_subr.h>
91 /* Private interfaces to create vopstats-related data structures */
92 extern void initialize_vopstats(vopstats_t *);
93 extern vopstats_t *get_fstype_vopstats(struct vfs *, struct vfssw *);
94 extern vsk_anchor_t *get_vskstat_anchor(struct vfs *);
95
96 static void vfs_clearmntopt_nolock(mntopts_t *, const char *, int);
97 static void vfs_setmntopt_nolock(mntopts_t *, const char *,
98 const char *, int, int);
99 static int vfs_optionisset_nolock(const mntopts_t *, const char *, char **);
100 static void vfs_freemnttab(struct vfs *);
101 static void vfs_freeopt(mntopt_t *);
102 static void vfs_swapopttbl_nolock(mntopts_t *, mntopts_t *);
103 static void vfs_swapopttbl(mntopts_t *, mntopts_t *);
104 static void vfs_copyopttbl_extend(const mntopts_t *, mntopts_t *, int);
105 static void vfs_createopttbl_extend(mntopts_t *, const char *,
106 const mntopts_t *);
199 { MNTOPT_NONBMAND, nonbmand_cancel, NULL, 0,
200 (void *)0 },
201 { MNTOPT_EXEC, exec_cancel, NULL, 0,
202 (void *)0 },
203 { MNTOPT_NOEXEC, noexec_cancel, NULL, 0,
204 (void *)0 },
205 };
206
207 const mntopts_t vfs_mntopts = {
208 sizeof (mntopts) / sizeof (mntopt_t),
209 (mntopt_t *)&mntopts[0]
210 };
211
212 /*
213 * File system operation dispatch functions.
214 */
215
216 int
217 fsop_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
218 {
219 return (*(vfsp)->vfs_op->vfs_mount)(vfsp, mvp, uap, cr);
220 }
221
222 int
223 fsop_unmount(vfs_t *vfsp, int flag, cred_t *cr)
224 {
225 return (*(vfsp)->vfs_op->vfs_unmount)(vfsp, flag, cr);
226 }
227
228 int
229 fsop_root(vfs_t *vfsp, vnode_t **vpp)
230 {
231 refstr_t *mntpt;
232 int ret = (*(vfsp)->vfs_op->vfs_root)(vfsp, vpp);
233 /*
234 * Make sure this root has a path. With lofs, it is possible to have
235 * a NULL mountpoint.
236 */
237 if (ret == 0 && vfsp->vfs_mntpt != NULL && (*vpp)->v_path == NULL) {
238 mntpt = vfs_getmntpoint(vfsp);
239 vn_setpath_str(*vpp, refstr_value(mntpt),
240 strlen(refstr_value(mntpt)));
241 refstr_rele(mntpt);
242 }
243
244 return (ret);
245 }
246
247 int
248 fsop_statfs(vfs_t *vfsp, statvfs64_t *sp)
249 {
250 return (*(vfsp)->vfs_op->vfs_statvfs)(vfsp, sp);
251 }
252
253 int
254 fsop_sync(vfs_t *vfsp, short flag, cred_t *cr)
255 {
256 return (*(vfsp)->vfs_op->vfs_sync)(vfsp, flag, cr);
257 }
258
259 int
260 fsop_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
261 {
262 /*
263 * In order to handle system attribute fids in a manner
264 * transparent to the underlying fs, we embed the fid for
265 * the sysattr parent object in the sysattr fid and tack on
266 * some extra bytes that only the sysattr layer knows about.
267 *
268 * This guarantees that sysattr fids are larger than other fids
269 * for this vfs. If the vfs supports the sysattr view interface
270 * (as indicated by VFSFT_SYSATTR_VIEWS), we cannot have a size
271 * collision with XATTR_FIDSZ.
272 */
273 if (vfs_has_feature(vfsp, VFSFT_SYSATTR_VIEWS) &&
274 fidp->fid_len == XATTR_FIDSZ)
275 return (xattr_dir_vget(vfsp, vpp, fidp));
276
277 return (*(vfsp)->vfs_op->vfs_vget)(vfsp, vpp, fidp);
278 }
279
280 int
281 fsop_mountroot(vfs_t *vfsp, enum whymountroot reason)
282 {
283 return (*(vfsp)->vfs_op->vfs_mountroot)(vfsp, reason);
284 }
285
286 void
287 fsop_freefs(vfs_t *vfsp)
288 {
289 (*(vfsp)->vfs_op->vfs_freevfs)(vfsp);
290 }
291
292 int
293 fsop_vnstate(vfs_t *vfsp, vnode_t *vp, vntrans_t nstate)
294 {
295 return ((*(vfsp)->vfs_op->vfs_vnstate)(vfsp, vp, nstate));
296 }
297
1768 if ((error == 0) && (copyout_error == 0)) {
1769 if (!remount) {
1770 /*
1771 * Don't call get_vskstat_anchor() while holding
1772 * locks since it allocates memory and calls
1773 * VFS_STATVFS(). For NFS, the latter can generate
1774 * an over-the-wire call.
1775 */
1776 vskap = get_vskstat_anchor(vfsp);
1777 /* Only take the lock if we have something to do */
1778 if (vskap != NULL) {
1779 vfs_lock_wait(vfsp);
1780 if (vfsp->vfs_flag & VFS_STATS) {
1781 vfsp->vfs_vskap = vskap;
1782 }
1783 vfs_unlock(vfsp);
1784 }
1785 }
1786 /* Return vfsp to caller. */
1787 *vfspp = vfsp;
1788 }
1789 errout:
1790 vfs_freeopttbl(&mnt_mntopts);
1791 if (resource != NULL)
1792 kmem_free(resource, strlen(resource) + 1);
1793 if (mountpt != NULL)
1794 kmem_free(mountpt, strlen(mountpt) + 1);
1795 /*
1796 * It is possible we errored prior to adding to mount in progress
1797 * table. Must free vnode we acquired with successful lookupname.
1798 */
1799 if (addmip)
1800 VN_RELE(bvp);
1801 if (delmip)
1802 vfs_delmip(vfsp);
1803 ASSERT(vswp != NULL);
1804 vfs_unrefvfssw(vswp);
1805 if (inargs != opts)
1806 kmem_free(inargs, MAX_MNTOPT_STR);
1807 if (copyout_error) {
4218 VFSNAME_ROOT, { .error = vfsstray },
4219 VFSNAME_STATVFS, { .error = vfsstray },
4220 VFSNAME_SYNC, { .vfs_sync = vfsstray_sync },
4221 VFSNAME_VGET, { .error = vfsstray },
4222 VFSNAME_MOUNTROOT, { .error = vfsstray },
4223 VFSNAME_FREEVFS, { .error = vfsstray },
4224 VFSNAME_VNSTATE, { .error = vfsstray },
4225 NULL, NULL
4226 };
4227
4228 /* Create vfs cache */
4229 vfs_cache = kmem_cache_create("vfs_cache", sizeof (struct vfs),
4230 sizeof (uintptr_t), NULL, NULL, NULL, NULL, NULL, 0);
4231
4232 /* Initialize the vnode cache (file systems may use it during init). */
4233 vn_create_cache();
4234
4235 /* Setup event monitor framework */
4236 fem_init();
4237
4238 /* Initialize the dummy stray file system type. */
4239 error = vfs_setfsops(0, stray_vfsops_template, NULL);
4240
4241 /* Initialize the dummy EIO file system. */
4242 error = vfs_makefsops(EIO_vfsops_template, &EIO_vfsops);
4243 if (error != 0) {
4244 cmn_err(CE_WARN, "vfsinit: bad EIO vfs ops template");
4245 /* Shouldn't happen, but not bad enough to panic */
4246 }
4247
4248 VFS_INIT(&EIO_vfs, EIO_vfsops, (caddr_t)NULL);
4249
4250 /*
4251 * Default EIO_vfs.vfs_flag to VFS_UNMOUNTED so a lookup
4252 * on this vfs can immediately notice it's invalid.
4253 */
4254 EIO_vfs.vfs_flag |= VFS_UNMOUNTED;
4255
4256 /*
4257 * Call the init routines of non-loadable filesystems only.
4302 /*
4303 * One would be tempted to assert that "vfsp->vfs_count == 0".
4304 * The problem is that this gets called out of domount() with
4305 * a partially initialized vfs and a vfs_count of 1. This is
4306 * also called from vfs_rele() with a vfs_count of 0. We can't
4307 * call VFS_RELE() from domount() if VFS_MOUNT() hasn't successfully
4308 * returned. This is because VFS_MOUNT() fully initializes the
4309 * vfs structure and its associated data. VFS_RELE() will call
4310 * VFS_FREEVFS() which may panic the system if the data structures
4311 * aren't fully initialized from a successful VFS_MOUNT()).
4312 */
4313
4314 /* If FEM was in use, make sure everything gets cleaned up */
4315 if (vfsp->vfs_femhead) {
4316 ASSERT(vfsp->vfs_femhead->femh_list == NULL);
4317 mutex_destroy(&vfsp->vfs_femhead->femh_lock);
4318 kmem_free(vfsp->vfs_femhead, sizeof (*(vfsp->vfs_femhead)));
4319 vfsp->vfs_femhead = NULL;
4320 }
4321
4322 if (vfsp->vfs_implp)
4323 vfsimpl_teardown(vfsp);
4324 sema_destroy(&vfsp->vfs_reflock);
4325 kmem_cache_free(vfs_cache, vfsp);
4326 }
4327
4328 /*
4329 * Increments the vfs reference count by one atomically.
4330 */
4331 void
4332 vfs_hold(vfs_t *vfsp)
4333 {
4334 atomic_add_32(&vfsp->vfs_count, 1);
4335 ASSERT(vfsp->vfs_count != 0);
4336 }
4337
4338 /*
4339 * Decrements the vfs reference count by one atomically. When
4340 * vfs reference count becomes zero, it calls the file system
4341 * specific vfs_freevfs() to free up the resources.
4342 */
4343 void
4344 vfs_rele(vfs_t *vfsp)
4345 {
4346 ASSERT(vfsp->vfs_count != 0);
4347 if (atomic_add_32_nv(&vfsp->vfs_count, -1) == 0) {
4348 VFS_FREEVFS(vfsp);
4349 lofi_remove(vfsp);
4350 if (vfsp->vfs_zone)
4351 zone_rele_ref(&vfsp->vfs_implp->vi_zone_ref,
4352 ZONE_REF_VFS);
4353 vfs_freemnttab(vfsp);
4354 vfs_free(vfsp);
4355 }
4356 }
4357
4358 /*
4359 * Generic operations vector support.
4360 *
4361 * This is used to build operations vectors for both the vfs and vnode.
4362 * It's normally called only when a file system is loaded.
4363 *
4364 * There are many possible algorithms for this, including the following:
4365 *
4366 * (1) scan the list of known operations; for each, see if the file system
4367 * includes an entry for it, and fill it in as appropriate.
|
67 #include <sys/modctl.h>
68 #include <sys/ddi.h>
69 #include <sys/pathname.h>
70 #include <sys/bootconf.h>
71 #include <sys/dumphdr.h>
72 #include <sys/dc_ki.h>
73 #include <sys/poll.h>
74 #include <sys/sunddi.h>
75 #include <sys/sysmacros.h>
76 #include <sys/zone.h>
77 #include <sys/policy.h>
78 #include <sys/ctfs.h>
79 #include <sys/objfs.h>
80 #include <sys/console.h>
81 #include <sys/reboot.h>
82 #include <sys/attr.h>
83 #include <sys/zio.h>
84 #include <sys/spa.h>
85 #include <sys/lofi.h>
86 #include <sys/bootprops.h>
87 #include <sys/fsh.h>
88 #include <sys/fsh_impl.h>
89
90 #include <vm/page.h>
91
92 #include <fs/fs_subr.h>
93 /* Private interfaces to create vopstats-related data structures */
94 extern void initialize_vopstats(vopstats_t *);
95 extern vopstats_t *get_fstype_vopstats(struct vfs *, struct vfssw *);
96 extern vsk_anchor_t *get_vskstat_anchor(struct vfs *);
97
98 static void vfs_clearmntopt_nolock(mntopts_t *, const char *, int);
99 static void vfs_setmntopt_nolock(mntopts_t *, const char *,
100 const char *, int, int);
101 static int vfs_optionisset_nolock(const mntopts_t *, const char *, char **);
102 static void vfs_freemnttab(struct vfs *);
103 static void vfs_freeopt(mntopt_t *);
104 static void vfs_swapopttbl_nolock(mntopts_t *, mntopts_t *);
105 static void vfs_swapopttbl(mntopts_t *, mntopts_t *);
106 static void vfs_copyopttbl_extend(const mntopts_t *, mntopts_t *, int);
107 static void vfs_createopttbl_extend(mntopts_t *, const char *,
108 const mntopts_t *);
201 { MNTOPT_NONBMAND, nonbmand_cancel, NULL, 0,
202 (void *)0 },
203 { MNTOPT_EXEC, exec_cancel, NULL, 0,
204 (void *)0 },
205 { MNTOPT_NOEXEC, noexec_cancel, NULL, 0,
206 (void *)0 },
207 };
208
209 const mntopts_t vfs_mntopts = {
210 sizeof (mntopts) / sizeof (mntopt_t),
211 (mntopt_t *)&mntopts[0]
212 };
213
214 /*
215 * File system operation dispatch functions.
216 */
217
218 int
219 fsop_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
220 {
221 return (fsh_mount(vfsp, mvp, uap, cr));
222 }
223
224 int
225 fsop_unmount(vfs_t *vfsp, int flag, cred_t *cr)
226 {
227 return (fsh_unmount(vfsp, flag, cr));
228 }
229
230 int
231 fsop_root(vfs_t *vfsp, vnode_t **vpp)
232 {
233 refstr_t *mntpt;
234 int ret = fsh_root(vfsp, vpp);
235 /*
236 * Make sure this root has a path. With lofs, it is possible to have
237 * a NULL mountpoint.
238 */
239 if (ret == 0 && vfsp->vfs_mntpt != NULL && (*vpp)->v_path == NULL) {
240 mntpt = vfs_getmntpoint(vfsp);
241 vn_setpath_str(*vpp, refstr_value(mntpt),
242 strlen(refstr_value(mntpt)));
243 refstr_rele(mntpt);
244 }
245
246 return (ret);
247 }
248
249 int
250 fsop_statfs(vfs_t *vfsp, statvfs64_t *sp)
251 {
252 return (fsh_statfs(vfsp, sp));
253 }
254
255 int
256 fsop_sync(vfs_t *vfsp, short flag, cred_t *cr)
257 {
258 return (*(vfsp)->vfs_op->vfs_sync)(vfsp, flag, cr);
259 }
260
261 int
262 fsop_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
263 {
264 /*
265 * In order to handle system attribute fids in a manner
266 * transparent to the underlying fs, we embed the fid for
267 * the sysattr parent object in the sysattr fid and tack on
268 * some extra bytes that only the sysattr layer knows about.
269 *
270 * This guarantees that sysattr fids are larger than other fids
271 * for this vfs. If the vfs supports the sysattr view interface
272 * (as indicated by VFSFT_SYSATTR_VIEWS), we cannot have a size
273 * collision with XATTR_FIDSZ.
274 */
275 if (vfs_has_feature(vfsp, VFSFT_SYSATTR_VIEWS) &&
276 fidp->fid_len == XATTR_FIDSZ)
277 return (xattr_dir_vget(vfsp, vpp, fidp));
278
279 return (fsh_vget(vfsp, vpp, fidp));
280 }
281
282 int
283 fsop_mountroot(vfs_t *vfsp, enum whymountroot reason)
284 {
285 return (*(vfsp)->vfs_op->vfs_mountroot)(vfsp, reason);
286 }
287
288 void
289 fsop_freefs(vfs_t *vfsp)
290 {
291 (*(vfsp)->vfs_op->vfs_freevfs)(vfsp);
292 }
293
294 int
295 fsop_vnstate(vfs_t *vfsp, vnode_t *vp, vntrans_t nstate)
296 {
297 return ((*(vfsp)->vfs_op->vfs_vnstate)(vfsp, vp, nstate));
298 }
299
1770 if ((error == 0) && (copyout_error == 0)) {
1771 if (!remount) {
1772 /*
1773 * Don't call get_vskstat_anchor() while holding
1774 * locks since it allocates memory and calls
1775 * VFS_STATVFS(). For NFS, the latter can generate
1776 * an over-the-wire call.
1777 */
1778 vskap = get_vskstat_anchor(vfsp);
1779 /* Only take the lock if we have something to do */
1780 if (vskap != NULL) {
1781 vfs_lock_wait(vfsp);
1782 if (vfsp->vfs_flag & VFS_STATS) {
1783 vfsp->vfs_vskap = vskap;
1784 }
1785 vfs_unlock(vfsp);
1786 }
1787 }
1788 /* Return vfsp to caller. */
1789 *vfspp = vfsp;
1790 fsh_exec_mount_callbacks(vfsp);
1791 }
1792 errout:
1793 vfs_freeopttbl(&mnt_mntopts);
1794 if (resource != NULL)
1795 kmem_free(resource, strlen(resource) + 1);
1796 if (mountpt != NULL)
1797 kmem_free(mountpt, strlen(mountpt) + 1);
1798 /*
1799 * It is possible we errored prior to adding to mount in progress
1800 * table. Must free vnode we acquired with successful lookupname.
1801 */
1802 if (addmip)
1803 VN_RELE(bvp);
1804 if (delmip)
1805 vfs_delmip(vfsp);
1806 ASSERT(vswp != NULL);
1807 vfs_unrefvfssw(vswp);
1808 if (inargs != opts)
1809 kmem_free(inargs, MAX_MNTOPT_STR);
1810 if (copyout_error) {
4221 VFSNAME_ROOT, { .error = vfsstray },
4222 VFSNAME_STATVFS, { .error = vfsstray },
4223 VFSNAME_SYNC, { .vfs_sync = vfsstray_sync },
4224 VFSNAME_VGET, { .error = vfsstray },
4225 VFSNAME_MOUNTROOT, { .error = vfsstray },
4226 VFSNAME_FREEVFS, { .error = vfsstray },
4227 VFSNAME_VNSTATE, { .error = vfsstray },
4228 NULL, NULL
4229 };
4230
4231 /* Create vfs cache */
4232 vfs_cache = kmem_cache_create("vfs_cache", sizeof (struct vfs),
4233 sizeof (uintptr_t), NULL, NULL, NULL, NULL, NULL, 0);
4234
4235 /* Initialize the vnode cache (file systems may use it during init). */
4236 vn_create_cache();
4237
4238 /* Setup event monitor framework */
4239 fem_init();
4240
4241 /* Setup filesystem hook framework */
4242 fsh_init();
4243
4244 /* Initialize the dummy stray file system type. */
4245 error = vfs_setfsops(0, stray_vfsops_template, NULL);
4246
4247 /* Initialize the dummy EIO file system. */
4248 error = vfs_makefsops(EIO_vfsops_template, &EIO_vfsops);
4249 if (error != 0) {
4250 cmn_err(CE_WARN, "vfsinit: bad EIO vfs ops template");
4251 /* Shouldn't happen, but not bad enough to panic */
4252 }
4253
4254 VFS_INIT(&EIO_vfs, EIO_vfsops, (caddr_t)NULL);
4255
4256 /*
4257 * Default EIO_vfs.vfs_flag to VFS_UNMOUNTED so a lookup
4258 * on this vfs can immediately notice it's invalid.
4259 */
4260 EIO_vfs.vfs_flag |= VFS_UNMOUNTED;
4261
4262 /*
4263 * Call the init routines of non-loadable filesystems only.
4308 /*
4309 * One would be tempted to assert that "vfsp->vfs_count == 0".
4310 * The problem is that this gets called out of domount() with
4311 * a partially initialized vfs and a vfs_count of 1. This is
4312 * also called from vfs_rele() with a vfs_count of 0. We can't
4313 * call VFS_RELE() from domount() if VFS_MOUNT() hasn't successfully
4314 * returned. This is because VFS_MOUNT() fully initializes the
4315 * vfs structure and its associated data. VFS_RELE() will call
4316 * VFS_FREEVFS() which may panic the system if the data structures
4317 * aren't fully initialized from a successful VFS_MOUNT()).
4318 */
4319
4320 /* If FEM was in use, make sure everything gets cleaned up */
4321 if (vfsp->vfs_femhead) {
4322 ASSERT(vfsp->vfs_femhead->femh_list == NULL);
4323 mutex_destroy(&vfsp->vfs_femhead->femh_lock);
4324 kmem_free(vfsp->vfs_femhead, sizeof (*(vfsp->vfs_femhead)));
4325 vfsp->vfs_femhead = NULL;
4326 }
4327
4328 /*
4329 * FSH cleanup
4330 * There's no need here to use atomic operations on vfs_fshrecord.
4331 */
4332 fsh_fsrec_destroy(vfsp->vfs_fshrecord);
4333 vfsp->vfs_fshrecord = NULL;
4334
4335 if (vfsp->vfs_implp)
4336 vfsimpl_teardown(vfsp);
4337 sema_destroy(&vfsp->vfs_reflock);
4338 kmem_cache_free(vfs_cache, vfsp);
4339 }
4340
4341 /*
4342 * Increments the vfs reference count by one atomically.
4343 */
4344 void
4345 vfs_hold(vfs_t *vfsp)
4346 {
4347 atomic_add_32(&vfsp->vfs_count, 1);
4348 ASSERT(vfsp->vfs_count != 0);
4349 }
4350
4351 /*
4352 * Decrements the vfs reference count by one atomically. When
4353 * vfs reference count becomes zero, it calls the file system
4354 * specific vfs_freevfs() to free up the resources.
4355 */
4356 void
4357 vfs_rele(vfs_t *vfsp)
4358 {
4359 ASSERT(vfsp->vfs_count != 0);
4360 if (atomic_add_32_nv(&vfsp->vfs_count, -1) == 0) {
4361 fsh_exec_free_callbacks(vfsp);
4362 VFS_FREEVFS(vfsp);
4363 lofi_remove(vfsp);
4364 if (vfsp->vfs_zone)
4365 zone_rele_ref(&vfsp->vfs_implp->vi_zone_ref,
4366 ZONE_REF_VFS);
4367 vfs_freemnttab(vfsp);
4368 vfs_free(vfsp);
4369 }
4370 }
4371
4372 /*
4373 * Generic operations vector support.
4374 *
4375 * This is used to build operations vectors for both the vfs and vnode.
4376 * It's normally called only when a file system is loaded.
4377 *
4378 * There are many possible algorithms for this, including the following:
4379 *
4380 * (1) scan the list of known operations; for each, see if the file system
4381 * includes an entry for it, and fill it in as appropriate.
|