Print this page
2882 implement libzfs_core
2883 changing "canmount" property to "on" should not always remount dataset
2900 "zfs snapshot" should be able to create multiple, arbitrary snapshots at once
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Chris Siden <christopher.siden@delphix.com>
Reviewed by: Garrett D'Amore <garrett@damore.org>
Reviewed by: Bill Pijewski <wdp@joyent.com>
Reviewed by: Dan Kruchinin <dan.kruchinin@gmail.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/fs/zfs/zfs_vfsops.c
+++ new/usr/src/uts/common/fs/zfs/zfs_vfsops.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
↓ open down ↓ |
12 lines elided |
↑ open up ↑ |
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 + * Copyright (c) 2012 by Delphix. All rights reserved.
23 24 */
24 25
25 26 /* Portions Copyright 2010 Robert Milkowski */
26 27
27 28 #include <sys/types.h>
28 29 #include <sys/param.h>
29 30 #include <sys/systm.h>
30 31 #include <sys/sysmacros.h>
31 32 #include <sys/kmem.h>
32 33 #include <sys/pathname.h>
33 34 #include <sys/vnode.h>
34 35 #include <sys/vfs.h>
35 36 #include <sys/vfs_opreg.h>
36 37 #include <sys/mntent.h>
37 38 #include <sys/mount.h>
38 39 #include <sys/cmn_err.h>
39 40 #include "fs/fs_subr.h"
40 41 #include <sys/zfs_znode.h>
41 42 #include <sys/zfs_dir.h>
42 43 #include <sys/zil.h>
43 44 #include <sys/fs/zfs.h>
44 45 #include <sys/dmu.h>
45 46 #include <sys/dsl_prop.h>
46 47 #include <sys/dsl_dataset.h>
47 48 #include <sys/dsl_deleg.h>
48 49 #include <sys/spa.h>
49 50 #include <sys/zap.h>
50 51 #include <sys/sa.h>
51 52 #include <sys/varargs.h>
52 53 #include <sys/policy.h>
53 54 #include <sys/atomic.h>
54 55 #include <sys/mkdev.h>
55 56 #include <sys/modctl.h>
56 57 #include <sys/refstr.h>
57 58 #include <sys/zfs_ioctl.h>
58 59 #include <sys/zfs_ctldir.h>
59 60 #include <sys/zfs_fuid.h>
60 61 #include <sys/bootconf.h>
61 62 #include <sys/sunddi.h>
62 63 #include <sys/dnlc.h>
63 64 #include <sys/dmu_objset.h>
64 65 #include <sys/spa_boot.h>
65 66 #include <sys/sa.h>
66 67 #include "zfs_comutil.h"
67 68
68 69 int zfsfstype;
69 70 vfsops_t *zfs_vfsops = NULL;
70 71 static major_t zfs_major;
71 72 static minor_t zfs_minor;
72 73 static kmutex_t zfs_dev_mtx;
73 74
74 75 extern int sys_shutdown;
75 76
76 77 static int zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr);
77 78 static int zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr);
78 79 static int zfs_mountroot(vfs_t *vfsp, enum whymountroot);
79 80 static int zfs_root(vfs_t *vfsp, vnode_t **vpp);
80 81 static int zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp);
81 82 static int zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp);
82 83 static void zfs_freevfs(vfs_t *vfsp);
83 84
84 85 static const fs_operation_def_t zfs_vfsops_template[] = {
85 86 VFSNAME_MOUNT, { .vfs_mount = zfs_mount },
86 87 VFSNAME_MOUNTROOT, { .vfs_mountroot = zfs_mountroot },
87 88 VFSNAME_UNMOUNT, { .vfs_unmount = zfs_umount },
88 89 VFSNAME_ROOT, { .vfs_root = zfs_root },
89 90 VFSNAME_STATVFS, { .vfs_statvfs = zfs_statvfs },
90 91 VFSNAME_SYNC, { .vfs_sync = zfs_sync },
91 92 VFSNAME_VGET, { .vfs_vget = zfs_vget },
92 93 VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs },
93 94 NULL, NULL
94 95 };
95 96
96 97 static const fs_operation_def_t zfs_vfsops_eio_template[] = {
97 98 VFSNAME_FREEVFS, { .vfs_freevfs = zfs_freevfs },
98 99 NULL, NULL
99 100 };
100 101
101 102 /*
102 103 * We need to keep a count of active fs's.
103 104 * This is necessary to prevent our module
104 105 * from being unloaded after a umount -f
105 106 */
106 107 static uint32_t zfs_active_fs_count = 0;
107 108
108 109 static char *noatime_cancel[] = { MNTOPT_ATIME, NULL };
109 110 static char *atime_cancel[] = { MNTOPT_NOATIME, NULL };
110 111 static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL };
111 112 static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL };
112 113
113 114 /*
114 115 * MO_DEFAULT is not used since the default value is determined
115 116 * by the equivalent property.
116 117 */
117 118 static mntopt_t mntopts[] = {
118 119 { MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, NULL },
119 120 { MNTOPT_XATTR, xattr_cancel, NULL, 0, NULL },
120 121 { MNTOPT_NOATIME, noatime_cancel, NULL, 0, NULL },
121 122 { MNTOPT_ATIME, atime_cancel, NULL, 0, NULL }
122 123 };
123 124
124 125 static mntopts_t zfs_mntopts = {
125 126 sizeof (mntopts) / sizeof (mntopt_t),
126 127 mntopts
127 128 };
128 129
129 130 /*ARGSUSED*/
130 131 int
131 132 zfs_sync(vfs_t *vfsp, short flag, cred_t *cr)
132 133 {
133 134 /*
134 135 * Data integrity is job one. We don't want a compromised kernel
135 136 * writing to the storage pool, so we never sync during panic.
136 137 */
137 138 if (panicstr)
138 139 return (0);
139 140
140 141 /*
141 142 * SYNC_ATTR is used by fsflush() to force old filesystems like UFS
142 143 * to sync metadata, which they would otherwise cache indefinitely.
143 144 * Semantically, the only requirement is that the sync be initiated.
144 145 * The DMU syncs out txgs frequently, so there's nothing to do.
145 146 */
146 147 if (flag & SYNC_ATTR)
147 148 return (0);
148 149
149 150 if (vfsp != NULL) {
150 151 /*
151 152 * Sync a specific filesystem.
152 153 */
153 154 zfsvfs_t *zfsvfs = vfsp->vfs_data;
154 155 dsl_pool_t *dp;
155 156
156 157 ZFS_ENTER(zfsvfs);
157 158 dp = dmu_objset_pool(zfsvfs->z_os);
158 159
159 160 /*
160 161 * If the system is shutting down, then skip any
161 162 * filesystems which may exist on a suspended pool.
162 163 */
163 164 if (sys_shutdown && spa_suspended(dp->dp_spa)) {
164 165 ZFS_EXIT(zfsvfs);
165 166 return (0);
166 167 }
167 168
168 169 if (zfsvfs->z_log != NULL)
169 170 zil_commit(zfsvfs->z_log, 0);
170 171
171 172 ZFS_EXIT(zfsvfs);
172 173 } else {
173 174 /*
174 175 * Sync all ZFS filesystems. This is what happens when you
175 176 * run sync(1M). Unlike other filesystems, ZFS honors the
176 177 * request by waiting for all pools to commit all dirty data.
177 178 */
178 179 spa_sync_allpools();
179 180 }
180 181
181 182 return (0);
182 183 }
183 184
184 185 static int
185 186 zfs_create_unique_device(dev_t *dev)
186 187 {
187 188 major_t new_major;
188 189
189 190 do {
190 191 ASSERT3U(zfs_minor, <=, MAXMIN32);
191 192 minor_t start = zfs_minor;
192 193 do {
193 194 mutex_enter(&zfs_dev_mtx);
194 195 if (zfs_minor >= MAXMIN32) {
195 196 /*
196 197 * If we're still using the real major
197 198 * keep out of /dev/zfs and /dev/zvol minor
198 199 * number space. If we're using a getudev()'ed
199 200 * major number, we can use all of its minors.
200 201 */
201 202 if (zfs_major == ddi_name_to_major(ZFS_DRIVER))
202 203 zfs_minor = ZFS_MIN_MINOR;
203 204 else
204 205 zfs_minor = 0;
205 206 } else {
206 207 zfs_minor++;
207 208 }
208 209 *dev = makedevice(zfs_major, zfs_minor);
209 210 mutex_exit(&zfs_dev_mtx);
210 211 } while (vfs_devismounted(*dev) && zfs_minor != start);
211 212 if (zfs_minor == start) {
212 213 /*
213 214 * We are using all ~262,000 minor numbers for the
214 215 * current major number. Create a new major number.
215 216 */
216 217 if ((new_major = getudev()) == (major_t)-1) {
217 218 cmn_err(CE_WARN,
218 219 "zfs_mount: Can't get unique major "
219 220 "device number.");
220 221 return (-1);
221 222 }
222 223 mutex_enter(&zfs_dev_mtx);
223 224 zfs_major = new_major;
224 225 zfs_minor = 0;
225 226
226 227 mutex_exit(&zfs_dev_mtx);
227 228 } else {
228 229 break;
229 230 }
230 231 /* CONSTANTCONDITION */
231 232 } while (1);
232 233
233 234 return (0);
234 235 }
235 236
236 237 static void
237 238 atime_changed_cb(void *arg, uint64_t newval)
238 239 {
239 240 zfsvfs_t *zfsvfs = arg;
240 241
241 242 if (newval == TRUE) {
242 243 zfsvfs->z_atime = TRUE;
243 244 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME);
244 245 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0);
245 246 } else {
246 247 zfsvfs->z_atime = FALSE;
247 248 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME);
248 249 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0);
249 250 }
250 251 }
251 252
252 253 static void
253 254 xattr_changed_cb(void *arg, uint64_t newval)
254 255 {
255 256 zfsvfs_t *zfsvfs = arg;
256 257
257 258 if (newval == TRUE) {
258 259 /* XXX locking on vfs_flag? */
259 260 zfsvfs->z_vfs->vfs_flag |= VFS_XATTR;
260 261 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR);
261 262 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0);
262 263 } else {
263 264 /* XXX locking on vfs_flag? */
264 265 zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR;
265 266 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR);
266 267 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0);
267 268 }
268 269 }
269 270
270 271 static void
271 272 blksz_changed_cb(void *arg, uint64_t newval)
272 273 {
273 274 zfsvfs_t *zfsvfs = arg;
274 275
275 276 if (newval < SPA_MINBLOCKSIZE ||
276 277 newval > SPA_MAXBLOCKSIZE || !ISP2(newval))
277 278 newval = SPA_MAXBLOCKSIZE;
278 279
279 280 zfsvfs->z_max_blksz = newval;
280 281 zfsvfs->z_vfs->vfs_bsize = newval;
281 282 }
282 283
283 284 static void
284 285 readonly_changed_cb(void *arg, uint64_t newval)
285 286 {
286 287 zfsvfs_t *zfsvfs = arg;
287 288
288 289 if (newval) {
289 290 /* XXX locking on vfs_flag? */
290 291 zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
291 292 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW);
292 293 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0);
293 294 } else {
294 295 /* XXX locking on vfs_flag? */
295 296 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
296 297 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO);
297 298 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0);
298 299 }
299 300 }
300 301
301 302 static void
302 303 devices_changed_cb(void *arg, uint64_t newval)
303 304 {
304 305 zfsvfs_t *zfsvfs = arg;
305 306
306 307 if (newval == FALSE) {
307 308 zfsvfs->z_vfs->vfs_flag |= VFS_NODEVICES;
308 309 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES);
309 310 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES, NULL, 0);
310 311 } else {
311 312 zfsvfs->z_vfs->vfs_flag &= ~VFS_NODEVICES;
312 313 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES);
313 314 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES, NULL, 0);
314 315 }
315 316 }
316 317
317 318 static void
318 319 setuid_changed_cb(void *arg, uint64_t newval)
319 320 {
320 321 zfsvfs_t *zfsvfs = arg;
321 322
322 323 if (newval == FALSE) {
323 324 zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID;
324 325 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID);
325 326 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0);
326 327 } else {
327 328 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID;
328 329 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID);
329 330 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0);
330 331 }
331 332 }
332 333
333 334 static void
334 335 exec_changed_cb(void *arg, uint64_t newval)
335 336 {
336 337 zfsvfs_t *zfsvfs = arg;
337 338
338 339 if (newval == FALSE) {
339 340 zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC;
340 341 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC);
341 342 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0);
342 343 } else {
343 344 zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC;
344 345 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC);
345 346 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0);
346 347 }
347 348 }
348 349
349 350 /*
350 351 * The nbmand mount option can be changed at mount time.
351 352 * We can't allow it to be toggled on live file systems or incorrect
352 353 * behavior may be seen from cifs clients
353 354 *
354 355 * This property isn't registered via dsl_prop_register(), but this callback
355 356 * will be called when a file system is first mounted
356 357 */
357 358 static void
358 359 nbmand_changed_cb(void *arg, uint64_t newval)
359 360 {
360 361 zfsvfs_t *zfsvfs = arg;
361 362 if (newval == FALSE) {
362 363 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND);
363 364 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0);
364 365 } else {
365 366 vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND);
366 367 vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0);
367 368 }
368 369 }
369 370
370 371 static void
371 372 snapdir_changed_cb(void *arg, uint64_t newval)
372 373 {
373 374 zfsvfs_t *zfsvfs = arg;
374 375
375 376 zfsvfs->z_show_ctldir = newval;
376 377 }
377 378
378 379 static void
379 380 vscan_changed_cb(void *arg, uint64_t newval)
380 381 {
381 382 zfsvfs_t *zfsvfs = arg;
382 383
383 384 zfsvfs->z_vscan = newval;
384 385 }
385 386
386 387 static void
387 388 acl_mode_changed_cb(void *arg, uint64_t newval)
388 389 {
389 390 zfsvfs_t *zfsvfs = arg;
390 391
391 392 zfsvfs->z_acl_mode = newval;
392 393 }
393 394
394 395 static void
395 396 acl_inherit_changed_cb(void *arg, uint64_t newval)
396 397 {
397 398 zfsvfs_t *zfsvfs = arg;
398 399
399 400 zfsvfs->z_acl_inherit = newval;
400 401 }
401 402
402 403 static int
403 404 zfs_register_callbacks(vfs_t *vfsp)
404 405 {
405 406 struct dsl_dataset *ds = NULL;
406 407 objset_t *os = NULL;
407 408 zfsvfs_t *zfsvfs = NULL;
408 409 uint64_t nbmand;
409 410 int readonly, do_readonly = B_FALSE;
410 411 int setuid, do_setuid = B_FALSE;
411 412 int exec, do_exec = B_FALSE;
412 413 int devices, do_devices = B_FALSE;
413 414 int xattr, do_xattr = B_FALSE;
414 415 int atime, do_atime = B_FALSE;
415 416 int error = 0;
416 417
417 418 ASSERT(vfsp);
418 419 zfsvfs = vfsp->vfs_data;
419 420 ASSERT(zfsvfs);
420 421 os = zfsvfs->z_os;
421 422
422 423 /*
423 424 * The act of registering our callbacks will destroy any mount
424 425 * options we may have. In order to enable temporary overrides
425 426 * of mount options, we stash away the current values and
426 427 * restore them after we register the callbacks.
427 428 */
428 429 if (vfs_optionisset(vfsp, MNTOPT_RO, NULL) ||
429 430 !spa_writeable(dmu_objset_spa(os))) {
430 431 readonly = B_TRUE;
431 432 do_readonly = B_TRUE;
432 433 } else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) {
433 434 readonly = B_FALSE;
434 435 do_readonly = B_TRUE;
435 436 }
436 437 if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) {
437 438 devices = B_FALSE;
438 439 setuid = B_FALSE;
439 440 do_devices = B_TRUE;
440 441 do_setuid = B_TRUE;
441 442 } else {
442 443 if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) {
443 444 devices = B_FALSE;
444 445 do_devices = B_TRUE;
445 446 } else if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) {
446 447 devices = B_TRUE;
447 448 do_devices = B_TRUE;
448 449 }
449 450
450 451 if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) {
451 452 setuid = B_FALSE;
452 453 do_setuid = B_TRUE;
453 454 } else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) {
454 455 setuid = B_TRUE;
455 456 do_setuid = B_TRUE;
456 457 }
457 458 }
458 459 if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) {
459 460 exec = B_FALSE;
460 461 do_exec = B_TRUE;
461 462 } else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) {
462 463 exec = B_TRUE;
463 464 do_exec = B_TRUE;
464 465 }
465 466 if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) {
466 467 xattr = B_FALSE;
467 468 do_xattr = B_TRUE;
468 469 } else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) {
469 470 xattr = B_TRUE;
470 471 do_xattr = B_TRUE;
471 472 }
472 473 if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) {
473 474 atime = B_FALSE;
474 475 do_atime = B_TRUE;
475 476 } else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) {
476 477 atime = B_TRUE;
477 478 do_atime = B_TRUE;
478 479 }
479 480
480 481 /*
481 482 * nbmand is a special property. It can only be changed at
482 483 * mount time.
483 484 *
484 485 * This is weird, but it is documented to only be changeable
485 486 * at mount time.
486 487 */
487 488 if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) {
488 489 nbmand = B_FALSE;
489 490 } else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) {
490 491 nbmand = B_TRUE;
491 492 } else {
492 493 char osname[MAXNAMELEN];
493 494
494 495 dmu_objset_name(os, osname);
495 496 if (error = dsl_prop_get_integer(osname, "nbmand", &nbmand,
496 497 NULL)) {
497 498 return (error);
498 499 }
499 500 }
500 501
501 502 /*
502 503 * Register property callbacks.
503 504 *
504 505 * It would probably be fine to just check for i/o error from
505 506 * the first prop_register(), but I guess I like to go
506 507 * overboard...
507 508 */
508 509 ds = dmu_objset_ds(os);
509 510 error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs);
510 511 error = error ? error : dsl_prop_register(ds,
511 512 "xattr", xattr_changed_cb, zfsvfs);
512 513 error = error ? error : dsl_prop_register(ds,
513 514 "recordsize", blksz_changed_cb, zfsvfs);
514 515 error = error ? error : dsl_prop_register(ds,
515 516 "readonly", readonly_changed_cb, zfsvfs);
516 517 error = error ? error : dsl_prop_register(ds,
517 518 "devices", devices_changed_cb, zfsvfs);
518 519 error = error ? error : dsl_prop_register(ds,
519 520 "setuid", setuid_changed_cb, zfsvfs);
520 521 error = error ? error : dsl_prop_register(ds,
521 522 "exec", exec_changed_cb, zfsvfs);
522 523 error = error ? error : dsl_prop_register(ds,
523 524 "snapdir", snapdir_changed_cb, zfsvfs);
524 525 error = error ? error : dsl_prop_register(ds,
525 526 "aclmode", acl_mode_changed_cb, zfsvfs);
526 527 error = error ? error : dsl_prop_register(ds,
527 528 "aclinherit", acl_inherit_changed_cb, zfsvfs);
528 529 error = error ? error : dsl_prop_register(ds,
529 530 "vscan", vscan_changed_cb, zfsvfs);
530 531 if (error)
531 532 goto unregister;
532 533
533 534 /*
534 535 * Invoke our callbacks to restore temporary mount options.
535 536 */
536 537 if (do_readonly)
537 538 readonly_changed_cb(zfsvfs, readonly);
538 539 if (do_setuid)
539 540 setuid_changed_cb(zfsvfs, setuid);
540 541 if (do_exec)
541 542 exec_changed_cb(zfsvfs, exec);
542 543 if (do_devices)
543 544 devices_changed_cb(zfsvfs, devices);
544 545 if (do_xattr)
545 546 xattr_changed_cb(zfsvfs, xattr);
546 547 if (do_atime)
547 548 atime_changed_cb(zfsvfs, atime);
548 549
549 550 nbmand_changed_cb(zfsvfs, nbmand);
550 551
551 552 return (0);
552 553
553 554 unregister:
554 555 /*
555 556 * We may attempt to unregister some callbacks that are not
556 557 * registered, but this is OK; it will simply return ENOMSG,
557 558 * which we will ignore.
558 559 */
559 560 (void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs);
560 561 (void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs);
561 562 (void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs);
562 563 (void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs);
563 564 (void) dsl_prop_unregister(ds, "devices", devices_changed_cb, zfsvfs);
564 565 (void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs);
565 566 (void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs);
566 567 (void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs);
567 568 (void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs);
568 569 (void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb,
569 570 zfsvfs);
570 571 (void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs);
571 572 return (error);
572 573
573 574 }
574 575
575 576 static int
576 577 zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
577 578 uint64_t *userp, uint64_t *groupp)
578 579 {
579 580 znode_phys_t *znp = data;
580 581 int error = 0;
581 582
582 583 /*
583 584 * Is it a valid type of object to track?
584 585 */
585 586 if (bonustype != DMU_OT_ZNODE && bonustype != DMU_OT_SA)
586 587 return (ENOENT);
587 588
588 589 /*
589 590 * If we have a NULL data pointer
590 591 * then assume the id's aren't changing and
591 592 * return EEXIST to the dmu to let it know to
592 593 * use the same ids
593 594 */
594 595 if (data == NULL)
595 596 return (EEXIST);
596 597
597 598 if (bonustype == DMU_OT_ZNODE) {
598 599 *userp = znp->zp_uid;
599 600 *groupp = znp->zp_gid;
600 601 } else {
601 602 int hdrsize;
602 603
603 604 ASSERT(bonustype == DMU_OT_SA);
604 605 hdrsize = sa_hdrsize(data);
605 606
606 607 if (hdrsize != 0) {
607 608 *userp = *((uint64_t *)((uintptr_t)data + hdrsize +
608 609 SA_UID_OFFSET));
609 610 *groupp = *((uint64_t *)((uintptr_t)data + hdrsize +
610 611 SA_GID_OFFSET));
611 612 } else {
612 613 /*
613 614 * This should only happen for newly created
614 615 * files that haven't had the znode data filled
615 616 * in yet.
616 617 */
617 618 *userp = 0;
618 619 *groupp = 0;
619 620 }
620 621 }
621 622 return (error);
622 623 }
623 624
624 625 static void
625 626 fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr,
626 627 char *domainbuf, int buflen, uid_t *ridp)
627 628 {
628 629 uint64_t fuid;
629 630 const char *domain;
630 631
631 632 fuid = strtonum(fuidstr, NULL);
632 633
633 634 domain = zfs_fuid_find_by_idx(zfsvfs, FUID_INDEX(fuid));
634 635 if (domain)
635 636 (void) strlcpy(domainbuf, domain, buflen);
636 637 else
637 638 domainbuf[0] = '\0';
638 639 *ridp = FUID_RID(fuid);
639 640 }
640 641
641 642 static uint64_t
642 643 zfs_userquota_prop_to_obj(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type)
643 644 {
644 645 switch (type) {
645 646 case ZFS_PROP_USERUSED:
646 647 return (DMU_USERUSED_OBJECT);
647 648 case ZFS_PROP_GROUPUSED:
648 649 return (DMU_GROUPUSED_OBJECT);
649 650 case ZFS_PROP_USERQUOTA:
650 651 return (zfsvfs->z_userquota_obj);
651 652 case ZFS_PROP_GROUPQUOTA:
652 653 return (zfsvfs->z_groupquota_obj);
653 654 }
654 655 return (0);
655 656 }
656 657
657 658 int
658 659 zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
659 660 uint64_t *cookiep, void *vbuf, uint64_t *bufsizep)
660 661 {
661 662 int error;
662 663 zap_cursor_t zc;
663 664 zap_attribute_t za;
664 665 zfs_useracct_t *buf = vbuf;
665 666 uint64_t obj;
666 667
667 668 if (!dmu_objset_userspace_present(zfsvfs->z_os))
668 669 return (ENOTSUP);
669 670
670 671 obj = zfs_userquota_prop_to_obj(zfsvfs, type);
671 672 if (obj == 0) {
672 673 *bufsizep = 0;
673 674 return (0);
674 675 }
675 676
676 677 for (zap_cursor_init_serialized(&zc, zfsvfs->z_os, obj, *cookiep);
677 678 (error = zap_cursor_retrieve(&zc, &za)) == 0;
678 679 zap_cursor_advance(&zc)) {
679 680 if ((uintptr_t)buf - (uintptr_t)vbuf + sizeof (zfs_useracct_t) >
680 681 *bufsizep)
681 682 break;
682 683
683 684 fuidstr_to_sid(zfsvfs, za.za_name,
684 685 buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid);
685 686
686 687 buf->zu_space = za.za_first_integer;
687 688 buf++;
688 689 }
689 690 if (error == ENOENT)
690 691 error = 0;
691 692
692 693 ASSERT3U((uintptr_t)buf - (uintptr_t)vbuf, <=, *bufsizep);
693 694 *bufsizep = (uintptr_t)buf - (uintptr_t)vbuf;
694 695 *cookiep = zap_cursor_serialize(&zc);
695 696 zap_cursor_fini(&zc);
696 697 return (error);
697 698 }
698 699
699 700 /*
700 701 * buf must be big enough (eg, 32 bytes)
701 702 */
702 703 static int
703 704 id_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid,
704 705 char *buf, boolean_t addok)
705 706 {
706 707 uint64_t fuid;
707 708 int domainid = 0;
708 709
709 710 if (domain && domain[0]) {
710 711 domainid = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok);
711 712 if (domainid == -1)
712 713 return (ENOENT);
713 714 }
714 715 fuid = FUID_ENCODE(domainid, rid);
715 716 (void) sprintf(buf, "%llx", (longlong_t)fuid);
716 717 return (0);
717 718 }
718 719
719 720 int
720 721 zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
721 722 const char *domain, uint64_t rid, uint64_t *valp)
722 723 {
723 724 char buf[32];
724 725 int err;
725 726 uint64_t obj;
726 727
727 728 *valp = 0;
728 729
729 730 if (!dmu_objset_userspace_present(zfsvfs->z_os))
730 731 return (ENOTSUP);
731 732
732 733 obj = zfs_userquota_prop_to_obj(zfsvfs, type);
733 734 if (obj == 0)
734 735 return (0);
735 736
736 737 err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_FALSE);
737 738 if (err)
738 739 return (err);
739 740
740 741 err = zap_lookup(zfsvfs->z_os, obj, buf, 8, 1, valp);
741 742 if (err == ENOENT)
742 743 err = 0;
743 744 return (err);
744 745 }
745 746
746 747 int
747 748 zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
748 749 const char *domain, uint64_t rid, uint64_t quota)
749 750 {
750 751 char buf[32];
751 752 int err;
752 753 dmu_tx_t *tx;
753 754 uint64_t *objp;
754 755 boolean_t fuid_dirtied;
755 756
756 757 if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA)
757 758 return (EINVAL);
758 759
759 760 if (zfsvfs->z_version < ZPL_VERSION_USERSPACE)
760 761 return (ENOTSUP);
761 762
762 763 objp = (type == ZFS_PROP_USERQUOTA) ? &zfsvfs->z_userquota_obj :
763 764 &zfsvfs->z_groupquota_obj;
764 765
765 766 err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_TRUE);
766 767 if (err)
767 768 return (err);
768 769 fuid_dirtied = zfsvfs->z_fuid_dirty;
769 770
770 771 tx = dmu_tx_create(zfsvfs->z_os);
771 772 dmu_tx_hold_zap(tx, *objp ? *objp : DMU_NEW_OBJECT, B_TRUE, NULL);
772 773 if (*objp == 0) {
773 774 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
774 775 zfs_userquota_prop_prefixes[type]);
775 776 }
776 777 if (fuid_dirtied)
777 778 zfs_fuid_txhold(zfsvfs, tx);
778 779 err = dmu_tx_assign(tx, TXG_WAIT);
779 780 if (err) {
780 781 dmu_tx_abort(tx);
781 782 return (err);
782 783 }
783 784
784 785 mutex_enter(&zfsvfs->z_lock);
785 786 if (*objp == 0) {
786 787 *objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA,
787 788 DMU_OT_NONE, 0, tx);
788 789 VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
789 790 zfs_userquota_prop_prefixes[type], 8, 1, objp, tx));
790 791 }
791 792 mutex_exit(&zfsvfs->z_lock);
792 793
793 794 if (quota == 0) {
794 795 err = zap_remove(zfsvfs->z_os, *objp, buf, tx);
795 796 if (err == ENOENT)
796 797 err = 0;
797 798 } else {
798 799 err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, "a, tx);
799 800 }
800 801 ASSERT(err == 0);
801 802 if (fuid_dirtied)
802 803 zfs_fuid_sync(zfsvfs, tx);
803 804 dmu_tx_commit(tx);
804 805 return (err);
805 806 }
806 807
807 808 boolean_t
808 809 zfs_fuid_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid)
809 810 {
810 811 char buf[32];
811 812 uint64_t used, quota, usedobj, quotaobj;
812 813 int err;
813 814
814 815 usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
815 816 quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
816 817
817 818 if (quotaobj == 0 || zfsvfs->z_replay)
818 819 return (B_FALSE);
819 820
820 821 (void) sprintf(buf, "%llx", (longlong_t)fuid);
821 822 err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, "a);
822 823 if (err != 0)
823 824 return (B_FALSE);
824 825
825 826 err = zap_lookup(zfsvfs->z_os, usedobj, buf, 8, 1, &used);
826 827 if (err != 0)
827 828 return (B_FALSE);
828 829 return (used >= quota);
829 830 }
830 831
831 832 boolean_t
832 833 zfs_owner_overquota(zfsvfs_t *zfsvfs, znode_t *zp, boolean_t isgroup)
833 834 {
834 835 uint64_t fuid;
835 836 uint64_t quotaobj;
836 837
837 838 quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
838 839
839 840 fuid = isgroup ? zp->z_gid : zp->z_uid;
840 841
841 842 if (quotaobj == 0 || zfsvfs->z_replay)
842 843 return (B_FALSE);
843 844
844 845 return (zfs_fuid_overquota(zfsvfs, isgroup, fuid));
845 846 }
846 847
847 848 int
848 849 zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
849 850 {
850 851 objset_t *os;
851 852 zfsvfs_t *zfsvfs;
852 853 uint64_t zval;
853 854 int i, error;
854 855 uint64_t sa_obj;
855 856
856 857 zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
857 858
858 859 /*
859 860 * We claim to always be readonly so we can open snapshots;
860 861 * other ZPL code will prevent us from writing to snapshots.
861 862 */
862 863 error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os);
863 864 if (error) {
864 865 kmem_free(zfsvfs, sizeof (zfsvfs_t));
865 866 return (error);
866 867 }
867 868
868 869 /*
869 870 * Initialize the zfs-specific filesystem structure.
870 871 * Should probably make this a kmem cache, shuffle fields,
871 872 * and just bzero up to z_hold_mtx[].
872 873 */
873 874 zfsvfs->z_vfs = NULL;
874 875 zfsvfs->z_parent = zfsvfs;
875 876 zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE;
876 877 zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
877 878 zfsvfs->z_os = os;
878 879
879 880 error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
880 881 if (error) {
881 882 goto out;
882 883 } else if (zfsvfs->z_version >
883 884 zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) {
884 885 (void) printf("Can't mount a version %lld file system "
885 886 "on a version %lld pool\n. Pool must be upgraded to mount "
886 887 "this file system.", (u_longlong_t)zfsvfs->z_version,
887 888 (u_longlong_t)spa_version(dmu_objset_spa(os)));
888 889 error = ENOTSUP;
889 890 goto out;
890 891 }
891 892 if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0)
892 893 goto out;
893 894 zfsvfs->z_norm = (int)zval;
894 895
895 896 if ((error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &zval)) != 0)
896 897 goto out;
897 898 zfsvfs->z_utf8 = (zval != 0);
898 899
899 900 if ((error = zfs_get_zplprop(os, ZFS_PROP_CASE, &zval)) != 0)
900 901 goto out;
901 902 zfsvfs->z_case = (uint_t)zval;
902 903
903 904 /*
904 905 * Fold case on file systems that are always or sometimes case
905 906 * insensitive.
906 907 */
907 908 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
908 909 zfsvfs->z_case == ZFS_CASE_MIXED)
909 910 zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
910 911
911 912 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
912 913 zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
913 914
914 915 if (zfsvfs->z_use_sa) {
915 916 /* should either have both of these objects or none */
916 917 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
917 918 &sa_obj);
918 919 if (error)
919 920 return (error);
920 921 } else {
921 922 /*
922 923 * Pre SA versions file systems should never touch
923 924 * either the attribute registration or layout objects.
924 925 */
925 926 sa_obj = 0;
926 927 }
927 928
928 929 error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
929 930 &zfsvfs->z_attr_table);
930 931 if (error)
931 932 goto out;
932 933
933 934 if (zfsvfs->z_version >= ZPL_VERSION_SA)
934 935 sa_register_update_callback(os, zfs_sa_upgrade);
935 936
936 937 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
937 938 &zfsvfs->z_root);
938 939 if (error)
939 940 goto out;
940 941 ASSERT(zfsvfs->z_root != 0);
941 942
942 943 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
943 944 &zfsvfs->z_unlinkedobj);
944 945 if (error)
945 946 goto out;
946 947
947 948 error = zap_lookup(os, MASTER_NODE_OBJ,
948 949 zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA],
949 950 8, 1, &zfsvfs->z_userquota_obj);
950 951 if (error && error != ENOENT)
951 952 goto out;
952 953
953 954 error = zap_lookup(os, MASTER_NODE_OBJ,
954 955 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA],
955 956 8, 1, &zfsvfs->z_groupquota_obj);
956 957 if (error && error != ENOENT)
957 958 goto out;
958 959
959 960 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1,
960 961 &zfsvfs->z_fuid_obj);
961 962 if (error && error != ENOENT)
962 963 goto out;
963 964
964 965 error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1,
965 966 &zfsvfs->z_shares_dir);
966 967 if (error && error != ENOENT)
967 968 goto out;
968 969
969 970 mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
970 971 mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
971 972 list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
972 973 offsetof(znode_t, z_link_node));
973 974 rrw_init(&zfsvfs->z_teardown_lock);
974 975 rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
975 976 rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
976 977 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
977 978 mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL);
978 979
979 980 *zfvp = zfsvfs;
980 981 return (0);
981 982
982 983 out:
983 984 dmu_objset_disown(os, zfsvfs);
984 985 *zfvp = NULL;
985 986 kmem_free(zfsvfs, sizeof (zfsvfs_t));
986 987 return (error);
987 988 }
988 989
989 990 static int
990 991 zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
991 992 {
992 993 int error;
993 994
994 995 error = zfs_register_callbacks(zfsvfs->z_vfs);
995 996 if (error)
996 997 return (error);
997 998
998 999 /*
999 1000 * Set the objset user_ptr to track its zfsvfs.
1000 1001 */
1001 1002 mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
1002 1003 dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
1003 1004 mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
1004 1005
1005 1006 zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
1006 1007
1007 1008 /*
1008 1009 * If we are not mounting (ie: online recv), then we don't
1009 1010 * have to worry about replaying the log as we blocked all
1010 1011 * operations out since we closed the ZIL.
1011 1012 */
1012 1013 if (mounting) {
1013 1014 boolean_t readonly;
1014 1015
1015 1016 /*
1016 1017 * During replay we remove the read only flag to
1017 1018 * allow replays to succeed.
1018 1019 */
1019 1020 readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY;
1020 1021 if (readonly != 0)
1021 1022 zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
1022 1023 else
1023 1024 zfs_unlinked_drain(zfsvfs);
1024 1025
1025 1026 /*
1026 1027 * Parse and replay the intent log.
1027 1028 *
1028 1029 * Because of ziltest, this must be done after
1029 1030 * zfs_unlinked_drain(). (Further note: ziltest
1030 1031 * doesn't use readonly mounts, where
1031 1032 * zfs_unlinked_drain() isn't called.) This is because
1032 1033 * ziltest causes spa_sync() to think it's committed,
1033 1034 * but actually it is not, so the intent log contains
1034 1035 * many txg's worth of changes.
1035 1036 *
1036 1037 * In particular, if object N is in the unlinked set in
1037 1038 * the last txg to actually sync, then it could be
1038 1039 * actually freed in a later txg and then reallocated
1039 1040 * in a yet later txg. This would write a "create
1040 1041 * object N" record to the intent log. Normally, this
1041 1042 * would be fine because the spa_sync() would have
1042 1043 * written out the fact that object N is free, before
1043 1044 * we could write the "create object N" intent log
1044 1045 * record.
1045 1046 *
1046 1047 * But when we are in ziltest mode, we advance the "open
1047 1048 * txg" without actually spa_sync()-ing the changes to
1048 1049 * disk. So we would see that object N is still
1049 1050 * allocated and in the unlinked set, and there is an
1050 1051 * intent log record saying to allocate it.
1051 1052 */
1052 1053 if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) {
1053 1054 if (zil_replay_disable) {
1054 1055 zil_destroy(zfsvfs->z_log, B_FALSE);
1055 1056 } else {
1056 1057 zfsvfs->z_replay = B_TRUE;
1057 1058 zil_replay(zfsvfs->z_os, zfsvfs,
1058 1059 zfs_replay_vector);
1059 1060 zfsvfs->z_replay = B_FALSE;
1060 1061 }
1061 1062 }
1062 1063 zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */
1063 1064 }
1064 1065
1065 1066 return (0);
1066 1067 }
1067 1068
1068 1069 void
1069 1070 zfsvfs_free(zfsvfs_t *zfsvfs)
1070 1071 {
1071 1072 int i;
1072 1073 extern krwlock_t zfsvfs_lock; /* in zfs_znode.c */
1073 1074
1074 1075 /*
1075 1076 * This is a barrier to prevent the filesystem from going away in
1076 1077 * zfs_znode_move() until we can safely ensure that the filesystem is
1077 1078 * not unmounted. We consider the filesystem valid before the barrier
1078 1079 * and invalid after the barrier.
1079 1080 */
1080 1081 rw_enter(&zfsvfs_lock, RW_READER);
1081 1082 rw_exit(&zfsvfs_lock);
1082 1083
1083 1084 zfs_fuid_destroy(zfsvfs);
1084 1085
1085 1086 mutex_destroy(&zfsvfs->z_znodes_lock);
1086 1087 mutex_destroy(&zfsvfs->z_lock);
1087 1088 list_destroy(&zfsvfs->z_all_znodes);
1088 1089 rrw_destroy(&zfsvfs->z_teardown_lock);
1089 1090 rw_destroy(&zfsvfs->z_teardown_inactive_lock);
1090 1091 rw_destroy(&zfsvfs->z_fuid_lock);
1091 1092 for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
1092 1093 mutex_destroy(&zfsvfs->z_hold_mtx[i]);
1093 1094 kmem_free(zfsvfs, sizeof (zfsvfs_t));
1094 1095 }
1095 1096
1096 1097 static void
1097 1098 zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
1098 1099 {
1099 1100 zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
1100 1101 if (zfsvfs->z_vfs) {
1101 1102 if (zfsvfs->z_use_fuids) {
1102 1103 vfs_set_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
1103 1104 vfs_set_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
1104 1105 vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
1105 1106 vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
1106 1107 vfs_set_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
1107 1108 vfs_set_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
1108 1109 } else {
1109 1110 vfs_clear_feature(zfsvfs->z_vfs, VFSFT_XVATTR);
1110 1111 vfs_clear_feature(zfsvfs->z_vfs, VFSFT_SYSATTR_VIEWS);
1111 1112 vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACEMASKONACCESS);
1112 1113 vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACLONCREATE);
1113 1114 vfs_clear_feature(zfsvfs->z_vfs, VFSFT_ACCESS_FILTER);
1114 1115 vfs_clear_feature(zfsvfs->z_vfs, VFSFT_REPARSE);
1115 1116 }
1116 1117 }
1117 1118 zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
1118 1119 }
1119 1120
1120 1121 static int
1121 1122 zfs_domount(vfs_t *vfsp, char *osname)
1122 1123 {
1123 1124 dev_t mount_dev;
1124 1125 uint64_t recordsize, fsid_guid;
1125 1126 int error = 0;
1126 1127 zfsvfs_t *zfsvfs;
1127 1128
1128 1129 ASSERT(vfsp);
1129 1130 ASSERT(osname);
1130 1131
1131 1132 error = zfsvfs_create(osname, &zfsvfs);
1132 1133 if (error)
1133 1134 return (error);
1134 1135 zfsvfs->z_vfs = vfsp;
1135 1136
1136 1137 /* Initialize the generic filesystem structure. */
1137 1138 vfsp->vfs_bcount = 0;
1138 1139 vfsp->vfs_data = NULL;
1139 1140
1140 1141 if (zfs_create_unique_device(&mount_dev) == -1) {
1141 1142 error = ENODEV;
1142 1143 goto out;
1143 1144 }
1144 1145 ASSERT(vfs_devismounted(mount_dev) == 0);
1145 1146
1146 1147 if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize,
1147 1148 NULL))
1148 1149 goto out;
1149 1150
1150 1151 vfsp->vfs_dev = mount_dev;
1151 1152 vfsp->vfs_fstype = zfsfstype;
1152 1153 vfsp->vfs_bsize = recordsize;
1153 1154 vfsp->vfs_flag |= VFS_NOTRUNC;
1154 1155 vfsp->vfs_data = zfsvfs;
1155 1156
1156 1157 /*
1157 1158 * The fsid is 64 bits, composed of an 8-bit fs type, which
1158 1159 * separates our fsid from any other filesystem types, and a
1159 1160 * 56-bit objset unique ID. The objset unique ID is unique to
1160 1161 * all objsets open on this system, provided by unique_create().
1161 1162 * The 8-bit fs type must be put in the low bits of fsid[1]
1162 1163 * because that's where other Solaris filesystems put it.
1163 1164 */
1164 1165 fsid_guid = dmu_objset_fsid_guid(zfsvfs->z_os);
1165 1166 ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0);
1166 1167 vfsp->vfs_fsid.val[0] = fsid_guid;
1167 1168 vfsp->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) |
1168 1169 zfsfstype & 0xFF;
1169 1170
1170 1171 /*
1171 1172 * Set features for file system.
1172 1173 */
1173 1174 zfs_set_fuid_feature(zfsvfs);
1174 1175 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
1175 1176 vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
1176 1177 vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
1177 1178 vfs_set_feature(vfsp, VFSFT_NOCASESENSITIVE);
1178 1179 } else if (zfsvfs->z_case == ZFS_CASE_MIXED) {
1179 1180 vfs_set_feature(vfsp, VFSFT_DIRENTFLAGS);
1180 1181 vfs_set_feature(vfsp, VFSFT_CASEINSENSITIVE);
1181 1182 }
1182 1183 vfs_set_feature(vfsp, VFSFT_ZEROCOPY_SUPPORTED);
1183 1184
1184 1185 if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
1185 1186 uint64_t pval;
1186 1187
1187 1188 atime_changed_cb(zfsvfs, B_FALSE);
1188 1189 readonly_changed_cb(zfsvfs, B_TRUE);
1189 1190 if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL))
1190 1191 goto out;
1191 1192 xattr_changed_cb(zfsvfs, pval);
1192 1193 zfsvfs->z_issnap = B_TRUE;
1193 1194 zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED;
1194 1195
1195 1196 mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
1196 1197 dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
1197 1198 mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
1198 1199 } else {
1199 1200 error = zfsvfs_setup(zfsvfs, B_TRUE);
1200 1201 }
1201 1202
1202 1203 if (!zfsvfs->z_issnap)
1203 1204 zfsctl_create(zfsvfs);
1204 1205 out:
1205 1206 if (error) {
1206 1207 dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1207 1208 zfsvfs_free(zfsvfs);
1208 1209 } else {
1209 1210 atomic_add_32(&zfs_active_fs_count, 1);
1210 1211 }
1211 1212
1212 1213 return (error);
1213 1214 }
1214 1215
1215 1216 void
1216 1217 zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
1217 1218 {
1218 1219 objset_t *os = zfsvfs->z_os;
1219 1220 struct dsl_dataset *ds;
1220 1221
1221 1222 /*
1222 1223 * Unregister properties.
1223 1224 */
1224 1225 if (!dmu_objset_is_snapshot(os)) {
1225 1226 ds = dmu_objset_ds(os);
1226 1227 VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb,
1227 1228 zfsvfs) == 0);
1228 1229
1229 1230 VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb,
1230 1231 zfsvfs) == 0);
1231 1232
1232 1233 VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb,
1233 1234 zfsvfs) == 0);
1234 1235
1235 1236 VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb,
1236 1237 zfsvfs) == 0);
1237 1238
1238 1239 VERIFY(dsl_prop_unregister(ds, "devices", devices_changed_cb,
1239 1240 zfsvfs) == 0);
1240 1241
1241 1242 VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb,
1242 1243 zfsvfs) == 0);
1243 1244
1244 1245 VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb,
1245 1246 zfsvfs) == 0);
1246 1247
1247 1248 VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb,
1248 1249 zfsvfs) == 0);
1249 1250
1250 1251 VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb,
1251 1252 zfsvfs) == 0);
1252 1253
1253 1254 VERIFY(dsl_prop_unregister(ds, "aclinherit",
1254 1255 acl_inherit_changed_cb, zfsvfs) == 0);
1255 1256
1256 1257 VERIFY(dsl_prop_unregister(ds, "vscan",
1257 1258 vscan_changed_cb, zfsvfs) == 0);
1258 1259 }
1259 1260 }
1260 1261
/*
 * Convert a decimal digit string to a uint64_t integer.
 *
 * Returns 0 and stores the value in *objnum on success.  Returns EINVAL if
 * the string contains a non-digit character, and ERANGE if the value does
 * not fit in 64 bits; *objnum is left untouched in both error cases.  An
 * empty string converts to 0.
 */
static int
str_to_uint64(char *str, uint64_t *objnum)
{
	const uint64_t max = ~(uint64_t)0;
	uint64_t num = 0;

	while (*str != '\0') {
		uint64_t digit;

		if (*str < '0' || *str > '9')
			return (EINVAL);

		digit = (uint64_t)(*str++ - '0');

		/* Reject input whose value would wrap around 2^64. */
		if (num > (max - digit) / 10)
			return (ERANGE);

		num = num * 10 + digit;
	}

	*objnum = num;
	return (0);
}
1279 1280
/*
 * The boot path passed from the boot loader is in the form of
 * "rootpool-name/root-filesystem-object-number".  Convert this
 * string to a dataset name: "rootpool-name/root-filesystem-name".
 *
 * 'bpath' is copied to 'outpath' first; if it has no '/' or the part after
 * the first '/' is not a decimal number, that copy is the result.  'bpath'
 * is modified temporarily but is restored before returning.
 *
 * Returns EINVAL for an empty path or one starting with '/', otherwise 0 or
 * the error from dsl_dsobj_to_dsname().
 */
static int
zfs_parse_bootfs(char *bpath, char *outpath)
{
	uint64_t dsobj;
	char *sep;
	int err;

	if (bpath[0] == '\0' || bpath[0] == '/')
		return (EINVAL);

	(void) strcpy(outpath, bpath);

	/*
	 * With no '/' the path is just the pool name; with a non-numeric
	 * suffix it is already a dataset name.  Either way the copy made
	 * above is the answer.
	 */
	sep = strchr(bpath, '/');
	if (sep == NULL || str_to_uint64(sep + 1, &dsobj) != 0)
		return (0);

	/* Split off the pool name just for the duration of the lookup. */
	*sep = '\0';
	err = dsl_dsobj_to_dsname(bpath, dsobj, outpath);
	*sep = '/';

	return (err);
}
1315 1316
1316 1317 /*
1317 1318 * zfs_check_global_label:
1318 1319 * Check that the hex label string is appropriate for the dataset
1319 1320 * being mounted into the global_zone proper.
1320 1321 *
1321 1322 * Return an error if the hex label string is not default or
1322 1323 * admin_low/admin_high. For admin_low labels, the corresponding
1323 1324 * dataset must be readonly.
1324 1325 */
1325 1326 int
1326 1327 zfs_check_global_label(const char *dsname, const char *hexsl)
1327 1328 {
1328 1329 if (strcasecmp(hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
1329 1330 return (0);
1330 1331 if (strcasecmp(hexsl, ADMIN_HIGH) == 0)
1331 1332 return (0);
1332 1333 if (strcasecmp(hexsl, ADMIN_LOW) == 0) {
1333 1334 /* must be readonly */
1334 1335 uint64_t rdonly;
1335 1336
1336 1337 if (dsl_prop_get_integer(dsname,
1337 1338 zfs_prop_to_name(ZFS_PROP_READONLY), &rdonly, NULL))
1338 1339 return (EACCES);
1339 1340 return (rdonly ? 0 : EACCES);
1340 1341 }
1341 1342 return (EACCES);
1342 1343 }
1343 1344
1344 1345 /*
1345 1346 * zfs_mount_label_policy:
1346 1347 * Determine whether the mount is allowed according to MAC check.
1347 1348 * by comparing (where appropriate) label of the dataset against
1348 1349 * the label of the zone being mounted into. If the dataset has
1349 1350 * no label, create one.
1350 1351 *
1351 1352 * Returns:
1352 1353 * 0 : access allowed
1353 1354 * >0 : error code, such as EACCES
1354 1355 */
1355 1356 static int
1356 1357 zfs_mount_label_policy(vfs_t *vfsp, char *osname)
1357 1358 {
1358 1359 int error, retv;
1359 1360 zone_t *mntzone = NULL;
1360 1361 ts_label_t *mnt_tsl;
1361 1362 bslabel_t *mnt_sl;
1362 1363 bslabel_t ds_sl;
1363 1364 char ds_hexsl[MAXNAMELEN];
1364 1365
1365 1366 retv = EACCES; /* assume the worst */
1366 1367
1367 1368 /*
1368 1369 * Start by getting the dataset label if it exists.
1369 1370 */
1370 1371 error = dsl_prop_get(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
1371 1372 1, sizeof (ds_hexsl), &ds_hexsl, NULL);
1372 1373 if (error)
1373 1374 return (EACCES);
1374 1375
1375 1376 /*
1376 1377 * If labeling is NOT enabled, then disallow the mount of datasets
1377 1378 * which have a non-default label already. No other label checks
1378 1379 * are needed.
1379 1380 */
1380 1381 if (!is_system_labeled()) {
1381 1382 if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0)
1382 1383 return (0);
1383 1384 return (EACCES);
1384 1385 }
1385 1386
1386 1387 /*
1387 1388 * Get the label of the mountpoint. If mounting into the global
1388 1389 * zone (i.e. mountpoint is not within an active zone and the
1389 1390 * zoned property is off), the label must be default or
1390 1391 * admin_low/admin_high only; no other checks are needed.
1391 1392 */
1392 1393 mntzone = zone_find_by_any_path(refstr_value(vfsp->vfs_mntpt), B_FALSE);
1393 1394 if (mntzone->zone_id == GLOBAL_ZONEID) {
1394 1395 uint64_t zoned;
1395 1396
1396 1397 zone_rele(mntzone);
1397 1398
1398 1399 if (dsl_prop_get_integer(osname,
1399 1400 zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
1400 1401 return (EACCES);
1401 1402 if (!zoned)
1402 1403 return (zfs_check_global_label(osname, ds_hexsl));
1403 1404 else
1404 1405 /*
1405 1406 * This is the case of a zone dataset being mounted
1406 1407 * initially, before the zone has been fully created;
1407 1408 * allow this mount into global zone.
1408 1409 */
1409 1410 return (0);
1410 1411 }
1411 1412
1412 1413 mnt_tsl = mntzone->zone_slabel;
1413 1414 ASSERT(mnt_tsl != NULL);
1414 1415 label_hold(mnt_tsl);
1415 1416 mnt_sl = label2bslabel(mnt_tsl);
1416 1417
1417 1418 if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) == 0) {
1418 1419 /*
1419 1420 * The dataset doesn't have a real label, so fabricate one.
1420 1421 */
1421 1422 char *str = NULL;
1422 1423
1423 1424 if (l_to_str_internal(mnt_sl, &str) == 0 &&
1424 1425 dsl_prop_set(osname, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
1425 1426 ZPROP_SRC_LOCAL, 1, strlen(str) + 1, str) == 0)
1426 1427 retv = 0;
1427 1428 if (str != NULL)
1428 1429 kmem_free(str, strlen(str) + 1);
1429 1430 } else if (hexstr_to_label(ds_hexsl, &ds_sl) == 0) {
1430 1431 /*
1431 1432 * Now compare labels to complete the MAC check. If the
1432 1433 * labels are equal then allow access. If the mountpoint
1433 1434 * label dominates the dataset label, allow readonly access.
1434 1435 * Otherwise, access is denied.
1435 1436 */
1436 1437 if (blequal(mnt_sl, &ds_sl))
1437 1438 retv = 0;
1438 1439 else if (bldominates(mnt_sl, &ds_sl)) {
1439 1440 vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
1440 1441 retv = 0;
1441 1442 }
1442 1443 }
1443 1444
1444 1445 label_rele(mnt_tsl);
1445 1446 zone_rele(mntzone);
1446 1447 return (retv);
1447 1448 }
1448 1449
/*
 * zfs_mountroot:
 *	Root filesystem entry point; the action taken depends on 'why'.
 *
 *	ROOT_INIT:	import the root pool, turn the "zfs-bootfs" boot
 *			property into a dataset name, mount it with
 *			zfs_domount(), flag its root vnode VROOT, publish it
 *			as rootvp and add the vfs.  Only the first ROOT_INIT
 *			call proceeds; later ones return EBUSY.
 *	ROOT_REMOUNT:	clear readonly through readonly_changed_cb(), set
 *			VFS_REMOUNT and re-register the property callbacks.
 *	ROOT_UNMOUNT:	unregister the property callbacks and sync.
 *
 *	Any other value of 'why' returns ENOTSUP.
 */
1449 1450 static int
1450 1451 zfs_mountroot(vfs_t *vfsp, enum whymountroot why)
1451 1452 {
1452 1453 int error = 0;
1453 1454 static int zfsrootdone = 0;
1454 1455 zfsvfs_t *zfsvfs = NULL;
1455 1456 znode_t *zp = NULL;
1456 1457 vnode_t *vp = NULL;
1457 1458 char *zfs_bootfs;
1458 1459 char *zfs_devid;
1459 1460
1460 1461 ASSERT(vfsp);
1461 1462
1462 1463 /*
1463 1464 * The filesystem that we mount as root is defined in the
1464 1465 * boot property "zfs-bootfs" with a format of
1465 1466 * "poolname/root-dataset-objnum".
1466 1467 */
1467 1468 if (why == ROOT_INIT) {
/* zfsrootdone is static: non-zero on every call after the first. */
1468 1469 if (zfsrootdone++)
1469 1470 return (EBUSY);
1470 1471 /*
1471 1472 * the process of doing a spa_load will require the
1472 1473 * clock to be set before we could (for example) do
1473 1474 * something better by looking at the timestamp on
1474 1475 * an uberblock, so just set it to -1.
1475 1476 */
1476 1477 clkset(-1);
1477 1478
1478 1479 if ((zfs_bootfs = spa_get_bootprop("zfs-bootfs")) == NULL) {
1479 1480 cmn_err(CE_NOTE, "spa_get_bootfs: can not get "
1480 1481 "bootfs name");
1481 1482 return (EINVAL);
1482 1483 }
/*
 * The pool is imported using rootfs.bo_name as it stands on entry;
 * zfs_parse_bootfs() below then overwrites rootfs.bo_name with the
 * dataset name.  The "diskdevid" property may be absent (NULL).
 */
1483 1484 zfs_devid = spa_get_bootprop("diskdevid");
1484 1485 error = spa_import_rootpool(rootfs.bo_name, zfs_devid);
1485 1486 if (zfs_devid)
1486 1487 spa_free_bootprop(zfs_devid);
1487 1488 if (error) {
1488 1489 spa_free_bootprop(zfs_bootfs);
1489 1490 cmn_err(CE_NOTE, "spa_import_rootpool: error %d",
1490 1491 error);
1491 1492 return (error);
1492 1493 }
1493 1494 if (error = zfs_parse_bootfs(zfs_bootfs, rootfs.bo_name)) {
1494 1495 spa_free_bootprop(zfs_bootfs);
1495 1496 cmn_err(CE_NOTE, "zfs_parse_bootfs: error %d",
1496 1497 error);
1497 1498 return (error);
1498 1499 }
1499 1500
1500 1501 spa_free_bootprop(zfs_bootfs);
1501 1502
/* From here on every exit goes through 'out' so the vfs is unlocked. */
1502 1503 if (error = vfs_lock(vfsp))
1503 1504 return (error);
1504 1505
1505 1506 if (error = zfs_domount(vfsp, rootfs.bo_name)) {
1506 1507 cmn_err(CE_NOTE, "zfs_domount: error %d", error);
1507 1508 goto out;
1508 1509 }
1509 1510
/*
 * NOTE(review): if zfs_zget() fails, the mount performed by zfs_domount()
 * is not undone on this path -- confirm that is intended.
 */
1510 1511 zfsvfs = (zfsvfs_t *)vfsp->vfs_data;
1511 1512 ASSERT(zfsvfs);
1512 1513 if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp)) {
1513 1514 cmn_err(CE_NOTE, "zfs_zget: error %d", error);
1514 1515 goto out;
1515 1516 }
1516 1517
1517 1518 vp = ZTOV(zp);
1518 1519 mutex_enter(&vp->v_lock);
1519 1520 vp->v_flag |= VROOT;
1520 1521 mutex_exit(&vp->v_lock);
1521 1522 rootvp = vp;
1522 1523
1523 1524 /*
1524 1525 * Leave rootvp held. The root file system is never unmounted.
1525 1526 */
1526 1527
1527 1528 vfs_add((struct vnode *)0, vfsp,
1528 1529 (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0);
1529 1530 out:
1530 1531 vfs_unlock(vfsp);
1531 1532 return (error);
1532 1533 } else if (why == ROOT_REMOUNT) {
1533 1534 readonly_changed_cb(vfsp->vfs_data, B_FALSE);
1534 1535 vfsp->vfs_flag |= VFS_REMOUNT;
1535 1536
1536 1537 /* refresh mount options */
1537 1538 zfs_unregister_callbacks(vfsp->vfs_data);
1538 1539 return (zfs_register_callbacks(vfsp));
1539 1540
1540 1541 } else if (why == ROOT_UNMOUNT) {
1541 1542 zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data);
1542 1543 (void) zfs_sync(vfsp, 0, 0);
1543 1544 return (0);
1544 1545 }
1545 1546
1546 1547 /*
1547 1548 * if "why" is equal to anything else other than ROOT_INIT,
1548 1549 * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it.
1549 1550 */
1550 1551 return (ENOTSUP);
1551 1552 }
1552 1553
/*
 * zfs_mount:
 *	Mount (or, with MS_REMOUNT, refresh the options of) the dataset named
 *	by uap->spec on the directory vnode 'mvp'.
 *
 *	Checks performed, in order: the mount point is a directory and is not
 *	busy; no unparsed MS_DATA payload is supplied; the caller has mount
 *	privilege or a delegated "mount" permission together with ownership
 *	of / write access to the mount point; the dataset is visible and
 *	writable from a non-global zone; the label policy allows the mount.
 *
 *	Returns 0 on success or an error code.
 */
1553 1554 /*ARGSUSED*/
1554 1555 static int
1555 1556 zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
1556 1557 {
1557 1558 char *osname;
1558 1559 pathname_t spn;
1559 1560 int error = 0;
1560 1561 uio_seg_t fromspace = (uap->flags & MS_SYSSPACE) ?
1561 1562 UIO_SYSSPACE : UIO_USERSPACE;
1562 1563 int canwrite;
1563 1564
1564 1565 if (mvp->v_type != VDIR)
1565 1566 return (ENOTDIR);
1566 1567
/*
 * Unless remounting or overlaying, refuse a mount point that has other
 * references or that is itself the root of a filesystem.
 */
1567 1568 mutex_enter(&mvp->v_lock);
1568 1569 if ((uap->flags & MS_REMOUNT) == 0 &&
1569 1570 (uap->flags & MS_OVERLAY) == 0 &&
1570 1571 (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
1571 1572 mutex_exit(&mvp->v_lock);
1572 1573 return (EBUSY);
1573 1574 }
1574 1575 mutex_exit(&mvp->v_lock);
1575 1576
1576 1577 /*
1577 1578 * ZFS does not support passing unparsed data in via MS_DATA.
1578 1579 * Users should use the MS_OPTIONSTR interface; this means
1579 1580 * that all option parsing is already done and the options struct
1580 1581 * can be interrogated.
1581 1582 */
1582 1583 if ((uap->flags & MS_DATA) && uap->datalen > 0)
1583 1584 return (EINVAL);
1584 1585
1585 1586 /*
1586 1587 * Get the objset name (the "special" mount argument).
1587 1588 */
1588 1589 if (error = pn_get(uap->spec, fromspace, &spn))
1589 1590 return (error);
1590 1591
/* osname points into spn, which is released by pn_free() at 'out'. */
1591 1592 osname = spn.pn_path;
1592 1593
1593 1594 /*
1594 1595 * Check for mount privilege?
1595 1596 *
1596 1597 * If we don't have privilege then see if
1597 1598 * we have local permission to allow it
1598 1599 */
/*
 * Every 'goto out' inside this block leaves 'error' holding the failure
 * returned by secpolicy_fs_mount(), so the mount is refused with it.
 */
1599 1600 error = secpolicy_fs_mount(cr, mvp, vfsp);
1600 1601 if (error) {
1601 1602 if (dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr) == 0) {
1602 1603 vattr_t vattr;
1603 1604
1604 1605 /*
1605 1606 * Make sure user is the owner of the mount point
1606 1607 * or has sufficient privileges.
1607 1608 */
1608 1609
1609 1610 vattr.va_mask = AT_UID;
1610 1611
1611 1612 if (VOP_GETATTR(mvp, &vattr, 0, cr, NULL)) {
1612 1613 goto out;
1613 1614 }
1614 1615
1615 1616 if (secpolicy_vnode_owner(cr, vattr.va_uid) != 0 &&
1616 1617 VOP_ACCESS(mvp, VWRITE, 0, cr, NULL) != 0) {
1617 1618 goto out;
1618 1619 }
1619 1620 secpolicy_fs_mount_clearopts(cr, vfsp);
1620 1621 } else {
1621 1622 goto out;
1622 1623 }
1623 1624 }
1624 1625
1625 1626 /*
1626 1627 * Refuse to mount a filesystem if we are in a local zone and the
1627 1628 * dataset is not visible.
1628 1629 */
/* The check below also refuses a dataset that is visible but not writable. */
1629 1630 if (!INGLOBALZONE(curproc) &&
1630 1631 (!zone_dataset_visible(osname, &canwrite) || !canwrite)) {
1631 1632 error = EPERM;
1632 1633 goto out;
1633 1634 }
1634 1635
1635 1636 error = zfs_mount_label_policy(vfsp, osname);
1636 1637 if (error)
1637 1638 goto out;
1638 1639
1639 1640 /*
1640 1641 * When doing a remount, we simply refresh our temporary properties
1641 1642 * according to those options set in the current VFS options.
1642 1643 */
1643 1644 if (uap->flags & MS_REMOUNT) {
1644 1645 /* refresh mount options */
1645 1646 zfs_unregister_callbacks(vfsp->vfs_data);
1646 1647 error = zfs_register_callbacks(vfsp);
1647 1648 goto out;
1648 1649 }
1649 1650
1650 1651 error = zfs_domount(vfsp, osname);
1651 1652
1652 1653 /*
1653 1654 * Add an extra VFS_HOLD on our parent vfs so that it can't
1654 1655 * disappear due to a forced unmount.
1655 1656 */
/* The extra hold is taken only when the mounted dataset is a snapshot. */
1656 1657 if (error == 0 && ((zfsvfs_t *)vfsp->vfs_data)->z_issnap)
1657 1658 VFS_HOLD(mvp->v_vfsp);
1658 1659
1659 1660 out:
1660 1661 pn_free(&spn);
1661 1662 return (error);
1662 1663 }
1663 1664
1664 1665 static int
1665 1666 zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp)
1666 1667 {
1667 1668 zfsvfs_t *zfsvfs = vfsp->vfs_data;
1668 1669 dev32_t d32;
1669 1670 uint64_t refdbytes, availbytes, usedobjs, availobjs;
1670 1671
1671 1672 ZFS_ENTER(zfsvfs);
1672 1673
1673 1674 dmu_objset_space(zfsvfs->z_os,
1674 1675 &refdbytes, &availbytes, &usedobjs, &availobjs);
1675 1676
1676 1677 /*
1677 1678 * The underlying storage pool actually uses multiple block sizes.
1678 1679 * We report the fragsize as the smallest block size we support,
1679 1680 * and we report our blocksize as the filesystem's maximum blocksize.
1680 1681 */
1681 1682 statp->f_frsize = 1UL << SPA_MINBLOCKSHIFT;
1682 1683 statp->f_bsize = zfsvfs->z_max_blksz;
1683 1684
1684 1685 /*
1685 1686 * The following report "total" blocks of various kinds in the
1686 1687 * file system, but reported in terms of f_frsize - the
1687 1688 * "fragment" size.
1688 1689 */
1689 1690
1690 1691 statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT;
1691 1692 statp->f_bfree = availbytes >> SPA_MINBLOCKSHIFT;
1692 1693 statp->f_bavail = statp->f_bfree; /* no root reservation */
1693 1694
1694 1695 /*
1695 1696 * statvfs() should really be called statufs(), because it assumes
1696 1697 * static metadata. ZFS doesn't preallocate files, so the best
1697 1698 * we can do is report the max that could possibly fit in f_files,
1698 1699 * and that minus the number actually used in f_ffree.
1699 1700 * For f_ffree, report the smaller of the number of object available
1700 1701 * and the number of blocks (each object will take at least a block).
1701 1702 */
1702 1703 statp->f_ffree = MIN(availobjs, statp->f_bfree);
1703 1704 statp->f_favail = statp->f_ffree; /* no "root reservation" */
1704 1705 statp->f_files = statp->f_ffree + usedobjs;
1705 1706
1706 1707 (void) cmpldev(&d32, vfsp->vfs_dev);
1707 1708 statp->f_fsid = d32;
1708 1709
1709 1710 /*
1710 1711 * We're a zfs filesystem.
1711 1712 */
1712 1713 (void) strcpy(statp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name);
1713 1714
1714 1715 statp->f_flag = vf_to_stf(vfsp->vfs_flag);
1715 1716
1716 1717 statp->f_namemax = ZFS_MAXNAMELEN;
1717 1718
1718 1719 /*
1719 1720 * We have all of 32 characters to stuff a string here.
1720 1721 * Is there anything useful we could/should provide?
1721 1722 */
1722 1723 bzero(statp->f_fstr, sizeof (statp->f_fstr));
1723 1724
1724 1725 ZFS_EXIT(zfsvfs);
1725 1726 return (0);
1726 1727 }
1727 1728
1728 1729 static int
1729 1730 zfs_root(vfs_t *vfsp, vnode_t **vpp)
1730 1731 {
1731 1732 zfsvfs_t *zfsvfs = vfsp->vfs_data;
1732 1733 znode_t *rootzp;
1733 1734 int error;
1734 1735
1735 1736 ZFS_ENTER(zfsvfs);
1736 1737
1737 1738 error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
1738 1739 if (error == 0)
1739 1740 *vpp = ZTOV(rootzp);
1740 1741
1741 1742 ZFS_EXIT(zfsvfs);
1742 1743 return (error);
1743 1744 }
1744 1745
1745 1746 /*
1746 1747 * Teardown the zfsvfs::z_os.
1747 1748 *
1748 1749 * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
1749 1750 * and 'z_teardown_inactive_lock' held.
 *
 * Returns EIO (with both locks dropped) when 'unmounting' is FALSE and the
 * filesystem has already been unmounted or has no objset; returns 0
 * otherwise.
1750 1751 */
1751 1752 static int
1752 1753 zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
1753 1754 {
1754 1755 znode_t *zp;
1755 1756
1756 1757 rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
1757 1758
1758 1759 if (!unmounting) {
1759 1760 /*
1760 1761 * We purge the parent filesystem's vfsp as the parent
1761 1762 * filesystem and all of its snapshots have their vnode's
1762 1763 * v_vfsp set to the parent's filesystem's vfsp. Note,
1763 1764 * 'z_parent' is self referential for non-snapshots.
1764 1765 */
1765 1766 (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);
1766 1767 }
1767 1768
1768 1769 /*
1769 1770 * Close the zil. NB: Can't close the zil while zfs_inactive
1770 1771 * threads are blocked as zil_close can call zfs_inactive.
1771 1772 */
/* This is why the ZIL is closed before z_teardown_inactive_lock is taken. */
1772 1773 if (zfsvfs->z_log) {
1773 1774 zil_close(zfsvfs->z_log);
1774 1775 zfsvfs->z_log = NULL;
1775 1776 }
1776 1777
1777 1778 rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER);
1778 1779
1779 1780 /*
1780 1781 * If we are not unmounting (ie: online recv) and someone already
1781 1782 * unmounted this file system while we were doing the switcheroo,
1782 1783 * or a reopen of z_os failed then just bail out now.
1783 1784 */
1784 1785 if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
1785 1786 rw_exit(&zfsvfs->z_teardown_inactive_lock);
1786 1787 rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
1787 1788 return (EIO);
1788 1789 }
1789 1790
1790 1791 /*
1791 1792 * At this point there are no vops active, and any new vops will
1792 1793 * fail with EIO since we have z_teardown_lock for writer (only
1793 1794 * relevant for forced unmount).
1794 1795 *
1795 1796 * Release all holds on dbufs.
1796 1797 */
/* Only znodes that still have an SA handle are finalized here. */
1797 1798 mutex_enter(&zfsvfs->z_znodes_lock);
1798 1799 for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
1799 1800 zp = list_next(&zfsvfs->z_all_znodes, zp))
1800 1801 if (zp->z_sa_hdl) {
1801 1802 ASSERT(ZTOV(zp)->v_count > 0);
1802 1803 zfs_znode_dmu_fini(zp);
1803 1804 }
1804 1805 mutex_exit(&zfsvfs->z_znodes_lock);
1805 1806
1806 1807 /*
1807 1808 * If we are unmounting, set the unmounted flag and let new vops
1808 1809 * unblock. zfs_inactive will have the unmounted behavior, and all
1809 1810 * other vops will fail with EIO.
1810 1811 */
1811 1812 if (unmounting) {
1812 1813 zfsvfs->z_unmounted = B_TRUE;
1813 1814 rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
1814 1815 rw_exit(&zfsvfs->z_teardown_inactive_lock);
1815 1816 }
1816 1817
1817 1818 /*
1818 1819 * z_os will be NULL if there was an error in attempting to reopen
1819 1820 * zfsvfs, so just return as the properties had already been
1820 1821 * unregistered and cached data had been evicted before.
1821 1822 */
1822 1823 if (zfsvfs->z_os == NULL)
1823 1824 return (0);
1824 1825
1825 1826 /*
1826 1827 * Unregister properties.
1827 1828 */
1828 1829 zfs_unregister_callbacks(zfsvfs);
1829 1830
1830 1831 /*
1831 1832 * Evict cached data
1832 1833 */
/* Wait for a sync only if the objset is dirty and the vfs is writable. */
1833 1834 if (dmu_objset_is_dirty_anywhere(zfsvfs->z_os))
1834 1835 if (!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY))
1835 1836 txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
1836 1837 (void) dmu_objset_evict_dbufs(zfsvfs->z_os);
1837 1838
1838 1839 return (0);
1839 1840 }
1840 1841
/*
 * zfs_umount:
 *	Unmount the filesystem behind 'vfsp'.  The caller needs unmount
 *	privilege or a delegated "mount" permission on the dataset.
 *	Snapshots mounted under '.zfs' are unmounted first.  Unless MS_FORCE
 *	is given, the unmount is refused with EBUSY while extra references
 *	to the vfs or to the '.zfs' directory exist.  The zfsvfs is then
 *	torn down, the objset (if any) is disowned and the '.zfs' node is
 *	destroyed.
 *
 *	Returns 0 on success or an error code.
 */
1841 1842 /*ARGSUSED*/
1842 1843 static int
1843 1844 zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr)
1844 1845 {
1845 1846 zfsvfs_t *zfsvfs = vfsp->vfs_data;
1846 1847 objset_t *os;
1847 1848 int ret;
1848 1849
/*
 * Without unmount privilege, fall back to the delegated "mount" permission
 * on the dataset; if that is also denied, return the privilege error.
 */
1849 1850 ret = secpolicy_fs_unmount(cr, vfsp);
1850 1851 if (ret) {
1851 1852 if (dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource),
1852 1853 ZFS_DELEG_PERM_MOUNT, cr))
1853 1854 return (ret);
1854 1855 }
1855 1856
1856 1857 /*
1857 1858 * We purge the parent filesystem's vfsp as the parent filesystem
1858 1859 * and all of its snapshots have their vnode's v_vfsp set to the
1859 1860 * parent's filesystem's vfsp. Note, 'z_parent' is self
1860 1861 * referential for non-snapshots.
1861 1862 */
1862 1863 (void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);
1863 1864
1864 1865 /*
1865 1866 * Unmount any snapshots mounted under .zfs before unmounting the
1866 1867 * dataset itself.
1867 1868 */
1868 1869 if (zfsvfs->z_ctldir != NULL &&
1869 1870 (ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) {
1870 1871 return (ret);
1871 1872 }
1872 1873
1873 1874 if (!(fflag & MS_FORCE)) {
1874 1875 /*
1875 1876 * Check the number of active vnodes in the file system.
1876 1877 * Our count is maintained in the vfs structure, but the
1877 1878 * number is off by 1 to indicate a hold on the vfs
1878 1879 * structure itself.
1879 1880 *
1880 1881 * The '.zfs' directory maintains a reference of its
1881 1882 * own, and any active references underneath are
1882 1883 * reflected in the vnode count.
1883 1884 */
1884 1885 if (zfsvfs->z_ctldir == NULL) {
1885 1886 if (vfsp->vfs_count > 1)
1886 1887 return (EBUSY);
1887 1888 } else {
1888 1889 if (vfsp->vfs_count > 2 ||
1889 1890 zfsvfs->z_ctldir->v_count > 1)
1890 1891 return (EBUSY);
1891 1892 }
1892 1893 }
1893 1894
/* Nothing below this point returns an error. */
1894 1895 vfsp->vfs_flag |= VFS_UNMOUNTED;
1895 1896
1896 1897 VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0);
1897 1898 os = zfsvfs->z_os;
1898 1899
1899 1900 /*
1900 1901 * z_os will be NULL if there was an error in
1901 1902 * attempting to reopen zfsvfs.
1902 1903 */
1903 1904 if (os != NULL) {
1904 1905 /*
1905 1906 * Unset the objset user_ptr.
1906 1907 */
1907 1908 mutex_enter(&os->os_user_ptr_lock);
1908 1909 dmu_objset_set_user(os, NULL);
1909 1910 mutex_exit(&os->os_user_ptr_lock);
1910 1911
1911 1912 /*
1912 1913 * Finally release the objset
1913 1914 */
1914 1915 dmu_objset_disown(os, zfsvfs);
1915 1916 }
1916 1917
1917 1918 /*
1918 1919 * We can now safely destroy the '.zfs' directory node.
1919 1920 */
1920 1921 if (zfsvfs->z_ctldir != NULL)
1921 1922 zfsctl_destroy(zfsvfs);
1922 1923
1923 1924 return (0);
1924 1925 }
1925 1926
1926 1927 static int
1927 1928 zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
1928 1929 {
1929 1930 zfsvfs_t *zfsvfs = vfsp->vfs_data;
1930 1931 znode_t *zp;
1931 1932 uint64_t object = 0;
1932 1933 uint64_t fid_gen = 0;
1933 1934 uint64_t gen_mask;
1934 1935 uint64_t zp_gen;
1935 1936 int i, err;
1936 1937
1937 1938 *vpp = NULL;
1938 1939
1939 1940 ZFS_ENTER(zfsvfs);
1940 1941
1941 1942 if (fidp->fid_len == LONG_FID_LEN) {
1942 1943 zfid_long_t *zlfid = (zfid_long_t *)fidp;
1943 1944 uint64_t objsetid = 0;
1944 1945 uint64_t setgen = 0;
1945 1946
1946 1947 for (i = 0; i < sizeof (zlfid->zf_setid); i++)
1947 1948 objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);
1948 1949
1949 1950 for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
1950 1951 setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);
1951 1952
1952 1953 ZFS_EXIT(zfsvfs);
1953 1954
1954 1955 err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
1955 1956 if (err)
1956 1957 return (EINVAL);
1957 1958 ZFS_ENTER(zfsvfs);
1958 1959 }
1959 1960
1960 1961 if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
1961 1962 zfid_short_t *zfid = (zfid_short_t *)fidp;
1962 1963
1963 1964 for (i = 0; i < sizeof (zfid->zf_object); i++)
1964 1965 object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);
1965 1966
1966 1967 for (i = 0; i < sizeof (zfid->zf_gen); i++)
1967 1968 fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
1968 1969 } else {
1969 1970 ZFS_EXIT(zfsvfs);
1970 1971 return (EINVAL);
1971 1972 }
1972 1973
1973 1974 /* A zero fid_gen means we are in the .zfs control directories */
1974 1975 if (fid_gen == 0 &&
1975 1976 (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) {
1976 1977 *vpp = zfsvfs->z_ctldir;
1977 1978 ASSERT(*vpp != NULL);
1978 1979 if (object == ZFSCTL_INO_SNAPDIR) {
1979 1980 VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL,
1980 1981 0, NULL, NULL, NULL, NULL, NULL) == 0);
1981 1982 } else {
1982 1983 VN_HOLD(*vpp);
1983 1984 }
1984 1985 ZFS_EXIT(zfsvfs);
1985 1986 return (0);
1986 1987 }
1987 1988
1988 1989 gen_mask = -1ULL >> (64 - 8 * i);
1989 1990
1990 1991 dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask);
1991 1992 if (err = zfs_zget(zfsvfs, object, &zp)) {
1992 1993 ZFS_EXIT(zfsvfs);
1993 1994 return (err);
1994 1995 }
1995 1996 (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
1996 1997 sizeof (uint64_t));
1997 1998 zp_gen = zp_gen & gen_mask;
1998 1999 if (zp_gen == 0)
1999 2000 zp_gen = 1;
2000 2001 if (zp->z_unlinked || zp_gen != fid_gen) {
2001 2002 dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen);
2002 2003 VN_RELE(ZTOV(zp));
2003 2004 ZFS_EXIT(zfsvfs);
2004 2005 return (EINVAL);
2005 2006 }
2006 2007
2007 2008 *vpp = ZTOV(zp);
2008 2009 ZFS_EXIT(zfsvfs);
2009 2010 return (0);
2010 2011 }
2011 2012
2012 2013 /*
2013 2014 * Block out VOPs and close zfsvfs_t::z_os
2014 2015 *
2015 2016 * Note, if successful, then we return with the 'z_teardown_lock' and
2016 2017 * 'z_teardown_inactive_lock' write held.
2017 2018 */
2018 2019 int
2019 2020 zfs_suspend_fs(zfsvfs_t *zfsvfs)
2020 2021 {
2021 2022 int error;
2022 2023
2023 2024 if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
2024 2025 return (error);
2025 2026 dmu_objset_disown(zfsvfs->z_os, zfsvfs);
2026 2027
2027 2028 return (0);
2028 2029 }
2029 2030
2030 2031 /*
2031 2032 * Reopen zfsvfs_t::z_os and release VOPs.
2032 2033 */
2033 2034 int
2034 2035 zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname)
2035 2036 {
2036 2037 int err;
2037 2038
2038 2039 ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock));
2039 2040 ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
2040 2041
2041 2042 err = dmu_objset_own(osname, DMU_OST_ZFS, B_FALSE, zfsvfs,
2042 2043 &zfsvfs->z_os);
2043 2044 if (err) {
2044 2045 zfsvfs->z_os = NULL;
2045 2046 } else {
2046 2047 znode_t *zp;
2047 2048 uint64_t sa_obj = 0;
2048 2049
2049 2050 /*
2050 2051 * Make sure version hasn't changed
2051 2052 */
2052 2053
2053 2054 err = zfs_get_zplprop(zfsvfs->z_os, ZFS_PROP_VERSION,
2054 2055 &zfsvfs->z_version);
2055 2056
2056 2057 if (err)
2057 2058 goto bail;
2058 2059
2059 2060 err = zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ,
2060 2061 ZFS_SA_ATTRS, 8, 1, &sa_obj);
2061 2062
2062 2063 if (err && zfsvfs->z_version >= ZPL_VERSION_SA)
2063 2064 goto bail;
2064 2065
2065 2066 if ((err = sa_setup(zfsvfs->z_os, sa_obj,
2066 2067 zfs_attr_table, ZPL_END, &zfsvfs->z_attr_table)) != 0)
2067 2068 goto bail;
2068 2069
2069 2070 if (zfsvfs->z_version >= ZPL_VERSION_SA)
2070 2071 sa_register_update_callback(zfsvfs->z_os,
2071 2072 zfs_sa_upgrade);
2072 2073
2073 2074 VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);
2074 2075
2075 2076 zfs_set_fuid_feature(zfsvfs);
2076 2077
2077 2078 /*
2078 2079 * Attempt to re-establish all the active znodes with
2079 2080 * their dbufs. If a zfs_rezget() fails, then we'll let
2080 2081 * any potential callers discover that via ZFS_ENTER_VERIFY_VP
2081 2082 * when they try to use their znode.
2082 2083 */
2083 2084 mutex_enter(&zfsvfs->z_znodes_lock);
2084 2085 for (zp = list_head(&zfsvfs->z_all_znodes); zp;
2085 2086 zp = list_next(&zfsvfs->z_all_znodes, zp)) {
2086 2087 (void) zfs_rezget(zp);
2087 2088 }
2088 2089 mutex_exit(&zfsvfs->z_znodes_lock);
2089 2090 }
2090 2091
2091 2092 bail:
2092 2093 /* release the VOPs */
2093 2094 rw_exit(&zfsvfs->z_teardown_inactive_lock);
2094 2095 rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
2095 2096
2096 2097 if (err) {
2097 2098 /*
2098 2099 * Since we couldn't reopen zfsvfs::z_os, or
2099 2100 * setup the sa framework force unmount this file system.
2100 2101 */
2101 2102 if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0)
2102 2103 (void) dounmount(zfsvfs->z_vfs, MS_FORCE, CRED());
2103 2104 }
2104 2105 return (err);
2105 2106 }
2106 2107
2107 2108 static void
2108 2109 zfs_freevfs(vfs_t *vfsp)
2109 2110 {
2110 2111 zfsvfs_t *zfsvfs = vfsp->vfs_data;
2111 2112
2112 2113 /*
2113 2114 * If this is a snapshot, we have an extra VFS_HOLD on our parent
2114 2115 * from zfs_mount(). Release it here. If we came through
2115 2116 * zfs_mountroot() instead, we didn't grab an extra hold, so
2116 2117 * skip the VFS_RELE for rootvfs.
2117 2118 */
2118 2119 if (zfsvfs->z_issnap && (vfsp != rootvfs))
2119 2120 VFS_RELE(zfsvfs->z_parent->z_vfs);
2120 2121
2121 2122 zfsvfs_free(zfsvfs);
2122 2123
2123 2124 atomic_add_32(&zfs_active_fs_count, -1);
2124 2125 }
2125 2126
2126 2127 /*
2127 2128 * VFS_INIT() initialization. Note that there is no VFS_FINI(),
2128 2129 * so we can't safely do any non-idempotent initialization here.
2129 2130 * Leave that to zfs_init() and zfs_fini(), which are called
2130 2131 * from the module's _init() and _fini() entry points.
2131 2132 */
2132 2133 /*ARGSUSED*/
2133 2134 static int
2134 2135 zfs_vfsinit(int fstype, char *name)
2135 2136 {
2136 2137 int error;
2137 2138
2138 2139 zfsfstype = fstype;
2139 2140
2140 2141 /*
2141 2142 * Setup vfsops and vnodeops tables.
2142 2143 */
2143 2144 error = vfs_setfsops(fstype, zfs_vfsops_template, &zfs_vfsops);
2144 2145 if (error != 0) {
2145 2146 cmn_err(CE_WARN, "zfs: bad vfs ops template");
2146 2147 }
2147 2148
2148 2149 error = zfs_create_op_tables();
2149 2150 if (error) {
2150 2151 zfs_remove_op_tables();
2151 2152 cmn_err(CE_WARN, "zfs: bad vnode ops template");
2152 2153 (void) vfs_freevfsops_by_type(zfsfstype);
2153 2154 return (error);
2154 2155 }
2155 2156
2156 2157 mutex_init(&zfs_dev_mtx, NULL, MUTEX_DEFAULT, NULL);
2157 2158
2158 2159 /*
2159 2160 * Unique major number for all zfs mounts.
2160 2161 * If we run out of 32-bit minors, we'll getudev() another major.
2161 2162 */
2162 2163 zfs_major = ddi_name_to_major(ZFS_DRIVER);
2163 2164 zfs_minor = ZFS_MIN_MINOR;
2164 2165
2165 2166 return (0);
2166 2167 }
2167 2168
2168 2169 void
2169 2170 zfs_init(void)
2170 2171 {
2171 2172 /*
2172 2173 * Initialize .zfs directory structures
2173 2174 */
2174 2175 zfsctl_init();
2175 2176
2176 2177 /*
2177 2178 * Initialize znode cache, vnode ops, etc...
2178 2179 */
2179 2180 zfs_znode_init();
2180 2181
2181 2182 dmu_objset_register_type(DMU_OST_ZFS, zfs_space_delta_cb);
2182 2183 }
2183 2184
/*
 * Tear down what zfs_init() set up: the '.zfs' control directory
 * support first, then the znode layer.
 */
void
zfs_fini(void)
{
	zfsctl_fini();		/* '.zfs' directory structures */
	zfs_znode_fini();	/* znode layer */
}
2190 2191
2191 2192 int
2192 2193 zfs_busy(void)
2193 2194 {
2194 2195 return (zfs_active_fs_count != 0);
2195 2196 }
2196 2197
2197 2198 int
2198 2199 zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
2199 2200 {
2200 2201 int error;
2201 2202 objset_t *os = zfsvfs->z_os;
2202 2203 dmu_tx_t *tx;
2203 2204
2204 2205 if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
2205 2206 return (EINVAL);
2206 2207
2207 2208 if (newvers < zfsvfs->z_version)
2208 2209 return (EINVAL);
2209 2210
2210 2211 if (zfs_spa_version_map(newvers) >
2211 2212 spa_version(dmu_objset_spa(zfsvfs->z_os)))
2212 2213 return (ENOTSUP);
2213 2214
2214 2215 tx = dmu_tx_create(os);
2215 2216 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
2216 2217 if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
2217 2218 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
2218 2219 ZFS_SA_ATTRS);
2219 2220 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
2220 2221 }
2221 2222 error = dmu_tx_assign(tx, TXG_WAIT);
2222 2223 if (error) {
2223 2224 dmu_tx_abort(tx);
2224 2225 return (error);
2225 2226 }
2226 2227
2227 2228 error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
2228 2229 8, 1, &newvers, tx);
2229 2230
2230 2231 if (error) {
2231 2232 dmu_tx_commit(tx);
2232 2233 return (error);
2233 2234 }
2234 2235
2235 2236 if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
2236 2237 uint64_t sa_obj;
2237 2238
2238 2239 ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=,
2239 2240 SPA_VERSION_SA);
2240 2241 sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
↓ open down ↓ |
2208 lines elided |
↑ open up ↑ |
2241 2242 DMU_OT_NONE, 0, tx);
2242 2243
2243 2244 error = zap_add(os, MASTER_NODE_OBJ,
2244 2245 ZFS_SA_ATTRS, 8, 1, &sa_obj, tx);
2245 2246 ASSERT3U(error, ==, 0);
2246 2247
2247 2248 VERIFY(0 == sa_set_sa_object(os, sa_obj));
2248 2249 sa_register_update_callback(os, zfs_sa_upgrade);
2249 2250 }
2250 2251
2251 - spa_history_log_internal(LOG_DS_UPGRADE,
2252 - dmu_objset_spa(os), tx, "oldver=%llu newver=%llu dataset = %llu",
2253 - zfsvfs->z_version, newvers, dmu_objset_id(os));
2252 + spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx,
2253 + "from %llu to %llu", zfsvfs->z_version, newvers);
2254 2254
2255 2255 dmu_tx_commit(tx);
2256 2256
2257 2257 zfsvfs->z_version = newvers;
2258 2258
2259 2259 zfs_set_fuid_feature(zfsvfs);
2260 2260
2261 2261 return (0);
2262 2262 }
2263 2263
2264 2264 /*
2265 2265 * Read a property stored within the master node.
2266 2266 */
2267 2267 int
2268 2268 zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
2269 2269 {
2270 2270 const char *pname;
2271 2271 int error = ENOENT;
2272 2272
2273 2273 /*
2274 2274 * Look up the file system's value for the property. For the
2275 2275 * version property, we look up a slightly different string.
2276 2276 */
2277 2277 if (prop == ZFS_PROP_VERSION)
2278 2278 pname = ZPL_VERSION_STR;
2279 2279 else
2280 2280 pname = zfs_prop_to_name(prop);
2281 2281
2282 2282 if (os != NULL)
2283 2283 error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
2284 2284
2285 2285 if (error == ENOENT) {
2286 2286 /* No value set, use the default value */
2287 2287 switch (prop) {
2288 2288 case ZFS_PROP_VERSION:
2289 2289 *value = ZPL_VERSION;
2290 2290 break;
2291 2291 case ZFS_PROP_NORMALIZE:
2292 2292 case ZFS_PROP_UTF8ONLY:
2293 2293 *value = 0;
2294 2294 break;
2295 2295 case ZFS_PROP_CASE:
2296 2296 *value = ZFS_CASE_SENSITIVE;
2297 2297 break;
2298 2298 default:
2299 2299 return (error);
2300 2300 }
2301 2301 error = 0;
2302 2302 }
2303 2303 return (error);
2304 2304 }
2305 2305
2306 2306 static vfsdef_t vfw = {
2307 2307 VFSDEF_VERSION,
2308 2308 MNTTYPE_ZFS,
2309 2309 zfs_vfsinit,
2310 2310 VSW_HASPROTO|VSW_CANRWRO|VSW_CANREMOUNT|VSW_VOLATILEDEV|VSW_STATS|
2311 2311 VSW_XID|VSW_ZMOUNT,
2312 2312 &zfs_mntopts
2313 2313 };
2314 2314
2315 2315 struct modlfs zfs_modlfs = {
2316 2316 &mod_fsops, "ZFS filesystem version " SPA_VERSION_STRING, &vfw
2317 2317 };
↓ open down ↓ |
54 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX