#include <sys/zfs_dir.h>
#include <sys/zfs_acl.h>
#include <sys/zfs_ioctl.h>
#include <sys/fs/zfs.h>
#include <sys/dmu.h>
#include <sys/dmu_objset.h>
#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/dbuf.h>
#include <sys/zap.h>
#include <sys/sa.h>
#include <sys/dirent.h>
#include <sys/policy.h>
#include <sys/sunddi.h>
#include <sys/filio.h>
#include <sys/sid.h>
#include "fs/fs_subr.h"
#include <sys/zfs_ctldir.h>
#include <sys/zfs_fuid.h>
#include <sys/zfs_sa.h>
#include <sys/zfeature.h>
#include <sys/dnlc.h>
#include <sys/zfs_rlock.h>
#include <sys/extdirent.h>
#include <sys/kidmap.h>
#include <sys/cred.h>
#include <sys/attr.h>

/*
 * Programming rules.
 *
 * Each vnode op performs some logical unit of work. To do this, the ZPL must
 * properly lock its in-core state, create a DMU transaction, do the work,
 * record this work in the intent log (ZIL), commit the DMU transaction,
 * and wait for the intent log to commit if it is a synchronous operation.
 * Moreover, the vnode ops must work in both normal and log replay context.
 * The ordering of events is important to avoid deadlocks and references
 * to freed memory. The example below illustrates the following Big Rules:
 *
 *  (1) A check must be made in each zfs thread for a mounted file system.
 *      This is done avoiding races using ZFS_ENTER(zfsvfs).
 * ...
 */
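
/*
 * A condensed, illustrative sketch of that ordering for a typical ZPL
 * vnode op.  Angle-bracketed steps are placeholders, not real functions;
 * this is not the complete rule list, which is elided from this excerpt.
 *
 *      ZFS_ENTER(zfsvfs);              // bail if the fs is unmounted
 * top:
 *      <grab range/dir locks>;         // lock the in-core state you need
 *      tx = dmu_tx_create(os);         // start a DMU transaction
 *      dmu_tx_hold_*(tx, ...);         // declare what you might modify
 *      error = dmu_tx_assign(tx, TXG_NOWAIT);
 *      if (error) {
 *              <drop locks>;
 *              if (error == ERESTART) {
 *                      dmu_tx_wait(tx);
 *                      dmu_tx_abort(tx);
 *                      goto top;       // retry against the next txg
 *              }
 *              dmu_tx_abort(tx);
 *              ZFS_EXIT(zfsvfs);
 *              return (error);
 *      }
 *      <do the real work>;
 *      zfs_log_*(...);                 // record the change in the ZIL
 *      dmu_tx_commit(tx);
 *      <drop locks>;
 *      zil_commit(zilog, foid);        // only for synchronous semantics
 *      ZFS_EXIT(zfsvfs);
 *      return (error);
 */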

/* ... (the remaining rules and most of zfs_holey() are elided here) ... */
                return (SET_ERROR(ENXIO));

        /*
         * We could find a hole that begins after the logical end-of-file,
         * because dmu_offset_next() only works on whole blocks. If the
         * EOF falls mid-block, then indicate that the "virtual hole"
         * at the end of the file begins at the logical EOF, rather than
         * at the end of the last block.
         */
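        /*
         * For example (illustrative numbers): with a 128K record size and a
         * 200K file, the last block ends at 256K, so the DMU may report a
         * hole starting at 256K even though the logical EOF is 200K; the
         * check below clamps the reported offset back to 200K.
         */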
        if (noff > file_sz) {
                ASSERT(hole);
                noff = file_sz;
        }

        if (noff < *off)
                return (error);
        *off = noff;
        return (error);
}

static int zfs_zero_write(vnode_t *vp, uint64_t size, cred_t *cr,
    caller_context_t *ct);

/* ARGSUSED */
static int
zfs_ioctl(vnode_t *vp, int com, intptr_t data, int flag, cred_t *cred,
    int *rvalp, caller_context_t *ct)
{
        offset_t off;
        int error;
        zfsvfs_t *zfsvfs;
        znode_t *zp;
        uint64_t size;

        switch (com) {
        case _FIOFFS:
                return (zfs_sync(vp->v_vfsp, 0, cred));

        /*
         * The following two ioctls are used by bfu.  Fake them out to
         * avoid bfu errors.
         */
        case _FIOGDIO:
        case _FIOSDIO:
                return (0);

        case _FIO_SEEK_DATA:
        case _FIO_SEEK_HOLE:
                if (ddi_copyin((void *)data, &off, sizeof (off), flag))
                        return (SET_ERROR(EFAULT));

                zp = VTOZ(vp);
                zfsvfs = zp->z_zfsvfs;
                ZFS_ENTER(zfsvfs);
                ZFS_VERIFY_ZP(zp);

                /* offset parameter is in/out */
                error = zfs_holey(vp, com, &off);
                ZFS_EXIT(zfsvfs);
                if (error)
                        return (error);
                if (ddi_copyout(&off, (void *)data, sizeof (off), flag))
                        return (SET_ERROR(EFAULT));
                return (0);
        case _FIO_RESERVE_SPACE:
                if (ddi_copyin((void *)data, &size, sizeof (size), flag))
                        return (SET_ERROR(EFAULT));
                error = zfs_zero_write(vp, size, cred, ct);
                return (error);
        }
        return (SET_ERROR(ENOTTY));
}
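
/*
 * A sketch of how these ioctls are typically reached from user context
 * (illustrative only; the actual consumers are not part of this file):
 *
 *      offset_t off = 0;
 *      (void) ioctl(fd, _FIO_SEEK_HOLE, &off);         // off is in/out
 *
 *      uint64_t size = 1ULL << 30;
 *      (void) ioctl(fd, _FIO_RESERVE_SPACE, &size);    // zero-fill 1 GB
 *
 * _FIO_SEEK_DATA and _FIO_SEEK_HOLE also back the SEEK_DATA and SEEK_HOLE
 * whence values of lseek(2).
 */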

/*
 * Utility functions to map and unmap a single physical page. These
 * are used to manage the mappable copies of ZFS file data, and therefore
 * do not update ref/mod bits.
 */
caddr_t
zfs_map_page(page_t *pp, enum seg_rw rw)
{
        if (kpm_enable)
                return (hat_kpm_mapin(pp, 0));
        ASSERT(rw == S_READ || rw == S_WRITE);
        return (ppmapin(pp, PROT_READ | ((rw == S_WRITE) ? PROT_WRITE : 0),
            (caddr_t)-1));
}
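
/*
 * Typical usage pairs this with its unmapping counterpart, zfs_unmap_page()
 * (a sketch; the counterpart's body is not shown in this excerpt):
 *
 *      caddr_t va = zfs_map_page(pp, S_WRITE);
 *      (void) dmu_read(os, object, off, bytes, va + off_in_page,
 *          DMU_READ_PREFETCH);
 *      zfs_unmap_page(pp, va);
 */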

/* ... (intervening code, including zfs_unmap_page() and most of zfs_write(), is elided here) ... */

        zfs_range_unlock(rl);

        /*
         * If we're in replay mode, or we made no progress, return error.
         * Otherwise, it's at least a partial write, so it's successful.
         */
        if (zfsvfs->z_replay || uio->uio_resid == start_resid) {
                ZFS_EXIT(zfsvfs);
                return (error);
        }

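        /*
         * Push the change to stable storage now if the caller asked for
         * synchronous semantics (O_SYNC/O_DSYNC) or the dataset is set to
         * sync=always; otherwise the ZIL entry waits for a later commit.
         */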
        if (ioflag & (FSYNC | FDSYNC) ||
            zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
                zil_commit(zilog, zp->z_id);

        ZFS_EXIT(zfsvfs);
        return (0);
}

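/*
 * Zero-fill reservations are written in chunks of this size so that each
 * chunk fits comfortably in a single DMU transaction.
 */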
#define ZFS_RESERVE_CHUNK       (2 * 1024 * 1024)
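
/*
 * Reserve space for a file by writing zeroes over the range [0, size).
 * Only empty files are eligible; the work is done one ZFS_RESERVE_CHUNK
 * at a time, each chunk in its own transaction, and requires the pool's
 * SPA_FEATURE_SPACE_RESERVATION feature to be enabled.
 */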
/* ARGSUSED */
static int
zfs_zero_write(vnode_t *vp, uint64_t size, cred_t *cr, caller_context_t *ct)
{
        znode_t *zp = VTOZ(vp);
        zfsvfs_t *zfsvfs = zp->z_zfsvfs;
        int count = 0;
        sa_bulk_attr_t bulk[4];
        uint64_t mtime[2], ctime[2];
        rl_t *rl;
        int error = 0;
        dmu_tx_t *tx = NULL;
        uint64_t end_size;
        uint64_t pos = 0;

        if (zp->z_size > 0)
                return (SET_ERROR(EFBIG));
        if (size == 0)
                return (0);

        ZFS_ENTER(zfsvfs);
        ZFS_VERIFY_ZP(zp);

        if (!spa_feature_is_enabled(zfsvfs->z_os->os_spa,
            SPA_FEATURE_SPACE_RESERVATION)) {
                ZFS_EXIT(zfsvfs);
                return (SET_ERROR(ENOTSUP));
        }

        SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
        SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
        SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
            &zp->z_size, 8);
        SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
            &zp->z_pflags, 8);

        /*
         * If the file has been marked immutable or read-only, fail with
         * EPERM.
         */
        if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY))) {
                ZFS_EXIT(zfsvfs);
                return (SET_ERROR(EPERM));
        }

        rl = zfs_range_lock(zp, 0, size, RL_WRITER);

        if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) ||
            zfs_owner_overquota(zfsvfs, zp, B_TRUE)) {
                error = SET_ERROR(EDQUOT);
                goto out;
        }

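        /*
         * Zero-fill the range one ZFS_RESERVE_CHUNK at a time.  Each chunk
         * gets its own transaction so a large reservation never has to fit
         * into a single tx; the file size and timestamps are updated as the
         * chunks land.
         */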
        while (pos < size) {
                uint64_t length = size - pos;
                length = MIN(length, ZFS_RESERVE_CHUNK);
again:
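                /*
                 * Assign the transaction without blocking.  If the current
                 * open txg is full (ERESTART), wait for the next txg and
                 * retry from the label above; any other error aborts the
                 * reservation.
                 */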
                tx = dmu_tx_create(zfsvfs->z_os);
                dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
                dmu_tx_hold_write(tx, zp->z_id, pos, length);
                zfs_sa_upgrade_txholds(tx, zp);
                error = dmu_tx_assign(tx, TXG_NOWAIT);
                if (error) {
                        if (error == ERESTART) {
                                dmu_tx_wait(tx);
                                dmu_tx_abort(tx);
                                goto again;
                        }
                        dmu_tx_abort(tx);
                        goto out;
                }

                if (pos == 0) {
                        zfs_grow_blocksize(zp,
                            MIN(size, zfsvfs->z_max_blksz), tx);
                }
                dmu_write_zero(zfsvfs->z_os, zp->z_id, pos, length, tx);

                zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime,
                    ctime, B_TRUE);

                pos += length;
                while ((end_size = zp->z_size) < pos)
                        (void) atomic_cas_64(&zp->z_size, end_size, pos);

                error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);

                dmu_tx_commit(tx);
                if (error)
                        goto out;
        }
out:
        zfs_range_unlock(rl);
        ZFS_EXIT(zfsvfs);

        return (error);
}

void
zfs_get_done(zgd_t *zgd, int error)
{
        znode_t *zp = zgd->zgd_private;
        objset_t *os = zp->z_zfsvfs->z_os;

        if (zgd->zgd_db)
                dmu_buf_rele(zgd->zgd_db, zgd);

        zfs_range_unlock(zgd->zgd_rl);

        /*
         * Release the vnode asynchronously as we currently have the
         * txg stopped from syncing.
         */
        VN_RELE_ASYNC(ZTOV(zp), dsl_pool_vnrele_taskq(dmu_objset_pool(os)));

        if (error == 0 && zgd->zgd_bp)
                zil_add_block(zgd->zgd_zilog, zgd->zgd_bp);
