8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 *
24 * Portions Copyright 2010 Robert Milkowski
25 *
26 * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
27 * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
28 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
29 * Copyright (c) 2014 Integros [integros.com]
30 * Copyright (c) 2019, Joyent, Inc.
31 */
32
33 /*
34 * ZFS volume emulation driver.
35 *
36 * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes.
37 * Volumes are accessed through the symbolic links named:
38 *
39 * /dev/zvol/dsk/<pool_name>/<dataset_name>
40 * /dev/zvol/rdsk/<pool_name>/<dataset_name>
41 *
42 * These links are created by the /dev filesystem (sdev_zvolops.c).
43 * Volumes are persistent through reboot. No user command needs to be
44 * run before opening and using a device.
45 */
46
#include <sys/types.h>
#include <sys/param.h>
#include <sys/dirent.h>
#include <sys/policy.h>
#include <sys/fs/zfs.h>
#include <sys/zfs_ioctl.h>
#include <sys/mkdev.h>
#include <sys/zil.h>
#include <sys/refcount.h>
#include <sys/zfs_znode.h>
#include <sys/zfs_rlock.h>
#include <sys/vdev_disk.h>
#include <sys/vdev_impl.h>
#include <sys/vdev_raidz.h>
#include <sys/zvol.h>
#include <sys/dumphdr.h>
#include <sys/zil_impl.h>
#include <sys/dbuf.h>
#include <sys/dmu_tx.h>
#include <sys/zfeature.h>
#include <sys/zio_checksum.h>
#include <sys/zil_impl.h>
#include <sys/ht.h>
#include <sys/dkioc_free_util.h>
#include <sys/zfs_rlock.h>
95
96 #include "zfs_namecheck.h"
97
98 void *zfsdev_state;
99 static char *zvol_tag = "zvol_tag";
100
101 #define ZVOL_DUMPSIZE "dumpsize"
102
103 /*
104 * This lock protects the zfsdev_state structure from being modified
105 * while it's being used, e.g. an open that comes in before a create
106 * finishes. It also protects temporary opens of the dataset so that,
107 * e.g., an open doesn't get a spurious EBUSY.
108 */
109 kmutex_t zfsdev_state_lock;
110 static uint32_t zvol_minors;
111
112 typedef struct zvol_extent {
1255
1256 os = zv->zv_objset;
1257 ASSERT(os != NULL);
1258
1259 bp_mapin(bp);
1260 addr = bp->b_un.b_addr;
1261 resid = bp->b_bcount;
1262
1263 if (resid > 0 && (off < 0 || off >= volsize)) {
1264 bioerror(bp, EIO);
1265 biodone(bp);
1266 return (0);
1267 }
1268
1269 is_dumpified = zv->zv_flags & ZVOL_DUMPIFIED;
1270 sync = ((!(bp->b_flags & B_ASYNC) &&
1271 !(zv->zv_flags & ZVOL_WCE)) ||
1272 (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)) &&
1273 !doread && !is_dumpified;
1274
1275 /*
1276 * There must be no buffer changes when doing a dmu_sync() because
1277 * we can't change the data whilst calculating the checksum.
1278 */
1279 locked_range_t *lr = rangelock_enter(&zv->zv_rangelock, off, resid,
1280 doread ? RL_READER : RL_WRITER);
1281
1282 while (resid != 0 && off < volsize) {
1283 size_t size = MIN(resid, zvol_maxphys);
1284 if (is_dumpified) {
1285 size = MIN(size, P2END(off, zv->zv_volblocksize) - off);
1286 error = zvol_dumpio(zv, addr, off, size,
1287 doread, B_FALSE);
1288 } else if (doread) {
1289 error = dmu_read(os, ZVOL_OBJ, off, size, addr,
1290 DMU_READ_PREFETCH);
1291 } else {
1292 dmu_tx_t *tx = dmu_tx_create(os);
1293 dmu_tx_hold_write(tx, ZVOL_OBJ, off, size);
1294 error = dmu_tx_assign(tx, TXG_WAIT);
1302 }
1303 if (error) {
1304 /* convert checksum errors into IO errors */
1305 if (error == ECKSUM)
1306 error = SET_ERROR(EIO);
1307 break;
1308 }
1309 off += size;
1310 addr += size;
1311 resid -= size;
1312 }
1313 rangelock_exit(lr);
1314
1315 if ((bp->b_resid = resid) == bp->b_bcount)
1316 bioerror(bp, off > volsize ? EINVAL : error);
1317
1318 if (sync)
1319 zil_commit(zv->zv_zilog, ZVOL_OBJ);
1320 biodone(bp);
1321
1322 return (0);
1323 }
1324
1325 /*
1326 * Set the buffer count to the zvol maximum transfer.
1327 * Using our own routine instead of the default minphys()
1328 * means that for larger writes we write bigger buffers on X86
1329 * (128K instead of 56K) and flush the disk write cache less often
1330 * (every zvol_maxphys - currently 1MB) instead of minphys (currently
1331 * 56K on X86 and 128K on sparc).
1332 */
1333 void
1334 zvol_minphys(struct buf *bp)
1335 {
1336 if (bp->b_bcount > zvol_maxphys)
1337 bp->b_bcount = zvol_maxphys;
1338 }
1339
1340 int
1341 zvol_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblocks)
1379 minor_t minor = getminor(dev);
1380 zvol_state_t *zv;
1381 uint64_t volsize;
1382 int error = 0;
1383
1384 zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
1385 if (zv == NULL)
1386 return (SET_ERROR(ENXIO));
1387
1388 volsize = zv->zv_volsize;
1389 if (uio->uio_resid > 0 &&
1390 (uio->uio_loffset < 0 || uio->uio_loffset >= volsize))
1391 return (SET_ERROR(EIO));
1392
1393 if (zv->zv_flags & ZVOL_DUMPIFIED) {
1394 error = physio(zvol_strategy, NULL, dev, B_READ,
1395 zvol_minphys, uio);
1396 return (error);
1397 }
1398
1399 locked_range_t *lr = rangelock_enter(&zv->zv_rangelock,
1400 uio->uio_loffset, uio->uio_resid, RL_READER);
1401 while (uio->uio_resid > 0 && uio->uio_loffset < volsize) {
1402 uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1);
1403
1404 /* don't read past the end */
1405 if (bytes > volsize - uio->uio_loffset)
1406 bytes = volsize - uio->uio_loffset;
1407
1408 error = dmu_read_uio(zv->zv_objset, ZVOL_OBJ, uio, bytes);
1409 if (error) {
1410 /* convert checksum errors into IO errors */
1411 if (error == ECKSUM)
1412 error = SET_ERROR(EIO);
1413 break;
1414 }
1415 }
1416 rangelock_exit(lr);
1417
1418 return (error);
1419 }
1420
1421 /*ARGSUSED*/
1422 int
1423 zvol_write(dev_t dev, uio_t *uio, cred_t *cr)
1424 {
1425 minor_t minor = getminor(dev);
1426 zvol_state_t *zv;
1427 uint64_t volsize;
1428 int error = 0;
1429 boolean_t sync;
1430
1431 zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
1432 if (zv == NULL)
1433 return (SET_ERROR(ENXIO));
1434
1435 volsize = zv->zv_volsize;
1436 if (uio->uio_resid > 0 &&
1437 (uio->uio_loffset < 0 || uio->uio_loffset >= volsize))
1438 return (SET_ERROR(EIO));
1439
1440 if (zv->zv_flags & ZVOL_DUMPIFIED) {
1441 error = physio(zvol_strategy, NULL, dev, B_WRITE,
1442 zvol_minphys, uio);
1443 return (error);
1444 }
1445
1446 sync = !(zv->zv_flags & ZVOL_WCE) ||
1447 (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS);
1448
1449 locked_range_t *lr = rangelock_enter(&zv->zv_rangelock,
1450 uio->uio_loffset, uio->uio_resid, RL_WRITER);
1451 while (uio->uio_resid > 0 && uio->uio_loffset < volsize) {
1452 uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1);
1453 uint64_t off = uio->uio_loffset;
1454 dmu_tx_t *tx = dmu_tx_create(zv->zv_objset);
1455
1456 if (bytes > volsize - off) /* don't write past the end */
1457 bytes = volsize - off;
1458
1459 dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes);
1460 error = dmu_tx_assign(tx, TXG_WAIT);
1461 if (error) {
1462 dmu_tx_abort(tx);
1463 break;
1464 }
1465 error = dmu_write_uio_dnode(zv->zv_dn, uio, bytes, tx);
1466 if (error == 0)
1467 zvol_log_write(zv, tx, off, bytes, sync);
1468 dmu_tx_commit(tx);
1469
1470 if (error)
1471 break;
1472 }
1473 rangelock_exit(lr);
1474
1475 if (sync)
1476 zil_commit(zv->zv_zilog, ZVOL_OBJ);
1477 return (error);
1478 }
1479
1480 int
1481 zvol_getefi(void *arg, int flag, uint64_t vs, uint8_t bs)
1482 {
1483 struct uuid uuid = EFI_RESERVED;
1484 efi_gpe_t gpe = { 0 };
1485 uint32_t crc;
1486 dk_efi_t efi;
1487 int length;
1488 char *ptr;
1489
1490 if (ddi_copyin(arg, &efi, sizeof (dk_efi_t), flag))
1491 return (SET_ERROR(EFAULT));
1492 ptr = (char *)(uintptr_t)efi.dki_data_64;
1493 length = efi.dki_length;
1494 /*
1495 * Some clients may attempt to request a PMBR for the
1496 * zvol. Currently this interface will return EINVAL to
1697 dkmext.dki_media_type = DK_UNKNOWN;
1698 mutex_exit(&zfsdev_state_lock);
1699 if (ddi_copyout(&dkmext, (void *)arg, sizeof (dkmext), flag))
1700 error = SET_ERROR(EFAULT);
1701 return (error);
1702 }
1703
1704 case DKIOCGETEFI:
1705 {
1706 uint64_t vs = zv->zv_volsize;
1707 uint8_t bs = zv->zv_min_bs;
1708
1709 mutex_exit(&zfsdev_state_lock);
1710 error = zvol_getefi((void *)arg, flag, vs, bs);
1711 return (error);
1712 }
1713
1714 case DKIOCFLUSHWRITECACHE:
1715 dkc = (struct dk_callback *)arg;
1716 mutex_exit(&zfsdev_state_lock);
1717 zil_commit(zv->zv_zilog, ZVOL_OBJ);
1718 if ((flag & FKIOCTL) && dkc != NULL && dkc->dkc_callback) {
1719 (*dkc->dkc_callback)(dkc->dkc_cookie, error);
1720 error = 0;
1721 }
1722 return (error);
1723
1724 case DKIOCGETWCE:
1725 {
1726 int wce = (zv->zv_flags & ZVOL_WCE) ? 1 : 0;
1727 if (ddi_copyout(&wce, (void *)arg, sizeof (int),
1728 flag))
1729 error = SET_ERROR(EFAULT);
1730 break;
1731 }
1732 case DKIOCSETWCE:
1733 {
1734 int wce;
1735 if (ddi_copyin((void *)arg, &wce, sizeof (int),
1736 flag)) {
1737 error = SET_ERROR(EFAULT);
1738 break;
1739 }
1740 if (wce) {
1741 zv->zv_flags |= ZVOL_WCE;
1742 mutex_exit(&zfsdev_state_lock);
1743 } else {
1744 zv->zv_flags &= ~ZVOL_WCE;
1745 mutex_exit(&zfsdev_state_lock);
1746 zil_commit(zv->zv_zilog, ZVOL_OBJ);
1747 }
1748 return (0);
1749 }
1750
1751 case DKIOCGGEOM:
1752 case DKIOCGVTOC:
1753 /*
1754 * commands using these (like prtvtoc) expect ENOTSUP
1755 * since we're emulating an EFI label
1756 */
1757 error = SET_ERROR(ENOTSUP);
1758 break;
1759
1760 case DKIOCDUMPINIT:
1761 lr = rangelock_enter(&zv->zv_rangelock, 0, zv->zv_volsize,
1762 RL_WRITER);
1763 error = zvol_dumpify(zv);
1764 rangelock_exit(lr);
1765 break;
1766
1779 dmu_tx_t *tx;
1780
1781 if (!zvol_unmap_enabled)
1782 break;
1783
1784 if (!(flag & FKIOCTL)) {
1785 error = dfl_copyin((void *)arg, &dfl, flag, KM_SLEEP);
1786 if (error != 0)
1787 break;
1788 } else {
1789 dfl = (dkioc_free_list_t *)arg;
1790 ASSERT3U(dfl->dfl_num_exts, <=, DFL_COPYIN_MAX_EXTS);
1791 if (dfl->dfl_num_exts > DFL_COPYIN_MAX_EXTS) {
1792 error = SET_ERROR(EINVAL);
1793 break;
1794 }
1795 }
1796
1797 mutex_exit(&zfsdev_state_lock);
1798
1799 for (int i = 0; i < dfl->dfl_num_exts; i++) {
1800 uint64_t start = dfl->dfl_exts[i].dfle_start,
1801 length = dfl->dfl_exts[i].dfle_length,
1802 end = start + length;
1803
1804 /*
1805 * Apply Postel's Law to length-checking. If they
1806 * overshoot, just blank out until the end, if there's
1807 * a need to blank out anything.
1808 */
1809 if (start >= zv->zv_volsize)
1810 continue; /* No need to do anything... */
1811 if (end > zv->zv_volsize) {
1812 end = DMU_OBJECT_END;
1813 length = end - start;
1814 }
1815
1816 lr = rangelock_enter(&zv->zv_rangelock, start, length,
1817 RL_WRITER);
1818 tx = dmu_tx_create(zv->zv_objset);
1834 }
1835
1836 /*
1837 * If the write-cache is disabled, 'sync' property
1838 * is set to 'always', or if the caller is asking for
1839 * a synchronous free, commit this operation to the zil.
1840 * This will sync any previous uncommitted writes to the
1841 * zvol object.
1842 * Can be overridden by the zvol_unmap_sync_enabled tunable.
1843 */
1844 if ((error == 0) && zvol_unmap_sync_enabled &&
1845 (!(zv->zv_flags & ZVOL_WCE) ||
1846 (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS) ||
1847 (dfl->dfl_flags & DF_WAIT_SYNC))) {
1848 zil_commit(zv->zv_zilog, ZVOL_OBJ);
1849 }
1850
1851 if (!(flag & FKIOCTL))
1852 dfl_free(dfl);
1853
1854 return (error);
1855 }
1856
1857 default:
1858 error = SET_ERROR(ENOTTY);
1859 break;
1860
1861 }
1862 mutex_exit(&zfsdev_state_lock);
1863 return (error);
1864 }
1865
1866 int
1867 zvol_busy(void)
1868 {
1869 return (zvol_minors != 0);
1870 }
1871
1872 void
1873 zvol_init(void)
|
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 *
24 * Portions Copyright 2010 Robert Milkowski
25 *
26 * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
27 * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
28 * Copyright (c) 2014 Integros [integros.com]
29 * Copyright (c) 2019, Joyent, Inc.
30 */
31
32 /*
33 * ZFS volume emulation driver.
34 *
35 * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes.
36 * Volumes are accessed through the symbolic links named:
37 *
38 * /dev/zvol/dsk/<pool_name>/<dataset_name>
39 * /dev/zvol/rdsk/<pool_name>/<dataset_name>
40 *
41 * These links are created by the /dev filesystem (sdev_zvolops.c).
42 * Volumes are persistent through reboot. No user command needs to be
43 * run before opening and using a device.
44 */
45
46 #include <sys/types.h>
47 #include <sys/param.h>
72 #include <sys/dirent.h>
73 #include <sys/policy.h>
74 #include <sys/fs/zfs.h>
75 #include <sys/zfs_ioctl.h>
76 #include <sys/mkdev.h>
77 #include <sys/zil.h>
78 #include <sys/refcount.h>
79 #include <sys/zfs_znode.h>
80 #include <sys/zfs_rlock.h>
81 #include <sys/vdev_disk.h>
82 #include <sys/vdev_impl.h>
83 #include <sys/vdev_raidz.h>
84 #include <sys/zvol.h>
85 #include <sys/dumphdr.h>
86 #include <sys/zil_impl.h>
87 #include <sys/dbuf.h>
88 #include <sys/dmu_tx.h>
89 #include <sys/zfeature.h>
90 #include <sys/zio_checksum.h>
91 #include <sys/zil_impl.h>
92 #include <sys/ht.h>
93 #include <sys/dkioc_free_util.h>
94 #include <sys/zfs_rlock.h>
95
96 #include "zfs_namecheck.h"
97
98 void *zfsdev_state;
99 static char *zvol_tag = "zvol_tag";
100
101 #define ZVOL_DUMPSIZE "dumpsize"
102
103 /*
104 * This lock protects the zfsdev_state structure from being modified
105 * while it's being used, e.g. an open that comes in before a create
106 * finishes. It also protects temporary opens of the dataset so that,
107 * e.g., an open doesn't get a spurious EBUSY.
108 */
109 kmutex_t zfsdev_state_lock;
110 static uint32_t zvol_minors;
111
112 typedef struct zvol_extent {
1255
1256 os = zv->zv_objset;
1257 ASSERT(os != NULL);
1258
1259 bp_mapin(bp);
1260 addr = bp->b_un.b_addr;
1261 resid = bp->b_bcount;
1262
1263 if (resid > 0 && (off < 0 || off >= volsize)) {
1264 bioerror(bp, EIO);
1265 biodone(bp);
1266 return (0);
1267 }
1268
1269 is_dumpified = zv->zv_flags & ZVOL_DUMPIFIED;
1270 sync = ((!(bp->b_flags & B_ASYNC) &&
1271 !(zv->zv_flags & ZVOL_WCE)) ||
1272 (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)) &&
1273 !doread && !is_dumpified;
1274
1275 ht_begin_unsafe();
1276
1277 /*
1278 * There must be no buffer changes when doing a dmu_sync() because
1279 * we can't change the data whilst calculating the checksum.
1280 */
1281 locked_range_t *lr = rangelock_enter(&zv->zv_rangelock, off, resid,
1282 doread ? RL_READER : RL_WRITER);
1283
1284 while (resid != 0 && off < volsize) {
1285 size_t size = MIN(resid, zvol_maxphys);
1286 if (is_dumpified) {
1287 size = MIN(size, P2END(off, zv->zv_volblocksize) - off);
1288 error = zvol_dumpio(zv, addr, off, size,
1289 doread, B_FALSE);
1290 } else if (doread) {
1291 error = dmu_read(os, ZVOL_OBJ, off, size, addr,
1292 DMU_READ_PREFETCH);
1293 } else {
1294 dmu_tx_t *tx = dmu_tx_create(os);
1295 dmu_tx_hold_write(tx, ZVOL_OBJ, off, size);
1296 error = dmu_tx_assign(tx, TXG_WAIT);
1304 }
1305 if (error) {
1306 /* convert checksum errors into IO errors */
1307 if (error == ECKSUM)
1308 error = SET_ERROR(EIO);
1309 break;
1310 }
1311 off += size;
1312 addr += size;
1313 resid -= size;
1314 }
1315 rangelock_exit(lr);
1316
1317 if ((bp->b_resid = resid) == bp->b_bcount)
1318 bioerror(bp, off > volsize ? EINVAL : error);
1319
1320 if (sync)
1321 zil_commit(zv->zv_zilog, ZVOL_OBJ);
1322 biodone(bp);
1323
1324 ht_end_unsafe();
1325
1326 return (0);
1327 }
1328
1329 /*
1330 * Set the buffer count to the zvol maximum transfer.
1331 * Using our own routine instead of the default minphys()
1332 * means that for larger writes we write bigger buffers on X86
1333 * (128K instead of 56K) and flush the disk write cache less often
1334 * (every zvol_maxphys - currently 1MB) instead of minphys (currently
1335 * 56K on X86 and 128K on sparc).
1336 */
1337 void
1338 zvol_minphys(struct buf *bp)
1339 {
1340 if (bp->b_bcount > zvol_maxphys)
1341 bp->b_bcount = zvol_maxphys;
1342 }
1343
1344 int
1345 zvol_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblocks)
1383 minor_t minor = getminor(dev);
1384 zvol_state_t *zv;
1385 uint64_t volsize;
1386 int error = 0;
1387
1388 zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
1389 if (zv == NULL)
1390 return (SET_ERROR(ENXIO));
1391
1392 volsize = zv->zv_volsize;
1393 if (uio->uio_resid > 0 &&
1394 (uio->uio_loffset < 0 || uio->uio_loffset >= volsize))
1395 return (SET_ERROR(EIO));
1396
1397 if (zv->zv_flags & ZVOL_DUMPIFIED) {
1398 error = physio(zvol_strategy, NULL, dev, B_READ,
1399 zvol_minphys, uio);
1400 return (error);
1401 }
1402
1403 ht_begin_unsafe();
1404
1405 locked_range_t *lr = rangelock_enter(&zv->zv_rangelock,
1406 uio->uio_loffset, uio->uio_resid, RL_READER);
1407 while (uio->uio_resid > 0 && uio->uio_loffset < volsize) {
1408 uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1);
1409
1410 /* don't read past the end */
1411 if (bytes > volsize - uio->uio_loffset)
1412 bytes = volsize - uio->uio_loffset;
1413
1414 error = dmu_read_uio(zv->zv_objset, ZVOL_OBJ, uio, bytes);
1415 if (error) {
1416 /* convert checksum errors into IO errors */
1417 if (error == ECKSUM)
1418 error = SET_ERROR(EIO);
1419 break;
1420 }
1421 }
1422 rangelock_exit(lr);
1423
1424 ht_end_unsafe();
1425
1426 return (error);
1427 }
1428
/*ARGSUSED*/
/*
 * Write entry point for the zvol character device.
 *
 * Rejects writes that begin outside the volume, routes dumpified zvols
 * through physio()/zvol_strategy(), and otherwise copies the uio into
 * the DMU one chunk at a time, each chunk in its own transaction under
 * the volume range lock.  If the write cache is disabled or the
 * dataset's sync policy is "always", the ZIL is committed before
 * returning.
 */
int
zvol_write(dev_t dev, uio_t *uio, cred_t *cr)
{
	minor_t minor = getminor(dev);
	zvol_state_t *zv;
	uint64_t volsize;
	int error = 0;
	boolean_t sync;

	zv = zfsdev_get_soft_state(minor, ZSST_ZVOL);
	if (zv == NULL)
		return (SET_ERROR(ENXIO));

	volsize = zv->zv_volsize;
	/* A write that begins at or beyond the end of the volume fails. */
	if (uio->uio_resid > 0 &&
	    (uio->uio_loffset < 0 || uio->uio_loffset >= volsize))
		return (SET_ERROR(EIO));

	if (zv->zv_flags & ZVOL_DUMPIFIED) {
		/* Dump devices bypass the DMU and go through physio(). */
		error = physio(zvol_strategy, NULL, dev, B_WRITE,
		    zvol_minphys, uio);
		return (error);
	}

	/*
	 * ht_begin_unsafe()/ht_end_unsafe() bracket the DMU/ZIL work, as
	 * in the other zvol I/O paths in this file.
	 */
	ht_begin_unsafe();

	/* Synchronous unless the write cache is enabled, or sync=always. */
	sync = !(zv->zv_flags & ZVOL_WCE) ||
	    (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS);

	locked_range_t *lr = rangelock_enter(&zv->zv_rangelock,
	    uio->uio_loffset, uio->uio_resid, RL_WRITER);
	while (uio->uio_resid > 0 && uio->uio_loffset < volsize) {
		uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1);
		uint64_t off = uio->uio_loffset;
		dmu_tx_t *tx = dmu_tx_create(zv->zv_objset);

		if (bytes > volsize - off)	/* don't write past the end */
			bytes = volsize - off;

		dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes);
		error = dmu_tx_assign(tx, TXG_WAIT);
		if (error) {
			dmu_tx_abort(tx);
			break;
		}
		error = dmu_write_uio_dnode(zv->zv_dn, uio, bytes, tx);
		if (error == 0)
			zvol_log_write(zv, tx, off, bytes, sync);
		/* Commit even on error; the failed write simply isn't logged. */
		dmu_tx_commit(tx);

		if (error)
			break;
	}
	rangelock_exit(lr);

	if (sync)
		zil_commit(zv->zv_zilog, ZVOL_OBJ);

	ht_end_unsafe();

	return (error);
}
1492
1493 int
1494 zvol_getefi(void *arg, int flag, uint64_t vs, uint8_t bs)
1495 {
1496 struct uuid uuid = EFI_RESERVED;
1497 efi_gpe_t gpe = { 0 };
1498 uint32_t crc;
1499 dk_efi_t efi;
1500 int length;
1501 char *ptr;
1502
1503 if (ddi_copyin(arg, &efi, sizeof (dk_efi_t), flag))
1504 return (SET_ERROR(EFAULT));
1505 ptr = (char *)(uintptr_t)efi.dki_data_64;
1506 length = efi.dki_length;
1507 /*
1508 * Some clients may attempt to request a PMBR for the
1509 * zvol. Currently this interface will return EINVAL to
1710 dkmext.dki_media_type = DK_UNKNOWN;
1711 mutex_exit(&zfsdev_state_lock);
1712 if (ddi_copyout(&dkmext, (void *)arg, sizeof (dkmext), flag))
1713 error = SET_ERROR(EFAULT);
1714 return (error);
1715 }
1716
1717 case DKIOCGETEFI:
1718 {
1719 uint64_t vs = zv->zv_volsize;
1720 uint8_t bs = zv->zv_min_bs;
1721
1722 mutex_exit(&zfsdev_state_lock);
1723 error = zvol_getefi((void *)arg, flag, vs, bs);
1724 return (error);
1725 }
1726
1727 case DKIOCFLUSHWRITECACHE:
1728 dkc = (struct dk_callback *)arg;
1729 mutex_exit(&zfsdev_state_lock);
1730
1731 ht_begin_unsafe();
1732
1733 zil_commit(zv->zv_zilog, ZVOL_OBJ);
1734 if ((flag & FKIOCTL) && dkc != NULL && dkc->dkc_callback) {
1735 (*dkc->dkc_callback)(dkc->dkc_cookie, error);
1736 error = 0;
1737 }
1738
1739 ht_end_unsafe();
1740
1741 return (error);
1742
1743 case DKIOCGETWCE:
1744 {
1745 int wce = (zv->zv_flags & ZVOL_WCE) ? 1 : 0;
1746 if (ddi_copyout(&wce, (void *)arg, sizeof (int),
1747 flag))
1748 error = SET_ERROR(EFAULT);
1749 break;
1750 }
1751 case DKIOCSETWCE:
1752 {
1753 int wce;
1754 if (ddi_copyin((void *)arg, &wce, sizeof (int),
1755 flag)) {
1756 error = SET_ERROR(EFAULT);
1757 break;
1758 }
1759 if (wce) {
1760 zv->zv_flags |= ZVOL_WCE;
1761 mutex_exit(&zfsdev_state_lock);
1762 } else {
1763 zv->zv_flags &= ~ZVOL_WCE;
1764 mutex_exit(&zfsdev_state_lock);
1765 ht_begin_unsafe();
1766 zil_commit(zv->zv_zilog, ZVOL_OBJ);
1767 ht_end_unsafe();
1768 }
1769 return (0);
1770 }
1771
1772 case DKIOCGGEOM:
1773 case DKIOCGVTOC:
1774 /*
1775 * commands using these (like prtvtoc) expect ENOTSUP
1776 * since we're emulating an EFI label
1777 */
1778 error = SET_ERROR(ENOTSUP);
1779 break;
1780
1781 case DKIOCDUMPINIT:
1782 lr = rangelock_enter(&zv->zv_rangelock, 0, zv->zv_volsize,
1783 RL_WRITER);
1784 error = zvol_dumpify(zv);
1785 rangelock_exit(lr);
1786 break;
1787
1800 dmu_tx_t *tx;
1801
1802 if (!zvol_unmap_enabled)
1803 break;
1804
1805 if (!(flag & FKIOCTL)) {
1806 error = dfl_copyin((void *)arg, &dfl, flag, KM_SLEEP);
1807 if (error != 0)
1808 break;
1809 } else {
1810 dfl = (dkioc_free_list_t *)arg;
1811 ASSERT3U(dfl->dfl_num_exts, <=, DFL_COPYIN_MAX_EXTS);
1812 if (dfl->dfl_num_exts > DFL_COPYIN_MAX_EXTS) {
1813 error = SET_ERROR(EINVAL);
1814 break;
1815 }
1816 }
1817
1818 mutex_exit(&zfsdev_state_lock);
1819
1820 ht_begin_unsafe();
1821
1822 for (int i = 0; i < dfl->dfl_num_exts; i++) {
1823 uint64_t start = dfl->dfl_exts[i].dfle_start,
1824 length = dfl->dfl_exts[i].dfle_length,
1825 end = start + length;
1826
1827 /*
1828 * Apply Postel's Law to length-checking. If they
1829 * overshoot, just blank out until the end, if there's
1830 * a need to blank out anything.
1831 */
1832 if (start >= zv->zv_volsize)
1833 continue; /* No need to do anything... */
1834 if (end > zv->zv_volsize) {
1835 end = DMU_OBJECT_END;
1836 length = end - start;
1837 }
1838
1839 lr = rangelock_enter(&zv->zv_rangelock, start, length,
1840 RL_WRITER);
1841 tx = dmu_tx_create(zv->zv_objset);
1857 }
1858
1859 /*
1860 * If the write-cache is disabled, 'sync' property
1861 * is set to 'always', or if the caller is asking for
1862 * a synchronous free, commit this operation to the zil.
1863 * This will sync any previous uncommitted writes to the
1864 * zvol object.
1865 * Can be overridden by the zvol_unmap_sync_enabled tunable.
1866 */
1867 if ((error == 0) && zvol_unmap_sync_enabled &&
1868 (!(zv->zv_flags & ZVOL_WCE) ||
1869 (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS) ||
1870 (dfl->dfl_flags & DF_WAIT_SYNC))) {
1871 zil_commit(zv->zv_zilog, ZVOL_OBJ);
1872 }
1873
1874 if (!(flag & FKIOCTL))
1875 dfl_free(dfl);
1876
1877 ht_end_unsafe();
1878
1879 return (error);
1880 }
1881
1882 default:
1883 error = SET_ERROR(ENOTTY);
1884 break;
1885
1886 }
1887 mutex_exit(&zfsdev_state_lock);
1888 return (error);
1889 }
1890
/*
 * Return nonzero while any zvol minors are outstanding (zvol_minors is
 * presumably incremented/decremented where minors are created and
 * removed — maintained outside this chunk).
 */
int
zvol_busy(void)
{
	return (zvol_minors != 0);
}
1896
1897 void
1898 zvol_init(void)
|