Print this page
OS-7125 Need mitigation of L1TF (CVE-2018-3646)
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/zfs/zvol.c
          +++ new/usr/src/uts/common/fs/zfs/zvol.c
↓ open down ↓ 17 lines elided ↑ open up ↑
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   *
  24   24   * Portions Copyright 2010 Robert Milkowski
  25   25   *
  26   26   * Copyright 2017 Nexenta Systems, Inc.  All rights reserved.
  27   27   * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
  28      - * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  29   28   * Copyright (c) 2014 Integros [integros.com]
  30   29   * Copyright (c) 2019, Joyent, Inc.
  31   30   */
  32   31  
  33   32  /*
  34   33   * ZFS volume emulation driver.
  35   34   *
  36   35   * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes.
  37   36   * Volumes are accessed through the symbolic links named:
  38   37   *
↓ open down ↓ 44 lines elided ↑ open up ↑
  83   82  #include <sys/vdev_impl.h>
  84   83  #include <sys/vdev_raidz.h>
  85   84  #include <sys/zvol.h>
  86   85  #include <sys/dumphdr.h>
  87   86  #include <sys/zil_impl.h>
  88   87  #include <sys/dbuf.h>
  89   88  #include <sys/dmu_tx.h>
  90   89  #include <sys/zfeature.h>
  91   90  #include <sys/zio_checksum.h>
  92   91  #include <sys/zil_impl.h>
       92 +#include <sys/ht.h>
  93   93  #include <sys/dkioc_free_util.h>
  94   94  #include <sys/zfs_rlock.h>
  95   95  
  96   96  #include "zfs_namecheck.h"
  97   97  
  98   98  void *zfsdev_state;
  99   99  static char *zvol_tag = "zvol_tag";
 100  100  
 101  101  #define ZVOL_DUMPSIZE           "dumpsize"
 102  102  
↓ open down ↓ 1162 lines elided ↑ open up ↑
1265 1265                  biodone(bp);
1266 1266                  return (0);
1267 1267          }
1268 1268  
1269 1269          is_dumpified = zv->zv_flags & ZVOL_DUMPIFIED;
1270 1270          sync = ((!(bp->b_flags & B_ASYNC) &&
1271 1271              !(zv->zv_flags & ZVOL_WCE)) ||
1272 1272              (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)) &&
1273 1273              !doread && !is_dumpified;
1274 1274  
     1275 +        ht_begin_unsafe();
     1276 +
1275 1277          /*
1276 1278           * There must be no buffer changes when doing a dmu_sync() because
1277 1279           * we can't change the data whilst calculating the checksum.
1278 1280           */
1279 1281          locked_range_t *lr = rangelock_enter(&zv->zv_rangelock, off, resid,
1280 1282              doread ? RL_READER : RL_WRITER);
1281 1283  
1282 1284          while (resid != 0 && off < volsize) {
1283 1285                  size_t size = MIN(resid, zvol_maxphys);
1284 1286                  if (is_dumpified) {
↓ open down ↓ 27 lines elided ↑ open up ↑
1312 1314          }
1313 1315          rangelock_exit(lr);
1314 1316  
1315 1317          if ((bp->b_resid = resid) == bp->b_bcount)
1316 1318                  bioerror(bp, off > volsize ? EINVAL : error);
1317 1319  
1318 1320          if (sync)
1319 1321                  zil_commit(zv->zv_zilog, ZVOL_OBJ);
1320 1322          biodone(bp);
1321 1323  
     1324 +        ht_end_unsafe();
     1325 +
1322 1326          return (0);
1323 1327  }
1324 1328  
1325 1329  /*
1326 1330   * Set the buffer count to the zvol maximum transfer.
1327 1331   * Using our own routine instead of the default minphys()
1328 1332   * means that for larger writes we write bigger buffers on X86
1329 1333   * (128K instead of 56K) and flush the disk write cache less often
1330 1334   * (every zvol_maxphys - currently 1MB) instead of minphys (currently
1331 1335   * 56K on X86 and 128K on sparc).
↓ open down ↓ 57 lines elided ↑ open up ↑
1389 1393          if (uio->uio_resid > 0 &&
1390 1394              (uio->uio_loffset < 0 || uio->uio_loffset >= volsize))
1391 1395                  return (SET_ERROR(EIO));
1392 1396  
1393 1397          if (zv->zv_flags & ZVOL_DUMPIFIED) {
1394 1398                  error = physio(zvol_strategy, NULL, dev, B_READ,
1395 1399                      zvol_minphys, uio);
1396 1400                  return (error);
1397 1401          }
1398 1402  
     1403 +        ht_begin_unsafe();
     1404 +
1399 1405          locked_range_t *lr = rangelock_enter(&zv->zv_rangelock,
1400 1406              uio->uio_loffset, uio->uio_resid, RL_READER);
1401 1407          while (uio->uio_resid > 0 && uio->uio_loffset < volsize) {
1402 1408                  uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1);
1403 1409  
1404 1410                  /* don't read past the end */
1405 1411                  if (bytes > volsize - uio->uio_loffset)
1406 1412                          bytes = volsize - uio->uio_loffset;
1407 1413  
1408 1414                  error =  dmu_read_uio(zv->zv_objset, ZVOL_OBJ, uio, bytes);
1409 1415                  if (error) {
1410 1416                          /* convert checksum errors into IO errors */
1411 1417                          if (error == ECKSUM)
1412 1418                                  error = SET_ERROR(EIO);
1413 1419                          break;
1414 1420                  }
1415 1421          }
1416 1422          rangelock_exit(lr);
1417 1423  
     1424 +        ht_end_unsafe();
     1425 +
1418 1426          return (error);
1419 1427  }
1420 1428  
1421 1429  /*ARGSUSED*/
1422 1430  int
1423 1431  zvol_write(dev_t dev, uio_t *uio, cred_t *cr)
1424 1432  {
1425 1433          minor_t minor = getminor(dev);
1426 1434          zvol_state_t *zv;
1427 1435          uint64_t volsize;
↓ open down ↓ 8 lines elided ↑ open up ↑
1436 1444          if (uio->uio_resid > 0 &&
1437 1445              (uio->uio_loffset < 0 || uio->uio_loffset >= volsize))
1438 1446                  return (SET_ERROR(EIO));
1439 1447  
1440 1448          if (zv->zv_flags & ZVOL_DUMPIFIED) {
1441 1449                  error = physio(zvol_strategy, NULL, dev, B_WRITE,
1442 1450                      zvol_minphys, uio);
1443 1451                  return (error);
1444 1452          }
1445 1453  
     1454 +        ht_begin_unsafe();
     1455 +
1446 1456          sync = !(zv->zv_flags & ZVOL_WCE) ||
1447 1457              (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS);
1448 1458  
1449 1459          locked_range_t *lr = rangelock_enter(&zv->zv_rangelock,
1450 1460              uio->uio_loffset, uio->uio_resid, RL_WRITER);
1451 1461          while (uio->uio_resid > 0 && uio->uio_loffset < volsize) {
1452 1462                  uint64_t bytes = MIN(uio->uio_resid, DMU_MAX_ACCESS >> 1);
1453 1463                  uint64_t off = uio->uio_loffset;
1454 1464                  dmu_tx_t *tx = dmu_tx_create(zv->zv_objset);
1455 1465  
↓ open down ↓ 11 lines elided ↑ open up ↑
1467 1477                          zvol_log_write(zv, tx, off, bytes, sync);
1468 1478                  dmu_tx_commit(tx);
1469 1479  
1470 1480                  if (error)
1471 1481                          break;
1472 1482          }
1473 1483          rangelock_exit(lr);
1474 1484  
1475 1485          if (sync)
1476 1486                  zil_commit(zv->zv_zilog, ZVOL_OBJ);
     1487 +
     1488 +        ht_end_unsafe();
     1489 +
1477 1490          return (error);
1478 1491  }
1479 1492  
1480 1493  int
1481 1494  zvol_getefi(void *arg, int flag, uint64_t vs, uint8_t bs)
1482 1495  {
1483 1496          struct uuid uuid = EFI_RESERVED;
1484 1497          efi_gpe_t gpe = { 0 };
1485 1498          uint32_t crc;
1486 1499          dk_efi_t efi;
↓ open down ↓ 220 lines elided ↑ open up ↑
1707 1720                  uint8_t bs = zv->zv_min_bs;
1708 1721  
1709 1722                  mutex_exit(&zfsdev_state_lock);
1710 1723                  error = zvol_getefi((void *)arg, flag, vs, bs);
1711 1724                  return (error);
1712 1725          }
1713 1726  
1714 1727          case DKIOCFLUSHWRITECACHE:
1715 1728                  dkc = (struct dk_callback *)arg;
1716 1729                  mutex_exit(&zfsdev_state_lock);
     1730 +
     1731 +                ht_begin_unsafe();
     1732 +
1717 1733                  zil_commit(zv->zv_zilog, ZVOL_OBJ);
1718 1734                  if ((flag & FKIOCTL) && dkc != NULL && dkc->dkc_callback) {
1719 1735                          (*dkc->dkc_callback)(dkc->dkc_cookie, error);
1720 1736                          error = 0;
1721 1737                  }
     1738 +
     1739 +                ht_end_unsafe();
     1740 +
1722 1741                  return (error);
1723 1742  
1724 1743          case DKIOCGETWCE:
1725 1744          {
1726 1745                  int wce = (zv->zv_flags & ZVOL_WCE) ? 1 : 0;
1727 1746                  if (ddi_copyout(&wce, (void *)arg, sizeof (int),
1728 1747                      flag))
1729 1748                          error = SET_ERROR(EFAULT);
1730 1749                  break;
1731 1750          }
↓ open down ↓ 4 lines elided ↑ open up ↑
1736 1755                      flag)) {
1737 1756                          error = SET_ERROR(EFAULT);
1738 1757                          break;
1739 1758                  }
1740 1759                  if (wce) {
1741 1760                          zv->zv_flags |= ZVOL_WCE;
1742 1761                          mutex_exit(&zfsdev_state_lock);
1743 1762                  } else {
1744 1763                          zv->zv_flags &= ~ZVOL_WCE;
1745 1764                          mutex_exit(&zfsdev_state_lock);
     1765 +                        ht_begin_unsafe();
1746 1766                          zil_commit(zv->zv_zilog, ZVOL_OBJ);
     1767 +                        ht_end_unsafe();
1747 1768                  }
1748 1769                  return (0);
1749 1770          }
1750 1771  
1751 1772          case DKIOCGGEOM:
1752 1773          case DKIOCGVTOC:
1753 1774                  /*
1754 1775                   * commands using these (like prtvtoc) expect ENOTSUP
1755 1776                   * since we're emulating an EFI label
1756 1777                   */
↓ open down ↓ 32 lines elided ↑ open up ↑
1789 1810                          dfl = (dkioc_free_list_t *)arg;
1790 1811                          ASSERT3U(dfl->dfl_num_exts, <=, DFL_COPYIN_MAX_EXTS);
1791 1812                          if (dfl->dfl_num_exts > DFL_COPYIN_MAX_EXTS) {
1792 1813                                  error = SET_ERROR(EINVAL);
1793 1814                                  break;
1794 1815                          }
1795 1816                  }
1796 1817  
1797 1818                  mutex_exit(&zfsdev_state_lock);
1798 1819  
     1820 +                ht_begin_unsafe();
     1821 +
1799 1822                  for (int i = 0; i < dfl->dfl_num_exts; i++) {
1800 1823                          uint64_t start = dfl->dfl_exts[i].dfle_start,
1801 1824                              length = dfl->dfl_exts[i].dfle_length,
1802 1825                              end = start + length;
1803 1826  
1804 1827                          /*
1805 1828                           * Apply Postel's Law to length-checking.  If they
1806 1829                           * overshoot, just blank out until the end, if there's
1807 1830                           * a need to blank out anything.
1808 1831                           */
↓ open down ↓ 35 lines elided ↑ open up ↑
1844 1867                  if ((error == 0) && zvol_unmap_sync_enabled &&
1845 1868                      (!(zv->zv_flags & ZVOL_WCE) ||
1846 1869                      (zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS) ||
1847 1870                      (dfl->dfl_flags & DF_WAIT_SYNC))) {
1848 1871                          zil_commit(zv->zv_zilog, ZVOL_OBJ);
1849 1872                  }
1850 1873  
1851 1874                  if (!(flag & FKIOCTL))
1852 1875                          dfl_free(dfl);
1853 1876  
     1877 +                ht_end_unsafe();
     1878 +
1854 1879                  return (error);
1855 1880          }
1856 1881  
1857 1882          default:
1858 1883                  error = SET_ERROR(ENOTTY);
1859 1884                  break;
1860 1885  
1861 1886          }
1862 1887          mutex_exit(&zfsdev_state_lock);
1863 1888          return (error);
↓ open down ↓ 326 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX