Print this page
5269 zfs: zpool import slow
While importing a pool, all objsets are enumerated twice: once to check
the ZIL log chains and once to claim them. On pools with many datasets
this process can take a substantial amount of time.
Speed up the process by parallelizing it with a taskq. The number
of parallel tasks is limited to 4 times the number of leaf vdevs.

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/zfs/dmu_objset.c
          +++ new/usr/src/uts/common/fs/zfs/dmu_objset.c
↓ open down ↓ 15 lines elided ↑ open up ↑
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23   23   * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
  24   24   * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  25   25   * Copyright (c) 2013, Joyent, Inc. All rights reserved.
       26 + * Copyright (c) 2014, STRATO AG, Inc. All rights reserved.
  26   27   */
  27   28  
  28   29  /* Portions Copyright 2010 Robert Milkowski */
  29   30  
  30   31  #include <sys/cred.h>
  31   32  #include <sys/zfs_context.h>
  32   33  #include <sys/dmu_objset.h>
  33   34  #include <sys/dsl_dir.h>
  34   35  #include <sys/dsl_dataset.h>
  35   36  #include <sys/dsl_prop.h>
↓ open down ↓ 4 lines elided ↑ open up ↑
  40   41  #include <sys/dbuf.h>
  41   42  #include <sys/zvol.h>
  42   43  #include <sys/dmu_tx.h>
  43   44  #include <sys/zap.h>
  44   45  #include <sys/zil.h>
  45   46  #include <sys/dmu_impl.h>
  46   47  #include <sys/zfs_ioctl.h>
  47   48  #include <sys/sa.h>
  48   49  #include <sys/zfs_onexit.h>
  49   50  #include <sys/dsl_destroy.h>
       51 +#include <sys/vdev.h>
  50   52  
  51   53  /*
  52   54   * Needed to close a window in dnode_move() that allows the objset to be freed
  53   55   * before it can be safely accessed.
  54   56   */
  55   57  krwlock_t os_lock;
  56   58  
  57   59  void
  58   60  dmu_objset_init(void)
  59   61  {
↓ open down ↓ 424 lines elided ↑ open up ↑
 484  486  
 485  487          err = dmu_objset_from_ds(ds, osp);
 486  488          if (err != 0) {
 487  489                  dsl_dataset_rele(ds, tag);
 488  490                  dsl_pool_rele(dp, tag);
 489  491          }
 490  492  
 491  493          return (err);
 492  494  }
 493  495  
      496 +static int
      497 +dmu_objset_own_common(dsl_dataset_t *ds, dmu_objset_type_t type,
      498 +    boolean_t readonly, void *tag, objset_t **osp)
      499 +{
      500 +        int err;
      501 +
      502 +        err = dmu_objset_from_ds(ds, osp);
      503 +        if (err != 0) {
      504 +                dsl_dataset_disown(ds, tag);
      505 +        } else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
      506 +                dsl_dataset_disown(ds, tag);
      507 +                return (SET_ERROR(EINVAL));
      508 +        } else if (!readonly && dsl_dataset_is_snapshot(ds)) {
      509 +                dsl_dataset_disown(ds, tag);
      510 +                return (SET_ERROR(EROFS));
      511 +        }
      512 +        return (err);
      513 +}
      514 +
 494  515  /*
 495  516   * dsl_pool must not be held when this is called.
 496  517   * Upon successful return, there will be a longhold on the dataset,
 497  518   * and the dsl_pool will not be held.
 498  519   */
 499  520  int
 500  521  dmu_objset_own(const char *name, dmu_objset_type_t type,
 501  522      boolean_t readonly, void *tag, objset_t **osp)
 502  523  {
 503  524          dsl_pool_t *dp;
↓ open down ↓ 1 lines elided ↑ open up ↑
 505  526          int err;
 506  527  
 507  528          err = dsl_pool_hold(name, FTAG, &dp);
 508  529          if (err != 0)
 509  530                  return (err);
 510  531          err = dsl_dataset_own(dp, name, tag, &ds);
 511  532          if (err != 0) {
 512  533                  dsl_pool_rele(dp, FTAG);
 513  534                  return (err);
 514  535          }
 515      -
 516      -        err = dmu_objset_from_ds(ds, osp);
      536 +        err = dmu_objset_own_common(ds, type, readonly, tag, osp);
 517  537          dsl_pool_rele(dp, FTAG);
 518      -        if (err != 0) {
 519      -                dsl_dataset_disown(ds, tag);
 520      -        } else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
 521      -                dsl_dataset_disown(ds, tag);
 522      -                return (SET_ERROR(EINVAL));
 523      -        } else if (!readonly && dsl_dataset_is_snapshot(ds)) {
 524      -                dsl_dataset_disown(ds, tag);
 525      -                return (SET_ERROR(EROFS));
 526      -        }
      538 +
 527  539          return (err);
 528  540  }
 529  541  
      542 +int
      543 +dmu_objset_own_obj(dsl_pool_t *dp, uint64_t obj, dmu_objset_type_t type,
      544 +    boolean_t readonly, void *tag, objset_t **osp)
      545 +{
      546 +        dsl_dataset_t *ds;
      547 +        int err;
      548 +
      549 +        err = dsl_dataset_own_obj(dp, obj, tag, &ds);
      550 +        if (err != 0)
      551 +                return (err);
      552 +
      553 +        return (dmu_objset_own_common(ds, type, readonly, tag, osp));
      554 +}
      555 +
 530  556  void
 531  557  dmu_objset_rele(objset_t *os, void *tag)
 532  558  {
 533  559          dsl_pool_t *dp = dmu_objset_pool(os);
 534  560          dsl_dataset_rele(os->os_dsl_dataset, tag);
 535  561          dsl_pool_rele(dp, tag);
 536  562  }
 537  563  
      564 +void
      565 +dmu_objset_rele_obj(objset_t *os, void *tag)
      566 +{
      567 +        dsl_dataset_rele(os->os_dsl_dataset, tag);
      568 +}
      569 +
 538  570  /*
 539  571   * When we are called, os MUST refer to an objset associated with a dataset
 540  572   * that is owned by 'tag'; that is, is held and long held by 'tag' and ds_owner
 541  573   * == tag.  We will then release and reacquire ownership of the dataset while
 542  574   * holding the pool config_rwlock to avoid intervening namespace or ownership
 543  575   * changes that may otherwise occur.
 544  576   *
 545  577   * This exists solely to accommodate zfs_ioc_userspace_upgrade()'s desire to
 546  578   * release the hold on its dataset and acquire a new one on the dataset of the
 547  579   * same name so that it can be partially torn down and reconstructed.
↓ open down ↓ 990 lines elided ↑ open up ↑
1538 1570          (void) strcpy(name, attr.za_name);
1539 1571          if (idp)
1540 1572                  *idp = attr.za_first_integer;
1541 1573          zap_cursor_advance(&cursor);
1542 1574          *offp = zap_cursor_serialize(&cursor);
1543 1575          zap_cursor_fini(&cursor);
1544 1576  
1545 1577          return (0);
1546 1578  }
1547 1579  
1548      -/*
1549      - * Find objsets under and including ddobj, call func(ds) on each.
1550      - */
1551      -int
1552      -dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
1553      -    int func(dsl_pool_t *, dsl_dataset_t *, void *), void *arg, int flags)
     1580 +typedef struct dmu_objset_find_ctx {
     1581 +        taskq_t         *dc_tq;
     1582 +        dsl_pool_t      *dc_dp;
     1583 +        uint64_t        dc_obj;
     1584 +        int             (*dc_func)(dsl_pool_t *, dsl_dataset_t *, void *);
     1585 +        void            *dc_arg;
     1586 +        int             dc_flags;
     1587 +        kmutex_t        *dc_error_lock;
     1588 +        int             *dc_error;
     1589 +} dmu_objset_find_ctx_t;
     1590 +
     1591 +static void
     1592 +dmu_objset_find_dp_impl(void *arg)
1554 1593  {
     1594 +        dmu_objset_find_ctx_t *dcp = arg;
     1595 +        dsl_pool_t *dp = dcp->dc_dp;
     1596 +        dmu_objset_find_ctx_t *child_dcp;
1555 1597          dsl_dir_t *dd;
1556 1598          dsl_dataset_t *ds;
1557 1599          zap_cursor_t zc;
1558 1600          zap_attribute_t *attr;
1559 1601          uint64_t thisobj;
1560 1602          int err;
1561 1603  
1562      -        ASSERT(dsl_pool_config_held(dp));
     1604 +        dsl_pool_config_enter(dp, FTAG);
     1605 +
     1606 +        /* don't process if there already was an error */
     1607 +        if (*dcp->dc_error)
     1608 +                goto out;
1563 1609  
1564      -        err = dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd);
     1610 +        err = dsl_dir_hold_obj(dp, dcp->dc_obj, NULL, FTAG, &dd);
1565 1611          if (err != 0)
1566      -                return (err);
     1612 +                goto fail;
1567 1613  
1568 1614          /* Don't visit hidden ($MOS & $ORIGIN) objsets. */
1569 1615          if (dd->dd_myname[0] == '$') {
1570 1616                  dsl_dir_rele(dd, FTAG);
1571      -                return (0);
     1617 +                goto out;
1572 1618          }
1573 1619  
1574 1620          thisobj = dd->dd_phys->dd_head_dataset_obj;
1575 1621          attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
1576 1622  
1577 1623          /*
1578 1624           * Iterate over all children.
1579 1625           */
1580      -        if (flags & DS_FIND_CHILDREN) {
     1626 +        if (dcp->dc_flags & DS_FIND_CHILDREN) {
1581 1627                  for (zap_cursor_init(&zc, dp->dp_meta_objset,
1582 1628                      dd->dd_phys->dd_child_dir_zapobj);
1583 1629                      zap_cursor_retrieve(&zc, attr) == 0;
1584 1630                      (void) zap_cursor_advance(&zc)) {
1585 1631                          ASSERT3U(attr->za_integer_length, ==,
1586 1632                              sizeof (uint64_t));
1587 1633                          ASSERT3U(attr->za_num_integers, ==, 1);
1588 1634  
1589      -                        err = dmu_objset_find_dp(dp, attr->za_first_integer,
1590      -                            func, arg, flags);
1591      -                        if (err != 0)
1592      -                                break;
     1635 +                        child_dcp = kmem_alloc(sizeof(*child_dcp), KM_SLEEP);
     1636 +                        *child_dcp = *dcp;
     1637 +                        child_dcp->dc_obj = attr->za_first_integer;
     1638 +                        taskq_dispatch(dcp->dc_tq, dmu_objset_find_dp_impl,
     1639 +                            child_dcp, TQ_SLEEP);
1593 1640                  }
1594 1641                  zap_cursor_fini(&zc);
1595      -
1596      -                if (err != 0) {
1597      -                        dsl_dir_rele(dd, FTAG);
1598      -                        kmem_free(attr, sizeof (zap_attribute_t));
1599      -                        return (err);
1600      -                }
1601 1642          }
1602 1643  
1603 1644          /*
1604 1645           * Iterate over all snapshots.
1605 1646           */
1606      -        if (flags & DS_FIND_SNAPSHOTS) {
     1647 +        if (dcp->dc_flags & DS_FIND_SNAPSHOTS) {
1607 1648                  dsl_dataset_t *ds;
1608 1649                  err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
1609 1650  
1610 1651                  if (err == 0) {
1611 1652                          uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
1612 1653                          dsl_dataset_rele(ds, FTAG);
1613 1654  
1614 1655                          for (zap_cursor_init(&zc, dp->dp_meta_objset, snapobj);
1615 1656                              zap_cursor_retrieve(&zc, attr) == 0;
1616 1657                              (void) zap_cursor_advance(&zc)) {
1617 1658                                  ASSERT3U(attr->za_integer_length, ==,
1618 1659                                      sizeof (uint64_t));
1619 1660                                  ASSERT3U(attr->za_num_integers, ==, 1);
1620 1661  
1621 1662                                  err = dsl_dataset_hold_obj(dp,
1622 1663                                      attr->za_first_integer, FTAG, &ds);
1623 1664                                  if (err != 0)
1624 1665                                          break;
1625      -                                err = func(dp, ds, arg);
     1666 +                                err = dcp->dc_func(dp, ds, dcp->dc_arg);
1626 1667                                  dsl_dataset_rele(ds, FTAG);
1627 1668                                  if (err != 0)
1628 1669                                          break;
1629 1670                          }
1630 1671                          zap_cursor_fini(&zc);
1631 1672                  }
1632 1673          }
1633 1674  
1634 1675          dsl_dir_rele(dd, FTAG);
1635 1676          kmem_free(attr, sizeof (zap_attribute_t));
1636 1677  
1637 1678          if (err != 0)
1638      -                return (err);
     1679 +                goto fail;
1639 1680  
1640 1681          /*
1641 1682           * Apply to self.
1642 1683           */
1643 1684          err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
1644 1685          if (err != 0)
1645      -                return (err);
1646      -        err = func(dp, ds, arg);
     1686 +                goto fail;
     1687 +        err = dcp->dc_func(dp, ds, dcp->dc_arg);
1647 1688          dsl_dataset_rele(ds, FTAG);
1648      -        return (err);
     1689 +
     1690 +fail:
     1691 +        if (err) {
     1692 +                mutex_enter(dcp->dc_error_lock);
     1693 +                /* only keep first error */
     1694 +                if (*dcp->dc_error == 0)
     1695 +                        *dcp->dc_error = err;
     1696 +                mutex_exit(dcp->dc_error_lock);
     1697 +        }
     1698 +
     1699 +out:
     1700 +        dsl_pool_config_exit(dp, FTAG);
     1701 +        kmem_free(dcp, sizeof(*dcp));
     1702 +}
     1703 +
     1704 +/*
     1705 + * Find objsets under and including ddobj, call func(ds) on each.
     1706 + * The order for the enumeration is completely undefined.
     1707 + * func is called with dsl_pool_config held.
     1708 + */
     1709 +int
     1710 +dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
     1711 +    int func(dsl_pool_t *, dsl_dataset_t *, void *), void *arg, int flags)
     1712 +{
     1713 +        int error = 0;
     1714 +        taskq_t *tq = NULL;
     1715 +        int ntasks;
     1716 +        dmu_objset_find_ctx_t *dcp;
     1717 +        kmutex_t err_lock;
     1718 +
     1719 +        ntasks = vdev_count_leaves(dp->dp_spa) * 4;
     1720 +        tq = taskq_create("dmu_objset_find", ntasks, minclsyspri, ntasks,
     1721 +            INT_MAX, 0);
     1722 +        if (!tq)
     1723 +                return (SET_ERROR(ENOMEM));
     1724 +
     1725 +        mutex_init(&err_lock, NULL, MUTEX_DEFAULT, NULL);
     1726 +        dcp = kmem_alloc(sizeof(*dcp), KM_SLEEP);
     1727 +        dcp->dc_tq = tq;
     1728 +        dcp->dc_dp = dp;
     1729 +        dcp->dc_obj = ddobj;
     1730 +        dcp->dc_func = func;
     1731 +        dcp->dc_arg = arg;
     1732 +        dcp->dc_flags = flags;
     1733 +        dcp->dc_error_lock = &err_lock;
     1734 +        dcp->dc_error = &error;
     1735 +        /* dcp will be freed by the task */
     1736 +        taskq_dispatch(tq, dmu_objset_find_dp_impl, dcp, TQ_SLEEP);
     1737 +
     1738 +        taskq_wait(tq);
     1739 +        taskq_destroy(tq);
     1740 +        mutex_destroy(&err_lock);
     1741 +
     1742 +        return (error);
1649 1743  }
1650 1744  
1651 1745  /*
1652 1746   * Find all objsets under name, and for each, call 'func(child_name, arg)'.
1653 1747   * The dp_config_rwlock must not be held when this is called, and it
1654 1748   * will not be held when the callback is called.
1655 1749   * Therefore this function should only be used when the pool is not changing
1656 1750   * (e.g. in syncing context), or the callback can deal with the possible races.
1657 1751   */
1658 1752  static int
↓ open down ↓ 149 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX