Print this page
5269 zfs: zpool import slow
While importing a pool all objsets are enumerated twice, once to check
the zil log chains and once to claim them. On pools with many datasets
this process might take a substantial amount of time.
Speed up the process by parallelizing it using a taskq. The number
of parallel tasks is limited to 4 times the number of leaf vdevs.


1695                         vdev_load_log_state(tvd, mtvd);
1696                         vdev_reopen(tvd);
1697                 }
1698         }
1699         vdev_free(mrvd);
1700         spa_config_exit(spa, SCL_ALL, FTAG);
1701 
1702         /*
1703          * Ensure we were able to validate the config.
1704          */
1705         return (rvd->vdev_guid_sum == spa->spa_uberblock.ub_guid_sum);
1706 }
1707 
1708 /*
1709  * Check for missing log devices by verifying every dataset's ZIL log chain
1710  */
1711 static boolean_t
1712 spa_check_logs(spa_t *spa)
1713 {
1714         boolean_t rv = B_FALSE;
1715         dsl_pool_t *dp = spa_get_dsl(spa);
1716 
1717         switch (spa->spa_log_state) {
1718         case SPA_LOG_MISSING:
1719                 /* need to recheck in case slog has been restored */
1720         case SPA_LOG_UNKNOWN:
1721                 rv = (dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
1722                     zil_check_log_chain, NULL, DS_FIND_CHILDREN) != 0);
1723                 if (rv)
1724                         spa_set_log_state(spa, SPA_LOG_MISSING);
1725                 break;
1726         }
1727         return (rv);
1728 }
1728 
1729 static boolean_t
1730 spa_passivate_log(spa_t *spa)
1731 {
1732         vdev_t *rvd = spa->spa_root_vdev;
1733         boolean_t slog_found = B_FALSE;
1734 
1735         ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER));
1736 
1737         if (!spa_has_slogs(spa))
1738                 return (B_FALSE);
1739 
1740         for (int c = 0; c < rvd->vdev_children; c++) {
1741                 vdev_t *tvd = rvd->vdev_child[c];


2075 
2076 /*
2077  * Load an existing storage pool, using the pool's builtin spa_config as a
2078  * source of configuration information.
2079  */
2080 static int
2081 spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
2082     spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig,
2083     char **ereport)
2084 {
2085         int error = 0;
2086         nvlist_t *nvroot = NULL;
2087         nvlist_t *label;
2088         vdev_t *rvd;
2089         uberblock_t *ub = &spa->spa_uberblock;
2090         uint64_t children, config_cache_txg = spa->spa_config_txg;
2091         int orig_mode = spa->spa_mode;
2092         int parse;
2093         uint64_t obj;
2094         boolean_t missing_feat_write = B_FALSE;
2095 
2096         /*
2097          * If this is an untrusted config, access the pool in read-only mode.
2098          * This prevents things like resilvering recently removed devices.
2099          */
2100         if (!mosconfig)
2101                 spa->spa_mode = FREAD;
2102 
2103         ASSERT(MUTEX_HELD(&spa_namespace_lock));
2104 
2105         spa->spa_load_state = state;
2106 
2107         if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot))
2108                 return (SET_ERROR(EINVAL));
2109 
2110         parse = (type == SPA_IMPORT_EXISTING ?
2111             VDEV_ALLOC_LOAD : VDEV_ALLOC_SPLIT);
2112 
2113         /*
2114          * Create "The Godfather" zio to hold all async IOs
2115          */


2629                  * read-only mode but not read-write mode. We now have enough
2630                  * information and can return to userland.
2631                  */
2632                 return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, ENOTSUP));
2633         }
2634 
2635         /*
2636          * We've successfully opened the pool, verify that we're ready
2637          * to start pushing transactions.
2638          */
2639         if (state != SPA_LOAD_TRYIMPORT) {
2640                 if (error = spa_load_verify(spa))
2641                         return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA,
2642                             error));
2643         }
2644 
2645         if (spa_writeable(spa) && (state == SPA_LOAD_RECOVER ||
2646             spa->spa_load_max_txg == UINT64_MAX)) {
2647                 dmu_tx_t *tx;
2648                 int need_update = B_FALSE;
2649
2650                 ASSERT(state != SPA_LOAD_TRYIMPORT);
2651 
2652                 /*
2653                  * Claim log blocks that haven't been committed yet.
2654                  * This must all happen in a single txg.
2655                  * Note: spa_claim_max_txg is updated by spa_claim_notify(),
2656                  * invoked from zil_claim_log_block()'s i/o done callback.
2657                  * Price of rollback is that we abandon the log.
2658                  */
2659                 spa->spa_claiming = B_TRUE;
2660 
2661                 tx = dmu_tx_create_assigned(spa_get_dsl(spa),
2662                     spa_first_txg(spa));
2663                 (void) dmu_objset_find(spa_name(spa),
2664                     zil_claim, tx, DS_FIND_CHILDREN);
2665                 dmu_tx_commit(tx);
2666 
2667                 spa->spa_claiming = B_FALSE;
2668 
2669                 spa_set_log_state(spa, SPA_LOG_GOOD);
2670                 spa->spa_sync_on = B_TRUE;
2671                 txg_sync_start(spa->spa_dsl_pool);
2672 
2673                 /*
2674                  * Wait for all claims to sync.  We sync up to the highest
2675                  * claimed log block birth time so that claimed log blocks
2676                  * don't appear to be from the future.  spa_claim_max_txg
2677                  * will have been set for us by either zil_check_log_chain()
2678                  * (invoked from spa_check_logs()) or zil_claim() above.
2679                  */
2680                 txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg);
2681 
2682                 /*
2683                  * If the config cache is stale, or we have uninitialized




1695                         vdev_load_log_state(tvd, mtvd);
1696                         vdev_reopen(tvd);
1697                 }
1698         }
1699         vdev_free(mrvd);
1700         spa_config_exit(spa, SCL_ALL, FTAG);
1701 
1702         /*
1703          * Ensure we were able to validate the config.
1704          */
1705         return (rvd->vdev_guid_sum == spa->spa_uberblock.ub_guid_sum);
1706 }
1707 
1708 /*
1709  * Check for missing log devices by verifying every dataset's ZIL log chain
1710  */
1711 static boolean_t
1712 spa_check_logs(spa_t *spa)
1713 {
1714         boolean_t rv = B_FALSE;
1715         dsl_pool_t *dp = spa_get_dsl(spa);
1716 
1717         switch (spa->spa_log_state) {
1718         case SPA_LOG_MISSING:
1719                 /* need to recheck in case slog has been restored */
1720         case SPA_LOG_UNKNOWN:
1721                 rv = (dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
1722                     zil_check_log_chain, NULL, DS_FIND_CHILDREN) != 0);
1723                 if (rv)
1724                         spa_set_log_state(spa, SPA_LOG_MISSING);
1725                 break;
1726         }
1727         return (rv);
1728 }
1729 
1730 static boolean_t
1731 spa_passivate_log(spa_t *spa)
1732 {
1733         vdev_t *rvd = spa->spa_root_vdev;
1734         boolean_t slog_found = B_FALSE;
1735 
1736         ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER));
1737 
1738         if (!spa_has_slogs(spa))
1739                 return (B_FALSE);
1740 
1741         for (int c = 0; c < rvd->vdev_children; c++) {
1742                 vdev_t *tvd = rvd->vdev_child[c];


2076 
2077 /*
2078  * Load an existing storage pool, using the pool's builtin spa_config as a
2079  * source of configuration information.
2080  */
2081 static int
2082 spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
2083     spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig,
2084     char **ereport)
2085 {
2086         int error = 0;
2087         nvlist_t *nvroot = NULL;
2088         nvlist_t *label;
2089         vdev_t *rvd;
2090         uberblock_t *ub = &spa->spa_uberblock;
2091         uint64_t children, config_cache_txg = spa->spa_config_txg;
2092         int orig_mode = spa->spa_mode;
2093         int parse;
2094         uint64_t obj;
2095         boolean_t missing_feat_write = B_FALSE;

2096         /*
2097          * If this is an untrusted config, access the pool in read-only mode.
2098          * This prevents things like resilvering recently removed devices.
2099          */
2100         if (!mosconfig)
2101                 spa->spa_mode = FREAD;
2102 
2103         ASSERT(MUTEX_HELD(&spa_namespace_lock));
2104 
2105         spa->spa_load_state = state;
2106 
2107         if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot))
2108                 return (SET_ERROR(EINVAL));
2109 
2110         parse = (type == SPA_IMPORT_EXISTING ?
2111             VDEV_ALLOC_LOAD : VDEV_ALLOC_SPLIT);
2112 
2113         /*
2114          * Create "The Godfather" zio to hold all async IOs
2115          */


2629                  * read-only mode but not read-write mode. We now have enough
2630                  * information and can return to userland.
2631                  */
2632                 return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, ENOTSUP));
2633         }
2634 
2635         /*
2636          * We've successfully opened the pool, verify that we're ready
2637          * to start pushing transactions.
2638          */
2639         if (state != SPA_LOAD_TRYIMPORT) {
2640                 if (error = spa_load_verify(spa))
2641                         return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA,
2642                             error));
2643         }
2644 
2645         if (spa_writeable(spa) && (state == SPA_LOAD_RECOVER ||
2646             spa->spa_load_max_txg == UINT64_MAX)) {
2647                 dmu_tx_t *tx;
2648                 int need_update = B_FALSE;
2649                 dsl_pool_t *dp = spa_get_dsl(spa);
2650 
2651                 ASSERT(state != SPA_LOAD_TRYIMPORT);
2652 
2653                 /*
2654                  * Claim log blocks that haven't been committed yet.
2655                  * This must all happen in a single txg.
2656                  * Note: spa_claim_max_txg is updated by spa_claim_notify(),
2657                  * invoked from zil_claim_log_block()'s i/o done callback.
2658                  * Price of rollback is that we abandon the log.
2659                  */
2660                 spa->spa_claiming = B_TRUE;
2661 
2662                 tx = dmu_tx_create_assigned(dp, spa_first_txg(spa));
2663                 (void) dmu_objset_find_dp(dp, dp->dp_root_dir_obj,
2664                     zil_claim, tx, DS_FIND_CHILDREN);
2665                 dmu_tx_commit(tx);
2666 
2667                 spa->spa_claiming = B_FALSE;
2668 
2669                 spa_set_log_state(spa, SPA_LOG_GOOD);
2670                 spa->spa_sync_on = B_TRUE;
2671                 txg_sync_start(spa->spa_dsl_pool);
2672 
2673                 /*
2674                  * Wait for all claims to sync.  We sync up to the highest
2675                  * claimed log block birth time so that claimed log blocks
2676                  * don't appear to be from the future.  spa_claim_max_txg
2677                  * will have been set for us by either zil_check_log_chain()
2678                  * (invoked from spa_check_logs()) or zil_claim() above.
2679                  */
2680                 txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg);
2681 
2682                 /*
2683                  * If the config cache is stale, or we have uninitialized