Print this page
5269 zfs: zpool import slow
While importing a pool, all objsets are enumerated twice: once to check
the ZIL log chains and once to claim them. On pools with many datasets,
this process can take a substantial amount of time.
Speed up the process by parallelizing it with a taskq. The number
of parallel tasks is limited to 4 times the number of leaf vdevs.
@@ -21,10 +21,11 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, STRATO AG, Inc. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
#include <sys/cred.h>
@@ -45,10 +46,11 @@
#include <sys/dmu_impl.h>
#include <sys/zfs_ioctl.h>
#include <sys/sa.h>
#include <sys/zfs_onexit.h>
#include <sys/dsl_destroy.h>
+#include <sys/vdev.h>
/*
* Needed to close a window in dnode_move() that allows the objset to be freed
* before it can be safely accessed.
*/
@@ -489,10 +491,29 @@
}
return (err);
}
+static int
+dmu_objset_own_common(dsl_dataset_t *ds, dmu_objset_type_t type,
+ boolean_t readonly, void *tag, objset_t **osp)
+{
+ int err;
+
+ err = dmu_objset_from_ds(ds, osp);
+ if (err != 0) {
+ dsl_dataset_disown(ds, tag);
+ } else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
+ dsl_dataset_disown(ds, tag);
+ return (SET_ERROR(EINVAL));
+ } else if (!readonly && dsl_dataset_is_snapshot(ds)) {
+ dsl_dataset_disown(ds, tag);
+ return (SET_ERROR(EROFS));
+ }
+ return (err);
+}
+
/*
* dsl_pool must not be held when this is called.
* Upon successful return, there will be a longhold on the dataset,
* and the dsl_pool will not be held.
*/
@@ -510,33 +531,44 @@
err = dsl_dataset_own(dp, name, tag, &ds);
if (err != 0) {
dsl_pool_rele(dp, FTAG);
return (err);
}
-
- err = dmu_objset_from_ds(ds, osp);
+ err = dmu_objset_own_common(ds, type, readonly, tag, osp);
dsl_pool_rele(dp, FTAG);
- if (err != 0) {
- dsl_dataset_disown(ds, tag);
- } else if (type != DMU_OST_ANY && type != (*osp)->os_phys->os_type) {
- dsl_dataset_disown(ds, tag);
- return (SET_ERROR(EINVAL));
- } else if (!readonly && dsl_dataset_is_snapshot(ds)) {
- dsl_dataset_disown(ds, tag);
- return (SET_ERROR(EROFS));
- }
+
+ return (err);
+}
+
+int
+dmu_objset_own_obj(dsl_pool_t *dp, uint64_t obj, dmu_objset_type_t type,
+ boolean_t readonly, void *tag, objset_t **osp)
+{
+ dsl_dataset_t *ds;
+ int err;
+
+ err = dsl_dataset_own_obj(dp, obj, tag, &ds);
+ if (err != 0)
return (err);
+
+ return (dmu_objset_own_common(ds, type, readonly, tag, osp));
}
void
dmu_objset_rele(objset_t *os, void *tag)
{
dsl_pool_t *dp = dmu_objset_pool(os);
dsl_dataset_rele(os->os_dsl_dataset, tag);
dsl_pool_rele(dp, tag);
}
+void
+dmu_objset_rele_obj(objset_t *os, void *tag)
+{
+ dsl_dataset_rele(os->os_dsl_dataset, tag);
+}
+
/*
* When we are called, os MUST refer to an objset associated with a dataset
* that is owned by 'tag'; that is, is held and long held by 'tag' and ds_owner
* == tag. We will then release and reacquire ownership of the dataset while
* holding the pool config_rwlock to avoid intervening namespace or ownership
@@ -1543,69 +1575,78 @@
zap_cursor_fini(&cursor);
return (0);
}
-/*
- * Find objsets under and including ddobj, call func(ds) on each.
- */
-int
-dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
- int func(dsl_pool_t *, dsl_dataset_t *, void *), void *arg, int flags)
+typedef struct dmu_objset_find_ctx {
+ taskq_t *dc_tq;
+ dsl_pool_t *dc_dp;
+ uint64_t dc_obj;
+ int (*dc_func)(dsl_pool_t *, dsl_dataset_t *, void *);
+ void *dc_arg;
+ int dc_flags;
+ kmutex_t *dc_error_lock;
+ int *dc_error;
+} dmu_objset_find_ctx_t;
+
+static void
+dmu_objset_find_dp_impl(void *arg)
{
+ dmu_objset_find_ctx_t *dcp = arg;
+ dsl_pool_t *dp = dcp->dc_dp;
+ dmu_objset_find_ctx_t *child_dcp;
dsl_dir_t *dd;
dsl_dataset_t *ds;
zap_cursor_t zc;
zap_attribute_t *attr;
uint64_t thisobj;
int err;
- ASSERT(dsl_pool_config_held(dp));
+ dsl_pool_config_enter(dp, FTAG);
+
+ /* don't process if there already was an error */
+ if (*dcp->dc_error)
+ goto out;
- err = dsl_dir_hold_obj(dp, ddobj, NULL, FTAG, &dd);
+ err = dsl_dir_hold_obj(dp, dcp->dc_obj, NULL, FTAG, &dd);
if (err != 0)
- return (err);
+ goto fail;
/* Don't visit hidden ($MOS & $ORIGIN) objsets. */
if (dd->dd_myname[0] == '$') {
dsl_dir_rele(dd, FTAG);
- return (0);
+ goto out;
}
thisobj = dd->dd_phys->dd_head_dataset_obj;
attr = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
/*
* Iterate over all children.
*/
- if (flags & DS_FIND_CHILDREN) {
+ if (dcp->dc_flags & DS_FIND_CHILDREN) {
for (zap_cursor_init(&zc, dp->dp_meta_objset,
dd->dd_phys->dd_child_dir_zapobj);
zap_cursor_retrieve(&zc, attr) == 0;
(void) zap_cursor_advance(&zc)) {
ASSERT3U(attr->za_integer_length, ==,
sizeof (uint64_t));
ASSERT3U(attr->za_num_integers, ==, 1);
- err = dmu_objset_find_dp(dp, attr->za_first_integer,
- func, arg, flags);
- if (err != 0)
- break;
+ child_dcp = kmem_alloc(sizeof(*child_dcp), KM_SLEEP);
+ *child_dcp = *dcp;
+ child_dcp->dc_obj = attr->za_first_integer;
+ taskq_dispatch(dcp->dc_tq, dmu_objset_find_dp_impl,
+ child_dcp, TQ_SLEEP);
}
zap_cursor_fini(&zc);
-
- if (err != 0) {
- dsl_dir_rele(dd, FTAG);
- kmem_free(attr, sizeof (zap_attribute_t));
- return (err);
- }
}
/*
* Iterate over all snapshots.
*/
- if (flags & DS_FIND_SNAPSHOTS) {
+ if (dcp->dc_flags & DS_FIND_SNAPSHOTS) {
dsl_dataset_t *ds;
err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
if (err == 0) {
uint64_t snapobj = ds->ds_phys->ds_snapnames_zapobj;
@@ -1620,11 +1661,11 @@
err = dsl_dataset_hold_obj(dp,
attr->za_first_integer, FTAG, &ds);
if (err != 0)
break;
- err = func(dp, ds, arg);
+ err = dcp->dc_func(dp, ds, dcp->dc_arg);
dsl_dataset_rele(ds, FTAG);
if (err != 0)
break;
}
zap_cursor_fini(&zc);
@@ -1633,21 +1674,74 @@
dsl_dir_rele(dd, FTAG);
kmem_free(attr, sizeof (zap_attribute_t));
if (err != 0)
- return (err);
+ goto fail;
/*
* Apply to self.
*/
err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
if (err != 0)
- return (err);
- err = func(dp, ds, arg);
+ goto fail;
+ err = dcp->dc_func(dp, ds, dcp->dc_arg);
dsl_dataset_rele(ds, FTAG);
- return (err);
+
+fail:
+ if (err) {
+ mutex_enter(dcp->dc_error_lock);
+ /* only keep first error */
+ if (*dcp->dc_error == 0)
+ *dcp->dc_error = err;
+ mutex_exit(dcp->dc_error_lock);
+ }
+
+out:
+ dsl_pool_config_exit(dp, FTAG);
+ kmem_free(dcp, sizeof(*dcp));
+}
+
+/*
+ * Find objsets under and including ddobj, call func(ds) on each.
+ * The enumeration order is undefined; func may run on several taskq threads.
+ * func is called with dsl_pool_config held.
+ */
+int
+dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
+ int func(dsl_pool_t *, dsl_dataset_t *, void *), void *arg, int flags)
+{
+ int error = 0;
+ taskq_t *tq = NULL;
+ int ntasks;
+ dmu_objset_find_ctx_t *dcp;
+ kmutex_t err_lock;
+
+ ntasks = vdev_count_leaves(dp->dp_spa) * 4;
+ tq = taskq_create("dmu_objset_find", ntasks, minclsyspri, ntasks,
+ INT_MAX, 0);
+ if (!tq)
+ return (SET_ERROR(ENOMEM));
+
+ mutex_init(&err_lock, NULL, MUTEX_DEFAULT, NULL);
+ dcp = kmem_alloc(sizeof(*dcp), KM_SLEEP);
+ dcp->dc_tq = tq;
+ dcp->dc_dp = dp;
+ dcp->dc_obj = ddobj;
+ dcp->dc_func = func;
+ dcp->dc_arg = arg;
+ dcp->dc_flags = flags;
+ dcp->dc_error_lock = &err_lock;
+ dcp->dc_error = &error;
+ /* dcp will be freed by the dispatched task */
+ taskq_dispatch(tq, dmu_objset_find_dp_impl, dcp, TQ_SLEEP);
+
+ taskq_wait(tq);
+ taskq_destroy(tq);
+ mutex_destroy(&err_lock);
+
+ return (error);
}
/*
* Find all objsets under name, and for each, call 'func(child_name, arg)'.
* The dp_config_rwlock must not be held when this is called, and it