2528
2529 for (int c = 0; c < rvd->vdev_children; c++)
2530 if (rvd->vdev_child[c]->vdev_ms_array == 0)
2531 need_update = B_TRUE;
2532
2533 /*
2534 * Update the config cache asynchronously in case we're the
2535 * root pool, in which case the config cache isn't writable yet.
2536 */
2537 if (need_update)
2538 spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
2539
2540 /*
2541 * Check all DTLs to see if anything needs resilvering.
2542 */
2543 if (!dsl_scan_resilvering(spa->spa_dsl_pool) &&
2544 vdev_resilver_needed(rvd, NULL, NULL))
2545 spa_async_request(spa, SPA_ASYNC_RESILVER);
2546
2547 /*
2548 * Delete any inconsistent datasets.
2549 */
2550 (void) dmu_objset_find(spa_name(spa),
2551 dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN);
2552
2553 /*
2554 * Clean up any stale temporary dataset userrefs.
2555 */
2556 dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool);
2557 }
2558
2559 return (0);
2560 }
2561
2562 static int
2563 spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig)
2564 {
2565 int mode = spa->spa_mode;
2566
2567 spa_unload(spa);
3203 int i;
3204 spa_aux_vdev_t *sav = &spa->spa_l2cache;
3205
3206 for (i = 0; i < sav->sav_count; i++) {
3207 uint64_t pool;
3208
3209 vd = sav->sav_vdevs[i];
3210 ASSERT(vd != NULL);
3211
3212 if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
3213 pool != 0ULL && l2arc_vdev_present(vd))
3214 l2arc_remove_vdev(vd);
3215 }
3216 }
3217
3218 /*
3219 * Pool Creation
3220 */
3221 int
3222 spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
3223 const char *history_str, nvlist_t *zplprops)
3224 {
3225 spa_t *spa;
3226 char *altroot = NULL;
3227 vdev_t *rvd;
3228 dsl_pool_t *dp;
3229 dmu_tx_t *tx;
3230 int error = 0;
3231 uint64_t txg = TXG_INITIAL;
3232 nvlist_t **spares, **l2cache;
3233 uint_t nspares, nl2cache;
3234 uint64_t version, obj;
3235 boolean_t has_features;
3236
3237 /*
3238 * If this pool already exists, return failure.
3239 */
3240 mutex_enter(&spa_namespace_lock);
3241 if (spa_lookup(pool) != NULL) {
3242 mutex_exit(&spa_namespace_lock);
3243 return (EEXIST);
3422 spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND);
3423
3424 if (props != NULL) {
3425 spa_configfile_set(spa, props, B_FALSE);
3426 spa_sync_props(spa, props, tx);
3427 }
3428
3429 dmu_tx_commit(tx);
3430
3431 spa->spa_sync_on = B_TRUE;
3432 txg_sync_start(spa->spa_dsl_pool);
3433
3434 /*
3435 * We explicitly wait for the first transaction to complete so that our
3436 * bean counters are appropriately updated.
3437 */
3438 txg_wait_synced(spa->spa_dsl_pool, txg);
3439
3440 spa_config_sync(spa, B_FALSE, B_TRUE);
3441
3442 if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL)
3443 (void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE);
3444 spa_history_log_version(spa, LOG_POOL_CREATE);
3445
3446 spa->spa_minref = refcount_count(&spa->spa_refcount);
3447
3448 mutex_exit(&spa_namespace_lock);
3449
3450 return (0);
3451 }
3452
3453 #ifdef _KERNEL
3454 /*
3455 * Get the root pool information from the root disk, then import the root pool
3456 * during the system boot up time.
3457 */
3458 extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **);
3459
3460 static nvlist_t *
3461 spa_generate_rootconf(char *devpath, char *devid, uint64_t *guid)
3462 {
3463 nvlist_t *config;
3464 nvlist_t *nvtop, *nvroot;
3624 "try booting from '%s'", avd->vdev_path);
3625 error = EINVAL;
3626 goto out;
3627 }
3628
3629 /*
3630 * If the boot device is part of a spare vdev then ensure that
3631 * we're booting off the active spare.
3632 */
3633 if (bvd->vdev_parent->vdev_ops == &vdev_spare_ops &&
3634 !bvd->vdev_isspare) {
3635 cmn_err(CE_NOTE, "The boot device is currently spared. Please "
3636 "try booting from '%s'",
3637 bvd->vdev_parent->
3638 vdev_child[bvd->vdev_parent->vdev_children - 1]->vdev_path);
3639 error = EINVAL;
3640 goto out;
3641 }
3642
3643 error = 0;
3644 spa_history_log_version(spa, LOG_POOL_IMPORT);
3645 out:
3646 spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
3647 vdev_free(rvd);
3648 spa_config_exit(spa, SCL_ALL, FTAG);
3649 mutex_exit(&spa_namespace_lock);
3650
3651 nvlist_free(config);
3652 return (error);
3653 }
3654
3655 #endif
3656
3657 /*
3658 * Import a non-root pool into the system.
3659 */
3660 int
3661 spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
3662 {
3663 spa_t *spa;
3664 char *altroot = NULL;
3686 (void) nvlist_lookup_string(props,
3687 zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
3688 (void) nvlist_lookup_uint64(props,
3689 zpool_prop_to_name(ZPOOL_PROP_READONLY), &readonly);
3690 if (readonly)
3691 mode = FREAD;
3692 spa = spa_add(pool, config, altroot);
3693 spa->spa_import_flags = flags;
3694
3695 /*
3696 * Verbatim import - Take a pool and insert it into the namespace
3697 * as if it had been loaded at boot.
3698 */
3699 if (spa->spa_import_flags & ZFS_IMPORT_VERBATIM) {
3700 if (props != NULL)
3701 spa_configfile_set(spa, props, B_FALSE);
3702
3703 spa_config_sync(spa, B_FALSE, B_TRUE);
3704
3705 mutex_exit(&spa_namespace_lock);
3706 spa_history_log_version(spa, LOG_POOL_IMPORT);
3707
3708 return (0);
3709 }
3710
3711 spa_activate(spa, mode);
3712
3713 /*
3714 * Don't start async tasks until we know everything is healthy.
3715 */
3716 spa_async_suspend(spa);
3717
3718 zpool_get_rewind_policy(config, &policy);
3719 if (policy.zrp_request & ZPOOL_DO_REWIND)
3720 state = SPA_LOAD_RECOVER;
3721
3722 /*
3723 * Pass off the heavy lifting to spa_load(). Pass TRUE for mosconfig
3724 * because the user-supplied config is actually the one to trust when
3725 * doing an import.
3726 */
3817 */
3818 if (spa->spa_autoreplace) {
3819 spa_aux_check_removed(&spa->spa_spares);
3820 spa_aux_check_removed(&spa->spa_l2cache);
3821 }
3822
3823 if (spa_writeable(spa)) {
3824 /*
3825 * Update the config cache to include the newly-imported pool.
3826 */
3827 spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
3828 }
3829
3830 /*
3831 * It's possible that the pool was expanded while it was exported.
3832 * We kick off an async task to handle this for us.
3833 */
3834 spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);
3835
3836 mutex_exit(&spa_namespace_lock);
3837 spa_history_log_version(spa, LOG_POOL_IMPORT);
3838
3839 return (0);
3840 }
3841
3842 nvlist_t *
3843 spa_tryimport(nvlist_t *tryconfig)
3844 {
3845 nvlist_t *config = NULL;
3846 char *poolname;
3847 spa_t *spa;
3848 uint64_t state;
3849 int error;
3850
3851 if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname))
3852 return (NULL);
3853
3854 if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state))
3855 return (NULL);
3856
3857 /*
4355
4356 oldvdpath = spa_strdup(oldvd->vdev_path);
4357 newvdpath = spa_strdup(newvd->vdev_path);
4358 newvd_isspare = newvd->vdev_isspare;
4359
4360 /*
4361 * Mark newvd's DTL dirty in this txg.
4362 */
4363 vdev_dirty(tvd, VDD_DTL, newvd, txg);
4364
4365 /*
4366 * Restart the resilver
4367 */
4368 dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg);
4369
4370 /*
4371 * Commit the config
4372 */
4373 (void) spa_vdev_exit(spa, newrootvd, dtl_max_txg, 0);
4374
4375 spa_history_log_internal(LOG_POOL_VDEV_ATTACH, spa, NULL,
4376 "%s vdev=%s %s vdev=%s",
4377 replacing && newvd_isspare ? "spare in" :
4378 replacing ? "replace" : "attach", newvdpath,
4379 replacing ? "for" : "to", oldvdpath);
4380
4381 spa_strfree(oldvdpath);
4382 spa_strfree(newvdpath);
4383
4384 if (spa->spa_bootfs)
4385 spa_event_notify(spa, newvd, ESC_ZFS_BOOTFS_VDEV_ATTACH);
4386
4387 return (0);
4388 }
4389
4390 /*
4391 * Detach a device from a mirror or replacing vdev.
4392 * If 'replace_done' is specified, only detach if the parent
4393 * is a replacing vdev.
4394 */
4395 int
4572
4573 /*
4574 * Mark vd's DTL as dirty in this txg. vdev_dtl_sync() will see that
4575 * vd->vdev_detached is set and free vd's DTL object in syncing context.
4576 * But first make sure we're not on any *other* txg's DTL list, to
4577 * prevent vd from being accessed after it's freed.
4578 */
4579 vdpath = spa_strdup(vd->vdev_path);
4580 for (int t = 0; t < TXG_SIZE; t++)
4581 (void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t);
4582 vd->vdev_detached = B_TRUE;
4583 vdev_dirty(tvd, VDD_DTL, vd, txg);
4584
4585 spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE);
4586
4587 /* hang on to the spa before we release the lock */
4588 spa_open_ref(spa, FTAG);
4589
4590 error = spa_vdev_exit(spa, vd, txg, 0);
4591
4592 spa_history_log_internal(LOG_POOL_VDEV_DETACH, spa, NULL,
4593 "vdev=%s", vdpath);
4594 spa_strfree(vdpath);
4595
4596 /*
4597 * If this was the removal of the original device in a hot spare vdev,
4598 * then we want to go through and remove the device from the hot spare
4599 * list of every other pool.
4600 */
4601 if (unspare) {
4602 spa_t *altspa = NULL;
4603
4604 mutex_enter(&spa_namespace_lock);
4605 while ((altspa = spa_next(altspa)) != NULL) {
4606 if (altspa->spa_state != POOL_STATE_ACTIVE ||
4607 altspa == spa)
4608 continue;
4609
4610 spa_open_ref(altspa, FTAG);
4611 mutex_exit(&spa_namespace_lock);
4612 (void) spa_vdev_remove(altspa, unspare_guid, B_TRUE);
4841 /* flush everything */
4842 txg = spa_vdev_config_enter(newspa);
4843 vdev_config_dirty(newspa->spa_root_vdev);
4844 (void) spa_vdev_config_exit(newspa, NULL, txg, 0, FTAG);
4845
4846 if (zio_injection_enabled)
4847 zio_handle_panic_injection(spa, FTAG, 2);
4848
4849 spa_async_resume(newspa);
4850
4851 /* finally, update the original pool's config */
4852 txg = spa_vdev_config_enter(spa);
4853 tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
4854 error = dmu_tx_assign(tx, TXG_WAIT);
4855 if (error != 0)
4856 dmu_tx_abort(tx);
4857 for (c = 0; c < children; c++) {
4858 if (vml[c] != NULL) {
4859 vdev_split(vml[c]);
4860 if (error == 0)
4861 spa_history_log_internal(LOG_POOL_VDEV_DETACH,
4862 spa, tx, "vdev=%s",
4863 vml[c]->vdev_path);
4864 vdev_free(vml[c]);
4865 }
4866 }
4867 vdev_config_dirty(spa->spa_root_vdev);
4868 spa->spa_config_splitting = NULL;
4869 nvlist_free(nvl);
4870 if (error == 0)
4871 dmu_tx_commit(tx);
4872 (void) spa_vdev_exit(spa, NULL, txg, 0);
4873
4874 if (zio_injection_enabled)
4875 zio_handle_panic_injection(spa, FTAG, 3);
4876
4877 /* split is complete; log a history record */
4878 spa_history_log_internal(LOG_POOL_SPLIT, newspa, NULL,
4879 "split new pool %s from pool %s", newname, spa_name(spa));
4880
4881 kmem_free(vml, children * sizeof (vdev_t *));
4882
4883 /* if we're not going to mount the filesystems in userland, export */
4884 if (exp)
4885 error = spa_export_common(newname, POOL_STATE_EXPORTED, NULL,
4886 B_FALSE, B_FALSE);
4887
4888 return (error);
4889
4890 out:
4891 spa_unload(newspa);
4892 spa_deactivate(newspa);
4893 spa_remove(newspa);
4894
4895 txg = spa_vdev_config_enter(spa);
4896
4897 /* re-online all offlined disks */
4898 for (c = 0; c < children; c++) {
4899 if (vml[c] != NULL)
5445 spa->spa_async_tasks = 0;
5446 mutex_exit(&spa->spa_async_lock);
5447
5448 /*
5449 * See if the config needs to be updated.
5450 */
5451 if (tasks & SPA_ASYNC_CONFIG_UPDATE) {
5452 uint64_t old_space, new_space;
5453
5454 mutex_enter(&spa_namespace_lock);
5455 old_space = metaslab_class_get_space(spa_normal_class(spa));
5456 spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
5457 new_space = metaslab_class_get_space(spa_normal_class(spa));
5458 mutex_exit(&spa_namespace_lock);
5459
5460 /*
5461 * If the pool grew as a result of the config update,
5462 * then log an internal history event.
5463 */
5464 if (new_space != old_space) {
5465 spa_history_log_internal(LOG_POOL_VDEV_ONLINE,
5466 spa, NULL,
5467 "pool '%s' size: %llu(+%llu)",
5468 spa_name(spa), new_space, new_space - old_space);
5469 }
5470 }
5471
5472 /*
5473 * See if any devices need to be marked REMOVED.
5474 */
5475 if (tasks & SPA_ASYNC_REMOVE) {
5476 spa_vdev_state_enter(spa, SCL_NONE);
5477 spa_async_remove(spa, spa->spa_root_vdev);
5478 for (int i = 0; i < spa->spa_l2cache.sav_count; i++)
5479 spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]);
5480 for (int i = 0; i < spa->spa_spares.sav_count; i++)
5481 spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]);
5482 (void) spa_vdev_state_exit(spa, NULL, 0);
5483 }
5484
5485 if ((tasks & SPA_ASYNC_AUTOEXPAND) && !spa_suspended(spa)) {
5486 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
5682
5683 spa_sync_nvlist(spa, spa->spa_config_object, config, tx);
5684 }
5685
5686 static void
5687 spa_sync_version(void *arg1, void *arg2, dmu_tx_t *tx)
5688 {
5689 spa_t *spa = arg1;
5690 uint64_t version = *(uint64_t *)arg2;
5691
5692 /*
5693 * Setting the version is special cased when first creating the pool.
5694 */
5695 ASSERT(tx->tx_txg != TXG_INITIAL);
5696
5697 ASSERT(version <= SPA_VERSION);
5698 ASSERT(version >= spa_version(spa));
5699
5700 spa->spa_uberblock.ub_version = version;
5701 vdev_config_dirty(spa->spa_root_vdev);
5702 }
5703
5704 /*
5705 * Set zpool properties.
5706 */
5707 static void
5708 spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)
5709 {
5710 spa_t *spa = arg1;
5711 objset_t *mos = spa->spa_meta_objset;
5712 nvlist_t *nvp = arg2;
5713 nvpair_t *elem = NULL;
5714
5715 mutex_enter(&spa->spa_props_lock);
5716
5717 while ((elem = nvlist_next_nvpair(nvp, elem))) {
5718 uint64_t intval;
5719 char *strval, *fname;
5720 zpool_prop_t prop;
5721 const char *propname;
5722 zprop_type_t proptype;
5723 zfeature_info_t *feature;
5724
5725 switch (prop = zpool_name_to_prop(nvpair_name(elem))) {
5726 case ZPROP_INVAL:
5727 /*
5728 * We checked this earlier in spa_prop_validate().
5729 */
5730 ASSERT(zpool_prop_feature(nvpair_name(elem)));
5731
5732 fname = strchr(nvpair_name(elem), '@') + 1;
5733 VERIFY3U(0, ==, zfeature_lookup_name(fname, &feature));
5734
5735 spa_feature_enable(spa, feature, tx);
5736 break;
5737
5738 case ZPOOL_PROP_VERSION:
5739 VERIFY(nvpair_value_uint64(elem, &intval) == 0);
5740 /*
5741 * The version is synced seperatly before other
5742 * properties and should be correct by now.
5743 */
5744 ASSERT3U(spa_version(spa), >=, intval);
5745 break;
5746
5747 case ZPOOL_PROP_ALTROOT:
5748 /*
5749 * 'altroot' is a non-persistent property. It should
5750 * have been set temporarily at creation or import time.
5751 */
5752 ASSERT(spa->spa_root != NULL);
5753 break;
5754
5755 case ZPOOL_PROP_READONLY:
5756 case ZPOOL_PROP_CACHEFILE:
5757 /*
5758 * 'readonly' and 'cachefile' are also non-persisitent
5759 * properties.
5760 */
5761 break;
5762 case ZPOOL_PROP_COMMENT:
5763 VERIFY(nvpair_value_string(elem, &strval) == 0);
5764 if (spa->spa_comment != NULL)
5765 spa_strfree(spa->spa_comment);
5766 spa->spa_comment = spa_strdup(strval);
5767 /*
5768 * We need to dirty the configuration on all the vdevs
5769 * so that their labels get updated. It's unnecessary
5770 * to do this for pool creation since the vdev's
5771 * configuratoin has already been dirtied.
5772 */
5773 if (tx->tx_txg != TXG_INITIAL)
5774 vdev_config_dirty(spa->spa_root_vdev);
5775 break;
5776 default:
5777 /*
5778 * Set pool property values in the poolprops mos object.
5779 */
5780 if (spa->spa_pool_props_object == 0) {
5781 spa->spa_pool_props_object =
5782 zap_create_link(mos, DMU_OT_POOL_PROPS,
5783 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS,
5784 tx);
5785 }
5786
5787 /* normalize the property name */
5788 propname = zpool_prop_to_name(prop);
5789 proptype = zpool_prop_get_type(prop);
5790
5791 if (nvpair_type(elem) == DATA_TYPE_STRING) {
5792 ASSERT(proptype == PROP_TYPE_STRING);
5793 VERIFY(nvpair_value_string(elem, &strval) == 0);
5794 VERIFY(zap_update(mos,
5795 spa->spa_pool_props_object, propname,
5796 1, strlen(strval) + 1, strval, tx) == 0);
5797
5798 } else if (nvpair_type(elem) == DATA_TYPE_UINT64) {
5799 VERIFY(nvpair_value_uint64(elem, &intval) == 0);
5800
5801 if (proptype == PROP_TYPE_INDEX) {
5802 const char *unused;
5803 VERIFY(zpool_prop_index_to_string(
5804 prop, intval, &unused) == 0);
5805 }
5806 VERIFY(zap_update(mos,
5807 spa->spa_pool_props_object, propname,
5808 8, 1, &intval, tx) == 0);
5809 } else {
5810 ASSERT(0); /* not allowed */
5811 }
5812
5813 switch (prop) {
5814 case ZPOOL_PROP_DELEGATION:
5815 spa->spa_delegation = intval;
5816 break;
5817 case ZPOOL_PROP_BOOTFS:
5818 spa->spa_bootfs = intval;
5819 break;
5820 case ZPOOL_PROP_FAILUREMODE:
5821 spa->spa_failmode = intval;
5822 break;
5823 case ZPOOL_PROP_AUTOEXPAND:
5824 spa->spa_autoexpand = intval;
5825 if (tx->tx_txg != TXG_INITIAL)
5826 spa_async_request(spa,
5827 SPA_ASYNC_AUTOEXPAND);
5828 break;
5829 case ZPOOL_PROP_DEDUPDITTO:
5830 spa->spa_dedup_ditto = intval;
5831 break;
5832 default:
5833 break;
5834 }
5835 }
5836
5837 /* log internal history if this is not a zpool create */
5838 if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY &&
5839 tx->tx_txg != TXG_INITIAL) {
5840 spa_history_log_internal(LOG_POOL_PROPSET,
5841 spa, tx, "%s %lld %s",
5842 nvpair_name(elem), intval, spa_name(spa));
5843 }
5844 }
5845
5846 mutex_exit(&spa->spa_props_lock);
5847 }
5848
5849 /*
5850 * Perform one-time upgrade on-disk changes. spa_version() does not
5851 * reflect the new version this txg, so there must be no changes this
5852 * txg to anything that the upgrade code depends on after it executes.
5853 * Therefore this must be called after dsl_pool_sync() does the sync
5854 * tasks.
5855 */
5856 static void
5857 spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx)
5858 {
5859 dsl_pool_t *dp = spa->spa_dsl_pool;
5860
5861 ASSERT(spa->spa_sync_pass == 1);
5862
5863 if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN &&
|
2528
2529 for (int c = 0; c < rvd->vdev_children; c++)
2530 if (rvd->vdev_child[c]->vdev_ms_array == 0)
2531 need_update = B_TRUE;
2532
2533 /*
2534 * Update the config cache asynchronously in case we're the
2535 * root pool, in which case the config cache isn't writable yet.
2536 */
2537 if (need_update)
2538 spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
2539
2540 /*
2541 * Check all DTLs to see if anything needs resilvering.
2542 */
2543 if (!dsl_scan_resilvering(spa->spa_dsl_pool) &&
2544 vdev_resilver_needed(rvd, NULL, NULL))
2545 spa_async_request(spa, SPA_ASYNC_RESILVER);
2546
2547 /*
2548 * Log the fact that we booted up (so that we can detect if
2549 * we rebooted in the middle of an operation).
2550 */
2551 spa_history_log_version(spa, "open");
2552
2553 /*
2554 * Delete any inconsistent datasets.
2555 */
2556 (void) dmu_objset_find(spa_name(spa),
2557 dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN);
2558
2559 /*
2560 * Clean up any stale temporary dataset userrefs.
2561 */
2562 dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool);
2563 }
2564
2565 return (0);
2566 }
2567
2568 static int
2569 spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig)
2570 {
2571 int mode = spa->spa_mode;
2572
2573 spa_unload(spa);
3209 int i;
3210 spa_aux_vdev_t *sav = &spa->spa_l2cache;
3211
3212 for (i = 0; i < sav->sav_count; i++) {
3213 uint64_t pool;
3214
3215 vd = sav->sav_vdevs[i];
3216 ASSERT(vd != NULL);
3217
3218 if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
3219 pool != 0ULL && l2arc_vdev_present(vd))
3220 l2arc_remove_vdev(vd);
3221 }
3222 }
3223
3224 /*
3225 * Pool Creation
3226 */
3227 int
3228 spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
3229 nvlist_t *zplprops)
3230 {
3231 spa_t *spa;
3232 char *altroot = NULL;
3233 vdev_t *rvd;
3234 dsl_pool_t *dp;
3235 dmu_tx_t *tx;
3236 int error = 0;
3237 uint64_t txg = TXG_INITIAL;
3238 nvlist_t **spares, **l2cache;
3239 uint_t nspares, nl2cache;
3240 uint64_t version, obj;
3241 boolean_t has_features;
3242
3243 /*
3244 * If this pool already exists, return failure.
3245 */
3246 mutex_enter(&spa_namespace_lock);
3247 if (spa_lookup(pool) != NULL) {
3248 mutex_exit(&spa_namespace_lock);
3249 return (EEXIST);
3428 spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND);
3429
3430 if (props != NULL) {
3431 spa_configfile_set(spa, props, B_FALSE);
3432 spa_sync_props(spa, props, tx);
3433 }
3434
3435 dmu_tx_commit(tx);
3436
3437 spa->spa_sync_on = B_TRUE;
3438 txg_sync_start(spa->spa_dsl_pool);
3439
3440 /*
3441 * We explicitly wait for the first transaction to complete so that our
3442 * bean counters are appropriately updated.
3443 */
3444 txg_wait_synced(spa->spa_dsl_pool, txg);
3445
3446 spa_config_sync(spa, B_FALSE, B_TRUE);
3447
3448 spa_history_log_version(spa, "create");
3449
3450 spa->spa_minref = refcount_count(&spa->spa_refcount);
3451
3452 mutex_exit(&spa_namespace_lock);
3453
3454 return (0);
3455 }
3456
3457 #ifdef _KERNEL
3458 /*
3459 * Get the root pool information from the root disk, then import the root pool
3460 * during the system boot up time.
3461 */
3462 extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **);
3463
3464 static nvlist_t *
3465 spa_generate_rootconf(char *devpath, char *devid, uint64_t *guid)
3466 {
3467 nvlist_t *config;
3468 nvlist_t *nvtop, *nvroot;
3628 "try booting from '%s'", avd->vdev_path);
3629 error = EINVAL;
3630 goto out;
3631 }
3632
3633 /*
3634 * If the boot device is part of a spare vdev then ensure that
3635 * we're booting off the active spare.
3636 */
3637 if (bvd->vdev_parent->vdev_ops == &vdev_spare_ops &&
3638 !bvd->vdev_isspare) {
3639 cmn_err(CE_NOTE, "The boot device is currently spared. Please "
3640 "try booting from '%s'",
3641 bvd->vdev_parent->
3642 vdev_child[bvd->vdev_parent->vdev_children - 1]->vdev_path);
3643 error = EINVAL;
3644 goto out;
3645 }
3646
3647 error = 0;
3648 out:
3649 spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
3650 vdev_free(rvd);
3651 spa_config_exit(spa, SCL_ALL, FTAG);
3652 mutex_exit(&spa_namespace_lock);
3653
3654 nvlist_free(config);
3655 return (error);
3656 }
3657
3658 #endif
3659
3660 /*
3661 * Import a non-root pool into the system.
3662 */
3663 int
3664 spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
3665 {
3666 spa_t *spa;
3667 char *altroot = NULL;
3689 (void) nvlist_lookup_string(props,
3690 zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
3691 (void) nvlist_lookup_uint64(props,
3692 zpool_prop_to_name(ZPOOL_PROP_READONLY), &readonly);
3693 if (readonly)
3694 mode = FREAD;
3695 spa = spa_add(pool, config, altroot);
3696 spa->spa_import_flags = flags;
3697
3698 /*
3699 * Verbatim import - Take a pool and insert it into the namespace
3700 * as if it had been loaded at boot.
3701 */
3702 if (spa->spa_import_flags & ZFS_IMPORT_VERBATIM) {
3703 if (props != NULL)
3704 spa_configfile_set(spa, props, B_FALSE);
3705
3706 spa_config_sync(spa, B_FALSE, B_TRUE);
3707
3708 mutex_exit(&spa_namespace_lock);
3709 spa_history_log_version(spa, "import");
3710
3711 return (0);
3712 }
3713
3714 spa_activate(spa, mode);
3715
3716 /*
3717 * Don't start async tasks until we know everything is healthy.
3718 */
3719 spa_async_suspend(spa);
3720
3721 zpool_get_rewind_policy(config, &policy);
3722 if (policy.zrp_request & ZPOOL_DO_REWIND)
3723 state = SPA_LOAD_RECOVER;
3724
3725 /*
3726 * Pass off the heavy lifting to spa_load(). Pass TRUE for mosconfig
3727 * because the user-supplied config is actually the one to trust when
3728 * doing an import.
3729 */
3820 */
3821 if (spa->spa_autoreplace) {
3822 spa_aux_check_removed(&spa->spa_spares);
3823 spa_aux_check_removed(&spa->spa_l2cache);
3824 }
3825
3826 if (spa_writeable(spa)) {
3827 /*
3828 * Update the config cache to include the newly-imported pool.
3829 */
3830 spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
3831 }
3832
3833 /*
3834 * It's possible that the pool was expanded while it was exported.
3835 * We kick off an async task to handle this for us.
3836 */
3837 spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);
3838
3839 mutex_exit(&spa_namespace_lock);
3840 spa_history_log_version(spa, "import");
3841
3842 return (0);
3843 }
3844
3845 nvlist_t *
3846 spa_tryimport(nvlist_t *tryconfig)
3847 {
3848 nvlist_t *config = NULL;
3849 char *poolname;
3850 spa_t *spa;
3851 uint64_t state;
3852 int error;
3853
3854 if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname))
3855 return (NULL);
3856
3857 if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state))
3858 return (NULL);
3859
3860 /*
4358
4359 oldvdpath = spa_strdup(oldvd->vdev_path);
4360 newvdpath = spa_strdup(newvd->vdev_path);
4361 newvd_isspare = newvd->vdev_isspare;
4362
4363 /*
4364 * Mark newvd's DTL dirty in this txg.
4365 */
4366 vdev_dirty(tvd, VDD_DTL, newvd, txg);
4367
4368 /*
4369 * Restart the resilver
4370 */
4371 dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg);
4372
4373 /*
4374 * Commit the config
4375 */
4376 (void) spa_vdev_exit(spa, newrootvd, dtl_max_txg, 0);
4377
4378 spa_history_log_internal(spa, "vdev attach", NULL,
4379 "%s vdev=%s %s vdev=%s",
4380 replacing && newvd_isspare ? "spare in" :
4381 replacing ? "replace" : "attach", newvdpath,
4382 replacing ? "for" : "to", oldvdpath);
4383
4384 spa_strfree(oldvdpath);
4385 spa_strfree(newvdpath);
4386
4387 if (spa->spa_bootfs)
4388 spa_event_notify(spa, newvd, ESC_ZFS_BOOTFS_VDEV_ATTACH);
4389
4390 return (0);
4391 }
4392
4393 /*
4394 * Detach a device from a mirror or replacing vdev.
4395 * If 'replace_done' is specified, only detach if the parent
4396 * is a replacing vdev.
4397 */
4398 int
4575
4576 /*
4577 * Mark vd's DTL as dirty in this txg. vdev_dtl_sync() will see that
4578 * vd->vdev_detached is set and free vd's DTL object in syncing context.
4579 * But first make sure we're not on any *other* txg's DTL list, to
4580 * prevent vd from being accessed after it's freed.
4581 */
4582 vdpath = spa_strdup(vd->vdev_path);
4583 for (int t = 0; t < TXG_SIZE; t++)
4584 (void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t);
4585 vd->vdev_detached = B_TRUE;
4586 vdev_dirty(tvd, VDD_DTL, vd, txg);
4587
4588 spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE);
4589
4590 /* hang on to the spa before we release the lock */
4591 spa_open_ref(spa, FTAG);
4592
4593 error = spa_vdev_exit(spa, vd, txg, 0);
4594
4595 spa_history_log_internal(spa, "detach", NULL,
4596 "vdev=%s", vdpath);
4597 spa_strfree(vdpath);
4598
4599 /*
4600 * If this was the removal of the original device in a hot spare vdev,
4601 * then we want to go through and remove the device from the hot spare
4602 * list of every other pool.
4603 */
4604 if (unspare) {
4605 spa_t *altspa = NULL;
4606
4607 mutex_enter(&spa_namespace_lock);
4608 while ((altspa = spa_next(altspa)) != NULL) {
4609 if (altspa->spa_state != POOL_STATE_ACTIVE ||
4610 altspa == spa)
4611 continue;
4612
4613 spa_open_ref(altspa, FTAG);
4614 mutex_exit(&spa_namespace_lock);
4615 (void) spa_vdev_remove(altspa, unspare_guid, B_TRUE);
4844 /* flush everything */
4845 txg = spa_vdev_config_enter(newspa);
4846 vdev_config_dirty(newspa->spa_root_vdev);
4847 (void) spa_vdev_config_exit(newspa, NULL, txg, 0, FTAG);
4848
4849 if (zio_injection_enabled)
4850 zio_handle_panic_injection(spa, FTAG, 2);
4851
4852 spa_async_resume(newspa);
4853
4854 /* finally, update the original pool's config */
4855 txg = spa_vdev_config_enter(spa);
4856 tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
4857 error = dmu_tx_assign(tx, TXG_WAIT);
4858 if (error != 0)
4859 dmu_tx_abort(tx);
4860 for (c = 0; c < children; c++) {
4861 if (vml[c] != NULL) {
4862 vdev_split(vml[c]);
4863 if (error == 0)
4864 spa_history_log_internal(spa, "detach", tx,
4865 "vdev=%s", vml[c]->vdev_path);
4866 vdev_free(vml[c]);
4867 }
4868 }
4869 vdev_config_dirty(spa->spa_root_vdev);
4870 spa->spa_config_splitting = NULL;
4871 nvlist_free(nvl);
4872 if (error == 0)
4873 dmu_tx_commit(tx);
4874 (void) spa_vdev_exit(spa, NULL, txg, 0);
4875
4876 if (zio_injection_enabled)
4877 zio_handle_panic_injection(spa, FTAG, 3);
4878
4879 /* split is complete; log a history record */
4880 spa_history_log_internal(newspa, "split", NULL,
4881 "from pool %s", spa_name(spa));
4882
4883 kmem_free(vml, children * sizeof (vdev_t *));
4884
4885 /* if we're not going to mount the filesystems in userland, export */
4886 if (exp)
4887 error = spa_export_common(newname, POOL_STATE_EXPORTED, NULL,
4888 B_FALSE, B_FALSE);
4889
4890 return (error);
4891
4892 out:
4893 spa_unload(newspa);
4894 spa_deactivate(newspa);
4895 spa_remove(newspa);
4896
4897 txg = spa_vdev_config_enter(spa);
4898
4899 /* re-online all offlined disks */
4900 for (c = 0; c < children; c++) {
4901 if (vml[c] != NULL)
5447 spa->spa_async_tasks = 0;
5448 mutex_exit(&spa->spa_async_lock);
5449
5450 /*
5451 * See if the config needs to be updated.
5452 */
5453 if (tasks & SPA_ASYNC_CONFIG_UPDATE) {
5454 uint64_t old_space, new_space;
5455
5456 mutex_enter(&spa_namespace_lock);
5457 old_space = metaslab_class_get_space(spa_normal_class(spa));
5458 spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
5459 new_space = metaslab_class_get_space(spa_normal_class(spa));
5460 mutex_exit(&spa_namespace_lock);
5461
5462 /*
5463 * If the pool grew as a result of the config update,
5464 * then log an internal history event.
5465 */
5466 if (new_space != old_space) {
5467 spa_history_log_internal(spa, "vdev online", NULL,
5468 "pool '%s' size: %llu(+%llu)",
5469 spa_name(spa), new_space, new_space - old_space);
5470 }
5471 }
5472
5473 /*
5474 * See if any devices need to be marked REMOVED.
5475 */
5476 if (tasks & SPA_ASYNC_REMOVE) {
5477 spa_vdev_state_enter(spa, SCL_NONE);
5478 spa_async_remove(spa, spa->spa_root_vdev);
5479 for (int i = 0; i < spa->spa_l2cache.sav_count; i++)
5480 spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]);
5481 for (int i = 0; i < spa->spa_spares.sav_count; i++)
5482 spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]);
5483 (void) spa_vdev_state_exit(spa, NULL, 0);
5484 }
5485
5486 if ((tasks & SPA_ASYNC_AUTOEXPAND) && !spa_suspended(spa)) {
5487 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
5683
5684 spa_sync_nvlist(spa, spa->spa_config_object, config, tx);
5685 }
5686
5687 static void
5688 spa_sync_version(void *arg1, void *arg2, dmu_tx_t *tx)
5689 {
5690 spa_t *spa = arg1;
5691 uint64_t version = *(uint64_t *)arg2;
5692
5693 /*
5694 * Setting the version is special cased when first creating the pool.
5695 */
5696 ASSERT(tx->tx_txg != TXG_INITIAL);
5697
5698 ASSERT(version <= SPA_VERSION);
5699 ASSERT(version >= spa_version(spa));
5700
5701 spa->spa_uberblock.ub_version = version;
5702 vdev_config_dirty(spa->spa_root_vdev);
5703 spa_history_log_internal(spa, "set", tx, "version=%lld", version);
5704 }
5705
5706 /*
5707 * Set zpool properties.
5708 */
5709 static void
5710 spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)
5711 {
5712 spa_t *spa = arg1;
5713 objset_t *mos = spa->spa_meta_objset;
5714 nvlist_t *nvp = arg2;
5715 nvpair_t *elem = NULL;
5716
5717 mutex_enter(&spa->spa_props_lock);
5718
5719 while ((elem = nvlist_next_nvpair(nvp, elem))) {
5720 uint64_t intval;
5721 char *strval, *fname;
5722 zpool_prop_t prop;
5723 const char *propname;
5724 zprop_type_t proptype;
5725 zfeature_info_t *feature;
5726
5727 switch (prop = zpool_name_to_prop(nvpair_name(elem))) {
5728 case ZPROP_INVAL:
5729 /*
5730 * We checked this earlier in spa_prop_validate().
5731 */
5732 ASSERT(zpool_prop_feature(nvpair_name(elem)));
5733
5734 fname = strchr(nvpair_name(elem), '@') + 1;
5735 VERIFY3U(0, ==, zfeature_lookup_name(fname, &feature));
5736
5737 spa_feature_enable(spa, feature, tx);
5738 spa_history_log_internal(spa, "set", tx,
5739 "%s=enabled", nvpair_name(elem));
5740 break;
5741
5742 case ZPOOL_PROP_VERSION:
5743 VERIFY(nvpair_value_uint64(elem, &intval) == 0);
5744 /*
5745 * The version is synced seperatly before other
5746 * properties and should be correct by now.
5747 */
5748 ASSERT3U(spa_version(spa), >=, intval);
5749 break;
5750
5751 case ZPOOL_PROP_ALTROOT:
5752 /*
5753 * 'altroot' is a non-persistent property. It should
5754 * have been set temporarily at creation or import time.
5755 */
5756 ASSERT(spa->spa_root != NULL);
5757 break;
5758
5759 case ZPOOL_PROP_READONLY:
5760 case ZPOOL_PROP_CACHEFILE:
5761 /*
5762 * 'readonly' and 'cachefile' are also non-persisitent
5763 * properties.
5764 */
5765 break;
5766 case ZPOOL_PROP_COMMENT:
5767 VERIFY(nvpair_value_string(elem, &strval) == 0);
5768 if (spa->spa_comment != NULL)
5769 spa_strfree(spa->spa_comment);
5770 spa->spa_comment = spa_strdup(strval);
5771 /*
5772 * We need to dirty the configuration on all the vdevs
5773 * so that their labels get updated. It's unnecessary
5774 * to do this for pool creation since the vdev's
5775 * configuratoin has already been dirtied.
5776 */
5777 if (tx->tx_txg != TXG_INITIAL)
5778 vdev_config_dirty(spa->spa_root_vdev);
5779 spa_history_log_internal(spa, "set", tx,
5780 "%s=%s", nvpair_name(elem), strval);
5781 break;
5782 default:
5783 /*
5784 * Set pool property values in the poolprops mos object.
5785 */
5786 if (spa->spa_pool_props_object == 0) {
5787 spa->spa_pool_props_object =
5788 zap_create_link(mos, DMU_OT_POOL_PROPS,
5789 DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS,
5790 tx);
5791 }
5792
5793 /* normalize the property name */
5794 propname = zpool_prop_to_name(prop);
5795 proptype = zpool_prop_get_type(prop);
5796
5797 if (nvpair_type(elem) == DATA_TYPE_STRING) {
5798 ASSERT(proptype == PROP_TYPE_STRING);
5799 VERIFY(nvpair_value_string(elem, &strval) == 0);
5800 VERIFY(zap_update(mos,
5801 spa->spa_pool_props_object, propname,
5802 1, strlen(strval) + 1, strval, tx) == 0);
5803 spa_history_log_internal(spa, "set", tx,
5804 "%s=%s", nvpair_name(elem), strval);
5805 } else if (nvpair_type(elem) == DATA_TYPE_UINT64) {
5806 VERIFY(nvpair_value_uint64(elem, &intval) == 0);
5807
5808 if (proptype == PROP_TYPE_INDEX) {
5809 const char *unused;
5810 VERIFY(zpool_prop_index_to_string(
5811 prop, intval, &unused) == 0);
5812 }
5813 VERIFY(zap_update(mos,
5814 spa->spa_pool_props_object, propname,
5815 8, 1, &intval, tx) == 0);
5816 spa_history_log_internal(spa, "set", tx,
5817 "%s=%lld", nvpair_name(elem), intval);
5818 } else {
5819 ASSERT(0); /* not allowed */
5820 }
5821
5822 switch (prop) {
5823 case ZPOOL_PROP_DELEGATION:
5824 spa->spa_delegation = intval;
5825 break;
5826 case ZPOOL_PROP_BOOTFS:
5827 spa->spa_bootfs = intval;
5828 break;
5829 case ZPOOL_PROP_FAILUREMODE:
5830 spa->spa_failmode = intval;
5831 break;
5832 case ZPOOL_PROP_AUTOEXPAND:
5833 spa->spa_autoexpand = intval;
5834 if (tx->tx_txg != TXG_INITIAL)
5835 spa_async_request(spa,
5836 SPA_ASYNC_AUTOEXPAND);
5837 break;
5838 case ZPOOL_PROP_DEDUPDITTO:
5839 spa->spa_dedup_ditto = intval;
5840 break;
5841 default:
5842 break;
5843 }
5844 }
5845
5846 }
5847
5848 mutex_exit(&spa->spa_props_lock);
5849 }
5850
5851 /*
5852 * Perform one-time upgrade on-disk changes. spa_version() does not
5853 * reflect the new version this txg, so there must be no changes this
5854 * txg to anything that the upgrade code depends on after it executes.
5855 * Therefore this must be called after dsl_pool_sync() does the sync
5856 * tasks.
5857 */
5858 static void
5859 spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx)
5860 {
5861 dsl_pool_t *dp = spa->spa_dsl_pool;
5862
5863 ASSERT(spa->spa_sync_pass == 1);
5864
5865 if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN &&
|