Print this page
2882 implement libzfs_core
2883 changing "canmount" property to "on" should not always remount dataset
2900 "zfs snapshot" should be able to create multiple, arbitrary snapshots at once
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Chris Siden <christopher.siden@delphix.com>
Reviewed by: Garrett D'Amore <garrett@damore.org>
Reviewed by: Bill Pijewski <wdp@joyent.com>
Reviewed by: Dan Kruchinin <dan.kruchinin@gmail.com>


2528 
2529                 for (int c = 0; c < rvd->vdev_children; c++)
2530                         if (rvd->vdev_child[c]->vdev_ms_array == 0)
2531                                 need_update = B_TRUE;
2532 
2533                 /*
2534                  * Update the config cache asynchronously in case we're the
2535                  * root pool, in which case the config cache isn't writable yet.
2536                  */
2537                 if (need_update)
2538                         spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
2539 
2540                 /*
2541                  * Check all DTLs to see if anything needs resilvering.
2542                  */
2543                 if (!dsl_scan_resilvering(spa->spa_dsl_pool) &&
2544                     vdev_resilver_needed(rvd, NULL, NULL))
2545                         spa_async_request(spa, SPA_ASYNC_RESILVER);
2546 
2547                 /*






2548                  * Delete any inconsistent datasets.
2549                  */
2550                 (void) dmu_objset_find(spa_name(spa),
2551                     dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN);
2552 
2553                 /*
2554                  * Clean up any stale temporary dataset userrefs.
2555                  */
2556                 dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool);
2557         }
2558 
2559         return (0);
2560 }
2561 
2562 static int
2563 spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig)
2564 {
2565         int mode = spa->spa_mode;
2566 
2567         spa_unload(spa);


3203         int i;
3204         spa_aux_vdev_t *sav = &spa->spa_l2cache;
3205 
3206         for (i = 0; i < sav->sav_count; i++) {
3207                 uint64_t pool;
3208 
3209                 vd = sav->sav_vdevs[i];
3210                 ASSERT(vd != NULL);
3211 
3212                 if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
3213                     pool != 0ULL && l2arc_vdev_present(vd))
3214                         l2arc_remove_vdev(vd);
3215         }
3216 }
3217 
3218 /*
3219  * Pool Creation
3220  */
3221 int
3222 spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
3223     const char *history_str, nvlist_t *zplprops)
3224 {
3225         spa_t *spa;
3226         char *altroot = NULL;
3227         vdev_t *rvd;
3228         dsl_pool_t *dp;
3229         dmu_tx_t *tx;
3230         int error = 0;
3231         uint64_t txg = TXG_INITIAL;
3232         nvlist_t **spares, **l2cache;
3233         uint_t nspares, nl2cache;
3234         uint64_t version, obj;
3235         boolean_t has_features;
3236 
3237         /*
3238          * If this pool already exists, return failure.
3239          */
3240         mutex_enter(&spa_namespace_lock);
3241         if (spa_lookup(pool) != NULL) {
3242                 mutex_exit(&spa_namespace_lock);
3243                 return (EEXIST);


3422         spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND);
3423 
3424         if (props != NULL) {
3425                 spa_configfile_set(spa, props, B_FALSE);
3426                 spa_sync_props(spa, props, tx);
3427         }
3428 
3429         dmu_tx_commit(tx);
3430 
3431         spa->spa_sync_on = B_TRUE;
3432         txg_sync_start(spa->spa_dsl_pool);
3433 
3434         /*
3435          * We explicitly wait for the first transaction to complete so that our
3436          * bean counters are appropriately updated.
3437          */
3438         txg_wait_synced(spa->spa_dsl_pool, txg);
3439 
3440         spa_config_sync(spa, B_FALSE, B_TRUE);
3441 
3442         if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL)
3443                 (void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE);
3444         spa_history_log_version(spa, LOG_POOL_CREATE);
3445 
3446         spa->spa_minref = refcount_count(&spa->spa_refcount);
3447 
3448         mutex_exit(&spa_namespace_lock);
3449 
3450         return (0);
3451 }
3452 
3453 #ifdef _KERNEL
3454 /*
3455  * Get the root pool information from the root disk, then import the root pool
3456  * during the system boot up time.
3457  */
3458 extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **);
3459 
3460 static nvlist_t *
3461 spa_generate_rootconf(char *devpath, char *devid, uint64_t *guid)
3462 {
3463         nvlist_t *config;
3464         nvlist_t *nvtop, *nvroot;


3624                     "try booting from '%s'", avd->vdev_path);
3625                 error = EINVAL;
3626                 goto out;
3627         }
3628 
3629         /*
3630          * If the boot device is part of a spare vdev then ensure that
3631          * we're booting off the active spare.
3632          */
3633         if (bvd->vdev_parent->vdev_ops == &vdev_spare_ops &&
3634             !bvd->vdev_isspare) {
3635                 cmn_err(CE_NOTE, "The boot device is currently spared. Please "
3636                     "try booting from '%s'",
3637                     bvd->vdev_parent->
3638                     vdev_child[bvd->vdev_parent->vdev_children - 1]->vdev_path);
3639                 error = EINVAL;
3640                 goto out;
3641         }
3642 
3643         error = 0;
3644         spa_history_log_version(spa, LOG_POOL_IMPORT);
3645 out:
3646         spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
3647         vdev_free(rvd);
3648         spa_config_exit(spa, SCL_ALL, FTAG);
3649         mutex_exit(&spa_namespace_lock);
3650 
3651         nvlist_free(config);
3652         return (error);
3653 }
3654 
3655 #endif
3656 
3657 /*
3658  * Import a non-root pool into the system.
3659  */
3660 int
3661 spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
3662 {
3663         spa_t *spa;
3664         char *altroot = NULL;


3686         (void) nvlist_lookup_string(props,
3687             zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
3688         (void) nvlist_lookup_uint64(props,
3689             zpool_prop_to_name(ZPOOL_PROP_READONLY), &readonly);
3690         if (readonly)
3691                 mode = FREAD;
3692         spa = spa_add(pool, config, altroot);
3693         spa->spa_import_flags = flags;
3694 
3695         /*
3696          * Verbatim import - Take a pool and insert it into the namespace
3697          * as if it had been loaded at boot.
3698          */
3699         if (spa->spa_import_flags & ZFS_IMPORT_VERBATIM) {
3700                 if (props != NULL)
3701                         spa_configfile_set(spa, props, B_FALSE);
3702 
3703                 spa_config_sync(spa, B_FALSE, B_TRUE);
3704 
3705                 mutex_exit(&spa_namespace_lock);
3706                 spa_history_log_version(spa, LOG_POOL_IMPORT);
3707 
3708                 return (0);
3709         }
3710 
3711         spa_activate(spa, mode);
3712 
3713         /*
3714          * Don't start async tasks until we know everything is healthy.
3715          */
3716         spa_async_suspend(spa);
3717 
3718         zpool_get_rewind_policy(config, &policy);
3719         if (policy.zrp_request & ZPOOL_DO_REWIND)
3720                 state = SPA_LOAD_RECOVER;
3721 
3722         /*
3723          * Pass off the heavy lifting to spa_load().  Pass TRUE for mosconfig
3724          * because the user-supplied config is actually the one to trust when
3725          * doing an import.
3726          */


3817          */
3818         if (spa->spa_autoreplace) {
3819                 spa_aux_check_removed(&spa->spa_spares);
3820                 spa_aux_check_removed(&spa->spa_l2cache);
3821         }
3822 
3823         if (spa_writeable(spa)) {
3824                 /*
3825                  * Update the config cache to include the newly-imported pool.
3826                  */
3827                 spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
3828         }
3829 
3830         /*
3831          * It's possible that the pool was expanded while it was exported.
3832          * We kick off an async task to handle this for us.
3833          */
3834         spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);
3835 
3836         mutex_exit(&spa_namespace_lock);
3837         spa_history_log_version(spa, LOG_POOL_IMPORT);
3838 
3839         return (0);
3840 }
3841 
3842 nvlist_t *
3843 spa_tryimport(nvlist_t *tryconfig)
3844 {
3845         nvlist_t *config = NULL;
3846         char *poolname;
3847         spa_t *spa;
3848         uint64_t state;
3849         int error;
3850 
3851         if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname))
3852                 return (NULL);
3853 
3854         if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state))
3855                 return (NULL);
3856 
3857         /*


4355 
4356         oldvdpath = spa_strdup(oldvd->vdev_path);
4357         newvdpath = spa_strdup(newvd->vdev_path);
4358         newvd_isspare = newvd->vdev_isspare;
4359 
4360         /*
4361          * Mark newvd's DTL dirty in this txg.
4362          */
4363         vdev_dirty(tvd, VDD_DTL, newvd, txg);
4364 
4365         /*
4366          * Restart the resilver
4367          */
4368         dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg);
4369 
4370         /*
4371          * Commit the config
4372          */
4373         (void) spa_vdev_exit(spa, newrootvd, dtl_max_txg, 0);
4374 
4375         spa_history_log_internal(LOG_POOL_VDEV_ATTACH, spa, NULL,
4376             "%s vdev=%s %s vdev=%s",
4377             replacing && newvd_isspare ? "spare in" :
4378             replacing ? "replace" : "attach", newvdpath,
4379             replacing ? "for" : "to", oldvdpath);
4380 
4381         spa_strfree(oldvdpath);
4382         spa_strfree(newvdpath);
4383 
4384         if (spa->spa_bootfs)
4385                 spa_event_notify(spa, newvd, ESC_ZFS_BOOTFS_VDEV_ATTACH);
4386 
4387         return (0);
4388 }
4389 
4390 /*
4391  * Detach a device from a mirror or replacing vdev.
4392  * If 'replace_done' is specified, only detach if the parent
4393  * is a replacing vdev.
4394  */
4395 int


4572 
4573         /*
4574          * Mark vd's DTL as dirty in this txg.  vdev_dtl_sync() will see that
4575          * vd->vdev_detached is set and free vd's DTL object in syncing context.
4576          * But first make sure we're not on any *other* txg's DTL list, to
4577          * prevent vd from being accessed after it's freed.
4578          */
4579         vdpath = spa_strdup(vd->vdev_path);
4580         for (int t = 0; t < TXG_SIZE; t++)
4581                 (void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t);
4582         vd->vdev_detached = B_TRUE;
4583         vdev_dirty(tvd, VDD_DTL, vd, txg);
4584 
4585         spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE);
4586 
4587         /* hang on to the spa before we release the lock */
4588         spa_open_ref(spa, FTAG);
4589 
4590         error = spa_vdev_exit(spa, vd, txg, 0);
4591 
4592         spa_history_log_internal(LOG_POOL_VDEV_DETACH, spa, NULL,
4593             "vdev=%s", vdpath);
4594         spa_strfree(vdpath);
4595 
4596         /*
4597          * If this was the removal of the original device in a hot spare vdev,
4598          * then we want to go through and remove the device from the hot spare
4599          * list of every other pool.
4600          */
4601         if (unspare) {
4602                 spa_t *altspa = NULL;
4603 
4604                 mutex_enter(&spa_namespace_lock);
4605                 while ((altspa = spa_next(altspa)) != NULL) {
4606                         if (altspa->spa_state != POOL_STATE_ACTIVE ||
4607                             altspa == spa)
4608                                 continue;
4609 
4610                         spa_open_ref(altspa, FTAG);
4611                         mutex_exit(&spa_namespace_lock);
4612                         (void) spa_vdev_remove(altspa, unspare_guid, B_TRUE);


4841         /* flush everything */
4842         txg = spa_vdev_config_enter(newspa);
4843         vdev_config_dirty(newspa->spa_root_vdev);
4844         (void) spa_vdev_config_exit(newspa, NULL, txg, 0, FTAG);
4845 
4846         if (zio_injection_enabled)
4847                 zio_handle_panic_injection(spa, FTAG, 2);
4848 
4849         spa_async_resume(newspa);
4850 
4851         /* finally, update the original pool's config */
4852         txg = spa_vdev_config_enter(spa);
4853         tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
4854         error = dmu_tx_assign(tx, TXG_WAIT);
4855         if (error != 0)
4856                 dmu_tx_abort(tx);
4857         for (c = 0; c < children; c++) {
4858                 if (vml[c] != NULL) {
4859                         vdev_split(vml[c]);
4860                         if (error == 0)
4861                                 spa_history_log_internal(LOG_POOL_VDEV_DETACH,
4862                                     spa, tx, "vdev=%s",
4863                                     vml[c]->vdev_path);
4864                         vdev_free(vml[c]);
4865                 }
4866         }
4867         vdev_config_dirty(spa->spa_root_vdev);
4868         spa->spa_config_splitting = NULL;
4869         nvlist_free(nvl);
4870         if (error == 0)
4871                 dmu_tx_commit(tx);
4872         (void) spa_vdev_exit(spa, NULL, txg, 0);
4873 
4874         if (zio_injection_enabled)
4875                 zio_handle_panic_injection(spa, FTAG, 3);
4876 
4877         /* split is complete; log a history record */
4878         spa_history_log_internal(LOG_POOL_SPLIT, newspa, NULL,
4879             "split new pool %s from pool %s", newname, spa_name(spa));
4880 
4881         kmem_free(vml, children * sizeof (vdev_t *));
4882 
4883         /* if we're not going to mount the filesystems in userland, export */
4884         if (exp)
4885                 error = spa_export_common(newname, POOL_STATE_EXPORTED, NULL,
4886                     B_FALSE, B_FALSE);
4887 
4888         return (error);
4889 
4890 out:
4891         spa_unload(newspa);
4892         spa_deactivate(newspa);
4893         spa_remove(newspa);
4894 
4895         txg = spa_vdev_config_enter(spa);
4896 
4897         /* re-online all offlined disks */
4898         for (c = 0; c < children; c++) {
4899                 if (vml[c] != NULL)


5445         spa->spa_async_tasks = 0;
5446         mutex_exit(&spa->spa_async_lock);
5447 
5448         /*
5449          * See if the config needs to be updated.
5450          */
5451         if (tasks & SPA_ASYNC_CONFIG_UPDATE) {
5452                 uint64_t old_space, new_space;
5453 
5454                 mutex_enter(&spa_namespace_lock);
5455                 old_space = metaslab_class_get_space(spa_normal_class(spa));
5456                 spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
5457                 new_space = metaslab_class_get_space(spa_normal_class(spa));
5458                 mutex_exit(&spa_namespace_lock);
5459 
5460                 /*
5461                  * If the pool grew as a result of the config update,
5462                  * then log an internal history event.
5463                  */
5464                 if (new_space != old_space) {
5465                         spa_history_log_internal(LOG_POOL_VDEV_ONLINE,
5466                             spa, NULL,
5467                             "pool '%s' size: %llu(+%llu)",
5468                             spa_name(spa), new_space, new_space - old_space);
5469                 }
5470         }
5471 
5472         /*
5473          * See if any devices need to be marked REMOVED.
5474          */
5475         if (tasks & SPA_ASYNC_REMOVE) {
5476                 spa_vdev_state_enter(spa, SCL_NONE);
5477                 spa_async_remove(spa, spa->spa_root_vdev);
5478                 for (int i = 0; i < spa->spa_l2cache.sav_count; i++)
5479                         spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]);
5480                 for (int i = 0; i < spa->spa_spares.sav_count; i++)
5481                         spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]);
5482                 (void) spa_vdev_state_exit(spa, NULL, 0);
5483         }
5484 
5485         if ((tasks & SPA_ASYNC_AUTOEXPAND) && !spa_suspended(spa)) {
5486                 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);


5682 
5683         spa_sync_nvlist(spa, spa->spa_config_object, config, tx);
5684 }
5685 
/*
 * Record a new pool version in the in-core uberblock.
 *
 * arg1 is the spa_t being updated and arg2 points to the uint64_t version
 * to install; tx is only consulted by the TXG_INITIAL assertion below.
 * After storing the version, the root vdev's config is marked dirty.
 *
 * NOTE(review): the (void *, void *, dmu_tx_t *) signature suggests this
 * is registered as a sync-task callback; the caller is not visible in this
 * excerpt -- confirm.
 */
5686 static void
5687 spa_sync_version(void *arg1, void *arg2, dmu_tx_t *tx)
5688 {
5689         spa_t *spa = arg1;
5690         uint64_t version = *(uint64_t *)arg2;
5691 
5692         /*
5693          * Setting the version is special cased when first creating the pool.
5694          */
5695         ASSERT(tx->tx_txg != TXG_INITIAL);
5696 
             /*
              * The requested version may not exceed SPA_VERSION and may not
              * be lower than the pool's current version.
              */
5697         ASSERT(version <= SPA_VERSION);
5698         ASSERT(version >= spa_version(spa));
5699 
5700         spa->spa_uberblock.ub_version = version;
5701         vdev_config_dirty(spa->spa_root_vdev);

5702 }
5703 
5704 /*
5705  * Set zpool properties.
      *
      * arg1 is the spa_t to update, arg2 is an nvlist of property name/value
      * pairs and tx is the transaction the changes are made in.  The whole
      * list is processed with spa_props_lock held.  For each pair:
      *
      *   - names that are not regular pool properties (ZPROP_INVAL) are
      *     treated as "feature@<name>" requests and the feature is enabled;
      *   - 'version' is only sanity-checked against the current version;
      *   - 'altroot', 'readonly' and 'cachefile' are not persisted here;
      *   - 'comment' replaces spa_comment and, except in TXG_INITIAL,
      *     dirties the root vdev config;
      *   - every other property is written to the pool-props ZAP object
      *     (created on first use), and selected values are also cached in
      *     the spa_t.
      *
      * Unless this is the initial txg, one internal history record is logged
      * per property when the pool version is at least
      * SPA_VERSION_ZPOOL_HISTORY.
5706  */
5707 static void
5708 spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)
5709 {
5710         spa_t *spa = arg1;
5711         objset_t *mos = spa->spa_meta_objset;
5712         nvlist_t *nvp = arg2;
5713         nvpair_t *elem = NULL;
5714 
5715         mutex_enter(&spa->spa_props_lock);
5716 
5717         while ((elem = nvlist_next_nvpair(nvp, elem))) {
                     /*
                      * intval is passed to the history log at the bottom of
                      * the loop for every property, including string-valued
                      * and non-persistent ones that never assign it.  Start
                      * it at zero so that an indeterminate stack value is
                      * never read or recorded.
                      */
5718                 uint64_t intval = 0;
5719                 char *strval, *fname;
5720                 zpool_prop_t prop;
5721                 const char *propname;
5722                 zprop_type_t proptype;
5723                 zfeature_info_t *feature;
5724 
5725                 switch (prop = zpool_name_to_prop(nvpair_name(elem))) {
5726                 case ZPROP_INVAL:
5727                         /*
5728                          * We checked this earlier in spa_prop_validate().
5729                          */
5730                         ASSERT(zpool_prop_feature(nvpair_name(elem)));
5731 
                             /* The feature name is the text after the '@'. */
5732                         fname = strchr(nvpair_name(elem), '@') + 1;
5733                         VERIFY3U(0, ==, zfeature_lookup_name(fname, &feature));
5734 
5735                         spa_feature_enable(spa, feature, tx);


5736                         break;
5737 
5738                 case ZPOOL_PROP_VERSION:
5739                         VERIFY(nvpair_value_uint64(elem, &intval) == 0);
5740                         /*
5741                          * The version is synced separately before other
5742                          * properties and should be correct by now.
5743                          */
5744                         ASSERT3U(spa_version(spa), >=, intval);
5745                         break;
5746 
5747                 case ZPOOL_PROP_ALTROOT:
5748                         /*
5749                          * 'altroot' is a non-persistent property. It should
5750                          * have been set temporarily at creation or import time.
5751                          */
5752                         ASSERT(spa->spa_root != NULL);
5753                         break;
5754 
5755                 case ZPOOL_PROP_READONLY:
5756                 case ZPOOL_PROP_CACHEFILE:
5757                         /*
5758                          * 'readonly' and 'cachefile' are also non-persistent
5759                          * properties.
5760                          */
5761                         break;
5762                 case ZPOOL_PROP_COMMENT:
5763                         VERIFY(nvpair_value_string(elem, &strval) == 0);
5764                         if (spa->spa_comment != NULL)
5765                                 spa_strfree(spa->spa_comment);
5766                         spa->spa_comment = spa_strdup(strval);
5767                         /*
5768                          * We need to dirty the configuration on all the vdevs
5769                          * so that their labels get updated.  It's unnecessary
5770                          * to do this for pool creation since the vdev's
5771                          * configuration has already been dirtied.
5772                          */
5773                         if (tx->tx_txg != TXG_INITIAL)
5774                                 vdev_config_dirty(spa->spa_root_vdev);


5775                         break;
5776                 default:
5777                         /*
5778                          * Set pool property values in the poolprops mos object.
5779                          */
5780                         if (spa->spa_pool_props_object == 0) {
5781                                 spa->spa_pool_props_object =
5782                                     zap_create_link(mos, DMU_OT_POOL_PROPS,
5783                                     DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS,
5784                                     tx);
5785                         }
5786 
5787                         /* normalize the property name */
5788                         propname = zpool_prop_to_name(prop);
5789                         proptype = zpool_prop_get_type(prop);
5790 
5791                         if (nvpair_type(elem) == DATA_TYPE_STRING) {
5792                                 ASSERT(proptype == PROP_TYPE_STRING);
5793                                 VERIFY(nvpair_value_string(elem, &strval) == 0);
5794                                 VERIFY(zap_update(mos,
5795                                     spa->spa_pool_props_object, propname,
5796                                     1, strlen(strval) + 1, strval, tx) == 0);
5797 

5798                         } else if (nvpair_type(elem) == DATA_TYPE_UINT64) {
5799                                 VERIFY(nvpair_value_uint64(elem, &intval) == 0);
5800 
5801                                 if (proptype == PROP_TYPE_INDEX) {
                                             /* Only checks that intval is a valid index. */
5802                                         const char *unused;
5803                                         VERIFY(zpool_prop_index_to_string(
5804                                             prop, intval, &unused) == 0);
5805                                 }
5806                                 VERIFY(zap_update(mos,
5807                                     spa->spa_pool_props_object, propname,
5808                                     8, 1, &intval, tx) == 0);


5809                         } else {
5810                                 ASSERT(0); /* not allowed */
5811                         }
5812 
                             /* Mirror selected numeric properties in the spa_t. */
5813                         switch (prop) {
5814                         case ZPOOL_PROP_DELEGATION:
5815                                 spa->spa_delegation = intval;
5816                                 break;
5817                         case ZPOOL_PROP_BOOTFS:
5818                                 spa->spa_bootfs = intval;
5819                                 break;
5820                         case ZPOOL_PROP_FAILUREMODE:
5821                                 spa->spa_failmode = intval;
5822                                 break;
5823                         case ZPOOL_PROP_AUTOEXPAND:
5824                                 spa->spa_autoexpand = intval;
5825                                 if (tx->tx_txg != TXG_INITIAL)
5826                                         spa_async_request(spa,
5827                                             SPA_ASYNC_AUTOEXPAND);
5828                                 break;
5829                         case ZPOOL_PROP_DEDUPDITTO:
5830                                 spa->spa_dedup_ditto = intval;
5831                                 break;
5832                         default:
5833                                 break;
5834                         }
5835                 }
5836 
5837                 /* log internal history if this is not a zpool create */
5838                 if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY &&
5839                     tx->tx_txg != TXG_INITIAL) {
5840                         spa_history_log_internal(LOG_POOL_PROPSET,
5841                             spa, tx, "%s %lld %s",
5842                             nvpair_name(elem), intval, spa_name(spa));
5843                 }
5844         }
5845 
5846         mutex_exit(&spa->spa_props_lock);
5847 }
5848 
5849 /*
5850  * Perform one-time upgrade on-disk changes.  spa_version() does not
5851  * reflect the new version this txg, so there must be no changes this
5852  * txg to anything that the upgrade code depends on after it executes.
5853  * Therefore this must be called after dsl_pool_sync() does the sync
5854  * tasks.
5855  */
5856 static void
5857 spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx)
5858 {
5859         dsl_pool_t *dp = spa->spa_dsl_pool;
5860 
5861         ASSERT(spa->spa_sync_pass == 1);
5862 
5863         if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN &&




2528 
2529                 for (int c = 0; c < rvd->vdev_children; c++)
2530                         if (rvd->vdev_child[c]->vdev_ms_array == 0)
2531                                 need_update = B_TRUE;
2532 
2533                 /*
2534                  * Update the config cache asynchronously in case we're the
2535                  * root pool, in which case the config cache isn't writable yet.
2536                  */
2537                 if (need_update)
2538                         spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
2539 
2540                 /*
2541                  * Check all DTLs to see if anything needs resilvering.
2542                  */
2543                 if (!dsl_scan_resilvering(spa->spa_dsl_pool) &&
2544                     vdev_resilver_needed(rvd, NULL, NULL))
2545                         spa_async_request(spa, SPA_ASYNC_RESILVER);
2546 
2547                 /*
2548                  * Log the fact that we booted up (so that we can detect if
2549                  * we rebooted in the middle of an operation).
2550                  */
2551                 spa_history_log_version(spa, "open");
2552 
2553                 /*
2554                  * Delete any inconsistent datasets.
2555                  */
2556                 (void) dmu_objset_find(spa_name(spa),
2557                     dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN);
2558 
2559                 /*
2560                  * Clean up any stale temporary dataset userrefs.
2561                  */
2562                 dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool);
2563         }
2564 
2565         return (0);
2566 }
2567 
2568 static int
2569 spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig)
2570 {
2571         int mode = spa->spa_mode;
2572 
2573         spa_unload(spa);


3209         int i;
3210         spa_aux_vdev_t *sav = &spa->spa_l2cache;
3211 
3212         for (i = 0; i < sav->sav_count; i++) {
3213                 uint64_t pool;
3214 
3215                 vd = sav->sav_vdevs[i];
3216                 ASSERT(vd != NULL);
3217 
3218                 if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
3219                     pool != 0ULL && l2arc_vdev_present(vd))
3220                         l2arc_remove_vdev(vd);
3221         }
3222 }
3223 
3224 /*
3225  * Pool Creation
3226  */
3227 int
3228 spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
3229     nvlist_t *zplprops)
3230 {
3231         spa_t *spa;
3232         char *altroot = NULL;
3233         vdev_t *rvd;
3234         dsl_pool_t *dp;
3235         dmu_tx_t *tx;
3236         int error = 0;
3237         uint64_t txg = TXG_INITIAL;
3238         nvlist_t **spares, **l2cache;
3239         uint_t nspares, nl2cache;
3240         uint64_t version, obj;
3241         boolean_t has_features;
3242 
3243         /*
3244          * If this pool already exists, return failure.
3245          */
3246         mutex_enter(&spa_namespace_lock);
3247         if (spa_lookup(pool) != NULL) {
3248                 mutex_exit(&spa_namespace_lock);
3249                 return (EEXIST);


3428         spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND);
3429 
3430         if (props != NULL) {
3431                 spa_configfile_set(spa, props, B_FALSE);
3432                 spa_sync_props(spa, props, tx);
3433         }
3434 
3435         dmu_tx_commit(tx);
3436 
3437         spa->spa_sync_on = B_TRUE;
3438         txg_sync_start(spa->spa_dsl_pool);
3439 
3440         /*
3441          * We explicitly wait for the first transaction to complete so that our
3442          * bean counters are appropriately updated.
3443          */
3444         txg_wait_synced(spa->spa_dsl_pool, txg);
3445 
3446         spa_config_sync(spa, B_FALSE, B_TRUE);
3447 
3448         spa_history_log_version(spa, "create");


3449 
3450         spa->spa_minref = refcount_count(&spa->spa_refcount);
3451 
3452         mutex_exit(&spa_namespace_lock);
3453 
3454         return (0);
3455 }
3456 
3457 #ifdef _KERNEL
3458 /*
3459  * Get the root pool information from the root disk, then import the root pool
3460  * during the system boot up time.
3461  */
3462 extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **);
3463 
3464 static nvlist_t *
3465 spa_generate_rootconf(char *devpath, char *devid, uint64_t *guid)
3466 {
3467         nvlist_t *config;
3468         nvlist_t *nvtop, *nvroot;


3628                     "try booting from '%s'", avd->vdev_path);
3629                 error = EINVAL;
3630                 goto out;
3631         }
3632 
3633         /*
3634          * If the boot device is part of a spare vdev then ensure that
3635          * we're booting off the active spare.
3636          */
3637         if (bvd->vdev_parent->vdev_ops == &vdev_spare_ops &&
3638             !bvd->vdev_isspare) {
3639                 cmn_err(CE_NOTE, "The boot device is currently spared. Please "
3640                     "try booting from '%s'",
3641                     bvd->vdev_parent->
3642                     vdev_child[bvd->vdev_parent->vdev_children - 1]->vdev_path);
3643                 error = EINVAL;
3644                 goto out;
3645         }
3646 
3647         error = 0;

3648 out:
3649         spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
3650         vdev_free(rvd);
3651         spa_config_exit(spa, SCL_ALL, FTAG);
3652         mutex_exit(&spa_namespace_lock);
3653 
3654         nvlist_free(config);
3655         return (error);
3656 }
3657 
3658 #endif
3659 
3660 /*
3661  * Import a non-root pool into the system.
3662  */
3663 int
3664 spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
3665 {
3666         spa_t *spa;
3667         char *altroot = NULL;


3689         (void) nvlist_lookup_string(props,
3690             zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
3691         (void) nvlist_lookup_uint64(props,
3692             zpool_prop_to_name(ZPOOL_PROP_READONLY), &readonly);
3693         if (readonly)
3694                 mode = FREAD;
3695         spa = spa_add(pool, config, altroot);
3696         spa->spa_import_flags = flags;
3697 
3698         /*
3699          * Verbatim import - Take a pool and insert it into the namespace
3700          * as if it had been loaded at boot.
3701          */
3702         if (spa->spa_import_flags & ZFS_IMPORT_VERBATIM) {
3703                 if (props != NULL)
3704                         spa_configfile_set(spa, props, B_FALSE);
3705 
3706                 spa_config_sync(spa, B_FALSE, B_TRUE);
3707 
3708                 mutex_exit(&spa_namespace_lock);
3709                 spa_history_log_version(spa, "import");
3710 
3711                 return (0);
3712         }
3713 
3714         spa_activate(spa, mode);
3715 
3716         /*
3717          * Don't start async tasks until we know everything is healthy.
3718          */
3719         spa_async_suspend(spa);
3720 
3721         zpool_get_rewind_policy(config, &policy);
3722         if (policy.zrp_request & ZPOOL_DO_REWIND)
3723                 state = SPA_LOAD_RECOVER;
3724 
3725         /*
3726          * Pass off the heavy lifting to spa_load().  Pass TRUE for mosconfig
3727          * because the user-supplied config is actually the one to trust when
3728          * doing an import.
3729          */


3820          */
3821         if (spa->spa_autoreplace) {
3822                 spa_aux_check_removed(&spa->spa_spares);
3823                 spa_aux_check_removed(&spa->spa_l2cache);
3824         }
3825 
3826         if (spa_writeable(spa)) {
3827                 /*
3828                  * Update the config cache to include the newly-imported pool.
3829                  */
3830                 spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
3831         }
3832 
3833         /*
3834          * It's possible that the pool was expanded while it was exported.
3835          * We kick off an async task to handle this for us.
3836          */
3837         spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);
3838 
3839         mutex_exit(&spa_namespace_lock);
3840         spa_history_log_version(spa, "import");
3841 
3842         return (0);
3843 }
3844 
3845 nvlist_t *
3846 spa_tryimport(nvlist_t *tryconfig)
3847 {
3848         nvlist_t *config = NULL;
3849         char *poolname;
3850         spa_t *spa;
3851         uint64_t state;
3852         int error;
3853 
3854         if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname))
3855                 return (NULL);
3856 
3857         if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state))
3858                 return (NULL);
3859 
3860         /*


4358 
4359         oldvdpath = spa_strdup(oldvd->vdev_path);
4360         newvdpath = spa_strdup(newvd->vdev_path);
4361         newvd_isspare = newvd->vdev_isspare;
4362 
4363         /*
4364          * Mark newvd's DTL dirty in this txg.
4365          */
4366         vdev_dirty(tvd, VDD_DTL, newvd, txg);
4367 
4368         /*
4369          * Restart the resilver
4370          */
4371         dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg);
4372 
4373         /*
4374          * Commit the config
4375          */
4376         (void) spa_vdev_exit(spa, newrootvd, dtl_max_txg, 0);
4377 
4378         spa_history_log_internal(spa, "vdev attach", NULL,
4379             "%s vdev=%s %s vdev=%s",
4380             replacing && newvd_isspare ? "spare in" :
4381             replacing ? "replace" : "attach", newvdpath,
4382             replacing ? "for" : "to", oldvdpath);
4383 
4384         spa_strfree(oldvdpath);
4385         spa_strfree(newvdpath);
4386 
4387         if (spa->spa_bootfs)
4388                 spa_event_notify(spa, newvd, ESC_ZFS_BOOTFS_VDEV_ATTACH);
4389 
4390         return (0);
4391 }
4392 
4393 /*
4394  * Detach a device from a mirror or replacing vdev.
4395  * If 'replace_done' is specified, only detach if the parent
4396  * is a replacing vdev.
4397  */
4398 int


4575 
4576         /*
4577          * Mark vd's DTL as dirty in this txg.  vdev_dtl_sync() will see that
4578          * vd->vdev_detached is set and free vd's DTL object in syncing context.
4579          * But first make sure we're not on any *other* txg's DTL list, to
4580          * prevent vd from being accessed after it's freed.
4581          */
4582         vdpath = spa_strdup(vd->vdev_path);
4583         for (int t = 0; t < TXG_SIZE; t++)
4584                 (void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t);
4585         vd->vdev_detached = B_TRUE;
4586         vdev_dirty(tvd, VDD_DTL, vd, txg);
4587 
4588         spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE);
4589 
4590         /* hang on to the spa before we release the lock */
4591         spa_open_ref(spa, FTAG);
4592 
4593         error = spa_vdev_exit(spa, vd, txg, 0);
4594 
4595         spa_history_log_internal(spa, "detach", NULL,
4596             "vdev=%s", vdpath);
4597         spa_strfree(vdpath);
4598 
4599         /*
4600          * If this was the removal of the original device in a hot spare vdev,
4601          * then we want to go through and remove the device from the hot spare
4602          * list of every other pool.
4603          */
4604         if (unspare) {
4605                 spa_t *altspa = NULL;
4606 
4607                 mutex_enter(&spa_namespace_lock);
4608                 while ((altspa = spa_next(altspa)) != NULL) {
4609                         if (altspa->spa_state != POOL_STATE_ACTIVE ||
4610                             altspa == spa)
4611                                 continue;
4612 
4613                         spa_open_ref(altspa, FTAG);
4614                         mutex_exit(&spa_namespace_lock);
4615                         (void) spa_vdev_remove(altspa, unspare_guid, B_TRUE);


4844         /* flush everything */
4845         txg = spa_vdev_config_enter(newspa);
4846         vdev_config_dirty(newspa->spa_root_vdev);
4847         (void) spa_vdev_config_exit(newspa, NULL, txg, 0, FTAG);
4848 
4849         if (zio_injection_enabled)
4850                 zio_handle_panic_injection(spa, FTAG, 2);
4851 
4852         spa_async_resume(newspa);
4853 
4854         /* finally, update the original pool's config */
4855         txg = spa_vdev_config_enter(spa);
4856         tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
4857         error = dmu_tx_assign(tx, TXG_WAIT);
4858         if (error != 0)
4859                 dmu_tx_abort(tx);
4860         for (c = 0; c < children; c++) {
4861                 if (vml[c] != NULL) {
4862                         vdev_split(vml[c]);
4863                         if (error == 0)
4864                                 spa_history_log_internal(spa, "detach", tx,
4865                                     "vdev=%s", vml[c]->vdev_path);

4866                         vdev_free(vml[c]);
4867                 }
4868         }
4869         vdev_config_dirty(spa->spa_root_vdev);
4870         spa->spa_config_splitting = NULL;
4871         nvlist_free(nvl);
4872         if (error == 0)
4873                 dmu_tx_commit(tx);
4874         (void) spa_vdev_exit(spa, NULL, txg, 0);
4875 
4876         if (zio_injection_enabled)
4877                 zio_handle_panic_injection(spa, FTAG, 3);
4878 
4879         /* split is complete; log a history record */
4880         spa_history_log_internal(newspa, "split", NULL,
4881             "from pool %s", spa_name(spa));
4882 
4883         kmem_free(vml, children * sizeof (vdev_t *));
4884 
4885         /* if we're not going to mount the filesystems in userland, export */
4886         if (exp)
4887                 error = spa_export_common(newname, POOL_STATE_EXPORTED, NULL,
4888                     B_FALSE, B_FALSE);
4889 
4890         return (error);
4891 
4892 out:
4893         spa_unload(newspa);
4894         spa_deactivate(newspa);
4895         spa_remove(newspa);
4896 
4897         txg = spa_vdev_config_enter(spa);
4898 
4899         /* re-online all offlined disks */
4900         for (c = 0; c < children; c++) {
4901                 if (vml[c] != NULL)


5447         spa->spa_async_tasks = 0;
5448         mutex_exit(&spa->spa_async_lock);
5449 
5450         /*
5451          * See if the config needs to be updated.
5452          */
5453         if (tasks & SPA_ASYNC_CONFIG_UPDATE) {
5454                 uint64_t old_space, new_space;
5455 
5456                 mutex_enter(&spa_namespace_lock);
5457                 old_space = metaslab_class_get_space(spa_normal_class(spa));
5458                 spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
5459                 new_space = metaslab_class_get_space(spa_normal_class(spa));
5460                 mutex_exit(&spa_namespace_lock);
5461 
5462                 /*
5463                  * If the pool grew as a result of the config update,
5464                  * then log an internal history event.
5465                  */
5466                 if (new_space != old_space) {
5467                         spa_history_log_internal(spa, "vdev online", NULL,

5468                             "pool '%s' size: %llu(+%llu)",
5469                             spa_name(spa), new_space, new_space - old_space);
5470                 }
5471         }
5472 
5473         /*
5474          * See if any devices need to be marked REMOVED.
5475          */
5476         if (tasks & SPA_ASYNC_REMOVE) {
5477                 spa_vdev_state_enter(spa, SCL_NONE);
5478                 spa_async_remove(spa, spa->spa_root_vdev);
5479                 for (int i = 0; i < spa->spa_l2cache.sav_count; i++)
5480                         spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]);
5481                 for (int i = 0; i < spa->spa_spares.sav_count; i++)
5482                         spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]);
5483                 (void) spa_vdev_state_exit(spa, NULL, 0);
5484         }
5485 
5486         if ((tasks & SPA_ASYNC_AUTOEXPAND) && !spa_suspended(spa)) {
5487                 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);


5683 
5684         spa_sync_nvlist(spa, spa->spa_config_object, config, tx);
5685 }
5686 
5687 static void
5688 spa_sync_version(void *arg1, void *arg2, dmu_tx_t *tx)
5689 {
5690         spa_t *spa = arg1;
5691         uint64_t version = *(uint64_t *)arg2;
5692 
5693         /*
5694          * Setting the version is special cased when first creating the pool.
5695          */
5696         ASSERT(tx->tx_txg != TXG_INITIAL);
5697 
5698         ASSERT(version <= SPA_VERSION);
5699         ASSERT(version >= spa_version(spa));
5700 
5701         spa->spa_uberblock.ub_version = version;
5702         vdev_config_dirty(spa->spa_root_vdev);
5703         spa_history_log_internal(spa, "set", tx, "version=%lld", version);
5704 }
5705 
5706 /*
5707  * Set zpool properties.
5708  */
5709 static void
5710 spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)
5711 {
5712         spa_t *spa = arg1;
5713         objset_t *mos = spa->spa_meta_objset;
5714         nvlist_t *nvp = arg2;
5715         nvpair_t *elem = NULL;
5716 
5717         mutex_enter(&spa->spa_props_lock);
5718 
5719         while ((elem = nvlist_next_nvpair(nvp, elem))) {
5720                 uint64_t intval;
5721                 char *strval, *fname;
5722                 zpool_prop_t prop;
5723                 const char *propname;
5724                 zprop_type_t proptype;
5725                 zfeature_info_t *feature;
5726 
5727                 switch (prop = zpool_name_to_prop(nvpair_name(elem))) {
5728                 case ZPROP_INVAL:
5729                         /*
5730                          * We checked this earlier in spa_prop_validate().
5731                          */
5732                         ASSERT(zpool_prop_feature(nvpair_name(elem)));
5733 
5734                         fname = strchr(nvpair_name(elem), '@') + 1;
5735                         VERIFY3U(0, ==, zfeature_lookup_name(fname, &feature));
5736 
5737                         spa_feature_enable(spa, feature, tx);
5738                         spa_history_log_internal(spa, "set", tx,
5739                             "%s=enabled", nvpair_name(elem));
5740                         break;
5741 
5742                 case ZPOOL_PROP_VERSION:
5743                         VERIFY(nvpair_value_uint64(elem, &intval) == 0);
5744                         /*
5745                          * The version is synced seperatly before other
5746                          * properties and should be correct by now.
5747                          */
5748                         ASSERT3U(spa_version(spa), >=, intval);
5749                         break;
5750 
5751                 case ZPOOL_PROP_ALTROOT:
5752                         /*
5753                          * 'altroot' is a non-persistent property. It should
5754                          * have been set temporarily at creation or import time.
5755                          */
5756                         ASSERT(spa->spa_root != NULL);
5757                         break;
5758 
5759                 case ZPOOL_PROP_READONLY:
5760                 case ZPOOL_PROP_CACHEFILE:
5761                         /*
5762                          * 'readonly' and 'cachefile' are also non-persisitent
5763                          * properties.
5764                          */
5765                         break;
5766                 case ZPOOL_PROP_COMMENT:
5767                         VERIFY(nvpair_value_string(elem, &strval) == 0);
5768                         if (spa->spa_comment != NULL)
5769                                 spa_strfree(spa->spa_comment);
5770                         spa->spa_comment = spa_strdup(strval);
5771                         /*
5772                          * We need to dirty the configuration on all the vdevs
5773                          * so that their labels get updated.  It's unnecessary
5774                          * to do this for pool creation since the vdev's
5775                          * configuratoin has already been dirtied.
5776                          */
5777                         if (tx->tx_txg != TXG_INITIAL)
5778                                 vdev_config_dirty(spa->spa_root_vdev);
5779                         spa_history_log_internal(spa, "set", tx,
5780                             "%s=%s", nvpair_name(elem), strval);
5781                         break;
5782                 default:
5783                         /*
5784                          * Set pool property values in the poolprops mos object.
5785                          */
5786                         if (spa->spa_pool_props_object == 0) {
5787                                 spa->spa_pool_props_object =
5788                                     zap_create_link(mos, DMU_OT_POOL_PROPS,
5789                                     DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS,
5790                                     tx);
5791                         }
5792 
5793                         /* normalize the property name */
5794                         propname = zpool_prop_to_name(prop);
5795                         proptype = zpool_prop_get_type(prop);
5796 
5797                         if (nvpair_type(elem) == DATA_TYPE_STRING) {
5798                                 ASSERT(proptype == PROP_TYPE_STRING);
5799                                 VERIFY(nvpair_value_string(elem, &strval) == 0);
5800                                 VERIFY(zap_update(mos,
5801                                     spa->spa_pool_props_object, propname,
5802                                     1, strlen(strval) + 1, strval, tx) == 0);
5803                                 spa_history_log_internal(spa, "set", tx,
5804                                     "%s=%s", nvpair_name(elem), strval);
5805                         } else if (nvpair_type(elem) == DATA_TYPE_UINT64) {
5806                                 VERIFY(nvpair_value_uint64(elem, &intval) == 0);
5807 
5808                                 if (proptype == PROP_TYPE_INDEX) {
5809                                         const char *unused;
5810                                         VERIFY(zpool_prop_index_to_string(
5811                                             prop, intval, &unused) == 0);
5812                                 }
5813                                 VERIFY(zap_update(mos,
5814                                     spa->spa_pool_props_object, propname,
5815                                     8, 1, &intval, tx) == 0);
5816                                 spa_history_log_internal(spa, "set", tx,
5817                                     "%s=%lld", nvpair_name(elem), intval);
5818                         } else {
5819                                 ASSERT(0); /* not allowed */
5820                         }
5821 
5822                         switch (prop) {
5823                         case ZPOOL_PROP_DELEGATION:
5824                                 spa->spa_delegation = intval;
5825                                 break;
5826                         case ZPOOL_PROP_BOOTFS:
5827                                 spa->spa_bootfs = intval;
5828                                 break;
5829                         case ZPOOL_PROP_FAILUREMODE:
5830                                 spa->spa_failmode = intval;
5831                                 break;
5832                         case ZPOOL_PROP_AUTOEXPAND:
5833                                 spa->spa_autoexpand = intval;
5834                                 if (tx->tx_txg != TXG_INITIAL)
5835                                         spa_async_request(spa,
5836                                             SPA_ASYNC_AUTOEXPAND);
5837                                 break;
5838                         case ZPOOL_PROP_DEDUPDITTO:
5839                                 spa->spa_dedup_ditto = intval;
5840                                 break;
5841                         default:
5842                                 break;
5843                         }
5844                 }
5845 







5846         }
5847 
5848         mutex_exit(&spa->spa_props_lock);
5849 }
5850 
5851 /*
5852  * Perform one-time upgrade on-disk changes.  spa_version() does not
5853  * reflect the new version this txg, so there must be no changes this
5854  * txg to anything that the upgrade code depends on after it executes.
5855  * Therefore this must be called after dsl_pool_sync() does the sync
5856  * tasks.
5857  */
5858 static void
5859 spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx)
5860 {
5861         dsl_pool_t *dp = spa->spa_dsl_pool;
5862 
5863         ASSERT(spa->spa_sync_pass == 1);
5864 
5865         if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN &&