1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Portions Copyright 2011 Martin Matuska
  25  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  26  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  27  * Copyright (c) 2013 by Delphix. All rights reserved.
  28  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  29  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  30  */
  31 
  32 /*
  33  * ZFS ioctls.
  34  *
  35  * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
  36  * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
  37  *
  38  * There are two ways that we handle ioctls: the legacy way where almost
  39  * all of the logic is in the ioctl callback, and the new way where most
  40  * of the marshalling is handled in the common entry point, zfsdev_ioctl().
  41  *
  42  * Non-legacy ioctls should be registered by calling
  43  * zfs_ioctl_register() from zfs_ioctl_init().  The ioctl is invoked
  44  * from userland by lzc_ioctl().
  45  *
  46  * The registration arguments are as follows:
  47  *
  48  * const char *name
  49  *   The name of the ioctl.  This is used for history logging.  If the
  50  *   ioctl returns successfully (the callback returns 0), and allow_log
  51  *   is true, then a history log entry will be recorded with the input &
  52  *   output nvlists.  The log entry can be printed with "zpool history -i".
  53  *
  54  * zfs_ioc_t ioc
  55  *   The ioctl request number, which userland will pass to ioctl(2).
  56  *   The ioctl numbers can change from release to release, because
  57  *   the caller (libzfs) must be matched to the kernel.
  58  *
  59  * zfs_secpolicy_func_t *secpolicy
  60  *   This function will be called before the zfs_ioc_func_t, to
  61  *   determine if this operation is permitted.  It should return EPERM
  62  *   on failure, and 0 on success.  Checks include determining if the
  63  *   dataset is visible in this zone, and if the user has either all
  64  *   zfs privileges in the zone (SYS_MOUNT), or has been granted permission
  65  *   to do this operation on this dataset with "zfs allow".
  66  *
  67  * zfs_ioc_namecheck_t namecheck
  68  *   This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
  69  *   name, a dataset name, or nothing.  If the name is not well-formed,
  70  *   the ioctl will fail and the callback will not be called.
  71  *   Therefore, the callback can assume that the name is well-formed
  72  *   (e.g. is null-terminated, doesn't have more than one '@' character,
  73  *   doesn't have invalid characters).
  74  *
  75  * zfs_ioc_poolcheck_t pool_check
  76  *   This specifies requirements on the pool state.  If the pool does
  77  *   not meet them (is suspended or is readonly), the ioctl will fail
  78  *   and the callback will not be called.  If any checks are specified
  79  *   (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
  80  *   Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
  81  *   POOL_CHECK_READONLY).
  82  *
  83  * boolean_t smush_outnvlist
  84  *   If smush_outnvlist is true, then the output is presumed to be a
  85  *   list of errors, and it will be "smushed" down to fit into the
  86  *   caller's buffer, by removing some entries and replacing them with a
  87  *   single "N_MORE_ERRORS" entry indicating how many were removed.  See
  88  *   nvlist_smush() for details.  If smush_outnvlist is false, and the
  89  *   outnvlist does not fit into the userland-provided buffer, then the
  90  *   ioctl will fail with ENOMEM.
  91  *
  92  * zfs_ioc_func_t *func
  93  *   The callback function that will perform the operation.
  94  *
  95  *   The callback should return 0 on success, or an error number on
  96  *   failure.  If the function fails, the userland ioctl will return -1,
  97  *   and errno will be set to the callback's return value.  The callback
  98  *   will be called with the following arguments:
  99  *
 100  *   const char *name
 101  *     The name of the pool or dataset to operate on, from
 102  *     zfs_cmd_t:zc_name.  The 'namecheck' argument specifies the
 103  *     expected type (pool, dataset, or none).
 104  *
 105  *   nvlist_t *innvl
 106  *     The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src.  Or
 107  *     NULL if no input nvlist was provided.  Changes to this nvlist are
 108  *     ignored.  If the input nvlist could not be deserialized, the
 109  *     ioctl will fail and the callback will not be called.
 110  *
 111  *   nvlist_t *outnvl
 112  *     The output nvlist, initially empty.  The callback can fill it in,
 113  *     and it will be returned to userland by serializing it into
 114  *     zfs_cmd_t:zc_nvlist_dst.  If it is non-empty, and serialization
 115  *     fails (e.g. because the caller didn't supply a large enough
 116  *     buffer), then the overall ioctl will fail.  See the
 *     'smush_outnvlist' argument above for additional behaviors.
 118  *
 119  *     There are two typical uses of the output nvlist:
 120  *       - To return state, e.g. property values.  In this case,
 121  *         smush_outnvlist should be false.  If the buffer was not large
 122  *         enough, the caller will reallocate a larger buffer and try
 123  *         the ioctl again.
 124  *
 125  *       - To return multiple errors from an ioctl which makes on-disk
 126  *         changes.  In this case, smush_outnvlist should be true.
 127  *         Ioctls which make on-disk modifications should generally not
 128  *         use the outnvl if they succeed, because the caller can not
 129  *         distinguish between the operation failing, and
 130  *         deserialization failing.
 131  */
 132 
 133 #include <sys/types.h>
 134 #include <sys/param.h>
 135 #include <sys/errno.h>
 136 #include <sys/uio.h>
 137 #include <sys/buf.h>
 138 #include <sys/modctl.h>
 139 #include <sys/open.h>
 140 #include <sys/file.h>
 141 #include <sys/kmem.h>
 142 #include <sys/conf.h>
 143 #include <sys/cmn_err.h>
 144 #include <sys/stat.h>
 145 #include <sys/zfs_ioctl.h>
 146 #include <sys/zfs_vfsops.h>
 147 #include <sys/zfs_znode.h>
 148 #include <sys/zap.h>
 149 #include <sys/spa.h>
 150 #include <sys/spa_impl.h>
 151 #include <sys/vdev.h>
 152 #include <sys/priv_impl.h>
 153 #include <sys/dmu.h>
 154 #include <sys/dsl_dir.h>
 155 #include <sys/dsl_dataset.h>
 156 #include <sys/dsl_prop.h>
 157 #include <sys/dsl_deleg.h>
 158 #include <sys/dmu_objset.h>
 159 #include <sys/dmu_impl.h>
 160 #include <sys/dmu_tx.h>
 161 #include <sys/ddi.h>
 162 #include <sys/sunddi.h>
 163 #include <sys/sunldi.h>
 164 #include <sys/policy.h>
 165 #include <sys/zone.h>
 166 #include <sys/nvpair.h>
 167 #include <sys/pathname.h>
 168 #include <sys/mount.h>
 169 #include <sys/sdt.h>
 170 #include <sys/fs/zfs.h>
 171 #include <sys/zfs_ctldir.h>
 172 #include <sys/zfs_dir.h>
 173 #include <sys/zfs_onexit.h>
 174 #include <sys/zvol.h>
 175 #include <sys/dsl_scan.h>
 176 #include <sharefs/share.h>
 177 #include <sys/dmu_objset.h>
 178 #include <sys/dmu_send.h>
 179 #include <sys/dsl_destroy.h>
 180 #include <sys/dsl_userhold.h>
 181 #include <sys/zfeature.h>
 182 #include <sys/zio_checksum.h>
 183 
 184 #include "zfs_namecheck.h"
 185 #include "zfs_prop.h"
 186 #include "zfs_deleg.h"
 187 #include "zfs_comutil.h"
 188 
/* Declared here, defined outside this file. */
extern struct modlfs zfs_modlfs;

/* ZFS init/fini routines; declared here, defined outside this file. */
extern void zfs_init(void);
extern void zfs_fini(void);

/*
 * NOTE(review): presumably the driver's LDI identity and devinfo node,
 * filled in when the driver attaches -- confirm against the attach code.
 */
ldi_ident_t zfs_li = NULL;
dev_info_t *zfs_dip;

/*
 * NOTE(review): these look like thread-specific-data keys -- confirm.
 * rrw_tsd_key is owned by another file; zfs_allow_log_key is private to
 * this one.
 */
uint_t zfs_fsyncer_key;
extern uint_t rrw_tsd_key;
static uint_t zfs_allow_log_key;
 200 
 201 typedef int zfs_ioc_legacy_func_t(zfs_cmd_t *);
 202 typedef int zfs_ioc_func_t(const char *, nvlist_t *, nvlist_t *);
 203 typedef int zfs_secpolicy_func_t(zfs_cmd_t *, nvlist_t *, cred_t *);
 204 
 205 typedef enum {
 206         NO_NAME,
 207         POOL_NAME,
 208         DATASET_NAME
 209 } zfs_ioc_namecheck_t;
 210 
 211 typedef enum {
 212         POOL_CHECK_NONE         = 1 << 0,
 213         POOL_CHECK_SUSPENDED    = 1 << 1,
 214         POOL_CHECK_READONLY     = 1 << 2,
 215 } zfs_ioc_poolcheck_t;
 216 
 217 typedef struct zfs_ioc_vec {
 218         zfs_ioc_legacy_func_t   *zvec_legacy_func;
 219         zfs_ioc_func_t          *zvec_func;
 220         zfs_secpolicy_func_t    *zvec_secpolicy;
 221         zfs_ioc_namecheck_t     zvec_namecheck;
 222         boolean_t               zvec_allow_log;
 223         zfs_ioc_poolcheck_t     zvec_pool_check;
 224         boolean_t               zvec_smush_outnvlist;
 225         const char              *zvec_name;
 226 } zfs_ioc_vec_t;
 227 
/*
 * Delegated-permission names for the user/group space properties.
 *
 * This array is indexed by zfs_userquota_prop_t, so the entries must stay
 * in the same order as that enum's values.
 */
static const char *userquota_perms[] = {
        ZFS_DELEG_PERM_USERUSED,
        ZFS_DELEG_PERM_USERQUOTA,
        ZFS_DELEG_PERM_GROUPUSED,
        ZFS_DELEG_PERM_GROUPQUOTA,
};
 235 
/*
 * Forward declarations.  The static functions are defined later in this
 * file; zfs_set_prop_nvlist() has external linkage.
 */
static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
static int zfs_check_settable(const char *name, nvpair_t *property,
    cred_t *cr);
static int zfs_check_clearable(char *dataset, nvlist_t *props,
    nvlist_t **errors);
static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
    boolean_t *);
int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);

static int zfs_prop_activate_feature(spa_t *spa, zfeature_info_t *feature);
 247 
 248 /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
 249 void
 250 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
 251 {
 252         const char *newfile;
 253         char buf[512];
 254         va_list adx;
 255 
 256         /*
 257          * Get rid of annoying "../common/" prefix to filename.
 258          */
 259         newfile = strrchr(file, '/');
 260         if (newfile != NULL) {
 261                 newfile = newfile + 1; /* Get rid of leading / */
 262         } else {
 263                 newfile = file;
 264         }
 265 
 266         va_start(adx, fmt);
 267         (void) vsnprintf(buf, sizeof (buf), fmt, adx);
 268         va_end(adx);
 269 
 270         /*
 271          * To get this data, use the zfs-dprintf probe as so:
 272          * dtrace -q -n 'zfs-dprintf \
 273          *      /stringof(arg0) == "dbuf.c"/ \
 274          *      {printf("%s: %s", stringof(arg1), stringof(arg3))}'
 275          * arg0 = file name
 276          * arg1 = function name
 277          * arg2 = line number
 278          * arg3 = message
 279          */
 280         DTRACE_PROBE4(zfs__dprintf,
 281             char *, newfile, char *, func, int, line, char *, buf);
 282 }
 283 
/*
 * Release a history buffer.  The buffer is freed with size
 * HIS_MAX_RECORD_LEN, which is the size history_str_get() allocates, so
 * only buffers obtained from history_str_get() may be passed here.
 */
static void
history_str_free(char *buf)
{
        kmem_free(buf, HIS_MAX_RECORD_LEN);
}
 289 
 290 static char *
 291 history_str_get(zfs_cmd_t *zc)
 292 {
 293         char *buf;
 294 
 295         if (zc->zc_history == NULL)
 296                 return (NULL);
 297 
 298         buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
 299         if (copyinstr((void *)(uintptr_t)zc->zc_history,
 300             buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
 301                 history_str_free(buf);
 302                 return (NULL);
 303         }
 304 
 305         buf[HIS_MAX_RECORD_LEN -1] = '\0';
 306 
 307         return (buf);
 308 }
 309 
 310 /*
 311  * Check to see if the named dataset is currently defined as bootable
 312  */
 313 static boolean_t
 314 zfs_is_bootfs(const char *name)
 315 {
 316         objset_t *os;
 317 
 318         if (dmu_objset_hold(name, FTAG, &os) == 0) {
 319                 boolean_t ret;
 320                 ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
 321                 dmu_objset_rele(os, FTAG);
 322                 return (ret);
 323         }
 324         return (B_FALSE);
 325 }
 326 
 327 /*
 328  * Return non-zero if the spa version is less than requested version.
 329  */
 330 static int
 331 zfs_earlier_version(const char *name, int version)
 332 {
 333         spa_t *spa;
 334 
 335         if (spa_open(name, &spa, FTAG) == 0) {
 336                 if (spa_version(spa) < version) {
 337                         spa_close(spa, FTAG);
 338                         return (1);
 339                 }
 340                 spa_close(spa, FTAG);
 341         }
 342         return (0);
 343 }
 344 
 345 /*
 346  * Return TRUE if the ZPL version is less than requested version.
 347  */
 348 static boolean_t
 349 zpl_earlier_version(const char *name, int version)
 350 {
 351         objset_t *os;
 352         boolean_t rc = B_TRUE;
 353 
 354         if (dmu_objset_hold(name, FTAG, &os) == 0) {
 355                 uint64_t zplversion;
 356 
 357                 if (dmu_objset_type(os) != DMU_OST_ZFS) {
 358                         dmu_objset_rele(os, FTAG);
 359                         return (B_TRUE);
 360                 }
 361                 /* XXX reading from non-owned objset */
 362                 if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
 363                         rc = zplversion < version;
 364                 dmu_objset_rele(os, FTAG);
 365         }
 366         return (rc);
 367 }
 368 
 369 static void
 370 zfs_log_history(zfs_cmd_t *zc)
 371 {
 372         spa_t *spa;
 373         char *buf;
 374 
 375         if ((buf = history_str_get(zc)) == NULL)
 376                 return;
 377 
 378         if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
 379                 if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
 380                         (void) spa_history_log(spa, buf);
 381                 spa_close(spa, FTAG);
 382         }
 383         history_str_free(buf);
 384 }
 385 
/*
 * Policy for top-level read operations (list pools).  Requires no privileges,
 * and can be used in the local zone, as there is no associated dataset.
 *
 * All arguments are ignored; the result is always 0 (permitted).
 */
/* ARGSUSED */
static int
zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
        return (0);
}
 396 
 397 /*
 398  * Policy for dataset read operations (list children, get statistics).  Requires
 399  * no privileges, but must be visible in the local zone.
 400  */
 401 /* ARGSUSED */
 402 static int
 403 zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 404 {
 405         if (INGLOBALZONE(curproc) ||
 406             zone_dataset_visible(zc->zc_name, NULL))
 407                 return (0);
 408 
 409         return (SET_ERROR(ENOENT));
 410 }
 411 
 412 static int
 413 zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
 414 {
 415         int writable = 1;
 416 
 417         /*
 418          * The dataset must be visible by this zone -- check this first
 419          * so they don't see EPERM on something they shouldn't know about.
 420          */
 421         if (!INGLOBALZONE(curproc) &&
 422             !zone_dataset_visible(dataset, &writable))
 423                 return (SET_ERROR(ENOENT));
 424 
 425         if (INGLOBALZONE(curproc)) {
 426                 /*
 427                  * If the fs is zoned, only root can access it from the
 428                  * global zone.
 429                  */
 430                 if (secpolicy_zfs(cr) && zoned)
 431                         return (SET_ERROR(EPERM));
 432         } else {
 433                 /*
 434                  * If we are in a local zone, the 'zoned' property must be set.
 435                  */
 436                 if (!zoned)
 437                         return (SET_ERROR(EPERM));
 438 
 439                 /* must be writable by this zone */
 440                 if (!writable)
 441                         return (SET_ERROR(EPERM));
 442         }
 443         return (0);
 444 }
 445 
 446 static int
 447 zfs_dozonecheck(const char *dataset, cred_t *cr)
 448 {
 449         uint64_t zoned;
 450 
 451         if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
 452                 return (SET_ERROR(ENOENT));
 453 
 454         return (zfs_dozonecheck_impl(dataset, zoned, cr));
 455 }
 456 
 457 static int
 458 zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
 459 {
 460         uint64_t zoned;
 461 
 462         if (dsl_prop_get_int_ds(ds, "zoned", &zoned))
 463                 return (SET_ERROR(ENOENT));
 464 
 465         return (zfs_dozonecheck_impl(dataset, zoned, cr));
 466 }
 467 
 468 static int
 469 zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
 470     const char *perm, cred_t *cr)
 471 {
 472         int error;
 473 
 474         error = zfs_dozonecheck_ds(name, ds, cr);
 475         if (error == 0) {
 476                 error = secpolicy_zfs(cr);
 477                 if (error != 0)
 478                         error = dsl_deleg_access_impl(ds, perm, cr);
 479         }
 480         return (error);
 481 }
 482 
 483 static int
 484 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
 485 {
 486         int error;
 487         dsl_dataset_t *ds;
 488         dsl_pool_t *dp;
 489 
 490         error = dsl_pool_hold(name, FTAG, &dp);
 491         if (error != 0)
 492                 return (error);
 493 
 494         error = dsl_dataset_hold(dp, name, FTAG, &ds);
 495         if (error != 0) {
 496                 dsl_pool_rele(dp, FTAG);
 497                 return (error);
 498         }
 499 
 500         error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr);
 501 
 502         dsl_dataset_rele(ds, FTAG);
 503         dsl_pool_rele(dp, FTAG);
 504         return (error);
 505 }
 506 
 507 /*
 508  * Policy for setting the security label property.
 509  *
 510  * Returns 0 for success, non-zero for access and other errors.
 511  */
 512 static int
 513 zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
 514 {
 515         char            ds_hexsl[MAXNAMELEN];
 516         bslabel_t       ds_sl, new_sl;
 517         boolean_t       new_default = FALSE;
 518         uint64_t        zoned;
 519         int             needed_priv = -1;
 520         int             error;
 521 
 522         /* First get the existing dataset label. */
 523         error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
 524             1, sizeof (ds_hexsl), &ds_hexsl, NULL);
 525         if (error != 0)
 526                 return (SET_ERROR(EPERM));
 527 
 528         if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
 529                 new_default = TRUE;
 530 
 531         /* The label must be translatable */
 532         if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
 533                 return (SET_ERROR(EINVAL));
 534 
 535         /*
 536          * In a non-global zone, disallow attempts to set a label that
 537          * doesn't match that of the zone; otherwise no other checks
 538          * are needed.
 539          */
 540         if (!INGLOBALZONE(curproc)) {
 541                 if (new_default || !blequal(&new_sl, CR_SL(CRED())))
 542                         return (SET_ERROR(EPERM));
 543                 return (0);
 544         }
 545 
 546         /*
 547          * For global-zone datasets (i.e., those whose zoned property is
 548          * "off", verify that the specified new label is valid for the
 549          * global zone.
 550          */
 551         if (dsl_prop_get_integer(name,
 552             zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
 553                 return (SET_ERROR(EPERM));
 554         if (!zoned) {
 555                 if (zfs_check_global_label(name, strval) != 0)
 556                         return (SET_ERROR(EPERM));
 557         }
 558 
 559         /*
 560          * If the existing dataset label is nondefault, check if the
 561          * dataset is mounted (label cannot be changed while mounted).
 562          * Get the zfsvfs; if there isn't one, then the dataset isn't
 563          * mounted (or isn't a dataset, doesn't exist, ...).
 564          */
 565         if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
 566                 objset_t *os;
 567                 static char *setsl_tag = "setsl_tag";
 568 
 569                 /*
 570                  * Try to own the dataset; abort if there is any error,
 571                  * (e.g., already mounted, in use, or other error).
 572                  */
 573                 error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
 574                     setsl_tag, &os);
 575                 if (error != 0)
 576                         return (SET_ERROR(EPERM));
 577 
 578                 dmu_objset_disown(os, setsl_tag);
 579 
 580                 if (new_default) {
 581                         needed_priv = PRIV_FILE_DOWNGRADE_SL;
 582                         goto out_check;
 583                 }
 584 
 585                 if (hexstr_to_label(strval, &new_sl) != 0)
 586                         return (SET_ERROR(EPERM));
 587 
 588                 if (blstrictdom(&ds_sl, &new_sl))
 589                         needed_priv = PRIV_FILE_DOWNGRADE_SL;
 590                 else if (blstrictdom(&new_sl, &ds_sl))
 591                         needed_priv = PRIV_FILE_UPGRADE_SL;
 592         } else {
 593                 /* dataset currently has a default label */
 594                 if (!new_default)
 595                         needed_priv = PRIV_FILE_UPGRADE_SL;
 596         }
 597 
 598 out_check:
 599         if (needed_priv != -1)
 600                 return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
 601         return (0);
 602 }
 603 
 604 static int
 605 zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
 606     cred_t *cr)
 607 {
 608         char *strval;
 609 
 610         /*
 611          * Check permissions for special properties.
 612          */
 613         switch (prop) {
 614         case ZFS_PROP_ZONED:
 615                 /*
 616                  * Disallow setting of 'zoned' from within a local zone.
 617                  */
 618                 if (!INGLOBALZONE(curproc))
 619                         return (SET_ERROR(EPERM));
 620                 break;
 621 
 622         case ZFS_PROP_QUOTA:
 623                 if (!INGLOBALZONE(curproc)) {
 624                         uint64_t zoned;
 625                         char setpoint[MAXNAMELEN];
 626                         /*
 627                          * Unprivileged users are allowed to modify the
 628                          * quota on things *under* (ie. contained by)
 629                          * the thing they own.
 630                          */
 631                         if (dsl_prop_get_integer(dsname, "zoned", &zoned,
 632                             setpoint))
 633                                 return (SET_ERROR(EPERM));
 634                         if (!zoned || strlen(dsname) <= strlen(setpoint))
 635                                 return (SET_ERROR(EPERM));
 636                 }
 637                 break;
 638 
 639         case ZFS_PROP_MLSLABEL:
 640                 if (!is_system_labeled())
 641                         return (SET_ERROR(EPERM));
 642 
 643                 if (nvpair_value_string(propval, &strval) == 0) {
 644                         int err;
 645 
 646                         err = zfs_set_slabel_policy(dsname, strval, CRED());
 647                         if (err != 0)
 648                                 return (err);
 649                 }
 650                 break;
 651         }
 652 
 653         return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
 654 }
 655 
 656 /* ARGSUSED */
 657 static int
 658 zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 659 {
 660         int error;
 661 
 662         error = zfs_dozonecheck(zc->zc_name, cr);
 663         if (error != 0)
 664                 return (error);
 665 
 666         /*
 667          * permission to set permissions will be evaluated later in
 668          * dsl_deleg_can_allow()
 669          */
 670         return (0);
 671 }
 672 
 673 /* ARGSUSED */
 674 static int
 675 zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 676 {
 677         return (zfs_secpolicy_write_perms(zc->zc_name,
 678             ZFS_DELEG_PERM_ROLLBACK, cr));
 679 }
 680 
 681 /* ARGSUSED */
 682 static int
 683 zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 684 {
 685         dsl_pool_t *dp;
 686         dsl_dataset_t *ds;
 687         char *cp;
 688         int error;
 689 
 690         /*
 691          * Generate the current snapshot name from the given objsetid, then
 692          * use that name for the secpolicy/zone checks.
 693          */
 694         cp = strchr(zc->zc_name, '@');
 695         if (cp == NULL)
 696                 return (SET_ERROR(EINVAL));
 697         error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
 698         if (error != 0)
 699                 return (error);
 700 
 701         error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
 702         if (error != 0) {
 703                 dsl_pool_rele(dp, FTAG);
 704                 return (error);
 705         }
 706 
 707         dsl_dataset_name(ds, zc->zc_name);
 708 
 709         error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
 710             ZFS_DELEG_PERM_SEND, cr);
 711         dsl_dataset_rele(ds, FTAG);
 712         dsl_pool_rele(dp, FTAG);
 713 
 714         return (error);
 715 }
 716 
 717 /* ARGSUSED */
 718 static int
 719 zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 720 {
 721         return (zfs_secpolicy_write_perms(zc->zc_name,
 722             ZFS_DELEG_PERM_SEND, cr));
 723 }
 724 
 725 /* ARGSUSED */
 726 static int
 727 zfs_secpolicy_deleg_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 728 {
 729         vnode_t *vp;
 730         int error;
 731 
 732         if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
 733             NO_FOLLOW, NULL, &vp)) != 0)
 734                 return (error);
 735 
 736         /* Now make sure mntpnt and dataset are ZFS */
 737 
 738         if (vp->v_vfsp->vfs_fstype != zfsfstype ||
 739             (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
 740             zc->zc_name) != 0)) {
 741                 VN_RELE(vp);
 742                 return (SET_ERROR(EPERM));
 743         }
 744 
 745         VN_RELE(vp);
 746         return (dsl_deleg_access(zc->zc_name,
 747             ZFS_DELEG_PERM_SHARE, cr));
 748 }
 749 
 750 int
 751 zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 752 {
 753         if (!INGLOBALZONE(curproc))
 754                 return (SET_ERROR(EPERM));
 755 
 756         if (secpolicy_nfs(cr) == 0) {
 757                 return (0);
 758         } else {
 759                 return (zfs_secpolicy_deleg_share(zc, innvl, cr));
 760         }
 761 }
 762 
 763 int
 764 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 765 {
 766         if (!INGLOBALZONE(curproc))
 767                 return (SET_ERROR(EPERM));
 768 
 769         if (secpolicy_smb(cr) == 0) {
 770                 return (0);
 771         } else {
 772                 return (zfs_secpolicy_deleg_share(zc, innvl, cr));
 773         }
 774 }
 775 
 776 static int
 777 zfs_get_parent(const char *datasetname, char *parent, int parentsize)
 778 {
 779         char *cp;
 780 
 781         /*
 782          * Remove the @bla or /bla from the end of the name to get the parent.
 783          */
 784         (void) strncpy(parent, datasetname, parentsize);
 785         cp = strrchr(parent, '@');
 786         if (cp != NULL) {
 787                 cp[0] = '\0';
 788         } else {
 789                 cp = strrchr(parent, '/');
 790                 if (cp == NULL)
 791                         return (SET_ERROR(ENOENT));
 792                 cp[0] = '\0';
 793         }
 794 
 795         return (0);
 796 }
 797 
 798 int
 799 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
 800 {
 801         int error;
 802 
 803         if ((error = zfs_secpolicy_write_perms(name,
 804             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
 805                 return (error);
 806 
 807         return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
 808 }
 809 
 810 /* ARGSUSED */
 811 static int
 812 zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 813 {
 814         return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
 815 }
 816 
 817 /*
 818  * Destroying snapshots with delegated permissions requires
 819  * descendant mount and destroy permissions.
 820  */
 821 /* ARGSUSED */
 822 static int
 823 zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 824 {
 825         nvlist_t *snaps;
 826         nvpair_t *pair, *nextpair;
 827         int error = 0;
 828 
 829         if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
 830                 return (SET_ERROR(EINVAL));
 831         for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
 832             pair = nextpair) {
 833                 dsl_pool_t *dp;
 834                 dsl_dataset_t *ds;
 835 
 836                 error = dsl_pool_hold(nvpair_name(pair), FTAG, &dp);
 837                 if (error != 0)
 838                         break;
 839                 nextpair = nvlist_next_nvpair(snaps, pair);
 840                 error = dsl_dataset_hold(dp, nvpair_name(pair), FTAG, &ds);
 841                 if (error == 0)
 842                         dsl_dataset_rele(ds, FTAG);
 843                 dsl_pool_rele(dp, FTAG);
 844 
 845                 if (error == 0) {
 846                         error = zfs_secpolicy_destroy_perms(nvpair_name(pair),
 847                             cr);
 848                 } else if (error == ENOENT) {
 849                         /*
 850                          * Ignore any snapshots that don't exist (we consider
 851                          * them "already destroyed").  Remove the name from the
 852                          * nvl here in case the snapshot is created between
 853                          * now and when we try to destroy it (in which case
 854                          * we don't want to destroy it since we haven't
 855                          * checked for permission).
 856                          */
 857                         fnvlist_remove_nvpair(snaps, pair);
 858                         error = 0;
 859                 }
 860                 if (error != 0)
 861                         break;
 862         }
 863 
 864         return (error);
 865 }
 866 
 867 int
 868 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
 869 {
 870         char    parentname[MAXNAMELEN];
 871         int     error;
 872 
 873         if ((error = zfs_secpolicy_write_perms(from,
 874             ZFS_DELEG_PERM_RENAME, cr)) != 0)
 875                 return (error);
 876 
 877         if ((error = zfs_secpolicy_write_perms(from,
 878             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
 879                 return (error);
 880 
 881         if ((error = zfs_get_parent(to, parentname,
 882             sizeof (parentname))) != 0)
 883                 return (error);
 884 
 885         if ((error = zfs_secpolicy_write_perms(parentname,
 886             ZFS_DELEG_PERM_CREATE, cr)) != 0)
 887                 return (error);
 888 
 889         if ((error = zfs_secpolicy_write_perms(parentname,
 890             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
 891                 return (error);
 892 
 893         return (error);
 894 }
 895 
 896 /* ARGSUSED */
 897 static int
 898 zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 899 {
 900         return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
 901 }
 902 
 903 /* ARGSUSED */
 904 static int
 905 zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 906 {
 907         dsl_pool_t *dp;
 908         dsl_dataset_t *clone;
 909         int error;
 910 
 911         error = zfs_secpolicy_write_perms(zc->zc_name,
 912             ZFS_DELEG_PERM_PROMOTE, cr);
 913         if (error != 0)
 914                 return (error);
 915 
 916         error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
 917         if (error != 0)
 918                 return (error);
 919 
 920         error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
 921 
 922         if (error == 0) {
 923                 char parentname[MAXNAMELEN];
 924                 dsl_dataset_t *origin = NULL;
 925                 dsl_dir_t *dd;
 926                 dd = clone->ds_dir;
 927 
 928                 error = dsl_dataset_hold_obj(dd->dd_pool,
 929                     dd->dd_phys->dd_origin_obj, FTAG, &origin);
 930                 if (error != 0) {
 931                         dsl_dataset_rele(clone, FTAG);
 932                         dsl_pool_rele(dp, FTAG);
 933                         return (error);
 934                 }
 935 
 936                 error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone,
 937                     ZFS_DELEG_PERM_MOUNT, cr);
 938 
 939                 dsl_dataset_name(origin, parentname);
 940                 if (error == 0) {
 941                         error = zfs_secpolicy_write_perms_ds(parentname, origin,
 942                             ZFS_DELEG_PERM_PROMOTE, cr);
 943                 }
 944                 dsl_dataset_rele(clone, FTAG);
 945                 dsl_dataset_rele(origin, FTAG);
 946         }
 947         dsl_pool_rele(dp, FTAG);
 948         return (error);
 949 }
 950 
 951 /* ARGSUSED */
 952 static int
 953 zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 954 {
 955         int error;
 956 
 957         if ((error = zfs_secpolicy_write_perms(zc->zc_name,
 958             ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
 959                 return (error);
 960 
 961         if ((error = zfs_secpolicy_write_perms(zc->zc_name,
 962             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
 963                 return (error);
 964 
 965         return (zfs_secpolicy_write_perms(zc->zc_name,
 966             ZFS_DELEG_PERM_CREATE, cr));
 967 }
 968 
 969 int
 970 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
 971 {
 972         return (zfs_secpolicy_write_perms(name,
 973             ZFS_DELEG_PERM_SNAPSHOT, cr));
 974 }
 975 
 976 /*
 977  * Check for permission to create each snapshot in the nvlist.
 978  */
 979 /* ARGSUSED */
 980 static int
 981 zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 982 {
 983         nvlist_t *snaps;
 984         int error = 0;
 985         nvpair_t *pair;
 986 
 987         if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
 988                 return (SET_ERROR(EINVAL));
 989         for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
 990             pair = nvlist_next_nvpair(snaps, pair)) {
 991                 char *name = nvpair_name(pair);
 992                 char *atp = strchr(name, '@');
 993 
 994                 if (atp == NULL) {
 995                         error = SET_ERROR(EINVAL);
 996                         break;
 997                 }
 998                 *atp = '\0';
 999                 error = zfs_secpolicy_snapshot_perms(name, cr);
1000                 *atp = '@';
1001                 if (error != 0)
1002                         break;
1003         }
1004         return (error);
1005 }
1006 
1007 /* ARGSUSED */
1008 static int
1009 zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1010 {
1011         /*
1012          * Even root must have a proper TSD so that we know what pool
1013          * to log to.
1014          */
1015         if (tsd_get(zfs_allow_log_key) == NULL)
1016                 return (SET_ERROR(EPERM));
1017         return (0);
1018 }
1019 
1020 static int
1021 zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1022 {
1023         char    parentname[MAXNAMELEN];
1024         int     error;
1025         char    *origin;
1026 
1027         if ((error = zfs_get_parent(zc->zc_name, parentname,
1028             sizeof (parentname))) != 0)
1029                 return (error);
1030 
1031         if (nvlist_lookup_string(innvl, "origin", &origin) == 0 &&
1032             (error = zfs_secpolicy_write_perms(origin,
1033             ZFS_DELEG_PERM_CLONE, cr)) != 0)
1034                 return (error);
1035 
1036         if ((error = zfs_secpolicy_write_perms(parentname,
1037             ZFS_DELEG_PERM_CREATE, cr)) != 0)
1038                 return (error);
1039 
1040         return (zfs_secpolicy_write_perms(parentname,
1041             ZFS_DELEG_PERM_MOUNT, cr));
1042 }
1043 
1044 /*
1045  * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
1046  * SYS_CONFIG privilege, which is not available in a local zone.
1047  */
1048 /* ARGSUSED */
1049 static int
1050 zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1051 {
1052         if (secpolicy_sys_config(cr, B_FALSE) != 0)
1053                 return (SET_ERROR(EPERM));
1054 
1055         return (0);
1056 }
1057 
1058 /*
1059  * Policy for object to name lookups.
1060  */
1061 /* ARGSUSED */
1062 static int
1063 zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1064 {
1065         int error;
1066 
1067         if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0)
1068                 return (0);
1069 
1070         error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
1071         return (error);
1072 }
1073 
1074 /*
1075  * Policy for fault injection.  Requires all privileges.
1076  */
1077 /* ARGSUSED */
1078 static int
1079 zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1080 {
1081         return (secpolicy_zinject(cr));
1082 }
1083 
1084 /* ARGSUSED */
1085 static int
1086 zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1087 {
1088         zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
1089 
1090         if (prop == ZPROP_INVAL) {
1091                 if (!zfs_prop_user(zc->zc_value))
1092                         return (SET_ERROR(EINVAL));
1093                 return (zfs_secpolicy_write_perms(zc->zc_name,
1094                     ZFS_DELEG_PERM_USERPROP, cr));
1095         } else {
1096                 return (zfs_secpolicy_setprop(zc->zc_name, prop,
1097                     NULL, cr));
1098         }
1099 }
1100 
1101 static int
1102 zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1103 {
1104         int err = zfs_secpolicy_read(zc, innvl, cr);
1105         if (err)
1106                 return (err);
1107 
1108         if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1109                 return (SET_ERROR(EINVAL));
1110 
1111         if (zc->zc_value[0] == 0) {
1112                 /*
1113                  * They are asking about a posix uid/gid.  If it's
1114                  * themself, allow it.
1115                  */
1116                 if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
1117                     zc->zc_objset_type == ZFS_PROP_USERQUOTA) {
1118                         if (zc->zc_guid == crgetuid(cr))
1119                                 return (0);
1120                 } else {
1121                         if (groupmember(zc->zc_guid, cr))
1122                                 return (0);
1123                 }
1124         }
1125 
1126         return (zfs_secpolicy_write_perms(zc->zc_name,
1127             userquota_perms[zc->zc_objset_type], cr));
1128 }
1129 
1130 static int
1131 zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1132 {
1133         int err = zfs_secpolicy_read(zc, innvl, cr);
1134         if (err)
1135                 return (err);
1136 
1137         if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1138                 return (SET_ERROR(EINVAL));
1139 
1140         return (zfs_secpolicy_write_perms(zc->zc_name,
1141             userquota_perms[zc->zc_objset_type], cr));
1142 }
1143 
1144 /* ARGSUSED */
1145 static int
1146 zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1147 {
1148         return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
1149             NULL, cr));
1150 }
1151 
1152 /* ARGSUSED */
1153 static int
1154 zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1155 {
1156         nvpair_t *pair;
1157         nvlist_t *holds;
1158         int error;
1159 
1160         error = nvlist_lookup_nvlist(innvl, "holds", &holds);
1161         if (error != 0)
1162                 return (SET_ERROR(EINVAL));
1163 
1164         for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
1165             pair = nvlist_next_nvpair(holds, pair)) {
1166                 char fsname[MAXNAMELEN];
1167                 error = dmu_fsname(nvpair_name(pair), fsname);
1168                 if (error != 0)
1169                         return (error);
1170                 error = zfs_secpolicy_write_perms(fsname,
1171                     ZFS_DELEG_PERM_HOLD, cr);
1172                 if (error != 0)
1173                         return (error);
1174         }
1175         return (0);
1176 }
1177 
1178 /* ARGSUSED */
1179 static int
1180 zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1181 {
1182         nvpair_t *pair;
1183         int error;
1184 
1185         for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1186             pair = nvlist_next_nvpair(innvl, pair)) {
1187                 char fsname[MAXNAMELEN];
1188                 error = dmu_fsname(nvpair_name(pair), fsname);
1189                 if (error != 0)
1190                         return (error);
1191                 error = zfs_secpolicy_write_perms(fsname,
1192                     ZFS_DELEG_PERM_RELEASE, cr);
1193                 if (error != 0)
1194                         return (error);
1195         }
1196         return (0);
1197 }
1198 
1199 /*
1200  * Policy for allowing temporary snapshots to be taken or released
1201  */
1202 static int
1203 zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1204 {
1205         /*
1206          * A temporary snapshot is the same as a snapshot,
1207          * hold, destroy and release all rolled into one.
1208          * Delegated diff alone is sufficient that we allow this.
1209          */
1210         int error;
1211 
1212         if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1213             ZFS_DELEG_PERM_DIFF, cr)) == 0)
1214                 return (0);
1215 
1216         error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
1217         if (error == 0)
1218                 error = zfs_secpolicy_hold(zc, innvl, cr);
1219         if (error == 0)
1220                 error = zfs_secpolicy_release(zc, innvl, cr);
1221         if (error == 0)
1222                 error = zfs_secpolicy_destroy(zc, innvl, cr);
1223         return (error);
1224 }
1225 
1226 /*
1227  * Returns the nvlist as specified by the user in the zfs_cmd_t.
1228  */
1229 static int
1230 get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1231 {
1232         char *packed;
1233         int error;
1234         nvlist_t *list = NULL;
1235 
1236         /*
1237          * Read in and unpack the user-supplied nvlist.
1238          */
1239         if (size == 0)
1240                 return (SET_ERROR(EINVAL));
1241 
1242         packed = kmem_alloc(size, KM_SLEEP);
1243 
1244         if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
1245             iflag)) != 0) {
1246                 kmem_free(packed, size);
1247                 return (error);
1248         }
1249 
1250         if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1251                 kmem_free(packed, size);
1252                 return (error);
1253         }
1254 
1255         kmem_free(packed, size);
1256 
1257         *nvp = list;
1258         return (0);
1259 }
1260 
1261 /*
1262  * Reduce the size of this nvlist until it can be serialized in 'max' bytes.
1263  * Entries will be removed from the end of the nvlist, and one int32 entry
1264  * named "N_MORE_ERRORS" will be added indicating how many entries were
1265  * removed.
1266  */
1267 static int
1268 nvlist_smush(nvlist_t *errors, size_t max)
1269 {
1270         size_t size;
1271 
1272         size = fnvlist_size(errors);
1273 
1274         if (size > max) {
1275                 nvpair_t *more_errors;
1276                 int n = 0;
1277 
1278                 if (max < 1024)
1279                         return (SET_ERROR(ENOMEM));
1280 
1281                 fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
1282                 more_errors = nvlist_prev_nvpair(errors, NULL);
1283 
1284                 do {
1285                         nvpair_t *pair = nvlist_prev_nvpair(errors,
1286                             more_errors);
1287                         fnvlist_remove_nvpair(errors, pair);
1288                         n++;
1289                         size = fnvlist_size(errors);
1290                 } while (size > max);
1291 
1292                 fnvlist_remove_nvpair(errors, more_errors);
1293                 fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
1294                 ASSERT3U(fnvlist_size(errors), <=, max);
1295         }
1296 
1297         return (0);
1298 }
1299 
1300 static int
1301 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1302 {
1303         char *packed = NULL;
1304         int error = 0;
1305         size_t size;
1306 
1307         size = fnvlist_size(nvl);
1308 
1309         if (size > zc->zc_nvlist_dst_size) {
1310                 error = SET_ERROR(ENOMEM);
1311         } else {
1312                 packed = fnvlist_pack(nvl, &size);
1313                 if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1314                     size, zc->zc_iflags) != 0)
1315                         error = SET_ERROR(EFAULT);
1316                 fnvlist_pack_free(packed, size);
1317         }
1318 
1319         zc->zc_nvlist_dst_size = size;
1320         zc->zc_nvlist_dst_filled = B_TRUE;
1321         return (error);
1322 }
1323 
1324 static int
1325 getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1326 {
1327         objset_t *os;
1328         int error;
1329 
1330         error = dmu_objset_hold(dsname, FTAG, &os);
1331         if (error != 0)
1332                 return (error);
1333         if (dmu_objset_type(os) != DMU_OST_ZFS) {
1334                 dmu_objset_rele(os, FTAG);
1335                 return (SET_ERROR(EINVAL));
1336         }
1337 
1338         mutex_enter(&os->os_user_ptr_lock);
1339         *zfvp = dmu_objset_get_user(os);
1340         if (*zfvp) {
1341                 VFS_HOLD((*zfvp)->z_vfs);
1342         } else {
1343                 error = SET_ERROR(ESRCH);
1344         }
1345         mutex_exit(&os->os_user_ptr_lock);
1346         dmu_objset_rele(os, FTAG);
1347         return (error);
1348 }
1349 
1350 /*
1351  * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1352  * case its z_vfs will be NULL, and it will be opened as the owner.
1353  * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
1354  * which prevents all vnode ops from running.
1355  */
1356 static int
1357 zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
1358 {
1359         int error = 0;
1360 
1361         if (getzfsvfs(name, zfvp) != 0)
1362                 error = zfsvfs_create(name, zfvp);
1363         if (error == 0) {
1364                 rrw_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
1365                     RW_READER, tag);
1366                 if ((*zfvp)->z_unmounted) {
1367                         /*
1368                          * XXX we could probably try again, since the unmounting
1369                          * thread should be just about to disassociate the
1370                          * objset from the zfsvfs.
1371                          */
1372                         rrw_exit(&(*zfvp)->z_teardown_lock, tag);
1373                         return (SET_ERROR(EBUSY));
1374                 }
1375         }
1376         return (error);
1377 }
1378 
1379 static void
1380 zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
1381 {
1382         rrw_exit(&zfsvfs->z_teardown_lock, tag);
1383 
1384         if (zfsvfs->z_vfs) {
1385                 VFS_RELE(zfsvfs->z_vfs);
1386         } else {
1387                 dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1388                 zfsvfs_free(zfsvfs);
1389         }
1390 }
1391 
1392 static int
1393 zfs_ioc_pool_create(zfs_cmd_t *zc)
1394 {
1395         int error;
1396         nvlist_t *config, *props = NULL;
1397         nvlist_t *rootprops = NULL;
1398         nvlist_t *zplprops = NULL;
1399 
1400         if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1401             zc->zc_iflags, &config))
1402                 return (error);
1403 
1404         if (zc->zc_nvlist_src_size != 0 && (error =
1405             get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1406             zc->zc_iflags, &props))) {
1407                 nvlist_free(config);
1408                 return (error);
1409         }
1410 
1411         if (props) {
1412                 nvlist_t *nvl = NULL;
1413                 uint64_t version = SPA_VERSION;
1414 
1415                 (void) nvlist_lookup_uint64(props,
1416                     zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1417                 if (!SPA_VERSION_IS_SUPPORTED(version)) {
1418                         error = SET_ERROR(EINVAL);
1419                         goto pool_props_bad;
1420                 }
1421                 (void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1422                 if (nvl) {
1423                         error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1424                         if (error != 0) {
1425                                 nvlist_free(config);
1426                                 nvlist_free(props);
1427                                 return (error);
1428                         }
1429                         (void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1430                 }
1431                 VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1432                 error = zfs_fill_zplprops_root(version, rootprops,
1433                     zplprops, NULL);
1434                 if (error != 0)
1435                         goto pool_props_bad;
1436         }
1437 
1438         error = spa_create(zc->zc_name, config, props, zplprops);
1439 
1440         /*
1441          * Set the remaining root properties
1442          */
1443         if (!error && (error = zfs_set_prop_nvlist(zc->zc_name,
1444             ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
1445                 (void) spa_destroy(zc->zc_name);
1446 
1447 pool_props_bad:
1448         nvlist_free(rootprops);
1449         nvlist_free(zplprops);
1450         nvlist_free(config);
1451         nvlist_free(props);
1452 
1453         return (error);
1454 }
1455 
1456 static int
1457 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1458 {
1459         int error;
1460         zfs_log_history(zc);
1461         error = spa_destroy(zc->zc_name);
1462         if (error == 0)
1463                 zvol_remove_minors(zc->zc_name);
1464         return (error);
1465 }
1466 
1467 static int
1468 zfs_ioc_pool_import(zfs_cmd_t *zc)
1469 {
1470         nvlist_t *config, *props = NULL;
1471         uint64_t guid;
1472         int error;
1473 
1474         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1475             zc->zc_iflags, &config)) != 0)
1476                 return (error);
1477 
1478         if (zc->zc_nvlist_src_size != 0 && (error =
1479             get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1480             zc->zc_iflags, &props))) {
1481                 nvlist_free(config);
1482                 return (error);
1483         }
1484 
1485         if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1486             guid != zc->zc_guid)
1487                 error = SET_ERROR(EINVAL);
1488         else
1489                 error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1490 
1491         if (zc->zc_nvlist_dst != 0) {
1492                 int err;
1493 
1494                 if ((err = put_nvlist(zc, config)) != 0)
1495                         error = err;
1496         }
1497 
1498         nvlist_free(config);
1499 
1500         if (props)
1501                 nvlist_free(props);
1502 
1503         return (error);
1504 }
1505 
1506 static int
1507 zfs_ioc_pool_export(zfs_cmd_t *zc)
1508 {
1509         int error;
1510         boolean_t force = (boolean_t)zc->zc_cookie;
1511         boolean_t hardforce = (boolean_t)zc->zc_guid;
1512 
1513         zfs_log_history(zc);
1514         error = spa_export(zc->zc_name, NULL, force, hardforce);
1515         if (error == 0)
1516                 zvol_remove_minors(zc->zc_name);
1517         return (error);
1518 }
1519 
1520 static int
1521 zfs_ioc_pool_configs(zfs_cmd_t *zc)
1522 {
1523         nvlist_t *configs;
1524         int error;
1525 
1526         if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1527                 return (SET_ERROR(EEXIST));
1528 
1529         error = put_nvlist(zc, configs);
1530 
1531         nvlist_free(configs);
1532 
1533         return (error);
1534 }
1535 
1536 /*
1537  * inputs:
1538  * zc_name              name of the pool
1539  *
1540  * outputs:
1541  * zc_cookie            real errno
1542  * zc_nvlist_dst        config nvlist
1543  * zc_nvlist_dst_size   size of config nvlist
1544  */
1545 static int
1546 zfs_ioc_pool_stats(zfs_cmd_t *zc)
1547 {
1548         nvlist_t *config;
1549         int error;
1550         int ret = 0;
1551 
1552         error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1553             sizeof (zc->zc_value));
1554 
1555         if (config != NULL) {
1556                 ret = put_nvlist(zc, config);
1557                 nvlist_free(config);
1558 
1559                 /*
1560                  * The config may be present even if 'error' is non-zero.
1561                  * In this case we return success, and preserve the real errno
1562                  * in 'zc_cookie'.
1563                  */
1564                 zc->zc_cookie = error;
1565         } else {
1566                 ret = error;
1567         }
1568 
1569         return (ret);
1570 }
1571 
1572 /*
1573  * Try to import the given pool, returning pool stats as appropriate so that
1574  * user land knows which devices are available and overall pool health.
1575  */
1576 static int
1577 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1578 {
1579         nvlist_t *tryconfig, *config;
1580         int error;
1581 
1582         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1583             zc->zc_iflags, &tryconfig)) != 0)
1584                 return (error);
1585 
1586         config = spa_tryimport(tryconfig);
1587 
1588         nvlist_free(tryconfig);
1589 
1590         if (config == NULL)
1591                 return (SET_ERROR(EINVAL));
1592 
1593         error = put_nvlist(zc, config);
1594         nvlist_free(config);
1595 
1596         return (error);
1597 }
1598 
1599 /*
1600  * inputs:
1601  * zc_name              name of the pool
1602  * zc_cookie            scan func (pool_scan_func_t)
1603  */
1604 static int
1605 zfs_ioc_pool_scan(zfs_cmd_t *zc)
1606 {
1607         spa_t *spa;
1608         int error;
1609 
1610         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1611                 return (error);
1612 
1613         if (zc->zc_cookie == POOL_SCAN_NONE)
1614                 error = spa_scan_stop(spa);
1615         else
1616                 error = spa_scan(spa, zc->zc_cookie);
1617 
1618         spa_close(spa, FTAG);
1619 
1620         return (error);
1621 }
1622 
1623 static int
1624 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1625 {
1626         spa_t *spa;
1627         int error;
1628 
1629         error = spa_open(zc->zc_name, &spa, FTAG);
1630         if (error == 0) {
1631                 spa_freeze(spa);
1632                 spa_close(spa, FTAG);
1633         }
1634         return (error);
1635 }
1636 
1637 static int
1638 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1639 {
1640         spa_t *spa;
1641         int error;
1642 
1643         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1644                 return (error);
1645 
1646         if (zc->zc_cookie < spa_version(spa) ||
1647             !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
1648                 spa_close(spa, FTAG);
1649                 return (SET_ERROR(EINVAL));
1650         }
1651 
1652         spa_upgrade(spa, zc->zc_cookie);
1653         spa_close(spa, FTAG);
1654 
1655         return (error);
1656 }
1657 
1658 static int
1659 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1660 {
1661         spa_t *spa;
1662         char *hist_buf;
1663         uint64_t size;
1664         int error;
1665 
1666         if ((size = zc->zc_history_len) == 0)
1667                 return (SET_ERROR(EINVAL));
1668 
1669         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1670                 return (error);
1671 
1672         if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1673                 spa_close(spa, FTAG);
1674                 return (SET_ERROR(ENOTSUP));
1675         }
1676 
1677         hist_buf = kmem_alloc(size, KM_SLEEP);
1678         if ((error = spa_history_get(spa, &zc->zc_history_offset,
1679             &zc->zc_history_len, hist_buf)) == 0) {
1680                 error = ddi_copyout(hist_buf,
1681                     (void *)(uintptr_t)zc->zc_history,
1682                     zc->zc_history_len, zc->zc_iflags);
1683         }
1684 
1685         spa_close(spa, FTAG);
1686         kmem_free(hist_buf, size);
1687         return (error);
1688 }
1689 
1690 static int
1691 zfs_ioc_pool_reguid(zfs_cmd_t *zc)
1692 {
1693         spa_t *spa;
1694         int error;
1695 
1696         error = spa_open(zc->zc_name, &spa, FTAG);
1697         if (error == 0) {
1698                 error = spa_change_guid(spa);
1699                 spa_close(spa, FTAG);
1700         }
1701         return (error);
1702 }
1703 
1704 static int
1705 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1706 {
1707         return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value));
1708 }
1709 
1710 /*
1711  * inputs:
1712  * zc_name              name of filesystem
1713  * zc_obj               object to find
1714  *
1715  * outputs:
1716  * zc_value             name of object
1717  */
1718 static int
1719 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1720 {
1721         objset_t *os;
1722         int error;
1723 
1724         /* XXX reading from objset not owned */
1725         if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1726                 return (error);
1727         if (dmu_objset_type(os) != DMU_OST_ZFS) {
1728                 dmu_objset_rele(os, FTAG);
1729                 return (SET_ERROR(EINVAL));
1730         }
1731         error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1732             sizeof (zc->zc_value));
1733         dmu_objset_rele(os, FTAG);
1734 
1735         return (error);
1736 }
1737 
1738 /*
1739  * inputs:
1740  * zc_name              name of filesystem
1741  * zc_obj               object to find
1742  *
1743  * outputs:
1744  * zc_stat              stats on object
1745  * zc_value             path to object
1746  */
1747 static int
1748 zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
1749 {
1750         objset_t *os;
1751         int error;
1752 
1753         /* XXX reading from objset not owned */
1754         if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1755                 return (error);
1756         if (dmu_objset_type(os) != DMU_OST_ZFS) {
1757                 dmu_objset_rele(os, FTAG);
1758                 return (SET_ERROR(EINVAL));
1759         }
1760         error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
1761             sizeof (zc->zc_value));
1762         dmu_objset_rele(os, FTAG);
1763 
1764         return (error);
1765 }
1766 
1767 static int
1768 zfs_ioc_vdev_add(zfs_cmd_t *zc)
1769 {
1770         spa_t *spa;
1771         int error;
1772         nvlist_t *config, **l2cache, **spares;
1773         uint_t nl2cache = 0, nspares = 0;
1774 
1775         error = spa_open(zc->zc_name, &spa, FTAG);
1776         if (error != 0)
1777                 return (error);
1778 
1779         error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1780             zc->zc_iflags, &config);
1781         (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
1782             &l2cache, &nl2cache);
1783 
1784         (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
1785             &spares, &nspares);
1786 
1787         /*
1788          * A root pool with concatenated devices is not supported.
1789          * Thus, can not add a device to a root pool.
1790          *
1791          * Intent log device can not be added to a rootpool because
1792          * during mountroot, zil is replayed, a seperated log device
1793          * can not be accessed during the mountroot time.
1794          *
1795          * l2cache and spare devices are ok to be added to a rootpool.
1796          */
1797         if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
1798                 nvlist_free(config);
1799                 spa_close(spa, FTAG);
1800                 return (SET_ERROR(EDOM));
1801         }
1802 
1803         if (error == 0) {
1804                 error = spa_vdev_add(spa, config);
1805                 nvlist_free(config);
1806         }
1807         spa_close(spa, FTAG);
1808         return (error);
1809 }
1810 
1811 /*
1812  * inputs:
1813  * zc_name              name of the pool
1814  * zc_nvlist_conf       nvlist of devices to remove
1815  * zc_cookie            to stop the remove?
1816  */
1817 static int
1818 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1819 {
1820         spa_t *spa;
1821         int error;
1822 
1823         error = spa_open(zc->zc_name, &spa, FTAG);
1824         if (error != 0)
1825                 return (error);
1826         error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1827         spa_close(spa, FTAG);
1828         return (error);
1829 }
1830 
1831 static int
1832 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1833 {
1834         spa_t *spa;
1835         int error;
1836         vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1837 
1838         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1839                 return (error);
1840         switch (zc->zc_cookie) {
1841         case VDEV_STATE_ONLINE:
1842                 error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
1843                 break;
1844 
1845         case VDEV_STATE_OFFLINE:
1846                 error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
1847                 break;
1848 
1849         case VDEV_STATE_FAULTED:
1850                 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1851                     zc->zc_obj != VDEV_AUX_EXTERNAL)
1852                         zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1853 
1854                 error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
1855                 break;
1856 
1857         case VDEV_STATE_DEGRADED:
1858                 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1859                     zc->zc_obj != VDEV_AUX_EXTERNAL)
1860                         zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1861 
1862                 error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
1863                 break;
1864 
1865         default:
1866                 error = SET_ERROR(EINVAL);
1867         }
1868         zc->zc_cookie = newstate;
1869         spa_close(spa, FTAG);
1870         return (error);
1871 }
1872 
1873 static int
1874 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
1875 {
1876         spa_t *spa;
1877         int replacing = zc->zc_cookie;
1878         nvlist_t *config;
1879         int error;
1880 
1881         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1882                 return (error);
1883 
1884         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1885             zc->zc_iflags, &config)) == 0) {
1886                 error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
1887                 nvlist_free(config);
1888         }
1889 
1890         spa_close(spa, FTAG);
1891         return (error);
1892 }
1893 
1894 static int
1895 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
1896 {
1897         spa_t *spa;
1898         int error;
1899 
1900         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1901                 return (error);
1902 
1903         error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
1904 
1905         spa_close(spa, FTAG);
1906         return (error);
1907 }
1908 
1909 static int
1910 zfs_ioc_vdev_split(zfs_cmd_t *zc)
1911 {
1912         spa_t *spa;
1913         nvlist_t *config, *props = NULL;
1914         int error;
1915         boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
1916 
1917         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1918                 return (error);
1919 
1920         if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1921             zc->zc_iflags, &config)) {
1922                 spa_close(spa, FTAG);
1923                 return (error);
1924         }
1925 
1926         if (zc->zc_nvlist_src_size != 0 && (error =
1927             get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1928             zc->zc_iflags, &props))) {
1929                 spa_close(spa, FTAG);
1930                 nvlist_free(config);
1931                 return (error);
1932         }
1933 
1934         error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
1935 
1936         spa_close(spa, FTAG);
1937 
1938         nvlist_free(config);
1939         nvlist_free(props);
1940 
1941         return (error);
1942 }
1943 
1944 static int
1945 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
1946 {
1947         spa_t *spa;
1948         char *path = zc->zc_value;
1949         uint64_t guid = zc->zc_guid;
1950         int error;
1951 
1952         error = spa_open(zc->zc_name, &spa, FTAG);
1953         if (error != 0)
1954                 return (error);
1955 
1956         error = spa_vdev_setpath(spa, guid, path);
1957         spa_close(spa, FTAG);
1958         return (error);
1959 }
1960 
1961 static int
1962 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
1963 {
1964         spa_t *spa;
1965         char *fru = zc->zc_value;
1966         uint64_t guid = zc->zc_guid;
1967         int error;
1968 
1969         error = spa_open(zc->zc_name, &spa, FTAG);
1970         if (error != 0)
1971                 return (error);
1972 
1973         error = spa_vdev_setfru(spa, guid, fru);
1974         spa_close(spa, FTAG);
1975         return (error);
1976 }
1977 
1978 static int
1979 zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
1980 {
1981         int error = 0;
1982         nvlist_t *nv;
1983 
1984         dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1985 
1986         if (zc->zc_nvlist_dst != 0 &&
1987             (error = dsl_prop_get_all(os, &nv)) == 0) {
1988                 dmu_objset_stats(os, nv);
1989                 /*
1990                  * NB: zvol_get_stats() will read the objset contents,
1991                  * which we aren't supposed to do with a
1992                  * DS_MODE_USER hold, because it could be
1993                  * inconsistent.  So this is a bit of a workaround...
1994                  * XXX reading with out owning
1995                  */
1996                 if (!zc->zc_objset_stats.dds_inconsistent &&
1997                     dmu_objset_type(os) == DMU_OST_ZVOL) {
1998                         error = zvol_get_stats(os, nv);
1999                         if (error == EIO)
2000                                 return (error);
2001                         VERIFY0(error);
2002                 }
2003                 error = put_nvlist(zc, nv);
2004                 nvlist_free(nv);
2005         }
2006 
2007         return (error);
2008 }
2009 
2010 /*
2011  * inputs:
2012  * zc_name              name of filesystem
2013  * zc_nvlist_dst_size   size of buffer for property nvlist
2014  *
2015  * outputs:
2016  * zc_objset_stats      stats
2017  * zc_nvlist_dst        property nvlist
2018  * zc_nvlist_dst_size   size of property nvlist
2019  */
2020 static int
2021 zfs_ioc_objset_stats(zfs_cmd_t *zc)
2022 {
2023         objset_t *os;
2024         int error;
2025 
2026         error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2027         if (error == 0) {
2028                 error = zfs_ioc_objset_stats_impl(zc, os);
2029                 dmu_objset_rele(os, FTAG);
2030         }
2031 
2032         return (error);
2033 }
2034 
2035 /*
2036  * inputs:
2037  * zc_name              name of filesystem
2038  * zc_nvlist_dst_size   size of buffer for property nvlist
2039  *
2040  * outputs:
2041  * zc_nvlist_dst        received property nvlist
2042  * zc_nvlist_dst_size   size of received property nvlist
2043  *
2044  * Gets received properties (distinct from local properties on or after
2045  * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
2046  * local property values.
2047  */
2048 static int
2049 zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
2050 {
2051         int error = 0;
2052         nvlist_t *nv;
2053 
2054         /*
2055          * Without this check, we would return local property values if the
2056          * caller has not already received properties on or after
2057          * SPA_VERSION_RECVD_PROPS.
2058          */
2059         if (!dsl_prop_get_hasrecvd(zc->zc_name))
2060                 return (SET_ERROR(ENOTSUP));
2061 
2062         if (zc->zc_nvlist_dst != 0 &&
2063             (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) {
2064                 error = put_nvlist(zc, nv);
2065                 nvlist_free(nv);
2066         }
2067 
2068         return (error);
2069 }
2070 
2071 static int
2072 nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
2073 {
2074         uint64_t value;
2075         int error;
2076 
2077         /*
2078          * zfs_get_zplprop() will either find a value or give us
2079          * the default value (if there is one).
2080          */
2081         if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
2082                 return (error);
2083         VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
2084         return (0);
2085 }
2086 
2087 /*
2088  * inputs:
2089  * zc_name              name of filesystem
2090  * zc_nvlist_dst_size   size of buffer for zpl property nvlist
2091  *
2092  * outputs:
2093  * zc_nvlist_dst        zpl property nvlist
2094  * zc_nvlist_dst_size   size of zpl property nvlist
2095  */
2096 static int
2097 zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
2098 {
2099         objset_t *os;
2100         int err;
2101 
2102         /* XXX reading without owning */
2103         if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
2104                 return (err);
2105 
2106         dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2107 
2108         /*
2109          * NB: nvl_add_zplprop() will read the objset contents,
2110          * which we aren't supposed to do with a DS_MODE_USER
2111          * hold, because it could be inconsistent.
2112          */
2113         if (zc->zc_nvlist_dst != NULL &&
2114             !zc->zc_objset_stats.dds_inconsistent &&
2115             dmu_objset_type(os) == DMU_OST_ZFS) {
2116                 nvlist_t *nv;
2117 
2118                 VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2119                 if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
2120                     (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
2121                     (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
2122                     (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
2123                         err = put_nvlist(zc, nv);
2124                 nvlist_free(nv);
2125         } else {
2126                 err = SET_ERROR(ENOENT);
2127         }
2128         dmu_objset_rele(os, FTAG);
2129         return (err);
2130 }
2131 
2132 static boolean_t
2133 dataset_name_hidden(const char *name)
2134 {
2135         /*
2136          * Skip over datasets that are not visible in this zone,
2137          * internal datasets (which have a $ in their name), and
2138          * temporary datasets (which have a % in their name).
2139          */
2140         if (strchr(name, '$') != NULL)
2141                 return (B_TRUE);
2142         if (strchr(name, '%') != NULL)
2143                 return (B_TRUE);
2144         if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
2145                 return (B_TRUE);
2146         return (B_FALSE);
2147 }
2148 
2149 /*
2150  * inputs:
2151  * zc_name              name of filesystem
2152  * zc_cookie            zap cursor
2153  * zc_nvlist_dst_size   size of buffer for property nvlist
2154  *
2155  * outputs:
2156  * zc_name              name of next filesystem
2157  * zc_cookie            zap cursor
2158  * zc_objset_stats      stats
2159  * zc_nvlist_dst        property nvlist
2160  * zc_nvlist_dst_size   size of property nvlist
2161  */
2162 static int
2163 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
2164 {
2165         objset_t *os;
2166         int error;
2167         char *p;
2168         size_t orig_len = strlen(zc->zc_name);
2169 
2170 top:
2171         if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
2172                 if (error == ENOENT)
2173                         error = SET_ERROR(ESRCH);
2174                 return (error);
2175         }
2176 
2177         p = strrchr(zc->zc_name, '/');
2178         if (p == NULL || p[1] != '\0')
2179                 (void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
2180         p = zc->zc_name + strlen(zc->zc_name);
2181 
2182         do {
2183                 error = dmu_dir_list_next(os,
2184                     sizeof (zc->zc_name) - (p - zc->zc_name), p,
2185                     NULL, &zc->zc_cookie);
2186                 if (error == ENOENT)
2187                         error = SET_ERROR(ESRCH);
2188         } while (error == 0 && dataset_name_hidden(zc->zc_name));
2189         dmu_objset_rele(os, FTAG);
2190 
2191         /*
2192          * If it's an internal dataset (ie. with a '$' in its name),
2193          * don't try to get stats for it, otherwise we'll return ENOENT.
2194          */
2195         if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2196                 error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2197                 if (error == ENOENT) {
2198                         /* We lost a race with destroy, get the next one. */
2199                         zc->zc_name[orig_len] = '\0';
2200                         goto top;
2201                 }
2202         }
2203         return (error);
2204 }
2205 
2206 /*
2207  * inputs:
2208  * zc_name              name of filesystem
2209  * zc_cookie            zap cursor
2210  * zc_nvlist_dst_size   size of buffer for property nvlist
2211  *
2212  * outputs:
2213  * zc_name              name of next snapshot
2214  * zc_objset_stats      stats
2215  * zc_nvlist_dst        property nvlist
2216  * zc_nvlist_dst_size   size of property nvlist
2217  */
2218 static int
2219 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2220 {
2221         objset_t *os;
2222         int error;
2223 
2224         error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2225         if (error != 0) {
2226                 return (error == ENOENT ? ESRCH : error);
2227         }
2228 
2229         /*
2230          * A dataset name of maximum length cannot have any snapshots,
2231          * so exit immediately.
2232          */
2233         if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) {
2234                 dmu_objset_rele(os, FTAG);
2235                 return (SET_ERROR(ESRCH));
2236         }
2237 
2238         error = dmu_snapshot_list_next(os,
2239             sizeof (zc->zc_name) - strlen(zc->zc_name),
2240             zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie,
2241             NULL);
2242 
2243         if (error == 0) {
2244                 dsl_dataset_t *ds;
2245                 dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
2246 
2247                 error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds);
2248                 if (error == 0) {
2249                         objset_t *ossnap;
2250 
2251                         error = dmu_objset_from_ds(ds, &ossnap);
2252                         if (error == 0)
2253                                 error = zfs_ioc_objset_stats_impl(zc, ossnap);
2254                         dsl_dataset_rele(ds, FTAG);
2255                 }
2256         } else if (error == ENOENT) {
2257                 error = SET_ERROR(ESRCH);
2258         }
2259 
2260         dmu_objset_rele(os, FTAG);
2261         /* if we failed, undo the @ that we tacked on to zc_name */
2262         if (error != 0)
2263                 *strchr(zc->zc_name, '@') = '\0';
2264         return (error);
2265 }
2266 
2267 static int
2268 zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2269 {
2270         const char *propname = nvpair_name(pair);
2271         uint64_t *valary;
2272         unsigned int vallen;
2273         const char *domain;
2274         char *dash;
2275         zfs_userquota_prop_t type;
2276         uint64_t rid;
2277         uint64_t quota;
2278         zfsvfs_t *zfsvfs;
2279         int err;
2280 
2281         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2282                 nvlist_t *attrs;
2283                 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2284                 if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2285                     &pair) != 0)
2286                         return (SET_ERROR(EINVAL));
2287         }
2288 
2289         /*
2290          * A correctly constructed propname is encoded as
2291          * userquota@<rid>-<domain>.
2292          */
2293         if ((dash = strchr(propname, '-')) == NULL ||
2294             nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2295             vallen != 3)
2296                 return (SET_ERROR(EINVAL));
2297 
2298         domain = dash + 1;
2299         type = valary[0];
2300         rid = valary[1];
2301         quota = valary[2];
2302 
2303         err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2304         if (err == 0) {
2305                 err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2306                 zfsvfs_rele(zfsvfs, FTAG);
2307         }
2308 
2309         return (err);
2310 }
2311 
2312 /*
2313  * If the named property is one that has a special function to set its value,
2314  * return 0 on success and a positive error code on failure; otherwise if it is
2315  * not one of the special properties handled by this function, return -1.
2316  *
2317  * XXX: It would be better for callers of the property interface if we handled
2318  * these special cases in dsl_prop.c (in the dsl layer).
2319  */
2320 static int
2321 zfs_prop_set_special(const char *dsname, zprop_source_t source,
2322     nvpair_t *pair)
2323 {
2324         const char *propname = nvpair_name(pair);
2325         zfs_prop_t prop = zfs_name_to_prop(propname);
2326         uint64_t intval;
2327         int err;
2328 
2329         if (prop == ZPROP_INVAL) {
2330                 if (zfs_prop_userquota(propname))
2331                         return (zfs_prop_set_userquota(dsname, pair));
2332                 return (-1);
2333         }
2334 
2335         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2336                 nvlist_t *attrs;
2337                 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2338                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2339                     &pair) == 0);
2340         }
2341 
2342         if (zfs_prop_get_type(prop) == PROP_TYPE_STRING)
2343                 return (-1);
2344 
2345         VERIFY(0 == nvpair_value_uint64(pair, &intval));
2346 
2347         switch (prop) {
2348         case ZFS_PROP_QUOTA:
2349                 err = dsl_dir_set_quota(dsname, source, intval);
2350                 break;
2351         case ZFS_PROP_REFQUOTA:
2352                 err = dsl_dataset_set_refquota(dsname, source, intval);
2353                 break;
2354         case ZFS_PROP_RESERVATION:
2355                 err = dsl_dir_set_reservation(dsname, source, intval);
2356                 break;
2357         case ZFS_PROP_REFRESERVATION:
2358                 err = dsl_dataset_set_refreservation(dsname, source, intval);
2359                 break;
2360         case ZFS_PROP_VOLSIZE:
2361                 err = zvol_set_volsize(dsname, intval);
2362                 break;
2363         case ZFS_PROP_VERSION:
2364         {
2365                 zfsvfs_t *zfsvfs;
2366 
2367                 if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2368                         break;
2369 
2370                 err = zfs_set_version(zfsvfs, intval);
2371                 zfsvfs_rele(zfsvfs, FTAG);
2372 
2373                 if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2374                         zfs_cmd_t *zc;
2375 
2376                         zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2377                         (void) strcpy(zc->zc_name, dsname);
2378                         (void) zfs_ioc_userspace_upgrade(zc);
2379                         kmem_free(zc, sizeof (zfs_cmd_t));
2380                 }
2381                 break;
2382         }
2383         case ZFS_PROP_COMPRESSION:
2384         {
2385                 if (intval == ZIO_COMPRESS_LZ4) {
2386                         zfeature_info_t *feature =
2387                             &spa_feature_table[SPA_FEATURE_LZ4_COMPRESS];
2388                         spa_t *spa;
2389 
2390                         if ((err = spa_open(dsname, &spa, FTAG)) != 0)
2391                                 return (err);
2392 
2393                         /*
2394                          * Setting the LZ4 compression algorithm activates
2395                          * the feature.
2396                          */
2397                         if (!spa_feature_is_active(spa, feature)) {
2398                                 if ((err = zfs_prop_activate_feature(spa,
2399                                     feature)) != 0) {
2400                                         spa_close(spa, FTAG);
2401                                         return (err);
2402                                 }
2403                         }
2404 
2405                         spa_close(spa, FTAG);
2406                 }
2407                 /*
2408                  * We still want the default set action to be performed in the
2409                  * caller, we only performed zfeature settings here.
2410                  */
2411                 err = -1;
2412                 break;
2413         }
2414         case ZFS_PROP_CHECKSUM:
2415         case ZFS_PROP_DEDUP:
2416         {
2417                 zfeature_info_t *feature = NULL;
2418                 spa_t           *spa;
2419 
2420                 if (intval == ZIO_CHECKSUM_SHA512)
2421                         feature = &spa_feature_table[SPA_FEATURE_SHA512];
2422                 else if (intval == ZIO_CHECKSUM_SKEIN)
2423                         feature = &spa_feature_table[SPA_FEATURE_SKEIN];
2424                 else if (intval == ZIO_CHECKSUM_EDONR)
2425                         feature = &spa_feature_table[SPA_FEATURE_EDONR];
2426                 if (feature == NULL) {
2427                         /* No features need to be activated for this cksum */
2428                         err = -1;
2429                         break;
2430                 }
2431                 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
2432                         return (err);
2433                 if (!spa_feature_is_active(spa, feature)) {
2434                         /* Salted checksums must store the salt in the MOS */
2435                         if (zio_checksum_table[intval].ci_salted)
2436                                 err = spa_activate_salted_cksum(spa, feature);
2437                         else
2438                                 err = zfs_prop_activate_feature(spa, feature);
2439                 }
2440                 spa_close(spa, FTAG);
2441                 if (err == 0) {
2442                         /*
2443                          * We want the default set action to be performed in
2444                          * the caller, we only perform feature checks here.
2445                          */
2446                         err = -1;
2447                 }
2448                 break;
2449         }
2450 
2451         default:
2452                 err = -1;
2453         }
2454 
2455         return (err);
2456 }
2457 
/*
 * This function is best effort. If it fails to set any of the given properties,
 * it continues to set as many as it can and returns the last error
 * encountered. If the caller provides a non-NULL errlist, it will be filled in
 * with the list of names of all the properties that failed along with the
 * corresponding error numbers.
 *
 * If every property is set successfully, zero is returned and errlist is not
 * modified.
 *
 * The work is done in up to three stages:
 *   1. Each pair in nvl is validated, permission-checked and offered to
 *      zfs_prop_set_special().  Pairs it declines (-1) are collected in
 *      genericnvl; pairs it fails on are collected in retrynvl.
 *   2. If retrynvl is non-empty, stage 1 is repeated once over retrynvl;
 *      failures in this second pass are reported.
 *   3. genericnvl is applied with a single dsl_props_set() call; if that
 *      fails, each pair is set individually.
 */
int
zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
    nvlist_t *errlist)
{
        nvpair_t *pair;
        nvpair_t *propval;
        int rv = 0;
        uint64_t intval;
        char *strval;
        /* properties to be set together via dsl_props_set() */
        nvlist_t *genericnvl = fnvlist_alloc();
        /* properties whose special setter failed on the first pass */
        nvlist_t *retrynvl = fnvlist_alloc();

retry:
        pair = NULL;
        while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
                const char *propname = nvpair_name(pair);
                zfs_prop_t prop = zfs_name_to_prop(propname);
                int err = 0;

                /* decode the property value */
                propval = pair;
                if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
                        nvlist_t *attrs;
                        attrs = fnvpair_value_nvlist(pair);
                        if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
                            &propval) != 0)
                                err = SET_ERROR(EINVAL);
                }

                /* Validate value type */
                if (err == 0 && prop == ZPROP_INVAL) {
                        /*
                         * Unknown to zfs_name_to_prop(): only user
                         * properties (string) and userquota properties
                         * (uint64 array) are accepted.
                         */
                        if (zfs_prop_user(propname)) {
                                if (nvpair_type(propval) != DATA_TYPE_STRING)
                                        err = SET_ERROR(EINVAL);
                        } else if (zfs_prop_userquota(propname)) {
                                if (nvpair_type(propval) !=
                                    DATA_TYPE_UINT64_ARRAY)
                                        err = SET_ERROR(EINVAL);
                        } else {
                                err = SET_ERROR(EINVAL);
                        }
                } else if (err == 0) {
                        if (nvpair_type(propval) == DATA_TYPE_STRING) {
                                if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
                                        err = SET_ERROR(EINVAL);
                        } else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
                                const char *unused;

                                intval = fnvpair_value_uint64(propval);

                                switch (zfs_prop_get_type(prop)) {
                                case PROP_TYPE_NUMBER:
                                        break;
                                case PROP_TYPE_STRING:
                                        err = SET_ERROR(EINVAL);
                                        break;
                                case PROP_TYPE_INDEX:
                                        /* the index must map to a name */
                                        if (zfs_prop_index_to_string(prop,
                                            intval, &unused) != 0)
                                                err = SET_ERROR(EINVAL);
                                        break;
                                default:
                                        cmn_err(CE_PANIC,
                                            "unknown property type");
                                }
                        } else {
                                err = SET_ERROR(EINVAL);
                        }
                }

                /* Validate permissions */
                if (err == 0)
                        err = zfs_check_settable(dsname, pair, CRED());

                if (err == 0) {
                        err = zfs_prop_set_special(dsname, source, pair);
                        if (err == -1) {
                                /*
                                 * For better performance we build up a list of
                                 * properties to set in a single transaction.
                                 */
                                err = nvlist_add_nvpair(genericnvl, pair);
                        } else if (err != 0 && nvl != retrynvl) {
                                /*
                                 * This may be a spurious error caused by
                                 * receiving quota and reservation out of order.
                                 * Try again in a second pass.
                                 *
                                 * If the pair is queued successfully err
                                 * becomes 0, so the first-pass failure is
                                 * not reported below.
                                 */
                                err = nvlist_add_nvpair(retrynvl, pair);
                        }
                }

                if (err != 0) {
                        if (errlist != NULL)
                                fnvlist_add_int32(errlist, propname, err);
                        rv = err;
                }
        }

        /* Run the loop above once more over the deferred properties. */
        if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
                nvl = retrynvl;
                goto retry;
        }

        if (!nvlist_empty(genericnvl) &&
            dsl_props_set(dsname, source, genericnvl) != 0) {
                /*
                 * If this fails, we still want to set as many properties as we
                 * can, so try setting them individually.
                 */
                pair = NULL;
                while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
                        const char *propname = nvpair_name(pair);
                        int err = 0;

                        propval = pair;
                        if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
                                nvlist_t *attrs;
                                attrs = fnvpair_value_nvlist(pair);
                                propval = fnvlist_lookup_nvpair(attrs,
                                    ZPROP_VALUE);
                        }

                        /* anything that is not a string is set as uint64 */
                        if (nvpair_type(propval) == DATA_TYPE_STRING) {
                                strval = fnvpair_value_string(propval);
                                err = dsl_prop_set_string(dsname, propname,
                                    source, strval);
                        } else {
                                intval = fnvpair_value_uint64(propval);
                                err = dsl_prop_set_int(dsname, propname, source,
                                    intval);
                        }

                        if (err != 0) {
                                if (errlist != NULL) {
                                        fnvlist_add_int32(errlist, propname,
                                            err);
                                }
                                rv = err;
                        }
                }
        }
        nvlist_free(genericnvl);
        nvlist_free(retrynvl);

        return (rv);
}
2615 
2616 /*
2617  * Check that all the properties are valid user properties.
2618  */
2619 static int
2620 zfs_check_userprops(const char *fsname, nvlist_t *nvl)
2621 {
2622         nvpair_t *pair = NULL;
2623         int error = 0;
2624 
2625         while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2626                 const char *propname = nvpair_name(pair);
2627                 char *valstr;
2628 
2629                 if (!zfs_prop_user(propname) ||
2630                     nvpair_type(pair) != DATA_TYPE_STRING)
2631                         return (SET_ERROR(EINVAL));
2632 
2633                 if (error = zfs_secpolicy_write_perms(fsname,
2634                     ZFS_DELEG_PERM_USERPROP, CRED()))
2635                         return (error);
2636 
2637                 if (strlen(propname) >= ZAP_MAXNAMELEN)
2638                         return (SET_ERROR(ENAMETOOLONG));
2639 
2640                 VERIFY(nvpair_value_string(pair, &valstr) == 0);
2641                 if (strlen(valstr) >= ZAP_MAXVALUELEN)
2642                         return (E2BIG);
2643         }
2644         return (0);
2645 }
2646 
2647 static void
2648 props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2649 {
2650         nvpair_t *pair;
2651 
2652         VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2653 
2654         pair = NULL;
2655         while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2656                 if (nvlist_exists(skipped, nvpair_name(pair)))
2657                         continue;
2658 
2659                 VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2660         }
2661 }
2662 
2663 static int
2664 clear_received_props(const char *dsname, nvlist_t *props,
2665     nvlist_t *skipped)
2666 {
2667         int err = 0;
2668         nvlist_t *cleared_props = NULL;
2669         props_skip(props, skipped, &cleared_props);
2670         if (!nvlist_empty(cleared_props)) {
2671                 /*
2672                  * Acts on local properties until the dataset has received
2673                  * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2674                  */
2675                 zprop_source_t flags = (ZPROP_SRC_NONE |
2676                     (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0));
2677                 err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL);
2678         }
2679         nvlist_free(cleared_props);
2680         return (err);
2681 }
2682 
2683 /*
2684  * inputs:
2685  * zc_name              name of filesystem
2686  * zc_value             name of property to set
2687  * zc_nvlist_src{_size} nvlist of properties to apply
2688  * zc_cookie            received properties flag
2689  *
2690  * outputs:
2691  * zc_nvlist_dst{_size} error for each unapplied received property
2692  */
2693 static int
2694 zfs_ioc_set_prop(zfs_cmd_t *zc)
2695 {
2696         nvlist_t *nvl;
2697         boolean_t received = zc->zc_cookie;
2698         zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2699             ZPROP_SRC_LOCAL);
2700         nvlist_t *errors;
2701         int error;
2702 
2703         if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2704             zc->zc_iflags, &nvl)) != 0)
2705                 return (error);
2706 
2707         if (received) {
2708                 nvlist_t *origprops;
2709 
2710                 if (dsl_prop_get_received(zc->zc_name, &origprops) == 0) {
2711                         (void) clear_received_props(zc->zc_name,
2712                             origprops, nvl);
2713                         nvlist_free(origprops);
2714                 }
2715 
2716                 error = dsl_prop_set_hasrecvd(zc->zc_name);
2717         }
2718 
2719         errors = fnvlist_alloc();
2720         if (error == 0)
2721                 error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors);
2722 
2723         if (zc->zc_nvlist_dst != NULL && errors != NULL) {
2724                 (void) put_nvlist(zc, errors);
2725         }
2726 
2727         nvlist_free(errors);
2728         nvlist_free(nvl);
2729         return (error);
2730 }
2731 
2732 /*
2733  * inputs:
2734  * zc_name              name of filesystem
2735  * zc_value             name of property to inherit
2736  * zc_cookie            revert to received value if TRUE
2737  *
2738  * outputs:             none
2739  */
2740 static int
2741 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2742 {
2743         const char *propname = zc->zc_value;
2744         zfs_prop_t prop = zfs_name_to_prop(propname);
2745         boolean_t received = zc->zc_cookie;
2746         zprop_source_t source = (received
2747             ? ZPROP_SRC_NONE            /* revert to received value, if any */
2748             : ZPROP_SRC_INHERITED);     /* explicitly inherit */
2749 
2750         if (received) {
2751                 nvlist_t *dummy;
2752                 nvpair_t *pair;
2753                 zprop_type_t type;
2754                 int err;
2755 
2756                 /*
2757                  * zfs_prop_set_special() expects properties in the form of an
2758                  * nvpair with type info.
2759                  */
2760                 if (prop == ZPROP_INVAL) {
2761                         if (!zfs_prop_user(propname))
2762                                 return (SET_ERROR(EINVAL));
2763 
2764                         type = PROP_TYPE_STRING;
2765                 } else if (prop == ZFS_PROP_VOLSIZE ||
2766                     prop == ZFS_PROP_VERSION) {
2767                         return (SET_ERROR(EINVAL));
2768                 } else {
2769                         type = zfs_prop_get_type(prop);
2770                 }
2771 
2772                 VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2773 
2774                 switch (type) {
2775                 case PROP_TYPE_STRING:
2776                         VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2777                         break;
2778                 case PROP_TYPE_NUMBER:
2779                 case PROP_TYPE_INDEX:
2780                         VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2781                         break;
2782                 default:
2783                         nvlist_free(dummy);
2784                         return (SET_ERROR(EINVAL));
2785                 }
2786 
2787                 pair = nvlist_next_nvpair(dummy, NULL);
2788                 err = zfs_prop_set_special(zc->zc_name, source, pair);
2789                 nvlist_free(dummy);
2790                 if (err != -1)
2791                         return (err); /* special property already handled */
2792         } else {
2793                 /*
2794                  * Only check this in the non-received case. We want to allow
2795                  * 'inherit -S' to revert non-inheritable properties like quota
2796                  * and reservation to the received or default values even though
2797                  * they are not considered inheritable.
2798                  */
2799                 if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
2800                         return (SET_ERROR(EINVAL));
2801         }
2802 
2803         /* property name has been validated by zfs_secpolicy_inherit_prop() */
2804         return (dsl_prop_inherit(zc->zc_name, zc->zc_value, source));
2805 }
2806 
2807 static int
2808 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2809 {
2810         nvlist_t *props;
2811         spa_t *spa;
2812         int error;
2813         nvpair_t *pair;
2814 
2815         if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2816             zc->zc_iflags, &props))
2817                 return (error);
2818 
2819         /*
2820          * If the only property is the configfile, then just do a spa_lookup()
2821          * to handle the faulted case.
2822          */
2823         pair = nvlist_next_nvpair(props, NULL);
2824         if (pair != NULL && strcmp(nvpair_name(pair),
2825             zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2826             nvlist_next_nvpair(props, pair) == NULL) {
2827                 mutex_enter(&spa_namespace_lock);
2828                 if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2829                         spa_configfile_set(spa, props, B_FALSE);
2830                         spa_config_sync(spa, B_FALSE, B_TRUE);
2831                 }
2832                 mutex_exit(&spa_namespace_lock);
2833                 if (spa != NULL) {
2834                         nvlist_free(props);
2835                         return (0);
2836                 }
2837         }
2838 
2839         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2840                 nvlist_free(props);
2841                 return (error);
2842         }
2843 
2844         error = spa_prop_set(spa, props);
2845 
2846         nvlist_free(props);
2847         spa_close(spa, FTAG);
2848 
2849         return (error);
2850 }
2851 
2852 static int
2853 zfs_ioc_pool_get_props(zfs_cmd_t *zc)
2854 {
2855         spa_t *spa;
2856         int error;
2857         nvlist_t *nvp = NULL;
2858 
2859         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2860                 /*
2861                  * If the pool is faulted, there may be properties we can still
2862                  * get (such as altroot and cachefile), so attempt to get them
2863                  * anyway.
2864                  */
2865                 mutex_enter(&spa_namespace_lock);
2866                 if ((spa = spa_lookup(zc->zc_name)) != NULL)
2867                         error = spa_prop_get(spa, &nvp);
2868                 mutex_exit(&spa_namespace_lock);
2869         } else {
2870                 error = spa_prop_get(spa, &nvp);
2871                 spa_close(spa, FTAG);
2872         }
2873 
2874         if (error == 0 && zc->zc_nvlist_dst != NULL)
2875                 error = put_nvlist(zc, nvp);
2876         else
2877                 error = SET_ERROR(EFAULT);
2878 
2879         nvlist_free(nvp);
2880         return (error);
2881 }
2882 
2883 /*
2884  * inputs:
2885  * zc_name              name of filesystem
2886  * zc_nvlist_src{_size} nvlist of delegated permissions
2887  * zc_perm_action       allow/unallow flag
2888  *
2889  * outputs:             none
2890  */
2891 static int
2892 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
2893 {
2894         int error;
2895         nvlist_t *fsaclnv = NULL;
2896 
2897         if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2898             zc->zc_iflags, &fsaclnv)) != 0)
2899                 return (error);
2900 
2901         /*
2902          * Verify nvlist is constructed correctly
2903          */
2904         if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
2905                 nvlist_free(fsaclnv);
2906                 return (SET_ERROR(EINVAL));
2907         }
2908 
2909         /*
2910          * If we don't have PRIV_SYS_MOUNT, then validate
2911          * that user is allowed to hand out each permission in
2912          * the nvlist(s)
2913          */
2914 
2915         error = secpolicy_zfs(CRED());
2916         if (error != 0) {
2917                 if (zc->zc_perm_action == B_FALSE) {
2918                         error = dsl_deleg_can_allow(zc->zc_name,
2919                             fsaclnv, CRED());
2920                 } else {
2921                         error = dsl_deleg_can_unallow(zc->zc_name,
2922                             fsaclnv, CRED());
2923                 }
2924         }
2925 
2926         if (error == 0)
2927                 error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
2928 
2929         nvlist_free(fsaclnv);
2930         return (error);
2931 }
2932 
2933 /*
2934  * inputs:
2935  * zc_name              name of filesystem
2936  *
2937  * outputs:
2938  * zc_nvlist_src{_size} nvlist of delegated permissions
2939  */
2940 static int
2941 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
2942 {
2943         nvlist_t *nvp;
2944         int error;
2945 
2946         if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
2947                 error = put_nvlist(zc, nvp);
2948                 nvlist_free(nvp);
2949         }
2950 
2951         return (error);
2952 }
2953 
2954 /*
2955  * Search the vfs list for a specified resource.  Returns a pointer to it
2956  * or NULL if no suitable entry is found. The caller of this routine
2957  * is responsible for releasing the returned vfs pointer.
2958  */
2959 static vfs_t *
2960 zfs_get_vfs(const char *resource)
2961 {
2962         struct vfs *vfsp;
2963         struct vfs *vfs_found = NULL;
2964 
2965         vfs_list_read_lock();
2966         vfsp = rootvfs;
2967         do {
2968                 if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) {
2969                         VFS_HOLD(vfsp);
2970                         vfs_found = vfsp;
2971                         break;
2972                 }
2973                 vfsp = vfsp->vfs_next;
2974         } while (vfsp != rootvfs);
2975         vfs_list_unlock();
2976         return (vfs_found);
2977 }
2978 
2979 /* ARGSUSED */
2980 static void
2981 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
2982 {
2983         zfs_creat_t *zct = arg;
2984 
2985         zfs_create_fs(os, cr, zct->zct_zplprops, tx);
2986 }
2987 
2988 #define ZFS_PROP_UNDEFINED      ((uint64_t)-1)
2989 
2990 /*
2991  * inputs:
2992  * os                   parent objset pointer (NULL if root fs)
2993  * fuids_ok             fuids allowed in this version of the spa?
2994  * sa_ok                SAs allowed in this version of the spa?
2995  * createprops          list of properties requested by creator
2996  *
2997  * outputs:
2998  * zplprops     values for the zplprops we attach to the master node object
2999  * is_ci        true if requested file system will be purely case-insensitive
3000  *
3001  * Determine the settings for utf8only, normalization and
3002  * casesensitivity.  Specific values may have been requested by the
3003  * creator and/or we can inherit values from the parent dataset.  If
3004  * the file system is of too early a vintage, a creator can not
3005  * request settings for these properties, even if the requested
3006  * setting is the default value.  We don't actually want to create dsl
3007  * properties for these, so remove them from the source nvlist after
3008  * processing.
3009  */
3010 static int
3011 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
3012     boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
3013     nvlist_t *zplprops, boolean_t *is_ci)
3014 {
3015         uint64_t sense = ZFS_PROP_UNDEFINED;
3016         uint64_t norm = ZFS_PROP_UNDEFINED;
3017         uint64_t u8 = ZFS_PROP_UNDEFINED;
3018 
3019         ASSERT(zplprops != NULL);
3020 
3021         /*
3022          * Pull out creator prop choices, if any.
3023          */
3024         if (createprops) {
3025                 (void) nvlist_lookup_uint64(createprops,
3026                     zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
3027                 (void) nvlist_lookup_uint64(createprops,
3028                     zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
3029                 (void) nvlist_remove_all(createprops,
3030                     zfs_prop_to_name(ZFS_PROP_NORMALIZE));
3031                 (void) nvlist_lookup_uint64(createprops,
3032                     zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
3033                 (void) nvlist_remove_all(createprops,
3034                     zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
3035                 (void) nvlist_lookup_uint64(createprops,
3036                     zfs_prop_to_name(ZFS_PROP_CASE), &sense);
3037                 (void) nvlist_remove_all(createprops,
3038                     zfs_prop_to_name(ZFS_PROP_CASE));
3039         }
3040 
3041         /*
3042          * If the zpl version requested is whacky or the file system
3043          * or pool is version is too "young" to support normalization
3044          * and the creator tried to set a value for one of the props,
3045          * error out.
3046          */
3047         if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
3048             (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
3049             (zplver >= ZPL_VERSION_SA && !sa_ok) ||
3050             (zplver < ZPL_VERSION_NORMALIZATION &&
3051             (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
3052             sense != ZFS_PROP_UNDEFINED)))
3053                 return (SET_ERROR(ENOTSUP));
3054 
3055         /*
3056          * Put the version in the zplprops
3057          */
3058         VERIFY(nvlist_add_uint64(zplprops,
3059             zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
3060 
3061         if (norm == ZFS_PROP_UNDEFINED)
3062                 VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
3063         VERIFY(nvlist_add_uint64(zplprops,
3064             zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
3065 
3066         /*
3067          * If we're normalizing, names must always be valid UTF-8 strings.
3068          */
3069         if (norm)
3070                 u8 = 1;
3071         if (u8 == ZFS_PROP_UNDEFINED)
3072                 VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
3073         VERIFY(nvlist_add_uint64(zplprops,
3074             zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
3075 
3076         if (sense == ZFS_PROP_UNDEFINED)
3077                 VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
3078         VERIFY(nvlist_add_uint64(zplprops,
3079             zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
3080 
3081         if (is_ci)
3082                 *is_ci = (sense == ZFS_CASE_INSENSITIVE);
3083 
3084         return (0);
3085 }
3086 
3087 static int
3088 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
3089     nvlist_t *zplprops, boolean_t *is_ci)
3090 {
3091         boolean_t fuids_ok, sa_ok;
3092         uint64_t zplver = ZPL_VERSION;
3093         objset_t *os = NULL;
3094         char parentname[MAXNAMELEN];
3095         char *cp;
3096         spa_t *spa;
3097         uint64_t spa_vers;
3098         int error;
3099 
3100         (void) strlcpy(parentname, dataset, sizeof (parentname));
3101         cp = strrchr(parentname, '/');
3102         ASSERT(cp != NULL);
3103         cp[0] = '\0';
3104 
3105         if ((error = spa_open(dataset, &spa, FTAG)) != 0)
3106                 return (error);
3107 
3108         spa_vers = spa_version(spa);
3109         spa_close(spa, FTAG);
3110 
3111         zplver = zfs_zpl_version_map(spa_vers);
3112         fuids_ok = (zplver >= ZPL_VERSION_FUID);
3113         sa_ok = (zplver >= ZPL_VERSION_SA);
3114 
3115         /*
3116          * Open parent object set so we can inherit zplprop values.
3117          */
3118         if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
3119                 return (error);
3120 
3121         error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
3122             zplprops, is_ci);
3123         dmu_objset_rele(os, FTAG);
3124         return (error);
3125 }
3126 
3127 static int
3128 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
3129     nvlist_t *zplprops, boolean_t *is_ci)
3130 {
3131         boolean_t fuids_ok;
3132         boolean_t sa_ok;
3133         uint64_t zplver = ZPL_VERSION;
3134         int error;
3135 
3136         zplver = zfs_zpl_version_map(spa_vers);
3137         fuids_ok = (zplver >= ZPL_VERSION_FUID);
3138         sa_ok = (zplver >= ZPL_VERSION_SA);
3139 
3140         error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
3141             createprops, zplprops, is_ci);
3142         return (error);
3143 }
3144 
3145 /*
3146  * innvl: {
3147  *     "type" -> dmu_objset_type_t (int32)
3148  *     (optional) "props" -> { prop -> value }
3149  * }
3150  *
3151  * outnvl: propname -> error code (int32)
3152  */
3153 static int
3154 zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3155 {
3156         int error = 0;
3157         zfs_creat_t zct = { 0 };
3158         nvlist_t *nvprops = NULL;
3159         void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
3160         int32_t type32;
3161         dmu_objset_type_t type;
3162         boolean_t is_insensitive = B_FALSE;
3163 
3164         if (nvlist_lookup_int32(innvl, "type", &type32) != 0)
3165                 return (SET_ERROR(EINVAL));
3166         type = type32;
3167         (void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3168 
3169         switch (type) {
3170         case DMU_OST_ZFS:
3171                 cbfunc = zfs_create_cb;
3172                 break;
3173 
3174         case DMU_OST_ZVOL:
3175                 cbfunc = zvol_create_cb;
3176                 break;
3177 
3178         default:
3179                 cbfunc = NULL;
3180                 break;
3181         }
3182         if (strchr(fsname, '@') ||
3183             strchr(fsname, '%'))
3184                 return (SET_ERROR(EINVAL));
3185 
3186         zct.zct_props = nvprops;
3187 
3188         if (cbfunc == NULL)
3189                 return (SET_ERROR(EINVAL));
3190 
3191         if (type == DMU_OST_ZVOL) {
3192                 uint64_t volsize, volblocksize;
3193 
3194                 if (nvprops == NULL)
3195                         return (SET_ERROR(EINVAL));
3196                 if (nvlist_lookup_uint64(nvprops,
3197                     zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
3198                         return (SET_ERROR(EINVAL));
3199 
3200                 if ((error = nvlist_lookup_uint64(nvprops,
3201                     zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3202                     &volblocksize)) != 0 && error != ENOENT)
3203                         return (SET_ERROR(EINVAL));
3204 
3205                 if (error != 0)
3206                         volblocksize = zfs_prop_default_numeric(
3207                             ZFS_PROP_VOLBLOCKSIZE);
3208 
3209                 if ((error = zvol_check_volblocksize(
3210                     volblocksize)) != 0 ||
3211                     (error = zvol_check_volsize(volsize,
3212                     volblocksize)) != 0)
3213                         return (error);
3214         } else if (type == DMU_OST_ZFS) {
3215                 int error;
3216 
3217                 /*
3218                  * We have to have normalization and
3219                  * case-folding flags correct when we do the
3220                  * file system creation, so go figure them out
3221                  * now.
3222                  */
3223                 VERIFY(nvlist_alloc(&zct.zct_zplprops,
3224                     NV_UNIQUE_NAME, KM_SLEEP) == 0);
3225                 error = zfs_fill_zplprops(fsname, nvprops,
3226                     zct.zct_zplprops, &is_insensitive);
3227                 if (error != 0) {
3228                         nvlist_free(zct.zct_zplprops);
3229                         return (error);
3230                 }
3231         }
3232 
3233         error = dmu_objset_create(fsname, type,
3234             is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
3235         nvlist_free(zct.zct_zplprops);
3236 
3237         /*
3238          * It would be nice to do this atomically.
3239          */
3240         if (error == 0) {
3241                 error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3242                     nvprops, outnvl);
3243                 if (error != 0)
3244                         (void) dsl_destroy_head(fsname);
3245         }
3246         return (error);
3247 }
3248 
3249 /*
3250  * innvl: {
3251  *     "origin" -> name of origin snapshot
3252  *     (optional) "props" -> { prop -> value }
3253  * }
3254  *
3255  * outnvl: propname -> error code (int32)
3256  */
3257 static int
3258 zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3259 {
3260         int error = 0;
3261         nvlist_t *nvprops = NULL;
3262         char *origin_name;
3263 
3264         if (nvlist_lookup_string(innvl, "origin", &origin_name) != 0)
3265                 return (SET_ERROR(EINVAL));
3266         (void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3267 
3268         if (strchr(fsname, '@') ||
3269             strchr(fsname, '%'))
3270                 return (SET_ERROR(EINVAL));
3271 
3272         if (dataset_namecheck(origin_name, NULL, NULL) != 0)
3273                 return (SET_ERROR(EINVAL));
3274         error = dmu_objset_clone(fsname, origin_name);
3275         if (error != 0)
3276                 return (error);
3277 
3278         /*
3279          * It would be nice to do this atomically.
3280          */
3281         if (error == 0) {
3282                 error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3283                     nvprops, outnvl);
3284                 if (error != 0)
3285                         (void) dsl_destroy_head(fsname);
3286         }
3287         return (error);
3288 }
3289 
3290 /*
3291  * innvl: {
3292  *     "snaps" -> { snapshot1, snapshot2 }
3293  *     (optional) "props" -> { prop -> value (string) }
3294  * }
3295  *
3296  * outnvl: snapshot -> error code (int32)
3297  */
3298 static int
3299 zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3300 {
3301         nvlist_t *snaps;
3302         nvlist_t *props = NULL;
3303         int error, poollen;
3304         nvpair_t *pair;
3305 
3306         (void) nvlist_lookup_nvlist(innvl, "props", &props);
3307         if ((error = zfs_check_userprops(poolname, props)) != 0)
3308                 return (error);
3309 
3310         if (!nvlist_empty(props) &&
3311             zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
3312                 return (SET_ERROR(ENOTSUP));
3313 
3314         if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3315                 return (SET_ERROR(EINVAL));
3316         poollen = strlen(poolname);
3317         for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3318             pair = nvlist_next_nvpair(snaps, pair)) {
3319                 const char *name = nvpair_name(pair);
3320                 const char *cp = strchr(name, '@');
3321 
3322                 /*
3323                  * The snap name must contain an @, and the part after it must
3324                  * contain only valid characters.
3325                  */
3326                 if (cp == NULL || snapshot_namecheck(cp + 1, NULL, NULL) != 0)
3327                         return (SET_ERROR(EINVAL));
3328 
3329                 /*
3330                  * The snap must be in the specified pool.
3331                  */
3332                 if (strncmp(name, poolname, poollen) != 0 ||
3333                     (name[poollen] != '/' && name[poollen] != '@'))
3334                         return (SET_ERROR(EXDEV));
3335 
3336                 /* This must be the only snap of this fs. */
3337                 for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair);
3338                     pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
3339                         if (strncmp(name, nvpair_name(pair2), cp - name + 1)
3340                             == 0) {
3341                                 return (SET_ERROR(EXDEV));
3342                         }
3343                 }
3344         }
3345 
3346         error = dsl_dataset_snapshot(snaps, props, outnvl);
3347         return (error);
3348 }
3349 
3350 /*
3351  * innvl: "message" -> string
3352  */
3353 /* ARGSUSED */
3354 static int
3355 zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
3356 {
3357         char *message;
3358         spa_t *spa;
3359         int error;
3360         char *poolname;
3361 
3362         /*
3363          * The poolname in the ioctl is not set, we get it from the TSD,
3364          * which was set at the end of the last successful ioctl that allows
3365          * logging.  The secpolicy func already checked that it is set.
3366          * Only one log ioctl is allowed after each successful ioctl, so
3367          * we clear the TSD here.
3368          */
3369         poolname = tsd_get(zfs_allow_log_key);
3370         (void) tsd_set(zfs_allow_log_key, NULL);
3371         error = spa_open(poolname, &spa, FTAG);
3372         strfree(poolname);
3373         if (error != 0)
3374                 return (error);
3375 
3376         if (nvlist_lookup_string(innvl, "message", &message) != 0)  {
3377                 spa_close(spa, FTAG);
3378                 return (SET_ERROR(EINVAL));
3379         }
3380 
3381         if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
3382                 spa_close(spa, FTAG);
3383                 return (SET_ERROR(ENOTSUP));
3384         }
3385 
3386         error = spa_history_log(spa, message);
3387         spa_close(spa, FTAG);
3388         return (error);
3389 }
3390 
3391 /*
3392  * The dp_config_rwlock must not be held when calling this, because the
3393  * unmount may need to write out data.
3394  *
3395  * This function is best-effort.  Callers must deal gracefully if it
3396  * remains mounted (or is remounted after this call).
3397  *
3398  * Returns 0 if the argument is not a snapshot, or it is not currently a
3399  * filesystem, or we were able to unmount it.  Returns error code otherwise.
3400  */
3401 int
3402 zfs_unmount_snap(const char *snapname)
3403 {
3404         vfs_t *vfsp;
3405         zfsvfs_t *zfsvfs;
3406         int err;
3407 
3408         if (strchr(snapname, '@') == NULL)
3409                 return (0);
3410 
3411         vfsp = zfs_get_vfs(snapname);
3412         if (vfsp == NULL)
3413                 return (0);
3414 
3415         zfsvfs = vfsp->vfs_data;
3416         ASSERT(!dsl_pool_config_held(dmu_objset_pool(zfsvfs->z_os)));
3417 
3418         err = vn_vfswlock(vfsp->vfs_vnodecovered);
3419         VFS_RELE(vfsp);
3420         if (err != 0)
3421                 return (SET_ERROR(err));
3422 
3423         /*
3424          * Always force the unmount for snapshots.
3425          */
3426         (void) dounmount(vfsp, MS_FORCE, kcred);
3427         return (0);
3428 }
3429 
/*
 * Callback-shaped wrapper around zfs_unmount_snap(); 'arg' is unused.
 */
/* ARGSUSED */
static int
zfs_unmount_snap_cb(const char *snapname, void *arg)
{
        int err = zfs_unmount_snap(snapname);

        return (err);
}
3436 
3437 /*
3438  * When a clone is destroyed, its origin may also need to be destroyed,
3439  * in which case it must be unmounted.  This routine will do that unmount
3440  * if necessary.
3441  */
3442 void
3443 zfs_destroy_unmount_origin(const char *fsname)
3444 {
3445         int error;
3446         objset_t *os;
3447         dsl_dataset_t *ds;
3448 
3449         error = dmu_objset_hold(fsname, FTAG, &os);
3450         if (error != 0)
3451                 return;
3452         ds = dmu_objset_ds(os);
3453         if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
3454                 char originname[MAXNAMELEN];
3455                 dsl_dataset_name(ds->ds_prev, originname);
3456                 dmu_objset_rele(os, FTAG);
3457                 (void) zfs_unmount_snap(originname);
3458         } else {
3459                 dmu_objset_rele(os, FTAG);
3460         }
3461 }
3462 
3463 /*
3464  * innvl: {
3465  *     "snaps" -> { snapshot1, snapshot2 }
3466  *     (optional boolean) "defer"
3467  * }
3468  *
3469  * outnvl: snapshot -> error code (int32)
3470  *
3471  */
3472 static int
3473 zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3474 {
3475         int error, poollen;
3476         nvlist_t *snaps;
3477         nvpair_t *pair;
3478         boolean_t defer;
3479 
3480         if (nvlist_lookup_nvlist(innvl, "snaps", &snaps) != 0)
3481                 return (SET_ERROR(EINVAL));
3482         defer = nvlist_exists(innvl, "defer");
3483 
3484         poollen = strlen(poolname);
3485         for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
3486             pair = nvlist_next_nvpair(snaps, pair)) {
3487                 const char *name = nvpair_name(pair);
3488 
3489                 /*
3490                  * The snap must be in the specified pool.
3491                  */
3492                 if (strncmp(name, poolname, poollen) != 0 ||
3493                     (name[poollen] != '/' && name[poollen] != '@'))
3494                         return (SET_ERROR(EXDEV));
3495 
3496                 error = zfs_unmount_snap(name);
3497                 if (error != 0)
3498                         return (error);
3499         }
3500 
3501         return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
3502 }
3503 
3504 /*
3505  * inputs:
3506  * zc_name              name of dataset to destroy
3507  * zc_objset_type       type of objset
3508  * zc_defer_destroy     mark for deferred destroy
3509  *
3510  * outputs:             none
3511  */
3512 static int
3513 zfs_ioc_destroy(zfs_cmd_t *zc)
3514 {
3515         int err;
3516 
3517         if (zc->zc_objset_type == DMU_OST_ZFS) {
3518                 err = zfs_unmount_snap(zc->zc_name);
3519                 if (err != 0)
3520                         return (err);
3521         }
3522 
3523         if (strchr(zc->zc_name, '@'))
3524                 err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
3525         else
3526                 err = dsl_destroy_head(zc->zc_name);
3527         if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0)
3528                 (void) zvol_remove_minor(zc->zc_name);
3529         return (err);
3530 }
3531 
3532 /*
3533  * fsname is name of dataset to rollback (to most recent snapshot)
3534  *
3535  * innvl is not used.
3536  *
3537  * outnvl: "target" -> name of most recent snapshot
3538  * }
3539  */
3540 /* ARGSUSED */
3541 static int
3542 zfs_ioc_rollback(const char *fsname, nvlist_t *args, nvlist_t *outnvl)
3543 {
3544         zfsvfs_t *zfsvfs;
3545         int error;
3546 
3547         if (getzfsvfs(fsname, &zfsvfs) == 0) {
3548                 error = zfs_suspend_fs(zfsvfs);
3549                 if (error == 0) {
3550                         int resume_err;
3551 
3552                         error = dsl_dataset_rollback(fsname, zfsvfs, outnvl);
3553                         resume_err = zfs_resume_fs(zfsvfs, fsname);
3554                         error = error ? error : resume_err;
3555                 }
3556                 VFS_RELE(zfsvfs->z_vfs);
3557         } else {
3558                 error = dsl_dataset_rollback(fsname, NULL, outnvl);
3559         }
3560         return (error);
3561 }
3562 
3563 static int
3564 recursive_unmount(const char *fsname, void *arg)
3565 {
3566         const char *snapname = arg;
3567         char fullname[MAXNAMELEN];
3568 
3569         (void) snprintf(fullname, sizeof (fullname), "%s@%s", fsname, snapname);
3570         return (zfs_unmount_snap(fullname));
3571 }
3572 
3573 /*
3574  * inputs:
3575  * zc_name      old name of dataset
3576  * zc_value     new name of dataset
3577  * zc_cookie    recursive flag (only valid for snapshots)
3578  *
3579  * outputs:     none
3580  */
3581 static int
3582 zfs_ioc_rename(zfs_cmd_t *zc)
3583 {
3584         boolean_t recursive = zc->zc_cookie & 1;
3585         char *at;
3586 
3587         zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
3588         if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
3589             strchr(zc->zc_value, '%'))
3590                 return (SET_ERROR(EINVAL));
3591 
3592         at = strchr(zc->zc_name, '@');
3593         if (at != NULL) {
3594                 /* snaps must be in same fs */
3595                 int error;
3596 
3597                 if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
3598                         return (SET_ERROR(EXDEV));
3599                 *at = '\0';
3600                 if (zc->zc_objset_type == DMU_OST_ZFS) {
3601                         error = dmu_objset_find(zc->zc_name,
3602                             recursive_unmount, at + 1,
3603                             recursive ? DS_FIND_CHILDREN : 0);
3604                         if (error != 0) {
3605                                 *at = '@';
3606                                 return (error);
3607                         }
3608                 }
3609                 error = dsl_dataset_rename_snapshot(zc->zc_name,
3610                     at + 1, strchr(zc->zc_value, '@') + 1, recursive);
3611                 *at = '@';
3612 
3613                 return (error);
3614         } else {
3615                 if (zc->zc_objset_type == DMU_OST_ZVOL)
3616                         (void) zvol_remove_minor(zc->zc_name);
3617                 return (dsl_dir_rename(zc->zc_name, zc->zc_value));
3618         }
3619 }
3620 
3621 static int
3622 zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
3623 {
3624         const char *propname = nvpair_name(pair);
3625         boolean_t issnap = (strchr(dsname, '@') != NULL);
3626         zfs_prop_t prop = zfs_name_to_prop(propname);
3627         uint64_t intval;
3628         int err;
3629 
3630         if (prop == ZPROP_INVAL) {
3631                 if (zfs_prop_user(propname)) {
3632                         if (err = zfs_secpolicy_write_perms(dsname,
3633                             ZFS_DELEG_PERM_USERPROP, cr))
3634                                 return (err);
3635                         return (0);
3636                 }
3637 
3638                 if (!issnap && zfs_prop_userquota(propname)) {
3639                         const char *perm = NULL;
3640                         const char *uq_prefix =
3641                             zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
3642                         const char *gq_prefix =
3643                             zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
3644 
3645                         if (strncmp(propname, uq_prefix,
3646                             strlen(uq_prefix)) == 0) {
3647                                 perm = ZFS_DELEG_PERM_USERQUOTA;
3648                         } else if (strncmp(propname, gq_prefix,
3649                             strlen(gq_prefix)) == 0) {
3650                                 perm = ZFS_DELEG_PERM_GROUPQUOTA;
3651                         } else {
3652                                 /* USERUSED and GROUPUSED are read-only */
3653                                 return (SET_ERROR(EINVAL));
3654                         }
3655 
3656                         if (err = zfs_secpolicy_write_perms(dsname, perm, cr))
3657                                 return (err);
3658                         return (0);
3659                 }
3660 
3661                 return (SET_ERROR(EINVAL));
3662         }
3663 
3664         if (issnap)
3665                 return (SET_ERROR(EINVAL));
3666 
3667         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
3668                 /*
3669                  * dsl_prop_get_all_impl() returns properties in this
3670                  * format.
3671                  */
3672                 nvlist_t *attrs;
3673                 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
3674                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3675                     &pair) == 0);
3676         }
3677 
3678         /*
3679          * Check that this value is valid for this pool version
3680          */
3681         switch (prop) {
3682         case ZFS_PROP_COMPRESSION:
3683                 /*
3684                  * If the user specified gzip compression, make sure
3685                  * the SPA supports it. We ignore any errors here since
3686                  * we'll catch them later.
3687                  */
3688                 if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
3689                     nvpair_value_uint64(pair, &intval) == 0) {
3690                         if (intval >= ZIO_COMPRESS_GZIP_1 &&
3691                             intval <= ZIO_COMPRESS_GZIP_9 &&
3692                             zfs_earlier_version(dsname,
3693                             SPA_VERSION_GZIP_COMPRESSION)) {
3694                                 return (SET_ERROR(ENOTSUP));
3695                         }
3696 
3697                         if (intval == ZIO_COMPRESS_ZLE &&
3698                             zfs_earlier_version(dsname,
3699                             SPA_VERSION_ZLE_COMPRESSION))
3700                                 return (SET_ERROR(ENOTSUP));
3701 
3702                         if (intval == ZIO_COMPRESS_LZ4) {
3703                                 zfeature_info_t *feature =
3704                                     &spa_feature_table[
3705                                     SPA_FEATURE_LZ4_COMPRESS];
3706                                 spa_t *spa;
3707 
3708                                 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
3709                                         return (err);
3710 
3711                                 if (!spa_feature_is_enabled(spa, feature)) {
3712                                         spa_close(spa, FTAG);
3713                                         return (SET_ERROR(ENOTSUP));
3714                                 }
3715                                 spa_close(spa, FTAG);
3716                         }
3717 
3718                         /*
3719                          * If this is a bootable dataset then
3720                          * verify that the compression algorithm
3721                          * is supported for booting. We must return
3722                          * something other than ENOTSUP since it
3723                          * implies a downrev pool version.
3724                          */
3725                         if (zfs_is_bootfs(dsname) &&
3726                             !BOOTFS_COMPRESS_VALID(intval)) {
3727                                 return (SET_ERROR(ERANGE));
3728                         }
3729                 }
3730                 break;
3731 
3732         case ZFS_PROP_COPIES:
3733                 if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
3734                         return (SET_ERROR(ENOTSUP));
3735                 break;
3736 
3737         case ZFS_PROP_SHARESMB:
3738                 if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
3739                         return (SET_ERROR(ENOTSUP));
3740                 break;
3741 
3742         case ZFS_PROP_ACLINHERIT:
3743                 if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
3744                     nvpair_value_uint64(pair, &intval) == 0) {
3745                         if (intval == ZFS_ACL_PASSTHROUGH_X &&
3746                             zfs_earlier_version(dsname,
3747                             SPA_VERSION_PASSTHROUGH_X))
3748                                 return (SET_ERROR(ENOTSUP));
3749                 }
3750                 break;
3751 
3752         case ZFS_PROP_CHECKSUM:
3753         case ZFS_PROP_DEDUP: {
3754                 zfeature_info_t *feature = NULL;
3755 
3756                 /* dedup feature version checks */
3757                 if (prop == ZFS_PROP_DEDUP &&
3758                     zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
3759                         return (SET_ERROR(ENOTSUP));
3760 
3761                 if (nvpair_value_uint64(pair, &intval) != 0)
3762                         return (SET_ERROR(EINVAL));
3763 
3764                 /* check prop value is enabled in features */
3765                 if (intval == ZIO_CHECKSUM_SHA512)
3766                         feature = &spa_feature_table[SPA_FEATURE_SHA512];
3767                 else if (intval == ZIO_CHECKSUM_SKEIN)
3768                         feature = &spa_feature_table[SPA_FEATURE_SKEIN];
3769                 else if (intval == ZIO_CHECKSUM_EDONR)
3770                         feature = &spa_feature_table[SPA_FEATURE_EDONR];
3771 
3772                 if (feature != NULL) {
3773                         spa_t *spa;
3774 
3775                         if ((err = spa_open(dsname, &spa, FTAG)) != 0)
3776                                 return (err);
3777                         /*
3778                          * Salted checksums are not supported on root pools.
3779                          */
3780                         if (spa_bootfs(spa) != 0 &&
3781                             intval < ZIO_CHECKSUM_FUNCTIONS &&
3782                             zio_checksum_table[intval].ci_salted != 0) {
3783                                 spa_close(spa, FTAG);
3784                                 return (SET_ERROR(ERANGE));
3785                         }
3786                         if (!spa_feature_is_enabled(spa, feature)) {
3787                                 spa_close(spa, FTAG);
3788                                 return (SET_ERROR(ENOTSUP));
3789                         }
3790                         spa_close(spa, FTAG);
3791                 }
3792                 break;
3793         }
3794         }
3795 
3796         return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
3797 }
3798 
3799 /*
3800  * Checks for a race condition to make sure we don't increment a feature flag
3801  * multiple times.
3802  */
3803 static int
3804 zfs_prop_activate_feature_check(void *arg, dmu_tx_t *tx)
3805 {
3806         spa_t *spa = dmu_tx_pool(tx)->dp_spa;
3807         zfeature_info_t *feature = arg;
3808 
3809         if (!spa_feature_is_active(spa, feature))
3810                 return (0);
3811         else
3812                 return (SET_ERROR(EBUSY));
3813 }
3814 
3815 /*
3816  * The callback invoked on feature activation in the sync task caused by
3817  * zfs_prop_activate_feature.
3818  */
3819 static void
3820 zfs_prop_activate_feature_sync(void *arg, dmu_tx_t *tx)
3821 {
3822         spa_t *spa = dmu_tx_pool(tx)->dp_spa;
3823         zfeature_info_t *feature = arg;
3824 
3825         spa_feature_incr(spa, feature, tx);
3826 }
3827 
3828 /*
3829  * Activates a feature on a pool in response to a property setting. This
3830  * creates a new sync task which modifies the pool to reflect the feature
3831  * as being active.
3832  */
3833 static int
3834 zfs_prop_activate_feature(spa_t *spa, zfeature_info_t *feature)
3835 {
3836         int err;
3837 
3838         /* EBUSY here indicates that the feature is already active */
3839         err = dsl_sync_task(spa_name(spa),
3840             zfs_prop_activate_feature_check, zfs_prop_activate_feature_sync,
3841             feature, 2);
3842 
3843         if (err != 0 && err != EBUSY)
3844                 return (err);
3845         else
3846                 return (0);
3847 }
3848 
3849 /*
3850  * Removes properties from the given props list that fail permission checks
3851  * needed to clear them and to restore them in case of a receive error. For each
3852  * property, make sure we have both set and inherit permissions.
3853  *
3854  * Returns the first error encountered if any permission checks fail. If the
3855  * caller provides a non-NULL errlist, it also gives the complete list of names
3856  * of all the properties that failed a permission check along with the
3857  * corresponding error numbers. The caller is responsible for freeing the
3858  * returned errlist.
3859  *
3860  * If every property checks out successfully, zero is returned and the list
3861  * pointed at by errlist is NULL.
3862  */
3863 static int
3864 zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
3865 {
3866         zfs_cmd_t *zc;
3867         nvpair_t *pair, *next_pair;
3868         nvlist_t *errors;
3869         int err, rv = 0;
3870 
3871         if (props == NULL)
3872                 return (0);
3873 
3874         VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
3875 
3876         zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
3877         (void) strcpy(zc->zc_name, dataset);
3878         pair = nvlist_next_nvpair(props, NULL);
3879         while (pair != NULL) {
3880                 next_pair = nvlist_next_nvpair(props, pair);
3881 
3882                 (void) strcpy(zc->zc_value, nvpair_name(pair));
3883                 if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
3884                     (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
3885                         VERIFY(nvlist_remove_nvpair(props, pair) == 0);
3886                         VERIFY(nvlist_add_int32(errors,
3887                             zc->zc_value, err) == 0);
3888                 }
3889                 pair = next_pair;
3890         }
3891         kmem_free(zc, sizeof (zfs_cmd_t));
3892 
3893         if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
3894                 nvlist_free(errors);
3895                 errors = NULL;
3896         } else {
3897                 VERIFY(nvpair_value_int32(pair, &rv) == 0);
3898         }
3899 
3900         if (errlist == NULL)
3901                 nvlist_free(errors);
3902         else
3903                 *errlist = errors;
3904 
3905         return (rv);
3906 }
3907 
3908 static boolean_t
3909 propval_equals(nvpair_t *p1, nvpair_t *p2)
3910 {
3911         if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
3912                 /* dsl_prop_get_all_impl() format */
3913                 nvlist_t *attrs;
3914                 VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
3915                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3916                     &p1) == 0);
3917         }
3918 
3919         if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
3920                 nvlist_t *attrs;
3921                 VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
3922                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3923                     &p2) == 0);
3924         }
3925 
3926         if (nvpair_type(p1) != nvpair_type(p2))
3927                 return (B_FALSE);
3928 
3929         if (nvpair_type(p1) == DATA_TYPE_STRING) {
3930                 char *valstr1, *valstr2;
3931 
3932                 VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
3933                 VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
3934                 return (strcmp(valstr1, valstr2) == 0);
3935         } else {
3936                 uint64_t intval1, intval2;
3937 
3938                 VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
3939                 VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
3940                 return (intval1 == intval2);
3941         }
3942 }
3943 
3944 /*
3945  * Remove properties from props if they are not going to change (as determined
3946  * by comparison with origprops). Remove them from origprops as well, since we
3947  * do not need to clear or restore properties that won't change.
3948  */
3949 static void
3950 props_reduce(nvlist_t *props, nvlist_t *origprops)
3951 {
3952         nvpair_t *pair, *next_pair;
3953 
3954         if (origprops == NULL)
3955                 return; /* all props need to be received */
3956 
3957         pair = nvlist_next_nvpair(props, NULL);
3958         while (pair != NULL) {
3959                 const char *propname = nvpair_name(pair);
3960                 nvpair_t *match;
3961 
3962                 next_pair = nvlist_next_nvpair(props, pair);
3963 
3964                 if ((nvlist_lookup_nvpair(origprops, propname,
3965                     &match) != 0) || !propval_equals(pair, match))
3966                         goto next; /* need to set received value */
3967 
3968                 /* don't clear the existing received value */
3969                 (void) nvlist_remove_nvpair(origprops, match);
3970                 /* don't bother receiving the property */
3971                 (void) nvlist_remove_nvpair(props, pair);
3972 next:
3973                 pair = next_pair;
3974         }
3975 }
3976 
3977 #ifdef  DEBUG
3978 static boolean_t zfs_ioc_recv_inject_err;
3979 #endif
3980 
3981 /*
3982  * inputs:
3983  * zc_name              name of containing filesystem
3984  * zc_nvlist_src{_size} nvlist of properties to apply
3985  * zc_value             name of snapshot to create
3986  * zc_string            name of clone origin (if DRR_FLAG_CLONE)
3987  * zc_cookie            file descriptor to recv from
3988  * zc_begin_record      the BEGIN record of the stream (not byteswapped)
3989  * zc_guid              force flag
3990  * zc_cleanup_fd        cleanup-on-exit file descriptor
3991  * zc_action_handle     handle for this guid/ds mapping (or zero on first call)
3992  *
3993  * outputs:
3994  * zc_cookie            number of bytes read
3995  * zc_nvlist_dst{_size} error for each unapplied received property
3996  * zc_obj               zprop_errflags_t
3997  * zc_action_handle     handle for this guid/ds mapping
3998  */
3999 static int
4000 zfs_ioc_recv(zfs_cmd_t *zc)
4001 {
4002         file_t *fp;
4003         dmu_recv_cookie_t drc;
4004         boolean_t force = (boolean_t)zc->zc_guid;
4005         int fd;
4006         int error = 0;
4007         int props_error = 0;
4008         nvlist_t *errors;
4009         offset_t off;
4010         nvlist_t *props = NULL; /* sent properties */
4011         nvlist_t *origprops = NULL; /* existing properties */
4012         char *origin = NULL;
4013         char *tosnap;
4014         char tofs[ZFS_MAXNAMELEN];
4015         boolean_t first_recvd_props = B_FALSE;
4016 
4017         if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
4018             strchr(zc->zc_value, '@') == NULL ||
4019             strchr(zc->zc_value, '%'))
4020                 return (SET_ERROR(EINVAL));
4021 
4022         (void) strcpy(tofs, zc->zc_value);
4023         tosnap = strchr(tofs, '@');
4024         *tosnap++ = '\0';
4025 
4026         if (zc->zc_nvlist_src != NULL &&
4027             (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
4028             zc->zc_iflags, &props)) != 0)
4029                 return (error);
4030 
4031         fd = zc->zc_cookie;
4032         fp = getf(fd);
4033         if (fp == NULL) {
4034                 nvlist_free(props);
4035                 return (SET_ERROR(EBADF));
4036         }
4037 
4038         VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4039 
4040         if (zc->zc_string[0])
4041                 origin = zc->zc_string;
4042 
4043         error = dmu_recv_begin(tofs, tosnap,
4044             &zc->zc_begin_record, force, origin, &drc);
4045         if (error != 0)
4046                 goto out;
4047 
4048         /*
4049          * Set properties before we receive the stream so that they are applied
4050          * to the new data. Note that we must call dmu_recv_stream() if
4051          * dmu_recv_begin() succeeds.
4052          */
4053         if (props != NULL && !drc.drc_newfs) {
4054                 if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
4055                     SPA_VERSION_RECVD_PROPS &&
4056                     !dsl_prop_get_hasrecvd(tofs))
4057                         first_recvd_props = B_TRUE;
4058 
4059                 /*
4060                  * If new received properties are supplied, they are to
4061                  * completely replace the existing received properties, so stash
4062                  * away the existing ones.
4063                  */
4064                 if (dsl_prop_get_received(tofs, &origprops) == 0) {
4065                         nvlist_t *errlist = NULL;
4066                         /*
4067                          * Don't bother writing a property if its value won't
4068                          * change (and avoid the unnecessary security checks).
4069                          *
4070                          * The first receive after SPA_VERSION_RECVD_PROPS is a
4071                          * special case where we blow away all local properties
4072                          * regardless.
4073                          */
4074                         if (!first_recvd_props)
4075                                 props_reduce(props, origprops);
4076                         if (zfs_check_clearable(tofs, origprops, &errlist) != 0)
4077                                 (void) nvlist_merge(errors, errlist, 0);
4078                         nvlist_free(errlist);
4079 
4080                         if (clear_received_props(tofs, origprops,
4081                             first_recvd_props ? NULL : props) != 0)
4082                                 zc->zc_obj |= ZPROP_ERR_NOCLEAR;
4083                 } else {
4084                         zc->zc_obj |= ZPROP_ERR_NOCLEAR;
4085                 }
4086         }
4087 
4088         if (props != NULL) {
4089                 props_error = dsl_prop_set_hasrecvd(tofs);
4090 
4091                 if (props_error == 0) {
4092                         (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
4093                             props, errors);
4094                 }
4095         }
4096 
4097         if (zc->zc_nvlist_dst_size != 0 &&
4098             (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
4099             put_nvlist(zc, errors) != 0)) {
4100                 /*
4101                  * Caller made zc->zc_nvlist_dst less than the minimum expected
4102                  * size or supplied an invalid address.
4103                  */
4104                 props_error = SET_ERROR(EINVAL);
4105         }
4106 
4107         off = fp->f_offset;
4108         error = dmu_recv_stream(&drc, fp->f_vnode, &off, zc->zc_cleanup_fd,
4109             &zc->zc_action_handle);
4110 
4111         if (error == 0) {
4112                 zfsvfs_t *zfsvfs = NULL;
4113 
4114                 if (getzfsvfs(tofs, &zfsvfs) == 0) {
4115                         /* online recv */
4116                         int end_err;
4117 
4118                         error = zfs_suspend_fs(zfsvfs);
4119                         /*
4120                          * If the suspend fails, then the recv_end will
4121                          * likely also fail, and clean up after itself.
4122                          */
4123                         end_err = dmu_recv_end(&drc, zfsvfs);
4124                         if (error == 0)
4125                                 error = zfs_resume_fs(zfsvfs, tofs);
4126                         error = error ? error : end_err;
4127                         VFS_RELE(zfsvfs->z_vfs);
4128                 } else {
4129                         error = dmu_recv_end(&drc, NULL);
4130                 }
4131         }
4132 
4133         zc->zc_cookie = off - fp->f_offset;
4134         if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
4135                 fp->f_offset = off;
4136 
4137 #ifdef  DEBUG
4138         if (zfs_ioc_recv_inject_err) {
4139                 zfs_ioc_recv_inject_err = B_FALSE;
4140                 error = 1;
4141         }
4142 #endif
4143         /*
4144          * On error, restore the original props.
4145          */
4146         if (error != 0 && props != NULL && !drc.drc_newfs) {
4147                 if (clear_received_props(tofs, props, NULL) != 0) {
4148                         /*
4149                          * We failed to clear the received properties.
4150                          * Since we may have left a $recvd value on the
4151                          * system, we can't clear the $hasrecvd flag.
4152                          */
4153                         zc->zc_obj |= ZPROP_ERR_NORESTORE;
4154                 } else if (first_recvd_props) {
4155                         dsl_prop_unset_hasrecvd(tofs);
4156                 }
4157 
4158                 if (origprops == NULL && !drc.drc_newfs) {
4159                         /* We failed to stash the original properties. */
4160                         zc->zc_obj |= ZPROP_ERR_NORESTORE;
4161                 }
4162 
4163                 /*
4164                  * dsl_props_set() will not convert RECEIVED to LOCAL on or
4165                  * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
4166                  * explictly if we're restoring local properties cleared in the
4167                  * first new-style receive.
4168                  */
4169                 if (origprops != NULL &&
4170                     zfs_set_prop_nvlist(tofs, (first_recvd_props ?
4171                     ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
4172                     origprops, NULL) != 0) {
4173                         /*
4174                          * We stashed the original properties but failed to
4175                          * restore them.
4176                          */
4177                         zc->zc_obj |= ZPROP_ERR_NORESTORE;
4178                 }
4179         }
4180 out:
4181         nvlist_free(props);
4182         nvlist_free(origprops);
4183         nvlist_free(errors);
4184         releasef(fd);
4185 
4186         if (error == 0)
4187                 error = props_error;
4188 
4189         return (error);
4190 }
4191 
4192 /*
4193  * inputs:
4194  * zc_name      name of snapshot to send
4195  * zc_cookie    file descriptor to send stream to
4196  * zc_obj       fromorigin flag (mutually exclusive with zc_fromobj)
4197  * zc_sendobj   objsetid of snapshot to send
4198  * zc_fromobj   objsetid of incremental fromsnap (may be zero)
4199  * zc_guid      if set, estimate size of stream only.  zc_cookie is ignored.
4200  *              output size in zc_objset_type.
4201  *
4202  * outputs: none
4203  */
4204 static int
4205 zfs_ioc_send(zfs_cmd_t *zc)
4206 {
4207         int error;
4208         offset_t off;
4209         boolean_t estimate = (zc->zc_guid != 0);
4210 
4211         if (zc->zc_obj != 0) {
4212                 dsl_pool_t *dp;
4213                 dsl_dataset_t *tosnap;
4214 
4215                 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4216                 if (error != 0)
4217                         return (error);
4218 
4219                 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
4220                 if (error != 0) {
4221                         dsl_pool_rele(dp, FTAG);
4222                         return (error);
4223                 }
4224 
4225                 if (dsl_dir_is_clone(tosnap->ds_dir))
4226                         zc->zc_fromobj = tosnap->ds_dir->dd_phys->dd_origin_obj;
4227                 dsl_dataset_rele(tosnap, FTAG);
4228                 dsl_pool_rele(dp, FTAG);
4229         }
4230 
4231         if (estimate) {
4232                 dsl_pool_t *dp;
4233                 dsl_dataset_t *tosnap;
4234                 dsl_dataset_t *fromsnap = NULL;
4235 
4236                 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4237                 if (error != 0)
4238                         return (error);
4239 
4240                 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
4241                 if (error != 0) {
4242                         dsl_pool_rele(dp, FTAG);
4243                         return (error);
4244                 }
4245 
4246                 if (zc->zc_fromobj != 0) {
4247                         error = dsl_dataset_hold_obj(dp, zc->zc_fromobj,
4248                             FTAG, &fromsnap);
4249                         if (error != 0) {
4250                                 dsl_dataset_rele(tosnap, FTAG);
4251                                 dsl_pool_rele(dp, FTAG);
4252                                 return (error);
4253                         }
4254                 }
4255 
4256                 error = dmu_send_estimate(tosnap, fromsnap,
4257                     &zc->zc_objset_type);
4258 
4259                 if (fromsnap != NULL)
4260                         dsl_dataset_rele(fromsnap, FTAG);
4261                 dsl_dataset_rele(tosnap, FTAG);
4262                 dsl_pool_rele(dp, FTAG);
4263         } else {
4264                 file_t *fp = getf(zc->zc_cookie);
4265                 if (fp == NULL)
4266                         return (SET_ERROR(EBADF));
4267 
4268                 off = fp->f_offset;
4269                 error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
4270                     zc->zc_fromobj, zc->zc_cookie, fp->f_vnode, &off);
4271 
4272                 if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
4273                         fp->f_offset = off;
4274                 releasef(zc->zc_cookie);
4275         }
4276         return (error);
4277 }
4278 
4279 /*
4280  * inputs:
4281  * zc_name      name of snapshot on which to report progress
4282  * zc_cookie    file descriptor of send stream
4283  *
4284  * outputs:
4285  * zc_cookie    number of bytes written in send stream thus far
4286  */
4287 static int
4288 zfs_ioc_send_progress(zfs_cmd_t *zc)
4289 {
4290         dsl_pool_t *dp;
4291         dsl_dataset_t *ds;
4292         dmu_sendarg_t *dsp = NULL;
4293         int error;
4294 
4295         error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
4296         if (error != 0)
4297                 return (error);
4298 
4299         error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
4300         if (error != 0) {
4301                 dsl_pool_rele(dp, FTAG);
4302                 return (error);
4303         }
4304 
4305         mutex_enter(&ds->ds_sendstream_lock);
4306 
4307         /*
4308          * Iterate over all the send streams currently active on this dataset.
4309          * If there's one which matches the specified file descriptor _and_ the
4310          * stream was started by the current process, return the progress of
4311          * that stream.
4312          */
4313         for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
4314             dsp = list_next(&ds->ds_sendstreams, dsp)) {
4315                 if (dsp->dsa_outfd == zc->zc_cookie &&
4316                     dsp->dsa_proc == curproc)
4317                         break;
4318         }
4319 
4320         if (dsp != NULL)
4321                 zc->zc_cookie = *(dsp->dsa_off);
4322         else
4323                 error = SET_ERROR(ENOENT);
4324 
4325         mutex_exit(&ds->ds_sendstream_lock);
4326         dsl_dataset_rele(ds, FTAG);
4327         dsl_pool_rele(dp, FTAG);
4328         return (error);
4329 }
4330 
4331 static int
4332 zfs_ioc_inject_fault(zfs_cmd_t *zc)
4333 {
4334         int id, error;
4335 
4336         error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
4337             &zc->zc_inject_record);
4338 
4339         if (error == 0)
4340                 zc->zc_guid = (uint64_t)id;
4341 
4342         return (error);
4343 }
4344 
4345 static int
4346 zfs_ioc_clear_fault(zfs_cmd_t *zc)
4347 {
4348         return (zio_clear_fault((int)zc->zc_guid));
4349 }
4350 
4351 static int
4352 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
4353 {
4354         int id = (int)zc->zc_guid;
4355         int error;
4356 
4357         error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
4358             &zc->zc_inject_record);
4359 
4360         zc->zc_guid = id;
4361 
4362         return (error);
4363 }
4364 
4365 static int
4366 zfs_ioc_error_log(zfs_cmd_t *zc)
4367 {
4368         spa_t *spa;
4369         int error;
4370         size_t count = (size_t)zc->zc_nvlist_dst_size;
4371 
4372         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
4373                 return (error);
4374 
4375         error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
4376             &count);
4377         if (error == 0)
4378                 zc->zc_nvlist_dst_size = count;
4379         else
4380                 zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
4381 
4382         spa_close(spa, FTAG);
4383 
4384         return (error);
4385 }
4386 
4387 static int
4388 zfs_ioc_clear(zfs_cmd_t *zc)
4389 {
4390         spa_t *spa;
4391         vdev_t *vd;
4392         int error;
4393 
4394         /*
4395          * On zpool clear we also fix up missing slogs
4396          */
4397         mutex_enter(&spa_namespace_lock);
4398         spa = spa_lookup(zc->zc_name);
4399         if (spa == NULL) {
4400                 mutex_exit(&spa_namespace_lock);
4401                 return (SET_ERROR(EIO));
4402         }
4403         if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
4404                 /* we need to let spa_open/spa_load clear the chains */
4405                 spa_set_log_state(spa, SPA_LOG_CLEAR);
4406         }
4407         spa->spa_last_open_failed = 0;
4408         mutex_exit(&spa_namespace_lock);
4409 
4410         if (zc->zc_cookie & ZPOOL_NO_REWIND) {
4411                 error = spa_open(zc->zc_name, &spa, FTAG);
4412         } else {
4413                 nvlist_t *policy;
4414                 nvlist_t *config = NULL;
4415 
4416                 if (zc->zc_nvlist_src == NULL)
4417                         return (SET_ERROR(EINVAL));
4418 
4419                 if ((error = get_nvlist(zc->zc_nvlist_src,
4420                     zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
4421                         error = spa_open_rewind(zc->zc_name, &spa, FTAG,
4422                             policy, &config);
4423                         if (config != NULL) {
4424                                 int err;
4425 
4426                                 if ((err = put_nvlist(zc, config)) != 0)
4427                                         error = err;
4428                                 nvlist_free(config);
4429                         }
4430                         nvlist_free(policy);
4431                 }
4432         }
4433 
4434         if (error != 0)
4435                 return (error);
4436 
4437         spa_vdev_state_enter(spa, SCL_NONE);
4438 
4439         if (zc->zc_guid == 0) {
4440                 vd = NULL;
4441         } else {
4442                 vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
4443                 if (vd == NULL) {
4444                         (void) spa_vdev_state_exit(spa, NULL, ENODEV);
4445                         spa_close(spa, FTAG);
4446                         return (SET_ERROR(ENODEV));
4447                 }
4448         }
4449 
4450         vdev_clear(spa, vd);
4451 
4452         (void) spa_vdev_state_exit(spa, NULL, 0);
4453 
4454         /*
4455          * Resume any suspended I/Os.
4456          */
4457         if (zio_resume(spa) != 0)
4458                 error = SET_ERROR(EIO);
4459 
4460         spa_close(spa, FTAG);
4461 
4462         return (error);
4463 }
4464 
4465 static int
4466 zfs_ioc_pool_reopen(zfs_cmd_t *zc)
4467 {
4468         spa_t *spa;
4469         int error;
4470 
4471         error = spa_open(zc->zc_name, &spa, FTAG);
4472         if (error != 0)
4473                 return (error);
4474 
4475         spa_vdev_state_enter(spa, SCL_NONE);
4476 
4477         /*
4478          * If a resilver is already in progress then set the
4479          * spa_scrub_reopen flag to B_TRUE so that we don't restart
4480          * the scan as a side effect of the reopen. Otherwise, let
4481          * vdev_open() decided if a resilver is required.
4482          */
4483         spa->spa_scrub_reopen = dsl_scan_resilvering(spa->spa_dsl_pool);
4484         vdev_reopen(spa->spa_root_vdev);
4485         spa->spa_scrub_reopen = B_FALSE;
4486 
4487         (void) spa_vdev_state_exit(spa, NULL, 0);
4488         spa_close(spa, FTAG);
4489         return (0);
4490 }
4491 /*
4492  * inputs:
4493  * zc_name      name of filesystem
4494  * zc_value     name of origin snapshot
4495  *
4496  * outputs:
4497  * zc_string    name of conflicting snapshot, if there is one
4498  */
4499 static int
4500 zfs_ioc_promote(zfs_cmd_t *zc)
4501 {
4502         char *cp;
4503 
4504         /*
4505          * We don't need to unmount *all* the origin fs's snapshots, but
4506          * it's easier.
4507          */
4508         cp = strchr(zc->zc_value, '@');
4509         if (cp)
4510                 *cp = '\0';
4511         (void) dmu_objset_find(zc->zc_value,
4512             zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
4513         return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
4514 }
4515 
4516 /*
4517  * Retrieve a single {user|group}{used|quota}@... property.
4518  *
4519  * inputs:
4520  * zc_name      name of filesystem
4521  * zc_objset_type zfs_userquota_prop_t
4522  * zc_value     domain name (eg. "S-1-234-567-89")
4523  * zc_guid      RID/UID/GID
4524  *
4525  * outputs:
4526  * zc_cookie    property value
4527  */
4528 static int
4529 zfs_ioc_userspace_one(zfs_cmd_t *zc)
4530 {
4531         zfsvfs_t *zfsvfs;
4532         int error;
4533 
4534         if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
4535                 return (SET_ERROR(EINVAL));
4536 
4537         error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
4538         if (error != 0)
4539                 return (error);
4540 
4541         error = zfs_userspace_one(zfsvfs,
4542             zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
4543         zfsvfs_rele(zfsvfs, FTAG);
4544 
4545         return (error);
4546 }
4547 
4548 /*
4549  * inputs:
4550  * zc_name              name of filesystem
4551  * zc_cookie            zap cursor
4552  * zc_objset_type       zfs_userquota_prop_t
4553  * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
4554  *
4555  * outputs:
4556  * zc_nvlist_dst[_size] data buffer (array of zfs_useracct_t)
4557  * zc_cookie    zap cursor
4558  */
4559 static int
4560 zfs_ioc_userspace_many(zfs_cmd_t *zc)
4561 {
4562         zfsvfs_t *zfsvfs;
4563         int bufsize = zc->zc_nvlist_dst_size;
4564 
4565         if (bufsize <= 0)
4566                 return (SET_ERROR(ENOMEM));
4567 
4568         int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
4569         if (error != 0)
4570                 return (error);
4571 
4572         void *buf = kmem_alloc(bufsize, KM_SLEEP);
4573 
4574         error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
4575             buf, &zc->zc_nvlist_dst_size);
4576 
4577         if (error == 0) {
4578                 error = xcopyout(buf,
4579                     (void *)(uintptr_t)zc->zc_nvlist_dst,
4580                     zc->zc_nvlist_dst_size);
4581         }
4582         kmem_free(buf, bufsize);
4583         zfsvfs_rele(zfsvfs, FTAG);
4584 
4585         return (error);
4586 }
4587 
4588 /*
4589  * inputs:
4590  * zc_name              name of filesystem
4591  *
4592  * outputs:
4593  * none
4594  */
4595 static int
4596 zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
4597 {
4598         objset_t *os;
4599         int error = 0;
4600         zfsvfs_t *zfsvfs;
4601 
4602         if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
4603                 if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
4604                         /*
4605                          * If userused is not enabled, it may be because the
4606                          * objset needs to be closed & reopened (to grow the
4607                          * objset_phys_t).  Suspend/resume the fs will do that.
4608                          */
4609                         error = zfs_suspend_fs(zfsvfs);
4610                         if (error == 0) {
4611                                 dmu_objset_refresh_ownership(zfsvfs->z_os,
4612                                     zfsvfs);
4613                                 error = zfs_resume_fs(zfsvfs, zc->zc_name);
4614                         }
4615                 }
4616                 if (error == 0)
4617                         error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
4618                 VFS_RELE(zfsvfs->z_vfs);
4619         } else {
4620                 /* XXX kind of reading contents without owning */
4621                 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
4622                 if (error != 0)
4623                         return (error);
4624 
4625                 error = dmu_objset_userspace_upgrade(os);
4626                 dmu_objset_rele(os, FTAG);
4627         }
4628 
4629         return (error);
4630 }
4631 
4632 /*
4633  * We don't want to have a hard dependency
4634  * against some special symbols in sharefs
4635  * nfs, and smbsrv.  Determine them if needed when
4636  * the first file system is shared.
4637  * Neither sharefs, nfs or smbsrv are unloadable modules.
4638  */
4639 int (*znfsexport_fs)(void *arg);
4640 int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
4641 int (*zsmbexport_fs)(void *arg, boolean_t add_share);
4642 
4643 int zfs_nfsshare_inited;
4644 int zfs_smbshare_inited;
4645 
4646 ddi_modhandle_t nfs_mod;
4647 ddi_modhandle_t sharefs_mod;
4648 ddi_modhandle_t smbsrv_mod;
4649 kmutex_t zfs_share_lock;
4650 
4651 static int
4652 zfs_init_sharefs()
4653 {
4654         int error;
4655 
4656         ASSERT(MUTEX_HELD(&zfs_share_lock));
4657         /* Both NFS and SMB shares also require sharetab support. */
4658         if (sharefs_mod == NULL && ((sharefs_mod =
4659             ddi_modopen("fs/sharefs",
4660             KRTLD_MODE_FIRST, &error)) == NULL)) {
4661                 return (SET_ERROR(ENOSYS));
4662         }
4663         if (zshare_fs == NULL && ((zshare_fs =
4664             (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
4665             ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) {
4666                 return (SET_ERROR(ENOSYS));
4667         }
4668         return (0);
4669 }
4670 
4671 static int
4672 zfs_ioc_share(zfs_cmd_t *zc)
4673 {
4674         int error;
4675         int opcode;
4676 
4677         switch (zc->zc_share.z_sharetype) {
4678         case ZFS_SHARE_NFS:
4679         case ZFS_UNSHARE_NFS:
4680                 if (zfs_nfsshare_inited == 0) {
4681                         mutex_enter(&zfs_share_lock);
4682                         if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
4683                             KRTLD_MODE_FIRST, &error)) == NULL)) {
4684                                 mutex_exit(&zfs_share_lock);
4685                                 return (SET_ERROR(ENOSYS));
4686                         }
4687                         if (znfsexport_fs == NULL &&
4688                             ((znfsexport_fs = (int (*)(void *))
4689                             ddi_modsym(nfs_mod,
4690                             "nfs_export", &error)) == NULL)) {
4691                                 mutex_exit(&zfs_share_lock);
4692                                 return (SET_ERROR(ENOSYS));
4693                         }
4694                         error = zfs_init_sharefs();
4695                         if (error != 0) {
4696                                 mutex_exit(&zfs_share_lock);
4697                                 return (SET_ERROR(ENOSYS));
4698                         }
4699                         zfs_nfsshare_inited = 1;
4700                         mutex_exit(&zfs_share_lock);
4701                 }
4702                 break;
4703         case ZFS_SHARE_SMB:
4704         case ZFS_UNSHARE_SMB:
4705                 if (zfs_smbshare_inited == 0) {
4706                         mutex_enter(&zfs_share_lock);
4707                         if (smbsrv_mod == NULL && ((smbsrv_mod =
4708                             ddi_modopen("drv/smbsrv",
4709                             KRTLD_MODE_FIRST, &error)) == NULL)) {
4710                                 mutex_exit(&zfs_share_lock);
4711                                 return (SET_ERROR(ENOSYS));
4712                         }
4713                         if (zsmbexport_fs == NULL && ((zsmbexport_fs =
4714                             (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
4715                             "smb_server_share", &error)) == NULL)) {
4716                                 mutex_exit(&zfs_share_lock);
4717                                 return (SET_ERROR(ENOSYS));
4718                         }
4719                         error = zfs_init_sharefs();
4720                         if (error != 0) {
4721                                 mutex_exit(&zfs_share_lock);
4722                                 return (SET_ERROR(ENOSYS));
4723                         }
4724                         zfs_smbshare_inited = 1;
4725                         mutex_exit(&zfs_share_lock);
4726                 }
4727                 break;
4728         default:
4729                 return (SET_ERROR(EINVAL));
4730         }
4731 
4732         switch (zc->zc_share.z_sharetype) {
4733         case ZFS_SHARE_NFS:
4734         case ZFS_UNSHARE_NFS:
4735                 if (error =
4736                     znfsexport_fs((void *)
4737                     (uintptr_t)zc->zc_share.z_exportdata))
4738                         return (error);
4739                 break;
4740         case ZFS_SHARE_SMB:
4741         case ZFS_UNSHARE_SMB:
4742                 if (error = zsmbexport_fs((void *)
4743                     (uintptr_t)zc->zc_share.z_exportdata,
4744                     zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
4745                     B_TRUE: B_FALSE)) {
4746                         return (error);
4747                 }
4748                 break;
4749         }
4750 
4751         opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
4752             zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
4753             SHAREFS_ADD : SHAREFS_REMOVE;
4754 
4755         /*
4756          * Add or remove share from sharetab
4757          */
4758         error = zshare_fs(opcode,
4759             (void *)(uintptr_t)zc->zc_share.z_sharedata,
4760             zc->zc_share.z_sharemax);
4761 
4762         return (error);
4763 
4764 }
4765 
4766 ace_t full_access[] = {
4767         {(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
4768 };
4769 
4770 /*
4771  * inputs:
4772  * zc_name              name of containing filesystem
4773  * zc_obj               object # beyond which we want next in-use object #
4774  *
4775  * outputs:
4776  * zc_obj               next in-use object #
4777  */
4778 static int
4779 zfs_ioc_next_obj(zfs_cmd_t *zc)
4780 {
4781         objset_t *os = NULL;
4782         int error;
4783 
4784         error = dmu_objset_hold(zc->zc_name, FTAG, &os);
4785         if (error != 0)
4786                 return (error);
4787 
4788         error = dmu_object_next(os, &zc->zc_obj, B_FALSE,
4789             os->os_dsl_dataset->ds_phys->ds_prev_snap_txg);
4790 
4791         dmu_objset_rele(os, FTAG);
4792         return (error);
4793 }
4794 
4795 /*
4796  * inputs:
4797  * zc_name              name of filesystem
4798  * zc_value             prefix name for snapshot
4799  * zc_cleanup_fd        cleanup-on-exit file descriptor for calling process
4800  *
4801  * outputs:
4802  * zc_value             short name of new snapshot
4803  */
4804 static int
4805 zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
4806 {
4807         char *snap_name;
4808         char *hold_name;
4809         int error;
4810         minor_t minor;
4811 
4812         error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
4813         if (error != 0)
4814                 return (error);
4815 
4816         snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
4817             (u_longlong_t)ddi_get_lbolt64());
4818         hold_name = kmem_asprintf("%%%s", zc->zc_value);
4819 
4820         error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
4821             hold_name);
4822         if (error == 0)
4823                 (void) strcpy(zc->zc_value, snap_name);
4824         strfree(snap_name);
4825         strfree(hold_name);
4826         zfs_onexit_fd_rele(zc->zc_cleanup_fd);
4827         return (error);
4828 }
4829 
4830 /*
4831  * inputs:
4832  * zc_name              name of "to" snapshot
4833  * zc_value             name of "from" snapshot
4834  * zc_cookie            file descriptor to write diff data on
4835  *
4836  * outputs:
4837  * dmu_diff_record_t's to the file descriptor
4838  */
4839 static int
4840 zfs_ioc_diff(zfs_cmd_t *zc)
4841 {
4842         file_t *fp;
4843         offset_t off;
4844         int error;
4845 
4846         fp = getf(zc->zc_cookie);
4847         if (fp == NULL)
4848                 return (SET_ERROR(EBADF));
4849 
4850         off = fp->f_offset;
4851 
4852         error = dmu_diff(zc->zc_name, zc->zc_value, fp->f_vnode, &off);
4853 
4854         if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
4855                 fp->f_offset = off;
4856         releasef(zc->zc_cookie);
4857 
4858         return (error);
4859 }
4860 
4861 /*
4862  * Remove all ACL files in shares dir
4863  */
4864 static int
4865 zfs_smb_acl_purge(znode_t *dzp)
4866 {
4867         zap_cursor_t    zc;
4868         zap_attribute_t zap;
4869         zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
4870         int error;
4871 
4872         for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
4873             (error = zap_cursor_retrieve(&zc, &zap)) == 0;
4874             zap_cursor_advance(&zc)) {
4875                 if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred,
4876                     NULL, 0)) != 0)
4877                         break;
4878         }
4879         zap_cursor_fini(&zc);
4880         return (error);
4881 }
4882 
4883 static int
4884 zfs_ioc_smb_acl(zfs_cmd_t *zc)
4885 {
4886         vnode_t *vp;
4887         znode_t *dzp;
4888         vnode_t *resourcevp = NULL;
4889         znode_t *sharedir;
4890         zfsvfs_t *zfsvfs;
4891         nvlist_t *nvlist;
4892         char *src, *target;
4893         vattr_t vattr;
4894         vsecattr_t vsec;
4895         int error = 0;
4896 
4897         if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
4898             NO_FOLLOW, NULL, &vp)) != 0)
4899                 return (error);
4900 
4901         /* Now make sure mntpnt and dataset are ZFS */
4902 
4903         if (vp->v_vfsp->vfs_fstype != zfsfstype ||
4904             (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
4905             zc->zc_name) != 0)) {
4906                 VN_RELE(vp);
4907                 return (SET_ERROR(EINVAL));
4908         }
4909 
4910         dzp = VTOZ(vp);
4911         zfsvfs = dzp->z_zfsvfs;
4912         ZFS_ENTER(zfsvfs);
4913 
4914         /*
4915          * Create share dir if its missing.
4916          */
4917         mutex_enter(&zfsvfs->z_lock);
4918         if (zfsvfs->z_shares_dir == 0) {
4919                 dmu_tx_t *tx;
4920 
4921                 tx = dmu_tx_create(zfsvfs->z_os);
4922                 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
4923                     ZFS_SHARES_DIR);
4924                 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
4925                 error = dmu_tx_assign(tx, TXG_WAIT);
4926                 if (error != 0) {
4927                         dmu_tx_abort(tx);
4928                 } else {
4929                         error = zfs_create_share_dir(zfsvfs, tx);
4930                         dmu_tx_commit(tx);
4931                 }
4932                 if (error != 0) {
4933                         mutex_exit(&zfsvfs->z_lock);
4934                         VN_RELE(vp);
4935                         ZFS_EXIT(zfsvfs);
4936                         return (error);
4937                 }
4938         }
4939         mutex_exit(&zfsvfs->z_lock);
4940 
4941         ASSERT(zfsvfs->z_shares_dir);
4942         if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
4943                 VN_RELE(vp);
4944                 ZFS_EXIT(zfsvfs);
4945                 return (error);
4946         }
4947 
4948         switch (zc->zc_cookie) {
4949         case ZFS_SMB_ACL_ADD:
4950                 vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
4951                 vattr.va_type = VREG;
4952                 vattr.va_mode = S_IFREG|0777;
4953                 vattr.va_uid = 0;
4954                 vattr.va_gid = 0;
4955 
4956                 vsec.vsa_mask = VSA_ACE;
4957                 vsec.vsa_aclentp = &full_access;
4958                 vsec.vsa_aclentsz = sizeof (full_access);
4959                 vsec.vsa_aclcnt = 1;
4960 
4961                 error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
4962                     &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
4963                 if (resourcevp)
4964                         VN_RELE(resourcevp);
4965                 break;
4966 
4967         case ZFS_SMB_ACL_REMOVE:
4968                 error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
4969                     NULL, 0);
4970                 break;
4971 
4972         case ZFS_SMB_ACL_RENAME:
4973                 if ((error = get_nvlist(zc->zc_nvlist_src,
4974                     zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
4975                         VN_RELE(vp);
4976                         ZFS_EXIT(zfsvfs);
4977                         return (error);
4978                 }
4979                 if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
4980                     nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
4981                     &target)) {
4982                         VN_RELE(vp);
4983                         VN_RELE(ZTOV(sharedir));
4984                         ZFS_EXIT(zfsvfs);
4985                         nvlist_free(nvlist);
4986                         return (error);
4987                 }
4988                 error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
4989                     kcred, NULL, 0);
4990                 nvlist_free(nvlist);
4991                 break;
4992 
4993         case ZFS_SMB_ACL_PURGE:
4994                 error = zfs_smb_acl_purge(sharedir);
4995                 break;
4996 
4997         default:
4998                 error = SET_ERROR(EINVAL);
4999                 break;
5000         }
5001 
5002         VN_RELE(vp);
5003         VN_RELE(ZTOV(sharedir));
5004 
5005         ZFS_EXIT(zfsvfs);
5006 
5007         return (error);
5008 }
5009 
5010 /*
5011  * innvl: {
5012  *     "holds" -> { snapname -> holdname (string), ... }
5013  *     (optional) "cleanup_fd" -> fd (int32)
5014  * }
5015  *
5016  * outnvl: {
5017  *     snapname -> error value (int32)
5018  *     ...
5019  * }
5020  */
5021 /* ARGSUSED */
5022 static int
5023 zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
5024 {
5025         nvlist_t *holds;
5026         int cleanup_fd = -1;
5027         int error;
5028         minor_t minor = 0;
5029 
5030         error = nvlist_lookup_nvlist(args, "holds", &holds);
5031         if (error != 0)
5032                 return (SET_ERROR(EINVAL));
5033 
5034         if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
5035                 error = zfs_onexit_fd_hold(cleanup_fd, &minor);
5036                 if (error != 0)
5037                         return (error);
5038         }
5039 
5040         error = dsl_dataset_user_hold(holds, minor, errlist);
5041         if (minor != 0)
5042                 zfs_onexit_fd_rele(cleanup_fd);
5043         return (error);
5044 }
5045 
5046 /*
5047  * innvl is not used.
5048  *
5049  * outnvl: {
5050  *    holdname -> time added (uint64 seconds since epoch)
5051  *    ...
5052  * }
5053  */
5054 /* ARGSUSED */
5055 static int
5056 zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
5057 {
5058         return (dsl_dataset_get_holds(snapname, outnvl));
5059 }
5060 
5061 /*
5062  * innvl: {
5063  *     snapname -> { holdname, ... }
5064  *     ...
5065  * }
5066  *
5067  * outnvl: {
5068  *     snapname -> error value (int32)
5069  *     ...
5070  * }
5071  */
5072 /* ARGSUSED */
5073 static int
5074 zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
5075 {
5076         return (dsl_dataset_user_release(holds, errlist));
5077 }
5078 
5079 /*
5080  * inputs:
5081  * zc_name              name of new filesystem or snapshot
5082  * zc_value             full name of old snapshot
5083  *
5084  * outputs:
5085  * zc_cookie            space in bytes
5086  * zc_objset_type       compressed space in bytes
5087  * zc_perm_action       uncompressed space in bytes
5088  */
5089 static int
5090 zfs_ioc_space_written(zfs_cmd_t *zc)
5091 {
5092         int error;
5093         dsl_pool_t *dp;
5094         dsl_dataset_t *new, *old;
5095 
5096         error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
5097         if (error != 0)
5098                 return (error);
5099         error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new);
5100         if (error != 0) {
5101                 dsl_pool_rele(dp, FTAG);
5102                 return (error);
5103         }
5104         error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
5105         if (error != 0) {
5106                 dsl_dataset_rele(new, FTAG);
5107                 dsl_pool_rele(dp, FTAG);
5108                 return (error);
5109         }
5110 
5111         error = dsl_dataset_space_written(old, new, &zc->zc_cookie,
5112             &zc->zc_objset_type, &zc->zc_perm_action);
5113         dsl_dataset_rele(old, FTAG);
5114         dsl_dataset_rele(new, FTAG);
5115         dsl_pool_rele(dp, FTAG);
5116         return (error);
5117 }
5118 
5119 /*
5120  * innvl: {
5121  *     "firstsnap" -> snapshot name
5122  * }
5123  *
5124  * outnvl: {
5125  *     "used" -> space in bytes
5126  *     "compressed" -> compressed space in bytes
5127  *     "uncompressed" -> uncompressed space in bytes
5128  * }
5129  */
5130 static int
5131 zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
5132 {
5133         int error;
5134         dsl_pool_t *dp;
5135         dsl_dataset_t *new, *old;
5136         char *firstsnap;
5137         uint64_t used, comp, uncomp;
5138 
5139         if (nvlist_lookup_string(innvl, "firstsnap", &firstsnap) != 0)
5140                 return (SET_ERROR(EINVAL));
5141 
5142         error = dsl_pool_hold(lastsnap, FTAG, &dp);
5143         if (error != 0)
5144                 return (error);
5145 
5146         error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
5147         if (error != 0) {
5148                 dsl_pool_rele(dp, FTAG);
5149                 return (error);
5150         }
5151         error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
5152         if (error != 0) {
5153                 dsl_dataset_rele(new, FTAG);
5154                 dsl_pool_rele(dp, FTAG);
5155                 return (error);
5156         }
5157 
5158         error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
5159         dsl_dataset_rele(old, FTAG);
5160         dsl_dataset_rele(new, FTAG);
5161         dsl_pool_rele(dp, FTAG);
5162         fnvlist_add_uint64(outnvl, "used", used);
5163         fnvlist_add_uint64(outnvl, "compressed", comp);
5164         fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
5165         return (error);
5166 }
5167 
5168 /*
5169  * innvl: {
5170  *     "fd" -> file descriptor to write stream to (int32)
5171  *     (optional) "fromsnap" -> full snap name to send an incremental from
5172  * }
5173  *
5174  * outnvl is unused
5175  */
5176 /* ARGSUSED */
5177 static int
5178 zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
5179 {
5180         int error;
5181         offset_t off;
5182         char *fromname = NULL;
5183         int fd;
5184 
5185         error = nvlist_lookup_int32(innvl, "fd", &fd);
5186         if (error != 0)
5187                 return (SET_ERROR(EINVAL));
5188 
5189         (void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
5190 
5191         file_t *fp = getf(fd);
5192         if (fp == NULL)
5193                 return (SET_ERROR(EBADF));
5194 
5195         off = fp->f_offset;
5196         error = dmu_send(snapname, fromname, fd, fp->f_vnode, &off);
5197 
5198         if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
5199                 fp->f_offset = off;
5200         releasef(fd);
5201         return (error);
5202 }
5203 
5204 /*
5205  * Determine approximately how large a zfs send stream will be -- the number
5206  * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
5207  *
5208  * innvl: {
5209  *     (optional) "fromsnap" -> full snap name to send an incremental from
5210  * }
5211  *
5212  * outnvl: {
5213  *     "space" -> bytes of space (uint64)
5214  * }
5215  */
5216 static int
5217 zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
5218 {
5219         dsl_pool_t *dp;
5220         dsl_dataset_t *fromsnap = NULL;
5221         dsl_dataset_t *tosnap;
5222         int error;
5223         char *fromname;
5224         uint64_t space;
5225 
5226         error = dsl_pool_hold(snapname, FTAG, &dp);
5227         if (error != 0)
5228                 return (error);
5229 
5230         error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap);
5231         if (error != 0) {
5232                 dsl_pool_rele(dp, FTAG);
5233                 return (error);
5234         }
5235 
5236         error = nvlist_lookup_string(innvl, "fromsnap", &fromname);
5237         if (error == 0) {
5238                 error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
5239                 if (error != 0) {
5240                         dsl_dataset_rele(tosnap, FTAG);
5241                         dsl_pool_rele(dp, FTAG);
5242                         return (error);
5243                 }
5244         }
5245 
5246         error = dmu_send_estimate(tosnap, fromsnap, &space);
5247         fnvlist_add_uint64(outnvl, "space", space);
5248 
5249         if (fromsnap != NULL)
5250                 dsl_dataset_rele(fromsnap, FTAG);
5251         dsl_dataset_rele(tosnap, FTAG);
5252         dsl_pool_rele(dp, FTAG);
5253         return (error);
5254 }
5255 
5256 
5257 static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
5258 
5259 static void
5260 zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5261     zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
5262     boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
5263 {
5264         zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
5265 
5266         ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
5267         ASSERT3U(ioc, <, ZFS_IOC_LAST);
5268         ASSERT3P(vec->zvec_legacy_func, ==, NULL);
5269         ASSERT3P(vec->zvec_func, ==, NULL);
5270 
5271         vec->zvec_legacy_func = func;
5272         vec->zvec_secpolicy = secpolicy;
5273         vec->zvec_namecheck = namecheck;
5274         vec->zvec_allow_log = log_history;
5275         vec->zvec_pool_check = pool_check;
5276 }
5277 
5278 /*
5279  * See the block comment at the beginning of this file for details on
5280  * each argument to this function.
5281  */
5282 static void
5283 zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
5284     zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
5285     zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
5286     boolean_t allow_log)
5287 {
5288         zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
5289 
5290         ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
5291         ASSERT3U(ioc, <, ZFS_IOC_LAST);
5292         ASSERT3P(vec->zvec_legacy_func, ==, NULL);
5293         ASSERT3P(vec->zvec_func, ==, NULL);
5294 
5295         /* if we are logging, the name must be valid */
5296         ASSERT(!allow_log || namecheck != NO_NAME);
5297 
5298         vec->zvec_name = name;
5299         vec->zvec_func = func;
5300         vec->zvec_secpolicy = secpolicy;
5301         vec->zvec_namecheck = namecheck;
5302         vec->zvec_pool_check = pool_check;
5303         vec->zvec_smush_outnvlist = smush_outnvlist;
5304         vec->zvec_allow_log = allow_log;
5305 }
5306 
5307 static void
5308 zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5309     zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
5310     zfs_ioc_poolcheck_t pool_check)
5311 {
5312         zfs_ioctl_register_legacy(ioc, func, secpolicy,
5313             POOL_NAME, log_history, pool_check);
5314 }
5315 
5316 static void
5317 zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5318     zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
5319 {
5320         zfs_ioctl_register_legacy(ioc, func, secpolicy,
5321             DATASET_NAME, B_FALSE, pool_check);
5322 }
5323 
5324 static void
5325 zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
5326 {
5327         zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
5328             POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
5329 }
5330 
5331 static void
5332 zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5333     zfs_secpolicy_func_t *secpolicy)
5334 {
5335         zfs_ioctl_register_legacy(ioc, func, secpolicy,
5336             NO_NAME, B_FALSE, POOL_CHECK_NONE);
5337 }
5338 
5339 static void
5340 zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
5341     zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
5342 {
5343         zfs_ioctl_register_legacy(ioc, func, secpolicy,
5344             DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
5345 }
5346 
5347 static void
5348 zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
5349 {
5350         zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
5351             zfs_secpolicy_read);
5352 }
5353 
5354 static void
5355 zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
5356         zfs_secpolicy_func_t *secpolicy)
5357 {
5358         zfs_ioctl_register_legacy(ioc, func, secpolicy,
5359             DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
5360 }
5361 
/*
 * Fill in zfs_ioc_vec with a descriptor for every ioctl.  Called from
 * _init().
 */
static void
zfs_ioctl_init(void)
{
        /*
         * New-style ioctls.  After the handler and security policy, the
         * arguments are: namecheck, pool_check, smush_outnvlist, allow_log.
         */
        zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
            zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

        zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
            zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE);

        zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
            zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
            POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

        zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
            zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
            POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

        zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
            zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
            POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

        zfs_ioctl_register("create", ZFS_IOC_CREATE,
            zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

        zfs_ioctl_register("clone", ZFS_IOC_CLONE,
            zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

        zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
            zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

        zfs_ioctl_register("hold", ZFS_IOC_HOLD,
            zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
        zfs_ioctl_register("release", ZFS_IOC_RELEASE,
            zfs_ioc_release, zfs_secpolicy_release, POOL_NAME,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);

        zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS,
            zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME,
            POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE);

        zfs_ioctl_register("rollback", ZFS_IOC_ROLLBACK,
            zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE);

        /* IOCTLS that use the legacy function signature */

        zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
            zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);

        zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
            zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);

        /*
         * Pool-modifying ioctls: zfs_secpolicy_config, pool name check,
         * logged, with the suspended and read-only pool checks.
         */
        zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
            zfs_ioc_pool_scan);
        zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
            zfs_ioc_pool_upgrade);
        zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
            zfs_ioc_vdev_add);
        zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
            zfs_ioc_vdev_remove);
        zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
            zfs_ioc_vdev_set_state);
        zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
            zfs_ioc_vdev_attach);
        zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
            zfs_ioc_vdev_detach);
        zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
            zfs_ioc_vdev_setpath);
        zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
            zfs_ioc_vdev_setfru);
        zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
            zfs_ioc_pool_set_props);
        zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
            zfs_ioc_vdev_split);
        zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
            zfs_ioc_pool_reguid);

        /* No name check, not logged, no pool status checks. */
        zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
            zfs_ioc_pool_configs, zfs_secpolicy_none);
        zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
            zfs_ioc_pool_tryimport, zfs_secpolicy_config);
        zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
            zfs_ioc_inject_fault, zfs_secpolicy_inject);
        zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
            zfs_ioc_clear_fault, zfs_secpolicy_inject);
        zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
            zfs_ioc_inject_list_next, zfs_secpolicy_inject);

        /*
         * Pool destroy and export do not log the history as part of
         * zfsdev_ioctl(); instead, zfs_ioc_pool_export does the logging
         * of those commands.
         */
        zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
            zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);
        zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
            zfs_secpolicy_config, B_FALSE, POOL_CHECK_NONE);

        zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
            zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
        zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_PROPS, zfs_ioc_pool_get_props,
            zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);

        zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
            zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED);
        zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
            zfs_ioc_dsobj_to_dsname,
            zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED);
        zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
            zfs_ioc_pool_get_history,
            zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);

        zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
            zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);

        zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
            zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
        zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
            zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);

        /*
         * Dataset reads: zfs_secpolicy_read, dataset name check, not
         * logged, suspended-pool check only.
         */
        zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
            zfs_ioc_space_written);
        zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
            zfs_ioc_objset_recvd_props);
        zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
            zfs_ioc_next_obj);
        zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
            zfs_ioc_get_fsacl);
        zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
            zfs_ioc_objset_stats);
        zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
            zfs_ioc_objset_zplprops);
        zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
            zfs_ioc_dataset_list_next);
        zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
            zfs_ioc_snapshot_list_next);
        zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
            zfs_ioc_send_progress);

        /* Dataset reads that supply their own security policy. */
        zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
            zfs_ioc_diff, zfs_secpolicy_diff);
        zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
            zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
        zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
            zfs_ioc_obj_to_path, zfs_secpolicy_diff);
        zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
            zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
        zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
            zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
        zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
            zfs_ioc_send, zfs_secpolicy_send);

        /*
         * Dataset modifications: dataset name check, logged, with the
         * suspended and read-only pool checks.
         */
        zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
            zfs_secpolicy_none);
        zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
            zfs_secpolicy_destroy);
        zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
            zfs_secpolicy_rename);
        zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
            zfs_secpolicy_recv);
        zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
            zfs_secpolicy_promote);
        zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
            zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
        zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
            zfs_secpolicy_set_fsacl);

        /* Dataset ioctls that are never logged. */
        zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
            zfs_secpolicy_share, POOL_CHECK_NONE);
        zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
            zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
        zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
            zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
        zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
            zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
}
5545 
5546 int
5547 pool_status_check(const char *name, zfs_ioc_namecheck_t type,
5548     zfs_ioc_poolcheck_t check)
5549 {
5550         spa_t *spa;
5551         int error;
5552 
5553         ASSERT(type == POOL_NAME || type == DATASET_NAME);
5554 
5555         if (check & POOL_CHECK_NONE)
5556                 return (0);
5557 
5558         error = spa_open(name, &spa, FTAG);
5559         if (error == 0) {
5560                 if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
5561                         error = SET_ERROR(EAGAIN);
5562                 else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
5563                         error = SET_ERROR(EROFS);
5564                 spa_close(spa, FTAG);
5565         }
5566         return (error);
5567 }
5568 
5569 /*
5570  * Find a free minor number.
5571  */
5572 minor_t
5573 zfsdev_minor_alloc(void)
5574 {
5575         static minor_t last_minor;
5576         minor_t m;
5577 
5578         ASSERT(MUTEX_HELD(&zfsdev_state_lock));
5579 
5580         for (m = last_minor + 1; m != last_minor; m++) {
5581                 if (m > ZFSDEV_MAX_MINOR)
5582                         m = 1;
5583                 if (ddi_get_soft_state(zfsdev_state, m) == NULL) {
5584                         last_minor = m;
5585                         return (m);
5586                 }
5587         }
5588 
5589         return (0);
5590 }
5591 
5592 static int
5593 zfs_ctldev_init(dev_t *devp)
5594 {
5595         minor_t minor;
5596         zfs_soft_state_t *zs;
5597 
5598         ASSERT(MUTEX_HELD(&zfsdev_state_lock));
5599         ASSERT(getminor(*devp) == 0);
5600 
5601         minor = zfsdev_minor_alloc();
5602         if (minor == 0)
5603                 return (SET_ERROR(ENXIO));
5604 
5605         if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS)
5606                 return (SET_ERROR(EAGAIN));
5607 
5608         *devp = makedevice(getemajor(*devp), minor);
5609 
5610         zs = ddi_get_soft_state(zfsdev_state, minor);
5611         zs->zss_type = ZSST_CTLDEV;
5612         zfs_onexit_init((zfs_onexit_t **)&zs->zss_data);
5613 
5614         return (0);
5615 }
5616 
5617 static void
5618 zfs_ctldev_destroy(zfs_onexit_t *zo, minor_t minor)
5619 {
5620         ASSERT(MUTEX_HELD(&zfsdev_state_lock));
5621 
5622         zfs_onexit_destroy(zo);
5623         ddi_soft_state_free(zfsdev_state, minor);
5624 }
5625 
5626 void *
5627 zfsdev_get_soft_state(minor_t minor, enum zfs_soft_state_type which)
5628 {
5629         zfs_soft_state_t *zp;
5630 
5631         zp = ddi_get_soft_state(zfsdev_state, minor);
5632         if (zp == NULL || zp->zss_type != which)
5633                 return (NULL);
5634 
5635         return (zp->zss_data);
5636 }
5637 
5638 static int
5639 zfsdev_open(dev_t *devp, int flag, int otyp, cred_t *cr)
5640 {
5641         int error = 0;
5642 
5643         if (getminor(*devp) != 0)
5644                 return (zvol_open(devp, flag, otyp, cr));
5645 
5646         /* This is the control device. Allocate a new minor if requested. */
5647         if (flag & FEXCL) {
5648                 mutex_enter(&zfsdev_state_lock);
5649                 error = zfs_ctldev_init(devp);
5650                 mutex_exit(&zfsdev_state_lock);
5651         }
5652 
5653         return (error);
5654 }
5655 
5656 static int
5657 zfsdev_close(dev_t dev, int flag, int otyp, cred_t *cr)
5658 {
5659         zfs_onexit_t *zo;
5660         minor_t minor = getminor(dev);
5661 
5662         if (minor == 0)
5663                 return (0);
5664 
5665         mutex_enter(&zfsdev_state_lock);
5666         zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
5667         if (zo == NULL) {
5668                 mutex_exit(&zfsdev_state_lock);
5669                 return (zvol_close(dev, flag, otyp, cr));
5670         }
5671         zfs_ctldev_destroy(zo, minor);
5672         mutex_exit(&zfsdev_state_lock);
5673 
5674         return (0);
5675 }
5676 
5677 static int
5678 zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
5679 {
5680         zfs_cmd_t *zc;
5681         uint_t vecnum;
5682         int error, rc, len;
5683         minor_t minor = getminor(dev);
5684         const zfs_ioc_vec_t *vec;
5685         char *saved_poolname = NULL;
5686         nvlist_t *innvl = NULL;
5687 
5688         if (minor != 0 &&
5689             zfsdev_get_soft_state(minor, ZSST_CTLDEV) == NULL)
5690                 return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp));
5691 
5692         vecnum = cmd - ZFS_IOC_FIRST;
5693         ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));
5694 
5695         if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
5696                 return (SET_ERROR(EINVAL));
5697         vec = &zfs_ioc_vec[vecnum];
5698 
5699         zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
5700 
5701         error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
5702         if (error != 0) {
5703                 error = SET_ERROR(EFAULT);
5704                 goto out;
5705         }
5706 
5707         zc->zc_iflags = flag & FKIOCTL;
5708         if (zc->zc_nvlist_src_size != 0) {
5709                 error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
5710                     zc->zc_iflags, &innvl);
5711                 if (error != 0)
5712                         goto out;
5713         }
5714 
5715         /*
5716          * Ensure that all pool/dataset names are valid before we pass down to
5717          * the lower layers.
5718          */
5719         zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
5720         switch (vec->zvec_namecheck) {
5721         case POOL_NAME:
5722                 if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
5723                         error = SET_ERROR(EINVAL);
5724                 else
5725                         error = pool_status_check(zc->zc_name,
5726                             vec->zvec_namecheck, vec->zvec_pool_check);
5727                 break;
5728 
5729         case DATASET_NAME:
5730                 if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
5731                         error = SET_ERROR(EINVAL);
5732                 else
5733                         error = pool_status_check(zc->zc_name,
5734                             vec->zvec_namecheck, vec->zvec_pool_check);
5735                 break;
5736 
5737         case NO_NAME:
5738                 break;
5739         }
5740 
5741 
5742         if (error == 0 && !(flag & FKIOCTL))
5743                 error = vec->zvec_secpolicy(zc, innvl, cr);
5744 
5745         if (error != 0)
5746                 goto out;
5747 
5748         /* legacy ioctls can modify zc_name */
5749         len = strcspn(zc->zc_name, "/@") + 1;
5750         saved_poolname = kmem_alloc(len, KM_SLEEP);
5751         (void) strlcpy(saved_poolname, zc->zc_name, len);
5752 
5753         if (vec->zvec_func != NULL) {
5754                 nvlist_t *outnvl;
5755                 int puterror = 0;
5756                 spa_t *spa;
5757                 nvlist_t *lognv = NULL;
5758 
5759                 ASSERT(vec->zvec_legacy_func == NULL);
5760 
5761                 /*
5762                  * Add the innvl to the lognv before calling the func,
5763                  * in case the func changes the innvl.
5764                  */
5765                 if (vec->zvec_allow_log) {
5766                         lognv = fnvlist_alloc();
5767                         fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
5768                             vec->zvec_name);
5769                         if (!nvlist_empty(innvl)) {
5770                                 fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
5771                                     innvl);
5772                         }
5773                 }
5774 
5775                 outnvl = fnvlist_alloc();
5776                 error = vec->zvec_func(zc->zc_name, innvl, outnvl);
5777 
5778                 if (error == 0 && vec->zvec_allow_log &&
5779                     spa_open(zc->zc_name, &spa, FTAG) == 0) {
5780                         if (!nvlist_empty(outnvl)) {
5781                                 fnvlist_add_nvlist(lognv, ZPOOL_HIST_OUTPUT_NVL,
5782                                     outnvl);
5783                         }
5784                         (void) spa_history_log_nvl(spa, lognv);
5785                         spa_close(spa, FTAG);
5786                 }
5787                 fnvlist_free(lognv);
5788 
5789                 if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
5790                         int smusherror = 0;
5791                         if (vec->zvec_smush_outnvlist) {
5792                                 smusherror = nvlist_smush(outnvl,
5793                                     zc->zc_nvlist_dst_size);
5794                         }
5795                         if (smusherror == 0)
5796                                 puterror = put_nvlist(zc, outnvl);
5797                 }
5798 
5799                 if (puterror != 0)
5800                         error = puterror;
5801 
5802                 nvlist_free(outnvl);
5803         } else {
5804                 error = vec->zvec_legacy_func(zc);
5805         }
5806 
5807 out:
5808         nvlist_free(innvl);
5809         rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
5810         if (error == 0 && rc != 0)
5811                 error = SET_ERROR(EFAULT);
5812         if (error == 0 && vec->zvec_allow_log) {
5813                 char *s = tsd_get(zfs_allow_log_key);
5814                 if (s != NULL)
5815                         strfree(s);
5816                 (void) tsd_set(zfs_allow_log_key, saved_poolname);
5817         } else {
5818                 if (saved_poolname != NULL)
5819                         strfree(saved_poolname);
5820         }
5821 
5822         kmem_free(zc, sizeof (zfs_cmd_t));
5823         return (error);
5824 }
5825 
5826 static int
5827 zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5828 {
5829         if (cmd != DDI_ATTACH)
5830                 return (DDI_FAILURE);
5831 
5832         if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
5833             DDI_PSEUDO, 0) == DDI_FAILURE)
5834                 return (DDI_FAILURE);
5835 
5836         zfs_dip = dip;
5837 
5838         ddi_report_dev(dip);
5839 
5840         return (DDI_SUCCESS);
5841 }
5842 
5843 static int
5844 zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5845 {
5846         if (spa_busy() || zfs_busy() || zvol_busy())
5847                 return (DDI_FAILURE);
5848 
5849         if (cmd != DDI_DETACH)
5850                 return (DDI_FAILURE);
5851 
5852         zfs_dip = NULL;
5853 
5854         ddi_prop_remove_all(dip);
5855         ddi_remove_minor_node(dip, NULL);
5856 
5857         return (DDI_SUCCESS);
5858 }
5859 
5860 /*ARGSUSED*/
5861 static int
5862 zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
5863 {
5864         switch (infocmd) {
5865         case DDI_INFO_DEVT2DEVINFO:
5866                 *result = zfs_dip;
5867                 return (DDI_SUCCESS);
5868 
5869         case DDI_INFO_DEVT2INSTANCE:
5870                 *result = (void *)0;
5871                 return (DDI_SUCCESS);
5872         }
5873 
5874         return (DDI_FAILURE);
5875 }
5876 
/*
 * OK, so this is a little weird.
 *
 * /dev/zfs is the control node, i.e. minor 0.
 * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
 *
 * An FEXCL open of the control node is also given a minor > 0 (see
 * zfsdev_open()); zfsdev_close() and zfsdev_ioctl() tell such a minor apart
 * from a zvol by looking for ZSST_CTLDEV soft state.
 *
 * /dev/zfs has basically nothing to do except serve up ioctls,
 * so most of the standard driver entry points are in zvol.c.
 */
static struct cb_ops zfs_cb_ops = {
        zfsdev_open,    /* open */
        zfsdev_close,   /* close */
        zvol_strategy,  /* strategy */
        nodev,          /* print */
        zvol_dump,      /* dump */
        zvol_read,      /* read */
        zvol_write,     /* write */
        zfsdev_ioctl,   /* ioctl */
        nodev,          /* devmap */
        nodev,          /* mmap */
        nodev,          /* segmap */
        nochpoll,       /* poll */
        ddi_prop_op,    /* prop_op */
        NULL,           /* streamtab */
        D_NEW | D_MP | D_64BIT,         /* Driver compatibility flag */
        CB_REV,         /* version */
        nodev,          /* async read */
        nodev,          /* async write */
};
5906 
/*
 * Device operations for the zfs driver; the character/block entry points
 * are supplied through zfs_cb_ops.
 */
static struct dev_ops zfs_dev_ops = {
        DEVO_REV,       /* version */
        0,              /* refcnt */
        zfs_info,       /* info */
        nulldev,        /* identify */
        nulldev,        /* probe */
        zfs_attach,     /* attach */
        zfs_detach,     /* detach */
        nodev,          /* reset */
        &zfs_cb_ops,        /* driver operations */
        NULL,           /* no bus operations */
        NULL,           /* power */
        ddi_quiesce_not_needed, /* quiesce */
};
5921 
/*
 * Driver linkage structure tying the module to zfs_dev_ops; referenced
 * from modlinkage.
 */
static struct modldrv zfs_modldrv = {
        &mod_driverops,
        "ZFS storage pool",
        &zfs_dev_ops
};
5927 
/*
 * Module linkage passed to mod_install(), mod_remove() and mod_info().
 * It lists two linkage structures, zfs_modlfs and zfs_modldrv, terminated
 * by NULL.
 */
static struct modlinkage modlinkage = {
        MODREV_1,
        (void *)&zfs_modlfs,
        (void *)&zfs_modldrv,
        NULL
};
5934 
/*
 * Destructor registered for zfs_allow_log_key: frees the pool-name string
 * that zfsdev_ioctl() stored under that key.
 */
static void
zfs_allow_log_destroy(void *arg)
{
        strfree((char *)arg);
}
5941 
5942 int
5943 _init(void)
5944 {
5945         int error;
5946 
5947         spa_init(FREAD | FWRITE);
5948         zfs_init();
5949         zvol_init();
5950         zfs_ioctl_init();
5951 
5952         if ((error = mod_install(&modlinkage)) != 0) {
5953                 zvol_fini();
5954                 zfs_fini();
5955                 spa_fini();
5956                 return (error);
5957         }
5958 
5959         tsd_create(&zfs_fsyncer_key, NULL);
5960         tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
5961         tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
5962 
5963         error = ldi_ident_from_mod(&modlinkage, &zfs_li);
5964         ASSERT(error == 0);
5965         mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
5966 
5967         return (0);
5968 }
5969 
5970 int
5971 _fini(void)
5972 {
5973         int error;
5974 
5975         if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
5976                 return (SET_ERROR(EBUSY));
5977 
5978         if ((error = mod_remove(&modlinkage)) != 0)
5979                 return (error);
5980 
5981         zvol_fini();
5982         zfs_fini();
5983         spa_fini();
5984         if (zfs_nfsshare_inited)
5985                 (void) ddi_modclose(nfs_mod);
5986         if (zfs_smbshare_inited)
5987                 (void) ddi_modclose(smbsrv_mod);
5988         if (zfs_nfsshare_inited || zfs_smbshare_inited)
5989                 (void) ddi_modclose(sharefs_mod);
5990 
5991         tsd_destroy(&zfs_fsyncer_key);
5992         ldi_ident_release(zfs_li);
5993         zfs_li = NULL;
5994         mutex_destroy(&zfs_share_lock);
5995 
5996         return (error);
5997 }
5998 
5999 int
6000 _info(struct modinfo *modinfop)
6001 {
6002         return (mod_info(&modlinkage, modinfop));
6003 }