1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Portions Copyright 2011 Martin Matuska
  25  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  26  * Copyright (c) 2012 by Delphix. All rights reserved.
  27  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  28  */
  29 
  30 #include <sys/types.h>
  31 #include <sys/param.h>
  32 #include <sys/errno.h>
  33 #include <sys/uio.h>
  34 #include <sys/buf.h>
  35 #include <sys/modctl.h>
  36 #include <sys/open.h>
  37 #include <sys/file.h>
  38 #include <sys/kmem.h>
  39 #include <sys/conf.h>
  40 #include <sys/cmn_err.h>
  41 #include <sys/stat.h>
  42 #include <sys/zfs_ioctl.h>
  43 #include <sys/zfs_vfsops.h>
  44 #include <sys/zfs_znode.h>
  45 #include <sys/zap.h>
  46 #include <sys/spa.h>
  47 #include <sys/spa_impl.h>
  48 #include <sys/vdev.h>
  49 #include <sys/priv_impl.h>
  50 #include <sys/dmu.h>
  51 #include <sys/dsl_dir.h>
  52 #include <sys/dsl_dataset.h>
  53 #include <sys/dsl_prop.h>
  54 #include <sys/dsl_deleg.h>
  55 #include <sys/dmu_objset.h>
  56 #include <sys/dmu_impl.h>
  57 #include <sys/ddi.h>
  58 #include <sys/sunddi.h>
  59 #include <sys/sunldi.h>
  60 #include <sys/policy.h>
  61 #include <sys/zone.h>
  62 #include <sys/nvpair.h>
  63 #include <sys/pathname.h>
  64 #include <sys/mount.h>
  65 #include <sys/sdt.h>
  66 #include <sys/fs/zfs.h>
  67 #include <sys/zfs_ctldir.h>
  68 #include <sys/zfs_dir.h>
  69 #include <sys/zfs_onexit.h>
  70 #include <sys/zvol.h>
  71 #include <sys/dsl_scan.h>
  72 #include <sharefs/share.h>
  73 #include <sys/dmu_objset.h>
  74 
  75 #include "zfs_namecheck.h"
  76 #include "zfs_prop.h"
  77 #include "zfs_deleg.h"
  78 #include "zfs_comutil.h"
  79 
/* Filesystem module linkage for ZFS; only declared here. */
extern struct modlfs zfs_modlfs;

/* Filesystem-layer init/fini entry points; only declared here. */
extern void zfs_init(void);
extern void zfs_fini(void);

/*
 * LDI identifier and devinfo pointer for the ZFS driver.
 * NOTE(review): where these are assigned is not visible in this part of
 * the file -- presumably at attach time; confirm.
 */
ldi_ident_t zfs_li = NULL;
dev_info_t *zfs_dip;

/* Signature of an ioctl handler: takes the command block, returns an int. */
typedef int zfs_ioc_func_t(zfs_cmd_t *);
/*
 * Signature of a security-policy check: takes the command block and the
 * caller's credentials, returns an int (0 is used for "allowed" by the
 * policy functions in this file).
 */
typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *);
  90 
  91 typedef enum {
  92         NO_NAME,
  93         POOL_NAME,
  94         DATASET_NAME
  95 } zfs_ioc_namecheck_t;
  96 
  97 typedef enum {
  98         POOL_CHECK_NONE         = 1 << 0,
  99         POOL_CHECK_SUSPENDED    = 1 << 1,
 100         POOL_CHECK_READONLY     = 1 << 2
 101 } zfs_ioc_poolcheck_t;
 102 
/*
 * Per-ioctl description: the handler, the security policy that guards it,
 * and the checks that apply to the request.
 * NOTE(review): the table built from these entries and the code that
 * consults it are not visible in this part of the file.
 */
typedef struct zfs_ioc_vec {
	zfs_ioc_func_t		*zvec_func;	/* ioctl handler */
	zfs_secpolicy_func_t	*zvec_secpolicy; /* permission check */
	zfs_ioc_namecheck_t	zvec_namecheck;	/* kind of name expected */
	boolean_t		zvec_his_log;	/* presumably: log to history */
	zfs_ioc_poolcheck_t	zvec_pool_check; /* POOL_CHECK_* flags */
} zfs_ioc_vec_t;
 110 
/*
 * Delegated-permission names for the user/group space properties.
 * This array is indexed by zfs_userquota_prop_t, so the entries must stay
 * in the same order as that enumeration.
 */
static const char *userquota_perms[] = {
	ZFS_DELEG_PERM_USERUSED,
	ZFS_DELEG_PERM_USERQUOTA,
	ZFS_DELEG_PERM_GROUPUSED,
	ZFS_DELEG_PERM_GROUPQUOTA,
};
 118 
/*
 * Forward declarations.  The static functions are defined elsewhere in
 * this translation unit.
 */
static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
static int zfs_check_settable(const char *name, nvpair_t *property,
    cred_t *cr);
static int zfs_check_clearable(char *dataset, nvlist_t *props,
    nvlist_t **errors);
static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
    boolean_t *);
int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t **);
 127 
 128 /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
 129 void
 130 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
 131 {
 132         const char *newfile;
 133         char buf[512];
 134         va_list adx;
 135 
 136         /*
 137          * Get rid of annoying "../common/" prefix to filename.
 138          */
 139         newfile = strrchr(file, '/');
 140         if (newfile != NULL) {
 141                 newfile = newfile + 1; /* Get rid of leading / */
 142         } else {
 143                 newfile = file;
 144         }
 145 
 146         va_start(adx, fmt);
 147         (void) vsnprintf(buf, sizeof (buf), fmt, adx);
 148         va_end(adx);
 149 
 150         /*
 151          * To get this data, use the zfs-dprintf probe as so:
 152          * dtrace -q -n 'zfs-dprintf \
 153          *      /stringof(arg0) == "dbuf.c"/ \
 154          *      {printf("%s: %s", stringof(arg1), stringof(arg3))}'
 155          * arg0 = file name
 156          * arg1 = function name
 157          * arg2 = line number
 158          * arg3 = message
 159          */
 160         DTRACE_PROBE4(zfs__dprintf,
 161             char *, newfile, char *, func, int, line, char *, buf);
 162 }
 163 
 164 static void
 165 history_str_free(char *buf)
 166 {
 167         kmem_free(buf, HIS_MAX_RECORD_LEN);
 168 }
 169 
 170 static char *
 171 history_str_get(zfs_cmd_t *zc)
 172 {
 173         char *buf;
 174 
 175         if (zc->zc_history == NULL)
 176                 return (NULL);
 177 
 178         buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
 179         if (copyinstr((void *)(uintptr_t)zc->zc_history,
 180             buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
 181                 history_str_free(buf);
 182                 return (NULL);
 183         }
 184 
 185         buf[HIS_MAX_RECORD_LEN -1] = '\0';
 186 
 187         return (buf);
 188 }
 189 
 190 /*
 191  * Check to see if the named dataset is currently defined as bootable
 192  */
 193 static boolean_t
 194 zfs_is_bootfs(const char *name)
 195 {
 196         objset_t *os;
 197 
 198         if (dmu_objset_hold(name, FTAG, &os) == 0) {
 199                 boolean_t ret;
 200                 ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
 201                 dmu_objset_rele(os, FTAG);
 202                 return (ret);
 203         }
 204         return (B_FALSE);
 205 }
 206 
 207 /*
 208  * zfs_earlier_version
 209  *
 210  *      Return non-zero if the spa version is less than requested version.
 211  */
 212 static int
 213 zfs_earlier_version(const char *name, int version)
 214 {
 215         spa_t *spa;
 216 
 217         if (spa_open(name, &spa, FTAG) == 0) {
 218                 if (spa_version(spa) < version) {
 219                         spa_close(spa, FTAG);
 220                         return (1);
 221                 }
 222                 spa_close(spa, FTAG);
 223         }
 224         return (0);
 225 }
 226 
 227 /*
 228  * zpl_earlier_version
 229  *
 230  * Return TRUE if the ZPL version is less than requested version.
 231  */
 232 static boolean_t
 233 zpl_earlier_version(const char *name, int version)
 234 {
 235         objset_t *os;
 236         boolean_t rc = B_TRUE;
 237 
 238         if (dmu_objset_hold(name, FTAG, &os) == 0) {
 239                 uint64_t zplversion;
 240 
 241                 if (dmu_objset_type(os) != DMU_OST_ZFS) {
 242                         dmu_objset_rele(os, FTAG);
 243                         return (B_TRUE);
 244                 }
 245                 /* XXX reading from non-owned objset */
 246                 if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
 247                         rc = zplversion < version;
 248                 dmu_objset_rele(os, FTAG);
 249         }
 250         return (rc);
 251 }
 252 
 253 static void
 254 zfs_log_history(zfs_cmd_t *zc)
 255 {
 256         spa_t *spa;
 257         char *buf;
 258 
 259         if ((buf = history_str_get(zc)) == NULL)
 260                 return;
 261 
 262         if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
 263                 if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
 264                         (void) spa_history_log(spa, buf, LOG_CMD_NORMAL);
 265                 spa_close(spa, FTAG);
 266         }
 267         history_str_free(buf);
 268 }
 269 
 270 /*
 271  * Policy for top-level read operations (list pools).  Requires no privileges,
 272  * and can be used in the local zone, as there is no associated dataset.
 273  */
 274 /* ARGSUSED */
 275 static int
 276 zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr)
 277 {
 278         return (0);
 279 }
 280 
 281 /*
 282  * Policy for dataset read operations (list children, get statistics).  Requires
 283  * no privileges, but must be visible in the local zone.
 284  */
 285 /* ARGSUSED */
 286 static int
 287 zfs_secpolicy_read(zfs_cmd_t *zc, cred_t *cr)
 288 {
 289         if (INGLOBALZONE(curproc) ||
 290             zone_dataset_visible(zc->zc_name, NULL))
 291                 return (0);
 292 
 293         return (ENOENT);
 294 }
 295 
 296 static int
 297 zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
 298 {
 299         int writable = 1;
 300 
 301         /*
 302          * The dataset must be visible by this zone -- check this first
 303          * so they don't see EPERM on something they shouldn't know about.
 304          */
 305         if (!INGLOBALZONE(curproc) &&
 306             !zone_dataset_visible(dataset, &writable))
 307                 return (ENOENT);
 308 
 309         if (INGLOBALZONE(curproc)) {
 310                 /*
 311                  * If the fs is zoned, only root can access it from the
 312                  * global zone.
 313                  */
 314                 if (secpolicy_zfs(cr) && zoned)
 315                         return (EPERM);
 316         } else {
 317                 /*
 318                  * If we are in a local zone, the 'zoned' property must be set.
 319                  */
 320                 if (!zoned)
 321                         return (EPERM);
 322 
 323                 /* must be writable by this zone */
 324                 if (!writable)
 325                         return (EPERM);
 326         }
 327         return (0);
 328 }
 329 
 330 static int
 331 zfs_dozonecheck(const char *dataset, cred_t *cr)
 332 {
 333         uint64_t zoned;
 334 
 335         if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
 336                 return (ENOENT);
 337 
 338         return (zfs_dozonecheck_impl(dataset, zoned, cr));
 339 }
 340 
 341 static int
 342 zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
 343 {
 344         uint64_t zoned;
 345 
 346         rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
 347         if (dsl_prop_get_ds(ds, "zoned", 8, 1, &zoned, NULL)) {
 348                 rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
 349                 return (ENOENT);
 350         }
 351         rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
 352 
 353         return (zfs_dozonecheck_impl(dataset, zoned, cr));
 354 }
 355 
 356 /*
 357  * If name ends in a '@', then require recursive permissions.
 358  */
 359 int
 360 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
 361 {
 362         int error;
 363         boolean_t descendent = B_FALSE;
 364         dsl_dataset_t *ds;
 365         char *at;
 366 
 367         at = strchr(name, '@');
 368         if (at != NULL && at[1] == '\0') {
 369                 *at = '\0';
 370                 descendent = B_TRUE;
 371         }
 372 
 373         error = dsl_dataset_hold(name, FTAG, &ds);
 374         if (at != NULL)
 375                 *at = '@';
 376         if (error != 0)
 377                 return (error);
 378 
 379         error = zfs_dozonecheck_ds(name, ds, cr);
 380         if (error == 0) {
 381                 error = secpolicy_zfs(cr);
 382                 if (error)
 383                         error = dsl_deleg_access_impl(ds, descendent, perm, cr);
 384         }
 385 
 386         dsl_dataset_rele(ds, FTAG);
 387         return (error);
 388 }
 389 
 390 int
 391 zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
 392     const char *perm, cred_t *cr)
 393 {
 394         int error;
 395 
 396         error = zfs_dozonecheck_ds(name, ds, cr);
 397         if (error == 0) {
 398                 error = secpolicy_zfs(cr);
 399                 if (error)
 400                         error = dsl_deleg_access_impl(ds, B_FALSE, perm, cr);
 401         }
 402         return (error);
 403 }
 404 
 405 /*
 406  * Policy for setting the security label property.
 407  *
 408  * Returns 0 for success, non-zero for access and other errors.
 409  */
 410 static int
 411 zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
 412 {
 413         char            ds_hexsl[MAXNAMELEN];
 414         bslabel_t       ds_sl, new_sl;
 415         boolean_t       new_default = FALSE;
 416         uint64_t        zoned;
 417         int             needed_priv = -1;
 418         int             error;
 419 
 420         /* First get the existing dataset label. */
 421         error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
 422             1, sizeof (ds_hexsl), &ds_hexsl, NULL);
 423         if (error)
 424                 return (EPERM);
 425 
 426         if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
 427                 new_default = TRUE;
 428 
 429         /* The label must be translatable */
 430         if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
 431                 return (EINVAL);
 432 
 433         /*
 434          * In a non-global zone, disallow attempts to set a label that
 435          * doesn't match that of the zone; otherwise no other checks
 436          * are needed.
 437          */
 438         if (!INGLOBALZONE(curproc)) {
 439                 if (new_default || !blequal(&new_sl, CR_SL(CRED())))
 440                         return (EPERM);
 441                 return (0);
 442         }
 443 
 444         /*
 445          * For global-zone datasets (i.e., those whose zoned property is
 446          * "off", verify that the specified new label is valid for the
 447          * global zone.
 448          */
 449         if (dsl_prop_get_integer(name,
 450             zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
 451                 return (EPERM);
 452         if (!zoned) {
 453                 if (zfs_check_global_label(name, strval) != 0)
 454                         return (EPERM);
 455         }
 456 
 457         /*
 458          * If the existing dataset label is nondefault, check if the
 459          * dataset is mounted (label cannot be changed while mounted).
 460          * Get the zfsvfs; if there isn't one, then the dataset isn't
 461          * mounted (or isn't a dataset, doesn't exist, ...).
 462          */
 463         if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
 464                 objset_t *os;
 465                 static char *setsl_tag = "setsl_tag";
 466 
 467                 /*
 468                  * Try to own the dataset; abort if there is any error,
 469                  * (e.g., already mounted, in use, or other error).
 470                  */
 471                 error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
 472                     setsl_tag, &os);
 473                 if (error)
 474                         return (EPERM);
 475 
 476                 dmu_objset_disown(os, setsl_tag);
 477 
 478                 if (new_default) {
 479                         needed_priv = PRIV_FILE_DOWNGRADE_SL;
 480                         goto out_check;
 481                 }
 482 
 483                 if (hexstr_to_label(strval, &new_sl) != 0)
 484                         return (EPERM);
 485 
 486                 if (blstrictdom(&ds_sl, &new_sl))
 487                         needed_priv = PRIV_FILE_DOWNGRADE_SL;
 488                 else if (blstrictdom(&new_sl, &ds_sl))
 489                         needed_priv = PRIV_FILE_UPGRADE_SL;
 490         } else {
 491                 /* dataset currently has a default label */
 492                 if (!new_default)
 493                         needed_priv = PRIV_FILE_UPGRADE_SL;
 494         }
 495 
 496 out_check:
 497         if (needed_priv != -1)
 498                 return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
 499         return (0);
 500 }
 501 
 502 static int
 503 zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
 504     cred_t *cr)
 505 {
 506         char *strval;
 507 
 508         /*
 509          * Check permissions for special properties.
 510          */
 511         switch (prop) {
 512         case ZFS_PROP_ZONED:
 513                 /*
 514                  * Disallow setting of 'zoned' from within a local zone.
 515                  */
 516                 if (!INGLOBALZONE(curproc))
 517                         return (EPERM);
 518                 break;
 519 
 520         case ZFS_PROP_QUOTA:
 521                 if (!INGLOBALZONE(curproc)) {
 522                         uint64_t zoned;
 523                         char setpoint[MAXNAMELEN];
 524                         /*
 525                          * Unprivileged users are allowed to modify the
 526                          * quota on things *under* (ie. contained by)
 527                          * the thing they own.
 528                          */
 529                         if (dsl_prop_get_integer(dsname, "zoned", &zoned,
 530                             setpoint))
 531                                 return (EPERM);
 532                         if (!zoned || strlen(dsname) <= strlen(setpoint))
 533                                 return (EPERM);
 534                 }
 535                 break;
 536 
 537         case ZFS_PROP_MLSLABEL:
 538                 if (!is_system_labeled())
 539                         return (EPERM);
 540 
 541                 if (nvpair_value_string(propval, &strval) == 0) {
 542                         int err;
 543 
 544                         err = zfs_set_slabel_policy(dsname, strval, CRED());
 545                         if (err != 0)
 546                                 return (err);
 547                 }
 548                 break;
 549         }
 550 
 551         return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
 552 }
 553 
 554 int
 555 zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr)
 556 {
 557         int error;
 558 
 559         error = zfs_dozonecheck(zc->zc_name, cr);
 560         if (error)
 561                 return (error);
 562 
 563         /*
 564          * permission to set permissions will be evaluated later in
 565          * dsl_deleg_can_allow()
 566          */
 567         return (0);
 568 }
 569 
 570 int
 571 zfs_secpolicy_rollback(zfs_cmd_t *zc, cred_t *cr)
 572 {
 573         return (zfs_secpolicy_write_perms(zc->zc_name,
 574             ZFS_DELEG_PERM_ROLLBACK, cr));
 575 }
 576 
 577 int
 578 zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr)
 579 {
 580         spa_t *spa;
 581         dsl_pool_t *dp;
 582         dsl_dataset_t *ds;
 583         char *cp;
 584         int error;
 585 
 586         /*
 587          * Generate the current snapshot name from the given objsetid, then
 588          * use that name for the secpolicy/zone checks.
 589          */
 590         cp = strchr(zc->zc_name, '@');
 591         if (cp == NULL)
 592                 return (EINVAL);
 593         error = spa_open(zc->zc_name, &spa, FTAG);
 594         if (error)
 595                 return (error);
 596 
 597         dp = spa_get_dsl(spa);
 598         rw_enter(&dp->dp_config_rwlock, RW_READER);
 599         error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
 600         rw_exit(&dp->dp_config_rwlock);
 601         spa_close(spa, FTAG);
 602         if (error)
 603                 return (error);
 604 
 605         dsl_dataset_name(ds, zc->zc_name);
 606 
 607         error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
 608             ZFS_DELEG_PERM_SEND, cr);
 609         dsl_dataset_rele(ds, FTAG);
 610 
 611         return (error);
 612 }
 613 
 614 static int
 615 zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr)
 616 {
 617         vnode_t *vp;
 618         int error;
 619 
 620         if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
 621             NO_FOLLOW, NULL, &vp)) != 0)
 622                 return (error);
 623 
 624         /* Now make sure mntpnt and dataset are ZFS */
 625 
 626         if (vp->v_vfsp->vfs_fstype != zfsfstype ||
 627             (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
 628             zc->zc_name) != 0)) {
 629                 VN_RELE(vp);
 630                 return (EPERM);
 631         }
 632 
 633         VN_RELE(vp);
 634         return (dsl_deleg_access(zc->zc_name,
 635             ZFS_DELEG_PERM_SHARE, cr));
 636 }
 637 
 638 int
 639 zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr)
 640 {
 641         if (!INGLOBALZONE(curproc))
 642                 return (EPERM);
 643 
 644         if (secpolicy_nfs(cr) == 0) {
 645                 return (0);
 646         } else {
 647                 return (zfs_secpolicy_deleg_share(zc, cr));
 648         }
 649 }
 650 
 651 int
 652 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr)
 653 {
 654         if (!INGLOBALZONE(curproc))
 655                 return (EPERM);
 656 
 657         if (secpolicy_smb(cr) == 0) {
 658                 return (0);
 659         } else {
 660                 return (zfs_secpolicy_deleg_share(zc, cr));
 661         }
 662 }
 663 
 664 static int
 665 zfs_get_parent(const char *datasetname, char *parent, int parentsize)
 666 {
 667         char *cp;
 668 
 669         /*
 670          * Remove the @bla or /bla from the end of the name to get the parent.
 671          */
 672         (void) strncpy(parent, datasetname, parentsize);
 673         cp = strrchr(parent, '@');
 674         if (cp != NULL) {
 675                 cp[0] = '\0';
 676         } else {
 677                 cp = strrchr(parent, '/');
 678                 if (cp == NULL)
 679                         return (ENOENT);
 680                 cp[0] = '\0';
 681         }
 682 
 683         return (0);
 684 }
 685 
 686 int
 687 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
 688 {
 689         int error;
 690 
 691         if ((error = zfs_secpolicy_write_perms(name,
 692             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
 693                 return (error);
 694 
 695         return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
 696 }
 697 
 698 static int
 699 zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr)
 700 {
 701         return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
 702 }
 703 
 704 /*
 705  * Destroying snapshots with delegated permissions requires
 706  * descendent mount and destroy permissions.
 707  */
 708 static int
 709 zfs_secpolicy_destroy_recursive(zfs_cmd_t *zc, cred_t *cr)
 710 {
 711         int error;
 712         char *dsname;
 713 
 714         dsname = kmem_asprintf("%s@", zc->zc_name);
 715 
 716         error = zfs_secpolicy_destroy_perms(dsname, cr);
 717 
 718         strfree(dsname);
 719         return (error);
 720 }
 721 
 722 int
 723 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
 724 {
 725         char    parentname[MAXNAMELEN];
 726         int     error;
 727 
 728         if ((error = zfs_secpolicy_write_perms(from,
 729             ZFS_DELEG_PERM_RENAME, cr)) != 0)
 730                 return (error);
 731 
 732         if ((error = zfs_secpolicy_write_perms(from,
 733             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
 734                 return (error);
 735 
 736         if ((error = zfs_get_parent(to, parentname,
 737             sizeof (parentname))) != 0)
 738                 return (error);
 739 
 740         if ((error = zfs_secpolicy_write_perms(parentname,
 741             ZFS_DELEG_PERM_CREATE, cr)) != 0)
 742                 return (error);
 743 
 744         if ((error = zfs_secpolicy_write_perms(parentname,
 745             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
 746                 return (error);
 747 
 748         return (error);
 749 }
 750 
 751 static int
 752 zfs_secpolicy_rename(zfs_cmd_t *zc, cred_t *cr)
 753 {
 754         return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
 755 }
 756 
 757 static int
 758 zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr)
 759 {
 760         char    parentname[MAXNAMELEN];
 761         objset_t *clone;
 762         int error;
 763 
 764         error = zfs_secpolicy_write_perms(zc->zc_name,
 765             ZFS_DELEG_PERM_PROMOTE, cr);
 766         if (error)
 767                 return (error);
 768 
 769         error = dmu_objset_hold(zc->zc_name, FTAG, &clone);
 770 
 771         if (error == 0) {
 772                 dsl_dataset_t *pclone = NULL;
 773                 dsl_dir_t *dd;
 774                 dd = clone->os_dsl_dataset->ds_dir;
 775 
 776                 rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
 777                 error = dsl_dataset_hold_obj(dd->dd_pool,
 778                     dd->dd_phys->dd_origin_obj, FTAG, &pclone);
 779                 rw_exit(&dd->dd_pool->dp_config_rwlock);
 780                 if (error) {
 781                         dmu_objset_rele(clone, FTAG);
 782                         return (error);
 783                 }
 784 
 785                 error = zfs_secpolicy_write_perms(zc->zc_name,
 786                     ZFS_DELEG_PERM_MOUNT, cr);
 787 
 788                 dsl_dataset_name(pclone, parentname);
 789                 dmu_objset_rele(clone, FTAG);
 790                 dsl_dataset_rele(pclone, FTAG);
 791                 if (error == 0)
 792                         error = zfs_secpolicy_write_perms(parentname,
 793                             ZFS_DELEG_PERM_PROMOTE, cr);
 794         }
 795         return (error);
 796 }
 797 
 798 static int
 799 zfs_secpolicy_receive(zfs_cmd_t *zc, cred_t *cr)
 800 {
 801         int error;
 802 
 803         if ((error = zfs_secpolicy_write_perms(zc->zc_name,
 804             ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
 805                 return (error);
 806 
 807         if ((error = zfs_secpolicy_write_perms(zc->zc_name,
 808             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
 809                 return (error);
 810 
 811         return (zfs_secpolicy_write_perms(zc->zc_name,
 812             ZFS_DELEG_PERM_CREATE, cr));
 813 }
 814 
 815 int
 816 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
 817 {
 818         return (zfs_secpolicy_write_perms(name,
 819             ZFS_DELEG_PERM_SNAPSHOT, cr));
 820 }
 821 
 822 static int
 823 zfs_secpolicy_snapshot(zfs_cmd_t *zc, cred_t *cr)
 824 {
 825 
 826         return (zfs_secpolicy_snapshot_perms(zc->zc_name, cr));
 827 }
 828 
 829 static int
 830 zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr)
 831 {
 832         char    parentname[MAXNAMELEN];
 833         int     error;
 834 
 835         if ((error = zfs_get_parent(zc->zc_name, parentname,
 836             sizeof (parentname))) != 0)
 837                 return (error);
 838 
 839         if (zc->zc_value[0] != '\0') {
 840                 if ((error = zfs_secpolicy_write_perms(zc->zc_value,
 841                     ZFS_DELEG_PERM_CLONE, cr)) != 0)
 842                         return (error);
 843         }
 844 
 845         if ((error = zfs_secpolicy_write_perms(parentname,
 846             ZFS_DELEG_PERM_CREATE, cr)) != 0)
 847                 return (error);
 848 
 849         error = zfs_secpolicy_write_perms(parentname,
 850             ZFS_DELEG_PERM_MOUNT, cr);
 851 
 852         return (error);
 853 }
 854 
 855 static int
 856 zfs_secpolicy_umount(zfs_cmd_t *zc, cred_t *cr)
 857 {
 858         int error;
 859 
 860         error = secpolicy_fs_unmount(cr, NULL);
 861         if (error) {
 862                 error = dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr);
 863         }
 864         return (error);
 865 }
 866 
 867 /*
 868  * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
 869  * SYS_CONFIG privilege, which is not available in a local zone.
 870  */
 871 /* ARGSUSED */
 872 static int
 873 zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr)
 874 {
 875         if (secpolicy_sys_config(cr, B_FALSE) != 0)
 876                 return (EPERM);
 877 
 878         return (0);
 879 }
 880 
 881 /*
 882  * Policy for object to name lookups.
 883  */
 884 /* ARGSUSED */
 885 static int
 886 zfs_secpolicy_diff(zfs_cmd_t *zc, cred_t *cr)
 887 {
 888         int error;
 889 
 890         if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0)
 891                 return (0);
 892 
 893         error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
 894         return (error);
 895 }
 896 
 897 /*
 898  * Policy for fault injection.  Requires all privileges.
 899  */
 900 /* ARGSUSED */
 901 static int
 902 zfs_secpolicy_inject(zfs_cmd_t *zc, cred_t *cr)
 903 {
 904         return (secpolicy_zinject(cr));
 905 }
 906 
 907 static int
 908 zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr)
 909 {
 910         zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
 911 
 912         if (prop == ZPROP_INVAL) {
 913                 if (!zfs_prop_user(zc->zc_value))
 914                         return (EINVAL);
 915                 return (zfs_secpolicy_write_perms(zc->zc_name,
 916                     ZFS_DELEG_PERM_USERPROP, cr));
 917         } else {
 918                 return (zfs_secpolicy_setprop(zc->zc_name, prop,
 919                     NULL, cr));
 920         }
 921 }
 922 
 923 static int
 924 zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr)
 925 {
 926         int err = zfs_secpolicy_read(zc, cr);
 927         if (err)
 928                 return (err);
 929 
 930         if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
 931                 return (EINVAL);
 932 
 933         if (zc->zc_value[0] == 0) {
 934                 /*
 935                  * They are asking about a posix uid/gid.  If it's
 936                  * themself, allow it.
 937                  */
 938                 if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
 939                     zc->zc_objset_type == ZFS_PROP_USERQUOTA) {
 940                         if (zc->zc_guid == crgetuid(cr))
 941                                 return (0);
 942                 } else {
 943                         if (groupmember(zc->zc_guid, cr))
 944                                 return (0);
 945                 }
 946         }
 947 
 948         return (zfs_secpolicy_write_perms(zc->zc_name,
 949             userquota_perms[zc->zc_objset_type], cr));
 950 }
 951 
 952 static int
 953 zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr)
 954 {
 955         int err = zfs_secpolicy_read(zc, cr);
 956         if (err)
 957                 return (err);
 958 
 959         if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
 960                 return (EINVAL);
 961 
 962         return (zfs_secpolicy_write_perms(zc->zc_name,
 963             userquota_perms[zc->zc_objset_type], cr));
 964 }
 965 
 966 static int
 967 zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, cred_t *cr)
 968 {
 969         return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
 970             NULL, cr));
 971 }
 972 
 973 static int
 974 zfs_secpolicy_hold(zfs_cmd_t *zc, cred_t *cr)
 975 {
 976         return (zfs_secpolicy_write_perms(zc->zc_name,
 977             ZFS_DELEG_PERM_HOLD, cr));
 978 }
 979 
 980 static int
 981 zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr)
 982 {
 983         return (zfs_secpolicy_write_perms(zc->zc_name,
 984             ZFS_DELEG_PERM_RELEASE, cr));
 985 }
 986 
 987 /*
 988  * Policy for allowing temporary snapshots to be taken or released
 989  */
 990 static int
 991 zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, cred_t *cr)
 992 {
 993         /*
 994          * A temporary snapshot is the same as a snapshot,
 995          * hold, destroy and release all rolled into one.
 996          * Delegated diff alone is sufficient that we allow this.
 997          */
 998         int error;
 999 
1000         if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1001             ZFS_DELEG_PERM_DIFF, cr)) == 0)
1002                 return (0);
1003 
1004         error = zfs_secpolicy_snapshot(zc, cr);
1005         if (!error)
1006                 error = zfs_secpolicy_hold(zc, cr);
1007         if (!error)
1008                 error = zfs_secpolicy_release(zc, cr);
1009         if (!error)
1010                 error = zfs_secpolicy_destroy(zc, cr);
1011         return (error);
1012 }
1013 
1014 /*
1015  * Returns the nvlist as specified by the user in the zfs_cmd_t.
1016  */
1017 static int
1018 get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1019 {
1020         char *packed;
1021         int error;
1022         nvlist_t *list = NULL;
1023 
1024         /*
1025          * Read in and unpack the user-supplied nvlist.
1026          */
1027         if (size == 0)
1028                 return (EINVAL);
1029 
1030         packed = kmem_alloc(size, KM_SLEEP);
1031 
1032         if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
1033             iflag)) != 0) {
1034                 kmem_free(packed, size);
1035                 return (error);
1036         }
1037 
1038         if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1039                 kmem_free(packed, size);
1040                 return (error);
1041         }
1042 
1043         kmem_free(packed, size);
1044 
1045         *nvp = list;
1046         return (0);
1047 }
1048 
1049 static int
1050 fit_error_list(zfs_cmd_t *zc, nvlist_t **errors)
1051 {
1052         size_t size;
1053 
1054         VERIFY(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0);
1055 
1056         if (size > zc->zc_nvlist_dst_size) {
1057                 nvpair_t *more_errors;
1058                 int n = 0;
1059 
1060                 if (zc->zc_nvlist_dst_size < 1024)
1061                         return (ENOMEM);
1062 
1063                 VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, 0) == 0);
1064                 more_errors = nvlist_prev_nvpair(*errors, NULL);
1065 
1066                 do {
1067                         nvpair_t *pair = nvlist_prev_nvpair(*errors,
1068                             more_errors);
1069                         VERIFY(nvlist_remove_nvpair(*errors, pair) == 0);
1070                         n++;
1071                         VERIFY(nvlist_size(*errors, &size,
1072                             NV_ENCODE_NATIVE) == 0);
1073                 } while (size > zc->zc_nvlist_dst_size);
1074 
1075                 VERIFY(nvlist_remove_nvpair(*errors, more_errors) == 0);
1076                 VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, n) == 0);
1077                 ASSERT(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0);
1078                 ASSERT(size <= zc->zc_nvlist_dst_size);
1079         }
1080 
1081         return (0);
1082 }
1083 
1084 static int
1085 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1086 {
1087         char *packed = NULL;
1088         int error = 0;
1089         size_t size;
1090 
1091         VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0);
1092 
1093         if (size > zc->zc_nvlist_dst_size) {
1094                 error = ENOMEM;
1095         } else {
1096                 packed = kmem_alloc(size, KM_SLEEP);
1097                 VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE,
1098                     KM_SLEEP) == 0);
1099                 if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1100                     size, zc->zc_iflags) != 0)
1101                         error = EFAULT;
1102                 kmem_free(packed, size);
1103         }
1104 
1105         zc->zc_nvlist_dst_size = size;
1106         return (error);
1107 }
1108 
1109 static int
1110 getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1111 {
1112         objset_t *os;
1113         int error;
1114 
1115         error = dmu_objset_hold(dsname, FTAG, &os);
1116         if (error)
1117                 return (error);
1118         if (dmu_objset_type(os) != DMU_OST_ZFS) {
1119                 dmu_objset_rele(os, FTAG);
1120                 return (EINVAL);
1121         }
1122 
1123         mutex_enter(&os->os_user_ptr_lock);
1124         *zfvp = dmu_objset_get_user(os);
1125         if (*zfvp) {
1126                 VFS_HOLD((*zfvp)->z_vfs);
1127         } else {
1128                 error = ESRCH;
1129         }
1130         mutex_exit(&os->os_user_ptr_lock);
1131         dmu_objset_rele(os, FTAG);
1132         return (error);
1133 }
1134 
1135 /*
1136  * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1137  * case its z_vfs will be NULL, and it will be opened as the owner.
1138  * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
1139  * which prevents all vnode ops from running.
1140  */
1141 static int
1142 zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
1143 {
1144         int error = 0;
1145 
1146         if (getzfsvfs(name, zfvp) != 0)
1147                 error = zfsvfs_create(name, zfvp);
1148         if (error == 0) {
1149                 rrw_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
1150                     RW_READER, tag);
1151                 if ((*zfvp)->z_unmounted) {
1152                         /*
1153                          * XXX we could probably try again, since the unmounting
1154                          * thread should be just about to disassociate the
1155                          * objset from the zfsvfs.
1156                          */
1157                         rrw_exit(&(*zfvp)->z_teardown_lock, tag);
1158                         return (EBUSY);
1159                 }
1160         }
1161         return (error);
1162 }
1163 
1164 static void
1165 zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
1166 {
1167         rrw_exit(&zfsvfs->z_teardown_lock, tag);
1168 
1169         if (zfsvfs->z_vfs) {
1170                 VFS_RELE(zfsvfs->z_vfs);
1171         } else {
1172                 dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1173                 zfsvfs_free(zfsvfs);
1174         }
1175 }
1176 
1177 static int
1178 zfs_ioc_pool_create(zfs_cmd_t *zc)
1179 {
1180         int error;
1181         nvlist_t *config, *props = NULL;
1182         nvlist_t *rootprops = NULL;
1183         nvlist_t *zplprops = NULL;
1184         char *buf;
1185 
1186         if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1187             zc->zc_iflags, &config))
1188                 return (error);
1189 
1190         if (zc->zc_nvlist_src_size != 0 && (error =
1191             get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1192             zc->zc_iflags, &props))) {
1193                 nvlist_free(config);
1194                 return (error);
1195         }
1196 
1197         if (props) {
1198                 nvlist_t *nvl = NULL;
1199                 uint64_t version = SPA_VERSION;
1200 
1201                 (void) nvlist_lookup_uint64(props,
1202                     zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1203                 if (!SPA_VERSION_IS_SUPPORTED(version)) {
1204                         error = EINVAL;
1205                         goto pool_props_bad;
1206                 }
1207                 (void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1208                 if (nvl) {
1209                         error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1210                         if (error != 0) {
1211                                 nvlist_free(config);
1212                                 nvlist_free(props);
1213                                 return (error);
1214                         }
1215                         (void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1216                 }
1217                 VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1218                 error = zfs_fill_zplprops_root(version, rootprops,
1219                     zplprops, NULL);
1220                 if (error)
1221                         goto pool_props_bad;
1222         }
1223 
1224         buf = history_str_get(zc);
1225 
1226         error = spa_create(zc->zc_name, config, props, buf, zplprops);
1227 
1228         /*
1229          * Set the remaining root properties
1230          */
1231         if (!error && (error = zfs_set_prop_nvlist(zc->zc_name,
1232             ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
1233                 (void) spa_destroy(zc->zc_name);
1234 
1235         if (buf != NULL)
1236                 history_str_free(buf);
1237 
1238 pool_props_bad:
1239         nvlist_free(rootprops);
1240         nvlist_free(zplprops);
1241         nvlist_free(config);
1242         nvlist_free(props);
1243 
1244         return (error);
1245 }
1246 
1247 static int
1248 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1249 {
1250         int error;
1251         zfs_log_history(zc);
1252         error = spa_destroy(zc->zc_name);
1253         if (error == 0)
1254                 zvol_remove_minors(zc->zc_name);
1255         return (error);
1256 }
1257 
1258 static int
1259 zfs_ioc_pool_import(zfs_cmd_t *zc)
1260 {
1261         nvlist_t *config, *props = NULL;
1262         uint64_t guid;
1263         int error;
1264 
1265         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1266             zc->zc_iflags, &config)) != 0)
1267                 return (error);
1268 
1269         if (zc->zc_nvlist_src_size != 0 && (error =
1270             get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1271             zc->zc_iflags, &props))) {
1272                 nvlist_free(config);
1273                 return (error);
1274         }
1275 
1276         if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1277             guid != zc->zc_guid)
1278                 error = EINVAL;
1279         else
1280                 error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1281 
1282         if (zc->zc_nvlist_dst != 0) {
1283                 int err;
1284 
1285                 if ((err = put_nvlist(zc, config)) != 0)
1286                         error = err;
1287         }
1288 
1289         nvlist_free(config);
1290 
1291         if (props)
1292                 nvlist_free(props);
1293 
1294         return (error);
1295 }
1296 
1297 static int
1298 zfs_ioc_pool_export(zfs_cmd_t *zc)
1299 {
1300         int error;
1301         boolean_t force = (boolean_t)zc->zc_cookie;
1302         boolean_t hardforce = (boolean_t)zc->zc_guid;
1303 
1304         zfs_log_history(zc);
1305         error = spa_export(zc->zc_name, NULL, force, hardforce);
1306         if (error == 0)
1307                 zvol_remove_minors(zc->zc_name);
1308         return (error);
1309 }
1310 
1311 static int
1312 zfs_ioc_pool_configs(zfs_cmd_t *zc)
1313 {
1314         nvlist_t *configs;
1315         int error;
1316 
1317         if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1318                 return (EEXIST);
1319 
1320         error = put_nvlist(zc, configs);
1321 
1322         nvlist_free(configs);
1323 
1324         return (error);
1325 }
1326 
1327 /*
1328  * inputs:
1329  * zc_name              name of the pool
1330  *
1331  * outputs:
1332  * zc_cookie            real errno
1333  * zc_nvlist_dst        config nvlist
1334  * zc_nvlist_dst_size   size of config nvlist
1335  */
1336 static int
1337 zfs_ioc_pool_stats(zfs_cmd_t *zc)
1338 {
1339         nvlist_t *config;
1340         int error;
1341         int ret = 0;
1342 
1343         error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1344             sizeof (zc->zc_value));
1345 
1346         if (config != NULL) {
1347                 ret = put_nvlist(zc, config);
1348                 nvlist_free(config);
1349 
1350                 /*
1351                  * The config may be present even if 'error' is non-zero.
1352                  * In this case we return success, and preserve the real errno
1353                  * in 'zc_cookie'.
1354                  */
1355                 zc->zc_cookie = error;
1356         } else {
1357                 ret = error;
1358         }
1359 
1360         return (ret);
1361 }
1362 
1363 /*
1364  * Try to import the given pool, returning pool stats as appropriate so that
1365  * user land knows which devices are available and overall pool health.
1366  */
1367 static int
1368 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1369 {
1370         nvlist_t *tryconfig, *config;
1371         int error;
1372 
1373         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1374             zc->zc_iflags, &tryconfig)) != 0)
1375                 return (error);
1376 
1377         config = spa_tryimport(tryconfig);
1378 
1379         nvlist_free(tryconfig);
1380 
1381         if (config == NULL)
1382                 return (EINVAL);
1383 
1384         error = put_nvlist(zc, config);
1385         nvlist_free(config);
1386 
1387         return (error);
1388 }
1389 
1390 /*
1391  * inputs:
1392  * zc_name              name of the pool
1393  * zc_cookie            scan func (pool_scan_func_t)
1394  */
1395 static int
1396 zfs_ioc_pool_scan(zfs_cmd_t *zc)
1397 {
1398         spa_t *spa;
1399         int error;
1400 
1401         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1402                 return (error);
1403 
1404         if (zc->zc_cookie == POOL_SCAN_NONE)
1405                 error = spa_scan_stop(spa);
1406         else
1407                 error = spa_scan(spa, zc->zc_cookie);
1408 
1409         spa_close(spa, FTAG);
1410 
1411         return (error);
1412 }
1413 
1414 static int
1415 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1416 {
1417         spa_t *spa;
1418         int error;
1419 
1420         error = spa_open(zc->zc_name, &spa, FTAG);
1421         if (error == 0) {
1422                 spa_freeze(spa);
1423                 spa_close(spa, FTAG);
1424         }
1425         return (error);
1426 }
1427 
1428 static int
1429 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1430 {
1431         spa_t *spa;
1432         int error;
1433 
1434         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1435                 return (error);
1436 
1437         if (zc->zc_cookie < spa_version(spa) ||
1438             !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
1439                 spa_close(spa, FTAG);
1440                 return (EINVAL);
1441         }
1442 
1443         spa_upgrade(spa, zc->zc_cookie);
1444         spa_close(spa, FTAG);
1445 
1446         return (error);
1447 }
1448 
1449 static int
1450 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1451 {
1452         spa_t *spa;
1453         char *hist_buf;
1454         uint64_t size;
1455         int error;
1456 
1457         if ((size = zc->zc_history_len) == 0)
1458                 return (EINVAL);
1459 
1460         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1461                 return (error);
1462 
1463         if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1464                 spa_close(spa, FTAG);
1465                 return (ENOTSUP);
1466         }
1467 
1468         hist_buf = kmem_alloc(size, KM_SLEEP);
1469         if ((error = spa_history_get(spa, &zc->zc_history_offset,
1470             &zc->zc_history_len, hist_buf)) == 0) {
1471                 error = ddi_copyout(hist_buf,
1472                     (void *)(uintptr_t)zc->zc_history,
1473                     zc->zc_history_len, zc->zc_iflags);
1474         }
1475 
1476         spa_close(spa, FTAG);
1477         kmem_free(hist_buf, size);
1478         return (error);
1479 }
1480 
1481 static int
1482 zfs_ioc_pool_reguid(zfs_cmd_t *zc)
1483 {
1484         spa_t *spa;
1485         int error;
1486 
1487         error = spa_open(zc->zc_name, &spa, FTAG);
1488         if (error == 0) {
1489                 error = spa_change_guid(spa);
1490                 spa_close(spa, FTAG);
1491         }
1492         return (error);
1493 }
1494 
1495 static int
1496 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1497 {
1498         int error;
1499 
1500         if (error = dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value))
1501                 return (error);
1502 
1503         return (0);
1504 }
1505 
1506 /*
1507  * inputs:
1508  * zc_name              name of filesystem
1509  * zc_obj               object to find
1510  *
1511  * outputs:
1512  * zc_value             name of object
1513  */
1514 static int
1515 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1516 {
1517         objset_t *os;
1518         int error;
1519 
1520         /* XXX reading from objset not owned */
1521         if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1522                 return (error);
1523         if (dmu_objset_type(os) != DMU_OST_ZFS) {
1524                 dmu_objset_rele(os, FTAG);
1525                 return (EINVAL);
1526         }
1527         error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1528             sizeof (zc->zc_value));
1529         dmu_objset_rele(os, FTAG);
1530 
1531         return (error);
1532 }
1533 
1534 /*
1535  * inputs:
1536  * zc_name              name of filesystem
1537  * zc_obj               object to find
1538  *
1539  * outputs:
1540  * zc_stat              stats on object
1541  * zc_value             path to object
1542  */
1543 static int
1544 zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
1545 {
1546         objset_t *os;
1547         int error;
1548 
1549         /* XXX reading from objset not owned */
1550         if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1551                 return (error);
1552         if (dmu_objset_type(os) != DMU_OST_ZFS) {
1553                 dmu_objset_rele(os, FTAG);
1554                 return (EINVAL);
1555         }
1556         error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
1557             sizeof (zc->zc_value));
1558         dmu_objset_rele(os, FTAG);
1559 
1560         return (error);
1561 }
1562 
1563 static int
1564 zfs_ioc_vdev_add(zfs_cmd_t *zc)
1565 {
1566         spa_t *spa;
1567         int error;
1568         nvlist_t *config, **l2cache, **spares;
1569         uint_t nl2cache = 0, nspares = 0;
1570 
1571         error = spa_open(zc->zc_name, &spa, FTAG);
1572         if (error != 0)
1573                 return (error);
1574 
1575         error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1576             zc->zc_iflags, &config);
1577         (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
1578             &l2cache, &nl2cache);
1579 
1580         (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
1581             &spares, &nspares);
1582 
1583         /*
1584          * A root pool with concatenated devices is not supported.
1585          * Thus, can not add a device to a root pool.
1586          *
1587          * Intent log device can not be added to a rootpool because
1588          * during mountroot, zil is replayed, a seperated log device
1589          * can not be accessed during the mountroot time.
1590          *
1591          * l2cache and spare devices are ok to be added to a rootpool.
1592          */
1593         if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
1594                 nvlist_free(config);
1595                 spa_close(spa, FTAG);
1596                 return (EDOM);
1597         }
1598 
1599         if (error == 0) {
1600                 error = spa_vdev_add(spa, config);
1601                 nvlist_free(config);
1602         }
1603         spa_close(spa, FTAG);
1604         return (error);
1605 }
1606 
1607 /*
1608  * inputs:
1609  * zc_name              name of the pool
1610  * zc_nvlist_conf       nvlist of devices to remove
1611  * zc_cookie            to stop the remove?
1612  */
1613 static int
1614 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1615 {
1616         spa_t *spa;
1617         int error;
1618 
1619         error = spa_open(zc->zc_name, &spa, FTAG);
1620         if (error != 0)
1621                 return (error);
1622         error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1623         spa_close(spa, FTAG);
1624         return (error);
1625 }
1626 
1627 static int
1628 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1629 {
1630         spa_t *spa;
1631         int error;
1632         vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1633 
1634         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1635                 return (error);
1636         switch (zc->zc_cookie) {
1637         case VDEV_STATE_ONLINE:
1638                 error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
1639                 break;
1640 
1641         case VDEV_STATE_OFFLINE:
1642                 error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
1643                 break;
1644 
1645         case VDEV_STATE_FAULTED:
1646                 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1647                     zc->zc_obj != VDEV_AUX_EXTERNAL)
1648                         zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1649 
1650                 error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
1651                 break;
1652 
1653         case VDEV_STATE_DEGRADED:
1654                 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1655                     zc->zc_obj != VDEV_AUX_EXTERNAL)
1656                         zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1657 
1658                 error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
1659                 break;
1660 
1661         default:
1662                 error = EINVAL;
1663         }
1664         zc->zc_cookie = newstate;
1665         spa_close(spa, FTAG);
1666         return (error);
1667 }
1668 
1669 static int
1670 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
1671 {
1672         spa_t *spa;
1673         int replacing = zc->zc_cookie;
1674         nvlist_t *config;
1675         int error;
1676 
1677         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1678                 return (error);
1679 
1680         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1681             zc->zc_iflags, &config)) == 0) {
1682                 error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
1683                 nvlist_free(config);
1684         }
1685 
1686         spa_close(spa, FTAG);
1687         return (error);
1688 }
1689 
1690 static int
1691 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
1692 {
1693         spa_t *spa;
1694         int error;
1695 
1696         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1697                 return (error);
1698 
1699         error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
1700 
1701         spa_close(spa, FTAG);
1702         return (error);
1703 }
1704 
1705 static int
1706 zfs_ioc_vdev_split(zfs_cmd_t *zc)
1707 {
1708         spa_t *spa;
1709         nvlist_t *config, *props = NULL;
1710         int error;
1711         boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
1712 
1713         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1714                 return (error);
1715 
1716         if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1717             zc->zc_iflags, &config)) {
1718                 spa_close(spa, FTAG);
1719                 return (error);
1720         }
1721 
1722         if (zc->zc_nvlist_src_size != 0 && (error =
1723             get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1724             zc->zc_iflags, &props))) {
1725                 spa_close(spa, FTAG);
1726                 nvlist_free(config);
1727                 return (error);
1728         }
1729 
1730         error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
1731 
1732         spa_close(spa, FTAG);
1733 
1734         nvlist_free(config);
1735         nvlist_free(props);
1736 
1737         return (error);
1738 }
1739 
1740 static int
1741 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
1742 {
1743         spa_t *spa;
1744         char *path = zc->zc_value;
1745         uint64_t guid = zc->zc_guid;
1746         int error;
1747 
1748         error = spa_open(zc->zc_name, &spa, FTAG);
1749         if (error != 0)
1750                 return (error);
1751 
1752         error = spa_vdev_setpath(spa, guid, path);
1753         spa_close(spa, FTAG);
1754         return (error);
1755 }
1756 
1757 static int
1758 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
1759 {
1760         spa_t *spa;
1761         char *fru = zc->zc_value;
1762         uint64_t guid = zc->zc_guid;
1763         int error;
1764 
1765         error = spa_open(zc->zc_name, &spa, FTAG);
1766         if (error != 0)
1767                 return (error);
1768 
1769         error = spa_vdev_setfru(spa, guid, fru);
1770         spa_close(spa, FTAG);
1771         return (error);
1772 }
1773 
1774 static int
1775 zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
1776 {
1777         int error = 0;
1778         nvlist_t *nv;
1779 
1780         dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1781 
1782         if (zc->zc_nvlist_dst != 0 &&
1783             (error = dsl_prop_get_all(os, &nv)) == 0) {
1784                 dmu_objset_stats(os, nv);
1785                 /*
1786                  * NB: zvol_get_stats() will read the objset contents,
1787                  * which we aren't supposed to do with a
1788                  * DS_MODE_USER hold, because it could be
1789                  * inconsistent.  So this is a bit of a workaround...
1790                  * XXX reading with out owning
1791                  */
1792                 if (!zc->zc_objset_stats.dds_inconsistent &&
1793                     dmu_objset_type(os) == DMU_OST_ZVOL) {
1794                         error = zvol_get_stats(os, nv);
1795                         if (error == EIO)
1796                                 return (error);
1797                         VERIFY3S(error, ==, 0);
1798                 }
1799                 error = put_nvlist(zc, nv);
1800                 nvlist_free(nv);
1801         }
1802 
1803         return (error);
1804 }
1805 
1806 /*
1807  * inputs:
1808  * zc_name              name of filesystem
1809  * zc_nvlist_dst_size   size of buffer for property nvlist
1810  *
1811  * outputs:
1812  * zc_objset_stats      stats
1813  * zc_nvlist_dst        property nvlist
1814  * zc_nvlist_dst_size   size of property nvlist
1815  */
1816 static int
1817 zfs_ioc_objset_stats(zfs_cmd_t *zc)
1818 {
1819         objset_t *os = NULL;
1820         int error;
1821 
1822         if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
1823                 return (error);
1824 
1825         error = zfs_ioc_objset_stats_impl(zc, os);
1826 
1827         dmu_objset_rele(os, FTAG);
1828 
1829         return (error);
1830 }
1831 
1832 /*
1833  * inputs:
1834  * zc_name              name of filesystem
1835  * zc_nvlist_dst_size   size of buffer for property nvlist
1836  *
1837  * outputs:
1838  * zc_nvlist_dst        received property nvlist
1839  * zc_nvlist_dst_size   size of received property nvlist
1840  *
1841  * Gets received properties (distinct from local properties on or after
1842  * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
1843  * local property values.
1844  */
1845 static int
1846 zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
1847 {
1848         objset_t *os = NULL;
1849         int error;
1850         nvlist_t *nv;
1851 
1852         if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
1853                 return (error);
1854 
1855         /*
1856          * Without this check, we would return local property values if the
1857          * caller has not already received properties on or after
1858          * SPA_VERSION_RECVD_PROPS.
1859          */
1860         if (!dsl_prop_get_hasrecvd(os)) {
1861                 dmu_objset_rele(os, FTAG);
1862                 return (ENOTSUP);
1863         }
1864 
1865         if (zc->zc_nvlist_dst != 0 &&
1866             (error = dsl_prop_get_received(os, &nv)) == 0) {
1867                 error = put_nvlist(zc, nv);
1868                 nvlist_free(nv);
1869         }
1870 
1871         dmu_objset_rele(os, FTAG);
1872         return (error);
1873 }
1874 
1875 static int
1876 nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
1877 {
1878         uint64_t value;
1879         int error;
1880 
1881         /*
1882          * zfs_get_zplprop() will either find a value or give us
1883          * the default value (if there is one).
1884          */
1885         if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
1886                 return (error);
1887         VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
1888         return (0);
1889 }
1890 
1891 /*
1892  * inputs:
1893  * zc_name              name of filesystem
1894  * zc_nvlist_dst_size   size of buffer for zpl property nvlist
1895  *
1896  * outputs:
1897  * zc_nvlist_dst        zpl property nvlist
1898  * zc_nvlist_dst_size   size of zpl property nvlist
1899  */
1900 static int
1901 zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
1902 {
1903         objset_t *os;
1904         int err;
1905 
1906         /* XXX reading without owning */
1907         if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
1908                 return (err);
1909 
1910         dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1911 
1912         /*
1913          * NB: nvl_add_zplprop() will read the objset contents,
1914          * which we aren't supposed to do with a DS_MODE_USER
1915          * hold, because it could be inconsistent.
1916          */
1917         if (zc->zc_nvlist_dst != NULL &&
1918             !zc->zc_objset_stats.dds_inconsistent &&
1919             dmu_objset_type(os) == DMU_OST_ZFS) {
1920                 nvlist_t *nv;
1921 
1922                 VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1923                 if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
1924                     (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
1925                     (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
1926                     (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
1927                         err = put_nvlist(zc, nv);
1928                 nvlist_free(nv);
1929         } else {
1930                 err = ENOENT;
1931         }
1932         dmu_objset_rele(os, FTAG);
1933         return (err);
1934 }
1935 
1936 static boolean_t
1937 dataset_name_hidden(const char *name)
1938 {
1939         /*
1940          * Skip over datasets that are not visible in this zone,
1941          * internal datasets (which have a $ in their name), and
1942          * temporary datasets (which have a % in their name).
1943          */
1944         if (strchr(name, '$') != NULL)
1945                 return (B_TRUE);
1946         if (strchr(name, '%') != NULL)
1947                 return (B_TRUE);
1948         if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
1949                 return (B_TRUE);
1950         return (B_FALSE);
1951 }
1952 
1953 /*
1954  * inputs:
1955  * zc_name              name of filesystem
1956  * zc_cookie            zap cursor
1957  * zc_nvlist_dst_size   size of buffer for property nvlist
1958  *
1959  * outputs:
1960  * zc_name              name of next filesystem
1961  * zc_cookie            zap cursor
1962  * zc_objset_stats      stats
1963  * zc_nvlist_dst        property nvlist
1964  * zc_nvlist_dst_size   size of property nvlist
1965  */
1966 static int
1967 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
1968 {
1969         objset_t *os;
1970         int error;
1971         char *p;
1972         size_t orig_len = strlen(zc->zc_name);
1973 
1974 top:
1975         if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
1976                 if (error == ENOENT)
1977                         error = ESRCH;
1978                 return (error);
1979         }
1980 
1981         p = strrchr(zc->zc_name, '/');
1982         if (p == NULL || p[1] != '\0')
1983                 (void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
1984         p = zc->zc_name + strlen(zc->zc_name);
1985 
1986         /*
1987          * Pre-fetch the datasets.  dmu_objset_prefetch() always returns 0
1988          * but is not declared void because its called by dmu_objset_find().
1989          */
1990         if (zc->zc_cookie == 0) {
1991                 uint64_t cookie = 0;
1992                 int len = sizeof (zc->zc_name) - (p - zc->zc_name);
1993 
1994                 while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) {
1995                         if (!dataset_name_hidden(zc->zc_name))
1996                                 (void) dmu_objset_prefetch(zc->zc_name, NULL);
1997                 }
1998         }
1999 
2000         do {
2001                 error = dmu_dir_list_next(os,
2002                     sizeof (zc->zc_name) - (p - zc->zc_name), p,
2003                     NULL, &zc->zc_cookie);
2004                 if (error == ENOENT)
2005                         error = ESRCH;
2006         } while (error == 0 && dataset_name_hidden(zc->zc_name));
2007         dmu_objset_rele(os, FTAG);
2008 
2009         /*
2010          * If it's an internal dataset (ie. with a '$' in its name),
2011          * don't try to get stats for it, otherwise we'll return ENOENT.
2012          */
2013         if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2014                 error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2015                 if (error == ENOENT) {
2016                         /* We lost a race with destroy, get the next one. */
2017                         zc->zc_name[orig_len] = '\0';
2018                         goto top;
2019                 }
2020         }
2021         return (error);
2022 }
2023 
2024 /*
2025  * inputs:
2026  * zc_name              name of filesystem
2027  * zc_cookie            zap cursor
2028  * zc_nvlist_dst_size   size of buffer for property nvlist
2029  *
2030  * outputs:
2031  * zc_name              name of next snapshot
2032  * zc_objset_stats      stats
2033  * zc_nvlist_dst        property nvlist
2034  * zc_nvlist_dst_size   size of property nvlist
2035  */
2036 static int
2037 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2038 {
2039         objset_t *os;
2040         int error;
2041 
2042 top:
2043         if (zc->zc_cookie == 0)
2044                 (void) dmu_objset_find(zc->zc_name, dmu_objset_prefetch,
2045                     NULL, DS_FIND_SNAPSHOTS);
2046 
2047         error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2048         if (error)
2049                 return (error == ENOENT ? ESRCH : error);
2050 
2051         /*
2052          * A dataset name of maximum length cannot have any snapshots,
2053          * so exit immediately.
2054          */
2055         if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) {
2056                 dmu_objset_rele(os, FTAG);
2057                 return (ESRCH);
2058         }
2059 
2060         error = dmu_snapshot_list_next(os,
2061             sizeof (zc->zc_name) - strlen(zc->zc_name),
2062             zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie,
2063             NULL);
2064 
2065         if (error == 0) {
2066                 dsl_dataset_t *ds;
2067                 dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
2068 
2069                 /*
2070                  * Since we probably don't have a hold on this snapshot,
2071                  * it's possible that the objsetid could have been destroyed
2072                  * and reused for a new objset. It's OK if this happens during
2073                  * a zfs send operation, since the new createtxg will be
2074                  * beyond the range we're interested in.
2075                  */
2076                 rw_enter(&dp->dp_config_rwlock, RW_READER);
2077                 error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds);
2078                 rw_exit(&dp->dp_config_rwlock);
2079                 if (error) {
2080                         if (error == ENOENT) {
2081                                 /* Racing with destroy, get the next one. */
2082                                 *strchr(zc->zc_name, '@') = '\0';
2083                                 dmu_objset_rele(os, FTAG);
2084                                 goto top;
2085                         }
2086                 } else {
2087                         objset_t *ossnap;
2088 
2089                         error = dmu_objset_from_ds(ds, &ossnap);
2090                         if (error == 0)
2091                                 error = zfs_ioc_objset_stats_impl(zc, ossnap);
2092                         dsl_dataset_rele(ds, FTAG);
2093                 }
2094         } else if (error == ENOENT) {
2095                 error = ESRCH;
2096         }
2097 
2098         dmu_objset_rele(os, FTAG);
2099         /* if we failed, undo the @ that we tacked on to zc_name */
2100         if (error)
2101                 *strchr(zc->zc_name, '@') = '\0';
2102         return (error);
2103 }
2104 
2105 static int
2106 zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2107 {
2108         const char *propname = nvpair_name(pair);
2109         uint64_t *valary;
2110         unsigned int vallen;
2111         const char *domain;
2112         char *dash;
2113         zfs_userquota_prop_t type;
2114         uint64_t rid;
2115         uint64_t quota;
2116         zfsvfs_t *zfsvfs;
2117         int err;
2118 
2119         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2120                 nvlist_t *attrs;
2121                 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2122                 if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2123                     &pair) != 0)
2124                         return (EINVAL);
2125         }
2126 
2127         /*
2128          * A correctly constructed propname is encoded as
2129          * userquota@<rid>-<domain>.
2130          */
2131         if ((dash = strchr(propname, '-')) == NULL ||
2132             nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2133             vallen != 3)
2134                 return (EINVAL);
2135 
2136         domain = dash + 1;
2137         type = valary[0];
2138         rid = valary[1];
2139         quota = valary[2];
2140 
2141         err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2142         if (err == 0) {
2143                 err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2144                 zfsvfs_rele(zfsvfs, FTAG);
2145         }
2146 
2147         return (err);
2148 }
2149 
2150 /*
2151  * If the named property is one that has a special function to set its value,
2152  * return 0 on success and a positive error code on failure; otherwise if it is
2153  * not one of the special properties handled by this function, return -1.
2154  *
2155  * XXX: It would be better for callers of the property interface if we handled
2156  * these special cases in dsl_prop.c (in the dsl layer).
2157  */
2158 static int
2159 zfs_prop_set_special(const char *dsname, zprop_source_t source,
2160     nvpair_t *pair)
2161 {
2162         const char *propname = nvpair_name(pair);
2163         zfs_prop_t prop = zfs_name_to_prop(propname);
2164         uint64_t intval;
2165         int err;
2166 
2167         if (prop == ZPROP_INVAL) {
2168                 if (zfs_prop_userquota(propname))
2169                         return (zfs_prop_set_userquota(dsname, pair));
2170                 return (-1);
2171         }
2172 
2173         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2174                 nvlist_t *attrs;
2175                 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2176                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2177                     &pair) == 0);
2178         }
2179 
2180         if (zfs_prop_get_type(prop) == PROP_TYPE_STRING)
2181                 return (-1);
2182 
2183         VERIFY(0 == nvpair_value_uint64(pair, &intval));
2184 
2185         switch (prop) {
2186         case ZFS_PROP_QUOTA:
2187                 err = dsl_dir_set_quota(dsname, source, intval);
2188                 break;
2189         case ZFS_PROP_REFQUOTA:
2190                 err = dsl_dataset_set_quota(dsname, source, intval);
2191                 break;
2192         case ZFS_PROP_RESERVATION:
2193                 err = dsl_dir_set_reservation(dsname, source, intval);
2194                 break;
2195         case ZFS_PROP_REFRESERVATION:
2196                 err = dsl_dataset_set_reservation(dsname, source, intval);
2197                 break;
2198         case ZFS_PROP_VOLSIZE:
2199                 err = zvol_set_volsize(dsname, ddi_driver_major(zfs_dip),
2200                     intval);
2201                 break;
2202         case ZFS_PROP_VERSION:
2203         {
2204                 zfsvfs_t *zfsvfs;
2205 
2206                 if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2207                         break;
2208 
2209                 err = zfs_set_version(zfsvfs, intval);
2210                 zfsvfs_rele(zfsvfs, FTAG);
2211 
2212                 if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2213                         zfs_cmd_t *zc;
2214 
2215                         zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2216                         (void) strcpy(zc->zc_name, dsname);
2217                         (void) zfs_ioc_userspace_upgrade(zc);
2218                         kmem_free(zc, sizeof (zfs_cmd_t));
2219                 }
2220                 break;
2221         }
2222 
2223         default:
2224                 err = -1;
2225         }
2226 
2227         return (err);
2228 }
2229 
2230 /*
2231  * This function is best effort. If it fails to set any of the given properties,
2232  * it continues to set as many as it can and returns the first error
2233  * encountered. If the caller provides a non-NULL errlist, it also gives the
2234  * complete list of names of all the properties it failed to set along with the
2235  * corresponding error numbers. The caller is responsible for freeing the
2236  * returned errlist.
2237  *
2238  * If every property is set successfully, zero is returned and the list pointed
2239  * at by errlist is NULL.
2240  */
2241 int
2242 zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2243     nvlist_t **errlist)
2244 {
2245         nvpair_t *pair;
2246         nvpair_t *propval;
2247         int rv = 0;
2248         uint64_t intval;
2249         char *strval;
2250         nvlist_t *genericnvl;
2251         nvlist_t *errors;
2252         nvlist_t *retrynvl;
2253 
2254         VERIFY(nvlist_alloc(&genericnvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2255         VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2256         VERIFY(nvlist_alloc(&retrynvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2257 
2258 retry:
2259         pair = NULL;
2260         while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2261                 const char *propname = nvpair_name(pair);
2262                 zfs_prop_t prop = zfs_name_to_prop(propname);
2263                 int err = 0;
2264 
2265                 /* decode the property value */
2266                 propval = pair;
2267                 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2268                         nvlist_t *attrs;
2269                         VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2270                         if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2271                             &propval) != 0)
2272                                 err = EINVAL;
2273                 }
2274 
2275                 /* Validate value type */
2276                 if (err == 0 && prop == ZPROP_INVAL) {
2277                         if (zfs_prop_user(propname)) {
2278                                 if (nvpair_type(propval) != DATA_TYPE_STRING)
2279                                         err = EINVAL;
2280                         } else if (zfs_prop_userquota(propname)) {
2281                                 if (nvpair_type(propval) !=
2282                                     DATA_TYPE_UINT64_ARRAY)
2283                                         err = EINVAL;
2284                         } else {
2285                                 err = EINVAL;
2286                         }
2287                 } else if (err == 0) {
2288                         if (nvpair_type(propval) == DATA_TYPE_STRING) {
2289                                 if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
2290                                         err = EINVAL;
2291                         } else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
2292                                 const char *unused;
2293 
2294                                 VERIFY(nvpair_value_uint64(propval,
2295                                     &intval) == 0);
2296 
2297                                 switch (zfs_prop_get_type(prop)) {
2298                                 case PROP_TYPE_NUMBER:
2299                                         break;
2300                                 case PROP_TYPE_STRING:
2301                                         err = EINVAL;
2302                                         break;
2303                                 case PROP_TYPE_INDEX:
2304                                         if (zfs_prop_index_to_string(prop,
2305                                             intval, &unused) != 0)
2306                                                 err = EINVAL;
2307                                         break;
2308                                 default:
2309                                         cmn_err(CE_PANIC,
2310                                             "unknown property type");
2311                                 }
2312                         } else {
2313                                 err = EINVAL;
2314                         }
2315                 }
2316 
2317                 /* Validate permissions */
2318                 if (err == 0)
2319                         err = zfs_check_settable(dsname, pair, CRED());
2320 
2321                 if (err == 0) {
2322                         err = zfs_prop_set_special(dsname, source, pair);
2323                         if (err == -1) {
2324                                 /*
2325                                  * For better performance we build up a list of
2326                                  * properties to set in a single transaction.
2327                                  */
2328                                 err = nvlist_add_nvpair(genericnvl, pair);
2329                         } else if (err != 0 && nvl != retrynvl) {
2330                                 /*
2331                                  * This may be a spurious error caused by
2332                                  * receiving quota and reservation out of order.
2333                                  * Try again in a second pass.
2334                                  */
2335                                 err = nvlist_add_nvpair(retrynvl, pair);
2336                         }
2337                 }
2338 
2339                 if (err != 0)
2340                         VERIFY(nvlist_add_int32(errors, propname, err) == 0);
2341         }
2342 
2343         if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
2344                 nvl = retrynvl;
2345                 goto retry;
2346         }
2347 
2348         if (!nvlist_empty(genericnvl) &&
2349             dsl_props_set(dsname, source, genericnvl) != 0) {
2350                 /*
2351                  * If this fails, we still want to set as many properties as we
2352                  * can, so try setting them individually.
2353                  */
2354                 pair = NULL;
2355                 while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
2356                         const char *propname = nvpair_name(pair);
2357                         int err = 0;
2358 
2359                         propval = pair;
2360                         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2361                                 nvlist_t *attrs;
2362                                 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2363                                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2364                                     &propval) == 0);
2365                         }
2366 
2367                         if (nvpair_type(propval) == DATA_TYPE_STRING) {
2368                                 VERIFY(nvpair_value_string(propval,
2369                                     &strval) == 0);
2370                                 err = dsl_prop_set(dsname, propname, source, 1,
2371                                     strlen(strval) + 1, strval);
2372                         } else {
2373                                 VERIFY(nvpair_value_uint64(propval,
2374                                     &intval) == 0);
2375                                 err = dsl_prop_set(dsname, propname, source, 8,
2376                                     1, &intval);
2377                         }
2378 
2379                         if (err != 0) {
2380                                 VERIFY(nvlist_add_int32(errors, propname,
2381                                     err) == 0);
2382                         }
2383                 }
2384         }
2385         nvlist_free(genericnvl);
2386         nvlist_free(retrynvl);
2387 
2388         if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
2389                 nvlist_free(errors);
2390                 errors = NULL;
2391         } else {
2392                 VERIFY(nvpair_value_int32(pair, &rv) == 0);
2393         }
2394 
2395         if (errlist == NULL)
2396                 nvlist_free(errors);
2397         else
2398                 *errlist = errors;
2399 
2400         return (rv);
2401 }
2402 
2403 /*
2404  * Check that all the properties are valid user properties.
2405  */
2406 static int
2407 zfs_check_userprops(char *fsname, nvlist_t *nvl)
2408 {
2409         nvpair_t *pair = NULL;
2410         int error = 0;
2411 
2412         while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2413                 const char *propname = nvpair_name(pair);
2414                 char *valstr;
2415 
2416                 if (!zfs_prop_user(propname) ||
2417                     nvpair_type(pair) != DATA_TYPE_STRING)
2418                         return (EINVAL);
2419 
2420                 if (error = zfs_secpolicy_write_perms(fsname,
2421                     ZFS_DELEG_PERM_USERPROP, CRED()))
2422                         return (error);
2423 
2424                 if (strlen(propname) >= ZAP_MAXNAMELEN)
2425                         return (ENAMETOOLONG);
2426 
2427                 VERIFY(nvpair_value_string(pair, &valstr) == 0);
2428                 if (strlen(valstr) >= ZAP_MAXVALUELEN)
2429                         return (E2BIG);
2430         }
2431         return (0);
2432 }
2433 
2434 static void
2435 props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2436 {
2437         nvpair_t *pair;
2438 
2439         VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2440 
2441         pair = NULL;
2442         while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2443                 if (nvlist_exists(skipped, nvpair_name(pair)))
2444                         continue;
2445 
2446                 VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2447         }
2448 }
2449 
2450 static int
2451 clear_received_props(objset_t *os, const char *fs, nvlist_t *props,
2452     nvlist_t *skipped)
2453 {
2454         int err = 0;
2455         nvlist_t *cleared_props = NULL;
2456         props_skip(props, skipped, &cleared_props);
2457         if (!nvlist_empty(cleared_props)) {
2458                 /*
2459                  * Acts on local properties until the dataset has received
2460                  * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2461                  */
2462                 zprop_source_t flags = (ZPROP_SRC_NONE |
2463                     (dsl_prop_get_hasrecvd(os) ? ZPROP_SRC_RECEIVED : 0));
2464                 err = zfs_set_prop_nvlist(fs, flags, cleared_props, NULL);
2465         }
2466         nvlist_free(cleared_props);
2467         return (err);
2468 }
2469 
2470 /*
2471  * inputs:
2472  * zc_name              name of filesystem
2473  * zc_value             name of property to set
2474  * zc_nvlist_src{_size} nvlist of properties to apply
2475  * zc_cookie            received properties flag
2476  *
2477  * outputs:
2478  * zc_nvlist_dst{_size} error for each unapplied received property
2479  */
2480 static int
2481 zfs_ioc_set_prop(zfs_cmd_t *zc)
2482 {
2483         nvlist_t *nvl;
2484         boolean_t received = zc->zc_cookie;
2485         zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2486             ZPROP_SRC_LOCAL);
2487         nvlist_t *errors = NULL;
2488         int error;
2489 
2490         if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2491             zc->zc_iflags, &nvl)) != 0)
2492                 return (error);
2493 
2494         if (received) {
2495                 nvlist_t *origprops;
2496                 objset_t *os;
2497 
2498                 if (dmu_objset_hold(zc->zc_name, FTAG, &os) == 0) {
2499                         if (dsl_prop_get_received(os, &origprops) == 0) {
2500                                 (void) clear_received_props(os,
2501                                     zc->zc_name, origprops, nvl);
2502                                 nvlist_free(origprops);
2503                         }
2504 
2505                         dsl_prop_set_hasrecvd(os);
2506                         dmu_objset_rele(os, FTAG);
2507                 }
2508         }
2509 
2510         error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, &errors);
2511 
2512         if (zc->zc_nvlist_dst != NULL && errors != NULL) {
2513                 (void) put_nvlist(zc, errors);
2514         }
2515 
2516         nvlist_free(errors);
2517         nvlist_free(nvl);
2518         return (error);
2519 }
2520 
2521 /*
2522  * inputs:
2523  * zc_name              name of filesystem
2524  * zc_value             name of property to inherit
2525  * zc_cookie            revert to received value if TRUE
2526  *
2527  * outputs:             none
2528  */
2529 static int
2530 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2531 {
2532         const char *propname = zc->zc_value;
2533         zfs_prop_t prop = zfs_name_to_prop(propname);
2534         boolean_t received = zc->zc_cookie;
2535         zprop_source_t source = (received
2536             ? ZPROP_SRC_NONE            /* revert to received value, if any */
2537             : ZPROP_SRC_INHERITED);     /* explicitly inherit */
2538 
2539         if (received) {
2540                 nvlist_t *dummy;
2541                 nvpair_t *pair;
2542                 zprop_type_t type;
2543                 int err;
2544 
2545                 /*
2546                  * zfs_prop_set_special() expects properties in the form of an
2547                  * nvpair with type info.
2548                  */
2549                 if (prop == ZPROP_INVAL) {
2550                         if (!zfs_prop_user(propname))
2551                                 return (EINVAL);
2552 
2553                         type = PROP_TYPE_STRING;
2554                 } else if (prop == ZFS_PROP_VOLSIZE ||
2555                     prop == ZFS_PROP_VERSION) {
2556                         return (EINVAL);
2557                 } else {
2558                         type = zfs_prop_get_type(prop);
2559                 }
2560 
2561                 VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2562 
2563                 switch (type) {
2564                 case PROP_TYPE_STRING:
2565                         VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2566                         break;
2567                 case PROP_TYPE_NUMBER:
2568                 case PROP_TYPE_INDEX:
2569                         VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2570                         break;
2571                 default:
2572                         nvlist_free(dummy);
2573                         return (EINVAL);
2574                 }
2575 
2576                 pair = nvlist_next_nvpair(dummy, NULL);
2577                 err = zfs_prop_set_special(zc->zc_name, source, pair);
2578                 nvlist_free(dummy);
2579                 if (err != -1)
2580                         return (err); /* special property already handled */
2581         } else {
2582                 /*
2583                  * Only check this in the non-received case. We want to allow
2584                  * 'inherit -S' to revert non-inheritable properties like quota
2585                  * and reservation to the received or default values even though
2586                  * they are not considered inheritable.
2587                  */
2588                 if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
2589                         return (EINVAL);
2590         }
2591 
2592         /* the property name has been validated by zfs_secpolicy_inherit() */
2593         return (dsl_prop_set(zc->zc_name, zc->zc_value, source, 0, 0, NULL));
2594 }
2595 
2596 static int
2597 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2598 {
2599         nvlist_t *props;
2600         spa_t *spa;
2601         int error;
2602         nvpair_t *pair;
2603 
2604         if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2605             zc->zc_iflags, &props))
2606                 return (error);
2607 
2608         /*
2609          * If the only property is the configfile, then just do a spa_lookup()
2610          * to handle the faulted case.
2611          */
2612         pair = nvlist_next_nvpair(props, NULL);
2613         if (pair != NULL && strcmp(nvpair_name(pair),
2614             zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2615             nvlist_next_nvpair(props, pair) == NULL) {
2616                 mutex_enter(&spa_namespace_lock);
2617                 if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2618                         spa_configfile_set(spa, props, B_FALSE);
2619                         spa_config_sync(spa, B_FALSE, B_TRUE);
2620                 }
2621                 mutex_exit(&spa_namespace_lock);
2622                 if (spa != NULL) {
2623                         nvlist_free(props);
2624                         return (0);
2625                 }
2626         }
2627 
2628         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2629                 nvlist_free(props);
2630                 return (error);
2631         }
2632 
2633         error = spa_prop_set(spa, props);
2634 
2635         nvlist_free(props);
2636         spa_close(spa, FTAG);
2637 
2638         return (error);
2639 }
2640 
2641 static int
2642 zfs_ioc_pool_get_props(zfs_cmd_t *zc)
2643 {
2644         spa_t *spa;
2645         int error;
2646         nvlist_t *nvp = NULL;
2647 
2648         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2649                 /*
2650                  * If the pool is faulted, there may be properties we can still
2651                  * get (such as altroot and cachefile), so attempt to get them
2652                  * anyway.
2653                  */
2654                 mutex_enter(&spa_namespace_lock);
2655                 if ((spa = spa_lookup(zc->zc_name)) != NULL)
2656                         error = spa_prop_get(spa, &nvp);
2657                 mutex_exit(&spa_namespace_lock);
2658         } else {
2659                 error = spa_prop_get(spa, &nvp);
2660                 spa_close(spa, FTAG);
2661         }
2662 
2663         if (error == 0 && zc->zc_nvlist_dst != NULL)
2664                 error = put_nvlist(zc, nvp);
2665         else
2666                 error = EFAULT;
2667 
2668         nvlist_free(nvp);
2669         return (error);
2670 }
2671 
2672 /*
2673  * inputs:
2674  * zc_name              name of filesystem
2675  * zc_nvlist_src{_size} nvlist of delegated permissions
2676  * zc_perm_action       allow/unallow flag
2677  *
2678  * outputs:             none
2679  */
2680 static int
2681 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
2682 {
2683         int error;
2684         nvlist_t *fsaclnv = NULL;
2685 
2686         if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2687             zc->zc_iflags, &fsaclnv)) != 0)
2688                 return (error);
2689 
2690         /*
2691          * Verify nvlist is constructed correctly
2692          */
2693         if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
2694                 nvlist_free(fsaclnv);
2695                 return (EINVAL);
2696         }
2697 
2698         /*
2699          * If we don't have PRIV_SYS_MOUNT, then validate
2700          * that user is allowed to hand out each permission in
2701          * the nvlist(s)
2702          */
2703 
2704         error = secpolicy_zfs(CRED());
2705         if (error) {
2706                 if (zc->zc_perm_action == B_FALSE) {
2707                         error = dsl_deleg_can_allow(zc->zc_name,
2708                             fsaclnv, CRED());
2709                 } else {
2710                         error = dsl_deleg_can_unallow(zc->zc_name,
2711                             fsaclnv, CRED());
2712                 }
2713         }
2714 
2715         if (error == 0)
2716                 error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
2717 
2718         nvlist_free(fsaclnv);
2719         return (error);
2720 }
2721 
2722 /*
2723  * inputs:
2724  * zc_name              name of filesystem
2725  *
2726  * outputs:
2727  * zc_nvlist_src{_size} nvlist of delegated permissions
2728  */
2729 static int
2730 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
2731 {
2732         nvlist_t *nvp;
2733         int error;
2734 
2735         if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
2736                 error = put_nvlist(zc, nvp);
2737                 nvlist_free(nvp);
2738         }
2739 
2740         return (error);
2741 }
2742 
2743 /*
2744  * Search the vfs list for a specified resource.  Returns a pointer to it
2745  * or NULL if no suitable entry is found. The caller of this routine
2746  * is responsible for releasing the returned vfs pointer.
2747  */
2748 static vfs_t *
2749 zfs_get_vfs(const char *resource)
2750 {
2751         struct vfs *vfsp;
2752         struct vfs *vfs_found = NULL;
2753 
2754         vfs_list_read_lock();
2755         vfsp = rootvfs;
2756         do {
2757                 if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) {
2758                         VFS_HOLD(vfsp);
2759                         vfs_found = vfsp;
2760                         break;
2761                 }
2762                 vfsp = vfsp->vfs_next;
2763         } while (vfsp != rootvfs);
2764         vfs_list_unlock();
2765         return (vfs_found);
2766 }
2767 
2768 /* ARGSUSED */
2769 static void
2770 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
2771 {
2772         zfs_creat_t *zct = arg;
2773 
2774         zfs_create_fs(os, cr, zct->zct_zplprops, tx);
2775 }
2776 
/* All-ones uint64_t value used to mark a property value as not supplied. */
#define	ZFS_PROP_UNDEFINED	(~(uint64_t)0)
2778 
2779 /*
2780  * inputs:
2781  * createprops          list of properties requested by creator
2782  * default_zplver       zpl version to use if unspecified in createprops
2783  * fuids_ok             fuids allowed in this version of the spa?
2784  * os                   parent objset pointer (NULL if root fs)
2785  *
2786  * outputs:
2787  * zplprops     values for the zplprops we attach to the master node object
2788  * is_ci        true if requested file system will be purely case-insensitive
2789  *
2790  * Determine the settings for utf8only, normalization and
2791  * casesensitivity.  Specific values may have been requested by the
2792  * creator and/or we can inherit values from the parent dataset.  If
2793  * the file system is of too early a vintage, a creator can not
2794  * request settings for these properties, even if the requested
2795  * setting is the default value.  We don't actually want to create dsl
2796  * properties for these, so remove them from the source nvlist after
2797  * processing.
2798  */
2799 static int
2800 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
2801     boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
2802     nvlist_t *zplprops, boolean_t *is_ci)
2803 {
2804         uint64_t sense = ZFS_PROP_UNDEFINED;
2805         uint64_t norm = ZFS_PROP_UNDEFINED;
2806         uint64_t u8 = ZFS_PROP_UNDEFINED;
2807 
2808         ASSERT(zplprops != NULL);
2809 
2810         /*
2811          * Pull out creator prop choices, if any.
2812          */
2813         if (createprops) {
2814                 (void) nvlist_lookup_uint64(createprops,
2815                     zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
2816                 (void) nvlist_lookup_uint64(createprops,
2817                     zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
2818                 (void) nvlist_remove_all(createprops,
2819                     zfs_prop_to_name(ZFS_PROP_NORMALIZE));
2820                 (void) nvlist_lookup_uint64(createprops,
2821                     zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
2822                 (void) nvlist_remove_all(createprops,
2823                     zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
2824                 (void) nvlist_lookup_uint64(createprops,
2825                     zfs_prop_to_name(ZFS_PROP_CASE), &sense);
2826                 (void) nvlist_remove_all(createprops,
2827                     zfs_prop_to_name(ZFS_PROP_CASE));
2828         }
2829 
2830         /*
2831          * If the zpl version requested is whacky or the file system
2832          * or pool is version is too "young" to support normalization
2833          * and the creator tried to set a value for one of the props,
2834          * error out.
2835          */
2836         if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
2837             (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
2838             (zplver >= ZPL_VERSION_SA && !sa_ok) ||
2839             (zplver < ZPL_VERSION_NORMALIZATION &&
2840             (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
2841             sense != ZFS_PROP_UNDEFINED)))
2842                 return (ENOTSUP);
2843 
2844         /*
2845          * Put the version in the zplprops
2846          */
2847         VERIFY(nvlist_add_uint64(zplprops,
2848             zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
2849 
2850         if (norm == ZFS_PROP_UNDEFINED)
2851                 VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
2852         VERIFY(nvlist_add_uint64(zplprops,
2853             zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
2854 
2855         /*
2856          * If we're normalizing, names must always be valid UTF-8 strings.
2857          */
2858         if (norm)
2859                 u8 = 1;
2860         if (u8 == ZFS_PROP_UNDEFINED)
2861                 VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
2862         VERIFY(nvlist_add_uint64(zplprops,
2863             zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
2864 
2865         if (sense == ZFS_PROP_UNDEFINED)
2866                 VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
2867         VERIFY(nvlist_add_uint64(zplprops,
2868             zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
2869 
2870         if (is_ci)
2871                 *is_ci = (sense == ZFS_CASE_INSENSITIVE);
2872 
2873         return (0);
2874 }
2875 
2876 static int
2877 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
2878     nvlist_t *zplprops, boolean_t *is_ci)
2879 {
2880         boolean_t fuids_ok, sa_ok;
2881         uint64_t zplver = ZPL_VERSION;
2882         objset_t *os = NULL;
2883         char parentname[MAXNAMELEN];
2884         char *cp;
2885         spa_t *spa;
2886         uint64_t spa_vers;
2887         int error;
2888 
2889         (void) strlcpy(parentname, dataset, sizeof (parentname));
2890         cp = strrchr(parentname, '/');
2891         ASSERT(cp != NULL);
2892         cp[0] = '\0';
2893 
2894         if ((error = spa_open(dataset, &spa, FTAG)) != 0)
2895                 return (error);
2896 
2897         spa_vers = spa_version(spa);
2898         spa_close(spa, FTAG);
2899 
2900         zplver = zfs_zpl_version_map(spa_vers);
2901         fuids_ok = (zplver >= ZPL_VERSION_FUID);
2902         sa_ok = (zplver >= ZPL_VERSION_SA);
2903 
2904         /*
2905          * Open parent object set so we can inherit zplprop values.
2906          */
2907         if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
2908                 return (error);
2909 
2910         error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
2911             zplprops, is_ci);
2912         dmu_objset_rele(os, FTAG);
2913         return (error);
2914 }
2915 
2916 static int
2917 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
2918     nvlist_t *zplprops, boolean_t *is_ci)
2919 {
2920         boolean_t fuids_ok;
2921         boolean_t sa_ok;
2922         uint64_t zplver = ZPL_VERSION;
2923         int error;
2924 
2925         zplver = zfs_zpl_version_map(spa_vers);
2926         fuids_ok = (zplver >= ZPL_VERSION_FUID);
2927         sa_ok = (zplver >= ZPL_VERSION_SA);
2928 
2929         error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
2930             createprops, zplprops, is_ci);
2931         return (error);
2932 }
2933 
2934 /*
2935  * inputs:
2936  * zc_objset_type       type of objset to create (fs vs zvol)
2937  * zc_name              name of new objset
2938  * zc_value             name of snapshot to clone from (may be empty)
2939  * zc_nvlist_src{_size} nvlist of properties to apply
2940  *
2941  * outputs: none
2942  */
2943 static int
2944 zfs_ioc_create(zfs_cmd_t *zc)
2945 {
2946         objset_t *clone;
2947         int error = 0;
2948         zfs_creat_t zct;
2949         nvlist_t *nvprops = NULL;
2950         void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
2951         dmu_objset_type_t type = zc->zc_objset_type;
2952 
2953         switch (type) {
2954 
2955         case DMU_OST_ZFS:
2956                 cbfunc = zfs_create_cb;
2957                 break;
2958 
2959         case DMU_OST_ZVOL:
2960                 cbfunc = zvol_create_cb;
2961                 break;
2962 
2963         default:
2964                 cbfunc = NULL;
2965                 break;
2966         }
2967         if (strchr(zc->zc_name, '@') ||
2968             strchr(zc->zc_name, '%'))
2969                 return (EINVAL);
2970 
2971         if (zc->zc_nvlist_src != NULL &&
2972             (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2973             zc->zc_iflags, &nvprops)) != 0)
2974                 return (error);
2975 
2976         zct.zct_zplprops = NULL;
2977         zct.zct_props = nvprops;
2978 
2979         if (zc->zc_value[0] != '\0') {
2980                 /*
2981                  * We're creating a clone of an existing snapshot.
2982                  */
2983                 zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
2984                 if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) {
2985                         nvlist_free(nvprops);
2986                         return (EINVAL);
2987                 }
2988 
2989                 error = dmu_objset_hold(zc->zc_value, FTAG, &clone);
2990                 if (error) {
2991                         nvlist_free(nvprops);
2992                         return (error);
2993                 }
2994 
2995                 error = dmu_objset_clone(zc->zc_name, dmu_objset_ds(clone), 0);
2996                 dmu_objset_rele(clone, FTAG);
2997                 if (error) {
2998                         nvlist_free(nvprops);
2999                         return (error);
3000                 }
3001         } else {
3002                 boolean_t is_insensitive = B_FALSE;
3003 
3004                 if (cbfunc == NULL) {
3005                         nvlist_free(nvprops);
3006                         return (EINVAL);
3007                 }
3008 
3009                 if (type == DMU_OST_ZVOL) {
3010                         uint64_t volsize, volblocksize;
3011 
3012                         if (nvprops == NULL ||
3013                             nvlist_lookup_uint64(nvprops,
3014                             zfs_prop_to_name(ZFS_PROP_VOLSIZE),
3015                             &volsize) != 0) {
3016                                 nvlist_free(nvprops);
3017                                 return (EINVAL);
3018                         }
3019 
3020                         if ((error = nvlist_lookup_uint64(nvprops,
3021                             zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3022                             &volblocksize)) != 0 && error != ENOENT) {
3023                                 nvlist_free(nvprops);
3024                                 return (EINVAL);
3025                         }
3026 
3027                         if (error != 0)
3028                                 volblocksize = zfs_prop_default_numeric(
3029                                     ZFS_PROP_VOLBLOCKSIZE);
3030 
3031                         if ((error = zvol_check_volblocksize(
3032                             volblocksize)) != 0 ||
3033                             (error = zvol_check_volsize(volsize,
3034                             volblocksize)) != 0) {
3035                                 nvlist_free(nvprops);
3036                                 return (error);
3037                         }
3038                 } else if (type == DMU_OST_ZFS) {
3039                         int error;
3040 
3041                         /*
3042                          * We have to have normalization and
3043                          * case-folding flags correct when we do the
3044                          * file system creation, so go figure them out
3045                          * now.
3046                          */
3047                         VERIFY(nvlist_alloc(&zct.zct_zplprops,
3048                             NV_UNIQUE_NAME, KM_SLEEP) == 0);
3049                         error = zfs_fill_zplprops(zc->zc_name, nvprops,
3050                             zct.zct_zplprops, &is_insensitive);
3051                         if (error != 0) {
3052                                 nvlist_free(nvprops);
3053                                 nvlist_free(zct.zct_zplprops);
3054                                 return (error);
3055                         }
3056                 }
3057                 error = dmu_objset_create(zc->zc_name, type,
3058                     is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
3059                 nvlist_free(zct.zct_zplprops);
3060         }
3061 
3062         /*
3063          * It would be nice to do this atomically.
3064          */
3065         if (error == 0) {
3066                 error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL,
3067                     nvprops, NULL);
3068                 if (error != 0)
3069                         (void) dmu_objset_destroy(zc->zc_name, B_FALSE);
3070         }
3071         nvlist_free(nvprops);
3072         return (error);
3073 }
3074 
3075 /*
3076  * inputs:
3077  * zc_name      name of filesystem
3078  * zc_value     short name of snapshot
3079  * zc_cookie    recursive flag
3080  * zc_nvlist_src[_size] property list
3081  *
3082  * outputs:
3083  * zc_value     short snapname (i.e. part after the '@')
3084  */
3085 static int
3086 zfs_ioc_snapshot(zfs_cmd_t *zc)
3087 {
3088         nvlist_t *nvprops = NULL;
3089         int error;
3090         boolean_t recursive = zc->zc_cookie;
3091 
3092         if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
3093                 return (EINVAL);
3094 
3095         if (zc->zc_nvlist_src != NULL &&
3096             (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3097             zc->zc_iflags, &nvprops)) != 0)
3098                 return (error);
3099 
3100         error = zfs_check_userprops(zc->zc_name, nvprops);
3101         if (error)
3102                 goto out;
3103 
3104         if (!nvlist_empty(nvprops) &&
3105             zfs_earlier_version(zc->zc_name, SPA_VERSION_SNAP_PROPS)) {
3106                 error = ENOTSUP;
3107                 goto out;
3108         }
3109 
3110         error = dmu_objset_snapshot(zc->zc_name, zc->zc_value, NULL,
3111             nvprops, recursive, B_FALSE, -1);
3112 
3113 out:
3114         nvlist_free(nvprops);
3115         return (error);
3116 }
3117 
3118 int
3119 zfs_unmount_snap(const char *name, void *arg)
3120 {
3121         vfs_t *vfsp = NULL;
3122 
3123         if (arg) {
3124                 char *snapname = arg;
3125                 char *fullname = kmem_asprintf("%s@%s", name, snapname);
3126                 vfsp = zfs_get_vfs(fullname);
3127                 strfree(fullname);
3128         } else if (strchr(name, '@')) {
3129                 vfsp = zfs_get_vfs(name);
3130         }
3131 
3132         if (vfsp) {
3133                 /*
3134                  * Always force the unmount for snapshots.
3135                  */
3136                 int flag = MS_FORCE;
3137                 int err;
3138 
3139                 if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) {
3140                         VFS_RELE(vfsp);
3141                         return (err);
3142                 }
3143                 VFS_RELE(vfsp);
3144                 if ((err = dounmount(vfsp, flag, kcred)) != 0)
3145                         return (err);
3146         }
3147         return (0);
3148 }
3149 
3150 /*
3151  * inputs:
3152  * zc_name              name of filesystem, snaps must be under it
3153  * zc_nvlist_src[_size] full names of snapshots to destroy
3154  * zc_defer_destroy     mark for deferred destroy
3155  *
3156  * outputs:
3157  * zc_name              on failure, name of failed snapshot
3158  */
3159 static int
3160 zfs_ioc_destroy_snaps_nvl(zfs_cmd_t *zc)
3161 {
3162         int err, len;
3163         nvlist_t *nvl;
3164         nvpair_t *pair;
3165 
3166         if ((err = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3167             zc->zc_iflags, &nvl)) != 0)
3168                 return (err);
3169 
3170         len = strlen(zc->zc_name);
3171         for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL;
3172             pair = nvlist_next_nvpair(nvl, pair)) {
3173                 const char *name = nvpair_name(pair);
3174                 /*
3175                  * The snap name must be underneath the zc_name.  This ensures
3176                  * that our permission checks were legitimate.
3177                  */
3178                 if (strncmp(zc->zc_name, name, len) != 0 ||
3179                     (name[len] != '@' && name[len] != '/')) {
3180                         nvlist_free(nvl);
3181                         return (EINVAL);
3182                 }
3183 
3184                 (void) zfs_unmount_snap(name, NULL);
3185         }
3186 
3187         err = dmu_snapshots_destroy_nvl(nvl, zc->zc_defer_destroy,
3188             zc->zc_name);
3189         nvlist_free(nvl);
3190         return (err);
3191 }
3192 
3193 /*
3194  * inputs:
3195  * zc_name              name of dataset to destroy
3196  * zc_objset_type       type of objset
3197  * zc_defer_destroy     mark for deferred destroy
3198  *
3199  * outputs:             none
3200  */
3201 static int
3202 zfs_ioc_destroy(zfs_cmd_t *zc)
3203 {
3204         int err;
3205         if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) {
3206                 err = zfs_unmount_snap(zc->zc_name, NULL);
3207                 if (err)
3208                         return (err);
3209         }
3210 
3211         err = dmu_objset_destroy(zc->zc_name, zc->zc_defer_destroy);
3212         if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0)
3213                 (void) zvol_remove_minor(zc->zc_name);
3214         return (err);
3215 }
3216 
3217 /*
3218  * inputs:
3219  * zc_name      name of dataset to rollback (to most recent snapshot)
3220  *
3221  * outputs:     none
3222  */
3223 static int
3224 zfs_ioc_rollback(zfs_cmd_t *zc)
3225 {
3226         dsl_dataset_t *ds, *clone;
3227         int error;
3228         zfsvfs_t *zfsvfs;
3229         char *clone_name;
3230 
3231         error = dsl_dataset_hold(zc->zc_name, FTAG, &ds);
3232         if (error)
3233                 return (error);
3234 
3235         /* must not be a snapshot */
3236         if (dsl_dataset_is_snapshot(ds)) {
3237                 dsl_dataset_rele(ds, FTAG);
3238                 return (EINVAL);
3239         }
3240 
3241         /* must have a most recent snapshot */
3242         if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) {
3243                 dsl_dataset_rele(ds, FTAG);
3244                 return (EINVAL);
3245         }
3246 
3247         /*
3248          * Create clone of most recent snapshot.
3249          */
3250         clone_name = kmem_asprintf("%s/%%rollback", zc->zc_name);
3251         error = dmu_objset_clone(clone_name, ds->ds_prev, DS_FLAG_INCONSISTENT);
3252         if (error)
3253                 goto out;
3254 
3255         error = dsl_dataset_own(clone_name, B_TRUE, FTAG, &clone);
3256         if (error)
3257                 goto out;
3258 
3259         /*
3260          * Do clone swap.
3261          */
3262         if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
3263                 error = zfs_suspend_fs(zfsvfs);
3264                 if (error == 0) {
3265                         int resume_err;
3266 
3267                         if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
3268                                 error = dsl_dataset_clone_swap(clone, ds,
3269                                     B_TRUE);
3270                                 dsl_dataset_disown(ds, FTAG);
3271                                 ds = NULL;
3272                         } else {
3273                                 error = EBUSY;
3274                         }
3275                         resume_err = zfs_resume_fs(zfsvfs, zc->zc_name);
3276                         error = error ? error : resume_err;
3277                 }
3278                 VFS_RELE(zfsvfs->z_vfs);
3279         } else {
3280                 if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
3281                         error = dsl_dataset_clone_swap(clone, ds, B_TRUE);
3282                         dsl_dataset_disown(ds, FTAG);
3283                         ds = NULL;
3284                 } else {
3285                         error = EBUSY;
3286                 }
3287         }
3288 
3289         /*
3290          * Destroy clone (which also closes it).
3291          */
3292         (void) dsl_dataset_destroy(clone, FTAG, B_FALSE);
3293 
3294 out:
3295         strfree(clone_name);
3296         if (ds)
3297                 dsl_dataset_rele(ds, FTAG);
3298         return (error);
3299 }
3300 
3301 /*
3302  * inputs:
3303  * zc_name      old name of dataset
3304  * zc_value     new name of dataset
3305  * zc_cookie    recursive flag (only valid for snapshots)
3306  *
3307  * outputs:     none
3308  */
3309 static int
3310 zfs_ioc_rename(zfs_cmd_t *zc)
3311 {
3312         boolean_t recursive = zc->zc_cookie & 1;
3313 
3314         zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
3315         if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
3316             strchr(zc->zc_value, '%'))
3317                 return (EINVAL);
3318 
3319         /*
3320          * Unmount snapshot unless we're doing a recursive rename,
3321          * in which case the dataset code figures out which snapshots
3322          * to unmount.
3323          */
3324         if (!recursive && strchr(zc->zc_name, '@') != NULL &&
3325             zc->zc_objset_type == DMU_OST_ZFS) {
3326                 int err = zfs_unmount_snap(zc->zc_name, NULL);
3327                 if (err)
3328                         return (err);
3329         }
3330         if (zc->zc_objset_type == DMU_OST_ZVOL)
3331                 (void) zvol_remove_minor(zc->zc_name);
3332         return (dmu_objset_rename(zc->zc_name, zc->zc_value, recursive));
3333 }
3334 
3335 static int
3336 zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
3337 {
3338         const char *propname = nvpair_name(pair);
3339         boolean_t issnap = (strchr(dsname, '@') != NULL);
3340         zfs_prop_t prop = zfs_name_to_prop(propname);
3341         uint64_t intval;
3342         int err;
3343 
3344         if (prop == ZPROP_INVAL) {
3345                 if (zfs_prop_user(propname)) {
3346                         if (err = zfs_secpolicy_write_perms(dsname,
3347                             ZFS_DELEG_PERM_USERPROP, cr))
3348                                 return (err);
3349                         return (0);
3350                 }
3351 
3352                 if (!issnap && zfs_prop_userquota(propname)) {
3353                         const char *perm = NULL;
3354                         const char *uq_prefix =
3355                             zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
3356                         const char *gq_prefix =
3357                             zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
3358 
3359                         if (strncmp(propname, uq_prefix,
3360                             strlen(uq_prefix)) == 0) {
3361                                 perm = ZFS_DELEG_PERM_USERQUOTA;
3362                         } else if (strncmp(propname, gq_prefix,
3363                             strlen(gq_prefix)) == 0) {
3364                                 perm = ZFS_DELEG_PERM_GROUPQUOTA;
3365                         } else {
3366                                 /* USERUSED and GROUPUSED are read-only */
3367                                 return (EINVAL);
3368                         }
3369 
3370                         if (err = zfs_secpolicy_write_perms(dsname, perm, cr))
3371                                 return (err);
3372                         return (0);
3373                 }
3374 
3375                 return (EINVAL);
3376         }
3377 
3378         if (issnap)
3379                 return (EINVAL);
3380 
3381         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
3382                 /*
3383                  * dsl_prop_get_all_impl() returns properties in this
3384                  * format.
3385                  */
3386                 nvlist_t *attrs;
3387                 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
3388                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3389                     &pair) == 0);
3390         }
3391 
3392         /*
3393          * Check that this value is valid for this pool version
3394          */
3395         switch (prop) {
3396         case ZFS_PROP_COMPRESSION:
3397                 /*
3398                  * If the user specified gzip compression, make sure
3399                  * the SPA supports it. We ignore any errors here since
3400                  * we'll catch them later.
3401                  */
3402                 if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
3403                     nvpair_value_uint64(pair, &intval) == 0) {
3404                         if (intval >= ZIO_COMPRESS_GZIP_1 &&
3405                             intval <= ZIO_COMPRESS_GZIP_9 &&
3406                             zfs_earlier_version(dsname,
3407                             SPA_VERSION_GZIP_COMPRESSION)) {
3408                                 return (ENOTSUP);
3409                         }
3410 
3411                         if (intval == ZIO_COMPRESS_ZLE &&
3412                             zfs_earlier_version(dsname,
3413                             SPA_VERSION_ZLE_COMPRESSION))
3414                                 return (ENOTSUP);
3415 
3416                         /*
3417                          * If this is a bootable dataset then
3418                          * verify that the compression algorithm
3419                          * is supported for booting. We must return
3420                          * something other than ENOTSUP since it
3421                          * implies a downrev pool version.
3422                          */
3423                         if (zfs_is_bootfs(dsname) &&
3424                             !BOOTFS_COMPRESS_VALID(intval)) {
3425                                 return (ERANGE);
3426                         }
3427                 }
3428                 break;
3429 
3430         case ZFS_PROP_COPIES:
3431                 if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
3432                         return (ENOTSUP);
3433                 break;
3434 
3435         case ZFS_PROP_DEDUP:
3436                 if (zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
3437                         return (ENOTSUP);
3438                 break;
3439 
3440         case ZFS_PROP_SHARESMB:
3441                 if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
3442                         return (ENOTSUP);
3443                 break;
3444 
3445         case ZFS_PROP_ACLINHERIT:
3446                 if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
3447                     nvpair_value_uint64(pair, &intval) == 0) {
3448                         if (intval == ZFS_ACL_PASSTHROUGH_X &&
3449                             zfs_earlier_version(dsname,
3450                             SPA_VERSION_PASSTHROUGH_X))
3451                                 return (ENOTSUP);
3452                 }
3453                 break;
3454         }
3455 
3456         return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
3457 }
3458 
3459 /*
3460  * Removes properties from the given props list that fail permission checks
3461  * needed to clear them and to restore them in case of a receive error. For each
3462  * property, make sure we have both set and inherit permissions.
3463  *
3464  * Returns the first error encountered if any permission checks fail. If the
3465  * caller provides a non-NULL errlist, it also gives the complete list of names
3466  * of all the properties that failed a permission check along with the
3467  * corresponding error numbers. The caller is responsible for freeing the
3468  * returned errlist.
3469  *
3470  * If every property checks out successfully, zero is returned and the list
3471  * pointed at by errlist is NULL.
3472  */
3473 static int
3474 zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
3475 {
3476         zfs_cmd_t *zc;
3477         nvpair_t *pair, *next_pair;
3478         nvlist_t *errors;
3479         int err, rv = 0;
3480 
3481         if (props == NULL)
3482                 return (0);
3483 
3484         VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
3485 
3486         zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
3487         (void) strcpy(zc->zc_name, dataset);
3488         pair = nvlist_next_nvpair(props, NULL);
3489         while (pair != NULL) {
3490                 next_pair = nvlist_next_nvpair(props, pair);
3491 
3492                 (void) strcpy(zc->zc_value, nvpair_name(pair));
3493                 if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
3494                     (err = zfs_secpolicy_inherit(zc, CRED())) != 0) {
3495                         VERIFY(nvlist_remove_nvpair(props, pair) == 0);
3496                         VERIFY(nvlist_add_int32(errors,
3497                             zc->zc_value, err) == 0);
3498                 }
3499                 pair = next_pair;
3500         }
3501         kmem_free(zc, sizeof (zfs_cmd_t));
3502 
3503         if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
3504                 nvlist_free(errors);
3505                 errors = NULL;
3506         } else {
3507                 VERIFY(nvpair_value_int32(pair, &rv) == 0);
3508         }
3509 
3510         if (errlist == NULL)
3511                 nvlist_free(errors);
3512         else
3513                 *errlist = errors;
3514 
3515         return (rv);
3516 }
3517 
3518 static boolean_t
3519 propval_equals(nvpair_t *p1, nvpair_t *p2)
3520 {
3521         if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
3522                 /* dsl_prop_get_all_impl() format */
3523                 nvlist_t *attrs;
3524                 VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
3525                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3526                     &p1) == 0);
3527         }
3528 
3529         if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
3530                 nvlist_t *attrs;
3531                 VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
3532                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3533                     &p2) == 0);
3534         }
3535 
3536         if (nvpair_type(p1) != nvpair_type(p2))
3537                 return (B_FALSE);
3538 
3539         if (nvpair_type(p1) == DATA_TYPE_STRING) {
3540                 char *valstr1, *valstr2;
3541 
3542                 VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
3543                 VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
3544                 return (strcmp(valstr1, valstr2) == 0);
3545         } else {
3546                 uint64_t intval1, intval2;
3547 
3548                 VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
3549                 VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
3550                 return (intval1 == intval2);
3551         }
3552 }
3553 
3554 /*
3555  * Remove properties from props if they are not going to change (as determined
3556  * by comparison with origprops). Remove them from origprops as well, since we
3557  * do not need to clear or restore properties that won't change.
3558  */
3559 static void
3560 props_reduce(nvlist_t *props, nvlist_t *origprops)
3561 {
3562         nvpair_t *pair, *next_pair;
3563 
3564         if (origprops == NULL)
3565                 return; /* all props need to be received */
3566 
3567         pair = nvlist_next_nvpair(props, NULL);
3568         while (pair != NULL) {
3569                 const char *propname = nvpair_name(pair);
3570                 nvpair_t *match;
3571 
3572                 next_pair = nvlist_next_nvpair(props, pair);
3573 
3574                 if ((nvlist_lookup_nvpair(origprops, propname,
3575                     &match) != 0) || !propval_equals(pair, match))
3576                         goto next; /* need to set received value */
3577 
3578                 /* don't clear the existing received value */
3579                 (void) nvlist_remove_nvpair(origprops, match);
3580                 /* don't bother receiving the property */
3581                 (void) nvlist_remove_nvpair(props, pair);
3582 next:
3583                 pair = next_pair;
3584         }
3585 }
3586 
3587 #ifdef  DEBUG
3588 static boolean_t zfs_ioc_recv_inject_err;
3589 #endif
3590 
3591 /*
      * Receive a stream from a file descriptor into the snapshot named by
      * zc_value. If a property nvlist is supplied it is applied (as received
      * properties) before the stream is read, and on failure the previously
      * stashed received properties are put back.
      *
3592  * inputs:
3593  * zc_name              name of containing filesystem
3594  * zc_nvlist_src{_size} nvlist of properties to apply
3595  * zc_value             name of snapshot to create
3596  * zc_string            name of clone origin (if DRR_FLAG_CLONE)
3597  * zc_cookie            file descriptor to recv from
3598  * zc_begin_record      the BEGIN record of the stream (not byteswapped)
3599  * zc_guid              force flag
3600  * zc_cleanup_fd        cleanup-on-exit file descriptor
3601  * zc_action_handle     handle for this guid/ds mapping (or zero on first call)
3602  *
3603  * outputs:
3604  * zc_cookie            number of bytes read
3605  * zc_nvlist_dst{_size} error for each unapplied received property
3606  * zc_obj               zprop_errflags_t
3607  * zc_action_handle     handle for this guid/ds mapping
      *
      * Returns EINVAL for a malformed zc_value, EBADF if zc_cookie is not an
      * open file descriptor, otherwise the receive error; if the receive
      * succeeded but the property error list could not be handed back, EINVAL.
3608  */
3609 static int
3610 zfs_ioc_recv(zfs_cmd_t *zc)
3611 {
3612         file_t *fp;
3613         objset_t *os;
3614         dmu_recv_cookie_t drc;
3615         boolean_t force = (boolean_t)zc->zc_guid;
3616         int fd;
3617         int error = 0;
3618         int props_error = 0;
3619         nvlist_t *errors;
3620         offset_t off;
3621         nvlist_t *props = NULL; /* sent properties */
3622         nvlist_t *origprops = NULL; /* existing properties */
3623         objset_t *origin = NULL;
3624         char *tosnap;
3625         char tofs[ZFS_MAXNAMELEN];
3626         boolean_t first_recvd_props = B_FALSE;
3627 
             /* zc_value must be a valid "fs@snap" name without a '%'. */
3628         if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
3629             strchr(zc->zc_value, '@') == NULL ||
3630             strchr(zc->zc_value, '%'))
3631                 return (EINVAL);
3632 
             /*
              * Split a private copy at the '@': tofs keeps the filesystem
              * part and tosnap points just past the separator.
              */
3633         (void) strcpy(tofs, zc->zc_value);
3634         tosnap = strchr(tofs, '@');
3635         *tosnap++ = '\0';
3636 
3637         if (zc->zc_nvlist_src != NULL &&
3638             (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3639             zc->zc_iflags, &props)) != 0)
3640                 return (error);
3641 
             /*
              * Take a hold on the stream's file descriptor. Every path past
              * this point leaves through "out", which calls releasef().
              */
3642         fd = zc->zc_cookie;
3643         fp = getf(fd);
3644         if (fp == NULL) {
3645                 nvlist_free(props);
3646                 return (EBADF);
3647         }
3648 
             /* Per-property failures are collected here for zc_nvlist_dst. */
3649         VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
3650 
3651         if (props && dmu_objset_hold(tofs, FTAG, &os) == 0) {
3652                 if ((spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS) &&
3653                     !dsl_prop_get_hasrecvd(os)) {
3654                         first_recvd_props = B_TRUE;
3655                 }
3656 
3657                 /*
3658                  * If new received properties are supplied, they are to
3659                  * completely replace the existing received properties, so stash
3660                  * away the existing ones.
3661                  */
3662                 if (dsl_prop_get_received(os, &origprops) == 0) {
3663                         nvlist_t *errlist = NULL;
3664                         /*
3665                          * Don't bother writing a property if its value won't
3666                          * change (and avoid the unnecessary security checks).
3667                          *
3668                          * The first receive after SPA_VERSION_RECVD_PROPS is a
3669                          * special case where we blow away all local properties
3670                          * regardless.
3671                          */
3672                         if (!first_recvd_props)
3673                                 props_reduce(props, origprops);
3674                         if (zfs_check_clearable(tofs, origprops,
3675                             &errlist) != 0)
3676                                 (void) nvlist_merge(errors, errlist, 0);
3677                         nvlist_free(errlist);
3678                 }
3679 
3680                 dmu_objset_rele(os, FTAG);
3681         }
3682 
             /*
              * A non-empty zc_string names the clone origin. It is held only
              * across dmu_recv_begin() and released right after.
              */
3683         if (zc->zc_string[0]) {
3684                 error = dmu_objset_hold(zc->zc_string, FTAG, &origin);
3685                 if (error)
3686                         goto out;
3687         }
3688 
3689         error = dmu_recv_begin(tofs, tosnap, zc->zc_top_ds,
3690             &zc->zc_begin_record, force, origin, &drc);
3691         if (origin)
3692                 dmu_objset_rele(origin, FTAG);
3693         if (error)
3694                 goto out;
3695 
3696         /*
3697          * Set properties before we receive the stream so that they are applied
3698          * to the new data. Note that we must call dmu_recv_stream() if
3699          * dmu_recv_begin() succeeds.
3700          */
3701         if (props) {
3702                 nvlist_t *errlist;
3703 
3704                 if (dmu_objset_from_ds(drc.drc_logical_ds, &os) == 0) {
3705                         if (drc.drc_newfs) {
3706                                 if (spa_version(os->os_spa) >=
3707                                     SPA_VERSION_RECVD_PROPS)
3708                                         first_recvd_props = B_TRUE;
3709                         } else if (origprops != NULL) {
3710                                 if (clear_received_props(os, tofs, origprops,
3711                                     first_recvd_props ? NULL : props) != 0)
3712                                         zc->zc_obj |= ZPROP_ERR_NOCLEAR;
3713                         } else {
                                     /* Existing fs, but nothing was stashed. */
3714                                 zc->zc_obj |= ZPROP_ERR_NOCLEAR;
3715                         }
3716                         dsl_prop_set_hasrecvd(os);
3717                 } else if (!drc.drc_newfs) {
3718                         zc->zc_obj |= ZPROP_ERR_NOCLEAR;
3719                 }
3720 
                     /* Failures are reported through errors, not the return. */
3721                 (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
3722                     props, &errlist);
3723                 (void) nvlist_merge(errors, errlist, 0);
3724                 nvlist_free(errlist);
3725         }
3726 
             /*
              * Hand the error list back now. A failure here is only remembered
              * in props_error; it becomes the return value at the end if the
              * receive itself reports no error.
              */
3727         if (fit_error_list(zc, &errors) != 0 || put_nvlist(zc, errors) != 0) {
3728                 /*
3729                  * Caller made zc->zc_nvlist_dst less than the minimum expected
3730                  * size or supplied an invalid address.
3731                  */
3732                 props_error = EINVAL;
3733         }
3734 
             /*
              * Start at the file's current offset. off is passed by reference
              * and is used below to compute the number of bytes read.
              */
3735         off = fp->f_offset;
3736         error = dmu_recv_stream(&drc, fp->f_vnode, &off, zc->zc_cleanup_fd,
3737             &zc->zc_action_handle);
3738 
3739         if (error == 0) {
3740                 zfsvfs_t *zfsvfs = NULL;
3741 
3742                 if (getzfsvfs(tofs, &zfsvfs) == 0) {
3743                         /* online recv */
3744                         int end_err;
3745 
3746                         error = zfs_suspend_fs(zfsvfs);
3747                         /*
3748                          * If the suspend fails, then the recv_end will
3749                          * likely also fail, and clean up after itself.
3750                          */
3751                         end_err = dmu_recv_end(&drc);
3752                         if (error == 0)
3753                                 error = zfs_resume_fs(zfsvfs, tofs);
                             /* A suspend/resume error wins over end_err. */
3754                         error = error ? error : end_err;
3755                         VFS_RELE(zfsvfs->z_vfs);
3756                 } else {
3757                         error = dmu_recv_end(&drc);
3758                 }
3759         }
3760 
             /* Report the bytes read, then move the file offset to match. */
3761         zc->zc_cookie = off - fp->f_offset;
3762         if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
3763                 fp->f_offset = off;
3764 
3765 #ifdef  DEBUG
             /* One-shot forced failure to exercise the restore path below. */
3766         if (zfs_ioc_recv_inject_err) {
3767                 zfs_ioc_recv_inject_err = B_FALSE;
3768                 error = 1;
3769         }
3770 #endif
3771         /*
3772          * On error, restore the original props.
3773          */
3774         if (error && props) {
3775                 if (dmu_objset_hold(tofs, FTAG, &os) == 0) {
                             /* First undo the values that were set above. */
3776                         if (clear_received_props(os, tofs, props, NULL) != 0) {
3777                                 /*
3778                                  * We failed to clear the received properties.
3779                                  * Since we may have left a $recvd value on the
3780                                  * system, we can't clear the $hasrecvd flag.
3781                                  */
3782                                 zc->zc_obj |= ZPROP_ERR_NORESTORE;
3783                         } else if (first_recvd_props) {
3784                                 dsl_prop_unset_hasrecvd(os);
3785                         }
3786                         dmu_objset_rele(os, FTAG);
3787                 } else if (!drc.drc_newfs) {
3788                         /* We failed to clear the received properties. */
3789                         zc->zc_obj |= ZPROP_ERR_NORESTORE;
3790                 }
3791 
3792                 if (origprops == NULL && !drc.drc_newfs) {
3793                         /* We failed to stash the original properties. */
3794                         zc->zc_obj |= ZPROP_ERR_NORESTORE;
3795                 }
3796 
3797                 /*
3798                  * dsl_props_set() will not convert RECEIVED to LOCAL on or
3799                  * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
3800                  * explicitly if we're restoring local properties cleared in the
3801                  * first new-style receive.
3802                  */
3803                 if (origprops != NULL &&
3804                     zfs_set_prop_nvlist(tofs, (first_recvd_props ?
3805                     ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
3806                     origprops, NULL) != 0) {
3807                         /*
3808                          * We stashed the original properties but failed to
3809                          * restore them.
3810                          */
3811                         zc->zc_obj |= ZPROP_ERR_NORESTORE;
3812                 }
3813         }
     /* Common exit: free the nvlists and drop the hold taken by getf(). */
3814 out:
3815         nvlist_free(props);
3816         nvlist_free(origprops);
3817         nvlist_free(errors);
3818         releasef(fd);
3819 
             /* A receive error takes precedence over props_error. */
3820         if (error == 0)
3821                 error = props_error;
3822 
3823         return (error);
3824 }
3825 
3826 /*
3827  * inputs:
3828  * zc_name      name of snapshot to send
3829  * zc_cookie    file descriptor to send stream to
3830  * zc_obj       fromorigin flag (mutually exclusive with zc_fromobj)
3831  * zc_sendobj   objsetid of snapshot to send
3832  * zc_fromobj   objsetid of incremental fromsnap (may be zero)
3833  * zc_guid      if set, estimate size of stream only.  zc_cookie is ignored.
3834  *              output size in zc_objset_type.
3835  *
3836  * outputs: none
3837  */
3838 static int
3839 zfs_ioc_send(zfs_cmd_t *zc)
3840 {
3841         objset_t *fromsnap = NULL;
3842         objset_t *tosnap;
3843         int error;
3844         offset_t off;
3845         dsl_dataset_t *ds;
3846         dsl_dataset_t *dsfrom = NULL;
3847         spa_t *spa;
3848         dsl_pool_t *dp;
3849         boolean_t estimate = (zc->zc_guid != 0);
3850 
3851         error = spa_open(zc->zc_name, &spa, FTAG);
3852         if (error)
3853                 return (error);
3854 
3855         dp = spa_get_dsl(spa);
3856         rw_enter(&dp->dp_config_rwlock, RW_READER);
3857         error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
3858         rw_exit(&dp->dp_config_rwlock);
3859         if (error) {
3860                 spa_close(spa, FTAG);
3861                 return (error);
3862         }
3863 
3864         error = dmu_objset_from_ds(ds, &tosnap);
3865         if (error) {
3866                 dsl_dataset_rele(ds, FTAG);
3867                 spa_close(spa, FTAG);
3868                 return (error);
3869         }
3870 
3871         if (zc->zc_fromobj != 0) {
3872                 rw_enter(&dp->dp_config_rwlock, RW_READER);
3873                 error = dsl_dataset_hold_obj(dp, zc->zc_fromobj, FTAG, &dsfrom);
3874                 rw_exit(&dp->dp_config_rwlock);
3875                 spa_close(spa, FTAG);
3876                 if (error) {
3877                         dsl_dataset_rele(ds, FTAG);
3878                         return (error);
3879                 }
3880                 error = dmu_objset_from_ds(dsfrom, &fromsnap);
3881                 if (error) {
3882                         dsl_dataset_rele(dsfrom, FTAG);
3883                         dsl_dataset_rele(ds, FTAG);
3884                         return (error);
3885                 }
3886         } else {
3887                 spa_close(spa, FTAG);
3888         }
3889 
3890         if (estimate) {
3891                 error = dmu_send_estimate(tosnap, fromsnap, zc->zc_obj,
3892                     &zc->zc_objset_type);
3893         } else {
3894                 file_t *fp = getf(zc->zc_cookie);
3895                 if (fp == NULL) {
3896                         dsl_dataset_rele(ds, FTAG);
3897                         if (dsfrom)
3898                                 dsl_dataset_rele(dsfrom, FTAG);
3899                         return (EBADF);
3900                 }
3901 
3902                 off = fp->f_offset;
3903                 error = dmu_send(tosnap, fromsnap, zc->zc_obj,
3904                     zc->zc_cookie, fp->f_vnode, &off);
3905 
3906                 if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
3907                         fp->f_offset = off;
3908                 releasef(zc->zc_cookie);
3909         }
3910         if (dsfrom)
3911                 dsl_dataset_rele(dsfrom, FTAG);
3912         dsl_dataset_rele(ds, FTAG);
3913         return (error);
3914 }
3915 
3916 /*
3917  * inputs:
3918  * zc_name      name of snapshot on which to report progress
3919  * zc_cookie    file descriptor of send stream
3920  *
3921  * outputs:
3922  * zc_cookie    number of bytes written in send stream thus far
3923  */
3924 static int
3925 zfs_ioc_send_progress(zfs_cmd_t *zc)
3926 {
3927         dsl_dataset_t *ds;
3928         dmu_sendarg_t *dsp = NULL;
3929         int error;
3930 
3931         if ((error = dsl_dataset_hold(zc->zc_name, FTAG, &ds)) != 0)
3932                 return (error);
3933 
3934         mutex_enter(&ds->ds_sendstream_lock);
3935 
3936         /*
3937          * Iterate over all the send streams currently active on this dataset.
3938          * If there's one which matches the specified file descriptor _and_ the
3939          * stream was started by the current process, return the progress of
3940          * that stream.
3941          */
3942         for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
3943             dsp = list_next(&ds->ds_sendstreams, dsp)) {
3944                 if (dsp->dsa_outfd == zc->zc_cookie &&
3945                     dsp->dsa_proc == curproc)
3946                         break;
3947         }
3948 
3949         if (dsp != NULL)
3950                 zc->zc_cookie = *(dsp->dsa_off);
3951         else
3952                 error = ENOENT;
3953 
3954         mutex_exit(&ds->ds_sendstream_lock);
3955         dsl_dataset_rele(ds, FTAG);
3956         return (error);
3957 }
3958 
3959 static int
3960 zfs_ioc_inject_fault(zfs_cmd_t *zc)
3961 {
3962         int id, error;
3963 
3964         error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
3965             &zc->zc_inject_record);
3966 
3967         if (error == 0)
3968                 zc->zc_guid = (uint64_t)id;
3969 
3970         return (error);
3971 }
3972 
3973 static int
3974 zfs_ioc_clear_fault(zfs_cmd_t *zc)
3975 {
3976         return (zio_clear_fault((int)zc->zc_guid));
3977 }
3978 
3979 static int
3980 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
3981 {
3982         int id = (int)zc->zc_guid;
3983         int error;
3984 
3985         error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
3986             &zc->zc_inject_record);
3987 
3988         zc->zc_guid = id;
3989 
3990         return (error);
3991 }
3992 
3993 static int
3994 zfs_ioc_error_log(zfs_cmd_t *zc)
3995 {
3996         spa_t *spa;
3997         int error;
3998         size_t count = (size_t)zc->zc_nvlist_dst_size;
3999 
4000         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
4001                 return (error);
4002 
4003         error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
4004             &count);
4005         if (error == 0)
4006                 zc->zc_nvlist_dst_size = count;
4007         else
4008                 zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
4009 
4010         spa_close(spa, FTAG);
4011 
4012         return (error);
4013 }
4014 
4015 static int
4016 zfs_ioc_clear(zfs_cmd_t *zc)
4017 {
4018         spa_t *spa;
4019         vdev_t *vd;
4020         int error;
4021 
4022         /*
4023          * On zpool clear we also fix up missing slogs
4024          */
4025         mutex_enter(&spa_namespace_lock);
4026         spa = spa_lookup(zc->zc_name);
4027         if (spa == NULL) {
4028                 mutex_exit(&spa_namespace_lock);
4029                 return (EIO);
4030         }
4031         if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
4032                 /* we need to let spa_open/spa_load clear the chains */
4033                 spa_set_log_state(spa, SPA_LOG_CLEAR);
4034         }
4035         spa->spa_last_open_failed = 0;
4036         mutex_exit(&spa_namespace_lock);
4037 
4038         if (zc->zc_cookie & ZPOOL_NO_REWIND) {
4039                 error = spa_open(zc->zc_name, &spa, FTAG);
4040         } else {
4041                 nvlist_t *policy;
4042                 nvlist_t *config = NULL;
4043 
4044                 if (zc->zc_nvlist_src == NULL)
4045                         return (EINVAL);
4046 
4047                 if ((error = get_nvlist(zc->zc_nvlist_src,
4048                     zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
4049                         error = spa_open_rewind(zc->zc_name, &spa, FTAG,
4050                             policy, &config);
4051                         if (config != NULL) {
4052                                 int err;
4053 
4054                                 if ((err = put_nvlist(zc, config)) != 0)
4055                                         error = err;
4056                                 nvlist_free(config);
4057                         }
4058                         nvlist_free(policy);
4059                 }
4060         }
4061 
4062         if (error)
4063                 return (error);
4064 
4065         spa_vdev_state_enter(spa, SCL_NONE);
4066 
4067         if (zc->zc_guid == 0) {
4068                 vd = NULL;
4069         } else {
4070                 vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
4071                 if (vd == NULL) {
4072                         (void) spa_vdev_state_exit(spa, NULL, ENODEV);
4073                         spa_close(spa, FTAG);
4074                         return (ENODEV);
4075                 }
4076         }
4077 
4078         vdev_clear(spa, vd);
4079 
4080         (void) spa_vdev_state_exit(spa, NULL, 0);
4081 
4082         /*
4083          * Resume any suspended I/Os.
4084          */
4085         if (zio_resume(spa) != 0)
4086                 error = EIO;
4087 
4088         spa_close(spa, FTAG);
4089 
4090         return (error);
4091 }
4092 
4093 static int
4094 zfs_ioc_pool_reopen(zfs_cmd_t *zc)
4095 {
4096         spa_t *spa;
4097         int error;
4098 
4099         error = spa_open(zc->zc_name, &spa, FTAG);
4100         if (error)
4101                 return (error);
4102 
4103         spa_vdev_state_enter(spa, SCL_NONE);
4104         vdev_reopen(spa->spa_root_vdev);
4105         (void) spa_vdev_state_exit(spa, NULL, 0);
4106         spa_close(spa, FTAG);
4107         return (0);
4108 }
4109 /*
4110  * inputs:
4111  * zc_name      name of filesystem
4112  * zc_value     name of origin snapshot
4113  *
4114  * outputs:
4115  * zc_string    name of conflicting snapshot, if there is one
4116  */
4117 static int
4118 zfs_ioc_promote(zfs_cmd_t *zc)
4119 {
4120         char *cp;
4121 
4122         /*
4123          * We don't need to unmount *all* the origin fs's snapshots, but
4124          * it's easier.
4125          */
4126         cp = strchr(zc->zc_value, '@');
4127         if (cp)
4128                 *cp = '\0';
4129         (void) dmu_objset_find(zc->zc_value,
4130             zfs_unmount_snap, NULL, DS_FIND_SNAPSHOTS);
4131         return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
4132 }
4133 
4134 /*
4135  * Retrieve a single {user|group}{used|quota}@... property.
4136  *
4137  * inputs:
4138  * zc_name      name of filesystem
4139  * zc_objset_type zfs_userquota_prop_t
4140  * zc_value     domain name (eg. "S-1-234-567-89")
4141  * zc_guid      RID/UID/GID
4142  *
4143  * outputs:
4144  * zc_cookie    property value
4145  */
4146 static int
4147 zfs_ioc_userspace_one(zfs_cmd_t *zc)
4148 {
4149         zfsvfs_t *zfsvfs;
4150         int error;
4151 
4152         if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
4153                 return (EINVAL);
4154 
4155         error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
4156         if (error)
4157                 return (error);
4158 
4159         error = zfs_userspace_one(zfsvfs,
4160             zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
4161         zfsvfs_rele(zfsvfs, FTAG);
4162 
4163         return (error);
4164 }
4165 
4166 /*
4167  * inputs:
4168  * zc_name              name of filesystem
4169  * zc_cookie            zap cursor
4170  * zc_objset_type       zfs_userquota_prop_t
4171  * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
4172  *
4173  * outputs:
4174  * zc_nvlist_dst[_size] data buffer (array of zfs_useracct_t)
4175  * zc_cookie    zap cursor
4176  */
4177 static int
4178 zfs_ioc_userspace_many(zfs_cmd_t *zc)
4179 {
4180         zfsvfs_t *zfsvfs;
4181         int bufsize = zc->zc_nvlist_dst_size;
4182 
4183         if (bufsize <= 0)
4184                 return (ENOMEM);
4185 
4186         int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
4187         if (error)
4188                 return (error);
4189 
4190         void *buf = kmem_alloc(bufsize, KM_SLEEP);
4191 
4192         error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
4193             buf, &zc->zc_nvlist_dst_size);
4194 
4195         if (error == 0) {
4196                 error = xcopyout(buf,
4197                     (void *)(uintptr_t)zc->zc_nvlist_dst,
4198                     zc->zc_nvlist_dst_size);
4199         }
4200         kmem_free(buf, bufsize);
4201         zfsvfs_rele(zfsvfs, FTAG);
4202 
4203         return (error);
4204 }
4205 
4206 /*
4207  * inputs:
4208  * zc_name              name of filesystem
4209  *
4210  * outputs:
4211  * none
4212  */
4213 static int
4214 zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
4215 {
4216         objset_t *os;
4217         int error = 0;
4218         zfsvfs_t *zfsvfs;
4219 
4220         if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
4221                 if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
4222                         /*
4223                          * If userused is not enabled, it may be because the
4224                          * objset needs to be closed & reopened (to grow the
4225                          * objset_phys_t).  Suspend/resume the fs will do that.
4226                          */
4227                         error = zfs_suspend_fs(zfsvfs);
4228                         if (error == 0)
4229                                 error = zfs_resume_fs(zfsvfs, zc->zc_name);
4230                 }
4231                 if (error == 0)
4232                         error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
4233                 VFS_RELE(zfsvfs->z_vfs);
4234         } else {
4235                 /* XXX kind of reading contents without owning */
4236                 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
4237                 if (error)
4238                         return (error);
4239 
4240                 error = dmu_objset_userspace_upgrade(os);
4241                 dmu_objset_rele(os, FTAG);
4242         }
4243 
4244         return (error);
4245 }
4246 
4247 /*
4248  * We don't want to have a hard dependency
4249  * against some special symbols in sharefs
4250  * nfs, and smbsrv.  Determine them if needed when
4251  * the first file system is shared.
4252  * Neither sharefs, nfs or smbsrv are unloadable modules.
4253  */
     /*
      * Entry points looked up with ddi_modsym(): "nfs_export" in fs/nfs,
      * "sharefs_impl" in fs/sharefs and "smb_server_share" in drv/smbsrv.
      * Each stays NULL until zfs_ioc_share()/zfs_init_sharefs() resolves it.
      */
4254 int (*znfsexport_fs)(void *arg);
4255 int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
4256 int (*zsmbexport_fs)(void *arg, boolean_t add_share);
4257 
     /*
      * Set to 1 by zfs_ioc_share() once the NFS (resp. SMB) entry point and
      * the sharefs entry point have all been resolved.
      */
4258 int zfs_nfsshare_inited;
4259 int zfs_smbshare_inited;
4260 
     /* Module handles returned by ddi_modopen(). */
4261 ddi_modhandle_t nfs_mod;
4262 ddi_modhandle_t sharefs_mod;
4263 ddi_modhandle_t smbsrv_mod;
     /* Held by zfs_ioc_share() while the handles and entry points are set. */
4264 kmutex_t zfs_share_lock;
4265 
4266 static int
4267 zfs_init_sharefs()
4268 {
4269         int error;
4270 
4271         ASSERT(MUTEX_HELD(&zfs_share_lock));
4272         /* Both NFS and SMB shares also require sharetab support. */
4273         if (sharefs_mod == NULL && ((sharefs_mod =
4274             ddi_modopen("fs/sharefs",
4275             KRTLD_MODE_FIRST, &error)) == NULL)) {
4276                 return (ENOSYS);
4277         }
4278         if (zshare_fs == NULL && ((zshare_fs =
4279             (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
4280             ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) {
4281                 return (ENOSYS);
4282         }
4283         return (0);
4284 }
4285 
4286 static int
4287 zfs_ioc_share(zfs_cmd_t *zc)
4288 {
4289         int error;
4290         int opcode;
4291 
4292         switch (zc->zc_share.z_sharetype) {
4293         case ZFS_SHARE_NFS:
4294         case ZFS_UNSHARE_NFS:
4295                 if (zfs_nfsshare_inited == 0) {
4296                         mutex_enter(&zfs_share_lock);
4297                         if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
4298                             KRTLD_MODE_FIRST, &error)) == NULL)) {
4299                                 mutex_exit(&zfs_share_lock);
4300                                 return (ENOSYS);
4301                         }
4302                         if (znfsexport_fs == NULL &&
4303                             ((znfsexport_fs = (int (*)(void *))
4304                             ddi_modsym(nfs_mod,
4305                             "nfs_export", &error)) == NULL)) {
4306                                 mutex_exit(&zfs_share_lock);
4307                                 return (ENOSYS);
4308                         }
4309                         error = zfs_init_sharefs();
4310                         if (error) {
4311                                 mutex_exit(&zfs_share_lock);
4312                                 return (ENOSYS);
4313                         }
4314                         zfs_nfsshare_inited = 1;
4315                         mutex_exit(&zfs_share_lock);
4316                 }
4317                 break;
4318         case ZFS_SHARE_SMB:
4319         case ZFS_UNSHARE_SMB:
4320                 if (zfs_smbshare_inited == 0) {
4321                         mutex_enter(&zfs_share_lock);
4322                         if (smbsrv_mod == NULL && ((smbsrv_mod =
4323                             ddi_modopen("drv/smbsrv",
4324                             KRTLD_MODE_FIRST, &error)) == NULL)) {
4325                                 mutex_exit(&zfs_share_lock);
4326                                 return (ENOSYS);
4327                         }
4328                         if (zsmbexport_fs == NULL && ((zsmbexport_fs =
4329                             (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
4330                             "smb_server_share", &error)) == NULL)) {
4331                                 mutex_exit(&zfs_share_lock);
4332                                 return (ENOSYS);
4333                         }
4334                         error = zfs_init_sharefs();
4335                         if (error) {
4336                                 mutex_exit(&zfs_share_lock);
4337                                 return (ENOSYS);
4338                         }
4339                         zfs_smbshare_inited = 1;
4340                         mutex_exit(&zfs_share_lock);
4341                 }
4342                 break;
4343         default:
4344                 return (EINVAL);
4345         }
4346 
4347         switch (zc->zc_share.z_sharetype) {
4348         case ZFS_SHARE_NFS:
4349         case ZFS_UNSHARE_NFS:
4350                 if (error =
4351                     znfsexport_fs((void *)
4352                     (uintptr_t)zc->zc_share.z_exportdata))
4353                         return (error);
4354                 break;
4355         case ZFS_SHARE_SMB:
4356         case ZFS_UNSHARE_SMB:
4357                 if (error = zsmbexport_fs((void *)
4358                     (uintptr_t)zc->zc_share.z_exportdata,
4359                     zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
4360                     B_TRUE: B_FALSE)) {
4361                         return (error);
4362                 }
4363                 break;
4364         }
4365 
4366         opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
4367             zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
4368             SHAREFS_ADD : SHAREFS_REMOVE;
4369 
4370         /*
4371          * Add or remove share from sharetab
4372          */
4373         error = zshare_fs(opcode,
4374             (void *)(uintptr_t)zc->zc_share.z_sharedata,
4375             zc->zc_share.z_sharemax);
4376 
4377         return (error);
4378 
4379 }
4380 
4381 ace_t full_access[] = {
             /*
              * NOTE(review): a single entry built from (uid_t)-1,
              * ACE_ALL_PERMS, ACE_EVERYONE and 0; presumably an ACL giving
              * everyone all permissions - confirm against the ace_t field
              * order and the user of this array.
              */
4382         {(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
4383 };
4384 
4385 /*
4386  * inputs:
4387  * zc_name              name of containing filesystem
4388  * zc_obj               object # beyond which we want next in-use object #
4389  *
4390  * outputs:
4391  * zc_obj               next in-use object #
4392  */
4393 static int
4394 zfs_ioc_next_obj(zfs_cmd_t *zc)
4395 {
4396         objset_t *os = NULL;
4397         int error;
4398 
4399         error = dmu_objset_hold(zc->zc_name, FTAG, &os);
4400         if (error)
4401                 return (error);
4402 
4403         error = dmu_object_next(os, &zc->zc_obj, B_FALSE,
4404             os->os_dsl_dataset->ds_phys->ds_prev_snap_txg);
4405 
4406         dmu_objset_rele(os, FTAG);
4407         return (error);
4408 }
4409 
4410 /*
4411  * inputs:
4412  * zc_name              name of filesystem
4413  * zc_value             prefix name for snapshot
4414  * zc_cleanup_fd        cleanup-on-exit file descriptor for calling process
4415  *
4416  * outputs:
4417  */
4418 static int
4419 zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
4420 {
4421         char *snap_name;
4422         int error;
4423 
4424         snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
4425             (u_longlong_t)ddi_get_lbolt64());
4426 
4427         if (strlen(snap_name) >= MAXNAMELEN) {
4428                 strfree(snap_name);
4429                 return (E2BIG);
4430         }
4431 
4432         error = dmu_objset_snapshot(zc->zc_name, snap_name, snap_name,
4433             NULL, B_FALSE, B_TRUE, zc->zc_cleanup_fd);
4434         if (error != 0) {
4435                 strfree(snap_name);
4436                 return (error);
4437         }
4438 
4439         (void) strcpy(zc->zc_value, snap_name);
4440         strfree(snap_name);
4441         return (0);
4442 }
4443 
4444 /*
4445  * inputs:
4446  * zc_name              name of "to" snapshot
4447  * zc_value             name of "from" snapshot
4448  * zc_cookie            file descriptor to write diff data on
4449  *
4450  * outputs:
4451  * dmu_diff_record_t's to the file descriptor
4452  */
4453 static int
4454 zfs_ioc_diff(zfs_cmd_t *zc)
4455 {
4456         objset_t *fromsnap;
4457         objset_t *tosnap;
4458         file_t *fp;
4459         offset_t off;
4460         int error;
4461 
4462         error = dmu_objset_hold(zc->zc_name, FTAG, &tosnap);
4463         if (error)
4464                 return (error);
4465 
4466         error = dmu_objset_hold(zc->zc_value, FTAG, &fromsnap);
4467         if (error) {
4468                 dmu_objset_rele(tosnap, FTAG);
4469                 return (error);
4470         }
4471 
4472         fp = getf(zc->zc_cookie);
4473         if (fp == NULL) {
4474                 dmu_objset_rele(fromsnap, FTAG);
4475                 dmu_objset_rele(tosnap, FTAG);
4476                 return (EBADF);
4477         }
4478 
4479         off = fp->f_offset;
4480 
4481         error = dmu_diff(tosnap, fromsnap, fp->f_vnode, &off);
4482 
4483         if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
4484                 fp->f_offset = off;
4485         releasef(zc->zc_cookie);
4486 
4487         dmu_objset_rele(fromsnap, FTAG);
4488         dmu_objset_rele(tosnap, FTAG);
4489         return (error);
4490 }
4491 
4492 /*
4493  * Remove all ACL files in shares dir
4494  */
4495 static int
4496 zfs_smb_acl_purge(znode_t *dzp)
4497 {
4498         zap_cursor_t    zc;
4499         zap_attribute_t zap;
4500         zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
4501         int error;
4502 
4503         for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
4504             (error = zap_cursor_retrieve(&zc, &zap)) == 0;
4505             zap_cursor_advance(&zc)) {
4506                 if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred,
4507                     NULL, 0)) != 0)
4508                         break;
4509         }
4510         zap_cursor_fini(&zc);
4511         return (error);
4512 }
4513 
4514 static int
4515 zfs_ioc_smb_acl(zfs_cmd_t *zc)
4516 {
4517         vnode_t *vp;
4518         znode_t *dzp;
4519         vnode_t *resourcevp = NULL;
4520         znode_t *sharedir;
4521         zfsvfs_t *zfsvfs;
4522         nvlist_t *nvlist;
4523         char *src, *target;
4524         vattr_t vattr;
4525         vsecattr_t vsec;
4526         int error = 0;
4527 
4528         if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
4529             NO_FOLLOW, NULL, &vp)) != 0)
4530                 return (error);
4531 
4532         /* Now make sure mntpnt and dataset are ZFS */
4533 
4534         if (vp->v_vfsp->vfs_fstype != zfsfstype ||
4535             (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
4536             zc->zc_name) != 0)) {
4537                 VN_RELE(vp);
4538                 return (EINVAL);
4539         }
4540 
4541         dzp = VTOZ(vp);
4542         zfsvfs = dzp->z_zfsvfs;
4543         ZFS_ENTER(zfsvfs);
4544 
4545         /*
4546          * Create share dir if its missing.
4547          */
4548         mutex_enter(&zfsvfs->z_lock);
4549         if (zfsvfs->z_shares_dir == 0) {
4550                 dmu_tx_t *tx;
4551 
4552                 tx = dmu_tx_create(zfsvfs->z_os);
4553                 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
4554                     ZFS_SHARES_DIR);
4555                 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
4556                 error = dmu_tx_assign(tx, TXG_WAIT);
4557                 if (error) {
4558                         dmu_tx_abort(tx);
4559                 } else {
4560                         error = zfs_create_share_dir(zfsvfs, tx);
4561                         dmu_tx_commit(tx);
4562                 }
4563                 if (error) {
4564                         mutex_exit(&zfsvfs->z_lock);
4565                         VN_RELE(vp);
4566                         ZFS_EXIT(zfsvfs);
4567                         return (error);
4568                 }
4569         }
4570         mutex_exit(&zfsvfs->z_lock);
4571 
4572         ASSERT(zfsvfs->z_shares_dir);
4573         if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
4574                 VN_RELE(vp);
4575                 ZFS_EXIT(zfsvfs);
4576                 return (error);
4577         }
4578 
4579         switch (zc->zc_cookie) {
4580         case ZFS_SMB_ACL_ADD:
4581                 vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
4582                 vattr.va_type = VREG;
4583                 vattr.va_mode = S_IFREG|0777;
4584                 vattr.va_uid = 0;
4585                 vattr.va_gid = 0;
4586 
4587                 vsec.vsa_mask = VSA_ACE;
4588                 vsec.vsa_aclentp = &full_access;
4589                 vsec.vsa_aclentsz = sizeof (full_access);
4590                 vsec.vsa_aclcnt = 1;
4591 
4592                 error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
4593                     &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
4594                 if (resourcevp)
4595                         VN_RELE(resourcevp);
4596                 break;
4597 
4598         case ZFS_SMB_ACL_REMOVE:
4599                 error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
4600                     NULL, 0);
4601                 break;
4602 
4603         case ZFS_SMB_ACL_RENAME:
4604                 if ((error = get_nvlist(zc->zc_nvlist_src,
4605                     zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
4606                         VN_RELE(vp);
4607                         ZFS_EXIT(zfsvfs);
4608                         return (error);
4609                 }
4610                 if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
4611                     nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
4612                     &target)) {
4613                         VN_RELE(vp);
4614                         VN_RELE(ZTOV(sharedir));
4615                         ZFS_EXIT(zfsvfs);
4616                         nvlist_free(nvlist);
4617                         return (error);
4618                 }
4619                 error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
4620                     kcred, NULL, 0);
4621                 nvlist_free(nvlist);
4622                 break;
4623 
4624         case ZFS_SMB_ACL_PURGE:
4625                 error = zfs_smb_acl_purge(sharedir);
4626                 break;
4627 
4628         default:
4629                 error = EINVAL;
4630                 break;
4631         }
4632 
4633         VN_RELE(vp);
4634         VN_RELE(ZTOV(sharedir));
4635 
4636         ZFS_EXIT(zfsvfs);
4637 
4638         return (error);
4639 }
4640 
4641 /*
4642  * inputs:
4643  * zc_name              name of filesystem
4644  * zc_value             short name of snap
4645  * zc_string            user-supplied tag for this hold
4646  * zc_cookie            recursive flag
4647  * zc_temphold          set if hold is temporary
4648  * zc_cleanup_fd        cleanup-on-exit file descriptor for calling process
4649  * zc_sendobj           if non-zero, the objid for zc_name@zc_value
4650  * zc_createtxg         if zc_sendobj is non-zero, snap must have zc_createtxg
4651  *
4652  * outputs:             none
4653  */
4654 static int
4655 zfs_ioc_hold(zfs_cmd_t *zc)
4656 {
4657         boolean_t recursive = zc->zc_cookie;
4658         spa_t *spa;
4659         dsl_pool_t *dp;
4660         dsl_dataset_t *ds;
4661         int error;
4662         minor_t minor = 0;
4663 
4664         if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
4665                 return (EINVAL);
4666 
4667         if (zc->zc_sendobj == 0) {
4668                 return (dsl_dataset_user_hold(zc->zc_name, zc->zc_value,
4669                     zc->zc_string, recursive, zc->zc_temphold,
4670                     zc->zc_cleanup_fd));
4671         }
4672 
4673         if (recursive)
4674                 return (EINVAL);
4675 
4676         error = spa_open(zc->zc_name, &spa, FTAG);
4677         if (error)
4678                 return (error);
4679 
4680         dp = spa_get_dsl(spa);
4681         rw_enter(&dp->dp_config_rwlock, RW_READER);
4682         error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
4683         rw_exit(&dp->dp_config_rwlock);
4684         spa_close(spa, FTAG);
4685         if (error)
4686                 return (error);
4687 
4688         /*
4689          * Until we have a hold on this snapshot, it's possible that
4690          * zc_sendobj could've been destroyed and reused as part
4691          * of a later txg.  Make sure we're looking at the right object.
4692          */
4693         if (zc->zc_createtxg != ds->ds_phys->ds_creation_txg) {
4694                 dsl_dataset_rele(ds, FTAG);
4695                 return (ENOENT);
4696         }
4697 
4698         if (zc->zc_cleanup_fd != -1 && zc->zc_temphold) {
4699                 error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
4700                 if (error) {
4701                         dsl_dataset_rele(ds, FTAG);
4702                         return (error);
4703                 }
4704         }
4705 
4706         error = dsl_dataset_user_hold_for_send(ds, zc->zc_string,
4707             zc->zc_temphold);
4708         if (minor != 0) {
4709                 if (error == 0) {
4710                         dsl_register_onexit_hold_cleanup(ds, zc->zc_string,
4711                             minor);
4712                 }
4713                 zfs_onexit_fd_rele(zc->zc_cleanup_fd);
4714         }
4715         dsl_dataset_rele(ds, FTAG);
4716 
4717         return (error);
4718 }
4719 
4720 /*
4721  * inputs:
4722  * zc_name      name of dataset from which we're releasing a user hold
4723  * zc_value     short name of snap
4724  * zc_string    user-supplied tag for this hold
4725  * zc_cookie    recursive flag
4726  *
4727  * outputs:     none
4728  */
4729 static int
4730 zfs_ioc_release(zfs_cmd_t *zc)
4731 {
4732         boolean_t recursive = zc->zc_cookie;
4733 
4734         if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
4735                 return (EINVAL);
4736 
4737         return (dsl_dataset_user_release(zc->zc_name, zc->zc_value,
4738             zc->zc_string, recursive));
4739 }
4740 
4741 /*
4742  * inputs:
4743  * zc_name              name of filesystem
4744  *
4745  * outputs:
4746  * zc_nvlist_src{_size} nvlist of snapshot holds
4747  */
4748 static int
4749 zfs_ioc_get_holds(zfs_cmd_t *zc)
4750 {
4751         nvlist_t *nvp;
4752         int error;
4753 
4754         if ((error = dsl_dataset_get_holds(zc->zc_name, &nvp)) == 0) {
4755                 error = put_nvlist(zc, nvp);
4756                 nvlist_free(nvp);
4757         }
4758 
4759         return (error);
4760 }
4761 
4762 /*
4763  * inputs:
4764  * zc_name              name of new filesystem or snapshot
4765  * zc_value             full name of old snapshot
4766  *
4767  * outputs:
4768  * zc_cookie            space in bytes
4769  * zc_objset_type       compressed space in bytes
4770  * zc_perm_action       uncompressed space in bytes
4771  */
4772 static int
4773 zfs_ioc_space_written(zfs_cmd_t *zc)
4774 {
4775         int error;
4776         dsl_dataset_t *new, *old;
4777 
4778         error = dsl_dataset_hold(zc->zc_name, FTAG, &new);
4779         if (error != 0)
4780                 return (error);
4781         error = dsl_dataset_hold(zc->zc_value, FTAG, &old);
4782         if (error != 0) {
4783                 dsl_dataset_rele(new, FTAG);
4784                 return (error);
4785         }
4786 
4787         error = dsl_dataset_space_written(old, new, &zc->zc_cookie,
4788             &zc->zc_objset_type, &zc->zc_perm_action);
4789         dsl_dataset_rele(old, FTAG);
4790         dsl_dataset_rele(new, FTAG);
4791         return (error);
4792 }
4793 
4794 /*
4795  * inputs:
4796  * zc_name              full name of last snapshot
4797  * zc_value             full name of first snapshot
4798  *
4799  * outputs:
4800  * zc_cookie            space in bytes
4801  * zc_objset_type       compressed space in bytes
4802  * zc_perm_action       uncompressed space in bytes
4803  */
4804 static int
4805 zfs_ioc_space_snaps(zfs_cmd_t *zc)
4806 {
4807         int error;
4808         dsl_dataset_t *new, *old;
4809 
4810         error = dsl_dataset_hold(zc->zc_name, FTAG, &new);
4811         if (error != 0)
4812                 return (error);
4813         error = dsl_dataset_hold(zc->zc_value, FTAG, &old);
4814         if (error != 0) {
4815                 dsl_dataset_rele(new, FTAG);
4816                 return (error);
4817         }
4818 
4819         error = dsl_dataset_space_wouldfree(old, new, &zc->zc_cookie,
4820             &zc->zc_objset_type, &zc->zc_perm_action);
4821         dsl_dataset_rele(old, FTAG);
4822         dsl_dataset_rele(new, FTAG);
4823         return (error);
4824 }
4825 
/*
 * Dispatch table for the ioctls served by zfsdev_ioctl().  The table is
 * indexed by (cmd - ZFS_IOC), so the position of an entry determines which
 * ioctl number it serves.
 *
 * Each entry lists the handler, the security-policy callback, the kind of
 * name expected in zc_name (POOL_NAME, DATASET_NAME or NO_NAME), whether a
 * successful call is passed to zfs_log_history(), and the POOL_CHECK_* flags
 * given to pool_status_check().
 *
 * pool create, destroy, and export don't log the history as part of
 * zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export
 * do the logging of those commands.
 */
static zfs_ioc_vec_t zfs_ioc_vec[] = {
        { zfs_ioc_pool_create, zfs_secpolicy_config, POOL_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_pool_destroy, zfs_secpolicy_config, POOL_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_pool_import, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_NONE },
        { zfs_ioc_pool_export, zfs_secpolicy_config, POOL_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_pool_configs, zfs_secpolicy_none, NO_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_pool_stats, zfs_secpolicy_read, POOL_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_pool_tryimport, zfs_secpolicy_config, NO_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_pool_scan, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE,
            POOL_CHECK_READONLY },
        { zfs_ioc_pool_upgrade, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_pool_get_history, zfs_secpolicy_config, POOL_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_vdev_add, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_vdev_remove, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_vdev_set_state, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_vdev_attach, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_vdev_detach, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_vdev_setpath, zfs_secpolicy_config, POOL_NAME, B_FALSE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_vdev_setfru,  zfs_secpolicy_config, POOL_NAME, B_FALSE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_objset_stats, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
            POOL_CHECK_SUSPENDED },
        { zfs_ioc_objset_zplprops, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_dataset_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
            POOL_CHECK_SUSPENDED },
        { zfs_ioc_snapshot_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
            POOL_CHECK_SUSPENDED },
        { zfs_ioc_set_prop, zfs_secpolicy_none, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_create, zfs_secpolicy_create, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_destroy, zfs_secpolicy_destroy, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_rename, zfs_secpolicy_rename, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_inject_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_error_log, zfs_secpolicy_inject, POOL_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_clear, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_NONE },
        { zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_dsobj_to_dsname, zfs_secpolicy_diff, POOL_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_obj_to_path, zfs_secpolicy_diff, DATASET_NAME, B_FALSE,
            POOL_CHECK_SUSPENDED },
        { zfs_ioc_pool_set_props, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_set_fsacl, zfs_secpolicy_fsacl, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_get_fsacl, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_share, zfs_secpolicy_share, DATASET_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_smb_acl, zfs_secpolicy_smb_acl, DATASET_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_userspace_one, zfs_secpolicy_userspace_one, DATASET_NAME,
            B_FALSE, POOL_CHECK_NONE },
        { zfs_ioc_userspace_many, zfs_secpolicy_userspace_many, DATASET_NAME,
            B_FALSE, POOL_CHECK_NONE },
        { zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
            DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_hold, zfs_secpolicy_hold, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_release, zfs_secpolicy_release, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
            POOL_CHECK_SUSPENDED },
        { zfs_ioc_objset_recvd_props, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_vdev_split, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_next_obj, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_diff, zfs_secpolicy_diff, DATASET_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot, DATASET_NAME,
            B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_obj_to_stats, zfs_secpolicy_diff, DATASET_NAME, B_FALSE,
            POOL_CHECK_SUSPENDED },
        { zfs_ioc_space_written, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
            POOL_CHECK_SUSPENDED },
        { zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
            POOL_CHECK_SUSPENDED },
        { zfs_ioc_destroy_snaps_nvl, zfs_secpolicy_destroy_recursive,
            DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_pool_reguid, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_pool_reopen, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED },
        { zfs_ioc_send_progress, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
            POOL_CHECK_NONE }
};
4959 
4960 int
4961 pool_status_check(const char *name, zfs_ioc_namecheck_t type,
4962     zfs_ioc_poolcheck_t check)
4963 {
4964         spa_t *spa;
4965         int error;
4966 
4967         ASSERT(type == POOL_NAME || type == DATASET_NAME);
4968 
4969         if (check & POOL_CHECK_NONE)
4970                 return (0);
4971 
4972         error = spa_open(name, &spa, FTAG);
4973         if (error == 0) {
4974                 if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
4975                         error = EAGAIN;
4976                 else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
4977                         error = EROFS;
4978                 spa_close(spa, FTAG);
4979         }
4980         return (error);
4981 }
4982 
4983 /*
4984  * Find a free minor number.
4985  */
4986 minor_t
4987 zfsdev_minor_alloc(void)
4988 {
4989         static minor_t last_minor;
4990         minor_t m;
4991 
4992         ASSERT(MUTEX_HELD(&zfsdev_state_lock));
4993 
4994         for (m = last_minor + 1; m != last_minor; m++) {
4995                 if (m > ZFSDEV_MAX_MINOR)
4996                         m = 1;
4997                 if (ddi_get_soft_state(zfsdev_state, m) == NULL) {
4998                         last_minor = m;
4999                         return (m);
5000                 }
5001         }
5002 
5003         return (0);
5004 }
5005 
5006 static int
5007 zfs_ctldev_init(dev_t *devp)
5008 {
5009         minor_t minor;
5010         zfs_soft_state_t *zs;
5011 
5012         ASSERT(MUTEX_HELD(&zfsdev_state_lock));
5013         ASSERT(getminor(*devp) == 0);
5014 
5015         minor = zfsdev_minor_alloc();
5016         if (minor == 0)
5017                 return (ENXIO);
5018 
5019         if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS)
5020                 return (EAGAIN);
5021 
5022         *devp = makedevice(getemajor(*devp), minor);
5023 
5024         zs = ddi_get_soft_state(zfsdev_state, minor);
5025         zs->zss_type = ZSST_CTLDEV;
5026         zfs_onexit_init((zfs_onexit_t **)&zs->zss_data);
5027 
5028         return (0);
5029 }
5030 
/*
 * Tear down a control-device instance: destroy its onexit data 'zo' and then
 * free the soft state of 'minor'.  The caller must hold zfsdev_state_lock.
 */
static void
zfs_ctldev_destroy(zfs_onexit_t *zo, minor_t minor)
{
        ASSERT(MUTEX_HELD(&zfsdev_state_lock));

        zfs_onexit_destroy(zo);
        ddi_soft_state_free(zfsdev_state, minor);
}
5039 
5040 void *
5041 zfsdev_get_soft_state(minor_t minor, enum zfs_soft_state_type which)
5042 {
5043         zfs_soft_state_t *zp;
5044 
5045         zp = ddi_get_soft_state(zfsdev_state, minor);
5046         if (zp == NULL || zp->zss_type != which)
5047                 return (NULL);
5048 
5049         return (zp->zss_data);
5050 }
5051 
5052 static int
5053 zfsdev_open(dev_t *devp, int flag, int otyp, cred_t *cr)
5054 {
5055         int error = 0;
5056 
5057         if (getminor(*devp) != 0)
5058                 return (zvol_open(devp, flag, otyp, cr));
5059 
5060         /* This is the control device. Allocate a new minor if requested. */
5061         if (flag & FEXCL) {
5062                 mutex_enter(&zfsdev_state_lock);
5063                 error = zfs_ctldev_init(devp);
5064                 mutex_exit(&zfsdev_state_lock);
5065         }
5066 
5067         return (error);
5068 }
5069 
5070 static int
5071 zfsdev_close(dev_t dev, int flag, int otyp, cred_t *cr)
5072 {
5073         zfs_onexit_t *zo;
5074         minor_t minor = getminor(dev);
5075 
5076         if (minor == 0)
5077                 return (0);
5078 
5079         mutex_enter(&zfsdev_state_lock);
5080         zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
5081         if (zo == NULL) {
5082                 mutex_exit(&zfsdev_state_lock);
5083                 return (zvol_close(dev, flag, otyp, cr));
5084         }
5085         zfs_ctldev_destroy(zo, minor);
5086         mutex_exit(&zfsdev_state_lock);
5087 
5088         return (0);
5089 }
5090 
5091 static int
5092 zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
5093 {
5094         zfs_cmd_t *zc;
5095         uint_t vec;
5096         int error, rc;
5097         minor_t minor = getminor(dev);
5098 
5099         if (minor != 0 &&
5100             zfsdev_get_soft_state(minor, ZSST_CTLDEV) == NULL)
5101                 return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp));
5102 
5103         vec = cmd - ZFS_IOC;
5104         ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));
5105 
5106         if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
5107                 return (EINVAL);
5108 
5109         zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
5110 
5111         error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
5112         if (error != 0)
5113                 error = EFAULT;
5114 
5115         if ((error == 0) && !(flag & FKIOCTL))
5116                 error = zfs_ioc_vec[vec].zvec_secpolicy(zc, cr);
5117 
5118         /*
5119          * Ensure that all pool/dataset names are valid before we pass down to
5120          * the lower layers.
5121          */
5122         if (error == 0) {
5123                 zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
5124                 zc->zc_iflags = flag & FKIOCTL;
5125                 switch (zfs_ioc_vec[vec].zvec_namecheck) {
5126                 case POOL_NAME:
5127                         if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
5128                                 error = EINVAL;
5129                         error = pool_status_check(zc->zc_name,
5130                             zfs_ioc_vec[vec].zvec_namecheck,
5131                             zfs_ioc_vec[vec].zvec_pool_check);
5132                         break;
5133 
5134                 case DATASET_NAME:
5135                         if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
5136                                 error = EINVAL;
5137                         error = pool_status_check(zc->zc_name,
5138                             zfs_ioc_vec[vec].zvec_namecheck,
5139                             zfs_ioc_vec[vec].zvec_pool_check);
5140                         break;
5141 
5142                 case NO_NAME:
5143                         break;
5144                 }
5145         }
5146 
5147         if (error == 0)
5148                 error = zfs_ioc_vec[vec].zvec_func(zc);
5149 
5150         rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
5151         if (error == 0) {
5152                 if (rc != 0)
5153                         error = EFAULT;
5154                 if (zfs_ioc_vec[vec].zvec_his_log)
5155                         zfs_log_history(zc);
5156         }
5157 
5158         kmem_free(zc, sizeof (zfs_cmd_t));
5159         return (error);
5160 }
5161 
5162 static int
5163 zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5164 {
5165         if (cmd != DDI_ATTACH)
5166                 return (DDI_FAILURE);
5167 
5168         if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
5169             DDI_PSEUDO, 0) == DDI_FAILURE)
5170                 return (DDI_FAILURE);
5171 
5172         zfs_dip = dip;
5173 
5174         ddi_report_dev(dip);
5175 
5176         return (DDI_SUCCESS);
5177 }
5178 
5179 static int
5180 zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5181 {
5182         if (spa_busy() || zfs_busy() || zvol_busy())
5183                 return (DDI_FAILURE);
5184 
5185         if (cmd != DDI_DETACH)
5186                 return (DDI_FAILURE);
5187 
5188         zfs_dip = NULL;
5189 
5190         ddi_prop_remove_all(dip);
5191         ddi_remove_minor_node(dip, NULL);
5192 
5193         return (DDI_SUCCESS);
5194 }
5195 
5196 /*ARGSUSED*/
5197 static int
5198 zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
5199 {
5200         switch (infocmd) {
5201         case DDI_INFO_DEVT2DEVINFO:
5202                 *result = zfs_dip;
5203                 return (DDI_SUCCESS);
5204 
5205         case DDI_INFO_DEVT2INSTANCE:
5206                 *result = (void *)0;
5207                 return (DDI_SUCCESS);
5208         }
5209 
5210         return (DDI_FAILURE);
5211 }
5212 
/*
 * OK, so this is a little weird.
 *
 * /dev/zfs is the control node, i.e. minor 0.
 * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
 *
 * /dev/zfs has basically nothing to do except serve up ioctls,
 * so most of the standard driver entry points are in zvol.c.
 *
 * The open, close and ioctl entry points are the zfsdev_* wrappers from this
 * file, which handle the control node themselves and pass zvol minors on to
 * zvol_open(), zvol_close() and zvol_ioctl().
 */
static struct cb_ops zfs_cb_ops = {
        zfsdev_open,    /* open */
        zfsdev_close,   /* close */
        zvol_strategy,  /* strategy */
        nodev,          /* print */
        zvol_dump,      /* dump */
        zvol_read,      /* read */
        zvol_write,     /* write */
        zfsdev_ioctl,   /* ioctl */
        nodev,          /* devmap */
        nodev,          /* mmap */
        nodev,          /* segmap */
        nochpoll,       /* poll */
        ddi_prop_op,    /* prop_op */
        NULL,           /* streamtab */
        D_NEW | D_MP | D_64BIT,         /* Driver compatibility flag */
        CB_REV,         /* version */
        nodev,          /* async read */
        nodev,          /* async write */
};
5242 
/*
 * Device operations for the zfs driver: the getinfo/attach/detach entry
 * points defined in this file plus the character/block entry points in
 * zfs_cb_ops.
 */
static struct dev_ops zfs_dev_ops = {
        DEVO_REV,       /* version */
        0,              /* refcnt */
        zfs_info,       /* info */
        nulldev,        /* identify */
        nulldev,        /* probe */
        zfs_attach,     /* attach */
        zfs_detach,     /* detach */
        nodev,          /* reset */
        &zfs_cb_ops,        /* driver operations */
        NULL,           /* no bus operations */
        NULL,           /* power */
        ddi_quiesce_not_needed, /* quiesce */
};
5257 
/*
 * Driver linkage: ties the "ZFS storage pool" description to zfs_dev_ops.
 */
static struct modldrv zfs_modldrv = {
        &mod_driverops,
        "ZFS storage pool",
        &zfs_dev_ops
};
5263 
/*
 * Module linkage passed to mod_install(), mod_remove() and mod_info().  It
 * carries two linkage structures: zfs_modlfs (defined outside this part of
 * the file) and the device driver linkage zfs_modldrv.
 */
static struct modlinkage modlinkage = {
        MODREV_1,
        (void *)&zfs_modlfs,
        (void *)&zfs_modldrv,
        NULL
};
5270 
5271 
/*
 * Thread-specific-data keys.  Both are created by _init() with tsd_create();
 * rrw_tsd_key is only declared here and is defined elsewhere.
 */
uint_t zfs_fsyncer_key;
extern uint_t rrw_tsd_key;
5274 
5275 int
5276 _init(void)
5277 {
5278         int error;
5279 
5280         spa_init(FREAD | FWRITE);
5281         zfs_init();
5282         zvol_init();
5283 
5284         if ((error = mod_install(&modlinkage)) != 0) {
5285                 zvol_fini();
5286                 zfs_fini();
5287                 spa_fini();
5288                 return (error);
5289         }
5290 
5291         tsd_create(&zfs_fsyncer_key, NULL);
5292         tsd_create(&rrw_tsd_key, NULL);
5293 
5294         error = ldi_ident_from_mod(&modlinkage, &zfs_li);
5295         ASSERT(error == 0);
5296         mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
5297 
5298         return (0);
5299 }
5300 
5301 int
5302 _fini(void)
5303 {
5304         int error;
5305 
5306         if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
5307                 return (EBUSY);
5308 
5309         if ((error = mod_remove(&modlinkage)) != 0)
5310                 return (error);
5311 
5312         zvol_fini();
5313         zfs_fini();
5314         spa_fini();
5315         if (zfs_nfsshare_inited)
5316                 (void) ddi_modclose(nfs_mod);
5317         if (zfs_smbshare_inited)
5318                 (void) ddi_modclose(smbsrv_mod);
5319         if (zfs_nfsshare_inited || zfs_smbshare_inited)
5320                 (void) ddi_modclose(sharefs_mod);
5321 
5322         tsd_destroy(&zfs_fsyncer_key);
5323         ldi_ident_release(zfs_li);
5324         zfs_li = NULL;
5325         mutex_destroy(&zfs_share_lock);
5326 
5327         return (error);
5328 }
5329 
5330 int
5331 _info(struct modinfo *modinfop)
5332 {
5333         return (mod_info(&modlinkage, modinfop));
5334 }