1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Portions Copyright 2011 Martin Matuska
  25  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  26  * Copyright (c) 2012 by Delphix. All rights reserved.
  27  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  28  */
  29 
  30 #include <sys/types.h>
  31 #include <sys/param.h>
  32 #include <sys/errno.h>
  33 #include <sys/uio.h>
  34 #include <sys/buf.h>
  35 #include <sys/modctl.h>
  36 #include <sys/open.h>
  37 #include <sys/file.h>
  38 #include <sys/kmem.h>
  39 #include <sys/conf.h>
  40 #include <sys/cmn_err.h>
  41 #include <sys/stat.h>
  42 #include <sys/zfs_ioctl.h>
  43 #include <sys/zfs_vfsops.h>
  44 #include <sys/zfs_znode.h>
  45 #include <sys/zap.h>
  46 #include <sys/spa.h>
  47 #include <sys/spa_impl.h>
  48 #include <sys/vdev.h>
  49 #include <sys/priv_impl.h>
  50 #include <sys/dmu.h>
  51 #include <sys/dsl_dir.h>
  52 #include <sys/dsl_dataset.h>
  53 #include <sys/dsl_prop.h>
  54 #include <sys/dsl_deleg.h>
  55 #include <sys/dmu_objset.h>
  56 #include <sys/dmu_impl.h>
  57 #include <sys/ddi.h>
  58 #include <sys/sunddi.h>
  59 #include <sys/sunldi.h>
  60 #include <sys/policy.h>
  61 #include <sys/zone.h>
  62 #include <sys/nvpair.h>
  63 #include <sys/pathname.h>
  64 #include <sys/mount.h>
  65 #include <sys/sdt.h>
  66 #include <sys/fs/zfs.h>
  67 #include <sys/zfs_ctldir.h>
  68 #include <sys/zfs_dir.h>
  69 #include <sys/zfs_onexit.h>
  70 #include <sys/zvol.h>
  71 #include <sys/dsl_scan.h>
  72 #include <sharefs/share.h>
  73 #include <sys/dmu_objset.h>
  74 
  75 #include "zfs_namecheck.h"
  76 #include "zfs_prop.h"
  77 #include "zfs_deleg.h"
  78 #include "zfs_comutil.h"
  79 
  80 extern struct modlfs zfs_modlfs;
  81 
  82 extern void zfs_init(void);
  83 extern void zfs_fini(void);
  84 
  85 ldi_ident_t zfs_li = NULL;
  86 dev_info_t *zfs_dip;
  87 
  88 typedef int zfs_ioc_func_t(zfs_cmd_t *);
  89 typedef int zfs_secpolicy_func_t(zfs_cmd_t *, cred_t *);
  90 
  91 typedef enum {
  92         NO_NAME,
  93         POOL_NAME,
  94         DATASET_NAME
  95 } zfs_ioc_namecheck_t;
  96 
  97 typedef enum {
  98         POOL_CHECK_NONE         = 1 << 0,
  99         POOL_CHECK_SUSPENDED    = 1 << 1,
 100         POOL_CHECK_READONLY     = 1 << 2
 101 } zfs_ioc_poolcheck_t;
 102 
 103 typedef struct zfs_ioc_vec {
 104         zfs_ioc_func_t          *zvec_func;
 105         zfs_secpolicy_func_t    *zvec_secpolicy;
 106         zfs_ioc_namecheck_t     zvec_namecheck;
 107         boolean_t               zvec_his_log;
 108         zfs_ioc_poolcheck_t     zvec_pool_check;
 109 } zfs_ioc_vec_t;
 110 
 111 /* This array is indexed by zfs_userquota_prop_t */
 112 static const char *userquota_perms[] = {
 113         ZFS_DELEG_PERM_USERUSED,
 114         ZFS_DELEG_PERM_USERQUOTA,
 115         ZFS_DELEG_PERM_GROUPUSED,
 116         ZFS_DELEG_PERM_GROUPQUOTA,
 117 };
 118 
 119 static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
 120 static int zfs_check_settable(const char *name, nvpair_t *property,
 121     cred_t *cr);
 122 static int zfs_check_clearable(char *dataset, nvlist_t *props,
 123     nvlist_t **errors);
 124 static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
 125     boolean_t *);
 126 int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t **);
 127 
 128 /* _NOTE(PRINTFLIKE(4)) - this is printf-like, but lint is too whiney */
 129 void
 130 __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
 131 {
 132         const char *newfile;
 133         char buf[512];
 134         va_list adx;
 135 
 136         /*
 137          * Get rid of annoying "../common/" prefix to filename.
 138          */
 139         newfile = strrchr(file, '/');
 140         if (newfile != NULL) {
 141                 newfile = newfile + 1; /* Get rid of leading / */
 142         } else {
 143                 newfile = file;
 144         }
 145 
 146         va_start(adx, fmt);
 147         (void) vsnprintf(buf, sizeof (buf), fmt, adx);
 148         va_end(adx);
 149 
 150         /*
 151          * To get this data, use the zfs-dprintf probe as so:
 152          * dtrace -q -n 'zfs-dprintf \
 153          *      /stringof(arg0) == "dbuf.c"/ \
 154          *      {printf("%s: %s", stringof(arg1), stringof(arg3))}'
 155          * arg0 = file name
 156          * arg1 = function name
 157          * arg2 = line number
 158          * arg3 = message
 159          */
 160         DTRACE_PROBE4(zfs__dprintf,
 161             char *, newfile, char *, func, int, line, char *, buf);
 162 }
 163 
 164 static void
 165 history_str_free(char *buf)
 166 {
 167         kmem_free(buf, HIS_MAX_RECORD_LEN);
 168 }
 169 
 170 static char *
 171 history_str_get(zfs_cmd_t *zc)
 172 {
 173         char *buf;
 174 
 175         if (zc->zc_history == NULL)
 176                 return (NULL);
 177 
 178         buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
 179         if (copyinstr((void *)(uintptr_t)zc->zc_history,
 180             buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
 181                 history_str_free(buf);
 182                 return (NULL);
 183         }
 184 
 185         buf[HIS_MAX_RECORD_LEN -1] = '\0';
 186 
 187         return (buf);
 188 }
 189 
 190 /*
 191  * Check to see if the named dataset is currently defined as bootable
 192  */
 193 static boolean_t
 194 zfs_is_bootfs(const char *name)
 195 {
 196         objset_t *os;
 197 
 198         if (dmu_objset_hold(name, FTAG, &os) == 0) {
 199                 boolean_t ret;
 200                 ret = (dmu_objset_id(os) == spa_bootfs(dmu_objset_spa(os)));
 201                 dmu_objset_rele(os, FTAG);
 202                 return (ret);
 203         }
 204         return (B_FALSE);
 205 }
 206 
 207 /*
 208  * zfs_earlier_version
 209  *
 210  *      Return non-zero if the spa version is less than requested version.
 211  */
 212 static int
 213 zfs_earlier_version(const char *name, int version)
 214 {
 215         spa_t *spa;
 216 
 217         if (spa_open(name, &spa, FTAG) == 0) {
 218                 if (spa_version(spa) < version) {
 219                         spa_close(spa, FTAG);
 220                         return (1);
 221                 }
 222                 spa_close(spa, FTAG);
 223         }
 224         return (0);
 225 }
 226 
 227 /*
 228  * zpl_earlier_version
 229  *
 230  * Return TRUE if the ZPL version is less than requested version.
 231  */
 232 static boolean_t
 233 zpl_earlier_version(const char *name, int version)
 234 {
 235         objset_t *os;
 236         boolean_t rc = B_TRUE;
 237 
 238         if (dmu_objset_hold(name, FTAG, &os) == 0) {
 239                 uint64_t zplversion;
 240 
 241                 if (dmu_objset_type(os) != DMU_OST_ZFS) {
 242                         dmu_objset_rele(os, FTAG);
 243                         return (B_TRUE);
 244                 }
 245                 /* XXX reading from non-owned objset */
 246                 if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
 247                         rc = zplversion < version;
 248                 dmu_objset_rele(os, FTAG);
 249         }
 250         return (rc);
 251 }
 252 
 253 static void
 254 zfs_log_history(zfs_cmd_t *zc)
 255 {
 256         spa_t *spa;
 257         char *buf;
 258 
 259         if ((buf = history_str_get(zc)) == NULL)
 260                 return;
 261 
 262         if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
 263                 if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
 264                         (void) spa_history_log(spa, buf, LOG_CMD_NORMAL);
 265                 spa_close(spa, FTAG);
 266         }
 267         history_str_free(buf);
 268 }
 269 
 270 /*
 271  * Policy for top-level read operations (list pools).  Requires no privileges,
 272  * and can be used in the local zone, as there is no associated dataset.
 273  */
 274 /* ARGSUSED */
 275 static int
 276 zfs_secpolicy_none(zfs_cmd_t *zc, cred_t *cr)
 277 {
 278         return (0);
 279 }
 280 
 281 /*
 282  * Policy for dataset read operations (list children, get statistics).  Requires
 283  * no privileges, but must be visible in the local zone.
 284  */
 285 /* ARGSUSED */
 286 static int
 287 zfs_secpolicy_read(zfs_cmd_t *zc, cred_t *cr)
 288 {
 289         if (INGLOBALZONE(curproc) ||
 290             zone_dataset_visible(zc->zc_name, NULL))
 291                 return (0);
 292 
 293         return (ENOENT);
 294 }
 295 
 296 static int
 297 zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
 298 {
 299         int writable = 1;
 300 
 301         /*
 302          * The dataset must be visible by this zone -- check this first
 303          * so they don't see EPERM on something they shouldn't know about.
 304          */
 305         if (!INGLOBALZONE(curproc) &&
 306             !zone_dataset_visible(dataset, &writable))
 307                 return (ENOENT);
 308 
 309         if (INGLOBALZONE(curproc)) {
 310                 /*
 311                  * If the fs is zoned, only root can access it from the
 312                  * global zone.
 313                  */
 314                 if (secpolicy_zfs(cr) && zoned)
 315                         return (EPERM);
 316         } else {
 317                 /*
 318                  * If we are in a local zone, the 'zoned' property must be set.
 319                  */
 320                 if (!zoned)
 321                         return (EPERM);
 322 
 323                 /* must be writable by this zone */
 324                 if (!writable)
 325                         return (EPERM);
 326         }
 327         return (0);
 328 }
 329 
 330 static int
 331 zfs_dozonecheck(const char *dataset, cred_t *cr)
 332 {
 333         uint64_t zoned;
 334 
 335         if (dsl_prop_get_integer(dataset, "zoned", &zoned, NULL))
 336                 return (ENOENT);
 337 
 338         return (zfs_dozonecheck_impl(dataset, zoned, cr));
 339 }
 340 
 341 static int
 342 zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
 343 {
 344         uint64_t zoned;
 345 
 346         rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
 347         if (dsl_prop_get_ds(ds, "zoned", 8, 1, &zoned, NULL)) {
 348                 rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
 349                 return (ENOENT);
 350         }
 351         rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
 352 
 353         return (zfs_dozonecheck_impl(dataset, zoned, cr));
 354 }
 355 
 356 /*
 357  * If name ends in a '@', then require recursive permissions.
 358  */
 359 int
 360 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
 361 {
 362         int error;
 363         boolean_t descendent = B_FALSE;
 364         dsl_dataset_t *ds;
 365         char *at;
 366 
 367         at = strchr(name, '@');
 368         if (at != NULL && at[1] == '\0') {
 369                 *at = '\0';
 370                 descendent = B_TRUE;
 371         }
 372 
 373         error = dsl_dataset_hold(name, FTAG, &ds);
 374         if (at != NULL)
 375                 *at = '@';
 376         if (error != 0)
 377                 return (error);
 378 
 379         error = zfs_dozonecheck_ds(name, ds, cr);
 380         if (error == 0) {
 381                 error = secpolicy_zfs(cr);
 382                 if (error)
 383                         error = dsl_deleg_access_impl(ds, descendent, perm, cr);
 384         }
 385 
 386         dsl_dataset_rele(ds, FTAG);
 387         return (error);
 388 }
 389 
 390 int
 391 zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
 392     const char *perm, cred_t *cr)
 393 {
 394         int error;
 395 
 396         error = zfs_dozonecheck_ds(name, ds, cr);
 397         if (error == 0) {
 398                 error = secpolicy_zfs(cr);
 399                 if (error)
 400                         error = dsl_deleg_access_impl(ds, B_FALSE, perm, cr);
 401         }
 402         return (error);
 403 }
 404 
 405 /*
 406  * Policy for setting the security label property.
 407  *
 408  * Returns 0 for success, non-zero for access and other errors.
 409  */
 410 static int
 411 zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
 412 {
 413         char            ds_hexsl[MAXNAMELEN];
 414         bslabel_t       ds_sl, new_sl;
 415         boolean_t       new_default = FALSE;
 416         uint64_t        zoned;
 417         int             needed_priv = -1;
 418         int             error;
 419 
 420         /* First get the existing dataset label. */
 421         error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
 422             1, sizeof (ds_hexsl), &ds_hexsl, NULL);
 423         if (error)
 424                 return (EPERM);
 425 
 426         if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
 427                 new_default = TRUE;
 428 
 429         /* The label must be translatable */
 430         if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
 431                 return (EINVAL);
 432 
 433         /*
 434          * In a non-global zone, disallow attempts to set a label that
 435          * doesn't match that of the zone; otherwise no other checks
 436          * are needed.
 437          */
 438         if (!INGLOBALZONE(curproc)) {
 439                 if (new_default || !blequal(&new_sl, CR_SL(CRED())))
 440                         return (EPERM);
 441                 return (0);
 442         }
 443 
 444         /*
 445          * For global-zone datasets (i.e., those whose zoned property is
 446          * "off", verify that the specified new label is valid for the
 447          * global zone.
 448          */
 449         if (dsl_prop_get_integer(name,
 450             zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
 451                 return (EPERM);
 452         if (!zoned) {
 453                 if (zfs_check_global_label(name, strval) != 0)
 454                         return (EPERM);
 455         }
 456 
 457         /*
 458          * If the existing dataset label is nondefault, check if the
 459          * dataset is mounted (label cannot be changed while mounted).
 460          * Get the zfsvfs; if there isn't one, then the dataset isn't
 461          * mounted (or isn't a dataset, doesn't exist, ...).
 462          */
 463         if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
 464                 objset_t *os;
 465                 static char *setsl_tag = "setsl_tag";
 466 
 467                 /*
 468                  * Try to own the dataset; abort if there is any error,
 469                  * (e.g., already mounted, in use, or other error).
 470                  */
 471                 error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE,
 472                     setsl_tag, &os);
 473                 if (error)
 474                         return (EPERM);
 475 
 476                 dmu_objset_disown(os, setsl_tag);
 477 
 478                 if (new_default) {
 479                         needed_priv = PRIV_FILE_DOWNGRADE_SL;
 480                         goto out_check;
 481                 }
 482 
 483                 if (hexstr_to_label(strval, &new_sl) != 0)
 484                         return (EPERM);
 485 
 486                 if (blstrictdom(&ds_sl, &new_sl))
 487                         needed_priv = PRIV_FILE_DOWNGRADE_SL;
 488                 else if (blstrictdom(&new_sl, &ds_sl))
 489                         needed_priv = PRIV_FILE_UPGRADE_SL;
 490         } else {
 491                 /* dataset currently has a default label */
 492                 if (!new_default)
 493                         needed_priv = PRIV_FILE_UPGRADE_SL;
 494         }
 495 
 496 out_check:
 497         if (needed_priv != -1)
 498                 return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
 499         return (0);
 500 }
 501 
 502 static int
 503 zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
 504     cred_t *cr)
 505 {
 506         char *strval;
 507 
 508         /*
 509          * Check permissions for special properties.
 510          */
 511         switch (prop) {
 512         case ZFS_PROP_ZONED:
 513                 /*
 514                  * Disallow setting of 'zoned' from within a local zone.
 515                  */
 516                 if (!INGLOBALZONE(curproc))
 517                         return (EPERM);
 518                 break;
 519 
 520         case ZFS_PROP_QUOTA:
 521                 if (!INGLOBALZONE(curproc)) {
 522                         uint64_t zoned;
 523                         char setpoint[MAXNAMELEN];
 524                         /*
 525                          * Unprivileged users are allowed to modify the
 526                          * quota on things *under* (ie. contained by)
 527                          * the thing they own.
 528                          */
 529                         if (dsl_prop_get_integer(dsname, "zoned", &zoned,
 530                             setpoint))
 531                                 return (EPERM);
 532                         if (!zoned || strlen(dsname) <= strlen(setpoint))
 533                                 return (EPERM);
 534                 }
 535                 break;
 536 
 537         case ZFS_PROP_MLSLABEL:
 538                 if (!is_system_labeled())
 539                         return (EPERM);
 540 
 541                 if (nvpair_value_string(propval, &strval) == 0) {
 542                         int err;
 543 
 544                         err = zfs_set_slabel_policy(dsname, strval, CRED());
 545                         if (err != 0)
 546                                 return (err);
 547                 }
 548                 break;
 549         }
 550 
 551         return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
 552 }
 553 
 554 int
 555 zfs_secpolicy_fsacl(zfs_cmd_t *zc, cred_t *cr)
 556 {
 557         int error;
 558 
 559         error = zfs_dozonecheck(zc->zc_name, cr);
 560         if (error)
 561                 return (error);
 562 
 563         /*
 564          * permission to set permissions will be evaluated later in
 565          * dsl_deleg_can_allow()
 566          */
 567         return (0);
 568 }
 569 
 570 int
 571 zfs_secpolicy_rollback(zfs_cmd_t *zc, cred_t *cr)
 572 {
 573         return (zfs_secpolicy_write_perms(zc->zc_name,
 574             ZFS_DELEG_PERM_ROLLBACK, cr));
 575 }
 576 
 577 int
 578 zfs_secpolicy_send(zfs_cmd_t *zc, cred_t *cr)
 579 {
 580         spa_t *spa;
 581         dsl_pool_t *dp;
 582         dsl_dataset_t *ds;
 583         char *cp;
 584         int error;
 585 
 586         /*
 587          * Generate the current snapshot name from the given objsetid, then
 588          * use that name for the secpolicy/zone checks.
 589          */
 590         cp = strchr(zc->zc_name, '@');
 591         if (cp == NULL)
 592                 return (EINVAL);
 593         error = spa_open(zc->zc_name, &spa, FTAG);
 594         if (error)
 595                 return (error);
 596 
 597         dp = spa_get_dsl(spa);
 598         rw_enter(&dp->dp_config_rwlock, RW_READER);
 599         error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
 600         rw_exit(&dp->dp_config_rwlock);
 601         spa_close(spa, FTAG);
 602         if (error)
 603                 return (error);
 604 
 605         dsl_dataset_name(ds, zc->zc_name);
 606 
 607         error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
 608             ZFS_DELEG_PERM_SEND, cr);
 609         dsl_dataset_rele(ds, FTAG);
 610 
 611         return (error);
 612 }
 613 
 614 static int
 615 zfs_secpolicy_deleg_share(zfs_cmd_t *zc, cred_t *cr)
 616 {
 617         vnode_t *vp;
 618         int error;
 619 
 620         if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
 621             NO_FOLLOW, NULL, &vp)) != 0)
 622                 return (error);
 623 
 624         /* Now make sure mntpnt and dataset are ZFS */
 625 
 626         if (vp->v_vfsp->vfs_fstype != zfsfstype ||
 627             (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
 628             zc->zc_name) != 0)) {
 629                 VN_RELE(vp);
 630                 return (EPERM);
 631         }
 632 
 633         VN_RELE(vp);
 634         return (dsl_deleg_access(zc->zc_name,
 635             ZFS_DELEG_PERM_SHARE, cr));
 636 }
 637 
 638 int
 639 zfs_secpolicy_share(zfs_cmd_t *zc, cred_t *cr)
 640 {
 641         if (!INGLOBALZONE(curproc))
 642                 return (EPERM);
 643 
 644         if (secpolicy_nfs(cr) == 0) {
 645                 return (0);
 646         } else {
 647                 return (zfs_secpolicy_deleg_share(zc, cr));
 648         }
 649 }
 650 
 651 int
 652 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, cred_t *cr)
 653 {
 654         if (!INGLOBALZONE(curproc))
 655                 return (EPERM);
 656 
 657         if (secpolicy_smb(cr) == 0) {
 658                 return (0);
 659         } else {
 660                 return (zfs_secpolicy_deleg_share(zc, cr));
 661         }
 662 }
 663 
 664 static int
 665 zfs_get_parent(const char *datasetname, char *parent, int parentsize)
 666 {
 667         char *cp;
 668 
 669         /*
 670          * Remove the @bla or /bla from the end of the name to get the parent.
 671          */
 672         (void) strncpy(parent, datasetname, parentsize);
 673         cp = strrchr(parent, '@');
 674         if (cp != NULL) {
 675                 cp[0] = '\0';
 676         } else {
 677                 cp = strrchr(parent, '/');
 678                 if (cp == NULL)
 679                         return (ENOENT);
 680                 cp[0] = '\0';
 681         }
 682 
 683         return (0);
 684 }
 685 
 686 int
 687 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
 688 {
 689         int error;
 690 
 691         if ((error = zfs_secpolicy_write_perms(name,
 692             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
 693                 return (error);
 694 
 695         return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
 696 }
 697 
 698 static int
 699 zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr)
 700 {
 701         return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
 702 }
 703 
 704 /*
 705  * Destroying snapshots with delegated permissions requires
 706  * descendent mount and destroy permissions.
 707  */
 708 static int
 709 zfs_secpolicy_destroy_recursive(zfs_cmd_t *zc, cred_t *cr)
 710 {
 711         int error;
 712         char *dsname;
 713 
 714         dsname = kmem_asprintf("%s@", zc->zc_name);
 715 
 716         error = zfs_secpolicy_destroy_perms(dsname, cr);
 717 
 718         strfree(dsname);
 719         return (error);
 720 }
 721 
 722 int
 723 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
 724 {
 725         char    parentname[MAXNAMELEN];
 726         int     error;
 727 
 728         if ((error = zfs_secpolicy_write_perms(from,
 729             ZFS_DELEG_PERM_RENAME, cr)) != 0)
 730                 return (error);
 731 
 732         if ((error = zfs_secpolicy_write_perms(from,
 733             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
 734                 return (error);
 735 
 736         if ((error = zfs_get_parent(to, parentname,
 737             sizeof (parentname))) != 0)
 738                 return (error);
 739 
 740         if ((error = zfs_secpolicy_write_perms(parentname,
 741             ZFS_DELEG_PERM_CREATE, cr)) != 0)
 742                 return (error);
 743 
 744         if ((error = zfs_secpolicy_write_perms(parentname,
 745             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
 746                 return (error);
 747 
 748         return (error);
 749 }
 750 
 751 static int
 752 zfs_secpolicy_rename(zfs_cmd_t *zc, cred_t *cr)
 753 {
 754         return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
 755 }
 756 
 757 static int
 758 zfs_secpolicy_promote(zfs_cmd_t *zc, cred_t *cr)
 759 {
 760         char    parentname[MAXNAMELEN];
 761         objset_t *clone;
 762         int error;
 763 
 764         error = zfs_secpolicy_write_perms(zc->zc_name,
 765             ZFS_DELEG_PERM_PROMOTE, cr);
 766         if (error)
 767                 return (error);
 768 
 769         error = dmu_objset_hold(zc->zc_name, FTAG, &clone);
 770 
 771         if (error == 0) {
 772                 dsl_dataset_t *pclone = NULL;
 773                 dsl_dir_t *dd;
 774                 dd = clone->os_dsl_dataset->ds_dir;
 775 
 776                 rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);
 777                 error = dsl_dataset_hold_obj(dd->dd_pool,
 778                     dd->dd_phys->dd_origin_obj, FTAG, &pclone);
 779                 rw_exit(&dd->dd_pool->dp_config_rwlock);
 780                 if (error) {
 781                         dmu_objset_rele(clone, FTAG);
 782                         return (error);
 783                 }
 784 
 785                 error = zfs_secpolicy_write_perms(zc->zc_name,
 786                     ZFS_DELEG_PERM_MOUNT, cr);
 787 
 788                 dsl_dataset_name(pclone, parentname);
 789                 dmu_objset_rele(clone, FTAG);
 790                 dsl_dataset_rele(pclone, FTAG);
 791                 if (error == 0)
 792                         error = zfs_secpolicy_write_perms(parentname,
 793                             ZFS_DELEG_PERM_PROMOTE, cr);
 794         }
 795         return (error);
 796 }
 797 
 798 static int
 799 zfs_secpolicy_receive(zfs_cmd_t *zc, cred_t *cr)
 800 {
 801         int error;
 802 
 803         if ((error = zfs_secpolicy_write_perms(zc->zc_name,
 804             ZFS_DELEG_PERM_RECEIVE, cr)) != 0)
 805                 return (error);
 806 
 807         if ((error = zfs_secpolicy_write_perms(zc->zc_name,
 808             ZFS_DELEG_PERM_MOUNT, cr)) != 0)
 809                 return (error);
 810 
 811         return (zfs_secpolicy_write_perms(zc->zc_name,
 812             ZFS_DELEG_PERM_CREATE, cr));
 813 }
 814 
 815 int
 816 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
 817 {
 818         return (zfs_secpolicy_write_perms(name,
 819             ZFS_DELEG_PERM_SNAPSHOT, cr));
 820 }
 821 
 822 static int
 823 zfs_secpolicy_snapshot(zfs_cmd_t *zc, cred_t *cr)
 824 {
 825 
 826         return (zfs_secpolicy_snapshot_perms(zc->zc_name, cr));
 827 }
 828 
 829 static int
 830 zfs_secpolicy_create(zfs_cmd_t *zc, cred_t *cr)
 831 {
 832         char    parentname[MAXNAMELEN];
 833         int     error;
 834 
 835         if ((error = zfs_get_parent(zc->zc_name, parentname,
 836             sizeof (parentname))) != 0)
 837                 return (error);
 838 
 839         if (zc->zc_value[0] != '\0') {
 840                 if ((error = zfs_secpolicy_write_perms(zc->zc_value,
 841                     ZFS_DELEG_PERM_CLONE, cr)) != 0)
 842                         return (error);
 843         }
 844 
 845         if ((error = zfs_secpolicy_write_perms(parentname,
 846             ZFS_DELEG_PERM_CREATE, cr)) != 0)
 847                 return (error);
 848 
 849         error = zfs_secpolicy_write_perms(parentname,
 850             ZFS_DELEG_PERM_MOUNT, cr);
 851 
 852         return (error);
 853 }
 854 
 855 static int
 856 zfs_secpolicy_umount(zfs_cmd_t *zc, cred_t *cr)
 857 {
 858         int error;
 859 
 860         error = secpolicy_fs_unmount(cr, NULL);
 861         if (error) {
 862                 error = dsl_deleg_access(zc->zc_name, ZFS_DELEG_PERM_MOUNT, cr);
 863         }
 864         return (error);
 865 }
 866 
 867 /*
 868  * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
 869  * SYS_CONFIG privilege, which is not available in a local zone.
 870  */
 871 /* ARGSUSED */
 872 static int
 873 zfs_secpolicy_config(zfs_cmd_t *zc, cred_t *cr)
 874 {
 875         if (secpolicy_sys_config(cr, B_FALSE) != 0)
 876                 return (EPERM);
 877 
 878         return (0);
 879 }
 880 
 881 /*
 882  * Policy for object to name lookups.
 883  */
 884 /* ARGSUSED */
 885 static int
 886 zfs_secpolicy_diff(zfs_cmd_t *zc, cred_t *cr)
 887 {
 888         int error;
 889 
 890         if ((error = secpolicy_sys_config(cr, B_FALSE)) == 0)
 891                 return (0);
 892 
 893         error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
 894         return (error);
 895 }
 896 
 897 /*
 898  * Policy for fault injection.  Requires all privileges.
 899  */
 900 /* ARGSUSED */
 901 static int
 902 zfs_secpolicy_inject(zfs_cmd_t *zc, cred_t *cr)
 903 {
 904         return (secpolicy_zinject(cr));
 905 }
 906 
 907 static int
 908 zfs_secpolicy_inherit(zfs_cmd_t *zc, cred_t *cr)
 909 {
 910         zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
 911 
 912         if (prop == ZPROP_INVAL) {
 913                 if (!zfs_prop_user(zc->zc_value))
 914                         return (EINVAL);
 915                 return (zfs_secpolicy_write_perms(zc->zc_name,
 916                     ZFS_DELEG_PERM_USERPROP, cr));
 917         } else {
 918                 return (zfs_secpolicy_setprop(zc->zc_name, prop,
 919                     NULL, cr));
 920         }
 921 }
 922 
 923 static int
 924 zfs_secpolicy_userspace_one(zfs_cmd_t *zc, cred_t *cr)
 925 {
 926         int err = zfs_secpolicy_read(zc, cr);
 927         if (err)
 928                 return (err);
 929 
 930         if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
 931                 return (EINVAL);
 932 
 933         if (zc->zc_value[0] == 0) {
 934                 /*
 935                  * They are asking about a posix uid/gid.  If it's
 936                  * themself, allow it.
 937                  */
 938                 if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
 939                     zc->zc_objset_type == ZFS_PROP_USERQUOTA) {
 940                         if (zc->zc_guid == crgetuid(cr))
 941                                 return (0);
 942                 } else {
 943                         if (groupmember(zc->zc_guid, cr))
 944                                 return (0);
 945                 }
 946         }
 947 
 948         return (zfs_secpolicy_write_perms(zc->zc_name,
 949             userquota_perms[zc->zc_objset_type], cr));
 950 }
 951 
 952 static int
 953 zfs_secpolicy_userspace_many(zfs_cmd_t *zc, cred_t *cr)
 954 {
 955         int err = zfs_secpolicy_read(zc, cr);
 956         if (err)
 957                 return (err);
 958 
 959         if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
 960                 return (EINVAL);
 961 
 962         return (zfs_secpolicy_write_perms(zc->zc_name,
 963             userquota_perms[zc->zc_objset_type], cr));
 964 }
 965 
 966 static int
 967 zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, cred_t *cr)
 968 {
 969         return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
 970             NULL, cr));
 971 }
 972 
 973 static int
 974 zfs_secpolicy_hold(zfs_cmd_t *zc, cred_t *cr)
 975 {
 976         return (zfs_secpolicy_write_perms(zc->zc_name,
 977             ZFS_DELEG_PERM_HOLD, cr));
 978 }
 979 
 980 static int
 981 zfs_secpolicy_release(zfs_cmd_t *zc, cred_t *cr)
 982 {
 983         return (zfs_secpolicy_write_perms(zc->zc_name,
 984             ZFS_DELEG_PERM_RELEASE, cr));
 985 }
 986 
 987 /*
 988  * Policy for allowing temporary snapshots to be taken or released
 989  */
 990 static int
 991 zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, cred_t *cr)
 992 {
 993         /*
 994          * A temporary snapshot is the same as a snapshot,
 995          * hold, destroy and release all rolled into one.
 996          * Delegated diff alone is sufficient that we allow this.
 997          */
 998         int error;
 999 
1000         if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1001             ZFS_DELEG_PERM_DIFF, cr)) == 0)
1002                 return (0);
1003 
1004         error = zfs_secpolicy_snapshot(zc, cr);
1005         if (!error)
1006                 error = zfs_secpolicy_hold(zc, cr);
1007         if (!error)
1008                 error = zfs_secpolicy_release(zc, cr);
1009         if (!error)
1010                 error = zfs_secpolicy_destroy(zc, cr);
1011         return (error);
1012 }
1013 
1014 /*
1015  * Returns the nvlist as specified by the user in the zfs_cmd_t.
1016  */
1017 static int
1018 get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1019 {
1020         char *packed;
1021         int error;
1022         nvlist_t *list = NULL;
1023 
1024         /*
1025          * Read in and unpack the user-supplied nvlist.
1026          */
1027         if (size == 0)
1028                 return (EINVAL);
1029 
1030         packed = kmem_alloc(size, KM_SLEEP);
1031 
1032         if ((error = ddi_copyin((void *)(uintptr_t)nvl, packed, size,
1033             iflag)) != 0) {
1034                 kmem_free(packed, size);
1035                 return (error);
1036         }
1037 
1038         if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1039                 kmem_free(packed, size);
1040                 return (error);
1041         }
1042 
1043         kmem_free(packed, size);
1044 
1045         *nvp = list;
1046         return (0);
1047 }
1048 
1049 static int
1050 fit_error_list(zfs_cmd_t *zc, nvlist_t **errors)
1051 {
1052         size_t size;
1053 
1054         VERIFY(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0);
1055 
1056         if (size > zc->zc_nvlist_dst_size) {
1057                 nvpair_t *more_errors;
1058                 int n = 0;
1059 
1060                 if (zc->zc_nvlist_dst_size < 1024)
1061                         return (ENOMEM);
1062 
1063                 VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, 0) == 0);
1064                 more_errors = nvlist_prev_nvpair(*errors, NULL);
1065 
1066                 do {
1067                         nvpair_t *pair = nvlist_prev_nvpair(*errors,
1068                             more_errors);
1069                         VERIFY(nvlist_remove_nvpair(*errors, pair) == 0);
1070                         n++;
1071                         VERIFY(nvlist_size(*errors, &size,
1072                             NV_ENCODE_NATIVE) == 0);
1073                 } while (size > zc->zc_nvlist_dst_size);
1074 
1075                 VERIFY(nvlist_remove_nvpair(*errors, more_errors) == 0);
1076                 VERIFY(nvlist_add_int32(*errors, ZPROP_N_MORE_ERRORS, n) == 0);
1077                 ASSERT(nvlist_size(*errors, &size, NV_ENCODE_NATIVE) == 0);
1078                 ASSERT(size <= zc->zc_nvlist_dst_size);
1079         }
1080 
1081         return (0);
1082 }
1083 
1084 static int
1085 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1086 {
1087         char *packed = NULL;
1088         int error = 0;
1089         size_t size;
1090 
1091         VERIFY(nvlist_size(nvl, &size, NV_ENCODE_NATIVE) == 0);
1092 
1093         if (size > zc->zc_nvlist_dst_size) {
1094                 error = ENOMEM;
1095         } else {
1096                 packed = kmem_alloc(size, KM_SLEEP);
1097                 VERIFY(nvlist_pack(nvl, &packed, &size, NV_ENCODE_NATIVE,
1098                     KM_SLEEP) == 0);
1099                 if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1100                     size, zc->zc_iflags) != 0)
1101                         error = EFAULT;
1102                 kmem_free(packed, size);
1103         }
1104 
1105         zc->zc_nvlist_dst_size = size;
1106         return (error);
1107 }
1108 
1109 static int
1110 getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1111 {
1112         objset_t *os;
1113         int error;
1114 
1115         error = dmu_objset_hold(dsname, FTAG, &os);
1116         if (error)
1117                 return (error);
1118         if (dmu_objset_type(os) != DMU_OST_ZFS) {
1119                 dmu_objset_rele(os, FTAG);
1120                 return (EINVAL);
1121         }
1122 
1123         mutex_enter(&os->os_user_ptr_lock);
1124         *zfvp = dmu_objset_get_user(os);
1125         if (*zfvp) {
1126                 VFS_HOLD((*zfvp)->z_vfs);
1127         } else {
1128                 error = ESRCH;
1129         }
1130         mutex_exit(&os->os_user_ptr_lock);
1131         dmu_objset_rele(os, FTAG);
1132         return (error);
1133 }
1134 
1135 /*
1136  * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1137  * case its z_vfs will be NULL, and it will be opened as the owner.
1138  * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
1139  * which prevents all vnode ops from running.
1140  */
1141 static int
1142 zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
1143 {
1144         int error = 0;
1145 
1146         if (getzfsvfs(name, zfvp) != 0)
1147                 error = zfsvfs_create(name, zfvp);
1148         if (error == 0) {
1149                 rrw_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
1150                     RW_READER, tag);
1151                 if ((*zfvp)->z_unmounted) {
1152                         /*
1153                          * XXX we could probably try again, since the unmounting
1154                          * thread should be just about to disassociate the
1155                          * objset from the zfsvfs.
1156                          */
1157                         rrw_exit(&(*zfvp)->z_teardown_lock, tag);
1158                         return (EBUSY);
1159                 }
1160         }
1161         return (error);
1162 }
1163 
1164 static void
1165 zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
1166 {
1167         rrw_exit(&zfsvfs->z_teardown_lock, tag);
1168 
1169         if (zfsvfs->z_vfs) {
1170                 VFS_RELE(zfsvfs->z_vfs);
1171         } else {
1172                 dmu_objset_disown(zfsvfs->z_os, zfsvfs);
1173                 zfsvfs_free(zfsvfs);
1174         }
1175 }
1176 
1177 static int
1178 zfs_ioc_pool_create(zfs_cmd_t *zc)
1179 {
1180         int error;
1181         nvlist_t *config, *props = NULL;
1182         nvlist_t *rootprops = NULL;
1183         nvlist_t *zplprops = NULL;
1184         char *buf;
1185 
1186         if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1187             zc->zc_iflags, &config))
1188                 return (error);
1189 
1190         if (zc->zc_nvlist_src_size != 0 && (error =
1191             get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1192             zc->zc_iflags, &props))) {
1193                 nvlist_free(config);
1194                 return (error);
1195         }
1196 
1197         if (props) {
1198                 nvlist_t *nvl = NULL;
1199                 uint64_t version = SPA_VERSION;
1200 
1201                 (void) nvlist_lookup_uint64(props,
1202                     zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1203                 if (!SPA_VERSION_IS_SUPPORTED(version)) {
1204                         error = EINVAL;
1205                         goto pool_props_bad;
1206                 }
1207                 (void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1208                 if (nvl) {
1209                         error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1210                         if (error != 0) {
1211                                 nvlist_free(config);
1212                                 nvlist_free(props);
1213                                 return (error);
1214                         }
1215                         (void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1216                 }
1217                 VERIFY(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1218                 error = zfs_fill_zplprops_root(version, rootprops,
1219                     zplprops, NULL);
1220                 if (error)
1221                         goto pool_props_bad;
1222         }
1223 
1224         buf = history_str_get(zc);
1225 
1226         error = spa_create(zc->zc_name, config, props, buf, zplprops);
1227 
1228         /*
1229          * Set the remaining root properties
1230          */
1231         if (!error && (error = zfs_set_prop_nvlist(zc->zc_name,
1232             ZPROP_SRC_LOCAL, rootprops, NULL)) != 0)
1233                 (void) spa_destroy(zc->zc_name);
1234 
1235         if (buf != NULL)
1236                 history_str_free(buf);
1237 
1238 pool_props_bad:
1239         nvlist_free(rootprops);
1240         nvlist_free(zplprops);
1241         nvlist_free(config);
1242         nvlist_free(props);
1243 
1244         return (error);
1245 }
1246 
1247 static int
1248 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1249 {
1250         int error;
1251         zfs_log_history(zc);
1252         error = spa_destroy(zc->zc_name);
1253         if (error == 0)
1254                 zvol_remove_minors(zc->zc_name);
1255         return (error);
1256 }
1257 
1258 static int
1259 zfs_ioc_pool_import(zfs_cmd_t *zc)
1260 {
1261         nvlist_t *config, *props = NULL;
1262         uint64_t guid;
1263         int error;
1264 
1265         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1266             zc->zc_iflags, &config)) != 0)
1267                 return (error);
1268 
1269         if (zc->zc_nvlist_src_size != 0 && (error =
1270             get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1271             zc->zc_iflags, &props))) {
1272                 nvlist_free(config);
1273                 return (error);
1274         }
1275 
1276         if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1277             guid != zc->zc_guid)
1278                 error = EINVAL;
1279         else
1280                 error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1281 
1282         if (zc->zc_nvlist_dst != 0) {
1283                 int err;
1284 
1285                 if ((err = put_nvlist(zc, config)) != 0)
1286                         error = err;
1287         }
1288 
1289         nvlist_free(config);
1290 
1291         if (props)
1292                 nvlist_free(props);
1293 
1294         return (error);
1295 }
1296 
1297 static int
1298 zfs_ioc_pool_export(zfs_cmd_t *zc)
1299 {
1300         int error;
1301         boolean_t force = (boolean_t)zc->zc_cookie;
1302         boolean_t hardforce = (boolean_t)zc->zc_guid;
1303 
1304         zfs_log_history(zc);
1305         error = spa_export(zc->zc_name, NULL, force, hardforce);
1306         if (error == 0)
1307                 zvol_remove_minors(zc->zc_name);
1308         return (error);
1309 }
1310 
1311 static int
1312 zfs_ioc_pool_configs(zfs_cmd_t *zc)
1313 {
1314         nvlist_t *configs;
1315         int error;
1316 
1317         if ((configs = spa_all_configs(&zc->zc_cookie)) == NULL)
1318                 return (EEXIST);
1319 
1320         error = put_nvlist(zc, configs);
1321 
1322         nvlist_free(configs);
1323 
1324         return (error);
1325 }
1326 
1327 /*
1328  * inputs:
1329  * zc_name              name of the pool
1330  *
1331  * outputs:
1332  * zc_cookie            real errno
1333  * zc_nvlist_dst        config nvlist
1334  * zc_nvlist_dst_size   size of config nvlist
1335  */
1336 static int
1337 zfs_ioc_pool_stats(zfs_cmd_t *zc)
1338 {
1339         nvlist_t *config;
1340         int error;
1341         int ret = 0;
1342 
1343         error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1344             sizeof (zc->zc_value));
1345 
1346         if (config != NULL) {
1347                 ret = put_nvlist(zc, config);
1348                 nvlist_free(config);
1349 
1350                 /*
1351                  * The config may be present even if 'error' is non-zero.
1352                  * In this case we return success, and preserve the real errno
1353                  * in 'zc_cookie'.
1354                  */
1355                 zc->zc_cookie = error;
1356         } else {
1357                 ret = error;
1358         }
1359 
1360         return (ret);
1361 }
1362 
1363 /*
1364  * Try to import the given pool, returning pool stats as appropriate so that
1365  * user land knows which devices are available and overall pool health.
1366  */
1367 static int
1368 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1369 {
1370         nvlist_t *tryconfig, *config;
1371         int error;
1372 
1373         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1374             zc->zc_iflags, &tryconfig)) != 0)
1375                 return (error);
1376 
1377         config = spa_tryimport(tryconfig);
1378 
1379         nvlist_free(tryconfig);
1380 
1381         if (config == NULL)
1382                 return (EINVAL);
1383 
1384         error = put_nvlist(zc, config);
1385         nvlist_free(config);
1386 
1387         return (error);
1388 }
1389 
1390 /*
1391  * inputs:
1392  * zc_name              name of the pool
1393  * zc_cookie            scan func (pool_scan_func_t)
1394  */
1395 static int
1396 zfs_ioc_pool_scan(zfs_cmd_t *zc)
1397 {
1398         spa_t *spa;
1399         int error;
1400 
1401         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1402                 return (error);
1403 
1404         if (zc->zc_cookie == POOL_SCAN_NONE)
1405                 error = spa_scan_stop(spa);
1406         else
1407                 error = spa_scan(spa, zc->zc_cookie);
1408 
1409         spa_close(spa, FTAG);
1410 
1411         return (error);
1412 }
1413 
1414 static int
1415 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1416 {
1417         spa_t *spa;
1418         int error;
1419 
1420         error = spa_open(zc->zc_name, &spa, FTAG);
1421         if (error == 0) {
1422                 spa_freeze(spa);
1423                 spa_close(spa, FTAG);
1424         }
1425         return (error);
1426 }
1427 
1428 static int
1429 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
1430 {
1431         spa_t *spa;
1432         int error;
1433 
1434         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1435                 return (error);
1436 
1437         if (zc->zc_cookie < spa_version(spa) ||
1438             !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
1439                 spa_close(spa, FTAG);
1440                 return (EINVAL);
1441         }
1442 
1443         spa_upgrade(spa, zc->zc_cookie);
1444         spa_close(spa, FTAG);
1445 
1446         return (error);
1447 }
1448 
1449 static int
1450 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
1451 {
1452         spa_t *spa;
1453         char *hist_buf;
1454         uint64_t size;
1455         int error;
1456 
1457         if ((size = zc->zc_history_len) == 0)
1458                 return (EINVAL);
1459 
1460         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1461                 return (error);
1462 
1463         if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
1464                 spa_close(spa, FTAG);
1465                 return (ENOTSUP);
1466         }
1467 
1468         hist_buf = kmem_alloc(size, KM_SLEEP);
1469         if ((error = spa_history_get(spa, &zc->zc_history_offset,
1470             &zc->zc_history_len, hist_buf)) == 0) {
1471                 error = ddi_copyout(hist_buf,
1472                     (void *)(uintptr_t)zc->zc_history,
1473                     zc->zc_history_len, zc->zc_iflags);
1474         }
1475 
1476         spa_close(spa, FTAG);
1477         kmem_free(hist_buf, size);
1478         return (error);
1479 }
1480 
1481 static int
1482 zfs_ioc_pool_reguid(zfs_cmd_t *zc)
1483 {
1484         spa_t *spa;
1485         int error;
1486 
1487         error = spa_open(zc->zc_name, &spa, FTAG);
1488         if (error == 0) {
1489                 error = spa_change_guid(spa);
1490                 spa_close(spa, FTAG);
1491         }
1492         return (error);
1493 }
1494 
1495 static int
1496 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
1497 {
1498         int error;
1499 
1500         if (error = dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value))
1501                 return (error);
1502 
1503         return (0);
1504 }
1505 
1506 /*
1507  * inputs:
1508  * zc_name              name of filesystem
1509  * zc_obj               object to find
1510  *
1511  * outputs:
1512  * zc_value             name of object
1513  */
1514 static int
1515 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
1516 {
1517         objset_t *os;
1518         int error;
1519 
1520         /* XXX reading from objset not owned */
1521         if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1522                 return (error);
1523         if (dmu_objset_type(os) != DMU_OST_ZFS) {
1524                 dmu_objset_rele(os, FTAG);
1525                 return (EINVAL);
1526         }
1527         error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
1528             sizeof (zc->zc_value));
1529         dmu_objset_rele(os, FTAG);
1530 
1531         return (error);
1532 }
1533 
1534 /*
1535  * inputs:
1536  * zc_name              name of filesystem
1537  * zc_obj               object to find
1538  *
1539  * outputs:
1540  * zc_stat              stats on object
1541  * zc_value             path to object
1542  */
1543 static int
1544 zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
1545 {
1546         objset_t *os;
1547         int error;
1548 
1549         /* XXX reading from objset not owned */
1550         if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os)) != 0)
1551                 return (error);
1552         if (dmu_objset_type(os) != DMU_OST_ZFS) {
1553                 dmu_objset_rele(os, FTAG);
1554                 return (EINVAL);
1555         }
1556         error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
1557             sizeof (zc->zc_value));
1558         dmu_objset_rele(os, FTAG);
1559 
1560         return (error);
1561 }
1562 
1563 static int
1564 zfs_ioc_vdev_add(zfs_cmd_t *zc)
1565 {
1566         spa_t *spa;
1567         int error;
1568         nvlist_t *config, **l2cache, **spares;
1569         uint_t nl2cache = 0, nspares = 0;
1570 
1571         error = spa_open(zc->zc_name, &spa, FTAG);
1572         if (error != 0)
1573                 return (error);
1574 
1575         error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1576             zc->zc_iflags, &config);
1577         (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_L2CACHE,
1578             &l2cache, &nl2cache);
1579 
1580         (void) nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_SPARES,
1581             &spares, &nspares);
1582 
1583         /*
1584          * A root pool with concatenated devices is not supported.
1585          * Thus, can not add a device to a root pool.
1586          *
1587          * Intent log device can not be added to a rootpool because
1588          * during mountroot, zil is replayed, a seperated log device
1589          * can not be accessed during the mountroot time.
1590          *
1591          * l2cache and spare devices are ok to be added to a rootpool.
1592          */
1593         if (spa_bootfs(spa) != 0 && nl2cache == 0 && nspares == 0) {
1594                 nvlist_free(config);
1595                 spa_close(spa, FTAG);
1596                 return (EDOM);
1597         }
1598 
1599         if (error == 0) {
1600                 error = spa_vdev_add(spa, config);
1601                 nvlist_free(config);
1602         }
1603         spa_close(spa, FTAG);
1604         return (error);
1605 }
1606 
1607 /*
1608  * inputs:
1609  * zc_name              name of the pool
1610  * zc_nvlist_conf       nvlist of devices to remove
1611  * zc_cookie            to stop the remove?
1612  */
1613 static int
1614 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
1615 {
1616         spa_t *spa;
1617         int error;
1618 
1619         error = spa_open(zc->zc_name, &spa, FTAG);
1620         if (error != 0)
1621                 return (error);
1622         error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
1623         spa_close(spa, FTAG);
1624         return (error);
1625 }
1626 
1627 static int
1628 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
1629 {
1630         spa_t *spa;
1631         int error;
1632         vdev_state_t newstate = VDEV_STATE_UNKNOWN;
1633 
1634         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1635                 return (error);
1636         switch (zc->zc_cookie) {
1637         case VDEV_STATE_ONLINE:
1638                 error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
1639                 break;
1640 
1641         case VDEV_STATE_OFFLINE:
1642                 error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
1643                 break;
1644 
1645         case VDEV_STATE_FAULTED:
1646                 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1647                     zc->zc_obj != VDEV_AUX_EXTERNAL)
1648                         zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1649 
1650                 error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
1651                 break;
1652 
1653         case VDEV_STATE_DEGRADED:
1654                 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
1655                     zc->zc_obj != VDEV_AUX_EXTERNAL)
1656                         zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
1657 
1658                 error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
1659                 break;
1660 
1661         default:
1662                 error = EINVAL;
1663         }
1664         zc->zc_cookie = newstate;
1665         spa_close(spa, FTAG);
1666         return (error);
1667 }
1668 
1669 static int
1670 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
1671 {
1672         spa_t *spa;
1673         int replacing = zc->zc_cookie;
1674         nvlist_t *config;
1675         int error;
1676 
1677         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1678                 return (error);
1679 
1680         if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1681             zc->zc_iflags, &config)) == 0) {
1682                 error = spa_vdev_attach(spa, zc->zc_guid, config, replacing);
1683                 nvlist_free(config);
1684         }
1685 
1686         spa_close(spa, FTAG);
1687         return (error);
1688 }
1689 
1690 static int
1691 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
1692 {
1693         spa_t *spa;
1694         int error;
1695 
1696         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1697                 return (error);
1698 
1699         error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
1700 
1701         spa_close(spa, FTAG);
1702         return (error);
1703 }
1704 
1705 static int
1706 zfs_ioc_vdev_split(zfs_cmd_t *zc)
1707 {
1708         spa_t *spa;
1709         nvlist_t *config, *props = NULL;
1710         int error;
1711         boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
1712 
1713         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1714                 return (error);
1715 
1716         if (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1717             zc->zc_iflags, &config)) {
1718                 spa_close(spa, FTAG);
1719                 return (error);
1720         }
1721 
1722         if (zc->zc_nvlist_src_size != 0 && (error =
1723             get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1724             zc->zc_iflags, &props))) {
1725                 spa_close(spa, FTAG);
1726                 nvlist_free(config);
1727                 return (error);
1728         }
1729 
1730         error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
1731 
1732         spa_close(spa, FTAG);
1733 
1734         nvlist_free(config);
1735         nvlist_free(props);
1736 
1737         return (error);
1738 }
1739 
1740 static int
1741 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
1742 {
1743         spa_t *spa;
1744         char *path = zc->zc_value;
1745         uint64_t guid = zc->zc_guid;
1746         int error;
1747 
1748         error = spa_open(zc->zc_name, &spa, FTAG);
1749         if (error != 0)
1750                 return (error);
1751 
1752         error = spa_vdev_setpath(spa, guid, path);
1753         spa_close(spa, FTAG);
1754         return (error);
1755 }
1756 
1757 static int
1758 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
1759 {
1760         spa_t *spa;
1761         char *fru = zc->zc_value;
1762         uint64_t guid = zc->zc_guid;
1763         int error;
1764 
1765         error = spa_open(zc->zc_name, &spa, FTAG);
1766         if (error != 0)
1767                 return (error);
1768 
1769         error = spa_vdev_setfru(spa, guid, fru);
1770         spa_close(spa, FTAG);
1771         return (error);
1772 }
1773 
1774 static int
1775 zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
1776 {
1777         int error = 0;
1778         nvlist_t *nv;
1779 
1780         dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1781 
1782         if (zc->zc_nvlist_dst != 0 &&
1783             (error = dsl_prop_get_all(os, &nv)) == 0) {
1784                 dmu_objset_stats(os, nv);
1785                 /*
1786                  * NB: zvol_get_stats() will read the objset contents,
1787                  * which we aren't supposed to do with a
1788                  * DS_MODE_USER hold, because it could be
1789                  * inconsistent.  So this is a bit of a workaround...
1790                  * XXX reading with out owning
1791                  */
1792                 if (!zc->zc_objset_stats.dds_inconsistent &&
1793                     dmu_objset_type(os) == DMU_OST_ZVOL) {
1794                         error = zvol_get_stats(os, nv);
1795                         if (error == EIO)
1796                                 return (error);
1797                         VERIFY3S(error, ==, 0);
1798                 }
1799                 error = put_nvlist(zc, nv);
1800                 nvlist_free(nv);
1801         }
1802 
1803         return (error);
1804 }
1805 
1806 /*
1807  * inputs:
1808  * zc_name              name of filesystem
1809  * zc_nvlist_dst_size   size of buffer for property nvlist
1810  *
1811  * outputs:
1812  * zc_objset_stats      stats
1813  * zc_nvlist_dst        property nvlist
1814  * zc_nvlist_dst_size   size of property nvlist
1815  */
1816 static int
1817 zfs_ioc_objset_stats(zfs_cmd_t *zc)
1818 {
1819         objset_t *os = NULL;
1820         int error;
1821 
1822         if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
1823                 return (error);
1824 
1825         error = zfs_ioc_objset_stats_impl(zc, os);
1826 
1827         dmu_objset_rele(os, FTAG);
1828 
1829         return (error);
1830 }
1831 
1832 /*
1833  * inputs:
1834  * zc_name              name of filesystem
1835  * zc_nvlist_dst_size   size of buffer for property nvlist
1836  *
1837  * outputs:
1838  * zc_nvlist_dst        received property nvlist
1839  * zc_nvlist_dst_size   size of received property nvlist
1840  *
1841  * Gets received properties (distinct from local properties on or after
1842  * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
1843  * local property values.
1844  */
1845 static int
1846 zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
1847 {
1848         objset_t *os = NULL;
1849         int error;
1850         nvlist_t *nv;
1851 
1852         if (error = dmu_objset_hold(zc->zc_name, FTAG, &os))
1853                 return (error);
1854 
1855         /*
1856          * Without this check, we would return local property values if the
1857          * caller has not already received properties on or after
1858          * SPA_VERSION_RECVD_PROPS.
1859          */
1860         if (!dsl_prop_get_hasrecvd(os)) {
1861                 dmu_objset_rele(os, FTAG);
1862                 return (ENOTSUP);
1863         }
1864 
1865         if (zc->zc_nvlist_dst != 0 &&
1866             (error = dsl_prop_get_received(os, &nv)) == 0) {
1867                 error = put_nvlist(zc, nv);
1868                 nvlist_free(nv);
1869         }
1870 
1871         dmu_objset_rele(os, FTAG);
1872         return (error);
1873 }
1874 
1875 static int
1876 nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
1877 {
1878         uint64_t value;
1879         int error;
1880 
1881         /*
1882          * zfs_get_zplprop() will either find a value or give us
1883          * the default value (if there is one).
1884          */
1885         if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
1886                 return (error);
1887         VERIFY(nvlist_add_uint64(props, zfs_prop_to_name(prop), value) == 0);
1888         return (0);
1889 }
1890 
1891 /*
1892  * inputs:
1893  * zc_name              name of filesystem
1894  * zc_nvlist_dst_size   size of buffer for zpl property nvlist
1895  *
1896  * outputs:
1897  * zc_nvlist_dst        zpl property nvlist
1898  * zc_nvlist_dst_size   size of zpl property nvlist
1899  */
1900 static int
1901 zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
1902 {
1903         objset_t *os;
1904         int err;
1905 
1906         /* XXX reading without owning */
1907         if (err = dmu_objset_hold(zc->zc_name, FTAG, &os))
1908                 return (err);
1909 
1910         dmu_objset_fast_stat(os, &zc->zc_objset_stats);
1911 
1912         /*
1913          * NB: nvl_add_zplprop() will read the objset contents,
1914          * which we aren't supposed to do with a DS_MODE_USER
1915          * hold, because it could be inconsistent.
1916          */
1917         if (zc->zc_nvlist_dst != NULL &&
1918             !zc->zc_objset_stats.dds_inconsistent &&
1919             dmu_objset_type(os) == DMU_OST_ZFS) {
1920                 nvlist_t *nv;
1921 
1922                 VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1923                 if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
1924                     (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
1925                     (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
1926                     (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0)
1927                         err = put_nvlist(zc, nv);
1928                 nvlist_free(nv);
1929         } else {
1930                 err = ENOENT;
1931         }
1932         dmu_objset_rele(os, FTAG);
1933         return (err);
1934 }
1935 
1936 static boolean_t
1937 dataset_name_hidden(const char *name)
1938 {
1939         /*
1940          * Skip over datasets that are not visible in this zone,
1941          * internal datasets (which have a $ in their name), and
1942          * temporary datasets (which have a % in their name).
1943          */
1944         if (strchr(name, '$') != NULL)
1945                 return (B_TRUE);
1946         if (strchr(name, '%') != NULL)
1947                 return (B_TRUE);
1948         if (!INGLOBALZONE(curproc) && !zone_dataset_visible(name, NULL))
1949                 return (B_TRUE);
1950         return (B_FALSE);
1951 }
1952 
1953 /*
1954  * inputs:
1955  * zc_name              name of filesystem
1956  * zc_cookie            zap cursor
1957  * zc_nvlist_dst_size   size of buffer for property nvlist
1958  *
1959  * outputs:
1960  * zc_name              name of next filesystem
1961  * zc_cookie            zap cursor
1962  * zc_objset_stats      stats
1963  * zc_nvlist_dst        property nvlist
1964  * zc_nvlist_dst_size   size of property nvlist
1965  */
1966 static int
1967 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
1968 {
1969         objset_t *os;
1970         int error;
1971         char *p;
1972         size_t orig_len = strlen(zc->zc_name);
1973 
1974 top:
1975         if (error = dmu_objset_hold(zc->zc_name, FTAG, &os)) {
1976                 if (error == ENOENT)
1977                         error = ESRCH;
1978                 return (error);
1979         }
1980 
1981         p = strrchr(zc->zc_name, '/');
1982         if (p == NULL || p[1] != '\0')
1983                 (void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
1984         p = zc->zc_name + strlen(zc->zc_name);
1985 
1986         /*
1987          * Pre-fetch the datasets.  dmu_objset_prefetch() always returns 0
1988          * but is not declared void because its called by dmu_objset_find().
1989          */
1990         if (zc->zc_cookie == 0) {
1991                 uint64_t cookie = 0;
1992                 int len = sizeof (zc->zc_name) - (p - zc->zc_name);
1993 
1994                 while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) {
1995                         if (!dataset_name_hidden(zc->zc_name))
1996                                 (void) dmu_objset_prefetch(zc->zc_name, NULL);
1997                 }
1998         }
1999 
2000         do {
2001                 error = dmu_dir_list_next(os,
2002                     sizeof (zc->zc_name) - (p - zc->zc_name), p,
2003                     NULL, &zc->zc_cookie);
2004                 if (error == ENOENT)
2005                         error = ESRCH;
2006         } while (error == 0 && dataset_name_hidden(zc->zc_name));
2007         dmu_objset_rele(os, FTAG);
2008 
2009         /*
2010          * If it's an internal dataset (ie. with a '$' in its name),
2011          * don't try to get stats for it, otherwise we'll return ENOENT.
2012          */
2013         if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2014                 error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2015                 if (error == ENOENT) {
2016                         /* We lost a race with destroy, get the next one. */
2017                         zc->zc_name[orig_len] = '\0';
2018                         goto top;
2019                 }
2020         }
2021         return (error);
2022 }
2023 
2024 /*
2025  * inputs:
2026  * zc_name              name of filesystem
2027  * zc_cookie            zap cursor
2028  * zc_nvlist_dst_size   size of buffer for property nvlist
2029  *
2030  * outputs:
2031  * zc_name              name of next snapshot
2032  * zc_objset_stats      stats
2033  * zc_nvlist_dst        property nvlist
2034  * zc_nvlist_dst_size   size of property nvlist
2035  */
2036 static int
2037 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2038 {
2039         objset_t *os;
2040         int error;
2041 
2042 top:
2043         if (zc->zc_cookie == 0)
2044                 (void) dmu_objset_find(zc->zc_name, dmu_objset_prefetch,
2045                     NULL, DS_FIND_SNAPSHOTS);
2046 
2047         error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2048         if (error)
2049                 return (error == ENOENT ? ESRCH : error);
2050 
2051         /*
2052          * A dataset name of maximum length cannot have any snapshots,
2053          * so exit immediately.
2054          */
2055         if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) {
2056                 dmu_objset_rele(os, FTAG);
2057                 return (ESRCH);
2058         }
2059 
2060         error = dmu_snapshot_list_next(os,
2061             sizeof (zc->zc_name) - strlen(zc->zc_name),
2062             zc->zc_name + strlen(zc->zc_name), &zc->zc_obj, &zc->zc_cookie,
2063             NULL);
2064 
2065         if (error == 0) {
2066                 dsl_dataset_t *ds;
2067                 dsl_pool_t *dp = os->os_dsl_dataset->ds_dir->dd_pool;
2068 
2069                 /*
2070                  * Since we probably don't have a hold on this snapshot,
2071                  * it's possible that the objsetid could have been destroyed
2072                  * and reused for a new objset. It's OK if this happens during
2073                  * a zfs send operation, since the new createtxg will be
2074                  * beyond the range we're interested in.
2075                  */
2076                 rw_enter(&dp->dp_config_rwlock, RW_READER);
2077                 error = dsl_dataset_hold_obj(dp, zc->zc_obj, FTAG, &ds);
2078                 rw_exit(&dp->dp_config_rwlock);
2079                 if (error) {
2080                         if (error == ENOENT) {
2081                                 /* Racing with destroy, get the next one. */
2082                                 *strchr(zc->zc_name, '@') = '\0';
2083                                 dmu_objset_rele(os, FTAG);
2084                                 goto top;
2085                         }
2086                 } else {
2087                         objset_t *ossnap;
2088 
2089                         error = dmu_objset_from_ds(ds, &ossnap);
2090                         if (error == 0)
2091                                 error = zfs_ioc_objset_stats_impl(zc, ossnap);
2092                         dsl_dataset_rele(ds, FTAG);
2093                 }
2094         } else if (error == ENOENT) {
2095                 error = ESRCH;
2096         }
2097 
2098         dmu_objset_rele(os, FTAG);
2099         /* if we failed, undo the @ that we tacked on to zc_name */
2100         if (error)
2101                 *strchr(zc->zc_name, '@') = '\0';
2102         return (error);
2103 }
2104 
2105 static int
2106 zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2107 {
2108         const char *propname = nvpair_name(pair);
2109         uint64_t *valary;
2110         unsigned int vallen;
2111         const char *domain;
2112         char *dash;
2113         zfs_userquota_prop_t type;
2114         uint64_t rid;
2115         uint64_t quota;
2116         zfsvfs_t *zfsvfs;
2117         int err;
2118 
2119         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2120                 nvlist_t *attrs;
2121                 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2122                 if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2123                     &pair) != 0)
2124                         return (EINVAL);
2125         }
2126 
2127         /*
2128          * A correctly constructed propname is encoded as
2129          * userquota@<rid>-<domain>.
2130          */
2131         if ((dash = strchr(propname, '-')) == NULL ||
2132             nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2133             vallen != 3)
2134                 return (EINVAL);
2135 
2136         domain = dash + 1;
2137         type = valary[0];
2138         rid = valary[1];
2139         quota = valary[2];
2140 
2141         err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2142         if (err == 0) {
2143                 err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2144                 zfsvfs_rele(zfsvfs, FTAG);
2145         }
2146 
2147         return (err);
2148 }
2149 
2150 /*
2151  * If the named property is one that has a special function to set its value,
2152  * return 0 on success and a positive error code on failure; otherwise if it is
2153  * not one of the special properties handled by this function, return -1.
2154  *
2155  * XXX: It would be better for callers of the property interface if we handled
2156  * these special cases in dsl_prop.c (in the dsl layer).
2157  */
2158 static int
2159 zfs_prop_set_special(const char *dsname, zprop_source_t source,
2160     nvpair_t *pair)
2161 {
2162         const char *propname = nvpair_name(pair);
2163         zfs_prop_t prop = zfs_name_to_prop(propname);
2164         uint64_t intval;
2165         int err;
2166 
2167         if (prop == ZPROP_INVAL) {
2168                 if (zfs_prop_userquota(propname))
2169                         return (zfs_prop_set_userquota(dsname, pair));
2170                 return (-1);
2171         }
2172 
2173         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2174                 nvlist_t *attrs;
2175                 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2176                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2177                     &pair) == 0);
2178         }
2179 
2180         if (zfs_prop_get_type(prop) == PROP_TYPE_STRING)
2181                 return (-1);
2182 
2183         VERIFY(0 == nvpair_value_uint64(pair, &intval));
2184 
2185         switch (prop) {
2186         case ZFS_PROP_QUOTA:
2187                 err = dsl_dir_set_quota(dsname, source, intval);
2188                 break;
2189         case ZFS_PROP_REFQUOTA:
2190                 err = dsl_dataset_set_quota(dsname, source, intval);
2191                 break;
2192         case ZFS_PROP_RESERVATION:
2193                 err = dsl_dir_set_reservation(dsname, source, intval);
2194                 break;
2195         case ZFS_PROP_REFRESERVATION:
2196                 err = dsl_dataset_set_reservation(dsname, source, intval);
2197                 break;
2198         case ZFS_PROP_VOLSIZE:
2199                 err = zvol_set_volsize(dsname, ddi_driver_major(zfs_dip),
2200                     intval);
2201                 break;
2202         case ZFS_PROP_VERSION:
2203         {
2204                 zfsvfs_t *zfsvfs;
2205 
2206                 if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2207                         break;
2208 
2209                 err = zfs_set_version(zfsvfs, intval);
2210                 zfsvfs_rele(zfsvfs, FTAG);
2211 
2212                 if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2213                         zfs_cmd_t *zc;
2214 
2215                         zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2216                         (void) strcpy(zc->zc_name, dsname);
2217                         (void) zfs_ioc_userspace_upgrade(zc);
2218                         kmem_free(zc, sizeof (zfs_cmd_t));
2219                 }
2220                 break;
2221         }
2222 
2223         default:
2224                 err = -1;
2225         }
2226 
2227         return (err);
2228 }
2229 
2230 /*
2231  * This function is best effort. If it fails to set any of the given properties,
2232  * it continues to set as many as it can and returns the first error
2233  * encountered. If the caller provides a non-NULL errlist, it also gives the
2234  * complete list of names of all the properties it failed to set along with the
2235  * corresponding error numbers. The caller is responsible for freeing the
2236  * returned errlist.
2237  *
2238  * If every property is set successfully, zero is returned and the list pointed
2239  * at by errlist is NULL.
2240  */
2241 int
2242 zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2243     nvlist_t **errlist)
2244 {
2245         nvpair_t *pair;
2246         nvpair_t *propval;
2247         int rv = 0;
2248         uint64_t intval;
2249         char *strval;
2250         nvlist_t *genericnvl;
2251         nvlist_t *errors;
2252         nvlist_t *retrynvl;
2253 
2254         VERIFY(nvlist_alloc(&genericnvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2255         VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2256         VERIFY(nvlist_alloc(&retrynvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2257 
2258 retry:
2259         pair = NULL;
2260         while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2261                 const char *propname = nvpair_name(pair);
2262                 zfs_prop_t prop = zfs_name_to_prop(propname);
2263                 int err = 0;
2264 
2265                 /* decode the property value */
2266                 propval = pair;
2267                 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2268                         nvlist_t *attrs;
2269                         VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2270                         if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2271                             &propval) != 0)
2272                                 err = EINVAL;
2273                 }
2274 
2275                 /* Validate value type */
2276                 if (err == 0 && prop == ZPROP_INVAL) {
2277                         if (zfs_prop_user(propname)) {
2278                                 if (nvpair_type(propval) != DATA_TYPE_STRING)
2279                                         err = EINVAL;
2280                         } else if (zfs_prop_userquota(propname)) {
2281                                 if (nvpair_type(propval) !=
2282                                     DATA_TYPE_UINT64_ARRAY)
2283                                         err = EINVAL;
2284                         } else {
2285                                 err = EINVAL;
2286                         }
2287                 } else if (err == 0) {
2288                         if (nvpair_type(propval) == DATA_TYPE_STRING) {
2289                                 if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
2290                                         err = EINVAL;
2291                         } else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
2292                                 const char *unused;
2293 
2294                                 VERIFY(nvpair_value_uint64(propval,
2295                                     &intval) == 0);
2296 
2297                                 switch (zfs_prop_get_type(prop)) {
2298                                 case PROP_TYPE_NUMBER:
2299                                         break;
2300                                 case PROP_TYPE_STRING:
2301                                         err = EINVAL;
2302                                         break;
2303                                 case PROP_TYPE_INDEX:
2304                                         if (zfs_prop_index_to_string(prop,
2305                                             intval, &unused) != 0)
2306                                                 err = EINVAL;
2307                                         break;
2308                                 default:
2309                                         cmn_err(CE_PANIC,
2310                                             "unknown property type");
2311                                 }
2312                         } else {
2313                                 err = EINVAL;
2314                         }
2315                 }
2316 
2317                 /* Validate permissions */
2318                 if (err == 0)
2319                         err = zfs_check_settable(dsname, pair, CRED());
2320 
2321                 if (err == 0) {
2322                         err = zfs_prop_set_special(dsname, source, pair);
2323                         if (err == -1) {
2324                                 /*
2325                                  * For better performance we build up a list of
2326                                  * properties to set in a single transaction.
2327                                  */
2328                                 err = nvlist_add_nvpair(genericnvl, pair);
2329                         } else if (err != 0 && nvl != retrynvl) {
2330                                 /*
2331                                  * This may be a spurious error caused by
2332                                  * receiving quota and reservation out of order.
2333                                  * Try again in a second pass.
2334                                  */
2335                                 err = nvlist_add_nvpair(retrynvl, pair);
2336                         }
2337                 }
2338 
2339                 if (err != 0)
2340                         VERIFY(nvlist_add_int32(errors, propname, err) == 0);
2341         }
2342 
2343         if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
2344                 nvl = retrynvl;
2345                 goto retry;
2346         }
2347 
2348         if (!nvlist_empty(genericnvl) &&
2349             dsl_props_set(dsname, source, genericnvl) != 0) {
2350                 /*
2351                  * If this fails, we still want to set as many properties as we
2352                  * can, so try setting them individually.
2353                  */
2354                 pair = NULL;
2355                 while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
2356                         const char *propname = nvpair_name(pair);
2357                         int err = 0;
2358 
2359                         propval = pair;
2360                         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2361                                 nvlist_t *attrs;
2362                                 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
2363                                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2364                                     &propval) == 0);
2365                         }
2366 
2367                         if (nvpair_type(propval) == DATA_TYPE_STRING) {
2368                                 VERIFY(nvpair_value_string(propval,
2369                                     &strval) == 0);
2370                                 err = dsl_prop_set(dsname, propname, source, 1,
2371                                     strlen(strval) + 1, strval);
2372                         } else {
2373                                 VERIFY(nvpair_value_uint64(propval,
2374                                     &intval) == 0);
2375                                 err = dsl_prop_set(dsname, propname, source, 8,
2376                                     1, &intval);
2377                         }
2378 
2379                         if (err != 0) {
2380                                 VERIFY(nvlist_add_int32(errors, propname,
2381                                     err) == 0);
2382                         }
2383                 }
2384         }
2385         nvlist_free(genericnvl);
2386         nvlist_free(retrynvl);
2387 
2388         if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
2389                 nvlist_free(errors);
2390                 errors = NULL;
2391         } else {
2392                 VERIFY(nvpair_value_int32(pair, &rv) == 0);
2393         }
2394 
2395         if (errlist == NULL)
2396                 nvlist_free(errors);
2397         else
2398                 *errlist = errors;
2399 
2400         return (rv);
2401 }
2402 
2403 /*
2404  * Check that all the properties are valid user properties.
2405  */
2406 static int
2407 zfs_check_userprops(char *fsname, nvlist_t *nvl)
2408 {
2409         nvpair_t *pair = NULL;
2410         int error = 0;
2411 
2412         while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2413                 const char *propname = nvpair_name(pair);
2414                 char *valstr;
2415 
2416                 if (!zfs_prop_user(propname) ||
2417                     nvpair_type(pair) != DATA_TYPE_STRING)
2418                         return (EINVAL);
2419 
2420                 if (error = zfs_secpolicy_write_perms(fsname,
2421                     ZFS_DELEG_PERM_USERPROP, CRED()))
2422                         return (error);
2423 
2424                 if (strlen(propname) >= ZAP_MAXNAMELEN)
2425                         return (ENAMETOOLONG);
2426 
2427                 VERIFY(nvpair_value_string(pair, &valstr) == 0);
2428                 if (strlen(valstr) >= ZAP_MAXVALUELEN)
2429                         return (E2BIG);
2430         }
2431         return (0);
2432 }
2433 
2434 static void
2435 props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
2436 {
2437         nvpair_t *pair;
2438 
2439         VERIFY(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2440 
2441         pair = NULL;
2442         while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
2443                 if (nvlist_exists(skipped, nvpair_name(pair)))
2444                         continue;
2445 
2446                 VERIFY(nvlist_add_nvpair(*newprops, pair) == 0);
2447         }
2448 }
2449 
2450 static int
2451 clear_received_props(objset_t *os, const char *fs, nvlist_t *props,
2452     nvlist_t *skipped)
2453 {
2454         int err = 0;
2455         nvlist_t *cleared_props = NULL;
2456         props_skip(props, skipped, &cleared_props);
2457         if (!nvlist_empty(cleared_props)) {
2458                 /*
2459                  * Acts on local properties until the dataset has received
2460                  * properties at least once on or after SPA_VERSION_RECVD_PROPS.
2461                  */
2462                 zprop_source_t flags = (ZPROP_SRC_NONE |
2463                     (dsl_prop_get_hasrecvd(os) ? ZPROP_SRC_RECEIVED : 0));
2464                 err = zfs_set_prop_nvlist(fs, flags, cleared_props, NULL);
2465         }
2466         nvlist_free(cleared_props);
2467         return (err);
2468 }
2469 
2470 /*
2471  * inputs:
2472  * zc_name              name of filesystem
2473  * zc_value             name of property to set
2474  * zc_nvlist_src{_size} nvlist of properties to apply
2475  * zc_cookie            received properties flag
2476  *
2477  * outputs:
2478  * zc_nvlist_dst{_size} error for each unapplied received property
2479  */
2480 static int
2481 zfs_ioc_set_prop(zfs_cmd_t *zc)
2482 {
2483         nvlist_t *nvl;
2484         boolean_t received = zc->zc_cookie;
2485         zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
2486             ZPROP_SRC_LOCAL);
2487         nvlist_t *errors = NULL;
2488         int error;
2489 
2490         if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2491             zc->zc_iflags, &nvl)) != 0)
2492                 return (error);
2493 
2494         if (received) {
2495                 nvlist_t *origprops;
2496                 objset_t *os;
2497 
2498                 if (dmu_objset_hold(zc->zc_name, FTAG, &os) == 0) {
2499                         if (dsl_prop_get_received(os, &origprops) == 0) {
2500                                 (void) clear_received_props(os,
2501                                     zc->zc_name, origprops, nvl);
2502                                 nvlist_free(origprops);
2503                         }
2504 
2505                         dsl_prop_set_hasrecvd(os);
2506                         dmu_objset_rele(os, FTAG);
2507                 }
2508         }
2509 
2510         error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, &errors);
2511 
2512         if (zc->zc_nvlist_dst != NULL && errors != NULL) {
2513                 (void) put_nvlist(zc, errors);
2514         }
2515 
2516         nvlist_free(errors);
2517         nvlist_free(nvl);
2518         return (error);
2519 }
2520 
2521 /*
2522  * inputs:
2523  * zc_name              name of filesystem
2524  * zc_value             name of property to inherit
2525  * zc_cookie            revert to received value if TRUE
2526  *
2527  * outputs:             none
2528  */
2529 static int
2530 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
2531 {
2532         const char *propname = zc->zc_value;
2533         zfs_prop_t prop = zfs_name_to_prop(propname);
2534         boolean_t received = zc->zc_cookie;
2535         zprop_source_t source = (received
2536             ? ZPROP_SRC_NONE            /* revert to received value, if any */
2537             : ZPROP_SRC_INHERITED);     /* explicitly inherit */
2538 
2539         if (received) {
2540                 nvlist_t *dummy;
2541                 nvpair_t *pair;
2542                 zprop_type_t type;
2543                 int err;
2544 
2545                 /*
2546                  * zfs_prop_set_special() expects properties in the form of an
2547                  * nvpair with type info.
2548                  */
2549                 if (prop == ZPROP_INVAL) {
2550                         if (!zfs_prop_user(propname))
2551                                 return (EINVAL);
2552 
2553                         type = PROP_TYPE_STRING;
2554                 } else if (prop == ZFS_PROP_VOLSIZE ||
2555                     prop == ZFS_PROP_VERSION) {
2556                         return (EINVAL);
2557                 } else {
2558                         type = zfs_prop_get_type(prop);
2559                 }
2560 
2561                 VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
2562 
2563                 switch (type) {
2564                 case PROP_TYPE_STRING:
2565                         VERIFY(0 == nvlist_add_string(dummy, propname, ""));
2566                         break;
2567                 case PROP_TYPE_NUMBER:
2568                 case PROP_TYPE_INDEX:
2569                         VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
2570                         break;
2571                 default:
2572                         nvlist_free(dummy);
2573                         return (EINVAL);
2574                 }
2575 
2576                 pair = nvlist_next_nvpair(dummy, NULL);
2577                 err = zfs_prop_set_special(zc->zc_name, source, pair);
2578                 nvlist_free(dummy);
2579                 if (err != -1)
2580                         return (err); /* special property already handled */
2581         } else {
2582                 /*
2583                  * Only check this in the non-received case. We want to allow
2584                  * 'inherit -S' to revert non-inheritable properties like quota
2585                  * and reservation to the received or default values even though
2586                  * they are not considered inheritable.
2587                  */
2588                 if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
2589                         return (EINVAL);
2590         }
2591 
2592         /* the property name has been validated by zfs_secpolicy_inherit() */
2593         return (dsl_prop_set(zc->zc_name, zc->zc_value, source, 0, 0, NULL));
2594 }
2595 
2596 static int
2597 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
2598 {
2599         nvlist_t *props;
2600         spa_t *spa;
2601         int error;
2602         nvpair_t *pair;
2603 
2604         if (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2605             zc->zc_iflags, &props))
2606                 return (error);
2607 
2608         /*
2609          * If the only property is the configfile, then just do a spa_lookup()
2610          * to handle the faulted case.
2611          */
2612         pair = nvlist_next_nvpair(props, NULL);
2613         if (pair != NULL && strcmp(nvpair_name(pair),
2614             zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
2615             nvlist_next_nvpair(props, pair) == NULL) {
2616                 mutex_enter(&spa_namespace_lock);
2617                 if ((spa = spa_lookup(zc->zc_name)) != NULL) {
2618                         spa_configfile_set(spa, props, B_FALSE);
2619                         spa_config_sync(spa, B_FALSE, B_TRUE);
2620                 }
2621                 mutex_exit(&spa_namespace_lock);
2622                 if (spa != NULL) {
2623                         nvlist_free(props);
2624                         return (0);
2625                 }
2626         }
2627 
2628         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2629                 nvlist_free(props);
2630                 return (error);
2631         }
2632 
2633         error = spa_prop_set(spa, props);
2634 
2635         nvlist_free(props);
2636         spa_close(spa, FTAG);
2637 
2638         return (error);
2639 }
2640 
2641 static int
2642 zfs_ioc_pool_get_props(zfs_cmd_t *zc)
2643 {
2644         spa_t *spa;
2645         int error;
2646         nvlist_t *nvp = NULL;
2647 
2648         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
2649                 /*
2650                  * If the pool is faulted, there may be properties we can still
2651                  * get (such as altroot and cachefile), so attempt to get them
2652                  * anyway.
2653                  */
2654                 mutex_enter(&spa_namespace_lock);
2655                 if ((spa = spa_lookup(zc->zc_name)) != NULL)
2656                         error = spa_prop_get(spa, &nvp);
2657                 mutex_exit(&spa_namespace_lock);
2658         } else {
2659                 error = spa_prop_get(spa, &nvp);
2660                 spa_close(spa, FTAG);
2661         }
2662 
2663         if (error == 0 && zc->zc_nvlist_dst != NULL)
2664                 error = put_nvlist(zc, nvp);
2665         else
2666                 error = EFAULT;
2667 
2668         nvlist_free(nvp);
2669         return (error);
2670 }
2671 
2672 /*
2673  * inputs:
2674  * zc_name              name of filesystem
2675  * zc_nvlist_src{_size} nvlist of delegated permissions
2676  * zc_perm_action       allow/unallow flag
2677  *
2678  * outputs:             none
2679  */
2680 static int
2681 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
2682 {
2683         int error;
2684         nvlist_t *fsaclnv = NULL;
2685 
2686         if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2687             zc->zc_iflags, &fsaclnv)) != 0)
2688                 return (error);
2689 
2690         /*
2691          * Verify nvlist is constructed correctly
2692          */
2693         if ((error = zfs_deleg_verify_nvlist(fsaclnv)) != 0) {
2694                 nvlist_free(fsaclnv);
2695                 return (EINVAL);
2696         }
2697 
2698         /*
2699          * If we don't have PRIV_SYS_MOUNT, then validate
2700          * that user is allowed to hand out each permission in
2701          * the nvlist(s)
2702          */
2703 
2704         error = secpolicy_zfs(CRED());
2705         if (error) {
2706                 if (zc->zc_perm_action == B_FALSE) {
2707                         error = dsl_deleg_can_allow(zc->zc_name,
2708                             fsaclnv, CRED());
2709                 } else {
2710                         error = dsl_deleg_can_unallow(zc->zc_name,
2711                             fsaclnv, CRED());
2712                 }
2713         }
2714 
2715         if (error == 0)
2716                 error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
2717 
2718         nvlist_free(fsaclnv);
2719         return (error);
2720 }
2721 
2722 /*
2723  * inputs:
2724  * zc_name              name of filesystem
2725  *
2726  * outputs:
2727  * zc_nvlist_dst{_size} nvlist of delegated permissions
2728  */
2729 static int
2730 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
2731 {
2732         nvlist_t *nvp;
2733         int error;
2734 
2735         if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
2736                 error = put_nvlist(zc, nvp);
2737                 nvlist_free(nvp);
2738         }
2739 
2740         return (error);
2741 }
2742 
2743 /*
2744  * Search the vfs list for a specified resource.  Returns a pointer to it
2745  * or NULL if no suitable entry is found. The caller of this routine
2746  * is responsible for releasing the returned vfs pointer.
2747  */
2748 static vfs_t *
2749 zfs_get_vfs(const char *resource)
2750 {
2751         struct vfs *vfsp;
2752         struct vfs *vfs_found = NULL;
2753 
2754         vfs_list_read_lock();
2755         vfsp = rootvfs;
2756         do {
2757                 if (strcmp(refstr_value(vfsp->vfs_resource), resource) == 0) {
2758                         VFS_HOLD(vfsp);
2759                         vfs_found = vfsp;
2760                         break;
2761                 }
2762                 vfsp = vfsp->vfs_next;
2763         } while (vfsp != rootvfs);
2764         vfs_list_unlock();
2765         return (vfs_found);
2766 }
2767 
2768 /* ARGSUSED */
2769 static void
2770 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
2771 {
2772         zfs_creat_t *zct = arg;
2773 
2774         zfs_create_fs(os, cr, zct->zct_zplprops, tx);
2775 }
2776 
2777 #define ZFS_PROP_UNDEFINED      ((uint64_t)-1)
2778 
2779 /*
2780  * inputs:
2781  * createprops          list of properties requested by creator
2782  * default_zplver       zpl version to use if unspecified in createprops
2783  * fuids_ok             fuids allowed in this version of the spa?
2784  * os                   parent objset pointer (NULL if root fs)
2785  *
2786  * outputs:
2787  * zplprops     values for the zplprops we attach to the master node object
2788  * is_ci        true if requested file system will be purely case-insensitive
2789  *
2790  * Determine the settings for utf8only, normalization and
2791  * casesensitivity.  Specific values may have been requested by the
2792  * creator and/or we can inherit values from the parent dataset.  If
2793  * the file system is of too early a vintage, a creator can not
2794  * request settings for these properties, even if the requested
2795  * setting is the default value.  We don't actually want to create dsl
2796  * properties for these, so remove them from the source nvlist after
2797  * processing.
2798  */
2799 static int
2800 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
2801     boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
2802     nvlist_t *zplprops, boolean_t *is_ci)
2803 {
2804         uint64_t sense = ZFS_PROP_UNDEFINED;
2805         uint64_t norm = ZFS_PROP_UNDEFINED;
2806         uint64_t u8 = ZFS_PROP_UNDEFINED;
2807 
2808         ASSERT(zplprops != NULL);
2809 
2810         /*
2811          * Pull out creator prop choices, if any.
2812          */
2813         if (createprops) {
2814                 (void) nvlist_lookup_uint64(createprops,
2815                     zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
2816                 (void) nvlist_lookup_uint64(createprops,
2817                     zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
2818                 (void) nvlist_remove_all(createprops,
2819                     zfs_prop_to_name(ZFS_PROP_NORMALIZE));
2820                 (void) nvlist_lookup_uint64(createprops,
2821                     zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
2822                 (void) nvlist_remove_all(createprops,
2823                     zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
2824                 (void) nvlist_lookup_uint64(createprops,
2825                     zfs_prop_to_name(ZFS_PROP_CASE), &sense);
2826                 (void) nvlist_remove_all(createprops,
2827                     zfs_prop_to_name(ZFS_PROP_CASE));
2828         }
2829 
2830         /*
2831          * If the zpl version requested is whacky or the file system
2832          * or pool is version is too "young" to support normalization
2833          * and the creator tried to set a value for one of the props,
2834          * error out.
2835          */
2836         if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
2837             (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
2838             (zplver >= ZPL_VERSION_SA && !sa_ok) ||
2839             (zplver < ZPL_VERSION_NORMALIZATION &&
2840             (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
2841             sense != ZFS_PROP_UNDEFINED)))
2842                 return (ENOTSUP);
2843 
2844         /*
2845          * Put the version in the zplprops
2846          */
2847         VERIFY(nvlist_add_uint64(zplprops,
2848             zfs_prop_to_name(ZFS_PROP_VERSION), zplver) == 0);
2849 
2850         if (norm == ZFS_PROP_UNDEFINED)
2851                 VERIFY(zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm) == 0);
2852         VERIFY(nvlist_add_uint64(zplprops,
2853             zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm) == 0);
2854 
2855         /*
2856          * If we're normalizing, names must always be valid UTF-8 strings.
2857          */
2858         if (norm)
2859                 u8 = 1;
2860         if (u8 == ZFS_PROP_UNDEFINED)
2861                 VERIFY(zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8) == 0);
2862         VERIFY(nvlist_add_uint64(zplprops,
2863             zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8) == 0);
2864 
2865         if (sense == ZFS_PROP_UNDEFINED)
2866                 VERIFY(zfs_get_zplprop(os, ZFS_PROP_CASE, &sense) == 0);
2867         VERIFY(nvlist_add_uint64(zplprops,
2868             zfs_prop_to_name(ZFS_PROP_CASE), sense) == 0);
2869 
2870         if (is_ci)
2871                 *is_ci = (sense == ZFS_CASE_INSENSITIVE);
2872 
2873         return (0);
2874 }
2875 
2876 static int
2877 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
2878     nvlist_t *zplprops, boolean_t *is_ci)
2879 {
2880         boolean_t fuids_ok, sa_ok;
2881         uint64_t zplver = ZPL_VERSION;
2882         objset_t *os = NULL;
2883         char parentname[MAXNAMELEN];
2884         char *cp;
2885         spa_t *spa;
2886         uint64_t spa_vers;
2887         int error;
2888 
2889         (void) strlcpy(parentname, dataset, sizeof (parentname));
2890         cp = strrchr(parentname, '/');
2891         ASSERT(cp != NULL);
2892         cp[0] = '\0';
2893 
2894         if ((error = spa_open(dataset, &spa, FTAG)) != 0)
2895                 return (error);
2896 
2897         spa_vers = spa_version(spa);
2898         spa_close(spa, FTAG);
2899 
2900         zplver = zfs_zpl_version_map(spa_vers);
2901         fuids_ok = (zplver >= ZPL_VERSION_FUID);
2902         sa_ok = (zplver >= ZPL_VERSION_SA);
2903 
2904         /*
2905          * Open parent object set so we can inherit zplprop values.
2906          */
2907         if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
2908                 return (error);
2909 
2910         error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
2911             zplprops, is_ci);
2912         dmu_objset_rele(os, FTAG);
2913         return (error);
2914 }
2915 
2916 static int
2917 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
2918     nvlist_t *zplprops, boolean_t *is_ci)
2919 {
2920         boolean_t fuids_ok;
2921         boolean_t sa_ok;
2922         uint64_t zplver = ZPL_VERSION;
2923         int error;
2924 
2925         zplver = zfs_zpl_version_map(spa_vers);
2926         fuids_ok = (zplver >= ZPL_VERSION_FUID);
2927         sa_ok = (zplver >= ZPL_VERSION_SA);
2928 
2929         error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
2930             createprops, zplprops, is_ci);
2931         return (error);
2932 }
2933 
2934 /*
2935  * inputs:
2936  * zc_objset_type       type of objset to create (fs vs zvol)
2937  * zc_name              name of new objset
2938  * zc_value             name of snapshot to clone from (may be empty)
2939  * zc_nvlist_src{_size} nvlist of properties to apply
2940  *
2941  * outputs: none
2942  */
2943 static int
2944 zfs_ioc_create(zfs_cmd_t *zc)
2945 {
2946         objset_t *clone;
2947         int error = 0;
2948         zfs_creat_t zct;
2949         nvlist_t *nvprops = NULL;
2950         void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
2951         dmu_objset_type_t type = zc->zc_objset_type;
2952 
2953         switch (type) {
2954 
2955         case DMU_OST_ZFS:
2956                 cbfunc = zfs_create_cb;
2957                 break;
2958 
2959         case DMU_OST_ZVOL:
2960                 cbfunc = zvol_create_cb;
2961                 break;
2962 
2963         default:
2964                 cbfunc = NULL;
2965                 break;
2966         }
2967         if (strchr(zc->zc_name, '@') ||
2968             strchr(zc->zc_name, '%'))
2969                 return (EINVAL);
2970 
2971         if (zc->zc_nvlist_src != NULL &&
2972             (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2973             zc->zc_iflags, &nvprops)) != 0)
2974                 return (error);
2975 
2976         zct.zct_zplprops = NULL;
2977         zct.zct_props = nvprops;
2978 
2979         if (zc->zc_value[0] != '\0') {
2980                 /*
2981                  * We're creating a clone of an existing snapshot.
2982                  */
2983                 zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
2984                 if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0) {
2985                         nvlist_free(nvprops);
2986                         return (EINVAL);
2987                 }
2988 
2989                 error = dmu_objset_hold(zc->zc_value, FTAG, &clone);
2990                 if (error) {
2991                         nvlist_free(nvprops);
2992                         return (error);
2993                 }
2994 
2995                 error = dmu_objset_clone(zc->zc_name, dmu_objset_ds(clone), 0);
2996                 dmu_objset_rele(clone, FTAG);
2997                 if (error) {
2998                         nvlist_free(nvprops);
2999                         return (error);
3000                 }
3001         } else {
3002                 boolean_t is_insensitive = B_FALSE;
3003 
3004                 if (cbfunc == NULL) {
3005                         nvlist_free(nvprops);
3006                         return (EINVAL);
3007                 }
3008 
3009                 if (type == DMU_OST_ZVOL) {
3010                         uint64_t volsize, volblocksize;
3011 
3012                         if (nvprops == NULL ||
3013                             nvlist_lookup_uint64(nvprops,
3014                             zfs_prop_to_name(ZFS_PROP_VOLSIZE),
3015                             &volsize) != 0) {
3016                                 nvlist_free(nvprops);
3017                                 return (EINVAL);
3018                         }
3019 
3020                         if ((error = nvlist_lookup_uint64(nvprops,
3021                             zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3022                             &volblocksize)) != 0 && error != ENOENT) {
3023                                 nvlist_free(nvprops);
3024                                 return (EINVAL);
3025                         }
3026 
3027                         if (error != 0)
3028                                 volblocksize = zfs_prop_default_numeric(
3029                                     ZFS_PROP_VOLBLOCKSIZE);
3030 
3031                         if ((error = zvol_check_volblocksize(
3032                             volblocksize)) != 0 ||
3033                             (error = zvol_check_volsize(volsize,
3034                             volblocksize)) != 0) {
3035                                 nvlist_free(nvprops);
3036                                 return (error);
3037                         }
3038                 } else if (type == DMU_OST_ZFS) {
3039                         int error;
3040 
3041                         /*
3042                          * We have to have normalization and
3043                          * case-folding flags correct when we do the
3044                          * file system creation, so go figure them out
3045                          * now.
3046                          */
3047                         VERIFY(nvlist_alloc(&zct.zct_zplprops,
3048                             NV_UNIQUE_NAME, KM_SLEEP) == 0);
3049                         error = zfs_fill_zplprops(zc->zc_name, nvprops,
3050                             zct.zct_zplprops, &is_insensitive);
3051                         if (error != 0) {
3052                                 nvlist_free(nvprops);
3053                                 nvlist_free(zct.zct_zplprops);
3054                                 return (error);
3055                         }
3056                 }
3057                 error = dmu_objset_create(zc->zc_name, type,
3058                     is_insensitive ? DS_FLAG_CI_DATASET : 0, cbfunc, &zct);
3059                 nvlist_free(zct.zct_zplprops);
3060         }
3061 
3062         /*
3063          * It would be nice to do this atomically.
3064          */
3065         if (error == 0) {
3066                 error = zfs_set_prop_nvlist(zc->zc_name, ZPROP_SRC_LOCAL,
3067                     nvprops, NULL);
3068                 if (error != 0)
3069                         (void) dmu_objset_destroy(zc->zc_name, B_FALSE);
3070         }
3071         nvlist_free(nvprops);
3072         return (error);
3073 }
3074 
3075 /*
3076  * inputs:
3077  * zc_name      name of filesystem
3078  * zc_value     short name of snapshot
3079  * zc_cookie    recursive flag
3080  * zc_nvlist_src[_size] property list
3081  *
3082  * outputs:
3083  * zc_value     short snapname (i.e. part after the '@')
3084  */
3085 static int
3086 zfs_ioc_snapshot(zfs_cmd_t *zc)
3087 {
3088         nvlist_t *nvprops = NULL;
3089         int error;
3090         boolean_t recursive = zc->zc_cookie;
3091 
3092         if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
3093                 return (EINVAL);
3094 
3095         if (zc->zc_nvlist_src != NULL &&
3096             (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3097             zc->zc_iflags, &nvprops)) != 0)
3098                 return (error);
3099 
3100         error = zfs_check_userprops(zc->zc_name, nvprops);
3101         if (error)
3102                 goto out;
3103 
3104         if (!nvlist_empty(nvprops) &&
3105             zfs_earlier_version(zc->zc_name, SPA_VERSION_SNAP_PROPS)) {
3106                 error = ENOTSUP;
3107                 goto out;
3108         }
3109 
3110         error = dmu_objset_snapshot(zc->zc_name, zc->zc_value, NULL,
3111             nvprops, recursive, B_FALSE, -1);
3112 
3113 out:
3114         nvlist_free(nvprops);
3115         return (error);
3116 }
3117 
3118 int
3119 zfs_unmount_snap(const char *name, void *arg)
3120 {
3121         vfs_t *vfsp = NULL;
3122 
3123         if (arg) {
3124                 char *snapname = arg;
3125                 char *fullname = kmem_asprintf("%s@%s", name, snapname);
3126                 vfsp = zfs_get_vfs(fullname);
3127                 strfree(fullname);
3128         } else if (strchr(name, '@')) {
3129                 vfsp = zfs_get_vfs(name);
3130         }
3131 
3132         if (vfsp) {
3133                 /*
3134                  * Always force the unmount for snapshots.
3135                  */
3136                 int flag = MS_FORCE;
3137                 int err;
3138 
3139                 if ((err = vn_vfswlock(vfsp->vfs_vnodecovered)) != 0) {
3140                         VFS_RELE(vfsp);
3141                         return (err);
3142                 }
3143                 VFS_RELE(vfsp);
3144                 if ((err = dounmount(vfsp, flag, kcred)) != 0)
3145                         return (err);
3146         }
3147         return (0);
3148 }
3149 
3150 /*
3151  * inputs:
3152  * zc_name              name of filesystem, snaps must be under it
3153  * zc_nvlist_src[_size] full names of snapshots to destroy
3154  * zc_defer_destroy     mark for deferred destroy
3155  *
3156  * outputs:
3157  * zc_name              on failure, name of failed snapshot
3158  */
3159 static int
3160 zfs_ioc_destroy_snaps_nvl(zfs_cmd_t *zc)
3161 {
3162         int err, len;
3163         nvlist_t *nvl;
3164         nvpair_t *pair;
3165 
3166         if ((err = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3167             zc->zc_iflags, &nvl)) != 0)
3168                 return (err);
3169 
3170         len = strlen(zc->zc_name);
3171         for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL;
3172             pair = nvlist_next_nvpair(nvl, pair)) {
3173                 const char *name = nvpair_name(pair);
3174                 /*
3175                  * The snap name must be underneath the zc_name.  This ensures
3176                  * that our permission checks were legitimate.
3177                  */
3178                 if (strncmp(zc->zc_name, name, len) != 0 ||
3179                     (name[len] != '@' && name[len] != '/')) {
3180                         nvlist_free(nvl);
3181                         return (EINVAL);
3182                 }
3183 
3184                 (void) zfs_unmount_snap(name, NULL);
3185         }
3186 
3187         err = dmu_snapshots_destroy_nvl(nvl, zc->zc_defer_destroy,
3188             zc->zc_name);
3189         nvlist_free(nvl);
3190         return (err);
3191 }
3192 
3193 /*
3194  * inputs:
3195  * zc_name              name of dataset to destroy
3196  * zc_objset_type       type of objset
3197  * zc_defer_destroy     mark for deferred destroy
3198  *
3199  * outputs:             none
3200  */
3201 static int
3202 zfs_ioc_destroy(zfs_cmd_t *zc)
3203 {
3204         int err;
3205         if (strchr(zc->zc_name, '@') && zc->zc_objset_type == DMU_OST_ZFS) {
3206                 err = zfs_unmount_snap(zc->zc_name, NULL);
3207                 if (err)
3208                         return (err);
3209         }
3210 
3211         err = dmu_objset_destroy(zc->zc_name, zc->zc_defer_destroy);
3212         if (zc->zc_objset_type == DMU_OST_ZVOL && err == 0)
3213                 (void) zvol_remove_minor(zc->zc_name);
3214         return (err);
3215 }
3216 
3217 /*
3218  * inputs:
3219  * zc_name      name of dataset to rollback (to most recent snapshot)
3220  *
3221  * outputs:     none
3222  */
3223 static int
3224 zfs_ioc_rollback(zfs_cmd_t *zc)
3225 {
3226         dsl_dataset_t *ds, *clone;
3227         int error;
3228         zfsvfs_t *zfsvfs;
3229         char *clone_name;
3230         boolean_t umounted = B_FALSE;
3231 
3232         if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
3233                 error = zfs_suspend_fs(zfsvfs);
3234                 if (error) {
3235                         VFS_RELE(zfsvfs->z_vfs);
3236                         return (error);
3237                 }
3238 
3239                 umounted = B_TRUE;
3240         }
3241 
3242         error = dsl_dataset_hold(zc->zc_name, FTAG, &ds);
3243         if (error)
3244                 goto out_vfs;
3245 
3246         /* must not be a snapshot */
3247         if (dsl_dataset_is_snapshot(ds)) {
3248                 error = EINVAL;
3249                 goto out_ds;
3250         }
3251 
3252         /* must have a most recent snapshot */
3253         if (ds->ds_phys->ds_prev_snap_txg < TXG_INITIAL) {
3254                 error = EINVAL;
3255                 goto out_ds;
3256         }
3257 
3258         /*
3259          * Create clone of most recent snapshot.
3260          */
3261         clone_name = kmem_asprintf("%s/%%rollback", zc->zc_name);
3262         error = dmu_objset_clone(clone_name, ds->ds_prev, DS_FLAG_INCONSISTENT);
3263         if (error)
3264                 goto out;
3265 
3266         error = dsl_dataset_own(clone_name, B_TRUE, FTAG, &clone);
3267         if (error)
3268                 goto out;
3269 
3270         /*
3271          * Do clone swap.
3272          */
3273         if (umounted) {
3274                 if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
3275                         error = dsl_dataset_clone_swap(clone, ds,
3276                             B_TRUE);
3277                         dsl_dataset_disown(ds, FTAG);
3278                         ds = NULL;
3279                 } else {
3280                         error = EBUSY;
3281                 }
3282         } else {
3283                 if (dsl_dataset_tryown(ds, B_FALSE, FTAG)) {
3284                         error = dsl_dataset_clone_swap(clone, ds, B_TRUE);
3285                         dsl_dataset_disown(ds, FTAG);
3286                         ds = NULL;
3287                 } else {
3288                         error = EBUSY;
3289                 }
3290         }
3291 
3292         /*
3293          * Destroy clone (which also closes it).
3294          */
3295         (void) dsl_dataset_destroy(clone, FTAG, B_FALSE);
3296 
3297 out:
3298         strfree(clone_name);
3299 out_ds:
3300         if (ds)
3301                 dsl_dataset_rele(ds, FTAG);
3302 out_vfs:
3303         if (umounted) {
3304                 int resume_err;
3305 
3306                 resume_err = zfs_resume_fs(zfsvfs, zc->zc_name);
3307                 error = error ? error : resume_err;
3308                 VFS_RELE(zfsvfs->z_vfs);
3309         }
3310 
3311         return (error);
3312 }
3313 
3314 /*
3315  * inputs:
3316  * zc_name      old name of dataset
3317  * zc_value     new name of dataset
3318  * zc_cookie    recursive flag (only valid for snapshots)
3319  *
3320  * outputs:     none
3321  */
3322 static int
3323 zfs_ioc_rename(zfs_cmd_t *zc)
3324 {
3325         boolean_t recursive = zc->zc_cookie & 1;
3326 
3327         zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
3328         if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
3329             strchr(zc->zc_value, '%'))
3330                 return (EINVAL);
3331 
3332         /*
3333          * Unmount snapshot unless we're doing a recursive rename,
3334          * in which case the dataset code figures out which snapshots
3335          * to unmount.
3336          */
3337         if (!recursive && strchr(zc->zc_name, '@') != NULL &&
3338             zc->zc_objset_type == DMU_OST_ZFS) {
3339                 int err = zfs_unmount_snap(zc->zc_name, NULL);
3340                 if (err)
3341                         return (err);
3342         }
3343         if (zc->zc_objset_type == DMU_OST_ZVOL)
3344                 (void) zvol_remove_minor(zc->zc_name);
3345         return (dmu_objset_rename(zc->zc_name, zc->zc_value, recursive));
3346 }
3347 
3348 static int
3349 zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
3350 {
3351         const char *propname = nvpair_name(pair);
3352         boolean_t issnap = (strchr(dsname, '@') != NULL);
3353         zfs_prop_t prop = zfs_name_to_prop(propname);
3354         uint64_t intval;
3355         int err;
3356 
3357         if (prop == ZPROP_INVAL) {
3358                 if (zfs_prop_user(propname)) {
3359                         if (err = zfs_secpolicy_write_perms(dsname,
3360                             ZFS_DELEG_PERM_USERPROP, cr))
3361                                 return (err);
3362                         return (0);
3363                 }
3364 
3365                 if (!issnap && zfs_prop_userquota(propname)) {
3366                         const char *perm = NULL;
3367                         const char *uq_prefix =
3368                             zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
3369                         const char *gq_prefix =
3370                             zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
3371 
3372                         if (strncmp(propname, uq_prefix,
3373                             strlen(uq_prefix)) == 0) {
3374                                 perm = ZFS_DELEG_PERM_USERQUOTA;
3375                         } else if (strncmp(propname, gq_prefix,
3376                             strlen(gq_prefix)) == 0) {
3377                                 perm = ZFS_DELEG_PERM_GROUPQUOTA;
3378                         } else {
3379                                 /* USERUSED and GROUPUSED are read-only */
3380                                 return (EINVAL);
3381                         }
3382 
3383                         if (err = zfs_secpolicy_write_perms(dsname, perm, cr))
3384                                 return (err);
3385                         return (0);
3386                 }
3387 
3388                 return (EINVAL);
3389         }
3390 
3391         if (issnap)
3392                 return (EINVAL);
3393 
3394         if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
3395                 /*
3396                  * dsl_prop_get_all_impl() returns properties in this
3397                  * format.
3398                  */
3399                 nvlist_t *attrs;
3400                 VERIFY(nvpair_value_nvlist(pair, &attrs) == 0);
3401                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3402                     &pair) == 0);
3403         }
3404 
3405         /*
3406          * Check that this value is valid for this pool version
3407          */
3408         switch (prop) {
3409         case ZFS_PROP_COMPRESSION:
3410                 /*
3411                  * If the user specified gzip compression, make sure
3412                  * the SPA supports it. We ignore any errors here since
3413                  * we'll catch them later.
3414                  */
3415                 if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
3416                     nvpair_value_uint64(pair, &intval) == 0) {
3417                         if (intval >= ZIO_COMPRESS_GZIP_1 &&
3418                             intval <= ZIO_COMPRESS_GZIP_9 &&
3419                             zfs_earlier_version(dsname,
3420                             SPA_VERSION_GZIP_COMPRESSION)) {
3421                                 return (ENOTSUP);
3422                         }
3423 
3424                         if (intval == ZIO_COMPRESS_ZLE &&
3425                             zfs_earlier_version(dsname,
3426                             SPA_VERSION_ZLE_COMPRESSION))
3427                                 return (ENOTSUP);
3428 
3429                         /*
3430                          * If this is a bootable dataset then
3431                          * verify that the compression algorithm
3432                          * is supported for booting. We must return
3433                          * something other than ENOTSUP since it
3434                          * implies a downrev pool version.
3435                          */
3436                         if (zfs_is_bootfs(dsname) &&
3437                             !BOOTFS_COMPRESS_VALID(intval)) {
3438                                 return (ERANGE);
3439                         }
3440                 }
3441                 break;
3442 
3443         case ZFS_PROP_COPIES:
3444                 if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
3445                         return (ENOTSUP);
3446                 break;
3447 
3448         case ZFS_PROP_DEDUP:
3449                 if (zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
3450                         return (ENOTSUP);
3451                 break;
3452 
3453         case ZFS_PROP_SHARESMB:
3454                 if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
3455                         return (ENOTSUP);
3456                 break;
3457 
3458         case ZFS_PROP_ACLINHERIT:
3459                 if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
3460                     nvpair_value_uint64(pair, &intval) == 0) {
3461                         if (intval == ZFS_ACL_PASSTHROUGH_X &&
3462                             zfs_earlier_version(dsname,
3463                             SPA_VERSION_PASSTHROUGH_X))
3464                                 return (ENOTSUP);
3465                 }
3466                 break;
3467         }
3468 
3469         return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
3470 }
3471 
3472 /*
3473  * Removes properties from the given props list that fail permission checks
3474  * needed to clear them and to restore them in case of a receive error. For each
3475  * property, make sure we have both set and inherit permissions.
3476  *
3477  * Returns the first error encountered if any permission checks fail. If the
3478  * caller provides a non-NULL errlist, it also gives the complete list of names
3479  * of all the properties that failed a permission check along with the
3480  * corresponding error numbers. The caller is responsible for freeing the
3481  * returned errlist.
3482  *
3483  * If every property checks out successfully, zero is returned and the list
3484  * pointed at by errlist is NULL.
3485  */
3486 static int
3487 zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
3488 {
3489         zfs_cmd_t *zc;
3490         nvpair_t *pair, *next_pair;
3491         nvlist_t *errors;
3492         int err, rv = 0;
3493 
3494         if (props == NULL)
3495                 return (0);
3496 
3497         VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
3498 
3499         zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
3500         (void) strcpy(zc->zc_name, dataset);
3501         pair = nvlist_next_nvpair(props, NULL);
3502         while (pair != NULL) {
3503                 next_pair = nvlist_next_nvpair(props, pair);
3504 
3505                 (void) strcpy(zc->zc_value, nvpair_name(pair));
3506                 if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
3507                     (err = zfs_secpolicy_inherit(zc, CRED())) != 0) {
3508                         VERIFY(nvlist_remove_nvpair(props, pair) == 0);
3509                         VERIFY(nvlist_add_int32(errors,
3510                             zc->zc_value, err) == 0);
3511                 }
3512                 pair = next_pair;
3513         }
3514         kmem_free(zc, sizeof (zfs_cmd_t));
3515 
3516         if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
3517                 nvlist_free(errors);
3518                 errors = NULL;
3519         } else {
3520                 VERIFY(nvpair_value_int32(pair, &rv) == 0);
3521         }
3522 
3523         if (errlist == NULL)
3524                 nvlist_free(errors);
3525         else
3526                 *errlist = errors;
3527 
3528         return (rv);
3529 }
3530 
3531 static boolean_t
3532 propval_equals(nvpair_t *p1, nvpair_t *p2)
3533 {
3534         if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
3535                 /* dsl_prop_get_all_impl() format */
3536                 nvlist_t *attrs;
3537                 VERIFY(nvpair_value_nvlist(p1, &attrs) == 0);
3538                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3539                     &p1) == 0);
3540         }
3541 
3542         if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
3543                 nvlist_t *attrs;
3544                 VERIFY(nvpair_value_nvlist(p2, &attrs) == 0);
3545                 VERIFY(nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3546                     &p2) == 0);
3547         }
3548 
3549         if (nvpair_type(p1) != nvpair_type(p2))
3550                 return (B_FALSE);
3551 
3552         if (nvpair_type(p1) == DATA_TYPE_STRING) {
3553                 char *valstr1, *valstr2;
3554 
3555                 VERIFY(nvpair_value_string(p1, (char **)&valstr1) == 0);
3556                 VERIFY(nvpair_value_string(p2, (char **)&valstr2) == 0);
3557                 return (strcmp(valstr1, valstr2) == 0);
3558         } else {
3559                 uint64_t intval1, intval2;
3560 
3561                 VERIFY(nvpair_value_uint64(p1, &intval1) == 0);
3562                 VERIFY(nvpair_value_uint64(p2, &intval2) == 0);
3563                 return (intval1 == intval2);
3564         }
3565 }
3566 
3567 /*
3568  * Remove properties from props if they are not going to change (as determined
3569  * by comparison with origprops). Remove them from origprops as well, since we
3570  * do not need to clear or restore properties that won't change.
3571  */
3572 static void
3573 props_reduce(nvlist_t *props, nvlist_t *origprops)
3574 {
3575         nvpair_t *pair, *next_pair;
3576 
3577         if (origprops == NULL)
3578                 return; /* all props need to be received */
3579 
3580         pair = nvlist_next_nvpair(props, NULL);
3581         while (pair != NULL) {
3582                 const char *propname = nvpair_name(pair);
3583                 nvpair_t *match;
3584 
3585                 next_pair = nvlist_next_nvpair(props, pair);
3586 
3587                 if ((nvlist_lookup_nvpair(origprops, propname,
3588                     &match) != 0) || !propval_equals(pair, match))
3589                         goto next; /* need to set received value */
3590 
3591                 /* don't clear the existing received value */
3592                 (void) nvlist_remove_nvpair(origprops, match);
3593                 /* don't bother receiving the property */
3594                 (void) nvlist_remove_nvpair(props, pair);
3595 next:
3596                 pair = next_pair;
3597         }
3598 }
3599 
#ifdef  DEBUG
/*
 * Flag that only exists in DEBUG builds.  Its consumer is not shown next
 * to this declaration.
 * NOTE(review): presumably read by zfs_ioc_recv() to force an error path
 * for testing -- confirm against the code that uses it.
 */
static boolean_t zfs_ioc_recv_inject_err;
#endif
3603 
/*
 * Receive a send stream into a snapshot, optionally replacing the dataset's
 * received properties first.  If the receive fails, the newly set received
 * properties are cleared again and the stashed originals are restored where
 * possible; failures to clear/restore are reported through zc_obj.
 *
 * inputs:
 * zc_name              name of containing filesystem
 * zc_nvlist_src{_size} nvlist of properties to apply
 * zc_value             name of snapshot to create
 * zc_string            name of clone origin (if DRR_FLAG_CLONE)
 * zc_cookie            file descriptor to recv from
 * zc_begin_record      the BEGIN record of the stream (not byteswapped)
 * zc_guid              force flag
 * zc_cleanup_fd        cleanup-on-exit file descriptor
 * zc_action_handle     handle for this guid/ds mapping (or zero on first call)
 *
 * outputs:
 * zc_cookie            number of bytes read
 * zc_nvlist_dst{_size} error for each unapplied received property
 * zc_obj               zprop_errflags_t
 * zc_action_handle     handle for this guid/ds mapping
 */
static int
zfs_ioc_recv(zfs_cmd_t *zc)
{
        file_t *fp;
        objset_t *os;
        dmu_recv_cookie_t drc;
        boolean_t force = (boolean_t)zc->zc_guid;
        int fd;
        int error = 0;
        int props_error = 0;
        nvlist_t *errors;
        offset_t off;
        nvlist_t *props = NULL; /* sent properties */
        nvlist_t *origprops = NULL; /* existing properties */
        objset_t *origin = NULL;
        char *tosnap;
        char tofs[ZFS_MAXNAMELEN];
        boolean_t first_recvd_props = B_FALSE;

        /* zc_value must be a valid "fs@snap" name that contains no '%'. */
        if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
            strchr(zc->zc_value, '@') == NULL ||
            strchr(zc->zc_value, '%'))
                return (EINVAL);

        /* Split "fs@snap" in place: tofs is the fs, tosnap the snapshot. */
        (void) strcpy(tofs, zc->zc_value);
        tosnap = strchr(tofs, '@');
        *tosnap++ = '\0';

        if (zc->zc_nvlist_src != NULL &&
            (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
            zc->zc_iflags, &props)) != 0)
                return (error);

        /* Hold the stream's file; it is released via releasef() at "out". */
        fd = zc->zc_cookie;
        fp = getf(fd);
        if (fp == NULL) {
                nvlist_free(props);
                return (EBADF);
        }

        /* Accumulates per-property errors that are copied out to the caller. */
        VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);

        if (props && dmu_objset_hold(tofs, FTAG, &os) == 0) {
                if ((spa_version(os->os_spa) >= SPA_VERSION_RECVD_PROPS) &&
                    !dsl_prop_get_hasrecvd(os)) {
                        first_recvd_props = B_TRUE;
                }

                /*
                 * If new received properties are supplied, they are to
                 * completely replace the existing received properties, so stash
                 * away the existing ones.
                 */
                if (dsl_prop_get_received(os, &origprops) == 0) {
                        nvlist_t *errlist = NULL;
                        /*
                         * Don't bother writing a property if its value won't
                         * change (and avoid the unnecessary security checks).
                         *
                         * The first receive after SPA_VERSION_RECVD_PROPS is a
                         * special case where we blow away all local properties
                         * regardless.
                         */
                        if (!first_recvd_props)
                                props_reduce(props, origprops);
                        if (zfs_check_clearable(tofs, origprops,
                            &errlist) != 0)
                                (void) nvlist_merge(errors, errlist, 0);
                        nvlist_free(errlist);
                }

                dmu_objset_rele(os, FTAG);
        }

        /* The clone origin, if named, is only held across dmu_recv_begin(). */
        if (zc->zc_string[0]) {
                error = dmu_objset_hold(zc->zc_string, FTAG, &origin);
                if (error)
                        goto out;
        }

        error = dmu_recv_begin(tofs, tosnap, zc->zc_top_ds,
            &zc->zc_begin_record, force, origin, &drc);
        if (origin)
                dmu_objset_rele(origin, FTAG);
        if (error)
                goto out;

        /*
         * Set properties before we receive the stream so that they are applied
         * to the new data. Note that we must call dmu_recv_stream() if
         * dmu_recv_begin() succeeds.
         */
        if (props) {
                nvlist_t *errlist;

                if (drc.drc_newfs)
                        error = dmu_objset_from_ds(drc.drc_real_ds, &os);
                else
                        error = dmu_objset_hold(tofs, FTAG, &os);

                if (error == 0) {
                        if (drc.drc_newfs) {
                                if (spa_version(os->os_spa) >=
                                    SPA_VERSION_RECVD_PROPS)
                                        first_recvd_props = B_TRUE;
                        } else if (origprops != NULL) {
                                if (clear_received_props(os, tofs, origprops,
                                    first_recvd_props ? NULL : props) != 0)
                                        zc->zc_obj |= ZPROP_ERR_NOCLEAR;
                        } else {
                                zc->zc_obj |= ZPROP_ERR_NOCLEAR;
                        }
                        dsl_prop_set_hasrecvd(os);

                        /* Only the dmu_objset_hold() path took a hold. */
                        if (!drc.drc_newfs)
                                dmu_objset_rele(os, FTAG);
                } else if (!drc.drc_newfs) {
                        zc->zc_obj |= ZPROP_ERR_NOCLEAR;
                }

                (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
                    props, &errlist);
                (void) nvlist_merge(errors, errlist, 0);
                nvlist_free(errlist);
        }

        if (fit_error_list(zc, &errors) != 0 || put_nvlist(zc, errors) != 0) {
                /*
                 * Caller made zc->zc_nvlist_dst less than the minimum expected
                 * size or supplied an invalid address.
                 */
                props_error = EINVAL;
        }

        /* Read the stream starting at the file's current offset. */
        off = fp->f_offset;
        error = dmu_recv_stream(&drc, fp->f_vnode, &off, zc->zc_cleanup_fd,
            &zc->zc_action_handle);

        if (error == 0) {
                zfsvfs_t *zfsvfs = NULL;

                if (getzfsvfs(tofs, &zfsvfs) == 0) {
                        /* online recv */
                        int end_err;

                        error = zfs_suspend_fs(zfsvfs);
                        /*
                         * If the suspend fails, then the recv_end will
                         * likely also fail, and clean up after itself.
                         */
                        end_err = dmu_recv_end(&drc);
                        if (error == 0)
                                error = zfs_resume_fs(zfsvfs, tofs);
                        /* A suspend/resume error takes precedence. */
                        error = error ? error : end_err;
                        VFS_RELE(zfsvfs->z_vfs);
                } else {
                        error = dmu_recv_end(&drc);
                }
        }

        /* Report the bytes consumed, then move the file offset past them. */
        zc->zc_cookie = off - fp->f_offset;
        if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
                fp->f_offset = off;

#ifdef  DEBUG
        if (zfs_ioc_recv_inject_err) {
                zfs_ioc_recv_inject_err = B_FALSE;
                error = 1;
        }
#endif
        /*
         * On error, restore the original props.
         */
        if (error && props) {
                if (dmu_objset_hold(tofs, FTAG, &os) == 0) {
                        if (clear_received_props(os, tofs, props, NULL) != 0) {
                                /*
                                 * We failed to clear the received properties.
                                 * Since we may have left a $recvd value on the
                                 * system, we can't clear the $hasrecvd flag.
                                 */
                                zc->zc_obj |= ZPROP_ERR_NORESTORE;
                        } else if (first_recvd_props) {
                                dsl_prop_unset_hasrecvd(os);
                        }
                        dmu_objset_rele(os, FTAG);
                } else if (!drc.drc_newfs) {
                        /* We failed to clear the received properties. */
                        zc->zc_obj |= ZPROP_ERR_NORESTORE;
                }

                if (origprops == NULL && !drc.drc_newfs) {
                        /* We failed to stash the original properties. */
                        zc->zc_obj |= ZPROP_ERR_NORESTORE;
                }

                /*
                 * dsl_props_set() will not convert RECEIVED to LOCAL on or
                 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
                 * explicitly if we're restoring local properties cleared in the
                 * first new-style receive.
                 */
                if (origprops != NULL &&
                    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
                    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
                    origprops, NULL) != 0) {
                        /*
                         * We stashed the original properties but failed to
                         * restore them.
                         */
                        zc->zc_obj |= ZPROP_ERR_NORESTORE;
                }
        }
out:
        nvlist_free(props);
        nvlist_free(origprops);
        nvlist_free(errors);
        releasef(fd);

        /* A failure to copy out the error list only matters on success. */
        if (error == 0)
                error = props_error;

        return (error);
}
3846 
3847 /*
3848  * inputs:
3849  * zc_name      name of snapshot to send
3850  * zc_cookie    file descriptor to send stream to
3851  * zc_obj       fromorigin flag (mutually exclusive with zc_fromobj)
3852  * zc_sendobj   objsetid of snapshot to send
3853  * zc_fromobj   objsetid of incremental fromsnap (may be zero)
3854  * zc_guid      if set, estimate size of stream only.  zc_cookie is ignored.
3855  *              output size in zc_objset_type.
3856  *
3857  * outputs: none
3858  */
3859 static int
3860 zfs_ioc_send(zfs_cmd_t *zc)
3861 {
3862         objset_t *fromsnap = NULL;
3863         objset_t *tosnap;
3864         int error;
3865         offset_t off;
3866         dsl_dataset_t *ds;
3867         dsl_dataset_t *dsfrom = NULL;
3868         spa_t *spa;
3869         dsl_pool_t *dp;
3870         boolean_t estimate = (zc->zc_guid != 0);
3871 
3872         error = spa_open(zc->zc_name, &spa, FTAG);
3873         if (error)
3874                 return (error);
3875 
3876         dp = spa_get_dsl(spa);
3877         rw_enter(&dp->dp_config_rwlock, RW_READER);
3878         error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
3879         rw_exit(&dp->dp_config_rwlock);
3880         if (error) {
3881                 spa_close(spa, FTAG);
3882                 return (error);
3883         }
3884 
3885         error = dmu_objset_from_ds(ds, &tosnap);
3886         if (error) {
3887                 dsl_dataset_rele(ds, FTAG);
3888                 spa_close(spa, FTAG);
3889                 return (error);
3890         }
3891 
3892         if (zc->zc_fromobj != 0) {
3893                 rw_enter(&dp->dp_config_rwlock, RW_READER);
3894                 error = dsl_dataset_hold_obj(dp, zc->zc_fromobj, FTAG, &dsfrom);
3895                 rw_exit(&dp->dp_config_rwlock);
3896                 spa_close(spa, FTAG);
3897                 if (error) {
3898                         dsl_dataset_rele(ds, FTAG);
3899                         return (error);
3900                 }
3901                 error = dmu_objset_from_ds(dsfrom, &fromsnap);
3902                 if (error) {
3903                         dsl_dataset_rele(dsfrom, FTAG);
3904                         dsl_dataset_rele(ds, FTAG);
3905                         return (error);
3906                 }
3907         } else {
3908                 spa_close(spa, FTAG);
3909         }
3910 
3911         if (estimate) {
3912                 error = dmu_send_estimate(tosnap, fromsnap, zc->zc_obj,
3913                     &zc->zc_objset_type);
3914         } else {
3915                 file_t *fp = getf(zc->zc_cookie);
3916                 if (fp == NULL) {
3917                         dsl_dataset_rele(ds, FTAG);
3918                         if (dsfrom)
3919                                 dsl_dataset_rele(dsfrom, FTAG);
3920                         return (EBADF);
3921                 }
3922 
3923                 off = fp->f_offset;
3924                 error = dmu_send(tosnap, fromsnap, zc->zc_obj,
3925                     zc->zc_cookie, fp->f_vnode, &off);
3926 
3927                 if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
3928                         fp->f_offset = off;
3929                 releasef(zc->zc_cookie);
3930         }
3931         if (dsfrom)
3932                 dsl_dataset_rele(dsfrom, FTAG);
3933         dsl_dataset_rele(ds, FTAG);
3934         return (error);
3935 }
3936 
3937 /*
3938  * inputs:
3939  * zc_name      name of snapshot on which to report progress
3940  * zc_cookie    file descriptor of send stream
3941  *
3942  * outputs:
3943  * zc_cookie    number of bytes written in send stream thus far
3944  */
3945 static int
3946 zfs_ioc_send_progress(zfs_cmd_t *zc)
3947 {
3948         dsl_dataset_t *ds;
3949         dmu_sendarg_t *dsp = NULL;
3950         int error;
3951 
3952         if ((error = dsl_dataset_hold(zc->zc_name, FTAG, &ds)) != 0)
3953                 return (error);
3954 
3955         mutex_enter(&ds->ds_sendstream_lock);
3956 
3957         /*
3958          * Iterate over all the send streams currently active on this dataset.
3959          * If there's one which matches the specified file descriptor _and_ the
3960          * stream was started by the current process, return the progress of
3961          * that stream.
3962          */
3963         for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
3964             dsp = list_next(&ds->ds_sendstreams, dsp)) {
3965                 if (dsp->dsa_outfd == zc->zc_cookie &&
3966                     dsp->dsa_proc == curproc)
3967                         break;
3968         }
3969 
3970         if (dsp != NULL)
3971                 zc->zc_cookie = *(dsp->dsa_off);
3972         else
3973                 error = ENOENT;
3974 
3975         mutex_exit(&ds->ds_sendstream_lock);
3976         dsl_dataset_rele(ds, FTAG);
3977         return (error);
3978 }
3979 
3980 static int
3981 zfs_ioc_inject_fault(zfs_cmd_t *zc)
3982 {
3983         int id, error;
3984 
3985         error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
3986             &zc->zc_inject_record);
3987 
3988         if (error == 0)
3989                 zc->zc_guid = (uint64_t)id;
3990 
3991         return (error);
3992 }
3993 
3994 static int
3995 zfs_ioc_clear_fault(zfs_cmd_t *zc)
3996 {
3997         return (zio_clear_fault((int)zc->zc_guid));
3998 }
3999 
4000 static int
4001 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
4002 {
4003         int id = (int)zc->zc_guid;
4004         int error;
4005 
4006         error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
4007             &zc->zc_inject_record);
4008 
4009         zc->zc_guid = id;
4010 
4011         return (error);
4012 }
4013 
4014 static int
4015 zfs_ioc_error_log(zfs_cmd_t *zc)
4016 {
4017         spa_t *spa;
4018         int error;
4019         size_t count = (size_t)zc->zc_nvlist_dst_size;
4020 
4021         if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
4022                 return (error);
4023 
4024         error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
4025             &count);
4026         if (error == 0)
4027                 zc->zc_nvlist_dst_size = count;
4028         else
4029                 zc->zc_nvlist_dst_size = spa_get_errlog_size(spa);
4030 
4031         spa_close(spa, FTAG);
4032 
4033         return (error);
4034 }
4035 
4036 static int
4037 zfs_ioc_clear(zfs_cmd_t *zc)
4038 {
4039         spa_t *spa;
4040         vdev_t *vd;
4041         int error;
4042 
4043         /*
4044          * On zpool clear we also fix up missing slogs
4045          */
4046         mutex_enter(&spa_namespace_lock);
4047         spa = spa_lookup(zc->zc_name);
4048         if (spa == NULL) {
4049                 mutex_exit(&spa_namespace_lock);
4050                 return (EIO);
4051         }
4052         if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
4053                 /* we need to let spa_open/spa_load clear the chains */
4054                 spa_set_log_state(spa, SPA_LOG_CLEAR);
4055         }
4056         spa->spa_last_open_failed = 0;
4057         mutex_exit(&spa_namespace_lock);
4058 
4059         if (zc->zc_cookie & ZPOOL_NO_REWIND) {
4060                 error = spa_open(zc->zc_name, &spa, FTAG);
4061         } else {
4062                 nvlist_t *policy;
4063                 nvlist_t *config = NULL;
4064 
4065                 if (zc->zc_nvlist_src == NULL)
4066                         return (EINVAL);
4067 
4068                 if ((error = get_nvlist(zc->zc_nvlist_src,
4069                     zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
4070                         error = spa_open_rewind(zc->zc_name, &spa, FTAG,
4071                             policy, &config);
4072                         if (config != NULL) {
4073                                 int err;
4074 
4075                                 if ((err = put_nvlist(zc, config)) != 0)
4076                                         error = err;
4077                                 nvlist_free(config);
4078                         }
4079                         nvlist_free(policy);
4080                 }
4081         }
4082 
4083         if (error)
4084                 return (error);
4085 
4086         spa_vdev_state_enter(spa, SCL_NONE);
4087 
4088         if (zc->zc_guid == 0) {
4089                 vd = NULL;
4090         } else {
4091                 vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
4092                 if (vd == NULL) {
4093                         (void) spa_vdev_state_exit(spa, NULL, ENODEV);
4094                         spa_close(spa, FTAG);
4095                         return (ENODEV);
4096                 }
4097         }
4098 
4099         vdev_clear(spa, vd);
4100 
4101         (void) spa_vdev_state_exit(spa, NULL, 0);
4102 
4103         /*
4104          * Resume any suspended I/Os.
4105          */
4106         if (zio_resume(spa) != 0)
4107                 error = EIO;
4108 
4109         spa_close(spa, FTAG);
4110 
4111         return (error);
4112 }
4113 
4114 static int
4115 zfs_ioc_pool_reopen(zfs_cmd_t *zc)
4116 {
4117         spa_t *spa;
4118         int error;
4119 
4120         error = spa_open(zc->zc_name, &spa, FTAG);
4121         if (error)
4122                 return (error);
4123 
4124         spa_vdev_state_enter(spa, SCL_NONE);
4125         vdev_reopen(spa->spa_root_vdev);
4126         (void) spa_vdev_state_exit(spa, NULL, 0);
4127         spa_close(spa, FTAG);
4128         return (0);
4129 }
4130 /*
4131  * inputs:
4132  * zc_name      name of filesystem
4133  * zc_value     name of origin snapshot
4134  *
4135  * outputs:
4136  * zc_string    name of conflicting snapshot, if there is one
4137  */
4138 static int
4139 zfs_ioc_promote(zfs_cmd_t *zc)
4140 {
4141         char *cp;
4142 
4143         /*
4144          * We don't need to unmount *all* the origin fs's snapshots, but
4145          * it's easier.
4146          */
4147         cp = strchr(zc->zc_value, '@');
4148         if (cp)
4149                 *cp = '\0';
4150         (void) dmu_objset_find(zc->zc_value,
4151             zfs_unmount_snap, NULL, DS_FIND_SNAPSHOTS);
4152         return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
4153 }
4154 
4155 /*
4156  * Retrieve a single {user|group}{used|quota}@... property.
4157  *
4158  * inputs:
4159  * zc_name      name of filesystem
4160  * zc_objset_type zfs_userquota_prop_t
4161  * zc_value     domain name (eg. "S-1-234-567-89")
4162  * zc_guid      RID/UID/GID
4163  *
4164  * outputs:
4165  * zc_cookie    property value
4166  */
4167 static int
4168 zfs_ioc_userspace_one(zfs_cmd_t *zc)
4169 {
4170         zfsvfs_t *zfsvfs;
4171         int error;
4172 
4173         if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
4174                 return (EINVAL);
4175 
4176         error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
4177         if (error)
4178                 return (error);
4179 
4180         error = zfs_userspace_one(zfsvfs,
4181             zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
4182         zfsvfs_rele(zfsvfs, FTAG);
4183 
4184         return (error);
4185 }
4186 
4187 /*
4188  * inputs:
4189  * zc_name              name of filesystem
4190  * zc_cookie            zap cursor
4191  * zc_objset_type       zfs_userquota_prop_t
4192  * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
4193  *
4194  * outputs:
4195  * zc_nvlist_dst[_size] data buffer (array of zfs_useracct_t)
4196  * zc_cookie    zap cursor
4197  */
4198 static int
4199 zfs_ioc_userspace_many(zfs_cmd_t *zc)
4200 {
4201         zfsvfs_t *zfsvfs;
4202         int bufsize = zc->zc_nvlist_dst_size;
4203 
4204         if (bufsize <= 0)
4205                 return (ENOMEM);
4206 
4207         int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
4208         if (error)
4209                 return (error);
4210 
4211         void *buf = kmem_alloc(bufsize, KM_SLEEP);
4212 
4213         error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
4214             buf, &zc->zc_nvlist_dst_size);
4215 
4216         if (error == 0) {
4217                 error = xcopyout(buf,
4218                     (void *)(uintptr_t)zc->zc_nvlist_dst,
4219                     zc->zc_nvlist_dst_size);
4220         }
4221         kmem_free(buf, bufsize);
4222         zfsvfs_rele(zfsvfs, FTAG);
4223 
4224         return (error);
4225 }
4226 
4227 /*
4228  * inputs:
4229  * zc_name              name of filesystem
4230  *
4231  * outputs:
4232  * none
4233  */
4234 static int
4235 zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
4236 {
4237         objset_t *os;
4238         int error = 0;
4239         zfsvfs_t *zfsvfs;
4240 
4241         if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
4242                 if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
4243                         /*
4244                          * If userused is not enabled, it may be because the
4245                          * objset needs to be closed & reopened (to grow the
4246                          * objset_phys_t).  Suspend/resume the fs will do that.
4247                          */
4248                         error = zfs_suspend_fs(zfsvfs);
4249                         if (error == 0)
4250                                 error = zfs_resume_fs(zfsvfs, zc->zc_name);
4251                 }
4252                 if (error == 0)
4253                         error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
4254                 VFS_RELE(zfsvfs->z_vfs);
4255         } else {
4256                 /* XXX kind of reading contents without owning */
4257                 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
4258                 if (error)
4259                         return (error);
4260 
4261                 error = dmu_objset_userspace_upgrade(os);
4262                 dmu_objset_rele(os, FTAG);
4263         }
4264 
4265         return (error);
4266 }
4267 
/*
 * We don't want a hard dependency on the special symbols we need from the
 * sharefs, nfs, and smbsrv modules.  Instead they are looked up on demand,
 * when the first file system is shared.
 * None of sharefs, nfs, or smbsrv is an unloadable module.
 */
/* Entry points resolved with ddi_modsym(); NULL until first looked up. */
int (*znfsexport_fs)(void *arg);
int (*zshare_fs)(enum sharefs_sys_op, share_t *, uint32_t);
int (*zsmbexport_fs)(void *arg, boolean_t add_share);

/* Set to 1 once the corresponding protocol's symbols have been resolved. */
int zfs_nfsshare_inited;
int zfs_smbshare_inited;

/* Module handles obtained from ddi_modopen(). */
ddi_modhandle_t nfs_mod;
ddi_modhandle_t sharefs_mod;
ddi_modhandle_t smbsrv_mod;
/* Held while the handles and entry points above are being initialized. */
kmutex_t zfs_share_lock;
4286 
4287 static int
4288 zfs_init_sharefs()
4289 {
4290         int error;
4291 
4292         ASSERT(MUTEX_HELD(&zfs_share_lock));
4293         /* Both NFS and SMB shares also require sharetab support. */
4294         if (sharefs_mod == NULL && ((sharefs_mod =
4295             ddi_modopen("fs/sharefs",
4296             KRTLD_MODE_FIRST, &error)) == NULL)) {
4297                 return (ENOSYS);
4298         }
4299         if (zshare_fs == NULL && ((zshare_fs =
4300             (int (*)(enum sharefs_sys_op, share_t *, uint32_t))
4301             ddi_modsym(sharefs_mod, "sharefs_impl", &error)) == NULL)) {
4302                 return (ENOSYS);
4303         }
4304         return (0);
4305 }
4306 
4307 static int
4308 zfs_ioc_share(zfs_cmd_t *zc)
4309 {
4310         int error;
4311         int opcode;
4312 
4313         switch (zc->zc_share.z_sharetype) {
4314         case ZFS_SHARE_NFS:
4315         case ZFS_UNSHARE_NFS:
4316                 if (zfs_nfsshare_inited == 0) {
4317                         mutex_enter(&zfs_share_lock);
4318                         if (nfs_mod == NULL && ((nfs_mod = ddi_modopen("fs/nfs",
4319                             KRTLD_MODE_FIRST, &error)) == NULL)) {
4320                                 mutex_exit(&zfs_share_lock);
4321                                 return (ENOSYS);
4322                         }
4323                         if (znfsexport_fs == NULL &&
4324                             ((znfsexport_fs = (int (*)(void *))
4325                             ddi_modsym(nfs_mod,
4326                             "nfs_export", &error)) == NULL)) {
4327                                 mutex_exit(&zfs_share_lock);
4328                                 return (ENOSYS);
4329                         }
4330                         error = zfs_init_sharefs();
4331                         if (error) {
4332                                 mutex_exit(&zfs_share_lock);
4333                                 return (ENOSYS);
4334                         }
4335                         zfs_nfsshare_inited = 1;
4336                         mutex_exit(&zfs_share_lock);
4337                 }
4338                 break;
4339         case ZFS_SHARE_SMB:
4340         case ZFS_UNSHARE_SMB:
4341                 if (zfs_smbshare_inited == 0) {
4342                         mutex_enter(&zfs_share_lock);
4343                         if (smbsrv_mod == NULL && ((smbsrv_mod =
4344                             ddi_modopen("drv/smbsrv",
4345                             KRTLD_MODE_FIRST, &error)) == NULL)) {
4346                                 mutex_exit(&zfs_share_lock);
4347                                 return (ENOSYS);
4348                         }
4349                         if (zsmbexport_fs == NULL && ((zsmbexport_fs =
4350                             (int (*)(void *, boolean_t))ddi_modsym(smbsrv_mod,
4351                             "smb_server_share", &error)) == NULL)) {
4352                                 mutex_exit(&zfs_share_lock);
4353                                 return (ENOSYS);
4354                         }
4355                         error = zfs_init_sharefs();
4356                         if (error) {
4357                                 mutex_exit(&zfs_share_lock);
4358                                 return (ENOSYS);
4359                         }
4360                         zfs_smbshare_inited = 1;
4361                         mutex_exit(&zfs_share_lock);
4362                 }
4363                 break;
4364         default:
4365                 return (EINVAL);
4366         }
4367 
4368         switch (zc->zc_share.z_sharetype) {
4369         case ZFS_SHARE_NFS:
4370         case ZFS_UNSHARE_NFS:
4371                 if (error =
4372                     znfsexport_fs((void *)
4373                     (uintptr_t)zc->zc_share.z_exportdata))
4374                         return (error);
4375                 break;
4376         case ZFS_SHARE_SMB:
4377         case ZFS_UNSHARE_SMB:
4378                 if (error = zsmbexport_fs((void *)
4379                     (uintptr_t)zc->zc_share.z_exportdata,
4380                     zc->zc_share.z_sharetype == ZFS_SHARE_SMB ?
4381                     B_TRUE: B_FALSE)) {
4382                         return (error);
4383                 }
4384                 break;
4385         }
4386 
4387         opcode = (zc->zc_share.z_sharetype == ZFS_SHARE_NFS ||
4388             zc->zc_share.z_sharetype == ZFS_SHARE_SMB) ?
4389             SHAREFS_ADD : SHAREFS_REMOVE;
4390 
4391         /*
4392          * Add or remove share from sharetab
4393          */
4394         error = zshare_fs(opcode,
4395             (void *)(uintptr_t)zc->zc_share.z_sharedata,
4396             zc->zc_share.z_sharemax);
4397 
4398         return (error);
4399 
4400 }
4401 
/*
 * Single-entry ACE array.
 * NOTE(review): judging by the initializer (who == (uid_t)-1, ACE_ALL_PERMS,
 * ACE_EVERYONE) this grants all permissions to everyone -- confirm against
 * the ace_t field order.
 */
ace_t full_access[] = {
        {(uid_t)-1, ACE_ALL_PERMS, ACE_EVERYONE, 0}
};
4405 
4406 /*
4407  * inputs:
4408  * zc_name              name of containing filesystem
4409  * zc_obj               object # beyond which we want next in-use object #
4410  *
4411  * outputs:
4412  * zc_obj               next in-use object #
4413  */
4414 static int
4415 zfs_ioc_next_obj(zfs_cmd_t *zc)
4416 {
4417         objset_t *os = NULL;
4418         int error;
4419 
4420         error = dmu_objset_hold(zc->zc_name, FTAG, &os);
4421         if (error)
4422                 return (error);
4423 
4424         error = dmu_object_next(os, &zc->zc_obj, B_FALSE,
4425             os->os_dsl_dataset->ds_phys->ds_prev_snap_txg);
4426 
4427         dmu_objset_rele(os, FTAG);
4428         return (error);
4429 }
4430 
4431 /*
4432  * inputs:
4433  * zc_name              name of filesystem
4434  * zc_value             prefix name for snapshot
4435  * zc_cleanup_fd        cleanup-on-exit file descriptor for calling process
4436  *
4437  * outputs:
4438  */
4439 static int
4440 zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
4441 {
4442         char *snap_name;
4443         int error;
4444 
4445         snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
4446             (u_longlong_t)ddi_get_lbolt64());
4447 
4448         if (strlen(snap_name) >= MAXNAMELEN) {
4449                 strfree(snap_name);
4450                 return (E2BIG);
4451         }
4452 
4453         error = dmu_objset_snapshot(zc->zc_name, snap_name, snap_name,
4454             NULL, B_FALSE, B_TRUE, zc->zc_cleanup_fd);
4455         if (error != 0) {
4456                 strfree(snap_name);
4457                 return (error);
4458         }
4459 
4460         (void) strcpy(zc->zc_value, snap_name);
4461         strfree(snap_name);
4462         return (0);
4463 }
4464 
4465 /*
4466  * inputs:
4467  * zc_name              name of "to" snapshot
4468  * zc_value             name of "from" snapshot
4469  * zc_cookie            file descriptor to write diff data on
4470  *
4471  * outputs:
4472  * dmu_diff_record_t's to the file descriptor
4473  */
4474 static int
4475 zfs_ioc_diff(zfs_cmd_t *zc)
4476 {
4477         objset_t *fromsnap;
4478         objset_t *tosnap;
4479         file_t *fp;
4480         offset_t off;
4481         int error;
4482 
4483         error = dmu_objset_hold(zc->zc_name, FTAG, &tosnap);
4484         if (error)
4485                 return (error);
4486 
4487         error = dmu_objset_hold(zc->zc_value, FTAG, &fromsnap);
4488         if (error) {
4489                 dmu_objset_rele(tosnap, FTAG);
4490                 return (error);
4491         }
4492 
4493         fp = getf(zc->zc_cookie);
4494         if (fp == NULL) {
4495                 dmu_objset_rele(fromsnap, FTAG);
4496                 dmu_objset_rele(tosnap, FTAG);
4497                 return (EBADF);
4498         }
4499 
4500         off = fp->f_offset;
4501 
4502         error = dmu_diff(tosnap, fromsnap, fp->f_vnode, &off);
4503 
4504         if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
4505                 fp->f_offset = off;
4506         releasef(zc->zc_cookie);
4507 
4508         dmu_objset_rele(fromsnap, FTAG);
4509         dmu_objset_rele(tosnap, FTAG);
4510         return (error);
4511 }
4512 
4513 /*
4514  * Remove all ACL files in shares dir
4515  */
4516 static int
4517 zfs_smb_acl_purge(znode_t *dzp)
4518 {
4519         zap_cursor_t    zc;
4520         zap_attribute_t zap;
4521         zfsvfs_t *zfsvfs = dzp->z_zfsvfs;
4522         int error;
4523 
4524         for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
4525             (error = zap_cursor_retrieve(&zc, &zap)) == 0;
4526             zap_cursor_advance(&zc)) {
4527                 if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred,
4528                     NULL, 0)) != 0)
4529                         break;
4530         }
4531         zap_cursor_fini(&zc);
4532         return (error);
4533 }
4534 
4535 static int
4536 zfs_ioc_smb_acl(zfs_cmd_t *zc)
4537 {
4538         vnode_t *vp;
4539         znode_t *dzp;
4540         vnode_t *resourcevp = NULL;
4541         znode_t *sharedir;
4542         zfsvfs_t *zfsvfs;
4543         nvlist_t *nvlist;
4544         char *src, *target;
4545         vattr_t vattr;
4546         vsecattr_t vsec;
4547         int error = 0;
4548 
4549         if ((error = lookupname(zc->zc_value, UIO_SYSSPACE,
4550             NO_FOLLOW, NULL, &vp)) != 0)
4551                 return (error);
4552 
4553         /* Now make sure mntpnt and dataset are ZFS */
4554 
4555         if (vp->v_vfsp->vfs_fstype != zfsfstype ||
4556             (strcmp((char *)refstr_value(vp->v_vfsp->vfs_resource),
4557             zc->zc_name) != 0)) {
4558                 VN_RELE(vp);
4559                 return (EINVAL);
4560         }
4561 
4562         dzp = VTOZ(vp);
4563         zfsvfs = dzp->z_zfsvfs;
4564         ZFS_ENTER(zfsvfs);
4565 
4566         /*
4567          * Create share dir if its missing.
4568          */
4569         mutex_enter(&zfsvfs->z_lock);
4570         if (zfsvfs->z_shares_dir == 0) {
4571                 dmu_tx_t *tx;
4572 
4573                 tx = dmu_tx_create(zfsvfs->z_os);
4574                 dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
4575                     ZFS_SHARES_DIR);
4576                 dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
4577                 error = dmu_tx_assign(tx, TXG_WAIT);
4578                 if (error) {
4579                         dmu_tx_abort(tx);
4580                 } else {
4581                         error = zfs_create_share_dir(zfsvfs, tx);
4582                         dmu_tx_commit(tx);
4583                 }
4584                 if (error) {
4585                         mutex_exit(&zfsvfs->z_lock);
4586                         VN_RELE(vp);
4587                         ZFS_EXIT(zfsvfs);
4588                         return (error);
4589                 }
4590         }
4591         mutex_exit(&zfsvfs->z_lock);
4592 
4593         ASSERT(zfsvfs->z_shares_dir);
4594         if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
4595                 VN_RELE(vp);
4596                 ZFS_EXIT(zfsvfs);
4597                 return (error);
4598         }
4599 
4600         switch (zc->zc_cookie) {
4601         case ZFS_SMB_ACL_ADD:
4602                 vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
4603                 vattr.va_type = VREG;
4604                 vattr.va_mode = S_IFREG|0777;
4605                 vattr.va_uid = 0;
4606                 vattr.va_gid = 0;
4607 
4608                 vsec.vsa_mask = VSA_ACE;
4609                 vsec.vsa_aclentp = &full_access;
4610                 vsec.vsa_aclentsz = sizeof (full_access);
4611                 vsec.vsa_aclcnt = 1;
4612 
4613                 error = VOP_CREATE(ZTOV(sharedir), zc->zc_string,
4614                     &vattr, EXCL, 0, &resourcevp, kcred, 0, NULL, &vsec);
4615                 if (resourcevp)
4616                         VN_RELE(resourcevp);
4617                 break;
4618 
4619         case ZFS_SMB_ACL_REMOVE:
4620                 error = VOP_REMOVE(ZTOV(sharedir), zc->zc_string, kcred,
4621                     NULL, 0);
4622                 break;
4623 
4624         case ZFS_SMB_ACL_RENAME:
4625                 if ((error = get_nvlist(zc->zc_nvlist_src,
4626                     zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
4627                         VN_RELE(vp);
4628                         ZFS_EXIT(zfsvfs);
4629                         return (error);
4630                 }
4631                 if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
4632                     nvlist_lookup_string(nvlist, ZFS_SMB_ACL_TARGET,
4633                     &target)) {
4634                         VN_RELE(vp);
4635                         VN_RELE(ZTOV(sharedir));
4636                         ZFS_EXIT(zfsvfs);
4637                         nvlist_free(nvlist);
4638                         return (error);
4639                 }
4640                 error = VOP_RENAME(ZTOV(sharedir), src, ZTOV(sharedir), target,
4641                     kcred, NULL, 0);
4642                 nvlist_free(nvlist);
4643                 break;
4644 
4645         case ZFS_SMB_ACL_PURGE:
4646                 error = zfs_smb_acl_purge(sharedir);
4647                 break;
4648 
4649         default:
4650                 error = EINVAL;
4651                 break;
4652         }
4653 
4654         VN_RELE(vp);
4655         VN_RELE(ZTOV(sharedir));
4656 
4657         ZFS_EXIT(zfsvfs);
4658 
4659         return (error);
4660 }
4661 
4662 /*
4663  * inputs:
4664  * zc_name              name of filesystem
4665  * zc_value             short name of snap
4666  * zc_string            user-supplied tag for this hold
4667  * zc_cookie            recursive flag
4668  * zc_temphold          set if hold is temporary
4669  * zc_cleanup_fd        cleanup-on-exit file descriptor for calling process
4670  * zc_sendobj           if non-zero, the objid for zc_name@zc_value
4671  * zc_createtxg         if zc_sendobj is non-zero, snap must have zc_createtxg
4672  *
4673  * outputs:             none
4674  */
4675 static int
4676 zfs_ioc_hold(zfs_cmd_t *zc)
4677 {
4678         boolean_t recursive = zc->zc_cookie;
4679         spa_t *spa;
4680         dsl_pool_t *dp;
4681         dsl_dataset_t *ds;
4682         int error;
4683         minor_t minor = 0;
4684 
4685         if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
4686                 return (EINVAL);
4687 
4688         if (zc->zc_sendobj == 0) {
4689                 return (dsl_dataset_user_hold(zc->zc_name, zc->zc_value,
4690                     zc->zc_string, recursive, zc->zc_temphold,
4691                     zc->zc_cleanup_fd));
4692         }
4693 
4694         if (recursive)
4695                 return (EINVAL);
4696 
4697         error = spa_open(zc->zc_name, &spa, FTAG);
4698         if (error)
4699                 return (error);
4700 
4701         dp = spa_get_dsl(spa);
4702         rw_enter(&dp->dp_config_rwlock, RW_READER);
4703         error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
4704         rw_exit(&dp->dp_config_rwlock);
4705         spa_close(spa, FTAG);
4706         if (error)
4707                 return (error);
4708 
4709         /*
4710          * Until we have a hold on this snapshot, it's possible that
4711          * zc_sendobj could've been destroyed and reused as part
4712          * of a later txg.  Make sure we're looking at the right object.
4713          */
4714         if (zc->zc_createtxg != ds->ds_phys->ds_creation_txg) {
4715                 dsl_dataset_rele(ds, FTAG);
4716                 return (ENOENT);
4717         }
4718 
4719         if (zc->zc_cleanup_fd != -1 && zc->zc_temphold) {
4720                 error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
4721                 if (error) {
4722                         dsl_dataset_rele(ds, FTAG);
4723                         return (error);
4724                 }
4725         }
4726 
4727         error = dsl_dataset_user_hold_for_send(ds, zc->zc_string,
4728             zc->zc_temphold);
4729         if (minor != 0) {
4730                 if (error == 0) {
4731                         dsl_register_onexit_hold_cleanup(ds, zc->zc_string,
4732                             minor);
4733                 }
4734                 zfs_onexit_fd_rele(zc->zc_cleanup_fd);
4735         }
4736         dsl_dataset_rele(ds, FTAG);
4737 
4738         return (error);
4739 }
4740 
4741 /*
4742  * inputs:
4743  * zc_name      name of dataset from which we're releasing a user hold
4744  * zc_value     short name of snap
4745  * zc_string    user-supplied tag for this hold
4746  * zc_cookie    recursive flag
4747  *
4748  * outputs:     none
4749  */
4750 static int
4751 zfs_ioc_release(zfs_cmd_t *zc)
4752 {
4753         boolean_t recursive = zc->zc_cookie;
4754 
4755         if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
4756                 return (EINVAL);
4757 
4758         return (dsl_dataset_user_release(zc->zc_name, zc->zc_value,
4759             zc->zc_string, recursive));
4760 }
4761 
4762 /*
4763  * inputs:
4764  * zc_name              name of filesystem
4765  *
4766  * outputs:
4767  * zc_nvlist_src{_size} nvlist of snapshot holds
4768  */
4769 static int
4770 zfs_ioc_get_holds(zfs_cmd_t *zc)
4771 {
4772         nvlist_t *nvp;
4773         int error;
4774 
4775         if ((error = dsl_dataset_get_holds(zc->zc_name, &nvp)) == 0) {
4776                 error = put_nvlist(zc, nvp);
4777                 nvlist_free(nvp);
4778         }
4779 
4780         return (error);
4781 }
4782 
4783 /*
4784  * inputs:
4785  * zc_name              name of new filesystem or snapshot
4786  * zc_value             full name of old snapshot
4787  *
4788  * outputs:
4789  * zc_cookie            space in bytes
4790  * zc_objset_type       compressed space in bytes
4791  * zc_perm_action       uncompressed space in bytes
4792  */
4793 static int
4794 zfs_ioc_space_written(zfs_cmd_t *zc)
4795 {
4796         int error;
4797         dsl_dataset_t *new, *old;
4798 
4799         error = dsl_dataset_hold(zc->zc_name, FTAG, &new);
4800         if (error != 0)
4801                 return (error);
4802         error = dsl_dataset_hold(zc->zc_value, FTAG, &old);
4803         if (error != 0) {
4804                 dsl_dataset_rele(new, FTAG);
4805                 return (error);
4806         }
4807 
4808         error = dsl_dataset_space_written(old, new, &zc->zc_cookie,
4809             &zc->zc_objset_type, &zc->zc_perm_action);
4810         dsl_dataset_rele(old, FTAG);
4811         dsl_dataset_rele(new, FTAG);
4812         return (error);
4813 }
4814 
4815 /*
4816  * inputs:
4817  * zc_name              full name of last snapshot
4818  * zc_value             full name of first snapshot
4819  *
4820  * outputs:
4821  * zc_cookie            space in bytes
4822  * zc_objset_type       compressed space in bytes
4823  * zc_perm_action       uncompressed space in bytes
4824  */
4825 static int
4826 zfs_ioc_space_snaps(zfs_cmd_t *zc)
4827 {
4828         int error;
4829         dsl_dataset_t *new, *old;
4830 
4831         error = dsl_dataset_hold(zc->zc_name, FTAG, &new);
4832         if (error != 0)
4833                 return (error);
4834         error = dsl_dataset_hold(zc->zc_value, FTAG, &old);
4835         if (error != 0) {
4836                 dsl_dataset_rele(new, FTAG);
4837                 return (error);
4838         }
4839 
4840         error = dsl_dataset_space_wouldfree(old, new, &zc->zc_cookie,
4841             &zc->zc_objset_type, &zc->zc_perm_action);
4842         dsl_dataset_rele(old, FTAG);
4843         dsl_dataset_rele(new, FTAG);
4844         return (error);
4845 }
4846 
/*
 * Dispatch table for the /dev/zfs ioctls.
 *
 * zfsdev_ioctl() indexes this array with (cmd - ZFS_IOC), so the position of
 * each entry is part of the ioctl interface: entries must not be reordered,
 * and new entries belong at the end.
 *
 * Each entry lists, in order: the handler, the security policy callback, the
 * kind of name expected in zc_name (POOL_NAME, DATASET_NAME or NO_NAME),
 * whether a successful call is written to the history log, and the pool
 * state checks applied before the handler runs.  (Field order is inferred
 * from how zfsdev_ioctl() uses zvec_func, zvec_secpolicy, zvec_namecheck,
 * zvec_his_log and zvec_pool_check -- confirm against the zfs_ioc_vec_t
 * definition.)
 *
 * pool create, destroy, and export don't log the history as part of
 * zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export
 * do the logging of those commands.
 */
static zfs_ioc_vec_t zfs_ioc_vec[] = {
        { zfs_ioc_pool_create, zfs_secpolicy_config, POOL_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_pool_destroy, zfs_secpolicy_config, POOL_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_pool_import, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_NONE },
        { zfs_ioc_pool_export, zfs_secpolicy_config, POOL_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_pool_configs, zfs_secpolicy_none, NO_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_pool_stats, zfs_secpolicy_read, POOL_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_pool_tryimport, zfs_secpolicy_config, NO_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_pool_scan, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_pool_freeze, zfs_secpolicy_config, NO_NAME, B_FALSE,
            POOL_CHECK_READONLY },
        { zfs_ioc_pool_upgrade, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_pool_get_history, zfs_secpolicy_config, POOL_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_vdev_add, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_vdev_remove, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_vdev_set_state, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_vdev_attach, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_vdev_detach, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_vdev_setpath, zfs_secpolicy_config, POOL_NAME, B_FALSE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_vdev_setfru,  zfs_secpolicy_config, POOL_NAME, B_FALSE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_objset_stats, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
            POOL_CHECK_SUSPENDED },
        { zfs_ioc_objset_zplprops, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_dataset_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
            POOL_CHECK_SUSPENDED },
        { zfs_ioc_snapshot_list_next, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
            POOL_CHECK_SUSPENDED },
        { zfs_ioc_set_prop, zfs_secpolicy_none, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_create, zfs_secpolicy_create, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_destroy, zfs_secpolicy_destroy, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_rename, zfs_secpolicy_rename, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_inject_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_clear_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_inject_list_next, zfs_secpolicy_inject, NO_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_error_log, zfs_secpolicy_inject, POOL_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_clear, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_NONE },
        { zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_dsobj_to_dsname, zfs_secpolicy_diff, POOL_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_obj_to_path, zfs_secpolicy_diff, DATASET_NAME, B_FALSE,
            POOL_CHECK_SUSPENDED },
        { zfs_ioc_pool_set_props, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_set_fsacl, zfs_secpolicy_fsacl, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_get_fsacl, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_share, zfs_secpolicy_share, DATASET_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_inherit_prop, zfs_secpolicy_inherit, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_smb_acl, zfs_secpolicy_smb_acl, DATASET_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_userspace_one, zfs_secpolicy_userspace_one, DATASET_NAME,
            B_FALSE, POOL_CHECK_NONE },
        { zfs_ioc_userspace_many, zfs_secpolicy_userspace_many, DATASET_NAME,
            B_FALSE, POOL_CHECK_NONE },
        { zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
            DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_hold, zfs_secpolicy_hold, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_release, zfs_secpolicy_release, DATASET_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
            POOL_CHECK_SUSPENDED },
        { zfs_ioc_objset_recvd_props, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_vdev_split, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_next_obj, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_diff, zfs_secpolicy_diff, DATASET_NAME, B_FALSE,
            POOL_CHECK_NONE },
        { zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot, DATASET_NAME,
            B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_obj_to_stats, zfs_secpolicy_diff, DATASET_NAME, B_FALSE,
            POOL_CHECK_SUSPENDED },
        { zfs_ioc_space_written, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
            POOL_CHECK_SUSPENDED },
        { zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
            POOL_CHECK_SUSPENDED },
        { zfs_ioc_destroy_snaps_nvl, zfs_secpolicy_destroy_recursive,
            DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_pool_reguid, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
        { zfs_ioc_pool_reopen, zfs_secpolicy_config, POOL_NAME, B_TRUE,
            POOL_CHECK_SUSPENDED },
        { zfs_ioc_send_progress, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
            POOL_CHECK_NONE }
};
4980 
4981 int
4982 pool_status_check(const char *name, zfs_ioc_namecheck_t type,
4983     zfs_ioc_poolcheck_t check)
4984 {
4985         spa_t *spa;
4986         int error;
4987 
4988         ASSERT(type == POOL_NAME || type == DATASET_NAME);
4989 
4990         if (check & POOL_CHECK_NONE)
4991                 return (0);
4992 
4993         error = spa_open(name, &spa, FTAG);
4994         if (error == 0) {
4995                 if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
4996                         error = EAGAIN;
4997                 else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
4998                         error = EROFS;
4999                 spa_close(spa, FTAG);
5000         }
5001         return (error);
5002 }
5003 
5004 /*
5005  * Find a free minor number.
5006  */
5007 minor_t
5008 zfsdev_minor_alloc(void)
5009 {
5010         static minor_t last_minor;
5011         minor_t m;
5012 
5013         ASSERT(MUTEX_HELD(&zfsdev_state_lock));
5014 
5015         for (m = last_minor + 1; m != last_minor; m++) {
5016                 if (m > ZFSDEV_MAX_MINOR)
5017                         m = 1;
5018                 if (ddi_get_soft_state(zfsdev_state, m) == NULL) {
5019                         last_minor = m;
5020                         return (m);
5021                 }
5022         }
5023 
5024         return (0);
5025 }
5026 
5027 static int
5028 zfs_ctldev_init(dev_t *devp)
5029 {
5030         minor_t minor;
5031         zfs_soft_state_t *zs;
5032 
5033         ASSERT(MUTEX_HELD(&zfsdev_state_lock));
5034         ASSERT(getminor(*devp) == 0);
5035 
5036         minor = zfsdev_minor_alloc();
5037         if (minor == 0)
5038                 return (ENXIO);
5039 
5040         if (ddi_soft_state_zalloc(zfsdev_state, minor) != DDI_SUCCESS)
5041                 return (EAGAIN);
5042 
5043         *devp = makedevice(getemajor(*devp), minor);
5044 
5045         zs = ddi_get_soft_state(zfsdev_state, minor);
5046         zs->zss_type = ZSST_CTLDEV;
5047         zfs_onexit_init((zfs_onexit_t **)&zs->zss_data);
5048 
5049         return (0);
5050 }
5051 
5052 static void
5053 zfs_ctldev_destroy(zfs_onexit_t *zo, minor_t minor)
5054 {
5055         ASSERT(MUTEX_HELD(&zfsdev_state_lock));
5056 
5057         zfs_onexit_destroy(zo);
5058         ddi_soft_state_free(zfsdev_state, minor);
5059 }
5060 
5061 void *
5062 zfsdev_get_soft_state(minor_t minor, enum zfs_soft_state_type which)
5063 {
5064         zfs_soft_state_t *zp;
5065 
5066         zp = ddi_get_soft_state(zfsdev_state, minor);
5067         if (zp == NULL || zp->zss_type != which)
5068                 return (NULL);
5069 
5070         return (zp->zss_data);
5071 }
5072 
5073 static int
5074 zfsdev_open(dev_t *devp, int flag, int otyp, cred_t *cr)
5075 {
5076         int error = 0;
5077 
5078         if (getminor(*devp) != 0)
5079                 return (zvol_open(devp, flag, otyp, cr));
5080 
5081         /* This is the control device. Allocate a new minor if requested. */
5082         if (flag & FEXCL) {
5083                 mutex_enter(&zfsdev_state_lock);
5084                 error = zfs_ctldev_init(devp);
5085                 mutex_exit(&zfsdev_state_lock);
5086         }
5087 
5088         return (error);
5089 }
5090 
5091 static int
5092 zfsdev_close(dev_t dev, int flag, int otyp, cred_t *cr)
5093 {
5094         zfs_onexit_t *zo;
5095         minor_t minor = getminor(dev);
5096 
5097         if (minor == 0)
5098                 return (0);
5099 
5100         mutex_enter(&zfsdev_state_lock);
5101         zo = zfsdev_get_soft_state(minor, ZSST_CTLDEV);
5102         if (zo == NULL) {
5103                 mutex_exit(&zfsdev_state_lock);
5104                 return (zvol_close(dev, flag, otyp, cr));
5105         }
5106         zfs_ctldev_destroy(zo, minor);
5107         mutex_exit(&zfsdev_state_lock);
5108 
5109         return (0);
5110 }
5111 
5112 static int
5113 zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
5114 {
5115         zfs_cmd_t *zc;
5116         uint_t vec;
5117         int error, rc;
5118         minor_t minor = getminor(dev);
5119 
5120         if (minor != 0 &&
5121             zfsdev_get_soft_state(minor, ZSST_CTLDEV) == NULL)
5122                 return (zvol_ioctl(dev, cmd, arg, flag, cr, rvalp));
5123 
5124         vec = cmd - ZFS_IOC;
5125         ASSERT3U(getmajor(dev), ==, ddi_driver_major(zfs_dip));
5126 
5127         if (vec >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
5128                 return (EINVAL);
5129 
5130         zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
5131 
5132         error = ddi_copyin((void *)arg, zc, sizeof (zfs_cmd_t), flag);
5133         if (error != 0)
5134                 error = EFAULT;
5135 
5136         if ((error == 0) && !(flag & FKIOCTL))
5137                 error = zfs_ioc_vec[vec].zvec_secpolicy(zc, cr);
5138 
5139         /*
5140          * Ensure that all pool/dataset names are valid before we pass down to
5141          * the lower layers.
5142          */
5143         if (error == 0) {
5144                 zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
5145                 zc->zc_iflags = flag & FKIOCTL;
5146                 switch (zfs_ioc_vec[vec].zvec_namecheck) {
5147                 case POOL_NAME:
5148                         if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
5149                                 error = EINVAL;
5150                         error = pool_status_check(zc->zc_name,
5151                             zfs_ioc_vec[vec].zvec_namecheck,
5152                             zfs_ioc_vec[vec].zvec_pool_check);
5153                         break;
5154 
5155                 case DATASET_NAME:
5156                         if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
5157                                 error = EINVAL;
5158                         error = pool_status_check(zc->zc_name,
5159                             zfs_ioc_vec[vec].zvec_namecheck,
5160                             zfs_ioc_vec[vec].zvec_pool_check);
5161                         break;
5162 
5163                 case NO_NAME:
5164                         break;
5165                 }
5166         }
5167 
5168         if (error == 0)
5169                 error = zfs_ioc_vec[vec].zvec_func(zc);
5170 
5171         rc = ddi_copyout(zc, (void *)arg, sizeof (zfs_cmd_t), flag);
5172         if (error == 0) {
5173                 if (rc != 0)
5174                         error = EFAULT;
5175                 if (zfs_ioc_vec[vec].zvec_his_log)
5176                         zfs_log_history(zc);
5177         }
5178 
5179         kmem_free(zc, sizeof (zfs_cmd_t));
5180         return (error);
5181 }
5182 
5183 static int
5184 zfs_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5185 {
5186         if (cmd != DDI_ATTACH)
5187                 return (DDI_FAILURE);
5188 
5189         if (ddi_create_minor_node(dip, "zfs", S_IFCHR, 0,
5190             DDI_PSEUDO, 0) == DDI_FAILURE)
5191                 return (DDI_FAILURE);
5192 
5193         zfs_dip = dip;
5194 
5195         ddi_report_dev(dip);
5196 
5197         return (DDI_SUCCESS);
5198 }
5199 
5200 static int
5201 zfs_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5202 {
5203         if (spa_busy() || zfs_busy() || zvol_busy())
5204                 return (DDI_FAILURE);
5205 
5206         if (cmd != DDI_DETACH)
5207                 return (DDI_FAILURE);
5208 
5209         zfs_dip = NULL;
5210 
5211         ddi_prop_remove_all(dip);
5212         ddi_remove_minor_node(dip, NULL);
5213 
5214         return (DDI_SUCCESS);
5215 }
5216 
5217 /*ARGSUSED*/
5218 static int
5219 zfs_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
5220 {
5221         switch (infocmd) {
5222         case DDI_INFO_DEVT2DEVINFO:
5223                 *result = zfs_dip;
5224                 return (DDI_SUCCESS);
5225 
5226         case DDI_INFO_DEVT2INSTANCE:
5227                 *result = (void *)0;
5228                 return (DDI_SUCCESS);
5229         }
5230 
5231         return (DDI_FAILURE);
5232 }
5233 
/*
 * OK, so this is a little weird.
 *
 * /dev/zfs is the control node, i.e. minor 0.
 * /dev/zvol/[r]dsk/pool/dataset are the zvols, minor > 0.
 *
 * /dev/zfs has basically nothing to do except serve up ioctls,
 * so most of the standard driver entry points are in zvol.c.
 *
 * Only open, close and ioctl are implemented in this file; each of them
 * forwards to its zvol_* counterpart when the minor is not a control-device
 * minor.  The initializer is positional, so the order of the entries below
 * must follow struct cb_ops.
 */
static struct cb_ops zfs_cb_ops = {
        zfsdev_open,    /* open */
        zfsdev_close,   /* close */
        zvol_strategy,  /* strategy */
        nodev,          /* print */
        zvol_dump,      /* dump */
        zvol_read,      /* read */
        zvol_write,     /* write */
        zfsdev_ioctl,   /* ioctl */
        nodev,          /* devmap */
        nodev,          /* mmap */
        nodev,          /* segmap */
        nochpoll,       /* poll */
        ddi_prop_op,    /* prop_op */
        NULL,           /* streamtab */
        D_NEW | D_MP | D_64BIT,         /* Driver compatibility flag */
        CB_REV,         /* version */
        nodev,          /* async read */
        nodev,          /* async write */
};
5263 
/*
 * Device operations for the zfs driver.  The autoconfiguration entry points
 * (info, attach, detach) are the zfs_* functions in this file, and the
 * character/block entry points come from zfs_cb_ops.  The initializer is
 * positional, so the order of the entries must follow struct dev_ops.
 */
static struct dev_ops zfs_dev_ops = {
        DEVO_REV,       /* version */
        0,              /* refcnt */
        zfs_info,       /* info */
        nulldev,        /* identify */
        nulldev,        /* probe */
        zfs_attach,     /* attach */
        zfs_detach,     /* detach */
        nodev,          /* reset */
        &zfs_cb_ops,    /* driver operations */
        NULL,           /* no bus operations */
        NULL,           /* power */
        ddi_quiesce_not_needed, /* quiesce */
};
5278 
/*
 * Driver linkage: ties the driver module operations and the descriptive
 * name "ZFS storage pool" to zfs_dev_ops.  Referenced from modlinkage.
 */
static struct modldrv zfs_modldrv = {
        &mod_driverops,
        "ZFS storage pool",
        &zfs_dev_ops
};
5284 
/*
 * Module linkage passed to mod_install(), mod_remove() and mod_info() by
 * _init(), _fini() and _info().  It carries two linkage structures: the
 * file system linkage zfs_modlfs (defined outside this part of the file)
 * and the driver linkage zfs_modldrv; the list is NULL-terminated.
 */
static struct modlinkage modlinkage = {
        MODREV_1,
        (void *)&zfs_modlfs,
        (void *)&zfs_modldrv,
        NULL
};
5291 
5292 
/*
 * Thread-specific-data keys.  zfs_fsyncer_key is defined here; rrw_tsd_key
 * is defined elsewhere.  Both are created with tsd_create() in _init().
 */
uint_t zfs_fsyncer_key;
extern uint_t rrw_tsd_key;
5295 
5296 int
5297 _init(void)
5298 {
5299         int error;
5300 
5301         spa_init(FREAD | FWRITE);
5302         zfs_init();
5303         zvol_init();
5304 
5305         if ((error = mod_install(&modlinkage)) != 0) {
5306                 zvol_fini();
5307                 zfs_fini();
5308                 spa_fini();
5309                 return (error);
5310         }
5311 
5312         tsd_create(&zfs_fsyncer_key, NULL);
5313         tsd_create(&rrw_tsd_key, NULL);
5314 
5315         error = ldi_ident_from_mod(&modlinkage, &zfs_li);
5316         ASSERT(error == 0);
5317         mutex_init(&zfs_share_lock, NULL, MUTEX_DEFAULT, NULL);
5318 
5319         return (0);
5320 }
5321 
5322 int
5323 _fini(void)
5324 {
5325         int error;
5326 
5327         if (spa_busy() || zfs_busy() || zvol_busy() || zio_injection_enabled)
5328                 return (EBUSY);
5329 
5330         if ((error = mod_remove(&modlinkage)) != 0)
5331                 return (error);
5332 
5333         zvol_fini();
5334         zfs_fini();
5335         spa_fini();
5336         if (zfs_nfsshare_inited)
5337                 (void) ddi_modclose(nfs_mod);
5338         if (zfs_smbshare_inited)
5339                 (void) ddi_modclose(smbsrv_mod);
5340         if (zfs_nfsshare_inited || zfs_smbshare_inited)
5341                 (void) ddi_modclose(sharefs_mod);
5342 
5343         tsd_destroy(&zfs_fsyncer_key);
5344         ldi_ident_release(zfs_li);
5345         zfs_li = NULL;
5346         mutex_destroy(&zfs_share_lock);
5347 
5348         return (error);
5349 }
5350 
5351 int
5352 _info(struct modinfo *modinfop)
5353 {
5354         return (mod_info(&modlinkage, modinfop));
5355 }