/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
 * Copyright (c) 2013 Steven Hartland. All rights reserved.
 * Copyright (c) 2014 Integros [integros.com]
 * Copyright 2017 RackTop Systems.
 */

/*
 * LibZFS_Core (lzc) is intended to replace most functionality in libzfs.
 * It has the following characteristics:
 *
 *  - Thread Safe.  libzfs_core is accessible concurrently from multiple
 *  threads.  This is accomplished primarily by avoiding global data
 *  (e.g. caching).  Since it's thread-safe, there is no reason for a
 *  process to have multiple libzfs "instances".  Therefore, we store
 *  our few pieces of data (e.g. the file descriptor) in global
 *  variables.  The fd is reference-counted so that the libzfs_core
 *  library can be "initialized" multiple times (e.g. by different
 *  consumers within the same process).
 *
 *  - Committed Interface.  The libzfs_core interface will be committed,
 *  therefore consumers can compile against it and be confident that
 *  their code will continue to work on future releases of this code.
 *  Currently, the interface is Evolving (not Committed), but we intend
 *  to commit to it once it is more complete and we determine that it
 *  meets the needs of all consumers.
 *
 *  - Programmatic Error Handling.  libzfs_core communicates errors with
 *  defined error numbers, and doesn't print anything to stdout/stderr.
 *
 *  - Thin Layer.  libzfs_core is a thin layer, marshaling arguments
 *  to/from the kernel ioctls.  There is generally a 1:1 correspondence
 *  between libzfs_core functions and ioctls to /dev/zfs.
 *
 *  - Clear Atomicity.  Because libzfs_core functions are generally 1:1
 *  with kernel ioctls, and kernel ioctls are generally atomic, each
 *  libzfs_core function is atomic.  For example, creating multiple
 *  snapshots with a single call to lzc_snapshot() is atomic -- it
 *  can't fail with only some of the requested snapshots created, even
 *  in the event of power loss or system crash.
 *
 *  - Continued libzfs Support.  Some higher-level operations (e.g.
 *  support for "zfs send -R") are too complicated to fit the scope of
 *  libzfs_core.  This functionality will continue to live in libzfs.
 *  Where appropriate, libzfs will use the underlying atomic operations
 *  of libzfs_core.  For example, libzfs may implement "zfs send -R |
 *  zfs receive" by using individual "send one snapshot", rename,
 *  destroy, and "receive one snapshot" operations in libzfs_core.
 *  /sbin/zfs and /sbin/zpool will link with both libzfs and
 *  libzfs_core.  Other consumers should aim to use only libzfs_core,
 *  since that will be the supported, stable interface going forward.
 */

#include <libzfs_core.h>
#include <ctype.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <sys/nvpair.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/zfs_ioctl.h>

static int g_fd = -1;
static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;
static int g_refcount;

int
libzfs_core_init(void)
{
        (void) pthread_mutex_lock(&g_lock);
        if (g_refcount == 0) {
                g_fd = open("/dev/zfs", O_RDWR);
                if (g_fd < 0) {
                        (void) pthread_mutex_unlock(&g_lock);
                        return (errno);
                }
        }
        g_refcount++;
        (void) pthread_mutex_unlock(&g_lock);
        return (0);
}

void
libzfs_core_fini(void)
{
        (void) pthread_mutex_lock(&g_lock);
        ASSERT3S(g_refcount, >, 0);

        if (g_refcount > 0)
                g_refcount--;

        if (g_refcount == 0 && g_fd != -1) {
                (void) close(g_fd);
                g_fd = -1;
        }
        (void) pthread_mutex_unlock(&g_lock);
}
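
/*
 * Illustrative usage sketch (not part of the library): a consumer initializes
 * libzfs_core once, calls lzc_*() functions, and tears the library down when
 * it is finished.  The error handling shown is deliberately minimal.
 *
 *        if (libzfs_core_init() != 0)
 *                return (1);
 *        ...call lzc_*() functions...
 *        libzfs_core_fini();
 */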

static int
lzc_ioctl(zfs_ioc_t ioc, const char *name,
    nvlist_t *source, nvlist_t **resultp)
{
        zfs_cmd_t zc = { 0 };
        int error = 0;
        char *packed;
        size_t size;

        ASSERT3S(g_refcount, >, 0);
        VERIFY3S(g_fd, !=, -1);

        (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));

        packed = fnvlist_pack(source, &size);
        zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
        zc.zc_nvlist_src_size = size;

        if (resultp != NULL) {
                *resultp = NULL;
                zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024);
                zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
                    malloc(zc.zc_nvlist_dst_size);
                if (zc.zc_nvlist_dst == NULL) {
                        error = ENOMEM;
                        goto out;
                }
        }

        while (ioctl(g_fd, ioc, &zc) != 0) {
                if (errno == ENOMEM && resultp != NULL) {
                        free((void *)(uintptr_t)zc.zc_nvlist_dst);
                        zc.zc_nvlist_dst_size *= 2;
                        zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
                            malloc(zc.zc_nvlist_dst_size);
                        if (zc.zc_nvlist_dst == NULL) {
                                error = ENOMEM;
                                goto out;
                        }
                } else {
                        error = errno;
                        break;
                }
        }
        if (zc.zc_nvlist_dst_filled) {
                *resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
                    zc.zc_nvlist_dst_size);
        }

out:
        fnvlist_pack_free(packed, size);
        free((void *)(uintptr_t)zc.zc_nvlist_dst);
        return (error);
}

int
lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props)
{
        int error;
        nvlist_t *args = fnvlist_alloc();
        fnvlist_add_int32(args, "type", (dmu_objset_type_t)type);
        if (props != NULL)
                fnvlist_add_nvlist(args, "props", props);
        error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL);
        nvlist_free(args);
        return (error);
}
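
/*
 * Illustrative example (not part of the library): creating a filesystem with
 * one property set at creation time.  The dataset name and property value
 * are hypothetical.
 *
 *        nvlist_t *props = fnvlist_alloc();
 *        fnvlist_add_uint64(props, "recordsize", 8192);
 *        int err = lzc_create("tank/fs", LZC_DATSET_TYPE_ZFS, props);
 *        nvlist_free(props);
 */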

int
lzc_clone(const char *fsname, const char *origin,
    nvlist_t *props)
{
        int error;
        nvlist_t *args = fnvlist_alloc();
        fnvlist_add_string(args, "origin", origin);
        if (props != NULL)
                fnvlist_add_nvlist(args, "props", props);
        error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL);
        nvlist_free(args);
        return (error);
}

int
lzc_promote(const char *fsname, char *snapnamebuf, int snapnamelen)
{
        /*
         * The promote ioctl is still legacy, so we need to construct our
         * own zfs_cmd_t rather than using lzc_ioctl().
         */
        zfs_cmd_t zc = { 0 };

        ASSERT3S(g_refcount, >, 0);
        VERIFY3S(g_fd, !=, -1);

        (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
        if (ioctl(g_fd, ZFS_IOC_PROMOTE, &zc) != 0) {
                if (errno == EEXIST && snapnamebuf != NULL)
                        (void) strlcpy(snapnamebuf, zc.zc_string, snapnamelen);
                return (errno);
        }
        return (0);
}

/*
 * Creates snapshots.
 *
 * The keys in the snaps nvlist are the snapshots to be created.
 * They must all be in the same pool.
 *
 * The props nvlist is properties to set.  Currently only user properties
 * are supported.  { user:prop_name -> string value }
 *
 * The returned results nvlist will have an entry for each snapshot that failed.
 * The value will be the (int32) error code.
 *
 * The return value will be 0 if all snapshots were created, otherwise it will
 * be the errno of an (unspecified) snapshot that failed.
 */
int
lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
{
        nvpair_t *elem;
        nvlist_t *args;
        int error;
        char pool[ZFS_MAX_DATASET_NAME_LEN];

        *errlist = NULL;

        /* determine the pool name */
        elem = nvlist_next_nvpair(snaps, NULL);
        if (elem == NULL)
                return (0);
        (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
        pool[strcspn(pool, "/@")] = '\0';

        args = fnvlist_alloc();
        fnvlist_add_nvlist(args, "snaps", snaps);
        if (props != NULL)
                fnvlist_add_nvlist(args, "props", props);

        error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist);
        nvlist_free(args);

        return (error);
}
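
/*
 * Illustrative example (not part of the library): atomically creating two
 * snapshots in the same pool.  The snapshot names are hypothetical; the keys
 * of the "snaps" nvlist carry no values, so they are added as boolean
 * nvpairs.
 *
 *        nvlist_t *snaps = fnvlist_alloc();
 *        nvlist_t *errlist = NULL;
 *        fnvlist_add_boolean(snaps, "tank/fs@today");
 *        fnvlist_add_boolean(snaps, "tank/vol@today");
 *        int err = lzc_snapshot(snaps, NULL, &errlist);
 *        nvlist_free(snaps);
 *        nvlist_free(errlist);
 */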

/*
 * Destroys snapshots.
 *
 * The keys in the snaps nvlist are the snapshots to be destroyed.
 * They must all be in the same pool.
 *
 * Snapshots that do not exist will be silently ignored.
 *
 * If 'defer' is not set, and a snapshot has user holds or clones, the
 * destroy operation will fail and none of the snapshots will be
 * destroyed.
 *
 * If 'defer' is set, and a snapshot has user holds or clones, it will be
 * marked for deferred destruction, and will be destroyed when the last hold
 * or clone is removed/destroyed.
 *
 * The return value will be 0 if all snapshots were destroyed (or marked for
 * later destruction if 'defer' is set) or didn't exist to begin with.
 *
 * Otherwise the return value will be the errno of an (unspecified) snapshot
 * that failed, no snapshots will be destroyed, and the errlist will have an
 * entry for each snapshot that failed.  The value in the errlist will be
 * the (int32) error code.
 */
int
lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist)
{
        nvpair_t *elem;
        nvlist_t *args;
        int error;
        char pool[ZFS_MAX_DATASET_NAME_LEN];

        /* determine the pool name */
        elem = nvlist_next_nvpair(snaps, NULL);
        if (elem == NULL)
                return (0);
        (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
        pool[strcspn(pool, "/@")] = '\0';

        args = fnvlist_alloc();
        fnvlist_add_nvlist(args, "snaps", snaps);
        if (defer)
                fnvlist_add_boolean(args, "defer");

        error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist);
        nvlist_free(args);

        return (error);
}
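
/*
 * Illustrative example (not part of the library): destroying the snapshots
 * created in the previous example, deferring destruction of any snapshot
 * that still has holds or clones.
 *
 *        nvlist_t *snaps = fnvlist_alloc();
 *        nvlist_t *errlist = NULL;
 *        fnvlist_add_boolean(snaps, "tank/fs@today");
 *        fnvlist_add_boolean(snaps, "tank/vol@today");
 *        int err = lzc_destroy_snaps(snaps, B_TRUE, &errlist);
 *        nvlist_free(snaps);
 *        nvlist_free(errlist);
 */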

int
lzc_snaprange_space(const char *firstsnap, const char *lastsnap,
    uint64_t *usedp)
{
        nvlist_t *args;
        nvlist_t *result;
        int err;
        char fs[ZFS_MAX_DATASET_NAME_LEN];
        char *atp;

        /* determine the fs name */
        (void) strlcpy(fs, firstsnap, sizeof (fs));
        atp = strchr(fs, '@');
        if (atp == NULL)
                return (EINVAL);
        *atp = '\0';

        args = fnvlist_alloc();
        fnvlist_add_string(args, "firstsnap", firstsnap);

        err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result);
        nvlist_free(args);
        if (err == 0)
                *usedp = fnvlist_lookup_uint64(result, "used");
        fnvlist_free(result);

        return (err);
}

boolean_t
lzc_exists(const char *dataset)
{
        /*
         * The objset_stats ioctl is still legacy, so we need to construct our
         * own zfs_cmd_t rather than using lzc_ioctl().
         */
        zfs_cmd_t zc = { 0 };

        ASSERT3S(g_refcount, >, 0);
        VERIFY3S(g_fd, !=, -1);

        (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
        return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0);
}

/*
 * Create "user holds" on snapshots.  If there is a hold on a snapshot,
 * the snapshot cannot be destroyed.  (However, it can be marked for deletion
 * by lzc_destroy_snaps(defer=B_TRUE).)
 *
 * The keys in the nvlist are snapshot names.
 * The snapshots must all be in the same pool.
 * The value is the name of the hold (string type).
 *
 * If cleanup_fd is not -1, it must be the result of open("/dev/zfs", O_EXCL).
 * In this case, when the cleanup_fd is closed (including on process
 * termination), the holds will be released.  If the system is shut down
 * uncleanly, the holds will be released when the pool is next opened
 * or imported.
 *
 * Holds for snapshots which don't exist will be skipped and have an entry
 * added to errlist, but will not cause an overall failure.
 *
 * The return value will be 0 if all holds, for snapshots that existed,
 * were successfully created.
 *
 * Otherwise the return value will be the errno of an (unspecified) hold that
 * failed and no holds will be created.
 *
 * In all cases the errlist will have an entry for each hold that failed
 * (name = snapshot), with its value being the error code (int32).
 */
int
lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
{
        char pool[ZFS_MAX_DATASET_NAME_LEN];
        nvlist_t *args;
        nvpair_t *elem;
        int error;

        /* determine the pool name */
        elem = nvlist_next_nvpair(holds, NULL);
        if (elem == NULL)
                return (0);
        (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
        pool[strcspn(pool, "/@")] = '\0';

        args = fnvlist_alloc();
        fnvlist_add_nvlist(args, "holds", holds);
        if (cleanup_fd != -1)
                fnvlist_add_int32(args, "cleanup_fd", cleanup_fd);

        error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist);
        nvlist_free(args);
        return (error);
}
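
/*
 * Illustrative example (not part of the library): placing a hold named
 * "backup" on a snapshot with no cleanup fd, so the hold persists until it
 * is explicitly released.  Names are hypothetical.
 *
 *        nvlist_t *holds = fnvlist_alloc();
 *        nvlist_t *errlist = NULL;
 *        fnvlist_add_string(holds, "tank/fs@today", "backup");
 *        int err = lzc_hold(holds, -1, &errlist);
 *        nvlist_free(holds);
 *        nvlist_free(errlist);
 */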

/*
 * Release "user holds" on snapshots.  If the snapshot has been marked for
 * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have
 * any clones, and all the user holds are removed, then the snapshot will be
 * destroyed.
 *
 * The keys in the nvlist are snapshot names.
 * The snapshots must all be in the same pool.
 * The value is an nvlist whose keys are the holds to remove.
 *
 * Holds which failed to release because they didn't exist will have an entry
 * added to errlist, but will not cause an overall failure.
 *
 * The return value will be 0 if the nvlist of holds was empty or if all holds
 * that existed were successfully removed.
 *
 * Otherwise the return value will be the errno of an (unspecified) hold that
 * failed to release and no holds will be released.
 *
 * In all cases the errlist will have an entry for each hold that failed to
 * release.
 */
int
lzc_release(nvlist_t *holds, nvlist_t **errlist)
{
        char pool[ZFS_MAX_DATASET_NAME_LEN];
        nvpair_t *elem;

        /* determine the pool name */
        elem = nvlist_next_nvpair(holds, NULL);
        if (elem == NULL)
                return (0);
        (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
        pool[strcspn(pool, "/@")] = '\0';

        return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist));
}
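
/*
 * Illustrative example (not part of the library): releasing the "backup"
 * hold placed in the previous example.  The value for each snapshot is a
 * nested nvlist naming the holds to remove.
 *
 *        nvlist_t *holds = fnvlist_alloc();
 *        nvlist_t *snap_holds = fnvlist_alloc();
 *        nvlist_t *errlist = NULL;
 *        fnvlist_add_boolean(snap_holds, "backup");
 *        fnvlist_add_nvlist(holds, "tank/fs@today", snap_holds);
 *        int err = lzc_release(holds, &errlist);
 *        nvlist_free(snap_holds);
 *        nvlist_free(holds);
 *        nvlist_free(errlist);
 */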

/*
 * Retrieve list of user holds on the specified snapshot.
 *
 * On success, *holdsp will be set to an nvlist which the caller must free.
 * The keys are the names of the holds, and the value is the creation time
 * of the hold (uint64) in seconds since the epoch.
 */
int
lzc_get_holds(const char *snapname, nvlist_t **holdsp)
{
        int error;
        nvlist_t *innvl = fnvlist_alloc();
        error = lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, innvl, holdsp);
        fnvlist_free(innvl);
        return (error);
}
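
/*
 * Illustrative example (not part of the library): listing the holds on a
 * snapshot and printing each hold name with its creation time.
 *
 *        nvlist_t *holds;
 *        if (lzc_get_holds("tank/fs@today", &holds) == 0) {
 *                nvpair_t *elem = NULL;
 *                while ((elem = nvlist_next_nvpair(holds, elem)) != NULL) {
 *                        (void) printf("%s\t%llu\n", nvpair_name(elem),
 *                            (u_longlong_t)fnvpair_value_uint64(elem));
 *                }
 *                nvlist_free(holds);
 *        }
 */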

/*
 * Generate a zfs send stream for the specified snapshot and write it to
 * the specified file descriptor.
 *
 * "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap")
 *
 * If "from" is NULL, a full (non-incremental) stream will be sent.
 * If "from" is non-NULL, it must be the full name of a snapshot or
 * bookmark to send an incremental from (e.g. "pool/fs@earlier_snap" or
 * "pool/fs#earlier_bmark").  If non-NULL, the specified snapshot or
 * bookmark must represent an earlier point in the history of "snapname".
 * It can be an earlier snapshot in the same filesystem or zvol as "snapname",
 * or it can be the origin of "snapname"'s filesystem, or an earlier
 * snapshot in the origin, etc.
 *
 * "fd" is the file descriptor to write the send stream to.
 *
 * If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted
 * to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT
 * records with drr_blksz > 128K.
 *
 * If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted
 * to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA,
 * which the receiving system must support (as indicated by support
 * for the "embedded_data" feature).
 */
int
lzc_send(const char *snapname, const char *from, int fd,
    enum lzc_send_flags flags)
{
        return (lzc_send_resume(snapname, from, fd, flags, 0, 0));
}

int
lzc_send_resume(const char *snapname, const char *from, int fd,
    enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff)
{
        nvlist_t *args;
        int err;

        args = fnvlist_alloc();
        fnvlist_add_int32(args, "fd", fd);
        if (from != NULL)
                fnvlist_add_string(args, "fromsnap", from);
        if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
                fnvlist_add_boolean(args, "largeblockok");
        if (flags & LZC_SEND_FLAG_EMBED_DATA)
                fnvlist_add_boolean(args, "embedok");
        if (flags & LZC_SEND_FLAG_COMPRESS)
                fnvlist_add_boolean(args, "compressok");
        if (resumeobj != 0 || resumeoff != 0) {
                fnvlist_add_uint64(args, "resume_object", resumeobj);
                fnvlist_add_uint64(args, "resume_offset", resumeoff);
        }
        err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
        nvlist_free(args);
        return (err);
}
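
/*
 * Illustrative example (not part of the library): sending an incremental
 * stream from "tank/fs@yesterday" to "tank/fs@today" to an already-open
 * file descriptor "outfd", allowing large blocks and embedded data.  The
 * names and the descriptor are hypothetical.
 *
 *        int err = lzc_send("tank/fs@today", "tank/fs@yesterday", outfd,
 *            LZC_SEND_FLAG_LARGE_BLOCK | LZC_SEND_FLAG_EMBED_DATA);
 */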

/*
 * "from" can be NULL, a snapshot, or a bookmark.
 *
 * If from is NULL, a full (non-incremental) stream will be estimated.  This
 * is calculated very efficiently.
 *
 * If from is a snapshot, lzc_send_space uses the deadlists attached to
 * each snapshot to efficiently estimate the stream size.
 *
 * If from is a bookmark, the indirect blocks in the destination snapshot
 * are traversed, looking for blocks born after the creation TXG of the
 * snapshot this bookmark was created from.  This will result in
 * significantly more I/O and be less efficient than a send-space estimate
 * based on an equivalent snapshot.
 */
int
lzc_send_space(const char *snapname, const char *from,
    enum lzc_send_flags flags, uint64_t *spacep)
{
        nvlist_t *args;
        nvlist_t *result;
        int err;

        args = fnvlist_alloc();
        if (from != NULL)
                fnvlist_add_string(args, "from", from);
        if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
                fnvlist_add_boolean(args, "largeblockok");
        if (flags & LZC_SEND_FLAG_EMBED_DATA)
                fnvlist_add_boolean(args, "embedok");
        if (flags & LZC_SEND_FLAG_COMPRESS)
                fnvlist_add_boolean(args, "compressok");
        err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
        nvlist_free(args);
        if (err == 0)
                *spacep = fnvlist_lookup_uint64(result, "space");
        nvlist_free(result);
        return (err);
}
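
/*
 * Illustrative example (not part of the library): estimating the size of the
 * incremental stream from the previous example before actually sending it.
 *
 *        uint64_t space;
 *        int err = lzc_send_space("tank/fs@today", "tank/fs@yesterday", 0,
 *            &space);
 */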

static int
recv_read(int fd, void *buf, int ilen)
{
        char *cp = buf;
        int rv;
        int len = ilen;

        do {
                rv = read(fd, cp, len);
                cp += rv;
                len -= rv;
        } while (rv > 0);

        if (rv < 0 || len != 0)
                return (EIO);

        return (0);
}

static int
recv_impl(const char *snapname, nvlist_t *props, const char *origin,
    boolean_t force, boolean_t resumable, int fd,
    const dmu_replay_record_t *begin_record)
{
        /*
         * The receive ioctl is still legacy, so we need to construct our own
         * zfs_cmd_t rather than using lzc_ioctl().
         */
        zfs_cmd_t zc = { 0 };
        char *atp;
        char *packed = NULL;
        size_t size;
        int error;

        ASSERT3S(g_refcount, >, 0);
        VERIFY3S(g_fd, !=, -1);

        /* zc_name is name of containing filesystem */
        (void) strlcpy(zc.zc_name, snapname, sizeof (zc.zc_name));
        atp = strchr(zc.zc_name, '@');
        if (atp == NULL)
                return (EINVAL);
        *atp = '\0';

        /* if the fs does not exist, try its parent. */
        if (!lzc_exists(zc.zc_name)) {
                char *slashp = strrchr(zc.zc_name, '/');
                if (slashp == NULL)
                        return (ENOENT);
                *slashp = '\0';
        }

        /* zc_value is full name of the snapshot to create */
        (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));

        if (props != NULL) {
                /* zc_nvlist_src is props to set */
                packed = fnvlist_pack(props, &size);
                zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
                zc.zc_nvlist_src_size = size;
        }

        /* zc_string is name of clone origin (if DRR_FLAG_CLONE) */
        if (origin != NULL)
                (void) strlcpy(zc.zc_string, origin, sizeof (zc.zc_string));

        /* zc_begin_record is non-byteswapped BEGIN record */
        if (begin_record == NULL) {
                error = recv_read(fd, &zc.zc_begin_record,
                    sizeof (zc.zc_begin_record));
                if (error != 0)
                        goto out;
        } else {
                zc.zc_begin_record = *begin_record;
        }

        /* zc_cookie is fd to read from */
        zc.zc_cookie = fd;

        /* zc_guid is force flag */
        zc.zc_guid = force;

        zc.zc_resumable = resumable;

        /* zc_cleanup_fd is unused */
        zc.zc_cleanup_fd = -1;

        error = ioctl(g_fd, ZFS_IOC_RECV, &zc);
        if (error != 0)
                error = errno;

out:
        if (packed != NULL)
                fnvlist_pack_free(packed, size);
        free((void *)(uintptr_t)zc.zc_nvlist_dst);
        return (error);
}

/*
 * The simplest receive case: receive from the specified fd, creating the
 * specified snapshot.  Apply the specified properties as "received" properties
 * (which can be overridden by locally-set properties).  If the stream is a
 * clone, its origin snapshot must be specified by 'origin'.  The 'force'
 * flag will cause the target filesystem to be rolled back or destroyed if
 * necessary to receive.
 *
 * Return 0 on success or an errno on failure.
 *
 * Note: this interface does not work on dedup'd streams
 * (those with DMU_BACKUP_FEATURE_DEDUP).
 */
int
lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
    boolean_t force, int fd)
{
        return (recv_impl(snapname, props, origin, force, B_FALSE, fd, NULL));
}
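
/*
 * Illustrative example (not part of the library): receiving the stream sent
 * in the earlier lzc_send() example into a new snapshot on another pool,
 * from an already-open file descriptor "infd", without forcing a rollback
 * of the target.
 *
 *        int err = lzc_receive("backup/fs@today", NULL, NULL, B_FALSE, infd);
 */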

/*
 * Like lzc_receive, but if the receive fails due to premature stream
 * termination, the intermediate state will be preserved on disk.  In this
 * case, ECKSUM will be returned.  The receive may subsequently be resumed
 * with a resuming send stream generated by lzc_send_resume().
 */
int
lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin,
    boolean_t force, int fd)
{
        return (recv_impl(snapname, props, origin, force, B_TRUE, fd, NULL));
}

/*
 * Like lzc_receive, but allows the caller to read the begin record and then to
 * pass it in.  That could be useful if the caller wants to derive, for example,
 * the snapname or the origin parameters based on the information contained in
 * the begin record.
 * The begin record must be in its original form as read from the stream,
 * in other words, it should not be byteswapped.
 *
 * The 'resumable' parameter allows the caller to obtain the same behavior as
 * with lzc_receive_resumable.
 */
int
lzc_receive_with_header(const char *snapname, nvlist_t *props,
    const char *origin, boolean_t force, boolean_t resumable, int fd,
    const dmu_replay_record_t *begin_record)
{
        if (begin_record == NULL)
                return (EINVAL);
        return (recv_impl(snapname, props, origin, force, resumable, fd,
            begin_record));
}

/*
 * Roll back this filesystem or volume to its most recent snapshot.
 * If snapnamebuf is not NULL, it will be filled in with the name
 * of the most recent snapshot.
 *
 * Return 0 on success or an errno on failure.
 */
int
lzc_rollback(const char *fsname, char *snapnamebuf, int snapnamelen)
{
        nvlist_t *args;
        nvlist_t *result;
        int err;

        args = fnvlist_alloc();
        err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
        nvlist_free(args);
        if (err == 0 && snapnamebuf != NULL) {
                const char *snapname = fnvlist_lookup_string(result, "target");
                (void) strlcpy(snapnamebuf, snapname, snapnamelen);
        }
        nvlist_free(result);

        return (err);
}
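
/*
 * Illustrative example (not part of the library): rolling back a filesystem
 * and reporting which snapshot it was rolled back to.
 *
 *        char snapname[ZFS_MAX_DATASET_NAME_LEN];
 *        if (lzc_rollback("tank/fs", snapname, sizeof (snapname)) == 0)
 *                (void) printf("rolled back to %s\n", snapname);
 */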

/*
 * Creates bookmarks.
 *
 * The bookmarks nvlist maps from the name of the bookmark (e.g.
 * "pool/fs#bmark") to the name of the snapshot (e.g. "pool/fs@snap").  All
 * the bookmarks and snapshots must be in the same pool.
 *
 * The returned results nvlist will have an entry for each bookmark that failed.
 * The value will be the (int32) error code.
 *
 * The return value will be 0 if all bookmarks were created, otherwise it will
 * be the errno of an (undetermined) bookmark that failed.
 */
int
lzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist)
{
        nvpair_t *elem;
        int error;
        char pool[ZFS_MAX_DATASET_NAME_LEN];

        /* determine the pool name */
        elem = nvlist_next_nvpair(bookmarks, NULL);
        if (elem == NULL)
                return (0);
        (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
        pool[strcspn(pool, "/#")] = '\0';

        error = lzc_ioctl(ZFS_IOC_BOOKMARK, pool, bookmarks, errlist);

        return (error);
}
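
/*
 * Illustrative example (not part of the library): bookmarking a snapshot so
 * an incremental-send source survives even if the snapshot itself is later
 * destroyed.
 *
 *        nvlist_t *bmarks = fnvlist_alloc();
 *        nvlist_t *errlist = NULL;
 *        fnvlist_add_string(bmarks, "tank/fs#today", "tank/fs@today");
 *        int err = lzc_bookmark(bmarks, &errlist);
 *        nvlist_free(bmarks);
 *        nvlist_free(errlist);
 */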

/*
 * Retrieve bookmarks.
 *
 * Retrieve the list of bookmarks for the given file system.  The props
 * parameter is an nvlist of property names (with no values) that will be
 * returned for each bookmark.
 *
 * The following are valid properties on bookmarks, all of which are numbers
 * (represented as uint64 in the nvlist).
 *
 * "guid" - globally unique identifier of the snapshot it refers to
 * "createtxg" - txg when the snapshot it refers to was created
 * "creation" - timestamp when the snapshot it refers to was created
 *
 * The format of the returned nvlist is as follows:
 * <short name of bookmark> -> {
 *     <name of property> -> {
 *         "value" -> uint64
 *     }
 * }
 */
int
lzc_get_bookmarks(const char *fsname, nvlist_t *props, nvlist_t **bmarks)
{
        return (lzc_ioctl(ZFS_IOC_GET_BOOKMARKS, fsname, props, bmarks));
}
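
/*
 * Illustrative example (not part of the library): asking for the "createtxg"
 * property of every bookmark on a filesystem.  Each returned entry is itself
 * an nvlist of property name -> { "value" -> uint64 }.
 *
 *        nvlist_t *props = fnvlist_alloc();
 *        nvlist_t *bmarks = NULL;
 *        fnvlist_add_boolean(props, "createtxg");
 *        int err = lzc_get_bookmarks("tank/fs", props, &bmarks);
 *        nvlist_free(props);
 *        nvlist_free(bmarks);
 */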

/*
 * Destroys bookmarks.
 *
 * The keys in the bmarks nvlist are the bookmarks to be destroyed.
 * They must all be in the same pool.  Bookmarks are specified as
 * <fs>#<bmark>.
 *
 * Bookmarks that do not exist will be silently ignored.
 *
 * The return value will be 0 if all bookmarks that existed were destroyed.
 *
 * Otherwise the return value will be the errno of an (undetermined) bookmark
 * that failed, no bookmarks will be destroyed, and the errlist will have an
 * entry for each bookmark that failed.  The value in the errlist will be
 * the (int32) error code.
 */
int
lzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist)
{
        nvpair_t *elem;
        int error;
        char pool[ZFS_MAX_DATASET_NAME_LEN];

        /* determine the pool name */
        elem = nvlist_next_nvpair(bmarks, NULL);
        if (elem == NULL)
                return (0);
        (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
        pool[strcspn(pool, "/#")] = '\0';

        error = lzc_ioctl(ZFS_IOC_DESTROY_BOOKMARKS, pool, bmarks, errlist);

        return (error);
}