illumos-gate Wdiff usr/src/lib/libzfs_core/common/libzfs_core.c

Print this page

8264 want support for promoting datasets in libzfs_core

Split	Close
Expand all
Collapse all

          --- old/usr/src/lib/libzfs_core/common/libzfs_core.c
          +++ new/usr/src/lib/libzfs_core/common/libzfs_core.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the

↓ open down ↓

15 lines elided

↑ open up ↑

  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
  24   24   * Copyright (c) 2013 Steven Hartland. All rights reserved.
  25   25   * Copyright (c) 2014 Integros [integros.com]
       26 + * Copyright 2017 RackTop Systems.
  26   27   */
  27   28  
  28   29  /*
  29   30   * LibZFS_Core (lzc) is intended to replace most functionality in libzfs.
  30   31   * It has the following characteristics:
  31   32   *
  32   33   *  - Thread Safe.  libzfs_core is accessible concurrently from multiple
  33   34   *  threads.  This is accomplished primarily by avoiding global data
  34   35   *  (e.g. caching).  Since it's thread-safe, there is no reason for a
  35   36   *  process to have multiple libzfs "instances".  Therefore, we store

  36   37   *  our few pieces of data (e.g. the file descriptor) in global
  37   38   *  variables.  The fd is reference-counted so that the libzfs_core
  38   39   *  library can be "initialized" multiple times (e.g. by different
  39   40   *  consumers within the same process).
  40   41   *
  41   42   *  - Committed Interface.  The libzfs_core interface will be committed,
  42   43   *  therefore consumers can compile against it and be confident that
  43   44   *  their code will continue to work on future releases of this code.
  44   45   *  Currently, the interface is Evolving (not Committed), but we intend
  45   46   *  to commit to it once it is more complete and we determine that it
  46   47   *  meets the needs of all consumers.
  47   48   *
  48   49   *  - Programmatic Error Handling.  libzfs_core communicates errors with
  49   50   *  defined error numbers, and doesn't print anything to stdout/stderr.
  50   51   *
  51   52   *  - Thin Layer.  libzfs_core is a thin layer, marshaling arguments
  52   53   *  to/from the kernel ioctls.  There is generally a 1:1 correspondence
  53   54   *  between libzfs_core functions and ioctls to /dev/zfs.
  54   55   *
  55   56   *  - Clear Atomicity.  Because libzfs_core functions are generally 1:1
  56   57   *  with kernel ioctls, and kernel ioctls are generally atomic, each
  57   58   *  libzfs_core function is atomic.  For example, creating multiple
  58   59   *  snapshots with a single call to lzc_snapshot() is atomic -- it
  59   60   *  can't fail with only some of the requested snapshots created, even
  60   61   *  in the event of power loss or system crash.
  61   62   *
  62   63   *  - Continued libzfs Support.  Some higher-level operations (e.g.
  63   64   *  support for "zfs send -R") are too complicated to fit the scope of
  64   65   *  libzfs_core.  This functionality will continue to live in libzfs.
  65   66   *  Where appropriate, libzfs will use the underlying atomic operations
  66   67   *  of libzfs_core.  For example, libzfs may implement "zfs send -R |
  67   68   *  zfs receive" by using individual "send one snapshot", rename,
  68   69   *  destroy, and "receive one snapshot" operations in libzfs_core.
  69   70   *  /sbin/zfs and /sbin/zpool will link with both libzfs and
  70   71   *  libzfs_core.  Other consumers should aim to use only libzfs_core,
  71   72   *  since that will be the supported, stable interface going forwards.
  72   73   */
  73   74  
  74   75  #include <libzfs_core.h>
  75   76  #include <ctype.h>
  76   77  #include <unistd.h>
  77   78  #include <stdlib.h>
  78   79  #include <string.h>
  79   80  #include <errno.h>
  80   81  #include <fcntl.h>
  81   82  #include <pthread.h>
  82   83  #include <sys/nvpair.h>
  83   84  #include <sys/param.h>
  84   85  #include <sys/types.h>
  85   86  #include <sys/stat.h>
  86   87  #include <sys/zfs_ioctl.h>
  87   88  
  88   89  static int g_fd = -1;
  89   90  static pthread_mutex_t g_lock = PTHREAD_MUTEX_INITIALIZER;
  90   91  static int g_refcount;
  91   92  
  92   93  int
  93   94  libzfs_core_init(void)
  94   95  {
  95   96          (void) pthread_mutex_lock(&g_lock);
  96   97          if (g_refcount == 0) {
  97   98                  g_fd = open("/dev/zfs", O_RDWR);
  98   99                  if (g_fd < 0) {
  99  100                          (void) pthread_mutex_unlock(&g_lock);
 100  101                          return (errno);
 101  102                  }
 102  103          }
 103  104          g_refcount++;
 104  105          (void) pthread_mutex_unlock(&g_lock);
 105  106          return (0);
 106  107  }
 107  108  
 108  109  void
 109  110  libzfs_core_fini(void)
 110  111  {
 111  112          (void) pthread_mutex_lock(&g_lock);
 112  113          ASSERT3S(g_refcount, >, 0);
 113  114  
 114  115          if (g_refcount > 0)
 115  116                  g_refcount--;
 116  117  
 117  118          if (g_refcount == 0 && g_fd != -1) {
 118  119                  (void) close(g_fd);
 119  120                  g_fd = -1;
 120  121          }
 121  122          (void) pthread_mutex_unlock(&g_lock);
 122  123  }
 123  124  
 124  125  static int
 125  126  lzc_ioctl(zfs_ioc_t ioc, const char *name,
 126  127      nvlist_t *source, nvlist_t **resultp)
 127  128  {
 128  129          zfs_cmd_t zc = { 0 };
 129  130          int error = 0;
 130  131          char *packed;
 131  132          size_t size;
 132  133  
 133  134          ASSERT3S(g_refcount, >, 0);
 134  135          VERIFY3S(g_fd, !=, -1);
 135  136  
 136  137          (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
 137  138  
 138  139          packed = fnvlist_pack(source, &size);
 139  140          zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
 140  141          zc.zc_nvlist_src_size = size;
 141  142  
 142  143          if (resultp != NULL) {
 143  144                  *resultp = NULL;
 144  145                  zc.zc_nvlist_dst_size = MAX(size * 2, 128 * 1024);
 145  146                  zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
 146  147                      malloc(zc.zc_nvlist_dst_size);
 147  148                  if (zc.zc_nvlist_dst == NULL) {
 148  149                          error = ENOMEM;
 149  150                          goto out;
 150  151                  }
 151  152          }
 152  153  
 153  154          while (ioctl(g_fd, ioc, &zc) != 0) {
 154  155                  if (errno == ENOMEM && resultp != NULL) {
 155  156                          free((void *)(uintptr_t)zc.zc_nvlist_dst);
 156  157                          zc.zc_nvlist_dst_size *= 2;
 157  158                          zc.zc_nvlist_dst = (uint64_t)(uintptr_t)
 158  159                              malloc(zc.zc_nvlist_dst_size);
 159  160                          if (zc.zc_nvlist_dst == NULL) {
 160  161                                  error = ENOMEM;
 161  162                                  goto out;
 162  163                          }
 163  164                  } else {
 164  165                          error = errno;
 165  166                          break;
 166  167                  }
 167  168          }
 168  169          if (zc.zc_nvlist_dst_filled) {
 169  170                  *resultp = fnvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
 170  171                      zc.zc_nvlist_dst_size);
 171  172          }
 172  173  
 173  174  out:
 174  175          fnvlist_pack_free(packed, size);
 175  176          free((void *)(uintptr_t)zc.zc_nvlist_dst);
 176  177          return (error);
 177  178  }
 178  179  
 179  180  int
 180  181  lzc_create(const char *fsname, enum lzc_dataset_type type, nvlist_t *props)
 181  182  {
 182  183          int error;
 183  184          nvlist_t *args = fnvlist_alloc();
 184  185          fnvlist_add_int32(args, "type", (dmu_objset_type_t)type);
 185  186          if (props != NULL)
 186  187                  fnvlist_add_nvlist(args, "props", props);
 187  188          error = lzc_ioctl(ZFS_IOC_CREATE, fsname, args, NULL);
 188  189          nvlist_free(args);
 189  190          return (error);
 190  191  }
 191  192  
 192  193  int
 193  194  lzc_clone(const char *fsname, const char *origin,
 194  195      nvlist_t *props)
 195  196  {

↓ open down ↓

160 lines elided

↑ open up ↑

 196  197          int error;
 197  198          nvlist_t *args = fnvlist_alloc();
 198  199          fnvlist_add_string(args, "origin", origin);
 199  200          if (props != NULL)
 200  201                  fnvlist_add_nvlist(args, "props", props);
 201  202          error = lzc_ioctl(ZFS_IOC_CLONE, fsname, args, NULL);
 202  203          nvlist_free(args);
 203  204          return (error);
 204  205  }
 205  206  
      207 +int
      208 +lzc_promote(const char *fsname, char *snapnamebuf, int snapnamelen)
      209 +{
      210 +        /*
      211 +         * The promote ioctl is still legacy, so we need to construct our
      212 +         * own zfs_cmd_t rather than using lzc_ioctl().
      213 +         */
      214 +        zfs_cmd_t zc = { 0 };
      215 +
      216 +        ASSERT3S(g_refcount, >, 0);
      217 +        VERIFY3S(g_fd, !=, -1);
      218 +
      219 +        (void) strlcpy(zc.zc_name, fsname, sizeof (zc.zc_name));
      220 +        if (ioctl(g_fd, ZFS_IOC_PROMOTE, &zc) != 0) {
      221 +                if (errno == EEXIST && snapnamebuf != NULL)
      222 +                        (void) strlcpy(snapnamebuf, zc.zc_string, snapnamelen);
      223 +                return (errno);
      224 +        }
      225 +        return (0);
      226 +}
      227 +
 206  228  /*
 207  229   * Creates snapshots.
 208  230   *
 209  231   * The keys in the snaps nvlist are the snapshots to be created.
 210  232   * They must all be in the same pool.
 211  233   *
 212  234   * The props nvlist is properties to set.  Currently only user properties
 213  235   * are supported.  { user:prop_name -> string value }
 214  236   *
 215  237   * The returned results nvlist will have an entry for each snapshot that failed.

 216  238   * The value will be the (int32) error code.
 217  239   *
 218  240   * The return value will be 0 if all snapshots were created, otherwise it will
 219  241   * be the errno of a (unspecified) snapshot that failed.
 220  242   */
 221  243  int
 222  244  lzc_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t **errlist)
 223  245  {
 224  246          nvpair_t *elem;
 225  247          nvlist_t *args;
 226  248          int error;
 227  249          char pool[ZFS_MAX_DATASET_NAME_LEN];
 228  250  
 229  251          *errlist = NULL;
 230  252  
 231  253          /* determine the pool name */
 232  254          elem = nvlist_next_nvpair(snaps, NULL);
 233  255          if (elem == NULL)
 234  256                  return (0);
 235  257          (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
 236  258          pool[strcspn(pool, "/@")] = '\0';
 237  259  
 238  260          args = fnvlist_alloc();
 239  261          fnvlist_add_nvlist(args, "snaps", snaps);
 240  262          if (props != NULL)
 241  263                  fnvlist_add_nvlist(args, "props", props);
 242  264  
 243  265          error = lzc_ioctl(ZFS_IOC_SNAPSHOT, pool, args, errlist);
 244  266          nvlist_free(args);
 245  267  
 246  268          return (error);
 247  269  }
 248  270  
 249  271  /*
 250  272   * Destroys snapshots.
 251  273   *
 252  274   * The keys in the snaps nvlist are the snapshots to be destroyed.
 253  275   * They must all be in the same pool.
 254  276   *
 255  277   * Snapshots that do not exist will be silently ignored.
 256  278   *
 257  279   * If 'defer' is not set, and a snapshot has user holds or clones, the
 258  280   * destroy operation will fail and none of the snapshots will be
 259  281   * destroyed.
 260  282   *
 261  283   * If 'defer' is set, and a snapshot has user holds or clones, it will be
 262  284   * marked for deferred destruction, and will be destroyed when the last hold
 263  285   * or clone is removed/destroyed.
 264  286   *
 265  287   * The return value will be 0 if all snapshots were destroyed (or marked for
 266  288   * later destruction if 'defer' is set) or didn't exist to begin with.
 267  289   *
 268  290   * Otherwise the return value will be the errno of a (unspecified) snapshot
 269  291   * that failed, no snapshots will be destroyed, and the errlist will have an
 270  292   * entry for each snapshot that failed.  The value in the errlist will be
 271  293   * the (int32) error code.
 272  294   */
 273  295  int
 274  296  lzc_destroy_snaps(nvlist_t *snaps, boolean_t defer, nvlist_t **errlist)
 275  297  {
 276  298          nvpair_t *elem;
 277  299          nvlist_t *args;
 278  300          int error;
 279  301          char pool[ZFS_MAX_DATASET_NAME_LEN];
 280  302  
 281  303          /* determine the pool name */
 282  304          elem = nvlist_next_nvpair(snaps, NULL);
 283  305          if (elem == NULL)
 284  306                  return (0);
 285  307          (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
 286  308          pool[strcspn(pool, "/@")] = '\0';
 287  309  
 288  310          args = fnvlist_alloc();
 289  311          fnvlist_add_nvlist(args, "snaps", snaps);
 290  312          if (defer)
 291  313                  fnvlist_add_boolean(args, "defer");
 292  314  
 293  315          error = lzc_ioctl(ZFS_IOC_DESTROY_SNAPS, pool, args, errlist);
 294  316          nvlist_free(args);
 295  317  
 296  318          return (error);
 297  319  }
 298  320  
 299  321  int
 300  322  lzc_snaprange_space(const char *firstsnap, const char *lastsnap,
 301  323      uint64_t *usedp)
 302  324  {
 303  325          nvlist_t *args;
 304  326          nvlist_t *result;
 305  327          int err;
 306  328          char fs[ZFS_MAX_DATASET_NAME_LEN];
 307  329          char *atp;
 308  330  
 309  331          /* determine the fs name */
 310  332          (void) strlcpy(fs, firstsnap, sizeof (fs));
 311  333          atp = strchr(fs, '@');
 312  334          if (atp == NULL)
 313  335                  return (EINVAL);
 314  336          *atp = '\0';
 315  337  
 316  338          args = fnvlist_alloc();
 317  339          fnvlist_add_string(args, "firstsnap", firstsnap);
 318  340  
 319  341          err = lzc_ioctl(ZFS_IOC_SPACE_SNAPS, lastsnap, args, &result);
 320  342          nvlist_free(args);
 321  343          if (err == 0)
 322  344                  *usedp = fnvlist_lookup_uint64(result, "used");

↓ open down ↓

107 lines elided

↑ open up ↑

 323  345          fnvlist_free(result);
 324  346  
 325  347          return (err);
 326  348  }
 327  349  
 328  350  boolean_t
 329  351  lzc_exists(const char *dataset)
 330  352  {
 331  353          /*
 332  354           * The objset_stats ioctl is still legacy, so we need to construct our
 333      -         * own zfs_cmd_t rather than using zfsc_ioctl().
      355 +         * own zfs_cmd_t rather than using lzc_ioctl().
 334  356           */
 335  357          zfs_cmd_t zc = { 0 };
 336  358  
 337  359          ASSERT3S(g_refcount, >, 0);
 338  360          VERIFY3S(g_fd, !=, -1);
 339  361  
 340  362          (void) strlcpy(zc.zc_name, dataset, sizeof (zc.zc_name));
 341  363          return (ioctl(g_fd, ZFS_IOC_OBJSET_STATS, &zc) == 0);
 342  364  }
 343  365

 344  366  /*
 345  367   * Create "user holds" on snapshots.  If there is a hold on a snapshot,
 346  368   * the snapshot can not be destroyed.  (However, it can be marked for deletion
 347  369   * by lzc_destroy_snaps(defer=B_TRUE).)
 348  370   *
 349  371   * The keys in the nvlist are snapshot names.
 350  372   * The snapshots must all be in the same pool.
 351  373   * The value is the name of the hold (string type).
 352  374   *
 353  375   * If cleanup_fd is not -1, it must be the result of open("/dev/zfs", O_EXCL).
 354  376   * In this case, when the cleanup_fd is closed (including on process
 355  377   * termination), the holds will be released.  If the system is shut down
 356  378   * uncleanly, the holds will be released when the pool is next opened
 357  379   * or imported.
 358  380   *
 359  381   * Holds for snapshots which don't exist will be skipped and have an entry
 360  382   * added to errlist, but will not cause an overall failure.
 361  383   *
 362  384   * The return value will be 0 if all holds, for snapshots that existed,
 363  385   * were successfully created.
 364  386   *
 365  387   * Otherwise the return value will be the errno of a (unspecified) hold that
 366  388   * failed and no holds will be created.
 367  389   *
 368  390   * In all cases the errlist will have an entry for each hold that failed
 369  391   * (name = snapshot), with its value being the error code (int32).
 370  392   */
 371  393  int
 372  394  lzc_hold(nvlist_t *holds, int cleanup_fd, nvlist_t **errlist)
 373  395  {
 374  396          char pool[ZFS_MAX_DATASET_NAME_LEN];
 375  397          nvlist_t *args;
 376  398          nvpair_t *elem;
 377  399          int error;
 378  400  
 379  401          /* determine the pool name */
 380  402          elem = nvlist_next_nvpair(holds, NULL);
 381  403          if (elem == NULL)
 382  404                  return (0);
 383  405          (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
 384  406          pool[strcspn(pool, "/@")] = '\0';
 385  407  
 386  408          args = fnvlist_alloc();
 387  409          fnvlist_add_nvlist(args, "holds", holds);
 388  410          if (cleanup_fd != -1)
 389  411                  fnvlist_add_int32(args, "cleanup_fd", cleanup_fd);
 390  412  
 391  413          error = lzc_ioctl(ZFS_IOC_HOLD, pool, args, errlist);
 392  414          nvlist_free(args);
 393  415          return (error);
 394  416  }
 395  417  
 396  418  /*
 397  419   * Release "user holds" on snapshots.  If the snapshot has been marked for
 398  420   * deferred destroy (by lzc_destroy_snaps(defer=B_TRUE)), it does not have
 399  421   * any clones, and all the user holds are removed, then the snapshot will be
 400  422   * destroyed.
 401  423   *
 402  424   * The keys in the nvlist are snapshot names.
 403  425   * The snapshots must all be in the same pool.
 404  426   * The value is a nvlist whose keys are the holds to remove.
 405  427   *
 406  428   * Holds which failed to release because they didn't exist will have an entry
 407  429   * added to errlist, but will not cause an overall failure.
 408  430   *
 409  431   * The return value will be 0 if the nvl holds was empty or all holds that
 410  432   * existed, were successfully removed.
 411  433   *
 412  434   * Otherwise the return value will be the errno of a (unspecified) hold that
 413  435   * failed to release and no holds will be released.
 414  436   *
 415  437   * In all cases the errlist will have an entry for each hold that failed to
 416  438   * release.
 417  439   */
 418  440  int
 419  441  lzc_release(nvlist_t *holds, nvlist_t **errlist)
 420  442  {
 421  443          char pool[ZFS_MAX_DATASET_NAME_LEN];
 422  444          nvpair_t *elem;
 423  445  
 424  446          /* determine the pool name */
 425  447          elem = nvlist_next_nvpair(holds, NULL);
 426  448          if (elem == NULL)
 427  449                  return (0);
 428  450          (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
 429  451          pool[strcspn(pool, "/@")] = '\0';
 430  452  
 431  453          return (lzc_ioctl(ZFS_IOC_RELEASE, pool, holds, errlist));
 432  454  }
 433  455  
 434  456  /*
 435  457   * Retrieve list of user holds on the specified snapshot.
 436  458   *
 437  459   * On success, *holdsp will be set to a nvlist which the caller must free.
 438  460   * The keys are the names of the holds, and the value is the creation time
 439  461   * of the hold (uint64) in seconds since the epoch.
 440  462   */
 441  463  int
 442  464  lzc_get_holds(const char *snapname, nvlist_t **holdsp)
 443  465  {
 444  466          int error;
 445  467          nvlist_t *innvl = fnvlist_alloc();
 446  468          error = lzc_ioctl(ZFS_IOC_GET_HOLDS, snapname, innvl, holdsp);
 447  469          fnvlist_free(innvl);
 448  470          return (error);
 449  471  }
 450  472  
 451  473  /*
 452  474   * Generate a zfs send stream for the specified snapshot and write it to
 453  475   * the specified file descriptor.
 454  476   *
 455  477   * "snapname" is the full name of the snapshot to send (e.g. "pool/fs@snap")
 456  478   *
 457  479   * If "from" is NULL, a full (non-incremental) stream will be sent.
 458  480   * If "from" is non-NULL, it must be the full name of a snapshot or
 459  481   * bookmark to send an incremental from (e.g. "pool/fs@earlier_snap" or
 460  482   * "pool/fs#earlier_bmark").  If non-NULL, the specified snapshot or
 461  483   * bookmark must represent an earlier point in the history of "snapname").
 462  484   * It can be an earlier snapshot in the same filesystem or zvol as "snapname",
 463  485   * or it can be the origin of "snapname"'s filesystem, or an earlier
 464  486   * snapshot in the origin, etc.
 465  487   *
 466  488   * "fd" is the file descriptor to write the send stream to.
 467  489   *
 468  490   * If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted
 469  491   * to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT
 470  492   * records with drr_blksz > 128K.
 471  493   *
 472  494   * If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted
 473  495   * to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA,
 474  496   * which the receiving system must support (as indicated by support
 475  497   * for the "embedded_data" feature).
 476  498   */
 477  499  int
 478  500  lzc_send(const char *snapname, const char *from, int fd,
 479  501      enum lzc_send_flags flags)
 480  502  {
 481  503          return (lzc_send_resume(snapname, from, fd, flags, 0, 0));
 482  504  }
 483  505  
 484  506  int
 485  507  lzc_send_resume(const char *snapname, const char *from, int fd,
 486  508      enum lzc_send_flags flags, uint64_t resumeobj, uint64_t resumeoff)
 487  509  {
 488  510          nvlist_t *args;
 489  511          int err;
 490  512  
 491  513          args = fnvlist_alloc();
 492  514          fnvlist_add_int32(args, "fd", fd);
 493  515          if (from != NULL)
 494  516                  fnvlist_add_string(args, "fromsnap", from);
 495  517          if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
 496  518                  fnvlist_add_boolean(args, "largeblockok");
 497  519          if (flags & LZC_SEND_FLAG_EMBED_DATA)
 498  520                  fnvlist_add_boolean(args, "embedok");
 499  521          if (flags & LZC_SEND_FLAG_COMPRESS)
 500  522                  fnvlist_add_boolean(args, "compressok");
 501  523          if (resumeobj != 0 || resumeoff != 0) {
 502  524                  fnvlist_add_uint64(args, "resume_object", resumeobj);
 503  525                  fnvlist_add_uint64(args, "resume_offset", resumeoff);
 504  526          }
 505  527          err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
 506  528          nvlist_free(args);
 507  529          return (err);
 508  530  }
 509  531  
 510  532  /*
 511  533   * "from" can be NULL, a snapshot, or a bookmark.
 512  534   *
 513  535   * If from is NULL, a full (non-incremental) stream will be estimated.  This
 514  536   * is calculated very efficiently.
 515  537   *
 516  538   * If from is a snapshot, lzc_send_space uses the deadlists attached to
 517  539   * each snapshot to efficiently estimate the stream size.
 518  540   *
 519  541   * If from is a bookmark, the indirect blocks in the destination snapshot
 520  542   * are traversed, looking for blocks with a birth time since the creation TXG of
 521  543   * the snapshot this bookmark was created from.  This will result in
 522  544   * significantly more I/O and be less efficient than a send space estimation on
 523  545   * an equivalent snapshot.
 524  546   */
 525  547  int
 526  548  lzc_send_space(const char *snapname, const char *from,
 527  549      enum lzc_send_flags flags, uint64_t *spacep)
 528  550  {
 529  551          nvlist_t *args;
 530  552          nvlist_t *result;
 531  553          int err;
 532  554  
 533  555          args = fnvlist_alloc();
 534  556          if (from != NULL)
 535  557                  fnvlist_add_string(args, "from", from);
 536  558          if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
 537  559                  fnvlist_add_boolean(args, "largeblockok");
 538  560          if (flags & LZC_SEND_FLAG_EMBED_DATA)
 539  561                  fnvlist_add_boolean(args, "embedok");
 540  562          if (flags & LZC_SEND_FLAG_COMPRESS)
 541  563                  fnvlist_add_boolean(args, "compressok");
 542  564          err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
 543  565          nvlist_free(args);
 544  566          if (err == 0)
 545  567                  *spacep = fnvlist_lookup_uint64(result, "space");
 546  568          nvlist_free(result);
 547  569          return (err);
 548  570  }
 549  571  
 550  572  static int
 551  573  recv_read(int fd, void *buf, int ilen)
 552  574  {
 553  575          char *cp = buf;
 554  576          int rv;
 555  577          int len = ilen;
 556  578  
 557  579          do {
 558  580                  rv = read(fd, cp, len);
 559  581                  cp += rv;
 560  582                  len -= rv;
 561  583          } while (rv > 0);
 562  584  
 563  585          if (rv < 0 || len != 0)
 564  586                  return (EIO);
 565  587  
 566  588          return (0);
 567  589  }
 568  590  
 569  591  static int
 570  592  recv_impl(const char *snapname, nvlist_t *props, const char *origin,
 571  593      boolean_t force, boolean_t resumable, int fd,
 572  594      const dmu_replay_record_t *begin_record)
 573  595  {
 574  596          /*
 575  597           * The receive ioctl is still legacy, so we need to construct our own
 576  598           * lzc_ioctl().
 577  599           */
 578  600          zfs_cmd_t zc = { 0 };
 579  601          char *atp;
 580  602          char *packed = NULL;
 581  603          size_t size;
 582  604          int error;
 583  605  
 584  606          ASSERT3S(g_refcount, >, 0);
 585  607          VERIFY3S(g_fd, !=, -1);
 586  608  
 587  609          /* zc_name is name of containing filesystem */
 588  610          (void) strlcpy(zc.zc_name, snapname, sizeof (zc.zc_name));
 589  611          atp = strchr(zc.zc_name, '@');
 590  612          if (atp == NULL)
 591  613                  return (EINVAL);
 592  614          *atp = '\0';
 593  615  
 594  616          /* if the fs does not exist, try its parent. */
 595  617          if (!lzc_exists(zc.zc_name)) {
 596  618                  char *slashp = strrchr(zc.zc_name, '/');
 597  619                  if (slashp == NULL)
 598  620                          return (ENOENT);
 599  621                  *slashp = '\0';
 600  622  
 601  623          }
 602  624  
 603  625          /* zc_value is full name of the snapshot to create */
 604  626          (void) strlcpy(zc.zc_value, snapname, sizeof (zc.zc_value));
 605  627  
 606  628          if (props != NULL) {
 607  629                  /* zc_nvlist_src is props to set */
 608  630                  packed = fnvlist_pack(props, &size);
 609  631                  zc.zc_nvlist_src = (uint64_t)(uintptr_t)packed;
 610  632                  zc.zc_nvlist_src_size = size;
 611  633          }
 612  634  
 613  635          /* zc_string is name of clone origin (if DRR_FLAG_CLONE) */
 614  636          if (origin != NULL)
 615  637                  (void) strlcpy(zc.zc_string, origin, sizeof (zc.zc_string));
 616  638  
 617  639          /* zc_begin_record is non-byteswapped BEGIN record */
 618  640          if (begin_record == NULL) {
 619  641                  error = recv_read(fd, &zc.zc_begin_record,
 620  642                      sizeof (zc.zc_begin_record));
 621  643                  if (error != 0)
 622  644                          goto out;
 623  645          } else {
 624  646                  zc.zc_begin_record = *begin_record;
 625  647          }
 626  648  
 627  649          /* zc_cookie is fd to read from */
 628  650          zc.zc_cookie = fd;
 629  651  
 630  652          /* zc_guid is force flag */
 631  653          zc.zc_guid = force;
 632  654  
 633  655          zc.zc_resumable = resumable;
 634  656  
 635  657          /* zc_cleanup_fd is unused */
 636  658          zc.zc_cleanup_fd = -1;
 637  659  
 638  660          error = ioctl(g_fd, ZFS_IOC_RECV, &zc);
 639  661          if (error != 0)
 640  662                  error = errno;
 641  663  
 642  664  out:
 643  665          if (packed != NULL)
 644  666                  fnvlist_pack_free(packed, size);
 645  667          free((void*)(uintptr_t)zc.zc_nvlist_dst);
 646  668          return (error);
 647  669  }
 648  670  
 649  671  /*
 650  672   * The simplest receive case: receive from the specified fd, creating the
 651  673   * specified snapshot.  Apply the specified properties as "received" properties
 652  674   * (which can be overridden by locally-set properties).  If the stream is a
 653  675   * clone, its origin snapshot must be specified by 'origin'.  The 'force'
 654  676   * flag will cause the target filesystem to be rolled back or destroyed if
 655  677   * necessary to receive.
 656  678   *
 657  679   * Return 0 on success or an errno on failure.
 658  680   *
 659  681   * Note: this interface does not work on dedup'd streams
 660  682   * (those with DMU_BACKUP_FEATURE_DEDUP).
 661  683   */
 662  684  int
 663  685  lzc_receive(const char *snapname, nvlist_t *props, const char *origin,
 664  686      boolean_t force, int fd)
 665  687  {
 666  688          return (recv_impl(snapname, props, origin, force, B_FALSE, fd, NULL));
 667  689  }
 668  690  
 669  691  /*
 670  692   * Like lzc_receive, but if the receive fails due to premature stream
 671  693   * termination, the intermediate state will be preserved on disk.  In this
 672  694   * case, ECKSUM will be returned.  The receive may subsequently be resumed
 673  695   * with a resuming send stream generated by lzc_send_resume().
 674  696   */
 675  697  int
 676  698  lzc_receive_resumable(const char *snapname, nvlist_t *props, const char *origin,
 677  699      boolean_t force, int fd)
 678  700  {
 679  701          return (recv_impl(snapname, props, origin, force, B_TRUE, fd, NULL));
 680  702  }
 681  703  
 682  704  /*
 683  705   * Like lzc_receive, but allows the caller to read the begin record and then to
 684  706   * pass it in.  That could be useful if the caller wants to derive, for example,
 685  707   * the snapname or the origin parameters based on the information contained in
 686  708   * the begin record.
 687  709   * The begin record must be in its original form as read from the stream,
 688  710   * in other words, it should not be byteswapped.
 689  711   *
 690  712   * The 'resumable' parameter allows to obtain the same behavior as with
 691  713   * lzc_receive_resumable.
 692  714   */
 693  715  int
 694  716  lzc_receive_with_header(const char *snapname, nvlist_t *props,
 695  717      const char *origin, boolean_t force, boolean_t resumable, int fd,
 696  718      const dmu_replay_record_t *begin_record)
 697  719  {
 698  720          if (begin_record == NULL)
 699  721                  return (EINVAL);
 700  722          return (recv_impl(snapname, props, origin, force, resumable, fd,
 701  723              begin_record));
 702  724  }
 703  725  
 704  726  /*
 705  727   * Roll back this filesystem or volume to its most recent snapshot.
 706  728   * If snapnamebuf is not NULL, it will be filled in with the name
 707  729   * of the most recent snapshot.
 708  730   *
 709  731   * Return 0 on success or an errno on failure.
 710  732   */
 711  733  int
 712  734  lzc_rollback(const char *fsname, char *snapnamebuf, int snapnamelen)
 713  735  {
 714  736          nvlist_t *args;
 715  737          nvlist_t *result;
 716  738          int err;
 717  739  
 718  740          args = fnvlist_alloc();
 719  741          err = lzc_ioctl(ZFS_IOC_ROLLBACK, fsname, args, &result);
 720  742          nvlist_free(args);
 721  743          if (err == 0 && snapnamebuf != NULL) {
 722  744                  const char *snapname = fnvlist_lookup_string(result, "target");
 723  745                  (void) strlcpy(snapnamebuf, snapname, snapnamelen);
 724  746          }
 725  747          nvlist_free(result);
 726  748  
 727  749          return (err);
 728  750  }
 729  751  
 730  752  /*
 731  753   * Creates bookmarks.
 732  754   *
 733  755   * The bookmarks nvlist maps from name of the bookmark (e.g. "pool/fs#bmark") to
 734  756   * the name of the snapshot (e.g. "pool/fs@snap").  All the bookmarks and
 735  757   * snapshots must be in the same pool.
 736  758   *
 737  759   * The returned results nvlist will have an entry for each bookmark that failed.
 738  760   * The value will be the (int32) error code.
 739  761   *
 740  762   * The return value will be 0 if all bookmarks were created, otherwise it will
 741  763   * be the errno of a (undetermined) bookmarks that failed.
 742  764   */
 743  765  int
 744  766  lzc_bookmark(nvlist_t *bookmarks, nvlist_t **errlist)
 745  767  {
 746  768          nvpair_t *elem;
 747  769          int error;
 748  770          char pool[ZFS_MAX_DATASET_NAME_LEN];
 749  771  
 750  772          /* determine the pool name */
 751  773          elem = nvlist_next_nvpair(bookmarks, NULL);
 752  774          if (elem == NULL)
 753  775                  return (0);
 754  776          (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
 755  777          pool[strcspn(pool, "/#")] = '\0';
 756  778  
 757  779          error = lzc_ioctl(ZFS_IOC_BOOKMARK, pool, bookmarks, errlist);
 758  780  
 759  781          return (error);
 760  782  }
 761  783  
 762  784  /*
 763  785   * Retrieve bookmarks.
 764  786   *
 765  787   * Retrieve the list of bookmarks for the given file system. The props
 766  788   * parameter is an nvlist of property names (with no values) that will be
 767  789   * returned for each bookmark.
 768  790   *
 769  791   * The following are valid properties on bookmarks, all of which are numbers
 770  792   * (represented as uint64 in the nvlist)
 771  793   *
 772  794   * "guid" - globally unique identifier of the snapshot it refers to
 773  795   * "createtxg" - txg when the snapshot it refers to was created
 774  796   * "creation" - timestamp when the snapshot it refers to was created
 775  797   *
 776  798   * The format of the returned nvlist as follows:
 777  799   * <short name of bookmark> -> {
 778  800   *     <name of property> -> {
 779  801   *         "value" -> uint64
 780  802   *     }
 781  803   *  }
 782  804   */
 783  805  int
 784  806  lzc_get_bookmarks(const char *fsname, nvlist_t *props, nvlist_t **bmarks)
 785  807  {
 786  808          return (lzc_ioctl(ZFS_IOC_GET_BOOKMARKS, fsname, props, bmarks));
 787  809  }
 788  810  
 789  811  /*
 790  812   * Destroys bookmarks.
 791  813   *
 792  814   * The keys in the bmarks nvlist are the bookmarks to be destroyed.
 793  815   * They must all be in the same pool.  Bookmarks are specified as
 794  816   * <fs>#<bmark>.
 795  817   *
 796  818   * Bookmarks that do not exist will be silently ignored.
 797  819   *
 798  820   * The return value will be 0 if all bookmarks that existed were destroyed.
 799  821   *
 800  822   * Otherwise the return value will be the errno of a (undetermined) bookmark
 801  823   * that failed, no bookmarks will be destroyed, and the errlist will have an
 802  824   * entry for each bookmarks that failed.  The value in the errlist will be
 803  825   * the (int32) error code.
 804  826   */
 805  827  int
 806  828  lzc_destroy_bookmarks(nvlist_t *bmarks, nvlist_t **errlist)
 807  829  {
 808  830          nvpair_t *elem;
 809  831          int error;
 810  832          char pool[ZFS_MAX_DATASET_NAME_LEN];
 811  833  
 812  834          /* determine the pool name */
 813  835          elem = nvlist_next_nvpair(bmarks, NULL);
 814  836          if (elem == NULL)
 815  837                  return (0);
 816  838          (void) strlcpy(pool, nvpair_name(elem), sizeof (pool));
 817  839          pool[strcspn(pool, "/#")] = '\0';
 818  840  
 819  841          error = lzc_ioctl(ZFS_IOC_DESTROY_BOOKMARKS, pool, bmarks, errlist);
 820  842  
 821  843          return (error);
 822  844  }

↓ open down ↓

479 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX