Print this page
    
3740 Poor ZFS send / receive performance due to snapshot hold / release processing
Submitted by: Steven Hartland <steven.hartland@multiplay.co.uk>
    
      
        | Split | 
	Close | 
      
      | Expand all | 
      | Collapse all | 
    
    
          --- old/usr/src/lib/libzfs/common/libzfs_sendrecv.c
          +++ new/usr/src/lib/libzfs/common/libzfs_sendrecv.c
   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   * Copyright (c) 2012 by Delphix. All rights reserved.
  25   25   * Copyright (c) 2012, Joyent, Inc. All rights reserved.
  26   26   */
  27   27  
  28   28  #include <assert.h>
  29   29  #include <ctype.h>
  30   30  #include <errno.h>
  31   31  #include <libintl.h>
  32   32  #include <stdio.h>
  33   33  #include <stdlib.h>
  34   34  #include <strings.h>
  35   35  #include <unistd.h>
  36   36  #include <stddef.h>
  37   37  #include <fcntl.h>
  38   38  #include <sys/mount.h>
  39   39  #include <pthread.h>
  40   40  #include <umem.h>
  41   41  #include <time.h>
  42   42  
  43   43  #include <libzfs.h>
  44   44  
  45   45  #include "zfs_namecheck.h"
  46   46  #include "zfs_prop.h"
  47   47  #include "zfs_fletcher.h"
  48   48  #include "libzfs_impl.h"
  49   49  #include <sha2.h>
  50   50  #include <sys/zio_checksum.h>
  51   51  #include <sys/ddt.h>
  52   52  
  53   53  /* in libzfs_dataset.c */
  54   54  extern void zfs_setprop_error(libzfs_handle_t *, zfs_prop_t, int, char *);
  55   55  
/*
 * Forward declaration of a static function whose definition is not in
 * this part of the file.
 */
static int zfs_receive_impl(libzfs_handle_t *, const char *, recvflags_t *,
    int, const char *, nvlist_t *, avl_tree_t *, char **, int, uint64_t *);

/*
 * All-zero checksum.  cksummer() compares the checksum carried by a
 * DRR_WRITE record against this value to decide whether it has to compute
 * a SHA256 checksum of its own.
 */
static const zio_cksum_t zero_cksum = { 0 };
  60   60  
/*
 * Argument block consumed by the cksummer() dedup thread.
 */
typedef struct dedup_arg {
        int     inputfd;        /* fd cksummer() reads the raw stream from */
        int     outputfd;       /* fd cksummer() writes the dedup'd stream to */
        libzfs_handle_t  *dedup_hdl;    /* handle passed on to ddt_update() */
} dedup_arg_t;
  66   66  
/*
 * Argument block for progress reporting.  The code that consumes it is not
 * in this part of the file, so the field notes below are inferred from the
 * names only -- TODO confirm against the consumer.
 */
typedef struct progress_arg {
        zfs_handle_t *pa_zhp;   /* presumably the dataset being sent */
        int pa_fd;              /* presumably the fd progress is queried on */
        boolean_t pa_parsable;  /* presumably selects parsable output */
} progress_arg_t;
  72   72  
/*
 * Identifies where a data block was first written in the stream.
 * cksummer() fills it in from a DRR_WRITE record and copies it into the
 * reference fields of a DRR_WRITE_BYREF record when a duplicate is found.
 */
typedef struct dataref {
        uint64_t ref_guid;      /* drr_toguid of the original write */
        uint64_t ref_object;    /* drr_object of the original write */
        uint64_t ref_offset;    /* drr_offset of the original write */
} dataref_t;
  78   78  
/*
 * One entry of the dedup table.  Entries that land in the same hash bucket
 * are chained through dde_next; ddt_update() matches an entry on the pair
 * (dde_chksum, dde_prop).
 */
typedef struct dedup_entry {
        struct dedup_entry      *dde_next;      /* next entry in the bucket */
        zio_cksum_t dde_chksum;                 /* checksum of the block */
        uint64_t dde_prop;                      /* ddk_prop of the block */
        dataref_t dde_ref;                      /* where the block was sent */
} dedup_entry_t;
  85   85  
/*
 * Sizing of the send-side dedup table: cksummer() caps it at
 * MAX_DDT_PHYSMEM_PERCENT percent of physical memory, but never at less
 * than SMALLEST_POSSIBLE_MAX_DDT_MB megabytes.
 */
#define MAX_DDT_PHYSMEM_PERCENT         20
#define SMALLEST_POSSIBLE_MAX_DDT_MB            128
  88   88  
/*
 * Send-side dedup table: a hash table of dedup_entry_t chains, set up by
 * cksummer() and searched/extended by ddt_update().
 */
typedef struct dedup_table {
        dedup_entry_t   **dedup_hash_array;     /* bucket heads */
        umem_cache_t    *ddecache;      /* cache dedup entries come from */
        uint64_t        max_ddt_size;  /* max dedup table size in bytes */
        uint64_t        cur_ddt_size;  /* current dedup table size in bytes */
        uint64_t        ddt_count;      /* bumped for every entry appended */
        int             numhashbits;    /* checksum bits used as bucket index */
        boolean_t       ddt_full;       /* set once "table full" was reported */
} dedup_table_t;
  98   98  
  99   99  static int
 100  100  high_order_bit(uint64_t n)
 101  101  {
 102  102          int count;
 103  103  
 104  104          for (count = 0; n != 0; count++)
 105  105                  n >>= 1;
 106  106          return (count);
 107  107  }
 108  108  
 109  109  static size_t
 110  110  ssread(void *buf, size_t len, FILE *stream)
 111  111  {
 112  112          size_t outlen;
 113  113  
 114  114          if ((outlen = fread(buf, len, 1, stream)) == 0)
 115  115                  return (0);
 116  116  
 117  117          return (outlen);
 118  118  }
 119  119  
 120  120  static void
 121  121  ddt_hash_append(libzfs_handle_t *hdl, dedup_table_t *ddt, dedup_entry_t **ddepp,
 122  122      zio_cksum_t *cs, uint64_t prop, dataref_t *dr)
 123  123  {
 124  124          dedup_entry_t   *dde;
 125  125  
 126  126          if (ddt->cur_ddt_size >= ddt->max_ddt_size) {
 127  127                  if (ddt->ddt_full == B_FALSE) {
 128  128                          zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 129  129                              "Dedup table full.  Deduplication will continue "
 130  130                              "with existing table entries"));
 131  131                          ddt->ddt_full = B_TRUE;
 132  132                  }
 133  133                  return;
 134  134          }
 135  135  
 136  136          if ((dde = umem_cache_alloc(ddt->ddecache, UMEM_DEFAULT))
 137  137              != NULL) {
 138  138                  assert(*ddepp == NULL);
 139  139                  dde->dde_next = NULL;
 140  140                  dde->dde_chksum = *cs;
 141  141                  dde->dde_prop = prop;
 142  142                  dde->dde_ref = *dr;
 143  143                  *ddepp = dde;
 144  144                  ddt->cur_ddt_size += sizeof (dedup_entry_t);
 145  145                  ddt->ddt_count++;
 146  146          }
 147  147  }
 148  148  
 149  149  /*
 150  150   * Using the specified dedup table, do a lookup for an entry with
 151  151   * the checksum cs.  If found, return the block's reference info
 152  152   * in *dr. Otherwise, insert a new entry in the dedup table, using
 153  153   * the reference information specified by *dr.
 154  154   *
 155  155   * return value:  true - entry was found
 156  156   *                false - entry was not found
 157  157   */
 158  158  static boolean_t
 159  159  ddt_update(libzfs_handle_t *hdl, dedup_table_t *ddt, zio_cksum_t *cs,
 160  160      uint64_t prop, dataref_t *dr)
 161  161  {
 162  162          uint32_t hashcode;
 163  163          dedup_entry_t **ddepp;
 164  164  
 165  165          hashcode = BF64_GET(cs->zc_word[0], 0, ddt->numhashbits);
 166  166  
 167  167          for (ddepp = &(ddt->dedup_hash_array[hashcode]); *ddepp != NULL;
 168  168              ddepp = &((*ddepp)->dde_next)) {
 169  169                  if (ZIO_CHECKSUM_EQUAL(((*ddepp)->dde_chksum), *cs) &&
 170  170                      (*ddepp)->dde_prop == prop) {
 171  171                          *dr = (*ddepp)->dde_ref;
 172  172                          return (B_TRUE);
 173  173                  }
 174  174          }
 175  175          ddt_hash_append(hdl, ddt, ddepp, cs, prop, dr);
 176  176          return (B_FALSE);
 177  177  }
 178  178  
 179  179  static int
 180  180  cksum_and_write(const void *buf, uint64_t len, zio_cksum_t *zc, int outfd)
 181  181  {
 182  182          fletcher_4_incremental_native(buf, len, zc);
 183  183          return (write(outfd, buf, len));
 184  184  }
 185  185  
 186  186  /*
 187  187   * This function is started in a separate thread when the dedup option
 188  188   * has been requested.  The main send thread determines the list of
 189  189   * snapshots to be included in the send stream and makes the ioctl calls
 190  190   * for each one.  But instead of having the ioctl send the output to the
 191  191   * the output fd specified by the caller of zfs_send()), the
 192  192   * ioctl is told to direct the output to a pipe, which is read by the
 193  193   * alternate thread running THIS function.  This function does the
 194  194   * dedup'ing by:
 195  195   *  1. building a dedup table (the DDT)
 196  196   *  2. doing checksums on each data block and inserting a record in the DDT
 197  197   *  3. looking for matching checksums, and
 198  198   *  4.  sending a DRR_WRITE_BYREF record instead of a write record whenever
 199  199   *      a duplicate block is found.
 200  200   * The output of this function then goes to the output fd requested
 201  201   * by the caller of zfs_send().
 202  202   */
 203  203  static void *
 204  204  cksummer(void *arg)
 205  205  {
 206  206          dedup_arg_t *dda = arg;
 207  207          char *buf = malloc(1<<20);
 208  208          dmu_replay_record_t thedrr;
 209  209          dmu_replay_record_t *drr = &thedrr;
 210  210          struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
 211  211          struct drr_end *drre = &thedrr.drr_u.drr_end;
 212  212          struct drr_object *drro = &thedrr.drr_u.drr_object;
 213  213          struct drr_write *drrw = &thedrr.drr_u.drr_write;
 214  214          struct drr_spill *drrs = &thedrr.drr_u.drr_spill;
 215  215          FILE *ofp;
 216  216          int outfd;
 217  217          dmu_replay_record_t wbr_drr = {0};
 218  218          struct drr_write_byref *wbr_drrr = &wbr_drr.drr_u.drr_write_byref;
 219  219          dedup_table_t ddt;
 220  220          zio_cksum_t stream_cksum;
 221  221          uint64_t physmem = sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE);
 222  222          uint64_t numbuckets;
 223  223  
 224  224          ddt.max_ddt_size =
 225  225              MAX((physmem * MAX_DDT_PHYSMEM_PERCENT)/100,
 226  226              SMALLEST_POSSIBLE_MAX_DDT_MB<<20);
 227  227  
 228  228          numbuckets = ddt.max_ddt_size/(sizeof (dedup_entry_t));
 229  229  
 230  230          /*
 231  231           * numbuckets must be a power of 2.  Increase number to
 232  232           * a power of 2 if necessary.
 233  233           */
 234  234          if (!ISP2(numbuckets))
 235  235                  numbuckets = 1 << high_order_bit(numbuckets);
 236  236  
 237  237          ddt.dedup_hash_array = calloc(numbuckets, sizeof (dedup_entry_t *));
 238  238          ddt.ddecache = umem_cache_create("dde", sizeof (dedup_entry_t), 0,
 239  239              NULL, NULL, NULL, NULL, NULL, 0);
 240  240          ddt.cur_ddt_size = numbuckets * sizeof (dedup_entry_t *);
 241  241          ddt.numhashbits = high_order_bit(numbuckets) - 1;
 242  242          ddt.ddt_full = B_FALSE;
 243  243  
 244  244          /* Initialize the write-by-reference block. */
 245  245          wbr_drr.drr_type = DRR_WRITE_BYREF;
 246  246          wbr_drr.drr_payloadlen = 0;
 247  247  
 248  248          outfd = dda->outputfd;
 249  249          ofp = fdopen(dda->inputfd, "r");
 250  250          while (ssread(drr, sizeof (dmu_replay_record_t), ofp) != 0) {
 251  251  
 252  252                  switch (drr->drr_type) {
 253  253                  case DRR_BEGIN:
 254  254                  {
 255  255                          int     fflags;
 256  256                          ZIO_SET_CHECKSUM(&stream_cksum, 0, 0, 0, 0);
 257  257  
 258  258                          /* set the DEDUP feature flag for this stream */
 259  259                          fflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
 260  260                          fflags |= (DMU_BACKUP_FEATURE_DEDUP |
 261  261                              DMU_BACKUP_FEATURE_DEDUPPROPS);
 262  262                          DMU_SET_FEATUREFLAGS(drrb->drr_versioninfo, fflags);
 263  263  
 264  264                          if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
 265  265                              &stream_cksum, outfd) == -1)
 266  266                                  goto out;
 267  267                          if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
 268  268                              DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) {
 269  269                                  int sz = drr->drr_payloadlen;
 270  270  
 271  271                                  if (sz > 1<<20) {
 272  272                                          free(buf);
 273  273                                          buf = malloc(sz);
 274  274                                  }
 275  275                                  (void) ssread(buf, sz, ofp);
 276  276                                  if (ferror(stdin))
 277  277                                          perror("fread");
 278  278                                  if (cksum_and_write(buf, sz, &stream_cksum,
 279  279                                      outfd) == -1)
 280  280                                          goto out;
 281  281                          }
 282  282                          break;
 283  283                  }
 284  284  
 285  285                  case DRR_END:
 286  286                  {
 287  287                          /* use the recalculated checksum */
 288  288                          ZIO_SET_CHECKSUM(&drre->drr_checksum,
 289  289                              stream_cksum.zc_word[0], stream_cksum.zc_word[1],
 290  290                              stream_cksum.zc_word[2], stream_cksum.zc_word[3]);
 291  291                          if ((write(outfd, drr,
 292  292                              sizeof (dmu_replay_record_t))) == -1)
 293  293                                  goto out;
 294  294                          break;
 295  295                  }
 296  296  
 297  297                  case DRR_OBJECT:
 298  298                  {
 299  299                          if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
 300  300                              &stream_cksum, outfd) == -1)
 301  301                                  goto out;
 302  302                          if (drro->drr_bonuslen > 0) {
 303  303                                  (void) ssread(buf,
 304  304                                      P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
 305  305                                      ofp);
 306  306                                  if (cksum_and_write(buf,
 307  307                                      P2ROUNDUP((uint64_t)drro->drr_bonuslen, 8),
 308  308                                      &stream_cksum, outfd) == -1)
 309  309                                          goto out;
 310  310                          }
 311  311                          break;
 312  312                  }
 313  313  
 314  314                  case DRR_SPILL:
 315  315                  {
 316  316                          if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
 317  317                              &stream_cksum, outfd) == -1)
 318  318                                  goto out;
 319  319                          (void) ssread(buf, drrs->drr_length, ofp);
 320  320                          if (cksum_and_write(buf, drrs->drr_length,
 321  321                              &stream_cksum, outfd) == -1)
 322  322                                  goto out;
 323  323                          break;
 324  324                  }
 325  325  
 326  326                  case DRR_FREEOBJECTS:
 327  327                  {
 328  328                          if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
 329  329                              &stream_cksum, outfd) == -1)
 330  330                                  goto out;
 331  331                          break;
 332  332                  }
 333  333  
 334  334                  case DRR_WRITE:
 335  335                  {
 336  336                          dataref_t       dataref;
 337  337  
 338  338                          (void) ssread(buf, drrw->drr_length, ofp);
 339  339  
 340  340                          /*
 341  341                           * Use the existing checksum if it's dedup-capable,
 342  342                           * else calculate a SHA256 checksum for it.
 343  343                           */
 344  344  
 345  345                          if (ZIO_CHECKSUM_EQUAL(drrw->drr_key.ddk_cksum,
 346  346                              zero_cksum) ||
 347  347                              !DRR_IS_DEDUP_CAPABLE(drrw->drr_checksumflags)) {
 348  348                                  SHA256_CTX      ctx;
 349  349                                  zio_cksum_t     tmpsha256;
 350  350  
 351  351                                  SHA256Init(&ctx);
 352  352                                  SHA256Update(&ctx, buf, drrw->drr_length);
 353  353                                  SHA256Final(&tmpsha256, &ctx);
 354  354                                  drrw->drr_key.ddk_cksum.zc_word[0] =
 355  355                                      BE_64(tmpsha256.zc_word[0]);
 356  356                                  drrw->drr_key.ddk_cksum.zc_word[1] =
 357  357                                      BE_64(tmpsha256.zc_word[1]);
 358  358                                  drrw->drr_key.ddk_cksum.zc_word[2] =
 359  359                                      BE_64(tmpsha256.zc_word[2]);
 360  360                                  drrw->drr_key.ddk_cksum.zc_word[3] =
 361  361                                      BE_64(tmpsha256.zc_word[3]);
 362  362                                  drrw->drr_checksumtype = ZIO_CHECKSUM_SHA256;
 363  363                                  drrw->drr_checksumflags = DRR_CHECKSUM_DEDUP;
 364  364                          }
 365  365  
 366  366                          dataref.ref_guid = drrw->drr_toguid;
 367  367                          dataref.ref_object = drrw->drr_object;
 368  368                          dataref.ref_offset = drrw->drr_offset;
 369  369  
 370  370                          if (ddt_update(dda->dedup_hdl, &ddt,
 371  371                              &drrw->drr_key.ddk_cksum, drrw->drr_key.ddk_prop,
 372  372                              &dataref)) {
 373  373                                  /* block already present in stream */
 374  374                                  wbr_drrr->drr_object = drrw->drr_object;
 375  375                                  wbr_drrr->drr_offset = drrw->drr_offset;
 376  376                                  wbr_drrr->drr_length = drrw->drr_length;
 377  377                                  wbr_drrr->drr_toguid = drrw->drr_toguid;
 378  378                                  wbr_drrr->drr_refguid = dataref.ref_guid;
 379  379                                  wbr_drrr->drr_refobject =
 380  380                                      dataref.ref_object;
 381  381                                  wbr_drrr->drr_refoffset =
 382  382                                      dataref.ref_offset;
 383  383  
 384  384                                  wbr_drrr->drr_checksumtype =
 385  385                                      drrw->drr_checksumtype;
 386  386                                  wbr_drrr->drr_checksumflags =
 387  387                                      drrw->drr_checksumtype;
 388  388                                  wbr_drrr->drr_key.ddk_cksum =
 389  389                                      drrw->drr_key.ddk_cksum;
 390  390                                  wbr_drrr->drr_key.ddk_prop =
 391  391                                      drrw->drr_key.ddk_prop;
 392  392  
 393  393                                  if (cksum_and_write(&wbr_drr,
 394  394                                      sizeof (dmu_replay_record_t), &stream_cksum,
 395  395                                      outfd) == -1)
 396  396                                          goto out;
 397  397                          } else {
 398  398                                  /* block not previously seen */
 399  399                                  if (cksum_and_write(drr,
 400  400                                      sizeof (dmu_replay_record_t), &stream_cksum,
 401  401                                      outfd) == -1)
 402  402                                          goto out;
 403  403                                  if (cksum_and_write(buf,
 404  404                                      drrw->drr_length,
 405  405                                      &stream_cksum, outfd) == -1)
 406  406                                          goto out;
 407  407                          }
 408  408                          break;
 409  409                  }
 410  410  
 411  411                  case DRR_FREE:
 412  412                  {
 413  413                          if (cksum_and_write(drr, sizeof (dmu_replay_record_t),
 414  414                              &stream_cksum, outfd) == -1)
 415  415                                  goto out;
 416  416                          break;
 417  417                  }
 418  418  
 419  419                  default:
 420  420                          (void) printf("INVALID record type 0x%x\n",
 421  421                              drr->drr_type);
 422  422                          /* should never happen, so assert */
 423  423                          assert(B_FALSE);
 424  424                  }
 425  425          }
 426  426  out:
 427  427          umem_cache_destroy(ddt.ddecache);
 428  428          free(ddt.dedup_hash_array);
 429  429          free(buf);
 430  430          (void) fclose(ofp);
 431  431  
 432  432          return (NULL);
 433  433  }
 434  434  
 435  435  /*
 436  436   * Routines for dealing with the AVL tree of fs-nvlists
 437  437   */
/*
 * One snapshot in the AVL tree built by fsavl_create(); the tree is keyed
 * and ordered by fn_guid (see fsavl_compare()).
 */
typedef struct fsavl_node {
        avl_node_t fn_node;     /* AVL linkage */
        nvlist_t *fn_nvfs;      /* nvlist of the fs that owns the snapshot */
        char *fn_snapname;      /* from nvpair_name(); not a private copy */
        uint64_t fn_guid;       /* snapshot guid (the sort key) */
} fsavl_node_t;
 444  444  
 445  445  static int
 446  446  fsavl_compare(const void *arg1, const void *arg2)
 447  447  {
 448  448          const fsavl_node_t *fn1 = arg1;
 449  449          const fsavl_node_t *fn2 = arg2;
 450  450  
 451  451          if (fn1->fn_guid > fn2->fn_guid)
 452  452                  return (+1);
 453  453          else if (fn1->fn_guid < fn2->fn_guid)
 454  454                  return (-1);
 455  455          else
 456  456                  return (0);
 457  457  }
 458  458  
 459  459  /*
 460  460   * Given the GUID of a snapshot, find its containing filesystem and
 461  461   * (optionally) name.
 462  462   */
 463  463  static nvlist_t *
 464  464  fsavl_find(avl_tree_t *avl, uint64_t snapguid, char **snapname)
 465  465  {
 466  466          fsavl_node_t fn_find;
 467  467          fsavl_node_t *fn;
 468  468  
 469  469          fn_find.fn_guid = snapguid;
 470  470  
 471  471          fn = avl_find(avl, &fn_find, NULL);
 472  472          if (fn) {
 473  473                  if (snapname)
 474  474                          *snapname = fn->fn_snapname;
 475  475                  return (fn->fn_nvfs);
 476  476          }
 477  477          return (NULL);
 478  478  }
 479  479  
 480  480  static void
 481  481  fsavl_destroy(avl_tree_t *avl)
 482  482  {
 483  483          fsavl_node_t *fn;
 484  484          void *cookie;
 485  485  
 486  486          if (avl == NULL)
 487  487                  return;
 488  488  
 489  489          cookie = NULL;
 490  490          while ((fn = avl_destroy_nodes(avl, &cookie)) != NULL)
 491  491                  free(fn);
 492  492          avl_destroy(avl);
 493  493          free(avl);
 494  494  }
 495  495  
 496  496  /*
 497  497   * Given an nvlist, produce an avl tree of snapshots, ordered by guid
 498  498   */
 499  499  static avl_tree_t *
 500  500  fsavl_create(nvlist_t *fss)
 501  501  {
 502  502          avl_tree_t *fsavl;
 503  503          nvpair_t *fselem = NULL;
 504  504  
 505  505          if ((fsavl = malloc(sizeof (avl_tree_t))) == NULL)
 506  506                  return (NULL);
 507  507  
 508  508          avl_create(fsavl, fsavl_compare, sizeof (fsavl_node_t),
 509  509              offsetof(fsavl_node_t, fn_node));
 510  510  
 511  511          while ((fselem = nvlist_next_nvpair(fss, fselem)) != NULL) {
 512  512                  nvlist_t *nvfs, *snaps;
 513  513                  nvpair_t *snapelem = NULL;
 514  514  
 515  515                  VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
 516  516                  VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
 517  517  
 518  518                  while ((snapelem =
 519  519                      nvlist_next_nvpair(snaps, snapelem)) != NULL) {
 520  520                          fsavl_node_t *fn;
 521  521                          uint64_t guid;
 522  522  
 523  523                          VERIFY(0 == nvpair_value_uint64(snapelem, &guid));
 524  524                          if ((fn = malloc(sizeof (fsavl_node_t))) == NULL) {
 525  525                                  fsavl_destroy(fsavl);
 526  526                                  return (NULL);
 527  527                          }
 528  528                          fn->fn_nvfs = nvfs;
 529  529                          fn->fn_snapname = nvpair_name(snapelem);
 530  530                          fn->fn_guid = guid;
 531  531  
 532  532                          /*
 533  533                           * Note: if there are multiple snaps with the
 534  534                           * same GUID, we ignore all but one.
 535  535                           */
 536  536                          if (avl_find(fsavl, fn, NULL) == NULL)
 537  537                                  avl_add(fsavl, fn);
 538  538                          else
 539  539                                  free(fn);
 540  540                  }
 541  541          }
 542  542  
 543  543          return (fsavl);
 544  544  }
 545  545  
 546  546  /*
 547  547   * Routines for dealing with the giant nvlist of fs-nvlists, etc.
 548  548   */
/*
 * State threaded through the send_iterate_*() callbacks while the header
 * nvlist described below is being assembled.
 */
typedef struct send_data {
        /* guid of the fromsnap (or substituted tosnap) in the parent fs */
        uint64_t parent_fromsnap_guid;
        /* snapshot name -> guid, filled in by send_iterate_snap() */
        nvlist_t *parent_snaps;
        nvlist_t *fss;
        /* snapshot name -> property nvlist, filled in by send_iterate_snap() */
        nvlist_t *snapprops;
        /* compared against the part of a snapshot name after the '@' */
        const char *fromsnap;
        const char *tosnap;
        boolean_t recursive;

        /*
         * The header nvlist is of the following format:
         * {
         *   "tosnap" -> string
         *   "fromsnap" -> string (if incremental)
         *   "fss" -> {
         *      id -> {
         *
         *       "name" -> string (full name; for debugging)
         *       "parentfromsnap" -> number (guid of fromsnap in parent)
         *
         *       "props" -> { name -> value (only if set here) }
         *       "snaps" -> { name (lastname) -> number (guid) }
         *       "snapprops" -> { name (lastname) -> { name -> value } }
         *
         *       "origin" -> number (guid) (if clone)
         *       "sent" -> boolean (not on-disk)
         *      }
         *   }
         * }
         *
         */
} send_data_t;
 581  581  
/* Forward declaration: send_iterate_snap() calls this ahead of its definition. */
static void send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv);
 583  583  
 584  584  static int
 585  585  send_iterate_snap(zfs_handle_t *zhp, void *arg)
 586  586  {
 587  587          send_data_t *sd = arg;
 588  588          uint64_t guid = zhp->zfs_dmustats.dds_guid;
 589  589          char *snapname;
 590  590          nvlist_t *nv;
 591  591  
 592  592          snapname = strrchr(zhp->zfs_name, '@')+1;
 593  593  
 594  594          VERIFY(0 == nvlist_add_uint64(sd->parent_snaps, snapname, guid));
 595  595          /*
 596  596           * NB: if there is no fromsnap here (it's a newly created fs in
 597  597           * an incremental replication), we will substitute the tosnap.
 598  598           */
 599  599          if ((sd->fromsnap && strcmp(snapname, sd->fromsnap) == 0) ||
 600  600              (sd->parent_fromsnap_guid == 0 && sd->tosnap &&
 601  601              strcmp(snapname, sd->tosnap) == 0)) {
 602  602                  sd->parent_fromsnap_guid = guid;
 603  603          }
 604  604  
 605  605          VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
 606  606          send_iterate_prop(zhp, nv);
 607  607          VERIFY(0 == nvlist_add_nvlist(sd->snapprops, snapname, nv));
 608  608          nvlist_free(nv);
 609  609  
 610  610          zfs_close(zhp);
 611  611          return (0);
 612  612  }
 613  613  
 614  614  static void
 615  615  send_iterate_prop(zfs_handle_t *zhp, nvlist_t *nv)
 616  616  {
 617  617          nvpair_t *elem = NULL;
 618  618  
 619  619          while ((elem = nvlist_next_nvpair(zhp->zfs_props, elem)) != NULL) {
 620  620                  char *propname = nvpair_name(elem);
 621  621                  zfs_prop_t prop = zfs_name_to_prop(propname);
 622  622                  nvlist_t *propnv;
 623  623  
 624  624                  if (!zfs_prop_user(propname)) {
 625  625                          /*
 626  626                           * Realistically, this should never happen.  However,
 627  627                           * we want the ability to add DSL properties without
 628  628                           * needing to make incompatible version changes.  We
 629  629                           * need to ignore unknown properties to allow older
 630  630                           * software to still send datasets containing these
 631  631                           * properties, with the unknown properties elided.
 632  632                           */
 633  633                          if (prop == ZPROP_INVAL)
 634  634                                  continue;
 635  635  
 636  636                          if (zfs_prop_readonly(prop))
 637  637                                  continue;
 638  638                  }
 639  639  
 640  640                  verify(nvpair_value_nvlist(elem, &propnv) == 0);
 641  641                  if (prop == ZFS_PROP_QUOTA || prop == ZFS_PROP_RESERVATION ||
 642  642                      prop == ZFS_PROP_REFQUOTA ||
 643  643                      prop == ZFS_PROP_REFRESERVATION) {
 644  644                          char *source;
 645  645                          uint64_t value;
 646  646                          verify(nvlist_lookup_uint64(propnv,
 647  647                              ZPROP_VALUE, &value) == 0);
 648  648                          if (zhp->zfs_type == ZFS_TYPE_SNAPSHOT)
 649  649                                  continue;
 650  650                          /*
 651  651                           * May have no source before SPA_VERSION_RECVD_PROPS,
 652  652                           * but is still modifiable.
 653  653                           */
 654  654                          if (nvlist_lookup_string(propnv,
 655  655                              ZPROP_SOURCE, &source) == 0) {
 656  656                                  if ((strcmp(source, zhp->zfs_name) != 0) &&
 657  657                                      (strcmp(source,
 658  658                                      ZPROP_SOURCE_VAL_RECVD) != 0))
 659  659                                          continue;
 660  660                          }
 661  661                  } else {
 662  662                          char *source;
 663  663                          if (nvlist_lookup_string(propnv,
 664  664                              ZPROP_SOURCE, &source) != 0)
 665  665                                  continue;
 666  666                          if ((strcmp(source, zhp->zfs_name) != 0) &&
 667  667                              (strcmp(source, ZPROP_SOURCE_VAL_RECVD) != 0))
 668  668                                  continue;
 669  669                  }
 670  670  
 671  671                  if (zfs_prop_user(propname) ||
 672  672                      zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
 673  673                          char *value;
 674  674                          verify(nvlist_lookup_string(propnv,
 675  675                              ZPROP_VALUE, &value) == 0);
 676  676                          VERIFY(0 == nvlist_add_string(nv, propname, value));
 677  677                  } else {
 678  678                          uint64_t value;
 679  679                          verify(nvlist_lookup_uint64(propnv,
 680  680                              ZPROP_VALUE, &value) == 0);
 681  681                          VERIFY(0 == nvlist_add_uint64(nv, propname, value));
 682  682                  }
 683  683          }
 684  684  }
 685  685  
 686  686  /*
 687  687   * recursively generate nvlists describing datasets.  See comment
 688  688   * for the data structure send_data_t above for description of contents
 689  689   * of the nvlist.
 690  690   */
 691  691  static int
 692  692  send_iterate_fs(zfs_handle_t *zhp, void *arg)
 693  693  {
 694  694          send_data_t *sd = arg;
 695  695          nvlist_t *nvfs, *nv;
 696  696          int rv = 0;
 697  697          uint64_t parent_fromsnap_guid_save = sd->parent_fromsnap_guid;
 698  698          uint64_t guid = zhp->zfs_dmustats.dds_guid;
 699  699          char guidstring[64];
 700  700  
 701  701          VERIFY(0 == nvlist_alloc(&nvfs, NV_UNIQUE_NAME, 0));
 702  702          VERIFY(0 == nvlist_add_string(nvfs, "name", zhp->zfs_name));
 703  703          VERIFY(0 == nvlist_add_uint64(nvfs, "parentfromsnap",
 704  704              sd->parent_fromsnap_guid));
 705  705  
 706  706          if (zhp->zfs_dmustats.dds_origin[0]) {
 707  707                  zfs_handle_t *origin = zfs_open(zhp->zfs_hdl,
 708  708                      zhp->zfs_dmustats.dds_origin, ZFS_TYPE_SNAPSHOT);
 709  709                  if (origin == NULL)
 710  710                          return (-1);
 711  711                  VERIFY(0 == nvlist_add_uint64(nvfs, "origin",
 712  712                      origin->zfs_dmustats.dds_guid));
 713  713          }
 714  714  
 715  715          /* iterate over props */
 716  716          VERIFY(0 == nvlist_alloc(&nv, NV_UNIQUE_NAME, 0));
 717  717          send_iterate_prop(zhp, nv);
 718  718          VERIFY(0 == nvlist_add_nvlist(nvfs, "props", nv));
 719  719          nvlist_free(nv);
 720  720  
 721  721          /* iterate over snaps, and set sd->parent_fromsnap_guid */
 722  722          sd->parent_fromsnap_guid = 0;
 723  723          VERIFY(0 == nvlist_alloc(&sd->parent_snaps, NV_UNIQUE_NAME, 0));
 724  724          VERIFY(0 == nvlist_alloc(&sd->snapprops, NV_UNIQUE_NAME, 0));
 725  725          (void) zfs_iter_snapshots(zhp, send_iterate_snap, sd);
 726  726          VERIFY(0 == nvlist_add_nvlist(nvfs, "snaps", sd->parent_snaps));
 727  727          VERIFY(0 == nvlist_add_nvlist(nvfs, "snapprops", sd->snapprops));
 728  728          nvlist_free(sd->parent_snaps);
 729  729          nvlist_free(sd->snapprops);
 730  730  
 731  731          /* add this fs to nvlist */
 732  732          (void) snprintf(guidstring, sizeof (guidstring),
 733  733              "0x%llx", (longlong_t)guid);
 734  734          VERIFY(0 == nvlist_add_nvlist(sd->fss, guidstring, nvfs));
 735  735          nvlist_free(nvfs);
 736  736  
 737  737          /* iterate over children */
 738  738          if (sd->recursive)
 739  739                  rv = zfs_iter_filesystems(zhp, send_iterate_fs, sd);
 740  740  
 741  741          sd->parent_fromsnap_guid = parent_fromsnap_guid_save;
 742  742  
 743  743          zfs_close(zhp);
 744  744          return (rv);
 745  745  }
 746  746  
 747  747  static int
 748  748  gather_nvlist(libzfs_handle_t *hdl, const char *fsname, const char *fromsnap,
 749  749      const char *tosnap, boolean_t recursive, nvlist_t **nvlp, avl_tree_t **avlp)
 750  750  {
 751  751          zfs_handle_t *zhp;
 752  752          send_data_t sd = { 0 };
 753  753          int error;
 754  754  
 755  755          zhp = zfs_open(hdl, fsname, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
 756  756          if (zhp == NULL)
 757  757                  return (EZFS_BADTYPE);
 758  758  
 759  759          VERIFY(0 == nvlist_alloc(&sd.fss, NV_UNIQUE_NAME, 0));
 760  760          sd.fromsnap = fromsnap;
 761  761          sd.tosnap = tosnap;
 762  762          sd.recursive = recursive;
 763  763  
 764  764          if ((error = send_iterate_fs(zhp, &sd)) != 0) {
 765  765                  nvlist_free(sd.fss);
 766  766                  if (avlp != NULL)
 767  767                          *avlp = NULL;
 768  768                  *nvlp = NULL;
 769  769                  return (error);
 770  770          }
 771  771  
 772  772          if (avlp != NULL && (*avlp = fsavl_create(sd.fss)) == NULL) {
 773  773                  nvlist_free(sd.fss);
 774  774                  *nvlp = NULL;
 775  775                  return (EZFS_NOMEM);
 776  776          }
 777  777  
 778  778          *nvlp = sd.fss;
 779  779          return (0);
 780  780  }
 781  781  
 782  782  /*
 783  783   * Routines specific to "zfs send"
 784  784   */
 785  785  typedef struct send_dump_data {
  
    | 
      ↓ open down ↓ | 
    785 lines elided | 
    
      ↑ open up ↑ | 
  
 786  786          /* these are all just the short snapname (the part after the @) */
 787  787          const char *fromsnap;
 788  788          const char *tosnap;
 789  789          char prevsnap[ZFS_MAXNAMELEN];
 790  790          uint64_t prevsnap_obj;
 791  791          boolean_t seenfrom, seento, replicate, doall, fromorigin;
 792  792          boolean_t verbose, dryrun, parsable, progress;
 793  793          int outfd;
 794  794          boolean_t err;
 795  795          nvlist_t *fss;
      796 +        nvlist_t *snapholds;
 796  797          avl_tree_t *fsavl;
 797  798          snapfilter_cb_t *filter_cb;
 798  799          void *filter_cb_arg;
 799  800          nvlist_t *debugnv;
 800  801          char holdtag[ZFS_MAXNAMELEN];
 801  802          int cleanup_fd;
 802  803          uint64_t size;
 803  804  } send_dump_data_t;
 804  805  
 805  806  static int
 806  807  estimate_ioctl(zfs_handle_t *zhp, uint64_t fromsnap_obj,
 807  808      boolean_t fromorigin, uint64_t *sizep)
 808  809  {
 809  810          zfs_cmd_t zc = { 0 };
 810  811          libzfs_handle_t *hdl = zhp->zfs_hdl;
 811  812  
 812  813          assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 813  814          assert(fromsnap_obj == 0 || !fromorigin);
 814  815  
 815  816          (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
 816  817          zc.zc_obj = fromorigin;
 817  818          zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
 818  819          zc.zc_fromobj = fromsnap_obj;
 819  820          zc.zc_guid = 1;  /* estimate flag */
 820  821  
 821  822          if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
 822  823                  char errbuf[1024];
 823  824                  (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
 824  825                      "warning: cannot estimate space for '%s'"), zhp->zfs_name);
 825  826  
 826  827                  switch (errno) {
 827  828                  case EXDEV:
 828  829                          zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 829  830                              "not an earlier snapshot from the same fs"));
 830  831                          return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
 831  832  
 832  833                  case ENOENT:
 833  834                          if (zfs_dataset_exists(hdl, zc.zc_name,
 834  835                              ZFS_TYPE_SNAPSHOT)) {
 835  836                                  zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 836  837                                      "incremental source (@%s) does not exist"),
 837  838                                      zc.zc_value);
 838  839                          }
 839  840                          return (zfs_error(hdl, EZFS_NOENT, errbuf));
 840  841  
 841  842                  case EDQUOT:
 842  843                  case EFBIG:
 843  844                  case EIO:
 844  845                  case ENOLINK:
 845  846                  case ENOSPC:
 846  847                  case ENOSTR:
 847  848                  case ENXIO:
 848  849                  case EPIPE:
 849  850                  case ERANGE:
 850  851                  case EFAULT:
 851  852                  case EROFS:
 852  853                          zfs_error_aux(hdl, strerror(errno));
 853  854                          return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
 854  855  
 855  856                  default:
 856  857                          return (zfs_standard_error(hdl, errno, errbuf));
 857  858                  }
 858  859          }
 859  860  
 860  861          *sizep = zc.zc_objset_type;
 861  862  
 862  863          return (0);
 863  864  }
 864  865  
 865  866  /*
 866  867   * Dumps a backup of the given snapshot (incremental from fromsnap if it's not
 867  868   * NULL) to the file descriptor specified by outfd.
 868  869   */
 869  870  static int
 870  871  dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
 871  872      boolean_t fromorigin, int outfd, nvlist_t *debugnv)
 872  873  {
 873  874          zfs_cmd_t zc = { 0 };
 874  875          libzfs_handle_t *hdl = zhp->zfs_hdl;
 875  876          nvlist_t *thisdbg;
 876  877  
 877  878          assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 878  879          assert(fromsnap_obj == 0 || !fromorigin);
 879  880  
 880  881          (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
 881  882          zc.zc_cookie = outfd;
 882  883          zc.zc_obj = fromorigin;
 883  884          zc.zc_sendobj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
 884  885          zc.zc_fromobj = fromsnap_obj;
 885  886  
 886  887          VERIFY(0 == nvlist_alloc(&thisdbg, NV_UNIQUE_NAME, 0));
 887  888          if (fromsnap && fromsnap[0] != '\0') {
 888  889                  VERIFY(0 == nvlist_add_string(thisdbg,
 889  890                      "fromsnap", fromsnap));
 890  891          }
 891  892  
 892  893          if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
 893  894                  char errbuf[1024];
 894  895                  (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
 895  896                      "warning: cannot send '%s'"), zhp->zfs_name);
 896  897  
 897  898                  VERIFY(0 == nvlist_add_uint64(thisdbg, "error", errno));
 898  899                  if (debugnv) {
 899  900                          VERIFY(0 == nvlist_add_nvlist(debugnv,
 900  901                              zhp->zfs_name, thisdbg));
 901  902                  }
 902  903                  nvlist_free(thisdbg);
 903  904  
 904  905                  switch (errno) {
 905  906                  case EXDEV:
 906  907                          zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 907  908                              "not an earlier snapshot from the same fs"));
 908  909                          return (zfs_error(hdl, EZFS_CROSSTARGET, errbuf));
 909  910  
 910  911                  case ENOENT:
 911  912                          if (zfs_dataset_exists(hdl, zc.zc_name,
 912  913                              ZFS_TYPE_SNAPSHOT)) {
 913  914                                  zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
 914  915                                      "incremental source (@%s) does not exist"),
 915  916                                      zc.zc_value);
 916  917                          }
 917  918                          return (zfs_error(hdl, EZFS_NOENT, errbuf));
 918  919  
 919  920                  case EDQUOT:
 920  921                  case EFBIG:
 921  922                  case EIO:
 922  923                  case ENOLINK:
 923  924                  case ENOSPC:
 924  925                  case ENOSTR:
 925  926                  case ENXIO:
 926  927                  case EPIPE:
 927  928                  case ERANGE:
 928  929                  case EFAULT:
 929  930                  case EROFS:
 930  931                          zfs_error_aux(hdl, strerror(errno));
 931  932                          return (zfs_error(hdl, EZFS_BADBACKUP, errbuf));
 932  933  
 933  934                  default:
 934  935                          return (zfs_standard_error(hdl, errno, errbuf));
  
    | 
      ↓ open down ↓ | 
    129 lines elided | 
    
      ↑ open up ↑ | 
  
 935  936                  }
 936  937          }
 937  938  
 938  939          if (debugnv)
 939  940                  VERIFY(0 == nvlist_add_nvlist(debugnv, zhp->zfs_name, thisdbg));
 940  941          nvlist_free(thisdbg);
 941  942  
 942  943          return (0);
 943  944  }
 944  945  
 945      -static int
 946      -hold_for_send(zfs_handle_t *zhp, send_dump_data_t *sdd)
      946 +static void
      947 +gather_holds(zfs_handle_t *zhp, send_dump_data_t *sdd)
 947  948  {
 948      -        zfs_handle_t *pzhp;
 949      -        int error = 0;
 950      -        char *thissnap;
 951      -
 952  949          assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 953  950  
 954      -        if (sdd->dryrun)
 955      -                return (0);
 956      -
 957  951          /*
 958      -         * zfs_send() only opens a cleanup_fd for sends that need it,
      952 +         * zfs_send() only sets snapholds for sends that need them,
 959  953           * e.g. replication and doall.
 960  954           */
 961      -        if (sdd->cleanup_fd == -1)
 962      -                return (0);
 963      -
 964      -        thissnap = strchr(zhp->zfs_name, '@') + 1;
 965      -        *(thissnap - 1) = '\0';
 966      -        pzhp = zfs_open(zhp->zfs_hdl, zhp->zfs_name, ZFS_TYPE_DATASET);
 967      -        *(thissnap - 1) = '@';
 968      -
 969      -        /*
 970      -         * It's OK if the parent no longer exists.  The send code will
 971      -         * handle that error.
 972      -         */
 973      -        if (pzhp) {
 974      -                error = zfs_hold(pzhp, thissnap, sdd->holdtag,
 975      -                    B_FALSE, B_TRUE, sdd->cleanup_fd);
 976      -                zfs_close(pzhp);
 977      -        }
      955 +        if (sdd->snapholds == NULL)
      956 +                return;
 978  957  
 979      -        return (error);
      958 +        fnvlist_add_string(sdd->snapholds, zhp->zfs_name, sdd->holdtag);
 980  959  }
 981  960  
 982  961  static void *
 983  962  send_progress_thread(void *arg)
 984  963  {
 985  964          progress_arg_t *pa = arg;
 986  965  
 987  966          zfs_cmd_t zc = { 0 };
 988  967          zfs_handle_t *zhp = pa->pa_zhp;
 989  968          libzfs_handle_t *hdl = zhp->zfs_hdl;
 990  969          unsigned long long bytes;
 991  970          char buf[16];
 992  971  
 993  972          time_t t;
 994  973          struct tm *tm;
 995  974  
 996  975          assert(zhp->zfs_type == ZFS_TYPE_SNAPSHOT);
 997  976          (void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
 998  977  
 999  978          if (!pa->pa_parsable)
1000  979                  (void) fprintf(stderr, "TIME        SENT   SNAPSHOT\n");
1001  980  
1002  981          /*
1003  982           * Print the progress from ZFS_IOC_SEND_PROGRESS every second.
1004  983           */
1005  984          for (;;) {
1006  985                  (void) sleep(1);
1007  986  
1008  987                  zc.zc_cookie = pa->pa_fd;
1009  988                  if (zfs_ioctl(hdl, ZFS_IOC_SEND_PROGRESS, &zc) != 0)
1010  989                          return ((void *)-1);
1011  990  
1012  991                  (void) time(&t);
1013  992                  tm = localtime(&t);
1014  993                  bytes = zc.zc_cookie;
1015  994  
1016  995                  if (pa->pa_parsable) {
1017  996                          (void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n",
1018  997                              tm->tm_hour, tm->tm_min, tm->tm_sec,
1019  998                              bytes, zhp->zfs_name);
1020  999                  } else {
1021 1000                          zfs_nicenum(bytes, buf, sizeof (buf));
1022 1001                          (void) fprintf(stderr, "%02d:%02d:%02d   %5s   %s\n",
1023 1002                              tm->tm_hour, tm->tm_min, tm->tm_sec,
1024 1003                              buf, zhp->zfs_name);
1025 1004                  }
1026 1005          }
1027 1006  }
1028 1007  
1029 1008  static int
1030 1009  dump_snapshot(zfs_handle_t *zhp, void *arg)
1031 1010  {
1032 1011          send_dump_data_t *sdd = arg;
1033 1012          progress_arg_t pa = { 0 };
1034 1013          pthread_t tid;
1035 1014  
  
    | 
      ↓ open down ↓ | 
    46 lines elided | 
    
      ↑ open up ↑ | 
  
1036 1015          char *thissnap;
1037 1016          int err;
1038 1017          boolean_t isfromsnap, istosnap, fromorigin;
1039 1018          boolean_t exclude = B_FALSE;
1040 1019  
1041 1020          thissnap = strchr(zhp->zfs_name, '@') + 1;
1042 1021          isfromsnap = (sdd->fromsnap != NULL &&
1043 1022              strcmp(sdd->fromsnap, thissnap) == 0);
1044 1023  
1045 1024          if (!sdd->seenfrom && isfromsnap) {
1046      -                err = hold_for_send(zhp, sdd);
1047      -                if (err == 0) {
1048      -                        sdd->seenfrom = B_TRUE;
1049      -                        (void) strcpy(sdd->prevsnap, thissnap);
1050      -                        sdd->prevsnap_obj = zfs_prop_get_int(zhp,
1051      -                            ZFS_PROP_OBJSETID);
1052      -                } else if (err == ENOENT) {
1053      -                        err = 0;
1054      -                }
     1025 +                gather_holds(zhp, sdd);
     1026 +                sdd->seenfrom = B_TRUE;
     1027 +                (void) strcpy(sdd->prevsnap, thissnap);
     1028 +                sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1055 1029                  zfs_close(zhp);
1056      -                return (err);
     1030 +                return (0);
1057 1031          }
1058 1032  
1059 1033          if (sdd->seento || !sdd->seenfrom) {
1060 1034                  zfs_close(zhp);
1061 1035                  return (0);
1062 1036          }
1063 1037  
1064 1038          istosnap = (strcmp(sdd->tosnap, thissnap) == 0);
1065 1039          if (istosnap)
1066 1040                  sdd->seento = B_TRUE;
1067 1041  
1068 1042          if (!sdd->doall && !isfromsnap && !istosnap) {
1069 1043                  if (sdd->replicate) {
1070 1044                          char *snapname;
1071 1045                          nvlist_t *snapprops;
1072 1046                          /*
1073 1047                           * Filter out all intermediate snapshots except origin
1074 1048                           * snapshots needed to replicate clones.
1075 1049                           */
1076 1050                          nvlist_t *nvfs = fsavl_find(sdd->fsavl,
1077 1051                              zhp->zfs_dmustats.dds_guid, &snapname);
1078 1052  
1079 1053                          VERIFY(0 == nvlist_lookup_nvlist(nvfs,
1080 1054                              "snapprops", &snapprops));
1081 1055                          VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1082 1056                              thissnap, &snapprops));
1083 1057                          exclude = !nvlist_exists(snapprops, "is_clone_origin");
1084 1058                  } else {
1085 1059                          exclude = B_TRUE;
1086 1060                  }
1087 1061          }
1088 1062  
1089 1063          /*
1090 1064           * If a filter function exists, call it to determine whether
1091 1065           * this snapshot will be sent.
1092 1066           */
1093 1067          if (exclude || (sdd->filter_cb != NULL &&
1094 1068              sdd->filter_cb(zhp, sdd->filter_cb_arg) == B_FALSE)) {
1095 1069                  /*
1096 1070                   * This snapshot is filtered out.  Don't send it, and don't
  
    | 
      ↓ open down ↓ | 
    30 lines elided | 
    
      ↑ open up ↑ | 
  
1097 1071                   * set prevsnap_obj, so it will be as if this snapshot didn't
1098 1072                   * exist, and the next accepted snapshot will be sent as
1099 1073                   * an incremental from the last accepted one, or as the
1100 1074                   * first (and full) snapshot in the case of a replication,
1101 1075                   * non-incremental send.
1102 1076                   */
1103 1077                  zfs_close(zhp);
1104 1078                  return (0);
1105 1079          }
1106 1080  
1107      -        err = hold_for_send(zhp, sdd);
1108      -        if (err) {
1109      -                if (err == ENOENT)
1110      -                        err = 0;
1111      -                zfs_close(zhp);
1112      -                return (err);
1113      -        }
1114      -
     1081 +        gather_holds(zhp, sdd);
1115 1082          fromorigin = sdd->prevsnap[0] == '\0' &&
1116 1083              (sdd->fromorigin || sdd->replicate);
1117 1084  
1118 1085          if (sdd->verbose) {
1119 1086                  uint64_t size;
1120 1087                  err = estimate_ioctl(zhp, sdd->prevsnap_obj,
1121 1088                      fromorigin, &size);
1122 1089  
1123 1090                  if (sdd->parsable) {
1124 1091                          if (sdd->prevsnap[0] != '\0') {
1125 1092                                  (void) fprintf(stderr, "incremental\t%s\t%s",
1126 1093                                      sdd->prevsnap, zhp->zfs_name);
1127 1094                          } else {
1128 1095                                  (void) fprintf(stderr, "full\t%s",
1129 1096                                      zhp->zfs_name);
1130 1097                          }
1131 1098                  } else {
1132 1099                          (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1133 1100                              "send from @%s to %s"),
1134 1101                              sdd->prevsnap, zhp->zfs_name);
1135 1102                  }
1136 1103                  if (err == 0) {
1137 1104                          if (sdd->parsable) {
1138 1105                                  (void) fprintf(stderr, "\t%llu\n",
1139 1106                                      (longlong_t)size);
1140 1107                          } else {
1141 1108                                  char buf[16];
1142 1109                                  zfs_nicenum(size, buf, sizeof (buf));
1143 1110                                  (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1144 1111                                      " estimated size is %s\n"), buf);
1145 1112                          }
1146 1113                          sdd->size += size;
1147 1114                  } else {
1148 1115                          (void) fprintf(stderr, "\n");
1149 1116                  }
1150 1117          }
1151 1118  
1152 1119          if (!sdd->dryrun) {
1153 1120                  /*
1154 1121                   * If progress reporting is requested, spawn a new thread to
1155 1122                   * poll ZFS_IOC_SEND_PROGRESS at a regular interval.
1156 1123                   */
1157 1124                  if (sdd->progress) {
1158 1125                          pa.pa_zhp = zhp;
1159 1126                          pa.pa_fd = sdd->outfd;
1160 1127                          pa.pa_parsable = sdd->parsable;
1161 1128  
1162 1129                          if (err = pthread_create(&tid, NULL,
1163 1130                              send_progress_thread, &pa)) {
1164 1131                                  zfs_close(zhp);
1165 1132                                  return (err);
1166 1133                          }
1167 1134                  }
1168 1135  
1169 1136                  err = dump_ioctl(zhp, sdd->prevsnap, sdd->prevsnap_obj,
1170 1137                      fromorigin, sdd->outfd, sdd->debugnv);
1171 1138  
1172 1139                  if (sdd->progress) {
1173 1140                          (void) pthread_cancel(tid);
1174 1141                          (void) pthread_join(tid, NULL);
1175 1142                  }
1176 1143          }
1177 1144  
1178 1145          (void) strcpy(sdd->prevsnap, thissnap);
1179 1146          sdd->prevsnap_obj = zfs_prop_get_int(zhp, ZFS_PROP_OBJSETID);
1180 1147          zfs_close(zhp);
1181 1148          return (err);
1182 1149  }
1183 1150  
1184 1151  static int
1185 1152  dump_filesystem(zfs_handle_t *zhp, void *arg)
1186 1153  {
1187 1154          int rv = 0;
1188 1155          send_dump_data_t *sdd = arg;
1189 1156          boolean_t missingfrom = B_FALSE;
1190 1157          zfs_cmd_t zc = { 0 };
1191 1158  
1192 1159          (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1193 1160              zhp->zfs_name, sdd->tosnap);
1194 1161          if (ioctl(zhp->zfs_hdl->libzfs_fd, ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1195 1162                  (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1196 1163                      "WARNING: could not send %s@%s: does not exist\n"),
1197 1164                      zhp->zfs_name, sdd->tosnap);
1198 1165                  sdd->err = B_TRUE;
1199 1166                  return (0);
1200 1167          }
1201 1168  
1202 1169          if (sdd->replicate && sdd->fromsnap) {
1203 1170                  /*
1204 1171                   * If this fs does not have fromsnap, and we're doing
1205 1172                   * recursive, we need to send a full stream from the
1206 1173                   * beginning (or an incremental from the origin if this
1207 1174                   * is a clone).  If we're doing non-recursive, then let
1208 1175                   * them get the error.
1209 1176                   */
1210 1177                  (void) snprintf(zc.zc_name, sizeof (zc.zc_name), "%s@%s",
1211 1178                      zhp->zfs_name, sdd->fromsnap);
1212 1179                  if (ioctl(zhp->zfs_hdl->libzfs_fd,
1213 1180                      ZFS_IOC_OBJSET_STATS, &zc) != 0) {
1214 1181                          missingfrom = B_TRUE;
1215 1182                  }
1216 1183          }
1217 1184  
1218 1185          sdd->seenfrom = sdd->seento = sdd->prevsnap[0] = 0;
1219 1186          sdd->prevsnap_obj = 0;
1220 1187          if (sdd->fromsnap == NULL || missingfrom)
1221 1188                  sdd->seenfrom = B_TRUE;
1222 1189  
1223 1190          rv = zfs_iter_snapshots_sorted(zhp, dump_snapshot, arg);
1224 1191          if (!sdd->seenfrom) {
1225 1192                  (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1226 1193                      "WARNING: could not send %s@%s:\n"
1227 1194                      "incremental source (%s@%s) does not exist\n"),
1228 1195                      zhp->zfs_name, sdd->tosnap,
1229 1196                      zhp->zfs_name, sdd->fromsnap);
1230 1197                  sdd->err = B_TRUE;
1231 1198          } else if (!sdd->seento) {
1232 1199                  if (sdd->fromsnap) {
1233 1200                          (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1234 1201                              "WARNING: could not send %s@%s:\n"
1235 1202                              "incremental source (%s@%s) "
1236 1203                              "is not earlier than it\n"),
1237 1204                              zhp->zfs_name, sdd->tosnap,
1238 1205                              zhp->zfs_name, sdd->fromsnap);
1239 1206                  } else {
1240 1207                          (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1241 1208                              "WARNING: "
1242 1209                              "could not send %s@%s: does not exist\n"),
1243 1210                              zhp->zfs_name, sdd->tosnap);
1244 1211                  }
1245 1212                  sdd->err = B_TRUE;
1246 1213          }
1247 1214  
1248 1215          return (rv);
1249 1216  }
1250 1217  
1251 1218  static int
1252 1219  dump_filesystems(zfs_handle_t *rzhp, void *arg)
1253 1220  {
1254 1221          send_dump_data_t *sdd = arg;
1255 1222          nvpair_t *fspair;
1256 1223          boolean_t needagain, progress;
1257 1224  
1258 1225          if (!sdd->replicate)
1259 1226                  return (dump_filesystem(rzhp, sdd));
1260 1227  
1261 1228          /* Mark the clone origin snapshots. */
1262 1229          for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1263 1230              fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1264 1231                  nvlist_t *nvfs;
1265 1232                  uint64_t origin_guid = 0;
1266 1233  
1267 1234                  VERIFY(0 == nvpair_value_nvlist(fspair, &nvfs));
1268 1235                  (void) nvlist_lookup_uint64(nvfs, "origin", &origin_guid);
1269 1236                  if (origin_guid != 0) {
1270 1237                          char *snapname;
1271 1238                          nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1272 1239                              origin_guid, &snapname);
1273 1240                          if (origin_nv != NULL) {
1274 1241                                  nvlist_t *snapprops;
1275 1242                                  VERIFY(0 == nvlist_lookup_nvlist(origin_nv,
1276 1243                                      "snapprops", &snapprops));
1277 1244                                  VERIFY(0 == nvlist_lookup_nvlist(snapprops,
1278 1245                                      snapname, &snapprops));
1279 1246                                  VERIFY(0 == nvlist_add_boolean(
1280 1247                                      snapprops, "is_clone_origin"));
1281 1248                          }
1282 1249                  }
1283 1250          }
1284 1251  again:
1285 1252          needagain = progress = B_FALSE;
1286 1253          for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1287 1254              fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1288 1255                  nvlist_t *fslist, *parent_nv;
1289 1256                  char *fsname;
1290 1257                  zfs_handle_t *zhp;
1291 1258                  int err;
1292 1259                  uint64_t origin_guid = 0;
1293 1260                  uint64_t parent_guid = 0;
1294 1261  
1295 1262                  VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
1296 1263                  if (nvlist_lookup_boolean(fslist, "sent") == 0)
1297 1264                          continue;
1298 1265  
1299 1266                  VERIFY(nvlist_lookup_string(fslist, "name", &fsname) == 0);
1300 1267                  (void) nvlist_lookup_uint64(fslist, "origin", &origin_guid);
1301 1268                  (void) nvlist_lookup_uint64(fslist, "parentfromsnap",
1302 1269                      &parent_guid);
1303 1270  
1304 1271                  if (parent_guid != 0) {
1305 1272                          parent_nv = fsavl_find(sdd->fsavl, parent_guid, NULL);
1306 1273                          if (!nvlist_exists(parent_nv, "sent")) {
1307 1274                                  /* parent has not been sent; skip this one */
1308 1275                                  needagain = B_TRUE;
1309 1276                                  continue;
1310 1277                          }
1311 1278                  }
1312 1279  
1313 1280                  if (origin_guid != 0) {
1314 1281                          nvlist_t *origin_nv = fsavl_find(sdd->fsavl,
1315 1282                              origin_guid, NULL);
1316 1283                          if (origin_nv != NULL &&
1317 1284                              !nvlist_exists(origin_nv, "sent")) {
1318 1285                                  /*
1319 1286                                   * origin has not been sent yet;
1320 1287                                   * skip this clone.
1321 1288                                   */
1322 1289                                  needagain = B_TRUE;
1323 1290                                  continue;
1324 1291                          }
1325 1292                  }
1326 1293  
1327 1294                  zhp = zfs_open(rzhp->zfs_hdl, fsname, ZFS_TYPE_DATASET);
1328 1295                  if (zhp == NULL)
1329 1296                          return (-1);
1330 1297                  err = dump_filesystem(zhp, sdd);
1331 1298                  VERIFY(nvlist_add_boolean(fslist, "sent") == 0);
1332 1299                  progress = B_TRUE;
1333 1300                  zfs_close(zhp);
1334 1301                  if (err)
1335 1302                          return (err);
1336 1303          }
1337 1304          if (needagain) {
1338 1305                  assert(progress);
1339 1306                  goto again;
1340 1307          }
1341 1308  
1342 1309          /* clean out the sent flags in case we reuse this fss */
1343 1310          for (fspair = nvlist_next_nvpair(sdd->fss, NULL); fspair;
1344 1311              fspair = nvlist_next_nvpair(sdd->fss, fspair)) {
1345 1312                  nvlist_t *fslist;
1346 1313  
1347 1314                  VERIFY(nvpair_value_nvlist(fspair, &fslist) == 0);
1348 1315                  (void) nvlist_remove_all(fslist, "sent");
1349 1316          }
1350 1317  
1351 1318          return (0);
1352 1319  }
1353 1320  
1354 1321  /*
1355 1322   * Generate a send stream for the dataset identified by the argument zhp.
1356 1323   *
1357 1324   * The content of the send stream is the snapshot identified by
1358 1325   * 'tosnap'.  Incremental streams are requested in two ways:
1359 1326   *     - from the snapshot identified by "fromsnap" (if non-null) or
1360 1327   *     - from the origin of the dataset identified by zhp, which must
1361 1328   *       be a clone.  In this case, "fromsnap" is null and "fromorigin"
1362 1329   *       is TRUE.
1363 1330   *
1364 1331   * The send stream is recursive (i.e. dumps a hierarchy of snapshots) and
1365 1332   * uses a special header (with a hdrtype field of DMU_COMPOUNDSTREAM)
1366 1333   * if "replicate" is set.  If "doall" is set, dump all the intermediate
1367 1334   * snapshots. The DMU_COMPOUNDSTREAM header is used in the "doall"
1368 1335   * case too. If "props" is set, send properties.
1369 1336   */
1370 1337  int
1371 1338  zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
1372 1339      sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
1373 1340      void *cb_arg, nvlist_t **debugnvp)
1374 1341  {
1375 1342          char errbuf[1024];
1376 1343          send_dump_data_t sdd = { 0 };
1377 1344          int err = 0;
1378 1345          nvlist_t *fss = NULL;
1379 1346          avl_tree_t *fsavl = NULL;
1380 1347          static uint64_t holdseq;
1381 1348          int spa_version;
1382 1349          pthread_t tid;
1383 1350          int pipefd[2];
1384 1351          dedup_arg_t dda = { 0 };
1385 1352          int featureflags = 0;
1386 1353  
1387 1354          (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
1388 1355              "cannot send '%s'"), zhp->zfs_name);
1389 1356  
1390 1357          if (fromsnap && fromsnap[0] == '\0') {
1391 1358                  zfs_error_aux(zhp->zfs_hdl, dgettext(TEXT_DOMAIN,
1392 1359                      "zero-length incremental source"));
1393 1360                  return (zfs_error(zhp->zfs_hdl, EZFS_NOENT, errbuf));
1394 1361          }
1395 1362  
1396 1363          if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM) {
1397 1364                  uint64_t version;
1398 1365                  version = zfs_prop_get_int(zhp, ZFS_PROP_VERSION);
1399 1366                  if (version >= ZPL_VERSION_SA) {
1400 1367                          featureflags |= DMU_BACKUP_FEATURE_SA_SPILL;
1401 1368                  }
1402 1369          }
1403 1370  
1404 1371          if (flags->dedup && !flags->dryrun) {
1405 1372                  featureflags |= (DMU_BACKUP_FEATURE_DEDUP |
1406 1373                      DMU_BACKUP_FEATURE_DEDUPPROPS);
1407 1374                  if (err = pipe(pipefd)) {
1408 1375                          zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1409 1376                          return (zfs_error(zhp->zfs_hdl, EZFS_PIPEFAILED,
1410 1377                              errbuf));
1411 1378                  }
1412 1379                  dda.outputfd = outfd;
1413 1380                  dda.inputfd = pipefd[1];
1414 1381                  dda.dedup_hdl = zhp->zfs_hdl;
1415 1382                  if (err = pthread_create(&tid, NULL, cksummer, &dda)) {
1416 1383                          (void) close(pipefd[0]);
1417 1384                          (void) close(pipefd[1]);
1418 1385                          zfs_error_aux(zhp->zfs_hdl, strerror(errno));
1419 1386                          return (zfs_error(zhp->zfs_hdl,
1420 1387                              EZFS_THREADCREATEFAILED, errbuf));
1421 1388                  }
1422 1389          }
1423 1390  
1424 1391          if (flags->replicate || flags->doall || flags->props) {
1425 1392                  dmu_replay_record_t drr = { 0 };
1426 1393                  char *packbuf = NULL;
1427 1394                  size_t buflen = 0;
1428 1395                  zio_cksum_t zc = { 0 };
1429 1396  
1430 1397                  if (flags->replicate || flags->props) {
1431 1398                          nvlist_t *hdrnv;
1432 1399  
1433 1400                          VERIFY(0 == nvlist_alloc(&hdrnv, NV_UNIQUE_NAME, 0));
1434 1401                          if (fromsnap) {
1435 1402                                  VERIFY(0 == nvlist_add_string(hdrnv,
1436 1403                                      "fromsnap", fromsnap));
1437 1404                          }
1438 1405                          VERIFY(0 == nvlist_add_string(hdrnv, "tosnap", tosnap));
1439 1406                          if (!flags->replicate) {
1440 1407                                  VERIFY(0 == nvlist_add_boolean(hdrnv,
1441 1408                                      "not_recursive"));
1442 1409                          }
1443 1410  
1444 1411                          err = gather_nvlist(zhp->zfs_hdl, zhp->zfs_name,
1445 1412                              fromsnap, tosnap, flags->replicate, &fss, &fsavl);
1446 1413                          if (err)
1447 1414                                  goto err_out;
1448 1415                          VERIFY(0 == nvlist_add_nvlist(hdrnv, "fss", fss));
1449 1416                          err = nvlist_pack(hdrnv, &packbuf, &buflen,
1450 1417                              NV_ENCODE_XDR, 0);
1451 1418                          if (debugnvp)
1452 1419                                  *debugnvp = hdrnv;
1453 1420                          else
1454 1421                                  nvlist_free(hdrnv);
1455 1422                          if (err) {
1456 1423                                  fsavl_destroy(fsavl);
1457 1424                                  nvlist_free(fss);
1458 1425                                  goto stderr_out;
1459 1426                          }
1460 1427                  }
1461 1428  
1462 1429                  if (!flags->dryrun) {
1463 1430                          /* write first begin record */
1464 1431                          drr.drr_type = DRR_BEGIN;
1465 1432                          drr.drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
1466 1433                          DMU_SET_STREAM_HDRTYPE(drr.drr_u.drr_begin.
1467 1434                              drr_versioninfo, DMU_COMPOUNDSTREAM);
1468 1435                          DMU_SET_FEATUREFLAGS(drr.drr_u.drr_begin.
1469 1436                              drr_versioninfo, featureflags);
1470 1437                          (void) snprintf(drr.drr_u.drr_begin.drr_toname,
1471 1438                              sizeof (drr.drr_u.drr_begin.drr_toname),
1472 1439                              "%s@%s", zhp->zfs_name, tosnap);
1473 1440                          drr.drr_payloadlen = buflen;
1474 1441                          err = cksum_and_write(&drr, sizeof (drr), &zc, outfd);
1475 1442  
1476 1443                          /* write header nvlist */
1477 1444                          if (err != -1 && packbuf != NULL) {
1478 1445                                  err = cksum_and_write(packbuf, buflen, &zc,
1479 1446                                      outfd);
1480 1447                          }
1481 1448                          free(packbuf);
1482 1449                          if (err == -1) {
1483 1450                                  fsavl_destroy(fsavl);
1484 1451                                  nvlist_free(fss);
1485 1452                                  err = errno;
1486 1453                                  goto stderr_out;
1487 1454                          }
1488 1455  
1489 1456                          /* write end record */
1490 1457                          bzero(&drr, sizeof (drr));
1491 1458                          drr.drr_type = DRR_END;
1492 1459                          drr.drr_u.drr_end.drr_checksum = zc;
1493 1460                          err = write(outfd, &drr, sizeof (drr));
1494 1461                          if (err == -1) {
1495 1462                                  fsavl_destroy(fsavl);
1496 1463                                  nvlist_free(fss);
1497 1464                                  err = errno;
1498 1465                                  goto stderr_out;
1499 1466                          }
1500 1467  
1501 1468                          err = 0;
1502 1469                  }
1503 1470          }
1504 1471  
1505 1472          /* dump each stream */
1506 1473          sdd.fromsnap = fromsnap;
1507 1474          sdd.tosnap = tosnap;
1508 1475          if (flags->dedup)
1509 1476                  sdd.outfd = pipefd[0];
1510 1477          else
1511 1478                  sdd.outfd = outfd;
1512 1479          sdd.replicate = flags->replicate;
1513 1480          sdd.doall = flags->doall;
1514 1481          sdd.fromorigin = flags->fromorigin;
1515 1482          sdd.fss = fss;
1516 1483          sdd.fsavl = fsavl;
1517 1484          sdd.verbose = flags->verbose;
1518 1485          sdd.parsable = flags->parsable;
1519 1486          sdd.progress = flags->progress;
1520 1487          sdd.dryrun = flags->dryrun;
1521 1488          sdd.filter_cb = filter_func;
1522 1489          sdd.filter_cb_arg = cb_arg;
1523 1490          if (debugnvp)
1524 1491                  sdd.debugnv = *debugnvp;
1525 1492  
1526 1493          /*
1527 1494           * Some flags require that we place user holds on the datasets that are
1528 1495           * being sent so they don't get destroyed during the send. We can skip
1529 1496           * this step if the pool is imported read-only since the datasets cannot
1530 1497           * be destroyed.
1531 1498           */
1532 1499          if (!flags->dryrun && !zpool_get_prop_int(zfs_get_pool_handle(zhp),
1533 1500              ZPOOL_PROP_READONLY, NULL) &&
1534 1501              zfs_spa_version(zhp, &spa_version) == 0 &&
  
    | 
      ↓ open down ↓ | 
    410 lines elided | 
    
      ↑ open up ↑ | 
  
1535 1502              spa_version >= SPA_VERSION_USERREFS &&
1536 1503              (flags->doall || flags->replicate)) {
1537 1504                  ++holdseq;
1538 1505                  (void) snprintf(sdd.holdtag, sizeof (sdd.holdtag),
1539 1506                      ".send-%d-%llu", getpid(), (u_longlong_t)holdseq);
1540 1507                  sdd.cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
1541 1508                  if (sdd.cleanup_fd < 0) {
1542 1509                          err = errno;
1543 1510                          goto stderr_out;
1544 1511                  }
     1512 +                sdd.snapholds = fnvlist_alloc();
1545 1513          } else {
1546 1514                  sdd.cleanup_fd = -1;
     1515 +                sdd.snapholds = NULL;
1547 1516          }
1548 1517          if (flags->verbose) {
1549 1518                  /*
1550 1519                   * Do a verbose no-op dry run to get all the verbose output
1551 1520                   * before generating any data.  Then do a non-verbose real
1552 1521                   * run to generate the streams.
1553 1522                   */
1554 1523                  sdd.dryrun = B_TRUE;
1555 1524                  err = dump_filesystems(zhp, &sdd);
1556 1525                  sdd.dryrun = flags->dryrun;
1557 1526                  sdd.verbose = B_FALSE;
  
    | 
      ↓ open down ↓ | 
    1 lines elided | 
    
      ↑ open up ↑ | 
  
1558 1527                  if (flags->parsable) {
1559 1528                          (void) fprintf(stderr, "size\t%llu\n",
1560 1529                              (longlong_t)sdd.size);
1561 1530                  } else {
1562 1531                          char buf[16];
1563 1532                          zfs_nicenum(sdd.size, buf, sizeof (buf));
1564 1533                          (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
1565 1534                              "total estimated size is %s\n"), buf);
1566 1535                  }
1567 1536          }
     1537 +
     1538 +        if (sdd.snapholds != NULL) {
     1539 +                /* Holds are required. */
     1540 +                if (!flags->verbose) {
     1541 +                        /*
     1542 +                         * A verbose dry run wasn't done so do a non-verbose
     1543 +                         * dry run to gather snapshot hold's.
     1544 +                         */
     1545 +                        sdd.dryrun = B_TRUE;
     1546 +                        err = dump_filesystems(zhp, &sdd);
     1547 +                        sdd.dryrun = flags->dryrun;
     1548 +                }
     1549 +
     1550 +                if (err != 0) {
     1551 +                        fnvlist_free(sdd.snapholds);
     1552 +                        goto stderr_out;
     1553 +                }
     1554 +
     1555 +                err = zfs_hold_nvl(zhp, sdd.cleanup_fd, sdd.snapholds);
     1556 +                fnvlist_free(sdd.snapholds);
     1557 +                if (err != 0)
     1558 +                        goto stderr_out;
     1559 +        }
     1560 +        
1568 1561          err = dump_filesystems(zhp, &sdd);
1569 1562          fsavl_destroy(fsavl);
1570 1563          nvlist_free(fss);
1571 1564  
1572 1565          if (flags->dedup) {
1573 1566                  (void) close(pipefd[0]);
1574 1567                  (void) pthread_join(tid, NULL);
1575 1568          }
1576 1569  
1577 1570          if (sdd.cleanup_fd != -1) {
1578 1571                  VERIFY(0 == close(sdd.cleanup_fd));
1579 1572                  sdd.cleanup_fd = -1;
1580 1573          }
1581 1574  
1582 1575          if (!flags->dryrun && (flags->replicate || flags->doall ||
1583 1576              flags->props)) {
1584 1577                  /*
1585 1578                   * write final end record.  NB: want to do this even if
1586 1579                   * there was some error, because it might not be totally
1587 1580                   * failed.
1588 1581                   */
1589 1582                  dmu_replay_record_t drr = { 0 };
1590 1583                  drr.drr_type = DRR_END;
1591 1584                  if (write(outfd, &drr, sizeof (drr)) == -1) {
1592 1585                          return (zfs_standard_error(zhp->zfs_hdl,
1593 1586                              errno, errbuf));
1594 1587                  }
1595 1588          }
1596 1589  
1597 1590          return (err || sdd.err);
1598 1591  
1599 1592  stderr_out:
1600 1593          err = zfs_standard_error(zhp->zfs_hdl, err, errbuf);
1601 1594  err_out:
1602 1595          if (sdd.cleanup_fd != -1)
1603 1596                  VERIFY(0 == close(sdd.cleanup_fd));
1604 1597          if (flags->dedup) {
1605 1598                  (void) pthread_cancel(tid);
1606 1599                  (void) pthread_join(tid, NULL);
1607 1600                  (void) close(pipefd[0]);
1608 1601          }
1609 1602          return (err);
1610 1603  }
1611 1604  
1612 1605  /*
1613 1606   * Routines specific to "zfs recv"
1614 1607   */
1615 1608  
1616 1609  static int
1617 1610  recv_read(libzfs_handle_t *hdl, int fd, void *buf, int ilen,
1618 1611      boolean_t byteswap, zio_cksum_t *zc)
1619 1612  {
1620 1613          char *cp = buf;
1621 1614          int rv;
1622 1615          int len = ilen;
1623 1616  
1624 1617          do {
1625 1618                  rv = read(fd, cp, len);
1626 1619                  cp += rv;
1627 1620                  len -= rv;
1628 1621          } while (rv > 0);
1629 1622  
1630 1623          if (rv < 0 || len != 0) {
1631 1624                  zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
1632 1625                      "failed to read from stream"));
1633 1626                  return (zfs_error(hdl, EZFS_BADSTREAM, dgettext(TEXT_DOMAIN,
1634 1627                      "cannot receive")));
1635 1628          }
1636 1629  
1637 1630          if (zc) {
1638 1631                  if (byteswap)
1639 1632                          fletcher_4_incremental_byteswap(buf, ilen, zc);
1640 1633                  else
1641 1634                          fletcher_4_incremental_native(buf, ilen, zc);
1642 1635          }
1643 1636          return (0);
1644 1637  }
1645 1638  
1646 1639  static int
1647 1640  recv_read_nvlist(libzfs_handle_t *hdl, int fd, int len, nvlist_t **nvp,
1648 1641      boolean_t byteswap, zio_cksum_t *zc)
1649 1642  {
1650 1643          char *buf;
1651 1644          int err;
1652 1645  
1653 1646          buf = zfs_alloc(hdl, len);
1654 1647          if (buf == NULL)
1655 1648                  return (ENOMEM);
1656 1649  
1657 1650          err = recv_read(hdl, fd, buf, len, byteswap, zc);
1658 1651          if (err != 0) {
1659 1652                  free(buf);
1660 1653                  return (err);
1661 1654          }
1662 1655  
1663 1656          err = nvlist_unpack(buf, len, nvp, 0);
1664 1657          free(buf);
1665 1658          if (err != 0) {
1666 1659                  zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
1667 1660                      "stream (malformed nvlist)"));
1668 1661                  return (EINVAL);
1669 1662          }
1670 1663          return (0);
1671 1664  }
1672 1665  
1673 1666  static int
1674 1667  recv_rename(libzfs_handle_t *hdl, const char *name, const char *tryname,
1675 1668      int baselen, char *newname, recvflags_t *flags)
1676 1669  {
1677 1670          static int seq;
1678 1671          zfs_cmd_t zc = { 0 };
1679 1672          int err;
1680 1673          prop_changelist_t *clp;
1681 1674          zfs_handle_t *zhp;
1682 1675  
1683 1676          zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
1684 1677          if (zhp == NULL)
1685 1678                  return (-1);
1686 1679          clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
1687 1680              flags->force ? MS_FORCE : 0);
1688 1681          zfs_close(zhp);
1689 1682          if (clp == NULL)
1690 1683                  return (-1);
1691 1684          err = changelist_prefix(clp);
1692 1685          if (err)
1693 1686                  return (err);
1694 1687  
1695 1688          zc.zc_objset_type = DMU_OST_ZFS;
1696 1689          (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
1697 1690  
1698 1691          if (tryname) {
1699 1692                  (void) strcpy(newname, tryname);
1700 1693  
1701 1694                  (void) strlcpy(zc.zc_value, tryname, sizeof (zc.zc_value));
1702 1695  
1703 1696                  if (flags->verbose) {
1704 1697                          (void) printf("attempting rename %s to %s\n",
1705 1698                              zc.zc_name, zc.zc_value);
1706 1699                  }
1707 1700                  err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
1708 1701                  if (err == 0)
1709 1702                          changelist_rename(clp, name, tryname);
1710 1703          } else {
1711 1704                  err = ENOENT;
1712 1705          }
1713 1706  
1714 1707          if (err != 0 && strncmp(name + baselen, "recv-", 5) != 0) {
1715 1708                  seq++;
1716 1709  
1717 1710                  (void) snprintf(newname, ZFS_MAXNAMELEN, "%.*srecv-%u-%u",
1718 1711                      baselen, name, getpid(), seq);
1719 1712                  (void) strlcpy(zc.zc_value, newname, sizeof (zc.zc_value));
1720 1713  
1721 1714                  if (flags->verbose) {
1722 1715                          (void) printf("failed - trying rename %s to %s\n",
1723 1716                              zc.zc_name, zc.zc_value);
1724 1717                  }
1725 1718                  err = ioctl(hdl->libzfs_fd, ZFS_IOC_RENAME, &zc);
1726 1719                  if (err == 0)
1727 1720                          changelist_rename(clp, name, newname);
1728 1721                  if (err && flags->verbose) {
1729 1722                          (void) printf("failed (%u) - "
1730 1723                              "will try again on next pass\n", errno);
1731 1724                  }
1732 1725                  err = EAGAIN;
1733 1726          } else if (flags->verbose) {
1734 1727                  if (err == 0)
1735 1728                          (void) printf("success\n");
1736 1729                  else
1737 1730                          (void) printf("failed (%u)\n", errno);
1738 1731          }
1739 1732  
1740 1733          (void) changelist_postfix(clp);
1741 1734          changelist_free(clp);
1742 1735  
1743 1736          return (err);
1744 1737  }
1745 1738  
1746 1739  static int
1747 1740  recv_destroy(libzfs_handle_t *hdl, const char *name, int baselen,
1748 1741      char *newname, recvflags_t *flags)
1749 1742  {
1750 1743          zfs_cmd_t zc = { 0 };
1751 1744          int err = 0;
1752 1745          prop_changelist_t *clp;
1753 1746          zfs_handle_t *zhp;
1754 1747          boolean_t defer = B_FALSE;
1755 1748          int spa_version;
1756 1749  
1757 1750          zhp = zfs_open(hdl, name, ZFS_TYPE_DATASET);
1758 1751          if (zhp == NULL)
1759 1752                  return (-1);
1760 1753          clp = changelist_gather(zhp, ZFS_PROP_NAME, 0,
1761 1754              flags->force ? MS_FORCE : 0);
1762 1755          if (zfs_get_type(zhp) == ZFS_TYPE_SNAPSHOT &&
1763 1756              zfs_spa_version(zhp, &spa_version) == 0 &&
1764 1757              spa_version >= SPA_VERSION_USERREFS)
1765 1758                  defer = B_TRUE;
1766 1759          zfs_close(zhp);
1767 1760          if (clp == NULL)
1768 1761                  return (-1);
1769 1762          err = changelist_prefix(clp);
1770 1763          if (err)
1771 1764                  return (err);
1772 1765  
1773 1766          zc.zc_objset_type = DMU_OST_ZFS;
1774 1767          zc.zc_defer_destroy = defer;
1775 1768          (void) strlcpy(zc.zc_name, name, sizeof (zc.zc_name));
1776 1769  
1777 1770          if (flags->verbose)
1778 1771                  (void) printf("attempting destroy %s\n", zc.zc_name);
1779 1772          err = ioctl(hdl->libzfs_fd, ZFS_IOC_DESTROY, &zc);
1780 1773          if (err == 0) {
1781 1774                  if (flags->verbose)
1782 1775                          (void) printf("success\n");
1783 1776                  changelist_remove(clp, zc.zc_name);
1784 1777          }
1785 1778  
1786 1779          (void) changelist_postfix(clp);
1787 1780          changelist_free(clp);
1788 1781  
1789 1782          /*
1790 1783           * Deferred destroy might destroy the snapshot or only mark it to be
1791 1784           * destroyed later, and it returns success in either case.
1792 1785           */
1793 1786          if (err != 0 || (defer && zfs_dataset_exists(hdl, name,
1794 1787              ZFS_TYPE_SNAPSHOT))) {
1795 1788                  err = recv_rename(hdl, name, NULL, baselen, newname, flags);
1796 1789          }
1797 1790  
1798 1791          return (err);
1799 1792  }
1800 1793  
1801 1794  typedef struct guid_to_name_data {
1802 1795          uint64_t guid;
1803 1796          char *name;
1804 1797          char *skip;
1805 1798  } guid_to_name_data_t;
1806 1799  
1807 1800  static int
1808 1801  guid_to_name_cb(zfs_handle_t *zhp, void *arg)
1809 1802  {
1810 1803          guid_to_name_data_t *gtnd = arg;
1811 1804          int err;
1812 1805  
1813 1806          if (gtnd->skip != NULL &&
1814 1807              strcmp(zhp->zfs_name, gtnd->skip) == 0) {
1815 1808                  return (0);
1816 1809          }
1817 1810  
1818 1811          if (zhp->zfs_dmustats.dds_guid == gtnd->guid) {
1819 1812                  (void) strcpy(gtnd->name, zhp->zfs_name);
1820 1813                  zfs_close(zhp);
1821 1814                  return (EEXIST);
1822 1815          }
1823 1816  
1824 1817          err = zfs_iter_children(zhp, guid_to_name_cb, gtnd);
1825 1818          zfs_close(zhp);
1826 1819          return (err);
1827 1820  }
1828 1821  
1829 1822  /*
1830 1823   * Attempt to find the local dataset associated with this guid.  In the case of
1831 1824   * multiple matches, we attempt to find the "best" match by searching
1832 1825   * progressively larger portions of the hierarchy.  This allows one to send a
1833 1826   * tree of datasets individually and guarantee that we will find the source
1834 1827   * guid within that hierarchy, even if there are multiple matches elsewhere.
1835 1828   */
1836 1829  static int
1837 1830  guid_to_name(libzfs_handle_t *hdl, const char *parent, uint64_t guid,
1838 1831      char *name)
1839 1832  {
1840 1833          /* exhaustive search all local snapshots */
1841 1834          char pname[ZFS_MAXNAMELEN];
1842 1835          guid_to_name_data_t gtnd;
1843 1836          int err = 0;
1844 1837          zfs_handle_t *zhp;
1845 1838          char *cp;
1846 1839  
1847 1840          gtnd.guid = guid;
1848 1841          gtnd.name = name;
1849 1842          gtnd.skip = NULL;
1850 1843  
1851 1844          (void) strlcpy(pname, parent, sizeof (pname));
1852 1845  
1853 1846          /*
1854 1847           * Search progressively larger portions of the hierarchy.  This will
1855 1848           * select the "most local" version of the origin snapshot in the case
1856 1849           * that there are multiple matching snapshots in the system.
1857 1850           */
1858 1851          while ((cp = strrchr(pname, '/')) != NULL) {
1859 1852  
1860 1853                  /* Chop off the last component and open the parent */
1861 1854                  *cp = '\0';
1862 1855                  zhp = make_dataset_handle(hdl, pname);
1863 1856  
1864 1857                  if (zhp == NULL)
1865 1858                          continue;
1866 1859  
1867 1860                  err = zfs_iter_children(zhp, guid_to_name_cb, >nd);
1868 1861                  zfs_close(zhp);
1869 1862                  if (err == EEXIST)
1870 1863                          return (0);
1871 1864  
1872 1865                  /*
1873 1866                   * Remember the dataset that we already searched, so we
1874 1867                   * skip it next time through.
1875 1868                   */
1876 1869                  gtnd.skip = pname;
1877 1870          }
1878 1871  
1879 1872          return (ENOENT);
1880 1873  }
1881 1874  
1882 1875  /*
1883 1876   * Return +1 if guid1 is before guid2, 0 if they are the same, and -1 if
1884 1877   * guid1 is after guid2.
1885 1878   */
1886 1879  static int
1887 1880  created_before(libzfs_handle_t *hdl, avl_tree_t *avl,
1888 1881      uint64_t guid1, uint64_t guid2)
1889 1882  {
1890 1883          nvlist_t *nvfs;
1891 1884          char *fsname, *snapname;
1892 1885          char buf[ZFS_MAXNAMELEN];
1893 1886          int rv;
1894 1887          zfs_handle_t *guid1hdl, *guid2hdl;
1895 1888          uint64_t create1, create2;
1896 1889  
1897 1890          if (guid2 == 0)
1898 1891                  return (0);
1899 1892          if (guid1 == 0)
1900 1893                  return (1);
1901 1894  
1902 1895          nvfs = fsavl_find(avl, guid1, &snapname);
1903 1896          VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
1904 1897          (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
1905 1898          guid1hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
1906 1899          if (guid1hdl == NULL)
1907 1900                  return (-1);
1908 1901  
1909 1902          nvfs = fsavl_find(avl, guid2, &snapname);
1910 1903          VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
1911 1904          (void) snprintf(buf, sizeof (buf), "%s@%s", fsname, snapname);
1912 1905          guid2hdl = zfs_open(hdl, buf, ZFS_TYPE_SNAPSHOT);
1913 1906          if (guid2hdl == NULL) {
1914 1907                  zfs_close(guid1hdl);
1915 1908                  return (-1);
1916 1909          }
1917 1910  
1918 1911          create1 = zfs_prop_get_int(guid1hdl, ZFS_PROP_CREATETXG);
1919 1912          create2 = zfs_prop_get_int(guid2hdl, ZFS_PROP_CREATETXG);
1920 1913  
1921 1914          if (create1 < create2)
1922 1915                  rv = -1;
1923 1916          else if (create1 > create2)
1924 1917                  rv = +1;
1925 1918          else
1926 1919                  rv = 0;
1927 1920  
1928 1921          zfs_close(guid1hdl);
1929 1922          zfs_close(guid2hdl);
1930 1923  
1931 1924          return (rv);
1932 1925  }
1933 1926  
1934 1927  static int
1935 1928  recv_incremental_replication(libzfs_handle_t *hdl, const char *tofs,
1936 1929      recvflags_t *flags, nvlist_t *stream_nv, avl_tree_t *stream_avl,
1937 1930      nvlist_t *renamed)
1938 1931  {
1939 1932          nvlist_t *local_nv;
1940 1933          avl_tree_t *local_avl;
1941 1934          nvpair_t *fselem, *nextfselem;
1942 1935          char *fromsnap;
1943 1936          char newname[ZFS_MAXNAMELEN];
1944 1937          int error;
1945 1938          boolean_t needagain, progress, recursive;
1946 1939          char *s1, *s2;
1947 1940  
1948 1941          VERIFY(0 == nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap));
1949 1942  
1950 1943          recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
1951 1944              ENOENT);
1952 1945  
1953 1946          if (flags->dryrun)
1954 1947                  return (0);
1955 1948  
1956 1949  again:
1957 1950          needagain = progress = B_FALSE;
1958 1951  
1959 1952          if ((error = gather_nvlist(hdl, tofs, fromsnap, NULL,
1960 1953              recursive, &local_nv, &local_avl)) != 0)
1961 1954                  return (error);
1962 1955  
1963 1956          /*
1964 1957           * Process deletes and renames
1965 1958           */
1966 1959          for (fselem = nvlist_next_nvpair(local_nv, NULL);
1967 1960              fselem; fselem = nextfselem) {
1968 1961                  nvlist_t *nvfs, *snaps;
1969 1962                  nvlist_t *stream_nvfs = NULL;
1970 1963                  nvpair_t *snapelem, *nextsnapelem;
1971 1964                  uint64_t fromguid = 0;
1972 1965                  uint64_t originguid = 0;
1973 1966                  uint64_t stream_originguid = 0;
1974 1967                  uint64_t parent_fromsnap_guid, stream_parent_fromsnap_guid;
1975 1968                  char *fsname, *stream_fsname;
1976 1969  
1977 1970                  nextfselem = nvlist_next_nvpair(local_nv, fselem);
1978 1971  
1979 1972                  VERIFY(0 == nvpair_value_nvlist(fselem, &nvfs));
1980 1973                  VERIFY(0 == nvlist_lookup_nvlist(nvfs, "snaps", &snaps));
1981 1974                  VERIFY(0 == nvlist_lookup_string(nvfs, "name", &fsname));
1982 1975                  VERIFY(0 == nvlist_lookup_uint64(nvfs, "parentfromsnap",
1983 1976                      &parent_fromsnap_guid));
1984 1977                  (void) nvlist_lookup_uint64(nvfs, "origin", &originguid);
1985 1978  
1986 1979                  /*
1987 1980                   * First find the stream's fs, so we can check for
1988 1981                   * a different origin (due to "zfs promote")
1989 1982                   */
1990 1983                  for (snapelem = nvlist_next_nvpair(snaps, NULL);
1991 1984                      snapelem; snapelem = nvlist_next_nvpair(snaps, snapelem)) {
1992 1985                          uint64_t thisguid;
1993 1986  
1994 1987                          VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
1995 1988                          stream_nvfs = fsavl_find(stream_avl, thisguid, NULL);
1996 1989  
1997 1990                          if (stream_nvfs != NULL)
1998 1991                                  break;
1999 1992                  }
2000 1993  
2001 1994                  /* check for promote */
2002 1995                  (void) nvlist_lookup_uint64(stream_nvfs, "origin",
2003 1996                      &stream_originguid);
2004 1997                  if (stream_nvfs && originguid != stream_originguid) {
2005 1998                          switch (created_before(hdl, local_avl,
2006 1999                              stream_originguid, originguid)) {
2007 2000                          case 1: {
2008 2001                                  /* promote it! */
2009 2002                                  zfs_cmd_t zc = { 0 };
2010 2003                                  nvlist_t *origin_nvfs;
2011 2004                                  char *origin_fsname;
2012 2005  
2013 2006                                  if (flags->verbose)
2014 2007                                          (void) printf("promoting %s\n", fsname);
2015 2008  
2016 2009                                  origin_nvfs = fsavl_find(local_avl, originguid,
2017 2010                                      NULL);
2018 2011                                  VERIFY(0 == nvlist_lookup_string(origin_nvfs,
2019 2012                                      "name", &origin_fsname));
2020 2013                                  (void) strlcpy(zc.zc_value, origin_fsname,
2021 2014                                      sizeof (zc.zc_value));
2022 2015                                  (void) strlcpy(zc.zc_name, fsname,
2023 2016                                      sizeof (zc.zc_name));
2024 2017                                  error = zfs_ioctl(hdl, ZFS_IOC_PROMOTE, &zc);
2025 2018                                  if (error == 0)
2026 2019                                          progress = B_TRUE;
2027 2020                                  break;
2028 2021                          }
2029 2022                          default:
2030 2023                                  break;
2031 2024                          case -1:
2032 2025                                  fsavl_destroy(local_avl);
2033 2026                                  nvlist_free(local_nv);
2034 2027                                  return (-1);
2035 2028                          }
2036 2029                          /*
2037 2030                           * We had/have the wrong origin, therefore our
2038 2031                           * list of snapshots is wrong.  Need to handle
2039 2032                           * them on the next pass.
2040 2033                           */
2041 2034                          needagain = B_TRUE;
2042 2035                          continue;
2043 2036                  }
2044 2037  
2045 2038                  for (snapelem = nvlist_next_nvpair(snaps, NULL);
2046 2039                      snapelem; snapelem = nextsnapelem) {
2047 2040                          uint64_t thisguid;
2048 2041                          char *stream_snapname;
2049 2042                          nvlist_t *found, *props;
2050 2043  
2051 2044                          nextsnapelem = nvlist_next_nvpair(snaps, snapelem);
2052 2045  
2053 2046                          VERIFY(0 == nvpair_value_uint64(snapelem, &thisguid));
2054 2047                          found = fsavl_find(stream_avl, thisguid,
2055 2048                              &stream_snapname);
2056 2049  
2057 2050                          /* check for delete */
2058 2051                          if (found == NULL) {
2059 2052                                  char name[ZFS_MAXNAMELEN];
2060 2053  
2061 2054                                  if (!flags->force)
2062 2055                                          continue;
2063 2056  
2064 2057                                  (void) snprintf(name, sizeof (name), "%s@%s",
2065 2058                                      fsname, nvpair_name(snapelem));
2066 2059  
2067 2060                                  error = recv_destroy(hdl, name,
2068 2061                                      strlen(fsname)+1, newname, flags);
2069 2062                                  if (error)
2070 2063                                          needagain = B_TRUE;
2071 2064                                  else
2072 2065                                          progress = B_TRUE;
2073 2066                                  continue;
2074 2067                          }
2075 2068  
2076 2069                          stream_nvfs = found;
2077 2070  
2078 2071                          if (0 == nvlist_lookup_nvlist(stream_nvfs, "snapprops",
2079 2072                              &props) && 0 == nvlist_lookup_nvlist(props,
2080 2073                              stream_snapname, &props)) {
2081 2074                                  zfs_cmd_t zc = { 0 };
2082 2075  
2083 2076                                  zc.zc_cookie = B_TRUE; /* received */
2084 2077                                  (void) snprintf(zc.zc_name, sizeof (zc.zc_name),
2085 2078                                      "%s@%s", fsname, nvpair_name(snapelem));
2086 2079                                  if (zcmd_write_src_nvlist(hdl, &zc,
2087 2080                                      props) == 0) {
2088 2081                                          (void) zfs_ioctl(hdl,
2089 2082                                              ZFS_IOC_SET_PROP, &zc);
2090 2083                                          zcmd_free_nvlists(&zc);
2091 2084                                  }
2092 2085                          }
2093 2086  
2094 2087                          /* check for different snapname */
2095 2088                          if (strcmp(nvpair_name(snapelem),
2096 2089                              stream_snapname) != 0) {
2097 2090                                  char name[ZFS_MAXNAMELEN];
2098 2091                                  char tryname[ZFS_MAXNAMELEN];
2099 2092  
2100 2093                                  (void) snprintf(name, sizeof (name), "%s@%s",
2101 2094                                      fsname, nvpair_name(snapelem));
2102 2095                                  (void) snprintf(tryname, sizeof (name), "%s@%s",
2103 2096                                      fsname, stream_snapname);
2104 2097  
2105 2098                                  error = recv_rename(hdl, name, tryname,
2106 2099                                      strlen(fsname)+1, newname, flags);
2107 2100                                  if (error)
2108 2101                                          needagain = B_TRUE;
2109 2102                                  else
2110 2103                                          progress = B_TRUE;
2111 2104                          }
2112 2105  
2113 2106                          if (strcmp(stream_snapname, fromsnap) == 0)
2114 2107                                  fromguid = thisguid;
2115 2108                  }
2116 2109  
2117 2110                  /* check for delete */
2118 2111                  if (stream_nvfs == NULL) {
2119 2112                          if (!flags->force)
2120 2113                                  continue;
2121 2114  
2122 2115                          error = recv_destroy(hdl, fsname, strlen(tofs)+1,
2123 2116                              newname, flags);
2124 2117                          if (error)
2125 2118                                  needagain = B_TRUE;
2126 2119                          else
2127 2120                                  progress = B_TRUE;
2128 2121                          continue;
2129 2122                  }
2130 2123  
2131 2124                  if (fromguid == 0) {
2132 2125                          if (flags->verbose) {
2133 2126                                  (void) printf("local fs %s does not have "
2134 2127                                      "fromsnap (%s in stream); must have "
2135 2128                                      "been deleted locally; ignoring\n",
2136 2129                                      fsname, fromsnap);
2137 2130                          }
2138 2131                          continue;
2139 2132                  }
2140 2133  
2141 2134                  VERIFY(0 == nvlist_lookup_string(stream_nvfs,
2142 2135                      "name", &stream_fsname));
2143 2136                  VERIFY(0 == nvlist_lookup_uint64(stream_nvfs,
2144 2137                      "parentfromsnap", &stream_parent_fromsnap_guid));
2145 2138  
2146 2139                  s1 = strrchr(fsname, '/');
2147 2140                  s2 = strrchr(stream_fsname, '/');
2148 2141  
2149 2142                  /*
2150 2143                   * Check for rename. If the exact receive path is specified, it
2151 2144                   * does not count as a rename, but we still need to check the
2152 2145                   * datasets beneath it.
2153 2146                   */
2154 2147                  if ((stream_parent_fromsnap_guid != 0 &&
2155 2148                      parent_fromsnap_guid != 0 &&
2156 2149                      stream_parent_fromsnap_guid != parent_fromsnap_guid) ||
2157 2150                      ((flags->isprefix || strcmp(tofs, fsname) != 0) &&
2158 2151                      (s1 != NULL) && (s2 != NULL) && strcmp(s1, s2) != 0)) {
2159 2152                          nvlist_t *parent;
2160 2153                          char tryname[ZFS_MAXNAMELEN];
2161 2154  
2162 2155                          parent = fsavl_find(local_avl,
2163 2156                              stream_parent_fromsnap_guid, NULL);
2164 2157                          /*
2165 2158                           * NB: parent might not be found if we used the
2166 2159                           * tosnap for stream_parent_fromsnap_guid,
2167 2160                           * because the parent is a newly-created fs;
2168 2161                           * we'll be able to rename it after we recv the
2169 2162                           * new fs.
2170 2163                           */
2171 2164                          if (parent != NULL) {
2172 2165                                  char *pname;
2173 2166  
2174 2167                                  VERIFY(0 == nvlist_lookup_string(parent, "name",
2175 2168                                      &pname));
2176 2169                                  (void) snprintf(tryname, sizeof (tryname),
2177 2170                                      "%s%s", pname, strrchr(stream_fsname, '/'));
2178 2171                          } else {
2179 2172                                  tryname[0] = '\0';
2180 2173                                  if (flags->verbose) {
2181 2174                                          (void) printf("local fs %s new parent "
2182 2175                                              "not found\n", fsname);
2183 2176                                  }
2184 2177                          }
2185 2178  
2186 2179                          newname[0] = '\0';
2187 2180  
2188 2181                          error = recv_rename(hdl, fsname, tryname,
2189 2182                              strlen(tofs)+1, newname, flags);
2190 2183  
2191 2184                          if (renamed != NULL && newname[0] != '\0') {
2192 2185                                  VERIFY(0 == nvlist_add_boolean(renamed,
2193 2186                                      newname));
2194 2187                          }
2195 2188  
2196 2189                          if (error)
2197 2190                                  needagain = B_TRUE;
2198 2191                          else
2199 2192                                  progress = B_TRUE;
2200 2193                  }
2201 2194          }
2202 2195  
2203 2196          fsavl_destroy(local_avl);
2204 2197          nvlist_free(local_nv);
2205 2198  
2206 2199          if (needagain && progress) {
2207 2200                  /* do another pass to fix up temporary names */
2208 2201                  if (flags->verbose)
2209 2202                          (void) printf("another pass:\n");
2210 2203                  goto again;
2211 2204          }
2212 2205  
2213 2206          return (needagain);
2214 2207  }
2215 2208  
2216 2209  static int
2217 2210  zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
2218 2211      recvflags_t *flags, dmu_replay_record_t *drr, zio_cksum_t *zc,
2219 2212      char **top_zfs, int cleanup_fd, uint64_t *action_handlep)
2220 2213  {
2221 2214          nvlist_t *stream_nv = NULL;
2222 2215          avl_tree_t *stream_avl = NULL;
2223 2216          char *fromsnap = NULL;
2224 2217          char *cp;
2225 2218          char tofs[ZFS_MAXNAMELEN];
2226 2219          char sendfs[ZFS_MAXNAMELEN];
2227 2220          char errbuf[1024];
2228 2221          dmu_replay_record_t drre;
2229 2222          int error;
2230 2223          boolean_t anyerr = B_FALSE;
2231 2224          boolean_t softerr = B_FALSE;
2232 2225          boolean_t recursive;
2233 2226  
2234 2227          (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2235 2228              "cannot receive"));
2236 2229  
2237 2230          assert(drr->drr_type == DRR_BEGIN);
2238 2231          assert(drr->drr_u.drr_begin.drr_magic == DMU_BACKUP_MAGIC);
2239 2232          assert(DMU_GET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo) ==
2240 2233              DMU_COMPOUNDSTREAM);
2241 2234  
2242 2235          /*
2243 2236           * Read in the nvlist from the stream.
2244 2237           */
2245 2238          if (drr->drr_payloadlen != 0) {
2246 2239                  error = recv_read_nvlist(hdl, fd, drr->drr_payloadlen,
2247 2240                      &stream_nv, flags->byteswap, zc);
2248 2241                  if (error) {
2249 2242                          error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2250 2243                          goto out;
2251 2244                  }
2252 2245          }
2253 2246  
2254 2247          recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2255 2248              ENOENT);
2256 2249  
2257 2250          if (recursive && strchr(destname, '@')) {
2258 2251                  zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2259 2252                      "cannot specify snapshot name for multi-snapshot stream"));
2260 2253                  error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2261 2254                  goto out;
2262 2255          }
2263 2256  
2264 2257          /*
2265 2258           * Read in the end record and verify checksum.
2266 2259           */
2267 2260          if (0 != (error = recv_read(hdl, fd, &drre, sizeof (drre),
2268 2261              flags->byteswap, NULL)))
2269 2262                  goto out;
2270 2263          if (flags->byteswap) {
2271 2264                  drre.drr_type = BSWAP_32(drre.drr_type);
2272 2265                  drre.drr_u.drr_end.drr_checksum.zc_word[0] =
2273 2266                      BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[0]);
2274 2267                  drre.drr_u.drr_end.drr_checksum.zc_word[1] =
2275 2268                      BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[1]);
2276 2269                  drre.drr_u.drr_end.drr_checksum.zc_word[2] =
2277 2270                      BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[2]);
2278 2271                  drre.drr_u.drr_end.drr_checksum.zc_word[3] =
2279 2272                      BSWAP_64(drre.drr_u.drr_end.drr_checksum.zc_word[3]);
2280 2273          }
2281 2274          if (drre.drr_type != DRR_END) {
2282 2275                  error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2283 2276                  goto out;
2284 2277          }
2285 2278          if (!ZIO_CHECKSUM_EQUAL(drre.drr_u.drr_end.drr_checksum, *zc)) {
2286 2279                  zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2287 2280                      "incorrect header checksum"));
2288 2281                  error = zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2289 2282                  goto out;
2290 2283          }
2291 2284  
2292 2285          (void) nvlist_lookup_string(stream_nv, "fromsnap", &fromsnap);
2293 2286  
2294 2287          if (drr->drr_payloadlen != 0) {
2295 2288                  nvlist_t *stream_fss;
2296 2289  
2297 2290                  VERIFY(0 == nvlist_lookup_nvlist(stream_nv, "fss",
2298 2291                      &stream_fss));
2299 2292                  if ((stream_avl = fsavl_create(stream_fss)) == NULL) {
2300 2293                          zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2301 2294                              "couldn't allocate avl tree"));
2302 2295                          error = zfs_error(hdl, EZFS_NOMEM, errbuf);
2303 2296                          goto out;
2304 2297                  }
2305 2298  
2306 2299                  if (fromsnap != NULL) {
2307 2300                          nvlist_t *renamed = NULL;
2308 2301                          nvpair_t *pair = NULL;
2309 2302  
2310 2303                          (void) strlcpy(tofs, destname, ZFS_MAXNAMELEN);
2311 2304                          if (flags->isprefix) {
2312 2305                                  struct drr_begin *drrb = &drr->drr_u.drr_begin;
2313 2306                                  int i;
2314 2307  
2315 2308                                  if (flags->istail) {
2316 2309                                          cp = strrchr(drrb->drr_toname, '/');
2317 2310                                          if (cp == NULL) {
2318 2311                                                  (void) strlcat(tofs, "/",
2319 2312                                                      ZFS_MAXNAMELEN);
2320 2313                                                  i = 0;
2321 2314                                          } else {
2322 2315                                                  i = (cp - drrb->drr_toname);
2323 2316                                          }
2324 2317                                  } else {
2325 2318                                          i = strcspn(drrb->drr_toname, "/@");
2326 2319                                  }
2327 2320                                  /* zfs_receive_one() will create_parents() */
2328 2321                                  (void) strlcat(tofs, &drrb->drr_toname[i],
2329 2322                                      ZFS_MAXNAMELEN);
2330 2323                                  *strchr(tofs, '@') = '\0';
2331 2324                          }
2332 2325  
2333 2326                          if (recursive && !flags->dryrun && !flags->nomount) {
2334 2327                                  VERIFY(0 == nvlist_alloc(&renamed,
2335 2328                                      NV_UNIQUE_NAME, 0));
2336 2329                          }
2337 2330  
2338 2331                          softerr = recv_incremental_replication(hdl, tofs, flags,
2339 2332                              stream_nv, stream_avl, renamed);
2340 2333  
2341 2334                          /* Unmount renamed filesystems before receiving. */
2342 2335                          while ((pair = nvlist_next_nvpair(renamed,
2343 2336                              pair)) != NULL) {
2344 2337                                  zfs_handle_t *zhp;
2345 2338                                  prop_changelist_t *clp = NULL;
2346 2339  
2347 2340                                  zhp = zfs_open(hdl, nvpair_name(pair),
2348 2341                                      ZFS_TYPE_FILESYSTEM);
2349 2342                                  if (zhp != NULL) {
2350 2343                                          clp = changelist_gather(zhp,
2351 2344                                              ZFS_PROP_MOUNTPOINT, 0, 0);
2352 2345                                          zfs_close(zhp);
2353 2346                                          if (clp != NULL) {
2354 2347                                                  softerr |=
2355 2348                                                      changelist_prefix(clp);
2356 2349                                                  changelist_free(clp);
2357 2350                                          }
2358 2351                                  }
2359 2352                          }
2360 2353  
2361 2354                          nvlist_free(renamed);
2362 2355                  }
2363 2356          }
2364 2357  
2365 2358          /*
2366 2359           * Get the fs specified by the first path in the stream (the top level
2367 2360           * specified by 'zfs send') and pass it to each invocation of
2368 2361           * zfs_receive_one().
2369 2362           */
2370 2363          (void) strlcpy(sendfs, drr->drr_u.drr_begin.drr_toname,
2371 2364              ZFS_MAXNAMELEN);
2372 2365          if ((cp = strchr(sendfs, '@')) != NULL)
2373 2366                  *cp = '\0';
2374 2367  
2375 2368          /* Finally, receive each contained stream */
2376 2369          do {
2377 2370                  /*
2378 2371                   * we should figure out if it has a recoverable
2379 2372                   * error, in which case do a recv_skip() and drive on.
2380 2373                   * Note, if we fail due to already having this guid,
2381 2374                   * zfs_receive_one() will take care of it (ie,
2382 2375                   * recv_skip() and return 0).
2383 2376                   */
2384 2377                  error = zfs_receive_impl(hdl, destname, flags, fd,
2385 2378                      sendfs, stream_nv, stream_avl, top_zfs, cleanup_fd,
2386 2379                      action_handlep);
2387 2380                  if (error == ENODATA) {
2388 2381                          error = 0;
2389 2382                          break;
2390 2383                  }
2391 2384                  anyerr |= error;
2392 2385          } while (error == 0);
2393 2386  
2394 2387          if (drr->drr_payloadlen != 0 && fromsnap != NULL) {
2395 2388                  /*
2396 2389                   * Now that we have the fs's they sent us, try the
2397 2390                   * renames again.
2398 2391                   */
2399 2392                  softerr = recv_incremental_replication(hdl, tofs, flags,
2400 2393                      stream_nv, stream_avl, NULL);
2401 2394          }
2402 2395  
2403 2396  out:
2404 2397          fsavl_destroy(stream_avl);
2405 2398          if (stream_nv)
2406 2399                  nvlist_free(stream_nv);
2407 2400          if (softerr)
2408 2401                  error = -2;
2409 2402          if (anyerr)
2410 2403                  error = -1;
2411 2404          return (error);
2412 2405  }
2413 2406  
2414 2407  static void
2415 2408  trunc_prop_errs(int truncated)
2416 2409  {
2417 2410          ASSERT(truncated != 0);
2418 2411  
2419 2412          if (truncated == 1)
2420 2413                  (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2421 2414                      "1 more property could not be set\n"));
2422 2415          else
2423 2416                  (void) fprintf(stderr, dgettext(TEXT_DOMAIN,
2424 2417                      "%d more properties could not be set\n"), truncated);
2425 2418  }
2426 2419  
2427 2420  static int
2428 2421  recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
2429 2422  {
2430 2423          dmu_replay_record_t *drr;
2431 2424          void *buf = malloc(1<<20);
2432 2425          char errbuf[1024];
2433 2426  
2434 2427          (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2435 2428              "cannot receive:"));
2436 2429  
2437 2430          /* XXX would be great to use lseek if possible... */
2438 2431          drr = buf;
2439 2432  
2440 2433          while (recv_read(hdl, fd, drr, sizeof (dmu_replay_record_t),
2441 2434              byteswap, NULL) == 0) {
2442 2435                  if (byteswap)
2443 2436                          drr->drr_type = BSWAP_32(drr->drr_type);
2444 2437  
2445 2438                  switch (drr->drr_type) {
2446 2439                  case DRR_BEGIN:
2447 2440                          /* NB: not to be used on v2 stream packages */
2448 2441                          if (drr->drr_payloadlen != 0) {
2449 2442                                  zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2450 2443                                      "invalid substream header"));
2451 2444                                  return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2452 2445                          }
2453 2446                          break;
2454 2447  
2455 2448                  case DRR_END:
2456 2449                          free(buf);
2457 2450                          return (0);
2458 2451  
2459 2452                  case DRR_OBJECT:
2460 2453                          if (byteswap) {
2461 2454                                  drr->drr_u.drr_object.drr_bonuslen =
2462 2455                                      BSWAP_32(drr->drr_u.drr_object.
2463 2456                                      drr_bonuslen);
2464 2457                          }
2465 2458                          (void) recv_read(hdl, fd, buf,
2466 2459                              P2ROUNDUP(drr->drr_u.drr_object.drr_bonuslen, 8),
2467 2460                              B_FALSE, NULL);
2468 2461                          break;
2469 2462  
2470 2463                  case DRR_WRITE:
2471 2464                          if (byteswap) {
2472 2465                                  drr->drr_u.drr_write.drr_length =
2473 2466                                      BSWAP_64(drr->drr_u.drr_write.drr_length);
2474 2467                          }
2475 2468                          (void) recv_read(hdl, fd, buf,
2476 2469                              drr->drr_u.drr_write.drr_length, B_FALSE, NULL);
2477 2470                          break;
2478 2471                  case DRR_SPILL:
2479 2472                          if (byteswap) {
2480 2473                                  drr->drr_u.drr_write.drr_length =
2481 2474                                      BSWAP_64(drr->drr_u.drr_spill.drr_length);
2482 2475                          }
2483 2476                          (void) recv_read(hdl, fd, buf,
2484 2477                              drr->drr_u.drr_spill.drr_length, B_FALSE, NULL);
2485 2478                          break;
2486 2479                  case DRR_WRITE_BYREF:
2487 2480                  case DRR_FREEOBJECTS:
2488 2481                  case DRR_FREE:
2489 2482                          break;
2490 2483  
2491 2484                  default:
2492 2485                          zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2493 2486                              "invalid record type"));
2494 2487                          return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2495 2488                  }
2496 2489          }
2497 2490  
2498 2491          free(buf);
2499 2492          return (-1);
2500 2493  }
2501 2494  
2502 2495  /*
2503 2496   * Restores a backup of tosnap from the file descriptor specified by infd.
2504 2497   */
2505 2498  static int
2506 2499  zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
2507 2500      recvflags_t *flags, dmu_replay_record_t *drr,
2508 2501      dmu_replay_record_t *drr_noswap, const char *sendfs,
2509 2502      nvlist_t *stream_nv, avl_tree_t *stream_avl, char **top_zfs, int cleanup_fd,
2510 2503      uint64_t *action_handlep)
2511 2504  {
2512 2505          zfs_cmd_t zc = { 0 };
2513 2506          time_t begin_time;
2514 2507          int ioctl_err, ioctl_errno, err;
2515 2508          char *cp;
2516 2509          struct drr_begin *drrb = &drr->drr_u.drr_begin;
2517 2510          char errbuf[1024];
2518 2511          char prop_errbuf[1024];
2519 2512          const char *chopprefix;
2520 2513          boolean_t newfs = B_FALSE;
2521 2514          boolean_t stream_wantsnewfs;
2522 2515          uint64_t parent_snapguid = 0;
2523 2516          prop_changelist_t *clp = NULL;
2524 2517          nvlist_t *snapprops_nvlist = NULL;
2525 2518          zprop_errflags_t prop_errflags;
2526 2519          boolean_t recursive;
2527 2520  
2528 2521          begin_time = time(NULL);
2529 2522  
2530 2523          (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2531 2524              "cannot receive"));
2532 2525  
2533 2526          recursive = (nvlist_lookup_boolean(stream_nv, "not_recursive") ==
2534 2527              ENOENT);
2535 2528  
2536 2529          if (stream_avl != NULL) {
2537 2530                  char *snapname;
2538 2531                  nvlist_t *fs = fsavl_find(stream_avl, drrb->drr_toguid,
2539 2532                      &snapname);
2540 2533                  nvlist_t *props;
2541 2534                  int ret;
2542 2535  
2543 2536                  (void) nvlist_lookup_uint64(fs, "parentfromsnap",
2544 2537                      &parent_snapguid);
2545 2538                  err = nvlist_lookup_nvlist(fs, "props", &props);
2546 2539                  if (err)
2547 2540                          VERIFY(0 == nvlist_alloc(&props, NV_UNIQUE_NAME, 0));
2548 2541  
2549 2542                  if (flags->canmountoff) {
2550 2543                          VERIFY(0 == nvlist_add_uint64(props,
2551 2544                              zfs_prop_to_name(ZFS_PROP_CANMOUNT), 0));
2552 2545                  }
2553 2546                  ret = zcmd_write_src_nvlist(hdl, &zc, props);
2554 2547                  if (err)
2555 2548                          nvlist_free(props);
2556 2549  
2557 2550                  if (0 == nvlist_lookup_nvlist(fs, "snapprops", &props)) {
2558 2551                          VERIFY(0 == nvlist_lookup_nvlist(props,
2559 2552                              snapname, &snapprops_nvlist));
2560 2553                  }
2561 2554  
2562 2555                  if (ret != 0)
2563 2556                          return (-1);
2564 2557          }
2565 2558  
2566 2559          cp = NULL;
2567 2560  
2568 2561          /*
2569 2562           * Determine how much of the snapshot name stored in the stream
2570 2563           * we are going to tack on to the name they specified on the
2571 2564           * command line, and how much we are going to chop off.
2572 2565           *
2573 2566           * If they specified a snapshot, chop the entire name stored in
2574 2567           * the stream.
2575 2568           */
2576 2569          if (flags->istail) {
2577 2570                  /*
2578 2571                   * A filesystem was specified with -e. We want to tack on only
2579 2572                   * the tail of the sent snapshot path.
2580 2573                   */
2581 2574                  if (strchr(tosnap, '@')) {
2582 2575                          zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2583 2576                              "argument - snapshot not allowed with -e"));
2584 2577                          return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2585 2578                  }
2586 2579  
2587 2580                  chopprefix = strrchr(sendfs, '/');
2588 2581  
2589 2582                  if (chopprefix == NULL) {
2590 2583                          /*
2591 2584                           * The tail is the poolname, so we need to
2592 2585                           * prepend a path separator.
2593 2586                           */
2594 2587                          int len = strlen(drrb->drr_toname);
2595 2588                          cp = malloc(len + 2);
2596 2589                          cp[0] = '/';
2597 2590                          (void) strcpy(&cp[1], drrb->drr_toname);
2598 2591                          chopprefix = cp;
2599 2592                  } else {
2600 2593                          chopprefix = drrb->drr_toname + (chopprefix - sendfs);
2601 2594                  }
2602 2595          } else if (flags->isprefix) {
2603 2596                  /*
2604 2597                   * A filesystem was specified with -d. We want to tack on
2605 2598                   * everything but the first element of the sent snapshot path
2606 2599                   * (all but the pool name).
2607 2600                   */
2608 2601                  if (strchr(tosnap, '@')) {
2609 2602                          zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
2610 2603                              "argument - snapshot not allowed with -d"));
2611 2604                          return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2612 2605                  }
2613 2606  
2614 2607                  chopprefix = strchr(drrb->drr_toname, '/');
2615 2608                  if (chopprefix == NULL)
2616 2609                          chopprefix = strchr(drrb->drr_toname, '@');
2617 2610          } else if (strchr(tosnap, '@') == NULL) {
2618 2611                  /*
2619 2612                   * If a filesystem was specified without -d or -e, we want to
2620 2613                   * tack on everything after the fs specified by 'zfs send'.
2621 2614                   */
2622 2615                  chopprefix = drrb->drr_toname + strlen(sendfs);
2623 2616          } else {
2624 2617                  /* A snapshot was specified as an exact path (no -d or -e). */
2625 2618                  if (recursive) {
2626 2619                          zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2627 2620                              "cannot specify snapshot name for multi-snapshot "
2628 2621                              "stream"));
2629 2622                          return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
2630 2623                  }
2631 2624                  chopprefix = drrb->drr_toname + strlen(drrb->drr_toname);
2632 2625          }
2633 2626  
2634 2627          ASSERT(strstr(drrb->drr_toname, sendfs) == drrb->drr_toname);
2635 2628          ASSERT(chopprefix > drrb->drr_toname);
2636 2629          ASSERT(chopprefix <= drrb->drr_toname + strlen(drrb->drr_toname));
2637 2630          ASSERT(chopprefix[0] == '/' || chopprefix[0] == '@' ||
2638 2631              chopprefix[0] == '\0');
2639 2632  
2640 2633          /*
2641 2634           * Determine name of destination snapshot, store in zc_value.
2642 2635           */
2643 2636          (void) strcpy(zc.zc_value, tosnap);
2644 2637          (void) strncat(zc.zc_value, chopprefix, sizeof (zc.zc_value));
2645 2638          free(cp);
2646 2639          if (!zfs_name_valid(zc.zc_value, ZFS_TYPE_SNAPSHOT)) {
2647 2640                  zcmd_free_nvlists(&zc);
2648 2641                  return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
2649 2642          }
2650 2643  
2651 2644          /*
2652 2645           * Determine the name of the origin snapshot, store in zc_string.
2653 2646           */
2654 2647          if (drrb->drr_flags & DRR_FLAG_CLONE) {
2655 2648                  if (guid_to_name(hdl, zc.zc_value,
2656 2649                      drrb->drr_fromguid, zc.zc_string) != 0) {
2657 2650                          zcmd_free_nvlists(&zc);
2658 2651                          zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2659 2652                              "local origin for clone %s does not exist"),
2660 2653                              zc.zc_value);
2661 2654                          return (zfs_error(hdl, EZFS_NOENT, errbuf));
2662 2655                  }
2663 2656                  if (flags->verbose)
2664 2657                          (void) printf("found clone origin %s\n", zc.zc_string);
2665 2658          }
2666 2659  
2667 2660          stream_wantsnewfs = (drrb->drr_fromguid == NULL ||
2668 2661              (drrb->drr_flags & DRR_FLAG_CLONE));
2669 2662  
2670 2663          if (stream_wantsnewfs) {
2671 2664                  /*
2672 2665                   * if the parent fs does not exist, look for it based on
2673 2666                   * the parent snap GUID
2674 2667                   */
2675 2668                  (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2676 2669                      "cannot receive new filesystem stream"));
2677 2670  
2678 2671                  (void) strcpy(zc.zc_name, zc.zc_value);
2679 2672                  cp = strrchr(zc.zc_name, '/');
2680 2673                  if (cp)
2681 2674                          *cp = '\0';
2682 2675                  if (cp &&
2683 2676                      !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
2684 2677                          char suffix[ZFS_MAXNAMELEN];
2685 2678                          (void) strcpy(suffix, strrchr(zc.zc_value, '/'));
2686 2679                          if (guid_to_name(hdl, zc.zc_name, parent_snapguid,
2687 2680                              zc.zc_value) == 0) {
2688 2681                                  *strchr(zc.zc_value, '@') = '\0';
2689 2682                                  (void) strcat(zc.zc_value, suffix);
2690 2683                          }
2691 2684                  }
2692 2685          } else {
2693 2686                  /*
2694 2687                   * if the fs does not exist, look for it based on the
2695 2688                   * fromsnap GUID
2696 2689                   */
2697 2690                  (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
2698 2691                      "cannot receive incremental stream"));
2699 2692  
2700 2693                  (void) strcpy(zc.zc_name, zc.zc_value);
2701 2694                  *strchr(zc.zc_name, '@') = '\0';
2702 2695  
2703 2696                  /*
2704 2697                   * If the exact receive path was specified and this is the
2705 2698                   * topmost path in the stream, then if the fs does not exist we
2706 2699                   * should look no further.
2707 2700                   */
2708 2701                  if ((flags->isprefix || (*(chopprefix = drrb->drr_toname +
2709 2702                      strlen(sendfs)) != '\0' && *chopprefix != '@')) &&
2710 2703                      !zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
2711 2704                          char snap[ZFS_MAXNAMELEN];
2712 2705                          (void) strcpy(snap, strchr(zc.zc_value, '@'));
2713 2706                          if (guid_to_name(hdl, zc.zc_name, drrb->drr_fromguid,
2714 2707                              zc.zc_value) == 0) {
2715 2708                                  *strchr(zc.zc_value, '@') = '\0';
2716 2709                                  (void) strcat(zc.zc_value, snap);
2717 2710                          }
2718 2711                  }
2719 2712          }
2720 2713  
2721 2714          (void) strcpy(zc.zc_name, zc.zc_value);
2722 2715          *strchr(zc.zc_name, '@') = '\0';
2723 2716  
2724 2717          if (zfs_dataset_exists(hdl, zc.zc_name, ZFS_TYPE_DATASET)) {
2725 2718                  zfs_handle_t *zhp;
2726 2719  
2727 2720                  /*
2728 2721                   * Destination fs exists.  Therefore this should either
2729 2722                   * be an incremental, or the stream specifies a new fs
2730 2723                   * (full stream or clone) and they want us to blow it
2731 2724                   * away (and have therefore specified -F and removed any
2732 2725                   * snapshots).
2733 2726                   */
2734 2727                  if (stream_wantsnewfs) {
2735 2728                          if (!flags->force) {
2736 2729                                  zcmd_free_nvlists(&zc);
2737 2730                                  zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2738 2731                                      "destination '%s' exists\n"
2739 2732                                      "must specify -F to overwrite it"),
2740 2733                                      zc.zc_name);
2741 2734                                  return (zfs_error(hdl, EZFS_EXISTS, errbuf));
2742 2735                          }
2743 2736                          if (ioctl(hdl->libzfs_fd, ZFS_IOC_SNAPSHOT_LIST_NEXT,
2744 2737                              &zc) == 0) {
2745 2738                                  zcmd_free_nvlists(&zc);
2746 2739                                  zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2747 2740                                      "destination has snapshots (eg. %s)\n"
2748 2741                                      "must destroy them to overwrite it"),
2749 2742                                      zc.zc_name);
2750 2743                                  return (zfs_error(hdl, EZFS_EXISTS, errbuf));
2751 2744                          }
2752 2745                  }
2753 2746  
2754 2747                  if ((zhp = zfs_open(hdl, zc.zc_name,
2755 2748                      ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME)) == NULL) {
2756 2749                          zcmd_free_nvlists(&zc);
2757 2750                          return (-1);
2758 2751                  }
2759 2752  
2760 2753                  if (stream_wantsnewfs &&
2761 2754                      zhp->zfs_dmustats.dds_origin[0]) {
2762 2755                          zcmd_free_nvlists(&zc);
2763 2756                          zfs_close(zhp);
2764 2757                          zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2765 2758                              "destination '%s' is a clone\n"
2766 2759                              "must destroy it to overwrite it"),
2767 2760                              zc.zc_name);
2768 2761                          return (zfs_error(hdl, EZFS_EXISTS, errbuf));
2769 2762                  }
2770 2763  
2771 2764                  if (!flags->dryrun && zhp->zfs_type == ZFS_TYPE_FILESYSTEM &&
2772 2765                      stream_wantsnewfs) {
2773 2766                          /* We can't do online recv in this case */
2774 2767                          clp = changelist_gather(zhp, ZFS_PROP_NAME, 0, 0);
2775 2768                          if (clp == NULL) {
2776 2769                                  zfs_close(zhp);
2777 2770                                  zcmd_free_nvlists(&zc);
2778 2771                                  return (-1);
2779 2772                          }
2780 2773                          if (changelist_prefix(clp) != 0) {
2781 2774                                  changelist_free(clp);
2782 2775                                  zfs_close(zhp);
2783 2776                                  zcmd_free_nvlists(&zc);
2784 2777                                  return (-1);
2785 2778                          }
2786 2779                  }
2787 2780                  zfs_close(zhp);
2788 2781          } else {
2789 2782                  /*
2790 2783                   * Destination filesystem does not exist.  Therefore we better
2791 2784                   * be creating a new filesystem (either from a full backup, or
2792 2785                   * a clone).  It would therefore be invalid if the user
2793 2786                   * specified only the pool name (i.e. if the destination name
2794 2787                   * contained no slash character).
2795 2788                   */
2796 2789                  if (!stream_wantsnewfs ||
2797 2790                      (cp = strrchr(zc.zc_name, '/')) == NULL) {
2798 2791                          zcmd_free_nvlists(&zc);
2799 2792                          zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2800 2793                              "destination '%s' does not exist"), zc.zc_name);
2801 2794                          return (zfs_error(hdl, EZFS_NOENT, errbuf));
2802 2795                  }
2803 2796  
2804 2797                  /*
2805 2798                   * Trim off the final dataset component so we perform the
2806 2799                   * recvbackup ioctl to the filesystems's parent.
2807 2800                   */
2808 2801                  *cp = '\0';
2809 2802  
2810 2803                  if (flags->isprefix && !flags->istail && !flags->dryrun &&
2811 2804                      create_parents(hdl, zc.zc_value, strlen(tosnap)) != 0) {
2812 2805                          zcmd_free_nvlists(&zc);
2813 2806                          return (zfs_error(hdl, EZFS_BADRESTORE, errbuf));
2814 2807                  }
2815 2808  
2816 2809                  newfs = B_TRUE;
2817 2810          }
2818 2811  
2819 2812          zc.zc_begin_record = drr_noswap->drr_u.drr_begin;
2820 2813          zc.zc_cookie = infd;
2821 2814          zc.zc_guid = flags->force;
2822 2815          if (flags->verbose) {
2823 2816                  (void) printf("%s %s stream of %s into %s\n",
2824 2817                      flags->dryrun ? "would receive" : "receiving",
2825 2818                      drrb->drr_fromguid ? "incremental" : "full",
2826 2819                      drrb->drr_toname, zc.zc_value);
2827 2820                  (void) fflush(stdout);
2828 2821          }
2829 2822  
2830 2823          if (flags->dryrun) {
2831 2824                  zcmd_free_nvlists(&zc);
2832 2825                  return (recv_skip(hdl, infd, flags->byteswap));
2833 2826          }
2834 2827  
2835 2828          zc.zc_nvlist_dst = (uint64_t)(uintptr_t)prop_errbuf;
2836 2829          zc.zc_nvlist_dst_size = sizeof (prop_errbuf);
2837 2830          zc.zc_cleanup_fd = cleanup_fd;
2838 2831          zc.zc_action_handle = *action_handlep;
2839 2832  
2840 2833          err = ioctl_err = zfs_ioctl(hdl, ZFS_IOC_RECV, &zc);
2841 2834          ioctl_errno = errno;
2842 2835          prop_errflags = (zprop_errflags_t)zc.zc_obj;
2843 2836  
2844 2837          if (err == 0) {
2845 2838                  nvlist_t *prop_errors;
2846 2839                  VERIFY(0 == nvlist_unpack((void *)(uintptr_t)zc.zc_nvlist_dst,
2847 2840                      zc.zc_nvlist_dst_size, &prop_errors, 0));
2848 2841  
2849 2842                  nvpair_t *prop_err = NULL;
2850 2843  
2851 2844                  while ((prop_err = nvlist_next_nvpair(prop_errors,
2852 2845                      prop_err)) != NULL) {
2853 2846                          char tbuf[1024];
2854 2847                          zfs_prop_t prop;
2855 2848                          int intval;
2856 2849  
2857 2850                          prop = zfs_name_to_prop(nvpair_name(prop_err));
2858 2851                          (void) nvpair_value_int32(prop_err, &intval);
2859 2852                          if (strcmp(nvpair_name(prop_err),
2860 2853                              ZPROP_N_MORE_ERRORS) == 0) {
2861 2854                                  trunc_prop_errs(intval);
2862 2855                                  break;
2863 2856                          } else {
2864 2857                                  (void) snprintf(tbuf, sizeof (tbuf),
2865 2858                                      dgettext(TEXT_DOMAIN,
2866 2859                                      "cannot receive %s property on %s"),
2867 2860                                      nvpair_name(prop_err), zc.zc_name);
2868 2861                                  zfs_setprop_error(hdl, prop, intval, tbuf);
2869 2862                          }
2870 2863                  }
2871 2864                  nvlist_free(prop_errors);
2872 2865          }
2873 2866  
2874 2867          zc.zc_nvlist_dst = 0;
2875 2868          zc.zc_nvlist_dst_size = 0;
2876 2869          zcmd_free_nvlists(&zc);
2877 2870  
2878 2871          if (err == 0 && snapprops_nvlist) {
2879 2872                  zfs_cmd_t zc2 = { 0 };
2880 2873  
2881 2874                  (void) strcpy(zc2.zc_name, zc.zc_value);
2882 2875                  zc2.zc_cookie = B_TRUE; /* received */
2883 2876                  if (zcmd_write_src_nvlist(hdl, &zc2, snapprops_nvlist) == 0) {
2884 2877                          (void) zfs_ioctl(hdl, ZFS_IOC_SET_PROP, &zc2);
2885 2878                          zcmd_free_nvlists(&zc2);
2886 2879                  }
2887 2880          }
2888 2881  
2889 2882          if (err && (ioctl_errno == ENOENT || ioctl_errno == EEXIST)) {
2890 2883                  /*
2891 2884                   * It may be that this snapshot already exists,
2892 2885                   * in which case we want to consume & ignore it
2893 2886                   * rather than failing.
2894 2887                   */
2895 2888                  avl_tree_t *local_avl;
2896 2889                  nvlist_t *local_nv, *fs;
2897 2890                  cp = strchr(zc.zc_value, '@');
2898 2891  
2899 2892                  /*
2900 2893                   * XXX Do this faster by just iterating over snaps in
2901 2894                   * this fs.  Also if zc_value does not exist, we will
2902 2895                   * get a strange "does not exist" error message.
2903 2896                   */
2904 2897                  *cp = '\0';
2905 2898                  if (gather_nvlist(hdl, zc.zc_value, NULL, NULL, B_FALSE,
2906 2899                      &local_nv, &local_avl) == 0) {
2907 2900                          *cp = '@';
2908 2901                          fs = fsavl_find(local_avl, drrb->drr_toguid, NULL);
2909 2902                          fsavl_destroy(local_avl);
2910 2903                          nvlist_free(local_nv);
2911 2904  
2912 2905                          if (fs != NULL) {
2913 2906                                  if (flags->verbose) {
2914 2907                                          (void) printf("snap %s already exists; "
2915 2908                                              "ignoring\n", zc.zc_value);
2916 2909                                  }
2917 2910                                  err = ioctl_err = recv_skip(hdl, infd,
2918 2911                                      flags->byteswap);
2919 2912                          }
2920 2913                  }
2921 2914                  *cp = '@';
2922 2915          }
2923 2916  
2924 2917          if (ioctl_err != 0) {
2925 2918                  switch (ioctl_errno) {
2926 2919                  case ENODEV:
2927 2920                          cp = strchr(zc.zc_value, '@');
2928 2921                          *cp = '\0';
2929 2922                          zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2930 2923                              "most recent snapshot of %s does not\n"
2931 2924                              "match incremental source"), zc.zc_value);
2932 2925                          (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
2933 2926                          *cp = '@';
2934 2927                          break;
2935 2928                  case ETXTBSY:
2936 2929                          zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2937 2930                              "destination %s has been modified\n"
2938 2931                              "since most recent snapshot"), zc.zc_name);
2939 2932                          (void) zfs_error(hdl, EZFS_BADRESTORE, errbuf);
2940 2933                          break;
2941 2934                  case EEXIST:
2942 2935                          cp = strchr(zc.zc_value, '@');
2943 2936                          if (newfs) {
2944 2937                                  /* it's the containing fs that exists */
2945 2938                                  *cp = '\0';
2946 2939                          }
2947 2940                          zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2948 2941                              "destination already exists"));
2949 2942                          (void) zfs_error_fmt(hdl, EZFS_EXISTS,
2950 2943                              dgettext(TEXT_DOMAIN, "cannot restore to %s"),
2951 2944                              zc.zc_value);
2952 2945                          *cp = '@';
2953 2946                          break;
2954 2947                  case EINVAL:
2955 2948                          (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2956 2949                          break;
2957 2950                  case ECKSUM:
2958 2951                          zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2959 2952                              "invalid stream (checksum mismatch)"));
2960 2953                          (void) zfs_error(hdl, EZFS_BADSTREAM, errbuf);
2961 2954                          break;
2962 2955                  case ENOTSUP:
2963 2956                          zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2964 2957                              "pool must be upgraded to receive this stream."));
2965 2958                          (void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
2966 2959                          break;
2967 2960                  case EDQUOT:
2968 2961                          zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
2969 2962                              "destination %s space quota exceeded"), zc.zc_name);
2970 2963                          (void) zfs_error(hdl, EZFS_NOSPC, errbuf);
2971 2964                          break;
2972 2965                  default:
2973 2966                          (void) zfs_standard_error(hdl, ioctl_errno, errbuf);
2974 2967                  }
2975 2968          }
2976 2969  
2977 2970          /*
2978 2971           * Mount the target filesystem (if created).  Also mount any
2979 2972           * children of the target filesystem if we did a replication
2980 2973           * receive (indicated by stream_avl being non-NULL).
2981 2974           */
2982 2975          cp = strchr(zc.zc_value, '@');
2983 2976          if (cp && (ioctl_err == 0 || !newfs)) {
2984 2977                  zfs_handle_t *h;
2985 2978  
2986 2979                  *cp = '\0';
2987 2980                  h = zfs_open(hdl, zc.zc_value,
2988 2981                      ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME);
2989 2982                  if (h != NULL) {
2990 2983                          if (h->zfs_type == ZFS_TYPE_VOLUME) {
2991 2984                                  *cp = '@';
2992 2985                          } else if (newfs || stream_avl) {
2993 2986                                  /*
2994 2987                                   * Track the first/top of hierarchy fs,
2995 2988                                   * for mounting and sharing later.
2996 2989                                   */
2997 2990                                  if (top_zfs && *top_zfs == NULL)
2998 2991                                          *top_zfs = zfs_strdup(hdl, zc.zc_value);
2999 2992                          }
3000 2993                          zfs_close(h);
3001 2994                  }
3002 2995                  *cp = '@';
3003 2996          }
3004 2997  
3005 2998          if (clp) {
3006 2999                  err |= changelist_postfix(clp);
3007 3000                  changelist_free(clp);
3008 3001          }
3009 3002  
3010 3003          if (prop_errflags & ZPROP_ERR_NOCLEAR) {
3011 3004                  (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
3012 3005                      "failed to clear unreceived properties on %s"),
3013 3006                      zc.zc_name);
3014 3007                  (void) fprintf(stderr, "\n");
3015 3008          }
3016 3009          if (prop_errflags & ZPROP_ERR_NORESTORE) {
3017 3010                  (void) fprintf(stderr, dgettext(TEXT_DOMAIN, "Warning: "
3018 3011                      "failed to restore original properties on %s"),
3019 3012                      zc.zc_name);
3020 3013                  (void) fprintf(stderr, "\n");
3021 3014          }
3022 3015  
3023 3016          if (err || ioctl_err)
3024 3017                  return (-1);
3025 3018  
3026 3019          *action_handlep = zc.zc_action_handle;
3027 3020  
3028 3021          if (flags->verbose) {
3029 3022                  char buf1[64];
3030 3023                  char buf2[64];
3031 3024                  uint64_t bytes = zc.zc_cookie;
3032 3025                  time_t delta = time(NULL) - begin_time;
3033 3026                  if (delta == 0)
3034 3027                          delta = 1;
3035 3028                  zfs_nicenum(bytes, buf1, sizeof (buf1));
3036 3029                  zfs_nicenum(bytes/delta, buf2, sizeof (buf1));
3037 3030  
3038 3031                  (void) printf("received %sB stream in %lu seconds (%sB/sec)\n",
3039 3032                      buf1, delta, buf2);
3040 3033          }
3041 3034  
3042 3035          return (0);
3043 3036  }
3044 3037  
3045 3038  static int
3046 3039  zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags,
3047 3040      int infd, const char *sendfs, nvlist_t *stream_nv, avl_tree_t *stream_avl,
3048 3041      char **top_zfs, int cleanup_fd, uint64_t *action_handlep)
3049 3042  {
3050 3043          int err;
3051 3044          dmu_replay_record_t drr, drr_noswap;
3052 3045          struct drr_begin *drrb = &drr.drr_u.drr_begin;
3053 3046          char errbuf[1024];
3054 3047          zio_cksum_t zcksum = { 0 };
3055 3048          uint64_t featureflags;
3056 3049          int hdrtype;
3057 3050  
3058 3051          (void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
3059 3052              "cannot receive"));
3060 3053  
3061 3054          if (flags->isprefix &&
3062 3055              !zfs_dataset_exists(hdl, tosnap, ZFS_TYPE_DATASET)) {
3063 3056                  zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "specified fs "
3064 3057                      "(%s) does not exist"), tosnap);
3065 3058                  return (zfs_error(hdl, EZFS_NOENT, errbuf));
3066 3059          }
3067 3060  
3068 3061          /* read in the BEGIN record */
3069 3062          if (0 != (err = recv_read(hdl, infd, &drr, sizeof (drr), B_FALSE,
3070 3063              &zcksum)))
3071 3064                  return (err);
3072 3065  
3073 3066          if (drr.drr_type == DRR_END || drr.drr_type == BSWAP_32(DRR_END)) {
3074 3067                  /* It's the double end record at the end of a package */
3075 3068                  return (ENODATA);
3076 3069          }
3077 3070  
3078 3071          /* the kernel needs the non-byteswapped begin record */
3079 3072          drr_noswap = drr;
3080 3073  
3081 3074          flags->byteswap = B_FALSE;
3082 3075          if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
3083 3076                  /*
3084 3077                   * We computed the checksum in the wrong byteorder in
3085 3078                   * recv_read() above; do it again correctly.
3086 3079                   */
3087 3080                  bzero(&zcksum, sizeof (zio_cksum_t));
3088 3081                  fletcher_4_incremental_byteswap(&drr, sizeof (drr), &zcksum);
3089 3082                  flags->byteswap = B_TRUE;
3090 3083  
3091 3084                  drr.drr_type = BSWAP_32(drr.drr_type);
3092 3085                  drr.drr_payloadlen = BSWAP_32(drr.drr_payloadlen);
3093 3086                  drrb->drr_magic = BSWAP_64(drrb->drr_magic);
3094 3087                  drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
3095 3088                  drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
3096 3089                  drrb->drr_type = BSWAP_32(drrb->drr_type);
3097 3090                  drrb->drr_flags = BSWAP_32(drrb->drr_flags);
3098 3091                  drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
3099 3092                  drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
3100 3093          }
3101 3094  
3102 3095          if (drrb->drr_magic != DMU_BACKUP_MAGIC || drr.drr_type != DRR_BEGIN) {
3103 3096                  zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3104 3097                      "stream (bad magic number)"));
3105 3098                  return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3106 3099          }
3107 3100  
3108 3101          featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
3109 3102          hdrtype = DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo);
3110 3103  
3111 3104          if (!DMU_STREAM_SUPPORTED(featureflags) ||
3112 3105              (hdrtype != DMU_SUBSTREAM && hdrtype != DMU_COMPOUNDSTREAM)) {
3113 3106                  zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
3114 3107                      "stream has unsupported feature, feature flags = %lx"),
3115 3108                      featureflags);
3116 3109                  return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3117 3110          }
3118 3111  
3119 3112          if (strchr(drrb->drr_toname, '@') == NULL) {
3120 3113                  zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "invalid "
3121 3114                      "stream (bad snapshot name)"));
3122 3115                  return (zfs_error(hdl, EZFS_BADSTREAM, errbuf));
3123 3116          }
3124 3117  
3125 3118          if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) == DMU_SUBSTREAM) {
3126 3119                  char nonpackage_sendfs[ZFS_MAXNAMELEN];
3127 3120                  if (sendfs == NULL) {
3128 3121                          /*
3129 3122                           * We were not called from zfs_receive_package(). Get
3130 3123                           * the fs specified by 'zfs send'.
3131 3124                           */
3132 3125                          char *cp;
3133 3126                          (void) strlcpy(nonpackage_sendfs,
3134 3127                              drr.drr_u.drr_begin.drr_toname, ZFS_MAXNAMELEN);
3135 3128                          if ((cp = strchr(nonpackage_sendfs, '@')) != NULL)
3136 3129                                  *cp = '\0';
3137 3130                          sendfs = nonpackage_sendfs;
3138 3131                  }
3139 3132                  return (zfs_receive_one(hdl, infd, tosnap, flags,
3140 3133                      &drr, &drr_noswap, sendfs, stream_nv, stream_avl,
3141 3134                      top_zfs, cleanup_fd, action_handlep));
3142 3135          } else {
3143 3136                  assert(DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
3144 3137                      DMU_COMPOUNDSTREAM);
3145 3138                  return (zfs_receive_package(hdl, infd, tosnap, flags,
3146 3139                      &drr, &zcksum, top_zfs, cleanup_fd, action_handlep));
3147 3140          }
3148 3141  }
3149 3142  
3150 3143  /*
3151 3144   * Restores a backup of tosnap from the file descriptor specified by infd.
3152 3145   * Return 0 on total success, -2 if some things couldn't be
3153 3146   * destroyed/renamed/promoted, -1 if some things couldn't be received.
3154 3147   * (-1 will override -2).
3155 3148   */
3156 3149  int
3157 3150  zfs_receive(libzfs_handle_t *hdl, const char *tosnap, recvflags_t *flags,
3158 3151      int infd, avl_tree_t *stream_avl)
3159 3152  {
3160 3153          char *top_zfs = NULL;
3161 3154          int err;
3162 3155          int cleanup_fd;
3163 3156          uint64_t action_handle = 0;
3164 3157  
3165 3158          cleanup_fd = open(ZFS_DEV, O_RDWR|O_EXCL);
3166 3159          VERIFY(cleanup_fd >= 0);
3167 3160  
3168 3161          err = zfs_receive_impl(hdl, tosnap, flags, infd, NULL, NULL,
3169 3162              stream_avl, &top_zfs, cleanup_fd, &action_handle);
3170 3163  
3171 3164          VERIFY(0 == close(cleanup_fd));
3172 3165  
3173 3166          if (err == 0 && !flags->nomount && top_zfs) {
3174 3167                  zfs_handle_t *zhp;
3175 3168                  prop_changelist_t *clp;
3176 3169  
3177 3170                  zhp = zfs_open(hdl, top_zfs, ZFS_TYPE_FILESYSTEM);
3178 3171                  if (zhp != NULL) {
3179 3172                          clp = changelist_gather(zhp, ZFS_PROP_MOUNTPOINT,
3180 3173                              CL_GATHER_MOUNT_ALWAYS, 0);
3181 3174                          zfs_close(zhp);
3182 3175                          if (clp != NULL) {
3183 3176                                  /* mount and share received datasets */
3184 3177                                  err = changelist_postfix(clp);
3185 3178                                  changelist_free(clp);
3186 3179                          }
3187 3180                  }
3188 3181                  if (zhp == NULL || clp == NULL || err)
3189 3182                          err = -1;
3190 3183          }
3191 3184          if (top_zfs)
3192 3185                  free(top_zfs);
3193 3186  
3194 3187          return (err);
3195 3188  }
  
    | 
      ↓ open down ↓ | 
    1618 lines elided | 
    
      ↑ open up ↑ | 
  
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX