/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2013 by Delphix. All rights reserved.
 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
 */

#include <sys/dmu.h>
#include <sys/dmu_impl.h>
#include <sys/dmu_tx.h>
#include <sys/dbuf.h>
#include <sys/dnode.h>
#include <sys/zfs_context.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_traverse.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_synctask.h>
#include <sys/zfs_ioctl.h>
#include <sys/zap.h>
#include <sys/zio_checksum.h>
#include <sys/zfs_znode.h>
#include <zfs_fletcher.h>
#include <sys/avl.h>
#include <sys/ddt.h>
#include <sys/zfs_onexit.h>
#include <sys/dmu_send.h>
#include <sys/dsl_destroy.h>

/* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */
int zfs_send_corrupt_data = B_FALSE;

static char *dmu_recv_tag = "dmu_recv_tag";
static const char *recv_clone_name = "%recv";

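/*
 * Write 'len' bytes to the stream, folding them into the running
 * fletcher-4 checksum in dsa_zc.  That checksum accumulates over every
 * byte written and is emitted in the trailing DRR_END record (see
 * dmu_send_impl()), which lets the receiver verify the whole stream.
 */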
static int
dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
{
        dsl_dataset_t *ds = dsp->dsa_os->os_dsl_dataset;
        ssize_t resid; /* have to get resid to get detailed errno */
        ASSERT0(len % 8);

        fletcher_4_incremental_native(buf, len, &dsp->dsa_zc);
        dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp,
            (caddr_t)buf, len,
            0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid);

        mutex_enter(&ds->ds_sendstream_lock);
        *dsp->dsa_off += len;
        mutex_exit(&ds->ds_sendstream_lock);

        return (dsp->dsa_err);
}

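/*
 * Queue a DRR_FREE record covering the given range, coalescing it with
 * the pending record when possible.  Illustrative example (hypothetical
 * numbers): freeing object 5 at offset 0 for 128K and then at offset
 * 128K for 128K leaves a single pending DRR_FREE for object 5, offset 0,
 * length 256K; a non-adjacent free, or a record of a different type,
 * pushes the pending record out to the stream first.
 */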
static int
dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
    uint64_t length)
{
        struct drr_free *drrf = &(dsp->dsa_drr->drr_u.drr_free);

        if (length != -1ULL && offset + length < offset)
                length = -1ULL;

        /*
         * If there is a pending op, but it's not PENDING_FREE, push it out,
         * since free block aggregation can only be done for blocks of the
         * same type (i.e., DRR_FREE records can only be aggregated with
         * other DRR_FREE records, and DRR_FREEOBJECTS records can only be
         * aggregated with other DRR_FREEOBJECTS records).
         */
        if (dsp->dsa_pending_op != PENDING_NONE &&
            dsp->dsa_pending_op != PENDING_FREE) {
                if (dump_bytes(dsp, dsp->dsa_drr,
                    sizeof (dmu_replay_record_t)) != 0)
                        return (SET_ERROR(EINTR));
                dsp->dsa_pending_op = PENDING_NONE;
        }

        if (dsp->dsa_pending_op == PENDING_FREE) {
                /*
                 * There should never be a PENDING_FREE if length is -1
                 * (because dump_dnode is the only place where this
                 * function is called with a -1, and only after flushing
                 * any pending record).
                 */
                ASSERT(length != -1ULL);
                /*
                 * Check to see whether this free block can be aggregated
                 * with the pending one.
                 */
                if (drrf->drr_object == object && drrf->drr_offset +
                    drrf->drr_length == offset) {
                        drrf->drr_length += length;
                        return (0);
                } else {
                        /* not a continuation; push out the pending record */
                        if (dump_bytes(dsp, dsp->dsa_drr,
                            sizeof (dmu_replay_record_t)) != 0)
                                return (SET_ERROR(EINTR));
                        dsp->dsa_pending_op = PENDING_NONE;
                }
        }
        /* create a FREE record and make it pending */
        bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
        dsp->dsa_drr->drr_type = DRR_FREE;
        drrf->drr_object = object;
        drrf->drr_offset = offset;
        drrf->drr_length = length;
        drrf->drr_toguid = dsp->dsa_toguid;
        if (length == -1ULL) {
                if (dump_bytes(dsp, dsp->dsa_drr,
                    sizeof (dmu_replay_record_t)) != 0)
                        return (SET_ERROR(EINTR));
        } else {
                dsp->dsa_pending_op = PENDING_FREE;
        }

        return (0);
}

static int
dump_data(dmu_sendarg_t *dsp, dmu_object_type_t type,
    uint64_t object, uint64_t offset, int blksz, const blkptr_t *bp, void *data)
{
        struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write);

        /*
         * If there is any kind of pending aggregation (currently either
         * a grouping of free objects or free blocks), push it out to
         * the stream, since aggregation can't be done across operations
         * of different types.
         */
        if (dsp->dsa_pending_op != PENDING_NONE) {
                if (dump_bytes(dsp, dsp->dsa_drr,
                    sizeof (dmu_replay_record_t)) != 0)
                        return (SET_ERROR(EINTR));
                dsp->dsa_pending_op = PENDING_NONE;
        }
        /* write a DATA record */
        bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
        dsp->dsa_drr->drr_type = DRR_WRITE;
        drrw->drr_object = object;
        drrw->drr_type = type;
        drrw->drr_offset = offset;
        drrw->drr_length = blksz;
        drrw->drr_toguid = dsp->dsa_toguid;
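        /*
         * Record the block's checksum and key properties.  For checksums
         * that are dedup-capable, DRR_CHECKSUM_DEDUP is set so that
         * stream deduplication can recognize identical blocks and replace
         * later copies with DRR_WRITE_BYREF records.
         */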
        drrw->drr_checksumtype = BP_GET_CHECKSUM(bp);
        if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup)
                drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP;
        DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp));
        DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp));
        DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp));
        drrw->drr_key.ddk_cksum = bp->blk_cksum;

        if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
                return (SET_ERROR(EINTR));
        if (dump_bytes(dsp, data, blksz) != 0)
                return (SET_ERROR(EINTR));
        return (0);
}

static int
dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data)
{
        struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill);

        if (dsp->dsa_pending_op != PENDING_NONE) {
                if (dump_bytes(dsp, dsp->dsa_drr,
                    sizeof (dmu_replay_record_t)) != 0)
                        return (SET_ERROR(EINTR));
                dsp->dsa_pending_op = PENDING_NONE;
        }

        /* write a SPILL record */
        bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
        dsp->dsa_drr->drr_type = DRR_SPILL;
        drrs->drr_object = object;
        drrs->drr_length = blksz;
        drrs->drr_toguid = dsp->dsa_toguid;

        if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
                return (SET_ERROR(EINTR));
        if (dump_bytes(dsp, data, blksz) != 0)
                return (SET_ERROR(EINTR));
        return (0);
}

static int
dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs)
{
        struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects);

        /*
         * If there is a pending op, but it's not PENDING_FREEOBJECTS,
         * push it out, since free block aggregation can only be done for
         * blocks of the same type (i.e., DRR_FREE records can only be
         * aggregated with other DRR_FREE records, and DRR_FREEOBJECTS
         * records can only be aggregated with other DRR_FREEOBJECTS
         * records).
         */
        if (dsp->dsa_pending_op != PENDING_NONE &&
            dsp->dsa_pending_op != PENDING_FREEOBJECTS) {
                if (dump_bytes(dsp, dsp->dsa_drr,
                    sizeof (dmu_replay_record_t)) != 0)
                        return (SET_ERROR(EINTR));
                dsp->dsa_pending_op = PENDING_NONE;
        }
        if (dsp->dsa_pending_op == PENDING_FREEOBJECTS) {
                /*
                 * See whether this free object array can be aggregated
                 * with the pending one.
                 */
                if (drrfo->drr_firstobj + drrfo->drr_numobjs == firstobj) {
                        drrfo->drr_numobjs += numobjs;
                        return (0);
                } else {
                        /* can't be aggregated; push out the pending record */
                        if (dump_bytes(dsp, dsp->dsa_drr,
                            sizeof (dmu_replay_record_t)) != 0)
                                return (SET_ERROR(EINTR));
                        dsp->dsa_pending_op = PENDING_NONE;
                }
        }

        /* write a FREEOBJECTS record */
        bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
        dsp->dsa_drr->drr_type = DRR_FREEOBJECTS;
        drrfo->drr_firstobj = firstobj;
        drrfo->drr_numobjs = numobjs;
        drrfo->drr_toguid = dsp->dsa_toguid;

        dsp->dsa_pending_op = PENDING_FREEOBJECTS;

        return (0);
}

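/*
 * Emit a DRR_OBJECT record describing the dnode, followed by its
 * (8-byte-padded) bonus buffer, then free everything beyond the
 * object's last block.
 */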
static int
dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp)
{
        struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object);

        if (dnp == NULL || dnp->dn_type == DMU_OT_NONE)
                return (dump_freeobjects(dsp, object, 1));

        if (dsp->dsa_pending_op != PENDING_NONE) {
                if (dump_bytes(dsp, dsp->dsa_drr,
                    sizeof (dmu_replay_record_t)) != 0)
                        return (SET_ERROR(EINTR));
                dsp->dsa_pending_op = PENDING_NONE;
        }

        /* write an OBJECT record */
        bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
        dsp->dsa_drr->drr_type = DRR_OBJECT;
        drro->drr_object = object;
        drro->drr_type = dnp->dn_type;
        drro->drr_bonustype = dnp->dn_bonustype;
        drro->drr_blksz = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT;
        drro->drr_bonuslen = dnp->dn_bonuslen;
        drro->drr_checksumtype = dnp->dn_checksum;
        drro->drr_compress = dnp->dn_compress;
        drro->drr_toguid = dsp->dsa_toguid;

        if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
                return (SET_ERROR(EINTR));

        if (dump_bytes(dsp, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0)
                return (SET_ERROR(EINTR));

        /* free anything past the end of the file */
        if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) *
            (dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL))
                return (SET_ERROR(EINTR));
        if (dsp->dsa_err != 0)
                return (SET_ERROR(EINTR));
        return (0);
}

#define BP_SPAN(dnp, level) \
        (((uint64_t)(dnp)->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \
        (level) * ((dnp)->dn_indblkshift - SPA_BLKPTRSHIFT)))
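
/*
 * BP_SPAN() is the number of bytes of the object covered by one block
 * pointer at the given level.  For example (assuming 128K data blocks,
 * so dn_datablkszsec == 256, and dn_indblkshift == 14, i.e. 16K indirect
 * blocks holding 128 block pointers each): BP_SPAN(dnp, 0) == 128K,
 * BP_SPAN(dnp, 1) == 128 * 128K == 16M, and each additional level
 * multiplies the span by another factor of 128.
 */

/*
 * Callback invoked by traverse_dataset() for each block in the snapshot.
 * It dispatches on block type: holes become DRR_FREE/DRR_FREEOBJECTS
 * records, dnode blocks are decomposed into per-dnode DRR_OBJECT records,
 * spill blocks become DRR_SPILL records, and level-0 data blocks are read
 * through the ARC and emitted as DRR_WRITE records.
 */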

/* ARGSUSED */
static int
backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
{
        dmu_sendarg_t *dsp = arg;
        dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE;
        int err = 0;

        if (issig(JUSTLOOKING) && issig(FORREAL))
                return (SET_ERROR(EINTR));

        if (zb->zb_object != DMU_META_DNODE_OBJECT &&
            DMU_OBJECT_IS_SPECIAL(zb->zb_object)) {
                return (0);
        } else if (bp == NULL && zb->zb_object == DMU_META_DNODE_OBJECT) {
                uint64_t span = BP_SPAN(dnp, zb->zb_level);
                uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT;
                err = dump_freeobjects(dsp, dnobj, span >> DNODE_SHIFT);
        } else if (bp == NULL) {
                uint64_t span = BP_SPAN(dnp, zb->zb_level);
                err = dump_free(dsp, zb->zb_object, zb->zb_blkid * span, span);
        } else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) {
                return (0);
        } else if (type == DMU_OT_DNODE) {
                dnode_phys_t *blk;
                int i;
                int blksz = BP_GET_LSIZE(bp);
                uint32_t aflags = ARC_WAIT;
                arc_buf_t *abuf;

                if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
                    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
                    &aflags, zb) != 0)
                        return (SET_ERROR(EIO));

                blk = abuf->b_data;
                for (i = 0; i < blksz >> DNODE_SHIFT; i++) {
                        uint64_t dnobj = (zb->zb_blkid <<
                            (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i;
                        err = dump_dnode(dsp, dnobj, blk+i);
                        if (err != 0)
                                break;
                }
                (void) arc_buf_remove_ref(abuf, &abuf);
        } else if (type == DMU_OT_SA) {
                uint32_t aflags = ARC_WAIT;
                arc_buf_t *abuf;
                int blksz = BP_GET_LSIZE(bp);

                if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
                    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
                    &aflags, zb) != 0)
                        return (SET_ERROR(EIO));

                err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data);
                (void) arc_buf_remove_ref(abuf, &abuf);
        } else { /* it's a level-0 block of a regular object */
                uint32_t aflags = ARC_WAIT;
                arc_buf_t *abuf;
                int blksz = BP_GET_LSIZE(bp);

                if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
                    ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
                    &aflags, zb) != 0) {
                        if (zfs_send_corrupt_data) {
                                /* Send a block filled with 0x"zfs badd bloc" */
                                abuf = arc_buf_alloc(spa, blksz, &abuf,
                                    ARC_BUFC_DATA);
                                uint64_t *ptr;
                                for (ptr = abuf->b_data;
                                    (char *)ptr < (char *)abuf->b_data + blksz;
                                    ptr++)
                                        *ptr = 0x2f5baddb10c;
                        } else {
                                return (SET_ERROR(EIO));
                        }
                }

                err = dump_data(dsp, type, zb->zb_object, zb->zb_blkid * blksz,
                    blksz, bp, abuf->b_data);
                (void) arc_buf_remove_ref(abuf, &abuf);
        }

        ASSERT(err == 0 || err == EINTR);
        return (err);
}

/*
 * Releases dp, ds, and fromds, using the specified tag.
 */
static int
dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds,
    dsl_dataset_t *fromds, int outfd, vnode_t *vp, offset_t *off)
{
        objset_t *os;
        dmu_replay_record_t *drr;
        dmu_sendarg_t *dsp;
        int err;
        uint64_t fromtxg = 0;

        if (fromds != NULL && !dsl_dataset_is_before(ds, fromds)) {
                dsl_dataset_rele(fromds, tag);
                dsl_dataset_rele(ds, tag);
                dsl_pool_rele(dp, tag);
                return (SET_ERROR(EXDEV));
        }

        err = dmu_objset_from_ds(ds, &os);
        if (err != 0) {
                if (fromds != NULL)
                        dsl_dataset_rele(fromds, tag);
                dsl_dataset_rele(ds, tag);
                dsl_pool_rele(dp, tag);
                return (err);
        }

        drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
        drr->drr_type = DRR_BEGIN;
        drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
        DMU_SET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo,
            DMU_SUBSTREAM);

#ifdef _KERNEL
        if (dmu_objset_type(os) == DMU_OST_ZFS) {
                uint64_t version;
                if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &version) != 0) {
                        kmem_free(drr, sizeof (dmu_replay_record_t));
                        if (fromds != NULL)
                                dsl_dataset_rele(fromds, tag);
                        dsl_dataset_rele(ds, tag);
                        dsl_pool_rele(dp, tag);
                        return (SET_ERROR(EINVAL));
                }
                if (version >= ZPL_VERSION_SA) {
                        DMU_SET_FEATUREFLAGS(
                            drr->drr_u.drr_begin.drr_versioninfo,
                            DMU_BACKUP_FEATURE_SA_SPILL);
                }
        }
#endif

        drr->drr_u.drr_begin.drr_creation_time =
            ds->ds_phys->ds_creation_time;
        drr->drr_u.drr_begin.drr_type = dmu_objset_type(os);
        if (fromds != NULL && ds->ds_dir != fromds->ds_dir)
                drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE;
        drr->drr_u.drr_begin.drr_toguid = ds->ds_phys->ds_guid;
        if (ds->ds_phys->ds_flags & DS_FLAG_CI_DATASET)
                drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA;

        if (fromds != NULL)
                drr->drr_u.drr_begin.drr_fromguid = fromds->ds_phys->ds_guid;
        dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname);

        if (fromds != NULL) {
                fromtxg = fromds->ds_phys->ds_creation_txg;
                dsl_dataset_rele(fromds, tag);
                fromds = NULL;
        }

        dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP);

        dsp->dsa_drr = drr;
        dsp->dsa_vp = vp;
        dsp->dsa_outfd = outfd;
        dsp->dsa_proc = curproc;
        dsp->dsa_os = os;
        dsp->dsa_off = off;
        dsp->dsa_toguid = ds->ds_phys->ds_guid;
        ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0);
        dsp->dsa_pending_op = PENDING_NONE;

        mutex_enter(&ds->ds_sendstream_lock);
        list_insert_head(&ds->ds_sendstreams, dsp);
        mutex_exit(&ds->ds_sendstream_lock);

        dsl_dataset_long_hold(ds, FTAG);
        dsl_pool_rele(dp, tag);

        if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
                err = dsp->dsa_err;
                goto out;
        }

        err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH,
            backup_cb, dsp);

        if (dsp->dsa_pending_op != PENDING_NONE)
                if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0)
                        err = SET_ERROR(EINTR);

        if (err != 0) {
                if (err == EINTR && dsp->dsa_err != 0)
                        err = dsp->dsa_err;
                goto out;
        }

        bzero(drr, sizeof (dmu_replay_record_t));
        drr->drr_type = DRR_END;
        drr->drr_u.drr_end.drr_checksum = dsp->dsa_zc;
        drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid;

        if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
                err = dsp->dsa_err;
                goto out;
        }

out:
        mutex_enter(&ds->ds_sendstream_lock);
        list_remove(&ds->ds_sendstreams, dsp);
        mutex_exit(&ds->ds_sendstream_lock);

        kmem_free(drr, sizeof (dmu_replay_record_t));
        kmem_free(dsp, sizeof (dmu_sendarg_t));

        dsl_dataset_long_rele(ds, FTAG);
        dsl_dataset_rele(ds, tag);

        return (err);
}

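/*
 * Generate a send stream for the snapshot identified by object number
 * 'tosnap' (with optional incremental source 'fromsnap'), writing it to
 * vnode 'vp' and tracking the output offset in '*off'.
 */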
int
dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
    int outfd, vnode_t *vp, offset_t *off)
{
        dsl_pool_t *dp;
        dsl_dataset_t *ds;
        dsl_dataset_t *fromds = NULL;
        int err;

        err = dsl_pool_hold(pool, FTAG, &dp);
        if (err != 0)
                return (err);

        err = dsl_dataset_hold_obj(dp, tosnap, FTAG, &ds);
        if (err != 0) {
                dsl_pool_rele(dp, FTAG);
                return (err);
        }

        if (fromsnap != 0) {
                err = dsl_dataset_hold_obj(dp, fromsnap, FTAG, &fromds);
                if (err != 0) {
                        dsl_dataset_rele(ds, FTAG);
                        dsl_pool_rele(dp, FTAG);
                        return (err);
                }
        }

        return (dmu_send_impl(FTAG, dp, ds, fromds, outfd, vp, off));
}

int
dmu_send(const char *tosnap, const char *fromsnap,
    int outfd, vnode_t *vp, offset_t *off)
{
        dsl_pool_t *dp;
        dsl_dataset_t *ds;
        dsl_dataset_t *fromds = NULL;
        int err;

        if (strchr(tosnap, '@') == NULL)
                return (SET_ERROR(EINVAL));
        if (fromsnap != NULL && strchr(fromsnap, '@') == NULL)
                return (SET_ERROR(EINVAL));

        err = dsl_pool_hold(tosnap, FTAG, &dp);
        if (err != 0)
                return (err);

        err = dsl_dataset_hold(dp, tosnap, FTAG, &ds);
        if (err != 0) {
                dsl_pool_rele(dp, FTAG);
                return (err);
        }

        if (fromsnap != NULL) {
                err = dsl_dataset_hold(dp, fromsnap, FTAG, &fromds);
                if (err != 0) {
                        dsl_dataset_rele(ds, FTAG);
                        dsl_pool_rele(dp, FTAG);
                        return (err);
                }
        }
        return (dmu_send_impl(FTAG, dp, ds, fromds, outfd, vp, off));
}

int
dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep)
{
        dsl_pool_t *dp = ds->ds_dir->dd_pool;
        int err;
        uint64_t size;

        ASSERT(dsl_pool_config_held(dp));

        /* tosnap must be a snapshot */
        if (!dsl_dataset_is_snapshot(ds))
                return (SET_ERROR(EINVAL));

        /*
         * fromsnap must be an earlier snapshot from the same fs as tosnap,
         * or the origin's fs.
         */
        if (fromds != NULL && !dsl_dataset_is_before(ds, fromds))
                return (SET_ERROR(EXDEV));

        /* Get uncompressed size estimate of changed data. */
        if (fromds == NULL) {
                size = ds->ds_phys->ds_uncompressed_bytes;
        } else {
                uint64_t used, comp;
                err = dsl_dataset_space_written(fromds, ds,
                    &used, &comp, &size);
                if (err != 0)
                        return (err);
        }

        /*
         * Assume that space (both on-disk and in-stream) is dominated by
         * data.  We will adjust for indirect blocks and the copies property,
         * but ignore per-object space used (eg, dnodes and DRR_OBJECT records).
         */

        /*
         * Subtract out approximate space used by indirect blocks.
         * Assume most space is used by data blocks (non-indirect, non-dnode).
         * Assume all blocks are recordsize.  Assume ditto blocks and
         * internal fragmentation cancel out compression.
         *
         * Therefore, space used by indirect blocks is sizeof (blkptr_t)
         * per block, which we observe in practice.
         */
        uint64_t recordsize;
        err = dsl_prop_get_int_ds(ds, "recordsize", &recordsize);
        if (err != 0)
                return (err);
        size -= size / recordsize * sizeof (blkptr_t);

        /* Add in the space for the record associated with each block. */
        size += size / recordsize * sizeof (dmu_replay_record_t);
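
        /*
         * Illustrative arithmetic (hypothetical numbers): with a 128K
         * recordsize, 1G of data is ~8192 blocks, so the two adjustments
         * above are 8192 * sizeof (blkptr_t) == 1M subtracted and 8192
         * record headers (a few hundred bytes each) added -- both well
         * under 1% of the data itself.
         */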

        *sizep = size;

        return (0);
}

typedef struct dmu_recv_begin_arg {
        const char *drba_origin;
        dmu_recv_cookie_t *drba_cookie;
        cred_t *drba_cred;
        uint64_t drba_snapobj;
} dmu_recv_begin_arg_t;

static int
recv_begin_check_existing_impl(dmu_recv_begin_arg_t *drba, dsl_dataset_t *ds,
    uint64_t fromguid)
{
        uint64_t val;
        int error;
        dsl_pool_t *dp = ds->ds_dir->dd_pool;

        /* temporary clone name must not exist */
        error = zap_lookup(dp->dp_meta_objset,
            ds->ds_dir->dd_phys->dd_child_dir_zapobj, recv_clone_name,
            8, 1, &val);
        if (error != ENOENT)
                return (error == 0 ? EBUSY : error);

        /* new snapshot name must not exist */
        error = zap_lookup(dp->dp_meta_objset,
            ds->ds_phys->ds_snapnames_zapobj, drba->drba_cookie->drc_tosnap,
            8, 1, &val);
        if (error != ENOENT)
                return (error == 0 ? EEXIST : error);

        if (fromguid != 0) {
                dsl_dataset_t *snap;
                uint64_t obj = ds->ds_phys->ds_prev_snap_obj;

                /* Find snapshot in this dir that matches fromguid. */
                while (obj != 0) {
                        error = dsl_dataset_hold_obj(dp, obj, FTAG,
                            &snap);
                        if (error != 0)
                                return (SET_ERROR(ENODEV));
                        if (snap->ds_dir != ds->ds_dir) {
                                dsl_dataset_rele(snap, FTAG);
                                return (SET_ERROR(ENODEV));
                        }
                        if (snap->ds_phys->ds_guid == fromguid)
                                break;
                        obj = snap->ds_phys->ds_prev_snap_obj;
                        dsl_dataset_rele(snap, FTAG);
                }
                if (obj == 0)
                        return (SET_ERROR(ENODEV));

                if (drba->drba_cookie->drc_force) {
                        drba->drba_snapobj = obj;
                } else {
                        /*
                         * If we are not forcing, there must be no
                         * changes since fromsnap.
                         */
                        if (dsl_dataset_modified_since_snap(ds, snap)) {
                                dsl_dataset_rele(snap, FTAG);
                                return (SET_ERROR(ETXTBSY));
                        }
                        drba->drba_snapobj = ds->ds_prev->ds_object;
                }

                dsl_dataset_rele(snap, FTAG);
        } else {
                /* if full, most recent snapshot must be $ORIGIN */
                if (ds->ds_phys->ds_prev_snap_txg >= TXG_INITIAL)
                        return (SET_ERROR(ENODEV));
                drba->drba_snapobj = ds->ds_phys->ds_prev_snap_obj;
        }

        return (0);
}

static int
dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
{
        dmu_recv_begin_arg_t *drba = arg;
        dsl_pool_t *dp = dmu_tx_pool(tx);
        struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
        uint64_t fromguid = drrb->drr_fromguid;
        int flags = drrb->drr_flags;
        int error;
        dsl_dataset_t *ds;
        const char *tofs = drba->drba_cookie->drc_tofs;

        /* already checked */
        ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC);

        if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
            DMU_COMPOUNDSTREAM ||
            drrb->drr_type >= DMU_OST_NUMTYPES ||
            ((flags & DRR_FLAG_CLONE) && drba->drba_origin == NULL))
                return (SET_ERROR(EINVAL));

        /* Verify pool version supports SA if SA_SPILL feature set */
        if ((DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
            DMU_BACKUP_FEATURE_SA_SPILL) &&
            spa_version(dp->dp_spa) < SPA_VERSION_SA) {
                return (SET_ERROR(ENOTSUP));
        }

        error = dsl_dataset_hold(dp, tofs, FTAG, &ds);
        if (error == 0) {
                /* target fs already exists; recv into temp clone */

                /* Can't recv a clone into an existing fs */
                if (flags & DRR_FLAG_CLONE) {
                        dsl_dataset_rele(ds, FTAG);
                        return (SET_ERROR(EINVAL));
                }

                error = recv_begin_check_existing_impl(drba, ds, fromguid);
                dsl_dataset_rele(ds, FTAG);
        } else if (error == ENOENT) {
                /* target fs does not exist; must be a full backup or clone */
                char buf[MAXNAMELEN];

                /*
                 * If it's a non-clone incremental, we are missing the
                 * target fs, so fail the recv.
                 */
                if (fromguid != 0 && !(flags & DRR_FLAG_CLONE))
                        return (SET_ERROR(ENOENT));

                /* Open the parent of tofs */
                ASSERT3U(strlen(tofs), <, MAXNAMELEN);
                (void) strlcpy(buf, tofs, strrchr(tofs, '/') - tofs + 1);
                error = dsl_dataset_hold(dp, buf, FTAG, &ds);
                if (error != 0)
                        return (error);

                if (drba->drba_origin != NULL) {
                        dsl_dataset_t *origin;
                        error = dsl_dataset_hold(dp, drba->drba_origin,
                            FTAG, &origin);
                        if (error != 0) {
                                dsl_dataset_rele(ds, FTAG);
                                return (error);
                        }
                        if (!dsl_dataset_is_snapshot(origin)) {
                                dsl_dataset_rele(origin, FTAG);
                                dsl_dataset_rele(ds, FTAG);
                                return (SET_ERROR(EINVAL));
                        }
                        if (origin->ds_phys->ds_guid != fromguid) {
                                dsl_dataset_rele(origin, FTAG);
                                dsl_dataset_rele(ds, FTAG);
                                return (SET_ERROR(ENODEV));
                        }
                        dsl_dataset_rele(origin, FTAG);
                }
                dsl_dataset_rele(ds, FTAG);
                error = 0;
        }
        return (error);
}

static void
dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
{
        dmu_recv_begin_arg_t *drba = arg;
        dsl_pool_t *dp = dmu_tx_pool(tx);
        struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
        const char *tofs = drba->drba_cookie->drc_tofs;
        dsl_dataset_t *ds, *newds;
        uint64_t dsobj;
        int error;
        uint64_t crflags;

        crflags = (drrb->drr_flags & DRR_FLAG_CI_DATA) ?
            DS_FLAG_CI_DATASET : 0;

        error = dsl_dataset_hold(dp, tofs, FTAG, &ds);
        if (error == 0) {
                /* create temporary clone */
                dsl_dataset_t *snap = NULL;
                if (drba->drba_snapobj != 0) {
                        VERIFY0(dsl_dataset_hold_obj(dp,
                            drba->drba_snapobj, FTAG, &snap));
                }
                dsobj = dsl_dataset_create_sync(ds->ds_dir, recv_clone_name,
                    snap, crflags, drba->drba_cred, tx);
                dsl_dataset_rele(snap, FTAG);
                dsl_dataset_rele(ds, FTAG);
        } else {
                dsl_dir_t *dd;
                const char *tail;
                dsl_dataset_t *origin = NULL;

                VERIFY0(dsl_dir_hold(dp, tofs, FTAG, &dd, &tail));

                if (drba->drba_origin != NULL) {
                        VERIFY0(dsl_dataset_hold(dp, drba->drba_origin,
                            FTAG, &origin));
                }

                /* Create new dataset. */
                dsobj = dsl_dataset_create_sync(dd,
                    strrchr(tofs, '/') + 1,
                    origin, crflags, drba->drba_cred, tx);
                if (origin != NULL)
                        dsl_dataset_rele(origin, FTAG);
                dsl_dir_rele(dd, FTAG);
                drba->drba_cookie->drc_newfs = B_TRUE;
        }
        VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &newds));

        dmu_buf_will_dirty(newds->ds_dbuf, tx);
        newds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;

        /*
         * If we actually created a non-clone, we need to create the
         * objset in our new dataset.
         */
        if (BP_IS_HOLE(dsl_dataset_get_blkptr(newds))) {
                (void) dmu_objset_create_impl(dp->dp_spa,
                    newds, dsl_dataset_get_blkptr(newds), drrb->drr_type, tx);
        }

        drba->drba_cookie->drc_ds = newds;

        spa_history_log_internal_ds(newds, "receive", tx, "");
}

/*
 * NB: callers *MUST* call dmu_recv_stream() if dmu_recv_begin()
 * succeeds; otherwise we will leak the holds on the datasets.
 */
int
dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb,
    boolean_t force, char *origin, dmu_recv_cookie_t *drc)
{
        dmu_recv_begin_arg_t drba = { 0 };
        dmu_replay_record_t *drr;

        bzero(drc, sizeof (dmu_recv_cookie_t));
        drc->drc_drrb = drrb;
        drc->drc_tosnap = tosnap;
        drc->drc_tofs = tofs;
        drc->drc_force = force;

        if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC))
                drc->drc_byteswap = B_TRUE;
        else if (drrb->drr_magic != DMU_BACKUP_MAGIC)
                return (SET_ERROR(EINVAL));

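        /*
         * The caller has already consumed (and possibly byteswapped) the
         * BEGIN record, so reconstruct it here and run it through
         * fletcher-4 to seed drc_cksum exactly as if it had been
         * checksummed straight off the stream; restore_read() continues
         * the running checksum from this point.
         */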
        drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
        drr->drr_type = DRR_BEGIN;
        drr->drr_u.drr_begin = *drc->drc_drrb;
        if (drc->drc_byteswap) {
                fletcher_4_incremental_byteswap(drr,
                    sizeof (dmu_replay_record_t), &drc->drc_cksum);
        } else {
                fletcher_4_incremental_native(drr,
                    sizeof (dmu_replay_record_t), &drc->drc_cksum);
        }
        kmem_free(drr, sizeof (dmu_replay_record_t));

        if (drc->drc_byteswap) {
                drrb->drr_magic = BSWAP_64(drrb->drr_magic);
                drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
                drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
                drrb->drr_type = BSWAP_32(drrb->drr_type);
                drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
                drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
        }

        drba.drba_origin = origin;
        drba.drba_cookie = drc;
        drba.drba_cred = CRED();

        return (dsl_sync_task(tofs, dmu_recv_begin_check, dmu_recv_begin_sync,
            &drba, 5));
}

struct restorearg {
        int err;
        boolean_t byteswap;
        vnode_t *vp;
        char *buf;
        uint64_t voff;
        int bufsize; /* amount of memory allocated for buf */
        zio_cksum_t cksum;
        avl_tree_t *guid_to_ds_map;
};

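/*
 * Maps a GUID in a dedup'ed stream to the dataset (received earlier on
 * this system) that holds the referenced data; see restore_write_byref().
 */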
typedef struct guid_map_entry {
        uint64_t        guid;
        dsl_dataset_t   *gme_ds;
        avl_node_t      avlnode;
} guid_map_entry_t;

static int
guid_compare(const void *arg1, const void *arg2)
{
        const guid_map_entry_t *gmep1 = arg1;
        const guid_map_entry_t *gmep2 = arg2;

        if (gmep1->guid < gmep2->guid)
                return (-1);
        else if (gmep1->guid > gmep2->guid)
                return (1);
        return (0);
}

static void
free_guid_map_onexit(void *arg)
{
        avl_tree_t *ca = arg;
        void *cookie = NULL;
        guid_map_entry_t *gmep;

        while ((gmep = avl_destroy_nodes(ca, &cookie)) != NULL) {
                dsl_dataset_long_rele(gmep->gme_ds, gmep);
                dsl_dataset_rele(gmep->gme_ds, gmep);
                kmem_free(gmep, sizeof (guid_map_entry_t));
        }
        avl_destroy(ca);
        kmem_free(ca, sizeof (avl_tree_t));
}

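/*
 * Read the next 'len' bytes of the stream into ra->buf, folding them
 * into the running checksum.  Note that the buffer is reused on every
 * call, so callers must copy out anything they need before reading
 * again; this is why dmu_recv_stream() copies each record header before
 * dispatching it.
 */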
static void *
restore_read(struct restorearg *ra, int len)
{
        void *rv;
        int done = 0;

        /* some records require 8-byte alignment, so all reads must too */
        ASSERT0(len % 8);

        while (done < len) {
                ssize_t resid;

                ra->err = vn_rdwr(UIO_READ, ra->vp,
                    (caddr_t)ra->buf + done, len - done,
                    ra->voff, UIO_SYSSPACE, FAPPEND,
                    RLIM64_INFINITY, CRED(), &resid);

                if (resid == len - done)
                        ra->err = SET_ERROR(EINVAL);
                ra->voff += len - done - resid;
                done = len - resid;
                if (ra->err != 0)
                        return (NULL);
        }

        ASSERT3U(done, ==, len);
        rv = ra->buf;
        if (ra->byteswap)
                fletcher_4_incremental_byteswap(rv, len, &ra->cksum);
        else
                fletcher_4_incremental_native(rv, len, &ra->cksum);
        return (rv);
}

static void
backup_byteswap(dmu_replay_record_t *drr)
{
#define DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X))
#define DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X))
        drr->drr_type = BSWAP_32(drr->drr_type);
        drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen);
        switch (drr->drr_type) {
        case DRR_BEGIN:
                DO64(drr_begin.drr_magic);
                DO64(drr_begin.drr_versioninfo);
                DO64(drr_begin.drr_creation_time);
                DO32(drr_begin.drr_type);
                DO32(drr_begin.drr_flags);
                DO64(drr_begin.drr_toguid);
                DO64(drr_begin.drr_fromguid);
                break;
        case DRR_OBJECT:
                DO64(drr_object.drr_object);
                /* DO64(drr_object.drr_allocation_txg); */
                DO32(drr_object.drr_type);
                DO32(drr_object.drr_bonustype);
                DO32(drr_object.drr_blksz);
                DO32(drr_object.drr_bonuslen);
                DO64(drr_object.drr_toguid);
                break;
        case DRR_FREEOBJECTS:
                DO64(drr_freeobjects.drr_firstobj);
                DO64(drr_freeobjects.drr_numobjs);
                DO64(drr_freeobjects.drr_toguid);
                break;
        case DRR_WRITE:
                DO64(drr_write.drr_object);
                DO32(drr_write.drr_type);
                DO64(drr_write.drr_offset);
                DO64(drr_write.drr_length);
                DO64(drr_write.drr_toguid);
                DO64(drr_write.drr_key.ddk_cksum.zc_word[0]);
                DO64(drr_write.drr_key.ddk_cksum.zc_word[1]);
                DO64(drr_write.drr_key.ddk_cksum.zc_word[2]);
                DO64(drr_write.drr_key.ddk_cksum.zc_word[3]);
                DO64(drr_write.drr_key.ddk_prop);
                break;
        case DRR_WRITE_BYREF:
                DO64(drr_write_byref.drr_object);
                DO64(drr_write_byref.drr_offset);
                DO64(drr_write_byref.drr_length);
                DO64(drr_write_byref.drr_toguid);
                DO64(drr_write_byref.drr_refguid);
                DO64(drr_write_byref.drr_refobject);
                DO64(drr_write_byref.drr_refoffset);
                DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[0]);
                DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[1]);
                DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[2]);
                DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[3]);
                DO64(drr_write_byref.drr_key.ddk_prop);
                break;
        case DRR_FREE:
                DO64(drr_free.drr_object);
                DO64(drr_free.drr_offset);
                DO64(drr_free.drr_length);
                DO64(drr_free.drr_toguid);
                break;
        case DRR_SPILL:
                DO64(drr_spill.drr_object);
                DO64(drr_spill.drr_length);
                DO64(drr_spill.drr_toguid);
                break;
        case DRR_END:
                DO64(drr_end.drr_checksum.zc_word[0]);
                DO64(drr_end.drr_checksum.zc_word[1]);
                DO64(drr_end.drr_checksum.zc_word[2]);
                DO64(drr_end.drr_checksum.zc_word[3]);
                DO64(drr_end.drr_toguid);
                break;
        }
#undef DO64
#undef DO32
}

static int
restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
{
        int err;
        dmu_tx_t *tx;
        void *data = NULL;

        if (drro->drr_type == DMU_OT_NONE ||
            !DMU_OT_IS_VALID(drro->drr_type) ||
            !DMU_OT_IS_VALID(drro->drr_bonustype) ||
            drro->drr_checksumtype >= ZIO_CHECKSUM_FUNCTIONS ||
            drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS ||
            P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) ||
            drro->drr_blksz < SPA_MINBLOCKSIZE ||
            drro->drr_blksz > SPA_MAXBLOCKSIZE ||
            drro->drr_bonuslen > DN_MAX_BONUSLEN) {
                return (SET_ERROR(EINVAL));
        }

        err = dmu_object_info(os, drro->drr_object, NULL);

        if (err != 0 && err != ENOENT)
                return (SET_ERROR(EINVAL));

        if (drro->drr_bonuslen) {
                data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8));
                if (ra->err != 0)
                        return (ra->err);
        }

        if (err == ENOENT) {
                /* currently free, want to be allocated */
                tx = dmu_tx_create(os);
                dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
                err = dmu_tx_assign(tx, TXG_WAIT);
                if (err != 0) {
                        dmu_tx_abort(tx);
                        return (err);
                }
                err = dmu_object_claim(os, drro->drr_object,
                    drro->drr_type, drro->drr_blksz,
                    drro->drr_bonustype, drro->drr_bonuslen, tx);
                dmu_tx_commit(tx);
        } else {
                /* currently allocated, want to be allocated */
                err = dmu_object_reclaim(os, drro->drr_object,
                    drro->drr_type, drro->drr_blksz,
                    drro->drr_bonustype, drro->drr_bonuslen);
        }
        if (err != 0) {
                return (SET_ERROR(EINVAL));
        }

        tx = dmu_tx_create(os);
        dmu_tx_hold_bonus(tx, drro->drr_object);
        err = dmu_tx_assign(tx, TXG_WAIT);
        if (err != 0) {
                dmu_tx_abort(tx);
                return (err);
        }

        dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksumtype,
            tx);
        dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx);

        if (data != NULL) {
                dmu_buf_t *db;

                VERIFY0(dmu_bonus_hold(os, drro->drr_object, FTAG, &db));
                dmu_buf_will_dirty(db, tx);

                ASSERT3U(db->db_size, >=, drro->drr_bonuslen);
                bcopy(data, db->db_data, drro->drr_bonuslen);
                if (ra->byteswap) {
                        dmu_object_byteswap_t byteswap =
                            DMU_OT_BYTESWAP(drro->drr_bonustype);
                        dmu_ot_byteswap[byteswap].ob_func(db->db_data,
                            drro->drr_bonuslen);
                }
                dmu_buf_rele(db, FTAG);
        }
        dmu_tx_commit(tx);
        return (0);
}

/* ARGSUSED */
static int
restore_freeobjects(struct restorearg *ra, objset_t *os,
    struct drr_freeobjects *drrfo)
{
        uint64_t obj;

        if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj)
                return (SET_ERROR(EINVAL));

        for (obj = drrfo->drr_firstobj;
            obj < drrfo->drr_firstobj + drrfo->drr_numobjs;
            (void) dmu_object_next(os, &obj, FALSE, 0)) {
                int err;

                if (dmu_object_info(os, obj, NULL) != 0)
                        continue;

                err = dmu_free_object(os, obj);
                if (err != 0)
                        return (err);
        }
        return (0);
}

static int
restore_write(struct restorearg *ra, objset_t *os,
    struct drr_write *drrw)
{
        dmu_tx_t *tx;
        void *data;
        int err;

        if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset ||
            !DMU_OT_IS_VALID(drrw->drr_type))
                return (SET_ERROR(EINVAL));

        data = restore_read(ra, drrw->drr_length);
        if (data == NULL)
                return (ra->err);

        if (dmu_object_info(os, drrw->drr_object, NULL) != 0)
                return (SET_ERROR(EINVAL));

        tx = dmu_tx_create(os);

        dmu_tx_hold_write(tx, drrw->drr_object,
            drrw->drr_offset, drrw->drr_length);
        err = dmu_tx_assign(tx, TXG_WAIT);
        if (err != 0) {
                dmu_tx_abort(tx);
                return (err);
        }
        if (ra->byteswap) {
                dmu_object_byteswap_t byteswap =
                    DMU_OT_BYTESWAP(drrw->drr_type);
                dmu_ot_byteswap[byteswap].ob_func(data, drrw->drr_length);
        }
        dmu_write(os, drrw->drr_object,
            drrw->drr_offset, drrw->drr_length, data, tx);
        dmu_tx_commit(tx);
        return (0);
}

/*
 * Handle a DRR_WRITE_BYREF record.  This record is used in dedup'ed
 * streams to refer to a copy of the data that is already on the
 * system because it came in earlier in the stream.  This function
 * finds the earlier copy of the data, and uses that copy instead of
 * data from the stream to fulfill this write.
 */
static int
restore_write_byref(struct restorearg *ra, objset_t *os,
    struct drr_write_byref *drrwbr)
{
        dmu_tx_t *tx;
        int err;
        guid_map_entry_t gmesrch;
        guid_map_entry_t *gmep;
        avl_index_t     where;
        objset_t *ref_os = NULL;
        dmu_buf_t *dbp;

        if (drrwbr->drr_offset + drrwbr->drr_length < drrwbr->drr_offset)
                return (SET_ERROR(EINVAL));

        /*
         * If the GUID of the referenced dataset is different from the
         * GUID of the target dataset, find the referenced dataset.
         */
        if (drrwbr->drr_toguid != drrwbr->drr_refguid) {
                gmesrch.guid = drrwbr->drr_refguid;
                if ((gmep = avl_find(ra->guid_to_ds_map, &gmesrch,
                    &where)) == NULL) {
                        return (SET_ERROR(EINVAL));
                }
                if (dmu_objset_from_ds(gmep->gme_ds, &ref_os))
                        return (SET_ERROR(EINVAL));
        } else {
                ref_os = os;
        }

        err = dmu_buf_hold(ref_os, drrwbr->drr_refobject,
            drrwbr->drr_refoffset, FTAG, &dbp, DMU_READ_PREFETCH);
        if (err != 0)
                return (err);

        tx = dmu_tx_create(os);

        dmu_tx_hold_write(tx, drrwbr->drr_object,
            drrwbr->drr_offset, drrwbr->drr_length);
        err = dmu_tx_assign(tx, TXG_WAIT);
        if (err != 0) {
                dmu_tx_abort(tx);
                return (err);
        }
        dmu_write(os, drrwbr->drr_object,
            drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx);
        dmu_buf_rele(dbp, FTAG);
        dmu_tx_commit(tx);
        return (0);
}

static int
restore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs)
{
        dmu_tx_t *tx;
        void *data;
        dmu_buf_t *db, *db_spill;
        int err;

        if (drrs->drr_length < SPA_MINBLOCKSIZE ||
            drrs->drr_length > SPA_MAXBLOCKSIZE)
                return (SET_ERROR(EINVAL));

        data = restore_read(ra, drrs->drr_length);
        if (data == NULL)
                return (ra->err);

        if (dmu_object_info(os, drrs->drr_object, NULL) != 0)
                return (SET_ERROR(EINVAL));

        VERIFY0(dmu_bonus_hold(os, drrs->drr_object, FTAG, &db));
        if ((err = dmu_spill_hold_by_bonus(db, FTAG, &db_spill)) != 0) {
                dmu_buf_rele(db, FTAG);
                return (err);
        }

        tx = dmu_tx_create(os);

        dmu_tx_hold_spill(tx, db->db_object);

        err = dmu_tx_assign(tx, TXG_WAIT);
        if (err != 0) {
                dmu_buf_rele(db, FTAG);
                dmu_buf_rele(db_spill, FTAG);
                dmu_tx_abort(tx);
                return (err);
        }
        dmu_buf_will_dirty(db_spill, tx);

        if (db_spill->db_size < drrs->drr_length)
                VERIFY0(dbuf_spill_set_blksz(db_spill,
                    drrs->drr_length, tx));
        bcopy(data, db_spill->db_data, drrs->drr_length);

        dmu_buf_rele(db, FTAG);
        dmu_buf_rele(db_spill, FTAG);

        dmu_tx_commit(tx);
        return (0);
}

/* ARGSUSED */
static int
restore_free(struct restorearg *ra, objset_t *os,
    struct drr_free *drrf)
{
        int err;

        if (drrf->drr_length != -1ULL &&
            drrf->drr_offset + drrf->drr_length < drrf->drr_offset)
                return (SET_ERROR(EINVAL));

        if (dmu_object_info(os, drrf->drr_object, NULL) != 0)
                return (SET_ERROR(EINVAL));

        err = dmu_free_long_range(os, drrf->drr_object,
            drrf->drr_offset, drrf->drr_length);
        return (err);
}

/* used to destroy the drc_ds on error */
static void
dmu_recv_cleanup_ds(dmu_recv_cookie_t *drc)
{
        char name[MAXNAMELEN];
        dsl_dataset_name(drc->drc_ds, name);
        dsl_dataset_disown(drc->drc_ds, dmu_recv_tag);
        (void) dsl_destroy_head(name);
}

1378 /*
1379  * NB: callers *must* call dmu_recv_end() if this succeeds.
1380  */
1381 int
1382 dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
1383     int cleanup_fd, uint64_t *action_handlep)
1384 {
1385         struct restorearg ra = { 0 };
1386         dmu_replay_record_t *drr;
1387         objset_t *os;
1388         zio_cksum_t pcksum;
1389         int featureflags;
1390 
1391         ra.byteswap = drc->drc_byteswap;
1392         ra.cksum = drc->drc_cksum;
1393         ra.vp = vp;
1394         ra.voff = *voffp;
1395         ra.bufsize = 1<<20;
1396         ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP);
1397 
1398         /* these were verified in dmu_recv_begin */
1399         ASSERT3U(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo), ==,
1400             DMU_SUBSTREAM);
1401         ASSERT3U(drc->drc_drrb->drr_type, <, DMU_OST_NUMTYPES);
1402 
1403         /*
1404          * Open the objset we are modifying.
1405          */
1406         VERIFY0(dmu_objset_from_ds(drc->drc_ds, &os));
1407 
1408         ASSERT(drc->drc_ds->ds_phys->ds_flags & DS_FLAG_INCONSISTENT);
1409 
1410         featureflags = DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo);
1411 
	/* For deduplicated streams, set up the AVL tree for guid mapping. */
1413         if (featureflags & DMU_BACKUP_FEATURE_DEDUP) {
1414                 minor_t minor;
1415 
1416                 if (cleanup_fd == -1) {
1417                         ra.err = SET_ERROR(EBADF);
1418                         goto out;
1419                 }
1420                 ra.err = zfs_onexit_fd_hold(cleanup_fd, &minor);
1421                 if (ra.err != 0) {
1422                         cleanup_fd = -1;
1423                         goto out;
1424                 }
1425 
1426                 if (*action_handlep == 0) {
1427                         ra.guid_to_ds_map =
1428                             kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
1429                         avl_create(ra.guid_to_ds_map, guid_compare,
1430                             sizeof (guid_map_entry_t),
1431                             offsetof(guid_map_entry_t, avlnode));
                        ra.err = zfs_onexit_add_cb(minor,
                            free_guid_map_onexit, ra.guid_to_ds_map,
                            action_handlep);
                        if (ra.err != 0) {
                                /* Callback not added; don't leak the map. */
                                avl_destroy(ra.guid_to_ds_map);
                                kmem_free(ra.guid_to_ds_map,
                                    sizeof (avl_tree_t));
                                goto out;
                        }
1437                 } else {
1438                         ra.err = zfs_onexit_cb_data(minor, *action_handlep,
1439                             (void **)&ra.guid_to_ds_map);
1440                         if (ra.err != 0)
1441                                 goto out;
1442                 }
1443 
1444                 drc->drc_guid_to_ds_map = ra.guid_to_ds_map;
1445         }
1446 
        /*
         * Read records from the stream and dispatch each to its handler.
         * Save the running checksum before reading each record so that a
         * DRR_END record can be compared against the checksum of
         * everything that preceded it.
         */
1450         pcksum = ra.cksum;
1451         while (ra.err == 0 &&
1452             NULL != (drr = restore_read(&ra, sizeof (*drr)))) {
1453                 if (issig(JUSTLOOKING) && issig(FORREAL)) {
1454                         ra.err = SET_ERROR(EINTR);
1455                         goto out;
1456                 }
1457 
1458                 if (ra.byteswap)
1459                         backup_byteswap(drr);
1460 
1461                 switch (drr->drr_type) {
1462                 case DRR_OBJECT:
1463                 {
1464                         /*
1465                          * We need to make a copy of the record header,
1466                          * because restore_{object,write} may need to
1467                          * restore_read(), which will invalidate drr.
1468                          */
1469                         struct drr_object drro = drr->drr_u.drr_object;
1470                         ra.err = restore_object(&ra, os, &drro);
1471                         break;
1472                 }
1473                 case DRR_FREEOBJECTS:
1474                 {
1475                         struct drr_freeobjects drrfo =
1476                             drr->drr_u.drr_freeobjects;
1477                         ra.err = restore_freeobjects(&ra, os, &drrfo);
1478                         break;
1479                 }
1480                 case DRR_WRITE:
1481                 {
1482                         struct drr_write drrw = drr->drr_u.drr_write;
1483                         ra.err = restore_write(&ra, os, &drrw);
1484                         break;
1485                 }
1486                 case DRR_WRITE_BYREF:
1487                 {
1488                         struct drr_write_byref drrwbr =
1489                             drr->drr_u.drr_write_byref;
1490                         ra.err = restore_write_byref(&ra, os, &drrwbr);
1491                         break;
1492                 }
1493                 case DRR_FREE:
1494                 {
1495                         struct drr_free drrf = drr->drr_u.drr_free;
1496                         ra.err = restore_free(&ra, os, &drrf);
1497                         break;
1498                 }
1499                 case DRR_END:
1500                 {
1501                         struct drr_end drre = drr->drr_u.drr_end;
1502                         /*
1503                          * We compare against the *previous* checksum
1504                          * value, because the stored checksum is of
1505                          * everything before the DRR_END record.
1506                          */
1507                         if (!ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pcksum))
1508                                 ra.err = SET_ERROR(ECKSUM);
1509                         goto out;
1510                 }
1511                 case DRR_SPILL:
1512                 {
1513                         struct drr_spill drrs = drr->drr_u.drr_spill;
1514                         ra.err = restore_spill(&ra, os, &drrs);
1515                         break;
1516                 }
1517                 default:
1518                         ra.err = SET_ERROR(EINVAL);
1519                         goto out;
1520                 }
1521                 pcksum = ra.cksum;
1522         }
        /*
         * We can only get here if a record handler failed or restore_read()
         * returned NULL with an error set; a well-formed stream always
         * exits the loop via the DRR_END case above.
         */
        ASSERT(ra.err != 0);
1524 
1525 out:
1526         if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1))
1527                 zfs_onexit_fd_rele(cleanup_fd);
1528 
1529         if (ra.err != 0) {
1530                 /*
1531                  * destroy what we created, so we don't leave it in the
1532                  * inconsistent restoring state.
1533                  */
1534                 dmu_recv_cleanup_ds(drc);
1535         }
1536 
1537         kmem_free(ra.buf, ra.bufsize);
1538         *voffp = ra.voff;
1539         return (ra.err);
1540 }
1541 
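/*
 * The "check" half of the sync task that completes a receive.  For a
 * receive into an existing filesystem, verify that (with -F) any obsolete
 * snapshots can be destroyed, that the received clone can be swapped with
 * the existing head, and that the result can be snapshotted; for a brand
 * new filesystem, only the snapshot needs to be checked.
 */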
1542 static int
1543 dmu_recv_end_check(void *arg, dmu_tx_t *tx)
1544 {
1545         dmu_recv_cookie_t *drc = arg;
1546         dsl_pool_t *dp = dmu_tx_pool(tx);
1547         int error;
1548 
1549         ASSERT3P(drc->drc_ds->ds_owner, ==, dmu_recv_tag);
1550 
1551         if (!drc->drc_newfs) {
1552                 dsl_dataset_t *origin_head;
1553 
1554                 error = dsl_dataset_hold(dp, drc->drc_tofs, FTAG, &origin_head);
1555                 if (error != 0)
1556                         return (error);
1557                 if (drc->drc_force) {
                        /*
                         * We will destroy any snapshots in tofs (i.e. before
                         * origin_head) that are after the origin (which is
                         * the snap before drc_ds, because drc_ds cannot
                         * have any snaps of its own).
                         */
1564                         uint64_t obj = origin_head->ds_phys->ds_prev_snap_obj;
1565                         while (obj != drc->drc_ds->ds_phys->ds_prev_snap_obj) {
1566                                 dsl_dataset_t *snap;
1567                                 error = dsl_dataset_hold_obj(dp, obj, FTAG,
1568                                     &snap);
1569                                 if (error != 0)
1570                                         return (error);
1571                                 if (snap->ds_dir != origin_head->ds_dir)
1572                                         error = SET_ERROR(EINVAL);
1573                                 if (error == 0)  {
1574                                         error = dsl_destroy_snapshot_check_impl(
1575                                             snap, B_FALSE);
1576                                 }
1577                                 obj = snap->ds_phys->ds_prev_snap_obj;
1578                                 dsl_dataset_rele(snap, FTAG);
1579                                 if (error != 0)
1580                                         return (error);
1581                         }
1582                 }
1583                 error = dsl_dataset_clone_swap_check_impl(drc->drc_ds,
1584                     origin_head, drc->drc_force, drc->drc_owner, tx);
1585                 if (error != 0) {
1586                         dsl_dataset_rele(origin_head, FTAG);
1587                         return (error);
1588                 }
1589                 error = dsl_dataset_snapshot_check_impl(origin_head,
1590                     drc->drc_tosnap, tx, B_TRUE);
1591                 dsl_dataset_rele(origin_head, FTAG);
1592                 if (error != 0)
1593                         return (error);
1594 
1595                 error = dsl_destroy_head_check_impl(drc->drc_ds, 1);
1596         } else {
1597                 error = dsl_dataset_snapshot_check_impl(drc->drc_ds,
1598                     drc->drc_tosnap, tx, B_TRUE);
1599         }
1600         return (error);
1601 }
1602 
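/*
 * The "sync" half of completing a receive: perform the destroys, clone
 * swap, and snapshot that dmu_recv_end_check() validated, clear
 * DS_FLAG_INCONSISTENT, and release the hold from dmu_recv_begin().
 */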
1603 static void
1604 dmu_recv_end_sync(void *arg, dmu_tx_t *tx)
1605 {
1606         dmu_recv_cookie_t *drc = arg;
1607         dsl_pool_t *dp = dmu_tx_pool(tx);
1608 
1609         spa_history_log_internal_ds(drc->drc_ds, "finish receiving",
1610             tx, "snap=%s", drc->drc_tosnap);
1611 
1612         if (!drc->drc_newfs) {
1613                 dsl_dataset_t *origin_head;
1614 
1615                 VERIFY0(dsl_dataset_hold(dp, drc->drc_tofs, FTAG,
1616                     &origin_head));
1617 
1618                 if (drc->drc_force) {
1619                         /*
1620                          * Destroy any snapshots of drc_tofs (origin_head)
1621                          * after the origin (the snap before drc_ds).
1622                          */
1623                         uint64_t obj = origin_head->ds_phys->ds_prev_snap_obj;
1624                         while (obj != drc->drc_ds->ds_phys->ds_prev_snap_obj) {
1625                                 dsl_dataset_t *snap;
1626                                 VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG,
1627                                     &snap));
1628                                 ASSERT3P(snap->ds_dir, ==, origin_head->ds_dir);
1629                                 obj = snap->ds_phys->ds_prev_snap_obj;
1630                                 dsl_destroy_snapshot_sync_impl(snap,
1631                                     B_FALSE, tx);
1632                                 dsl_dataset_rele(snap, FTAG);
1633                         }
1634                 }
1635                 VERIFY3P(drc->drc_ds->ds_prev, ==,
1636                     origin_head->ds_prev);
1637 
1638                 dsl_dataset_clone_swap_sync_impl(drc->drc_ds,
1639                     origin_head, tx);
1640                 dsl_dataset_snapshot_sync_impl(origin_head,
1641                     drc->drc_tosnap, tx);
1642 
1643                 /* set snapshot's creation time and guid */
1644                 dmu_buf_will_dirty(origin_head->ds_prev->ds_dbuf, tx);
1645                 origin_head->ds_prev->ds_phys->ds_creation_time =
1646                     drc->drc_drrb->drr_creation_time;
1647                 origin_head->ds_prev->ds_phys->ds_guid =
1648                     drc->drc_drrb->drr_toguid;
1649                 origin_head->ds_prev->ds_phys->ds_flags &=
1650                     ~DS_FLAG_INCONSISTENT;
1651 
1652                 dmu_buf_will_dirty(origin_head->ds_dbuf, tx);
1653                 origin_head->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
1654 
1655                 dsl_dataset_rele(origin_head, FTAG);
1656                 dsl_destroy_head_sync_impl(drc->drc_ds, tx);
1657 
1658                 if (drc->drc_owner != NULL)
1659                         VERIFY3P(origin_head->ds_owner, ==, drc->drc_owner);
1660         } else {
1661                 dsl_dataset_t *ds = drc->drc_ds;
1662 
1663                 dsl_dataset_snapshot_sync_impl(ds, drc->drc_tosnap, tx);
1664 
1665                 /* set snapshot's creation time and guid */
1666                 dmu_buf_will_dirty(ds->ds_prev->ds_dbuf, tx);
1667                 ds->ds_prev->ds_phys->ds_creation_time =
1668                     drc->drc_drrb->drr_creation_time;
1669                 ds->ds_prev->ds_phys->ds_guid = drc->drc_drrb->drr_toguid;
1670                 ds->ds_prev->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
1671 
1672                 dmu_buf_will_dirty(ds->ds_dbuf, tx);
1673                 ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
1674         }
1675         drc->drc_newsnapobj = drc->drc_ds->ds_phys->ds_prev_snap_obj;
1676         /*
1677          * Release the hold from dmu_recv_begin.  This must be done before
1678          * we return to open context, so that when we free the dataset's dnode,
1679          * we can evict its bonus buffer.
1680          */
1681         dsl_dataset_disown(drc->drc_ds, dmu_recv_tag);
1682         drc->drc_ds = NULL;
1683 }
1684 
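/*
 * Add the snapshot identified by snapobj to the guid map, so that later
 * deduplicated streams can resolve DRR_WRITE_BYREF records against it.
 * The entry takes a long hold on the snapshot, released by
 * free_guid_map_onexit() when the cleanup fd is closed.
 */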
1685 static int
1686 add_ds_to_guidmap(const char *name, avl_tree_t *guid_map, uint64_t snapobj)
1687 {
1688         dsl_pool_t *dp;
1689         dsl_dataset_t *snapds;
1690         guid_map_entry_t *gmep;
1691         int err;
1692 
1693         ASSERT(guid_map != NULL);
1694 
1695         err = dsl_pool_hold(name, FTAG, &dp);
1696         if (err != 0)
1697                 return (err);
1698         gmep = kmem_alloc(sizeof (*gmep), KM_SLEEP);
1699         err = dsl_dataset_hold_obj(dp, snapobj, gmep, &snapds);
1700         if (err == 0) {
1701                 gmep->guid = snapds->ds_phys->ds_guid;
1702                 gmep->gme_ds = snapds;
1703                 avl_add(guid_map, gmep);
1704                 dsl_dataset_long_hold(snapds, gmep);
1705         } else {
1706                 kmem_free(gmep, sizeof (*gmep));
1707         }
1708 
1709         dsl_pool_rele(dp, FTAG);
1710         return (err);
1711 }
1712 
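/*
 * Estimate of the number of blocks modified by dmu_recv_end_sync(),
 * passed to dsl_sync_task() for its free-space check.
 */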
1713 static int dmu_recv_end_modified_blocks = 3;
1714 
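/*
 * Complete a receive into an existing filesystem: run the end check/sync
 * task, which swaps the received clone with the existing head and then
 * destroys the clone.
 */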
1715 static int
1716 dmu_recv_existing_end(dmu_recv_cookie_t *drc)
1717 {
1718         int error;
1719         char name[MAXNAMELEN];
1720 
1721 #ifdef _KERNEL
1722         /*
1723          * We will be destroying the ds; make sure its origin is unmounted if
1724          * necessary.
1725          */
1726         dsl_dataset_name(drc->drc_ds, name);
1727         zfs_destroy_unmount_origin(name);
1728 #endif
1729 
1730         error = dsl_sync_task(drc->drc_tofs,
1731             dmu_recv_end_check, dmu_recv_end_sync, drc,
1732             dmu_recv_end_modified_blocks);
1733 
1734         if (error != 0)
1735                 dmu_recv_cleanup_ds(drc);
1736         return (error);
1737 }
1738 
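/*
 * Complete a receive that created a brand new filesystem.  On success,
 * record the new snapshot in the guid map if this was a deduplicated
 * stream.
 */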
1739 static int
1740 dmu_recv_new_end(dmu_recv_cookie_t *drc)
1741 {
1742         int error;
1743 
1744         error = dsl_sync_task(drc->drc_tofs,
1745             dmu_recv_end_check, dmu_recv_end_sync, drc,
1746             dmu_recv_end_modified_blocks);
1747 
1748         if (error != 0) {
1749                 dmu_recv_cleanup_ds(drc);
1750         } else if (drc->drc_guid_to_ds_map != NULL) {
1751                 (void) add_ds_to_guidmap(drc->drc_tofs,
1752                     drc->drc_guid_to_ds_map,
1753                     drc->drc_newsnapobj);
1754         }
1755         return (error);
1756 }
1757 
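/*
 * Commit a successful receive, creating the snapshot named in the stream.
 * This is the dmu_recv_end() that callers of dmu_recv_stream() are
 * obligated to call on success; the overall sequence (argument lists
 * elided) is:
 *
 *	dmu_recv_begin()	- set up the cookie, own the target dataset
 *	dmu_recv_stream()	- read and apply the stream's records
 *	dmu_recv_end()		- commit the result, or clean up on error
 */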
1758 int
1759 dmu_recv_end(dmu_recv_cookie_t *drc, void *owner)
1760 {
1761         drc->drc_owner = owner;
1762 
1763         if (drc->drc_newfs)
1764                 return (dmu_recv_new_end(drc));
1765         else
1766                 return (dmu_recv_existing_end(drc));
1767 }