Print this page
6536 zfs send: want a way to disable sending of free records
Reviewed by: Alexander Stetsenko <astetsenko@racktopsystems.com>
Reviewed by: Kim Shrier <kshrier@racktopsystems.com>


   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  24  * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
  25  * Copyright (c) 2014, Joyent, Inc. All rights reserved.
  26  * Copyright 2014 HybridCluster. All rights reserved.

  27  */
  28 
  29 #include <sys/dmu.h>
  30 #include <sys/dmu_impl.h>
  31 #include <sys/dmu_tx.h>
  32 #include <sys/dbuf.h>
  33 #include <sys/dnode.h>
  34 #include <sys/zfs_context.h>
  35 #include <sys/dmu_objset.h>
  36 #include <sys/dmu_traverse.h>
  37 #include <sys/dsl_dataset.h>
  38 #include <sys/dsl_dir.h>
  39 #include <sys/dsl_prop.h>
  40 #include <sys/dsl_pool.h>
  41 #include <sys/dsl_synctask.h>
  42 #include <sys/zfs_ioctl.h>
  43 #include <sys/zap.h>
  44 #include <sys/zio_checksum.h>
  45 #include <sys/zfs_znode.h>
  46 #include <zfs_fletcher.h>


 135                         return (SET_ERROR(EINTR));
 136         }
 137         return (0);
 138 }
 139 
 140 /*
 141  * Fill in the drr_free struct, or perform aggregation if the previous record is
 142  * also a free record, and the two are adjacent.
 143  *
 144  * Note that we send free records even for a full send, because we want to be
 145  * able to receive a full send as a clone, which requires a list of all the free
 146  * and freeobject records that were generated on the source.
 147  */
 148 static int
 149 dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
 150     uint64_t length)
 151 {
 152         struct drr_free *drrf = &(dsp->dsa_drr->drr_u.drr_free);
 153 
 154         /*







 155          * When we receive a free record, dbuf_free_range() assumes
 156          * that the receiving system doesn't have any dbufs in the range
 157          * being freed.  This is always true because there is a one-record
 158          * constraint: we only send one WRITE record for any given
 159          * object,offset.  We know that the one-record constraint is
 160          * true because we always send data in increasing order by
 161          * object,offset.
 162          *
 163          * If the increasing-order constraint ever changes, we should find
 164          * another way to assert that the one-record constraint is still
 165          * satisfied.
 166          */
 167         ASSERT(object > dsp->dsa_last_data_object ||
 168             (object == dsp->dsa_last_data_object &&
 169             offset > dsp->dsa_last_data_offset));
 170 
 171         if (length != -1ULL && offset + length < offset)
 172                 length = -1ULL;
 173 
 174         /*


 329                 dsp->dsa_pending_op = PENDING_NONE;
 330         }
 331 
 332         /* write a SPILL record */
 333         bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
 334         dsp->dsa_drr->drr_type = DRR_SPILL;
 335         drrs->drr_object = object;
 336         drrs->drr_length = blksz;
 337         drrs->drr_toguid = dsp->dsa_toguid;
 338 
 339         if (dump_record(dsp, data, blksz) != 0)
 340                 return (SET_ERROR(EINTR));
 341         return (0);
 342 }
 343 
 344 static int
 345 dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs)
 346 {
 347         struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects);
 348 




 349         /*
 350          * If there is a pending op, but it's not PENDING_FREEOBJECTS,
 351          * push it out, since free block aggregation can only be done for
 352          * blocks of the same type (i.e., DRR_FREE records can only be
 353          * aggregated with other DRR_FREE records.  DRR_FREEOBJECTS records
 354          * can only be aggregated with other DRR_FREEOBJECTS records).
 355          */
 356         if (dsp->dsa_pending_op != PENDING_NONE &&
 357             dsp->dsa_pending_op != PENDING_FREEOBJECTS) {
 358                 if (dump_record(dsp, NULL, 0) != 0)
 359                         return (SET_ERROR(EINTR));
 360                 dsp->dsa_pending_op = PENDING_NONE;
 361         }
 362         if (dsp->dsa_pending_op == PENDING_FREEOBJECTS) {
 363                 /*
 364                  * See whether this free object array can be aggregated
 365                  * with pending one
 366                  */
 367                 if (drrfo->drr_firstobj + drrfo->drr_numobjs == firstobj) {
 368                         drrfo->drr_numobjs += numobjs;


 666 
 667 /*
 668  * Pop the new data off the queue, and free the old data.
 669  */
 670 static struct send_block_record *
 671 get_next_record(bqueue_t *bq, struct send_block_record *data)
 672 {
 673         struct send_block_record *tmp = bqueue_dequeue(bq);
 674         kmem_free(data, sizeof (*data));
 675         return (tmp);
 676 }
 677 
 678 /*
 679  * Actually do the bulk of the work in a zfs send.
 680  *
 681  * Note: Releases dp using the specified tag.
 682  */
 683 static int
 684 dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
 685     zfs_bookmark_phys_t *ancestor_zb,
 686     boolean_t is_clone, boolean_t embedok, boolean_t large_block_ok, int outfd,
 687     uint64_t resumeobj, uint64_t resumeoff,
 688     vnode_t *vp, offset_t *off)
 689 {
 690         objset_t *os;
 691         dmu_replay_record_t *drr;
 692         dmu_sendarg_t *dsp;
 693         int err;
 694         uint64_t fromtxg = 0;
 695         uint64_t featureflags = 0;
 696         struct send_thread_arg to_arg = { 0 };
 697 
 698         err = dmu_objset_from_ds(to_ds, &os);
 699         if (err != 0) {
 700                 dsl_pool_rele(dp, tag);
 701                 return (err);
 702         }
 703 
 704         drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
 705         drr->drr_type = DRR_BEGIN;
 706         drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
 707         DMU_SET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo,


 728                 featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA;
 729                 if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS))
 730                         featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA_LZ4;
 731         }
 732 
 733         if (resumeobj != 0 || resumeoff != 0) {
 734                 featureflags |= DMU_BACKUP_FEATURE_RESUMING;
 735         }
 736 
 737         DMU_SET_FEATUREFLAGS(drr->drr_u.drr_begin.drr_versioninfo,
 738             featureflags);
 739 
 740         drr->drr_u.drr_begin.drr_creation_time =
 741             dsl_dataset_phys(to_ds)->ds_creation_time;
 742         drr->drr_u.drr_begin.drr_type = dmu_objset_type(os);
 743         if (is_clone)
 744                 drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE;
 745         drr->drr_u.drr_begin.drr_toguid = dsl_dataset_phys(to_ds)->ds_guid;
 746         if (dsl_dataset_phys(to_ds)->ds_flags & DS_FLAG_CI_DATASET)
 747                 drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA;

 748         drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_FREERECORDS;
 749 
 750         if (ancestor_zb != NULL) {
 751                 drr->drr_u.drr_begin.drr_fromguid =
 752                     ancestor_zb->zbm_guid;
 753                 fromtxg = ancestor_zb->zbm_creation_txg;
 754         }
 755         dsl_dataset_name(to_ds, drr->drr_u.drr_begin.drr_toname);
 756         if (!to_ds->ds_is_snapshot) {
 757                 (void) strlcat(drr->drr_u.drr_begin.drr_toname, "@--head--",
 758                     sizeof (drr->drr_u.drr_begin.drr_toname));
 759         }
 760 
 761         dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP);
 762 
 763         dsp->dsa_drr = drr;
 764         dsp->dsa_vp = vp;
 765         dsp->dsa_outfd = outfd;
 766         dsp->dsa_proc = curproc;
 767         dsp->dsa_os = os;
 768         dsp->dsa_off = off;
 769         dsp->dsa_toguid = dsl_dataset_phys(to_ds)->ds_guid;
 770         dsp->dsa_pending_op = PENDING_NONE;
 771         dsp->dsa_featureflags = featureflags;
 772         dsp->dsa_resume_object = resumeobj;
 773         dsp->dsa_resume_offset = resumeoff;

 774 
 775         mutex_enter(&to_ds->ds_sendstream_lock);
 776         list_insert_head(&to_ds->ds_sendstreams, dsp);
 777         mutex_exit(&to_ds->ds_sendstream_lock);
 778 
 779         dsl_dataset_long_hold(to_ds, FTAG);
 780         dsl_pool_rele(dp, tag);
 781 
 782         void *payload = NULL;
 783         size_t payload_len = 0;
 784         if (resumeobj != 0 || resumeoff != 0) {
 785                 dmu_object_info_t to_doi;
 786                 err = dmu_object_info(os, resumeobj, &to_doi);
 787                 if (err != 0)
 788                         goto out;
 789                 SET_BOOKMARK(&to_arg.resume, to_ds->ds_object, resumeobj, 0,
 790                     resumeoff / to_doi.doi_data_block_size);
 791 
 792                 nvlist_t *nvl = fnvlist_alloc();
 793                 fnvlist_add_uint64(nvl, "resume_object", resumeobj);


 856         drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid;
 857 
 858         if (dump_record(dsp, NULL, 0) != 0)
 859                 err = dsp->dsa_err;
 860 
 861 out:
 862         mutex_enter(&to_ds->ds_sendstream_lock);
 863         list_remove(&to_ds->ds_sendstreams, dsp);
 864         mutex_exit(&to_ds->ds_sendstream_lock);
 865 
 866         kmem_free(drr, sizeof (dmu_replay_record_t));
 867         kmem_free(dsp, sizeof (dmu_sendarg_t));
 868 
 869         dsl_dataset_long_rele(to_ds, FTAG);
 870 
 871         return (err);
 872 }
 873 
 874 int
 875 dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
 876     boolean_t embedok, boolean_t large_block_ok,
 877     int outfd, vnode_t *vp, offset_t *off)
 878 {
 879         dsl_pool_t *dp;
 880         dsl_dataset_t *ds;
 881         dsl_dataset_t *fromds = NULL;
 882         int err;
 883 
 884         err = dsl_pool_hold(pool, FTAG, &dp);
 885         if (err != 0)
 886                 return (err);
 887 
 888         err = dsl_dataset_hold_obj(dp, tosnap, FTAG, &ds);
 889         if (err != 0) {
 890                 dsl_pool_rele(dp, FTAG);
 891                 return (err);
 892         }
 893 
 894         if (fromsnap != 0) {
 895                 zfs_bookmark_phys_t zb;
 896                 boolean_t is_clone;
 897 
 898                 err = dsl_dataset_hold_obj(dp, fromsnap, FTAG, &fromds);
 899                 if (err != 0) {
 900                         dsl_dataset_rele(ds, FTAG);
 901                         dsl_pool_rele(dp, FTAG);
 902                         return (err);
 903                 }
 904                 if (!dsl_dataset_is_before(ds, fromds, 0))
 905                         err = SET_ERROR(EXDEV);
 906                 zb.zbm_creation_time =
 907                     dsl_dataset_phys(fromds)->ds_creation_time;
 908                 zb.zbm_creation_txg = dsl_dataset_phys(fromds)->ds_creation_txg;
 909                 zb.zbm_guid = dsl_dataset_phys(fromds)->ds_guid;
 910                 is_clone = (fromds->ds_dir != ds->ds_dir);
 911                 dsl_dataset_rele(fromds, FTAG);
 912                 err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone,
 913                     embedok, large_block_ok, outfd, 0, 0, vp, off);
 914         } else {
 915                 err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE,
 916                     embedok, large_block_ok, outfd, 0, 0, vp, off);
 917         }
 918         dsl_dataset_rele(ds, FTAG);
 919         return (err);
 920 }
 921 
 922 int
 923 dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
 924     boolean_t large_block_ok, int outfd, uint64_t resumeobj, uint64_t resumeoff,
 925     vnode_t *vp, offset_t *off)
 926 {
 927         dsl_pool_t *dp;
 928         dsl_dataset_t *ds;
 929         int err;
 930         boolean_t owned = B_FALSE;
 931 
 932         if (fromsnap != NULL && strpbrk(fromsnap, "@#") == NULL)
 933                 return (SET_ERROR(EINVAL));
 934 
 935         err = dsl_pool_hold(tosnap, FTAG, &dp);
 936         if (err != 0)
 937                 return (err);
 938 
 939         if (strchr(tosnap, '@') == NULL && spa_writeable(dp->dp_spa)) {
 940                 /*
 941                  * We are sending a filesystem or volume.  Ensure
 942                  * that it doesn't change by owning the dataset.
 943                  */
 944                 err = dsl_dataset_own(dp, tosnap, FTAG, &ds);
 945                 owned = B_TRUE;


 972                         if (err == 0) {
 973                                 if (!dsl_dataset_is_before(ds, fromds, 0))
 974                                         err = SET_ERROR(EXDEV);
 975                                 zb.zbm_creation_time =
 976                                     dsl_dataset_phys(fromds)->ds_creation_time;
 977                                 zb.zbm_creation_txg =
 978                                     dsl_dataset_phys(fromds)->ds_creation_txg;
 979                                 zb.zbm_guid = dsl_dataset_phys(fromds)->ds_guid;
 980                                 is_clone = (ds->ds_dir != fromds->ds_dir);
 981                                 dsl_dataset_rele(fromds, FTAG);
 982                         }
 983                 } else {
 984                         err = dsl_bookmark_lookup(dp, fromsnap, ds, &zb);
 985                 }
 986                 if (err != 0) {
 987                         dsl_dataset_rele(ds, FTAG);
 988                         dsl_pool_rele(dp, FTAG);
 989                         return (err);
 990                 }
 991                 err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone,
 992                     embedok, large_block_ok,
 993                     outfd, resumeobj, resumeoff, vp, off);
 994         } else {
 995                 err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE,
 996                     embedok, large_block_ok,
 997                     outfd, resumeobj, resumeoff, vp, off);
 998         }
 999         if (owned)
1000                 dsl_dataset_disown(ds, FTAG);
1001         else
1002                 dsl_dataset_rele(ds, FTAG);
1003         return (err);
1004 }
1005 
1006 static int
1007 dmu_adjust_send_estimate_for_indirects(dsl_dataset_t *ds, uint64_t size,
1008     uint64_t *sizep)
1009 {
1010         int err;
1011         /*
1012          * Assume that space (both on-disk and in-stream) is dominated by
1013          * data.  We will adjust for indirect blocks and the copies property,
1014          * but ignore per-object space used (eg, dnodes and DRR_OBJECT records).
1015          */
1016 




   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
  24  * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
  25  * Copyright (c) 2014, Joyent, Inc. All rights reserved.
  26  * Copyright 2014 HybridCluster. All rights reserved.
  27  * Copyright 2015 RackTop Systems.
  28  */
  29 
  30 #include <sys/dmu.h>
  31 #include <sys/dmu_impl.h>
  32 #include <sys/dmu_tx.h>
  33 #include <sys/dbuf.h>
  34 #include <sys/dnode.h>
  35 #include <sys/zfs_context.h>
  36 #include <sys/dmu_objset.h>
  37 #include <sys/dmu_traverse.h>
  38 #include <sys/dsl_dataset.h>
  39 #include <sys/dsl_dir.h>
  40 #include <sys/dsl_prop.h>
  41 #include <sys/dsl_pool.h>
  42 #include <sys/dsl_synctask.h>
  43 #include <sys/zfs_ioctl.h>
  44 #include <sys/zap.h>
  45 #include <sys/zio_checksum.h>
  46 #include <sys/zfs_znode.h>
  47 #include <zfs_fletcher.h>


 136                         return (SET_ERROR(EINTR));
 137         }
 138         return (0);
 139 }
 140 
 141 /*
 142  * Fill in the drr_free struct, or perform aggregation if the previous record is
 143  * also a free record, and the two are adjacent.
 144  *
 145  * Note that we send free records even for a full send, because we want to be
 146  * able to receive a full send as a clone, which requires a list of all the free
 147  * and freeobject records that were generated on the source.
 148  */
 149 static int
 150 dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
 151     uint64_t length)
 152 {
 153         struct drr_free *drrf = &(dsp->dsa_drr->drr_u.drr_free);
 154 
 155         /*
 156          * Skip free records if asked not to send them.  The resulting
 157          * stream cannot be received as a clone.
 158          */
 159         if (dsp->dsa_skip_free)
 160                 return (0);
 161 
 162         /*
 163          * When we receive a free record, dbuf_free_range() assumes
 164          * that the receiving system doesn't have any dbufs in the range
 165          * being freed.  This is always true because there is a one-record
 166          * constraint: we only send one WRITE record for any given
 167          * object,offset.  We know that the one-record constraint is
 168          * true because we always send data in increasing order by
 169          * object,offset.
 170          *
 171          * If the increasing-order constraint ever changes, we should find
 172          * another way to assert that the one-record constraint is still
 173          * satisfied.
 174          */
 175         ASSERT(object > dsp->dsa_last_data_object ||
 176             (object == dsp->dsa_last_data_object &&
 177             offset > dsp->dsa_last_data_offset));
 178 
 179         if (length != -1ULL && offset + length < offset)
 180                 length = -1ULL;
 181 
 182         /*


 337                 dsp->dsa_pending_op = PENDING_NONE;
 338         }
 339 
 340         /* write a SPILL record */
 341         bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
 342         dsp->dsa_drr->drr_type = DRR_SPILL;
 343         drrs->drr_object = object;
 344         drrs->drr_length = blksz;
 345         drrs->drr_toguid = dsp->dsa_toguid;
 346 
 347         if (dump_record(dsp, data, blksz) != 0)
 348                 return (SET_ERROR(EINTR));
 349         return (0);
 350 }
 351 
 352 static int
 353 dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs)
 354 {
 355         struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects);
 356 
 357         /* See comment in dump_free(). */
 358         if (dsp->dsa_skip_free)
 359                 return (0);
 360 
 361         /*
 362          * If there is a pending op, but it's not PENDING_FREEOBJECTS,
 363          * push it out, since free block aggregation can only be done for
 364          * blocks of the same type (i.e., DRR_FREE records can only be
 365          * aggregated with other DRR_FREE records.  DRR_FREEOBJECTS records
 366          * can only be aggregated with other DRR_FREEOBJECTS records).
 367          */
 368         if (dsp->dsa_pending_op != PENDING_NONE &&
 369             dsp->dsa_pending_op != PENDING_FREEOBJECTS) {
 370                 if (dump_record(dsp, NULL, 0) != 0)
 371                         return (SET_ERROR(EINTR));
 372                 dsp->dsa_pending_op = PENDING_NONE;
 373         }
 374         if (dsp->dsa_pending_op == PENDING_FREEOBJECTS) {
 375                 /*
 376                  * See whether this free object array can be aggregated
 377                  * with pending one
 378                  */
 379                 if (drrfo->drr_firstobj + drrfo->drr_numobjs == firstobj) {
 380                         drrfo->drr_numobjs += numobjs;


 678 
 679 /*
 680  * Pop the new data off the queue, and free the old data.
 681  */
 682 static struct send_block_record *
 683 get_next_record(bqueue_t *bq, struct send_block_record *data)
 684 {
 685         struct send_block_record *tmp = bqueue_dequeue(bq);
 686         kmem_free(data, sizeof (*data));
 687         return (tmp);
 688 }
 689 
 690 /*
 691  * Actually do the bulk of the work in a zfs send.
 692  *
 693  * Note: Releases dp using the specified tag.
 694  */
 695 static int
 696 dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
 697     zfs_bookmark_phys_t *ancestor_zb,
 698     boolean_t is_clone, boolean_t embedok, boolean_t large_block_ok,
 699     boolean_t skip_free, int outfd, uint64_t resumeobj, uint64_t resumeoff,
 700     vnode_t *vp, offset_t *off)
 701 {
 702         objset_t *os;
 703         dmu_replay_record_t *drr;
 704         dmu_sendarg_t *dsp;
 705         int err;
 706         uint64_t fromtxg = 0;
 707         uint64_t featureflags = 0;
 708         struct send_thread_arg to_arg = { 0 };
 709 
 710         err = dmu_objset_from_ds(to_ds, &os);
 711         if (err != 0) {
 712                 dsl_pool_rele(dp, tag);
 713                 return (err);
 714         }
 715 
 716         drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
 717         drr->drr_type = DRR_BEGIN;
 718         drr->drr_u.drr_begin.drr_magic = DMU_BACKUP_MAGIC;
 719         DMU_SET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo,


 740                 featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA;
 741                 if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS))
 742                         featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA_LZ4;
 743         }
 744 
 745         if (resumeobj != 0 || resumeoff != 0) {
 746                 featureflags |= DMU_BACKUP_FEATURE_RESUMING;
 747         }
 748 
 749         DMU_SET_FEATUREFLAGS(drr->drr_u.drr_begin.drr_versioninfo,
 750             featureflags);
 751 
 752         drr->drr_u.drr_begin.drr_creation_time =
 753             dsl_dataset_phys(to_ds)->ds_creation_time;
 754         drr->drr_u.drr_begin.drr_type = dmu_objset_type(os);
 755         if (is_clone)
 756                 drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE;
 757         drr->drr_u.drr_begin.drr_toguid = dsl_dataset_phys(to_ds)->ds_guid;
 758         if (dsl_dataset_phys(to_ds)->ds_flags & DS_FLAG_CI_DATASET)
 759                 drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA;
 760         if (!skip_free)
 761                 drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_FREERECORDS;
 762 
 763         if (ancestor_zb != NULL) {
 764                 drr->drr_u.drr_begin.drr_fromguid =
 765                     ancestor_zb->zbm_guid;
 766                 fromtxg = ancestor_zb->zbm_creation_txg;
 767         }
 768         dsl_dataset_name(to_ds, drr->drr_u.drr_begin.drr_toname);
 769         if (!to_ds->ds_is_snapshot) {
 770                 (void) strlcat(drr->drr_u.drr_begin.drr_toname, "@--head--",
 771                     sizeof (drr->drr_u.drr_begin.drr_toname));
 772         }
 773 
 774         dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP);
 775 
 776         dsp->dsa_drr = drr;
 777         dsp->dsa_vp = vp;
 778         dsp->dsa_outfd = outfd;
 779         dsp->dsa_proc = curproc;
 780         dsp->dsa_os = os;
 781         dsp->dsa_off = off;
 782         dsp->dsa_toguid = dsl_dataset_phys(to_ds)->ds_guid;
 783         dsp->dsa_pending_op = PENDING_NONE;
 784         dsp->dsa_featureflags = featureflags;
 785         dsp->dsa_resume_object = resumeobj;
 786         dsp->dsa_resume_offset = resumeoff;
 787         dsp->dsa_skip_free = skip_free;
 788 
 789         mutex_enter(&to_ds->ds_sendstream_lock);
 790         list_insert_head(&to_ds->ds_sendstreams, dsp);
 791         mutex_exit(&to_ds->ds_sendstream_lock);
 792 
 793         dsl_dataset_long_hold(to_ds, FTAG);
 794         dsl_pool_rele(dp, tag);
 795 
 796         void *payload = NULL;
 797         size_t payload_len = 0;
 798         if (resumeobj != 0 || resumeoff != 0) {
 799                 dmu_object_info_t to_doi;
 800                 err = dmu_object_info(os, resumeobj, &to_doi);
 801                 if (err != 0)
 802                         goto out;
 803                 SET_BOOKMARK(&to_arg.resume, to_ds->ds_object, resumeobj, 0,
 804                     resumeoff / to_doi.doi_data_block_size);
 805 
 806                 nvlist_t *nvl = fnvlist_alloc();
 807                 fnvlist_add_uint64(nvl, "resume_object", resumeobj);


 870         drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid;
 871 
 872         if (dump_record(dsp, NULL, 0) != 0)
 873                 err = dsp->dsa_err;
 874 
 875 out:
 876         mutex_enter(&to_ds->ds_sendstream_lock);
 877         list_remove(&to_ds->ds_sendstreams, dsp);
 878         mutex_exit(&to_ds->ds_sendstream_lock);
 879 
 880         kmem_free(drr, sizeof (dmu_replay_record_t));
 881         kmem_free(dsp, sizeof (dmu_sendarg_t));
 882 
 883         dsl_dataset_long_rele(to_ds, FTAG);
 884 
 885         return (err);
 886 }
 887 
 888 int
 889 dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
 890     boolean_t embedok, boolean_t large_block_ok, boolean_t skip_free,
 891     int outfd, vnode_t *vp, offset_t *off)
 892 {
 893         dsl_pool_t *dp;
 894         dsl_dataset_t *ds;
 895         dsl_dataset_t *fromds = NULL;
 896         int err;
 897 
 898         err = dsl_pool_hold(pool, FTAG, &dp);
 899         if (err != 0)
 900                 return (err);
 901 
 902         err = dsl_dataset_hold_obj(dp, tosnap, FTAG, &ds);
 903         if (err != 0) {
 904                 dsl_pool_rele(dp, FTAG);
 905                 return (err);
 906         }
 907 
 908         if (fromsnap != 0) {
 909                 zfs_bookmark_phys_t zb;
 910                 boolean_t is_clone;
 911 
 912                 err = dsl_dataset_hold_obj(dp, fromsnap, FTAG, &fromds);
 913                 if (err != 0) {
 914                         dsl_dataset_rele(ds, FTAG);
 915                         dsl_pool_rele(dp, FTAG);
 916                         return (err);
 917                 }
 918                 if (!dsl_dataset_is_before(ds, fromds, 0))
 919                         err = SET_ERROR(EXDEV);
 920                 zb.zbm_creation_time =
 921                     dsl_dataset_phys(fromds)->ds_creation_time;
 922                 zb.zbm_creation_txg = dsl_dataset_phys(fromds)->ds_creation_txg;
 923                 zb.zbm_guid = dsl_dataset_phys(fromds)->ds_guid;
 924                 is_clone = (fromds->ds_dir != ds->ds_dir);
 925                 dsl_dataset_rele(fromds, FTAG);
 926                 err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone,
 927                     embedok, large_block_ok, skip_free, outfd, 0, 0, vp, off);
 928         } else {
 929                 err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE,
 930                     embedok, large_block_ok, skip_free, outfd, 0, 0, vp, off);
 931         }
 932         dsl_dataset_rele(ds, FTAG);
 933         return (err);
 934 }
 935 
 936 int
 937 dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
 938     boolean_t large_block_ok, boolean_t skip_free, int outfd,
 939     uint64_t resumeobj, uint64_t resumeoff, vnode_t *vp, offset_t *off)
 940 {
 941         dsl_pool_t *dp;
 942         dsl_dataset_t *ds;
 943         int err;
 944         boolean_t owned = B_FALSE;
 945 
 946         if (fromsnap != NULL && strpbrk(fromsnap, "@#") == NULL)
 947                 return (SET_ERROR(EINVAL));
 948 
 949         err = dsl_pool_hold(tosnap, FTAG, &dp);
 950         if (err != 0)
 951                 return (err);
 952 
 953         if (strchr(tosnap, '@') == NULL && spa_writeable(dp->dp_spa)) {
 954                 /*
 955                  * We are sending a filesystem or volume.  Ensure
 956                  * that it doesn't change by owning the dataset.
 957                  */
 958                 err = dsl_dataset_own(dp, tosnap, FTAG, &ds);
 959                 owned = B_TRUE;


 986                         if (err == 0) {
 987                                 if (!dsl_dataset_is_before(ds, fromds, 0))
 988                                         err = SET_ERROR(EXDEV);
 989                                 zb.zbm_creation_time =
 990                                     dsl_dataset_phys(fromds)->ds_creation_time;
 991                                 zb.zbm_creation_txg =
 992                                     dsl_dataset_phys(fromds)->ds_creation_txg;
 993                                 zb.zbm_guid = dsl_dataset_phys(fromds)->ds_guid;
 994                                 is_clone = (ds->ds_dir != fromds->ds_dir);
 995                                 dsl_dataset_rele(fromds, FTAG);
 996                         }
 997                 } else {
 998                         err = dsl_bookmark_lookup(dp, fromsnap, ds, &zb);
 999                 }
1000                 if (err != 0) {
1001                         dsl_dataset_rele(ds, FTAG);
1002                         dsl_pool_rele(dp, FTAG);
1003                         return (err);
1004                 }
1005                 err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone,
1006                     embedok, large_block_ok, skip_free,
1007                     outfd, resumeobj, resumeoff, vp, off);
1008         } else {
1009                 err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE,
1010                     embedok, large_block_ok, skip_free,
1011                     outfd, resumeobj, resumeoff, vp, off);
1012         }
1013         if (owned)
1014                 dsl_dataset_disown(ds, FTAG);
1015         else
1016                 dsl_dataset_rele(ds, FTAG);
1017         return (err);
1018 }
1019 
1020 static int
1021 dmu_adjust_send_estimate_for_indirects(dsl_dataset_t *ds, uint64_t size,
1022     uint64_t *sizep)
1023 {
1024         int err;
1025         /*
1026          * Assume that space (both on-disk and in-stream) is dominated by
1027          * data.  We will adjust for indirect blocks and the copies property,
1028          * but ignore per-object space used (eg, dnodes and DRR_OBJECT records).
1029          */
1030