1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2012 STRATO AG. All rights reserved.
  23  */
  24 #include <sys/zfs_context.h>
  25 #include <sys/stat.h>
  26 #include <sys/errno.h>
  27 #include <sys/mkdev.h>
  28 #include <sys/debug.h>
  29 #include <sys/open.h>
  30 #include <sys/zfs_ioctl.h>
  31 #include <zfs_namecheck.h>
  32 #include <sys/policy.h>
  33 #include <sys/dmu_objset.h>
  34 #include <sys/dsl_prop.h>
  35 #include <sys/zvol.h>
  36 #include <sys/zap.h>
  37 #include <sys/dsl_dataset.h>
  38 #include <sys/dmu_traverse.h>
  39 #include <sys/dsl_dir.h>
  40 #include <sys/arc.h>
  41 #include <sys/spa.h>
  42 #include <sys/spa_impl.h>
  43 #include <sys/sa.h>
  44 #include <sys/sa_impl.h>
  45 #include <sys/zfs_acl.h>
  46 #include <sys/zfs_sa.h>
  47 #include <sys/zfs_znode.h>
  48 #include <sys/dbuf.h>
  49 #include <sys/far.h>
  50 #include <sys/far_impl.h>
  51 
  52 /*
  53  * far_send generates a stream of filesystem data analogous to dmu_send.
  54  * The main difference is that the far-stream does not contain zfs-specific
  55  * data and can be replayed on any filesystem. It just contains commands like
  56  * MKDIR, CHMOD, RENAME etc.
  57  * The stream is generated in two passes. The first pass, PASS_LINK basically
  58  * creates all new files/directories and links, while the second pass,
  59  * PASS_UNLINK, does all the removal of old stuff.
  60  * Each pass enumerates all objects in inode order.
  61  * There are some corner cases:
  62  *   Files / directories can only be created if the parent already exists or
  63  * already has been created. If an object is encountered whose parent does not
  64  * satisfy this condition, it is put back and its creation will be triggered
  65  * by the creation of the parent.
  66  *   A similar case applies on deletion. A directory can only be removed after
  67  * the last contained object has been removed. If a directory is not empty,
  68  * it is put back and the deletion of the last object in it triggers the
  69  * deletion.
  70  *   If an object gets deleted, and a new object is created under the same
  71  * name, pass1 cannot create the object directly. So it is created under a
  72  * temporary name and gets renamed in pass2.
  73  *   If an object is deleted and a new object (of possibly different type)
  74  * created under the same inode and the same name, this change cannot be
  75  * detected by enumerating the containing directory (as name + inode are
  76  * unchanged). It is detected by a change of the inode generation number and
  77  * a flag is set for pass2. Creation is postponed. In pass2, all enumerated
  78  * directories are checked for this inode (although the entry is unchanged,
  79  * the directory has a bumped txg). If it is encountered, delete + create
  80  * happen both in pass2.
  81  *
  82  * There are lots of TODOs left:
  83  *  - add XATTR support
  84  *  - add path-caching
  85  *  - add a cache for brute-force parent search
  86  *  - add a cache for inode-search in a directory
  87  *  - use a hash instead of the linear list in far_count
  88  */
  89 static int
  90 far_dnode_changed(spa_t *spa, far_t *f, uint64_t dnobj,
  91     dnode_phys_t *from, arc_buf_t *frombuf, dnode_phys_t *to, arc_buf_t *tobuf);
  92 
  93 /* copied from zfs_znode.c */
  94 static int
  95 far_sa_setup(objset_t *osp, sa_attr_type_t **sa_table)
  96 {
  97         uint64_t sa_obj = 0;
  98         int error;
  99 
 100         error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj);
 101         if (error != 0 && error != ENOENT)
 102                 return (error);
 103 
 104         error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table);
 105         return (error);
 106 }
 107 
 108 static int
 109 far_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp,
 110     dmu_buf_t **db, void *tag)
 111 {
 112         dmu_object_info_t doi;
 113         int error;
 114 
 115         if ((error = sa_buf_hold(osp, obj, tag, db)) != 0)
 116                 return (error);
 117 
 118         dmu_object_info_from_db(*db, &doi);
 119         if ((doi.doi_bonus_type != DMU_OT_SA &&
 120             doi.doi_bonus_type != DMU_OT_ZNODE) ||
 121             (doi.doi_bonus_type == DMU_OT_ZNODE &&
 122             doi.doi_bonus_size < sizeof (znode_phys_t))) {
 123                 sa_buf_rele(*db, tag);
 124                 return (ENOTSUP);
 125         }
 126 
 127         error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp);
 128         if (error != 0) {
 129                 sa_buf_rele(*db, tag);
 130                 return (error);
 131         }
 132 
 133         return (0);
 134 }
 135 
 136 static void
 137 far_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag)
 138 {
 139         sa_handle_destroy(hdl);
 140         sa_buf_rele(db, tag);
 141 }
 142 
 143 static int
 144 far_find_from_bp(spa_t *spa, dnode_phys_t *dnp, blklevel_t *bl,
 145     const zbookmark_t *zb, blkptr_t **bpp, arc_buf_t **pbuf)
 146 {
 147         uint32_t flags;
 148         int epbs = dnp->dn_indblkshift - SPA_BLKPTRSHIFT;
 149         int epbmask = (1 << epbs) - 1;
 150         int level;
 151         int slot;
 152         uint64_t blkid;
 153         uint64_t blk;
 154         blklevel_t *blp;
 155         zbookmark_t czb;
 156         int i;
 157 
 158         *bpp = NULL;
 159         for (level = dnp->dn_nlevels - 1; level >= zb->zb_level; --level) {
 160                 blkid = zb->zb_blkid >> (epbs * (level - zb->zb_level));
 161                 blk = blkid >> epbs;
 162                 slot = blk & epbmask;
 163                 blp = bl + level;
 164 
 165                 if (blp->bl_blk == blk)
 166                         continue;
 167 
 168                 for (i = 0; i <= level; ++i) {
 169                         blklevel_t *b = bl + i;
 170 
 171                         if (b->bl_buf)
 172                                 arc_buf_remove_ref(b->bl_buf, &b->bl_buf);
 173                         b->bl_bp = NULL;
 174                         b->bl_buf = NULL;
 175                         b->bl_blk = -1;
 176                 }
 177                 ASSERT(slot < blp[1].bl_nslots);
 178                 if (BP_IS_HOLE(blp[1].bl_bp + slot)) {
 179                         *bpp = NULL;
 180                         return (0);
 181                 }
 182                 /*
 183                  * load indblk
 184                  */
 185                 flags = ARC_WAIT;
 186                 SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object, level, blkid);
 187                 if (dsl_read(NULL, spa, blp[1].bl_bp + slot, blp[1].bl_buf,
 188                     arc_getbuf_func, &blp->bl_buf, ZIO_PRIORITY_ASYNC_READ,
 189                     ZIO_FLAG_CANFAIL, &flags, &czb) != 0)
 190                         return (EIO);
 191                 blp->bl_bp = blp->bl_buf->b_data;
 192                 blp->bl_nslots = 1 << epbs;
 193                 blp->bl_blk = blk;
 194         }
 195         slot = zb->zb_blkid & epbmask;
 196         blp = bl + zb->zb_level;
 197         ASSERT(slot < blp->bl_nslots);
 198         *bpp = blp->bl_bp + slot;
 199         *pbuf = blp->bl_buf;
 200         if (BP_IS_HOLE(*bpp))
 201                 *bpp = NULL;
 202 
 203         return (0);
 204 }
 205 
 206 static int
 207 far_file_cb(spa_t *spa, far_t *f, zbookmark_t *zb,
 208     blkptr_t *bp, arc_buf_t *pbuf, void *ctx)
 209 {
 210         int err = 0;
 211         blkptr_t *fbp;
 212 
 213         if (issig(JUSTLOOKING) && issig(FORREAL))
 214                 return (EINTR);
 215 
 216         if (f->f_fromds && zb->zb_objset == f->f_fromds->ds_object)
 217                 return (0);
 218 
 219         if (bp == NULL) {
 220                 arc_buf_t *fpbuf = NULL;
 221                 zbookmark_t czb;
 222 
 223                 ASSERT(f->f_fromds);
 224                 SET_BOOKMARK(&czb, f->f_fromds->ds_object, zb->zb_object,
 225                     zb->zb_level, zb->zb_blkid);
 226                 err = far_find_from_bp(spa, f->f_dnp, f->f_filebl,
 227                     &czb, &fbp, &fpbuf);
 228                 if (err)
 229                         return (err);
 230                 if (fbp) {
 231 #if 0
 232                         /* XXX TODO callback for newly created hole */
 233                         err = far_enum_bp(spa, da, &czb, fbp, fpbuf);
 234                         if (err)
 235                                 return (err);
 236 #endif
 237                 }
 238         } else if (zb->zb_level == 0) {
 239                 arc_buf_t *tbuf;
 240                 uint32_t tflags = ARC_WAIT;
 241                 int blksz = BP_GET_LSIZE(bp);
 242 
 243                 if (dsl_read(NULL, spa, bp, pbuf,
 244                     arc_getbuf_func, &tbuf, ZIO_PRIORITY_ASYNC_READ,
 245                     ZIO_FLAG_CANFAIL, &tflags, zb) != 0)
 246                         return (EIO);
 247 
 248                 if (f->f_ops->far_file_data)
 249                         err = f->f_ops->far_file_data(ctx, tbuf->b_data,
 250                             zb->zb_blkid * blksz, blksz);
 251 
 252                 (void) arc_buf_remove_ref(tbuf, &tbuf);
 253         }
 254         return (err);
 255 }
 256 
 257 static int
 258 far_enum_bp(spa_t *spa, far_t *da, zbookmark_t *zb,
 259     blkptr_t *bp, arc_buf_t *pbuf, uint64_t min_txg, void *ctx)
 260 {
 261         int err = 0;
 262         arc_buf_t *buf = NULL;
 263         uint32_t flags = ARC_WAIT;
 264 
 265         if (BP_IS_HOLE(bp))
 266                 return (0);
 267 
 268         if (bp->blk_birth <= min_txg)
 269                 return (0);
 270 
 271         if (BP_GET_LEVEL(bp) > 0) {
 272                 int i;
 273                 int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
 274                 blkptr_t *cbp;
 275                 zbookmark_t czb;
 276 
 277                 if (dsl_read(NULL, spa, bp, pbuf, arc_getbuf_func, &buf,
 278                     ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb) != 0)
 279                         return (EIO);
 280                 cbp = buf->b_data;
 281                 for (i = 0; i < epb; ++i, ++cbp) {
 282                         SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
 283                             zb->zb_level - 1, zb->zb_blkid * epb + i);
 284                         err = far_enum_bp(spa, da, &czb, cbp, buf, min_txg,
 285                             ctx);
 286                         if (err)
 287                                 goto out;
 288                 }
 289         } else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
 290                 int i;
 291                 int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
 292                 dnode_phys_t *dnp;
 293 
 294                 if (dsl_read(NULL, spa, bp, pbuf, arc_getbuf_func, &buf,
 295                     ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
 296                     &flags, zb) != 0) {
 297                         err = EIO;
 298                         goto out;
 299                 }
 300                 dnp = buf->b_data;
 301                 for (i = 0; i < epb; ++i, ++dnp) {
 302                         uint64_t dnobj = zb->zb_blkid * epb + i;
 303                         if (dnp->dn_type == DMU_OT_NONE)
 304                                 continue;
 305                         err = far_dnode_changed(spa, da, dnobj, dnp, buf,
 306                             NULL, NULL);
 307                         if (err)
 308                                 goto out;
 309                 }
 310         } else {
 311                 err = far_file_cb(spa, da, zb, bp, pbuf, ctx);
 312         }
 313 out:
 314         if (buf)
 315                 (void) arc_buf_remove_ref(buf, &buf);
 316 
 317         return (err);
 318 }
 319 
 320 static int
 321 far_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
 322     const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
 323 {
 324         int err = 0;
 325         far_t *f = arg;
 326         blkptr_t *fbp = NULL;
 327         zbookmark_t czb;
 328 
 329         if (issig(JUSTLOOKING) && issig(FORREAL))
 330                 return (EINTR);
 331 
 332         if (f->f_fromds)
 333                 SET_BOOKMARK(&czb, f->f_fromds->ds_object, zb->zb_object,
 334                     zb->zb_level, zb->zb_blkid);
 335 
 336         if (zb->zb_object != DMU_META_DNODE_OBJECT)
 337                 return (0);
 338 
 339         if (bp == NULL) {
 340                 arc_buf_t *fpbuf = NULL;
 341 
 342                 if (!f->f_fromds)
 343                         return (0);
 344 
 345                 err = far_find_from_bp(spa, f->f_dnp, f->f_bl,
 346                     &czb, &fbp, &fpbuf);
 347                 if (err)
 348                         return (EIO);
 349                 if (fbp) {
 350                         err = far_enum_bp(spa, f, &czb, fbp, fpbuf, 0, NULL);
 351                         if (err)
 352                                 return (EIO);
 353                 }
 354                 return (0);
 355         } else if (zb->zb_level == 0) {
 356                 dnode_phys_t *tblk;
 357                 dnode_phys_t *fblk = NULL;
 358                 arc_buf_t *tbuf;
 359                 arc_buf_t *fbuf = NULL;
 360                 arc_buf_t *fpbuf = NULL;
 361                 uint32_t fflags = ARC_WAIT;
 362                 uint32_t tflags = ARC_WAIT;
 363                 int blksz = BP_GET_LSIZE(bp);
 364                 int i;
 365 
 366                 if (dsl_read(NULL, spa, bp, pbuf,
 367                     arc_getbuf_func, &tbuf, ZIO_PRIORITY_ASYNC_READ,
 368                     ZIO_FLAG_CANFAIL, &tflags, zb) != 0)
 369                         return (EIO);
 370                 tblk = tbuf->b_data;
 371 
 372                 if (f->f_fromds) {
 373                         err = far_find_from_bp(spa, f->f_dnp, f->f_bl, zb,
 374                             &fbp, &fpbuf);
 375                         if (err)
 376                                 return (EIO);
 377                 }
 378                 if (fbp) {
 379                         if (dsl_read(NULL, spa, fbp, fpbuf,
 380                             arc_getbuf_func, &fbuf, ZIO_PRIORITY_ASYNC_READ,
 381                             ZIO_FLAG_CANFAIL, &fflags, &czb) != 0) {
 382                                 (void) arc_buf_remove_ref(tbuf, &tbuf);
 383                                 return (EIO);
 384                         }
 385                         fblk = fbuf->b_data;
 386                         if (blksz != BP_GET_LSIZE(fbp))
 387                                 return (EIO);
 388                 }
 389                 for (i = 0; i < blksz >> DNODE_SHIFT; i++) {
 390                         uint64_t dnobj = (zb->zb_blkid <<
 391                                 (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i;
 392                         err = 0;
 393                         if (fbuf && (tblk[i].dn_type == DMU_OT_NONE) &&
 394                             fblk[i].dn_type != DMU_OT_NONE) {
 395                                 err = far_dnode_changed(spa, f, dnobj,
 396                                     fblk + i, fbuf, NULL, NULL);
 397                         } else if (fbuf) {
 398                                 if (memcmp(tblk + i, fblk + i, sizeof (*tblk)))
 399                                         err = far_dnode_changed(spa, f,
 400                                             dnobj, fblk + i, fbuf, tblk + i,
 401                                             tbuf);
 402                         } else {
 403                                 if (tblk[i].dn_type != DMU_OT_NONE)
 404                                         err = far_dnode_changed(spa, f,
 405                                             dnobj, NULL, NULL, tblk + i, tbuf);
 406                         }
 407                         if (err)
 408                                 break;
 409                 }
 410                 (void) arc_buf_remove_ref(tbuf, &tbuf);
 411                 if (fbuf)
 412                         (void) arc_buf_remove_ref(fbuf, &fbuf);
 413 
 414                 if (err)
 415                         return (EIO);
 416                 /* Don't care about the data blocks */
 417                 return (TRAVERSE_VISIT_NO_CHILDREN);
 418         }
 419         return (0);
 420 }
 421 
 422 #define DIR_FROM        1
 423 #define DIR_TO          2
 424 static int
 425 far_diff_dir(far_t *f, uint64_t dnobj, int dir, void *ctx)
 426 {
 427         zap_cursor_t zc;
 428         zap_attribute_t *za;
 429         int err;
 430         objset_t *os1;
 431         objset_t *os2;
 432         uint64_t mask =  ZFS_DIRENT_OBJ(-1ULL);
 433         uint64_t num;
 434         uint64_t ix = 0;
 435 
 436         if (dir == DIR_FROM) {
 437                 os1 = f->f_fromsnap;
 438                 os2 = f->f_tosnap;
 439         } else {
 440                 os1 = f->f_tosnap;
 441                 os2 = f->f_fromsnap;
 442         }
 443 
 444         za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
 445         for (zap_cursor_init(&zc, os1, dnobj);
 446             (err = zap_cursor_retrieve(&zc, za)) == 0;
 447             zap_cursor_advance(&zc), ++ix) {
 448                 err = zap_lookup(os2, dnobj, za->za_name, sizeof (num), 1,
 449                     &num);
 450                 if (err && err != ENOENT)
 451                         break;
 452                 if (err == ENOENT) {
 453                         if (dir == DIR_FROM) {
 454                                 if (f->f_ops->far_dirent_del) {
 455                                         err = f->f_ops->far_dirent_del(ctx,
 456                                             za->za_name,
 457                                             za->za_first_integer & mask);
 458                                         if (err)
 459                                                 goto out;
 460                                 }
 461                         } else {
 462                                 if (f->f_ops->far_dirent_add) {
 463                                         err = f->f_ops->far_dirent_add(ctx,
 464                                             za->za_name,
 465                                             za->za_first_integer & mask);
 466                                         if (err)
 467                                                 goto out;
 468                                 }
 469                         }
 470                 } else if ((za->za_first_integer & mask) != (num & mask)) {
 471                         if (dir == DIR_TO) {
 472                                 /* report only once */
 473                                 if (f->f_ops->far_dirent_mod) {
 474                                         err = f->f_ops->far_dirent_mod(ctx,
 475                                             za->za_name, num & mask,
 476                                             za->za_first_integer & mask);
 477                                         if (err)
 478                                                 goto out;
 479                                 }
 480                         }
 481                 } else {
 482                         if (dir == DIR_TO) {
 483                                 /* report only once */
 484                                 if (f->f_ops->far_dirent_unmod) {
 485                                         err = f->f_ops->far_dirent_unmod(ctx,
 486                                             za->za_name, num & mask);
 487                                         if (err)
 488                                                 goto out;
 489                                 }
 490                         }
 491                 }
 492         }
 493         err = 0;
 494 out:
 495         zap_cursor_fini(&zc);
 496         kmem_free(za, sizeof (zap_attribute_t));
 497 
 498         return (err);
 499 }
 500 static int
 501 far_enum_dir(far_t *f, uint64_t dnobj, int dir, void *ctx)
 502 {
 503         zap_cursor_t zc;
 504         zap_attribute_t *za;
 505         int err;
 506         objset_t *os;
 507         uint64_t mask =  ZFS_DIRENT_OBJ(-1ULL);
 508 
 509         if (dir == DIR_FROM)
 510                 os = f->f_fromsnap;
 511         else
 512                 os = f->f_tosnap;
 513 
 514         za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
 515         for (zap_cursor_init(&zc, os, dnobj);
 516             (err = zap_cursor_retrieve(&zc, za)) == 0;
 517             zap_cursor_advance(&zc)) {
 518                 if (dir == DIR_FROM) {
 519                         if (f->f_ops->far_dirent_del) {
 520                                 err = f->f_ops->far_dirent_del(ctx,
 521                                     za->za_name, za->za_first_integer & mask);
 522                                 if (err)
 523                                         break;
 524                         }
 525                 } else {
 526                         if (f->f_ops->far_dirent_add) {
 527                                 err = f->f_ops->far_dirent_add(ctx,
 528                                     za->za_name, za->za_first_integer & mask);
 529                                 if (err)
 530                                         break;
 531                         }
 532                 }
 533         }
 534         if (err == ENOENT)
 535                 err = 0;
 536 
 537         zap_cursor_fini(&zc);
 538         kmem_free(za, sizeof (zap_attribute_t));
 539 
 540         return (err);
 541 }
 542 
 543 static int
 544 far_dnode_changed(spa_t *spa, far_t *f, uint64_t dnobj,
 545     dnode_phys_t *from, arc_buf_t *frombuf, dnode_phys_t *to, arc_buf_t *tobuf)
 546 {
 547         int err = 0;
 548         int type = 0;
 549         far_info_t si;
 550 
 551         if (dnobj == f->f_shares_dir)
 552                 return (0);
 553 
 554         if (to && to->dn_type != DMU_OT_PLAIN_FILE_CONTENTS &&
 555             to->dn_type != DMU_OT_DIRECTORY_CONTENTS) {
 556                 to = NULL;
 557         }
 558         if (from && from->dn_type != DMU_OT_PLAIN_FILE_CONTENTS &&
 559             from->dn_type != DMU_OT_DIRECTORY_CONTENTS) {
 560                 from = NULL;
 561         }
 562 
 563         if (from) {
 564                 err = far_get_info(f, dnobj, FAR_OLD, &si, FI_ATTR_LINKS);
 565                 if (err)
 566                         return (err);
 567                 if (si.si_nlinks == 0)
 568                         from = NULL;
 569         }
 570         if (to) {
 571                 err = far_get_info(f, dnobj, FAR_NEW, &si, FI_ATTR_LINKS);
 572                 if (err)
 573                         return (err);
 574                 if (si.si_nlinks == 0)
 575                         to = NULL;
 576         }
 577 
 578         if (!to && !from)
 579                 return (0);
 580 
 581         if (from) {
 582                 if (from->dn_bonustype != DMU_OT_SA &&
 583                     from->dn_bonustype != DMU_OT_ZNODE)
 584                         return (EINVAL);
 585         }
 586         if (to) {
 587                 if (to->dn_bonustype != DMU_OT_SA &&
 588                     to->dn_bonustype != DMU_OT_ZNODE)
 589                         return (EINVAL);
 590         }
 591 
 592         if (from)
 593                 type = from->dn_type;
 594         else if (to)
 595                 type = to->dn_type;
 596 
 597         err = 0;
 598         if (type == DMU_OT_DIRECTORY_CONTENTS) {
 599                 if (from && to) {
 600                         if (f->f_ops->far_dir_mod)
 601                                 err = f->f_ops->far_dir_mod(f, dnobj);
 602                 } else if (from) {
 603                         if (f->f_ops->far_dir_del)
 604                                 err = f->f_ops->far_dir_del(f, dnobj);
 605                 } else if (to) {
 606                         if (f->f_ops->far_dir_add)
 607                                 err = f->f_ops->far_dir_add(f, dnobj);
 608                 }
 609         } else if (type == DMU_OT_PLAIN_FILE_CONTENTS) {
 610                 if (from && to) {
 611                         if (f->f_ops->far_file_mod)
 612                                 err = f->f_ops->far_file_mod(f, dnobj);
 613                 } else if (from) {
 614                         if (f->f_ops->far_file_del)
 615                                 err = f->f_ops->far_file_del(f, dnobj);
 616                 } else if (to) {
 617                         if (f->f_ops->far_file_add)
 618                                 err = f->f_ops->far_file_add(f, dnobj);
 619                 }
 620         } else {
 621                 /* TODO other types, symlinks? */
 622                 err = 0;
 623         }
 624         return (err);
 625 }
 626 
 627 typedef struct _far_search {
 628         far_t *zs_f;
 629         uint64_t zs_dnobj;
 630         uint64_t zs_parent;
 631         objset_t *zs_osp;
 632 } far_search_t;
 633 
 634 static int
 635 search_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
 636     const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
 637 {
 638         far_search_t *zs = arg;
 639         far_t *f = zs->zs_f;
 640         arc_buf_t *buf;
 641         uint32_t flags = ARC_WAIT;
 642         int ebp;
 643         int i;
 644         int ret;
 645 
 646         if (issig(JUSTLOOKING) && issig(FORREAL))
 647                 return (EINTR);
 648 
 649         if (zb->zb_object != DMU_META_DNODE_OBJECT)
 650                 return (0);
 651 
 652         if (zb->zb_level != 0)
 653                 return (0);
 654 
 655         if (!bp || BP_IS_HOLE(bp))
 656                 return (0);
 657 
 658         if (BP_GET_TYPE(bp) != DMU_OT_DNODE)
 659                 return (0);
 660 
 661         ebp = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
 662 
 663         if (dsl_read(NULL, spa, bp, pbuf,
 664             arc_getbuf_func, &buf, ZIO_PRIORITY_ASYNC_READ,
 665             ZIO_FLAG_CANFAIL, &flags, zb) != 0)
 666                 return (EIO);
 667         dnp = buf->b_data;
 668 
 669         for (i = 0; i < ebp; ++i) {
 670                 zap_cursor_t zc;
 671                 zap_attribute_t *za;
 672                 uint64_t mask = ZFS_DIRENT_OBJ(-1ULL);
 673                 uint64_t ix = 0;
 674                 uint64_t dnobj = (zb->zb_blkid <<
 675                         (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i;
 676 
 677                 if (dnp[i].dn_type != DMU_OT_DIRECTORY_CONTENTS)
 678                         continue;
 679                 if (dnobj == f->f_shares_dir)
 680                         continue;
 681 
 682                 za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
 683                 for (zap_cursor_init(&zc, zs->zs_osp, dnobj);
 684                     (ret = zap_cursor_retrieve(&zc, za)) == 0;
 685                     zap_cursor_advance(&zc), ++ix) {
 686                         if ((za->za_first_integer & mask) ==
 687                             (zs->zs_dnobj & mask)) {
 688                                 zs->zs_parent = dnobj;
 689                                 break;
 690                         }
 691                 }
 692                 zap_cursor_fini(&zc);
 693                 kmem_free(za, sizeof (zap_attribute_t));
 694         }
 695 
 696         (void) arc_buf_remove_ref(buf, &buf);
 697 
 698         if (zs->zs_parent)
 699                 return (EIO);   /* abort search */
 700 
 701         return (TRAVERSE_VISIT_NO_CHILDREN);
 702 }
 703 
 704 static int
 705 far_search_parent(far_t *f, uint64_t dnobj, far_which_t which,
 706     uint64_t *parent)
 707 {
 708         dsl_dataset_t *ds;
 709         far_search_t zs;
 710         int ret;
 711 
 712         if (which == FAR_OLD) {
 713                 ds = f->f_fromds;
 714                 zs.zs_osp = f->f_fromsnap;
 715         } else {
 716                 ds = f->f_tods;
 717                 zs.zs_osp = f->f_tosnap;
 718         }
 719 
 720         zs.zs_f = f;
 721         zs.zs_dnobj = dnobj;
 722         zs.zs_parent = 0;
 723         ret = traverse_dataset(ds, 0, TRAVERSE_PRE, search_cb, &zs);
 724         if (zs.zs_parent) {
 725                 *parent = zs.zs_parent;
 726                 return (0);
 727         }
 728 
 729         return (ret ? ret : ENOENT);
 730 }
 731 
 732 int
 733 far_get_info(far_t *f, uint64_t dnobj, far_which_t which,
 734     far_info_t *sp, uint64_t flags)
 735 {
 736         int ret;
 737         sa_handle_t *hdl = NULL;
 738         dmu_buf_t *db;
 739         objset_t *osp;
 740         sa_bulk_attr_t bulk[13];
 741         int count = 0;
 742         sa_attr_type_t *sa_table;
 743 
 744         if (which == FAR_OLD) {
 745                 osp = f->f_fromsnap;
 746                 if (!osp)
 747                         return (ENOENT);
 748                 sa_table = f->f_from_sa_table;
 749         } else if (which == FAR_NEW) {
 750                 osp = f->f_tosnap;
 751                 sa_table = f->f_to_sa_table;
 752         } else {
 753                 return (EINVAL);
 754         }
 755 
 756         ret = far_grab_sa_handle(osp, dnobj, &hdl, &db, FTAG);
 757         if (ret)
 758                 return (ret);
 759 
 760         if (flags & FI_ATTR_ATIME) {
 761                 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_ATIME], NULL,
 762                     &sp->si_atime, sizeof (sp->si_atime));
 763         }
 764         if (flags & FI_ATTR_MTIME) {
 765                 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MTIME], NULL,
 766                     &sp->si_mtime, sizeof (sp->si_mtime));
 767         }
 768         if (flags & FI_ATTR_CTIME) {
 769                 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL,
 770                     &sp->si_ctime, sizeof (sp->si_ctime));
 771         }
 772         if (flags & FI_ATTR_OTIME) {
 773                 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CRTIME], NULL,
 774                     &sp->si_otime, sizeof (sp->si_otime));
 775         }
 776         if (flags & FI_ATTR_MODE) {
 777                 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
 778                     &sp->si_mode, sizeof (sp->si_mode));
 779         }
 780         if (flags & FI_ATTR_SIZE) {
 781                 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_SIZE], NULL,
 782                     &sp->si_size, sizeof (sp->si_size));
 783         }
 784         if (flags & FI_ATTR_PARENT) {
 785                 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL,
 786                     &sp->si_parent, sizeof (sp->si_parent));
 787         }
 788         if (flags & FI_ATTR_LINKS) {
 789                 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL,
 790                     &sp->si_nlinks, sizeof (sp->si_nlinks));
 791         }
 792         if (flags & FI_ATTR_RDEV) {
 793                 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_RDEV], NULL,
 794                     &sp->si_rdev, sizeof (sp->si_rdev));
 795         }
 796         if (flags & FI_ATTR_UID) {
 797                 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_UID], NULL,
 798                     &sp->si_uid, sizeof (sp->si_uid));
 799         }
 800         if (flags & FI_ATTR_GID) {
 801                 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GID], NULL,
 802                     &sp->si_gid, sizeof (sp->si_gid));
 803         }
 804         if (flags & FI_ATTR_GEN) {
 805                 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL,
 806                     &sp->si_gen, sizeof (sp->si_gen));
 807         }
 808         /* XXX if you add things, also bump the size of bulk */
 809         /* XXX XATTR */
 810 
 811         /* XXX TODO get flags to check for xattrdir */
 812         if (count) {
 813                 ret = sa_bulk_lookup(hdl, bulk, count);
 814                 if (ret)
 815                         goto out;
 816         }
 817 
 818         if ((flags & FI_ATTR_PARENT) && sp->si_parent != dnobj) {
 819                 far_info_t si;
 820                 int good = 0;
 821                 /*
 822                  * verify parent. this is very expensive and only a workaround
 823                  */
 824                 ret = far_get_info(f, sp->si_parent, which, &si, FI_ATTR_MODE);
 825                 if (ret && ret != ENOENT)
 826                         goto out;
 827                 if (ret == 0 && S_ISDIR(si.si_mode)) {
 828                         ret = far_find_entry(f, sp->si_parent, dnobj, which,
 829                             NULL);
 830                         if (ret && ret != ENOENT)
 831                                 goto out;
 832                         if (ret == 0)
 833                                 good = 1;
 834                 }
 835                 if (!good) {
 836                         uint64_t parent;
 837 
 838                         cmn_err(CE_NOTE, "parent wrong, do a brute force "
 839                             "search for ino %"PRIu64"\n", dnobj);
 840                         ret = far_search_parent(f, dnobj, which, &parent);
 841                         if (ret == ENOENT) {
 842                                 cmn_err(CE_NOTE, "no parent found\n");
 843                                 ret = EINVAL;
 844                                 goto out;
 845                         }
 846                         if (ret)
 847                                 goto out;
 848                         sp->si_parent = parent;
 849                         cmn_err(CE_NOTE, "parent found, use %"PRIu64"\n",
 850                             parent);
 851                         /*
 852                          * TODO add a bad parent cache to prevent additional
 853                          * lookup in pass 2
 854                          */
 855                 }
 856         }
 857 
 858 out:
 859         far_release_sa_handle(hdl, db, FTAG);
 860         return (ret);
 861 }
 862 
 863 int
 864 far_file_contents(far_t *f, uint64_t dnobj, void *ctx)
 865 {
 866         dnode_t *from = NULL;
 867         dnode_t *to = NULL;
 868         int err;
 869         int i;
 870         zbookmark_t czb;
 871         spa_t *spa = f->f_tods->ds_dir->dd_pool->dp_spa;
 872 
 873         if (f->f_fromds) {
 874                 err = dnode_hold(f->f_fromsnap, dnobj, FTAG, &from);
 875                 if (err && err != ENOENT)
 876                         return (err);
 877         }
 878         if (from && from->dn_type != DMU_OT_PLAIN_FILE_CONTENTS) {
 879                 dnode_rele(from, FTAG);
 880                 from = NULL;
 881         }
 882         err = dnode_hold(f->f_tosnap, dnobj, FTAG, &to);
 883         if (err)
 884                 goto out;
 885         if (to->dn_type != DMU_OT_PLAIN_FILE_CONTENTS) {
 886                 err = EINVAL;
 887                 goto out;
 888         }
 889         if (from) {
 890                 f->f_filebl = kmem_zalloc(sizeof (blklevel_t)*from->dn_nlevels,
 891                                         KM_SLEEP);
 892                 for (i = 0; i < from->dn_nlevels; ++i)
 893                         f->f_filebl[i].bl_blk = -1;
 894                 i = from->dn_nlevels - 1;
 895                 f->f_filebl[i].bl_nslots = from->dn_nblkptr;
 896                 f->f_filebl[i].bl_bp = &from->dn_phys->dn_blkptr[0];
 897                 f->f_filebl[i].bl_blk = 0;
 898                 f->f_filebl[i].bl_buf = from->dn_dbuf->db_parent->db_buf;
 899         }
 900         for (i = 0; i < to->dn_nblkptr; ++i) {
 901                 SET_BOOKMARK(&czb, f->f_tods->ds_object, dnobj,
 902                     to->dn_nlevels - 1, i);
 903                 err = far_enum_bp(spa, f, &czb, to->dn_phys->dn_blkptr + i,
 904                     NULL, f->f_fromtxg, ctx);
 905                 if (err)
 906                         goto out;
 907         }
 908 out:
 909         if (f->f_filebl) {
 910                 kmem_free(f->f_filebl, sizeof (blklevel_t) * from->dn_nlevels);
 911                 f->f_filebl = NULL;
 912         }
 913         if (from)
 914                 dnode_rele(from, FTAG);
 915         if (to)
 916                 dnode_rele(to, FTAG);
 917 
 918         return (err);
 919 }
 920 
 921 int
 922 far_dir_contents(far_t *f, uint64_t dnobj, void *ctx)
 923 {
 924         dnode_t *from = NULL;
 925         dnode_t *to = NULL;
 926         int err;
 927 
 928         if (f->f_fromds) {
 929                 err = dnode_hold(f->f_fromsnap, dnobj, FTAG, &from);
 930                 if (err && err != ENOENT)
 931                         return (err);
 932         }
 933         if (from && from->dn_type != DMU_OT_DIRECTORY_CONTENTS) {
 934                 dnode_rele(from, FTAG);
 935                 from = NULL;
 936         }
 937         err = dnode_hold(f->f_tosnap, dnobj, FTAG, &to);
 938         if (err && err != ENOENT)
 939                 return (err);
 940         if (to && to->dn_type != DMU_OT_DIRECTORY_CONTENTS) {
 941                 dnode_rele(to, FTAG);
 942                 to = NULL;
 943         }
 944 
 945         if (to && from) {
 946                 err = far_diff_dir(f, dnobj, DIR_TO, ctx);
 947                 if (err)
 948                         goto out;
 949                 err = far_diff_dir(f, dnobj, DIR_FROM, ctx);
 950         } else if (to) {
 951                 err = far_enum_dir(f, dnobj, DIR_TO, ctx);
 952         } else if (from) {
 953                 err = far_enum_dir(f, dnobj, DIR_FROM, ctx);
 954         }
 955 out:
 956         if (from)
 957                 dnode_rele(from, FTAG);
 958         if (to)
 959                 dnode_rele(to, FTAG);
 960 
 961         return (err);
 962 }
 963 
 964 int
 965 far_find_entry(far_t *f, uint64_t dirobj, uint64_t dnobj,
 966     far_which_t which, char **name)
 967 {
 968         zap_cursor_t zc;
 969         zap_attribute_t *za;
 970         int err;
 971         uint64_t mask = ZFS_DIRENT_OBJ(-1ULL);
 972         struct objset *os;
 973 
 974         if (which == FAR_OLD) {
 975                 os = f->f_fromsnap;
 976                 if (!os)
 977                         return (ENOENT);
 978         } else if (which == FAR_NEW) {
 979                 os = f->f_tosnap;
 980         } else {
 981                 return (EINVAL);
 982         }
 983 
 984         if (name)
 985                 *name = NULL;
 986         za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
 987         for (zap_cursor_init(&zc, os, dirobj);
 988             (err = zap_cursor_retrieve(&zc, za)) == 0;
 989             zap_cursor_advance(&zc)) {
 990                 if ((za->za_first_integer & mask) == (dnobj & mask)) {
 991                         if (name)
 992                                 *name = za->za_name;
 993                         break;
 994                 }
 995         }
 996         zap_cursor_fini(&zc);
 997         return (err);
 998 }
 999 
1000 void
1001 far_free_name(char *name)
1002 {
1003         zap_attribute_t *za;
1004 
1005         if (!name)
1006                 return;
1007 
1008         za = (zap_attribute_t *)(name - offsetof(zap_attribute_t, za_name));
1009         kmem_free(za, sizeof (*za));
1010 }
1011 
1012 int
1013 far_lookup_entry(far_t *f, uint64_t dirobj, char *name,
1014     far_which_t which, uint64_t *dnobj)
1015 {
1016         struct objset *osp;
1017         int ret;
1018 
1019         if (which == FAR_OLD) {
1020                 osp = f->f_fromsnap;
1021                 if (!osp)
1022                         return (ENOENT);
1023         } else if (which == FAR_NEW) {
1024                 osp = f->f_tosnap;
1025         } else {
1026                 return (EINVAL);
1027         }
1028 
1029         ret =  zap_lookup(osp, dirobj, name, sizeof (*dnobj), 1, dnobj);
1030         if (ret)
1031                 return (ret);
1032         *dnobj = ZFS_DIRENT_OBJ(*dnobj);
1033 
1034         return (0);
1035 }
1036 
1037 int
1038 far_write(far_t *f, const uint8_t *data, int len)
1039 {
1040         ssize_t resid; /* have to get resid to get detailed errno */
1041         int err;
1042 
1043         err = vn_rdwr(UIO_WRITE, f->f_vp, (caddr_t)data,
1044             len, 0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid);
1045         *f->f_offp += len;
1046 
1047         return (err);
1048 }
1049 
1050 int
1051 far_get_uuid(far_t *f, far_which_t which, uint8_t data[16])
1052 {
1053         if (which == FAR_OLD && !f->f_fromds)
1054                 return (ENOENT);
1055 
1056         LE_OUT64(data, f->f_tods->ds_dir->dd_pool->dp_spa->spa_config_guid);
1057         if (which == FAR_OLD) {
1058                 LE_OUT64(data + 8, f->f_fromds->ds_phys->ds_guid);
1059         } else {
1060                 LE_OUT64(data + 8, f->f_tods->ds_phys->ds_guid);
1061         }
1062         return (0);
1063 }
1064 
1065 int
1066 far_get_ctransid(far_t *f, far_which_t which, uint64_t *ctransid)
1067 {
1068         if (which == FAR_OLD && !f->f_fromds)
1069                 return (ENOENT);
1070 
1071         if (which == FAR_OLD)
1072                 *ctransid = f->f_fromds->ds_phys->ds_creation_txg;
1073         else
1074                 *ctransid = f->f_tods->ds_phys->ds_creation_txg;
1075         return (0);
1076 }
1077 
1078 int
1079 far_get_snapname(far_t *f, far_which_t which,
1080     char **name, int *len)
1081 {
1082         dsl_dataset_t *ds;
1083 
1084         if (which == FAR_OLD && !f->f_fromds)
1085                 return (ENOENT);
1086 
1087         if (which == FAR_OLD)
1088                 ds = f->f_fromds;
1089         else
1090                 ds = f->f_tods;
1091 
1092         *len = dsl_dataset_namelen(ds) + 1;
1093         *name = kmem_alloc(*len, KM_SLEEP);
1094         dsl_dataset_name(ds, *name);
1095         return (0);
1096 }
1097 
1098 int
1099 far_read_symlink(far_t *f, uint64_t dnobj, far_which_t which,
1100     char **target, int *plen)
1101 {
1102         int err;
1103         int ret;
1104         sa_handle_t *hdl = NULL;
1105         dmu_buf_t *db;
1106         objset_t *osp;
1107         dmu_object_info_t doi;
1108         sa_attr_type_t *sa_table;
1109 
1110         if (which == FAR_OLD) {
1111                 osp = f->f_fromsnap;
1112                 if (!osp)
1113                         return (EINVAL);
1114                 sa_table = f->f_from_sa_table;
1115         } else if (which == FAR_NEW) {
1116                 osp = f->f_tosnap;
1117                 sa_table = f->f_to_sa_table;
1118         } else {
1119                 return (EINVAL);
1120         }
1121 
1122         err = far_grab_sa_handle(osp, dnobj, &hdl, &db, FTAG);
1123         if (err)
1124                 return (err);
1125 
1126         dmu_object_info_from_db(db, &doi);
1127         if (doi.doi_bonus_type == DMU_OT_SA) {
1128                 int len;
1129 
1130                 ret = sa_size(hdl, sa_table[ZPL_SYMLINK], &len);
1131                 if (ret)
1132                         goto out;
1133                 *target = kmem_alloc(len + 1, KM_SLEEP);
1134                 *plen = len;
1135                 (*target)[len] = 0;
1136                 ret = sa_lookup(hdl, sa_table[ZPL_SYMLINK], *target, len + 1);
1137                 if (ret)
1138                         kmem_free(*target, len + 1);
1139         } else {
1140                 /*
1141                  * TODO read target from file data, the old way
1142                  * see zfs_readlink
1143                  */
1144                 ret = EINVAL;
1145         }
1146 
1147 out:
1148         far_release_sa_handle(hdl, db, FTAG);
1149 
1150         return (ret);
1151 }
1152 
1153 int
1154 far_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp,
1155     offset_t *off)
1156 {
1157         dsl_dataset_t *ds;
1158         dsl_dataset_t *fromds = NULL;
1159         int err = 0;
1160         far_t *f;
1161         arc_buf_t *buf = NULL;
1162         uint32_t flags;
1163         objset_phys_t *osp = NULL;
1164         int i;
1165         zbookmark_t zb;
1166 
1167         memset(&f, 0, sizeof (f));
1168         ds = tosnap->os_dsl_dataset;
1169         if (fromsnap)
1170                 fromds = fromsnap->os_dsl_dataset;
1171 
1172         /* make certain we are looking at snapshots */
1173         if (!dsl_dataset_is_snapshot(ds) ||
1174             (fromds && !dsl_dataset_is_snapshot(fromds)))
1175                 return (EINVAL);
1176 
1177         /* fromsnap must be earlier and from the same lineage as tosnap */
1178         if (fromds) {
1179                 if (fromds->ds_phys->ds_creation_txg >=
1180                     ds->ds_phys->ds_creation_txg)
1181                         return (EXDEV);
1182 
1183                 if (fromds->ds_dir != ds->ds_dir)
1184                         return (EXDEV);
1185 
1186                 /*
1187                  * read root dnode from from-dataset
1188                  */
1189                 flags = ARC_WAIT;
1190                 SET_BOOKMARK(&zb, fromds->ds_object, ZB_ROOT_OBJECT,
1191                     ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
1192                 err = dsl_read_nolock(NULL, fromds->ds_dir->dd_pool->dp_spa,
1193                     &fromds->ds_phys->ds_bp, arc_getbuf_func, &buf,
1194                     ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, &zb);
1195                 if (err)
1196                         return (err);
1197                 osp = buf->b_data;
1198         }
1199         f = kmem_zalloc(sizeof (far_t), KM_SLEEP);
1200         f->f_vp = vp;
1201         f->f_offp = off;
1202         f->f_err = 0;
1203         f->f_fromds = fromds;
1204         f->f_tods = ds;
1205         f->f_fromsnap = fromsnap;
1206         f->f_tosnap = tosnap;
1207         f->f_dmu_sendarg.dsa_off = off;
1208         f->f_dmu_sendarg.dsa_outfd = outfd;
1209         f->f_dmu_sendarg.dsa_proc = curproc;
1210         f->f_dnp = osp ? &osp->os_meta_dnode : NULL;
1211         mutex_enter(&ds->ds_sendstream_lock);
1212         list_insert_head(&ds->ds_sendstreams, &f->f_dmu_sendarg);
1213         mutex_exit(&ds->ds_sendstream_lock);
1214 
1215         if (fromds) {
1216                 f->f_fromtxg = fromds->ds_phys->ds_creation_txg;
1217                 f->f_bl = kmem_zalloc(sizeof (blklevel_t) *
1218                     f->f_dnp->dn_nlevels, KM_SLEEP);
1219                 for (i = 0; i < f->f_dnp->dn_nlevels; ++i)
1220                         f->f_bl[i].bl_blk = -1;
1221                 i = f->f_dnp->dn_nlevels - 1;
1222                 f->f_bl[i].bl_nslots = f->f_dnp->dn_nblkptr;
1223                 f->f_bl[i].bl_bp = &f->f_dnp->dn_blkptr[0];
1224                 f->f_bl[i].bl_blk = 0;
1225 
1226                 err = far_sa_setup(fromsnap, &f->f_from_sa_table);
1227                 if (err)
1228                         goto out;
1229         }
1230         err = far_sa_setup(tosnap, &f->f_to_sa_table);
1231         if (err)
1232                 goto out;
1233 
1234         err = zap_lookup(tosnap, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1,
1235             &f->f_shares_dir);
1236         if (err && err != ENOENT)
1237                 goto out;
1238 
1239         err = far_start(f, &f->f_ops);
1240         if (err)
1241                 goto out;
1242 
1243 
1244 
1245         err = traverse_dataset(ds, f->f_fromtxg,
1246             TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, far_cb, f);
1247         if (err) {
1248                 far_abort(f);
1249                 goto out;
1250         }
1251         err = far_start2(f, &f->f_ops);
1252         if (err) {
1253                 goto out;
1254         }
1255         err = traverse_dataset(ds, f->f_fromtxg,
1256             TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, far_cb, f);
1257         if (err) {
1258                 far_abort(f);
1259                 goto out;
1260         }
1261 
1262         err = far_end(f);
1263         if (err)
1264                 goto out;
1265 
1266 out:
1267         if (fromds) {
1268                 for (i = 0; i < f->f_dnp->dn_nlevels - 1; ++i) {
1269                         blklevel_t *b = f->f_bl + i;
1270                         if (b->bl_buf)
1271                                 arc_buf_remove_ref(b->bl_buf, &b->bl_buf);
1272                 }
1273                 kmem_free(f->f_bl, sizeof (blklevel_t) * f->f_dnp->dn_nlevels);
1274         }
1275 
1276         if (buf)
1277                 arc_buf_remove_ref(buf, &buf);
1278 
1279         mutex_enter(&ds->ds_sendstream_lock);
1280         list_remove(&ds->ds_sendstreams, &f->f_dmu_sendarg);
1281         mutex_exit(&ds->ds_sendstream_lock);
1282         kmem_free(f, sizeof (far_t));
1283 
1284         return (err);
1285 }