1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2012 STRATO AG. All rights reserved. 23 */ 24 #include <sys/zfs_context.h> 25 #include <sys/stat.h> 26 #include <sys/errno.h> 27 #include <sys/mkdev.h> 28 #include <sys/debug.h> 29 #include <sys/open.h> 30 #include <sys/zfs_ioctl.h> 31 #include <zfs_namecheck.h> 32 #include <sys/policy.h> 33 #include <sys/dmu_objset.h> 34 #include <sys/dsl_prop.h> 35 #include <sys/zvol.h> 36 #include <sys/zap.h> 37 #include <sys/dsl_dataset.h> 38 #include <sys/dmu_traverse.h> 39 #include <sys/dsl_dir.h> 40 #include <sys/arc.h> 41 #include <sys/spa.h> 42 #include <sys/spa_impl.h> 43 #include <sys/sa.h> 44 #include <sys/sa_impl.h> 45 #include <sys/zfs_acl.h> 46 #include <sys/zfs_sa.h> 47 #include <sys/zfs_znode.h> 48 #include <sys/dbuf.h> 49 #include <sys/fits.h> 50 #include <sys/fits_impl.h> 51 52 /* 53 * fits_send generates a stream of filesystem data analogous to dmu_send. 54 * The main difference is that the fits-stream does not contain zfs-specific 55 * data and can be replayed on any filesystem. It just contains commands like 56 * MKDIR, CHMOD, RENAME etc. 57 * The stream is generated in two passes. The first pass, PASS_LINK basically 58 * creates all new files/directories and links, while the second pass, 59 * PASS_UNLINK, does all the removal of old stuff. 60 * Each pass enumerates all objects in inode order. 61 * There are some corner cases: 62 * Files / directories can only be created if the parent already exists or 63 * already has been created. If an object is encountered which parent does not 64 * satisfy this condition, it is put back and its creation will be trigger 65 * by the creation of the parent. 66 * A similar case applies on deletion. A directory can only be removed after 67 * the last contained object has been removed. If a directory is not empty, 68 * it is put back and the deletion of the last object in it triggers the 69 * deletion. 70 * If an objects gets deleted, and a new object is created under the same 71 * name, pass1 cannot create the object directly. So it is created under a 72 * temporary name and gets renamed in pass2. 73 * If an object is deleted and a new object (of possibly diffent type) 74 * created under the same inode and the same name, this change cannot be 75 * detected by enumerating the containing directory (as name + inode are 76 * unchanged). It is detected by a change of the inode generation number and 77 * a flag is set for pass2. Creation is postponed. In pass2, all enumerated 78 * directories are checked for this inode (although the entry is unchanged, 79 * the directory has a bumped txg). If it is encountered, delete + create 80 * happen both in pass2. 81 * 82 * There are lots of TODOs left: 83 * - add XATTR support 84 * - add path-caching 85 * - add a cache for brute-force parent search 86 * - add a cache for inode-search in a directory 87 * - use a hash instead of the linear list in fits_count 88 */ 89 static int 90 fits_dnode_changed(spa_t *spa, fits_t *f, uint64_t dnobj, 91 dnode_phys_t *from, arc_buf_t *frombuf, dnode_phys_t *to, arc_buf_t *tobuf); 92 93 /* copied from zfs_znode.c */ 94 static int 95 fits_sa_setup(objset_t *osp, sa_attr_type_t **sa_table) 96 { 97 uint64_t sa_obj = 0; 98 int error; 99 100 error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj); 101 if (error != 0 && error != ENOENT) 102 return (error); 103 104 error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table); 105 return (error); 106 } 107 108 static int 109 fits_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp, 110 dmu_buf_t **db, void *tag) 111 { 112 dmu_object_info_t doi; 113 int error; 114 115 if ((error = sa_buf_hold(osp, obj, tag, db)) != 0) 116 return (error); 117 118 dmu_object_info_from_db(*db, &doi); 119 if ((doi.doi_bonus_type != DMU_OT_SA && 120 doi.doi_bonus_type != DMU_OT_ZNODE) || 121 (doi.doi_bonus_type == DMU_OT_ZNODE && 122 doi.doi_bonus_size < sizeof (znode_phys_t))) { 123 sa_buf_rele(*db, tag); 124 return (ENOTSUP); 125 } 126 127 error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp); 128 if (error != 0) { 129 sa_buf_rele(*db, tag); 130 return (error); 131 } 132 133 return (0); 134 } 135 136 static void 137 fits_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag) 138 { 139 sa_handle_destroy(hdl); 140 sa_buf_rele(db, tag); 141 } 142 143 static int 144 fits_find_from_bp(spa_t *spa, dnode_phys_t *dnp, blklevel_t *bl, 145 const zbookmark_t *zb, blkptr_t **bpp, arc_buf_t **pbuf) 146 { 147 uint32_t flags; 148 int epbs = dnp->dn_indblkshift - SPA_BLKPTRSHIFT; 149 int epbmask = (1 << epbs) - 1; 150 int level; 151 int slot; 152 uint64_t blkid; 153 uint64_t blk; 154 blklevel_t *blp; 155 zbookmark_t czb; 156 int i; 157 158 *bpp = NULL; 159 for (level = dnp->dn_nlevels - 1; level >= zb->zb_level; --level) { 160 blkid = zb->zb_blkid >> (epbs * (level - zb->zb_level)); 161 blk = blkid >> epbs; 162 slot = blk & epbmask; 163 blp = bl + level; 164 165 if (blp->bl_blk == blk) 166 continue; 167 168 for (i = 0; i <= level; ++i) { 169 blklevel_t *b = bl + i; 170 171 if (b->bl_buf) 172 arc_buf_remove_ref(b->bl_buf, &b->bl_buf); 173 b->bl_bp = NULL; 174 b->bl_buf = NULL; 175 b->bl_blk = -1; 176 } 177 ASSERT(slot < blp[1].bl_nslots); 178 if (BP_IS_HOLE(blp[1].bl_bp + slot)) { 179 *bpp = NULL; 180 return (0); 181 } 182 /* 183 * load indblk 184 */ 185 flags = ARC_WAIT; 186 SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object, level, blkid); 187 if (dsl_read(NULL, spa, blp[1].bl_bp + slot, blp[1].bl_buf, 188 arc_getbuf_func, &blp->bl_buf, ZIO_PRIORITY_ASYNC_READ, 189 ZIO_FLAG_CANFAIL, &flags, &czb) != 0) 190 return (EIO); 191 blp->bl_bp = blp->bl_buf->b_data; 192 blp->bl_nslots = 1 << epbs; 193 blp->bl_blk = blk; 194 } 195 slot = zb->zb_blkid & epbmask; 196 blp = bl + zb->zb_level; 197 ASSERT(slot < blp->bl_nslots); 198 *bpp = blp->bl_bp + slot; 199 *pbuf = blp->bl_buf; 200 if (BP_IS_HOLE(*bpp)) 201 *bpp = NULL; 202 203 return (0); 204 } 205 206 static int 207 fits_file_cb(spa_t *spa, fits_t *f, zbookmark_t *zb, 208 blkptr_t *bp, arc_buf_t *pbuf, void *ctx) 209 { 210 int err = 0; 211 blkptr_t *fbp; 212 213 if (issig(JUSTLOOKING) && issig(FORREAL)) 214 return (EINTR); 215 216 if (f->f_fromds && zb->zb_objset == f->f_fromds->ds_object) 217 return (0); 218 219 if (bp == NULL) { 220 arc_buf_t *fpbuf = NULL; 221 zbookmark_t czb; 222 223 ASSERT(f->f_fromds); 224 SET_BOOKMARK(&czb, f->f_fromds->ds_object, zb->zb_object, 225 zb->zb_level, zb->zb_blkid); 226 err = fits_find_from_bp(spa, f->f_dnp, f->f_filebl, 227 &czb, &fbp, &fpbuf); 228 if (err) 229 return (err); 230 if (fbp) { 231 #if 0 232 /* XXX TODO callback for newly created hole */ 233 err = fits_enum_bp(spa, da, &czb, fbp, fpbuf); 234 if (err) 235 return (err); 236 #endif 237 } 238 } else if (zb->zb_level == 0) { 239 arc_buf_t *tbuf; 240 uint32_t tflags = ARC_WAIT; 241 int blksz = BP_GET_LSIZE(bp); 242 243 if (dsl_read(NULL, spa, bp, pbuf, 244 arc_getbuf_func, &tbuf, ZIO_PRIORITY_ASYNC_READ, 245 ZIO_FLAG_CANFAIL, &tflags, zb) != 0) 246 return (EIO); 247 248 if (f->f_ops->fits_file_data) 249 err = f->f_ops->fits_file_data(ctx, tbuf->b_data, 250 zb->zb_blkid * blksz, blksz); 251 252 (void) arc_buf_remove_ref(tbuf, &tbuf); 253 } 254 return (err); 255 } 256 257 static int 258 fits_enum_bp(spa_t *spa, fits_t *da, zbookmark_t *zb, 259 blkptr_t *bp, arc_buf_t *pbuf, uint64_t min_txg, void *ctx) 260 { 261 int err = 0; 262 arc_buf_t *buf = NULL; 263 uint32_t flags = ARC_WAIT; 264 265 if (BP_IS_HOLE(bp)) 266 return (0); 267 268 if (bp->blk_birth <= min_txg) 269 return (0); 270 271 if (BP_GET_LEVEL(bp) > 0) { 272 int i; 273 int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT; 274 blkptr_t *cbp; 275 zbookmark_t czb; 276 277 if (dsl_read(NULL, spa, bp, pbuf, arc_getbuf_func, &buf, 278 ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb) != 0) 279 return (EIO); 280 cbp = buf->b_data; 281 for (i = 0; i < epb; ++i, ++cbp) { 282 SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object, 283 zb->zb_level - 1, zb->zb_blkid * epb + i); 284 err = fits_enum_bp(spa, da, &czb, cbp, buf, min_txg, 285 ctx); 286 if (err) 287 goto out; 288 } 289 } else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) { 290 int i; 291 int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT; 292 dnode_phys_t *dnp; 293 294 if (dsl_read(NULL, spa, bp, pbuf, arc_getbuf_func, &buf, 295 ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, 296 &flags, zb) != 0) { 297 err = EIO; 298 goto out; 299 } 300 dnp = buf->b_data; 301 for (i = 0; i < epb; ++i, ++dnp) { 302 uint64_t dnobj = zb->zb_blkid * epb + i; 303 if (dnp->dn_type == DMU_OT_NONE) 304 continue; 305 err = fits_dnode_changed(spa, da, dnobj, dnp, buf, 306 NULL, NULL); 307 if (err) 308 goto out; 309 } 310 } else { 311 err = fits_file_cb(spa, da, zb, bp, pbuf, ctx); 312 } 313 out: 314 if (buf) 315 (void) arc_buf_remove_ref(buf, &buf); 316 317 return (err); 318 } 319 320 static int 321 fits_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, 322 const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) 323 { 324 int err = 0; 325 fits_t *f = arg; 326 blkptr_t *fbp = NULL; 327 zbookmark_t czb; 328 329 if (issig(JUSTLOOKING) && issig(FORREAL)) 330 return (EINTR); 331 332 if (f->f_fromds) 333 SET_BOOKMARK(&czb, f->f_fromds->ds_object, zb->zb_object, 334 zb->zb_level, zb->zb_blkid); 335 336 if (zb->zb_object != DMU_META_DNODE_OBJECT) 337 return (0); 338 339 if (bp == NULL) { 340 arc_buf_t *fpbuf = NULL; 341 342 if (!f->f_fromds) 343 return (0); 344 345 err = fits_find_from_bp(spa, f->f_dnp, f->f_bl, 346 &czb, &fbp, &fpbuf); 347 if (err) 348 return (EIO); 349 if (fbp) { 350 err = fits_enum_bp(spa, f, &czb, fbp, fpbuf, 0, NULL); 351 if (err) 352 return (EIO); 353 } 354 return (0); 355 } else if (zb->zb_level == 0) { 356 dnode_phys_t *tblk; 357 dnode_phys_t *fblk = NULL; 358 arc_buf_t *tbuf; 359 arc_buf_t *fbuf = NULL; 360 arc_buf_t *fpbuf = NULL; 361 uint32_t fflags = ARC_WAIT; 362 uint32_t tflags = ARC_WAIT; 363 int blksz = BP_GET_LSIZE(bp); 364 int i; 365 366 if (dsl_read(NULL, spa, bp, pbuf, 367 arc_getbuf_func, &tbuf, ZIO_PRIORITY_ASYNC_READ, 368 ZIO_FLAG_CANFAIL, &tflags, zb) != 0) 369 return (EIO); 370 tblk = tbuf->b_data; 371 372 if (f->f_fromds) { 373 err = fits_find_from_bp(spa, f->f_dnp, f->f_bl, zb, 374 &fbp, &fpbuf); 375 if (err) 376 return (EIO); 377 } 378 if (fbp) { 379 if (dsl_read(NULL, spa, fbp, fpbuf, 380 arc_getbuf_func, &fbuf, ZIO_PRIORITY_ASYNC_READ, 381 ZIO_FLAG_CANFAIL, &fflags, &czb) != 0) { 382 (void) arc_buf_remove_ref(tbuf, &tbuf); 383 return (EIO); 384 } 385 fblk = fbuf->b_data; 386 if (blksz != BP_GET_LSIZE(fbp)) 387 return (EIO); 388 } 389 for (i = 0; i < blksz >> DNODE_SHIFT; i++) { 390 uint64_t dnobj = (zb->zb_blkid << 391 (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; 392 err = 0; 393 if (fbuf && (tblk[i].dn_type == DMU_OT_NONE) && 394 fblk[i].dn_type != DMU_OT_NONE) { 395 err = fits_dnode_changed(spa, f, dnobj, 396 fblk + i, fbuf, NULL, NULL); 397 } else if (fbuf) { 398 if (memcmp(tblk + i, fblk + i, sizeof (*tblk))) 399 err = fits_dnode_changed(spa, f, 400 dnobj, fblk + i, fbuf, tblk + i, 401 tbuf); 402 } else { 403 if (tblk[i].dn_type != DMU_OT_NONE) 404 err = fits_dnode_changed(spa, f, 405 dnobj, NULL, NULL, tblk + i, tbuf); 406 } 407 if (err) 408 break; 409 } 410 (void) arc_buf_remove_ref(tbuf, &tbuf); 411 if (fbuf) 412 (void) arc_buf_remove_ref(fbuf, &fbuf); 413 414 if (err) 415 return (EIO); 416 /* Don't care about the data blocks */ 417 return (TRAVERSE_VISIT_NO_CHILDREN); 418 } 419 return (0); 420 } 421 422 #define DIR_FROM 1 423 #define DIR_TO 2 424 static int 425 fits_diff_dir(fits_t *f, uint64_t dnobj, int dir, void *ctx) 426 { 427 zap_cursor_t zc; 428 zap_attribute_t *za; 429 int err; 430 objset_t *os1; 431 objset_t *os2; 432 uint64_t mask = ZFS_DIRENT_OBJ(-1ULL); 433 uint64_t num; 434 uint64_t ix = 0; 435 436 if (dir == DIR_FROM) { 437 os1 = f->f_fromsnap; 438 os2 = f->f_tosnap; 439 } else { 440 os1 = f->f_tosnap; 441 os2 = f->f_fromsnap; 442 } 443 444 za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); 445 for (zap_cursor_init(&zc, os1, dnobj); 446 (err = zap_cursor_retrieve(&zc, za)) == 0; 447 zap_cursor_advance(&zc), ++ix) { 448 err = zap_lookup(os2, dnobj, za->za_name, sizeof (num), 1, 449 &num); 450 if (err && err != ENOENT) 451 break; 452 if (err == ENOENT) { 453 if (dir == DIR_FROM) { 454 if (f->f_ops->fits_dirent_del) { 455 err = f->f_ops->fits_dirent_del(ctx, 456 za->za_name, 457 za->za_first_integer & mask); 458 if (err) 459 goto out; 460 } 461 } else { 462 if (f->f_ops->fits_dirent_add) { 463 err = f->f_ops->fits_dirent_add(ctx, 464 za->za_name, 465 za->za_first_integer & mask); 466 if (err) 467 goto out; 468 } 469 } 470 } else if ((za->za_first_integer & mask) != (num & mask)) { 471 if (dir == DIR_TO) { 472 /* report only once */ 473 if (f->f_ops->fits_dirent_mod) { 474 err = f->f_ops->fits_dirent_mod(ctx, 475 za->za_name, num & mask, 476 za->za_first_integer & mask); 477 if (err) 478 goto out; 479 } 480 } 481 } else { 482 if (dir == DIR_TO) { 483 /* report only once */ 484 if (f->f_ops->fits_dirent_unmod) { 485 err = f->f_ops->fits_dirent_unmod(ctx, 486 za->za_name, num & mask); 487 if (err) 488 goto out; 489 } 490 } 491 } 492 } 493 err = 0; 494 out: 495 zap_cursor_fini(&zc); 496 kmem_free(za, sizeof (zap_attribute_t)); 497 498 return (err); 499 } 500 static int 501 fits_enum_dir(fits_t *f, uint64_t dnobj, int dir, void *ctx) 502 { 503 zap_cursor_t zc; 504 zap_attribute_t *za; 505 int err; 506 objset_t *os; 507 uint64_t mask = ZFS_DIRENT_OBJ(-1ULL); 508 509 if (dir == DIR_FROM) 510 os = f->f_fromsnap; 511 else 512 os = f->f_tosnap; 513 514 za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); 515 for (zap_cursor_init(&zc, os, dnobj); 516 (err = zap_cursor_retrieve(&zc, za)) == 0; 517 zap_cursor_advance(&zc)) { 518 if (dir == DIR_FROM) { 519 if (f->f_ops->fits_dirent_del) { 520 err = f->f_ops->fits_dirent_del(ctx, 521 za->za_name, za->za_first_integer & mask); 522 if (err) 523 break; 524 } 525 } else { 526 if (f->f_ops->fits_dirent_add) { 527 err = f->f_ops->fits_dirent_add(ctx, 528 za->za_name, za->za_first_integer & mask); 529 if (err) 530 break; 531 } 532 } 533 } 534 if (err == ENOENT) 535 err = 0; 536 537 zap_cursor_fini(&zc); 538 kmem_free(za, sizeof (zap_attribute_t)); 539 540 return (err); 541 } 542 543 static int 544 fits_dnode_changed(spa_t *spa, fits_t *f, uint64_t dnobj, 545 dnode_phys_t *from, arc_buf_t *frombuf, dnode_phys_t *to, arc_buf_t *tobuf) 546 { 547 int err = 0; 548 int type = 0; 549 fits_info_t si; 550 551 if (dnobj == f->f_shares_dir) 552 return (0); 553 554 if (to && to->dn_type != DMU_OT_PLAIN_FILE_CONTENTS && 555 to->dn_type != DMU_OT_DIRECTORY_CONTENTS) { 556 to = NULL; 557 } 558 if (from && from->dn_type != DMU_OT_PLAIN_FILE_CONTENTS && 559 from->dn_type != DMU_OT_DIRECTORY_CONTENTS) { 560 from = NULL; 561 } 562 563 if (from) { 564 err = fits_get_info(f, dnobj, FITS_OLD, &si, FI_ATTR_LINKS); 565 if (err) 566 return (err); 567 if (si.si_nlinks == 0) 568 from = NULL; 569 } 570 if (to) { 571 err = fits_get_info(f, dnobj, FITS_NEW, &si, FI_ATTR_LINKS); 572 if (err) 573 return (err); 574 if (si.si_nlinks == 0) 575 to = NULL; 576 } 577 578 if (!to && !from) 579 return (0); 580 581 if (from) { 582 if (from->dn_bonustype != DMU_OT_SA && 583 from->dn_bonustype != DMU_OT_ZNODE) 584 return (EINVAL); 585 } 586 if (to) { 587 if (to->dn_bonustype != DMU_OT_SA && 588 to->dn_bonustype != DMU_OT_ZNODE) 589 return (EINVAL); 590 } 591 592 if (from) 593 type = from->dn_type; 594 else if (to) 595 type = to->dn_type; 596 597 err = 0; 598 if (type == DMU_OT_DIRECTORY_CONTENTS) { 599 if (from && to) { 600 if (f->f_ops->fits_dir_mod) 601 err = f->f_ops->fits_dir_mod(f, dnobj); 602 } else if (from) { 603 if (f->f_ops->fits_dir_del) 604 err = f->f_ops->fits_dir_del(f, dnobj); 605 } else if (to) { 606 if (f->f_ops->fits_dir_add) 607 err = f->f_ops->fits_dir_add(f, dnobj); 608 } 609 } else if (type == DMU_OT_PLAIN_FILE_CONTENTS) { 610 if (from && to) { 611 if (f->f_ops->fits_file_mod) 612 err = f->f_ops->fits_file_mod(f, dnobj); 613 } else if (from) { 614 if (f->f_ops->fits_file_del) 615 err = f->f_ops->fits_file_del(f, dnobj); 616 } else if (to) { 617 if (f->f_ops->fits_file_add) 618 err = f->f_ops->fits_file_add(f, dnobj); 619 } 620 } else { 621 /* TODO other types, symlinks? */ 622 err = 0; 623 } 624 return (err); 625 } 626 627 typedef struct _fits_search { 628 fits_t *zs_f; 629 uint64_t zs_dnobj; 630 uint64_t zs_parent; 631 objset_t *zs_osp; 632 } fits_search_t; 633 634 static int 635 search_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf, 636 const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg) 637 { 638 fits_search_t *zs = arg; 639 fits_t *f = zs->zs_f; 640 arc_buf_t *buf; 641 uint32_t flags = ARC_WAIT; 642 int ebp; 643 int i; 644 int ret; 645 646 if (issig(JUSTLOOKING) && issig(FORREAL)) 647 return (EINTR); 648 649 if (zb->zb_object != DMU_META_DNODE_OBJECT) 650 return (0); 651 652 if (zb->zb_level != 0) 653 return (0); 654 655 if (!bp || BP_IS_HOLE(bp)) 656 return (0); 657 658 if (BP_GET_TYPE(bp) != DMU_OT_DNODE) 659 return (0); 660 661 ebp = BP_GET_LSIZE(bp) >> DNODE_SHIFT; 662 663 if (dsl_read(NULL, spa, bp, pbuf, 664 arc_getbuf_func, &buf, ZIO_PRIORITY_ASYNC_READ, 665 ZIO_FLAG_CANFAIL, &flags, zb) != 0) 666 return (EIO); 667 dnp = buf->b_data; 668 669 for (i = 0; i < ebp; ++i) { 670 zap_cursor_t zc; 671 zap_attribute_t *za; 672 uint64_t mask = ZFS_DIRENT_OBJ(-1ULL); 673 uint64_t ix = 0; 674 uint64_t dnobj = (zb->zb_blkid << 675 (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i; 676 677 if (dnp[i].dn_type != DMU_OT_DIRECTORY_CONTENTS) 678 continue; 679 if (dnobj == f->f_shares_dir) 680 continue; 681 682 za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); 683 for (zap_cursor_init(&zc, zs->zs_osp, dnobj); 684 (ret = zap_cursor_retrieve(&zc, za)) == 0; 685 zap_cursor_advance(&zc), ++ix) { 686 if ((za->za_first_integer & mask) == 687 (zs->zs_dnobj & mask)) { 688 zs->zs_parent = dnobj; 689 break; 690 } 691 } 692 zap_cursor_fini(&zc); 693 kmem_free(za, sizeof (zap_attribute_t)); 694 } 695 696 (void) arc_buf_remove_ref(buf, &buf); 697 698 if (zs->zs_parent) 699 return (EIO); /* abort search */ 700 701 return (TRAVERSE_VISIT_NO_CHILDREN); 702 } 703 704 static int 705 fits_search_parent(fits_t *f, uint64_t dnobj, fits_which_t which, 706 uint64_t *parent) 707 { 708 dsl_dataset_t *ds; 709 fits_search_t zs; 710 int ret; 711 712 if (which == FITS_OLD) { 713 ds = f->f_fromds; 714 zs.zs_osp = f->f_fromsnap; 715 } else { 716 ds = f->f_tods; 717 zs.zs_osp = f->f_tosnap; 718 } 719 720 zs.zs_f = f; 721 zs.zs_dnobj = dnobj; 722 zs.zs_parent = 0; 723 ret = traverse_dataset(ds, 0, TRAVERSE_PRE, search_cb, &zs); 724 if (zs.zs_parent) { 725 *parent = zs.zs_parent; 726 return (0); 727 } 728 729 return (ret ? ret : ENOENT); 730 } 731 732 int 733 fits_get_info(fits_t *f, uint64_t dnobj, fits_which_t which, 734 fits_info_t *sp, uint64_t flags) 735 { 736 int ret; 737 sa_handle_t *hdl = NULL; 738 dmu_buf_t *db; 739 objset_t *osp; 740 sa_bulk_attr_t bulk[13]; 741 int count = 0; 742 sa_attr_type_t *sa_table; 743 744 if (which == FITS_OLD) { 745 osp = f->f_fromsnap; 746 if (!osp) 747 return (ENOENT); 748 sa_table = f->f_from_sa_table; 749 } else if (which == FITS_NEW) { 750 osp = f->f_tosnap; 751 sa_table = f->f_to_sa_table; 752 } else { 753 return (EINVAL); 754 } 755 756 ret = fits_grab_sa_handle(osp, dnobj, &hdl, &db, FTAG); 757 if (ret) 758 return (ret); 759 760 if (flags & FI_ATTR_ATIME) { 761 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_ATIME], NULL, 762 &sp->si_atime, sizeof (sp->si_atime)); 763 } 764 if (flags & FI_ATTR_MTIME) { 765 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MTIME], NULL, 766 &sp->si_mtime, sizeof (sp->si_mtime)); 767 } 768 if (flags & FI_ATTR_CTIME) { 769 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL, 770 &sp->si_ctime, sizeof (sp->si_ctime)); 771 } 772 if (flags & FI_ATTR_OTIME) { 773 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CRTIME], NULL, 774 &sp->si_otime, sizeof (sp->si_otime)); 775 } 776 if (flags & FI_ATTR_MODE) { 777 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, 778 &sp->si_mode, sizeof (sp->si_mode)); 779 } 780 if (flags & FI_ATTR_SIZE) { 781 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_SIZE], NULL, 782 &sp->si_size, sizeof (sp->si_size)); 783 } 784 if (flags & FI_ATTR_PARENT) { 785 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL, 786 &sp->si_parent, sizeof (sp->si_parent)); 787 } 788 if (flags & FI_ATTR_LINKS) { 789 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL, 790 &sp->si_nlinks, sizeof (sp->si_nlinks)); 791 } 792 if (flags & FI_ATTR_RDEV) { 793 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_RDEV], NULL, 794 &sp->si_rdev, sizeof (sp->si_rdev)); 795 } 796 if (flags & FI_ATTR_UID) { 797 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_UID], NULL, 798 &sp->si_uid, sizeof (sp->si_uid)); 799 } 800 if (flags & FI_ATTR_GID) { 801 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GID], NULL, 802 &sp->si_gid, sizeof (sp->si_gid)); 803 } 804 if (flags & FI_ATTR_GEN) { 805 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL, 806 &sp->si_gen, sizeof (sp->si_gen)); 807 } 808 /* XXX if you add things, also bump the size of bulk */ 809 /* XXX XATTR */ 810 811 /* XXX TODO get flags to check for xattrdir */ 812 if (count) { 813 ret = sa_bulk_lookup(hdl, bulk, count); 814 if (ret) 815 goto out; 816 } 817 818 if ((flags & FI_ATTR_PARENT) && sp->si_parent != dnobj) { 819 fits_info_t si; 820 int good = 0; 821 /* 822 * verify parent. this is very expensive and only a workaround 823 */ 824 ret = fits_get_info(f, sp->si_parent, which, &si, FI_ATTR_MODE); 825 if (ret && ret != ENOENT) 826 goto out; 827 if (ret == 0 && S_ISDIR(si.si_mode)) { 828 ret = fits_find_entry(f, sp->si_parent, dnobj, which, 829 NULL); 830 if (ret && ret != ENOENT) 831 goto out; 832 if (ret == 0) 833 good = 1; 834 } 835 if (!good) { 836 uint64_t parent; 837 838 cmn_err(CE_NOTE, "parent wrong, do a brute force " 839 "search for ino %"PRIu64"\n", dnobj); 840 ret = fits_search_parent(f, dnobj, which, &parent); 841 if (ret == ENOENT) { 842 cmn_err(CE_NOTE, "no parent found\n"); 843 ret = EINVAL; 844 goto out; 845 } 846 if (ret) 847 goto out; 848 sp->si_parent = parent; 849 cmn_err(CE_NOTE, "parent found, use %"PRIu64"\n", 850 parent); 851 /* 852 * TODO add a bad parent cache to prevent additional 853 * lookup in pass 2 854 */ 855 } 856 } 857 858 out: 859 fits_release_sa_handle(hdl, db, FTAG); 860 return (ret); 861 } 862 863 int 864 fits_file_contents(fits_t *f, uint64_t dnobj, void *ctx) 865 { 866 dnode_t *from = NULL; 867 dnode_t *to = NULL; 868 int err; 869 int i; 870 zbookmark_t czb; 871 spa_t *spa = f->f_tods->ds_dir->dd_pool->dp_spa; 872 873 if (f->f_fromds) { 874 err = dnode_hold(f->f_fromsnap, dnobj, FTAG, &from); 875 if (err && err != ENOENT) 876 return (err); 877 } 878 if (from && from->dn_type != DMU_OT_PLAIN_FILE_CONTENTS) { 879 dnode_rele(from, FTAG); 880 from = NULL; 881 } 882 err = dnode_hold(f->f_tosnap, dnobj, FTAG, &to); 883 if (err) 884 goto out; 885 if (to->dn_type != DMU_OT_PLAIN_FILE_CONTENTS) { 886 err = EINVAL; 887 goto out; 888 } 889 if (from) { 890 f->f_filebl = kmem_zalloc(sizeof (blklevel_t)*from->dn_nlevels, 891 KM_SLEEP); 892 for (i = 0; i < from->dn_nlevels; ++i) 893 f->f_filebl[i].bl_blk = -1; 894 i = from->dn_nlevels - 1; 895 f->f_filebl[i].bl_nslots = from->dn_nblkptr; 896 f->f_filebl[i].bl_bp = &from->dn_phys->dn_blkptr[0]; 897 f->f_filebl[i].bl_blk = 0; 898 f->f_filebl[i].bl_buf = from->dn_dbuf->db_parent->db_buf; 899 } 900 for (i = 0; i < to->dn_nblkptr; ++i) { 901 SET_BOOKMARK(&czb, f->f_tods->ds_object, dnobj, 902 to->dn_nlevels - 1, i); 903 err = fits_enum_bp(spa, f, &czb, to->dn_phys->dn_blkptr + i, 904 NULL, f->f_fromtxg, ctx); 905 if (err) 906 goto out; 907 } 908 out: 909 if (f->f_filebl) { 910 kmem_free(f->f_filebl, sizeof (blklevel_t) * from->dn_nlevels); 911 f->f_filebl = NULL; 912 } 913 if (from) 914 dnode_rele(from, FTAG); 915 if (to) 916 dnode_rele(to, FTAG); 917 918 return (err); 919 } 920 921 int 922 fits_dir_contents(fits_t *f, uint64_t dnobj, void *ctx) 923 { 924 dnode_t *from = NULL; 925 dnode_t *to = NULL; 926 int err; 927 928 if (f->f_fromds) { 929 err = dnode_hold(f->f_fromsnap, dnobj, FTAG, &from); 930 if (err && err != ENOENT) 931 return (err); 932 } 933 if (from && from->dn_type != DMU_OT_DIRECTORY_CONTENTS) { 934 dnode_rele(from, FTAG); 935 from = NULL; 936 } 937 err = dnode_hold(f->f_tosnap, dnobj, FTAG, &to); 938 if (err && err != ENOENT) 939 return (err); 940 if (to && to->dn_type != DMU_OT_DIRECTORY_CONTENTS) { 941 dnode_rele(to, FTAG); 942 to = NULL; 943 } 944 945 if (to && from) { 946 err = fits_diff_dir(f, dnobj, DIR_TO, ctx); 947 if (err) 948 goto out; 949 err = fits_diff_dir(f, dnobj, DIR_FROM, ctx); 950 } else if (to) { 951 err = fits_enum_dir(f, dnobj, DIR_TO, ctx); 952 } else if (from) { 953 err = fits_enum_dir(f, dnobj, DIR_FROM, ctx); 954 } 955 out: 956 if (from) 957 dnode_rele(from, FTAG); 958 if (to) 959 dnode_rele(to, FTAG); 960 961 return (err); 962 } 963 964 int 965 fits_find_entry(fits_t *f, uint64_t dirobj, uint64_t dnobj, 966 fits_which_t which, char **name) 967 { 968 zap_cursor_t zc; 969 zap_attribute_t *za; 970 int err; 971 uint64_t mask = ZFS_DIRENT_OBJ(-1ULL); 972 struct objset *os; 973 974 if (which == FITS_OLD) { 975 os = f->f_fromsnap; 976 if (!os) 977 return (ENOENT); 978 } else if (which == FITS_NEW) { 979 os = f->f_tosnap; 980 } else { 981 return (EINVAL); 982 } 983 984 985 if (name) 986 *name = NULL; 987 za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP); 988 for (zap_cursor_init(&zc, os, dirobj); 989 (err = zap_cursor_retrieve(&zc, za)) == 0; 990 zap_cursor_advance(&zc)) { 991 if ((za->za_first_integer & mask) == (dnobj & mask)) { 992 if (name) 993 *name = za->za_name; 994 break; 995 } 996 } 997 zap_cursor_fini(&zc); 998 return (err); 999 } 1000 1001 void 1002 fits_free_name(char *name) 1003 { 1004 zap_attribute_t *za; 1005 1006 if (!name) 1007 return; 1008 1009 za = (zap_attribute_t *)(name - offsetof(zap_attribute_t, za_name)); 1010 kmem_free(za, sizeof (*za)); 1011 } 1012 1013 int 1014 fits_lookup_entry(fits_t *f, uint64_t dirobj, char *name, 1015 fits_which_t which, uint64_t *dnobj) 1016 { 1017 struct objset *osp; 1018 int ret; 1019 1020 if (which == FITS_OLD) { 1021 osp = f->f_fromsnap; 1022 if (!osp) 1023 return (ENOENT); 1024 } else if (which == FITS_NEW) { 1025 osp = f->f_tosnap; 1026 } else { 1027 return (EINVAL); 1028 } 1029 1030 ret = zap_lookup(osp, dirobj, name, sizeof (*dnobj), 1, dnobj); 1031 if (ret) 1032 return (ret); 1033 *dnobj = ZFS_DIRENT_OBJ(*dnobj); 1034 1035 return (0); 1036 } 1037 1038 int 1039 fits_write(fits_t *f, const uint8_t *data, int len) 1040 { 1041 ssize_t resid; /* have to get resid to get detailed errno */ 1042 int err; 1043 1044 err = vn_rdwr(UIO_WRITE, f->f_vp, (caddr_t)data, 1045 len, 0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid); 1046 *f->f_offp += len; 1047 1048 return (err); 1049 } 1050 1051 int 1052 fits_get_uuid(fits_t *f, fits_which_t which, uint8_t data[16]) 1053 { 1054 if (which == FITS_OLD && !f->f_fromds) 1055 return (ENOENT); 1056 1057 LE_OUT64(data, f->f_tods->ds_dir->dd_pool->dp_spa->spa_config_guid); 1058 if (which == FITS_OLD) { 1059 LE_OUT64(data + 8, f->f_fromds->ds_phys->ds_guid); 1060 } else { 1061 LE_OUT64(data + 8, f->f_tods->ds_phys->ds_guid); 1062 } 1063 return (0); 1064 } 1065 1066 int 1067 fits_get_ctransid(fits_t *f, fits_which_t which, uint64_t *ctransid) 1068 { 1069 if (which == FITS_OLD && !f->f_fromds) 1070 return (ENOENT); 1071 1072 if (which == FITS_OLD) 1073 *ctransid = f->f_fromds->ds_phys->ds_creation_txg; 1074 else 1075 *ctransid = f->f_tods->ds_phys->ds_creation_txg; 1076 return (0); 1077 } 1078 1079 int 1080 fits_get_snapname(fits_t *f, fits_which_t which, 1081 char **name, int *len) 1082 { 1083 dsl_dataset_t *ds; 1084 1085 if (which == FITS_OLD && !f->f_fromds) 1086 return (ENOENT); 1087 1088 if (which == FITS_OLD) 1089 ds = f->f_fromds; 1090 else 1091 ds = f->f_tods; 1092 1093 *len = dsl_dataset_namelen(ds) + 1; 1094 *name = kmem_alloc(*len, KM_SLEEP); 1095 dsl_dataset_name(ds, *name); 1096 return (0); 1097 } 1098 1099 int 1100 fits_read_symlink(fits_t *f, uint64_t dnobj, fits_which_t which, 1101 char **target, int *plen) 1102 { 1103 int err; 1104 int ret; 1105 sa_handle_t *hdl = NULL; 1106 dmu_buf_t *db; 1107 objset_t *osp; 1108 dmu_object_info_t doi; 1109 sa_attr_type_t *sa_table; 1110 1111 if (which == FITS_OLD) { 1112 osp = f->f_fromsnap; 1113 if (!osp) 1114 return (EINVAL); 1115 sa_table = f->f_from_sa_table; 1116 } else if (which == FITS_NEW) { 1117 osp = f->f_tosnap; 1118 sa_table = f->f_to_sa_table; 1119 } else { 1120 return (EINVAL); 1121 } 1122 1123 err = fits_grab_sa_handle(osp, dnobj, &hdl, &db, FTAG); 1124 if (err) 1125 return (err); 1126 1127 dmu_object_info_from_db(db, &doi); 1128 if (doi.doi_bonus_type == DMU_OT_SA) { 1129 int len; 1130 1131 ret = sa_size(hdl, sa_table[ZPL_SYMLINK], &len); 1132 if (ret) 1133 goto out; 1134 *target = kmem_alloc(len + 1, KM_SLEEP); 1135 *plen = len; 1136 (*target)[len] = 0; 1137 ret = sa_lookup(hdl, sa_table[ZPL_SYMLINK], *target, len + 1); 1138 if (ret) 1139 kmem_free(*target, len + 1); 1140 } else { 1141 /* 1142 * TODO read target from file data, the old way 1143 * see zfs_readlink 1144 */ 1145 ret = EINVAL; 1146 } 1147 1148 out: 1149 fits_release_sa_handle(hdl, db, FTAG); 1150 1151 return (ret); 1152 } 1153 1154 int 1155 fits_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp, 1156 offset_t *off) 1157 { 1158 dsl_dataset_t *ds; 1159 dsl_dataset_t *fromds = NULL; 1160 int err = 0; 1161 fits_t f; 1162 arc_buf_t *buf = NULL; 1163 uint32_t flags; 1164 objset_phys_t *osp; 1165 int i; 1166 zbookmark_t zb; 1167 1168 memset(&f, 0, sizeof (f)); 1169 ds = tosnap->os_dsl_dataset; 1170 if (fromsnap) 1171 fromds = fromsnap->os_dsl_dataset; 1172 1173 /* make certain we are looking at snapshots */ 1174 if (!dsl_dataset_is_snapshot(ds) || 1175 (fromds && !dsl_dataset_is_snapshot(fromds))) 1176 return (EINVAL); 1177 1178 /* fromsnap must be earlier and from the same lineage as tosnap */ 1179 if (fromds) { 1180 if (fromds->ds_phys->ds_creation_txg >= 1181 ds->ds_phys->ds_creation_txg) 1182 return (EXDEV); 1183 1184 if (fromds->ds_dir != ds->ds_dir) 1185 return (EXDEV); 1186 1187 /* 1188 * read root dnode from from-dataset 1189 */ 1190 flags = ARC_WAIT; 1191 SET_BOOKMARK(&zb, fromds->ds_object, ZB_ROOT_OBJECT, 1192 ZB_ROOT_LEVEL, ZB_ROOT_BLKID); 1193 err = dsl_read_nolock(NULL, fromds->ds_dir->dd_pool->dp_spa, 1194 &fromds->ds_phys->ds_bp, arc_getbuf_func, &buf, 1195 ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, &zb); 1196 if (err) 1197 return (err); 1198 osp = buf->b_data; 1199 f.f_dnp = &osp->os_meta_dnode; 1200 } 1201 f.f_vp = vp; 1202 f.f_offp = off; 1203 f.f_err = 0; 1204 f.f_fromds = fromds; 1205 f.f_tods = ds; 1206 f.f_fromsnap = fromsnap; 1207 f.f_tosnap = tosnap; 1208 if (fromds) { 1209 f.f_fromtxg = fromds->ds_phys->ds_creation_txg; 1210 f.f_bl = kmem_zalloc(sizeof (blklevel_t) * 1211 f.f_dnp->dn_nlevels, KM_SLEEP); 1212 for (i = 0; i < f.f_dnp->dn_nlevels; ++i) 1213 f.f_bl[i].bl_blk = -1; 1214 i = f.f_dnp->dn_nlevels - 1; 1215 f.f_bl[i].bl_nslots = f.f_dnp->dn_nblkptr; 1216 f.f_bl[i].bl_bp = &f.f_dnp->dn_blkptr[0]; 1217 f.f_bl[i].bl_blk = 0; 1218 1219 err = fits_sa_setup(fromsnap, &f.f_from_sa_table); 1220 if (err) 1221 goto out; 1222 } 1223 err = fits_sa_setup(tosnap, &f.f_to_sa_table); 1224 if (err) 1225 goto out; 1226 1227 err = zap_lookup(tosnap, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1, 1228 &f.f_shares_dir); 1229 if (err && err != ENOENT) 1230 goto out; 1231 1232 err = fits_start(&f, &f.f_ops); 1233 if (err) 1234 goto out; 1235 1236 err = traverse_dataset(ds, f.f_fromtxg, 1237 TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, fits_cb, &f); 1238 if (err) { 1239 fits_abort(&f); 1240 goto out; 1241 } 1242 err = fits_start2(&f, &f.f_ops); 1243 if (err) { 1244 goto out; 1245 } 1246 err = traverse_dataset(ds, f.f_fromtxg, 1247 TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, fits_cb, &f); 1248 if (err) { 1249 fits_abort(&f); 1250 goto out; 1251 } 1252 1253 err = fits_end(&f); 1254 if (err) 1255 goto out; 1256 1257 out: 1258 if (fromds) { 1259 for (i = 0; i < f.f_dnp->dn_nlevels - 1; ++i) { 1260 blklevel_t *b = f.f_bl + i; 1261 if (b->bl_buf) 1262 arc_buf_remove_ref(b->bl_buf, &b->bl_buf); 1263 } 1264 kmem_free(f.f_bl, sizeof (blklevel_t) * f.f_dnp->dn_nlevels); 1265 } 1266 1267 if (buf) 1268 arc_buf_remove_ref(buf, &buf); 1269 1270 return (err); 1271 }