1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2012 STRATO AG. All rights reserved.
23 */
24 #include <sys/zfs_context.h>
25 #include <sys/stat.h>
26 #include <sys/errno.h>
27 #include <sys/mkdev.h>
28 #include <sys/debug.h>
29 #include <sys/open.h>
30 #include <sys/zfs_ioctl.h>
31 #include <zfs_namecheck.h>
32 #include <sys/policy.h>
33 #include <sys/dmu_objset.h>
34 #include <sys/dsl_prop.h>
35 #include <sys/zvol.h>
36 #include <sys/zap.h>
37 #include <sys/dsl_dataset.h>
38 #include <sys/dmu_traverse.h>
39 #include <sys/dsl_dir.h>
40 #include <sys/arc.h>
41 #include <sys/spa.h>
42 #include <sys/spa_impl.h>
43 #include <sys/sa.h>
44 #include <sys/sa_impl.h>
45 #include <sys/zfs_acl.h>
46 #include <sys/zfs_sa.h>
47 #include <sys/zfs_znode.h>
48 #include <sys/dbuf.h>
49 #include <sys/fits.h>
50 #include <sys/fits_impl.h>
51
52 /*
53 * fits_send generates a stream of filesystem data analogous to dmu_send.
54 * The main difference is that the fits-stream does not contain zfs-specific
55 * data and can be replayed on any filesystem. It just contains commands like
56 * MKDIR, CHMOD, RENAME etc.
57 * The stream is generated in two passes. The first pass, PASS_LINK basically
58 * creates all new files/directories and links, while the second pass,
59 * PASS_UNLINK, does all the removal of old stuff.
60 * Each pass enumerates all objects in inode order.
61 * There are some corner cases:
 * Files / directories can only be created if the parent already exists or
 * has already been created. If an object is encountered whose parent does
 * not satisfy this condition, it is put back and its creation will be
 * triggered by the creation of the parent.
66 * A similar case applies on deletion. A directory can only be removed after
67 * the last contained object has been removed. If a directory is not empty,
68 * it is put back and the deletion of the last object in it triggers the
69 * deletion.
 * If an object gets deleted and a new object is created under the same
 * name, pass1 cannot create the new object directly. So it is created under
 * a temporary name and gets renamed in pass2.
 * If an object is deleted and a new object (of possibly different type) is
 * created under the same inode and the same name, this change cannot be
75 * detected by enumerating the containing directory (as name + inode are
76 * unchanged). It is detected by a change of the inode generation number and
77 * a flag is set for pass2. Creation is postponed. In pass2, all enumerated
78 * directories are checked for this inode (although the entry is unchanged,
79 * the directory has a bumped txg). If it is encountered, delete + create
80 * happen both in pass2.
81 *
82 * There are lots of TODOs left:
83 * - add XATTR support
84 * - add path-caching
85 * - add a cache for brute-force parent search
86 * - add a cache for inode-search in a directory
87 * - use a hash instead of the linear list in fits_count
88 */
/*
 * Forward declaration: fits_enum_bp() and fits_cb() call
 * fits_dnode_changed() before its definition further down in this file.
 */
static int
fits_dnode_changed(spa_t *spa, fits_t *f, uint64_t dnobj,
    dnode_phys_t *from, arc_buf_t *frombuf, dnode_phys_t *to, arc_buf_t *tobuf);
92
93 /* copied from zfs_znode.c */
94 static int
95 fits_sa_setup(objset_t *osp, sa_attr_type_t **sa_table)
96 {
97 uint64_t sa_obj = 0;
98 int error;
99
100 error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj);
101 if (error != 0 && error != ENOENT)
102 return (error);
103
104 error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table);
105 return (error);
106 }
107
108 static int
109 fits_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp,
110 dmu_buf_t **db, void *tag)
111 {
112 dmu_object_info_t doi;
113 int error;
114
115 if ((error = sa_buf_hold(osp, obj, tag, db)) != 0)
116 return (error);
117
118 dmu_object_info_from_db(*db, &doi);
119 if ((doi.doi_bonus_type != DMU_OT_SA &&
120 doi.doi_bonus_type != DMU_OT_ZNODE) ||
121 (doi.doi_bonus_type == DMU_OT_ZNODE &&
122 doi.doi_bonus_size < sizeof (znode_phys_t))) {
123 sa_buf_rele(*db, tag);
124 return (ENOTSUP);
125 }
126
127 error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp);
128 if (error != 0) {
129 sa_buf_rele(*db, tag);
130 return (error);
131 }
132
133 return (0);
134 }
135
136 static void
137 fits_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag)
138 {
139 sa_handle_destroy(hdl);
140 sa_buf_rele(db, tag);
141 }
142
/*
 * Locate the block pointer addressed by bookmark zb inside the block tree
 * of dnode dnp, using bl as a per-level cache of already loaded indirect
 * blocks.
 *
 * bl is indexed by level. As used here, bl[l].bl_bp is an array of
 * bl[l].bl_nslots block pointers, bl[l].bl_buf is the arc buffer backing
 * that array and bl[l].bl_blk is the number of the block the array was
 * loaded from (-1 when nothing is cached). The callers in this file seed
 * the top entry (dn_nlevels - 1) with the dnode's own dn_blkptr array.
 *
 * On return *bpp is the block pointer found, or NULL if it (or one of the
 * blocks on the way down) is a hole. *pbuf is set to the buffer containing
 * *bpp only when the walk reaches the target level; on the early hole
 * return it is left untouched, so callers should pre-initialize it.
 *
 * Returns 0 on success and EIO if an indirect block cannot be read.
 */
static int
fits_find_from_bp(spa_t *spa, dnode_phys_t *dnp, blklevel_t *bl,
    const zbookmark_t *zb, blkptr_t **bpp, arc_buf_t **pbuf)
{
	uint32_t flags;
	/* log2 of the number of block pointers per indirect block */
	int epbs = dnp->dn_indblkshift - SPA_BLKPTRSHIFT;
	int epbmask = (1 << epbs) - 1;
	int level;
	int slot;
	uint64_t blkid;
	uint64_t blk;
	blklevel_t *blp;
	zbookmark_t czb;
	int i;

	*bpp = NULL;
	/* walk from the top level down to the level requested by zb */
	for (level = dnp->dn_nlevels - 1; level >= zb->zb_level; --level) {
		/* id of the block at this level that covers zb->zb_blkid */
		blkid = zb->zb_blkid >> (epbs * (level - zb->zb_level));
		/* number of the block one level up and its slot up there */
		blk = blkid >> epbs;
		slot = blk & epbmask;
		blp = bl + level;

		/* cache hit: the right block is already loaded */
		if (blp->bl_blk == blk)
			continue;

		/*
		 * Cache miss: everything cached at this level and below
		 * belongs to a different subtree, so drop it all.
		 */
		for (i = 0; i <= level; ++i) {
			blklevel_t *b = bl + i;

			if (b->bl_buf)
				arc_buf_remove_ref(b->bl_buf, &b->bl_buf);
			b->bl_bp = NULL;
			b->bl_buf = NULL;
			b->bl_blk = -1;
		}
		/*
		 * NOTE(review): blp[1] is the entry one level up; this relies
		 * on the top-level entry always hitting the cache above so
		 * that bl is never indexed past dn_nlevels - 1 -- confirm.
		 */
		ASSERT(slot < blp[1].bl_nslots);
		if (BP_IS_HOLE(blp[1].bl_bp + slot)) {
			*bpp = NULL;
			return (0);
		}
		/*
		 * Load the indirect block referenced by the parent's slot
		 * and cache it for this level.
		 */
		flags = ARC_WAIT;
		SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object, level, blkid);
		if (dsl_read(NULL, spa, blp[1].bl_bp + slot, blp[1].bl_buf,
		    arc_getbuf_func, &blp->bl_buf, ZIO_PRIORITY_ASYNC_READ,
		    ZIO_FLAG_CANFAIL, &flags, &czb) != 0)
			return (EIO);
		blp->bl_bp = blp->bl_buf->b_data;
		blp->bl_nslots = 1 << epbs;
		blp->bl_blk = blk;
	}
	/* pick the requested pointer out of the array cached for its level */
	slot = zb->zb_blkid & epbmask;
	blp = bl + zb->zb_level;
	ASSERT(slot < blp->bl_nslots);
	*bpp = blp->bl_bp + slot;
	*pbuf = blp->bl_buf;
	/* holes are reported to the caller as "no block pointer" */
	if (BP_IS_HOLE(*bpp))
		*bpp = NULL;

	return (0);
}
205
206 static int
207 fits_file_cb(spa_t *spa, fits_t *f, zbookmark_t *zb,
208 blkptr_t *bp, arc_buf_t *pbuf, void *ctx)
209 {
210 int err = 0;
211 blkptr_t *fbp;
212
213 if (issig(JUSTLOOKING) && issig(FORREAL))
214 return (EINTR);
215
216 if (f->f_fromds && zb->zb_objset == f->f_fromds->ds_object)
217 return (0);
218
219 if (bp == NULL) {
220 arc_buf_t *fpbuf = NULL;
221 zbookmark_t czb;
222
223 ASSERT(f->f_fromds);
224 SET_BOOKMARK(&czb, f->f_fromds->ds_object, zb->zb_object,
225 zb->zb_level, zb->zb_blkid);
226 err = fits_find_from_bp(spa, f->f_dnp, f->f_filebl,
227 &czb, &fbp, &fpbuf);
228 if (err)
229 return (err);
230 if (fbp) {
231 #if 0
232 /* XXX TODO callback for newly created hole */
233 err = fits_enum_bp(spa, da, &czb, fbp, fpbuf);
234 if (err)
235 return (err);
236 #endif
237 }
238 } else if (zb->zb_level == 0) {
239 arc_buf_t *tbuf;
240 uint32_t tflags = ARC_WAIT;
241 int blksz = BP_GET_LSIZE(bp);
242
243 if (dsl_read(NULL, spa, bp, pbuf,
244 arc_getbuf_func, &tbuf, ZIO_PRIORITY_ASYNC_READ,
245 ZIO_FLAG_CANFAIL, &tflags, zb) != 0)
246 return (EIO);
247
248 if (f->f_ops->fits_file_data)
249 err = f->f_ops->fits_file_data(ctx, tbuf->b_data,
250 zb->zb_blkid * blksz, blksz);
251
252 (void) arc_buf_remove_ref(tbuf, &tbuf);
253 }
254 return (err);
255 }
256
257 static int
258 fits_enum_bp(spa_t *spa, fits_t *da, zbookmark_t *zb,
259 blkptr_t *bp, arc_buf_t *pbuf, uint64_t min_txg, void *ctx)
260 {
261 int err = 0;
262 arc_buf_t *buf = NULL;
263 uint32_t flags = ARC_WAIT;
264
265 if (BP_IS_HOLE(bp))
266 return (0);
267
268 if (bp->blk_birth <= min_txg)
269 return (0);
270
271 if (BP_GET_LEVEL(bp) > 0) {
272 int i;
273 int epb = BP_GET_LSIZE(bp) >> SPA_BLKPTRSHIFT;
274 blkptr_t *cbp;
275 zbookmark_t czb;
276
277 if (dsl_read(NULL, spa, bp, pbuf, arc_getbuf_func, &buf,
278 ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb) != 0)
279 return (EIO);
280 cbp = buf->b_data;
281 for (i = 0; i < epb; ++i, ++cbp) {
282 SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object,
283 zb->zb_level - 1, zb->zb_blkid * epb + i);
284 err = fits_enum_bp(spa, da, &czb, cbp, buf, min_txg,
285 ctx);
286 if (err)
287 goto out;
288 }
289 } else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
290 int i;
291 int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
292 dnode_phys_t *dnp;
293
294 if (dsl_read(NULL, spa, bp, pbuf, arc_getbuf_func, &buf,
295 ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
296 &flags, zb) != 0) {
297 err = EIO;
298 goto out;
299 }
300 dnp = buf->b_data;
301 for (i = 0; i < epb; ++i, ++dnp) {
302 uint64_t dnobj = zb->zb_blkid * epb + i;
303 if (dnp->dn_type == DMU_OT_NONE)
304 continue;
305 err = fits_dnode_changed(spa, da, dnobj, dnp, buf,
306 NULL, NULL);
307 if (err)
308 goto out;
309 }
310 } else {
311 err = fits_file_cb(spa, da, zb, bp, pbuf, ctx);
312 }
313 out:
314 if (buf)
315 (void) arc_buf_remove_ref(buf, &buf);
316
317 return (err);
318 }
319
320 static int
321 fits_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
322 const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
323 {
324 int err = 0;
325 fits_t *f = arg;
326 blkptr_t *fbp = NULL;
327 zbookmark_t czb;
328
329 if (issig(JUSTLOOKING) && issig(FORREAL))
330 return (EINTR);
331
332 if (f->f_fromds)
333 SET_BOOKMARK(&czb, f->f_fromds->ds_object, zb->zb_object,
334 zb->zb_level, zb->zb_blkid);
335
336 if (zb->zb_object != DMU_META_DNODE_OBJECT)
337 return (0);
338
339 if (bp == NULL) {
340 arc_buf_t *fpbuf = NULL;
341
342 if (!f->f_fromds)
343 return (0);
344
345 err = fits_find_from_bp(spa, f->f_dnp, f->f_bl,
346 &czb, &fbp, &fpbuf);
347 if (err)
348 return (EIO);
349 if (fbp) {
350 err = fits_enum_bp(spa, f, &czb, fbp, fpbuf, 0, NULL);
351 if (err)
352 return (EIO);
353 }
354 return (0);
355 } else if (zb->zb_level == 0) {
356 dnode_phys_t *tblk;
357 dnode_phys_t *fblk = NULL;
358 arc_buf_t *tbuf;
359 arc_buf_t *fbuf = NULL;
360 arc_buf_t *fpbuf = NULL;
361 uint32_t fflags = ARC_WAIT;
362 uint32_t tflags = ARC_WAIT;
363 int blksz = BP_GET_LSIZE(bp);
364 int i;
365
366 if (dsl_read(NULL, spa, bp, pbuf,
367 arc_getbuf_func, &tbuf, ZIO_PRIORITY_ASYNC_READ,
368 ZIO_FLAG_CANFAIL, &tflags, zb) != 0)
369 return (EIO);
370 tblk = tbuf->b_data;
371
372 if (f->f_fromds) {
373 err = fits_find_from_bp(spa, f->f_dnp, f->f_bl, zb,
374 &fbp, &fpbuf);
375 if (err)
376 return (EIO);
377 }
378 if (fbp) {
379 if (dsl_read(NULL, spa, fbp, fpbuf,
380 arc_getbuf_func, &fbuf, ZIO_PRIORITY_ASYNC_READ,
381 ZIO_FLAG_CANFAIL, &fflags, &czb) != 0) {
382 (void) arc_buf_remove_ref(tbuf, &tbuf);
383 return (EIO);
384 }
385 fblk = fbuf->b_data;
386 if (blksz != BP_GET_LSIZE(fbp))
387 return (EIO);
388 }
389 for (i = 0; i < blksz >> DNODE_SHIFT; i++) {
390 uint64_t dnobj = (zb->zb_blkid <<
391 (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i;
392 err = 0;
393 if (fbuf && (tblk[i].dn_type == DMU_OT_NONE) &&
394 fblk[i].dn_type != DMU_OT_NONE) {
395 err = fits_dnode_changed(spa, f, dnobj,
396 fblk + i, fbuf, NULL, NULL);
397 } else if (fbuf) {
398 if (memcmp(tblk + i, fblk + i, sizeof (*tblk)))
399 err = fits_dnode_changed(spa, f,
400 dnobj, fblk + i, fbuf, tblk + i,
401 tbuf);
402 } else {
403 if (tblk[i].dn_type != DMU_OT_NONE)
404 err = fits_dnode_changed(spa, f,
405 dnobj, NULL, NULL, tblk + i, tbuf);
406 }
407 if (err)
408 break;
409 }
410 (void) arc_buf_remove_ref(tbuf, &tbuf);
411 if (fbuf)
412 (void) arc_buf_remove_ref(fbuf, &fbuf);
413
414 if (err)
415 return (EIO);
416 /* Don't care about the data blocks */
417 return (TRAVERSE_VISIT_NO_CHILDREN);
418 }
419 return (0);
420 }
421
422 #define DIR_FROM 1
423 #define DIR_TO 2
424 static int
425 fits_diff_dir(fits_t *f, uint64_t dnobj, int dir, void *ctx)
426 {
427 zap_cursor_t zc;
428 zap_attribute_t *za;
429 int err;
430 objset_t *os1;
431 objset_t *os2;
432 uint64_t mask = ZFS_DIRENT_OBJ(-1ULL);
433 uint64_t num;
434 uint64_t ix = 0;
435
436 if (dir == DIR_FROM) {
437 os1 = f->f_fromsnap;
438 os2 = f->f_tosnap;
439 } else {
440 os1 = f->f_tosnap;
441 os2 = f->f_fromsnap;
442 }
443
444 za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
445 for (zap_cursor_init(&zc, os1, dnobj);
446 (err = zap_cursor_retrieve(&zc, za)) == 0;
447 zap_cursor_advance(&zc), ++ix) {
448 err = zap_lookup(os2, dnobj, za->za_name, sizeof (num), 1,
449 &num);
450 if (err && err != ENOENT)
451 break;
452 if (err == ENOENT) {
453 if (dir == DIR_FROM) {
454 if (f->f_ops->fits_dirent_del) {
455 err = f->f_ops->fits_dirent_del(ctx,
456 za->za_name,
457 za->za_first_integer & mask);
458 if (err)
459 goto out;
460 }
461 } else {
462 if (f->f_ops->fits_dirent_add) {
463 err = f->f_ops->fits_dirent_add(ctx,
464 za->za_name,
465 za->za_first_integer & mask);
466 if (err)
467 goto out;
468 }
469 }
470 } else if ((za->za_first_integer & mask) != (num & mask)) {
471 if (dir == DIR_TO) {
472 /* report only once */
473 if (f->f_ops->fits_dirent_mod) {
474 err = f->f_ops->fits_dirent_mod(ctx,
475 za->za_name, num & mask,
476 za->za_first_integer & mask);
477 if (err)
478 goto out;
479 }
480 }
481 } else {
482 if (dir == DIR_TO) {
483 /* report only once */
484 if (f->f_ops->fits_dirent_unmod) {
485 err = f->f_ops->fits_dirent_unmod(ctx,
486 za->za_name, num & mask);
487 if (err)
488 goto out;
489 }
490 }
491 }
492 }
493 err = 0;
494 out:
495 zap_cursor_fini(&zc);
496 kmem_free(za, sizeof (zap_attribute_t));
497
498 return (err);
499 }
500 static int
501 fits_enum_dir(fits_t *f, uint64_t dnobj, int dir, void *ctx)
502 {
503 zap_cursor_t zc;
504 zap_attribute_t *za;
505 int err;
506 objset_t *os;
507 uint64_t mask = ZFS_DIRENT_OBJ(-1ULL);
508
509 if (dir == DIR_FROM)
510 os = f->f_fromsnap;
511 else
512 os = f->f_tosnap;
513
514 za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
515 for (zap_cursor_init(&zc, os, dnobj);
516 (err = zap_cursor_retrieve(&zc, za)) == 0;
517 zap_cursor_advance(&zc)) {
518 if (dir == DIR_FROM) {
519 if (f->f_ops->fits_dirent_del) {
520 err = f->f_ops->fits_dirent_del(ctx,
521 za->za_name, za->za_first_integer & mask);
522 if (err)
523 break;
524 }
525 } else {
526 if (f->f_ops->fits_dirent_add) {
527 err = f->f_ops->fits_dirent_add(ctx,
528 za->za_name, za->za_first_integer & mask);
529 if (err)
530 break;
531 }
532 }
533 }
534 if (err == ENOENT)
535 err = 0;
536
537 zap_cursor_fini(&zc);
538 kmem_free(za, sizeof (zap_attribute_t));
539
540 return (err);
541 }
542
543 static int
544 fits_dnode_changed(spa_t *spa, fits_t *f, uint64_t dnobj,
545 dnode_phys_t *from, arc_buf_t *frombuf, dnode_phys_t *to, arc_buf_t *tobuf)
546 {
547 int err = 0;
548 int type = 0;
549 fits_info_t si;
550
551 if (dnobj == f->f_shares_dir)
552 return (0);
553
554 if (to && to->dn_type != DMU_OT_PLAIN_FILE_CONTENTS &&
555 to->dn_type != DMU_OT_DIRECTORY_CONTENTS) {
556 to = NULL;
557 }
558 if (from && from->dn_type != DMU_OT_PLAIN_FILE_CONTENTS &&
559 from->dn_type != DMU_OT_DIRECTORY_CONTENTS) {
560 from = NULL;
561 }
562
563 if (from) {
564 err = fits_get_info(f, dnobj, FITS_OLD, &si, FI_ATTR_LINKS);
565 if (err)
566 return (err);
567 if (si.si_nlinks == 0)
568 from = NULL;
569 }
570 if (to) {
571 err = fits_get_info(f, dnobj, FITS_NEW, &si, FI_ATTR_LINKS);
572 if (err)
573 return (err);
574 if (si.si_nlinks == 0)
575 to = NULL;
576 }
577
578 if (!to && !from)
579 return (0);
580
581 if (from) {
582 if (from->dn_bonustype != DMU_OT_SA &&
583 from->dn_bonustype != DMU_OT_ZNODE)
584 return (EINVAL);
585 }
586 if (to) {
587 if (to->dn_bonustype != DMU_OT_SA &&
588 to->dn_bonustype != DMU_OT_ZNODE)
589 return (EINVAL);
590 }
591
592 if (from)
593 type = from->dn_type;
594 else if (to)
595 type = to->dn_type;
596
597 err = 0;
598 if (type == DMU_OT_DIRECTORY_CONTENTS) {
599 if (from && to) {
600 if (f->f_ops->fits_dir_mod)
601 err = f->f_ops->fits_dir_mod(f, dnobj);
602 } else if (from) {
603 if (f->f_ops->fits_dir_del)
604 err = f->f_ops->fits_dir_del(f, dnobj);
605 } else if (to) {
606 if (f->f_ops->fits_dir_add)
607 err = f->f_ops->fits_dir_add(f, dnobj);
608 }
609 } else if (type == DMU_OT_PLAIN_FILE_CONTENTS) {
610 if (from && to) {
611 if (f->f_ops->fits_file_mod)
612 err = f->f_ops->fits_file_mod(f, dnobj);
613 } else if (from) {
614 if (f->f_ops->fits_file_del)
615 err = f->f_ops->fits_file_del(f, dnobj);
616 } else if (to) {
617 if (f->f_ops->fits_file_add)
618 err = f->f_ops->fits_file_add(f, dnobj);
619 }
620 } else {
621 /* TODO other types, symlinks? */
622 err = 0;
623 }
624 return (err);
625 }
626
/*
 * State shared between fits_search_parent() and its traversal callback
 * search_cb() for the brute-force parent search.
 */
typedef struct _fits_search {
	fits_t *zs_f;		/* send context (f_shares_dir is read) */
	uint64_t zs_dnobj;	/* object whose parent is searched for */
	uint64_t zs_parent;	/* directory found; 0 while none found */
	objset_t *zs_osp;	/* objset whose directories are scanned */
} fits_search_t;
633
634 static int
635 search_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
636 const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
637 {
638 fits_search_t *zs = arg;
639 fits_t *f = zs->zs_f;
640 arc_buf_t *buf;
641 uint32_t flags = ARC_WAIT;
642 int ebp;
643 int i;
644 int ret;
645
646 if (issig(JUSTLOOKING) && issig(FORREAL))
647 return (EINTR);
648
649 if (zb->zb_object != DMU_META_DNODE_OBJECT)
650 return (0);
651
652 if (zb->zb_level != 0)
653 return (0);
654
655 if (!bp || BP_IS_HOLE(bp))
656 return (0);
657
658 if (BP_GET_TYPE(bp) != DMU_OT_DNODE)
659 return (0);
660
661 ebp = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
662
663 if (dsl_read(NULL, spa, bp, pbuf,
664 arc_getbuf_func, &buf, ZIO_PRIORITY_ASYNC_READ,
665 ZIO_FLAG_CANFAIL, &flags, zb) != 0)
666 return (EIO);
667 dnp = buf->b_data;
668
669 for (i = 0; i < ebp; ++i) {
670 zap_cursor_t zc;
671 zap_attribute_t *za;
672 uint64_t mask = ZFS_DIRENT_OBJ(-1ULL);
673 uint64_t ix = 0;
674 uint64_t dnobj = (zb->zb_blkid <<
675 (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i;
676
677 if (dnp[i].dn_type != DMU_OT_DIRECTORY_CONTENTS)
678 continue;
679 if (dnobj == f->f_shares_dir)
680 continue;
681
682 za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
683 for (zap_cursor_init(&zc, zs->zs_osp, dnobj);
684 (ret = zap_cursor_retrieve(&zc, za)) == 0;
685 zap_cursor_advance(&zc), ++ix) {
686 if ((za->za_first_integer & mask) ==
687 (zs->zs_dnobj & mask)) {
688 zs->zs_parent = dnobj;
689 break;
690 }
691 }
692 zap_cursor_fini(&zc);
693 kmem_free(za, sizeof (zap_attribute_t));
694 }
695
696 (void) arc_buf_remove_ref(buf, &buf);
697
698 if (zs->zs_parent)
699 return (EIO); /* abort search */
700
701 return (TRAVERSE_VISIT_NO_CHILDREN);
702 }
703
704 static int
705 fits_search_parent(fits_t *f, uint64_t dnobj, fits_which_t which,
706 uint64_t *parent)
707 {
708 dsl_dataset_t *ds;
709 fits_search_t zs;
710 int ret;
711
712 if (which == FITS_OLD) {
713 ds = f->f_fromds;
714 zs.zs_osp = f->f_fromsnap;
715 } else {
716 ds = f->f_tods;
717 zs.zs_osp = f->f_tosnap;
718 }
719
720 zs.zs_f = f;
721 zs.zs_dnobj = dnobj;
722 zs.zs_parent = 0;
723 ret = traverse_dataset(ds, 0, TRAVERSE_PRE, search_cb, &zs);
724 if (zs.zs_parent) {
725 *parent = zs.zs_parent;
726 return (0);
727 }
728
729 return (ret ? ret : ENOENT);
730 }
731
732 int
733 fits_get_info(fits_t *f, uint64_t dnobj, fits_which_t which,
734 fits_info_t *sp, uint64_t flags)
735 {
736 int ret;
737 sa_handle_t *hdl = NULL;
738 dmu_buf_t *db;
739 objset_t *osp;
740 sa_bulk_attr_t bulk[13];
741 int count = 0;
742 sa_attr_type_t *sa_table;
743
744 if (which == FITS_OLD) {
745 osp = f->f_fromsnap;
746 if (!osp)
747 return (ENOENT);
748 sa_table = f->f_from_sa_table;
749 } else if (which == FITS_NEW) {
750 osp = f->f_tosnap;
751 sa_table = f->f_to_sa_table;
752 } else {
753 return (EINVAL);
754 }
755
756 ret = fits_grab_sa_handle(osp, dnobj, &hdl, &db, FTAG);
757 if (ret)
758 return (ret);
759
760 if (flags & FI_ATTR_ATIME) {
761 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_ATIME], NULL,
762 &sp->si_atime, sizeof (sp->si_atime));
763 }
764 if (flags & FI_ATTR_MTIME) {
765 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MTIME], NULL,
766 &sp->si_mtime, sizeof (sp->si_mtime));
767 }
768 if (flags & FI_ATTR_CTIME) {
769 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL,
770 &sp->si_ctime, sizeof (sp->si_ctime));
771 }
772 if (flags & FI_ATTR_OTIME) {
773 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CRTIME], NULL,
774 &sp->si_otime, sizeof (sp->si_otime));
775 }
776 if (flags & FI_ATTR_MODE) {
777 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL,
778 &sp->si_mode, sizeof (sp->si_mode));
779 }
780 if (flags & FI_ATTR_SIZE) {
781 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_SIZE], NULL,
782 &sp->si_size, sizeof (sp->si_size));
783 }
784 if (flags & FI_ATTR_PARENT) {
785 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL,
786 &sp->si_parent, sizeof (sp->si_parent));
787 }
788 if (flags & FI_ATTR_LINKS) {
789 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL,
790 &sp->si_nlinks, sizeof (sp->si_nlinks));
791 }
792 if (flags & FI_ATTR_RDEV) {
793 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_RDEV], NULL,
794 &sp->si_rdev, sizeof (sp->si_rdev));
795 }
796 if (flags & FI_ATTR_UID) {
797 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_UID], NULL,
798 &sp->si_uid, sizeof (sp->si_uid));
799 }
800 if (flags & FI_ATTR_GID) {
801 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GID], NULL,
802 &sp->si_gid, sizeof (sp->si_gid));
803 }
804 if (flags & FI_ATTR_GEN) {
805 SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL,
806 &sp->si_gen, sizeof (sp->si_gen));
807 }
808 /* XXX if you add things, also bump the size of bulk */
809 /* XXX XATTR */
810
811 /* XXX TODO get flags to check for xattrdir */
812 if (count) {
813 ret = sa_bulk_lookup(hdl, bulk, count);
814 if (ret)
815 goto out;
816 }
817
818 if ((flags & FI_ATTR_PARENT) && sp->si_parent != dnobj) {
819 fits_info_t si;
820 int good = 0;
821 /*
822 * verify parent. this is very expensive and only a workaround
823 */
824 ret = fits_get_info(f, sp->si_parent, which, &si, FI_ATTR_MODE);
825 if (ret && ret != ENOENT)
826 goto out;
827 if (ret == 0 && S_ISDIR(si.si_mode)) {
828 ret = fits_find_entry(f, sp->si_parent, dnobj, which,
829 NULL);
830 if (ret && ret != ENOENT)
831 goto out;
832 if (ret == 0)
833 good = 1;
834 }
835 if (!good) {
836 uint64_t parent;
837
838 cmn_err(CE_NOTE, "parent wrong, do a brute force "
839 "search for ino %"PRIu64"\n", dnobj);
840 ret = fits_search_parent(f, dnobj, which, &parent);
841 if (ret == ENOENT) {
842 cmn_err(CE_NOTE, "no parent found\n");
843 ret = EINVAL;
844 goto out;
845 }
846 if (ret)
847 goto out;
848 sp->si_parent = parent;
849 cmn_err(CE_NOTE, "parent found, use %"PRIu64"\n",
850 parent);
851 /*
852 * TODO add a bad parent cache to prevent additional
853 * lookup in pass 2
854 */
855 }
856 }
857
858 out:
859 fits_release_sa_handle(hdl, db, FTAG);
860 return (ret);
861 }
862
863 int
864 fits_file_contents(fits_t *f, uint64_t dnobj, void *ctx)
865 {
866 dnode_t *from = NULL;
867 dnode_t *to = NULL;
868 int err;
869 int i;
870 zbookmark_t czb;
871 spa_t *spa = f->f_tods->ds_dir->dd_pool->dp_spa;
872
873 if (f->f_fromds) {
874 err = dnode_hold(f->f_fromsnap, dnobj, FTAG, &from);
875 if (err && err != ENOENT)
876 return (err);
877 }
878 if (from && from->dn_type != DMU_OT_PLAIN_FILE_CONTENTS) {
879 dnode_rele(from, FTAG);
880 from = NULL;
881 }
882 err = dnode_hold(f->f_tosnap, dnobj, FTAG, &to);
883 if (err)
884 goto out;
885 if (to->dn_type != DMU_OT_PLAIN_FILE_CONTENTS) {
886 err = EINVAL;
887 goto out;
888 }
889 if (from) {
890 f->f_filebl = kmem_zalloc(sizeof (blklevel_t)*from->dn_nlevels,
891 KM_SLEEP);
892 for (i = 0; i < from->dn_nlevels; ++i)
893 f->f_filebl[i].bl_blk = -1;
894 i = from->dn_nlevels - 1;
895 f->f_filebl[i].bl_nslots = from->dn_nblkptr;
896 f->f_filebl[i].bl_bp = &from->dn_phys->dn_blkptr[0];
897 f->f_filebl[i].bl_blk = 0;
898 f->f_filebl[i].bl_buf = from->dn_dbuf->db_parent->db_buf;
899 }
900 for (i = 0; i < to->dn_nblkptr; ++i) {
901 SET_BOOKMARK(&czb, f->f_tods->ds_object, dnobj,
902 to->dn_nlevels - 1, i);
903 err = fits_enum_bp(spa, f, &czb, to->dn_phys->dn_blkptr + i,
904 NULL, f->f_fromtxg, ctx);
905 if (err)
906 goto out;
907 }
908 out:
909 if (f->f_filebl) {
910 kmem_free(f->f_filebl, sizeof (blklevel_t) * from->dn_nlevels);
911 f->f_filebl = NULL;
912 }
913 if (from)
914 dnode_rele(from, FTAG);
915 if (to)
916 dnode_rele(to, FTAG);
917
918 return (err);
919 }
920
921 int
922 fits_dir_contents(fits_t *f, uint64_t dnobj, void *ctx)
923 {
924 dnode_t *from = NULL;
925 dnode_t *to = NULL;
926 int err;
927
928 if (f->f_fromds) {
929 err = dnode_hold(f->f_fromsnap, dnobj, FTAG, &from);
930 if (err && err != ENOENT)
931 return (err);
932 }
933 if (from && from->dn_type != DMU_OT_DIRECTORY_CONTENTS) {
934 dnode_rele(from, FTAG);
935 from = NULL;
936 }
937 err = dnode_hold(f->f_tosnap, dnobj, FTAG, &to);
938 if (err && err != ENOENT)
939 return (err);
940 if (to && to->dn_type != DMU_OT_DIRECTORY_CONTENTS) {
941 dnode_rele(to, FTAG);
942 to = NULL;
943 }
944
945 if (to && from) {
946 err = fits_diff_dir(f, dnobj, DIR_TO, ctx);
947 if (err)
948 goto out;
949 err = fits_diff_dir(f, dnobj, DIR_FROM, ctx);
950 } else if (to) {
951 err = fits_enum_dir(f, dnobj, DIR_TO, ctx);
952 } else if (from) {
953 err = fits_enum_dir(f, dnobj, DIR_FROM, ctx);
954 }
955 out:
956 if (from)
957 dnode_rele(from, FTAG);
958 if (to)
959 dnode_rele(to, FTAG);
960
961 return (err);
962 }
963
964 int
965 fits_find_entry(fits_t *f, uint64_t dirobj, uint64_t dnobj,
966 fits_which_t which, char **name)
967 {
968 zap_cursor_t zc;
969 zap_attribute_t *za;
970 int err;
971 uint64_t mask = ZFS_DIRENT_OBJ(-1ULL);
972 struct objset *os;
973
974 if (which == FITS_OLD) {
975 os = f->f_fromsnap;
976 if (!os)
977 return (ENOENT);
978 } else if (which == FITS_NEW) {
979 os = f->f_tosnap;
980 } else {
981 return (EINVAL);
982 }
983
984
985 if (name)
986 *name = NULL;
987 za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
988 for (zap_cursor_init(&zc, os, dirobj);
989 (err = zap_cursor_retrieve(&zc, za)) == 0;
990 zap_cursor_advance(&zc)) {
991 if ((za->za_first_integer & mask) == (dnobj & mask)) {
992 if (name)
993 *name = za->za_name;
994 break;
995 }
996 }
997 zap_cursor_fini(&zc);
998 return (err);
999 }
1000
1001 void
1002 fits_free_name(char *name)
1003 {
1004 zap_attribute_t *za;
1005
1006 if (!name)
1007 return;
1008
1009 za = (zap_attribute_t *)(name - offsetof(zap_attribute_t, za_name));
1010 kmem_free(za, sizeof (*za));
1011 }
1012
1013 int
1014 fits_lookup_entry(fits_t *f, uint64_t dirobj, char *name,
1015 fits_which_t which, uint64_t *dnobj)
1016 {
1017 struct objset *osp;
1018 int ret;
1019
1020 if (which == FITS_OLD) {
1021 osp = f->f_fromsnap;
1022 if (!osp)
1023 return (ENOENT);
1024 } else if (which == FITS_NEW) {
1025 osp = f->f_tosnap;
1026 } else {
1027 return (EINVAL);
1028 }
1029
1030 ret = zap_lookup(osp, dirobj, name, sizeof (*dnobj), 1, dnobj);
1031 if (ret)
1032 return (ret);
1033 *dnobj = ZFS_DIRENT_OBJ(*dnobj);
1034
1035 return (0);
1036 }
1037
1038 int
1039 fits_write(fits_t *f, const uint8_t *data, int len)
1040 {
1041 ssize_t resid; /* have to get resid to get detailed errno */
1042 int err;
1043
1044 err = vn_rdwr(UIO_WRITE, f->f_vp, (caddr_t)data,
1045 len, 0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid);
1046 *f->f_offp += len;
1047
1048 return (err);
1049 }
1050
1051 int
1052 fits_get_uuid(fits_t *f, fits_which_t which, uint8_t data[16])
1053 {
1054 if (which == FITS_OLD && !f->f_fromds)
1055 return (ENOENT);
1056
1057 LE_OUT64(data, f->f_tods->ds_dir->dd_pool->dp_spa->spa_config_guid);
1058 if (which == FITS_OLD) {
1059 LE_OUT64(data + 8, f->f_fromds->ds_phys->ds_guid);
1060 } else {
1061 LE_OUT64(data + 8, f->f_tods->ds_phys->ds_guid);
1062 }
1063 return (0);
1064 }
1065
1066 int
1067 fits_get_ctransid(fits_t *f, fits_which_t which, uint64_t *ctransid)
1068 {
1069 if (which == FITS_OLD && !f->f_fromds)
1070 return (ENOENT);
1071
1072 if (which == FITS_OLD)
1073 *ctransid = f->f_fromds->ds_phys->ds_creation_txg;
1074 else
1075 *ctransid = f->f_tods->ds_phys->ds_creation_txg;
1076 return (0);
1077 }
1078
1079 int
1080 fits_get_snapname(fits_t *f, fits_which_t which,
1081 char **name, int *len)
1082 {
1083 dsl_dataset_t *ds;
1084
1085 if (which == FITS_OLD && !f->f_fromds)
1086 return (ENOENT);
1087
1088 if (which == FITS_OLD)
1089 ds = f->f_fromds;
1090 else
1091 ds = f->f_tods;
1092
1093 *len = dsl_dataset_namelen(ds) + 1;
1094 *name = kmem_alloc(*len, KM_SLEEP);
1095 dsl_dataset_name(ds, *name);
1096 return (0);
1097 }
1098
1099 int
1100 fits_read_symlink(fits_t *f, uint64_t dnobj, fits_which_t which,
1101 char **target, int *plen)
1102 {
1103 int err;
1104 int ret;
1105 sa_handle_t *hdl = NULL;
1106 dmu_buf_t *db;
1107 objset_t *osp;
1108 dmu_object_info_t doi;
1109 sa_attr_type_t *sa_table;
1110
1111 if (which == FITS_OLD) {
1112 osp = f->f_fromsnap;
1113 if (!osp)
1114 return (EINVAL);
1115 sa_table = f->f_from_sa_table;
1116 } else if (which == FITS_NEW) {
1117 osp = f->f_tosnap;
1118 sa_table = f->f_to_sa_table;
1119 } else {
1120 return (EINVAL);
1121 }
1122
1123 err = fits_grab_sa_handle(osp, dnobj, &hdl, &db, FTAG);
1124 if (err)
1125 return (err);
1126
1127 dmu_object_info_from_db(db, &doi);
1128 if (doi.doi_bonus_type == DMU_OT_SA) {
1129 int len;
1130
1131 ret = sa_size(hdl, sa_table[ZPL_SYMLINK], &len);
1132 if (ret)
1133 goto out;
1134 *target = kmem_alloc(len + 1, KM_SLEEP);
1135 *plen = len;
1136 (*target)[len] = 0;
1137 ret = sa_lookup(hdl, sa_table[ZPL_SYMLINK], *target, len + 1);
1138 if (ret)
1139 kmem_free(*target, len + 1);
1140 } else {
1141 /*
1142 * TODO read target from file data, the old way
1143 * see zfs_readlink
1144 */
1145 ret = EINVAL;
1146 }
1147
1148 out:
1149 fits_release_sa_handle(hdl, db, FTAG);
1150
1151 return (ret);
1152 }
1153
1154 int
1155 fits_send(objset_t *tosnap, objset_t *fromsnap, int outfd, vnode_t *vp,
1156 offset_t *off)
1157 {
1158 dsl_dataset_t *ds;
1159 dsl_dataset_t *fromds = NULL;
1160 int err = 0;
1161 fits_t f;
1162 arc_buf_t *buf = NULL;
1163 uint32_t flags;
1164 objset_phys_t *osp;
1165 int i;
1166 zbookmark_t zb;
1167
1168 memset(&f, 0, sizeof (f));
1169 ds = tosnap->os_dsl_dataset;
1170 if (fromsnap)
1171 fromds = fromsnap->os_dsl_dataset;
1172
1173 /* make certain we are looking at snapshots */
1174 if (!dsl_dataset_is_snapshot(ds) ||
1175 (fromds && !dsl_dataset_is_snapshot(fromds)))
1176 return (EINVAL);
1177
1178 /* fromsnap must be earlier and from the same lineage as tosnap */
1179 if (fromds) {
1180 if (fromds->ds_phys->ds_creation_txg >=
1181 ds->ds_phys->ds_creation_txg)
1182 return (EXDEV);
1183
1184 if (fromds->ds_dir != ds->ds_dir)
1185 return (EXDEV);
1186
1187 /*
1188 * read root dnode from from-dataset
1189 */
1190 flags = ARC_WAIT;
1191 SET_BOOKMARK(&zb, fromds->ds_object, ZB_ROOT_OBJECT,
1192 ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
1193 err = dsl_read_nolock(NULL, fromds->ds_dir->dd_pool->dp_spa,
1194 &fromds->ds_phys->ds_bp, arc_getbuf_func, &buf,
1195 ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, &zb);
1196 if (err)
1197 return (err);
1198 osp = buf->b_data;
1199 f.f_dnp = &osp->os_meta_dnode;
1200 }
1201 f.f_vp = vp;
1202 f.f_offp = off;
1203 f.f_err = 0;
1204 f.f_fromds = fromds;
1205 f.f_tods = ds;
1206 f.f_fromsnap = fromsnap;
1207 f.f_tosnap = tosnap;
1208 if (fromds) {
1209 f.f_fromtxg = fromds->ds_phys->ds_creation_txg;
1210 f.f_bl = kmem_zalloc(sizeof (blklevel_t) *
1211 f.f_dnp->dn_nlevels, KM_SLEEP);
1212 for (i = 0; i < f.f_dnp->dn_nlevels; ++i)
1213 f.f_bl[i].bl_blk = -1;
1214 i = f.f_dnp->dn_nlevels - 1;
1215 f.f_bl[i].bl_nslots = f.f_dnp->dn_nblkptr;
1216 f.f_bl[i].bl_bp = &f.f_dnp->dn_blkptr[0];
1217 f.f_bl[i].bl_blk = 0;
1218
1219 err = fits_sa_setup(fromsnap, &f.f_from_sa_table);
1220 if (err)
1221 goto out;
1222 }
1223 err = fits_sa_setup(tosnap, &f.f_to_sa_table);
1224 if (err)
1225 goto out;
1226
1227 err = zap_lookup(tosnap, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1,
1228 &f.f_shares_dir);
1229 if (err && err != ENOENT)
1230 goto out;
1231
1232 err = fits_start(&f, &f.f_ops);
1233 if (err)
1234 goto out;
1235
1236 err = traverse_dataset(ds, f.f_fromtxg,
1237 TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, fits_cb, &f);
1238 if (err) {
1239 fits_abort(&f);
1240 goto out;
1241 }
1242 err = fits_start2(&f, &f.f_ops);
1243 if (err) {
1244 goto out;
1245 }
1246 err = traverse_dataset(ds, f.f_fromtxg,
1247 TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, fits_cb, &f);
1248 if (err) {
1249 fits_abort(&f);
1250 goto out;
1251 }
1252
1253 err = fits_end(&f);
1254 if (err)
1255 goto out;
1256
1257 out:
1258 if (fromds) {
1259 for (i = 0; i < f.f_dnp->dn_nlevels - 1; ++i) {
1260 blklevel_t *b = f.f_bl + i;
1261 if (b->bl_buf)
1262 arc_buf_remove_ref(b->bl_buf, &b->bl_buf);
1263 }
1264 kmem_free(f.f_bl, sizeof (blklevel_t) * f.f_dnp->dn_nlevels);
1265 }
1266
1267 if (buf)
1268 arc_buf_remove_ref(buf, &buf);
1269
1270 return (err);
1271 }