Print this page
3748 zfs headers should be C++ compatible
Submitted by:   Justin Gibbs <justing@spectralogic.com>
Submitted by:   Will Andrews <willa@spectralogic.com>
Reviewed by:    Matthew Ahrens <mahrens@delphix.com>
Reviewed by:    Eric Schrock <eric.schrock@delphix.com>
Reviewed by:    Robert Mustacchi <rm@joyent.com>


 492                 zio_execute(pio);
 493         } else {
 494                 mutex_exit(&pio->io_lock);
 495         }
 496 }
 497 
 498 static void
 499 zio_inherit_child_errors(zio_t *zio, enum zio_child c)
 500 {
 501         if (zio->io_child_error[c] != 0 && zio->io_error == 0)
 502                 zio->io_error = zio->io_child_error[c];
 503 }
 504 
 505 /*
 506  * ==========================================================================
 507  * Create the various types of I/O (read, write, free, etc)
 508  * ==========================================================================
 509  */
 510 static zio_t *
 511 zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
 512     void *data, uint64_t size, zio_done_func_t *done, void *private,
 513     zio_type_t type, int priority, enum zio_flag flags,
 514     vdev_t *vd, uint64_t offset, const zbookmark_t *zb,
 515     enum zio_stage stage, enum zio_stage pipeline)
 516 {
 517         zio_t *zio;
 518 
 519         ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);
 520         ASSERT(P2PHASE(size, SPA_MINBLOCKSIZE) == 0);
 521         ASSERT(P2PHASE(offset, SPA_MINBLOCKSIZE) == 0);
 522 
 523         ASSERT(!vd || spa_config_held(spa, SCL_STATE_ALL, RW_READER));
 524         ASSERT(!bp || !(flags & ZIO_FLAG_CONFIG_WRITER));
 525         ASSERT(vd || stage == ZIO_STAGE_OPEN);
 526 
 527         zio = kmem_cache_alloc(zio_cache, KM_SLEEP);
 528         bzero(zio, sizeof (zio_t));
 529 
 530         mutex_init(&zio->io_lock, NULL, MUTEX_DEFAULT, NULL);
 531         cv_init(&zio->io_cv, NULL, CV_DEFAULT, NULL);
 532 


 543                 zio->io_child_type = ZIO_CHILD_DDT;
 544         else
 545                 zio->io_child_type = ZIO_CHILD_LOGICAL;
 546 
 547         if (bp != NULL) {
 548                 zio->io_bp = (blkptr_t *)bp;
 549                 zio->io_bp_copy = *bp;
 550                 zio->io_bp_orig = *bp;
 551                 if (type != ZIO_TYPE_WRITE ||
 552                     zio->io_child_type == ZIO_CHILD_DDT)
 553                         zio->io_bp = &zio->io_bp_copy;        /* so caller can free */
 554                 if (zio->io_child_type == ZIO_CHILD_LOGICAL)
 555                         zio->io_logical = zio;
 556                 if (zio->io_child_type > ZIO_CHILD_GANG && BP_IS_GANG(bp))
 557                         pipeline |= ZIO_GANG_STAGES;
 558         }
 559 
 560         zio->io_spa = spa;
 561         zio->io_txg = txg;
 562         zio->io_done = done;
 563         zio->io_private = private;
 564         zio->io_type = type;
 565         zio->io_priority = priority;
 566         zio->io_vd = vd;
 567         zio->io_offset = offset;
 568         zio->io_orig_data = zio->io_data = data;
 569         zio->io_orig_size = zio->io_size = size;
 570         zio->io_orig_flags = zio->io_flags = flags;
 571         zio->io_orig_stage = zio->io_stage = stage;
 572         zio->io_orig_pipeline = zio->io_pipeline = pipeline;
 573 
 574         zio->io_state[ZIO_WAIT_READY] = (stage >= ZIO_STAGE_READY);
 575         zio->io_state[ZIO_WAIT_DONE] = (stage >= ZIO_STAGE_DONE);
 576 
 577         if (zb != NULL)
 578                 zio->io_bookmark = *zb;
 579 
 580         if (pio != NULL) {
 581                 if (zio->io_logical == NULL)
 582                         zio->io_logical = pio->io_logical;
 583                 if (zio->io_child_type == ZIO_CHILD_GANG)
 584                         zio->io_gang_leader = pio->io_gang_leader;
 585                 zio_add_child(pio, zio);
 586         }
 587 
 588         return (zio);
 589 }
 590 
 591 static void
 592 zio_destroy(zio_t *zio)
 593 {
 594         list_destroy(&zio->io_parent_list);
 595         list_destroy(&zio->io_child_list);
 596         mutex_destroy(&zio->io_lock);
 597         cv_destroy(&zio->io_cv);
 598         kmem_cache_free(zio_cache, zio);
 599 }
 600 
 601 zio_t *
 602 zio_null(zio_t *pio, spa_t *spa, vdev_t *vd, zio_done_func_t *done,
 603     void *private, enum zio_flag flags)
 604 {
 605         zio_t *zio;
 606 
 607         zio = zio_create(pio, spa, 0, NULL, NULL, 0, done, private,
 608             ZIO_TYPE_NULL, ZIO_PRIORITY_NOW, flags, vd, 0, NULL,
 609             ZIO_STAGE_OPEN, ZIO_INTERLOCK_PIPELINE);
 610 
 611         return (zio);
 612 }
 613 
 614 zio_t *
 615 zio_root(spa_t *spa, zio_done_func_t *done, void *private, enum zio_flag flags)

 616 {
 617         return (zio_null(NULL, spa, NULL, done, private, flags));
 618 }
 619 
 620 zio_t *
 621 zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
 622     void *data, uint64_t size, zio_done_func_t *done, void *private,
 623     int priority, enum zio_flag flags, const zbookmark_t *zb)
 624 {
 625         zio_t *zio;
 626 
 627         zio = zio_create(pio, spa, BP_PHYSICAL_BIRTH(bp), bp,
 628             data, size, done, private,
 629             ZIO_TYPE_READ, priority, flags, NULL, 0, zb,
 630             ZIO_STAGE_OPEN, (flags & ZIO_FLAG_DDT_CHILD) ?
 631             ZIO_DDT_CHILD_READ_PIPELINE : ZIO_READ_PIPELINE);
 632 
 633         return (zio);
 634 }
 635 
 636 zio_t *
 637 zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
 638     void *data, uint64_t size, const zio_prop_t *zp,
 639     zio_done_func_t *ready, zio_done_func_t *done, void *private,
 640     int priority, enum zio_flag flags, const zbookmark_t *zb)
 641 {
 642         zio_t *zio;
 643 
 644         ASSERT(zp->zp_checksum >= ZIO_CHECKSUM_OFF &&
 645             zp->zp_checksum < ZIO_CHECKSUM_FUNCTIONS &&
 646             zp->zp_compress >= ZIO_COMPRESS_OFF &&
 647             zp->zp_compress < ZIO_COMPRESS_FUNCTIONS &&
 648             DMU_OT_IS_VALID(zp->zp_type) &&
 649             zp->zp_level < 32 &&
 650             zp->zp_copies > 0 &&
 651             zp->zp_copies <= spa_max_replication(spa));
 652 
 653         zio = zio_create(pio, spa, txg, bp, data, size, done, private,
 654             ZIO_TYPE_WRITE, priority, flags, NULL, 0, zb,
 655             ZIO_STAGE_OPEN, (flags & ZIO_FLAG_DDT_CHILD) ?
 656             ZIO_DDT_CHILD_WRITE_PIPELINE : ZIO_WRITE_PIPELINE);
 657 
 658         zio->io_ready = ready;
 659         zio->io_prop = *zp;
 660 
 661         return (zio);
 662 }
 663 
 664 zio_t *
 665 zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, void *data,
 666     uint64_t size, zio_done_func_t *done, void *private, int priority,
 667     enum zio_flag flags, zbookmark_t *zb)
 668 {
 669         zio_t *zio;
 670 
 671         zio = zio_create(pio, spa, txg, bp, data, size, done, private,
 672             ZIO_TYPE_WRITE, priority, flags, NULL, 0, zb,
 673             ZIO_STAGE_OPEN, ZIO_REWRITE_PIPELINE);
 674 
 675         return (zio);
 676 }
 677 
 678 void
 679 zio_write_override(zio_t *zio, blkptr_t *bp, int copies, boolean_t nopwrite)
 680 {
 681         ASSERT(zio->io_type == ZIO_TYPE_WRITE);
 682         ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL);
 683         ASSERT(zio->io_stage == ZIO_STAGE_OPEN);
 684         ASSERT(zio->io_txg == spa_syncing_txg(zio->io_spa));
 685 
 686         /*
 687          * We must reset the io_prop to match the values that existed
 688          * when the bp was first written by dmu_sync() keeping in mind
 689          * that nopwrite and dedup are mutually exclusive.
 690          */
 691         zio->io_prop.zp_dedup = nopwrite ? B_FALSE : zio->io_prop.zp_dedup;


 708         zio_t *zio;
 709 
 710         dprintf_bp(bp, "freeing in txg %llu, pass %u",
 711             (longlong_t)txg, spa->spa_sync_pass);
 712 
 713         ASSERT(!BP_IS_HOLE(bp));
 714         ASSERT(spa_syncing_txg(spa) == txg);
 715         ASSERT(spa_sync_pass(spa) < zfs_sync_pass_deferred_free);
 716 
 717         metaslab_check_free(spa, bp);
 718 
 719         zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp),
 720             NULL, NULL, ZIO_TYPE_FREE, ZIO_PRIORITY_FREE, flags,
 721             NULL, 0, NULL, ZIO_STAGE_OPEN, ZIO_FREE_PIPELINE);
 722 
 723         return (zio);
 724 }
 725 
 726 zio_t *
 727 zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
 728     zio_done_func_t *done, void *private, enum zio_flag flags)
 729 {
 730         zio_t *zio;
 731 
 732         /*
 733          * A claim is an allocation of a specific block.  Claims are needed
 734          * to support immediate writes in the intent log.  The issue is that
 735          * immediate writes contain committed data, but in a txg that was
 736          * *not* committed.  Upon opening the pool after an unclean shutdown,
 737          * the intent log claims all blocks that contain immediate write data
 738          * so that the SPA knows they're in use.
 739          *
 740          * All claims *must* be resolved in the first txg -- before the SPA
 741          * starts allocating blocks -- so that nothing is allocated twice.
 742          * If txg == 0 we just verify that the block is claimable.
 743          */
 744         ASSERT3U(spa->spa_uberblock.ub_rootbp.blk_birth, <, spa_first_txg(spa));
 745         ASSERT(txg == spa_first_txg(spa) || txg == 0);
 746         ASSERT(!BP_GET_DEDUP(bp) || !spa_writeable(spa));       /* zdb(1M) */
 747 
 748         zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp),
 749             done, private, ZIO_TYPE_CLAIM, ZIO_PRIORITY_NOW, flags,
 750             NULL, 0, NULL, ZIO_STAGE_OPEN, ZIO_CLAIM_PIPELINE);
 751 
 752         return (zio);
 753 }
 754 
 755 zio_t *
 756 zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
 757     zio_done_func_t *done, void *private, int priority, enum zio_flag flags)
 758 {
 759         zio_t *zio;
 760         int c;
 761 
 762         if (vd->vdev_children == 0) {
 763                 zio = zio_create(pio, spa, 0, NULL, NULL, 0, done, private,
 764                     ZIO_TYPE_IOCTL, priority, flags, vd, 0, NULL,
 765                     ZIO_STAGE_OPEN, ZIO_IOCTL_PIPELINE);
 766 
 767                 zio->io_cmd = cmd;
 768         } else {
 769                 zio = zio_null(pio, spa, NULL, NULL, NULL, flags);
 770 
 771                 for (c = 0; c < vd->vdev_children; c++)
 772                         zio_nowait(zio_ioctl(zio, spa, vd->vdev_child[c], cmd,
 773                             done, private, priority, flags));
 774         }
 775 
 776         return (zio);
 777 }
 778 
 779 zio_t *
 780 zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
 781     void *data, int checksum, zio_done_func_t *done, void *private,
 782     int priority, enum zio_flag flags, boolean_t labels)
 783 {
 784         zio_t *zio;
 785 
 786         ASSERT(vd->vdev_children == 0);
 787         ASSERT(!labels || offset + size <= VDEV_LABEL_START_SIZE ||
 788             offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE);
 789         ASSERT3U(offset + size, <=, vd->vdev_psize);
 790 
 791         zio = zio_create(pio, vd->vdev_spa, 0, NULL, data, size, done, private,
 792             ZIO_TYPE_READ, priority, flags, vd, offset, NULL,
 793             ZIO_STAGE_OPEN, ZIO_READ_PHYS_PIPELINE);
 794 
 795         zio->io_prop.zp_checksum = checksum;
 796 
 797         return (zio);
 798 }
 799 
 800 zio_t *
 801 zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
 802     void *data, int checksum, zio_done_func_t *done, void *private,
 803     int priority, enum zio_flag flags, boolean_t labels)
 804 {
 805         zio_t *zio;
 806 
 807         ASSERT(vd->vdev_children == 0);
 808         ASSERT(!labels || offset + size <= VDEV_LABEL_START_SIZE ||
 809             offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE);
 810         ASSERT3U(offset + size, <=, vd->vdev_psize);
 811 
 812         zio = zio_create(pio, vd->vdev_spa, 0, NULL, data, size, done, private,
 813             ZIO_TYPE_WRITE, priority, flags, vd, offset, NULL,
 814             ZIO_STAGE_OPEN, ZIO_WRITE_PHYS_PIPELINE);
 815 
 816         zio->io_prop.zp_checksum = checksum;
 817 
 818         if (zio_checksum_table[checksum].ci_eck) {
 819                 /*
 820                  * zec checksums are necessarily destructive -- they modify
 821                  * the end of the write buffer to hold the verifier/checksum.
 822                  * Therefore, we must make a local copy in case the data is
 823                  * being written to multiple places in parallel.
 824                  */
 825                 void *wbuf = zio_buf_alloc(size);
 826                 bcopy(data, wbuf, size);
 827                 zio_push_transform(zio, wbuf, size, size, NULL);
 828         }
 829 
 830         return (zio);
 831 }
 832 
 833 /*
 834  * Create a child I/O to do some work for us.
 835  */
 836 zio_t *
 837 zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset,
 838         void *data, uint64_t size, int type, int priority, enum zio_flag flags,
 839         zio_done_func_t *done, void *private)
 840 {
 841         enum zio_stage pipeline = ZIO_VDEV_CHILD_PIPELINE;
 842         zio_t *zio;
 843 
 844         ASSERT(vd->vdev_parent ==
 845             (pio->io_vd ? pio->io_vd : pio->io_spa->spa_root_vdev));
 846 
 847         if (type == ZIO_TYPE_READ && bp != NULL) {
 848                 /*
 849                  * If we have the bp, then the child should perform the
 850                  * checksum and the parent need not.  This pushes error
 851                  * detection as close to the leaves as possible and
 852                  * eliminates redundant checksums in the interior nodes.
 853                  */
 854                 pipeline |= ZIO_STAGE_CHECKSUM_VERIFY;
 855                 pio->io_pipeline &= ~ZIO_STAGE_CHECKSUM_VERIFY;
 856         }
 857 
 858         if (vd->vdev_children == 0)
 859                 offset += VDEV_LABEL_START_SIZE;
 860 
 861         flags |= ZIO_VDEV_CHILD_FLAGS(pio) | ZIO_FLAG_DONT_PROPAGATE;
 862 
 863         /*
 864          * If we've decided to do a repair, the write is not speculative --
 865          * even if the original read was.
 866          */
 867         if (flags & ZIO_FLAG_IO_REPAIR)
 868                 flags &= ~ZIO_FLAG_SPECULATIVE;
 869 
 870         zio = zio_create(pio, pio->io_spa, pio->io_txg, bp, data, size,
 871             done, private, type, priority, flags, vd, offset, &pio->io_bookmark,
 872             ZIO_STAGE_VDEV_IO_START >> 1, pipeline);
 873 
 874         return (zio);
 875 }
 876 
 877 zio_t *
 878 zio_vdev_delegated_io(vdev_t *vd, uint64_t offset, void *data, uint64_t size,
 879         int type, int priority, enum zio_flag flags,
 880         zio_done_func_t *done, void *private)
 881 {
 882         zio_t *zio;
 883 
 884         ASSERT(vd->vdev_ops->vdev_op_leaf);
 885 
 886         zio = zio_create(NULL, vd->vdev_spa, 0, NULL,
 887             data, size, done, private, type, priority,
 888             flags | ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_RETRY,
 889             vd, offset, NULL,
 890             ZIO_STAGE_VDEV_IO_START >> 1, ZIO_VDEV_CHILD_PIPELINE);
 891 
 892         return (zio);
 893 }
 894 
 895 void
 896 zio_flush(zio_t *zio, vdev_t *vd)
 897 {
 898         zio_nowait(zio_ioctl(zio, zio->io_spa, vd, DKIOCFLUSHWRITECACHE,
 899             NULL, NULL, ZIO_PRIORITY_NOW,
 900             ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY));
 901 }
 902 
 903 void
 904 zio_shrink(zio_t *zio, uint64_t size)
 905 {
 906         ASSERT(zio->io_executor == NULL);
 907         ASSERT(zio->io_orig_size == zio->io_size);




 492                 zio_execute(pio);
 493         } else {
 494                 mutex_exit(&pio->io_lock);
 495         }
 496 }
 497 
 498 static void
 499 zio_inherit_child_errors(zio_t *zio, enum zio_child c)
 500 {
 501         if (zio->io_child_error[c] != 0 && zio->io_error == 0)
 502                 zio->io_error = zio->io_child_error[c];
 503 }
 504 
 505 /*
 506  * ==========================================================================
 507  * Create the various types of I/O (read, write, free, etc)
 508  * ==========================================================================
 509  */
 510 static zio_t *
 511 zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
 512     void *data, uint64_t size, zio_done_func_t *done, void *io_private,
 513     zio_type_t type, int priority, enum zio_flag flags,
 514     vdev_t *vd, uint64_t offset, const zbookmark_t *zb,
 515     enum zio_stage stage, enum zio_stage pipeline)
 516 {
 517         zio_t *zio;
 518 
 519         ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);
 520         ASSERT(P2PHASE(size, SPA_MINBLOCKSIZE) == 0);
 521         ASSERT(P2PHASE(offset, SPA_MINBLOCKSIZE) == 0);
 522 
 523         ASSERT(!vd || spa_config_held(spa, SCL_STATE_ALL, RW_READER));
 524         ASSERT(!bp || !(flags & ZIO_FLAG_CONFIG_WRITER));
 525         ASSERT(vd || stage == ZIO_STAGE_OPEN);
 526 
 527         zio = kmem_cache_alloc(zio_cache, KM_SLEEP);
 528         bzero(zio, sizeof (zio_t));
 529 
 530         mutex_init(&zio->io_lock, NULL, MUTEX_DEFAULT, NULL);
 531         cv_init(&zio->io_cv, NULL, CV_DEFAULT, NULL);
 532 


 543                 zio->io_child_type = ZIO_CHILD_DDT;
 544         else
 545                 zio->io_child_type = ZIO_CHILD_LOGICAL;
 546 
 547         if (bp != NULL) {
 548                 zio->io_bp = (blkptr_t *)bp;
 549                 zio->io_bp_copy = *bp;
 550                 zio->io_bp_orig = *bp;
 551                 if (type != ZIO_TYPE_WRITE ||
 552                     zio->io_child_type == ZIO_CHILD_DDT)
 553                         zio->io_bp = &zio->io_bp_copy;        /* so caller can free */
 554                 if (zio->io_child_type == ZIO_CHILD_LOGICAL)
 555                         zio->io_logical = zio;
 556                 if (zio->io_child_type > ZIO_CHILD_GANG && BP_IS_GANG(bp))
 557                         pipeline |= ZIO_GANG_STAGES;
 558         }
 559 
 560         zio->io_spa = spa;
 561         zio->io_txg = txg;
 562         zio->io_done = done;
 563         zio->io_private = io_private;
 564         zio->io_type = type;
 565         zio->io_priority = priority;
 566         zio->io_vd = vd;
 567         zio->io_offset = offset;
 568         zio->io_orig_data = zio->io_data = data;
 569         zio->io_orig_size = zio->io_size = size;
 570         zio->io_orig_flags = zio->io_flags = flags;
 571         zio->io_orig_stage = zio->io_stage = stage;
 572         zio->io_orig_pipeline = zio->io_pipeline = pipeline;
 573 
 574         zio->io_state[ZIO_WAIT_READY] = (stage >= ZIO_STAGE_READY);
 575         zio->io_state[ZIO_WAIT_DONE] = (stage >= ZIO_STAGE_DONE);
 576 
 577         if (zb != NULL)
 578                 zio->io_bookmark = *zb;
 579 
 580         if (pio != NULL) {
 581                 if (zio->io_logical == NULL)
 582                         zio->io_logical = pio->io_logical;
 583                 if (zio->io_child_type == ZIO_CHILD_GANG)
 584                         zio->io_gang_leader = pio->io_gang_leader;
 585                 zio_add_child(pio, zio);
 586         }
 587 
 588         return (zio);
 589 }
 590 
 591 static void
 592 zio_destroy(zio_t *zio)
 593 {
 594         list_destroy(&zio->io_parent_list);
 595         list_destroy(&zio->io_child_list);
 596         mutex_destroy(&zio->io_lock);
 597         cv_destroy(&zio->io_cv);
 598         kmem_cache_free(zio_cache, zio);
 599 }
 600 
 601 zio_t *
 602 zio_null(zio_t *pio, spa_t *spa, vdev_t *vd, zio_done_func_t *done,
 603     void *io_private, enum zio_flag flags)
 604 {
 605         zio_t *zio;
 606 
 607         zio = zio_create(pio, spa, 0, NULL, NULL, 0, done, io_private,
 608             ZIO_TYPE_NULL, ZIO_PRIORITY_NOW, flags, vd, 0, NULL,
 609             ZIO_STAGE_OPEN, ZIO_INTERLOCK_PIPELINE);
 610 
 611         return (zio);
 612 }
 613 
 614 zio_t *
 615 zio_root(spa_t *spa, zio_done_func_t *done, void *io_private,
 616     enum zio_flag flags)
 617 {
 618         return (zio_null(NULL, spa, NULL, done, io_private, flags));
 619 }
 620 
 621 zio_t *
 622 zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
 623     void *data, uint64_t size, zio_done_func_t *done, void *io_private,
 624     int priority, enum zio_flag flags, const zbookmark_t *zb)
 625 {
 626         zio_t *zio;
 627 
 628         zio = zio_create(pio, spa, BP_PHYSICAL_BIRTH(bp), bp,
 629             data, size, done, io_private,
 630             ZIO_TYPE_READ, priority, flags, NULL, 0, zb,
 631             ZIO_STAGE_OPEN, (flags & ZIO_FLAG_DDT_CHILD) ?
 632             ZIO_DDT_CHILD_READ_PIPELINE : ZIO_READ_PIPELINE);
 633 
 634         return (zio);
 635 }
 636 
 637 zio_t *
 638 zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
 639     void *data, uint64_t size, const zio_prop_t *zp,
 640     zio_done_func_t *ready, zio_done_func_t *done, void *io_private,
 641     int priority, enum zio_flag flags, const zbookmark_t *zb)
 642 {
 643         zio_t *zio;
 644 
 645         ASSERT(zp->zp_checksum >= ZIO_CHECKSUM_OFF &&
 646             zp->zp_checksum < ZIO_CHECKSUM_FUNCTIONS &&
 647             zp->zp_compress >= ZIO_COMPRESS_OFF &&
 648             zp->zp_compress < ZIO_COMPRESS_FUNCTIONS &&
 649             DMU_OT_IS_VALID(zp->zp_type) &&
 650             zp->zp_level < 32 &&
 651             zp->zp_copies > 0 &&
 652             zp->zp_copies <= spa_max_replication(spa));
 653 
 654         zio = zio_create(pio, spa, txg, bp, data, size, done, io_private,
 655             ZIO_TYPE_WRITE, priority, flags, NULL, 0, zb,
 656             ZIO_STAGE_OPEN, (flags & ZIO_FLAG_DDT_CHILD) ?
 657             ZIO_DDT_CHILD_WRITE_PIPELINE : ZIO_WRITE_PIPELINE);
 658 
 659         zio->io_ready = ready;
 660         zio->io_prop = *zp;
 661 
 662         return (zio);
 663 }
 664 
 665 zio_t *
 666 zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, void *data,
 667     uint64_t size, zio_done_func_t *done, void *io_private, int priority,
 668     enum zio_flag flags, zbookmark_t *zb)
 669 {
 670         zio_t *zio;
 671 
 672         zio = zio_create(pio, spa, txg, bp, data, size, done, io_private,
 673             ZIO_TYPE_WRITE, priority, flags, NULL, 0, zb,
 674             ZIO_STAGE_OPEN, ZIO_REWRITE_PIPELINE);
 675 
 676         return (zio);
 677 }
 678 
 679 void
 680 zio_write_override(zio_t *zio, blkptr_t *bp, int copies, boolean_t nopwrite)
 681 {
 682         ASSERT(zio->io_type == ZIO_TYPE_WRITE);
 683         ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL);
 684         ASSERT(zio->io_stage == ZIO_STAGE_OPEN);
 685         ASSERT(zio->io_txg == spa_syncing_txg(zio->io_spa));
 686 
 687         /*
 688          * We must reset the io_prop to match the values that existed
 689          * when the bp was first written by dmu_sync() keeping in mind
 690          * that nopwrite and dedup are mutually exclusive.
 691          */
 692         zio->io_prop.zp_dedup = nopwrite ? B_FALSE : zio->io_prop.zp_dedup;


 709         zio_t *zio;
 710 
 711         dprintf_bp(bp, "freeing in txg %llu, pass %u",
 712             (longlong_t)txg, spa->spa_sync_pass);
 713 
 714         ASSERT(!BP_IS_HOLE(bp));
 715         ASSERT(spa_syncing_txg(spa) == txg);
 716         ASSERT(spa_sync_pass(spa) < zfs_sync_pass_deferred_free);
 717 
 718         metaslab_check_free(spa, bp);
 719 
 720         zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp),
 721             NULL, NULL, ZIO_TYPE_FREE, ZIO_PRIORITY_FREE, flags,
 722             NULL, 0, NULL, ZIO_STAGE_OPEN, ZIO_FREE_PIPELINE);
 723 
 724         return (zio);
 725 }
 726 
 727 zio_t *
 728 zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
 729     zio_done_func_t *done, void *io_private, enum zio_flag flags)
 730 {
 731         zio_t *zio;
 732 
 733         /*
 734          * A claim is an allocation of a specific block.  Claims are needed
 735          * to support immediate writes in the intent log.  The issue is that
 736          * immediate writes contain committed data, but in a txg that was
 737          * *not* committed.  Upon opening the pool after an unclean shutdown,
 738          * the intent log claims all blocks that contain immediate write data
 739          * so that the SPA knows they're in use.
 740          *
 741          * All claims *must* be resolved in the first txg -- before the SPA
 742          * starts allocating blocks -- so that nothing is allocated twice.
 743          * If txg == 0 we just verify that the block is claimable.
 744          */
 745         ASSERT3U(spa->spa_uberblock.ub_rootbp.blk_birth, <, spa_first_txg(spa));
 746         ASSERT(txg == spa_first_txg(spa) || txg == 0);
 747         ASSERT(!BP_GET_DEDUP(bp) || !spa_writeable(spa));       /* zdb(1M) */
 748 
 749         zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp),
 750             done, io_private, ZIO_TYPE_CLAIM, ZIO_PRIORITY_NOW, flags,
 751             NULL, 0, NULL, ZIO_STAGE_OPEN, ZIO_CLAIM_PIPELINE);
 752 
 753         return (zio);
 754 }
 755 
 756 zio_t *
 757 zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
 758     zio_done_func_t *done, void *io_private, int priority, enum zio_flag flags)
 759 {
 760         zio_t *zio;
 761         int c;
 762 
 763         if (vd->vdev_children == 0) {
 764                 zio = zio_create(pio, spa, 0, NULL, NULL, 0, done, io_private,
 765                     ZIO_TYPE_IOCTL, priority, flags, vd, 0, NULL,
 766                     ZIO_STAGE_OPEN, ZIO_IOCTL_PIPELINE);
 767 
 768                 zio->io_cmd = cmd;
 769         } else {
 770                 zio = zio_null(pio, spa, NULL, NULL, NULL, flags);
 771 
 772                 for (c = 0; c < vd->vdev_children; c++)
 773                         zio_nowait(zio_ioctl(zio, spa, vd->vdev_child[c], cmd,
 774                             done, io_private, priority, flags));
 775         }
 776 
 777         return (zio);
 778 }
 779 
 780 zio_t *
 781 zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
 782     void *data, int checksum, zio_done_func_t *done, void *io_private,
 783     int priority, enum zio_flag flags, boolean_t labels)
 784 {
 785         zio_t *zio;
 786 
 787         ASSERT(vd->vdev_children == 0);
 788         ASSERT(!labels || offset + size <= VDEV_LABEL_START_SIZE ||
 789             offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE);
 790         ASSERT3U(offset + size, <=, vd->vdev_psize);
 791 
 792         zio = zio_create(pio, vd->vdev_spa, 0, NULL, data, size, done,
 793             io_private, ZIO_TYPE_READ, priority, flags, vd, offset, NULL,
 794             ZIO_STAGE_OPEN, ZIO_READ_PHYS_PIPELINE);
 795 
 796         zio->io_prop.zp_checksum = checksum;
 797 
 798         return (zio);
 799 }
 800 
 801 zio_t *
 802 zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
 803     void *data, int checksum, zio_done_func_t *done, void *io_private,
 804     int priority, enum zio_flag flags, boolean_t labels)
 805 {
 806         zio_t *zio;
 807 
 808         ASSERT(vd->vdev_children == 0);
 809         ASSERT(!labels || offset + size <= VDEV_LABEL_START_SIZE ||
 810             offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE);
 811         ASSERT3U(offset + size, <=, vd->vdev_psize);
 812 
 813         zio = zio_create(pio, vd->vdev_spa, 0, NULL, data, size, done,
 814             io_private, ZIO_TYPE_WRITE, priority, flags, vd, offset, NULL,
 815             ZIO_STAGE_OPEN, ZIO_WRITE_PHYS_PIPELINE);
 816 
 817         zio->io_prop.zp_checksum = checksum;
 818 
 819         if (zio_checksum_table[checksum].ci_eck) {
 820                 /*
 821                  * zec checksums are necessarily destructive -- they modify
 822                  * the end of the write buffer to hold the verifier/checksum.
 823                  * Therefore, we must make a local copy in case the data is
 824                  * being written to multiple places in parallel.
 825                  */
 826                 void *wbuf = zio_buf_alloc(size);
 827                 bcopy(data, wbuf, size);
 828                 zio_push_transform(zio, wbuf, size, size, NULL);
 829         }
 830 
 831         return (zio);
 832 }
 833 
 834 /*
 835  * Create a child I/O to do some work for us.
 836  */
 837 zio_t *
 838 zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset,
 839         void *data, uint64_t size, int type, int priority, enum zio_flag flags,
 840         zio_done_func_t *done, void *io_private)
 841 {
 842         enum zio_stage pipeline = ZIO_VDEV_CHILD_PIPELINE;
 843         zio_t *zio;
 844 
 845         ASSERT(vd->vdev_parent ==
 846             (pio->io_vd ? pio->io_vd : pio->io_spa->spa_root_vdev));
 847 
 848         if (type == ZIO_TYPE_READ && bp != NULL) {
 849                 /*
 850                  * If we have the bp, then the child should perform the
 851                  * checksum and the parent need not.  This pushes error
 852                  * detection as close to the leaves as possible and
 853                  * eliminates redundant checksums in the interior nodes.
 854                  */
 855                 pipeline |= ZIO_STAGE_CHECKSUM_VERIFY;
 856                 pio->io_pipeline &= ~ZIO_STAGE_CHECKSUM_VERIFY;
 857         }
 858 
 859         if (vd->vdev_children == 0)
 860                 offset += VDEV_LABEL_START_SIZE;
 861 
 862         flags |= ZIO_VDEV_CHILD_FLAGS(pio) | ZIO_FLAG_DONT_PROPAGATE;
 863 
 864         /*
 865          * If we've decided to do a repair, the write is not speculative --
 866          * even if the original read was.
 867          */
 868         if (flags & ZIO_FLAG_IO_REPAIR)
 869                 flags &= ~ZIO_FLAG_SPECULATIVE;
 870 
 871         zio = zio_create(pio, pio->io_spa, pio->io_txg, bp, data, size,
 872             done, io_private, type, priority, flags, vd, offset,
 873             &pio->io_bookmark, ZIO_STAGE_VDEV_IO_START >> 1, pipeline);
 874 
 875         return (zio);
 876 }
 877 
 878 zio_t *
 879 zio_vdev_delegated_io(vdev_t *vd, uint64_t offset, void *data, uint64_t size,
 880         int type, int priority, enum zio_flag flags,
 881         zio_done_func_t *done, void *io_private)
 882 {
 883         zio_t *zio;
 884 
 885         ASSERT(vd->vdev_ops->vdev_op_leaf);
 886 
 887         zio = zio_create(NULL, vd->vdev_spa, 0, NULL,
 888             data, size, done, io_private, type, priority,
 889             flags | ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_RETRY,
 890             vd, offset, NULL,
 891             ZIO_STAGE_VDEV_IO_START >> 1, ZIO_VDEV_CHILD_PIPELINE);
 892 
 893         return (zio);
 894 }
 895 
 896 void
 897 zio_flush(zio_t *zio, vdev_t *vd)
 898 {
 899         zio_nowait(zio_ioctl(zio, zio->io_spa, vd, DKIOCFLUSHWRITECACHE,
 900             NULL, NULL, ZIO_PRIORITY_NOW,
 901             ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY));
 902 }
 903 
 904 void
 905 zio_shrink(zio_t *zio, uint64_t size)
 906 {
 907         ASSERT(zio->io_executor == NULL);
 908         ASSERT(zio->io_orig_size == zio->io_size);