492 zio_execute(pio);
493 } else {
494 mutex_exit(&pio->io_lock);
495 }
496 }
497
498 static void
499 zio_inherit_child_errors(zio_t *zio, enum zio_child c)
500 {
501 if (zio->io_child_error[c] != 0 && zio->io_error == 0)
502 zio->io_error = zio->io_child_error[c];
503 }
504
505 /*
506 * ==========================================================================
507 * Create the various types of I/O (read, write, free, etc)
508 * ==========================================================================
509 */
/*
 * Allocate and initialize a zio.  This is the common construction path
 * used by every zio_* creation function below: it allocates from
 * zio_cache, records the caller's parameters, selects the pipeline,
 * and links the new zio to its parent (if any).
 *
 * NOTE(review): the original lines numbered 533-542 -- including the
 * "if" chain that classifies the zio (ZIO_CHILD_VDEV / ZIO_CHILD_GANG /
 * the condition guarding ZIO_CHILD_DDT) -- are missing from this copy,
 * leaving a dangling "else" below.  Restore them from the upstream
 * source before compiling.
 */
static zio_t *
zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
    void *data, uint64_t size, zio_done_func_t *done, void *private,
    zio_type_t type, int priority, enum zio_flag flags,
    vdev_t *vd, uint64_t offset, const zbookmark_t *zb,
    enum zio_stage stage, enum zio_stage pipeline)
{
	zio_t *zio;

	/* Size and offset must be sector-aligned and within pool limits. */
	ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);
	ASSERT(P2PHASE(size, SPA_MINBLOCKSIZE) == 0);
	ASSERT(P2PHASE(offset, SPA_MINBLOCKSIZE) == 0);

	ASSERT(!vd || spa_config_held(spa, SCL_STATE_ALL, RW_READER));
	ASSERT(!bp || !(flags & ZIO_FLAG_CONFIG_WRITER));
	ASSERT(vd || stage == ZIO_STAGE_OPEN);

	zio = kmem_cache_alloc(zio_cache, KM_SLEEP);
	bzero(zio, sizeof (zio_t));

	mutex_init(&zio->io_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&zio->io_cv, NULL, CV_DEFAULT, NULL);

	/* NOTE(review): the classification "if" is missing here (see above). */
		zio->io_child_type = ZIO_CHILD_DDT;
	else
		zio->io_child_type = ZIO_CHILD_LOGICAL;

	if (bp != NULL) {
		zio->io_bp = (blkptr_t *)bp;
		zio->io_bp_copy = *bp;
		zio->io_bp_orig = *bp;
		/*
		 * Non-writes and DDT children operate on a private copy
		 * of the bp rather than the caller's original.
		 */
		if (type != ZIO_TYPE_WRITE ||
		    zio->io_child_type == ZIO_CHILD_DDT)
			zio->io_bp = &zio->io_bp_copy; /* so caller can free */
		if (zio->io_child_type == ZIO_CHILD_LOGICAL)
			zio->io_logical = zio;
		/* Gang blocks need the extra gang pipeline stages. */
		if (zio->io_child_type > ZIO_CHILD_GANG && BP_IS_GANG(bp))
			pipeline |= ZIO_GANG_STAGES;
	}

	zio->io_spa = spa;
	zio->io_txg = txg;
	zio->io_done = done;
	zio->io_private = private;
	zio->io_type = type;
	zio->io_priority = priority;
	zio->io_vd = vd;
	zio->io_offset = offset;
	zio->io_orig_data = zio->io_data = data;
	zio->io_orig_size = zio->io_size = size;
	zio->io_orig_flags = zio->io_flags = flags;
	zio->io_orig_stage = zio->io_stage = stage;
	zio->io_orig_pipeline = zio->io_pipeline = pipeline;

	/* A zio created at or past a wait stage counts as already there. */
	zio->io_state[ZIO_WAIT_READY] = (stage >= ZIO_STAGE_READY);
	zio->io_state[ZIO_WAIT_DONE] = (stage >= ZIO_STAGE_DONE);

	if (zb != NULL)
		zio->io_bookmark = *zb;

	if (pio != NULL) {
		/* Inherit logical and gang-leader context from the parent. */
		if (zio->io_logical == NULL)
			zio->io_logical = pio->io_logical;
		if (zio->io_child_type == ZIO_CHILD_GANG)
			zio->io_gang_leader = pio->io_gang_leader;
		zio_add_child(pio, zio);
	}

	return (zio);
}
590
/*
 * Tear down a zio built by zio_create(): release its parent/child
 * lists and synchronization primitives, then return the structure
 * to the zio cache.
 */
static void
zio_destroy(zio_t *zio)
{
	list_destroy(&zio->io_parent_list);
	list_destroy(&zio->io_child_list);
	mutex_destroy(&zio->io_lock);
	cv_destroy(&zio->io_cv);
	kmem_cache_free(zio_cache, zio);
}
600
601 zio_t *
602 zio_null(zio_t *pio, spa_t *spa, vdev_t *vd, zio_done_func_t *done,
603 void *private, enum zio_flag flags)
604 {
605 zio_t *zio;
606
607 zio = zio_create(pio, spa, 0, NULL, NULL, 0, done, private,
608 ZIO_TYPE_NULL, ZIO_PRIORITY_NOW, flags, vd, 0, NULL,
609 ZIO_STAGE_OPEN, ZIO_INTERLOCK_PIPELINE);
610
611 return (zio);
612 }
613
614 zio_t *
615 zio_root(spa_t *spa, zio_done_func_t *done, void *private, enum zio_flag flags)
616 {
617 return (zio_null(NULL, spa, NULL, done, private, flags));
618 }
619
620 zio_t *
621 zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
622 void *data, uint64_t size, zio_done_func_t *done, void *private,
623 int priority, enum zio_flag flags, const zbookmark_t *zb)
624 {
625 zio_t *zio;
626
627 zio = zio_create(pio, spa, BP_PHYSICAL_BIRTH(bp), bp,
628 data, size, done, private,
629 ZIO_TYPE_READ, priority, flags, NULL, 0, zb,
630 ZIO_STAGE_OPEN, (flags & ZIO_FLAG_DDT_CHILD) ?
631 ZIO_DDT_CHILD_READ_PIPELINE : ZIO_READ_PIPELINE);
632
633 return (zio);
634 }
635
636 zio_t *
637 zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
638 void *data, uint64_t size, const zio_prop_t *zp,
639 zio_done_func_t *ready, zio_done_func_t *done, void *private,
640 int priority, enum zio_flag flags, const zbookmark_t *zb)
641 {
642 zio_t *zio;
643
644 ASSERT(zp->zp_checksum >= ZIO_CHECKSUM_OFF &&
645 zp->zp_checksum < ZIO_CHECKSUM_FUNCTIONS &&
646 zp->zp_compress >= ZIO_COMPRESS_OFF &&
647 zp->zp_compress < ZIO_COMPRESS_FUNCTIONS &&
648 DMU_OT_IS_VALID(zp->zp_type) &&
649 zp->zp_level < 32 &&
650 zp->zp_copies > 0 &&
651 zp->zp_copies <= spa_max_replication(spa));
652
653 zio = zio_create(pio, spa, txg, bp, data, size, done, private,
654 ZIO_TYPE_WRITE, priority, flags, NULL, 0, zb,
655 ZIO_STAGE_OPEN, (flags & ZIO_FLAG_DDT_CHILD) ?
656 ZIO_DDT_CHILD_WRITE_PIPELINE : ZIO_WRITE_PIPELINE);
657
658 zio->io_ready = ready;
659 zio->io_prop = *zp;
660
661 return (zio);
662 }
663
664 zio_t *
665 zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, void *data,
666 uint64_t size, zio_done_func_t *done, void *private, int priority,
667 enum zio_flag flags, zbookmark_t *zb)
668 {
669 zio_t *zio;
670
671 zio = zio_create(pio, spa, txg, bp, data, size, done, private,
672 ZIO_TYPE_WRITE, priority, flags, NULL, 0, zb,
673 ZIO_STAGE_OPEN, ZIO_REWRITE_PIPELINE);
674
675 return (zio);
676 }
677
/*
 * Reset a dmu_sync() override write's properties.
 *
 * NOTE(review): this region is corrupted -- the original lines
 * numbered 692-707 (the remainder of zio_write_override() and the
 * signature/opening brace of the free function whose body follows)
 * are missing and must be restored from the upstream source before
 * this file can compile.
 */
void
zio_write_override(zio_t *zio, blkptr_t *bp, int copies, boolean_t nopwrite)
{
	ASSERT(zio->io_type == ZIO_TYPE_WRITE);
	ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL);
	ASSERT(zio->io_stage == ZIO_STAGE_OPEN);
	ASSERT(zio->io_txg == spa_syncing_txg(zio->io_spa));

	/*
	 * We must reset the io_prop to match the values that existed
	 * when the bp was first written by dmu_sync() keeping in mind
	 * that nopwrite and dedup are mutually exclusive.
	 */
	zio->io_prop.zp_dedup = nopwrite ? B_FALSE : zio->io_prop.zp_dedup;
	/* NOTE(review): gap -- a function boundary is missing here. */
	zio_t *zio;

	dprintf_bp(bp, "freeing in txg %llu, pass %u",
	    (longlong_t)txg, spa->spa_sync_pass);

	ASSERT(!BP_IS_HOLE(bp));
	ASSERT(spa_syncing_txg(spa) == txg);
	ASSERT(spa_sync_pass(spa) < zfs_sync_pass_deferred_free);

	metaslab_check_free(spa, bp);

	zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp),
	    NULL, NULL, ZIO_TYPE_FREE, ZIO_PRIORITY_FREE, flags,
	    NULL, 0, NULL, ZIO_STAGE_OPEN, ZIO_FREE_PIPELINE);

	return (zio);
}
725
726 zio_t *
727 zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
728 zio_done_func_t *done, void *private, enum zio_flag flags)
729 {
730 zio_t *zio;
731
732 /*
733 * A claim is an allocation of a specific block. Claims are needed
734 * to support immediate writes in the intent log. The issue is that
735 * immediate writes contain committed data, but in a txg that was
736 * *not* committed. Upon opening the pool after an unclean shutdown,
737 * the intent log claims all blocks that contain immediate write data
738 * so that the SPA knows they're in use.
739 *
740 * All claims *must* be resolved in the first txg -- before the SPA
741 * starts allocating blocks -- so that nothing is allocated twice.
742 * If txg == 0 we just verify that the block is claimable.
743 */
744 ASSERT3U(spa->spa_uberblock.ub_rootbp.blk_birth, <, spa_first_txg(spa));
745 ASSERT(txg == spa_first_txg(spa) || txg == 0);
746 ASSERT(!BP_GET_DEDUP(bp) || !spa_writeable(spa)); /* zdb(1M) */
747
748 zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp),
749 done, private, ZIO_TYPE_CLAIM, ZIO_PRIORITY_NOW, flags,
750 NULL, 0, NULL, ZIO_STAGE_OPEN, ZIO_CLAIM_PIPELINE);
751
752 return (zio);
753 }
754
755 zio_t *
756 zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
757 zio_done_func_t *done, void *private, int priority, enum zio_flag flags)
758 {
759 zio_t *zio;
760 int c;
761
762 if (vd->vdev_children == 0) {
763 zio = zio_create(pio, spa, 0, NULL, NULL, 0, done, private,
764 ZIO_TYPE_IOCTL, priority, flags, vd, 0, NULL,
765 ZIO_STAGE_OPEN, ZIO_IOCTL_PIPELINE);
766
767 zio->io_cmd = cmd;
768 } else {
769 zio = zio_null(pio, spa, NULL, NULL, NULL, flags);
770
771 for (c = 0; c < vd->vdev_children; c++)
772 zio_nowait(zio_ioctl(zio, spa, vd->vdev_child[c], cmd,
773 done, private, priority, flags));
774 }
775
776 return (zio);
777 }
778
779 zio_t *
780 zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
781 void *data, int checksum, zio_done_func_t *done, void *private,
782 int priority, enum zio_flag flags, boolean_t labels)
783 {
784 zio_t *zio;
785
786 ASSERT(vd->vdev_children == 0);
787 ASSERT(!labels || offset + size <= VDEV_LABEL_START_SIZE ||
788 offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE);
789 ASSERT3U(offset + size, <=, vd->vdev_psize);
790
791 zio = zio_create(pio, vd->vdev_spa, 0, NULL, data, size, done, private,
792 ZIO_TYPE_READ, priority, flags, vd, offset, NULL,
793 ZIO_STAGE_OPEN, ZIO_READ_PHYS_PIPELINE);
794
795 zio->io_prop.zp_checksum = checksum;
796
797 return (zio);
798 }
799
800 zio_t *
801 zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
802 void *data, int checksum, zio_done_func_t *done, void *private,
803 int priority, enum zio_flag flags, boolean_t labels)
804 {
805 zio_t *zio;
806
807 ASSERT(vd->vdev_children == 0);
808 ASSERT(!labels || offset + size <= VDEV_LABEL_START_SIZE ||
809 offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE);
810 ASSERT3U(offset + size, <=, vd->vdev_psize);
811
812 zio = zio_create(pio, vd->vdev_spa, 0, NULL, data, size, done, private,
813 ZIO_TYPE_WRITE, priority, flags, vd, offset, NULL,
814 ZIO_STAGE_OPEN, ZIO_WRITE_PHYS_PIPELINE);
815
816 zio->io_prop.zp_checksum = checksum;
817
818 if (zio_checksum_table[checksum].ci_eck) {
819 /*
820 * zec checksums are necessarily destructive -- they modify
821 * the end of the write buffer to hold the verifier/checksum.
822 * Therefore, we must make a local copy in case the data is
823 * being written to multiple places in parallel.
824 */
825 void *wbuf = zio_buf_alloc(size);
826 bcopy(data, wbuf, size);
827 zio_push_transform(zio, wbuf, size, size, NULL);
828 }
829
830 return (zio);
831 }
832
833 /*
834 * Create a child I/O to do some work for us.
835 */
836 zio_t *
837 zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset,
838 void *data, uint64_t size, int type, int priority, enum zio_flag flags,
839 zio_done_func_t *done, void *private)
840 {
841 enum zio_stage pipeline = ZIO_VDEV_CHILD_PIPELINE;
842 zio_t *zio;
843
844 ASSERT(vd->vdev_parent ==
845 (pio->io_vd ? pio->io_vd : pio->io_spa->spa_root_vdev));
846
847 if (type == ZIO_TYPE_READ && bp != NULL) {
848 /*
849 * If we have the bp, then the child should perform the
850 * checksum and the parent need not. This pushes error
851 * detection as close to the leaves as possible and
852 * eliminates redundant checksums in the interior nodes.
853 */
854 pipeline |= ZIO_STAGE_CHECKSUM_VERIFY;
855 pio->io_pipeline &= ~ZIO_STAGE_CHECKSUM_VERIFY;
856 }
857
858 if (vd->vdev_children == 0)
859 offset += VDEV_LABEL_START_SIZE;
860
861 flags |= ZIO_VDEV_CHILD_FLAGS(pio) | ZIO_FLAG_DONT_PROPAGATE;
862
863 /*
864 * If we've decided to do a repair, the write is not speculative --
865 * even if the original read was.
866 */
867 if (flags & ZIO_FLAG_IO_REPAIR)
868 flags &= ~ZIO_FLAG_SPECULATIVE;
869
870 zio = zio_create(pio, pio->io_spa, pio->io_txg, bp, data, size,
871 done, private, type, priority, flags, vd, offset, &pio->io_bookmark,
872 ZIO_STAGE_VDEV_IO_START >> 1, pipeline);
873
874 return (zio);
875 }
876
877 zio_t *
878 zio_vdev_delegated_io(vdev_t *vd, uint64_t offset, void *data, uint64_t size,
879 int type, int priority, enum zio_flag flags,
880 zio_done_func_t *done, void *private)
881 {
882 zio_t *zio;
883
884 ASSERT(vd->vdev_ops->vdev_op_leaf);
885
886 zio = zio_create(NULL, vd->vdev_spa, 0, NULL,
887 data, size, done, private, type, priority,
888 flags | ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_RETRY,
889 vd, offset, NULL,
890 ZIO_STAGE_VDEV_IO_START >> 1, ZIO_VDEV_CHILD_PIPELINE);
891
892 return (zio);
893 }
894
895 void
896 zio_flush(zio_t *zio, vdev_t *vd)
897 {
898 zio_nowait(zio_ioctl(zio, zio->io_spa, vd, DKIOCFLUSHWRITECACHE,
899 NULL, NULL, ZIO_PRIORITY_NOW,
900 ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY));
901 }
902
903 void
904 zio_shrink(zio_t *zio, uint64_t size)
905 {
906 ASSERT(zio->io_executor == NULL);
907 ASSERT(zio->io_orig_size == zio->io_size);
|
492 zio_execute(pio);
493 } else {
494 mutex_exit(&pio->io_lock);
495 }
496 }
497
498 static void
499 zio_inherit_child_errors(zio_t *zio, enum zio_child c)
500 {
501 if (zio->io_child_error[c] != 0 && zio->io_error == 0)
502 zio->io_error = zio->io_child_error[c];
503 }
504
505 /*
506 * ==========================================================================
507 * Create the various types of I/O (read, write, free, etc)
508 * ==========================================================================
509 */
/*
 * Allocate and initialize a zio.  This is the common construction path
 * used by every zio_* creation function below: it allocates from
 * zio_cache, records the caller's parameters, selects the pipeline,
 * and links the new zio to its parent (if any).
 *
 * NOTE(review): the original lines numbered 533-542 -- including the
 * "if" chain that classifies the zio (ZIO_CHILD_VDEV / ZIO_CHILD_GANG /
 * the condition guarding ZIO_CHILD_DDT) -- are missing from this copy,
 * leaving a dangling "else" below.  Restore them from the upstream
 * source before compiling.
 */
static zio_t *
zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
    void *data, uint64_t size, zio_done_func_t *done, void *io_private,
    zio_type_t type, int priority, enum zio_flag flags,
    vdev_t *vd, uint64_t offset, const zbookmark_t *zb,
    enum zio_stage stage, enum zio_stage pipeline)
{
	zio_t *zio;

	/* Size and offset must be sector-aligned and within pool limits. */
	ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);
	ASSERT(P2PHASE(size, SPA_MINBLOCKSIZE) == 0);
	ASSERT(P2PHASE(offset, SPA_MINBLOCKSIZE) == 0);

	ASSERT(!vd || spa_config_held(spa, SCL_STATE_ALL, RW_READER));
	ASSERT(!bp || !(flags & ZIO_FLAG_CONFIG_WRITER));
	ASSERT(vd || stage == ZIO_STAGE_OPEN);

	zio = kmem_cache_alloc(zio_cache, KM_SLEEP);
	bzero(zio, sizeof (zio_t));

	mutex_init(&zio->io_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&zio->io_cv, NULL, CV_DEFAULT, NULL);

	/* NOTE(review): the classification "if" is missing here (see above). */
		zio->io_child_type = ZIO_CHILD_DDT;
	else
		zio->io_child_type = ZIO_CHILD_LOGICAL;

	if (bp != NULL) {
		zio->io_bp = (blkptr_t *)bp;
		zio->io_bp_copy = *bp;
		zio->io_bp_orig = *bp;
		/*
		 * Non-writes and DDT children operate on a private copy
		 * of the bp rather than the caller's original.
		 */
		if (type != ZIO_TYPE_WRITE ||
		    zio->io_child_type == ZIO_CHILD_DDT)
			zio->io_bp = &zio->io_bp_copy; /* so caller can free */
		if (zio->io_child_type == ZIO_CHILD_LOGICAL)
			zio->io_logical = zio;
		/* Gang blocks need the extra gang pipeline stages. */
		if (zio->io_child_type > ZIO_CHILD_GANG && BP_IS_GANG(bp))
			pipeline |= ZIO_GANG_STAGES;
	}

	zio->io_spa = spa;
	zio->io_txg = txg;
	zio->io_done = done;
	zio->io_private = io_private;
	zio->io_type = type;
	zio->io_priority = priority;
	zio->io_vd = vd;
	zio->io_offset = offset;
	zio->io_orig_data = zio->io_data = data;
	zio->io_orig_size = zio->io_size = size;
	zio->io_orig_flags = zio->io_flags = flags;
	zio->io_orig_stage = zio->io_stage = stage;
	zio->io_orig_pipeline = zio->io_pipeline = pipeline;

	/* A zio created at or past a wait stage counts as already there. */
	zio->io_state[ZIO_WAIT_READY] = (stage >= ZIO_STAGE_READY);
	zio->io_state[ZIO_WAIT_DONE] = (stage >= ZIO_STAGE_DONE);

	if (zb != NULL)
		zio->io_bookmark = *zb;

	if (pio != NULL) {
		/* Inherit logical and gang-leader context from the parent. */
		if (zio->io_logical == NULL)
			zio->io_logical = pio->io_logical;
		if (zio->io_child_type == ZIO_CHILD_GANG)
			zio->io_gang_leader = pio->io_gang_leader;
		zio_add_child(pio, zio);
	}

	return (zio);
}
590
/*
 * Tear down a zio built by zio_create(): release its parent/child
 * lists and synchronization primitives, then return the structure
 * to the zio cache.
 */
static void
zio_destroy(zio_t *zio)
{
	list_destroy(&zio->io_parent_list);
	list_destroy(&zio->io_child_list);
	mutex_destroy(&zio->io_lock);
	cv_destroy(&zio->io_cv);
	kmem_cache_free(zio_cache, zio);
}
600
601 zio_t *
602 zio_null(zio_t *pio, spa_t *spa, vdev_t *vd, zio_done_func_t *done,
603 void *io_private, enum zio_flag flags)
604 {
605 zio_t *zio;
606
607 zio = zio_create(pio, spa, 0, NULL, NULL, 0, done, io_private,
608 ZIO_TYPE_NULL, ZIO_PRIORITY_NOW, flags, vd, 0, NULL,
609 ZIO_STAGE_OPEN, ZIO_INTERLOCK_PIPELINE);
610
611 return (zio);
612 }
613
614 zio_t *
615 zio_root(spa_t *spa, zio_done_func_t *done, void *io_private,
616 enum zio_flag flags)
617 {
618 return (zio_null(NULL, spa, NULL, done, io_private, flags));
619 }
620
621 zio_t *
622 zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
623 void *data, uint64_t size, zio_done_func_t *done, void *io_private,
624 int priority, enum zio_flag flags, const zbookmark_t *zb)
625 {
626 zio_t *zio;
627
628 zio = zio_create(pio, spa, BP_PHYSICAL_BIRTH(bp), bp,
629 data, size, done, io_private,
630 ZIO_TYPE_READ, priority, flags, NULL, 0, zb,
631 ZIO_STAGE_OPEN, (flags & ZIO_FLAG_DDT_CHILD) ?
632 ZIO_DDT_CHILD_READ_PIPELINE : ZIO_READ_PIPELINE);
633
634 return (zio);
635 }
636
637 zio_t *
638 zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
639 void *data, uint64_t size, const zio_prop_t *zp,
640 zio_done_func_t *ready, zio_done_func_t *done, void *io_private,
641 int priority, enum zio_flag flags, const zbookmark_t *zb)
642 {
643 zio_t *zio;
644
645 ASSERT(zp->zp_checksum >= ZIO_CHECKSUM_OFF &&
646 zp->zp_checksum < ZIO_CHECKSUM_FUNCTIONS &&
647 zp->zp_compress >= ZIO_COMPRESS_OFF &&
648 zp->zp_compress < ZIO_COMPRESS_FUNCTIONS &&
649 DMU_OT_IS_VALID(zp->zp_type) &&
650 zp->zp_level < 32 &&
651 zp->zp_copies > 0 &&
652 zp->zp_copies <= spa_max_replication(spa));
653
654 zio = zio_create(pio, spa, txg, bp, data, size, done, io_private,
655 ZIO_TYPE_WRITE, priority, flags, NULL, 0, zb,
656 ZIO_STAGE_OPEN, (flags & ZIO_FLAG_DDT_CHILD) ?
657 ZIO_DDT_CHILD_WRITE_PIPELINE : ZIO_WRITE_PIPELINE);
658
659 zio->io_ready = ready;
660 zio->io_prop = *zp;
661
662 return (zio);
663 }
664
665 zio_t *
666 zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, void *data,
667 uint64_t size, zio_done_func_t *done, void *io_private, int priority,
668 enum zio_flag flags, zbookmark_t *zb)
669 {
670 zio_t *zio;
671
672 zio = zio_create(pio, spa, txg, bp, data, size, done, io_private,
673 ZIO_TYPE_WRITE, priority, flags, NULL, 0, zb,
674 ZIO_STAGE_OPEN, ZIO_REWRITE_PIPELINE);
675
676 return (zio);
677 }
678
/*
 * Reset a dmu_sync() override write's properties.
 *
 * NOTE(review): this region is corrupted -- the original lines
 * numbered 693-708 (the remainder of zio_write_override() and the
 * signature/opening brace of the free function whose body follows)
 * are missing and must be restored from the upstream source before
 * this file can compile.
 */
void
zio_write_override(zio_t *zio, blkptr_t *bp, int copies, boolean_t nopwrite)
{
	ASSERT(zio->io_type == ZIO_TYPE_WRITE);
	ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL);
	ASSERT(zio->io_stage == ZIO_STAGE_OPEN);
	ASSERT(zio->io_txg == spa_syncing_txg(zio->io_spa));

	/*
	 * We must reset the io_prop to match the values that existed
	 * when the bp was first written by dmu_sync() keeping in mind
	 * that nopwrite and dedup are mutually exclusive.
	 */
	zio->io_prop.zp_dedup = nopwrite ? B_FALSE : zio->io_prop.zp_dedup;
	/* NOTE(review): gap -- a function boundary is missing here. */
	zio_t *zio;

	dprintf_bp(bp, "freeing in txg %llu, pass %u",
	    (longlong_t)txg, spa->spa_sync_pass);

	ASSERT(!BP_IS_HOLE(bp));
	ASSERT(spa_syncing_txg(spa) == txg);
	ASSERT(spa_sync_pass(spa) < zfs_sync_pass_deferred_free);

	metaslab_check_free(spa, bp);

	zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp),
	    NULL, NULL, ZIO_TYPE_FREE, ZIO_PRIORITY_FREE, flags,
	    NULL, 0, NULL, ZIO_STAGE_OPEN, ZIO_FREE_PIPELINE);

	return (zio);
}
726
727 zio_t *
728 zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
729 zio_done_func_t *done, void *io_private, enum zio_flag flags)
730 {
731 zio_t *zio;
732
733 /*
734 * A claim is an allocation of a specific block. Claims are needed
735 * to support immediate writes in the intent log. The issue is that
736 * immediate writes contain committed data, but in a txg that was
737 * *not* committed. Upon opening the pool after an unclean shutdown,
738 * the intent log claims all blocks that contain immediate write data
739 * so that the SPA knows they're in use.
740 *
741 * All claims *must* be resolved in the first txg -- before the SPA
742 * starts allocating blocks -- so that nothing is allocated twice.
743 * If txg == 0 we just verify that the block is claimable.
744 */
745 ASSERT3U(spa->spa_uberblock.ub_rootbp.blk_birth, <, spa_first_txg(spa));
746 ASSERT(txg == spa_first_txg(spa) || txg == 0);
747 ASSERT(!BP_GET_DEDUP(bp) || !spa_writeable(spa)); /* zdb(1M) */
748
749 zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp),
750 done, io_private, ZIO_TYPE_CLAIM, ZIO_PRIORITY_NOW, flags,
751 NULL, 0, NULL, ZIO_STAGE_OPEN, ZIO_CLAIM_PIPELINE);
752
753 return (zio);
754 }
755
756 zio_t *
757 zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
758 zio_done_func_t *done, void *io_private, int priority, enum zio_flag flags)
759 {
760 zio_t *zio;
761 int c;
762
763 if (vd->vdev_children == 0) {
764 zio = zio_create(pio, spa, 0, NULL, NULL, 0, done, io_private,
765 ZIO_TYPE_IOCTL, priority, flags, vd, 0, NULL,
766 ZIO_STAGE_OPEN, ZIO_IOCTL_PIPELINE);
767
768 zio->io_cmd = cmd;
769 } else {
770 zio = zio_null(pio, spa, NULL, NULL, NULL, flags);
771
772 for (c = 0; c < vd->vdev_children; c++)
773 zio_nowait(zio_ioctl(zio, spa, vd->vdev_child[c], cmd,
774 done, io_private, priority, flags));
775 }
776
777 return (zio);
778 }
779
780 zio_t *
781 zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
782 void *data, int checksum, zio_done_func_t *done, void *io_private,
783 int priority, enum zio_flag flags, boolean_t labels)
784 {
785 zio_t *zio;
786
787 ASSERT(vd->vdev_children == 0);
788 ASSERT(!labels || offset + size <= VDEV_LABEL_START_SIZE ||
789 offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE);
790 ASSERT3U(offset + size, <=, vd->vdev_psize);
791
792 zio = zio_create(pio, vd->vdev_spa, 0, NULL, data, size, done,
793 io_private, ZIO_TYPE_READ, priority, flags, vd, offset, NULL,
794 ZIO_STAGE_OPEN, ZIO_READ_PHYS_PIPELINE);
795
796 zio->io_prop.zp_checksum = checksum;
797
798 return (zio);
799 }
800
801 zio_t *
802 zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
803 void *data, int checksum, zio_done_func_t *done, void *io_private,
804 int priority, enum zio_flag flags, boolean_t labels)
805 {
806 zio_t *zio;
807
808 ASSERT(vd->vdev_children == 0);
809 ASSERT(!labels || offset + size <= VDEV_LABEL_START_SIZE ||
810 offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE);
811 ASSERT3U(offset + size, <=, vd->vdev_psize);
812
813 zio = zio_create(pio, vd->vdev_spa, 0, NULL, data, size, done,
814 io_private, ZIO_TYPE_WRITE, priority, flags, vd, offset, NULL,
815 ZIO_STAGE_OPEN, ZIO_WRITE_PHYS_PIPELINE);
816
817 zio->io_prop.zp_checksum = checksum;
818
819 if (zio_checksum_table[checksum].ci_eck) {
820 /*
821 * zec checksums are necessarily destructive -- they modify
822 * the end of the write buffer to hold the verifier/checksum.
823 * Therefore, we must make a local copy in case the data is
824 * being written to multiple places in parallel.
825 */
826 void *wbuf = zio_buf_alloc(size);
827 bcopy(data, wbuf, size);
828 zio_push_transform(zio, wbuf, size, size, NULL);
829 }
830
831 return (zio);
832 }
833
834 /*
835 * Create a child I/O to do some work for us.
836 */
837 zio_t *
838 zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset,
839 void *data, uint64_t size, int type, int priority, enum zio_flag flags,
840 zio_done_func_t *done, void *io_private)
841 {
842 enum zio_stage pipeline = ZIO_VDEV_CHILD_PIPELINE;
843 zio_t *zio;
844
845 ASSERT(vd->vdev_parent ==
846 (pio->io_vd ? pio->io_vd : pio->io_spa->spa_root_vdev));
847
848 if (type == ZIO_TYPE_READ && bp != NULL) {
849 /*
850 * If we have the bp, then the child should perform the
851 * checksum and the parent need not. This pushes error
852 * detection as close to the leaves as possible and
853 * eliminates redundant checksums in the interior nodes.
854 */
855 pipeline |= ZIO_STAGE_CHECKSUM_VERIFY;
856 pio->io_pipeline &= ~ZIO_STAGE_CHECKSUM_VERIFY;
857 }
858
859 if (vd->vdev_children == 0)
860 offset += VDEV_LABEL_START_SIZE;
861
862 flags |= ZIO_VDEV_CHILD_FLAGS(pio) | ZIO_FLAG_DONT_PROPAGATE;
863
864 /*
865 * If we've decided to do a repair, the write is not speculative --
866 * even if the original read was.
867 */
868 if (flags & ZIO_FLAG_IO_REPAIR)
869 flags &= ~ZIO_FLAG_SPECULATIVE;
870
871 zio = zio_create(pio, pio->io_spa, pio->io_txg, bp, data, size,
872 done, io_private, type, priority, flags, vd, offset,
873 &pio->io_bookmark, ZIO_STAGE_VDEV_IO_START >> 1, pipeline);
874
875 return (zio);
876 }
877
878 zio_t *
879 zio_vdev_delegated_io(vdev_t *vd, uint64_t offset, void *data, uint64_t size,
880 int type, int priority, enum zio_flag flags,
881 zio_done_func_t *done, void *io_private)
882 {
883 zio_t *zio;
884
885 ASSERT(vd->vdev_ops->vdev_op_leaf);
886
887 zio = zio_create(NULL, vd->vdev_spa, 0, NULL,
888 data, size, done, io_private, type, priority,
889 flags | ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_RETRY,
890 vd, offset, NULL,
891 ZIO_STAGE_VDEV_IO_START >> 1, ZIO_VDEV_CHILD_PIPELINE);
892
893 return (zio);
894 }
895
896 void
897 zio_flush(zio_t *zio, vdev_t *vd)
898 {
899 zio_nowait(zio_ioctl(zio, zio->io_spa, vd, DKIOCFLUSHWRITECACHE,
900 NULL, NULL, ZIO_PRIORITY_NOW,
901 ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE | ZIO_FLAG_DONT_RETRY));
902 }
903
904 void
905 zio_shrink(zio_t *zio, uint64_t size)
906 {
907 ASSERT(zio->io_executor == NULL);
908 ASSERT(zio->io_orig_size == zio->io_size);
|