	/*
	 * We don't shrink for raidz because of problems with the
	 * reconstruction when reading back less than the block size.
	 * Note, BP_IS_RAIDZ() assumes no compression.
	 */
	ASSERT(BP_GET_COMPRESS(zio->io_bp) == ZIO_COMPRESS_OFF);
	if (!BP_IS_RAIDZ(zio->io_bp))
		zio->io_orig_size = zio->io_size = size;
}

/*
 * ==========================================================================
 * Prepare to read and write logical blocks
 * ==========================================================================
 */

static int
zio_read_bp_init(zio_t *zio)
{
	blkptr_t *bp = zio->io_bp;

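	/*
	 * Blocks tagged with the reservation property (set by the
	 * zero-write path in zio_write_bp_init() below) were reserved
	 * but never physically written, so the read is satisfied by
	 * zero-filling the caller's buffer with no device I/O.
	 */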
	if (!BP_IS_EMBEDDED(bp) && BP_GET_PROP_RESERVATION(bp)) {
		memset(zio->io_orig_data, 0, zio->io_orig_size);
		zio->io_pipeline = ZIO_INTERLOCK_STAGES;
		return (ZIO_PIPELINE_CONTINUE);
	}

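	/*
	 * Compressed, non-raw logical reads get a decompression
	 * transform: read psize bytes into a scratch buffer and
	 * decompress into the caller's buffer when the transform
	 * stack is popped.
	 */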
	if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF &&
	    zio->io_child_type == ZIO_CHILD_LOGICAL &&
	    !(zio->io_flags & ZIO_FLAG_RAW)) {
		uint64_t psize =
		    BP_IS_EMBEDDED(bp) ? BPE_GET_PSIZE(bp) : BP_GET_PSIZE(bp);
		void *cbuf = zio_buf_alloc(psize);

		zio_push_transform(zio, cbuf, psize, psize, zio_decompress);
	}

	if (BP_IS_EMBEDDED(bp) && BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA) {
		zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
		decode_embedded_bp_compressed(bp, zio->io_data);
	} else {
		ASSERT(!BP_IS_EMBEDDED(bp));
	}

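	/*
	 * Level-0 user data and DDT ZAP blocks are marked DONT_CACHE,
	 * presumably because caching them below the ARC buys little.
	 */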
	if (!DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) && BP_GET_LEVEL(bp) == 0)
		zio->io_flags |= ZIO_FLAG_DONT_CACHE;

	if (BP_GET_TYPE(bp) == DMU_OT_DDT_ZAP)
		zio->io_flags |= ZIO_FLAG_DONT_CACHE;

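	/*
	 * Logical reads of dedup'd blocks are routed through the DDT
	 * read pipeline so the dedup table can be consulted.
	 */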
	if (BP_GET_DEDUP(bp) && zio->io_child_type == ZIO_CHILD_LOGICAL)
		zio->io_pipeline = ZIO_DDT_READ_PIPELINE;

	return (ZIO_PIPELINE_CONTINUE);
}

static int
zio_write_bp_init(zio_t *zio)
{
	spa_t *spa = zio->io_spa;
	zio_prop_t *zp = &zio->io_prop;
	enum zio_compress compress = zp->zp_compress;
	enum zio_checksum checksum = zp->zp_checksum;
	uint8_t dedup = zp->zp_dedup;
	blkptr_t *bp = zio->io_bp;
	uint64_t lsize = zio->io_size;
	uint64_t psize = lsize;
	int pass = 1;

	/*
	 * If our children haven't all reached the ready stage,
	 * wait for them and then repeat this pipeline stage.
	 */
	if (zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_READY) ||
	    zio_wait_for_children(zio, ZIO_CHILD_LOGICAL, ZIO_WAIT_READY))
		return (ZIO_PIPELINE_STOP);

	if (!IO_IS_ALLOCATING(zio))
		return (ZIO_PIPELINE_CONTINUE);

	ASSERT(zio->io_child_type != ZIO_CHILD_DDT);

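	/*
	 * Zero-write blocks carry no real payload, so (unless we are
	 * ganging) there is nothing to dedup, compress, or checksum;
	 * clear all three locally before they are applied below.
	 */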
	if (zp->zp_zero_write && !(zio->io_pipeline & ZIO_GANG_STAGES)) {
		dedup = B_FALSE;
		compress = ZIO_COMPRESS_OFF;
		checksum = ZIO_CHECKSUM_OFF;
	}

	if (zio->io_bp_override) {
		ASSERT(bp->blk_birth != zio->io_txg);
		ASSERT(BP_GET_DEDUP(zio->io_bp_override) == 0);

		*bp = *zio->io_bp_override;
		zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;

		if (BP_IS_EMBEDDED(bp))
			return (ZIO_PIPELINE_CONTINUE);

		/*
		 * If we've been overridden and nopwrite is set then
		 * set the flag accordingly to indicate that a nopwrite
		 * has already occurred.
		 */
		if (!BP_IS_HOLE(bp) && zp->zp_nopwrite) {
			ASSERT(!zp->zp_dedup);
			zio->io_flags |= ZIO_FLAG_NOPWRITE;
			return (ZIO_PIPELINE_CONTINUE);
		}

		ASSERT(!zp->zp_nopwrite);

		if (BP_IS_HOLE(bp) || !dedup)
			return (ZIO_PIPELINE_CONTINUE);

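		/*
		 * Dedup is only safe with a dedup-capable (collision-
		 * resistant) checksum, or when each write is verified
		 * against the existing on-disk data.
		 */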
		ASSERT(zio_checksum_table[checksum].ci_dedup ||
		    zp->zp_dedup_verify);

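		/*
		 * The override BP can feed dedup directly only if it
		 * already carries the checksum dedup expects; otherwise
		 * discard it and fall through to a fresh allocation.
		 */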
		if (BP_GET_CHECKSUM(bp) == checksum) {
			BP_SET_DEDUP(bp, 1);
			zio->io_pipeline |= ZIO_STAGE_DDT_WRITE;
			return (ZIO_PIPELINE_CONTINUE);
		}
		zio->io_bp_override = NULL;
		BP_ZERO(bp);
	}

	if (!BP_IS_HOLE(bp) && bp->blk_birth == zio->io_txg) {
		/*
		 * We're rewriting an existing block, which means we're
		 * working on behalf of spa_sync(). For spa_sync() to
		 * converge, it must eventually be the case that we don't
		 * have to allocate new blocks. But compression changes
		 * the blocksize, which forces a reallocate, and makes
		 * convergence take longer. Therefore, after the first
		 * few passes, stop compressing to ensure convergence.
		 */
		pass = spa_sync_pass(spa);

	/* ... (rewrite handling and compression logic elided in this excerpt) ... */

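	/*
	 * A psize of zero means the data compressed away entirely, so
	 * the block becomes a hole. If it replaces a block that once
	 * existed, and the hole-birth feature is active, record the
	 * birth txg so incremental sends can see the transition.
	 */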
	if (psize == 0) {
		if (zio->io_bp_orig.blk_birth != 0 &&
		    spa_feature_is_active(spa, SPA_FEATURE_HOLE_BIRTH)) {
			BP_SET_LSIZE(bp, lsize);
			BP_SET_TYPE(bp, zp->zp_type);
			BP_SET_LEVEL(bp, zp->zp_level);
			BP_SET_BIRTH(bp, zio->io_txg, 0);
		}
		zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
	} else {
		ASSERT(zp->zp_checksum != ZIO_CHECKSUM_GANG_HEADER);
		BP_SET_LSIZE(bp, lsize);
		BP_SET_TYPE(bp, zp->zp_type);
		BP_SET_LEVEL(bp, zp->zp_level);
		BP_SET_PSIZE(bp, psize);
		BP_SET_COMPRESS(bp, compress);
		BP_SET_CHECKSUM(bp, checksum);
		BP_SET_DEDUP(bp, dedup);
		BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
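		/*
		 * Zero-write blocks never hit the disk: trim the
		 * pipeline down to the interlock stages, keeping DVA
		 * allocation only if it was already pending so the
		 * space still gets reserved, and tag the BP with the
		 * reservation property so reads know to zero-fill.
		 */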
		if (zp->zp_zero_write && !(zio->io_pipeline & ZIO_GANG_STAGES)) {
			boolean_t need_allocate =
			    (zio->io_pipeline & ZIO_STAGE_DVA_ALLOCATE) != 0;

			zio->io_pipeline = ZIO_INTERLOCK_STAGES;
			if (need_allocate)
				zio->io_pipeline |= ZIO_STAGE_DVA_ALLOCATE;
			BP_SET_PROP_RESERVATION(bp, 1);
		} else {
			BP_SET_PROP_RESERVATION(bp, 0);
		}
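		/*
		 * Dedup'd writes divert into the DDT write pipeline,
		 * which replaces the normal allocate-and-write stages.
		 */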
		if (dedup) {
			ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL);
			ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REWRITE));
			zio->io_pipeline = ZIO_DDT_WRITE_PIPELINE;
		}
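		/*
		 * Nopwrite adds a stage that compares the new data's
		 * checksum against the block being overwritten and
		 * skips the write when they match.
		 */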
		if (zp->zp_nopwrite) {
			ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL);
			ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REWRITE));
			zio->io_pipeline |= ZIO_STAGE_NOP_WRITE;
		}
	}

	return (ZIO_PIPELINE_CONTINUE);
}

static int
zio_free_bp_init(zio_t *zio)
{
	blkptr_t *bp = zio->io_bp;

	/* ... (rest of zio_free_bp_init and the gang machinery elided in this excerpt) ... */

	/*
	 * Create the gang header.
	 */
	zio = zio_rewrite(pio, spa, txg, bp, gbh, SPA_GANGBLOCKSIZE, NULL, NULL,
	    pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark);

	/*
	 * Create and nowait the gang children.
	 */
	for (int g = 0; resid != 0; resid -= lsize, g++) {
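		/*
		 * Carve the remaining resid into roughly equal pieces,
		 * one per remaining gang-header blkptr, each rounded up
		 * to SPA_MINBLOCKSIZE.
		 */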
		lsize = P2ROUNDUP(resid / (SPA_GBH_NBLKPTRS - g),
		    SPA_MINBLOCKSIZE);
		ASSERT(lsize >= SPA_MINBLOCKSIZE && lsize <= resid);

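		/*
		 * Gang members inherit the parent's checksum and copies
		 * but are written plain: no compression, dedup, or
		 * zero-write, and no object type of their own.
		 */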
		zp.zp_checksum = gio->io_prop.zp_checksum;
		zp.zp_compress = ZIO_COMPRESS_OFF;
		zp.zp_type = DMU_OT_NONE;
		zp.zp_level = 0;
		zp.zp_copies = gio->io_prop.zp_copies;
		zp.zp_dedup = B_FALSE;
		zp.zp_dedup_verify = B_FALSE;
		zp.zp_zero_write = B_FALSE;
		zp.zp_nopwrite = B_FALSE;

		zio_nowait(zio_write(zio, spa, txg, &gbh->zg_blkptr[g],
		    (char *)pio->io_data + (pio->io_size - resid), lsize, &zp,
		    zio_write_gang_member_ready, NULL, NULL, &gn->gn_child[g],
		    pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio),
		    &pio->io_bookmark));
	}

	/*
	 * Set pio's pipeline to just wait for zio to finish.
	 */
	pio->io_pipeline = ZIO_INTERLOCK_PIPELINE;

	zio_nowait(zio);

	return (ZIO_PIPELINE_CONTINUE);
}

/*