Physically reserving space without writing leaf blocks
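
In outline, the diff below (old version first, then the patched version) threads a new zp_zero_write property through the ZIO pipeline in zio.c. On the write side, zio_write_bp_init() lets such a block allocate its DVA as usual, but turns off compression, checksumming, and dedup, collapses the rest of the pipeline, and tags the resulting block pointer with a reservation property. On the read side, zio_read_bp_init() short-circuits any block pointer carrying that property and returns zero-filled data without issuing device I/O. Gang children opt out explicitly (zp.zp_zero_write = B_FALSE), and both zero-write branches are additionally gated on the gang stages.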

Old version (zio.c):
 941          * We don't shrink for raidz because of problems with the
 942          * reconstruction when reading back less than the block size.
 943          * Note, BP_IS_RAIDZ() assumes no compression.
 944          */
 945         ASSERT(BP_GET_COMPRESS(zio->io_bp) == ZIO_COMPRESS_OFF);
 946         if (!BP_IS_RAIDZ(zio->io_bp))
 947                 zio->io_orig_size = zio->io_size = size;
 948 }
 949 
 950 /*
 951  * ==========================================================================
 952  * Prepare to read and write logical blocks
 953  * ==========================================================================
 954  */
 955 
 956 static int
 957 zio_read_bp_init(zio_t *zio)
 958 {
 959         blkptr_t *bp = zio->io_bp;
 960 
 961         if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF &&
 962             zio->io_child_type == ZIO_CHILD_LOGICAL &&
 963             !(zio->io_flags & ZIO_FLAG_RAW)) {
 964                 uint64_t psize =
 965                     BP_IS_EMBEDDED(bp) ? BPE_GET_PSIZE(bp) : BP_GET_PSIZE(bp);
 966                 void *cbuf = zio_buf_alloc(psize);
 967 
 968                 zio_push_transform(zio, cbuf, psize, psize, zio_decompress);
 969         }
 970 
 971         if (BP_IS_EMBEDDED(bp) && BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA) {
 972                 zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
 973                 decode_embedded_bp_compressed(bp, zio->io_data);
 974         } else {
 975                 ASSERT(!BP_IS_EMBEDDED(bp));
 976         }
 977 
 978         if (!DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) && BP_GET_LEVEL(bp) == 0)
 979                 zio->io_flags |= ZIO_FLAG_DONT_CACHE;
 980 
 981         if (BP_GET_TYPE(bp) == DMU_OT_DDT_ZAP)
 982                 zio->io_flags |= ZIO_FLAG_DONT_CACHE;
 983 
 984         if (BP_GET_DEDUP(bp) && zio->io_child_type == ZIO_CHILD_LOGICAL)
 985                 zio->io_pipeline = ZIO_DDT_READ_PIPELINE;
 986 
 987         return (ZIO_PIPELINE_CONTINUE);
 988 }
 989 
 990 static int
 991 zio_write_bp_init(zio_t *zio)
 992 {
 993         spa_t *spa = zio->io_spa;
 994         zio_prop_t *zp = &zio->io_prop;
 995         enum zio_compress compress = zp->zp_compress;
 996         blkptr_t *bp = zio->io_bp;
 997         uint64_t lsize = zio->io_size;
 998         uint64_t psize = lsize;
 999         int pass = 1;
1000 
1001         /*
1002          * If our children haven't all reached the ready stage,
1003          * wait for them and then repeat this pipeline stage.
1004          */
1005         if (zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_READY) ||
1006             zio_wait_for_children(zio, ZIO_CHILD_LOGICAL, ZIO_WAIT_READY))
1007                 return (ZIO_PIPELINE_STOP);
1008 
1009         if (!IO_IS_ALLOCATING(zio))
1010                 return (ZIO_PIPELINE_CONTINUE);
1011 
1012         ASSERT(zio->io_child_type != ZIO_CHILD_DDT);
1013 
1014         if (zio->io_bp_override) {
1015                 ASSERT(bp->blk_birth != zio->io_txg);
1016                 ASSERT(BP_GET_DEDUP(zio->io_bp_override) == 0);
1017 
1018                 *bp = *zio->io_bp_override;
1019                 zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
1020 
1021                 if (BP_IS_EMBEDDED(bp))
1022                         return (ZIO_PIPELINE_CONTINUE);
1023 
1024                 /*
1025                  * If we've been overridden and nopwrite is set then
1026                  * set the flag accordingly to indicate that a nopwrite
1027                  * has already occurred.
1028                  */
1029                 if (!BP_IS_HOLE(bp) && zp->zp_nopwrite) {
1030                         ASSERT(!zp->zp_dedup);
1031                         zio->io_flags |= ZIO_FLAG_NOPWRITE;
1032                         return (ZIO_PIPELINE_CONTINUE);
1033                 }
1034 
1035                 ASSERT(!zp->zp_nopwrite);
1036 
1037                 if (BP_IS_HOLE(bp) || !zp->zp_dedup)
1038                         return (ZIO_PIPELINE_CONTINUE);
1039 
1040                 ASSERT(zio_checksum_table[zp->zp_checksum].ci_dedup ||
1041                     zp->zp_dedup_verify);
1042 
1043                 if (BP_GET_CHECKSUM(bp) == zp->zp_checksum) {
1044                         BP_SET_DEDUP(bp, 1);
1045                         zio->io_pipeline |= ZIO_STAGE_DDT_WRITE;
1046                         return (ZIO_PIPELINE_CONTINUE);
1047                 }
1048                 zio->io_bp_override = NULL;
1049                 BP_ZERO(bp);
1050         }
1051 
1052         if (!BP_IS_HOLE(bp) && bp->blk_birth == zio->io_txg) {
1053                 /*
1054                  * We're rewriting an existing block, which means we're
1055                  * working on behalf of spa_sync().  For spa_sync() to
1056                  * converge, it must eventually be the case that we don't
1057                  * have to allocate new blocks.  But compression changes
1058                  * the blocksize, which forces a reallocate, and makes
1059                  * convergence take longer.  Therefore, after the first
1060                  * few passes, stop compressing to ensure convergence.
1061                  */
1062                 pass = spa_sync_pass(spa);
1063 
1136 
1137         if (psize == 0) {
1138                 if (zio->io_bp_orig.blk_birth != 0 &&
1139                     spa_feature_is_active(spa, SPA_FEATURE_HOLE_BIRTH)) {
1140                         BP_SET_LSIZE(bp, lsize);
1141                         BP_SET_TYPE(bp, zp->zp_type);
1142                         BP_SET_LEVEL(bp, zp->zp_level);
1143                         BP_SET_BIRTH(bp, zio->io_txg, 0);
1144                 }
1145                 zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
1146         } else {
1147                 ASSERT(zp->zp_checksum != ZIO_CHECKSUM_GANG_HEADER);
1148                 BP_SET_LSIZE(bp, lsize);
1149                 BP_SET_TYPE(bp, zp->zp_type);
1150                 BP_SET_LEVEL(bp, zp->zp_level);
1151                 BP_SET_PSIZE(bp, psize);
1152                 BP_SET_COMPRESS(bp, compress);
1153                 BP_SET_CHECKSUM(bp, zp->zp_checksum);
1154                 BP_SET_DEDUP(bp, zp->zp_dedup);
1155                 BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
1156                 if (zp->zp_dedup) {
1157                         ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL);
1158                         ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REWRITE));
1159                         zio->io_pipeline = ZIO_DDT_WRITE_PIPELINE;
1160                 }
1161                 if (zp->zp_nopwrite) {
1162                         ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL);
1163                         ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REWRITE));
1164                         zio->io_pipeline |= ZIO_STAGE_NOP_WRITE;
1165                 }
1166         }
1167 
1168         return (ZIO_PIPELINE_CONTINUE);
1169 }
1170 
1171 static int
1172 zio_free_bp_init(zio_t *zio)
1173 {
1174         blkptr_t *bp = zio->io_bp;
1175 
1854          * Create the gang header.
1855          */
1856         zio = zio_rewrite(pio, spa, txg, bp, gbh, SPA_GANGBLOCKSIZE, NULL, NULL,
1857             pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark);
1858 
1859         /*
1860          * Create and nowait the gang children.
1861          */
1862         for (int g = 0; resid != 0; resid -= lsize, g++) {
1863                 lsize = P2ROUNDUP(resid / (SPA_GBH_NBLKPTRS - g),
1864                     SPA_MINBLOCKSIZE);
1865                 ASSERT(lsize >= SPA_MINBLOCKSIZE && lsize <= resid);
1866 
1867                 zp.zp_checksum = gio->io_prop.zp_checksum;
1868                 zp.zp_compress = ZIO_COMPRESS_OFF;
1869                 zp.zp_type = DMU_OT_NONE;
1870                 zp.zp_level = 0;
1871                 zp.zp_copies = gio->io_prop.zp_copies;
1872                 zp.zp_dedup = B_FALSE;
1873                 zp.zp_dedup_verify = B_FALSE;
1874                 zp.zp_nopwrite = B_FALSE;
1875 
1876                 zio_nowait(zio_write(zio, spa, txg, &gbh->zg_blkptr[g],
1877                     (char *)pio->io_data + (pio->io_size - resid), lsize, &zp,
1878                     zio_write_gang_member_ready, NULL, NULL, &gn->gn_child[g],
1879                     pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio),
1880                     &pio->io_bookmark));
1881         }
1882 
1883         /*
1884          * Set pio's pipeline to just wait for zio to finish.
1885          */
1886         pio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
1887 
1888         zio_nowait(zio);
1889 
1890         return (ZIO_PIPELINE_CONTINUE);
1891 }
1892 
1893 /*

New version (zio.c), with the zero-write reservation changes:

 941          * We don't shrink for raidz because of problems with the
 942          * reconstruction when reading back less than the block size.
 943          * Note, BP_IS_RAIDZ() assumes no compression.
 944          */
 945         ASSERT(BP_GET_COMPRESS(zio->io_bp) == ZIO_COMPRESS_OFF);
 946         if (!BP_IS_RAIDZ(zio->io_bp))
 947                 zio->io_orig_size = zio->io_size = size;
 948 }
 949 
 950 /*
 951  * ==========================================================================
 952  * Prepare to read and write logical blocks
 953  * ==========================================================================
 954  */
 955 
 956 static int
 957 zio_read_bp_init(zio_t *zio)
 958 {
 959         blkptr_t *bp = zio->io_bp;
 960 
 961         if (!BP_IS_EMBEDDED(bp) && BP_GET_PROP_RESERVATION(bp)) {
 962                 memset(zio->io_orig_data, 0, zio->io_orig_size);
 963                 zio->io_pipeline = ZIO_INTERLOCK_STAGES;
 964                 return (ZIO_PIPELINE_CONTINUE);
 965         }
 966 
 967         if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF &&
 968             zio->io_child_type == ZIO_CHILD_LOGICAL &&
 969             !(zio->io_flags & ZIO_FLAG_RAW)) {
 970                 uint64_t psize =
 971                     BP_IS_EMBEDDED(bp) ? BPE_GET_PSIZE(bp) : BP_GET_PSIZE(bp);
 972                 void *cbuf = zio_buf_alloc(psize);
 973 
 974                 zio_push_transform(zio, cbuf, psize, psize, zio_decompress);
 975         }
 976 
 977         if (BP_IS_EMBEDDED(bp) && BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA) {
 978                 zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
 979                 decode_embedded_bp_compressed(bp, zio->io_data);
 980         } else {
 981                 ASSERT(!BP_IS_EMBEDDED(bp));
 982         }
 983 
 984         if (!DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) && BP_GET_LEVEL(bp) == 0)
 985                 zio->io_flags |= ZIO_FLAG_DONT_CACHE;
 986 
 987         if (BP_GET_TYPE(bp) == DMU_OT_DDT_ZAP)
 988                 zio->io_flags |= ZIO_FLAG_DONT_CACHE;
 989 
 990         if (BP_GET_DEDUP(bp) && zio->io_child_type == ZIO_CHILD_LOGICAL)
 991                 zio->io_pipeline = ZIO_DDT_READ_PIPELINE;
 992 
 993         return (ZIO_PIPELINE_CONTINUE);
 994 }
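
The new hunk at the top of zio_read_bp_init() is the heart of the read path: a block pointer whose reservation property is set never reaches the vdevs; the read is satisfied by zero-filling the caller's buffer and collapsing the pipeline to the interlock stages. Below is a minimal userland model of that short-circuit; the names (model_bp_t, model_read) are stand-ins for illustration, not ZFS APIs.

/*
 * Minimal model of the reserved-block read short-circuit.
 */
#include <stdio.h>
#include <string.h>

typedef struct model_bp {
	int	bp_reservation;		/* models BP_GET_PROP_RESERVATION(bp) */
} model_bp_t;

static int
model_read(const model_bp_t *bp, void *buf, size_t size)
{
	if (bp->bp_reservation) {
		memset(buf, 0, size);	/* reserved: hand back zeroes */
		return (0);		/* and skip device I/O entirely */
	}
	return (-1);			/* normal path: read from the device */
}

int
main(void)
{
	model_bp_t bp = { 1 };
	char buf[16];

	if (model_read(&bp, buf, sizeof (buf)) == 0)
		printf("reserved block reads back as zeroes: %d\n", buf[0]);
	return (0);
}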
 995 
 996 static int
 997 zio_write_bp_init(zio_t *zio)
 998 {
 999         spa_t *spa = zio->io_spa;
1000         zio_prop_t *zp = &zio->io_prop;
1001         enum zio_compress compress = zp->zp_compress;
1002         enum zio_checksum checksum = zp->zp_checksum;
1003         uint8_t dedup = zp->zp_dedup;
1004         blkptr_t *bp = zio->io_bp;
1005         uint64_t lsize = zio->io_size;
1006         uint64_t psize = lsize;
1007         int pass = 1;
1008 
1009         /*
1010          * If our children haven't all reached the ready stage,
1011          * wait for them and then repeat this pipeline stage.
1012          */
1013         if (zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_READY) ||
1014             zio_wait_for_children(zio, ZIO_CHILD_LOGICAL, ZIO_WAIT_READY))
1015                 return (ZIO_PIPELINE_STOP);
1016 
1017         if (!IO_IS_ALLOCATING(zio))
1018                 return (ZIO_PIPELINE_CONTINUE);
1019 
1020         ASSERT(zio->io_child_type != ZIO_CHILD_DDT);
1021 
1022         if (zp->zp_zero_write && !(zio->io_pipeline & ZIO_GANG_STAGES)) {
1023                 dedup = B_FALSE;
1024                 compress = ZIO_COMPRESS_OFF;
1025                 checksum = ZIO_CHECKSUM_OFF;
1026         }
1027 
1028         if (zio->io_bp_override) {
1029                 ASSERT(bp->blk_birth != zio->io_txg);
1030                 ASSERT(BP_GET_DEDUP(zio->io_bp_override) == 0);
1031 
1032                 *bp = *zio->io_bp_override;
1033                 zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
1034 
1035                 if (BP_IS_EMBEDDED(bp))
1036                         return (ZIO_PIPELINE_CONTINUE);
1037 
1038                 /*
1039                  * If we've been overridden and nopwrite is set then
1040                  * set the flag accordingly to indicate that a nopwrite
1041                  * has already occurred.
1042                  */
1043                 if (!BP_IS_HOLE(bp) && zp->zp_nopwrite) {
1044                         ASSERT(!zp->zp_dedup);
1045                         zio->io_flags |= ZIO_FLAG_NOPWRITE;
1046                         return (ZIO_PIPELINE_CONTINUE);
1047                 }
1048 
1049                 ASSERT(!zp->zp_nopwrite);
1050 
1051                 if (BP_IS_HOLE(bp) || !dedup)
1052                         return (ZIO_PIPELINE_CONTINUE);
1053 
1054                 ASSERT(zio_checksum_table[checksum].ci_dedup ||
1055                     zp->zp_dedup_verify);
1056 
1057                 if (BP_GET_CHECKSUM(bp) == checksum) {
1058                         BP_SET_DEDUP(bp, 1);
1059                         zio->io_pipeline |= ZIO_STAGE_DDT_WRITE;
1060                         return (ZIO_PIPELINE_CONTINUE);
1061                 }
1062                 zio->io_bp_override = NULL;
1063                 BP_ZERO(bp);
1064         }
1065 
1066         if (!BP_IS_HOLE(bp) && bp->blk_birth == zio->io_txg) {
1067                 /*
1068                  * We're rewriting an existing block, which means we're
1069                  * working on behalf of spa_sync().  For spa_sync() to
1070                  * converge, it must eventually be the case that we don't
1071                  * have to allocate new blocks.  But compression changes
1072                  * the blocksize, which forces a reallocate, and makes
1073                  * convergence take longer.  Therefore, after the first
1074                  * few passes, stop compressing to ensure convergence.
1075                  */
1076                 pass = spa_sync_pass(spa);
1077 


1150 
1151         if (psize == 0) {
1152                 if (zio->io_bp_orig.blk_birth != 0 &&
1153                     spa_feature_is_active(spa, SPA_FEATURE_HOLE_BIRTH)) {
1154                         BP_SET_LSIZE(bp, lsize);
1155                         BP_SET_TYPE(bp, zp->zp_type);
1156                         BP_SET_LEVEL(bp, zp->zp_level);
1157                         BP_SET_BIRTH(bp, zio->io_txg, 0);
1158                 }
1159                 zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
1160         } else {
1161                 ASSERT(zp->zp_checksum != ZIO_CHECKSUM_GANG_HEADER);
1162                 BP_SET_LSIZE(bp, lsize);
1163                 BP_SET_TYPE(bp, zp->zp_type);
1164                 BP_SET_LEVEL(bp, zp->zp_level);
1165                 BP_SET_PSIZE(bp, psize);
1166                 BP_SET_COMPRESS(bp, compress);
 1167                 BP_SET_CHECKSUM(bp, checksum);
 1168                 BP_SET_DEDUP(bp, dedup);
1169                 BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
 1170                 if (zp->zp_zero_write && !(zio->io_pipeline & ZIO_GANG_STAGES)) {
1171                         boolean_t need_allocate = B_FALSE;
1172                         if (zio->io_pipeline & ZIO_STAGE_DVA_ALLOCATE)
1173                                 need_allocate = B_TRUE;
1174                         zio->io_pipeline = ZIO_INTERLOCK_STAGES;
1175                         if (need_allocate)
1176                                 zio->io_pipeline |= ZIO_STAGE_DVA_ALLOCATE;
1177                         BP_SET_PROP_RESERVATION(bp, 1);
1178                 } else {
1179                         BP_SET_PROP_RESERVATION(bp, 0);
1180                 }
 1181                 if (dedup) {
1182                         ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL);
1183                         ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REWRITE));
1184                         zio->io_pipeline = ZIO_DDT_WRITE_PIPELINE;
1185                 }
1186                 if (zp->zp_nopwrite) {
1187                         ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL);
1188                         ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REWRITE));
1189                         zio->io_pipeline |= ZIO_STAGE_NOP_WRITE;
1190                 }
1191         }
1192 
1193         return (ZIO_PIPELINE_CONTINUE);
1194 }
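
For zp_zero_write blocks, zio_write_bp_init() collapses the pipeline to the interlock stages but carries over a pending DVA allocation, so space is physically reserved even though nothing is compressed, checksummed, or written, and the block pointer is tagged with the reservation property that zio_read_bp_init() checks. A compilable sketch of that save-and-restore on the stage mask follows; the stage values are made up for the example (the real masks live in zio_impl.h).

#include <stdio.h>

/* Stand-in stage masks; the real names and values differ. */
enum {
	STAGE_READY		= 1 << 0,
	STAGE_DVA_ALLOCATE	= 1 << 1,
	STAGE_CHECKSUM		= 1 << 2,
	STAGE_VDEV_IO		= 1 << 3,
	STAGE_DONE		= 1 << 4,
};
#define	INTERLOCK_STAGES	(STAGE_READY | STAGE_DONE)

int
main(void)
{
	unsigned pipeline = STAGE_READY | STAGE_DVA_ALLOCATE |
	    STAGE_CHECKSUM | STAGE_VDEV_IO | STAGE_DONE;
	int need_allocate = (pipeline & STAGE_DVA_ALLOCATE) != 0;

	/* Drop every stage except the interlocks... */
	pipeline = INTERLOCK_STAGES;
	/* ...but keep the allocation, so the space is still reserved. */
	if (need_allocate)
		pipeline |= STAGE_DVA_ALLOCATE;

	printf("pipeline = 0x%x\n", pipeline);	/* READY|DVA_ALLOCATE|DONE */
	return (0);
}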
1195 
1196 static int
1197 zio_free_bp_init(zio_t *zio)
1198 {
1199         blkptr_t *bp = zio->io_bp;
1200 
1879          * Create the gang header.
1880          */
1881         zio = zio_rewrite(pio, spa, txg, bp, gbh, SPA_GANGBLOCKSIZE, NULL, NULL,
1882             pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark);
1883 
1884         /*
1885          * Create and nowait the gang children.
1886          */
1887         for (int g = 0; resid != 0; resid -= lsize, g++) {
1888                 lsize = P2ROUNDUP(resid / (SPA_GBH_NBLKPTRS - g),
1889                     SPA_MINBLOCKSIZE);
1890                 ASSERT(lsize >= SPA_MINBLOCKSIZE && lsize <= resid);
1891 
1892                 zp.zp_checksum = gio->io_prop.zp_checksum;
1893                 zp.zp_compress = ZIO_COMPRESS_OFF;
1894                 zp.zp_type = DMU_OT_NONE;
1895                 zp.zp_level = 0;
1896                 zp.zp_copies = gio->io_prop.zp_copies;
1897                 zp.zp_dedup = B_FALSE;
1898                 zp.zp_dedup_verify = B_FALSE;
1899                 zp.zp_zero_write = B_FALSE;
1900                 zp.zp_nopwrite = B_FALSE;
1901 
1902                 zio_nowait(zio_write(zio, spa, txg, &gbh->zg_blkptr[g],
1903                     (char *)pio->io_data + (pio->io_size - resid), lsize, &zp,
1904                     zio_write_gang_member_ready, NULL, NULL, &gn->gn_child[g],
1905                     pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio),
1906                     &pio->io_bookmark));
1907         }
1908 
1909         /*
1910          * Set pio's pipeline to just wait for zio to finish.
1911          */
1912         pio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
1913 
1914         zio_nowait(zio);
1915 
1916         return (ZIO_PIPELINE_CONTINUE);
1917 }
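
The gang loop above splits the remaining residual evenly across the unused gang-header slots, rounding each child up to SPA_MINBLOCKSIZE. Here is a worked, compilable example of that arithmetic; the constants are assumptions for the sketch (SPA_MINBLOCKSIZE is 512 bytes in ZFS, and a gang header is taken to hold three block pointers).

#include <stdio.h>

#define	MINBLOCKSIZE	512		/* stand-in for SPA_MINBLOCKSIZE */
#define	GBH_NBLKPTRS	3		/* stand-in for SPA_GBH_NBLKPTRS */

/* Round x up to the next multiple of align (a power of two). */
#define	P2ROUNDUP(x, align)	(-(-(x) & -(align)))

int
main(void)
{
	size_t resid = 4096;	/* like io_size: a multiple of MINBLOCKSIZE */
	size_t lsize;

	for (int g = 0; resid != 0; resid -= lsize, g++) {
		/* Split what's left evenly over the remaining slots. */
		lsize = P2ROUNDUP(resid / (GBH_NBLKPTRS - g), MINBLOCKSIZE);
		printf("child %d: %zu of %zu bytes\n", g, lsize, resid);
	}
	/* Prints 1536, 1536, 1024: three children covering 4096 bytes. */
	return (0);
}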
1918 
1919 /*