1862 incremental zfs receive fails for sparse file > 8PB
dmu_tx_count_free() badly over-estimates the memory required for a free. It
assumes the file is fully non-sparse and computes a worst-case figure for the
memory needed to hold all of the file's metadata. When a large hole has to be
freed, the estimate runs into the terabyte range, and the transaction hold
inevitably fails later on.
This patch computes a more realistic estimate by counting the L1 indirect
blocks (the loop for this is already present) and assuming a worst-case
distribution of those blocks over the full range being freed.
Reviewed by: Matt Ahrens <matthew.ahrens@delphix.com>
Reviewed by: Simon Klinkert <klinkert@webgods.de>
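
To put numbers on the over-estimate, here is a standalone back-of-the-envelope
sketch (not part of the patch). The geometry (128K data blocks and 16K
indirect blocks, giving epbs = 7) and the count of L1 blocks actually present
are illustrative assumptions; for a maximal 2^64-byte object the old formula
reaches the TB range.

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	/* Assumed geometry: 128K data blocks, 16K indirect blocks. */
	const int datablkshift = 17, indblkshift = 14;
	const int epbs = indblkshift - 7;	/* 128 blkptrs per indirect */
	const int maxlevel = 2 + (64 - datablkshift) / epbs;	/* = 8 */

	uint64_t nblks = (1ULL << 53) >> datablkshift;	/* 8 PiB hole */
	uint64_t nl1blks = 3;	/* hypothetical: L1 blocks actually present */
	uint64_t old_est = 0, new_est = 0;
	uint64_t blkcnt;
	int level;

	/* Old logic: charge every possible indirect block at every level. */
	blkcnt = 1 + ((nblks >> epbs) >> epbs);
	level = 2;
	while (level++ < maxlevel) {
		old_est += blkcnt << indblkshift;
		blkcnt = 1 + (blkcnt >> epbs);
	}

	/* New logic: no level can need more blocks than there are L1s. */
	blkcnt = 1 + ((nblks >> epbs) >> epbs);
	level = 2;
	while (level++ < maxlevel) {
		new_est += (blkcnt < nl1blks ? blkcnt : nl1blks)
		    << indblkshift;
		blkcnt = 1 + (blkcnt >> epbs);
	}

	printf("old %llu MB, new %llu KB\n",
	    (unsigned long long)(old_est >> 20),
	    (unsigned long long)(new_est >> 10));
	return (0);
}

With these numbers the old code tries to hold roughly 64 GB of metadata just
to punch an 8 PiB hole, while the patched estimate stays around 224 KB.
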
*** 427,436 ****
--- 427,437 ----
	uint64_t space = 0, unref = 0, skipped = 0;
	dnode_t *dn = txh->txh_dnode;
	dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
	spa_t *spa = txh->txh_tx->tx_pool->dp_spa;
	int epbs;
+ 	uint64_t l0span = 0, nl1blks = 0;

	if (dn->dn_nlevels == 0)
		return;

	/*
*** 459,468 ****
--- 460,470 ----
		}
		if (blkid + nblks > dn->dn_maxblkid)
			nblks = dn->dn_maxblkid - blkid;

	}
+ 	l0span = nblks;	/* save for later use to calc level > 1 overhead */
	if (dn->dn_nlevels == 1) {
		int i;
		for (i = 0; i < nblks; i++) {
			blkptr_t *bp = dn->dn_phys->dn_blkptr;
			ASSERT3U(blkid + i, <, dn->dn_nblkptr);
*** 471,498 ****
				dprintf_bp(bp, "can free old%s", "");
				space += bp_get_dsize(spa, bp);
			}
			unref += BP_GET_ASIZE(bp);
		}
		nblks = 0;
	}

- 	/*
- 	 * Add in memory requirements of higher-level indirects.
- 	 * This assumes a worst-possible scenario for dn_nlevels.
- 	 */
- 	{
- 		uint64_t blkcnt = 1 + ((nblks >> epbs) >> epbs);
- 		int level = (dn->dn_nlevels > 1) ? 2 : 1;
-
- 		while (level++ < DN_MAX_LEVELS) {
- 			txh->txh_memory_tohold += blkcnt << dn->dn_indblkshift;
- 			blkcnt = 1 + (blkcnt >> epbs);
- 		}
- 		ASSERT(blkcnt <= dn->dn_nblkptr);
- 	}
-
	lastblk = blkid + nblks - 1;
	while (nblks) {
		dmu_buf_impl_t *dbuf;
		uint64_t ibyte, new_blkid;
		int epb = 1 << epbs;
--- 473,486 ----
				dprintf_bp(bp, "can free old%s", "");
				space += bp_get_dsize(spa, bp);
			}
			unref += BP_GET_ASIZE(bp);
		}
+ 		nl1blks = 1;
		nblks = 0;
	}

	lastblk = blkid + nblks - 1;
	while (nblks) {
		dmu_buf_impl_t *dbuf;
		uint64_t ibyte, new_blkid;
		int epb = 1 << epbs;
*** 559,573 ****
--- 547,585 ----
			}
			unref += BP_GET_ASIZE(bp);
		}
		dbuf_rele(dbuf, FTAG);

+ 		++nl1blks;
		blkid += tochk;
		nblks -= tochk;
	}
	rw_exit(&dn->dn_struct_rwlock);

+ 	/*
+ 	 * Add in memory requirements of higher-level indirects.
+ 	 * This assumes a worst-possible scenario for dn_nlevels and a
+ 	 * worst-possible distribution of l1-blocks over the region to free.
+ 	 */
+ 	{
+ 		uint64_t blkcnt = 1 + ((l0span >> epbs) >> epbs);
+ 		int level = 2;
+ 		/*
+ 		 * Here we don't use DN_MAX_LEVELS, but calculate it with the
+ 		 * given datablkshift and indblkshift. This makes the
+ 		 * difference between 19 and 8 on large files.
+ 		 */
+ 		int maxlevel = 2 + (DN_MAX_OFFSET_SHIFT - dn->dn_datablkshift) /
+ 		    (dn->dn_indblkshift - SPA_BLKPTRSHIFT);
+
+ 		while (level++ < maxlevel) {
+ 			txh->txh_memory_tohold += MIN(blkcnt, nl1blks)
+ 			    << dn->dn_indblkshift;
+ 			blkcnt = 1 + (blkcnt >> epbs);
+ 		}
+ 	}
+
	/* account for new level 1 indirect blocks that might show up */
	if (skipped > 0) {
		txh->txh_fudge += skipped << dn->dn_indblkshift;
		skipped = MIN(skipped, DMU_MAX_DELETEBLKCNT >> epbs);
		txh->txh_memory_tohold += skipped << dn->dn_indblkshift;
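
For reference, a quick standalone check of the maxlevel computation in the
hunk above. DN_MAX_OFFSET_SHIFT (64, the maximum object size shift) and
SPA_BLKPTRSHIFT (7, a 128-byte blkptr_t) are the real on-disk constants; the
128K/16K block sizes are again assumed for illustration.

#include <stdio.h>

/* On-disk constants, repeated here so the sketch stands alone. */
#define	DN_MAX_OFFSET_SHIFT	64	/* objects are at most 2^64 bytes */
#define	SPA_BLKPTRSHIFT		7	/* a blkptr_t is 128 bytes */

int
main(void)
{
	int datablkshift = 17;		/* 128K data blocks (assumed) */
	int indblkshift = 14;		/* 16K indirect blocks (assumed) */

	/*
	 * Per the patch's formula: each indirect level multiplies the
	 * covered range by 2^(indblkshift - SPA_BLKPTRSHIFT), so this is
	 * the deepest tree this geometry can produce. The old code
	 * instead iterated up to the compile-time DN_MAX_LEVELS
	 * regardless of geometry.
	 */
	int maxlevel = 2 + (DN_MAX_OFFSET_SHIFT - datablkshift) /
	    (indblkshift - SPA_BLKPTRSHIFT);

	printf("maxlevel = %d\n", maxlevel);	/* prints 8 */
	return (0);
}
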