412 ASSERT(len == 0 || UINT64_MAX - off >= len - 1);
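/* (equivalently: for len != 0, off + (len - 1) must not overflow uint64_t) */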
413
414 txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
415 object, THT_WRITE, off, len);
416 if (txh == NULL)
417 return;
418
419 dmu_tx_count_write(txh, off, len);
420 dmu_tx_count_dnode(txh);
421 }
422
423 static void
424 dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
425 {
426 uint64_t blkid, nblks, lastblk;
427 uint64_t space = 0, unref = 0, skipped = 0;
428 dnode_t *dn = txh->txh_dnode;
429 dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
430 spa_t *spa = txh->txh_tx->tx_pool->dp_spa;
431 int epbs;
432
433 if (dn->dn_nlevels == 0)
434 return;
435
436 /*
437 * The struct_rwlock protects us against dn_nlevels
438 * changing, in case (against all odds) we manage to dirty &
439 * sync out the changes after we check for being dirty.
440 * Also, dbuf_hold_impl() wants us to have the struct_rwlock.
441 */
442 rw_enter(&dn->dn_struct_rwlock, RW_READER);
443 epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
444 if (dn->dn_maxblkid == 0) {
445 if (off == 0 && len >= dn->dn_datablksz) {
446 blkid = 0;
447 nblks = 1;
448 } else {
449 rw_exit(&dn->dn_struct_rwlock);
450 return;
451 }
452 } else {
453 blkid = off >> dn->dn_datablkshift;
454 nblks = (len + dn->dn_datablksz - 1) >> dn->dn_datablkshift;
455
456 if (blkid >= dn->dn_maxblkid) {
457 rw_exit(&dn->dn_struct_rwlock);
458 return;
459 }
460 if (blkid + nblks > dn->dn_maxblkid)
461 nblks = dn->dn_maxblkid - blkid;
462
463 }
464 if (dn->dn_nlevels == 1) {
465 int i;
466 for (i = 0; i < nblks; i++) {
467 blkptr_t *bp = dn->dn_phys->dn_blkptr;
468 ASSERT3U(blkid + i, <, dn->dn_nblkptr);
469 bp += blkid + i;
470 if (dsl_dataset_block_freeable(ds, bp, bp->blk_birth)) {
471 dprintf_bp(bp, "can free old%s", "");
472 space += bp_get_dsize(spa, bp);
473 }
474 unref += BP_GET_ASIZE(bp);
475 }
476 nblks = 0;
477 }
478
479 /*
480 * Add in memory requirements of higher-level indirects.
481 * This assumes a worst-possible scenario for dn_nlevels.
482 */
483 {
484 uint64_t blkcnt = 1 + ((nblks >> epbs) >> epbs);
485 int level = (dn->dn_nlevels > 1) ? 2 : 1;
486
487 while (level++ < DN_MAX_LEVELS) {
488 txh->txh_memory_tohold += blkcnt << dn->dn_indblkshift;
489 blkcnt = 1 + (blkcnt >> epbs);
490 }
491 ASSERT(blkcnt <= dn->dn_nblkptr);
492 }
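/*
 * Illustrative, assumed numbers for the worst case above: with 16K
 * indirect blocks (dn_indblkshift == 14) and SPA_BLKPTRSHIFT == 7 (a
 * blkptr_t is 128 bytes), epbs == 7, so each indirect block maps 128
 * children.  Freeing nblks == 1048576 L0 blocks gives
 * blkcnt = 1 + ((1048576 >> 7) >> 7) == 65 level-2 blocks; each pass of
 * the loop charges blkcnt indirect blocks of memory and then shrinks
 * blkcnt by another factor of 128 for the next level up.
 */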
493
494 lastblk = blkid + nblks - 1;
495 while (nblks) {
496 dmu_buf_impl_t *dbuf;
497 uint64_t ibyte, new_blkid;
498 int epb = 1 << epbs;
499 int err, i, blkoff, tochk;
500 blkptr_t *bp;
501
502 ibyte = blkid << dn->dn_datablkshift;
503 err = dnode_next_offset(dn,
504 DNODE_FIND_HAVELOCK, &ibyte, 2, 1, 0);
505 new_blkid = ibyte >> dn->dn_datablkshift;
506 if (err == ESRCH) {
507 skipped += (lastblk >> epbs) - (blkid >> epbs) + 1;
508 break;
509 }
510 if (err) {
511 txh->txh_tx->tx_err = err;
512 break;
513 }
544 err = dbuf_read(dbuf, NULL, DB_RF_HAVESTRUCT | DB_RF_CANFAIL);
545 if (err != 0) {
546 txh->txh_tx->tx_err = err;
547 dbuf_rele(dbuf, FTAG);
548 break;
549 }
550
551 bp = dbuf->db.db_data;
552 bp += blkoff;
553
554 for (i = 0; i < tochk; i++) {
555 if (dsl_dataset_block_freeable(ds, &bp[i],
556 bp[i].blk_birth)) {
557 dprintf_bp(&bp[i], "can free old%s", "");
558 space += bp_get_dsize(spa, &bp[i]);
559 }
560 unref += BP_GET_ASIZE(&bp[i]);
561 }
562 dbuf_rele(dbuf, FTAG);
563
564 blkid += tochk;
565 nblks -= tochk;
566 }
567 rw_exit(&dn->dn_struct_rwlock);
568
569 /* account for new level 1 indirect blocks that might show up */
570 if (skipped > 0) {
571 txh->txh_fudge += skipped << dn->dn_indblkshift;
572 skipped = MIN(skipped, DMU_MAX_DELETEBLKCNT >> epbs);
573 txh->txh_memory_tohold += skipped << dn->dn_indblkshift;
574 }
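/*
 * Worked example (assumed numbers) for the skipped/fudge accounting above:
 * "skipped" counts level-1 ranges in which dnode_next_offset() found
 * nothing allocated.  With dn_indblkshift == 14 and skipped == 10,
 * txh_fudge grows by 10 << 14 == 160K; skipped is then clamped to
 * DMU_MAX_DELETEBLKCNT >> epbs before charging txh_memory_tohold with the
 * (possibly smaller) product.
 */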
575 txh->txh_space_tofree += space;
576 txh->txh_space_tounref += unref;
577 }
578
579 void
580 dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len)
581 {
582 dmu_tx_hold_t *txh;
583 dnode_t *dn;
584 uint64_t start, end, i;
585 int err, shift;
586 zio_t *zio;
587
588 ASSERT(tx->tx_txg == 0);
|
412 ASSERT(len == 0 || UINT64_MAX - off >= len - 1);
413
414 txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
415 object, THT_WRITE, off, len);
416 if (txh == NULL)
417 return;
418
419 dmu_tx_count_write(txh, off, len);
420 dmu_tx_count_dnode(txh);
421 }
422
423 static void
424 dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
425 {
426 uint64_t blkid, nblks, lastblk;
427 uint64_t space = 0, unref = 0, skipped = 0;
428 dnode_t *dn = txh->txh_dnode;
429 dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
430 spa_t *spa = txh->txh_tx->tx_pool->dp_spa;
431 int epbs;
432 uint64_t l0span = 0, nl1blks = 0;
433
434 if (dn->dn_nlevels == 0)
435 return;
436
437 /*
438 * The struct_rwlock protects us against dn_nlevels
439 * changing, in case (against all odds) we manage to dirty &
440 * sync out the changes after we check for being dirty.
441 * Also, dbuf_hold_impl() wants us to have the struct_rwlock.
442 */
443 rw_enter(&dn->dn_struct_rwlock, RW_READER);
444 epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
445 if (dn->dn_maxblkid == 0) {
446 if (off == 0 && len >= dn->dn_datablksz) {
447 blkid = 0;
448 nblks = 1;
449 } else {
450 rw_exit(&dn->dn_struct_rwlock);
451 return;
452 }
453 } else {
454 blkid = off >> dn->dn_datablkshift;
455 nblks = (len + dn->dn_datablksz - 1) >> dn->dn_datablkshift;
456
457 if (blkid >= dn->dn_maxblkid) {
458 rw_exit(&dn->dn_struct_rwlock);
459 return;
460 }
461 if (blkid + nblks > dn->dn_maxblkid)
462 nblks = dn->dn_maxblkid - blkid;
463
464 }
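/*
 * Example of the block-range math above, with assumed sizes: for
 * dn_datablksz == 128K (dn_datablkshift == 17), off == 1M and len == 1M
 * give blkid == 8 and nblks == (1M + 128K - 1) >> 17 == 8, i.e. the free
 * covers L0 blocks 8-15, clipped against dn_maxblkid as needed.
 */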
465 l0span = nblks; /* save for later use to calc level > 1 overhead */
466 if (dn->dn_nlevels == 1) {
467 int i;
468 for (i = 0; i < nblks; i++) {
469 blkptr_t *bp = dn->dn_phys->dn_blkptr;
470 ASSERT3U(blkid + i, <, dn->dn_nblkptr);
471 bp += blkid + i;
472 if (dsl_dataset_block_freeable(ds, bp, bp->blk_birth)) {
473 dprintf_bp(bp, "can free old%s", "");
474 space += bp_get_dsize(spa, bp);
475 }
476 unref += BP_GET_ASIZE(bp);
477 }
478 nl1blks = 1;
479 nblks = 0;
480 }
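/*
 * (With a single-level dnode the block pointers live directly in
 * dnode_phys_t, so the freeable space was tallied in the loop above;
 * nblks is zeroed to skip the indirect-block walk below, and nl1blks == 1
 * stands in for the lone level-1 equivalent in the overhead estimate.)
 */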
481
482 lastblk = blkid + nblks - 1;
483 while (nblks) {
484 dmu_buf_impl_t *dbuf;
485 uint64_t ibyte, new_blkid;
486 int epb = 1 << epbs;
487 int err, i, blkoff, tochk;
488 blkptr_t *bp;
489
490 ibyte = blkid << dn->dn_datablkshift;
491 err = dnode_next_offset(dn,
492 DNODE_FIND_HAVELOCK, &ibyte, 2, 1, 0);
493 new_blkid = ibyte >> dn->dn_datablkshift;
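/*
 * ESRCH from dnode_next_offset() means no more allocated blocks were
 * found past ibyte; the remaining level-1 regions are tallied as
 * "skipped" and charged after the loop.
 */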
494 if (err == ESRCH) {
495 skipped += (lastblk >> epbs) - (blkid >> epbs) + 1;
496 break;
497 }
498 if (err) {
499 txh->txh_tx->tx_err = err;
500 break;
501 }
532 err = dbuf_read(dbuf, NULL, DB_RF_HAVESTRUCT | DB_RF_CANFAIL);
533 if (err != 0) {
534 txh->txh_tx->tx_err = err;
535 dbuf_rele(dbuf, FTAG);
536 break;
537 }
538
539 bp = dbuf->db.db_data;
540 bp += blkoff;
541
542 for (i = 0; i < tochk; i++) {
543 if (dsl_dataset_block_freeable(ds, &bp[i],
544 bp[i].blk_birth)) {
545 dprintf_bp(&bp[i], "can free old%s", "");
546 space += bp_get_dsize(spa, &bp[i]);
547 }
548 unref += BP_GET_ASIZE(&bp[i]);
549 }
550 dbuf_rele(dbuf, FTAG);
551
552 ++nl1blks;
553 blkid += tochk;
554 nblks -= tochk;
555 }
556 rw_exit(&dn->dn_struct_rwlock);
557
558 /*
559 * Add in memory requirements of higher-level indirects.
560 * This assumes a worst-possible scenario for dn_nlevels and a
561 * worst-possible distribution of l1-blocks over the region to free.
562 */
563 {
564 uint64_t blkcnt = 1 + ((l0span >> epbs) >> epbs);
565 int level = 2;
566 /*
567 * Here we don't use DN_MAX_LEVELS, but calculate the limit from
568 * the given datablkshift and indblkshift. On large files this
569 * makes the difference between 19 and 8 levels.
570 */
571 int maxlevel = 2 + (DN_MAX_OFFSET_SHIFT - dn->dn_datablkshift) /
572 (dn->dn_indblkshift - SPA_BLKPTRSHIFT);
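/*
 * Rough example with assumed shifts: DN_MAX_OFFSET_SHIFT is 64, so
 * 128K data and indirect blocks (both shifts 17, epbs == 10) give
 * maxlevel = 2 + (64 - 17) / 10 == 6, while 512-byte data blocks with
 * 16K indirects (shifts 9 and 14, epbs == 7) give
 * maxlevel = 2 + (64 - 9) / 7 == 9.
 */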
573
574 while (level++ < maxlevel) {
575 txh->txh_memory_tohold += MIN(blkcnt, (nl1blks >> epbs))
576 << dn->dn_indblkshift;
577 blkcnt = 1 + (blkcnt >> epbs);
578 }
579 }
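/*
 * Sketch of the bound above, with assumed numbers: for epbs == 10,
 * l0span == 1048576 L0 blocks and nl1blks == 3072 populated L1 blocks,
 * blkcnt starts at 1 + ((1048576 >> 10) >> 10) == 2 and
 * nl1blks >> epbs == 3, so with maxlevel == 6 the loop charges
 * MIN(2, 3) == 2 indirect blocks at level 2 and MIN(1, 3) == 1 at each
 * of levels 3-5, i.e. (2 + 1 + 1 + 1) << 17 == 640K held in memory.
 */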
580
581 /* account for new level 1 indirect blocks that might show up */
582 if (skipped > 0) {
583 txh->txh_fudge += skipped << dn->dn_indblkshift;
584 skipped = MIN(skipped, DMU_MAX_DELETEBLKCNT >> epbs);
585 txh->txh_memory_tohold += skipped << dn->dn_indblkshift;
586 }
587 txh->txh_space_tofree += space;
588 txh->txh_space_tounref += unref;
589 }
590
591 void
592 dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len)
593 {
594 dmu_tx_hold_t *txh;
595 dnode_t *dn;
596 uint64_t start, end, i;
597 int err, shift;
598 zio_t *zio;
599
600 ASSERT(tx->tx_txg == 0);
|