4047 panic from dbuf_free_range() from dmu_free_object() while doing zfs receive
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>

          --- old/usr/src/uts/common/fs/zfs/dmu.c
          +++ new/usr/src/uts/common/fs/zfs/dmu.c
(561 lines elided)
 562  562          rw_exit(&dn->dn_struct_rwlock);
 563  563  
 564  564          dnode_rele(dn, FTAG);
 565  565  }
 566  566  
 567  567  /*
 568  568   * Get the next "chunk" of file data to free.  We traverse the file from
 569  569   * the end so that the file gets shorter over time (if we crash in the
 570  570   * middle, this will leave us in a better state).  We find allocated file
 571  571   * data by simply searching the allocated level 1 indirects.
      572 + *
      573 + * On input, *start should be the first offset that does not need to be
      574 + * freed (e.g. "offset + length").  On return, *start will be the first
      575 + * offset that should be freed.
 572  576   */
 573  577  static int
 574      -get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t limit)
      578 +get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum)
 575  579  {
 576      -        uint64_t len = *start - limit;
 577      -        uint64_t blkcnt = 0;
 578      -        uint64_t maxblks = DMU_MAX_ACCESS / (1ULL << (dn->dn_indblkshift + 1));
      580 +        uint64_t maxblks = DMU_MAX_ACCESS >> (dn->dn_indblkshift + 1);
      581 +        /* bytes of data covered by a level-1 indirect block */
 579  582          uint64_t iblkrange =
 580  583              dn->dn_datablksz * EPB(dn->dn_indblkshift, SPA_BLKPTRSHIFT);
 581  584  
 582      -        ASSERT(limit <= *start);
      585 +        ASSERT3U(minimum, <=, *start);
 583  586  
 584      -        if (len <= iblkrange * maxblks) {
 585      -                *start = limit;
      587 +        if (*start - minimum <= iblkrange * maxblks) {
      588 +                *start = minimum;
 586  589                  return (0);
 587  590          }
 588  591          ASSERT(ISP2(iblkrange));
 589  592  
 590      -        while (*start > limit && blkcnt < maxblks) {
      593 +        for (uint64_t blks = 0; *start > minimum && blks < maxblks; blks++) {
 591  594                  int err;
 592  595  
 593      -                /* find next allocated L1 indirect */
      596 +                /*
      597 +                 * dnode_next_offset(BACKWARDS) will find an allocated L1
      598 +                 * indirect block at or before the input offset.  We must
      599 +                 * decrement *start so that it is at the end of the region
      600 +                 * to search.
      601 +                 */
      602 +                (*start)--;
 594  603                  err = dnode_next_offset(dn,
 595  604                      DNODE_FIND_BACKWARDS, start, 2, 1, 0);
 596  605  
 597      -                /* if there are no more, then we are done */
      606 +                /* if there are no indirect blocks before start, we are done */
 598  607                  if (err == ESRCH) {
 599      -                        *start = limit;
 600      -                        return (0);
 601      -                } else if (err) {
      608 +                        *start = minimum;
      609 +                        break;
      610 +                } else if (err != 0) {
 602  611                          return (err);
 603  612                  }
 604      -                blkcnt += 1;
 605  613  
 606      -                /* reset offset to end of "next" block back */
      614 +                /* set start to the beginning of this L1 indirect */
 607  615                  *start = P2ALIGN(*start, iblkrange);
 608      -                if (*start <= limit)
 609      -                        *start = limit;
 610      -                else
 611      -                        *start -= 1;
 612  616          }
      617 +        if (*start < minimum)
      618 +                *start = minimum;
 613  619          return (0);
 614  620  }
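
Reviewer note: the new contract for get_next_chunk() is that *start goes in as the first byte past the range to free (offset + length) and comes back as the L1-aligned start of the last chunk, never dropping below minimum. A minimal user-space sketch of that arithmetic follows; find_prev_l1() is a hypothetical stand-in for dnode_next_offset(DNODE_FIND_BACKWARDS), the geometry constants are illustrative rather than taken from a real dnode, and ESRCH handling is omitted.

/* Minimal user-space sketch of the new get_next_chunk() contract. */
#include <stdint.h>
#include <stdio.h>

#define ILLUS_IBLKRANGE (1ULL << 24)    /* illustrative: bytes per L1 indirect */
#define ILLUS_MAXBLKS   4               /* illustrative: L1 ranges per chunk */

/* P2ALIGN as in sysmacros.h: round x down to a power-of-2 boundary. */
#define P2ALIGN(x, align)       ((x) & -(align))

/*
 * Hypothetical stand-in for dnode_next_offset(DNODE_FIND_BACKWARDS):
 * in this model every L1 range is "allocated", so it leaves *off in place.
 */
static int
find_prev_l1(uint64_t *off)
{
        (void) off;
        return (0);
}

/*
 * On input, *start is the first offset that does not need to be freed;
 * on return it is the first offset that should be freed.
 */
static int
get_next_chunk_sketch(uint64_t *start, uint64_t minimum)
{
        if (*start - minimum <= ILLUS_IBLKRANGE * ILLUS_MAXBLKS) {
                *start = minimum;
                return (0);
        }
        for (uint64_t blks = 0; *start > minimum && blks < ILLUS_MAXBLKS;
            blks++) {
                (*start)--;             /* search at or before *start - 1 */
                (void) find_prev_l1(start);
                /* align to the beginning of this L1 range */
                *start = P2ALIGN(*start, ILLUS_IBLKRANGE);
        }
        if (*start < minimum)
                *start = minimum;
        return (0);
}

int
main(void)
{
        uint64_t start = 100 * ILLUS_IBLKRANGE; /* pretend offset + length */

        (void) get_next_chunk_sketch(&start, 0);
        printf("first offset to free: %llu\n", (unsigned long long)start);
        return (0);
}
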
 615  621  
 616  622  static int
 617  623  dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
 618      -    uint64_t length, boolean_t free_dnode)
      624 +    uint64_t length)
 619  625  {
 620      -        dmu_tx_t *tx;
 621      -        uint64_t object_size, start, end, len;
 622      -        boolean_t trunc = (length == DMU_OBJECT_END);
 623      -        int align, err;
      626 +        uint64_t object_size = (dn->dn_maxblkid + 1) * dn->dn_datablksz;
      627 +        int err;
 624  628  
 625      -        align = 1 << dn->dn_datablkshift;
 626      -        ASSERT(align > 0);
 627      -        object_size = align == 1 ? dn->dn_datablksz :
 628      -            (dn->dn_maxblkid + 1) << dn->dn_datablkshift;
 629      -
 630      -        end = offset + length;
 631      -        if (trunc || end > object_size)
 632      -                end = object_size;
 633      -        if (end <= offset)
      629 +        if (offset >= object_size)
 634  630                  return (0);
 635      -        length = end - offset;
 636  631  
 637      -        while (length) {
 638      -                start = end;
 639      -                /* assert(offset <= start) */
 640      -                err = get_next_chunk(dn, &start, offset);
      632 +        if (length == DMU_OBJECT_END || offset + length > object_size)
      633 +                length = object_size - offset;
      634 +
      635 +        while (length != 0) {
      636 +                uint64_t chunk_end, chunk_begin;
      637 +
      638 +                chunk_end = chunk_begin = offset + length;
      639 +
      640 +                /* move chunk_begin backwards to the beginning of this chunk */
      641 +                err = get_next_chunk(dn, &chunk_begin, offset);
 641  642                  if (err)
 642  643                          return (err);
 643      -                len = trunc ? DMU_OBJECT_END : end - start;
      644 +                ASSERT3U(chunk_begin, >=, offset);
      645 +                ASSERT3U(chunk_begin, <=, chunk_end);
 644  646  
 645      -                tx = dmu_tx_create(os);
 646      -                dmu_tx_hold_free(tx, dn->dn_object, start, len);
      647 +                dmu_tx_t *tx = dmu_tx_create(os);
      648 +                dmu_tx_hold_free(tx, dn->dn_object,
      649 +                    chunk_begin, chunk_end - chunk_begin);
 647  650                  err = dmu_tx_assign(tx, TXG_WAIT);
 648  651                  if (err) {
 649  652                          dmu_tx_abort(tx);
 650  653                          return (err);
 651  654                  }
 652      -
 653      -                dnode_free_range(dn, start, trunc ? -1 : len, tx);
 654      -
 655      -                if (start == 0 && free_dnode) {
 656      -                        ASSERT(trunc);
 657      -                        dnode_free(dn, tx);
 658      -                }
 659      -
 660      -                length -= end - start;
 661      -
      655 +                dnode_free_range(dn, chunk_begin, chunk_end - chunk_begin, tx);
 662  656                  dmu_tx_commit(tx);
 663      -                end = start;
      657 +
      658 +                length -= chunk_end - chunk_begin;
 664  659          }
 665  660          return (0);
 666  661  }
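
Reviewer note: the loop above caps each transaction at maxblks level-1 ranges so that a single dmu_tx_hold_free() never covers more than DMU_MAX_ACCESS worth of indirects. For a rough sense of the chunk granularity, the throwaway calculation below assumes 128K data blocks, 16K indirect blocks, and 128-byte block pointers, and assumes EPB(a, b) expands to 1 << (a - b); none of these values are read from a real pool.

/* Back-of-the-envelope size of one level-1 range for an assumed geometry. */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        uint64_t datablksz = 128ULL * 1024;     /* assumed 128K data blocks */
        int indblkshift = 14;                   /* assumed 16K indirect blocks */
        int blkptrshift = 7;                    /* 128-byte block pointers */

        /* bytes of file data covered by one level-1 indirect block */
        uint64_t iblkrange = datablksz * (1ULL << (indblkshift - blkptrshift));

        printf("one L1 indirect covers %llu MB of file data\n",
            (unsigned long long)(iblkrange >> 20));
        return (0);
}
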
 667  662  
 668  663  int
 669  664  dmu_free_long_range(objset_t *os, uint64_t object,
 670  665      uint64_t offset, uint64_t length)
 671  666  {
 672  667          dnode_t *dn;
 673  668          int err;
 674  669  
 675  670          err = dnode_hold(os, object, FTAG, &dn);
 676  671          if (err != 0)
 677  672                  return (err);
 678      -        err = dmu_free_long_range_impl(os, dn, offset, length, FALSE);
      673 +        err = dmu_free_long_range_impl(os, dn, offset, length);
 679  674          dnode_rele(dn, FTAG);
 680  675          return (err);
 681  676  }
 682  677  
 683  678  int
 684      -dmu_free_object(objset_t *os, uint64_t object)
      679 +dmu_free_long_object(objset_t *os, uint64_t object)
 685  680  {
 686      -        dnode_t *dn;
 687  681          dmu_tx_t *tx;
 688  682          int err;
 689  683  
 690      -        err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED,
 691      -            FTAG, &dn);
      684 +        err = dmu_free_long_range(os, object, 0, DMU_OBJECT_END);
 692  685          if (err != 0)
 693  686                  return (err);
 694      -        if (dn->dn_nlevels == 1) {
 695      -                tx = dmu_tx_create(os);
 696      -                dmu_tx_hold_bonus(tx, object);
 697      -                dmu_tx_hold_free(tx, dn->dn_object, 0, DMU_OBJECT_END);
 698      -                err = dmu_tx_assign(tx, TXG_WAIT);
 699      -                if (err == 0) {
 700      -                        dnode_free_range(dn, 0, DMU_OBJECT_END, tx);
 701      -                        dnode_free(dn, tx);
 702      -                        dmu_tx_commit(tx);
 703      -                } else {
 704      -                        dmu_tx_abort(tx);
 705      -                }
      687 +
      688 +        tx = dmu_tx_create(os);
      689 +        dmu_tx_hold_bonus(tx, object);
      690 +        dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END);
      691 +        err = dmu_tx_assign(tx, TXG_WAIT);
      692 +        if (err == 0) {
      693 +                err = dmu_object_free(os, object, tx);
      694 +                dmu_tx_commit(tx);
 706  695          } else {
 707      -                err = dmu_free_long_range_impl(os, dn, 0, DMU_OBJECT_END, TRUE);
      696 +                dmu_tx_abort(tx);
 708  697          }
 709      -        dnode_rele(dn, FTAG);
      698 +
 710  699          return (err);
 711  700  }
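
Reviewer note: with this change, dmu_free_long_object() first frees all file data through dmu_free_long_range() in chunk-sized transactions and only then frees the object itself in one small final transaction, instead of freeing the dnode from inside the range-freeing loop. A hedged caller sketch using only the entry points visible in this diff (objset setup, locking, and the matching <sys/dmu.h> declarations are assumed to be in place):

#include <sys/dmu.h>

/*
 * Illustrative only: punch out the tail of an object, then destroy it.
 * "os" is assumed to be an objset the caller already holds; error
 * handling is reduced to returning the first failure.
 */
static int
truncate_then_destroy(objset_t *os, uint64_t object, uint64_t new_size)
{
        int err;

        /* free everything from new_size to the end of the object */
        err = dmu_free_long_range(os, object, new_size, DMU_OBJECT_END);
        if (err != 0)
                return (err);

        /* free the remaining data and the object itself */
        return (dmu_free_long_object(os, object));
}
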
 712  701  
 713  702  int
 714  703  dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
 715  704      uint64_t size, dmu_tx_t *tx)
 716  705  {
 717  706          dnode_t *dn;
 718  707          int err = dnode_hold(os, object, FTAG, &dn);
 719  708          if (err)
(1119 lines elided)