5269 zfs: zpool import slow
While importing a pool, all objsets are enumerated twice: once to check
the ZIL log chains and once to claim them. On pools with many datasets
this can take a substantial amount of time.
Speed up the process by parallelizing it with a taskq. The number
of parallel tasks is limited to 4 times the number of leaf vdevs.
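
For context on how the parallelization works: in illumos this fix adds
dmu_objset_find_dp(), which replaces the serial dataset walk used during
import with one that dispatches each per-dataset callback onto a taskq.
The sketch below shows only the general dispatch pattern, not the actual
implementation: visit_arg_t, visit_one(), visit_dataset(), and
visit_all_parallel() are invented names, and the recursive hierarchy walk
is reduced to a comment. The taskq routines, vdev_count_leaves(), and the
dataset hold/rele interfaces are existing illumos ones; error handling and
the dsl_pool config locking that dsl_dataset_hold_obj() requires are
omitted.

	#include <sys/zfs_context.h>	/* taskq, kmem, FTAG */
	#include <sys/vdev.h>
	#include <sys/dsl_pool.h>
	#include <sys/dsl_dataset.h>

	typedef struct visit_arg {
		dsl_pool_t	*va_dp;		/* pool being imported */
		uint64_t	va_dsobj;	/* dataset object to visit */
		int		(*va_func)(dsl_pool_t *, dsl_dataset_t *, void *);
		void		*va_arg;	/* e.g. the open dmu_tx_t when claiming */
	} visit_arg_t;

	/* Taskq callback: hold one dataset and apply the visitor to it. */
	static void
	visit_one(void *arg)
	{
		visit_arg_t *va = arg;
		dsl_dataset_t *ds;

		if (dsl_dataset_hold_obj(va->va_dp, va->va_dsobj, FTAG, &ds) == 0) {
			(void) va->va_func(va->va_dp, ds, va->va_arg);
			dsl_dataset_rele(ds, FTAG);
		}
		kmem_free(va, sizeof (*va));
	}

	/* Queue one visit; the taskq width, not the caller, bounds concurrency. */
	static void
	visit_dataset(taskq_t *tq, dsl_pool_t *dp, uint64_t dsobj,
	    int (*func)(dsl_pool_t *, dsl_dataset_t *, void *), void *arg)
	{
		visit_arg_t *va = kmem_alloc(sizeof (*va), KM_SLEEP);

		va->va_dp = dp;
		va->va_dsobj = dsobj;
		va->va_func = func;
		va->va_arg = arg;
		(void) taskq_dispatch(tq, visit_one, va, TQ_SLEEP);
	}

	/* One pass over the pool: cap parallelism at 4x the leaf vdev count. */
	static void
	visit_all_parallel(dsl_pool_t *dp,
	    int (*func)(dsl_pool_t *, dsl_dataset_t *, void *), void *arg)
	{
		int ntasks = vdev_count_leaves(dp->dp_spa) * 4;
		taskq_t *tq = taskq_create("objset_find", ntasks, minclsyspri,
		    ntasks, INT_MAX, TASKQ_PREPOPULATE);

		/*
		 * ... recurse through the dsl_dir hierarchy here, calling
		 * visit_dataset(tq, dp, dsobj, func, arg) for each dataset
		 * the walk discovers ...
		 */

		taskq_wait(tq);		/* drain: every callback has finished */
		taskq_destroy(tq);
	}

This is also why, in the new version of zil.c below, zil_claim() and
zil_check_log_chain() take a dsl_pool_t and dsl_dataset_t instead of an
objset name: the parallel walk hands each task an already-resolved
dataset rather than a name to look up.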

usr/src/uts/common/fs/zfs/zil.c (before the change):

 607                         zio_free_zil(zilog->zl_spa, txg, &lwb->lwb_blk);
 608                         kmem_cache_free(zil_lwb_cache, lwb);
 609                 }
 610         } else if (!keep_first) {
 611                 zil_destroy_sync(zilog, tx);
 612         }
 613         mutex_exit(&zilog->zl_lock);
 614 
 615         dmu_tx_commit(tx);
 616 }
 617 
 618 void
 619 zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx)
 620 {
 621         ASSERT(list_is_empty(&zilog->zl_lwb_list));
 622         (void) zil_parse(zilog, zil_free_log_block,
 623             zil_free_log_record, tx, zilog->zl_header->zh_claim_txg);
 624 }
 625 
 626 int
 627 zil_claim(const char *osname, void *txarg)
 628 {
 629         dmu_tx_t *tx = txarg;
 630         uint64_t first_txg = dmu_tx_get_txg(tx);
 631         zilog_t *zilog;
 632         zil_header_t *zh;
 633         objset_t *os;
 634         int error;
 635 
 636         error = dmu_objset_own(osname, DMU_OST_ANY, B_FALSE, FTAG, &os);
 637         if (error != 0) {
 638                 cmn_err(CE_WARN, "can't open objset for %s", osname);
 639                 return (0);
 640         }
 641 
 642         zilog = dmu_objset_zil(os);
 643         zh = zil_header_in_syncing_context(zilog);
 644 
 645         if (spa_get_log_state(zilog->zl_spa) == SPA_LOG_CLEAR) {
 646                 if (!BP_IS_HOLE(&zh->zh_log))
 647                         zio_free_zil(zilog->zl_spa, first_txg, &zh->zh_log);
 648                 BP_ZERO(&zh->zh_log);
 649                 dsl_dataset_dirty(dmu_objset_ds(os), tx);
 650                 dmu_objset_disown(os, FTAG);
 651                 return (0);
 652         }
 653 
 654         /*
 655          * Claim all log blocks if we haven't already done so, and remember
 656          * the highest claimed sequence number.  This ensures that if we can
 657          * read only part of the log now (e.g. due to a missing device),
 658          * but we can read the entire log later, we will not try to replay
 659          * or destroy beyond the last block we successfully claimed.
 660          */
 661         ASSERT3U(zh->zh_claim_txg, <=, first_txg);
 662         if (zh->zh_claim_txg == 0 && !BP_IS_HOLE(&zh->zh_log)) {
 663                 (void) zil_parse(zilog, zil_claim_log_block,
 664                     zil_claim_log_record, tx, first_txg);
 665                 zh->zh_claim_txg = first_txg;
 666                 zh->zh_claim_blk_seq = zilog->zl_parse_blk_seq;
 667                 zh->zh_claim_lr_seq = zilog->zl_parse_lr_seq;
 668                 if (zilog->zl_parse_lr_count || zilog->zl_parse_blk_count > 1)
 669                         zh->zh_flags |= ZIL_REPLAY_NEEDED;
 670                 zh->zh_flags |= ZIL_CLAIM_LR_SEQ_VALID;
 671                 dsl_dataset_dirty(dmu_objset_ds(os), tx);
 672         }
 673 
 674         ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1));
 675         dmu_objset_disown(os, FTAG);
 676         return (0);
 677 }
 678 
 679 /*
 680  * Check the log by walking the log chain.
 681  * Checksum errors are ok as they indicate the end of the chain.
 682  * Any other error (no device or read failure) returns an error.
 683  */
 684 int
 685 zil_check_log_chain(const char *osname, void *tx)
 686 {
 687         zilog_t *zilog;
 688         objset_t *os;
 689         blkptr_t *bp;
 690         int error;
 691 
 692         ASSERT(tx == NULL);
 693 
 694         error = dmu_objset_hold(osname, FTAG, &os);
 695         if (error != 0) {
 696                 cmn_err(CE_WARN, "can't open objset for %s", osname);
 697                 return (0);
 698         }
 699 
 700         zilog = dmu_objset_zil(os);
 701         bp = (blkptr_t *)&zilog->zl_header->zh_log;
 702 
 703         /*
 704          * Check the first block and determine if it's on a log device
 705          * which may have been removed or faulted prior to loading this
 706          * pool.  If so, there's no point in checking the rest of the log
 707          * as its content should have already been synced to the pool.
 708          */
 709         if (!BP_IS_HOLE(bp)) {
 710                 vdev_t *vd;
 711                 boolean_t valid = B_TRUE;
 712 
 713                 spa_config_enter(os->os_spa, SCL_STATE, FTAG, RW_READER);
 714                 vd = vdev_lookup_top(os->os_spa, DVA_GET_VDEV(&bp->blk_dva[0]));
 715                 if (vd->vdev_islog && vdev_is_dead(vd))
 716                         valid = vdev_log_state_valid(vd);
 717                 spa_config_exit(os->os_spa, SCL_STATE, FTAG);
 718 
 719                 if (!valid) {
 720                         dmu_objset_rele(os, FTAG);
 721                         return (0);
 722                 }
 723         }
 724 
 725         /*
 726          * Because tx == NULL, zil_claim_log_block() will not actually claim
 727          * any blocks, but just determine whether it is possible to do so.
 728          * In addition to checking the log chain, zil_claim_log_block()
 729          * will invoke zio_claim() with a done func of spa_claim_notify(),
 730          * which will update spa_max_claim_txg.  See spa_load() for details.
 731          */
 732         error = zil_parse(zilog, zil_claim_log_block, zil_claim_log_record, tx,
 733             zilog->zl_header->zh_claim_txg ? -1ULL : spa_first_txg(os->os_spa));
 734 
 735         dmu_objset_rele(os, FTAG);
 736 
 737         return ((error == ECKSUM || error == ENOENT) ? 0 : error);
 738 }
 739 
 740 static int
 741 zil_vdev_compare(const void *x1, const void *x2)
 742 {
 743         const uint64_t v1 = ((zil_vdev_node_t *)x1)->zv_vdev;
 744         const uint64_t v2 = ((zil_vdev_node_t *)x2)->zv_vdev;
 745 
 746         if (v1 < v2)
 747                 return (-1);
 748         if (v1 > v2)
 749                 return (1);
 750 
 751         return (0);
 752 }
 753 
 754 void
 755 zil_add_block(zilog_t *zilog, const blkptr_t *bp)
 756 {

usr/src/uts/common/fs/zfs/zil.c (after the change):

 607                         zio_free_zil(zilog->zl_spa, txg, &lwb->lwb_blk);
 608                         kmem_cache_free(zil_lwb_cache, lwb);
 609                 }
 610         } else if (!keep_first) {
 611                 zil_destroy_sync(zilog, tx);
 612         }
 613         mutex_exit(&zilog->zl_lock);
 614 
 615         dmu_tx_commit(tx);
 616 }
 617 
 618 void
 619 zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx)
 620 {
 621         ASSERT(list_is_empty(&zilog->zl_lwb_list));
 622         (void) zil_parse(zilog, zil_free_log_block,
 623             zil_free_log_record, tx, zilog->zl_header->zh_claim_txg);
 624 }
 625 
 626 int
 627 zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg)
 628 {
 629         dmu_tx_t *tx = txarg;
 630         uint64_t first_txg = dmu_tx_get_txg(tx);
 631         zilog_t *zilog;
 632         zil_header_t *zh;
 633         objset_t *os;
 634         int error;
 635 
 636         error = dmu_objset_own_obj(dp, ds->ds_object,
 637             DMU_OST_ANY, B_FALSE, FTAG, &os);
 638         if (error != 0) {
 639                 cmn_err(CE_WARN, "can't open objset %llu, error %d",
 640                     (unsigned long long)ds->ds_object, error);
 641                 return (0);
 642         }
 643 
 644         zilog = dmu_objset_zil(os);
 645         zh = zil_header_in_syncing_context(zilog);
 646 
 647         if (spa_get_log_state(zilog->zl_spa) == SPA_LOG_CLEAR) {
 648                 if (!BP_IS_HOLE(&zh->zh_log))
 649                         zio_free_zil(zilog->zl_spa, first_txg, &zh->zh_log);
 650                 BP_ZERO(&zh->zh_log);
 651                 dsl_dataset_dirty(dmu_objset_ds(os), tx);
 652                 dmu_objset_disown(os, FTAG);
 653                 return (0);
 654         }
 655 
 656         /*
 657          * Claim all log blocks if we haven't already done so, and remember
 658          * the highest claimed sequence number.  This ensures that if we can
 659          * read only part of the log now (e.g. due to a missing device),
 660          * but we can read the entire log later, we will not try to replay
 661          * or destroy beyond the last block we successfully claimed.
 662          */
 663         ASSERT3U(zh->zh_claim_txg, <=, first_txg);
 664         if (zh->zh_claim_txg == 0 && !BP_IS_HOLE(&zh->zh_log)) {
 665                 (void) zil_parse(zilog, zil_claim_log_block,
 666                     zil_claim_log_record, tx, first_txg);
 667                 zh->zh_claim_txg = first_txg;
 668                 zh->zh_claim_blk_seq = zilog->zl_parse_blk_seq;
 669                 zh->zh_claim_lr_seq = zilog->zl_parse_lr_seq;
 670                 if (zilog->zl_parse_lr_count || zilog->zl_parse_blk_count > 1)
 671                         zh->zh_flags |= ZIL_REPLAY_NEEDED;
 672                 zh->zh_flags |= ZIL_CLAIM_LR_SEQ_VALID;
 673                 dsl_dataset_dirty(dmu_objset_ds(os), tx);
 674         }
 675 
 676         ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1));
 677         dmu_objset_disown(os, FTAG);
 678         return (0);
 679 }
 680 
 681 /*
 682  * Check the log by walking the log chain.
 683  * Checksum errors are ok as they indicate the end of the chain.
 684  * Any other error (no device or read failure) returns an error.
 685  */
 686 int
 687 zil_check_log_chain(dsl_pool_t *dp, dsl_dataset_t *ds, void *tx)
 688 {
 689         zilog_t *zilog;
 690         objset_t *os;
 691         blkptr_t *bp;
 692         int error;
 693 
 694         ASSERT(tx == NULL);
 695 
 696         error = dmu_objset_from_ds(ds, &os);
 697         if (error != 0) {
 698                 cmn_err(CE_WARN, "can't open objset %llu, error %d",
 699                     (unsigned long long)ds->ds_object, error);
 700                 return (0);
 701         }
 702 
 703         zilog = dmu_objset_zil(os);
 704         bp = (blkptr_t *)&zilog->zl_header->zh_log;
 705 
 706         /*
 707          * Check the first block and determine if it's on a log device
 708          * which may have been removed or faulted prior to loading this
 709          * pool.  If so, there's no point in checking the rest of the log
 710          * as its content should have already been synced to the pool.
 711          */
 712         if (!BP_IS_HOLE(bp)) {
 713                 vdev_t *vd;
 714                 boolean_t valid = B_TRUE;
 715 
 716                 spa_config_enter(os->os_spa, SCL_STATE, FTAG, RW_READER);
 717                 vd = vdev_lookup_top(os->os_spa, DVA_GET_VDEV(&bp->blk_dva[0]));
 718                 if (vd->vdev_islog && vdev_is_dead(vd))
 719                         valid = vdev_log_state_valid(vd);
 720                 spa_config_exit(os->os_spa, SCL_STATE, FTAG);
 721 
 722                 if (!valid)
 723                         return (0);
 724         }
 725 
 726         /*
 727          * Because tx == NULL, zil_claim_log_block() will not actually claim
 728          * any blocks, but just determine whether it is possible to do so.
 729          * In addition to checking the log chain, zil_claim_log_block()
 730          * will invoke zio_claim() with a done func of spa_claim_notify(),
 731          * which will update spa_max_claim_txg.  See spa_load() for details.
 732          */
 733         error = zil_parse(zilog, zil_claim_log_block, zil_claim_log_record, tx,
 734             zilog->zl_header->zh_claim_txg ? -1ULL : spa_first_txg(os->os_spa));
 735 
 736         return ((error == ECKSUM || error == ENOENT) ? 0 : error);
 737 }
 738 
 739 static int
 740 zil_vdev_compare(const void *x1, const void *x2)
 741 {
 742         const uint64_t v1 = ((zil_vdev_node_t *)x1)->zv_vdev;
 743         const uint64_t v2 = ((zil_vdev_node_t *)x2)->zv_vdev;
 744 
 745         if (v1 < v2)
 746                 return (-1);
 747         if (v1 > v2)
 748                 return (1);
 749 
 750         return (0);
 751 }
 752 
 753 void
 754 zil_add_block(zilog_t *zilog, const blkptr_t *bp)
 755 {