5269 zfs: zpool import slow
PORTING: this code relies on the property of taskq_wait to wait
until no more tasks are queued and no more tasks are active. As
we always queue new tasks from within other tasks, taskq_wait
reliably waits for the full recursion to finish, even though we
enqueue new tasks after taskq_wait has been called.
On platforms other than illumos, taskq_wait may not have this
property.
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
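
The note above turns on a specific taskq_wait() semantic: it drains tasks
that are dispatched by other tasks, even after the wait has begun.  Below is
a minimal sketch of that pattern, assuming the illumos <sys/taskq.h> and
<sys/kmem.h> interfaces; walk_arg_t, walk_node(), and walk_example() are
hypothetical names, not part of this change.

#include <sys/taskq.h>
#include <sys/kmem.h>
#include <sys/disp.h>

typedef struct walk_arg {
	taskq_t	*wa_tq;		/* taskq shared by the whole walk */
	int	wa_depth;	/* remaining recursion depth */
} walk_arg_t;

static void
walk_node(void *arg)
{
	walk_arg_t *wa = arg;

	if (wa->wa_depth > 0) {
		walk_arg_t *child = kmem_alloc(sizeof (*child), KM_SLEEP);

		child->wa_tq = wa->wa_tq;
		child->wa_depth = wa->wa_depth - 1;
		/* A task enqueueing another task into the same taskq. */
		(void) taskq_dispatch(wa->wa_tq, walk_node, child, TQ_SLEEP);
	}
	kmem_free(wa, sizeof (*wa));
}

static void
walk_example(void)
{
	taskq_t *tq = taskq_create("walk", 4, minclsyspri, 4, 8, 0);
	walk_arg_t *root = kmem_alloc(sizeof (*root), KM_SLEEP);

	root->wa_tq = tq;
	root->wa_depth = 3;
	(void) taskq_dispatch(tq, walk_node, root, TQ_SLEEP);

	/*
	 * On illumos this returns only once the root task and everything
	 * it transitively dispatched have completed, including tasks
	 * enqueued after taskq_wait() was called.  Other platforms may not
	 * guarantee this, which is what the PORTING note warns about.
	 */
	taskq_wait(tq);
	taskq_destroy(tq);
}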

Old version:

 610                         zio_free_zil(zilog->zl_spa, txg, &lwb->lwb_blk);
 611                         kmem_cache_free(zil_lwb_cache, lwb);
 612                 }
 613         } else if (!keep_first) {
 614                 zil_destroy_sync(zilog, tx);
 615         }
 616         mutex_exit(&zilog->zl_lock);
 617 
 618         dmu_tx_commit(tx);
 619 }
 620 
 621 void
 622 zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx)
 623 {
 624         ASSERT(list_is_empty(&zilog->zl_lwb_list));
 625         (void) zil_parse(zilog, zil_free_log_block,
 626             zil_free_log_record, tx, zilog->zl_header->zh_claim_txg);
 627 }
 628 
 629 int
 630 zil_claim(const char *osname, void *txarg)
 631 {
 632         dmu_tx_t *tx = txarg;
 633         uint64_t first_txg = dmu_tx_get_txg(tx);
 634         zilog_t *zilog;
 635         zil_header_t *zh;
 636         objset_t *os;
 637         int error;
 638 
 639         error = dmu_objset_own(osname, DMU_OST_ANY, B_FALSE, FTAG, &os);
 640         if (error != 0) {
 641                 /*
 642                  * EBUSY indicates that the objset is inconsistent, in which
 643                  * case it can not have a ZIL.
 644                  */
 645                 if (error != EBUSY) {
 646                         cmn_err(CE_WARN, "can't open objset for %s, error %u",
 647                             osname, error);
 648                 }
 649                 return (0);
 650         }
 651 
 652         zilog = dmu_objset_zil(os);
 653         zh = zil_header_in_syncing_context(zilog);
 654 
 655         if (spa_get_log_state(zilog->zl_spa) == SPA_LOG_CLEAR) {
 656                 if (!BP_IS_HOLE(&zh->zh_log))
 657                         zio_free_zil(zilog->zl_spa, first_txg, &zh->zh_log);
 658                 BP_ZERO(&zh->zh_log);
 659                 dsl_dataset_dirty(dmu_objset_ds(os), tx);
 660                 dmu_objset_disown(os, FTAG);
 661                 return (0);
 662         }
 663 
 664         /*
 665          * Claim all log blocks if we haven't already done so, and remember
 666          * the highest claimed sequence number.  This ensures that if we can
 667          * read only part of the log now (e.g. due to a missing device),
 668          * but we can read the entire log later, we will not try to replay
 669          * or destroy beyond the last block we successfully claimed.
 670          */
 671         ASSERT3U(zh->zh_claim_txg, <=, first_txg);
 672         if (zh->zh_claim_txg == 0 && !BP_IS_HOLE(&zh->zh_log)) {
 673                 (void) zil_parse(zilog, zil_claim_log_block,
 674                     zil_claim_log_record, tx, first_txg);
 675                 zh->zh_claim_txg = first_txg;
 676                 zh->zh_claim_blk_seq = zilog->zl_parse_blk_seq;
 677                 zh->zh_claim_lr_seq = zilog->zl_parse_lr_seq;
 678                 if (zilog->zl_parse_lr_count || zilog->zl_parse_blk_count > 1)
 679                         zh->zh_flags |= ZIL_REPLAY_NEEDED;
 680                 zh->zh_flags |= ZIL_CLAIM_LR_SEQ_VALID;
 681                 dsl_dataset_dirty(dmu_objset_ds(os), tx);
 682         }
 683 
 684         ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1));
 685         dmu_objset_disown(os, FTAG);
 686         return (0);
 687 }
 688 
 689 /*
 690  * Check the log by walking the log chain.
 691  * Checksum errors are ok as they indicate the end of the chain.
 692  * Any other error (no device or read failure) returns an error.
 693  */
 694 int
 695 zil_check_log_chain(const char *osname, void *tx)
 696 {
 697         zilog_t *zilog;
 698         objset_t *os;
 699         blkptr_t *bp;
 700         int error;
 701 
 702         ASSERT(tx == NULL);
 703 
 704         error = dmu_objset_hold(osname, FTAG, &os);
 705         if (error != 0) {
 706                 cmn_err(CE_WARN, "can't open objset for %s", osname);
 707                 return (0);
 708         }
 709 
 710         zilog = dmu_objset_zil(os);
 711         bp = (blkptr_t *)&zilog->zl_header->zh_log;
 712 
 713         /*
 714          * Check the first block and determine if it's on a log device
 715          * which may have been removed or faulted prior to loading this
 716          * pool.  If so, there's no point in checking the rest of the log
 717          * as its content should have already been synced to the pool.
 718          */
 719         if (!BP_IS_HOLE(bp)) {
 720                 vdev_t *vd;
 721                 boolean_t valid = B_TRUE;
 722 
 723                 spa_config_enter(os->os_spa, SCL_STATE, FTAG, RW_READER);
 724                 vd = vdev_lookup_top(os->os_spa, DVA_GET_VDEV(&bp->blk_dva[0]));
 725                 if (vd->vdev_islog && vdev_is_dead(vd))
 726                         valid = vdev_log_state_valid(vd);
 727                 spa_config_exit(os->os_spa, SCL_STATE, FTAG);
 728 
 729                 if (!valid) {
 730                         dmu_objset_rele(os, FTAG);
 731                         return (0);
 732                 }
 733         }
 734 
 735         /*
 736          * Because tx == NULL, zil_claim_log_block() will not actually claim
 737          * any blocks, but just determine whether it is possible to do so.
 738          * In addition to checking the log chain, zil_claim_log_block()
 739          * will invoke zio_claim() with a done func of spa_claim_notify(),
 740          * which will update spa_max_claim_txg.  See spa_load() for details.
 741          */
 742         error = zil_parse(zilog, zil_claim_log_block, zil_claim_log_record, tx,
 743             zilog->zl_header->zh_claim_txg ? -1ULL : spa_first_txg(os->os_spa));
 744 
 745         dmu_objset_rele(os, FTAG);
 746 
 747         return ((error == ECKSUM || error == ENOENT) ? 0 : error);
 748 }
 749 
 750 static int
 751 zil_vdev_compare(const void *x1, const void *x2)
 752 {
 753         const uint64_t v1 = ((zil_vdev_node_t *)x1)->zv_vdev;
 754         const uint64_t v2 = ((zil_vdev_node_t *)x2)->zv_vdev;
 755 
 756         if (v1 < v2)
 757                 return (-1);
 758         if (v1 > v2)
 759                 return (1);
 760 
 761         return (0);
 762 }
 763 
 764 void
 765 zil_add_block(zilog_t *zilog, const blkptr_t *bp)
 766 {

New version (zil_claim and zil_check_log_chain now take a dsl_pool_t and
dsl_dataset_t rather than an objset name):

 610                         zio_free_zil(zilog->zl_spa, txg, &lwb->lwb_blk);
 611                         kmem_cache_free(zil_lwb_cache, lwb);
 612                 }
 613         } else if (!keep_first) {
 614                 zil_destroy_sync(zilog, tx);
 615         }
 616         mutex_exit(&zilog->zl_lock);
 617 
 618         dmu_tx_commit(tx);
 619 }
 620 
 621 void
 622 zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx)
 623 {
 624         ASSERT(list_is_empty(&zilog->zl_lwb_list));
 625         (void) zil_parse(zilog, zil_free_log_block,
 626             zil_free_log_record, tx, zilog->zl_header->zh_claim_txg);
 627 }
 628 
 629 int
 630 zil_claim(dsl_pool_t *dp, dsl_dataset_t *ds, void *txarg)
 631 {
 632         dmu_tx_t *tx = txarg;
 633         uint64_t first_txg = dmu_tx_get_txg(tx);
 634         zilog_t *zilog;
 635         zil_header_t *zh;
 636         objset_t *os;
 637         int error;
 638 
 639         error = dmu_objset_own_obj(dp, ds->ds_object,
 640             DMU_OST_ANY, B_FALSE, FTAG, &os);
 641         if (error != 0) {
 642                 /*
 643                  * EBUSY indicates that the objset is inconsistent, in which
 644                  * case it can not have a ZIL.
 645                  */
 646                 if (error != EBUSY) {
 647                         cmn_err(CE_WARN, "can't open objset for %llu, error %u",
 648                             (unsigned long long)ds->ds_object, error);
 649                 }
 650                 return (0);
 651         }
 652 
 653         zilog = dmu_objset_zil(os);
 654         zh = zil_header_in_syncing_context(zilog);
 655 
 656         if (spa_get_log_state(zilog->zl_spa) == SPA_LOG_CLEAR) {
 657                 if (!BP_IS_HOLE(&zh->zh_log))
 658                         zio_free_zil(zilog->zl_spa, first_txg, &zh->zh_log);
 659                 BP_ZERO(&zh->zh_log);
 660                 dsl_dataset_dirty(dmu_objset_ds(os), tx);
 661                 dmu_objset_disown(os, FTAG);
 662                 return (0);
 663         }
 664 
 665         /*
 666          * Claim all log blocks if we haven't already done so, and remember
 667          * the highest claimed sequence number.  This ensures that if we can
 668          * read only part of the log now (e.g. due to a missing device),
 669          * but we can read the entire log later, we will not try to replay
 670          * or destroy beyond the last block we successfully claimed.
 671          */
 672         ASSERT3U(zh->zh_claim_txg, <=, first_txg);
 673         if (zh->zh_claim_txg == 0 && !BP_IS_HOLE(&zh->zh_log)) {
 674                 (void) zil_parse(zilog, zil_claim_log_block,
 675                     zil_claim_log_record, tx, first_txg);
 676                 zh->zh_claim_txg = first_txg;
 677                 zh->zh_claim_blk_seq = zilog->zl_parse_blk_seq;
 678                 zh->zh_claim_lr_seq = zilog->zl_parse_lr_seq;
 679                 if (zilog->zl_parse_lr_count || zilog->zl_parse_blk_count > 1)
 680                         zh->zh_flags |= ZIL_REPLAY_NEEDED;
 681                 zh->zh_flags |= ZIL_CLAIM_LR_SEQ_VALID;
 682                 dsl_dataset_dirty(dmu_objset_ds(os), tx);
 683         }
 684 
 685         ASSERT3U(first_txg, ==, (spa_last_synced_txg(zilog->zl_spa) + 1));
 686         dmu_objset_disown(os, FTAG);
 687         return (0);
 688 }
 689 
 690 /*
 691  * Check the log by walking the log chain.
 692  * Checksum errors are ok as they indicate the end of the chain.
 693  * Any other error (no device or read failure) returns an error.
 694  */
 695 /* ARGSUSED */
 696 int
 697 zil_check_log_chain(dsl_pool_t *dp, dsl_dataset_t *ds, void *tx)
 698 {
 699         zilog_t *zilog;
 700         objset_t *os;
 701         blkptr_t *bp;
 702         int error;
 703 
 704         ASSERT(tx == NULL);
 705 
 706         error = dmu_objset_from_ds(ds, &os);
 707         if (error != 0) {
 708                 cmn_err(CE_WARN, "can't open objset %llu, error %d",
 709                     (unsigned long long)ds->ds_object, error);
 710                 return (0);
 711         }
 712 
 713         zilog = dmu_objset_zil(os);
 714         bp = (blkptr_t *)&zilog->zl_header->zh_log;
 715 
 716         /*
 717          * Check the first block and determine if it's on a log device
 718          * which may have been removed or faulted prior to loading this
 719          * pool.  If so, there's no point in checking the rest of the log
 720          * as its content should have already been synced to the pool.
 721          */
 722         if (!BP_IS_HOLE(bp)) {
 723                 vdev_t *vd;
 724                 boolean_t valid = B_TRUE;
 725 
 726                 spa_config_enter(os->os_spa, SCL_STATE, FTAG, RW_READER);
 727                 vd = vdev_lookup_top(os->os_spa, DVA_GET_VDEV(&bp->blk_dva[0]));
 728                 if (vd->vdev_islog && vdev_is_dead(vd))
 729                         valid = vdev_log_state_valid(vd);
 730                 spa_config_exit(os->os_spa, SCL_STATE, FTAG);
 731 
 732                 if (!valid)
 733                         return (0);
 734         }
 735 
 736         /*
 737          * Because tx == NULL, zil_claim_log_block() will not actually claim
 738          * any blocks, but just determine whether it is possible to do so.
 739          * In addition to checking the log chain, zil_claim_log_block()
 740          * will invoke zio_claim() with a done func of spa_claim_notify(),
 741          * which will update spa_max_claim_txg.  See spa_load() for details.
 742          */
 743         error = zil_parse(zilog, zil_claim_log_block, zil_claim_log_record, tx,
 744             zilog->zl_header->zh_claim_txg ? -1ULL : spa_first_txg(os->os_spa));
 745 
 746         return ((error == ECKSUM || error == ENOENT) ? 0 : error);
 747 }
 748 
 749 static int
 750 zil_vdev_compare(const void *x1, const void *x2)
 751 {
 752         const uint64_t v1 = ((zil_vdev_node_t *)x1)->zv_vdev;
 753         const uint64_t v2 = ((zil_vdev_node_t *)x2)->zv_vdev;
 754 
 755         if (v1 < v2)
 756                 return (-1);
 757         if (v1 > v2)
 758                 return (1);
 759 
 760         return (0);
 761 }
 762 
 763 void
 764 zil_add_block(zilog_t *zilog, const blkptr_t *bp)
 765 {
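
zil_vdev_compare() above is an AVL comparator ordering zil_vdev_node_t
entries by vdev id; zil_add_block() uses it to record each log vdev at most
once.  A hedged sketch of the usual illumos <sys/avl.h> wiring for such a
comparator follows; the zv_tree variable and helper names are illustrative,
and zv_node is assumed to be the avl_node_t embedded in zil_vdev_node_t.

#include <sys/avl.h>
#include <sys/kmem.h>

static avl_tree_t zv_tree;	/* illustrative stand-in for zl_vdev_tree */

static void
zv_tree_init(void)
{
	avl_create(&zv_tree, zil_vdev_compare,
	    sizeof (zil_vdev_node_t), offsetof(zil_vdev_node_t, zv_node));
}

static void
zv_tree_note_vdev(uint64_t vdev)
{
	zil_vdev_node_t *zv, zvsearch;
	avl_index_t where;

	zvsearch.zv_vdev = vdev;
	if (avl_find(&zv_tree, &zvsearch, &where) == NULL) {
		/* First sighting: insert at the slot avl_find() computed. */
		zv = kmem_alloc(sizeof (*zv), KM_SLEEP);
		zv->zv_vdev = vdev;
		avl_insert(&zv_tree, zv, where);
	}
}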