| 
 
 
  49  * requirement. In the event of a panic or power failure, those log
  50  * records (transactions) are replayed.
  51  *
  52  * There is one ZIL per file system. Its on-disk (pool) format consists
  53  * of 3 parts:
  54  *
  55  *      - ZIL header
  56  *      - ZIL blocks
  57  *      - ZIL records
  58  *
  59  * A log record holds a system call transaction. Log blocks can
  60  * hold many log records and the blocks are chained together.
  61  * Each ZIL block contains a block pointer (blkptr_t) to the next
  62  * ZIL block in the chain. The ZIL header points to the first
  63  * block in the chain. Note there is not a fixed place in the pool
  64  * to hold blocks. They are dynamically allocated and freed as
  65  * needed from the blocks available. Figure X shows the ZIL structure:
  66  */
  67 
  68 /*
  69  * This global ZIL switch affects all pools.  Setting it disables replay.
  70  */
  71 int zil_replay_disable = 0;    /* disable intent logging replay */
  72 
  73 /*
  74  * Tunable for debugging or performance analysis; defaults to B_FALSE.
  75  * Setting zfs_nocacheflush will cause corruption on power loss if a
  76  * volatile out-of-order write cache is enabled.
  77  */
  78 boolean_t zfs_nocacheflush = B_FALSE;
  79 
  80 static kmem_cache_t *zil_lwb_cache;
  81 
  82 static void zil_async_to_sync(zilog_t *zilog, uint64_t foid);
  83 /* True when the lwb's free space equals the block size less the chain. */
  84 #define LWB_EMPTY(lwb) ((BP_GET_LSIZE(&(lwb)->lwb_blk) - \
  85     sizeof (zil_chain_t)) == ((lwb)->lwb_sz - (lwb)->lwb_nused))
  86 
  87 
  88 /*
  89  * ziltest is by and large an ugly hack, but very useful in
  90  * checking replay without tedious work.
  91  * When running ziltest we want to keep all itx's and so maintain
 
 
 862         zbookmark_t zb;
 863 
 864         SET_BOOKMARK(&zb, lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_OBJSET],
 865             ZB_ZIL_OBJECT, ZB_ZIL_LEVEL,
 866             lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_SEQ]);
 867 
 868         if (zilog->zl_root_zio == NULL) {
 869                 zilog->zl_root_zio = zio_root(zilog->zl_spa, NULL, NULL,
 870                     ZIO_FLAG_CANFAIL);
 871         }
 872         if (lwb->lwb_zio == NULL) {
 873                 lwb->lwb_zio = zio_rewrite(zilog->zl_root_zio, zilog->zl_spa,
 874                     0, &lwb->lwb_blk, lwb->lwb_buf, BP_GET_LSIZE(&lwb->lwb_blk),
 875                     zil_lwb_write_done, lwb, ZIO_PRIORITY_LOG_WRITE,
 876                     ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE, &zb);
 877         }
 878 }
 879 
 880 /*
 881  * Define a limited set of intent log block sizes.
 882  * Each size must be a multiple of 4KB. Note only the amount used (again
 883  * aligned to 4KB) actually gets written. However, we can't always just
 884  * allocate SPA_MAXBLOCKSIZE as the slog space could be exhausted.
 885  */
 886 uint64_t zil_block_buckets[] = {
 887     4096,               /* non TX_WRITE */
 888     8192+4096,          /* data base */
 889     32*1024 + 4096,     /* NFS writes */
 890     UINT64_MAX          /* presumably the catch-all entry -- confirm */
 891 };
 892 
 893 /*
 894  * Use the slog when the logbias is 'latency' and either the current commit
 895  * size is below the limit or the total itx list size is below 2X the limit.
 896  * Limit checking is disabled by setting zil_slog_limit to UINT64_MAX.
 897  */
 898 uint64_t zil_slog_limit = 1024 * 1024;  /* 1M; presumably bytes -- confirm */
 899 #define USE_SLOG(zilog) (((zilog)->zl_logbias == ZFS_LOGBIAS_LATENCY) && \
 900         (((zilog)->zl_cur_used < zil_slog_limit) || \
 901         ((zilog)->zl_itx_list_sz < (zil_slog_limit << 1))))
 | 
 
 
  49  * requirement. In the event of a panic or power failure, those log
  50  * records (transactions) are replayed.
  51  *
  52  * There is one ZIL per file system. Its on-disk (pool) format consists
  53  * of 3 parts:
  54  *
  55  *      - ZIL header
  56  *      - ZIL blocks
  57  *      - ZIL records
  58  *
  59  * A log record holds a system call transaction. Log blocks can
  60  * hold many log records and the blocks are chained together.
  61  * Each ZIL block contains a block pointer (blkptr_t) to the next
  62  * ZIL block in the chain. The ZIL header points to the first
  63  * block in the chain. Note there is not a fixed place in the pool
  64  * to hold blocks. They are dynamically allocated and freed as
  65  * needed from the blocks available. Figure X shows the ZIL structure:
  66  */
  67 
  68 /*
  69  * Set to disable intent log replay.  This global ZIL switch affects all pools.
  70  */
  71 int zil_replay_disable = 0;
  72 
  73 /*
  74  * Tunable for debugging or performance analysis; defaults to B_FALSE.
  75  * Setting zfs_nocacheflush will cause corruption on power loss if a
  76  * volatile out-of-order write cache is enabled.
  77  */
  78 boolean_t zfs_nocacheflush = B_FALSE;
  79 
  80 static kmem_cache_t *zil_lwb_cache;
  81 
  82 static void zil_async_to_sync(zilog_t *zilog, uint64_t foid);
  83 /* True when the lwb's free space equals the block size less the chain. */
  84 #define LWB_EMPTY(lwb) ((BP_GET_LSIZE(&(lwb)->lwb_blk) - \
  85     sizeof (zil_chain_t)) == ((lwb)->lwb_sz - (lwb)->lwb_nused))
  86 
  87 
  88 /*
  89  * ziltest is by and large an ugly hack, but very useful in
  90  * checking replay without tedious work.
  91  * When running ziltest we want to keep all itx's and so maintain
 
 
 862         zbookmark_t zb;
 863 
 864         SET_BOOKMARK(&zb, lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_OBJSET],
 865             ZB_ZIL_OBJECT, ZB_ZIL_LEVEL,
 866             lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_SEQ]);
 867 
 868         if (zilog->zl_root_zio == NULL) {
 869                 zilog->zl_root_zio = zio_root(zilog->zl_spa, NULL, NULL,
 870                     ZIO_FLAG_CANFAIL);
 871         }
 872         if (lwb->lwb_zio == NULL) {
 873                 lwb->lwb_zio = zio_rewrite(zilog->zl_root_zio, zilog->zl_spa,
 874                     0, &lwb->lwb_blk, lwb->lwb_buf, BP_GET_LSIZE(&lwb->lwb_blk),
 875                     zil_lwb_write_done, lwb, ZIO_PRIORITY_LOG_WRITE,
 876                     ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE, &zb);
 877         }
 878 }
 879 
 880 /*
 881  * Define a limited set of intent log block sizes.
 882  *
 883  * Each size must be a multiple of 4KB. Note only the amount used (again
 884  * aligned to 4KB) actually gets written. However, we can't always just
 885  * allocate SPA_MAXBLOCKSIZE as the slog space could be exhausted.
 886  */
 887 uint64_t zil_block_buckets[] = {
 888     4096,               /* non TX_WRITE */
 889     8192+4096,          /* data base */
 890     32*1024 + 4096,     /* NFS writes */
 891     UINT64_MAX          /* presumably the catch-all entry -- confirm */
 892 };
 893 
 894 /*
 895  * Use the slog when the logbias is 'latency' and either the current commit
 896  * size is below the limit or the total itx list size is below 2X the limit.
 897  * Limit checking is disabled by setting zil_slog_limit to UINT64_MAX.
 898  */
 899 uint64_t zil_slog_limit = 1024 * 1024;  /* 1M; presumably bytes -- confirm */
 900 #define USE_SLOG(zilog) (((zilog)->zl_logbias == ZFS_LOGBIAS_LATENCY) && \
 901         (((zilog)->zl_cur_used < zil_slog_limit) || \
 902         ((zilog)->zl_itx_list_sz < (zil_slog_limit << 1))))
 |