Print this page
4045 zfs write throttle & i/o scheduler performance work
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
*** 20,30 ****
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
! * Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
*/
#ifndef _ZIO_H
#define _ZIO_H
--- 20,30 ----
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
! * Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
*/
#ifndef _ZIO_H
#define _ZIO_H
*** 124,147 ****
#define ZIO_FAILURE_MODE_WAIT 0
#define ZIO_FAILURE_MODE_CONTINUE 1
#define ZIO_FAILURE_MODE_PANIC 2
! #define ZIO_PRIORITY_NOW (zio_priority_table[0])
! #define ZIO_PRIORITY_SYNC_READ (zio_priority_table[1])
! #define ZIO_PRIORITY_SYNC_WRITE (zio_priority_table[2])
! #define ZIO_PRIORITY_LOG_WRITE (zio_priority_table[3])
! #define ZIO_PRIORITY_CACHE_FILL (zio_priority_table[4])
! #define ZIO_PRIORITY_AGG (zio_priority_table[5])
! #define ZIO_PRIORITY_FREE (zio_priority_table[6])
! #define ZIO_PRIORITY_ASYNC_WRITE (zio_priority_table[7])
! #define ZIO_PRIORITY_ASYNC_READ (zio_priority_table[8])
! #define ZIO_PRIORITY_RESILVER (zio_priority_table[9])
! #define ZIO_PRIORITY_SCRUB (zio_priority_table[10])
! #define ZIO_PRIORITY_DDT_PREFETCH (zio_priority_table[11])
! #define ZIO_PRIORITY_TABLE_SIZE 12
#define ZIO_PIPELINE_CONTINUE 0x100
#define ZIO_PIPELINE_STOP 0x101
enum zio_flag {
/*
--- 124,144 ----
#define ZIO_FAILURE_MODE_WAIT 0
#define ZIO_FAILURE_MODE_CONTINUE 1
#define ZIO_FAILURE_MODE_PANIC 2
! typedef enum zio_priority {
! ZIO_PRIORITY_SYNC_READ,
! ZIO_PRIORITY_SYNC_WRITE, /* ZIL */
! ZIO_PRIORITY_ASYNC_READ, /* prefetch */
! ZIO_PRIORITY_ASYNC_WRITE, /* spa_sync() */
! ZIO_PRIORITY_SCRUB, /* asynchronous scrub/resilver reads */
! ZIO_PRIORITY_NUM_QUEUEABLE,
+ ZIO_PRIORITY_NOW /* non-queued i/os (e.g. free) */
+ } zio_priority_t;
+
#define ZIO_PIPELINE_CONTINUE 0x100
#define ZIO_PIPELINE_STOP 0x101
enum zio_flag {
/*
*** 192,201 ****
--- 189,199 ----
ZIO_FLAG_GANG_CHILD = 1 << 22,
ZIO_FLAG_DDT_CHILD = 1 << 23,
ZIO_FLAG_GODFATHER = 1 << 24,
ZIO_FLAG_NOPWRITE = 1 << 25,
ZIO_FLAG_REEXECUTED = 1 << 26,
+ ZIO_FLAG_DELEGATED = 1 << 27,
};
#define ZIO_FLAG_MUSTSUCCEED 0
#define ZIO_DDT_CHILD_FLAGS(zio) \
*** 231,242 ****
#define ECKSUM EBADE
#define EFRAGS EBADR
typedef void zio_done_func_t(zio_t *zio);
! extern uint8_t zio_priority_table[ZIO_PRIORITY_TABLE_SIZE];
! extern char *zio_type_name[ZIO_TYPES];
/*
* A bookmark is a four-tuple <objset, object, level, blkid> that uniquely
* identifies any block in the pool. By convention, the meta-objset (MOS)
* is objset 0, and the meta-dnode is object 0. This covers all blocks
--- 229,239 ----
#define ECKSUM EBADE
#define EFRAGS EBADR
typedef void zio_done_func_t(zio_t *zio);
! extern const char *zio_type_name[ZIO_TYPES];
/*
* A bookmark is a four-tuple <objset, object, level, blkid> that uniquely
* identifies any block in the pool. By convention, the meta-objset (MOS)
* is objset 0, and the meta-dnode is object 0. This covers all blocks
*** 372,382 ****
zbookmark_t io_bookmark;
zio_prop_t io_prop;
zio_type_t io_type;
enum zio_child io_child_type;
int io_cmd;
! uint8_t io_priority;
uint8_t io_reexecute;
uint8_t io_state[ZIO_WAIT_TYPES];
uint64_t io_txg;
spa_t *io_spa;
blkptr_t *io_bp;
--- 369,379 ----
zbookmark_t io_bookmark;
zio_prop_t io_prop;
zio_type_t io_type;
enum zio_child io_child_type;
int io_cmd;
! zio_priority_t io_priority;
uint8_t io_reexecute;
uint8_t io_state[ZIO_WAIT_TYPES];
uint64_t io_txg;
spa_t *io_spa;
blkptr_t *io_bp;
*** 388,397 ****
--- 385,395 ----
zio_t *io_logical;
zio_transform_t *io_transform_stack;
/* Callback info */
zio_done_func_t *io_ready;
+ zio_done_func_t *io_physdone;
zio_done_func_t *io_done;
void *io_private;
int64_t io_prev_space_delta; /* DMU private */
blkptr_t io_bp_orig;
*** 405,419 ****
vdev_t *io_vd;
void *io_vsd;
const zio_vsd_ops_t *io_vsd_ops;
uint64_t io_offset;
- uint64_t io_deadline;
hrtime_t io_timestamp;
! avl_node_t io_offset_node;
! avl_node_t io_deadline_node;
! avl_tree_t *io_vdev_tree;
/* Internal pipeline state */
enum zio_flag io_flags;
enum zio_stage io_stage;
enum zio_stage io_pipeline;
--- 403,414 ----
vdev_t *io_vd;
void *io_vsd;
const zio_vsd_ops_t *io_vsd_ops;
uint64_t io_offset;
hrtime_t io_timestamp;
! avl_node_t io_queue_node;
/* Internal pipeline state */
enum zio_flag io_flags;
enum zio_stage io_stage;
enum zio_stage io_pipeline;
*** 422,431 ****
--- 417,427 ----
enum zio_stage io_orig_pipeline;
int io_error;
int io_child_error[ZIO_CHILD_TYPES];
uint64_t io_children[ZIO_CHILD_TYPES][ZIO_WAIT_TYPES];
uint64_t io_child_count;
+ uint64_t io_phys_children;
uint64_t io_parent_count;
uint64_t *io_stall;
zio_t *io_gang_leader;
zio_gang_node_t *io_gang_tree;
void *io_executor;
*** 447,466 ****
extern zio_t *zio_root(spa_t *spa,
zio_done_func_t *done, void *private, enum zio_flag flags);
extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, void *data,
uint64_t size, zio_done_func_t *done, void *private,
! int priority, enum zio_flag flags, const zbookmark_t *zb);
extern zio_t *zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
void *data, uint64_t size, const zio_prop_t *zp,
! zio_done_func_t *ready, zio_done_func_t *done, void *private,
! int priority, enum zio_flag flags, const zbookmark_t *zb);
extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
void *data, uint64_t size, zio_done_func_t *done, void *private,
! int priority, enum zio_flag flags, zbookmark_t *zb);
extern void zio_write_override(zio_t *zio, blkptr_t *bp, int copies,
boolean_t nopwrite);
extern void zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp);
--- 443,463 ----
extern zio_t *zio_root(spa_t *spa,
zio_done_func_t *done, void *private, enum zio_flag flags);
extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, void *data,
uint64_t size, zio_done_func_t *done, void *private,
! zio_priority_t priority, enum zio_flag flags, const zbookmark_t *zb);
extern zio_t *zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
void *data, uint64_t size, const zio_prop_t *zp,
! zio_done_func_t *ready, zio_done_func_t *physdone, zio_done_func_t *done,
! void *private,
! zio_priority_t priority, enum zio_flag flags, const zbookmark_t *zb);
extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
void *data, uint64_t size, zio_done_func_t *done, void *private,
! zio_priority_t priority, enum zio_flag flags, zbookmark_t *zb);
extern void zio_write_override(zio_t *zio, blkptr_t *bp, int copies,
boolean_t nopwrite);
extern void zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp);
*** 468,488 ****
extern zio_t *zio_claim(zio_t *pio, spa_t *spa, uint64_t txg,
const blkptr_t *bp,
zio_done_func_t *done, void *private, enum zio_flag flags);
extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
! zio_done_func_t *done, void *private, int priority, enum zio_flag flags);
extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
uint64_t size, void *data, int checksum,
! zio_done_func_t *done, void *private, int priority, enum zio_flag flags,
! boolean_t labels);
extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
uint64_t size, void *data, int checksum,
! zio_done_func_t *done, void *private, int priority, enum zio_flag flags,
! boolean_t labels);
extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg,
const blkptr_t *bp, enum zio_flag flags);
extern int zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp,
--- 465,485 ----
extern zio_t *zio_claim(zio_t *pio, spa_t *spa, uint64_t txg,
const blkptr_t *bp,
zio_done_func_t *done, void *private, enum zio_flag flags);
extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
! zio_done_func_t *done, void *private, enum zio_flag flags);
extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
uint64_t size, void *data, int checksum,
! zio_done_func_t *done, void *private, zio_priority_t priority,
! enum zio_flag flags, boolean_t labels);
extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
uint64_t size, void *data, int checksum,
! zio_done_func_t *done, void *private, zio_priority_t priority,
! enum zio_flag flags, boolean_t labels);
extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg,
const blkptr_t *bp, enum zio_flag flags);
extern int zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp,
*** 507,521 ****
extern void zio_data_buf_free(void *buf, size_t size);
extern void zio_resubmit_stage_async(void *);
extern zio_t *zio_vdev_child_io(zio_t *zio, blkptr_t *bp, vdev_t *vd,
! uint64_t offset, void *data, uint64_t size, int type, int priority,
! enum zio_flag flags, zio_done_func_t *done, void *private);
extern zio_t *zio_vdev_delegated_io(vdev_t *vd, uint64_t offset,
! void *data, uint64_t size, int type, int priority,
enum zio_flag flags, zio_done_func_t *done, void *private);
extern void zio_vdev_io_bypass(zio_t *zio);
extern void zio_vdev_io_reissue(zio_t *zio);
extern void zio_vdev_io_redone(zio_t *zio);
--- 504,519 ----
extern void zio_data_buf_free(void *buf, size_t size);
extern void zio_resubmit_stage_async(void *);
extern zio_t *zio_vdev_child_io(zio_t *zio, blkptr_t *bp, vdev_t *vd,
! uint64_t offset, void *data, uint64_t size, int type,
! zio_priority_t priority, enum zio_flag flags,
! zio_done_func_t *done, void *private);
extern zio_t *zio_vdev_delegated_io(vdev_t *vd, uint64_t offset,
! void *data, uint64_t size, int type, zio_priority_t priority,
enum zio_flag flags, zio_done_func_t *done, void *private);
extern void zio_vdev_io_bypass(zio_t *zio);
extern void zio_vdev_io_reissue(zio_t *zio);
extern void zio_vdev_io_redone(zio_t *zio);