Print this page
4045 zfs write throttle & i/o scheduler performance work
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
@@ -20,11 +20,11 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
*/
#ifndef _ZIO_H
#define _ZIO_H
@@ -124,24 +124,21 @@
#define ZIO_FAILURE_MODE_WAIT 0
#define ZIO_FAILURE_MODE_CONTINUE 1
#define ZIO_FAILURE_MODE_PANIC 2
-#define ZIO_PRIORITY_NOW (zio_priority_table[0])
-#define ZIO_PRIORITY_SYNC_READ (zio_priority_table[1])
-#define ZIO_PRIORITY_SYNC_WRITE (zio_priority_table[2])
-#define ZIO_PRIORITY_LOG_WRITE (zio_priority_table[3])
-#define ZIO_PRIORITY_CACHE_FILL (zio_priority_table[4])
-#define ZIO_PRIORITY_AGG (zio_priority_table[5])
-#define ZIO_PRIORITY_FREE (zio_priority_table[6])
-#define ZIO_PRIORITY_ASYNC_WRITE (zio_priority_table[7])
-#define ZIO_PRIORITY_ASYNC_READ (zio_priority_table[8])
-#define ZIO_PRIORITY_RESILVER (zio_priority_table[9])
-#define ZIO_PRIORITY_SCRUB (zio_priority_table[10])
-#define ZIO_PRIORITY_DDT_PREFETCH (zio_priority_table[11])
-#define ZIO_PRIORITY_TABLE_SIZE 12
+typedef enum zio_priority {
+ ZIO_PRIORITY_SYNC_READ,
+ ZIO_PRIORITY_SYNC_WRITE, /* ZIL */
+ ZIO_PRIORITY_ASYNC_READ, /* prefetch */
+ ZIO_PRIORITY_ASYNC_WRITE, /* spa_sync() */
+ ZIO_PRIORITY_SCRUB, /* asynchronous scrub/resilver reads */
+ ZIO_PRIORITY_NUM_QUEUEABLE,
+ ZIO_PRIORITY_NOW /* non-queued i/os (e.g. free) */
+} zio_priority_t;
+
#define ZIO_PIPELINE_CONTINUE 0x100
#define ZIO_PIPELINE_STOP 0x101
enum zio_flag {
/*
@@ -192,10 +189,11 @@
ZIO_FLAG_GANG_CHILD = 1 << 22,
ZIO_FLAG_DDT_CHILD = 1 << 23,
ZIO_FLAG_GODFATHER = 1 << 24,
ZIO_FLAG_NOPWRITE = 1 << 25,
ZIO_FLAG_REEXECUTED = 1 << 26,
+ ZIO_FLAG_DELEGATED = 1 << 27,
};
#define ZIO_FLAG_MUSTSUCCEED 0
#define ZIO_DDT_CHILD_FLAGS(zio) \
@@ -231,12 +229,11 @@
#define ECKSUM EBADE
#define EFRAGS EBADR
typedef void zio_done_func_t(zio_t *zio);
-extern uint8_t zio_priority_table[ZIO_PRIORITY_TABLE_SIZE];
-extern char *zio_type_name[ZIO_TYPES];
+extern const char *zio_type_name[ZIO_TYPES];
/*
* A bookmark is a four-tuple <objset, object, level, blkid> that uniquely
* identifies any block in the pool. By convention, the meta-objset (MOS)
* is objset 0, and the meta-dnode is object 0. This covers all blocks
@@ -372,11 +369,11 @@
zbookmark_t io_bookmark;
zio_prop_t io_prop;
zio_type_t io_type;
enum zio_child io_child_type;
int io_cmd;
- uint8_t io_priority;
+ zio_priority_t io_priority;
uint8_t io_reexecute;
uint8_t io_state[ZIO_WAIT_TYPES];
uint64_t io_txg;
spa_t *io_spa;
blkptr_t *io_bp;
@@ -388,10 +385,11 @@
zio_t *io_logical;
zio_transform_t *io_transform_stack;
/* Callback info */
zio_done_func_t *io_ready;
+ zio_done_func_t *io_physdone;
zio_done_func_t *io_done;
void *io_private;
int64_t io_prev_space_delta; /* DMU private */
blkptr_t io_bp_orig;
@@ -405,15 +403,12 @@
vdev_t *io_vd;
void *io_vsd;
const zio_vsd_ops_t *io_vsd_ops;
uint64_t io_offset;
- uint64_t io_deadline;
hrtime_t io_timestamp;
- avl_node_t io_offset_node;
- avl_node_t io_deadline_node;
- avl_tree_t *io_vdev_tree;
+ avl_node_t io_queue_node;
/* Internal pipeline state */
enum zio_flag io_flags;
enum zio_stage io_stage;
enum zio_stage io_pipeline;
@@ -422,10 +417,11 @@
enum zio_stage io_orig_pipeline;
int io_error;
int io_child_error[ZIO_CHILD_TYPES];
uint64_t io_children[ZIO_CHILD_TYPES][ZIO_WAIT_TYPES];
uint64_t io_child_count;
+ uint64_t io_phys_children;
uint64_t io_parent_count;
uint64_t *io_stall;
zio_t *io_gang_leader;
zio_gang_node_t *io_gang_tree;
void *io_executor;
@@ -447,20 +443,21 @@
extern zio_t *zio_root(spa_t *spa,
zio_done_func_t *done, void *private, enum zio_flag flags);
extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, void *data,
uint64_t size, zio_done_func_t *done, void *private,
- int priority, enum zio_flag flags, const zbookmark_t *zb);
+ zio_priority_t priority, enum zio_flag flags, const zbookmark_t *zb);
extern zio_t *zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
void *data, uint64_t size, const zio_prop_t *zp,
- zio_done_func_t *ready, zio_done_func_t *done, void *private,
- int priority, enum zio_flag flags, const zbookmark_t *zb);
+ zio_done_func_t *ready, zio_done_func_t *physdone, zio_done_func_t *done,
+ void *private,
+ zio_priority_t priority, enum zio_flag flags, const zbookmark_t *zb);
extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
void *data, uint64_t size, zio_done_func_t *done, void *private,
- int priority, enum zio_flag flags, zbookmark_t *zb);
+ zio_priority_t priority, enum zio_flag flags, zbookmark_t *zb);
extern void zio_write_override(zio_t *zio, blkptr_t *bp, int copies,
boolean_t nopwrite);
extern void zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp);
@@ -468,21 +465,21 @@
extern zio_t *zio_claim(zio_t *pio, spa_t *spa, uint64_t txg,
const blkptr_t *bp,
zio_done_func_t *done, void *private, enum zio_flag flags);
extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
- zio_done_func_t *done, void *private, int priority, enum zio_flag flags);
+ zio_done_func_t *done, void *private, enum zio_flag flags);
extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
uint64_t size, void *data, int checksum,
- zio_done_func_t *done, void *private, int priority, enum zio_flag flags,
- boolean_t labels);
+ zio_done_func_t *done, void *private, zio_priority_t priority,
+ enum zio_flag flags, boolean_t labels);
extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
uint64_t size, void *data, int checksum,
- zio_done_func_t *done, void *private, int priority, enum zio_flag flags,
- boolean_t labels);
+ zio_done_func_t *done, void *private, zio_priority_t priority,
+ enum zio_flag flags, boolean_t labels);
extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg,
const blkptr_t *bp, enum zio_flag flags);
extern int zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp,
@@ -507,15 +504,16 @@
extern void zio_data_buf_free(void *buf, size_t size);
extern void zio_resubmit_stage_async(void *);
extern zio_t *zio_vdev_child_io(zio_t *zio, blkptr_t *bp, vdev_t *vd,
- uint64_t offset, void *data, uint64_t size, int type, int priority,
- enum zio_flag flags, zio_done_func_t *done, void *private);
+ uint64_t offset, void *data, uint64_t size, int type,
+ zio_priority_t priority, enum zio_flag flags,
+ zio_done_func_t *done, void *private);
extern zio_t *zio_vdev_delegated_io(vdev_t *vd, uint64_t offset,
- void *data, uint64_t size, int type, int priority,
+ void *data, uint64_t size, int type, zio_priority_t priority,
enum zio_flag flags, zio_done_func_t *done, void *private);
extern void zio_vdev_io_bypass(zio_t *zio);
extern void zio_vdev_io_reissue(zio_t *zio);
extern void zio_vdev_io_redone(zio_t *zio);