Print this page
4045 zfs write throttle & i/o scheduler performance work
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Adam Leventhal <ahl@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/zfs/sys/zio.h
          +++ new/usr/src/uts/common/fs/zfs/sys/zio.h
↓ open down ↓ 14 lines elided ↑ open up ↑
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  24   24   * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  25      - * Copyright (c) 2012 by Delphix. All rights reserved.
       25 + * Copyright (c) 2013 by Delphix. All rights reserved.
  26   26   * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
  27   27   */
  28   28  
  29   29  #ifndef _ZIO_H
  30   30  #define _ZIO_H
  31   31  
  32   32  #include <sys/zfs_context.h>
  33   33  #include <sys/spa.h>
  34   34  #include <sys/txg.h>
  35   35  #include <sys/avl.h>
↓ open down ↓ 83 lines elided ↑ open up ↑
 119  119          ((compress) == ZIO_COMPRESS_LZJB ||             \
 120  120          (compress) == ZIO_COMPRESS_LZ4 ||               \
 121  121          ((compress) == ZIO_COMPRESS_ON &&               \
 122  122          ZIO_COMPRESS_ON_VALUE == ZIO_COMPRESS_LZJB) ||  \
 123  123          (compress) == ZIO_COMPRESS_OFF)
 124  124  
 125  125  #define ZIO_FAILURE_MODE_WAIT           0
 126  126  #define ZIO_FAILURE_MODE_CONTINUE       1
 127  127  #define ZIO_FAILURE_MODE_PANIC          2
 128  128  
 129      -#define ZIO_PRIORITY_NOW                (zio_priority_table[0])
 130      -#define ZIO_PRIORITY_SYNC_READ          (zio_priority_table[1])
 131      -#define ZIO_PRIORITY_SYNC_WRITE         (zio_priority_table[2])
 132      -#define ZIO_PRIORITY_LOG_WRITE          (zio_priority_table[3])
 133      -#define ZIO_PRIORITY_CACHE_FILL         (zio_priority_table[4])
 134      -#define ZIO_PRIORITY_AGG                (zio_priority_table[5])
 135      -#define ZIO_PRIORITY_FREE               (zio_priority_table[6])
 136      -#define ZIO_PRIORITY_ASYNC_WRITE        (zio_priority_table[7])
 137      -#define ZIO_PRIORITY_ASYNC_READ         (zio_priority_table[8])
 138      -#define ZIO_PRIORITY_RESILVER           (zio_priority_table[9])
 139      -#define ZIO_PRIORITY_SCRUB              (zio_priority_table[10])
 140      -#define ZIO_PRIORITY_DDT_PREFETCH       (zio_priority_table[11])
 141      -#define ZIO_PRIORITY_TABLE_SIZE         12
      129 +typedef enum zio_priority {
      130 +        ZIO_PRIORITY_SYNC_READ,
      131 +        ZIO_PRIORITY_SYNC_WRITE,        /* ZIL */
      132 +        ZIO_PRIORITY_ASYNC_READ,        /* prefetch */
      133 +        ZIO_PRIORITY_ASYNC_WRITE,       /* spa_sync() */
      134 +        ZIO_PRIORITY_SCRUB,             /* asynchronous scrub/resilver reads */
      135 +        ZIO_PRIORITY_NUM_QUEUEABLE,
 142  136  
      137 +        ZIO_PRIORITY_NOW                /* non-queued i/os (e.g. free) */
      138 +} zio_priority_t;
      139 +
 143  140  #define ZIO_PIPELINE_CONTINUE           0x100
 144  141  #define ZIO_PIPELINE_STOP               0x101
 145  142  
 146  143  enum zio_flag {
 147  144          /*
 148  145           * Flags inherited by gang, ddt, and vdev children,
 149  146           * and that must be equal for two zios to aggregate
 150  147           */
 151  148          ZIO_FLAG_DONT_AGGREGATE = 1 << 0,
 152  149          ZIO_FLAG_IO_REPAIR      = 1 << 1,
↓ open down ↓ 34 lines elided ↑ open up ↑
 187  184          ZIO_FLAG_DONT_QUEUE     = 1 << 17,      /* must be first for INHERIT */
 188  185          ZIO_FLAG_DONT_PROPAGATE = 1 << 18,
 189  186          ZIO_FLAG_IO_BYPASS      = 1 << 19,
 190  187          ZIO_FLAG_IO_REWRITE     = 1 << 20,
 191  188          ZIO_FLAG_RAW            = 1 << 21,
 192  189          ZIO_FLAG_GANG_CHILD     = 1 << 22,
 193  190          ZIO_FLAG_DDT_CHILD      = 1 << 23,
 194  191          ZIO_FLAG_GODFATHER      = 1 << 24,
 195  192          ZIO_FLAG_NOPWRITE       = 1 << 25,
 196  193          ZIO_FLAG_REEXECUTED     = 1 << 26,
      194 +        ZIO_FLAG_DELEGATED      = 1 << 27,
 197  195  };
 198  196  
 199  197  #define ZIO_FLAG_MUSTSUCCEED            0
 200  198  
 201  199  #define ZIO_DDT_CHILD_FLAGS(zio)                                \
 202  200          (((zio)->io_flags & ZIO_FLAG_DDT_INHERIT) |             \
 203  201          ZIO_FLAG_DDT_CHILD | ZIO_FLAG_CANFAIL)
 204  202  
 205  203  #define ZIO_GANG_CHILD_FLAGS(zio)                               \
 206  204          (((zio)->io_flags & ZIO_FLAG_GANG_INHERIT) |            \
↓ open down ↓ 19 lines elided ↑ open up ↑
 226  224  
 227  225  /*
 228  226   * We'll take the unused errnos, 'EBADE' and 'EBADR' (from the Convergent
 229  227   * graveyard) to indicate checksum errors and fragmentation.
 230  228   */
 231  229  #define ECKSUM  EBADE
 232  230  #define EFRAGS  EBADR
 233  231  
 234  232  typedef void zio_done_func_t(zio_t *zio);
 235  233  
 236      -extern uint8_t zio_priority_table[ZIO_PRIORITY_TABLE_SIZE];
 237      -extern char *zio_type_name[ZIO_TYPES];
      234 +extern const char *zio_type_name[ZIO_TYPES];
 238  235  
 239  236  /*
 240  237   * A bookmark is a four-tuple <objset, object, level, blkid> that uniquely
 241  238   * identifies any block in the pool.  By convention, the meta-objset (MOS)
 242  239   * is objset 0, and the meta-dnode is object 0.  This covers all blocks
 243  240   * except root blocks and ZIL blocks, which are defined as follows:
 244  241   *
 245  242   * Root blocks (objset_phys_t) are object 0, level -1:  <objset, 0, -1, 0>.
 246  243   * ZIL blocks are bookmarked <objset, 0, -2, blkid == ZIL sequence number>.
 247  244   * dmu_sync()ed ZIL data blocks are bookmarked <objset, object, -2, blkid>.
↓ open down ↓ 119 lines elided ↑ open up ↑
 367  364          list_node_t     zl_child_node;
 368  365  } zio_link_t;
 369  366  
 370  367  struct zio {
 371  368          /* Core information about this I/O */
 372  369          zbookmark_t     io_bookmark;
 373  370          zio_prop_t      io_prop;
 374  371          zio_type_t      io_type;
 375  372          enum zio_child  io_child_type;
 376  373          int             io_cmd;
 377      -        uint8_t         io_priority;
      374 +        zio_priority_t  io_priority;
 378  375          uint8_t         io_reexecute;
 379  376          uint8_t         io_state[ZIO_WAIT_TYPES];
 380  377          uint64_t        io_txg;
 381  378          spa_t           *io_spa;
 382  379          blkptr_t        *io_bp;
 383  380          blkptr_t        *io_bp_override;
 384  381          blkptr_t        io_bp_copy;
 385  382          list_t          io_parent_list;
 386  383          list_t          io_child_list;
 387  384          zio_link_t      *io_walk_link;
 388  385          zio_t           *io_logical;
 389  386          zio_transform_t *io_transform_stack;
 390  387  
 391  388          /* Callback info */
 392  389          zio_done_func_t *io_ready;
      390 +        zio_done_func_t *io_physdone;
 393  391          zio_done_func_t *io_done;
 394  392          void            *io_private;
 395  393          int64_t         io_prev_space_delta;    /* DMU private */
 396  394          blkptr_t        io_bp_orig;
 397  395  
 398  396          /* Data represented by this I/O */
 399  397          void            *io_data;
 400  398          void            *io_orig_data;
 401  399          uint64_t        io_size;
 402  400          uint64_t        io_orig_size;
 403  401  
 404  402          /* Stuff for the vdev stack */
 405  403          vdev_t          *io_vd;
 406  404          void            *io_vsd;
 407  405          const zio_vsd_ops_t *io_vsd_ops;
 408  406  
 409  407          uint64_t        io_offset;
 410      -        uint64_t        io_deadline;
 411  408          hrtime_t        io_timestamp;
 412      -        avl_node_t      io_offset_node;
 413      -        avl_node_t      io_deadline_node;
 414      -        avl_tree_t      *io_vdev_tree;
      409 +        avl_node_t      io_queue_node;
 415  410  
 416  411          /* Internal pipeline state */
 417  412          enum zio_flag   io_flags;
 418  413          enum zio_stage  io_stage;
 419  414          enum zio_stage  io_pipeline;
 420  415          enum zio_flag   io_orig_flags;
 421  416          enum zio_stage  io_orig_stage;
 422  417          enum zio_stage  io_orig_pipeline;
 423  418          int             io_error;
 424  419          int             io_child_error[ZIO_CHILD_TYPES];
 425  420          uint64_t        io_children[ZIO_CHILD_TYPES][ZIO_WAIT_TYPES];
 426  421          uint64_t        io_child_count;
      422 +        uint64_t        io_phys_children;
 427  423          uint64_t        io_parent_count;
 428  424          uint64_t        *io_stall;
 429  425          zio_t           *io_gang_leader;
 430  426          zio_gang_node_t *io_gang_tree;
 431  427          void            *io_executor;
 432  428          void            *io_waiter;
 433  429          kmutex_t        io_lock;
 434  430          kcondvar_t      io_cv;
 435  431  
 436  432          /* FMA state */
↓ open down ↓ 5 lines elided ↑ open up ↑
 442  438  };
 443  439  
 444  440  extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd,
 445  441      zio_done_func_t *done, void *private, enum zio_flag flags);
 446  442  
 447  443  extern zio_t *zio_root(spa_t *spa,
 448  444      zio_done_func_t *done, void *private, enum zio_flag flags);
 449  445  
 450  446  extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, void *data,
 451  447      uint64_t size, zio_done_func_t *done, void *private,
 452      -    int priority, enum zio_flag flags, const zbookmark_t *zb);
      448 +    zio_priority_t priority, enum zio_flag flags, const zbookmark_t *zb);
 453  449  
 454  450  extern zio_t *zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
 455  451      void *data, uint64_t size, const zio_prop_t *zp,
 456      -    zio_done_func_t *ready, zio_done_func_t *done, void *private,
 457      -    int priority, enum zio_flag flags, const zbookmark_t *zb);
      452 +    zio_done_func_t *ready, zio_done_func_t *physdone, zio_done_func_t *done,
      453 +    void *private,
      454 +    zio_priority_t priority, enum zio_flag flags, const zbookmark_t *zb);
 458  455  
 459  456  extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
 460  457      void *data, uint64_t size, zio_done_func_t *done, void *private,
 461      -    int priority, enum zio_flag flags, zbookmark_t *zb);
      458 +    zio_priority_t priority, enum zio_flag flags, zbookmark_t *zb);
 462  459  
 463  460  extern void zio_write_override(zio_t *zio, blkptr_t *bp, int copies,
 464  461      boolean_t nopwrite);
 465  462  
 466  463  extern void zio_free(spa_t *spa, uint64_t txg, const blkptr_t *bp);
 467  464  
 468  465  extern zio_t *zio_claim(zio_t *pio, spa_t *spa, uint64_t txg,
 469  466      const blkptr_t *bp,
 470  467      zio_done_func_t *done, void *private, enum zio_flag flags);
 471  468  
 472  469  extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
 473      -    zio_done_func_t *done, void *private, int priority, enum zio_flag flags);
      470 +    zio_done_func_t *done, void *private, enum zio_flag flags);
 474  471  
 475  472  extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
 476  473      uint64_t size, void *data, int checksum,
 477      -    zio_done_func_t *done, void *private, int priority, enum zio_flag flags,
 478      -    boolean_t labels);
      474 +    zio_done_func_t *done, void *private, zio_priority_t priority,
      475 +    enum zio_flag flags, boolean_t labels);
 479  476  
 480  477  extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
 481  478      uint64_t size, void *data, int checksum,
 482      -    zio_done_func_t *done, void *private, int priority, enum zio_flag flags,
 483      -    boolean_t labels);
      479 +    zio_done_func_t *done, void *private, zio_priority_t priority,
      480 +    enum zio_flag flags, boolean_t labels);
 484  481  
 485  482  extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg,
 486  483      const blkptr_t *bp, enum zio_flag flags);
 487  484  
 488  485  extern int zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp,
 489  486      blkptr_t *old_bp, uint64_t size, boolean_t use_slog);
 490  487  extern void zio_free_zil(spa_t *spa, uint64_t txg, blkptr_t *bp);
 491  488  extern void zio_flush(zio_t *zio, vdev_t *vd);
 492  489  extern void zio_shrink(zio_t *zio, uint64_t size);
 493  490  
↓ open down ↓ 8 lines elided ↑ open up ↑
 502  499  extern void zio_add_child(zio_t *pio, zio_t *cio);
 503  500  
 504  501  extern void *zio_buf_alloc(size_t size);
 505  502  extern void zio_buf_free(void *buf, size_t size);
 506  503  extern void *zio_data_buf_alloc(size_t size);
 507  504  extern void zio_data_buf_free(void *buf, size_t size);
 508  505  
 509  506  extern void zio_resubmit_stage_async(void *);
 510  507  
 511  508  extern zio_t *zio_vdev_child_io(zio_t *zio, blkptr_t *bp, vdev_t *vd,
 512      -    uint64_t offset, void *data, uint64_t size, int type, int priority,
 513      -    enum zio_flag flags, zio_done_func_t *done, void *private);
      509 +    uint64_t offset, void *data, uint64_t size, int type,
      510 +    zio_priority_t priority, enum zio_flag flags,
      511 +    zio_done_func_t *done, void *private);
 514  512  
 515  513  extern zio_t *zio_vdev_delegated_io(vdev_t *vd, uint64_t offset,
 516      -    void *data, uint64_t size, int type, int priority,
      514 +    void *data, uint64_t size, int type, zio_priority_t priority,
 517  515      enum zio_flag flags, zio_done_func_t *done, void *private);
 518  516  
 519  517  extern void zio_vdev_io_bypass(zio_t *zio);
 520  518  extern void zio_vdev_io_reissue(zio_t *zio);
 521  519  extern void zio_vdev_io_redone(zio_t *zio);
 522  520  
 523  521  extern void zio_checksum_verified(zio_t *zio);
 524  522  extern int zio_worst_error(int e1, int e2);
 525  523  
 526  524  extern enum zio_checksum zio_checksum_select(enum zio_checksum child,
↓ open down ↓ 61 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX