Print this page
4334 Improve ZFS N-way mirror read performance


   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2013 by Delphix. All rights reserved.

  24  */
  25 
  26 #ifndef _SYS_VDEV_IMPL_H
  27 #define _SYS_VDEV_IMPL_H
  28 
  29 #include <sys/avl.h>
  30 #include <sys/dmu.h>
  31 #include <sys/metaslab.h>
  32 #include <sys/nvpair.h>
  33 #include <sys/space_map.h>
  34 #include <sys/vdev.h>
  35 #include <sys/dkio.h>
  36 #include <sys/uberblock_impl.h>
  37 
  38 #ifdef  __cplusplus
  39 extern "C" {
  40 #endif
  41 
  42 /*
  43  * Virtual device descriptors.


  99         kmutex_t        vc_lock;
 100 };
 101 
 102 typedef struct vdev_queue_class {
             /*
              * Queue state for a single I/O class; struct vdev_queue holds an
              * array of these sized by ZIO_PRIORITY_NUM_QUEUEABLE.
              */

             /*
              * NOTE(review): presumably the number of I/Os of this class that
              * have been issued and not yet completed -- confirm in the queue
              * implementation.
              */
 103         uint32_t        vqc_active;
 104 
 105         /*
 106          * Sorted by offset or by timestamp, depending on whether the
 107          * queue is LBA-ordered or FIFO.
 108          */
 109         avl_tree_t      vqc_queued_tree;
 110 } vdev_queue_class_t;
 111 
 112 struct vdev_queue {
             /*
              * I/O queue state kept per vdev.  NOTE(review): struct vdev has a
              * vdev_queue_t member named vdev_queue, presumably of this type.
              */
 113         vdev_t          *vq_vdev;
             /*
              * Array sized by ZIO_PRIORITY_NUM_QUEUEABLE; presumably indexed by
              * zio priority -- confirm against the queue implementation.
              */
 114         vdev_queue_class_t vq_class[ZIO_PRIORITY_NUM_QUEUEABLE];
 115         avl_tree_t      vq_active_tree;
 116         uint64_t        vq_last_offset;
 117         hrtime_t        vq_io_complete_ts; /* time last i/o completed */
             /* NOTE(review): presumably guards the fields above -- confirm. */
 118         kmutex_t        vq_lock;

 119 };
 120 
 121 /*
 122  * Virtual device descriptor
 123  */
 124 struct vdev {
 125         /*
 126          * Common to all vdev types.
 127          */
 128         uint64_t        vdev_id;        /* child number in vdev parent  */
 129         uint64_t        vdev_guid;      /* unique ID for this vdev      */
 130         uint64_t        vdev_guid_sum;  /* self guid + all child guids  */
 131         uint64_t        vdev_orig_guid; /* orig. guid prior to remove   */
 132         uint64_t        vdev_asize;     /* allocatable device capacity  */
 133         uint64_t        vdev_min_asize; /* min acceptable asize         */
 134         uint64_t        vdev_max_asize; /* max acceptable asize         */
 135         uint64_t        vdev_ashift;    /* block alignment shift        */
 136         uint64_t        vdev_state;     /* see VDEV_STATE_* #defines    */
 137         uint64_t        vdev_prevstate; /* used when reopening a vdev   */
 138         vdev_ops_t      *vdev_ops;      /* vdev operations              */


 191         char            *vdev_physpath; /* vdev device path (if any)    */
 192         char            *vdev_fru;      /* physical FRU location        */
 193         uint64_t        vdev_not_present; /* not present during import  */
 194         uint64_t        vdev_unspare;   /* unspare when resilvering done */
 195         boolean_t       vdev_nowritecache; /* true if flushwritecache failed */
 196         boolean_t       vdev_checkremove; /* temporary online test      */
 197         boolean_t       vdev_forcefault; /* force online fault          */
 198         boolean_t       vdev_splitting; /* split or repair in progress  */
 199         boolean_t       vdev_delayed_close; /* delayed device close?    */
 200         boolean_t       vdev_tmpoffline; /* device taken offline temporarily? */
 201         boolean_t       vdev_detached;  /* device detached?             */
 202         boolean_t       vdev_cant_read; /* vdev is failing all reads    */
 203         boolean_t       vdev_cant_write; /* vdev is failing all writes  */
 204         boolean_t       vdev_isspare;   /* was a hot spare              */
 205         boolean_t       vdev_isl2cache; /* was a l2cache device         */
 206         vdev_queue_t    vdev_queue;     /* I/O deadline schedule queue  */
 207         vdev_cache_t    vdev_cache;     /* physical block cache         */
 208         spa_aux_vdev_t  *vdev_aux;      /* for l2cache vdevs            */
 209         zio_t           *vdev_probe_zio; /* root of current probe       */
 210         vdev_aux_t      vdev_label_aux; /* on-disk aux state            */



 211 
 212         /*
 213          * For DTrace to work in userland (libzpool) context, these fields must
 214          * remain at the end of the structure.  DTrace will use the kernel's
 215          * CTF definition for 'struct vdev', and since the size of a kmutex_t is
 216          * larger in userland, the offsets for the rest of the fields would be
 217          * incorrect.
 218          */
 219         kmutex_t        vdev_dtl_lock;  /* vdev_dtl_{map,resilver}      */
 220         kmutex_t        vdev_stat_lock; /* vdev_stat                    */
 221         kmutex_t        vdev_probe_lock; /* protects vdev_probe_zio     */
 222 };
 223 
 224 #define VDEV_RAIDZ_MAXPARITY    3
 225 
 226 #define VDEV_PAD_SIZE           (8 << 10)
 227 /* 2 padding areas (vl_pad1 and vl_pad2) to skip */
 228 #define VDEV_SKIP_SIZE          VDEV_PAD_SIZE * 2
 229 #define VDEV_PHYS_SIZE          (112 << 10)
 230 #define VDEV_UBERBLOCK_RING     (128 << 10)




   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2013 by Delphix. All rights reserved.
  24  * Copyright (c) 2013 Steven Hartland. All rights reserved.
  25  */
  26 
  27 #ifndef _SYS_VDEV_IMPL_H
  28 #define _SYS_VDEV_IMPL_H
  29 
  30 #include <sys/avl.h>
  31 #include <sys/dmu.h>
  32 #include <sys/metaslab.h>
  33 #include <sys/nvpair.h>
  34 #include <sys/space_map.h>
  35 #include <sys/vdev.h>
  36 #include <sys/dkio.h>
  37 #include <sys/uberblock_impl.h>
  38 
  39 #ifdef  __cplusplus
  40 extern "C" {
  41 #endif
  42 
  43 /*
  44  * Virtual device descriptors.


 100         kmutex_t        vc_lock;
 101 };
 102 
 103 typedef struct vdev_queue_class {
             /*
              * Queue state for a single I/O class; struct vdev_queue holds an
              * array of these sized by ZIO_PRIORITY_NUM_QUEUEABLE.
              */

             /*
              * NOTE(review): presumably the number of I/Os of this class that
              * have been issued and not yet completed -- confirm in the queue
              * implementation.
              */
 104         uint32_t        vqc_active;
 105 
 106         /*
 107          * Sorted by offset or by timestamp, depending on whether the
 108          * queue is LBA-ordered or FIFO.
 109          */
 110         avl_tree_t      vqc_queued_tree;
 111 } vdev_queue_class_t;
 112 
 113 struct vdev_queue {
             /*
              * I/O queue state kept per vdev.  NOTE(review): struct vdev has a
              * vdev_queue_t member named vdev_queue, presumably of this type.
              */
 114         vdev_t          *vq_vdev;
             /*
              * Array sized by ZIO_PRIORITY_NUM_QUEUEABLE; presumably indexed by
              * zio priority -- confirm against the queue implementation.
              */
 115         vdev_queue_class_t vq_class[ZIO_PRIORITY_NUM_QUEUEABLE];
 116         avl_tree_t      vq_active_tree;
 117         uint64_t        vq_last_offset;
 118         hrtime_t        vq_io_complete_ts; /* time last i/o completed */
             /* NOTE(review): presumably guards the other fields -- confirm. */
 119         kmutex_t        vq_lock;
             /*
              * NOTE(review): a separate field from vq_last_offset above despite
              * the near-identical name.  Presumably it records the offset of
              * the most recent I/O for the mirror read change this listing is
              * titled after (4334) -- confirm, and consider a name that is
              * harder to confuse with vq_last_offset.
              */
 120         uint64_t        vq_lastoffset;
 121 };
 122 
 123 /*
 124  * Virtual device descriptor
 125  */
 126 struct vdev {
 127         /*
 128          * Common to all vdev types.
 129          */
 130         uint64_t        vdev_id;        /* child number in vdev parent  */
 131         uint64_t        vdev_guid;      /* unique ID for this vdev      */
 132         uint64_t        vdev_guid_sum;  /* self guid + all child guids  */
 133         uint64_t        vdev_orig_guid; /* orig. guid prior to remove   */
 134         uint64_t        vdev_asize;     /* allocatable device capacity  */
 135         uint64_t        vdev_min_asize; /* min acceptable asize         */
 136         uint64_t        vdev_max_asize; /* max acceptable asize         */
 137         uint64_t        vdev_ashift;    /* block alignment shift        */
 138         uint64_t        vdev_state;     /* see VDEV_STATE_* #defines    */
 139         uint64_t        vdev_prevstate; /* used when reopening a vdev   */
 140         vdev_ops_t      *vdev_ops;      /* vdev operations              */


 193         char            *vdev_physpath; /* vdev device path (if any)    */
 194         char            *vdev_fru;      /* physical FRU location        */
 195         uint64_t        vdev_not_present; /* not present during import  */
 196         uint64_t        vdev_unspare;   /* unspare when resilvering done */
 197         boolean_t       vdev_nowritecache; /* true if flushwritecache failed */
 198         boolean_t       vdev_checkremove; /* temporary online test      */
 199         boolean_t       vdev_forcefault; /* force online fault          */
 200         boolean_t       vdev_splitting; /* split or repair in progress  */
 201         boolean_t       vdev_delayed_close; /* delayed device close?    */
 202         boolean_t       vdev_tmpoffline; /* device taken offline temporarily? */
 203         boolean_t       vdev_detached;  /* device detached?             */
 204         boolean_t       vdev_cant_read; /* vdev is failing all reads    */
 205         boolean_t       vdev_cant_write; /* vdev is failing all writes  */
 206         boolean_t       vdev_isspare;   /* was a hot spare              */
 207         boolean_t       vdev_isl2cache; /* was a l2cache device         */
 208         vdev_queue_t    vdev_queue;     /* I/O deadline schedule queue  */
 209         vdev_cache_t    vdev_cache;     /* physical block cache         */
 210         spa_aux_vdev_t  *vdev_aux;      /* for l2cache vdevs            */
 211         zio_t           *vdev_probe_zio; /* root of current probe       */
 212         vdev_aux_t      vdev_label_aux; /* on-disk aux state            */
 213         uint16_t        vdev_rotation_rate; /* rotational rate of the media */
 214 #define VDEV_RATE_UNKNOWN       0
 215 #define VDEV_RATE_NON_ROTATING  1
 216 
 217         /*
 218          * For DTrace to work in userland (libzpool) context, these fields must
 219          * remain at the end of the structure.  DTrace will use the kernel's
 220          * CTF definition for 'struct vdev', and since the size of a kmutex_t is
 221          * larger in userland, the offsets for the rest of the fields would be
 222          * incorrect.
 223          */
 224         kmutex_t        vdev_dtl_lock;  /* vdev_dtl_{map,resilver}      */
 225         kmutex_t        vdev_stat_lock; /* vdev_stat                    */
 226         kmutex_t        vdev_probe_lock; /* protects vdev_probe_zio     */
 227 };
 228 
 229 #define VDEV_RAIDZ_MAXPARITY    3
 230 
 231 #define VDEV_PAD_SIZE           (8 << 10)
 232 /* 2 padding areas (vl_pad1 and vl_pad2) to skip */
 233 #define VDEV_SKIP_SIZE          VDEV_PAD_SIZE * 2
 234 #define VDEV_PHYS_SIZE          (112 << 10)
 235 #define VDEV_UBERBLOCK_RING     (128 << 10)