4334 Improve ZFS N-way mirror read performance

--- vdev_queue.c (before) ---

 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2013 by Delphix. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/vdev_impl.h>
#include <sys/spa_impl.h>
#include <sys/zio.h>
#include <sys/avl.h>
#include <sys/dsl_pool.h>

/*
 * ZFS I/O Scheduler
 * -----------------
 *
 * ZFS issues I/O operations to leaf vdevs to satisfy and complete zios.  The
 * I/O scheduler determines when and in what order those operations are
 * issued.  The I/O scheduler divides operations into five I/O classes
 * prioritized in the following order: sync read, sync write, async read,
 * async write, and scrub/resilver.  Each queue defines the minimum and
 * maximum number of concurrent operations that may be issued to the device.
 * In addition, the device has an aggregate maximum. Note that the sum of the

[...]

        mutex_init(&vq->vq_lock, NULL, MUTEX_DEFAULT, NULL);
        vq->vq_vdev = vd;

        avl_create(&vq->vq_active_tree, vdev_queue_offset_compare,
            sizeof (zio_t), offsetof(struct zio, io_queue_node));

        for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
                /*
                 * The synchronous i/o queues are FIFO rather than LBA ordered.
                 * This provides more consistent latency for these i/os, and
                 * they tend to not be tightly clustered anyway so there is
                 * little to no throughput loss.
                 */
                boolean_t fifo = (p == ZIO_PRIORITY_SYNC_READ ||
                    p == ZIO_PRIORITY_SYNC_WRITE);
                avl_create(&vq->vq_class[p].vqc_queued_tree,
                    fifo ? vdev_queue_timestamp_compare :
                    vdev_queue_offset_compare,
                    sizeof (zio_t), offsetof(struct zio, io_queue_node));
        }
}

void
vdev_queue_fini(vdev_t *vd)
{
        vdev_queue_t *vq = &vd->vdev_queue;

        for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++)
                avl_destroy(&vq->vq_class[p].vqc_queued_tree);
        avl_destroy(&vq->vq_active_tree);

        mutex_destroy(&vq->vq_lock);
}

static void
vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio)
{
        spa_t *spa = zio->io_spa;
        ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
        avl_add(&vq->vq_class[zio->io_priority].vqc_queued_tree, zio);

[...]


        mutex_enter(&vq->vq_lock);

        vdev_queue_pending_remove(vq, zio);

        vq->vq_io_complete_ts = gethrtime();

        while ((nio = vdev_queue_io_to_issue(vq)) != NULL) {
                mutex_exit(&vq->vq_lock);
                if (nio->io_done == vdev_queue_agg_io_done) {
                        zio_nowait(nio);
                } else {
                        zio_vdev_io_reissue(nio);
                        zio_execute(nio);
                }
                mutex_enter(&vq->vq_lock);
        }

        mutex_exit(&vq->vq_lock);
}

--- vdev_queue.c (after) ---

 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2013 by Delphix. All rights reserved.
 * Copyright (c) 2013 Steven Hartland. All rights reserved.
 */

#include <sys/zfs_context.h>
#include <sys/vdev_impl.h>
#include <sys/spa_impl.h>
#include <sys/zio.h>
#include <sys/avl.h>
#include <sys/dsl_pool.h>

/*
 * ZFS I/O Scheduler
 * -----------------
 *
 * ZFS issues I/O operations to leaf vdevs to satisfy and complete zios.  The
 * I/O scheduler determines when and in what order those operations are
 * issued.  The I/O scheduler divides operations into five I/O classes
 * prioritized in the following order: sync read, sync write, async read,
 * async write, and scrub/resilver.  Each queue defines the minimum and
 * maximum number of concurrent operations that may be issued to the device.
 * In addition, the device has an aggregate maximum. Note that the sum of the

[...]

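The min/max scheme described in the comment above is implemented by
class-selection logic later in this file, which this webrev does not
display.  As a simplified sketch of the two-pass pattern (the helper
names vdev_queue_class_min_active()/vdev_queue_class_max_active() and
the zfs_vdev_max_active tunable are shown here as assumptions, not part
of this diff):

static zio_priority_t
vdev_queue_class_to_issue(vdev_queue_t *vq)
{
        zio_priority_t p;

        /* Respect the per-device aggregate maximum. */
        if (avl_numnodes(&vq->vq_active_tree) >= zfs_vdev_max_active)
                return (ZIO_PRIORITY_NUM_QUEUEABLE);

        /* Pass 1: bring each class up to its minimum, in priority order. */
        for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
                if (avl_numnodes(&vq->vq_class[p].vqc_queued_tree) > 0 &&
                    vq->vq_class[p].vqc_active <
                    vdev_queue_class_min_active(p))
                        return (p);
        }

        /* Pass 2: fill toward each class's maximum, in priority order. */
        for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
                if (avl_numnodes(&vq->vq_class[p].vqc_queued_tree) > 0 &&
                    vq->vq_class[p].vqc_active <
                    vdev_queue_class_max_active(p))
                        return (p);
        }

        /* No eligible queued i/os. */
        return (ZIO_PRIORITY_NUM_QUEUEABLE);
}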
        mutex_init(&vq->vq_lock, NULL, MUTEX_DEFAULT, NULL);
        vq->vq_vdev = vd;

        avl_create(&vq->vq_active_tree, vdev_queue_offset_compare,
            sizeof (zio_t), offsetof(struct zio, io_queue_node));

        for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
                /*
                 * The synchronous i/o queues are FIFO rather than LBA ordered.
                 * This provides more consistent latency for these i/os, and
                 * they tend to not be tightly clustered anyway so there is
                 * little to no throughput loss.
                 */
                boolean_t fifo = (p == ZIO_PRIORITY_SYNC_READ ||
                    p == ZIO_PRIORITY_SYNC_WRITE);
                avl_create(&vq->vq_class[p].vqc_queued_tree,
                    fifo ? vdev_queue_timestamp_compare :
                    vdev_queue_offset_compare,
                    sizeof (zio_t), offsetof(struct zio, io_queue_node));
        }

        vq->vq_lastoffset = 0;
}

void
vdev_queue_fini(vdev_t *vd)
{
        vdev_queue_t *vq = &vd->vdev_queue;

        for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++)
                avl_destroy(&vq->vq_class[p].vqc_queued_tree);
        avl_destroy(&vq->vq_active_tree);

        mutex_destroy(&vq->vq_lock);
}

static void
vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio)
{
        spa_t *spa = zio->io_spa;
        ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
        avl_add(&vq->vq_class[zio->io_priority].vqc_queued_tree, zio);

[...]

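The FIFO-versus-LBA behavior selected in vdev_queue_init() above comes
down to the comparator installed in each class's queued tree.  A sketch
of the two comparators, assuming each zio carries its io_offset and an
io_timestamp taken when it is queued (the pointer comparison is only a
tie-breaker that keeps AVL keys unique):

/* LBA order, used for the async and scrub/resilver classes. */
static int
vdev_queue_offset_compare(const void *x1, const void *x2)
{
        const zio_t *z1 = x1;
        const zio_t *z2 = x2;

        if (z1->io_offset < z2->io_offset)
                return (-1);
        if (z1->io_offset > z2->io_offset)
                return (1);

        if (z1 < z2)
                return (-1);
        if (z1 > z2)
                return (1);
        return (0);
}

/* FIFO order, used for the sync classes. */
static int
vdev_queue_timestamp_compare(const void *x1, const void *x2)
{
        const zio_t *z1 = x1;
        const zio_t *z2 = x2;

        if (z1->io_timestamp < z2->io_timestamp)
                return (-1);
        if (z1->io_timestamp > z2->io_timestamp)
                return (1);

        if (z1 < z2)
                return (-1);
        if (z1 > z2)
                return (1);
        return (0);
}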

        mutex_enter(&vq->vq_lock);

        vdev_queue_pending_remove(vq, zio);

        vq->vq_io_complete_ts = gethrtime();

        while ((nio = vdev_queue_io_to_issue(vq)) != NULL) {
                mutex_exit(&vq->vq_lock);
                if (nio->io_done == vdev_queue_agg_io_done) {
                        zio_nowait(nio);
                } else {
                        zio_vdev_io_reissue(nio);
                        zio_execute(nio);
                }
                mutex_enter(&vq->vq_lock);
        }

        mutex_exit(&vq->vq_lock);
}

/*
 * The following three functions are used only for load calculations, so
 * we are not concerned about reading a stale value on 32-bit platforms
 * due to the lack of vq_lock protection here; we prefer to keep them
 * lock-free for performance.
 */
int
vdev_queue_length(vdev_t *vd)
{
        return (avl_numnodes(&vd->vdev_queue.vq_active_tree));
}

uint64_t
vdev_queue_lastoffset(vdev_t *vd)
{
        return (vd->vdev_queue.vq_lastoffset);
}

void
vdev_queue_register_lastoffset(vdev_t *vd, zio_t *zio)
{
        vd->vdev_queue.vq_lastoffset = zio->io_offset + zio->io_size;
}
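
These accessors exist so the mirror code can pick the least-loaded child
of an N-way mirror for each read: vdev_queue_length() exposes how many
i/os a child already has in flight, and vdev_queue_lastoffset() exposes
where the last issued i/o ended (io_offset + io_size), so a follow-on
read at exactly that offset is sequential.  A hypothetical sketch of a
consumer; the function, its weighting, and the "rotating" parameter are
illustrative assumptions, not the code under review:

/*
 * Score a mirror child for read selection; lower is better.
 * Illustrative only.
 */
static int
vdev_mirror_child_load(vdev_t *vd, uint64_t zio_offset, boolean_t rotating)
{
        int load = vdev_queue_length(vd);       /* i/os in flight */

        /*
         * On rotating media, continuing sequentially from the last
         * issued i/o avoids a seek, so reward exact locality.
         */
        if (rotating && vdev_queue_lastoffset(vd) == zio_offset)
                load -= 1;      /* illustrative seek-free bonus */

        return (load);
}

After issuing the read to the winning child, the caller would record the
new locality with vdev_queue_register_lastoffset(vd, zio), which is why
that function stores io_offset + io_size rather than io_offset.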