8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright (c) 2013 by Delphix. All rights reserved.
28 */
29
30 #include <sys/zfs_context.h>
31 #include <sys/vdev_impl.h>
32 #include <sys/spa_impl.h>
33 #include <sys/zio.h>
34 #include <sys/avl.h>
35 #include <sys/dsl_pool.h>
36
37 /*
38 * ZFS I/O Scheduler
39 * ---------------
40 *
41 * ZFS issues I/O operations to leaf vdevs to satisfy and complete zios. The
42 * I/O scheduler determines when and in what order those operations are
43 * issued. The I/O scheduler divides operations into five I/O classes
44 * prioritized in the following order: sync read, sync write, async read,
45 * async write, and scrub/resilver. Each queue defines the minimum and
46 * maximum number of concurrent operations that may be issued to the device.
47 * In addition, the device has an aggregate maximum. Note that the sum of the
212 mutex_init(&vq->vq_lock, NULL, MUTEX_DEFAULT, NULL);
213 vq->vq_vdev = vd;
214
215 avl_create(&vq->vq_active_tree, vdev_queue_offset_compare,
216 sizeof (zio_t), offsetof(struct zio, io_queue_node));
217
218 for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
219 /*
220 * The synchronous i/o queues are FIFO rather than LBA ordered.
221 * This provides more consistent latency for these i/os, and
222 * they tend to not be tightly clustered anyway so there is
223 * little to no throughput loss.
224 */
225 boolean_t fifo = (p == ZIO_PRIORITY_SYNC_READ ||
226 p == ZIO_PRIORITY_SYNC_WRITE);
227 avl_create(&vq->vq_class[p].vqc_queued_tree,
228 fifo ? vdev_queue_timestamp_compare :
229 vdev_queue_offset_compare,
230 sizeof (zio_t), offsetof(struct zio, io_queue_node));
231 }
232 }
233
234 void
235 vdev_queue_fini(vdev_t *vd)
236 {
237 vdev_queue_t *vq = &vd->vdev_queue;
238
239 for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++)
240 avl_destroy(&vq->vq_class[p].vqc_queued_tree);
241 avl_destroy(&vq->vq_active_tree);
242
243 mutex_destroy(&vq->vq_lock);
244 }
245
246 static void
247 vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio)
248 {
249 spa_t *spa = zio->io_spa;
250 ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
251 avl_add(&vq->vq_class[zio->io_priority].vqc_queued_tree, zio);
708
709 mutex_enter(&vq->vq_lock);
710
711 vdev_queue_pending_remove(vq, zio);
712
713 vq->vq_io_complete_ts = gethrtime();
714
715 while ((nio = vdev_queue_io_to_issue(vq)) != NULL) {
716 mutex_exit(&vq->vq_lock);
717 if (nio->io_done == vdev_queue_agg_io_done) {
718 zio_nowait(nio);
719 } else {
720 zio_vdev_io_reissue(nio);
721 zio_execute(nio);
722 }
723 mutex_enter(&vq->vq_lock);
724 }
725
726 mutex_exit(&vq->vq_lock);
727 }
|
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright (c) 2013 by Delphix. All rights reserved.
28 * Copyright (c) 2013 Steven Hartland. All rights reserved.
29 */
30
31 #include <sys/zfs_context.h>
32 #include <sys/vdev_impl.h>
33 #include <sys/spa_impl.h>
34 #include <sys/zio.h>
35 #include <sys/avl.h>
36 #include <sys/dsl_pool.h>
37
38 /*
39 * ZFS I/O Scheduler
40 * ---------------
41 *
42 * ZFS issues I/O operations to leaf vdevs to satisfy and complete zios. The
43 * I/O scheduler determines when and in what order those operations are
44 * issued. The I/O scheduler divides operations into five I/O classes
45 * prioritized in the following order: sync read, sync write, async read,
46 * async write, and scrub/resilver. Each queue defines the minimum and
47 * maximum number of concurrent operations that may be issued to the device.
48 * In addition, the device has an aggregate maximum. Note that the sum of the
213 mutex_init(&vq->vq_lock, NULL, MUTEX_DEFAULT, NULL);
214 vq->vq_vdev = vd;
215
216 avl_create(&vq->vq_active_tree, vdev_queue_offset_compare,
217 sizeof (zio_t), offsetof(struct zio, io_queue_node));
218
219 for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
220 /*
221 * The synchronous i/o queues are FIFO rather than LBA ordered.
222 * This provides more consistent latency for these i/os, and
223 * they tend to not be tightly clustered anyway so there is
224 * little to no throughput loss.
225 */
226 boolean_t fifo = (p == ZIO_PRIORITY_SYNC_READ ||
227 p == ZIO_PRIORITY_SYNC_WRITE);
228 avl_create(&vq->vq_class[p].vqc_queued_tree,
229 fifo ? vdev_queue_timestamp_compare :
230 vdev_queue_offset_compare,
231 sizeof (zio_t), offsetof(struct zio, io_queue_node));
232 }
233
234 vq->vq_last_queued_offset = 0;
235 }
236
237 void
238 vdev_queue_fini(vdev_t *vd)
239 {
240 vdev_queue_t *vq = &vd->vdev_queue;
241
242 for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++)
243 avl_destroy(&vq->vq_class[p].vqc_queued_tree);
244 avl_destroy(&vq->vq_active_tree);
245
246 mutex_destroy(&vq->vq_lock);
247 }
248
249 static void
250 vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio)
251 {
252 spa_t *spa = zio->io_spa;
253 ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
254 avl_add(&vq->vq_class[zio->io_priority].vqc_queued_tree, zio);
711
712 mutex_enter(&vq->vq_lock);
713
714 vdev_queue_pending_remove(vq, zio);
715
716 vq->vq_io_complete_ts = gethrtime();
717
718 while ((nio = vdev_queue_io_to_issue(vq)) != NULL) {
719 mutex_exit(&vq->vq_lock);
720 if (nio->io_done == vdev_queue_agg_io_done) {
721 zio_nowait(nio);
722 } else {
723 zio_vdev_io_reissue(nio);
724 zio_execute(nio);
725 }
726 mutex_enter(&vq->vq_lock);
727 }
728
729 mutex_exit(&vq->vq_lock);
730 }
731
/*
 * These three functions are only used for load calculations, so we are not
 * concerned if they occasionally return an incorrect value on 32-bit
 * platforms because vq_lock is not held here; we prefer to keep them
 * lock-free for performance.
 */
738 int
739 vdev_queue_length(vdev_t *vd)
740 {
741 vdev_queue_t *vq = &vd->vdev_queue;
742 int len = 0;
743
744 for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++)
745 len += avl_numnodes(&vq->vq_class[p].vqc_queued_tree);
746
747 return (len);
748 }
749
750 uint64_t
751 vdev_queue_last_queued_offset(vdev_t *vd)
752 {
753 return (vd->vdev_queue.vq_last_queued_offset);
754 }
755
756 void
757 vdev_queue_register_last_queued_offset(vdev_t *vd, zio_t *zio)
758 {
759 vd->vdev_queue.vq_last_queued_offset = zio->io_offset + zio->io_size;
760 }
|