4334 Improve ZFS N-way mirror read performance
--- old/usr/src/uts/common/fs/zfs/vdev_mirror.c
+++ new/usr/src/uts/common/fs/zfs/vdev_mirror.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 /*
27 27 * Copyright (c) 2013 by Delphix. All rights reserved.
28 + * Copyright (c) 2013 Steven Hartland. All rights reserved.
28 29 */
29 30
30 31 #include <sys/zfs_context.h>
31 32 #include <sys/spa.h>
32 33 #include <sys/vdev_impl.h>
33 34 #include <sys/zio.h>
34 35 #include <sys/fs/zfs.h>
35 36
36 37 /*
37 38 * Virtual device vector for mirroring.
38 39 */
39 40
40 41 typedef struct mirror_child {
41 42 vdev_t *mc_vd;
42 43 uint64_t mc_offset;
43 44 int mc_error;
45 + int mc_load;
44 46 uint8_t mc_tried;
45 47 uint8_t mc_skipped;
46 48 uint8_t mc_speculative;
47 49 } mirror_child_t;
48 50
49 51 typedef struct mirror_map {
52 + int *mm_preferred;
53 + int mm_preferred_cnt;
50 54 int mm_children;
51 - int mm_replacing;
52 - int mm_preferred;
53 - int mm_root;
54 - mirror_child_t mm_child[1];
55 + boolean_t mm_replacing;
56 + boolean_t mm_root;
57 + mirror_child_t mm_child[];
55 58 } mirror_map_t;
56 59
57 -int vdev_mirror_shift = 21;
60 +static int vdev_mirror_shift = 21;
61 +
62 +/*
63 + * The load configuration settings below are tuned by default for
64 + * the case where all devices are of the same rotational type.
65 + *
66 + * If there is a mixture of rotating and non-rotating media, setting
67 + * non_rotating_seek_inc to 0 may well provide better results, as it
68 + * will direct more reads to the non-rotating vdevs, which are likely
69 + * to offer higher performance.
70 + */
71 +
72 +/* Rotating media load calculation configuration. */
73 +/* Rotating media load increment for non-seeking I/O's. */
74 +static int rotating_inc = 0;
75 +
76 +/* Rotating media load increment for seeking I/O's. */
77 +static int rotating_seek_inc = 5;
78 +
79 +/*
80 + * Offset in bytes from the last I/O which triggers a reduced rotating media
81 + * seek increment.
82 + */
83 +static int rotating_seek_offset = 1 * 1024 * 1024;
84 +
85 +/* Non-rotating media load calculation configuration. */
86 +/* Non-rotating media load increment for non-seeking I/O's. */
87 +static int non_rotating_inc = 0;
88 +
89 +/* Non-rotating media load increment for seeking I/O's. */
90 +static int non_rotating_seek_inc = 1;
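
To illustrate the mixed-media guidance in the comment block above (a sketch, not part of this change), directing more reads at the non-rotating children only requires zeroing their seek increment, e.g. by building with the initializer changed:

	/* Hypothetical tuning for mixed rotating/non-rotating mirrors. */
	static int non_rotating_seek_inc = 0;	/* never penalize SSD children */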
91 +
92 +static inline size_t
93 +vdev_mirror_map_size(int children)
94 +{
95 + return (offsetof(mirror_map_t, mm_child[children]) +
96 + sizeof (int) * children);
97 +}
98 +
99 +static inline mirror_map_t *
100 +vdev_mirror_map_alloc(int children, boolean_t replacing, boolean_t root)
101 +{
102 + mirror_map_t *mm;
103 +
104 + mm = kmem_zalloc(vdev_mirror_map_size(children), KM_SLEEP);
105 + mm->mm_children = children;
106 + mm->mm_replacing = replacing;
107 + mm->mm_root = root;
108 + mm->mm_preferred = (int *)((uintptr_t)mm +
109 + offsetof(mirror_map_t, mm_child[children]));
110 +
111 + return (mm);
112 +}
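
The helpers above put everything in one allocation; a minimal sketch of the resulting layout (illustrative, exact sizes depend on padding):

	/*
	 * vdev_mirror_map_size(children) covers, in order:
	 *
	 *	mirror_map_t header	(mm_preferred .. mm_root)
	 *	mm_child[children]	flexible array member
	 *	mm_preferred[children]	trailing int array
	 *
	 * so vdev_mirror_map_alloc() simply aims mm->mm_preferred just
	 * past the last mm_child slot.
	 */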
58 113
59 114 static void
60 115 vdev_mirror_map_free(zio_t *zio)
61 116 {
62 117 mirror_map_t *mm = zio->io_vsd;
63 118
64 - kmem_free(mm, offsetof(mirror_map_t, mm_child[mm->mm_children]));
119 + kmem_free(mm, vdev_mirror_map_size(mm->mm_children));
65 120 }
66 121
67 122 static const zio_vsd_ops_t vdev_mirror_vsd_ops = {
68 123 vdev_mirror_map_free,
69 124 zio_vsd_default_cksum_report
70 125 };
71 126
127 +static int
128 +vdev_mirror_load(mirror_map_t *mm, vdev_t *vd, uint64_t zio_offset)
129 +{
130 + uint64_t lastoffset;
131 + int load;
132 +
133 + /* All DVAs have equal weight at the root. */
134 + if (mm->mm_root)
135 + return (INT_MAX);
136 +
137 + /*
138 +	 * We don't return INT_MAX if the device is resilvering, i.e.
139 +	 * vdev_resilver_txg != 0, because in testing overall performance
140 +	 * was slightly worse with that special case than without it.
141 + */
142 +
143 + /* Standard load based on pending queue length. */
144 + load = vdev_queue_length(vd);
145 + lastoffset = vdev_queue_lastoffset(vd);
146 +
147 + if (vd->vdev_rotation_rate == VDEV_RATE_NON_ROTATING) {
148 + /* Non-rotating media. */
149 + if (lastoffset == zio_offset)
150 + return (load + non_rotating_inc);
151 +
152 + /*
153 + * Apply a seek penalty even for non-rotating devices as
154 +	 * sequential I/O's can be aggregated into fewer operations
155 + * on the device, thus avoiding unnecessary per-command
156 + * overhead and boosting performance.
157 + */
158 + return (load + non_rotating_seek_inc);
159 + }
160 +
161 + /* Rotating media I/O's which directly follow the last I/O. */
162 + if (lastoffset == zio_offset)
163 + return (load + rotating_inc);
164 +
165 +	/*
166 +	 * Apply half the seek increment to I/O's within seek offset of the
167 +	 * last I/O queued to this vdev, as they should incur less of a seek
168 +	 * penalty (cast to a signed difference for the ABS() comparison).
169 +	 */
170 +	if (ABS((int64_t)(lastoffset - zio_offset)) < rotating_seek_offset)
171 + return (load + (rotating_seek_inc / 2));
172 +
173 + /* Apply the full seek increment to all other I/O's. */
174 + return (load + rotating_seek_inc);
175 +}
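
As a worked example, here is a minimal standalone userland sketch of the rotating-media arithmetic above (rotating_load() is a hypothetical rework that takes the queue length and last offset as parameters instead of reading them from the vdev):

	#include <stdio.h>
	#include <stdint.h>

	#define	ABS(x)	((x) < 0 ? -(x) : (x))

	static int rotating_inc = 0;
	static int rotating_seek_inc = 5;
	static int rotating_seek_offset = 1 * 1024 * 1024;

	/* The load a rotating child would be assigned for a read at offset. */
	static int
	rotating_load(int queue_len, int64_t lastoffset, int64_t offset)
	{
		if (lastoffset == offset)
			return (queue_len + rotating_inc);
		if (ABS(lastoffset - offset) < rotating_seek_offset)
			return (queue_len + (rotating_seek_inc / 2));
		return (queue_len + rotating_seek_inc);
	}

	int
	main(void)
	{
		int64_t last = 8 << 20;		/* previous I/O ended at 8MB */

		/* Sequential, near (512 bytes away) and far (24MB) reads. */
		printf("%d %d %d\n",
		    rotating_load(4, last, 8 << 20),		/* 4 */
		    rotating_load(4, last, (8 << 20) + 512),	/* 6 */
		    rotating_load(4, last, 32 << 20));		/* 9 */
		return (0);
	}

With four I/O's already queued the child scores 4 for a sequential read, 6 for a short seek and 9 for a long one, so vdev_mirror_child_select() below will prefer a quieter or closer child.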
176 +
72 178 static mirror_map_t *
73 -vdev_mirror_map_alloc(zio_t *zio)
179 +vdev_mirror_map_init(zio_t *zio)
74 180 {
75 181 mirror_map_t *mm = NULL;
76 182 mirror_child_t *mc;
77 183 vdev_t *vd = zio->io_vd;
78 - int c, d;
184 + int c;
79 185
80 186 if (vd == NULL) {
81 187 dva_t *dva = zio->io_bp->blk_dva;
82 188 spa_t *spa = zio->io_spa;
83 189
84 - c = BP_GET_NDVAS(zio->io_bp);
85 -
86 - mm = kmem_zalloc(offsetof(mirror_map_t, mm_child[c]), KM_SLEEP);
87 - mm->mm_children = c;
88 - mm->mm_replacing = B_FALSE;
89 - mm->mm_preferred = spa_get_random(c);
90 - mm->mm_root = B_TRUE;
91 -
92 - /*
93 - * Check the other, lower-index DVAs to see if they're on
94 - * the same vdev as the child we picked. If they are, use
95 - * them since they are likely to have been allocated from
96 - * the primary metaslab in use at the time, and hence are
97 - * more likely to have locality with single-copy data.
98 - */
99 - for (c = mm->mm_preferred, d = c - 1; d >= 0; d--) {
100 - if (DVA_GET_VDEV(&dva[d]) == DVA_GET_VDEV(&dva[c]))
101 - mm->mm_preferred = d;
102 - }
103 -
190 + mm = vdev_mirror_map_alloc(BP_GET_NDVAS(zio->io_bp), B_FALSE,
191 + B_TRUE);
104 192 for (c = 0; c < mm->mm_children; c++) {
105 193 mc = &mm->mm_child[c];
106 -
107 194 mc->mc_vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[c]));
108 195 mc->mc_offset = DVA_GET_OFFSET(&dva[c]);
109 196 }
110 197 } else {
111 - c = vd->vdev_children;
112 -
113 - mm = kmem_zalloc(offsetof(mirror_map_t, mm_child[c]), KM_SLEEP);
114 - mm->mm_children = c;
115 - mm->mm_replacing = (vd->vdev_ops == &vdev_replacing_ops ||
116 - vd->vdev_ops == &vdev_spare_ops);
117 - mm->mm_preferred = mm->mm_replacing ? 0 :
118 - (zio->io_offset >> vdev_mirror_shift) % c;
119 - mm->mm_root = B_FALSE;
120 -
198 + mm = vdev_mirror_map_alloc(vd->vdev_children,
199 + (vd->vdev_ops == &vdev_replacing_ops ||
200 + vd->vdev_ops == &vdev_spare_ops), B_FALSE);
121 201 for (c = 0; c < mm->mm_children; c++) {
122 202 mc = &mm->mm_child[c];
123 203 mc->mc_vd = vd->vdev_child[c];
124 204 mc->mc_offset = zio->io_offset;
125 205 }
126 206 }
127 207
128 208 zio->io_vsd = mm;
129 209 zio->io_vsd_ops = &vdev_mirror_vsd_ops;
130 210 return (mm);
131 211 }
132 212
133 213 static int
134 214 vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
135 215 uint64_t *ashift)
136 216 {
137 217 int numerrors = 0;
138 218 int lasterror = 0;
139 219
140 220 if (vd->vdev_children == 0) {
141 221 vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
142 222 return (SET_ERROR(EINVAL));
143 223 }
144 224
145 225 vdev_open_children(vd);
146 226
147 227 for (int c = 0; c < vd->vdev_children; c++) {
148 228 vdev_t *cvd = vd->vdev_child[c];
149 229
150 230 if (cvd->vdev_open_error) {
151 231 lasterror = cvd->vdev_open_error;
152 232 numerrors++;
153 233 continue;
154 234 }
155 235
156 236 *asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1;
157 237 *max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1;
158 238 *ashift = MAX(*ashift, cvd->vdev_ashift);
159 239 }
160 240
161 241 if (numerrors == vd->vdev_children) {
162 242 vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS;
163 243 return (lasterror);
164 244 }
165 245
166 246 return (0);
167 247 }
168 248
169 249 static void
170 250 vdev_mirror_close(vdev_t *vd)
171 251 {
172 252 for (int c = 0; c < vd->vdev_children; c++)
173 253 vdev_close(vd->vdev_child[c]);
174 254 }
175 255
176 256 static void
177 257 vdev_mirror_child_done(zio_t *zio)
178 258 {
179 259 mirror_child_t *mc = zio->io_private;
180 260
181 261 mc->mc_error = zio->io_error;
182 262 mc->mc_tried = 1;
183 263 mc->mc_skipped = 0;
184 264 }
185 265
186 266 static void
187 267 vdev_mirror_scrub_done(zio_t *zio)
188 268 {
189 269 mirror_child_t *mc = zio->io_private;
190 270
191 271 if (zio->io_error == 0) {
192 272 zio_t *pio;
193 273
194 274 mutex_enter(&zio->io_lock);
195 275 while ((pio = zio_walk_parents(zio)) != NULL) {
196 276 mutex_enter(&pio->io_lock);
197 277 ASSERT3U(zio->io_size, >=, pio->io_size);
198 278 bcopy(zio->io_data, pio->io_data, pio->io_size);
199 279 mutex_exit(&pio->io_lock);
200 280 }
201 281 mutex_exit(&zio->io_lock);
202 282 }
203 283
204 284 zio_buf_free(zio->io_data, zio->io_size);
205 285
206 286 mc->mc_error = zio->io_error;
207 287 mc->mc_tried = 1;
208 288 mc->mc_skipped = 0;
209 289 }
210 290
211 291 /*
212 - * Try to find a child whose DTL doesn't contain the block we want to read.
292 + * Check the other, lower-indexed preferred DVAs to see if they're on
293 + * the same vdev as the child we picked. If they are, use them since
294 + * they are likely to have been allocated from the primary metaslab
295 + * in use at the time, and hence are more likely to have locality
296 + * with single-copy data.
297 + */
298 +static int
299 +vdev_mirror_dva_select(zio_t *zio, int p)
300 +{
301 +	dva_t *dva = zio->io_bp->blk_dva;
302 +	mirror_map_t *mm = zio->io_vsd;
303 +	int c, preferred = mm->mm_preferred[p];
304 +	for (p--; p >= 0; p--) {
305 +		c = mm->mm_preferred[p];
306 +		if (DVA_GET_VDEV(&dva[c]) == DVA_GET_VDEV(&dva[preferred]))
307 +			preferred = c;
308 +	}
309 +	return (preferred);
310 +}
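
A short worked example of the walk above (illustrative only):

	/*
	 * With mm_preferred = {0, 2} and a random pick of entry 1 (DVA 2),
	 * if DVA 0 lives on the same top-level vdev the loop walks down
	 * and returns DVA 0, the lower-index copy.
	 */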
311 +
312 +static int
313 +vdev_mirror_preferred_child_randomize(zio_t *zio)
314 +{
315 + mirror_map_t *mm = zio->io_vsd;
316 + int p;
317 +
318 + if (mm->mm_root) {
319 + p = spa_get_random(mm->mm_preferred_cnt);
320 +		return (vdev_mirror_dva_select(zio, p));
321 + }
322 +
323 + /*
324 + * To ensure we don't always favour the first matching vdev,
325 + * which could lead to wear leveling issues on SSD's, we
326 +	 * use the I/O offset as a pseudo-random seed to select among
327 +	 * the vdevs with the lowest load.
328 + */
329 + p = (zio->io_offset >> vdev_mirror_shift) % mm->mm_preferred_cnt;
330 + return (mm->mm_preferred[p]);
331 +}
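
A worked example of the non-root path (illustrative only):

	/*
	 * With vdev_mirror_shift = 21 and two equally loaded children,
	 * offsets 0-2MB map to mm_preferred[0], 2MB-4MB to
	 * mm_preferred[1], and so on, spreading reads in 2MB strides
	 * rather than always hitting the first lowest-load child.
	 */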
332 +
333 +/*
334 + * Try to find a vdev whose DTL doesn't contain the block we want to read,
335 + * preferring vdevs based on their determined load.
336 + *
213 337 * If we can't, try the read on any vdev we haven't already tried.
214 338 */
215 339 static int
216 340 vdev_mirror_child_select(zio_t *zio)
217 341 {
218 342 mirror_map_t *mm = zio->io_vsd;
219 - mirror_child_t *mc;
220 343 uint64_t txg = zio->io_txg;
221 - int i, c;
344 + int c, lowest_load;
222 345
223 346 ASSERT(zio->io_bp == NULL || BP_PHYSICAL_BIRTH(zio->io_bp) == txg);
224 347
225 - /*
226 - * Try to find a child whose DTL doesn't contain the block to read.
227 - * If a child is known to be completely inaccessible (indicated by
228 - * vdev_readable() returning B_FALSE), don't even try.
229 - */
230 - for (i = 0, c = mm->mm_preferred; i < mm->mm_children; i++, c++) {
231 - if (c >= mm->mm_children)
232 - c = 0;
348 + lowest_load = INT_MAX;
349 + mm->mm_preferred_cnt = 0;
350 + for (c = 0; c < mm->mm_children; c++) {
351 + mirror_child_t *mc;
352 +
233 353 mc = &mm->mm_child[c];
234 354 if (mc->mc_tried || mc->mc_skipped)
235 355 continue;
356 +
236 357 if (!vdev_readable(mc->mc_vd)) {
237 358 mc->mc_error = SET_ERROR(ENXIO);
238 359 mc->mc_tried = 1; /* don't even try */
239 360 mc->mc_skipped = 1;
240 361 continue;
241 362 }
242 - if (!vdev_dtl_contains(mc->mc_vd, DTL_MISSING, txg, 1))
243 - return (c);
244 - mc->mc_error = SET_ERROR(ESTALE);
245 - mc->mc_skipped = 1;
246 - mc->mc_speculative = 1;
363 +
364 + if (vdev_dtl_contains(mc->mc_vd, DTL_MISSING, txg, 1)) {
365 + mc->mc_error = SET_ERROR(ESTALE);
366 + mc->mc_skipped = 1;
367 + mc->mc_speculative = 1;
368 + continue;
369 + }
370 +
371 + mc->mc_load = vdev_mirror_load(mm, mc->mc_vd, mc->mc_offset);
372 + if (mc->mc_load > lowest_load)
373 + continue;
374 +
375 + if (mc->mc_load < lowest_load) {
376 + lowest_load = mc->mc_load;
377 + mm->mm_preferred_cnt = 0;
378 + }
379 + mm->mm_preferred[mm->mm_preferred_cnt] = c;
380 + mm->mm_preferred_cnt++;
381 + }
382 +
383 + if (mm->mm_preferred_cnt == 1) {
384 + vdev_queue_register_lastoffset(
385 + mm->mm_child[mm->mm_preferred[0]].mc_vd, zio);
386 + return (mm->mm_preferred[0]);
387 + }
388 +
389 + if (mm->mm_preferred_cnt > 1) {
390 + int c = vdev_mirror_preferred_child_randomize(zio);
391 +
392 + vdev_queue_register_lastoffset(mm->mm_child[c].mc_vd, zio);
393 + return (c);
247 394 }
248 395
249 396 /*
250 397 * Every device is either missing or has this txg in its DTL.
251 398 * Look for any child we haven't already tried before giving up.
252 399 */
253 - for (c = 0; c < mm->mm_children; c++)
254 - if (!mm->mm_child[c].mc_tried)
400 + for (c = 0; c < mm->mm_children; c++) {
401 + if (!mm->mm_child[c].mc_tried) {
402 + vdev_queue_register_lastoffset(mm->mm_child[c].mc_vd,
403 + zio);
255 404 return (c);
405 + }
406 + }
256 407
257 408 /*
258 409 * Every child failed. There's no place left to look.
259 410 */
260 411 return (-1);
261 412 }
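
A worked example of how the preferred set is built (illustrative only):

	/*
	 * With per-child loads {3, 1, 1, 7} the scan above finishes with
	 * lowest_load = 1 and mm_preferred = {1, 2}; the two-way tie is
	 * then broken by vdev_mirror_preferred_child_randomize().
	 */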
262 413
263 414 static int
264 415 vdev_mirror_io_start(zio_t *zio)
265 416 {
266 417 mirror_map_t *mm;
267 418 mirror_child_t *mc;
268 419 int c, children;
269 420
270 - mm = vdev_mirror_map_alloc(zio);
421 + mm = vdev_mirror_map_init(zio);
271 422
272 423 if (zio->io_type == ZIO_TYPE_READ) {
273 424 if ((zio->io_flags & ZIO_FLAG_SCRUB) && !mm->mm_replacing) {
274 425 /*
275 426 * For scrubbing reads we need to allocate a read
276 427 * buffer for each child and issue reads to all
277 428 * children. If any child succeeds, it will copy its
278 429 * data into zio->io_data in vdev_mirror_scrub_done.
279 430 */
280 431 for (c = 0; c < mm->mm_children; c++) {
281 432 mc = &mm->mm_child[c];
282 433 zio_nowait(zio_vdev_child_io(zio, zio->io_bp,
283 434 mc->mc_vd, mc->mc_offset,
284 435 zio_buf_alloc(zio->io_size), zio->io_size,
285 436 zio->io_type, zio->io_priority, 0,
286 437 vdev_mirror_scrub_done, mc));
287 438 }
288 439 return (ZIO_PIPELINE_CONTINUE);
289 440 }
290 441 /*
291 442 * For normal reads just pick one child.
292 443 */
293 444 c = vdev_mirror_child_select(zio);
294 445 children = (c >= 0);
295 446 } else {
296 447 ASSERT(zio->io_type == ZIO_TYPE_WRITE);
297 448
298 449 /*
299 450 * Writes go to all children.
300 451 */
301 452 c = 0;
302 453 children = mm->mm_children;
303 454 }
304 455
305 456 while (children--) {
306 457 mc = &mm->mm_child[c];
307 458 zio_nowait(zio_vdev_child_io(zio, zio->io_bp,
308 459 mc->mc_vd, mc->mc_offset, zio->io_data, zio->io_size,
309 460 zio->io_type, zio->io_priority, 0,
310 461 vdev_mirror_child_done, mc));
311 462 c++;
312 463 }
313 464
314 465 return (ZIO_PIPELINE_CONTINUE);
315 466 }
316 467
317 468 static int
318 469 vdev_mirror_worst_error(mirror_map_t *mm)
319 470 {
320 471 int error[2] = { 0, 0 };
321 472
322 473 for (int c = 0; c < mm->mm_children; c++) {
323 474 mirror_child_t *mc = &mm->mm_child[c];
324 475 int s = mc->mc_speculative;
325 476 error[s] = zio_worst_error(error[s], mc->mc_error);
326 477 }
327 478
328 479 return (error[0] ? error[0] : error[1]);
329 480 }
330 481
331 482 static void
332 483 vdev_mirror_io_done(zio_t *zio)
333 484 {
334 485 mirror_map_t *mm = zio->io_vsd;
335 486 mirror_child_t *mc;
336 487 int c;
337 488 int good_copies = 0;
338 489 int unexpected_errors = 0;
339 490
340 491 for (c = 0; c < mm->mm_children; c++) {
341 492 mc = &mm->mm_child[c];
342 493
343 494 if (mc->mc_error) {
344 495 if (!mc->mc_skipped)
345 496 unexpected_errors++;
346 497 } else if (mc->mc_tried) {
347 498 good_copies++;
348 499 }
349 500 }
350 501
351 502 if (zio->io_type == ZIO_TYPE_WRITE) {
352 503 /*
353 504 * XXX -- for now, treat partial writes as success.
354 505 *
355 506 * Now that we support write reallocation, it would be better
356 507 * to treat partial failure as real failure unless there are
357 508 * no non-degraded top-level vdevs left, and not update DTLs
358 509 * if we intend to reallocate.
359 510 */
360 511 /* XXPOLICY */
361 512 if (good_copies != mm->mm_children) {
362 513 /*
363 514 * Always require at least one good copy.
364 515 *
365 516 * For ditto blocks (io_vd == NULL), require
366 517 * all copies to be good.
367 518 *
368 519 * XXX -- for replacing vdevs, there's no great answer.
369 520 * If the old device is really dead, we may not even
370 521 * be able to access it -- so we only want to
371 522 * require good writes to the new device. But if
372 523 * the new device turns out to be flaky, we want
373 524 * to be able to detach it -- which requires all
374 525 * writes to the old device to have succeeded.
375 526 */
376 527 if (good_copies == 0 || zio->io_vd == NULL)
377 528 zio->io_error = vdev_mirror_worst_error(mm);
378 529 }
379 530 return;
380 531 }
381 532
382 533 ASSERT(zio->io_type == ZIO_TYPE_READ);
383 534
384 535 /*
385 536 * If we don't have a good copy yet, keep trying other children.
386 537 */
387 538 /* XXPOLICY */
388 539 if (good_copies == 0 && (c = vdev_mirror_child_select(zio)) != -1) {
389 540 ASSERT(c >= 0 && c < mm->mm_children);
390 541 mc = &mm->mm_child[c];
391 542 zio_vdev_io_redone(zio);
392 543 zio_nowait(zio_vdev_child_io(zio, zio->io_bp,
393 544 mc->mc_vd, mc->mc_offset, zio->io_data, zio->io_size,
394 545 ZIO_TYPE_READ, zio->io_priority, 0,
395 546 vdev_mirror_child_done, mc));
396 547 return;
397 548 }
398 549
399 550 /* XXPOLICY */
400 551 if (good_copies == 0) {
401 552 zio->io_error = vdev_mirror_worst_error(mm);
402 553 ASSERT(zio->io_error != 0);
403 554 }
404 555
405 556 if (good_copies && spa_writeable(zio->io_spa) &&
406 557 (unexpected_errors ||
407 558 (zio->io_flags & ZIO_FLAG_RESILVER) ||
408 559 ((zio->io_flags & ZIO_FLAG_SCRUB) && mm->mm_replacing))) {
409 560 /*
410 561 * Use the good data we have in hand to repair damaged children.
411 562 */
412 563 for (c = 0; c < mm->mm_children; c++) {
413 564 /*
414 565 * Don't rewrite known good children.
415 566 * Not only is it unnecessary, it could
416 567 * actually be harmful: if the system lost
417 568 * power while rewriting the only good copy,
418 569 * there would be no good copies left!
419 570 */
420 571 mc = &mm->mm_child[c];
421 572
422 573 if (mc->mc_error == 0) {
423 574 if (mc->mc_tried)
424 575 continue;
425 576 if (!(zio->io_flags & ZIO_FLAG_SCRUB) &&
426 577 !vdev_dtl_contains(mc->mc_vd, DTL_PARTIAL,
427 578 zio->io_txg, 1))
428 579 continue;
429 580 mc->mc_error = SET_ERROR(ESTALE);
430 581 }
431 582
432 583 zio_nowait(zio_vdev_child_io(zio, zio->io_bp,
433 584 mc->mc_vd, mc->mc_offset,
434 585 zio->io_data, zio->io_size,
435 586 ZIO_TYPE_WRITE, ZIO_PRIORITY_ASYNC_WRITE,
436 587 ZIO_FLAG_IO_REPAIR | (unexpected_errors ?
437 588 ZIO_FLAG_SELF_HEAL : 0), NULL, NULL));
438 589 }
439 590 }
440 591 }
441 592
442 593 static void
443 594 vdev_mirror_state_change(vdev_t *vd, int faulted, int degraded)
444 595 {
445 596 if (faulted == vd->vdev_children)
446 597 vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
447 598 VDEV_AUX_NO_REPLICAS);
448 599 else if (degraded + faulted != 0)
449 600 vdev_set_state(vd, B_FALSE, VDEV_STATE_DEGRADED, VDEV_AUX_NONE);
450 601 else
451 602 vdev_set_state(vd, B_FALSE, VDEV_STATE_HEALTHY, VDEV_AUX_NONE);
452 603 }
453 604
454 605 vdev_ops_t vdev_mirror_ops = {
455 606 vdev_mirror_open,
456 607 vdev_mirror_close,
457 608 vdev_default_asize,
458 609 vdev_mirror_io_start,
459 610 vdev_mirror_io_done,
460 611 vdev_mirror_state_change,
461 612 NULL,
462 613 NULL,
463 614 VDEV_TYPE_MIRROR, /* name of this vdev type */
464 615 B_FALSE /* not a leaf vdev */
465 616 };
466 617
467 618 vdev_ops_t vdev_replacing_ops = {
468 619 vdev_mirror_open,
469 620 vdev_mirror_close,
470 621 vdev_default_asize,
471 622 vdev_mirror_io_start,
472 623 vdev_mirror_io_done,
473 624 vdev_mirror_state_change,
474 625 NULL,
475 626 NULL,
476 627 VDEV_TYPE_REPLACING, /* name of this vdev type */
477 628 B_FALSE /* not a leaf vdev */
478 629 };
479 630
480 631 vdev_ops_t vdev_spare_ops = {
481 632 vdev_mirror_open,
482 633 vdev_mirror_close,
483 634 vdev_default_asize,
484 635 vdev_mirror_io_start,
485 636 vdev_mirror_io_done,
486 637 vdev_mirror_state_change,
487 638 NULL,
488 639 NULL,
489 640 VDEV_TYPE_SPARE, /* name of this vdev type */
490 641 B_FALSE /* not a leaf vdev */
491 642 };