Print this page
LOCAL: listen for ldi notifications of disk offline/degrade
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/fs/zfs/vdev_disk.c
+++ new/usr/src/uts/common/fs/zfs/vdev_disk.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
↓ open down ↓ |
14 lines elided |
↑ open up ↑ |
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23 23 * Copyright (c) 2012 by Delphix. All rights reserved.
24 24 * Copyright (c) 2012, Joyent, Inc. All rights reserved.
25 + * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
25 26 */
26 27
27 28 #include <sys/zfs_context.h>
28 29 #include <sys/zfs_zone.h>
29 30 #include <sys/spa_impl.h>
30 31 #include <sys/refcount.h>
31 32 #include <sys/vdev_disk.h>
32 33 #include <sys/vdev_impl.h>
33 34 #include <sys/fs/zfs.h>
34 35 #include <sys/zio.h>
35 36 #include <sys/sunldi.h>
36 37 #include <sys/efi_partition.h>
37 38 #include <sys/fm/fs/zfs.h>
38 39
39 40 /*
40 41 * Virtual device vector for disks.
41 42 */
42 43
43 44 extern ldi_ident_t zfs_li;
44 45
46 +static void vdev_disk_close(vdev_t *);
47 +
/*
 * Pairs the kernel buf_t handed to ldi_strategy() with the zio it
 * services, so vdev_disk_io_intr() can recover the zio on completion.
 */
typedef struct vdev_disk_buf {
	buf_t vdb_buf;
	zio_t *vdb_io;
} vdev_disk_buf_t;
49 52
/*
 * One registered LDI event callback, linked on vdev_disk_t's
 * vd_ldi_cbs list so it can be unregistered and freed at teardown.
 */
typedef struct vdev_disk_ldi_cb {
	list_node_t lcb_next;
	ldi_callback_id_t lcb_id;
} vdev_disk_ldi_cb_t;
57 +
58 +static void vdev_disk_alloc(vdev_t *vd)
59 +{
60 + vdev_disk_t *dvd;
61 +
62 + dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
63 + /*
64 + * Create the LDI event callback list.
65 + */
66 + list_create(&dvd->vd_ldi_cbs, sizeof (vdev_disk_ldi_cb_t),
67 + offsetof(vdev_disk_ldi_cb_t, lcb_next));
68 +}
69 +
70 +static void vdev_disk_free(vdev_t *vd)
71 +{
72 + vdev_disk_t *dvd = vd->vdev_tsd;
73 + vdev_disk_ldi_cb_t *lcb;
74 +
75 + /*
76 + * We have already closed the LDI handle. Clean up the LDI event
77 + * callbacks and free vd->vdev_tsd.
78 + */
79 + while ((lcb = list_head(&dvd->vd_ldi_cbs)) != NULL) {
80 + list_remove(&dvd->vd_ldi_cbs, lcb);
81 + (void) ldi_ev_remove_callbacks(lcb->lcb_id);
82 + kmem_free(lcb, sizeof (vdev_disk_ldi_cb_t));
83 + }
84 + list_destroy(&dvd->vd_ldi_cbs);
85 + kmem_free(dvd, sizeof (vdev_disk_t));
86 + vd->vdev_tsd = NULL;
87 +}
88 +
/*
 * LDI notify callback for device offline events.  All LDI handles on the
 * device must be closed for the offline state change to succeed, so close
 * ours and flag the vdev for asynchronous removal.  Always returns
 * LDI_EV_SUCCESS (we never veto the offline).
 */
/* ARGSUSED */
static int
vdev_disk_off_notify(ldi_handle_t lh, ldi_ev_cookie_t ecookie, void *arg,
    void *ev_data)
{
	vdev_t *vd = (vdev_t *)arg;
	vdev_disk_t *dvd = vd->vdev_tsd;

	/*
	 * Ignore events other than offline.
	 */
	if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_OFFLINE) != 0)
		return (LDI_EV_SUCCESS);

	/*
	 * All LDI handles must be closed for the state change to succeed, so
	 * call on vdev_disk_close() to do this.
	 *
	 * We inform vdev_disk_close that it is being called from offline
	 * notify context so it will defer cleanup of LDI event callbacks and
	 * freeing of vd->vdev_tsd to the offline finalize or a reopen.
	 */
	dvd->vd_ldi_offline = B_TRUE;
	vdev_disk_close(vd);

	/*
	 * Now that the device is closed, request that the spa_async_thread
	 * mark the device as REMOVED and notify FMA of the removal.
	 */
	zfs_post_remove(vd->vdev_spa, vd);
	vd->vdev_remove_wanted = B_TRUE;
	spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);

	return (LDI_EV_SUCCESS);
}
124 +
125 +/* ARGSUSED */
50 126 static void
127 +vdev_disk_off_finalize(ldi_handle_t lh, ldi_ev_cookie_t ecookie,
128 + int ldi_result, void *arg, void *ev_data)
129 +{
130 + vdev_t *vd = (vdev_t *)arg;
131 + vdev_disk_t *dvd = vd->vdev_tsd;
132 + vdev_disk_ldi_cb_t *lcb;
133 +
134 + /*
135 + * Ignore events other than offline.
136 + */
137 + if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_OFFLINE) != 0)
138 + return;
139 +
140 + /*
141 + * We have already closed the LDI handle in notify.
142 + * Clean up the LDI event callbacks and free vd->vdev_tsd.
143 + */
144 + vdev_disk_free(vd);
145 +
146 + /*
147 + * Request that the vdev be reopened if the offline state change was
148 + * unsuccessful.
149 + */
150 + if (ldi_result != LDI_EV_SUCCESS) {
151 + vd->vdev_probe_wanted = B_TRUE;
152 + spa_async_request(vd->vdev_spa, SPA_ASYNC_PROBE);
153 + }
154 +}
155 +
/*
 * LDI event callback vector registered for LDI_EV_OFFLINE events.
 */
static ldi_ev_callback_t vdev_disk_off_callb = {
	.cb_vers = LDI_EV_CB_VERS,
	.cb_notify = vdev_disk_off_notify,
	.cb_finalize = vdev_disk_off_finalize
};
161 +
/*
 * LDI finalize callback for device degrade events.  Degrade events always
 * succeed, so simply mark the vdev degraded; the status is purely
 * informative for the user.
 */
/* ARGSUSED */
static void
vdev_disk_dgrd_finalize(ldi_handle_t lh, ldi_ev_cookie_t ecookie,
    int ldi_result, void *arg, void *ev_data)
{
	vdev_t *vd = (vdev_t *)arg;

	/*
	 * Ignore events other than degrade.
	 */
	if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_DEGRADE) != 0)
		return;

	/*
	 * Degrade events always succeed. Mark the vdev as degraded.
	 * This status is purely informative for the user.
	 */
	(void) vdev_degrade(vd->vdev_spa, vd->vdev_guid, 0);
}
181 +
/*
 * LDI event callback vector registered for LDI_EV_DEGRADE events.
 * No notify handler: degrade cannot be vetoed, only finalized.
 */
static ldi_ev_callback_t vdev_disk_dgrd_callb = {
	.cb_vers = LDI_EV_CB_VERS,
	.cb_notify = NULL,
	.cb_finalize = vdev_disk_dgrd_finalize
};
187 +
/*
 * Prefetch vnode handles (by path and by devid) for a disk vdev that has
 * never been opened, so a later open can find the device quickly.
 * Requires the SCL_STATE config lock held as writer.
 */
static void
vdev_disk_hold(vdev_t *vd)
{
	ddi_devid_t devid;
	char *minor;

	ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER));

	/*
	 * We must have a pathname, and it must be absolute.
	 */
	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/')
		return;

	/*
	 * Only prefetch path and devid info if the device has
	 * never been opened.
	 */
	if (vd->vdev_tsd != NULL)
		return;

	if (vd->vdev_wholedisk == -1ULL) {
		/* legacy whole-disk vdev: try the path with "s0" appended */
		size_t len = strlen(vd->vdev_path) + 3;
		char *buf = kmem_alloc(len, KM_SLEEP);

		(void) snprintf(buf, len, "%ss0", vd->vdev_path);

		(void) ldi_vp_from_name(buf, &vd->vdev_name_vp);
		kmem_free(buf, len);
	}

	if (vd->vdev_name_vp == NULL)
		(void) ldi_vp_from_name(vd->vdev_path, &vd->vdev_name_vp);

	if (vd->vdev_devid != NULL &&
	    ddi_devid_str_decode(vd->vdev_devid, &devid, &minor) == 0) {
		(void) ldi_vp_from_devid(devid, minor, &vd->vdev_devid_vp);
		ddi_devid_str_free(minor);
		ddi_devid_free(devid);
	}
}
91 229
/*
 * Release the vnode holds taken by vdev_disk_hold().  The releases are
 * dispatched to the dsl pool's vnrele taskq rather than done inline.
 * Requires the SCL_STATE config lock held as writer.
 */
static void
vdev_disk_rele(vdev_t *vd)
{
	ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_WRITER));

	if (vd->vdev_name_vp) {
		VN_RELE_ASYNC(vd->vdev_name_vp,
		    dsl_pool_vnrele_taskq(vd->vdev_spa->spa_dsl_pool));
		vd->vdev_name_vp = NULL;
	}
	if (vd->vdev_devid_vp) {
		VN_RELE_ASYNC(vd->vdev_devid_vp,
		    dsl_pool_vnrele_taskq(vd->vdev_spa->spa_dsl_pool));
		vd->vdev_devid_vp = NULL;
	}
}
108 246
/*
 * For a whole-disk vdev, read the EFI label and compute how many bytes of
 * additional space lie beyond the current alternate (backup) GPT LBA --
 * i.e. how much the pool could expand into.  Returns 0 if the EFI label
 * cannot be read or there is no extra space.
 */
static uint64_t
vdev_disk_get_space(vdev_t *vd, uint64_t capacity, uint_t blksz)
{
	ASSERT(vd->vdev_wholedisk);

	vdev_disk_t *dvd = vd->vdev_tsd;
	dk_efi_t dk_ioc;
	efi_gpt_t *efi;
	uint64_t avail_space = 0;
	int efisize = EFI_LABEL_SIZE * 2;

	dk_ioc.dki_data = kmem_alloc(efisize, KM_SLEEP);
	dk_ioc.dki_lba = 1;
	dk_ioc.dki_length = efisize;
	dk_ioc.dki_data_64 = (uint64_t)(uintptr_t)dk_ioc.dki_data;
	efi = dk_ioc.dki_data;

	if (ldi_ioctl(dvd->vd_lh, DKIOCGETEFI, (intptr_t)&dk_ioc,
	    FKIOCTL, kcred, NULL) == 0) {
		uint64_t efi_altern_lba = LE_64(efi->efi_gpt_AlternateLBA);

		zfs_dbgmsg("vdev %s, capacity %llu, altern lba %llu",
		    vd->vdev_path, capacity, efi_altern_lba);
		if (capacity > efi_altern_lba)
			avail_space = (capacity - efi_altern_lba) * blksz;
	}
	kmem_free(dk_ioc.dki_data, efisize);
	return (avail_space);
}
138 276
/*
 * Open the disk device backing this vdev, trying (in order) the stored
 * path, the stored devid, and finally the physical path.  On success,
 * registers LDI offline/degrade event callbacks, then reports the device
 * size (*psize), potential expanded size (*max_psize), and minimum
 * transfer size (*ashift).  Returns 0 or an errno; on failure,
 * vd->vdev_stat.vs_aux is set to the failure reason.
 */
static int
vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
    uint64_t *ashift)
{
	spa_t *spa = vd->vdev_spa;
	vdev_disk_t *dvd = vd->vdev_tsd;
	struct dk_minfo_ext dkmext;
	ldi_ev_cookie_t ecookie;
	vdev_disk_ldi_cb_t *lcb;
	int error;
	dev_t dev;
	int otyp;

	/*
	 * We must have a pathname, and it must be absolute.
	 */
	if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
		vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
		return (EINVAL);
	}

	/*
	 * Reopen the device if it's not currently open. Otherwise,
	 * just update the physical size of the device.
	 */
	if (dvd != NULL) {
		if (dvd->vd_ldi_offline && dvd->vd_lh == NULL) {
			/*
			 * If we are opening a device in its offline notify
			 * context, the LDI handle was just closed. Clean
			 * up the LDI event callbacks and free vd->vdev_tsd.
			 */
			vdev_disk_free(vd);
		} else {
			VERIFY(vd->vdev_reopening);
			goto skip_open;
		}
	}

	/*
	 * Create vd->vdev_tsd.
	 */
	vdev_disk_alloc(vd);
	dvd = vd->vdev_tsd;

	/*
	 * When opening a disk device, we want to preserve the user's original
	 * intent. We always want to open the device by the path the user gave
	 * us, even if it is one of multiple paths to the same device. But we
	 * also want to be able to survive disks being removed/recabled.
	 * Therefore the sequence of opening devices is:
	 *
	 * 1. Try opening the device by path. For legacy pools without the
	 *    'whole_disk' property, attempt to fix the path by appending 's0'.
	 *
	 * 2. If the devid of the device matches the stored value, return
	 *    success.
	 *
	 * 3. Otherwise, the device may have moved. Try opening the device
	 *    by the devid instead.
	 */
	if (vd->vdev_devid != NULL) {
		if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid,
		    &dvd->vd_minor) != 0) {
			vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
			return (EINVAL);
		}
	}

	error = EINVAL;		/* presume failure */

	if (vd->vdev_path != NULL) {
		ddi_devid_t devid;

		if (vd->vdev_wholedisk == -1ULL) {
			/* legacy pool: retry the path with "s0" appended */
			size_t len = strlen(vd->vdev_path) + 3;
			char *buf = kmem_alloc(len, KM_SLEEP);

			(void) snprintf(buf, len, "%ss0", vd->vdev_path);

			error = ldi_open_by_name(buf, spa_mode(spa), kcred,
			    &dvd->vd_lh, zfs_li);
			if (error == 0) {
				spa_strfree(vd->vdev_path);
				vd->vdev_path = buf;
				vd->vdev_wholedisk = 1ULL;
			} else {
				kmem_free(buf, len);
			}
		}

		/*
		 * If we have not yet opened the device, try to open it by the
		 * specified path.
		 */
		if (error != 0) {
			error = ldi_open_by_name(vd->vdev_path, spa_mode(spa),
			    kcred, &dvd->vd_lh, zfs_li);
		}

		/*
		 * Compare the devid to the stored value.
		 */
		if (error == 0 && vd->vdev_devid != NULL &&
		    ldi_get_devid(dvd->vd_lh, &devid) == 0) {
			if (ddi_devid_compare(devid, dvd->vd_devid) != 0) {
				/* wrong device at this path; close and retry */
				error = EINVAL;
				(void) ldi_close(dvd->vd_lh, spa_mode(spa),
				    kcred);
				dvd->vd_lh = NULL;
			}
			ddi_devid_free(devid);
		}

		/*
		 * If we succeeded in opening the device, but 'vdev_wholedisk'
		 * is not yet set, then this must be a slice.
		 */
		if (error == 0 && vd->vdev_wholedisk == -1ULL)
			vd->vdev_wholedisk = 0;
	}

	/*
	 * If we were unable to open by path, or the devid check fails, open by
	 * devid instead.
	 */
	if (error != 0 && vd->vdev_devid != NULL)
		error = ldi_open_by_devid(dvd->vd_devid, dvd->vd_minor,
		    spa_mode(spa), kcred, &dvd->vd_lh, zfs_li);

	/*
	 * If all else fails, then try opening by physical path (if available)
	 * or the logical path (if we failed due to the devid check). While not
	 * as reliable as the devid, this will give us something, and the higher
	 * level vdev validation will prevent us from opening the wrong device.
	 */
	if (error) {
		if (vd->vdev_physpath != NULL &&
		    (dev = ddi_pathname_to_dev_t(vd->vdev_physpath)) != NODEV)
			error = ldi_open_by_dev(&dev, OTYP_BLK, spa_mode(spa),
			    kcred, &dvd->vd_lh, zfs_li);

		/*
		 * Note that we don't support the legacy auto-wholedisk support
		 * as above. This hasn't been used in a very long time and we
		 * don't need to propagate its oddities to this edge condition.
		 */
		if (error && vd->vdev_path != NULL)
			error = ldi_open_by_name(vd->vdev_path, spa_mode(spa),
			    kcred, &dvd->vd_lh, zfs_li);
	}

	if (error) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (error);
	}

	/*
	 * Once a device is opened, verify that the physical device path (if
	 * available) is up to date.
	 */
	if (ldi_get_dev(dvd->vd_lh, &dev) == 0 &&
	    ldi_get_otyp(dvd->vd_lh, &otyp) == 0) {
		char *physpath, *minorname;

		physpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
		minorname = NULL;
		if (ddi_dev_pathname(dev, otyp, physpath) == 0 &&
		    ldi_get_minor_name(dvd->vd_lh, &minorname) == 0 &&
		    (vd->vdev_physpath == NULL ||
		    strcmp(vd->vdev_physpath, physpath) != 0)) {
			if (vd->vdev_physpath)
				spa_strfree(vd->vdev_physpath);
			/* record "<physpath>:<minor>" as the new physpath */
			(void) strlcat(physpath, ":", MAXPATHLEN);
			(void) strlcat(physpath, minorname, MAXPATHLEN);
			vd->vdev_physpath = spa_strdup(physpath);
		}
		if (minorname)
			kmem_free(minorname, strlen(minorname) + 1);
		kmem_free(physpath, MAXPATHLEN);
	}

	/*
	 * Register callbacks for the LDI offline event.
	 */
	if (ldi_ev_get_cookie(dvd->vd_lh, LDI_EV_OFFLINE, &ecookie) ==
	    LDI_EV_SUCCESS) {
		lcb = kmem_zalloc(sizeof (vdev_disk_ldi_cb_t), KM_SLEEP);
		list_insert_tail(&dvd->vd_ldi_cbs, lcb);
		(void) ldi_ev_register_callbacks(dvd->vd_lh, ecookie,
		    &vdev_disk_off_callb, (void *) vd, &lcb->lcb_id);
	}

	/*
	 * Register callbacks for the LDI degrade event.
	 */
	if (ldi_ev_get_cookie(dvd->vd_lh, LDI_EV_DEGRADE, &ecookie) ==
	    LDI_EV_SUCCESS) {
		lcb = kmem_zalloc(sizeof (vdev_disk_ldi_cb_t), KM_SLEEP);
		list_insert_tail(&dvd->vd_ldi_cbs, lcb);
		(void) ldi_ev_register_callbacks(dvd->vd_lh, ecookie,
		    &vdev_disk_dgrd_callb, (void *) vd, &lcb->lcb_id);
	}
skip_open:
	/*
	 * Determine the actual size of the device.
	 */
	if (ldi_get_size(dvd->vd_lh, psize) != 0) {
		vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
		return (EINVAL);
	}

	/*
	 * Determine the device's minimum transfer size.
	 * If the ioctl isn't supported, assume DEV_BSIZE.
	 */
	if (ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFOEXT, (intptr_t)&dkmext,
	    FKIOCTL, kcred, NULL) != 0)
		dkmext.dki_pbsize = DEV_BSIZE;

	*ashift = highbit(MAX(dkmext.dki_pbsize, SPA_MINBLOCKSIZE)) - 1;

	if (vd->vdev_wholedisk == 1) {
		uint64_t capacity = dkmext.dki_capacity - 1;
		uint64_t blksz = dkmext.dki_lbsize;
		int wce = 1;

		/*
		 * If we own the whole disk, try to enable disk write caching.
		 * We ignore errors because it's OK if we can't do it.
		 */
		(void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce,
		    FKIOCTL, kcred, NULL);

		*max_psize = *psize + vdev_disk_get_space(vd, capacity, blksz);
		zfs_dbgmsg("capacity change: vdev %s, psize %llu, "
		    "max_psize %llu", vd->vdev_path, *psize, *max_psize);
	} else {
		*max_psize = *psize;
	}

	/*
	 * Clear the nowritecache bit, so that on a vdev_reopen() we will
	 * try again.
	 */
	vd->vdev_nowritecache = B_FALSE;

	return (0);
}
348 526
349 527 static void
350 528 vdev_disk_close(vdev_t *vd)
351 529 {
352 530 vdev_disk_t *dvd = vd->vdev_tsd;
531 + vdev_disk_ldi_cb_t *lcb;
353 532
354 533 if (vd->vdev_reopening || dvd == NULL)
355 534 return;
356 535
357 - if (dvd->vd_minor != NULL)
536 + if (dvd->vd_minor != NULL) {
358 537 ddi_devid_str_free(dvd->vd_minor);
538 + dvd->vd_minor = NULL;
539 + }
359 540
360 - if (dvd->vd_devid != NULL)
541 + if (dvd->vd_devid != NULL) {
361 542 ddi_devid_free(dvd->vd_devid);
543 + dvd->vd_devid = NULL;
544 + }
362 545
363 - if (dvd->vd_lh != NULL)
546 + if (dvd->vd_lh != NULL) {
364 547 (void) ldi_close(dvd->vd_lh, spa_mode(vd->vdev_spa), kcred);
548 + dvd->vd_lh = NULL;
549 + }
365 550
366 551 vd->vdev_delayed_close = B_FALSE;
367 - kmem_free(dvd, sizeof (vdev_disk_t));
368 - vd->vdev_tsd = NULL;
552 + /*
553 + * If we closed the LDI handle due to an offline notify from LDI,
554 + * don't free vd->vdev_tsd or unregister the callbacks here;
555 + * the offline finalize callback or a reopen will take care of it.
556 + */
557 + if (dvd->vd_ldi_offline)
558 + return;
559 +
560 + vdev_disk_free(vd);
369 561 }
370 562
/*
 * Synchronous physical I/O against an open vdev, used by callers that
 * have a vdev rather than a raw LDI handle.  Returns EIO if the vdev is
 * closed (REMOVED/FAULTED, or offlined via LDI notification).
 */
int
vdev_disk_physio(vdev_t *vd, caddr_t data,
    size_t size, uint64_t offset, int flags)
{
	vdev_disk_t *dvd = vd->vdev_tsd;

	/*
	 * If the vdev is closed, it's likely in the REMOVED or FAULTED state.
	 * Nothing to be done here but return failure.
	 */
	if (dvd == NULL || (dvd->vd_ldi_offline && dvd->vd_lh == NULL))
		return (EIO);

	ASSERT(vd->vdev_ops == &vdev_disk_ops);
	return (vdev_disk_ldi_physio(dvd->vd_lh, data, size, offset, flags));
}
387 579
/*
 * Issue a synchronous read or write (flags must include B_READ or
 * B_WRITE) of 'size' bytes at byte 'offset' through the given LDI
 * handle, waiting for completion.  Returns 0, EINVAL for a NULL handle,
 * or EIO on a short/failed transfer.
 */
int
vdev_disk_ldi_physio(ldi_handle_t vd_lh, caddr_t data,
    size_t size, uint64_t offset, int flags)
{
	buf_t *bp;
	int error = 0;

	if (vd_lh == NULL)
		return (EINVAL);

	ASSERT(flags & B_READ || flags & B_WRITE);

	bp = getrbuf(KM_SLEEP);
	bp->b_flags = flags | B_BUSY | B_NOCACHE | B_FAILFAST;
	bp->b_bcount = size;
	bp->b_un.b_addr = (void *)data;
	bp->b_lblkno = lbtodb(offset);
	bp->b_bufsize = size;

	/* ldi_strategy() fails only on programming errors */
	error = ldi_strategy(vd_lh, bp);
	ASSERT(error == 0);
	if ((error = biowait(bp)) == 0 && bp->b_resid != 0)
		error = EIO;	/* short transfer */
	freerbuf(bp);

	return (error);
}
415 607
/*
 * biodone callback for async vdev I/O: translate the buf's status onto
 * the zio, free the wrapper, and reenter the zio pipeline.
 */
static void
vdev_disk_io_intr(buf_t *bp)
{
	vdev_disk_buf_t *vdb = (vdev_disk_buf_t *)bp;
	zio_t *zio = vdb->vdb_io;

	/*
	 * The rest of the zio stack only deals with EIO, ECKSUM, and ENXIO.
	 * Rather than teach the rest of the stack about other error
	 * possibilities (EFAULT, etc), we normalize the error value here.
	 */
	zio->io_error = (geterror(bp) != 0 ? EIO : 0);

	if (zio->io_error == 0 && bp->b_resid != 0)
		zio->io_error = EIO;	/* short transfer */

	kmem_free(vdb, sizeof (vdev_disk_buf_t));

	zio_interrupt(zio);
}
436 628
/*
 * vsd_free callback: release the dk_callback allocated for an ioctl zio.
 */
static void
vdev_disk_ioctl_free(zio_t *zio)
{
	kmem_free(zio->io_vsd, sizeof (struct dk_callback));
}
442 634
/*
 * vsd ops for ioctl zios: free the dk_callback on zio teardown and use
 * the default checksum report.
 */
static const zio_vsd_ops_t vdev_disk_vsd_ops = {
	vdev_disk_ioctl_free,
	zio_vsd_default_cksum_report
};
447 639
/*
 * Completion callback for asynchronous DKIOCFLUSHWRITECACHE ioctls:
 * record the result on the zio and reenter the pipeline.
 */
static void
vdev_disk_ioctl_done(void *zio_arg, int error)
{
	zio_t *zio = zio_arg;

	zio->io_error = error;

	zio_interrupt(zio);
}
457 649
↓ open down ↓ |
66 lines elided |
↑ open up ↑ |
/*
 * zio start: dispatch an ioctl (cache flush) or an asynchronous
 * read/write to the vdev's LDI handle.  Returns ZIO_PIPELINE_STOP when
 * completion will arrive via callback, ZIO_PIPELINE_CONTINUE when the
 * zio has already been resolved (error or synchronous completion).
 */
static int
vdev_disk_io_start(zio_t *zio)
{
	vdev_t *vd = zio->io_vd;
	vdev_disk_t *dvd = vd->vdev_tsd;
	vdev_disk_buf_t *vdb;
	struct dk_callback *dkc;
	buf_t *bp;
	int error;

	/*
	 * If the vdev is closed, it's likely in the REMOVED or FAULTED state.
	 * Nothing to be done here but return failure.
	 */
	if (dvd == NULL || (dvd->vd_ldi_offline && dvd->vd_lh == NULL)) {
		zio->io_error = ENXIO;
		return (ZIO_PIPELINE_CONTINUE);
	}

	if (zio->io_type == ZIO_TYPE_IOCTL) {
		/* XXPOLICY */
		if (!vdev_readable(vd)) {
			zio->io_error = ENXIO;
			return (ZIO_PIPELINE_CONTINUE);
		}

		switch (zio->io_cmd) {

		case DKIOCFLUSHWRITECACHE:

			if (zfs_nocacheflush)
				break;

			if (vd->vdev_nowritecache) {
				zio->io_error = ENOTSUP;
				break;
			}

			zio->io_vsd = dkc = kmem_alloc(sizeof (*dkc), KM_SLEEP);
			zio->io_vsd_ops = &vdev_disk_vsd_ops;

			dkc->dkc_callback = vdev_disk_ioctl_done;
			dkc->dkc_flag = FLUSH_VOLATILE;
			dkc->dkc_cookie = zio;

			error = ldi_ioctl(dvd->vd_lh, zio->io_cmd,
			    (uintptr_t)dkc, FKIOCTL, kcred, NULL);

			if (error == 0) {
				/*
				 * The ioctl will be done asynchronously,
				 * and will call vdev_disk_ioctl_done()
				 * upon completion.
				 */
				return (ZIO_PIPELINE_STOP);
			}

			if (error == ENOTSUP || error == ENOTTY) {
				/*
				 * If we get ENOTSUP or ENOTTY, we know that
				 * no future attempts will ever succeed.
				 * In this case we set a persistent bit so
				 * that we don't bother with the ioctl in the
				 * future.
				 */
				vd->vdev_nowritecache = B_TRUE;
			}
			zio->io_error = error;

			break;

		default:
			zio->io_error = ENOTSUP;
		}

		return (ZIO_PIPELINE_CONTINUE);
	}

	/* read/write: wrap the zio in a buf and hand it to ldi_strategy() */
	vdb = kmem_alloc(sizeof (vdev_disk_buf_t), KM_SLEEP);

	vdb->vdb_io = zio;
	bp = &vdb->vdb_buf;

	bioinit(bp);
	bp->b_flags = B_BUSY | B_NOCACHE |
	    (zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE);
	if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))
		bp->b_flags |= B_FAILFAST;
	bp->b_bcount = zio->io_size;
	bp->b_un.b_addr = zio->io_data;
	bp->b_lblkno = lbtodb(zio->io_offset);
	bp->b_bufsize = zio->io_size;
	bp->b_iodone = (int (*)())vdev_disk_io_intr;

	zfs_zone_zio_start(zio);

	/* ldi_strategy() will return non-zero only on programming errors */
	VERIFY(ldi_strategy(dvd->vd_lh, bp) == 0);

	return (ZIO_PIPELINE_STOP);
}
550 751
551 752 static void
552 753 vdev_disk_io_done(zio_t *zio)
553 754 {
554 755 vdev_t *vd = zio->io_vd;
555 756
556 757 zfs_zone_zio_done(zio);
557 758
558 759 /*
559 760 * If the device returned EIO, then attempt a DKIOCSTATE ioctl to see if
560 761 * the device has been removed. If this is the case, then we trigger an
561 762 * asynchronous removal of the device. Otherwise, probe the device and
562 763 * make sure it's still accessible.
563 764 */
564 765 if (zio->io_error == EIO && !vd->vdev_remove_wanted) {
565 766 vdev_disk_t *dvd = vd->vdev_tsd;
566 767 int state = DKIO_NONE;
567 768
568 769 if (ldi_ioctl(dvd->vd_lh, DKIOCSTATE, (intptr_t)&state,
569 770 FKIOCTL, kcred, NULL) == 0 && state != DKIO_INSERTED) {
570 771 /*
571 772 * We post the resource as soon as possible, instead of
572 773 * when the async removal actually happens, because the
573 774 * DE is using this information to discard previous I/O
574 775 * errors.
575 776 */
576 777 zfs_post_remove(zio->io_spa, vd);
577 778 vd->vdev_remove_wanted = B_TRUE;
578 779 spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE);
579 780 } else if (!vd->vdev_delayed_close) {
580 781 vd->vdev_delayed_close = B_TRUE;
581 782 }
582 783 }
583 784 }
584 785
/*
 * Virtual device operations vector for physical disk vdevs.
 */
vdev_ops_t vdev_disk_ops = {
	vdev_disk_open,
	vdev_disk_close,
	vdev_default_asize,
	vdev_disk_io_start,
	vdev_disk_io_done,
	NULL,
	vdev_disk_hold,
	vdev_disk_rele,
	VDEV_TYPE_DISK,		/* name of this vdev type */
	B_TRUE			/* leaf vdev */
};
597 798
/*
 * Given the root disk device devid or pathname, read the label from
 * the device, and construct a configuration nvlist.
 *
 * The device is opened read-only (by devid if decodable, else by path),
 * each of the VDEV_LABELS label copies is tried in turn, and the first
 * one with a valid, non-destroyed pool state and nonzero txg wins.
 * Returns 0 with *config set, an open/size errno, or EIDRM if no usable
 * label was found.  The caller owns and must free *config.
 */
int
vdev_disk_read_rootlabel(char *devpath, char *devid, nvlist_t **config)
{
	ldi_handle_t vd_lh;
	vdev_label_t *label;
	uint64_t s, size;
	int l;
	ddi_devid_t tmpdevid;
	int error = -1;
	char *minor_name;

	/*
	 * Read the device label and build the nvlist.
	 */
	if (devid != NULL && ddi_devid_str_decode(devid, &tmpdevid,
	    &minor_name) == 0) {
		error = ldi_open_by_devid(tmpdevid, minor_name,
		    FREAD, kcred, &vd_lh, zfs_li);
		ddi_devid_free(tmpdevid);
		ddi_devid_str_free(minor_name);
	}

	/* fall back to opening by path if the devid open didn't happen/work */
	if (error && (error = ldi_open_by_name(devpath, FREAD, kcred, &vd_lh,
	    zfs_li)))
		return (error);

	if (ldi_get_size(vd_lh, &s)) {
		(void) ldi_close(vd_lh, FREAD, kcred);
		return (EIO);
	}

	size = P2ALIGN_TYPED(s, sizeof (vdev_label_t), uint64_t);
	label = kmem_alloc(sizeof (vdev_label_t), KM_SLEEP);

	*config = NULL;
	for (l = 0; l < VDEV_LABELS; l++) {
		uint64_t offset, state, txg = 0;

		/* read vdev label */
		offset = vdev_label_offset(size, l, 0);
		if (vdev_disk_ldi_physio(vd_lh, (caddr_t)label,
		    VDEV_SKIP_SIZE + VDEV_PHYS_SIZE, offset, B_READ) != 0)
			continue;

		if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
		    sizeof (label->vl_vdev_phys.vp_nvlist), config, 0) != 0) {
			*config = NULL;
			continue;
		}

		/* reject labels from destroyed or invalid-state pools */
		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_STATE,
		    &state) != 0 || state >= POOL_STATE_DESTROYED) {
			nvlist_free(*config);
			*config = NULL;
			continue;
		}

		/* a zero txg means the label was never synced; skip it */
		if (nvlist_lookup_uint64(*config, ZPOOL_CONFIG_POOL_TXG,
		    &txg) != 0 || txg == 0) {
			nvlist_free(*config);
			*config = NULL;
			continue;
		}

		break;
	}

	kmem_free(label, sizeof (vdev_label_t));
	(void) ldi_close(vd_lh, FREAD, kcred);
	if (*config == NULL)
		error = EIDRM;

	return (error);
}
↓ open down ↓ |
198 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX