Print this page
7127 remove -Wno-missing-braces from Makefile.uts
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/xen/io/xdf.c
+++ new/usr/src/uts/common/xen/io/xdf.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27 /*
28 28 * xdf.c - Xen Virtual Block Device Driver
29 29 * TODO:
30 30 * - support alternate block size (currently only DEV_BSIZE supported)
31 31 * - revalidate geometry for removable devices
32 32 *
33 33 * This driver exports Solaris disk device nodes, accepts IO requests from
34 34 * those nodes, and services those requests by talking to a backend device
35 35 * in another domain.
36 36 *
37 37 * Communication with the backend device is done via a ringbuffer (which is
38 38 * managed via xvdi interfaces) and dma memory (which is managed via ddi
39 39 * interfaces).
40 40 *
41 41 * Communication with the backend device is dependent upon establishing a
42 42 * connection to the backend device. This connection process involves
43 43 * reading device configuration information from xenbus and publishing
44 44 * some frontend runtime configuration parameters via the xenbus (for
45 45 * consumption by the backend). Once we've published runtime configuration
46 46 * information via the xenbus, the backend device can enter the connected
47 47 * state and we'll enter the XD_CONNECTED state. But before we can allow
48 48 * random IO to begin, we need to do IO to the backend device to determine
49 49 * the device label and if flush operations are supported. Once this is
50 50 * done we enter the XD_READY state and can process any IO operations.
51 51 *
52 52 * We receive notifications of xenbus state changes for the backend device
53 53 * (aka, the "other end") via the xdf_oe_change() callback. This callback
54 54 * is single threaded, meaning that we can't receive new notification of
55 55 * other end state changes while we're processing an outstanding
56 56 * notification of an other end state change. Therefore we can't do any
57 57 * blocking operations from the xdf_oe_change() callback. This is why we
58 58 * have a separate taskq (xdf_ready_tq) which exists to do the necessary
59 59 * IO to get us from the XD_CONNECTED to the XD_READY state. All IO
60 60 * generated by the xdf_ready_tq thread (xdf_ready_tq_thread) will go
61 61 * through xdf_lb_rdwr(), which is a synchronous IO interface. IOs
62 62 * generated by the xdf_ready_tq_thread thread have priority over all
63 63 * other IO requests.
64 64 *
65 65 * We also communicate with the backend device via the xenbus "media-req"
66 66 * (XBP_MEDIA_REQ) property. For more information on this see the
67 67 * comments in blkif.h.
68 68 */
69 69
70 70 #include <io/xdf.h>
71 71
72 72 #include <sys/conf.h>
73 73 #include <sys/dkio.h>
74 74 #include <sys/promif.h>
75 75 #include <sys/sysmacros.h>
76 76 #include <sys/kstat.h>
77 77 #include <sys/mach_mmu.h>
78 78 #ifdef XPV_HVM_DRIVER
79 79 #include <sys/xpv_support.h>
80 80 #include <sys/sunndi.h>
81 81 #else /* !XPV_HVM_DRIVER */
82 82 #include <sys/evtchn_impl.h>
83 83 #endif /* !XPV_HVM_DRIVER */
84 84 #include <public/io/xenbus.h>
85 85 #include <xen/sys/xenbus_impl.h>
86 86 #include <sys/scsi/generic/inquiry.h>
87 87 #include <xen/io/blkif_impl.h>
88 88 #include <sys/fdio.h>
89 89 #include <sys/cdio.h>
90 90
/*
 * DEBUG_EVAL(x) expands to (x) in DEBUG builds and to nothing otherwise,
 * so debug-only statements can be written without '#ifdef DEBUG' wrappers.
 */
#ifndef DEBUG
#define	DEBUG_EVAL(x)
#else	/* DEBUG */
#define	DEBUG_EVAL(x)	(x)
#endif	/* DEBUG */
100 100
/*
 * Ring drain polling parameters: the delay between polls and the maximum
 * number of polls (200 polls * 0.05 sec = 10 sec).
 * NOTE(review): 50*1000 equalling 0.05 sec implies the delay is expressed
 * in microseconds despite the "MSEC" in the name -- confirm at the consumer.
 */
#define	XDF_DRAIN_MSEC_DELAY	(50*1000)	/* 00.05 sec */
#define	XDF_DRAIN_RETRY_COUNT	200		/* 10.00 sec */

#define	INVALID_DOMID		((domid_t)-1)

/* values stored in a v_req_t's v_flush_diskcache field */
#define	FLUSH_DISKCACHE		0x1
#define	WRITE_BARRIER		0x2

/* block to write to cause a cache flush */
#define	DEFAULT_FLUSH_BLOCK	156

/*
 * Both flush mechanisms require xdf_feature_barrier; xdf_flush_supported
 * selects between the write-barrier and the flush-diskcache variant.
 */
#define	USE_WRITE_BARRIER(vdp)						\
	((vdp)->xdf_feature_barrier && !(vdp)->xdf_flush_supported)
#define	USE_FLUSH_DISKCACHE(vdp)					\
	((vdp)->xdf_feature_barrier && (vdp)->xdf_flush_supported)

/* a write whose data buffer is the driver's own cached flush block */
#define	IS_WRITE_BARRIER(vdp, bp)					\
	(!IS_READ(bp) && USE_WRITE_BARRIER(vdp) &&			\
	((bp)->b_un.b_addr == (vdp)->xdf_cache_flush_block))

/*
 * A zero-length write while flush-diskcache is in use.
 * Note: this macro references 'vdp' although it is not one of its
 * parameters, so a variable of that name must be in scope wherever the
 * macro is expanded.
 */
#define	IS_FLUSH_DISKCACHE(bp)						\
	(!IS_READ(bp) && USE_FLUSH_DISKCACHE(vdp) && ((bp)->b_bcount == 0))

/*
 * A vreq is done once its current DMA window has completed and either it
 * is a flush-diskcache request or that window was the last one.
 */
#define	VREQ_DONE(vreq)							\
	VOID2BOOLEAN(((vreq)->v_status == VREQ_DMAWIN_DONE) &&	\
	    (((vreq)->v_flush_diskcache == FLUSH_DISKCACHE) ||	\
	    (((vreq)->v_dmaw + 1) == (vreq)->v_ndmaws)))

/* the vreq attached to a buf is kept in the buf's av_back pointer */
#define	BP_VREQ(bp)		((v_req_t *)((bp)->av_back))
#define	BP_VREQ_SET(bp, vreq)	(((bp)->av_back = (buf_t *)(vreq)))
125 125
126 126 extern int do_polled_io;
127 127
128 128 /* run-time tunables that we don't want the compiler to optimize away */
129 129 volatile int xdf_debug = 0;
130 130 volatile boolean_t xdf_barrier_flush_disable = B_FALSE;
131 131
132 132 /* per module globals */
133 133 major_t xdf_major;
134 134 static void *xdf_ssp;
135 135 static kmem_cache_t *xdf_vreq_cache;
136 136 static kmem_cache_t *xdf_gs_cache;
137 137 static int xdf_maxphys = XB_MAXPHYS;
138 138 static diskaddr_t xdf_flush_block = DEFAULT_FLUSH_BLOCK;
139 139 static int xdf_fbrewrites; /* flush block re-write count */
140 140
141 141 /* misc public functions (used by xdf_shell.c) */
142 142 int xdf_lb_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t, void *);
143 143 int xdf_lb_getinfo(dev_info_t *, int, void *, void *);
144 144
145 145 /* misc private functions */
146 146 static void xdf_io_start(xdf_t *);
147 147
148 148 /* callbacks from commmon label */
149 149 static cmlb_tg_ops_t xdf_lb_ops = {
150 150 TG_DK_OPS_VERSION_1,
151 151 xdf_lb_rdwr,
152 152 xdf_lb_getinfo
153 153 };
154 154
155 155 /*
156 156 * I/O buffer DMA attributes
157 157 * Make sure: one DMA window contains BLKIF_MAX_SEGMENTS_PER_REQUEST at most
158 158 */
159 159 static ddi_dma_attr_t xb_dma_attr = {
160 160 DMA_ATTR_V0,
161 161 (uint64_t)0, /* lowest address */
162 162 (uint64_t)0xffffffffffffffff, /* highest usable address */
163 163 (uint64_t)0xffffff, /* DMA counter limit max */
164 164 (uint64_t)XB_BSIZE, /* alignment in bytes */
165 165 XB_BSIZE - 1, /* bitmap of burst sizes */
166 166 XB_BSIZE, /* min transfer */
167 167 (uint64_t)XB_MAX_XFER, /* maximum transfer */
168 168 (uint64_t)PAGEOFFSET, /* 1 page segment length */
169 169 BLKIF_MAX_SEGMENTS_PER_REQUEST, /* maximum number of segments */
170 170 XB_BSIZE, /* granularity */
171 171 0, /* flags (reserved) */
172 172 };
173 173
174 174 static ddi_device_acc_attr_t xc_acc_attr = {
175 175 DDI_DEVICE_ATTR_V0,
176 176 DDI_NEVERSWAP_ACC,
177 177 DDI_STRICTORDER_ACC
178 178 };
179 179
180 180 static void
181 181 xdf_timeout_handler(void *arg)
182 182 {
183 183 xdf_t *vdp = arg;
184 184
185 185 mutex_enter(&vdp->xdf_dev_lk);
186 186 vdp->xdf_timeout_id = 0;
187 187 mutex_exit(&vdp->xdf_dev_lk);
188 188
189 189 /* new timeout thread could be re-scheduled */
190 190 xdf_io_start(vdp);
191 191 }
192 192
193 193 /*
194 194 * callback func when DMA/GTE resources is available
195 195 *
196 196 * Note: we only register one callback function to grant table subsystem
197 197 * since we only have one 'struct gnttab_free_callback' in xdf_t.
198 198 */
199 199 static int
200 200 xdf_dmacallback(caddr_t arg)
201 201 {
202 202 xdf_t *vdp = (xdf_t *)arg;
203 203 ASSERT(vdp != NULL);
204 204
205 205 DPRINTF(DMA_DBG, ("xdf@%s: DMA callback started\n",
206 206 vdp->xdf_addr));
207 207
208 208 ddi_trigger_softintr(vdp->xdf_softintr_id);
209 209 return (DDI_DMA_CALLBACK_DONE);
210 210 }
211 211
212 212 static ge_slot_t *
213 213 gs_get(xdf_t *vdp, int isread)
214 214 {
215 215 grant_ref_t gh;
216 216 ge_slot_t *gs;
217 217
218 218 /* try to alloc GTEs needed in this slot, first */
219 219 if (gnttab_alloc_grant_references(
220 220 BLKIF_MAX_SEGMENTS_PER_REQUEST, &gh) == -1) {
221 221 if (vdp->xdf_gnt_callback.next == NULL) {
222 222 SETDMACBON(vdp);
223 223 gnttab_request_free_callback(
224 224 &vdp->xdf_gnt_callback,
225 225 (void (*)(void *))xdf_dmacallback,
226 226 (void *)vdp,
227 227 BLKIF_MAX_SEGMENTS_PER_REQUEST);
228 228 }
229 229 return (NULL);
230 230 }
231 231
232 232 gs = kmem_cache_alloc(xdf_gs_cache, KM_NOSLEEP);
233 233 if (gs == NULL) {
234 234 gnttab_free_grant_references(gh);
235 235 if (vdp->xdf_timeout_id == 0)
236 236 /* restart I/O after one second */
237 237 vdp->xdf_timeout_id =
238 238 timeout(xdf_timeout_handler, vdp, hz);
239 239 return (NULL);
240 240 }
241 241
242 242 /* init gs_slot */
243 243 gs->gs_oeid = vdp->xdf_peer;
244 244 gs->gs_isread = isread;
245 245 gs->gs_ghead = gh;
246 246 gs->gs_ngrefs = 0;
247 247
248 248 return (gs);
249 249 }
250 250
251 251 static void
252 252 gs_free(ge_slot_t *gs)
253 253 {
254 254 int i;
255 255
256 256 /* release all grant table entry resources used in this slot */
257 257 for (i = 0; i < gs->gs_ngrefs; i++)
258 258 gnttab_end_foreign_access(gs->gs_ge[i], !gs->gs_isread, 0);
259 259 gnttab_free_grant_references(gs->gs_ghead);
260 260 list_remove(&gs->gs_vreq->v_gs, gs);
261 261 kmem_cache_free(xdf_gs_cache, gs);
262 262 }
263 263
264 264 static grant_ref_t
265 265 gs_grant(ge_slot_t *gs, mfn_t mfn)
266 266 {
267 267 grant_ref_t gr = gnttab_claim_grant_reference(&gs->gs_ghead);
268 268
269 269 ASSERT(gr != -1);
270 270 ASSERT(gs->gs_ngrefs < BLKIF_MAX_SEGMENTS_PER_REQUEST);
271 271 gs->gs_ge[gs->gs_ngrefs++] = gr;
272 272 gnttab_grant_foreign_access_ref(gr, gs->gs_oeid, mfn, !gs->gs_isread);
273 273
274 274 return (gr);
275 275 }
276 276
277 277 /*
278 278 * Alloc a vreq for this bp
279 279 * bp->av_back contains the pointer to the vreq upon return
280 280 */
281 281 static v_req_t *
282 282 vreq_get(xdf_t *vdp, buf_t *bp)
283 283 {
284 284 v_req_t *vreq = NULL;
285 285
286 286 ASSERT(BP_VREQ(bp) == NULL);
287 287
288 288 vreq = kmem_cache_alloc(xdf_vreq_cache, KM_NOSLEEP);
289 289 if (vreq == NULL) {
290 290 if (vdp->xdf_timeout_id == 0)
291 291 /* restart I/O after one second */
292 292 vdp->xdf_timeout_id =
293 293 timeout(xdf_timeout_handler, vdp, hz);
294 294 return (NULL);
295 295 }
296 296 bzero(vreq, sizeof (v_req_t));
297 297 list_create(&vreq->v_gs, sizeof (ge_slot_t),
298 298 offsetof(ge_slot_t, gs_vreq_link));
299 299 vreq->v_buf = bp;
300 300 vreq->v_status = VREQ_INIT;
301 301 vreq->v_runq = B_FALSE;
302 302 BP_VREQ_SET(bp, vreq);
303 303 /* init of other fields in vreq is up to the caller */
304 304
305 305 list_insert_head(&vdp->xdf_vreq_act, (void *)vreq);
306 306
307 307 return (vreq);
308 308 }
309 309
310 310 static void
311 311 vreq_free(xdf_t *vdp, v_req_t *vreq)
312 312 {
313 313 buf_t *bp = vreq->v_buf;
314 314
315 315 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
316 316 ASSERT(BP_VREQ(bp) == vreq);
317 317
318 318 list_remove(&vdp->xdf_vreq_act, vreq);
319 319
320 320 if (vreq->v_flush_diskcache == FLUSH_DISKCACHE)
321 321 goto done;
322 322
323 323 switch (vreq->v_status) {
324 324 case VREQ_DMAWIN_DONE:
325 325 case VREQ_GS_ALLOCED:
326 326 case VREQ_DMABUF_BOUND:
327 327 (void) ddi_dma_unbind_handle(vreq->v_dmahdl);
328 328 /*FALLTHRU*/
329 329 case VREQ_DMAMEM_ALLOCED:
330 330 if (!ALIGNED_XFER(bp)) {
331 331 ASSERT(vreq->v_abuf != NULL);
332 332 if (!IS_ERROR(bp) && IS_READ(bp))
333 333 bcopy(vreq->v_abuf, bp->b_un.b_addr,
334 334 bp->b_bcount);
335 335 ddi_dma_mem_free(&vreq->v_align);
336 336 }
337 337 /*FALLTHRU*/
338 338 case VREQ_MEMDMAHDL_ALLOCED:
339 339 if (!ALIGNED_XFER(bp))
340 340 ddi_dma_free_handle(&vreq->v_memdmahdl);
341 341 /*FALLTHRU*/
342 342 case VREQ_DMAHDL_ALLOCED:
343 343 ddi_dma_free_handle(&vreq->v_dmahdl);
344 344 break;
345 345 default:
346 346 break;
347 347 }
348 348 done:
349 349 ASSERT(!vreq->v_runq);
350 350 list_destroy(&vreq->v_gs);
351 351 kmem_cache_free(xdf_vreq_cache, vreq);
352 352 }
353 353
354 354 /*
355 355 * Snarf new data if our flush block was re-written
356 356 */
357 357 static void
358 358 check_fbwrite(xdf_t *vdp, buf_t *bp, daddr_t blkno)
359 359 {
360 360 int nblks;
361 361 boolean_t mapin;
362 362
363 363 if (IS_WRITE_BARRIER(vdp, bp))
364 364 return; /* write was a flush write */
365 365
366 366 mapin = B_FALSE;
367 367 nblks = bp->b_bcount >> DEV_BSHIFT;
368 368 if (xdf_flush_block >= blkno && xdf_flush_block < (blkno + nblks)) {
369 369 xdf_fbrewrites++;
370 370 if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
371 371 mapin = B_TRUE;
372 372 bp_mapin(bp);
373 373 }
374 374 bcopy(bp->b_un.b_addr +
375 375 ((xdf_flush_block - blkno) << DEV_BSHIFT),
376 376 vdp->xdf_cache_flush_block, DEV_BSIZE);
377 377 if (mapin)
378 378 bp_mapout(bp);
379 379 }
380 380 }
381 381
382 382 /*
383 383 * Initalize the DMA and grant table resources for the buf
384 384 */
385 385 static int
386 386 vreq_setup(xdf_t *vdp, v_req_t *vreq)
387 387 {
388 388 int rc;
389 389 ddi_dma_attr_t dmaattr;
390 390 uint_t ndcs, ndws;
391 391 ddi_dma_handle_t dh;
392 392 ddi_dma_handle_t mdh;
393 393 ddi_dma_cookie_t dc;
394 394 ddi_acc_handle_t abh;
395 395 caddr_t aba;
396 396 ge_slot_t *gs;
397 397 size_t bufsz;
398 398 off_t off;
399 399 size_t sz;
400 400 buf_t *bp = vreq->v_buf;
401 401 int dma_flags = (IS_READ(bp) ? DDI_DMA_READ : DDI_DMA_WRITE) |
402 402 DDI_DMA_STREAMING | DDI_DMA_PARTIAL;
403 403
404 404 switch (vreq->v_status) {
405 405 case VREQ_INIT:
406 406 if (IS_FLUSH_DISKCACHE(bp)) {
407 407 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
408 408 DPRINTF(DMA_DBG, ("xdf@%s: "
409 409 "get ge_slotfailed\n", vdp->xdf_addr));
410 410 return (DDI_FAILURE);
411 411 }
412 412 vreq->v_blkno = 0;
413 413 vreq->v_nslots = 1;
414 414 vreq->v_flush_diskcache = FLUSH_DISKCACHE;
415 415 vreq->v_status = VREQ_GS_ALLOCED;
416 416 gs->gs_vreq = vreq;
417 417 list_insert_head(&vreq->v_gs, gs);
418 418 return (DDI_SUCCESS);
419 419 }
420 420
421 421 if (IS_WRITE_BARRIER(vdp, bp))
422 422 vreq->v_flush_diskcache = WRITE_BARRIER;
423 423 vreq->v_blkno = bp->b_blkno +
424 424 (diskaddr_t)(uintptr_t)bp->b_private;
425 425 /* See if we wrote new data to our flush block */
426 426 if (!IS_READ(bp) && USE_WRITE_BARRIER(vdp))
427 427 check_fbwrite(vdp, bp, vreq->v_blkno);
428 428 vreq->v_status = VREQ_INIT_DONE;
429 429 /*FALLTHRU*/
430 430
431 431 case VREQ_INIT_DONE:
432 432 /*
433 433 * alloc DMA handle
434 434 */
435 435 rc = ddi_dma_alloc_handle(vdp->xdf_dip, &xb_dma_attr,
436 436 xdf_dmacallback, (caddr_t)vdp, &dh);
437 437 if (rc != DDI_SUCCESS) {
438 438 SETDMACBON(vdp);
439 439 DPRINTF(DMA_DBG, ("xdf@%s: DMA handle alloc failed\n",
440 440 vdp->xdf_addr));
441 441 return (DDI_FAILURE);
442 442 }
443 443
444 444 vreq->v_dmahdl = dh;
445 445 vreq->v_status = VREQ_DMAHDL_ALLOCED;
446 446 /*FALLTHRU*/
447 447
448 448 case VREQ_DMAHDL_ALLOCED:
449 449 /*
450 450 * alloc dma handle for 512-byte aligned buf
451 451 */
452 452 if (!ALIGNED_XFER(bp)) {
453 453 /*
454 454 * XXPV: we need to temporarily enlarge the seg
455 455 * boundary and s/g length to work round CR6381968
456 456 */
457 457 dmaattr = xb_dma_attr;
458 458 dmaattr.dma_attr_seg = (uint64_t)-1;
459 459 dmaattr.dma_attr_sgllen = INT_MAX;
460 460 rc = ddi_dma_alloc_handle(vdp->xdf_dip, &dmaattr,
461 461 xdf_dmacallback, (caddr_t)vdp, &mdh);
462 462 if (rc != DDI_SUCCESS) {
463 463 SETDMACBON(vdp);
464 464 DPRINTF(DMA_DBG, ("xdf@%s: "
465 465 "unaligned buf DMAhandle alloc failed\n",
466 466 vdp->xdf_addr));
467 467 return (DDI_FAILURE);
468 468 }
469 469 vreq->v_memdmahdl = mdh;
470 470 vreq->v_status = VREQ_MEMDMAHDL_ALLOCED;
471 471 }
472 472 /*FALLTHRU*/
473 473
474 474 case VREQ_MEMDMAHDL_ALLOCED:
475 475 /*
476 476 * alloc 512-byte aligned buf
477 477 */
478 478 if (!ALIGNED_XFER(bp)) {
479 479 if (bp->b_flags & (B_PAGEIO | B_PHYS))
480 480 bp_mapin(bp);
481 481 rc = ddi_dma_mem_alloc(vreq->v_memdmahdl,
482 482 roundup(bp->b_bcount, XB_BSIZE), &xc_acc_attr,
483 483 DDI_DMA_STREAMING, xdf_dmacallback, (caddr_t)vdp,
484 484 &aba, &bufsz, &abh);
485 485 if (rc != DDI_SUCCESS) {
486 486 SETDMACBON(vdp);
487 487 DPRINTF(DMA_DBG, ("xdf@%s: "
488 488 "DMA mem allocation failed\n",
489 489 vdp->xdf_addr));
490 490 return (DDI_FAILURE);
491 491 }
492 492
493 493 vreq->v_abuf = aba;
494 494 vreq->v_align = abh;
495 495 vreq->v_status = VREQ_DMAMEM_ALLOCED;
496 496
497 497 ASSERT(bufsz >= bp->b_bcount);
498 498 if (!IS_READ(bp))
499 499 bcopy(bp->b_un.b_addr, vreq->v_abuf,
500 500 bp->b_bcount);
501 501 }
502 502 /*FALLTHRU*/
503 503
504 504 case VREQ_DMAMEM_ALLOCED:
505 505 /*
506 506 * dma bind
507 507 */
508 508 if (ALIGNED_XFER(bp)) {
509 509 rc = ddi_dma_buf_bind_handle(vreq->v_dmahdl, bp,
510 510 dma_flags, xdf_dmacallback, (caddr_t)vdp,
511 511 &dc, &ndcs);
512 512 } else {
513 513 rc = ddi_dma_addr_bind_handle(vreq->v_dmahdl,
514 514 NULL, vreq->v_abuf, bp->b_bcount, dma_flags,
515 515 xdf_dmacallback, (caddr_t)vdp, &dc, &ndcs);
516 516 }
517 517 if (rc == DDI_DMA_MAPPED || rc == DDI_DMA_PARTIAL_MAP) {
518 518 /* get num of dma windows */
519 519 if (rc == DDI_DMA_PARTIAL_MAP) {
520 520 rc = ddi_dma_numwin(vreq->v_dmahdl, &ndws);
521 521 ASSERT(rc == DDI_SUCCESS);
522 522 } else {
523 523 ndws = 1;
524 524 }
525 525 } else {
526 526 SETDMACBON(vdp);
527 527 DPRINTF(DMA_DBG, ("xdf@%s: DMA bind failed\n",
528 528 vdp->xdf_addr));
529 529 return (DDI_FAILURE);
530 530 }
531 531
532 532 vreq->v_dmac = dc;
533 533 vreq->v_dmaw = 0;
534 534 vreq->v_ndmacs = ndcs;
535 535 vreq->v_ndmaws = ndws;
536 536 vreq->v_nslots = ndws;
537 537 vreq->v_status = VREQ_DMABUF_BOUND;
538 538 /*FALLTHRU*/
539 539
540 540 case VREQ_DMABUF_BOUND:
541 541 /*
542 542 * get ge_slot, callback is set upon failure from gs_get(),
543 543 * if not set previously
544 544 */
545 545 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
546 546 DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
547 547 vdp->xdf_addr));
548 548 return (DDI_FAILURE);
549 549 }
550 550
551 551 vreq->v_status = VREQ_GS_ALLOCED;
552 552 gs->gs_vreq = vreq;
553 553 list_insert_head(&vreq->v_gs, gs);
554 554 break;
555 555
556 556 case VREQ_GS_ALLOCED:
557 557 /* nothing need to be done */
558 558 break;
559 559
560 560 case VREQ_DMAWIN_DONE:
561 561 /*
562 562 * move to the next dma window
563 563 */
564 564 ASSERT((vreq->v_dmaw + 1) < vreq->v_ndmaws);
565 565
566 566 /* get a ge_slot for this DMA window */
567 567 if ((gs = gs_get(vdp, IS_READ(bp))) == NULL) {
568 568 DPRINTF(DMA_DBG, ("xdf@%s: get ge_slot failed\n",
569 569 vdp->xdf_addr));
570 570 return (DDI_FAILURE);
571 571 }
572 572
573 573 vreq->v_dmaw++;
574 574 VERIFY(ddi_dma_getwin(vreq->v_dmahdl, vreq->v_dmaw, &off, &sz,
575 575 &vreq->v_dmac, &vreq->v_ndmacs) == DDI_SUCCESS);
576 576 vreq->v_status = VREQ_GS_ALLOCED;
577 577 gs->gs_vreq = vreq;
578 578 list_insert_head(&vreq->v_gs, gs);
579 579 break;
580 580
581 581 default:
582 582 return (DDI_FAILURE);
583 583 }
584 584
585 585 return (DDI_SUCCESS);
586 586 }
587 587
588 588 static int
589 589 xdf_cmlb_attach(xdf_t *vdp)
590 590 {
591 591 dev_info_t *dip = vdp->xdf_dip;
592 592
593 593 return (cmlb_attach(dip, &xdf_lb_ops,
594 594 XD_IS_CD(vdp) ? DTYPE_RODIRECT : DTYPE_DIRECT,
595 595 XD_IS_RM(vdp),
596 596 B_TRUE,
597 597 XD_IS_CD(vdp) ? DDI_NT_CD_XVMD : DDI_NT_BLOCK_XVMD,
598 598 #if defined(XPV_HVM_DRIVER)
599 599 (XD_IS_CD(vdp) ? 0 : CMLB_CREATE_ALTSLICE_VTOC_16_DTYPE_DIRECT) |
600 600 CMLB_INTERNAL_MINOR_NODES,
601 601 #else /* !XPV_HVM_DRIVER */
602 602 XD_IS_CD(vdp) ? 0 : CMLB_FAKE_LABEL_ONE_PARTITION,
603 603 #endif /* !XPV_HVM_DRIVER */
604 604 vdp->xdf_vd_lbl, NULL));
605 605 }
606 606
607 607 static void
608 608 xdf_io_err(buf_t *bp, int err, size_t resid)
609 609 {
610 610 bioerror(bp, err);
611 611 if (resid == 0)
612 612 bp->b_resid = bp->b_bcount;
613 613 biodone(bp);
614 614 }
615 615
616 616 static void
617 617 xdf_kstat_enter(xdf_t *vdp, buf_t *bp)
618 618 {
619 619 v_req_t *vreq = BP_VREQ(bp);
620 620
621 621 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
622 622
623 623 if (vdp->xdf_xdev_iostat == NULL)
624 624 return;
625 625 if ((vreq != NULL) && vreq->v_runq) {
626 626 kstat_runq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
627 627 } else {
628 628 kstat_waitq_enter(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
629 629 }
630 630 }
631 631
632 632 static void
633 633 xdf_kstat_exit(xdf_t *vdp, buf_t *bp)
634 634 {
635 635 v_req_t *vreq = BP_VREQ(bp);
636 636
637 637 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
638 638
639 639 if (vdp->xdf_xdev_iostat == NULL)
640 640 return;
641 641 if ((vreq != NULL) && vreq->v_runq) {
642 642 kstat_runq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
643 643 } else {
644 644 kstat_waitq_exit(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
645 645 }
646 646 }
647 647
648 648 static void
649 649 xdf_kstat_waitq_to_runq(xdf_t *vdp, buf_t *bp)
650 650 {
651 651 v_req_t *vreq = BP_VREQ(bp);
652 652
653 653 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
654 654 ASSERT(!vreq->v_runq);
655 655
656 656 vreq->v_runq = B_TRUE;
657 657 if (vdp->xdf_xdev_iostat == NULL)
658 658 return;
659 659 kstat_waitq_to_runq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
660 660 }
661 661
662 662 static void
663 663 xdf_kstat_runq_to_waitq(xdf_t *vdp, buf_t *bp)
664 664 {
665 665 v_req_t *vreq = BP_VREQ(bp);
666 666
667 667 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
668 668 ASSERT(vreq->v_runq);
669 669
670 670 vreq->v_runq = B_FALSE;
671 671 if (vdp->xdf_xdev_iostat == NULL)
672 672 return;
673 673 kstat_runq_back_to_waitq(KSTAT_IO_PTR(vdp->xdf_xdev_iostat));
674 674 }
675 675
676 676 int
677 677 xdf_kstat_create(dev_info_t *dip, char *ks_module, int instance)
678 678 {
679 679 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
680 680 kstat_t *kstat;
681 681 buf_t *bp;
682 682
683 683 if ((kstat = kstat_create(
684 684 ks_module, instance, NULL, "disk",
685 685 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL)
686 686 return (-1);
687 687
688 688 /* See comment about locking in xdf_kstat_delete(). */
689 689 mutex_enter(&vdp->xdf_iostat_lk);
690 690 mutex_enter(&vdp->xdf_dev_lk);
691 691
692 692 /* only one kstat can exist at a time */
693 693 if (vdp->xdf_xdev_iostat != NULL) {
694 694 mutex_exit(&vdp->xdf_dev_lk);
695 695 mutex_exit(&vdp->xdf_iostat_lk);
696 696 kstat_delete(kstat);
697 697 return (-1);
698 698 }
699 699
700 700 vdp->xdf_xdev_iostat = kstat;
701 701 vdp->xdf_xdev_iostat->ks_lock = &vdp->xdf_dev_lk;
702 702 kstat_install(vdp->xdf_xdev_iostat);
703 703
704 704 /*
705 705 * Now that we've created a kstat, we need to update the waitq and
706 706 * runq counts for the kstat to reflect our current state.
707 707 *
708 708 * For a buf_t structure to be on the runq, it must have a ring
709 709 * buffer slot associated with it. To get a ring buffer slot the
710 710 * buf must first have a v_req_t and a ge_slot_t associated with it.
711 711 * Then when it is granted a ring buffer slot, v_runq will be set to
712 712 * true.
713 713 *
714 714 * For a buf_t structure to be on the waitq, it must not be on the
715 715 * runq. So to find all the buf_t's that should be on waitq, we
716 716 * walk the active buf list and add any buf_t's which aren't on the
717 717 * runq to the waitq.
718 718 */
719 719 bp = vdp->xdf_f_act;
720 720 while (bp != NULL) {
721 721 xdf_kstat_enter(vdp, bp);
722 722 bp = bp->av_forw;
723 723 }
724 724 if (vdp->xdf_ready_tq_bp != NULL)
725 725 xdf_kstat_enter(vdp, vdp->xdf_ready_tq_bp);
726 726
727 727 mutex_exit(&vdp->xdf_dev_lk);
728 728 mutex_exit(&vdp->xdf_iostat_lk);
729 729 return (0);
730 730 }
731 731
732 732 void
733 733 xdf_kstat_delete(dev_info_t *dip)
734 734 {
735 735 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
736 736 kstat_t *kstat;
737 737 buf_t *bp;
738 738
739 739 /*
740 740 * The locking order here is xdf_iostat_lk and then xdf_dev_lk.
741 741 * xdf_dev_lk is used to protect the xdf_xdev_iostat pointer
742 742 * and the contents of the our kstat. xdf_iostat_lk is used
743 743 * to protect the allocation and freeing of the actual kstat.
744 744 * xdf_dev_lk can't be used for this purpose because kstat
745 745 * readers use it to access the contents of the kstat and
746 746 * hence it can't be held when calling kstat_delete().
747 747 */
748 748 mutex_enter(&vdp->xdf_iostat_lk);
749 749 mutex_enter(&vdp->xdf_dev_lk);
750 750
751 751 if (vdp->xdf_xdev_iostat == NULL) {
752 752 mutex_exit(&vdp->xdf_dev_lk);
753 753 mutex_exit(&vdp->xdf_iostat_lk);
754 754 return;
755 755 }
756 756
757 757 /*
758 758 * We're about to destroy the kstat structures, so it isn't really
759 759 * necessary to update the runq and waitq counts. But, since this
760 760 * isn't a hot code path we can afford to be a little pedantic and
761 761 * go ahead and decrement the runq and waitq kstat counters to zero
762 762 * before free'ing them. This helps us ensure that we've gotten all
763 763 * our accounting correct.
764 764 *
765 765 * For an explanation of how we determine which buffers go on the
766 766 * runq vs which go on the waitq, see the comments in
767 767 * xdf_kstat_create().
768 768 */
769 769 bp = vdp->xdf_f_act;
770 770 while (bp != NULL) {
771 771 xdf_kstat_exit(vdp, bp);
772 772 bp = bp->av_forw;
773 773 }
774 774 if (vdp->xdf_ready_tq_bp != NULL)
775 775 xdf_kstat_exit(vdp, vdp->xdf_ready_tq_bp);
776 776
777 777 kstat = vdp->xdf_xdev_iostat;
778 778 vdp->xdf_xdev_iostat = NULL;
779 779 mutex_exit(&vdp->xdf_dev_lk);
780 780 kstat_delete(kstat);
781 781 mutex_exit(&vdp->xdf_iostat_lk);
782 782 }
783 783
784 784 /*
785 785 * Add an IO requests onto the active queue.
786 786 *
787 787 * We have to detect IOs generated by xdf_ready_tq_thread. These IOs
788 788 * are used to establish a connection to the backend, so they recieve
789 789 * priority over all other IOs. Since xdf_ready_tq_thread only does
790 790 * synchronous IO, there can only be one xdf_ready_tq_thread request at any
791 791 * given time and we record the buf associated with that request in
792 792 * xdf_ready_tq_bp.
793 793 */
794 794 static void
795 795 xdf_bp_push(xdf_t *vdp, buf_t *bp)
796 796 {
797 797 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
798 798 ASSERT(bp->av_forw == NULL);
799 799
800 800 xdf_kstat_enter(vdp, bp);
801 801
802 802 if (curthread == vdp->xdf_ready_tq_thread) {
803 803 /* new IO requests from the ready thread */
804 804 ASSERT(vdp->xdf_ready_tq_bp == NULL);
805 805 vdp->xdf_ready_tq_bp = bp;
806 806 return;
807 807 }
808 808
809 809 /* this is normal IO request */
810 810 ASSERT(bp != vdp->xdf_ready_tq_bp);
811 811
812 812 if (vdp->xdf_f_act == NULL) {
813 813 /* this is only only IO on the active queue */
814 814 ASSERT(vdp->xdf_l_act == NULL);
815 815 ASSERT(vdp->xdf_i_act == NULL);
816 816 vdp->xdf_f_act = vdp->xdf_l_act = vdp->xdf_i_act = bp;
817 817 return;
818 818 }
819 819
820 820 /* add this IO to the tail of the active queue */
821 821 vdp->xdf_l_act->av_forw = bp;
822 822 vdp->xdf_l_act = bp;
823 823 if (vdp->xdf_i_act == NULL)
824 824 vdp->xdf_i_act = bp;
825 825 }
826 826
827 827 static void
828 828 xdf_bp_pop(xdf_t *vdp, buf_t *bp)
829 829 {
830 830 buf_t *bp_iter;
831 831
832 832 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
833 833 ASSERT(VREQ_DONE(BP_VREQ(bp)));
834 834
835 835 if (vdp->xdf_ready_tq_bp == bp) {
836 836 /* we're done with a ready thread IO request */
837 837 ASSERT(bp->av_forw == NULL);
838 838 vdp->xdf_ready_tq_bp = NULL;
839 839 return;
840 840 }
841 841
842 842 /* we're done with a normal IO request */
843 843 ASSERT((bp->av_forw != NULL) || (bp == vdp->xdf_l_act));
844 844 ASSERT((bp->av_forw == NULL) || (bp != vdp->xdf_l_act));
845 845 ASSERT(VREQ_DONE(BP_VREQ(vdp->xdf_f_act)));
846 846 ASSERT(vdp->xdf_f_act != vdp->xdf_i_act);
847 847
848 848 if (bp == vdp->xdf_f_act) {
849 849 /* This IO was at the head of our active queue. */
850 850 vdp->xdf_f_act = bp->av_forw;
851 851 if (bp == vdp->xdf_l_act)
852 852 vdp->xdf_l_act = NULL;
853 853 } else {
854 854 /* There IO finished before some other pending IOs. */
855 855 bp_iter = vdp->xdf_f_act;
856 856 while (bp != bp_iter->av_forw) {
857 857 bp_iter = bp_iter->av_forw;
858 858 ASSERT(VREQ_DONE(BP_VREQ(bp_iter)));
859 859 ASSERT(bp_iter != vdp->xdf_i_act);
860 860 }
861 861 bp_iter->av_forw = bp->av_forw;
862 862 if (bp == vdp->xdf_l_act)
863 863 vdp->xdf_l_act = bp_iter;
864 864 }
865 865 bp->av_forw = NULL;
866 866 }
867 867
868 868 static buf_t *
869 869 xdf_bp_next(xdf_t *vdp)
870 870 {
871 871 v_req_t *vreq;
872 872 buf_t *bp;
873 873
874 874 if (vdp->xdf_state == XD_CONNECTED) {
875 875 /*
876 876 * If we're in the XD_CONNECTED state, we only service IOs
877 877 * from the xdf_ready_tq_thread thread.
878 878 */
879 879 if ((bp = vdp->xdf_ready_tq_bp) == NULL)
880 880 return (NULL);
881 881 if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq)))
882 882 return (bp);
883 883 return (NULL);
884 884 }
885 885
886 886 /* if we're not in the XD_CONNECTED or XD_READY state we can't do IO */
887 887 if (vdp->xdf_state != XD_READY)
888 888 return (NULL);
889 889
890 890 ASSERT(vdp->xdf_ready_tq_bp == NULL);
891 891 for (;;) {
892 892 if ((bp = vdp->xdf_i_act) == NULL)
893 893 return (NULL);
894 894 if (((vreq = BP_VREQ(bp)) == NULL) || (!VREQ_DONE(vreq)))
895 895 return (bp);
896 896
897 897 /* advance the active buf index pointer */
898 898 vdp->xdf_i_act = bp->av_forw;
899 899 }
900 900 }
901 901
902 902 static void
903 903 xdf_io_fini(xdf_t *vdp, uint64_t id, int bioerr)
904 904 {
905 905 ge_slot_t *gs = (ge_slot_t *)(uintptr_t)id;
906 906 v_req_t *vreq = gs->gs_vreq;
907 907 buf_t *bp = vreq->v_buf;
908 908
909 909 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
910 910 ASSERT(BP_VREQ(bp) == vreq);
911 911
912 912 gs_free(gs);
913 913
914 914 if (bioerr != 0)
915 915 bioerror(bp, bioerr);
916 916 ASSERT(vreq->v_nslots > 0);
917 917 if (--vreq->v_nslots > 0)
918 918 return;
919 919
920 920 /* remove this IO from our active queue */
921 921 xdf_bp_pop(vdp, bp);
922 922
923 923 ASSERT(vreq->v_runq);
924 924 xdf_kstat_exit(vdp, bp);
925 925 vreq->v_runq = B_FALSE;
926 926 vreq_free(vdp, vreq);
927 927
928 928 if (IS_ERROR(bp)) {
929 929 xdf_io_err(bp, geterror(bp), 0);
930 930 } else if (bp->b_resid != 0) {
931 931 /* Partial transfers are an error */
932 932 xdf_io_err(bp, EIO, bp->b_resid);
933 933 } else {
934 934 biodone(bp);
935 935 }
936 936 }
937 937
938 938 /*
939 939 * xdf interrupt handler
940 940 */
941 941 static uint_t
942 942 xdf_intr_locked(xdf_t *vdp)
943 943 {
944 944 xendev_ring_t *xbr;
945 945 blkif_response_t *resp;
946 946 int bioerr;
947 947 uint64_t id;
948 948 uint8_t op;
949 949 uint16_t status;
950 950 ddi_acc_handle_t acchdl;
951 951
952 952 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
953 953
954 954 if ((xbr = vdp->xdf_xb_ring) == NULL)
955 955 return (DDI_INTR_UNCLAIMED);
956 956
957 957 acchdl = vdp->xdf_xb_ring_hdl;
958 958
959 959 /*
960 960 * complete all requests which have a response
961 961 */
962 962 while (resp = xvdi_ring_get_response(xbr)) {
963 963 id = ddi_get64(acchdl, &resp->id);
964 964 op = ddi_get8(acchdl, &resp->operation);
965 965 status = ddi_get16(acchdl, (uint16_t *)&resp->status);
966 966 DPRINTF(INTR_DBG, ("resp: op %d id %"PRIu64" status %d\n",
967 967 op, id, status));
968 968
969 969 if (status != BLKIF_RSP_OKAY) {
970 970 DPRINTF(IO_DBG, ("xdf@%s: I/O error while %s",
971 971 vdp->xdf_addr,
972 972 (op == BLKIF_OP_READ) ? "reading" : "writing"));
973 973 bioerr = EIO;
974 974 } else {
975 975 bioerr = 0;
976 976 }
977 977
978 978 xdf_io_fini(vdp, id, bioerr);
979 979 }
980 980 return (DDI_INTR_CLAIMED);
981 981 }
982 982
983 983 /*
984 984 * xdf_intr runs at PIL 5, so no one else can grab xdf_dev_lk and
985 985 * block at a lower pil.
986 986 */
987 987 static uint_t
988 988 xdf_intr(caddr_t arg)
989 989 {
990 990 xdf_t *vdp = (xdf_t *)arg;
991 991 int rv;
992 992
993 993 mutex_enter(&vdp->xdf_dev_lk);
994 994 rv = xdf_intr_locked(vdp);
995 995 mutex_exit(&vdp->xdf_dev_lk);
996 996
997 997 if (!do_polled_io)
998 998 xdf_io_start(vdp);
999 999
1000 1000 return (rv);
1001 1001 }
1002 1002
1003 1003 static void
1004 1004 xdf_ring_push(xdf_t *vdp)
1005 1005 {
1006 1006 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
1007 1007
1008 1008 if (vdp->xdf_xb_ring == NULL)
1009 1009 return;
1010 1010
1011 1011 if (xvdi_ring_push_request(vdp->xdf_xb_ring)) {
1012 1012 DPRINTF(IO_DBG, (
1013 1013 "xdf@%s: xdf_ring_push: sent request(s) to backend\n",
1014 1014 vdp->xdf_addr));
1015 1015 }
1016 1016
1017 1017 if (xvdi_get_evtchn(vdp->xdf_dip) != INVALID_EVTCHN)
1018 1018 xvdi_notify_oe(vdp->xdf_dip);
1019 1019 }
1020 1020
1021 1021 static int
1022 1022 xdf_ring_drain_locked(xdf_t *vdp)
1023 1023 {
1024 1024 int pollc, rv = 0;
1025 1025
1026 1026 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
1027 1027
1028 1028 if (xdf_debug & SUSRES_DBG)
1029 1029 xen_printf("xdf_ring_drain: start\n");
1030 1030
1031 1031 for (pollc = 0; pollc < XDF_DRAIN_RETRY_COUNT; pollc++) {
1032 1032 if (vdp->xdf_xb_ring == NULL)
1033 1033 goto out;
1034 1034
1035 1035 if (xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring))
1036 1036 (void) xdf_intr_locked(vdp);
1037 1037 if (!xvdi_ring_has_incomp_request(vdp->xdf_xb_ring))
1038 1038 goto out;
1039 1039 xdf_ring_push(vdp);
1040 1040
1041 1041 /* file-backed devices can be slow */
1042 1042 mutex_exit(&vdp->xdf_dev_lk);
1043 1043 #ifdef XPV_HVM_DRIVER
1044 1044 (void) HYPERVISOR_yield();
1045 1045 #endif /* XPV_HVM_DRIVER */
1046 1046 delay(drv_usectohz(XDF_DRAIN_MSEC_DELAY));
1047 1047 mutex_enter(&vdp->xdf_dev_lk);
1048 1048 }
1049 1049 cmn_err(CE_WARN, "xdf@%s: xdf_ring_drain: timeout", vdp->xdf_addr);
1050 1050
1051 1051 out:
1052 1052 if (vdp->xdf_xb_ring != NULL) {
1053 1053 if (xvdi_ring_has_incomp_request(vdp->xdf_xb_ring) ||
1054 1054 xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring))
1055 1055 rv = EIO;
1056 1056 }
1057 1057 if (xdf_debug & SUSRES_DBG)
1058 1058 xen_printf("xdf@%s: xdf_ring_drain: end, err=%d\n",
1059 1059 vdp->xdf_addr, rv);
1060 1060 return (rv);
1061 1061 }
1062 1062
1063 1063 static int
1064 1064 xdf_ring_drain(xdf_t *vdp)
1065 1065 {
1066 1066 int rv;
1067 1067 mutex_enter(&vdp->xdf_dev_lk);
1068 1068 rv = xdf_ring_drain_locked(vdp);
1069 1069 mutex_exit(&vdp->xdf_dev_lk);
1070 1070 return (rv);
1071 1071 }
1072 1072
1073 1073 /*
1074 1074 * Destroy all v_req_t, grant table entries, and our ring buffer.
1075 1075 */
1076 1076 static void
1077 1077 xdf_ring_destroy(xdf_t *vdp)
1078 1078 {
1079 1079 v_req_t *vreq;
1080 1080 buf_t *bp;
1081 1081 ge_slot_t *gs;
1082 1082
1083 1083 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
1084 1084 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
1085 1085
1086 1086 if ((vdp->xdf_state != XD_INIT) &&
1087 1087 (vdp->xdf_state != XD_CONNECTED) &&
1088 1088 (vdp->xdf_state != XD_READY)) {
1089 1089 ASSERT(vdp->xdf_xb_ring == NULL);
1090 1090 ASSERT(vdp->xdf_xb_ring_hdl == NULL);
1091 1091 ASSERT(vdp->xdf_peer == INVALID_DOMID);
1092 1092 ASSERT(vdp->xdf_evtchn == INVALID_EVTCHN);
1093 1093 ASSERT(list_is_empty(&vdp->xdf_vreq_act));
1094 1094 return;
1095 1095 }
1096 1096
1097 1097 /*
1098 1098 * We don't want to recieve async notifications from the backend
1099 1099 * when it finishes processing ring entries.
1100 1100 */
1101 1101 #ifdef XPV_HVM_DRIVER
1102 1102 ec_unbind_evtchn(vdp->xdf_evtchn);
1103 1103 #else /* !XPV_HVM_DRIVER */
1104 1104 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL);
1105 1105 #endif /* !XPV_HVM_DRIVER */
1106 1106
1107 1107 /*
1108 1108 * Drain any requests in the ring. We need to do this before we
1109 1109 * can free grant table entries, because if active ring entries
1110 1110 * point to grants, then the backend could be trying to access
1111 1111 * those grants.
1112 1112 */
1113 1113 (void) xdf_ring_drain_locked(vdp);
1114 1114
1115 1115 /* We're done talking to the backend so free up our event channel */
1116 1116 xvdi_free_evtchn(vdp->xdf_dip);
1117 1117 vdp->xdf_evtchn = INVALID_EVTCHN;
1118 1118
1119 1119 while ((vreq = list_head(&vdp->xdf_vreq_act)) != NULL) {
1120 1120 bp = vreq->v_buf;
1121 1121 ASSERT(BP_VREQ(bp) == vreq);
1122 1122
1123 1123 /* Free up any grant table entries associaed with this IO */
1124 1124 while ((gs = list_head(&vreq->v_gs)) != NULL)
1125 1125 gs_free(gs);
1126 1126
1127 1127 /* If this IO was on the runq, move it back to the waitq. */
1128 1128 if (vreq->v_runq)
1129 1129 xdf_kstat_runq_to_waitq(vdp, bp);
1130 1130
1131 1131 /*
1132 1132 * Reset any buf IO state since we're going to re-issue the
1133 1133 * IO when we reconnect.
1134 1134 */
1135 1135 vreq_free(vdp, vreq);
1136 1136 BP_VREQ_SET(bp, NULL);
1137 1137 bioerror(bp, 0);
1138 1138 }
1139 1139
1140 1140 /* reset the active queue index pointer */
1141 1141 vdp->xdf_i_act = vdp->xdf_f_act;
1142 1142
1143 1143 /* Destroy the ring */
1144 1144 xvdi_free_ring(vdp->xdf_xb_ring);
1145 1145 vdp->xdf_xb_ring = NULL;
1146 1146 vdp->xdf_xb_ring_hdl = NULL;
1147 1147 vdp->xdf_peer = INVALID_DOMID;
1148 1148 }
1149 1149
1150 1150 void
1151 1151 xdfmin(struct buf *bp)
1152 1152 {
1153 1153 if (bp->b_bcount > xdf_maxphys)
1154 1154 bp->b_bcount = xdf_maxphys;
1155 1155 }
1156 1156
1157 1157 /*
1158 1158 * Check if we have a pending "eject" media request.
1159 1159 */
1160 1160 static int
1161 1161 xdf_eject_pending(xdf_t *vdp)
1162 1162 {
1163 1163 dev_info_t *dip = vdp->xdf_dip;
1164 1164 char *xsname, *str;
1165 1165
1166 1166 if (!vdp->xdf_media_req_supported)
1167 1167 return (B_FALSE);
1168 1168
1169 1169 if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
1170 1170 (xenbus_read_str(xsname, XBP_MEDIA_REQ, &str) != 0))
1171 1171 return (B_FALSE);
1172 1172
1173 1173 if (strcmp(str, XBV_MEDIA_REQ_EJECT) != 0) {
1174 1174 strfree(str);
1175 1175 return (B_FALSE);
1176 1176 }
1177 1177 strfree(str);
1178 1178 return (B_TRUE);
1179 1179 }
1180 1180
1181 1181 /*
1182 1182 * Generate a media request.
1183 1183 */
1184 1184 static int
1185 1185 xdf_media_req(xdf_t *vdp, char *req, boolean_t media_required)
1186 1186 {
1187 1187 dev_info_t *dip = vdp->xdf_dip;
1188 1188 char *xsname;
1189 1189
1190 1190 /*
1191 1191 * we can't be holding xdf_dev_lk because xenbus_printf() can
1192 1192 * block while waiting for a PIL 1 interrupt message. this
1193 1193 * would cause a deadlock with xdf_intr() which needs to grab
1194 1194 * xdf_dev_lk as well and runs at PIL 5.
1195 1195 */
1196 1196 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
1197 1197 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk));
1198 1198
1199 1199 if ((xsname = xvdi_get_xsname(dip)) == NULL)
1200 1200 return (ENXIO);
1201 1201
1202 1202 /* Check if we support media requests */
1203 1203 if (!XD_IS_CD(vdp) || !vdp->xdf_media_req_supported)
1204 1204 return (ENOTTY);
1205 1205
1206 1206 /* If an eject is pending then don't allow any new requests */
1207 1207 if (xdf_eject_pending(vdp))
1208 1208 return (ENXIO);
1209 1209
1210 1210 /* Make sure that there is media present */
1211 1211 if (media_required && (vdp->xdf_xdev_nblocks == 0))
1212 1212 return (ENXIO);
1213 1213
1214 1214 /* We only allow operations when the device is ready and connected */
1215 1215 if (vdp->xdf_state != XD_READY)
1216 1216 return (EIO);
1217 1217
1218 1218 if (xenbus_printf(XBT_NULL, xsname, XBP_MEDIA_REQ, "%s", req) != 0)
1219 1219 return (EIO);
1220 1220
1221 1221 return (0);
1222 1222 }
1223 1223
1224 1224 /*
1225 1225 * populate a single blkif_request_t w/ a buf
1226 1226 */
1227 1227 static void
1228 1228 xdf_process_rreq(xdf_t *vdp, struct buf *bp, blkif_request_t *rreq)
1229 1229 {
1230 1230 grant_ref_t gr;
1231 1231 uint8_t fsect, lsect;
1232 1232 size_t bcnt;
1233 1233 paddr_t dma_addr;
1234 1234 off_t blk_off;
1235 1235 dev_info_t *dip = vdp->xdf_dip;
1236 1236 blkif_vdev_t vdev = xvdi_get_vdevnum(dip);
1237 1237 v_req_t *vreq = BP_VREQ(bp);
1238 1238 uint64_t blkno = vreq->v_blkno;
1239 1239 uint_t ndmacs = vreq->v_ndmacs;
1240 1240 ddi_acc_handle_t acchdl = vdp->xdf_xb_ring_hdl;
1241 1241 int seg = 0;
1242 1242 int isread = IS_READ(bp);
1243 1243 ge_slot_t *gs = list_head(&vreq->v_gs);
1244 1244
1245 1245 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
1246 1246 ASSERT(vreq->v_status == VREQ_GS_ALLOCED);
1247 1247
1248 1248 if (isread)
1249 1249 ddi_put8(acchdl, &rreq->operation, BLKIF_OP_READ);
1250 1250 else {
1251 1251 switch (vreq->v_flush_diskcache) {
1252 1252 case FLUSH_DISKCACHE:
1253 1253 ddi_put8(acchdl, &rreq->operation,
1254 1254 BLKIF_OP_FLUSH_DISKCACHE);
1255 1255 ddi_put16(acchdl, &rreq->handle, vdev);
1256 1256 ddi_put64(acchdl, &rreq->id,
1257 1257 (uint64_t)(uintptr_t)(gs));
1258 1258 ddi_put8(acchdl, &rreq->nr_segments, 0);
1259 1259 vreq->v_status = VREQ_DMAWIN_DONE;
1260 1260 return;
1261 1261 case WRITE_BARRIER:
1262 1262 ddi_put8(acchdl, &rreq->operation,
1263 1263 BLKIF_OP_WRITE_BARRIER);
1264 1264 break;
1265 1265 default:
1266 1266 if (!vdp->xdf_wce)
1267 1267 ddi_put8(acchdl, &rreq->operation,
1268 1268 BLKIF_OP_WRITE_BARRIER);
1269 1269 else
1270 1270 ddi_put8(acchdl, &rreq->operation,
1271 1271 BLKIF_OP_WRITE);
1272 1272 break;
1273 1273 }
1274 1274 }
1275 1275
1276 1276 ddi_put16(acchdl, &rreq->handle, vdev);
1277 1277 ddi_put64(acchdl, &rreq->sector_number, blkno);
1278 1278 ddi_put64(acchdl, &rreq->id, (uint64_t)(uintptr_t)(gs));
1279 1279
1280 1280 /*
1281 1281 * loop until all segments are populated or no more dma cookie in buf
1282 1282 */
1283 1283 for (;;) {
1284 1284 /*
1285 1285 * Each segment of a blkif request can transfer up to
1286 1286 * one 4K page of data.
1287 1287 */
1288 1288 bcnt = vreq->v_dmac.dmac_size;
1289 1289 dma_addr = vreq->v_dmac.dmac_laddress;
1290 1290 blk_off = (uint_t)((paddr_t)XB_SEGOFFSET & dma_addr);
1291 1291 fsect = blk_off >> XB_BSHIFT;
1292 1292 lsect = fsect + (bcnt >> XB_BSHIFT) - 1;
1293 1293
1294 1294 ASSERT(bcnt <= PAGESIZE);
1295 1295 ASSERT((bcnt % XB_BSIZE) == 0);
1296 1296 ASSERT((blk_off & XB_BMASK) == 0);
1297 1297 ASSERT(fsect < XB_MAX_SEGLEN / XB_BSIZE &&
1298 1298 lsect < XB_MAX_SEGLEN / XB_BSIZE);
1299 1299
1300 1300 gr = gs_grant(gs, PATOMA(dma_addr) >> PAGESHIFT);
1301 1301 ddi_put32(acchdl, &rreq->seg[seg].gref, gr);
1302 1302 ddi_put8(acchdl, &rreq->seg[seg].first_sect, fsect);
1303 1303 ddi_put8(acchdl, &rreq->seg[seg].last_sect, lsect);
1304 1304
1305 1305 DPRINTF(IO_DBG, (
1306 1306 "xdf@%s: seg%d: dmacS %lu blk_off %ld\n",
1307 1307 vdp->xdf_addr, seg, vreq->v_dmac.dmac_size, blk_off));
1308 1308 DPRINTF(IO_DBG, (
1309 1309 "xdf@%s: seg%d: fs %d ls %d gr %d dma 0x%"PRIx64"\n",
1310 1310 vdp->xdf_addr, seg, fsect, lsect, gr, dma_addr));
1311 1311
1312 1312 blkno += (bcnt >> XB_BSHIFT);
1313 1313 seg++;
1314 1314 ASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST);
1315 1315 if (--ndmacs) {
1316 1316 ddi_dma_nextcookie(vreq->v_dmahdl, &vreq->v_dmac);
1317 1317 continue;
1318 1318 }
1319 1319
1320 1320 vreq->v_status = VREQ_DMAWIN_DONE;
1321 1321 vreq->v_blkno = blkno;
1322 1322 break;
1323 1323 }
1324 1324 ddi_put8(acchdl, &rreq->nr_segments, seg);
1325 1325 DPRINTF(IO_DBG, (
1326 1326 "xdf@%s: xdf_process_rreq: request id=%"PRIx64" ready\n",
1327 1327 vdp->xdf_addr, rreq->id));
1328 1328 }
1329 1329
1330 1330 static void
1331 1331 xdf_io_start(xdf_t *vdp)
1332 1332 {
1333 1333 struct buf *bp;
1334 1334 v_req_t *vreq;
1335 1335 blkif_request_t *rreq;
1336 1336 boolean_t rreqready = B_FALSE;
1337 1337
1338 1338 mutex_enter(&vdp->xdf_dev_lk);
1339 1339
1340 1340 /*
1341 1341 * Populate the ring request(s). Loop until there is no buf to
1342 1342 * transfer or no free slot available in I/O ring.
1343 1343 */
1344 1344 for (;;) {
1345 1345 /* don't start any new IO if we're suspending */
1346 1346 if (vdp->xdf_suspending)
1347 1347 break;
1348 1348 if ((bp = xdf_bp_next(vdp)) == NULL)
1349 1349 break;
1350 1350
1351 1351 /* if the buf doesn't already have a vreq, allocate one */
1352 1352 if (((vreq = BP_VREQ(bp)) == NULL) &&
1353 1353 ((vreq = vreq_get(vdp, bp)) == NULL))
1354 1354 break;
1355 1355
1356 1356 /* alloc DMA/GTE resources */
1357 1357 if (vreq_setup(vdp, vreq) != DDI_SUCCESS)
1358 1358 break;
1359 1359
1360 1360 /* get next blkif_request in the ring */
1361 1361 if ((rreq = xvdi_ring_get_request(vdp->xdf_xb_ring)) == NULL)
1362 1362 break;
1363 1363 bzero(rreq, sizeof (blkif_request_t));
1364 1364 rreqready = B_TRUE;
1365 1365
1366 1366 /* populate blkif_request with this buf */
1367 1367 xdf_process_rreq(vdp, bp, rreq);
1368 1368
1369 1369 /*
1370 1370 * This buffer/vreq pair is has been allocated a ring buffer
1371 1371 * resources, so if it isn't already in our runq, add it.
1372 1372 */
1373 1373 if (!vreq->v_runq)
1374 1374 xdf_kstat_waitq_to_runq(vdp, bp);
1375 1375 }
1376 1376
1377 1377 /* Send the request(s) to the backend */
1378 1378 if (rreqready)
1379 1379 xdf_ring_push(vdp);
1380 1380
1381 1381 mutex_exit(&vdp->xdf_dev_lk);
1382 1382 }
1383 1383
1384 1384
1385 1385 /* check if partition is open, -1 - check all partitions on the disk */
1386 1386 static boolean_t
1387 1387 xdf_isopen(xdf_t *vdp, int partition)
1388 1388 {
1389 1389 int i;
1390 1390 ulong_t parbit;
1391 1391 boolean_t rval = B_FALSE;
1392 1392
1393 1393 ASSERT((partition == -1) ||
1394 1394 ((partition >= 0) || (partition < XDF_PEXT)));
1395 1395
1396 1396 if (partition == -1)
1397 1397 parbit = (ulong_t)-1;
1398 1398 else
1399 1399 parbit = 1 << partition;
1400 1400
1401 1401 for (i = 0; i < OTYPCNT; i++) {
1402 1402 if (vdp->xdf_vd_open[i] & parbit)
1403 1403 rval = B_TRUE;
1404 1404 }
1405 1405
1406 1406 return (rval);
1407 1407 }
1408 1408
1409 1409 /*
1410 1410 * The connection should never be closed as long as someone is holding
1411 1411 * us open, there is pending IO, or someone is waiting waiting for a
1412 1412 * connection.
1413 1413 */
1414 1414 static boolean_t
1415 1415 xdf_busy(xdf_t *vdp)
1416 1416 {
1417 1417 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
1418 1418
1419 1419 if ((vdp->xdf_xb_ring != NULL) &&
1420 1420 xvdi_ring_has_unconsumed_responses(vdp->xdf_xb_ring)) {
1421 1421 ASSERT(vdp->xdf_state != XD_CLOSED);
1422 1422 return (B_TRUE);
1423 1423 }
1424 1424
1425 1425 if (!list_is_empty(&vdp->xdf_vreq_act) || (vdp->xdf_f_act != NULL)) {
1426 1426 ASSERT(vdp->xdf_state != XD_CLOSED);
1427 1427 return (B_TRUE);
1428 1428 }
1429 1429
1430 1430 if (xdf_isopen(vdp, -1)) {
1431 1431 ASSERT(vdp->xdf_state != XD_CLOSED);
1432 1432 return (B_TRUE);
1433 1433 }
1434 1434
1435 1435 if (vdp->xdf_connect_req > 0) {
1436 1436 ASSERT(vdp->xdf_state != XD_CLOSED);
1437 1437 return (B_TRUE);
1438 1438 }
1439 1439
1440 1440 return (B_FALSE);
1441 1441 }
1442 1442
1443 1443 static void
1444 1444 xdf_set_state(xdf_t *vdp, xdf_state_t new_state)
1445 1445 {
1446 1446 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
1447 1447 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
1448 1448 DPRINTF(DDI_DBG, ("xdf@%s: state change %d -> %d\n",
1449 1449 vdp->xdf_addr, vdp->xdf_state, new_state));
1450 1450 vdp->xdf_state = new_state;
1451 1451 cv_broadcast(&vdp->xdf_dev_cv);
1452 1452 }
1453 1453
1454 1454 static void
1455 1455 xdf_disconnect(xdf_t *vdp, xdf_state_t new_state, boolean_t quiet)
1456 1456 {
1457 1457 dev_info_t *dip = vdp->xdf_dip;
1458 1458 boolean_t busy;
1459 1459
1460 1460 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
1461 1461 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk));
1462 1462 ASSERT((new_state == XD_UNKNOWN) || (new_state == XD_CLOSED));
1463 1463
1464 1464 /* Check if we're already there. */
1465 1465 if (vdp->xdf_state == new_state)
1466 1466 return;
1467 1467
1468 1468 mutex_enter(&vdp->xdf_dev_lk);
1469 1469 busy = xdf_busy(vdp);
1470 1470
1471 1471 /* If we're already closed then there's nothing todo. */
1472 1472 if (vdp->xdf_state == XD_CLOSED) {
1473 1473 ASSERT(!busy);
1474 1474 xdf_set_state(vdp, new_state);
1475 1475 mutex_exit(&vdp->xdf_dev_lk);
1476 1476 return;
1477 1477 }
1478 1478
1479 1479 #ifdef DEBUG
1480 1480 /* UhOh. Warn the user that something bad has happened. */
1481 1481 if (!quiet && busy && (vdp->xdf_state == XD_READY) &&
1482 1482 (vdp->xdf_xdev_nblocks != 0)) {
1483 1483 cmn_err(CE_WARN, "xdf@%s: disconnected while in use",
1484 1484 vdp->xdf_addr);
1485 1485 }
1486 1486 #endif /* DEBUG */
1487 1487
1488 1488 xdf_ring_destroy(vdp);
1489 1489
1490 1490 /* If we're busy then we can only go into the unknown state */
1491 1491 xdf_set_state(vdp, (busy) ? XD_UNKNOWN : new_state);
1492 1492 mutex_exit(&vdp->xdf_dev_lk);
1493 1493
1494 1494 /* if we're closed now, let the other end know */
1495 1495 if (vdp->xdf_state == XD_CLOSED)
1496 1496 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed);
1497 1497 }
1498 1498
1499 1499
1500 1500 /*
1501 1501 * Kick-off connect process
1502 1502 * Status should be XD_UNKNOWN or XD_CLOSED
1503 1503 * On success, status will be changed to XD_INIT
1504 1504 * On error, it will be changed to XD_UNKNOWN
1505 1505 */
1506 1506 static int
1507 1507 xdf_setstate_init(xdf_t *vdp)
1508 1508 {
1509 1509 dev_info_t *dip = vdp->xdf_dip;
1510 1510 xenbus_transaction_t xbt;
1511 1511 grant_ref_t gref;
1512 1512 char *xsname, *str;
1513 1513 int rv;
1514 1514
1515 1515 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
1516 1516 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk));
1517 1517 ASSERT((vdp->xdf_state == XD_UNKNOWN) ||
1518 1518 (vdp->xdf_state == XD_CLOSED));
1519 1519
1520 1520 DPRINTF(DDI_DBG,
1521 1521 ("xdf@%s: starting connection process\n", vdp->xdf_addr));
1522 1522
1523 1523 /*
1524 1524 * If an eject is pending then don't allow a new connection.
1525 1525 * (Only the backend can clear media request eject request.)
1526 1526 */
1527 1527 if (xdf_eject_pending(vdp))
1528 1528 return (DDI_FAILURE);
1529 1529
1530 1530 if ((xsname = xvdi_get_xsname(dip)) == NULL)
1531 1531 goto errout;
1532 1532
1533 1533 if ((vdp->xdf_peer = xvdi_get_oeid(dip)) == INVALID_DOMID)
1534 1534 goto errout;
1535 1535
1536 1536 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitialising);
1537 1537
1538 1538 /*
1539 1539 * Sanity check for the existance of the xenbus device-type property.
1540 1540 * This property might not exist if we our xenbus device nodes was
1541 1541 * force destroyed while we were still connected to the backend.
1542 1542 */
1543 1543 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0)
1544 1544 goto errout;
1545 1545 strfree(str);
1546 1546
1547 1547 if (xvdi_alloc_evtchn(dip) != DDI_SUCCESS)
1548 1548 goto errout;
1549 1549
1550 1550 vdp->xdf_evtchn = xvdi_get_evtchn(dip);
1551 1551 #ifdef XPV_HVM_DRIVER
1552 1552 ec_bind_evtchn_to_handler(vdp->xdf_evtchn, IPL_VBD, xdf_intr, vdp);
1553 1553 #else /* !XPV_HVM_DRIVER */
1554 1554 if (ddi_add_intr(dip, 0, NULL, NULL, xdf_intr, (caddr_t)vdp) !=
1555 1555 DDI_SUCCESS) {
1556 1556 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_init: "
1557 1557 "failed to add intr handler", vdp->xdf_addr);
1558 1558 goto errout1;
1559 1559 }
1560 1560 #endif /* !XPV_HVM_DRIVER */
1561 1561
1562 1562 if (xvdi_alloc_ring(dip, BLKIF_RING_SIZE,
1563 1563 sizeof (union blkif_sring_entry), &gref, &vdp->xdf_xb_ring) !=
1564 1564 DDI_SUCCESS) {
1565 1565 cmn_err(CE_WARN, "xdf@%s: failed to alloc comm ring",
1566 1566 vdp->xdf_addr);
1567 1567 goto errout2;
1568 1568 }
1569 1569 vdp->xdf_xb_ring_hdl = vdp->xdf_xb_ring->xr_acc_hdl; /* ugly!! */
1570 1570
1571 1571 /*
1572 1572 * Write into xenstore the info needed by backend
1573 1573 */
1574 1574 trans_retry:
1575 1575 if (xenbus_transaction_start(&xbt)) {
1576 1576 cmn_err(CE_WARN, "xdf@%s: failed to start transaction",
1577 1577 vdp->xdf_addr);
1578 1578 xvdi_fatal_error(dip, EIO, "connect transaction init");
1579 1579 goto fail_trans;
1580 1580 }
1581 1581
1582 1582 /*
1583 1583 * XBP_PROTOCOL is written by the domain builder in the case of PV
1584 1584 * domains. However, it is not written for HVM domains, so let's
1585 1585 * write it here.
1586 1586 */
1587 1587 if (((rv = xenbus_printf(xbt, xsname,
1588 1588 XBP_MEDIA_REQ, "%s", XBV_MEDIA_REQ_NONE)) != 0) ||
1589 1589 ((rv = xenbus_printf(xbt, xsname,
1590 1590 XBP_RING_REF, "%u", gref)) != 0) ||
1591 1591 ((rv = xenbus_printf(xbt, xsname,
1592 1592 XBP_EVENT_CHAN, "%u", vdp->xdf_evtchn)) != 0) ||
1593 1593 ((rv = xenbus_printf(xbt, xsname,
1594 1594 XBP_PROTOCOL, "%s", XEN_IO_PROTO_ABI_NATIVE)) != 0) ||
1595 1595 ((rv = xvdi_switch_state(dip, xbt, XenbusStateInitialised)) > 0)) {
1596 1596 (void) xenbus_transaction_end(xbt, 1);
1597 1597 xvdi_fatal_error(dip, rv, "connect transaction setup");
1598 1598 goto fail_trans;
1599 1599 }
1600 1600
1601 1601 /* kick-off connect process */
1602 1602 if (rv = xenbus_transaction_end(xbt, 0)) {
1603 1603 if (rv == EAGAIN)
1604 1604 goto trans_retry;
1605 1605 xvdi_fatal_error(dip, rv, "connect transaction commit");
1606 1606 goto fail_trans;
1607 1607 }
1608 1608
1609 1609 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
1610 1610 mutex_enter(&vdp->xdf_dev_lk);
1611 1611 xdf_set_state(vdp, XD_INIT);
1612 1612 mutex_exit(&vdp->xdf_dev_lk);
1613 1613
1614 1614 return (DDI_SUCCESS);
1615 1615
1616 1616 fail_trans:
1617 1617 xvdi_free_ring(vdp->xdf_xb_ring);
1618 1618 errout2:
1619 1619 #ifdef XPV_HVM_DRIVER
1620 1620 ec_unbind_evtchn(vdp->xdf_evtchn);
1621 1621 #else /* !XPV_HVM_DRIVER */
1622 1622 (void) ddi_remove_intr(vdp->xdf_dip, 0, NULL);
1623 1623 #endif /* !XPV_HVM_DRIVER */
1624 1624 errout1:
1625 1625 xvdi_free_evtchn(dip);
1626 1626 vdp->xdf_evtchn = INVALID_EVTCHN;
1627 1627 errout:
1628 1628 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
1629 1629 cmn_err(CE_WARN, "xdf@%s: failed to start connection to backend",
1630 1630 vdp->xdf_addr);
1631 1631 return (DDI_FAILURE);
1632 1632 }
1633 1633
1634 1634 int
1635 1635 xdf_get_flush_block(xdf_t *vdp)
1636 1636 {
1637 1637 /*
1638 1638 * Get a DEV_BSIZE aligned bufer
1639 1639 */
1640 1640 vdp->xdf_flush_mem = kmem_alloc(vdp->xdf_xdev_secsize * 2, KM_SLEEP);
1641 1641 vdp->xdf_cache_flush_block =
1642 1642 (char *)P2ROUNDUP((uintptr_t)(vdp->xdf_flush_mem),
1643 1643 (int)vdp->xdf_xdev_secsize);
1644 1644
1645 1645 if (xdf_lb_rdwr(vdp->xdf_dip, TG_READ, vdp->xdf_cache_flush_block,
1646 1646 xdf_flush_block, vdp->xdf_xdev_secsize, NULL) != 0)
1647 1647 return (DDI_FAILURE);
1648 1648 return (DDI_SUCCESS);
1649 1649 }
1650 1650
1651 1651 static void
1652 1652 xdf_setstate_ready(void *arg)
1653 1653 {
1654 1654 xdf_t *vdp = (xdf_t *)arg;
1655 1655
1656 1656 vdp->xdf_ready_tq_thread = curthread;
1657 1657
1658 1658 /*
1659 1659 * We've created all the minor nodes via cmlb_attach() using default
1660 1660 * value in xdf_attach() to make it possible to block in xdf_open(),
1661 1661 * in case there's anyone (say, booting thread) ever trying to open
1662 1662 * it before connected to backend. We will refresh all those minor
1663 1663 * nodes w/ latest info we've got now when we are almost connected.
1664 1664 */
1665 1665 mutex_enter(&vdp->xdf_dev_lk);
1666 1666 if (vdp->xdf_cmbl_reattach) {
1667 1667 vdp->xdf_cmbl_reattach = B_FALSE;
1668 1668
1669 1669 mutex_exit(&vdp->xdf_dev_lk);
1670 1670 if (xdf_cmlb_attach(vdp) != 0) {
1671 1671 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
1672 1672 return;
1673 1673 }
1674 1674 mutex_enter(&vdp->xdf_dev_lk);
1675 1675 }
1676 1676
1677 1677 /* If we're not still trying to get to the ready state, then bail. */
1678 1678 if (vdp->xdf_state != XD_CONNECTED) {
1679 1679 mutex_exit(&vdp->xdf_dev_lk);
1680 1680 return;
1681 1681 }
1682 1682 mutex_exit(&vdp->xdf_dev_lk);
1683 1683
1684 1684 /*
1685 1685 * If backend has feature-barrier, see if it supports disk
1686 1686 * cache flush op.
1687 1687 */
1688 1688 vdp->xdf_flush_supported = B_FALSE;
1689 1689 if (vdp->xdf_feature_barrier) {
1690 1690 /*
1691 1691 * Pretend we already know flush is supported so probe
1692 1692 * will attempt the correct op.
1693 1693 */
1694 1694 vdp->xdf_flush_supported = B_TRUE;
1695 1695 if (xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE, NULL, 0, 0, 0) == 0) {
1696 1696 vdp->xdf_flush_supported = B_TRUE;
1697 1697 } else {
1698 1698 vdp->xdf_flush_supported = B_FALSE;
1699 1699 /*
1700 1700 * If the other end does not support the cache flush op
1701 1701 * then we must use a barrier-write to force disk
1702 1702 * cache flushing. Barrier writes require that a data
1703 1703 * block actually be written.
1704 1704 * Cache a block to barrier-write when we are
1705 1705 * asked to perform a flush.
1706 1706 * XXX - would it be better to just copy 1 block
1707 1707 * (512 bytes) from whatever write we did last
1708 1708 * and rewrite that block?
1709 1709 */
1710 1710 if (xdf_get_flush_block(vdp) != DDI_SUCCESS) {
1711 1711 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
1712 1712 return;
1713 1713 }
1714 1714 }
1715 1715 }
1716 1716
1717 1717 mutex_enter(&vdp->xdf_cb_lk);
1718 1718 mutex_enter(&vdp->xdf_dev_lk);
1719 1719 if (vdp->xdf_state == XD_CONNECTED)
1720 1720 xdf_set_state(vdp, XD_READY);
1721 1721 mutex_exit(&vdp->xdf_dev_lk);
1722 1722
1723 1723 /* Restart any currently queued up io */
1724 1724 xdf_io_start(vdp);
1725 1725
1726 1726 mutex_exit(&vdp->xdf_cb_lk);
1727 1727 }
1728 1728
1729 1729 /*
1730 1730 * synthetic geometry
1731 1731 */
1732 1732 #define XDF_NSECTS 256
1733 1733 #define XDF_NHEADS 16
1734 1734
1735 1735 static void
1736 1736 xdf_synthetic_pgeom(dev_info_t *dip, cmlb_geom_t *geomp)
1737 1737 {
1738 1738 xdf_t *vdp;
1739 1739 uint_t ncyl;
1740 1740
1741 1741 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip));
1742 1742
1743 1743 ncyl = vdp->xdf_xdev_nblocks / (XDF_NHEADS * XDF_NSECTS);
1744 1744
1745 1745 bzero(geomp, sizeof (*geomp));
1746 1746 geomp->g_ncyl = ncyl == 0 ? 1 : ncyl;
1747 1747 geomp->g_acyl = 0;
1748 1748 geomp->g_nhead = XDF_NHEADS;
1749 1749 geomp->g_nsect = XDF_NSECTS;
1750 1750 geomp->g_secsize = vdp->xdf_xdev_secsize;
1751 1751 geomp->g_capacity = vdp->xdf_xdev_nblocks;
1752 1752 geomp->g_intrlv = 0;
1753 1753 geomp->g_rpm = 7200;
1754 1754 }
1755 1755
1756 1756 /*
1757 1757 * Finish other initialization after we've connected to backend
1758 1758 * Status should be XD_INIT before calling this routine
1759 1759 * On success, status should be changed to XD_CONNECTED.
1760 1760 * On error, status should stay XD_INIT
1761 1761 */
1762 1762 static int
1763 1763 xdf_setstate_connected(xdf_t *vdp)
1764 1764 {
1765 1765 dev_info_t *dip = vdp->xdf_dip;
1766 1766 cmlb_geom_t pgeom;
1767 1767 diskaddr_t nblocks = 0;
1768 1768 uint_t secsize = 0;
1769 1769 char *oename, *xsname, *str;
1770 1770 uint_t dinfo;
1771 1771
1772 1772 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
1773 1773 ASSERT(MUTEX_NOT_HELD(&vdp->xdf_dev_lk));
1774 1774 ASSERT(vdp->xdf_state == XD_INIT);
1775 1775
1776 1776 if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
1777 1777 ((oename = xvdi_get_oename(dip)) == NULL))
1778 1778 return (DDI_FAILURE);
1779 1779
1780 1780 /* Make sure the other end is XenbusStateConnected */
1781 1781 if (xenbus_read_driver_state(oename) != XenbusStateConnected)
1782 1782 return (DDI_FAILURE);
1783 1783
1784 1784 /* Determine if feature barrier is supported by backend */
1785 1785 if (!(vdp->xdf_feature_barrier = xenbus_exists(oename, XBP_FB)))
1786 1786 cmn_err(CE_NOTE, "!xdf@%s: feature-barrier not supported",
1787 1787 vdp->xdf_addr);
1788 1788
1789 1789 /*
1790 1790 * Probe backend. Read the device size into xdf_xdev_nblocks
1791 1791 * and set the VDISK_READONLY, VDISK_CDROM, and VDISK_REMOVABLE
1792 1792 * flags in xdf_dinfo. If the emulated device type is "cdrom",
1793 1793 * we always set VDISK_CDROM, regardless of if it's present in
1794 1794 * the xenbus info parameter.
1795 1795 */
1796 1796 if (xenbus_gather(XBT_NULL, oename,
1797 1797 XBP_SECTORS, "%"SCNu64, &nblocks,
1798 1798 XBP_SECTOR_SIZE, "%u", &secsize,
1799 1799 XBP_INFO, "%u", &dinfo,
1800 1800 NULL) != 0) {
1801 1801 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: "
1802 1802 "cannot read backend info", vdp->xdf_addr);
1803 1803 return (DDI_FAILURE);
1804 1804 }
1805 1805 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) {
1806 1806 cmn_err(CE_WARN, "xdf@%s: cannot read device-type",
1807 1807 vdp->xdf_addr);
1808 1808 return (DDI_FAILURE);
1809 1809 }
1810 1810 if (strcmp(str, XBV_DEV_TYPE_CD) == 0)
1811 1811 dinfo |= VDISK_CDROM;
1812 1812 strfree(str);
1813 1813
1814 1814 if (secsize == 0 || !(ISP2(secsize / DEV_BSIZE)))
1815 1815 secsize = DEV_BSIZE;
1816 1816 vdp->xdf_xdev_nblocks = nblocks;
1817 1817 vdp->xdf_xdev_secsize = secsize;
1818 1818 #ifdef _ILP32
1819 1819 if (vdp->xdf_xdev_nblocks > DK_MAX_BLOCKS) {
1820 1820 cmn_err(CE_WARN, "xdf@%s: xdf_setstate_connected: "
1821 1821 "backend disk device too large with %llu blocks for"
1822 1822 " 32-bit kernel", vdp->xdf_addr, vdp->xdf_xdev_nblocks);
1823 1823 xvdi_fatal_error(dip, EFBIG, "reading backend info");
1824 1824 return (DDI_FAILURE);
1825 1825 }
1826 1826 #endif
1827 1827
1828 1828 /*
1829 1829 * If the physical geometry for a fixed disk has been explicity
1830 1830 * set then make sure that the specified physical geometry isn't
1831 1831 * larger than the device we connected to.
1832 1832 */
1833 1833 if (vdp->xdf_pgeom_fixed &&
1834 1834 (vdp->xdf_pgeom.g_capacity > vdp->xdf_xdev_nblocks)) {
1835 1835 cmn_err(CE_WARN,
1836 1836 "xdf@%s: connect failed, fixed geometry too large",
1837 1837 vdp->xdf_addr);
1838 1838 return (DDI_FAILURE);
1839 1839 }
1840 1840
1841 1841 vdp->xdf_media_req_supported = xenbus_exists(oename, XBP_MEDIA_REQ_SUP);
1842 1842
1843 1843 /* mark vbd is ready for I/O */
1844 1844 mutex_enter(&vdp->xdf_dev_lk);
1845 1845 xdf_set_state(vdp, XD_CONNECTED);
1846 1846
1847 1847 /* check if the cmlb label should be updated */
1848 1848 xdf_synthetic_pgeom(dip, &pgeom);
1849 1849 if ((vdp->xdf_dinfo != dinfo) ||
1850 1850 (!vdp->xdf_pgeom_fixed &&
1851 1851 (memcmp(&vdp->xdf_pgeom, &pgeom, sizeof (pgeom)) != 0))) {
1852 1852 vdp->xdf_cmbl_reattach = B_TRUE;
1853 1853
1854 1854 vdp->xdf_dinfo = dinfo;
1855 1855 if (!vdp->xdf_pgeom_fixed)
1856 1856 vdp->xdf_pgeom = pgeom;
1857 1857 }
1858 1858
1859 1859 if (XD_IS_CD(vdp) || XD_IS_RM(vdp)) {
1860 1860 if (vdp->xdf_xdev_nblocks == 0) {
1861 1861 vdp->xdf_mstate = DKIO_EJECTED;
1862 1862 cv_broadcast(&vdp->xdf_mstate_cv);
1863 1863 } else {
1864 1864 vdp->xdf_mstate = DKIO_INSERTED;
1865 1865 cv_broadcast(&vdp->xdf_mstate_cv);
1866 1866 }
1867 1867 } else {
1868 1868 if (vdp->xdf_mstate != DKIO_NONE) {
1869 1869 vdp->xdf_mstate = DKIO_NONE;
1870 1870 cv_broadcast(&vdp->xdf_mstate_cv);
1871 1871 }
1872 1872 }
1873 1873
1874 1874 mutex_exit(&vdp->xdf_dev_lk);
1875 1875
1876 1876 cmn_err(CE_CONT, "?xdf@%s: %"PRIu64" blocks", vdp->xdf_addr,
1877 1877 (uint64_t)vdp->xdf_xdev_nblocks);
1878 1878
1879 1879 /* Restart any currently queued up io */
1880 1880 xdf_io_start(vdp);
1881 1881
1882 1882 /*
1883 1883 * To get to the ready state we have to do IO to the backend device,
1884 1884 * but we can't initiate IO from the other end change callback thread
1885 1885 * (which is the current context we're executing in.) This is because
1886 1886 * if the other end disconnects while we're doing IO from the callback
1887 1887 * thread, then we can't recieve that disconnect event and we hang
1888 1888 * waiting for an IO that can never complete.
1889 1889 */
1890 1890 (void) ddi_taskq_dispatch(vdp->xdf_ready_tq, xdf_setstate_ready, vdp,
1891 1891 DDI_SLEEP);
1892 1892
1893 1893 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected);
1894 1894 return (DDI_SUCCESS);
1895 1895 }
1896 1896
1897 1897 /*ARGSUSED*/
1898 1898 static void
1899 1899 xdf_oe_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg, void *impl_data)
1900 1900 {
1901 1901 XenbusState new_state = *(XenbusState *)impl_data;
1902 1902 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
1903 1903
1904 1904 DPRINTF(DDI_DBG, ("xdf@%s: otherend state change to %d!\n",
1905 1905 vdp->xdf_addr, new_state));
1906 1906
1907 1907 mutex_enter(&vdp->xdf_cb_lk);
1908 1908
1909 1909 /* We assume that this callback is single threaded */
1910 1910 ASSERT(vdp->xdf_oe_change_thread == NULL);
1911 1911 DEBUG_EVAL(vdp->xdf_oe_change_thread = curthread);
1912 1912
1913 1913 /* ignore any backend state changes if we're suspending/suspended */
1914 1914 if (vdp->xdf_suspending || (vdp->xdf_state == XD_SUSPEND)) {
1915 1915 DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL);
1916 1916 mutex_exit(&vdp->xdf_cb_lk);
1917 1917 return;
1918 1918 }
1919 1919
1920 1920 switch (new_state) {
1921 1921 case XenbusStateUnknown:
1922 1922 case XenbusStateInitialising:
1923 1923 case XenbusStateInitWait:
1924 1924 case XenbusStateInitialised:
1925 1925 if (vdp->xdf_state == XD_INIT)
1926 1926 break;
1927 1927
1928 1928 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
1929 1929 if (xdf_setstate_init(vdp) != DDI_SUCCESS)
1930 1930 break;
1931 1931 ASSERT(vdp->xdf_state == XD_INIT);
1932 1932 break;
1933 1933
1934 1934 case XenbusStateConnected:
1935 1935 if ((vdp->xdf_state == XD_CONNECTED) ||
1936 1936 (vdp->xdf_state == XD_READY))
1937 1937 break;
1938 1938
1939 1939 if (vdp->xdf_state != XD_INIT) {
1940 1940 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
1941 1941 if (xdf_setstate_init(vdp) != DDI_SUCCESS)
1942 1942 break;
1943 1943 ASSERT(vdp->xdf_state == XD_INIT);
1944 1944 }
1945 1945
1946 1946 if (xdf_setstate_connected(vdp) != DDI_SUCCESS) {
1947 1947 xdf_disconnect(vdp, XD_UNKNOWN, B_FALSE);
1948 1948 break;
1949 1949 }
1950 1950 ASSERT(vdp->xdf_state == XD_CONNECTED);
1951 1951 break;
1952 1952
1953 1953 case XenbusStateClosing:
1954 1954 if (xdf_isopen(vdp, -1)) {
1955 1955 cmn_err(CE_NOTE,
1956 1956 "xdf@%s: hot-unplug failed, still in use",
1957 1957 vdp->xdf_addr);
1958 1958 break;
1959 1959 }
1960 1960 /*FALLTHROUGH*/
1961 1961 case XenbusStateClosed:
1962 1962 xdf_disconnect(vdp, XD_CLOSED, B_FALSE);
1963 1963 break;
1964 1964 }
1965 1965
1966 1966 /* notify anybody waiting for oe state change */
1967 1967 cv_broadcast(&vdp->xdf_dev_cv);
1968 1968 DEBUG_EVAL(vdp->xdf_oe_change_thread = NULL);
1969 1969 mutex_exit(&vdp->xdf_cb_lk);
1970 1970 }
1971 1971
1972 1972 static int
1973 1973 xdf_connect_locked(xdf_t *vdp, boolean_t wait)
1974 1974 {
1975 1975 int rv, timeouts = 0, reset = 20;
1976 1976
1977 1977 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
1978 1978 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
1979 1979
1980 1980 /* we can't connect once we're in the closed state */
1981 1981 if (vdp->xdf_state == XD_CLOSED)
1982 1982 return (XD_CLOSED);
1983 1983
1984 1984 vdp->xdf_connect_req++;
1985 1985 while (vdp->xdf_state != XD_READY) {
1986 1986 mutex_exit(&vdp->xdf_dev_lk);
1987 1987
1988 1988 /* only one thread at a time can be the connection thread */
1989 1989 if (vdp->xdf_connect_thread == NULL)
1990 1990 vdp->xdf_connect_thread = curthread;
1991 1991
1992 1992 if (vdp->xdf_connect_thread == curthread) {
1993 1993 if ((timeouts > 0) && ((timeouts % reset) == 0)) {
1994 1994 /*
1995 1995 * If we haven't establised a connection
1996 1996 * within the reset time, then disconnect
1997 1997 * so we can try again, and double the reset
1998 1998 * time. The reset time starts at 2 sec.
1999 1999 */
2000 2000 (void) xdf_disconnect(vdp, XD_UNKNOWN, B_TRUE);
2001 2001 reset *= 2;
2002 2002 }
2003 2003 if (vdp->xdf_state == XD_UNKNOWN)
2004 2004 (void) xdf_setstate_init(vdp);
2005 2005 if (vdp->xdf_state == XD_INIT)
2006 2006 (void) xdf_setstate_connected(vdp);
2007 2007 }
2008 2008
2009 2009 mutex_enter(&vdp->xdf_dev_lk);
2010 2010 if (!wait || (vdp->xdf_state == XD_READY))
2011 2011 goto out;
2012 2012
2013 2013 mutex_exit((&vdp->xdf_cb_lk));
2014 2014 if (vdp->xdf_connect_thread != curthread) {
2015 2015 rv = cv_wait_sig(&vdp->xdf_dev_cv, &vdp->xdf_dev_lk);
2016 2016 } else {
2017 2017 /* delay for 0.1 sec */
2018 2018 rv = cv_reltimedwait_sig(&vdp->xdf_dev_cv,
2019 2019 &vdp->xdf_dev_lk, drv_usectohz(100*1000),
2020 2020 TR_CLOCK_TICK);
2021 2021 if (rv == -1)
2022 2022 timeouts++;
2023 2023 }
2024 2024 mutex_exit((&vdp->xdf_dev_lk));
2025 2025 mutex_enter((&vdp->xdf_cb_lk));
2026 2026 mutex_enter((&vdp->xdf_dev_lk));
2027 2027 if (rv == 0)
2028 2028 goto out;
2029 2029 }
2030 2030
2031 2031 out:
2032 2032 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
2033 2033 ASSERT(MUTEX_HELD(&vdp->xdf_dev_lk));
2034 2034
2035 2035 if (vdp->xdf_connect_thread == curthread) {
2036 2036 /*
2037 2037 * wake up someone else so they can become the connection
2038 2038 * thread.
2039 2039 */
2040 2040 cv_signal(&vdp->xdf_dev_cv);
2041 2041 vdp->xdf_connect_thread = NULL;
2042 2042 }
2043 2043
2044 2044 /* Try to lock the media */
2045 2045 mutex_exit((&vdp->xdf_dev_lk));
2046 2046 (void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE);
2047 2047 mutex_enter((&vdp->xdf_dev_lk));
2048 2048
2049 2049 vdp->xdf_connect_req--;
2050 2050 return (vdp->xdf_state);
2051 2051 }
2052 2052
2053 2053 static uint_t
2054 2054 xdf_iorestart(caddr_t arg)
2055 2055 {
2056 2056 xdf_t *vdp = (xdf_t *)arg;
2057 2057
2058 2058 ASSERT(vdp != NULL);
2059 2059
2060 2060 mutex_enter(&vdp->xdf_dev_lk);
2061 2061 ASSERT(ISDMACBON(vdp));
2062 2062 SETDMACBOFF(vdp);
2063 2063 mutex_exit(&vdp->xdf_dev_lk);
2064 2064
2065 2065 xdf_io_start(vdp);
2066 2066
2067 2067 return (DDI_INTR_CLAIMED);
2068 2068 }
2069 2069
2070 2070 #if defined(XPV_HVM_DRIVER)
2071 2071
2072 2072 typedef struct xdf_hvm_entry {
2073 2073 list_node_t xdf_he_list;
2074 2074 char *xdf_he_path;
2075 2075 dev_info_t *xdf_he_dip;
2076 2076 } xdf_hvm_entry_t;
2077 2077
2078 2078 static list_t xdf_hvm_list;
2079 2079 static kmutex_t xdf_hvm_list_lock;
2080 2080
2081 2081 static xdf_hvm_entry_t *
2082 2082 i_xdf_hvm_find(const char *path, dev_info_t *dip)
2083 2083 {
2084 2084 xdf_hvm_entry_t *i;
2085 2085
2086 2086 ASSERT((path != NULL) || (dip != NULL));
2087 2087 ASSERT(MUTEX_HELD(&xdf_hvm_list_lock));
2088 2088
2089 2089 i = list_head(&xdf_hvm_list);
2090 2090 while (i != NULL) {
2091 2091 if ((path != NULL) && strcmp(i->xdf_he_path, path) != 0) {
2092 2092 i = list_next(&xdf_hvm_list, i);
2093 2093 continue;
2094 2094 }
2095 2095 if ((dip != NULL) && (i->xdf_he_dip != dip)) {
2096 2096 i = list_next(&xdf_hvm_list, i);
2097 2097 continue;
2098 2098 }
2099 2099 break;
2100 2100 }
2101 2101 return (i);
2102 2102 }
2103 2103
2104 2104 dev_info_t *
2105 2105 xdf_hvm_hold(const char *path)
2106 2106 {
2107 2107 xdf_hvm_entry_t *i;
2108 2108 dev_info_t *dip;
2109 2109
2110 2110 mutex_enter(&xdf_hvm_list_lock);
2111 2111 i = i_xdf_hvm_find(path, NULL);
2112 2112 if (i == NULL) {
2113 2113 mutex_exit(&xdf_hvm_list_lock);
2114 2114 return (B_FALSE);
2115 2115 }
2116 2116 ndi_hold_devi(dip = i->xdf_he_dip);
2117 2117 mutex_exit(&xdf_hvm_list_lock);
2118 2118 return (dip);
2119 2119 }
2120 2120
2121 2121 static void
2122 2122 xdf_hvm_add(dev_info_t *dip)
2123 2123 {
2124 2124 xdf_hvm_entry_t *i;
2125 2125 char *path;
2126 2126
2127 2127 /* figure out the path for the dip */
2128 2128 path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
2129 2129 (void) ddi_pathname(dip, path);
2130 2130
2131 2131 i = kmem_alloc(sizeof (*i), KM_SLEEP);
2132 2132 i->xdf_he_dip = dip;
2133 2133 i->xdf_he_path = i_ddi_strdup(path, KM_SLEEP);
2134 2134
2135 2135 mutex_enter(&xdf_hvm_list_lock);
2136 2136 ASSERT(i_xdf_hvm_find(path, NULL) == NULL);
2137 2137 ASSERT(i_xdf_hvm_find(NULL, dip) == NULL);
2138 2138 list_insert_head(&xdf_hvm_list, i);
2139 2139 mutex_exit(&xdf_hvm_list_lock);
2140 2140
2141 2141 kmem_free(path, MAXPATHLEN);
2142 2142 }
2143 2143
2144 2144 static void
2145 2145 xdf_hvm_rm(dev_info_t *dip)
2146 2146 {
2147 2147 xdf_hvm_entry_t *i;
2148 2148
2149 2149 mutex_enter(&xdf_hvm_list_lock);
2150 2150 VERIFY((i = i_xdf_hvm_find(NULL, dip)) != NULL);
2151 2151 list_remove(&xdf_hvm_list, i);
2152 2152 mutex_exit(&xdf_hvm_list_lock);
2153 2153
2154 2154 kmem_free(i->xdf_he_path, strlen(i->xdf_he_path) + 1);
2155 2155 kmem_free(i, sizeof (*i));
2156 2156 }
2157 2157
2158 2158 static void
2159 2159 xdf_hvm_init(void)
2160 2160 {
2161 2161 list_create(&xdf_hvm_list, sizeof (xdf_hvm_entry_t),
2162 2162 offsetof(xdf_hvm_entry_t, xdf_he_list));
2163 2163 mutex_init(&xdf_hvm_list_lock, NULL, MUTEX_DEFAULT, NULL);
2164 2164 }
2165 2165
2166 2166 static void
2167 2167 xdf_hvm_fini(void)
2168 2168 {
2169 2169 ASSERT(list_head(&xdf_hvm_list) == NULL);
2170 2170 list_destroy(&xdf_hvm_list);
2171 2171 mutex_destroy(&xdf_hvm_list_lock);
2172 2172 }
2173 2173
2174 2174 boolean_t
2175 2175 xdf_hvm_connect(dev_info_t *dip)
2176 2176 {
2177 2177 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
2178 2178 char *oename, *str;
2179 2179 int rv;
2180 2180
2181 2181 mutex_enter(&vdp->xdf_cb_lk);
2182 2182
2183 2183 /*
2184 2184 * Before try to establish a connection we need to wait for the
2185 2185 * backend hotplug scripts to have run. Once they are run the
2186 2186 * "<oename>/hotplug-status" property will be set to "connected".
2187 2187 */
2188 2188 for (;;) {
2189 2189 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
2190 2190
2191 2191 /*
2192 2192 * Get the xenbus path to the backend device. Note that
2193 2193 * we can't cache this path (and we look it up on each pass
2194 2194 * through this loop) because it could change during
2195 2195 * suspend, resume, and migration operations.
2196 2196 */
2197 2197 if ((oename = xvdi_get_oename(dip)) == NULL) {
2198 2198 mutex_exit(&vdp->xdf_cb_lk);
2199 2199 return (B_FALSE);
2200 2200 }
2201 2201
2202 2202 str = NULL;
2203 2203 if ((xenbus_read_str(oename, XBP_HP_STATUS, &str) == 0) &&
2204 2204 (strcmp(str, XBV_HP_STATUS_CONN) == 0))
2205 2205 break;
2206 2206
2207 2207 if (str != NULL)
2208 2208 strfree(str);
2209 2209
2210 2210 /* wait for an update to "<oename>/hotplug-status" */
2211 2211 if (cv_wait_sig(&vdp->xdf_hp_status_cv, &vdp->xdf_cb_lk) == 0) {
2212 2212 /* we got interrupted by a signal */
2213 2213 mutex_exit(&vdp->xdf_cb_lk);
2214 2214 return (B_FALSE);
2215 2215 }
2216 2216 }
2217 2217
2218 2218 /* Good news. The backend hotplug scripts have been run. */
2219 2219 ASSERT(MUTEX_HELD(&vdp->xdf_cb_lk));
2220 2220 ASSERT(strcmp(str, XBV_HP_STATUS_CONN) == 0);
2221 2221 strfree(str);
2222 2222
2223 2223 /*
2224 2224 * If we're emulating a cd device and if the backend doesn't support
2225 2225 * media request opreations, then we're not going to bother trying
2226 2226 * to establish a connection for a couple reasons. First off, media
2227 2227 * requests support is required to support operations like eject and
2228 2228 * media locking. Second, other backend platforms like Linux don't
2229 2229 * support hvm pv cdrom access. They don't even have a backend pv
2230 2230 * driver for cdrom device nodes, so we don't want to block forever
2231 2231 * waiting for a connection to a backend driver that doesn't exist.
2232 2232 */
2233 2233 if (XD_IS_CD(vdp) && !xenbus_exists(oename, XBP_MEDIA_REQ_SUP)) {
2234 2234 mutex_exit(&vdp->xdf_cb_lk);
2235 2235 return (B_FALSE);
2236 2236 }
2237 2237
2238 2238 mutex_enter(&vdp->xdf_dev_lk);
2239 2239 rv = xdf_connect_locked(vdp, B_TRUE);
2240 2240 mutex_exit(&vdp->xdf_dev_lk);
2241 2241 mutex_exit(&vdp->xdf_cb_lk);
2242 2242
2243 2243 return ((rv == XD_READY) ? B_TRUE : B_FALSE);
2244 2244 }
2245 2245
2246 2246 int
2247 2247 xdf_hvm_setpgeom(dev_info_t *dip, cmlb_geom_t *geomp)
2248 2248 {
2249 2249 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
2250 2250
2251 2251 /* sanity check the requested physical geometry */
2252 2252 mutex_enter(&vdp->xdf_dev_lk);
2253 2253 if ((geomp->g_secsize != XB_BSIZE) ||
2254 2254 (geomp->g_capacity == 0)) {
2255 2255 mutex_exit(&vdp->xdf_dev_lk);
2256 2256 return (EINVAL);
2257 2257 }
2258 2258
2259 2259 /*
2260 2260 * If we've already connected to the backend device then make sure
2261 2261 * we're not defining a physical geometry larger than our backend
2262 2262 * device.
2263 2263 */
2264 2264 if ((vdp->xdf_xdev_nblocks != 0) &&
2265 2265 (geomp->g_capacity > vdp->xdf_xdev_nblocks)) {
2266 2266 mutex_exit(&vdp->xdf_dev_lk);
2267 2267 return (EINVAL);
2268 2268 }
2269 2269
2270 2270 bzero(&vdp->xdf_pgeom, sizeof (vdp->xdf_pgeom));
2271 2271 vdp->xdf_pgeom.g_ncyl = geomp->g_ncyl;
2272 2272 vdp->xdf_pgeom.g_acyl = geomp->g_acyl;
2273 2273 vdp->xdf_pgeom.g_nhead = geomp->g_nhead;
2274 2274 vdp->xdf_pgeom.g_nsect = geomp->g_nsect;
2275 2275 vdp->xdf_pgeom.g_secsize = geomp->g_secsize;
2276 2276 vdp->xdf_pgeom.g_capacity = geomp->g_capacity;
2277 2277 vdp->xdf_pgeom.g_intrlv = geomp->g_intrlv;
2278 2278 vdp->xdf_pgeom.g_rpm = geomp->g_rpm;
2279 2279
2280 2280 vdp->xdf_pgeom_fixed = B_TRUE;
2281 2281 mutex_exit(&vdp->xdf_dev_lk);
2282 2282
2283 2283 /* force a re-validation */
2284 2284 cmlb_invalidate(vdp->xdf_vd_lbl, NULL);
2285 2285
2286 2286 return (0);
2287 2287 }
2288 2288
2289 2289 boolean_t
2290 2290 xdf_is_cd(dev_info_t *dip)
2291 2291 {
2292 2292 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
2293 2293 boolean_t rv;
2294 2294
2295 2295 mutex_enter(&vdp->xdf_cb_lk);
2296 2296 rv = XD_IS_CD(vdp);
2297 2297 mutex_exit(&vdp->xdf_cb_lk);
2298 2298 return (rv);
2299 2299 }
2300 2300
2301 2301 boolean_t
2302 2302 xdf_is_rm(dev_info_t *dip)
2303 2303 {
2304 2304 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
2305 2305 boolean_t rv;
2306 2306
2307 2307 mutex_enter(&vdp->xdf_cb_lk);
2308 2308 rv = XD_IS_RM(vdp);
2309 2309 mutex_exit(&vdp->xdf_cb_lk);
2310 2310 return (rv);
2311 2311 }
2312 2312
2313 2313 boolean_t
2314 2314 xdf_media_req_supported(dev_info_t *dip)
2315 2315 {
2316 2316 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
2317 2317 boolean_t rv;
2318 2318
2319 2319 mutex_enter(&vdp->xdf_cb_lk);
2320 2320 rv = vdp->xdf_media_req_supported;
2321 2321 mutex_exit(&vdp->xdf_cb_lk);
2322 2322 return (rv);
2323 2323 }
2324 2324
2325 2325 #endif /* XPV_HVM_DRIVER */
2326 2326
2327 2327 static int
2328 2328 xdf_lb_getcap(dev_info_t *dip, diskaddr_t *capp)
2329 2329 {
2330 2330 xdf_t *vdp;
2331 2331 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip));
2332 2332
2333 2333 if (vdp == NULL)
2334 2334 return (ENXIO);
2335 2335
2336 2336 mutex_enter(&vdp->xdf_dev_lk);
2337 2337 *capp = vdp->xdf_pgeom.g_capacity;
2338 2338 DPRINTF(LBL_DBG, ("xdf@%s:capacity %llu\n", vdp->xdf_addr, *capp));
2339 2339 mutex_exit(&vdp->xdf_dev_lk);
2340 2340 return (0);
2341 2341 }
2342 2342
2343 2343 static int
2344 2344 xdf_lb_getpgeom(dev_info_t *dip, cmlb_geom_t *geomp)
2345 2345 {
2346 2346 xdf_t *vdp;
2347 2347
2348 2348 if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL)
2349 2349 return (ENXIO);
2350 2350 *geomp = vdp->xdf_pgeom;
2351 2351 return (0);
2352 2352 }
2353 2353
2354 2354 /*
2355 2355 * No real HBA, no geometry available from it
2356 2356 */
2357 2357 /*ARGSUSED*/
2358 2358 static int
2359 2359 xdf_lb_getvgeom(dev_info_t *dip, cmlb_geom_t *geomp)
2360 2360 {
2361 2361 return (EINVAL);
2362 2362 }
2363 2363
2364 2364 static int
2365 2365 xdf_lb_getattribute(dev_info_t *dip, tg_attribute_t *tgattributep)
2366 2366 {
2367 2367 xdf_t *vdp;
2368 2368
2369 2369 if (!(vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))))
2370 2370 return (ENXIO);
2371 2371
2372 2372 if (XD_IS_RO(vdp))
2373 2373 tgattributep->media_is_writable = 0;
2374 2374 else
2375 2375 tgattributep->media_is_writable = 1;
2376 2376 return (0);
2377 2377 }
2378 2378
2379 2379 /* ARGSUSED3 */
2380 2380 int
2381 2381 xdf_lb_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
2382 2382 {
2383 2383 int instance;
2384 2384 xdf_t *vdp;
2385 2385
2386 2386 instance = ddi_get_instance(dip);
2387 2387
2388 2388 if ((vdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL)
2389 2389 return (ENXIO);
2390 2390
2391 2391 switch (cmd) {
2392 2392 case TG_GETPHYGEOM:
2393 2393 return (xdf_lb_getpgeom(dip, (cmlb_geom_t *)arg));
2394 2394 case TG_GETVIRTGEOM:
2395 2395 return (xdf_lb_getvgeom(dip, (cmlb_geom_t *)arg));
2396 2396 case TG_GETCAPACITY:
2397 2397 return (xdf_lb_getcap(dip, (diskaddr_t *)arg));
2398 2398 case TG_GETBLOCKSIZE:
2399 2399 mutex_enter(&vdp->xdf_cb_lk);
2400 2400 *(uint32_t *)arg = vdp->xdf_xdev_secsize;
2401 2401 mutex_exit(&vdp->xdf_cb_lk);
2402 2402 return (0);
2403 2403 case TG_GETATTR:
2404 2404 return (xdf_lb_getattribute(dip, (tg_attribute_t *)arg));
2405 2405 default:
2406 2406 return (ENOTTY);
2407 2407 }
2408 2408 }
2409 2409
2410 2410 /* ARGSUSED5 */
2411 2411 int
2412 2412 xdf_lb_rdwr(dev_info_t *dip, uchar_t cmd, void *bufp,
2413 2413 diskaddr_t start, size_t reqlen, void *tg_cookie)
2414 2414 {
2415 2415 xdf_t *vdp;
2416 2416 struct buf *bp;
2417 2417 int err = 0;
2418 2418
2419 2419 vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip));
2420 2420
2421 2421 /* We don't allow IO from the oe_change callback thread */
2422 2422 ASSERT(curthread != vdp->xdf_oe_change_thread);
2423 2423
2424 2424 if ((start + ((reqlen / (vdp->xdf_xdev_secsize / DEV_BSIZE))
2425 2425 >> DEV_BSHIFT)) > vdp->xdf_pgeom.g_capacity)
2426 2426 return (EINVAL);
2427 2427
2428 2428 bp = getrbuf(KM_SLEEP);
2429 2429 if (cmd == TG_READ)
2430 2430 bp->b_flags = B_BUSY | B_READ;
2431 2431 else
2432 2432 bp->b_flags = B_BUSY | B_WRITE;
2433 2433
2434 2434 bp->b_un.b_addr = bufp;
2435 2435 bp->b_bcount = reqlen;
2436 2436 bp->b_blkno = start * (vdp->xdf_xdev_secsize / DEV_BSIZE);
2437 2437 bp->b_edev = DDI_DEV_T_NONE; /* don't have dev_t */
2438 2438
2439 2439 mutex_enter(&vdp->xdf_dev_lk);
2440 2440 xdf_bp_push(vdp, bp);
2441 2441 mutex_exit(&vdp->xdf_dev_lk);
2442 2442 xdf_io_start(vdp);
2443 2443 if (curthread == vdp->xdf_ready_tq_thread)
2444 2444 (void) xdf_ring_drain(vdp);
2445 2445 err = biowait(bp);
2446 2446 ASSERT(bp->b_flags & B_DONE);
2447 2447 freerbuf(bp);
2448 2448 return (err);
2449 2449 }
2450 2450
2451 2451 /*
2452 2452 * Lock the current media. Set the media state to "lock".
2453 2453 * (Media locks are only respected by the backend driver.)
2454 2454 */
2455 2455 static int
2456 2456 xdf_ioctl_mlock(xdf_t *vdp)
2457 2457 {
2458 2458 int rv;
2459 2459 mutex_enter(&vdp->xdf_cb_lk);
2460 2460 rv = xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE);
2461 2461 mutex_exit(&vdp->xdf_cb_lk);
2462 2462 return (rv);
2463 2463 }
2464 2464
2465 2465 /*
2466 2466 * Release a media lock. Set the media state to "none".
2467 2467 */
2468 2468 static int
2469 2469 xdf_ioctl_munlock(xdf_t *vdp)
2470 2470 {
2471 2471 int rv;
2472 2472 mutex_enter(&vdp->xdf_cb_lk);
2473 2473 rv = xdf_media_req(vdp, XBV_MEDIA_REQ_NONE, B_TRUE);
2474 2474 mutex_exit(&vdp->xdf_cb_lk);
2475 2475 return (rv);
2476 2476 }
2477 2477
2478 2478 /*
2479 2479 * Eject the current media. Ignores any media locks. (Media locks
2480 2480 * are only for benifit of the the backend.)
2481 2481 */
2482 2482 static int
2483 2483 xdf_ioctl_eject(xdf_t *vdp)
2484 2484 {
2485 2485 int rv;
2486 2486
2487 2487 mutex_enter(&vdp->xdf_cb_lk);
2488 2488 if ((rv = xdf_media_req(vdp, XBV_MEDIA_REQ_EJECT, B_FALSE)) != 0) {
2489 2489 mutex_exit(&vdp->xdf_cb_lk);
2490 2490 return (rv);
2491 2491 }
2492 2492
2493 2493 /*
2494 2494 * We've set the media requests xenbus parameter to eject, so now
2495 2495 * disconnect from the backend, wait for the backend to clear
2496 2496 * the media requets xenbus paramter, and then we can reconnect
2497 2497 * to the backend.
2498 2498 */
2499 2499 (void) xdf_disconnect(vdp, XD_UNKNOWN, B_TRUE);
2500 2500 mutex_enter(&vdp->xdf_dev_lk);
2501 2501 if (xdf_connect_locked(vdp, B_TRUE) != XD_READY) {
2502 2502 mutex_exit(&vdp->xdf_dev_lk);
2503 2503 mutex_exit(&vdp->xdf_cb_lk);
2504 2504 return (EIO);
2505 2505 }
2506 2506 mutex_exit(&vdp->xdf_dev_lk);
2507 2507 mutex_exit(&vdp->xdf_cb_lk);
2508 2508 return (0);
2509 2509 }
2510 2510
2511 2511 /*
2512 2512 * Watch for media state changes. This can be an insertion of a device
2513 2513 * (triggered by a 'xm block-configure' request in another domain) or
2514 2514 * the ejection of a device (triggered by a local "eject" operation).
2515 2515 * For a full description of the DKIOCSTATE ioctl behavior see dkio(7I).
2516 2516 */
2517 2517 static int
2518 2518 xdf_dkstate(xdf_t *vdp, enum dkio_state mstate)
2519 2519 {
2520 2520 enum dkio_state prev_state;
2521 2521
2522 2522 mutex_enter(&vdp->xdf_cb_lk);
2523 2523 prev_state = vdp->xdf_mstate;
2524 2524
2525 2525 if (vdp->xdf_mstate == mstate) {
2526 2526 while (vdp->xdf_mstate == prev_state) {
2527 2527 if (cv_wait_sig(&vdp->xdf_mstate_cv,
2528 2528 &vdp->xdf_cb_lk) == 0) {
2529 2529 mutex_exit(&vdp->xdf_cb_lk);
2530 2530 return (EINTR);
2531 2531 }
2532 2532 }
2533 2533 }
2534 2534
2535 2535 if ((prev_state != DKIO_INSERTED) &&
2536 2536 (vdp->xdf_mstate == DKIO_INSERTED)) {
2537 2537 (void) xdf_media_req(vdp, XBV_MEDIA_REQ_LOCK, B_TRUE);
2538 2538 mutex_exit(&vdp->xdf_cb_lk);
2539 2539 return (0);
2540 2540 }
2541 2541
2542 2542 mutex_exit(&vdp->xdf_cb_lk);
2543 2543 return (0);
2544 2544 }
2545 2545
2546 2546 /*ARGSUSED*/
2547 2547 static int
2548 2548 xdf_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
2549 2549 int *rvalp)
2550 2550 {
2551 2551 minor_t minor = getminor(dev);
2552 2552 int part = XDF_PART(minor);
2553 2553 xdf_t *vdp;
2554 2554 int rv;
2555 2555
2556 2556 if (((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL) ||
2557 2557 (!xdf_isopen(vdp, part)))
2558 2558 return (ENXIO);
2559 2559
2560 2560 DPRINTF(IOCTL_DBG, ("xdf@%s:ioctl: cmd %d (0x%x)\n",
2561 2561 vdp->xdf_addr, cmd, cmd));
2562 2562
2563 2563 switch (cmd) {
2564 2564 default:
2565 2565 return (ENOTTY);
2566 2566 case DKIOCG_PHYGEOM:
2567 2567 case DKIOCG_VIRTGEOM:
2568 2568 case DKIOCGGEOM:
2569 2569 case DKIOCSGEOM:
2570 2570 case DKIOCGAPART:
2571 2571 case DKIOCSAPART:
2572 2572 case DKIOCGVTOC:
2573 2573 case DKIOCSVTOC:
2574 2574 case DKIOCPARTINFO:
2575 2575 case DKIOCGEXTVTOC:
2576 2576 case DKIOCSEXTVTOC:
2577 2577 case DKIOCEXTPARTINFO:
2578 2578 case DKIOCGMBOOT:
2579 2579 case DKIOCSMBOOT:
2580 2580 case DKIOCGETEFI:
2581 2581 case DKIOCSETEFI:
2582 2582 case DKIOCSETEXTPART:
2583 2583 case DKIOCPARTITION:
2584 2584 return (cmlb_ioctl(vdp->xdf_vd_lbl, dev, cmd, arg, mode, credp,
2585 2585 rvalp, NULL));
2586 2586 case FDEJECT:
2587 2587 case DKIOCEJECT:
2588 2588 case CDROMEJECT:
2589 2589 return (xdf_ioctl_eject(vdp));
2590 2590 case DKIOCLOCK:
2591 2591 return (xdf_ioctl_mlock(vdp));
2592 2592 case DKIOCUNLOCK:
2593 2593 return (xdf_ioctl_munlock(vdp));
2594 2594 case CDROMREADOFFSET: {
2595 2595 int offset = 0;
2596 2596 if (!XD_IS_CD(vdp))
2597 2597 return (ENOTTY);
2598 2598 if (ddi_copyout(&offset, (void *)arg, sizeof (int), mode))
2599 2599 return (EFAULT);
2600 2600 return (0);
2601 2601 }
2602 2602 case DKIOCGMEDIAINFO: {
2603 2603 struct dk_minfo media_info;
2604 2604
2605 2605 media_info.dki_lbsize = vdp->xdf_xdev_secsize;
2606 2606 media_info.dki_capacity = vdp->xdf_pgeom.g_capacity;
2607 2607 if (XD_IS_CD(vdp))
2608 2608 media_info.dki_media_type = DK_CDROM;
2609 2609 else
2610 2610 media_info.dki_media_type = DK_FIXED_DISK;
2611 2611
2612 2612 if (ddi_copyout(&media_info, (void *)arg,
2613 2613 sizeof (struct dk_minfo), mode))
2614 2614 return (EFAULT);
2615 2615 return (0);
2616 2616 }
2617 2617 case DKIOCINFO: {
2618 2618 struct dk_cinfo info;
2619 2619
2620 2620 /* controller information */
2621 2621 if (XD_IS_CD(vdp))
2622 2622 info.dki_ctype = DKC_CDROM;
2623 2623 else
2624 2624 info.dki_ctype = DKC_VBD;
2625 2625
2626 2626 info.dki_cnum = 0;
2627 2627 (void) strncpy((char *)(&info.dki_cname), "xdf", 8);
2628 2628
2629 2629 /* unit information */
2630 2630 info.dki_unit = ddi_get_instance(vdp->xdf_dip);
2631 2631 (void) strncpy((char *)(&info.dki_dname), "xdf", 8);
2632 2632 info.dki_flags = DKI_FMTVOL;
2633 2633 info.dki_partition = part;
2634 2634 info.dki_maxtransfer = maxphys / DEV_BSIZE;
2635 2635 info.dki_addr = 0;
2636 2636 info.dki_space = 0;
2637 2637 info.dki_prio = 0;
2638 2638 info.dki_vec = 0;
2639 2639
2640 2640 if (ddi_copyout(&info, (void *)arg, sizeof (info), mode))
2641 2641 return (EFAULT);
2642 2642 return (0);
2643 2643 }
2644 2644 case DKIOCSTATE: {
2645 2645 enum dkio_state mstate;
2646 2646
2647 2647 if (ddi_copyin((void *)arg, &mstate,
2648 2648 sizeof (mstate), mode) != 0)
2649 2649 return (EFAULT);
2650 2650 if ((rv = xdf_dkstate(vdp, mstate)) != 0)
2651 2651 return (rv);
2652 2652 mstate = vdp->xdf_mstate;
2653 2653 if (ddi_copyout(&mstate, (void *)arg,
2654 2654 sizeof (mstate), mode) != 0)
2655 2655 return (EFAULT);
2656 2656 return (0);
2657 2657 }
2658 2658 case DKIOCREMOVABLE: {
2659 2659 int i = BOOLEAN2VOID(XD_IS_RM(vdp));
2660 2660 if (ddi_copyout(&i, (caddr_t)arg, sizeof (i), mode))
2661 2661 return (EFAULT);
2662 2662 return (0);
2663 2663 }
2664 2664 case DKIOCGETWCE: {
2665 2665 int i = BOOLEAN2VOID(XD_IS_RM(vdp));
2666 2666 if (ddi_copyout(&i, (void *)arg, sizeof (i), mode))
2667 2667 return (EFAULT);
2668 2668 return (0);
2669 2669 }
2670 2670 case DKIOCSETWCE: {
2671 2671 int i;
2672 2672 if (ddi_copyin((void *)arg, &i, sizeof (i), mode))
2673 2673 return (EFAULT);
2674 2674 vdp->xdf_wce = VOID2BOOLEAN(i);
2675 2675 return (0);
2676 2676 }
2677 2677 case DKIOCFLUSHWRITECACHE: {
2678 2678 struct dk_callback *dkc = (struct dk_callback *)arg;
2679 2679
2680 2680 if (vdp->xdf_flush_supported) {
2681 2681 rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE,
2682 2682 NULL, 0, 0, (void *)dev);
2683 2683 } else if (vdp->xdf_feature_barrier &&
2684 2684 !xdf_barrier_flush_disable) {
2685 2685 rv = xdf_lb_rdwr(vdp->xdf_dip, TG_WRITE,
2686 2686 vdp->xdf_cache_flush_block, xdf_flush_block,
2687 2687 vdp->xdf_xdev_secsize, (void *)dev);
2688 2688 } else {
2689 2689 return (ENOTTY);
2690 2690 }
2691 2691 if ((mode & FKIOCTL) && (dkc != NULL) &&
2692 2692 (dkc->dkc_callback != NULL)) {
2693 2693 (*dkc->dkc_callback)(dkc->dkc_cookie, rv);
2694 2694 /* need to return 0 after calling callback */
2695 2695 rv = 0;
2696 2696 }
2697 2697 return (rv);
2698 2698 }
2699 2699 }
2700 2700 /*NOTREACHED*/
2701 2701 }
2702 2702
2703 2703 static int
2704 2704 xdf_strategy(struct buf *bp)
2705 2705 {
2706 2706 xdf_t *vdp;
2707 2707 minor_t minor;
2708 2708 diskaddr_t p_blkct, p_blkst;
2709 2709 daddr_t blkno;
2710 2710 ulong_t nblks;
2711 2711 int part;
2712 2712
2713 2713 minor = getminor(bp->b_edev);
2714 2714 part = XDF_PART(minor);
2715 2715 vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor));
2716 2716
2717 2717 mutex_enter(&vdp->xdf_dev_lk);
2718 2718 if (!xdf_isopen(vdp, part)) {
2719 2719 mutex_exit(&vdp->xdf_dev_lk);
2720 2720 xdf_io_err(bp, ENXIO, 0);
2721 2721 return (0);
2722 2722 }
2723 2723
2724 2724 /* We don't allow IO from the oe_change callback thread */
2725 2725 ASSERT(curthread != vdp->xdf_oe_change_thread);
2726 2726
2727 2727 /* Check for writes to a read only device */
2728 2728 if (!IS_READ(bp) && XD_IS_RO(vdp)) {
2729 2729 mutex_exit(&vdp->xdf_dev_lk);
2730 2730 xdf_io_err(bp, EROFS, 0);
2731 2731 return (0);
2732 2732 }
2733 2733
2734 2734 /* Check if this I/O is accessing a partition or the entire disk */
2735 2735 if ((long)bp->b_private == XB_SLICE_NONE) {
2736 2736 /* This I/O is using an absolute offset */
2737 2737 p_blkct = vdp->xdf_xdev_nblocks;
2738 2738 p_blkst = 0;
2739 2739 } else {
2740 2740 /* This I/O is using a partition relative offset */
2741 2741 mutex_exit(&vdp->xdf_dev_lk);
2742 2742 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct,
2743 2743 &p_blkst, NULL, NULL, NULL)) {
2744 2744 xdf_io_err(bp, ENXIO, 0);
2745 2745 return (0);
2746 2746 }
2747 2747 mutex_enter(&vdp->xdf_dev_lk);
2748 2748 }
2749 2749
2750 2750 /*
2751 2751 * Adjust the real blkno and bcount according to the underline
2752 2752 * physical sector size.
2753 2753 */
2754 2754 blkno = bp->b_blkno / (vdp->xdf_xdev_secsize / XB_BSIZE);
2755 2755
2756 2756 /* check for a starting block beyond the disk or partition limit */
2757 2757 if (blkno > p_blkct) {
2758 2758 DPRINTF(IO_DBG, ("xdf@%s: block %lld exceeds VBD size %"PRIu64,
2759 2759 vdp->xdf_addr, (longlong_t)blkno, (uint64_t)p_blkct));
2760 2760 mutex_exit(&vdp->xdf_dev_lk);
2761 2761 xdf_io_err(bp, EINVAL, 0);
2762 2762 return (0);
2763 2763 }
2764 2764
2765 2765 /* Legacy: don't set error flag at this case */
2766 2766 if (blkno == p_blkct) {
2767 2767 mutex_exit(&vdp->xdf_dev_lk);
2768 2768 bp->b_resid = bp->b_bcount;
2769 2769 biodone(bp);
2770 2770 return (0);
2771 2771 }
2772 2772
2773 2773 /* sanitize the input buf */
2774 2774 bioerror(bp, 0);
2775 2775 bp->b_resid = 0;
2776 2776 bp->av_back = bp->av_forw = NULL;
2777 2777
2778 2778 /* Adjust for partial transfer, this will result in an error later */
2779 2779 if (vdp->xdf_xdev_secsize != 0 &&
2780 2780 vdp->xdf_xdev_secsize != XB_BSIZE) {
2781 2781 nblks = bp->b_bcount / vdp->xdf_xdev_secsize;
2782 2782 } else {
2783 2783 nblks = bp->b_bcount >> XB_BSHIFT;
2784 2784 }
2785 2785
2786 2786 if ((blkno + nblks) > p_blkct) {
2787 2787 if (vdp->xdf_xdev_secsize != 0 &&
2788 2788 vdp->xdf_xdev_secsize != XB_BSIZE) {
2789 2789 bp->b_resid =
2790 2790 ((blkno + nblks) - p_blkct) *
2791 2791 vdp->xdf_xdev_secsize;
2792 2792 } else {
2793 2793 bp->b_resid =
2794 2794 ((blkno + nblks) - p_blkct) <<
2795 2795 XB_BSHIFT;
2796 2796 }
2797 2797 bp->b_bcount -= bp->b_resid;
2798 2798 }
2799 2799
2800 2800 DPRINTF(IO_DBG, ("xdf@%s: strategy blk %lld len %lu\n",
2801 2801 vdp->xdf_addr, (longlong_t)blkno, (ulong_t)bp->b_bcount));
2802 2802
2803 2803 /* Fix up the buf struct */
2804 2804 bp->b_flags |= B_BUSY;
2805 2805 bp->b_private = (void *)(uintptr_t)p_blkst;
2806 2806
2807 2807 xdf_bp_push(vdp, bp);
2808 2808 mutex_exit(&vdp->xdf_dev_lk);
2809 2809 xdf_io_start(vdp);
2810 2810 if (do_polled_io)
2811 2811 (void) xdf_ring_drain(vdp);
2812 2812 return (0);
2813 2813 }
2814 2814
2815 2815 /*ARGSUSED*/
2816 2816 static int
2817 2817 xdf_read(dev_t dev, struct uio *uiop, cred_t *credp)
2818 2818 {
2819 2819 xdf_t *vdp;
2820 2820 minor_t minor;
2821 2821 diskaddr_t p_blkcnt;
2822 2822 int part;
2823 2823
2824 2824 minor = getminor(dev);
2825 2825 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
2826 2826 return (ENXIO);
2827 2827
2828 2828 DPRINTF(IO_DBG, ("xdf@%s: read offset 0x%"PRIx64"\n",
2829 2829 vdp->xdf_addr, (int64_t)uiop->uio_offset));
2830 2830
2831 2831 part = XDF_PART(minor);
2832 2832 if (!xdf_isopen(vdp, part))
2833 2833 return (ENXIO);
2834 2834
2835 2835 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
2836 2836 NULL, NULL, NULL, NULL))
2837 2837 return (ENXIO);
2838 2838
2839 2839 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp))
2840 2840 return (ENOSPC);
2841 2841
2842 2842 if (U_INVAL(uiop))
2843 2843 return (EINVAL);
2844 2844
2845 2845 return (physio(xdf_strategy, NULL, dev, B_READ, xdfmin, uiop));
2846 2846 }
2847 2847
2848 2848 /*ARGSUSED*/
2849 2849 static int
2850 2850 xdf_write(dev_t dev, struct uio *uiop, cred_t *credp)
2851 2851 {
2852 2852 xdf_t *vdp;
2853 2853 minor_t minor;
2854 2854 diskaddr_t p_blkcnt;
2855 2855 int part;
2856 2856
2857 2857 minor = getminor(dev);
2858 2858 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
2859 2859 return (ENXIO);
2860 2860
2861 2861 DPRINTF(IO_DBG, ("xdf@%s: write offset 0x%"PRIx64"\n",
2862 2862 vdp->xdf_addr, (int64_t)uiop->uio_offset));
2863 2863
2864 2864 part = XDF_PART(minor);
2865 2865 if (!xdf_isopen(vdp, part))
2866 2866 return (ENXIO);
2867 2867
2868 2868 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
2869 2869 NULL, NULL, NULL, NULL))
2870 2870 return (ENXIO);
2871 2871
2872 2872 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp))
2873 2873 return (ENOSPC);
2874 2874
2875 2875 if (U_INVAL(uiop))
2876 2876 return (EINVAL);
2877 2877
2878 2878 return (physio(xdf_strategy, NULL, dev, B_WRITE, xdfmin, uiop));
2879 2879 }
2880 2880
2881 2881 /*ARGSUSED*/
2882 2882 static int
2883 2883 xdf_aread(dev_t dev, struct aio_req *aiop, cred_t *credp)
2884 2884 {
2885 2885 xdf_t *vdp;
2886 2886 minor_t minor;
2887 2887 struct uio *uiop = aiop->aio_uio;
2888 2888 diskaddr_t p_blkcnt;
2889 2889 int part;
2890 2890
2891 2891 minor = getminor(dev);
2892 2892 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
2893 2893 return (ENXIO);
2894 2894
2895 2895 part = XDF_PART(minor);
2896 2896 if (!xdf_isopen(vdp, part))
2897 2897 return (ENXIO);
2898 2898
2899 2899 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
2900 2900 NULL, NULL, NULL, NULL))
2901 2901 return (ENXIO);
2902 2902
2903 2903 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp))
2904 2904 return (ENOSPC);
2905 2905
2906 2906 if (U_INVAL(uiop))
2907 2907 return (EINVAL);
2908 2908
2909 2909 return (aphysio(xdf_strategy, anocancel, dev, B_READ, xdfmin, aiop));
2910 2910 }
2911 2911
2912 2912 /*ARGSUSED*/
2913 2913 static int
2914 2914 xdf_awrite(dev_t dev, struct aio_req *aiop, cred_t *credp)
2915 2915 {
2916 2916 xdf_t *vdp;
2917 2917 minor_t minor;
2918 2918 struct uio *uiop = aiop->aio_uio;
2919 2919 diskaddr_t p_blkcnt;
2920 2920 int part;
2921 2921
2922 2922 minor = getminor(dev);
2923 2923 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
2924 2924 return (ENXIO);
2925 2925
2926 2926 part = XDF_PART(minor);
2927 2927 if (!xdf_isopen(vdp, part))
2928 2928 return (ENXIO);
2929 2929
2930 2930 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt,
2931 2931 NULL, NULL, NULL, NULL))
2932 2932 return (ENXIO);
2933 2933
2934 2934 if (uiop->uio_loffset >= XB_DTOB(p_blkcnt, vdp))
2935 2935 return (ENOSPC);
2936 2936
2937 2937 if (U_INVAL(uiop))
2938 2938 return (EINVAL);
2939 2939
2940 2940 return (aphysio(xdf_strategy, anocancel, dev, B_WRITE, xdfmin, aiop));
2941 2941 }
2942 2942
2943 2943 static int
2944 2944 xdf_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
2945 2945 {
2946 2946 struct buf dumpbuf, *dbp = &dumpbuf;
2947 2947 xdf_t *vdp;
2948 2948 minor_t minor;
2949 2949 int err = 0;
2950 2950 int part;
2951 2951 diskaddr_t p_blkcnt, p_blkst;
2952 2952
2953 2953 minor = getminor(dev);
2954 2954 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
2955 2955 return (ENXIO);
2956 2956
2957 2957 DPRINTF(IO_DBG, ("xdf@%s: dump addr (0x%p) blk (%ld) nblks (%d)\n",
2958 2958 vdp->xdf_addr, (void *)addr, blkno, nblk));
2959 2959
2960 2960 /* We don't allow IO from the oe_change callback thread */
2961 2961 ASSERT(curthread != vdp->xdf_oe_change_thread);
2962 2962
2963 2963 part = XDF_PART(minor);
2964 2964 if (!xdf_isopen(vdp, part))
2965 2965 return (ENXIO);
2966 2966
2967 2967 if (cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkcnt, &p_blkst,
2968 2968 NULL, NULL, NULL))
2969 2969 return (ENXIO);
2970 2970
2971 2971 if ((blkno + nblk) >
2972 2972 (p_blkcnt * (vdp->xdf_xdev_secsize / XB_BSIZE))) {
2973 2973 cmn_err(CE_WARN, "xdf@%s: block %ld exceeds VBD size %"PRIu64,
2974 2974 vdp->xdf_addr, (daddr_t)((blkno + nblk) /
2975 2975 (vdp->xdf_xdev_secsize / XB_BSIZE)), (uint64_t)p_blkcnt);
2976 2976 return (EINVAL);
2977 2977 }
2978 2978
2979 2979 bioinit(dbp);
2980 2980 dbp->b_flags = B_BUSY;
2981 2981 dbp->b_un.b_addr = addr;
2982 2982 dbp->b_bcount = nblk << DEV_BSHIFT;
2983 2983 dbp->b_blkno = blkno;
2984 2984 dbp->b_edev = dev;
2985 2985 dbp->b_private = (void *)(uintptr_t)p_blkst;
2986 2986
2987 2987 mutex_enter(&vdp->xdf_dev_lk);
2988 2988 xdf_bp_push(vdp, dbp);
2989 2989 mutex_exit(&vdp->xdf_dev_lk);
2990 2990 xdf_io_start(vdp);
2991 2991 err = xdf_ring_drain(vdp);
2992 2992 biofini(dbp);
2993 2993 return (err);
2994 2994 }
2995 2995
2996 2996 /*ARGSUSED*/
2997 2997 static int
2998 2998 xdf_close(dev_t dev, int flag, int otyp, struct cred *credp)
2999 2999 {
3000 3000 minor_t minor;
3001 3001 xdf_t *vdp;
3002 3002 int part;
3003 3003 ulong_t parbit;
3004 3004
3005 3005 minor = getminor(dev);
3006 3006 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
3007 3007 return (ENXIO);
3008 3008
3009 3009 mutex_enter(&vdp->xdf_dev_lk);
3010 3010 part = XDF_PART(minor);
3011 3011 if (!xdf_isopen(vdp, part)) {
3012 3012 mutex_exit(&vdp->xdf_dev_lk);
3013 3013 return (ENXIO);
3014 3014 }
3015 3015 parbit = 1 << part;
3016 3016
3017 3017 ASSERT((vdp->xdf_vd_open[otyp] & parbit) != 0);
3018 3018 if (otyp == OTYP_LYR) {
3019 3019 ASSERT(vdp->xdf_vd_lyropen[part] > 0);
3020 3020 if (--vdp->xdf_vd_lyropen[part] == 0)
3021 3021 vdp->xdf_vd_open[otyp] &= ~parbit;
3022 3022 } else {
3023 3023 vdp->xdf_vd_open[otyp] &= ~parbit;
3024 3024 }
3025 3025 vdp->xdf_vd_exclopen &= ~parbit;
3026 3026
3027 3027 mutex_exit(&vdp->xdf_dev_lk);
3028 3028 return (0);
3029 3029 }
3030 3030
3031 3031 static int
3032 3032 xdf_open(dev_t *devp, int flag, int otyp, cred_t *credp)
3033 3033 {
3034 3034 minor_t minor;
3035 3035 xdf_t *vdp;
3036 3036 int part;
3037 3037 ulong_t parbit;
3038 3038 diskaddr_t p_blkct = 0;
3039 3039 boolean_t firstopen;
3040 3040 boolean_t nodelay;
3041 3041
3042 3042 minor = getminor(*devp);
3043 3043 if ((vdp = ddi_get_soft_state(xdf_ssp, XDF_INST(minor))) == NULL)
3044 3044 return (ENXIO);
3045 3045
3046 3046 nodelay = (flag & (FNDELAY | FNONBLOCK));
3047 3047
3048 3048 DPRINTF(DDI_DBG, ("xdf@%s: opening\n", vdp->xdf_addr));
3049 3049
3050 3050 /* do cv_wait until connected or failed */
3051 3051 mutex_enter(&vdp->xdf_cb_lk);
3052 3052 mutex_enter(&vdp->xdf_dev_lk);
3053 3053 if (!nodelay && (xdf_connect_locked(vdp, B_TRUE) != XD_READY)) {
3054 3054 mutex_exit(&vdp->xdf_dev_lk);
3055 3055 mutex_exit(&vdp->xdf_cb_lk);
3056 3056 return (ENXIO);
3057 3057 }
3058 3058 mutex_exit(&vdp->xdf_cb_lk);
3059 3059
3060 3060 if ((flag & FWRITE) && XD_IS_RO(vdp)) {
3061 3061 mutex_exit(&vdp->xdf_dev_lk);
3062 3062 return (EROFS);
3063 3063 }
3064 3064
3065 3065 part = XDF_PART(minor);
3066 3066 parbit = 1 << part;
3067 3067 if ((vdp->xdf_vd_exclopen & parbit) ||
3068 3068 ((flag & FEXCL) && xdf_isopen(vdp, part))) {
3069 3069 mutex_exit(&vdp->xdf_dev_lk);
3070 3070 return (EBUSY);
3071 3071 }
3072 3072
3073 3073 /* are we the first one to open this node? */
3074 3074 firstopen = !xdf_isopen(vdp, -1);
3075 3075
3076 3076 if (otyp == OTYP_LYR)
3077 3077 vdp->xdf_vd_lyropen[part]++;
3078 3078
3079 3079 vdp->xdf_vd_open[otyp] |= parbit;
3080 3080
3081 3081 if (flag & FEXCL)
3082 3082 vdp->xdf_vd_exclopen |= parbit;
3083 3083
3084 3084 mutex_exit(&vdp->xdf_dev_lk);
3085 3085
3086 3086 /* force a re-validation */
3087 3087 if (firstopen)
3088 3088 cmlb_invalidate(vdp->xdf_vd_lbl, NULL);
3089 3089
3090 3090 /* If this is a non-blocking open then we're done */
3091 3091 if (nodelay)
3092 3092 return (0);
3093 3093
3094 3094 /*
3095 3095 * This is a blocking open, so we require:
3096 3096 * - that the disk have a valid label on it
3097 3097 * - that the size of the partition that we're opening is non-zero
3098 3098 */
3099 3099 if ((cmlb_partinfo(vdp->xdf_vd_lbl, part, &p_blkct,
3100 3100 NULL, NULL, NULL, NULL) != 0) || (p_blkct == 0)) {
3101 3101 (void) xdf_close(*devp, flag, otyp, credp);
3102 3102 return (ENXIO);
3103 3103 }
3104 3104
3105 3105 return (0);
3106 3106 }
3107 3107
3108 3108 /*ARGSUSED*/
3109 3109 static void
3110 3110 xdf_watch_hp_status_cb(dev_info_t *dip, const char *path, void *arg)
3111 3111 {
3112 3112 xdf_t *vdp = (xdf_t *)ddi_get_driver_private(dip);
3113 3113 cv_broadcast(&vdp->xdf_hp_status_cv);
3114 3114 }
3115 3115
3116 3116 static int
3117 3117 xdf_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int flags,
3118 3118 char *name, caddr_t valuep, int *lengthp)
3119 3119 {
3120 3120 xdf_t *vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip));
3121 3121
3122 3122 /*
3123 3123 * Sanity check that if a dev_t or dip were specified that they
3124 3124 * correspond to this device driver. On debug kernels we'll
3125 3125 * panic and on non-debug kernels we'll return failure.
3126 3126 */
3127 3127 ASSERT(ddi_driver_major(dip) == xdf_major);
3128 3128 ASSERT((dev == DDI_DEV_T_ANY) || (getmajor(dev) == xdf_major));
3129 3129 if ((ddi_driver_major(dip) != xdf_major) ||
3130 3130 ((dev != DDI_DEV_T_ANY) && (getmajor(dev) != xdf_major)))
3131 3131 return (DDI_PROP_NOT_FOUND);
3132 3132
3133 3133 if (vdp == NULL)
3134 3134 return (ddi_prop_op(dev, dip, prop_op, flags,
3135 3135 name, valuep, lengthp));
3136 3136
3137 3137 return (cmlb_prop_op(vdp->xdf_vd_lbl,
3138 3138 dev, dip, prop_op, flags, name, valuep, lengthp,
3139 3139 XDF_PART(getminor(dev)), NULL));
3140 3140 }
3141 3141
3142 3142 /*ARGSUSED*/
3143 3143 static int
3144 3144 xdf_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **rp)
3145 3145 {
3146 3146 int instance = XDF_INST(getminor((dev_t)arg));
3147 3147 xdf_t *vbdp;
3148 3148
3149 3149 switch (cmd) {
3150 3150 case DDI_INFO_DEVT2DEVINFO:
3151 3151 if ((vbdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL) {
3152 3152 *rp = NULL;
3153 3153 return (DDI_FAILURE);
3154 3154 }
3155 3155 *rp = vbdp->xdf_dip;
3156 3156 return (DDI_SUCCESS);
3157 3157
3158 3158 case DDI_INFO_DEVT2INSTANCE:
3159 3159 *rp = (void *)(uintptr_t)instance;
3160 3160 return (DDI_SUCCESS);
3161 3161
3162 3162 default:
3163 3163 return (DDI_FAILURE);
3164 3164 }
3165 3165 }
3166 3166
3167 3167 /*ARGSUSED*/
3168 3168 static int
3169 3169 xdf_resume(dev_info_t *dip)
3170 3170 {
3171 3171 xdf_t *vdp;
3172 3172 char *oename;
3173 3173
3174 3174 if ((vdp = ddi_get_soft_state(xdf_ssp, ddi_get_instance(dip))) == NULL)
3175 3175 goto err;
3176 3176
3177 3177 if (xdf_debug & SUSRES_DBG)
3178 3178 xen_printf("xdf@%s: xdf_resume\n", vdp->xdf_addr);
3179 3179
3180 3180 mutex_enter(&vdp->xdf_cb_lk);
3181 3181
3182 3182 if (xvdi_resume(dip) != DDI_SUCCESS) {
3183 3183 mutex_exit(&vdp->xdf_cb_lk);
3184 3184 goto err;
3185 3185 }
3186 3186
3187 3187 if (((oename = xvdi_get_oename(dip)) == NULL) ||
3188 3188 (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS,
3189 3189 xdf_watch_hp_status_cb, NULL) != DDI_SUCCESS)) {
3190 3190 mutex_exit(&vdp->xdf_cb_lk);
3191 3191 goto err;
3192 3192 }
3193 3193
3194 3194 mutex_enter(&vdp->xdf_dev_lk);
3195 3195 ASSERT(vdp->xdf_state != XD_READY);
3196 3196 xdf_set_state(vdp, XD_UNKNOWN);
3197 3197 mutex_exit(&vdp->xdf_dev_lk);
3198 3198
3199 3199 if (xdf_setstate_init(vdp) != DDI_SUCCESS) {
3200 3200 mutex_exit(&vdp->xdf_cb_lk);
3201 3201 goto err;
3202 3202 }
3203 3203
3204 3204 mutex_exit(&vdp->xdf_cb_lk);
3205 3205
3206 3206 if (xdf_debug & SUSRES_DBG)
3207 3207 xen_printf("xdf@%s: xdf_resume: done\n", vdp->xdf_addr);
3208 3208 return (DDI_SUCCESS);
3209 3209 err:
3210 3210 if (xdf_debug & SUSRES_DBG)
3211 3211 xen_printf("xdf@%s: xdf_resume: fail\n", vdp->xdf_addr);
3212 3212 return (DDI_FAILURE);
3213 3213 }
3214 3214
3215 3215 static int
3216 3216 xdf_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
3217 3217 {
3218 3218 int n, instance = ddi_get_instance(dip);
3219 3219 ddi_iblock_cookie_t ibc, softibc;
3220 3220 boolean_t dev_iscd = B_FALSE;
3221 3221 xdf_t *vdp;
3222 3222 char *oename, *xsname, *str;
3223 3223
3224 3224 if ((n = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_NOTPROM,
3225 3225 "xdf_debug", 0)) != 0)
3226 3226 xdf_debug = n;
3227 3227
3228 3228 switch (cmd) {
3229 3229 case DDI_RESUME:
3230 3230 return (xdf_resume(dip));
3231 3231 case DDI_ATTACH:
3232 3232 break;
3233 3233 default:
3234 3234 return (DDI_FAILURE);
3235 3235 }
3236 3236 /* DDI_ATTACH */
3237 3237
3238 3238 if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
3239 3239 ((oename = xvdi_get_oename(dip)) == NULL))
3240 3240 return (DDI_FAILURE);
3241 3241
3242 3242 /*
3243 3243 * Disable auto-detach. This is necessary so that we don't get
3244 3244 * detached while we're disconnected from the back end.
3245 3245 */
3246 3246 if ((ddi_prop_update_int(DDI_DEV_T_NONE, dip,
3247 3247 DDI_NO_AUTODETACH, 1) != DDI_PROP_SUCCESS))
3248 3248 return (DDI_FAILURE);
3249 3249
3250 3250 /* driver handles kernel-issued IOCTLs */
3251 3251 if (ddi_prop_create(DDI_DEV_T_NONE, dip,
3252 3252 DDI_PROP_CANSLEEP, DDI_KERNEL_IOCTL, NULL, 0) != DDI_PROP_SUCCESS)
3253 3253 return (DDI_FAILURE);
3254 3254
3255 3255 if (ddi_get_iblock_cookie(dip, 0, &ibc) != DDI_SUCCESS)
3256 3256 return (DDI_FAILURE);
3257 3257
3258 3258 if (ddi_get_soft_iblock_cookie(dip,
3259 3259 DDI_SOFTINT_LOW, &softibc) != DDI_SUCCESS)
3260 3260 return (DDI_FAILURE);
3261 3261
3262 3262 if (xenbus_read_str(xsname, XBP_DEV_TYPE, &str) != 0) {
3263 3263 cmn_err(CE_WARN, "xdf@%s: cannot read device-type",
3264 3264 ddi_get_name_addr(dip));
3265 3265 return (DDI_FAILURE);
3266 3266 }
3267 3267 if (strcmp(str, XBV_DEV_TYPE_CD) == 0)
3268 3268 dev_iscd = B_TRUE;
3269 3269 strfree(str);
3270 3270
3271 3271 if (ddi_soft_state_zalloc(xdf_ssp, instance) != DDI_SUCCESS)
3272 3272 return (DDI_FAILURE);
3273 3273
3274 3274 DPRINTF(DDI_DBG, ("xdf@%s: attaching\n", ddi_get_name_addr(dip)));
3275 3275 vdp = ddi_get_soft_state(xdf_ssp, instance);
3276 3276 ddi_set_driver_private(dip, vdp);
3277 3277 vdp->xdf_dip = dip;
3278 3278 vdp->xdf_addr = ddi_get_name_addr(dip);
3279 3279 vdp->xdf_suspending = B_FALSE;
3280 3280 vdp->xdf_media_req_supported = B_FALSE;
3281 3281 vdp->xdf_peer = INVALID_DOMID;
3282 3282 vdp->xdf_evtchn = INVALID_EVTCHN;
3283 3283 list_create(&vdp->xdf_vreq_act, sizeof (v_req_t),
3284 3284 offsetof(v_req_t, v_link));
3285 3285 cv_init(&vdp->xdf_dev_cv, NULL, CV_DEFAULT, NULL);
3286 3286 cv_init(&vdp->xdf_hp_status_cv, NULL, CV_DEFAULT, NULL);
3287 3287 cv_init(&vdp->xdf_mstate_cv, NULL, CV_DEFAULT, NULL);
3288 3288 mutex_init(&vdp->xdf_dev_lk, NULL, MUTEX_DRIVER, (void *)ibc);
3289 3289 mutex_init(&vdp->xdf_cb_lk, NULL, MUTEX_DRIVER, (void *)ibc);
3290 3290 mutex_init(&vdp->xdf_iostat_lk, NULL, MUTEX_DRIVER, (void *)ibc);
3291 3291 vdp->xdf_cmbl_reattach = B_TRUE;
3292 3292 if (dev_iscd) {
3293 3293 vdp->xdf_dinfo |= VDISK_CDROM;
3294 3294 vdp->xdf_mstate = DKIO_EJECTED;
3295 3295 } else {
3296 3296 vdp->xdf_mstate = DKIO_NONE;
3297 3297 }
3298 3298
3299 3299 if ((vdp->xdf_ready_tq = ddi_taskq_create(dip, "xdf_ready_tq",
3300 3300 1, TASKQ_DEFAULTPRI, 0)) == NULL)
3301 3301 goto errout0;
3302 3302
3303 3303 if (xvdi_add_xb_watch_handler(dip, oename, XBP_HP_STATUS,
3304 3304 xdf_watch_hp_status_cb, NULL) != DDI_SUCCESS)
3305 3305 goto errout0;
3306 3306
3307 3307 if (ddi_add_softintr(dip, DDI_SOFTINT_LOW, &vdp->xdf_softintr_id,
3308 3308 &softibc, NULL, xdf_iorestart, (caddr_t)vdp) != DDI_SUCCESS) {
3309 3309 cmn_err(CE_WARN, "xdf@%s: failed to add softintr",
3310 3310 ddi_get_name_addr(dip));
3311 3311 goto errout0;
3312 3312 }
3313 3313
3314 3314 /*
3315 3315 * Initialize the physical geometry stucture. Note that currently
3316 3316 * we don't know the size of the backend device so the number
3317 3317 * of blocks on the device will be initialized to zero. Once
3318 3318 * we connect to the backend device we'll update the physical
3319 3319 * geometry to reflect the real size of the device.
3320 3320 */
3321 3321 xdf_synthetic_pgeom(dip, &vdp->xdf_pgeom);
3322 3322 vdp->xdf_pgeom_fixed = B_FALSE;
3323 3323
3324 3324 /*
3325 3325 * create default device minor nodes: non-removable disk
3326 3326 * we will adjust minor nodes after we are connected w/ backend
3327 3327 */
3328 3328 cmlb_alloc_handle(&vdp->xdf_vd_lbl);
3329 3329 if (xdf_cmlb_attach(vdp) != 0) {
3330 3330 cmn_err(CE_WARN,
3331 3331 "xdf@%s: attach failed, cmlb attach failed",
3332 3332 ddi_get_name_addr(dip));
3333 3333 goto errout0;
3334 3334 }
3335 3335
3336 3336 /*
3337 3337 * We ship with cache-enabled disks
3338 3338 */
3339 3339 vdp->xdf_wce = B_TRUE;
3340 3340
3341 3341 mutex_enter(&vdp->xdf_cb_lk);
3342 3342 /* Watch backend XenbusState change */
3343 3343 if (xvdi_add_event_handler(dip,
3344 3344 XS_OE_STATE, xdf_oe_change, NULL) != DDI_SUCCESS) {
3345 3345 mutex_exit(&vdp->xdf_cb_lk);
3346 3346 goto errout0;
3347 3347 }
3348 3348
3349 3349 if (xdf_setstate_init(vdp) != DDI_SUCCESS) {
3350 3350 cmn_err(CE_WARN, "xdf@%s: start connection failed",
3351 3351 ddi_get_name_addr(dip));
3352 3352 mutex_exit(&vdp->xdf_cb_lk);
3353 3353 goto errout1;
3354 3354 }
3355 3355 mutex_exit(&vdp->xdf_cb_lk);
3356 3356
3357 3357 #if defined(XPV_HVM_DRIVER)
3358 3358
3359 3359 xdf_hvm_add(dip);
3360 3360
3361 3361 /* Report our version to dom0. */
3362 3362 if (xenbus_printf(XBT_NULL, "guest/xdf", "version", "%d",
3363 3363 HVMPV_XDF_VERS))
3364 3364 cmn_err(CE_WARN, "xdf: couldn't write version\n");
3365 3365
3366 3366 #else /* !XPV_HVM_DRIVER */
3367 3367
3368 3368 /* create kstat for iostat(1M) */
3369 3369 if (xdf_kstat_create(dip, "xdf", instance) != 0) {
3370 3370 cmn_err(CE_WARN, "xdf@%s: failed to create kstat",
3371 3371 ddi_get_name_addr(dip));
3372 3372 goto errout1;
3373 3373 }
3374 3374
3375 3375 #endif /* !XPV_HVM_DRIVER */
3376 3376
3377 3377 ddi_report_dev(dip);
3378 3378 DPRINTF(DDI_DBG, ("xdf@%s: attached\n", vdp->xdf_addr));
3379 3379 return (DDI_SUCCESS);
3380 3380
3381 3381 errout1:
3382 3382 (void) xvdi_switch_state(vdp->xdf_dip, XBT_NULL, XenbusStateClosed);
3383 3383 xvdi_remove_event_handler(dip, XS_OE_STATE);
3384 3384 errout0:
3385 3385 if (vdp->xdf_vd_lbl != NULL) {
3386 3386 cmlb_detach(vdp->xdf_vd_lbl, NULL);
3387 3387 cmlb_free_handle(&vdp->xdf_vd_lbl);
3388 3388 vdp->xdf_vd_lbl = NULL;
3389 3389 }
3390 3390 if (vdp->xdf_softintr_id != NULL)
3391 3391 ddi_remove_softintr(vdp->xdf_softintr_id);
3392 3392 xvdi_remove_xb_watch_handlers(dip);
3393 3393 if (vdp->xdf_ready_tq != NULL)
3394 3394 ddi_taskq_destroy(vdp->xdf_ready_tq);
3395 3395 mutex_destroy(&vdp->xdf_cb_lk);
3396 3396 mutex_destroy(&vdp->xdf_dev_lk);
3397 3397 cv_destroy(&vdp->xdf_dev_cv);
3398 3398 cv_destroy(&vdp->xdf_hp_status_cv);
3399 3399 ddi_soft_state_free(xdf_ssp, instance);
3400 3400 ddi_set_driver_private(dip, NULL);
3401 3401 ddi_prop_remove_all(dip);
3402 3402 cmn_err(CE_WARN, "xdf@%s: attach failed", ddi_get_name_addr(dip));
3403 3403 return (DDI_FAILURE);
3404 3404 }
3405 3405
3406 3406 static int
3407 3407 xdf_suspend(dev_info_t *dip)
3408 3408 {
3409 3409 int instance = ddi_get_instance(dip);
3410 3410 xdf_t *vdp;
3411 3411
3412 3412 if ((vdp = ddi_get_soft_state(xdf_ssp, instance)) == NULL)
3413 3413 return (DDI_FAILURE);
3414 3414
3415 3415 if (xdf_debug & SUSRES_DBG)
3416 3416 xen_printf("xdf@%s: xdf_suspend\n", vdp->xdf_addr);
3417 3417
3418 3418 xvdi_suspend(dip);
3419 3419
3420 3420 mutex_enter(&vdp->xdf_cb_lk);
3421 3421 mutex_enter(&vdp->xdf_dev_lk);
3422 3422
3423 3423 vdp->xdf_suspending = B_TRUE;
3424 3424 xdf_ring_destroy(vdp);
3425 3425 xdf_set_state(vdp, XD_SUSPEND);
3426 3426 vdp->xdf_suspending = B_FALSE;
3427 3427
3428 3428 mutex_exit(&vdp->xdf_dev_lk);
3429 3429 mutex_exit(&vdp->xdf_cb_lk);
3430 3430
3431 3431 if (xdf_debug & SUSRES_DBG)
3432 3432 xen_printf("xdf@%s: xdf_suspend: done\n", vdp->xdf_addr);
3433 3433
3434 3434 return (DDI_SUCCESS);
3435 3435 }
3436 3436
3437 3437 static int
3438 3438 xdf_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
3439 3439 {
3440 3440 xdf_t *vdp;
3441 3441 int instance;
3442 3442
3443 3443 switch (cmd) {
3444 3444
3445 3445 case DDI_PM_SUSPEND:
3446 3446 break;
3447 3447
3448 3448 case DDI_SUSPEND:
3449 3449 return (xdf_suspend(dip));
3450 3450
3451 3451 case DDI_DETACH:
3452 3452 break;
3453 3453
3454 3454 default:
3455 3455 return (DDI_FAILURE);
3456 3456 }
3457 3457
3458 3458 instance = ddi_get_instance(dip);
3459 3459 DPRINTF(DDI_DBG, ("xdf@%s: detaching\n", ddi_get_name_addr(dip)));
3460 3460 vdp = ddi_get_soft_state(xdf_ssp, instance);
3461 3461
3462 3462 if (vdp == NULL)
3463 3463 return (DDI_FAILURE);
3464 3464
3465 3465 mutex_enter(&vdp->xdf_cb_lk);
3466 3466 xdf_disconnect(vdp, XD_CLOSED, B_FALSE);
3467 3467 if (vdp->xdf_state != XD_CLOSED) {
3468 3468 mutex_exit(&vdp->xdf_cb_lk);
3469 3469 return (DDI_FAILURE);
3470 3470 }
3471 3471 mutex_exit(&vdp->xdf_cb_lk);
3472 3472
3473 3473 ASSERT(!ISDMACBON(vdp));
3474 3474
3475 3475 #if defined(XPV_HVM_DRIVER)
3476 3476 xdf_hvm_rm(dip);
3477 3477 #endif /* XPV_HVM_DRIVER */
3478 3478
3479 3479 if (vdp->xdf_timeout_id != 0)
3480 3480 (void) untimeout(vdp->xdf_timeout_id);
3481 3481
3482 3482 xvdi_remove_event_handler(dip, XS_OE_STATE);
3483 3483 ddi_taskq_destroy(vdp->xdf_ready_tq);
3484 3484
3485 3485 cmlb_detach(vdp->xdf_vd_lbl, NULL);
3486 3486 cmlb_free_handle(&vdp->xdf_vd_lbl);
3487 3487
3488 3488 /* we'll support backend running in domU later */
3489 3489 #ifdef DOMU_BACKEND
3490 3490 (void) xvdi_post_event(dip, XEN_HP_REMOVE);
3491 3491 #endif
3492 3492
3493 3493 list_destroy(&vdp->xdf_vreq_act);
3494 3494 ddi_prop_remove_all(dip);
3495 3495 xdf_kstat_delete(dip);
3496 3496 ddi_remove_softintr(vdp->xdf_softintr_id);
3497 3497 xvdi_remove_xb_watch_handlers(dip);
3498 3498 ddi_set_driver_private(dip, NULL);
3499 3499 cv_destroy(&vdp->xdf_dev_cv);
3500 3500 mutex_destroy(&vdp->xdf_cb_lk);
3501 3501 mutex_destroy(&vdp->xdf_dev_lk);
3502 3502 if (vdp->xdf_cache_flush_block != NULL)
3503 3503 kmem_free(vdp->xdf_flush_mem, 2 * vdp->xdf_xdev_secsize);
3504 3504 ddi_soft_state_free(xdf_ssp, instance);
3505 3505 return (DDI_SUCCESS);
3506 3506 }
3507 3507
3508 3508 /*
3509 3509 * Driver linkage structures.
3510 3510 */
/*
 * Character/block entry points for this driver.  Slot names in the
 * comments follow the cb_ops(9S) field order.
 */
static struct cb_ops xdf_cbops = {
	xdf_open,		/* cb_open */
	xdf_close,		/* cb_close */
	xdf_strategy,		/* cb_strategy */
	nodev,			/* cb_print */
	xdf_dump,		/* cb_dump */
	xdf_read,		/* cb_read */
	xdf_write,		/* cb_write */
	xdf_ioctl,		/* cb_ioctl */
	nodev,			/* cb_devmap */
	nodev,			/* cb_mmap */
	nodev,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	xdf_prop_op,		/* cb_prop_op */
	NULL,			/* cb_str */
	D_MP | D_NEW | D_64BIT,	/* cb_flag */
	CB_REV,			/* cb_rev */
	xdf_aread,		/* cb_aread */
	xdf_awrite		/* cb_awrite */
};
3531 3531
/*
 * Device operations vector; referenced by the modldrv structure below
 * and pointing at xdf_cbops for the character/block entry points.
 */
struct dev_ops xdf_devops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	xdf_getinfo,		/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	xdf_attach,		/* devo_attach */
	xdf_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&xdf_cbops,		/* devo_cb_ops */
	NULL,			/* devo_bus_ops */
	NULL,			/* devo_power */
	ddi_quiesce_not_supported, /* devo_quiesce */
};
3546 3546
↓ open down ↓ |
3546 lines elided |
↑ open up ↑ |
3547 3547 /*
3548 3548 * Module linkage structures.
3549 3549 */
/* Driver linkage element; installed through xdf_modlinkage. */
static struct modldrv modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	"virtual block driver",	/* short description */
	&xdf_devops		/* driver specific ops */
};
3555 3555
3556 3556 static struct modlinkage xdf_modlinkage = {
3557 - MODREV_1, (void *)&modldrv, NULL
3557 + MODREV_1, { (void *)&modldrv, NULL }
3558 3558 };
3559 3559
3560 3560 /*
3561 3561 * standard module entry points
3562 3562 */
3563 3563 int
3564 3564 _init(void)
3565 3565 {
3566 3566 int rc;
3567 3567
3568 3568 xdf_major = ddi_name_to_major("xdf");
3569 3569 if (xdf_major == (major_t)-1)
3570 3570 return (EINVAL);
3571 3571
3572 3572 if ((rc = ddi_soft_state_init(&xdf_ssp, sizeof (xdf_t), 0)) != 0)
3573 3573 return (rc);
3574 3574
3575 3575 xdf_vreq_cache = kmem_cache_create("xdf_vreq_cache",
3576 3576 sizeof (v_req_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
3577 3577 xdf_gs_cache = kmem_cache_create("xdf_gs_cache",
3578 3578 sizeof (ge_slot_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
3579 3579
3580 3580 #if defined(XPV_HVM_DRIVER)
3581 3581 xdf_hvm_init();
3582 3582 #endif /* XPV_HVM_DRIVER */
3583 3583
3584 3584 if ((rc = mod_install(&xdf_modlinkage)) != 0) {
3585 3585 #if defined(XPV_HVM_DRIVER)
3586 3586 xdf_hvm_fini();
3587 3587 #endif /* XPV_HVM_DRIVER */
3588 3588 kmem_cache_destroy(xdf_vreq_cache);
3589 3589 kmem_cache_destroy(xdf_gs_cache);
3590 3590 ddi_soft_state_fini(&xdf_ssp);
3591 3591 return (rc);
3592 3592 }
3593 3593
3594 3594 return (rc);
3595 3595 }
3596 3596
3597 3597 int
3598 3598 _fini(void)
3599 3599 {
3600 3600 int err;
3601 3601 if ((err = mod_remove(&xdf_modlinkage)) != 0)
3602 3602 return (err);
3603 3603
3604 3604 #if defined(XPV_HVM_DRIVER)
3605 3605 xdf_hvm_fini();
3606 3606 #endif /* XPV_HVM_DRIVER */
3607 3607
3608 3608 kmem_cache_destroy(xdf_vreq_cache);
3609 3609 kmem_cache_destroy(xdf_gs_cache);
3610 3610 ddi_soft_state_fini(&xdf_ssp);
3611 3611
3612 3612 return (0);
3613 3613 }
3614 3614
3615 3615 int
3616 3616 _info(struct modinfo *modinfop)
3617 3617 {
3618 3618 return (mod_info(&xdf_modlinkage, modinfop));
3619 3619 }
↓ open down ↓ |
52 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX