1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * Note: This is the backend part of the split PV disk driver. This driver
  29  * is not a nexus driver, nor is it a leaf driver(block/char/stream driver).
  30  * Currently, it does not create any minor node. So, although, it runs in
  31  * backend domain, it will not be used directly from within dom0.
  32  * It simply gets block I/O requests issued by frontend from a shared page
  33  * (blkif ring buffer - defined by Xen) between backend and frontend domain,
  34  * generates a buf, and push it down to underlying disk target driver via
  35  * ldi interface. When buf is done, this driver will generate a response
  36  * and put it into ring buffer to inform frontend of the status of the I/O
  37  * request issued by it. When a new virtual device entry is added in xenstore,
   38  * there will be a watch event sent from Xen to xvdi framework, who will,
  39  * in turn, create the devinfo node and try to attach this driver
  40  * (see xvdi_create_dev). When frontend peer changes its state to
  41  * XenbusStateClose, an event will also be sent from Xen to xvdi framework,
  42  * who will detach and remove this devinfo node (see i_xvdi_oestate_handler).
   43  * I/O requests read from the ring buffer and events from xenstore cannot be
   44  * trusted. We verify them in xdb_get_buf() and xdb_check_state_transition().
  45  *
  46  * Virtual device configuration is read/written from/to the database via
   47  * xenbus_* interfaces. The driver also uses xvdi_* to talk to the hypervisor.
  48  * There is an on-going effort to make xvdi_* cover all xenbus_*.
  49  */
  50 
  51 #include <sys/types.h>
  52 #include <sys/conf.h>
  53 #include <sys/ddi.h>
  54 #include <sys/dditypes.h>
  55 #include <sys/sunddi.h>
  56 #include <sys/list.h>
  57 #include <sys/dkio.h>
  58 #include <sys/cmlb.h>
  59 #include <sys/vtoc.h>
  60 #include <sys/modctl.h>
  61 #include <sys/bootconf.h>
  62 #include <sys/promif.h>
  63 #include <sys/sysmacros.h>
  64 #include <public/io/xenbus.h>
  65 #include <public/io/xs_wire.h>
  66 #include <xen/sys/xenbus_impl.h>
  67 #include <xen/sys/xendev.h>
  68 #include <sys/gnttab.h>
  69 #include <sys/scsi/generic/inquiry.h>
  70 #include <vm/seg_kmem.h>
  71 #include <vm/hat_i86.h>
  72 #include <sys/gnttab.h>
  73 #include <sys/lofi.h>
  74 #include <io/xdf.h>
  75 #include <xen/io/blkif_impl.h>
  76 #include <io/xdb.h>
  77 
  78 static xdb_t *xdb_statep;
  79 static int xdb_debug = 0;
  80 
  81 static void xdb_close(dev_info_t *);
  82 static int xdb_push_response(xdb_t *, uint64_t, uint8_t, uint16_t);
  83 static int xdb_get_request(xdb_t *, blkif_request_t *);
  84 static void blkif_get_x86_32_req(blkif_request_t *, blkif_x86_32_request_t *);
  85 static void blkif_get_x86_64_req(blkif_request_t *, blkif_x86_64_request_t *);
  86 static int xdb_biodone(buf_t *);
  87 
  88 
  89 #ifdef DEBUG
  90 /*
  91  * debug aid functions
  92  */
  93 
  94 static void
  95 logva(xdb_t *vdp, uint64_t va)
  96 {
  97         uint64_t *page_addrs;
  98         int i;
  99 
 100         page_addrs = vdp->page_addrs;
 101         for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) {
 102                 if (page_addrs[i] == va)
 103                         debug_enter("VA remapping found!");
 104         }
 105 
 106         for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) {
 107                 if (page_addrs[i] == 0) {
 108                         page_addrs[i] = va;
 109                         break;
 110                 }
 111         }
 112         ASSERT(i < XDB_MAX_IO_PAGES(vdp));
 113 }
 114 
 115 static void
 116 unlogva(xdb_t *vdp, uint64_t va)
 117 {
 118         uint64_t *page_addrs;
 119         int i;
 120 
 121         page_addrs = vdp->page_addrs;
 122         for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++) {
 123                 if (page_addrs[i] == va) {
 124                         page_addrs[i] = 0;
 125                         break;
 126                 }
 127         }
 128         ASSERT(i < XDB_MAX_IO_PAGES(vdp));
 129 }
 130 
 131 static void
 132 xdb_dump_request_oe(blkif_request_t *req)
 133 {
 134         int i;
 135 
 136         /*
 137          * Exploit the public interface definitions for BLKIF_OP_READ
 138          * etc..
 139          */
 140         char *op_name[] = { "read", "write", "barrier", "flush" };
 141 
 142         XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "op=%s", op_name[req->operation]));
 143         XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "num of segments=%d",
 144             req->nr_segments));
 145         XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "handle=%d", req->handle));
 146         XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "id=%llu",
 147             (unsigned long long)req->id));
 148         XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "start sector=%llu",
 149             (unsigned long long)req->sector_number));
 150         for (i = 0; i < req->nr_segments; i++) {
 151                 XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "gref=%d, first sec=%d,"
 152                     "last sec=%d", req->seg[i].gref, req->seg[i].first_sect,
 153                     req->seg[i].last_sect));
 154         }
 155 }
 156 #endif /* DEBUG */
 157 
 158 /*
 159  * Statistics.
 160  */
 161 static char *xdb_stats[] = {
 162         "rd_reqs",
 163         "wr_reqs",
 164         "br_reqs",
 165         "fl_reqs",
 166         "oo_reqs"
 167 };
 168 
 169 static int
 170 xdb_kstat_update(kstat_t *ksp, int flag)
 171 {
 172         xdb_t *vdp;
 173         kstat_named_t *knp;
 174 
 175         if (flag != KSTAT_READ)
 176                 return (EACCES);
 177 
 178         vdp = ksp->ks_private;
 179         knp = ksp->ks_data;
 180 
 181         /*
 182          * Assignment order should match that of the names in
 183          * xdb_stats.
 184          */
 185         (knp++)->value.ui64 = vdp->xs_stat_req_reads;
 186         (knp++)->value.ui64 = vdp->xs_stat_req_writes;
 187         (knp++)->value.ui64 = vdp->xs_stat_req_barriers;
 188         (knp++)->value.ui64 = vdp->xs_stat_req_flushes;
 189         (knp++)->value.ui64 = 0; /* oo_req */
 190 
 191         return (0);
 192 }
 193 
 194 static boolean_t
 195 xdb_kstat_init(xdb_t *vdp)
 196 {
 197         int nstat = sizeof (xdb_stats) / sizeof (xdb_stats[0]);
 198         char **cp = xdb_stats;
 199         kstat_named_t *knp;
 200 
 201         if ((vdp->xs_kstats = kstat_create("xdb",
 202             ddi_get_instance(vdp->xs_dip),
 203             "req_statistics", "block", KSTAT_TYPE_NAMED,
 204             nstat, 0)) == NULL)
 205                 return (B_FALSE);
 206 
 207         vdp->xs_kstats->ks_private = vdp;
 208         vdp->xs_kstats->ks_update = xdb_kstat_update;
 209 
 210         knp = vdp->xs_kstats->ks_data;
 211         while (nstat > 0) {
 212                 kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);
 213                 knp++;
 214                 cp++;
 215                 nstat--;
 216         }
 217 
 218         kstat_install(vdp->xs_kstats);
 219 
 220         return (B_TRUE);
 221 }
 222 
 223 static char *
 224 i_pathname(dev_info_t *dip)
 225 {
 226         char *path, *rv;
 227 
 228         path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
 229         (void) ddi_pathname(dip, path);
 230         rv = strdup(path);
 231         kmem_free(path, MAXPATHLEN);
 232 
 233         return (rv);
 234 }
 235 
 236 static buf_t *
 237 xdb_get_buf(xdb_t *vdp, blkif_request_t *req, xdb_request_t *xreq)
 238 {
 239         buf_t *bp;
 240         uint8_t segs, curseg;
 241         int sectors;
 242         int i, err;
 243         gnttab_map_grant_ref_t mapops[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 244         ddi_acc_handle_t acchdl;
 245 
 246         acchdl = vdp->xs_ring_hdl;
 247         bp = XDB_XREQ2BP(xreq);
 248         curseg = xreq->xr_curseg;
 249         /* init a new xdb request */
 250         if (req != NULL) {
 251                 ASSERT(MUTEX_HELD(&vdp->xs_iomutex));
 252                 boolean_t pagemapok = B_TRUE;
 253                 uint8_t op = ddi_get8(acchdl, &req->operation);
 254 
 255                 xreq->xr_vdp = vdp;
 256                 xreq->xr_op = op;
 257                 xreq->xr_id = ddi_get64(acchdl, &req->id);
 258                 segs = xreq->xr_buf_pages = ddi_get8(acchdl, &req->nr_segments);
 259                 if (segs == 0) {
 260                         if (op != BLKIF_OP_FLUSH_DISKCACHE)
 261                                 cmn_err(CE_WARN, "!non-BLKIF_OP_FLUSH_DISKCACHE"
 262                                     " is seen from domain %d with zero "
 263                                     "length data buffer!", vdp->xs_peer);
 264                         bioinit(bp);
 265                         bp->b_bcount = 0;
 266                         bp->b_lblkno = 0;
 267                         bp->b_un.b_addr = NULL;
 268                         return (bp);
 269                 } else if (op == BLKIF_OP_FLUSH_DISKCACHE) {
 270                         cmn_err(CE_WARN, "!BLKIF_OP_FLUSH_DISKCACHE"
 271                             " is seen from domain %d with non-zero "
 272                             "length data buffer!", vdp->xs_peer);
 273                 }
 274 
 275                 /*
 276                  * segs should be no bigger than BLKIF_MAX_SEGMENTS_PER_REQUEST
 277                  * according to the definition of blk interface by Xen
 278                  * we do sanity check here
 279                  */
 280                 if (segs > BLKIF_MAX_SEGMENTS_PER_REQUEST)
 281                         segs = xreq->xr_buf_pages =
 282                             BLKIF_MAX_SEGMENTS_PER_REQUEST;
 283 
 284                 for (i = 0; i < segs; i++) {
 285                         uint8_t fs, ls;
 286 
 287                         mapops[i].host_addr =
 288                             (uint64_t)(uintptr_t)XDB_IOPAGE_VA(
 289                             vdp->xs_iopage_va, xreq->xr_idx, i);
 290                         mapops[i].dom = vdp->xs_peer;
 291                         mapops[i].ref = ddi_get32(acchdl, &req->seg[i].gref);
 292                         mapops[i].flags = GNTMAP_host_map;
 293                         if (op != BLKIF_OP_READ)
 294                                 mapops[i].flags |= GNTMAP_readonly;
 295 
 296                         fs = ddi_get8(acchdl, &req->seg[i].first_sect);
 297                         ls = ddi_get8(acchdl, &req->seg[i].last_sect);
 298 
 299                         /*
 300                          * first_sect should be no bigger than last_sect and
 301                          * both of them should be no bigger than
 302                          * XB_LAST_SECTOR_IN_SEG according to definition
 303                          * of blk interface by Xen, so sanity check again
 304                          */
 305                         if (fs > XB_LAST_SECTOR_IN_SEG)
 306                                 fs = XB_LAST_SECTOR_IN_SEG;
 307                         if (ls > XB_LAST_SECTOR_IN_SEG)
 308                                 ls = XB_LAST_SECTOR_IN_SEG;
 309                         if (fs > ls)
 310                                 fs = ls;
 311 
 312                         xreq->xr_segs[i].fs = fs;
 313                         xreq->xr_segs[i].ls = ls;
 314                 }
 315 
 316                 /* map in io pages */
 317                 err = xen_map_gref(GNTTABOP_map_grant_ref, mapops, i, B_FALSE);
 318                 if (err != 0)
 319                         return (NULL);
 320                 for (i = 0; i < segs; i++) {
 321                         /*
 322                          * Although HYPERVISOR_grant_table_op() returned no
 323                          * error, mapping of each single page can fail. So,
 324                          * we have to do the check here and handle the error
 325                          * if needed
 326                          */
 327                         if (mapops[i].status != GNTST_okay) {
 328                                 int j;
 329                                 for (j = 0; j < i; j++) {
 330 #ifdef DEBUG
 331                                         unlogva(vdp, mapops[j].host_addr);
 332 #endif
 333                                         xen_release_pfn(
 334                                             xreq->xr_plist[j].p_pagenum);
 335                                 }
 336                                 pagemapok = B_FALSE;
 337                                 break;
 338                         }
 339                         /* record page mapping handle for unmapping later */
 340                         xreq->xr_page_hdls[i] = mapops[i].handle;
 341 #ifdef DEBUG
 342                         logva(vdp, mapops[i].host_addr);
 343 #endif
 344                         /*
 345                          * Pass the MFNs down using the shadow list (xr_pplist)
 346                          *
 347                          * This is pretty ugly since we have implict knowledge
 348                          * of how the rootnex binds buffers.
 349                          * The GNTTABOP_map_grant_ref op makes us do some ugly
 350                          * stuff since we're not allowed to touch these PTEs
 351                          * from the VM.
 352                          *
 353                          * Obviously, these aren't real page_t's. The rootnex
 354                          * only needs p_pagenum.
 355                          * Also, don't use btop() here or 32 bit PAE breaks.
 356                          */
 357                         xreq->xr_pplist[i] = &xreq->xr_plist[i];
 358                         xreq->xr_plist[i].p_pagenum =
 359                             xen_assign_pfn(mapops[i].dev_bus_addr >> PAGESHIFT);
 360                 }
 361 
 362                 /*
 363                  * not all pages mapped in successfully, unmap those mapped-in
 364                  * page and return failure
 365                  */
 366                 if (!pagemapok) {
 367                         gnttab_unmap_grant_ref_t unmapop;
 368 
 369                         for (i = 0; i < segs; i++) {
 370                                 if (mapops[i].status != GNTST_okay)
 371                                         continue;
 372                                 unmapop.host_addr =
 373                                     (uint64_t)(uintptr_t)XDB_IOPAGE_VA(
 374                                     vdp->xs_iopage_va, xreq->xr_idx, i);
 375                                 unmapop.dev_bus_addr = NULL;
 376                                 unmapop.handle = mapops[i].handle;
 377                                 (void) HYPERVISOR_grant_table_op(
 378                                     GNTTABOP_unmap_grant_ref, &unmapop, 1);
 379                         }
 380 
 381                         return (NULL);
 382                 }
 383                 bioinit(bp);
 384                 bp->b_lblkno = ddi_get64(acchdl, &req->sector_number);
 385                 bp->b_flags = B_BUSY | B_SHADOW | B_PHYS;
 386                 bp->b_flags |= (ddi_get8(acchdl, &req->operation) ==
 387                     BLKIF_OP_READ) ? B_READ : (B_WRITE | B_ASYNC);
 388         } else {
 389                 uint64_t blkst;
 390                 int isread;
 391 
 392                 /* reuse this buf */
 393                 blkst = bp->b_lblkno + bp->b_bcount / DEV_BSIZE;
 394                 isread = bp->b_flags & B_READ;
 395                 bioreset(bp);
 396                 bp->b_lblkno = blkst;
 397                 bp->b_flags = B_BUSY | B_SHADOW | B_PHYS;
 398                 bp->b_flags |= isread ? B_READ : (B_WRITE | B_ASYNC);
 399                 XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "reuse buf, xreq is %d!!",
 400                     xreq->xr_idx));
 401         }
 402 
 403         /* form a buf */
 404         bp->b_un.b_addr = XDB_IOPAGE_VA(vdp->xs_iopage_va, xreq->xr_idx,
 405             curseg) + xreq->xr_segs[curseg].fs * DEV_BSIZE;
 406         bp->b_shadow = &xreq->xr_pplist[curseg];
 407         bp->b_iodone = xdb_biodone;
 408         sectors = 0;
 409 
 410         /*
 411          * Run through the segments. There are XB_NUM_SECTORS_PER_SEG sectors
 412          * per segment. On some OSes (e.g. Linux), there may be empty gaps
 413          * between segments. (i.e. the first segment may end on sector 6 and
 414          * the second segment start on sector 4).
 415          *
 416          * if a segments first sector is not set to 0, and this is not the
 417          * first segment in our buf, end this buf now.
 418          *
 419          * if a segments last sector is not set to XB_LAST_SECTOR_IN_SEG, and
 420          * this is not the last segment in the request, add this segment into
 421          * the buf, then end this buf (updating the pointer to point to the
 422          * next segment next time around).
 423          */
 424         for (i = curseg; i < xreq->xr_buf_pages; i++) {
 425                 if ((xreq->xr_segs[i].fs != 0) && (i != curseg)) {
 426                         break;
 427                 }
 428                 sectors += (xreq->xr_segs[i].ls - xreq->xr_segs[i].fs + 1);
 429                 if ((xreq->xr_segs[i].ls != XB_LAST_SECTOR_IN_SEG) &&
 430                     (i != (xreq->xr_buf_pages - 1))) {
 431                         i++;
 432                         break;
 433                 }
 434         }
 435         xreq->xr_curseg = i;
 436         bp->b_bcount = sectors * DEV_BSIZE;
 437         bp->b_bufsize = bp->b_bcount;
 438 
 439         return (bp);
 440 }
 441 
 442 static xdb_request_t *
 443 xdb_get_req(xdb_t *vdp)
 444 {
 445         xdb_request_t *req;
 446         int idx;
 447 
 448         ASSERT(MUTEX_HELD(&vdp->xs_iomutex));
 449         ASSERT(vdp->xs_free_req != -1);
 450         req = &vdp->xs_req[vdp->xs_free_req];
 451         vdp->xs_free_req = req->xr_next;
 452         idx = req->xr_idx;
 453         bzero(req, sizeof (xdb_request_t));
 454         req->xr_idx = idx;
 455         return (req);
 456 }
 457 
 458 static void
 459 xdb_free_req(xdb_request_t *req)
 460 {
 461         xdb_t *vdp = req->xr_vdp;
 462 
 463         ASSERT(MUTEX_HELD(&vdp->xs_iomutex));
 464         req->xr_next = vdp->xs_free_req;
 465         vdp->xs_free_req = req->xr_idx;
 466 }
 467 
 468 static void
 469 xdb_response(xdb_t *vdp, blkif_request_t *req, boolean_t ok)
 470 {
 471         ddi_acc_handle_t acchdl = vdp->xs_ring_hdl;
 472 
 473         if (xdb_push_response(vdp, ddi_get64(acchdl, &req->id),
 474             ddi_get8(acchdl, &req->operation), ok))
 475                 xvdi_notify_oe(vdp->xs_dip);
 476 }
 477 
 478 static void
 479 xdb_init_ioreqs(xdb_t *vdp)
 480 {
 481         int i;
 482 
 483         ASSERT(vdp->xs_nentry);
 484 
 485         if (vdp->xs_req == NULL)
 486                 vdp->xs_req = kmem_alloc(vdp->xs_nentry *
 487                     sizeof (xdb_request_t), KM_SLEEP);
 488 #ifdef DEBUG
 489         if (vdp->page_addrs == NULL)
 490                 vdp->page_addrs = kmem_zalloc(XDB_MAX_IO_PAGES(vdp) *
 491                     sizeof (uint64_t), KM_SLEEP);
 492 #endif
 493         for (i = 0; i < vdp->xs_nentry; i++) {
 494                 vdp->xs_req[i].xr_idx = i;
 495                 vdp->xs_req[i].xr_next = i + 1;
 496         }
 497         vdp->xs_req[vdp->xs_nentry - 1].xr_next = -1;
 498         vdp->xs_free_req = 0;
 499 
 500         /* alloc va in host dom for io page mapping */
 501         vdp->xs_iopage_va = vmem_xalloc(heap_arena,
 502             XDB_MAX_IO_PAGES(vdp) * PAGESIZE, PAGESIZE, 0, 0, 0, 0,
 503             VM_SLEEP);
 504         for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++)
 505                 hat_prepare_mapping(kas.a_hat,
 506                     vdp->xs_iopage_va + i * PAGESIZE, NULL);
 507 }
 508 
 509 static void
 510 xdb_uninit_ioreqs(xdb_t *vdp)
 511 {
 512         int i;
 513 
 514         for (i = 0; i < XDB_MAX_IO_PAGES(vdp); i++)
 515                 hat_release_mapping(kas.a_hat,
 516                     vdp->xs_iopage_va + i * PAGESIZE);
 517         vmem_xfree(heap_arena, vdp->xs_iopage_va,
 518             XDB_MAX_IO_PAGES(vdp) * PAGESIZE);
 519         if (vdp->xs_req != NULL) {
 520                 kmem_free(vdp->xs_req, vdp->xs_nentry * sizeof (xdb_request_t));
 521                 vdp->xs_req = NULL;
 522         }
 523 #ifdef DEBUG
 524         if (vdp->page_addrs != NULL) {
 525                 kmem_free(vdp->page_addrs, XDB_MAX_IO_PAGES(vdp) *
 526                     sizeof (uint64_t));
 527                 vdp->page_addrs = NULL;
 528         }
 529 #endif
 530 }
 531 
 532 static uint_t
 533 xdb_intr(caddr_t arg)
 534 {
 535         xdb_t           *vdp = (xdb_t *)arg;
 536         dev_info_t      *dip = vdp->xs_dip;
 537         blkif_request_t req, *reqp = &req;
 538         xdb_request_t   *xreq;
 539         buf_t           *bp;
 540         uint8_t         op;
 541         int             ret = DDI_INTR_UNCLAIMED;
 542 
 543         XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
 544             "xdb@%s: I/O request received from dom %d",
 545             ddi_get_name_addr(dip), vdp->xs_peer));
 546 
 547         mutex_enter(&vdp->xs_iomutex);
 548 
 549         /* shouldn't touch ring buffer if not in connected state */
 550         if (!vdp->xs_if_connected) {
 551                 mutex_exit(&vdp->xs_iomutex);
 552                 return (DDI_INTR_UNCLAIMED);
 553         }
 554         ASSERT(vdp->xs_hp_connected && vdp->xs_fe_initialised);
 555 
 556         /*
 557          * We'll loop till there is no more request in the ring
 558          * We won't stuck in this loop for ever since the size of ring buffer
 559          * is limited, and frontend will stop pushing requests into it when
 560          * the ring buffer is full
 561          */
 562 
 563         /* req_event will be increased in xvdi_ring_get_request() */
 564         while (xdb_get_request(vdp, reqp)) {
 565                 ret = DDI_INTR_CLAIMED;
 566 
 567                 op = ddi_get8(vdp->xs_ring_hdl, &reqp->operation);
 568                 if (op == BLKIF_OP_READ                 ||
 569                     op == BLKIF_OP_WRITE                ||
 570                     op == BLKIF_OP_WRITE_BARRIER        ||
 571                     op == BLKIF_OP_FLUSH_DISKCACHE) {
 572 #ifdef DEBUG
 573                         xdb_dump_request_oe(reqp);
 574 #endif
 575                         xreq = xdb_get_req(vdp);
 576                         ASSERT(xreq);
 577                         switch (op) {
 578                         case BLKIF_OP_READ:
 579                                 vdp->xs_stat_req_reads++;
 580                                 break;
 581                         case BLKIF_OP_WRITE_BARRIER:
 582                                 vdp->xs_stat_req_barriers++;
 583                                 /* FALLTHRU */
 584                         case BLKIF_OP_WRITE:
 585                                 vdp->xs_stat_req_writes++;
 586                                 break;
 587                         case BLKIF_OP_FLUSH_DISKCACHE:
 588                                 vdp->xs_stat_req_flushes++;
 589                                 break;
 590                         }
 591 
 592                         xreq->xr_curseg = 0; /* start from first segment */
 593                         bp = xdb_get_buf(vdp, reqp, xreq);
 594                         if (bp == NULL) {
 595                                 /* failed to form a buf */
 596                                 xdb_free_req(xreq);
 597                                 xdb_response(vdp, reqp, B_FALSE);
 598                                 continue;
 599                         }
 600                         bp->av_forw = NULL;
 601 
 602                         XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
 603                             " buf %p, blkno %lld, size %lu, addr %p",
 604                             (void *)bp, (longlong_t)bp->b_blkno,
 605                             (ulong_t)bp->b_bcount, (void *)bp->b_un.b_addr));
 606 
 607                         /* send bp to underlying blk driver */
 608                         if (vdp->xs_f_iobuf == NULL) {
 609                                 vdp->xs_f_iobuf = vdp->xs_l_iobuf = bp;
 610                         } else {
 611                                 vdp->xs_l_iobuf->av_forw = bp;
 612                                 vdp->xs_l_iobuf = bp;
 613                         }
 614                 } else {
 615                         xdb_response(vdp, reqp, B_FALSE);
 616                         XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, "xdb@%s: "
 617                             "Unsupported cmd received from dom %d",
 618                             ddi_get_name_addr(dip), vdp->xs_peer));
 619                 }
 620         }
 621         /* notify our taskq to push buf to underlying blk driver */
 622         if (ret == DDI_INTR_CLAIMED)
 623                 cv_broadcast(&vdp->xs_iocv);
 624 
 625         mutex_exit(&vdp->xs_iomutex);
 626 
 627         return (ret);
 628 }
 629 
 630 static int
 631 xdb_biodone(buf_t *bp)
 632 {
 633         int i, err, bioerr;
 634         uint8_t segs;
 635         gnttab_unmap_grant_ref_t unmapops[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 636         xdb_request_t *xreq = XDB_BP2XREQ(bp);
 637         xdb_t *vdp = xreq->xr_vdp;
 638         buf_t *nbp;
 639 
 640         bioerr = geterror(bp);
 641         if (bioerr)
 642                 XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, "xdb@%s: I/O error %d",
 643                     ddi_get_name_addr(vdp->xs_dip), bioerr));
 644 
 645         /* check if we are done w/ this I/O request */
 646         if ((bioerr == 0) && (xreq->xr_curseg < xreq->xr_buf_pages)) {
 647                 nbp = xdb_get_buf(vdp, NULL, xreq);
 648                 if (nbp) {
 649                         err = ldi_strategy(vdp->xs_ldi_hdl, nbp);
 650                         if (err == 0) {
 651                                 XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
 652                                     "sent buf to backend ok"));
 653                                 return (DDI_SUCCESS);
 654                         }
 655                         bioerr = EIO;
 656                         XDB_DBPRINT(XDB_DBG_IO, (CE_WARN, "xdb@%s: "
 657                             "sent buf to backend dev failed, err=%d",
 658                             ddi_get_name_addr(vdp->xs_dip), err));
 659                 } else {
 660                         bioerr = EIO;
 661                 }
 662         }
 663 
 664         /* unmap io pages */
 665         segs = xreq->xr_buf_pages;
 666         /*
 667          * segs should be no bigger than BLKIF_MAX_SEGMENTS_PER_REQUEST
 668          * according to the definition of blk interface by Xen
 669          */
 670         ASSERT(segs <= BLKIF_MAX_SEGMENTS_PER_REQUEST);
 671         for (i = 0; i < segs; i++) {
 672                 unmapops[i].host_addr = (uint64_t)(uintptr_t)XDB_IOPAGE_VA(
 673                     vdp->xs_iopage_va, xreq->xr_idx, i);
 674 #ifdef DEBUG
 675                 mutex_enter(&vdp->xs_iomutex);
 676                 unlogva(vdp, unmapops[i].host_addr);
 677                 mutex_exit(&vdp->xs_iomutex);
 678 #endif
 679                 unmapops[i].dev_bus_addr = NULL;
 680                 unmapops[i].handle = xreq->xr_page_hdls[i];
 681         }
 682         err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
 683             unmapops, segs);
 684         ASSERT(!err);
 685 
 686         /*
 687          * If we have reached a barrier write or a cache flush , then we must
 688          * flush all our I/Os.
 689          */
 690         if (xreq->xr_op == BLKIF_OP_WRITE_BARRIER ||
 691             xreq->xr_op == BLKIF_OP_FLUSH_DISKCACHE) {
 692                 /*
 693                  * XXX At this point the write did succeed, so I don't
 694                  * believe we should report an error because the flush
 695                  * failed. However, this is a debatable point, so
 696                  * maybe we need to think more carefully about this.
 697                  * For now, just cast to void.
 698                  */
 699                 (void) ldi_ioctl(vdp->xs_ldi_hdl,
 700                     DKIOCFLUSHWRITECACHE, NULL, FKIOCTL, kcred, NULL);
 701         }
 702 
 703         mutex_enter(&vdp->xs_iomutex);
 704 
 705         /* send response back to frontend */
 706         if (vdp->xs_if_connected) {
 707                 ASSERT(vdp->xs_hp_connected && vdp->xs_fe_initialised);
 708                 if (xdb_push_response(vdp, xreq->xr_id, xreq->xr_op, bioerr))
 709                         xvdi_notify_oe(vdp->xs_dip);
 710                 XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
 711                     "sent resp back to frontend, id=%llu",
 712                     (unsigned long long)xreq->xr_id));
 713         }
 714         /* free io resources */
 715         biofini(bp);
 716         xdb_free_req(xreq);
 717 
 718         vdp->xs_ionum--;
 719         if (!vdp->xs_if_connected && (vdp->xs_ionum == 0)) {
 720                 /* we're closing, someone is waiting for I/O clean-up */
 721                 cv_signal(&vdp->xs_ionumcv);
 722         }
 723 
 724         mutex_exit(&vdp->xs_iomutex);
 725 
 726         return (DDI_SUCCESS);
 727 }
 728 
 729 static int
 730 xdb_bindto_frontend(xdb_t *vdp)
 731 {
 732         int err;
 733         char *oename;
 734         grant_ref_t gref;
 735         evtchn_port_t evtchn;
 736         dev_info_t *dip = vdp->xs_dip;
 737         char protocol[64] = "";
 738 
 739         ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
 740 
 741         /*
 742          * Switch to the XenbusStateInitialised state.  This let's the
 743          * frontend know that we're about to negotiate a connection.
 744          */
 745         (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitialised);
 746 
 747         /*
 748          * Gather info from frontend
 749          */
 750         oename = xvdi_get_oename(dip);
 751         if (oename == NULL)
 752                 return (DDI_FAILURE);
 753 
 754         err = xenbus_gather(XBT_NULL, oename,
 755             XBP_RING_REF, "%lu", &gref,
 756             XBP_EVENT_CHAN, "%u", &evtchn,
 757             NULL);
 758         if (err != 0) {
 759                 xvdi_dev_error(dip, err,
 760                     "Getting ring-ref and evtchn from frontend");
 761                 return (DDI_FAILURE);
 762         }
 763 
 764         vdp->xs_blk_protocol = BLKIF_PROTOCOL_NATIVE;
 765         vdp->xs_nentry = BLKIF_RING_SIZE;
 766         vdp->xs_entrysize = sizeof (union blkif_sring_entry);
 767 
 768         err = xenbus_gather(XBT_NULL, oename,
 769             XBP_PROTOCOL, "%63s", protocol, NULL);
 770         if (err)
 771                 (void) strcpy(protocol, "unspecified, assuming native");
 772         else {
 773                 /*
 774                  * We must check for NATIVE first, so that the fast path
 775                  * is taken for copying data from the guest to the host.
 776                  */
 777                 if (strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE) != 0) {
 778                         if (strcmp(protocol, XEN_IO_PROTO_ABI_X86_32) == 0) {
 779                                 vdp->xs_blk_protocol = BLKIF_PROTOCOL_X86_32;
 780                                 vdp->xs_nentry = BLKIF_X86_32_RING_SIZE;
 781                                 vdp->xs_entrysize =
 782                                     sizeof (union blkif_x86_32_sring_entry);
 783                         } else if (strcmp(protocol, XEN_IO_PROTO_ABI_X86_64) ==
 784                             0) {
 785                                 vdp->xs_blk_protocol = BLKIF_PROTOCOL_X86_64;
 786                                 vdp->xs_nentry = BLKIF_X86_64_RING_SIZE;
 787                                 vdp->xs_entrysize =
 788                                     sizeof (union blkif_x86_64_sring_entry);
 789                         } else {
 790                                 xvdi_fatal_error(dip, err, "unknown protocol");
 791                                 return (DDI_FAILURE);
 792                         }
 793                 }
 794         }
 795 #ifdef DEBUG
 796         cmn_err(CE_NOTE, "!xdb@%s: blkif protocol '%s' ",
 797             ddi_get_name_addr(dip), protocol);
 798 #endif
 799 
 800         /*
 801          * Map and init ring.  The ring parameters must match those which
 802          * have been allocated in the front end.
 803          */
 804         if (xvdi_map_ring(dip, vdp->xs_nentry, vdp->xs_entrysize,
 805             gref, &vdp->xs_ring) != DDI_SUCCESS)
 806                 return (DDI_FAILURE);
 807 
 808         /*
 809          * This will be removed after we use shadow I/O ring request since
 810          * we don't need to access the ring itself directly, thus the access
 811          * handle is not needed
 812          */
 813         vdp->xs_ring_hdl = vdp->xs_ring->xr_acc_hdl;
 814 
 815         /* bind event channel */
 816         err = xvdi_bind_evtchn(dip, evtchn);
 817         if (err != DDI_SUCCESS) {
 818                 xvdi_unmap_ring(vdp->xs_ring);
 819                 return (DDI_FAILURE);
 820         }
 821 
 822         return (DDI_SUCCESS);
 823 }
 824 
 825 static void
 826 xdb_unbindfrom_frontend(xdb_t *vdp)
 827 {
 828         ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
 829 
 830         xvdi_free_evtchn(vdp->xs_dip);
 831         xvdi_unmap_ring(vdp->xs_ring);
 832 }
 833 
 834 /*
 835  * xdb_params_change() initiates a allows change to the underlying device/file
 836  * that the backend is accessing.  It does this by disconnecting from the
 837  * frontend, closing the old device, clearing a bunch of xenbus parameters,
 838  * and switching back to the XenbusStateInitialising state.  The frontend
 839  * should notice this transition to the XenbusStateInitialising state and
 840  * should attempt to reconnect to us (the backend).
 841  */
 842 static void
 843 xdb_params_change(xdb_t *vdp, char *params, boolean_t update_xs)
 844 {
 845         xenbus_transaction_t    xbt;
 846         dev_info_t              *dip = vdp->xs_dip;
 847         char                    *xsname;
 848         int                     err;
 849 
 850         ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
 851         ASSERT(vdp->xs_params_path != NULL);
 852 
 853         if ((xsname = xvdi_get_xsname(dip)) == NULL)
 854                 return;
 855         if (strcmp(vdp->xs_params_path, params) == 0)
 856                 return;
 857 
 858         /*
 859          * Close the device we're currently accessing and update the
 860          * path which points to our backend device/file.
 861          */
 862         xdb_close(dip);
 863         vdp->xs_fe_initialised = B_FALSE;
 864 
 865 trans_retry:
 866         if ((err = xenbus_transaction_start(&xbt)) != 0) {
 867                 xvdi_dev_error(dip, err, "params change transaction init");
 868                 goto errout;
 869         }
 870 
 871         /*
 872          * Delete all the xenbus properties that are connection dependant
 873          * and go back to the initializing state so that the frontend
 874          * driver can re-negotiate a connection.
 875          */
 876         if (((err = xenbus_rm(xbt, xsname, XBP_FB)) != 0) ||
 877             ((err = xenbus_rm(xbt, xsname, XBP_INFO)) != 0) ||
 878             ((err = xenbus_rm(xbt, xsname, "sector-size")) != 0) ||
 879             ((err = xenbus_rm(xbt, xsname, XBP_SECTORS)) != 0) ||
 880             ((err = xenbus_rm(xbt, xsname, "instance")) != 0) ||
 881             ((err = xenbus_rm(xbt, xsname, "node")) != 0) ||
 882             (update_xs && ((err = xenbus_printf(xbt, xsname,
 883             "params", "%s", params)) != 0)) ||
 884             ((err = xvdi_switch_state(dip,
 885             xbt, XenbusStateInitialising) > 0))) {
 886                 (void) xenbus_transaction_end(xbt, 1);
 887                 xvdi_dev_error(dip, err, "params change transaction setup");
 888                 goto errout;
 889         }
 890 
 891         if ((err = xenbus_transaction_end(xbt, 0)) != 0) {
 892                 if (err == EAGAIN) {
 893                         /* transaction is ended, don't need to abort it */
 894                         goto trans_retry;
 895                 }
 896                 xvdi_dev_error(dip, err, "params change transaction commit");
 897                 goto errout;
 898         }
 899 
 900         /* Change the device that we plan to access */
 901         strfree(vdp->xs_params_path);
 902         vdp->xs_params_path = strdup(params);
 903         return;
 904 
 905 errout:
 906         (void) xvdi_switch_state(dip, xbt, XenbusStateInitialising);
 907 }
 908 
 909 /*
 910  * xdb_watch_params_cb() - This callback is invoked whenever there
 911  * is an update to the following xenbus parameter:
 912  *     /local/domain/0/backend/vbd/<domU_id>/<domU_dev>/params
 913  *
 914  * This normally happens during xm block-configure operations, which
 915  * are used to change CD device images for HVM domUs.
 916  */
 917 /*ARGSUSED*/
 918 static void
 919 xdb_watch_params_cb(dev_info_t *dip, const char *path, void *arg)
 920 {
 921         xdb_t                   *vdp = (xdb_t *)ddi_get_driver_private(dip);
 922         char                    *xsname, *oename, *str, *str2;
 923 
 924         if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
 925             ((oename = xvdi_get_oename(dip)) == NULL)) {
 926                 return;
 927         }
 928 
 929         mutex_enter(&vdp->xs_cbmutex);
 930 
 931         if (xenbus_read_str(xsname, "params", &str) != 0) {
 932                 mutex_exit(&vdp->xs_cbmutex);
 933                 return;
 934         }
 935 
 936         if (strcmp(vdp->xs_params_path, str) == 0) {
 937                 /* Nothing todo */
 938                 mutex_exit(&vdp->xs_cbmutex);
 939                 strfree(str);
 940                 return;
 941         }
 942 
 943         /*
 944          * If the frontend isn't a cd device, doesn't support media
 945          * requests, or has locked the media, then we can't change
 946          * the params value.  restore the current value.
 947          */
 948         str2 = NULL;
 949         if (!XDB_IS_FE_CD(vdp) ||
 950             (xenbus_read_str(oename, XBP_MEDIA_REQ, &str2) != 0) ||
 951             (strcmp(str2, XBV_MEDIA_REQ_LOCK) == 0)) {
 952                 if (str2 != NULL)
 953                         strfree(str2);
 954                 strfree(str);
 955 
 956                 str = i_pathname(dip);
 957                 cmn_err(CE_NOTE,
 958                     "!%s: media locked, ignoring params update", str);
 959                 strfree(str);
 960 
 961                 mutex_exit(&vdp->xs_cbmutex);
 962                 return;
 963         }
 964 
 965         XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE,
 966             "block-configure params request: \"%s\"", str));
 967 
 968         xdb_params_change(vdp, str, B_FALSE);
 969         mutex_exit(&vdp->xs_cbmutex);
 970         strfree(str);
 971 }
 972 
 973 /*
 974  * xdb_watch_media_req_cb() - This callback is invoked whenever there
 975  * is an update to the following xenbus parameter:
 976  *     /local/domain/<domU_id>/device/vbd/<domU_dev>/media-req
 977  *
 978  * Media requests are only supported on CD devices and are issued by
 979  * the frontend.  Currently the only supported media request operaions
 980  * are "lock" and "eject".  A "lock" prevents the backend from changing
 981  * the backing device/file (via xm block-configure).  An "eject" requests
 982  * tells the backend device that it should disconnect from the frontend
 983  * and closing the backing device/file that is currently in use.
 984  */
 985 /*ARGSUSED*/
 986 static void
 987 xdb_watch_media_req_cb(dev_info_t *dip, const char *path, void *arg)
 988 {
 989         xdb_t                   *vdp = (xdb_t *)ddi_get_driver_private(dip);
 990         char                    *oename, *str;
 991 
 992         mutex_enter(&vdp->xs_cbmutex);
 993 
 994         if ((oename = xvdi_get_oename(dip)) == NULL) {
 995                 mutex_exit(&vdp->xs_cbmutex);
 996                 return;
 997         }
 998 
 999         if (xenbus_read_str(oename, XBP_MEDIA_REQ, &str) != 0) {
1000                 mutex_exit(&vdp->xs_cbmutex);
1001                 return;
1002         }
1003 
1004         if (!XDB_IS_FE_CD(vdp)) {
1005                 xvdi_dev_error(dip, EINVAL,
1006                     "media-req only supported for cdrom devices");
1007                 mutex_exit(&vdp->xs_cbmutex);
1008                 return;
1009         }
1010 
1011         if (strcmp(str, XBV_MEDIA_REQ_EJECT) != 0) {
1012                 mutex_exit(&vdp->xs_cbmutex);
1013                 strfree(str);
1014                 return;
1015         }
1016         strfree(str);
1017 
1018         XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "media eject request"));
1019 
1020         xdb_params_change(vdp, "", B_TRUE);
1021         (void) xenbus_printf(XBT_NULL, oename,
1022             XBP_MEDIA_REQ, "%s", XBV_MEDIA_REQ_NONE);
1023         mutex_exit(&vdp->xs_cbmutex);
1024 }
1025 
1026 /*
1027  * If we're dealing with a cdrom device, let the frontend know that
1028  * we support media requests via XBP_MEDIA_REQ_SUP, and setup a watch
1029  * to handle those frontend media request changes, which modify the
1030  * following xenstore parameter:
1031  *      /local/domain/<domU_id>/device/vbd/<domU_dev>/media-req
1032  */
1033 static boolean_t
1034 xdb_media_req_init(xdb_t *vdp)
1035 {
1036         dev_info_t              *dip = vdp->xs_dip;
1037         char                    *xsname, *oename;
1038 
1039         ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
1040 
1041         if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
1042             ((oename = xvdi_get_oename(dip)) == NULL))
1043                 return (B_FALSE);
1044 
1045         if (!XDB_IS_FE_CD(vdp))
1046                 return (B_TRUE);
1047 
1048         if (xenbus_printf(XBT_NULL, xsname, XBP_MEDIA_REQ_SUP, "%d", 1) != 0)
1049                 return (B_FALSE);
1050 
1051         if (xvdi_add_xb_watch_handler(dip, oename,
1052             XBP_MEDIA_REQ, xdb_watch_media_req_cb, NULL) != DDI_SUCCESS) {
1053                 xvdi_dev_error(dip, EAGAIN,
1054                     "Failed to register watch for cdrom media requests");
1055                 return (B_FALSE);
1056         }
1057 
1058         return (B_TRUE);
1059 }
1060 
1061 /*
1062  * Get our params value.  Also, if we're using "params" then setup a
1063  * watch to handle xm block-configure operations which modify the
1064  * following xenstore parameter:
1065  *      /local/domain/0/backend/vbd/<domU_id>/<domU_dev>/params
1066  */
1067 static boolean_t
1068 xdb_params_init(xdb_t *vdp)
1069 {
1070         dev_info_t              *dip = vdp->xs_dip;
1071         char                    *str, *xsname;
1072         int                     err;
1073 
1074         ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
1075         ASSERT(vdp->xs_params_path == NULL);
1076 
1077         if ((xsname = xvdi_get_xsname(dip)) == NULL)
1078                 return (B_FALSE);
1079 
1080         err = xenbus_read_str(xsname, "params", &str);
1081         if (err != 0) {
1082                 return (B_FALSE);
1083         }
1084         vdp->xs_params_path = str;
1085 
1086         if (xvdi_add_xb_watch_handler(dip, xsname, "params",
1087             xdb_watch_params_cb, NULL) != DDI_SUCCESS) {
1088                 strfree(vdp->xs_params_path);
1089                 vdp->xs_params_path = NULL;
1090                 return (B_FALSE);
1091         }
1092 
1093         return (B_TRUE);
1094 }
1095 
/*
 * lofi related constants used by xdb_setup_node()/xdb_teardown_node():
 *   LOFI_CTRL_NODE - node opened via ldi_open_by_name() to issue the
 *                    LOFI_MAP_FILE / LOFI_UNMAP_FILE ioctls.
 *   LOFI_DEV_NODE  - path prefix to which the minor number returned by
 *                    LOFI_MAP_FILE is appended to name the new device.
 *   LOFI_MODE      - open flags used for the control node; also passed
 *                    (together with FKIOCTL) as the ioctl mode.
 */
#define LOFI_CTRL_NODE  "/dev/lofictl"
#define LOFI_DEV_NODE   "/devices/pseudo/lofi@0:"
#define LOFI_MODE       (FREAD | FWRITE | FEXCL)
1099 
1100 static int
1101 xdb_setup_node(xdb_t *vdp, char *path)
1102 {
1103         dev_info_t              *dip = vdp->xs_dip;
1104         char                    *xsname, *str;
1105         ldi_handle_t            ldi_hdl;
1106         struct lofi_ioctl       *li;
1107         int                     minor, err;
1108 
1109         ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
1110 
1111         if ((xsname = xvdi_get_xsname(dip)) == NULL)
1112                 return (DDI_FAILURE);
1113 
1114         if ((err = xenbus_read_str(xsname, "type", &str)) != 0) {
1115                 xvdi_dev_error(dip, err, "Getting type from backend device");
1116                 return (DDI_FAILURE);
1117         }
1118         if (strcmp(str, "file") == 0)
1119                 vdp->xs_type |= XDB_DEV_BE_LOFI;
1120         strfree(str);
1121 
1122         if (!XDB_IS_BE_LOFI(vdp)) {
1123                 (void) strlcpy(path, vdp->xs_params_path, MAXPATHLEN);
1124                 ASSERT(vdp->xs_lofi_path == NULL);
1125                 return (DDI_SUCCESS);
1126         }
1127 
1128         do {
1129                 err = ldi_open_by_name(LOFI_CTRL_NODE, LOFI_MODE, kcred,
1130                     &ldi_hdl, vdp->xs_ldi_li);
1131         } while (err == EBUSY);
1132         if (err != 0) {
1133                 return (DDI_FAILURE);
1134         }
1135 
1136         li = kmem_zalloc(sizeof (*li), KM_SLEEP);
1137         (void) strlcpy(li->li_filename, vdp->xs_params_path,
1138             sizeof (li->li_filename));
1139         err = ldi_ioctl(ldi_hdl, LOFI_MAP_FILE, (intptr_t)li,
1140             LOFI_MODE | FKIOCTL, kcred, &minor);
1141         (void) ldi_close(ldi_hdl, LOFI_MODE, kcred);
1142         kmem_free(li, sizeof (*li));
1143 
1144         if (err != 0) {
1145                 cmn_err(CE_WARN, "xdb@%s: Failed to create lofi dev for %s",
1146                     ddi_get_name_addr(dip), vdp->xs_params_path);
1147                 return (DDI_FAILURE);
1148         }
1149 
1150         /*
1151          * return '/devices/...' instead of '/dev/lofi/...' since the
1152          * former is available immediately after calling ldi_ioctl
1153          */
1154         (void) snprintf(path, MAXPATHLEN, LOFI_DEV_NODE "%d", minor);
1155         (void) xenbus_printf(XBT_NULL, xsname, "node", "%s", path);
1156 
1157         ASSERT(vdp->xs_lofi_path == NULL);
1158         vdp->xs_lofi_path = strdup(path);
1159 
1160         return (DDI_SUCCESS);
1161 }
1162 
1163 static void
1164 xdb_teardown_node(xdb_t *vdp)
1165 {
1166         dev_info_t *dip = vdp->xs_dip;
1167         ldi_handle_t ldi_hdl;
1168         struct lofi_ioctl *li;
1169         int err;
1170 
1171         ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
1172 
1173         if (!XDB_IS_BE_LOFI(vdp))
1174                 return;
1175 
1176         vdp->xs_type &= ~XDB_DEV_BE_LOFI;
1177         ASSERT(vdp->xs_lofi_path != NULL);
1178 
1179         li = kmem_zalloc(sizeof (*li), KM_SLEEP);
1180         (void) strlcpy(li->li_filename, vdp->xs_params_path,
1181             sizeof (li->li_filename));
1182 
1183         do {
1184                 err = ldi_open_by_name(LOFI_CTRL_NODE, LOFI_MODE, kcred,
1185                     &ldi_hdl, vdp->xs_ldi_li);
1186         } while (err == EBUSY);
1187 
1188         if (err != 0) {
1189                 kmem_free(li, sizeof (*li));
1190                 return;
1191         }
1192 
1193         if (ldi_ioctl(ldi_hdl, LOFI_UNMAP_FILE, (intptr_t)li,
1194             LOFI_MODE | FKIOCTL, kcred, NULL) != 0) {
1195                 cmn_err(CE_WARN, "xdb@%s: Failed to delete lofi dev for %s",
1196                     ddi_get_name_addr(dip), li->li_filename);
1197         }
1198 
1199         (void) ldi_close(ldi_hdl, LOFI_MODE, kcred);
1200         kmem_free(li, sizeof (*li));
1201 
1202         strfree(vdp->xs_lofi_path);
1203         vdp->xs_lofi_path = NULL;
1204 }
1205 
1206 static int
1207 xdb_open_device(xdb_t *vdp)
1208 {
1209         dev_info_t *dip = vdp->xs_dip;
1210         uint64_t devsize;
1211         int blksize;
1212         char *nodepath;
1213         char *xsname;
1214         char *str;
1215         int err;
1216 
1217         ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
1218 
1219         if (strlen(vdp->xs_params_path) == 0) {
1220                 /*
1221                  * it's possible to have no backing device when dealing
1222                  * with a pv cdrom drive that has no virtual cd associated
1223                  * with it.
1224                  */
1225                 ASSERT(XDB_IS_FE_CD(vdp));
1226                 ASSERT(vdp->xs_sectors == 0);
1227                 ASSERT(vdp->xs_ldi_li == NULL);
1228                 ASSERT(vdp->xs_ldi_hdl == NULL);
1229                 return (DDI_SUCCESS);
1230         }
1231 
1232         /*
1233          * after the hotplug scripts have "connected" the device, check to see
1234          * if we're using a dynamic device.  If so, replace the params path
1235          * with the dynamic one.
1236          */
1237         xsname = xvdi_get_xsname(dip);
1238         err = xenbus_read_str(xsname, "dynamic-device-path", &str);
1239         if (err == 0) {
1240                 strfree(vdp->xs_params_path);
1241                 vdp->xs_params_path = str;
1242         }
1243 
1244         if (ldi_ident_from_dip(dip, &vdp->xs_ldi_li) != 0)
1245                 return (DDI_FAILURE);
1246 
1247         nodepath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1248 
1249         /* try to open backend device */
1250         if (xdb_setup_node(vdp, nodepath) != DDI_SUCCESS) {
1251                 xvdi_dev_error(dip, ENXIO,
1252                     "Getting device path of backend device");
1253                 ldi_ident_release(vdp->xs_ldi_li);
1254                 kmem_free(nodepath, MAXPATHLEN);
1255                 return (DDI_FAILURE);
1256         }
1257 
1258         if (ldi_open_by_name(nodepath,
1259             FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE),
1260             kcred, &vdp->xs_ldi_hdl, vdp->xs_ldi_li) != 0) {
1261                 xdb_teardown_node(vdp);
1262                 ldi_ident_release(vdp->xs_ldi_li);
1263                 cmn_err(CE_WARN, "xdb@%s: Failed to open: %s",
1264                     ddi_get_name_addr(dip), nodepath);
1265                 kmem_free(nodepath, MAXPATHLEN);
1266                 return (DDI_FAILURE);
1267         }
1268 
1269         if (ldi_get_size(vdp->xs_ldi_hdl, &devsize) != DDI_SUCCESS) {
1270                 (void) ldi_close(vdp->xs_ldi_hdl,
1271                     FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE), kcred);
1272                 xdb_teardown_node(vdp);
1273                 ldi_ident_release(vdp->xs_ldi_li);
1274                 kmem_free(nodepath, MAXPATHLEN);
1275                 return (DDI_FAILURE);
1276         }
1277 
1278         blksize = ldi_prop_get_int64(vdp->xs_ldi_hdl,
1279             DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
1280             "blksize", DEV_BSIZE);
1281         if (blksize == DEV_BSIZE)
1282                 blksize = ldi_prop_get_int(vdp->xs_ldi_hdl,
1283                     LDI_DEV_T_ANY | DDI_PROP_DONTPASS |
1284                     DDI_PROP_NOTPROM, "device-blksize", DEV_BSIZE);
1285 
1286         vdp->xs_sec_size = blksize;
1287         vdp->xs_sectors = devsize / blksize;
1288 
1289         /* check if the underlying device is a CD/DVD disc */
1290         if (ldi_prop_get_int(vdp->xs_ldi_hdl, LDI_DEV_T_ANY | DDI_PROP_DONTPASS,
1291             INQUIRY_DEVICE_TYPE, DTYPE_DIRECT) == DTYPE_RODIRECT)
1292                 vdp->xs_type |= XDB_DEV_BE_CD;
1293 
1294         /* check if the underlying device is a removable disk */
1295         if (ldi_prop_exists(vdp->xs_ldi_hdl,
1296             LDI_DEV_T_ANY | DDI_PROP_DONTPASS | DDI_PROP_NOTPROM,
1297             "removable-media"))
1298                 vdp->xs_type |= XDB_DEV_BE_RMB;
1299 
1300         kmem_free(nodepath, MAXPATHLEN);
1301         return (DDI_SUCCESS);
1302 }
1303 
1304 static void
1305 xdb_close_device(xdb_t *vdp)
1306 {
1307         ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
1308 
1309         if (strlen(vdp->xs_params_path) == 0) {
1310                 ASSERT(XDB_IS_FE_CD(vdp));
1311                 ASSERT(vdp->xs_sectors == 0);
1312                 ASSERT(vdp->xs_ldi_li == NULL);
1313                 ASSERT(vdp->xs_ldi_hdl == NULL);
1314                 return;
1315         }
1316 
1317         (void) ldi_close(vdp->xs_ldi_hdl,
1318             FREAD | (XDB_IS_RO(vdp) ? 0 : FWRITE), kcred);
1319         xdb_teardown_node(vdp);
1320         ldi_ident_release(vdp->xs_ldi_li);
1321         vdp->xs_type &= ~(XDB_DEV_BE_CD | XDB_DEV_BE_RMB);
1322         vdp->xs_sectors = 0;
1323         vdp->xs_ldi_li = NULL;
1324         vdp->xs_ldi_hdl = NULL;
1325 }
1326 
/*
 * Kick-off connect process
 * If xs_fe_initialised == B_TRUE and xs_hp_connected == B_TRUE
 * the xs_if_connected will be changed to B_TRUE on success,
 *
 * The steps are: open the backing device, bind to the frontend (ring and
 * event channel), set up the I/O requests, add the interrupt handler,
 * mark the interface connected and finally publish the disk information
 * and the XenbusStateConnected state to xenstore in one transaction.
 * A failure before the interface is marked connected unwinds the steps
 * taken so far; a failure afterwards is handled by calling xdb_close().
 *
 * Caller must hold xs_cbmutex.  Nothing is returned; success is only
 * visible through xs_if_connected.
 */
static void
xdb_start_connect(xdb_t *vdp)
{
        xenbus_transaction_t    xbt;
        dev_info_t              *dip = vdp->xs_dip;
        boolean_t               fb_exists;
        int                     err, instance = ddi_get_instance(dip);
        uint64_t                sectors;
        uint_t                  dinfo, ssize;
        char                    *xsname;

        ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));

        /* note: xs_peer is updated here even if we end up returning */
        if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
            ((vdp->xs_peer = xvdi_get_oeid(dip)) == (domid_t)-1))
                return;

        mutex_enter(&vdp->xs_iomutex);
        /*
         * if the hotplug scripts haven't run or if the frontend is not
         * initialized, then we can't try to connect.
         */
        if (!vdp->xs_hp_connected || !vdp->xs_fe_initialised) {
                ASSERT(!vdp->xs_if_connected);
                mutex_exit(&vdp->xs_iomutex);
                return;
        }

        /* If we're already connected then there's nothing to do */
        if (vdp->xs_if_connected) {
                mutex_exit(&vdp->xs_iomutex);
                return;
        }
        mutex_exit(&vdp->xs_iomutex);

        /*
         * Start connect to frontend only when backend device are ready
         * and frontend has moved to XenbusStateInitialised, which means
         * ready to connect.
         */
        XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE,
            "xdb@%s: starting connection process", ddi_get_name_addr(dip)));

        if (xdb_open_device(vdp) != DDI_SUCCESS)
                return;

        if (xdb_bindto_frontend(vdp) != DDI_SUCCESS) {
                xdb_close_device(vdp);
                return;
        }

        /* init i/o requests */
        xdb_init_ioreqs(vdp);

        /* on failure, undo the three steps above in reverse order */
        if (ddi_add_intr(dip, 0, NULL, NULL, xdb_intr, (caddr_t)vdp)
            != DDI_SUCCESS) {
                xdb_uninit_ioreqs(vdp);
                xdb_unbindfrom_frontend(vdp);
                xdb_close_device(vdp);
                return;
        }

        /* build the disk info flags that are published to the frontend */
        dinfo = 0;
        if (XDB_IS_RO(vdp))
                dinfo |= VDISK_READONLY;
        if (XDB_IS_BE_RMB(vdp))
                dinfo |= VDISK_REMOVABLE;
        if (XDB_IS_BE_CD(vdp))
                dinfo |= VDISK_CDROM;
        if (XDB_IS_FE_CD(vdp))
                dinfo |= VDISK_REMOVABLE | VDISK_CDROM;

        /*
         * we can receive intr any time from now on
         * mark that we're ready to take intr
         */
        mutex_enter(&vdp->xs_iomutex);
        ASSERT(vdp->xs_fe_initialised);
        vdp->xs_if_connected = B_TRUE;
        mutex_exit(&vdp->xs_iomutex);

trans_retry:
        /* write into xenstore the info needed by frontend */
        if ((err = xenbus_transaction_start(&xbt)) != 0) {
                xvdi_dev_error(dip, err, "connect transaction init");
                goto errout;
        }

        /* If feature-barrier isn't present in xenstore, add it.  */
        fb_exists = xenbus_exists(xsname, XBP_FB);

        /* a sector size of 0 is published as DEV_BSIZE */
        ssize = (vdp->xs_sec_size == 0) ? DEV_BSIZE : vdp->xs_sec_size;
        sectors = vdp->xs_sectors;
        if (((!fb_exists &&
            (err = xenbus_printf(xbt, xsname, XBP_FB, "%d", 1)))) ||
            (err = xenbus_printf(xbt, xsname, XBP_INFO, "%u", dinfo)) ||
            (err = xenbus_printf(xbt, xsname, XBP_SECTOR_SIZE, "%u", ssize)) ||
            (err = xenbus_printf(xbt, xsname,
            XBP_SECTORS, "%"PRIu64, sectors)) ||
            (err = xenbus_printf(xbt, xsname, "instance", "%d", instance)) ||
            ((err = xvdi_switch_state(dip, xbt, XenbusStateConnected)) > 0)) {
                /* abort the transaction before reporting the failure */
                (void) xenbus_transaction_end(xbt, 1);
                xvdi_dev_error(dip, err, "connect transaction setup");
                goto errout;
        }

        if ((err = xenbus_transaction_end(xbt, 0)) != 0) {
                if (err == EAGAIN) {
                        /* transaction is ended, don't need to abort it */
                        goto trans_retry;
                }
                xvdi_dev_error(dip, err, "connect transaction commit");
                goto errout;
        }

        return;

errout:
        /* xs_if_connected is already set, so do a full disconnect */
        xdb_close(dip);
}
1452 
/*
 * Disconnect from frontend and close backend device
 *
 * Caller must hold xs_cbmutex.  If the interface is connected, it is
 * first marked disconnected under xs_iomutex, then the interrupt handler
 * is removed, outstanding I/O is waited for, and finally the I/O
 * requests, the frontend binding and the backing device are released,
 * in that order.  xs_peer is reset to (domid_t)-1 at the end.  If the
 * interface is not connected, nothing is torn down.
 */
static void
xdb_close(dev_info_t *dip)
{
        xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip);

        ASSERT(MUTEX_HELD(&vdp->xs_cbmutex));
        mutex_enter(&vdp->xs_iomutex);

        /*
         * if the hotplug scripts haven't run or if the frontend is not
         * initialized, then we can't be connected, so there's no
         * connection to close.
         */
        if (!vdp->xs_hp_connected || !vdp->xs_fe_initialised) {
                ASSERT(!vdp->xs_if_connected);
                mutex_exit(&vdp->xs_iomutex);
                return;
        }

        /* if we're not connected, there's nothing to do */
        if (!vdp->xs_if_connected) {
                /* wake anyone blocked on xs_iocv (e.g. xdb_send_buf()) */
                cv_broadcast(&vdp->xs_iocv);
                mutex_exit(&vdp->xs_iomutex);
                return;
        }

        XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "closing while connected"));

        /* flip the flag under xs_iomutex and wake waiters on xs_iocv */
        vdp->xs_if_connected = B_FALSE;
        cv_broadcast(&vdp->xs_iocv);

        mutex_exit(&vdp->xs_iomutex);

        /* stop accepting I/O request from frontend */
        ddi_remove_intr(dip, 0, NULL);

        /*
         * clear all on-going I/Os, if any; xs_ionumcv is signalled once
         * xs_ionum reaches zero while we're marked disconnected
         */
        mutex_enter(&vdp->xs_iomutex);
        while (vdp->xs_ionum > 0)
                cv_wait(&vdp->xs_ionumcv, &vdp->xs_iomutex);
        mutex_exit(&vdp->xs_iomutex);

        /* clean up resources and close this interface */
        xdb_uninit_ioreqs(vdp);
        xdb_unbindfrom_frontend(vdp);
        xdb_close_device(vdp);
        vdp->xs_peer = (domid_t)-1;
}
1504 
1505 static void
1506 xdb_send_buf(void *arg)
1507 {
1508         xdb_t   *vdp = (xdb_t *)arg;
1509         buf_t   *bp;
1510         int     err;
1511 
1512         mutex_enter(&vdp->xs_iomutex);
1513         while (vdp->xs_send_buf) {
1514                 if ((bp = vdp->xs_f_iobuf) == NULL) {
1515                         /* wait for some io to send */
1516                         XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
1517                             "send buf waiting for io"));
1518                         cv_wait(&vdp->xs_iocv, &vdp->xs_iomutex);
1519                         continue;
1520                 }
1521 
1522                 vdp->xs_f_iobuf = bp->av_forw;
1523                 bp->av_forw = NULL;
1524                 vdp->xs_ionum++;
1525 
1526                 mutex_exit(&vdp->xs_iomutex);
1527                 if (bp->b_bcount == 0) {
1528                         /* no I/O needs to be done */
1529                         (void) xdb_biodone(bp);
1530                         mutex_enter(&vdp->xs_iomutex);
1531                         continue;
1532                 }
1533 
1534                 err = EIO;
1535                 if (vdp->xs_ldi_hdl != NULL)
1536                         err = ldi_strategy(vdp->xs_ldi_hdl, bp);
1537                 if (err != 0) {
1538                         bp->b_flags |= B_ERROR;
1539                         (void) xdb_biodone(bp);
1540                         XDB_DBPRINT(XDB_DBG_IO, (CE_WARN,
1541                             "xdb@%s: sent buf to backend devfailed, err=%d",
1542                             ddi_get_name_addr(vdp->xs_dip), err));
1543                 } else {
1544                         XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE,
1545                             "sent buf to backend ok"));
1546                 }
1547                 mutex_enter(&vdp->xs_iomutex);
1548         }
1549         XDB_DBPRINT(XDB_DBG_IO, (CE_NOTE, "send buf finishing"));
1550         mutex_exit(&vdp->xs_iomutex);
1551 }
1552 
1553 /*ARGSUSED*/
1554 static void
1555 xdb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg,
1556     void *impl_data)
1557 {
1558         xendev_hotplug_state_t state = *(xendev_hotplug_state_t *)impl_data;
1559         xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip);
1560 
1561         XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: "
1562             "hotplug status change to %d!", ddi_get_name_addr(dip), state));
1563 
1564         if (state != Connected)
1565                 return;
1566 
1567         mutex_enter(&vdp->xs_cbmutex);
1568 
1569         /* If hotplug script have already run, there's nothing todo */
1570         if (vdp->xs_hp_connected) {
1571                 mutex_exit(&vdp->xs_cbmutex);
1572                 return;
1573         }
1574 
1575         vdp->xs_hp_connected = B_TRUE;
1576         xdb_start_connect(vdp);
1577         mutex_exit(&vdp->xs_cbmutex);
1578 }
1579 
1580 /*ARGSUSED*/
1581 static void
1582 xdb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id, void *arg,
1583     void *impl_data)
1584 {
1585         XenbusState new_state = *(XenbusState *)impl_data;
1586         xdb_t *vdp = (xdb_t *)ddi_get_driver_private(dip);
1587 
1588         XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: "
1589             "otherend state change to %d!", ddi_get_name_addr(dip), new_state));
1590 
1591         mutex_enter(&vdp->xs_cbmutex);
1592 
1593         /*
1594          * Now it'd really be nice if there was a well defined state
1595          * transition model for xen frontend drivers, but unfortunatly
1596          * there isn't.  So we're stuck with assuming that all state
1597          * transitions are possible, and we'll just have to deal with
1598          * them regardless of what state we're in.
1599          */
1600         switch (new_state) {
1601         case XenbusStateUnknown:
1602         case XenbusStateInitialising:
1603         case XenbusStateInitWait:
1604                 /* tear down our connection to the frontend */
1605                 xdb_close(dip);
1606                 vdp->xs_fe_initialised = B_FALSE;
1607                 break;
1608 
1609         case XenbusStateInitialised:
1610                 /*
1611                  * If we were conected, then we need to drop the connection
1612                  * and re-negotiate it.
1613                  */
1614                 xdb_close(dip);
1615                 vdp->xs_fe_initialised = B_TRUE;
1616                 xdb_start_connect(vdp);
1617                 break;
1618 
1619         case XenbusStateConnected:
1620                 /* nothing todo here other than congratulate the frontend */
1621                 break;
1622 
1623         case XenbusStateClosing:
1624                 /* monkey see monkey do */
1625                 (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing);
1626                 break;
1627 
1628         case XenbusStateClosed:
1629                 /* tear down our connection to the frontend */
1630                 xdb_close(dip);
1631                 vdp->xs_fe_initialised = B_FALSE;
1632                 (void) xvdi_switch_state(dip, XBT_NULL, new_state);
1633                 break;
1634         }
1635 
1636         mutex_exit(&vdp->xs_cbmutex);
1637 }
1638 
1639 static int
1640 xdb_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
1641 {
1642         ddi_iblock_cookie_t     ibc;
1643         xdb_t                   *vdp;
1644         int                     instance = ddi_get_instance(dip);
1645         char                    *xsname, *oename;
1646         char                    *str;
1647 
1648         switch (cmd) {
1649         case DDI_RESUME:
1650                 return (DDI_FAILURE);
1651         case DDI_ATTACH:
1652                 break;
1653         default:
1654                 return (DDI_FAILURE);
1655         }
1656         /* DDI_ATTACH */
1657 
1658         if (((xsname = xvdi_get_xsname(dip)) == NULL) ||
1659             ((oename = xvdi_get_oename(dip)) == NULL))
1660                 return (DDI_FAILURE);
1661 
1662         /*
1663          * Disable auto-detach.  This is necessary so that we don't get
1664          * detached while we're disconnected from the front end.
1665          */
1666         (void) ddi_prop_update_int(DDI_DEV_T_NONE, dip, DDI_NO_AUTODETACH, 1);
1667 
1668         if (ddi_get_iblock_cookie(dip, 0, &ibc) != DDI_SUCCESS)
1669                 return (DDI_FAILURE);
1670 
1671         if (ddi_soft_state_zalloc(xdb_statep, instance) != DDI_SUCCESS)
1672                 return (DDI_FAILURE);
1673 
1674         vdp = ddi_get_soft_state(xdb_statep, instance);
1675         vdp->xs_dip = dip;
1676         mutex_init(&vdp->xs_iomutex, NULL, MUTEX_DRIVER, (void *)ibc);
1677         mutex_init(&vdp->xs_cbmutex, NULL, MUTEX_DRIVER, (void *)ibc);
1678         cv_init(&vdp->xs_iocv, NULL, CV_DRIVER, NULL);
1679         cv_init(&vdp->xs_ionumcv, NULL, CV_DRIVER, NULL);
1680         ddi_set_driver_private(dip, vdp);
1681 
1682         if (!xdb_kstat_init(vdp))
1683                 goto errout1;
1684 
1685         /* Check if the frontend device is supposed to be a cdrom */
1686         if (xenbus_read_str(oename, XBP_DEV_TYPE, &str) != 0)
1687                 return (DDI_FAILURE);
1688         if (strcmp(str, XBV_DEV_TYPE_CD) == 0)
1689                 vdp->xs_type |= XDB_DEV_FE_CD;
1690         strfree(str);
1691 
1692         /* Check if the frontend device is supposed to be read only */
1693         if (xenbus_read_str(xsname, "mode", &str) != 0)
1694                 return (DDI_FAILURE);
1695         if ((strcmp(str, "r") == NULL) || (strcmp(str, "ro") == NULL))
1696                 vdp->xs_type |= XDB_DEV_RO;
1697         strfree(str);
1698 
1699         mutex_enter(&vdp->xs_cbmutex);
1700         if (!xdb_media_req_init(vdp) || !xdb_params_init(vdp)) {
1701                 xvdi_remove_xb_watch_handlers(dip);
1702                 mutex_exit(&vdp->xs_cbmutex);
1703                 goto errout2;
1704         }
1705         mutex_exit(&vdp->xs_cbmutex);
1706 
1707         vdp->xs_send_buf = B_TRUE;
1708         vdp->xs_iotaskq = ddi_taskq_create(dip, "xdb_iotask", 1,
1709             TASKQ_DEFAULTPRI, 0);
1710         (void) ddi_taskq_dispatch(vdp->xs_iotaskq, xdb_send_buf, vdp,
1711             DDI_SLEEP);
1712 
1713         /* Watch frontend and hotplug state change */
1714         if ((xvdi_add_event_handler(dip, XS_OE_STATE, xdb_oe_state_change,
1715             NULL) != DDI_SUCCESS) ||
1716             (xvdi_add_event_handler(dip, XS_HP_STATE, xdb_hp_state_change,
1717             NULL) != DDI_SUCCESS))
1718                 goto errout3;
1719 
1720         /*
1721          * Kick-off hotplug script
1722          */
1723         if (xvdi_post_event(dip, XEN_HP_ADD) != DDI_SUCCESS) {
1724                 cmn_err(CE_WARN, "xdb@%s: failed to start hotplug script",
1725                     ddi_get_name_addr(dip));
1726                 goto errout3;
1727         }
1728 
1729         /*
1730          * start waiting for hotplug event and otherend state event
1731          * mainly for debugging, frontend will not take any op seeing this
1732          */
1733         (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait);
1734 
1735         XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: attached!",
1736             ddi_get_name_addr(dip)));
1737         return (DDI_SUCCESS);
1738 
1739 errout3:
1740         ASSERT(vdp->xs_hp_connected && vdp->xs_if_connected);
1741 
1742         xvdi_remove_event_handler(dip, NULL);
1743 
1744         /* Disconnect from the backend */
1745         mutex_enter(&vdp->xs_cbmutex);
1746         mutex_enter(&vdp->xs_iomutex);
1747         vdp->xs_send_buf = B_FALSE;
1748         cv_broadcast(&vdp->xs_iocv);
1749         mutex_exit(&vdp->xs_iomutex);
1750         mutex_exit(&vdp->xs_cbmutex);
1751 
1752         /* wait for all io to dtrain and destroy io taskq */
1753         ddi_taskq_destroy(vdp->xs_iotaskq);
1754 
1755         /* tear down block-configure watch */
1756         mutex_enter(&vdp->xs_cbmutex);
1757         xvdi_remove_xb_watch_handlers(dip);
1758         mutex_exit(&vdp->xs_cbmutex);
1759 
1760 errout2:
1761         /* remove kstats */
1762         kstat_delete(vdp->xs_kstats);
1763 
1764 errout1:
1765         /* free up driver state */
1766         ddi_set_driver_private(dip, NULL);
1767         cv_destroy(&vdp->xs_iocv);
1768         cv_destroy(&vdp->xs_ionumcv);
1769         mutex_destroy(&vdp->xs_cbmutex);
1770         mutex_destroy(&vdp->xs_iomutex);
1771         ddi_soft_state_free(xdb_statep, instance);
1772 
1773         return (DDI_FAILURE);
1774 }
1775 
1776 /*ARGSUSED*/
1777 static int
1778 xdb_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
1779 {
1780         int instance = ddi_get_instance(dip);
1781         xdb_t *vdp = XDB_INST2SOFTS(instance);
1782 
1783         switch (cmd) {
1784         case DDI_SUSPEND:
1785                 return (DDI_FAILURE);
1786         case DDI_DETACH:
1787                 break;
1788         default:
1789                 return (DDI_FAILURE);
1790         }
1791 
1792         /* DDI_DETACH handling */
1793 
1794         /* refuse to detach if we're still in use by the frontend */
1795         mutex_enter(&vdp->xs_iomutex);
1796         if (vdp->xs_if_connected) {
1797                 mutex_exit(&vdp->xs_iomutex);
1798                 return (DDI_FAILURE);
1799         }
1800         vdp->xs_send_buf = B_FALSE;
1801         cv_broadcast(&vdp->xs_iocv);
1802         mutex_exit(&vdp->xs_iomutex);
1803 
1804         xvdi_remove_event_handler(dip, NULL);
1805         (void) xvdi_post_event(dip, XEN_HP_REMOVE);
1806 
1807         ddi_taskq_destroy(vdp->xs_iotaskq);
1808 
1809         mutex_enter(&vdp->xs_cbmutex);
1810         xvdi_remove_xb_watch_handlers(dip);
1811         mutex_exit(&vdp->xs_cbmutex);
1812 
1813         cv_destroy(&vdp->xs_iocv);
1814         cv_destroy(&vdp->xs_ionumcv);
1815         mutex_destroy(&vdp->xs_cbmutex);
1816         mutex_destroy(&vdp->xs_iomutex);
1817         kstat_delete(vdp->xs_kstats);
1818         ddi_set_driver_private(dip, NULL);
1819         ddi_soft_state_free(xdb_statep, instance);
1820 
1821         XDB_DBPRINT(XDB_DBG_INFO, (CE_NOTE, "xdb@%s: detached!",
1822             ddi_get_name_addr(dip)));
1823         return (DDI_SUCCESS);
1824 }
1825 
1826 static struct dev_ops xdb_dev_ops = {
1827         DEVO_REV,       /* devo_rev */
1828         0,              /* devo_refcnt */
1829         ddi_getinfo_1to1, /* devo_getinfo */
1830         nulldev,        /* devo_identify */
1831         nulldev,        /* devo_probe */
1832         xdb_attach,     /* devo_attach */
1833         xdb_detach,     /* devo_detach */
1834         nodev,          /* devo_reset */
1835         NULL,           /* devo_cb_ops */
1836         NULL,           /* devo_bus_ops */
1837         NULL,           /* power */
1838         ddi_quiesce_not_needed, /* quiesce */
1839 };
1840 
1841 /*
1842  * Module linkage information for the kernel.
1843  */
1844 static struct modldrv modldrv = {
1845         &mod_driverops,                     /* Type of module. */
1846         "vbd backend driver",           /* Name of the module */
1847         &xdb_dev_ops                        /* driver ops */
1848 };
1849 
1850 static struct modlinkage xdb_modlinkage = {
1851         MODREV_1,
1852         { &modldrv, NULL }
1853 };
1854 
1855 int
1856 _init(void)
1857 {
1858         int rv;
1859 
1860         if ((rv = ddi_soft_state_init((void **)&xdb_statep,
1861             sizeof (xdb_t), 0)) == 0)
1862                 if ((rv = mod_install(&xdb_modlinkage)) != 0)
1863                         ddi_soft_state_fini((void **)&xdb_statep);
1864         return (rv);
1865 }
1866 
1867 int
1868 _fini(void)
1869 {
1870         int rv;
1871 
1872         if ((rv = mod_remove(&xdb_modlinkage)) != 0)
1873                 return (rv);
1874         ddi_soft_state_fini((void **)&xdb_statep);
1875         return (rv);
1876 }
1877 
1878 int
1879 _info(struct modinfo *modinfop)
1880 {
1881         return (mod_info(&xdb_modlinkage, modinfop));
1882 }
1883 
1884 static int
1885 xdb_get_request(xdb_t *vdp, blkif_request_t *req)
1886 {
1887         void *src = xvdi_ring_get_request(vdp->xs_ring);
1888 
1889         if (src == NULL)
1890                 return (0);
1891 
1892         switch (vdp->xs_blk_protocol) {
1893         case BLKIF_PROTOCOL_NATIVE:
1894                 (void) memcpy(req, src, sizeof (*req));
1895                 break;
1896         case BLKIF_PROTOCOL_X86_32:
1897                 blkif_get_x86_32_req(req, src);
1898                 break;
1899         case BLKIF_PROTOCOL_X86_64:
1900                 blkif_get_x86_64_req(req, src);
1901                 break;
1902         default:
1903                 cmn_err(CE_PANIC, "xdb@%s: unrecognised protocol: %d",
1904                     ddi_get_name_addr(vdp->xs_dip),
1905                     vdp->xs_blk_protocol);
1906         }
1907         return (1);
1908 }
1909 
1910 static int
1911 xdb_push_response(xdb_t *vdp, uint64_t id, uint8_t op, uint16_t status)
1912 {
1913         ddi_acc_handle_t acchdl = vdp->xs_ring_hdl;
1914         blkif_response_t *rsp = xvdi_ring_get_response(vdp->xs_ring);
1915         blkif_x86_32_response_t *rsp_32 = (blkif_x86_32_response_t *)rsp;
1916         blkif_x86_64_response_t *rsp_64 = (blkif_x86_64_response_t *)rsp;
1917 
1918         ASSERT(rsp);
1919 
1920         switch (vdp->xs_blk_protocol) {
1921         case BLKIF_PROTOCOL_NATIVE:
1922                 ddi_put64(acchdl, &rsp->id, id);
1923                 ddi_put8(acchdl, &rsp->operation, op);
1924                 ddi_put16(acchdl, (uint16_t *)&rsp->status,
1925                     status == 0 ? BLKIF_RSP_OKAY : BLKIF_RSP_ERROR);
1926                 break;
1927         case BLKIF_PROTOCOL_X86_32:
1928                 ddi_put64(acchdl, &rsp_32->id, id);
1929                 ddi_put8(acchdl, &rsp_32->operation, op);
1930                 ddi_put16(acchdl, (uint16_t *)&rsp_32->status,
1931                     status == 0 ? BLKIF_RSP_OKAY : BLKIF_RSP_ERROR);
1932                 break;
1933         case BLKIF_PROTOCOL_X86_64:
1934                 ddi_put64(acchdl, &rsp_64->id, id);
1935                 ddi_put8(acchdl, &rsp_64->operation, op);
1936                 ddi_put16(acchdl, (uint16_t *)&rsp_64->status,
1937                     status == 0 ? BLKIF_RSP_OKAY : BLKIF_RSP_ERROR);
1938                 break;
1939         default:
1940                 cmn_err(CE_PANIC, "xdb@%s: unrecognised protocol: %d",
1941                     ddi_get_name_addr(vdp->xs_dip),
1942                     vdp->xs_blk_protocol);
1943         }
1944 
1945         return (xvdi_ring_push_response(vdp->xs_ring));
1946 }
1947 
1948 static void
1949 blkif_get_x86_32_req(blkif_request_t *dst, blkif_x86_32_request_t *src)
1950 {
1951         int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
1952         dst->operation = src->operation;
1953         dst->nr_segments = src->nr_segments;
1954         dst->handle = src->handle;
1955         dst->id = src->id;
1956         dst->sector_number = src->sector_number;
1957         if (n > src->nr_segments)
1958                 n = src->nr_segments;
1959         for (i = 0; i < n; i++)
1960                 dst->seg[i] = src->seg[i];
1961 }
1962 
1963 static void
1964 blkif_get_x86_64_req(blkif_request_t *dst, blkif_x86_64_request_t *src)
1965 {
1966         int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
1967         dst->operation = src->operation;
1968         dst->nr_segments = src->nr_segments;
1969         dst->handle = src->handle;
1970         dst->id = src->id;
1971         dst->sector_number = src->sector_number;
1972         if (n > src->nr_segments)
1973                 n = src->nr_segments;
1974         for (i = 0; i < n; i++)
1975                 dst->seg[i] = src->seg[i];
1976 }