1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2012 Garrett D'Amore <garrett@damore.org>.  All rights reserved.
  24  * Copyright 2012 Alexey Zaytsev <alexey.zaytsev@gmail.com> All rights reserved.
  25  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  26  * Copyright 2017 The MathWorks, Inc.  All rights reserved.
  27  */
  28 
  29 #include <sys/types.h>
  30 #include <sys/ksynch.h>
  31 #include <sys/kmem.h>
  32 #include <sys/file.h>
  33 #include <sys/errno.h>
  34 #include <sys/open.h>
  35 #include <sys/buf.h>
  36 #include <sys/uio.h>
  37 #include <sys/aio_req.h>
  38 #include <sys/cred.h>
  39 #include <sys/modctl.h>
  40 #include <sys/cmlb.h>
  41 #include <sys/conf.h>
  42 #include <sys/devops.h>
  43 #include <sys/list.h>
  44 #include <sys/sysmacros.h>
  45 #include <sys/dkio.h>
  46 #include <sys/vtoc.h>
  47 #include <sys/scsi/scsi.h>        /* for DTYPE_DIRECT */
  48 #include <sys/kstat.h>
  49 #include <sys/fs/dv_node.h>
  50 #include <sys/ddi.h>
  51 #include <sys/sunddi.h>
  52 #include <sys/note.h>
  53 #include <sys/blkdev.h>
  54 #include <sys/scsi/impl/inquiry.h>
  55 
/*
 * Minor number encoding: minor = instance * BD_MAXPART + partition.
 * BDINST()/BDPART() decompose a dev_t accordingly.
 */
#define BD_MAXPART      64
#define BDINST(dev)     (getminor(dev) / BD_MAXPART)
#define BDPART(dev)     (getminor(dev) % BD_MAXPART)

typedef struct bd bd_t;
typedef struct bd_xfer_impl bd_xfer_impl_t;
  62 
/*
 * Per-instance soft state for a blkdev child device.
 */
struct bd {
        void            *d_private;     /* parent driver's private cookie */
        dev_info_t      *d_dip;         /* our devinfo node */
        kmutex_t        d_ocmutex;      /* serializes open/close */
        kmutex_t        d_iomutex;      /* protects queues and I/O kstat */
        kmutex_t        *d_errmutex;    /* protects d_kerr error counters */
        kmutex_t        d_statemutex;   /* protects d_state / d_statecv */
        kcondvar_t      d_statecv;      /* signalled on media state change */
        enum dkio_state d_state;        /* current DKIO media state */
        cmlb_handle_t   d_cmlbh;        /* common disk label handle */
        unsigned        d_open_lyr[BD_MAXPART]; /* open count */
        uint64_t        d_open_excl;    /* bit mask indexed by partition */
        uint64_t        d_open_reg[OTYPCNT];            /* bit mask */

        uint32_t        d_qsize;        /* parent's queue depth */
        uint32_t        d_qactive;      /* transfers currently on run queue */
        uint32_t        d_maxxfer;      /* maximum transfer size, bytes */
        uint32_t        d_blkshift;     /* log2 of logical block size */
        uint32_t        d_pblkshift;    /* log2 of physical block size */
        uint64_t        d_numblks;      /* capacity in logical blocks */
        ddi_devid_t     d_devid;        /* registered device id, if any */

        kmem_cache_t    *d_cache;       /* bd_xfer_impl_t cache */
        list_t          d_runq;         /* transfers handed to the parent */
        list_t          d_waitq;        /* transfers awaiting submission */
        kstat_t         *d_ksp;         /* I/O kstat (may be NULL) */
        kstat_io_t      *d_kiop;        /* I/O kstat data, real or scratch */
        kstat_t         *d_errstats;    /* device_error kstat (may be NULL) */
        struct bd_errstats *d_kerr;     /* error kstat data, real or scratch */

        boolean_t       d_rdonly;       /* media is read-only */
        boolean_t       d_ssd;          /* solid state device */
        boolean_t       d_removable;    /* media is removable */
        boolean_t       d_hotpluggable; /* device is hotpluggable */
        boolean_t       d_use_dma;      /* parent supplied DMA attributes */

        ddi_dma_attr_t  d_dma;          /* copy of parent's DMA attributes */
        bd_ops_t        d_ops;          /* parent driver entry points */
        bd_handle_t     d_handle;       /* back pointer to our handle */
};
 103 
/*
 * Handle binding a parent driver's ops vector and private data to the
 * blkdev child node it created.
 */
struct bd_handle {
        bd_ops_t        h_ops;          /* parent driver entry points */
        ddi_dma_attr_t  *h_dma;         /* parent DMA attributes, or NULL */
        dev_info_t      *h_parent;      /* parent driver's devinfo */
        dev_info_t      *h_child;       /* blkdev child devinfo */
        void            *h_private;     /* parent's private cookie */
        bd_t            *h_bd;          /* soft state, set at attach time */
        char            *h_name;        /* child node name */
        char            h_addr[30];     /* enough for w%0.16x,%X */
};
 114 
/*
 * Internal transfer context; the leading bd_xfer_t is the public part
 * shared with the parent driver and must remain the first member.
 */
struct bd_xfer_impl {
        bd_xfer_t       i_public;       /* public xfer; must be first */
        list_node_t     i_linkage;      /* run/wait queue linkage */
        bd_t            *i_bd;          /* owning device */
        buf_t           *i_bp;          /* originating buf(9S) */
        uint_t          i_num_win;      /* number of transfer windows */
        uint_t          i_cur_win;      /* current window index */
        off_t           i_offset;       /* offset of current window */
        int             (*i_func)(void *, bd_xfer_t *); /* parent handler */
        uint32_t        i_blkshift;     /* log2 of block size for this xfer */
        size_t          i_len;          /* length of current window */
        size_t          i_resid;        /* bytes remaining overall */
};

/* Shorthand accessors for the public bd_xfer_t fields. */
#define i_dmah          i_public.x_dmah
#define i_dmac          i_public.x_dmac
#define i_ndmac         i_public.x_ndmac
#define i_kaddr         i_public.x_kaddr
#define i_nblks         i_public.x_nblks
#define i_blkno         i_public.x_blkno
#define i_flags         i_public.x_flags
 136 
 137 
 138 /*
 139  * Private prototypes.
 140  */
 141 
 142 static void bd_prop_update_inqstring(dev_info_t *, char *, char *, size_t);
 143 static void bd_create_inquiry_props(dev_info_t *, bd_drive_t *);
 144 static void bd_create_errstats(bd_t *, int, bd_drive_t *);
 145 static void bd_errstats_setstr(kstat_named_t *, char *, size_t, char *);
 146 static void bd_init_errstats(bd_t *, bd_drive_t *);
 147 
 148 static int bd_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
 149 static int bd_attach(dev_info_t *, ddi_attach_cmd_t);
 150 static int bd_detach(dev_info_t *, ddi_detach_cmd_t);
 151 
 152 static int bd_open(dev_t *, int, int, cred_t *);
 153 static int bd_close(dev_t, int, int, cred_t *);
 154 static int bd_strategy(struct buf *);
 155 static int bd_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
 156 static int bd_dump(dev_t, caddr_t, daddr_t, int);
 157 static int bd_read(dev_t, struct uio *, cred_t *);
 158 static int bd_write(dev_t, struct uio *, cred_t *);
 159 static int bd_aread(dev_t, struct aio_req *, cred_t *);
 160 static int bd_awrite(dev_t, struct aio_req *, cred_t *);
 161 static int bd_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
 162     caddr_t, int *);
 163 
 164 static int bd_tg_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
 165     void *);
 166 static int bd_tg_getinfo(dev_info_t *, int, void *, void *);
 167 static int bd_xfer_ctor(void *, void *, int);
 168 static void bd_xfer_dtor(void *, void *);
 169 static void bd_sched(bd_t *);
 170 static void bd_submit(bd_t *, bd_xfer_impl_t *);
 171 static void bd_runq_exit(bd_xfer_impl_t *, int);
 172 static void bd_update_state(bd_t *);
 173 static int bd_check_state(bd_t *, enum dkio_state *);
 174 static int bd_flush_write_cache(bd_t *, struct dk_callback *);
 175 
/* Target ops vector so cmlb can read/write labels through us. */
struct cmlb_tg_ops bd_tg_ops = {
        TG_DK_OPS_VERSION_1,
        bd_tg_rdwr,
        bd_tg_getinfo,
};
 181 
/* Character/block entry points for the blkdev nodes. */
static struct cb_ops bd_cb_ops = {
        bd_open,                /* open */
        bd_close,               /* close */
        bd_strategy,            /* strategy */
        nodev,                  /* print */
        bd_dump,                /* dump */
        bd_read,                /* read */
        bd_write,               /* write */
        bd_ioctl,               /* ioctl */
        nodev,                  /* devmap */
        nodev,                  /* mmap */
        nodev,                  /* segmap */
        nochpoll,               /* poll */
        bd_prop_op,             /* cb_prop_op */
        0,                      /* streamtab  */
        D_64BIT | D_MP,         /* Driver compatibility flag */
        CB_REV,                 /* cb_rev */
        bd_aread,               /* async read */
        bd_awrite               /* async write */
};
 202 
/* Autoconfiguration entry points. */
struct dev_ops bd_dev_ops = {
        DEVO_REV,               /* devo_rev, */
        0,                      /* refcnt  */
        bd_getinfo,             /* getinfo */
        nulldev,                /* identify */
        nulldev,                /* probe */
        bd_attach,              /* attach */
        bd_detach,              /* detach */
        nodev,                  /* reset */
        &bd_cb_ops,             /* driver operations */
        NULL,                   /* bus operations */
        NULL,                   /* power */
        ddi_quiesce_not_needed, /* quiesce */
};
 217 
/* Loadable module linkage: a single driver module. */
static struct modldrv modldrv = {
        &mod_driverops,
        "Generic Block Device",
        &bd_dev_ops,
};

static struct modlinkage modlinkage = {
        MODREV_1, { &modldrv, NULL }
};

/* Soft state anchor for all blkdev instances. */
static void *bd_state;
/* Serializes handle attach/detach across instances. */
static krwlock_t bd_lock;
 230 
 231 int
 232 _init(void)
 233 {
 234         int     rv;
 235 
 236         rv = ddi_soft_state_init(&bd_state, sizeof (struct bd), 2);
 237         if (rv != DDI_SUCCESS) {
 238                 return (rv);
 239         }
 240         rw_init(&bd_lock, NULL, RW_DRIVER, NULL);
 241         rv = mod_install(&modlinkage);
 242         if (rv != DDI_SUCCESS) {
 243                 rw_destroy(&bd_lock);
 244                 ddi_soft_state_fini(&bd_state);
 245         }
 246         return (rv);
 247 }
 248 
 249 int
 250 _fini(void)
 251 {
 252         int     rv;
 253 
 254         rv = mod_remove(&modlinkage);
 255         if (rv == DDI_SUCCESS) {
 256                 rw_destroy(&bd_lock);
 257                 ddi_soft_state_fini(&bd_state);
 258         }
 259         return (rv);
 260 }
 261 
/*
 * Report module information via mod_info(9F).
 */
int
_info(struct modinfo *modinfop)
{
        return (mod_info(&modlinkage, modinfop));
}
 267 
 268 static int
 269 bd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
 270 {
 271         bd_t    *bd;
 272         minor_t inst;
 273 
 274         _NOTE(ARGUNUSED(dip));
 275 
 276         inst = BDINST((dev_t)arg);
 277 
 278         switch (cmd) {
 279         case DDI_INFO_DEVT2DEVINFO:
 280                 bd = ddi_get_soft_state(bd_state, inst);
 281                 if (bd == NULL) {
 282                         return (DDI_FAILURE);
 283                 }
 284                 *resultp = (void *)bd->d_dip;
 285                 break;
 286 
 287         case DDI_INFO_DEVT2INSTANCE:
 288                 *resultp = (void *)(intptr_t)inst;
 289                 break;
 290 
 291         default:
 292                 return (DDI_FAILURE);
 293         }
 294         return (DDI_SUCCESS);
 295 }
 296 
 297 static void
 298 bd_prop_update_inqstring(dev_info_t *dip, char *name, char *data, size_t len)
 299 {
 300         int     ilen;
 301         char    *data_string;
 302 
 303         ilen = scsi_ascii_inquiry_len(data, len);
 304         ASSERT3U(ilen, <=, len);
 305         if (ilen <= 0)
 306                 return;
 307         /* ensure null termination */
 308         data_string = kmem_zalloc(ilen + 1, KM_SLEEP);
 309         bcopy(data, data_string, ilen);
 310         (void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, name, data_string);
 311         kmem_free(data_string, ilen + 1);
 312 }
 313 
 314 static void
 315 bd_create_inquiry_props(dev_info_t *dip, bd_drive_t *drive)
 316 {
 317         if (drive->d_vendor_len > 0)
 318                 bd_prop_update_inqstring(dip, INQUIRY_VENDOR_ID,
 319                     drive->d_vendor, drive->d_vendor_len);
 320 
 321         if (drive->d_product_len > 0)
 322                 bd_prop_update_inqstring(dip, INQUIRY_PRODUCT_ID,
 323                     drive->d_product, drive->d_product_len);
 324 
 325         if (drive->d_serial_len > 0)
 326                 bd_prop_update_inqstring(dip, INQUIRY_SERIAL_NO,
 327                     drive->d_serial, drive->d_serial_len);
 328 
 329         if (drive->d_revision_len > 0)
 330                 bd_prop_update_inqstring(dip, INQUIRY_REVISION_ID,
 331                     drive->d_revision, drive->d_revision_len);
 332 }
 333 
/*
 * Create the per-instance "device_error" named kstat.  If kstat
 * creation fails we fall back to a private scratch copy of the data
 * (with a private lock) so the I/O path can update error counters
 * unconditionally.  Idempotent: returns early if already created.
 */
static void
bd_create_errstats(bd_t *bd, int inst, bd_drive_t *drive)
{
        char    ks_module[KSTAT_STRLEN];
        char    ks_name[KSTAT_STRLEN];
        int     ndata = sizeof (struct bd_errstats) / sizeof (kstat_named_t);

        if (bd->d_errstats != NULL)
                return;

        /* Conventional naming, e.g. "sderr" / "sd0,err". */
        (void) snprintf(ks_module, sizeof (ks_module), "%serr",
            ddi_driver_name(bd->d_dip));
        (void) snprintf(ks_name, sizeof (ks_name), "%s%d,err",
            ddi_driver_name(bd->d_dip), inst);

        bd->d_errstats = kstat_create(ks_module, inst, ks_name, "device_error",
            KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);

        if (bd->d_errstats == NULL) {
                /*
                 * Even if we cannot create the kstat, we create a
                 * scratch kstat.  The reason for this is to ensure
                 * that we can update the kstat all of the time,
                 * without adding an extra branch instruction.
                 */
                bd->d_kerr = kmem_zalloc(sizeof (struct bd_errstats),
                    KM_SLEEP);
                bd->d_errmutex = kmem_zalloc(sizeof (kmutex_t), KM_SLEEP);
                mutex_init(bd->d_errmutex, NULL, MUTEX_DRIVER, NULL);
        } else {
                /* Provide a lock if the kstat framework did not. */
                if (bd->d_errstats->ks_lock == NULL) {
                        bd->d_errstats->ks_lock = kmem_zalloc(sizeof (kmutex_t),
                            KM_SLEEP);
                        mutex_init(bd->d_errstats->ks_lock, NULL, MUTEX_DRIVER,
                            NULL);
                }

                bd->d_errmutex = bd->d_errstats->ks_lock;
                bd->d_kerr = (struct bd_errstats *)bd->d_errstats->ks_data;
        }

        /* Counter statistics. */
        kstat_named_init(&bd->d_kerr->bd_softerrs,    "Soft Errors",
            KSTAT_DATA_UINT32);
        kstat_named_init(&bd->d_kerr->bd_harderrs,    "Hard Errors",
            KSTAT_DATA_UINT32);
        kstat_named_init(&bd->d_kerr->bd_transerrs,   "Transport Errors",
            KSTAT_DATA_UINT32);

        /* Identity: either a combined model name or vendor/product. */
        if (drive->d_model_len > 0) {
                kstat_named_init(&bd->d_kerr->bd_model,       "Model",
                    KSTAT_DATA_STRING);
        } else {
                kstat_named_init(&bd->d_kerr->bd_vid, "Vendor",
                    KSTAT_DATA_STRING);
                kstat_named_init(&bd->d_kerr->bd_pid, "Product",
                    KSTAT_DATA_STRING);
        }

        kstat_named_init(&bd->d_kerr->bd_revision,    "Revision",
            KSTAT_DATA_STRING);
        kstat_named_init(&bd->d_kerr->bd_serial,      "Serial No",
            KSTAT_DATA_STRING);
        kstat_named_init(&bd->d_kerr->bd_capacity,    "Size",
            KSTAT_DATA_ULONGLONG);
        kstat_named_init(&bd->d_kerr->bd_rq_media_err,        "Media Error",
            KSTAT_DATA_UINT32);
        kstat_named_init(&bd->d_kerr->bd_rq_ntrdy_err,        "Device Not Ready",
            KSTAT_DATA_UINT32);
        kstat_named_init(&bd->d_kerr->bd_rq_nodev_err,        "No Device",
            KSTAT_DATA_UINT32);
        kstat_named_init(&bd->d_kerr->bd_rq_recov_err,        "Recoverable",
            KSTAT_DATA_UINT32);
        kstat_named_init(&bd->d_kerr->bd_rq_illrq_err,        "Illegal Request",
            KSTAT_DATA_UINT32);
        kstat_named_init(&bd->d_kerr->bd_rq_pfa_err,
            "Predictive Failure Analysis", KSTAT_DATA_UINT32);

        bd->d_errstats->ks_private = bd;

        kstat_install(bd->d_errstats);
}
 415 
 416 static void
 417 bd_errstats_setstr(kstat_named_t *k, char *str, size_t len, char *alt)
 418 {
 419         char    *tmp;
 420 
 421         if (KSTAT_NAMED_STR_PTR(k) == NULL) {
 422                 if (len > 0) {
 423                         tmp = kmem_alloc(len + 1, KM_SLEEP);
 424                         (void) strlcpy(tmp, str, len + 1);
 425                 } else {
 426                         tmp = alt;
 427                 }
 428 
 429                 kstat_named_setstr(k, tmp);
 430         }
 431 }
 432 
/*
 * Populate the identity strings of the error kstat from the drive
 * info, substituting padded defaults where the parent supplied
 * nothing.  bd_errstats_setstr() only sets each string once, so this
 * is safe on re-attach.
 */
static void
bd_init_errstats(bd_t *bd, bd_drive_t *drive)
{
        struct bd_errstats      *est = bd->d_kerr;

        mutex_enter(bd->d_errmutex);

        /* Prefer the combined model name; else vendor/product pair. */
        if (drive->d_model_len > 0 &&
            KSTAT_NAMED_STR_PTR(&est->bd_model) == NULL) {
                bd_errstats_setstr(&est->bd_model, drive->d_model,
                    drive->d_model_len, NULL);
        } else {
                bd_errstats_setstr(&est->bd_vid, drive->d_vendor,
                    drive->d_vendor_len, "Unknown ");
                bd_errstats_setstr(&est->bd_pid, drive->d_product,
                    drive->d_product_len, "Unknown         ");
        }

        bd_errstats_setstr(&est->bd_revision, drive->d_revision,
            drive->d_revision_len, "0001");
        bd_errstats_setstr(&est->bd_serial, drive->d_serial,
            drive->d_serial_len, "0               ");

        mutex_exit(bd->d_errmutex);
}
 458 
/*
 * attach(9E) entry point for a blkdev child node.  Retrieves the
 * parent's bd_handle (stashed as parent data), allocates soft state,
 * sets up DMA/transfer limits, locks, queues, kstats, and attaches the
 * cmlb disk label.  On any failure everything built so far is undone.
 */
static int
bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
        int             inst;
        bd_handle_t     hdl;
        bd_t            *bd;
        bd_drive_t      drive;
        int             rv;
        char            name[16];
        char            kcache[32];

        switch (cmd) {
        case DDI_ATTACH:
                break;
        case DDI_RESUME:
                /* We don't do anything native for suspend/resume */
                return (DDI_SUCCESS);
        default:
                return (DDI_FAILURE);
        }

        inst = ddi_get_instance(dip);
        hdl = ddi_get_parent_data(dip);

        (void) snprintf(name, sizeof (name), "%s%d",
            ddi_driver_name(dip), ddi_get_instance(dip));
        (void) snprintf(kcache, sizeof (kcache), "%s_xfer", name);

        /* The parent must have attached its handle to our node. */
        if (hdl == NULL) {
                cmn_err(CE_WARN, "%s: missing parent data!", name);
                return (DDI_FAILURE);
        }

        if (ddi_soft_state_zalloc(bd_state, inst) != DDI_SUCCESS) {
                cmn_err(CE_WARN, "%s: unable to zalloc soft state!", name);
                return (DDI_FAILURE);
        }
        bd = ddi_get_soft_state(bd_state, inst);

        if (hdl->h_dma) {
                /*
                 * Copy the parent's DMA attributes; transfers must be
                 * block-sized, so raise granularity to at least
                 * DEV_BSIZE.
                 */
                bd->d_dma = *(hdl->h_dma);
                bd->d_dma.dma_attr_granular =
                    max(DEV_BSIZE, bd->d_dma.dma_attr_granular);
                bd->d_use_dma = B_TRUE;

                if (bd->d_maxxfer &&
                    (bd->d_maxxfer != bd->d_dma.dma_attr_maxxfer)) {
                        cmn_err(CE_WARN,
                            "%s: inconsistent maximum transfer size!",
                            name);
                        /* We force it */
                        bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
                } else {
                        bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
                }
        } else {
                /* Non-DMA device: default to a 1 MB maximum transfer. */
                bd->d_use_dma = B_FALSE;
                if (bd->d_maxxfer == 0) {
                        bd->d_maxxfer = 1024 * 1024;
                }
        }
        bd->d_ops = hdl->h_ops;
        bd->d_private = hdl->h_private;
        bd->d_blkshift = 9;  /* 512 bytes, to start */

        /* Max transfer must be a non-zero multiple of DEV_BSIZE. */
        if (bd->d_maxxfer % DEV_BSIZE) {
                cmn_err(CE_WARN, "%s: maximum transfer misaligned!", name);
                bd->d_maxxfer &= ~(DEV_BSIZE - 1);
        }
        if (bd->d_maxxfer < DEV_BSIZE) {
                cmn_err(CE_WARN, "%s: maximum transfer size too small!", name);
                ddi_soft_state_free(bd_state, inst);
                return (DDI_FAILURE);
        }

        bd->d_dip = dip;
        bd->d_handle = hdl;
        hdl->h_bd = bd;
        ddi_set_driver_private(dip, bd);

        mutex_init(&bd->d_iomutex, NULL, MUTEX_DRIVER, NULL);
        mutex_init(&bd->d_ocmutex, NULL, MUTEX_DRIVER, NULL);
        mutex_init(&bd->d_statemutex, NULL, MUTEX_DRIVER, NULL);
        cv_init(&bd->d_statecv, NULL, CV_DRIVER, NULL);

        list_create(&bd->d_waitq, sizeof (bd_xfer_impl_t),
            offsetof(struct bd_xfer_impl, i_linkage));
        list_create(&bd->d_runq, sizeof (bd_xfer_impl_t),
            offsetof(struct bd_xfer_impl, i_linkage));

        /* Transfer-context cache; ctor preallocates DMA handles. */
        bd->d_cache = kmem_cache_create(kcache, sizeof (bd_xfer_impl_t), 8,
            bd_xfer_ctor, bd_xfer_dtor, NULL, bd, NULL, 0);

        bd->d_ksp = kstat_create(ddi_driver_name(dip), inst, NULL, "disk",
            KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
        if (bd->d_ksp != NULL) {
                bd->d_ksp->ks_lock = &bd->d_iomutex;
                kstat_install(bd->d_ksp);
                bd->d_kiop = bd->d_ksp->ks_data;
        } else {
                /*
                 * Even if we cannot create the kstat, we create a
                 * scratch kstat.  The reason for this is to ensure
                 * that we can update the kstat all of the time,
                 * without adding an extra branch instruction.
                 */
                bd->d_kiop = kmem_zalloc(sizeof (kstat_io_t), KM_SLEEP);
        }

        cmlb_alloc_handle(&bd->d_cmlbh);

        bd->d_state = DKIO_NONE;

        /* Query the parent for drive characteristics. */
        bzero(&drive, sizeof (drive));
        bd->d_ops.o_drive_info(bd->d_private, &drive);
        bd->d_qsize = drive.d_qsize;
        bd->d_removable = drive.d_removable;
        bd->d_hotpluggable = drive.d_hotpluggable;

        if (drive.d_maxxfer && drive.d_maxxfer < bd->d_maxxfer)
                bd->d_maxxfer = drive.d_maxxfer;

        bd_create_inquiry_props(dip, &drive);

        bd_create_errstats(bd, inst, &drive);
        bd_init_errstats(bd, &drive);
        bd_update_state(bd);

        /*
         * Node type: devices with a non-zero EUI-64 are BLKDEV class,
         * addressed LUNs are channel class, everything else plain.
         */
        rv = cmlb_attach(dip, &bd_tg_ops, DTYPE_DIRECT,
            bd->d_removable, bd->d_hotpluggable,
            /*LINTED: E_BAD_PTR_CAST_ALIGN*/
            *(uint64_t *)drive.d_eui64 != 0 ? DDI_NT_BLOCK_BLKDEV :
            drive.d_lun >= 0 ? DDI_NT_BLOCK_CHAN : DDI_NT_BLOCK,
            CMLB_FAKE_LABEL_ONE_PARTITION, bd->d_cmlbh, 0);
        if (rv != 0) {
                /* Tear down everything constructed above. */
                cmlb_free_handle(&bd->d_cmlbh);
                kmem_cache_destroy(bd->d_cache);
                mutex_destroy(&bd->d_iomutex);
                mutex_destroy(&bd->d_ocmutex);
                mutex_destroy(&bd->d_statemutex);
                cv_destroy(&bd->d_statecv);
                list_destroy(&bd->d_waitq);
                list_destroy(&bd->d_runq);
                if (bd->d_ksp != NULL) {
                        kstat_delete(bd->d_ksp);
                        bd->d_ksp = NULL;
                } else {
                        kmem_free(bd->d_kiop, sizeof (kstat_io_t));
                }
                ddi_soft_state_free(bd_state, inst);
                return (DDI_FAILURE);
        }

        if (bd->d_ops.o_devid_init != NULL) {
                rv = bd->d_ops.o_devid_init(bd->d_private, dip, &bd->d_devid);
                if (rv == DDI_SUCCESS) {
                        if (ddi_devid_register(dip, bd->d_devid) !=
                            DDI_SUCCESS) {
                                cmn_err(CE_WARN,
                                    "%s: unable to register devid", name);
                        }
                }
        }

        /*
         * Add a zero-length attribute to tell the world we support
         * kernel ioctls (for layered drivers).  Also set up properties
         * used by HAL to identify removable media.
         */
        (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
            DDI_KERNEL_IOCTL, NULL, 0);
        if (bd->d_removable) {
                (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
                    "removable-media", NULL, 0);
        }
        if (bd->d_hotpluggable) {
                (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
                    "hotpluggable", NULL, 0);
        }

        ddi_report_dev(dip);

        return (DDI_SUCCESS);
}
 643 
 644 static int
 645 bd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 646 {
 647         bd_t    *bd;
 648 
 649         bd = ddi_get_driver_private(dip);
 650 
 651         switch (cmd) {
 652         case DDI_DETACH:
 653                 break;
 654         case DDI_SUSPEND:
 655                 /* We don't suspend, but our parent does */
 656                 return (DDI_SUCCESS);
 657         default:
 658                 return (DDI_FAILURE);
 659         }
 660         if (bd->d_ksp != NULL) {
 661                 kstat_delete(bd->d_ksp);
 662                 bd->d_ksp = NULL;
 663         } else {
 664                 kmem_free(bd->d_kiop, sizeof (kstat_io_t));
 665         }
 666 
 667         if (bd->d_errstats != NULL) {
 668                 kstat_delete(bd->d_errstats);
 669                 bd->d_errstats = NULL;
 670         } else {
 671                 kmem_free(bd->d_kerr, sizeof (struct bd_errstats));
 672                 mutex_destroy(bd->d_errmutex);
 673         }
 674 
 675         cmlb_detach(bd->d_cmlbh, 0);
 676         cmlb_free_handle(&bd->d_cmlbh);
 677         if (bd->d_devid)
 678                 ddi_devid_free(bd->d_devid);
 679         kmem_cache_destroy(bd->d_cache);
 680         mutex_destroy(&bd->d_iomutex);
 681         mutex_destroy(&bd->d_ocmutex);
 682         mutex_destroy(&bd->d_statemutex);
 683         cv_destroy(&bd->d_statecv);
 684         list_destroy(&bd->d_waitq);
 685         list_destroy(&bd->d_runq);
 686         ddi_soft_state_free(bd_state, ddi_get_instance(dip));
 687         return (DDI_SUCCESS);
 688 }
 689 
 690 static int
 691 bd_xfer_ctor(void *buf, void *arg, int kmflag)
 692 {
 693         bd_xfer_impl_t  *xi;
 694         bd_t            *bd = arg;
 695         int             (*dcb)(caddr_t);
 696 
 697         if (kmflag == KM_PUSHPAGE || kmflag == KM_SLEEP) {
 698                 dcb = DDI_DMA_SLEEP;
 699         } else {
 700                 dcb = DDI_DMA_DONTWAIT;
 701         }
 702 
 703         xi = buf;
 704         bzero(xi, sizeof (*xi));
 705         xi->i_bd = bd;
 706 
 707         if (bd->d_use_dma) {
 708                 if (ddi_dma_alloc_handle(bd->d_dip, &bd->d_dma, dcb, NULL,
 709                     &xi->i_dmah) != DDI_SUCCESS) {
 710                         return (-1);
 711                 }
 712         }
 713 
 714         return (0);
 715 }
 716 
 717 static void
 718 bd_xfer_dtor(void *buf, void *arg)
 719 {
 720         bd_xfer_impl_t  *xi = buf;
 721 
 722         _NOTE(ARGUNUSED(arg));
 723 
 724         if (xi->i_dmah)
 725                 ddi_dma_free_handle(&xi->i_dmah);
 726         xi->i_dmah = NULL;
 727 }
 728 
 729 static bd_xfer_impl_t *
 730 bd_xfer_alloc(bd_t *bd, struct buf *bp, int (*func)(void *, bd_xfer_t *),
 731     int kmflag)
 732 {
 733         bd_xfer_impl_t          *xi;
 734         int                     rv = 0;
 735         int                     status;
 736         unsigned                dir;
 737         int                     (*cb)(caddr_t);
 738         size_t                  len;
 739         uint32_t                shift;
 740 
 741         if (kmflag == KM_SLEEP) {
 742                 cb = DDI_DMA_SLEEP;
 743         } else {
 744                 cb = DDI_DMA_DONTWAIT;
 745         }
 746 
 747         xi = kmem_cache_alloc(bd->d_cache, kmflag);
 748         if (xi == NULL) {
 749                 bioerror(bp, ENOMEM);
 750                 return (NULL);
 751         }
 752 
 753         ASSERT(bp);
 754 
 755         xi->i_bp = bp;
 756         xi->i_func = func;
 757         xi->i_blkno = bp->b_lblkno >> (bd->d_blkshift - DEV_BSHIFT);
 758 
 759         if (bp->b_bcount == 0) {
 760                 xi->i_len = 0;
 761                 xi->i_nblks = 0;
 762                 xi->i_kaddr = NULL;
 763                 xi->i_resid = 0;
 764                 xi->i_num_win = 0;
 765                 goto done;
 766         }
 767 
 768         if (bp->b_flags & B_READ) {
 769                 dir = DDI_DMA_READ;
 770                 xi->i_func = bd->d_ops.o_read;
 771         } else {
 772                 dir = DDI_DMA_WRITE;
 773                 xi->i_func = bd->d_ops.o_write;
 774         }
 775 
 776         shift = bd->d_blkshift;
 777         xi->i_blkshift = shift;
 778 
 779         if (!bd->d_use_dma) {
 780                 bp_mapin(bp);
 781                 rv = 0;
 782                 xi->i_offset = 0;
 783                 xi->i_num_win =
 784                     (bp->b_bcount + (bd->d_maxxfer - 1)) / bd->d_maxxfer;
 785                 xi->i_cur_win = 0;
 786                 xi->i_len = min(bp->b_bcount, bd->d_maxxfer);
 787                 xi->i_nblks = howmany(xi->i_len, (1U << shift));
 788                 xi->i_kaddr = bp->b_un.b_addr;
 789                 xi->i_resid = bp->b_bcount;
 790         } else {
 791 
 792                 /*
 793                  * We have to use consistent DMA if the address is misaligned.
 794                  */
 795                 if (((bp->b_flags & (B_PAGEIO | B_REMAPPED)) != B_PAGEIO) &&
 796                     ((uintptr_t)bp->b_un.b_addr & 0x7)) {
 797                         dir |= DDI_DMA_CONSISTENT | DDI_DMA_PARTIAL;
 798                 } else {
 799                         dir |= DDI_DMA_STREAMING | DDI_DMA_PARTIAL;
 800                 }
 801 
 802                 status = ddi_dma_buf_bind_handle(xi->i_dmah, bp, dir, cb,
 803                     NULL, &xi->i_dmac, &xi->i_ndmac);
 804                 switch (status) {
 805                 case DDI_DMA_MAPPED:
 806                         xi->i_num_win = 1;
 807                         xi->i_cur_win = 0;
 808                         xi->i_offset = 0;
 809                         xi->i_len = bp->b_bcount;
 810                         xi->i_nblks = howmany(xi->i_len, (1U << shift));
 811                         xi->i_resid = bp->b_bcount;
 812                         rv = 0;
 813                         break;
 814                 case DDI_DMA_PARTIAL_MAP:
 815                         xi->i_cur_win = 0;
 816 
 817                         if ((ddi_dma_numwin(xi->i_dmah, &xi->i_num_win) !=
 818                             DDI_SUCCESS) ||
 819                             (ddi_dma_getwin(xi->i_dmah, 0, &xi->i_offset,
 820                             &len, &xi->i_dmac, &xi->i_ndmac) !=
 821                             DDI_SUCCESS) ||
 822                             (P2PHASE(len, (1U << DEV_BSHIFT)) != 0)) {
 823                                 (void) ddi_dma_unbind_handle(xi->i_dmah);
 824                                 rv = EFAULT;
 825                                 goto done;
 826                         }
 827                         xi->i_len = len;
 828                         xi->i_nblks = howmany(xi->i_len, (1U << shift));
 829                         xi->i_resid = bp->b_bcount;
 830                         rv = 0;
 831                         break;
 832                 case DDI_DMA_NORESOURCES:
 833                         rv = EAGAIN;
 834                         goto done;
 835                 case DDI_DMA_TOOBIG:
 836                         rv = EINVAL;
 837                         goto done;
 838                 case DDI_DMA_NOMAPPING:
 839                 case DDI_DMA_INUSE:
 840                 default:
 841                         rv = EFAULT;
 842                         goto done;
 843                 }
 844         }
 845 
 846 done:
 847         if (rv != 0) {
 848                 kmem_cache_free(bd->d_cache, xi);
 849                 bioerror(bp, rv);
 850                 return (NULL);
 851         }
 852 
 853         return (xi);
 854 }
 855 
 856 static void
 857 bd_xfer_free(bd_xfer_impl_t *xi)
 858 {
 859         if (xi->i_dmah) {
 860                 (void) ddi_dma_unbind_handle(xi->i_dmah);
 861         }
 862         kmem_cache_free(xi->i_bd->d_cache, xi);
 863 }
 864 
/*
 * bd_open: open(9e) entry point for both block and character minor nodes.
 *
 * Validates the open type, refreshes the device/media state, checks the
 * partition (via cmlb), honors read-only and exclusive-open semantics,
 * and records the open in the per-instance bookkeeping.  Non-blocking
 * (FNDELAY/FNONBLOCK) opens, and raw (OTYP_CHR) opens of invalid
 * partitions, are allowed to succeed so utilities like format/fdisk can
 * repair a disk with bad label data.
 *
 * Returns 0 on success, or EINVAL/ENXIO/EROFS/EBUSY on failure.
 */
static int
bd_open(dev_t *devp, int flag, int otyp, cred_t *credp)
{
        dev_t           dev = *devp;
        bd_t            *bd;
        minor_t         part;
        minor_t         inst;
        uint64_t        mask;
        boolean_t       ndelay;
        int             rv;
        diskaddr_t      nblks;
        diskaddr_t      lba;

        _NOTE(ARGUNUSED(credp));

        part = BDPART(dev);
        inst = BDINST(dev);

        if (otyp >= OTYPCNT)
                return (EINVAL);

        ndelay = (flag & (FNDELAY | FNONBLOCK)) ? B_TRUE : B_FALSE;

        /*
         * Block any DR events from changing the set of registered
         * devices while we function.
         */
        rw_enter(&bd_lock, RW_READER);
        if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
                rw_exit(&bd_lock);
                return (ENXIO);
        }

        mutex_enter(&bd->d_ocmutex);

        /* One bit per partition; the minor layout supports at most 64. */
        ASSERT(part < 64);
        mask = (1U << part);

        /* Refresh media presence/size before consulting the label. */
        bd_update_state(bd);

        if (cmlb_validate(bd->d_cmlbh, 0, 0) != 0) {

                /* non-blocking opens are allowed to succeed */
                if (!ndelay) {
                        rv = ENXIO;
                        goto done;
                }
        } else if (cmlb_partinfo(bd->d_cmlbh, part, &nblks, &lba,
            NULL, NULL, 0) == 0) {

                /*
                 * We read the partinfo, verify valid ranges.  If the
                 * partition is invalid, and we aren't blocking or
                 * doing a raw access, then fail. (Non-blocking and
                 * raw accesses can still succeed to allow a disk with
                 * bad partition data to opened by format and fdisk.)
                 */
                if ((!nblks) && ((!ndelay) || (otyp != OTYP_CHR))) {
                        rv = ENXIO;
                        goto done;
                }
        } else if (!ndelay) {
                /*
                 * cmlb_partinfo failed -- invalid partition or no
                 * disk label.
                 */
                rv = ENXIO;
                goto done;
        }

        /* Refuse writable opens of read-only media. */
        if ((flag & FWRITE) && bd->d_rdonly) {
                rv = EROFS;
                goto done;
        }

        /* An existing exclusive open of this partition blocks all others. */
        if ((bd->d_open_excl) & (mask)) {
                rv = EBUSY;
                goto done;
        }
        if (flag & FEXCL) {
                /* FEXCL requires no other open of any type on the partition. */
                if (bd->d_open_lyr[part]) {
                        rv = EBUSY;
                        goto done;
                }
                for (int i = 0; i < OTYP_LYR; i++) {
                        if (bd->d_open_reg[i] & mask) {
                                rv = EBUSY;
                                goto done;
                        }
                }
        }

        /* Record the open: layered opens are counted, others are bitmasks. */
        if (otyp == OTYP_LYR) {
                bd->d_open_lyr[part]++;
        } else {
                bd->d_open_reg[otyp] |= mask;
        }
        if (flag & FEXCL) {
                bd->d_open_excl |= mask;
        }

        rv = 0;
done:
        mutex_exit(&bd->d_ocmutex);
        rw_exit(&bd_lock);

        return (rv);
}
 973 
 974 static int
 975 bd_close(dev_t dev, int flag, int otyp, cred_t *credp)
 976 {
 977         bd_t            *bd;
 978         minor_t         inst;
 979         minor_t         part;
 980         uint64_t        mask;
 981         boolean_t       last = B_TRUE;
 982 
 983         _NOTE(ARGUNUSED(flag));
 984         _NOTE(ARGUNUSED(credp));
 985 
 986         part = BDPART(dev);
 987         inst = BDINST(dev);
 988 
 989         ASSERT(part < 64);
 990         mask = (1U << part);
 991 
 992         rw_enter(&bd_lock, RW_READER);
 993 
 994         if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
 995                 rw_exit(&bd_lock);
 996                 return (ENXIO);
 997         }
 998 
 999         mutex_enter(&bd->d_ocmutex);
1000         if (bd->d_open_excl & mask) {
1001                 bd->d_open_excl &= ~mask;
1002         }
1003         if (otyp == OTYP_LYR) {
1004                 bd->d_open_lyr[part]--;
1005         } else {
1006                 bd->d_open_reg[otyp] &= ~mask;
1007         }
1008         for (int i = 0; i < 64; i++) {
1009                 if (bd->d_open_lyr[part]) {
1010                         last = B_FALSE;
1011                 }
1012         }
1013         for (int i = 0; last && (i < OTYP_LYR); i++) {
1014                 if (bd->d_open_reg[i]) {
1015                         last = B_FALSE;
1016                 }
1017         }
1018         mutex_exit(&bd->d_ocmutex);
1019 
1020         if (last) {
1021                 cmlb_invalidate(bd->d_cmlbh, 0);
1022         }
1023         rw_exit(&bd_lock);
1024 
1025         return (0);
1026 }
1027 
/*
 * bd_dump: dump(9e) entry point, used for crash dumps.
 *
 * Translates the DEV_BSIZE-based (blkno, nblk) request into device
 * blocks, validates it against the partition bounds, and issues a
 * single polled (BD_XFER_POLL) write through the normal submit path.
 * Runs with most of the system quiesced, so all allocations are
 * KM_NOSLEEP and the transfer must complete synchronously.
 */
static int
bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk)
{
        minor_t         inst;
        minor_t         part;
        diskaddr_t      pstart;
        diskaddr_t      psize;
        bd_t            *bd;
        bd_xfer_impl_t  *xi;
        buf_t           *bp;
        int             rv;
        uint32_t        shift;
        daddr_t         d_blkno;
        int     d_nblk;

        rw_enter(&bd_lock, RW_READER);

        part = BDPART(dev);
        inst = BDINST(dev);

        if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
                rw_exit(&bd_lock);
                return (ENXIO);
        }
        /* Convert from DEV_BSIZE units to device-native block units. */
        shift = bd->d_blkshift;
        d_blkno = blkno >> (shift - DEV_BSHIFT);
        d_nblk = howmany((nblk << DEV_BSHIFT), (1U << shift));
        /*
         * do cmlb, but do it synchronously unless we already have the
         * partition (which we probably should.)
         */
        if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL,
            (void *)1)) {
                rw_exit(&bd_lock);
                return (ENXIO);
        }

        /* Reject dumps extending past the end of the partition. */
        if ((d_blkno + d_nblk) > psize) {
                rw_exit(&bd_lock);
                return (EINVAL);
        }
        bp = getrbuf(KM_NOSLEEP);
        if (bp == NULL) {
                rw_exit(&bd_lock);
                return (ENOMEM);
        }

        bp->b_bcount = nblk << DEV_BSHIFT;
        bp->b_resid = bp->b_bcount;
        bp->b_lblkno = blkno;
        bp->b_un.b_addr = caddr;

        xi = bd_xfer_alloc(bd, bp,  bd->d_ops.o_write, KM_NOSLEEP);
        if (xi == NULL) {
                rw_exit(&bd_lock);
                freerbuf(bp);
                return (ENOMEM);
        }
        /* Absolute device block: partition-relative block plus its start. */
        xi->i_blkno = d_blkno + pstart;
        xi->i_flags = BD_XFER_POLL;
        bd_submit(bd, xi);
        rw_exit(&bd_lock);

        /*
         * Generally, we should have run this entirely synchronously
         * at this point and the biowait call should be a no-op.  If
         * it didn't happen this way, it's a bug in the underlying
         * driver not honoring BD_XFER_POLL.
         */
        (void) biowait(bp);
        rv = geterror(bp);
        freerbuf(bp);
        return (rv);
}
1102 
1103 void
1104 bd_minphys(struct buf *bp)
1105 {
1106         minor_t inst;
1107         bd_t    *bd;
1108         inst = BDINST(bp->b_edev);
1109 
1110         bd = ddi_get_soft_state(bd_state, inst);
1111 
1112         /*
1113          * In a non-debug kernel, bd_strategy will catch !bd as
1114          * well, and will fail nicely.
1115          */
1116         ASSERT(bd);
1117 
1118         if (bp->b_bcount > bd->d_maxxfer)
1119                 bp->b_bcount = bd->d_maxxfer;
1120 }
1121 
1122 static int
1123 bd_read(dev_t dev, struct uio *uio, cred_t *credp)
1124 {
1125         _NOTE(ARGUNUSED(credp));
1126         return (physio(bd_strategy, NULL, dev, B_READ, bd_minphys, uio));
1127 }
1128 
1129 static int
1130 bd_write(dev_t dev, struct uio *uio, cred_t *credp)
1131 {
1132         _NOTE(ARGUNUSED(credp));
1133         return (physio(bd_strategy, NULL, dev, B_WRITE, bd_minphys, uio));
1134 }
1135 
1136 static int
1137 bd_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
1138 {
1139         _NOTE(ARGUNUSED(credp));
1140         return (aphysio(bd_strategy, anocancel, dev, B_READ, bd_minphys, aio));
1141 }
1142 
1143 static int
1144 bd_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
1145 {
1146         _NOTE(ARGUNUSED(credp));
1147         return (aphysio(bd_strategy, anocancel, dev, B_WRITE, bd_minphys, aio));
1148 }
1149 
/*
 * bd_strategy: strategy(9e) entry point.
 *
 * Validates the request against the partition (block-aligned count,
 * within bounds), converts the DEV_BSIZE-based b_lblkno to device
 * blocks, trims transfers that extend past the end of the partition
 * (reporting the untransferred remainder in b_resid), allocates a
 * transfer context, and hands it to the scheduler.  All failures are
 * reported through bioerror()/biodone(); the return value is always 0
 * per strategy(9e) convention.
 */
static int
bd_strategy(struct buf *bp)
{
        minor_t         inst;
        minor_t         part;
        bd_t            *bd;
        diskaddr_t      p_lba;
        diskaddr_t      p_nblks;
        diskaddr_t      b_nblks;
        bd_xfer_impl_t  *xi;
        uint32_t        shift;
        int             (*func)(void *, bd_xfer_t *);
        diskaddr_t      lblkno;

        part = BDPART(bp->b_edev);
        inst = BDINST(bp->b_edev);

        ASSERT(bp);

        bp->b_resid = bp->b_bcount;

        if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
                bioerror(bp, ENXIO);
                biodone(bp);
                return (0);
        }

        if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba,
            NULL, NULL, 0)) {
                bioerror(bp, ENXIO);
                biodone(bp);
                return (0);
        }

        /* Convert the DEV_BSIZE-based lblkno to device-native blocks. */
        shift = bd->d_blkshift;
        lblkno = bp->b_lblkno >> (shift - DEV_BSHIFT);
        /* Byte count must be DEV_BSIZE aligned; start must be in bounds. */
        if ((P2PHASE(bp->b_bcount, (1U << DEV_BSHIFT)) != 0) ||
            (lblkno > p_nblks)) {
                bioerror(bp, ENXIO);
                biodone(bp);
                return (0);
        }
        b_nblks = howmany(bp->b_bcount, (1U << shift));
        /* A zero-length request, or one starting at EOF, succeeds trivially. */
        if ((lblkno == p_nblks) || (bp->b_bcount == 0)) {
                biodone(bp);
                return (0);
        }

        /* Trim a request that runs past the partition; report it in resid. */
        if ((b_nblks + lblkno) > p_nblks) {
                bp->b_resid = ((lblkno + b_nblks - p_nblks) << shift);
                bp->b_bcount -= bp->b_resid;
        } else {
                bp->b_resid = 0;
        }
        func = (bp->b_flags & B_READ) ? bd->d_ops.o_read : bd->d_ops.o_write;

        /* Try a cheap allocation first; fall back to a blocking one. */
        xi = bd_xfer_alloc(bd, bp, func, KM_NOSLEEP);
        if (xi == NULL) {
                xi = bd_xfer_alloc(bd, bp, func, KM_PUSHPAGE);
        }
        if (xi == NULL) {
                /* bd_xfer_alloc will have done bioerror */
                biodone(bp);
                return (0);
        }
        xi->i_blkno = lblkno + p_lba;

        bd_submit(bd, xi);

        return (0);
}
1221 
/*
 * bd_ioctl: ioctl(9e) entry point.
 *
 * First offers the ioctl to cmlb (which handles label/partition
 * ioctls); anything cmlb rejects with ENOTTY is then handled here.
 * Supported commands report media geometry/capacity, controller
 * information, and device attributes, monitor media state changes
 * (DKIOCSTATE), and flush the write cache.  Unrecognized commands
 * return ENOTTY.
 */
static int
bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp)
{
        minor_t         inst;
        uint16_t        part;
        bd_t            *bd;
        void            *ptr = (void *)arg;
        int             rv;

        part = BDPART(dev);
        inst = BDINST(dev);

        if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
                return (ENXIO);
        }

        /* Give cmlb first crack; it owns label/partition ioctls. */
        rv = cmlb_ioctl(bd->d_cmlbh, dev, cmd, arg, flag, credp, rvalp, 0);
        if (rv != ENOTTY)
                return (rv);

        if (rvalp != NULL) {
                /* the return value of the ioctl is 0 by default */
                *rvalp = 0;
        }

        switch (cmd) {
        case DKIOCGMEDIAINFO: {
                struct dk_minfo minfo;

                /* make sure our state information is current */
                bd_update_state(bd);
                bzero(&minfo, sizeof (minfo));
                minfo.dki_media_type = DK_FIXED_DISK;
                minfo.dki_lbsize = (1U << bd->d_blkshift);
                minfo.dki_capacity = bd->d_numblks;
                if (ddi_copyout(&minfo, ptr, sizeof (minfo), flag)) {
                        return (EFAULT);
                }
                return (0);
        }
        case DKIOCGMEDIAINFOEXT: {
                struct dk_minfo_ext miext;

                /* make sure our state information is current */
                bd_update_state(bd);
                bzero(&miext, sizeof (miext));
                miext.dki_media_type = DK_FIXED_DISK;
                miext.dki_lbsize = (1U << bd->d_blkshift);
                miext.dki_pbsize = (1U << bd->d_pblkshift);
                miext.dki_capacity = bd->d_numblks;
                if (ddi_copyout(&miext, ptr, sizeof (miext), flag)) {
                        return (EFAULT);
                }
                return (0);
        }
        case DKIOCINFO: {
                struct dk_cinfo cinfo;
                bzero(&cinfo, sizeof (cinfo));
                cinfo.dki_ctype = DKC_BLKDEV;
                /* The "controller" is our parent nexus driver instance. */
                cinfo.dki_cnum = ddi_get_instance(ddi_get_parent(bd->d_dip));
                (void) snprintf(cinfo.dki_cname, sizeof (cinfo.dki_cname),
                    "%s", ddi_driver_name(ddi_get_parent(bd->d_dip)));
                (void) snprintf(cinfo.dki_dname, sizeof (cinfo.dki_dname),
                    "%s", ddi_driver_name(bd->d_dip));
                cinfo.dki_unit = inst;
                cinfo.dki_flags = DKI_FMTVOL;
                cinfo.dki_partition = part;
                cinfo.dki_maxtransfer = bd->d_maxxfer / DEV_BSIZE;
                cinfo.dki_addr = 0;
                cinfo.dki_slave = 0;
                cinfo.dki_space = 0;
                cinfo.dki_prio = 0;
                cinfo.dki_vec = 0;
                if (ddi_copyout(&cinfo, ptr, sizeof (cinfo), flag)) {
                        return (EFAULT);
                }
                return (0);
        }
        case DKIOCREMOVABLE: {
                int i;
                i = bd->d_removable ? 1 : 0;
                if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
                        return (EFAULT);
                }
                return (0);
        }
        case DKIOCHOTPLUGGABLE: {
                int i;
                i = bd->d_hotpluggable ? 1 : 0;
                if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
                        return (EFAULT);
                }
                return (0);
        }
        case DKIOCREADONLY: {
                int i;
                i = bd->d_rdonly ? 1 : 0;
                if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
                        return (EFAULT);
                }
                return (0);
        }
        case DKIOCSOLIDSTATE: {
                int i;
                i = bd->d_ssd ? 1 : 0;
                if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
                        return (EFAULT);
                }
                return (0);
        }
        case DKIOCSTATE: {
                /* Block until the media state differs from the one passed in. */
                enum dkio_state state;
                if (ddi_copyin(ptr, &state, sizeof (state), flag)) {
                        return (EFAULT);
                }
                if ((rv = bd_check_state(bd, &state)) != 0) {
                        return (rv);
                }
                if (ddi_copyout(&state, ptr, sizeof (state), flag)) {
                        return (EFAULT);
                }
                return (0);
        }
        case DKIOCFLUSHWRITECACHE: {
                struct dk_callback *dkc = NULL;

                /* A callback pointer is only trustworthy from kernel callers. */
                if (flag & FKIOCTL)
                        dkc = (void *)arg;

                rv = bd_flush_write_cache(bd, dkc);
                return (rv);
        }

        default:
                break;

        }
        return (ENOTTY);
}
1361 
1362 static int
1363 bd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
1364     char *name, caddr_t valuep, int *lengthp)
1365 {
1366         bd_t    *bd;
1367 
1368         bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1369         if (bd == NULL)
1370                 return (ddi_prop_op(dev, dip, prop_op, mod_flags,
1371                     name, valuep, lengthp));
1372 
1373         return (cmlb_prop_op(bd->d_cmlbh, dev, dip, prop_op, mod_flags, name,
1374             valuep, lengthp, BDPART(dev), 0));
1375 }
1376 
1377 
/*
 * bd_tg_rdwr: cmlb target-ops read/write callback.
 *
 * Performs a synchronous whole-block transfer on behalf of cmlb
 * (e.g. label reads/writes).  A non-NULL tg_cookie indicates polled
 * mode (dump context): allocations become KM_NOSLEEP and the transfer
 * is flagged BD_XFER_POLL.  "start" is an absolute device block.
 */
static int
bd_tg_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, diskaddr_t start,
    size_t length, void *tg_cookie)
{
        bd_t            *bd;
        buf_t           *bp;
        bd_xfer_impl_t  *xi;
        int             rv;
        int             (*func)(void *, bd_xfer_t *);
        int             kmflag;

        /*
         * If we are running in polled mode (such as during dump(9e)
         * execution), then we cannot sleep for kernel allocations.
         */
        kmflag = tg_cookie ? KM_NOSLEEP : KM_SLEEP;

        bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));

        if (P2PHASE(length, (1U << DEV_BSHIFT)) != 0) {
                /* We can only transfer whole blocks at a time! */
                return (EINVAL);
        }

        if ((bp = getrbuf(kmflag)) == NULL) {
                return (ENOMEM);
        }

        switch (cmd) {
        case TG_READ:
                bp->b_flags = B_READ;
                func = bd->d_ops.o_read;
                break;
        case TG_WRITE:
                bp->b_flags = B_WRITE;
                func = bd->d_ops.o_write;
                break;
        default:
                freerbuf(bp);
                return (EINVAL);
        }

        bp->b_un.b_addr = bufaddr;
        bp->b_bcount = length;
        xi = bd_xfer_alloc(bd, bp, func, kmflag);
        if (xi == NULL) {
                /* bd_xfer_alloc recorded the error on the buf. */
                rv = geterror(bp);
                freerbuf(bp);
                return (rv);
        }
        xi->i_flags = tg_cookie ? BD_XFER_POLL : 0;
        xi->i_blkno = start;
        bd_submit(bd, xi);
        (void) biowait(bp);
        rv = geterror(bp);
        freerbuf(bp);

        return (rv);
}
1437 
/*
 * bd_tg_getinfo: cmlb target-ops information callback.
 *
 * Supplies capacity, block size, and media attributes to cmlb.
 * Geometry requests are declined (ENOTTY) so cmlb fabricates one.
 */
static int
bd_tg_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
{
        bd_t            *bd;

        _NOTE(ARGUNUSED(tg_cookie));
        bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));

        switch (cmd) {
        case TG_GETPHYGEOM:
        case TG_GETVIRTGEOM:
                /*
                 * We don't have any "geometry" as such, let cmlb
                 * fabricate something.
                 */
                return (ENOTTY);

        case TG_GETCAPACITY:
                /* Refresh first so a changed media size is reflected. */
                bd_update_state(bd);
                *(diskaddr_t *)arg = bd->d_numblks;
                return (0);

        case TG_GETBLOCKSIZE:
                *(uint32_t *)arg = (1U << bd->d_blkshift);
                return (0);

        case TG_GETATTR:
                /*
                 * It turns out that cmlb really doesn't do much for
                 * non-writable media, but lets make the information
                 * available for it in case it does more in the
                 * future.  (The value is currently used for
                 * triggering special behavior for CD-ROMs.)
                 */
                bd_update_state(bd);
                ((tg_attribute_t *)arg)->media_is_writable =
                    bd->d_rdonly ? B_FALSE : B_TRUE;
                ((tg_attribute_t *)arg)->media_is_solid_state = bd->d_ssd;
                return (0);

        default:
                return (EINVAL);
        }
}
1482 
1483 
/*
 * bd_sched: dispatch queued transfers to the underlying driver.
 *
 * Moves transfers from the wait queue to the run queue while the
 * active count is below the queue depth, submitting each to the
 * driver's transfer function.  If the driver rejects a transfer
 * synchronously, the buf is failed here and the run-queue accounting
 * is unwound.  Called with no locks held.
 */
static void
bd_sched(bd_t *bd)
{
        bd_xfer_impl_t  *xi;
        struct buf      *bp;
        int             rv;

        mutex_enter(&bd->d_iomutex);

        while ((bd->d_qactive < bd->d_qsize) &&
            ((xi = list_remove_head(&bd->d_waitq)) != NULL)) {
                bd->d_qactive++;
                kstat_waitq_to_runq(bd->d_kiop);
                list_insert_tail(&bd->d_runq, xi);

                /*
                 * Submit the job to the driver.  We drop the I/O mutex
                 * so that we can deal with the case where the driver
                 * completion routine calls back into us synchronously.
                 */

                mutex_exit(&bd->d_iomutex);

                rv = xi->i_func(bd->d_private, &xi->i_public);
                if (rv != 0) {
                        /* Synchronous failure: fail the buf immediately. */
                        bp = xi->i_bp;
                        bioerror(bp, rv);
                        biodone(bp);

                        atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);

                        /* Undo the run-queue accounting done above. */
                        mutex_enter(&bd->d_iomutex);
                        bd->d_qactive--;
                        kstat_runq_exit(bd->d_kiop);
                        list_remove(&bd->d_runq, xi);
                        bd_xfer_free(xi);
                } else {
                        mutex_enter(&bd->d_iomutex);
                }
        }

        mutex_exit(&bd->d_iomutex);
}
1527 
1528 static void
1529 bd_submit(bd_t *bd, bd_xfer_impl_t *xi)
1530 {
1531         mutex_enter(&bd->d_iomutex);
1532         list_insert_tail(&bd->d_waitq, xi);
1533         kstat_waitq_enter(bd->d_kiop);
1534         mutex_exit(&bd->d_iomutex);
1535 
1536         bd_sched(bd);
1537 }
1538 
/*
 * bd_runq_exit: retire a transfer from the run queue.
 *
 * Removes the transfer from the run queue, updates the I/O kstats
 * (counting only the bytes actually moved on success), and reschedules
 * so a waiting transfer can take the freed queue slot.
 */
static void
bd_runq_exit(bd_xfer_impl_t *xi, int err)
{
        bd_t    *bd = xi->i_bd;
        buf_t   *bp = xi->i_bp;

        mutex_enter(&bd->d_iomutex);
        bd->d_qactive--;
        kstat_runq_exit(bd->d_kiop);
        list_remove(&bd->d_runq, xi);
        mutex_exit(&bd->d_iomutex);

        if (err == 0) {
                /* Account only the bytes that actually transferred. */
                if (bp->b_flags & B_READ) {
                        bd->d_kiop->reads++;
                        bd->d_kiop->nread += (bp->b_bcount - xi->i_resid);
                } else {
                        bd->d_kiop->writes++;
                        bd->d_kiop->nwritten += (bp->b_bcount - xi->i_resid);
                }
        }
        bd_sched(bd);
}
1562 
/*
 * bd_update_state: refresh cached media information from the driver.
 *
 * Queries the driver's o_media_info entry point and updates the cached
 * block size, capacity, and read-only/SSD flags.  Media that cannot be
 * queried, or that reports an unusable block size (< 512, not a power
 * of two, or not dividing d_maxxfer evenly), is treated as ejected.
 * On any state or size change, waiters on d_statecv are woken and the
 * cmlb label is (in)validated as appropriate.
 */
static void
bd_update_state(bd_t *bd)
{
        enum    dkio_state      state = DKIO_INSERTED;
        boolean_t               docmlb = B_FALSE;
        bd_media_t              media;

        bzero(&media, sizeof (media));

        mutex_enter(&bd->d_statemutex);
        if (bd->d_ops.o_media_info(bd->d_private, &media) != 0) {
                bd->d_numblks = 0;
                state = DKIO_EJECTED;
                goto done;
        }

        if ((media.m_blksize < 512) ||
            (!ISP2(media.m_blksize)) ||
            (P2PHASE(bd->d_maxxfer, media.m_blksize))) {
                cmn_err(CE_WARN, "%s%d: Invalid media block size (%d)",
                    ddi_driver_name(bd->d_dip), ddi_get_instance(bd->d_dip),
                    media.m_blksize);
                /*
                 * We can't use the media, treat it as not present.
                 */
                state = DKIO_EJECTED;
                bd->d_numblks = 0;
                goto done;
        }

        if (((1U << bd->d_blkshift) != media.m_blksize) ||
            (bd->d_numblks != media.m_nblks)) {
                /* Device size changed */
                docmlb = B_TRUE;
        }

        /* m_blksize is a power of two, so ffs - 1 yields its log2. */
        bd->d_blkshift = ddi_ffs(media.m_blksize) - 1;
        bd->d_pblkshift = bd->d_blkshift;
        bd->d_numblks = media.m_nblks;
        bd->d_rdonly = media.m_readonly;
        bd->d_ssd = media.m_solidstate;

        /*
         * Only use the supplied physical block size if it is non-zero,
         * greater or equal to the block size, and a power of 2. Ignore it
         * if not, it's just informational and we can still use the media.
         */
        if ((media.m_pblksize != 0) &&
            (media.m_pblksize >= media.m_blksize) &&
            (ISP2(media.m_pblksize)))
                bd->d_pblkshift = ddi_ffs(media.m_pblksize) - 1;

done:
        if (state != bd->d_state) {
                bd->d_state = state;
                /* Wake DKIOCSTATE waiters blocked in bd_check_state(). */
                cv_broadcast(&bd->d_statecv);
                docmlb = B_TRUE;
        }
        mutex_exit(&bd->d_statemutex);

        bd->d_kerr->bd_capacity.value.ui64 = bd->d_numblks << bd->d_blkshift;

        if (docmlb) {
                if (state == DKIO_INSERTED) {
                        (void) cmlb_validate(bd->d_cmlbh, 0, 0);
                } else {
                        cmlb_invalidate(bd->d_cmlbh, 0);
                }
        }
}
1633 
/*
 * bd_check_state: block until the media state differs from *state.
 *
 * Implements the DKIOCSTATE wait: polls/refreshes device state once a
 * second (also woken early by bd_update_state's cv_broadcast) until
 * the state changes, then returns it through *state.  Returns 0 on a
 * state change or EINTR if the wait is interrupted by a signal.
 */
static int
bd_check_state(bd_t *bd, enum dkio_state *state)
{
        clock_t         when;

        for (;;) {

                bd_update_state(bd);

                mutex_enter(&bd->d_statemutex);

                if (bd->d_state != *state) {
                        *state = bd->d_state;
                        mutex_exit(&bd->d_statemutex);
                        break;
                }

                /* Re-poll at least once a second even without a broadcast. */
                when = drv_usectohz(1000000);
                if (cv_reltimedwait_sig(&bd->d_statecv, &bd->d_statemutex,
                    when, TR_CLOCK_TICK) == 0) {
                        /* Interrupted by a signal. */
                        mutex_exit(&bd->d_statemutex);
                        return (EINTR);
                }

                mutex_exit(&bd->d_statemutex);
        }

        return (0);
}
1663 
1664 static int
1665 bd_flush_write_cache_done(struct buf *bp)
1666 {
1667         struct dk_callback *dc = (void *)bp->b_private;
1668 
1669         (*dc->dkc_callback)(dc->dkc_cookie, geterror(bp));
1670         kmem_free(dc, sizeof (*dc));
1671         freerbuf(bp);
1672         return (0);
1673 }
1674 
/*
 * bd_flush_write_cache: issue a cache-flush through o_sync_cache.
 *
 * If the caller supplied a dk_callback, the flush is asynchronous: a
 * private copy of the callback is attached to the buf and invoked from
 * bd_flush_write_cache_done() on completion.  Otherwise the flush is
 * performed synchronously and its status returned.  ENOTSUP if the
 * driver provides no sync-cache entry point.
 */
static int
bd_flush_write_cache(bd_t *bd, struct dk_callback *dkc)
{
        buf_t                   *bp;
        struct dk_callback      *dc;
        bd_xfer_impl_t          *xi;
        int                     rv;

        if (bd->d_ops.o_sync_cache == NULL) {
                return (ENOTSUP);
        }
        /* NOTE(review): getrbuf(KM_SLEEP) shouldn't fail; check is belt-and-braces. */
        if ((bp = getrbuf(KM_SLEEP)) == NULL) {
                return (ENOMEM);
        }
        /* A flush moves no data. */
        bp->b_resid = 0;
        bp->b_bcount = 0;

        xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_sync_cache, KM_SLEEP);
        if (xi == NULL) {
                rv = geterror(bp);
                freerbuf(bp);
                return (rv);
        }

        /* Make an asynchronous flush, but only if there is a callback */
        if (dkc != NULL && dkc->dkc_callback != NULL) {
                /* Make a private copy of the callback structure */
                dc = kmem_alloc(sizeof (*dc), KM_SLEEP);
                *dc = *dkc;
                bp->b_private = dc;
                bp->b_iodone = bd_flush_write_cache_done;

                bd_submit(bd, xi);
                return (0);
        }

        /* In case there is no callback, perform a synchronous flush */
        bd_submit(bd, xi);
        (void) biowait(bp);
        rv = geterror(bp);
        freerbuf(bp);

        return (rv);
}
1719 
1720 /*
1721  * Nexus support.
1722  */
/*
 * bd_bus_ctl: bus_ctl(9e) nexus operation for blkdev children.
 *
 * Handles REPORTDEV (boot-time device announcement) and child
 * init/uninit (establishing the unit address from the parent-data
 * handle set at bd_attach_handle time).  Everything else is passed
 * through to the generic ddi_ctlops().
 */
int
bd_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
    void *arg, void *result)
{
        bd_handle_t     hdl;

        switch (ctlop) {
        case DDI_CTLOPS_REPORTDEV:
                cmn_err(CE_CONT, "?Block device: %s@%s, %s%d\n",
                    ddi_node_name(rdip), ddi_get_name_addr(rdip),
                    ddi_driver_name(rdip), ddi_get_instance(rdip));
                return (DDI_SUCCESS);

        case DDI_CTLOPS_INITCHILD:
                /* The handle was stashed as parent data by the parent driver. */
                hdl = ddi_get_parent_data((dev_info_t *)arg);
                if (hdl == NULL) {
                        return (DDI_NOT_WELL_FORMED);
                }
                ddi_set_name_addr((dev_info_t *)arg, hdl->h_addr);
                return (DDI_SUCCESS);

        case DDI_CTLOPS_UNINITCHILD:
                ddi_set_name_addr((dev_info_t *)arg, NULL);
                ndi_prop_remove_all((dev_info_t *)arg);
                return (DDI_SUCCESS);

        default:
                return (ddi_ctlops(dip, rdip, ctlop, arg, result));
        }
}
1753 
1754 /*
1755  * Functions for device drivers.
1756  */
1757 bd_handle_t
1758 bd_alloc_handle(void *private, bd_ops_t *ops, ddi_dma_attr_t *dma, int kmflag)
1759 {
1760         bd_handle_t     hdl;
1761 
1762         hdl = kmem_zalloc(sizeof (*hdl), kmflag);
1763         if (hdl != NULL) {
1764                 hdl->h_ops = *ops;
1765                 hdl->h_dma = dma;
1766                 hdl->h_private = private;
1767         }
1768 
1769         return (hdl);
1770 }
1771 
1772 void
1773 bd_free_handle(bd_handle_t hdl)
1774 {
1775         kmem_free(hdl, sizeof (*hdl));
1776 }
1777 
1778 int
1779 bd_attach_handle(dev_info_t *dip, bd_handle_t hdl)
1780 {
1781         dev_info_t      *child;
1782         bd_drive_t      drive = { 0 };
1783 
1784         /* if drivers don't override this, make it assume none */
1785         drive.d_lun = -1;
1786         hdl->h_ops.o_drive_info(hdl->h_private, &drive);
1787 
1788         hdl->h_parent = dip;
1789         hdl->h_name = "blkdev";
1790 
1791         /*LINTED: E_BAD_PTR_CAST_ALIGN*/
1792         if (*(uint64_t *)drive.d_eui64 != 0) {
1793                 if (drive.d_lun >= 0) {
1794                         (void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
1795                             "w%02X%02X%02X%02X%02X%02X%02X%02X,%X",
1796                             drive.d_eui64[0], drive.d_eui64[1],
1797                             drive.d_eui64[2], drive.d_eui64[3],
1798                             drive.d_eui64[4], drive.d_eui64[5],
1799                             drive.d_eui64[6], drive.d_eui64[7], drive.d_lun);
1800                 } else {
1801                         (void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
1802                             "w%02X%02X%02X%02X%02X%02X%02X%02X",
1803                             drive.d_eui64[0], drive.d_eui64[1],
1804                             drive.d_eui64[2], drive.d_eui64[3],
1805                             drive.d_eui64[4], drive.d_eui64[5],
1806                             drive.d_eui64[6], drive.d_eui64[7]);
1807                 }
1808         } else {
1809                 if (drive.d_lun >= 0) {
1810                         (void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
1811                             "%X,%X", drive.d_target, drive.d_lun);
1812                 } else {
1813                         (void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
1814                             "%X", drive.d_target);
1815                 }
1816         }
1817 
1818         if (ndi_devi_alloc(dip, hdl->h_name, (pnode_t)DEVI_SID_NODEID,
1819             &child) != NDI_SUCCESS) {
1820                 cmn_err(CE_WARN, "%s%d: unable to allocate node %s@%s",
1821                     ddi_driver_name(dip), ddi_get_instance(dip),
1822                     "blkdev", hdl->h_addr);
1823                 return (DDI_FAILURE);
1824         }
1825 
1826         ddi_set_parent_data(child, hdl);
1827         hdl->h_child = child;
1828 
1829         if (ndi_devi_online(child, 0) == NDI_FAILURE) {
1830                 cmn_err(CE_WARN, "%s%d: failed bringing node %s@%s online",
1831                     ddi_driver_name(dip), ddi_get_instance(dip),
1832                     hdl->h_name, hdl->h_addr);
1833                 (void) ndi_devi_free(child);
1834                 return (DDI_FAILURE);
1835         }
1836 
1837         return (DDI_SUCCESS);
1838 }
1839 
1840 int
1841 bd_detach_handle(bd_handle_t hdl)
1842 {
1843         int     circ;
1844         int     rv;
1845         char    *devnm;
1846 
1847         if (hdl->h_child == NULL) {
1848                 return (DDI_SUCCESS);
1849         }
1850         ndi_devi_enter(hdl->h_parent, &circ);
1851         if (i_ddi_node_state(hdl->h_child) < DS_INITIALIZED) {
1852                 rv = ddi_remove_child(hdl->h_child, 0);
1853         } else {
1854                 devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
1855                 (void) ddi_deviname(hdl->h_child, devnm);
1856                 (void) devfs_clean(hdl->h_parent, devnm + 1, DV_CLEAN_FORCE);
1857                 rv = ndi_devi_unconfig_one(hdl->h_parent, devnm + 1, NULL,
1858                     NDI_DEVI_REMOVE | NDI_UNCONFIG);
1859                 kmem_free(devnm, MAXNAMELEN + 1);
1860         }
1861         if (rv == 0) {
1862                 hdl->h_child = NULL;
1863         }
1864 
1865         ndi_devi_exit(hdl->h_parent, circ);
1866         return (rv == NDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
1867 }
1868 
1869 void
1870 bd_xfer_done(bd_xfer_t *xfer, int err)
1871 {
1872         bd_xfer_impl_t  *xi = (void *)xfer;
1873         buf_t           *bp = xi->i_bp;
1874         int             rv = DDI_SUCCESS;
1875         bd_t            *bd = xi->i_bd;
1876         size_t          len;
1877 
1878         if (err != 0) {
1879                 bd_runq_exit(xi, err);
1880                 atomic_inc_32(&bd->d_kerr->bd_harderrs.value.ui32);
1881 
1882                 bp->b_resid += xi->i_resid;
1883                 bd_xfer_free(xi);
1884                 bioerror(bp, err);
1885                 biodone(bp);
1886                 return;
1887         }
1888 
1889         xi->i_cur_win++;
1890         xi->i_resid -= xi->i_len;
1891 
1892         if (xi->i_resid == 0) {
1893                 /* Job completed succcessfully! */
1894                 bd_runq_exit(xi, 0);
1895 
1896                 bd_xfer_free(xi);
1897                 biodone(bp);
1898                 return;
1899         }
1900 
1901         xi->i_blkno += xi->i_nblks;
1902 
1903         if (bd->d_use_dma) {
1904                 /* More transfer still pending... advance to next DMA window. */
1905                 rv = ddi_dma_getwin(xi->i_dmah, xi->i_cur_win,
1906                     &xi->i_offset, &len, &xi->i_dmac, &xi->i_ndmac);
1907         } else {
1908                 /* Advance memory window. */
1909                 xi->i_kaddr += xi->i_len;
1910                 xi->i_offset += xi->i_len;
1911                 len = min(bp->b_bcount - xi->i_offset, bd->d_maxxfer);
1912         }
1913 
1914 
1915         if ((rv != DDI_SUCCESS) ||
1916             (P2PHASE(len, (1U << DEV_BSHIFT) != 0))) {
1917                 bd_runq_exit(xi, EFAULT);
1918 
1919                 bp->b_resid += xi->i_resid;
1920                 bd_xfer_free(xi);
1921                 bioerror(bp, EFAULT);
1922                 biodone(bp);
1923                 return;
1924         }
1925         xi->i_len = len;
1926         xi->i_nblks = howmany(len, (1U << xi->i_blkshift));
1927 
1928         /* Submit next window to hardware. */
1929         rv = xi->i_func(bd->d_private, &xi->i_public);
1930         if (rv != 0) {
1931                 bd_runq_exit(xi, rv);
1932 
1933                 atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);
1934 
1935                 bp->b_resid += xi->i_resid;
1936                 bd_xfer_free(xi);
1937                 bioerror(bp, rv);
1938                 biodone(bp);
1939         }
1940 }
1941 
1942 void
1943 bd_error(bd_xfer_t *xfer, int error)
1944 {
1945         bd_xfer_impl_t  *xi = (void *)xfer;
1946         bd_t            *bd = xi->i_bd;
1947 
1948         switch (error) {
1949         case BD_ERR_MEDIA:
1950                 atomic_inc_32(&bd->d_kerr->bd_rq_media_err.value.ui32);
1951                 break;
1952         case BD_ERR_NTRDY:
1953                 atomic_inc_32(&bd->d_kerr->bd_rq_ntrdy_err.value.ui32);
1954                 break;
1955         case BD_ERR_NODEV:
1956                 atomic_inc_32(&bd->d_kerr->bd_rq_nodev_err.value.ui32);
1957                 break;
1958         case BD_ERR_RECOV:
1959                 atomic_inc_32(&bd->d_kerr->bd_rq_recov_err.value.ui32);
1960                 break;
1961         case BD_ERR_ILLRQ:
1962                 atomic_inc_32(&bd->d_kerr->bd_rq_illrq_err.value.ui32);
1963                 break;
1964         case BD_ERR_PFA:
1965                 atomic_inc_32(&bd->d_kerr->bd_rq_pfa_err.value.ui32);
1966                 break;
1967         default:
1968                 cmn_err(CE_PANIC, "bd_error: unknown error type %d", error);
1969                 break;
1970         }
1971 }
1972 
1973 void
1974 bd_state_change(bd_handle_t hdl)
1975 {
1976         bd_t            *bd;
1977 
1978         if ((bd = hdl->h_bd) != NULL) {
1979                 bd_update_state(bd);
1980         }
1981 }
1982 
/*
 * Install blkdev's nexus bus_ops into a parent driver's dev_ops so that
 * blkdev children can be enumerated under it.  Called by client drivers
 * from their _init() before mod_install().
 */
void
bd_mod_init(struct dev_ops *devops)
{
	/*
	 * Mostly pass-through to the DDI defaults; only bus_ctl is
	 * implemented locally (see bd_bus_ctl above).  Entries marked
	 * OBSOLETE are unused by modern frameworks.
	 */
	static struct bus_ops bd_bus_ops = {
		BUSO_REV,		/* busops_rev */
		nullbusmap,		/* bus_map */
		NULL,			/* bus_get_intrspec (OBSOLETE) */
		NULL,			/* bus_add_intrspec (OBSOLETE) */
		NULL,			/* bus_remove_intrspec (OBSOLETE) */
		i_ddi_map_fault,	/* bus_map_fault */
		NULL,			/* bus_dma_map (OBSOLETE) */
		ddi_dma_allochdl,	/* bus_dma_allochdl */
		ddi_dma_freehdl,	/* bus_dma_freehdl */
		ddi_dma_bindhdl,	/* bus_dma_bindhdl */
		ddi_dma_unbindhdl,	/* bus_dma_unbindhdl */
		ddi_dma_flush,		/* bus_dma_flush */
		ddi_dma_win,		/* bus_dma_win */
		ddi_dma_mctl,		/* bus_dma_ctl */
		bd_bus_ctl,		/* bus_ctl */
		ddi_bus_prop_op,	/* bus_prop_op */
		NULL,			/* bus_get_eventcookie */
		NULL,			/* bus_add_eventcall */
		NULL,			/* bus_remove_eventcall */
		NULL,			/* bus_post_event */
		NULL,			/* bus_intr_ctl (OBSOLETE) */
		NULL,			/* bus_config */
		NULL,			/* bus_unconfig */
		NULL,			/* bus_fm_init */
		NULL,			/* bus_fm_fini */
		NULL,			/* bus_fm_access_enter */
		NULL,			/* bus_fm_access_exit */
		NULL,			/* bus_power */
		NULL,			/* bus_intr_op */
	};

	devops->devo_bus_ops = &bd_bus_ops;

	/*
	 * NB: The device driver is free to supply its own
	 * character entry device support.
	 */
}
2025 
/*
 * Undo bd_mod_init(): detach the blkdev bus_ops from the client
 * driver's dev_ops.  Called from the client's _fini().
 */
void
bd_mod_fini(struct dev_ops *devops)
{
	devops->devo_bus_ops = NULL;
}