1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2012 Garrett D'Amore <garrett@damore.org>.  All rights reserved.
  24  * Copyright 2012 Alexey Zaytsev <alexey.zaytsev@gmail.com> All rights reserved.
  25  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  26  */
  27 
  28 #include <sys/types.h>
  29 #include <sys/ksynch.h>
  30 #include <sys/kmem.h>
  31 #include <sys/file.h>
  32 #include <sys/errno.h>
  33 #include <sys/open.h>
  34 #include <sys/buf.h>
  35 #include <sys/uio.h>
  36 #include <sys/aio_req.h>
  37 #include <sys/cred.h>
  38 #include <sys/modctl.h>
  39 #include <sys/cmlb.h>
  40 #include <sys/conf.h>
  41 #include <sys/devops.h>
  42 #include <sys/list.h>
  43 #include <sys/sysmacros.h>
  44 #include <sys/dkio.h>
  45 #include <sys/vtoc.h>
  46 #include <sys/scsi/scsi.h>        /* for DTYPE_DIRECT */
  47 #include <sys/kstat.h>
  48 #include <sys/fs/dv_node.h>
  49 #include <sys/ddi.h>
  50 #include <sys/sunddi.h>
  51 #include <sys/note.h>
  52 #include <sys/blkdev.h>
  53 #include <sys/scsi/impl/inquiry.h>
  54 
/*
 * Minor number layout: each instance owns BD_MAXPART consecutive minor
 * numbers, one per partition.  BDINST() and BDPART() split the minor
 * number of a dev_t back into the instance number and the partition
 * index respectively.
 */
#define BD_MAXPART      64
#define BDINST(dev)     (getminor(dev) / BD_MAXPART)
#define BDPART(dev)     (getminor(dev) % BD_MAXPART)

/* Forward type names for the driver's private structures. */
typedef struct bd bd_t;
typedef struct bd_xfer_impl bd_xfer_impl_t;
  61 
/*
 * Per-instance soft state.  It is allocated by bd_attach() with
 * ddi_soft_state_zalloc() (so every field starts out zeroed) and
 * released by bd_detach().
 */
struct bd {
        /* Parent's cookie (h_private); first argument of d_ops callbacks. */
        void            *d_private;
        dev_info_t      *d_dip;
        kmutex_t        d_ocmutex;
        /* Also installed as ks_lock of the I/O kstat (d_ksp). */
        kmutex_t        d_iomutex;
        /*
         * Lock taken around updates of d_kerr: either the error kstat's
         * ks_lock or a separately allocated mutex when the error kstat
         * could not be created.
         */
        kmutex_t        *d_errmutex;
        kmutex_t        d_statemutex;
        kcondvar_t      d_statecv;
        enum dkio_state d_state;
        cmlb_handle_t   d_cmlbh;
        unsigned        d_open_lyr[BD_MAXPART]; /* open count */
        uint64_t        d_open_excl;    /* bit mask indexed by partition */
        uint64_t        d_open_reg[OTYPCNT];            /* bit mask */

        uint32_t        d_qsize;
        uint32_t        d_qactive;
        /* Largest single transfer, in bytes (kept DEV_BSIZE aligned). */
        uint32_t        d_maxxfer;
        /* log2 of the block size; bd_attach() starts with 9 (512 bytes). */
        uint32_t        d_blkshift;
        uint32_t        d_pblkshift;
        uint64_t        d_numblks;
        ddi_devid_t     d_devid;

        /* kmem cache of bd_xfer_impl_t objects and the two transfer lists. */
        kmem_cache_t    *d_cache;
        list_t          d_runq;
        list_t          d_waitq;
        /* I/O kstat and its data (scratch kstat_io_t if d_ksp is NULL). */
        kstat_t         *d_ksp;
        kstat_io_t      *d_kiop;
        /* Error kstat and its data (scratch copy if d_errstats is NULL). */
        kstat_t         *d_errstats;
        struct bd_errstats *d_kerr;

        boolean_t       d_rdonly;
        boolean_t       d_ssd;
        boolean_t       d_removable;
        boolean_t       d_hotpluggable;
        /* B_TRUE when the parent supplied DMA attributes (h_dma). */
        boolean_t       d_use_dma;

        /* Private copy of the parent's DMA attributes, see bd_attach(). */
        ddi_dma_attr_t  d_dma;
        bd_ops_t        d_ops;
        bd_handle_t     d_handle;
};
 102 
/*
 * Handle describing one device on behalf of the parent driver.
 * bd_attach() obtains it through ddi_get_parent_data() and copies
 * h_ops, h_dma and h_private into the soft state; h_bd is pointed back
 * at that soft state.
 */
struct bd_handle {
        bd_ops_t        h_ops;
        ddi_dma_attr_t  *h_dma;         /* NULL means "do not use DMA" */
        dev_info_t      *h_parent;
        dev_info_t      *h_child;
        void            *h_private;
        bd_t            *h_bd;
        char            *h_name;
        char            h_addr[30];     /* enough for w%0.16x,%X */
};
 113 
/*
 * Private wrapper around the public bd_xfer_t.  Objects come from the
 * per-instance kmem cache (d_cache) and are filled in by bd_xfer_alloc().
 */
struct bd_xfer_impl {
        bd_xfer_t       i_public;       /* reached through the i_* macros */
        list_node_t     i_linkage;      /* linkage for d_runq / d_waitq */
        bd_t            *i_bd;          /* owning instance, set by ctor */
        buf_t           *i_bp;          /* buf this transfer serves */
        uint_t          i_num_win;      /* number of transfer windows */
        uint_t          i_cur_win;      /* index of the current window */
        off_t           i_offset;       /* offset of the current window */
        int             (*i_func)(void *, bd_xfer_t *);
        uint32_t        i_blkshift;     /* block shift in use at alloc time */
        size_t          i_len;          /* bytes in the current window */
        size_t          i_resid;        /* starts out as bp->b_bcount */
};
 127 
/*
 * Shorthand for reaching the fields of the embedded public bd_xfer_t
 * (i_public) directly through a bd_xfer_impl_t.
 */
#define i_dmah          i_public.x_dmah
#define i_dmac          i_public.x_dmac
#define i_ndmac         i_public.x_ndmac
#define i_kaddr         i_public.x_kaddr
#define i_nblks         i_public.x_nblks
#define i_blkno         i_public.x_blkno
#define i_flags         i_public.x_flags
 135 
 136 
/*
 * Private prototypes.
 */

/* Inquiry property and error kstat helpers used by bd_attach(). */
static void bd_prop_update_inqstring(dev_info_t *, char *, char *, size_t);
static void bd_create_inquiry_props(dev_info_t *, bd_drive_t *);
static void bd_create_errstats(bd_t *, int, bd_drive_t *);
static void bd_errstats_setstr(kstat_named_t *, char *, size_t, char *);
static void bd_init_errstats(bd_t *, bd_drive_t *);

/* dev_ops entry points (see bd_dev_ops). */
static int bd_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static int bd_attach(dev_info_t *, ddi_attach_cmd_t);
static int bd_detach(dev_info_t *, ddi_detach_cmd_t);

/* cb_ops entry points (see bd_cb_ops). */
static int bd_open(dev_t *, int, int, cred_t *);
static int bd_close(dev_t, int, int, cred_t *);
static int bd_strategy(struct buf *);
static int bd_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
static int bd_dump(dev_t, caddr_t, daddr_t, int);
static int bd_read(dev_t, struct uio *, cred_t *);
static int bd_write(dev_t, struct uio *, cred_t *);
static int bd_aread(dev_t, struct aio_req *, cred_t *);
static int bd_awrite(dev_t, struct aio_req *, cred_t *);
static int bd_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
    caddr_t, int *);

/* cmlb callbacks (see bd_tg_ops), xfer cache hooks and internal helpers. */
static int bd_tg_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
    void *);
static int bd_tg_getinfo(dev_info_t *, int, void *, void *);
static int bd_xfer_ctor(void *, void *, int);
static void bd_xfer_dtor(void *, void *);
static void bd_sched(bd_t *);
static void bd_submit(bd_t *, bd_xfer_impl_t *);
static void bd_runq_exit(bd_xfer_impl_t *, int);
static void bd_update_state(bd_t *);
static int bd_check_state(bd_t *, enum dkio_state *);
static int bd_flush_write_cache(bd_t *, struct dk_callback *);
 174 
 175 struct cmlb_tg_ops bd_tg_ops = {
 176         TG_DK_OPS_VERSION_1,
 177         bd_tg_rdwr,
 178         bd_tg_getinfo,
 179 };
 180 
 181 static struct cb_ops bd_cb_ops = {
 182         bd_open,                /* open */
 183         bd_close,               /* close */
 184         bd_strategy,            /* strategy */
 185         nodev,                  /* print */
 186         bd_dump,                /* dump */
 187         bd_read,                /* read */
 188         bd_write,               /* write */
 189         bd_ioctl,               /* ioctl */
 190         nodev,                  /* devmap */
 191         nodev,                  /* mmap */
 192         nodev,                  /* segmap */
 193         nochpoll,               /* poll */
 194         bd_prop_op,             /* cb_prop_op */
 195         0,                      /* streamtab  */
 196         D_64BIT | D_MP,         /* Driver comaptibility flag */
 197         CB_REV,                 /* cb_rev */
 198         bd_aread,               /* async read */
 199         bd_awrite               /* async write */
 200 };
 201 
 202 struct dev_ops bd_dev_ops = {
 203         DEVO_REV,               /* devo_rev, */
 204         0,                      /* refcnt  */
 205         bd_getinfo,             /* getinfo */
 206         nulldev,                /* identify */
 207         nulldev,                /* probe */
 208         bd_attach,              /* attach */
 209         bd_detach,              /* detach */
 210         nodev,                  /* reset */
 211         &bd_cb_ops,                 /* driver operations */
 212         NULL,                   /* bus operations */
 213         NULL,                   /* power */
 214         ddi_quiesce_not_needed, /* quiesce */
 215 };
 216 
 217 static struct modldrv modldrv = {
 218         &mod_driverops,
 219         "Generic Block Device",
 220         &bd_dev_ops,
 221 };
 222 
 223 static struct modlinkage modlinkage = {
 224         MODREV_1, { &modldrv, NULL }
 225 };
 226 
/* Soft state anchor; one struct bd per instance (see _init/bd_attach). */
static void *bd_state;
/* Module-wide reader/writer lock, initialized in _init(). */
static krwlock_t bd_lock;
 229 
 230 int
 231 _init(void)
 232 {
 233         int     rv;
 234 
 235         rv = ddi_soft_state_init(&bd_state, sizeof (struct bd), 2);
 236         if (rv != DDI_SUCCESS) {
 237                 return (rv);
 238         }
 239         rw_init(&bd_lock, NULL, RW_DRIVER, NULL);
 240         rv = mod_install(&modlinkage);
 241         if (rv != DDI_SUCCESS) {
 242                 rw_destroy(&bd_lock);
 243                 ddi_soft_state_fini(&bd_state);
 244         }
 245         return (rv);
 246 }
 247 
 248 int
 249 _fini(void)
 250 {
 251         int     rv;
 252 
 253         rv = mod_remove(&modlinkage);
 254         if (rv == DDI_SUCCESS) {
 255                 rw_destroy(&bd_lock);
 256                 ddi_soft_state_fini(&bd_state);
 257         }
 258         return (rv);
 259 }
 260 
 261 int
 262 _info(struct modinfo *modinfop)
 263 {
 264         return (mod_info(&modlinkage, modinfop));
 265 }
 266 
 267 static int
 268 bd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
 269 {
 270         bd_t    *bd;
 271         minor_t inst;
 272 
 273         _NOTE(ARGUNUSED(dip));
 274 
 275         inst = BDINST((dev_t)arg);
 276 
 277         switch (cmd) {
 278         case DDI_INFO_DEVT2DEVINFO:
 279                 bd = ddi_get_soft_state(bd_state, inst);
 280                 if (bd == NULL) {
 281                         return (DDI_FAILURE);
 282                 }
 283                 *resultp = (void *)bd->d_dip;
 284                 break;
 285 
 286         case DDI_INFO_DEVT2INSTANCE:
 287                 *resultp = (void *)(intptr_t)inst;
 288                 break;
 289 
 290         default:
 291                 return (DDI_FAILURE);
 292         }
 293         return (DDI_SUCCESS);
 294 }
 295 
 296 static void
 297 bd_prop_update_inqstring(dev_info_t *dip, char *name, char *data, size_t len)
 298 {
 299         int     ilen;
 300         char    *data_string;
 301 
 302         ilen = scsi_ascii_inquiry_len(data, len);
 303         ASSERT3U(ilen, <=, len);
 304         if (ilen <= 0)
 305                 return;
 306         /* ensure null termination */
 307         data_string = kmem_zalloc(ilen + 1, KM_SLEEP);
 308         bcopy(data, data_string, ilen);
 309         (void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, name, data_string);
 310         kmem_free(data_string, ilen + 1);
 311 }
 312 
 313 static void
 314 bd_create_inquiry_props(dev_info_t *dip, bd_drive_t *drive)
 315 {
 316         if (drive->d_vendor_len > 0)
 317                 bd_prop_update_inqstring(dip, INQUIRY_VENDOR_ID,
 318                     drive->d_vendor, drive->d_vendor_len);
 319 
 320         if (drive->d_product_len > 0)
 321                 bd_prop_update_inqstring(dip, INQUIRY_PRODUCT_ID,
 322                     drive->d_product, drive->d_product_len);
 323 
 324         if (drive->d_serial_len > 0)
 325                 bd_prop_update_inqstring(dip, INQUIRY_SERIAL_NO,
 326                     drive->d_serial, drive->d_serial_len);
 327 
 328         if (drive->d_revision_len > 0)
 329                 bd_prop_update_inqstring(dip, INQUIRY_REVISION_ID,
 330                     drive->d_revision, drive->d_revision_len);
 331 }
 332 
 333 static void
 334 bd_create_errstats(bd_t *bd, int inst, bd_drive_t *drive)
 335 {
 336         char    ks_module[KSTAT_STRLEN];
 337         char    ks_name[KSTAT_STRLEN];
 338         int     ndata = sizeof (struct bd_errstats) / sizeof (kstat_named_t);
 339 
 340         if (bd->d_errstats != NULL)
 341                 return;
 342 
 343         (void) snprintf(ks_module, sizeof (ks_module), "%serr",
 344             ddi_driver_name(bd->d_dip));
 345         (void) snprintf(ks_name, sizeof (ks_name), "%s%d,err",
 346             ddi_driver_name(bd->d_dip), inst);
 347 
 348         bd->d_errstats = kstat_create(ks_module, inst, ks_name, "device_error",
 349             KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
 350 
 351         if (bd->d_errstats == NULL) {
 352                 /*
 353                  * Even if we cannot create the kstat, we create a
 354                  * scratch kstat.  The reason for this is to ensure
 355                  * that we can update the kstat all of the time,
 356                  * without adding an extra branch instruction.
 357                  */
 358                 bd->d_kerr = kmem_zalloc(sizeof (struct bd_errstats),
 359                     KM_SLEEP);
 360                 bd->d_errmutex = kmem_zalloc(sizeof (kmutex_t), KM_SLEEP);
 361                 mutex_init(bd->d_errmutex, NULL, MUTEX_DRIVER, NULL);
 362         } else {
 363                 if (bd->d_errstats->ks_lock == NULL) {
 364                         bd->d_errstats->ks_lock = kmem_zalloc(sizeof (kmutex_t),
 365                             KM_SLEEP);
 366                         mutex_init(bd->d_errstats->ks_lock, NULL, MUTEX_DRIVER,
 367                             NULL);
 368                 }
 369 
 370                 bd->d_errmutex = bd->d_errstats->ks_lock;
 371                 bd->d_kerr = (struct bd_errstats *)bd->d_errstats->ks_data;
 372         }
 373 
 374         kstat_named_init(&bd->d_kerr->bd_softerrs,    "Soft Errors",
 375             KSTAT_DATA_UINT32);
 376         kstat_named_init(&bd->d_kerr->bd_harderrs,    "Hard Errors",
 377             KSTAT_DATA_UINT32);
 378         kstat_named_init(&bd->d_kerr->bd_transerrs,   "Transport Errors",
 379             KSTAT_DATA_UINT32);
 380 
 381         if (drive->d_model_len > 0) {
 382                 kstat_named_init(&bd->d_kerr->bd_model,       "Model",
 383                     KSTAT_DATA_STRING);
 384         } else {
 385                 kstat_named_init(&bd->d_kerr->bd_vid, "Vendor",
 386                     KSTAT_DATA_STRING);
 387                 kstat_named_init(&bd->d_kerr->bd_pid, "Product",
 388                     KSTAT_DATA_STRING);
 389         }
 390 
 391         kstat_named_init(&bd->d_kerr->bd_revision,    "Revision",
 392             KSTAT_DATA_STRING);
 393         kstat_named_init(&bd->d_kerr->bd_serial,      "Serial No",
 394             KSTAT_DATA_STRING);
 395         kstat_named_init(&bd->d_kerr->bd_capacity,    "Size",
 396             KSTAT_DATA_ULONGLONG);
 397         kstat_named_init(&bd->d_kerr->bd_rq_media_err,        "Media Error",
 398             KSTAT_DATA_UINT32);
 399         kstat_named_init(&bd->d_kerr->bd_rq_ntrdy_err,        "Device Not Ready",
 400             KSTAT_DATA_UINT32);
 401         kstat_named_init(&bd->d_kerr->bd_rq_nodev_err,        "No Device",
 402             KSTAT_DATA_UINT32);
 403         kstat_named_init(&bd->d_kerr->bd_rq_recov_err,        "Recoverable",
 404             KSTAT_DATA_UINT32);
 405         kstat_named_init(&bd->d_kerr->bd_rq_illrq_err,        "Illegal Request",
 406             KSTAT_DATA_UINT32);
 407         kstat_named_init(&bd->d_kerr->bd_rq_pfa_err,
 408             "Predictive Failure Analysis", KSTAT_DATA_UINT32);
 409 
 410         bd->d_errstats->ks_private = bd;
 411 
 412         kstat_install(bd->d_errstats);
 413 }
 414 
 415 static void
 416 bd_errstats_setstr(kstat_named_t *k, char *str, size_t len, char *alt)
 417 {
 418         char    *tmp;
 419 
 420         if (KSTAT_NAMED_STR_PTR(k) == NULL) {
 421                 if (len > 0) {
 422                         tmp = kmem_alloc(len + 1, KM_SLEEP);
 423                         (void) strlcpy(tmp, str, len + 1);
 424                 } else {
 425                         tmp = alt;
 426                 }
 427 
 428                 kstat_named_setstr(k, tmp);
 429         }
 430 }
 431 
 432 static void
 433 bd_init_errstats(bd_t *bd, bd_drive_t *drive)
 434 {
 435         struct bd_errstats      *est = bd->d_kerr;
 436 
 437         mutex_enter(bd->d_errmutex);
 438 
 439         if (drive->d_model_len > 0 &&
 440             KSTAT_NAMED_STR_PTR(&est->bd_model) == NULL) {
 441                 bd_errstats_setstr(&est->bd_model, drive->d_model,
 442                     drive->d_model_len, NULL);
 443         } else {
 444                 bd_errstats_setstr(&est->bd_vid, drive->d_vendor,
 445                     drive->d_vendor_len, "Unknown ");
 446                 bd_errstats_setstr(&est->bd_pid, drive->d_product,
 447                     drive->d_product_len, "Unknown         ");
 448         }
 449 
 450         bd_errstats_setstr(&est->bd_revision, drive->d_revision,
 451             drive->d_revision_len, "0001");
 452         bd_errstats_setstr(&est->bd_serial, drive->d_serial,
 453             drive->d_serial_len, "0               ");
 454 
 455         mutex_exit(bd->d_errmutex);
 456 }
 457 
 458 static int
 459 bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 460 {
 461         int             inst;
 462         bd_handle_t     hdl;
 463         bd_t            *bd;
 464         bd_drive_t      drive;
 465         int             rv;
 466         char            name[16];
 467         char            kcache[32];
 468 
 469         switch (cmd) {
 470         case DDI_ATTACH:
 471                 break;
 472         case DDI_RESUME:
 473                 /* We don't do anything native for suspend/resume */
 474                 return (DDI_SUCCESS);
 475         default:
 476                 return (DDI_FAILURE);
 477         }
 478 
 479         inst = ddi_get_instance(dip);
 480         hdl = ddi_get_parent_data(dip);
 481 
 482         (void) snprintf(name, sizeof (name), "%s%d",
 483             ddi_driver_name(dip), ddi_get_instance(dip));
 484         (void) snprintf(kcache, sizeof (kcache), "%s_xfer", name);
 485 
 486         if (hdl == NULL) {
 487                 cmn_err(CE_WARN, "%s: missing parent data!", name);
 488                 return (DDI_FAILURE);
 489         }
 490 
 491         if (ddi_soft_state_zalloc(bd_state, inst) != DDI_SUCCESS) {
 492                 cmn_err(CE_WARN, "%s: unable to zalloc soft state!", name);
 493                 return (DDI_FAILURE);
 494         }
 495         bd = ddi_get_soft_state(bd_state, inst);
 496 
 497         if (hdl->h_dma) {
 498                 bd->d_dma = *(hdl->h_dma);
 499                 bd->d_dma.dma_attr_granular =
 500                     max(DEV_BSIZE, bd->d_dma.dma_attr_granular);
 501                 bd->d_use_dma = B_TRUE;
 502 
 503                 if (bd->d_maxxfer &&
 504                     (bd->d_maxxfer != bd->d_dma.dma_attr_maxxfer)) {
 505                         cmn_err(CE_WARN,
 506                             "%s: inconsistent maximum transfer size!",
 507                             name);
 508                         /* We force it */
 509                         bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
 510                 } else {
 511                         bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
 512                 }
 513         } else {
 514                 bd->d_use_dma = B_FALSE;
 515                 if (bd->d_maxxfer == 0) {
 516                         bd->d_maxxfer = 1024 * 1024;
 517                 }
 518         }
 519         bd->d_ops = hdl->h_ops;
 520         bd->d_private = hdl->h_private;
 521         bd->d_blkshift = 9;  /* 512 bytes, to start */
 522 
 523         if (bd->d_maxxfer % DEV_BSIZE) {
 524                 cmn_err(CE_WARN, "%s: maximum transfer misaligned!", name);
 525                 bd->d_maxxfer &= ~(DEV_BSIZE - 1);
 526         }
 527         if (bd->d_maxxfer < DEV_BSIZE) {
 528                 cmn_err(CE_WARN, "%s: maximum transfer size too small!", name);
 529                 ddi_soft_state_free(bd_state, inst);
 530                 return (DDI_FAILURE);
 531         }
 532 
 533         bd->d_dip = dip;
 534         bd->d_handle = hdl;
 535         hdl->h_bd = bd;
 536         ddi_set_driver_private(dip, bd);
 537 
 538         mutex_init(&bd->d_iomutex, NULL, MUTEX_DRIVER, NULL);
 539         mutex_init(&bd->d_ocmutex, NULL, MUTEX_DRIVER, NULL);
 540         mutex_init(&bd->d_statemutex, NULL, MUTEX_DRIVER, NULL);
 541         cv_init(&bd->d_statecv, NULL, CV_DRIVER, NULL);
 542 
 543         list_create(&bd->d_waitq, sizeof (bd_xfer_impl_t),
 544             offsetof(struct bd_xfer_impl, i_linkage));
 545         list_create(&bd->d_runq, sizeof (bd_xfer_impl_t),
 546             offsetof(struct bd_xfer_impl, i_linkage));
 547 
 548         bd->d_cache = kmem_cache_create(kcache, sizeof (bd_xfer_impl_t), 8,
 549             bd_xfer_ctor, bd_xfer_dtor, NULL, bd, NULL, 0);
 550 
 551         bd->d_ksp = kstat_create(ddi_driver_name(dip), inst, NULL, "disk",
 552             KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
 553         if (bd->d_ksp != NULL) {
 554                 bd->d_ksp->ks_lock = &bd->d_iomutex;
 555                 kstat_install(bd->d_ksp);
 556                 bd->d_kiop = bd->d_ksp->ks_data;
 557         } else {
 558                 /*
 559                  * Even if we cannot create the kstat, we create a
 560                  * scratch kstat.  The reason for this is to ensure
 561                  * that we can update the kstat all of the time,
 562                  * without adding an extra branch instruction.
 563                  */
 564                 bd->d_kiop = kmem_zalloc(sizeof (kstat_io_t), KM_SLEEP);
 565         }
 566 
 567         cmlb_alloc_handle(&bd->d_cmlbh);
 568 
 569         bd->d_state = DKIO_NONE;
 570 
 571         bzero(&drive, sizeof (drive));
 572         bd->d_ops.o_drive_info(bd->d_private, &drive);
 573         bd->d_qsize = drive.d_qsize;
 574         bd->d_removable = drive.d_removable;
 575         bd->d_hotpluggable = drive.d_hotpluggable;
 576 
 577         if (drive.d_maxxfer && drive.d_maxxfer < bd->d_maxxfer)
 578                 bd->d_maxxfer = drive.d_maxxfer;
 579 
 580         bd_create_inquiry_props(dip, &drive);
 581 
 582         bd_create_errstats(bd, inst, &drive);
 583         bd_init_errstats(bd, &drive);
 584         bd_update_state(bd);
 585 
 586         rv = cmlb_attach(dip, &bd_tg_ops, DTYPE_DIRECT,
 587             bd->d_removable, bd->d_hotpluggable,
 588             /*LINTED: E_BAD_PTR_CAST_ALIGN*/
 589             *(uint64_t *)drive.d_eui64 != 0 ? DDI_NT_BLOCK_BLKDEV :
 590             drive.d_lun >= 0 ? DDI_NT_BLOCK_CHAN : DDI_NT_BLOCK,
 591             CMLB_FAKE_LABEL_ONE_PARTITION, bd->d_cmlbh, 0);
 592         if (rv != 0) {
 593                 cmlb_free_handle(&bd->d_cmlbh);
 594                 kmem_cache_destroy(bd->d_cache);
 595                 mutex_destroy(&bd->d_iomutex);
 596                 mutex_destroy(&bd->d_ocmutex);
 597                 mutex_destroy(&bd->d_statemutex);
 598                 cv_destroy(&bd->d_statecv);
 599                 list_destroy(&bd->d_waitq);
 600                 list_destroy(&bd->d_runq);
 601                 if (bd->d_ksp != NULL) {
 602                         kstat_delete(bd->d_ksp);
 603                         bd->d_ksp = NULL;
 604                 } else {
 605                         kmem_free(bd->d_kiop, sizeof (kstat_io_t));
 606                 }
 607                 ddi_soft_state_free(bd_state, inst);
 608                 return (DDI_FAILURE);
 609         }
 610 
 611         if (bd->d_ops.o_devid_init != NULL) {
 612                 rv = bd->d_ops.o_devid_init(bd->d_private, dip, &bd->d_devid);
 613                 if (rv == DDI_SUCCESS) {
 614                         if (ddi_devid_register(dip, bd->d_devid) !=
 615                             DDI_SUCCESS) {
 616                                 cmn_err(CE_WARN,
 617                                     "%s: unable to register devid", name);
 618                         }
 619                 }
 620         }
 621 
 622         /*
 623          * Add a zero-length attribute to tell the world we support
 624          * kernel ioctls (for layered drivers).  Also set up properties
 625          * used by HAL to identify removable media.
 626          */
 627         (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
 628             DDI_KERNEL_IOCTL, NULL, 0);
 629         if (bd->d_removable) {
 630                 (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
 631                     "removable-media", NULL, 0);
 632         }
 633         if (bd->d_hotpluggable) {
 634                 (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
 635                     "hotpluggable", NULL, 0);
 636         }
 637 
 638         ddi_report_dev(dip);
 639 
 640         return (DDI_SUCCESS);
 641 }
 642 
 643 static int
 644 bd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 645 {
 646         bd_t    *bd;
 647 
 648         bd = ddi_get_driver_private(dip);
 649 
 650         switch (cmd) {
 651         case DDI_DETACH:
 652                 break;
 653         case DDI_SUSPEND:
 654                 /* We don't suspend, but our parent does */
 655                 return (DDI_SUCCESS);
 656         default:
 657                 return (DDI_FAILURE);
 658         }
 659         if (bd->d_ksp != NULL) {
 660                 kstat_delete(bd->d_ksp);
 661                 bd->d_ksp = NULL;
 662         } else {
 663                 kmem_free(bd->d_kiop, sizeof (kstat_io_t));
 664         }
 665 
 666         if (bd->d_errstats != NULL) {
 667                 kstat_delete(bd->d_errstats);
 668                 bd->d_errstats = NULL;
 669         } else {
 670                 kmem_free(bd->d_kerr, sizeof (struct bd_errstats));
 671                 mutex_destroy(bd->d_errmutex);
 672         }
 673 
 674         cmlb_detach(bd->d_cmlbh, 0);
 675         cmlb_free_handle(&bd->d_cmlbh);
 676         if (bd->d_devid)
 677                 ddi_devid_free(bd->d_devid);
 678         kmem_cache_destroy(bd->d_cache);
 679         mutex_destroy(&bd->d_iomutex);
 680         mutex_destroy(&bd->d_ocmutex);
 681         mutex_destroy(&bd->d_statemutex);
 682         cv_destroy(&bd->d_statecv);
 683         list_destroy(&bd->d_waitq);
 684         list_destroy(&bd->d_runq);
 685         ddi_soft_state_free(bd_state, ddi_get_instance(dip));
 686         return (DDI_SUCCESS);
 687 }
 688 
 689 static int
 690 bd_xfer_ctor(void *buf, void *arg, int kmflag)
 691 {
 692         bd_xfer_impl_t  *xi;
 693         bd_t            *bd = arg;
 694         int             (*dcb)(caddr_t);
 695 
 696         if (kmflag == KM_PUSHPAGE || kmflag == KM_SLEEP) {
 697                 dcb = DDI_DMA_SLEEP;
 698         } else {
 699                 dcb = DDI_DMA_DONTWAIT;
 700         }
 701 
 702         xi = buf;
 703         bzero(xi, sizeof (*xi));
 704         xi->i_bd = bd;
 705 
 706         if (bd->d_use_dma) {
 707                 if (ddi_dma_alloc_handle(bd->d_dip, &bd->d_dma, dcb, NULL,
 708                     &xi->i_dmah) != DDI_SUCCESS) {
 709                         return (-1);
 710                 }
 711         }
 712 
 713         return (0);
 714 }
 715 
 716 static void
 717 bd_xfer_dtor(void *buf, void *arg)
 718 {
 719         bd_xfer_impl_t  *xi = buf;
 720 
 721         _NOTE(ARGUNUSED(arg));
 722 
 723         if (xi->i_dmah)
 724                 ddi_dma_free_handle(&xi->i_dmah);
 725         xi->i_dmah = NULL;
 726 }
 727 
 728 static bd_xfer_impl_t *
 729 bd_xfer_alloc(bd_t *bd, struct buf *bp, int (*func)(void *, bd_xfer_t *),
 730     int kmflag)
 731 {
 732         bd_xfer_impl_t          *xi;
 733         int                     rv = 0;
 734         int                     status;
 735         unsigned                dir;
 736         int                     (*cb)(caddr_t);
 737         size_t                  len;
 738         uint32_t                shift;
 739 
 740         if (kmflag == KM_SLEEP) {
 741                 cb = DDI_DMA_SLEEP;
 742         } else {
 743                 cb = DDI_DMA_DONTWAIT;
 744         }
 745 
 746         xi = kmem_cache_alloc(bd->d_cache, kmflag);
 747         if (xi == NULL) {
 748                 bioerror(bp, ENOMEM);
 749                 return (NULL);
 750         }
 751 
 752         ASSERT(bp);
 753 
 754         xi->i_bp = bp;
 755         xi->i_func = func;
 756         xi->i_blkno = bp->b_lblkno;
 757 
 758         if (bp->b_bcount == 0) {
 759                 xi->i_len = 0;
 760                 xi->i_nblks = 0;
 761                 xi->i_kaddr = NULL;
 762                 xi->i_resid = 0;
 763                 xi->i_num_win = 0;
 764                 goto done;
 765         }
 766 
 767         if (bp->b_flags & B_READ) {
 768                 dir = DDI_DMA_READ;
 769                 xi->i_func = bd->d_ops.o_read;
 770         } else {
 771                 dir = DDI_DMA_WRITE;
 772                 xi->i_func = bd->d_ops.o_write;
 773         }
 774 
 775         shift = bd->d_blkshift;
 776         xi->i_blkshift = shift;
 777 
 778         if (!bd->d_use_dma) {
 779                 bp_mapin(bp);
 780                 rv = 0;
 781                 xi->i_offset = 0;
 782                 xi->i_num_win =
 783                     (bp->b_bcount + (bd->d_maxxfer - 1)) / bd->d_maxxfer;
 784                 xi->i_cur_win = 0;
 785                 xi->i_len = min(bp->b_bcount, bd->d_maxxfer);
 786                 xi->i_nblks = xi->i_len >> shift;
 787                 xi->i_kaddr = bp->b_un.b_addr;
 788                 xi->i_resid = bp->b_bcount;
 789         } else {
 790 
 791                 /*
 792                  * We have to use consistent DMA if the address is misaligned.
 793                  */
 794                 if (((bp->b_flags & (B_PAGEIO | B_REMAPPED)) != B_PAGEIO) &&
 795                     ((uintptr_t)bp->b_un.b_addr & 0x7)) {
 796                         dir |= DDI_DMA_CONSISTENT | DDI_DMA_PARTIAL;
 797                 } else {
 798                         dir |= DDI_DMA_STREAMING | DDI_DMA_PARTIAL;
 799                 }
 800 
 801                 status = ddi_dma_buf_bind_handle(xi->i_dmah, bp, dir, cb,
 802                     NULL, &xi->i_dmac, &xi->i_ndmac);
 803                 switch (status) {
 804                 case DDI_DMA_MAPPED:
 805                         xi->i_num_win = 1;
 806                         xi->i_cur_win = 0;
 807                         xi->i_offset = 0;
 808                         xi->i_len = bp->b_bcount;
 809                         xi->i_nblks = xi->i_len >> shift;
 810                         xi->i_resid = bp->b_bcount;
 811                         rv = 0;
 812                         break;
 813                 case DDI_DMA_PARTIAL_MAP:
 814                         xi->i_cur_win = 0;
 815 
 816                         if ((ddi_dma_numwin(xi->i_dmah, &xi->i_num_win) !=
 817                             DDI_SUCCESS) ||
 818                             (ddi_dma_getwin(xi->i_dmah, 0, &xi->i_offset,
 819                             &len, &xi->i_dmac, &xi->i_ndmac) !=
 820                             DDI_SUCCESS) ||
 821                             (P2PHASE(len, shift) != 0)) {
 822                                 (void) ddi_dma_unbind_handle(xi->i_dmah);
 823                                 rv = EFAULT;
 824                                 goto done;
 825                         }
 826                         xi->i_len = len;
 827                         xi->i_nblks = xi->i_len >> shift;
 828                         xi->i_resid = bp->b_bcount;
 829                         rv = 0;
 830                         break;
 831                 case DDI_DMA_NORESOURCES:
 832                         rv = EAGAIN;
 833                         goto done;
 834                 case DDI_DMA_TOOBIG:
 835                         rv = EINVAL;
 836                         goto done;
 837                 case DDI_DMA_NOMAPPING:
 838                 case DDI_DMA_INUSE:
 839                 default:
 840                         rv = EFAULT;
 841                         goto done;
 842                 }
 843         }
 844 
 845 done:
 846         if (rv != 0) {
 847                 kmem_cache_free(bd->d_cache, xi);
 848                 bioerror(bp, rv);
 849                 return (NULL);
 850         }
 851 
 852         return (xi);
 853 }
 854 
 855 static void
 856 bd_xfer_free(bd_xfer_impl_t *xi)
 857 {
 858         if (xi->i_dmah) {
 859                 (void) ddi_dma_unbind_handle(xi->i_dmah);
 860         }
 861         kmem_cache_free(xi->i_bd->d_cache, xi);
 862 }
 863 
 864 static int
 865 bd_open(dev_t *devp, int flag, int otyp, cred_t *credp)
 866 {
 867         dev_t           dev = *devp;
 868         bd_t            *bd;
 869         minor_t         part;
 870         minor_t         inst;
 871         uint64_t        mask;
 872         boolean_t       ndelay;
 873         int             rv;
 874         diskaddr_t      nblks;
 875         diskaddr_t      lba;
 876 
 877         _NOTE(ARGUNUSED(credp));
 878 
 879         part = BDPART(dev);
 880         inst = BDINST(dev);
 881 
 882         if (otyp >= OTYPCNT)
 883                 return (EINVAL);
 884 
 885         ndelay = (flag & (FNDELAY | FNONBLOCK)) ? B_TRUE : B_FALSE;
 886 
 887         /*
 888          * Block any DR events from changing the set of registered
 889          * devices while we function.
 890          */
 891         rw_enter(&bd_lock, RW_READER);
 892         if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
 893                 rw_exit(&bd_lock);
 894                 return (ENXIO);
 895         }
 896 
 897         mutex_enter(&bd->d_ocmutex);
 898 
 899         ASSERT(part < 64);
 900         mask = (1U << part);
 901 
 902         bd_update_state(bd);
 903 
 904         if (cmlb_validate(bd->d_cmlbh, 0, 0) != 0) {
 905 
 906                 /* non-blocking opens are allowed to succeed */
 907                 if (!ndelay) {
 908                         rv = ENXIO;
 909                         goto done;
 910                 }
 911         } else if (cmlb_partinfo(bd->d_cmlbh, part, &nblks, &lba,
 912             NULL, NULL, 0) == 0) {
 913 
 914                 /*
 915                  * We read the partinfo, verify valid ranges.  If the
 916                  * partition is invalid, and we aren't blocking or
 917                  * doing a raw access, then fail. (Non-blocking and
 918                  * raw accesses can still succeed to allow a disk with
 919                  * bad partition data to opened by format and fdisk.)
 920                  */
 921                 if ((!nblks) && ((!ndelay) || (otyp != OTYP_CHR))) {
 922                         rv = ENXIO;
 923                         goto done;
 924                 }
 925         } else if (!ndelay) {
 926                 /*
 927                  * cmlb_partinfo failed -- invalid partition or no
 928                  * disk label.
 929                  */
 930                 rv = ENXIO;
 931                 goto done;
 932         }
 933 
 934         if ((flag & FWRITE) && bd->d_rdonly) {
 935                 rv = EROFS;
 936                 goto done;
 937         }
 938 
 939         if ((bd->d_open_excl) & (mask)) {
 940                 rv = EBUSY;
 941                 goto done;
 942         }
 943         if (flag & FEXCL) {
 944                 if (bd->d_open_lyr[part]) {
 945                         rv = EBUSY;
 946                         goto done;
 947                 }
 948                 for (int i = 0; i < OTYP_LYR; i++) {
 949                         if (bd->d_open_reg[i] & mask) {
 950                                 rv = EBUSY;
 951                                 goto done;
 952                         }
 953                 }
 954         }
 955 
 956         if (otyp == OTYP_LYR) {
 957                 bd->d_open_lyr[part]++;
 958         } else {
 959                 bd->d_open_reg[otyp] |= mask;
 960         }
 961         if (flag & FEXCL) {
 962                 bd->d_open_excl |= mask;
 963         }
 964 
 965         rv = 0;
 966 done:
 967         mutex_exit(&bd->d_ocmutex);
 968         rw_exit(&bd_lock);
 969 
 970         return (rv);
 971 }
 972 
 973 static int
 974 bd_close(dev_t dev, int flag, int otyp, cred_t *credp)
 975 {
 976         bd_t            *bd;
 977         minor_t         inst;
 978         minor_t         part;
 979         uint64_t        mask;
 980         boolean_t       last = B_TRUE;
 981 
 982         _NOTE(ARGUNUSED(flag));
 983         _NOTE(ARGUNUSED(credp));
 984 
 985         part = BDPART(dev);
 986         inst = BDINST(dev);
 987 
 988         ASSERT(part < 64);
 989         mask = (1U << part);
 990 
 991         rw_enter(&bd_lock, RW_READER);
 992 
 993         if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
 994                 rw_exit(&bd_lock);
 995                 return (ENXIO);
 996         }
 997 
 998         mutex_enter(&bd->d_ocmutex);
 999         if (bd->d_open_excl & mask) {
1000                 bd->d_open_excl &= ~mask;
1001         }
1002         if (otyp == OTYP_LYR) {
1003                 bd->d_open_lyr[part]--;
1004         } else {
1005                 bd->d_open_reg[otyp] &= ~mask;
1006         }
1007         for (int i = 0; i < 64; i++) {
1008                 if (bd->d_open_lyr[part]) {
1009                         last = B_FALSE;
1010                 }
1011         }
1012         for (int i = 0; last && (i < OTYP_LYR); i++) {
1013                 if (bd->d_open_reg[i]) {
1014                         last = B_FALSE;
1015                 }
1016         }
1017         mutex_exit(&bd->d_ocmutex);
1018 
1019         if (last) {
1020                 cmlb_invalidate(bd->d_cmlbh, 0);
1021         }
1022         rw_exit(&bd_lock);
1023 
1024         return (0);
1025 }
1026 
1027 static int
1028 bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk)
1029 {
1030         minor_t         inst;
1031         minor_t         part;
1032         diskaddr_t      pstart;
1033         diskaddr_t      psize;
1034         bd_t            *bd;
1035         bd_xfer_impl_t  *xi;
1036         buf_t           *bp;
1037         int             rv;
1038 
1039         rw_enter(&bd_lock, RW_READER);
1040 
1041         part = BDPART(dev);
1042         inst = BDINST(dev);
1043 
1044         if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1045                 rw_exit(&bd_lock);
1046                 return (ENXIO);
1047         }
1048         /*
1049          * do cmlb, but do it synchronously unless we already have the
1050          * partition (which we probably should.)
1051          */
1052         if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL,
1053             (void *)1)) {
1054                 rw_exit(&bd_lock);
1055                 return (ENXIO);
1056         }
1057 
1058         if ((blkno + nblk) > psize) {
1059                 rw_exit(&bd_lock);
1060                 return (EINVAL);
1061         }
1062         bp = getrbuf(KM_NOSLEEP);
1063         if (bp == NULL) {
1064                 rw_exit(&bd_lock);
1065                 return (ENOMEM);
1066         }
1067 
1068         bp->b_bcount = nblk << bd->d_blkshift;
1069         bp->b_resid = bp->b_bcount;
1070         bp->b_lblkno = blkno;
1071         bp->b_un.b_addr = caddr;
1072 
1073         xi = bd_xfer_alloc(bd, bp,  bd->d_ops.o_write, KM_NOSLEEP);
1074         if (xi == NULL) {
1075                 rw_exit(&bd_lock);
1076                 freerbuf(bp);
1077                 return (ENOMEM);
1078         }
1079         xi->i_blkno = blkno + pstart;
1080         xi->i_flags = BD_XFER_POLL;
1081         bd_submit(bd, xi);
1082         rw_exit(&bd_lock);
1083 
1084         /*
1085          * Generally, we should have run this entirely synchronously
1086          * at this point and the biowait call should be a no-op.  If
1087          * it didn't happen this way, it's a bug in the underlying
1088          * driver not honoring BD_XFER_POLL.
1089          */
1090         (void) biowait(bp);
1091         rv = geterror(bp);
1092         freerbuf(bp);
1093         return (rv);
1094 }
1095 
1096 void
1097 bd_minphys(struct buf *bp)
1098 {
1099         minor_t inst;
1100         bd_t    *bd;
1101         inst = BDINST(bp->b_edev);
1102 
1103         bd = ddi_get_soft_state(bd_state, inst);
1104 
1105         /*
1106          * In a non-debug kernel, bd_strategy will catch !bd as
1107          * well, and will fail nicely.
1108          */
1109         ASSERT(bd);
1110 
1111         if (bp->b_bcount > bd->d_maxxfer)
1112                 bp->b_bcount = bd->d_maxxfer;
1113 }
1114 
1115 static int
1116 bd_read(dev_t dev, struct uio *uio, cred_t *credp)
1117 {
1118         _NOTE(ARGUNUSED(credp));
1119         return (physio(bd_strategy, NULL, dev, B_READ, bd_minphys, uio));
1120 }
1121 
1122 static int
1123 bd_write(dev_t dev, struct uio *uio, cred_t *credp)
1124 {
1125         _NOTE(ARGUNUSED(credp));
1126         return (physio(bd_strategy, NULL, dev, B_WRITE, bd_minphys, uio));
1127 }
1128 
1129 static int
1130 bd_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
1131 {
1132         _NOTE(ARGUNUSED(credp));
1133         return (aphysio(bd_strategy, anocancel, dev, B_READ, bd_minphys, aio));
1134 }
1135 
1136 static int
1137 bd_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
1138 {
1139         _NOTE(ARGUNUSED(credp));
1140         return (aphysio(bd_strategy, anocancel, dev, B_WRITE, bd_minphys, aio));
1141 }
1142 
1143 static int
1144 bd_strategy(struct buf *bp)
1145 {
1146         minor_t         inst;
1147         minor_t         part;
1148         bd_t            *bd;
1149         diskaddr_t      p_lba;
1150         diskaddr_t      p_nblks;
1151         diskaddr_t      b_nblks;
1152         bd_xfer_impl_t  *xi;
1153         uint32_t        shift;
1154         int             (*func)(void *, bd_xfer_t *);
1155 
1156         part = BDPART(bp->b_edev);
1157         inst = BDINST(bp->b_edev);
1158 
1159         ASSERT(bp);
1160 
1161         bp->b_resid = bp->b_bcount;
1162 
1163         if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1164                 bioerror(bp, ENXIO);
1165                 biodone(bp);
1166                 return (0);
1167         }
1168 
1169         if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba,
1170             NULL, NULL, 0)) {
1171                 bioerror(bp, ENXIO);
1172                 biodone(bp);
1173                 return (0);
1174         }
1175 
1176         shift = bd->d_blkshift;
1177 
1178         if ((P2PHASE(bp->b_bcount, (1U << shift)) != 0) ||
1179             (bp->b_lblkno > p_nblks)) {
1180                 bioerror(bp, ENXIO);
1181                 biodone(bp);
1182                 return (0);
1183         }
1184         b_nblks = bp->b_bcount >> shift;
1185         if ((bp->b_lblkno == p_nblks) || (bp->b_bcount == 0)) {
1186                 biodone(bp);
1187                 return (0);
1188         }
1189 
1190         if ((b_nblks + bp->b_lblkno) > p_nblks) {
1191                 bp->b_resid = ((bp->b_lblkno + b_nblks - p_nblks) << shift);
1192                 bp->b_bcount -= bp->b_resid;
1193         } else {
1194                 bp->b_resid = 0;
1195         }
1196         func = (bp->b_flags & B_READ) ? bd->d_ops.o_read : bd->d_ops.o_write;
1197 
1198         xi = bd_xfer_alloc(bd, bp, func, KM_NOSLEEP);
1199         if (xi == NULL) {
1200                 xi = bd_xfer_alloc(bd, bp, func, KM_PUSHPAGE);
1201         }
1202         if (xi == NULL) {
1203                 /* bd_request_alloc will have done bioerror */
1204                 biodone(bp);
1205                 return (0);
1206         }
1207         xi->i_blkno = bp->b_lblkno + p_lba;
1208 
1209         bd_submit(bd, xi);
1210 
1211         return (0);
1212 }
1213 
1214 static int
1215 bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp)
1216 {
1217         minor_t         inst;
1218         uint16_t        part;
1219         bd_t            *bd;
1220         void            *ptr = (void *)arg;
1221         int             rv;
1222 
1223         part = BDPART(dev);
1224         inst = BDINST(dev);
1225 
1226         if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1227                 return (ENXIO);
1228         }
1229 
1230         rv = cmlb_ioctl(bd->d_cmlbh, dev, cmd, arg, flag, credp, rvalp, 0);
1231         if (rv != ENOTTY)
1232                 return (rv);
1233 
1234         if (rvalp != NULL) {
1235                 /* the return value of the ioctl is 0 by default */
1236                 *rvalp = 0;
1237         }
1238 
1239         switch (cmd) {
1240         case DKIOCGMEDIAINFO: {
1241                 struct dk_minfo minfo;
1242 
1243                 /* make sure our state information is current */
1244                 bd_update_state(bd);
1245                 bzero(&minfo, sizeof (minfo));
1246                 minfo.dki_media_type = DK_FIXED_DISK;
1247                 minfo.dki_lbsize = (1U << bd->d_blkshift);
1248                 minfo.dki_capacity = bd->d_numblks;
1249                 if (ddi_copyout(&minfo, ptr, sizeof (minfo), flag)) {
1250                         return (EFAULT);
1251                 }
1252                 return (0);
1253         }
1254         case DKIOCGMEDIAINFOEXT: {
1255                 struct dk_minfo_ext miext;
1256 
1257                 /* make sure our state information is current */
1258                 bd_update_state(bd);
1259                 bzero(&miext, sizeof (miext));
1260                 miext.dki_media_type = DK_FIXED_DISK;
1261                 miext.dki_lbsize = (1U << bd->d_blkshift);
1262                 miext.dki_pbsize = (1U << bd->d_pblkshift);
1263                 miext.dki_capacity = bd->d_numblks;
1264                 if (ddi_copyout(&miext, ptr, sizeof (miext), flag)) {
1265                         return (EFAULT);
1266                 }
1267                 return (0);
1268         }
1269         case DKIOCINFO: {
1270                 struct dk_cinfo cinfo;
1271                 bzero(&cinfo, sizeof (cinfo));
1272                 cinfo.dki_ctype = DKC_BLKDEV;
1273                 cinfo.dki_cnum = ddi_get_instance(ddi_get_parent(bd->d_dip));
1274                 (void) snprintf(cinfo.dki_cname, sizeof (cinfo.dki_cname),
1275                     "%s", ddi_driver_name(ddi_get_parent(bd->d_dip)));
1276                 (void) snprintf(cinfo.dki_dname, sizeof (cinfo.dki_dname),
1277                     "%s", ddi_driver_name(bd->d_dip));
1278                 cinfo.dki_unit = inst;
1279                 cinfo.dki_flags = DKI_FMTVOL;
1280                 cinfo.dki_partition = part;
1281                 cinfo.dki_maxtransfer = bd->d_maxxfer / DEV_BSIZE;
1282                 cinfo.dki_addr = 0;
1283                 cinfo.dki_slave = 0;
1284                 cinfo.dki_space = 0;
1285                 cinfo.dki_prio = 0;
1286                 cinfo.dki_vec = 0;
1287                 if (ddi_copyout(&cinfo, ptr, sizeof (cinfo), flag)) {
1288                         return (EFAULT);
1289                 }
1290                 return (0);
1291         }
1292         case DKIOCREMOVABLE: {
1293                 int i;
1294                 i = bd->d_removable ? 1 : 0;
1295                 if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
1296                         return (EFAULT);
1297                 }
1298                 return (0);
1299         }
1300         case DKIOCHOTPLUGGABLE: {
1301                 int i;
1302                 i = bd->d_hotpluggable ? 1 : 0;
1303                 if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
1304                         return (EFAULT);
1305                 }
1306                 return (0);
1307         }
1308         case DKIOCREADONLY: {
1309                 int i;
1310                 i = bd->d_rdonly ? 1 : 0;
1311                 if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
1312                         return (EFAULT);
1313                 }
1314                 return (0);
1315         }
1316         case DKIOCSOLIDSTATE: {
1317                 int i;
1318                 i = bd->d_ssd ? 1 : 0;
1319                 if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
1320                         return (EFAULT);
1321                 }
1322                 return (0);
1323         }
1324         case DKIOCSTATE: {
1325                 enum dkio_state state;
1326                 if (ddi_copyin(ptr, &state, sizeof (state), flag)) {
1327                         return (EFAULT);
1328                 }
1329                 if ((rv = bd_check_state(bd, &state)) != 0) {
1330                         return (rv);
1331                 }
1332                 if (ddi_copyout(&state, ptr, sizeof (state), flag)) {
1333                         return (EFAULT);
1334                 }
1335                 return (0);
1336         }
1337         case DKIOCFLUSHWRITECACHE: {
1338                 struct dk_callback *dkc = NULL;
1339 
1340                 if (flag & FKIOCTL)
1341                         dkc = (void *)arg;
1342 
1343                 rv = bd_flush_write_cache(bd, dkc);
1344                 return (rv);
1345         }
1346 
1347         default:
1348                 break;
1349 
1350         }
1351         return (ENOTTY);
1352 }
1353 
1354 static int
1355 bd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
1356     char *name, caddr_t valuep, int *lengthp)
1357 {
1358         bd_t    *bd;
1359 
1360         bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1361         if (bd == NULL)
1362                 return (ddi_prop_op(dev, dip, prop_op, mod_flags,
1363                     name, valuep, lengthp));
1364 
1365         return (cmlb_prop_op(bd->d_cmlbh, dev, dip, prop_op, mod_flags, name,
1366             valuep, lengthp, BDPART(dev), 0));
1367 }
1368 
1369 
1370 static int
1371 bd_tg_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, diskaddr_t start,
1372     size_t length, void *tg_cookie)
1373 {
1374         bd_t            *bd;
1375         buf_t           *bp;
1376         bd_xfer_impl_t  *xi;
1377         int             rv;
1378         int             (*func)(void *, bd_xfer_t *);
1379         int             kmflag;
1380 
1381         /*
1382          * If we are running in polled mode (such as during dump(9e)
1383          * execution), then we cannot sleep for kernel allocations.
1384          */
1385         kmflag = tg_cookie ? KM_NOSLEEP : KM_SLEEP;
1386 
1387         bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1388 
1389         if (P2PHASE(length, (1U << bd->d_blkshift)) != 0) {
1390                 /* We can only transfer whole blocks at a time! */
1391                 return (EINVAL);
1392         }
1393 
1394         if ((bp = getrbuf(kmflag)) == NULL) {
1395                 return (ENOMEM);
1396         }
1397 
1398         switch (cmd) {
1399         case TG_READ:
1400                 bp->b_flags = B_READ;
1401                 func = bd->d_ops.o_read;
1402                 break;
1403         case TG_WRITE:
1404                 bp->b_flags = B_WRITE;
1405                 func = bd->d_ops.o_write;
1406                 break;
1407         default:
1408                 freerbuf(bp);
1409                 return (EINVAL);
1410         }
1411 
1412         bp->b_un.b_addr = bufaddr;
1413         bp->b_bcount = length;
1414         xi = bd_xfer_alloc(bd, bp, func, kmflag);
1415         if (xi == NULL) {
1416                 rv = geterror(bp);
1417                 freerbuf(bp);
1418                 return (rv);
1419         }
1420         xi->i_flags = tg_cookie ? BD_XFER_POLL : 0;
1421         xi->i_blkno = start;
1422         bd_submit(bd, xi);
1423         (void) biowait(bp);
1424         rv = geterror(bp);
1425         freerbuf(bp);
1426 
1427         return (rv);
1428 }
1429 
1430 static int
1431 bd_tg_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
1432 {
1433         bd_t            *bd;
1434 
1435         _NOTE(ARGUNUSED(tg_cookie));
1436         bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1437 
1438         switch (cmd) {
1439         case TG_GETPHYGEOM:
1440         case TG_GETVIRTGEOM:
1441                 /*
1442                  * We don't have any "geometry" as such, let cmlb
1443                  * fabricate something.
1444                  */
1445                 return (ENOTTY);
1446 
1447         case TG_GETCAPACITY:
1448                 bd_update_state(bd);
1449                 *(diskaddr_t *)arg = bd->d_numblks;
1450                 return (0);
1451 
1452         case TG_GETBLOCKSIZE:
1453                 *(uint32_t *)arg = (1U << bd->d_blkshift);
1454                 return (0);
1455 
1456         case TG_GETATTR:
1457                 /*
1458                  * It turns out that cmlb really doesn't do much for
1459                  * non-writable media, but lets make the information
1460                  * available for it in case it does more in the
1461                  * future.  (The value is currently used for
1462                  * triggering special behavior for CD-ROMs.)
1463                  */
1464                 bd_update_state(bd);
1465                 ((tg_attribute_t *)arg)->media_is_writable =
1466                     bd->d_rdonly ? B_FALSE : B_TRUE;
1467                 ((tg_attribute_t *)arg)->media_is_solid_state = bd->d_ssd;
1468                 return (0);
1469 
1470         default:
1471                 return (EINVAL);
1472         }
1473 }
1474 
1475 
1476 static void
1477 bd_sched(bd_t *bd)
1478 {
1479         bd_xfer_impl_t  *xi;
1480         struct buf      *bp;
1481         int             rv;
1482 
1483         mutex_enter(&bd->d_iomutex);
1484 
1485         while ((bd->d_qactive < bd->d_qsize) &&
1486             ((xi = list_remove_head(&bd->d_waitq)) != NULL)) {
1487                 bd->d_qactive++;
1488                 kstat_waitq_to_runq(bd->d_kiop);
1489                 list_insert_tail(&bd->d_runq, xi);
1490 
1491                 /*
1492                  * Submit the job to the driver.  We drop the I/O mutex
1493                  * so that we can deal with the case where the driver
1494                  * completion routine calls back into us synchronously.
1495                  */
1496 
1497                 mutex_exit(&bd->d_iomutex);
1498 
1499                 rv = xi->i_func(bd->d_private, &xi->i_public);
1500                 if (rv != 0) {
1501                         bp = xi->i_bp;
1502                         bioerror(bp, rv);
1503                         biodone(bp);
1504 
1505                         atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);
1506 
1507                         mutex_enter(&bd->d_iomutex);
1508                         bd->d_qactive--;
1509                         kstat_runq_exit(bd->d_kiop);
1510                         list_remove(&bd->d_runq, xi);
1511                         bd_xfer_free(xi);
1512                 } else {
1513                         mutex_enter(&bd->d_iomutex);
1514                 }
1515         }
1516 
1517         mutex_exit(&bd->d_iomutex);
1518 }
1519 
1520 static void
1521 bd_submit(bd_t *bd, bd_xfer_impl_t *xi)
1522 {
1523         mutex_enter(&bd->d_iomutex);
1524         list_insert_tail(&bd->d_waitq, xi);
1525         kstat_waitq_enter(bd->d_kiop);
1526         mutex_exit(&bd->d_iomutex);
1527 
1528         bd_sched(bd);
1529 }
1530 
1531 static void
1532 bd_runq_exit(bd_xfer_impl_t *xi, int err)
1533 {
1534         bd_t    *bd = xi->i_bd;
1535         buf_t   *bp = xi->i_bp;
1536 
1537         mutex_enter(&bd->d_iomutex);
1538         bd->d_qactive--;
1539         kstat_runq_exit(bd->d_kiop);
1540         list_remove(&bd->d_runq, xi);
1541         mutex_exit(&bd->d_iomutex);
1542 
1543         if (err == 0) {
1544                 if (bp->b_flags & B_READ) {
1545                         bd->d_kiop->reads++;
1546                         bd->d_kiop->nread += (bp->b_bcount - xi->i_resid);
1547                 } else {
1548                         bd->d_kiop->writes++;
1549                         bd->d_kiop->nwritten += (bp->b_bcount - xi->i_resid);
1550                 }
1551         }
1552         bd_sched(bd);
1553 }
1554 
1555 static void
1556 bd_update_state(bd_t *bd)
1557 {
1558         enum    dkio_state      state = DKIO_INSERTED;
1559         boolean_t               docmlb = B_FALSE;
1560         bd_media_t              media;
1561 
1562         bzero(&media, sizeof (media));
1563 
1564         mutex_enter(&bd->d_statemutex);
1565         if (bd->d_ops.o_media_info(bd->d_private, &media) != 0) {
1566                 bd->d_numblks = 0;
1567                 state = DKIO_EJECTED;
1568                 goto done;
1569         }
1570 
1571         if ((media.m_blksize < 512) ||
1572             (!ISP2(media.m_blksize)) ||
1573             (P2PHASE(bd->d_maxxfer, media.m_blksize))) {
1574                 cmn_err(CE_WARN, "%s%d: Invalid media block size (%d)",
1575                     ddi_driver_name(bd->d_dip), ddi_get_instance(bd->d_dip),
1576                     media.m_blksize);
1577                 /*
1578                  * We can't use the media, treat it as not present.
1579                  */
1580                 state = DKIO_EJECTED;
1581                 bd->d_numblks = 0;
1582                 goto done;
1583         }
1584 
1585         if (((1U << bd->d_blkshift) != media.m_blksize) ||
1586             (bd->d_numblks != media.m_nblks)) {
1587                 /* Device size changed */
1588                 docmlb = B_TRUE;
1589         }
1590 
1591         bd->d_blkshift = ddi_ffs(media.m_blksize) - 1;
1592         bd->d_pblkshift = bd->d_blkshift;
1593         bd->d_numblks = media.m_nblks;
1594         bd->d_rdonly = media.m_readonly;
1595         bd->d_ssd = media.m_solidstate;
1596 
1597         /*
1598          * Only use the supplied physical block size if it is non-zero,
1599          * greater or equal to the block size, and a power of 2. Ignore it
1600          * if not, it's just informational and we can still use the media.
1601          */
1602         if ((media.m_pblksize != 0) &&
1603             (media.m_pblksize >= media.m_blksize) &&
1604             (ISP2(media.m_pblksize)))
1605                 bd->d_pblkshift = ddi_ffs(media.m_pblksize) - 1;
1606 
1607 done:
1608         if (state != bd->d_state) {
1609                 bd->d_state = state;
1610                 cv_broadcast(&bd->d_statecv);
1611                 docmlb = B_TRUE;
1612         }
1613         mutex_exit(&bd->d_statemutex);
1614 
1615         bd->d_kerr->bd_capacity.value.ui64 = bd->d_numblks << bd->d_blkshift;
1616 
1617         if (docmlb) {
1618                 if (state == DKIO_INSERTED) {
1619                         (void) cmlb_validate(bd->d_cmlbh, 0, 0);
1620                 } else {
1621                         cmlb_invalidate(bd->d_cmlbh, 0);
1622                 }
1623         }
1624 }
1625 
1626 static int
1627 bd_check_state(bd_t *bd, enum dkio_state *state)
1628 {
1629         clock_t         when;
1630 
1631         for (;;) {
1632 
1633                 bd_update_state(bd);
1634 
1635                 mutex_enter(&bd->d_statemutex);
1636 
1637                 if (bd->d_state != *state) {
1638                         *state = bd->d_state;
1639                         mutex_exit(&bd->d_statemutex);
1640                         break;
1641                 }
1642 
1643                 when = drv_usectohz(1000000);
1644                 if (cv_reltimedwait_sig(&bd->d_statecv, &bd->d_statemutex,
1645                     when, TR_CLOCK_TICK) == 0) {
1646                         mutex_exit(&bd->d_statemutex);
1647                         return (EINTR);
1648                 }
1649 
1650                 mutex_exit(&bd->d_statemutex);
1651         }
1652 
1653         return (0);
1654 }
1655 
1656 static int
1657 bd_flush_write_cache_done(struct buf *bp)
1658 {
1659         struct dk_callback *dc = (void *)bp->b_private;
1660 
1661         (*dc->dkc_callback)(dc->dkc_cookie, geterror(bp));
1662         kmem_free(dc, sizeof (*dc));
1663         freerbuf(bp);
1664         return (0);
1665 }
1666 
1667 static int
1668 bd_flush_write_cache(bd_t *bd, struct dk_callback *dkc)
1669 {
1670         buf_t                   *bp;
1671         struct dk_callback      *dc;
1672         bd_xfer_impl_t          *xi;
1673         int                     rv;
1674 
1675         if (bd->d_ops.o_sync_cache == NULL) {
1676                 return (ENOTSUP);
1677         }
1678         if ((bp = getrbuf(KM_SLEEP)) == NULL) {
1679                 return (ENOMEM);
1680         }
1681         bp->b_resid = 0;
1682         bp->b_bcount = 0;
1683 
1684         xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_sync_cache, KM_SLEEP);
1685         if (xi == NULL) {
1686                 rv = geterror(bp);
1687                 freerbuf(bp);
1688                 return (rv);
1689         }
1690 
1691         /* Make an asynchronous flush, but only if there is a callback */
1692         if (dkc != NULL && dkc->dkc_callback != NULL) {
1693                 /* Make a private copy of the callback structure */
1694                 dc = kmem_alloc(sizeof (*dc), KM_SLEEP);
1695                 *dc = *dkc;
1696                 bp->b_private = dc;
1697                 bp->b_iodone = bd_flush_write_cache_done;
1698 
1699                 bd_submit(bd, xi);
1700                 return (0);
1701         }
1702 
1703         /* In case there is no callback, perform a synchronous flush */
1704         bd_submit(bd, xi);
1705         (void) biowait(bp);
1706         rv = geterror(bp);
1707         freerbuf(bp);
1708 
1709         return (rv);
1710 }
1711 
1712 /*
1713  * Nexus support.
1714  */
1715 int
1716 bd_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
1717     void *arg, void *result)
1718 {
1719         bd_handle_t     hdl;
1720 
1721         switch (ctlop) {
1722         case DDI_CTLOPS_REPORTDEV:
1723                 cmn_err(CE_CONT, "?Block device: %s@%s, %s%d\n",
1724                     ddi_node_name(rdip), ddi_get_name_addr(rdip),
1725                     ddi_driver_name(rdip), ddi_get_instance(rdip));
1726                 return (DDI_SUCCESS);
1727 
1728         case DDI_CTLOPS_INITCHILD:
1729                 hdl = ddi_get_parent_data((dev_info_t *)arg);
1730                 if (hdl == NULL) {
1731                         return (DDI_NOT_WELL_FORMED);
1732                 }
1733                 ddi_set_name_addr((dev_info_t *)arg, hdl->h_addr);
1734                 return (DDI_SUCCESS);
1735 
1736         case DDI_CTLOPS_UNINITCHILD:
1737                 ddi_set_name_addr((dev_info_t *)arg, NULL);
1738                 ndi_prop_remove_all((dev_info_t *)arg);
1739                 return (DDI_SUCCESS);
1740 
1741         default:
1742                 return (ddi_ctlops(dip, rdip, ctlop, arg, result));
1743         }
1744 }
1745 
1746 /*
1747  * Functions for device drivers.
1748  */
1749 bd_handle_t
1750 bd_alloc_handle(void *private, bd_ops_t *ops, ddi_dma_attr_t *dma, int kmflag)
1751 {
1752         bd_handle_t     hdl;
1753 
1754         hdl = kmem_zalloc(sizeof (*hdl), kmflag);
1755         if (hdl != NULL) {
1756                 hdl->h_ops = *ops;
1757                 hdl->h_dma = dma;
1758                 hdl->h_private = private;
1759         }
1760 
1761         return (hdl);
1762 }
1763 
1764 void
1765 bd_free_handle(bd_handle_t hdl)
1766 {
1767         kmem_free(hdl, sizeof (*hdl));
1768 }
1769 
1770 int
1771 bd_attach_handle(dev_info_t *dip, bd_handle_t hdl)
1772 {
1773         dev_info_t      *child;
1774         bd_drive_t      drive = { 0 };
1775 
1776         /* if drivers don't override this, make it assume none */
1777         drive.d_lun = -1;
1778         hdl->h_ops.o_drive_info(hdl->h_private, &drive);
1779 
1780         hdl->h_parent = dip;
1781         hdl->h_name = "blkdev";
1782 
1783         /*LINTED: E_BAD_PTR_CAST_ALIGN*/
1784         if (*(uint64_t *)drive.d_eui64 != 0) {
1785                 if (drive.d_lun >= 0) {
1786                         (void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
1787                             "w%02X%02X%02X%02X%02X%02X%02X%02X,%X",
1788                             drive.d_eui64[0], drive.d_eui64[1],
1789                             drive.d_eui64[2], drive.d_eui64[3],
1790                             drive.d_eui64[4], drive.d_eui64[5],
1791                             drive.d_eui64[6], drive.d_eui64[7], drive.d_lun);
1792                 } else {
1793                         (void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
1794                             "w%02X%02X%02X%02X%02X%02X%02X%02X",
1795                             drive.d_eui64[0], drive.d_eui64[1],
1796                             drive.d_eui64[2], drive.d_eui64[3],
1797                             drive.d_eui64[4], drive.d_eui64[5],
1798                             drive.d_eui64[6], drive.d_eui64[7]);
1799                 }
1800         } else {
1801                 if (drive.d_lun >= 0) {
1802                         (void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
1803                             "%X,%X", drive.d_target, drive.d_lun);
1804                 } else {
1805                         (void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
1806                             "%X", drive.d_target);
1807                 }
1808         }
1809 
1810         if (ndi_devi_alloc(dip, hdl->h_name, (pnode_t)DEVI_SID_NODEID,
1811             &child) != NDI_SUCCESS) {
1812                 cmn_err(CE_WARN, "%s%d: unable to allocate node %s@%s",
1813                     ddi_driver_name(dip), ddi_get_instance(dip),
1814                     "blkdev", hdl->h_addr);
1815                 return (DDI_FAILURE);
1816         }
1817 
1818         ddi_set_parent_data(child, hdl);
1819         hdl->h_child = child;
1820 
1821         if (ndi_devi_online(child, 0) == NDI_FAILURE) {
1822                 cmn_err(CE_WARN, "%s%d: failed bringing node %s@%s online",
1823                     ddi_driver_name(dip), ddi_get_instance(dip),
1824                     hdl->h_name, hdl->h_addr);
1825                 (void) ndi_devi_free(child);
1826                 return (DDI_FAILURE);
1827         }
1828 
1829         return (DDI_SUCCESS);
1830 }
1831 
1832 int
1833 bd_detach_handle(bd_handle_t hdl)
1834 {
1835         int     circ;
1836         int     rv;
1837         char    *devnm;
1838 
1839         if (hdl->h_child == NULL) {
1840                 return (DDI_SUCCESS);
1841         }
1842         ndi_devi_enter(hdl->h_parent, &circ);
1843         if (i_ddi_node_state(hdl->h_child) < DS_INITIALIZED) {
1844                 rv = ddi_remove_child(hdl->h_child, 0);
1845         } else {
1846                 devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
1847                 (void) ddi_deviname(hdl->h_child, devnm);
1848                 (void) devfs_clean(hdl->h_parent, devnm + 1, DV_CLEAN_FORCE);
1849                 rv = ndi_devi_unconfig_one(hdl->h_parent, devnm + 1, NULL,
1850                     NDI_DEVI_REMOVE | NDI_UNCONFIG);
1851                 kmem_free(devnm, MAXNAMELEN + 1);
1852         }
1853         if (rv == 0) {
1854                 hdl->h_child = NULL;
1855         }
1856 
1857         ndi_devi_exit(hdl->h_parent, circ);
1858         return (rv == NDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
1859 }
1860 
1861 void
1862 bd_xfer_done(bd_xfer_t *xfer, int err)
1863 {
1864         bd_xfer_impl_t  *xi = (void *)xfer;
1865         buf_t           *bp = xi->i_bp;
1866         int             rv = DDI_SUCCESS;
1867         bd_t            *bd = xi->i_bd;
1868         size_t          len;
1869 
1870         if (err != 0) {
1871                 bd_runq_exit(xi, err);
1872                 atomic_inc_32(&bd->d_kerr->bd_harderrs.value.ui32);
1873 
1874                 bp->b_resid += xi->i_resid;
1875                 bd_xfer_free(xi);
1876                 bioerror(bp, err);
1877                 biodone(bp);
1878                 return;
1879         }
1880 
1881         xi->i_cur_win++;
1882         xi->i_resid -= xi->i_len;
1883 
1884         if (xi->i_resid == 0) {
1885                 /* Job completed succcessfully! */
1886                 bd_runq_exit(xi, 0);
1887 
1888                 bd_xfer_free(xi);
1889                 biodone(bp);
1890                 return;
1891         }
1892 
1893         xi->i_blkno += xi->i_nblks;
1894 
1895         if (bd->d_use_dma) {
1896                 /* More transfer still pending... advance to next DMA window. */
1897                 rv = ddi_dma_getwin(xi->i_dmah, xi->i_cur_win,
1898                     &xi->i_offset, &len, &xi->i_dmac, &xi->i_ndmac);
1899         } else {
1900                 /* Advance memory window. */
1901                 xi->i_kaddr += xi->i_len;
1902                 xi->i_offset += xi->i_len;
1903                 len = min(bp->b_bcount - xi->i_offset, bd->d_maxxfer);
1904         }
1905 
1906 
1907         if ((rv != DDI_SUCCESS) ||
1908             (P2PHASE(len, (1U << xi->i_blkshift) != 0))) {
1909                 bd_runq_exit(xi, EFAULT);
1910 
1911                 bp->b_resid += xi->i_resid;
1912                 bd_xfer_free(xi);
1913                 bioerror(bp, EFAULT);
1914                 biodone(bp);
1915                 return;
1916         }
1917         xi->i_len = len;
1918         xi->i_nblks = len >> xi->i_blkshift;
1919 
1920         /* Submit next window to hardware. */
1921         rv = xi->i_func(bd->d_private, &xi->i_public);
1922         if (rv != 0) {
1923                 bd_runq_exit(xi, rv);
1924 
1925                 atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);
1926 
1927                 bp->b_resid += xi->i_resid;
1928                 bd_xfer_free(xi);
1929                 bioerror(bp, rv);
1930                 biodone(bp);
1931         }
1932 }
1933 
1934 void
1935 bd_error(bd_xfer_t *xfer, int error)
1936 {
1937         bd_xfer_impl_t  *xi = (void *)xfer;
1938         bd_t            *bd = xi->i_bd;
1939 
1940         switch (error) {
1941         case BD_ERR_MEDIA:
1942                 atomic_inc_32(&bd->d_kerr->bd_rq_media_err.value.ui32);
1943                 break;
1944         case BD_ERR_NTRDY:
1945                 atomic_inc_32(&bd->d_kerr->bd_rq_ntrdy_err.value.ui32);
1946                 break;
1947         case BD_ERR_NODEV:
1948                 atomic_inc_32(&bd->d_kerr->bd_rq_nodev_err.value.ui32);
1949                 break;
1950         case BD_ERR_RECOV:
1951                 atomic_inc_32(&bd->d_kerr->bd_rq_recov_err.value.ui32);
1952                 break;
1953         case BD_ERR_ILLRQ:
1954                 atomic_inc_32(&bd->d_kerr->bd_rq_illrq_err.value.ui32);
1955                 break;
1956         case BD_ERR_PFA:
1957                 atomic_inc_32(&bd->d_kerr->bd_rq_pfa_err.value.ui32);
1958                 break;
1959         default:
1960                 cmn_err(CE_PANIC, "bd_error: unknown error type %d", error);
1961                 break;
1962         }
1963 }
1964 
1965 void
1966 bd_state_change(bd_handle_t hdl)
1967 {
1968         bd_t            *bd;
1969 
1970         if ((bd = hdl->h_bd) != NULL) {
1971                 bd_update_state(bd);
1972         }
1973 }
1974 
1975 void
1976 bd_mod_init(struct dev_ops *devops)
1977 {
1978         static struct bus_ops bd_bus_ops = {
1979                 BUSO_REV,               /* busops_rev */
1980                 nullbusmap,             /* bus_map */
1981                 NULL,                   /* bus_get_intrspec (OBSOLETE) */
1982                 NULL,                   /* bus_add_intrspec (OBSOLETE) */
1983                 NULL,                   /* bus_remove_intrspec (OBSOLETE) */
1984                 i_ddi_map_fault,        /* bus_map_fault */
1985                 NULL,                   /* bus_dma_map (OBSOLETE) */
1986                 ddi_dma_allochdl,       /* bus_dma_allochdl */
1987                 ddi_dma_freehdl,        /* bus_dma_freehdl */
1988                 ddi_dma_bindhdl,        /* bus_dma_bindhdl */
1989                 ddi_dma_unbindhdl,      /* bus_dma_unbindhdl */
1990                 ddi_dma_flush,          /* bus_dma_flush */
1991                 ddi_dma_win,            /* bus_dma_win */
1992                 ddi_dma_mctl,           /* bus_dma_ctl */
1993                 bd_bus_ctl,             /* bus_ctl */
1994                 ddi_bus_prop_op,        /* bus_prop_op */
1995                 NULL,                   /* bus_get_eventcookie */
1996                 NULL,                   /* bus_add_eventcall */
1997                 NULL,                   /* bus_remove_eventcall */
1998                 NULL,                   /* bus_post_event */
1999                 NULL,                   /* bus_intr_ctl (OBSOLETE) */
2000                 NULL,                   /* bus_config */
2001                 NULL,                   /* bus_unconfig */
2002                 NULL,                   /* bus_fm_init */
2003                 NULL,                   /* bus_fm_fini */
2004                 NULL,                   /* bus_fm_access_enter */
2005                 NULL,                   /* bus_fm_access_exit */
2006                 NULL,                   /* bus_power */
2007                 NULL,                   /* bus_intr_op */
2008         };
2009 
2010         devops->devo_bus_ops = &bd_bus_ops;
2011 
2012         /*
2013          * NB: The device driver is free to supply its own
2014          * character entry device support.
2015          */
2016 }
2017 
2018 void
2019 bd_mod_fini(struct dev_ops *devops)
2020 {
2021         devops->devo_bus_ops = NULL;
2022 }