1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2012 Garrett D'Amore <garrett@damore.org>.  All rights reserved.
  24  * Copyright 2012 Alexey Zaytsev <alexey.zaytsev@gmail.com> All rights reserved.
  25  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  26  * Copyright 2017 The MathWorks, Inc.  All rights reserved.
  27  */
  28 
  29 #include <sys/types.h>
  30 #include <sys/ksynch.h>
  31 #include <sys/kmem.h>
  32 #include <sys/file.h>
  33 #include <sys/errno.h>
  34 #include <sys/open.h>
  35 #include <sys/buf.h>
  36 #include <sys/uio.h>
  37 #include <sys/aio_req.h>
  38 #include <sys/cred.h>
  39 #include <sys/modctl.h>
  40 #include <sys/cmlb.h>
  41 #include <sys/conf.h>
  42 #include <sys/devops.h>
  43 #include <sys/list.h>
  44 #include <sys/sysmacros.h>
  45 #include <sys/dkio.h>
  46 #include <sys/vtoc.h>
  47 #include <sys/scsi/scsi.h>        /* for DTYPE_DIRECT */
  48 #include <sys/kstat.h>
  49 #include <sys/fs/dv_node.h>
  50 #include <sys/ddi.h>
  51 #include <sys/sunddi.h>
  52 #include <sys/note.h>
  53 #include <sys/blkdev.h>
  54 #include <sys/scsi/impl/inquiry.h>
  55 
  56 #define BD_MAXPART      64
  57 #define BDINST(dev)     (getminor(dev) / BD_MAXPART)
  58 #define BDPART(dev)     (getminor(dev) % BD_MAXPART)
  59 
  60 typedef struct bd bd_t;
  61 typedef struct bd_xfer_impl bd_xfer_impl_t;
  62 
  63 struct bd {
  64         void            *d_private;
  65         dev_info_t      *d_dip;
  66         kmutex_t        d_ocmutex;
  67         kmutex_t        d_iomutex;
  68         kmutex_t        *d_errmutex;
  69         kmutex_t        d_statemutex;
  70         kcondvar_t      d_statecv;
  71         enum dkio_state d_state;
  72         cmlb_handle_t   d_cmlbh;
  73         unsigned        d_open_lyr[BD_MAXPART]; /* open count */
  74         uint64_t        d_open_excl;    /* bit mask indexed by partition */
  75         uint64_t        d_open_reg[OTYPCNT];            /* bit mask */
  76 
  77         uint32_t        d_qsize;
  78         uint32_t        d_qactive;
  79         uint32_t        d_maxxfer;
  80         uint32_t        d_blkshift;
  81         uint32_t        d_pblkshift;
  82         uint64_t        d_numblks;
  83         ddi_devid_t     d_devid;
  84 
  85         kmem_cache_t    *d_cache;
  86         list_t          d_runq;
  87         list_t          d_waitq;
  88         kstat_t         *d_ksp;
  89         kstat_io_t      *d_kiop;
  90         kstat_t         *d_errstats;
  91         struct bd_errstats *d_kerr;
  92 
  93         boolean_t       d_rdonly;
  94         boolean_t       d_ssd;
  95         boolean_t       d_removable;
  96         boolean_t       d_hotpluggable;
  97         boolean_t       d_use_dma;
  98 
  99         ddi_dma_attr_t  d_dma;
 100         bd_ops_t        d_ops;
 101         bd_handle_t     d_handle;
 102 };
 103 
 104 struct bd_handle {
 105         bd_ops_t        h_ops;
 106         ddi_dma_attr_t  *h_dma;
 107         dev_info_t      *h_parent;
 108         dev_info_t      *h_child;
 109         void            *h_private;
 110         bd_t            *h_bd;
 111         char            *h_name;
 112         char            h_addr[30];     /* enough for w%0.16x,%X */
 113 };
 114 
 115 struct bd_xfer_impl {
 116         bd_xfer_t       i_public;
 117         list_node_t     i_linkage;
 118         bd_t            *i_bd;
 119         buf_t           *i_bp;
 120         uint_t          i_num_win;
 121         uint_t          i_cur_win;
 122         off_t           i_offset;
 123         int             (*i_func)(void *, bd_xfer_t *);
 124         uint32_t        i_blkshift;
 125         size_t          i_len;
 126         size_t          i_resid;
 127 };
 128 
 129 #define i_dmah          i_public.x_dmah
 130 #define i_dmac          i_public.x_dmac
 131 #define i_ndmac         i_public.x_ndmac
 132 #define i_kaddr         i_public.x_kaddr
 133 #define i_nblks         i_public.x_nblks
 134 #define i_blkno         i_public.x_blkno
 135 #define i_flags         i_public.x_flags
 136 
 137 
 138 /*
 139  * Private prototypes.
 140  */
 141 
 142 static void bd_prop_update_inqstring(dev_info_t *, char *, char *, size_t);
 143 static void bd_create_inquiry_props(dev_info_t *, bd_drive_t *);
 144 static void bd_create_errstats(bd_t *, int, bd_drive_t *);
 145 static void bd_errstats_setstr(kstat_named_t *, char *, size_t, char *);
 146 static void bd_init_errstats(bd_t *, bd_drive_t *);
 147 
 148 static int bd_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
 149 static int bd_attach(dev_info_t *, ddi_attach_cmd_t);
 150 static int bd_detach(dev_info_t *, ddi_detach_cmd_t);
 151 
 152 static int bd_open(dev_t *, int, int, cred_t *);
 153 static int bd_close(dev_t, int, int, cred_t *);
 154 static int bd_strategy(struct buf *);
 155 static int bd_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
 156 static int bd_dump(dev_t, caddr_t, daddr_t, int);
 157 static int bd_read(dev_t, struct uio *, cred_t *);
 158 static int bd_write(dev_t, struct uio *, cred_t *);
 159 static int bd_aread(dev_t, struct aio_req *, cred_t *);
 160 static int bd_awrite(dev_t, struct aio_req *, cred_t *);
 161 static int bd_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
 162     caddr_t, int *);
 163 
 164 static int bd_tg_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
 165     void *);
 166 static int bd_tg_getinfo(dev_info_t *, int, void *, void *);
 167 static int bd_xfer_ctor(void *, void *, int);
 168 static void bd_xfer_dtor(void *, void *);
 169 static void bd_sched(bd_t *);
 170 static void bd_submit(bd_t *, bd_xfer_impl_t *);
 171 static void bd_runq_exit(bd_xfer_impl_t *, int);
 172 static void bd_update_state(bd_t *);
 173 static int bd_check_state(bd_t *, enum dkio_state *);
 174 static int bd_flush_write_cache(bd_t *, struct dk_callback *);
 175 static int bd_check_uio(dev_t, struct uio *);
 176 
 177 struct cmlb_tg_ops bd_tg_ops = {
 178         TG_DK_OPS_VERSION_1,
 179         bd_tg_rdwr,
 180         bd_tg_getinfo,
 181 };
 182 
 183 static struct cb_ops bd_cb_ops = {
 184         bd_open,                /* open */
 185         bd_close,               /* close */
 186         bd_strategy,            /* strategy */
 187         nodev,                  /* print */
 188         bd_dump,                /* dump */
 189         bd_read,                /* read */
 190         bd_write,               /* write */
 191         bd_ioctl,               /* ioctl */
 192         nodev,                  /* devmap */
 193         nodev,                  /* mmap */
 194         nodev,                  /* segmap */
 195         nochpoll,               /* poll */
 196         bd_prop_op,             /* cb_prop_op */
 197         0,                      /* streamtab  */
 198         D_64BIT | D_MP,         /* Driver comaptibility flag */
 199         CB_REV,                 /* cb_rev */
 200         bd_aread,               /* async read */
 201         bd_awrite               /* async write */
 202 };
 203 
 204 struct dev_ops bd_dev_ops = {
 205         DEVO_REV,               /* devo_rev, */
 206         0,                      /* refcnt  */
 207         bd_getinfo,             /* getinfo */
 208         nulldev,                /* identify */
 209         nulldev,                /* probe */
 210         bd_attach,              /* attach */
 211         bd_detach,              /* detach */
 212         nodev,                  /* reset */
 213         &bd_cb_ops,                 /* driver operations */
 214         NULL,                   /* bus operations */
 215         NULL,                   /* power */
 216         ddi_quiesce_not_needed, /* quiesce */
 217 };
 218 
 219 static struct modldrv modldrv = {
 220         &mod_driverops,
 221         "Generic Block Device",
 222         &bd_dev_ops,
 223 };
 224 
 225 static struct modlinkage modlinkage = {
 226         MODREV_1, { &modldrv, NULL }
 227 };
 228 
 229 static void *bd_state;
 230 static krwlock_t bd_lock;
 231 
 232 int
 233 _init(void)
 234 {
 235         int     rv;
 236 
 237         rv = ddi_soft_state_init(&bd_state, sizeof (struct bd), 2);
 238         if (rv != DDI_SUCCESS) {
 239                 return (rv);
 240         }
 241         rw_init(&bd_lock, NULL, RW_DRIVER, NULL);
 242         rv = mod_install(&modlinkage);
 243         if (rv != DDI_SUCCESS) {
 244                 rw_destroy(&bd_lock);
 245                 ddi_soft_state_fini(&bd_state);
 246         }
 247         return (rv);
 248 }
 249 
 250 int
 251 _fini(void)
 252 {
 253         int     rv;
 254 
 255         rv = mod_remove(&modlinkage);
 256         if (rv == DDI_SUCCESS) {
 257                 rw_destroy(&bd_lock);
 258                 ddi_soft_state_fini(&bd_state);
 259         }
 260         return (rv);
 261 }
 262 
 263 int
 264 _info(struct modinfo *modinfop)
 265 {
 266         return (mod_info(&modlinkage, modinfop));
 267 }
 268 
 269 static int
 270 bd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
 271 {
 272         bd_t    *bd;
 273         minor_t inst;
 274 
 275         _NOTE(ARGUNUSED(dip));
 276 
 277         inst = BDINST((dev_t)arg);
 278 
 279         switch (cmd) {
 280         case DDI_INFO_DEVT2DEVINFO:
 281                 bd = ddi_get_soft_state(bd_state, inst);
 282                 if (bd == NULL) {
 283                         return (DDI_FAILURE);
 284                 }
 285                 *resultp = (void *)bd->d_dip;
 286                 break;
 287 
 288         case DDI_INFO_DEVT2INSTANCE:
 289                 *resultp = (void *)(intptr_t)inst;
 290                 break;
 291 
 292         default:
 293                 return (DDI_FAILURE);
 294         }
 295         return (DDI_SUCCESS);
 296 }
 297 
 298 static void
 299 bd_prop_update_inqstring(dev_info_t *dip, char *name, char *data, size_t len)
 300 {
 301         int     ilen;
 302         char    *data_string;
 303 
 304         ilen = scsi_ascii_inquiry_len(data, len);
 305         ASSERT3U(ilen, <=, len);
 306         if (ilen <= 0)
 307                 return;
 308         /* ensure null termination */
 309         data_string = kmem_zalloc(ilen + 1, KM_SLEEP);
 310         bcopy(data, data_string, ilen);
 311         (void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, name, data_string);
 312         kmem_free(data_string, ilen + 1);
 313 }
 314 
 315 static void
 316 bd_create_inquiry_props(dev_info_t *dip, bd_drive_t *drive)
 317 {
 318         if (drive->d_vendor_len > 0)
 319                 bd_prop_update_inqstring(dip, INQUIRY_VENDOR_ID,
 320                     drive->d_vendor, drive->d_vendor_len);
 321 
 322         if (drive->d_product_len > 0)
 323                 bd_prop_update_inqstring(dip, INQUIRY_PRODUCT_ID,
 324                     drive->d_product, drive->d_product_len);
 325 
 326         if (drive->d_serial_len > 0)
 327                 bd_prop_update_inqstring(dip, INQUIRY_SERIAL_NO,
 328                     drive->d_serial, drive->d_serial_len);
 329 
 330         if (drive->d_revision_len > 0)
 331                 bd_prop_update_inqstring(dip, INQUIRY_REVISION_ID,
 332                     drive->d_revision, drive->d_revision_len);
 333 }
 334 
 335 static void
 336 bd_create_errstats(bd_t *bd, int inst, bd_drive_t *drive)
 337 {
 338         char    ks_module[KSTAT_STRLEN];
 339         char    ks_name[KSTAT_STRLEN];
 340         int     ndata = sizeof (struct bd_errstats) / sizeof (kstat_named_t);
 341 
 342         if (bd->d_errstats != NULL)
 343                 return;
 344 
 345         (void) snprintf(ks_module, sizeof (ks_module), "%serr",
 346             ddi_driver_name(bd->d_dip));
 347         (void) snprintf(ks_name, sizeof (ks_name), "%s%d,err",
 348             ddi_driver_name(bd->d_dip), inst);
 349 
 350         bd->d_errstats = kstat_create(ks_module, inst, ks_name, "device_error",
 351             KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
 352 
 353         if (bd->d_errstats == NULL) {
 354                 /*
 355                  * Even if we cannot create the kstat, we create a
 356                  * scratch kstat.  The reason for this is to ensure
 357                  * that we can update the kstat all of the time,
 358                  * without adding an extra branch instruction.
 359                  */
 360                 bd->d_kerr = kmem_zalloc(sizeof (struct bd_errstats),
 361                     KM_SLEEP);
 362                 bd->d_errmutex = kmem_zalloc(sizeof (kmutex_t), KM_SLEEP);
 363                 mutex_init(bd->d_errmutex, NULL, MUTEX_DRIVER, NULL);
 364         } else {
 365                 if (bd->d_errstats->ks_lock == NULL) {
 366                         bd->d_errstats->ks_lock = kmem_zalloc(sizeof (kmutex_t),
 367                             KM_SLEEP);
 368                         mutex_init(bd->d_errstats->ks_lock, NULL, MUTEX_DRIVER,
 369                             NULL);
 370                 }
 371 
 372                 bd->d_errmutex = bd->d_errstats->ks_lock;
 373                 bd->d_kerr = (struct bd_errstats *)bd->d_errstats->ks_data;
 374         }
 375 
 376         kstat_named_init(&bd->d_kerr->bd_softerrs,    "Soft Errors",
 377             KSTAT_DATA_UINT32);
 378         kstat_named_init(&bd->d_kerr->bd_harderrs,    "Hard Errors",
 379             KSTAT_DATA_UINT32);
 380         kstat_named_init(&bd->d_kerr->bd_transerrs,   "Transport Errors",
 381             KSTAT_DATA_UINT32);
 382 
 383         if (drive->d_model_len > 0) {
 384                 kstat_named_init(&bd->d_kerr->bd_model,       "Model",
 385                     KSTAT_DATA_STRING);
 386         } else {
 387                 kstat_named_init(&bd->d_kerr->bd_vid, "Vendor",
 388                     KSTAT_DATA_STRING);
 389                 kstat_named_init(&bd->d_kerr->bd_pid, "Product",
 390                     KSTAT_DATA_STRING);
 391         }
 392 
 393         kstat_named_init(&bd->d_kerr->bd_revision,    "Revision",
 394             KSTAT_DATA_STRING);
 395         kstat_named_init(&bd->d_kerr->bd_serial,      "Serial No",
 396             KSTAT_DATA_STRING);
 397         kstat_named_init(&bd->d_kerr->bd_capacity,    "Size",
 398             KSTAT_DATA_ULONGLONG);
 399         kstat_named_init(&bd->d_kerr->bd_rq_media_err,        "Media Error",
 400             KSTAT_DATA_UINT32);
 401         kstat_named_init(&bd->d_kerr->bd_rq_ntrdy_err,        "Device Not Ready",
 402             KSTAT_DATA_UINT32);
 403         kstat_named_init(&bd->d_kerr->bd_rq_nodev_err,        "No Device",
 404             KSTAT_DATA_UINT32);
 405         kstat_named_init(&bd->d_kerr->bd_rq_recov_err,        "Recoverable",
 406             KSTAT_DATA_UINT32);
 407         kstat_named_init(&bd->d_kerr->bd_rq_illrq_err,        "Illegal Request",
 408             KSTAT_DATA_UINT32);
 409         kstat_named_init(&bd->d_kerr->bd_rq_pfa_err,
 410             "Predictive Failure Analysis", KSTAT_DATA_UINT32);
 411 
 412         bd->d_errstats->ks_private = bd;
 413 
 414         kstat_install(bd->d_errstats);
 415 }
 416 
 417 static void
 418 bd_errstats_setstr(kstat_named_t *k, char *str, size_t len, char *alt)
 419 {
 420         char    *tmp;
 421 
 422         if (KSTAT_NAMED_STR_PTR(k) == NULL) {
 423                 if (len > 0) {
 424                         tmp = kmem_alloc(len + 1, KM_SLEEP);
 425                         (void) strlcpy(tmp, str, len + 1);
 426                 } else {
 427                         tmp = alt;
 428                 }
 429 
 430                 kstat_named_setstr(k, tmp);
 431         }
 432 }
 433 
 434 static void
 435 bd_init_errstats(bd_t *bd, bd_drive_t *drive)
 436 {
 437         struct bd_errstats      *est = bd->d_kerr;
 438 
 439         mutex_enter(bd->d_errmutex);
 440 
 441         if (drive->d_model_len > 0 &&
 442             KSTAT_NAMED_STR_PTR(&est->bd_model) == NULL) {
 443                 bd_errstats_setstr(&est->bd_model, drive->d_model,
 444                     drive->d_model_len, NULL);
 445         } else {
 446                 bd_errstats_setstr(&est->bd_vid, drive->d_vendor,
 447                     drive->d_vendor_len, "Unknown ");
 448                 bd_errstats_setstr(&est->bd_pid, drive->d_product,
 449                     drive->d_product_len, "Unknown         ");
 450         }
 451 
 452         bd_errstats_setstr(&est->bd_revision, drive->d_revision,
 453             drive->d_revision_len, "0001");
 454         bd_errstats_setstr(&est->bd_serial, drive->d_serial,
 455             drive->d_serial_len, "0               ");
 456 
 457         mutex_exit(bd->d_errmutex);
 458 }
 459 
 460 static int
 461 bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 462 {
 463         int             inst;
 464         bd_handle_t     hdl;
 465         bd_t            *bd;
 466         bd_drive_t      drive;
 467         int             rv;
 468         char            name[16];
 469         char            kcache[32];
 470 
 471         switch (cmd) {
 472         case DDI_ATTACH:
 473                 break;
 474         case DDI_RESUME:
 475                 /* We don't do anything native for suspend/resume */
 476                 return (DDI_SUCCESS);
 477         default:
 478                 return (DDI_FAILURE);
 479         }
 480 
 481         inst = ddi_get_instance(dip);
 482         hdl = ddi_get_parent_data(dip);
 483 
 484         (void) snprintf(name, sizeof (name), "%s%d",
 485             ddi_driver_name(dip), ddi_get_instance(dip));
 486         (void) snprintf(kcache, sizeof (kcache), "%s_xfer", name);
 487 
 488         if (hdl == NULL) {
 489                 cmn_err(CE_WARN, "%s: missing parent data!", name);
 490                 return (DDI_FAILURE);
 491         }
 492 
 493         if (ddi_soft_state_zalloc(bd_state, inst) != DDI_SUCCESS) {
 494                 cmn_err(CE_WARN, "%s: unable to zalloc soft state!", name);
 495                 return (DDI_FAILURE);
 496         }
 497         bd = ddi_get_soft_state(bd_state, inst);
 498 
 499         if (hdl->h_dma) {
 500                 bd->d_dma = *(hdl->h_dma);
 501                 bd->d_dma.dma_attr_granular =
 502                     max(DEV_BSIZE, bd->d_dma.dma_attr_granular);
 503                 bd->d_use_dma = B_TRUE;
 504 
 505                 if (bd->d_maxxfer &&
 506                     (bd->d_maxxfer != bd->d_dma.dma_attr_maxxfer)) {
 507                         cmn_err(CE_WARN,
 508                             "%s: inconsistent maximum transfer size!",
 509                             name);
 510                         /* We force it */
 511                         bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
 512                 } else {
 513                         bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
 514                 }
 515         } else {
 516                 bd->d_use_dma = B_FALSE;
 517                 if (bd->d_maxxfer == 0) {
 518                         bd->d_maxxfer = 1024 * 1024;
 519                 }
 520         }
 521         bd->d_ops = hdl->h_ops;
 522         bd->d_private = hdl->h_private;
 523         bd->d_blkshift = 9;  /* 512 bytes, to start */
 524 
 525         if (bd->d_maxxfer % DEV_BSIZE) {
 526                 cmn_err(CE_WARN, "%s: maximum transfer misaligned!", name);
 527                 bd->d_maxxfer &= ~(DEV_BSIZE - 1);
 528         }
 529         if (bd->d_maxxfer < DEV_BSIZE) {
 530                 cmn_err(CE_WARN, "%s: maximum transfer size too small!", name);
 531                 ddi_soft_state_free(bd_state, inst);
 532                 return (DDI_FAILURE);
 533         }
 534 
 535         bd->d_dip = dip;
 536         bd->d_handle = hdl;
 537         hdl->h_bd = bd;
 538         ddi_set_driver_private(dip, bd);
 539 
 540         mutex_init(&bd->d_iomutex, NULL, MUTEX_DRIVER, NULL);
 541         mutex_init(&bd->d_ocmutex, NULL, MUTEX_DRIVER, NULL);
 542         mutex_init(&bd->d_statemutex, NULL, MUTEX_DRIVER, NULL);
 543         cv_init(&bd->d_statecv, NULL, CV_DRIVER, NULL);
 544 
 545         list_create(&bd->d_waitq, sizeof (bd_xfer_impl_t),
 546             offsetof(struct bd_xfer_impl, i_linkage));
 547         list_create(&bd->d_runq, sizeof (bd_xfer_impl_t),
 548             offsetof(struct bd_xfer_impl, i_linkage));
 549 
 550         bd->d_cache = kmem_cache_create(kcache, sizeof (bd_xfer_impl_t), 8,
 551             bd_xfer_ctor, bd_xfer_dtor, NULL, bd, NULL, 0);
 552 
 553         bd->d_ksp = kstat_create(ddi_driver_name(dip), inst, NULL, "disk",
 554             KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
 555         if (bd->d_ksp != NULL) {
 556                 bd->d_ksp->ks_lock = &bd->d_iomutex;
 557                 kstat_install(bd->d_ksp);
 558                 bd->d_kiop = bd->d_ksp->ks_data;
 559         } else {
 560                 /*
 561                  * Even if we cannot create the kstat, we create a
 562                  * scratch kstat.  The reason for this is to ensure
 563                  * that we can update the kstat all of the time,
 564                  * without adding an extra branch instruction.
 565                  */
 566                 bd->d_kiop = kmem_zalloc(sizeof (kstat_io_t), KM_SLEEP);
 567         }
 568 
 569         cmlb_alloc_handle(&bd->d_cmlbh);
 570 
 571         bd->d_state = DKIO_NONE;
 572 
 573         bzero(&drive, sizeof (drive));
 574         bd->d_ops.o_drive_info(bd->d_private, &drive);
 575         bd->d_qsize = drive.d_qsize;
 576         bd->d_removable = drive.d_removable;
 577         bd->d_hotpluggable = drive.d_hotpluggable;
 578 
 579         if (drive.d_maxxfer && drive.d_maxxfer < bd->d_maxxfer)
 580                 bd->d_maxxfer = drive.d_maxxfer;
 581 
 582         bd_create_inquiry_props(dip, &drive);
 583 
 584         bd_create_errstats(bd, inst, &drive);
 585         bd_init_errstats(bd, &drive);
 586         bd_update_state(bd);
 587 
 588         rv = cmlb_attach(dip, &bd_tg_ops, DTYPE_DIRECT,
 589             bd->d_removable, bd->d_hotpluggable,
 590             /*LINTED: E_BAD_PTR_CAST_ALIGN*/
 591             *(uint64_t *)drive.d_eui64 != 0 ? DDI_NT_BLOCK_BLKDEV :
 592             drive.d_lun >= 0 ? DDI_NT_BLOCK_CHAN : DDI_NT_BLOCK,
 593             CMLB_FAKE_LABEL_ONE_PARTITION, bd->d_cmlbh, 0);
 594         if (rv != 0) {
 595                 cmlb_free_handle(&bd->d_cmlbh);
 596                 kmem_cache_destroy(bd->d_cache);
 597                 mutex_destroy(&bd->d_iomutex);
 598                 mutex_destroy(&bd->d_ocmutex);
 599                 mutex_destroy(&bd->d_statemutex);
 600                 cv_destroy(&bd->d_statecv);
 601                 list_destroy(&bd->d_waitq);
 602                 list_destroy(&bd->d_runq);
 603                 if (bd->d_ksp != NULL) {
 604                         kstat_delete(bd->d_ksp);
 605                         bd->d_ksp = NULL;
 606                 } else {
 607                         kmem_free(bd->d_kiop, sizeof (kstat_io_t));
 608                 }
 609                 ddi_soft_state_free(bd_state, inst);
 610                 return (DDI_FAILURE);
 611         }
 612 
 613         if (bd->d_ops.o_devid_init != NULL) {
 614                 rv = bd->d_ops.o_devid_init(bd->d_private, dip, &bd->d_devid);
 615                 if (rv == DDI_SUCCESS) {
 616                         if (ddi_devid_register(dip, bd->d_devid) !=
 617                             DDI_SUCCESS) {
 618                                 cmn_err(CE_WARN,
 619                                     "%s: unable to register devid", name);
 620                         }
 621                 }
 622         }
 623 
 624         /*
 625          * Add a zero-length attribute to tell the world we support
 626          * kernel ioctls (for layered drivers).  Also set up properties
 627          * used by HAL to identify removable media.
 628          */
 629         (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
 630             DDI_KERNEL_IOCTL, NULL, 0);
 631         if (bd->d_removable) {
 632                 (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
 633                     "removable-media", NULL, 0);
 634         }
 635         if (bd->d_hotpluggable) {
 636                 (void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
 637                     "hotpluggable", NULL, 0);
 638         }
 639 
 640         ddi_report_dev(dip);
 641 
 642         return (DDI_SUCCESS);
 643 }
 644 
 645 static int
 646 bd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 647 {
 648         bd_t    *bd;
 649 
 650         bd = ddi_get_driver_private(dip);
 651 
 652         switch (cmd) {
 653         case DDI_DETACH:
 654                 break;
 655         case DDI_SUSPEND:
 656                 /* We don't suspend, but our parent does */
 657                 return (DDI_SUCCESS);
 658         default:
 659                 return (DDI_FAILURE);
 660         }
 661         if (bd->d_ksp != NULL) {
 662                 kstat_delete(bd->d_ksp);
 663                 bd->d_ksp = NULL;
 664         } else {
 665                 kmem_free(bd->d_kiop, sizeof (kstat_io_t));
 666         }
 667 
 668         if (bd->d_errstats != NULL) {
 669                 kstat_delete(bd->d_errstats);
 670                 bd->d_errstats = NULL;
 671         } else {
 672                 kmem_free(bd->d_kerr, sizeof (struct bd_errstats));
 673                 mutex_destroy(bd->d_errmutex);
 674         }
 675 
 676         cmlb_detach(bd->d_cmlbh, 0);
 677         cmlb_free_handle(&bd->d_cmlbh);
 678         if (bd->d_devid)
 679                 ddi_devid_free(bd->d_devid);
 680         kmem_cache_destroy(bd->d_cache);
 681         mutex_destroy(&bd->d_iomutex);
 682         mutex_destroy(&bd->d_ocmutex);
 683         mutex_destroy(&bd->d_statemutex);
 684         cv_destroy(&bd->d_statecv);
 685         list_destroy(&bd->d_waitq);
 686         list_destroy(&bd->d_runq);
 687         ddi_soft_state_free(bd_state, ddi_get_instance(dip));
 688         return (DDI_SUCCESS);
 689 }
 690 
 691 static int
 692 bd_xfer_ctor(void *buf, void *arg, int kmflag)
 693 {
 694         bd_xfer_impl_t  *xi;
 695         bd_t            *bd = arg;
 696         int             (*dcb)(caddr_t);
 697 
 698         if (kmflag == KM_PUSHPAGE || kmflag == KM_SLEEP) {
 699                 dcb = DDI_DMA_SLEEP;
 700         } else {
 701                 dcb = DDI_DMA_DONTWAIT;
 702         }
 703 
 704         xi = buf;
 705         bzero(xi, sizeof (*xi));
 706         xi->i_bd = bd;
 707 
 708         if (bd->d_use_dma) {
 709                 if (ddi_dma_alloc_handle(bd->d_dip, &bd->d_dma, dcb, NULL,
 710                     &xi->i_dmah) != DDI_SUCCESS) {
 711                         return (-1);
 712                 }
 713         }
 714 
 715         return (0);
 716 }
 717 
 718 static void
 719 bd_xfer_dtor(void *buf, void *arg)
 720 {
 721         bd_xfer_impl_t  *xi = buf;
 722 
 723         _NOTE(ARGUNUSED(arg));
 724 
 725         if (xi->i_dmah)
 726                 ddi_dma_free_handle(&xi->i_dmah);
 727         xi->i_dmah = NULL;
 728 }
 729 
 730 static bd_xfer_impl_t *
 731 bd_xfer_alloc(bd_t *bd, struct buf *bp, int (*func)(void *, bd_xfer_t *),
 732     int kmflag)
 733 {
 734         bd_xfer_impl_t          *xi;
 735         int                     rv = 0;
 736         int                     status;
 737         unsigned                dir;
 738         int                     (*cb)(caddr_t);
 739         size_t                  len;
 740         uint32_t                shift;
 741 
 742         if (kmflag == KM_SLEEP) {
 743                 cb = DDI_DMA_SLEEP;
 744         } else {
 745                 cb = DDI_DMA_DONTWAIT;
 746         }
 747 
 748         xi = kmem_cache_alloc(bd->d_cache, kmflag);
 749         if (xi == NULL) {
 750                 bioerror(bp, ENOMEM);
 751                 return (NULL);
 752         }
 753 
 754         ASSERT(bp);
 755 
 756         xi->i_bp = bp;
 757         xi->i_func = func;
 758         xi->i_blkno = bp->b_lblkno >> (bd->d_blkshift - DEV_BSHIFT);
 759 
 760         if (bp->b_bcount == 0) {
 761                 xi->i_len = 0;
 762                 xi->i_nblks = 0;
 763                 xi->i_kaddr = NULL;
 764                 xi->i_resid = 0;
 765                 xi->i_num_win = 0;
 766                 goto done;
 767         }
 768 
 769         if (bp->b_flags & B_READ) {
 770                 dir = DDI_DMA_READ;
 771                 xi->i_func = bd->d_ops.o_read;
 772         } else {
 773                 dir = DDI_DMA_WRITE;
 774                 xi->i_func = bd->d_ops.o_write;
 775         }
 776 
 777         shift = bd->d_blkshift;
 778         xi->i_blkshift = shift;
 779 
 780         if (!bd->d_use_dma) {
 781                 bp_mapin(bp);
 782                 rv = 0;
 783                 xi->i_offset = 0;
 784                 xi->i_num_win =
 785                     (bp->b_bcount + (bd->d_maxxfer - 1)) / bd->d_maxxfer;
 786                 xi->i_cur_win = 0;
 787                 xi->i_len = min(bp->b_bcount, bd->d_maxxfer);
 788                 xi->i_nblks = xi->i_len >> shift;
 789                 xi->i_kaddr = bp->b_un.b_addr;
 790                 xi->i_resid = bp->b_bcount;
 791         } else {
 792 
 793                 /*
 794                  * We have to use consistent DMA if the address is misaligned.
 795                  */
 796                 if (((bp->b_flags & (B_PAGEIO | B_REMAPPED)) != B_PAGEIO) &&
 797                     ((uintptr_t)bp->b_un.b_addr & 0x7)) {
 798                         dir |= DDI_DMA_CONSISTENT | DDI_DMA_PARTIAL;
 799                 } else {
 800                         dir |= DDI_DMA_STREAMING | DDI_DMA_PARTIAL;
 801                 }
 802 
 803                 status = ddi_dma_buf_bind_handle(xi->i_dmah, bp, dir, cb,
 804                     NULL, &xi->i_dmac, &xi->i_ndmac);
 805                 switch (status) {
 806                 case DDI_DMA_MAPPED:
 807                         xi->i_num_win = 1;
 808                         xi->i_cur_win = 0;
 809                         xi->i_offset = 0;
 810                         xi->i_len = bp->b_bcount;
 811                         xi->i_nblks = xi->i_len >> shift;
 812                         xi->i_resid = bp->b_bcount;
 813                         rv = 0;
 814                         break;
 815                 case DDI_DMA_PARTIAL_MAP:
 816                         xi->i_cur_win = 0;
 817 
 818                         if ((ddi_dma_numwin(xi->i_dmah, &xi->i_num_win) !=
 819                             DDI_SUCCESS) ||
 820                             (ddi_dma_getwin(xi->i_dmah, 0, &xi->i_offset,
 821                             &len, &xi->i_dmac, &xi->i_ndmac) !=
 822                             DDI_SUCCESS) ||
 823                             (P2PHASE(len, (1U << shift)) != 0)) {
 824                                 (void) ddi_dma_unbind_handle(xi->i_dmah);
 825                                 rv = EFAULT;
 826                                 goto done;
 827                         }
 828                         xi->i_len = len;
 829                         xi->i_nblks = xi->i_len >> shift;
 830                         xi->i_resid = bp->b_bcount;
 831                         rv = 0;
 832                         break;
 833                 case DDI_DMA_NORESOURCES:
 834                         rv = EAGAIN;
 835                         goto done;
 836                 case DDI_DMA_TOOBIG:
 837                         rv = EINVAL;
 838                         goto done;
 839                 case DDI_DMA_NOMAPPING:
 840                 case DDI_DMA_INUSE:
 841                 default:
 842                         rv = EFAULT;
 843                         goto done;
 844                 }
 845         }
 846 
 847 done:
 848         if (rv != 0) {
 849                 kmem_cache_free(bd->d_cache, xi);
 850                 bioerror(bp, rv);
 851                 return (NULL);
 852         }
 853 
 854         return (xi);
 855 }
 856 
 857 static void
 858 bd_xfer_free(bd_xfer_impl_t *xi)
 859 {
 860         if (xi->i_dmah) {
 861                 (void) ddi_dma_unbind_handle(xi->i_dmah);
 862         }
 863         kmem_cache_free(xi->i_bd->d_cache, xi);
 864 }
 865 
 866 static int
 867 bd_open(dev_t *devp, int flag, int otyp, cred_t *credp)
 868 {
 869         dev_t           dev = *devp;
 870         bd_t            *bd;
 871         minor_t         part;
 872         minor_t         inst;
 873         uint64_t        mask;
 874         boolean_t       ndelay;
 875         int             rv;
 876         diskaddr_t      nblks;
 877         diskaddr_t      lba;
 878 
 879         _NOTE(ARGUNUSED(credp));
 880 
 881         part = BDPART(dev);
 882         inst = BDINST(dev);
 883 
 884         if (otyp >= OTYPCNT)
 885                 return (EINVAL);
 886 
 887         ndelay = (flag & (FNDELAY | FNONBLOCK)) ? B_TRUE : B_FALSE;
 888 
 889         /*
 890          * Block any DR events from changing the set of registered
 891          * devices while we function.
 892          */
 893         rw_enter(&bd_lock, RW_READER);
 894         if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
 895                 rw_exit(&bd_lock);
 896                 return (ENXIO);
 897         }
 898 
 899         mutex_enter(&bd->d_ocmutex);
 900 
 901         ASSERT(part < 64);
 902         mask = (1U << part);
 903 
 904         bd_update_state(bd);
 905 
 906         if (cmlb_validate(bd->d_cmlbh, 0, 0) != 0) {
 907 
 908                 /* non-blocking opens are allowed to succeed */
 909                 if (!ndelay) {
 910                         rv = ENXIO;
 911                         goto done;
 912                 }
 913         } else if (cmlb_partinfo(bd->d_cmlbh, part, &nblks, &lba,
 914             NULL, NULL, 0) == 0) {
 915 
 916                 /*
 917                  * We read the partinfo, verify valid ranges.  If the
 918                  * partition is invalid, and we aren't blocking or
 919                  * doing a raw access, then fail. (Non-blocking and
 920                  * raw accesses can still succeed to allow a disk with
 921                  * bad partition data to opened by format and fdisk.)
 922                  */
 923                 if ((!nblks) && ((!ndelay) || (otyp != OTYP_CHR))) {
 924                         rv = ENXIO;
 925                         goto done;
 926                 }
 927         } else if (!ndelay) {
 928                 /*
 929                  * cmlb_partinfo failed -- invalid partition or no
 930                  * disk label.
 931                  */
 932                 rv = ENXIO;
 933                 goto done;
 934         }
 935 
 936         if ((flag & FWRITE) && bd->d_rdonly) {
 937                 rv = EROFS;
 938                 goto done;
 939         }
 940 
 941         if ((bd->d_open_excl) & (mask)) {
 942                 rv = EBUSY;
 943                 goto done;
 944         }
 945         if (flag & FEXCL) {
 946                 if (bd->d_open_lyr[part]) {
 947                         rv = EBUSY;
 948                         goto done;
 949                 }
 950                 for (int i = 0; i < OTYP_LYR; i++) {
 951                         if (bd->d_open_reg[i] & mask) {
 952                                 rv = EBUSY;
 953                                 goto done;
 954                         }
 955                 }
 956         }
 957 
 958         if (otyp == OTYP_LYR) {
 959                 bd->d_open_lyr[part]++;
 960         } else {
 961                 bd->d_open_reg[otyp] |= mask;
 962         }
 963         if (flag & FEXCL) {
 964                 bd->d_open_excl |= mask;
 965         }
 966 
 967         rv = 0;
 968 done:
 969         mutex_exit(&bd->d_ocmutex);
 970         rw_exit(&bd_lock);
 971 
 972         return (rv);
 973 }
 974 
 975 static int
 976 bd_close(dev_t dev, int flag, int otyp, cred_t *credp)
 977 {
 978         bd_t            *bd;
 979         minor_t         inst;
 980         minor_t         part;
 981         uint64_t        mask;
 982         boolean_t       last = B_TRUE;
 983 
 984         _NOTE(ARGUNUSED(flag));
 985         _NOTE(ARGUNUSED(credp));
 986 
 987         part = BDPART(dev);
 988         inst = BDINST(dev);
 989 
 990         ASSERT(part < 64);
 991         mask = (1U << part);
 992 
 993         rw_enter(&bd_lock, RW_READER);
 994 
 995         if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
 996                 rw_exit(&bd_lock);
 997                 return (ENXIO);
 998         }
 999 
1000         mutex_enter(&bd->d_ocmutex);
1001         if (bd->d_open_excl & mask) {
1002                 bd->d_open_excl &= ~mask;
1003         }
1004         if (otyp == OTYP_LYR) {
1005                 bd->d_open_lyr[part]--;
1006         } else {
1007                 bd->d_open_reg[otyp] &= ~mask;
1008         }
1009         for (int i = 0; i < 64; i++) {
1010                 if (bd->d_open_lyr[part]) {
1011                         last = B_FALSE;
1012                 }
1013         }
1014         for (int i = 0; last && (i < OTYP_LYR); i++) {
1015                 if (bd->d_open_reg[i]) {
1016                         last = B_FALSE;
1017                 }
1018         }
1019         mutex_exit(&bd->d_ocmutex);
1020 
1021         if (last) {
1022                 cmlb_invalidate(bd->d_cmlbh, 0);
1023         }
1024         rw_exit(&bd_lock);
1025 
1026         return (0);
1027 }
1028 
1029 static int
1030 bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk)
1031 {
1032         minor_t         inst;
1033         minor_t         part;
1034         diskaddr_t      pstart;
1035         diskaddr_t      psize;
1036         bd_t            *bd;
1037         bd_xfer_impl_t  *xi;
1038         buf_t           *bp;
1039         int             rv;
1040         uint32_t        shift;
1041         daddr_t         d_blkno;
1042         int     d_nblk;
1043 
1044         rw_enter(&bd_lock, RW_READER);
1045 
1046         part = BDPART(dev);
1047         inst = BDINST(dev);
1048 
1049         if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1050                 rw_exit(&bd_lock);
1051                 return (ENXIO);
1052         }
1053         shift = bd->d_blkshift;
1054         d_blkno = blkno >> (shift - DEV_BSHIFT);
1055         d_nblk = nblk >> (shift - DEV_BSHIFT);
1056         /*
1057          * do cmlb, but do it synchronously unless we already have the
1058          * partition (which we probably should.)
1059          */
1060         if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL,
1061             (void *)1)) {
1062                 rw_exit(&bd_lock);
1063                 return (ENXIO);
1064         }
1065 
1066         if ((d_blkno + d_nblk) > psize) {
1067                 rw_exit(&bd_lock);
1068                 return (EINVAL);
1069         }
1070         bp = getrbuf(KM_NOSLEEP);
1071         if (bp == NULL) {
1072                 rw_exit(&bd_lock);
1073                 return (ENOMEM);
1074         }
1075 
1076         bp->b_bcount = nblk << DEV_BSHIFT;
1077         bp->b_resid = bp->b_bcount;
1078         bp->b_lblkno = blkno;
1079         bp->b_un.b_addr = caddr;
1080 
1081         xi = bd_xfer_alloc(bd, bp,  bd->d_ops.o_write, KM_NOSLEEP);
1082         if (xi == NULL) {
1083                 rw_exit(&bd_lock);
1084                 freerbuf(bp);
1085                 return (ENOMEM);
1086         }
1087         xi->i_blkno = d_blkno + pstart;
1088         xi->i_flags = BD_XFER_POLL;
1089         bd_submit(bd, xi);
1090         rw_exit(&bd_lock);
1091 
1092         /*
1093          * Generally, we should have run this entirely synchronously
1094          * at this point and the biowait call should be a no-op.  If
1095          * it didn't happen this way, it's a bug in the underlying
1096          * driver not honoring BD_XFER_POLL.
1097          */
1098         (void) biowait(bp);
1099         rv = geterror(bp);
1100         freerbuf(bp);
1101         return (rv);
1102 }
1103 
1104 void
1105 bd_minphys(struct buf *bp)
1106 {
1107         minor_t inst;
1108         bd_t    *bd;
1109         inst = BDINST(bp->b_edev);
1110 
1111         bd = ddi_get_soft_state(bd_state, inst);
1112 
1113         /*
1114          * In a non-debug kernel, bd_strategy will catch !bd as
1115          * well, and will fail nicely.
1116          */
1117         ASSERT(bd);
1118 
1119         if (bp->b_bcount > bd->d_maxxfer)
1120                 bp->b_bcount = bd->d_maxxfer;
1121 }
1122 
1123 static int
1124 bd_check_uio(dev_t dev, struct uio *uio)
1125 {
1126         bd_t            *bd;
1127         uint32_t        shift;
1128 
1129         if ((bd = ddi_get_soft_state(bd_state, BDINST(dev))) == NULL) {
1130                 return (ENXIO);
1131         }
1132 
1133         shift = bd->d_blkshift;
1134         if ((P2PHASE(uio->uio_loffset, (1U << shift)) != 0) ||
1135             (P2PHASE(uio->uio_iov->iov_len, (1U << shift)) != 0)) {
1136                 return (EINVAL);
1137         }
1138 
1139         return (0);
1140 }
1141 
1142 static int
1143 bd_read(dev_t dev, struct uio *uio, cred_t *credp)
1144 {
1145         _NOTE(ARGUNUSED(credp));
1146         int     ret = bd_check_uio(dev, uio);
1147         if (ret != 0) {
1148                 return (ret);
1149         }
1150         return (physio(bd_strategy, NULL, dev, B_READ, bd_minphys, uio));
1151 }
1152 
1153 static int
1154 bd_write(dev_t dev, struct uio *uio, cred_t *credp)
1155 {
1156         _NOTE(ARGUNUSED(credp));
1157         int     ret = bd_check_uio(dev, uio);
1158         if (ret != 0) {
1159                 return (ret);
1160         }
1161         return (physio(bd_strategy, NULL, dev, B_WRITE, bd_minphys, uio));
1162 }
1163 
1164 static int
1165 bd_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
1166 {
1167         _NOTE(ARGUNUSED(credp));
1168         int     ret = bd_check_uio(dev, aio->aio_uio);
1169         if (ret != 0) {
1170                 return (ret);
1171         }
1172         return (aphysio(bd_strategy, anocancel, dev, B_READ, bd_minphys, aio));
1173 }
1174 
1175 static int
1176 bd_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
1177 {
1178         _NOTE(ARGUNUSED(credp));
1179         int     ret = bd_check_uio(dev, aio->aio_uio);
1180         if (ret != 0) {
1181                 return (ret);
1182         }
1183         return (aphysio(bd_strategy, anocancel, dev, B_WRITE, bd_minphys, aio));
1184 }
1185 
1186 static int
1187 bd_strategy(struct buf *bp)
1188 {
1189         minor_t         inst;
1190         minor_t         part;
1191         bd_t            *bd;
1192         diskaddr_t      p_lba;
1193         diskaddr_t      p_nblks;
1194         diskaddr_t      b_nblks;
1195         bd_xfer_impl_t  *xi;
1196         uint32_t        shift;
1197         int             (*func)(void *, bd_xfer_t *);
1198         diskaddr_t      lblkno;
1199 
1200         part = BDPART(bp->b_edev);
1201         inst = BDINST(bp->b_edev);
1202 
1203         ASSERT(bp);
1204 
1205         bp->b_resid = bp->b_bcount;
1206 
1207         if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1208                 bioerror(bp, ENXIO);
1209                 biodone(bp);
1210                 return (0);
1211         }
1212 
1213         if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba,
1214             NULL, NULL, 0)) {
1215                 bioerror(bp, ENXIO);
1216                 biodone(bp);
1217                 return (0);
1218         }
1219 
1220         shift = bd->d_blkshift;
1221         lblkno = bp->b_lblkno >> (shift - DEV_BSHIFT);
1222         if ((P2PHASE(bp->b_lblkno, (1U << (shift - DEV_BSHIFT))) != 0) ||
1223             (P2PHASE(bp->b_bcount, (1U << shift)) != 0) ||
1224             (lblkno > p_nblks)) {
1225                 bioerror(bp, EINVAL);
1226                 biodone(bp);
1227                 return (0);
1228         }
1229         b_nblks = bp->b_bcount >> shift;
1230         if ((lblkno == p_nblks) || (bp->b_bcount == 0)) {
1231                 biodone(bp);
1232                 return (0);
1233         }
1234 
1235         if ((b_nblks + lblkno) > p_nblks) {
1236                 bp->b_resid = ((lblkno + b_nblks - p_nblks) << shift);
1237                 bp->b_bcount -= bp->b_resid;
1238         } else {
1239                 bp->b_resid = 0;
1240         }
1241         func = (bp->b_flags & B_READ) ? bd->d_ops.o_read : bd->d_ops.o_write;
1242 
1243         xi = bd_xfer_alloc(bd, bp, func, KM_NOSLEEP);
1244         if (xi == NULL) {
1245                 xi = bd_xfer_alloc(bd, bp, func, KM_PUSHPAGE);
1246         }
1247         if (xi == NULL) {
1248                 /* bd_request_alloc will have done bioerror */
1249                 biodone(bp);
1250                 return (0);
1251         }
1252         xi->i_blkno = lblkno + p_lba;
1253 
1254         bd_submit(bd, xi);
1255 
1256         return (0);
1257 }
1258 
1259 static int
1260 bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp)
1261 {
1262         minor_t         inst;
1263         uint16_t        part;
1264         bd_t            *bd;
1265         void            *ptr = (void *)arg;
1266         int             rv;
1267 
1268         part = BDPART(dev);
1269         inst = BDINST(dev);
1270 
1271         if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1272                 return (ENXIO);
1273         }
1274 
1275         rv = cmlb_ioctl(bd->d_cmlbh, dev, cmd, arg, flag, credp, rvalp, 0);
1276         if (rv != ENOTTY)
1277                 return (rv);
1278 
1279         if (rvalp != NULL) {
1280                 /* the return value of the ioctl is 0 by default */
1281                 *rvalp = 0;
1282         }
1283 
1284         switch (cmd) {
1285         case DKIOCGMEDIAINFO: {
1286                 struct dk_minfo minfo;
1287 
1288                 /* make sure our state information is current */
1289                 bd_update_state(bd);
1290                 bzero(&minfo, sizeof (minfo));
1291                 minfo.dki_media_type = DK_FIXED_DISK;
1292                 minfo.dki_lbsize = (1U << bd->d_blkshift);
1293                 minfo.dki_capacity = bd->d_numblks;
1294                 if (ddi_copyout(&minfo, ptr, sizeof (minfo), flag)) {
1295                         return (EFAULT);
1296                 }
1297                 return (0);
1298         }
1299         case DKIOCGMEDIAINFOEXT: {
1300                 struct dk_minfo_ext miext;
1301 
1302                 /* make sure our state information is current */
1303                 bd_update_state(bd);
1304                 bzero(&miext, sizeof (miext));
1305                 miext.dki_media_type = DK_FIXED_DISK;
1306                 miext.dki_lbsize = (1U << bd->d_blkshift);
1307                 miext.dki_pbsize = (1U << bd->d_pblkshift);
1308                 miext.dki_capacity = bd->d_numblks;
1309                 if (ddi_copyout(&miext, ptr, sizeof (miext), flag)) {
1310                         return (EFAULT);
1311                 }
1312                 return (0);
1313         }
1314         case DKIOCINFO: {
1315                 struct dk_cinfo cinfo;
1316                 bzero(&cinfo, sizeof (cinfo));
1317                 cinfo.dki_ctype = DKC_BLKDEV;
1318                 cinfo.dki_cnum = ddi_get_instance(ddi_get_parent(bd->d_dip));
1319                 (void) snprintf(cinfo.dki_cname, sizeof (cinfo.dki_cname),
1320                     "%s", ddi_driver_name(ddi_get_parent(bd->d_dip)));
1321                 (void) snprintf(cinfo.dki_dname, sizeof (cinfo.dki_dname),
1322                     "%s", ddi_driver_name(bd->d_dip));
1323                 cinfo.dki_unit = inst;
1324                 cinfo.dki_flags = DKI_FMTVOL;
1325                 cinfo.dki_partition = part;
1326                 cinfo.dki_maxtransfer = bd->d_maxxfer / DEV_BSIZE;
1327                 cinfo.dki_addr = 0;
1328                 cinfo.dki_slave = 0;
1329                 cinfo.dki_space = 0;
1330                 cinfo.dki_prio = 0;
1331                 cinfo.dki_vec = 0;
1332                 if (ddi_copyout(&cinfo, ptr, sizeof (cinfo), flag)) {
1333                         return (EFAULT);
1334                 }
1335                 return (0);
1336         }
1337         case DKIOCREMOVABLE: {
1338                 int i;
1339                 i = bd->d_removable ? 1 : 0;
1340                 if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
1341                         return (EFAULT);
1342                 }
1343                 return (0);
1344         }
1345         case DKIOCHOTPLUGGABLE: {
1346                 int i;
1347                 i = bd->d_hotpluggable ? 1 : 0;
1348                 if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
1349                         return (EFAULT);
1350                 }
1351                 return (0);
1352         }
1353         case DKIOCREADONLY: {
1354                 int i;
1355                 i = bd->d_rdonly ? 1 : 0;
1356                 if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
1357                         return (EFAULT);
1358                 }
1359                 return (0);
1360         }
1361         case DKIOCSOLIDSTATE: {
1362                 int i;
1363                 i = bd->d_ssd ? 1 : 0;
1364                 if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
1365                         return (EFAULT);
1366                 }
1367                 return (0);
1368         }
1369         case DKIOCSTATE: {
1370                 enum dkio_state state;
1371                 if (ddi_copyin(ptr, &state, sizeof (state), flag)) {
1372                         return (EFAULT);
1373                 }
1374                 if ((rv = bd_check_state(bd, &state)) != 0) {
1375                         return (rv);
1376                 }
1377                 if (ddi_copyout(&state, ptr, sizeof (state), flag)) {
1378                         return (EFAULT);
1379                 }
1380                 return (0);
1381         }
1382         case DKIOCFLUSHWRITECACHE: {
1383                 struct dk_callback *dkc = NULL;
1384 
1385                 if (flag & FKIOCTL)
1386                         dkc = (void *)arg;
1387 
1388                 rv = bd_flush_write_cache(bd, dkc);
1389                 return (rv);
1390         }
1391 
1392         default:
1393                 break;
1394 
1395         }
1396         return (ENOTTY);
1397 }
1398 
1399 static int
1400 bd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
1401     char *name, caddr_t valuep, int *lengthp)
1402 {
1403         bd_t    *bd;
1404 
1405         bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1406         if (bd == NULL)
1407                 return (ddi_prop_op(dev, dip, prop_op, mod_flags,
1408                     name, valuep, lengthp));
1409 
1410         return (cmlb_prop_op(bd->d_cmlbh, dev, dip, prop_op, mod_flags, name,
1411             valuep, lengthp, BDPART(dev), 0));
1412 }
1413 
1414 
1415 static int
1416 bd_tg_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, diskaddr_t start,
1417     size_t length, void *tg_cookie)
1418 {
1419         bd_t            *bd;
1420         buf_t           *bp;
1421         bd_xfer_impl_t  *xi;
1422         int             rv;
1423         int             (*func)(void *, bd_xfer_t *);
1424         int             kmflag;
1425 
1426         /*
1427          * If we are running in polled mode (such as during dump(9e)
1428          * execution), then we cannot sleep for kernel allocations.
1429          */
1430         kmflag = tg_cookie ? KM_NOSLEEP : KM_SLEEP;
1431 
1432         bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1433 
1434         if (P2PHASE(length, (1U << bd->d_blkshift)) != 0) {
1435                 /* We can only transfer whole blocks at a time! */
1436                 return (EINVAL);
1437         }
1438 
1439         if ((bp = getrbuf(kmflag)) == NULL) {
1440                 return (ENOMEM);
1441         }
1442 
1443         switch (cmd) {
1444         case TG_READ:
1445                 bp->b_flags = B_READ;
1446                 func = bd->d_ops.o_read;
1447                 break;
1448         case TG_WRITE:
1449                 bp->b_flags = B_WRITE;
1450                 func = bd->d_ops.o_write;
1451                 break;
1452         default:
1453                 freerbuf(bp);
1454                 return (EINVAL);
1455         }
1456 
1457         bp->b_un.b_addr = bufaddr;
1458         bp->b_bcount = length;
1459         xi = bd_xfer_alloc(bd, bp, func, kmflag);
1460         if (xi == NULL) {
1461                 rv = geterror(bp);
1462                 freerbuf(bp);
1463                 return (rv);
1464         }
1465         xi->i_flags = tg_cookie ? BD_XFER_POLL : 0;
1466         xi->i_blkno = start;
1467         bd_submit(bd, xi);
1468         (void) biowait(bp);
1469         rv = geterror(bp);
1470         freerbuf(bp);
1471 
1472         return (rv);
1473 }
1474 
1475 static int
1476 bd_tg_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
1477 {
1478         bd_t            *bd;
1479 
1480         _NOTE(ARGUNUSED(tg_cookie));
1481         bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1482 
1483         switch (cmd) {
1484         case TG_GETPHYGEOM:
1485         case TG_GETVIRTGEOM:
1486                 /*
1487                  * We don't have any "geometry" as such, let cmlb
1488                  * fabricate something.
1489                  */
1490                 return (ENOTTY);
1491 
1492         case TG_GETCAPACITY:
1493                 bd_update_state(bd);
1494                 *(diskaddr_t *)arg = bd->d_numblks;
1495                 return (0);
1496 
1497         case TG_GETBLOCKSIZE:
1498                 *(uint32_t *)arg = (1U << bd->d_blkshift);
1499                 return (0);
1500 
1501         case TG_GETATTR:
1502                 /*
1503                  * It turns out that cmlb really doesn't do much for
1504                  * non-writable media, but lets make the information
1505                  * available for it in case it does more in the
1506                  * future.  (The value is currently used for
1507                  * triggering special behavior for CD-ROMs.)
1508                  */
1509                 bd_update_state(bd);
1510                 ((tg_attribute_t *)arg)->media_is_writable =
1511                     bd->d_rdonly ? B_FALSE : B_TRUE;
1512                 ((tg_attribute_t *)arg)->media_is_solid_state = bd->d_ssd;
1513                 return (0);
1514 
1515         default:
1516                 return (EINVAL);
1517         }
1518 }
1519 
1520 
1521 static void
1522 bd_sched(bd_t *bd)
1523 {
1524         bd_xfer_impl_t  *xi;
1525         struct buf      *bp;
1526         int             rv;
1527 
1528         mutex_enter(&bd->d_iomutex);
1529 
1530         while ((bd->d_qactive < bd->d_qsize) &&
1531             ((xi = list_remove_head(&bd->d_waitq)) != NULL)) {
1532                 bd->d_qactive++;
1533                 kstat_waitq_to_runq(bd->d_kiop);
1534                 list_insert_tail(&bd->d_runq, xi);
1535 
1536                 /*
1537                  * Submit the job to the driver.  We drop the I/O mutex
1538                  * so that we can deal with the case where the driver
1539                  * completion routine calls back into us synchronously.
1540                  */
1541 
1542                 mutex_exit(&bd->d_iomutex);
1543 
1544                 rv = xi->i_func(bd->d_private, &xi->i_public);
1545                 if (rv != 0) {
1546                         bp = xi->i_bp;
1547                         bioerror(bp, rv);
1548                         biodone(bp);
1549 
1550                         atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);
1551 
1552                         mutex_enter(&bd->d_iomutex);
1553                         bd->d_qactive--;
1554                         kstat_runq_exit(bd->d_kiop);
1555                         list_remove(&bd->d_runq, xi);
1556                         bd_xfer_free(xi);
1557                 } else {
1558                         mutex_enter(&bd->d_iomutex);
1559                 }
1560         }
1561 
1562         mutex_exit(&bd->d_iomutex);
1563 }
1564 
1565 static void
1566 bd_submit(bd_t *bd, bd_xfer_impl_t *xi)
1567 {
1568         mutex_enter(&bd->d_iomutex);
1569         list_insert_tail(&bd->d_waitq, xi);
1570         kstat_waitq_enter(bd->d_kiop);
1571         mutex_exit(&bd->d_iomutex);
1572 
1573         bd_sched(bd);
1574 }
1575 
1576 static void
1577 bd_runq_exit(bd_xfer_impl_t *xi, int err)
1578 {
1579         bd_t    *bd = xi->i_bd;
1580         buf_t   *bp = xi->i_bp;
1581 
1582         mutex_enter(&bd->d_iomutex);
1583         bd->d_qactive--;
1584         kstat_runq_exit(bd->d_kiop);
1585         list_remove(&bd->d_runq, xi);
1586         mutex_exit(&bd->d_iomutex);
1587 
1588         if (err == 0) {
1589                 if (bp->b_flags & B_READ) {
1590                         bd->d_kiop->reads++;
1591                         bd->d_kiop->nread += (bp->b_bcount - xi->i_resid);
1592                 } else {
1593                         bd->d_kiop->writes++;
1594                         bd->d_kiop->nwritten += (bp->b_bcount - xi->i_resid);
1595                 }
1596         }
1597         bd_sched(bd);
1598 }
1599 
1600 static void
1601 bd_update_state(bd_t *bd)
1602 {
1603         enum    dkio_state      state = DKIO_INSERTED;
1604         boolean_t               docmlb = B_FALSE;
1605         bd_media_t              media;
1606 
1607         bzero(&media, sizeof (media));
1608 
1609         mutex_enter(&bd->d_statemutex);
1610         if (bd->d_ops.o_media_info(bd->d_private, &media) != 0) {
1611                 bd->d_numblks = 0;
1612                 state = DKIO_EJECTED;
1613                 goto done;
1614         }
1615 
1616         if ((media.m_blksize < 512) ||
1617             (!ISP2(media.m_blksize)) ||
1618             (P2PHASE(bd->d_maxxfer, media.m_blksize))) {
1619                 cmn_err(CE_WARN, "%s%d: Invalid media block size (%d)",
1620                     ddi_driver_name(bd->d_dip), ddi_get_instance(bd->d_dip),
1621                     media.m_blksize);
1622                 /*
1623                  * We can't use the media, treat it as not present.
1624                  */
1625                 state = DKIO_EJECTED;
1626                 bd->d_numblks = 0;
1627                 goto done;
1628         }
1629 
1630         if (((1U << bd->d_blkshift) != media.m_blksize) ||
1631             (bd->d_numblks != media.m_nblks)) {
1632                 /* Device size changed */
1633                 docmlb = B_TRUE;
1634         }
1635 
1636         bd->d_blkshift = ddi_ffs(media.m_blksize) - 1;
1637         bd->d_pblkshift = bd->d_blkshift;
1638         bd->d_numblks = media.m_nblks;
1639         bd->d_rdonly = media.m_readonly;
1640         bd->d_ssd = media.m_solidstate;
1641 
1642         /*
1643          * Only use the supplied physical block size if it is non-zero,
1644          * greater or equal to the block size, and a power of 2. Ignore it
1645          * if not, it's just informational and we can still use the media.
1646          */
1647         if ((media.m_pblksize != 0) &&
1648             (media.m_pblksize >= media.m_blksize) &&
1649             (ISP2(media.m_pblksize)))
1650                 bd->d_pblkshift = ddi_ffs(media.m_pblksize) - 1;
1651 
1652 done:
1653         if (state != bd->d_state) {
1654                 bd->d_state = state;
1655                 cv_broadcast(&bd->d_statecv);
1656                 docmlb = B_TRUE;
1657         }
1658         mutex_exit(&bd->d_statemutex);
1659 
1660         bd->d_kerr->bd_capacity.value.ui64 = bd->d_numblks << bd->d_blkshift;
1661 
1662         if (docmlb) {
1663                 if (state == DKIO_INSERTED) {
1664                         (void) cmlb_validate(bd->d_cmlbh, 0, 0);
1665                 } else {
1666                         cmlb_invalidate(bd->d_cmlbh, 0);
1667                 }
1668         }
1669 }
1670 
1671 static int
1672 bd_check_state(bd_t *bd, enum dkio_state *state)
1673 {
1674         clock_t         when;
1675 
1676         for (;;) {
1677 
1678                 bd_update_state(bd);
1679 
1680                 mutex_enter(&bd->d_statemutex);
1681 
1682                 if (bd->d_state != *state) {
1683                         *state = bd->d_state;
1684                         mutex_exit(&bd->d_statemutex);
1685                         break;
1686                 }
1687 
1688                 when = drv_usectohz(1000000);
1689                 if (cv_reltimedwait_sig(&bd->d_statecv, &bd->d_statemutex,
1690                     when, TR_CLOCK_TICK) == 0) {
1691                         mutex_exit(&bd->d_statemutex);
1692                         return (EINTR);
1693                 }
1694 
1695                 mutex_exit(&bd->d_statemutex);
1696         }
1697 
1698         return (0);
1699 }
1700 
1701 static int
1702 bd_flush_write_cache_done(struct buf *bp)
1703 {
1704         struct dk_callback *dc = (void *)bp->b_private;
1705 
1706         (*dc->dkc_callback)(dc->dkc_cookie, geterror(bp));
1707         kmem_free(dc, sizeof (*dc));
1708         freerbuf(bp);
1709         return (0);
1710 }
1711 
1712 static int
1713 bd_flush_write_cache(bd_t *bd, struct dk_callback *dkc)
1714 {
1715         buf_t                   *bp;
1716         struct dk_callback      *dc;
1717         bd_xfer_impl_t          *xi;
1718         int                     rv;
1719 
1720         if (bd->d_ops.o_sync_cache == NULL) {
1721                 return (ENOTSUP);
1722         }
1723         if ((bp = getrbuf(KM_SLEEP)) == NULL) {
1724                 return (ENOMEM);
1725         }
1726         bp->b_resid = 0;
1727         bp->b_bcount = 0;
1728 
1729         xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_sync_cache, KM_SLEEP);
1730         if (xi == NULL) {
1731                 rv = geterror(bp);
1732                 freerbuf(bp);
1733                 return (rv);
1734         }
1735 
1736         /* Make an asynchronous flush, but only if there is a callback */
1737         if (dkc != NULL && dkc->dkc_callback != NULL) {
1738                 /* Make a private copy of the callback structure */
1739                 dc = kmem_alloc(sizeof (*dc), KM_SLEEP);
1740                 *dc = *dkc;
1741                 bp->b_private = dc;
1742                 bp->b_iodone = bd_flush_write_cache_done;
1743 
1744                 bd_submit(bd, xi);
1745                 return (0);
1746         }
1747 
1748         /* In case there is no callback, perform a synchronous flush */
1749         bd_submit(bd, xi);
1750         (void) biowait(bp);
1751         rv = geterror(bp);
1752         freerbuf(bp);
1753 
1754         return (rv);
1755 }
1756 
1757 /*
1758  * Nexus support.
1759  */
1760 int
1761 bd_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
1762     void *arg, void *result)
1763 {
1764         bd_handle_t     hdl;
1765 
1766         switch (ctlop) {
1767         case DDI_CTLOPS_REPORTDEV:
1768                 cmn_err(CE_CONT, "?Block device: %s@%s, %s%d\n",
1769                     ddi_node_name(rdip), ddi_get_name_addr(rdip),
1770                     ddi_driver_name(rdip), ddi_get_instance(rdip));
1771                 return (DDI_SUCCESS);
1772 
1773         case DDI_CTLOPS_INITCHILD:
1774                 hdl = ddi_get_parent_data((dev_info_t *)arg);
1775                 if (hdl == NULL) {
1776                         return (DDI_NOT_WELL_FORMED);
1777                 }
1778                 ddi_set_name_addr((dev_info_t *)arg, hdl->h_addr);
1779                 return (DDI_SUCCESS);
1780 
1781         case DDI_CTLOPS_UNINITCHILD:
1782                 ddi_set_name_addr((dev_info_t *)arg, NULL);
1783                 ndi_prop_remove_all((dev_info_t *)arg);
1784                 return (DDI_SUCCESS);
1785 
1786         default:
1787                 return (ddi_ctlops(dip, rdip, ctlop, arg, result));
1788         }
1789 }
1790 
1791 /*
1792  * Functions for device drivers.
1793  */
1794 bd_handle_t
1795 bd_alloc_handle(void *private, bd_ops_t *ops, ddi_dma_attr_t *dma, int kmflag)
1796 {
1797         bd_handle_t     hdl;
1798 
1799         hdl = kmem_zalloc(sizeof (*hdl), kmflag);
1800         if (hdl != NULL) {
1801                 hdl->h_ops = *ops;
1802                 hdl->h_dma = dma;
1803                 hdl->h_private = private;
1804         }
1805 
1806         return (hdl);
1807 }
1808 
1809 void
1810 bd_free_handle(bd_handle_t hdl)
1811 {
1812         kmem_free(hdl, sizeof (*hdl));
1813 }
1814 
1815 int
1816 bd_attach_handle(dev_info_t *dip, bd_handle_t hdl)
1817 {
1818         dev_info_t      *child;
1819         bd_drive_t      drive = { 0 };
1820 
1821         /* if drivers don't override this, make it assume none */
1822         drive.d_lun = -1;
1823         hdl->h_ops.o_drive_info(hdl->h_private, &drive);
1824 
1825         hdl->h_parent = dip;
1826         hdl->h_name = "blkdev";
1827 
1828         /*LINTED: E_BAD_PTR_CAST_ALIGN*/
1829         if (*(uint64_t *)drive.d_eui64 != 0) {
1830                 if (drive.d_lun >= 0) {
1831                         (void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
1832                             "w%02X%02X%02X%02X%02X%02X%02X%02X,%X",
1833                             drive.d_eui64[0], drive.d_eui64[1],
1834                             drive.d_eui64[2], drive.d_eui64[3],
1835                             drive.d_eui64[4], drive.d_eui64[5],
1836                             drive.d_eui64[6], drive.d_eui64[7], drive.d_lun);
1837                 } else {
1838                         (void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
1839                             "w%02X%02X%02X%02X%02X%02X%02X%02X",
1840                             drive.d_eui64[0], drive.d_eui64[1],
1841                             drive.d_eui64[2], drive.d_eui64[3],
1842                             drive.d_eui64[4], drive.d_eui64[5],
1843                             drive.d_eui64[6], drive.d_eui64[7]);
1844                 }
1845         } else {
1846                 if (drive.d_lun >= 0) {
1847                         (void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
1848                             "%X,%X", drive.d_target, drive.d_lun);
1849                 } else {
1850                         (void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
1851                             "%X", drive.d_target);
1852                 }
1853         }
1854 
1855         if (ndi_devi_alloc(dip, hdl->h_name, (pnode_t)DEVI_SID_NODEID,
1856             &child) != NDI_SUCCESS) {
1857                 cmn_err(CE_WARN, "%s%d: unable to allocate node %s@%s",
1858                     ddi_driver_name(dip), ddi_get_instance(dip),
1859                     "blkdev", hdl->h_addr);
1860                 return (DDI_FAILURE);
1861         }
1862 
1863         ddi_set_parent_data(child, hdl);
1864         hdl->h_child = child;
1865 
1866         if (ndi_devi_online(child, 0) == NDI_FAILURE) {
1867                 cmn_err(CE_WARN, "%s%d: failed bringing node %s@%s online",
1868                     ddi_driver_name(dip), ddi_get_instance(dip),
1869                     hdl->h_name, hdl->h_addr);
1870                 (void) ndi_devi_free(child);
1871                 return (DDI_FAILURE);
1872         }
1873 
1874         return (DDI_SUCCESS);
1875 }
1876 
1877 int
1878 bd_detach_handle(bd_handle_t hdl)
1879 {
1880         int     circ;
1881         int     rv;
1882         char    *devnm;
1883 
1884         if (hdl->h_child == NULL) {
1885                 return (DDI_SUCCESS);
1886         }
1887         ndi_devi_enter(hdl->h_parent, &circ);
1888         if (i_ddi_node_state(hdl->h_child) < DS_INITIALIZED) {
1889                 rv = ddi_remove_child(hdl->h_child, 0);
1890         } else {
1891                 devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
1892                 (void) ddi_deviname(hdl->h_child, devnm);
1893                 (void) devfs_clean(hdl->h_parent, devnm + 1, DV_CLEAN_FORCE);
1894                 rv = ndi_devi_unconfig_one(hdl->h_parent, devnm + 1, NULL,
1895                     NDI_DEVI_REMOVE | NDI_UNCONFIG);
1896                 kmem_free(devnm, MAXNAMELEN + 1);
1897         }
1898         if (rv == 0) {
1899                 hdl->h_child = NULL;
1900         }
1901 
1902         ndi_devi_exit(hdl->h_parent, circ);
1903         return (rv == NDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
1904 }
1905 
1906 void
1907 bd_xfer_done(bd_xfer_t *xfer, int err)
1908 {
1909         bd_xfer_impl_t  *xi = (void *)xfer;
1910         buf_t           *bp = xi->i_bp;
1911         int             rv = DDI_SUCCESS;
1912         bd_t            *bd = xi->i_bd;
1913         size_t          len;
1914 
1915         if (err != 0) {
1916                 bd_runq_exit(xi, err);
1917                 atomic_inc_32(&bd->d_kerr->bd_harderrs.value.ui32);
1918 
1919                 bp->b_resid += xi->i_resid;
1920                 bd_xfer_free(xi);
1921                 bioerror(bp, err);
1922                 biodone(bp);
1923                 return;
1924         }
1925 
1926         xi->i_cur_win++;
1927         xi->i_resid -= xi->i_len;
1928 
1929         if (xi->i_resid == 0) {
1930                 /* Job completed succcessfully! */
1931                 bd_runq_exit(xi, 0);
1932 
1933                 bd_xfer_free(xi);
1934                 biodone(bp);
1935                 return;
1936         }
1937 
1938         xi->i_blkno += xi->i_nblks;
1939 
1940         if (bd->d_use_dma) {
1941                 /* More transfer still pending... advance to next DMA window. */
1942                 rv = ddi_dma_getwin(xi->i_dmah, xi->i_cur_win,
1943                     &xi->i_offset, &len, &xi->i_dmac, &xi->i_ndmac);
1944         } else {
1945                 /* Advance memory window. */
1946                 xi->i_kaddr += xi->i_len;
1947                 xi->i_offset += xi->i_len;
1948                 len = min(bp->b_bcount - xi->i_offset, bd->d_maxxfer);
1949         }
1950 
1951 
1952         if ((rv != DDI_SUCCESS) ||
1953             (P2PHASE(len, (1U << xi->i_blkshift)) != 0)) {
1954                 bd_runq_exit(xi, EFAULT);
1955 
1956                 bp->b_resid += xi->i_resid;
1957                 bd_xfer_free(xi);
1958                 bioerror(bp, EFAULT);
1959                 biodone(bp);
1960                 return;
1961         }
1962         xi->i_len = len;
1963         xi->i_nblks = len >> xi->i_blkshift;
1964 
1965         /* Submit next window to hardware. */
1966         rv = xi->i_func(bd->d_private, &xi->i_public);
1967         if (rv != 0) {
1968                 bd_runq_exit(xi, rv);
1969 
1970                 atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);
1971 
1972                 bp->b_resid += xi->i_resid;
1973                 bd_xfer_free(xi);
1974                 bioerror(bp, rv);
1975                 biodone(bp);
1976         }
1977 }
1978 
1979 void
1980 bd_error(bd_xfer_t *xfer, int error)
1981 {
1982         bd_xfer_impl_t  *xi = (void *)xfer;
1983         bd_t            *bd = xi->i_bd;
1984 
1985         switch (error) {
1986         case BD_ERR_MEDIA:
1987                 atomic_inc_32(&bd->d_kerr->bd_rq_media_err.value.ui32);
1988                 break;
1989         case BD_ERR_NTRDY:
1990                 atomic_inc_32(&bd->d_kerr->bd_rq_ntrdy_err.value.ui32);
1991                 break;
1992         case BD_ERR_NODEV:
1993                 atomic_inc_32(&bd->d_kerr->bd_rq_nodev_err.value.ui32);
1994                 break;
1995         case BD_ERR_RECOV:
1996                 atomic_inc_32(&bd->d_kerr->bd_rq_recov_err.value.ui32);
1997                 break;
1998         case BD_ERR_ILLRQ:
1999                 atomic_inc_32(&bd->d_kerr->bd_rq_illrq_err.value.ui32);
2000                 break;
2001         case BD_ERR_PFA:
2002                 atomic_inc_32(&bd->d_kerr->bd_rq_pfa_err.value.ui32);
2003                 break;
2004         default:
2005                 cmn_err(CE_PANIC, "bd_error: unknown error type %d", error);
2006                 break;
2007         }
2008 }
2009 
2010 void
2011 bd_state_change(bd_handle_t hdl)
2012 {
2013         bd_t            *bd;
2014 
2015         if ((bd = hdl->h_bd) != NULL) {
2016                 bd_update_state(bd);
2017         }
2018 }
2019 
2020 void
2021 bd_mod_init(struct dev_ops *devops)
2022 {
2023         static struct bus_ops bd_bus_ops = {
2024                 BUSO_REV,               /* busops_rev */
2025                 nullbusmap,             /* bus_map */
2026                 NULL,                   /* bus_get_intrspec (OBSOLETE) */
2027                 NULL,                   /* bus_add_intrspec (OBSOLETE) */
2028                 NULL,                   /* bus_remove_intrspec (OBSOLETE) */
2029                 i_ddi_map_fault,        /* bus_map_fault */
2030                 NULL,                   /* bus_dma_map (OBSOLETE) */
2031                 ddi_dma_allochdl,       /* bus_dma_allochdl */
2032                 ddi_dma_freehdl,        /* bus_dma_freehdl */
2033                 ddi_dma_bindhdl,        /* bus_dma_bindhdl */
2034                 ddi_dma_unbindhdl,      /* bus_dma_unbindhdl */
2035                 ddi_dma_flush,          /* bus_dma_flush */
2036                 ddi_dma_win,            /* bus_dma_win */
2037                 ddi_dma_mctl,           /* bus_dma_ctl */
2038                 bd_bus_ctl,             /* bus_ctl */
2039                 ddi_bus_prop_op,        /* bus_prop_op */
2040                 NULL,                   /* bus_get_eventcookie */
2041                 NULL,                   /* bus_add_eventcall */
2042                 NULL,                   /* bus_remove_eventcall */
2043                 NULL,                   /* bus_post_event */
2044                 NULL,                   /* bus_intr_ctl (OBSOLETE) */
2045                 NULL,                   /* bus_config */
2046                 NULL,                   /* bus_unconfig */
2047                 NULL,                   /* bus_fm_init */
2048                 NULL,                   /* bus_fm_fini */
2049                 NULL,                   /* bus_fm_access_enter */
2050                 NULL,                   /* bus_fm_access_exit */
2051                 NULL,                   /* bus_power */
2052                 NULL,                   /* bus_intr_op */
2053         };
2054 
2055         devops->devo_bus_ops = &bd_bus_ops;
2056 
2057         /*
2058          * NB: The device driver is free to supply its own
2059          * character entry device support.
2060          */
2061 }
2062 
2063 void
2064 bd_mod_fini(struct dev_ops *devops)
2065 {
2066         devops->devo_bus_ops = NULL;
2067 }