1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2016 Nexenta Systems, Inc. All rights reserved. 14 * Copyright 2016 Tegile Systems, Inc. All rights reserved. 15 * Copyright (c) 2016 The MathWorks, Inc. All rights reserved. 16 */ 17 18 /* 19 * blkdev driver for NVMe compliant storage devices 20 * 21 * This driver was written to conform to version 1.0e of the NVMe specification. 22 * It may work with newer versions, but that is completely untested and disabled 23 * by default. 24 * 25 * The driver has only been tested on x86 systems and will not work on big- 26 * endian systems without changes to the code accessing registers and data 27 * structures used by the hardware. 28 * 29 * 30 * Interrupt Usage: 31 * 32 * The driver will use a FIXED interrupt while configuring the device, as the 33 * specification requires. Later in the attach process it will switch to MSI-X 34 * or MSI if supported. The driver wants to have one interrupt vector per CPU, 35 * but it will work correctly if fewer are available. Interrupts can be shared 36 * by queues; the interrupt handler will iterate through the I/O queue array in 37 * steps of n_intr_cnt. Usually only the admin queue will share an interrupt 38 * with one I/O queue. The interrupt handler will retrieve completed commands 39 * from all queues sharing an interrupt vector and will post them to a taskq 40 * for completion processing. 41 * 42 * 43 * Command Processing: 44 * 45 * NVMe devices can have up to 65536 I/O queue pairs, with each queue holding up 46 * to 65536 I/O commands. The driver will configure one I/O queue pair per 47 * available interrupt vector, with the queue length usually much smaller than 48 * the maximum of 65536. If the hardware doesn't provide enough queues, fewer 49 * interrupt vectors will be used. 50 * 51 * Additionally, the hardware provides a single special admin queue pair that can 52 * hold up to 4096 admin commands. 53 * 54 * From the hardware perspective both queues of a queue pair are independent, 55 * but they share some driver state: the command array (holding pointers to 56 * commands currently being processed by the hardware) and the active command 57 * counter. Access to the submission side of a queue pair and the shared state 58 * is protected by nq_mutex. The completion side of a queue pair does not need 59 * that protection apart from its access to the shared state; it is called only 60 * in the interrupt handler which does not run concurrently for the same 61 * interrupt vector. 62 * 63 * When a command is submitted to a queue pair the active command counter is 64 * incremented and a pointer to the command is stored in the command array. The 65 * array index is used as command identifier (CID) in the submission queue 66 * entry. Some commands may take a very long time to complete, and if the queue 67 * wraps around in that time a submission may find the next array slot still 68 * in use by a long-running command. In this case the array is sequentially 69 * searched for the next free slot. The length of the command array is the same 70 * as the configured queue length.
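 *
 * To illustrate, the CID selection described above boils down to a linear
 * probe of the command array; this is a condensed sketch of what
 * nvme_submit_cmd() further down does while holding nq_mutex:
 *
 *	while (qp->nq_cmd[qp->nq_next_cmd] != NULL)
 *		qp->nq_next_cmd = (qp->nq_next_cmd + 1) % qp->nq_nentry;
 *	qp->nq_cmd[qp->nq_next_cmd] = cmd;
 *	cmd->nc_sqe.sqe_cid = qp->nq_next_cmd;
 *	qp->nq_next_cmd = (qp->nq_next_cmd + 1) % qp->nq_nentry;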
71 * 72 * 73 * Namespace Support: 74 * 75 * NVMe devices can have multiple namespaces, each being an independent data 76 * store. The driver supports multiple namespaces and creates a blkdev interface 77 * for each namespace found. Namespaces can have various attributes to support 78 * thin provisioning and protection information. This driver does not support 79 * any of this and ignores namespaces that have these attributes. 80 * 81 * 82 * Blkdev Interface: 83 * 84 * This driver uses blkdev to do all the heavy lifting involved with presenting 85 * a disk device to the system. As a result, the processing of I/O requests is 86 * relatively simple as blkdev takes care of partitioning, boundary checks, DMA 87 * setup, and splitting of transfers into manageable chunks. 88 * 89 * I/O requests coming in from blkdev are turned into NVM commands and posted to 90 * an I/O queue. The queue is selected by taking the CPU id modulo the number of 91 * queues (see the example sketch below). There is currently no timeout handling of I/O commands. 92 * 93 * Blkdev also supports querying device/media information and generating a 94 * devid. The driver reports the best block size as determined by the namespace 95 * format back to blkdev as physical block size to support partition and block 96 * alignment. The devid is composed using the device vendor ID, model number, 97 * serial number, and the namespace ID. 98 * 99 * 100 * Error Handling: 101 * 102 * Error handling is currently limited to detecting fatal hardware errors, 103 * either by asynchronous events, or synchronously through command status or 104 * admin command timeouts. In case of severe errors the device is fenced off; 105 * all further requests will return EIO. FMA is then called to fault the device. 106 * 107 * The hardware has a limit for outstanding asynchronous event requests. Before 108 * this limit is known the driver assumes it is at least 1 and posts a single 109 * asynchronous request. Later, when the limit is known, more asynchronous event 110 * requests are posted to allow quicker reception of error information. When an 111 * asynchronous event is posted by the hardware the driver will parse the error 112 * status fields and log information or fault the device, depending on the 113 * severity of the asynchronous event. The asynchronous event request is then 114 * reused and posted to the admin queue again. 115 * 116 * On command completion the command status is checked for errors. In case of 117 * errors indicating a driver bug the driver panics. Almost all other error 118 * status values just cause EIO to be returned. 119 * 120 * Command timeouts are currently detected for all admin commands except 121 * asynchronous event requests. If a command times out and the hardware appears 122 * to be healthy the driver attempts to abort the command. If this fails the 123 * driver assumes the device to be dead, fences it off, and calls FMA to retire 124 * it. In general admin commands are issued at attach time only. No timeout 125 * handling of normal I/O commands is presently done. 126 * 127 * In some cases the ABORT command itself may time out, too. In 128 * that case the device is also declared dead and fenced off. 129 * 130 * 131 * Quiesce / Fast Reboot: 132 * 133 * The driver currently does not support fast reboot. A quiesce(9E) entry point 134 * is still provided, which is used to send a shutdown notification to the 135 * device.
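 *
 *
 * Example I/O Submission:
 *
 * A condensed sketch of the path described under "Blkdev Interface" above
 * (error handling omitted; n_ioq_count is assumed here to hold the number of
 * I/O queue pairs, and the authoritative code lives in nvme_bd_cmd() and
 * nvme_submit_cmd() further down):
 *
 *	cmd = nvme_create_nvm_cmd(ns, opc, xfer);
 *	cmd->nc_sqid = (CPU->cpu_id % nvme->n_ioq_count) + 1;
 *	(void) nvme_submit_cmd(nvme->n_ioq[cmd->nc_sqid], cmd);
 *
 * Queue 0 is the admin queue, hence the + 1 to map requests onto the I/O
 * queues.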
136 * 137 * 138 * Driver Configuration: 139 * 140 * The following driver properties can be changed to control some aspects of the 141 * driver's operation: 142 * - strict-version: can be set to 0 to allow devices conforming to newer 143 * versions to be used 144 * - ignore-unknown-vendor-status: can be set to 1 to not treat any vendor- 145 * specific command status as a fatal error leading to device faulting 146 * - admin-queue-len: the maximum length of the admin queue (16-4096) 147 * - io-queue-len: the maximum length of the I/O queues (16-65536) 148 * - async-event-limit: the maximum number of asynchronous event requests to be 149 * posted by the driver 150 * 151 * 152 * TODO: 153 * - figure out a sane default for I/O queue depth reported to blkdev 154 * - polled I/O support for kernel core dumping 155 * - FMA handling of media errors 156 * - support for the Volatile Write Cache 157 * - support for devices supporting very large I/O requests using chained PRPs 158 * - support for querying log pages from user space 159 * - support for configuring hardware parameters like interrupt coalescing 160 * - support for media formatting and hard partitioning into namespaces 161 * - support for big-endian systems 162 * - support for fast reboot 163 */ 164 165 #include <sys/byteorder.h> 166 #ifdef _BIG_ENDIAN 167 #error nvme driver needs porting for big-endian platforms 168 #endif 169 170 #include <sys/modctl.h> 171 #include <sys/conf.h> 172 #include <sys/devops.h> 173 #include <sys/ddi.h> 174 #include <sys/sunddi.h> 175 #include <sys/bitmap.h> 176 #include <sys/sysmacros.h> 177 #include <sys/param.h> 178 #include <sys/varargs.h> 179 #include <sys/cpuvar.h> 180 #include <sys/disp.h> 181 #include <sys/blkdev.h> 182 #include <sys/atomic.h> 183 #include <sys/archsystm.h> 184 #include <sys/sata/sata_hba.h> 185 186 #include "nvme_reg.h" 187 #include "nvme_var.h" 188 189 190 /* NVMe spec version supported */ 191 static const int nvme_version_major = 1; 192 static const int nvme_version_minor = 0; 193 194 /* tunable for admin command timeout in seconds, default is 1s */ 195 static volatile int nvme_admin_cmd_timeout = 1; 196 197 static int nvme_attach(dev_info_t *, ddi_attach_cmd_t); 198 static int nvme_detach(dev_info_t *, ddi_detach_cmd_t); 199 static int nvme_quiesce(dev_info_t *); 200 static int nvme_fm_errcb(dev_info_t *, ddi_fm_error_t *, const void *); 201 static int nvme_setup_interrupts(nvme_t *, int, int); 202 static void nvme_release_interrupts(nvme_t *); 203 static uint_t nvme_intr(caddr_t, caddr_t); 204 205 static void nvme_shutdown(nvme_t *, int, boolean_t); 206 static boolean_t nvme_reset(nvme_t *, boolean_t); 207 static int nvme_init(nvme_t *); 208 static nvme_cmd_t *nvme_alloc_cmd(nvme_t *, int); 209 static void nvme_free_cmd(nvme_cmd_t *); 210 static nvme_cmd_t *nvme_create_nvm_cmd(nvme_namespace_t *, uint8_t, 211 bd_xfer_t *); 212 static int nvme_admin_cmd(nvme_cmd_t *, int); 213 static int nvme_submit_cmd(nvme_qpair_t *, nvme_cmd_t *); 214 static nvme_cmd_t *nvme_retrieve_cmd(nvme_t *, nvme_qpair_t *); 215 static boolean_t nvme_wait_cmd(nvme_cmd_t *, uint_t); 216 static void nvme_wakeup_cmd(void *); 217 static void nvme_async_event_task(void *); 218 219 static int nvme_check_unknown_cmd_status(nvme_cmd_t *); 220 static int nvme_check_vendor_cmd_status(nvme_cmd_t *); 221 static int nvme_check_integrity_cmd_status(nvme_cmd_t *); 222 static int nvme_check_specific_cmd_status(nvme_cmd_t *); 223 static int nvme_check_generic_cmd_status(nvme_cmd_t *); 224 static inline int
nvme_check_cmd_status(nvme_cmd_t *); 225 226 static void nvme_abort_cmd(nvme_cmd_t *); 227 static int nvme_async_event(nvme_t *); 228 static void *nvme_get_logpage(nvme_t *, uint8_t, ...); 229 static void *nvme_identify(nvme_t *, uint32_t); 230 static int nvme_set_nqueues(nvme_t *, uint16_t); 231 232 static void nvme_free_dma(nvme_dma_t *); 233 static int nvme_zalloc_dma(nvme_t *, size_t, uint_t, ddi_dma_attr_t *, 234 nvme_dma_t **); 235 static int nvme_zalloc_queue_dma(nvme_t *, uint32_t, uint16_t, uint_t, 236 nvme_dma_t **); 237 static void nvme_free_qpair(nvme_qpair_t *); 238 static int nvme_alloc_qpair(nvme_t *, uint32_t, nvme_qpair_t **, int); 239 static int nvme_create_io_qpair(nvme_t *, nvme_qpair_t *, uint16_t); 240 241 static inline void nvme_put64(nvme_t *, uintptr_t, uint64_t); 242 static inline void nvme_put32(nvme_t *, uintptr_t, uint32_t); 243 static inline uint64_t nvme_get64(nvme_t *, uintptr_t); 244 static inline uint32_t nvme_get32(nvme_t *, uintptr_t); 245 246 static boolean_t nvme_check_regs_hdl(nvme_t *); 247 static boolean_t nvme_check_dma_hdl(nvme_dma_t *); 248 249 static int nvme_fill_prp(nvme_cmd_t *, bd_xfer_t *); 250 251 static void nvme_bd_xfer_done(void *); 252 static void nvme_bd_driveinfo(void *, bd_drive_t *); 253 static int nvme_bd_mediainfo(void *, bd_media_t *); 254 static int nvme_bd_cmd(nvme_namespace_t *, bd_xfer_t *, uint8_t); 255 static int nvme_bd_read(void *, bd_xfer_t *); 256 static int nvme_bd_write(void *, bd_xfer_t *); 257 static int nvme_bd_sync(void *, bd_xfer_t *); 258 static int nvme_bd_devid(void *, dev_info_t *, ddi_devid_t *); 259 260 static int nvme_prp_dma_constructor(void *, void *, int); 261 static void nvme_prp_dma_destructor(void *, void *); 262 263 static void nvme_prepare_devid(nvme_t *, uint32_t); 264 265 static void *nvme_state; 266 static kmem_cache_t *nvme_cmd_cache; 267 268 /* 269 * DMA attributes for queue DMA memory 270 * 271 * Queue DMA memory must be page aligned. The maximum length of a queue is 272 * 65536 entries, and an entry can be 64 bytes long. 273 */ 274 static ddi_dma_attr_t nvme_queue_dma_attr = { 275 .dma_attr_version = DMA_ATTR_V0, 276 .dma_attr_addr_lo = 0, 277 .dma_attr_addr_hi = 0xffffffffffffffffULL, 278 .dma_attr_count_max = (UINT16_MAX + 1) * sizeof (nvme_sqe_t) - 1, 279 .dma_attr_align = 0x1000, 280 .dma_attr_burstsizes = 0x7ff, 281 .dma_attr_minxfer = 0x1000, 282 .dma_attr_maxxfer = (UINT16_MAX + 1) * sizeof (nvme_sqe_t), 283 .dma_attr_seg = 0xffffffffffffffffULL, 284 .dma_attr_sgllen = 1, 285 .dma_attr_granular = 1, 286 .dma_attr_flags = 0, 287 }; 288 289 /* 290 * DMA attributes for transfers using Physical Region Page (PRP) entries 291 * 292 * A PRP entry describes one page of DMA memory using the page size specified 293 * in the controller configuration's memory page size register (CC.MPS). It uses 294 * a 64bit base address aligned to this page size. There is no limitation on 295 * chaining PRPs together for arbitrarily large DMA transfers. 
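 *
 * For example, a transfer that spans no more than two memory pages needs no
 * PRP list at all; the admin command helpers further down place the cookie
 * addresses directly into the PRP entries of the submission queue entry,
 * which for a two-cookie buffer amounts to:
 *
 *	cmd->nc_sqe.sqe_dptr.d_prp[0] = cmd->nc_dma->nd_cookie.dmac_laddress;
 *	ddi_dma_nextcookie(cmd->nc_dma->nd_dmah, &cmd->nc_dma->nd_cookie);
 *	cmd->nc_sqe.sqe_dptr.d_prp[1] = cmd->nc_dma->nd_cookie.dmac_laddress;
 *
 * Larger transfers require the second PRP entry to point to a list of PRP
 * entries instead, which nvme_fill_prp() takes care of on the I/O path.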
296 */ 297 static ddi_dma_attr_t nvme_prp_dma_attr = { 298 .dma_attr_version = DMA_ATTR_V0, 299 .dma_attr_addr_lo = 0, 300 .dma_attr_addr_hi = 0xffffffffffffffffULL, 301 .dma_attr_count_max = 0xfff, 302 .dma_attr_align = 0x1000, 303 .dma_attr_burstsizes = 0x7ff, 304 .dma_attr_minxfer = 0x1000, 305 .dma_attr_maxxfer = 0x1000, 306 .dma_attr_seg = 0xfff, 307 .dma_attr_sgllen = -1, 308 .dma_attr_granular = 1, 309 .dma_attr_flags = 0, 310 }; 311 312 /* 313 * DMA attributes for transfers using scatter/gather lists 314 * 315 * A SGL entry describes a chunk of DMA memory using a 64bit base address and a 316 * 32bit length field. SGL Segment and SGL Last Segment entries require the 317 * length to be a multiple of 16 bytes. 318 */ 319 static ddi_dma_attr_t nvme_sgl_dma_attr = { 320 .dma_attr_version = DMA_ATTR_V0, 321 .dma_attr_addr_lo = 0, 322 .dma_attr_addr_hi = 0xffffffffffffffffULL, 323 .dma_attr_count_max = 0xffffffffUL, 324 .dma_attr_align = 1, 325 .dma_attr_burstsizes = 0x7ff, 326 .dma_attr_minxfer = 0x10, 327 .dma_attr_maxxfer = 0xfffffffffULL, 328 .dma_attr_seg = 0xffffffffffffffffULL, 329 .dma_attr_sgllen = -1, 330 .dma_attr_granular = 0x10, 331 .dma_attr_flags = 0 332 }; 333 334 static ddi_device_acc_attr_t nvme_reg_acc_attr = { 335 .devacc_attr_version = DDI_DEVICE_ATTR_V0, 336 .devacc_attr_endian_flags = DDI_STRUCTURE_LE_ACC, 337 .devacc_attr_dataorder = DDI_STRICTORDER_ACC 338 }; 339 340 static struct dev_ops nvme_dev_ops = { 341 .devo_rev = DEVO_REV, 342 .devo_refcnt = 0, 343 .devo_getinfo = ddi_no_info, 344 .devo_identify = nulldev, 345 .devo_probe = nulldev, 346 .devo_attach = nvme_attach, 347 .devo_detach = nvme_detach, 348 .devo_reset = nodev, 349 .devo_cb_ops = NULL, 350 .devo_bus_ops = NULL, 351 .devo_power = NULL, 352 .devo_quiesce = nvme_quiesce, 353 }; 354 355 static struct modldrv nvme_modldrv = { 356 .drv_modops = &mod_driverops, 357 .drv_linkinfo = "NVMe v1.0e", 358 .drv_dev_ops = &nvme_dev_ops 359 }; 360 361 static struct modlinkage nvme_modlinkage = { 362 .ml_rev = MODREV_1, 363 .ml_linkage = { &nvme_modldrv, NULL } 364 }; 365 366 static bd_ops_t nvme_bd_ops = { 367 .o_version = BD_OPS_VERSION_0, 368 .o_drive_info = nvme_bd_driveinfo, 369 .o_media_info = nvme_bd_mediainfo, 370 .o_devid_init = nvme_bd_devid, 371 .o_sync_cache = nvme_bd_sync, 372 .o_read = nvme_bd_read, 373 .o_write = nvme_bd_write, 374 }; 375 376 int 377 _init(void) 378 { 379 int error; 380 381 error = ddi_soft_state_init(&nvme_state, sizeof (nvme_t), 1); 382 if (error != DDI_SUCCESS) 383 return (error); 384 385 nvme_cmd_cache = kmem_cache_create("nvme_cmd_cache", 386 sizeof (nvme_cmd_t), 64, NULL, NULL, NULL, NULL, NULL, 0); 387 388 bd_mod_init(&nvme_dev_ops); 389 390 error = mod_install(&nvme_modlinkage); 391 if (error != DDI_SUCCESS) { 392 ddi_soft_state_fini(&nvme_state); 393 bd_mod_fini(&nvme_dev_ops); 394 } 395 396 return (error); 397 } 398 399 int 400 _fini(void) 401 { 402 int error; 403 404 error = mod_remove(&nvme_modlinkage); 405 if (error == DDI_SUCCESS) { 406 ddi_soft_state_fini(&nvme_state); 407 kmem_cache_destroy(nvme_cmd_cache); 408 bd_mod_fini(&nvme_dev_ops); 409 } 410 411 return (error); 412 } 413 414 int 415 _info(struct modinfo *modinfop) 416 { 417 return (mod_info(&nvme_modlinkage, modinfop)); 418 } 419 420 static inline void 421 nvme_put64(nvme_t *nvme, uintptr_t reg, uint64_t val) 422 { 423 ASSERT(((uintptr_t)(nvme->n_regs + reg) & 0x7) == 0); 424 425 /*LINTED: E_BAD_PTR_CAST_ALIGN*/ 426 ddi_put64(nvme->n_regh, (uint64_t *)(nvme->n_regs + reg), val); 427 } 428 429 static inline void 
430 nvme_put32(nvme_t *nvme, uintptr_t reg, uint32_t val) 431 { 432 ASSERT(((uintptr_t)(nvme->n_regs + reg) & 0x3) == 0); 433 434 /*LINTED: E_BAD_PTR_CAST_ALIGN*/ 435 ddi_put32(nvme->n_regh, (uint32_t *)(nvme->n_regs + reg), val); 436 } 437 438 static inline uint64_t 439 nvme_get64(nvme_t *nvme, uintptr_t reg) 440 { 441 uint64_t val; 442 443 ASSERT(((uintptr_t)(nvme->n_regs + reg) & 0x7) == 0); 444 445 /*LINTED: E_BAD_PTR_CAST_ALIGN*/ 446 val = ddi_get64(nvme->n_regh, (uint64_t *)(nvme->n_regs + reg)); 447 448 return (val); 449 } 450 451 static inline uint32_t 452 nvme_get32(nvme_t *nvme, uintptr_t reg) 453 { 454 uint32_t val; 455 456 ASSERT(((uintptr_t)(nvme->n_regs + reg) & 0x3) == 0); 457 458 /*LINTED: E_BAD_PTR_CAST_ALIGN*/ 459 val = ddi_get32(nvme->n_regh, (uint32_t *)(nvme->n_regs + reg)); 460 461 return (val); 462 } 463 464 static boolean_t 465 nvme_check_regs_hdl(nvme_t *nvme) 466 { 467 ddi_fm_error_t error; 468 469 ddi_fm_acc_err_get(nvme->n_regh, &error, DDI_FME_VERSION); 470 471 if (error.fme_status != DDI_FM_OK) 472 return (B_TRUE); 473 474 return (B_FALSE); 475 } 476 477 static boolean_t 478 nvme_check_dma_hdl(nvme_dma_t *dma) 479 { 480 ddi_fm_error_t error; 481 482 if (dma == NULL) 483 return (B_FALSE); 484 485 ddi_fm_dma_err_get(dma->nd_dmah, &error, DDI_FME_VERSION); 486 487 if (error.fme_status != DDI_FM_OK) 488 return (B_TRUE); 489 490 return (B_FALSE); 491 } 492 493 static void 494 nvme_free_dma_common(nvme_dma_t *dma) 495 { 496 if (dma->nd_dmah != NULL) 497 (void) ddi_dma_unbind_handle(dma->nd_dmah); 498 if (dma->nd_acch != NULL) 499 ddi_dma_mem_free(&dma->nd_acch); 500 if (dma->nd_dmah != NULL) 501 ddi_dma_free_handle(&dma->nd_dmah); 502 } 503 504 static void 505 nvme_free_dma(nvme_dma_t *dma) 506 { 507 nvme_free_dma_common(dma); 508 kmem_free(dma, sizeof (*dma)); 509 } 510 511 static void 512 nvme_prp_dma_destructor(void *buf, void *private) 513 { 514 nvme_dma_t *dma = (nvme_dma_t *)buf; 515 516 nvme_free_dma_common(dma); 517 } 518 519 static int 520 nvme_alloc_dma_common(nvme_t *nvme, nvme_dma_t *dma, 521 size_t len, uint_t flags, ddi_dma_attr_t *dma_attr) 522 { 523 if (ddi_dma_alloc_handle(nvme->n_dip, dma_attr, DDI_DMA_SLEEP, NULL, 524 &dma->nd_dmah) != DDI_SUCCESS) { 525 /* 526 * Due to DDI_DMA_SLEEP this can't be DDI_DMA_NORESOURCES, and 527 * the only other possible error is DDI_DMA_BADATTR which 528 * indicates a driver bug which should cause a panic. 529 */ 530 dev_err(nvme->n_dip, CE_PANIC, 531 "!failed to get DMA handle, check DMA attributes"); 532 return (DDI_FAILURE); 533 } 534 535 /* 536 * ddi_dma_mem_alloc() can only fail when DDI_DMA_NOSLEEP is specified 537 * or the flags are conflicting, which isn't the case here. 
538 */ 539 (void) ddi_dma_mem_alloc(dma->nd_dmah, len, &nvme->n_reg_acc_attr, 540 DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, &dma->nd_memp, 541 &dma->nd_len, &dma->nd_acch); 542 543 if (ddi_dma_addr_bind_handle(dma->nd_dmah, NULL, dma->nd_memp, 544 dma->nd_len, flags | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, 545 &dma->nd_cookie, &dma->nd_ncookie) != DDI_DMA_MAPPED) { 546 dev_err(nvme->n_dip, CE_WARN, 547 "!failed to bind DMA memory"); 548 atomic_inc_32(&nvme->n_dma_bind_err); 549 nvme_free_dma_common(dma); 550 return (DDI_FAILURE); 551 } 552 553 return (DDI_SUCCESS); 554 } 555 556 static int 557 nvme_zalloc_dma(nvme_t *nvme, size_t len, uint_t flags, 558 ddi_dma_attr_t *dma_attr, nvme_dma_t **ret) 559 { 560 nvme_dma_t *dma = kmem_zalloc(sizeof (nvme_dma_t), KM_SLEEP); 561 562 if (nvme_alloc_dma_common(nvme, dma, len, flags, dma_attr) != 563 DDI_SUCCESS) { 564 *ret = NULL; 565 kmem_free(dma, sizeof (nvme_dma_t)); 566 return (DDI_FAILURE); 567 } 568 569 bzero(dma->nd_memp, dma->nd_len); 570 571 *ret = dma; 572 return (DDI_SUCCESS); 573 } 574 575 static int 576 nvme_prp_dma_constructor(void *buf, void *private, int flags) 577 { 578 nvme_dma_t *dma = (nvme_dma_t *)buf; 579 nvme_t *nvme = (nvme_t *)private; 580 581 dma->nd_dmah = NULL; 582 dma->nd_acch = NULL; 583 584 if (nvme_alloc_dma_common(nvme, dma, nvme->n_pagesize, 585 DDI_DMA_READ, &nvme->n_prp_dma_attr) != DDI_SUCCESS) { 586 return (-1); 587 } 588 589 ASSERT(dma->nd_ncookie == 1); 590 591 dma->nd_cached = B_TRUE; 592 593 return (0); 594 } 595 596 static int 597 nvme_zalloc_queue_dma(nvme_t *nvme, uint32_t nentry, uint16_t qe_len, 598 uint_t flags, nvme_dma_t **dma) 599 { 600 uint32_t len = nentry * qe_len; 601 ddi_dma_attr_t q_dma_attr = nvme->n_queue_dma_attr; 602 603 len = roundup(len, nvme->n_pagesize); 604 605 q_dma_attr.dma_attr_minxfer = len; 606 607 if (nvme_zalloc_dma(nvme, len, flags, &q_dma_attr, dma) 608 != DDI_SUCCESS) { 609 dev_err(nvme->n_dip, CE_WARN, 610 "!failed to get DMA memory for queue"); 611 goto fail; 612 } 613 614 if ((*dma)->nd_ncookie != 1) { 615 dev_err(nvme->n_dip, CE_WARN, 616 "!got too many cookies for queue DMA"); 617 goto fail; 618 } 619 620 return (DDI_SUCCESS); 621 622 fail: 623 if (*dma) { 624 nvme_free_dma(*dma); 625 *dma = NULL; 626 } 627 628 return (DDI_FAILURE); 629 } 630 631 static void 632 nvme_free_qpair(nvme_qpair_t *qp) 633 { 634 int i; 635 636 mutex_destroy(&qp->nq_mutex); 637 638 if (qp->nq_sqdma != NULL) 639 nvme_free_dma(qp->nq_sqdma); 640 if (qp->nq_cqdma != NULL) 641 nvme_free_dma(qp->nq_cqdma); 642 643 if (qp->nq_active_cmds > 0) 644 for (i = 0; i != qp->nq_nentry; i++) 645 if (qp->nq_cmd[i] != NULL) 646 nvme_free_cmd(qp->nq_cmd[i]); 647 648 if (qp->nq_cmd != NULL) 649 kmem_free(qp->nq_cmd, sizeof (nvme_cmd_t *) * qp->nq_nentry); 650 651 kmem_free(qp, sizeof (nvme_qpair_t)); 652 } 653 654 static int 655 nvme_alloc_qpair(nvme_t *nvme, uint32_t nentry, nvme_qpair_t **nqp, 656 int idx) 657 { 658 nvme_qpair_t *qp = kmem_zalloc(sizeof (*qp), KM_SLEEP); 659 660 mutex_init(&qp->nq_mutex, NULL, MUTEX_DRIVER, 661 DDI_INTR_PRI(nvme->n_intr_pri)); 662 663 if (nvme_zalloc_queue_dma(nvme, nentry, sizeof (nvme_sqe_t), 664 DDI_DMA_WRITE, &qp->nq_sqdma) != DDI_SUCCESS) 665 goto fail; 666 667 if (nvme_zalloc_queue_dma(nvme, nentry, sizeof (nvme_cqe_t), 668 DDI_DMA_READ, &qp->nq_cqdma) != DDI_SUCCESS) 669 goto fail; 670 671 qp->nq_sq = (nvme_sqe_t *)qp->nq_sqdma->nd_memp; 672 qp->nq_cq = (nvme_cqe_t *)qp->nq_cqdma->nd_memp; 673 qp->nq_nentry = nentry; 674 675 qp->nq_sqtdbl = NVME_REG_SQTDBL(nvme, idx); 
676 qp->nq_cqhdbl = NVME_REG_CQHDBL(nvme, idx); 677 678 qp->nq_cmd = kmem_zalloc(sizeof (nvme_cmd_t *) * nentry, KM_SLEEP); 679 qp->nq_next_cmd = 0; 680 681 *nqp = qp; 682 return (DDI_SUCCESS); 683 684 fail: 685 nvme_free_qpair(qp); 686 *nqp = NULL; 687 688 return (DDI_FAILURE); 689 } 690 691 static nvme_cmd_t * 692 nvme_alloc_cmd(nvme_t *nvme, int kmflag) 693 { 694 nvme_cmd_t *cmd = kmem_cache_alloc(nvme_cmd_cache, kmflag); 695 696 if (cmd == NULL) 697 return (cmd); 698 699 bzero(cmd, sizeof (nvme_cmd_t)); 700 701 cmd->nc_nvme = nvme; 702 703 mutex_init(&cmd->nc_mutex, NULL, MUTEX_DRIVER, 704 DDI_INTR_PRI(nvme->n_intr_pri)); 705 cv_init(&cmd->nc_cv, NULL, CV_DRIVER, NULL); 706 707 return (cmd); 708 } 709 710 static void 711 nvme_free_cmd(nvme_cmd_t *cmd) 712 { 713 if (cmd->nc_dma) { 714 if (cmd->nc_dma->nd_cached) 715 kmem_cache_free(cmd->nc_nvme->n_prp_cache, 716 cmd->nc_dma); 717 else 718 nvme_free_dma(cmd->nc_dma); 719 cmd->nc_dma = NULL; 720 } 721 722 cv_destroy(&cmd->nc_cv); 723 mutex_destroy(&cmd->nc_mutex); 724 725 kmem_cache_free(nvme_cmd_cache, cmd); 726 } 727 728 static int 729 nvme_submit_cmd(nvme_qpair_t *qp, nvme_cmd_t *cmd) 730 { 731 nvme_reg_sqtdbl_t tail = { 0 }; 732 733 mutex_enter(&qp->nq_mutex); 734 735 if (qp->nq_active_cmds == qp->nq_nentry) { 736 mutex_exit(&qp->nq_mutex); 737 return (DDI_FAILURE); 738 } 739 740 cmd->nc_completed = B_FALSE; 741 742 /* 743 * Try to insert the cmd into the active cmd array at the nq_next_cmd 744 * slot. If the slot is already occupied advance to the next slot and 745 * try again. This can happen for long running commands like async event 746 * requests. 747 */ 748 while (qp->nq_cmd[qp->nq_next_cmd] != NULL) 749 qp->nq_next_cmd = (qp->nq_next_cmd + 1) % qp->nq_nentry; 750 qp->nq_cmd[qp->nq_next_cmd] = cmd; 751 752 qp->nq_active_cmds++; 753 754 cmd->nc_sqe.sqe_cid = qp->nq_next_cmd; 755 bcopy(&cmd->nc_sqe, &qp->nq_sq[qp->nq_sqtail], sizeof (nvme_sqe_t)); 756 (void) ddi_dma_sync(qp->nq_sqdma->nd_dmah, 757 sizeof (nvme_sqe_t) * qp->nq_sqtail, 758 sizeof (nvme_sqe_t), DDI_DMA_SYNC_FORDEV); 759 qp->nq_next_cmd = (qp->nq_next_cmd + 1) % qp->nq_nentry; 760 761 tail.b.sqtdbl_sqt = qp->nq_sqtail = (qp->nq_sqtail + 1) % qp->nq_nentry; 762 nvme_put32(cmd->nc_nvme, qp->nq_sqtdbl, tail.r); 763 764 mutex_exit(&qp->nq_mutex); 765 return (DDI_SUCCESS); 766 } 767 768 static nvme_cmd_t * 769 nvme_retrieve_cmd(nvme_t *nvme, nvme_qpair_t *qp) 770 { 771 nvme_reg_cqhdbl_t head = { 0 }; 772 773 nvme_cqe_t *cqe; 774 nvme_cmd_t *cmd; 775 776 (void) ddi_dma_sync(qp->nq_cqdma->nd_dmah, 0, 777 sizeof (nvme_cqe_t) * qp->nq_nentry, DDI_DMA_SYNC_FORKERNEL); 778 779 cqe = &qp->nq_cq[qp->nq_cqhead]; 780 781 /* Check phase tag of CQE. Hardware inverts it for new entries. */ 782 if (cqe->cqe_sf.sf_p == qp->nq_phase) 783 return (NULL); 784 785 ASSERT(nvme->n_ioq[cqe->cqe_sqid] == qp); 786 ASSERT(cqe->cqe_cid < qp->nq_nentry); 787 788 mutex_enter(&qp->nq_mutex); 789 cmd = qp->nq_cmd[cqe->cqe_cid]; 790 qp->nq_cmd[cqe->cqe_cid] = NULL; 791 qp->nq_active_cmds--; 792 mutex_exit(&qp->nq_mutex); 793 794 ASSERT(cmd != NULL); 795 ASSERT(cmd->nc_nvme == nvme); 796 ASSERT(cmd->nc_sqid == cqe->cqe_sqid); 797 ASSERT(cmd->nc_sqe.sqe_cid == cqe->cqe_cid); 798 bcopy(cqe, &cmd->nc_cqe, sizeof (nvme_cqe_t)); 799 800 qp->nq_sqhead = cqe->cqe_sqhd; 801 802 head.b.cqhdbl_cqh = qp->nq_cqhead = (qp->nq_cqhead + 1) % qp->nq_nentry; 803 804 /* Toggle phase on wrap-around. */ 805 if (qp->nq_cqhead == 0) 806 qp->nq_phase = qp->nq_phase ? 
0 : 1; 807 808 nvme_put32(cmd->nc_nvme, qp->nq_cqhdbl, head.r); 809 810 return (cmd); 811 } 812 813 static int 814 nvme_check_unknown_cmd_status(nvme_cmd_t *cmd) 815 { 816 nvme_cqe_t *cqe = &cmd->nc_cqe; 817 818 dev_err(cmd->nc_nvme->n_dip, CE_WARN, 819 "!unknown command status received: opc = %x, sqid = %d, cid = %d, " 820 "sc = %x, sct = %x, dnr = %d, m = %d", cmd->nc_sqe.sqe_opc, 821 cqe->cqe_sqid, cqe->cqe_cid, cqe->cqe_sf.sf_sc, cqe->cqe_sf.sf_sct, 822 cqe->cqe_sf.sf_dnr, cqe->cqe_sf.sf_m); 823 824 bd_error(cmd->nc_xfer, BD_ERR_ILLRQ); 825 826 if (cmd->nc_nvme->n_strict_version) { 827 cmd->nc_nvme->n_dead = B_TRUE; 828 ddi_fm_service_impact(cmd->nc_nvme->n_dip, DDI_SERVICE_LOST); 829 } 830 831 return (EIO); 832 } 833 834 static int 835 nvme_check_vendor_cmd_status(nvme_cmd_t *cmd) 836 { 837 nvme_cqe_t *cqe = &cmd->nc_cqe; 838 839 dev_err(cmd->nc_nvme->n_dip, CE_WARN, 840 "!unknown command status received: opc = %x, sqid = %d, cid = %d, " 841 "sc = %x, sct = %x, dnr = %d, m = %d", cmd->nc_sqe.sqe_opc, 842 cqe->cqe_sqid, cqe->cqe_cid, cqe->cqe_sf.sf_sc, cqe->cqe_sf.sf_sct, 843 cqe->cqe_sf.sf_dnr, cqe->cqe_sf.sf_m); 844 if (!cmd->nc_nvme->n_ignore_unknown_vendor_status) { 845 cmd->nc_nvme->n_dead = B_TRUE; 846 ddi_fm_service_impact(cmd->nc_nvme->n_dip, DDI_SERVICE_LOST); 847 } 848 849 return (EIO); 850 } 851 852 static int 853 nvme_check_integrity_cmd_status(nvme_cmd_t *cmd) 854 { 855 nvme_cqe_t *cqe = &cmd->nc_cqe; 856 857 switch (cqe->cqe_sf.sf_sc) { 858 case NVME_CQE_SC_INT_NVM_WRITE: 859 /* write fail */ 860 /* TODO: post ereport */ 861 bd_error(cmd->nc_xfer, BD_ERR_MEDIA); 862 return (EIO); 863 864 case NVME_CQE_SC_INT_NVM_READ: 865 /* read fail */ 866 /* TODO: post ereport */ 867 bd_error(cmd->nc_xfer, BD_ERR_MEDIA); 868 return (EIO); 869 870 default: 871 return (nvme_check_unknown_cmd_status(cmd)); 872 } 873 } 874 875 static int 876 nvme_check_generic_cmd_status(nvme_cmd_t *cmd) 877 { 878 nvme_cqe_t *cqe = &cmd->nc_cqe; 879 880 switch (cqe->cqe_sf.sf_sc) { 881 case NVME_CQE_SC_GEN_SUCCESS: 882 return (0); 883 884 /* 885 * Errors indicating a bug in the driver should cause a panic. 886 */ 887 case NVME_CQE_SC_GEN_INV_OPC: 888 /* Invalid Command Opcode */ 889 dev_err(cmd->nc_nvme->n_dip, CE_PANIC, "programming error: " 890 "invalid opcode in cmd %p", (void *)cmd); 891 return (0); 892 893 case NVME_CQE_SC_GEN_INV_FLD: 894 /* Invalid Field in Command */ 895 dev_err(cmd->nc_nvme->n_dip, CE_PANIC, "programming error: " 896 "invalid field in cmd %p", (void *)cmd); 897 return (0); 898 899 case NVME_CQE_SC_GEN_ID_CNFL: 900 /* Command ID Conflict */ 901 dev_err(cmd->nc_nvme->n_dip, CE_PANIC, "programming error: " 902 "cmd ID conflict in cmd %p", (void *)cmd); 903 return (0); 904 905 case NVME_CQE_SC_GEN_INV_NS: 906 /* Invalid Namespace or Format */ 907 dev_err(cmd->nc_nvme->n_dip, CE_PANIC, "programming error: " 908 "invalid NS/format in cmd %p", (void *)cmd); 909 return (0); 910 911 case NVME_CQE_SC_GEN_NVM_LBA_RANGE: 912 /* LBA Out Of Range */ 913 dev_err(cmd->nc_nvme->n_dip, CE_PANIC, "programming error: " 914 "LBA out of range in cmd %p", (void *)cmd); 915 return (0); 916 917 /* 918 * Non-fatal errors, handle gracefully. 919 */ 920 case NVME_CQE_SC_GEN_DATA_XFR_ERR: 921 /* Data Transfer Error (DMA) */ 922 /* TODO: post ereport */ 923 atomic_inc_32(&cmd->nc_nvme->n_data_xfr_err); 924 bd_error(cmd->nc_xfer, BD_ERR_NTRDY); 925 return (EIO); 926 927 case NVME_CQE_SC_GEN_INTERNAL_ERR: 928 /* 929 * Internal Error. 
The spec (v1.0, section 4.5.1.2) says 930 * detailed error information is returned as async event, 931 * so we pretty much ignore the error here and handle it 932 * in the async event handler. 933 */ 934 atomic_inc_32(&cmd->nc_nvme->n_internal_err); 935 bd_error(cmd->nc_xfer, BD_ERR_NTRDY); 936 return (EIO); 937 938 case NVME_CQE_SC_GEN_ABORT_REQUEST: 939 /* 940 * Command Abort Requested. This normally happens only when a 941 * command times out. 942 */ 943 /* TODO: post ereport or change blkdev to handle this? */ 944 atomic_inc_32(&cmd->nc_nvme->n_abort_rq_err); 945 return (ECANCELED); 946 947 case NVME_CQE_SC_GEN_ABORT_PWRLOSS: 948 /* Command Aborted due to Power Loss Notification */ 949 ddi_fm_service_impact(cmd->nc_nvme->n_dip, DDI_SERVICE_LOST); 950 cmd->nc_nvme->n_dead = B_TRUE; 951 return (EIO); 952 953 case NVME_CQE_SC_GEN_ABORT_SQ_DEL: 954 /* Command Aborted due to SQ Deletion */ 955 atomic_inc_32(&cmd->nc_nvme->n_abort_sq_del); 956 return (EIO); 957 958 case NVME_CQE_SC_GEN_NVM_CAP_EXC: 959 /* Capacity Exceeded */ 960 atomic_inc_32(&cmd->nc_nvme->n_nvm_cap_exc); 961 bd_error(cmd->nc_xfer, BD_ERR_MEDIA); 962 return (EIO); 963 964 case NVME_CQE_SC_GEN_NVM_NS_NOTRDY: 965 /* Namespace Not Ready */ 966 atomic_inc_32(&cmd->nc_nvme->n_nvm_ns_notrdy); 967 bd_error(cmd->nc_xfer, BD_ERR_NTRDY); 968 return (EIO); 969 970 default: 971 return (nvme_check_unknown_cmd_status(cmd)); 972 } 973 } 974 975 static int 976 nvme_check_specific_cmd_status(nvme_cmd_t *cmd) 977 { 978 nvme_cqe_t *cqe = &cmd->nc_cqe; 979 980 switch (cqe->cqe_sf.sf_sc) { 981 case NVME_CQE_SC_SPC_INV_CQ: 982 /* Completion Queue Invalid */ 983 ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_CREATE_SQUEUE); 984 atomic_inc_32(&cmd->nc_nvme->n_inv_cq_err); 985 return (EINVAL); 986 987 case NVME_CQE_SC_SPC_INV_QID: 988 /* Invalid Queue Identifier */ 989 ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_CREATE_SQUEUE || 990 cmd->nc_sqe.sqe_opc == NVME_OPC_DELETE_SQUEUE || 991 cmd->nc_sqe.sqe_opc == NVME_OPC_CREATE_CQUEUE || 992 cmd->nc_sqe.sqe_opc == NVME_OPC_DELETE_CQUEUE); 993 atomic_inc_32(&cmd->nc_nvme->n_inv_qid_err); 994 return (EINVAL); 995 996 case NVME_CQE_SC_SPC_MAX_QSZ_EXC: 997 /* Max Queue Size Exceeded */ 998 ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_CREATE_SQUEUE || 999 cmd->nc_sqe.sqe_opc == NVME_OPC_CREATE_CQUEUE); 1000 atomic_inc_32(&cmd->nc_nvme->n_max_qsz_exc); 1001 return (EINVAL); 1002 1003 case NVME_CQE_SC_SPC_ABRT_CMD_EXC: 1004 /* Abort Command Limit Exceeded */ 1005 ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_ABORT); 1006 dev_err(cmd->nc_nvme->n_dip, CE_PANIC, "programming error: " 1007 "abort command limit exceeded in cmd %p", (void *)cmd); 1008 return (0); 1009 1010 case NVME_CQE_SC_SPC_ASYNC_EVREQ_EXC: 1011 /* Async Event Request Limit Exceeded */ 1012 ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_ASYNC_EVENT); 1013 dev_err(cmd->nc_nvme->n_dip, CE_PANIC, "programming error: " 1014 "async event request limit exceeded in cmd %p", 1015 (void *)cmd); 1016 return (0); 1017 1018 case NVME_CQE_SC_SPC_INV_INT_VECT: 1019 /* Invalid Interrupt Vector */ 1020 ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_CREATE_CQUEUE); 1021 atomic_inc_32(&cmd->nc_nvme->n_inv_int_vect); 1022 return (EINVAL); 1023 1024 case NVME_CQE_SC_SPC_INV_LOG_PAGE: 1025 /* Invalid Log Page */ 1026 ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_GET_LOG_PAGE); 1027 atomic_inc_32(&cmd->nc_nvme->n_inv_log_page); 1028 bd_error(cmd->nc_xfer, BD_ERR_ILLRQ); 1029 return (EINVAL); 1030 1031 case NVME_CQE_SC_SPC_INV_FORMAT: 1032 /* Invalid Format */ 1033 ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_FORMAT); 
1034 atomic_inc_32(&cmd->nc_nvme->n_inv_format); 1035 bd_error(cmd->nc_xfer, BD_ERR_ILLRQ); 1036 return (EINVAL); 1037 1038 case NVME_CQE_SC_SPC_INV_Q_DEL: 1039 /* Invalid Queue Deletion */ 1040 ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_DELETE_CQUEUE); 1041 atomic_inc_32(&cmd->nc_nvme->n_inv_q_del); 1042 return (EINVAL); 1043 1044 case NVME_CQE_SC_SPC_NVM_CNFL_ATTR: 1045 /* Conflicting Attributes */ 1046 ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_DSET_MGMT || 1047 cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_READ || 1048 cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_WRITE); 1049 atomic_inc_32(&cmd->nc_nvme->n_cnfl_attr); 1050 bd_error(cmd->nc_xfer, BD_ERR_ILLRQ); 1051 return (EINVAL); 1052 1053 case NVME_CQE_SC_SPC_NVM_INV_PROT: 1054 /* Invalid Protection Information */ 1055 ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_COMPARE || 1056 cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_READ || 1057 cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_WRITE); 1058 atomic_inc_32(&cmd->nc_nvme->n_inv_prot); 1059 bd_error(cmd->nc_xfer, BD_ERR_ILLRQ); 1060 return (EINVAL); 1061 1062 case NVME_CQE_SC_SPC_NVM_READONLY: 1063 /* Write to Read Only Range */ 1064 ASSERT(cmd->nc_sqe.sqe_opc == NVME_OPC_NVM_WRITE); 1065 atomic_inc_32(&cmd->nc_nvme->n_readonly); 1066 bd_error(cmd->nc_xfer, BD_ERR_ILLRQ); 1067 return (EROFS); 1068 1069 default: 1070 return (nvme_check_unknown_cmd_status(cmd)); 1071 } 1072 } 1073 1074 static inline int 1075 nvme_check_cmd_status(nvme_cmd_t *cmd) 1076 { 1077 nvme_cqe_t *cqe = &cmd->nc_cqe; 1078 1079 /* take a shortcut if everything is alright */ 1080 if (cqe->cqe_sf.sf_sct == NVME_CQE_SCT_GENERIC && 1081 cqe->cqe_sf.sf_sc == NVME_CQE_SC_GEN_SUCCESS) 1082 return (0); 1083 1084 if (cqe->cqe_sf.sf_sct == NVME_CQE_SCT_GENERIC) 1085 return (nvme_check_generic_cmd_status(cmd)); 1086 else if (cqe->cqe_sf.sf_sct == NVME_CQE_SCT_SPECIFIC) 1087 return (nvme_check_specific_cmd_status(cmd)); 1088 else if (cqe->cqe_sf.sf_sct == NVME_CQE_SCT_INTEGRITY) 1089 return (nvme_check_integrity_cmd_status(cmd)); 1090 else if (cqe->cqe_sf.sf_sct == NVME_CQE_SCT_VENDOR) 1091 return (nvme_check_vendor_cmd_status(cmd)); 1092 1093 return (nvme_check_unknown_cmd_status(cmd)); 1094 } 1095 1096 /* 1097 * nvme_abort_cmd_cb -- replaces nc_callback of aborted commands 1098 * 1099 * This functions takes care of cleaning up aborted commands. The command 1100 * status is checked to catch any fatal errors. 1101 */ 1102 static void 1103 nvme_abort_cmd_cb(void *arg) 1104 { 1105 nvme_cmd_t *cmd = arg; 1106 1107 /* 1108 * Grab the command mutex. Once we have it we hold the last reference 1109 * to the command and can safely free it. 1110 */ 1111 mutex_enter(&cmd->nc_mutex); 1112 (void) nvme_check_cmd_status(cmd); 1113 mutex_exit(&cmd->nc_mutex); 1114 1115 nvme_free_cmd(cmd); 1116 } 1117 1118 static void 1119 nvme_abort_cmd(nvme_cmd_t *abort_cmd) 1120 { 1121 nvme_t *nvme = abort_cmd->nc_nvme; 1122 nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP); 1123 nvme_abort_cmd_t ac = { 0 }; 1124 1125 sema_p(&nvme->n_abort_sema); 1126 1127 ac.b.ac_cid = abort_cmd->nc_sqe.sqe_cid; 1128 ac.b.ac_sqid = abort_cmd->nc_sqid; 1129 1130 /* 1131 * Drop the mutex of the aborted command. From this point on 1132 * we must assume that the abort callback has freed the command. 1133 */ 1134 mutex_exit(&abort_cmd->nc_mutex); 1135 1136 cmd->nc_sqid = 0; 1137 cmd->nc_sqe.sqe_opc = NVME_OPC_ABORT; 1138 cmd->nc_callback = nvme_wakeup_cmd; 1139 cmd->nc_sqe.sqe_cdw10 = ac.r; 1140 1141 /* 1142 * Send the ABORT to the hardware. 
The ABORT command will return _after_ 1143 * the aborted command has completed (aborted or otherwise). 1144 */ 1145 if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) { 1146 sema_v(&nvme->n_abort_sema); 1147 dev_err(nvme->n_dip, CE_WARN, 1148 "!nvme_admin_cmd failed for ABORT"); 1149 atomic_inc_32(&nvme->n_abort_failed); 1150 return; 1151 } 1152 sema_v(&nvme->n_abort_sema); 1153 1154 if (nvme_check_cmd_status(cmd)) { 1155 dev_err(nvme->n_dip, CE_WARN, 1156 "!ABORT failed with sct = %x, sc = %x", 1157 cmd->nc_cqe.cqe_sf.sf_sct, cmd->nc_cqe.cqe_sf.sf_sc); 1158 atomic_inc_32(&nvme->n_abort_failed); 1159 } else { 1160 atomic_inc_32(&nvme->n_cmd_aborted); 1161 } 1162 1163 nvme_free_cmd(cmd); 1164 } 1165 1166 /* 1167 * nvme_wait_cmd -- wait for command completion or timeout 1168 * 1169 * Returns B_TRUE if the command completed normally. 1170 * 1171 * Returns B_FALSE if the command timed out and an abort was attempted. The 1172 * command mutex will be dropped and the command must be considered freed. The 1173 * freeing of the command is normally done by the abort command callback. 1174 * 1175 * In case of a serious error or a timeout of the abort command the hardware 1176 * will be declared dead and FMA will be notified. 1177 */ 1178 static boolean_t 1179 nvme_wait_cmd(nvme_cmd_t *cmd, uint_t sec) 1180 { 1181 clock_t timeout = ddi_get_lbolt() + drv_usectohz(sec * MICROSEC); 1182 nvme_t *nvme = cmd->nc_nvme; 1183 nvme_reg_csts_t csts; 1184 1185 ASSERT(mutex_owned(&cmd->nc_mutex)); 1186 1187 while (!cmd->nc_completed) { 1188 if (cv_timedwait(&cmd->nc_cv, &cmd->nc_mutex, timeout) == -1) 1189 break; 1190 } 1191 1192 if (cmd->nc_completed) 1193 return (B_TRUE); 1194 1195 /* 1196 * The command timed out. Change the callback to the cleanup function. 1197 */ 1198 cmd->nc_callback = nvme_abort_cmd_cb; 1199 1200 /* 1201 * Check controller for fatal status, any errors associated with the 1202 * register or DMA handle, or for a double timeout (abort command timed 1203 * out). If necessary log a warning and call FMA. 1204 */ 1205 csts.r = nvme_get32(nvme, NVME_REG_CSTS); 1206 dev_err(nvme->n_dip, CE_WARN, "!command timeout, " 1207 "OPC = %x, CFS = %d", cmd->nc_sqe.sqe_opc, csts.b.csts_cfs); 1208 atomic_inc_32(&nvme->n_cmd_timeout); 1209 1210 if (csts.b.csts_cfs || 1211 nvme_check_regs_hdl(nvme) || 1212 nvme_check_dma_hdl(cmd->nc_dma) || 1213 cmd->nc_sqe.sqe_opc == NVME_OPC_ABORT) { 1214 ddi_fm_service_impact(nvme->n_dip, DDI_SERVICE_LOST); 1215 nvme->n_dead = B_TRUE; 1216 mutex_exit(&cmd->nc_mutex); 1217 } else { 1218 /* 1219 * Try to abort the command. The command mutex is released by 1220 * nvme_abort_cmd(). 1221 * If the abort succeeds it will have freed the aborted command. 1222 * If the abort fails for other reasons we must assume that the 1223 * command may complete at any time, and the callback will free 1224 * it for us. 1225 */ 1226 nvme_abort_cmd(cmd); 1227 } 1228 1229 return (B_FALSE); 1230 } 1231 1232 static void 1233 nvme_wakeup_cmd(void *arg) 1234 { 1235 nvme_cmd_t *cmd = arg; 1236 1237 mutex_enter(&cmd->nc_mutex); 1238 /* 1239 * There is a slight chance that this command completed shortly after 1240 * the timeout was hit in nvme_wait_cmd() but before the callback was 1241 * changed. Catch that case here and clean up accordingly. 
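	 * In that sequence the completion path picked up this callback while it
	 * was still nvme_wakeup_cmd, then nvme_wait_cmd() timed out, swapped
	 * nc_callback to nvme_abort_cmd_cb and released nc_mutex; by the time
	 * we get the mutex here nobody is waiting on nc_cv anymore, so the
	 * command must be routed to the cleanup callback instead of being
	 * signalled.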
1242 */ 1243 if (cmd->nc_callback == nvme_abort_cmd_cb) { 1244 mutex_exit(&cmd->nc_mutex); 1245 nvme_abort_cmd_cb(cmd); 1246 return; 1247 } 1248 1249 cmd->nc_completed = B_TRUE; 1250 cv_signal(&cmd->nc_cv); 1251 mutex_exit(&cmd->nc_mutex); 1252 } 1253 1254 static void 1255 nvme_async_event_task(void *arg) 1256 { 1257 nvme_cmd_t *cmd = arg; 1258 nvme_t *nvme = cmd->nc_nvme; 1259 nvme_error_log_entry_t *error_log = NULL; 1260 nvme_health_log_t *health_log = NULL; 1261 nvme_async_event_t event; 1262 int ret; 1263 1264 /* 1265 * Check for errors associated with the async request itself. The only 1266 * command-specific error is "async event limit exceeded", which 1267 * indicates a programming error in the driver and causes a panic in 1268 * nvme_check_cmd_status(). 1269 * 1270 * Other possible errors are various scenarios where the async request 1271 * was aborted, or internal errors in the device. Internal errors are 1272 * reported to FMA, the command aborts need no special handling here. 1273 */ 1274 if (nvme_check_cmd_status(cmd)) { 1275 dev_err(cmd->nc_nvme->n_dip, CE_WARN, 1276 "!async event request returned failure, sct = %x, " 1277 "sc = %x, dnr = %d, m = %d", cmd->nc_cqe.cqe_sf.sf_sct, 1278 cmd->nc_cqe.cqe_sf.sf_sc, cmd->nc_cqe.cqe_sf.sf_dnr, 1279 cmd->nc_cqe.cqe_sf.sf_m); 1280 1281 if (cmd->nc_cqe.cqe_sf.sf_sct == NVME_CQE_SCT_GENERIC && 1282 cmd->nc_cqe.cqe_sf.sf_sc == NVME_CQE_SC_GEN_INTERNAL_ERR) { 1283 cmd->nc_nvme->n_dead = B_TRUE; 1284 ddi_fm_service_impact(cmd->nc_nvme->n_dip, 1285 DDI_SERVICE_LOST); 1286 } 1287 nvme_free_cmd(cmd); 1288 return; 1289 } 1290 1291 1292 event.r = cmd->nc_cqe.cqe_dw0; 1293 1294 /* Clear CQE and re-submit the async request. */ 1295 bzero(&cmd->nc_cqe, sizeof (nvme_cqe_t)); 1296 ret = nvme_submit_cmd(nvme->n_adminq, cmd); 1297 1298 if (ret != DDI_SUCCESS) { 1299 dev_err(nvme->n_dip, CE_WARN, 1300 "!failed to resubmit async event request"); 1301 atomic_inc_32(&nvme->n_async_resubmit_failed); 1302 nvme_free_cmd(cmd); 1303 } 1304 1305 switch (event.b.ae_type) { 1306 case NVME_ASYNC_TYPE_ERROR: 1307 if (event.b.ae_logpage == NVME_LOGPAGE_ERROR) { 1308 error_log = (nvme_error_log_entry_t *) 1309 nvme_get_logpage(nvme, event.b.ae_logpage); 1310 } else { 1311 dev_err(nvme->n_dip, CE_WARN, "!wrong logpage in " 1312 "async event reply: %d", event.b.ae_logpage); 1313 atomic_inc_32(&nvme->n_wrong_logpage); 1314 } 1315 1316 switch (event.b.ae_info) { 1317 case NVME_ASYNC_ERROR_INV_SQ: 1318 dev_err(nvme->n_dip, CE_PANIC, "programming error: " 1319 "invalid submission queue"); 1320 return; 1321 1322 case NVME_ASYNC_ERROR_INV_DBL: 1323 dev_err(nvme->n_dip, CE_PANIC, "programming error: " 1324 "invalid doorbell write value"); 1325 return; 1326 1327 case NVME_ASYNC_ERROR_DIAGFAIL: 1328 dev_err(nvme->n_dip, CE_WARN, "!diagnostic failure"); 1329 ddi_fm_service_impact(nvme->n_dip, DDI_SERVICE_LOST); 1330 nvme->n_dead = B_TRUE; 1331 atomic_inc_32(&nvme->n_diagfail_event); 1332 break; 1333 1334 case NVME_ASYNC_ERROR_PERSISTENT: 1335 dev_err(nvme->n_dip, CE_WARN, "!persistent internal " 1336 "device error"); 1337 ddi_fm_service_impact(nvme->n_dip, DDI_SERVICE_LOST); 1338 nvme->n_dead = B_TRUE; 1339 atomic_inc_32(&nvme->n_persistent_event); 1340 break; 1341 1342 case NVME_ASYNC_ERROR_TRANSIENT: 1343 dev_err(nvme->n_dip, CE_WARN, "!transient internal " 1344 "device error"); 1345 /* TODO: send ereport */ 1346 atomic_inc_32(&nvme->n_transient_event); 1347 break; 1348 1349 case NVME_ASYNC_ERROR_FW_LOAD: 1350 dev_err(nvme->n_dip, CE_WARN, 1351 "!firmware image load error"); 
1352 atomic_inc_32(&nvme->n_fw_load_event); 1353 break; 1354 } 1355 break; 1356 1357 case NVME_ASYNC_TYPE_HEALTH: 1358 if (event.b.ae_logpage == NVME_LOGPAGE_HEALTH) { 1359 health_log = (nvme_health_log_t *) 1360 nvme_get_logpage(nvme, event.b.ae_logpage, -1); 1361 } else { 1362 dev_err(nvme->n_dip, CE_WARN, "!wrong logpage in " 1363 "async event reply: %d", event.b.ae_logpage); 1364 atomic_inc_32(&nvme->n_wrong_logpage); 1365 } 1366 1367 switch (event.b.ae_info) { 1368 case NVME_ASYNC_HEALTH_RELIABILITY: 1369 dev_err(nvme->n_dip, CE_WARN, 1370 "!device reliability compromised"); 1371 /* TODO: send ereport */ 1372 atomic_inc_32(&nvme->n_reliability_event); 1373 break; 1374 1375 case NVME_ASYNC_HEALTH_TEMPERATURE: 1376 dev_err(nvme->n_dip, CE_WARN, 1377 "!temperature above threshold"); 1378 /* TODO: send ereport */ 1379 atomic_inc_32(&nvme->n_temperature_event); 1380 break; 1381 1382 case NVME_ASYNC_HEALTH_SPARE: 1383 dev_err(nvme->n_dip, CE_WARN, 1384 "!spare space below threshold"); 1385 /* TODO: send ereport */ 1386 atomic_inc_32(&nvme->n_spare_event); 1387 break; 1388 } 1389 break; 1390 1391 case NVME_ASYNC_TYPE_VENDOR: 1392 dev_err(nvme->n_dip, CE_WARN, "!vendor specific async event " 1393 "received, info = %x, logpage = %x", event.b.ae_info, 1394 event.b.ae_logpage); 1395 atomic_inc_32(&nvme->n_vendor_event); 1396 break; 1397 1398 default: 1399 dev_err(nvme->n_dip, CE_WARN, "!unknown async event received, " 1400 "type = %x, info = %x, logpage = %x", event.b.ae_type, 1401 event.b.ae_info, event.b.ae_logpage); 1402 atomic_inc_32(&nvme->n_unknown_event); 1403 break; 1404 } 1405 1406 if (error_log) 1407 kmem_free(error_log, sizeof (nvme_error_log_entry_t) * 1408 nvme->n_error_log_len); 1409 1410 if (health_log) 1411 kmem_free(health_log, sizeof (nvme_health_log_t)); 1412 } 1413 1414 static int 1415 nvme_admin_cmd(nvme_cmd_t *cmd, int sec) 1416 { 1417 int ret; 1418 1419 mutex_enter(&cmd->nc_mutex); 1420 ret = nvme_submit_cmd(cmd->nc_nvme->n_adminq, cmd); 1421 1422 if (ret != DDI_SUCCESS) { 1423 mutex_exit(&cmd->nc_mutex); 1424 dev_err(cmd->nc_nvme->n_dip, CE_WARN, 1425 "!nvme_submit_cmd failed"); 1426 atomic_inc_32(&cmd->nc_nvme->n_admin_queue_full); 1427 nvme_free_cmd(cmd); 1428 return (DDI_FAILURE); 1429 } 1430 1431 if (nvme_wait_cmd(cmd, sec) == B_FALSE) { 1432 /* 1433 * The command timed out. An abort command was posted that 1434 * will take care of the cleanup. 1435 */ 1436 return (DDI_FAILURE); 1437 } 1438 mutex_exit(&cmd->nc_mutex); 1439 1440 return (DDI_SUCCESS); 1441 } 1442 1443 static int 1444 nvme_async_event(nvme_t *nvme) 1445 { 1446 nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP); 1447 int ret; 1448 1449 cmd->nc_sqid = 0; 1450 cmd->nc_sqe.sqe_opc = NVME_OPC_ASYNC_EVENT; 1451 cmd->nc_callback = nvme_async_event_task; 1452 1453 ret = nvme_submit_cmd(nvme->n_adminq, cmd); 1454 1455 if (ret != DDI_SUCCESS) { 1456 dev_err(nvme->n_dip, CE_WARN, 1457 "!nvme_submit_cmd failed for ASYNCHRONOUS EVENT"); 1458 nvme_free_cmd(cmd); 1459 return (DDI_FAILURE); 1460 } 1461 1462 return (DDI_SUCCESS); 1463 } 1464 1465 static void * 1466 nvme_get_logpage(nvme_t *nvme, uint8_t logpage, ...) 
1467 { 1468 nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP); 1469 void *buf = NULL; 1470 nvme_getlogpage_t getlogpage = { 0 }; 1471 size_t bufsize; 1472 va_list ap; 1473 1474 va_start(ap, logpage); 1475 1476 cmd->nc_sqid = 0; 1477 cmd->nc_callback = nvme_wakeup_cmd; 1478 cmd->nc_sqe.sqe_opc = NVME_OPC_GET_LOG_PAGE; 1479 1480 getlogpage.b.lp_lid = logpage; 1481 1482 switch (logpage) { 1483 case NVME_LOGPAGE_ERROR: 1484 cmd->nc_sqe.sqe_nsid = (uint32_t)-1; 1485 bufsize = nvme->n_error_log_len * 1486 sizeof (nvme_error_log_entry_t); 1487 break; 1488 1489 case NVME_LOGPAGE_HEALTH: 1490 cmd->nc_sqe.sqe_nsid = va_arg(ap, uint32_t); 1491 bufsize = sizeof (nvme_health_log_t); 1492 break; 1493 1494 case NVME_LOGPAGE_FWSLOT: 1495 cmd->nc_sqe.sqe_nsid = (uint32_t)-1; 1496 bufsize = sizeof (nvme_fwslot_log_t); 1497 break; 1498 1499 default: 1500 dev_err(nvme->n_dip, CE_WARN, "!unknown log page requested: %d", 1501 logpage); 1502 atomic_inc_32(&nvme->n_unknown_logpage); 1503 goto fail; 1504 } 1505 1506 va_end(ap); 1507 1508 getlogpage.b.lp_numd = bufsize / sizeof (uint32_t) - 1; 1509 1510 cmd->nc_sqe.sqe_cdw10 = getlogpage.r; 1511 1512 if (nvme_zalloc_dma(nvme, getlogpage.b.lp_numd * sizeof (uint32_t), 1513 DDI_DMA_READ, &nvme->n_prp_dma_attr, &cmd->nc_dma) != DDI_SUCCESS) { 1514 dev_err(nvme->n_dip, CE_WARN, 1515 "!nvme_zalloc_dma failed for GET LOG PAGE"); 1516 goto fail; 1517 } 1518 1519 if (cmd->nc_dma->nd_ncookie > 2) { 1520 dev_err(nvme->n_dip, CE_WARN, 1521 "!too many DMA cookies for GET LOG PAGE"); 1522 atomic_inc_32(&nvme->n_too_many_cookies); 1523 goto fail; 1524 } 1525 1526 cmd->nc_sqe.sqe_dptr.d_prp[0] = cmd->nc_dma->nd_cookie.dmac_laddress; 1527 if (cmd->nc_dma->nd_ncookie > 1) { 1528 ddi_dma_nextcookie(cmd->nc_dma->nd_dmah, 1529 &cmd->nc_dma->nd_cookie); 1530 cmd->nc_sqe.sqe_dptr.d_prp[1] = 1531 cmd->nc_dma->nd_cookie.dmac_laddress; 1532 } 1533 1534 if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) { 1535 dev_err(nvme->n_dip, CE_WARN, 1536 "!nvme_admin_cmd failed for GET LOG PAGE"); 1537 return (NULL); 1538 } 1539 1540 if (nvme_check_cmd_status(cmd)) { 1541 dev_err(nvme->n_dip, CE_WARN, 1542 "!GET LOG PAGE failed with sct = %x, sc = %x", 1543 cmd->nc_cqe.cqe_sf.sf_sct, cmd->nc_cqe.cqe_sf.sf_sc); 1544 goto fail; 1545 } 1546 1547 buf = kmem_alloc(bufsize, KM_SLEEP); 1548 bcopy(cmd->nc_dma->nd_memp, buf, bufsize); 1549 1550 fail: 1551 nvme_free_cmd(cmd); 1552 1553 return (buf); 1554 } 1555 1556 static void * 1557 nvme_identify(nvme_t *nvme, uint32_t nsid) 1558 { 1559 nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP); 1560 void *buf = NULL; 1561 1562 cmd->nc_sqid = 0; 1563 cmd->nc_callback = nvme_wakeup_cmd; 1564 cmd->nc_sqe.sqe_opc = NVME_OPC_IDENTIFY; 1565 cmd->nc_sqe.sqe_nsid = nsid; 1566 cmd->nc_sqe.sqe_cdw10 = nsid ? 
NVME_IDENTIFY_NSID : NVME_IDENTIFY_CTRL; 1567 1568 if (nvme_zalloc_dma(nvme, NVME_IDENTIFY_BUFSIZE, DDI_DMA_READ, 1569 &nvme->n_prp_dma_attr, &cmd->nc_dma) != DDI_SUCCESS) { 1570 dev_err(nvme->n_dip, CE_WARN, 1571 "!nvme_zalloc_dma failed for IDENTIFY"); 1572 goto fail; 1573 } 1574 1575 if (cmd->nc_dma->nd_ncookie > 2) { 1576 dev_err(nvme->n_dip, CE_WARN, 1577 "!too many DMA cookies for IDENTIFY"); 1578 atomic_inc_32(&nvme->n_too_many_cookies); 1579 goto fail; 1580 } 1581 1582 cmd->nc_sqe.sqe_dptr.d_prp[0] = cmd->nc_dma->nd_cookie.dmac_laddress; 1583 if (cmd->nc_dma->nd_ncookie > 1) { 1584 ddi_dma_nextcookie(cmd->nc_dma->nd_dmah, 1585 &cmd->nc_dma->nd_cookie); 1586 cmd->nc_sqe.sqe_dptr.d_prp[1] = 1587 cmd->nc_dma->nd_cookie.dmac_laddress; 1588 } 1589 1590 if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) { 1591 dev_err(nvme->n_dip, CE_WARN, 1592 "!nvme_admin_cmd failed for IDENTIFY"); 1593 return (NULL); 1594 } 1595 1596 if (nvme_check_cmd_status(cmd)) { 1597 dev_err(nvme->n_dip, CE_WARN, 1598 "!IDENTIFY failed with sct = %x, sc = %x", 1599 cmd->nc_cqe.cqe_sf.sf_sct, cmd->nc_cqe.cqe_sf.sf_sc); 1600 goto fail; 1601 } 1602 1603 buf = kmem_alloc(NVME_IDENTIFY_BUFSIZE, KM_SLEEP); 1604 bcopy(cmd->nc_dma->nd_memp, buf, NVME_IDENTIFY_BUFSIZE); 1605 1606 fail: 1607 nvme_free_cmd(cmd); 1608 1609 return (buf); 1610 } 1611 1612 static int 1613 nvme_set_nqueues(nvme_t *nvme, uint16_t nqueues) 1614 { 1615 nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP); 1616 nvme_nqueue_t nq = { 0 }; 1617 1618 nq.b.nq_nsq = nq.b.nq_ncq = nqueues - 1; 1619 1620 cmd->nc_sqid = 0; 1621 cmd->nc_callback = nvme_wakeup_cmd; 1622 cmd->nc_sqe.sqe_opc = NVME_OPC_SET_FEATURES; 1623 cmd->nc_sqe.sqe_cdw10 = NVME_FEAT_NQUEUES; 1624 cmd->nc_sqe.sqe_cdw11 = nq.r; 1625 1626 if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) { 1627 dev_err(nvme->n_dip, CE_WARN, 1628 "!nvme_admin_cmd failed for SET FEATURES (NQUEUES)"); 1629 return (0); 1630 } 1631 1632 if (nvme_check_cmd_status(cmd)) { 1633 dev_err(nvme->n_dip, CE_WARN, 1634 "!SET FEATURES (NQUEUES) failed with sct = %x, sc = %x", 1635 cmd->nc_cqe.cqe_sf.sf_sct, cmd->nc_cqe.cqe_sf.sf_sc); 1636 nvme_free_cmd(cmd); 1637 return (0); 1638 } 1639 1640 nq.r = cmd->nc_cqe.cqe_dw0; 1641 nvme_free_cmd(cmd); 1642 1643 /* 1644 * Always use the same number of submission and completion queues, and 1645 * never use more than the requested number of queues. 
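	 *
	 * Both values in the completion are 0-based counts, hence the + 1
	 * below. For example, if the driver asked for 8 queue pairs and the
	 * controller reports nq_nsq = 3 and nq_ncq = 7 (i.e. 4 SQs and 8 CQs),
	 * the result is MIN(8, MIN(3, 7) + 1) = 4 usable I/O queue pairs.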
1646 */ 1647 return (MIN(nqueues, MIN(nq.b.nq_nsq, nq.b.nq_ncq) + 1)); 1648 } 1649 1650 static int 1651 nvme_create_io_qpair(nvme_t *nvme, nvme_qpair_t *qp, uint16_t idx) 1652 { 1653 nvme_cmd_t *cmd = nvme_alloc_cmd(nvme, KM_SLEEP); 1654 nvme_create_queue_dw10_t dw10 = { 0 }; 1655 nvme_create_cq_dw11_t c_dw11 = { 0 }; 1656 nvme_create_sq_dw11_t s_dw11 = { 0 }; 1657 1658 dw10.b.q_qid = idx; 1659 dw10.b.q_qsize = qp->nq_nentry - 1; 1660 1661 c_dw11.b.cq_pc = 1; 1662 c_dw11.b.cq_ien = 1; 1663 c_dw11.b.cq_iv = idx % nvme->n_intr_cnt; 1664 1665 cmd->nc_sqid = 0; 1666 cmd->nc_callback = nvme_wakeup_cmd; 1667 cmd->nc_sqe.sqe_opc = NVME_OPC_CREATE_CQUEUE; 1668 cmd->nc_sqe.sqe_cdw10 = dw10.r; 1669 cmd->nc_sqe.sqe_cdw11 = c_dw11.r; 1670 cmd->nc_sqe.sqe_dptr.d_prp[0] = qp->nq_cqdma->nd_cookie.dmac_laddress; 1671 1672 if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) { 1673 dev_err(nvme->n_dip, CE_WARN, 1674 "!nvme_admin_cmd failed for CREATE CQUEUE"); 1675 return (DDI_FAILURE); 1676 } 1677 1678 if (nvme_check_cmd_status(cmd)) { 1679 dev_err(nvme->n_dip, CE_WARN, 1680 "!CREATE CQUEUE failed with sct = %x, sc = %x", 1681 cmd->nc_cqe.cqe_sf.sf_sct, cmd->nc_cqe.cqe_sf.sf_sc); 1682 nvme_free_cmd(cmd); 1683 return (DDI_FAILURE); 1684 } 1685 1686 nvme_free_cmd(cmd); 1687 1688 s_dw11.b.sq_pc = 1; 1689 s_dw11.b.sq_cqid = idx; 1690 1691 cmd = nvme_alloc_cmd(nvme, KM_SLEEP); 1692 cmd->nc_sqid = 0; 1693 cmd->nc_callback = nvme_wakeup_cmd; 1694 cmd->nc_sqe.sqe_opc = NVME_OPC_CREATE_SQUEUE; 1695 cmd->nc_sqe.sqe_cdw10 = dw10.r; 1696 cmd->nc_sqe.sqe_cdw11 = s_dw11.r; 1697 cmd->nc_sqe.sqe_dptr.d_prp[0] = qp->nq_sqdma->nd_cookie.dmac_laddress; 1698 1699 if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) { 1700 dev_err(nvme->n_dip, CE_WARN, 1701 "!nvme_admin_cmd failed for CREATE SQUEUE"); 1702 return (DDI_FAILURE); 1703 } 1704 1705 if (nvme_check_cmd_status(cmd)) { 1706 dev_err(nvme->n_dip, CE_WARN, 1707 "!CREATE SQUEUE failed with sct = %x, sc = %x", 1708 cmd->nc_cqe.cqe_sf.sf_sct, cmd->nc_cqe.cqe_sf.sf_sc); 1709 nvme_free_cmd(cmd); 1710 return (DDI_FAILURE); 1711 } 1712 1713 nvme_free_cmd(cmd); 1714 1715 return (DDI_SUCCESS); 1716 } 1717 1718 static boolean_t 1719 nvme_reset(nvme_t *nvme, boolean_t quiesce) 1720 { 1721 nvme_reg_csts_t csts; 1722 int i; 1723 1724 nvme_put32(nvme, NVME_REG_CC, 0); 1725 1726 csts.r = nvme_get32(nvme, NVME_REG_CSTS); 1727 if (csts.b.csts_rdy == 1) { 1728 nvme_put32(nvme, NVME_REG_CC, 0); 1729 for (i = 0; i != nvme->n_timeout * 10; i++) { 1730 csts.r = nvme_get32(nvme, NVME_REG_CSTS); 1731 if (csts.b.csts_rdy == 0) 1732 break; 1733 1734 if (quiesce) 1735 drv_usecwait(50000); 1736 else 1737 delay(drv_usectohz(50000)); 1738 } 1739 } 1740 1741 nvme_put32(nvme, NVME_REG_AQA, 0); 1742 nvme_put32(nvme, NVME_REG_ASQ, 0); 1743 nvme_put32(nvme, NVME_REG_ACQ, 0); 1744 1745 csts.r = nvme_get32(nvme, NVME_REG_CSTS); 1746 return (csts.b.csts_rdy == 0 ? 
B_TRUE : B_FALSE); 1747 } 1748 1749 static void 1750 nvme_shutdown(nvme_t *nvme, int mode, boolean_t quiesce) 1751 { 1752 nvme_reg_cc_t cc; 1753 nvme_reg_csts_t csts; 1754 int i; 1755 1756 ASSERT(mode == NVME_CC_SHN_NORMAL || mode == NVME_CC_SHN_ABRUPT); 1757 1758 cc.r = nvme_get32(nvme, NVME_REG_CC); 1759 cc.b.cc_shn = mode & 0x3; 1760 nvme_put32(nvme, NVME_REG_CC, cc.r); 1761 1762 for (i = 0; i != 10; i++) { 1763 csts.r = nvme_get32(nvme, NVME_REG_CSTS); 1764 if (csts.b.csts_shst == NVME_CSTS_SHN_COMPLETE) 1765 break; 1766 1767 if (quiesce) 1768 drv_usecwait(100000); 1769 else 1770 delay(drv_usectohz(100000)); 1771 } 1772 } 1773 1774 1775 static void 1776 nvme_prepare_devid(nvme_t *nvme, uint32_t nsid) 1777 { 1778 char model[sizeof (nvme->n_idctl->id_model) + 1]; 1779 char serial[sizeof (nvme->n_idctl->id_serial) + 1]; 1780 1781 bcopy(nvme->n_idctl->id_model, model, sizeof (nvme->n_idctl->id_model)); 1782 bcopy(nvme->n_idctl->id_serial, serial, 1783 sizeof (nvme->n_idctl->id_serial)); 1784 1785 model[sizeof (nvme->n_idctl->id_model)] = '\0'; 1786 serial[sizeof (nvme->n_idctl->id_serial)] = '\0'; 1787 1788 (void) snprintf(nvme->n_ns[nsid - 1].ns_devid, 1789 sizeof (nvme->n_ns[0].ns_devid), "%4X-%s-%s-%X", 1790 nvme->n_idctl->id_vid, model, serial, nsid); 1791 } 1792 1793 static int 1794 nvme_init(nvme_t *nvme) 1795 { 1796 nvme_reg_cc_t cc = { 0 }; 1797 nvme_reg_aqa_t aqa = { 0 }; 1798 nvme_reg_asq_t asq = { 0 }; 1799 nvme_reg_acq_t acq = { 0 }; 1800 nvme_reg_cap_t cap; 1801 nvme_reg_vs_t vs; 1802 nvme_reg_csts_t csts; 1803 int i = 0; 1804 int nqueues; 1805 char model[sizeof (nvme->n_idctl->id_model) + 1]; 1806 char *vendor, *product; 1807 1808 /* Check controller version */ 1809 vs.r = nvme_get32(nvme, NVME_REG_VS); 1810 dev_err(nvme->n_dip, CE_CONT, "?NVMe spec version %d.%d", 1811 vs.b.vs_mjr, vs.b.vs_mnr); 1812 1813 if (nvme_version_major < vs.b.vs_mjr || 1814 (nvme_version_major == vs.b.vs_mjr && 1815 nvme_version_minor < vs.b.vs_mnr)) { 1816 dev_err(nvme->n_dip, CE_WARN, "!no support for version > %d.%d", 1817 nvme_version_major, nvme_version_minor); 1818 if (nvme->n_strict_version) 1819 goto fail; 1820 } 1821 1822 /* retrieve controller configuration */ 1823 cap.r = nvme_get64(nvme, NVME_REG_CAP); 1824 1825 if ((cap.b.cap_css & NVME_CAP_CSS_NVM) == 0) { 1826 dev_err(nvme->n_dip, CE_WARN, 1827 "!NVM command set not supported by hardware"); 1828 goto fail; 1829 } 1830 1831 nvme->n_nssr_supported = cap.b.cap_nssrs; 1832 nvme->n_doorbell_stride = 4 << cap.b.cap_dstrd; 1833 nvme->n_timeout = cap.b.cap_to; 1834 nvme->n_arbitration_mechanisms = cap.b.cap_ams; 1835 nvme->n_cont_queues_reqd = cap.b.cap_cqr; 1836 nvme->n_max_queue_entries = cap.b.cap_mqes + 1; 1837 1838 /* 1839 * The MPSMIN and MPSMAX fields in the CAP register use 0 to specify 1840 * the base page size of 4k (1<<12), so add 12 here to get the real 1841 * page size value. 1842 */ 1843 nvme->n_pageshift = MIN(MAX(cap.b.cap_mpsmin + 12, PAGESHIFT), 1844 cap.b.cap_mpsmax + 12); 1845 nvme->n_pagesize = 1UL << (nvme->n_pageshift); 1846 1847 /* 1848 * Set up Queue DMA to transfer at least 1 page-aligned page at a time. 1849 */ 1850 nvme->n_queue_dma_attr.dma_attr_align = nvme->n_pagesize; 1851 nvme->n_queue_dma_attr.dma_attr_minxfer = nvme->n_pagesize; 1852 1853 /* 1854 * Set up PRP DMA to transfer 1 page-aligned page at a time. 1855 * Maxxfer may be increased after we identified the controller limits. 
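	 * (n_prp_dma_attr.dma_attr_maxxfer is raised to
	 * n_max_data_transfer_size further down, once MDTS has been read from
	 * the identify data.)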

static int
nvme_init(nvme_t *nvme)
{
	nvme_reg_cc_t cc = { 0 };
	nvme_reg_aqa_t aqa = { 0 };
	nvme_reg_asq_t asq = { 0 };
	nvme_reg_acq_t acq = { 0 };
	nvme_reg_cap_t cap;
	nvme_reg_vs_t vs;
	nvme_reg_csts_t csts;
	int i = 0;
	int nqueues;
	char model[sizeof (nvme->n_idctl->id_model) + 1];
	char *vendor, *product;

	/* Check controller version */
	vs.r = nvme_get32(nvme, NVME_REG_VS);
	dev_err(nvme->n_dip, CE_CONT, "?NVMe spec version %d.%d",
	    vs.b.vs_mjr, vs.b.vs_mnr);

	if (nvme_version_major < vs.b.vs_mjr ||
	    (nvme_version_major == vs.b.vs_mjr &&
	    nvme_version_minor < vs.b.vs_mnr)) {
		dev_err(nvme->n_dip, CE_WARN, "!no support for version > %d.%d",
		    nvme_version_major, nvme_version_minor);
		if (nvme->n_strict_version)
			goto fail;
	}

	/* retrieve controller configuration */
	cap.r = nvme_get64(nvme, NVME_REG_CAP);

	if ((cap.b.cap_css & NVME_CAP_CSS_NVM) == 0) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!NVM command set not supported by hardware");
		goto fail;
	}

	nvme->n_nssr_supported = cap.b.cap_nssrs;
	nvme->n_doorbell_stride = 4 << cap.b.cap_dstrd;
	nvme->n_timeout = cap.b.cap_to;
	nvme->n_arbitration_mechanisms = cap.b.cap_ams;
	nvme->n_cont_queues_reqd = cap.b.cap_cqr;
	nvme->n_max_queue_entries = cap.b.cap_mqes + 1;

	/*
	 * The MPSMIN and MPSMAX fields in the CAP register use 0 to specify
	 * the base page size of 4k (1<<12), so add 12 here to get the real
	 * page size value.
	 */
	nvme->n_pageshift = MIN(MAX(cap.b.cap_mpsmin + 12, PAGESHIFT),
	    cap.b.cap_mpsmax + 12);
	nvme->n_pagesize = 1UL << (nvme->n_pageshift);

	/*
	 * Set up Queue DMA to transfer at least 1 page-aligned page at a time.
	 */
	nvme->n_queue_dma_attr.dma_attr_align = nvme->n_pagesize;
	nvme->n_queue_dma_attr.dma_attr_minxfer = nvme->n_pagesize;

	/*
	 * Set up PRP DMA to transfer 1 page-aligned page at a time.
	 * Maxxfer may be increased after we have identified the controller
	 * limits.
	 */
	nvme->n_prp_dma_attr.dma_attr_maxxfer = nvme->n_pagesize;
	nvme->n_prp_dma_attr.dma_attr_minxfer = nvme->n_pagesize;
	nvme->n_prp_dma_attr.dma_attr_align = nvme->n_pagesize;
	nvme->n_prp_dma_attr.dma_attr_seg = nvme->n_pagesize - 1;
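
	/*
	 * Worked example: a controller that reports MPSMIN = 0 on x86
	 * (PAGESHIFT = 12) ends up with n_pageshift = 12 and n_pagesize =
	 * 4096, so the queue and PRP DMA attributes set up above require
	 * 4k alignment and a 4k minimum transfer.
	 */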

	/*
	 * Reset controller if it's still in ready state.
	 */
	if (nvme_reset(nvme, B_FALSE) == B_FALSE) {
		dev_err(nvme->n_dip, CE_WARN, "!unable to reset controller");
		ddi_fm_service_impact(nvme->n_dip, DDI_SERVICE_LOST);
		nvme->n_dead = B_TRUE;
		goto fail;
	}

	/*
	 * Create the admin queue pair.
	 */
	if (nvme_alloc_qpair(nvme, nvme->n_admin_queue_len, &nvme->n_adminq, 0)
	    != DDI_SUCCESS) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!unable to allocate admin qpair");
		goto fail;
	}
	nvme->n_ioq = kmem_alloc(sizeof (nvme_qpair_t *), KM_SLEEP);
	nvme->n_ioq[0] = nvme->n_adminq;

	nvme->n_progress |= NVME_ADMIN_QUEUE;

	(void) ddi_prop_update_int(DDI_DEV_T_NONE, nvme->n_dip,
	    "admin-queue-len", nvme->n_admin_queue_len);

	aqa.b.aqa_asqs = aqa.b.aqa_acqs = nvme->n_admin_queue_len - 1;
	asq = nvme->n_adminq->nq_sqdma->nd_cookie.dmac_laddress;
	acq = nvme->n_adminq->nq_cqdma->nd_cookie.dmac_laddress;

	ASSERT((asq & (nvme->n_pagesize - 1)) == 0);
	ASSERT((acq & (nvme->n_pagesize - 1)) == 0);

	nvme_put32(nvme, NVME_REG_AQA, aqa.r);
	nvme_put64(nvme, NVME_REG_ASQ, asq);
	nvme_put64(nvme, NVME_REG_ACQ, acq);

	cc.b.cc_ams = 0;	/* use Round-Robin arbitration */
	cc.b.cc_css = 0;	/* use NVM command set */
	cc.b.cc_mps = nvme->n_pageshift - 12;
	cc.b.cc_shn = 0;	/* no shutdown in progress */
	cc.b.cc_en = 1;		/* enable controller */
	cc.b.cc_iosqes = 6;	/* submission queue entry is 2^6 bytes long */
	cc.b.cc_iocqes = 4;	/* completion queue entry is 2^4 bytes long */

	nvme_put32(nvme, NVME_REG_CC, cc.r);

	/*
	 * Wait for the controller to become ready.
	 */
	csts.r = nvme_get32(nvme, NVME_REG_CSTS);
	if (csts.b.csts_rdy == 0) {
		for (i = 0; i != nvme->n_timeout * 10; i++) {
			delay(drv_usectohz(50000));
			csts.r = nvme_get32(nvme, NVME_REG_CSTS);

			if (csts.b.csts_cfs == 1) {
				dev_err(nvme->n_dip, CE_WARN,
				    "!controller fatal status at init");
				ddi_fm_service_impact(nvme->n_dip,
				    DDI_SERVICE_LOST);
				nvme->n_dead = B_TRUE;
				goto fail;
			}

			if (csts.b.csts_rdy == 1)
				break;
		}
	}

	if (csts.b.csts_rdy == 0) {
		dev_err(nvme->n_dip, CE_WARN, "!controller not ready");
		ddi_fm_service_impact(nvme->n_dip, DDI_SERVICE_LOST);
		nvme->n_dead = B_TRUE;
		goto fail;
	}

	/*
	 * Assume an abort command limit of 1. We'll destroy and re-init
	 * that later when we know the true abort command limit.
	 */
	sema_init(&nvme->n_abort_sema, 1, NULL, SEMA_DRIVER, NULL);

	/*
	 * Setup initial interrupt for admin queue.
	 */
	if ((nvme_setup_interrupts(nvme, DDI_INTR_TYPE_MSIX, 1)
	    != DDI_SUCCESS) &&
	    (nvme_setup_interrupts(nvme, DDI_INTR_TYPE_MSI, 1)
	    != DDI_SUCCESS) &&
	    (nvme_setup_interrupts(nvme, DDI_INTR_TYPE_FIXED, 1)
	    != DDI_SUCCESS)) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!failed to setup initial interrupt");
		goto fail;
	}

	/*
	 * Post an asynchronous event command to catch errors.
	 */
	if (nvme_async_event(nvme) != DDI_SUCCESS) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!failed to post async event");
		goto fail;
	}

	/*
	 * Identify Controller
	 */
	nvme->n_idctl = nvme_identify(nvme, 0);
	if (nvme->n_idctl == NULL) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!failed to identify controller");
		goto fail;
	}

	/*
	 * Get Vendor & Product ID
	 */
	bcopy(nvme->n_idctl->id_model, model, sizeof (nvme->n_idctl->id_model));
	model[sizeof (nvme->n_idctl->id_model)] = '\0';
	sata_split_model(model, &vendor, &product);

	if (vendor == NULL)
		nvme->n_vendor = strdup("NVMe");
	else
		nvme->n_vendor = strdup(vendor);

	nvme->n_product = strdup(product);

	/*
	 * Get controller limits.
	 */
	nvme->n_async_event_limit = MAX(NVME_MIN_ASYNC_EVENT_LIMIT,
	    MIN(nvme->n_admin_queue_len / 10,
	    MIN(nvme->n_idctl->id_aerl + 1, nvme->n_async_event_limit)));

	(void) ddi_prop_update_int(DDI_DEV_T_NONE, nvme->n_dip,
	    "async-event-limit", nvme->n_async_event_limit);

	nvme->n_abort_command_limit = nvme->n_idctl->id_acl + 1;

	/*
	 * Reinitialize the semaphore with the true abort command limit
	 * supported by the hardware. It's not necessary to disable interrupts
	 * as only command aborts use the semaphore, and no commands are
	 * executed or aborted while we're here.
	 */
	sema_destroy(&nvme->n_abort_sema);
	sema_init(&nvme->n_abort_sema, nvme->n_abort_command_limit - 1, NULL,
	    SEMA_DRIVER, NULL);

	nvme->n_progress |= NVME_CTRL_LIMITS;

	if (nvme->n_idctl->id_mdts == 0)
		nvme->n_max_data_transfer_size = nvme->n_pagesize * 65536;
	else
		nvme->n_max_data_transfer_size =
		    1ull << (nvme->n_pageshift + nvme->n_idctl->id_mdts);

	nvme->n_error_log_len = nvme->n_idctl->id_elpe + 1;

	/*
	 * Limit n_max_data_transfer_size to what we can handle in one PRP.
	 * Chained PRPs are currently unsupported. With the default 4k page
	 * size this works out to (4096 / 8) * 4096 bytes, i.e. 2MB.
	 *
	 * This is a no-op on hardware which doesn't support a transfer size
	 * big enough to require chained PRPs.
	 */
	nvme->n_max_data_transfer_size = MIN(nvme->n_max_data_transfer_size,
	    (nvme->n_pagesize / sizeof (uint64_t) * nvme->n_pagesize));

	nvme->n_prp_dma_attr.dma_attr_maxxfer = nvme->n_max_data_transfer_size;

	/*
	 * Make sure the queue entry sizes we use (sizeof (nvme_sqe_t) and
	 * sizeof (nvme_cqe_t)) lie within the minimum required and maximum
	 * supported entry sizes reported by the controller.
	 */
	if (((1 << nvme->n_idctl->id_sqes.qes_min) > sizeof (nvme_sqe_t)) ||
	    ((1 << nvme->n_idctl->id_sqes.qes_max) < sizeof (nvme_sqe_t)) ||
	    ((1 << nvme->n_idctl->id_cqes.qes_min) > sizeof (nvme_cqe_t)) ||
	    ((1 << nvme->n_idctl->id_cqes.qes_max) < sizeof (nvme_cqe_t)))
		goto fail;

	/*
	 * Check for the presence of a Volatile Write Cache. If present,
	 * enable it by default.
	 */
	if (nvme->n_idctl->id_vwc.vwc_present == 0) {
		nvme->n_volatile_write_cache_enabled = B_FALSE;
		nvme_bd_ops.o_sync_cache = NULL;
	} else {
		/*
		 * TODO: send SET FEATURES to enable VWC
		 * (have no hardware to test this)
		 */
		nvme->n_volatile_write_cache_enabled = B_FALSE;
		nvme_bd_ops.o_sync_cache = NULL;
	}
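
	/*
	 * A possible shape for the SET FEATURES TODO above, left here only as
	 * an untested sketch; the NVME_OPC_SET_FEATURES and
	 * NVME_FEAT_WRITE_CACHE constant names are illustrative. The NVMe Set
	 * Features command with Feature Identifier 06h (Volatile Write Cache)
	 * and bit 0 of CDW11 set enables the cache, using the same admin
	 * command helpers as above:
	 *
	 *	nvme_cmd_t *vwc = nvme_alloc_cmd(nvme, KM_SLEEP);
	 *	vwc->nc_sqid = 0;
	 *	vwc->nc_callback = nvme_wakeup_cmd;
	 *	vwc->nc_sqe.sqe_opc = NVME_OPC_SET_FEATURES;
	 *	vwc->nc_sqe.sqe_cdw10 = NVME_FEAT_WRITE_CACHE;
	 *	vwc->nc_sqe.sqe_cdw11 = 1;
	 *
	 *	if (nvme_admin_cmd(vwc, nvme_admin_cmd_timeout) == DDI_SUCCESS &&
	 *	    nvme_check_cmd_status(vwc) == 0)
	 *		nvme->n_volatile_write_cache_enabled = B_TRUE;
	 *	nvme_free_cmd(vwc);
	 */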

	/*
	 * Grab a copy of all mandatory log pages.
	 *
	 * TODO: should go away once a user space tool exists to print logs
	 */
	nvme->n_error_log = (nvme_error_log_entry_t *)
	    nvme_get_logpage(nvme, NVME_LOGPAGE_ERROR);
	nvme->n_health_log = (nvme_health_log_t *)
	    nvme_get_logpage(nvme, NVME_LOGPAGE_HEALTH, -1);
	nvme->n_fwslot_log = (nvme_fwslot_log_t *)
	    nvme_get_logpage(nvme, NVME_LOGPAGE_FWSLOT);

	/*
	 * Identify Namespaces
	 */
	nvme->n_namespace_count = nvme->n_idctl->id_nn;
	nvme->n_ns = kmem_zalloc(sizeof (nvme_namespace_t) *
	    nvme->n_namespace_count, KM_SLEEP);

	for (i = 0; i != nvme->n_namespace_count; i++) {
		nvme_identify_nsid_t *idns;
		int last_rp;

		nvme->n_ns[i].ns_nvme = nvme;
		nvme->n_ns[i].ns_idns = idns = nvme_identify(nvme, i + 1);

		if (idns == NULL) {
			dev_err(nvme->n_dip, CE_WARN,
			    "!failed to identify namespace %d", i + 1);
			goto fail;
		}

		nvme->n_ns[i].ns_id = i + 1;
		nvme->n_ns[i].ns_block_count = idns->id_nsize;
		nvme->n_ns[i].ns_block_size =
		    1 << idns->id_lbaf[idns->id_flbas.lba_format].lbaf_lbads;
		nvme->n_ns[i].ns_best_block_size = nvme->n_ns[i].ns_block_size;

		nvme_prepare_devid(nvme, nvme->n_ns[i].ns_id);

		/*
		 * Find the LBA format with no metadata and the best relative
		 * performance. A value of 3 means "degraded", 0 is best.
		 */
		last_rp = 3;
		for (int j = 0; j <= idns->id_nlbaf; j++) {
			if (idns->id_lbaf[j].lbaf_lbads == 0)
				break;
			if (idns->id_lbaf[j].lbaf_ms != 0)
				continue;
			if (idns->id_lbaf[j].lbaf_rp >= last_rp)
				continue;
			last_rp = idns->id_lbaf[j].lbaf_rp;
			nvme->n_ns[i].ns_best_block_size =
			    1 << idns->id_lbaf[j].lbaf_lbads;
		}

		/*
		 * We currently don't support namespaces that use either:
		 * - thin provisioning
		 * - protection information
		 */
		if (idns->id_nsfeat.f_thin ||
		    idns->id_dps.dp_pinfo) {
			dev_err(nvme->n_dip, CE_WARN,
			    "!ignoring namespace %d, unsupported features: "
			    "thin = %d, pinfo = %d", i + 1,
			    idns->id_nsfeat.f_thin, idns->id_dps.dp_pinfo);
			nvme->n_ns[i].ns_ignore = B_TRUE;
		}
	}

	/*
	 * Try to set up MSI/MSI-X interrupts.
	 */
	if ((nvme->n_intr_types & (DDI_INTR_TYPE_MSI | DDI_INTR_TYPE_MSIX))
	    != 0) {
		nvme_release_interrupts(nvme);

		nqueues = MIN(UINT16_MAX, ncpus);

		if ((nvme_setup_interrupts(nvme, DDI_INTR_TYPE_MSIX,
		    nqueues) != DDI_SUCCESS) &&
		    (nvme_setup_interrupts(nvme, DDI_INTR_TYPE_MSI,
		    nqueues) != DDI_SUCCESS)) {
			dev_err(nvme->n_dip, CE_WARN,
			    "!failed to setup MSI/MSI-X interrupts");
			goto fail;
		}
	}

	nqueues = nvme->n_intr_cnt;

	/*
	 * Create I/O queue pairs.
	 */
	nvme->n_ioq_count = nvme_set_nqueues(nvme, nqueues);
	if (nvme->n_ioq_count == 0) {
		dev_err(nvme->n_dip, CE_WARN,
		    "!failed to set number of I/O queues to %d", nqueues);
		goto fail;
	}

	/*
	 * Reallocate I/O queue array
	 */
	kmem_free(nvme->n_ioq, sizeof (nvme_qpair_t *));
	nvme->n_ioq = kmem_zalloc(sizeof (nvme_qpair_t *) *
	    (nvme->n_ioq_count + 1), KM_SLEEP);
	nvme->n_ioq[0] = nvme->n_adminq;
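
	/*
	 * Slot 0 of the reallocated n_ioq array again holds the admin queue,
	 * and slots 1 through n_ioq_count hold the I/O queue pairs, which is
	 * why the array has n_ioq_count + 1 entries. The index matches the
	 * NVMe queue ID, so nvme_intr() and nvme_bd_cmd() can use the queue
	 * ID to index the array directly.
	 */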

	/*
	 * If we got fewer queues than we asked for we might as well give
	 * some of the interrupt vectors back to the system.
	 */
	if (nvme->n_ioq_count < nqueues) {
		nvme_release_interrupts(nvme);

		if (nvme_setup_interrupts(nvme, nvme->n_intr_type,
		    nvme->n_ioq_count) != DDI_SUCCESS) {
			dev_err(nvme->n_dip, CE_WARN,
			    "!failed to reduce number of interrupts");
			goto fail;
		}
	}

	/*
	 * Alloc & register I/O queue pairs
	 */
	nvme->n_io_queue_len =
	    MIN(nvme->n_io_queue_len, nvme->n_max_queue_entries);
	(void) ddi_prop_update_int(DDI_DEV_T_NONE, nvme->n_dip, "io-queue-len",
	    nvme->n_io_queue_len);

	for (i = 1; i != nvme->n_ioq_count + 1; i++) {
		if (nvme_alloc_qpair(nvme, nvme->n_io_queue_len,
		    &nvme->n_ioq[i], i) != DDI_SUCCESS) {
			dev_err(nvme->n_dip, CE_WARN,
			    "!unable to allocate I/O qpair %d", i);
			goto fail;
		}

		if (nvme_create_io_qpair(nvme, nvme->n_ioq[i], i)
		    != DDI_SUCCESS) {
			dev_err(nvme->n_dip, CE_WARN,
			    "!unable to create I/O qpair %d", i);
			goto fail;
		}
	}

	/*
	 * Post more asynchronous event commands to reduce event reporting
	 * latency as suggested by the spec.
	 */
	for (i = 1; i != nvme->n_async_event_limit; i++) {
		if (nvme_async_event(nvme) != DDI_SUCCESS) {
			dev_err(nvme->n_dip, CE_WARN,
			    "!failed to post async event %d", i);
			goto fail;
		}
	}

	return (DDI_SUCCESS);

fail:
	(void) nvme_reset(nvme, B_FALSE);
	return (DDI_FAILURE);
}
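
/*
 * Worked example of the queue-to-vector mapping handled by nvme_intr()
 * below: with n_intr_cnt = 4 and 8 I/O queue pairs, nvme_create_io_qpair()
 * assigns queue N to vector N % 4, so vector 0 services the admin queue
 * plus I/O queues 4 and 8, vector 1 services queues 1 and 5, and so on;
 * each vector's handler visits exactly those slots of the n_ioq array.
 */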

static uint_t
nvme_intr(caddr_t arg1, caddr_t arg2)
{
	/*LINTED: E_PTR_BAD_CAST_ALIGN*/
	nvme_t *nvme = (nvme_t *)arg1;
	int inum = (int)(uintptr_t)arg2;
	int ccnt = 0;
	int qnum;
	nvme_cmd_t *cmd;

	if (inum >= nvme->n_intr_cnt)
		return (DDI_INTR_UNCLAIMED);

	/*
	 * The interrupt vector a queue uses is calculated as queue_idx %
	 * intr_cnt in nvme_create_io_qpair(). Iterate through the queue array
	 * in steps of n_intr_cnt to process all queues using this vector.
	 */
	for (qnum = inum;
	    qnum < nvme->n_ioq_count + 1 && nvme->n_ioq[qnum] != NULL;
	    qnum += nvme->n_intr_cnt) {
		while ((cmd = nvme_retrieve_cmd(nvme, nvme->n_ioq[qnum]))) {
			taskq_dispatch_ent((taskq_t *)cmd->nc_nvme->n_cmd_taskq,
			    cmd->nc_callback, cmd, TQ_NOSLEEP, &cmd->nc_tqent);
			ccnt++;
		}
	}

	return (ccnt > 0 ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
}

static void
nvme_release_interrupts(nvme_t *nvme)
{
	int i;

	for (i = 0; i < nvme->n_intr_cnt; i++) {
		if (nvme->n_inth[i] == NULL)
			break;

		if (nvme->n_intr_cap & DDI_INTR_FLAG_BLOCK)
			(void) ddi_intr_block_disable(&nvme->n_inth[i], 1);
		else
			(void) ddi_intr_disable(nvme->n_inth[i]);

		(void) ddi_intr_remove_handler(nvme->n_inth[i]);
		(void) ddi_intr_free(nvme->n_inth[i]);
	}

	kmem_free(nvme->n_inth, nvme->n_inth_sz);
	nvme->n_inth = NULL;
	nvme->n_inth_sz = 0;

	nvme->n_progress &= ~NVME_INTERRUPTS;
}

static int
nvme_setup_interrupts(nvme_t *nvme, int intr_type, int nqpairs)
{
	int nintrs, navail, count;
	int ret;
	int i;

	if (nvme->n_intr_types == 0) {
		ret = ddi_intr_get_supported_types(nvme->n_dip,
		    &nvme->n_intr_types);
		if (ret != DDI_SUCCESS) {
			dev_err(nvme->n_dip, CE_WARN,
			    "!%s: ddi_intr_get_supported_types failed",
			    __func__);
			return (ret);
		}
	}

	if ((nvme->n_intr_types & intr_type) == 0)
		return (DDI_FAILURE);

	ret = ddi_intr_get_nintrs(nvme->n_dip, intr_type, &nintrs);
	if (ret != DDI_SUCCESS) {
		dev_err(nvme->n_dip, CE_WARN, "!%s: ddi_intr_get_nintrs failed",
		    __func__);
		return (ret);
	}

	ret = ddi_intr_get_navail(nvme->n_dip, intr_type, &navail);
	if (ret != DDI_SUCCESS) {
		dev_err(nvme->n_dip, CE_WARN, "!%s: ddi_intr_get_navail failed",
		    __func__);
		return (ret);
	}

	/* We want at most one interrupt per queue pair. */
	if (navail > nqpairs)
		navail = nqpairs;

	nvme->n_inth_sz = sizeof (ddi_intr_handle_t) * navail;
	nvme->n_inth = kmem_zalloc(nvme->n_inth_sz, KM_SLEEP);

	ret = ddi_intr_alloc(nvme->n_dip, nvme->n_inth, intr_type, 0, navail,
	    &count, 0);
	if (ret != DDI_SUCCESS) {
		dev_err(nvme->n_dip, CE_WARN, "!%s: ddi_intr_alloc failed",
		    __func__);
		goto fail;
	}

	nvme->n_intr_cnt = count;

	ret = ddi_intr_get_pri(nvme->n_inth[0], &nvme->n_intr_pri);
	if (ret != DDI_SUCCESS) {
		dev_err(nvme->n_dip, CE_WARN, "!%s: ddi_intr_get_pri failed",
		    __func__);
		goto fail;
	}

	for (i = 0; i < count; i++) {
		ret = ddi_intr_add_handler(nvme->n_inth[i], nvme_intr,
		    (void *)nvme, (void *)(uintptr_t)i);
		if (ret != DDI_SUCCESS) {
			dev_err(nvme->n_dip, CE_WARN,
			    "!%s: ddi_intr_add_handler failed", __func__);
			goto fail;
		}
	}

	(void) ddi_intr_get_cap(nvme->n_inth[0], &nvme->n_intr_cap);

	for (i = 0; i < count; i++) {
		if (nvme->n_intr_cap & DDI_INTR_FLAG_BLOCK)
			ret = ddi_intr_block_enable(&nvme->n_inth[i], 1);
		else
			ret = ddi_intr_enable(nvme->n_inth[i]);

		if (ret != DDI_SUCCESS) {
			dev_err(nvme->n_dip, CE_WARN,
			    "!%s: enabling interrupt %d failed", __func__, i);
			goto fail;
		}
	}

	nvme->n_intr_type = intr_type;

	nvme->n_progress |= NVME_INTERRUPTS;

	return (DDI_SUCCESS);

fail:
	nvme_release_interrupts(nvme);

	return (ret);
}

static int
nvme_fm_errcb(dev_info_t *dip, ddi_fm_error_t *fm_error, const void *arg)
{
	_NOTE(ARGUNUSED(arg));

	pci_ereport_post(dip, fm_error, NULL);
	return (fm_error->fme_status);
}
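
/*
 * The tunables read by nvme_attach() below via ddi_prop_get_int(9F) can be
 * overridden from the driver's .conf file (typically /kernel/drv/nvme.conf).
 * A hypothetical example, values for illustration only:
 *
 *	strict-version=0;
 *	admin-queue-len=256;
 *	io-queue-len=1024;
 *	async-event-limit=10;
 *
 * Out-of-range values are adjusted to sane limits right after being read.
 */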

static int
nvme_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	nvme_t *nvme;
	int instance;
	int nregs;
	off_t regsize;
	int i;
	char name[32];

	if (cmd != DDI_ATTACH)
		return (DDI_FAILURE);

	instance = ddi_get_instance(dip);

	if (ddi_soft_state_zalloc(nvme_state, instance) != DDI_SUCCESS)
		return (DDI_FAILURE);

	nvme = ddi_get_soft_state(nvme_state, instance);
	ddi_set_driver_private(dip, nvme);
	nvme->n_dip = dip;

	nvme->n_strict_version = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "strict-version", 1) == 1 ? B_TRUE : B_FALSE;
	nvme->n_ignore_unknown_vendor_status = ddi_prop_get_int(DDI_DEV_T_ANY,
	    dip, DDI_PROP_DONTPASS, "ignore-unknown-vendor-status", 0) == 1 ?
	    B_TRUE : B_FALSE;
	nvme->n_admin_queue_len = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "admin-queue-len", NVME_DEFAULT_ADMIN_QUEUE_LEN);
	nvme->n_io_queue_len = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "io-queue-len", NVME_DEFAULT_IO_QUEUE_LEN);
	nvme->n_async_event_limit = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS, "async-event-limit",
	    NVME_DEFAULT_ASYNC_EVENT_LIMIT);

	if (nvme->n_admin_queue_len < NVME_MIN_ADMIN_QUEUE_LEN)
		nvme->n_admin_queue_len = NVME_MIN_ADMIN_QUEUE_LEN;
	else if (nvme->n_admin_queue_len > NVME_MAX_ADMIN_QUEUE_LEN)
		nvme->n_admin_queue_len = NVME_MAX_ADMIN_QUEUE_LEN;

	if (nvme->n_io_queue_len < NVME_MIN_IO_QUEUE_LEN)
		nvme->n_io_queue_len = NVME_MIN_IO_QUEUE_LEN;

	if (nvme->n_async_event_limit < 1)
		nvme->n_async_event_limit = NVME_DEFAULT_ASYNC_EVENT_LIMIT;

	nvme->n_reg_acc_attr = nvme_reg_acc_attr;
	nvme->n_queue_dma_attr = nvme_queue_dma_attr;
	nvme->n_prp_dma_attr = nvme_prp_dma_attr;
	nvme->n_sgl_dma_attr = nvme_sgl_dma_attr;

	/*
	 * Setup FMA support.
	 */
	nvme->n_fm_cap = ddi_getprop(DDI_DEV_T_ANY, dip,
	    DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "fm-capable",
	    DDI_FM_EREPORT_CAPABLE | DDI_FM_ACCCHK_CAPABLE |
	    DDI_FM_DMACHK_CAPABLE | DDI_FM_ERRCB_CAPABLE);

	ddi_fm_init(dip, &nvme->n_fm_cap, &nvme->n_fm_ibc);

	if (nvme->n_fm_cap) {
		if (nvme->n_fm_cap & DDI_FM_ACCCHK_CAPABLE)
			nvme->n_reg_acc_attr.devacc_attr_access =
			    DDI_FLAGERR_ACC;

		if (nvme->n_fm_cap & DDI_FM_DMACHK_CAPABLE) {
			nvme->n_prp_dma_attr.dma_attr_flags |= DDI_DMA_FLAGERR;
			nvme->n_sgl_dma_attr.dma_attr_flags |= DDI_DMA_FLAGERR;
		}

		if (DDI_FM_EREPORT_CAP(nvme->n_fm_cap) ||
		    DDI_FM_ERRCB_CAP(nvme->n_fm_cap))
			pci_ereport_setup(dip);

		if (DDI_FM_ERRCB_CAP(nvme->n_fm_cap))
			ddi_fm_handler_register(dip, nvme_fm_errcb,
			    (void *)nvme);
	}

	nvme->n_progress |= NVME_FMA_INIT;

	/*
	 * The spec defines several register sets. Only the controller
	 * registers (set 1) are currently used.
	 */
	if (ddi_dev_nregs(dip, &nregs) == DDI_FAILURE ||
	    nregs < 2 ||
	    ddi_dev_regsize(dip, 1, &regsize) == DDI_FAILURE)
		goto fail;

	if (ddi_regs_map_setup(dip, 1, &nvme->n_regs, 0, regsize,
	    &nvme->n_reg_acc_attr, &nvme->n_regh) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "!failed to map regset 1");
		goto fail;
	}

	nvme->n_progress |= NVME_REGS_MAPPED;
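
	/*
	 * The taskq created next is where completed commands are processed:
	 * nvme_intr() dispatches each command's nc_callback to it instead of
	 * running callbacks in interrupt context. It is created with one
	 * thread per CPU, capped at UINT16_MAX.
	 */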

	/*
	 * Create taskq for command completion.
	 */
	(void) snprintf(name, sizeof (name), "%s%d_cmd_taskq",
	    ddi_driver_name(dip), ddi_get_instance(dip));
	nvme->n_cmd_taskq = ddi_taskq_create(dip, name, MIN(UINT16_MAX, ncpus),
	    TASKQ_DEFAULTPRI, 0);
	if (nvme->n_cmd_taskq == NULL) {
		dev_err(dip, CE_WARN, "!failed to create cmd taskq");
		goto fail;
	}

	/*
	 * Create PRP DMA cache
	 */
	(void) snprintf(name, sizeof (name), "%s%d_prp_cache",
	    ddi_driver_name(dip), ddi_get_instance(dip));
	nvme->n_prp_cache = kmem_cache_create(name, sizeof (nvme_dma_t),
	    0, nvme_prp_dma_constructor, nvme_prp_dma_destructor,
	    NULL, (void *)nvme, NULL, 0);

	if (nvme_init(nvme) != DDI_SUCCESS)
		goto fail;

	/*
	 * Attach the blkdev driver for each namespace.
	 */
	for (i = 0; i != nvme->n_namespace_count; i++) {
		if (nvme->n_ns[i].ns_ignore)
			continue;

		nvme->n_ns[i].ns_bd_hdl = bd_alloc_handle(&nvme->n_ns[i],
		    &nvme_bd_ops, &nvme->n_prp_dma_attr, KM_SLEEP);

		if (nvme->n_ns[i].ns_bd_hdl == NULL) {
			dev_err(dip, CE_WARN,
			    "!failed to get blkdev handle for namespace %d",
			    i + 1);
			goto fail;
		}

		if (bd_attach_handle(dip, nvme->n_ns[i].ns_bd_hdl)
		    != DDI_SUCCESS) {
			dev_err(dip, CE_WARN,
			    "!failed to attach blkdev handle for namespace %d",
			    i + 1);
			goto fail;
		}
	}

	return (DDI_SUCCESS);

fail:
	/* attach successful anyway so that FMA can retire the device */
	if (nvme->n_dead)
		return (DDI_SUCCESS);

	(void) nvme_detach(dip, DDI_DETACH);

	return (DDI_FAILURE);
}
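
/*
 * nvme_detach() below tears down only what was actually set up, keyed off
 * the NVME_* bits accumulated in n_progress during attach. This is what
 * lets the attach failure path above simply call nvme_detach() for cleanup
 * no matter how far attach got.
 */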

static int
nvme_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int instance, i;
	nvme_t *nvme;

	if (cmd != DDI_DETACH)
		return (DDI_FAILURE);

	instance = ddi_get_instance(dip);

	nvme = ddi_get_soft_state(nvme_state, instance);

	if (nvme == NULL)
		return (DDI_FAILURE);

	if (nvme->n_ns) {
		for (i = 0; i != nvme->n_namespace_count; i++) {
			if (nvme->n_ns[i].ns_bd_hdl) {
				(void) bd_detach_handle(
				    nvme->n_ns[i].ns_bd_hdl);
				bd_free_handle(nvme->n_ns[i].ns_bd_hdl);
			}

			if (nvme->n_ns[i].ns_idns)
				kmem_free(nvme->n_ns[i].ns_idns,
				    sizeof (nvme_identify_nsid_t));
		}

		kmem_free(nvme->n_ns, sizeof (nvme_namespace_t) *
		    nvme->n_namespace_count);
	}

	if (nvme->n_progress & NVME_INTERRUPTS)
		nvme_release_interrupts(nvme);

	if (nvme->n_cmd_taskq)
		ddi_taskq_wait(nvme->n_cmd_taskq);

	if (nvme->n_ioq_count > 0) {
		for (i = 1; i != nvme->n_ioq_count + 1; i++) {
			if (nvme->n_ioq[i] != NULL) {
				/* TODO: send destroy queue commands */
				nvme_free_qpair(nvme->n_ioq[i]);
			}
		}

		kmem_free(nvme->n_ioq, sizeof (nvme_qpair_t *) *
		    (nvme->n_ioq_count + 1));
	}

	if (nvme->n_prp_cache != NULL) {
		kmem_cache_destroy(nvme->n_prp_cache);
	}

	if (nvme->n_progress & NVME_REGS_MAPPED) {
		nvme_shutdown(nvme, NVME_CC_SHN_NORMAL, B_FALSE);
		(void) nvme_reset(nvme, B_FALSE);
	}

	if (nvme->n_cmd_taskq)
		ddi_taskq_destroy(nvme->n_cmd_taskq);

	if (nvme->n_progress & NVME_CTRL_LIMITS)
		sema_destroy(&nvme->n_abort_sema);

	if (nvme->n_progress & NVME_ADMIN_QUEUE)
		nvme_free_qpair(nvme->n_adminq);

	if (nvme->n_idctl)
		kmem_free(nvme->n_idctl, sizeof (nvme_identify_ctrl_t));

	if (nvme->n_progress & NVME_REGS_MAPPED)
		ddi_regs_map_free(&nvme->n_regh);

	if (nvme->n_progress & NVME_FMA_INIT) {
		if (DDI_FM_ERRCB_CAP(nvme->n_fm_cap))
			ddi_fm_handler_unregister(nvme->n_dip);

		if (DDI_FM_EREPORT_CAP(nvme->n_fm_cap) ||
		    DDI_FM_ERRCB_CAP(nvme->n_fm_cap))
			pci_ereport_teardown(nvme->n_dip);

		ddi_fm_fini(nvme->n_dip);
	}

	if (nvme->n_vendor != NULL)
		strfree(nvme->n_vendor);

	if (nvme->n_product != NULL)
		strfree(nvme->n_product);

	ddi_soft_state_free(nvme_state, instance);

	return (DDI_SUCCESS);
}

static int
nvme_quiesce(dev_info_t *dip)
{
	int instance;
	nvme_t *nvme;

	instance = ddi_get_instance(dip);

	nvme = ddi_get_soft_state(nvme_state, instance);

	if (nvme == NULL)
		return (DDI_FAILURE);

	nvme_shutdown(nvme, NVME_CC_SHN_ABRUPT, B_TRUE);

	(void) nvme_reset(nvme, B_TRUE);

	return (DDI_FAILURE);
}
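
/*
 * nvme_fill_prp() below builds the Physical Region Page entries for a
 * transfer: a single DMA cookie goes straight into PRP1, two cookies use
 * PRP1 and PRP2 directly, and anything larger gets a PRP list allocated
 * from n_prp_cache with PRP2 pointing at it. Transfers that would need
 * more than one list page (chained PRPs) were already excluded when
 * n_max_data_transfer_size was capped in nvme_init(), hence the VERIFY.
 */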

static int
nvme_fill_prp(nvme_cmd_t *cmd, bd_xfer_t *xfer)
{
	nvme_t *nvme = cmd->nc_nvme;
	int nprp_page, nprp;
	uint64_t *prp;

	if (xfer->x_ndmac == 0)
		return (DDI_FAILURE);

	cmd->nc_sqe.sqe_dptr.d_prp[0] = xfer->x_dmac.dmac_laddress;
	ddi_dma_nextcookie(xfer->x_dmah, &xfer->x_dmac);

	if (xfer->x_ndmac == 1) {
		cmd->nc_sqe.sqe_dptr.d_prp[1] = 0;
		return (DDI_SUCCESS);
	} else if (xfer->x_ndmac == 2) {
		cmd->nc_sqe.sqe_dptr.d_prp[1] = xfer->x_dmac.dmac_laddress;
		return (DDI_SUCCESS);
	}

	xfer->x_ndmac--;

	nprp_page = nvme->n_pagesize / sizeof (uint64_t) - 1;
	ASSERT(nprp_page > 0);
	nprp = (xfer->x_ndmac + nprp_page - 1) / nprp_page;

	/*
	 * We currently don't support chained PRPs and set up our DMA
	 * attributes to reflect that. If we still get an I/O request
	 * that needs a chained PRP something is very wrong.
	 */
	VERIFY(nprp == 1);

	cmd->nc_dma = kmem_cache_alloc(nvme->n_prp_cache, KM_SLEEP);
	bzero(cmd->nc_dma->nd_memp, cmd->nc_dma->nd_len);

	cmd->nc_sqe.sqe_dptr.d_prp[1] = cmd->nc_dma->nd_cookie.dmac_laddress;

	/*LINTED: E_PTR_BAD_CAST_ALIGN*/
	for (prp = (uint64_t *)cmd->nc_dma->nd_memp;
	    xfer->x_ndmac > 0;
	    prp++, xfer->x_ndmac--) {
		*prp = xfer->x_dmac.dmac_laddress;
		ddi_dma_nextcookie(xfer->x_dmah, &xfer->x_dmac);
	}

	(void) ddi_dma_sync(cmd->nc_dma->nd_dmah, 0, cmd->nc_dma->nd_len,
	    DDI_DMA_SYNC_FORDEV);
	return (DDI_SUCCESS);
}

static nvme_cmd_t *
nvme_create_nvm_cmd(nvme_namespace_t *ns, uint8_t opc, bd_xfer_t *xfer)
{
	nvme_t *nvme = ns->ns_nvme;
	nvme_cmd_t *cmd;

	/*
	 * Blkdev only sets BD_XFER_POLL when dumping, so don't sleep.
	 */
	cmd = nvme_alloc_cmd(nvme, (xfer->x_flags & BD_XFER_POLL) ?
	    KM_NOSLEEP : KM_SLEEP);

	if (cmd == NULL)
		return (NULL);

	cmd->nc_sqe.sqe_opc = opc;
	cmd->nc_callback = nvme_bd_xfer_done;
	cmd->nc_xfer = xfer;

	switch (opc) {
	case NVME_OPC_NVM_WRITE:
	case NVME_OPC_NVM_READ:
		VERIFY(xfer->x_nblks <= 0x10000);

		cmd->nc_sqe.sqe_nsid = ns->ns_id;

		/*
		 * The starting LBA is split across CDW10 (low 32 bits) and
		 * CDW11 (high 32 bits); CDW12 carries the zero-based block
		 * count, hence the - 1.
		 */
		cmd->nc_sqe.sqe_cdw10 = xfer->x_blkno & 0xffffffffu;
		cmd->nc_sqe.sqe_cdw11 = (xfer->x_blkno >> 32);
		cmd->nc_sqe.sqe_cdw12 = (uint16_t)(xfer->x_nblks - 1);

		if (nvme_fill_prp(cmd, xfer) != DDI_SUCCESS)
			goto fail;
		break;

	case NVME_OPC_NVM_FLUSH:
		cmd->nc_sqe.sqe_nsid = ns->ns_id;
		break;

	default:
		goto fail;
	}

	return (cmd);

fail:
	nvme_free_cmd(cmd);
	return (NULL);
}

static void
nvme_bd_xfer_done(void *arg)
{
	nvme_cmd_t *cmd = arg;
	bd_xfer_t *xfer = cmd->nc_xfer;
	int error = 0;

	error = nvme_check_cmd_status(cmd);
	nvme_free_cmd(cmd);

	bd_xfer_done(xfer, error);
}

static void
nvme_bd_driveinfo(void *arg, bd_drive_t *drive)
{
	nvme_namespace_t *ns = arg;
	nvme_t *nvme = ns->ns_nvme;

	/*
	 * blkdev maintains one queue size per instance (namespace),
	 * but all namespaces share the I/O queues.
	 * TODO: need to figure out a sane default, or use per-NS I/O queues,
	 * or change blkdev to handle EAGAIN
	 */
	drive->d_qsize = nvme->n_ioq_count * nvme->n_io_queue_len
	    / nvme->n_namespace_count;

	/*
	 * d_maxxfer is not set, which means the value is taken from the DMA
	 * attributes specified to bd_alloc_handle.
	 */

	drive->d_removable = B_FALSE;
	drive->d_hotpluggable = B_FALSE;

	drive->d_target = ns->ns_id;
	drive->d_lun = 0;

	drive->d_model = nvme->n_idctl->id_model;
	drive->d_model_len = sizeof (nvme->n_idctl->id_model);
	drive->d_vendor = nvme->n_vendor;
	drive->d_vendor_len = strlen(nvme->n_vendor);
	drive->d_product = nvme->n_product;
	drive->d_product_len = strlen(nvme->n_product);
	drive->d_serial = nvme->n_idctl->id_serial;
	drive->d_serial_len = sizeof (nvme->n_idctl->id_serial);
	drive->d_revision = nvme->n_idctl->id_fwrev;
	drive->d_revision_len = sizeof (nvme->n_idctl->id_fwrev);
}

static int
nvme_bd_mediainfo(void *arg, bd_media_t *media)
{
	nvme_namespace_t *ns = arg;

	media->m_nblks = ns->ns_block_count;
	media->m_blksize = ns->ns_block_size;
	media->m_readonly = B_FALSE;
	media->m_solidstate = B_TRUE;

	media->m_pblksize = ns->ns_best_block_size;

	return (0);
}

static int
nvme_bd_cmd(nvme_namespace_t *ns, bd_xfer_t *xfer, uint8_t opc)
{
	nvme_t *nvme = ns->ns_nvme;
	nvme_cmd_t *cmd;

	if (nvme->n_dead)
		return (EIO);

	/* No polling for now */
	if (xfer->x_flags & BD_XFER_POLL)
		return (EIO);

	cmd = nvme_create_nvm_cmd(ns, opc, xfer);
	if (cmd == NULL)
		return (ENOMEM);

	cmd->nc_sqid = (CPU->cpu_id % nvme->n_ioq_count) + 1;
	ASSERT(cmd->nc_sqid <= nvme->n_ioq_count);

	if (nvme_submit_cmd(nvme->n_ioq[cmd->nc_sqid], cmd)
	    != DDI_SUCCESS)
		return (EAGAIN);

	return (0);
}

static int
nvme_bd_read(void *arg, bd_xfer_t *xfer)
{
	nvme_namespace_t *ns = arg;

	return (nvme_bd_cmd(ns, xfer, NVME_OPC_NVM_READ));
}

static int
nvme_bd_write(void *arg, bd_xfer_t *xfer)
{
	nvme_namespace_t *ns = arg;

	return (nvme_bd_cmd(ns, xfer, NVME_OPC_NVM_WRITE));
}

static int
nvme_bd_sync(void *arg, bd_xfer_t *xfer)
{
	nvme_namespace_t *ns = arg;

	if (ns->ns_nvme->n_dead)
		return (EIO);

	/*
	 * If the volatile write cache isn't enabled the FLUSH command is a
	 * no-op, so we can take a shortcut here.
	 */
	if (ns->ns_nvme->n_volatile_write_cache_enabled == B_FALSE) {
		bd_xfer_done(xfer, ENOTSUP);
		return (0);
	}

	return (nvme_bd_cmd(ns, xfer, NVME_OPC_NVM_FLUSH));
}

static int
nvme_bd_devid(void *arg, dev_info_t *devinfo, ddi_devid_t *devid)
{
	nvme_namespace_t *ns = arg;

	return (ddi_devid_init(devinfo, DEVID_ENCAP, strlen(ns->ns_devid),
	    ns->ns_devid, devid));
}