165 #include <sys/byteorder.h>
166 #ifdef _BIG_ENDIAN
167 #error nvme driver needs porting for big-endian platforms
168 #endif
169
170 #include <sys/modctl.h>
171 #include <sys/conf.h>
172 #include <sys/devops.h>
173 #include <sys/ddi.h>
174 #include <sys/sunddi.h>
175 #include <sys/bitmap.h>
176 #include <sys/sysmacros.h>
177 #include <sys/param.h>
178 #include <sys/varargs.h>
179 #include <sys/cpuvar.h>
180 #include <sys/disp.h>
181 #include <sys/blkdev.h>
182 #include <sys/atomic.h>
183 #include <sys/archsystm.h>
184 #include <sys/sata/sata_hba.h>
185
186 #include "nvme_reg.h"
187 #include "nvme_var.h"
188
189
190 /* NVMe spec version supported */
191 static const int nvme_version_major = 1;
192 static const int nvme_version_minor = 0;
193
194 /* tunable for admin command timeout in seconds, default is 1s */
195 static volatile int nvme_admin_cmd_timeout = 1;
196
197 static int nvme_attach(dev_info_t *, ddi_attach_cmd_t);
198 static int nvme_detach(dev_info_t *, ddi_detach_cmd_t);
199 static int nvme_quiesce(dev_info_t *);
200 static int nvme_fm_errcb(dev_info_t *, ddi_fm_error_t *, const void *);
201 static int nvme_setup_interrupts(nvme_t *, int, int);
202 static void nvme_release_interrupts(nvme_t *);
203 static uint_t nvme_intr(caddr_t, caddr_t);
204
205 static void nvme_shutdown(nvme_t *, int, boolean_t);
206 static boolean_t nvme_reset(nvme_t *, boolean_t);
207 static int nvme_init(nvme_t *);
208 static nvme_cmd_t *nvme_alloc_cmd(nvme_t *, int);
209 static void nvme_free_cmd(nvme_cmd_t *);
210 static nvme_cmd_t *nvme_create_nvm_cmd(nvme_namespace_t *, uint8_t,
211 bd_xfer_t *);
212 static int nvme_admin_cmd(nvme_cmd_t *, int);
213 static int nvme_submit_cmd(nvme_qpair_t *, nvme_cmd_t *);
214 static nvme_cmd_t *nvme_retrieve_cmd(nvme_t *, nvme_qpair_t *);
215 static boolean_t nvme_wait_cmd(nvme_cmd_t *, uint_t);
216 static void nvme_wakeup_cmd(void *);
217 static void nvme_async_event_task(void *);
218
219 static int nvme_check_unknown_cmd_status(nvme_cmd_t *);
220 static int nvme_check_vendor_cmd_status(nvme_cmd_t *);
221 static int nvme_check_integrity_cmd_status(nvme_cmd_t *);
222 static int nvme_check_specific_cmd_status(nvme_cmd_t *);
223 static int nvme_check_generic_cmd_status(nvme_cmd_t *);
224 static inline int nvme_check_cmd_status(nvme_cmd_t *);
225
226 static void nvme_abort_cmd(nvme_cmd_t *);
227 static int nvme_async_event(nvme_t *);
228 static void *nvme_get_logpage(nvme_t *, uint8_t, ...);
229 static void *nvme_identify(nvme_t *, uint32_t);
230 static int nvme_set_nqueues(nvme_t *, uint16_t);
231
232 static void nvme_free_dma(nvme_dma_t *);
233 static int nvme_zalloc_dma(nvme_t *, size_t, uint_t, ddi_dma_attr_t *,
234 nvme_dma_t **);
242 static inline void nvme_put32(nvme_t *, uintptr_t, uint32_t);
243 static inline uint64_t nvme_get64(nvme_t *, uintptr_t);
244 static inline uint32_t nvme_get32(nvme_t *, uintptr_t);
245
246 static boolean_t nvme_check_regs_hdl(nvme_t *);
247 static boolean_t nvme_check_dma_hdl(nvme_dma_t *);
248
249 static int nvme_fill_prp(nvme_cmd_t *, bd_xfer_t *);
250
251 static void nvme_bd_xfer_done(void *);
252 static void nvme_bd_driveinfo(void *, bd_drive_t *);
253 static int nvme_bd_mediainfo(void *, bd_media_t *);
254 static int nvme_bd_cmd(nvme_namespace_t *, bd_xfer_t *, uint8_t);
255 static int nvme_bd_read(void *, bd_xfer_t *);
256 static int nvme_bd_write(void *, bd_xfer_t *);
257 static int nvme_bd_sync(void *, bd_xfer_t *);
258 static int nvme_bd_devid(void *, dev_info_t *, ddi_devid_t *);
259
260 static void nvme_prepare_devid(nvme_t *, uint32_t);
261
262 static void *nvme_state;
263 static kmem_cache_t *nvme_cmd_cache;
264
265 /*
266 * DMA attributes for queue DMA memory
267 *
268 * Queue DMA memory must be page aligned. The maximum length of a queue is
269 * 65536 entries, and an entry can be 64 bytes long.
270 */
271 static ddi_dma_attr_t nvme_queue_dma_attr = {
272 .dma_attr_version = DMA_ATTR_V0,
273 .dma_attr_addr_lo = 0,
274 .dma_attr_addr_hi = 0xffffffffffffffffULL,
275 .dma_attr_count_max = (UINT16_MAX + 1) * sizeof (nvme_sqe_t) - 1,
276 .dma_attr_align = 0x1000,
277 .dma_attr_burstsizes = 0x7ff,
278 .dma_attr_minxfer = 0x1000,
279 .dma_attr_maxxfer = (UINT16_MAX + 1) * sizeof (nvme_sqe_t),
280 .dma_attr_seg = 0xffffffffffffffffULL,
281 .dma_attr_sgllen = 1,
282 .dma_attr_granular = 1,
283 .dma_attr_flags = 0,
284 };
365 .o_drive_info = nvme_bd_driveinfo,
366 .o_media_info = nvme_bd_mediainfo,
367 .o_devid_init = nvme_bd_devid,
368 .o_sync_cache = nvme_bd_sync,
369 .o_read = nvme_bd_read,
370 .o_write = nvme_bd_write,
371 };
372
373 int
374 _init(void)
375 {
376 int error;
377
378 error = ddi_soft_state_init(&nvme_state, sizeof (nvme_t), 1);
379 if (error != DDI_SUCCESS)
380 return (error);
381
382 nvme_cmd_cache = kmem_cache_create("nvme_cmd_cache",
383 sizeof (nvme_cmd_t), 64, NULL, NULL, NULL, NULL, NULL, 0);
384
385 bd_mod_init(&nvme_dev_ops);
386
387 error = mod_install(&nvme_modlinkage);
388 if (error != DDI_SUCCESS) {
389 ddi_soft_state_fini(&nvme_state);
390 bd_mod_fini(&nvme_dev_ops);
391 }
392
393 return (error);
394 }
395
396 int
397 _fini(void)
398 {
399 int error;
400
401 error = mod_remove(&nvme_modlinkage);
402 if (error == DDI_SUCCESS) {
403 ddi_soft_state_fini(&nvme_state);
404 kmem_cache_destroy(nvme_cmd_cache);
405 bd_mod_fini(&nvme_dev_ops);
406 }
407
408 return (error);
409 }
410
411 int
412 _info(struct modinfo *modinfop)
413 {
414 return (mod_info(&nvme_modlinkage, modinfop));
415 }
416
417 static inline void
418 nvme_put64(nvme_t *nvme, uintptr_t reg, uint64_t val)
419 {
420 ASSERT(((uintptr_t)(nvme->n_regs + reg) & 0x7) == 0);
421
422 /*LINTED: E_BAD_PTR_CAST_ALIGN*/
423 ddi_put64(nvme->n_regh, (uint64_t *)(nvme->n_regs + reg), val);
424 }
689 	 * slot. If the slot is already occupied, advance to the next slot and
690 	 * try again. This can happen for long-running commands like async event
691 * requests.
692 */
693 while (qp->nq_cmd[qp->nq_next_cmd] != NULL)
694 qp->nq_next_cmd = (qp->nq_next_cmd + 1) % qp->nq_nentry;
695 qp->nq_cmd[qp->nq_next_cmd] = cmd;
696
697 qp->nq_active_cmds++;
698
699 cmd->nc_sqe.sqe_cid = qp->nq_next_cmd;
700 bcopy(&cmd->nc_sqe, &qp->nq_sq[qp->nq_sqtail], sizeof (nvme_sqe_t));
701 (void) ddi_dma_sync(qp->nq_sqdma->nd_dmah,
702 sizeof (nvme_sqe_t) * qp->nq_sqtail,
703 sizeof (nvme_sqe_t), DDI_DMA_SYNC_FORDEV);
704 qp->nq_next_cmd = (qp->nq_next_cmd + 1) % qp->nq_nentry;
705
706 tail.b.sqtdbl_sqt = qp->nq_sqtail = (qp->nq_sqtail + 1) % qp->nq_nentry;
707 nvme_put32(cmd->nc_nvme, qp->nq_sqtdbl, tail.r);
708
709 mutex_exit(&qp->nq_mutex);
710 return (DDI_SUCCESS);
711 }
712
713 static nvme_cmd_t *
714 nvme_retrieve_cmd(nvme_t *nvme, nvme_qpair_t *qp)
715 {
716 nvme_reg_cqhdbl_t head = { 0 };
717
718 nvme_cqe_t *cqe;
719 nvme_cmd_t *cmd;
720
721 (void) ddi_dma_sync(qp->nq_cqdma->nd_dmah, 0,
722 sizeof (nvme_cqe_t) * qp->nq_nentry, DDI_DMA_SYNC_FORKERNEL);
723
724 cqe = &qp->nq_cq[qp->nq_cqhead];
725
726 /* Check phase tag of CQE. Hardware inverts it for new entries. */
727 if (cqe->cqe_sf.sf_p == qp->nq_phase)
728 return (NULL);
729
730 ASSERT(nvme->n_ioq[cqe->cqe_sqid] == qp);
731 ASSERT(cqe->cqe_cid < qp->nq_nentry);
732
733 mutex_enter(&qp->nq_mutex);
734 cmd = qp->nq_cmd[cqe->cqe_cid];
735 qp->nq_cmd[cqe->cqe_cid] = NULL;
736 qp->nq_active_cmds--;
737 mutex_exit(&qp->nq_mutex);
738
739 ASSERT(cmd != NULL);
740 ASSERT(cmd->nc_nvme == nvme);
741 ASSERT(cmd->nc_sqid == cqe->cqe_sqid);
742 ASSERT(cmd->nc_sqe.sqe_cid == cqe->cqe_cid);
743 bcopy(cqe, &cmd->nc_cqe, sizeof (nvme_cqe_t));
744
745 qp->nq_sqhead = cqe->cqe_sqhd;
746
747 head.b.cqhdbl_cqh = qp->nq_cqhead = (qp->nq_cqhead + 1) % qp->nq_nentry;
748
749 /* Toggle phase on wrap-around. */
750 if (qp->nq_cqhead == 0)
751 qp->nq_phase = qp->nq_phase ? 0 : 1;
752
753 nvme_put32(cmd->nc_nvme, qp->nq_cqhdbl, head.r);
754
755 return (cmd);
756 }
757
758 static int
759 nvme_check_unknown_cmd_status(nvme_cmd_t *cmd)
760 {
761 nvme_cqe_t *cqe = &cmd->nc_cqe;
762
763 dev_err(cmd->nc_nvme->n_dip, CE_WARN,
764 "!unknown command status received: opc = %x, sqid = %d, cid = %d, "
765 "sc = %x, sct = %x, dnr = %d, m = %d", cmd->nc_sqe.sqe_opc,
766 cqe->cqe_sqid, cqe->cqe_cid, cqe->cqe_sf.sf_sc, cqe->cqe_sf.sf_sct,
767 cqe->cqe_sf.sf_dnr, cqe->cqe_sf.sf_m);
768
769 bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
770
771 if (cmd->nc_nvme->n_strict_version) {
772 cmd->nc_nvme->n_dead = B_TRUE;
773 ddi_fm_service_impact(cmd->nc_nvme->n_dip, DDI_SERVICE_LOST);
774 }
775
1640 cmd->nc_sqe.sqe_cdw10 = dw10.r;
1641 cmd->nc_sqe.sqe_cdw11 = s_dw11.r;
1642 cmd->nc_sqe.sqe_dptr.d_prp[0] = qp->nq_sqdma->nd_cookie.dmac_laddress;
1643
1644 if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) {
1645 dev_err(nvme->n_dip, CE_WARN,
1646 "!nvme_admin_cmd failed for CREATE SQUEUE");
1647 return (DDI_FAILURE);
1648 }
1649
1650 if (nvme_check_cmd_status(cmd)) {
1651 dev_err(nvme->n_dip, CE_WARN,
1652 "!CREATE SQUEUE failed with sct = %x, sc = %x",
1653 cmd->nc_cqe.cqe_sf.sf_sct, cmd->nc_cqe.cqe_sf.sf_sc);
1654 nvme_free_cmd(cmd);
1655 return (DDI_FAILURE);
1656 }
1657
1658 nvme_free_cmd(cmd);
1659
1660 return (DDI_SUCCESS);
1661 }
1662
1663 static boolean_t
1664 nvme_reset(nvme_t *nvme, boolean_t quiesce)
1665 {
1666 nvme_reg_csts_t csts;
1667 int i;
1668
1669 nvme_put32(nvme, NVME_REG_CC, 0);
1670
1671 csts.r = nvme_get32(nvme, NVME_REG_CSTS);
1672 if (csts.b.csts_rdy == 1) {
1673 nvme_put32(nvme, NVME_REG_CC, 0);
1674 for (i = 0; i != nvme->n_timeout * 10; i++) {
1675 csts.r = nvme_get32(nvme, NVME_REG_CSTS);
1676 if (csts.b.csts_rdy == 0)
1677 break;
1678
1679 if (quiesce)
2167 goto fail;
2168 }
2169 }
2170
2171 return (DDI_SUCCESS);
2172
2173 fail:
2174 (void) nvme_reset(nvme, B_FALSE);
2175 return (DDI_FAILURE);
2176 }
2177
2178 static uint_t
2179 nvme_intr(caddr_t arg1, caddr_t arg2)
2180 {
2181 /*LINTED: E_PTR_BAD_CAST_ALIGN*/
2182 nvme_t *nvme = (nvme_t *)arg1;
2183 int inum = (int)(uintptr_t)arg2;
2184 int ccnt = 0;
2185 int qnum;
2186 nvme_cmd_t *cmd;
2187
2188 if (inum >= nvme->n_intr_cnt)
2189 return (DDI_INTR_UNCLAIMED);
2190
2191 /*
2192 * The interrupt vector a queue uses is calculated as queue_idx %
2193 * intr_cnt in nvme_create_io_qpair(). Iterate through the queue array
2194 * in steps of n_intr_cnt to process all queues using this vector.
2195 */
2196 for (qnum = inum;
2197 qnum < nvme->n_ioq_count + 1 && nvme->n_ioq[qnum] != NULL;
2198 qnum += nvme->n_intr_cnt) {
2199 while ((cmd = nvme_retrieve_cmd(nvme, nvme->n_ioq[qnum]))) {
2200 taskq_dispatch_ent((taskq_t *)cmd->nc_nvme->n_cmd_taskq,
2201 cmd->nc_callback, cmd, TQ_NOSLEEP, &cmd->nc_tqent);
2202 ccnt++;
2203 }
2204 }
2205
2206 return (ccnt > 0 ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
2207 }
2208
2209 static void
2210 nvme_release_interrupts(nvme_t *nvme)
2211 {
2212 int i;
2213
2214 for (i = 0; i < nvme->n_intr_cnt; i++) {
2215 if (nvme->n_inth[i] == NULL)
2216 break;
2217
2218 if (nvme->n_intr_cap & DDI_INTR_FLAG_BLOCK)
2219 (void) ddi_intr_block_disable(&nvme->n_inth[i], 1);
2220 else
2221 (void) ddi_intr_disable(nvme->n_inth[i]);
2222
2223 (void) ddi_intr_remove_handler(nvme->n_inth[i]);
2514
2515 if (nvme->n_ns[i].ns_idns)
2516 kmem_free(nvme->n_ns[i].ns_idns,
2517 sizeof (nvme_identify_nsid_t));
2518 }
2519
2520 kmem_free(nvme->n_ns, sizeof (nvme_namespace_t) *
2521 nvme->n_namespace_count);
2522 }
2523
2524 if (nvme->n_progress & NVME_INTERRUPTS)
2525 nvme_release_interrupts(nvme);
2526
2527 if (nvme->n_cmd_taskq)
2528 ddi_taskq_wait(nvme->n_cmd_taskq);
2529
2530 if (nvme->n_ioq_count > 0) {
2531 for (i = 1; i != nvme->n_ioq_count + 1; i++) {
2532 if (nvme->n_ioq[i] != NULL) {
2533 /* TODO: send destroy queue commands */
2534 nvme_free_qpair(nvme->n_ioq[i]);
2535 }
2536 }
2537
2538 kmem_free(nvme->n_ioq, sizeof (nvme_qpair_t *) *
2539 (nvme->n_ioq_count + 1));
2540 }
2541
2542 if (nvme->n_progress & NVME_REGS_MAPPED) {
2543 nvme_shutdown(nvme, NVME_CC_SHN_NORMAL, B_FALSE);
2544 (void) nvme_reset(nvme, B_FALSE);
2545 }
2546
2547 if (nvme->n_cmd_taskq)
2548 ddi_taskq_destroy(nvme->n_cmd_taskq);
2549
2550 if (nvme->n_progress & NVME_CTRL_LIMITS)
2551 sema_destroy(&nvme->n_abort_sema);
2552
2553 if (nvme->n_progress & NVME_ADMIN_QUEUE)
2826
2827 /*
2828 	 * If the volatile write cache isn't enabled, the FLUSH command is a
2829 * no-op, so we can take a shortcut here.
2830 */
2831 if (ns->ns_nvme->n_volatile_write_cache_enabled == B_FALSE) {
2832 bd_xfer_done(xfer, ENOTSUP);
2833 return (0);
2834 }
2835
2836 return (nvme_bd_cmd(ns, xfer, NVME_OPC_NVM_FLUSH));
2837 }
2838
2839 static int
2840 nvme_bd_devid(void *arg, dev_info_t *devinfo, ddi_devid_t *devid)
2841 {
2842 nvme_namespace_t *ns = arg;
2843
2844 return (ddi_devid_init(devinfo, DEVID_ENCAP, strlen(ns->ns_devid),
2845 ns->ns_devid, devid));
2846 }
|
165 #include <sys/byteorder.h>
166 #ifdef _BIG_ENDIAN
167 #error nvme driver needs porting for big-endian platforms
168 #endif
169
170 #include <sys/modctl.h>
171 #include <sys/conf.h>
172 #include <sys/devops.h>
173 #include <sys/ddi.h>
174 #include <sys/sunddi.h>
175 #include <sys/bitmap.h>
176 #include <sys/sysmacros.h>
177 #include <sys/param.h>
178 #include <sys/varargs.h>
179 #include <sys/cpuvar.h>
180 #include <sys/disp.h>
181 #include <sys/blkdev.h>
182 #include <sys/atomic.h>
183 #include <sys/archsystm.h>
184 #include <sys/sata/sata_hba.h>
185 #include <sys/time.h>
186
187 #include "nvme_reg.h"
188 #include "nvme_var.h"
189
190
191 /* NVMe spec version supported */
192 static const int nvme_version_major = 1;
193 static const int nvme_version_minor = 0;
194
195 /* tunable for admin command timeout in seconds, default is 1s */
196 static volatile int nvme_admin_cmd_timeout = 1;
197
198 static int nvme_attach(dev_info_t *, ddi_attach_cmd_t);
199 static int nvme_detach(dev_info_t *, ddi_detach_cmd_t);
200 static int nvme_quiesce(dev_info_t *);
201 static int nvme_fm_errcb(dev_info_t *, ddi_fm_error_t *, const void *);
202 static int nvme_setup_interrupts(nvme_t *, int, int);
203 static void nvme_release_interrupts(nvme_t *);
204 static uint_t nvme_intr(caddr_t, caddr_t);
205
206 static void nvme_shutdown(nvme_t *, int, boolean_t);
207 static boolean_t nvme_reset(nvme_t *, boolean_t);
208 static int nvme_init(nvme_t *);
209 static nvme_cmd_t *nvme_alloc_cmd(nvme_t *, int);
210 static void nvme_free_cmd(nvme_cmd_t *);
211 static nvme_cmd_t *nvme_create_nvm_cmd(nvme_namespace_t *, uint8_t,
212 bd_xfer_t *);
213 static int nvme_admin_cmd(nvme_cmd_t *, int);
214 static int nvme_submit_cmd(nvme_qpair_t *, nvme_cmd_t *);
215 static int nvme_process_cq_cmds(nvme_t *, nvme_qpair_t *);
216 static boolean_t nvme_wait_cmd(nvme_cmd_t *, uint_t);
217 static void nvme_wakeup_cmd(void *);
218 static void nvme_async_event_task(void *);
219
220 static int nvme_check_unknown_cmd_status(nvme_cmd_t *);
221 static int nvme_check_vendor_cmd_status(nvme_cmd_t *);
222 static int nvme_check_integrity_cmd_status(nvme_cmd_t *);
223 static int nvme_check_specific_cmd_status(nvme_cmd_t *);
224 static int nvme_check_generic_cmd_status(nvme_cmd_t *);
225 static inline int nvme_check_cmd_status(nvme_cmd_t *);
226
227 static void nvme_abort_cmd(nvme_cmd_t *);
228 static int nvme_async_event(nvme_t *);
229 static void *nvme_get_logpage(nvme_t *, uint8_t, ...);
230 static void *nvme_identify(nvme_t *, uint32_t);
231 static int nvme_set_nqueues(nvme_t *, uint16_t);
232
233 static void nvme_free_dma(nvme_dma_t *);
234 static int nvme_zalloc_dma(nvme_t *, size_t, uint_t, ddi_dma_attr_t *,
235 nvme_dma_t **);
243 static inline void nvme_put32(nvme_t *, uintptr_t, uint32_t);
244 static inline uint64_t nvme_get64(nvme_t *, uintptr_t);
245 static inline uint32_t nvme_get32(nvme_t *, uintptr_t);
246
247 static boolean_t nvme_check_regs_hdl(nvme_t *);
248 static boolean_t nvme_check_dma_hdl(nvme_dma_t *);
249
250 static int nvme_fill_prp(nvme_cmd_t *, bd_xfer_t *);
251
252 static void nvme_bd_xfer_done(void *);
253 static void nvme_bd_driveinfo(void *, bd_drive_t *);
254 static int nvme_bd_mediainfo(void *, bd_media_t *);
255 static int nvme_bd_cmd(nvme_namespace_t *, bd_xfer_t *, uint8_t);
256 static int nvme_bd_read(void *, bd_xfer_t *);
257 static int nvme_bd_write(void *, bd_xfer_t *);
258 static int nvme_bd_sync(void *, bd_xfer_t *);
259 static int nvme_bd_devid(void *, dev_info_t *, ddi_devid_t *);
260
261 static void nvme_prepare_devid(nvme_t *, uint32_t);
262
263 static void nvme_intr_monitor(void *arg);
264
265 static void *nvme_state;
266 static kmem_cache_t *nvme_cmd_cache;
267
268 static list_t nvme_qp_list;
269 static kmutex_t nvme_global_mutex;
270 static ddi_periodic_t nvme_cyclic;
271 int nvme_cyclic_seconds = 5;
272 hrtime_t nvme_intr_timeout_ns = 3LL * NANOSEC;	/* LL keeps the product 64-bit */
273 uint64_t nvme_intr_timeouts = 0;
274 boolean_t nvme_enable_intr_monitoring = B_TRUE;
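/*
 * Illustrative usage note, not part of the change: these are plain global
 * kernel variables, so (assuming the usual /etc/system mechanism and the
 * module name "nvme") they can be tuned without rebuilding the driver,
 * for example:
 *
 *	set nvme:nvme_enable_intr_monitoring = 0
 *	set nvme:nvme_intr_timeout_ns = 1000000000
 *	set nvme:nvme_admin_cmd_timeout = 10
 *
 * Such settings take effect on the next boot; a live system could instead
 * be patched with mdb -kw.
 */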
275
276 /*
277 * DMA attributes for queue DMA memory
278 *
279 * Queue DMA memory must be page aligned. The maximum length of a queue is
280 * 65536 entries, and an entry can be 64 bytes long.
281 */
282 static ddi_dma_attr_t nvme_queue_dma_attr = {
283 .dma_attr_version = DMA_ATTR_V0,
284 .dma_attr_addr_lo = 0,
285 .dma_attr_addr_hi = 0xffffffffffffffffULL,
286 .dma_attr_count_max = (UINT16_MAX + 1) * sizeof (nvme_sqe_t) - 1,
287 .dma_attr_align = 0x1000,
288 .dma_attr_burstsizes = 0x7ff,
289 .dma_attr_minxfer = 0x1000,
290 .dma_attr_maxxfer = (UINT16_MAX + 1) * sizeof (nvme_sqe_t),
291 .dma_attr_seg = 0xffffffffffffffffULL,
292 .dma_attr_sgllen = 1,
293 .dma_attr_granular = 1,
294 .dma_attr_flags = 0,
295 };
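/*
 * Worked out for the reader: the limits above follow from the 65536-entry
 * maximum and the 64-byte entry size noted in the comment, i.e.
 * (UINT16_MAX + 1) * sizeof (nvme_sqe_t) = 65536 * 64 = 4 MiB, so
 * dma_attr_maxxfer is 0x400000 and dma_attr_count_max is 0x3fffff.
 * Assuming CTASSERT() from <sys/debug.h> is available, the relationship
 * could be pinned down at compile time with something like:
 *
 *	CTASSERT((UINT16_MAX + 1) * sizeof (nvme_sqe_t) == 0x400000);
 *
 * (a sketch only, not part of the driver).
 */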
376 .o_drive_info = nvme_bd_driveinfo,
377 .o_media_info = nvme_bd_mediainfo,
378 .o_devid_init = nvme_bd_devid,
379 .o_sync_cache = nvme_bd_sync,
380 .o_read = nvme_bd_read,
381 .o_write = nvme_bd_write,
382 };
383
384 int
385 _init(void)
386 {
387 int error;
388
389 error = ddi_soft_state_init(&nvme_state, sizeof (nvme_t), 1);
390 if (error != DDI_SUCCESS)
391 return (error);
392
393 nvme_cmd_cache = kmem_cache_create("nvme_cmd_cache",
394 sizeof (nvme_cmd_t), 64, NULL, NULL, NULL, NULL, NULL, 0);
395
396 mutex_init(&nvme_global_mutex, NULL, MUTEX_DRIVER, 0);
397
398 list_create(&nvme_qp_list, sizeof (nvme_qpair_t),
399 offsetof(nvme_qpair_t, nq_list_node));
400
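	/*
	 * Fire the interrupt monitor every nvme_cyclic_seconds seconds; it
	 * polls queue pairs that appear to have missed an interrupt (see
	 * nvme_intr_monitor() below).
	 */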
 401 	nvme_cyclic = ddi_periodic_add(nvme_intr_monitor, NULL,
 402 	    (hrtime_t)NANOSEC * nvme_cyclic_seconds, DDI_IPL_0);
403
404 bd_mod_init(&nvme_dev_ops);
405
406 error = mod_install(&nvme_modlinkage);
 407 	if (error != DDI_SUCCESS) {
		/* Tear down everything set up above. */
		ddi_periodic_delete(nvme_cyclic);
		nvme_cyclic = NULL;
		list_destroy(&nvme_qp_list);
		mutex_destroy(&nvme_global_mutex);
		kmem_cache_destroy(nvme_cmd_cache);
 408 		ddi_soft_state_fini(&nvme_state);
 409 		bd_mod_fini(&nvme_dev_ops);
 410 	}
411
412 return (error);
413 }
414
415 int
416 _fini(void)
417 {
418 int error;
419
420 error = mod_remove(&nvme_modlinkage);
421 if (error == DDI_SUCCESS) {
422 ddi_soft_state_fini(&nvme_state);
423 kmem_cache_destroy(nvme_cmd_cache);
424 if (nvme_cyclic != NULL) {
425 ddi_periodic_delete(nvme_cyclic);
426 nvme_cyclic = NULL;
427 }
		list_destroy(&nvme_qp_list);
 428 		mutex_destroy(&nvme_global_mutex);
429 bd_mod_fini(&nvme_dev_ops);
430 }
431
432 return (error);
433 }
434
435 int
436 _info(struct modinfo *modinfop)
437 {
438 return (mod_info(&nvme_modlinkage, modinfop));
439 }
440
441 static inline void
442 nvme_put64(nvme_t *nvme, uintptr_t reg, uint64_t val)
443 {
444 ASSERT(((uintptr_t)(nvme->n_regs + reg) & 0x7) == 0);
445
446 /*LINTED: E_BAD_PTR_CAST_ALIGN*/
447 ddi_put64(nvme->n_regh, (uint64_t *)(nvme->n_regs + reg), val);
448 }
713 	 * slot. If the slot is already occupied, advance to the next slot and
714 	 * try again. This can happen for long-running commands like async event
715 * requests.
716 */
717 while (qp->nq_cmd[qp->nq_next_cmd] != NULL)
718 qp->nq_next_cmd = (qp->nq_next_cmd + 1) % qp->nq_nentry;
719 qp->nq_cmd[qp->nq_next_cmd] = cmd;
720
721 qp->nq_active_cmds++;
722
723 cmd->nc_sqe.sqe_cid = qp->nq_next_cmd;
724 bcopy(&cmd->nc_sqe, &qp->nq_sq[qp->nq_sqtail], sizeof (nvme_sqe_t));
725 (void) ddi_dma_sync(qp->nq_sqdma->nd_dmah,
726 sizeof (nvme_sqe_t) * qp->nq_sqtail,
727 sizeof (nvme_sqe_t), DDI_DMA_SYNC_FORDEV);
728 qp->nq_next_cmd = (qp->nq_next_cmd + 1) % qp->nq_nentry;
729
730 tail.b.sqtdbl_sqt = qp->nq_sqtail = (qp->nq_sqtail + 1) % qp->nq_nentry;
731 nvme_put32(cmd->nc_nvme, qp->nq_sqtdbl, tail.r);
732
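	/*
	 * Note when this queue pair last saw activity so the periodic
	 * interrupt monitor can tell a stalled queue from an idle one.
	 */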
733 if (nvme_enable_intr_monitoring)
734 qp->nq_ts = gethrtime();
735 mutex_exit(&qp->nq_mutex);
736
737 return (DDI_SUCCESS);
738 }
739
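/*
 * Process all pending completion queue entries of a queue pair, dispatching
 * the callback of every completed command on the command taskq. Returns the
 * number of completions processed; the CQ head doorbell is only written if
 * at least one entry was consumed.
 */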
740 static int
741 nvme_process_cq_cmds(nvme_t *nvme, nvme_qpair_t *qp)
742 {
743 nvme_reg_cqhdbl_t head = { 0 };
744
745 nvme_cqe_t *cqe;
746 nvme_cmd_t *cmd;
747 int cnt_cmds = 0;
748
749 (void) ddi_dma_sync(qp->nq_cqdma->nd_dmah, 0,
750 sizeof (nvme_cqe_t) * qp->nq_nentry, DDI_DMA_SYNC_FORKERNEL);
751
 752 	cqe = &qp->nq_cq[qp->nq_cqhead];
 753 	/* Check phase tag of CQE. Hardware inverts it for new entries. */
 754 	if (cqe->cqe_sf.sf_p == qp->nq_phase)
 755 		return (cnt_cmds);
 756 
 757 	mutex_enter(&qp->nq_mutex);
	/*
	 * Re-read the CQE under nq_mutex: the interrupt handler and the
	 * periodic interrupt monitor may both get here for the same queue
	 * pair, and the other thread may already have advanced nq_cqhead
	 * past the entry checked above.
	 */
	cqe = &qp->nq_cq[qp->nq_cqhead];
 758 	while (cqe->cqe_sf.sf_p != qp->nq_phase) {
759 ASSERT(nvme->n_ioq[cqe->cqe_sqid] == qp);
760 ASSERT(cqe->cqe_cid < qp->nq_nentry);
761
762 cmd = qp->nq_cmd[cqe->cqe_cid];
763 qp->nq_cmd[cqe->cqe_cid] = NULL;
764 qp->nq_active_cmds--;
765
766 ASSERT(cmd != NULL);
767 ASSERT(cmd->nc_nvme == nvme);
768 ASSERT(cmd->nc_sqid == cqe->cqe_sqid);
769 ASSERT(cmd->nc_sqe.sqe_cid == cqe->cqe_cid);
770 bcopy(cqe, &cmd->nc_cqe, sizeof (nvme_cqe_t));
771
772 qp->nq_sqhead = cqe->cqe_sqhd;
773
774 qp->nq_cqhead = (qp->nq_cqhead + 1) % qp->nq_nentry;
775
776 /* Toggle phase on wrap-around. */
777 if (qp->nq_cqhead == 0)
778 qp->nq_phase = qp->nq_phase ? 0 : 1;
779 taskq_dispatch_ent((taskq_t *)cmd->nc_nvme->n_cmd_taskq,
780 cmd->nc_callback, cmd, TQ_NOSLEEP, &cmd->nc_tqent);
781 cnt_cmds++;
782 cqe = &qp->nq_cq[qp->nq_cqhead];
783 }
784
785 if (cnt_cmds != 0) {
786 head.b.cqhdbl_cqh = qp->nq_cqhead;
787 nvme_put32(nvme, qp->nq_cqhdbl, head.r);
788 if (nvme_enable_intr_monitoring)
789 qp->nq_ts = gethrtime();
790 }
791
792 mutex_exit(&qp->nq_mutex);
793
794 return (cnt_cmds);
795 }
796
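/*
 * The following is an illustrative, user-level model of the phase-tag
 * convention used by nvme_process_cq_cmds() above. It is a sketch for the
 * reader, not part of the driver, and all names in it are hypothetical.
 * The consumer remembers the phase value written during the previous pass
 * over the queue; an entry is new when its phase bit differs from that
 * value, and the remembered value toggles whenever the head wraps back to
 * slot 0.
 */
#include <stdio.h>

#define	QDEPTH	4

static int cq[QDEPTH];			/* phase bits; CQ memory starts zeroed */
static int hw_tail, hw_phase = 1;	/* producer (controller) state */
static int head, phase;			/* consumer (driver) state */

static void
hw_post(void)
{
	cq[hw_tail] = hw_phase;
	hw_tail = (hw_tail + 1) % QDEPTH;
	if (hw_tail == 0)
		hw_phase = !hw_phase;	/* controller inverts phase on wrap */
}

static void
consume_all(void)
{
	while (cq[head] != phase) {	/* entry is new: phase bit differs */
		(void) printf("slot %d consumed (phase %d, previous pass %d)\n",
		    head, cq[head], phase);
		head = (head + 1) % QDEPTH;
		if (head == 0)
			phase = !phase;	/* toggle on wrap-around */
	}
}

int
main(void)
{
	int i;

	for (i = 0; i < 3; i++)
		hw_post();
	consume_all();			/* slots 0-2, first pass */

	for (i = 0; i < 3; i++)
		hw_post();
	consume_all();			/* slot 3, then 0-1 on the second pass */

	return (0);
}
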
797 static int
798 nvme_check_unknown_cmd_status(nvme_cmd_t *cmd)
799 {
800 nvme_cqe_t *cqe = &cmd->nc_cqe;
801
802 dev_err(cmd->nc_nvme->n_dip, CE_WARN,
803 "!unknown command status received: opc = %x, sqid = %d, cid = %d, "
804 "sc = %x, sct = %x, dnr = %d, m = %d", cmd->nc_sqe.sqe_opc,
805 cqe->cqe_sqid, cqe->cqe_cid, cqe->cqe_sf.sf_sc, cqe->cqe_sf.sf_sct,
806 cqe->cqe_sf.sf_dnr, cqe->cqe_sf.sf_m);
807
808 bd_error(cmd->nc_xfer, BD_ERR_ILLRQ);
809
810 if (cmd->nc_nvme->n_strict_version) {
811 cmd->nc_nvme->n_dead = B_TRUE;
812 ddi_fm_service_impact(cmd->nc_nvme->n_dip, DDI_SERVICE_LOST);
813 }
814
1679 cmd->nc_sqe.sqe_cdw10 = dw10.r;
1680 cmd->nc_sqe.sqe_cdw11 = s_dw11.r;
1681 cmd->nc_sqe.sqe_dptr.d_prp[0] = qp->nq_sqdma->nd_cookie.dmac_laddress;
1682
1683 if (nvme_admin_cmd(cmd, nvme_admin_cmd_timeout) != DDI_SUCCESS) {
1684 dev_err(nvme->n_dip, CE_WARN,
1685 "!nvme_admin_cmd failed for CREATE SQUEUE");
1686 return (DDI_FAILURE);
1687 }
1688
1689 if (nvme_check_cmd_status(cmd)) {
1690 dev_err(nvme->n_dip, CE_WARN,
1691 "!CREATE SQUEUE failed with sct = %x, sc = %x",
1692 cmd->nc_cqe.cqe_sf.sf_sct, cmd->nc_cqe.cqe_sf.sf_sc);
1693 nvme_free_cmd(cmd);
1694 return (DDI_FAILURE);
1695 }
1696
1697 nvme_free_cmd(cmd);
1698
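	/*
	 * Register the newly created I/O queue pair with the global list so
	 * the periodic interrupt monitor can watch it; the back pointer lets
	 * the monitor find the owning controller.
	 */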
1699 mutex_enter(&nvme_global_mutex);
1700 list_insert_head(&nvme_qp_list, qp);
1701 qp->nq_nvme = nvme;
1702 mutex_exit(&nvme_global_mutex);
1703
1704 return (DDI_SUCCESS);
1705 }
1706
1707 static boolean_t
1708 nvme_reset(nvme_t *nvme, boolean_t quiesce)
1709 {
1710 nvme_reg_csts_t csts;
1711 int i;
1712
1713 nvme_put32(nvme, NVME_REG_CC, 0);
1714
1715 csts.r = nvme_get32(nvme, NVME_REG_CSTS);
1716 if (csts.b.csts_rdy == 1) {
1717 nvme_put32(nvme, NVME_REG_CC, 0);
1718 for (i = 0; i != nvme->n_timeout * 10; i++) {
1719 csts.r = nvme_get32(nvme, NVME_REG_CSTS);
1720 if (csts.b.csts_rdy == 0)
1721 break;
1722
1723 if (quiesce)
2211 goto fail;
2212 }
2213 }
2214
2215 return (DDI_SUCCESS);
2216
2217 fail:
2218 (void) nvme_reset(nvme, B_FALSE);
2219 return (DDI_FAILURE);
2220 }
2221
2222 static uint_t
2223 nvme_intr(caddr_t arg1, caddr_t arg2)
2224 {
2225 /*LINTED: E_PTR_BAD_CAST_ALIGN*/
2226 nvme_t *nvme = (nvme_t *)arg1;
2227 int inum = (int)(uintptr_t)arg2;
2228 int ccnt = 0;
2229 int qnum;
2230 nvme_cmd_t *cmd;
2231 int cnt_cmds;
2232
2233 if (inum >= nvme->n_intr_cnt)
2234 return (DDI_INTR_UNCLAIMED);
2235
2236 /*
2237 * The interrupt vector a queue uses is calculated as queue_idx %
2238 * intr_cnt in nvme_create_io_qpair(). Iterate through the queue array
2239 * in steps of n_intr_cnt to process all queues using this vector.
2240 */
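	/*
	 * For example (hypothetical numbers): with n_intr_cnt = 4, the
	 * handler instance for vector 1 walks n_ioq[1], n_ioq[5], n_ioq[9],
	 * ... until it runs past n_ioq_count or hits a NULL entry.
	 */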
2241 for (qnum = inum;
2242 qnum < nvme->n_ioq_count + 1 && nvme->n_ioq[qnum] != NULL;
2243 qnum += nvme->n_intr_cnt) {
2244 cnt_cmds = nvme_process_cq_cmds(nvme, nvme->n_ioq[qnum]);
2245 ccnt += cnt_cmds;
2246 }
2247
2248 return (ccnt > 0 ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
2249 }
2250
2251 static void
2252 nvme_release_interrupts(nvme_t *nvme)
2253 {
2254 int i;
2255
2256 for (i = 0; i < nvme->n_intr_cnt; i++) {
2257 if (nvme->n_inth[i] == NULL)
2258 break;
2259
2260 if (nvme->n_intr_cap & DDI_INTR_FLAG_BLOCK)
2261 (void) ddi_intr_block_disable(&nvme->n_inth[i], 1);
2262 else
2263 (void) ddi_intr_disable(nvme->n_inth[i]);
2264
2265 (void) ddi_intr_remove_handler(nvme->n_inth[i]);
2556
2557 if (nvme->n_ns[i].ns_idns)
2558 kmem_free(nvme->n_ns[i].ns_idns,
2559 sizeof (nvme_identify_nsid_t));
2560 }
2561
2562 kmem_free(nvme->n_ns, sizeof (nvme_namespace_t) *
2563 nvme->n_namespace_count);
2564 }
2565
2566 if (nvme->n_progress & NVME_INTERRUPTS)
2567 nvme_release_interrupts(nvme);
2568
2569 if (nvme->n_cmd_taskq)
2570 ddi_taskq_wait(nvme->n_cmd_taskq);
2571
2572 if (nvme->n_ioq_count > 0) {
2573 for (i = 1; i != nvme->n_ioq_count + 1; i++) {
2574 if (nvme->n_ioq[i] != NULL) {
2575 /* TODO: send destroy queue commands */
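				/*
				 * Take the queue pair off the interrupt
				 * monitor's list before freeing it.
				 */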
2576 mutex_enter(&nvme_global_mutex);
2577 list_remove(&nvme_qp_list, nvme->n_ioq[i]);
2578 mutex_exit(&nvme_global_mutex);
2579 nvme_free_qpair(nvme->n_ioq[i]);
2580 }
2581 }
2582
2583 kmem_free(nvme->n_ioq, sizeof (nvme_qpair_t *) *
2584 (nvme->n_ioq_count + 1));
2585 }
2586
2587 if (nvme->n_progress & NVME_REGS_MAPPED) {
2588 nvme_shutdown(nvme, NVME_CC_SHN_NORMAL, B_FALSE);
2589 (void) nvme_reset(nvme, B_FALSE);
2590 }
2591
2592 if (nvme->n_cmd_taskq)
2593 ddi_taskq_destroy(nvme->n_cmd_taskq);
2594
2595 if (nvme->n_progress & NVME_CTRL_LIMITS)
2596 sema_destroy(&nvme->n_abort_sema);
2597
2598 if (nvme->n_progress & NVME_ADMIN_QUEUE)
2871
2872 /*
2873 	 * If the volatile write cache isn't enabled, the FLUSH command is a
2874 * no-op, so we can take a shortcut here.
2875 */
2876 if (ns->ns_nvme->n_volatile_write_cache_enabled == B_FALSE) {
2877 bd_xfer_done(xfer, ENOTSUP);
2878 return (0);
2879 }
2880
2881 return (nvme_bd_cmd(ns, xfer, NVME_OPC_NVM_FLUSH));
2882 }
2883
2884 static int
2885 nvme_bd_devid(void *arg, dev_info_t *devinfo, ddi_devid_t *devid)
2886 {
2887 nvme_namespace_t *ns = arg;
2888
2889 return (ddi_devid_init(devinfo, DEVID_ENCAP, strlen(ns->ns_devid),
2890 ns->ns_devid, devid));
2891 }
2892
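/*
 * Periodic callout, armed in _init(), that guards against lost or stalled
 * interrupts: every nvme_cyclic_seconds it scans the global queue pair list
 * and polls any queue pair that still has active commands but has seen no
 * submission or completion activity for at least nvme_intr_timeout_ns. If
 * the poll actually finds completed commands, a timeout is counted both
 * globally and per controller. Disabled entirely when
 * nvme_enable_intr_monitoring is clear.
 */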
2893 static void
2894 nvme_intr_monitor(void *arg)
2895 {
2896 nvme_qpair_t *qp;
2897 hrtime_t diff, now_ns;
2898
2899 if (!nvme_enable_intr_monitoring)
2900 return;
2901 mutex_enter(&nvme_global_mutex);
2902 now_ns = gethrtime();
2903 for (qp = list_head(&nvme_qp_list); qp != NULL;
2904 qp = list_next(&nvme_qp_list, qp)) {
2905 diff = now_ns - qp->nq_ts;
2906 if (diff >= nvme_intr_timeout_ns && qp->nq_active_cmds > 0) {
2907 if (nvme_process_cq_cmds(qp->nq_nvme, qp)) {
2908 nvme_intr_timeouts++;
2909 qp->nq_nvme->n_intr_timeouts++;
2910 }
2911 }
2912 }
2913 mutex_exit(&nvme_global_mutex);
2914 }