7351 NVMe driver sporadically loses track of completed I/O requests, which
leads to zpool hangs and machine panics.
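
Summary of the change shown in the diff below: the one-entry-at-a-time
nvme_retrieve_cmd() is replaced by nvme_process_cq_cmds(), which drains every
pending completion queue entry while holding nq_mutex, dispatches the command
callbacks through the taskq, and rings the CQ head doorbell once per pass. A
periodic monitor, nvme_intr_monitor(), registered via ddi_periodic_add(), then
sweeps any queue that still has active commands but has seen no completion
activity for nvme_intr_timeout_ns, so a missed interrupt can no longer strand
completed I/O.

For orientation, the following is a small standalone sketch of the phase-tag
convention the drain loop relies on. It is a user-level illustration only; the
structure and variable names are made up for the example and are not the
driver's.

/*
 * Standalone illustration (not driver code): draining a completion ring
 * that uses a phase tag.  The producer writes each new entry with the
 * current phase bit; the consumer keeps consuming while the entry at its
 * head differs from the phase it last saw, and flips its expected phase
 * every time the head index wraps around.  All names are hypothetical.
 */
#include <stdio.h>
#include <stdint.h>

#define	NENTRY	4

struct cqe {
	uint16_t cid;	/* completed command id */
	uint8_t p;	/* phase tag written by the producer */
};

static struct cqe cq[NENTRY];	/* zero-initialized: phase 0 everywhere */
static unsigned cqhead;		/* consumer head index */
static uint8_t phase;		/* phase of entries already consumed */

/* Drain every new entry; return how many were consumed. */
static int
process_cq(void)
{
	int cnt = 0;

	while (cq[cqhead].p != phase) {
		(void) printf("completed cid %u\n", cq[cqhead].cid);
		cqhead = (cqhead + 1) % NENTRY;
		if (cqhead == 0)	/* wrap: producer switches phase */
			phase = !phase;
		cnt++;
	}
	/* A real driver rings the CQ head doorbell once here, under its lock. */
	return (cnt);
}

int
main(void)
{
	/* Simulated producer: two completions posted with phase 1. */
	cq[0] = (struct cqe){ .cid = 7, .p = 1 };
	cq[1] = (struct cqe){ .cid = 9, .p = 1 };
	(void) printf("drained %d entries\n", process_cq());
	return (0);
}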
*** 180,189 ****
--- 180,190 ----
#include <sys/disp.h>
#include <sys/blkdev.h>
#include <sys/atomic.h>
#include <sys/archsystm.h>
#include <sys/sata/sata_hba.h>
+ #include <sys/time.h>
#include "nvme_reg.h"
#include "nvme_var.h"
*** 209,219 ****
static void nvme_free_cmd(nvme_cmd_t *);
static nvme_cmd_t *nvme_create_nvm_cmd(nvme_namespace_t *, uint8_t,
bd_xfer_t *);
static int nvme_admin_cmd(nvme_cmd_t *, int);
static int nvme_submit_cmd(nvme_qpair_t *, nvme_cmd_t *);
! static nvme_cmd_t *nvme_retrieve_cmd(nvme_t *, nvme_qpair_t *);
static boolean_t nvme_wait_cmd(nvme_cmd_t *, uint_t);
static void nvme_wakeup_cmd(void *);
static void nvme_async_event_task(void *);
static int nvme_check_unknown_cmd_status(nvme_cmd_t *);
--- 210,220 ----
static void nvme_free_cmd(nvme_cmd_t *);
static nvme_cmd_t *nvme_create_nvm_cmd(nvme_namespace_t *, uint8_t,
bd_xfer_t *);
static int nvme_admin_cmd(nvme_cmd_t *, int);
static int nvme_submit_cmd(nvme_qpair_t *, nvme_cmd_t *);
! static int nvme_process_cq_cmds(nvme_t *, nvme_qpair_t *);
static boolean_t nvme_wait_cmd(nvme_cmd_t *, uint_t);
static void nvme_wakeup_cmd(void *);
static void nvme_async_event_task(void *);
static int nvme_check_unknown_cmd_status(nvme_cmd_t *);
*** 257,269 ****
--- 258,280 ----
static int nvme_bd_sync(void *, bd_xfer_t *);
static int nvme_bd_devid(void *, dev_info_t *, ddi_devid_t *);
static void nvme_prepare_devid(nvme_t *, uint32_t);
+ static void nvme_intr_monitor(void *arg);
+
static void *nvme_state;
static kmem_cache_t *nvme_cmd_cache;
+ static list_t nvme_qp_list;
+ static kmutex_t nvme_global_mutex;
+ static ddi_periodic_t nvme_cyclic;
+ int nvme_cyclic_seconds = 5;
+ hrtime_t nvme_intr_timeout_ns = 3 * NANOSEC;
+ uint64_t nvme_intr_timeouts = 0;
+ boolean_t nvme_enable_intr_monitoring = B_TRUE;
+
/*
* DMA attributes for queue DMA memory
*
* Queue DMA memory must be page aligned. The maximum length of a queue is
* 65536 entries, and an entry can be 64 bytes long.
*** 380,389 ****
--- 391,408 ----
return (error);
nvme_cmd_cache = kmem_cache_create("nvme_cmd_cache",
sizeof (nvme_cmd_t), 64, NULL, NULL, NULL, NULL, NULL, 0);
+ mutex_init(&nvme_global_mutex, NULL, MUTEX_DRIVER, 0);
+
+ list_create(&nvme_qp_list, sizeof (nvme_qpair_t),
+ offsetof(nvme_qpair_t, nq_list_node));
+
+ nvme_cyclic = ddi_periodic_add(nvme_intr_monitor, NULL,
+ NANOSEC * nvme_cyclic_seconds, DDI_IPL_0);
+
bd_mod_init(&nvme_dev_ops);
error = mod_install(&nvme_modlinkage);
if (error != DDI_SUCCESS) {
ddi_soft_state_fini(&nvme_state);
*** 400,409 ****
--- 419,433 ----
error = mod_remove(&nvme_modlinkage);
if (error == DDI_SUCCESS) {
ddi_soft_state_fini(&nvme_state);
kmem_cache_destroy(nvme_cmd_cache);
+ if (nvme_cyclic != NULL) {
+ ddi_periodic_delete(nvme_cyclic);
+ nvme_cyclic = NULL;
+ }
+ mutex_destroy(&nvme_global_mutex);
bd_mod_fini(&nvme_dev_ops);
}
return (error);
}
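
The _init()/_fini() hunks above add the global state the monitor relies on: a
mutex and a list of live queue pairs, plus a ddi_periodic that fires
nvme_intr_monitor() every nvme_cyclic_seconds (5 by default) and is deleted
again on module unload.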
*** 704,760 ****
qp->nq_next_cmd = (qp->nq_next_cmd + 1) % qp->nq_nentry;
tail.b.sqtdbl_sqt = qp->nq_sqtail = (qp->nq_sqtail + 1) % qp->nq_nentry;
nvme_put32(cmd->nc_nvme, qp->nq_sqtdbl, tail.r);
mutex_exit(&qp->nq_mutex);
return (DDI_SUCCESS);
}
! static nvme_cmd_t *
! nvme_retrieve_cmd(nvme_t *nvme, nvme_qpair_t *qp)
{
nvme_reg_cqhdbl_t head = { 0 };
nvme_cqe_t *cqe;
nvme_cmd_t *cmd;
(void) ddi_dma_sync(qp->nq_cqdma->nd_dmah, 0,
sizeof (nvme_cqe_t) * qp->nq_nentry, DDI_DMA_SYNC_FORKERNEL);
cqe = &qp->nq_cq[qp->nq_cqhead];
-
/* Check phase tag of CQE. Hardware inverts it for new entries. */
if (cqe->cqe_sf.sf_p == qp->nq_phase)
! return (NULL);
ASSERT(nvme->n_ioq[cqe->cqe_sqid] == qp);
ASSERT(cqe->cqe_cid < qp->nq_nentry);
- mutex_enter(&qp->nq_mutex);
cmd = qp->nq_cmd[cqe->cqe_cid];
qp->nq_cmd[cqe->cqe_cid] = NULL;
qp->nq_active_cmds--;
- mutex_exit(&qp->nq_mutex);
ASSERT(cmd != NULL);
ASSERT(cmd->nc_nvme == nvme);
ASSERT(cmd->nc_sqid == cqe->cqe_sqid);
ASSERT(cmd->nc_sqe.sqe_cid == cqe->cqe_cid);
bcopy(cqe, &cmd->nc_cqe, sizeof (nvme_cqe_t));
qp->nq_sqhead = cqe->cqe_sqhd;
! head.b.cqhdbl_cqh = qp->nq_cqhead = (qp->nq_cqhead + 1) % qp->nq_nentry;
/* Toggle phase on wrap-around. */
if (qp->nq_cqhead == 0)
qp->nq_phase = qp->nq_phase ? 0 : 1;
! nvme_put32(cmd->nc_nvme, qp->nq_cqhdbl, head.r);
! return (cmd);
}
static int
nvme_check_unknown_cmd_status(nvme_cmd_t *cmd)
{
--- 728,799 ----
qp->nq_next_cmd = (qp->nq_next_cmd + 1) % qp->nq_nentry;
tail.b.sqtdbl_sqt = qp->nq_sqtail = (qp->nq_sqtail + 1) % qp->nq_nentry;
nvme_put32(cmd->nc_nvme, qp->nq_sqtdbl, tail.r);
+ if (nvme_enable_intr_monitoring)
+ qp->nq_ts = gethrtime();
mutex_exit(&qp->nq_mutex);
+
return (DDI_SUCCESS);
}
! static int
! nvme_process_cq_cmds(nvme_t *nvme, nvme_qpair_t *qp)
{
nvme_reg_cqhdbl_t head = { 0 };
nvme_cqe_t *cqe;
nvme_cmd_t *cmd;
+ int cnt_cmds = 0;
(void) ddi_dma_sync(qp->nq_cqdma->nd_dmah, 0,
sizeof (nvme_cqe_t) * qp->nq_nentry, DDI_DMA_SYNC_FORKERNEL);
cqe = &qp->nq_cq[qp->nq_cqhead];
/* Check phase tag of CQE. Hardware inverts it for new entries. */
if (cqe->cqe_sf.sf_p == qp->nq_phase)
! return (cnt_cmds);
+ mutex_enter(&qp->nq_mutex);
+ while (cqe->cqe_sf.sf_p != qp->nq_phase) {
ASSERT(nvme->n_ioq[cqe->cqe_sqid] == qp);
ASSERT(cqe->cqe_cid < qp->nq_nentry);
cmd = qp->nq_cmd[cqe->cqe_cid];
qp->nq_cmd[cqe->cqe_cid] = NULL;
qp->nq_active_cmds--;
ASSERT(cmd != NULL);
ASSERT(cmd->nc_nvme == nvme);
ASSERT(cmd->nc_sqid == cqe->cqe_sqid);
ASSERT(cmd->nc_sqe.sqe_cid == cqe->cqe_cid);
bcopy(cqe, &cmd->nc_cqe, sizeof (nvme_cqe_t));
qp->nq_sqhead = cqe->cqe_sqhd;
! qp->nq_cqhead = (qp->nq_cqhead + 1) % qp->nq_nentry;
/* Toggle phase on wrap-around. */
if (qp->nq_cqhead == 0)
qp->nq_phase = qp->nq_phase ? 0 : 1;
+ taskq_dispatch_ent((taskq_t *)cmd->nc_nvme->n_cmd_taskq,
+ cmd->nc_callback, cmd, TQ_NOSLEEP, &cmd->nc_tqent);
+ cnt_cmds++;
+ cqe = &qp->nq_cq[qp->nq_cqhead];
+ }
! if (cnt_cmds != 0) {
! head.b.cqhdbl_cqh = qp->nq_cqhead;
! nvme_put32(nvme, qp->nq_cqhdbl, head.r);
! if (nvme_enable_intr_monitoring)
! qp->nq_ts = gethrtime();
! }
! mutex_exit(&qp->nq_mutex);
!
! return (cnt_cmds);
}
static int
nvme_check_unknown_cmd_status(nvme_cmd_t *cmd)
{
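
The key change in the hunk above: the old nvme_retrieve_cmd() held nq_mutex
only around the command-array update, checked the phase tag and advanced the
CQ head outside the lock, and handed back one command per call.
nvme_process_cq_cmds() instead takes nq_mutex once, loops until the phase tag
says the queue is empty, dispatches each callback through the taskq from
inside the loop, and writes the CQ head doorbell a single time before dropping
the lock, so no completed entry can be skipped between calls. It also
refreshes qp->nq_ts on both submission and completion, which is the timestamp
the interrupt monitor later compares against.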
*** 1655,1664 ****
--- 1694,1708 ----
return (DDI_FAILURE);
}
nvme_free_cmd(cmd);
+ mutex_enter(&nvme_global_mutex);
+ list_insert_head(&nvme_qp_list, qp);
+ qp->nq_nvme = nvme;
+ mutex_exit(&nvme_global_mutex);
+
return (DDI_SUCCESS);
}
static boolean_t
nvme_reset(nvme_t *nvme, boolean_t quiesce)
*** 2182,2191 ****
--- 2226,2236 ----
nvme_t *nvme = (nvme_t *)arg1;
int inum = (int)(uintptr_t)arg2;
int ccnt = 0;
int qnum;
nvme_cmd_t *cmd;
+ int cnt_cmds;
if (inum >= nvme->n_intr_cnt)
return (DDI_INTR_UNCLAIMED);
/*
*** 2194,2208 ****
* in steps of n_intr_cnt to process all queues using this vector.
*/
for (qnum = inum;
qnum < nvme->n_ioq_count + 1 && nvme->n_ioq[qnum] != NULL;
qnum += nvme->n_intr_cnt) {
! while ((cmd = nvme_retrieve_cmd(nvme, nvme->n_ioq[qnum]))) {
! taskq_dispatch_ent((taskq_t *)cmd->nc_nvme->n_cmd_taskq,
! cmd->nc_callback, cmd, TQ_NOSLEEP, &cmd->nc_tqent);
! ccnt++;
! }
}
return (ccnt > 0 ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
}
--- 2239,2250 ----
* in steps of n_intr_cnt to process all queues using this vector.
*/
for (qnum = inum;
qnum < nvme->n_ioq_count + 1 && nvme->n_ioq[qnum] != NULL;
qnum += nvme->n_intr_cnt) {
! cnt_cmds = nvme_process_cq_cmds(nvme, nvme->n_ioq[qnum]);
! ccnt += cnt_cmds;
}
return (ccnt > 0 ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
}
*** 2529,2538 ****
--- 2571,2583 ----
if (nvme->n_ioq_count > 0) {
for (i = 1; i != nvme->n_ioq_count + 1; i++) {
if (nvme->n_ioq[i] != NULL) {
/* TODO: send destroy queue commands */
+ mutex_enter(&nvme_global_mutex);
+ list_remove(&nvme_qp_list, nvme->n_ioq[i]);
+ mutex_exit(&nvme_global_mutex);
nvme_free_qpair(nvme->n_ioq[i]);
}
}
kmem_free(nvme->n_ioq, sizeof (nvme_qpair_t *) *
*** 2842,2846 ****
--- 2887,2914 ----
nvme_namespace_t *ns = arg;
return (ddi_devid_init(devinfo, DEVID_ENCAP, strlen(ns->ns_devid),
ns->ns_devid, devid));
}
+
+ static void
+ nvme_intr_monitor(void *arg)
+ {
+ nvme_qpair_t *qp;
+ hrtime_t diff, now_ns;
+
+ if (!nvme_enable_intr_monitoring)
+ return;
+ mutex_enter(&nvme_global_mutex);
+ now_ns = gethrtime();
+ for (qp = list_head(&nvme_qp_list); qp != NULL;
+ qp = list_next(&nvme_qp_list, qp)) {
+ diff = now_ns - qp->nq_ts;
+ if (diff >= nvme_intr_timeout_ns && qp->nq_active_cmds > 0) {
+ if (nvme_process_cq_cmds(qp->nq_nvme, qp)) {
+ nvme_intr_timeouts++;
+ qp->nq_nvme->n_intr_timeouts++;
+ }
+ }
+ }
+ mutex_exit(&nvme_global_mutex);
+ }
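
nvme_intr_monitor() runs from the ddi_periodic every nvme_cyclic_seconds and,
for any queue pair that still has active commands but whose nq_ts is older
than nvme_intr_timeout_ns (3 seconds by default), processes its completion
queue directly and bumps the per-controller and global timeout counters. The
behaviour is controlled by the module globals nvme_enable_intr_monitoring and
nvme_intr_timeout_ns (nvme_cyclic_seconds is only consulted when the periodic
is registered in _init(), and nvme_intr_timeouts merely counts how often the
fallback path fired). Since these are ordinary module globals, they can
presumably be inspected or adjusted in the usual illumos way, e.g. with
mdb -kw or a "set nvme:nvme_intr_timeout_ns=..." line in /etc/system; that
usage is an assumption on my part, not something this patch documents.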