Print this page
7364 NVMe driver performance can be improved by caching nvme_dma_t structs for PRPL.
Reviewed by: Hans Rosenfeld <hans.rosenfeld@nexenta.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Garrett D'Amore <garrett@lucera.com>
*** 255,264 ****
--- 255,267 ----
static int nvme_bd_read(void *, bd_xfer_t *);
static int nvme_bd_write(void *, bd_xfer_t *);
static int nvme_bd_sync(void *, bd_xfer_t *);
static int nvme_bd_devid(void *, dev_info_t *, ddi_devid_t *);
+ static int nvme_prp_dma_constructor(void *, void *, int);
+ static void nvme_prp_dma_destructor(void *, void *);
+
static void nvme_prepare_devid(nvme_t *, uint32_t);
static void *nvme_state;
static kmem_cache_t *nvme_cmd_cache;
*** 486,512 ****
return (B_FALSE);
}
static void
! nvme_free_dma(nvme_dma_t *dma)
{
if (dma->nd_dmah != NULL)
(void) ddi_dma_unbind_handle(dma->nd_dmah);
if (dma->nd_acch != NULL)
ddi_dma_mem_free(&dma->nd_acch);
if (dma->nd_dmah != NULL)
ddi_dma_free_handle(&dma->nd_dmah);
- kmem_free(dma, sizeof (nvme_dma_t));
}
! static int
! nvme_zalloc_dma(nvme_t *nvme, size_t len, uint_t flags,
! ddi_dma_attr_t *dma_attr, nvme_dma_t **ret)
{
! nvme_dma_t *dma = kmem_zalloc(sizeof (nvme_dma_t), KM_SLEEP);
if (ddi_dma_alloc_handle(nvme->n_dip, dma_attr, DDI_DMA_SLEEP, NULL,
&dma->nd_dmah) != DDI_SUCCESS) {
/*
* Due to DDI_DMA_SLEEP this can't be DDI_DMA_NORESOURCES, and
* the only other possible error is DDI_DMA_BADATTR which
--- 489,527 ----
return (B_FALSE);
}
static void
! nvme_free_dma_common(nvme_dma_t *dma)
{
if (dma->nd_dmah != NULL)
(void) ddi_dma_unbind_handle(dma->nd_dmah);
if (dma->nd_acch != NULL)
ddi_dma_mem_free(&dma->nd_acch);
if (dma->nd_dmah != NULL)
ddi_dma_free_handle(&dma->nd_dmah);
}
! static void
! nvme_free_dma(nvme_dma_t *dma)
{
! nvme_free_dma_common(dma);
! kmem_free(dma, sizeof (*dma));
! }
!
! static void
! nvme_prp_dma_destructor(void *buf, void *private)
! {
! nvme_dma_t *dma = (nvme_dma_t *)buf;
+ nvme_free_dma_common(dma);
+ }
+
+ static int
+ nvme_alloc_dma_common(nvme_t *nvme, nvme_dma_t *dma,
+ size_t len, uint_t flags, ddi_dma_attr_t *dma_attr)
+ {
if (ddi_dma_alloc_handle(nvme->n_dip, dma_attr, DDI_DMA_SLEEP, NULL,
&dma->nd_dmah) != DDI_SUCCESS) {
/*
* Due to DDI_DMA_SLEEP this can't be DDI_DMA_NORESOURCES, and
* the only other possible error is DDI_DMA_BADATTR which
*** 529,550 ****
dma->nd_len, flags | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
&dma->nd_cookie, &dma->nd_ncookie) != DDI_DMA_MAPPED) {
dev_err(nvme->n_dip, CE_WARN,
"!failed to bind DMA memory");
atomic_inc_32(&nvme->n_dma_bind_err);
*ret = NULL;
! nvme_free_dma(dma);
return (DDI_FAILURE);
}
bzero(dma->nd_memp, dma->nd_len);
*ret = dma;
return (DDI_SUCCESS);
}
static int
nvme_zalloc_queue_dma(nvme_t *nvme, uint32_t nentry, uint16_t qe_len,
uint_t flags, nvme_dma_t **dma)
{
uint32_t len = nentry * qe_len;
ddi_dma_attr_t q_dma_attr = nvme->n_queue_dma_attr;
--- 544,601 ----
dma->nd_len, flags | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
&dma->nd_cookie, &dma->nd_ncookie) != DDI_DMA_MAPPED) {
dev_err(nvme->n_dip, CE_WARN,
"!failed to bind DMA memory");
atomic_inc_32(&nvme->n_dma_bind_err);
+ nvme_free_dma_common(dma);
+ return (DDI_FAILURE);
+ }
+
+ return (DDI_SUCCESS);
+ }
+
+ static int
+ nvme_zalloc_dma(nvme_t *nvme, size_t len, uint_t flags,
+ ddi_dma_attr_t *dma_attr, nvme_dma_t **ret)
+ {
+ nvme_dma_t *dma = kmem_zalloc(sizeof (nvme_dma_t), KM_SLEEP);
+
+ if (nvme_alloc_dma_common(nvme, dma, len, flags, dma_attr) !=
+ DDI_SUCCESS) {
*ret = NULL;
! kmem_free(dma, sizeof (nvme_dma_t));
return (DDI_FAILURE);
}
bzero(dma->nd_memp, dma->nd_len);
*ret = dma;
return (DDI_SUCCESS);
}
static int
+ nvme_prp_dma_constructor(void *buf, void *private, int flags)
+ {
+ nvme_dma_t *dma = (nvme_dma_t *)buf;
+ nvme_t *nvme = (nvme_t *)private;
+
+ dma->nd_dmah = NULL;
+ dma->nd_acch = NULL;
+
+ if (nvme_alloc_dma_common(nvme, dma, nvme->n_pagesize,
+ DDI_DMA_READ, &nvme->n_prp_dma_attr) != DDI_SUCCESS) {
+ return (-1);
+ }
+
+ ASSERT(dma->nd_ncookie == 1);
+
+ dma->nd_cached = B_TRUE;
+
+ return (0);
+ }
+
+ static int
nvme_zalloc_queue_dma(nvme_t *nvme, uint32_t nentry, uint16_t qe_len,
uint_t flags, nvme_dma_t **dma)
{
uint32_t len = nentry * qe_len;
ddi_dma_attr_t q_dma_attr = nvme->n_queue_dma_attr;
*** 658,667 ****
--- 709,722 ----
static void
nvme_free_cmd(nvme_cmd_t *cmd)
{
if (cmd->nc_dma) {
+ if (cmd->nc_dma->nd_cached)
+ kmem_cache_free(cmd->nc_nvme->n_prp_cache,
+ cmd->nc_dma);
+ else
nvme_free_dma(cmd->nc_dma);
cmd->nc_dma = NULL;
}
cv_destroy(&cmd->nc_cv);
*** 2445,2454 ****
--- 2500,2517 ----
if (nvme->n_cmd_taskq == NULL) {
dev_err(dip, CE_WARN, "!failed to create cmd taskq");
goto fail;
}
+ /*
+ * Create PRP DMA cache
+ */
+ (void) snprintf(name, sizeof (name), "%s%d_prp_cache",
+ ddi_driver_name(dip), ddi_get_instance(dip));
+ nvme->n_prp_cache = kmem_cache_create(name, sizeof (nvme_dma_t),
+ 0, nvme_prp_dma_constructor, nvme_prp_dma_destructor,
+ NULL, (void *)nvme, NULL, 0);
if (nvme_init(nvme) != DDI_SUCCESS)
goto fail;
/*
*** 2537,2546 ****
--- 2600,2613 ----
kmem_free(nvme->n_ioq, sizeof (nvme_qpair_t *) *
(nvme->n_ioq_count + 1));
}
+ if (nvme->n_prp_cache != NULL) {
+ kmem_cache_destroy(nvme->n_prp_cache);
+ }
+
if (nvme->n_progress & NVME_REGS_MAPPED) {
nvme_shutdown(nvme, NVME_CC_SHN_NORMAL, B_FALSE);
(void) nvme_reset(nvme, B_FALSE);
}
*** 2633,2651 ****
* attributes to reflect that. If we still get an I/O request
* that needs a chained PRP something is very wrong.
*/
VERIFY(nprp == 1);
! if (nvme_zalloc_dma(nvme, nvme->n_pagesize * nprp, DDI_DMA_READ,
! &nvme->n_prp_dma_attr, &cmd->nc_dma) != DDI_SUCCESS) {
! dev_err(nvme->n_dip, CE_WARN, "!%s: nvme_zalloc_dma failed",
! __func__);
! return (DDI_FAILURE);
! }
cmd->nc_sqe.sqe_dptr.d_prp[1] = cmd->nc_dma->nd_cookie.dmac_laddress;
- ddi_dma_nextcookie(cmd->nc_dma->nd_dmah, &cmd->nc_dma->nd_cookie);
/*LINTED: E_PTR_BAD_CAST_ALIGN*/
for (prp = (uint64_t *)cmd->nc_dma->nd_memp;
xfer->x_ndmac > 0;
prp++, xfer->x_ndmac--) {
--- 2700,2713 ----
* attributes to reflect that. If we still get an I/O request
* that needs a chained PRP something is very wrong.
*/
VERIFY(nprp == 1);
! cmd->nc_dma = kmem_cache_alloc(nvme->n_prp_cache, KM_SLEEP);
! bzero(cmd->nc_dma->nd_memp, cmd->nc_dma->nd_len);
cmd->nc_sqe.sqe_dptr.d_prp[1] = cmd->nc_dma->nd_cookie.dmac_laddress;
/*LINTED: E_PTR_BAD_CAST_ALIGN*/
for (prp = (uint64_t *)cmd->nc_dma->nd_memp;
xfer->x_ndmac > 0;
prp++, xfer->x_ndmac--) {