Print this page
7364 NVMe driver performance can be improved by caching nvme_dma_t structs for PRPL.
Reviewed by: Hans Rosenfeld <hans.rosenfeld@nexenta.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Garrett D'Amore <garrett@lucera.com>

@@ -255,10 +255,13 @@
 static int nvme_bd_read(void *, bd_xfer_t *);
 static int nvme_bd_write(void *, bd_xfer_t *);
 static int nvme_bd_sync(void *, bd_xfer_t *);
 static int nvme_bd_devid(void *, dev_info_t *, ddi_devid_t *);
 
+static int nvme_prp_dma_constructor(void *, void *, int);
+static void nvme_prp_dma_destructor(void *, void *);
+
 static void nvme_prepare_devid(nvme_t *, uint32_t);
 
 static void *nvme_state;
 static kmem_cache_t *nvme_cmd_cache;
 

@@ -486,27 +489,39 @@
 
         return (B_FALSE);
 }
 
 static void
-nvme_free_dma(nvme_dma_t *dma)
+nvme_free_dma_common(nvme_dma_t *dma)
 {
         if (dma->nd_dmah != NULL)
                 (void) ddi_dma_unbind_handle(dma->nd_dmah);
         if (dma->nd_acch != NULL)
                 ddi_dma_mem_free(&dma->nd_acch);
         if (dma->nd_dmah != NULL)
                 ddi_dma_free_handle(&dma->nd_dmah);
-        kmem_free(dma, sizeof (nvme_dma_t));
 }
 
-static int
-nvme_zalloc_dma(nvme_t *nvme, size_t len, uint_t flags,
-    ddi_dma_attr_t *dma_attr, nvme_dma_t **ret)
+static void
+nvme_free_dma(nvme_dma_t *dma)
 {
-        nvme_dma_t *dma = kmem_zalloc(sizeof (nvme_dma_t), KM_SLEEP);
+        nvme_free_dma_common(dma);
+        kmem_free(dma, sizeof (*dma));
+}
+
+static void
+nvme_prp_dma_destructor(void *buf, void *private)
+{
+        nvme_dma_t *dma = (nvme_dma_t *)buf;
 
+        nvme_free_dma_common(dma);
+}
+
+static int
+nvme_alloc_dma_common(nvme_t *nvme, nvme_dma_t *dma,
+    size_t len, uint_t flags, ddi_dma_attr_t *dma_attr)
+{
         if (ddi_dma_alloc_handle(nvme->n_dip, dma_attr, DDI_DMA_SLEEP, NULL,
             &dma->nd_dmah) != DDI_SUCCESS) {
                 /*
                  * Due to DDI_DMA_SLEEP this can't be DDI_DMA_NORESOURCES, and
                  * the only other possible error is DDI_DMA_BADATTR which

@@ -529,22 +544,58 @@
             dma->nd_len, flags | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
             &dma->nd_cookie, &dma->nd_ncookie) != DDI_DMA_MAPPED) {
                 dev_err(nvme->n_dip, CE_WARN,
                     "!failed to bind DMA memory");
                 atomic_inc_32(&nvme->n_dma_bind_err);
+                nvme_free_dma_common(dma);
+                return (DDI_FAILURE);
+        }
+
+        return (DDI_SUCCESS);
+}
+
+static int
+nvme_zalloc_dma(nvme_t *nvme, size_t len, uint_t flags,
+    ddi_dma_attr_t *dma_attr, nvme_dma_t **ret)
+{
+        nvme_dma_t *dma = kmem_zalloc(sizeof (nvme_dma_t), KM_SLEEP);
+
+        if (nvme_alloc_dma_common(nvme, dma, len, flags, dma_attr) !=
+            DDI_SUCCESS) {
                 *ret = NULL;
-                nvme_free_dma(dma);
+                kmem_free(dma, sizeof (nvme_dma_t));
                 return (DDI_FAILURE);
         }
 
         bzero(dma->nd_memp, dma->nd_len);
 
         *ret = dma;
         return (DDI_SUCCESS);
 }
 
 static int
+nvme_prp_dma_constructor(void *buf, void *private, int flags)
+{
+        nvme_dma_t *dma = (nvme_dma_t *)buf;
+        nvme_t *nvme = (nvme_t *)private;
+
+        dma->nd_dmah = NULL;
+        dma->nd_acch = NULL;
+
+        if (nvme_alloc_dma_common(nvme, dma, nvme->n_pagesize,
+            DDI_DMA_READ, &nvme->n_prp_dma_attr) != DDI_SUCCESS) {
+                return (-1);
+        }
+
+        ASSERT(dma->nd_ncookie == 1);
+
+        dma->nd_cached = B_TRUE;
+
+        return (0);
+}
+
+static int
 nvme_zalloc_queue_dma(nvme_t *nvme, uint32_t nentry, uint16_t qe_len,
     uint_t flags, nvme_dma_t **dma)
 {
         uint32_t len = nentry * qe_len;
         ddi_dma_attr_t q_dma_attr = nvme->n_queue_dma_attr;

@@ -658,10 +709,14 @@
 
 static void
 nvme_free_cmd(nvme_cmd_t *cmd)
 {
         if (cmd->nc_dma) {
+                if (cmd->nc_dma->nd_cached)
+                        kmem_cache_free(cmd->nc_nvme->n_prp_cache,
+                            cmd->nc_dma);
+                else
                 nvme_free_dma(cmd->nc_dma);
                 cmd->nc_dma = NULL;
         }
 
         cv_destroy(&cmd->nc_cv);

@@ -2445,10 +2500,18 @@
         if (nvme->n_cmd_taskq == NULL) {
                 dev_err(dip, CE_WARN, "!failed to create cmd taskq");
                 goto fail;
         }
 
+        /*
+         * Create the per-instance cache of pre-bound PRP list DMA buffers.
+         */
+        (void) snprintf(name, sizeof (name), "%s%d_prp_cache",
+            ddi_driver_name(dip), ddi_get_instance(dip));
+        nvme->n_prp_cache = kmem_cache_create(name, sizeof (nvme_dma_t),
+            0, nvme_prp_dma_constructor, nvme_prp_dma_destructor,
+            NULL, (void *)nvme, NULL, 0);
 
         if (nvme_init(nvme) != DDI_SUCCESS)
                 goto fail;
 
         /*

@@ -2537,10 +2600,14 @@
 
                 kmem_free(nvme->n_ioq, sizeof (nvme_qpair_t *) *
                     (nvme->n_ioq_count + 1));
         }
 
+        if (nvme->n_prp_cache != NULL) {
+                kmem_cache_destroy(nvme->n_prp_cache);
+        }
+
         if (nvme->n_progress & NVME_REGS_MAPPED) {
                 nvme_shutdown(nvme, NVME_CC_SHN_NORMAL, B_FALSE);
                 (void) nvme_reset(nvme, B_FALSE);
         }
 

@@ -2633,19 +2700,14 @@
          * attributes to reflect that. If we still get an I/O request
          * that needs a chained PRP something is very wrong.
          */
         VERIFY(nprp == 1);
 
-        if (nvme_zalloc_dma(nvme, nvme->n_pagesize * nprp, DDI_DMA_READ,
-            &nvme->n_prp_dma_attr, &cmd->nc_dma) != DDI_SUCCESS) {
-                dev_err(nvme->n_dip, CE_WARN, "!%s: nvme_zalloc_dma failed",
-                    __func__);
-                return (DDI_FAILURE);
-        }
+        cmd->nc_dma = kmem_cache_alloc(nvme->n_prp_cache, KM_SLEEP);
+        bzero(cmd->nc_dma->nd_memp, cmd->nc_dma->nd_len);
 
         cmd->nc_sqe.sqe_dptr.d_prp[1] = cmd->nc_dma->nd_cookie.dmac_laddress;
-        ddi_dma_nextcookie(cmd->nc_dma->nd_dmah, &cmd->nc_dma->nd_cookie);
 
         /*LINTED: E_PTR_BAD_CAST_ALIGN*/
         for (prp = (uint64_t *)cmd->nc_dma->nd_memp;
             xfer->x_ndmac > 0;
             prp++, xfer->x_ndmac--) {