7364 NVMe driver performance can be improved by caching nvme_dma_t structs for PRPL.
Reviewed by: Hans Rosenfeld <hans.rosenfeld@nexenta.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Garrett D'Amore <garrett@lucera.com>
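
This change replaces a per-command DMA allocation for PRP lists with a kmem
object cache: the expensive DMA handle/memory setup now runs once per cached
object in nvme_prp_dma_constructor(), instead of once per I/O in the hot path.
Below is a minimal, self-contained sketch of that object-cache pattern using
libumem, which mirrors the kernel kmem_cache API in userland. The object type
and all names in the sketch are hypothetical, and a plain malloc'd buffer
stands in for the DMA-backed PRP page; it illustrates the pattern only, not
the driver code itself.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <umem.h>

    #define FAKE_PAGESIZE   4096

    /* Hypothetical object; mo_page stands in for the DMA-backed PRP page. */
    typedef struct my_obj {
            char *mo_page;
    } my_obj_t;

    /* Runs once when the cache grows, not on every allocation. */
    static int
    my_obj_ctor(void *buf, void *private, int flags)
    {
            my_obj_t *obj = buf;

            obj->mo_page = malloc(FAKE_PAGESIZE);
            return (obj->mo_page == NULL ? -1 : 0);
    }

    /* Runs only when the cache shrinks or is destroyed. */
    static void
    my_obj_dtor(void *buf, void *private)
    {
            my_obj_t *obj = buf;

            free(obj->mo_page);
    }

    int
    main(void)
    {
            umem_cache_t *cache = umem_cache_create("my_obj_cache",
                sizeof (my_obj_t), 0, my_obj_ctor, my_obj_dtor,
                NULL, NULL, NULL, 0);

            /*
             * The hot path only zeroes the prebuilt page, much as
             * nvme_fill_prp() does after kmem_cache_alloc() in this patch.
             */
            my_obj_t *obj = umem_cache_alloc(cache, UMEM_NOFAIL);
            memset(obj->mo_page, 0, FAKE_PAGESIZE);
            umem_cache_free(cache, obj);

            umem_cache_destroy(cache);
            return (0);
    }

(Build with cc -lumem on illumos.)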

--- old/usr/src/uts/common/io/nvme/nvme.c
+++ new/usr/src/uts/common/io/nvme/nvme.c
[ 249 lines elided ]
 250  250  
 251  251  static void nvme_bd_xfer_done(void *);
 252  252  static void nvme_bd_driveinfo(void *, bd_drive_t *);
 253  253  static int nvme_bd_mediainfo(void *, bd_media_t *);
 254  254  static int nvme_bd_cmd(nvme_namespace_t *, bd_xfer_t *, uint8_t);
 255  255  static int nvme_bd_read(void *, bd_xfer_t *);
 256  256  static int nvme_bd_write(void *, bd_xfer_t *);
 257  257  static int nvme_bd_sync(void *, bd_xfer_t *);
 258  258  static int nvme_bd_devid(void *, dev_info_t *, ddi_devid_t *);
 259  259  
      260 +static int nvme_prp_dma_constructor(void *, void *, int);
      261 +static void nvme_prp_dma_destructor(void *, void *);
      262 +
 260  263  static void nvme_prepare_devid(nvme_t *, uint32_t);
 261  264  
 262  265  static void *nvme_state;
 263  266  static kmem_cache_t *nvme_cmd_cache;
 264  267  
 265  268  /*
 266  269   * DMA attributes for queue DMA memory
 267  270   *
 268  271   * Queue DMA memory must be page aligned. The maximum length of a queue is
 269  272   * 65536 entries, and an entry can be 64 bytes long.
[ 211 lines elided ]
 481  484  
 482  485          ddi_fm_dma_err_get(dma->nd_dmah, &error, DDI_FME_VERSION);
 483  486  
 484  487          if (error.fme_status != DDI_FM_OK)
 485  488                  return (B_TRUE);
 486  489  
 487  490          return (B_FALSE);
 488  491  }
 489  492  
 490  493  static void
 491      -nvme_free_dma(nvme_dma_t *dma)
      494 +nvme_free_dma_common(nvme_dma_t *dma)
 492  495  {
 493  496          if (dma->nd_dmah != NULL)
 494  497                  (void) ddi_dma_unbind_handle(dma->nd_dmah);
 495  498          if (dma->nd_acch != NULL)
 496  499                  ddi_dma_mem_free(&dma->nd_acch);
 497  500          if (dma->nd_dmah != NULL)
 498  501                  ddi_dma_free_handle(&dma->nd_dmah);
 499      -        kmem_free(dma, sizeof (nvme_dma_t));
 500  502  }
 501  503  
 502      -static int
 503      -nvme_zalloc_dma(nvme_t *nvme, size_t len, uint_t flags,
 504      -    ddi_dma_attr_t *dma_attr, nvme_dma_t **ret)
      504 +static void
      505 +nvme_free_dma(nvme_dma_t *dma)
 505  506  {
 506      -        nvme_dma_t *dma = kmem_zalloc(sizeof (nvme_dma_t), KM_SLEEP);
      507 +        nvme_free_dma_common(dma);
      508 +        kmem_free(dma, sizeof (*dma));
      509 +}
      510 +
      511 +static void
      512 +nvme_prp_dma_destructor(void *buf, void *private)
      513 +{
      514 +        nvme_dma_t *dma = (nvme_dma_t *)buf;
 507  515  
      516 +        nvme_free_dma_common(dma);
      517 +}
      518 +
      519 +static int
      520 +nvme_alloc_dma_common(nvme_t *nvme, nvme_dma_t *dma,
      521 +    size_t len, uint_t flags, ddi_dma_attr_t *dma_attr)
      522 +{
 508  523          if (ddi_dma_alloc_handle(nvme->n_dip, dma_attr, DDI_DMA_SLEEP, NULL,
 509  524              &dma->nd_dmah) != DDI_SUCCESS) {
 510  525                  /*
 511  526                   * Due to DDI_DMA_SLEEP this can't be DDI_DMA_NORESOURCES, and
 512  527                   * the only other possible error is DDI_DMA_BADATTR which
 513  528                   * indicates a driver bug which should cause a panic.
 514  529                   */
 515  530                  dev_err(nvme->n_dip, CE_PANIC,
 516  531                      "!failed to get DMA handle, check DMA attributes");
 517  532                  return (DDI_FAILURE);
[ 6 lines elided ]
 524  539          (void) ddi_dma_mem_alloc(dma->nd_dmah, len, &nvme->n_reg_acc_attr,
 525  540              DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, &dma->nd_memp,
 526  541              &dma->nd_len, &dma->nd_acch);
 527  542  
 528  543          if (ddi_dma_addr_bind_handle(dma->nd_dmah, NULL, dma->nd_memp,
 529  544              dma->nd_len, flags | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
 530  545              &dma->nd_cookie, &dma->nd_ncookie) != DDI_DMA_MAPPED) {
 531  546                  dev_err(nvme->n_dip, CE_WARN,
 532  547                      "!failed to bind DMA memory");
 533  548                  atomic_inc_32(&nvme->n_dma_bind_err);
      549 +                nvme_free_dma_common(dma);
      550 +                return (DDI_FAILURE);
      551 +        }
      552 +
      553 +        return (DDI_SUCCESS);
      554 +}
      555 +
      556 +static int
      557 +nvme_zalloc_dma(nvme_t *nvme, size_t len, uint_t flags,
      558 +    ddi_dma_attr_t *dma_attr, nvme_dma_t **ret)
      559 +{
      560 +        nvme_dma_t *dma = kmem_zalloc(sizeof (nvme_dma_t), KM_SLEEP);
      561 +
      562 +        if (nvme_alloc_dma_common(nvme, dma, len, flags, dma_attr) !=
      563 +            DDI_SUCCESS) {
 534  564                  *ret = NULL;
 535      -                nvme_free_dma(dma);
      565 +                kmem_free(dma, sizeof (nvme_dma_t));
 536  566                  return (DDI_FAILURE);
 537  567          }
 538  568  
 539  569          bzero(dma->nd_memp, dma->nd_len);
 540  570  
 541  571          *ret = dma;
 542  572          return (DDI_SUCCESS);
 543  573  }
 544  574  
 545  575  static int
      576 +nvme_prp_dma_constructor(void *buf, void *private, int flags)
      577 +{
      578 +        nvme_dma_t *dma = (nvme_dma_t *)buf;
      579 +        nvme_t *nvme = (nvme_t *)private;
      580 +
      581 +        dma->nd_dmah = NULL;
      582 +        dma->nd_acch = NULL;
      583 +
      584 +        if (nvme_alloc_dma_common(nvme, dma, nvme->n_pagesize,
      585 +            DDI_DMA_READ, &nvme->n_prp_dma_attr) != DDI_SUCCESS) {
      586 +                return (-1);
      587 +        }
      588 +
      589 +        ASSERT(dma->nd_ncookie == 1);
      590 +
      591 +        dma->nd_cached = B_TRUE;
      592 +
      593 +        return (0);
      594 +}
      595 +
      596 +static int
 546  597  nvme_zalloc_queue_dma(nvme_t *nvme, uint32_t nentry, uint16_t qe_len,
 547  598      uint_t flags, nvme_dma_t **dma)
 548  599  {
 549  600          uint32_t len = nentry * qe_len;
 550  601          ddi_dma_attr_t q_dma_attr = nvme->n_queue_dma_attr;
 551  602  
 552  603          len = roundup(len, nvme->n_pagesize);
 553  604  
 554  605          q_dma_attr.dma_attr_minxfer = len;
 555  606  
[ 97 lines elided ]
 653  704              DDI_INTR_PRI(nvme->n_intr_pri));
 654  705          cv_init(&cmd->nc_cv, NULL, CV_DRIVER, NULL);
 655  706  
 656  707          return (cmd);
 657  708  }
 658  709  
 659  710  static void
 660  711  nvme_free_cmd(nvme_cmd_t *cmd)
 661  712  {
 662  713          if (cmd->nc_dma) {
 663      -                nvme_free_dma(cmd->nc_dma);
      714 +                if (cmd->nc_dma->nd_cached)
      715 +                        kmem_cache_free(cmd->nc_nvme->n_prp_cache,
      716 +                            cmd->nc_dma);
      717 +                else
      718 +                        nvme_free_dma(cmd->nc_dma);
 664  719                  cmd->nc_dma = NULL;
 665  720          }
 666  721  
 667  722          cv_destroy(&cmd->nc_cv);
 668  723          mutex_destroy(&cmd->nc_mutex);
 669  724  
 670  725          kmem_cache_free(nvme_cmd_cache, cmd);
 671  726  }
 672  727  
 673  728  static int
[ 1766 lines elided ]
2440 2495           */
2441 2496          (void) snprintf(name, sizeof (name), "%s%d_cmd_taskq",
2442 2497              ddi_driver_name(dip), ddi_get_instance(dip));
2443 2498          nvme->n_cmd_taskq = ddi_taskq_create(dip, name, MIN(UINT16_MAX, ncpus),
2444 2499              TASKQ_DEFAULTPRI, 0);
2445 2500          if (nvme->n_cmd_taskq == NULL) {
2446 2501                  dev_err(dip, CE_WARN, "!failed to create cmd taskq");
2447 2502                  goto fail;
2448 2503          }
2449 2504  
     2505 +        /*
     2506 +         * Create PRP DMA cache
     2507 +         */
     2508 +        (void) snprintf(name, sizeof (name), "%s%d_prp_cache",
     2509 +            ddi_driver_name(dip), ddi_get_instance(dip));
     2510 +        nvme->n_prp_cache = kmem_cache_create(name, sizeof (nvme_dma_t),
     2511 +            0, nvme_prp_dma_constructor, nvme_prp_dma_destructor,
     2512 +            NULL, (void *)nvme, NULL, 0);
2450 2513  
2451 2514          if (nvme_init(nvme) != DDI_SUCCESS)
2452 2515                  goto fail;
2453 2516  
2454 2517          /*
2455 2518           * Attach the blkdev driver for each namespace.
2456 2519           */
2457 2520          for (i = 0; i != nvme->n_namespace_count; i++) {
2458 2521                  if (nvme->n_ns[i].ns_ignore)
2459 2522                          continue;
[ 72 lines elided ]
2532 2595                          if (nvme->n_ioq[i] != NULL) {
2533 2596                                  /* TODO: send destroy queue commands */
2534 2597                                  nvme_free_qpair(nvme->n_ioq[i]);
2535 2598                          }
2536 2599                  }
2537 2600  
2538 2601                  kmem_free(nvme->n_ioq, sizeof (nvme_qpair_t *) *
2539 2602                      (nvme->n_ioq_count + 1));
2540 2603          }
2541 2604  
     2605 +        if (nvme->n_prp_cache != NULL) {
     2606 +                kmem_cache_destroy(nvme->n_prp_cache);
     2607 +        }
     2608 +
2542 2609          if (nvme->n_progress & NVME_REGS_MAPPED) {
2543 2610                  nvme_shutdown(nvme, NVME_CC_SHN_NORMAL, B_FALSE);
2544 2611                  (void) nvme_reset(nvme, B_FALSE);
2545 2612          }
2546 2613  
2547 2614          if (nvme->n_cmd_taskq)
2548 2615                  ddi_taskq_destroy(nvme->n_cmd_taskq);
2549 2616  
2550 2617          if (nvme->n_progress & NVME_CTRL_LIMITS)
2551 2618                  sema_destroy(&nvme->n_abort_sema);
[ 76 lines elided ]
2628 2695          ASSERT(nprp_page > 0);
2629 2696          nprp = (xfer->x_ndmac + nprp_page - 1) / nprp_page;
2630 2697  
2631 2698          /*
2632 2699           * We currently don't support chained PRPs and set up our DMA
2633 2700           * attributes to reflect that. If we still get an I/O request
2634 2701           * that needs a chained PRP something is very wrong.
2635 2702           */
2636 2703          VERIFY(nprp == 1);
2637 2704  
2638      -        if (nvme_zalloc_dma(nvme, nvme->n_pagesize * nprp, DDI_DMA_READ,
2639      -            &nvme->n_prp_dma_attr, &cmd->nc_dma) != DDI_SUCCESS) {
2640      -                dev_err(nvme->n_dip, CE_WARN, "!%s: nvme_zalloc_dma failed",
2641      -                    __func__);
2642      -                return (DDI_FAILURE);
2643      -        }
     2705 +        cmd->nc_dma = kmem_cache_alloc(nvme->n_prp_cache, KM_SLEEP);
     2706 +        bzero(cmd->nc_dma->nd_memp, cmd->nc_dma->nd_len);
2644 2707  
2645 2708          cmd->nc_sqe.sqe_dptr.d_prp[1] = cmd->nc_dma->nd_cookie.dmac_laddress;
2646      -        ddi_dma_nextcookie(cmd->nc_dma->nd_dmah, &cmd->nc_dma->nd_cookie);
2647 2709  
2648 2710          /*LINTED: E_PTR_BAD_CAST_ALIGN*/
2649 2711          for (prp = (uint64_t *)cmd->nc_dma->nd_memp;
2650 2712              xfer->x_ndmac > 0;
2651 2713              prp++, xfer->x_ndmac--) {
2652 2714                  *prp = xfer->x_dmac.dmac_laddress;
2653 2715                  ddi_dma_nextcookie(xfer->x_dmah, &xfer->x_dmac);
2654 2716          }
2655 2717  
2656 2718          (void) ddi_dma_sync(cmd->nc_dma->nd_dmah, 0, cmd->nc_dma->nd_len,
[ 190 lines elided ]
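
For context on the VERIFY(nprp == 1) in the last hunk above: an NVMe PRP
entry is a 64-bit address, so a single page-sized PRP list covers
pagesize / 8 data pages. Here is a quick standalone check of the
rounding-up division used to compute nprp; the 4 KB page size and the
cookie count are assumptions for illustration, and nprp_page (entries per
PRP page, not shown in the hunk) is taken as pagesize / 8 here.

    #include <stdio.h>
    #include <stdint.h>

    int
    main(void)
    {
            size_t pagesize = 4096;                 /* assumed page size */
            size_t nprp_page = pagesize / sizeof (uint64_t);    /* 512 */
            size_t x_ndmac = 300;           /* hypothetical cookie count */

            /* Same ceiling division as nvme_fill_prp() uses for nprp. */
            size_t nprp = (x_ndmac + nprp_page - 1) / nprp_page;
            printf("%zu PRP list page(s) needed\n", nprp);  /* prints 1 */
            return (0);
    }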