Print this page
7813 mpt_sas does not like concurrent HBA resets

*** 554,564 **** mptsas_quiesce /* quiesce */ #endif /* __sparc */ }; ! #define MPTSAS_MOD_STRING "MPTSAS HBA Driver 00.00.00.24" static struct modldrv modldrv = { &mod_driverops, /* Type of module. This one is a driver */ MPTSAS_MOD_STRING, /* Name of the module. */ &mptsas_ops, /* driver ops */ --- 554,564 ---- mptsas_quiesce /* quiesce */ #endif /* __sparc */ }; ! #define MPTSAS_MOD_STRING "MPTSAS HBA Driver 00.00.00.24X" static struct modldrv modldrv = { &mod_driverops, /* Type of module. This one is a driver */ MPTSAS_MOD_STRING, /* Name of the module. */ &mptsas_ops, /* driver ops */
*** 1333,1342 **** --- 1333,1348 ---- MPTSAS_DISABLE_INTR(mpt); if (mptsas_register_intrs(mpt) == FALSE) goto fail; intr_added++; + /* + * The mutex to protect task management during reset + */ + mutex_init(&mpt->m_taskmgmt_mutex, NULL, MUTEX_SPIN, + DDI_INTR_PRI(mpt->m_intr_pri)); + /* Initialize mutex used in interrupt handler */ mutex_init(&mpt->m_mutex, NULL, MUTEX_DRIVER, DDI_INTR_PRI(mpt->m_intr_pri)); mutex_init(&mpt->m_passthru_mutex, NULL, MUTEX_DRIVER, NULL); mutex_init(&mpt->m_tx_waitq_mutex, NULL, MUTEX_DRIVER,
*** 1627,1636 **** --- 1633,1643 ---- } if (mutex_init_done) { mutex_destroy(&mpt->m_tx_waitq_mutex); mutex_destroy(&mpt->m_passthru_mutex); mutex_destroy(&mpt->m_mutex); + mutex_destroy(&mpt->m_taskmgmt_mutex); for (i = 0; i < MPTSAS_MAX_PHYS; i++) { mutex_destroy( &mpt->m_phy_info[i].smhba_info.phy_mutex); } cv_destroy(&mpt->m_cv);
*** 2046,2055 **** --- 2053,2063 ---- } mutex_destroy(&mpt->m_tx_waitq_mutex); mutex_destroy(&mpt->m_passthru_mutex); mutex_destroy(&mpt->m_mutex); + mutex_destroy(&mpt->m_taskmgmt_mutex); for (i = 0; i < MPTSAS_MAX_PHYS; i++) { mutex_destroy(&mpt->m_phy_info[i].smhba_info.phy_mutex); } cv_destroy(&mpt->m_cv); cv_destroy(&mpt->m_passthru_cv);
*** 2407,2417 **** * If IOC is not in operational state, try to hard reset it. */ if ((ioc_status & MPI2_IOC_STATE_MASK) != MPI2_IOC_STATE_OPERATIONAL) { mpt->m_softstate &= ~MPTSAS_SS_MSG_UNIT_RESET; ! if (mptsas_restart_ioc(mpt) == DDI_FAILURE) { mptsas_log(mpt, CE_WARN, "mptsas_power: hard reset failed"); mutex_exit(&mpt->m_mutex); return (DDI_FAILURE); } --- 2415,2425 ---- * If IOC is not in operational state, try to hard reset it. */ if ((ioc_status & MPI2_IOC_STATE_MASK) != MPI2_IOC_STATE_OPERATIONAL) { mpt->m_softstate &= ~MPTSAS_SS_MSG_UNIT_RESET; ! if (mptsas_reset_handler(mpt) == DDI_FAILURE) { mptsas_log(mpt, CE_WARN, "mptsas_power: hard reset failed"); mutex_exit(&mpt->m_mutex); return (DDI_FAILURE); }
*** 3424,3443 **** --- 3432,3455 ---- * which means that they could be invalid even if the target is still * attached. Check if being reset and if DevHandle is being * re-initialized. If this is the case, return BUSY so the I/O can be * retried later. */ + mutex_enter(&mpt->m_taskmgmt_mutex); if ((ptgt->m_devhdl == MPTSAS_INVALID_DEVHDL) && mpt->m_in_reset) { mptsas_set_pkt_reason(mpt, cmd, CMD_RESET, STAT_BUS_RESET); if (cmd->cmd_flags & CFLAG_TXQ) { mptsas_doneq_add(mpt, cmd); mptsas_doneq_empty(mpt); + mutex_exit(&mpt->m_taskmgmt_mutex); return (rval); } else { + mutex_exit(&mpt->m_taskmgmt_mutex); return (TRAN_BUSY); } } + mutex_exit(&mpt->m_taskmgmt_mutex); /* * If device handle has already been invalidated, just * fail the command. In theory, command from scsi_vhci * client is impossible send down command with invalid
*** 3689,3705 **** * need to increase the reference counter here. In a * case the HBA is in reset we just simply free the * allocated packet and bail out. */ mutex_enter(&mpt->m_mutex); ! if (mpt->m_in_reset) { mutex_exit(&mpt->m_mutex); cmd->cmd_flags = CFLAG_FREE; kmem_cache_free(mpt->m_kmem_cache, cmd); return (NULL); } mpt->m_extreq_sense_refcount++; ASSERT(mpt->m_extreq_sense_refcount > 0); mutex_exit(&mpt->m_mutex); /* --- 3701,3720 ---- * need to increase the reference counter here. In a * case the HBA is in reset we just simply free the * allocated packet and bail out. */ mutex_enter(&mpt->m_mutex); ! mutex_enter(&mpt->m_taskmgmt_mutex); ! if (mpt->m_in_reset == TRUE) { ! mutex_exit(&mpt->m_taskmgmt_mutex); mutex_exit(&mpt->m_mutex); cmd->cmd_flags = CFLAG_FREE; kmem_cache_free(mpt->m_kmem_cache, cmd); return (NULL); } + mutex_exit(&mpt->m_taskmgmt_mutex); mpt->m_extreq_sense_refcount++; ASSERT(mpt->m_extreq_sense_refcount > 0); mutex_exit(&mpt->m_mutex); /*
*** 5370,5383 **** * and ack would be sent in taskq thread */ NDBG20(("send mptsas_handle_event_sync success")); } ! if (mpt->m_in_reset) { NDBG20(("dropping event received during reset")); return; } if ((ddi_taskq_dispatch(mpt->m_event_taskq, mptsas_handle_event, (void *)args, DDI_NOSLEEP)) != DDI_SUCCESS) { mptsas_log(mpt, CE_WARN, "No memory available" "for dispatch taskq"); --- 5385,5401 ---- * and ack would be sent in taskq thread */ NDBG20(("send mptsas_handle_event_sync success")); } ! mutex_enter(&mpt->m_taskmgmt_mutex); ! if (mpt->m_in_reset == TRUE) { NDBG20(("dropping event received during reset")); + mutex_exit(&mpt->m_taskmgmt_mutex); return; } + mutex_exit(&mpt->m_taskmgmt_mutex); if ((ddi_taskq_dispatch(mpt->m_event_taskq, mptsas_handle_event, (void *)args, DDI_NOSLEEP)) != DDI_SUCCESS) { mptsas_log(mpt, CE_WARN, "No memory available" "for dispatch taskq");
*** 6340,6353 **** mutex_enter(&mpt->m_mutex); /* * If HBA is being reset, don't perform operations depending * on the IOC. We must free the topo list, however. */ ! if (!mpt->m_in_reset) mptsas_handle_topo_change(topo_node, parent); ! else ! NDBG20(("skipping topo change received during reset")); save_node = topo_node; topo_node = topo_node->next; ASSERT(save_node); kmem_free(save_node, sizeof (mptsas_topo_change_list_t)); mutex_exit(&mpt->m_mutex); --- 6358,6373 ---- mutex_enter(&mpt->m_mutex); /* * If HBA is being reset, don't perform operations depending * on the IOC. We must free the topo list, however. */ ! ! mutex_enter(&mpt->m_taskmgmt_mutex); ! if (mpt->m_in_reset == FALSE) mptsas_handle_topo_change(topo_node, parent); ! mutex_exit(&mpt->m_taskmgmt_mutex); ! save_node = topo_node; topo_node = topo_node->next; ASSERT(save_node); kmem_free(save_node, sizeof (mptsas_topo_change_list_t)); mutex_exit(&mpt->m_mutex);
*** 7612,7626 **** mutex_enter(&mpt->m_mutex); /* * If HBA is being reset, drop incoming event. */ ! if (mpt->m_in_reset) { NDBG20(("dropping event received prior to reset")); mutex_exit(&mpt->m_mutex); return; } eventreply = (pMpi2EventNotificationReply_t) (mpt->m_reply_frame + (rfm - (mpt->m_reply_frame_dma_addr & 0xffffffffu))); event = ddi_get16(mpt->m_acc_reply_frame_hdl, &eventreply->Event); --- 7632,7649 ---- mutex_enter(&mpt->m_mutex); /* * If HBA is being reset, drop incoming event. */ ! mutex_enter(&mpt->m_taskmgmt_mutex); ! if (mpt->m_in_reset == TRUE) { NDBG20(("dropping event received prior to reset")); + mutex_exit(&mpt->m_taskmgmt_mutex); mutex_exit(&mpt->m_mutex); return; } + mutex_exit(&mpt->m_taskmgmt_mutex); eventreply = (pMpi2EventNotificationReply_t) (mpt->m_reply_frame + (rfm - (mpt->m_reply_frame_dma_addr & 0xffffffffu))); event = ddi_get16(mpt->m_acc_reply_frame_hdl, &eventreply->Event);
*** 9920,9930 **** if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) { doorbell &= MPI2_DOORBELL_DATA_MASK; mptsas_log(mpt, CE_WARN, "MPT Firmware Fault, " "code: %04x", doorbell); mpt->m_softstate &= ~MPTSAS_SS_MSG_UNIT_RESET; ! if ((mptsas_restart_ioc(mpt)) == DDI_FAILURE) { mptsas_log(mpt, CE_WARN, "Reset failed" "after fault was detected"); } } --- 9943,9953 ---- if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) { doorbell &= MPI2_DOORBELL_DATA_MASK; mptsas_log(mpt, CE_WARN, "MPT Firmware Fault, " "code: %04x", doorbell); mpt->m_softstate &= ~MPTSAS_SS_MSG_UNIT_RESET; ! if ((mptsas_reset_handler(mpt)) == DDI_FAILURE) { mptsas_log(mpt, CE_WARN, "Reset failed" "after fault was detected"); } }
*** 11177,11188 **** status = EFAULT; } mptsas_dma_free(&dataout_dma_state); } if (pt_flags & MPTSAS_CMD_TIMEOUT) { ! if ((mptsas_restart_ioc(mpt)) == DDI_FAILURE) { ! mptsas_log(mpt, CE_WARN, "mptsas_restart_ioc failed"); } } if (request_msg) kmem_free(request_msg, request_size); NDBG27(("mptsas_do_passthru: Done status 0x%x", status)); --- 11200,11211 ---- status = EFAULT; } mptsas_dma_free(&dataout_dma_state); } if (pt_flags & MPTSAS_CMD_TIMEOUT) { ! if ((mptsas_reset_handler(mpt)) == DDI_FAILURE) { ! mptsas_log(mpt, CE_WARN, "mptsas_reset_handler failed"); } } if (request_msg) kmem_free(request_msg, request_size); NDBG27(("mptsas_do_passthru: Done status 0x%x", status));
*** 12683,12693 **** /* * Reset the chip to start using the new * firmware. Reset if failed also. */ mpt->m_softstate &= ~MPTSAS_SS_MSG_UNIT_RESET; ! if (mptsas_restart_ioc(mpt) == DDI_FAILURE) { status = EFAULT; } mutex_exit(&mpt->m_mutex); break; case MPTIOCTL_PASS_THRU: --- 12706,12716 ---- /* * Reset the chip to start using the new * firmware. Reset if failed also. */ mpt->m_softstate &= ~MPTSAS_SS_MSG_UNIT_RESET; ! if (mptsas_reset_handler(mpt) == DDI_FAILURE) { status = EFAULT; } mutex_exit(&mpt->m_mutex); break; case MPTIOCTL_PASS_THRU:
*** 12755,12765 **** } break; case MPTIOCTL_RESET_ADAPTER: mutex_enter(&mpt->m_mutex); mpt->m_softstate &= ~MPTSAS_SS_MSG_UNIT_RESET; ! if ((mptsas_restart_ioc(mpt)) == DDI_FAILURE) { mptsas_log(mpt, CE_WARN, "reset adapter IOCTL " "failed"); status = EFAULT; } mutex_exit(&mpt->m_mutex); --- 12778,12788 ---- } break; case MPTIOCTL_RESET_ADAPTER: mutex_enter(&mpt->m_mutex); mpt->m_softstate &= ~MPTSAS_SS_MSG_UNIT_RESET; ! if ((mptsas_reset_handler(mpt)) == DDI_FAILURE) { mptsas_log(mpt, CE_WARN, "reset adapter IOCTL " "failed"); status = EFAULT; } mutex_exit(&mpt->m_mutex);
*** 12822,12846 **** out: return (status); } int ! mptsas_restart_ioc(mptsas_t *mpt) { int rval = DDI_SUCCESS; mptsas_target_t *ptgt = NULL; ASSERT(mutex_owned(&mpt->m_mutex)); /* ! * Set a flag telling I/O path that we're processing a reset. This is ! * needed because after the reset is complete, the hash table still * needs to be rebuilt. If I/Os are started before the hash table is * rebuilt, I/O errors will occur. This flag allows I/Os to be marked * so that they can be retried. */ mpt->m_in_reset = TRUE; /* * Wait until all the allocated sense data buffers for DMA are freed. */ while (mpt->m_extreq_sense_refcount > 0) --- 12845,12875 ---- out: return (status); } int ! mptsas_reset_handler(mptsas_t *mpt) { int rval = DDI_SUCCESS; mptsas_target_t *ptgt = NULL; ASSERT(mutex_owned(&mpt->m_mutex)); /* ! * Set a flag telling task management we are processing a reset. This ! * is needed because after the reset is complete, the hash table still * needs to be rebuilt. If I/Os are started before the hash table is * rebuilt, I/O errors will occur. This flag allows I/Os to be marked * so that they can be retried. */ + mutex_enter(&mpt->m_taskmgmt_mutex); + if (mpt->m_in_reset == TRUE) { + mutex_exit(&mpt->m_taskmgmt_mutex); + return (DDI_FAILURE); + } mpt->m_in_reset = TRUE; + mutex_exit(&mpt->m_taskmgmt_mutex); /* * Wait until all the allocated sense data buffers for DMA are freed. */ while (mpt->m_extreq_sense_refcount > 0)
*** 12901,12911 **** --- 12930,12942 ---- } /* * Clear the reset flag so that I/Os can continue. */ + mutex_enter(&mpt->m_taskmgmt_mutex); mpt->m_in_reset = FALSE; + mutex_exit(&mpt->m_taskmgmt_mutex); return (rval); } static int