Print this page
7813 mpt_sas does not like concurrent HBA resets
@@ -554,11 +554,11 @@
mptsas_quiesce /* quiesce */
#endif /* __sparc */
};
-#define MPTSAS_MOD_STRING "MPTSAS HBA Driver 00.00.00.24"
+#define MPTSAS_MOD_STRING "MPTSAS HBA Driver 00.00.00.24X"
static struct modldrv modldrv = {
&mod_driverops, /* Type of module. This one is a driver */
MPTSAS_MOD_STRING, /* Name of the module. */
&mptsas_ops, /* driver ops */
@@ -1333,10 +1333,16 @@
MPTSAS_DISABLE_INTR(mpt);
if (mptsas_register_intrs(mpt) == FALSE)
goto fail;
intr_added++;
+ /*
+ * The mutex to protect task management during reset
+ */
+ mutex_init(&mpt->m_taskmgmt_mutex, NULL, MUTEX_SPIN,
+ DDI_INTR_PRI(mpt->m_intr_pri));
+
/* Initialize mutex used in interrupt handler */
mutex_init(&mpt->m_mutex, NULL, MUTEX_DRIVER,
DDI_INTR_PRI(mpt->m_intr_pri));
mutex_init(&mpt->m_passthru_mutex, NULL, MUTEX_DRIVER, NULL);
mutex_init(&mpt->m_tx_waitq_mutex, NULL, MUTEX_DRIVER,
@@ -1627,10 +1633,11 @@
}
if (mutex_init_done) {
mutex_destroy(&mpt->m_tx_waitq_mutex);
mutex_destroy(&mpt->m_passthru_mutex);
mutex_destroy(&mpt->m_mutex);
+ mutex_destroy(&mpt->m_taskmgmt_mutex);
for (i = 0; i < MPTSAS_MAX_PHYS; i++) {
mutex_destroy(
&mpt->m_phy_info[i].smhba_info.phy_mutex);
}
cv_destroy(&mpt->m_cv);
@@ -2046,10 +2053,11 @@
}
mutex_destroy(&mpt->m_tx_waitq_mutex);
mutex_destroy(&mpt->m_passthru_mutex);
mutex_destroy(&mpt->m_mutex);
+ mutex_destroy(&mpt->m_taskmgmt_mutex);
for (i = 0; i < MPTSAS_MAX_PHYS; i++) {
mutex_destroy(&mpt->m_phy_info[i].smhba_info.phy_mutex);
}
cv_destroy(&mpt->m_cv);
cv_destroy(&mpt->m_passthru_cv);
@@ -2407,11 +2415,11 @@
* If IOC is not in operational state, try to hard reset it.
*/
if ((ioc_status & MPI2_IOC_STATE_MASK) !=
MPI2_IOC_STATE_OPERATIONAL) {
mpt->m_softstate &= ~MPTSAS_SS_MSG_UNIT_RESET;
- if (mptsas_restart_ioc(mpt) == DDI_FAILURE) {
+ if (mptsas_reset_handler(mpt) == DDI_FAILURE) {
mptsas_log(mpt, CE_WARN,
"mptsas_power: hard reset failed");
mutex_exit(&mpt->m_mutex);
return (DDI_FAILURE);
}
@@ -3424,20 +3432,24 @@
* which means that they could be invalid even if the target is still
* attached. Check if being reset and if DevHandle is being
* re-initialized. If this is the case, return BUSY so the I/O can be
* retried later.
*/
+ mutex_enter(&mpt->m_taskmgmt_mutex);
if ((ptgt->m_devhdl == MPTSAS_INVALID_DEVHDL) && mpt->m_in_reset) {
mptsas_set_pkt_reason(mpt, cmd, CMD_RESET, STAT_BUS_RESET);
if (cmd->cmd_flags & CFLAG_TXQ) {
mptsas_doneq_add(mpt, cmd);
mptsas_doneq_empty(mpt);
+ mutex_exit(&mpt->m_taskmgmt_mutex);
return (rval);
} else {
+ mutex_exit(&mpt->m_taskmgmt_mutex);
return (TRAN_BUSY);
}
}
+ mutex_exit(&mpt->m_taskmgmt_mutex);
/*
* If device handle has already been invalidated, just
* fail the command. In theory, command from scsi_vhci
* client is impossible send down command with invalid
@@ -3689,17 +3701,20 @@
* need to increase the reference counter here. In a
* case the HBA is in reset we just simply free the
* allocated packet and bail out.
*/
mutex_enter(&mpt->m_mutex);
- if (mpt->m_in_reset) {
+ mutex_enter(&mpt->m_taskmgmt_mutex);
+ if (mpt->m_in_reset == TRUE) {
+ mutex_exit(&mpt->m_taskmgmt_mutex);
mutex_exit(&mpt->m_mutex);
cmd->cmd_flags = CFLAG_FREE;
kmem_cache_free(mpt->m_kmem_cache, cmd);
return (NULL);
}
+ mutex_exit(&mpt->m_taskmgmt_mutex);
mpt->m_extreq_sense_refcount++;
ASSERT(mpt->m_extreq_sense_refcount > 0);
mutex_exit(&mpt->m_mutex);
/*
@@ -5370,14 +5385,17 @@
* and ack would be sent in taskq thread
*/
NDBG20(("send mptsas_handle_event_sync success"));
}
- if (mpt->m_in_reset) {
+ mutex_enter(&mpt->m_taskmgmt_mutex);
+ if (mpt->m_in_reset == TRUE) {
NDBG20(("dropping event received during reset"));
+ mutex_exit(&mpt->m_taskmgmt_mutex);
return;
}
+ mutex_exit(&mpt->m_taskmgmt_mutex);
if ((ddi_taskq_dispatch(mpt->m_event_taskq, mptsas_handle_event,
(void *)args, DDI_NOSLEEP)) != DDI_SUCCESS) {
mptsas_log(mpt, CE_WARN, "No memory available"
"for dispatch taskq");
@@ -6340,14 +6358,16 @@
mutex_enter(&mpt->m_mutex);
/*
* If HBA is being reset, don't perform operations depending
* on the IOC. We must free the topo list, however.
*/
- if (!mpt->m_in_reset)
+
+ mutex_enter(&mpt->m_taskmgmt_mutex);
+ if (mpt->m_in_reset == FALSE)
mptsas_handle_topo_change(topo_node, parent);
- else
- NDBG20(("skipping topo change received during reset"));
+ mutex_exit(&mpt->m_taskmgmt_mutex);
+
save_node = topo_node;
topo_node = topo_node->next;
ASSERT(save_node);
kmem_free(save_node, sizeof (mptsas_topo_change_list_t));
mutex_exit(&mpt->m_mutex);
@@ -7612,15 +7632,18 @@
mutex_enter(&mpt->m_mutex);
/*
* If HBA is being reset, drop incoming event.
*/
- if (mpt->m_in_reset) {
+ mutex_enter(&mpt->m_taskmgmt_mutex);
+ if (mpt->m_in_reset == TRUE) {
NDBG20(("dropping event received prior to reset"));
+ mutex_exit(&mpt->m_taskmgmt_mutex);
mutex_exit(&mpt->m_mutex);
return;
}
+ mutex_exit(&mpt->m_taskmgmt_mutex);
eventreply = (pMpi2EventNotificationReply_t)
(mpt->m_reply_frame + (rfm -
(mpt->m_reply_frame_dma_addr & 0xffffffffu)));
event = ddi_get16(mpt->m_acc_reply_frame_hdl, &eventreply->Event);
@@ -9920,11 +9943,11 @@
if ((doorbell & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT) {
doorbell &= MPI2_DOORBELL_DATA_MASK;
mptsas_log(mpt, CE_WARN, "MPT Firmware Fault, "
"code: %04x", doorbell);
mpt->m_softstate &= ~MPTSAS_SS_MSG_UNIT_RESET;
- if ((mptsas_restart_ioc(mpt)) == DDI_FAILURE) {
+ if ((mptsas_reset_handler(mpt)) == DDI_FAILURE) {
mptsas_log(mpt, CE_WARN, "Reset failed"
"after fault was detected");
}
}
@@ -11177,12 +11200,12 @@
status = EFAULT;
}
mptsas_dma_free(&dataout_dma_state);
}
if (pt_flags & MPTSAS_CMD_TIMEOUT) {
- if ((mptsas_restart_ioc(mpt)) == DDI_FAILURE) {
- mptsas_log(mpt, CE_WARN, "mptsas_restart_ioc failed");
+ if ((mptsas_reset_handler(mpt)) == DDI_FAILURE) {
+ mptsas_log(mpt, CE_WARN, "mptsas_reset_handler failed");
}
}
if (request_msg)
kmem_free(request_msg, request_size);
NDBG27(("mptsas_do_passthru: Done status 0x%x", status));
@@ -12683,11 +12706,11 @@
/*
* Reset the chip to start using the new
* firmware. Reset if failed also.
*/
mpt->m_softstate &= ~MPTSAS_SS_MSG_UNIT_RESET;
- if (mptsas_restart_ioc(mpt) == DDI_FAILURE) {
+ if (mptsas_reset_handler(mpt) == DDI_FAILURE) {
status = EFAULT;
}
mutex_exit(&mpt->m_mutex);
break;
case MPTIOCTL_PASS_THRU:
@@ -12755,11 +12778,11 @@
}
break;
case MPTIOCTL_RESET_ADAPTER:
mutex_enter(&mpt->m_mutex);
mpt->m_softstate &= ~MPTSAS_SS_MSG_UNIT_RESET;
- if ((mptsas_restart_ioc(mpt)) == DDI_FAILURE) {
+ if ((mptsas_reset_handler(mpt)) == DDI_FAILURE) {
mptsas_log(mpt, CE_WARN, "reset adapter IOCTL "
"failed");
status = EFAULT;
}
mutex_exit(&mpt->m_mutex);
@@ -12822,25 +12845,31 @@
out:
return (status);
}
int
-mptsas_restart_ioc(mptsas_t *mpt)
+mptsas_reset_handler(mptsas_t *mpt)
{
int rval = DDI_SUCCESS;
mptsas_target_t *ptgt = NULL;
ASSERT(mutex_owned(&mpt->m_mutex));
/*
- * Set a flag telling I/O path that we're processing a reset. This is
- * needed because after the reset is complete, the hash table still
+ * Set a flag telling task management we are processing a reset. This
+ * is needed because after the reset is complete, the hash table still
* needs to be rebuilt. If I/Os are started before the hash table is
* rebuilt, I/O errors will occur. This flag allows I/Os to be marked
* so that they can be retried.
*/
+ mutex_enter(&mpt->m_taskmgmt_mutex);
+ if (mpt->m_in_reset == TRUE) {
+ mutex_exit(&mpt->m_taskmgmt_mutex);
+ return (DDI_FAILURE);
+ }
mpt->m_in_reset = TRUE;
+ mutex_exit(&mpt->m_taskmgmt_mutex);
/*
* Wait until all the allocated sense data buffers for DMA are freed.
*/
while (mpt->m_extreq_sense_refcount > 0)
@@ -12901,11 +12930,13 @@
}
/*
* Clear the reset flag so that I/Os can continue.
*/
+ mutex_enter(&mpt->m_taskmgmt_mutex);
mpt->m_in_reset = FALSE;
+ mutex_exit(&mpt->m_taskmgmt_mutex);
return (rval);
}
static int