Print this page
Code reconciliation with other base.
hg changesets 607a5b46a793..b706c96317c3
Fix ncpus for early boot config
Purge the ack to the interrupt before exiting mptsas_intr()
Changes from code review
Optimise slot allocation through rotor and the use
of per target mutex in start path.
Update tx waitq's code.
Create 2 threads, divide the workflow and deliver
to the hardware from the threads.
Optimise mutexes and code paths.
Split out offline target code.
Test timeouts code
Add support for more than 8 MSI-X interrupts.
Tidy up interrupt assignment and card ID messages.
Enable Fast Path for capable devices.
Merge fixes for Illumos issue 4819, fix mpt_sas command timeout handling.
Tweak debug flags.
Default to process done commands all in threads if only 1 interrupt.
Lint and cstyle fixes.
Fix problem with running against 64bit msgaddr attributes for DMA.
Default is now to run like this.
Don't take tx_waitq_mutex if draining isn't set.
Fixes for Illumos issue 4682.
Fix hang bug to do with tx_wq.
Re-arrange mptsas_poll() to disable interrupts before issuing the
command.
Improve the tx_waitq code path.
Major rework of mutexes.
During normal operation do not grab m_mutex during interrupt.
Use reply post queues instead.
Make a few variables non-static so you can change them in /etc/system.
Fixes to some address arithmetic using 32bit values.
Distribute command done processing around the threads.
Improved auto-request sense memory usage.
Fix for Nexenta commit 36c74113a21
OS-91 mptsas does inquiry without setting pkt_time
Add comment about testing.
Test firmware version of 2008 controllers for MSI-X Compatibility.
Re-arrange mptsas_intr() to reduce number of spurious interrupts.
Fix bug in mptsas_free_post_queue().
Change mptsas_doneq_mv() to not loop.
Should not need m_in_callback flag. It prevents concurrent
command completion processing.
Added code to support using MSI-X interrupts across multiple
reply queues. Not tested with anything other than 3008 yet.
Change output "mptsas%d" -> "mptsas3%d".
Add SAS3 specific messages (12.0Gb).
Allow over-ride for interrupt type.
Restrict pre MPI2.5 to MSI interrupts.
Allow watchdog timeout to work for mptsas_smhba_setup() in attach().
Merge fixes for "4403 mpt_sas panic when pulling a drive", commit f7d0d869a9ae78d
Use MSI-X interrupts, just one for now.
Pre-allocate array for request sense buffers, similar to command frames.
No more messing about with scsi_alloc_consistent_buf().
Add rolling buffer for *all* debug messages.
Improve mdb module and separate out into mpt_sas3.
Initial modifications using the code changes present between
the LSI source code for FreeBSD drivers. Specifically the changes
from mpslsi-source-17.00.00.00 -> mpslsi-source-03.00.00.00.
This mainly involves using a different scatter/gather element in
frame setup.
Change some obvious references sas -> sas3.
Changes to enable driver to compile.
Header paths, object lists, etc.

@@ -19,12 +19,14 @@
  * CDDL HEADER END
  */
 
 /*
  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
  * Copyright (c) 2014, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014, Tegile Systems Inc. All rights reserved.
+ * Copyright 2014 OmniTI Computer Consulting, Inc. All rights reserved.
  */
 
 /*
  * Copyright (c) 2000 to 2010, LSI Corporation.
  * All rights reserved.

@@ -51,11 +53,11 @@
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
  * DAMAGE.
  */
 
 /*
- * mptsas - This is a driver based on LSI Logic's MPT2.0 interface.
+ * mptsas3 - This is a driver based on LSI Logic's MPT2.0/2.5 interface.
  *
  */
 
 #if defined(lint) || defined(DEBUG)
 #define MPTSAS_DEBUG

@@ -76,29 +78,29 @@
 #include <sys/sata/sata_defs.h>
 #include <sys/scsi/generic/sas.h>
 #include <sys/scsi/impl/scsi_sas.h>
 
 #pragma pack(1)
-#include <sys/scsi/adapters/mpt_sas/mpi/mpi2_type.h>
-#include <sys/scsi/adapters/mpt_sas/mpi/mpi2.h>
-#include <sys/scsi/adapters/mpt_sas/mpi/mpi2_cnfg.h>
-#include <sys/scsi/adapters/mpt_sas/mpi/mpi2_init.h>
-#include <sys/scsi/adapters/mpt_sas/mpi/mpi2_ioc.h>
-#include <sys/scsi/adapters/mpt_sas/mpi/mpi2_sas.h>
-#include <sys/scsi/adapters/mpt_sas/mpi/mpi2_tool.h>
-#include <sys/scsi/adapters/mpt_sas/mpi/mpi2_raid.h>
+#include <sys/scsi/adapters/mpt_sas3/mpi/mpi2_type.h>
+#include <sys/scsi/adapters/mpt_sas3/mpi/mpi2.h>
+#include <sys/scsi/adapters/mpt_sas3/mpi/mpi2_cnfg.h>
+#include <sys/scsi/adapters/mpt_sas3/mpi/mpi2_init.h>
+#include <sys/scsi/adapters/mpt_sas3/mpi/mpi2_ioc.h>
+#include <sys/scsi/adapters/mpt_sas3/mpi/mpi2_sas.h>
+#include <sys/scsi/adapters/mpt_sas3/mpi/mpi2_tool.h>
+#include <sys/scsi/adapters/mpt_sas3/mpi/mpi2_raid.h>
 #pragma pack()
 
 /*
  * private header files.
  *
  */
 #include <sys/scsi/impl/scsi_reset_notify.h>
-#include <sys/scsi/adapters/mpt_sas/mptsas_var.h>
-#include <sys/scsi/adapters/mpt_sas/mptsas_ioctl.h>
-#include <sys/scsi/adapters/mpt_sas/mptsas_smhba.h>
-#include <sys/scsi/adapters/mpt_sas/mptsas_hash.h>
+#include <sys/scsi/adapters/mpt_sas3/mptsas3_var.h>
+#include <sys/scsi/adapters/mpt_sas3/mptsas3_ioctl.h>
+#include <sys/scsi/adapters/mpt_sas3/mptsas3_smhba.h>
+#include <sys/scsi/adapters/mpt_sas3/mptsas3_hash.h>
 #include <sys/raidioctl.h>
 
 #include <sys/fs/dv_node.h>     /* devfs_clean */
 
 /*

@@ -142,13 +144,15 @@
 static int mptsas_smp_setup(mptsas_t *mpt);
 static void mptsas_smp_teardown(mptsas_t *mpt);
 static int mptsas_cache_create(mptsas_t *mpt);
 static void mptsas_cache_destroy(mptsas_t *mpt);
 static int mptsas_alloc_request_frames(mptsas_t *mpt);
+static int mptsas_alloc_sense_bufs(mptsas_t *mpt);
 static int mptsas_alloc_reply_frames(mptsas_t *mpt);
 static int mptsas_alloc_free_queue(mptsas_t *mpt);
 static int mptsas_alloc_post_queue(mptsas_t *mpt);
+static void mptsas_free_post_queue(mptsas_t *mpt);
 static void mptsas_alloc_reply_args(mptsas_t *mpt);
 static int mptsas_alloc_extra_sgl_frame(mptsas_t *mpt, mptsas_cmd_t *cmd);
 static void mptsas_free_extra_sgl_frame(mptsas_t *mpt, mptsas_cmd_t *cmd);
 static int mptsas_init_chip(mptsas_t *mpt, int first_time);
 

@@ -199,13 +203,18 @@
 static void mptsas_free_handshake_msg(mptsas_t *mpt);
 
 static void mptsas_ncmds_checkdrain(void *arg);
 
 static int mptsas_prepare_pkt(mptsas_cmd_t *cmd);
-static int mptsas_accept_pkt(mptsas_t *mpt, mptsas_cmd_t *sp);
-static int mptsas_accept_txwq_and_pkt(mptsas_t *mpt, mptsas_cmd_t *sp);
-static void mptsas_accept_tx_waitq(mptsas_t *mpt);
+static void mptsas_retry_pkt(mptsas_t *mpt, mptsas_cmd_t *sp);
+static int mptsas_save_cmd_to_slot(mptsas_t *mpt, mptsas_cmd_t *cmd);
+static int mptsas_accept_pkt(mptsas_t *mpt, mptsas_cmd_t *sp,
+    int *tran_rval);
+static void mptsas_accept_tx_waitqs(mptsas_t *mpt);
+static void mptsas_unblock_tx_waitqs(mptsas_t *mpt);
+static void mptsas_drain_tx_waitq(mptsas_t *mpt, mptsas_tx_waitqueue_t *txwq);
+static int mptsas_check_targ_intxtion(mptsas_target_t *ptgt, int cmd_pkt_flags);
 
 static int mptsas_do_detach(dev_info_t *dev);
 static int mptsas_do_scsi_reset(mptsas_t *mpt, uint16_t devhdl);
 static int mptsas_do_scsi_abort(mptsas_t *mpt, int target, int lun,
     struct scsi_pkt *pkt);

@@ -225,28 +234,28 @@
         uint8_t tasktype);
 static void mptsas_set_pkt_reason(mptsas_t *mpt, mptsas_cmd_t *cmd,
     uchar_t reason, uint_t stat);
 
 static uint_t mptsas_intr(caddr_t arg1, caddr_t arg2);
-static void mptsas_process_intr(mptsas_t *mpt,
+static void mptsas_process_intr(mptsas_t *mpt, mptsas_reply_pqueue_t *rpqp,
     pMpi2ReplyDescriptorsUnion_t reply_desc_union);
 static void mptsas_handle_scsi_io_success(mptsas_t *mpt,
-    pMpi2ReplyDescriptorsUnion_t reply_desc);
+    mptsas_reply_pqueue_t *rpqp, pMpi2ReplyDescriptorsUnion_t reply_desc);
 static void mptsas_handle_address_reply(mptsas_t *mpt,
     pMpi2ReplyDescriptorsUnion_t reply_desc);
 static int mptsas_wait_intr(mptsas_t *mpt, int polltime);
 static void mptsas_sge_setup(mptsas_t *mpt, mptsas_cmd_t *cmd,
     uint32_t *control, pMpi2SCSIIORequest_t frame, ddi_acc_handle_t acc_hdl);
 
 static void mptsas_watch(void *arg);
 static void mptsas_watchsubr(mptsas_t *mpt);
-static void mptsas_cmd_timeout(mptsas_t *mpt, uint16_t devhdl);
+static void mptsas_cmd_timeout(mptsas_t *mpt, mptsas_target_t *ptgt);
 
 static void mptsas_start_passthru(mptsas_t *mpt, mptsas_cmd_t *cmd);
 static int mptsas_do_passthru(mptsas_t *mpt, uint8_t *request, uint8_t *reply,
     uint8_t *data, uint32_t request_size, uint32_t reply_size,
-    uint32_t data_size, uint32_t direction, uint8_t *dataout,
+    uint32_t data_size, uint8_t direction, uint8_t *dataout,
     uint32_t dataout_size, short timeout, int mode);
 static int mptsas_free_devhdl(mptsas_t *mpt, uint16_t devhdl);
 
 static uint8_t mptsas_get_fw_diag_buffer_number(mptsas_t *mpt,
     uint32_t unique_id);

@@ -297,28 +306,36 @@
 static int mptsas_start_cmd(mptsas_t *mpt, mptsas_cmd_t *cmd);
 
 static void mptsas_restart_hba(mptsas_t *mpt);
 static void mptsas_restart_waitq(mptsas_t *mpt);
 
-static void mptsas_deliver_doneq_thread(mptsas_t *mpt);
+static void mptsas_deliver_doneq_thread(mptsas_t *mpt,
+    mptsas_done_list_t *dlist);
 static void mptsas_doneq_add(mptsas_t *mpt, mptsas_cmd_t *cmd);
-static void mptsas_doneq_mv(mptsas_t *mpt, uint64_t t);
+static void mptsas_rpdoneq_add(mptsas_t *mpt, mptsas_reply_pqueue_t *rpqp,
+    mptsas_cmd_t *cmd);
+static void mptsas_doneq_mv(mptsas_done_list_t *from,
+    mptsas_doneq_thread_list_t *item);
 
 static mptsas_cmd_t *mptsas_doneq_thread_rm(mptsas_t *mpt, uint64_t t);
 static void mptsas_doneq_empty(mptsas_t *mpt);
-static void mptsas_doneq_thread(mptsas_doneq_thread_arg_t *arg);
+static void mptsas_rpdoneq_empty(mptsas_reply_pqueue_t *rpqp);
+static void mptsas_doneq_thread(mptsas_thread_arg_t *arg);
+static void mptsas_tx_waitq_thread(mptsas_thread_arg_t *arg);
 
 static mptsas_cmd_t *mptsas_waitq_rm(mptsas_t *mpt);
 static void mptsas_waitq_delete(mptsas_t *mpt, mptsas_cmd_t *cmd);
-static mptsas_cmd_t *mptsas_tx_waitq_rm(mptsas_t *mpt);
-static void mptsas_tx_waitq_delete(mptsas_t *mpt, mptsas_cmd_t *cmd);
-
 
 static void mptsas_start_watch_reset_delay();
 static void mptsas_setup_bus_reset_delay(mptsas_t *mpt);
 static void mptsas_watch_reset_delay(void *arg);
 static int mptsas_watch_reset_delay_subr(mptsas_t *mpt);
+static void mptsas_set_throttle(struct mptsas *mpt, mptsas_target_t *ptgt,
+    int what);
+static void mptsas_set_throttle_mtx(struct mptsas *mpt, mptsas_target_t *ptgt,
+    int what);
+static void mptsas_remove_cmd_nomtx(mptsas_t *mpt, mptsas_cmd_t *cmd);
 
 /*
  * helper functions
  */
 static void mptsas_dump_cmd(mptsas_t *mpt, mptsas_cmd_t *cmd);

@@ -367,11 +384,13 @@
     dev_info_t **lundip);
 static int mptsas_config_one_phy(dev_info_t *pdip, uint8_t phy, int lun,
     dev_info_t **lundip);
 
 static int mptsas_config_target(dev_info_t *pdip, mptsas_target_t *ptgt);
-static int mptsas_offline_target(dev_info_t *pdip, char *name);
+static int mptsas_offline_targetdev(dev_info_t *pdip, char *name);
+static void mptsas_offline_target(mptsas_t *mpt, mptsas_target_t *ptgt,
+    uint8_t topo_flags, dev_info_t *parent);
 
 static int mptsas_config_raid(dev_info_t *pdip, uint16_t target,
     dev_info_t **dip);
 
 static int mptsas_config_luns(dev_info_t *pdip, mptsas_target_t *ptgt);

@@ -423,12 +442,18 @@
  * MPT MSI tunable:
  *
  * By default MSI is enabled on all supported platforms.
  */
 boolean_t mptsas_enable_msi = B_TRUE;
+boolean_t mptsas_enable_msix = B_TRUE;
 boolean_t mptsas_physical_bind_failed_page_83 = B_FALSE;
 
+/*
+ * Global switch for use of MPI2.5 FAST PATH.
+ */
+boolean_t mptsas3_use_fastpath = B_TRUE;
+
 static int mptsas_register_intrs(mptsas_t *);
 static void mptsas_unregister_intrs(mptsas_t *);
 static int mptsas_add_intrs(mptsas_t *, int);
 static void mptsas_rem_intrs(mptsas_t *);
 

@@ -438,10 +463,18 @@
 static void mptsas_fm_init(mptsas_t *mpt);
 static void mptsas_fm_fini(mptsas_t *mpt);
 static int mptsas_fm_error_cb(dev_info_t *, ddi_fm_error_t *, const void *);
 
 extern pri_t minclsyspri, maxclsyspri;
+/*
+ * NCPUS is used to determine some optimal configurations for number
+ * of threads created to perform specific jobs. If we are invoked because
+ * a disk is part of the root file system ncpus may still be 1 so check
+ * boot_ncpus as well.
+ */
+extern int ncpus, boot_ncpus;
+#define NCPUS   max(ncpus, boot_ncpus)
 
 /*
  * This device is created by the SCSI pseudo nexus driver (SCSI vHCI).  It is
  * under this device that the paths to a physical device are created when
  * MPxIO is used.

@@ -453,10 +486,17 @@
  * By default the value is 30 seconds.
  */
 int mptsas_inq83_retry_timeout = 30;
 
 /*
+ * Tunable for default SCSI pkt timeout. Defaults to 5 seconds, which should
+ * be plenty for INQUIRY and REPORT_LUNS, which are the only commands currently
+ * issued by mptsas directly.
+ */
+int mptsas_scsi_pkt_time = 5;
+
+/*
  * This is used to allocate memory for message frame storage, not for
  * data I/O DMA. All message frames must be stored in the first 4G of
  * physical memory.
  */
 ddi_dma_attr_t mptsas_dma_attrs = {

@@ -488,11 +528,11 @@
         1,              /* minxfer - gran. of DMA engine        */
         0x00ffffffull,  /* maxxfer - gran. of DMA engine        */
         0xffffffffull,  /* max segment size (DMA boundary)      */
         MPTSAS_MAX_DMA_SEGS, /* scatter/gather list length      */
         512,            /* granularity - device transfer size   */
-        DDI_DMA_RELAXED_ORDERING        /* flags, enable relaxed ordering */
+        0               /* flags, set to 0 */
 };
 
 ddi_device_acc_attr_t mptsas_dev_attr = {
         DDI_DEVICE_ATTR_V1,
         DDI_STRUCTURE_LE_ACC,

@@ -543,11 +583,11 @@
         mptsas_quiesce          /* quiesce */
 #endif  /* __sparc */
 };
 
 
-#define MPTSAS_MOD_STRING "MPTSAS HBA Driver 00.00.00.24"
+#define MPTSAS_MOD_STRING "MPTSAS3 HBA Driver 00.00.01"
 
 static struct modldrv modldrv = {
         &mod_driverops, /* Type of module. This one is a driver */
         MPTSAS_MOD_STRING, /* Name of the module. */
         &mptsas_ops,    /* driver ops */

@@ -566,16 +606,23 @@
 
 /*
  * Local static data
  */
 #if defined(MPTSAS_DEBUG)
-uint32_t mptsas_debug_flags = 0;
+uint32_t mptsas_debug_flags = 0x0;
+/*
+ * Flags to ignore these messages in local debug ring buffer.
+ * Default is to ignore the watchsubr() output which normally happens
+ * every second.
+ */
+uint32_t mptsas_dbglog_imask = 0x40000000;
+uint32_t mptsas_test_timeout = 0;
 #endif  /* defined(MPTSAS_DEBUG) */
 uint32_t mptsas_debug_resets = 0;
 
 static kmutex_t         mptsas_global_mutex;
-static void             *mptsas_state;          /* soft state ptr */
+static void             *mptsas3_state;         /* soft state ptr */
 static krwlock_t        mptsas_global_rwlock;
 
 static kmutex_t         mptsas_log_mutex;
 static char             mptsas_log_buf[256];
 _NOTE(MUTEX_PROTECTS_DATA(mptsas_log_mutex, mptsas_log_buf))

@@ -584,10 +631,39 @@
 static clock_t mptsas_scsi_watchdog_tick;
 static clock_t mptsas_tick;
 static timeout_id_t mptsas_reset_watch;
 static timeout_id_t mptsas_timeout_id;
 static int mptsas_timeouts_enabled = 0;
+
+/*
+ * Maximum number of MSI-X interrupts any instance of mptsas3 can use.
+ * Note that if you want to increase this you may have to also bump the
+ * value of ddi_msix_alloc_limit which defaults to 8.
+ * Set to zero to fall back to other interrupt types.
+ */
+int mptsas3_max_msix_intrs = 8;
+
+/*
+ * Default length for extended auto request sense buffers.
+ * All sense buffers need to be under the same alloc because there
+ * is only one common top 32bits (of 64bits) address register.
+ * Most requests only require 32 bytes, but some request >256.
+ * We use rmalloc()/rmfree() on this additional memory to manage the
+ * "extended" requests.
+ */
+int mptsas_extreq_sense_bufsize = 256*64;
+
+/*
+ * Believe that all software restrictions of having to run with DMA
+ * attributes to limit allocation to the first 4G are removed.
+ * However, this flag remains to enable quick switchback should suspicious
+ * problems emerge.
+ * Note that scsi_alloc_consistent_buf() still adheres to allocating
+ * 32 bit addressable memory, but we can cope if that is changed now.
+ */
+int mptsas_use_64bit_msgaddr = 1;
+
 /*
  * warlock directives
  */
 _NOTE(SCHEME_PROTECTS_DATA("unique per pkt", scsi_pkt \
         mptsas_cmd NcrTableIndirect buf scsi_cdb scsi_status))

@@ -617,18 +693,18 @@
         /* CONSTCOND */
         ASSERT(NO_COMPETING_THREADS);
 
         NDBG0(("_init"));
 
-        status = ddi_soft_state_init(&mptsas_state, MPTSAS_SIZE,
+        status = ddi_soft_state_init(&mptsas3_state, MPTSAS_SIZE,
             MPTSAS_INITIAL_SOFT_SPACE);
         if (status != 0) {
                 return (status);
         }
 
         if ((status = scsi_hba_init(&modlinkage)) != 0) {
-                ddi_soft_state_fini(&mptsas_state);
+                ddi_soft_state_fini(&mptsas3_state);
                 return (status);
         }
 
         mutex_init(&mptsas_global_mutex, NULL, MUTEX_DRIVER, NULL);
         rw_init(&mptsas_global_rwlock, NULL, RW_DRIVER, NULL);

@@ -636,11 +712,11 @@
 
         if ((status = mod_install(&modlinkage)) != 0) {
                 mutex_destroy(&mptsas_log_mutex);
                 rw_destroy(&mptsas_global_rwlock);
                 mutex_destroy(&mptsas_global_mutex);
-                ddi_soft_state_fini(&mptsas_state);
+                ddi_soft_state_fini(&mptsas3_state);
                 scsi_hba_fini(&modlinkage);
         }
 
         return (status);
 }

@@ -657,11 +733,11 @@
         ASSERT(NO_COMPETING_THREADS);
 
         NDBG0(("_fini"));
 
         if ((status = mod_remove(&modlinkage)) == 0) {
-                ddi_soft_state_fini(&mptsas_state);
+                ddi_soft_state_fini(&mptsas3_state);
                 scsi_hba_fini(&modlinkage);
                 mutex_destroy(&mptsas_global_mutex);
                 rw_destroy(&mptsas_global_rwlock);
                 mutex_destroy(&mptsas_log_mutex);
         }

@@ -763,10 +839,11 @@
         mptsas_target_t *tp;
         mptsas_smp_t *sp;
 
         for (tp = refhash_first(mpt->m_targets); tp != NULL;
             tp = refhash_next(mpt->m_targets, tp)) {
+                mutex_destroy(&tp->m_t_mutex);
                 refhash_remove(mpt->m_targets, tp);
         }
         for (sp = refhash_first(mpt->m_smp_targets); sp != NULL;
             sp = refhash_next(mpt->m_smp_targets, sp)) {
                 refhash_remove(mpt->m_smp_targets, sp);

@@ -797,11 +874,11 @@
         uint16_t                attached_devhdl = 0;
         uint32_t                dev_info;
         uint64_t                attached_sas_wwn;
         uint16_t                dev_hdl;
         uint16_t                pdev_hdl;
-        uint16_t                bay_num, enclosure;
+        uint16_t                bay_num, enclosure, io_flags;
         char                    attached_wwnstr[MPTSAS_WWN_STRLEN];
 
         /* CONSTCOND */
         ASSERT(NO_COMPETING_THREADS);
 

@@ -959,11 +1036,11 @@
         mutex_enter(&mpt->m_mutex);
         page_address = (MPI2_SAS_DEVICE_PGAD_FORM_HANDLE &
             MPI2_SAS_DEVICE_PGAD_FORM_MASK) | (uint32_t)attached_devhdl;
         rval = mptsas_get_sas_device_page0(mpt, page_address, &dev_hdl,
             &attached_sas_wwn, &dev_info, &phy_port, &phy_id,
-            &pdev_hdl, &bay_num, &enclosure);
+            &pdev_hdl, &bay_num, &enclosure, &io_flags);
         if (rval != DDI_SUCCESS) {
                 mptsas_log(mpt, CE_WARN,
                     "Failed to get device page0 for handle:%d",
                     attached_devhdl);
                 mutex_exit(&mpt->m_mutex);

@@ -1016,20 +1093,21 @@
 static int
 mptsas_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
 {
         mptsas_t                *mpt = NULL;
         int                     instance, i, j;
-        int                     doneq_thread_num;
-        char                    intr_added = 0;
+        int                     q_thread_num;
         char                    map_setup = 0;
         char                    config_setup = 0;
         char                    hba_attach_setup = 0;
         char                    smp_attach_setup = 0;
         char                    mutex_init_done = 0;
         char                    event_taskq_create = 0;
         char                    dr_taskq_create = 0;
         char                    doneq_thread_create = 0;
+        char                    txwq_thread_create = 0;
+        char                    added_watchdog = 0;
         scsi_hba_tran_t         *hba_tran;
         uint_t                  mem_bar = MEM_SPACE;
         int                     rval = DDI_FAILURE;
 
         /* CONSTCOND */

@@ -1140,17 +1218,17 @@
         instance = ddi_get_instance(dip);
 
         /*
          * Allocate softc information.
          */
-        if (ddi_soft_state_zalloc(mptsas_state, instance) != DDI_SUCCESS) {
+        if (ddi_soft_state_zalloc(mptsas3_state, instance) != DDI_SUCCESS) {
                 mptsas_log(NULL, CE_WARN,
                     "mptsas%d: cannot allocate soft state", instance);
                 goto fail;
         }
 
-        mpt = ddi_get_soft_state(mptsas_state, instance);
+        mpt = ddi_get_soft_state(mptsas3_state, instance);
 
         if (mpt == NULL) {
                 mptsas_log(NULL, CE_WARN,
                     "mptsas%d: cannot get soft state", instance);
                 goto fail;

@@ -1162,15 +1240,24 @@
         mpt->m_dip = dip;
         mpt->m_instance = instance;
 
         /* Make a per-instance copy of the structures */
         mpt->m_io_dma_attr = mptsas_dma_attrs64;
+        if (mptsas_use_64bit_msgaddr) {
+                mpt->m_msg_dma_attr = mptsas_dma_attrs64;
+        } else {
         mpt->m_msg_dma_attr = mptsas_dma_attrs;
+        }
         mpt->m_reg_acc_attr = mptsas_dev_attr;
         mpt->m_dev_acc_attr = mptsas_dev_attr;
 
         /*
+         * Round down the arq sense buffer size to nearest 16 bytes.
+         */
+        mpt->m_req_sense_size = EXTCMDS_STATUS_SIZE;
+
+        /*
          * Initialize FMA
          */
         mpt->m_fm_capabilities = ddi_getprop(DDI_DEV_T_ANY, mpt->m_dip,
             DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "fm-capable",
             DDI_FM_EREPORT_CAPABLE | DDI_FM_ACCCHK_CAPABLE |

@@ -1220,22 +1307,67 @@
                     "failed");
                 goto fail;
         }
         dr_taskq_create++;
 
+        cv_init(&mpt->m_qthread_cv, NULL, CV_DRIVER, NULL);
+        mutex_init(&mpt->m_qthread_mutex, NULL, MUTEX_DRIVER, NULL);
+
+        i = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
+            0, "mptsas_enable_txwq_prop", NCPUS > 1);
+        if (i) {
+                mpt->m_txwq_thread_n = NUM_TX_WAITQ;
+                mpt->m_txwq_enabled = FALSE;
+                if (ddi_prop_get_int(DDI_DEV_T_ANY, dip,
+                    0, "mptsas_allow_txwq_jumping", 0)) {
+                        mpt->m_txwq_allow_q_jumping = TRUE;
+                }
+                i = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
+                    0, "mptsas_txwq_threashold_prop", 80000);
+                mpt->m_txwq_thread_threshold = (uint16_t)i;
+        } else {
+                mpt->m_txwq_thread_n = 0;
+                mpt->m_txwq_enabled = FALSE;
+        }
+
+        if (mpt->m_txwq_thread_n) {
+                mutex_enter(&mpt->m_qthread_mutex);
+                for (j = 0; j < NUM_TX_WAITQ; j++) {
+                        mutex_init(&mpt->m_tx_waitq[j].txwq_mutex, NULL,
+                            MUTEX_DRIVER,
+                            NULL);
+                        cv_init(&mpt->m_tx_waitq[j].txwq_cv, NULL, CV_DRIVER,
+                            NULL);
+                        cv_init(&mpt->m_tx_waitq[j].txwq_drain_cv, NULL,
+                            CV_DRIVER, NULL);
+                        mpt->m_tx_waitq[j].txwq_active = TRUE;
+                        mpt->m_tx_waitq[j].txwq_draining = FALSE;
+                        mpt->m_tx_waitq[j].txwq_cmdq = NULL;
+                        mpt->m_tx_waitq[j].txwq_qtail =
+                            &mpt->m_tx_waitq[j].txwq_cmdq;
+                        mutex_enter(&mpt->m_tx_waitq[j].txwq_mutex);
+                        mpt->m_tx_waitq[j].arg.mpt = mpt;
+                        mpt->m_tx_waitq[j].arg.t = j;
+                        mpt->m_tx_waitq[j].txwq_threadp =
+                            thread_create(NULL, 0, mptsas_tx_waitq_thread,
+                            &mpt->m_tx_waitq[j].arg,
+                            0, &p0, TS_RUN, maxclsyspri - 10);
+                        mutex_exit(&mpt->m_tx_waitq[j].txwq_mutex);
+                }
+                mutex_exit(&mpt->m_qthread_mutex);
+                txwq_thread_create++;
+        }
+
         mpt->m_doneq_thread_threshold = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
             0, "mptsas_doneq_thread_threshold_prop", 10);
         mpt->m_doneq_length_threshold = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
             0, "mptsas_doneq_length_threshold_prop", 8);
         mpt->m_doneq_thread_n = ddi_prop_get_int(DDI_DEV_T_ANY, dip,
-            0, "mptsas_doneq_thread_n_prop", 8);
+            0, "mptsas_doneq_thread_n_prop", min(NCPUS, 8));
 
         if (mpt->m_doneq_thread_n) {
-                cv_init(&mpt->m_doneq_thread_cv, NULL, CV_DRIVER, NULL);
-                mutex_init(&mpt->m_doneq_mutex, NULL, MUTEX_DRIVER, NULL);
-
-                mutex_enter(&mpt->m_doneq_mutex);
+                mutex_enter(&mpt->m_qthread_mutex);
                 mpt->m_doneq_thread_id =
                     kmem_zalloc(sizeof (mptsas_doneq_thread_list_t)
                     * mpt->m_doneq_thread_n, KM_SLEEP);
 
                 for (j = 0; j < mpt->m_doneq_thread_n; j++) {

@@ -1249,47 +1381,47 @@
                         mpt->m_doneq_thread_id[j].arg.mpt = mpt;
                         mpt->m_doneq_thread_id[j].arg.t = j;
                         mpt->m_doneq_thread_id[j].threadp =
                             thread_create(NULL, 0, mptsas_doneq_thread,
                             &mpt->m_doneq_thread_id[j].arg,
-                            0, &p0, TS_RUN, minclsyspri);
-                        mpt->m_doneq_thread_id[j].donetail =
-                            &mpt->m_doneq_thread_id[j].doneq;
+                            0, &p0, TS_RUN, maxclsyspri - 10);
+                        mpt->m_doneq_thread_id[j].dlist.dl_tail =
+                            &mpt->m_doneq_thread_id[j].dlist.dl_q;
                         mutex_exit(&mpt->m_doneq_thread_id[j].mutex);
                 }
-                mutex_exit(&mpt->m_doneq_mutex);
+                mutex_exit(&mpt->m_qthread_mutex);
                 doneq_thread_create++;
         }
 
-        /* Initialize mutex used in interrupt handler */
-        mutex_init(&mpt->m_mutex, NULL, MUTEX_DRIVER,
-            DDI_INTR_PRI(mpt->m_intr_pri));
+        /*
+         * Disable hardware interrupt since we're not ready to
+         * handle it yet.
+         */
+        MPTSAS_DISABLE_INTR(mpt);
+
+        /*
+         * Initialize mutex used in interrupt handler.
+         * We don't support hi-level so the mutexes are all adaptive
+         * and we don't want to register the interrupts until we get
+         * the chip type information from _init_chip() below.
+         * Otherwise we would use DDI_INTR_PRI(mpt->m_intr_pri)
+         * rather than NULL in the mutex_init() calls.
+         */
+        mutex_init(&mpt->m_mutex, NULL, MUTEX_DRIVER, NULL);
         mutex_init(&mpt->m_passthru_mutex, NULL, MUTEX_DRIVER, NULL);
-        mutex_init(&mpt->m_tx_waitq_mutex, NULL, MUTEX_DRIVER,
-            DDI_INTR_PRI(mpt->m_intr_pri));
         for (i = 0; i < MPTSAS_MAX_PHYS; i++) {
                 mutex_init(&mpt->m_phy_info[i].smhba_info.phy_mutex,
-                    NULL, MUTEX_DRIVER,
-                    DDI_INTR_PRI(mpt->m_intr_pri));
+                    NULL, MUTEX_DRIVER, NULL);
         }
 
         cv_init(&mpt->m_cv, NULL, CV_DRIVER, NULL);
         cv_init(&mpt->m_passthru_cv, NULL, CV_DRIVER, NULL);
         cv_init(&mpt->m_fw_cv, NULL, CV_DRIVER, NULL);
         cv_init(&mpt->m_config_cv, NULL, CV_DRIVER, NULL);
         cv_init(&mpt->m_fw_diag_cv, NULL, CV_DRIVER, NULL);
         mutex_init_done++;
 
-        /*
-         * Disable hardware interrupt since we're not ready to
-         * handle it yet.
-         */
-        MPTSAS_DISABLE_INTR(mpt);
-        if (mptsas_register_intrs(mpt) == FALSE)
-                goto fail;
-        intr_added++;
-
         mutex_enter(&mpt->m_mutex);
         /*
          * Initialize power management component
          */
         if (mpt->m_options & MPTSAS_OPT_PM) {

@@ -1330,10 +1462,23 @@
                 mptsas_log(mpt, CE_WARN,
                     "mptsas_get_manufacture_page0 failed!");
                 goto fail;
         }
 
+        /*
+         * If we only have one interrupt the default for doneq_length_threshold
+         * should be 0 so that all completion processing goes to the threads.
+         * Only change it if it wasn't set from .conf file.
+         */
+        if (mpt->m_doneq_thread_n != 0 &&
+            ddi_prop_exists(DDI_DEV_T_ANY, dip,
+            0, "mptsas_doneq_length_threshold_prop") == 0 &&
+            mpt->m_intr_cnt == 1) {
+                mpt->m_doneq_length_threshold = 0;
+        }
+
+
         mutex_exit(&mpt->m_mutex);
 
         /*
          * Register the iport for multiple port HBA
          */

@@ -1363,14 +1508,12 @@
         }
 
         /*
          * Initialize the wait and done FIFO queue
          */
-        mpt->m_donetail = &mpt->m_doneq;
+        mpt->m_dlist.dl_tail = &mpt->m_dlist.dl_q;
         mpt->m_waitqtail = &mpt->m_waitq;
-        mpt->m_tx_waitqtail = &mpt->m_tx_waitq;
-        mpt->m_tx_draining = 0;
 
         /*
          * ioc cmd queue initialize
          */
         mpt->m_ioc_event_cmdtail = &mpt->m_ioc_event_cmdq;

@@ -1387,21 +1530,44 @@
                 goto fail;
         }
         mutex_exit(&mpt->m_mutex);
 
         /*
-         * Initialize PHY info for smhba
+         * used for mptsas_watch
+         */
+        mptsas_list_add(mpt);
+
+        mutex_enter(&mptsas_global_mutex);
+        if (mptsas_timeouts_enabled == 0) {
+                mptsas_scsi_watchdog_tick = ddi_prop_get_int(DDI_DEV_T_ANY,
+                    dip, 0, "scsi-watchdog-tick", DEFAULT_WD_TICK);
+
+                mptsas_tick = mptsas_scsi_watchdog_tick *
+                    drv_usectohz((clock_t)1000000);
+
+                mptsas_timeout_id = timeout(mptsas_watch, NULL, mptsas_tick);
+                mptsas_timeouts_enabled = 1;
+        }
+        mutex_exit(&mptsas_global_mutex);
+        added_watchdog++;
+
+        /*
+         * Initialize PHY info for smhba.
+         * This requires watchdog to be enabled otherwise if interrupts
+         * don't work the system will hang.
          */
         if (mptsas_smhba_setup(mpt)) {
                 mptsas_log(mpt, CE_WARN, "mptsas phy initialization "
                     "failed");
                 goto fail;
         }
 
         /* Check all dma handles allocated in attach */
         if ((mptsas_check_dma_handle(mpt->m_dma_req_frame_hdl)
             != DDI_SUCCESS) ||
+            (mptsas_check_dma_handle(mpt->m_dma_req_sense_hdl)
+            != DDI_SUCCESS) ||
             (mptsas_check_dma_handle(mpt->m_dma_reply_frame_hdl)
             != DDI_SUCCESS) ||
             (mptsas_check_dma_handle(mpt->m_dma_free_queue_hdl)
             != DDI_SUCCESS) ||
             (mptsas_check_dma_handle(mpt->m_dma_post_queue_hdl)

@@ -1413,10 +1579,12 @@
 
         /* Check all acc handles allocated in attach */
         if ((mptsas_check_acc_handle(mpt->m_datap) != DDI_SUCCESS) ||
             (mptsas_check_acc_handle(mpt->m_acc_req_frame_hdl)
             != DDI_SUCCESS) ||
+            (mptsas_check_acc_handle(mpt->m_acc_req_sense_hdl)
+            != DDI_SUCCESS) ||
             (mptsas_check_acc_handle(mpt->m_acc_reply_frame_hdl)
             != DDI_SUCCESS) ||
             (mptsas_check_acc_handle(mpt->m_acc_free_queue_hdl)
             != DDI_SUCCESS) ||
             (mptsas_check_acc_handle(mpt->m_acc_post_queue_hdl)

@@ -1429,27 +1597,10 @@
         }
 
         /*
          * After this point, we are not going to fail the attach.
          */
-        /*
-         * used for mptsas_watch
-         */
-        mptsas_list_add(mpt);
-
-        mutex_enter(&mptsas_global_mutex);
-        if (mptsas_timeouts_enabled == 0) {
-                mptsas_scsi_watchdog_tick = ddi_prop_get_int(DDI_DEV_T_ANY,
-                    dip, 0, "scsi-watchdog-tick", DEFAULT_WD_TICK);
-
-                mptsas_tick = mptsas_scsi_watchdog_tick *
-                    drv_usectohz((clock_t)1000000);
-
-                mptsas_timeout_id = timeout(mptsas_watch, NULL, mptsas_tick);
-                mptsas_timeouts_enabled = 1;
-        }
-        mutex_exit(&mptsas_global_mutex);
 
         /* Print message of HBA present */
         ddi_report_dev(dip);
 
         /* report idle status to pm framework */

@@ -1462,10 +1613,13 @@
 fail:
         mptsas_log(mpt, CE_WARN, "attach failed");
         mptsas_fm_ereport(mpt, DDI_FM_DEVICE_NO_RESPONSE);
         ddi_fm_service_impact(mpt->m_dip, DDI_SERVICE_LOST);
         if (mpt) {
+                /* deallocate in reverse order */
+                if (added_watchdog) {
+                        mptsas_list_del(mpt);
                 mutex_enter(&mptsas_global_mutex);
 
                 if (mptsas_timeout_id && (mptsas_head == NULL)) {
                         timeout_id_t tid = mptsas_timeout_id;
                         mptsas_timeouts_enabled = 0;

@@ -1473,11 +1627,12 @@
                         mutex_exit(&mptsas_global_mutex);
                         (void) untimeout(tid);
                         mutex_enter(&mptsas_global_mutex);
                 }
                 mutex_exit(&mptsas_global_mutex);
-                /* deallocate in reverse order */
+                }
+
                 mptsas_cache_destroy(mpt);
 
                 if (smp_attach_setup) {
                         mptsas_smp_teardown(mpt);
                 }

@@ -1491,53 +1646,71 @@
                         refhash_destroy(mpt->m_smp_targets);
 
                 if (mpt->m_active) {
                         mptsas_free_active_slots(mpt);
                 }
-                if (intr_added) {
+                if (mpt->m_intr_cnt) {
                         mptsas_unregister_intrs(mpt);
                 }
 
                 if (doneq_thread_create) {
-                        mutex_enter(&mpt->m_doneq_mutex);
-                        doneq_thread_num = mpt->m_doneq_thread_n;
-                        for (j = 0; j < mpt->m_doneq_thread_n; j++) {
+                        mutex_enter(&mpt->m_qthread_mutex);
+                        q_thread_num = mpt->m_doneq_thread_n;
+                        for (j = 0; j < q_thread_num; j++) {
                                 mutex_enter(&mpt->m_doneq_thread_id[j].mutex);
                                 mpt->m_doneq_thread_id[j].flag &=
                                     (~MPTSAS_DONEQ_THREAD_ACTIVE);
                                 cv_signal(&mpt->m_doneq_thread_id[j].cv);
                                 mutex_exit(&mpt->m_doneq_thread_id[j].mutex);
                         }
                         while (mpt->m_doneq_thread_n) {
-                                cv_wait(&mpt->m_doneq_thread_cv,
-                                    &mpt->m_doneq_mutex);
+                                cv_wait(&mpt->m_qthread_cv,
+                                    &mpt->m_qthread_mutex);
                         }
-                        for (j = 0; j < doneq_thread_num; j++) {
+                        for (j = 0; j < q_thread_num; j++) {
                                 cv_destroy(&mpt->m_doneq_thread_id[j].cv);
                                 mutex_destroy(&mpt->m_doneq_thread_id[j].mutex);
                         }
                         kmem_free(mpt->m_doneq_thread_id,
                             sizeof (mptsas_doneq_thread_list_t)
-                            * doneq_thread_num);
-                        mutex_exit(&mpt->m_doneq_mutex);
-                        cv_destroy(&mpt->m_doneq_thread_cv);
-                        mutex_destroy(&mpt->m_doneq_mutex);
+                            * q_thread_num);
+                        mutex_exit(&mpt->m_qthread_mutex);
+                }
+                if (txwq_thread_create) {
+                        mutex_enter(&mpt->m_qthread_mutex);
+                        q_thread_num = mpt->m_txwq_thread_n;
+                        for (j = 0; j < q_thread_num; j++) {
+                                mutex_enter(&mpt->m_tx_waitq[j].txwq_mutex);
+                                mpt->m_tx_waitq[j].txwq_active = FALSE;
+                                cv_signal(&mpt->m_tx_waitq[j].txwq_cv);
+                                mutex_exit(&mpt->m_tx_waitq[j].txwq_mutex);
+                        }
+                        while (mpt->m_txwq_thread_n) {
+                                cv_wait(&mpt->m_qthread_cv,
+                                    &mpt->m_qthread_mutex);
+                        }
+                        for (j = 0; j < q_thread_num; j++) {
+                                cv_destroy(&mpt->m_tx_waitq[j].txwq_cv);
+                                cv_destroy(&mpt->m_tx_waitq[j].txwq_drain_cv);
+                                mutex_destroy(&mpt->m_tx_waitq[j].txwq_mutex);
+                        }
                 }
                 if (event_taskq_create) {
                         ddi_taskq_destroy(mpt->m_event_taskq);
                 }
                 if (dr_taskq_create) {
                         ddi_taskq_destroy(mpt->m_dr_taskq);
                 }
                 if (mutex_init_done) {
-                        mutex_destroy(&mpt->m_tx_waitq_mutex);
+                        mutex_destroy(&mpt->m_qthread_mutex);
                         mutex_destroy(&mpt->m_passthru_mutex);
                         mutex_destroy(&mpt->m_mutex);
                         for (i = 0; i < MPTSAS_MAX_PHYS; i++) {
                                 mutex_destroy(
                                     &mpt->m_phy_info[i].smhba_info.phy_mutex);
                         }
+                        cv_destroy(&mpt->m_qthread_cv);
                         cv_destroy(&mpt->m_cv);
                         cv_destroy(&mpt->m_passthru_cv);
                         cv_destroy(&mpt->m_fw_cv);
                         cv_destroy(&mpt->m_config_cv);
                         cv_destroy(&mpt->m_fw_diag_cv);

@@ -1551,11 +1724,11 @@
                 }
                 mptsas_free_handshake_msg(mpt);
                 mptsas_hba_fini(mpt);
 
                 mptsas_fm_fini(mpt);
-                ddi_soft_state_free(mptsas_state, instance);
+                ddi_soft_state_free(mptsas3_state, instance);
                 ddi_prop_remove_all(dip);
         }
         return (DDI_FAILURE);
 }
 

@@ -1765,11 +1938,11 @@
         scsi_hba_tran_t *tran;
         int             circ = 0;
         int             circ1 = 0;
         mdi_pathinfo_t  *pip = NULL;
         int             i;
-        int             doneq_thread_num = 0;
+        int             q_thread_num = 0;
 
         NDBG0(("mptsas_do_detach: dip=0x%p", (void *)dip));
 
         if ((tran = ndi_flavorv_get(dip, SCSA_FLAVOR_SCSI_DEVICE)) == NULL)
                 return (DDI_FAILURE);

@@ -1813,11 +1986,11 @@
                 (void) pm_busy_component(dip, 0);
                 if (mpt->m_power_level != PM_LEVEL_D0) {
                         if (pm_raise_power(dip, 0, PM_LEVEL_D0) !=
                             DDI_SUCCESS) {
                                 mptsas_log(mpt, CE_WARN,
-                                    "mptsas%d: Raise power request failed.",
+                                    "mptsas3%d: Raise power request failed.",
                                     mpt->m_instance);
                                 (void) pm_idle_component(dip, 0);
                                 return (DDI_FAILURE);
                         }
                 }

@@ -1837,33 +2010,50 @@
         mptsas_rem_intrs(mpt);
         ddi_taskq_destroy(mpt->m_event_taskq);
         ddi_taskq_destroy(mpt->m_dr_taskq);
 
         if (mpt->m_doneq_thread_n) {
-                mutex_enter(&mpt->m_doneq_mutex);
-                doneq_thread_num = mpt->m_doneq_thread_n;
+                mutex_enter(&mpt->m_qthread_mutex);
+                q_thread_num = mpt->m_doneq_thread_n;
                 for (i = 0; i < mpt->m_doneq_thread_n; i++) {
                         mutex_enter(&mpt->m_doneq_thread_id[i].mutex);
                         mpt->m_doneq_thread_id[i].flag &=
                             (~MPTSAS_DONEQ_THREAD_ACTIVE);
                         cv_signal(&mpt->m_doneq_thread_id[i].cv);
                         mutex_exit(&mpt->m_doneq_thread_id[i].mutex);
                 }
                 while (mpt->m_doneq_thread_n) {
-                        cv_wait(&mpt->m_doneq_thread_cv,
-                            &mpt->m_doneq_mutex);
+                        cv_wait(&mpt->m_qthread_cv,
+                            &mpt->m_qthread_mutex);
                 }
-                for (i = 0;  i < doneq_thread_num; i++) {
+                for (i = 0;  i < q_thread_num; i++) {
                         cv_destroy(&mpt->m_doneq_thread_id[i].cv);
                         mutex_destroy(&mpt->m_doneq_thread_id[i].mutex);
                 }
                 kmem_free(mpt->m_doneq_thread_id,
                     sizeof (mptsas_doneq_thread_list_t)
-                    * doneq_thread_num);
-                mutex_exit(&mpt->m_doneq_mutex);
-                cv_destroy(&mpt->m_doneq_thread_cv);
-                mutex_destroy(&mpt->m_doneq_mutex);
+                    * q_thread_num);
+                mutex_exit(&mpt->m_qthread_mutex);
+        }
+        if (mpt->m_txwq_thread_n) {
+                mutex_enter(&mpt->m_qthread_mutex);
+                q_thread_num = mpt->m_txwq_thread_n;
+                for (i = 0; i < q_thread_num; i++) {
+                        mutex_enter(&mpt->m_tx_waitq[i].txwq_mutex);
+                        mpt->m_tx_waitq[i].txwq_active = FALSE;
+                        cv_signal(&mpt->m_tx_waitq[i].txwq_cv);
+                        mutex_exit(&mpt->m_tx_waitq[i].txwq_mutex);
+                }
+                while (mpt->m_txwq_thread_n) {
+                        cv_wait(&mpt->m_qthread_cv,
+                            &mpt->m_qthread_mutex);
+                }
+                for (i = 0; i < q_thread_num; i++) {
+                        cv_destroy(&mpt->m_tx_waitq[i].txwq_cv);
+                        cv_destroy(&mpt->m_tx_waitq[i].txwq_drain_cv);
+                        mutex_destroy(&mpt->m_tx_waitq[i].txwq_mutex);
+                }
         }
 
         scsi_hba_reset_notify_tear_down(mpt->m_reset_notify_listf);
 
         mptsas_list_del(mpt);

@@ -1940,21 +2130,22 @@
 
         /* Lower the power informing PM Framework */
         if (mpt->m_options & MPTSAS_OPT_PM) {
                 if (pm_lower_power(dip, 0, PM_LEVEL_D3) != DDI_SUCCESS)
                         mptsas_log(mpt, CE_WARN,
-                            "!mptsas%d: Lower power request failed "
+                            "!mptsas3%d: Lower power request failed "
                             "during detach, ignoring.",
                             mpt->m_instance);
         }
 
-        mutex_destroy(&mpt->m_tx_waitq_mutex);
+        mutex_destroy(&mpt->m_qthread_mutex);
         mutex_destroy(&mpt->m_passthru_mutex);
         mutex_destroy(&mpt->m_mutex);
         for (i = 0; i < MPTSAS_MAX_PHYS; i++) {
                 mutex_destroy(&mpt->m_phy_info[i].smhba_info.phy_mutex);
         }
+        cv_destroy(&mpt->m_qthread_cv);
         cv_destroy(&mpt->m_cv);
         cv_destroy(&mpt->m_passthru_cv);
         cv_destroy(&mpt->m_fw_cv);
         cv_destroy(&mpt->m_config_cv);
         cv_destroy(&mpt->m_fw_diag_cv);

@@ -1966,11 +2157,11 @@
         mptsas_config_space_fini(mpt);
 
         mptsas_free_handshake_msg(mpt);
 
         mptsas_fm_fini(mpt);
-        ddi_soft_state_free(mptsas_state, ddi_get_instance(dip));
+        ddi_soft_state_free(mptsas3_state, ddi_get_instance(dip));
         ddi_prop_remove_all(dip);
 
         return (DDI_SUCCESS);
 }
 

@@ -2019,10 +2210,11 @@
 static int
 mptsas_alloc_handshake_msg(mptsas_t *mpt, size_t alloc_size)
 {
         ddi_dma_attr_t  task_dma_attrs;
 
+        mpt->m_hshk_dma_size = 0;
         task_dma_attrs = mpt->m_msg_dma_attr;
         task_dma_attrs.dma_attr_sgllen = 1;
         task_dma_attrs.dma_attr_granular = (uint32_t)(alloc_size);
 
         /* allocate Task Management ddi_dma resources */

@@ -2037,10 +2229,12 @@
 }
 
 static void
 mptsas_free_handshake_msg(mptsas_t *mpt)
 {
+        if (mpt->m_hshk_dma_size == 0)  /* reset to 0 below once freed */
+                return;                 /* so there is nothing to destroy */
         mptsas_dma_addr_destroy(&mpt->m_hshk_dma_hdl, &mpt->m_hshk_acc_hdl);
         mpt->m_hshk_dma_size = 0;
 }
 
 static int

@@ -2214,13 +2408,13 @@
         char buf[64];
 
         /*
          * create kmem cache for packets
          */
-        (void) sprintf(buf, "mptsas%d_cache", instance);
+        (void) sprintf(buf, "mptsas3%d_cache", instance);
         mpt->m_kmem_cache = kmem_cache_create(buf,
-            sizeof (struct mptsas_cmd) + scsi_pkt_size(), 8,
+            sizeof (struct mptsas_cmd) + scsi_pkt_size(), 16,
             mptsas_kmem_cache_constructor, mptsas_kmem_cache_destructor,
             NULL, (void *)mpt, NULL, 0);
 
         if (mpt->m_kmem_cache == NULL) {
                 mptsas_log(mpt, CE_WARN, "creating kmem cache failed");

@@ -2229,13 +2423,13 @@
 
         /*
          * create kmem cache for extra SGL frames if SGL cannot
          * be accomodated into main request frame.
          */
-        (void) sprintf(buf, "mptsas%d_cache_frames", instance);
+        (void) sprintf(buf, "mptsas3%d_cache_frames", instance);
         mpt->m_cache_frames = kmem_cache_create(buf,
-            sizeof (mptsas_cache_frames_t), 8,
+            sizeof (mptsas_cache_frames_t), 16,
             mptsas_cache_frames_constructor, mptsas_cache_frames_destructor,
             NULL, (void *)mpt, NULL, 0);
 
         if (mpt->m_cache_frames == NULL) {
                 mptsas_log(mpt, CE_WARN, "creating cache for frames failed");

@@ -2271,11 +2465,11 @@
         uint32_t        ioc_status;
 
         if (scsi_hba_iport_unit_address(dip) != 0)
                 return (DDI_SUCCESS);
 
-        mpt = ddi_get_soft_state(mptsas_state, ddi_get_instance(dip));
+        mpt = ddi_get_soft_state(mptsas3_state, ddi_get_instance(dip));
         if (mpt == NULL) {
                 return (DDI_FAILURE);
         }
 
         mutex_enter(&mpt->m_mutex);

@@ -2287,11 +2481,11 @@
                 mutex_exit(&mpt->m_mutex);
                 return (DDI_FAILURE);
         }
         switch (level) {
         case PM_LEVEL_D0:
-                NDBG11(("mptsas%d: turning power ON.", mpt->m_instance));
+                NDBG11(("mptsas3%d: turning power ON.", mpt->m_instance));
                 MPTSAS_POWER_ON(mpt);
                 /*
                  * Wait up to 30 seconds for IOC to come out of reset.
                  */
                 while (((ioc_status = ddi_get32(mpt->m_datap,

@@ -2316,15 +2510,15 @@
                         }
                 }
                 mpt->m_power_level = PM_LEVEL_D0;
                 break;
         case PM_LEVEL_D3:
-                NDBG11(("mptsas%d: turning power OFF.", mpt->m_instance));
+                NDBG11(("mptsas3%d: turning power OFF.", mpt->m_instance));
                 MPTSAS_POWER_OFF(mpt);
                 break;
         default:
-                mptsas_log(mpt, CE_WARN, "mptsas%d: unknown power level <%x>.",
+                mptsas_log(mpt, CE_WARN, "mptsas3%d: unknown power level <%x>.",
                     mpt->m_instance, level);
                 rval = DDI_FAILURE;
                 break;
         }
         mutex_exit(&mpt->m_mutex);

@@ -2498,12 +2692,15 @@
         size_t                  mem_size;
 
         /*
          * re-alloc when it has already alloced
          */
+        if (mpt->m_dma_flags & MPTSAS_REQ_FRAME) {
         mptsas_dma_addr_destroy(&mpt->m_dma_req_frame_hdl,
             &mpt->m_acc_req_frame_hdl);
+                mpt->m_dma_flags &= ~MPTSAS_REQ_FRAME;
+        }
 
         /*
          * The size of the request frame pool is:
          *   Number of Request Frames * Request Frame Size
          */

@@ -2537,10 +2734,85 @@
         /*
          * Clear the request frame pool.
          */
         bzero(mpt->m_req_frame, mem_size);
 
+        mpt->m_dma_flags |= MPTSAS_REQ_FRAME;
+        return (DDI_SUCCESS);
+}
+
+static int
+mptsas_alloc_sense_bufs(mptsas_t *mpt)
+{
+        ddi_dma_attr_t          sense_dma_attrs;
+        caddr_t                 memp;
+        ddi_dma_cookie_t        cookie;
+        size_t                  mem_size;
+        int                     num_extrqsense_bufs;
+
+        /*
+         * Allocate the request sense pool: one DMA area holding the
+         * base sense buffers followed by the extended sense area, plus
+         * the resource map used to hand out the extended buffers.
+         * Returns DDI_SUCCESS, or DDI_FAILURE if the DMA memory cannot
+         * be created.
+         *
+         * If a pool has already been allocated (MPTSAS_REQ_SENSE is set
+         * in m_dma_flags), free its map and DMA memory first and then
+         * re-allocate.
+         */
+        if (mpt->m_dma_flags & MPTSAS_REQ_SENSE) {
+                rmfreemap(mpt->m_erqsense_map);
+                mptsas_dma_addr_destroy(&mpt->m_dma_req_sense_hdl,
+                    &mpt->m_acc_req_sense_hdl);
+                mpt->m_dma_flags &= ~MPTSAS_REQ_SENSE;
+        }
+
+        /*
+         * The size of the request sense pool is:
+         *   (Number of Request Frames - 2) * Request Sense Size +
+         *   extra memory for extended sense requests
+         *   (mptsas_extreq_sense_bufsize bytes).
+         */
+        mem_size = ((mpt->m_max_requests - 2) * mpt->m_req_sense_size) +
+            mptsas_extreq_sense_bufsize;
+
+        /*
+         * Set the DMA attributes, starting from the message DMA
+         * attributes.  ARQ buffers are aligned on a 16-byte boundary
+         * and the pool must be a single segment (sgllen of 1).
+         */
+        sense_dma_attrs = mpt->m_msg_dma_attr;
+        sense_dma_attrs.dma_attr_align = 16;
+        sense_dma_attrs.dma_attr_sgllen = 1;
+
+        /*
+         * Allocate the request sense buffer pool.  Nothing has been
+         * recorded in mpt yet, so a failure here needs no unwinding.
+         */
+        if (mptsas_dma_addr_create(mpt, sense_dma_attrs,
+            &mpt->m_dma_req_sense_hdl, &mpt->m_acc_req_sense_hdl, &memp,
+            mem_size, &cookie) == FALSE) {
+                return (DDI_FAILURE);
+        }
+
+        /*
+         * Store the request sense base memory address.  The chip uses the
+         * first address (from the DMA cookie) to dma the request sense
+         * data.  The second address is the address mpt uses to access the
+         * data.  The third is the base for the extended rqsense buffers,
+         * which start immediately after the base sense buffers.
+         */
+        mpt->m_req_sense_dma_addr = cookie.dmac_laddress;
+        mpt->m_req_sense = memp;
+        memp += (mpt->m_max_requests - 2) * mpt->m_req_sense_size;
+        mpt->m_extreq_sense = memp;
+
+        /*
+         * The extra memory is divided up into multiples of the base
+         * buffer size in order to allocate via rmalloc().
+         * Note that the rmallocmap cannot start at zero!
+         * Hence the map is seeded with num_extrqsense_bufs units
+         * starting at index 1 rather than 0.
+         */
+        num_extrqsense_bufs = mptsas_extreq_sense_bufsize /
+            mpt->m_req_sense_size;
+        mpt->m_erqsense_map = rmallocmap_wait(num_extrqsense_bufs);
+        rmfree(mpt->m_erqsense_map, num_extrqsense_bufs, 1);
+
+        /*
+         * Clear the whole pool, including the extended sense area
+         * (mem_size covers both).
+         */
+        bzero(mpt->m_req_sense, mem_size);
+
+        mpt->m_dma_flags |= MPTSAS_REQ_SENSE;   /* pool and map now exist */
         return (DDI_SUCCESS);
 }
 
 static int
 mptsas_alloc_reply_frames(mptsas_t *mpt)

@@ -2551,12 +2823,15 @@
         size_t                  mem_size;
 
         /*
          * re-alloc when it has already alloced
          */
+        if (mpt->m_dma_flags & MPTSAS_REPLY_FRAME) {
         mptsas_dma_addr_destroy(&mpt->m_dma_reply_frame_hdl,
             &mpt->m_acc_reply_frame_hdl);
+                mpt->m_dma_flags &= ~MPTSAS_REPLY_FRAME;
+        }
 
         /*
          * The size of the reply frame pool is:
          *   Number of Reply Frames * Reply Frame Size
          */

@@ -2589,10 +2864,11 @@
         /*
          * Clear the reply frame pool.
          */
         bzero(mpt->m_reply_frame, mem_size);
 
+        mpt->m_dma_flags |= MPTSAS_REPLY_FRAME;
         return (DDI_SUCCESS);
 }
 
 static int
 mptsas_alloc_free_queue(mptsas_t *mpt)

@@ -2603,12 +2879,15 @@
         size_t                  mem_size;
 
         /*
          * re-alloc when it has already alloced
          */
+        if (mpt->m_dma_flags & MPTSAS_FREE_QUEUE) {
         mptsas_dma_addr_destroy(&mpt->m_dma_free_queue_hdl,
             &mpt->m_acc_free_queue_hdl);
+                mpt->m_dma_flags &= ~MPTSAS_FREE_QUEUE;
+        }
 
         /*
          * The reply free queue size is:
          *   Reply Free Queue Depth * 4
          * The "4" is the size of one 32 bit address (low part of 64-bit

@@ -2644,44 +2923,73 @@
         /*
          * Clear the reply free queue memory.
          */
         bzero(mpt->m_free_queue, mem_size);
 
+        mpt->m_dma_flags |= MPTSAS_FREE_QUEUE;
         return (DDI_SUCCESS);
 }
 
+static void
+mptsas_free_post_queue(mptsas_t *mpt)
+{
+        mptsas_reply_pqueue_t   *rpqp;
+        int                     i;
+
+        if (mpt->m_dma_flags & MPTSAS_POST_QUEUE) { /* only if allocated */
+                mptsas_dma_addr_destroy(&mpt->m_dma_post_queue_hdl,
+                    &mpt->m_acc_post_queue_hdl);
+                rpqp = mpt->m_rep_post_queues;  /* per-queue state array */
+                for (i = 0; i < mpt->m_post_reply_qcount; i++) {
+                        mutex_destroy(&rpqp->rpq_mutex); /* one per queue */
+                        rpqp++;
+                }
+                kmem_free(mpt->m_rep_post_queues,
+                    sizeof (mptsas_reply_pqueue_t) *
+                    mpt->m_post_reply_qcount);
+                mpt->m_dma_flags &= ~MPTSAS_POST_QUEUE; /* mark as freed */
+        }
+}
+
 static int
 mptsas_alloc_post_queue(mptsas_t *mpt)
 {
         ddi_dma_attr_t          frame_dma_attrs;
         caddr_t                 memp;
         ddi_dma_cookie_t        cookie;
         size_t                  mem_size;
+        mptsas_reply_pqueue_t   *rpqp;
+        int                     i;
 
         /*
          * re-alloc when it has already alloced
          */
-        mptsas_dma_addr_destroy(&mpt->m_dma_post_queue_hdl,
-            &mpt->m_acc_post_queue_hdl);
+        mptsas_free_post_queue(mpt);
 
         /*
          * The reply descriptor post queue size is:
          *   Reply Descriptor Post Queue Depth * 8
          * The "8" is the size of each descriptor (8 bytes or 64 bits).
          */
-        mem_size = mpt->m_post_queue_depth * 8;
+        mpt->m_post_reply_qcount = mpt->m_intr_cnt;
+        mem_size = mpt->m_post_queue_depth * 8 * mpt->m_post_reply_qcount;
 
         /*
          * set the DMA attributes.  The Reply Descriptor Post Queue must be
          * aligned on a 16-byte boundry.
          */
         frame_dma_attrs = mpt->m_msg_dma_attr;
         frame_dma_attrs.dma_attr_align = 16;
         frame_dma_attrs.dma_attr_sgllen = 1;
 
         /*
-         * allocate the reply post queue
+         * Allocate the reply post queue(s).
+         * MPI2.5 introduces a method to allocate multiple queues
+         * using a redirect table. For now stick to one contiguous
+         * chunk. This can get as big as 1Mbyte for 16 queues.
+         * The spec gives no indication that the queue size can be
+         * reduced if you have many of them.
          */
         if (mptsas_dma_addr_create(mpt, frame_dma_attrs,
             &mpt->m_dma_post_queue_hdl, &mpt->m_acc_post_queue_hdl, &memp,
             mem_size, &cookie) == FALSE) {
                 return (DDI_FAILURE);

@@ -2693,24 +3001,40 @@
          * second address is the address mpt uses to manage the queue.
          */
         mpt->m_post_queue_dma_addr = cookie.dmac_laddress;
         mpt->m_post_queue = memp;
 
+        mpt->m_rep_post_queues = kmem_zalloc(sizeof (mptsas_reply_pqueue_t) *
+            mpt->m_post_reply_qcount, KM_SLEEP);
+        rpqp = mpt->m_rep_post_queues;
+        for (i = 0; i < mpt->m_post_reply_qcount; i++) {
+                rpqp->rpq_queue = memp;
+                mutex_init(&rpqp->rpq_mutex, NULL, MUTEX_DRIVER, NULL);
+                rpqp->rpq_dlist.dl_tail = &rpqp->rpq_dlist.dl_q;
+                rpqp->rpq_num = (uint8_t)i;
+                memp += (mpt->m_post_queue_depth * 8);
+                rpqp++;
+        }
+
         /*
          * Clear the reply post queue memory.
          */
         bzero(mpt->m_post_queue, mem_size);
 
+        mpt->m_dma_flags |= MPTSAS_POST_QUEUE;
         return (DDI_SUCCESS);
 }
 
 static void
 mptsas_alloc_reply_args(mptsas_t *mpt)
 {
         if (mpt->m_replyh_args == NULL) {
                 mpt->m_replyh_args = kmem_zalloc(sizeof (m_replyh_arg_t) *
                     mpt->m_max_replies, KM_SLEEP);
+        } else {        /* already allocated: clear it for reuse */
+                bzero(mpt->m_replyh_args, sizeof (m_replyh_arg_t) *
+                    mpt->m_max_replies); /* assumes same size; confirm */
         }
 }
 
 static int
 mptsas_alloc_extra_sgl_frame(mptsas_t *mpt, mptsas_cmd_t *cmd)

@@ -2749,21 +3073,32 @@
         NDBG0(("mptsas_hba_fini"));
 
         /*
          * Free up any allocated memory
          */
+        if (mpt->m_dma_flags & MPTSAS_REQ_FRAME) {
         mptsas_dma_addr_destroy(&mpt->m_dma_req_frame_hdl,
             &mpt->m_acc_req_frame_hdl);
+        }
+
+        if (mpt->m_dma_flags & MPTSAS_REQ_SENSE) {
+                rmfreemap(mpt->m_erqsense_map);
+                mptsas_dma_addr_destroy(&mpt->m_dma_req_sense_hdl,
+                    &mpt->m_acc_req_sense_hdl);
+        }
 
+        if (mpt->m_dma_flags & MPTSAS_REPLY_FRAME) {
         mptsas_dma_addr_destroy(&mpt->m_dma_reply_frame_hdl,
             &mpt->m_acc_reply_frame_hdl);
+        }
 
+        if (mpt->m_dma_flags & MPTSAS_FREE_QUEUE) {
         mptsas_dma_addr_destroy(&mpt->m_dma_free_queue_hdl,
             &mpt->m_acc_free_queue_hdl);
+        }
 
-        mptsas_dma_addr_destroy(&mpt->m_dma_post_queue_hdl,
-            &mpt->m_acc_post_queue_hdl);
+        mptsas_free_post_queue(mpt);
 
         if (mpt->m_replyh_args != NULL) {
                 kmem_free(mpt->m_replyh_args, sizeof (m_replyh_arg_t)
                     * mpt->m_max_replies);
         }

@@ -3038,12 +3373,14 @@
 #ifndef __lock_lint
         _NOTE(ARGUNUSED(ap))
 #endif
         mptsas_t        *mpt = PKT2MPT(pkt);
         mptsas_cmd_t    *cmd = PKT2CMD(pkt);
-        int             rval;
+        int             rval, start;
+        uint8_t         pref;
         mptsas_target_t *ptgt = cmd->cmd_tgt_addr;
+        mptsas_tx_waitqueue_t *txwq;
 
         NDBG1(("mptsas_scsi_start: pkt=0x%p", (void *)pkt));
         ASSERT(ptgt);
         if (ptgt == NULL)
                 return (TRAN_FATAL_ERROR);

@@ -3088,118 +3425,86 @@
          * the last cmd in a burst be processed.
          *
          * we enable this feature only when the helper threads are enabled,
          * at which we think the loads are heavy.
          *
-         * per instance mutex m_tx_waitq_mutex is introduced to protect the
-         * m_tx_waitqtail, m_tx_waitq, m_tx_draining.
+         * per instance, per queue mutex m_tx_waitq[i].txwq_mutex is
+         * introduced to protect the txwq_qtail, txwq_cmdq, txwq_len
          */
 
-        if (mpt->m_doneq_thread_n) {
-                if (mutex_tryenter(&mpt->m_mutex) != 0) {
-                        rval = mptsas_accept_txwq_and_pkt(mpt, cmd);
-                        mutex_exit(&mpt->m_mutex);
-                } else if (cmd->cmd_pkt_flags & FLAG_NOINTR) {
+        if (mpt->m_txwq_enabled == TRUE) {
+                int gotmtx = 0;
+
+                if (mpt->m_txwq_allow_q_jumping) {
+                        gotmtx = mutex_tryenter(&mpt->m_mutex);
+                }
+                if (gotmtx == 0) {
+                        /* We didn't get the mutex or didn't try */
+                        if (cmd->cmd_pkt_flags & FLAG_NOINTR) {
                         mutex_enter(&mpt->m_mutex);
-                        rval = mptsas_accept_txwq_and_pkt(mpt, cmd);
-                        mutex_exit(&mpt->m_mutex);
+                                /* Polled commands queue jump */
+                                mptsas_accept_tx_waitqs(mpt);
                 } else {
-                        mutex_enter(&mpt->m_tx_waitq_mutex);
-                        /*
-                         * ptgt->m_dr_flag is protected by m_mutex or
-                         * m_tx_waitq_mutex. In this case, m_tx_waitq_mutex
-                         * is acquired.
-                         */
-                        if (ptgt->m_dr_flag == MPTSAS_DR_INTRANSITION) {
-                                if (cmd->cmd_pkt_flags & FLAG_NOQUEUE) {
-                                        /*
-                                         * The command should be allowed to
-                                         * retry by returning TRAN_BUSY to
-                                         * to stall the I/O's which come from
-                                         * scsi_vhci since the device/path is
-                                         * in unstable state now.
-                                         */
-                                        mutex_exit(&mpt->m_tx_waitq_mutex);
-                                        return (TRAN_BUSY);
+                                rval = mptsas_check_targ_intxtion(
+                                    cmd->cmd_tgt_addr,
+                                    cmd->cmd_pkt_flags);
+                                if (rval != TRAN_ACCEPT) {
+                                        return (rval);
+                                }
+
+                                cmd->cmd_flags |= CFLAG_TXQ;
+                                pref = mpt->m_pref_tx_waitq;
+                                txwq = &mpt->m_tx_waitq[pref];
+
+                                if (mutex_tryenter(&txwq->txwq_mutex) == 0) {
+                                        txwq = &mpt->m_tx_waitq[pref^1];
+                                        mutex_enter(&txwq->txwq_mutex);
                                 } else {
-                                        /*
-                                         * The device is offline, just fail the
-                                         * command by returning
-                                         * TRAN_FATAL_ERROR.
-                                         */
-                                        mutex_exit(&mpt->m_tx_waitq_mutex);
-                                        return (TRAN_FATAL_ERROR);
+                                        pref ^= 1;
+                                        mpt->m_pref_tx_waitq = pref;
                                 }
+
+                                *txwq->txwq_qtail = cmd;
+                                txwq->txwq_qtail = &cmd->cmd_linkp;
+                                txwq->txwq_len++;
+                                if (!txwq->txwq_draining) {
+                                        cv_signal(&txwq->txwq_cv);
                         }
-                        if (mpt->m_tx_draining) {
-                                cmd->cmd_flags |= CFLAG_TXQ;
-                                *mpt->m_tx_waitqtail = cmd;
-                                mpt->m_tx_waitqtail = &cmd->cmd_linkp;
-                                mutex_exit(&mpt->m_tx_waitq_mutex);
-                        } else { /* drain the queue */
-                                mpt->m_tx_draining = 1;
-                                mutex_exit(&mpt->m_tx_waitq_mutex);
-                                mutex_enter(&mpt->m_mutex);
-                                rval = mptsas_accept_txwq_and_pkt(mpt, cmd);
-                                mutex_exit(&mpt->m_mutex);
+                                mutex_exit(&txwq->txwq_mutex);
+                                return (rval);
                         }
                 }
         } else {
                 mutex_enter(&mpt->m_mutex);
-                /*
-                 * ptgt->m_dr_flag is protected by m_mutex or m_tx_waitq_mutex
-                 * in this case, m_mutex is acquired.
-                 */
-                if (ptgt->m_dr_flag == MPTSAS_DR_INTRANSITION) {
-                        if (cmd->cmd_pkt_flags & FLAG_NOQUEUE) {
-                                /*
-                                 * commands should be allowed to retry by
-                                 * returning TRAN_BUSY to stall the I/O's
-                                 * which come from scsi_vhci since the device/
-                                 * path is in unstable state now.
-                                 */
-                                mutex_exit(&mpt->m_mutex);
-                                return (TRAN_BUSY);
-                        } else {
-                                /*
-                                 * The device is offline, just fail the
-                                 * command by returning TRAN_FATAL_ERROR.
-                                 */
-                                mutex_exit(&mpt->m_mutex);
-                                return (TRAN_FATAL_ERROR);
                         }
+        rval = mptsas_check_targ_intxtion(cmd->cmd_tgt_addr,
+            cmd->cmd_pkt_flags);
+        if (rval != TRAN_ACCEPT) {
+                mutex_exit(&mpt->m_mutex);
+                return (rval);
                 }
-                rval = mptsas_accept_pkt(mpt, cmd);
+
+        start = mptsas_accept_pkt(mpt, cmd, &rval);
                 mutex_exit(&mpt->m_mutex);
+        if (start) {
+                (void) mptsas_start_cmd(mpt, cmd);
         }
 
         return (rval);
 }
 
-/*
- * Accept all the queued cmds(if any) before accept the current one.
- */
 static int
-mptsas_accept_txwq_and_pkt(mptsas_t *mpt, mptsas_cmd_t *cmd)
+mptsas_check_targ_intxtion(mptsas_target_t *ptgt, int cmd_pkt_flags)
 {
-        int rval;
-        mptsas_target_t *ptgt = cmd->cmd_tgt_addr;
-
-        ASSERT(mutex_owned(&mpt->m_mutex));
         /*
-         * The call to mptsas_accept_tx_waitq() must always be performed
-         * because that is where mpt->m_tx_draining is cleared.
-         */
-        mutex_enter(&mpt->m_tx_waitq_mutex);
-        mptsas_accept_tx_waitq(mpt);
-        mutex_exit(&mpt->m_tx_waitq_mutex);
-        /*
-         * ptgt->m_dr_flag is protected by m_mutex or m_tx_waitq_mutex
-         * in this case, m_mutex is acquired.
+         * ptgt->m_dr_flag is a variable that is only ever changed by
+         * direct write under the main m_mutex.
+         * It doesn't need a mutex hold to protect this read.
          */
+
         if (ptgt->m_dr_flag == MPTSAS_DR_INTRANSITION) {
-                if (cmd->cmd_pkt_flags & FLAG_NOQUEUE) {
+                if (cmd_pkt_flags & FLAG_NOQUEUE) {
                         /*
                          * The command should be allowed to retry by returning
                          * TRAN_BUSY to stall the I/O's which come from
                          * scsi_vhci since the device/path is in unstable state
                          * now.

@@ -3211,17 +3516,52 @@
                          * return TRAN_FATAL_ERROR.
                          */
                         return (TRAN_FATAL_ERROR);
                 }
         }
-        rval = mptsas_accept_pkt(mpt, cmd);
+        return (TRAN_ACCEPT);
+}
 
-        return (rval);
+/*
+ * Note that this function has a side effect of releasing the
+ * per target mutex.
+ */
+static void
+mptsas_offline_target_direct(mptsas_t *mpt, mptsas_target_t *ptgt)
+{
+        char                            phy_mask_name[MPTSAS_MAX_PHYS];
+        mptsas_phymask_t                phymask = ptgt->m_addr.mta_phymask;
+        dev_info_t                      *parent;
+
+        ASSERT(mutex_owned(&mpt->m_mutex));
+
+        ptgt->m_dr_flag = MPTSAS_DR_INTRANSITION;
+        bzero(phy_mask_name, MPTSAS_MAX_PHYS);
+        (void) sprintf(phy_mask_name, "%x", phymask);
+        parent = scsi_hba_iport_find(mpt->m_dip, phy_mask_name);
+
+        if (parent != NULL) {
+                mptsas_offline_target(mpt, ptgt,
+                    ptgt->m_deviceinfo & DEVINFO_DIRECT_ATTACHED ?
+                    MPTSAS_TOPO_FLAG_DIRECT_ATTACHED_DEVICE :
+                    MPTSAS_TOPO_FLAG_EXPANDER_ATTACHED_DEVICE,
+                    parent);
+        } else {
+                mptsas_log(mpt, CE_WARN, "Failed to find an "
+                    "iport for \"%s\", should not happen!",
+                    phy_mask_name);
+        }
 }
 
+/*
+ * In order to be efficient with the m_mutex (which can be dropped before
+ * calling mptsas_start_cmd()) indicate if start_cmd should be called via the
+ * returned value (FALSE or TRUE). Caller is then responsible for doing the
+ * right thing with the m_mutex.
+ */
 static int
-mptsas_accept_pkt(mptsas_t *mpt, mptsas_cmd_t *cmd)
+mptsas_accept_pkt(mptsas_t *mpt, mptsas_cmd_t *cmd, int *tran_rval)
 {
         int             rval = TRAN_ACCEPT;
         mptsas_target_t *ptgt = cmd->cmd_tgt_addr;
 
         NDBG1(("mptsas_accept_pkt: cmd=0x%p", (void *)cmd));

@@ -3230,24 +3570,30 @@
 
         if ((cmd->cmd_flags & CFLAG_PREPARED) == 0) {
                 rval = mptsas_prepare_pkt(cmd);
                 if (rval != TRAN_ACCEPT) {
                         cmd->cmd_flags &= ~CFLAG_TRANFLAG;
-                        return (rval);
+                        goto set_tranrval;
                 }
         }
 
         /*
-         * reset the throttle if we were draining
+         * If the command came from the tx wait q it may have slipped
+         * by the check for dr_flag before being added to the queue.
+         * Fail here with abort status.
          */
-        if ((ptgt->m_t_ncmds == 0) &&
-            (ptgt->m_t_throttle == DRAIN_THROTTLE)) {
-                NDBG23(("reset throttle"));
-                ASSERT(ptgt->m_reset_delay == 0);
-                mptsas_set_throttle(mpt, ptgt, MAX_THROTTLE);
+        if (cmd->cmd_flags & CFLAG_TXQ) {
+                rval = mptsas_check_targ_intxtion(cmd->cmd_tgt_addr,
+                    cmd->cmd_pkt_flags);
+                if (rval != TRAN_ACCEPT) {
+                        mptsas_set_pkt_reason(mpt, cmd, CMD_ABORTED,
+                            STAT_ABORTED);
+                        mptsas_doneq_add(mpt, cmd);
+                        mptsas_doneq_empty(mpt);
+                        goto set_tranrval;
+                }
         }
-
         /*
          * If HBA is being reset, the DevHandles are being re-initialized,
          * which means that they could be invalid even if the target is still
          * attached.  Check if being reset and if DevHandle is being
          * re-initialized.  If this is the case, return BUSY so the I/O can be

@@ -3256,53 +3602,72 @@
         if ((ptgt->m_devhdl == MPTSAS_INVALID_DEVHDL) && mpt->m_in_reset) {
                 mptsas_set_pkt_reason(mpt, cmd, CMD_RESET, STAT_BUS_RESET);
                 if (cmd->cmd_flags & CFLAG_TXQ) {
                         mptsas_doneq_add(mpt, cmd);
                         mptsas_doneq_empty(mpt);
-                        return (rval);
                 } else {
-                        return (TRAN_BUSY);
+                        rval = TRAN_BUSY;
                 }
+                goto set_tranrval;
+        }
+
+        mutex_enter(&ptgt->m_t_mutex);
+        /*
+         * reset the throttle if we were draining
+         */
+        if ((ptgt->m_t_ncmds == 0) &&
+            (ptgt->m_t_throttle == DRAIN_THROTTLE)) {
+                NDBG23(("reset throttle"));
+                ASSERT(ptgt->m_reset_delay == 0);
+                mptsas_set_throttle(mpt, ptgt, MAX_THROTTLE);
         }
 
         /*
          * If device handle has already been invalidated, just
-         * fail the command. In theory, command from scsi_vhci
-         * client is impossible send down command with invalid
+         * fail the command. In theory, it's impossible to receive a
+         * command from a scsi_vhci client with an invalid
          * devhdl since devhdl is set after path offline, target
-         * driver is not suppose to select a offlined path.
+         * driver is not supposed to select an offlined path.
          */
         if (ptgt->m_devhdl == MPTSAS_INVALID_DEVHDL) {
-                NDBG20(("rejecting command, it might because invalid devhdl "
+                NDBG3(("rejecting command, it might because invalid devhdl "
                     "request."));
+                mutex_exit(&ptgt->m_t_mutex);
                 mptsas_set_pkt_reason(mpt, cmd, CMD_DEV_GONE, STAT_TERMINATED);
                 if (cmd->cmd_flags & CFLAG_TXQ) {
                         mptsas_doneq_add(mpt, cmd);
                         mptsas_doneq_empty(mpt);
-                        return (rval);
                 } else {
-                        return (TRAN_FATAL_ERROR);
+                        rval = TRAN_FATAL_ERROR;
                 }
+                goto set_tranrval;
         }
         /*
          * The first case is the normal case.  mpt gets a command from the
          * target driver and starts it.
          * Since SMID 0 is reserved and the TM slot is reserved, the actual max
          * commands is m_max_requests - 2.
          */
         if ((mpt->m_ncmds <= (mpt->m_max_requests - 2)) &&
             (ptgt->m_t_throttle > HOLD_THROTTLE) &&
             (ptgt->m_t_ncmds < ptgt->m_t_throttle) &&
-            (ptgt->m_reset_delay == 0) &&
+            (ptgt->m_reset_delay == 0) && (mpt->m_polled_intr == 0) &&
             (ptgt->m_t_nwait == 0) &&
             ((cmd->cmd_pkt_flags & FLAG_NOINTR) == 0)) {
-                if (mptsas_save_cmd(mpt, cmd) == TRUE) {
-                        (void) mptsas_start_cmd(mpt, cmd);
+                ASSERT((cmd->cmd_flags & CFLAG_CMDIOC) == 0);
+                if (mptsas_save_cmd_to_slot(mpt, cmd) == TRUE) {
+                        ptgt->m_t_ncmds++;
+                        mutex_exit(&ptgt->m_t_mutex);
+                        cmd->cmd_active_expiration = 0;
+                        *tran_rval = rval;
+                        return (TRUE);
                 } else {
+                        mutex_exit(&ptgt->m_t_mutex);
                         mptsas_waitq_add(mpt, cmd);
                 }
         } else {
+                mutex_exit(&ptgt->m_t_mutex);
                 /*
                  * Add this pkt to the work queue
                  */
                 mptsas_waitq_add(mpt, cmd);
 

@@ -3317,21 +3682,40 @@
                         if ((cmd->cmd_flags & CFLAG_TM_CMD) == 0) {
                                 mptsas_doneq_empty(mpt);
                         }
                 }
         }
-        return (rval);
+set_tranrval:
+        *tran_rval = rval;
+        return (FALSE);
 }
 
-int
-mptsas_save_cmd(mptsas_t *mpt, mptsas_cmd_t *cmd)
+static void
+mptsas_retry_pkt(mptsas_t *mpt, mptsas_cmd_t *cmd)
 {
-        mptsas_slots_t *slots = mpt->m_active;
-        uint_t slot, start_rotor;
-        mptsas_target_t *ptgt = cmd->cmd_tgt_addr;
+        int             rval;
 
-        ASSERT(MUTEX_HELD(&mpt->m_mutex));
+        cmd->cmd_pkt_flags |= FLAG_HEAD;
+        cmd->cmd_flags |= CFLAG_RETRY;
+        cmd->cmd_flags &= ~CFLAG_TXQ;
+        if (mptsas_accept_pkt(mpt, cmd, &rval)) {
+                (void) mptsas_start_cmd(mpt, cmd);
+        }
+
+        /*
+         * If there was a problem clear the retry flag so that the
+         * command will be completed with error rather than get lost!
+         */
+        if (rval != TRAN_ACCEPT)
+                cmd->cmd_flags &= ~CFLAG_RETRY;
+}
+
+static int
+mptsas_save_cmd_to_slot(mptsas_t *mpt, mptsas_cmd_t *cmd)
+{
+        mptsas_slots_t *slots = mpt->m_active;
+        uint_t slot, start_rotor, rotor, n_normal;
 
         /*
          * Account for reserved TM request slot and reserved SMID of 0.
          */
         ASSERT(slots->m_n_normal == (mpt->m_max_requests - 2));

@@ -3342,46 +3726,74 @@
          * considers only the normal slots, not the reserved slot 0 nor the
          * task management slot m_n_normal + 1.  The rotor is left to point to
          * the normal slot after the one we select, unless we select the last
          * normal slot in which case it returns to slot 1.
          */
-        start_rotor = slots->m_rotor;
+        start_rotor = rotor = slots->m_rotor;
+        n_normal = slots->m_n_normal;
         do {
-                slot = slots->m_rotor++;
-                if (slots->m_rotor > slots->m_n_normal)
-                        slots->m_rotor = 1;
+                slot = rotor++;
+                if (rotor > n_normal)
+                        rotor = 1;
 
-                if (slots->m_rotor == start_rotor)
+                if (rotor == start_rotor)
                         break;
         } while (slots->m_slot[slot] != NULL);
+        slots->m_rotor = rotor;
 
         if (slots->m_slot[slot] != NULL)
                 return (FALSE);
 
         ASSERT(slot != 0 && slot <= slots->m_n_normal);
 
         cmd->cmd_slot = slot;
         slots->m_slot[slot] = cmd;
-        mpt->m_ncmds++;
+        atomic_inc_32(&mpt->m_ncmds);
+
+        /*
+         * Distribute the commands amongst the reply queues (Interrupt vectors).
+         * Stick to 0 for polled.
+         */
+        if (!(cmd->cmd_pkt_flags & FLAG_NOINTR) &&
+            !(cmd->cmd_flags & (CFLAG_PASSTHRU|CFLAG_CONFIG|CFLAG_FW_DIAG)) &&
+            (mpt->m_post_reply_qcount > 1)) {
+                cmd->cmd_rpqidx = slot % mpt->m_post_reply_qcount;
+        }
+        atomic_inc_32(&mpt->m_rep_post_queues[cmd->cmd_rpqidx].rpq_ncmds);
+        return (TRUE);
+}
+
+int
+mptsas_save_cmd(mptsas_t *mpt, mptsas_cmd_t *cmd)
+{
+        mptsas_target_t *ptgt = cmd->cmd_tgt_addr;
+
+        ASSERT(MUTEX_HELD(&mpt->m_mutex));
+
+        if (!mptsas_save_cmd_to_slot(mpt, cmd)) {
+                return (FALSE);
+        }
 
         /*
          * only increment per target ncmds if this is not a
          * command that has no target associated with it (i.e. a
-         * event acknoledgment)
+         * event acknowledgement)
          */
         if ((cmd->cmd_flags & CFLAG_CMDIOC) == 0) {
+                /*
+                 * Expiration time is set in mptsas_start_cmd
+                 */
+                mutex_enter(&ptgt->m_t_mutex);
                 ptgt->m_t_ncmds++;
-        }
-        cmd->cmd_active_timeout = cmd->cmd_pkt->pkt_time;
-
+                mutex_exit(&ptgt->m_t_mutex);
+                cmd->cmd_active_expiration = 0;
+        } else {
         /*
-         * If initial timout is less than or equal to one tick, bump
-         * the timeout by a tick so that command doesn't timeout before
-         * its allotted time.
+                 * Initialize expiration time for passthrough commands.
          */
-        if (cmd->cmd_active_timeout <= mptsas_scsi_watchdog_tick) {
-                cmd->cmd_active_timeout += mptsas_scsi_watchdog_tick;
+                cmd->cmd_active_expiration = gethrtime() +
+                    (hrtime_t)cmd->cmd_pkt->pkt_time * NANOSEC;
         }
         return (TRUE);
 }
 
 /*

@@ -3482,26 +3894,17 @@
         /*
          * Allocate the new packet.
          */
         if (pkt == NULL) {
                 ddi_dma_handle_t        save_dma_handle;
-                ddi_dma_handle_t        save_arq_dma_handle;
-                struct buf              *save_arq_bp;
-                ddi_dma_cookie_t        save_arqcookie;
 
                 cmd = kmem_cache_alloc(mpt->m_kmem_cache, kf);
 
                 if (cmd) {
                         save_dma_handle = cmd->cmd_dmahandle;
-                        save_arq_dma_handle = cmd->cmd_arqhandle;
-                        save_arq_bp = cmd->cmd_arq_buf;
-                        save_arqcookie = cmd->cmd_arqcookie;
                         bzero(cmd, sizeof (*cmd) + scsi_pkt_size());
                         cmd->cmd_dmahandle = save_dma_handle;
-                        cmd->cmd_arqhandle = save_arq_dma_handle;
-                        cmd->cmd_arq_buf = save_arq_bp;
-                        cmd->cmd_arqcookie = save_arqcookie;
 
                         pkt = (void *)((uchar_t *)cmd +
                             sizeof (struct mptsas_cmd));
                         pkt->pkt_ha_private = (opaque_t)cmd;
                         pkt->pkt_address = *ap;

@@ -3777,11 +4180,12 @@
                  * If this was partially allocated we set the resid
                  * the amount of data NOT transferred in this window
                  * If there is only one window, the resid will be 0
                  */
                 pkt->pkt_resid = (bp->b_bcount - cmd->cmd_totaldmacount);
-                NDBG16(("mptsas_dmaget: cmd_dmacount=%d.", cmd->cmd_dmacount));
+                NDBG3(("mptsas_scsi_init_pkt: cmd_dmacount=%d.",
+                    cmd->cmd_dmacount));
         }
         return (pkt);
 }
 
 /*

@@ -3830,66 +4234,24 @@
 static int
 mptsas_kmem_cache_constructor(void *buf, void *cdrarg, int kmflags)
 {
         mptsas_cmd_t            *cmd = buf;
         mptsas_t                *mpt  = cdrarg;
-        struct scsi_address     ap;
-        uint_t                  cookiec;
-        ddi_dma_attr_t          arq_dma_attr;
         int                     (*callback)(caddr_t);
 
         callback = (kmflags == KM_SLEEP)? DDI_DMA_SLEEP: DDI_DMA_DONTWAIT;
 
         NDBG4(("mptsas_kmem_cache_constructor"));
 
-        ap.a_hba_tran = mpt->m_tran;
-        ap.a_target = 0;
-        ap.a_lun = 0;
-
         /*
          * allocate a dma handle
          */
         if ((ddi_dma_alloc_handle(mpt->m_dip, &mpt->m_io_dma_attr, callback,
             NULL, &cmd->cmd_dmahandle)) != DDI_SUCCESS) {
                 cmd->cmd_dmahandle = NULL;
                 return (-1);
         }
-
-        cmd->cmd_arq_buf = scsi_alloc_consistent_buf(&ap, (struct buf *)NULL,
-            SENSE_LENGTH, B_READ, callback, NULL);
-        if (cmd->cmd_arq_buf == NULL) {
-                ddi_dma_free_handle(&cmd->cmd_dmahandle);
-                cmd->cmd_dmahandle = NULL;
-                return (-1);
-        }
-
-        /*
-         * allocate a arq handle
-         */
-        arq_dma_attr = mpt->m_msg_dma_attr;
-        arq_dma_attr.dma_attr_sgllen = 1;
-        if ((ddi_dma_alloc_handle(mpt->m_dip, &arq_dma_attr, callback,
-            NULL, &cmd->cmd_arqhandle)) != DDI_SUCCESS) {
-                ddi_dma_free_handle(&cmd->cmd_dmahandle);
-                scsi_free_consistent_buf(cmd->cmd_arq_buf);
-                cmd->cmd_dmahandle = NULL;
-                cmd->cmd_arqhandle = NULL;
-                return (-1);
-        }
-
-        if (ddi_dma_buf_bind_handle(cmd->cmd_arqhandle,
-            cmd->cmd_arq_buf, (DDI_DMA_READ | DDI_DMA_CONSISTENT),
-            callback, NULL, &cmd->cmd_arqcookie, &cookiec) != DDI_SUCCESS) {
-                ddi_dma_free_handle(&cmd->cmd_dmahandle);
-                ddi_dma_free_handle(&cmd->cmd_arqhandle);
-                scsi_free_consistent_buf(cmd->cmd_arq_buf);
-                cmd->cmd_dmahandle = NULL;
-                cmd->cmd_arqhandle = NULL;
-                cmd->cmd_arq_buf = NULL;
-                return (-1);
-        }
-
         return (0);
 }
 
 static void
 mptsas_kmem_cache_destructor(void *buf, void *cdrarg)

@@ -3899,19 +4261,10 @@
 #endif
         mptsas_cmd_t    *cmd = buf;
 
         NDBG4(("mptsas_kmem_cache_destructor"));
 
-        if (cmd->cmd_arqhandle) {
-                (void) ddi_dma_unbind_handle(cmd->cmd_arqhandle);
-                ddi_dma_free_handle(&cmd->cmd_arqhandle);
-                cmd->cmd_arqhandle = NULL;
-        }
-        if (cmd->cmd_arq_buf) {
-                scsi_free_consistent_buf(cmd->cmd_arq_buf);
-                cmd->cmd_arq_buf = NULL;
-        }
         if (cmd->cmd_dmahandle) {
                 ddi_dma_free_handle(&cmd->cmd_dmahandle);
                 cmd->cmd_dmahandle = NULL;
         }
 }

@@ -3965,11 +4318,11 @@
         /*
          * Store the SGL memory address.  This chip uses this
          * address to dma to and from the driver.  The second
          * address is the address mpt uses to fill in the SGL.
          */
-        p->m_phys_addr = cookie.dmac_address;
+        p->m_phys_addr = cookie.dmac_laddress;
 
         return (DDI_SUCCESS);
 }
 
 static void

@@ -4000,16 +4353,11 @@
 static int
 mptsas_pkt_alloc_extern(mptsas_t *mpt, mptsas_cmd_t *cmd,
     int cmdlen, int tgtlen, int statuslen, int kf)
 {
         caddr_t                 cdbp, scbp, tgt;
-        int                     (*callback)(caddr_t) = (kf == KM_SLEEP) ?
-            DDI_DMA_SLEEP : DDI_DMA_DONTWAIT;
-        struct scsi_address     ap;
         size_t                  senselength;
-        ddi_dma_attr_t          ext_arq_dma_attr;
-        uint_t                  cookiec;
 
         NDBG3(("mptsas_pkt_alloc_extern: "
             "cmd=0x%p cmdlen=%d tgtlen=%d statuslen=%d kf=%x",
             (void *)cmd, cmdlen, tgtlen, statuslen, kf));
 

@@ -4040,42 +4388,24 @@
 
                 /* allocate sense data buf for DMA */
 
                 senselength = statuslen - MPTSAS_GET_ITEM_OFF(
                     struct scsi_arq_status, sts_sensedata);
+                if (senselength > mpt->m_req_sense_size) {
+                        unsigned long i;
+                        cmd->cmd_extrqslen = (uint16_t)senselength;
+                        cmd->cmd_extrqschunks = (senselength +
+                            (mpt->m_req_sense_size - 1))/mpt->m_req_sense_size;
+                        i = rmalloc_wait(mpt->m_erqsense_map,
+                            cmd->cmd_extrqschunks);
+                        ASSERT(i != 0);
+                        cmd->cmd_extrqsidx = i - 1;
+                        cmd->cmd_arq_buf = mpt->m_extreq_sense +
+                            (cmd->cmd_extrqsidx * mpt->m_req_sense_size);
+                } else {
                 cmd->cmd_rqslen = (uchar_t)senselength;
-
-                ap.a_hba_tran = mpt->m_tran;
-                ap.a_target = 0;
-                ap.a_lun = 0;
-
-                cmd->cmd_ext_arq_buf = scsi_alloc_consistent_buf(&ap,
-                    (struct buf *)NULL, senselength, B_READ,
-                    callback, NULL);
-
-                if (cmd->cmd_ext_arq_buf == NULL) {
-                        goto fail;
-                }
-                /*
-                 * allocate a extern arq handle and bind the buf
-                 */
-                ext_arq_dma_attr = mpt->m_msg_dma_attr;
-                ext_arq_dma_attr.dma_attr_sgllen = 1;
-                if ((ddi_dma_alloc_handle(mpt->m_dip,
-                    &ext_arq_dma_attr, callback,
-                    NULL, &cmd->cmd_ext_arqhandle)) != DDI_SUCCESS) {
-                        goto fail;
-                }
-
-                if (ddi_dma_buf_bind_handle(cmd->cmd_ext_arqhandle,
-                    cmd->cmd_ext_arq_buf, (DDI_DMA_READ | DDI_DMA_CONSISTENT),
-                    callback, NULL, &cmd->cmd_ext_arqcookie,
-                    &cookiec)
-                    != DDI_SUCCESS) {
-                        goto fail;
                 }
-                cmd->cmd_flags |= CFLAG_EXTARQBUFVALID;
         }
         return (0);
 fail:
         mptsas_pkt_destroy_extern(mpt, cmd);
         return (1);

@@ -4093,24 +4423,19 @@
                 mptsas_log(mpt, CE_PANIC,
                     "mptsas_pkt_destroy_extern: freeing free packet");
                 _NOTE(NOT_REACHED)
                 /* NOTREACHED */
         }
+        if (cmd->cmd_extrqslen != 0) {
+                rmfree(mpt->m_erqsense_map, cmd->cmd_extrqschunks,
+                    cmd->cmd_extrqsidx + 1);
+        }
         if (cmd->cmd_flags & CFLAG_CDBEXTERN) {
                 kmem_free(cmd->cmd_pkt->pkt_cdbp, (size_t)cmd->cmd_cdblen);
         }
         if (cmd->cmd_flags & CFLAG_SCBEXTERN) {
                 kmem_free(cmd->cmd_pkt->pkt_scbp, (size_t)cmd->cmd_scblen);
-                if (cmd->cmd_flags & CFLAG_EXTARQBUFVALID) {
-                        (void) ddi_dma_unbind_handle(cmd->cmd_ext_arqhandle);
-                }
-                if (cmd->cmd_ext_arqhandle) {
-                        ddi_dma_free_handle(&cmd->cmd_ext_arqhandle);
-                        cmd->cmd_ext_arqhandle = NULL;
-                }
-                if (cmd->cmd_ext_arq_buf)
-                        scsi_free_consistent_buf(cmd->cmd_ext_arq_buf);
         }
         if (cmd->cmd_flags & CFLAG_PRIVEXTERN) {
                 kmem_free(cmd->cmd_pkt->pkt_private, (size_t)cmd->cmd_privlen);
         }
         cmd->cmd_flags = CFLAG_FREE;

@@ -4152,15 +4477,10 @@
         if (cmd->cmd_flags & CFLAG_DMAVALID) {
                 (void) ddi_dma_unbind_handle(cmd->cmd_dmahandle);
                 cmd->cmd_flags &= ~CFLAG_DMAVALID;
         }
 
-        if (cmd->cmd_flags & CFLAG_EXTARQBUFVALID) {
-                (void) ddi_dma_unbind_handle(cmd->cmd_ext_arqhandle);
-                cmd->cmd_flags &= ~CFLAG_EXTARQBUFVALID;
-        }
-
         mptsas_free_extra_sgl_frame(mpt, cmd);
 }
 
 static void
 mptsas_pkt_comp(struct scsi_pkt *pkt, mptsas_cmd_t *cmd)

@@ -4172,58 +4492,27 @@
         }
         (*pkt->pkt_comp)(pkt);
 }
 
 static void
-mptsas_sge_setup(mptsas_t *mpt, mptsas_cmd_t *cmd, uint32_t *control,
-        pMpi2SCSIIORequest_t frame, ddi_acc_handle_t acc_hdl)
+mptsas_sge_mainframe(mptsas_cmd_t *cmd, pMpi2SCSIIORequest_t frame,
+                ddi_acc_handle_t acc_hdl, uint_t cookiec,
+                uint32_t end_flags)
 {
-        uint_t                  cookiec;
+        pMpi2SGESimple64_t      sge;
         mptti_t                 *dmap;
         uint32_t                flags;
-        pMpi2SGESimple64_t      sge;
-        pMpi2SGEChain64_t       sgechain;
-        ASSERT(cmd->cmd_flags & CFLAG_DMAVALID);
-
-        /*
-         * Save the number of entries in the DMA
-         * Scatter/Gather list
-         */
-        cookiec = cmd->cmd_cookiec;
-
-        NDBG1(("mptsas_sge_setup: cookiec=%d", cookiec));
-
-        /*
-         * Set read/write bit in control.
-         */
-        if (cmd->cmd_flags & CFLAG_DMASEND) {
-                *control |= MPI2_SCSIIO_CONTROL_WRITE;
-        } else {
-                *control |= MPI2_SCSIIO_CONTROL_READ;
-        }
-
-        ddi_put32(acc_hdl, &frame->DataLength, cmd->cmd_dmacount);
 
-        /*
-         * We have 2 cases here.  First where we can fit all the
-         * SG elements into the main frame, and the case
-         * where we can't.
-         * If we have more cookies than we can attach to a frame
-         * we will need to use a chain element to point
-         * a location of memory where the rest of the S/G
-         * elements reside.
-         */
-        if (cookiec <= MPTSAS_MAX_FRAME_SGES64(mpt)) {
                 dmap = cmd->cmd_sg;
+
                 sge = (pMpi2SGESimple64_t)(&frame->SGL);
                 while (cookiec--) {
-                        ddi_put32(acc_hdl,
-                            &sge->Address.Low, dmap->addr.address64.Low);
-                        ddi_put32(acc_hdl,
-                            &sge->Address.High, dmap->addr.address64.High);
-                        ddi_put32(acc_hdl, &sge->FlagsLength,
-                            dmap->count);
+                ddi_put32(acc_hdl, &sge->Address.Low,
+                    dmap->addr.address64.Low);
+                ddi_put32(acc_hdl, &sge->Address.High,
+                    dmap->addr.address64.High);
+                ddi_put32(acc_hdl, &sge->FlagsLength, dmap->count);
                         flags = ddi_get32(acc_hdl, &sge->FlagsLength);
                         flags |= ((uint32_t)
                             (MPI2_SGE_FLAGS_SIMPLE_ELEMENT |
                             MPI2_SGE_FLAGS_SYSTEM_ADDRESS |
                             MPI2_SGE_FLAGS_64_BIT_ADDRESSING) <<

@@ -4232,15 +4521,11 @@
                         /*
                          * If this is the last cookie, we set the flags
                          * to indicate so
                          */
                         if (cookiec == 0) {
-                                flags |=
-                                    ((uint32_t)(MPI2_SGE_FLAGS_LAST_ELEMENT
-                                    | MPI2_SGE_FLAGS_END_OF_BUFFER
-                                    | MPI2_SGE_FLAGS_END_OF_LIST) <<
-                                    MPI2_SGE_FLAGS_SHIFT);
+                        flags |= end_flags;
                         }
                         if (cmd->cmd_flags & CFLAG_DMASEND) {
                                 flags |= (MPI2_SGE_FLAGS_HOST_TO_IOC <<
                                     MPI2_SGE_FLAGS_SHIFT);
                         } else {

@@ -4249,11 +4534,30 @@
                         }
                         ddi_put32(acc_hdl, &sge->FlagsLength, flags);
                         dmap++;
                         sge++;
                 }
-        } else {
+}
+
+static void
+mptsas_sge_chain(mptsas_t *mpt, mptsas_cmd_t *cmd,
+    pMpi2SCSIIORequest_t frame, ddi_acc_handle_t acc_hdl)
+{
+        pMpi2SGESimple64_t      sge;
+        pMpi2SGEChain64_t       sgechain;
+        uint64_t                nframe_phys_addr;
+        uint_t                  cookiec;
+        mptti_t                 *dmap;
+        uint32_t                flags;
+        int                     i, j, k, l, frames, sgemax;
+        int                     temp, maxframe_sges;
+        uint8_t                 chainflags;
+        uint16_t                chainlength;
+        mptsas_cache_frames_t   *p;
+
+        cookiec = cmd->cmd_cookiec;
+
                 /*
                  * Hereby we start to deal with multiple frames.
                  * The process is as follows:
                  * 1. Determine how many frames are needed for SGL element
                  *    storage; Note that all frames are stored in contiguous

@@ -4280,76 +4584,43 @@
                  *    hold SGL elements with the last 1 or 2 double-words
                  *    (4 or 8 bytes) un-used. On these controllers, we should
                  *    recognize that there's not enough room for another SGL
                  *    element and move the sge pointer to the next frame.
                  */
-                int             i, j, k, l, frames, sgemax;
-                int             temp;
-                uint8_t         chainflags;
-                uint16_t        chainlength;
-                mptsas_cache_frames_t *p;
 
                 /*
                  * Sgemax is the number of SGE's that will fit
                  * each extra frame and frames is total
                  * number of frames we'll need.  1 sge entry per
                  * frame is reseverd for the chain element thus the -1 below.
                  */
-                sgemax = ((mpt->m_req_frame_size / sizeof (MPI2_SGE_SIMPLE64))
-                    - 1);
-                temp = (cookiec - (MPTSAS_MAX_FRAME_SGES64(mpt) - 1)) / sgemax;
+        sgemax = ((mpt->m_req_frame_size / sizeof (MPI2_SGE_SIMPLE64)) - 1);
+        maxframe_sges = MPTSAS_MAX_FRAME_SGES64(mpt);
+        temp = (cookiec - (maxframe_sges - 1)) / sgemax;
 
                 /*
                  * A little check to see if we need to round up the number
                  * of frames we need
                  */
-                if ((cookiec - (MPTSAS_MAX_FRAME_SGES64(mpt) - 1)) - (temp *
-                    sgemax) > 1) {
+        if ((cookiec - (maxframe_sges - 1)) - (temp * sgemax) > 1) {
                         frames = (temp + 1);
                 } else {
                         frames = temp;
                 }
                 dmap = cmd->cmd_sg;
                 sge = (pMpi2SGESimple64_t)(&frame->SGL);
 
                 /*
                  * First fill in the main frame
                  */
-                for (j = 1; j < MPTSAS_MAX_FRAME_SGES64(mpt); j++) {
-                        ddi_put32(acc_hdl, &sge->Address.Low,
-                            dmap->addr.address64.Low);
-                        ddi_put32(acc_hdl, &sge->Address.High,
-                            dmap->addr.address64.High);
-                        ddi_put32(acc_hdl, &sge->FlagsLength, dmap->count);
-                        flags = ddi_get32(acc_hdl, &sge->FlagsLength);
-                        flags |= ((uint32_t)(MPI2_SGE_FLAGS_SIMPLE_ELEMENT |
-                            MPI2_SGE_FLAGS_SYSTEM_ADDRESS |
-                            MPI2_SGE_FLAGS_64_BIT_ADDRESSING) <<
-                            MPI2_SGE_FLAGS_SHIFT);
-
-                        /*
-                         * If this is the last SGE of this frame
-                         * we set the end of list flag
-                         */
-                        if (j == (MPTSAS_MAX_FRAME_SGES64(mpt) - 1)) {
-                                flags |= ((uint32_t)
-                                    (MPI2_SGE_FLAGS_LAST_ELEMENT) <<
-                                    MPI2_SGE_FLAGS_SHIFT);
-                        }
-                        if (cmd->cmd_flags & CFLAG_DMASEND) {
-                                flags |=
-                                    (MPI2_SGE_FLAGS_HOST_TO_IOC <<
-                                    MPI2_SGE_FLAGS_SHIFT);
-                        } else {
-                                flags |=
-                                    (MPI2_SGE_FLAGS_IOC_TO_HOST <<
-                                    MPI2_SGE_FLAGS_SHIFT);
-                        }
-                        ddi_put32(acc_hdl, &sge->FlagsLength, flags);
-                        dmap++;
-                        sge++;
-                }
+        j = maxframe_sges - 1;
+        mptsas_sge_mainframe(cmd, frame, acc_hdl, j,
+            ((uint32_t)(MPI2_SGE_FLAGS_LAST_ELEMENT) <<
+            MPI2_SGE_FLAGS_SHIFT));
+        dmap += j;
+        sge += j;
+        j++;
 
                 /*
                  * Fill in the chain element in the main frame.
                  * About calculation on ChainOffset:
                  * 1. Struct msg_scsi_io_request has 4 double-words (16 bytes)

@@ -4390,13 +4661,12 @@
 
                 p = cmd->cmd_extra_frames;
 
                 ddi_put16(acc_hdl, &sgechain->Length, chainlength);
                 ddi_put32(acc_hdl, &sgechain->Address.Low,
-                    p->m_phys_addr);
-                /* SGL is allocated in the first 4G mem range */
-                ddi_put32(acc_hdl, &sgechain->Address.High, 0);
+            (p->m_phys_addr&0xffffffffull));
+        ddi_put32(acc_hdl, &sgechain->Address.High, p->m_phys_addr>>32);
 
                 /*
                  * If there are more than 2 frames left we have to
                  * fill in the next chain offset to the location of
                  * the chain element in the next frame.

@@ -4450,16 +4720,18 @@
                                          * k is the frame counter and (k + 1)
                                          * is the number of the next frame.
                                          * Note that frames are in contiguous
                                          * memory space.
                                          */
+                                nframe_phys_addr = p->m_phys_addr +
+                                    (mpt->m_req_frame_size * k);
                                         ddi_put32(p->m_acc_hdl,
                                             &sgechain->Address.Low,
-                                            (p->m_phys_addr +
-                                            (mpt->m_req_frame_size * k)));
+                                    nframe_phys_addr&0xffffffffull);
                                         ddi_put32(p->m_acc_hdl,
-                                            &sgechain->Address.High, 0);
+                                    &sgechain->Address.High,
+                                    nframe_phys_addr>>32);
 
                                         /*
                                          * If there are more than 2 frames left
                                          * we have to next chain offset to
                                          * the location of the chain element

@@ -4558,109 +4830,504 @@
 
                 /*
                  * Sync DMA with the chain buffers that were just created
                  */
                 (void) ddi_dma_sync(p->m_dma_hdl, 0, 0, DDI_DMA_SYNC_FORDEV);
-        }
 }
 
-/*
- * Interrupt handling
- * Utility routine.  Poll for status of a command sent to HBA
- * without interrupts (a FLAG_NOINTR command).
- */
-int
-mptsas_poll(mptsas_t *mpt, mptsas_cmd_t *poll_cmd, int polltime)
+static void
+mptsas_ieee_sge_mainframe(mptsas_cmd_t *cmd, pMpi2SCSIIORequest_t frame,
+    ddi_acc_handle_t acc_hdl, uint_t cookiec,
+    uint8_t end_flag)
 {
-        int     rval = TRUE;
+        pMpi2IeeeSgeSimple64_t  ieeesge;
+        mptti_t                 *dmap;
+        uint8_t                 flags;
 
-        NDBG5(("mptsas_poll: cmd=0x%p", (void *)poll_cmd));
+        dmap = cmd->cmd_sg;
 
-        if ((poll_cmd->cmd_flags & CFLAG_TM_CMD) == 0) {
-                mptsas_restart_hba(mpt);
-        }
+        NDBG1(("mptsas_ieee_sge_mainframe: cookiec=%d, %s", cookiec,
+            cmd->cmd_flags & CFLAG_DMASEND?"Out":"In"));
+
+        ieeesge = (pMpi2IeeeSgeSimple64_t)(&frame->SGL);
+        while (cookiec--) {
+                ddi_put32(acc_hdl, &ieeesge->Address.Low,
+                    dmap->addr.address64.Low);
+                ddi_put32(acc_hdl, &ieeesge->Address.High,
+                    dmap->addr.address64.High);
+                ddi_put32(acc_hdl, &ieeesge->Length, dmap->count);
+                NDBG1(("mptsas_ieee_sge_mainframe: len=%d, high=0x%x",
+                    dmap->count, dmap->addr.address64.High));
+                flags = (MPI2_IEEE_SGE_FLAGS_SIMPLE_ELEMENT |
+                    MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR);
 
         /*
-         * Wait, using drv_usecwait(), long enough for the command to
-         * reasonably return from the target if the target isn't
-         * "dead".  A polled command may well be sent from scsi_poll, and
-         * there are retries built in to scsi_poll if the transport
-         * accepted the packet (TRAN_ACCEPT).  scsi_poll waits 1 second
-         * and retries the transport up to scsi_poll_busycnt times
-         * (currently 60) if
-         * 1. pkt_reason is CMD_INCOMPLETE and pkt_state is 0, or
-         * 2. pkt_reason is CMD_CMPLT and *pkt_scbp has STATUS_BUSY
-         *
-         * limit the waiting to avoid a hang in the event that the
-         * cmd never gets started but we are still receiving interrupts
+                 * If this is the last cookie, we set the flags
+                 * to indicate so
          */
-        while (!(poll_cmd->cmd_flags & CFLAG_FINISHED)) {
-                if (mptsas_wait_intr(mpt, polltime) == FALSE) {
-                        NDBG5(("mptsas_poll: command incomplete"));
-                        rval = FALSE;
-                        break;
-                }
+                if (cookiec == 0) {
+                        flags |= end_flag;
         }
 
-        if (rval == FALSE) {
-
                 /*
-                 * this isn't supposed to happen, the hba must be wedged
-                 * Mark this cmd as a timeout.
+                 * XXX: Hmmm, what about the direction based on
+                 * cmd->cmd_flags & CFLAG_DMASEND?
                  */
-                mptsas_set_pkt_reason(mpt, poll_cmd, CMD_TIMEOUT,
-                    (STAT_TIMEOUT|STAT_ABORTED));
-
-                if (poll_cmd->cmd_queued == FALSE) {
-
-                        NDBG5(("mptsas_poll: not on waitq"));
+                ddi_put8(acc_hdl, &ieeesge->Flags, flags);
+                dmap++;
+                ieeesge++;
+        }
+}
 
-                        poll_cmd->cmd_pkt->pkt_state |=
+static void
+mptsas_ieee_sge_chain(mptsas_t *mpt, mptsas_cmd_t *cmd,
+    pMpi2SCSIIORequest_t frame, ddi_acc_handle_t acc_hdl)
+{
+        pMpi2IeeeSgeSimple64_t  ieeesge;
+        pMpi25IeeeSgeChain64_t  ieeesgechain;
+        uint64_t                nframe_phys_addr;
+        uint_t                  cookiec;
+        mptti_t                 *dmap;
+        uint8_t                 flags;
+        int                     i, j, k, l, frames, sgemax;
+        int                     temp, maxframe_sges;
+        uint8_t                 chainflags;
+        uint32_t                chainlength;
+        mptsas_cache_frames_t   *p;
+
+        cookiec = cmd->cmd_cookiec;
+
+        NDBG1(("mptsas_ieee_sge_chain: cookiec=%d", cookiec));
+
+        /*
+         * Build the IEEE-format scatter/gather list for a command whose
+         * DMA cookies do not all fit in the main request frame.  The
+         * first (maxframe_sges - 1) cookies go into the main frame, a
+         * chain element follows them, and the remaining cookies go into
+         * the extra frames hanging off cmd->cmd_extra_frames.
+         *
+         * Hereby we start to deal with multiple frames.
+         * The process is as follows:
+         * 1. Determine how many frames are needed for SGL element
+         *    storage; Note that all frames are stored in contiguous
+         *    memory space and in 64-bit DMA mode each element is
+         *    4 double-words (16 bytes) long.
+         * 2. Fill up the main frame. We need to do this separately
+         *    since it contains the SCSI IO request header and needs
+         *    dedicated processing. Note that the last 4 double-words
+         *    of the SCSI IO header is for SGL element storage
+         *    (MPI2_SGE_IO_UNION).
+         * 3. Fill the chain element in the main frame, so the DMA
+         *    engine can use the following frames.
+         * 4. Enter a loop to fill the remaining frames. Note that the
+         *    last frame contains no chain element.  The remaining
+         *    frames go into the mpt SGL buffer allocated on the fly,
+         *    not immediately following the main message frame, as in
+         *    Gen1.
+         * Some restrictions:
+         * 1. For 64-bit DMA, the simple element and chain element
+         *    are both of 4 double-words (16 bytes) in size.  Chain
+         *    addresses are written as full 64-bit values (both the
+         *    low and high halves of the frame's physical address),
+         *    so the frames are not assumed to be below 4G.
+         * 2. On some controllers (like the 1064/1068), a frame can
+         *    hold SGL elements with the last 1 or 2 double-words
+         *    (4 or 8 bytes) un-used. On these controllers, we should
+         *    recognize that there's not enough room for another SGL
+         *    element and move the sge pointer to the next frame.
+         */
+
+        /*
+         * Sgemax is the number of SGE's that will fit in
+         * each extra frame and frames is the total
+         * number of frames we'll need.  1 sge entry per
+         * frame is reserved for the chain element thus the -1 below.
+         */
+        sgemax = ((mpt->m_req_frame_size / sizeof (MPI2_IEEE_SGE_SIMPLE64))
+            - 1);
+        maxframe_sges = MPTSAS_MAX_FRAME_SGES64(mpt);
+        temp = (cookiec - (maxframe_sges - 1)) / sgemax;
+
+        /*
+         * A little check to see if we need to round up the number
+         * of frames we need
+         */
+        if ((cookiec - (maxframe_sges - 1)) - (temp * sgemax) > 1) {
+                frames = (temp + 1);
+        } else {
+                frames = temp;
+        }
+        NDBG1(("mptsas_ieee_sge_chain: temp=%d, frames=%d", temp, frames));
+        dmap = cmd->cmd_sg;
+        ieeesge = (pMpi2IeeeSgeSimple64_t)(&frame->SGL);
+
+        /*
+         * First fill in the main frame.  No end flag is passed because
+         * the list continues through the chain element written below.
+         */
+        j = maxframe_sges - 1;
+        mptsas_ieee_sge_mainframe(cmd, frame, acc_hdl, j, 0);
+        dmap += j;
+        ieeesge += j;
+        j++;
+
+        /*
+         * Fill in the chain element in the main frame.
+         * About calculation on ChainOffset:
+         * 1. Struct msg_scsi_io_request has 4 double-words (16 bytes)
+         *    in the end reserved for SGL element storage
+         *    (MPI2_SGE_IO_UNION); we should count it in our
+         *    calculation.  See its definition in the header file.
+         * 2. Constant j is the counter of the current SGL element
+         *    that will be processed, and (j - 1) is the number of
+         *    SGL elements that have been processed (stored in the
+         *    main frame).
+         * 3. ChainOffset value should be in units of quad-words (16
+         *    bytes) so the last value should be divided by 16.
+         */
+        ddi_put8(acc_hdl, &frame->ChainOffset,
+            (sizeof (MPI2_SCSI_IO_REQUEST) -
+            sizeof (MPI2_SGE_IO_UNION) +
+            (j - 1) * sizeof (MPI2_IEEE_SGE_SIMPLE64)) >> 4);
+        ieeesgechain = (pMpi25IeeeSgeChain64_t)ieeesge;
+        chainflags = (MPI2_IEEE_SGE_FLAGS_CHAIN_ELEMENT |
+            MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR);
+        ddi_put8(acc_hdl, &ieeesgechain->Flags, chainflags);
+
+        /*
+         * The size of the next frame is the accurate size of space
+         * (in bytes) used to store the SGL elements. j is the counter
+         * of SGL elements. (j - 1) is the number of SGL elements that
+         * have been processed (stored in frames).
+         */
+        if (frames >= 2) {
+                chainlength = mpt->m_req_frame_size /
+                    sizeof (MPI2_IEEE_SGE_SIMPLE64) *
+                    sizeof (MPI2_IEEE_SGE_SIMPLE64);
+        } else {
+                chainlength = ((cookiec - (j - 1)) *
+                    sizeof (MPI2_IEEE_SGE_SIMPLE64));
+        }
+
+        p = cmd->cmd_extra_frames;
+
+        ddi_put32(acc_hdl, &ieeesgechain->Length, chainlength);
+        ddi_put32(acc_hdl, &ieeesgechain->Address.Low,
+            p->m_phys_addr&0xffffffffull);
+        ddi_put32(acc_hdl, &ieeesgechain->Address.High, p->m_phys_addr>>32);
+
+        /*
+         * If there are 2 or more frames left we have to
+         * fill in the next chain offset to the location of
+         * the chain element in the next frame.
+         * sgemax is the number of simple elements in an extra
+         * frame. Note that the value written to NextChainOffset
+         * is the byte offset shifted right by 4, i.e. it is in
+         * units of 16 bytes.
+         */
+        if (frames >= 2) {
+                ddi_put8(acc_hdl, &ieeesgechain->NextChainOffset,
+                    (sgemax * sizeof (MPI2_IEEE_SGE_SIMPLE64)) >> 4);
+        } else {
+                ddi_put8(acc_hdl, &ieeesgechain->NextChainOffset, 0);
+        }
+
+        /*
+         * Jump to next frame;
+         * Starting here, chain buffers go into the per command SGL.
+         * This buffer is allocated when chain buffers are needed.
+         */
+        ieeesge = (pMpi2IeeeSgeSimple64_t)p->m_frames_addr;
+        i = cookiec;
+
+        /*
+         * Start filling in frames with SGE's.  If we
+         * reach the end of frame and still have SGE's
+         * to fill we need to add a chain element and
+         * use another frame.  j will be our counter
+         * for what cookie we are at and i will be
+         * the total cookiec. k is the current frame
+         */
+        for (k = 1; k <= frames; k++) {
+                for (l = 1; (l <= (sgemax + 1)) && (j <= i); j++, l++) {
+
+                        /*
+                         * If we have reached the end of frame
+                         * and we have more SGE's to fill in
+                         * we have to fill the final entry
+                         * with a chain element and then
+                         * continue to the next frame.  j is
+                         * decremented because the chain element
+                         * does not consume a cookie.
+                         */
+                        if ((l == (sgemax + 1)) && (k != frames)) {
+                                ieeesgechain = (pMpi25IeeeSgeChain64_t)ieeesge;
+                                j--;
+                                chainflags =
+                                    MPI2_IEEE_SGE_FLAGS_CHAIN_ELEMENT |
+                                    MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR;
+                                ddi_put8(p->m_acc_hdl,
+                                    &ieeesgechain->Flags, chainflags);
+                                /*
+                                 * k is the frame counter and (k + 1)
+                                 * is the number of the next frame.
+                                 * Note that frames are in contiguous
+                                 * memory space.
+                                 */
+                                nframe_phys_addr = p->m_phys_addr +
+                                    (mpt->m_req_frame_size * k);
+                                ddi_put32(p->m_acc_hdl,
+                                    &ieeesgechain->Address.Low,
+                                    nframe_phys_addr&0xffffffffull);
+                                ddi_put32(p->m_acc_hdl,
+                                    &ieeesgechain->Address.High,
+                                    nframe_phys_addr>>32);
+
+                                /*
+                                 * If there are 2 or more frames left
+                                 * we have to set the next chain offset
+                                 * to the location of the chain element
+                                 * in the next frame and fill in the
+                                 * length of the next chain
+                                 */
+                                if ((frames - k) >= 2) {
+                                        ddi_put8(p->m_acc_hdl,
+                                            &ieeesgechain->NextChainOffset,
+                                            (sgemax *
+                                            sizeof (MPI2_IEEE_SGE_SIMPLE64))
+                                            >> 4);
+                                        ddi_put32(p->m_acc_hdl,
+                                            &ieeesgechain->Length,
+                                            mpt->m_req_frame_size /
+                                            sizeof (MPI2_IEEE_SGE_SIMPLE64) *
+                                            sizeof (MPI2_IEEE_SGE_SIMPLE64));
+                                } else {
+                                        /*
+                                         * The next frame is the last
+                                         * one. Set the NextChainOffset
+                                         * to 0 and Length is the total
+                                         * size of all remaining simple
+                                         * elements
+                                         */
+                                        ddi_put8(p->m_acc_hdl,
+                                            &ieeesgechain->NextChainOffset,
+                                            0);
+                                        ddi_put32(p->m_acc_hdl,
+                                            &ieeesgechain->Length,
+                                            (cookiec - j) *
+                                            sizeof (MPI2_IEEE_SGE_SIMPLE64));
+                                }
+
+                                /* Jump to the next frame */
+                                ieeesge = (pMpi2IeeeSgeSimple64_t)
+                                    ((char *)p->m_frames_addr +
+                                    (int)mpt->m_req_frame_size * k);
+
+                                continue;
+                        }
+
+                        ddi_put32(p->m_acc_hdl,
+                            &ieeesge->Address.Low,
+                            dmap->addr.address64.Low);
+                        ddi_put32(p->m_acc_hdl,
+                            &ieeesge->Address.High,
+                            dmap->addr.address64.High);
+                        ddi_put32(p->m_acc_hdl,
+                            &ieeesge->Length, dmap->count);
+                        flags = (MPI2_IEEE_SGE_FLAGS_SIMPLE_ELEMENT |
+                            MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR);
+
+                        /*
+                         * If we are at the end of the frame and
+                         * there is another frame to fill in
+                         * do we need to do anything?
+                         * if ((l == sgemax) && (k != frames)) {
+                         * }
+                         */
+
+                        /*
+                         * If this is the final cookie set end of list.
+                         */
+                        if (j == i) {
+                                flags |= MPI25_IEEE_SGE_FLAGS_END_OF_LIST;
+                        }
+
+                        ddi_put8(p->m_acc_hdl, &ieeesge->Flags, flags);
+                        dmap++;
+                        ieeesge++;
+                }
+        }
+
+        /*
+         * Sync DMA with the chain buffers that were just created
+         */
+        (void) ddi_dma_sync(p->m_dma_hdl, 0, 0, DDI_DMA_SYNC_FORDEV);
+}
+
+static void
+mptsas_sge_setup(mptsas_t *mpt, mptsas_cmd_t *cmd, uint32_t *control,
+    pMpi2SCSIIORequest_t frame, ddi_acc_handle_t acc_hdl)
+{
+        ASSERT(cmd->cmd_flags & CFLAG_DMAVALID);
+
+        NDBG1(("mptsas_sge_setup: cookiec=%d", cmd->cmd_cookiec));
+
+        /*
+         * This routine fills in the data transfer parts of the SCSI IO
+         * request frame: the direction bit in *control, the DataLength
+         * field and the scatter/gather list.
+         *
+         * Set read/write bit in control.  CFLAG_DMASEND marks a
+         * transfer out to the device, i.e. a write.
+         */
+        if (cmd->cmd_flags & CFLAG_DMASEND) {
+                *control |= MPI2_SCSIIO_CONTROL_WRITE;
+        } else {
+                *control |= MPI2_SCSIIO_CONTROL_READ;
+        }
+
+        ddi_put32(acc_hdl, &frame->DataLength, cmd->cmd_dmacount);
+
+        /*
+         * We have 4 cases here.  First where we can fit all the
+         * SG elements into the main frame, and the case
+         * where we can't. The SG element is also different when using
+         * MPI2.5 interface.
+         * If we have more cookies than we can attach to a frame
+         * we will need to use a chain element to point
+         * a location of memory where the rest of the S/G
+         * elements reside.
+         *
+         * The four cases are therefore:
+         *   fits in main frame, MPI2.5     -> mptsas_ieee_sge_mainframe
+         *   fits in main frame, not MPI2.5 -> mptsas_sge_mainframe
+         *   needs chaining, MPI2.5         -> mptsas_ieee_sge_chain
+         *   needs chaining, not MPI2.5     -> mptsas_sge_chain
+         * In the two main-frame cases the final element is tagged with
+         * the end-of-list flag(s) passed as the last argument.
+         */
+        if (cmd->cmd_cookiec <= MPTSAS_MAX_FRAME_SGES64(mpt)) {
+                if (mpt->m_MPI25) {
+                        mptsas_ieee_sge_mainframe(cmd, frame, acc_hdl,
+                            cmd->cmd_cookiec,
+                            MPI25_IEEE_SGE_FLAGS_END_OF_LIST);
+                } else {
+                        mptsas_sge_mainframe(cmd, frame, acc_hdl,
+                            cmd->cmd_cookiec,
+                            ((uint32_t)(MPI2_SGE_FLAGS_LAST_ELEMENT
+                            | MPI2_SGE_FLAGS_END_OF_BUFFER
+                            | MPI2_SGE_FLAGS_END_OF_LIST) <<
+                            MPI2_SGE_FLAGS_SHIFT));
+                }
+        } else {
+                if (mpt->m_MPI25) {
+                        mptsas_ieee_sge_chain(mpt, cmd, frame, acc_hdl);
+                } else {
+                        mptsas_sge_chain(mpt, cmd, frame, acc_hdl);
+                }
+        }
+}
+
+/*
+ * Interrupt handling
+ * Utility routine.  Poll for status of a command sent to HBA
+ * without interrupts (a FLAG_NOINTR command).
+ *
+ * The HostInterruptMask register is saved and interrupts are disabled
+ * for the whole polling period; m_polled_intr is set while polling.
+ * Both are restored before returning, after which any commands still
+ * on the wait queue are restarted.
+ *
+ * Returns TRUE if poll_cmd reached CFLAG_FINISHED, or FALSE if
+ * mptsas_wait_intr() returned FALSE first.  In the FALSE case the
+ * packet is marked CMD_TIMEOUT and, if it was still queued, it is
+ * removed from the wait queue.
+ */
+int
+mptsas_poll(mptsas_t *mpt, mptsas_cmd_t *poll_cmd, int polltime)
+{
+        int             rval = TRUE;
+        uint32_t        int_mask;
+
+        NDBG5(("mptsas_poll: cmd=0x%p, flags 0x%x", (void *)poll_cmd,
+            poll_cmd->cmd_flags));
+
+        /*
+         * Get the current interrupt mask and disable interrupts.  When
+         * re-enabling ints, set mask to saved value.
+         */
+        int_mask = ddi_get32(mpt->m_datap, &mpt->m_reg->HostInterruptMask);
+        MPTSAS_DISABLE_INTR(mpt);
+
+        mpt->m_polled_intr = 1;
+
+        if ((poll_cmd->cmd_flags & CFLAG_TM_CMD) == 0) {
+                mptsas_restart_hba(mpt);
+        }
+
+        /*
+         * Wait, using drv_usecwait(), long enough for the command to
+         * reasonably return from the target if the target isn't
+         * "dead".  A polled command may well be sent from scsi_poll, and
+         * there are retries built in to scsi_poll if the transport
+         * accepted the packet (TRAN_ACCEPT).  scsi_poll waits 1 second
+         * and retries the transport up to scsi_poll_busycnt times
+         * (currently 60) if
+         * 1. pkt_reason is CMD_INCOMPLETE and pkt_state is 0, or
+         * 2. pkt_reason is CMD_CMPLT and *pkt_scbp has STATUS_BUSY
+         *
+         * limit the waiting to avoid a hang in the event that the
+         * cmd never gets started but we are still receiving interrupts
+         */
+        while (!(poll_cmd->cmd_flags & CFLAG_FINISHED)) {
+                if (mptsas_wait_intr(mpt, polltime) == FALSE) {
+                        NDBG5(("mptsas_poll: command incomplete"));
+                        rval = FALSE;
+                        break;
+                }
+        }
+
+        if (rval == FALSE) {
+
+                /*
+                 * this isn't supposed to happen, the hba must be wedged
+                 * Mark this cmd as a timeout.
+                 */
+                mptsas_set_pkt_reason(mpt, poll_cmd, CMD_TIMEOUT,
+                    (STAT_TIMEOUT|STAT_ABORTED));
+
+                if (poll_cmd->cmd_queued == FALSE) {
+
+                        NDBG5(("mptsas_poll: not on waitq"));
+
+                        poll_cmd->cmd_pkt->pkt_state |=
                             (STATE_GOT_BUS|STATE_GOT_TARGET|STATE_SENT_CMD);
                 } else {
 
                         /* find and remove it from the waitq */
                         NDBG5(("mptsas_poll: delete from waitq"));
                         mptsas_waitq_delete(mpt, poll_cmd);
                 }
 
         }
         mptsas_fma_check(mpt, poll_cmd);
+
+        /*
+         * Clear polling flag, re-enable interrupts by restoring the
+         * mask that was saved on entry.
+         */
+        mpt->m_polled_intr = 0;
+        ddi_put32(mpt->m_datap, &mpt->m_reg->HostInterruptMask, int_mask);
+
+        /*
+         * If there are queued cmds, start them now.
+         */
+        if (mpt->m_waitq != NULL) {
+                mptsas_restart_waitq(mpt);
+        }
+
         NDBG5(("mptsas_poll: done"));
         return (rval);
 }
 
 /*
  * Used for polling cmds and TM function
  */
 static int
 mptsas_wait_intr(mptsas_t *mpt, int polltime)
 {
-        int                             cnt;
+        int                             cnt, rval = FALSE;
         pMpi2ReplyDescriptorsUnion_t    reply_desc_union;
-        uint32_t                        int_mask;
+        mptsas_reply_pqueue_t           *rpqp;
 
         NDBG5(("mptsas_wait_intr"));
-
-        mpt->m_polled_intr = 1;
+        ASSERT(mutex_owned(&mpt->m_mutex));
 
         /*
-         * Get the current interrupt mask and disable interrupts.  When
-         * re-enabling ints, set mask to saved value.
+         * Keep polling for at least (polltime * 1000) seconds
          */
-        int_mask = ddi_get32(mpt->m_datap, &mpt->m_reg->HostInterruptMask);
-        MPTSAS_DISABLE_INTR(mpt);
+        rpqp = mpt->m_rep_post_queues;
 
         /*
-         * Keep polling for at least (polltime * 1000) seconds
+         * Drop the main mutex and grab the mutex for reply queue 0
          */
+        mutex_exit(&mpt->m_mutex);
+        mutex_enter(&rpqp->rpq_mutex);
         for (cnt = 0; cnt < polltime; cnt++) {
                 (void) ddi_dma_sync(mpt->m_dma_post_queue_hdl, 0, 0,
                     DDI_DMA_SYNC_FORCPU);
 
+                /*
+                 * Polled requests should only come back through
+                 * the first interrupt.
+                 */
                 reply_desc_union = (pMpi2ReplyDescriptorsUnion_t)
-                    MPTSAS_GET_NEXT_REPLY(mpt, mpt->m_post_index);
+                    MPTSAS_GET_NEXT_REPLY(rpqp, rpqp->rpq_index);
 
                 if (ddi_get32(mpt->m_acc_post_queue_hdl,
                     &reply_desc_union->Words.Low) == 0xFFFFFFFF ||
                     ddi_get32(mpt->m_acc_post_queue_hdl,
                     &reply_desc_union->Words.High) == 0xFFFFFFFF) {

@@ -4670,52 +5337,51 @@
 
                 /*
                  * The reply is valid, process it according to its
                  * type.
                  */
-                mptsas_process_intr(mpt, reply_desc_union);
-
-                if (++mpt->m_post_index == mpt->m_post_queue_depth) {
-                        mpt->m_post_index = 0;
-                }
-
-                /*
-                 * Update the global reply index
-                 */
-                ddi_put32(mpt->m_datap,
-                    &mpt->m_reg->ReplyPostHostIndex, mpt->m_post_index);
-                mpt->m_polled_intr = 0;
+                mptsas_process_intr(mpt, rpqp, reply_desc_union);
 
                 /*
-                 * Re-enable interrupts and quit.
+                 * Clear the reply descriptor for re-use.
                  */
-                ddi_put32(mpt->m_datap, &mpt->m_reg->HostInterruptMask,
-                    int_mask);
-                return (TRUE);
+                ddi_put64(mpt->m_acc_post_queue_hdl,
+                    &((uint64_t *)(void *)rpqp->rpq_queue)[rpqp->rpq_index],
+                    0xFFFFFFFFFFFFFFFF);
+                (void) ddi_dma_sync(mpt->m_dma_post_queue_hdl, 0, 0,
+                    DDI_DMA_SYNC_FORDEV);
 
+                if (++rpqp->rpq_index == mpt->m_post_queue_depth) {
+                        rpqp->rpq_index = 0;
         }
 
         /*
-         * Clear polling flag, re-enable interrupts and quit.
+                 * Update the reply index
          */
-        mpt->m_polled_intr = 0;
-        ddi_put32(mpt->m_datap, &mpt->m_reg->HostInterruptMask, int_mask);
-        return (FALSE);
+                ddi_put32(mpt->m_datap,
+                    &mpt->m_reg->ReplyPostHostIndex, rpqp->rpq_index);
+                rval = TRUE;
+                break;
+        }
+
+        mutex_exit(&rpqp->rpq_mutex);
+        mutex_enter(&mpt->m_mutex);
+
+        return (rval);
 }
 
 static void
 mptsas_handle_scsi_io_success(mptsas_t *mpt,
+    mptsas_reply_pqueue_t *rpqp,
     pMpi2ReplyDescriptorsUnion_t reply_desc)
 {
         pMpi2SCSIIOSuccessReplyDescriptor_t     scsi_io_success;
         uint16_t                                SMID;
         mptsas_slots_t                          *slots = mpt->m_active;
         mptsas_cmd_t                            *cmd = NULL;
         struct scsi_pkt                         *pkt;
 
-        ASSERT(mutex_owned(&mpt->m_mutex));
-
         scsi_io_success = (pMpi2SCSIIOSuccessReplyDescriptor_t)reply_desc;
         SMID = ddi_get16(mpt->m_acc_post_queue_hdl, &scsi_io_success->SMID);
 
         /*
          * This is a success reply so just complete the IO.  First, do a sanity

@@ -4737,10 +5403,11 @@
         if (cmd == NULL) {
                 mptsas_log(mpt, CE_WARN, "?NULL command for successful SCSI IO "
                     "in slot %d", SMID);
                 return;
         }
+        ASSERT(cmd->cmd_rpqidx == rpqp->rpq_num);
 
         pkt = CMD2PKT(cmd);
         pkt->pkt_state |= (STATE_GOT_BUS | STATE_GOT_TARGET | STATE_SENT_CMD |
             STATE_GOT_STATUS);
         if (cmd->cmd_flags & CFLAG_DMAVALID) {

@@ -4750,12 +5417,34 @@
 
         if (cmd->cmd_flags & CFLAG_PASSTHRU) {
                 cmd->cmd_flags |= CFLAG_FINISHED;
                 cv_broadcast(&mpt->m_passthru_cv);
                 return;
-        } else {
+        }
+        if (!(cmd->cmd_flags & CFLAG_TM_CMD)) {
+                if (cmd->cmd_flags & CFLAG_CMDIOC) {
+                        mutex_enter(&mpt->m_mutex);
                 mptsas_remove_cmd(mpt, cmd);
+                        mutex_exit(&mpt->m_mutex);
+                } else {
+#ifdef MPTSAS_DEBUG
+                        /*
+                         * In order to test timeout for a command set
+                         * mptsas_test_timeout via mdb to avoid completion
+                         * processing here.
+                         */
+                        if (mptsas_test_timeout) {
+                                mptsas_test_timeout = 0;
+                                return;
+                        }
+#endif
+                        /*
+                         * This is the normal path, avoid grabbing
+                         * the m_mutex.
+                         */
+                        mptsas_remove_cmd_nomtx(mpt, cmd);
+                }
         }
 
         if (cmd->cmd_flags & CFLAG_RETRY) {
                 /*
                  * The target returned QFULL or busy, do not add this

@@ -4766,22 +5455,22 @@
                  * mptsas_handle_qfull() or in mptsas_check_scsi_io_error().
                  * Remove this cmd_flag here.
                  */
                 cmd->cmd_flags &= ~CFLAG_RETRY;
         } else {
-                mptsas_doneq_add(mpt, cmd);
+                mptsas_rpdoneq_add(mpt, rpqp, cmd);
         }
 }
 
 static void
 mptsas_handle_address_reply(mptsas_t *mpt,
     pMpi2ReplyDescriptorsUnion_t reply_desc)
 {
         pMpi2AddressReplyDescriptor_t   address_reply;
         pMPI2DefaultReply_t             reply;
         mptsas_fw_diagnostic_buffer_t   *pBuffer;
-        uint32_t                        reply_addr;
+        uint32_t                        reply_addr, reply_frame_dma_baseaddr;
         uint16_t                        SMID, iocstatus;
         mptsas_slots_t                  *slots = mpt->m_active;
         mptsas_cmd_t                    *cmd = NULL;
         uint8_t                         function, buffer_type;
         m_replyh_arg_t                  *args;

@@ -4796,27 +5485,31 @@
 
         /*
          * If reply frame is not in the proper range we should ignore this
          * message and exit the interrupt handler.
          */
-        if ((reply_addr < mpt->m_reply_frame_dma_addr) ||
-            (reply_addr >= (mpt->m_reply_frame_dma_addr +
+        reply_frame_dma_baseaddr = mpt->m_reply_frame_dma_addr & 0xfffffffful;
+        if ((reply_addr < reply_frame_dma_baseaddr) ||
+            (reply_addr >= (reply_frame_dma_baseaddr +
             (mpt->m_reply_frame_size * mpt->m_max_replies))) ||
-            ((reply_addr - mpt->m_reply_frame_dma_addr) %
+            ((reply_addr - reply_frame_dma_baseaddr) %
             mpt->m_reply_frame_size != 0)) {
                 mptsas_log(mpt, CE_WARN, "?Received invalid reply frame "
                     "address 0x%x\n", reply_addr);
                 ddi_fm_service_impact(mpt->m_dip, DDI_SERVICE_UNAFFECTED);
                 return;
         }
 
         (void) ddi_dma_sync(mpt->m_dma_reply_frame_hdl, 0, 0,
             DDI_DMA_SYNC_FORCPU);
         reply = (pMPI2DefaultReply_t)(mpt->m_reply_frame + (reply_addr -
-            mpt->m_reply_frame_dma_addr));
+            reply_frame_dma_baseaddr));
         function = ddi_get8(mpt->m_acc_reply_frame_hdl, &reply->Function);
 
+        NDBG31(("mptsas_handle_address_reply: function 0x%x, reply_addr=0x%x",
+            function, reply_addr));
+
         /*
          * don't get slot information and command for events since these values
          * don't exist
          */
         if ((function != MPI2_FUNCTION_EVENT_NOTIFICATION) &&

@@ -4841,13 +5534,12 @@
                 if (cmd == NULL) {
                         mptsas_log(mpt, CE_WARN, "?NULL command for address "
                             "reply in slot %d", SMID);
                         return;
                 }
-                if ((cmd->cmd_flags & CFLAG_PASSTHRU) ||
-                    (cmd->cmd_flags & CFLAG_CONFIG) ||
-                    (cmd->cmd_flags & CFLAG_FW_DIAG)) {
+                if ((cmd->cmd_flags &
+                    (CFLAG_PASSTHRU | CFLAG_CONFIG | CFLAG_FW_DIAG))) {
                         cmd->cmd_rfm = reply_addr;
                         cmd->cmd_flags |= CFLAG_FINISHED;
                         cv_broadcast(&mpt->m_passthru_cv);
                         cv_broadcast(&mpt->m_config_cv);
                         cv_broadcast(&mpt->m_fw_diag_cv);

@@ -4873,11 +5565,11 @@
         case MPI2_FUNCTION_FW_DOWNLOAD:
                 cmd->cmd_flags |= CFLAG_FINISHED;
                 cv_signal(&mpt->m_fw_cv);
                 break;
         case MPI2_FUNCTION_EVENT_NOTIFICATION:
-                reply_frame_no = (reply_addr - mpt->m_reply_frame_dma_addr) /
+                reply_frame_no = (reply_addr - reply_frame_dma_baseaddr) /
                     mpt->m_reply_frame_size;
                 args = &mpt->m_replyh_args[reply_frame_no];
                 args->mpt = (void *)mpt;
                 args->rfm = reply_addr;
 

@@ -4984,11 +5676,11 @@
         if (cmd->cmd_flags & CFLAG_FW_CMD)
                 return;
 
         if (cmd->cmd_flags & CFLAG_RETRY) {
                 /*
-                 * The target returned QFULL or busy, do not add tihs
+                 * The target returned QFULL or busy, do not add this
                  * pkt to the doneq since the hba will retry
                  * this cmd.
                  *
                  * The pkt has already been resubmitted in
                  * mptsas_handle_qfull() or in mptsas_check_scsi_io_error().

@@ -4998,45 +5690,51 @@
         } else {
                 mptsas_doneq_add(mpt, cmd);
         }
 }
 
+#ifdef MPTSAS_DEBUG
+static uint8_t mptsas_last_sense[256];
+#endif
+
 static void
 mptsas_check_scsi_io_error(mptsas_t *mpt, pMpi2SCSIIOReply_t reply,
     mptsas_cmd_t *cmd)
 {
         uint8_t                 scsi_status, scsi_state;
-        uint16_t                ioc_status;
+        uint16_t                ioc_status, cmd_rqs_len;
         uint32_t                xferred, sensecount, responsedata, loginfo = 0;
         struct scsi_pkt         *pkt;
         struct scsi_arq_status  *arqstat;
-        struct buf              *bp;
         mptsas_target_t         *ptgt = cmd->cmd_tgt_addr;
         uint8_t                 *sensedata = NULL;
-
-        if ((cmd->cmd_flags & (CFLAG_SCBEXTERN | CFLAG_EXTARQBUFVALID)) ==
-            (CFLAG_SCBEXTERN | CFLAG_EXTARQBUFVALID)) {
-                bp = cmd->cmd_ext_arq_buf;
-        } else {
-                bp = cmd->cmd_arq_buf;
-        }
+        uint64_t                sas_wwn;
+        uint8_t                 phy;
+        char                    wwn_str[MPTSAS_WWN_STRLEN];
 
         scsi_status = ddi_get8(mpt->m_acc_reply_frame_hdl, &reply->SCSIStatus);
         ioc_status = ddi_get16(mpt->m_acc_reply_frame_hdl, &reply->IOCStatus);
         scsi_state = ddi_get8(mpt->m_acc_reply_frame_hdl, &reply->SCSIState);
         xferred = ddi_get32(mpt->m_acc_reply_frame_hdl, &reply->TransferCount);
         sensecount = ddi_get32(mpt->m_acc_reply_frame_hdl, &reply->SenseCount);
         responsedata = ddi_get32(mpt->m_acc_reply_frame_hdl,
             &reply->ResponseInfo);
 
         if (ioc_status & MPI2_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE) {
+                sas_wwn = ptgt->m_addr.mta_wwn;
+                phy = ptgt->m_phynum;
+                if (sas_wwn == 0) {
+                        (void) sprintf(wwn_str, "p%x", phy);
+                } else {
+                        (void) sprintf(wwn_str, "w%016"PRIx64, sas_wwn);
+                }
                 loginfo = ddi_get32(mpt->m_acc_reply_frame_hdl,
                     &reply->IOCLogInfo);
                 mptsas_log(mpt, CE_NOTE,
-                    "?Log info 0x%x received for target %d.\n"
+                    "?Log info 0x%x received for target %d %s.\n"
                     "\tscsi_status=0x%x, ioc_status=0x%x, scsi_state=0x%x",
-                    loginfo, Tgt(cmd), scsi_status, ioc_status,
+                    loginfo, Tgt(cmd), wwn_str, scsi_status, ioc_status,
                     scsi_state);
         }
 
         NDBG31(("\t\tscsi_status=0x%x, ioc_status=0x%x, scsi_state=0x%x",
             scsi_status, ioc_status, scsi_state));

@@ -5057,14 +5755,16 @@
         if ((scsi_state & MPI2_SCSI_STATE_NO_SCSI_STATUS) &&
             ((ioc_status & MPI2_IOCSTATUS_MASK) ==
             MPI2_IOCSTATUS_SCSI_DEVICE_NOT_THERE)) {
                 pkt->pkt_reason = CMD_INCOMPLETE;
                 pkt->pkt_state |= STATE_GOT_BUS;
+                mutex_enter(&ptgt->m_t_mutex);
                 if (ptgt->m_reset_delay == 0) {
                         mptsas_set_throttle(mpt, ptgt,
                             DRAIN_THROTTLE);
                 }
+                mutex_exit(&ptgt->m_t_mutex);
                 return;
         }
 
         if (scsi_state & MPI2_SCSI_STATE_RESPONSE_INFO_VALID) {
                 responsedata &= 0x000000FF;

@@ -5076,10 +5776,12 @@
         }
 
 
         switch (scsi_status) {
         case MPI2_SCSI_STATUS_CHECK_CONDITION:
+                (void) ddi_dma_sync(mpt->m_dma_req_sense_hdl, 0, 0,
+                    DDI_DMA_SYNC_FORCPU);
                 pkt->pkt_resid = (cmd->cmd_dmacount - xferred);
                 arqstat = (void*)(pkt->pkt_scbp);
                 arqstat->sts_rqpkt_status = *((struct scsi_status *)
                     (pkt->pkt_scbp));
                 pkt->pkt_state |= (STATE_GOT_BUS | STATE_GOT_TARGET |

@@ -5093,15 +5795,23 @@
                 arqstat->sts_rqpkt_reason = pkt->pkt_reason;
                 arqstat->sts_rqpkt_state  = pkt->pkt_state;
                 arqstat->sts_rqpkt_state |= STATE_XFERRED_DATA;
                 arqstat->sts_rqpkt_statistics = pkt->pkt_statistics;
                 sensedata = (uint8_t *)&arqstat->sts_sensedata;
-
-                bcopy((uchar_t *)bp->b_un.b_addr, sensedata,
+#ifdef MPTSAS_DEBUG
+                bcopy((uchar_t *)cmd->cmd_arq_buf, mptsas_last_sense,
+                    cmd->cmd_rqslen);
+#endif
+                if (cmd->cmd_extrqslen != 0) {
+                        cmd_rqs_len = cmd->cmd_extrqslen;
+                } else {
+                        cmd_rqs_len = cmd->cmd_rqslen;
+                }
+                bcopy((uchar_t *)cmd->cmd_arq_buf, sensedata,
                     ((cmd->cmd_rqslen >= sensecount) ? sensecount :
-                    cmd->cmd_rqslen));
-                arqstat->sts_rqpkt_resid = (cmd->cmd_rqslen - sensecount);
+                    cmd_rqs_len));
+                arqstat->sts_rqpkt_resid = (cmd_rqs_len - sensecount);
                 cmd->cmd_flags |= CFLAG_CMDARQ;
                 /*
                  * Set proper status for pkt if autosense was valid
                  */
                 if (scsi_state & MPI2_SCSI_STATE_AUTOSENSE_VALID) {

@@ -5149,10 +5859,12 @@
 
                         if ((ddi_taskq_dispatch(mpt->m_dr_taskq,
                             mptsas_handle_dr,
                             (void *)topo_node,
                             DDI_NOSLEEP)) != DDI_SUCCESS) {
+                                kmem_free(topo_node,
+                                    sizeof (mptsas_topo_change_list_t));
                                 mptsas_log(mpt, CE_NOTE, "mptsas start taskq"
                                     "for handle SAS dynamic reconfigure"
                                     "failed. \n");
                         }
                 }

@@ -5160,13 +5872,15 @@
         case MPI2_SCSI_STATUS_GOOD:
                 switch (ioc_status & MPI2_IOCSTATUS_MASK) {
                 case MPI2_IOCSTATUS_SCSI_DEVICE_NOT_THERE:
                         pkt->pkt_reason = CMD_DEV_GONE;
                         pkt->pkt_state |= STATE_GOT_BUS;
+                        mutex_enter(&ptgt->m_t_mutex);
                         if (ptgt->m_reset_delay == 0) {
                                 mptsas_set_throttle(mpt, ptgt, DRAIN_THROTTLE);
                         }
+                        mutex_exit(&ptgt->m_t_mutex);
                         NDBG31(("lost disk for target%d, command:%x",
                             Tgt(cmd), pkt->pkt_cdbp[0]));
                         break;
                 case MPI2_IOCSTATUS_SCSI_DATA_OVERRUN:
                         NDBG31(("data overrun: xferred=%d", xferred));

@@ -5187,12 +5901,22 @@
                         if (pkt->pkt_resid != cmd->cmd_dmacount) {
                                 pkt->pkt_state |= STATE_XFERRED_DATA;
                         }
                         break;
                 case MPI2_IOCSTATUS_SCSI_TASK_TERMINATED:
-                        mptsas_set_pkt_reason(mpt,
-                            cmd, CMD_RESET, STAT_BUS_RESET);
+                        if (cmd->cmd_active_expiration <= gethrtime()) {
+                                /*
+                                 * When timeout requested, propagate
+                                 * proper reason and statistics to
+                                 * target drivers.
+                                 */
+                                mptsas_set_pkt_reason(mpt, cmd, CMD_TIMEOUT,
+                                    STAT_BUS_RESET | STAT_TIMEOUT);
+                        } else {
+                                mptsas_set_pkt_reason(mpt, cmd, CMD_RESET,
+                                    STAT_BUS_RESET);
+                        }
                         break;
                 case MPI2_IOCSTATUS_SCSI_IOC_TERMINATED:
                 case MPI2_IOCSTATUS_SCSI_EXT_TERMINATED:
                         mptsas_set_pkt_reason(mpt,
                             cmd, CMD_RESET, STAT_DEV_RESET);

@@ -5208,20 +5932,18 @@
                         /*
                          * set throttles to drain
                          */
                         for (ptgt = refhash_first(mpt->m_targets); ptgt != NULL;
                             ptgt = refhash_next(mpt->m_targets, ptgt)) {
-                                mptsas_set_throttle(mpt, ptgt, DRAIN_THROTTLE);
+                                mptsas_set_throttle_mtx(mpt, ptgt,
+                                    DRAIN_THROTTLE);
                         }
 
                         /*
                          * retry command
                          */
-                        cmd->cmd_flags |= CFLAG_RETRY;
-                        cmd->cmd_pkt_flags |= FLAG_HEAD;
-
-                        (void) mptsas_accept_pkt(mpt, cmd);
+                        mptsas_retry_pkt(mpt, cmd);
                         break;
                 default:
                         mptsas_log(mpt, CE_WARN,
                             "unknown ioc_status = %x\n", ioc_status);
                         mptsas_log(mpt, CE_CONT, "scsi_state = %x, transfer "

@@ -5301,21 +6023,21 @@
                 break;
         }
 }
 
 static void
-mptsas_doneq_thread(mptsas_doneq_thread_arg_t *arg)
+mptsas_doneq_thread(mptsas_thread_arg_t *arg)
 {
         mptsas_t                        *mpt = arg->mpt;
-        uint64_t                        t = arg->t;
+        uint32_t                        t = arg->t;
         mptsas_cmd_t                    *cmd;
         struct scsi_pkt                 *pkt;
         mptsas_doneq_thread_list_t      *item = &mpt->m_doneq_thread_id[t];
 
         mutex_enter(&item->mutex);
         while (item->flag & MPTSAS_DONEQ_THREAD_ACTIVE) {
-                if (!item->doneq) {
+                if (!item->dlist.dl_q) {
                         cv_wait(&item->cv, &item->mutex);
                 }
                 pkt = NULL;
                 if ((cmd = mptsas_doneq_thread_rm(mpt, t)) != NULL) {
                         cmd->cmd_flags |= CFLAG_COMPLETED;

@@ -5326,178 +6048,277 @@
                         mptsas_pkt_comp(pkt, cmd);
                 }
                 mutex_enter(&item->mutex);
         }
         mutex_exit(&item->mutex);
-        mutex_enter(&mpt->m_doneq_mutex);
+        mutex_enter(&mpt->m_qthread_mutex);
         mpt->m_doneq_thread_n--;
-        cv_broadcast(&mpt->m_doneq_thread_cv);
-        mutex_exit(&mpt->m_doneq_mutex);
+        cv_broadcast(&mpt->m_qthread_cv);
+        mutex_exit(&mpt->m_qthread_mutex);
 }
 
 
 /*
  * mpt interrupt handler.
  */
 static uint_t
 mptsas_intr(caddr_t arg1, caddr_t arg2)
 {
         mptsas_t                        *mpt = (void *)arg1;
+        mptsas_reply_pqueue_t           *rpqp;
+        int                             reply_q = (int)(uintptr_t)arg2;
         pMpi2ReplyDescriptorsUnion_t    reply_desc_union;
-        uchar_t                         did_reply = FALSE;
+        int                             found = 0, i, rpqidx;
+        size_t                          dma_sync_len;
+        off_t                           dma_sync_offset;
+        uint32_t                        istat;
 
-        NDBG1(("mptsas_intr: arg1 0x%p arg2 0x%p", (void *)arg1, (void *)arg2));
+        NDBG18(("mptsas_intr: arg1 0x%p reply_q 0x%d", (void *)arg1, reply_q));
 
-        mutex_enter(&mpt->m_mutex);
+        rpqp = &mpt->m_rep_post_queues[reply_q];
 
         /*
          * If interrupts are shared by two channels then check whether this
          * interrupt is genuinely for this channel by making sure first the
          * chip is in high power state.
          */
         if ((mpt->m_options & MPTSAS_OPT_PM) &&
             (mpt->m_power_level != PM_LEVEL_D0)) {
-                mutex_exit(&mpt->m_mutex);
+                mpt->m_unclaimed_pm_interrupt_count++;
                 return (DDI_INTR_UNCLAIMED);
         }
 
+        istat = MPTSAS_GET_ISTAT(mpt);
+        if (!(istat & MPI2_HIS_REPLY_DESCRIPTOR_INTERRUPT)) {
+                NDBG18(("Interrupt bit not set, istat 0x%x", istat));
+                mpt->m_unclaimed_no_interrupt_count++;
+                /*
+                 * It is unclear when this interrupt status check is
+                 * valid; it appears not to be with multiple reply post
+                 * queues. There may be a better way - need LSI info.
+                 * For now just count these interrupts.
+                 */
+#if 0
+                return (DDI_INTR_UNCLAIMED);
+#endif
+        }
+
         /*
          * If polling, interrupt was triggered by some shared interrupt because
          * IOC interrupts are disabled during polling, so polling routine will
          * handle any replies.  Considering this, if polling is happening,
          * return with interrupt unclaimed.
          */
         if (mpt->m_polled_intr) {
-                mutex_exit(&mpt->m_mutex);
-                mptsas_log(mpt, CE_WARN, "mpt_sas: Unclaimed interrupt");
+                mptsas_log(mpt, CE_WARN,
+                    "Unclaimed interrupt, rpq %d (Polling), istat 0x%x",
+                    reply_q, istat);
+                mpt->m_unclaimed_polled_interrupt_count++;
                 return (DDI_INTR_UNCLAIMED);
         }
 
         /*
-         * Read the istat register.
+         * At the moment this is the only place the mutex is grabbed.
+         * So it should never fail!
          */
-        if ((INTPENDING(mpt)) != 0) {
+        if (mutex_tryenter(&rpqp->rpq_mutex) == 0) {
+                mutex_enter(&rpqp->rpq_mutex);
+                rpqp->rpq_intr_mutexbusy++;
+        }
+
+        dma_sync_len = mpt->m_post_queue_depth * 8;
+        dma_sync_offset = dma_sync_len * reply_q;
+        (void) ddi_dma_sync(mpt->m_dma_post_queue_hdl,
+            dma_sync_offset, dma_sync_len, DDI_DMA_SYNC_FORCPU);
+
                 /*
-                 * read fifo until empty.
+         * Go around the reply queue and process each descriptor until
+         * we get to the next unused one.
+         * It seems to be an occupational hazard that we get interrupts
+         * with nothing to do. These are counted below.
                  */
+        rpqidx = rpqp->rpq_index;
 #ifndef __lock_lint
                 _NOTE(CONSTCOND)
 #endif
                 while (TRUE) {
-                        (void) ddi_dma_sync(mpt->m_dma_post_queue_hdl, 0, 0,
-                            DDI_DMA_SYNC_FORCPU);
                         reply_desc_union = (pMpi2ReplyDescriptorsUnion_t)
-                            MPTSAS_GET_NEXT_REPLY(mpt, mpt->m_post_index);
+                    MPTSAS_GET_NEXT_REPLY(rpqp, rpqidx);
 
                         if (ddi_get32(mpt->m_acc_post_queue_hdl,
                             &reply_desc_union->Words.Low) == 0xFFFFFFFF ||
                             ddi_get32(mpt->m_acc_post_queue_hdl,
                             &reply_desc_union->Words.High) == 0xFFFFFFFF) {
                                 break;
                         }
 
+                found++;
+
+                ASSERT(ddi_get8(mpt->m_acc_post_queue_hdl,
+                    &reply_desc_union->Default.MSIxIndex) == reply_q);
+
                         /*
-                         * The reply is valid, process it according to its
-                         * type.  Also, set a flag for updating the reply index
-                         * after they've all been processed.
+                 * Process it according to its type.
                          */
-                        did_reply = TRUE;
+                mptsas_process_intr(mpt, rpqp, reply_desc_union);
 
-                        mptsas_process_intr(mpt, reply_desc_union);
+                /*
+                 * Clear the reply descriptor for re-use.
+                 */
+                ddi_put64(mpt->m_acc_post_queue_hdl,
+                    &((uint64_t *)(void *)rpqp->rpq_queue)[rpqidx],
+                    0xFFFFFFFFFFFFFFFF);
 
                         /*
                          * Increment post index and roll over if needed.
                          */
-                        if (++mpt->m_post_index == mpt->m_post_queue_depth) {
-                                mpt->m_post_index = 0;
+                if (++rpqidx == mpt->m_post_queue_depth) {
+                        rpqidx = 0;
+                }
                         }
+
+        if (found == 0) {
+                rpqp->rpq_intr_unclaimed++;
+                mutex_exit(&rpqp->rpq_mutex);
+                mpt->m_unclaimed_nocmd_interrupt_count++;
+                return (DDI_INTR_UNCLAIMED);
                 }
+        rpqp->rpq_index = rpqidx;
 
+        rpqp->rpq_intr_count++;
+        NDBG18(("mptsas_intr complete(%d), did %d loops", reply_q, found));
+
+        (void) ddi_dma_sync(mpt->m_dma_post_queue_hdl,
+            dma_sync_offset, dma_sync_len, DDI_DMA_SYNC_FORDEV);
+
+        mpt->m_interrupt_count++;
+
+        /*
+         * Update the reply index if at least one reply was processed.
+         * For more than 8 reply queues on SAS3 controllers we have to do
+         * things a little differently. See Chapter 20 in the MPI 2.5 spec.
+         */
+        if (mpt->m_post_reply_qcount > 8) {
                 /*
-                 * Update the global reply index if at least one reply was
-                 * processed.
+                 * The offsets from the base are multiples of 0x10.
+                 * We are indexing into 32 bit quantities so calculate
+                 * the index for that.
                  */
-                if (did_reply) {
+                i = (reply_q&~0x7) >> 1;
                         ddi_put32(mpt->m_datap,
-                            &mpt->m_reg->ReplyPostHostIndex, mpt->m_post_index);
-                }
+                    &mpt->m_reg->SuppReplyPostHostIndex[i],
+                    rpqp->rpq_index |
+                    ((reply_q&0x7)<<MPI2_RPHI_MSIX_INDEX_SHIFT));
+                (void) ddi_get32(mpt->m_datap,
+                    &mpt->m_reg->SuppReplyPostHostIndex[i]);
         } else {
-                mutex_exit(&mpt->m_mutex);
-                return (DDI_INTR_UNCLAIMED);
+                ddi_put32(mpt->m_datap,
+                    &mpt->m_reg->ReplyPostHostIndex,
+                    rpqp->rpq_index | (reply_q<<MPI2_RPHI_MSIX_INDEX_SHIFT));
+                (void) ddi_get32(mpt->m_datap,
+                    &mpt->m_reg->ReplyPostHostIndex);
         }
-        NDBG1(("mptsas_intr complete"));
 
         /*
          * If no helper threads are created, process the doneq in ISR. If
          * helpers are created, use the doneq length as a metric to measure the
          * load on the interrupt CPU. If it is long enough, which indicates the
          * load is heavy, then we deliver the IO completions to the helpers.
          * This measurement has some limitations, although it is simple and
          * straightforward and works well for most of the cases at present.
+         * To always use the threads set mptsas_doneq_length_threshold_prop
+         * to zero in the mpt_sas3.conf file.
+         *
+         * Check the current reply queue done queue.
+         */
+        if (rpqp->rpq_dlist.dl_len) {
+                if (!mpt->m_doneq_thread_n ||
+                    (rpqp->rpq_dlist.dl_len <= mpt->m_doneq_length_threshold)) {
+                        mptsas_rpdoneq_empty(rpqp);
+                } else {
+                        mptsas_deliver_doneq_thread(mpt, &rpqp->rpq_dlist);
+                }
+        }
+
+        mutex_exit(&rpqp->rpq_mutex);
+
+        /*
+         * Check the main done queue. If we find something
+         * grab the mutex and check again before processing.
          */
+        if (mpt->m_dlist.dl_len) {
+                mutex_enter(&mpt->m_mutex);
+                if (mpt->m_dlist.dl_len) {
         if (!mpt->m_doneq_thread_n ||
-            (mpt->m_doneq_len <= mpt->m_doneq_length_threshold)) {
+                            (mpt->m_dlist.dl_len <=
+                            mpt->m_doneq_length_threshold)) {
                 mptsas_doneq_empty(mpt);
         } else {
-                mptsas_deliver_doneq_thread(mpt);
+                                mptsas_deliver_doneq_thread(mpt, &mpt->m_dlist);
+                        }
+                }
+                mutex_exit(&mpt->m_mutex);
         }
 
         /*
          * If there are queued cmd, start them now.
          */
         if (mpt->m_waitq != NULL) {
+                mutex_enter(&mpt->m_mutex);
+                if (mpt->m_waitq != NULL && mpt->m_polled_intr == 0) {
                 mptsas_restart_waitq(mpt);
         }
-
         mutex_exit(&mpt->m_mutex);
+        }
         return (DDI_INTR_CLAIMED);
 }
 
 static void
-mptsas_process_intr(mptsas_t *mpt,
+mptsas_process_intr(mptsas_t *mpt, mptsas_reply_pqueue_t *rpqp,
     pMpi2ReplyDescriptorsUnion_t reply_desc_union)
 {
         uint8_t reply_type;
 
-        ASSERT(mutex_owned(&mpt->m_mutex));
+        /*
+         * Should get here with the reply queue mutex held, but not
+         * the main mpt mutex. Want to avoid grabbing that during
+         * normal operations if possible.
+         */
+        ASSERT(mutex_owned(&rpqp->rpq_mutex));
 
         /*
          * The reply is valid, process it according to its
          * type.  Also, set a flag for updated the reply index
          * after they've all been processed.
          */
         reply_type = ddi_get8(mpt->m_acc_post_queue_hdl,
             &reply_desc_union->Default.ReplyFlags);
+        NDBG18(("mptsas_process_intr(rpq %d) reply_type 0x%x", rpqp->rpq_num,
+            reply_type));
         reply_type &= MPI2_RPY_DESCRIPT_FLAGS_TYPE_MASK;
-        if (reply_type == MPI2_RPY_DESCRIPT_FLAGS_SCSI_IO_SUCCESS) {
-                mptsas_handle_scsi_io_success(mpt, reply_desc_union);
+        if (reply_type == MPI2_RPY_DESCRIPT_FLAGS_SCSI_IO_SUCCESS ||
+            reply_type == MPI25_RPY_DESCRIPT_FLAGS_FAST_PATH_SCSI_IO_SUCCESS) {
+                mptsas_handle_scsi_io_success(mpt, rpqp, reply_desc_union);
         } else if (reply_type == MPI2_RPY_DESCRIPT_FLAGS_ADDRESS_REPLY) {
+                mutex_enter(&mpt->m_mutex);
                 mptsas_handle_address_reply(mpt, reply_desc_union);
+                mutex_exit(&mpt->m_mutex);
         } else {
                 mptsas_log(mpt, CE_WARN, "?Bad reply type %x", reply_type);
                 ddi_fm_service_impact(mpt->m_dip, DDI_SERVICE_UNAFFECTED);
         }
-
-        /*
-         * Clear the reply descriptor for re-use and increment
-         * index.
-         */
-        ddi_put64(mpt->m_acc_post_queue_hdl,
-            &((uint64_t *)(void *)mpt->m_post_queue)[mpt->m_post_index],
-            0xFFFFFFFFFFFFFFFF);
-        (void) ddi_dma_sync(mpt->m_dma_post_queue_hdl, 0, 0,
-            DDI_DMA_SYNC_FORDEV);
 }
 
 /*
  * handle qfull condition
  */
 static void
 mptsas_handle_qfull(mptsas_t *mpt, mptsas_cmd_t *cmd)
 {
         mptsas_target_t *ptgt = cmd->cmd_tgt_addr;
 
+        mutex_enter(&ptgt->m_t_mutex);
         if ((++cmd->cmd_qfull_retries > ptgt->m_qfull_retries) ||
             (ptgt->m_qfull_retries == 0)) {
                 /*
                  * We have exhausted the retries on QFULL, or,
                  * the target driver has indicated that it

@@ -5511,17 +6332,17 @@
         } else {
                 if (ptgt->m_reset_delay == 0) {
                         ptgt->m_t_throttle =
                             max((ptgt->m_t_ncmds - 2), 0);
                 }
+                mutex_exit(&ptgt->m_t_mutex);
 
-                cmd->cmd_pkt_flags |= FLAG_HEAD;
                 cmd->cmd_flags &= ~(CFLAG_TRANFLAG);
-                cmd->cmd_flags |= CFLAG_RETRY;
 
-                (void) mptsas_accept_pkt(mpt, cmd);
+                mptsas_retry_pkt(mpt, cmd);
 
+                mutex_enter(&ptgt->m_t_mutex);
                 /*
                  * when target gives queue full status with no commands
                  * outstanding (m_t_ncmds == 0), throttle is set to 0
                  * (HOLD_THROTTLE), and the queue full handling start
                  * (see psarc/1994/313); if there are commands outstanding,

@@ -5540,19 +6361,20 @@
                                     timeout(mptsas_restart_cmd, mpt,
                                     ptgt->m_qfull_retry_interval);
                         }
                 }
         }
+        mutex_exit(&ptgt->m_t_mutex);
 }
 
 mptsas_phymask_t
 mptsas_physport_to_phymask(mptsas_t *mpt, uint8_t physport)
 {
         mptsas_phymask_t        phy_mask = 0;
         uint8_t                 i = 0;
 
-        NDBG20(("mptsas%d physport_to_phymask enter", mpt->m_instance));
+        NDBG20(("mptsas3%d physport_to_phymask enter", mpt->m_instance));
 
         ASSERT(mutex_owned(&mpt->m_mutex));
 
         /*
          * If physport is 0xFF, this is a RAID volume.  Use phymask of 0.

@@ -5567,11 +6389,11 @@
                     (mpt->m_phy_info[i].port_num == physport)) {
                         phy_mask = mpt->m_phy_info[i].phy_mask;
                         break;
                 }
         }
-        NDBG20(("mptsas%d physport_to_phymask:physport :%x phymask :%x, ",
+        NDBG20(("mptsas3%d physport_to_phymask:physport :%x phymask :%x, ",
             mpt->m_instance, physport, phy_mask));
         return (phy_mask);
 }
 
 /*

@@ -5621,11 +6443,11 @@
         mptsas_phymask_t mask = 0, phy_mask;
         char            *phy_mask_name;
         uint8_t         current_port;
         int             i, j;
 
-        NDBG20(("mptsas%d update phymask ", mpt->m_instance));
+        NDBG20(("mptsas3%d update phymask ", mpt->m_instance));
 
         ASSERT(mutex_owned(&mpt->m_mutex));
 
         (void) mptsas_get_sas_io_unit_page(mpt);
 

@@ -5667,11 +6489,11 @@
                  */
                 (void) scsi_hba_iport_register(mpt->m_dip, phy_mask_name);
                 mutex_enter(&mpt->m_mutex);
         }
         kmem_free(phy_mask_name, MPTSAS_MAX_PHYS);
-        NDBG20(("mptsas%d update phymask return", mpt->m_instance));
+        NDBG20(("mptsas3%d update phymask return", mpt->m_instance));
 }
 
 /*
  * mptsas_handle_dr is a task handler for DR, the DR action includes:
  * 1. Directly attched Device Added/Removed.

@@ -5687,24 +6509,22 @@
         mptsas_topo_change_list_t       *topo_node = NULL;
         mptsas_topo_change_list_t       *save_node = NULL;
         mptsas_t                        *mpt;
         dev_info_t                      *parent = NULL;
         mptsas_phymask_t                phymask = 0;
-        char                            *phy_mask_name;
+        char                            phy_mask_name[MPTSAS_MAX_PHYS];
         uint8_t                         flags = 0, physport = 0xff;
         uint8_t                         port_update = 0;
         uint_t                          event;
 
         topo_node = (mptsas_topo_change_list_t *)args;
 
         mpt = topo_node->mpt;
         event = topo_node->event;
         flags = topo_node->flags;
 
-        phy_mask_name = kmem_zalloc(MPTSAS_MAX_PHYS, KM_SLEEP);
-
-        NDBG20(("mptsas%d handle_dr enter", mpt->m_instance));
+        NDBG20(("mptsas3%d handle_dr enter", mpt->m_instance));
 
         switch (event) {
         case MPTSAS_DR_EVENT_RECONFIG_TARGET:
                 if ((flags == MPTSAS_TOPO_FLAG_DIRECT_ATTACHED_DEVICE) ||
                     (flags == MPTSAS_TOPO_FLAG_EXPANDER_ATTACHED_DEVICE) ||

@@ -5742,43 +6562,43 @@
                 (void) mptsas_update_phymask(mpt);
                 mpt->m_port_chng = 0;
 
         }
         mutex_exit(&mpt->m_mutex);
+
         while (topo_node) {
                 phymask = 0;
+                flags = topo_node->flags;
+                event = topo_node->event;
+                if (event == MPTSAS_DR_EVENT_REMOVE_HANDLE) {
+                        goto handle_topo_change;
+                }
+                if ((event == MPTSAS_DR_EVENT_RECONFIG_TARGET) &&
+                    (flags == MPTSAS_TOPO_FLAG_RAID_PHYSDRV_ASSOCIATED)) {
+                        /*
+                         * No field in the IR_CONFIG_CHANGE event
+                         * indicates physport/phynum, so get the
+                         * parent after the SAS Device Page0 request.
+                         */
+                        goto handle_topo_change;
+                }
+
                 if (parent == NULL) {
                         physport = topo_node->un.physport;
-                        event = topo_node->event;
-                        flags = topo_node->flags;
                         if (event & (MPTSAS_DR_EVENT_OFFLINE_TARGET |
                             MPTSAS_DR_EVENT_OFFLINE_SMP)) {
                                 /*
                                  * For all offline events, phymask is known
                                  */
                                 phymask = topo_node->un.phymask;
                                 goto find_parent;
                         }
-                        if (event & MPTSAS_TOPO_FLAG_REMOVE_HANDLE) {
-                                goto handle_topo_change;
-                        }
                         if (flags & MPTSAS_TOPO_FLAG_LUN_ASSOCIATED) {
                                 phymask = topo_node->un.phymask;
                                 goto find_parent;
                         }
 
-                        if ((flags ==
-                            MPTSAS_TOPO_FLAG_RAID_PHYSDRV_ASSOCIATED) &&
-                            (event == MPTSAS_DR_EVENT_RECONFIG_TARGET)) {
-                                /*
-                                 * There is no any field in IR_CONFIG_CHANGE
-                                 * event indicate physport/phynum, let's get
-                                 * parent after SAS Device Page0 request.
-                                 */
-                                goto handle_topo_change;
-                        }
-
                         mutex_enter(&mpt->m_mutex);
                         if (flags == MPTSAS_TOPO_FLAG_DIRECT_ATTACHED_DEVICE) {
                                 /*
                                  * If the direct attached device added or a
                                  * phys disk is being unhidden, argument

@@ -5790,12 +6610,11 @@
 
                         /*
                          * Translate physport to phymask so that we can search
                          * parent dip.
                          */
-                        phymask = mptsas_physport_to_phymask(mpt,
-                            physport);
+                        phymask = mptsas_physport_to_phymask(mpt, physport);
                         mutex_exit(&mpt->m_mutex);
 
 find_parent:
                         bzero(phy_mask_name, MPTSAS_MAX_PHYS);
                         /*

@@ -5804,35 +6623,38 @@
                          */
                         if (flags & MPTSAS_TOPO_FLAG_RAID_ASSOCIATED) {
                                 (void) sprintf(phy_mask_name, "v0");
                         } else {
                                 /*
-                                 * phymask can bo 0 if the drive has been
+                                 * phymask can be 0 if the drive has been
                                  * pulled by the time an add event is
                                  * processed.  If phymask is 0, just skip this
                                  * event and continue.
                                  */
                                 if (phymask == 0) {
-                                        mutex_enter(&mpt->m_mutex);
                                         save_node = topo_node;
                                         topo_node = topo_node->next;
                                         ASSERT(save_node);
                                         kmem_free(save_node,
                                             sizeof (mptsas_topo_change_list_t));
-                                        mutex_exit(&mpt->m_mutex);
-
                                         parent = NULL;
                                         continue;
                                 }
                                 (void) sprintf(phy_mask_name, "%x", phymask);
                         }
                         parent = scsi_hba_iport_find(mpt->m_dip,
                             phy_mask_name);
                         if (parent == NULL) {
                                 mptsas_log(mpt, CE_WARN, "Failed to find an "
-                                    "iport, should not happen!");
-                                goto out;
+                                    "iport for \"%s\", should not happen!",
+                                    phy_mask_name);
+                                save_node = topo_node;
+                                topo_node = topo_node->next;
+                                ASSERT(save_node);
+                                kmem_free(save_node,
+                                    sizeof (mptsas_topo_change_list_t));
+                                continue;
                         }
 
                 }
                 ASSERT(parent);
 handle_topo_change:

@@ -5844,15 +6666,15 @@
                  */
                 if (!mpt->m_in_reset)
                         mptsas_handle_topo_change(topo_node, parent);
                 else
                         NDBG20(("skipping topo change received during reset"));
+                mutex_exit(&mpt->m_mutex);
                 save_node = topo_node;
                 topo_node = topo_node->next;
                 ASSERT(save_node);
                 kmem_free(save_node, sizeof (mptsas_topo_change_list_t));
-                mutex_exit(&mpt->m_mutex);
 
                 if ((flags == MPTSAS_TOPO_FLAG_DIRECT_ATTACHED_DEVICE) ||
                     (flags == MPTSAS_TOPO_FLAG_RAID_PHYSDRV_ASSOCIATED) ||
                     (flags == MPTSAS_TOPO_FLAG_RAID_ASSOCIATED)) {
                         /*

@@ -5862,12 +6684,100 @@
                          * parent.  Also, reset parent if this is for RAID.
                          */
                         parent = NULL;
                 }
         }
-out:
-        kmem_free(phy_mask_name, MPTSAS_MAX_PHYS);
+}
+
+/*
+ * Take target ptgt offline under the iport node parent and update the
+ * target's driver state according to the outcome.
+ *
+ * The target's unit address is built as "w" followed by the 16-digit hex
+ * WWN when the WWN is non-zero, otherwise as "p" followed by the phy
+ * number in hex.  Outstanding commands on the device handle are reset
+ * first; a non-zero reset result is only reported through NDBG20.
+ *
+ * topo_flags selects the extra SMHBA clean-up: when it equals
+ * MPTSAS_TOPO_FLAG_DIRECT_ATTACHED_DEVICE the attached-port, num-phys and
+ * virtual-port properties on parent are reset.  A failed property update
+ * removes that property and logs a warning, then processing continues.
+ *
+ * Locking: m_mutex is released with mutex_exit() before the NDI calls and
+ * taken again with mutex_enter() before the target state is touched, so
+ * the function must be entered with m_mutex held and it returns with
+ * m_mutex held.
+ *
+ * On DDI_SUCCESS from mptsas_offline_targetdev() the target is removed
+ * from mpt->m_targets.
+ * NOTE(review): the caller presumably must not dereference ptgt after
+ * that; confirm against the refhash destructor for m_targets.
+ */
+static void
+mptsas_offline_target(mptsas_t *mpt, mptsas_target_t *ptgt,
+    uint8_t topo_flags, dev_info_t *parent)
+{
+        uint64_t        sas_wwn = 0;
+        uint8_t         phy;
+        char            wwn_str[MPTSAS_WWN_STRLEN];
+        uint16_t        devhdl;
+        int             circ = 0, circ1 = 0;
+        int             rval = 0;
+
+        sas_wwn = ptgt->m_addr.mta_wwn;
+        phy = ptgt->m_phynum;
+        devhdl = ptgt->m_devhdl;
+
+        /*
+         * Unit address: WWN based when a WWN is present, phy based
+         * otherwise.
+         */
+        if (sas_wwn) {
+                (void) sprintf(wwn_str, "w%016"PRIx64, sas_wwn);
+        } else {
+                (void) sprintf(wwn_str, "p%x", phy);
+        }
+
+        /*
+         * Abort all outstanding commands on the device.  A failure here
+         * is only traced; the offline proceeds regardless.
+         */
+        rval = mptsas_do_scsi_reset(mpt, devhdl);
+        if (rval) {
+                NDBG20(("mptsas3%d: mptsas_offline_target: reset target "
+                    "before offline devhdl:%x, phymask:%x, rval:%x",
+                    mpt->m_instance, ptgt->m_devhdl,
+                    ptgt->m_addr.mta_phymask, rval));
+        }
+
+        /*
+         * Drop m_mutex across the NDI and property calls below; it is
+         * re-acquired before the target state is updated.
+         * NOTE(review): presumably because those calls may block; confirm.
+         */
+        mutex_exit(&mpt->m_mutex);
+
+        /*
+         * Enter the scsi_vhci node first, then the parent, and leave them
+         * in the reverse order.  rval now carries the offline result and
+         * is what the final success test looks at.
+         */
+        ndi_devi_enter(scsi_vhci_dip, &circ);
+        ndi_devi_enter(parent, &circ1);
+        rval = mptsas_offline_targetdev(parent, wwn_str);
+        ndi_devi_exit(parent, circ1);
+        ndi_devi_exit(scsi_vhci_dip, circ);
+        NDBG20(("mptsas3%d: mptsas_offline_target %s devhdl:%x, "
+            "phymask:%x, rval:%x", mpt->m_instance, wwn_str,
+            ptgt->m_devhdl, ptgt->m_addr.mta_phymask, rval));
+
+        /*
+         * Clear parent's props for SMHBA support.  Each update is
+         * attempted independently: on failure the property is removed,
+         * a warning is logged and the next property is still processed.
+         * None of these calls modifies rval.
+         */
+        if (topo_flags == MPTSAS_TOPO_FLAG_DIRECT_ATTACHED_DEVICE) {
+                if (ddi_prop_update_string(DDI_DEV_T_NONE, parent,
+                    SCSI_ADDR_PROP_ATTACHED_PORT, "") !=
+                    DDI_PROP_SUCCESS) {
+                        (void) ddi_prop_remove(DDI_DEV_T_NONE, parent,
+                            SCSI_ADDR_PROP_ATTACHED_PORT);
+                        mptsas_log(mpt, CE_WARN, "mptsas attached port "
+                            "prop update failed");
+                }
+                if (ddi_prop_update_int(DDI_DEV_T_NONE, parent,
+                    MPTSAS_NUM_PHYS, 0) != DDI_PROP_SUCCESS) {
+                        (void) ddi_prop_remove(DDI_DEV_T_NONE, parent,
+                            MPTSAS_NUM_PHYS);
+                        mptsas_log(mpt, CE_WARN, "mptsas num phys "
+                            "prop update failed");
+                }
+                if (ddi_prop_update_int(DDI_DEV_T_NONE, parent,
+                    MPTSAS_VIRTUAL_PORT, 1) != DDI_PROP_SUCCESS) {
+                        (void) ddi_prop_remove(DDI_DEV_T_NONE, parent,
+                            MPTSAS_VIRTUAL_PORT);
+                        mptsas_log(mpt, CE_WARN, "mptsas virtual port "
+                            "prop update failed");
+                }
+        }
+
+        mutex_enter(&mpt->m_mutex);
+        ptgt->m_led_status = 0;
+        (void) mptsas_flush_led_status(mpt, ptgt);
+        if (rval == DDI_SUCCESS) {
+                mutex_destroy(&ptgt->m_t_mutex);
+                refhash_remove(mpt->m_targets, ptgt);
+                ptgt = NULL;
+        } else {
+                /*
+                 * Offline did not succeed, so the target stays in the hash.
+                 * Clear the DR_INTRANSITION flag to allow I/O down to the
+                 * PHCI driver since failover finished.
+                 * Invalidate the devhdl.
+                 */
+                ptgt->m_devhdl = MPTSAS_INVALID_DEVHDL;
+                ptgt->m_tgt_unconfigured = 0;
+                ptgt->m_dr_flag = MPTSAS_DR_INACTIVE;
+        }
 }
 
 static void
 mptsas_handle_topo_change(mptsas_topo_change_list_t *topo_node,
     dev_info_t *parent)

@@ -5875,20 +6785,20 @@
         mptsas_target_t *ptgt = NULL;
         mptsas_smp_t    *psmp = NULL;
         mptsas_t        *mpt = (void *)topo_node->mpt;
         uint16_t        devhdl;
         uint16_t        attached_devhdl;
-        uint64_t        sas_wwn = 0;
         int             rval = 0;
         uint32_t        page_address;
-        uint8_t         phy, flags;
-        char            *addr = NULL;
+        uint8_t         flags;
         dev_info_t      *lundip;
         int             circ = 0, circ1 = 0;
         char            attached_wwnstr[MPTSAS_WWN_STRLEN];
 
-        NDBG20(("mptsas%d handle_topo_change enter", mpt->m_instance));
+        NDBG20(("mptsas3%d handle_topo_change enter, devhdl 0x%x,"
+            "event 0x%x, flags 0x%x", mpt->m_instance, topo_node->devhdl,
+            topo_node->event, topo_node->flags));
 
         ASSERT(mutex_owned(&mpt->m_mutex));
 
         switch (topo_node->event) {
         case MPTSAS_DR_EVENT_RECONFIG_TARGET:

@@ -6043,157 +6953,68 @@
                                         return;
                                 }
                         }
                 }
                 mutex_enter(&mpt->m_mutex);
-
-                NDBG20(("mptsas%d handle_topo_change to online devhdl:%x, "
-                    "phymask:%x.", mpt->m_instance, ptgt->m_devhdl,
-                    ptgt->m_addr.mta_phymask));
-                break;
-        }
-        case MPTSAS_DR_EVENT_OFFLINE_TARGET:
-        {
-                devhdl = topo_node->devhdl;
-                ptgt = refhash_linear_search(mpt->m_targets,
-                    mptsas_target_eval_devhdl, &devhdl);
-                if (ptgt == NULL)
-                        break;
-
-                sas_wwn = ptgt->m_addr.mta_wwn;
-                phy = ptgt->m_phynum;
-
-                addr = kmem_zalloc(SCSI_MAXNAMELEN, KM_SLEEP);
-
-                if (sas_wwn) {
-                        (void) sprintf(addr, "w%016"PRIx64, sas_wwn);
-                } else {
-                        (void) sprintf(addr, "p%x", phy);
-                }
-                ASSERT(ptgt->m_devhdl == devhdl);
-
-                if ((topo_node->flags == MPTSAS_TOPO_FLAG_RAID_ASSOCIATED) ||
-                    (topo_node->flags ==
-                    MPTSAS_TOPO_FLAG_RAID_PHYSDRV_ASSOCIATED)) {
-                        /*
-                         * Get latest RAID info if RAID volume status changes
-                         * or Phys Disk status changes
-                         */
-                        (void) mptsas_get_raid_info(mpt);
-                }
-                /*
-                 * Abort all outstanding command on the device
-                 */
-                rval = mptsas_do_scsi_reset(mpt, devhdl);
-                if (rval) {
-                        NDBG20(("mptsas%d handle_topo_change to reset target "
-                            "before offline devhdl:%x, phymask:%x, rval:%x",
-                            mpt->m_instance, ptgt->m_devhdl,
-                            ptgt->m_addr.mta_phymask, rval));
-                }
-
-                mutex_exit(&mpt->m_mutex);
-
-                ndi_devi_enter(scsi_vhci_dip, &circ);
-                ndi_devi_enter(parent, &circ1);
-                rval = mptsas_offline_target(parent, addr);
-                ndi_devi_exit(parent, circ1);
-                ndi_devi_exit(scsi_vhci_dip, circ);
-                NDBG20(("mptsas%d handle_topo_change to offline devhdl:%x, "
-                    "phymask:%x, rval:%x", mpt->m_instance,
-                    ptgt->m_devhdl, ptgt->m_addr.mta_phymask, rval));
-
-                kmem_free(addr, SCSI_MAXNAMELEN);
-
-                /*
-                 * Clear parent's props for SMHBA support
-                 */
-                flags = topo_node->flags;
-                if (flags == MPTSAS_TOPO_FLAG_DIRECT_ATTACHED_DEVICE) {
-                        bzero(attached_wwnstr, sizeof (attached_wwnstr));
-                        if (ddi_prop_update_string(DDI_DEV_T_NONE, parent,
-                            SCSI_ADDR_PROP_ATTACHED_PORT, attached_wwnstr) !=
-                            DDI_PROP_SUCCESS) {
-                                (void) ddi_prop_remove(DDI_DEV_T_NONE, parent,
-                                    SCSI_ADDR_PROP_ATTACHED_PORT);
-                                mptsas_log(mpt, CE_WARN, "mptsas attached port "
-                                    "prop update failed");
-                                break;
-                        }
-                        if (ddi_prop_update_int(DDI_DEV_T_NONE, parent,
-                            MPTSAS_NUM_PHYS, 0) !=
-                            DDI_PROP_SUCCESS) {
-                                (void) ddi_prop_remove(DDI_DEV_T_NONE, parent,
-                                    MPTSAS_NUM_PHYS);
-                                mptsas_log(mpt, CE_WARN, "mptsas num phys "
-                                    "prop update failed");
-                                break;
-                        }
-                        if (ddi_prop_update_int(DDI_DEV_T_NONE, parent,
-                            MPTSAS_VIRTUAL_PORT, 1) !=
-                            DDI_PROP_SUCCESS) {
-                                (void) ddi_prop_remove(DDI_DEV_T_NONE, parent,
-                                    MPTSAS_VIRTUAL_PORT);
-                                mptsas_log(mpt, CE_WARN, "mptsas virtual port "
-                                    "prop update failed");
-                                break;
-                        }
-                }
-
-                mutex_enter(&mpt->m_mutex);
-                ptgt->m_led_status = 0;
-                (void) mptsas_flush_led_status(mpt, ptgt);
-                if (rval == DDI_SUCCESS) {
-                        refhash_remove(mpt->m_targets, ptgt);
-                        ptgt = NULL;
-                } else {
+
+                NDBG20(("mptsas3%d handle_topo_change to online devhdl:%x, "
+                    "phymask:%x.", mpt->m_instance, ptgt->m_devhdl,
+                    ptgt->m_addr.mta_phymask));
+                break;
+        }
+        case MPTSAS_DR_EVENT_OFFLINE_TARGET:
+        {
+                devhdl = topo_node->devhdl;
+                ptgt = refhash_linear_search(mpt->m_targets,
+                    mptsas_target_eval_devhdl, &devhdl);
+                if (ptgt == NULL)
+                        break;
+
+                ASSERT(ptgt->m_devhdl == devhdl);
+
+                if ((topo_node->flags == MPTSAS_TOPO_FLAG_RAID_ASSOCIATED) ||
+                    (topo_node->flags ==
+                    MPTSAS_TOPO_FLAG_RAID_PHYSDRV_ASSOCIATED)) {
                         /*
-                         * clean DR_INTRANSITION flag to allow I/O down to
-                         * PHCI driver since failover finished.
-                         * Invalidate the devhdl
+                         * Get latest RAID info if RAID volume status changes
+                         * or Phys Disk status changes
                          */
-                        ptgt->m_devhdl = MPTSAS_INVALID_DEVHDL;
-                        ptgt->m_tgt_unconfigured = 0;
-                        mutex_enter(&mpt->m_tx_waitq_mutex);
-                        ptgt->m_dr_flag = MPTSAS_DR_INACTIVE;
-                        mutex_exit(&mpt->m_tx_waitq_mutex);
+                        (void) mptsas_get_raid_info(mpt);
                 }
 
+                mptsas_offline_target(mpt, ptgt, topo_node->flags, parent);
+
                 /*
                  * Send SAS IO Unit Control to free the dev handle
                  */
                 if ((flags == MPTSAS_TOPO_FLAG_DIRECT_ATTACHED_DEVICE) ||
                     (flags == MPTSAS_TOPO_FLAG_EXPANDER_ATTACHED_DEVICE)) {
                         rval = mptsas_free_devhdl(mpt, devhdl);
 
-                        NDBG20(("mptsas%d handle_topo_change to remove "
+                        NDBG20(("mptsas3%d handle_topo_change to remove "
                             "devhdl:%x, rval:%x", mpt->m_instance, devhdl,
                             rval));
                 }
 
                 break;
         }
-        case MPTSAS_TOPO_FLAG_REMOVE_HANDLE:
+        case MPTSAS_DR_EVENT_REMOVE_HANDLE:
         {
                 devhdl = topo_node->devhdl;
+
                 /*
-                 * If this is the remove handle event, do a reset first.
+                 * Do a reset first.
                  */
-                if (topo_node->event == MPTSAS_TOPO_FLAG_REMOVE_HANDLE) {
                         rval = mptsas_do_scsi_reset(mpt, devhdl);
-                        if (rval) {
                                 NDBG20(("mpt%d reset target before remove "
-                                    "devhdl:%x, rval:%x", mpt->m_instance,
-                                    devhdl, rval));
-                        }
-                }
+                    "devhdl:%x, rval:%x", mpt->m_instance, devhdl, rval));
 
                 /*
                  * Send SAS IO Unit Control to free the dev handle
                  */
                 rval = mptsas_free_devhdl(mpt, devhdl);
-                NDBG20(("mptsas%d handle_topo_change to remove "
+                NDBG20(("mptsas3%d handle_topo_change to remove "
                     "devhdl:%x, rval:%x", mpt->m_instance, devhdl,
                     rval));
                 break;
         }
         case MPTSAS_DR_EVENT_RECONFIG_SMP:

@@ -6282,11 +7103,11 @@
                                 return;
                         }
                 }
 
                 mutex_enter(&mpt->m_mutex);
-                NDBG20(("mptsas%d handle_topo_change to remove devhdl:%x, "
+                NDBG20(("mptsas3%d handle_topo_change to remove devhdl:%x, "
                     "rval:%x", mpt->m_instance, psmp->m_devhdl, rval));
                 if (rval == DDI_SUCCESS) {
                         refhash_remove(mpt->m_smp_targets, psmp);
                 } else {
                         psmp->m_devhdl = MPTSAS_INVALID_DEVHDL;

@@ -6318,11 +7139,12 @@
         replyh_arg = (m_replyh_arg_t *)args;
         rfm = replyh_arg->rfm;
         mpt = replyh_arg->mpt;
 
         eventreply = (pMpi2EventNotificationReply_t)
-            (mpt->m_reply_frame + (rfm - mpt->m_reply_frame_dma_addr));
+            (mpt->m_reply_frame + (rfm -
+            (mpt->m_reply_frame_dma_addr&0xfffffffful)));
         event = ddi_get16(mpt->m_acc_reply_frame_hdl, &eventreply->Event);
 
 
         /*
          * Generate a system event to let anyone who cares know that a

@@ -6406,25 +7228,28 @@
         mpt = replyh_arg->mpt;
 
         ASSERT(mutex_owned(&mpt->m_mutex));
 
         eventreply = (pMpi2EventNotificationReply_t)
-            (mpt->m_reply_frame + (rfm - mpt->m_reply_frame_dma_addr));
+            (mpt->m_reply_frame + (rfm -
+            (mpt->m_reply_frame_dma_addr&0xfffffffful)));
         event = ddi_get16(mpt->m_acc_reply_frame_hdl, &eventreply->Event);
 
         if (iocstatus = ddi_get16(mpt->m_acc_reply_frame_hdl,
             &eventreply->IOCStatus)) {
                 if (iocstatus == MPI2_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE) {
                         mptsas_log(mpt, CE_WARN,
-                            "!mptsas_handle_event_sync: IOCStatus=0x%x, "
-                            "IOCLogInfo=0x%x", iocstatus,
+                            "!mptsas_handle_event_sync: event 0x%x, "
+                            "IOCStatus=0x%x, "
+                            "IOCLogInfo=0x%x", event, iocstatus,
                             ddi_get32(mpt->m_acc_reply_frame_hdl,
                             &eventreply->IOCLogInfo));
                 } else {
                         mptsas_log(mpt, CE_WARN,
-                            "mptsas_handle_event_sync: IOCStatus=0x%x, "
-                            "IOCLogInfo=0x%x", iocstatus,
+                            "mptsas_handle_event_sync: event 0x%x, "
+                            "IOCStatus=0x%x, "
+                            "(IOCLogInfo=0x%x)", event, iocstatus,
                             ddi_get32(mpt->m_acc_reply_frame_hdl,
                             &eventreply->IOCLogInfo));
                 }
         }
 

@@ -6555,11 +7380,11 @@
                         prev[0] = 0;
                         string[0] = 0;
                         switch (reason_code) {
                         case MPI2_EVENT_SAS_TOPO_RC_TARG_ADDED:
                         {
-                                NDBG20(("mptsas%d phy %d physical_port %d "
+                                NDBG20(("mptsas3%d phy %d physical_port %d "
                                     "dev_handle %d added", mpt->m_instance, phy,
                                     physport, dev_handle));
                                 link_rate = ddi_get8(mpt->m_acc_reply_frame_hdl,
                                     &sas_topo_change_list->PHY[i].LinkRate);
                                 state = (link_rate &

@@ -6591,10 +7416,14 @@
                                         break;
                                 case MPI2_EVENT_SAS_TOPO_LR_RATE_6_0:
                                         (void) sprintf(curr, "is online at 6.0 "
                                             "Gbps");
                                         break;
+                                case MPI25_EVENT_SAS_TOPO_LR_RATE_12_0:
+                                        (void) sprintf(curr,
+                                            "is online at 12.0 Gbps");
+                                        break;
                                 default:
                                         (void) sprintf(curr, "state is "
                                             "unknown");
                                         break;
                                 }

@@ -6637,11 +7466,11 @@
                                 }
                                 break;
                         }
                         case MPI2_EVENT_SAS_TOPO_RC_TARG_NOT_RESPONDING:
                         {
-                                NDBG20(("mptsas%d phy %d physical_port %d "
+                                NDBG20(("mptsas3%d phy %d physical_port %d "
                                     "dev_handle %d removed", mpt->m_instance,
                                     phy, physport, dev_handle));
                                 /*
                                  * Set association flag according to if an
                                  * expander is used or not.

@@ -6668,20 +7497,20 @@
                                  * the hash table because the add event will
                                  * have an invalid phymask.  BUT, this does not
                                  * mean that the DevHandle is invalid.  The
                                  * controller will still have a valid DevHandle
                                  * that must be removed.  To do this, use the
-                                 * MPTSAS_TOPO_FLAG_REMOVE_HANDLE event.
+                                 * MPTSAS_DR_EVENT_REMOVE_HANDLE event.
                                  */
                                 if (ptgt == NULL) {
                                         topo_node = kmem_zalloc(
                                             sizeof (mptsas_topo_change_list_t),
                                             KM_SLEEP);
                                         topo_node->mpt = mpt;
                                         topo_node->un.phymask = 0;
                                         topo_node->event =
-                                            MPTSAS_TOPO_FLAG_REMOVE_HANDLE;
+                                            MPTSAS_DR_EVENT_REMOVE_HANDLE;
                                         topo_node->devhdl = dev_handle;
                                         topo_node->flags = flags;
                                         topo_node->object = NULL;
                                         if (topo_head == NULL) {
                                                 topo_head = topo_tail =

@@ -6693,20 +7522,16 @@
                                         break;
                                 }
 
                                 /*
                                  * Update DR flag immediately avoid I/O failure
-                                 * before failover finish. Pay attention to the
-                                 * mutex protect, we need grab m_tx_waitq_mutex
-                                 * during set m_dr_flag because we won't add
-                                 * the following command into waitq, instead,
+                                 * before failover finish. We won't add
+                                 * any following commands into waitq, instead,
                                  * we need return TRAN_BUSY in the tran_start
                                  * context.
                                  */
-                                mutex_enter(&mpt->m_tx_waitq_mutex);
                                 ptgt->m_dr_flag = MPTSAS_DR_INTRANSITION;
-                                mutex_exit(&mpt->m_tx_waitq_mutex);
 
                                 topo_node = kmem_zalloc(
                                     sizeof (mptsas_topo_change_list_t),
                                     KM_SLEEP);
                                 topo_node->mpt = mpt;

@@ -6794,10 +7619,22 @@
                                         mptsas_smhba_log_sysevent(mpt,
                                             ESC_SAS_PHY_EVENT,
                                             SAS_PHY_ONLINE,
                                             &mpt->m_phy_info[i].smhba_info);
                                         break;
+                                case MPI25_EVENT_SAS_TOPO_LR_RATE_12_0:
+                                        (void) sprintf(curr, "is online at "
+                                            "12.0 Gbps");
+                                        if ((expd_handle == 0) &&
+                                            (enc_handle == 1)) {
+                                                mpt->m_port_chng = 1;
+                                        }
+                                        mptsas_smhba_log_sysevent(mpt,
+                                            ESC_SAS_PHY_EVENT,
+                                            SAS_PHY_ONLINE,
+                                            &mpt->m_phy_info[i].smhba_info);
+                                        break;
                                 default:
                                         (void) sprintf(curr, "state is "
                                             "unknown");
                                         break;
                                 }

@@ -6831,10 +7668,14 @@
                                         break;
                                 case MPI2_EVENT_SAS_TOPO_LR_RATE_6_0:
                                         (void) sprintf(prev, ", was online at "
                                             "6.0 Gbps");
                                         break;
+                                case MPI25_EVENT_SAS_TOPO_LR_RATE_12_0:
+                                        (void) sprintf(prev, ", was online at "
+                                            "12.0 Gbps");
+                                        break;
                                 default:
                                 break;
                                 }
                                 (void) sprintf(&string[strlen(string)], "link "
                                     "changed, ");

@@ -6845,21 +7686,27 @@
                                 (void) sprintf(&string[strlen(string)],
                                     "target not responding, delaying "
                                     "removal");
                                 break;
                         }
-                        NDBG20(("mptsas%d phy %d DevHandle %x, %s%s%s\n",
+                        NDBG20(("mptsas3%d phy %d DevHandle %x, %s%s%s\n",
                             mpt->m_instance, phy, dev_handle, string, curr,
                             prev));
                 }
                 if (topo_head != NULL) {
                         /*
                          * Launch DR taskq to handle topology change
                          */
                         if ((ddi_taskq_dispatch(mpt->m_dr_taskq,
                             mptsas_handle_dr, (void *)topo_head,
                             DDI_NOSLEEP)) != DDI_SUCCESS) {
+                                while (topo_head != NULL) {
+                                        topo_node = topo_head;
+                                        topo_head = topo_head->next;
+                                        kmem_free(topo_node,
+                                            sizeof (mptsas_topo_change_list_t));
+                                }
                                 mptsas_log(mpt, CE_NOTE, "mptsas start taskq "
                                     "for handle SAS DR event failed. \n");
                         }
                 }
                 break;

@@ -6877,11 +7724,11 @@
                 irChangeList = (pMpi2EventDataIrConfigChangeList_t)
                     eventreply->EventData;
                 num_entries = ddi_get8(mpt->m_acc_reply_frame_hdl,
                     &irChangeList->NumElements);
 
-                NDBG20(("mptsas%d IR_CONFIGURATION_CHANGE_LIST event received",
+                NDBG20(("mptsas3%d IR_CONFIGURATION_CHANGE_LIST event received",
                     mpt->m_instance));
 
                 for (i = 0; i < num_entries; i++) {
                         reason = ddi_get8(mpt->m_acc_reply_frame_hdl,
                             &irChangeList->ConfigElement[i].ReasonCode);

@@ -6933,13 +7780,11 @@
                                 (void) mptsas_delete_volume(mpt, volhandle);
 
                                 /*
                                  * Update DR flag immediately avoid I/O failure
                                  */
-                                mutex_enter(&mpt->m_tx_waitq_mutex);
                                 ptgt->m_dr_flag = MPTSAS_DR_INTRANSITION;
-                                mutex_exit(&mpt->m_tx_waitq_mutex);
 
                                 topo_node = kmem_zalloc(
                                     sizeof (mptsas_topo_change_list_t),
                                     KM_SLEEP);
                                 topo_node->mpt = mpt;

@@ -6968,13 +7813,11 @@
                                         break;
 
                                 /*
                                  * Update DR flag immediately avoid I/O failure
                                  */
-                                mutex_enter(&mpt->m_tx_waitq_mutex);
                                 ptgt->m_dr_flag = MPTSAS_DR_INTRANSITION;
-                                mutex_exit(&mpt->m_tx_waitq_mutex);
 
                                 topo_node = kmem_zalloc(
                                     sizeof (mptsas_topo_change_list_t),
                                     KM_SLEEP);
                                 topo_node->mpt = mpt;

@@ -7034,10 +7877,16 @@
                          * Launch DR taskq to handle topology change
                          */
                         if ((ddi_taskq_dispatch(mpt->m_dr_taskq,
                             mptsas_handle_dr, (void *)topo_head,
                             DDI_NOSLEEP)) != DDI_SUCCESS) {
+                                while (topo_head != NULL) {
+                                        topo_node = topo_head;
+                                        topo_head = topo_head->next;
+                                        kmem_free(topo_node,
+                                            sizeof (mptsas_topo_change_list_t));
+                                }
                                 mptsas_log(mpt, CE_NOTE, "mptsas start taskq "
                                     "for handle SAS DR event failed. \n");
                         }
                 }
                 break;

@@ -7076,11 +7925,12 @@
                 mutex_exit(&mpt->m_mutex);
                 return;
         }
 
         eventreply = (pMpi2EventNotificationReply_t)
-            (mpt->m_reply_frame + (rfm - mpt->m_reply_frame_dma_addr));
+            (mpt->m_reply_frame + (rfm -
+            (mpt->m_reply_frame_dma_addr&0xfffffffful)));
         event = ddi_get16(mpt->m_acc_reply_frame_hdl, &eventreply->Event);
 
         if (iocstatus = ddi_get16(mpt->m_acc_reply_frame_hdl,
             &eventreply->IOCStatus)) {
                 if (iocstatus == MPI2_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE) {

@@ -7109,14 +7959,14 @@
                     &eventreply->IOCLogInfo);
                 NDBG20(("mptsas %d log info %x received.\n", mpt->m_instance,
                     iocloginfo));
                 break;
         case MPI2_EVENT_STATE_CHANGE:
-                NDBG20(("mptsas%d state change.", mpt->m_instance));
+                NDBG20(("mptsas3%d state change.", mpt->m_instance));
                 break;
         case MPI2_EVENT_HARD_RESET_RECEIVED:
-                NDBG20(("mptsas%d event change.", mpt->m_instance));
+                NDBG20(("mptsas3%d event change.", mpt->m_instance));
                 break;
         case MPI2_EVENT_SAS_DISCOVERY:
         {
                 MPI2_EVENT_DATA_SAS_DISCOVERY   *sasdiscovery;
                 char                            string[80];

@@ -7149,19 +7999,19 @@
                     port, status));
 
                 break;
         }
         case MPI2_EVENT_EVENT_CHANGE:
-                NDBG20(("mptsas%d event change.", mpt->m_instance));
+                NDBG20(("mptsas3%d event change.", mpt->m_instance));
                 break;
         case MPI2_EVENT_TASK_SET_FULL:
         {
                 pMpi2EventDataTaskSetFull_t     taskfull;
 
                 taskfull = (pMpi2EventDataTaskSetFull_t)eventreply->EventData;
 
-                NDBG20(("TASK_SET_FULL received for mptsas%d, depth %d\n",
+                NDBG20(("TASK_SET_FULL received for mptsas3%d, depth %d\n",
                     mpt->m_instance,  ddi_get16(mpt->m_acc_reply_frame_hdl,
                     &taskfull->CurrentDepth)));
                 break;
         }
         case MPI2_EVENT_SAS_TOPOLOGY_CHANGE_LIST:

@@ -7191,12 +8041,13 @@
                         (void) sprintf(string, ", not responding");
                         break;
                 default:
                 break;
                 }
-                NDBG20(("mptsas%d ENCLOSURE STATUS CHANGE for enclosure %x%s\n",
-                    mpt->m_instance, ddi_get16(mpt->m_acc_reply_frame_hdl,
+                NDBG20(("mptsas3%d ENCLOSURE STATUS CHANGE for enclosure "
+                    "%x%s\n", mpt->m_instance,
+                    ddi_get16(mpt->m_acc_reply_frame_hdl,
                     &encstatus->EnclosureHandle), string));
                 break;
         }
 
         /*

@@ -7317,11 +8168,11 @@
                         default:
                                 (void) sprintf(reason_str, "unknown reason %x",
                                     rc);
                 }
 
-                NDBG20(("mptsas%d raid operational status: (%s)"
+                NDBG20(("mptsas3%d raid operational status: (%s)"
                     "\thandle(0x%04x), percent complete(%d)\n",
                     mpt->m_instance, reason_str, handle, percent));
                 break;
         }
         case MPI2_EVENT_SAS_BROADCAST_PRIMITIVE:

@@ -7386,16 +8237,16 @@
                             ESC_SAS_HBA_PORT_BROADCAST,
                             SAS_PORT_BROADCAST_D27_4,
                             &mpt->m_phy_info[phy_num].smhba_info);
                         break;
                 default:
-                        NDBG20(("mptsas%d: unknown BROADCAST PRIMITIVE"
+                        NDBG16(("mptsas3%d: unknown BROADCAST PRIMITIVE"
                             " %x received",
                             mpt->m_instance, primitive));
                         break;
                 }
-                NDBG20(("mptsas%d sas broadcast primitive: "
+                NDBG16(("mptsas3%d sas broadcast primitive: "
                     "\tprimitive(0x%04x), phy(%d) complete\n",
                     mpt->m_instance, primitive, phy_num));
                 break;
         }
         case MPI2_EVENT_IR_VOLUME:

@@ -7597,11 +8448,11 @@
                         break;
                 }
                 break;
         }
         default:
-                NDBG20(("mptsas%d: unknown event %x received",
+                NDBG20(("mptsas3%d: unknown event %x received",
                     mpt->m_instance, event));
                 break;
         }
 
         /*

@@ -7632,27 +8483,89 @@
 
         mpt->m_restart_cmd_timeid = 0;
 
         for (ptgt = refhash_first(mpt->m_targets); ptgt != NULL;
             ptgt = refhash_next(mpt->m_targets, ptgt)) {
+                mutex_enter(&ptgt->m_t_mutex);
                 if (ptgt->m_reset_delay == 0) {
                         if (ptgt->m_t_throttle == QFULL_THROTTLE) {
                                 mptsas_set_throttle(mpt, ptgt,
                                     MAX_THROTTLE);
                         }
                 }
+                mutex_exit(&ptgt->m_t_mutex);
         }
         mptsas_restart_hba(mpt);
         mutex_exit(&mpt->m_mutex);
 }
 
+/*
+ * Remove a non-IOC command from its active slot without holding m_mutex.
+ *
+ * Assume some checks have been done prior to calling this
+ * function so we don't need to consider taking the m_mutex.
+ *
+ * The global and per reply-post-queue command counters are decremented
+ * atomically; the per-target counter, throttle and active command queue
+ * are updated under the target's m_t_mutex.
+ *
+ * mpt - driver soft state that owns the slot table.
+ * cmd - command to remove; must not be queued and must not be an IOC
+ *       command (both are asserted).
+ */
+static void
+mptsas_remove_cmd_nomtx(mptsas_t *mpt, mptsas_cmd_t *cmd)
+{
+        int             slot;
+        mptsas_slots_t  *slots = mpt->m_active;
+        mptsas_target_t *ptgt;
+
+        ASSERT(cmd != NULL);
+        ASSERT(cmd->cmd_queued == FALSE);
+        ASSERT((cmd->cmd_flags & CFLAG_CMDIOC) == 0);
+
+        /*
+         * Only dereference cmd after the assertions above, otherwise the
+         * NULL check can never fire.
+         */
+        ptgt = cmd->cmd_tgt_addr;
+        slot = cmd->cmd_slot;
+
+        /*
+         * remove the cmd.
+         */
+        if (cmd == slots->m_slot[slot]) {
+                NDBG31(("mptsas_remove_cmd_nomtx: removing cmd=0x%p, flags "
+                    "0x%x", (void *)cmd, cmd->cmd_flags));
+                slots->m_slot[slot] = NULL;
+                ASSERT(mpt->m_ncmds != 0);
+                atomic_dec_32(&mpt->m_ncmds);
+                ASSERT(mpt->m_rep_post_queues[cmd->cmd_rpqidx].rpq_ncmds != 0);
+                atomic_dec_32(
+                    &mpt->m_rep_post_queues[cmd->cmd_rpqidx].rpq_ncmds);
+
+                /*
+                 * Decrement per target ncmds, we know this is not an
+                 * IOC cmd and it therefore has a target associated with it.
+                 */
+                mutex_enter(&ptgt->m_t_mutex);
+                ASSERT(ptgt->m_t_ncmds != 0);
+                ptgt->m_t_ncmds--;
+
+                /*
+                 * reset throttle if we just ran an untagged command
+                 * to a tagged target
+                 */
+                if ((ptgt->m_t_ncmds == 0) &&
+                    ((cmd->cmd_pkt_flags & FLAG_TAGMASK) == 0)) {
+                        mptsas_set_throttle(mpt, ptgt, MAX_THROTTLE);
+                }
+
+                /*
+                 * Remove this command from the active queue.
+                 * A zero expiration means it was never inserted.
+                 */
+                if (cmd->cmd_active_expiration != 0) {
+                        TAILQ_REMOVE(&ptgt->m_active_cmdq, cmd,
+                            cmd_active_link);
+                        cmd->cmd_active_expiration = 0;
+                }
+                mutex_exit(&ptgt->m_t_mutex);
+        }
+
+        ASSERT(cmd != slots->m_slot[cmd->cmd_slot]);
+}
+
 void
 mptsas_remove_cmd(mptsas_t *mpt, mptsas_cmd_t *cmd)
 {
         int             slot;
         mptsas_slots_t  *slots = mpt->m_active;
-        int             t;
         mptsas_target_t *ptgt = cmd->cmd_tgt_addr;
 
         ASSERT(cmd != NULL);
         ASSERT(cmd->cmd_queued == FALSE);
 

@@ -7662,35 +8575,52 @@
          */
         if (cmd->cmd_flags & CFLAG_TM_CMD) {
                 return;
         }
 
-        t = Tgt(cmd);
         slot = cmd->cmd_slot;
 
         /*
          * remove the cmd.
          */
         if (cmd == slots->m_slot[slot]) {
-                NDBG31(("mptsas_remove_cmd: removing cmd=0x%p", (void *)cmd));
+                NDBG31(("mptsas_remove_cmd: removing cmd=0x%p, flags 0x%x",
+                    (void *)cmd, cmd->cmd_flags));
                 slots->m_slot[slot] = NULL;
-                mpt->m_ncmds--;
+                ASSERT(mpt->m_ncmds != 0);
+                atomic_dec_32(&mpt->m_ncmds);
+                ASSERT(mpt->m_rep_post_queues[cmd->cmd_rpqidx].rpq_ncmds != 0);
+                atomic_dec_32(
+                    &mpt->m_rep_post_queues[cmd->cmd_rpqidx].rpq_ncmds);
 
                 /*
                  * only decrement per target ncmds if command
                  * has a target associated with it.
                  */
                 if ((cmd->cmd_flags & CFLAG_CMDIOC) == 0) {
+                        mutex_enter(&ptgt->m_t_mutex);
+                        ASSERT(ptgt->m_t_ncmds != 0);
                         ptgt->m_t_ncmds--;
+
                         /*
                          * reset throttle if we just ran an untagged command
                          * to a tagged target
                          */
                         if ((ptgt->m_t_ncmds == 0) &&
                             ((cmd->cmd_pkt_flags & FLAG_TAGMASK) == 0)) {
                                 mptsas_set_throttle(mpt, ptgt, MAX_THROTTLE);
                         }
+
+                        /*
+                         * Remove this command from the active queue.
+                         */
+                        if (cmd->cmd_active_expiration != 0) {
+                                TAILQ_REMOVE(&ptgt->m_active_cmdq, cmd,
+                                    cmd_active_link);
+                                cmd->cmd_active_expiration = 0;
+                        }
+                        mutex_exit(&ptgt->m_t_mutex);
                 }
 
         }
 
         /*

@@ -7699,54 +8629,10 @@
         if (cmd->cmd_flags & CFLAG_CMDIOC) {
                 mptsas_return_to_pool(mpt, cmd);
                 return;
         }
 
-        /*
-         * Figure out what to set tag Q timeout for...
-         *
-         * Optimize: If we have duplicate's of same timeout
-         * we're using, then we'll use it again until we run
-         * out of duplicates.  This should be the normal case
-         * for block and raw I/O.
-         * If no duplicates, we have to scan through tag que and
-         * find the longest timeout value and use it.  This is
-         * going to take a while...
-         * Add 1 to m_n_normal to account for TM request.
-         */
-        if (cmd->cmd_pkt->pkt_time == ptgt->m_timebase) {
-                if (--(ptgt->m_dups) == 0) {
-                        if (ptgt->m_t_ncmds) {
-                                mptsas_cmd_t *ssp;
-                                uint_t n = 0;
-                                ushort_t nslots = (slots->m_n_normal + 1);
-                                ushort_t i;
-                                /*
-                                 * This crude check assumes we don't do
-                                 * this too often which seems reasonable
-                                 * for block and raw I/O.
-                                 */
-                                for (i = 0; i < nslots; i++) {
-                                        ssp = slots->m_slot[i];
-                                        if (ssp && (Tgt(ssp) == t) &&
-                                            (ssp->cmd_pkt->pkt_time > n)) {
-                                                n = ssp->cmd_pkt->pkt_time;
-                                                ptgt->m_dups = 1;
-                                        } else if (ssp && (Tgt(ssp) == t) &&
-                                            (ssp->cmd_pkt->pkt_time == n)) {
-                                                ptgt->m_dups++;
-                                        }
-                                }
-                                ptgt->m_timebase = n;
-                        } else {
-                                ptgt->m_dups = 0;
-                                ptgt->m_timebase = 0;
-                        }
-                }
-        }
-        ptgt->m_timeout = ptgt->m_timebase;
-
         ASSERT(cmd != slots->m_slot[cmd->cmd_slot]);
 }
 
 /*
  * accept all cmds on the tx_waitq if any and then

@@ -7760,15 +8646,11 @@
 static void
 mptsas_restart_hba(mptsas_t *mpt)
 {
         ASSERT(mutex_owned(&mpt->m_mutex));
 
-        mutex_enter(&mpt->m_tx_waitq_mutex);
-        if (mpt->m_tx_waitq) {
-                mptsas_accept_tx_waitq(mpt);
-        }
-        mutex_exit(&mpt->m_tx_waitq_mutex);
+        /*
+         * Flush the tx wait queues first, then restart the main wait queue.
+         * Note that mptsas_accept_tx_waitqs() blocks with wait, and
+         * mptsas_block_tx_waitqs() drops and re-takes m_mutex while it
+         * waits for a drain in progress, so m_mutex may be released
+         * temporarily inside this call.
+         */
+        mptsas_accept_tx_waitqs(mpt);
         mptsas_restart_waitq(mpt);
 }
 
 /*
  * start a fresh request from the top of the device queue

@@ -7832,63 +8714,219 @@
                         cmd = next_cmd;
                         continue;
                 }
 
                 ptgt = cmd->cmd_tgt_addr;
-                if (ptgt && (ptgt->m_t_throttle == DRAIN_THROTTLE) &&
+                if (ptgt) {
+                        mutex_enter(&ptgt->m_t_mutex);
+                        if ((ptgt->m_t_throttle == DRAIN_THROTTLE) &&
                     (ptgt->m_t_ncmds == 0)) {
                         mptsas_set_throttle(mpt, ptgt, MAX_THROTTLE);
                 }
                 if ((mpt->m_ncmds <= (mpt->m_max_requests - 2)) &&
-                    (ptgt && (ptgt->m_reset_delay == 0)) &&
-                    (ptgt && (ptgt->m_t_ncmds <
-                    ptgt->m_t_throttle))) {
+                            (ptgt->m_reset_delay == 0) &&
+                            (ptgt->m_t_ncmds < ptgt->m_t_throttle)) {
+                                mutex_exit(&ptgt->m_t_mutex);
+
                         if (mptsas_save_cmd(mpt, cmd) == TRUE) {
                                 mptsas_waitq_delete(mpt, cmd);
+                                        mutex_exit(&mpt->m_mutex);
                                 (void) mptsas_start_cmd(mpt, cmd);
+                                        mutex_enter(&mpt->m_mutex);
+                                        cmd = mpt->m_waitq;
+                                        continue;
+                                }
+                        } else {
+                                mutex_exit(&ptgt->m_t_mutex);
                         }
                 }
                 cmd = next_cmd;
         }
 }
+
 /*
- * Cmds are queued if tran_start() doesn't get the m_mutexlock(no wait).
- * Accept all those queued cmds before new cmd is accept so that the
- * cmds are sent in order.
+ * Cmds are queued if scsi_start() doesn't get the m_mutex lock(no wait)
+ * or if the decision has been made to always do that. Setting
+ * mptsas_allow_txq_jumping to zero will allow higher performance on
+ * a heavily loaded system as there is less disruption to the flow here.
+ * There are 2 threads that handle one queue each. The idea is that
+ * they take it in turn to grab the m_mutex to run the mptsas_accept_pkt()
+ * function and then drop it while the cmd is started in mptsas_start_cmd().
+ *
+ * Thread body: arg->mpt is the driver instance and arg->t selects which
+ * entry of mpt->m_tx_waitq[] this thread services. The thread loops while
+ * txwq_active is set, draining the queue and then sleeping on txwq_cv.
+ * On exit it decrements m_txwq_thread_n and broadcasts m_qthread_cv.
  */
 static void
-mptsas_accept_tx_waitq(mptsas_t *mpt)
+mptsas_tx_waitq_thread(mptsas_thread_arg_t *arg)
 {
-        mptsas_cmd_t *cmd;
+        mptsas_t *mpt = arg->mpt;
+        mptsas_tx_waitqueue_t *txwq = &mpt->m_tx_waitq[arg->t];
+
+        mutex_enter(&txwq->txwq_mutex);
+        while (txwq->txwq_active) {
+                /* Returns with txwq_mutex held and txwq_draining cleared. */
+                mptsas_drain_tx_waitq(mpt, txwq);
+                if (txwq->txwq_wdrain) {
+                        /* Wake mptsas_block_tx_waitqs() waiting for drain. */
+                        cv_signal(&txwq->txwq_drain_cv);
+                }
+                cv_wait(&txwq->txwq_cv, &txwq->txwq_mutex);
+        }
+        mutex_exit(&txwq->txwq_mutex);
+        /* Report this thread's exit to whoever waits on m_qthread_cv. */
+        mutex_enter(&mpt->m_qthread_mutex);
+        mpt->m_txwq_thread_n--;
+        cv_broadcast(&mpt->m_qthread_cv);
+        mutex_exit(&mpt->m_qthread_mutex);
+}
+
+/*
+ * Set the draining flag, disconnect the list and process one at a time
+ * so that the cmds are sent in order.
+ *
+ * Entered with txwq->txwq_mutex held (see mptsas_tx_waitq_thread()).
+ * The mutex is dropped while a detached list is processed and re-taken
+ * before the queue is examined again, so it is held again on return.
+ * txwq_draining is TRUE for the whole time and cleared just before return.
+ *
+ * mpt  - driver soft state; m_mutex is taken around mptsas_accept_pkt().
+ * txwq - the tx wait queue to drain.
+ */
+static void
+mptsas_drain_tx_waitq(mptsas_t *mpt, mptsas_tx_waitqueue_t *txwq)
+{
+        mptsas_cmd_t    *cmd, *ncmd;
+        int             rval, start;
+#ifdef MPTSAS_DEBUG
+        uint32_t        qlen;
+#endif
+
+        txwq->txwq_draining = TRUE;
+#ifndef __lock_lint
+        _NOTE(CONSTCOND)
+#endif
+        while (TRUE) {
+
+                /*
+                 * A Bus Reset could occur at any time but it will have to
+                 * wait for the main mutex before flushing the tx_waitq.
+                 * Pull all commands at once, then follow the list in order to
+                 * reduce txwq_mutex hold time. If there is a Bus Reset at
+                 * some point the commands will get to the waitq and then be
+                 * flushed.
+                 */
+                cmd = txwq->txwq_cmdq;
+
+                if (cmd == NULL) {
+                        txwq->txwq_draining = FALSE;
+                        return;
+                }
+                txwq->txwq_cmdq = NULL;
+                txwq->txwq_qtail = &txwq->txwq_cmdq;
+#ifdef MPTSAS_DEBUG
+                qlen = txwq->txwq_len;
+#endif
+                txwq->txwq_len = 0;
+                mutex_exit(&txwq->txwq_mutex);
+
+                while (cmd) {
+                        ncmd = cmd->cmd_linkp;
+                        cmd->cmd_linkp = NULL;
+                        mutex_enter(&mpt->m_mutex);
+                        start = mptsas_accept_pkt(mpt, cmd, &rval);
+                        mutex_exit(&mpt->m_mutex);
+                        if (start) {
+                                (void) mptsas_start_cmd(mpt, cmd);
+                        }
+                        if (rval != TRAN_ACCEPT) {
+                                cmn_err(CE_WARN,
+                                    "mpt: mptsas_drain_tx_waitq: failed "
+                                    "(rval=0x%x) to accept cmd 0x%p on queue\n",
+                                    rval, (void *)cmd);
+                        }
+                        cmd = ncmd;
+#ifdef MPTSAS_DEBUG
+                        qlen--;
+#endif
+                }
+#ifdef MPTSAS_DEBUG
+                /*
+                 * qlen only exists under MPTSAS_DEBUG, so the check must be
+                 * guarded the same way or a plain DEBUG build will not
+                 * compile.
+                 */
+                ASSERT(qlen == 0);
+#endif
+                mutex_enter(&txwq->txwq_mutex);
+        }
+}
+
+/*
+ * Stop the drain threads from picking up a new list.
+ * Optionally wait for the current list being processed to drain through.
+ * Adding to and processing of the tx waitqs is now on hold until unblock
+ * is called.
+ *
+ * mpt  - driver soft state; caller must hold m_mutex (asserted).
+ * wait - non-zero to wait for any queue that is currently draining.
+ *
+ * Does nothing if there are no tx waitq threads. When waiting, m_mutex is
+ * dropped and re-taken, so it is held again on return.
+ */
+static void
+mptsas_block_tx_waitqs(mptsas_t *mpt, int wait)
+{
+        int             i;
+        /*
+         * Bitmask of queues to wait for, one bit per queue index.
+         * NOTE(review): uint8_t relies on NUM_TX_WAITQ <= 8 - confirm.
+         */
+        uint8_t         wdrain = 0;
+        mptsas_tx_waitqueue_t *txwq;
 
         ASSERT(mutex_owned(&mpt->m_mutex));
-        ASSERT(mutex_owned(&mpt->m_tx_waitq_mutex));
+
+        if (mpt->m_txwq_thread_n == 0) {
+                return;
+        }
 
         /*
-         * A Bus Reset could occur at any time and flush the tx_waitq,
-         * so we cannot count on the tx_waitq to contain even one cmd.
-         * And when the m_tx_waitq_mutex is released and run
-         * mptsas_accept_pkt(), the tx_waitq may be flushed.
+         * Turn off the use of the tx wait queues by scsi_start().
+         * This is just a dynamic flag no need for a mutex.
          */
-        cmd = mpt->m_tx_waitq;
-        for (;;) {
-                if ((cmd = mpt->m_tx_waitq) == NULL) {
-                        mpt->m_tx_draining = 0;
-                        break;
+        mpt->m_txwq_enabled = BLOCKED;
+
+        /*
+         * Flag every queue so its thread signals txwq_drain_cv, and note
+         * which queues are mid-drain if the caller asked to wait.
+         */
+        for (i = 0; i < NUM_TX_WAITQ; i++) {
+                txwq = &mpt->m_tx_waitq[i];
+                mutex_enter(&txwq->txwq_mutex);
+                txwq->txwq_wdrain = TRUE;
+                if (txwq->txwq_draining && wait)
+                        wdrain |= (1<<i);
+                mutex_exit(&txwq->txwq_mutex);
+        }
+
+        if (wdrain) {
+                /*
+                 * Because the threads disconnect the entire queue each time
+                 * round, in order to drain completely we have to drop the
+                 * main mutex, otherwise the drain threads get stuck.
+                 */
+                mutex_exit(&mpt->m_mutex);
+                for (i = 0; i < NUM_TX_WAITQ; i++) {
+                        if (wdrain & (1<<i)) {
+                                txwq = &mpt->m_tx_waitq[i];
+                                mutex_enter(&txwq->txwq_mutex);
+                                while (txwq->txwq_draining) {
+                                        cv_wait(&txwq->txwq_drain_cv,
+                                            &txwq->txwq_mutex);
                 }
-                if ((mpt->m_tx_waitq = cmd->cmd_linkp) == NULL) {
-                        mpt->m_tx_waitqtail = &mpt->m_tx_waitq;
+                                mutex_exit(&txwq->txwq_mutex);
                 }
-                cmd->cmd_linkp = NULL;
-                mutex_exit(&mpt->m_tx_waitq_mutex);
-                if (mptsas_accept_pkt(mpt, cmd) != TRAN_ACCEPT)
-                        cmn_err(CE_WARN, "mpt: mptsas_accept_tx_waitq: failed "
-                            "to accept cmd on queue\n");
-                mutex_enter(&mpt->m_tx_waitq_mutex);
         }
+                mutex_enter(&mpt->m_mutex);
+        }
+}
+
+/*
+ * Undo mptsas_block_tx_waitqs(): clear txwq_wdrain on every tx wait queue
+ * and signal its txwq_cv so the queue thread wakes up.
+ *
+ * m_txwq_enabled is left at FALSE rather than re-enabled here.
+ * Does nothing if there are no tx waitq threads.
+ */
+static void
+mptsas_unblock_tx_waitqs(mptsas_t *mpt)
+{
+        int                     i;
+        mptsas_tx_waitqueue_t   *txwq;
+
+        if (mpt->m_txwq_thread_n == 0) {
+                return;
+        }
+
+        for (i = 0; i < NUM_TX_WAITQ; i++) {
+                txwq = &mpt->m_tx_waitq[i];
+                mutex_enter(&txwq->txwq_mutex);
+                txwq->txwq_wdrain = FALSE;
+                cv_signal(&txwq->txwq_cv);
+                mutex_exit(&txwq->txwq_mutex);
+        }
+
+        /* Moves the flag from BLOCKED to FALSE (queues unblocked, unused). */
+        mpt->m_txwq_enabled = FALSE;
 }
 
+/*
+ * Flush all tx wait queues by blocking them (waiting for any drain in
+ * progress) and then immediately unblocking them.
+ */
+static void
+mptsas_accept_tx_waitqs(mptsas_t *mpt)
+{
+        /*
+         * Block with drain and unblock will leave us in a state where
+         * we have the main mutex, there is nothing on the tx wait queues
+         * and they are not in use until watch notices high activity again.
+         */
+        mptsas_block_tx_waitqs(mpt, 1);
+        mptsas_unblock_tx_waitqs(mpt);
+}
 
 /*
  * mpt tag type lookup
  */
 static char mptsas_tag_lookup[] =

@@ -7897,26 +8935,30 @@
 static int
 mptsas_start_cmd(mptsas_t *mpt, mptsas_cmd_t *cmd)
 {
         struct scsi_pkt         *pkt = CMD2PKT(cmd);
         uint32_t                control = 0;
-        int                     n;
-        caddr_t                 mem;
+        caddr_t                 mem, arsbuf;
         pMpi2SCSIIORequest_t    io_request;
         ddi_dma_handle_t        dma_hdl = mpt->m_dma_req_frame_hdl;
         ddi_acc_handle_t        acc_hdl = mpt->m_acc_req_frame_hdl;
         mptsas_target_t         *ptgt = cmd->cmd_tgt_addr;
-        uint16_t                SMID, io_flags = 0;
-        uint32_t                request_desc_low, request_desc_high;
+        uint16_t                SMID, io_flags = 0, ars_size;
+        uint8_t                 MSIidx;
+        uint64_t                request_desc;
+        uint32_t                ars_dmaaddrlow;
+        mptsas_cmd_t            *c;
 
-        NDBG1(("mptsas_start_cmd: cmd=0x%p", (void *)cmd));
+        NDBG1(("mptsas_start_cmd: cmd=0x%p, flags 0x%x", (void *)cmd,
+            cmd->cmd_flags));
 
         /*
          * Set SMID and increment index.  Rollover to 1 instead of 0 if index
          * is at the max.  0 is an invalid SMID, so we call the first index 1.
          */
         SMID = cmd->cmd_slot;
+        MSIidx = cmd->cmd_rpqidx;
 
         /*
          * It is possible for back to back device reset to
          * happen before the reset delay has expired.  That's
          * ok, just let the device reset go out on the bus.

@@ -7928,10 +8970,11 @@
         /*
          * if a non-tagged cmd is submitted to an active tagged target
          * then drain before submitting this cmd; SCSI-2 allows RQSENSE
          * to be untagged
          */
+        mutex_enter(&ptgt->m_t_mutex);
         if (((cmd->cmd_pkt_flags & FLAG_TAGMASK) == 0) &&
             (ptgt->m_t_ncmds > 1) &&
             ((cmd->cmd_flags & CFLAG_TM_CMD) == 0) &&
             (*(cmd->cmd_pkt->pkt_cdbp) != SCMD_REQUEST_SENSE)) {
                 if ((cmd->cmd_pkt_flags & FLAG_NOINTR) == 0) {

@@ -7939,14 +8982,19 @@
                             ptgt->m_devhdl));
 
                         if (ptgt->m_reset_delay == 0) {
                                 mptsas_set_throttle(mpt, ptgt, DRAIN_THROTTLE);
                         }
+                        mutex_exit(&ptgt->m_t_mutex);
 
+                        mutex_enter(&mpt->m_mutex);
                         mptsas_remove_cmd(mpt, cmd);
                         cmd->cmd_pkt_flags |= FLAG_HEAD;
                         mptsas_waitq_add(mpt, cmd);
+                        mutex_exit(&mpt->m_mutex);
+                } else {
+                        mutex_exit(&ptgt->m_t_mutex);
                 }
                 return (DDI_FAILURE);
         }
 
         /*

@@ -7973,27 +9021,91 @@
                                 ptgt->m_t_throttle = 1;
                 }
                 control |= MPI2_SCSIIO_CONTROL_SIMPLEQ;
         }
 
+        /*
+         * Set timeout.
+         */
+        cmd->cmd_active_expiration =
+            gethrtime() + (hrtime_t)pkt->pkt_time * NANOSEC;
+
+        c = TAILQ_FIRST(&ptgt->m_active_cmdq);
+        if (c == NULL ||
+            c->cmd_active_expiration < cmd->cmd_active_expiration) {
+                /*
+                 * Common case is that this is the last pending expiration
+                 * (or queue is empty). Insert at head of the queue.
+                 */
+                TAILQ_INSERT_HEAD(&ptgt->m_active_cmdq, cmd, cmd_active_link);
+        } else {
+                /*
+                 * Queue is not empty and first element expires later than
+                 * this command. Search for element expiring sooner.
+                 */
+                while ((c = TAILQ_NEXT(c, cmd_active_link)) != NULL) {
+                        if (c->cmd_active_expiration <
+                            cmd->cmd_active_expiration) {
+                                TAILQ_INSERT_BEFORE(c, cmd, cmd_active_link);
+                                break;
+                        }
+                }
+                if (c == NULL) {
+                        /*
+                         * No element found expiring sooner, append to
+                         * non-empty queue.
+                         */
+                        TAILQ_INSERT_TAIL(&ptgt->m_active_cmdq, cmd,
+                            cmd_active_link);
+                }
+        }
+
+        mutex_exit(&ptgt->m_t_mutex);
+
         if (cmd->cmd_pkt_flags & FLAG_TLR) {
                 control |= MPI2_SCSIIO_CONTROL_TLR_ON;
         }
 
         mem = mpt->m_req_frame + (mpt->m_req_frame_size * SMID);
         io_request = (pMpi2SCSIIORequest_t)mem;
-
+        if (cmd->cmd_extrqslen != 0) {
+                /*
+                 * Mapping of the buffer was done in mptsas_pkt_alloc_extern().
+                 * Calculate the DMA address with the same offset.
+                 */
+                arsbuf = cmd->cmd_arq_buf;
+                ars_size = cmd->cmd_extrqslen;
+                ars_dmaaddrlow = (mpt->m_req_sense_dma_addr +
+                    ((uintptr_t)arsbuf - (uintptr_t)mpt->m_req_sense)) &
+                    0xffffffffull;
+        } else {
+                arsbuf = mpt->m_req_sense + (mpt->m_req_sense_size * (SMID-1));
+                cmd->cmd_arq_buf = arsbuf;
+                ars_size = mpt->m_req_sense_size;
+                ars_dmaaddrlow = (mpt->m_req_sense_dma_addr +
+                    (mpt->m_req_sense_size * (SMID-1))) &
+                    0xffffffffull;
+        }
         bzero(io_request, sizeof (Mpi2SCSIIORequest_t));
+        bzero(arsbuf, ars_size);
+
         ddi_put8(acc_hdl, &io_request->SGLOffset0, offsetof
             (MPI2_SCSI_IO_REQUEST, SGL) / 4);
         mptsas_init_std_hdr(acc_hdl, io_request, ptgt->m_devhdl, Lun(cmd), 0,
             MPI2_FUNCTION_SCSI_IO_REQUEST);
 
         (void) ddi_rep_put8(acc_hdl, (uint8_t *)pkt->pkt_cdbp,
             io_request->CDB.CDB32, cmd->cmd_cdblen, DDI_DEV_AUTOINCR);
 
         io_flags = cmd->cmd_cdblen;
+        if (mptsas3_use_fastpath &&
+            ptgt->m_io_flags & MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH) {
+                io_flags |= MPI25_SCSIIO_IOFLAGS_FAST_PATH;
+                request_desc = MPI25_REQ_DESCRIPT_FLAGS_FAST_PATH_SCSI_IO;
+        } else {
+                request_desc = MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO;
+        }
         ddi_put16(acc_hdl, &io_request->IoFlags, io_flags);
         /*
          * setup the Scatter/Gather DMA list for this request
          */
         if (cmd->cmd_cookiec > 0) {

@@ -8008,131 +9120,111 @@
 
         /*
          * save ARQ information
          */
         ddi_put8(acc_hdl, &io_request->SenseBufferLength, cmd->cmd_rqslen);
-        if ((cmd->cmd_flags & (CFLAG_SCBEXTERN | CFLAG_EXTARQBUFVALID)) ==
-            (CFLAG_SCBEXTERN | CFLAG_EXTARQBUFVALID)) {
-                ddi_put32(acc_hdl, &io_request->SenseBufferLowAddress,
-                    cmd->cmd_ext_arqcookie.dmac_address);
-        } else {
-                ddi_put32(acc_hdl, &io_request->SenseBufferLowAddress,
-                    cmd->cmd_arqcookie.dmac_address);
-        }
+        ddi_put32(acc_hdl, &io_request->SenseBufferLowAddress, ars_dmaaddrlow);
 
         ddi_put32(acc_hdl, &io_request->Control, control);
 
-        NDBG31(("starting message=0x%p, with cmd=0x%p",
-            (void *)(uintptr_t)mpt->m_req_frame_dma_addr, (void *)cmd));
+        NDBG31(("starting message=%d(0x%p), with cmd=0x%p",
+            SMID, (void *)io_request, (void *)cmd));
 
         (void) ddi_dma_sync(dma_hdl, 0, 0, DDI_DMA_SYNC_FORDEV);
 
         /*
          * Build request descriptor and write it to the request desc post reg.
          */
-        request_desc_low = (SMID << 16) + MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO;
-        request_desc_high = ptgt->m_devhdl << 16;
-        MPTSAS_START_CMD(mpt, request_desc_low, request_desc_high);
-
-        /*
-         * Start timeout.
-         */
-#ifdef MPTSAS_TEST
-        /*
-         * Temporarily set timebase = 0;  needed for
-         * timeout torture test.
-         */
-        if (mptsas_test_timeouts) {
-                ptgt->m_timebase = 0;
-        }
-#endif
-        n = pkt->pkt_time - ptgt->m_timebase;
-
-        if (n == 0) {
-                (ptgt->m_dups)++;
-                ptgt->m_timeout = ptgt->m_timebase;
-        } else if (n > 0) {
-                ptgt->m_timeout =
-                    ptgt->m_timebase = pkt->pkt_time;
-                ptgt->m_dups = 1;
-        } else if (n < 0) {
-                ptgt->m_timeout = ptgt->m_timebase;
-        }
-#ifdef MPTSAS_TEST
-        /*
-         * Set back to a number higher than
-         * mptsas_scsi_watchdog_tick
-         * so timeouts will happen in mptsas_watchsubr
-         */
-        if (mptsas_test_timeouts) {
-                ptgt->m_timebase = 60;
-        }
-#endif
+        request_desc |= (SMID << 16) + (MSIidx << 8);
+        request_desc |= ((uint64_t)ptgt->m_devhdl << 48);
+        MPTSAS_START_CMD(mpt, request_desc);
 
+#if 0
+        /* Is this of any benefit here, what is it going to catch? */
         if ((mptsas_check_dma_handle(dma_hdl) != DDI_SUCCESS) ||
             (mptsas_check_acc_handle(acc_hdl) != DDI_SUCCESS)) {
                 ddi_fm_service_impact(mpt->m_dip, DDI_SERVICE_UNAFFECTED);
                 return (DDI_FAILURE);
         }
+#endif
         return (DDI_SUCCESS);
 }
 
 /*
- * Select a helper thread to handle current doneq
+ * Select a helper thread to handle given doneq.
+ * Note that we don't require to have the main m_mutex here, but worst case
+ * is that we won't follow the thread rotation to the letter.
+ * However must ensure we have the mutex that covers the source dlist when
+ * we actually hand off.
+ *
+ * mpt   - driver soft state holding the helper thread array.
+ * dlist - source done list; its contents are moved to the chosen thread.
  */
 static void
-mptsas_deliver_doneq_thread(mptsas_t *mpt)
+mptsas_deliver_doneq_thread(mptsas_t *mpt, mptsas_done_list_t *dlist)
 {
-        uint64_t                        t, i;
+        /*
+         * NOTE(review): t is only assigned inside the loop below, so this
+         * assumes m_doneq_thread_n is non-zero - confirm against callers.
+         */
+        uint32_t                        t, i, j = mpt->m_doneq_next_thread;
         uint32_t                        min = 0xffffffff;
         mptsas_doneq_thread_list_t      *item;
 
+        /*
+         * No need to take individual list mutexes during the loop.
+         * We are only reading values and the worst that will happen is that
+         * we pick the wrong thread.
+         */
         for (i = 0; i < mpt->m_doneq_thread_n; i++) {
-                item = &mpt->m_doneq_thread_id[i];
+                item = &mpt->m_doneq_thread_id[j];
+
                 /*
                  * If the completed command on help thread[i] less than
-                 * doneq_thread_threshold, then pick the thread[i]. Otherwise
+                 * doneq_thread_threshold, then pick the thread[j]. Otherwise
                  * pick a thread which has least completed command.
                  */
-
-                mutex_enter(&item->mutex);
-                if (item->len < mpt->m_doneq_thread_threshold) {
-                        t = i;
-                        mutex_exit(&item->mutex);
+                if (item->dlist.dl_len < mpt->m_doneq_thread_threshold) {
+                        t = j;
                         break;
                 }
-                if (item->len < min) {
-                        min = item->len;
-                        t = i;
+                if (item->dlist.dl_len < min) {
+                        min = item->dlist.dl_len;
+                        t = j;
+                }
+                /* Advance the scan index, wrapping at the thread count. */
+                if (++j == mpt->m_doneq_thread_n) {
+                        j = 0;
+                }
                 }
+        /* Hand the list over under the chosen thread's mutex and wake it. */
+        item = &mpt->m_doneq_thread_id[t];
+        mutex_enter(&item->mutex);
+        mptsas_doneq_mv(dlist, item);
+        cv_signal(&item->cv);
                 mutex_exit(&item->mutex);
+
+        /*
+         * Next time start at the next thread.
+         * This will minimize the potential of grabbing a lock
+         * for a thread that is busy, either on a very busy system
+         * or on one that is configured to do all command completion
+         * processing through threads.
+         */
+        if (++t == mpt->m_doneq_thread_n) {
+                t = 0;
         }
-        mutex_enter(&mpt->m_doneq_thread_id[t].mutex);
-        mptsas_doneq_mv(mpt, t);
-        cv_signal(&mpt->m_doneq_thread_id[t].cv);
-        mutex_exit(&mpt->m_doneq_thread_id[t].mutex);
+        mpt->m_doneq_next_thread = (uint16_t)t;
 }
 
 /*
- * move the current global doneq to the doneq of thead[t]
+ * move one doneq to another.
+ *
+ * from - source done list; left empty on return.
+ * item - helper thread entry whose dlist receives the commands.
+ *
+ * The whole source chain is spliced onto the destination tail in one step
+ * rather than command by command. No locking is done here;
+ * mptsas_deliver_doneq_thread() calls this with item->mutex held.
  */
 static void
-mptsas_doneq_mv(mptsas_t *mpt, uint64_t t)
+mptsas_doneq_mv(mptsas_done_list_t *from, mptsas_doneq_thread_list_t *item)
 {
+        mptsas_done_list_t              *to = &item->dlist;
         mptsas_cmd_t                    *cmd;
-        mptsas_doneq_thread_list_t      *item = &mpt->m_doneq_thread_id[t];
 
-        ASSERT(mutex_owned(&item->mutex));
-        while ((cmd = mpt->m_doneq) != NULL) {
-                if ((mpt->m_doneq = cmd->cmd_linkp) == NULL) {
-                        mpt->m_donetail = &mpt->m_doneq;
-                }
-                cmd->cmd_linkp = NULL;
-                *item->donetail = cmd;
-                item->donetail = &cmd->cmd_linkp;
-                mpt->m_doneq_len--;
-                item->len++;
+        /* Nothing to do when the source list is empty. */
+        if ((cmd = from->dl_q) != NULL) {
+                /* Append the source head, then adopt the source tail. */
+                *to->dl_tail = cmd;
+                to->dl_tail = from->dl_tail;
+                to->dl_len += from->dl_len;
+                /* Reset the source to an empty list. */
+                from->dl_q = NULL;
+                from->dl_tail = &from->dl_q;
+                from->dl_len = 0;
         }
 }
 
 void
 mptsas_fma_check(mptsas_t *mpt, mptsas_cmd_t *cmd)

@@ -8142,10 +9234,12 @@
         /* Check all acc and dma handles */
         if ((mptsas_check_acc_handle(mpt->m_datap) !=
             DDI_SUCCESS) ||
             (mptsas_check_acc_handle(mpt->m_acc_req_frame_hdl) !=
             DDI_SUCCESS) ||
+            (mptsas_check_acc_handle(mpt->m_acc_req_sense_hdl) !=
+            DDI_SUCCESS) ||
             (mptsas_check_acc_handle(mpt->m_acc_reply_frame_hdl) !=
             DDI_SUCCESS) ||
             (mptsas_check_acc_handle(mpt->m_acc_free_queue_hdl) !=
             DDI_SUCCESS) ||
             (mptsas_check_acc_handle(mpt->m_acc_post_queue_hdl) !=

@@ -8161,10 +9255,12 @@
                 pkt->pkt_reason = CMD_TRAN_ERR;
                 pkt->pkt_statistics = 0;
         }
         if ((mptsas_check_dma_handle(mpt->m_dma_req_frame_hdl) !=
             DDI_SUCCESS) ||
+            (mptsas_check_dma_handle(mpt->m_dma_req_sense_hdl) !=
+            DDI_SUCCESS) ||
             (mptsas_check_dma_handle(mpt->m_dma_reply_frame_hdl) !=
             DDI_SUCCESS) ||
             (mptsas_check_dma_handle(mpt->m_dma_free_queue_hdl) !=
             DDI_SUCCESS) ||
             (mptsas_check_dma_handle(mpt->m_dma_post_queue_hdl) !=

@@ -8189,22 +9285,10 @@
             DDI_SUCCESS)))) {
                 ddi_fm_service_impact(mpt->m_dip, DDI_SERVICE_UNAFFECTED);
                 pkt->pkt_reason = CMD_TRAN_ERR;
                 pkt->pkt_statistics = 0;
         }
-        if (cmd->cmd_arqhandle &&
-            (mptsas_check_dma_handle(cmd->cmd_arqhandle) != DDI_SUCCESS)) {
-                ddi_fm_service_impact(mpt->m_dip, DDI_SERVICE_UNAFFECTED);
-                pkt->pkt_reason = CMD_TRAN_ERR;
-                pkt->pkt_statistics = 0;
-        }
-        if (cmd->cmd_ext_arqhandle &&
-            (mptsas_check_dma_handle(cmd->cmd_ext_arqhandle) != DDI_SUCCESS)) {
-                ddi_fm_service_impact(mpt->m_dip, DDI_SERVICE_UNAFFECTED);
-                pkt->pkt_reason = CMD_TRAN_ERR;
-                pkt->pkt_statistics = 0;
-        }
 }
 
 /*
  * These routines manipulate the queue of commands that
  * are waiting for their completion routines to be called.

@@ -8231,47 +9315,72 @@
         /*
          * only add scsi pkts that have completion routines to
          * the doneq.  no intr cmds do not have callbacks.
          */
         if (pkt && (pkt->pkt_comp)) {
-                *mpt->m_donetail = cmd;
-                mpt->m_donetail = &cmd->cmd_linkp;
-                mpt->m_doneq_len++;
+                *mpt->m_dlist.dl_tail = cmd;
+                mpt->m_dlist.dl_tail = &cmd->cmd_linkp;
+                mpt->m_dlist.dl_len++;
+        }
+}
+/*
+ * Mark a command finished and, if its packet has a completion routine,
+ * append it to the done list kept on the reply queue rpqp
+ * (rpqp->rpq_dlist).
+ *
+ * The command must not already have CFLAG_COMPLETED set (asserted).
+ * CFLAG_FINISHED is set, CFLAG_IN_TRANSPORT is cleared and
+ * mptsas_fma_check() is run on the command before it is queued.
+ *
+ * NOTE(review): no lock is taken or asserted here; presumably the caller
+ * holds rpqp->rpq_mutex -- confirm against the call sites.
+ */
+static void
+mptsas_rpdoneq_add(mptsas_t *mpt, mptsas_reply_pqueue_t *rpqp,
+    mptsas_cmd_t *cmd)
+{
+        struct scsi_pkt *pkt = CMD2PKT(cmd);
+
+        NDBG31(("mptsas_rpdoneq_add: cmd=0x%p", (void *)cmd));
+
+        ASSERT((cmd->cmd_flags & CFLAG_COMPLETED) == 0);
+        cmd->cmd_linkp = NULL;
+        cmd->cmd_flags |= CFLAG_FINISHED;
+        cmd->cmd_flags &= ~CFLAG_IN_TRANSPORT;
+
+        mptsas_fma_check(mpt, cmd);
+
+        /*
+         * Only add scsi pkts that have completion routines to
+         * the doneq.  Packets without a pkt_comp callback (e.g.
+         * no-interrupt commands) are not queued.
+         */
+        if (pkt && (pkt->pkt_comp)) {
+                *rpqp->rpq_dlist.dl_tail = cmd;
+                rpqp->rpq_dlist.dl_tail = &cmd->cmd_linkp;
+                rpqp->rpq_dlist.dl_len++;
         }
 }
 
 /*
  * Remove and return the command at the head of the done list belonging
  * to done-queue thread t (mpt->m_doneq_thread_id[t]).  The list length is
  * decremented and the removed command's cmd_linkp is cleared.
  * Returns NULL when that list is empty.
  *
  * NOTE(review): no lock is taken here; presumably the caller holds the
  * thread item's mutex -- confirm against the call sites.
  */
 static mptsas_cmd_t *
 mptsas_doneq_thread_rm(mptsas_t *mpt, uint64_t t)
 {
         mptsas_cmd_t                    *cmd;
         mptsas_doneq_thread_list_t      *item = &mpt->m_doneq_thread_id[t];
 
         /* pop one off the done queue */
-        if ((cmd = item->doneq) != NULL) {
+        if ((cmd = item->dlist.dl_q) != NULL) {
                 /* if the queue is now empty fix the tail pointer */
                 NDBG31(("mptsas_doneq_thread_rm: cmd=0x%p", (void *)cmd));
-                if ((item->doneq = cmd->cmd_linkp) == NULL) {
-                        item->donetail = &item->doneq;
+                if ((item->dlist.dl_q = cmd->cmd_linkp) == NULL) {
+                        item->dlist.dl_tail = &item->dlist.dl_q;
                 }
                 cmd->cmd_linkp = NULL;
-                item->len--;
+                item->dlist.dl_len--;
         }
         return (cmd);
 }
 
 static void
 mptsas_doneq_empty(mptsas_t *mpt)
 {
-        if (mpt->m_doneq && !mpt->m_in_callback) {
+        if (mpt->m_dlist.dl_q) {
                 mptsas_cmd_t    *cmd, *next;
                 struct scsi_pkt *pkt;
 
-                mpt->m_in_callback = 1;
-                cmd = mpt->m_doneq;
-                mpt->m_doneq = NULL;
-                mpt->m_donetail = &mpt->m_doneq;
-                mpt->m_doneq_len = 0;
+                cmd = mpt->m_dlist.dl_q;
+                mpt->m_dlist.dl_q = NULL;
+                mpt->m_dlist.dl_tail = &mpt->m_dlist.dl_q;
+                mpt->m_dlist.dl_len = 0;
 
                 mutex_exit(&mpt->m_mutex);
                 /*
                  * run the completion routines of all the
                  * completed commands

@@ -8284,11 +9393,40 @@
                         pkt = CMD2PKT(cmd);
                         mptsas_pkt_comp(pkt, cmd);
                         cmd = next;
                 }
                 mutex_enter(&mpt->m_mutex);
-                mpt->m_in_callback = 0;
+        }
+}
+/*
+ * Run the completion routine of every command queued on the done list of
+ * reply queue rpqp, leaving that list empty.  Does nothing if the list is
+ * already empty.
+ *
+ * Must be entered with rpqp->rpq_mutex held: the mutex is released while
+ * the completion routines run and is re-acquired before returning, so
+ * anything it protects may have changed by the time this returns.
+ */
+static void
+mptsas_rpdoneq_empty(mptsas_reply_pqueue_t *rpqp)
+{
+        if (rpqp->rpq_dlist.dl_q) {
+                mptsas_cmd_t    *cmd, *next;
+                struct scsi_pkt *pkt;
+
+                cmd = rpqp->rpq_dlist.dl_q;
+                rpqp->rpq_dlist.dl_q = NULL;
+                rpqp->rpq_dlist.dl_tail = &rpqp->rpq_dlist.dl_q;
+                rpqp->rpq_dlist.dl_len = 0;
+
+                mutex_exit(&rpqp->rpq_mutex);
+                /*
+                 * run the completion routines of all the
+                 * completed commands.  The list was detached above,
+                 * so it is walked here without rpq_mutex held.
+                 */
+                while (cmd != NULL) {
+                        next = cmd->cmd_linkp;
+                        cmd->cmd_linkp = NULL;
+                        /* run this command's completion routine */
+                        cmd->cmd_flags |= CFLAG_COMPLETED;
+                        pkt = CMD2PKT(cmd);
+                        mptsas_pkt_comp(pkt, cmd);
+                        cmd = next;
+                }
+                mutex_enter(&rpqp->rpq_mutex);
         }
 }
 
 /*
  * These routines manipulate the target's queue of pending requests

@@ -8374,61 +9512,10 @@
                 prevp = prevp->cmd_linkp;
         }
         cmn_err(CE_PANIC, "mpt: mptsas_waitq_delete: queue botch");
 }
 
-static mptsas_cmd_t *
-mptsas_tx_waitq_rm(mptsas_t *mpt)
-{
-        mptsas_cmd_t *cmd;
-        NDBG7(("mptsas_tx_waitq_rm"));
-
-        MPTSAS_TX_WAITQ_RM(mpt, cmd);
-
-        NDBG7(("mptsas_tx_waitq_rm: cmd=0x%p", (void *)cmd));
-
-        return (cmd);
-}
-
-/*
- * remove specified cmd from the middle of the tx_waitq.
- */
-static void
-mptsas_tx_waitq_delete(mptsas_t *mpt, mptsas_cmd_t *cmd)
-{
-        mptsas_cmd_t *prevp = mpt->m_tx_waitq;
-
-        NDBG7(("mptsas_tx_waitq_delete: mpt=0x%p cmd=0x%p",
-            (void *)mpt, (void *)cmd));
-
-        if (prevp == cmd) {
-                if ((mpt->m_tx_waitq = cmd->cmd_linkp) == NULL)
-                        mpt->m_tx_waitqtail = &mpt->m_tx_waitq;
-
-                cmd->cmd_linkp = NULL;
-                cmd->cmd_queued = FALSE;
-                NDBG7(("mptsas_tx_waitq_delete: mpt=0x%p cmd=0x%p",
-                    (void *)mpt, (void *)cmd));
-                return;
-        }
-
-        while (prevp != NULL) {
-                if (prevp->cmd_linkp == cmd) {
-                        if ((prevp->cmd_linkp = cmd->cmd_linkp) == NULL)
-                                mpt->m_tx_waitqtail = &prevp->cmd_linkp;
-
-                        cmd->cmd_linkp = NULL;
-                        cmd->cmd_queued = FALSE;
-                        NDBG7(("mptsas_tx_waitq_delete: mpt=0x%p cmd=0x%p",
-                            (void *)mpt, (void *)cmd));
-                        return;
-                }
-                prevp = prevp->cmd_linkp;
-        }
-        cmn_err(CE_PANIC, "mpt: mptsas_tx_waitq_delete: queue botch");
-}
-
 /*
  * device and bus reset handling
  *
  * Notes:
  *      - RESET_ALL:    reset the controller

@@ -8540,11 +9627,11 @@
 mptsas_get_bus_addr(struct scsi_device *sd, char *name, int len)
 {
         return (mptsas_get_name(sd, name, len));
 }
 
-void
+static void
 mptsas_set_throttle(mptsas_t *mpt, mptsas_target_t *ptgt, int what)
 {
 
         NDBG25(("mptsas_set_throttle: throttle=%x", what));
 

@@ -8564,10 +9651,60 @@
         } else if (ptgt->m_reset_delay == 0) {
                 ptgt->m_t_throttle = what;
         }
 }
 
 /*
  * Wrapper around mptsas_set_throttle() that holds ptgt->m_t_mutex for the
  * duration of the call.  Returns without taking the lock or changing
  * anything while MPTSAS_SS_QUIESCED or MPTSAS_SS_DRAINING is set in
  * mpt->m_softstate.
  */
+static void
+mptsas_set_throttle_mtx(mptsas_t *mpt, mptsas_target_t *ptgt, int what)
+{
+        if (mpt->m_softstate & (MPTSAS_SS_QUIESCED | MPTSAS_SS_DRAINING)) {
+                return;
+        }
+
+        mutex_enter(&ptgt->m_t_mutex);
+        mptsas_set_throttle(mpt, ptgt, what);
+        mutex_exit(&ptgt->m_t_mutex);
+}
+
+/*
+ * Find all commands in the tx_waitq's for target and lun (if lun not -1),
+ * remove them from the queues and return the linked list.
+ *
+ * Each of the NUM_TX_WAITQ queues is walked under its own txwq_mutex.
+ * Matching commands are unlinked and appended, in the order found, to a
+ * NULL-terminated list chained through cmd_linkp.  After the walk the
+ * queue's txwq_qtail is reset to the link slot of the last command left
+ * on it (or to txwq_cmdq when nothing is left).
+ * Returns NULL when no command matched.
+ *
+ * NOTE(review): cmd_queued is not modified for the removed commands;
+ * confirm whether callers depend on it being cleared.
+ */
+static mptsas_cmd_t *
+mptsas_strip_targetlun_from_txwqs(mptsas_t *mpt, ushort_t target, int lun)
+{
+        mptsas_cmd_t            *cmd, *clist, **tailp, **prev_tailp;
+        mptsas_tx_waitqueue_t   *txwq;
+        int                     i;
+
+        clist = NULL;
+        tailp = &clist;
+
+        for (i = 0; i < NUM_TX_WAITQ; i++) {
+                txwq = &mpt->m_tx_waitq[i];
+                mutex_enter(&txwq->txwq_mutex);
+                prev_tailp = &txwq->txwq_cmdq;
+                cmd = txwq->txwq_cmdq;
+                while (cmd != NULL) {
+                        if (Tgt(cmd) == target &&
+                            (lun == -1 || (Lun(cmd) == lun))) {
+                                *prev_tailp = cmd->cmd_linkp;
+                                *tailp = cmd;
+                                tailp = &cmd->cmd_linkp;
+                                cmd = cmd->cmd_linkp;
+                                *tailp = NULL;
+                        } else {
+                                prev_tailp = &cmd->cmd_linkp;
+                                cmd = cmd->cmd_linkp;
+                        }
+                }
+                txwq->txwq_qtail = prev_tailp;
+                mutex_exit(&txwq->txwq_mutex);
+        }
+        return (clist);
+}
+
 /*
  * Clean up from a device reset.
  * For the case of target reset, this function clears the waitq of all
  * commands for a particular target.   For the case of abort task set, this
  * function clears the waitq of all commands for a particular target/lun.

@@ -8578,13 +9715,16 @@
         mptsas_slots_t  *slots = mpt->m_active;
         mptsas_cmd_t    *cmd, *next_cmd;
         int             slot;
         uchar_t         reason;
         uint_t          stat;
+        hrtime_t        timestamp;
 
         NDBG25(("mptsas_flush_target: target=%d lun=%d", target, lun));
 
+        timestamp = gethrtime();
+
         /*
          * Make sure the I/O Controller has flushed all cmds
          * that are associated with this target for a target reset
          * and target/lun for abort task set.
          * Account for TM requests, which use the last SMID.

@@ -8595,10 +9735,19 @@
                 reason = CMD_RESET;
                 stat = STAT_DEV_RESET;
                 switch (tasktype) {
                 case MPI2_SCSITASKMGMT_TASKTYPE_TARGET_RESET:
                         if (Tgt(cmd) == target) {
+                                if (cmd->cmd_active_expiration <= timestamp) {
+                                        /*
+                                         * When timeout requested, propagate
+                                         * proper reason and statistics to
+                                         * target drivers.
+                                         */
+                                        reason = CMD_TIMEOUT;
+                                        stat |= STAT_TIMEOUT;
+                                }
                                 NDBG25(("mptsas_flush_target discovered non-"
                                     "NULL cmd in slot %d, tasktype 0x%x", slot,
                                     tasktype));
                                 mptsas_dump_cmd(mpt, cmd);
                                 mptsas_remove_cmd(mpt, cmd);

@@ -8610,18 +9759,20 @@
                         reason = CMD_ABORTED;
                         stat = STAT_ABORTED;
                         /*FALLTHROUGH*/
                 case MPI2_SCSITASKMGMT_TASKTYPE_LOGICAL_UNIT_RESET:
                         if ((Tgt(cmd) == target) && (Lun(cmd) == lun)) {
+                                if (cmd->cmd_active_expiration <= timestamp) {
+                                        stat |= STAT_TIMEOUT;
+                                }
 
                                 NDBG25(("mptsas_flush_target discovered non-"
                                     "NULL cmd in slot %d, tasktype 0x%x", slot,
                                     tasktype));
                                 mptsas_dump_cmd(mpt, cmd);
                                 mptsas_remove_cmd(mpt, cmd);
-                                mptsas_set_pkt_reason(mpt, cmd, reason,
-                                    stat);
+                                mptsas_set_pkt_reason(mpt, cmd, reason, stat);
                                 mptsas_doneq_add(mpt, cmd);
                         }
                         break;
                 default:
                         break;

@@ -8646,25 +9797,17 @@
                                     reason, stat);
                                 mptsas_doneq_add(mpt, cmd);
                         }
                         cmd = next_cmd;
                 }
-                mutex_enter(&mpt->m_tx_waitq_mutex);
-                cmd = mpt->m_tx_waitq;
+                cmd = mptsas_strip_targetlun_from_txwqs(mpt, target, -1);
                 while (cmd != NULL) {
                         next_cmd = cmd->cmd_linkp;
-                        if (Tgt(cmd) == target) {
-                                mptsas_tx_waitq_delete(mpt, cmd);
-                                mutex_exit(&mpt->m_tx_waitq_mutex);
-                                mptsas_set_pkt_reason(mpt, cmd,
-                                    reason, stat);
+                        mptsas_set_pkt_reason(mpt, cmd, reason, stat);
                                 mptsas_doneq_add(mpt, cmd);
-                                mutex_enter(&mpt->m_tx_waitq_mutex);
-                        }
                         cmd = next_cmd;
                 }
-                mutex_exit(&mpt->m_tx_waitq_mutex);
                 break;
         case MPI2_SCSITASKMGMT_TASKTYPE_ABRT_TASK_SET:
                 reason = CMD_ABORTED;
                 stat =  STAT_ABORTED;
                 /*FALLTHROUGH*/

@@ -8677,25 +9820,17 @@
                                     reason, stat);
                                 mptsas_doneq_add(mpt, cmd);
                         }
                         cmd = next_cmd;
                 }
-                mutex_enter(&mpt->m_tx_waitq_mutex);
-                cmd = mpt->m_tx_waitq;
+                cmd = mptsas_strip_targetlun_from_txwqs(mpt, target, lun);
                 while (cmd != NULL) {
                         next_cmd = cmd->cmd_linkp;
-                        if ((Tgt(cmd) == target) && (Lun(cmd) == lun)) {
-                                mptsas_tx_waitq_delete(mpt, cmd);
-                                mutex_exit(&mpt->m_tx_waitq_mutex);
-                                mptsas_set_pkt_reason(mpt, cmd,
-                                    reason, stat);
+                        mptsas_set_pkt_reason(mpt, cmd, reason, stat);
                                 mptsas_doneq_add(mpt, cmd);
-                                mutex_enter(&mpt->m_tx_waitq_mutex);
-                        }
                         cmd = next_cmd;
                 }
-                mutex_exit(&mpt->m_tx_waitq_mutex);
                 break;
         default:
                 mptsas_log(mpt, CE_WARN, "Unknown task management type %d.",
                     tasktype);
                 break;

@@ -8708,12 +9843,12 @@
  */
 static void
 mptsas_flush_hba(mptsas_t *mpt)
 {
         mptsas_slots_t  *slots = mpt->m_active;
-        mptsas_cmd_t    *cmd;
-        int             slot;
+        mptsas_cmd_t    *cmd, *ncmd;
+        int             slot, i;
 
         NDBG25(("mptsas_flush_hba"));
 
         /*
          * The I/O Controller should have already sent back

@@ -8734,13 +9869,12 @@
                          * so there will be nothing to check against a time out.
                          * Instead, mark the command as failed due to reset.
                          */
                         mptsas_set_pkt_reason(mpt, cmd, CMD_RESET,
                             STAT_BUS_RESET);
-                        if ((cmd->cmd_flags & CFLAG_PASSTHRU) ||
-                            (cmd->cmd_flags & CFLAG_CONFIG) ||
-                            (cmd->cmd_flags & CFLAG_FW_DIAG)) {
+                        if ((cmd->cmd_flags &
+                            (CFLAG_PASSTHRU | CFLAG_CONFIG | CFLAG_FW_DIAG))) {
                                 cmd->cmd_flags |= CFLAG_FINISHED;
                                 cv_broadcast(&mpt->m_passthru_cv);
                                 cv_broadcast(&mpt->m_config_cv);
                                 cv_broadcast(&mpt->m_fw_diag_cv);
                         }

@@ -8772,20 +9906,26 @@
                         mptsas_doneq_add(mpt, cmd);
                 }
         }
 
         /*
-         * Flush the tx_waitq
+         * Flush the tx_waitqs
          */
-        mutex_enter(&mpt->m_tx_waitq_mutex);
-        while ((cmd = mptsas_tx_waitq_rm(mpt)) != NULL) {
-                mutex_exit(&mpt->m_tx_waitq_mutex);
-                mptsas_set_pkt_reason(mpt, cmd, CMD_RESET, STAT_BUS_RESET);
+        for (i = 0; i < NUM_TX_WAITQ; i++) {
+                mutex_enter(&mpt->m_tx_waitq[i].txwq_mutex);
+                cmd = mpt->m_tx_waitq[i].txwq_cmdq;
+                mpt->m_tx_waitq[i].txwq_cmdq = NULL;
+                mpt->m_tx_waitq[i].txwq_qtail = &mpt->m_tx_waitq[i].txwq_cmdq;
+                mutex_exit(&mpt->m_tx_waitq[i].txwq_mutex);
+                while (cmd != NULL) {
+                        ncmd = cmd->cmd_linkp;
+                        mptsas_set_pkt_reason(mpt, cmd, CMD_RESET,
+                            STAT_BUS_RESET);
                 mptsas_doneq_add(mpt, cmd);
-                mutex_enter(&mpt->m_tx_waitq_mutex);
+                        cmd = ncmd;
+                }
         }
-        mutex_exit(&mpt->m_tx_waitq_mutex);
 
         /*
          * Drain the taskqs prior to reallocating resources.
          */
         mutex_exit(&mpt->m_mutex);

@@ -8839,12 +9979,14 @@
         ASSERT(MUTEX_HELD(&mpt->m_mutex));
 
         NDBG22(("mptsas_setup_bus_reset_delay"));
         for (ptgt = refhash_first(mpt->m_targets); ptgt != NULL;
             ptgt = refhash_next(mpt->m_targets, ptgt)) {
+                mutex_enter(&ptgt->m_t_mutex);
                 mptsas_set_throttle(mpt, ptgt, HOLD_THROTTLE);
                 ptgt->m_reset_delay = mpt->m_scsi_reset_delay;
+                mutex_exit(&ptgt->m_t_mutex);
         }
 
         mptsas_start_watch_reset_delay();
 }
 

@@ -8894,10 +10036,11 @@
 
         ASSERT(mutex_owned(&mpt->m_mutex));
 
         for (ptgt = refhash_first(mpt->m_targets); ptgt != NULL;
             ptgt = refhash_next(mpt->m_targets, ptgt)) {
+                mutex_enter(&ptgt->m_t_mutex);
                 if (ptgt->m_reset_delay != 0) {
                         ptgt->m_reset_delay -=
                             MPTSAS_WATCH_RESET_DELAY_TICK;
                         if (ptgt->m_reset_delay <= 0) {
                                 ptgt->m_reset_delay = 0;

@@ -8906,10 +10049,11 @@
                                 restart++;
                         } else {
                                 done = -1;
                         }
                 }
+                mutex_exit(&ptgt->m_t_mutex);
         }
 
         if (restart > 0) {
                 mptsas_restart_hba(mpt);
         }

@@ -9122,10 +10266,11 @@
  */
 static int
 mptsas_scsi_setcap(struct scsi_address *ap, char *cap, int value, int tgtonly)
 {
         mptsas_t        *mpt = ADDR2MPT(ap);
+        mptsas_target_t *ptgt;
         int             ckey;
         int             rval = FALSE;
 
         NDBG24(("mptsas_scsi_setcap: target=%d, cap=%s value=%x tgtonly=%x",
             ap->a_target, cap, value, tgtonly));

@@ -9163,13 +10308,13 @@
                 } else {
                         rval = FALSE;
                 }
                 break;
         case SCSI_CAP_TAGGED_QING:
-                mptsas_set_throttle(mpt, ((mptsas_tgt_private_t *)
-                    (ap->a_hba_tran->tran_tgt_private))->t_private,
-                    MAX_THROTTLE);
+                ptgt = ((mptsas_tgt_private_t *)
+                    (ap->a_hba_tran->tran_tgt_private))->t_private;
+                mptsas_set_throttle_mtx(mpt, ptgt, MAX_THROTTLE);
                 rval = TRUE;
                 break;
         case SCSI_CAP_QFULL_RETRIES:
                 ((mptsas_tgt_private_t *)(ap->a_hba_tran->tran_tgt_private))->
                     t_private->m_qfull_retries = (uchar_t)value;

@@ -9253,11 +10398,11 @@
 }
 
 /*
  * Error logging, printing, and debug print routines.
  */
-static char *mptsas_label = "mpt_sas";
+static char *mptsas_label = "mpt_sas3";
 
 /*PRINTFLIKE3*/
 void
 mptsas_log(mptsas_t *mpt, int level, char *fmt, ...)
 {

@@ -9284,10 +10429,36 @@
 
         mutex_exit(&mptsas_log_mutex);
 }
 
 #ifdef MPTSAS_DEBUG
+/*
+ * Use a circular buffer to log messages to private memory.
+ * No mutexes, so there is the opportunity for this to miss lines.
+ * But it's fast and does not hold up the proceedings too much.
+ *
+ * mptsas_dbglog_idx serves two purposes: its low 5 bits select which of
+ * the 32 buffers receives the next message, and a value of 0 disables
+ * logging altogether.  Each message is formatted with vsnprintf() into
+ * one 256-byte buffer, so longer messages are truncated.
+ *
+ * NOTE(review): the index is a free-running uint32_t that is only ever
+ * incremented here, so once it wraps to 0 logging stops -- confirm that
+ * this is intended.
+ */
+static char mptsas_dbglog_bufs[32][256];
+static uint32_t mptsas_dbglog_idx = 1;
+
+/*PRINTFLIKE1*/
+void
+mptsas_debug_log(char *fmt, ...)
+{
+        va_list         ap;
+        uint32_t        idx;
+
+        if (!mptsas_dbglog_idx) {
+                return;
+        }
+        idx = (mptsas_dbglog_idx++) & 0x1f;
+
+        va_start(ap, fmt);
+        (void) vsnprintf(mptsas_dbglog_bufs[idx],
+            sizeof (mptsas_dbglog_bufs[0]), fmt, ap);
+        va_end(ap);
+}
+
 /*PRINTFLIKE1*/
 void
 mptsas_printf(char *fmt, ...)
 {
         dev_info_t      *dev = 0;

@@ -9300,11 +10471,11 @@
         va_end(ap);
 
 #ifdef PROM_PRINTF
         prom_printf("%s:\t%s\n", mptsas_label, mptsas_log_buf);
 #else
-        scsi_log(dev, mptsas_label, SCSI_DEBUG, "%s\n", mptsas_log_buf);
+        scsi_log(dev, mptsas_label, CE_CONT, "!%s\n", mptsas_log_buf);
 #endif
         mutex_exit(&mptsas_log_mutex);
 }
 #endif
 

@@ -9372,52 +10543,55 @@
         if (mptsas_timeouts_enabled)
                 mptsas_timeout_id = timeout(mptsas_watch, NULL, mptsas_tick);
         mutex_exit(&mptsas_global_mutex);
 }
 
+int mptsas_monitor_for_txwqs = 1;
 static void
 mptsas_watchsubr(mptsas_t *mpt)
 {
         int             i;
         mptsas_cmd_t    *cmd;
         mptsas_target_t *ptgt = NULL;
+        hrtime_t        timestamp = gethrtime();
+        boolean_t       restart_hba = B_FALSE;
 
         ASSERT(MUTEX_HELD(&mpt->m_mutex));
 
-        NDBG30(("mptsas_watchsubr: mpt=0x%p", (void *)mpt));
+        NDBG30(("mptsas_watchsubr: mpt=0x%p, ncmds %d, nstarted %d",
+            (void *)mpt, mpt->m_ncmds, mpt->m_ncstarted));
 
-#ifdef MPTSAS_TEST
-        if (mptsas_enable_untagged) {
-                mptsas_test_untagged++;
+        mpt->m_lncstarted = mpt->m_ncstarted;
+        if (mpt->m_txwq_thread_n != 0 && mpt->m_txwq_enabled != BLOCKED &&
+            mptsas_monitor_for_txwqs) {
+                i = mpt->m_ncstarted/mptsas_scsi_watchdog_tick;
+                if (i > mpt->m_txwq_thread_threshold) {
+                        mpt->m_txwq_enabled = TRUE;
+                } else if (i < (mpt->m_txwq_thread_threshold>>1)) {
+                        mpt->m_txwq_enabled = FALSE;
         }
-#endif
+        }
+        mpt->m_ncstarted = 0;
 
         /*
          * Check for commands stuck in active slot
          * Account for TM requests, which use the last SMID.
          */
         for (i = 0; i <= mpt->m_active->m_n_normal; i++) {
                 if ((cmd = mpt->m_active->m_slot[i]) != NULL) {
+                        if (cmd->cmd_active_expiration <= timestamp) {
                         if ((cmd->cmd_flags & CFLAG_CMDIOC) == 0) {
-                                cmd->cmd_active_timeout -=
-                                    mptsas_scsi_watchdog_tick;
-                                if (cmd->cmd_active_timeout <= 0) {
                                         /*
                                          * There seems to be a command stuck
                                          * in the active slot.  Drain throttle.
                                          */
-                                        mptsas_set_throttle(mpt,
-                                            cmd->cmd_tgt_addr,
+                                        ptgt = cmd->cmd_tgt_addr;
+                                        mptsas_set_throttle_mtx(mpt, ptgt,
                                             DRAIN_THROTTLE);
-                                }
-                        }
-                        if ((cmd->cmd_flags & CFLAG_PASSTHRU) ||
-                            (cmd->cmd_flags & CFLAG_CONFIG) ||
-                            (cmd->cmd_flags & CFLAG_FW_DIAG)) {
-                                cmd->cmd_active_timeout -=
-                                    mptsas_scsi_watchdog_tick;
-                                if (cmd->cmd_active_timeout <= 0) {
+                                } else if (cmd->cmd_flags &
+                                    (CFLAG_PASSTHRU | CFLAG_CONFIG |
+                                    CFLAG_FW_DIAG)) {
                                         /*
                                          * passthrough command timeout
                                          */
                                         cmd->cmd_flags |= (CFLAG_FINISHED |
                                             CFLAG_TIMEOUT);

@@ -9429,62 +10603,99 @@
                 }
         }
 
         for (ptgt = refhash_first(mpt->m_targets); ptgt != NULL;
             ptgt = refhash_next(mpt->m_targets, ptgt)) {
+                mutex_enter(&ptgt->m_t_mutex);
                 /*
                  * If we were draining due to a qfull condition,
                  * go back to full throttle.
                  */
                 if ((ptgt->m_t_throttle < MAX_THROTTLE) &&
                     (ptgt->m_t_throttle > HOLD_THROTTLE) &&
                     (ptgt->m_t_ncmds < ptgt->m_t_throttle)) {
                         mptsas_set_throttle(mpt, ptgt, MAX_THROTTLE);
-                        mptsas_restart_hba(mpt);
+                        restart_hba = B_TRUE;
                 }
 
-                if ((ptgt->m_t_ncmds > 0) &&
-                    (ptgt->m_timebase)) {
+                cmd = TAILQ_LAST(&ptgt->m_active_cmdq, mptsas_active_cmdq);
+                if (cmd != NULL) {
+                        if (cmd->cmd_active_expiration <= timestamp) {
+                                /*
+                                 * Earliest command timeout expired.
+                                 * Drain throttle.
+                                 */
+                                mptsas_set_throttle(mpt, ptgt, DRAIN_THROTTLE);
 
-                        if (ptgt->m_timebase <=
-                            mptsas_scsi_watchdog_tick) {
-                                ptgt->m_timebase +=
-                                    mptsas_scsi_watchdog_tick;
-                                continue;
-                        }
+                                /*
+                                 * Check for remaining commands.
+                                 */
+                                cmd = TAILQ_FIRST(&ptgt->m_active_cmdq);
+                                if (cmd->cmd_active_expiration > timestamp) {
+                                        /*
+                                         * Wait for remaining commands to
+                                         * complete or time out.
+                                         */
+                                        NDBG23(("command timed out, "
+                                            "pending drain"));
+                                } else {
+                                        mutex_exit(&ptgt->m_t_mutex);
 
-                        ptgt->m_timeout -= mptsas_scsi_watchdog_tick;
+                                        /*
+                                         * All command timeouts expired.
+                                         */
+                                        mptsas_log(mpt, CE_NOTE,
+                                            "Timeout of %d seconds "
+                                            "expired with %d commands on "
+                                            "target %d lun %d.",
+                                            cmd->cmd_pkt->pkt_time,
+                                            ptgt->m_t_ncmds,
+                                            ptgt->m_devhdl, Lun(cmd));
 
-                        if (ptgt->m_timeout < 0) {
-                                mptsas_cmd_timeout(mpt, ptgt->m_devhdl);
+                                        mptsas_cmd_timeout(mpt, ptgt);
                                 continue;
                         }
-
-                        if ((ptgt->m_timeout) <=
-                            mptsas_scsi_watchdog_tick) {
+                        } else if (cmd->cmd_active_expiration <= timestamp +
+                            (hrtime_t)mptsas_scsi_watchdog_tick * NANOSEC) {
                                 NDBG23(("pending timeout"));
-                                mptsas_set_throttle(mpt, ptgt,
-                                    DRAIN_THROTTLE);
+                                mptsas_set_throttle(mpt, ptgt, DRAIN_THROTTLE);
+                        }
                         }
+                mutex_exit(&ptgt->m_t_mutex);
                 }
+        if (restart_hba == B_TRUE) {
+                mptsas_restart_hba(mpt);
         }
 }
 
 /*
  * timeout recovery
  */
 static void
-mptsas_cmd_timeout(mptsas_t *mpt, uint16_t devhdl)
+mptsas_cmd_timeout(mptsas_t *mpt, mptsas_target_t *ptgt)
 {
+        uint16_t        devhdl;
+        uint64_t        sas_wwn;
+        uint8_t         phy;
+        char            wwn_str[MPTSAS_WWN_STRLEN];
+
+        devhdl = ptgt->m_devhdl;
+        sas_wwn = ptgt->m_addr.mta_wwn;
+        phy = ptgt->m_phynum;
+        if (sas_wwn == 0) {
+                (void) sprintf(wwn_str, "p%x", phy);
+        } else {
+                (void) sprintf(wwn_str, "w%016"PRIx64, sas_wwn);
+        }
 
         NDBG29(("mptsas_cmd_timeout: target=%d", devhdl));
         mptsas_log(mpt, CE_WARN, "Disconnected command timeout for "
-            "Target %d", devhdl);
+            "target %d %s,  enclosure %u .", devhdl, wwn_str,
+            ptgt->m_enclosure);
 
         /*
-         * If the current target is not the target passed in,
-         * try to reset that target.
+         * Abort all outstanding commands on the device.
          */
         NDBG29(("mptsas_cmd_timeout: device reset"));
         if (mptsas_do_scsi_reset(mpt, devhdl) != TRUE) {
                 mptsas_log(mpt, CE_WARN, "Target %d reset for command timeout "
                     "recovery failed!", devhdl);

@@ -9529,11 +10740,11 @@
         mutex_enter(&mpt->m_mutex);
 
         /* Set all the throttles to zero */
         for (ptgt = refhash_first(mpt->m_targets); ptgt != NULL;
             ptgt = refhash_next(mpt->m_targets, ptgt)) {
-                mptsas_set_throttle(mpt, ptgt, HOLD_THROTTLE);
+                mptsas_set_throttle_mtx(mpt, ptgt, HOLD_THROTTLE);
         }
 
         /* If there are any outstanding commands in the queue */
         if (mpt->m_ncmds) {
                 mpt->m_softstate |= MPTSAS_SS_DRAINING;

@@ -9544,11 +10755,12 @@
                          * Quiesce has been interrupted
                          */
                         mpt->m_softstate &= ~MPTSAS_SS_DRAINING;
                         for (ptgt = refhash_first(mpt->m_targets); ptgt != NULL;
                             ptgt = refhash_next(mpt->m_targets, ptgt)) {
-                                mptsas_set_throttle(mpt, ptgt, MAX_THROTTLE);
+                                mptsas_set_throttle_mtx(mpt, ptgt,
+                                    MAX_THROTTLE);
                         }
                         mptsas_restart_hba(mpt);
                         if (mpt->m_quiesce_timeid != 0) {
                                 timeout_id_t tid = mpt->m_quiesce_timeid;
                                 mpt->m_quiesce_timeid = 0;

@@ -9581,11 +10793,11 @@
         NDBG28(("mptsas_unquiesce_bus"));
         mutex_enter(&mpt->m_mutex);
         mpt->m_softstate &= ~MPTSAS_SS_QUIESCED;
         for (ptgt = refhash_first(mpt->m_targets); ptgt != NULL;
             ptgt = refhash_next(mpt->m_targets, ptgt)) {
-                mptsas_set_throttle(mpt, ptgt, MAX_THROTTLE);
+                mptsas_set_throttle_mtx(mpt, ptgt, MAX_THROTTLE);
         }
         mptsas_restart_hba(mpt);
         mutex_exit(&mpt->m_mutex);
         return (0);
 }

@@ -9607,11 +10819,12 @@
                          * The throttle may have been reset because
                          * of a SCSI bus reset
                          */
                         for (ptgt = refhash_first(mpt->m_targets); ptgt != NULL;
                             ptgt = refhash_next(mpt->m_targets, ptgt)) {
-                                mptsas_set_throttle(mpt, ptgt, HOLD_THROTTLE);
+                                mptsas_set_throttle_mtx(mpt, ptgt,
+                                    HOLD_THROTTLE);
                         }
 
                         mpt->m_quiesce_timeid = timeout(mptsas_ncmds_checkdrain,
                             mpt, (MPTSAS_QUIESCE_TIMEOUT *
                             drv_usectohz(1000000)));

@@ -9643,100 +10856,147 @@
         NDBG25(("?pkt_scbp=0x%x cmd_flags=0x%x\n", cmd->cmd_pkt->pkt_scbp ?
             *(cmd->cmd_pkt->pkt_scbp) : 0, cmd->cmd_flags));
 }
 
 static void
+mptsas_passthru_sge(ddi_acc_handle_t acc_hdl, mptsas_pt_request_t *pt,
+    pMpi2SGESimple64_t sgep)
+{
+        uint32_t                sge_flags;
+        uint32_t                data_size, dataout_size;
+        ddi_dma_cookie_t        data_cookie;
+        ddi_dma_cookie_t        dataout_cookie;
+
+        data_size = pt->data_size;
+        dataout_size = pt->dataout_size;
+        data_cookie = pt->data_cookie;
+        dataout_cookie = pt->dataout_cookie;
+
+        if (dataout_size) {
+                sge_flags = dataout_size |
+                    ((uint32_t)(MPI2_SGE_FLAGS_SIMPLE_ELEMENT |
+                    MPI2_SGE_FLAGS_END_OF_BUFFER |
+                    MPI2_SGE_FLAGS_HOST_TO_IOC |
+                    MPI2_SGE_FLAGS_64_BIT_ADDRESSING) <<
+                    MPI2_SGE_FLAGS_SHIFT);
+                ddi_put32(acc_hdl, &sgep->FlagsLength, sge_flags);
+                ddi_put32(acc_hdl, &sgep->Address.Low,
+                    (uint32_t)(dataout_cookie.dmac_laddress & 0xffffffffull));
+                ddi_put32(acc_hdl, &sgep->Address.High,
+                    (uint32_t)(dataout_cookie.dmac_laddress >> 32));
+                sgep++;
+        }
+        sge_flags = data_size;
+        sge_flags |= ((uint32_t)(MPI2_SGE_FLAGS_SIMPLE_ELEMENT |
+            MPI2_SGE_FLAGS_LAST_ELEMENT |
+            MPI2_SGE_FLAGS_END_OF_BUFFER |
+            MPI2_SGE_FLAGS_END_OF_LIST |
+            MPI2_SGE_FLAGS_64_BIT_ADDRESSING) <<
+            MPI2_SGE_FLAGS_SHIFT);
+        if (pt->direction == MPTSAS_PASS_THRU_DIRECTION_WRITE) {
+                sge_flags |= ((uint32_t)(MPI2_SGE_FLAGS_HOST_TO_IOC) <<
+                    MPI2_SGE_FLAGS_SHIFT);
+        } else {
+                sge_flags |= ((uint32_t)(MPI2_SGE_FLAGS_IOC_TO_HOST) <<
+                    MPI2_SGE_FLAGS_SHIFT);
+        }
+        ddi_put32(acc_hdl, &sgep->FlagsLength, sge_flags);
+        ddi_put32(acc_hdl, &sgep->Address.Low,
+            (uint32_t)(data_cookie.dmac_laddress & 0xffffffffull));
+        ddi_put32(acc_hdl, &sgep->Address.High,
+            (uint32_t)(data_cookie.dmac_laddress >> 32));
+}
+
+/*
+ * Write the IEEE simple 64-bit SGE(s) for a passthrough request at ieeesgep.
+ *
+ * Mirrors mptsas_passthru_sge() for the IEEE element layout: an optional
+ * element for the data-out cookie (when pt->dataout_size is non-zero),
+ * followed by an element for the data cookie that is always written and is
+ * marked with MPI25_IEEE_SGE_FLAGS_END_OF_LIST.  Length and address are
+ * stored before the flags byte in each element.
+ */
+static void
+mptsas_passthru_ieee_sge(ddi_acc_handle_t acc_hdl, mptsas_pt_request_t *pt,
+    pMpi2IeeeSgeSimple64_t ieeesgep)
+{
+        const ddi_dma_cookie_t  *outc = &pt->dataout_cookie;
+        const ddi_dma_cookie_t  *inc = &pt->data_cookie;
+        const uint32_t          out_len = pt->dataout_size;
+        const uint32_t          in_len = pt->data_size;
+        uint8_t                 flags;
+
+        flags = (MPI2_IEEE_SGE_FLAGS_SIMPLE_ELEMENT |
+            MPI2_IEEE_SGE_FLAGS_SYSTEM_ADDR);
+
+        /* Optional leading element for the data-out buffer. */
+        if (out_len != 0) {
+                ddi_put32(acc_hdl, &ieeesgep->Length, out_len);
+                ddi_put32(acc_hdl, &ieeesgep->Address.Low,
+                    (uint32_t)(outc->dmac_laddress & 0xffffffffull));
+                ddi_put32(acc_hdl, &ieeesgep->Address.High,
+                    (uint32_t)(outc->dmac_laddress >> 32));
+                ddi_put8(acc_hdl, &ieeesgep->Flags, flags);
+                ieeesgep++;
+        }
+
+        /* Final element terminates the list. */
+        flags |= MPI25_IEEE_SGE_FLAGS_END_OF_LIST;
+        ddi_put32(acc_hdl, &ieeesgep->Length, in_len);
+        ddi_put32(acc_hdl, &ieeesgep->Address.Low,
+            (uint32_t)(inc->dmac_laddress & 0xffffffffull));
+        ddi_put32(acc_hdl, &ieeesgep->Address.High,
+            (uint32_t)(inc->dmac_laddress >> 32));
+        ddi_put8(acc_hdl, &ieeesgep->Flags, flags);
+}
+
+static void
 mptsas_start_passthru(mptsas_t *mpt, mptsas_cmd_t *cmd)
 {
         caddr_t                 memp;
         pMPI2RequestHeader_t    request_hdrp;
         struct scsi_pkt         *pkt = cmd->cmd_pkt;
         mptsas_pt_request_t     *pt = pkt->pkt_ha_private;
-        uint32_t                request_size, data_size, dataout_size;
-        uint32_t                direction;
-        ddi_dma_cookie_t        data_cookie;
-        ddi_dma_cookie_t        dataout_cookie;
-        uint32_t                request_desc_low, request_desc_high = 0;
-        uint32_t                i, sense_bufp;
+        uint32_t                request_size;
+        uint64_t                request_desc = 0;
+        uint64_t                sense_bufp;
         uint8_t                 desc_type;
         uint8_t                 *request, function;
         ddi_dma_handle_t        dma_hdl = mpt->m_dma_req_frame_hdl;
         ddi_acc_handle_t        acc_hdl = mpt->m_acc_req_frame_hdl;
 
         desc_type = MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE;
 
         request = pt->request;
-        direction = pt->direction;
         request_size = pt->request_size;
-        data_size = pt->data_size;
-        dataout_size = pt->dataout_size;
-        data_cookie = pt->data_cookie;
-        dataout_cookie = pt->dataout_cookie;
 
         /*
          * Store the passthrough message in memory location
          * corresponding to our slot number
          */
         memp = mpt->m_req_frame + (mpt->m_req_frame_size * cmd->cmd_slot);
         request_hdrp = (pMPI2RequestHeader_t)memp;
         bzero(memp, mpt->m_req_frame_size);
 
-        for (i = 0; i < request_size; i++) {
-                bcopy(request + i, memp + i, 1);
-        }
-
-        if (data_size || dataout_size) {
-                pMpi2SGESimple64_t      sgep;
-                uint32_t                sge_flags;
+        bcopy(request, memp, request_size);
 
-                sgep = (pMpi2SGESimple64_t)((uint8_t *)request_hdrp +
-                    request_size);
-                if (dataout_size) {
+        NDBG15(("mptsas_start_passthru: Func 0x%x, MsgFlags 0x%x, "
+            "size=%d, in %d, out %d", request_hdrp->Function,
+            request_hdrp->MsgFlags, request_size,
+            pt->data_size, pt->dataout_size));
 
-                        sge_flags = dataout_size |
-                            ((uint32_t)(MPI2_SGE_FLAGS_SIMPLE_ELEMENT |
-                            MPI2_SGE_FLAGS_END_OF_BUFFER |
-                            MPI2_SGE_FLAGS_HOST_TO_IOC |
-                            MPI2_SGE_FLAGS_64_BIT_ADDRESSING) <<
-                            MPI2_SGE_FLAGS_SHIFT);
-                        ddi_put32(acc_hdl, &sgep->FlagsLength, sge_flags);
-                        ddi_put32(acc_hdl, &sgep->Address.Low,
-                            (uint32_t)(dataout_cookie.dmac_laddress &
-                            0xffffffffull));
-                        ddi_put32(acc_hdl, &sgep->Address.High,
-                            (uint32_t)(dataout_cookie.dmac_laddress
-                            >> 32));
-                        sgep++;
-                }
-                sge_flags = data_size;
-                sge_flags |= ((uint32_t)(MPI2_SGE_FLAGS_SIMPLE_ELEMENT |
-                    MPI2_SGE_FLAGS_LAST_ELEMENT |
-                    MPI2_SGE_FLAGS_END_OF_BUFFER |
-                    MPI2_SGE_FLAGS_END_OF_LIST |
-                    MPI2_SGE_FLAGS_64_BIT_ADDRESSING) <<
-                    MPI2_SGE_FLAGS_SHIFT);
-                if (direction == MPTSAS_PASS_THRU_DIRECTION_WRITE) {
-                        sge_flags |= ((uint32_t)(MPI2_SGE_FLAGS_HOST_TO_IOC) <<
-                            MPI2_SGE_FLAGS_SHIFT);
+        /*
+         * Add an SGE, even if the length is zero.
+         */
+        if (mpt->m_MPI25 && pt->simple == 0) {
+                mptsas_passthru_ieee_sge(acc_hdl, pt,
+                    (pMpi2IeeeSgeSimple64_t)
+                    ((uint8_t *)request_hdrp + pt->sgl_offset));
                 } else {
-                        sge_flags |= ((uint32_t)(MPI2_SGE_FLAGS_IOC_TO_HOST) <<
-                            MPI2_SGE_FLAGS_SHIFT);
-                }
-                ddi_put32(acc_hdl, &sgep->FlagsLength,
-                    sge_flags);
-                ddi_put32(acc_hdl, &sgep->Address.Low,
-                    (uint32_t)(data_cookie.dmac_laddress &
-                    0xffffffffull));
-                ddi_put32(acc_hdl, &sgep->Address.High,
-                    (uint32_t)(data_cookie.dmac_laddress >> 32));
+                mptsas_passthru_sge(acc_hdl, pt,
+                    (pMpi2SGESimple64_t)
+                    ((uint8_t *)request_hdrp + pt->sgl_offset));
         }
 
         function = request_hdrp->Function;
         if ((function == MPI2_FUNCTION_SCSI_IO_REQUEST) ||
             (function == MPI2_FUNCTION_RAID_SCSI_IO_PASSTHROUGH)) {
                 pMpi2SCSIIORequest_t    scsi_io_req;
 
+                NDBG15(("mptsas_start_passthru: Is SCSI IO Req"));
                 scsi_io_req = (pMpi2SCSIIORequest_t)request_hdrp;
                 /*
                  * Put SGE for data and data_out buffer at the end of
                  * scsi_io_request message header.(64 bytes in total)
                  * Following above SGEs, the residual space will be

@@ -9744,12 +11004,12 @@
                  */
                 ddi_put8(acc_hdl,
                     &scsi_io_req->SenseBufferLength,
                     (uint8_t)(request_size - 64));
 
-                sense_bufp = mpt->m_req_frame_dma_addr +
-                    (mpt->m_req_frame_size * cmd->cmd_slot);
+                sense_bufp = (uint32_t)(mpt->m_req_frame_dma_addr +
+                    (mpt->m_req_frame_size * cmd->cmd_slot) & 0xffffffffull);
                 sense_bufp += 64;
                 ddi_put32(acc_hdl,
                     &scsi_io_req->SenseBufferLowAddress, sense_bufp);
 
                 /*

@@ -9763,36 +11023,397 @@
                  * default request descriptor which is already set, so if this
                  * is a SCSI IO request, change the descriptor to SCSI IO.
                  */
                 if (function == MPI2_FUNCTION_SCSI_IO_REQUEST) {
                         desc_type = MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO;
-                        request_desc_high = (ddi_get16(acc_hdl,
-                            &scsi_io_req->DevHandle) << 16);
+                        request_desc = (((uint64_t)ddi_get16(acc_hdl,
+                            &scsi_io_req->DevHandle)) << 48);
                 }
         }
 
         /*
          * We must wait till the message has been completed before
          * beginning the next message so we wait for this one to
          * finish.
          */
         (void) ddi_dma_sync(dma_hdl, 0, 0, DDI_DMA_SYNC_FORDEV);
-        request_desc_low = (cmd->cmd_slot << 16) + desc_type;
+        request_desc |= ((cmd->cmd_slot << 16) | desc_type);
         cmd->cmd_rfm = NULL;
-        MPTSAS_START_CMD(mpt, request_desc_low, request_desc_high);
+        MPTSAS_START_CMD(mpt, request_desc);
         if ((mptsas_check_dma_handle(dma_hdl) != DDI_SUCCESS) ||
             (mptsas_check_acc_handle(acc_hdl) != DDI_SUCCESS)) {
                 ddi_fm_service_impact(mpt->m_dip, DDI_SERVICE_UNAFFECTED);
         }
 }
 
+/*
+ * Signature shared by the per-function "pre" handlers below.  Each handler
+ * receives the controller state and the passthrough request and fills in
+ * pt->sgl_offset (some also adjust the request or set pt->simple).  The
+ * forward declarations let the handlers be defined in any order and be
+ * referenced from mps_func_list.
+ */
+typedef void (mps_pre_f)(mptsas_t *, mptsas_pt_request_t *);
+static mps_pre_f        mpi_pre_ioc_facts;
+static mps_pre_f        mpi_pre_port_facts;
+static mps_pre_f        mpi_pre_fw_download;
+static mps_pre_f        mpi_pre_fw_25_download;
+static mps_pre_f        mpi_pre_fw_upload;
+static mps_pre_f        mpi_pre_fw_25_upload;
+static mps_pre_f        mpi_pre_sata_passthrough;
+static mps_pre_f        mpi_pre_smp_passthrough;
+static mps_pre_f        mpi_pre_config;
+static mps_pre_f        mpi_pre_sas_io_unit_control;
+static mps_pre_f        mpi_pre_scsi_io_req;
+
+/*
+ * Prepare the pt for a SAS2 FW_DOWNLOAD request.
+ *
+ * Hands off to mpi_pre_fw_25_download() when mpt->m_MPI25 is set.  Otherwise
+ * the transaction context element (TCE) found at the request's SGL field is
+ * sanity checked (a mismatch is only logged) and pt->sgl_offset is set to
+ * just past it.  Unexpected request/reply sizes are reported through NDBG15
+ * but do not stop the request.
+ */
+static void
+mpi_pre_fw_download(mptsas_t *mpt, mptsas_pt_request_t *pt)
+{
+        pMpi2FWDownloadRequest req;
+        pMpi2FWDownloadTCSGE_t tcsge;
+
+        /* SAS3 controllers use a different request layout. */
+        if (mpt->m_MPI25) {
+                mpi_pre_fw_25_download(mpt, pt);
+                return;
+        }
+
+        /*
+         * User requests should come in with the Transaction
+         * context element where the SGL will go. Putting the
+         * SGL after that seems to work, but it is not known
+         * why. Other drivers tend to create an extra SGL and
+         * refer to the TCE through that.
+         */
+        req = (pMpi2FWDownloadRequest)pt->request;
+        tcsge = (pMpi2FWDownloadTCSGE_t)&req->SGL;
+        if (!(tcsge->ContextSize == 0 && tcsge->DetailsLength == 12 &&
+            tcsge->Flags == MPI2_SGE_FLAGS_TRANSACTION_ELEMENT)) {
+                mptsas_log(mpt, CE_WARN, "FW Download tce invalid!");
+        }
+
+        pt->sgl_offset = offsetof(MPI2_FW_DOWNLOAD_REQUEST, SGL) +
+            sizeof (*tcsge);
+        if (pt->sgl_offset != pt->request_size) {
+                NDBG15(("mpi_pre_fw_download(): Incorrect req size, "
+                    "0x%x, should be 0x%x, dataoutsz 0x%x",
+                    (int)pt->request_size, (int)pt->sgl_offset,
+                    (int)pt->dataout_size));
+        }
+        if (sizeof (MPI2_FW_DOWNLOAD_REPLY) > pt->data_size) {
+                NDBG15(("mpi_pre_fw_download(): Incorrect rep size, "
+                    "0x%x, should be 0x%x", pt->data_size,
+                    (int)sizeof (MPI2_FW_DOWNLOAD_REPLY)));
+        }
+}
+
+/*
+ * Prepare the pt for a SAS3 FW_DOWNLOAD request.
+ *
+ * The user request arrives in the SAS2 layout, with a transaction context
+ * element (TCE) where the SGL would go.  The TCE is sanity checked (a
+ * mismatch is only logged), its ImageOffset/ImageSize are copied into the
+ * SAS3 request fields, and pt->sgl_offset is set to the SAS3 SGL field.
+ * Unexpected request/reply sizes are reported through NDBG15 only.
+ */
+static void
+mpi_pre_fw_25_download(mptsas_t *mpt, mptsas_pt_request_t *pt)
+{
+        pMpi2FWDownloadTCSGE_t tcsge;
+        pMpi2FWDownloadRequest req2;
+        pMpi25FWDownloadRequest req25;
+
+        /*
+         * User requests should come in with the Transaction
+         * context element where the SGL will go. The new firmware
+         * doesn't use TCE and has space in the main request for
+         * this information. So move to the right place.
+         */
+        req2 = (pMpi2FWDownloadRequest)pt->request;
+        req25 = (pMpi25FWDownloadRequest)pt->request;
+        tcsge = (pMpi2FWDownloadTCSGE_t)&req2->SGL;
+        if (tcsge->ContextSize != 0 || tcsge->DetailsLength != 12 ||
+            tcsge->Flags != MPI2_SGE_FLAGS_TRANSACTION_ELEMENT) {
+                mptsas_log(mpt, CE_WARN, "FW Download tce invalid!");
+        }
+        /*
+         * NOTE(review): req25 and tcsge alias the same buffer, so the
+         * order of these two copies is kept as-is in case the source and
+         * destination fields overlap -- confirm against the MPI headers.
+         */
+        req25->ImageOffset = tcsge->ImageOffset;
+        req25->ImageSize = tcsge->ImageSize;
+
+        pt->sgl_offset = offsetof(MPI25_FW_DOWNLOAD_REQUEST, SGL);
+        if (pt->request_size != pt->sgl_offset) {
+                NDBG15(("mpi_pre_fw_25_download(): Incorrect req size, "
+                    "0x%x, should be 0x%x, dataoutsz 0x%x",
+                    pt->request_size, pt->sgl_offset,
+                    pt->dataout_size));
+        }
+        /* Report the same reply type that the size check compares against. */
+        if (pt->data_size < sizeof (MPI2_FW_DOWNLOAD_REPLY)) {
+                NDBG15(("mpi_pre_fw_25_download(): Incorrect rep size, "
+                    "0x%x, should be 0x%x", pt->data_size,
+                    (int)sizeof (MPI2_FW_DOWNLOAD_REPLY)));
+        }
+}
+
+/*
+ * Prepare the pt for a SAS2 FW_UPLOAD request.
+ *
+ * Hands off to mpi_pre_fw_25_upload() when mpt->m_MPI25 is set.  Otherwise
+ * the transaction context element (TCE) found at the request's SGL field is
+ * sanity checked (a mismatch is only logged) and pt->sgl_offset is set to
+ * just past it.  Unexpected request/reply sizes are reported through NDBG15
+ * but do not stop the request.
+ */
+static void
+mpi_pre_fw_upload(mptsas_t *mpt, mptsas_pt_request_t *pt)
+{
+        pMpi2FWUploadRequest_t req;
+        pMpi2FWUploadTCSGE_t tcsge;
+
+        /* SAS3 controllers use a different request layout. */
+        if (mpt->m_MPI25) {
+                mpi_pre_fw_25_upload(mpt, pt);
+                return;
+        }
+
+        /*
+         * User requests should come in with the Transaction
+         * context element where the SGL will go. Putting the
+         * SGL after that seems to work, but it is not known
+         * why. Other drivers tend to create an extra SGL and
+         * refer to the TCE through that.
+         */
+        req = (pMpi2FWUploadRequest_t)pt->request;
+        tcsge = (pMpi2FWUploadTCSGE_t)&req->SGL;
+        if (!(tcsge->ContextSize == 0 && tcsge->DetailsLength == 12 &&
+            tcsge->Flags == MPI2_SGE_FLAGS_TRANSACTION_ELEMENT)) {
+                mptsas_log(mpt, CE_WARN, "FW Upload tce invalid!");
+        }
+
+        pt->sgl_offset = offsetof(MPI2_FW_UPLOAD_REQUEST, SGL) +
+            sizeof (*tcsge);
+        if (pt->sgl_offset != pt->request_size) {
+                NDBG15(("mpi_pre_fw_upload(): Incorrect req size, "
+                    "0x%x, should be 0x%x, dataoutsz 0x%x",
+                    pt->request_size, pt->sgl_offset,
+                    pt->dataout_size));
+        }
+        if (sizeof (MPI2_FW_UPLOAD_REPLY) > pt->data_size) {
+                NDBG15(("mpi_pre_fw_upload(): Incorrect rep size, "
+                    "0x%x, should be 0x%x", pt->data_size,
+                    (int)sizeof (MPI2_FW_UPLOAD_REPLY)));
+        }
+}
+
+/*
+ * Prepare the pt for a SAS3 FW_UPLOAD request.
+ *
+ * The user request arrives in the SAS2 layout, with a transaction context
+ * element (TCE) where the SGL would go.  The TCE is sanity checked (a
+ * mismatch is only logged), its ImageOffset/ImageSize are copied into the
+ * SAS3 request fields, and pt->sgl_offset is set to the SAS3 SGL field.
+ * Unexpected request/reply sizes are reported through NDBG15 only.
+ */
+static void
+mpi_pre_fw_25_upload(mptsas_t *mpt, mptsas_pt_request_t *pt)
+{
+        pMpi2FWUploadRequest_t req2 = (pMpi2FWUploadRequest_t)pt->request;
+        pMpi25FWUploadRequest_t req25 = (pMpi25FWUploadRequest_t)pt->request;
+        pMpi2FWUploadTCSGE_t tcsge = (pMpi2FWUploadTCSGE_t)&req2->SGL;
+
+        /*
+         * User requests should come in with the Transaction
+         * context element where the SGL will go. The new firmware
+         * doesn't use TCE and has space in the main request for
+         * this information. So move to the right place.
+         */
+        if (!(tcsge->ContextSize == 0 && tcsge->DetailsLength == 12 &&
+            tcsge->Flags == MPI2_SGE_FLAGS_TRANSACTION_ELEMENT)) {
+                mptsas_log(mpt, CE_WARN, "FW Upload tce invalid!");
+        }
+        /*
+         * NOTE(review): req25 and tcsge alias the same buffer, so the
+         * order of these two copies is kept as-is in case the source and
+         * destination fields overlap -- confirm against the MPI headers.
+         */
+        req25->ImageOffset = tcsge->ImageOffset;
+        req25->ImageSize = tcsge->ImageSize;
+
+        pt->sgl_offset = offsetof(MPI25_FW_UPLOAD_REQUEST, SGL);
+        if (pt->sgl_offset != pt->request_size) {
+                NDBG15(("mpi_pre_fw_25_upload(): Incorrect req size, "
+                    "0x%x, should be 0x%x, dataoutsz 0x%x",
+                    pt->request_size, pt->sgl_offset,
+                    pt->dataout_size));
+        }
+        if (sizeof (MPI2_FW_UPLOAD_REPLY) > pt->data_size) {
+                NDBG15(("mpi_pre_fw_25_upload(): Incorrect rep size, "
+                    "0x%x, should be 0x%x", pt->data_size,
+                    (int)sizeof (MPI2_FW_UPLOAD_REPLY)));
+        }
+}
+
+/*
+ * Prepare the pt for an IOC_FACTS request.
+ *
+ * The SGL is placed directly after the request, so pt->sgl_offset is the
+ * request size.  Request/reply sizes that differ from the MPI2 structure
+ * sizes are reported through NDBG15 only.  mpt is unused.
+ */
+static void
+mpi_pre_ioc_facts(mptsas_t *mpt, mptsas_pt_request_t *pt)
+{
+#ifndef __lock_lint
+        _NOTE(ARGUNUSED(mpt))
+#endif
+        pt->sgl_offset = (uint16_t)pt->request_size;
+
+        if (sizeof (MPI2_IOC_FACTS_REQUEST) != pt->request_size) {
+                NDBG15(("mpi_pre_ioc_facts(): Incorrect req size, "
+                    "0x%x, should be 0x%x, dataoutsz 0x%x",
+                    pt->request_size,
+                    (int)sizeof (MPI2_IOC_FACTS_REQUEST),
+                    pt->dataout_size));
+        }
+        if (sizeof (MPI2_IOC_FACTS_REPLY) != pt->data_size) {
+                NDBG15(("mpi_pre_ioc_facts(): Incorrect rep size, "
+                    "0x%x, should be 0x%x", pt->data_size,
+                    (int)sizeof (MPI2_IOC_FACTS_REPLY)));
+        }
+}
+
+/*
+ * Prepare the pt for a PORT_FACTS request.
+ *
+ * The SGL is placed directly after the request, so pt->sgl_offset is the
+ * request size.  Request/reply sizes that differ from the MPI2 structure
+ * sizes are reported through NDBG15 only.  mpt is unused.
+ */
+static void
+mpi_pre_port_facts(mptsas_t *mpt, mptsas_pt_request_t *pt)
+{
+#ifndef __lock_lint
+        _NOTE(ARGUNUSED(mpt))
+#endif
+        pt->sgl_offset = (uint16_t)pt->request_size;
+
+        if (sizeof (MPI2_PORT_FACTS_REQUEST) != pt->request_size) {
+                NDBG15(("mpi_pre_port_facts(): Incorrect req size, "
+                    "0x%x, should be 0x%x, dataoutsz 0x%x",
+                    pt->request_size,
+                    (int)sizeof (MPI2_PORT_FACTS_REQUEST),
+                    pt->dataout_size));
+        }
+        if (sizeof (MPI2_PORT_FACTS_REPLY) != pt->data_size) {
+                NDBG15(("mpi_pre_port_facts(): Incorrect rep size, "
+                    "0x%x, should be 0x%x", pt->data_size,
+                    (int)sizeof (MPI2_PORT_FACTS_REPLY)));
+        }
+}
+
+/*
+ * Prepare pt for a SATA_PASSTHROUGH request.
+ *
+ * pt->sgl_offset is set to the offset of the request's SGL field.  A request
+ * size other than that offset, or a reply size other than the MPI2 reply
+ * structure size, is reported through NDBG15 only.  mpt is unused.
+ */
+static void
+mpi_pre_sata_passthrough(mptsas_t *mpt, mptsas_pt_request_t *pt)
+{
+#ifndef __lock_lint
+        _NOTE(ARGUNUSED(mpt))
+#endif
+        pt->sgl_offset = offsetof(MPI2_SATA_PASSTHROUGH_REQUEST, SGL);
+
+        if (pt->sgl_offset != pt->request_size) {
+                NDBG15(("mpi_pre_sata_passthrough(): Incorrect req size, "
+                    "0x%x, should be 0x%x, dataoutsz 0x%x",
+                    pt->request_size, pt->sgl_offset,
+                    pt->dataout_size));
+        }
+        if (sizeof (MPI2_SATA_PASSTHROUGH_REPLY) != pt->data_size) {
+                NDBG15(("mpi_pre_sata_passthrough(): Incorrect rep size, "
+                    "0x%x, should be 0x%x", pt->data_size,
+                    (int)sizeof (MPI2_SATA_PASSTHROUGH_REPLY)));
+        }
+}
+
+/*
+ * Prepare pt for an SMP_PASSTHROUGH request.
+ *
+ * pt->sgl_offset is set to the offset of the request's SGL field.  A request
+ * size other than that offset, or a reply size other than the MPI2 reply
+ * structure size, is reported through NDBG15 only.  mpt is unused.
+ */
+static void
+mpi_pre_smp_passthrough(mptsas_t *mpt, mptsas_pt_request_t *pt)
+{
+#ifndef __lock_lint
+        _NOTE(ARGUNUSED(mpt))
+#endif
+        pt->sgl_offset = offsetof(MPI2_SMP_PASSTHROUGH_REQUEST, SGL);
+
+        if (pt->sgl_offset != pt->request_size) {
+                NDBG15(("mpi_pre_smp_passthrough(): Incorrect req size, "
+                    "0x%x, should be 0x%x, dataoutsz 0x%x",
+                    pt->request_size, pt->sgl_offset,
+                    pt->dataout_size));
+        }
+        if (sizeof (MPI2_SMP_PASSTHROUGH_REPLY) != pt->data_size) {
+                NDBG15(("mpi_pre_smp_passthrough(): Incorrect rep size, "
+                    "0x%x, should be 0x%x", pt->data_size,
+                    (int)sizeof (MPI2_SMP_PASSTHROUGH_REPLY)));
+        }
+}
+
+/*
+ * Prepare pt for a CONFIG request.
+ *
+ * pt->sgl_offset is set to the offset of the PageBufferSGE field and
+ * pt->simple is set to 1.  A request size other than that offset, or a reply
+ * size other than the MPI2 reply structure size, is reported through NDBG15
+ * only.  mpt is unused.
+ */
+static void
+mpi_pre_config(mptsas_t *mpt, mptsas_pt_request_t *pt)
+{
+#ifndef __lock_lint
+        _NOTE(ARGUNUSED(mpt))
+#endif
+        pt->simple = 1;
+        pt->sgl_offset = offsetof(MPI2_CONFIG_REQUEST, PageBufferSGE);
+
+        if (pt->sgl_offset != pt->request_size) {
+                NDBG15(("mpi_pre_config(): Incorrect req size, 0x%x, "
+                    "should be 0x%x, dataoutsz 0x%x", pt->request_size,
+                    pt->sgl_offset, pt->dataout_size));
+        }
+        if (sizeof (MPI2_CONFIG_REPLY) != pt->data_size) {
+                NDBG15(("mpi_pre_config(): Incorrect rep size, 0x%x, "
+                    "should be 0x%x", pt->data_size,
+                    (int)sizeof (MPI2_CONFIG_REPLY)));
+        }
+}
+
+/*
+ * Prepare pt for a SCSI_IO_REQ request.
+ *
+ * pt->sgl_offset is set to the offset of the request's SGL field.  A request
+ * size other than that offset, or a reply size other than the MPI2 reply
+ * structure size, is reported through NDBG15 only.  mpt is unused.
+ */
+static void
+mpi_pre_scsi_io_req(mptsas_t *mpt, mptsas_pt_request_t *pt)
+{
+#ifndef __lock_lint
+        _NOTE(ARGUNUSED(mpt))
+#endif
+        pt->sgl_offset = offsetof(MPI2_SCSI_IO_REQUEST, SGL);
+        /* Diagnostics name this function, not mpi_pre_config(). */
+        if (pt->request_size != pt->sgl_offset) {
+                NDBG15(("mpi_pre_scsi_io_req(): Incorrect req size, 0x%x, "
+                    "should be 0x%x, dataoutsz 0x%x", pt->request_size,
+                    pt->sgl_offset,
+                    pt->dataout_size));
+        }
+        if (pt->data_size != sizeof (MPI2_SCSI_IO_REPLY)) {
+                NDBG15(("mpi_pre_scsi_io_req(): Incorrect rep size, 0x%x, "
+                    "should be 0x%x", pt->data_size,
+                    (int)sizeof (MPI2_SCSI_IO_REPLY)));
+        }
+}
+
+/*
+ * Prepare the pt for a SAS_IO_UNIT_CONTROL request.
+ *
+ * No size checks are made; pt->sgl_offset is simply set to the request
+ * size, truncated to 16 bits.  mpt is unused.
+ */
+static void
+mpi_pre_sas_io_unit_control(mptsas_t *mpt, mptsas_pt_request_t *pt)
+{
+#ifndef __lock_lint
+        _NOTE(ARGUNUSED(mpt))
+#endif
+        pt->sgl_offset = (uint16_t)pt->request_size;
+}
+
+/*
+ * A set of functions to prepare a passthrough request (pt) for the
+ * various supported requests.
+ *
+ * Each entry pairs an MPI2 function code with a printable name and the
+ * handler that prepares the pt for it.  The table is terminated by an
+ * entry whose f_pre is NULL; mptsas_prep_sgl_offset() stops its search
+ * there.
+ */
+struct mps_func {
+        U8              Function;
+        char            *Name;
+        mps_pre_f       *f_pre;
+} mps_func_list[] = {
+        { MPI2_FUNCTION_IOC_FACTS, "IOC_FACTS",         mpi_pre_ioc_facts },
+        { MPI2_FUNCTION_PORT_FACTS, "PORT_FACTS",       mpi_pre_port_facts },
+        { MPI2_FUNCTION_FW_DOWNLOAD, "FW_DOWNLOAD",     mpi_pre_fw_download },
+        { MPI2_FUNCTION_FW_UPLOAD, "FW_UPLOAD",         mpi_pre_fw_upload },
+        { MPI2_FUNCTION_SATA_PASSTHROUGH, "SATA_PASSTHROUGH",
+            mpi_pre_sata_passthrough },
+        { MPI2_FUNCTION_SMP_PASSTHROUGH, "SMP_PASSTHROUGH",
+            mpi_pre_smp_passthrough},
+        { MPI2_FUNCTION_SCSI_IO_REQUEST, "SCSI_IO_REQUEST",
+            mpi_pre_scsi_io_req},
+        { MPI2_FUNCTION_CONFIG, "CONFIG",               mpi_pre_config},
+        { MPI2_FUNCTION_SAS_IO_UNIT_CONTROL, "SAS_IO_UNIT_CONTROL",
+            mpi_pre_sas_io_unit_control },
+        { 0xFF, NULL,                           NULL } /* list end */
+};
+
+/*
+ * Fill in pt->sgl_offset for a passthrough request.
+ *
+ * The Function byte of the request header selects a handler from
+ * mps_func_list; the first matching handler is run and its result kept.
+ * If no entry matches before the NULL-handler sentinel, the SGL is assumed
+ * to follow the request and pt->sgl_offset is set to the request size.
+ */
+static void
+mptsas_prep_sgl_offset(mptsas_t *mpt, mptsas_pt_request_t *pt)
+{
+        pMPI2RequestHeader_t    hdr = (pMPI2RequestHeader_t)pt->request;
+        struct mps_func         *f = mps_func_list;
+
+        while (f->f_pre != NULL) {
+                if (f->Function != hdr->Function) {
+                        f++;
+                        continue;
+                }
+                f->f_pre(mpt, pt);
+                NDBG15(("mptsas_prep_sgl_offset: Function %s,"
+                    " sgl_offset 0x%x", f->Name,
+                    pt->sgl_offset));
+                return;
+        }
+
+        /* No handler for this function code. */
+        NDBG15(("mptsas_prep_sgl_offset: Unknown Function 0x%02x,"
+            " returning req_size 0x%x for sgl_offset",
+            hdr->Function, pt->request_size));
+        pt->sgl_offset = (uint16_t)pt->request_size;
+}
 
 
 static int
 mptsas_do_passthru(mptsas_t *mpt, uint8_t *request, uint8_t *reply,
     uint8_t *data, uint32_t request_size, uint32_t reply_size,
-    uint32_t data_size, uint32_t direction, uint8_t *dataout,
+    uint32_t data_size, uint8_t direction, uint8_t *dataout,
     uint32_t dataout_size, short timeout, int mode)
 {
         mptsas_pt_request_t             pt;
         mptsas_dma_alloc_state_t        data_dma_state;
         mptsas_dma_alloc_state_t        dataout_dma_state;

@@ -9865,10 +11486,12 @@
                                 }
                         }
                         mutex_enter(&mpt->m_mutex);
                 }
         }
+        else
+                bzero(&data_dma_state, sizeof (data_dma_state));
 
         if (dataout_size != 0) {
                 dataout_dma_state.size = dataout_size;
                 if (mptsas_dma_alloc(mpt, &dataout_dma_state) != DDI_SUCCESS) {
                         status = ENOMEM;

@@ -9888,10 +11511,12 @@
                                 goto out;
                         }
                 }
                 mutex_enter(&mpt->m_mutex);
         }
+        else
+                bzero(&dataout_dma_state, sizeof (dataout_dma_state));
 
         if ((rvalue = (mptsas_request_from_pool(mpt, &cmd, &pkt))) == -1) {
                 status = EAGAIN;
                 mptsas_log(mpt, CE_NOTE, "event ack command pool is full");
                 goto out;

@@ -9904,15 +11529,17 @@
 
         cmd->ioc_cmd_slot = (uint32_t)(rvalue);
 
         pt.request = (uint8_t *)request_msg;
         pt.direction = direction;
+        pt.simple = 0;
         pt.request_size = request_size;
         pt.data_size = data_size;
         pt.dataout_size = dataout_size;
         pt.data_cookie = data_dma_state.cookie;
         pt.dataout_cookie = dataout_dma_state.cookie;
+        mptsas_prep_sgl_offset(mpt, &pt);
 
         /*
          * Form a blank cmd/pkt to store the acknowledgement message
          */
         pkt->pkt_cdbp           = (opaque_t)&cmd->cmd_cdb[0];

@@ -9965,11 +11592,11 @@
                 pt_flags |= MPTSAS_ADDRESS_REPLY;
                 (void) ddi_dma_sync(mpt->m_dma_reply_frame_hdl, 0, 0,
                     DDI_DMA_SYNC_FORCPU);
                 reply_msg = (pMPI2DefaultReply_t)
                     (mpt->m_reply_frame + (cmd->cmd_rfm -
-                    mpt->m_reply_frame_dma_addr));
+                    (mpt->m_reply_frame_dma_addr&0xfffffffful)));
         }
 
         mptsas_fma_check(mpt, cmd);
         if (pkt->pkt_reason == CMD_TRAN_ERR) {
                 status = EAGAIN;

@@ -10121,11 +11748,11 @@
                 return (mptsas_do_passthru(mpt,
                     (uint8_t *)((uintptr_t)data->PtrRequest),
                     (uint8_t *)((uintptr_t)data->PtrReply),
                     (uint8_t *)((uintptr_t)data->PtrData),
                     data->RequestSize, data->ReplySize,
-                    data->DataSize, data->DataDirection,
+                    data->DataSize, (uint8_t)data->DataDirection,
                     (uint8_t *)((uintptr_t)data->PtrDataOut),
                     data->DataOutSize, data->Timeout, mode));
         } else {
                 return (EINVAL);
         }

@@ -10150,11 +11777,12 @@
 {
         pMpi2DiagBufferPostRequest_t    pDiag_post_msg;
         pMpi2DiagReleaseRequest_t       pDiag_release_msg;
         struct scsi_pkt                 *pkt = cmd->cmd_pkt;
         mptsas_diag_request_t           *diag = pkt->pkt_ha_private;
-        uint32_t                        request_desc_low, i;
+        uint32_t                        i;
+        uint64_t                        request_desc;
 
         ASSERT(mutex_owned(&mpt->m_mutex));
 
         /*
          * Form the diag message depending on the post or release function.

@@ -10203,14 +11831,14 @@
         /*
          * Send the message
          */
         (void) ddi_dma_sync(mpt->m_dma_req_frame_hdl, 0, 0,
             DDI_DMA_SYNC_FORDEV);
-        request_desc_low = (cmd->cmd_slot << 16) +
+        request_desc = (cmd->cmd_slot << 16) |
             MPI2_REQ_DESCRIPT_FLAGS_DEFAULT_TYPE;
         cmd->cmd_rfm = NULL;
-        MPTSAS_START_CMD(mpt, request_desc_low, 0);
+        MPTSAS_START_CMD(mpt, request_desc);
         if ((mptsas_check_dma_handle(mpt->m_dma_req_frame_hdl) !=
             DDI_SUCCESS) ||
             (mptsas_check_acc_handle(mpt->m_acc_req_frame_hdl) !=
             DDI_SUCCESS)) {
                 ddi_fm_service_impact(mpt->m_dip, DDI_SERVICE_UNAFFECTED);

@@ -10304,11 +11932,12 @@
         if (cmd->cmd_rfm) {
                 post_flags |= MPTSAS_ADDRESS_REPLY;
                 (void) ddi_dma_sync(mpt->m_dma_reply_frame_hdl, 0, 0,
                     DDI_DMA_SYNC_FORCPU);
                 reply = (pMpi2DiagBufferPostReply_t)(mpt->m_reply_frame +
-                    (cmd->cmd_rfm - mpt->m_reply_frame_dma_addr));
+                    (cmd->cmd_rfm -
+                    (mpt->m_reply_frame_dma_addr&0xfffffffful)));
 
                 /*
                  * Get the reply message data
                  */
                 iocstatus = ddi_get16(mpt->m_acc_reply_frame_hdl,

@@ -10458,11 +12087,12 @@
         if (cmd->cmd_rfm) {
                 rel_flags |= MPTSAS_ADDRESS_REPLY;
                 (void) ddi_dma_sync(mpt->m_dma_reply_frame_hdl, 0, 0,
                     DDI_DMA_SYNC_FORCPU);
                 reply = (pMpi2DiagReleaseReply_t)(mpt->m_reply_frame +
-                    (cmd->cmd_rfm - mpt->m_reply_frame_dma_addr));
+                    (cmd->cmd_rfm -
+                    (mpt->m_reply_frame_dma_addr&0xfffffffful)));
 
                 /*
                  * Get the reply message data
                  */
                 iocstatus = ddi_get16(mpt->m_acc_reply_frame_hdl,

@@ -11206,11 +12836,11 @@
 mptsas_read_adapter_data(mptsas_t *mpt, mptsas_adapter_data_t *adapter_data)
 {
         char    *driver_verstr = MPTSAS_MOD_STRING;
 
         mptsas_lookup_pci_data(mpt, adapter_data);
-        adapter_data->AdapterType = MPTIOCTL_ADAPTER_TYPE_SAS2;
+        adapter_data->AdapterType = MPTIOCTL_ADAPTER_TYPE_SAS3;
         adapter_data->PCIDeviceHwId = (uint32_t)mpt->m_devid;
         adapter_data->PCIDeviceHwRev = (uint32_t)mpt->m_revid;
         adapter_data->SubSystemId = (uint32_t)mpt->m_ssid;
         adapter_data->SubsystemVendorId = (uint32_t)mpt->m_svid;
         (void) strcpy((char *)&adapter_data->DriverVersion[0], driver_verstr);

@@ -11480,11 +13110,11 @@
         *rval = MPTIOCTL_STATUS_GOOD;
         if (secpolicy_sys_config(credp, B_FALSE) != 0) {
                 return (EPERM);
         }
 
-        mpt = ddi_get_soft_state(mptsas_state, MINOR2INST(getminor(dev)));
+        mpt = ddi_get_soft_state(mptsas3_state, MINOR2INST(getminor(dev)));
         if (mpt == NULL) {
                 /*
                  * Called from iport node, get the states
                  */
                 iport_flag = 1;

@@ -11501,11 +13131,11 @@
                 if (mpt->m_power_level != PM_LEVEL_D0) {
                         mutex_exit(&mpt->m_mutex);
                         if (pm_raise_power(mpt->m_dip, 0, PM_LEVEL_D0) !=
                             DDI_SUCCESS) {
                                 mptsas_log(mpt, CE_WARN,
-                                    "mptsas%d: mptsas_ioctl: Raise power "
+                                    "mptsas3%d: mptsas_ioctl: Raise power "
                                     "request failed.", mpt->m_instance);
                                 (void) pm_idle_component(mpt->m_dip, 0);
                                 return (ENXIO);
                         }
                 } else {

@@ -11741,11 +13371,11 @@
         /*
          * Set all throttles to HOLD
          */
         for (ptgt = refhash_first(mpt->m_targets); ptgt != NULL;
             ptgt = refhash_next(mpt->m_targets, ptgt)) {
-                mptsas_set_throttle(mpt, ptgt, HOLD_THROTTLE);
+                mptsas_set_throttle_mtx(mpt, ptgt, HOLD_THROTTLE);
         }
 
         /*
          * Disable interrupts
          */

@@ -11779,11 +13409,11 @@
         /*
          * Reset the throttles
          */
         for (ptgt = refhash_first(mpt->m_targets); ptgt != NULL;
             ptgt = refhash_next(mpt->m_targets, ptgt)) {
-                mptsas_set_throttle(mpt, ptgt, MAX_THROTTLE);
+                mptsas_set_throttle_mtx(mpt, ptgt, MAX_THROTTLE);
         }
 
         mptsas_doneq_empty(mpt);
         mptsas_restart_hba(mpt);
 

@@ -11802,11 +13432,12 @@
 
 static int
 mptsas_init_chip(mptsas_t *mpt, int first_time)
 {
         ddi_dma_cookie_t        cookie;
-        uint32_t                i;
+        mptsas_reply_pqueue_t   *rpqp;
+        uint32_t                i, j;
         int                     rval;
 
         /*
          * Check to see if the firmware image is valid
          */

@@ -11845,10 +13476,20 @@
         if (mptsas_ioc_get_facts(mpt) == DDI_FAILURE) {
                 mptsas_log(mpt, CE_WARN, "mptsas_ioc_get_facts failed");
                 goto fail;
         }
 
+        /*
+         * Now that we know the chip's MSI-X capabilities, register the
+         * interrupts if that has not already been done. This information
+         * is needed before allocating the reply frames below.
+         */
+        if (mpt->m_intr_cnt == 0) {
+                if (mptsas_register_intrs(mpt) == FALSE)
+                        goto fail;
+        }
+
         mpt->m_targets = refhash_create(MPTSAS_TARGET_BUCKET_COUNT,
             mptsas_target_addr_hash, mptsas_target_addr_cmp,
             mptsas_target_free, sizeof (mptsas_target_t),
             offsetof(mptsas_target_t, m_link),
             offsetof(mptsas_target_t, m_addr), KM_SLEEP);

@@ -11862,10 +13503,14 @@
          */
         if (mptsas_alloc_request_frames(mpt) == DDI_FAILURE) {
                 mptsas_log(mpt, CE_WARN, "mptsas_alloc_request_frames failed");
                 goto fail;
         }
+        if (mptsas_alloc_sense_bufs(mpt) == DDI_FAILURE) {
+                mptsas_log(mpt, CE_WARN, "mptsas_alloc_sense_bufs failed");
+                goto fail;
+        }
         if (mptsas_alloc_free_queue(mpt) == DDI_FAILURE) {
                 mptsas_log(mpt, CE_WARN, "mptsas_alloc_free_queue failed!");
                 goto fail;
         }
         if (mptsas_alloc_post_queue(mpt) == DDI_FAILURE) {

@@ -11887,20 +13532,14 @@
         }
 
         mptsas_alloc_reply_args(mpt);
 
         /*
-         * Initialize reply post index.  Reply free index is initialized after
-         * the next loop.
-         */
-        mpt->m_post_index = 0;
-
-        /*
          * Initialize the Reply Free Queue with the physical addresses of our
          * reply frames.
          */
-        cookie.dmac_address = mpt->m_reply_frame_dma_addr;
+        cookie.dmac_address = mpt->m_reply_frame_dma_addr&0xfffffffful;
         for (i = 0; i < mpt->m_max_replies; i++) {
                 ddi_put32(mpt->m_acc_free_queue_hdl,
                     &((uint32_t *)(void *)mpt->m_free_queue)[i],
                     cookie.dmac_address);
                 cookie.dmac_address += mpt->m_reply_frame_size;

@@ -11914,21 +13553,35 @@
          */
         mpt->m_free_index = i;
         ddi_put32(mpt->m_datap, &mpt->m_reg->ReplyFreeHostIndex, i);
 
         /*
-         * Initialize the reply post queue to 0xFFFFFFFF,0xFFFFFFFF's.
+         * Initialize the reply post queue to 0xFFFFFFFF,0xFFFFFFFF's
+         * and the indexes to 0.
          */
+        rpqp = mpt->m_rep_post_queues;
+        for (j = 0; j < mpt->m_post_reply_qcount; j++) {
         for (i = 0; i < mpt->m_post_queue_depth; i++) {
                 ddi_put64(mpt->m_acc_post_queue_hdl,
-                    &((uint64_t *)(void *)mpt->m_post_queue)[i],
+                            &((uint64_t *)(void *)rpqp->rpq_queue)[i],
                     0xFFFFFFFFFFFFFFFF);
         }
+                rpqp->rpq_index = 0;
+                rpqp++;
+        }
         (void) ddi_dma_sync(mpt->m_dma_post_queue_hdl, 0, 0,
             DDI_DMA_SYNC_FORDEV);
 
         /*
+         * Initialise all the reply post queue indexes.
+         */
+        for (j = 0; j < mpt->m_post_reply_qcount; j++) {
+                ddi_put32(mpt->m_datap, &mpt->m_reg->ReplyPostHostIndex,
+                    j << MPI2_RPHI_MSIX_INDEX_SHIFT);
+        }
+
+        /*
          * Enable ports
          */
         if (mptsas_ioc_enable_port(mpt) == DDI_FAILURE) {
                 mptsas_log(mpt, CE_WARN, "mptsas_ioc_enable_port failed");
                 goto fail;

@@ -11936,20 +13589,24 @@
 
         /*
          * enable events
          */
         if (mptsas_ioc_enable_event_notification(mpt)) {
+                mptsas_log(mpt, CE_WARN,
+                    "mptsas_ioc_enable_event_notification failed");
                 goto fail;
         }
 
         /*
          * We need checks in attach and these.
          * chip_init is called in mult. places
          */
 
         if ((mptsas_check_dma_handle(mpt->m_dma_req_frame_hdl) !=
             DDI_SUCCESS) ||
+            (mptsas_check_dma_handle(mpt->m_dma_req_sense_hdl) !=
+            DDI_SUCCESS) ||
             (mptsas_check_dma_handle(mpt->m_dma_reply_frame_hdl) !=
             DDI_SUCCESS) ||
             (mptsas_check_dma_handle(mpt->m_dma_free_queue_hdl) !=
             DDI_SUCCESS) ||
             (mptsas_check_dma_handle(mpt->m_dma_post_queue_hdl) !=

@@ -11962,10 +13619,12 @@
 
         /* Check all acc handles */
         if ((mptsas_check_acc_handle(mpt->m_datap) != DDI_SUCCESS) ||
             (mptsas_check_acc_handle(mpt->m_acc_req_frame_hdl) !=
             DDI_SUCCESS) ||
+            (mptsas_check_acc_handle(mpt->m_acc_req_sense_hdl) !=
+            DDI_SUCCESS) ||
             (mptsas_check_acc_handle(mpt->m_acc_reply_frame_hdl) !=
             DDI_SUCCESS) ||
             (mptsas_check_acc_handle(mpt->m_acc_free_queue_hdl) !=
             DDI_SUCCESS) ||
             (mptsas_check_acc_handle(mpt->m_acc_post_queue_hdl) !=

@@ -12030,30 +13689,40 @@
                  */
                 cap = pci_config_get8(mpt->m_config_handle, caps_ptr);
                 switch (cap) {
                         case PCI_CAP_ID_PM:
                                 mptsas_log(mpt, CE_NOTE,
-                                    "?mptsas%d supports power management.\n",
+                                    "?mptsas3%d supports power management.\n",
                                     mpt->m_instance);
                                 mpt->m_options |= MPTSAS_OPT_PM;
 
                                 /* Save PMCSR offset */
                                 mpt->m_pmcsr_offset = caps_ptr + PCI_PMCSR;
                                 break;
+                        case PCI_CAP_ID_MSI:
+                                mptsas_log(mpt, CE_NOTE,
+                                    "?mptsas3%d supports MSI.\n",
+                                    mpt->m_instance);
+                                mpt->m_options |= MPTSAS_OPT_MSI;
+                                break;
+                        case PCI_CAP_ID_MSI_X:
+                                mptsas_log(mpt, CE_NOTE,
+                                    "?mptsas3%d supports MSI-X.\n",
+                                    mpt->m_instance);
+                                mpt->m_options |= MPTSAS_OPT_MSI_X;
+                                break;
                         /*
                          * The following capabilities are valid.  Any others
                          * will cause a message to be logged.
                          */
                         case PCI_CAP_ID_VPD:
-                        case PCI_CAP_ID_MSI:
                         case PCI_CAP_ID_PCIX:
                         case PCI_CAP_ID_PCI_E:
-                        case PCI_CAP_ID_MSI_X:
                                 break;
                         default:
                                 mptsas_log(mpt, CE_NOTE,
-                                    "?mptsas%d unrecognized capability "
+                                    "?mptsas3%d unrecognized capability "
                                     "0x%x.\n", mpt->m_instance, cap);
                                 break;
                 }
 
                 /*

@@ -12075,30 +13744,27 @@
                                 "3=On (PCI D0 State)",
                                 NULL
                         };
         uint16_t        pmcsr_stat;
 
-        if (mptsas_get_pci_cap(mpt) == FALSE) {
-                return (DDI_FAILURE);
-        }
         /*
          * If PCI's capability does not support PM, then don't need
          * to registe the pm-components
          */
         if (!(mpt->m_options & MPTSAS_OPT_PM))
                 return (DDI_SUCCESS);
         /*
          * If power management is supported by this chip, create
          * pm-components property for the power management framework
          */
-        (void) sprintf(pmc_name, "NAME=mptsas%d", mpt->m_instance);
+        (void) sprintf(pmc_name, "NAME=mptsas3%d", mpt->m_instance);
         pmc[0] = pmc_name;
         if (ddi_prop_update_string_array(DDI_DEV_T_NONE, mpt->m_dip,
             "pm-components", pmc, 3) != DDI_PROP_SUCCESS) {
                 mpt->m_options &= ~MPTSAS_OPT_PM;
                 mptsas_log(mpt, CE_WARN,
-                    "mptsas%d: pm-component property creation failed.",
+                    "mptsas3%d: pm-component property creation failed.",
                     mpt->m_instance);
                 return (DDI_FAILURE);
         }
 
         /*

@@ -12106,11 +13772,11 @@
          */
         (void) pm_busy_component(mpt->m_dip, 0);
         pmcsr_stat = pci_config_get16(mpt->m_config_handle,
             mpt->m_pmcsr_offset);
         if ((pmcsr_stat & PCI_PMCSR_STATE_MASK) != PCI_PMCSR_D0) {
-                mptsas_log(mpt, CE_WARN, "mptsas%d: Power up the device",
+                mptsas_log(mpt, CE_WARN, "mptsas3%d: Power up the device",
                     mpt->m_instance);
                 pci_config_put16(mpt->m_config_handle, mpt->m_pmcsr_offset,
                     PCI_PMCSR_D0);
         }
         if (pm_power_has_changed(mpt->m_dip, 0, PM_LEVEL_D0) != DDI_SUCCESS) {

@@ -12143,26 +13809,37 @@
         }
 
         NDBG6(("ddi_intr_get_supported_types() returned: 0x%x", intr_types));
 
         /*
+         * Try MSIX first.
+         */
+        if (mptsas_enable_msix && (intr_types & DDI_INTR_TYPE_MSIX)) {
+                if (mptsas_add_intrs(mpt, DDI_INTR_TYPE_MSIX) == DDI_SUCCESS) {
+                        NDBG6(("Using MSI-X interrupt type"));
+                        mpt->m_intr_type = DDI_INTR_TYPE_MSIX;
+                        return (TRUE);
+                }
+        }
+
+        /*
          * Try MSI, but fall back to FIXED
          */
         if (mptsas_enable_msi && (intr_types & DDI_INTR_TYPE_MSI)) {
                 if (mptsas_add_intrs(mpt, DDI_INTR_TYPE_MSI) == DDI_SUCCESS) {
-                        NDBG0(("Using MSI interrupt type"));
+                        NDBG6(("Using MSI interrupt type"));
                         mpt->m_intr_type = DDI_INTR_TYPE_MSI;
                         return (TRUE);
                 }
         }
         if (intr_types & DDI_INTR_TYPE_FIXED) {
                 if (mptsas_add_intrs(mpt, DDI_INTR_TYPE_FIXED) == DDI_SUCCESS) {
-                        NDBG0(("Using FIXED interrupt type"));
+                        NDBG6(("Using FIXED interrupt type"));
                         mpt->m_intr_type = DDI_INTR_TYPE_FIXED;
                         return (TRUE);
                 } else {
-                        NDBG0(("FIXED interrupt registration failed"));
+                        NDBG6(("FIXED interrupt registration failed"));
                         return (FALSE);
                 }
         }
 
         return (FALSE);

@@ -12195,54 +13872,76 @@
                     "ret %d count %d\n", ret, count);
 
                 return (DDI_FAILURE);
         }
 
-        /* Get number of available interrupts */
+        /* Get number of interrupts available to this device */
         ret = ddi_intr_get_navail(dip, intr_type, &avail);
         if ((ret != DDI_SUCCESS) || (avail == 0)) {
                 mptsas_log(mpt, CE_WARN, "ddi_intr_get_navail() failed, "
                     "ret %d avail %d\n", ret, avail);
 
                 return (DDI_FAILURE);
         }
 
-        if (avail < count) {
+        if (count < avail) {
                 mptsas_log(mpt, CE_NOTE, "ddi_intr_get_nvail returned %d, "
                     "navail() returned %d", count, avail);
         }
 
-        /* Mpt only have one interrupt routine */
-        if ((intr_type == DDI_INTR_TYPE_MSI) && (count > 1)) {
-                count = 1;
+        NDBG6(("mptsas_add_intrs:count %d, avail %d", count, avail));
+
+        if (intr_type == DDI_INTR_TYPE_MSIX) {
+                if (!mptsas3_max_msix_intrs) {
+                        return (DDI_FAILURE);
+                }
+
+                /*
+                 * Restrict the number of interrupts, firstly by
+                 * the number returned from the IOCInfo, then by
+                 * overall restriction.
+                 */
+                if (avail > mpt->m_max_msix_vectors) {
+                        avail = mpt->m_max_msix_vectors?
+                            mpt->m_max_msix_vectors:1;
+                        NDBG6(("mptsas_add_intrs: mmmv avail %d", avail));
+                }
+                if (avail > mptsas3_max_msix_intrs) {
+                        avail = mptsas3_max_msix_intrs;
+                        NDBG6(("mptsas_add_intrs: m3mmi avail %d", avail));
+                }
+        }
+        if (intr_type == DDI_INTR_TYPE_MSI) {
+                NDBG6(("mptsas_add_intrs: MSI avail %d", avail));
+                avail = 1;
         }
 
         /* Allocate an array of interrupt handles */
-        mpt->m_intr_size = count * sizeof (ddi_intr_handle_t);
+        mpt->m_intr_size = avail * sizeof (ddi_intr_handle_t);
         mpt->m_htable = kmem_alloc(mpt->m_intr_size, KM_SLEEP);
 
         flag = DDI_INTR_ALLOC_NORMAL;
 
         /* call ddi_intr_alloc() */
         ret = ddi_intr_alloc(dip, mpt->m_htable, intr_type, 0,
-            count, &actual, flag);
+            avail, &actual, flag);
 
         if ((ret != DDI_SUCCESS) || (actual == 0)) {
                 mptsas_log(mpt, CE_WARN, "ddi_intr_alloc() failed, ret %d\n",
                     ret);
                 kmem_free(mpt->m_htable, mpt->m_intr_size);
                 return (DDI_FAILURE);
         }
 
+        NDBG6(("mptsas_add_intrs: actual %d, avail %d", actual, avail));
         /* use interrupt count returned or abort? */
-        if (actual < count) {
-                mptsas_log(mpt, CE_NOTE, "Requested: %d, Received: %d\n",
-                    count, actual);
+        if (actual < avail) {
+                mptsas_log(mpt, CE_NOTE,
+                    "Interrupts requested: %d, received: %d\n",
+                    avail, actual);
         }
 
-        mpt->m_intr_cnt = actual;
-
         /*
          * Get priority for first msi, assume remaining are all the same
          */
         if ((ret = ddi_intr_get_pri(mpt->m_htable[0],
             &mpt->m_intr_pri)) != DDI_SUCCESS) {

@@ -12299,10 +13998,12 @@
 
                 kmem_free(mpt->m_htable, mpt->m_intr_size);
                 return (DDI_FAILURE);
         }
 
+        mpt->m_intr_cnt = actual;
+
         /*
          * Enable interrupts
          */
         if (mpt->m_intr_cap & DDI_INTR_FLAG_BLOCK) {
                 /* Call ddi_intr_block_enable() for MSI interrupts */

@@ -12311,10 +14012,26 @@
                 /* Call ddi_intr_enable for MSI or FIXED interrupts */
                 for (i = 0; i < mpt->m_intr_cnt; i++) {
                         (void) ddi_intr_enable(mpt->m_htable[i]);
                 }
         }
+
+        switch (intr_type) {
+        case DDI_INTR_TYPE_MSIX:
+                mptsas_log(mpt, CE_NOTE, "?Using %d MSI-X interrupt(s) "
+                    "(Available sys %d, mpt %d, Requested %d)\n",
+                    actual, count, mpt->m_max_msix_vectors, avail);
+                break;
+        case DDI_INTR_TYPE_MSI:
+                mptsas_log(mpt, CE_NOTE, "Using single MSI interrupt\n");
+                break;
+        case DDI_INTR_TYPE_FIXED:
+        default:
+                mptsas_log(mpt, CE_NOTE, "Using single fixed interrupt\n");
+                break;
+        }
+
         return (DDI_SUCCESS);
 }
 
 /*
  * mptsas_rem_intrs:

@@ -12341,12 +14058,12 @@
         /* Call ddi_intr_remove_handler() */
         for (i = 0; i < mpt->m_intr_cnt; i++) {
                 (void) ddi_intr_remove_handler(mpt->m_htable[i]);
                 (void) ddi_intr_free(mpt->m_htable[i]);
         }
-
         kmem_free(mpt->m_htable, mpt->m_intr_size);
+        mpt->m_intr_cnt = 0;
 }
 
 /*
  * The IO fault service error handling callback function
  */

@@ -12492,17 +14209,17 @@
         mptsas_phymask_t phymask;
         uint8_t         physport, phynum, config, disk;
         uint64_t        devicename;
         uint16_t        pdev_hdl;
         mptsas_target_t *tmp_tgt = NULL;
-        uint16_t        bay_num, enclosure;
+        uint16_t        bay_num, enclosure, io_flags;
 
         ASSERT(*pptgt == NULL);
 
         rval = mptsas_get_sas_device_page0(mpt, page_address, dev_handle,
             &sas_wwn, &dev_info, &physport, &phynum, &pdev_hdl,
-            &bay_num, &enclosure);
+            &bay_num, &enclosure, &io_flags);
         if (rval != DDI_SUCCESS) {
                 rval = DEV_INFO_FAIL_PAGE0;
                 return (rval);
         }
 

@@ -12543,11 +14260,13 @@
                 tmp_tgt->m_deviceinfo = dev_info;
                 tmp_tgt->m_qfull_retries = QFULL_RETRIES;
                 tmp_tgt->m_qfull_retry_interval =
                     drv_usectohz(QFULL_RETRY_INTERVAL * 1000);
                 tmp_tgt->m_t_throttle = MAX_THROTTLE;
+                mutex_init(&tmp_tgt->m_t_mutex, NULL, MUTEX_DRIVER, NULL);
                 devicename = mptsas_get_sata_guid(mpt, tmp_tgt, 0);
+                mutex_destroy(&tmp_tgt->m_t_mutex);
                 kmem_free(tmp_tgt, sizeof (struct mptsas_target));
                 mutex_enter(&mpt->m_mutex);
                 if (devicename != 0 && (((devicename >> 56) & 0xf0) == 0x50)) {
                         sas_wwn = devicename;
                 } else if (dev_info & MPI2_SAS_DEVICE_INFO_DIRECT_ATTACH) {

@@ -12562,10 +14281,11 @@
                 mptsas_log(mpt, CE_WARN, "Failed to allocated target"
                     "structure!");
                 rval = DEV_INFO_FAIL_ALLOC;
                 return (rval);
         }
+        (*pptgt)->m_io_flags = io_flags;
         (*pptgt)->m_enclosure = enclosure;
         (*pptgt)->m_slot_num = bay_num;
         return (DEV_INFO_SUCCESS);
 }
 

@@ -12699,10 +14419,11 @@
         if (pktp == NULL) {
                 goto out;
         }
         bcopy(cdb, pktp->pkt_cdbp, cdblen);
         pktp->pkt_flags = FLAG_NOPARITY;
+        pktp->pkt_time = mptsas_scsi_pkt_time;
         if (scsi_poll(pktp) < 0) {
                 goto out;
         }
         if (((struct scsi_status *)pktp->pkt_scbp)->sts_chk) {
                 goto out;

@@ -13547,11 +15268,11 @@
  * Return fail if not all the childs/paths are freed.
  * if there is any path under the HBA, the return value will be always fail
  * because we didn't call mdi_pi_free for path
  */
 static int
-mptsas_offline_target(dev_info_t *pdip, char *name)
+mptsas_offline_targetdev(dev_info_t *pdip, char *name)
 {
         dev_info_t              *child = NULL, *prechild = NULL;
         mdi_pathinfo_t          *pip = NULL, *savepip = NULL;
         int                     tmp_rval, rval = DDI_SUCCESS;
         char                    *addr, *cp;

@@ -13582,11 +15303,11 @@
                 if (tmp_rval != DDI_SUCCESS) {
                         rval = DDI_FAILURE;
                         if (ndi_prop_create_boolean(DDI_DEV_T_NONE,
                             prechild, MPTSAS_DEV_GONE) !=
                             DDI_PROP_SUCCESS) {
-                                mptsas_log(mpt, CE_WARN, "mptsas driver "
+                                mptsas_log(mpt, CE_WARN,
                                     "unable to create property for "
                                     "SAS %s (MPTSAS_DEV_GONE)", addr);
                         }
                 }
         }

@@ -13959,12 +15680,11 @@
         int                     mdi_rtn = MDI_FAILURE;
         int                     rval = DDI_FAILURE;
         char                    *old_guid = NULL;
         mptsas_t                *mpt = DIP2MPT(pdip);
         char                    *lun_addr = NULL;
-        char                    *wwn_str = NULL;
-        char                    *attached_wwn_str = NULL;
+        char                    wwn_str[MPTSAS_WWN_STRLEN];
         char                    *component = NULL;
         uint8_t                 phy = 0xFF;
         uint64_t                sas_wwn;
         int64_t                 lun64 = 0;
         uint32_t                devinfo;

@@ -13974,11 +15694,11 @@
         uint64_t                pdev_sas_wwn;
         uint32_t                pdev_info;
         uint8_t                 physport;
         uint8_t                 phy_id;
         uint32_t                page_address;
-        uint16_t                bay_num, enclosure;
+        uint16_t                bay_num, enclosure, io_flags;
         char                    pdev_wwn_str[MPTSAS_WWN_STRLEN];
         uint32_t                dev_info;
 
         mutex_enter(&mpt->m_mutex);
         target = ptgt->m_devhdl;

@@ -14068,17 +15788,16 @@
 
         /*
          * if nodename can't be determined then print a message and skip it
          */
         if (nodename == NULL) {
-                mptsas_log(mpt, CE_WARN, "mptsas driver found no compatible "
+                mptsas_log(mpt, CE_WARN, "found no compatible "
                     "driver for target%d lun %d dtype:0x%02x", target, lun,
                     inq->inq_dtype);
                 return (DDI_FAILURE);
         }
 
-        wwn_str = kmem_zalloc(MPTSAS_WWN_STRLEN, KM_SLEEP);
         /* The property is needed by MPAPI */
         (void) sprintf(wwn_str, "%016"PRIx64, sas_wwn);
 
         lun_addr = kmem_zalloc(SCSI_MAXNAMELEN, KM_SLEEP);
         if (guid) {

@@ -14094,56 +15813,56 @@
             0, pip);
         if (mdi_rtn == MDI_SUCCESS) {
 
                 if (mdi_prop_update_string(*pip, MDI_GUID,
                     guid) != DDI_SUCCESS) {
-                        mptsas_log(mpt, CE_WARN, "mptsas driver unable to "
+                        mptsas_log(mpt, CE_WARN, "unable to "
                             "create prop for target %d lun %d (MDI_GUID)",
                             target, lun);
                         mdi_rtn = MDI_FAILURE;
                         goto virt_create_done;
                 }
 
                 if (mdi_prop_update_int(*pip, LUN_PROP,
                     lun) != DDI_SUCCESS) {
-                        mptsas_log(mpt, CE_WARN, "mptsas driver unable to "
+                        mptsas_log(mpt, CE_WARN, "unable to "
                             "create prop for target %d lun %d (LUN_PROP)",
                             target, lun);
                         mdi_rtn = MDI_FAILURE;
                         goto virt_create_done;
                 }
                 lun64 = (int64_t)lun;
                 if (mdi_prop_update_int64(*pip, LUN64_PROP,
                     lun64) != DDI_SUCCESS) {
-                        mptsas_log(mpt, CE_WARN, "mptsas driver unable to "
+                        mptsas_log(mpt, CE_WARN, "unable to "
                             "create prop for target %d (LUN64_PROP)",
                             target);
                         mdi_rtn = MDI_FAILURE;
                         goto virt_create_done;
                 }
                 if (mdi_prop_update_string_array(*pip, "compatible",
                     compatible, ncompatible) !=
                     DDI_PROP_SUCCESS) {
-                        mptsas_log(mpt, CE_WARN, "mptsas driver unable to "
+                        mptsas_log(mpt, CE_WARN, "unable to "
                             "create prop for target %d lun %d (COMPATIBLE)",
                             target, lun);
                         mdi_rtn = MDI_FAILURE;
                         goto virt_create_done;
                 }
                 if (sas_wwn && (mdi_prop_update_string(*pip,
                     SCSI_ADDR_PROP_TARGET_PORT, wwn_str) != DDI_PROP_SUCCESS)) {
-                        mptsas_log(mpt, CE_WARN, "mptsas driver unable to "
+                        mptsas_log(mpt, CE_WARN, "unable to "
                             "create prop for target %d lun %d "
                             "(target-port)", target, lun);
                         mdi_rtn = MDI_FAILURE;
                         goto virt_create_done;
                 } else if ((sas_wwn == 0) && (mdi_prop_update_int(*pip,
                     "sata-phy", phy) != DDI_PROP_SUCCESS)) {
                         /*
                          * Direct attached SATA device without DeviceName
                          */
-                        mptsas_log(mpt, CE_WARN, "mptsas driver unable to "
+                        mptsas_log(mpt, CE_WARN, "unable to "
                             "create prop for SAS target %d lun %d "
                             "(sata-phy)", target, lun);
                         mdi_rtn = MDI_FAILURE;
                         goto virt_create_done;
                 }

@@ -14152,11 +15871,11 @@
                 page_address = (MPI2_SAS_DEVICE_PGAD_FORM_HANDLE &
                     MPI2_SAS_DEVICE_PGAD_FORM_MASK) |
                     (uint32_t)ptgt->m_devhdl;
                 rval = mptsas_get_sas_device_page0(mpt, page_address,
                     &dev_hdl, &dev_sas_wwn, &dev_info, &physport,
-                    &phy_id, &pdev_hdl, &bay_num, &enclosure);
+                    &phy_id, &pdev_hdl, &bay_num, &enclosure, &io_flags);
                 if (rval != DDI_SUCCESS) {
                         mutex_exit(&mpt->m_mutex);
                         mptsas_log(mpt, CE_WARN, "mptsas unable to get "
                             "parent device for handle %d", page_address);
                         mdi_rtn = MDI_FAILURE;

@@ -14165,11 +15884,11 @@
 
                 page_address = (MPI2_SAS_DEVICE_PGAD_FORM_HANDLE &
                     MPI2_SAS_DEVICE_PGAD_FORM_MASK) | (uint32_t)pdev_hdl;
                 rval = mptsas_get_sas_device_page0(mpt, page_address,
                     &dev_hdl, &pdev_sas_wwn, &pdev_info, &physport,
-                    &phy_id, &pdev_hdl, &bay_num, &enclosure);
+                    &phy_id, &pdev_hdl, &bay_num, &enclosure, &io_flags);
                 if (rval != DDI_SUCCESS) {
                         mutex_exit(&mpt->m_mutex);
                         mptsas_log(mpt, CE_WARN, "mptsas unable to get"
                             "device info for handle %d", page_address);
                         mdi_rtn = MDI_FAILURE;

@@ -14212,13 +15931,13 @@
                 }
 
                 if (mdi_prop_update_string(*pip,
                     SCSI_ADDR_PROP_ATTACHED_PORT, pdev_wwn_str) !=
                     DDI_PROP_SUCCESS) {
-                        mptsas_log(mpt, CE_WARN, "mptsas unable to create "
+                        mptsas_log(mpt, CE_WARN, "unable to create "
                             "property for iport attached-port %s (sas_wwn)",
-                            attached_wwn_str);
+                            pdev_wwn_str);
                         mdi_rtn = MDI_FAILURE;
                         goto virt_create_done;
                 }
 
 

@@ -14230,11 +15949,11 @@
                         (void) snprintf(component, MAXPATHLEN,
                             "disk@%s", lun_addr);
 
                         if (mdi_pi_pathname_obp_set(*pip, component) !=
                             DDI_SUCCESS) {
-                                mptsas_log(mpt, CE_WARN, "mpt_sas driver "
+                                mptsas_log(mpt, CE_WARN,
                                     "unable to set obp-path for object %s",
                                     component);
                                 mdi_rtn = MDI_FAILURE;
                                 goto virt_create_done;
                         }

@@ -14244,11 +15963,11 @@
                 if (devinfo & (MPI2_SAS_DEVICE_INFO_SATA_DEVICE |
                     MPI2_SAS_DEVICE_INFO_ATAPI_DEVICE)) {
                         if ((ndi_prop_update_int(DDI_DEV_T_NONE, *lun_dip,
                             "pm-capable", 1)) !=
                             DDI_PROP_SUCCESS) {
-                                mptsas_log(mpt, CE_WARN, "mptsas driver"
+                                mptsas_log(mpt, CE_WARN,
                                     "failed to create pm-capable "
                                     "property, target %d", target);
                                 mdi_rtn = MDI_FAILURE;
                                 goto virt_create_done;
                         }

@@ -14256,11 +15975,11 @@
                 /*
                  * Create the phy-num property
                  */
                 if (mdi_prop_update_int(*pip, "phy-num",
                     ptgt->m_phynum) != DDI_SUCCESS) {
-                        mptsas_log(mpt, CE_WARN, "mptsas driver unable to "
+                        mptsas_log(mpt, CE_WARN, "unable to "
                             "create phy-num property for target %d lun %d",
                             target, lun);
                         mdi_rtn = MDI_FAILURE;
                         goto virt_create_done;
                 }

@@ -14285,13 +16004,10 @@
 
         scsi_hba_nodename_compatible_free(nodename, compatible);
         if (lun_addr != NULL) {
                 kmem_free(lun_addr, SCSI_MAXNAMELEN);
         }
-        if (wwn_str != NULL) {
-                kmem_free(wwn_str, MPTSAS_WWN_STRLEN);
-        }
         if (component != NULL) {
                 kmem_free(component, MAXPATHLEN);
         }
 
         return ((mdi_rtn == MDI_SUCCESS) ? DDI_SUCCESS : DDI_FAILURE);

@@ -14308,13 +16024,12 @@
         char                    *nodename = NULL;
         char                    **compatible = NULL;
         int                     ncompatible = 0;
         int                     instance = 0;
         mptsas_t                *mpt = DIP2MPT(pdip);
-        char                    *wwn_str = NULL;
-        char                    *component = NULL;
-        char                    *attached_wwn_str = NULL;
+        char                    wwn_str[MPTSAS_WWN_STRLEN];
+        char                    component[MAXPATHLEN];
         uint8_t                 phy = 0xFF;
         uint64_t                sas_wwn;
         uint32_t                devinfo;
         uint16_t                dev_hdl;
         uint16_t                pdev_hdl;

@@ -14322,11 +16037,11 @@
         uint64_t                dev_sas_wwn;
         uint32_t                pdev_info;
         uint8_t                 physport;
         uint8_t                 phy_id;
         uint32_t                page_address;
-        uint16_t                bay_num, enclosure;
+        uint16_t                bay_num, enclosure, io_flags;
         char                    pdev_wwn_str[MPTSAS_WWN_STRLEN];
         uint32_t                dev_info;
         int64_t                 lun64 = 0;
 
         mutex_enter(&mpt->m_mutex);

@@ -14393,11 +16108,10 @@
                  * We need the SAS WWN for non-multipath devices, so
                  * we'll use the same property as that multipathing
                  * devices need to present for MPAPI. If we don't have
                  * a WWN (e.g. parallel SCSI), don't create the prop.
                  */
-                wwn_str = kmem_zalloc(MPTSAS_WWN_STRLEN, KM_SLEEP);
                 (void) sprintf(wwn_str, "w%016"PRIx64, sas_wwn);
                 if (sas_wwn && ndi_prop_update_string(DDI_DEV_T_NONE,
                     *lun_dip, SCSI_ADDR_PROP_TARGET_PORT, wwn_str)
                     != DDI_PROP_SUCCESS) {
                         mptsas_log(mpt, CE_WARN, "mptsas unable to "

@@ -14459,11 +16173,11 @@
                     MPI2_SAS_DEVICE_PGAD_FORM_MASK) |
                     (uint32_t)ptgt->m_devhdl;
                 rval = mptsas_get_sas_device_page0(mpt, page_address,
                     &dev_hdl, &dev_sas_wwn, &dev_info,
                     &physport, &phy_id, &pdev_hdl,
-                    &bay_num, &enclosure);
+                    &bay_num, &enclosure, &io_flags);
                 if (rval != DDI_SUCCESS) {
                         mutex_exit(&mpt->m_mutex);
                         mptsas_log(mpt, CE_WARN, "mptsas unable to get"
                             "parent device for handle %d.", page_address);
                         ndi_rtn = NDI_FAILURE;

@@ -14471,12 +16185,12 @@
                 }
 
                 page_address = (MPI2_SAS_DEVICE_PGAD_FORM_HANDLE &
                     MPI2_SAS_DEVICE_PGAD_FORM_MASK) | (uint32_t)pdev_hdl;
                 rval = mptsas_get_sas_device_page0(mpt, page_address,
-                    &dev_hdl, &pdev_sas_wwn, &pdev_info,
-                    &physport, &phy_id, &pdev_hdl, &bay_num, &enclosure);
+                    &dev_hdl, &pdev_sas_wwn, &pdev_info, &physport,
+                    &phy_id, &pdev_hdl, &bay_num, &enclosure, &io_flags);
                 if (rval != DDI_SUCCESS) {
                         mutex_exit(&mpt->m_mutex);
                         mptsas_log(mpt, CE_WARN, "mptsas unable to create "
                             "device for handle %d.", page_address);
                         ndi_rtn = NDI_FAILURE;

@@ -14522,11 +16236,11 @@
                     *lun_dip, SCSI_ADDR_PROP_ATTACHED_PORT, pdev_wwn_str) !=
                     DDI_PROP_SUCCESS) {
                         mptsas_log(mpt, CE_WARN,
                             "mptsas unable to create "
                             "property for iport attached-port %s (sas_wwn)",
-                            attached_wwn_str);
+                            pdev_wwn_str);
                         ndi_rtn = NDI_FAILURE;
                         goto phys_create_done;
                 }
 
                 if (IS_SATA_DEVICE(dev_info)) {

@@ -14561,11 +16275,11 @@
                  * device.
                  */
                 instance = ddi_get_instance(mpt->m_dip);
                 if (devinfo & (MPI2_SAS_DEVICE_INFO_SATA_DEVICE |
                     MPI2_SAS_DEVICE_INFO_ATAPI_DEVICE)) {
-                        NDBG2(("mptsas%d: creating pm-capable property, "
+                        NDBG2(("mptsas3%d: creating pm-capable property, "
                             "target %d", instance, target));
 
                         if ((ndi_prop_update_int(DDI_DEV_T_NONE,
                             *lun_dip, "pm-capable", 1)) !=
                             DDI_PROP_SUCCESS) {

@@ -14582,11 +16296,10 @@
                         /*
                          * add 'obp-path' properties for devinfo
                          */
                         bzero(wwn_str, sizeof (wwn_str));
                         (void) sprintf(wwn_str, "%016"PRIx64, sas_wwn);
-                        component = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
                         if (guid) {
                                 (void) snprintf(component, MAXPATHLEN,
                                     "disk@w%s,%x", wwn_str, lun);
                         } else {
                                 (void) snprintf(component, MAXPATHLEN,

@@ -14606,11 +16319,11 @@
                  */
                 if (ptgt->m_addr.mta_phymask != 0) {
                         if (ndi_prop_update_int(DDI_DEV_T_NONE,
                             *lun_dip, "phy-num", ptgt->m_phynum) !=
                             DDI_PROP_SUCCESS) {
-                                mptsas_log(mpt, CE_WARN, "mptsas driver "
+                                mptsas_log(mpt, CE_WARN,
                                     "failed to create phy-num property for "
                                     "target %d", target);
                                 ndi_rtn = NDI_FAILURE;
                                 goto phys_create_done;
                         }

@@ -14634,28 +16347,20 @@
 
                 /*
                  * If success set rtn flag, else unwire alloc'd lun
                  */
                 if (ndi_rtn != NDI_SUCCESS) {
-                        NDBG12(("mptsas driver unable to online "
+                        NDBG12(("unable to online "
                             "target %d lun %d", target, lun));
                         ndi_prop_remove_all(*lun_dip);
                         (void) ndi_devi_free(*lun_dip);
                         *lun_dip = NULL;
                 }
         }
 
         scsi_hba_nodename_compatible_free(nodename, compatible);
 
-        if (wwn_str != NULL) {
-                kmem_free(wwn_str, MPTSAS_WWN_STRLEN);
-        }
-        if (component != NULL) {
-                kmem_free(component, MAXPATHLEN);
-        }
-
-
         return ((ndi_rtn == NDI_SUCCESS) ? DDI_SUCCESS : DDI_FAILURE);
 }
 
 static int
 mptsas_probe_smp(dev_info_t *pdip, uint64_t wwn)

@@ -14722,11 +16427,11 @@
         uint16_t        i = 0;
         char            phymask[MPTSAS_MAX_PHYS];
         char            *iport = NULL;
         mptsas_phymask_t        phy_mask = 0;
         uint16_t        attached_devhdl;
-        uint16_t        bay_num, enclosure;
+        uint16_t        bay_num, enclosure, io_flags;
 
         (void) sprintf(wwn_str, "%"PRIx64, smp_node->m_addr.mta_wwn);
 
         /*
          * Probe smp device, prevent the node of removed device from being

@@ -14789,12 +16494,12 @@
                 smp_node->m_pdevhdl = dev_info.m_pdevhdl;
                 page_address = (MPI2_SAS_DEVICE_PGAD_FORM_HANDLE &
                     MPI2_SAS_DEVICE_PGAD_FORM_MASK) |
                     (uint32_t)dev_info.m_pdevhdl;
                 rval = mptsas_get_sas_device_page0(mpt, page_address,
-                    &dev_hdl, &sas_wwn, &smp_node->m_pdevinfo,
-                    &physport, &phy_id, &pdev_hdl, &bay_num, &enclosure);
+                    &dev_hdl, &sas_wwn, &smp_node->m_pdevinfo, &physport,
+                    &phy_id, &pdev_hdl, &bay_num, &enclosure, &io_flags);
                 if (rval != DDI_SUCCESS) {
                         mutex_exit(&mpt->m_mutex);
                         mptsas_log(mpt, CE_WARN, "mptsas unable to get "
                             "device info for %x", page_address);
                         ndi_rtn = NDI_FAILURE;

@@ -14804,11 +16509,12 @@
                 page_address = (MPI2_SAS_DEVICE_PGAD_FORM_HANDLE &
                     MPI2_SAS_DEVICE_PGAD_FORM_MASK) |
                     (uint32_t)dev_info.m_devhdl;
                 rval = mptsas_get_sas_device_page0(mpt, page_address,
                     &dev_hdl, &smp_sas_wwn, &smp_node->m_deviceinfo,
-                    &physport, &phy_id, &pdev_hdl, &bay_num, &enclosure);
+                    &physport, &phy_id, &pdev_hdl, &bay_num, &enclosure,
+                    &io_flags);
                 if (rval != DDI_SUCCESS) {
                         mutex_exit(&mpt->m_mutex);
                         mptsas_log(mpt, CE_WARN, "mptsas unable to get "
                             "device info for %x", page_address);
                         ndi_rtn = NDI_FAILURE;

@@ -14958,11 +16664,11 @@
 static int mptsas_smp_start(struct smp_pkt *smp_pkt)
 {
         uint64_t                        wwn;
         Mpi2SmpPassthroughRequest_t     req;
         Mpi2SmpPassthroughReply_t       rep;
-        uint32_t                        direction = 0;
+        uint8_t                         direction = 0;
         mptsas_t                        *mpt;
         int                             ret;
         uint64_t                        tmp64;
 
         mpt = (mptsas_t *)smp_pkt->smp_pkt_address->

@@ -15262,27 +16968,44 @@
         /* Initialized the tgt structure */
         tmp_tgt->m_qfull_retries = QFULL_RETRIES;
         tmp_tgt->m_qfull_retry_interval =
             drv_usectohz(QFULL_RETRY_INTERVAL * 1000);
         tmp_tgt->m_t_throttle = MAX_THROTTLE;
+        mutex_init(&tmp_tgt->m_t_mutex, NULL, MUTEX_DRIVER, NULL);
+        TAILQ_INIT(&tmp_tgt->m_active_cmdq);
 
         refhash_insert(mpt->m_targets, tmp_tgt);
 
         return (tmp_tgt);
 }
 
+static void
+mptsas_smp_target_copy(mptsas_smp_t *src, mptsas_smp_t *dst)
+{       /* copies four fields from src; the rest of *dst is untouched */
+        dst->m_devhdl = src->m_devhdl;
+        dst->m_deviceinfo = src->m_deviceinfo;
+        dst->m_pdevhdl = src->m_pdevhdl;
+        dst->m_pdevinfo = src->m_pdevinfo;
+}
+
 static mptsas_smp_t *
 mptsas_smp_alloc(mptsas_t *mpt, mptsas_smp_t *data)
 {
         mptsas_target_addr_t addr;
         mptsas_smp_t *ret_data;
 
         addr.mta_wwn = data->m_addr.mta_wwn;
         addr.mta_phymask = data->m_addr.mta_phymask;
         ret_data = refhash_lookup(mpt->m_smp_targets, &addr);
+        /*
+         * If there's already a matching SMP target, update its fields
+         * in place.  Since the address is not changing, it's safe to do
+         * this.  We cannot just bcopy() here because the structure we've
+         * been given has invalid hash links.
+         */
         if (ret_data != NULL) {
-                bcopy(data, ret_data, sizeof (mptsas_smp_t)); /* XXX - dupl */
+                mptsas_smp_target_copy(data, ret_data);
                 return (ret_data);
         }
 
         ret_data = kmem_alloc(sizeof (mptsas_smp_t), KM_SLEEP);
         bcopy(data, ret_data, sizeof (mptsas_smp_t));

@@ -15442,28 +17165,25 @@
         if (cookiep == NULL)
                 cookiep = &new_cookie;
 
         if (ddi_dma_alloc_handle(mpt->m_dip, &dma_attr, DDI_DMA_SLEEP,
             NULL, dma_hdp) != DDI_SUCCESS) {
-                dma_hdp = NULL;
                 return (FALSE);
         }
 
         if (ddi_dma_mem_alloc(*dma_hdp, alloc_size, &mpt->m_dev_acc_attr,
             DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, dma_memp, &alloc_len,
             acc_hdp) != DDI_SUCCESS) {
                 ddi_dma_free_handle(dma_hdp);
-                dma_hdp = NULL;
                 return (FALSE);
         }
 
         if (ddi_dma_addr_bind_handle(*dma_hdp, NULL, *dma_memp, alloc_len,
             (DDI_DMA_RDWR | DDI_DMA_CONSISTENT), DDI_DMA_SLEEP, NULL,
             cookiep, &ncookie) != DDI_DMA_MAPPED) {
                 (void) ddi_dma_mem_free(acc_hdp);
                 ddi_dma_free_handle(dma_hdp);
-                dma_hdp = NULL;
                 return (FALSE);
         }
 
         return (TRUE);
 }

@@ -15475,7 +17195,6 @@
                 return;
 
         (void) ddi_dma_unbind_handle(*dma_hdp);
         (void) ddi_dma_mem_free(acc_hdp);
         ddi_dma_free_handle(dma_hdp);
-        dma_hdp = NULL;
 }