Print this page
5297 mptsas refhash replacement on reset can cause hang

@@ -1325,10 +1325,16 @@
                 mutex_exit(&mpt->m_mutex);
                 mptsas_log(mpt, CE_WARN, "mptsas chip initialization failed");
                 goto fail;
         }
 
+        mpt->m_targets = refhash_create(MPTSAS_TARGET_BUCKET_COUNT,
+            mptsas_target_addr_hash, mptsas_target_addr_cmp,
+            mptsas_target_free, sizeof (mptsas_target_t),
+            offsetof(mptsas_target_t, m_link),
+            offsetof(mptsas_target_t, m_addr), KM_SLEEP);
+
         /*
          * Fill in the phy_info structure and get the base WWID
          */
         if (mptsas_get_manufacture_page5(mpt) == DDI_FAILURE) {
                 mptsas_log(mpt, CE_WARN,

@@ -12711,16 +12717,10 @@
         if (mptsas_ioc_get_facts(mpt) == DDI_FAILURE) {
                 mptsas_log(mpt, CE_WARN, "mptsas_ioc_get_facts failed");
                 goto fail;
         }
 
-        mpt->m_targets = refhash_create(MPTSAS_TARGET_BUCKET_COUNT,
-            mptsas_target_addr_hash, mptsas_target_addr_cmp,
-            mptsas_target_free, sizeof (mptsas_target_t),
-            offsetof(mptsas_target_t, m_link),
-            offsetof(mptsas_target_t, m_addr), KM_SLEEP);
-
         if (mptsas_alloc_active_slots(mpt, KM_SLEEP)) {
                 goto fail;
         }
         /*
          * Allocate request message frames, reply free queue, reply descriptor

@@ -14311,18 +14311,18 @@
          * 2. invalid all the entries in hash table
          *    m_devhdl = 0xffff and m_deviceinfo = 0
          * 3. call sas_device_page/expander_page to update hash table
          */
         mptsas_update_phymask(mpt);
+
         /*
-         * Invalid the existing entries
-         *
-         * XXX - It seems like we should just delete everything here.  We are
-         * holding the lock and are about to refresh all the targets in both
-         * hashes anyway.  Given the path we're in, what outstanding async
-         * event could possibly be trying to reference one of these things
-         * without taking the lock, and how would that be useful anyway?
+         * Remove all the devhdls for existing entries but leave their
+         * addresses alone.  In update_hashtab() below, we'll find all
+         * targets that are still present and reassociate them with
+         * their potentially new devhdls.  Leaving the targets around in
+         * this fashion allows them to be used on the tx waitq even
+         * while IOC reset is occurring.
          */
         for (tp = refhash_first(mpt->m_targets); tp != NULL;
             tp = refhash_next(mpt->m_targets, tp)) {
                 tp->m_devhdl = MPTSAS_INVALID_DEVHDL;
                 tp->m_deviceinfo = 0;