1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2014 Nexenta Systems Inc. All rights reserved.
  25  */
  26 
  27 /*
  28  * Multipath driver interface (MDI) implementation; see mdi_impldefs.h for a
  29  * more detailed discussion of the overall mpxio architecture.
  30  */
  31 
  32 #include <sys/note.h>
  33 #include <sys/types.h>
  34 #include <sys/varargs.h>
  35 #include <sys/param.h>
  36 #include <sys/errno.h>
  37 #include <sys/uio.h>
  38 #include <sys/buf.h>
  39 #include <sys/modctl.h>
  40 #include <sys/open.h>
  41 #include <sys/kmem.h>
  42 #include <sys/poll.h>
  43 #include <sys/conf.h>
  44 #include <sys/bootconf.h>
  45 #include <sys/cmn_err.h>
  46 #include <sys/stat.h>
  47 #include <sys/ddi.h>
  48 #include <sys/sunddi.h>
  49 #include <sys/ddipropdefs.h>
  50 #include <sys/sunndi.h>
  51 #include <sys/ndi_impldefs.h>
  52 #include <sys/promif.h>
  53 #include <sys/sunmdi.h>
  54 #include <sys/mdi_impldefs.h>
  55 #include <sys/taskq.h>
  56 #include <sys/epm.h>
  57 #include <sys/sunpm.h>
  58 #include <sys/modhash.h>
  59 #include <sys/disp.h>
  60 #include <sys/autoconf.h>
  61 #include <sys/sysmacros.h>
  62 
  63 #ifdef  DEBUG
  64 #include <sys/debug.h>
  65 int     mdi_debug = 1;
  66 int     mdi_debug_logonly = 0;
  67 #define MDI_DEBUG(dbglevel, pargs) if (mdi_debug >= (dbglevel))      i_mdi_log pargs
  68 #define MDI_WARN        CE_WARN, __func__
  69 #define MDI_NOTE        CE_NOTE, __func__
  70 #define MDI_CONT        CE_CONT, __func__
  71 static void i_mdi_log(int, const char *, dev_info_t *, const char *, ...);
  72 #else   /* !DEBUG */
  73 #define MDI_DEBUG(dbglevel, pargs)
  74 #endif  /* DEBUG */
  75 int     mdi_debug_consoleonly = 0;
  76 int     mdi_delay = 3;
  77 
  78 extern pri_t    minclsyspri;
  79 extern int      modrootloaded;
  80 
  81 /*
  82  * Global mutex:
  83  * Protects vHCI list and structure members.
  84  */
  85 kmutex_t        mdi_mutex;
  86 
  87 /*
  88  * Registered vHCI class driver lists
  89  */
  90 int             mdi_vhci_count;
  91 mdi_vhci_t      *mdi_vhci_head;
  92 mdi_vhci_t      *mdi_vhci_tail;
  93 
  94 /*
  95  * Client Hash Table size
  96  */
  97 static int      mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;
  98 
  99 /*
 100  * taskq interface definitions
 101  */
 102 #define MDI_TASKQ_N_THREADS     8
 103 #define MDI_TASKQ_PRI           minclsyspri
 104 #define MDI_TASKQ_MINALLOC      (4*mdi_taskq_n_threads)
 105 #define MDI_TASKQ_MAXALLOC      (500*mdi_taskq_n_threads)
 106 
 107 taskq_t                         *mdi_taskq;
 108 static uint_t                   mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;
 109 
 110 #define TICKS_PER_SECOND        (drv_usectohz(1000000))
 111 
 112 /*
 113  * The data should be "quiet" for this interval (in seconds) before the
 114  * vhci cached data is flushed to the disk.
 115  */
 116 static int mdi_vhcache_flush_delay = 10;
 117 
 118 /* number of seconds the vhcache flush daemon will sleep idle before exiting */
 119 static int mdi_vhcache_flush_daemon_idle_time = 60;
 120 
 121 /*
 122  * MDI falls back to discovery of all paths when a bus_config_one fails.
 123  * The following parameters can be used to tune this operation.
 124  *
 125  * mdi_path_discovery_boot
 126  *      Number of times path discovery will be attempted during early boot.
 127  *      Probably there is no reason to ever set this value to greater than one.
 128  *
 129  * mdi_path_discovery_postboot
 130  *      Number of times path discovery will be attempted after early boot.
 131  *      Set it to a minimum of two to allow for discovery of iscsi paths which
 132  *      may happen very late during booting.
 133  *
 134  * mdi_path_discovery_interval
 135  *      Minimum number of seconds MDI will wait between successive discovery
 136  *      of all paths. Set it to -1 to disable discovery of all paths.
 137  */
 138 static int mdi_path_discovery_boot = 1;
 139 static int mdi_path_discovery_postboot = 2;
 140 static int mdi_path_discovery_interval = 10;
 141 
 142 /*
 143  * number of seconds the asynchronous configuration thread will sleep idle
 144  * before exiting.
 145  */
 146 static int mdi_async_config_idle_time = 600;
 147 
 148 static int mdi_bus_config_cache_hash_size = 256;
 149 
 150 /* turns off multithreaded configuration for certain operations */
 151 static int mdi_mtc_off = 0;
 152 
 153 /*
 154  * The "path" to a pathinfo node is identical to the /devices path to a
 155  * devinfo node had the device been enumerated under a pHCI instead of
 156  * a vHCI.  This pathinfo "path" is associated with a 'path_instance'.
 157  * This association persists across create/delete of the pathinfo nodes,
 158  * but not across reboot.
 159  */
 160 static uint_t           mdi_pathmap_instance = 1;       /* 0 -> any path */
 161 static int              mdi_pathmap_hash_size = 256;
 162 static kmutex_t         mdi_pathmap_mutex;
 163 static mod_hash_t       *mdi_pathmap_bypath;            /* "path"->instance */
 164 static mod_hash_t       *mdi_pathmap_byinstance;        /* instance->"path" */
 165 static mod_hash_t       *mdi_pathmap_sbyinstance;       /* inst->shortpath */
 166 
 167 /*
 168  * MDI component property name/value string definitions
 169  */
 170 const char              *mdi_component_prop = "mpxio-component";
 171 const char              *mdi_component_prop_vhci = "vhci";
 172 const char              *mdi_component_prop_phci = "phci";
 173 const char              *mdi_component_prop_client = "client";
 174 
 175 /*
 176  * MDI client global unique identifier property name
 177  */
 178 const char              *mdi_client_guid_prop = "client-guid";
 179 
 180 /*
 181  * MDI client load balancing property name/value string definitions
 182  */
 183 const char              *mdi_load_balance = "load-balance";
 184 const char              *mdi_load_balance_none = "none";
 185 const char              *mdi_load_balance_rr = "round-robin";
 186 const char              *mdi_load_balance_lba = "logical-block";
 187 
 188 /*
 189  * Obsolete vHCI class definition; to be removed after Leadville update
 190  */
 191 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;
 192 
 193 static char vhci_greeting[] =
 194         "\tThere already exists one vHCI driver for class %s\n"
 195         "\tOnly one vHCI driver for each class is allowed\n";
 196 
 197 /*
 198  * Static function prototypes
 199  */
 200 static int              i_mdi_phci_offline(dev_info_t *, uint_t);
 201 static int              i_mdi_client_offline(dev_info_t *, uint_t);
 202 static int              i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
 203 static void             i_mdi_phci_post_detach(dev_info_t *,
 204                             ddi_detach_cmd_t, int);
 205 static int              i_mdi_client_pre_detach(dev_info_t *,
 206                             ddi_detach_cmd_t);
 207 static void             i_mdi_client_post_detach(dev_info_t *,
 208                             ddi_detach_cmd_t, int);
 209 static void             i_mdi_pm_hold_pip(mdi_pathinfo_t *);
 210 static void             i_mdi_pm_rele_pip(mdi_pathinfo_t *);
 211 static int              i_mdi_lba_lb(mdi_client_t *ct,
 212                             mdi_pathinfo_t **ret_pip, struct buf *buf);
 213 static void             i_mdi_pm_hold_client(mdi_client_t *, int);
 214 static void             i_mdi_pm_rele_client(mdi_client_t *, int);
 215 static void             i_mdi_pm_reset_client(mdi_client_t *);
 216 static int              i_mdi_power_all_phci(mdi_client_t *);
 217 static void             i_mdi_log_sysevent(dev_info_t *, char *, char *);
 218 
 219 
 220 /*
 221  * Internal mdi_pathinfo node functions
 222  */
 223 static void             i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);
 224 
 225 static mdi_vhci_t       *i_mdi_vhci_class2vhci(char *);
 226 static mdi_vhci_t       *i_devi_get_vhci(dev_info_t *);
 227 static mdi_phci_t       *i_devi_get_phci(dev_info_t *);
 228 static void             i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
 229 static void             i_mdi_phci_unlock(mdi_phci_t *);
 230 static mdi_pathinfo_t   *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
 231 static void             i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
 232 static void             i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
 233 static void             i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
 234                             mdi_client_t *);
 235 static void             i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
 236 static void             i_mdi_client_remove_path(mdi_client_t *,
 237                             mdi_pathinfo_t *);
 238 
 239 static int              i_mdi_pi_state_change(mdi_pathinfo_t *,
 240                             mdi_pathinfo_state_t, int);
 241 static int              i_mdi_pi_offline(mdi_pathinfo_t *, int);
 242 static dev_info_t       *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
 243                             char **, int);
 244 static dev_info_t       *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
 245 static int              i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
 246 static int              i_mdi_is_child_present(dev_info_t *, dev_info_t *);
 247 static mdi_client_t     *i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
 248 static void             i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
 249 static void             i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
 250 static mdi_client_t     *i_mdi_client_find(mdi_vhci_t *, char *, char *);
 251 static void             i_mdi_client_update_state(mdi_client_t *);
 252 static int              i_mdi_client_compute_state(mdi_client_t *,
 253                             mdi_phci_t *);
 254 static void             i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
 255 static void             i_mdi_client_unlock(mdi_client_t *);
 256 static int              i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
 257 static mdi_client_t     *i_devi_get_client(dev_info_t *);
 258 /*
 259  * NOTE: this will be removed once the NWS files are changed to use the new
 260  * mdi_{enable,disable}_path interfaces
 261  */
 262 static int              i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
 263                                 int, int);
 264 static mdi_pathinfo_t   *i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
 265                                 mdi_vhci_t *vh, int flags, int op);
 266 /*
 267  * Failover related function prototypes
 268  */
 269 static int              i_mdi_failover(void *);
 270 
 271 /*
 272  * misc internal functions
 273  */
 274 static int              i_mdi_get_hash_key(char *);
 275 static int              i_map_nvlist_error_to_mdi(int);
 276 static void             i_mdi_report_path_state(mdi_client_t *,
 277                             mdi_pathinfo_t *);
 278 
 279 static void             setup_vhci_cache(mdi_vhci_t *);
 280 static int              destroy_vhci_cache(mdi_vhci_t *);
 281 static int              stop_vhcache_async_threads(mdi_vhci_config_t *);
 282 static boolean_t        stop_vhcache_flush_thread(void *, int);
 283 static void             free_string_array(char **, int);
 284 static void             free_vhcache_phci(mdi_vhcache_phci_t *);
 285 static void             free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
 286 static void             free_vhcache_client(mdi_vhcache_client_t *);
 287 static int              mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
 288 static nvlist_t         *vhcache_to_mainnvl(mdi_vhci_cache_t *);
 289 static void             vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
 290 static void             vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
 291 static void             vhcache_pi_add(mdi_vhci_config_t *,
 292                             struct mdi_pathinfo *);
 293 static void             vhcache_pi_remove(mdi_vhci_config_t *,
 294                             struct mdi_pathinfo *);
 295 static void             free_phclient_path_list(mdi_phys_path_t *);
 296 static void             sort_vhcache_paths(mdi_vhcache_client_t *);
 297 static int              flush_vhcache(mdi_vhci_config_t *, int);
 298 static void             vhcache_dirty(mdi_vhci_config_t *);
 299 static void             free_async_client_config(mdi_async_client_config_t *);
 300 static void             single_threaded_vhconfig_enter(mdi_vhci_config_t *);
 301 static void             single_threaded_vhconfig_exit(mdi_vhci_config_t *);
 302 static nvlist_t         *read_on_disk_vhci_cache(char *);
 303 extern int              fread_nvlist(char *, nvlist_t **);
 304 extern int              fwrite_nvlist(char *, nvlist_t *);
 305 
 306 /* called once when first vhci registers with mdi */
 307 static void
 308 i_mdi_init()
 309 {
 310         static int initialized = 0;
 311 
 312         if (initialized)
 313                 return;
 314         initialized = 1;
 315 
 316         mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);
 317 
 318         /* Create our taskq resources */
 319         mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
 320             MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
 321             TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
 322         ASSERT(mdi_taskq != NULL);      /* taskq_create never fails */
 323 
 324         /* Allocate ['path_instance' <-> "path"] maps */
 325         mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL);
 326         mdi_pathmap_bypath = mod_hash_create_strhash(
 327             "mdi_pathmap_bypath", mdi_pathmap_hash_size,
 328             mod_hash_null_valdtor);
 329         mdi_pathmap_byinstance = mod_hash_create_idhash(
 330             "mdi_pathmap_byinstance", mdi_pathmap_hash_size,
 331             mod_hash_null_valdtor);
 332         mdi_pathmap_sbyinstance = mod_hash_create_idhash(
 333             "mdi_pathmap_sbyinstance", mdi_pathmap_hash_size,
 334             mod_hash_null_valdtor);
 335 }
 336 
 337 /*
 338  * mdi_get_component_type():
 339  *              Return mpxio component type
 340  * Return Values:
 341  *              MDI_COMPONENT_NONE
 342  *              MDI_COMPONENT_VHCI
 343  *              MDI_COMPONENT_PHCI
 344  *              MDI_COMPONENT_CLIENT
 345  * XXX This doesn't work under multi-level MPxIO and should be
 346  *      removed when clients migrate mdi_component_is_*() interfaces.
 347  */
 348 int
 349 mdi_get_component_type(dev_info_t *dip)
 350 {
 351         return (DEVI(dip)->devi_mdi_component);
 352 }
 353 
 354 /*
 355  * mdi_vhci_register():
 356  *              Register a vHCI module with the mpxio framework
 357  *              mdi_vhci_register() is called by vHCI drivers to register the
 358  *              'class_driver' vHCI driver and its MDI entrypoints with the
 359  *              mpxio framework.  The vHCI driver must call this interface as
 360  *              part of its attach(9e) handler.
 361  *              Competing threads may try to attach mdi_vhci_register() as
 362  *              the vHCI drivers are loaded and attached as a result of pHCI
 363  *              driver instance registration (mdi_phci_register()) with the
 364  *              framework.
 365  * Return Values:
 366  *              MDI_SUCCESS
 367  *              MDI_FAILURE
 368  */
 369 /*ARGSUSED*/
 370 int
 371 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
 372     int flags)
 373 {
 374         mdi_vhci_t              *vh = NULL;
 375 
 376         /* Registrant can't be older */
 377         ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV);
 378 
 379 #ifdef DEBUG
 380         /*
 381          * IB nexus driver is loaded only when IB hardware is present.
 382          * In order to be able to do this there is a need to drive the loading
 383          * and attaching of the IB nexus driver (especially when an IB hardware
 384          * is dynamically plugged in) when an IB HCA driver (PHCI)
 385          * is being attached. Unfortunately this gets into the limitations
 386          * of devfs as there seems to be no clean way to drive configuration
 387          * of a subtree from another subtree of a devfs. Hence, do not ASSERT
 388          * for IB.
 389          */
 390         if (strcmp(class, MDI_HCI_CLASS_IB) != 0)
 391                 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
 392 #endif
 393 
 394         i_mdi_init();
 395 
 396         mutex_enter(&mdi_mutex);
 397         /*
 398          * Scan for already registered vhci
 399          */
 400         for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
 401                 if (strcmp(vh->vh_class, class) == 0) {
 402                         /*
 403                          * vHCI has already been created.  Check for valid
 404                          * vHCI ops registration.  We only support one vHCI
 405                          * module per class
 406                          */
 407                         if (vh->vh_ops != NULL) {
 408                                 mutex_exit(&mdi_mutex);
 409                                 cmn_err(CE_NOTE, vhci_greeting, class);
 410                                 return (MDI_FAILURE);
 411                         }
 412                         break;
 413                 }
 414         }
 415 
 416         /*
 417          * if not yet created, create the vHCI component
 418          */
 419         if (vh == NULL) {
 420                 struct client_hash      *hash = NULL;
 421                 char                    *load_balance;
 422 
 423                 /*
 424                  * Allocate and initialize the mdi extensions
 425                  */
 426                 vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
 427                 hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
 428                     KM_SLEEP);
 429                 vh->vh_client_table = hash;
 430                 vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
 431                 (void) strcpy(vh->vh_class, class);
 432                 vh->vh_lb = LOAD_BALANCE_RR;
 433                 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
 434                     0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
 435                         if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
 436                                 vh->vh_lb = LOAD_BALANCE_NONE;
 437                         } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
 438                                     == 0) {
 439                                 vh->vh_lb = LOAD_BALANCE_LBA;
 440                         }
 441                         ddi_prop_free(load_balance);
 442                 }
 443 
 444                 mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL);
 445                 mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL);
 446 
 447                 /*
 448                  * Store the vHCI ops vectors
 449                  */
 450                 vh->vh_dip = vdip;
 451                 vh->vh_ops = vops;
 452 
 453                 setup_vhci_cache(vh);
 454 
 455                 if (mdi_vhci_head == NULL) {
 456                         mdi_vhci_head = vh;
 457                 }
 458                 if (mdi_vhci_tail) {
 459                         mdi_vhci_tail->vh_next = vh;
 460                 }
 461                 mdi_vhci_tail = vh;
 462                 mdi_vhci_count++;
 463         }
 464 
 465         /*
 466          * Claim the devfs node as a vhci component
 467          */
 468         DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;
 469 
 470         /*
 471          * Initialize our back reference from dev_info node
 472          */
 473         DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
 474         mutex_exit(&mdi_mutex);
 475         return (MDI_SUCCESS);
 476 }
 477 
 478 /*
 479  * mdi_vhci_unregister():
 480  *              Unregister a vHCI module from mpxio framework
 481  *              mdi_vhci_unregister() is called from the detach(9E) entrypoint
 482  *              of a vhci to unregister it from the framework.
 483  * Return Values:
 484  *              MDI_SUCCESS
 485  *              MDI_FAILURE
 486  */
 487 /*ARGSUSED*/
 488 int
 489 mdi_vhci_unregister(dev_info_t *vdip, int flags)
 490 {
 491         mdi_vhci_t      *found, *vh, *prev = NULL;
 492 
 493         ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
 494 
 495         /*
 496          * Check for invalid VHCI
 497          */
 498         if ((vh = i_devi_get_vhci(vdip)) == NULL)
 499                 return (MDI_FAILURE);
 500 
 501         /*
 502          * Scan the list of registered vHCIs for a match
 503          */
 504         mutex_enter(&mdi_mutex);
 505         for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
 506                 if (found == vh)
 507                         break;
 508                 prev = found;
 509         }
 510 
 511         if (found == NULL) {
 512                 mutex_exit(&mdi_mutex);
 513                 return (MDI_FAILURE);
 514         }
 515 
 516         /*
 517          * Check the vHCI, pHCI and client count. All the pHCIs and clients
 518          * should have been unregistered, before a vHCI can be
 519          * unregistered.
 520          */
 521         MDI_VHCI_PHCI_LOCK(vh);
 522         if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) {
 523                 MDI_VHCI_PHCI_UNLOCK(vh);
 524                 mutex_exit(&mdi_mutex);
 525                 return (MDI_FAILURE);
 526         }
 527         MDI_VHCI_PHCI_UNLOCK(vh);
 528 
 529         if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
 530                 mutex_exit(&mdi_mutex);
 531                 return (MDI_FAILURE);
 532         }
 533 
 534         /*
 535          * Remove the vHCI from the global list
 536          */
 537         if (vh == mdi_vhci_head) {
 538                 mdi_vhci_head = vh->vh_next;
 539         } else {
 540                 prev->vh_next = vh->vh_next;
 541         }
 542         if (vh == mdi_vhci_tail) {
 543                 mdi_vhci_tail = prev;
 544         }
 545         mdi_vhci_count--;
 546         mutex_exit(&mdi_mutex);
 547 
 548         vh->vh_ops = NULL;
 549         DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
 550         DEVI(vdip)->devi_mdi_xhci = NULL;
 551         kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
 552         kmem_free(vh->vh_client_table,
 553             mdi_client_table_size * sizeof (struct client_hash));
 554         mutex_destroy(&vh->vh_phci_mutex);
 555         mutex_destroy(&vh->vh_client_mutex);
 556 
 557         kmem_free(vh, sizeof (mdi_vhci_t));
 558         return (MDI_SUCCESS);
 559 }
 560 
 561 /*
 562  * i_mdi_vhci_class2vhci():
 563  *              Look for a matching vHCI module given a vHCI class name
 564  * Return Values:
 565  *              Handle to a vHCI component
 566  *              NULL
 567  */
 568 static mdi_vhci_t *
 569 i_mdi_vhci_class2vhci(char *class)
 570 {
 571         mdi_vhci_t      *vh = NULL;
 572 
 573         ASSERT(!MUTEX_HELD(&mdi_mutex));
 574 
 575         mutex_enter(&mdi_mutex);
 576         for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
 577                 if (strcmp(vh->vh_class, class) == 0) {
 578                         break;
 579                 }
 580         }
 581         mutex_exit(&mdi_mutex);
 582         return (vh);
 583 }
 584 
 585 /*
 586  * i_devi_get_vhci():
 587  *              Utility function to get the handle to a vHCI component
 588  * Return Values:
 589  *              Handle to a vHCI component
 590  *              NULL
 591  */
 592 mdi_vhci_t *
 593 i_devi_get_vhci(dev_info_t *vdip)
 594 {
 595         mdi_vhci_t      *vh = NULL;
 596         if (MDI_VHCI(vdip)) {
 597                 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
 598         }
 599         return (vh);
 600 }
 601 
 602 /*
 603  * mdi_phci_register():
 604  *              Register a pHCI module with mpxio framework
 605  *              mdi_phci_register() is called by pHCI drivers to register with
 606  *              the mpxio framework and a specific 'class_driver' vHCI.  The
 607  *              pHCI driver must call this interface as part of its attach(9e)
 608  *              handler.
 609  * Return Values:
 610  *              MDI_SUCCESS
 611  *              MDI_FAILURE
 612  */
 613 /*ARGSUSED*/
 614 int
 615 mdi_phci_register(char *class, dev_info_t *pdip, int flags)
 616 {
 617         mdi_phci_t              *ph;
 618         mdi_vhci_t              *vh;
 619         char                    *data;
 620 
 621         /*
 622          * Some subsystems, like fcp, perform pHCI registration from a
 623          * different thread than the one doing the pHCI attach(9E) - the
 624          * driver attach code is waiting for this other thread to complete.
 625          * This means we can only ASSERT DEVI_BUSY_CHANGING of parent
 626          * (indicating that some thread has done an ndi_devi_enter of parent)
 627          * not DEVI_BUSY_OWNED (which would indicate that we did the enter).
 628          */
 629         ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
 630 
 631         /*
 632          * Check for mpxio-disable property. Enable mpxio if the property is
 633          * missing or not set to "yes".
 634          * If the property is set to "yes" then emit a brief message.
 635          */
 636         if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
 637             &data) == DDI_SUCCESS)) {
 638                 if (strcmp(data, "yes") == 0) {
 639                         MDI_DEBUG(1, (MDI_CONT, pdip,
 640                             "?multipath capabilities disabled via %s.conf.",
 641                             ddi_driver_name(pdip)));
 642                         ddi_prop_free(data);
 643                         return (MDI_FAILURE);
 644                 }
 645                 ddi_prop_free(data);
 646         }
 647 
 648         /*
 649          * Search for a matching vHCI
 650          */
 651         vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
 652         if (vh == NULL) {
 653                 return (MDI_FAILURE);
 654         }
 655 
 656         ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
 657         mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
 658         ph->ph_dip = pdip;
 659         ph->ph_vhci = vh;
 660         ph->ph_next = NULL;
 661         ph->ph_unstable = 0;
 662         ph->ph_vprivate = 0;
 663         cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);
 664 
 665         MDI_PHCI_LOCK(ph);
 666         MDI_PHCI_SET_POWER_UP(ph);
 667         MDI_PHCI_UNLOCK(ph);
 668         DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
 669         DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;
 670 
 671         vhcache_phci_add(vh->vh_config, ph);
 672 
 673         MDI_VHCI_PHCI_LOCK(vh);
 674         if (vh->vh_phci_head == NULL) {
 675                 vh->vh_phci_head = ph;
 676         }
 677         if (vh->vh_phci_tail) {
 678                 vh->vh_phci_tail->ph_next = ph;
 679         }
 680         vh->vh_phci_tail = ph;
 681         vh->vh_phci_count++;
 682         MDI_VHCI_PHCI_UNLOCK(vh);
 683 
 684         i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
 685         return (MDI_SUCCESS);
 686 }
 687 
 688 /*
 689  * mdi_phci_unregister():
 690  *              Unregister a pHCI module from mpxio framework
 691  *              mdi_phci_unregister() is called by the pHCI drivers from their
 692  *              detach(9E) handler to unregister their instances from the
 693  *              framework.
 694  * Return Values:
 695  *              MDI_SUCCESS
 696  *              MDI_FAILURE
 697  */
 698 /*ARGSUSED*/
 699 int
 700 mdi_phci_unregister(dev_info_t *pdip, int flags)
 701 {
 702         mdi_vhci_t              *vh;
 703         mdi_phci_t              *ph;
 704         mdi_phci_t              *tmp;
 705         mdi_phci_t              *prev = NULL;
 706         mdi_pathinfo_t          *pip;
 707 
 708         ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
 709 
 710         ph = i_devi_get_phci(pdip);
 711         if (ph == NULL) {
 712                 MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid pHCI"));
 713                 return (MDI_FAILURE);
 714         }
 715 
 716         vh = ph->ph_vhci;
 717         ASSERT(vh != NULL);
 718         if (vh == NULL) {
 719                 MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid vHCI"));
 720                 return (MDI_FAILURE);
 721         }
 722 
 723         MDI_VHCI_PHCI_LOCK(vh);
 724         tmp = vh->vh_phci_head;
 725         while (tmp) {
 726                 if (tmp == ph) {
 727                         break;
 728                 }
 729                 prev = tmp;
 730                 tmp = tmp->ph_next;
 731         }
 732 
 733         if (ph == vh->vh_phci_head) {
 734                 vh->vh_phci_head = ph->ph_next;
 735         } else {
 736                 prev->ph_next = ph->ph_next;
 737         }
 738 
 739         if (ph == vh->vh_phci_tail) {
 740                 vh->vh_phci_tail = prev;
 741         }
 742 
 743         vh->vh_phci_count--;
 744         MDI_VHCI_PHCI_UNLOCK(vh);
 745 
 746         /* Walk remaining pathinfo nodes and disassociate them from pHCI */
 747         MDI_PHCI_LOCK(ph);
 748         for (pip = (mdi_pathinfo_t *)ph->ph_path_head; pip;
 749             pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link)
 750                 MDI_PI(pip)->pi_phci = NULL;
 751         MDI_PHCI_UNLOCK(ph);
 752 
 753         i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
 754             ESC_DDI_INITIATOR_UNREGISTER);
 755         vhcache_phci_remove(vh->vh_config, ph);
 756         cv_destroy(&ph->ph_unstable_cv);
 757         mutex_destroy(&ph->ph_mutex);
 758         kmem_free(ph, sizeof (mdi_phci_t));
 759         DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
 760         DEVI(pdip)->devi_mdi_xhci = NULL;
 761         return (MDI_SUCCESS);
 762 }
 763 
 764 /*
 765  * i_devi_get_phci():
 766  *              Utility function to return the phci extensions.
 767  */
 768 static mdi_phci_t *
 769 i_devi_get_phci(dev_info_t *pdip)
 770 {
 771         mdi_phci_t      *ph = NULL;
 772 
 773         if (MDI_PHCI(pdip)) {
 774                 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
 775         }
 776         return (ph);
 777 }
 778 
 779 /*
 780  * Single thread mdi entry into devinfo node for modifying its children.
 781  * If necessary we perform an ndi_devi_enter of the vHCI before doing
 782  * an ndi_devi_enter of 'dip'.  We maintain circular in two parts: one
 783  * for the vHCI and one for the pHCI.
 784  */
 785 void
 786 mdi_devi_enter(dev_info_t *phci_dip, int *circular)
 787 {
 788         dev_info_t      *vdip;
 789         int             vcircular, pcircular;
 790 
 791         /* Verify calling context */
 792         ASSERT(MDI_PHCI(phci_dip));
 793         vdip = mdi_devi_get_vdip(phci_dip);
 794         ASSERT(vdip);                   /* A pHCI always has a vHCI */
 795 
 796         /*
 797          * If pHCI is detaching then the framework has already entered the
 798          * vHCI on a threads that went down the code path leading to
 799          * detach_node().  This framework enter of the vHCI during pHCI
 800          * detach is done to avoid deadlock with vHCI power management
 801          * operations which enter the vHCI and the enter down the path
 802          * to the pHCI. If pHCI is detaching then we piggyback this calls
 803          * enter of the vHCI on frameworks vHCI enter that has already
 804          * occurred - this is OK because we know that the framework thread
 805          * doing detach is waiting for our completion.
 806          *
 807          * We should DEVI_IS_DETACHING under an enter of the parent to avoid
 808          * race with detach - but we can't do that because the framework has
 809          * already entered the parent, so we have some complexity instead.
 810          */
 811         for (;;) {
 812                 if (ndi_devi_tryenter(vdip, &vcircular)) {
 813                         ASSERT(vcircular != -1);
 814                         if (DEVI_IS_DETACHING(phci_dip)) {
 815                                 ndi_devi_exit(vdip, vcircular);
 816                                 vcircular = -1;
 817                         }
 818                         break;
 819                 } else if (DEVI_IS_DETACHING(phci_dip)) {
 820                         vcircular = -1;
 821                         break;
 822                 } else if (servicing_interrupt()) {
 823                         /*
 824                          * Don't delay an interrupt (and ensure adaptive
 825                          * mutex inversion support).
 826                          */
 827                         ndi_devi_enter(vdip, &vcircular);
 828                         break;
 829                 } else {
 830                         delay_random(mdi_delay);
 831                 }
 832         }
 833 
 834         ndi_devi_enter(phci_dip, &pcircular);
 835         *circular = (vcircular << 16) | (pcircular & 0xFFFF);
 836 }
 837 
 838 /*
 839  * Attempt to mdi_devi_enter.
 840  */
 841 int
 842 mdi_devi_tryenter(dev_info_t *phci_dip, int *circular)
 843 {
 844         dev_info_t      *vdip;
 845         int             vcircular, pcircular;
 846 
 847         /* Verify calling context */
 848         ASSERT(MDI_PHCI(phci_dip));
 849         vdip = mdi_devi_get_vdip(phci_dip);
 850         ASSERT(vdip);                   /* A pHCI always has a vHCI */
 851 
 852         if (ndi_devi_tryenter(vdip, &vcircular)) {
 853                 if (ndi_devi_tryenter(phci_dip, &pcircular)) {
 854                         *circular = (vcircular << 16) | (pcircular & 0xFFFF);
 855                         return (1);     /* locked */
 856                 }
 857                 ndi_devi_exit(vdip, vcircular);
 858         }
 859         return (0);                     /* busy */
 860 }
 861 
 862 /*
 863  * Release mdi_devi_enter or successful mdi_devi_tryenter.
 864  */
 865 void
 866 mdi_devi_exit(dev_info_t *phci_dip, int circular)
 867 {
 868         dev_info_t      *vdip;
 869         int             vcircular, pcircular;
 870 
 871         /* Verify calling context */
 872         ASSERT(MDI_PHCI(phci_dip));
 873         vdip = mdi_devi_get_vdip(phci_dip);
 874         ASSERT(vdip);                   /* A pHCI always has a vHCI */
 875 
 876         /* extract two circular recursion values from single int */
 877         pcircular = (short)(circular & 0xFFFF);
 878         vcircular = (short)((circular >> 16) & 0xFFFF);
 879 
 880         ndi_devi_exit(phci_dip, pcircular);
 881         if (vcircular != -1)
 882                 ndi_devi_exit(vdip, vcircular);
 883 }
 884 
 885 /*
 886  * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used
 887  * around a pHCI drivers calls to mdi_pi_online/offline, after holding
 888  * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock
 889  * with vHCI power management code during path online/offline.  Each
 890  * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must
 891  * occur within the scope of an active mdi_devi_enter that establishes the
 892  * circular value.
 893  */
 894 void
 895 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular)
 896 {
 897         int             pcircular;
 898 
 899         /* Verify calling context */
 900         ASSERT(MDI_PHCI(phci_dip));
 901 
 902         /* Keep hold on pHCI until we reenter in mdi_devi_enter_phci */
 903         ndi_hold_devi(phci_dip);
 904 
 905         pcircular = (short)(circular & 0xFFFF);
 906         ndi_devi_exit(phci_dip, pcircular);
 907 }
 908 
 909 void
 910 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular)
 911 {
 912         int             pcircular;
 913 
 914         /* Verify calling context */
 915         ASSERT(MDI_PHCI(phci_dip));
 916 
 917         ndi_devi_enter(phci_dip, &pcircular);
 918 
 919         /* Drop hold from mdi_devi_exit_phci. */
 920         ndi_rele_devi(phci_dip);
 921 
 922         /* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */
 923         ASSERT(pcircular == ((short)(*circular & 0xFFFF)));
 924 }
 925 
 926 /*
 927  * mdi_devi_get_vdip():
 928  *              given a pHCI dip return vHCI dip
 929  */
 930 dev_info_t *
 931 mdi_devi_get_vdip(dev_info_t *pdip)
 932 {
 933         mdi_phci_t      *ph;
 934 
 935         ph = i_devi_get_phci(pdip);
 936         if (ph && ph->ph_vhci)
 937                 return (ph->ph_vhci->vh_dip);
 938         return (NULL);
 939 }
 940 
 941 /*
 942  * mdi_devi_pdip_entered():
 943  *              Return 1 if we are vHCI and have done an ndi_devi_enter
 944  *              of a pHCI
 945  */
 946 int
 947 mdi_devi_pdip_entered(dev_info_t *vdip)
 948 {
 949         mdi_vhci_t      *vh;
 950         mdi_phci_t      *ph;
 951 
 952         vh = i_devi_get_vhci(vdip);
 953         if (vh == NULL)
 954                 return (0);
 955 
 956         MDI_VHCI_PHCI_LOCK(vh);
 957         ph = vh->vh_phci_head;
 958         while (ph) {
 959                 if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) {
 960                         MDI_VHCI_PHCI_UNLOCK(vh);
 961                         return (1);
 962                 }
 963                 ph = ph->ph_next;
 964         }
 965         MDI_VHCI_PHCI_UNLOCK(vh);
 966         return (0);
 967 }
 968 
 969 /*
 970  * mdi_phci_path2devinfo():
 971  *              Utility function to search for a valid phci device given
 972  *              the devfs pathname.
 973  */
 974 dev_info_t *
 975 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
 976 {
 977         char            *temp_pathname;
 978         mdi_vhci_t      *vh;
 979         mdi_phci_t      *ph;
 980         dev_info_t      *pdip = NULL;
 981 
 982         vh = i_devi_get_vhci(vdip);
 983         ASSERT(vh != NULL);
 984 
 985         if (vh == NULL) {
 986                 /*
 987                  * Invalid vHCI component, return failure
 988                  */
 989                 return (NULL);
 990         }
 991 
 992         temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
 993         MDI_VHCI_PHCI_LOCK(vh);
 994         ph = vh->vh_phci_head;
 995         while (ph != NULL) {
 996                 pdip = ph->ph_dip;
 997                 ASSERT(pdip != NULL);
 998                 *temp_pathname = '\0';
 999                 (void) ddi_pathname(pdip, temp_pathname);
1000                 if (strcmp(temp_pathname, pathname) == 0) {
1001                         break;
1002                 }
1003                 ph = ph->ph_next;
1004         }
1005         if (ph == NULL) {
1006                 pdip = NULL;
1007         }
1008         MDI_VHCI_PHCI_UNLOCK(vh);
1009         kmem_free(temp_pathname, MAXPATHLEN);
1010         return (pdip);
1011 }
1012 
1013 /*
1014  * mdi_phci_get_path_count():
1015  *              get number of path information nodes associated with a given
1016  *              pHCI device.
1017  */
1018 int
1019 mdi_phci_get_path_count(dev_info_t *pdip)
1020 {
1021         mdi_phci_t      *ph;
1022         int             count = 0;
1023 
1024         ph = i_devi_get_phci(pdip);
1025         if (ph != NULL) {
1026                 count = ph->ph_path_count;
1027         }
1028         return (count);
1029 }
1030 
1031 /*
1032  * i_mdi_phci_lock():
1033  *              Lock a pHCI device
1034  * Return Values:
1035  *              None
1036  * Note:
1037  *              The default locking order is:
1038  *              _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
1039  *              But there are number of situations where locks need to be
1040  *              grabbed in reverse order.  This routine implements try and lock
1041  *              mechanism depending on the requested parameter option.
1042  */
1043 static void
1044 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
1045 {
1046         if (pip) {
1047                 /* Reverse locking is requested. */
1048                 while (MDI_PHCI_TRYLOCK(ph) == 0) {
1049                         if (servicing_interrupt()) {
1050                                 MDI_PI_HOLD(pip);
1051                                 MDI_PI_UNLOCK(pip);
1052                                 MDI_PHCI_LOCK(ph);
1053                                 MDI_PI_LOCK(pip);
1054                                 MDI_PI_RELE(pip);
1055                                 break;
1056                         } else {
1057                                 /*
1058                                  * tryenter failed. Try to grab again
1059                                  * after a small delay
1060                                  */
1061                                 MDI_PI_HOLD(pip);
1062                                 MDI_PI_UNLOCK(pip);
1063                                 delay_random(mdi_delay);
1064                                 MDI_PI_LOCK(pip);
1065                                 MDI_PI_RELE(pip);
1066                         }
1067                 }
1068         } else {
1069                 MDI_PHCI_LOCK(ph);
1070         }
1071 }
1072 
1073 /*
1074  * i_mdi_phci_unlock():
1075  *              Unlock the pHCI component
1076  */
1077 static void
1078 i_mdi_phci_unlock(mdi_phci_t *ph)
1079 {
1080         MDI_PHCI_UNLOCK(ph);
1081 }
1082 
1083 /*
1084  * i_mdi_devinfo_create():
1085  *              create client device's devinfo node
1086  * Return Values:
1087  *              dev_info
1088  *              NULL
1089  * Notes:
1090  */
1091 static dev_info_t *
1092 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
1093         char **compatible, int ncompatible)
1094 {
1095         dev_info_t *cdip = NULL;
1096 
1097         ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1098 
1099         /* Verify for duplicate entry */
1100         cdip = i_mdi_devinfo_find(vh, name, guid);
1101         ASSERT(cdip == NULL);
1102         if (cdip) {
1103                 cmn_err(CE_WARN,
1104                     "i_mdi_devinfo_create: client %s@%s already exists",
1105                         name ? name : "", guid ? guid : "");
1106         }
1107 
1108         ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
1109         if (cdip == NULL)
1110                 goto fail;
1111 
1112         /*
1113          * Create component type and Global unique identifier
1114          * properties
1115          */
1116         if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
1117             MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
1118                 goto fail;
1119         }
1120 
1121         /* Decorate the node with compatible property */
1122         if (compatible &&
1123             (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
1124             "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
1125                 goto fail;
1126         }
1127 
1128         return (cdip);
1129 
1130 fail:
1131         if (cdip) {
1132                 (void) ndi_prop_remove_all(cdip);
1133                 (void) ndi_devi_free(cdip);
1134         }
1135         return (NULL);
1136 }
1137 
1138 /*
1139  * i_mdi_devinfo_find():
1140  *              Find a matching devinfo node for given client node name
1141  *              and its guid.
1142  * Return Values:
1143  *              Handle to a dev_info node or NULL
1144  */
1145 static dev_info_t *
1146 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
1147 {
1148         char                    *data;
1149         dev_info_t              *cdip = NULL;
1150         dev_info_t              *ndip = NULL;
1151         int                     circular;
1152 
1153         ndi_devi_enter(vh->vh_dip, &circular);
1154         ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
1155         while ((cdip = ndip) != NULL) {
1156                 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1157 
1158                 if (strcmp(DEVI(cdip)->devi_node_name, name)) {
1159                         continue;
1160                 }
1161 
1162                 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
1163                     DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
1164                     &data) != DDI_PROP_SUCCESS) {
1165                         continue;
1166                 }
1167 
1168                 if (strcmp(data, guid) != 0) {
1169                         ddi_prop_free(data);
1170                         continue;
1171                 }
1172                 ddi_prop_free(data);
1173                 break;
1174         }
1175         ndi_devi_exit(vh->vh_dip, circular);
1176         return (cdip);
1177 }
1178 
1179 /*
1180  * i_mdi_devinfo_remove():
1181  *              Remove a client device node
1182  */
1183 static int
1184 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
1185 {
1186         int     rv = MDI_SUCCESS;
1187 
1188         if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
1189             (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
1190                 rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN | NDI_DEVI_REMOVE);
1191                 if (rv != NDI_SUCCESS) {
1192                         MDI_DEBUG(1, (MDI_NOTE, cdip,
1193                             "!failed: cdip %p", (void *)cdip));
1194                 }
1195                 /*
1196                  * Convert to MDI error code
1197                  */
1198                 switch (rv) {
1199                 case NDI_SUCCESS:
1200                         rv = MDI_SUCCESS;
1201                         break;
1202                 case NDI_BUSY:
1203                         rv = MDI_BUSY;
1204                         break;
1205                 default:
1206                         rv = MDI_FAILURE;
1207                         break;
1208                 }
1209         }
1210         return (rv);
1211 }
1212 
1213 /*
1214  * i_devi_get_client()
1215  *              Utility function to get mpxio component extensions
1216  */
1217 static mdi_client_t *
1218 i_devi_get_client(dev_info_t *cdip)
1219 {
1220         mdi_client_t    *ct = NULL;
1221 
1222         if (MDI_CLIENT(cdip)) {
1223                 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
1224         }
1225         return (ct);
1226 }
1227 
1228 /*
1229  * i_mdi_is_child_present():
1230  *              Search for the presence of client device dev_info node
1231  */
1232 static int
1233 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
1234 {
1235         int             rv = MDI_FAILURE;
1236         struct dev_info *dip;
1237         int             circular;
1238 
1239         ndi_devi_enter(vdip, &circular);
1240         dip = DEVI(vdip)->devi_child;
1241         while (dip) {
1242                 if (dip == DEVI(cdip)) {
1243                         rv = MDI_SUCCESS;
1244                         break;
1245                 }
1246                 dip = dip->devi_sibling;
1247         }
1248         ndi_devi_exit(vdip, circular);
1249         return (rv);
1250 }
1251 
1252 
1253 /*
1254  * i_mdi_client_lock():
1255  *              Grab client component lock
1256  * Return Values:
1257  *              None
1258  * Note:
1259  *              The default locking order is:
1260  *              _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
1261  *              But there are number of situations where locks need to be
1262  *              grabbed in reverse order.  This routine implements try and lock
1263  *              mechanism depending on the requested parameter option.
1264  */
1265 static void
1266 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
1267 {
1268         if (pip) {
1269                 /*
1270                  * Reverse locking is requested.
1271                  */
1272                 while (MDI_CLIENT_TRYLOCK(ct) == 0) {
1273                         if (servicing_interrupt()) {
1274                                 MDI_PI_HOLD(pip);
1275                                 MDI_PI_UNLOCK(pip);
1276                                 MDI_CLIENT_LOCK(ct);
1277                                 MDI_PI_LOCK(pip);
1278                                 MDI_PI_RELE(pip);
1279                                 break;
1280                         } else {
1281                                 /*
1282                                  * tryenter failed. Try to grab again
1283                                  * after a small delay
1284                                  */
1285                                 MDI_PI_HOLD(pip);
1286                                 MDI_PI_UNLOCK(pip);
1287                                 delay_random(mdi_delay);
1288                                 MDI_PI_LOCK(pip);
1289                                 MDI_PI_RELE(pip);
1290                         }
1291                 }
1292         } else {
1293                 MDI_CLIENT_LOCK(ct);
1294         }
1295 }
1296 
1297 /*
1298  * i_mdi_client_unlock():
1299  *              Unlock a client component
1300  */
1301 static void
1302 i_mdi_client_unlock(mdi_client_t *ct)
1303 {
1304         MDI_CLIENT_UNLOCK(ct);
1305 }
1306 
1307 /*
1308  * i_mdi_client_alloc():
1309  *              Allocate and initialize a client structure.  Caller should
1310  *              hold the vhci client lock.
1311  * Return Values:
1312  *              Handle to a client component
1313  */
1314 /*ARGSUSED*/
1315 static mdi_client_t *
1316 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
1317 {
1318         mdi_client_t    *ct;
1319 
1320         ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1321 
1322         /*
1323          * Allocate and initialize a component structure.
1324          */
1325         ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
1326         mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
1327         ct->ct_hnext = NULL;
1328         ct->ct_hprev = NULL;
1329         ct->ct_dip = NULL;
1330         ct->ct_vhci = vh;
1331         ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
1332         (void) strcpy(ct->ct_drvname, name);
1333         ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
1334         (void) strcpy(ct->ct_guid, lguid);
1335         ct->ct_cprivate = NULL;
1336         ct->ct_vprivate = NULL;
1337         ct->ct_flags = 0;
1338         ct->ct_state = MDI_CLIENT_STATE_FAILED;
1339         MDI_CLIENT_LOCK(ct);
1340         MDI_CLIENT_SET_OFFLINE(ct);
1341         MDI_CLIENT_SET_DETACH(ct);
1342         MDI_CLIENT_SET_POWER_UP(ct);
1343         MDI_CLIENT_UNLOCK(ct);
1344         ct->ct_failover_flags = 0;
1345         ct->ct_failover_status = 0;
1346         cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
1347         ct->ct_unstable = 0;
1348         cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
1349         cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
1350         ct->ct_lb = vh->vh_lb;
1351         ct->ct_lb_args =  kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
1352         ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
1353         ct->ct_path_count = 0;
1354         ct->ct_path_head = NULL;
1355         ct->ct_path_tail = NULL;
1356         ct->ct_path_last = NULL;
1357 
1358         /*
1359          * Add this client component to our client hash queue
1360          */
1361         i_mdi_client_enlist_table(vh, ct);
1362         return (ct);
1363 }
1364 
1365 /*
1366  * i_mdi_client_enlist_table():
1367  *              Attach the client device to the client hash table. Caller
1368  *              should hold the vhci client lock.
1369  */
1370 static void
1371 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1372 {
1373         int                     index;
1374         struct client_hash      *head;
1375 
1376         ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1377 
1378         index = i_mdi_get_hash_key(ct->ct_guid);
1379         head = &vh->vh_client_table[index];
1380         ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
1381         head->ct_hash_head = ct;
1382         head->ct_hash_count++;
1383         vh->vh_client_count++;
1384 }
1385 
1386 /*
1387  * i_mdi_client_delist_table():
1388  *              Attach the client device to the client hash table.
1389  *              Caller should hold the vhci client lock.
1390  */
1391 static void
1392 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1393 {
1394         int                     index;
1395         char                    *guid;
1396         struct client_hash      *head;
1397         mdi_client_t            *next;
1398         mdi_client_t            *last;
1399 
1400         ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1401 
1402         guid = ct->ct_guid;
1403         index = i_mdi_get_hash_key(guid);
1404         head = &vh->vh_client_table[index];
1405 
1406         last = NULL;
1407         next = (mdi_client_t *)head->ct_hash_head;
1408         while (next != NULL) {
1409                 if (next == ct) {
1410                         break;
1411                 }
1412                 last = next;
1413                 next = next->ct_hnext;
1414         }
1415 
1416         if (next) {
1417                 head->ct_hash_count--;
1418                 if (last == NULL) {
1419                         head->ct_hash_head = ct->ct_hnext;
1420                 } else {
1421                         last->ct_hnext = ct->ct_hnext;
1422                 }
1423                 ct->ct_hnext = NULL;
1424                 vh->vh_client_count--;
1425         }
1426 }
1427 
1428 
1429 /*
1430  * i_mdi_client_free():
1431  *              Free a client component
1432  */
1433 static int
1434 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
1435 {
1436         int             rv = MDI_SUCCESS;
1437         int             flags = ct->ct_flags;
1438         dev_info_t      *cdip;
1439         dev_info_t      *vdip;
1440 
1441         ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1442 
1443         vdip = vh->vh_dip;
1444         cdip = ct->ct_dip;
1445 
1446         (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
1447         DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
1448         DEVI(cdip)->devi_mdi_client = NULL;
1449 
1450         /*
1451          * Clear out back ref. to dev_info_t node
1452          */
1453         ct->ct_dip = NULL;
1454 
1455         /*
1456          * Remove this client from our hash queue
1457          */
1458         i_mdi_client_delist_table(vh, ct);
1459 
1460         /*
1461          * Uninitialize and free the component
1462          */
1463         kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
1464         kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
1465         kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
1466         cv_destroy(&ct->ct_failover_cv);
1467         cv_destroy(&ct->ct_unstable_cv);
1468         cv_destroy(&ct->ct_powerchange_cv);
1469         mutex_destroy(&ct->ct_mutex);
1470         kmem_free(ct, sizeof (*ct));
1471 
1472         if (cdip != NULL) {
1473                 MDI_VHCI_CLIENT_UNLOCK(vh);
1474                 (void) i_mdi_devinfo_remove(vdip, cdip, flags);
1475                 MDI_VHCI_CLIENT_LOCK(vh);
1476         }
1477         return (rv);
1478 }
1479 
1480 /*
1481  * i_mdi_client_find():
1482  *              Find the client structure corresponding to a given guid
1483  *              Caller should hold the vhci client lock.
1484  */
1485 static mdi_client_t *
1486 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
1487 {
1488         int                     index;
1489         struct client_hash      *head;
1490         mdi_client_t            *ct;
1491 
1492         ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1493 
1494         index = i_mdi_get_hash_key(guid);
1495         head = &vh->vh_client_table[index];
1496 
1497         ct = head->ct_hash_head;
1498         while (ct != NULL) {
1499                 if (strcmp(ct->ct_guid, guid) == 0 &&
1500                     (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
1501                         break;
1502                 }
1503                 ct = ct->ct_hnext;
1504         }
1505         return (ct);
1506 }
1507 
1508 /*
1509  * i_mdi_client_update_state():
1510  *              Compute and update client device state
1511  * Notes:
1512  *              A client device can be in any of three possible states:
1513  *
1514  *              MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
1515  *              one online/standby paths. Can tolerate failures.
1516  *              MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
1517  *              no alternate paths available as standby. A failure on the online
1518  *              would result in loss of access to device data.
1519  *              MDI_CLIENT_STATE_FAILED - Client device in failed state with
1520  *              no paths available to access the device.
1521  */
1522 static void
1523 i_mdi_client_update_state(mdi_client_t *ct)
1524 {
1525         int state;
1526 
1527         ASSERT(MDI_CLIENT_LOCKED(ct));
1528         state = i_mdi_client_compute_state(ct, NULL);
1529         MDI_CLIENT_SET_STATE(ct, state);
1530 }
1531 
1532 /*
1533  * i_mdi_client_compute_state():
1534  *              Compute client device state
1535  *
1536  *              mdi_phci_t *    Pointer to pHCI structure which should
1537  *                              while computing the new value.  Used by
1538  *                              i_mdi_phci_offline() to find the new
1539  *                              client state after DR of a pHCI.
1540  */
1541 static int
1542 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
1543 {
1544         int             state;
1545         int             online_count = 0;
1546         int             standby_count = 0;
1547         mdi_pathinfo_t  *pip, *next;
1548 
1549         ASSERT(MDI_CLIENT_LOCKED(ct));
1550         pip = ct->ct_path_head;
1551         while (pip != NULL) {
1552                 MDI_PI_LOCK(pip);
1553                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
1554                 if (MDI_PI(pip)->pi_phci == ph) {
1555                         MDI_PI_UNLOCK(pip);
1556                         pip = next;
1557                         continue;
1558                 }
1559 
1560                 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1561                                 == MDI_PATHINFO_STATE_ONLINE)
1562                         online_count++;
1563                 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1564                                 == MDI_PATHINFO_STATE_STANDBY)
1565                         standby_count++;
1566                 MDI_PI_UNLOCK(pip);
1567                 pip = next;
1568         }
1569 
1570         if (online_count == 0) {
1571                 if (standby_count == 0) {
1572                         state = MDI_CLIENT_STATE_FAILED;
1573                         MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
1574                             "client state failed: ct = %p", (void *)ct));
1575                 } else if (standby_count == 1) {
1576                         state = MDI_CLIENT_STATE_DEGRADED;
1577                 } else {
1578                         state = MDI_CLIENT_STATE_OPTIMAL;
1579                 }
1580         } else if (online_count == 1) {
1581                 if (standby_count == 0) {
1582                         state = MDI_CLIENT_STATE_DEGRADED;
1583                 } else {
1584                         state = MDI_CLIENT_STATE_OPTIMAL;
1585                 }
1586         } else {
1587                 state = MDI_CLIENT_STATE_OPTIMAL;
1588         }
1589         return (state);
1590 }
1591 
1592 /*
1593  * i_mdi_client2devinfo():
1594  *              Utility function
1595  */
1596 dev_info_t *
1597 i_mdi_client2devinfo(mdi_client_t *ct)
1598 {
1599         return (ct->ct_dip);
1600 }
1601 
1602 /*
1603  * mdi_client_path2_devinfo():
1604  *              Given the parent devinfo and child devfs pathname, search for
1605  *              a valid devfs node handle.
1606  */
1607 dev_info_t *
1608 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
1609 {
1610         dev_info_t      *cdip = NULL;
1611         dev_info_t      *ndip = NULL;
1612         char            *temp_pathname;
1613         int             circular;
1614 
1615         /*
1616          * Allocate temp buffer
1617          */
1618         temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1619 
1620         /*
1621          * Lock parent against changes
1622          */
1623         ndi_devi_enter(vdip, &circular);
1624         ndip = (dev_info_t *)DEVI(vdip)->devi_child;
1625         while ((cdip = ndip) != NULL) {
1626                 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1627 
1628                 *temp_pathname = '\0';
1629                 (void) ddi_pathname(cdip, temp_pathname);
1630                 if (strcmp(temp_pathname, pathname) == 0) {
1631                         break;
1632                 }
1633         }
1634         /*
1635          * Release devinfo lock
1636          */
1637         ndi_devi_exit(vdip, circular);
1638 
1639         /*
1640          * Free the temp buffer
1641          */
1642         kmem_free(temp_pathname, MAXPATHLEN);
1643         return (cdip);
1644 }
1645 
1646 /*
1647  * mdi_client_get_path_count():
1648  *              Utility function to get number of path information nodes
1649  *              associated with a given client device.
1650  */
1651 int
1652 mdi_client_get_path_count(dev_info_t *cdip)
1653 {
1654         mdi_client_t    *ct;
1655         int             count = 0;
1656 
1657         ct = i_devi_get_client(cdip);
1658         if (ct != NULL) {
1659                 count = ct->ct_path_count;
1660         }
1661         return (count);
1662 }
1663 
1664 
1665 /*
1666  * i_mdi_get_hash_key():
1667  *              Create a hash using strings as keys
1668  *
1669  */
1670 static int
1671 i_mdi_get_hash_key(char *str)
1672 {
1673         uint32_t        g, hash = 0;
1674         char            *p;
1675 
1676         for (p = str; *p != '\0'; p++) {
1677                 g = *p;
1678                 hash += g;
1679         }
1680         return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
1681 }
1682 
1683 /*
1684  * mdi_get_lb_policy():
1685  *              Get current load balancing policy for a given client device
1686  */
1687 client_lb_t
1688 mdi_get_lb_policy(dev_info_t *cdip)
1689 {
1690         client_lb_t     lb = LOAD_BALANCE_NONE;
1691         mdi_client_t    *ct;
1692 
1693         ct = i_devi_get_client(cdip);
1694         if (ct != NULL) {
1695                 lb = ct->ct_lb;
1696         }
1697         return (lb);
1698 }
1699 
1700 /*
1701  * mdi_set_lb_region_size():
1702  *              Set current region size for the load-balance
1703  */
1704 int
1705 mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
1706 {
1707         mdi_client_t    *ct;
1708         int             rv = MDI_FAILURE;
1709 
1710         ct = i_devi_get_client(cdip);
1711         if (ct != NULL && ct->ct_lb_args != NULL) {
1712                 ct->ct_lb_args->region_size = region_size;
1713                 rv = MDI_SUCCESS;
1714         }
1715         return (rv);
1716 }
1717 
1718 /*
1719  * mdi_Set_lb_policy():
1720  *              Set current load balancing policy for a given client device
1721  */
1722 int
1723 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
1724 {
1725         mdi_client_t    *ct;
1726         int             rv = MDI_FAILURE;
1727 
1728         ct = i_devi_get_client(cdip);
1729         if (ct != NULL) {
1730                 ct->ct_lb = lb;
1731                 rv = MDI_SUCCESS;
1732         }
1733         return (rv);
1734 }
1735 
1736 /*
1737  * mdi_failover():
1738  *              failover function called by the vHCI drivers to initiate
1739  *              a failover operation.  This is typically due to non-availability
1740  *              of online paths to route I/O requests.  Failover can be
1741  *              triggered through user application also.
1742  *
1743  *              The vHCI driver calls mdi_failover() to initiate a failover
1744  *              operation. mdi_failover() calls back into the vHCI driver's
1745  *              vo_failover() entry point to perform the actual failover
1746  *              operation.  The reason for requiring the vHCI driver to
1747  *              initiate failover by calling mdi_failover(), instead of directly
1748  *              executing vo_failover() itself, is to ensure that the mdi
1749  *              framework can keep track of the client state properly.
1750  *              Additionally, mdi_failover() provides as a convenience the
1751  *              option of performing the failover operation synchronously or
1752  *              asynchronously
1753  *
1754  *              Upon successful completion of the failover operation, the
1755  *              paths that were previously ONLINE will be in the STANDBY state,
1756  *              and the newly activated paths will be in the ONLINE state.
1757  *
1758  *              The flags modifier determines whether the activation is done
1759  *              synchronously: MDI_FAILOVER_SYNC
1760  * Return Values:
1761  *              MDI_SUCCESS
1762  *              MDI_FAILURE
1763  *              MDI_BUSY
1764  */
1765 /*ARGSUSED*/
1766 int
1767 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
1768 {
1769         int                     rv;
1770         mdi_client_t            *ct;
1771 
1772         ct = i_devi_get_client(cdip);
1773         ASSERT(ct != NULL);
1774         if (ct == NULL) {
1775                 /* cdip is not a valid client device. Nothing more to do. */
1776                 return (MDI_FAILURE);
1777         }
1778 
1779         MDI_CLIENT_LOCK(ct);
1780 
1781         if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
1782                 /* A path to the client is being freed */
1783                 MDI_CLIENT_UNLOCK(ct);
1784                 return (MDI_BUSY);
1785         }
1786 
1787 
1788         if (MDI_CLIENT_IS_FAILED(ct)) {
1789                 /*
1790                  * Client is in failed state. Nothing more to do.
1791                  */
1792                 MDI_CLIENT_UNLOCK(ct);
1793                 return (MDI_FAILURE);
1794         }
1795 
1796         if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
1797                 /*
1798                  * Failover is already in progress; return BUSY
1799                  */
1800                 MDI_CLIENT_UNLOCK(ct);
1801                 return (MDI_BUSY);
1802         }
1803         /*
1804          * Make sure that mdi_pathinfo node state changes are processed.
1805          * We do not allow failovers to progress while client path state
1806          * changes are in progress
1807          */
1808         if (ct->ct_unstable) {
1809                 if (flags == MDI_FAILOVER_ASYNC) {
1810                         MDI_CLIENT_UNLOCK(ct);
1811                         return (MDI_BUSY);
1812                 } else {
1813                         while (ct->ct_unstable)
1814                                 cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
1815                 }
1816         }
1817 
1818         /*
1819          * Client device is in stable state. Before proceeding, perform sanity
1820          * checks again.
1821          */
1822         if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
1823             (!i_ddi_devi_attached(cdip))) {
1824                 /*
1825                  * Client is in failed state. Nothing more to do.
1826                  */
1827                 MDI_CLIENT_UNLOCK(ct);
1828                 return (MDI_FAILURE);
1829         }
1830 
1831         /*
1832          * Set the client state as failover in progress.
1833          */
1834         MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
1835         ct->ct_failover_flags = flags;
1836         MDI_CLIENT_UNLOCK(ct);
1837 
1838         if (flags == MDI_FAILOVER_ASYNC) {
1839                 /*
1840                  * Submit the initiate failover request via CPR safe
1841                  * taskq threads.
1842                  */
1843                 (void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
1844                     ct, KM_SLEEP);
1845                 return (MDI_ACCEPT);
1846         } else {
1847                 /*
1848                  * Synchronous failover mode.  Typically invoked from the user
1849                  * land.
1850                  */
1851                 rv = i_mdi_failover(ct);
1852         }
1853         return (rv);
1854 }
1855 
1856 /*
1857  * i_mdi_failover():
1858  *              internal failover function. Invokes vHCI drivers failover
1859  *              callback function and process the failover status
1860  * Return Values:
1861  *              None
1862  *
1863  * Note: A client device in failover state can not be detached or freed.
1864  */
1865 static int
1866 i_mdi_failover(void *arg)
1867 {
1868         int             rv = MDI_SUCCESS;
1869         mdi_client_t    *ct = (mdi_client_t *)arg;
1870         mdi_vhci_t      *vh = ct->ct_vhci;
1871 
1872         ASSERT(!MDI_CLIENT_LOCKED(ct));
1873 
1874         if (vh->vh_ops->vo_failover != NULL) {
1875                 /*
1876                  * Call vHCI drivers callback routine
1877                  */
1878                 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
1879                     ct->ct_failover_flags);
1880         }
1881 
1882         MDI_CLIENT_LOCK(ct);
1883         MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);
1884 
1885         /*
1886          * Save the failover return status
1887          */
1888         ct->ct_failover_status = rv;
1889 
1890         /*
1891          * As a result of failover, client status would have been changed.
1892          * Update the client state and wake up anyone waiting on this client
1893          * device.
1894          */
1895         i_mdi_client_update_state(ct);
1896 
1897         cv_broadcast(&ct->ct_failover_cv);
1898         MDI_CLIENT_UNLOCK(ct);
1899         return (rv);
1900 }
1901 
1902 /*
1903  * Load balancing is logical block.
1904  * IOs within the range described by region_size
1905  * would go on the same path. This would improve the
1906  * performance by cache-hit on some of the RAID devices.
1907  * Search only for online paths(At some point we
1908  * may want to balance across target ports).
1909  * If no paths are found then default to round-robin.
1910  */
1911 static int
1912 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
1913 {
1914         int             path_index = -1;
1915         int             online_path_count = 0;
1916         int             online_nonpref_path_count = 0;
1917         int             region_size = ct->ct_lb_args->region_size;
1918         mdi_pathinfo_t  *pip;
1919         mdi_pathinfo_t  *next;
1920         int             preferred, path_cnt;
1921 
1922         pip = ct->ct_path_head;
1923         while (pip) {
1924                 MDI_PI_LOCK(pip);
1925                 if (MDI_PI(pip)->pi_state ==
1926                     MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
1927                         online_path_count++;
1928                 } else if (MDI_PI(pip)->pi_state ==
1929                     MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
1930                         online_nonpref_path_count++;
1931                 }
1932                 next = (mdi_pathinfo_t *)
1933                     MDI_PI(pip)->pi_client_link;
1934                 MDI_PI_UNLOCK(pip);
1935                 pip = next;
1936         }
1937         /* if found any online/preferred then use this type */
1938         if (online_path_count > 0) {
1939                 path_cnt = online_path_count;
1940                 preferred = 1;
1941         } else if (online_nonpref_path_count > 0) {
1942                 path_cnt = online_nonpref_path_count;
1943                 preferred = 0;
1944         } else {
1945                 path_cnt = 0;
1946         }
1947         if (path_cnt) {
1948                 path_index = (bp->b_blkno >> region_size) % path_cnt;
1949                 pip = ct->ct_path_head;
1950                 while (pip && path_index != -1) {
1951                         MDI_PI_LOCK(pip);
1952                         if (path_index == 0 &&
1953                             (MDI_PI(pip)->pi_state ==
1954                             MDI_PATHINFO_STATE_ONLINE) &&
1955                                 MDI_PI(pip)->pi_preferred == preferred) {
1956                                 MDI_PI_HOLD(pip);
1957                                 MDI_PI_UNLOCK(pip);
1958                                 *ret_pip = pip;
1959                                 return (MDI_SUCCESS);
1960                         }
1961                         path_index --;
1962                         next = (mdi_pathinfo_t *)
1963                             MDI_PI(pip)->pi_client_link;
1964                         MDI_PI_UNLOCK(pip);
1965                         pip = next;
1966                 }
1967                 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
1968                     "lba %llx: path %s %p",
1969                     bp->b_lblkno, mdi_pi_spathname(pip), (void *)pip));
1970         }
1971         return (MDI_FAILURE);
1972 }
1973 
1974 /*
1975  * mdi_select_path():
1976  *              select a path to access a client device.
1977  *
1978  *              mdi_select_path() function is called by the vHCI drivers to
1979  *              select a path to route the I/O request to.  The caller passes
1980  *              the block I/O data transfer structure ("buf") as one of the
1981  *              parameters.  The mpxio framework uses the buf structure
1982  *              contents to maintain per path statistics (total I/O size /
1983  *              count pending).  If more than one online paths are available to
1984  *              select, the framework automatically selects a suitable path
1985  *              for routing I/O request. If a failover operation is active for
1986  *              this client device the call shall be failed with MDI_BUSY error
1987  *              code.
1988  *
1989  *              By default this function returns a suitable path in online
1990  *              state based on the current load balancing policy.  Currently
1991  *              we support LOAD_BALANCE_NONE (Previously selected online path
1992  *              will continue to be used till the path is usable),
1993  *              LOAD_BALANCE_RR (Online paths will be selected in a round
1994  *              robin fashion) and LOAD_BALANCE_LBA (Online paths will be
1995  *              selected based on the logical block).  The policy is set
1996  *              through the vHCI driver's configuration file (driver.conf).
1997  *
1998  *              vHCI drivers may override this default behavior by specifying
1999  *              appropriate flags.  The meaning of the "arg" argument depends
2000  *              on the flags specified. If MDI_SELECT_PATH_INSTANCE is set
2001  *              then the argument is the "path instance" of the path to select.
2002  *              If MDI_SELECT_PATH_INSTANCE is not set then the argument is
2003  *              "start_pip". A non NULL "start_pip" is the starting point to
2004  *              walk and find the next appropriate path.  The following values
2005  *              are currently defined: MDI_SELECT_ONLINE_PATH (to select an
2006  *              ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an
2007  *              STANDBY path).
2008  *
2009  *              The non-standard behavior is used by the scsi_vhci driver,
2010  *              whenever it has to use a STANDBY/FAULTED path.  Eg. during
2011  *              attach of client devices (to avoid an unnecessary failover
2012  *              when the STANDBY path comes up first), during failover
2013  *              (to activate a STANDBY path as ONLINE).
2014  *
2015  *              The selected path is returned in a mdi_hold_path() state
2016  *              (pi_ref_cnt). Caller should release the hold by calling
2017  *              mdi_rele_path().
2018  *
2019  * Return Values:
2020  *              MDI_SUCCESS     - Completed successfully
2021  *              MDI_BUSY        - Client device is busy failing over
2022  *              MDI_NOPATH      - Client device is online, but no valid path are
2023  *                                available to access this client device
2024  *              MDI_FAILURE     - Invalid client device or state
2025  *              MDI_DEVI_ONLINING
2026  *                              - Client device (struct dev_info state) is in
2027  *                                onlining state.
2028  */
2029 
2030 /*ARGSUSED*/
2031 int
2032 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
2033     void *arg, mdi_pathinfo_t **ret_pip)
2034 {
2035         mdi_client_t    *ct;
2036         mdi_pathinfo_t  *pip;
2037         mdi_pathinfo_t  *next;
2038         mdi_pathinfo_t  *head;
2039         mdi_pathinfo_t  *start;
2040         client_lb_t     lbp;    /* load balancing policy */
2041         int             sb = 1; /* standard behavior */
2042         int             preferred = 1;  /* preferred path */
2043         int             cond, cont = 1;
2044         int             retry = 0;
2045         mdi_pathinfo_t  *start_pip;     /* request starting pathinfo */
2046         int             path_instance;  /* request specific path instance */
2047 
2048         /* determine type of arg based on flags */
2049         if (flags & MDI_SELECT_PATH_INSTANCE) {
2050                 path_instance = (int)(intptr_t)arg;
2051                 start_pip = NULL;
2052         } else {
2053                 path_instance = 0;
2054                 start_pip = (mdi_pathinfo_t *)arg;
2055         }
2056 
2057         if (flags != 0) {
2058                 /*
2059                  * disable default behavior
2060                  */
2061                 sb = 0;
2062         }
2063 
2064         *ret_pip = NULL;
2065         ct = i_devi_get_client(cdip);
2066         if (ct == NULL) {
2067                 /* mdi extensions are NULL, Nothing more to do */
2068                 return (MDI_FAILURE);
2069         }
2070 
2071         MDI_CLIENT_LOCK(ct);
2072 
2073         if (sb) {
2074                 if (MDI_CLIENT_IS_FAILED(ct)) {
2075                         /*
2076                          * Client is not ready to accept any I/O requests.
2077                          * Fail this request.
2078                          */
2079                         MDI_DEBUG(2, (MDI_NOTE, cdip,
2080                             "client state offline ct = %p", (void *)ct));
2081                         MDI_CLIENT_UNLOCK(ct);
2082                         return (MDI_FAILURE);
2083                 }
2084 
2085                 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
2086                         /*
2087                          * Check for Failover is in progress. If so tell the
2088                          * caller that this device is busy.
2089                          */
2090                         MDI_DEBUG(2, (MDI_NOTE, cdip,
2091                             "client failover in progress ct = %p",
2092                             (void *)ct));
2093                         MDI_CLIENT_UNLOCK(ct);
2094                         return (MDI_BUSY);
2095                 }
2096 
2097                 /*
2098                  * Check to see whether the client device is attached.
2099                  * If not so, let the vHCI driver manually select a path
2100                  * (standby) and let the probe/attach process to continue.
2101                  */
2102                 if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
2103                         MDI_DEBUG(4, (MDI_NOTE, cdip,
2104                             "devi is onlining ct = %p", (void *)ct));
2105                         MDI_CLIENT_UNLOCK(ct);
2106                         return (MDI_DEVI_ONLINING);
2107                 }
2108         }
2109 
2110         /*
2111          * Cache in the client list head.  If head of the list is NULL
2112          * return MDI_NOPATH
2113          */
2114         head = ct->ct_path_head;
2115         if (head == NULL) {
2116                 MDI_CLIENT_UNLOCK(ct);
2117                 return (MDI_NOPATH);
2118         }
2119 
2120         /* Caller is specifying a specific pathinfo path by path_instance */
2121         if (path_instance) {
2122                 /* search for pathinfo with correct path_instance */
2123                 for (pip = head;
2124                     pip && (mdi_pi_get_path_instance(pip) != path_instance);
2125                     pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link)
2126                         ;
2127 
2128                 /* If path can't be selected then MDI_NOPATH is returned. */
2129                 if (pip == NULL) {
2130                         MDI_CLIENT_UNLOCK(ct);
2131                         return (MDI_NOPATH);
2132                 }
2133 
2134                 /*
2135                  * Verify state of path. When asked to select a specific
2136                  * path_instance, we select the requested path in any
2137                  * state (ONLINE, OFFLINE, STANDBY, FAULT) other than INIT.
2138                  * We don't however select paths where the pHCI has detached.
2139                  * NOTE: last pathinfo node of an opened client device may
2140                  * exist in an OFFLINE state after the pHCI associated with
2141                  * that path has detached (but pi_phci will be NULL if that
2142                  * has occurred).
2143                  */
2144                 MDI_PI_LOCK(pip);
2145                 if ((MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_INIT) ||
2146                     (MDI_PI(pip)->pi_phci == NULL)) {
2147                         MDI_PI_UNLOCK(pip);
2148                         MDI_CLIENT_UNLOCK(ct);
2149                         return (MDI_FAILURE);
2150                 }
2151 
2152                 /* Return MDI_BUSY if we have a transient condition */
2153                 if (MDI_PI_IS_TRANSIENT(pip)) {
2154                         MDI_PI_UNLOCK(pip);
2155                         MDI_CLIENT_UNLOCK(ct);
2156                         return (MDI_BUSY);
2157                 }
2158 
2159                 /*
2160                  * Return the path in hold state. Caller should release the
2161                  * lock by calling mdi_rele_path()
2162                  */
2163                 MDI_PI_HOLD(pip);
2164                 MDI_PI_UNLOCK(pip);
2165                 *ret_pip = pip;
2166                 MDI_CLIENT_UNLOCK(ct);
2167                 return (MDI_SUCCESS);
2168         }
2169 
2170         /*
2171          * for non default behavior, bypass current
2172          * load balancing policy and always use LOAD_BALANCE_RR
2173          * except that the start point will be adjusted based
2174          * on the provided start_pip
2175          */
2176         lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;
2177 
2178         switch (lbp) {
2179         case LOAD_BALANCE_NONE:
2180                 /*
2181                  * Load balancing is None  or Alternate path mode
2182                  * Start looking for a online mdi_pathinfo node starting from
2183                  * last known selected path
2184                  */
2185                 preferred = 1;
2186                 pip = (mdi_pathinfo_t *)ct->ct_path_last;
2187                 if (pip == NULL) {
2188                         pip = head;
2189                 }
2190                 start = pip;
2191                 do {
2192                         MDI_PI_LOCK(pip);
2193                         /*
2194                          * No need to explicitly check if the path is disabled.
2195                          * Since we are checking for state == ONLINE and the
2196                          * same variable is used for DISABLE/ENABLE information.
2197                          */
2198                         if ((MDI_PI(pip)->pi_state  ==
2199                                 MDI_PATHINFO_STATE_ONLINE) &&
2200                                 preferred == MDI_PI(pip)->pi_preferred) {
2201                                 /*
2202                                  * Return the path in hold state. Caller should
2203                                  * release the lock by calling mdi_rele_path()
2204                                  */
2205                                 MDI_PI_HOLD(pip);
2206                                 MDI_PI_UNLOCK(pip);
2207                                 ct->ct_path_last = pip;
2208                                 *ret_pip = pip;
2209                                 MDI_CLIENT_UNLOCK(ct);
2210                                 return (MDI_SUCCESS);
2211                         }
2212 
2213                         /*
2214                          * Path is busy.
2215                          */
2216                         if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2217                             MDI_PI_IS_TRANSIENT(pip))
2218                                 retry = 1;
2219                         /*
2220                          * Keep looking for a next available online path
2221                          */
2222                         next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2223                         if (next == NULL) {
2224                                 next = head;
2225                         }
2226                         MDI_PI_UNLOCK(pip);
2227                         pip = next;
2228                         if (start == pip && preferred) {
2229                                 preferred = 0;
2230                         } else if (start == pip && !preferred) {
2231                                 cont = 0;
2232                         }
2233                 } while (cont);
2234                 break;
2235 
2236         case LOAD_BALANCE_LBA:
2237                 /*
2238                  * Make sure we are looking
2239                  * for an online path. Otherwise, if it is for a STANDBY
2240                  * path request, it will go through and fetch an ONLINE
2241                  * path which is not desirable.
2242                  */
2243                 if ((ct->ct_lb_args != NULL) &&
2244                             (ct->ct_lb_args->region_size) && bp &&
2245                                 (sb || (flags == MDI_SELECT_ONLINE_PATH))) {
2246                         if (i_mdi_lba_lb(ct, ret_pip, bp)
2247                                     == MDI_SUCCESS) {
2248                                 MDI_CLIENT_UNLOCK(ct);
2249                                 return (MDI_SUCCESS);
2250                         }
2251                 }
2252                 /* FALLTHROUGH */
2253         case LOAD_BALANCE_RR:
2254                 /*
2255                  * Load balancing is Round Robin. Start looking for a online
2256                  * mdi_pathinfo node starting from last known selected path
2257                  * as the start point.  If override flags are specified,
2258                  * process accordingly.
2259                  * If the search is already in effect(start_pip not null),
2260                  * then lets just use the same path preference to continue the
2261                  * traversal.
2262                  */
2263 
2264                 if (start_pip != NULL) {
2265                         preferred = MDI_PI(start_pip)->pi_preferred;
2266                 } else {
2267                         preferred = 1;
2268                 }
2269 
2270                 start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
2271                 if (start == NULL) {
2272                         pip = head;
2273                 } else {
2274                         pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
2275                         if (pip == NULL) {
2276                                 if ( flags & MDI_SELECT_NO_PREFERRED) {
2277                                         /*
2278                                          * Return since we hit the end of list
2279                                          */
2280                                         MDI_CLIENT_UNLOCK(ct);
2281                                         return (MDI_NOPATH);
2282                                 }
2283 
2284                                 if (!sb) {
2285                                         if (preferred == 0) {
2286                                                 /*
2287                                                  * Looks like we have completed
2288                                                  * the traversal as preferred
2289                                                  * value is 0. Time to bail out.
2290                                                  */
2291                                                 *ret_pip = NULL;
2292                                                 MDI_CLIENT_UNLOCK(ct);
2293                                                 return (MDI_NOPATH);
2294                                         } else {
2295                                                 /*
2296                                                  * Looks like we reached the
2297                                                  * end of the list. Lets enable
2298                                                  * traversal of non preferred
2299                                                  * paths.
2300                                                  */
2301                                                 preferred = 0;
2302                                         }
2303                                 }
2304                                 pip = head;
2305                         }
2306                 }
2307                 start = pip;
2308                 do {
2309                         MDI_PI_LOCK(pip);
2310                         if (sb) {
2311                                 cond = ((MDI_PI(pip)->pi_state ==
2312                                     MDI_PATHINFO_STATE_ONLINE &&
2313                                         MDI_PI(pip)->pi_preferred ==
2314                                                 preferred) ? 1 : 0);
2315                         } else {
2316                                 if (flags == MDI_SELECT_ONLINE_PATH) {
2317                                         cond = ((MDI_PI(pip)->pi_state ==
2318                                             MDI_PATHINFO_STATE_ONLINE &&
2319                                                 MDI_PI(pip)->pi_preferred ==
2320                                                 preferred) ? 1 : 0);
2321                                 } else if (flags == MDI_SELECT_STANDBY_PATH) {
2322                                         cond = ((MDI_PI(pip)->pi_state ==
2323                                             MDI_PATHINFO_STATE_STANDBY &&
2324                                                 MDI_PI(pip)->pi_preferred ==
2325                                                 preferred) ? 1 : 0);
2326                                 } else if (flags == (MDI_SELECT_ONLINE_PATH |
2327                                     MDI_SELECT_STANDBY_PATH)) {
2328                                         cond = (((MDI_PI(pip)->pi_state ==
2329                                             MDI_PATHINFO_STATE_ONLINE ||
2330                                             (MDI_PI(pip)->pi_state ==
2331                                             MDI_PATHINFO_STATE_STANDBY)) &&
2332                                                 MDI_PI(pip)->pi_preferred ==
2333                                                 preferred) ? 1 : 0);
2334                                 } else if (flags ==
2335                                         (MDI_SELECT_STANDBY_PATH |
2336                                         MDI_SELECT_ONLINE_PATH |
2337                                         MDI_SELECT_USER_DISABLE_PATH)) {
2338                                         cond = (((MDI_PI(pip)->pi_state ==
2339                                             MDI_PATHINFO_STATE_ONLINE ||
2340                                             (MDI_PI(pip)->pi_state ==
2341                                             MDI_PATHINFO_STATE_STANDBY) ||
2342                                                 (MDI_PI(pip)->pi_state ==
2343                                             (MDI_PATHINFO_STATE_ONLINE|
2344                                             MDI_PATHINFO_STATE_USER_DISABLE)) ||
2345                                                 (MDI_PI(pip)->pi_state ==
2346                                             (MDI_PATHINFO_STATE_STANDBY |
2347                                             MDI_PATHINFO_STATE_USER_DISABLE)))&&
2348                                                 MDI_PI(pip)->pi_preferred ==
2349                                                 preferred) ? 1 : 0);
2350                                 } else if (flags ==
2351                                     (MDI_SELECT_STANDBY_PATH |
2352                                     MDI_SELECT_ONLINE_PATH |
2353                                     MDI_SELECT_NO_PREFERRED)) {
2354                                         cond = (((MDI_PI(pip)->pi_state ==
2355                                             MDI_PATHINFO_STATE_ONLINE) ||
2356                                             (MDI_PI(pip)->pi_state ==
2357                                             MDI_PATHINFO_STATE_STANDBY))
2358                                             ? 1 : 0);
2359                                 } else {
2360                                         cond = 0;
2361                                 }
2362                         }
2363                         /*
2364                          * No need to explicitly check if the path is disabled.
2365                          * Since we are checking for state == ONLINE and the
2366                          * same variable is used for DISABLE/ENABLE information.
2367                          */
2368                         if (cond) {
2369                                 /*
2370                                  * Return the path in hold state. Caller should
2371                                  * release the lock by calling mdi_rele_path()
2372                                  */
2373                                 MDI_PI_HOLD(pip);
2374                                 MDI_PI_UNLOCK(pip);
2375                                 if (sb)
2376                                         ct->ct_path_last = pip;
2377                                 *ret_pip = pip;
2378                                 MDI_CLIENT_UNLOCK(ct);
2379                                 return (MDI_SUCCESS);
2380                         }
2381                         /*
2382                          * Path is busy.
2383                          */
2384                         if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2385                             MDI_PI_IS_TRANSIENT(pip))
2386                                 retry = 1;
2387 
2388                         /*
2389                          * Keep looking for a next available online path
2390                          */
2391 do_again:
2392                         next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2393                         if (next == NULL) {
2394                                 if ( flags & MDI_SELECT_NO_PREFERRED) {
2395                                         /*
2396                                          * Bail out since we hit the end of list
2397                                          */
2398                                         MDI_PI_UNLOCK(pip);
2399                                         break;
2400                                 }
2401 
2402                                 if (!sb) {
2403                                         if (preferred == 1) {
2404                                                 /*
2405                                                  * Looks like we reached the
2406                                                  * end of the list. Lets enable
2407                                                  * traversal of non preferred
2408                                                  * paths.
2409                                                  */
2410                                                 preferred = 0;
2411                                                 next = head;
2412                                         } else {
2413                                                 /*
2414                                                  * We have done both the passes
2415                                                  * Preferred as well as for
2416                                                  * Non-preferred. Bail out now.
2417                                                  */
2418                                                 cont = 0;
2419                                         }
2420                                 } else {
2421                                         /*
2422                                          * Standard behavior case.
2423                                          */
2424                                         next = head;
2425                                 }
2426                         }
2427                         MDI_PI_UNLOCK(pip);
2428                         if (cont == 0) {
2429                                 break;
2430                         }
2431                         pip = next;
2432 
2433                         if (!sb) {
2434                                 /*
2435                                  * We need to handle the selection of
2436                                  * non-preferred path in the following
2437                                  * case:
2438                                  *
2439                                  * +------+   +------+   +------+   +-----+
2440                                  * | A : 1| - | B : 1| - | C : 0| - |NULL |
2441                                  * +------+   +------+   +------+   +-----+
2442                                  *
2443                                  * If we start the search with B, we need to
2444                                  * skip beyond B to pick C which is non -
2445                                  * preferred in the second pass. The following
2446                                  * test, if true, will allow us to skip over
2447                                  * the 'start'(B in the example) to select
2448                                  * other non preferred elements.
2449                                  */
2450                                 if ((start_pip != NULL) && (start_pip == pip) &&
2451                                     (MDI_PI(start_pip)->pi_preferred
2452                                     != preferred)) {
2453                                         /*
2454                                          * try again after going past the start
2455                                          * pip
2456                                          */
2457                                         MDI_PI_LOCK(pip);
2458                                         goto do_again;
2459                                 }
2460                         } else {
2461                                 /*
2462                                  * Standard behavior case
2463                                  */
2464                                 if (start == pip && preferred) {
2465                                         /* look for nonpreferred paths */
2466                                         preferred = 0;
2467                                 } else if (start == pip && !preferred) {
2468                                         /*
2469                                          * Exit condition
2470                                          */
2471                                         cont = 0;
2472                                 }
2473                         }
2474                 } while (cont);
2475                 break;
2476         }
2477 
2478         MDI_CLIENT_UNLOCK(ct);
2479         if (retry == 1) {
2480                 return (MDI_BUSY);
2481         } else {
2482                 return (MDI_NOPATH);
2483         }
2484 }
2485 
2486 /*
2487  * For a client, return the next available path to any phci
2488  *
2489  * Note:
2490  *              Caller should hold the branch's devinfo node to get a consistent
2491  *              snap shot of the mdi_pathinfo nodes.
2492  *
2493  *              Please note that even the list is stable the mdi_pathinfo
2494  *              node state and properties are volatile.  The caller should lock
2495  *              and unlock the nodes by calling mdi_pi_lock() and
2496  *              mdi_pi_unlock() functions to get a stable properties.
2497  *
2498  *              If there is a need to use the nodes beyond the hold of the
2499  *              devinfo node period (For ex. I/O), then mdi_pathinfo node
2500  *              need to be held against unexpected removal by calling
2501  *              mdi_hold_path() and should be released by calling
2502  *              mdi_rele_path() on completion.
2503  */
2504 mdi_pathinfo_t *
2505 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
2506 {
2507         mdi_client_t *ct;
2508 
2509         if (!MDI_CLIENT(ct_dip))
2510                 return (NULL);
2511 
2512         /*
2513          * Walk through client link
2514          */
2515         ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
2516         ASSERT(ct != NULL);
2517 
2518         if (pip == NULL)
2519                 return ((mdi_pathinfo_t *)ct->ct_path_head);
2520 
2521         return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
2522 }
2523 
2524 /*
2525  * For a phci, return the next available path to any client
2526  * Note: ditto mdi_get_next_phci_path()
2527  */
2528 mdi_pathinfo_t *
2529 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
2530 {
2531         mdi_phci_t *ph;
2532 
2533         if (!MDI_PHCI(ph_dip))
2534                 return (NULL);
2535 
2536         /*
2537          * Walk through pHCI link
2538          */
2539         ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
2540         ASSERT(ph != NULL);
2541 
2542         if (pip == NULL)
2543                 return ((mdi_pathinfo_t *)ph->ph_path_head);
2544 
2545         return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
2546 }
2547 
2548 /*
2549  * mdi_hold_path():
2550  *              Hold the mdi_pathinfo node against unwanted unexpected free.
2551  * Return Values:
2552  *              None
2553  */
2554 void
2555 mdi_hold_path(mdi_pathinfo_t *pip)
2556 {
2557         if (pip) {
2558                 MDI_PI_LOCK(pip);
2559                 MDI_PI_HOLD(pip);
2560                 MDI_PI_UNLOCK(pip);
2561         }
2562 }
2563 
2564 
2565 /*
2566  * mdi_rele_path():
2567  *              Release the mdi_pathinfo node which was selected
2568  *              through mdi_select_path() mechanism or manually held by
2569  *              calling mdi_hold_path().
2570  * Return Values:
2571  *              None
2572  */
2573 void
2574 mdi_rele_path(mdi_pathinfo_t *pip)
2575 {
2576         if (pip) {
2577                 MDI_PI_LOCK(pip);
2578                 MDI_PI_RELE(pip);
2579                 if (MDI_PI(pip)->pi_ref_cnt == 0) {
2580                         cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
2581                 }
2582                 MDI_PI_UNLOCK(pip);
2583         }
2584 }
2585 
2586 /*
2587  * mdi_pi_lock():
2588  *              Lock the mdi_pathinfo node.
2589  * Note:
2590  *              The caller should release the lock by calling mdi_pi_unlock()
2591  */
2592 void
2593 mdi_pi_lock(mdi_pathinfo_t *pip)
2594 {
2595         ASSERT(pip != NULL);
2596         if (pip) {
2597                 MDI_PI_LOCK(pip);
2598         }
2599 }
2600 
2601 
2602 /*
2603  * mdi_pi_unlock():
2604  *              Unlock the mdi_pathinfo node.
2605  * Note:
2606  *              The mdi_pathinfo node should have been locked with mdi_pi_lock()
2607  */
2608 void
2609 mdi_pi_unlock(mdi_pathinfo_t *pip)
2610 {
2611         ASSERT(pip != NULL);
2612         if (pip) {
2613                 MDI_PI_UNLOCK(pip);
2614         }
2615 }
2616 
2617 /*
2618  * mdi_pi_find():
2619  *              Search the list of mdi_pathinfo nodes attached to the
2620  *              pHCI/Client device node whose path address matches "paddr".
2621  *              Returns a pointer to the mdi_pathinfo node if a matching node is
2622  *              found.
2623  * Return Values:
2624  *              mdi_pathinfo node handle
2625  *              NULL
2626  * Notes:
2627  *              Caller need not hold any locks to call this function.
2628  */
2629 mdi_pathinfo_t *
2630 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
2631 {
2632         mdi_phci_t              *ph;
2633         mdi_vhci_t              *vh;
2634         mdi_client_t            *ct;
2635         mdi_pathinfo_t          *pip = NULL;
2636 
2637         MDI_DEBUG(2, (MDI_NOTE, pdip,
2638             "caddr@%s paddr@%s", caddr ? caddr : "", paddr ? paddr : ""));
2639         if ((pdip == NULL) || (paddr == NULL)) {
2640                 return (NULL);
2641         }
2642         ph = i_devi_get_phci(pdip);
2643         if (ph == NULL) {
2644                 /*
2645                  * Invalid pHCI device, Nothing more to do.
2646                  */
2647                 MDI_DEBUG(2, (MDI_WARN, pdip, "invalid phci"));
2648                 return (NULL);
2649         }
2650 
2651         vh = ph->ph_vhci;
2652         if (vh == NULL) {
2653                 /*
2654                  * Invalid vHCI device, Nothing more to do.
2655                  */
2656                 MDI_DEBUG(2, (MDI_WARN, pdip, "invalid vhci"));
2657                 return (NULL);
2658         }
2659 
2660         /*
2661          * Look for pathinfo node identified by paddr.
2662          */
2663         if (caddr == NULL) {
2664                 /*
2665                  * Find a mdi_pathinfo node under pHCI list for a matching
2666                  * unit address.
2667                  */
2668                 MDI_PHCI_LOCK(ph);
2669                 if (MDI_PHCI_IS_OFFLINE(ph)) {
2670                         MDI_DEBUG(2, (MDI_WARN, pdip,
2671                             "offline phci %p", (void *)ph));
2672                         MDI_PHCI_UNLOCK(ph);
2673                         return (NULL);
2674                 }
2675                 pip = (mdi_pathinfo_t *)ph->ph_path_head;
2676 
2677                 while (pip != NULL) {
2678                         if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2679                                 break;
2680                         }
2681                         pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
2682                 }
2683                 MDI_PHCI_UNLOCK(ph);
2684                 MDI_DEBUG(2, (MDI_NOTE, pdip,
2685                     "found %s %p", mdi_pi_spathname(pip), (void *)pip));
2686                 return (pip);
2687         }
2688 
2689         /*
2690          * XXX - Is the rest of the code in this function really necessary?
2691          * The consumers of mdi_pi_find() can search for the desired pathinfo
2692          * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
2693          * whether the search is based on the pathinfo nodes attached to
2694          * the pHCI or the client node, the result will be the same.
2695          */
2696 
2697         /*
2698          * Find the client device corresponding to 'caddr'
2699          */
2700         MDI_VHCI_CLIENT_LOCK(vh);
2701 
2702         /*
2703          * XXX - Passing NULL to the following function works as long as the
2704          * the client addresses (caddr) are unique per vhci basis.
2705          */
2706         ct = i_mdi_client_find(vh, NULL, caddr);
2707         if (ct == NULL) {
2708                 /*
2709                  * Client not found, Obviously mdi_pathinfo node has not been
2710                  * created yet.
2711                  */
2712                 MDI_VHCI_CLIENT_UNLOCK(vh);
2713                 MDI_DEBUG(2, (MDI_NOTE, pdip,
2714                     "client not found for caddr @%s", caddr ? caddr : ""));
2715                 return (NULL);
2716         }
2717 
2718         /*
2719          * Hold the client lock and look for a mdi_pathinfo node with matching
2720          * pHCI and paddr
2721          */
2722         MDI_CLIENT_LOCK(ct);
2723 
2724         /*
2725          * Release the global mutex as it is no more needed. Note: We always
2726          * respect the locking order while acquiring.
2727          */
2728         MDI_VHCI_CLIENT_UNLOCK(vh);
2729 
2730         pip = (mdi_pathinfo_t *)ct->ct_path_head;
2731         while (pip != NULL) {
2732                 /*
2733                  * Compare the unit address
2734                  */
2735                 if ((MDI_PI(pip)->pi_phci == ph) &&
2736                     strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2737                         break;
2738                 }
2739                 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2740         }
2741         MDI_CLIENT_UNLOCK(ct);
2742         MDI_DEBUG(2, (MDI_NOTE, pdip,
2743             "found: %s %p", mdi_pi_spathname(pip), (void *)pip));
2744         return (pip);
2745 }
2746 
2747 /*
2748  * mdi_pi_alloc():
2749  *              Allocate and initialize a new instance of a mdi_pathinfo node.
2750  *              The mdi_pathinfo node returned by this function identifies a
2751  *              unique device path is capable of having properties attached
2752  *              and passed to mdi_pi_online() to fully attach and online the
2753  *              path and client device node.
2754  *              The mdi_pathinfo node returned by this function must be
2755  *              destroyed using mdi_pi_free() if the path is no longer
2756  *              operational or if the caller fails to attach a client device
2757  *              node when calling mdi_pi_online(). The framework will not free
2758  *              the resources allocated.
2759  *              This function can be called from both interrupt and kernel
2760  *              contexts.  DDI_NOSLEEP flag should be used while calling
2761  *              from interrupt contexts.
2762  * Return Values:
2763  *              MDI_SUCCESS
2764  *              MDI_FAILURE
2765  *              MDI_NOMEM
2766  */
2767 /*ARGSUSED*/
2768 int
2769 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2770     char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
2771 {
2772         mdi_vhci_t      *vh;
2773         mdi_phci_t      *ph;
2774         mdi_client_t    *ct;
2775         mdi_pathinfo_t  *pip = NULL;
2776         dev_info_t      *cdip;
2777         int             rv = MDI_NOMEM;
2778         int             path_allocated = 0;
2779 
2780         MDI_DEBUG(2, (MDI_NOTE, pdip,
2781             "cname %s: caddr@%s paddr@%s",
2782             cname ? cname : "", caddr ? caddr : "", paddr ? paddr : ""));
2783 
2784         if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
2785             ret_pip == NULL) {
2786                 /* Nothing more to do */
2787                 return (MDI_FAILURE);
2788         }
2789 
2790         *ret_pip = NULL;
2791 
2792         /* No allocations on detaching pHCI */
2793         if (DEVI_IS_DETACHING(pdip)) {
2794                 /* Invalid pHCI device, return failure */
2795                 MDI_DEBUG(1, (MDI_WARN, pdip,
2796                     "!detaching pHCI=%p", (void *)pdip));
2797                 return (MDI_FAILURE);
2798         }
2799 
2800         ph = i_devi_get_phci(pdip);
2801         ASSERT(ph != NULL);
2802         if (ph == NULL) {
2803                 /* Invalid pHCI device, return failure */
2804                 MDI_DEBUG(1, (MDI_WARN, pdip,
2805                     "!invalid pHCI=%p", (void *)pdip));
2806                 return (MDI_FAILURE);
2807         }
2808 
2809         MDI_PHCI_LOCK(ph);
2810         vh = ph->ph_vhci;
2811         if (vh == NULL) {
2812                 /* Invalid vHCI device, return failure */
2813                 MDI_DEBUG(1, (MDI_WARN, pdip,
2814                     "!invalid vHCI=%p", (void *)pdip));
2815                 MDI_PHCI_UNLOCK(ph);
2816                 return (MDI_FAILURE);
2817         }
2818 
2819         if (MDI_PHCI_IS_READY(ph) == 0) {
2820                 /*
2821                  * Do not allow new node creation when pHCI is in
2822                  * offline/suspended states
2823                  */
2824                 MDI_DEBUG(1, (MDI_WARN, pdip,
2825                     "pHCI=%p is not ready", (void *)ph));
2826                 MDI_PHCI_UNLOCK(ph);
2827                 return (MDI_BUSY);
2828         }
2829         MDI_PHCI_UNSTABLE(ph);
2830         MDI_PHCI_UNLOCK(ph);
2831 
2832         /* look for a matching client, create one if not found */
2833         MDI_VHCI_CLIENT_LOCK(vh);
2834         ct = i_mdi_client_find(vh, cname, caddr);
2835         if (ct == NULL) {
2836                 ct = i_mdi_client_alloc(vh, cname, caddr);
2837                 ASSERT(ct != NULL);
2838         }
2839 
2840         if (ct->ct_dip == NULL) {
2841                 /*
2842                  * Allocate a devinfo node
2843                  */
2844                 ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
2845                     compatible, ncompatible);
2846                 if (ct->ct_dip == NULL) {
2847                         (void) i_mdi_client_free(vh, ct);
2848                         goto fail;
2849                 }
2850         }
2851         cdip = ct->ct_dip;
2852 
2853         DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
2854         DEVI(cdip)->devi_mdi_client = (caddr_t)ct;
2855 
2856         MDI_CLIENT_LOCK(ct);
2857         pip = (mdi_pathinfo_t *)ct->ct_path_head;
2858         while (pip != NULL) {
2859                 /*
2860                  * Compare the unit address
2861                  */
2862                 if ((MDI_PI(pip)->pi_phci == ph) &&
2863                     strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2864                         break;
2865                 }
2866                 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2867         }
2868         MDI_CLIENT_UNLOCK(ct);
2869 
2870         if (pip == NULL) {
2871                 /*
2872                  * This is a new path for this client device.  Allocate and
2873                  * initialize a new pathinfo node
2874                  */
2875                 pip = i_mdi_pi_alloc(ph, paddr, ct);
2876                 ASSERT(pip != NULL);
2877                 path_allocated = 1;
2878         }
2879         rv = MDI_SUCCESS;
2880 
2881 fail:
2882         /*
2883          * Release the global mutex.
2884          */
2885         MDI_VHCI_CLIENT_UNLOCK(vh);
2886 
2887         /*
2888          * Mark the pHCI as stable
2889          */
2890         MDI_PHCI_LOCK(ph);
2891         MDI_PHCI_STABLE(ph);
2892         MDI_PHCI_UNLOCK(ph);
2893         *ret_pip = pip;
2894 
2895         MDI_DEBUG(2, (MDI_NOTE, pdip,
2896             "alloc %s %p", mdi_pi_spathname(pip), (void *)pip));
2897 
2898         if (path_allocated)
2899                 vhcache_pi_add(vh->vh_config, MDI_PI(pip));
2900 
2901         return (rv);
2902 }
2903 
2904 /*ARGSUSED*/
2905 int
2906 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2907     int flags, mdi_pathinfo_t **ret_pip)
2908 {
2909         return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
2910             flags, ret_pip));
2911 }
2912 
2913 /*
2914  * i_mdi_pi_alloc():
2915  *              Allocate a mdi_pathinfo node and add to the pHCI path list
2916  * Return Values:
2917  *              mdi_pathinfo
2918  */
2919 /*ARGSUSED*/
2920 static mdi_pathinfo_t *
2921 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
2922 {
2923         mdi_pathinfo_t  *pip;
2924         int             ct_circular;
2925         int             ph_circular;
2926         static char     path[MAXPATHLEN];       /* mdi_pathmap_mutex protects */
2927         char            *path_persistent;
2928         int             path_instance;
2929         mod_hash_val_t  hv;
2930 
2931         ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));
2932 
2933         pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
2934         mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
2935         MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
2936             MDI_PATHINFO_STATE_TRANSIENT;
2937 
2938         if (MDI_PHCI_IS_USER_DISABLED(ph))
2939                 MDI_PI_SET_USER_DISABLE(pip);
2940 
2941         if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
2942                 MDI_PI_SET_DRV_DISABLE_TRANS(pip);
2943 
2944         if (MDI_PHCI_IS_DRV_DISABLED(ph))
2945                 MDI_PI_SET_DRV_DISABLE(pip);
2946 
2947         MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
2948         cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
2949         MDI_PI(pip)->pi_client = ct;
2950         MDI_PI(pip)->pi_phci = ph;
2951         MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
2952         (void) strcpy(MDI_PI(pip)->pi_addr, paddr);
2953 
2954         /*
2955          * We form the "path" to the pathinfo node, and see if we have
2956          * already allocated a 'path_instance' for that "path".  If so,
2957          * we use the already allocated 'path_instance'.  If not, we
2958          * allocate a new 'path_instance' and associate it with a copy of
2959          * the "path" string (which is never freed). The association
2960          * between a 'path_instance' this "path" string persists until
2961          * reboot.
2962          */
2963         mutex_enter(&mdi_pathmap_mutex);
2964         (void) ddi_pathname(ph->ph_dip, path);
2965         (void) sprintf(path + strlen(path), "/%s@%s",
2966             mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
2967         if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) {
2968                 path_instance = (uint_t)(intptr_t)hv;
2969         } else {
2970                 /* allocate a new 'path_instance' and persistent "path" */
2971                 path_instance = mdi_pathmap_instance++;
2972                 path_persistent = i_ddi_strdup(path, KM_SLEEP);
2973                 (void) mod_hash_insert(mdi_pathmap_bypath,
2974                     (mod_hash_key_t)path_persistent,
2975                     (mod_hash_val_t)(intptr_t)path_instance);
2976                 (void) mod_hash_insert(mdi_pathmap_byinstance,
2977                     (mod_hash_key_t)(intptr_t)path_instance,
2978                     (mod_hash_val_t)path_persistent);
2979 
2980                 /* create shortpath name */
2981                 (void) snprintf(path, sizeof(path), "%s%d/%s@%s",
2982                     ddi_driver_name(ph->ph_dip), ddi_get_instance(ph->ph_dip),
2983                     mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
2984                 path_persistent = i_ddi_strdup(path, KM_SLEEP);
2985                 (void) mod_hash_insert(mdi_pathmap_sbyinstance,
2986                     (mod_hash_key_t)(intptr_t)path_instance,
2987                     (mod_hash_val_t)path_persistent);
2988         }
2989         mutex_exit(&mdi_pathmap_mutex);
2990         MDI_PI(pip)->pi_path_instance = path_instance;
2991 
2992         (void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
2993         ASSERT(MDI_PI(pip)->pi_prop != NULL);
2994         MDI_PI(pip)->pi_pprivate = NULL;
2995         MDI_PI(pip)->pi_cprivate = NULL;
2996         MDI_PI(pip)->pi_vprivate = NULL;
2997         MDI_PI(pip)->pi_client_link = NULL;
2998         MDI_PI(pip)->pi_phci_link = NULL;
2999         MDI_PI(pip)->pi_ref_cnt = 0;
3000         MDI_PI(pip)->pi_kstats = NULL;
3001         MDI_PI(pip)->pi_preferred = 1;
3002         cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);
3003 
3004         /*
3005          * Lock both dev_info nodes against changes in parallel.
3006          *
3007          * The ndi_devi_enter(Client), is atypical since the client is a leaf.
3008          * This atypical operation is done to synchronize pathinfo nodes
3009          * during devinfo snapshot (see di_register_pip) by 'pretending' that
3010          * the pathinfo nodes are children of the Client.
3011          */
3012         ndi_devi_enter(ct->ct_dip, &ct_circular);
3013         ndi_devi_enter(ph->ph_dip, &ph_circular);
3014 
3015         i_mdi_phci_add_path(ph, pip);
3016         i_mdi_client_add_path(ct, pip);
3017 
3018         ndi_devi_exit(ph->ph_dip, ph_circular);
3019         ndi_devi_exit(ct->ct_dip, ct_circular);
3020 
3021         return (pip);
3022 }
3023 
3024 /*
3025  * mdi_pi_pathname_by_instance():
3026  *      Lookup of "path" by 'path_instance'. Return "path".
3027  *      NOTE: returned "path" remains valid forever (until reboot).
3028  */
3029 char *
3030 mdi_pi_pathname_by_instance(int path_instance)
3031 {
3032         char            *path;
3033         mod_hash_val_t  hv;
3034 
3035         /* mdi_pathmap lookup of "path" by 'path_instance' */
3036         mutex_enter(&mdi_pathmap_mutex);
3037         if (mod_hash_find(mdi_pathmap_byinstance,
3038             (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
3039                 path = (char *)hv;
3040         else
3041                 path = NULL;
3042         mutex_exit(&mdi_pathmap_mutex);
3043         return (path);
3044 }
3045 
3046 /*
3047  * mdi_pi_spathname_by_instance():
3048  *      Lookup of "shortpath" by 'path_instance'. Return "shortpath".
3049  *      NOTE: returned "shortpath" remains valid forever (until reboot).
3050  */
3051 char *
3052 mdi_pi_spathname_by_instance(int path_instance)
3053 {
3054         char            *path;
3055         mod_hash_val_t  hv;
3056 
3057         /* mdi_pathmap lookup of "path" by 'path_instance' */
3058         mutex_enter(&mdi_pathmap_mutex);
3059         if (mod_hash_find(mdi_pathmap_sbyinstance,
3060             (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
3061                 path = (char *)hv;
3062         else
3063                 path = NULL;
3064         mutex_exit(&mdi_pathmap_mutex);
3065         return (path);
3066 }
3067 
3068 
3069 /*
3070  * i_mdi_phci_add_path():
3071  *              Add a mdi_pathinfo node to pHCI list.
3072  * Notes:
3073  *              Caller should per-pHCI mutex
3074  */
3075 static void
3076 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3077 {
3078         ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3079 
3080         MDI_PHCI_LOCK(ph);
3081         if (ph->ph_path_head == NULL) {
3082                 ph->ph_path_head = pip;
3083         } else {
3084                 MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
3085         }
3086         ph->ph_path_tail = pip;
3087         ph->ph_path_count++;
3088         MDI_PHCI_UNLOCK(ph);
3089 }
3090 
3091 /*
3092  * i_mdi_client_add_path():
3093  *              Add mdi_pathinfo node to client list
3094  */
3095 static void
3096 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3097 {
3098         ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3099 
3100         MDI_CLIENT_LOCK(ct);
3101         if (ct->ct_path_head == NULL) {
3102                 ct->ct_path_head = pip;
3103         } else {
3104                 MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
3105         }
3106         ct->ct_path_tail = pip;
3107         ct->ct_path_count++;
3108         MDI_CLIENT_UNLOCK(ct);
3109 }
3110 
3111 /*
3112  * mdi_pi_free():
3113  *              Free the mdi_pathinfo node and also client device node if this
3114  *              is the last path to the device
3115  * Return Values:
3116  *              MDI_SUCCESS
3117  *              MDI_FAILURE
3118  *              MDI_BUSY
3119  */
3120 /*ARGSUSED*/
3121 int
3122 mdi_pi_free(mdi_pathinfo_t *pip, int flags)
3123 {
3124         int             rv;
3125         mdi_vhci_t      *vh;
3126         mdi_phci_t      *ph;
3127         mdi_client_t    *ct;
3128         int             (*f)();
3129         int             client_held = 0;
3130 
3131         MDI_PI_LOCK(pip);
3132         ph = MDI_PI(pip)->pi_phci;
3133         ASSERT(ph != NULL);
3134         if (ph == NULL) {
3135                 /*
3136                  * Invalid pHCI device, return failure
3137                  */
3138                 MDI_DEBUG(1, (MDI_WARN, NULL,
3139                     "!invalid pHCI: pip %s %p",
3140                     mdi_pi_spathname(pip), (void *)pip));
3141                 MDI_PI_UNLOCK(pip);
3142                 return (MDI_FAILURE);
3143         }
3144 
3145         vh = ph->ph_vhci;
3146         ASSERT(vh != NULL);
3147         if (vh == NULL) {
3148                 /* Invalid pHCI device, return failure */
3149                 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3150                     "!invalid vHCI: pip %s %p",
3151                     mdi_pi_spathname(pip), (void *)pip));
3152                 MDI_PI_UNLOCK(pip);
3153                 return (MDI_FAILURE);
3154         }
3155 
3156         ct = MDI_PI(pip)->pi_client;
3157         ASSERT(ct != NULL);
3158         if (ct == NULL) {
3159                 /*
3160                  * Invalid Client device, return failure
3161                  */
3162                 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3163                     "!invalid client: pip %s %p",
3164                     mdi_pi_spathname(pip), (void *)pip));
3165                 MDI_PI_UNLOCK(pip);
3166                 return (MDI_FAILURE);
3167         }
3168 
3169         /*
3170          * Check to see for busy condition.  A mdi_pathinfo can only be freed
3171          * if the node state is either offline or init and the reference count
3172          * is zero.
3173          */
3174         if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
3175             MDI_PI_IS_INITING(pip))) {
3176                 /*
3177                  * Node is busy
3178                  */
3179                 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3180                     "!busy: pip %s %p", mdi_pi_spathname(pip), (void *)pip));
3181                 MDI_PI_UNLOCK(pip);
3182                 return (MDI_BUSY);
3183         }
3184 
3185         while (MDI_PI(pip)->pi_ref_cnt != 0) {
3186                 /*
3187                  * Give a chance for pending I/Os to complete.
3188                  */
3189                 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3190                     "!%d cmds still pending on path: %s %p",
3191                     MDI_PI(pip)->pi_ref_cnt,
3192                     mdi_pi_spathname(pip), (void *)pip));
3193                 if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
3194                     &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
3195                     TR_CLOCK_TICK) == -1) {
3196                         /*
3197                          * The timeout time reached without ref_cnt being zero
3198                          * being signaled.
3199                          */
3200                         MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3201                             "!Timeout reached on path %s %p without the cond",
3202                             mdi_pi_spathname(pip), (void *)pip));
3203                         MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3204                             "!%d cmds still pending on path %s %p",
3205                             MDI_PI(pip)->pi_ref_cnt,
3206                             mdi_pi_spathname(pip), (void *)pip));
3207                         MDI_PI_UNLOCK(pip);
3208                         return (MDI_BUSY);
3209                 }
3210         }
3211         if (MDI_PI(pip)->pi_pm_held) {
3212                 client_held = 1;
3213         }
3214         MDI_PI_UNLOCK(pip);
3215 
3216         vhcache_pi_remove(vh->vh_config, MDI_PI(pip));
3217 
3218         MDI_CLIENT_LOCK(ct);
3219 
3220         /* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
3221         MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);
3222 
3223         /*
3224          * Wait till failover is complete before removing this node.
3225          */
3226         while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3227                 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3228 
3229         MDI_CLIENT_UNLOCK(ct);
3230         MDI_VHCI_CLIENT_LOCK(vh);
3231         MDI_CLIENT_LOCK(ct);
3232         MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);
3233 
3234         if (!MDI_PI_IS_INITING(pip)) {
3235                 f = vh->vh_ops->vo_pi_uninit;
3236                 if (f != NULL) {
3237                         rv = (*f)(vh->vh_dip, pip, 0);
3238                 }
3239         } else
3240                 rv = MDI_SUCCESS;
3241 
3242         /*
3243          * If vo_pi_uninit() completed successfully.
3244          */
3245         if (rv == MDI_SUCCESS) {
3246                 if (client_held) {
3247                         MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3248                             "i_mdi_pm_rele_client\n"));
3249                         i_mdi_pm_rele_client(ct, 1);
3250                 }
3251                 i_mdi_pi_free(ph, pip, ct);
3252                 if (ct->ct_path_count == 0) {
3253                         /*
3254                          * Client lost its last path.
3255                          * Clean up the client device
3256                          */
3257                         MDI_CLIENT_UNLOCK(ct);
3258                         (void) i_mdi_client_free(ct->ct_vhci, ct);
3259                         MDI_VHCI_CLIENT_UNLOCK(vh);
3260                         return (rv);
3261                 }
3262         }
3263         MDI_CLIENT_UNLOCK(ct);
3264         MDI_VHCI_CLIENT_UNLOCK(vh);
3265 
3266         if (rv == MDI_FAILURE)
3267                 vhcache_pi_add(vh->vh_config, MDI_PI(pip));
3268 
3269         return (rv);
3270 }
3271 
3272 /*
3273  * i_mdi_pi_free():
3274  *              Free the mdi_pathinfo node
3275  */
3276 static void
3277 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
3278 {
3279         int     ct_circular;
3280         int     ph_circular;
3281 
3282         ASSERT(MDI_CLIENT_LOCKED(ct));
3283 
3284         /*
3285          * remove any per-path kstats
3286          */
3287         i_mdi_pi_kstat_destroy(pip);
3288 
3289         /* See comments in i_mdi_pi_alloc() */
3290         ndi_devi_enter(ct->ct_dip, &ct_circular);
3291         ndi_devi_enter(ph->ph_dip, &ph_circular);
3292 
3293         i_mdi_client_remove_path(ct, pip);
3294         i_mdi_phci_remove_path(ph, pip);
3295 
3296         ndi_devi_exit(ph->ph_dip, ph_circular);
3297         ndi_devi_exit(ct->ct_dip, ct_circular);
3298 
3299         mutex_destroy(&MDI_PI(pip)->pi_mutex);
3300         cv_destroy(&MDI_PI(pip)->pi_state_cv);
3301         cv_destroy(&MDI_PI(pip)->pi_ref_cv);
3302         if (MDI_PI(pip)->pi_addr) {
3303                 kmem_free(MDI_PI(pip)->pi_addr,
3304                     strlen(MDI_PI(pip)->pi_addr) + 1);
3305                 MDI_PI(pip)->pi_addr = NULL;
3306         }
3307 
3308         if (MDI_PI(pip)->pi_prop) {
3309                 (void) nvlist_free(MDI_PI(pip)->pi_prop);
3310                 MDI_PI(pip)->pi_prop = NULL;
3311         }
3312         kmem_free(pip, sizeof (struct mdi_pathinfo));
3313 }
3314 
3315 
3316 /*
3317  * i_mdi_phci_remove_path():
3318  *              Remove a mdi_pathinfo node from pHCI list.
3319  * Notes:
3320  *              Caller should hold per-pHCI mutex
3321  */
3322 static void
3323 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3324 {
3325         mdi_pathinfo_t  *prev = NULL;
3326         mdi_pathinfo_t  *path = NULL;
3327 
3328         ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3329 
3330         MDI_PHCI_LOCK(ph);
3331         path = ph->ph_path_head;
3332         while (path != NULL) {
3333                 if (path == pip) {
3334                         break;
3335                 }
3336                 prev = path;
3337                 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3338         }
3339 
3340         if (path) {
3341                 ph->ph_path_count--;
3342                 if (prev) {
3343                         MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
3344                 } else {
3345                         ph->ph_path_head =
3346                             (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3347                 }
3348                 if (ph->ph_path_tail == path) {
3349                         ph->ph_path_tail = prev;
3350                 }
3351         }
3352 
3353         /*
3354          * Clear the pHCI link
3355          */
3356         MDI_PI(pip)->pi_phci_link = NULL;
3357         MDI_PI(pip)->pi_phci = NULL;
3358         MDI_PHCI_UNLOCK(ph);
3359 }
3360 
3361 /*
3362  * i_mdi_client_remove_path():
3363  *              Remove a mdi_pathinfo node from client path list.
3364  */
3365 static void
3366 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3367 {
3368         mdi_pathinfo_t  *prev = NULL;
3369         mdi_pathinfo_t  *path;
3370 
3371         ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3372 
3373         ASSERT(MDI_CLIENT_LOCKED(ct));
3374         path = ct->ct_path_head;
3375         while (path != NULL) {
3376                 if (path == pip) {
3377                         break;
3378                 }
3379                 prev = path;
3380                 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3381         }
3382 
3383         if (path) {
3384                 ct->ct_path_count--;
3385                 if (prev) {
3386                         MDI_PI(prev)->pi_client_link =
3387                             MDI_PI(path)->pi_client_link;
3388                 } else {
3389                         ct->ct_path_head =
3390                             (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3391                 }
3392                 if (ct->ct_path_tail == path) {
3393                         ct->ct_path_tail = prev;
3394                 }
3395                 if (ct->ct_path_last == path) {
3396                         ct->ct_path_last = ct->ct_path_head;
3397                 }
3398         }
3399         MDI_PI(pip)->pi_client_link = NULL;
3400         MDI_PI(pip)->pi_client = NULL;
3401 }
3402 
/*
 * i_mdi_pi_state_change():
 *              Common worker used to move a mdi_pathinfo node to the
 *              requested state (ONLINE, STANDBY, FAULT or OFFLINE).
 *
 *              The routine runs the vHCI's vo_pi_init callback if the path
 *              is still initializing, marks the pHCI and client unstable
 *              while the change is in flight, puts the path into the
 *              matching transient state, invokes the vHCI's
 *              vo_pi_state_change callback, and then, if the client has no
 *              other state change in flight, updates the client state and
 *              onlines/offlines the client devinfo node as needed.
 *
 *              pip   - path whose state is to be changed
 *              state - target MDI_PATHINFO_STATE_* value
 *              flag  - NDI flags; NDI_USER_REQ is checked here and the
 *                      value is passed through to vo_pi_state_change
 *
 * Return Values:
 *              MDI_SUCCESS
 *              MDI_FAILURE
 *              MDI_BUSY
 *              Any other non-success value returned by the vHCI's
 *              vo_pi_state_change callback is passed back unchanged.
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
        int             rv = MDI_SUCCESS;
        mdi_vhci_t      *vh;
        mdi_phci_t      *ph;
        mdi_client_t    *ct;
        int             (*f)();
        dev_info_t      *cdip;

        MDI_PI_LOCK(pip);

        ph = MDI_PI(pip)->pi_phci;
        ASSERT(ph);
        if (ph == NULL) {
                /*
                 * Invalid pHCI device, fail the request
                 */
                MDI_PI_UNLOCK(pip);
                MDI_DEBUG(1, (MDI_WARN, NULL,
                    "!invalid phci: pip %s %p",
                    mdi_pi_spathname(pip), (void *)pip));
                return (MDI_FAILURE);
        }

        vh = ph->ph_vhci;
        ASSERT(vh);
        if (vh == NULL) {
                /*
                 * Invalid vHCI device, fail the request
                 */
                MDI_PI_UNLOCK(pip);
                MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
                    "!invalid vhci: pip %s %p",
                    mdi_pi_spathname(pip), (void *)pip));
                return (MDI_FAILURE);
        }

        ct = MDI_PI(pip)->pi_client;
        ASSERT(ct != NULL);
        if (ct == NULL) {
                /*
                 * Invalid client device, fail the request
                 */
                MDI_PI_UNLOCK(pip);
                MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
                    "!invalid client: pip %s %p",
                    mdi_pi_spathname(pip), (void *)pip));
                return (MDI_FAILURE);
        }

        /*
         * If this path has not been initialized yet, call back the vHCI
         * driver's pathinfo node initialize entry point.  The pip lock is
         * dropped around the callback.
         */

        if (MDI_PI_IS_INITING(pip)) {
                MDI_PI_UNLOCK(pip);
                f = vh->vh_ops->vo_pi_init;
                if (f != NULL) {
                        rv = (*f)(vh->vh_dip, pip, 0);
                        if (rv != MDI_SUCCESS) {
                                MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
                                    "!vo_pi_init failed: vHCI %p, pip %s %p",
                                    (void *)vh, mdi_pi_spathname(pip),
                                    (void *)pip));
                                /* pip is already unlocked on this path */
                                return (MDI_FAILURE);
                        }
                }
                MDI_PI_LOCK(pip);
                MDI_PI_CLEAR_TRANSIENT(pip);
        }

        /*
         * Do not allow state transition when pHCI is in offline/suspended
         * states
         */
        i_mdi_phci_lock(ph, pip);
        if (MDI_PHCI_IS_READY(ph) == 0) {
                MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
                    "!pHCI not ready, pHCI=%p", (void *)ph));
                MDI_PI_UNLOCK(pip);
                i_mdi_phci_unlock(ph);
                return (MDI_BUSY);
        }
        /*
         * Mark the pHCI unstable for the duration of the state change; the
         * matching MDI_PHCI_STABLE() is done at state_change_exit.
         */
        MDI_PHCI_UNSTABLE(ph);
        i_mdi_phci_unlock(ph);

        /*
         * Check if mdi_pathinfo state is in transient state.
         * If yes, offlining is in progress and wait till transient state is
         * cleared.
         */
        if (MDI_PI_IS_TRANSIENT(pip)) {
                while (MDI_PI_IS_TRANSIENT(pip)) {
                        cv_wait(&MDI_PI(pip)->pi_state_cv,
                            &MDI_PI(pip)->pi_mutex);
                }
        }

        /*
         * Grab the client lock in reverse order sequence and release the
         * mdi_pathinfo mutex.
         */
        i_mdi_client_lock(ct, pip);
        MDI_PI_UNLOCK(pip);

        /*
         * Wait till failover state is cleared
         */
        while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
                cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

        /*
         * Mark the mdi_pathinfo node state as transient.  A state value
         * not listed below leaves the path state untouched.
         */
        MDI_PI_LOCK(pip);
        switch (state) {
        case MDI_PATHINFO_STATE_ONLINE:
                MDI_PI_SET_ONLINING(pip);
                break;

        case MDI_PATHINFO_STATE_STANDBY:
                MDI_PI_SET_STANDBYING(pip);
                break;

        case MDI_PATHINFO_STATE_FAULT:
                /*
                 * Mark the pathinfo state as FAULTED
                 */
                MDI_PI_SET_FAULTING(pip);
                MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
                break;

        case MDI_PATHINFO_STATE_OFFLINE:
                /*
                 * ndi_devi_offline() cannot hold pip or ct locks.
                 */
                MDI_PI_UNLOCK(pip);

                /*
                 * If this is a user initiated path online->offline operation
                 * whose success would transition a client from DEGRADED to
                 * FAILED then only proceed if we can offline the client first.
                 */
                cdip = ct->ct_dip;
                if ((flag & NDI_USER_REQ) &&
                    MDI_PI_IS_ONLINE(pip) &&
                    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
                        i_mdi_client_unlock(ct);
                        rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN);
                        if (rv != NDI_SUCCESS) {
                                /*
                                 * Convert to MDI error code
                                 */
                                switch (rv) {
                                case NDI_BUSY:
                                        rv = MDI_BUSY;
                                        break;
                                default:
                                        rv = MDI_FAILURE;
                                        break;
                                }
                                /*
                                 * Neither the client lock nor the pip lock
                                 * is held here, so it is safe to jump
                                 * straight to the pHCI cleanup.
                                 */
                                goto state_change_exit;
                        } else {
                                i_mdi_client_lock(ct, NULL);
                        }
                }
                /*
                 * Mark the mdi_pathinfo node state as transient
                 */
                MDI_PI_LOCK(pip);
                MDI_PI_SET_OFFLINING(pip);
                break;
        }
        MDI_PI_UNLOCK(pip);
        MDI_CLIENT_UNSTABLE(ct);
        i_mdi_client_unlock(ct);

        /*
         * Call the vHCI's state change callback with no pip or client
         * lock held.  If the vHCI registered no callback, rv keeps its
         * prior value.
         */
        f = vh->vh_ops->vo_pi_state_change;
        if (f != NULL)
                rv = (*f)(vh->vh_dip, pip, state, 0, flag);

        MDI_CLIENT_LOCK(ct);
        MDI_PI_LOCK(pip);
        if (rv == MDI_NOT_SUPPORTED) {
                MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
        }
        if (rv != MDI_SUCCESS) {
                MDI_DEBUG(2, (MDI_WARN, ct->ct_dip,
                    "vo_pi_state_change failed: rv %x", rv));
        }
        if (MDI_PI_IS_TRANSIENT(pip)) {
                if (rv == MDI_SUCCESS) {
                        MDI_PI_CLEAR_TRANSIENT(pip);
                } else {
                        /*
                         * The callback failed: put back the value given by
                         * MDI_PI_OLD_STATE().
                         */
                        MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
                }
        }

        /*
         * Wake anyone waiting for this mdi_pathinfo node
         */
        cv_broadcast(&MDI_PI(pip)->pi_state_cv);
        MDI_PI_UNLOCK(pip);

        /*
         * Mark the client device as stable
         */
        MDI_CLIENT_STABLE(ct);
        if (rv == MDI_SUCCESS) {
                if (ct->ct_unstable == 0) {
                        cdip = ct->ct_dip;

                        /*
                         * Changing the mdi_pathinfo node state will impact
                         * the client state.  Update the client and dev_info
                         * node state accordingly.
                         *
                         * From here until the conversion switch below, rv
                         * holds an NDI_* code rather than an MDI_* code.
                         */
                        rv = NDI_SUCCESS;
                        i_mdi_client_update_state(ct);
                        switch (MDI_CLIENT_STATE(ct)) {
                        case MDI_CLIENT_STATE_OPTIMAL:
                        case MDI_CLIENT_STATE_DEGRADED:
                                if (cdip && !i_ddi_devi_attached(cdip) &&
                                    ((state == MDI_PATHINFO_STATE_ONLINE) ||
                                    (state == MDI_PATHINFO_STATE_STANDBY))) {

                                        /*
                                         * Must do ndi_devi_online() through
                                         * hotplug thread for deferred
                                         * attach mechanism to work
                                         */
                                        MDI_CLIENT_UNLOCK(ct);
                                        rv = ndi_devi_online(cdip, 0);
                                        MDI_CLIENT_LOCK(ct);
                                        if ((rv != NDI_SUCCESS) &&
                                            (MDI_CLIENT_STATE(ct) ==
                                            MDI_CLIENT_STATE_DEGRADED)) {
                                                MDI_DEBUG(1, (MDI_WARN, cdip,
                                                    "!ndi_devi_online failed "
                                                    "error %x", rv));
                                        }
                                        /*
                                         * A failed client online is only
                                         * logged; the path state change
                                         * itself is still reported as
                                         * successful.
                                         */
                                        rv = NDI_SUCCESS;
                                }
                                break;

                        case MDI_CLIENT_STATE_FAILED:
                                /*
                                 * This is the last path case for
                                 * non-user initiated events.
                                 */
                                if ((flag & NDI_USER_REQ) ||
                                    cdip == NULL || i_ddi_node_state(cdip) <
                                    DS_INITIALIZED)
                                        break;

                                MDI_CLIENT_UNLOCK(ct);
                                rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN);
                                MDI_CLIENT_LOCK(ct);

                                if (rv != NDI_SUCCESS) {
                                        /*
                                         * Reset client flags to online as the
                                         * path could not be offlined.
                                         */
                                        MDI_DEBUG(1, (MDI_WARN, cdip,
                                            "!ndi_devi_offline failed: %d",
                                            rv));
                                        MDI_CLIENT_SET_ONLINE(ct);
                                }
                                break;
                        }
                        /*
                         * Convert to MDI error code
                         */
                        switch (rv) {
                        case NDI_SUCCESS:
                                MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
                                i_mdi_report_path_state(ct, pip);
                                rv = MDI_SUCCESS;
                                break;
                        case NDI_BUSY:
                                rv = MDI_BUSY;
                                break;
                        default:
                                rv = MDI_FAILURE;
                                break;
                        }
                }
        }
        MDI_CLIENT_UNLOCK(ct);

state_change_exit:
        /*
         * Mark the pHCI as stable again.
         */
        MDI_PHCI_LOCK(ph);
        MDI_PHCI_STABLE(ph);
        MDI_PHCI_UNLOCK(ph);
        return (rv);
}
3714 
3715 /*
3716  * mdi_pi_online():
3717  *              Place the path_info node in the online state.  The path is
3718  *              now available to be selected by mdi_select_path() for
3719  *              transporting I/O requests to client devices.
3720  * Return Values:
3721  *              MDI_SUCCESS
3722  *              MDI_FAILURE
3723  */
3724 int
3725 mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3726 {
3727         mdi_client_t    *ct = MDI_PI(pip)->pi_client;
3728         int             client_held = 0;
3729         int             rv;
3730 
3731         ASSERT(ct != NULL);
3732         rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
3733         if (rv != MDI_SUCCESS)
3734                 return (rv);
3735 
3736         MDI_PI_LOCK(pip);
3737         if (MDI_PI(pip)->pi_pm_held == 0) {
3738                 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3739                     "i_mdi_pm_hold_pip %p", (void *)pip));
3740                 i_mdi_pm_hold_pip(pip);
3741                 client_held = 1;
3742         }
3743         MDI_PI_UNLOCK(pip);
3744 
3745         if (client_held) {
3746                 MDI_CLIENT_LOCK(ct);
3747                 if (ct->ct_power_cnt == 0) {
3748                         rv = i_mdi_power_all_phci(ct);
3749                 }
3750 
3751                 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3752                     "i_mdi_pm_hold_client %p", (void *)ct));
3753                 i_mdi_pm_hold_client(ct, 1);
3754                 MDI_CLIENT_UNLOCK(ct);
3755         }
3756 
3757         return (rv);
3758 }
3759 
3760 /*
3761  * mdi_pi_standby():
3762  *              Place the mdi_pathinfo node in standby state
3763  *
3764  * Return Values:
3765  *              MDI_SUCCESS
3766  *              MDI_FAILURE
3767  */
3768 int
3769 mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
3770 {
3771         return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
3772 }
3773 
3774 /*
3775  * mdi_pi_fault():
3776  *              Place the mdi_pathinfo node in fault'ed state
3777  * Return Values:
3778  *              MDI_SUCCESS
3779  *              MDI_FAILURE
3780  */
3781 int
3782 mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
3783 {
3784         return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
3785 }
3786 
3787 /*
3788  * mdi_pi_offline():
3789  *              Offline a mdi_pathinfo node.
3790  * Return Values:
3791  *              MDI_SUCCESS
3792  *              MDI_FAILURE
3793  */
3794 int
3795 mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3796 {
3797         int     ret, client_held = 0;
3798         mdi_client_t    *ct;
3799 
3800         /*
3801          * Original code overloaded NDI_DEVI_REMOVE to this interface, and
3802          * used it to mean "user initiated operation" (i.e. devctl). Callers
3803          * should now just use NDI_USER_REQ.
3804          */
3805         if (flags & NDI_DEVI_REMOVE) {
3806                 flags &= ~NDI_DEVI_REMOVE;
3807                 flags |= NDI_USER_REQ;
3808         }
3809 
3810         ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags);
3811 
3812         if (ret == MDI_SUCCESS) {
3813                 MDI_PI_LOCK(pip);
3814                 if (MDI_PI(pip)->pi_pm_held) {
3815                         client_held = 1;
3816                 }
3817                 MDI_PI_UNLOCK(pip);
3818 
3819                 if (client_held) {
3820                         ct = MDI_PI(pip)->pi_client;
3821                         MDI_CLIENT_LOCK(ct);
3822                         MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3823                             "i_mdi_pm_rele_client\n"));
3824                         i_mdi_pm_rele_client(ct, 1);
3825                         MDI_CLIENT_UNLOCK(ct);
3826                 }
3827         }
3828 
3829         return (ret);
3830 }
3831 
3832 /*
3833  * i_mdi_pi_offline():
3834  *              Offline a mdi_pathinfo node and call the vHCI driver's callback
3835  */
3836 static int
3837 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3838 {
3839         dev_info_t      *vdip = NULL;
3840         mdi_vhci_t      *vh = NULL;
3841         mdi_client_t    *ct = NULL;
3842         int             (*f)();
3843         int             rv;
3844 
3845         MDI_PI_LOCK(pip);
3846         ct = MDI_PI(pip)->pi_client;
3847         ASSERT(ct != NULL);
3848 
3849         while (MDI_PI(pip)->pi_ref_cnt != 0) {
3850                 /*
3851                  * Give a chance for pending I/Os to complete.
3852                  */
3853                 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3854                     "!%d cmds still pending on path %s %p",
3855                     MDI_PI(pip)->pi_ref_cnt, mdi_pi_spathname(pip),
3856                     (void *)pip));
3857                 if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
3858                     &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
3859                     TR_CLOCK_TICK) == -1) {
3860                         /*
3861                          * The timeout time reached without ref_cnt being zero
3862                          * being signaled.
3863                          */
3864                         MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3865                             "!Timeout reached on path %s %p without the cond",
3866                             mdi_pi_spathname(pip), (void *)pip));
3867                         MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3868                             "!%d cmds still pending on path %s %p",
3869                             MDI_PI(pip)->pi_ref_cnt,
3870                             mdi_pi_spathname(pip), (void *)pip));
3871                 }
3872         }
3873         vh = ct->ct_vhci;
3874         vdip = vh->vh_dip;
3875 
3876         /*
3877          * Notify vHCI that has registered this event
3878          */
3879         ASSERT(vh->vh_ops);
3880         f = vh->vh_ops->vo_pi_state_change;
3881 
3882         if (f != NULL) {
3883                 MDI_PI_UNLOCK(pip);
3884                 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0,
3885                     flags)) != MDI_SUCCESS) {
3886                         MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3887                             "!vo_path_offline failed: vdip %s%d %p: path %s %p",
3888                             ddi_driver_name(vdip), ddi_get_instance(vdip),
3889                             (void *)vdip, mdi_pi_spathname(pip), (void *)pip));
3890                 }
3891                 MDI_PI_LOCK(pip);
3892         }
3893 
3894         /*
3895          * Set the mdi_pathinfo node state and clear the transient condition
3896          */
3897         MDI_PI_SET_OFFLINE(pip);
3898         cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3899         MDI_PI_UNLOCK(pip);
3900 
3901         MDI_CLIENT_LOCK(ct);
3902         if (rv == MDI_SUCCESS) {
3903                 if (ct->ct_unstable == 0) {
3904                         dev_info_t      *cdip = ct->ct_dip;
3905 
3906                         /*
3907                          * Onlining the mdi_pathinfo node will impact the
3908                          * client state Update the client and dev_info node
3909                          * state accordingly
3910                          */
3911                         i_mdi_client_update_state(ct);
3912                         rv = NDI_SUCCESS;
3913                         if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
3914                                 if (cdip &&
3915                                     (i_ddi_node_state(cdip) >=
3916                                     DS_INITIALIZED)) {
3917                                         MDI_CLIENT_UNLOCK(ct);
3918                                         rv = ndi_devi_offline(cdip,
3919                                             NDI_DEVFS_CLEAN);
3920                                         MDI_CLIENT_LOCK(ct);
3921                                         if (rv != NDI_SUCCESS) {
3922                                                 /*
3923                                                  * ndi_devi_offline failed.
3924                                                  * Reset client flags to
3925                                                  * online.
3926                                                  */
3927                                                 MDI_DEBUG(4, (MDI_WARN, cdip,
3928                                                     "ndi_devi_offline failed: "
3929                                                     "error %x", rv));
3930                                                 MDI_CLIENT_SET_ONLINE(ct);
3931                                         }
3932                                 }
3933                         }
3934                         /*
3935                          * Convert to MDI error code
3936                          */
3937                         switch (rv) {
3938                         case NDI_SUCCESS:
3939                                 rv = MDI_SUCCESS;
3940                                 break;
3941                         case NDI_BUSY:
3942                                 rv = MDI_BUSY;
3943                                 break;
3944                         default:
3945                                 rv = MDI_FAILURE;
3946                                 break;
3947                         }
3948                 }
3949                 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3950                 i_mdi_report_path_state(ct, pip);
3951         }
3952 
3953         MDI_CLIENT_UNLOCK(ct);
3954 
3955         /*
3956          * Change in the mdi_pathinfo node state will impact the client state
3957          */
3958         MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
3959             "ct = %p pip = %p", (void *)ct, (void *)pip));
3960         return (rv);
3961 }
3962 
3963 /*
3964  * i_mdi_pi_online():
3965  *              Online a mdi_pathinfo node and call the vHCI driver's callback
3966  */
3967 static int
3968 i_mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3969 {
3970         mdi_vhci_t      *vh = NULL;
3971         mdi_client_t    *ct = NULL;
3972         mdi_phci_t      *ph;
3973         int             (*f)();
3974         int             rv;
3975 
3976         MDI_PI_LOCK(pip);
3977         ph = MDI_PI(pip)->pi_phci;
3978         vh = ph->ph_vhci;
3979         ct = MDI_PI(pip)->pi_client;
3980         MDI_PI_SET_ONLINING(pip)
3981         MDI_PI_UNLOCK(pip);
3982         f = vh->vh_ops->vo_pi_state_change;
3983         if (f != NULL)
3984                 rv = (*f)(vh->vh_dip, pip, MDI_PATHINFO_STATE_ONLINE, 0,
3985                     flags);
3986         MDI_CLIENT_LOCK(ct);
3987         MDI_PI_LOCK(pip);
3988         cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3989         MDI_PI_UNLOCK(pip);
3990         if (rv == MDI_SUCCESS) {
3991                 dev_info_t      *cdip = ct->ct_dip;
3992 
3993                 rv = MDI_SUCCESS;
3994                 i_mdi_client_update_state(ct);
3995                 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL ||
3996                     MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
3997                         if (cdip && !i_ddi_devi_attached(cdip)) {
3998                                 MDI_CLIENT_UNLOCK(ct);
3999                                 rv = ndi_devi_online(cdip, 0);
4000                                 MDI_CLIENT_LOCK(ct);
4001                                 if ((rv != NDI_SUCCESS) &&
4002                                     (MDI_CLIENT_STATE(ct) ==
4003                                     MDI_CLIENT_STATE_DEGRADED)) {
4004                                         MDI_CLIENT_SET_OFFLINE(ct);
4005                                 }
4006                                 if (rv != NDI_SUCCESS) {
4007                                         /* Reset the path state */
4008                                         MDI_PI_LOCK(pip);
4009                                         MDI_PI(pip)->pi_state =
4010                                             MDI_PI_OLD_STATE(pip);
4011                                         MDI_PI_UNLOCK(pip);
4012                                 }
4013                         }
4014                 }
4015                 switch (rv) {
4016                 case NDI_SUCCESS:
4017                         MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
4018                         i_mdi_report_path_state(ct, pip);
4019                         rv = MDI_SUCCESS;
4020                         break;
4021                 case NDI_BUSY:
4022                         rv = MDI_BUSY;
4023                         break;
4024                 default:
4025                         rv = MDI_FAILURE;
4026                         break;
4027                 }
4028         } else {
4029                 /* Reset the path state */
4030                 MDI_PI_LOCK(pip);
4031                 MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
4032                 MDI_PI_UNLOCK(pip);
4033         }
4034         MDI_CLIENT_UNLOCK(ct);
4035         return (rv);
4036 }
4037 
4038 /*
4039  * mdi_pi_get_node_name():
4040  *              Get the name associated with a mdi_pathinfo node.
4041  *              Since pathinfo nodes are not directly named, we
4042  *              return the node_name of the client.
4043  *
4044  * Return Values:
4045  *              char *
4046  */
4047 char *
4048 mdi_pi_get_node_name(mdi_pathinfo_t *pip)
4049 {
4050         mdi_client_t    *ct;
4051 
4052         if (pip == NULL)
4053                 return (NULL);
4054         ct = MDI_PI(pip)->pi_client;
4055         if ((ct == NULL) || (ct->ct_dip == NULL))
4056                 return (NULL);
4057         return (ddi_node_name(ct->ct_dip));
4058 }
4059 
4060 /*
4061  * mdi_pi_get_addr():
4062  *              Get the unit address associated with a mdi_pathinfo node
4063  *
4064  * Return Values:
4065  *              char *
4066  */
4067 char *
4068 mdi_pi_get_addr(mdi_pathinfo_t *pip)
4069 {
4070         if (pip == NULL)
4071                 return (NULL);
4072 
4073         return (MDI_PI(pip)->pi_addr);
4074 }
4075 
4076 /*
4077  * mdi_pi_get_path_instance():
4078  *              Get the 'path_instance' of a mdi_pathinfo node
4079  *
4080  * Return Values:
4081  *              path_instance
4082  */
4083 int
4084 mdi_pi_get_path_instance(mdi_pathinfo_t *pip)
4085 {
4086         if (pip == NULL)
4087                 return (0);
4088 
4089         return (MDI_PI(pip)->pi_path_instance);
4090 }
4091 
4092 /*
4093  * mdi_pi_pathname():
4094  *              Return pointer to path to pathinfo node.
4095  */
4096 char *
4097 mdi_pi_pathname(mdi_pathinfo_t *pip)
4098 {
4099         if (pip == NULL)
4100                 return (NULL);
4101         return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip)));
4102 }
4103 
4104 /*
4105  * mdi_pi_spathname():
4106  *              Return pointer to shortpath to pathinfo node. Used for debug
4107  *              messages, so return "" instead of NULL when unknown.
4108  */
4109 char *
4110 mdi_pi_spathname(mdi_pathinfo_t *pip)
4111 {
4112         char    *spath = "";
4113 
4114         if (pip) {
4115                 spath = mdi_pi_spathname_by_instance(
4116                     mdi_pi_get_path_instance(pip));
4117                 if (spath == NULL)
4118                         spath = "";
4119         }
4120         return (spath);
4121 }
4122 
4123 char *
4124 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path)
4125 {
4126         char *obp_path = NULL;
4127         if ((pip == NULL) || (path == NULL))
4128                 return (NULL);
4129 
4130         if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) {
4131                 (void) strcpy(path, obp_path);
4132                 (void) mdi_prop_free(obp_path);
4133         } else {
4134                 path = NULL;
4135         }
4136         return (path);
4137 }
4138 
4139 int
4140 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component)
4141 {
4142         dev_info_t *pdip;
4143         char *obp_path = NULL;
4144         int rc = MDI_FAILURE;
4145 
4146         if (pip == NULL)
4147                 return (MDI_FAILURE);
4148 
4149         pdip = mdi_pi_get_phci(pip);
4150         if (pdip == NULL)
4151                 return (MDI_FAILURE);
4152 
4153         obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
4154 
4155         if (ddi_pathname_obp(pdip, obp_path) == NULL) {
4156                 (void) ddi_pathname(pdip, obp_path);
4157         }
4158 
4159         if (component) {
4160                 (void) strncat(obp_path, "/", MAXPATHLEN);
4161                 (void) strncat(obp_path, component, MAXPATHLEN);
4162         }
4163         rc = mdi_prop_update_string(pip, "obp-path", obp_path);
4164 
4165         if (obp_path)
4166                 kmem_free(obp_path, MAXPATHLEN);
4167         return (rc);
4168 }
4169 
4170 /*
4171  * mdi_pi_get_client():
4172  *              Get the client devinfo associated with a mdi_pathinfo node
4173  *
4174  * Return Values:
4175  *              Handle to client device dev_info node
4176  */
4177 dev_info_t *
4178 mdi_pi_get_client(mdi_pathinfo_t *pip)
4179 {
4180         dev_info_t      *dip = NULL;
4181         if (pip) {
4182                 dip = MDI_PI(pip)->pi_client->ct_dip;
4183         }
4184         return (dip);
4185 }
4186 
4187 /*
4188  * mdi_pi_get_phci():
4189  *              Get the pHCI devinfo associated with the mdi_pathinfo node
4190  * Return Values:
4191  *              Handle to dev_info node
4192  */
4193 dev_info_t *
4194 mdi_pi_get_phci(mdi_pathinfo_t *pip)
4195 {
4196         dev_info_t      *dip = NULL;
4197         mdi_phci_t      *ph;
4198 
4199         if (pip) {
4200                 ph = MDI_PI(pip)->pi_phci;
4201                 if (ph)
4202                         dip = ph->ph_dip;
4203         }
4204         return (dip);
4205 }
4206 
4207 /*
4208  * mdi_pi_get_client_private():
4209  *              Get the client private information associated with the
4210  *              mdi_pathinfo node
4211  */
4212 void *
4213 mdi_pi_get_client_private(mdi_pathinfo_t *pip)
4214 {
4215         void *cprivate = NULL;
4216         if (pip) {
4217                 cprivate = MDI_PI(pip)->pi_cprivate;
4218         }
4219         return (cprivate);
4220 }
4221 
4222 /*
4223  * mdi_pi_set_client_private():
4224  *              Set the client private information in the mdi_pathinfo node
4225  */
4226 void
4227 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv)
4228 {
4229         if (pip) {
4230                 MDI_PI(pip)->pi_cprivate = priv;
4231         }
4232 }
4233 
4234 /*
4235  * mdi_pi_get_phci_private():
4236  *              Get the pHCI private information associated with the
4237  *              mdi_pathinfo node
4238  */
4239 caddr_t
4240 mdi_pi_get_phci_private(mdi_pathinfo_t *pip)
4241 {
4242         caddr_t pprivate = NULL;
4243 
4244         if (pip) {
4245                 pprivate = MDI_PI(pip)->pi_pprivate;
4246         }
4247         return (pprivate);
4248 }
4249 
4250 /*
4251  * mdi_pi_set_phci_private():
4252  *              Set the pHCI private information in the mdi_pathinfo node
4253  */
4254 void
4255 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv)
4256 {
4257         if (pip) {
4258                 MDI_PI(pip)->pi_pprivate = priv;
4259         }
4260 }
4261 
4262 /*
4263  * mdi_pi_get_state():
4264  *              Get the mdi_pathinfo node state. Transient states are internal
4265  *              and not provided to the users
4266  */
4267 mdi_pathinfo_state_t
4268 mdi_pi_get_state(mdi_pathinfo_t *pip)
4269 {
4270         mdi_pathinfo_state_t    state = MDI_PATHINFO_STATE_INIT;
4271 
4272         if (pip) {
4273                 if (MDI_PI_IS_TRANSIENT(pip)) {
4274                         /*
4275                          * mdi_pathinfo is in state transition.  Return the
4276                          * last good state.
4277                          */
4278                         state = MDI_PI_OLD_STATE(pip);
4279                 } else {
4280                         state = MDI_PI_STATE(pip);
4281                 }
4282         }
4283         return (state);
4284 }
4285 
4286 /*
4287  * mdi_pi_get_flags():
4288  *              Get the mdi_pathinfo node flags.
4289  */
4290 uint_t
4291 mdi_pi_get_flags(mdi_pathinfo_t *pip)
4292 {
4293         return (pip ? MDI_PI(pip)->pi_flags : 0);
4294 }
4295 
4296 /*
4297  * Note that the following function needs to be the new interface for
4298  * mdi_pi_get_state when mpxio gets integrated to ON.
4299  */
4300 int
4301 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state,
4302                 uint32_t *ext_state)
4303 {
4304         *state = MDI_PATHINFO_STATE_INIT;
4305 
4306         if (pip) {
4307                 if (MDI_PI_IS_TRANSIENT(pip)) {
4308                         /*
4309                          * mdi_pathinfo is in state transition.  Return the
4310                          * last good state.
4311                          */
4312                         *state = MDI_PI_OLD_STATE(pip);
4313                         *ext_state = MDI_PI_OLD_EXT_STATE(pip);
4314                 } else {
4315                         *state = MDI_PI_STATE(pip);
4316                         *ext_state = MDI_PI_EXT_STATE(pip);
4317                 }
4318         }
4319         return (MDI_SUCCESS);
4320 }
4321 
4322 /*
4323  * mdi_pi_get_preferred:
4324  *      Get the preferred path flag
4325  */
4326 int
4327 mdi_pi_get_preferred(mdi_pathinfo_t *pip)
4328 {
4329         if (pip) {
4330                 return (MDI_PI(pip)->pi_preferred);
4331         }
4332         return (0);
4333 }
4334 
4335 /*
4336  * mdi_pi_set_preferred:
4337  *      Set the preferred path flag
4338  */
4339 void
4340 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred)
4341 {
4342         if (pip) {
4343                 MDI_PI(pip)->pi_preferred = preferred;
4344         }
4345 }
4346 
4347 /*
4348  * mdi_pi_set_state():
4349  *              Set the mdi_pathinfo node state
4350  */
4351 void
4352 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state)
4353 {
4354         uint32_t        ext_state;
4355 
4356         if (pip) {
4357                 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;
4358                 MDI_PI(pip)->pi_state = state;
4359                 MDI_PI(pip)->pi_state |= ext_state;
4360 
4361                 /* Path has changed state, invalidate DINFOCACHE snap shot. */
4362                 i_ddi_di_cache_invalidate();
4363         }
4364 }
4365 
4366 /*
4367  * Property functions:
4368  */
4369 int
4370 i_map_nvlist_error_to_mdi(int val)
4371 {
4372         int rv;
4373 
4374         switch (val) {
4375         case 0:
4376                 rv = DDI_PROP_SUCCESS;
4377                 break;
4378         case EINVAL:
4379         case ENOTSUP:
4380                 rv = DDI_PROP_INVAL_ARG;
4381                 break;
4382         case ENOMEM:
4383                 rv = DDI_PROP_NO_MEMORY;
4384                 break;
4385         default:
4386                 rv = DDI_PROP_NOT_FOUND;
4387                 break;
4388         }
4389         return (rv);
4390 }
4391 
4392 /*
4393  * mdi_pi_get_next_prop():
4394  *              Property walk function.  The caller should hold mdi_pi_lock()
4395  *              and release by calling mdi_pi_unlock() at the end of walk to
4396  *              get a consistent value.
4397  */
4398 nvpair_t *
4399 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
4400 {
4401         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4402                 return (NULL);
4403         }
4404         ASSERT(MDI_PI_LOCKED(pip));
4405         return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
4406 }
4407 
4408 /*
4409  * mdi_prop_remove():
4410  *              Remove the named property from the named list.
4411  */
4412 int
4413 mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
4414 {
4415         if (pip == NULL) {
4416                 return (DDI_PROP_NOT_FOUND);
4417         }
4418         ASSERT(!MDI_PI_LOCKED(pip));
4419         MDI_PI_LOCK(pip);
4420         if (MDI_PI(pip)->pi_prop == NULL) {
4421                 MDI_PI_UNLOCK(pip);
4422                 return (DDI_PROP_NOT_FOUND);
4423         }
4424         if (name) {
4425                 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
4426         } else {
4427                 char            nvp_name[MAXNAMELEN];
4428                 nvpair_t        *nvp;
4429                 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
4430                 while (nvp) {
4431                         nvpair_t        *next;
4432                         next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp);
4433                         (void) snprintf(nvp_name, sizeof(nvp_name), "%s",
4434                             nvpair_name(nvp));
4435                         (void) nvlist_remove_all(MDI_PI(pip)->pi_prop,
4436                             nvp_name);
4437                         nvp = next;
4438                 }
4439         }
4440         MDI_PI_UNLOCK(pip);
4441         return (DDI_PROP_SUCCESS);
4442 }
4443 
4444 /*
4445  * mdi_prop_size():
4446  *              Get buffer size needed to pack the property data.
4447  *              Caller should hold the mdi_pathinfo_t lock to get a consistent
4448  *              buffer size.
4449  */
4450 int
4451 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
4452 {
4453         int     rv;
4454         size_t  bufsize;
4455 
4456         *buflenp = 0;
4457         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4458                 return (DDI_PROP_NOT_FOUND);
4459         }
4460         ASSERT(MDI_PI_LOCKED(pip));
4461         rv = nvlist_size(MDI_PI(pip)->pi_prop,
4462             &bufsize, NV_ENCODE_NATIVE);
4463         *buflenp = bufsize;
4464         return (i_map_nvlist_error_to_mdi(rv));
4465 }
4466 
4467 /*
4468  * mdi_prop_pack():
4469  *              pack the property list.  The caller should hold the
4470  *              mdi_pathinfo_t node to get a consistent data
4471  */
4472 int
4473 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen)
4474 {
4475         int     rv;
4476         size_t  bufsize;
4477 
4478         if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) {
4479                 return (DDI_PROP_NOT_FOUND);
4480         }
4481 
4482         ASSERT(MDI_PI_LOCKED(pip));
4483 
4484         bufsize = buflen;
4485         rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize,
4486             NV_ENCODE_NATIVE, KM_SLEEP);
4487 
4488         return (i_map_nvlist_error_to_mdi(rv));
4489 }
4490 
4491 /*
4492  * mdi_prop_update_byte():
4493  *              Create/Update a byte property
4494  */
4495 int
4496 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data)
4497 {
4498         int rv;
4499 
4500         if (pip == NULL) {
4501                 return (DDI_PROP_INVAL_ARG);
4502         }
4503         ASSERT(!MDI_PI_LOCKED(pip));
4504         MDI_PI_LOCK(pip);
4505         if (MDI_PI(pip)->pi_prop == NULL) {
4506                 MDI_PI_UNLOCK(pip);
4507                 return (DDI_PROP_NOT_FOUND);
4508         }
4509         rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data);
4510         MDI_PI_UNLOCK(pip);
4511         return (i_map_nvlist_error_to_mdi(rv));
4512 }
4513 
4514 /*
4515  * mdi_prop_update_byte_array():
4516  *              Create/Update a byte array property
4517  */
4518 int
4519 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data,
4520     uint_t nelements)
4521 {
4522         int rv;
4523 
4524         if (pip == NULL) {
4525                 return (DDI_PROP_INVAL_ARG);
4526         }
4527         ASSERT(!MDI_PI_LOCKED(pip));
4528         MDI_PI_LOCK(pip);
4529         if (MDI_PI(pip)->pi_prop == NULL) {
4530                 MDI_PI_UNLOCK(pip);
4531                 return (DDI_PROP_NOT_FOUND);
4532         }
4533         rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements);
4534         MDI_PI_UNLOCK(pip);
4535         return (i_map_nvlist_error_to_mdi(rv));
4536 }
4537 
4538 /*
4539  * mdi_prop_update_int():
4540  *              Create/Update a 32 bit integer property
4541  */
4542 int
4543 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data)
4544 {
4545         int rv;
4546 
4547         if (pip == NULL) {
4548                 return (DDI_PROP_INVAL_ARG);
4549         }
4550         ASSERT(!MDI_PI_LOCKED(pip));
4551         MDI_PI_LOCK(pip);
4552         if (MDI_PI(pip)->pi_prop == NULL) {
4553                 MDI_PI_UNLOCK(pip);
4554                 return (DDI_PROP_NOT_FOUND);
4555         }
4556         rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data);
4557         MDI_PI_UNLOCK(pip);
4558         return (i_map_nvlist_error_to_mdi(rv));
4559 }
4560 
4561 /*
4562  * mdi_prop_update_int64():
4563  *              Create/Update a 64 bit integer property
4564  */
4565 int
4566 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data)
4567 {
4568         int rv;
4569 
4570         if (pip == NULL) {
4571                 return (DDI_PROP_INVAL_ARG);
4572         }
4573         ASSERT(!MDI_PI_LOCKED(pip));
4574         MDI_PI_LOCK(pip);
4575         if (MDI_PI(pip)->pi_prop == NULL) {
4576                 MDI_PI_UNLOCK(pip);
4577                 return (DDI_PROP_NOT_FOUND);
4578         }
4579         rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data);
4580         MDI_PI_UNLOCK(pip);
4581         return (i_map_nvlist_error_to_mdi(rv));
4582 }
4583 
4584 /*
4585  * mdi_prop_update_int_array():
4586  *              Create/Update a int array property
4587  */
4588 int
4589 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data,
4590             uint_t nelements)
4591 {
4592         int rv;
4593 
4594         if (pip == NULL) {
4595                 return (DDI_PROP_INVAL_ARG);
4596         }
4597         ASSERT(!MDI_PI_LOCKED(pip));
4598         MDI_PI_LOCK(pip);
4599         if (MDI_PI(pip)->pi_prop == NULL) {
4600                 MDI_PI_UNLOCK(pip);
4601                 return (DDI_PROP_NOT_FOUND);
4602         }
4603         rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data,
4604             nelements);
4605         MDI_PI_UNLOCK(pip);
4606         return (i_map_nvlist_error_to_mdi(rv));
4607 }
4608 
4609 /*
4610  * mdi_prop_update_string():
4611  *              Create/Update a string property
4612  */
4613 int
4614 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data)
4615 {
4616         int rv;
4617 
4618         if (pip == NULL) {
4619                 return (DDI_PROP_INVAL_ARG);
4620         }
4621         ASSERT(!MDI_PI_LOCKED(pip));
4622         MDI_PI_LOCK(pip);
4623         if (MDI_PI(pip)->pi_prop == NULL) {
4624                 MDI_PI_UNLOCK(pip);
4625                 return (DDI_PROP_NOT_FOUND);
4626         }
4627         rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data);
4628         MDI_PI_UNLOCK(pip);
4629         return (i_map_nvlist_error_to_mdi(rv));
4630 }
4631 
4632 /*
4633  * mdi_prop_update_string_array():
4634  *              Create/Update a string array property
4635  */
4636 int
4637 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data,
4638     uint_t nelements)
4639 {
4640         int rv;
4641 
4642         if (pip == NULL) {
4643                 return (DDI_PROP_INVAL_ARG);
4644         }
4645         ASSERT(!MDI_PI_LOCKED(pip));
4646         MDI_PI_LOCK(pip);
4647         if (MDI_PI(pip)->pi_prop == NULL) {
4648                 MDI_PI_UNLOCK(pip);
4649                 return (DDI_PROP_NOT_FOUND);
4650         }
4651         rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data,
4652             nelements);
4653         MDI_PI_UNLOCK(pip);
4654         return (i_map_nvlist_error_to_mdi(rv));
4655 }
4656 
4657 /*
4658  * mdi_prop_lookup_byte():
4659  *              Look for byte property identified by name.  The data returned
4660  *              is the actual property and valid as long as mdi_pathinfo_t node
4661  *              is alive.
4662  */
4663 int
4664 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data)
4665 {
4666         int rv;
4667 
4668         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4669                 return (DDI_PROP_NOT_FOUND);
4670         }
4671         rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data);
4672         return (i_map_nvlist_error_to_mdi(rv));
4673 }
4674 
4675 
4676 /*
4677  * mdi_prop_lookup_byte_array():
4678  *              Look for byte array property identified by name.  The data
4679  *              returned is the actual property and valid as long as
4680  *              mdi_pathinfo_t node is alive.
4681  */
4682 int
4683 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data,
4684     uint_t *nelements)
4685 {
4686         int rv;
4687 
4688         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4689                 return (DDI_PROP_NOT_FOUND);
4690         }
4691         rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data,
4692             nelements);
4693         return (i_map_nvlist_error_to_mdi(rv));
4694 }
4695 
4696 /*
4697  * mdi_prop_lookup_int():
4698  *              Look for int property identified by name.  The data returned
4699  *              is the actual property and valid as long as mdi_pathinfo_t
4700  *              node is alive.
4701  */
4702 int
4703 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data)
4704 {
4705         int rv;
4706 
4707         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4708                 return (DDI_PROP_NOT_FOUND);
4709         }
4710         rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data);
4711         return (i_map_nvlist_error_to_mdi(rv));
4712 }
4713 
4714 /*
4715  * mdi_prop_lookup_int64():
4716  *              Look for int64 property identified by name.  The data returned
4717  *              is the actual property and valid as long as mdi_pathinfo_t node
4718  *              is alive.
4719  */
4720 int
4721 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data)
4722 {
4723         int rv;
4724         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4725                 return (DDI_PROP_NOT_FOUND);
4726         }
4727         rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data);
4728         return (i_map_nvlist_error_to_mdi(rv));
4729 }
4730 
4731 /*
4732  * mdi_prop_lookup_int_array():
4733  *              Look for int array property identified by name.  The data
4734  *              returned is the actual property and valid as long as
4735  *              mdi_pathinfo_t node is alive.
4736  */
4737 int
4738 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data,
4739     uint_t *nelements)
4740 {
4741         int rv;
4742 
4743         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4744                 return (DDI_PROP_NOT_FOUND);
4745         }
4746         rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name,
4747             (int32_t **)data, nelements);
4748         return (i_map_nvlist_error_to_mdi(rv));
4749 }
4750 
4751 /*
4752  * mdi_prop_lookup_string():
4753  *              Look for string property identified by name.  The data
4754  *              returned is the actual property and valid as long as
4755  *              mdi_pathinfo_t node is alive.
4756  */
4757 int
4758 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data)
4759 {
4760         int rv;
4761 
4762         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4763                 return (DDI_PROP_NOT_FOUND);
4764         }
4765         rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data);
4766         return (i_map_nvlist_error_to_mdi(rv));
4767 }
4768 
4769 /*
4770  * mdi_prop_lookup_string_array():
4771  *              Look for string array property identified by name.  The data
4772  *              returned is the actual property and valid as long as
4773  *              mdi_pathinfo_t node is alive.
4774  */
4775 int
4776 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
4777     uint_t *nelements)
4778 {
4779         int rv;
4780 
4781         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4782                 return (DDI_PROP_NOT_FOUND);
4783         }
4784         rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
4785             nelements);
4786         return (i_map_nvlist_error_to_mdi(rv));
4787 }
4788 
/*
 * mdi_prop_free():
 *              Symmetrical function to ddi_prop_free(), provided so that
 *              callers can pair every lookup with a free.  It does nothing:
 *              the nvlist_lookup_xx() functions return a pointer to the
 *              actual property data and not a copy of it, so there is
 *              nothing to release here and the data stays valid as long as
 *              the mdi_pathinfo_t node is valid.
 *
 * Return Values:
 *              DDI_PROP_SUCCESS (always); data is ignored
 */
/*ARGSUSED*/
int
mdi_prop_free(void *data)
{
        return (DDI_PROP_SUCCESS);
}
4802 
4803 /*ARGSUSED*/
4804 static void
4805 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
4806 {
4807         char            *ct_path;
4808         char            *ct_status;
4809         char            *status;
4810         dev_info_t      *cdip = ct->ct_dip;
4811         char            lb_buf[64];
4812         int             report_lb_c = 0, report_lb_p = 0;
4813 
4814         ASSERT(MDI_CLIENT_LOCKED(ct));
4815         if ((cdip == NULL) || (ddi_get_instance(cdip) == -1) ||
4816             (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
4817                 return;
4818         }
4819         if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
4820                 ct_status = "optimal";
4821                 report_lb_c = 1;
4822         } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4823                 ct_status = "degraded";
4824         } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
4825                 ct_status = "failed";
4826         } else {
4827                 ct_status = "unknown";
4828         }
4829 
4830         lb_buf[0] = 0;          /* not interested in load balancing config */
4831 
4832         if (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)) {
4833                 status = "removed";
4834         } else if (MDI_PI_IS_OFFLINE(pip)) {
4835                 status = "offline";
4836         } else if (MDI_PI_IS_ONLINE(pip)) {
4837                 status = "online";
4838                 report_lb_p = 1;
4839         } else if (MDI_PI_IS_STANDBY(pip)) {
4840                 status = "standby";
4841         } else if (MDI_PI_IS_FAULT(pip)) {
4842                 status = "faulted";
4843         } else {
4844                 status = "unknown";
4845         }
4846 
4847         if (cdip) {
4848                 ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4849 
4850                 /*
4851                  * NOTE: Keeping "multipath status: %s" and
4852                  * "Load balancing: %s" format unchanged in case someone
4853                  * scrubs /var/adm/messages looking for these messages.
4854                  */
4855                 if (report_lb_c && report_lb_p) {
4856                         if (ct->ct_lb == LOAD_BALANCE_LBA) {
4857                                 (void) snprintf(lb_buf, sizeof (lb_buf),
4858                                     "%s, region-size: %d", mdi_load_balance_lba,
4859                                     ct->ct_lb_args->region_size);
4860                         } else if (ct->ct_lb == LOAD_BALANCE_NONE) {
4861                                 (void) snprintf(lb_buf, sizeof (lb_buf),
4862                                     "%s", mdi_load_balance_none);
4863                         } else {
4864                                 (void) snprintf(lb_buf, sizeof (lb_buf), "%s",
4865                                     mdi_load_balance_rr);
4866                         }
4867 
4868                         cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
4869                             "?%s (%s%d) multipath status: %s: "
4870                             "path %d %s is %s: Load balancing: %s\n",
4871                             ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
4872                             ddi_get_instance(cdip), ct_status,
4873                             mdi_pi_get_path_instance(pip),
4874                             mdi_pi_spathname(pip), status, lb_buf);
4875                 } else {
4876                         cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
4877                             "?%s (%s%d) multipath status: %s: "
4878                             "path %d %s is %s\n",
4879                             ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
4880                             ddi_get_instance(cdip), ct_status,
4881                             mdi_pi_get_path_instance(pip),
4882                             mdi_pi_spathname(pip), status);
4883                 }
4884 
4885                 kmem_free(ct_path, MAXPATHLEN);
4886                 MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
4887         }
4888 }
4889 
4890 #ifdef  DEBUG
4891 /*
4892  * i_mdi_log():
4893  *              Utility function for error message management
4894  *
4895  *              NOTE: Implementation takes care of trailing \n for cmn_err,
4896  *              MDI_DEBUG should not terminate fmt strings with \n.
4897  *
4898  *              NOTE: If the level is >= 2, and there is no leading !?^
 *              then a leading ! is implied (but can be overridden via
4900  *              mdi_debug_consoleonly). If you are using kmdb on the console,
4901  *              consider setting mdi_debug_consoleonly to 1 as an aid.
4902  */
4903 /*PRINTFLIKE4*/
4904 static void
4905 i_mdi_log(int level, const char *func, dev_info_t *dip, const char *fmt, ...)
4906 {
4907         char            name[MAXNAMELEN];
4908         char            buf[512];
4909         char            *bp;
4910         va_list         ap;
4911         int             log_only = 0;
4912         int             boot_only = 0;
4913         int             console_only = 0;
4914 
4915         if (dip) {
4916                 (void) snprintf(name, sizeof(name), "%s%d: ",
4917                     ddi_driver_name(dip), ddi_get_instance(dip));
4918         } else {
4919                 name[0] = 0;
4920         }
4921 
4922         va_start(ap, fmt);
4923         (void) vsnprintf(buf, sizeof(buf), fmt, ap);
4924         va_end(ap);
4925 
4926         switch (buf[0]) {
4927         case '!':
4928                 bp = &buf[1];
4929                 log_only = 1;
4930                 break;
4931         case '?':
4932                 bp = &buf[1];
4933                 boot_only = 1;
4934                 break;
4935         case '^':
4936                 bp = &buf[1];
4937                 console_only = 1;
4938                 break;
4939         default:
4940                 if (level >= 2)
4941                         log_only = 1;           /* ! implied */
4942                 bp = buf;
4943                 break;
4944         }
4945         if (mdi_debug_logonly) {
4946                 log_only = 1;
4947                 boot_only = 0;
4948                 console_only = 0;
4949         }
4950         if (mdi_debug_consoleonly) {
4951                 log_only = 0;
4952                 boot_only = 0;
4953                 console_only = 1;
4954                 level = CE_NOTE;
4955                 goto console;
4956         }
4957 
4958         switch (level) {
4959         case CE_NOTE:
4960                 level = CE_CONT;
4961                 /* FALLTHROUGH */
4962         case CE_CONT:
4963                 if (boot_only) {
4964                         cmn_err(level, "?mdi: %s%s: %s\n", name, func, bp);
4965                 } else if (console_only) {
4966                         cmn_err(level, "^mdi: %s%s: %s\n", name, func, bp);
4967                 } else if (log_only) {
4968                         cmn_err(level, "!mdi: %s%s: %s\n", name, func, bp);
4969                 } else {
4970                         cmn_err(level, "mdi: %s%s: %s\n", name, func, bp);
4971                 }
4972                 break;
4973 
4974         case CE_WARN:
4975         case CE_PANIC:
4976         console:
4977                 if (boot_only) {
4978                         cmn_err(level, "?mdi: %s%s: %s", name, func, bp);
4979                 } else if (console_only) {
4980                         cmn_err(level, "^mdi: %s%s: %s", name, func, bp);
4981                 } else if (log_only) {
4982                         cmn_err(level, "!mdi: %s%s: %s", name, func, bp);
4983                 } else {
4984                         cmn_err(level, "mdi: %s%s: %s", name, func, bp);
4985                 }
4986                 break;
4987         default:
4988                 cmn_err(level, "mdi: %s%s", name, bp);
4989                 break;
4990         }
4991 }
4992 #endif  /* DEBUG */
4993 
4994 void
4995 i_mdi_client_online(dev_info_t *ct_dip)
4996 {
4997         mdi_client_t    *ct;
4998 
4999         /*
5000          * Client online notification. Mark client state as online
5001          * restore our binding with dev_info node
5002          */
5003         ct = i_devi_get_client(ct_dip);
5004         ASSERT(ct != NULL);
5005         MDI_CLIENT_LOCK(ct);
5006         MDI_CLIENT_SET_ONLINE(ct);
5007         /* catch for any memory leaks */
5008         ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
5009         ct->ct_dip = ct_dip;
5010 
5011         if (ct->ct_power_cnt == 0)
5012                 (void) i_mdi_power_all_phci(ct);
5013 
5014         MDI_DEBUG(4, (MDI_NOTE, ct_dip,
5015             "i_mdi_pm_hold_client %p", (void *)ct));
5016         i_mdi_pm_hold_client(ct, 1);
5017 
5018         MDI_CLIENT_UNLOCK(ct);
5019 }
5020 
5021 void
5022 i_mdi_phci_online(dev_info_t *ph_dip)
5023 {
5024         mdi_phci_t      *ph;
5025 
5026         /* pHCI online notification. Mark state accordingly */
5027         ph = i_devi_get_phci(ph_dip);
5028         ASSERT(ph != NULL);
5029         MDI_PHCI_LOCK(ph);
5030         MDI_PHCI_SET_ONLINE(ph);
5031         MDI_PHCI_UNLOCK(ph);
5032 }
5033 
5034 /*
5035  * mdi_devi_online():
5036  *              Online notification from NDI framework on pHCI/client
5037  *              device online.
5038  * Return Values:
5039  *              NDI_SUCCESS
5040  *              MDI_FAILURE
5041  */
5042 /*ARGSUSED*/
5043 int
5044 mdi_devi_online(dev_info_t *dip, uint_t flags)
5045 {
5046         if (MDI_PHCI(dip)) {
5047                 i_mdi_phci_online(dip);
5048         }
5049 
5050         if (MDI_CLIENT(dip)) {
5051                 i_mdi_client_online(dip);
5052         }
5053         return (NDI_SUCCESS);
5054 }
5055 
5056 /*
5057  * mdi_devi_offline():
5058  *              Offline notification from NDI framework on pHCI/Client device
5059  *              offline.
5060  *
5061  * Return Values:
5062  *              NDI_SUCCESS
5063  *              NDI_FAILURE
5064  */
5065 /*ARGSUSED*/
5066 int
5067 mdi_devi_offline(dev_info_t *dip, uint_t flags)
5068 {
5069         int             rv = NDI_SUCCESS;
5070 
5071         if (MDI_CLIENT(dip)) {
5072                 rv = i_mdi_client_offline(dip, flags);
5073                 if (rv != NDI_SUCCESS)
5074                         return (rv);
5075         }
5076 
5077         if (MDI_PHCI(dip)) {
5078                 rv = i_mdi_phci_offline(dip, flags);
5079 
5080                 if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
5081                         /* set client back online */
5082                         i_mdi_client_online(dip);
5083                 }
5084         }
5085 
5086         return (rv);
5087 }
5088 
5089 /*ARGSUSED*/
5090 static int
5091 i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
5092 {
5093         int             rv = NDI_SUCCESS;
5094         mdi_phci_t      *ph;
5095         mdi_client_t    *ct;
5096         mdi_pathinfo_t  *pip;
5097         mdi_pathinfo_t  *next;
5098         mdi_pathinfo_t  *failed_pip = NULL;
5099         dev_info_t      *cdip;
5100 
5101         /*
5102          * pHCI component offline notification
5103          * Make sure that this pHCI instance is free to be offlined.
5104          * If it is OK to proceed, Offline and remove all the child
5105          * mdi_pathinfo nodes.  This process automatically offlines
5106          * corresponding client devices, for which this pHCI provides
5107          * critical services.
5108          */
5109         ph = i_devi_get_phci(dip);
5110         MDI_DEBUG(2, (MDI_NOTE, dip,
5111             "called %p %p", (void *)dip, (void *)ph));
5112         if (ph == NULL) {
5113                 return (rv);
5114         }
5115 
5116         MDI_PHCI_LOCK(ph);
5117 
5118         if (MDI_PHCI_IS_OFFLINE(ph)) {
5119                 MDI_DEBUG(1, (MDI_WARN, dip,
5120                     "!pHCI already offlined: %p", (void *)dip));
5121                 MDI_PHCI_UNLOCK(ph);
5122                 return (NDI_SUCCESS);
5123         }
5124 
5125         /*
5126          * Check to see if the pHCI can be offlined
5127          */
5128         if (ph->ph_unstable) {
5129                 MDI_DEBUG(1, (MDI_WARN, dip,
5130                     "!One or more target devices are in transient state. "
5131                     "This device can not be removed at this moment. "
5132                     "Please try again later."));
5133                 MDI_PHCI_UNLOCK(ph);
5134                 return (NDI_BUSY);
5135         }
5136 
5137         pip = ph->ph_path_head;
5138         while (pip != NULL) {
5139                 MDI_PI_LOCK(pip);
5140                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5141 
5142                 /*
5143                  * The mdi_pathinfo state is OK. Check the client state.
5144                  * If failover in progress fail the pHCI from offlining
5145                  */
5146                 ct = MDI_PI(pip)->pi_client;
5147                 i_mdi_client_lock(ct, pip);
5148                 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5149                     (ct->ct_unstable)) {
5150                         /*
5151                          * Failover is in progress, Fail the DR
5152                          */
5153                         MDI_DEBUG(1, (MDI_WARN, dip,
5154                             "!pHCI device is busy. "
5155                             "This device can not be removed at this moment. "
5156                             "Please try again later."));
5157                         MDI_PI_UNLOCK(pip);
5158                         i_mdi_client_unlock(ct);
5159                         MDI_PHCI_UNLOCK(ph);
5160                         return (NDI_BUSY);
5161                 }
5162                 MDI_PI_UNLOCK(pip);
5163 
5164                 /*
5165                  * Check to see of we are removing the last path of this
5166                  * client device...
5167                  */
5168                 cdip = ct->ct_dip;
5169                 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5170                     (i_mdi_client_compute_state(ct, ph) ==
5171                     MDI_CLIENT_STATE_FAILED)) {
5172                         i_mdi_client_unlock(ct);
5173                         MDI_PHCI_UNLOCK(ph);
5174                         if (ndi_devi_offline(cdip,
5175                             NDI_DEVFS_CLEAN) != NDI_SUCCESS) {
5176                                 /*
5177                                  * ndi_devi_offline() failed.
5178                                  * This pHCI provides the critical path
5179                                  * to one or more client devices.
5180                                  * Return busy.
5181                                  */
5182                                 MDI_PHCI_LOCK(ph);
5183                                 MDI_DEBUG(1, (MDI_WARN, dip,
5184                                     "!pHCI device is busy. "
5185                                     "This device can not be removed at this "
5186                                     "moment. Please try again later."));
5187                                 failed_pip = pip;
5188                                 break;
5189                         } else {
5190                                 MDI_PHCI_LOCK(ph);
5191                                 pip = next;
5192                         }
5193                 } else {
5194                         i_mdi_client_unlock(ct);
5195                         pip = next;
5196                 }
5197         }
5198 
5199         if (failed_pip) {
5200                 pip = ph->ph_path_head;
5201                 while (pip != failed_pip) {
5202                         MDI_PI_LOCK(pip);
5203                         next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5204                         ct = MDI_PI(pip)->pi_client;
5205                         i_mdi_client_lock(ct, pip);
5206                         cdip = ct->ct_dip;
5207                         switch (MDI_CLIENT_STATE(ct)) {
5208                         case MDI_CLIENT_STATE_OPTIMAL:
5209                         case MDI_CLIENT_STATE_DEGRADED:
5210                                 if (cdip) {
5211                                         MDI_PI_UNLOCK(pip);
5212                                         i_mdi_client_unlock(ct);
5213                                         MDI_PHCI_UNLOCK(ph);
5214                                         (void) ndi_devi_online(cdip, 0);
5215                                         MDI_PHCI_LOCK(ph);
5216                                         pip = next;
5217                                         continue;
5218                                 }
5219                                 break;
5220 
5221                         case MDI_CLIENT_STATE_FAILED:
5222                                 if (cdip) {
5223                                         MDI_PI_UNLOCK(pip);
5224                                         i_mdi_client_unlock(ct);
5225                                         MDI_PHCI_UNLOCK(ph);
5226                                         (void) ndi_devi_offline(cdip,
5227                                                 NDI_DEVFS_CLEAN);
5228                                         MDI_PHCI_LOCK(ph);
5229                                         pip = next;
5230                                         continue;
5231                                 }
5232                                 break;
5233                         }
5234                         MDI_PI_UNLOCK(pip);
5235                         i_mdi_client_unlock(ct);
5236                         pip = next;
5237                 }
5238                 MDI_PHCI_UNLOCK(ph);
5239                 return (NDI_BUSY);
5240         }
5241 
5242         /*
5243          * Mark the pHCI as offline
5244          */
5245         MDI_PHCI_SET_OFFLINE(ph);
5246 
5247         /*
5248          * Mark the child mdi_pathinfo nodes as transient
5249          */
5250         pip = ph->ph_path_head;
5251         while (pip != NULL) {
5252                 MDI_PI_LOCK(pip);
5253                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5254                 MDI_PI_SET_OFFLINING(pip);
5255                 MDI_PI_UNLOCK(pip);
5256                 pip = next;
5257         }
5258         MDI_PHCI_UNLOCK(ph);
5259         /*
5260          * Give a chance for any pending commands to execute
5261          */
5262         delay_random(mdi_delay);
5263         MDI_PHCI_LOCK(ph);
5264         pip = ph->ph_path_head;
5265         while (pip != NULL) {
5266                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5267                 (void) i_mdi_pi_offline(pip, flags);
5268                 MDI_PI_LOCK(pip);
5269                 ct = MDI_PI(pip)->pi_client;
5270                 if (!MDI_PI_IS_OFFLINE(pip)) {
5271                         MDI_DEBUG(1, (MDI_WARN, dip,
5272                             "!pHCI device is busy. "
5273                             "This device can not be removed at this moment. "
5274                             "Please try again later."));
5275                         MDI_PI_UNLOCK(pip);
5276                         MDI_PHCI_SET_ONLINE(ph);
5277                         MDI_PHCI_UNLOCK(ph);
5278                         return (NDI_BUSY);
5279                 }
5280                 MDI_PI_UNLOCK(pip);
5281                 pip = next;
5282         }
5283         MDI_PHCI_UNLOCK(ph);
5284 
5285         return (rv);
5286 }
5287 
5288 void
5289 mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array)
5290 {
5291         mdi_phci_t      *ph;
5292         mdi_client_t    *ct;
5293         mdi_pathinfo_t  *pip;
5294         mdi_pathinfo_t  *next;
5295         dev_info_t      *cdip;
5296 
5297         if (!MDI_PHCI(dip))
5298                 return;
5299 
5300         ph = i_devi_get_phci(dip);
5301         if (ph == NULL) {
5302                 return;
5303         }
5304 
5305         MDI_PHCI_LOCK(ph);
5306 
5307         if (MDI_PHCI_IS_OFFLINE(ph)) {
5308                 /* has no last path */
5309                 MDI_PHCI_UNLOCK(ph);
5310                 return;
5311         }
5312 
5313         pip = ph->ph_path_head;
5314         while (pip != NULL) {
5315                 MDI_PI_LOCK(pip);
5316                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5317 
5318                 ct = MDI_PI(pip)->pi_client;
5319                 i_mdi_client_lock(ct, pip);
5320                 MDI_PI_UNLOCK(pip);
5321 
5322                 cdip = ct->ct_dip;
5323                 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5324                     (i_mdi_client_compute_state(ct, ph) ==
5325                     MDI_CLIENT_STATE_FAILED)) {
5326                         /* Last path. Mark client dip as retiring */
5327                         i_mdi_client_unlock(ct);
5328                         MDI_PHCI_UNLOCK(ph);
5329                         (void) e_ddi_mark_retiring(cdip, cons_array);
5330                         MDI_PHCI_LOCK(ph);
5331                         pip = next;
5332                 } else {
5333                         i_mdi_client_unlock(ct);
5334                         pip = next;
5335                 }
5336         }
5337 
5338         MDI_PHCI_UNLOCK(ph);
5339 
5340         return;
5341 }
5342 
5343 void
5344 mdi_phci_retire_notify(dev_info_t *dip, int *constraint)
5345 {
5346         mdi_phci_t      *ph;
5347         mdi_client_t    *ct;
5348         mdi_pathinfo_t  *pip;
5349         mdi_pathinfo_t  *next;
5350         dev_info_t      *cdip;
5351 
5352         if (!MDI_PHCI(dip))
5353                 return;
5354 
5355         ph = i_devi_get_phci(dip);
5356         if (ph == NULL)
5357                 return;
5358 
5359         MDI_PHCI_LOCK(ph);
5360 
5361         if (MDI_PHCI_IS_OFFLINE(ph)) {
5362                 MDI_PHCI_UNLOCK(ph);
5363                 /* not last path */
5364                 return;
5365         }
5366 
5367         if (ph->ph_unstable) {
5368                 MDI_PHCI_UNLOCK(ph);
5369                 /* can't check for constraints */
5370                 *constraint = 0;
5371                 return;
5372         }
5373 
5374         pip = ph->ph_path_head;
5375         while (pip != NULL) {
5376                 MDI_PI_LOCK(pip);
5377                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5378 
5379                 /*
5380                  * The mdi_pathinfo state is OK. Check the client state.
5381                  * If failover in progress fail the pHCI from offlining
5382                  */
5383                 ct = MDI_PI(pip)->pi_client;
5384                 i_mdi_client_lock(ct, pip);
5385                 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5386                     (ct->ct_unstable)) {
5387                         /*
5388                          * Failover is in progress, can't check for constraints
5389                          */
5390                         MDI_PI_UNLOCK(pip);
5391                         i_mdi_client_unlock(ct);
5392                         MDI_PHCI_UNLOCK(ph);
5393                         *constraint = 0;
5394                         return;
5395                 }
5396                 MDI_PI_UNLOCK(pip);
5397 
5398                 /*
5399                  * Check to see of we are retiring the last path of this
5400                  * client device...
5401                  */
5402                 cdip = ct->ct_dip;
5403                 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5404                     (i_mdi_client_compute_state(ct, ph) ==
5405                     MDI_CLIENT_STATE_FAILED)) {
5406                         i_mdi_client_unlock(ct);
5407                         MDI_PHCI_UNLOCK(ph);
5408                         (void) e_ddi_retire_notify(cdip, constraint);
5409                         MDI_PHCI_LOCK(ph);
5410                         pip = next;
5411                 } else {
5412                         i_mdi_client_unlock(ct);
5413                         pip = next;
5414                 }
5415         }
5416 
5417         MDI_PHCI_UNLOCK(ph);
5418 
5419         return;
5420 }
5421 
5422 /*
5423  * offline the path(s) hanging off the pHCI. If the
5424  * last path to any client, check that constraints
5425  * have been applied.
5426  *
5427  * If constraint is 0, we aren't going to retire the
5428  * pHCI. However we still need to go through the paths
5429  * calling e_ddi_retire_finalize() to clear their
5430  * contract barriers.
5431  */
5432 void
5433 mdi_phci_retire_finalize(dev_info_t *dip, int phci_only, void *constraint)
5434 {
5435         mdi_phci_t      *ph;
5436         mdi_client_t    *ct;
5437         mdi_pathinfo_t  *pip;
5438         mdi_pathinfo_t  *next;
5439         dev_info_t      *cdip;
5440         int             unstable = 0;
5441         int             tmp_constraint;
5442 
5443         if (!MDI_PHCI(dip))
5444                 return;
5445 
5446         ph = i_devi_get_phci(dip);
5447         if (ph == NULL) {
5448                 /* no last path and no pips */
5449                 return;
5450         }
5451 
5452         MDI_PHCI_LOCK(ph);
5453 
5454         if (MDI_PHCI_IS_OFFLINE(ph)) {
5455                 MDI_PHCI_UNLOCK(ph);
5456                 /* no last path and no pips */
5457                 return;
5458         }
5459 
5460         /*
5461          * Check to see if the pHCI can be offlined
5462          */
5463         if (ph->ph_unstable) {
5464                 unstable = 1;
5465         }
5466 
5467         pip = ph->ph_path_head;
5468         while (pip != NULL) {
5469                 MDI_PI_LOCK(pip);
5470                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5471 
5472                 /*
5473                  * if failover in progress fail the pHCI from offlining
5474                  */
5475                 ct = MDI_PI(pip)->pi_client;
5476                 i_mdi_client_lock(ct, pip);
5477                 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5478                     (ct->ct_unstable)) {
5479                         unstable = 1;
5480                 }
5481                 MDI_PI_UNLOCK(pip);
5482 
5483                 /*
5484                  * Check to see of we are removing the last path of this
5485                  * client device...
5486                  */
5487                 cdip = ct->ct_dip;
5488                 if (!phci_only && cdip &&
5489                     (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5490                     (i_mdi_client_compute_state(ct, ph) ==
5491                     MDI_CLIENT_STATE_FAILED)) {
5492                         i_mdi_client_unlock(ct);
5493                         MDI_PHCI_UNLOCK(ph);
5494                         /*
5495                          * This is the last path to this client.
5496                          *
5497                          * Constraint will only be set to 1 if this client can
5498                          * be retired (as already determined by
5499                          * mdi_phci_retire_notify). However we don't actually
5500                          * need to retire the client (we just retire the last
5501                          * path - MPXIO will then fail all I/Os to the client).
5502                          * But we still need to call e_ddi_retire_finalize so
5503                          * the contract barriers can be cleared. Therefore we
5504                          * temporarily set constraint = 0 so that the client
5505                          * dip is not retired.
5506                          */
5507                         tmp_constraint = 0;
5508                         (void) e_ddi_retire_finalize(cdip, &tmp_constraint);
5509                         MDI_PHCI_LOCK(ph);
5510                         pip = next;
5511                 } else {
5512                         i_mdi_client_unlock(ct);
5513                         pip = next;
5514                 }
5515         }
5516 
5517         if (!phci_only && *((int *)constraint) == 0) {
5518                 MDI_PHCI_UNLOCK(ph);
5519                 return;
5520         }
5521 
5522         /*
5523          * Cannot offline pip(s)
5524          */
5525         if (unstable) {
5526                 cmn_err(CE_WARN, "%s%d: mdi_phci_retire_finalize: "
5527                     "pHCI in transient state, cannot retire",
5528                     ddi_driver_name(dip), ddi_get_instance(dip));
5529                 MDI_PHCI_UNLOCK(ph);
5530                 return;
5531         }
5532 
5533         /*
5534          * Mark the pHCI as offline
5535          */
5536         MDI_PHCI_SET_OFFLINE(ph);
5537 
5538         /*
5539          * Mark the child mdi_pathinfo nodes as transient
5540          */
5541         pip = ph->ph_path_head;
5542         while (pip != NULL) {
5543                 MDI_PI_LOCK(pip);
5544                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5545                 MDI_PI_SET_OFFLINING(pip);
5546                 MDI_PI_UNLOCK(pip);
5547                 pip = next;
5548         }
5549         MDI_PHCI_UNLOCK(ph);
5550         /*
5551          * Give a chance for any pending commands to execute
5552          */
5553         delay_random(mdi_delay);
5554         MDI_PHCI_LOCK(ph);
5555         pip = ph->ph_path_head;
5556         while (pip != NULL) {
5557                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5558                 (void) i_mdi_pi_offline(pip, 0);
5559                 MDI_PI_LOCK(pip);
5560                 ct = MDI_PI(pip)->pi_client;
5561                 if (!MDI_PI_IS_OFFLINE(pip)) {
5562                         cmn_err(CE_WARN, "mdi_phci_retire_finalize: "
5563                             "path %d %s busy, cannot offline",
5564                             mdi_pi_get_path_instance(pip),
5565                             mdi_pi_spathname(pip));
5566                         MDI_PI_UNLOCK(pip);
5567                         MDI_PHCI_SET_ONLINE(ph);
5568                         MDI_PHCI_UNLOCK(ph);
5569                         return;
5570                 }
5571                 MDI_PI_UNLOCK(pip);
5572                 pip = next;
5573         }
5574         MDI_PHCI_UNLOCK(ph);
5575 
5576         return;
5577 }
5578 
5579 void
5580 mdi_phci_unretire(dev_info_t *dip)
5581 {
5582         mdi_phci_t      *ph;
5583         mdi_pathinfo_t  *pip;
5584         mdi_pathinfo_t  *next;
5585 
5586         ASSERT(MDI_PHCI(dip));
5587 
5588         /*
5589          * Online the phci
5590          */
5591         i_mdi_phci_online(dip);
5592 
5593         ph = i_devi_get_phci(dip);
5594         MDI_PHCI_LOCK(ph);
5595         pip = ph->ph_path_head;
5596         while (pip != NULL) {
5597                 MDI_PI_LOCK(pip);
5598                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5599                 MDI_PI_UNLOCK(pip);
5600                 (void) i_mdi_pi_online(pip, 0);
5601                 pip = next;
5602         }
5603         MDI_PHCI_UNLOCK(ph);
5604 }
5605 
5606 /*ARGSUSED*/
5607 static int
5608 i_mdi_client_offline(dev_info_t *dip, uint_t flags)
5609 {
5610         int             rv = NDI_SUCCESS;
5611         mdi_client_t    *ct;
5612 
5613         /*
5614          * Client component to go offline.  Make sure that we are
5615          * not in failing over state and update client state
5616          * accordingly
5617          */
5618         ct = i_devi_get_client(dip);
5619         MDI_DEBUG(2, (MDI_NOTE, dip,
5620             "called %p %p", (void *)dip, (void *)ct));
5621         if (ct != NULL) {
5622                 MDI_CLIENT_LOCK(ct);
5623                 if (ct->ct_unstable) {
5624                         /*
5625                          * One or more paths are in transient state,
5626                          * Dont allow offline of a client device
5627                          */
5628                         MDI_DEBUG(1, (MDI_WARN, dip,
5629                             "!One or more paths to "
5630                             "this device are in transient state. "
5631                             "This device can not be removed at this moment. "
5632                             "Please try again later."));
5633                         MDI_CLIENT_UNLOCK(ct);
5634                         return (NDI_BUSY);
5635                 }
5636                 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
5637                         /*
5638                          * Failover is in progress, Dont allow DR of
5639                          * a client device
5640                          */
5641                         MDI_DEBUG(1, (MDI_WARN, dip,
5642                             "!Client device is Busy. "
5643                             "This device can not be removed at this moment. "
5644                             "Please try again later."));
5645                         MDI_CLIENT_UNLOCK(ct);
5646                         return (NDI_BUSY);
5647                 }
5648                 MDI_CLIENT_SET_OFFLINE(ct);
5649 
5650                 /*
5651                  * Unbind our relationship with the dev_info node
5652                  */
5653                 if (flags & NDI_DEVI_REMOVE) {
5654                         ct->ct_dip = NULL;
5655                 }
5656                 MDI_CLIENT_UNLOCK(ct);
5657         }
5658         return (rv);
5659 }
5660 
5661 /*
5662  * mdi_pre_attach():
5663  *              Pre attach() notification handler
5664  */
5665 /*ARGSUSED*/
5666 int
5667 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5668 {
5669         /* don't support old DDI_PM_RESUME */
5670         if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
5671             (cmd == DDI_PM_RESUME))
5672                 return (DDI_FAILURE);
5673 
5674         return (DDI_SUCCESS);
5675 }
5676 
5677 /*
5678  * mdi_post_attach():
5679  *              Post attach() notification handler
5680  */
5681 /*ARGSUSED*/
5682 void
5683 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
5684 {
5685         mdi_phci_t      *ph;
5686         mdi_client_t    *ct;
5687         mdi_vhci_t      *vh;
5688 
5689         if (MDI_PHCI(dip)) {
5690                 ph = i_devi_get_phci(dip);
5691                 ASSERT(ph != NULL);
5692 
5693                 MDI_PHCI_LOCK(ph);
5694                 switch (cmd) {
5695                 case DDI_ATTACH:
5696                         MDI_DEBUG(2, (MDI_NOTE, dip,
5697                             "phci post_attach called %p", (void *)ph));
5698                         if (error == DDI_SUCCESS) {
5699                                 MDI_PHCI_SET_ATTACH(ph);
5700                         } else {
5701                                 MDI_DEBUG(1, (MDI_NOTE, dip,
5702                                     "!pHCI post_attach failed: error %d",
5703                                     error));
5704                                 MDI_PHCI_SET_DETACH(ph);
5705                         }
5706                         break;
5707 
5708                 case DDI_RESUME:
5709                         MDI_DEBUG(2, (MDI_NOTE, dip,
5710                             "pHCI post_resume: called %p", (void *)ph));
5711                         if (error == DDI_SUCCESS) {
5712                                 MDI_PHCI_SET_RESUME(ph);
5713                         } else {
5714                                 MDI_DEBUG(1, (MDI_NOTE, dip,
5715                                     "!pHCI post_resume failed: error %d",
5716                                     error));
5717                                 MDI_PHCI_SET_SUSPEND(ph);
5718                         }
5719                         break;
5720                 }
5721                 MDI_PHCI_UNLOCK(ph);
5722         }
5723 
5724         if (MDI_CLIENT(dip)) {
5725                 ct = i_devi_get_client(dip);
5726                 ASSERT(ct != NULL);
5727 
5728                 MDI_CLIENT_LOCK(ct);
5729                 switch (cmd) {
5730                 case DDI_ATTACH:
5731                         MDI_DEBUG(2, (MDI_NOTE, dip,
5732                             "client post_attach called %p", (void *)ct));
5733                         if (error != DDI_SUCCESS) {
5734                                 MDI_DEBUG(1, (MDI_NOTE, dip,
5735                                     "!client post_attach failed: error %d",
5736                                     error));
5737                                 MDI_CLIENT_SET_DETACH(ct);
5738                                 MDI_DEBUG(4, (MDI_WARN, dip,
5739                                     "i_mdi_pm_reset_client"));
5740                                 i_mdi_pm_reset_client(ct);
5741                                 break;
5742                         }
5743 
5744                         /*
5745                          * Client device has successfully attached, inform
5746                          * the vhci.
5747                          */
5748                         vh = ct->ct_vhci;
5749                         if (vh->vh_ops->vo_client_attached)
5750                                 (*vh->vh_ops->vo_client_attached)(dip);
5751 
5752                         MDI_CLIENT_SET_ATTACH(ct);
5753                         break;
5754 
5755                 case DDI_RESUME:
5756                         MDI_DEBUG(2, (MDI_NOTE, dip,
5757                             "client post_attach: called %p", (void *)ct));
5758                         if (error == DDI_SUCCESS) {
5759                                 MDI_CLIENT_SET_RESUME(ct);
5760                         } else {
5761                                 MDI_DEBUG(1, (MDI_NOTE, dip,
5762                                     "!client post_resume failed: error %d",
5763                                     error));
5764                                 MDI_CLIENT_SET_SUSPEND(ct);
5765                         }
5766                         break;
5767                 }
5768                 MDI_CLIENT_UNLOCK(ct);
5769         }
5770 }
5771 
5772 /*
5773  * mdi_pre_detach():
5774  *              Pre detach notification handler
5775  */
5776 /*ARGSUSED*/
5777 int
5778 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5779 {
5780         int rv = DDI_SUCCESS;
5781 
5782         if (MDI_CLIENT(dip)) {
5783                 (void) i_mdi_client_pre_detach(dip, cmd);
5784         }
5785 
5786         if (MDI_PHCI(dip)) {
5787                 rv = i_mdi_phci_pre_detach(dip, cmd);
5788         }
5789 
5790         return (rv);
5791 }
5792 
5793 /*ARGSUSED*/
5794 static int
5795 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5796 {
5797         int             rv = DDI_SUCCESS;
5798         mdi_phci_t      *ph;
5799         mdi_client_t    *ct;
5800         mdi_pathinfo_t  *pip;
5801         mdi_pathinfo_t  *failed_pip = NULL;
5802         mdi_pathinfo_t  *next;
5803 
5804         ph = i_devi_get_phci(dip);
5805         if (ph == NULL) {
5806                 return (rv);
5807         }
5808 
5809         MDI_PHCI_LOCK(ph);
5810         switch (cmd) {
5811         case DDI_DETACH:
5812                 MDI_DEBUG(2, (MDI_NOTE, dip,
5813                     "pHCI pre_detach: called %p", (void *)ph));
5814                 if (!MDI_PHCI_IS_OFFLINE(ph)) {
5815                         /*
5816                          * mdi_pathinfo nodes are still attached to
5817                          * this pHCI. Fail the detach for this pHCI.
5818                          */
5819                         MDI_DEBUG(2, (MDI_WARN, dip,
5820                             "pHCI pre_detach: paths are still attached %p",
5821                             (void *)ph));
5822                         rv = DDI_FAILURE;
5823                         break;
5824                 }
5825                 MDI_PHCI_SET_DETACH(ph);
5826                 break;
5827 
5828         case DDI_SUSPEND:
5829                 /*
5830                  * pHCI is getting suspended.  Since mpxio client
5831                  * devices may not be suspended at this point, to avoid
5832                  * a potential stack overflow, it is important to suspend
5833                  * client devices before pHCI can be suspended.
5834                  */
5835 
5836                 MDI_DEBUG(2, (MDI_NOTE, dip,
5837                     "pHCI pre_suspend: called %p", (void *)ph));
5838                 /*
5839                  * Suspend all the client devices accessible through this pHCI
5840                  */
5841                 pip = ph->ph_path_head;
5842                 while (pip != NULL && rv == DDI_SUCCESS) {
5843                         dev_info_t *cdip;
5844                         MDI_PI_LOCK(pip);
5845                         next =
5846                             (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5847                         ct = MDI_PI(pip)->pi_client;
5848                         i_mdi_client_lock(ct, pip);
5849                         cdip = ct->ct_dip;
5850                         MDI_PI_UNLOCK(pip);
5851                         if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
5852                             MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
5853                                 i_mdi_client_unlock(ct);
5854                                 if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
5855                                     DDI_SUCCESS) {
5856                                         /*
5857                                          * Suspend of one of the client
5858                                          * device has failed.
5859                                          */
5860                                         MDI_DEBUG(1, (MDI_WARN, dip,
5861                                             "!suspend of device (%s%d) failed.",
5862                                             ddi_driver_name(cdip),
5863                                             ddi_get_instance(cdip)));
5864                                         failed_pip = pip;
5865                                         break;
5866                                 }
5867                         } else {
5868                                 i_mdi_client_unlock(ct);
5869                         }
5870                         pip = next;
5871                 }
5872 
5873                 if (rv == DDI_SUCCESS) {
5874                         /*
5875                          * Suspend of client devices is complete. Proceed
5876                          * with pHCI suspend.
5877                          */
5878                         MDI_PHCI_SET_SUSPEND(ph);
5879                 } else {
5880                         /*
5881                          * Revert back all the suspended client device states
5882                          * to converse.
5883                          */
5884                         pip = ph->ph_path_head;
5885                         while (pip != failed_pip) {
5886                                 dev_info_t *cdip;
5887                                 MDI_PI_LOCK(pip);
5888                                 next =
5889                                     (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5890                                 ct = MDI_PI(pip)->pi_client;
5891                                 i_mdi_client_lock(ct, pip);
5892                                 cdip = ct->ct_dip;
5893                                 MDI_PI_UNLOCK(pip);
5894                                 if (MDI_CLIENT_IS_SUSPENDED(ct)) {
5895                                         i_mdi_client_unlock(ct);
5896                                         (void) devi_attach(cdip, DDI_RESUME);
5897                                 } else {
5898                                         i_mdi_client_unlock(ct);
5899                                 }
5900                                 pip = next;
5901                         }
5902                 }
5903                 break;
5904 
5905         default:
5906                 rv = DDI_FAILURE;
5907                 break;
5908         }
5909         MDI_PHCI_UNLOCK(ph);
5910         return (rv);
5911 }
5912 
5913 /*ARGSUSED*/
5914 static int
5915 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5916 {
5917         int             rv = DDI_SUCCESS;
5918         mdi_client_t    *ct;
5919 
5920         ct = i_devi_get_client(dip);
5921         if (ct == NULL) {
5922                 return (rv);
5923         }
5924 
5925         MDI_CLIENT_LOCK(ct);
5926         switch (cmd) {
5927         case DDI_DETACH:
5928                 MDI_DEBUG(2, (MDI_NOTE, dip,
5929                     "client pre_detach: called %p",
5930                      (void *)ct));
5931                 MDI_CLIENT_SET_DETACH(ct);
5932                 break;
5933 
5934         case DDI_SUSPEND:
5935                 MDI_DEBUG(2, (MDI_NOTE, dip,
5936                     "client pre_suspend: called %p",
5937                     (void *)ct));
5938                 MDI_CLIENT_SET_SUSPEND(ct);
5939                 break;
5940 
5941         default:
5942                 rv = DDI_FAILURE;
5943                 break;
5944         }
5945         MDI_CLIENT_UNLOCK(ct);
5946         return (rv);
5947 }
5948 
5949 /*
5950  * mdi_post_detach():
5951  *              Post detach notification handler
5952  */
5953 /*ARGSUSED*/
5954 void
5955 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5956 {
5957         /*
5958          * Detach/Suspend of mpxio component failed. Update our state
5959          * too
5960          */
5961         if (MDI_PHCI(dip))
5962                 i_mdi_phci_post_detach(dip, cmd, error);
5963 
5964         if (MDI_CLIENT(dip))
5965                 i_mdi_client_post_detach(dip, cmd, error);
5966 }
5967 
5968 /*ARGSUSED*/
5969 static void
5970 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5971 {
5972         mdi_phci_t      *ph;
5973 
5974         /*
5975          * Detach/Suspend of phci component failed. Update our state
5976          * too
5977          */
5978         ph = i_devi_get_phci(dip);
5979         if (ph == NULL) {
5980                 return;
5981         }
5982 
5983         MDI_PHCI_LOCK(ph);
5984         /*
5985          * Detach of pHCI failed. Restore back converse
5986          * state
5987          */
5988         switch (cmd) {
5989         case DDI_DETACH:
5990                 MDI_DEBUG(2, (MDI_NOTE, dip,
5991                     "pHCI post_detach: called %p",
5992                     (void *)ph));
5993                 if (error != DDI_SUCCESS)
5994                         MDI_PHCI_SET_ATTACH(ph);
5995                 break;
5996 
5997         case DDI_SUSPEND:
5998                 MDI_DEBUG(2, (MDI_NOTE, dip,
5999                     "pHCI post_suspend: called %p",
6000                     (void *)ph));
6001                 if (error != DDI_SUCCESS)
6002                         MDI_PHCI_SET_RESUME(ph);
6003                 break;
6004         }
6005         MDI_PHCI_UNLOCK(ph);
6006 }
6007 
6008 /*ARGSUSED*/
6009 static void
6010 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
6011 {
6012         mdi_client_t    *ct;
6013 
6014         ct = i_devi_get_client(dip);
6015         if (ct == NULL) {
6016                 return;
6017         }
6018         MDI_CLIENT_LOCK(ct);
6019         /*
6020          * Detach of Client failed. Restore back converse
6021          * state
6022          */
6023         switch (cmd) {
6024         case DDI_DETACH:
6025                 MDI_DEBUG(2, (MDI_NOTE, dip,
6026                     "client post_detach: called %p", (void *)ct));
6027                 if (DEVI_IS_ATTACHING(dip)) {
6028                         MDI_DEBUG(4, (MDI_NOTE, dip,
6029                             "i_mdi_pm_rele_client\n"));
6030                         i_mdi_pm_rele_client(ct, ct->ct_path_count);
6031                 } else {
6032                         MDI_DEBUG(4, (MDI_NOTE, dip,
6033                             "i_mdi_pm_reset_client\n"));
6034                         i_mdi_pm_reset_client(ct);
6035                 }
6036                 if (error != DDI_SUCCESS)
6037                         MDI_CLIENT_SET_ATTACH(ct);
6038                 break;
6039 
6040         case DDI_SUSPEND:
6041                 MDI_DEBUG(2, (MDI_NOTE, dip,
6042                     "called %p", (void *)ct));
6043                 if (error != DDI_SUCCESS)
6044                         MDI_CLIENT_SET_RESUME(ct);
6045                 break;
6046         }
6047         MDI_CLIENT_UNLOCK(ct);
6048 }
6049 
6050 int
6051 mdi_pi_kstat_exists(mdi_pathinfo_t *pip)
6052 {
6053         return (MDI_PI(pip)->pi_kstats ? 1 : 0);
6054 }
6055 
6056 /*
6057  * create and install per-path (client - pHCI) statistics
6058  * I/O stats supported: nread, nwritten, reads, and writes
6059  * Error stats - hard errors, soft errors, & transport errors
6060  */
6061 int
6062 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname)
6063 {
6064         kstat_t                 *kiosp, *kerrsp;
6065         struct pi_errs          *nsp;
6066         struct mdi_pi_kstats    *mdi_statp;
6067 
6068         if (MDI_PI(pip)->pi_kstats != NULL)
6069                 return (MDI_SUCCESS);
6070 
6071         if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
6072             KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
6073                 return (MDI_FAILURE);
6074         }
6075 
6076         (void) strcat(ksname, ",err");
6077         kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
6078             KSTAT_TYPE_NAMED,
6079             sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
6080         if (kerrsp == NULL) {
6081                 kstat_delete(kiosp);
6082                 return (MDI_FAILURE);
6083         }
6084 
6085         nsp = (struct pi_errs *)kerrsp->ks_data;
6086         kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
6087         kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
6088         kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
6089             KSTAT_DATA_UINT32);
6090         kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
6091             KSTAT_DATA_UINT32);
6092         kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
6093             KSTAT_DATA_UINT32);
6094         kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
6095             KSTAT_DATA_UINT32);
6096         kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
6097             KSTAT_DATA_UINT32);
6098         kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
6099             KSTAT_DATA_UINT32);
6100         kstat_named_init(&nsp->pi_failedfrom, "Failed From",
6101             KSTAT_DATA_UINT32);
6102         kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);
6103 
6104         mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
6105         mdi_statp->pi_kstat_ref = 1;
6106         mdi_statp->pi_kstat_iostats = kiosp;
6107         mdi_statp->pi_kstat_errstats = kerrsp;
6108         kstat_install(kiosp);
6109         kstat_install(kerrsp);
6110         MDI_PI(pip)->pi_kstats = mdi_statp;
6111         return (MDI_SUCCESS);
6112 }
6113 
6114 /*
6115  * destroy per-path properties
6116  */
6117 static void
6118 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
6119 {
6120 
6121         struct mdi_pi_kstats *mdi_statp;
6122 
6123         if (MDI_PI(pip)->pi_kstats == NULL)
6124                 return;
6125         if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
6126                 return;
6127 
6128         MDI_PI(pip)->pi_kstats = NULL;
6129 
6130         /*
6131          * the kstat may be shared between multiple pathinfo nodes
6132          * decrement this pathinfo's usage, removing the kstats
6133          * themselves when the last pathinfo reference is removed.
6134          */
6135         ASSERT(mdi_statp->pi_kstat_ref > 0);
6136         if (--mdi_statp->pi_kstat_ref != 0)
6137                 return;
6138 
6139         kstat_delete(mdi_statp->pi_kstat_iostats);
6140         kstat_delete(mdi_statp->pi_kstat_errstats);
6141         kmem_free(mdi_statp, sizeof (*mdi_statp));
6142 }
6143 
6144 /*
6145  * update I/O paths KSTATS
6146  */
6147 void
6148 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
6149 {
6150         kstat_t *iostatp;
6151         size_t xfer_cnt;
6152 
6153         ASSERT(pip != NULL);
6154 
6155         /*
6156          * I/O can be driven across a path prior to having path
6157          * statistics available, i.e. probe(9e).
6158          */
6159         if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
6160                 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
6161                 xfer_cnt = bp->b_bcount - bp->b_resid;
6162                 if (bp->b_flags & B_READ) {
6163                         KSTAT_IO_PTR(iostatp)->reads++;
6164                         KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
6165                 } else {
6166                         KSTAT_IO_PTR(iostatp)->writes++;
6167                         KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
6168                 }
6169         }
6170 }
6171 
6172 /*
6173  * Enable the path(specific client/target/initiator)
6174  * Enabling a path means that MPxIO may select the enabled path for routing
6175  * future I/O requests, subject to other path state constraints.
6176  */
6177 int
6178 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags)
6179 {
6180         mdi_phci_t      *ph;
6181 
6182         ph = MDI_PI(pip)->pi_phci;
6183         if (ph == NULL) {
6184                 MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
6185                     "!failed: path %s %p: NULL ph",
6186                     mdi_pi_spathname(pip), (void *)pip));
6187                 return (MDI_FAILURE);
6188         }
6189 
6190         (void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags,
6191                 MDI_ENABLE_OP);
6192         MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
6193             "!returning success pip = %p. ph = %p",
6194             (void *)pip, (void *)ph));
6195         return (MDI_SUCCESS);
6196 
6197 }
6198 
6199 /*
6200  * Disable the path (specific client/target/initiator)
6201  * Disabling a path means that MPxIO will not select the disabled path for
6202  * routing any new I/O requests.
6203  */
6204 int
6205 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags)
6206 {
6207         mdi_phci_t      *ph;
6208 
6209         ph = MDI_PI(pip)->pi_phci;
6210         if (ph == NULL) {
6211                 MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
6212                     "!failed: path %s %p: NULL ph",
6213                     mdi_pi_spathname(pip), (void *)pip));
6214                 return (MDI_FAILURE);
6215         }
6216 
6217         (void) i_mdi_enable_disable_path(pip,
6218             ph->ph_vhci, flags, MDI_DISABLE_OP);
6219         MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
6220             "!returning success pip = %p. ph = %p",
6221             (void *)pip, (void *)ph));
6222         return (MDI_SUCCESS);
6223 }
6224 
6225 /*
6226  * disable the path to a particular pHCI (pHCI specified in the phci_path
6227  * argument) for a particular client (specified in the client_path argument).
6228  * Disabling a path means that MPxIO will not select the disabled path for
6229  * routing any new I/O requests.
6230  * NOTE: this will be removed once the NWS files are changed to use the new
6231  * mdi_{enable,disable}_path interfaces
6232  */
6233 int
6234 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags)
6235 {
6236         return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP));
6237 }
6238 
6239 /*
6240  * Enable the path to a particular pHCI (pHCI specified in the phci_path
6241  * argument) for a particular client (specified in the client_path argument).
6242  * Enabling a path means that MPxIO may select the enabled path for routing
6243  * future I/O requests, subject to other path state constraints.
6244  * NOTE: this will be removed once the NWS files are changed to use the new
6245  * mdi_{enable,disable}_path interfaces
6246  */
6247 
6248 int
6249 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags)
6250 {
6251         return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP));
6252 }
6253 
6254 /*
6255  * Common routine for doing enable/disable.
6256  */
6257 static mdi_pathinfo_t *
6258 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags,
6259                 int op)
6260 {
6261         int             sync_flag = 0;
6262         int             rv;
6263         mdi_pathinfo_t  *next;
6264         int             (*f)() = NULL;
6265 
6266         /*
6267          * Check to make sure the path is not already in the
6268          * requested state. If it is just return the next path
6269          * as we have nothing to do here.
6270          */
6271         if ((MDI_PI_IS_DISABLE(pip) && op == MDI_DISABLE_OP) ||
6272             (!MDI_PI_IS_DISABLE(pip) && op == MDI_ENABLE_OP)) {
6273                 MDI_PI_LOCK(pip);
6274                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
6275                 MDI_PI_UNLOCK(pip);
6276                 return (next);
6277         }
6278 
6279         f = vh->vh_ops->vo_pi_state_change;
6280 
6281         sync_flag = (flags << 8) & 0xf00;
6282 
6283         /*
6284          * Do a callback into the mdi consumer to let it
6285          * know that path is about to get enabled/disabled.
6286          */
6287         if (f != NULL) {
6288                 rv = (*f)(vh->vh_dip, pip, 0,
6289                         MDI_PI_EXT_STATE(pip),
6290                         MDI_EXT_STATE_CHANGE | sync_flag |
6291                         op | MDI_BEFORE_STATE_CHANGE);
6292                 if (rv != MDI_SUCCESS) {
6293                         MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
6294                             "vo_pi_state_change: failed rv = %x", rv));
6295                 }
6296         }
6297         MDI_PI_LOCK(pip);
6298         next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
6299 
6300         switch (flags) {
6301                 case USER_DISABLE:
6302                         if (op == MDI_DISABLE_OP) {
6303                                 MDI_PI_SET_USER_DISABLE(pip);
6304                         } else {
6305                                 MDI_PI_SET_USER_ENABLE(pip);
6306                         }
6307                         break;
6308                 case DRIVER_DISABLE:
6309                         if (op == MDI_DISABLE_OP) {
6310                                 MDI_PI_SET_DRV_DISABLE(pip);
6311                         } else {
6312                                 MDI_PI_SET_DRV_ENABLE(pip);
6313                         }
6314                         break;
6315                 case DRIVER_DISABLE_TRANSIENT:
6316                         if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) {
6317                                 MDI_PI_SET_DRV_DISABLE_TRANS(pip);
6318                         } else {
6319                                 MDI_PI_SET_DRV_ENABLE_TRANS(pip);
6320                         }
6321                         break;
6322         }
6323         MDI_PI_UNLOCK(pip);
6324         /*
6325          * Do a callback into the mdi consumer to let it
6326          * know that path is now enabled/disabled.
6327          */
6328         if (f != NULL) {
6329                 rv = (*f)(vh->vh_dip, pip, 0,
6330                         MDI_PI_EXT_STATE(pip),
6331                         MDI_EXT_STATE_CHANGE | sync_flag |
6332                         op | MDI_AFTER_STATE_CHANGE);
6333                 if (rv != MDI_SUCCESS) {
6334                         MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
6335                             "vo_pi_state_change failed: rv = %x", rv));
6336                 }
6337         }
6338         return (next);
6339 }
6340 
6341 /*
6342  * Common routine for doing enable/disable.
6343  * NOTE: this will be removed once the NWS files are changed to use the new
6344  * mdi_{enable,disable}_path has been putback
6345  */
6346 int
6347 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op)
6348 {
6349 
6350         mdi_phci_t      *ph;
6351         mdi_vhci_t      *vh = NULL;
6352         mdi_client_t    *ct;
6353         mdi_pathinfo_t  *next, *pip;
6354         int             found_it;
6355 
6356         ph = i_devi_get_phci(pdip);
6357         MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
6358             "!op = %d pdip = %p cdip = %p", op, (void *)pdip,
6359             (void *)cdip));
6360         if (ph == NULL) {
6361                 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6362                     "!failed: operation %d: NULL ph", op));
6363                 return (MDI_FAILURE);
6364         }
6365 
6366         if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) {
6367                 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6368                     "!failed: invalid operation %d", op));
6369                 return (MDI_FAILURE);
6370         }
6371 
6372         vh = ph->ph_vhci;
6373 
6374         if (cdip == NULL) {
6375                 /*
6376                  * Need to mark the Phci as enabled/disabled.
6377                  */
6378                 MDI_DEBUG(4, (MDI_NOTE, cdip ? cdip : pdip,
6379                     "op %d for the phci", op));
6380                 MDI_PHCI_LOCK(ph);
6381                 switch (flags) {
6382                         case USER_DISABLE:
6383                                 if (op == MDI_DISABLE_OP) {
6384                                         MDI_PHCI_SET_USER_DISABLE(ph);
6385                                 } else {
6386                                         MDI_PHCI_SET_USER_ENABLE(ph);
6387                                 }
6388                                 break;
6389                         case DRIVER_DISABLE:
6390                                 if (op == MDI_DISABLE_OP) {
6391                                         MDI_PHCI_SET_DRV_DISABLE(ph);
6392                                 } else {
6393                                         MDI_PHCI_SET_DRV_ENABLE(ph);
6394                                 }
6395                                 break;
6396                         case DRIVER_DISABLE_TRANSIENT:
6397                                 if (op == MDI_DISABLE_OP) {
6398                                         MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph);
6399                                 } else {
6400                                         MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph);
6401                                 }
6402                                 break;
6403                         default:
6404                                 MDI_PHCI_UNLOCK(ph);
6405                                 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6406                                     "!invalid flag argument= %d", flags));
6407                 }
6408 
6409                 /*
6410                  * Phci has been disabled. Now try to enable/disable
6411                  * path info's to each client.
6412                  */
6413                 pip = ph->ph_path_head;
6414                 while (pip != NULL) {
6415                         pip = i_mdi_enable_disable_path(pip, vh, flags, op);
6416                 }
6417                 MDI_PHCI_UNLOCK(ph);
6418         } else {
6419 
6420                 /*
6421                  * Disable a specific client.
6422                  */
6423                 ct = i_devi_get_client(cdip);
6424                 if (ct == NULL) {
6425                         MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6426                             "!failed: operation = %d: NULL ct", op));
6427                         return (MDI_FAILURE);
6428                 }
6429 
6430                 MDI_CLIENT_LOCK(ct);
6431                 pip = ct->ct_path_head;
6432                 found_it = 0;
6433                 while (pip != NULL) {
6434                         MDI_PI_LOCK(pip);
6435                         next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6436                         if (MDI_PI(pip)->pi_phci == ph) {
6437                                 MDI_PI_UNLOCK(pip);
6438                                 found_it = 1;
6439                                 break;
6440                         }
6441                         MDI_PI_UNLOCK(pip);
6442                         pip = next;
6443                 }
6444 
6445 
6446                 MDI_CLIENT_UNLOCK(ct);
6447                 if (found_it == 0) {
6448                         MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6449                             "!failed. Could not find corresponding pip\n"));
6450                         return (MDI_FAILURE);
6451                 }
6452 
6453                 (void) i_mdi_enable_disable_path(pip, vh, flags, op);
6454         }
6455 
6456         MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
6457             "!op %d returning success pdip = %p cdip = %p",
6458             op, (void *)pdip, (void *)cdip));
6459         return (MDI_SUCCESS);
6460 }
6461 
6462 /*
6463  * Ensure phci powered up
6464  */
6465 static void
6466 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
6467 {
6468         dev_info_t      *ph_dip;
6469 
6470         ASSERT(pip != NULL);
6471         ASSERT(MDI_PI_LOCKED(pip));
6472 
6473         if (MDI_PI(pip)->pi_pm_held) {
6474                 return;
6475         }
6476 
6477         ph_dip = mdi_pi_get_phci(pip);
6478         MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6479             "%s %p", mdi_pi_spathname(pip), (void *)pip));
6480         if (ph_dip == NULL) {
6481                 return;
6482         }
6483 
6484         MDI_PI_UNLOCK(pip);
6485         MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt was %d",
6486             DEVI(ph_dip)->devi_pm_kidsupcnt));
6487         pm_hold_power(ph_dip);
6488         MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt is %d",
6489             DEVI(ph_dip)->devi_pm_kidsupcnt));
6490         MDI_PI_LOCK(pip);
6491 
6492         /* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */
6493         if (DEVI(ph_dip)->devi_pm_info)
6494                 MDI_PI(pip)->pi_pm_held = 1;
6495 }
6496 
6497 /*
6498  * Allow phci powered down
6499  */
6500 static void
6501 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
6502 {
6503         dev_info_t      *ph_dip = NULL;
6504 
6505         ASSERT(pip != NULL);
6506         ASSERT(MDI_PI_LOCKED(pip));
6507 
6508         if (MDI_PI(pip)->pi_pm_held == 0) {
6509                 return;
6510         }
6511 
6512         ph_dip = mdi_pi_get_phci(pip);
6513         ASSERT(ph_dip != NULL);
6514 
6515         MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6516             "%s %p", mdi_pi_spathname(pip), (void *)pip));
6517 
6518         MDI_PI_UNLOCK(pip);
6519         MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6520             "kidsupcnt was %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
6521         pm_rele_power(ph_dip);
6522         MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6523             "kidsupcnt is %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
6524         MDI_PI_LOCK(pip);
6525 
6526         MDI_PI(pip)->pi_pm_held = 0;
6527 }
6528 
6529 static void
6530 i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
6531 {
6532         ASSERT(MDI_CLIENT_LOCKED(ct));
6533 
6534         ct->ct_power_cnt += incr;
6535         MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6536             "%p ct_power_cnt = %d incr = %d",
6537             (void *)ct, ct->ct_power_cnt, incr));
6538         ASSERT(ct->ct_power_cnt >= 0);
6539 }
6540 
6541 static void
6542 i_mdi_rele_all_phci(mdi_client_t *ct)
6543 {
6544         mdi_pathinfo_t  *pip;
6545 
6546         ASSERT(MDI_CLIENT_LOCKED(ct));
6547         pip = (mdi_pathinfo_t *)ct->ct_path_head;
6548         while (pip != NULL) {
6549                 mdi_hold_path(pip);
6550                 MDI_PI_LOCK(pip);
6551                 i_mdi_pm_rele_pip(pip);
6552                 MDI_PI_UNLOCK(pip);
6553                 mdi_rele_path(pip);
6554                 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6555         }
6556 }
6557 
6558 static void
6559 i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
6560 {
6561         ASSERT(MDI_CLIENT_LOCKED(ct));
6562 
6563         if (i_ddi_devi_attached(ct->ct_dip)) {
6564                 ct->ct_power_cnt -= decr;
6565                 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6566                     "%p ct_power_cnt = %d decr = %d",
6567                     (void *)ct, ct->ct_power_cnt, decr));
6568         }
6569 
6570         ASSERT(ct->ct_power_cnt >= 0);
6571         if (ct->ct_power_cnt == 0) {
6572                 i_mdi_rele_all_phci(ct);
6573                 return;
6574         }
6575 }
6576 
6577 static void
6578 i_mdi_pm_reset_client(mdi_client_t *ct)
6579 {
6580         MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6581             "%p ct_power_cnt = %d", (void *)ct, ct->ct_power_cnt));
6582         ASSERT(MDI_CLIENT_LOCKED(ct));
6583         ct->ct_power_cnt = 0;
6584         i_mdi_rele_all_phci(ct);
6585         ct->ct_powercnt_config = 0;
6586         ct->ct_powercnt_unconfig = 0;
6587         ct->ct_powercnt_reset = 1;
6588 }
6589 
6590 static int
6591 i_mdi_power_one_phci(mdi_pathinfo_t *pip)
6592 {
6593         int             ret;
6594         dev_info_t      *ph_dip;
6595 
6596         MDI_PI_LOCK(pip);
6597         i_mdi_pm_hold_pip(pip);
6598 
6599         ph_dip = mdi_pi_get_phci(pip);
6600         MDI_PI_UNLOCK(pip);
6601 
6602         /* bring all components of phci to full power */
6603         MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6604             "pm_powerup for %s%d %p", ddi_driver_name(ph_dip),
6605             ddi_get_instance(ph_dip), (void *)pip));
6606 
6607         ret = pm_powerup(ph_dip);
6608 
6609         if (ret == DDI_FAILURE) {
6610                 MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6611                     "pm_powerup FAILED for %s%d %p",
6612                     ddi_driver_name(ph_dip), ddi_get_instance(ph_dip),
6613                     (void *)pip));
6614 
6615                 MDI_PI_LOCK(pip);
6616                 i_mdi_pm_rele_pip(pip);
6617                 MDI_PI_UNLOCK(pip);
6618                 return (MDI_FAILURE);
6619         }
6620 
6621         return (MDI_SUCCESS);
6622 }
6623 
6624 static int
6625 i_mdi_power_all_phci(mdi_client_t *ct)
6626 {
6627         mdi_pathinfo_t  *pip;
6628         int             succeeded = 0;
6629 
6630         ASSERT(MDI_CLIENT_LOCKED(ct));
6631         pip = (mdi_pathinfo_t *)ct->ct_path_head;
6632         while (pip != NULL) {
6633                 /*
6634                  * Don't power if MDI_PATHINFO_STATE_FAULT
6635                  * or MDI_PATHINFO_STATE_OFFLINE.
6636                  */
6637                 if (MDI_PI_IS_INIT(pip) ||
6638                     MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) {
6639                         mdi_hold_path(pip);
6640                         MDI_CLIENT_UNLOCK(ct);
6641                         if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
6642                                 succeeded = 1;
6643 
6644                         ASSERT(ct == MDI_PI(pip)->pi_client);
6645                         MDI_CLIENT_LOCK(ct);
6646                         mdi_rele_path(pip);
6647                 }
6648                 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6649         }
6650 
6651         return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
6652 }
6653 
6654 /*
6655  * mdi_bus_power():
6656  *              1. Place the phci(s) into powered up state so that
6657  *                 client can do power management
6658  *              2. Ensure phci powered up as client power managing
6659  * Return Values:
6660  *              MDI_SUCCESS
6661  *              MDI_FAILURE
6662  */
6663 int
6664 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
6665     void *arg, void *result)
6666 {
6667         int                     ret = MDI_SUCCESS;
6668         pm_bp_child_pwrchg_t    *bpc;
6669         mdi_client_t            *ct;
6670         dev_info_t              *cdip;
6671         pm_bp_has_changed_t     *bphc;
6672 
6673         /*
6674          * BUS_POWER_NOINVOL not supported
6675          */
6676         if (op == BUS_POWER_NOINVOL)
6677                 return (MDI_FAILURE);
6678 
6679         /*
6680          * ignore other OPs.
6681          * return quickly to save cou cycles on the ct processing
6682          */
6683         switch (op) {
6684         case BUS_POWER_PRE_NOTIFICATION:
6685         case BUS_POWER_POST_NOTIFICATION:
6686                 bpc = (pm_bp_child_pwrchg_t *)arg;
6687                 cdip = bpc->bpc_dip;
6688                 break;
6689         case BUS_POWER_HAS_CHANGED:
6690                 bphc = (pm_bp_has_changed_t *)arg;
6691                 cdip = bphc->bphc_dip;
6692                 break;
6693         default:
6694                 return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
6695         }
6696 
6697         ASSERT(MDI_CLIENT(cdip));
6698 
6699         ct = i_devi_get_client(cdip);
6700         if (ct == NULL)
6701                 return (MDI_FAILURE);
6702 
6703         /*
6704          * wait till the mdi_pathinfo node state change are processed
6705          */
6706         MDI_CLIENT_LOCK(ct);
6707         switch (op) {
6708         case BUS_POWER_PRE_NOTIFICATION:
6709                 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6710                     "BUS_POWER_PRE_NOTIFICATION:"
6711                     "%s@%s, olevel=%d, nlevel=%d, comp=%d",
6712                     ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
6713                     bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));
6714 
6715                 /* serialize power level change per client */
6716                 while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6717                         cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6718 
6719                 MDI_CLIENT_SET_POWER_TRANSITION(ct);
6720 
6721                 if (ct->ct_power_cnt == 0) {
6722                         ret = i_mdi_power_all_phci(ct);
6723                 }
6724 
6725                 /*
6726                  * if new_level > 0:
6727                  *      - hold phci(s)
6728                  *      - power up phci(s) if not already
6729                  * ignore power down
6730                  */
6731                 if (bpc->bpc_nlevel > 0) {
6732                         if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
6733                                 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6734                                     "i_mdi_pm_hold_client\n"));
6735                                 i_mdi_pm_hold_client(ct, ct->ct_path_count);
6736                         }
6737                 }
6738                 break;
6739         case BUS_POWER_POST_NOTIFICATION:
6740                 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6741                     "BUS_POWER_POST_NOTIFICATION:"
6742                     "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d",
6743                     ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
6744                     bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
6745                     *(int *)result));
6746 
6747                 if (*(int *)result == DDI_SUCCESS) {
6748                         if (bpc->bpc_nlevel > 0) {
6749                                 MDI_CLIENT_SET_POWER_UP(ct);
6750                         } else {
6751                                 MDI_CLIENT_SET_POWER_DOWN(ct);
6752                         }
6753                 }
6754 
6755                 /* release the hold we did in pre-notification */
6756                 if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
6757                     !DEVI_IS_ATTACHING(ct->ct_dip)) {
6758                         MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6759                             "i_mdi_pm_rele_client\n"));
6760                         i_mdi_pm_rele_client(ct, ct->ct_path_count);
6761                 }
6762 
6763                 if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
6764                         /* another thread might started attaching */
6765                         if (DEVI_IS_ATTACHING(ct->ct_dip)) {
6766                                 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6767                                     "i_mdi_pm_rele_client\n"));
6768                                 i_mdi_pm_rele_client(ct, ct->ct_path_count);
6769                         /* detaching has been taken care in pm_post_unconfig */
6770                         } else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
6771                                 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6772                                     "i_mdi_pm_reset_client\n"));
6773                                 i_mdi_pm_reset_client(ct);
6774                         }
6775                 }
6776 
6777                 MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
6778                 cv_broadcast(&ct->ct_powerchange_cv);
6779 
6780                 break;
6781 
6782         /* need to do more */
6783         case BUS_POWER_HAS_CHANGED:
6784                 MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
6785                     "BUS_POWER_HAS_CHANGED:"
6786                     "%s@%s, olevel=%d, nlevel=%d, comp=%d",
6787                     ddi_node_name(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
6788                     bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));
6789 
6790                 if (bphc->bphc_nlevel > 0 &&
6791                     bphc->bphc_nlevel > bphc->bphc_olevel) {
6792                         if (ct->ct_power_cnt == 0) {
6793                                 ret = i_mdi_power_all_phci(ct);
6794                         }
6795                         MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
6796                             "i_mdi_pm_hold_client\n"));
6797                         i_mdi_pm_hold_client(ct, ct->ct_path_count);
6798                 }
6799 
6800                 if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
6801                         MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
6802                             "i_mdi_pm_rele_client\n"));
6803                         i_mdi_pm_rele_client(ct, ct->ct_path_count);
6804                 }
6805                 break;
6806         }
6807 
6808         MDI_CLIENT_UNLOCK(ct);
6809         return (ret);
6810 }
6811 
6812 static int
6813 i_mdi_pm_pre_config_one(dev_info_t *child)
6814 {
6815         int             ret = MDI_SUCCESS;
6816         mdi_client_t    *ct;
6817 
6818         ct = i_devi_get_client(child);
6819         if (ct == NULL)
6820                 return (MDI_FAILURE);
6821 
6822         MDI_CLIENT_LOCK(ct);
6823         while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6824                 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6825 
6826         if (!MDI_CLIENT_IS_FAILED(ct)) {
6827                 MDI_CLIENT_UNLOCK(ct);
6828                 MDI_DEBUG(4, (MDI_NOTE, child, "already configured\n"));
6829                 return (MDI_SUCCESS);
6830         }
6831 
6832         if (ct->ct_powercnt_config) {
6833                 MDI_CLIENT_UNLOCK(ct);
6834                 MDI_DEBUG(4, (MDI_NOTE, child, "already held\n"));
6835                 return (MDI_SUCCESS);
6836         }
6837 
6838         if (ct->ct_power_cnt == 0) {
6839                 ret = i_mdi_power_all_phci(ct);
6840         }
6841         MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
6842         i_mdi_pm_hold_client(ct, ct->ct_path_count);
6843         ct->ct_powercnt_config = 1;
6844         ct->ct_powercnt_reset = 0;
6845         MDI_CLIENT_UNLOCK(ct);
6846         return (ret);
6847 }
6848 
6849 static int
6850 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child)
6851 {
6852         int                     ret = MDI_SUCCESS;
6853         dev_info_t              *cdip;
6854         int                     circ;
6855 
6856         ASSERT(MDI_VHCI(vdip));
6857 
6858         /* ndi_devi_config_one */
6859         if (child) {
6860                 ASSERT(DEVI_BUSY_OWNED(vdip));
6861                 return (i_mdi_pm_pre_config_one(child));
6862         }
6863 
6864         /* devi_config_common */
6865         ndi_devi_enter(vdip, &circ);
6866         cdip = ddi_get_child(vdip);
6867         while (cdip) {
6868                 dev_info_t *next = ddi_get_next_sibling(cdip);
6869 
6870                 ret = i_mdi_pm_pre_config_one(cdip);
6871                 if (ret != MDI_SUCCESS)
6872                         break;
6873                 cdip = next;
6874         }
6875         ndi_devi_exit(vdip, circ);
6876         return (ret);
6877 }
6878 
6879 static int
6880 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
6881 {
6882         int             ret = MDI_SUCCESS;
6883         mdi_client_t    *ct;
6884 
6885         ct = i_devi_get_client(child);
6886         if (ct == NULL)
6887                 return (MDI_FAILURE);
6888 
6889         MDI_CLIENT_LOCK(ct);
6890         while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6891                 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6892 
6893         if (!i_ddi_devi_attached(child)) {
6894                 MDI_DEBUG(4, (MDI_NOTE, child, "node detached already\n"));
6895                 MDI_CLIENT_UNLOCK(ct);
6896                 return (MDI_SUCCESS);
6897         }
6898 
6899         if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6900             (flags & NDI_AUTODETACH)) {
6901                 MDI_DEBUG(4, (MDI_NOTE, child, "auto-modunload\n"));
6902                 MDI_CLIENT_UNLOCK(ct);
6903                 return (MDI_FAILURE);
6904         }
6905 
6906         if (ct->ct_powercnt_unconfig) {
6907                 MDI_DEBUG(4, (MDI_NOTE, child, "ct_powercnt_held\n"));
6908                 MDI_CLIENT_UNLOCK(ct);
6909                 *held = 1;
6910                 return (MDI_SUCCESS);
6911         }
6912 
6913         if (ct->ct_power_cnt == 0) {
6914                 ret = i_mdi_power_all_phci(ct);
6915         }
6916         MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
6917         i_mdi_pm_hold_client(ct, ct->ct_path_count);
6918         ct->ct_powercnt_unconfig = 1;
6919         ct->ct_powercnt_reset = 0;
6920         MDI_CLIENT_UNLOCK(ct);
6921         if (ret == MDI_SUCCESS)
6922                 *held = 1;
6923         return (ret);
6924 }
6925 
6926 static int
6927 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held,
6928     int flags)
6929 {
6930         int                     ret = MDI_SUCCESS;
6931         dev_info_t              *cdip;
6932         int                     circ;
6933 
6934         ASSERT(MDI_VHCI(vdip));
6935         *held = 0;
6936 
6937         /* ndi_devi_unconfig_one */
6938         if (child) {
6939                 ASSERT(DEVI_BUSY_OWNED(vdip));
6940                 return (i_mdi_pm_pre_unconfig_one(child, held, flags));
6941         }
6942 
6943         /* devi_unconfig_common */
6944         ndi_devi_enter(vdip, &circ);
6945         cdip = ddi_get_child(vdip);
6946         while (cdip) {
6947                 dev_info_t *next = ddi_get_next_sibling(cdip);
6948 
6949                 ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
6950                 cdip = next;
6951         }
6952         ndi_devi_exit(vdip, circ);
6953 
6954         if (*held)
6955                 ret = MDI_SUCCESS;
6956 
6957         return (ret);
6958 }
6959 
6960 static void
6961 i_mdi_pm_post_config_one(dev_info_t *child)
6962 {
6963         mdi_client_t    *ct;
6964 
6965         ct = i_devi_get_client(child);
6966         if (ct == NULL)
6967                 return;
6968 
6969         MDI_CLIENT_LOCK(ct);
6970         while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6971                 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6972 
6973         if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
6974                 MDI_DEBUG(4, (MDI_NOTE, child, "not configured\n"));
6975                 MDI_CLIENT_UNLOCK(ct);
6976                 return;
6977         }
6978 
6979         /* client has not been updated */
6980         if (MDI_CLIENT_IS_FAILED(ct)) {
6981                 MDI_DEBUG(4, (MDI_NOTE, child, "client failed\n"));
6982                 MDI_CLIENT_UNLOCK(ct);
6983                 return;
6984         }
6985 
6986         /* another thread might have powered it down or detached it */
6987         if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6988             !DEVI_IS_ATTACHING(child)) ||
6989             (!i_ddi_devi_attached(child) &&
6990             !DEVI_IS_ATTACHING(child))) {
6991                 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
6992                 i_mdi_pm_reset_client(ct);
6993         } else {
6994                 mdi_pathinfo_t  *pip, *next;
6995                 int     valid_path_count = 0;
6996 
6997                 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
6998                 pip = ct->ct_path_head;
6999                 while (pip != NULL) {
7000                         MDI_PI_LOCK(pip);
7001                         next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
7002                         if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
7003                                 valid_path_count ++;
7004                         MDI_PI_UNLOCK(pip);
7005                         pip = next;
7006                 }
7007                 i_mdi_pm_rele_client(ct, valid_path_count);
7008         }
7009         ct->ct_powercnt_config = 0;
7010         MDI_CLIENT_UNLOCK(ct);
7011 }
7012 
7013 static void
7014 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child)
7015 {
7016         int             circ;
7017         dev_info_t      *cdip;
7018 
7019         ASSERT(MDI_VHCI(vdip));
7020 
7021         /* ndi_devi_config_one */
7022         if (child) {
7023                 ASSERT(DEVI_BUSY_OWNED(vdip));
7024                 i_mdi_pm_post_config_one(child);
7025                 return;
7026         }
7027 
7028         /* devi_config_common */
7029         ndi_devi_enter(vdip, &circ);
7030         cdip = ddi_get_child(vdip);
7031         while (cdip) {
7032                 dev_info_t *next = ddi_get_next_sibling(cdip);
7033 
7034                 i_mdi_pm_post_config_one(cdip);
7035                 cdip = next;
7036         }
7037         ndi_devi_exit(vdip, circ);
7038 }
7039 
7040 static void
7041 i_mdi_pm_post_unconfig_one(dev_info_t *child)
7042 {
7043         mdi_client_t    *ct;
7044 
7045         ct = i_devi_get_client(child);
7046         if (ct == NULL)
7047                 return;
7048 
7049         MDI_CLIENT_LOCK(ct);
7050         while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
7051                 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
7052 
7053         if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
7054                 MDI_DEBUG(4, (MDI_NOTE, child, "not held\n"));
7055                 MDI_CLIENT_UNLOCK(ct);
7056                 return;
7057         }
7058 
7059         /* failure detaching or another thread just attached it */
7060         if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
7061             i_ddi_devi_attached(child)) ||
7062             (!i_ddi_devi_attached(child) &&
7063             !DEVI_IS_ATTACHING(child))) {
7064                 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
7065                 i_mdi_pm_reset_client(ct);
7066         } else {
7067                 mdi_pathinfo_t  *pip, *next;
7068                 int     valid_path_count = 0;
7069 
7070                 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
7071                 pip = ct->ct_path_head;
7072                 while (pip != NULL) {
7073                         MDI_PI_LOCK(pip);
7074                         next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
7075                         if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
7076                                 valid_path_count ++;
7077                         MDI_PI_UNLOCK(pip);
7078                         pip = next;
7079                 }
7080                 i_mdi_pm_rele_client(ct, valid_path_count);
7081                 ct->ct_powercnt_unconfig = 0;
7082         }
7083 
7084         MDI_CLIENT_UNLOCK(ct);
7085 }
7086 
7087 static void
7088 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held)
7089 {
7090         int                     circ;
7091         dev_info_t              *cdip;
7092 
7093         ASSERT(MDI_VHCI(vdip));
7094 
7095         if (!held) {
7096                 MDI_DEBUG(4, (MDI_NOTE, vdip, "held = %d", held));
7097                 return;
7098         }
7099 
7100         if (child) {
7101                 ASSERT(DEVI_BUSY_OWNED(vdip));
7102                 i_mdi_pm_post_unconfig_one(child);
7103                 return;
7104         }
7105 
7106         ndi_devi_enter(vdip, &circ);
7107         cdip = ddi_get_child(vdip);
7108         while (cdip) {
7109                 dev_info_t *next = ddi_get_next_sibling(cdip);
7110 
7111                 i_mdi_pm_post_unconfig_one(cdip);
7112                 cdip = next;
7113         }
7114         ndi_devi_exit(vdip, circ);
7115 }
7116 
7117 int
7118 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
7119 {
7120         int                     circ, ret = MDI_SUCCESS;
7121         dev_info_t              *client_dip = NULL;
7122         mdi_client_t            *ct;
7123 
7124         /*
7125          * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
7126          * Power up pHCI for the named client device.
7127          * Note: Before the client is enumerated under vhci by phci,
7128          * client_dip can be NULL. Then proceed to power up all the
7129          * pHCIs.
7130          */
7131         if (devnm != NULL) {
7132                 ndi_devi_enter(vdip, &circ);
7133                 client_dip = ndi_devi_findchild(vdip, devnm);
7134         }
7135 
7136         MDI_DEBUG(4, (MDI_NOTE, vdip,
7137             "op = %d %s %p", op, devnm ? devnm : "", (void *)client_dip));
7138 
7139         switch (op) {
7140         case MDI_PM_PRE_CONFIG:
7141                 ret = i_mdi_pm_pre_config(vdip, client_dip);
7142                 break;
7143 
7144         case MDI_PM_PRE_UNCONFIG:
7145                 ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
7146                     flags);
7147                 break;
7148 
7149         case MDI_PM_POST_CONFIG:
7150                 i_mdi_pm_post_config(vdip, client_dip);
7151                 break;
7152 
7153         case MDI_PM_POST_UNCONFIG:
7154                 i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
7155                 break;
7156 
7157         case MDI_PM_HOLD_POWER:
7158         case MDI_PM_RELE_POWER:
7159                 ASSERT(args);
7160 
7161                 client_dip = (dev_info_t *)args;
7162                 ASSERT(MDI_CLIENT(client_dip));
7163 
7164                 ct = i_devi_get_client(client_dip);
7165                 MDI_CLIENT_LOCK(ct);
7166 
7167                 if (op == MDI_PM_HOLD_POWER) {
7168                         if (ct->ct_power_cnt == 0) {
7169                                 (void) i_mdi_power_all_phci(ct);
7170                                 MDI_DEBUG(4, (MDI_NOTE, client_dip,
7171                                     "i_mdi_pm_hold_client\n"));
7172                                 i_mdi_pm_hold_client(ct, ct->ct_path_count);
7173                         }
7174                 } else {
7175                         if (DEVI_IS_ATTACHING(client_dip)) {
7176                                 MDI_DEBUG(4, (MDI_NOTE, client_dip,
7177                                     "i_mdi_pm_rele_client\n"));
7178                                 i_mdi_pm_rele_client(ct, ct->ct_path_count);
7179                         } else {
7180                                 MDI_DEBUG(4, (MDI_NOTE, client_dip,
7181                                     "i_mdi_pm_reset_client\n"));
7182                                 i_mdi_pm_reset_client(ct);
7183                         }
7184                 }
7185 
7186                 MDI_CLIENT_UNLOCK(ct);
7187                 break;
7188 
7189         default:
7190                 break;
7191         }
7192 
7193         if (devnm)
7194                 ndi_devi_exit(vdip, circ);
7195 
7196         return (ret);
7197 }
7198 
7199 int
7200 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
7201 {
7202         mdi_vhci_t *vhci;
7203 
7204         if (!MDI_VHCI(dip))
7205                 return (MDI_FAILURE);
7206 
7207         if (mdi_class) {
7208                 vhci = DEVI(dip)->devi_mdi_xhci;
7209                 ASSERT(vhci);
7210                 *mdi_class = vhci->vh_class;
7211         }
7212 
7213         return (MDI_SUCCESS);
7214 }
7215 
7216 int
7217 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
7218 {
7219         mdi_phci_t *phci;
7220 
7221         if (!MDI_PHCI(dip))
7222                 return (MDI_FAILURE);
7223 
7224         if (mdi_class) {
7225                 phci = DEVI(dip)->devi_mdi_xhci;
7226                 ASSERT(phci);
7227                 *mdi_class = phci->ph_vhci->vh_class;
7228         }
7229 
7230         return (MDI_SUCCESS);
7231 }
7232 
7233 int
7234 mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
7235 {
7236         mdi_client_t *client;
7237 
7238         if (!MDI_CLIENT(dip))
7239                 return (MDI_FAILURE);
7240 
7241         if (mdi_class) {
7242                 client = DEVI(dip)->devi_mdi_client;
7243                 ASSERT(client);
7244                 *mdi_class = client->ct_vhci->vh_class;
7245         }
7246 
7247         return (MDI_SUCCESS);
7248 }
7249 
7250 void *
7251 mdi_client_get_vhci_private(dev_info_t *dip)
7252 {
7253         ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
7254         if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
7255                 mdi_client_t    *ct;
7256                 ct = i_devi_get_client(dip);
7257                 return (ct->ct_vprivate);
7258         }
7259         return (NULL);
7260 }
7261 
7262 void
7263 mdi_client_set_vhci_private(dev_info_t *dip, void *data)
7264 {
7265         ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
7266         if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
7267                 mdi_client_t    *ct;
7268                 ct = i_devi_get_client(dip);
7269                 ct->ct_vprivate = data;
7270         }
7271 }
7272 /*
7273  * mdi_pi_get_vhci_private():
7274  *              Get the vhci private information associated with the
7275  *              mdi_pathinfo node
7276  */
7277 void *
7278 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
7279 {
7280         caddr_t vprivate = NULL;
7281         if (pip) {
7282                 vprivate = MDI_PI(pip)->pi_vprivate;
7283         }
7284         return (vprivate);
7285 }
7286 
7287 /*
7288  * mdi_pi_set_vhci_private():
7289  *              Set the vhci private information in the mdi_pathinfo node
7290  */
7291 void
7292 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
7293 {
7294         if (pip) {
7295                 MDI_PI(pip)->pi_vprivate = priv;
7296         }
7297 }
7298 
7299 /*
7300  * mdi_phci_get_vhci_private():
7301  *              Get the vhci private information associated with the
7302  *              mdi_phci node
7303  */
7304 void *
7305 mdi_phci_get_vhci_private(dev_info_t *dip)
7306 {
7307         ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
7308         if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7309                 mdi_phci_t      *ph;
7310                 ph = i_devi_get_phci(dip);
7311                 return (ph->ph_vprivate);
7312         }
7313         return (NULL);
7314 }
7315 
7316 /*
7317  * mdi_phci_set_vhci_private():
7318  *              Set the vhci private information in the mdi_phci node
7319  */
7320 void
7321 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
7322 {
7323         ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
7324         if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7325                 mdi_phci_t      *ph;
7326                 ph = i_devi_get_phci(dip);
7327                 ph->ph_vprivate = priv;
7328         }
7329 }
7330 
7331 int
7332 mdi_pi_ishidden(mdi_pathinfo_t *pip)
7333 {
7334         return (MDI_PI_FLAGS_IS_HIDDEN(pip));
7335 }
7336 
7337 int
7338 mdi_pi_device_isremoved(mdi_pathinfo_t *pip)
7339 {
7340         return (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip));
7341 }
7342 
7343 /* Return 1 if all client paths are device_removed */
7344 static int
7345 i_mdi_client_all_devices_removed(mdi_client_t *ct)
7346 {
7347         mdi_pathinfo_t  *pip;
7348         int             all_devices_removed = 1;
7349 
7350         MDI_CLIENT_LOCK(ct);
7351         for (pip = ct->ct_path_head; pip;
7352             pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) {
7353                 if (!mdi_pi_device_isremoved(pip)) {
7354                         all_devices_removed = 0;
7355                         break;
7356                 }
7357         }
7358         MDI_CLIENT_UNLOCK(ct);
7359         return (all_devices_removed);
7360 }
7361 
7362 /*
7363  * When processing path hotunplug, represent device removal.
7364  */
7365 int
7366 mdi_pi_device_remove(mdi_pathinfo_t *pip)
7367 {
7368         mdi_client_t    *ct;
7369 
7370         MDI_PI_LOCK(pip);
7371         if (mdi_pi_device_isremoved(pip)) {
7372                 MDI_PI_UNLOCK(pip);
7373                 return (0);
7374         }
7375         MDI_PI_FLAGS_SET_DEVICE_REMOVED(pip);
7376         MDI_PI_FLAGS_SET_HIDDEN(pip);
7377         MDI_PI_UNLOCK(pip);
7378 
7379         /*
7380          * If all paths associated with the client are now DEVICE_REMOVED,
7381          * reflect DEVICE_REMOVED in the client.
7382          */
7383         ct = MDI_PI(pip)->pi_client;
7384         if (ct && ct->ct_dip && i_mdi_client_all_devices_removed(ct))
7385                 (void) ndi_devi_device_remove(ct->ct_dip);
7386         else
7387                 i_ddi_di_cache_invalidate();
7388 
7389         return (1);
7390 }
7391 
7392 /*
7393  * When processing hotplug, if a path marked mdi_pi_device_isremoved()
7394  * is now accessible then this interfaces is used to represent device insertion.
7395  */
7396 int
7397 mdi_pi_device_insert(mdi_pathinfo_t *pip)
7398 {
7399         MDI_PI_LOCK(pip);
7400         if (!mdi_pi_device_isremoved(pip)) {
7401                 MDI_PI_UNLOCK(pip);
7402                 return (0);
7403         }
7404         MDI_PI_FLAGS_CLR_DEVICE_REMOVED(pip);
7405         MDI_PI_FLAGS_CLR_HIDDEN(pip);
7406         MDI_PI_UNLOCK(pip);
7407 
7408         i_ddi_di_cache_invalidate();
7409 
7410         return (1);
7411 }
7412 
7413 /*
7414  * List of vhci class names:
7415  * A vhci class name must be in this list only if the corresponding vhci
7416  * driver intends to use the mdi provided bus config implementation
7417  * (i.e., mdi_vhci_bus_config()).
7418  */
7419 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
7420 #define N_VHCI_CLASSES  (sizeof (vhci_class_list) / sizeof (char *))
7421 
7422 /*
7423  * During boot time, the on-disk vhci cache for every vhci class is read
7424  * in the form of an nvlist and stored here.
7425  */
7426 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];
7427 
7428 /* nvpair names in vhci cache nvlist */
7429 #define MDI_VHCI_CACHE_VERSION  1
7430 #define MDI_NVPNAME_VERSION     "version"
7431 #define MDI_NVPNAME_PHCIS       "phcis"
7432 #define MDI_NVPNAME_CTADDRMAP   "clientaddrmap"
7433 
7434 /*
7435  * Given vhci class name, return its on-disk vhci cache filename.
7436  * Memory for the returned filename which includes the full path is allocated
7437  * by this function.
7438  */
7439 static char *
7440 vhclass2vhcache_filename(char *vhclass)
7441 {
7442         char *filename;
7443         int len;
7444         static char *fmt = "/etc/devices/mdi_%s_cache";
7445 
7446         /*
7447          * fmt contains the on-disk vhci cache file name format;
7448          * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
7449          */
7450 
7451         /* the -1 below is to account for "%s" in the format string */
7452         len = strlen(fmt) + strlen(vhclass) - 1;
7453         filename = kmem_alloc(len, KM_SLEEP);
7454         (void) snprintf(filename, len, fmt, vhclass);
7455         ASSERT(len == (strlen(filename) + 1));
7456         return (filename);
7457 }
7458 
7459 /*
7460  * initialize the vhci cache related data structures and read the on-disk
7461  * vhci cached data into memory.
7462  */
7463 static void
7464 setup_vhci_cache(mdi_vhci_t *vh)
7465 {
7466         mdi_vhci_config_t *vhc;
7467         mdi_vhci_cache_t *vhcache;
7468         int i;
7469         nvlist_t *nvl = NULL;
7470 
7471         vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
7472         vh->vh_config = vhc;
7473         vhcache = &vhc->vhc_vhcache;
7474 
7475         vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);
7476 
7477         mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
7478         cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);
7479 
7480         rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);
7481 
7482         /*
7483          * Create string hash; same as mod_hash_create_strhash() except that
7484          * we use NULL key destructor.
7485          */
7486         vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
7487             mdi_bus_config_cache_hash_size,
7488             mod_hash_null_keydtor, mod_hash_null_valdtor,
7489             mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
7490 
7491         /*
7492          * The on-disk vhci cache is read during booting prior to the
7493          * lights-out period by mdi_read_devices_files().
7494          */
7495         for (i = 0; i < N_VHCI_CLASSES; i++) {
7496                 if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
7497                         nvl = vhcache_nvl[i];
7498                         vhcache_nvl[i] = NULL;
7499                         break;
7500                 }
7501         }
7502 
7503         /*
7504          * this is to cover the case of some one manually causing unloading
7505          * (or detaching) and reloading (or attaching) of a vhci driver.
7506          */
7507         if (nvl == NULL && modrootloaded)
7508                 nvl = read_on_disk_vhci_cache(vh->vh_class);
7509 
7510         if (nvl != NULL) {
7511                 rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7512                 if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
7513                         vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
7514                 else  {
7515                         cmn_err(CE_WARN,
7516                             "%s: data file corrupted, will recreate",
7517                             vhc->vhc_vhcache_filename);
7518                 }
7519                 rw_exit(&vhcache->vhcache_lock);
7520                 nvlist_free(nvl);
7521         }
7522 
7523         vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
7524             CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");
7525 
7526         vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
7527         vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
7528 }
7529 
7530 /*
7531  * free all vhci cache related resources
7532  */
7533 static int
7534 destroy_vhci_cache(mdi_vhci_t *vh)
7535 {
7536         mdi_vhci_config_t *vhc = vh->vh_config;
7537         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7538         mdi_vhcache_phci_t *cphci, *cphci_next;
7539         mdi_vhcache_client_t *cct, *cct_next;
7540         mdi_vhcache_pathinfo_t *cpi, *cpi_next;
7541 
7542         if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
7543                 return (MDI_FAILURE);
7544 
7545         kmem_free(vhc->vhc_vhcache_filename,
7546             strlen(vhc->vhc_vhcache_filename) + 1);
7547 
7548         mod_hash_destroy_strhash(vhcache->vhcache_client_hash);
7549 
7550         for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7551             cphci = cphci_next) {
7552                 cphci_next = cphci->cphci_next;
7553                 free_vhcache_phci(cphci);
7554         }
7555 
7556         for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
7557                 cct_next = cct->cct_next;
7558                 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
7559                         cpi_next = cpi->cpi_next;
7560                         free_vhcache_pathinfo(cpi);
7561                 }
7562                 free_vhcache_client(cct);
7563         }
7564 
7565         rw_destroy(&vhcache->vhcache_lock);
7566 
7567         mutex_destroy(&vhc->vhc_lock);
7568         cv_destroy(&vhc->vhc_cv);
7569         kmem_free(vhc, sizeof (mdi_vhci_config_t));
7570         return (MDI_SUCCESS);
7571 }
7572 
7573 /*
7574  * Stop all vhci cache related async threads and free their resources.
7575  */
7576 static int
7577 stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
7578 {
7579         mdi_async_client_config_t *acc, *acc_next;
7580 
7581         mutex_enter(&vhc->vhc_lock);
7582         vhc->vhc_flags |= MDI_VHC_EXIT;
7583         ASSERT(vhc->vhc_acc_thrcount >= 0);
7584         cv_broadcast(&vhc->vhc_cv);
7585 
7586         while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
7587             vhc->vhc_acc_thrcount != 0) {
7588                 mutex_exit(&vhc->vhc_lock);
7589                 delay_random(mdi_delay);
7590                 mutex_enter(&vhc->vhc_lock);
7591         }
7592 
7593         vhc->vhc_flags &= ~MDI_VHC_EXIT;
7594 
7595         for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
7596                 acc_next = acc->acc_next;
7597                 free_async_client_config(acc);
7598         }
7599         vhc->vhc_acc_list_head = NULL;
7600         vhc->vhc_acc_list_tail = NULL;
7601         vhc->vhc_acc_count = 0;
7602 
7603         if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
7604                 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7605                 mutex_exit(&vhc->vhc_lock);
7606                 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
7607                         vhcache_dirty(vhc);
7608                         return (MDI_FAILURE);
7609                 }
7610         } else
7611                 mutex_exit(&vhc->vhc_lock);
7612 
7613         if (callb_delete(vhc->vhc_cbid) != 0)
7614                 return (MDI_FAILURE);
7615 
7616         return (MDI_SUCCESS);
7617 }
7618 
7619 /*
7620  * Stop vhci cache flush thread
7621  */
7622 /* ARGSUSED */
7623 static boolean_t
7624 stop_vhcache_flush_thread(void *arg, int code)
7625 {
7626         mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7627 
7628         mutex_enter(&vhc->vhc_lock);
7629         vhc->vhc_flags |= MDI_VHC_EXIT;
7630         cv_broadcast(&vhc->vhc_cv);
7631 
7632         while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
7633                 mutex_exit(&vhc->vhc_lock);
7634                 delay_random(mdi_delay);
7635                 mutex_enter(&vhc->vhc_lock);
7636         }
7637 
7638         if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
7639                 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7640                 mutex_exit(&vhc->vhc_lock);
7641                 (void) flush_vhcache(vhc, 1);
7642         } else
7643                 mutex_exit(&vhc->vhc_lock);
7644 
7645         return (B_TRUE);
7646 }
7647 
7648 /*
7649  * Enqueue the vhcache phci (cphci) at the tail of the list
7650  */
7651 static void
7652 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
7653 {
7654         cphci->cphci_next = NULL;
7655         if (vhcache->vhcache_phci_head == NULL)
7656                 vhcache->vhcache_phci_head = cphci;
7657         else
7658                 vhcache->vhcache_phci_tail->cphci_next = cphci;
7659         vhcache->vhcache_phci_tail = cphci;
7660 }
7661 
7662 /*
7663  * Enqueue the vhcache pathinfo (cpi) at the tail of the list
7664  */
7665 static void
7666 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7667     mdi_vhcache_pathinfo_t *cpi)
7668 {
7669         cpi->cpi_next = NULL;
7670         if (cct->cct_cpi_head == NULL)
7671                 cct->cct_cpi_head = cpi;
7672         else
7673                 cct->cct_cpi_tail->cpi_next = cpi;
7674         cct->cct_cpi_tail = cpi;
7675 }
7676 
7677 /*
7678  * Enqueue the vhcache pathinfo (cpi) at the correct location in the
7679  * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
7680  * flag set come at the beginning of the list. All cpis which have this
7681  * flag set come at the end of the list.
7682  */
7683 static void
7684 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7685     mdi_vhcache_pathinfo_t *newcpi)
7686 {
7687         mdi_vhcache_pathinfo_t *cpi, *prev_cpi;
7688 
7689         if (cct->cct_cpi_head == NULL ||
7690             (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
7691                 enqueue_tail_vhcache_pathinfo(cct, newcpi);
7692         else {
7693                 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL &&
7694                     !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST);
7695                     prev_cpi = cpi, cpi = cpi->cpi_next)
7696                         ;
7697 
7698                 if (prev_cpi == NULL)
7699                         cct->cct_cpi_head = newcpi;
7700                 else
7701                         prev_cpi->cpi_next = newcpi;
7702 
7703                 newcpi->cpi_next = cpi;
7704 
7705                 if (cpi == NULL)
7706                         cct->cct_cpi_tail = newcpi;
7707         }
7708 }
7709 
7710 /*
7711  * Enqueue the vhcache client (cct) at the tail of the list
7712  */
7713 static void
7714 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
7715     mdi_vhcache_client_t *cct)
7716 {
7717         cct->cct_next = NULL;
7718         if (vhcache->vhcache_client_head == NULL)
7719                 vhcache->vhcache_client_head = cct;
7720         else
7721                 vhcache->vhcache_client_tail->cct_next = cct;
7722         vhcache->vhcache_client_tail = cct;
7723 }
7724 
/*
 * Free an array of nelem strings along with the array itself. Each non-NULL
 * element is freed as a buffer of strlen() + 1 bytes. A NULL array is
 * ignored.
 */
static void
free_string_array(char **str, int nelem)
{
        int idx;

        if (str == NULL)
                return;

        for (idx = 0; idx < nelem; idx++) {
                char *elem = str[idx];

                if (elem != NULL)
                        kmem_free(elem, strlen(elem) + 1);
        }

        kmem_free(str, sizeof (char *) * nelem);
}
7738 
7739 static void
7740 free_vhcache_phci(mdi_vhcache_phci_t *cphci)
7741 {
7742         kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1);
7743         kmem_free(cphci, sizeof (*cphci));
7744 }
7745 
7746 static void
7747 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
7748 {
7749         kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1);
7750         kmem_free(cpi, sizeof (*cpi));
7751 }
7752 
7753 static void
7754 free_vhcache_client(mdi_vhcache_client_t *cct)
7755 {
7756         kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1);
7757         kmem_free(cct, sizeof (*cct));
7758 }
7759 
7760 static char *
7761 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
7762 {
7763         char *name_addr;
7764         int len;
7765 
7766         len = strlen(ct_name) + strlen(ct_addr) + 2;
7767         name_addr = kmem_alloc(len, KM_SLEEP);
7768         (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);
7769 
7770         if (ret_len)
7771                 *ret_len = len;
7772         return (name_addr);
7773 }
7774 
7775 /*
7776  * Copy the contents of paddrnvl to vhci cache.
7777  * paddrnvl nvlist contains path information for a vhci client.
7778  * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
7779  */
7780 static void
7781 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
7782     mdi_vhcache_client_t *cct)
7783 {
7784         nvpair_t *nvp = NULL;
7785         mdi_vhcache_pathinfo_t *cpi;
7786         uint_t nelem;
7787         uint32_t *val;
7788 
7789         while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7790                 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
7791                 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
7792                 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7793                 (void) nvpair_value_uint32_array(nvp, &val, &nelem);
7794                 ASSERT(nelem == 2);
7795                 cpi->cpi_cphci = cphci_list[val[0]];
7796                 cpi->cpi_flags = val[1];
7797                 enqueue_tail_vhcache_pathinfo(cct, cpi);
7798         }
7799 }
7800 
7801 /*
7802  * Copy the contents of caddrmapnvl to vhci cache.
7803  * caddrmapnvl nvlist contains vhci client address to phci client address
7804  * mappings. See the comment in mainnvl_to_vhcache() for the format of
7805  * this nvlist.
7806  */
7807 static void
7808 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
7809     mdi_vhcache_phci_t *cphci_list[])
7810 {
7811         nvpair_t *nvp = NULL;
7812         nvlist_t *paddrnvl;
7813         mdi_vhcache_client_t *cct;
7814 
7815         while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7816                 ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
7817                 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
7818                 cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7819                 (void) nvpair_value_nvlist(nvp, &paddrnvl);
7820                 paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
7821                 /* the client must contain at least one path */
7822                 ASSERT(cct->cct_cpi_head != NULL);
7823 
7824                 enqueue_vhcache_client(vhcache, cct);
7825                 (void) mod_hash_insert(vhcache->vhcache_client_hash,
7826                     (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
7827         }
7828 }
7829 
7830 /*
7831  * Copy the contents of the main nvlist to vhci cache.
7832  *
7833  * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
7834  * The nvlist contains the mappings between the vhci client addresses and
7835  * their corresponding phci client addresses.
7836  *
7837  * The structure of the nvlist is as follows:
7838  *
7839  * Main nvlist:
7840  *      NAME            TYPE            DATA
7841  *      version         int32           version number
7842  *      phcis           string array    array of phci paths
7843  *      clientaddrmap   nvlist_t        c2paddrs_nvl (see below)
7844  *
7845  * structure of c2paddrs_nvl:
7846  *      NAME            TYPE            DATA
7847  *      caddr1          nvlist_t        paddrs_nvl1
7848  *      caddr2          nvlist_t        paddrs_nvl2
7849  *      ...
7850  * where caddr1, caddr2, ... are vhci client name and addresses in the
7851  * form of "<clientname>@<clientaddress>".
7852  * (for example: "ssd@2000002037cd9f72");
7853  * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
7854  *
7855  * structure of paddrs_nvl:
7856  *      NAME            TYPE            DATA
7857  *      pi_addr1        uint32_array    (phci-id, cpi_flags)
7858  *      pi_addr2        uint32_array    (phci-id, cpi_flags)
7859  *      ...
7860  * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
7861  * (so called pi_addrs, for example: "w2100002037cd9f72,0");
7862  * phci-ids are integers that identify pHCIs to which the
7863  * the bus specific address belongs to. These integers are used as an index
7864  * into to the phcis string array in the main nvlist to get the pHCI path.
7865  */
7866 static int
7867 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
7868 {
7869         char **phcis, **phci_namep;
7870         uint_t nphcis;
7871         mdi_vhcache_phci_t *cphci, **cphci_list;
7872         nvlist_t *caddrmapnvl;
7873         int32_t ver;
7874         int i;
7875         size_t cphci_list_size;
7876 
7877         ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));
7878 
7879         if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
7880             ver != MDI_VHCI_CACHE_VERSION)
7881                 return (MDI_FAILURE);
7882 
7883         if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
7884             &nphcis) != 0)
7885                 return (MDI_SUCCESS);
7886 
7887         ASSERT(nphcis > 0);
7888 
7889         cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
7890         cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
7891         for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
7892                 cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
7893                 cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
7894                 enqueue_vhcache_phci(vhcache, cphci);
7895                 cphci_list[i] = cphci;
7896         }
7897 
7898         ASSERT(vhcache->vhcache_phci_head != NULL);
7899 
7900         if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
7901                 caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);
7902 
7903         kmem_free(cphci_list, cphci_list_size);
7904         return (MDI_SUCCESS);
7905 }
7906 
7907 /*
7908  * Build paddrnvl for the specified client using the information in the
7909  * vhci cache and add it to the caddrmapnnvl.
7910  * Returns 0 on success, errno on failure.
7911  */
7912 static int
7913 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
7914     nvlist_t *caddrmapnvl)
7915 {
7916         mdi_vhcache_pathinfo_t *cpi;
7917         nvlist_t *nvl;
7918         int err;
7919         uint32_t val[2];
7920 
7921         ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7922 
7923         if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0)
7924                 return (err);
7925 
7926         for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7927                 val[0] = cpi->cpi_cphci->cphci_id;
7928                 val[1] = cpi->cpi_flags;
7929                 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2))
7930                     != 0)
7931                         goto out;
7932         }
7933 
7934         err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl);
7935 out:
7936         nvlist_free(nvl);
7937         return (err);
7938 }
7939 
7940 /*
7941  * Build caddrmapnvl using the information in the vhci cache
7942  * and add it to the mainnvl.
7943  * Returns 0 on success, errno on failure.
7944  */
7945 static int
7946 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
7947 {
7948         mdi_vhcache_client_t *cct;
7949         nvlist_t *nvl;
7950         int err;
7951 
7952         ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7953 
7954         if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0)
7955                 return (err);
7956 
7957         for (cct = vhcache->vhcache_client_head; cct != NULL;
7958             cct = cct->cct_next) {
7959                 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0)
7960                         goto out;
7961         }
7962 
7963         err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl);
7964 out:
7965         nvlist_free(nvl);
7966         return (err);
7967 }
7968 
7969 /*
7970  * Build nvlist using the information in the vhci cache.
7971  * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
7972  * Returns nvl on success, NULL on failure.
7973  */
7974 static nvlist_t *
7975 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
7976 {
7977         mdi_vhcache_phci_t *cphci;
7978         uint_t phci_count;
7979         char **phcis;
7980         nvlist_t *nvl;
7981         int err, i;
7982 
7983         if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
7984                 nvl = NULL;
7985                 goto out;
7986         }
7987 
7988         if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
7989             MDI_VHCI_CACHE_VERSION)) != 0)
7990                 goto out;
7991 
7992         rw_enter(&vhcache->vhcache_lock, RW_READER);
7993         if (vhcache->vhcache_phci_head == NULL) {
7994                 rw_exit(&vhcache->vhcache_lock);
7995                 return (nvl);
7996         }
7997 
7998         phci_count = 0;
7999         for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8000             cphci = cphci->cphci_next)
8001                 cphci->cphci_id = phci_count++;
8002 
8003         /* build phci pathname list */
8004         phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
8005         for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
8006             cphci = cphci->cphci_next, i++)
8007                 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);
8008 
8009         err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
8010             phci_count);
8011         free_string_array(phcis, phci_count);
8012 
8013         if (err == 0 &&
8014             (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
8015                 rw_exit(&vhcache->vhcache_lock);
8016                 return (nvl);
8017         }
8018 
8019         rw_exit(&vhcache->vhcache_lock);
8020 out:
8021         nvlist_free(nvl);
8022         return (NULL);
8023 }
8024 
8025 /*
8026  * Lookup vhcache phci structure for the specified phci path.
8027  */
8028 static mdi_vhcache_phci_t *
8029 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
8030 {
8031         mdi_vhcache_phci_t *cphci;
8032 
8033         ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8034 
8035         for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8036             cphci = cphci->cphci_next) {
8037                 if (strcmp(cphci->cphci_path, phci_path) == 0)
8038                         return (cphci);
8039         }
8040 
8041         return (NULL);
8042 }
8043 
8044 /*
8045  * Lookup vhcache phci structure for the specified phci.
8046  */
8047 static mdi_vhcache_phci_t *
8048 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
8049 {
8050         mdi_vhcache_phci_t *cphci;
8051 
8052         ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8053 
8054         for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8055             cphci = cphci->cphci_next) {
8056                 if (cphci->cphci_phci == ph)
8057                         return (cphci);
8058         }
8059 
8060         return (NULL);
8061 }
8062 
8063 /*
8064  * Add the specified phci to the vhci cache if not already present.
8065  */
8066 static void
8067 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
8068 {
8069         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8070         mdi_vhcache_phci_t *cphci;
8071         char *pathname;
8072         int cache_updated;
8073 
8074         rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8075 
8076         pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8077         (void) ddi_pathname(ph->ph_dip, pathname);
8078         if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
8079             != NULL) {
8080                 cphci->cphci_phci = ph;
8081                 cache_updated = 0;
8082         } else {
8083                 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
8084                 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
8085                 cphci->cphci_phci = ph;
8086                 enqueue_vhcache_phci(vhcache, cphci);
8087                 cache_updated = 1;
8088         }
8089 
8090         rw_exit(&vhcache->vhcache_lock);
8091 
8092         /*
8093          * Since a new phci has been added, reset
8094          * vhc_path_discovery_cutoff_time to allow for discovery of paths
8095          * during next vhcache_discover_paths().
8096          */
8097         mutex_enter(&vhc->vhc_lock);
8098         vhc->vhc_path_discovery_cutoff_time = 0;
8099         mutex_exit(&vhc->vhc_lock);
8100 
8101         kmem_free(pathname, MAXPATHLEN);
8102         if (cache_updated)
8103                 vhcache_dirty(vhc);
8104 }
8105 
8106 /*
8107  * Remove the reference to the specified phci from the vhci cache.
8108  */
8109 static void
8110 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
8111 {
8112         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8113         mdi_vhcache_phci_t *cphci;
8114 
8115         rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8116         if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
8117                 /* do not remove the actual mdi_vhcache_phci structure */
8118                 cphci->cphci_phci = NULL;
8119         }
8120         rw_exit(&vhcache->vhcache_lock);
8121 }
8122 
8123 static void
8124 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
8125     mdi_vhcache_lookup_token_t *src)
8126 {
8127         if (src == NULL) {
8128                 dst->lt_cct = NULL;
8129                 dst->lt_cct_lookup_time = 0;
8130         } else {
8131                 dst->lt_cct = src->lt_cct;
8132                 dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
8133         }
8134 }
8135 
8136 /*
8137  * Look up vhcache client for the specified client.
8138  */
8139 static mdi_vhcache_client_t *
8140 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
8141     mdi_vhcache_lookup_token_t *token)
8142 {
8143         mod_hash_val_t hv;
8144         char *name_addr;
8145         int len;
8146 
8147         ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8148 
8149         /*
8150          * If no vhcache clean occurred since the last lookup, we can
8151          * simply return the cct from the last lookup operation.
8152          * It works because ccts are never freed except during the vhcache
8153          * cleanup operation.
8154          */
8155         if (token != NULL &&
8156             vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
8157                 return (token->lt_cct);
8158 
8159         name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
8160         if (mod_hash_find(vhcache->vhcache_client_hash,
8161             (mod_hash_key_t)name_addr, &hv) == 0) {
8162                 if (token) {
8163                         token->lt_cct = (mdi_vhcache_client_t *)hv;
8164                         token->lt_cct_lookup_time = ddi_get_lbolt64();
8165                 }
8166         } else {
8167                 if (token) {
8168                         token->lt_cct = NULL;
8169                         token->lt_cct_lookup_time = 0;
8170                 }
8171                 hv = NULL;
8172         }
8173         kmem_free(name_addr, len);
8174         return ((mdi_vhcache_client_t *)hv);
8175 }
8176 
8177 /*
8178  * Add the specified path to the vhci cache if not already present.
8179  * Also add the vhcache client for the client corresponding to this path
8180  * if it doesn't already exist.
8181  */
8182 static void
8183 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
8184 {
8185         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8186         mdi_vhcache_client_t *cct;
8187         mdi_vhcache_pathinfo_t *cpi;
8188         mdi_phci_t *ph = pip->pi_phci;
8189         mdi_client_t *ct = pip->pi_client;
8190         int cache_updated = 0;
8191 
8192         rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8193 
8194         /* if vhcache client for this pip doesn't already exist, add it */
8195         if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
8196             NULL)) == NULL) {
8197                 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
8198                 cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
8199                     ct->ct_guid, NULL);
8200                 enqueue_vhcache_client(vhcache, cct);
8201                 (void) mod_hash_insert(vhcache->vhcache_client_hash,
8202                     (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
8203                 cache_updated = 1;
8204         }
8205 
8206         for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8207                 if (cpi->cpi_cphci->cphci_phci == ph &&
8208                     strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
8209                         cpi->cpi_pip = pip;
8210                         if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
8211                                 cpi->cpi_flags &=
8212                                     ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8213                                 sort_vhcache_paths(cct);
8214                                 cache_updated = 1;
8215                         }
8216                         break;
8217                 }
8218         }
8219 
8220         if (cpi == NULL) {
8221                 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
8222                 cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
8223                 cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
8224                 ASSERT(cpi->cpi_cphci != NULL);
8225                 cpi->cpi_pip = pip;
8226                 enqueue_vhcache_pathinfo(cct, cpi);
8227                 cache_updated = 1;
8228         }
8229 
8230         rw_exit(&vhcache->vhcache_lock);
8231 
8232         if (cache_updated)
8233                 vhcache_dirty(vhc);
8234 }
8235 
8236 /*
8237  * Remove the reference to the specified path from the vhci cache.
8238  */
8239 static void
8240 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
8241 {
8242         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8243         mdi_client_t *ct = pip->pi_client;
8244         mdi_vhcache_client_t *cct;
8245         mdi_vhcache_pathinfo_t *cpi;
8246 
8247         rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8248         if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
8249             NULL)) != NULL) {
8250                 for (cpi = cct->cct_cpi_head; cpi != NULL;
8251                     cpi = cpi->cpi_next) {
8252                         if (cpi->cpi_pip == pip) {
8253                                 cpi->cpi_pip = NULL;
8254                                 break;
8255                         }
8256                 }
8257         }
8258         rw_exit(&vhcache->vhcache_lock);
8259 }
8260 
8261 /*
8262  * Flush the vhci cache to disk.
8263  * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
8264  */
8265 static int
8266 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
8267 {
8268         nvlist_t *nvl;
8269         int err;
8270         int rv;
8271 
8272         /*
8273          * It is possible that the system may shutdown before
8274          * i_ddi_io_initialized (during stmsboot for example). To allow for
8275          * flushing the cache in this case do not check for
8276          * i_ddi_io_initialized when force flag is set.
8277          */
8278         if (force_flag == 0 && !i_ddi_io_initialized())
8279                 return (MDI_FAILURE);
8280 
8281         if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
8282                 err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
8283                 nvlist_free(nvl);
8284         } else
8285                 err = EFAULT;
8286 
8287         rv = MDI_SUCCESS;
8288         mutex_enter(&vhc->vhc_lock);
8289         if (err != 0) {
8290                 if (err == EROFS) {
8291                         vhc->vhc_flags |= MDI_VHC_READONLY_FS;
8292                         vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
8293                             MDI_VHC_VHCACHE_DIRTY);
8294                 } else {
8295                         if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
8296                                 cmn_err(CE_CONT, "%s: update failed\n",
8297                                     vhc->vhc_vhcache_filename);
8298                                 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
8299                         }
8300                         rv = MDI_FAILURE;
8301                 }
8302         } else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
8303                 cmn_err(CE_CONT,
8304                     "%s: update now ok\n", vhc->vhc_vhcache_filename);
8305                 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
8306         }
8307         mutex_exit(&vhc->vhc_lock);
8308 
8309         return (rv);
8310 }
8311 
/*
 * Call flush_vhcache() to flush the vhci cache at the scheduled time.
 * Exits itself if left idle for the idle timeout period.
 *
 * arg is the mdi_vhci_config_t of the vhci whose cache is flushed.
 * The thread also exits as soon as it sees MDI_VHC_EXIT set in vhc_flags.
 * On the way out it clears MDI_VHC_VHCACHE_FLUSH_THREAD, the flag that
 * stop_vhcache_async_threads() and stop_vhcache_flush_thread() poll on.
 */
static void
vhcache_flush_thread(void *arg)
{
        mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
        clock_t idle_time, quit_at_ticks;
        callb_cpr_t cprinfo;

        /* number of seconds to sleep idle before exiting */
        idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;

        CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
            "mdi_vhcache_flush");
        mutex_enter(&vhc->vhc_lock);
        for (; ; ) {
                /*
                 * Flush phase: runs for as long as the cache is dirty and
                 * no exit has been requested.
                 */
                while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
                    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
                        if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
                                /* too early: wait for the scheduled time */
                                CALLB_CPR_SAFE_BEGIN(&cprinfo);
                                (void) cv_timedwait(&vhc->vhc_cv,
                                    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
                                CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
                        } else {
                                /*
                                 * Clear the dirty flag before dropping the
                                 * lock for the flush; a failed flush marks
                                 * the cache dirty again via vhcache_dirty().
                                 */
                                vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
                                mutex_exit(&vhc->vhc_lock);

                                if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
                                        vhcache_dirty(vhc);

                                mutex_enter(&vhc->vhc_lock);
                        }
                }

                quit_at_ticks = ddi_get_lbolt() + idle_time;

                /*
                 * Idle phase: wait until the cache becomes dirty, an exit
                 * is requested, or the idle timeout expires.
                 */
                while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
                    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
                    ddi_get_lbolt() < quit_at_ticks) {
                        CALLB_CPR_SAFE_BEGIN(&cprinfo);
                        (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
                            quit_at_ticks);
                        CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
                }

                /*
                 * Leave on an exit request or when the idle phase ended
                 * with the cache still clean; otherwise go flush again.
                 */
                if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
                    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
                        goto out;
        }

out:
        vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
        /* CALLB_CPR_EXIT releases the vhc->vhc_lock */
        CALLB_CPR_EXIT(&cprinfo);
}
8369 
8370 /*
8371  * Make vhci cache dirty and schedule flushing by vhcache flush thread.
8372  */
8373 static void
8374 vhcache_dirty(mdi_vhci_config_t *vhc)
8375 {
8376         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8377         int create_thread;
8378 
8379         rw_enter(&vhcache->vhcache_lock, RW_READER);
8380         /* do not flush cache until the cache is fully built */
8381         if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
8382                 rw_exit(&vhcache->vhcache_lock);
8383                 return;
8384         }
8385         rw_exit(&vhcache->vhcache_lock);
8386 
8387         mutex_enter(&vhc->vhc_lock);
8388         if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
8389                 mutex_exit(&vhc->vhc_lock);
8390                 return;
8391         }
8392 
8393         vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
8394         vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
8395             mdi_vhcache_flush_delay * TICKS_PER_SECOND;
8396         if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
8397                 cv_broadcast(&vhc->vhc_cv);
8398                 create_thread = 0;
8399         } else {
8400                 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
8401                 create_thread = 1;
8402         }
8403         mutex_exit(&vhc->vhc_lock);
8404 
8405         if (create_thread)
8406                 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
8407                     0, &p0, TS_RUN, minclsyspri);
8408 }
8409 
/*
 * phci bus config structure - one for each phci bus config operation that
 * we initiate on behalf of a vhci.
 *
 * Allocated and linked together by bus_config_all_phcis(); each element and
 * its path string are freed by bus_config_phci() once the operation is done.
 */
typedef struct mdi_phci_bus_config_s {
        char *phbc_phci_path;   /* allocated copy of the phci device path */
        struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */
        struct mdi_phci_bus_config_s *phbc_next;        /* next in the list */
} mdi_phci_bus_config_t;
8419 
/*
 * vhci bus config structure - one for each vhci bus config operation.
 *
 * Shared by all the per-phci operations started by bus_config_all_phcis(),
 * which waits on vhbc_cv until vhbc_thr_count drops to zero.
 */
typedef struct mdi_vhci_bus_config_s {
        ddi_bus_config_op_t vhbc_op;    /* bus config op */
        major_t vhbc_op_major;          /* bus config op major */
        uint_t vhbc_op_flags;           /* bus config op flags */
        kmutex_t vhbc_lock;             /* held while updating vhbc_thr_count */
        kcondvar_t vhbc_cv;             /* broadcast when count reaches zero */
        int vhbc_thr_count;             /* phci config ops not yet finished */
} mdi_vhci_bus_config_t;
8429 
8430 /*
8431  * bus config the specified phci
8432  */
8433 static void
8434 bus_config_phci(void *arg)
8435 {
8436         mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
8437         mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
8438         dev_info_t *ph_dip;
8439 
8440         /*
8441          * first configure all path components upto phci and then configure
8442          * the phci children.
8443          */
8444         if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
8445             != NULL) {
8446                 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
8447                     vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
8448                         (void) ndi_devi_config_driver(ph_dip,
8449                             vhbc->vhbc_op_flags,
8450                             vhbc->vhbc_op_major);
8451                 } else
8452                         (void) ndi_devi_config(ph_dip,
8453                             vhbc->vhbc_op_flags);
8454 
8455                 /* release the hold that e_ddi_hold_devi_by_path() placed */
8456                 ndi_rele_devi(ph_dip);
8457         }
8458 
8459         kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
8460         kmem_free(phbc, sizeof (*phbc));
8461 
8462         mutex_enter(&vhbc->vhbc_lock);
8463         vhbc->vhbc_thr_count--;
8464         if (vhbc->vhbc_thr_count == 0)
8465                 cv_broadcast(&vhbc->vhbc_cv);
8466         mutex_exit(&vhbc->vhbc_lock);
8467 }
8468 
8469 /*
8470  * Bus config all phcis associated with the vhci in parallel.
8471  * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
8472  */
8473 static void
8474 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
8475     ddi_bus_config_op_t op, major_t maj)
8476 {
8477         mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
8478         mdi_vhci_bus_config_t *vhbc;
8479         mdi_vhcache_phci_t *cphci;
8480 
8481         rw_enter(&vhcache->vhcache_lock, RW_READER);
8482         if (vhcache->vhcache_phci_head == NULL) {
8483                 rw_exit(&vhcache->vhcache_lock);
8484                 return;
8485         }
8486 
8487         vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);
8488 
8489         for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8490             cphci = cphci->cphci_next) {
8491                 /* skip phcis that haven't attached before root is available */
8492                 if (!modrootloaded && (cphci->cphci_phci == NULL))
8493                         continue;
8494                 phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
8495                 phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
8496                     KM_SLEEP);
8497                 phbc->phbc_vhbusconfig = vhbc;
8498                 phbc->phbc_next = phbc_head;
8499                 phbc_head = phbc;
8500                 vhbc->vhbc_thr_count++;
8501         }
8502         rw_exit(&vhcache->vhcache_lock);
8503 
8504         vhbc->vhbc_op = op;
8505         vhbc->vhbc_op_major = maj;
8506         vhbc->vhbc_op_flags = NDI_NO_EVENT |
8507             (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
8508         mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
8509         cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);
8510 
8511         /* now create threads to initiate bus config on all phcis in parallel */
8512         for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
8513                 phbc_next = phbc->phbc_next;
8514                 if (mdi_mtc_off)
8515                         bus_config_phci((void *)phbc);
8516                 else
8517                         (void) thread_create(NULL, 0, bus_config_phci, phbc,
8518                             0, &p0, TS_RUN, minclsyspri);
8519         }
8520 
8521         mutex_enter(&vhbc->vhbc_lock);
8522         /* wait until all threads exit */
8523         while (vhbc->vhbc_thr_count > 0)
8524                 cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
8525         mutex_exit(&vhbc->vhbc_lock);
8526 
8527         mutex_destroy(&vhbc->vhbc_lock);
8528         cv_destroy(&vhbc->vhbc_cv);
8529         kmem_free(vhbc, sizeof (*vhbc));
8530 }
8531 
8532 /*
8533  * Single threaded version of bus_config_all_phcis()
8534  */
8535 static void
8536 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
8537     ddi_bus_config_op_t op, major_t maj)
8538 {
8539         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8540 
8541         single_threaded_vhconfig_enter(vhc);
8542         bus_config_all_phcis(vhcache, flags, op, maj);
8543         single_threaded_vhconfig_exit(vhc);
8544 }
8545 
8546 /*
8547  * Perform BUS_CONFIG_ONE on the specified child of the phci.
8548  * The path includes the child component in addition to the phci path.
8549  */
8550 static int
8551 bus_config_one_phci_child(char *path)
8552 {
8553         dev_info_t *ph_dip, *child;
8554         char *devnm;
8555         int rv = MDI_FAILURE;
8556 
8557         /* extract the child component of the phci */
8558         devnm = strrchr(path, '/');
8559         *devnm++ = '\0';
8560 
8561         /*
8562          * first configure all path components upto phci and then
8563          * configure the phci child.
8564          */
8565         if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
8566                 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
8567                     NDI_SUCCESS) {
8568                         /*
8569                          * release the hold that ndi_devi_config_one() placed
8570                          */
8571                         ndi_rele_devi(child);
8572                         rv = MDI_SUCCESS;
8573                 }
8574 
8575                 /* release the hold that e_ddi_hold_devi_by_path() placed */
8576                 ndi_rele_devi(ph_dip);
8577         }
8578 
8579         devnm--;
8580         *devnm = '/';
8581         return (rv);
8582 }
8583 
8584 /*
8585  * Build a list of phci client paths for the specified vhci client.
8586  * The list includes only those phci client paths which aren't configured yet.
8587  */
8588 static mdi_phys_path_t *
8589 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
8590 {
8591         mdi_vhcache_pathinfo_t *cpi;
8592         mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
8593         int config_path, len;
8594 
8595         for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8596                 /*
8597                  * include only those paths that aren't configured.
8598                  */
8599                 config_path = 0;
8600                 if (cpi->cpi_pip == NULL)
8601                         config_path = 1;
8602                 else {
8603                         MDI_PI_LOCK(cpi->cpi_pip);
8604                         if (MDI_PI_IS_INIT(cpi->cpi_pip))
8605                                 config_path = 1;
8606                         MDI_PI_UNLOCK(cpi->cpi_pip);
8607                 }
8608 
8609                 if (config_path) {
8610                         pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
8611                         len = strlen(cpi->cpi_cphci->cphci_path) +
8612                             strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
8613                         pp->phys_path = kmem_alloc(len, KM_SLEEP);
8614                         (void) snprintf(pp->phys_path, len, "%s/%s@%s",
8615                             cpi->cpi_cphci->cphci_path, ct_name,
8616                             cpi->cpi_addr);
8617                         pp->phys_path_next = NULL;
8618 
8619                         if (pp_head == NULL)
8620                                 pp_head = pp;
8621                         else
8622                                 pp_tail->phys_path_next = pp;
8623                         pp_tail = pp;
8624                 }
8625         }
8626 
8627         return (pp_head);
8628 }
8629 
8630 /*
8631  * Free the memory allocated for phci client path list.
8632  */
8633 static void
8634 free_phclient_path_list(mdi_phys_path_t *pp_head)
8635 {
8636         mdi_phys_path_t *pp, *pp_next;
8637 
8638         for (pp = pp_head; pp != NULL; pp = pp_next) {
8639                 pp_next = pp->phys_path_next;
8640                 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
8641                 kmem_free(pp, sizeof (*pp));
8642         }
8643 }
8644 
8645 /*
8646  * Allocated async client structure and initialize with the specified values.
8647  */
8648 static mdi_async_client_config_t *
8649 alloc_async_client_config(char *ct_name, char *ct_addr,
8650     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8651 {
8652         mdi_async_client_config_t *acc;
8653 
8654         acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
8655         acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
8656         acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
8657         acc->acc_phclient_path_list_head = pp_head;
8658         init_vhcache_lookup_token(&acc->acc_token, tok);
8659         acc->acc_next = NULL;
8660         return (acc);
8661 }
8662 
8663 /*
8664  * Free the memory allocated for the async client structure and their members.
8665  */
8666 static void
8667 free_async_client_config(mdi_async_client_config_t *acc)
8668 {
8669         if (acc->acc_phclient_path_list_head)
8670                 free_phclient_path_list(acc->acc_phclient_path_list_head);
8671         kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
8672         kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
8673         kmem_free(acc, sizeof (*acc));
8674 }
8675 
8676 /*
8677  * Sort vhcache pathinfos (cpis) of the specified client.
8678  * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
8679  * flag set come at the beginning of the list. All cpis which have this
8680  * flag set come at the end of the list.
8681  */
8682 static void
8683 sort_vhcache_paths(mdi_vhcache_client_t *cct)
8684 {
8685         mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;
8686 
8687         cpi_head = cct->cct_cpi_head;
8688         cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
8689         for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
8690                 cpi_next = cpi->cpi_next;
8691                 enqueue_vhcache_pathinfo(cct, cpi);
8692         }
8693 }
8694 
/*
 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
 * every vhcache pathinfo of the specified client. If not adjust the flag
 * setting appropriately.
 *
 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
 * on-disk vhci cache. So every time this flag is updated the cache must be
 * flushed.
 *
 * The client is identified by ct_name/ct_addr and looked up with the help
 * of tok. The flag is considered correct when it is set exactly for those
 * cpis that have no pathinfo node (cpi_pip == NULL). The vhcache lock is
 * taken and released here; nothing is done if the client is not found.
 */
static void
adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_vhcache_lookup_token_t *tok)
{
        mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
        mdi_vhcache_client_t *cct;
        mdi_vhcache_pathinfo_t *cpi;

        rw_enter(&vhcache->vhcache_lock, RW_READER);
        if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
            == NULL) {
                rw_exit(&vhcache->vhcache_lock);
                return;
        }

        /*
         * to avoid unnecessary on-disk cache updates, first check if an
         * update is really needed. If no update is needed simply return.
         */
        for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
                /* stop at the first cpi whose flag disagrees with cpi_pip */
                if ((cpi->cpi_pip != NULL &&
                    (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
                    (cpi->cpi_pip == NULL &&
                    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
                        break;
                }
        }
        if (cpi == NULL) {
                /* every flag is already correct */
                rw_exit(&vhcache->vhcache_lock);
                return;
        }

        /*
         * The flags are modified under the writer lock. If the reader lock
         * cannot be upgraded in place it is dropped and re-acquired as
         * writer, and the client is looked up again after re-acquiring.
         */
        if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
                rw_exit(&vhcache->vhcache_lock);
                rw_enter(&vhcache->vhcache_lock, RW_WRITER);
                if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
                    tok)) == NULL) {
                        rw_exit(&vhcache->vhcache_lock);
                        return;
                }
        }

        /* bring every flag in line with the presence of a pathinfo node */
        for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
                if (cpi->cpi_pip != NULL)
                        cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
                else
                        cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
        }
        sort_vhcache_paths(cct);

        rw_exit(&vhcache->vhcache_lock);
        /* schedule the flush only after the cache lock has been dropped */
        vhcache_dirty(vhc);
}
8757 
8758 /*
8759  * Configure all specified paths of the client.
8760  */
8761 static void
8762 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8763     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8764 {
8765         mdi_phys_path_t *pp;
8766 
8767         for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
8768                 (void) bus_config_one_phci_child(pp->phys_path);
8769         adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
8770 }
8771 
/*
 * Dequeue elements from vhci async client config list and bus configure
 * their corresponding phci clients.
 *
 * Thread body created by config_client_paths_async(); arg is the vhci
 * config structure (mdi_vhci_config_t *). Each dequeued request is
 * processed with config_client_paths_sync() and then freed. The thread
 * exits when MDI_VHC_EXIT is set or when the list has stayed empty for
 * mdi_async_config_idle_time seconds, decrementing vhc_acc_thrcount on the
 * way out.
 */
static void
config_client_paths_thread(void *arg)
{
        mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
        mdi_async_client_config_t *acc;
        clock_t quit_at_ticks;
        /* idle timeout (configured in seconds) converted to clock ticks */
        clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
        callb_cpr_t cprinfo;

        CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
            "mdi_config_client_paths");

        for (; ; ) {
                /* the idle deadline restarts after every processed request */
                quit_at_ticks = ddi_get_lbolt() + idle_time;

                mutex_enter(&vhc->vhc_lock);
                /* wait for work, an exit request, or the idle deadline */
                while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
                    vhc->vhc_acc_list_head == NULL &&
                    ddi_get_lbolt() < quit_at_ticks) {
                        CALLB_CPR_SAFE_BEGIN(&cprinfo);
                        (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
                            quit_at_ticks);
                        CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
                }

                /* leave, with vhc_lock still held, if there is nothing to do */
                if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
                    vhc->vhc_acc_list_head == NULL)
                        goto out;

                /* unlink the request at the head of the list */
                acc = vhc->vhc_acc_list_head;
                vhc->vhc_acc_list_head = acc->acc_next;
                if (vhc->vhc_acc_list_head == NULL)
                        vhc->vhc_acc_list_tail = NULL;
                vhc->vhc_acc_count--;
                mutex_exit(&vhc->vhc_lock);

                /* the configuration itself runs without vhc_lock */
                config_client_paths_sync(vhc, acc->acc_ct_name,
                    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
                    &acc->acc_token);

                /* frees the request together with its path list */
                free_async_client_config(acc);
        }

out:
        /* vhc_lock is held here, so the count is updated under the lock */
        vhc->vhc_acc_thrcount--;
        /* CALLB_CPR_EXIT releases the vhc->vhc_lock */
        CALLB_CPR_EXIT(&cprinfo);
}
8824 
8825 /*
8826  * Arrange for all the phci client paths (pp_head) for the specified client
8827  * to be bus configured asynchronously by a thread.
8828  */
8829 static void
8830 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8831     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8832 {
8833         mdi_async_client_config_t *acc, *newacc;
8834         int create_thread;
8835 
8836         if (pp_head == NULL)
8837                 return;
8838 
8839         if (mdi_mtc_off) {
8840                 config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
8841                 free_phclient_path_list(pp_head);
8842                 return;
8843         }
8844 
8845         newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
8846         ASSERT(newacc);
8847 
8848         mutex_enter(&vhc->vhc_lock);
8849         for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
8850                 if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
8851                     strcmp(ct_addr, acc->acc_ct_addr) == 0) {
8852                         free_async_client_config(newacc);
8853                         mutex_exit(&vhc->vhc_lock);
8854                         return;
8855                 }
8856         }
8857 
8858         if (vhc->vhc_acc_list_head == NULL)
8859                 vhc->vhc_acc_list_head = newacc;
8860         else
8861                 vhc->vhc_acc_list_tail->acc_next = newacc;
8862         vhc->vhc_acc_list_tail = newacc;
8863         vhc->vhc_acc_count++;
8864         if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
8865                 cv_broadcast(&vhc->vhc_cv);
8866                 create_thread = 0;
8867         } else {
8868                 vhc->vhc_acc_thrcount++;
8869                 create_thread = 1;
8870         }
8871         mutex_exit(&vhc->vhc_lock);
8872 
8873         if (create_thread)
8874                 (void) thread_create(NULL, 0, config_client_paths_thread, vhc,
8875                     0, &p0, TS_RUN, minclsyspri);
8876 }
8877 
8878 /*
8879  * Return number of online paths for the specified client.
8880  */
8881 static int
8882 nonline_paths(mdi_vhcache_client_t *cct)
8883 {
8884         mdi_vhcache_pathinfo_t *cpi;
8885         int online_count = 0;
8886 
8887         for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8888                 if (cpi->cpi_pip != NULL) {
8889                         MDI_PI_LOCK(cpi->cpi_pip);
8890                         if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
8891                                 online_count++;
8892                         MDI_PI_UNLOCK(cpi->cpi_pip);
8893                 }
8894         }
8895 
8896         return (online_count);
8897 }
8898 
/*
 * Bus configure all paths for the specified vhci client.
 * If at least one path for the client is already online, the remaining paths
 * will be configured asynchronously. Otherwise, it synchronously configures
 * the paths until at least one path is online and then rest of the paths
 * will be configured asynchronously.
 *
 * Locking: the caller must hold vhcache_lock (asserted below). The lock is
 * released on every path through this function and is not held on return.
 *
 * ct_name and ct_addr identify the client; nothing is configured if either
 * is NULL, if the client is not in the cache, or if it has no unconfigured
 * paths.
 */
static void
config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
{
        mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
        mdi_phys_path_t *pp_head, *pp;
        mdi_vhcache_client_t *cct;
        mdi_vhcache_lookup_token_t tok;

        ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

        init_vhcache_lookup_token(&tok, NULL);

        if (ct_name == NULL || ct_addr == NULL ||
            (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
            == NULL ||
            (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
                rw_exit(&vhcache->vhcache_lock);
                return;
        }

        /* if at least one path is online, configure the rest asynchronously */
        if (nonline_paths(cct) > 0) {
                rw_exit(&vhcache->vhcache_lock);
                /* config_client_paths_async() takes over the whole list */
                config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
                return;
        }

        rw_exit(&vhcache->vhcache_lock);

        /* no path is online yet; configure paths one at a time */
        for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
                if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
                        rw_enter(&vhcache->vhcache_lock, RW_READER);

                        /* the lock was dropped, so look the client up again */
                        if ((cct = lookup_vhcache_client(vhcache, ct_name,
                            ct_addr, &tok)) == NULL) {
                                rw_exit(&vhcache->vhcache_lock);
                                goto out;
                        }

                        if (nonline_paths(cct) > 0 &&
                            pp->phys_path_next != NULL) {
                                rw_exit(&vhcache->vhcache_lock);
                                /*
                                 * A path is online now. Hand the rest of the
                                 * list over for asynchronous configuration
                                 * and cut it off from the part freed below.
                                 */
                                config_client_paths_async(vhc, ct_name, ct_addr,
                                    pp->phys_path_next, &tok);
                                pp->phys_path_next = NULL;
                                goto out;
                        }

                        rw_exit(&vhcache->vhcache_lock);
                }
        }

        /* every path was attempted synchronously; update the cached hints */
        adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
out:
        /* frees only the elements still linked from pp_head */
        free_phclient_path_list(pp_head);
}
8962 
8963 static void
8964 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
8965 {
8966         mutex_enter(&vhc->vhc_lock);
8967         while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
8968                 cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
8969         vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
8970         mutex_exit(&vhc->vhc_lock);
8971 }
8972 
/*
 * End a single threaded vhci config operation started with
 * single_threaded_vhconfig_enter(): clear MDI_VHC_SINGLE_THREADED and wake
 * up all threads waiting on vhc_cv.
 */
static void
single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
{
        mutex_enter(&vhc->vhc_lock);
        vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
        /* every vhc_cv waiter is woken and re-checks its own condition */
        cv_broadcast(&vhc->vhc_cv);
        mutex_exit(&vhc->vhc_lock);
}
8981 
/*
 * Entry of the built-in phci driver tables below: the name of a phci
 * driver and whether that driver supports the root device.
 */
typedef struct mdi_phci_driver_info {
        char    *phdriver_name; /* name of the phci driver */

        /* set to non zero if the phci driver supports root device */
        int     phdriver_root_support;
} mdi_phci_driver_info_t;

/*
 * vhci class and root support capability of a phci driver can be
 * specified using ddi-vhci-class and ddi-no-root-support properties in the
 * phci driver.conf file. The built-in tables below contain this information
 * for those phci drivers whose driver.conf files don't yet contain this info.
 *
 * All phci drivers except iscsi have root device support.
 */
static mdi_phci_driver_info_t scsi_phci_driver_list[] = {
        { "fp", 1 },
        { "iscsi", 0 },
        { "ibsrp", 1 }
};

/* each entry is individually braced, matching the scsi table above */
static mdi_phci_driver_info_t ib_phci_driver_list[] = {
        { "tavor", 1 }
};
9004 
9005 static void *
9006 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size)
9007 {
9008         void *new_ptr;
9009 
9010         new_ptr = kmem_zalloc(new_size, KM_SLEEP);
9011         if (old_ptr) {
9012                 bcopy(old_ptr, new_ptr, MIN(old_size, new_size));
9013                 kmem_free(old_ptr, old_size);
9014         }
9015         return (new_ptr);
9016 }
9017 
9018 static void
9019 add_to_phci_list(char ***driver_list, int **root_support_list,
9020     int *cur_elements, int *max_elements, char *driver_name, int root_support)
9021 {
9022         ASSERT(*cur_elements <= *max_elements);
9023         if (*cur_elements == *max_elements) {
9024                 *max_elements += 10;
9025                 *driver_list = mdi_realloc(*driver_list,
9026                     sizeof (char *) * (*cur_elements),
9027                     sizeof (char *) * (*max_elements));
9028                 *root_support_list = mdi_realloc(*root_support_list,
9029                     sizeof (int) * (*cur_elements),
9030                     sizeof (int) * (*max_elements));
9031         }
9032         (*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP);
9033         (*root_support_list)[*cur_elements] = root_support;
9034         (*cur_elements)++;
9035 }
9036 
9037 static void
9038 get_phci_driver_list(char *vhci_class, char ***driver_list,
9039     int **root_support_list, int *cur_elements, int *max_elements)
9040 {
9041         mdi_phci_driver_info_t  *st_driver_list, *p;
9042         int             st_ndrivers, root_support, i, j, driver_conf_count;
9043         major_t         m;
9044         struct devnames *dnp;
9045         ddi_prop_t      *propp;
9046 
9047         *driver_list = NULL;
9048         *root_support_list = NULL;
9049         *cur_elements = 0;
9050         *max_elements = 0;
9051 
9052         /* add the phci drivers derived from the phci driver.conf files */
9053         for (m = 0; m < devcnt; m++) {
9054                 dnp = &devnamesp[m];
9055 
9056                 if (dnp->dn_flags & DN_PHCI_DRIVER) {
9057                         LOCK_DEV_OPS(&dnp->dn_lock);
9058                         if (dnp->dn_global_prop_ptr != NULL &&
9059                             (propp = i_ddi_prop_search(DDI_DEV_T_ANY,
9060                             DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING,
9061                             &dnp->dn_global_prop_ptr->prop_list)) != NULL &&
9062                             strcmp(propp->prop_val, vhci_class) == 0) {
9063 
9064                                 root_support = (i_ddi_prop_search(DDI_DEV_T_ANY,
9065                                     DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT,
9066                                     &dnp->dn_global_prop_ptr->prop_list)
9067                                     == NULL) ? 1 : 0;
9068 
9069                                 add_to_phci_list(driver_list, root_support_list,
9070                                     cur_elements, max_elements, dnp->dn_name,
9071                                     root_support);
9072 
9073                                 UNLOCK_DEV_OPS(&dnp->dn_lock);
9074                         } else
9075                                 UNLOCK_DEV_OPS(&dnp->dn_lock);
9076                 }
9077         }
9078 
9079         driver_conf_count = *cur_elements;
9080 
9081         /* add the phci drivers specified in the built-in tables */
9082         if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) {
9083                 st_driver_list = scsi_phci_driver_list;
9084                 st_ndrivers = sizeof (scsi_phci_driver_list) /
9085                     sizeof (mdi_phci_driver_info_t);
9086         } else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) {
9087                 st_driver_list = ib_phci_driver_list;
9088                 st_ndrivers = sizeof (ib_phci_driver_list) /
9089                     sizeof (mdi_phci_driver_info_t);
9090         } else {
9091                 st_driver_list = NULL;
9092                 st_ndrivers = 0;
9093         }
9094 
9095         for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) {
9096                 /* add this phci driver if not already added before */
9097                 for (j = 0; j < driver_conf_count; j++) {
9098                         if (strcmp((*driver_list)[j], p->phdriver_name) == 0)
9099                                 break;
9100                 }
9101                 if (j == driver_conf_count) {
9102                         add_to_phci_list(driver_list, root_support_list,
9103                             cur_elements, max_elements, p->phdriver_name,
9104                             p->phdriver_root_support);
9105                 }
9106         }
9107 }
9108 
9109 /*
9110  * Attach the phci driver instances associated with the specified vhci class.
9111  * If root is mounted attach all phci driver instances.
9112  * If root is not mounted, attach the instances of only those phci
9113  * drivers that have the root support.
9114  */
9115 static void
9116 attach_phci_drivers(char *vhci_class)
9117 {
9118         char    **driver_list, **p;
9119         int     *root_support_list;
9120         int     cur_elements, max_elements, i;
9121         major_t m;
9122 
9123         get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
9124             &cur_elements, &max_elements);
9125 
9126         for (i = 0; i < cur_elements; i++) {
9127                 if (modrootloaded || root_support_list[i]) {
9128                         m = ddi_name_to_major(driver_list[i]);
9129                         if (m != DDI_MAJOR_T_NONE &&
9130                             ddi_hold_installed_driver(m))
9131                                 ddi_rele_driver(m);
9132                 }
9133         }
9134 
9135         if (driver_list) {
9136                 for (i = 0, p = driver_list; i < cur_elements; i++, p++)
9137                         kmem_free(*p, strlen(*p) + 1);
9138                 kmem_free(driver_list, sizeof (char *) * max_elements);
9139                 kmem_free(root_support_list, sizeof (int) * max_elements);
9140         }
9141 }
9142 
9143 /*
9144  * Build vhci cache:
9145  *
9146  * Attach phci driver instances and then drive BUS_CONFIG_ALL on
9147  * the phci driver instances. During this process the cache gets built.
9148  *
9149  * Cache is built fully if the root is mounted.
9150  * If the root is not mounted, phci drivers that do not have root support
9151  * are not attached. As a result the cache is built partially. The entries
9152  * in the cache reflect only those phci drivers that have root support.
9153  */
9154 static int
9155 build_vhci_cache(mdi_vhci_t *vh)
9156 {
9157         mdi_vhci_config_t *vhc = vh->vh_config;
9158         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9159 
9160         single_threaded_vhconfig_enter(vhc);
9161 
9162         rw_enter(&vhcache->vhcache_lock, RW_READER);
9163         if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
9164                 rw_exit(&vhcache->vhcache_lock);
9165                 single_threaded_vhconfig_exit(vhc);
9166                 return (0);
9167         }
9168         rw_exit(&vhcache->vhcache_lock);
9169 
9170         attach_phci_drivers(vh->vh_class);
9171         bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
9172             BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
9173 
9174         rw_enter(&vhcache->vhcache_lock, RW_WRITER);
9175         vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
9176         rw_exit(&vhcache->vhcache_lock);
9177 
9178         single_threaded_vhconfig_exit(vhc);
9179         vhcache_dirty(vhc);
9180         return (1);
9181 }
9182 
9183 /*
9184  * Determine if discovery of paths is needed.
9185  */
9186 static int
9187 vhcache_do_discovery(mdi_vhci_config_t *vhc)
9188 {
9189         int rv = 1;
9190 
9191         mutex_enter(&vhc->vhc_lock);
9192         if (i_ddi_io_initialized() == 0) {
9193                 if (vhc->vhc_path_discovery_boot > 0) {
9194                         vhc->vhc_path_discovery_boot--;
9195                         goto out;
9196                 }
9197         } else {
9198                 if (vhc->vhc_path_discovery_postboot > 0) {
9199                         vhc->vhc_path_discovery_postboot--;
9200                         goto out;
9201                 }
9202         }
9203 
9204         /*
9205          * Do full path discovery at most once per mdi_path_discovery_interval.
9206          * This is to avoid a series of full path discoveries when opening
9207          * stale /dev/[r]dsk links.
9208          */
9209         if (mdi_path_discovery_interval != -1 &&
9210             ddi_get_lbolt64() >= vhc->vhc_path_discovery_cutoff_time)
9211                 goto out;
9212 
9213         rv = 0;
9214 out:
9215         mutex_exit(&vhc->vhc_lock);
9216         return (rv);
9217 }
9218 
9219 /*
9220  * Discover all paths:
9221  *
9222  * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
9223  * driver instances. During this process all paths will be discovered.
9224  */
9225 static int
9226 vhcache_discover_paths(mdi_vhci_t *vh)
9227 {
9228         mdi_vhci_config_t *vhc = vh->vh_config;
9229         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9230         int rv = 0;
9231 
9232         single_threaded_vhconfig_enter(vhc);
9233 
9234         if (vhcache_do_discovery(vhc)) {
9235                 attach_phci_drivers(vh->vh_class);
9236                 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE |
9237                     NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
9238 
9239                 mutex_enter(&vhc->vhc_lock);
9240                 vhc->vhc_path_discovery_cutoff_time = ddi_get_lbolt64() +
9241                     mdi_path_discovery_interval * TICKS_PER_SECOND;
9242                 mutex_exit(&vhc->vhc_lock);
9243                 rv = 1;
9244         }
9245 
9246         single_threaded_vhconfig_exit(vhc);
9247         return (rv);
9248 }
9249 
9250 /*
9251  * Generic vhci bus config implementation:
9252  *
9253  * Parameters
9254  *      vdip    vhci dip
9255  *      flags   bus config flags
9256  *      op      bus config operation
9257  *      The remaining parameters are bus config operation specific
9258  *
9259  * for BUS_CONFIG_ONE
9260  *      arg     pointer to name@addr
9261  *      child   upon successful return from this function, *child will be
9262  *              set to the configured and held devinfo child node of vdip.
9263  *      ct_addr pointer to client address (i.e. GUID)
9264  *
9265  * for BUS_CONFIG_DRIVER
9266  *      arg     major number of the driver
9267  *      child and ct_addr parameters are ignored
9268  *
9269  * for BUS_CONFIG_ALL
9270  *      arg, child, and ct_addr parameters are ignored
9271  *
9272  * Note that for the rest of the bus config operations, this function simply
9273  * calls the framework provided default bus config routine.
9274  */
9275 int
9276 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
9277     void *arg, dev_info_t **child, char *ct_addr)
9278 {
9279         mdi_vhci_t *vh = i_devi_get_vhci(vdip);
9280         mdi_vhci_config_t *vhc = vh->vh_config;
9281         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9282         int rv = 0;
9283         int params_valid = 0;
9284         char *cp;
9285 
9286         /*
9287          * To bus config vhcis we relay operation, possibly using another
9288          * thread, to phcis. The phci driver then interacts with MDI to cause
9289          * vhci child nodes to be enumerated under the vhci node.  Adding a
9290          * vhci child requires an ndi_devi_enter of the vhci. Since another
9291          * thread may be adding the child, to avoid deadlock we can't wait
9292          * for the relayed operations to complete if we have already entered
9293          * the vhci node.
9294          */
9295         if (DEVI_BUSY_OWNED(vdip)) {
9296                 MDI_DEBUG(2, (MDI_NOTE, vdip,
9297                     "vhci dip is busy owned %p", (void *)vdip));
9298                 goto default_bus_config;
9299         }
9300 
9301         rw_enter(&vhcache->vhcache_lock, RW_READER);
9302         if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
9303                 rw_exit(&vhcache->vhcache_lock);
9304                 rv = build_vhci_cache(vh);
9305                 rw_enter(&vhcache->vhcache_lock, RW_READER);
9306         }
9307 
9308         switch (op) {
9309         case BUS_CONFIG_ONE:
9310                 if (arg != NULL && ct_addr != NULL) {
9311                         /* extract node name */
9312                         cp = (char *)arg;
9313                         while (*cp != '\0' && *cp != '@')
9314                                 cp++;
9315                         if (*cp == '@') {
9316                                 params_valid = 1;
9317                                 *cp = '\0';
9318                                 config_client_paths(vhc, (char *)arg, ct_addr);
9319                                 /* config_client_paths() releases cache_lock */
9320                                 *cp = '@';
9321                                 break;
9322                         }
9323                 }
9324 
9325                 rw_exit(&vhcache->vhcache_lock);
9326                 break;
9327 
9328         case BUS_CONFIG_DRIVER:
9329                 rw_exit(&vhcache->vhcache_lock);
9330                 if (rv == 0)
9331                         st_bus_config_all_phcis(vhc, flags, op,
9332                             (major_t)(uintptr_t)arg);
9333                 break;
9334 
9335         case BUS_CONFIG_ALL:
9336                 rw_exit(&vhcache->vhcache_lock);
9337                 if (rv == 0)
9338                         st_bus_config_all_phcis(vhc, flags, op, -1);
9339                 break;
9340 
9341         default:
9342                 rw_exit(&vhcache->vhcache_lock);
9343                 break;
9344         }
9345 
9346 
9347 default_bus_config:
9348         /*
9349          * All requested child nodes are enumerated under the vhci.
9350          * Now configure them.
9351          */
9352         if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
9353             NDI_SUCCESS) {
9354                 return (MDI_SUCCESS);
9355         } else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
9356                 /* discover all paths and try configuring again */
9357                 if (vhcache_discover_paths(vh) &&
9358                     ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
9359                     NDI_SUCCESS)
9360                         return (MDI_SUCCESS);
9361         }
9362 
9363         return (MDI_FAILURE);
9364 }
9365 
9366 /*
9367  * Read the on-disk vhci cache into an nvlist for the specified vhci class.
9368  */
9369 static nvlist_t *
9370 read_on_disk_vhci_cache(char *vhci_class)
9371 {
9372         nvlist_t *nvl;
9373         int err;
9374         char *filename;
9375 
9376         filename = vhclass2vhcache_filename(vhci_class);
9377 
9378         if ((err = fread_nvlist(filename, &nvl)) == 0) {
9379                 kmem_free(filename, strlen(filename) + 1);
9380                 return (nvl);
9381         } else if (err == EIO)
9382                 cmn_err(CE_WARN, "%s: I/O error, will recreate", filename);
9383         else if (err == EINVAL)
9384                 cmn_err(CE_WARN,
9385                     "%s: data file corrupted, will recreate", filename);
9386 
9387         kmem_free(filename, strlen(filename) + 1);
9388         return (NULL);
9389 }
9390 
9391 /*
9392  * Read on-disk vhci cache into nvlists for all vhci classes.
9393  * Called during booting by i_ddi_read_devices_files().
9394  */
9395 void
9396 mdi_read_devices_files(void)
9397 {
9398         int i;
9399 
9400         for (i = 0; i < N_VHCI_CLASSES; i++)
9401                 vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
9402 }
9403 
9404 /*
9405  * Remove all stale entries from vhci cache.
9406  */
9407 static void
9408 clean_vhcache(mdi_vhci_config_t *vhc)
9409 {
9410         mdi_vhci_cache_t        *vhcache = &vhc->vhc_vhcache;
9411         mdi_vhcache_phci_t      *phci, *nxt_phci;
9412         mdi_vhcache_client_t    *client, *nxt_client;
9413         mdi_vhcache_pathinfo_t  *path, *nxt_path;
9414 
9415         rw_enter(&vhcache->vhcache_lock, RW_WRITER);
9416 
9417         client = vhcache->vhcache_client_head;
9418         vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
9419         for ( ; client != NULL; client = nxt_client) {
9420                 nxt_client = client->cct_next;
9421 
9422                 path = client->cct_cpi_head;
9423                 client->cct_cpi_head = client->cct_cpi_tail = NULL;
9424                 for ( ; path != NULL; path = nxt_path) {
9425                         nxt_path = path->cpi_next;
9426                         if ((path->cpi_cphci->cphci_phci != NULL) &&
9427                             (path->cpi_pip != NULL)) {
9428                                 enqueue_tail_vhcache_pathinfo(client, path);
9429                         } else if (path->cpi_pip != NULL) {
9430                                 /* Not valid to have a path without a phci. */
9431                                 free_vhcache_pathinfo(path);
9432                         }
9433                 }
9434 
9435                 if (client->cct_cpi_head != NULL)
9436                         enqueue_vhcache_client(vhcache, client);
9437                 else {
9438                         (void) mod_hash_destroy(vhcache->vhcache_client_hash,
9439                             (mod_hash_key_t)client->cct_name_addr);
9440                         free_vhcache_client(client);
9441                 }
9442         }
9443 
9444         phci = vhcache->vhcache_phci_head;
9445         vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
9446         for ( ; phci != NULL; phci = nxt_phci) {
9447 
9448                 nxt_phci = phci->cphci_next;
9449                 if (phci->cphci_phci != NULL)
9450                         enqueue_vhcache_phci(vhcache, phci);
9451                 else
9452                         free_vhcache_phci(phci);
9453         }
9454 
9455         vhcache->vhcache_clean_time = ddi_get_lbolt64();
9456         rw_exit(&vhcache->vhcache_lock);
9457         vhcache_dirty(vhc);
9458 }
9459 
9460 /*
9461  * Remove all stale entries from vhci cache.
9462  * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
9463  */
9464 void
9465 mdi_clean_vhcache(void)
9466 {
9467         mdi_vhci_t *vh;
9468 
9469         mutex_enter(&mdi_mutex);
9470         for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9471                 vh->vh_refcnt++;
9472                 mutex_exit(&mdi_mutex);
9473                 clean_vhcache(vh->vh_config);
9474                 mutex_enter(&mdi_mutex);
9475                 vh->vh_refcnt--;
9476         }
9477         mutex_exit(&mdi_mutex);
9478 }
9479 
9480 /*
9481  * mdi_vhci_walk_clients():
9482  *              Walker routine to traverse client dev_info nodes
9483  * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree
9484  * below the client, including nexus devices, which we dont want.
9485  * So we just traverse the immediate siblings, starting from 1st client.
9486  */
9487 void
9488 mdi_vhci_walk_clients(dev_info_t *vdip,
9489     int (*f)(dev_info_t *, void *), void *arg)
9490 {
9491         mdi_vhci_t      *vh = i_devi_get_vhci(vdip);
9492         dev_info_t      *cdip;
9493         mdi_client_t    *ct;
9494 
9495         MDI_VHCI_CLIENT_LOCK(vh);
9496         cdip = ddi_get_child(vdip);
9497         while (cdip) {
9498                 ct = i_devi_get_client(cdip);
9499                 MDI_CLIENT_LOCK(ct);
9500 
9501                 if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE)
9502                         cdip = ddi_get_next_sibling(cdip);
9503                 else
9504                         cdip = NULL;
9505 
9506                 MDI_CLIENT_UNLOCK(ct);
9507         }
9508         MDI_VHCI_CLIENT_UNLOCK(vh);
9509 }
9510 
9511 /*
9512  * mdi_vhci_walk_phcis():
9513  *              Walker routine to traverse phci dev_info nodes
9514  */
9515 void
9516 mdi_vhci_walk_phcis(dev_info_t *vdip,
9517     int (*f)(dev_info_t *, void *), void *arg)
9518 {
9519         mdi_vhci_t      *vh = i_devi_get_vhci(vdip);
9520         mdi_phci_t      *ph, *next;
9521 
9522         MDI_VHCI_PHCI_LOCK(vh);
9523         ph = vh->vh_phci_head;
9524         while (ph) {
9525                 MDI_PHCI_LOCK(ph);
9526 
9527                 if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE)
9528                         next = ph->ph_next;
9529                 else
9530                         next = NULL;
9531 
9532                 MDI_PHCI_UNLOCK(ph);
9533                 ph = next;
9534         }
9535         MDI_VHCI_PHCI_UNLOCK(vh);
9536 }
9537 
9538 
9539 /*
9540  * mdi_walk_vhcis():
9541  *              Walker routine to traverse vhci dev_info nodes
9542  */
9543 void
9544 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg)
9545 {
9546         mdi_vhci_t      *vh = NULL;
9547 
9548         mutex_enter(&mdi_mutex);
9549         /*
9550          * Scan for already registered vhci
9551          */
9552         for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9553                 vh->vh_refcnt++;
9554                 mutex_exit(&mdi_mutex);
9555                 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) {
9556                         mutex_enter(&mdi_mutex);
9557                         vh->vh_refcnt--;
9558                         break;
9559                 } else {
9560                         mutex_enter(&mdi_mutex);
9561                         vh->vh_refcnt--;
9562                 }
9563         }
9564 
9565         mutex_exit(&mdi_mutex);
9566 }
9567 
9568 /*
9569  * i_mdi_log_sysevent():
9570  *              Logs events for pickup by syseventd
9571  */
9572 static void
9573 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass)
9574 {
9575         char            *path_name;
9576         nvlist_t        *attr_list;
9577 
9578         if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE,
9579             KM_SLEEP) != DDI_SUCCESS) {
9580                 goto alloc_failed;
9581         }
9582 
9583         path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
9584         (void) ddi_pathname(dip, path_name);
9585 
9586         if (nvlist_add_string(attr_list, DDI_DRIVER_NAME,
9587             ddi_driver_name(dip)) != DDI_SUCCESS) {
9588                 goto error;
9589         }
9590 
9591         if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR,
9592             (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) {
9593                 goto error;
9594         }
9595 
9596         if (nvlist_add_int32(attr_list, DDI_INSTANCE,
9597             (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) {
9598                 goto error;
9599         }
9600 
9601         if (nvlist_add_string(attr_list, DDI_PATHNAME,
9602             path_name) != DDI_SUCCESS) {
9603                 goto error;
9604         }
9605 
9606         if (nvlist_add_string(attr_list, DDI_CLASS,
9607             ph_vh_class) != DDI_SUCCESS) {
9608                 goto error;
9609         }
9610 
9611         (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass,
9612             attr_list, NULL, DDI_SLEEP);
9613 
9614 error:
9615         kmem_free(path_name, MAXPATHLEN);
9616         nvlist_free(attr_list);
9617         return;
9618 
9619 alloc_failed:
9620         MDI_DEBUG(1, (MDI_WARN, dip, "!unable to send sysevent"));
9621 }
9622 
9623 char **
9624 mdi_get_phci_driver_list(char *vhci_class, int  *ndrivers)
9625 {
9626         char    **driver_list, **ret_driver_list = NULL;
9627         int     *root_support_list;
9628         int     cur_elements, max_elements;
9629 
9630         get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
9631             &cur_elements, &max_elements);
9632 
9633 
9634         if (driver_list) {
9635                 kmem_free(root_support_list, sizeof (int) * max_elements);
9636                 ret_driver_list = mdi_realloc(driver_list, sizeof (char *)
9637                     * max_elements, sizeof (char *) * cur_elements);
9638         }
9639         *ndrivers = cur_elements;
9640 
9641         return (ret_driver_list);
9642 
9643 }
9644 
/*
 * mdi_free_phci_driver_list():
 *              Free a driver name list of ndrivers entries: every name
 *              string first, then the array itself.  A NULL list is
 *              ignored.
 */
void
mdi_free_phci_driver_list(char **driver_list, int ndrivers)
{
        int     idx;

        if (driver_list == NULL)
                return;

        /* each name is a NUL-terminated string sized strlen + 1 */
        for (idx = 0; idx < ndrivers; idx++)
                kmem_free(driver_list[idx], strlen(driver_list[idx]) + 1);

        kmem_free(driver_list, sizeof (char *) * ndrivers);
}
9657 
9658 /*
9659  * mdi_is_dev_supported():
9660  *              function called by pHCI bus config operation to determine if a
9661  *              device should be represented as a child of the vHCI or the
9662  *              pHCI.  This decision is made by the vHCI, using cinfo idenity
9663  *              information passed by the pHCI - specifics of the cinfo
9664  *              representation are by agreement between the pHCI and vHCI.
9665  * Return Values:
9666  *              MDI_SUCCESS
9667  *              MDI_FAILURE
9668  */
9669 int
9670 mdi_is_dev_supported(char *class, dev_info_t *pdip, void *cinfo)
9671 {
9672         mdi_vhci_t      *vh;
9673 
9674         ASSERT(class && pdip);
9675 
9676         /*
9677          * For dev_supported, mdi_phci_register() must have established pdip as
9678          * a pHCI.
9679          *
9680          * NOTE: mdi_phci_register() does "mpxio-disable" processing, and
9681          * MDI_PHCI(pdip) will return false if mpxio is disabled.
9682          */
9683         if (!MDI_PHCI(pdip))
9684                 return (MDI_FAILURE);
9685 
9686         /* Return MDI_FAILURE if vHCI does not support asking the question. */
9687         vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
9688         if ((vh == NULL) || (vh->vh_ops->vo_is_dev_supported == NULL)) {
9689                 return (MDI_FAILURE);
9690         }
9691 
9692         /* Return vHCI answer */
9693         return (vh->vh_ops->vo_is_dev_supported(vh->vh_dip, pdip, cinfo));
9694 }
9695 
9696 int
9697 mdi_dc_return_dev_state(mdi_pathinfo_t *pip, struct devctl_iocdata *dcp)
9698 {
9699         uint_t devstate = 0;
9700         dev_info_t *cdip;
9701 
9702         if ((pip == NULL) || (dcp == NULL))
9703                 return (MDI_FAILURE);
9704 
9705         cdip = mdi_pi_get_client(pip);
9706 
9707         switch (mdi_pi_get_state(pip)) {
9708         case MDI_PATHINFO_STATE_INIT:
9709                 devstate = DEVICE_DOWN;
9710                 break;
9711         case MDI_PATHINFO_STATE_ONLINE:
9712                 devstate = DEVICE_ONLINE;
9713                 if ((cdip) && (devi_stillreferenced(cdip) == DEVI_REFERENCED))
9714                         devstate |= DEVICE_BUSY;
9715                 break;
9716         case MDI_PATHINFO_STATE_STANDBY:
9717                 devstate = DEVICE_ONLINE;
9718                 break;
9719         case MDI_PATHINFO_STATE_FAULT:
9720                 devstate = DEVICE_DOWN;
9721                 break;
9722         case MDI_PATHINFO_STATE_OFFLINE:
9723                 devstate = DEVICE_OFFLINE;
9724                 break;
9725         default:
9726                 ASSERT(MDI_PI(pip)->pi_state);
9727         }
9728 
9729         if (copyout(&devstate, dcp->cpyout_buf, sizeof (uint_t)) != 0)
9730                 return (MDI_FAILURE);
9731 
9732         return (MDI_SUCCESS);
9733 }