1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright (c) 2014 Nexenta Systems Inc. All rights reserved.
  24  */
  25 
  26 /*
  27  * Multipath driver interface (MDI) implementation; see mdi_impldefs.h for a
  28  * more detailed discussion of the overall mpxio architecture.
  29  *
  30  * Default locking order:
  31  *
 * _NOTE(LOCK_ORDER(mdi_mutex mdi_vhci::vh_phci_mutex))
 * _NOTE(LOCK_ORDER(mdi_mutex mdi_vhci::vh_client_mutex))
 * _NOTE(LOCK_ORDER(mdi_vhci::vh_phci_mutex mdi_phci::ph_mutex))
 * _NOTE(LOCK_ORDER(mdi_vhci::vh_client_mutex mdi_client::ct_mutex))
  36  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
  37  * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
  38  * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
  39  */
  40 
  41 #include <sys/note.h>
  42 #include <sys/types.h>
  43 #include <sys/varargs.h>
  44 #include <sys/param.h>
  45 #include <sys/errno.h>
  46 #include <sys/uio.h>
  47 #include <sys/buf.h>
  48 #include <sys/modctl.h>
  49 #include <sys/open.h>
  50 #include <sys/kmem.h>
  51 #include <sys/poll.h>
  52 #include <sys/conf.h>
  53 #include <sys/bootconf.h>
  54 #include <sys/cmn_err.h>
  55 #include <sys/stat.h>
  56 #include <sys/ddi.h>
  57 #include <sys/sunddi.h>
  58 #include <sys/ddipropdefs.h>
  59 #include <sys/sunndi.h>
  60 #include <sys/ndi_impldefs.h>
  61 #include <sys/promif.h>
  62 #include <sys/sunmdi.h>
  63 #include <sys/mdi_impldefs.h>
  64 #include <sys/taskq.h>
  65 #include <sys/epm.h>
  66 #include <sys/sunpm.h>
  67 #include <sys/modhash.h>
  68 #include <sys/disp.h>
  69 #include <sys/autoconf.h>
  70 #include <sys/sysmacros.h>
  71 
  72 #ifdef  DEBUG
  73 #include <sys/debug.h>
  74 int     mdi_debug = 1;
  75 int     mdi_debug_logonly = 0;
  76 #define MDI_DEBUG(dbglevel, pargs) if (mdi_debug >= (dbglevel))      i_mdi_log pargs
  77 #define MDI_WARN        CE_WARN, __func__
  78 #define MDI_NOTE        CE_NOTE, __func__
  79 #define MDI_CONT        CE_CONT, __func__
  80 static void i_mdi_log(int, const char *, dev_info_t *, const char *, ...);
  81 #else   /* !DEBUG */
  82 #define MDI_DEBUG(dbglevel, pargs)
  83 #endif  /* DEBUG */
  84 int     mdi_debug_consoleonly = 0;
  85 int     mdi_delay = 3;
  86 
  87 extern pri_t    minclsyspri;
  88 extern int      modrootloaded;
  89 
  90 /*
  91  * Global mutex:
  92  * Protects vHCI list and structure members.
  93  */
  94 kmutex_t        mdi_mutex;
  95 
  96 /*
  97  * Registered vHCI class driver lists
  98  */
  99 int             mdi_vhci_count;
 100 mdi_vhci_t      *mdi_vhci_head;
 101 mdi_vhci_t      *mdi_vhci_tail;
 102 
 103 /*
 104  * Client Hash Table size
 105  */
 106 static int      mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;
 107 
 108 /*
 109  * taskq interface definitions
 110  */
 111 #define MDI_TASKQ_N_THREADS     8
 112 #define MDI_TASKQ_PRI           minclsyspri
 113 #define MDI_TASKQ_MINALLOC      (4*mdi_taskq_n_threads)
 114 #define MDI_TASKQ_MAXALLOC      (500*mdi_taskq_n_threads)
 115 
 116 taskq_t                         *mdi_taskq;
 117 static uint_t                   mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;
 118 
 119 #define TICKS_PER_SECOND        (drv_usectohz(1000000))
 120 
 121 /*
 122  * The data should be "quiet" for this interval (in seconds) before the
 123  * vhci cached data is flushed to the disk.
 124  */
 125 static int mdi_vhcache_flush_delay = 10;
 126 
 127 /* number of seconds the vhcache flush daemon will sleep idle before exiting */
 128 static int mdi_vhcache_flush_daemon_idle_time = 60;
 129 
 130 /*
 131  * MDI falls back to discovery of all paths when a bus_config_one fails.
 132  * The following parameters can be used to tune this operation.
 133  *
 134  * mdi_path_discovery_boot
 135  *      Number of times path discovery will be attempted during early boot.
 136  *      Probably there is no reason to ever set this value to greater than one.
 137  *
 138  * mdi_path_discovery_postboot
 139  *      Number of times path discovery will be attempted after early boot.
 140  *      Set it to a minimum of two to allow for discovery of iscsi paths which
 141  *      may happen very late during booting.
 142  *
 143  * mdi_path_discovery_interval
 144  *      Minimum number of seconds MDI will wait between successive discovery
 145  *      of all paths. Set it to -1 to disable discovery of all paths.
 146  */
 147 static int mdi_path_discovery_boot = 1;
 148 static int mdi_path_discovery_postboot = 2;
 149 static int mdi_path_discovery_interval = 10;
 150 
 151 /*
 152  * number of seconds the asynchronous configuration thread will sleep idle
 153  * before exiting.
 154  */
 155 static int mdi_async_config_idle_time = 600;
 156 
 157 static int mdi_bus_config_cache_hash_size = 256;
 158 
 159 /* turns off multithreaded configuration for certain operations */
 160 static int mdi_mtc_off = 0;
 161 
 162 /*
 163  * The "path" to a pathinfo node is identical to the /devices path to a
 164  * devinfo node had the device been enumerated under a pHCI instead of
 165  * a vHCI.  This pathinfo "path" is associated with a 'path_instance'.
 166  * This association persists across create/delete of the pathinfo nodes,
 167  * but not across reboot.
 168  */
 169 static uint_t           mdi_pathmap_instance = 1;       /* 0 -> any path */
 170 static int              mdi_pathmap_hash_size = 256;
 171 static kmutex_t         mdi_pathmap_mutex;
 172 static mod_hash_t       *mdi_pathmap_bypath;            /* "path"->instance */
 173 static mod_hash_t       *mdi_pathmap_byinstance;        /* instance->"path" */
 174 static mod_hash_t       *mdi_pathmap_sbyinstance;       /* inst->shortpath */
 175 
 176 /*
 177  * MDI component property name/value string definitions
 178  */
 179 const char              *mdi_component_prop = "mpxio-component";
 180 const char              *mdi_component_prop_vhci = "vhci";
 181 const char              *mdi_component_prop_phci = "phci";
 182 const char              *mdi_component_prop_client = "client";
 183 
 184 /*
 185  * MDI client global unique identifier property name
 186  */
 187 const char              *mdi_client_guid_prop = "client-guid";
 188 
 189 /*
 190  * MDI client load balancing property name/value string definitions
 191  */
 192 const char              *mdi_load_balance = "load-balance";
 193 const char              *mdi_load_balance_none = "none";
 194 const char              *mdi_load_balance_rr = "round-robin";
 195 const char              *mdi_load_balance_lba = "logical-block";
 196 
 197 /*
 198  * Obsolete vHCI class definition; to be removed after Leadville update
 199  */
 200 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;
 201 
 202 static char vhci_greeting[] =
 203         "\tThere already exists one vHCI driver for class %s\n"
 204         "\tOnly one vHCI driver for each class is allowed\n";
 205 
 206 /*
 207  * Static function prototypes
 208  */
 209 static int              i_mdi_phci_offline(dev_info_t *, uint_t);
 210 static int              i_mdi_client_offline(dev_info_t *, uint_t);
 211 static int              i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
 212 static void             i_mdi_phci_post_detach(dev_info_t *,
 213                             ddi_detach_cmd_t, int);
 214 static int              i_mdi_client_pre_detach(dev_info_t *,
 215                             ddi_detach_cmd_t);
 216 static void             i_mdi_client_post_detach(dev_info_t *,
 217                             ddi_detach_cmd_t, int);
 218 static void             i_mdi_pm_hold_pip(mdi_pathinfo_t *);
 219 static void             i_mdi_pm_rele_pip(mdi_pathinfo_t *);
 220 static int              i_mdi_lba_lb(mdi_client_t *ct,
 221                             mdi_pathinfo_t **ret_pip, struct buf *buf);
 222 static void             i_mdi_pm_hold_client(mdi_client_t *, int);
 223 static void             i_mdi_pm_rele_client(mdi_client_t *, int);
 224 static void             i_mdi_pm_reset_client(mdi_client_t *);
 225 static int              i_mdi_power_all_phci(mdi_client_t *);
 226 static void             i_mdi_log_sysevent(dev_info_t *, char *, char *);
 227 
 228 
 229 /*
 230  * Internal mdi_pathinfo node functions
 231  */
 232 static void             i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);
 233 
 234 static mdi_vhci_t       *i_mdi_vhci_class2vhci(char *);
 235 static mdi_vhci_t       *i_devi_get_vhci(dev_info_t *);
 236 static mdi_phci_t       *i_devi_get_phci(dev_info_t *);
 237 static void             i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
 238 static void             i_mdi_phci_unlock(mdi_phci_t *);
 239 static mdi_pathinfo_t   *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
 240 static void             i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
 241 static void             i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
 242 static void             i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
 243                             mdi_client_t *);
 244 static void             i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
 245 static void             i_mdi_client_remove_path(mdi_client_t *,
 246                             mdi_pathinfo_t *);
 247 
 248 static int              i_mdi_pi_state_change(mdi_pathinfo_t *,
 249                             mdi_pathinfo_state_t, int);
 250 static int              i_mdi_pi_offline(mdi_pathinfo_t *, int);
 251 static dev_info_t       *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
 252                             char **, int);
 253 static dev_info_t       *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
 254 static int              i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
 255 static int              i_mdi_is_child_present(dev_info_t *, dev_info_t *);
 256 static mdi_client_t     *i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
 257 static void             i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
 258 static void             i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
 259 static mdi_client_t     *i_mdi_client_find(mdi_vhci_t *, char *, char *);
 260 static void             i_mdi_client_update_state(mdi_client_t *);
 261 static int              i_mdi_client_compute_state(mdi_client_t *,
 262                             mdi_phci_t *);
 263 static void             i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
 264 static void             i_mdi_client_unlock(mdi_client_t *);
 265 static int              i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
 266 static mdi_client_t     *i_devi_get_client(dev_info_t *);
 267 /*
 268  * NOTE: this will be removed once the NWS files are changed to use the new
 269  * mdi_{enable,disable}_path interfaces
 270  */
 271 static int              i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
 272                                 int, int);
 273 static mdi_pathinfo_t   *i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
 274                                 mdi_vhci_t *vh, int flags, int op);
 275 /*
 276  * Failover related function prototypes
 277  */
 278 static int              i_mdi_failover(void *);
 279 
 280 /*
 281  * misc internal functions
 282  */
 283 static int              i_mdi_get_hash_key(char *);
 284 static int              i_map_nvlist_error_to_mdi(int);
 285 static void             i_mdi_report_path_state(mdi_client_t *,
 286                             mdi_pathinfo_t *);
 287 
 288 static void             setup_vhci_cache(mdi_vhci_t *);
 289 static int              destroy_vhci_cache(mdi_vhci_t *);
 290 static int              stop_vhcache_async_threads(mdi_vhci_config_t *);
 291 static boolean_t        stop_vhcache_flush_thread(void *, int);
 292 static void             free_string_array(char **, int);
 293 static void             free_vhcache_phci(mdi_vhcache_phci_t *);
 294 static void             free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
 295 static void             free_vhcache_client(mdi_vhcache_client_t *);
 296 static int              mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
 297 static nvlist_t         *vhcache_to_mainnvl(mdi_vhci_cache_t *);
 298 static void             vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
 299 static void             vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
 300 static void             vhcache_pi_add(mdi_vhci_config_t *,
 301                             struct mdi_pathinfo *);
 302 static void             vhcache_pi_remove(mdi_vhci_config_t *,
 303                             struct mdi_pathinfo *);
 304 static void             free_phclient_path_list(mdi_phys_path_t *);
 305 static void             sort_vhcache_paths(mdi_vhcache_client_t *);
 306 static int              flush_vhcache(mdi_vhci_config_t *, int);
 307 static void             vhcache_dirty(mdi_vhci_config_t *);
 308 static void             free_async_client_config(mdi_async_client_config_t *);
 309 static void             single_threaded_vhconfig_enter(mdi_vhci_config_t *);
 310 static void             single_threaded_vhconfig_exit(mdi_vhci_config_t *);
 311 static nvlist_t         *read_on_disk_vhci_cache(char *);
 312 extern int              fread_nvlist(char *, nvlist_t **);
 313 extern int              fwrite_nvlist(char *, nvlist_t *);
 314 
 315 /* called once when first vhci registers with mdi */
 316 static void
 317 i_mdi_init()
 318 {
 319         static int initialized = 0;
 320 
 321         if (initialized)
 322                 return;
 323         initialized = 1;
 324 
 325         mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);
 326 
 327         /* Create our taskq resources */
 328         mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
 329             MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
 330             TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
 331         ASSERT(mdi_taskq != NULL);      /* taskq_create never fails */
 332 
 333         /* Allocate ['path_instance' <-> "path"] maps */
 334         mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL);
 335         mdi_pathmap_bypath = mod_hash_create_strhash(
 336             "mdi_pathmap_bypath", mdi_pathmap_hash_size,
 337             mod_hash_null_valdtor);
 338         mdi_pathmap_byinstance = mod_hash_create_idhash(
 339             "mdi_pathmap_byinstance", mdi_pathmap_hash_size,
 340             mod_hash_null_valdtor);
 341         mdi_pathmap_sbyinstance = mod_hash_create_idhash(
 342             "mdi_pathmap_sbyinstance", mdi_pathmap_hash_size,
 343             mod_hash_null_valdtor);
 344 }
 345 
 346 /*
 347  * mdi_get_component_type():
 348  *              Return mpxio component type
 349  * Return Values:
 350  *              MDI_COMPONENT_NONE
 351  *              MDI_COMPONENT_VHCI
 352  *              MDI_COMPONENT_PHCI
 353  *              MDI_COMPONENT_CLIENT
 354  * XXX This doesn't work under multi-level MPxIO and should be
 355  *      removed when clients migrate mdi_component_is_*() interfaces.
 356  */
 357 int
 358 mdi_get_component_type(dev_info_t *dip)
 359 {
 360         return (DEVI(dip)->devi_mdi_component);
 361 }
 362 
 363 /*
 364  * mdi_vhci_register():
 365  *              Register a vHCI module with the mpxio framework
 366  *              mdi_vhci_register() is called by vHCI drivers to register the
 367  *              'class_driver' vHCI driver and its MDI entrypoints with the
 368  *              mpxio framework.  The vHCI driver must call this interface as
 369  *              part of its attach(9e) handler.
 370  *              Competing threads may try to attach mdi_vhci_register() as
 371  *              the vHCI drivers are loaded and attached as a result of pHCI
 372  *              driver instance registration (mdi_phci_register()) with the
 373  *              framework.
 374  * Return Values:
 375  *              MDI_SUCCESS
 376  *              MDI_FAILURE
 377  */
 378 /*ARGSUSED*/
 379 int
 380 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
 381     int flags)
 382 {
 383         mdi_vhci_t              *vh = NULL;
 384 
 385         /* Registrant can't be older */
 386         ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV);
 387 
 388 #ifdef DEBUG
 389         /*
 390          * IB nexus driver is loaded only when IB hardware is present.
 391          * In order to be able to do this there is a need to drive the loading
 392          * and attaching of the IB nexus driver (especially when an IB hardware
 393          * is dynamically plugged in) when an IB HCA driver (PHCI)
 394          * is being attached. Unfortunately this gets into the limitations
 395          * of devfs as there seems to be no clean way to drive configuration
 396          * of a subtree from another subtree of a devfs. Hence, do not ASSERT
 397          * for IB.
 398          */
 399         if (strcmp(class, MDI_HCI_CLASS_IB) != 0)
 400                 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
 401 #endif
 402 
 403         i_mdi_init();
 404 
 405         mutex_enter(&mdi_mutex);
 406         /*
 407          * Scan for already registered vhci
 408          */
 409         for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
 410                 if (strcmp(vh->vh_class, class) == 0) {
 411                         /*
 412                          * vHCI has already been created.  Check for valid
 413                          * vHCI ops registration.  We only support one vHCI
 414                          * module per class
 415                          */
 416                         if (vh->vh_ops != NULL) {
 417                                 mutex_exit(&mdi_mutex);
 418                                 cmn_err(CE_NOTE, vhci_greeting, class);
 419                                 return (MDI_FAILURE);
 420                         }
 421                         break;
 422                 }
 423         }
 424 
 425         /*
 426          * if not yet created, create the vHCI component
 427          */
 428         if (vh == NULL) {
 429                 struct client_hash      *hash = NULL;
 430                 char                    *load_balance;
 431 
 432                 /*
 433                  * Allocate and initialize the mdi extensions
 434                  */
 435                 vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
 436                 hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
 437                     KM_SLEEP);
 438                 vh->vh_client_table = hash;
 439                 vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
 440                 (void) strcpy(vh->vh_class, class);
 441                 vh->vh_lb = LOAD_BALANCE_RR;
 442                 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
 443                     0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
 444                         if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
 445                                 vh->vh_lb = LOAD_BALANCE_NONE;
 446                         } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
 447                                     == 0) {
 448                                 vh->vh_lb = LOAD_BALANCE_LBA;
 449                         }
 450                         ddi_prop_free(load_balance);
 451                 }
 452 
 453                 mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL);
 454                 mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL);
 455 
 456                 /*
 457                  * Store the vHCI ops vectors
 458                  */
 459                 vh->vh_dip = vdip;
 460                 vh->vh_ops = vops;
 461 
 462                 setup_vhci_cache(vh);
 463 
 464                 if (mdi_vhci_head == NULL) {
 465                         mdi_vhci_head = vh;
 466                 }
 467                 if (mdi_vhci_tail) {
 468                         mdi_vhci_tail->vh_next = vh;
 469                 }
 470                 mdi_vhci_tail = vh;
 471                 mdi_vhci_count++;
 472         }
 473 
 474         /*
 475          * Claim the devfs node as a vhci component
 476          */
 477         DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;
 478 
 479         /*
 480          * Initialize our back reference from dev_info node
 481          */
 482         DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
 483         mutex_exit(&mdi_mutex);
 484         return (MDI_SUCCESS);
 485 }
 486 
 487 /*
 488  * mdi_vhci_unregister():
 489  *              Unregister a vHCI module from mpxio framework
 490  *              mdi_vhci_unregister() is called from the detach(9E) entrypoint
 491  *              of a vhci to unregister it from the framework.
 492  * Return Values:
 493  *              MDI_SUCCESS
 494  *              MDI_FAILURE
 495  */
 496 /*ARGSUSED*/
 497 int
 498 mdi_vhci_unregister(dev_info_t *vdip, int flags)
 499 {
 500         mdi_vhci_t      *found, *vh, *prev = NULL;
 501 
 502         ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
 503 
 504         /*
 505          * Check for invalid VHCI
 506          */
 507         if ((vh = i_devi_get_vhci(vdip)) == NULL)
 508                 return (MDI_FAILURE);
 509 
 510         /*
 511          * Scan the list of registered vHCIs for a match
 512          */
 513         mutex_enter(&mdi_mutex);
 514         for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
 515                 if (found == vh)
 516                         break;
 517                 prev = found;
 518         }
 519 
 520         if (found == NULL) {
 521                 mutex_exit(&mdi_mutex);
 522                 return (MDI_FAILURE);
 523         }
 524 
 525         /*
 526          * Check the vHCI, pHCI and client count. All the pHCIs and clients
 527          * should have been unregistered, before a vHCI can be
 528          * unregistered.
 529          */
 530         MDI_VHCI_PHCI_LOCK(vh);
 531         if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) {
 532                 MDI_VHCI_PHCI_UNLOCK(vh);
 533                 mutex_exit(&mdi_mutex);
 534                 return (MDI_FAILURE);
 535         }
 536         MDI_VHCI_PHCI_UNLOCK(vh);
 537 
 538         if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
 539                 mutex_exit(&mdi_mutex);
 540                 return (MDI_FAILURE);
 541         }
 542 
 543         /*
 544          * Remove the vHCI from the global list
 545          */
 546         if (vh == mdi_vhci_head) {
 547                 mdi_vhci_head = vh->vh_next;
 548         } else {
 549                 prev->vh_next = vh->vh_next;
 550         }
 551         if (vh == mdi_vhci_tail) {
 552                 mdi_vhci_tail = prev;
 553         }
 554         mdi_vhci_count--;
 555         mutex_exit(&mdi_mutex);
 556 
 557         vh->vh_ops = NULL;
 558         DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
 559         DEVI(vdip)->devi_mdi_xhci = NULL;
 560         kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
 561         kmem_free(vh->vh_client_table,
 562             mdi_client_table_size * sizeof (struct client_hash));
 563         mutex_destroy(&vh->vh_phci_mutex);
 564         mutex_destroy(&vh->vh_client_mutex);
 565 
 566         kmem_free(vh, sizeof (mdi_vhci_t));
 567         return (MDI_SUCCESS);
 568 }
 569 
 570 /*
 571  * i_mdi_vhci_class2vhci():
 572  *              Look for a matching vHCI module given a vHCI class name
 573  * Return Values:
 574  *              Handle to a vHCI component
 575  *              NULL
 576  */
 577 static mdi_vhci_t *
 578 i_mdi_vhci_class2vhci(char *class)
 579 {
 580         mdi_vhci_t      *vh = NULL;
 581 
 582         ASSERT(!MUTEX_HELD(&mdi_mutex));
 583 
 584         mutex_enter(&mdi_mutex);
 585         for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
 586                 if (strcmp(vh->vh_class, class) == 0) {
 587                         break;
 588                 }
 589         }
 590         mutex_exit(&mdi_mutex);
 591         return (vh);
 592 }
 593 
 594 /*
 595  * i_devi_get_vhci():
 596  *              Utility function to get the handle to a vHCI component
 597  * Return Values:
 598  *              Handle to a vHCI component
 599  *              NULL
 600  */
 601 mdi_vhci_t *
 602 i_devi_get_vhci(dev_info_t *vdip)
 603 {
 604         mdi_vhci_t      *vh = NULL;
 605         if (MDI_VHCI(vdip)) {
 606                 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
 607         }
 608         return (vh);
 609 }
 610 
 611 /*
 612  * mdi_phci_register():
 613  *              Register a pHCI module with mpxio framework
 614  *              mdi_phci_register() is called by pHCI drivers to register with
 615  *              the mpxio framework and a specific 'class_driver' vHCI.  The
 616  *              pHCI driver must call this interface as part of its attach(9e)
 617  *              handler.
 618  * Return Values:
 619  *              MDI_SUCCESS
 620  *              MDI_FAILURE
 621  */
 622 /*ARGSUSED*/
 623 int
 624 mdi_phci_register(char *class, dev_info_t *pdip, int flags)
 625 {
 626         mdi_phci_t              *ph;
 627         mdi_vhci_t              *vh;
 628         char                    *data;
 629 
 630         /*
 631          * Some subsystems, like fcp, perform pHCI registration from a
 632          * different thread than the one doing the pHCI attach(9E) - the
 633          * driver attach code is waiting for this other thread to complete.
 634          * This means we can only ASSERT DEVI_BUSY_CHANGING of parent
 635          * (indicating that some thread has done an ndi_devi_enter of parent)
 636          * not DEVI_BUSY_OWNED (which would indicate that we did the enter).
 637          */
 638         ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
 639 
 640         /*
 641          * Check for mpxio-disable property. Enable mpxio if the property is
 642          * missing or not set to "yes".
 643          * If the property is set to "yes" then emit a brief message.
 644          */
 645         if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
 646             &data) == DDI_SUCCESS)) {
 647                 if (strcmp(data, "yes") == 0) {
 648                         MDI_DEBUG(1, (MDI_CONT, pdip,
 649                             "?multipath capabilities disabled via %s.conf.",
 650                             ddi_driver_name(pdip)));
 651                         ddi_prop_free(data);
 652                         return (MDI_FAILURE);
 653                 }
 654                 ddi_prop_free(data);
 655         }
 656 
 657         /*
 658          * Search for a matching vHCI
 659          */
 660         vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
 661         if (vh == NULL) {
 662                 return (MDI_FAILURE);
 663         }
 664 
 665         ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
 666         mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
 667         ph->ph_dip = pdip;
 668         ph->ph_vhci = vh;
 669         ph->ph_next = NULL;
 670         ph->ph_unstable = 0;
 671         ph->ph_vprivate = 0;
 672         cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);
 673 
 674         MDI_PHCI_LOCK(ph);
 675         MDI_PHCI_SET_POWER_UP(ph);
 676         MDI_PHCI_UNLOCK(ph);
 677         DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
 678         DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;
 679 
 680         vhcache_phci_add(vh->vh_config, ph);
 681 
 682         MDI_VHCI_PHCI_LOCK(vh);
 683         if (vh->vh_phci_head == NULL) {
 684                 vh->vh_phci_head = ph;
 685         }
 686         if (vh->vh_phci_tail) {
 687                 vh->vh_phci_tail->ph_next = ph;
 688         }
 689         vh->vh_phci_tail = ph;
 690         vh->vh_phci_count++;
 691         MDI_VHCI_PHCI_UNLOCK(vh);
 692 
 693         i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
 694         return (MDI_SUCCESS);
 695 }
 696 
 697 /*
 698  * mdi_phci_unregister():
 699  *              Unregister a pHCI module from mpxio framework
 700  *              mdi_phci_unregister() is called by the pHCI drivers from their
 701  *              detach(9E) handler to unregister their instances from the
 702  *              framework.
 703  * Return Values:
 704  *              MDI_SUCCESS
 705  *              MDI_FAILURE
 706  */
 707 /*ARGSUSED*/
 708 int
 709 mdi_phci_unregister(dev_info_t *pdip, int flags)
 710 {
 711         mdi_vhci_t              *vh;
 712         mdi_phci_t              *ph;
 713         mdi_phci_t              *tmp;
 714         mdi_phci_t              *prev = NULL;
 715         mdi_pathinfo_t          *pip;
 716 
 717         ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
 718 
 719         ph = i_devi_get_phci(pdip);
 720         if (ph == NULL) {
 721                 MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid pHCI"));
 722                 return (MDI_FAILURE);
 723         }
 724 
 725         vh = ph->ph_vhci;
 726         ASSERT(vh != NULL);
 727         if (vh == NULL) {
 728                 MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid vHCI"));
 729                 return (MDI_FAILURE);
 730         }
 731 
 732         MDI_VHCI_PHCI_LOCK(vh);
 733         tmp = vh->vh_phci_head;
 734         while (tmp) {
 735                 if (tmp == ph) {
 736                         break;
 737                 }
 738                 prev = tmp;
 739                 tmp = tmp->ph_next;
 740         }
 741 
 742         if (ph == vh->vh_phci_head) {
 743                 vh->vh_phci_head = ph->ph_next;
 744         } else {
 745                 prev->ph_next = ph->ph_next;
 746         }
 747 
 748         if (ph == vh->vh_phci_tail) {
 749                 vh->vh_phci_tail = prev;
 750         }
 751 
 752         vh->vh_phci_count--;
 753         MDI_VHCI_PHCI_UNLOCK(vh);
 754 
 755         /* Walk remaining pathinfo nodes and disassociate them from pHCI */
 756         MDI_PHCI_LOCK(ph);
 757         for (pip = (mdi_pathinfo_t *)ph->ph_path_head; pip;
 758             pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link)
 759                 MDI_PI(pip)->pi_phci = NULL;
 760         MDI_PHCI_UNLOCK(ph);
 761 
 762         i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
 763             ESC_DDI_INITIATOR_UNREGISTER);
 764         vhcache_phci_remove(vh->vh_config, ph);
 765         cv_destroy(&ph->ph_unstable_cv);
 766         mutex_destroy(&ph->ph_mutex);
 767         kmem_free(ph, sizeof (mdi_phci_t));
 768         DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
 769         DEVI(pdip)->devi_mdi_xhci = NULL;
 770         return (MDI_SUCCESS);
 771 }
 772 
 773 /*
 774  * i_devi_get_phci():
 775  *              Utility function to return the phci extensions.
 776  */
 777 static mdi_phci_t *
 778 i_devi_get_phci(dev_info_t *pdip)
 779 {
 780         mdi_phci_t      *ph = NULL;
 781 
 782         if (MDI_PHCI(pdip)) {
 783                 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
 784         }
 785         return (ph);
 786 }
 787 
 788 /*
 789  * Single thread mdi entry into devinfo node for modifying its children.
 790  * If necessary we perform an ndi_devi_enter of the vHCI before doing
 791  * an ndi_devi_enter of 'dip'.  We maintain circular in two parts: one
 792  * for the vHCI and one for the pHCI.
 793  */
 794 void
 795 mdi_devi_enter(dev_info_t *phci_dip, int *circular)
 796 {
 797         dev_info_t      *vdip;
 798         int             vcircular, pcircular;
 799 
 800         /* Verify calling context */
 801         ASSERT(MDI_PHCI(phci_dip));
 802         vdip = mdi_devi_get_vdip(phci_dip);
 803         ASSERT(vdip);                   /* A pHCI always has a vHCI */
 804 
 805         /*
 806          * If pHCI is detaching then the framework has already entered the
 807          * vHCI on a threads that went down the code path leading to
 808          * detach_node().  This framework enter of the vHCI during pHCI
 809          * detach is done to avoid deadlock with vHCI power management
 810          * operations which enter the vHCI and the enter down the path
 811          * to the pHCI. If pHCI is detaching then we piggyback this calls
 812          * enter of the vHCI on frameworks vHCI enter that has already
 813          * occurred - this is OK because we know that the framework thread
 814          * doing detach is waiting for our completion.
 815          *
 816          * We should DEVI_IS_DETACHING under an enter of the parent to avoid
 817          * race with detach - but we can't do that because the framework has
 818          * already entered the parent, so we have some complexity instead.
 819          */
 820         for (;;) {
 821                 if (ndi_devi_tryenter(vdip, &vcircular)) {
 822                         ASSERT(vcircular != -1);
 823                         if (DEVI_IS_DETACHING(phci_dip)) {
 824                                 ndi_devi_exit(vdip, vcircular);
 825                                 vcircular = -1;
 826                         }
 827                         break;
 828                 } else if (DEVI_IS_DETACHING(phci_dip)) {
 829                         vcircular = -1;
 830                         break;
 831                 } else if (servicing_interrupt()) {
 832                         /*
 833                          * Don't delay an interrupt (and ensure adaptive
 834                          * mutex inversion support).
 835                          */
 836                         ndi_devi_enter(vdip, &vcircular);
 837                         break;
 838                 } else {
 839                         delay_random(mdi_delay);
 840                 }
 841         }
 842 
 843         ndi_devi_enter(phci_dip, &pcircular);
 844         *circular = (vcircular << 16) | (pcircular & 0xFFFF);
 845 }
 846 
 847 /*
 848  * Attempt to mdi_devi_enter.
 849  */
 850 int
 851 mdi_devi_tryenter(dev_info_t *phci_dip, int *circular)
 852 {
 853         dev_info_t      *vdip;
 854         int             vcircular, pcircular;
 855 
 856         /* Verify calling context */
 857         ASSERT(MDI_PHCI(phci_dip));
 858         vdip = mdi_devi_get_vdip(phci_dip);
 859         ASSERT(vdip);                   /* A pHCI always has a vHCI */
 860 
 861         if (ndi_devi_tryenter(vdip, &vcircular)) {
 862                 if (ndi_devi_tryenter(phci_dip, &pcircular)) {
 863                         *circular = (vcircular << 16) | (pcircular & 0xFFFF);
 864                         return (1);     /* locked */
 865                 }
 866                 ndi_devi_exit(vdip, vcircular);
 867         }
 868         return (0);                     /* busy */
 869 }
 870 
 871 /*
 872  * Release mdi_devi_enter or successful mdi_devi_tryenter.
 873  */
 874 void
 875 mdi_devi_exit(dev_info_t *phci_dip, int circular)
 876 {
 877         dev_info_t      *vdip;
 878         int             vcircular, pcircular;
 879 
 880         /* Verify calling context */
 881         ASSERT(MDI_PHCI(phci_dip));
 882         vdip = mdi_devi_get_vdip(phci_dip);
 883         ASSERT(vdip);                   /* A pHCI always has a vHCI */
 884 
 885         /* extract two circular recursion values from single int */
 886         pcircular = (short)(circular & 0xFFFF);
 887         vcircular = (short)((circular >> 16) & 0xFFFF);
 888 
 889         ndi_devi_exit(phci_dip, pcircular);
 890         if (vcircular != -1)
 891                 ndi_devi_exit(vdip, vcircular);
 892 }
 893 
 894 /*
 895  * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used
 896  * around a pHCI drivers calls to mdi_pi_online/offline, after holding
 897  * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock
 898  * with vHCI power management code during path online/offline.  Each
 899  * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must
 900  * occur within the scope of an active mdi_devi_enter that establishes the
 901  * circular value.
 902  */
 903 void
 904 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular)
 905 {
 906         int             pcircular;
 907 
 908         /* Verify calling context */
 909         ASSERT(MDI_PHCI(phci_dip));
 910 
 911         /* Keep hold on pHCI until we reenter in mdi_devi_enter_phci */
 912         ndi_hold_devi(phci_dip);
 913 
 914         pcircular = (short)(circular & 0xFFFF);
 915         ndi_devi_exit(phci_dip, pcircular);
 916 }
 917 
 918 void
 919 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular)
 920 {
 921         int             pcircular;
 922 
 923         /* Verify calling context */
 924         ASSERT(MDI_PHCI(phci_dip));
 925 
 926         ndi_devi_enter(phci_dip, &pcircular);
 927 
 928         /* Drop hold from mdi_devi_exit_phci. */
 929         ndi_rele_devi(phci_dip);
 930 
 931         /* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */
 932         ASSERT(pcircular == ((short)(*circular & 0xFFFF)));
 933 }
 934 
 935 /*
 936  * mdi_devi_get_vdip():
 937  *              given a pHCI dip return vHCI dip
 938  */
 939 dev_info_t *
 940 mdi_devi_get_vdip(dev_info_t *pdip)
 941 {
 942         mdi_phci_t      *ph;
 943 
 944         ph = i_devi_get_phci(pdip);
 945         if (ph && ph->ph_vhci)
 946                 return (ph->ph_vhci->vh_dip);
 947         return (NULL);
 948 }
 949 
 950 /*
 951  * mdi_devi_pdip_entered():
 952  *              Return 1 if we are vHCI and have done an ndi_devi_enter
 953  *              of a pHCI
 954  */
 955 int
 956 mdi_devi_pdip_entered(dev_info_t *vdip)
 957 {
 958         mdi_vhci_t      *vh;
 959         mdi_phci_t      *ph;
 960 
 961         vh = i_devi_get_vhci(vdip);
 962         if (vh == NULL)
 963                 return (0);
 964 
 965         MDI_VHCI_PHCI_LOCK(vh);
 966         ph = vh->vh_phci_head;
 967         while (ph) {
 968                 if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) {
 969                         MDI_VHCI_PHCI_UNLOCK(vh);
 970                         return (1);
 971                 }
 972                 ph = ph->ph_next;
 973         }
 974         MDI_VHCI_PHCI_UNLOCK(vh);
 975         return (0);
 976 }
 977 
 978 /*
 979  * mdi_phci_path2devinfo():
 980  *              Utility function to search for a valid phci device given
 981  *              the devfs pathname.
 982  */
 983 dev_info_t *
 984 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
 985 {
 986         char            *temp_pathname;
 987         mdi_vhci_t      *vh;
 988         mdi_phci_t      *ph;
 989         dev_info_t      *pdip = NULL;
 990 
 991         vh = i_devi_get_vhci(vdip);
 992         ASSERT(vh != NULL);
 993 
 994         if (vh == NULL) {
 995                 /*
 996                  * Invalid vHCI component, return failure
 997                  */
 998                 return (NULL);
 999         }
1000 
1001         temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1002         MDI_VHCI_PHCI_LOCK(vh);
1003         ph = vh->vh_phci_head;
1004         while (ph != NULL) {
1005                 pdip = ph->ph_dip;
1006                 ASSERT(pdip != NULL);
1007                 *temp_pathname = '\0';
1008                 (void) ddi_pathname(pdip, temp_pathname);
1009                 if (strcmp(temp_pathname, pathname) == 0) {
1010                         break;
1011                 }
1012                 ph = ph->ph_next;
1013         }
1014         if (ph == NULL) {
1015                 pdip = NULL;
1016         }
1017         MDI_VHCI_PHCI_UNLOCK(vh);
1018         kmem_free(temp_pathname, MAXPATHLEN);
1019         return (pdip);
1020 }
1021 
1022 /*
1023  * mdi_phci_get_path_count():
1024  *              get number of path information nodes associated with a given
1025  *              pHCI device.
1026  */
1027 int
1028 mdi_phci_get_path_count(dev_info_t *pdip)
1029 {
1030         mdi_phci_t      *ph;
1031         int             count = 0;
1032 
1033         ph = i_devi_get_phci(pdip);
1034         if (ph != NULL) {
1035                 count = ph->ph_path_count;
1036         }
1037         return (count);
1038 }
1039 
1040 /*
1041  * i_mdi_phci_lock():
1042  *              Lock a pHCI device
1043  * Return Values:
1044  *              None
1045  * Note:
1046  *              The default locking order is:
1047  *              _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
1048  *              But there are number of situations where locks need to be
1049  *              grabbed in reverse order.  This routine implements try and lock
1050  *              mechanism depending on the requested parameter option.
1051  */
1052 static void
1053 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
1054 {
1055         if (pip) {
1056                 /* Reverse locking is requested. */
1057                 while (MDI_PHCI_TRYLOCK(ph) == 0) {
1058                         if (servicing_interrupt()) {
1059                                 MDI_PI_HOLD(pip);
1060                                 MDI_PI_UNLOCK(pip);
1061                                 MDI_PHCI_LOCK(ph);
1062                                 MDI_PI_LOCK(pip);
1063                                 MDI_PI_RELE(pip);
1064                                 break;
1065                         } else {
1066                                 /*
1067                                  * tryenter failed. Try to grab again
1068                                  * after a small delay
1069                                  */
1070                                 MDI_PI_HOLD(pip);
1071                                 MDI_PI_UNLOCK(pip);
1072                                 delay_random(mdi_delay);
1073                                 MDI_PI_LOCK(pip);
1074                                 MDI_PI_RELE(pip);
1075                         }
1076                 }
1077         } else {
1078                 MDI_PHCI_LOCK(ph);
1079         }
1080 }
1081 
/*
 * i_mdi_phci_unlock():
 *              Unlock the pHCI component.  Thin wrapper around
 *              MDI_PHCI_UNLOCK(); the counterpart of i_mdi_phci_lock().
 * Return Values:
 *              None
 */
static void
i_mdi_phci_unlock(mdi_phci_t *ph)
{
        MDI_PHCI_UNLOCK(ph);
}
1091 
1092 /*
1093  * i_mdi_devinfo_create():
1094  *              create client device's devinfo node
1095  * Return Values:
1096  *              dev_info
1097  *              NULL
1098  * Notes:
1099  */
1100 static dev_info_t *
1101 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
1102         char **compatible, int ncompatible)
1103 {
1104         dev_info_t *cdip = NULL;
1105 
1106         ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1107 
1108         /* Verify for duplicate entry */
1109         cdip = i_mdi_devinfo_find(vh, name, guid);
1110         ASSERT(cdip == NULL);
1111         if (cdip) {
1112                 cmn_err(CE_WARN,
1113                     "i_mdi_devinfo_create: client %s@%s already exists",
1114                         name ? name : "", guid ? guid : "");
1115         }
1116 
1117         ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
1118         if (cdip == NULL)
1119                 goto fail;
1120 
1121         /*
1122          * Create component type and Global unique identifier
1123          * properties
1124          */
1125         if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
1126             MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
1127                 goto fail;
1128         }
1129 
1130         /* Decorate the node with compatible property */
1131         if (compatible &&
1132             (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
1133             "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
1134                 goto fail;
1135         }
1136 
1137         return (cdip);
1138 
1139 fail:
1140         if (cdip) {
1141                 (void) ndi_prop_remove_all(cdip);
1142                 (void) ndi_devi_free(cdip);
1143         }
1144         return (NULL);
1145 }
1146 
1147 /*
1148  * i_mdi_devinfo_find():
1149  *              Find a matching devinfo node for given client node name
1150  *              and its guid.
1151  * Return Values:
1152  *              Handle to a dev_info node or NULL
1153  */
1154 static dev_info_t *
1155 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
1156 {
1157         char                    *data;
1158         dev_info_t              *cdip = NULL;
1159         dev_info_t              *ndip = NULL;
1160         int                     circular;
1161 
1162         ndi_devi_enter(vh->vh_dip, &circular);
1163         ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
1164         while ((cdip = ndip) != NULL) {
1165                 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1166 
1167                 if (strcmp(DEVI(cdip)->devi_node_name, name)) {
1168                         continue;
1169                 }
1170 
1171                 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
1172                     DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
1173                     &data) != DDI_PROP_SUCCESS) {
1174                         continue;
1175                 }
1176 
1177                 if (strcmp(data, guid) != 0) {
1178                         ddi_prop_free(data);
1179                         continue;
1180                 }
1181                 ddi_prop_free(data);
1182                 break;
1183         }
1184         ndi_devi_exit(vh->vh_dip, circular);
1185         return (cdip);
1186 }
1187 
1188 /*
1189  * i_mdi_devinfo_remove():
1190  *              Remove a client device node
1191  */
1192 static int
1193 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
1194 {
1195         int     rv = MDI_SUCCESS;
1196 
1197         if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
1198             (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
1199                 rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN | NDI_DEVI_REMOVE);
1200                 if (rv != NDI_SUCCESS) {
1201                         MDI_DEBUG(1, (MDI_NOTE, cdip,
1202                             "!failed: cdip %p", (void *)cdip));
1203                 }
1204                 /*
1205                  * Convert to MDI error code
1206                  */
1207                 switch (rv) {
1208                 case NDI_SUCCESS:
1209                         rv = MDI_SUCCESS;
1210                         break;
1211                 case NDI_BUSY:
1212                         rv = MDI_BUSY;
1213                         break;
1214                 default:
1215                         rv = MDI_FAILURE;
1216                         break;
1217                 }
1218         }
1219         return (rv);
1220 }
1221 
1222 /*
1223  * i_devi_get_client()
1224  *              Utility function to get mpxio component extensions
1225  */
1226 static mdi_client_t *
1227 i_devi_get_client(dev_info_t *cdip)
1228 {
1229         mdi_client_t    *ct = NULL;
1230 
1231         if (MDI_CLIENT(cdip)) {
1232                 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
1233         }
1234         return (ct);
1235 }
1236 
1237 /*
1238  * i_mdi_is_child_present():
1239  *              Search for the presence of client device dev_info node
1240  */
1241 static int
1242 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
1243 {
1244         int             rv = MDI_FAILURE;
1245         struct dev_info *dip;
1246         int             circular;
1247 
1248         ndi_devi_enter(vdip, &circular);
1249         dip = DEVI(vdip)->devi_child;
1250         while (dip) {
1251                 if (dip == DEVI(cdip)) {
1252                         rv = MDI_SUCCESS;
1253                         break;
1254                 }
1255                 dip = dip->devi_sibling;
1256         }
1257         ndi_devi_exit(vdip, circular);
1258         return (rv);
1259 }
1260 
1261 
1262 /*
1263  * i_mdi_client_lock():
1264  *              Grab client component lock
1265  * Return Values:
1266  *              None
1267  * Note:
1268  *              The default locking order is:
1269  *              _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
1270  *              But there are number of situations where locks need to be
1271  *              grabbed in reverse order.  This routine implements try and lock
1272  *              mechanism depending on the requested parameter option.
1273  */
1274 static void
1275 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
1276 {
1277         if (pip) {
1278                 /*
1279                  * Reverse locking is requested.
1280                  */
1281                 while (MDI_CLIENT_TRYLOCK(ct) == 0) {
1282                         if (servicing_interrupt()) {
1283                                 MDI_PI_HOLD(pip);
1284                                 MDI_PI_UNLOCK(pip);
1285                                 MDI_CLIENT_LOCK(ct);
1286                                 MDI_PI_LOCK(pip);
1287                                 MDI_PI_RELE(pip);
1288                                 break;
1289                         } else {
1290                                 /*
1291                                  * tryenter failed. Try to grab again
1292                                  * after a small delay
1293                                  */
1294                                 MDI_PI_HOLD(pip);
1295                                 MDI_PI_UNLOCK(pip);
1296                                 delay_random(mdi_delay);
1297                                 MDI_PI_LOCK(pip);
1298                                 MDI_PI_RELE(pip);
1299                         }
1300                 }
1301         } else {
1302                 MDI_CLIENT_LOCK(ct);
1303         }
1304 }
1305 
/*
 * i_mdi_client_unlock():
 *              Unlock a client component.  Thin wrapper around
 *              MDI_CLIENT_UNLOCK(); the counterpart of i_mdi_client_lock().
 * Return Values:
 *              None
 */
static void
i_mdi_client_unlock(mdi_client_t *ct)
{
        MDI_CLIENT_UNLOCK(ct);
}
1315 
1316 /*
1317  * i_mdi_client_alloc():
1318  *              Allocate and initialize a client structure.  Caller should
1319  *              hold the vhci client lock.
1320  * Return Values:
1321  *              Handle to a client component
1322  */
1323 /*ARGSUSED*/
1324 static mdi_client_t *
1325 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
1326 {
1327         mdi_client_t    *ct;
1328 
1329         ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1330 
1331         /*
1332          * Allocate and initialize a component structure.
1333          */
1334         ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
1335         mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
1336         ct->ct_hnext = NULL;
1337         ct->ct_hprev = NULL;
1338         ct->ct_dip = NULL;
1339         ct->ct_vhci = vh;
1340         ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
1341         (void) strcpy(ct->ct_drvname, name);
1342         ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
1343         (void) strcpy(ct->ct_guid, lguid);
1344         ct->ct_cprivate = NULL;
1345         ct->ct_vprivate = NULL;
1346         ct->ct_flags = 0;
1347         ct->ct_state = MDI_CLIENT_STATE_FAILED;
1348         MDI_CLIENT_LOCK(ct);
1349         MDI_CLIENT_SET_OFFLINE(ct);
1350         MDI_CLIENT_SET_DETACH(ct);
1351         MDI_CLIENT_SET_POWER_UP(ct);
1352         MDI_CLIENT_UNLOCK(ct);
1353         ct->ct_failover_flags = 0;
1354         ct->ct_failover_status = 0;
1355         cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
1356         ct->ct_unstable = 0;
1357         cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
1358         cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
1359         ct->ct_lb = vh->vh_lb;
1360         ct->ct_lb_args =  kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
1361         ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
1362         ct->ct_path_count = 0;
1363         ct->ct_path_head = NULL;
1364         ct->ct_path_tail = NULL;
1365         ct->ct_path_last = NULL;
1366 
1367         /*
1368          * Add this client component to our client hash queue
1369          */
1370         i_mdi_client_enlist_table(vh, ct);
1371         return (ct);
1372 }
1373 
1374 /*
1375  * i_mdi_client_enlist_table():
1376  *              Attach the client device to the client hash table. Caller
1377  *              should hold the vhci client lock.
1378  */
1379 static void
1380 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1381 {
1382         int                     index;
1383         struct client_hash      *head;
1384 
1385         ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1386 
1387         index = i_mdi_get_hash_key(ct->ct_guid);
1388         head = &vh->vh_client_table[index];
1389         ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
1390         head->ct_hash_head = ct;
1391         head->ct_hash_count++;
1392         vh->vh_client_count++;
1393 }
1394 
1395 /*
1396  * i_mdi_client_delist_table():
1397  *              Attach the client device to the client hash table.
1398  *              Caller should hold the vhci client lock.
1399  */
1400 static void
1401 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1402 {
1403         int                     index;
1404         char                    *guid;
1405         struct client_hash      *head;
1406         mdi_client_t            *next;
1407         mdi_client_t            *last;
1408 
1409         ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1410 
1411         guid = ct->ct_guid;
1412         index = i_mdi_get_hash_key(guid);
1413         head = &vh->vh_client_table[index];
1414 
1415         last = NULL;
1416         next = (mdi_client_t *)head->ct_hash_head;
1417         while (next != NULL) {
1418                 if (next == ct) {
1419                         break;
1420                 }
1421                 last = next;
1422                 next = next->ct_hnext;
1423         }
1424 
1425         if (next) {
1426                 head->ct_hash_count--;
1427                 if (last == NULL) {
1428                         head->ct_hash_head = ct->ct_hnext;
1429                 } else {
1430                         last->ct_hnext = ct->ct_hnext;
1431                 }
1432                 ct->ct_hnext = NULL;
1433                 vh->vh_client_count--;
1434         }
1435 }
1436 
1437 
1438 /*
1439  * i_mdi_client_free():
1440  *              Free a client component
1441  */
1442 static int
1443 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
1444 {
1445         int             rv = MDI_SUCCESS;
1446         int             flags = ct->ct_flags;
1447         dev_info_t      *cdip;
1448         dev_info_t      *vdip;
1449 
1450         ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1451 
1452         vdip = vh->vh_dip;
1453         cdip = ct->ct_dip;
1454 
1455         (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
1456         DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
1457         DEVI(cdip)->devi_mdi_client = NULL;
1458 
1459         /*
1460          * Clear out back ref. to dev_info_t node
1461          */
1462         ct->ct_dip = NULL;
1463 
1464         /*
1465          * Remove this client from our hash queue
1466          */
1467         i_mdi_client_delist_table(vh, ct);
1468 
1469         /*
1470          * Uninitialize and free the component
1471          */
1472         kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
1473         kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
1474         kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
1475         cv_destroy(&ct->ct_failover_cv);
1476         cv_destroy(&ct->ct_unstable_cv);
1477         cv_destroy(&ct->ct_powerchange_cv);
1478         mutex_destroy(&ct->ct_mutex);
1479         kmem_free(ct, sizeof (*ct));
1480 
1481         if (cdip != NULL) {
1482                 MDI_VHCI_CLIENT_UNLOCK(vh);
1483                 (void) i_mdi_devinfo_remove(vdip, cdip, flags);
1484                 MDI_VHCI_CLIENT_LOCK(vh);
1485         }
1486         return (rv);
1487 }
1488 
1489 /*
1490  * i_mdi_client_find():
1491  *              Find the client structure corresponding to a given guid
1492  *              Caller should hold the vhci client lock.
1493  */
1494 static mdi_client_t *
1495 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
1496 {
1497         int                     index;
1498         struct client_hash      *head;
1499         mdi_client_t            *ct;
1500 
1501         ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1502 
1503         index = i_mdi_get_hash_key(guid);
1504         head = &vh->vh_client_table[index];
1505 
1506         ct = head->ct_hash_head;
1507         while (ct != NULL) {
1508                 if (strcmp(ct->ct_guid, guid) == 0 &&
1509                     (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
1510                         break;
1511                 }
1512                 ct = ct->ct_hnext;
1513         }
1514         return (ct);
1515 }
1516 
1517 /*
1518  * i_mdi_client_update_state():
1519  *              Compute and update client device state
1520  * Notes:
1521  *              A client device can be in any of three possible states:
1522  *
1523  *              MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
1524  *              one online/standby paths. Can tolerate failures.
1525  *              MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
1526  *              no alternate paths available as standby. A failure on the online
1527  *              would result in loss of access to device data.
1528  *              MDI_CLIENT_STATE_FAILED - Client device in failed state with
1529  *              no paths available to access the device.
1530  */
1531 static void
1532 i_mdi_client_update_state(mdi_client_t *ct)
1533 {
1534         int state;
1535 
1536         ASSERT(MDI_CLIENT_LOCKED(ct));
1537         state = i_mdi_client_compute_state(ct, NULL);
1538         MDI_CLIENT_SET_STATE(ct, state);
1539 }
1540 
1541 /*
1542  * i_mdi_client_compute_state():
1543  *              Compute client device state
1544  *
1545  *              mdi_phci_t *    Pointer to pHCI structure which should
1546  *                              while computing the new value.  Used by
1547  *                              i_mdi_phci_offline() to find the new
1548  *                              client state after DR of a pHCI.
1549  */
1550 static int
1551 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
1552 {
1553         int             state;
1554         int             online_count = 0;
1555         int             standby_count = 0;
1556         mdi_pathinfo_t  *pip, *next;
1557 
1558         ASSERT(MDI_CLIENT_LOCKED(ct));
1559         pip = ct->ct_path_head;
1560         while (pip != NULL) {
1561                 MDI_PI_LOCK(pip);
1562                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
1563                 if (MDI_PI(pip)->pi_phci == ph) {
1564                         MDI_PI_UNLOCK(pip);
1565                         pip = next;
1566                         continue;
1567                 }
1568 
1569                 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1570                                 == MDI_PATHINFO_STATE_ONLINE)
1571                         online_count++;
1572                 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1573                                 == MDI_PATHINFO_STATE_STANDBY)
1574                         standby_count++;
1575                 MDI_PI_UNLOCK(pip);
1576                 pip = next;
1577         }
1578 
1579         if (online_count == 0) {
1580                 if (standby_count == 0) {
1581                         state = MDI_CLIENT_STATE_FAILED;
1582                         MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
1583                             "client state failed: ct = %p", (void *)ct));
1584                 } else if (standby_count == 1) {
1585                         state = MDI_CLIENT_STATE_DEGRADED;
1586                 } else {
1587                         state = MDI_CLIENT_STATE_OPTIMAL;
1588                 }
1589         } else if (online_count == 1) {
1590                 if (standby_count == 0) {
1591                         state = MDI_CLIENT_STATE_DEGRADED;
1592                 } else {
1593                         state = MDI_CLIENT_STATE_OPTIMAL;
1594                 }
1595         } else {
1596                 state = MDI_CLIENT_STATE_OPTIMAL;
1597         }
1598         return (state);
1599 }
1600 
/*
 * i_mdi_client2devinfo():
 *              Utility function returning the dev_info node recorded in
 *              the client's ct_dip field.  'ct' is dereferenced without a
 *              NULL check and no lock is taken here.
 */
dev_info_t *
i_mdi_client2devinfo(mdi_client_t *ct)
{
        return (ct->ct_dip);
}
1610 
1611 /*
1612  * mdi_client_path2_devinfo():
1613  *              Given the parent devinfo and child devfs pathname, search for
1614  *              a valid devfs node handle.
1615  */
1616 dev_info_t *
1617 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
1618 {
1619         dev_info_t      *cdip = NULL;
1620         dev_info_t      *ndip = NULL;
1621         char            *temp_pathname;
1622         int             circular;
1623 
1624         /*
1625          * Allocate temp buffer
1626          */
1627         temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1628 
1629         /*
1630          * Lock parent against changes
1631          */
1632         ndi_devi_enter(vdip, &circular);
1633         ndip = (dev_info_t *)DEVI(vdip)->devi_child;
1634         while ((cdip = ndip) != NULL) {
1635                 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1636 
1637                 *temp_pathname = '\0';
1638                 (void) ddi_pathname(cdip, temp_pathname);
1639                 if (strcmp(temp_pathname, pathname) == 0) {
1640                         break;
1641                 }
1642         }
1643         /*
1644          * Release devinfo lock
1645          */
1646         ndi_devi_exit(vdip, circular);
1647 
1648         /*
1649          * Free the temp buffer
1650          */
1651         kmem_free(temp_pathname, MAXPATHLEN);
1652         return (cdip);
1653 }
1654 
1655 /*
1656  * mdi_client_get_path_count():
1657  *              Utility function to get number of path information nodes
1658  *              associated with a given client device.
1659  */
1660 int
1661 mdi_client_get_path_count(dev_info_t *cdip)
1662 {
1663         mdi_client_t    *ct;
1664         int             count = 0;
1665 
1666         ct = i_devi_get_client(cdip);
1667         if (ct != NULL) {
1668                 count = ct->ct_path_count;
1669         }
1670         return (count);
1671 }
1672 
1673 
1674 /*
1675  * i_mdi_get_hash_key():
1676  *              Create a hash using strings as keys
1677  *
1678  */
1679 static int
1680 i_mdi_get_hash_key(char *str)
1681 {
1682         uint32_t        g, hash = 0;
1683         char            *p;
1684 
1685         for (p = str; *p != '\0'; p++) {
1686                 g = *p;
1687                 hash += g;
1688         }
1689         return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
1690 }
1691 
1692 /*
1693  * mdi_get_lb_policy():
1694  *              Get current load balancing policy for a given client device
1695  */
1696 client_lb_t
1697 mdi_get_lb_policy(dev_info_t *cdip)
1698 {
1699         client_lb_t     lb = LOAD_BALANCE_NONE;
1700         mdi_client_t    *ct;
1701 
1702         ct = i_devi_get_client(cdip);
1703         if (ct != NULL) {
1704                 lb = ct->ct_lb;
1705         }
1706         return (lb);
1707 }
1708 
1709 /*
1710  * mdi_set_lb_region_size():
1711  *              Set current region size for the load-balance
1712  */
1713 int
1714 mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
1715 {
1716         mdi_client_t    *ct;
1717         int             rv = MDI_FAILURE;
1718 
1719         ct = i_devi_get_client(cdip);
1720         if (ct != NULL && ct->ct_lb_args != NULL) {
1721                 ct->ct_lb_args->region_size = region_size;
1722                 rv = MDI_SUCCESS;
1723         }
1724         return (rv);
1725 }
1726 
1727 /*
1728  * mdi_Set_lb_policy():
1729  *              Set current load balancing policy for a given client device
1730  */
1731 int
1732 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
1733 {
1734         mdi_client_t    *ct;
1735         int             rv = MDI_FAILURE;
1736 
1737         ct = i_devi_get_client(cdip);
1738         if (ct != NULL) {
1739                 ct->ct_lb = lb;
1740                 rv = MDI_SUCCESS;
1741         }
1742         return (rv);
1743 }
1744 
1745 /*
1746  * mdi_failover():
1747  *              failover function called by the vHCI drivers to initiate
1748  *              a failover operation.  This is typically due to non-availability
1749  *              of online paths to route I/O requests.  Failover can be
1750  *              triggered through user application also.
1751  *
1752  *              The vHCI driver calls mdi_failover() to initiate a failover
1753  *              operation. mdi_failover() calls back into the vHCI driver's
1754  *              vo_failover() entry point to perform the actual failover
1755  *              operation.  The reason for requiring the vHCI driver to
1756  *              initiate failover by calling mdi_failover(), instead of directly
1757  *              executing vo_failover() itself, is to ensure that the mdi
1758  *              framework can keep track of the client state properly.
1759  *              Additionally, mdi_failover() provides as a convenience the
1760  *              option of performing the failover operation synchronously or
1761  *              asynchronously
1762  *
1763  *              Upon successful completion of the failover operation, the
1764  *              paths that were previously ONLINE will be in the STANDBY state,
1765  *              and the newly activated paths will be in the ONLINE state.
1766  *
1767  *              The flags modifier determines whether the activation is done
1768  *              synchronously: MDI_FAILOVER_SYNC
1769  * Return Values:
1770  *              MDI_SUCCESS
1771  *              MDI_FAILURE
1772  *              MDI_BUSY
1773  */
1774 /*ARGSUSED*/
1775 int
1776 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
1777 {
1778         int                     rv;
1779         mdi_client_t            *ct;
1780 
1781         ct = i_devi_get_client(cdip);
1782         ASSERT(ct != NULL);
1783         if (ct == NULL) {
1784                 /* cdip is not a valid client device. Nothing more to do. */
1785                 return (MDI_FAILURE);
1786         }
1787 
1788         MDI_CLIENT_LOCK(ct);
1789 
1790         if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
1791                 /* A path to the client is being freed */
1792                 MDI_CLIENT_UNLOCK(ct);
1793                 return (MDI_BUSY);
1794         }
1795 
1796 
1797         if (MDI_CLIENT_IS_FAILED(ct)) {
1798                 /*
1799                  * Client is in failed state. Nothing more to do.
1800                  */
1801                 MDI_CLIENT_UNLOCK(ct);
1802                 return (MDI_FAILURE);
1803         }
1804 
1805         if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
1806                 /*
1807                  * Failover is already in progress; return BUSY
1808                  */
1809                 MDI_CLIENT_UNLOCK(ct);
1810                 return (MDI_BUSY);
1811         }
1812         /*
1813          * Make sure that mdi_pathinfo node state changes are processed.
1814          * We do not allow failovers to progress while client path state
1815          * changes are in progress
1816          */
1817         if (ct->ct_unstable) {
1818                 if (flags == MDI_FAILOVER_ASYNC) {
1819                         MDI_CLIENT_UNLOCK(ct);
1820                         return (MDI_BUSY);
1821                 } else {
1822                         while (ct->ct_unstable)
1823                                 cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
1824                 }
1825         }
1826 
1827         /*
1828          * Client device is in stable state. Before proceeding, perform sanity
1829          * checks again.
1830          */
1831         if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
1832             (!i_ddi_devi_attached(cdip))) {
1833                 /*
1834                  * Client is in failed state. Nothing more to do.
1835                  */
1836                 MDI_CLIENT_UNLOCK(ct);
1837                 return (MDI_FAILURE);
1838         }
1839 
1840         /*
1841          * Set the client state as failover in progress.
1842          */
1843         MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
1844         ct->ct_failover_flags = flags;
1845         MDI_CLIENT_UNLOCK(ct);
1846 
1847         if (flags == MDI_FAILOVER_ASYNC) {
1848                 /*
1849                  * Submit the initiate failover request via CPR safe
1850                  * taskq threads.
1851                  */
1852                 (void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
1853                     ct, KM_SLEEP);
1854                 return (MDI_ACCEPT);
1855         } else {
1856                 /*
1857                  * Synchronous failover mode.  Typically invoked from the user
1858                  * land.
1859                  */
1860                 rv = i_mdi_failover(ct);
1861         }
1862         return (rv);
1863 }
1864 
1865 /*
1866  * i_mdi_failover():
1867  *              internal failover function. Invokes vHCI drivers failover
1868  *              callback function and process the failover status
1869  * Return Values:
1870  *              None
1871  *
1872  * Note: A client device in failover state can not be detached or freed.
1873  */
1874 static int
1875 i_mdi_failover(void *arg)
1876 {
1877         int             rv = MDI_SUCCESS;
1878         mdi_client_t    *ct = (mdi_client_t *)arg;
1879         mdi_vhci_t      *vh = ct->ct_vhci;
1880 
1881         ASSERT(!MDI_CLIENT_LOCKED(ct));
1882 
1883         if (vh->vh_ops->vo_failover != NULL) {
1884                 /*
1885                  * Call vHCI drivers callback routine
1886                  */
1887                 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
1888                     ct->ct_failover_flags);
1889         }
1890 
1891         MDI_CLIENT_LOCK(ct);
1892         MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);
1893 
1894         /*
1895          * Save the failover return status
1896          */
1897         ct->ct_failover_status = rv;
1898 
1899         /*
1900          * As a result of failover, client status would have been changed.
1901          * Update the client state and wake up anyone waiting on this client
1902          * device.
1903          */
1904         i_mdi_client_update_state(ct);
1905 
1906         cv_broadcast(&ct->ct_failover_cv);
1907         MDI_CLIENT_UNLOCK(ct);
1908         return (rv);
1909 }
1910 
1911 /*
1912  * Load balancing is logical block.
1913  * IOs within the range described by region_size
1914  * would go on the same path. This would improve the
1915  * performance by cache-hit on some of the RAID devices.
1916  * Search only for online paths(At some point we
1917  * may want to balance across target ports).
1918  * If no paths are found then default to round-robin.
1919  */
1920 static int
1921 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
1922 {
1923         int             path_index = -1;
1924         int             online_path_count = 0;
1925         int             online_nonpref_path_count = 0;
1926         int             region_size = ct->ct_lb_args->region_size;
1927         mdi_pathinfo_t  *pip;
1928         mdi_pathinfo_t  *next;
1929         int             preferred, path_cnt;
1930 
1931         pip = ct->ct_path_head;
1932         while (pip) {
1933                 MDI_PI_LOCK(pip);
1934                 if (MDI_PI(pip)->pi_state ==
1935                     MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
1936                         online_path_count++;
1937                 } else if (MDI_PI(pip)->pi_state ==
1938                     MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
1939                         online_nonpref_path_count++;
1940                 }
1941                 next = (mdi_pathinfo_t *)
1942                     MDI_PI(pip)->pi_client_link;
1943                 MDI_PI_UNLOCK(pip);
1944                 pip = next;
1945         }
1946         /* if found any online/preferred then use this type */
1947         if (online_path_count > 0) {
1948                 path_cnt = online_path_count;
1949                 preferred = 1;
1950         } else if (online_nonpref_path_count > 0) {
1951                 path_cnt = online_nonpref_path_count;
1952                 preferred = 0;
1953         } else {
1954                 path_cnt = 0;
1955         }
1956         if (path_cnt) {
1957                 path_index = (bp->b_blkno >> region_size) % path_cnt;
1958                 pip = ct->ct_path_head;
1959                 while (pip && path_index != -1) {
1960                         MDI_PI_LOCK(pip);
1961                         if (path_index == 0 &&
1962                             (MDI_PI(pip)->pi_state ==
1963                             MDI_PATHINFO_STATE_ONLINE) &&
1964                                 MDI_PI(pip)->pi_preferred == preferred) {
1965                                 MDI_PI_HOLD(pip);
1966                                 MDI_PI_UNLOCK(pip);
1967                                 *ret_pip = pip;
1968                                 return (MDI_SUCCESS);
1969                         }
1970                         path_index --;
1971                         next = (mdi_pathinfo_t *)
1972                             MDI_PI(pip)->pi_client_link;
1973                         MDI_PI_UNLOCK(pip);
1974                         pip = next;
1975                 }
1976                 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
1977                     "lba %llx: path %s %p",
1978                     bp->b_lblkno, mdi_pi_spathname(pip), (void *)pip));
1979         }
1980         return (MDI_FAILURE);
1981 }
1982 
1983 /*
1984  * mdi_select_path():
1985  *              select a path to access a client device.
1986  *
1987  *              mdi_select_path() function is called by the vHCI drivers to
1988  *              select a path to route the I/O request to.  The caller passes
1989  *              the block I/O data transfer structure ("buf") as one of the
1990  *              parameters.  The mpxio framework uses the buf structure
1991  *              contents to maintain per path statistics (total I/O size /
1992  *              count pending).  If more than one online paths are available to
1993  *              select, the framework automatically selects a suitable path
1994  *              for routing I/O request. If a failover operation is active for
1995  *              this client device the call shall be failed with MDI_BUSY error
1996  *              code.
1997  *
1998  *              By default this function returns a suitable path in online
1999  *              state based on the current load balancing policy.  Currently
2000  *              we support LOAD_BALANCE_NONE (Previously selected online path
2001  *              will continue to be used as long as the path is usable),
2002  *              LOAD_BALANCE_RR (Online paths will be selected in a round
2003  *              robin fashion) and LOAD_BALANCE_LBA (Online paths will be
2004  *              selected based on the logical block).  The policy is set
2005  *              through the vHCI driver's configuration file (driver.conf).
2006  *
2007  *              vHCI drivers may override this default behavior by specifying
2008  *              appropriate flags.  The meaning of the "arg" argument depends
2009  *              on the flags specified. If MDI_SELECT_PATH_INSTANCE is set
2010  *              then the argument is the "path instance" of the path to select.
2011  *              If MDI_SELECT_PATH_INSTANCE is not set then the argument is
2012  *              "start_pip". A non NULL "start_pip" is the starting point to
2013  *              walk and find the next appropriate path.  The following values
2014  *              are currently defined: MDI_SELECT_ONLINE_PATH (to select an
2015  *              ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an
2016  *              STANDBY path).
2017  *
2018  *              The non-standard behavior is used by the scsi_vhci driver,
2019  *              whenever it has to use a STANDBY/FAULTED path.  Eg. during
2020  *              attach of client devices (to avoid an unnecessary failover
2021  *              when the STANDBY path comes up first), during failover
2022  *              (to activate a STANDBY path as ONLINE).
2023  *
2024  *              The selected path is returned in a mdi_hold_path() state
2025  *              (pi_ref_cnt). Caller should release the hold by calling
2026  *              mdi_rele_path().
2027  *
2028  * Return Values:
2029  *              MDI_SUCCESS     - Completed successfully
2030  *              MDI_BUSY        - Client device is busy failing over
2031  *              MDI_NOPATH      - Client device is online, but no valid paths are
2032  *                                available to access this client device
2033  *              MDI_FAILURE     - Invalid client device or state
2034  *              MDI_DEVI_ONLINING
2035  *                              - Client device (struct dev_info state) is in
2036  *                                onlining state.
2037  */
2038 
2039 /*ARGSUSED*/
2040 int
2041 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
2042     void *arg, mdi_pathinfo_t **ret_pip)
2043 {
2044         mdi_client_t    *ct;
2045         mdi_pathinfo_t  *pip;
2046         mdi_pathinfo_t  *next;
2047         mdi_pathinfo_t  *head;
2048         mdi_pathinfo_t  *start;
2049         client_lb_t     lbp;    /* load balancing policy */
2050         int             sb = 1; /* standard behavior */
2051         int             preferred = 1;  /* preferred path */
2052         int             cond, cont = 1;
2053         int             retry = 0;
2054         mdi_pathinfo_t  *start_pip;     /* request starting pathinfo */
2055         int             path_instance;  /* request specific path instance */
2056 
2057         /* determine type of arg based on flags */
2058         if (flags & MDI_SELECT_PATH_INSTANCE) {
2059                 path_instance = (int)(intptr_t)arg;
2060                 start_pip = NULL;
2061         } else {
2062                 path_instance = 0;
2063                 start_pip = (mdi_pathinfo_t *)arg;
2064         }
2065 
2066         if (flags != 0) {
2067                 /*
2068                  * disable default behavior
2069                  */
2070                 sb = 0;
2071         }
2072 
2073         *ret_pip = NULL;
2074         ct = i_devi_get_client(cdip);
2075         if (ct == NULL) {
2076                 /* mdi extensions are NULL, Nothing more to do */
2077                 return (MDI_FAILURE);
2078         }
2079 
2080         MDI_CLIENT_LOCK(ct);
2081 
2082         if (sb) {
2083                 if (MDI_CLIENT_IS_FAILED(ct)) {
2084                         /*
2085                          * Client is not ready to accept any I/O requests.
2086                          * Fail this request.
2087                          */
2088                         MDI_DEBUG(2, (MDI_NOTE, cdip,
2089                             "client state offline ct = %p", (void *)ct));
2090                         MDI_CLIENT_UNLOCK(ct);
2091                         return (MDI_FAILURE);
2092                 }
2093 
2094                 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
2095                         /*
2096                          * Check for Failover is in progress. If so tell the
2097                          * caller that this device is busy.
2098                          */
2099                         MDI_DEBUG(2, (MDI_NOTE, cdip,
2100                             "client failover in progress ct = %p",
2101                             (void *)ct));
2102                         MDI_CLIENT_UNLOCK(ct);
2103                         return (MDI_BUSY);
2104                 }
2105 
2106                 /*
2107                  * Check to see whether the client device is attached.
2108                  * If not so, let the vHCI driver manually select a path
2109                  * (standby) and let the probe/attach process to continue.
2110                  */
2111                 if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
2112                         MDI_DEBUG(4, (MDI_NOTE, cdip,
2113                             "devi is onlining ct = %p", (void *)ct));
2114                         MDI_CLIENT_UNLOCK(ct);
2115                         return (MDI_DEVI_ONLINING);
2116                 }
2117         }
2118 
2119         /*
2120          * Cache in the client list head.  If head of the list is NULL
2121          * return MDI_NOPATH
2122          */
2123         head = ct->ct_path_head;
2124         if (head == NULL) {
2125                 MDI_CLIENT_UNLOCK(ct);
2126                 return (MDI_NOPATH);
2127         }
2128 
2129         /* Caller is specifying a specific pathinfo path by path_instance */
2130         if (path_instance) {
2131                 /* search for pathinfo with correct path_instance */
2132                 for (pip = head;
2133                     pip && (mdi_pi_get_path_instance(pip) != path_instance);
2134                     pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link)
2135                         ;
2136 
2137                 /* If path can't be selected then MDI_NOPATH is returned. */
2138                 if (pip == NULL) {
2139                         MDI_CLIENT_UNLOCK(ct);
2140                         return (MDI_NOPATH);
2141                 }
2142 
2143                 /*
2144                  * Verify state of path. When asked to select a specific
2145                  * path_instance, we select the requested path in any
2146                  * state (ONLINE, OFFLINE, STANDBY, FAULT) other than INIT.
2147                  * We don't however select paths where the pHCI has detached.
2148                  * NOTE: last pathinfo node of an opened client device may
2149                  * exist in an OFFLINE state after the pHCI associated with
2150                  * that path has detached (but pi_phci will be NULL if that
2151                  * has occurred).
2152                  */
2153                 MDI_PI_LOCK(pip);
2154                 if ((MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_INIT) ||
2155                     (MDI_PI(pip)->pi_phci == NULL)) {
2156                         MDI_PI_UNLOCK(pip);
2157                         MDI_CLIENT_UNLOCK(ct);
2158                         return (MDI_FAILURE);
2159                 }
2160 
2161                 /* Return MDI_BUSY if we have a transient condition */
2162                 if (MDI_PI_IS_TRANSIENT(pip)) {
2163                         MDI_PI_UNLOCK(pip);
2164                         MDI_CLIENT_UNLOCK(ct);
2165                         return (MDI_BUSY);
2166                 }
2167 
2168                 /*
2169                  * Return the path in hold state. Caller should release the
2170                  * lock by calling mdi_rele_path()
2171                  */
2172                 MDI_PI_HOLD(pip);
2173                 MDI_PI_UNLOCK(pip);
2174                 *ret_pip = pip;
2175                 MDI_CLIENT_UNLOCK(ct);
2176                 return (MDI_SUCCESS);
2177         }
2178 
2179         /*
2180          * for non default behavior, bypass current
2181          * load balancing policy and always use LOAD_BALANCE_RR
2182          * except that the start point will be adjusted based
2183          * on the provided start_pip
2184          */
2185         lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;
2186 
2187         switch (lbp) {
2188         case LOAD_BALANCE_NONE:
2189                 /*
2190                  * Load balancing is None  or Alternate path mode
2191                  * Start looking for a online mdi_pathinfo node starting from
2192                  * last known selected path
2193                  */
2194                 preferred = 1;
2195                 pip = (mdi_pathinfo_t *)ct->ct_path_last;
2196                 if (pip == NULL) {
2197                         pip = head;
2198                 }
2199                 start = pip;
2200                 do {
2201                         MDI_PI_LOCK(pip);
2202                         /*
2203                          * No need to explicitly check if the path is disabled.
2204                          * Since we are checking for state == ONLINE and the
2205                          * same variable is used for DISABLE/ENABLE information.
2206                          */
2207                         if ((MDI_PI(pip)->pi_state  ==
2208                                 MDI_PATHINFO_STATE_ONLINE) &&
2209                                 preferred == MDI_PI(pip)->pi_preferred) {
2210                                 /*
2211                                  * Return the path in hold state. Caller should
2212                                  * release the lock by calling mdi_rele_path()
2213                                  */
2214                                 MDI_PI_HOLD(pip);
2215                                 MDI_PI_UNLOCK(pip);
2216                                 ct->ct_path_last = pip;
2217                                 *ret_pip = pip;
2218                                 MDI_CLIENT_UNLOCK(ct);
2219                                 return (MDI_SUCCESS);
2220                         }
2221 
2222                         /*
2223                          * Path is busy.
2224                          */
2225                         if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2226                             MDI_PI_IS_TRANSIENT(pip))
2227                                 retry = 1;
2228                         /*
2229                          * Keep looking for a next available online path
2230                          */
2231                         next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2232                         if (next == NULL) {
2233                                 next = head;
2234                         }
2235                         MDI_PI_UNLOCK(pip);
2236                         pip = next;
2237                         if (start == pip && preferred) {
2238                                 preferred = 0;
2239                         } else if (start == pip && !preferred) {
2240                                 cont = 0;
2241                         }
2242                 } while (cont);
2243                 break;
2244 
2245         case LOAD_BALANCE_LBA:
2246                 /*
2247                  * Make sure we are looking
2248                  * for an online path. Otherwise, if it is for a STANDBY
2249                  * path request, it will go through and fetch an ONLINE
2250                  * path which is not desirable.
2251                  */
2252                 if ((ct->ct_lb_args != NULL) &&
2253                             (ct->ct_lb_args->region_size) && bp &&
2254                                 (sb || (flags == MDI_SELECT_ONLINE_PATH))) {
2255                         if (i_mdi_lba_lb(ct, ret_pip, bp)
2256                                     == MDI_SUCCESS) {
2257                                 MDI_CLIENT_UNLOCK(ct);
2258                                 return (MDI_SUCCESS);
2259                         }
2260                 }
2261                 /* FALLTHROUGH */
2262         case LOAD_BALANCE_RR:
2263                 /*
2264                  * Load balancing is Round Robin. Start looking for a online
2265                  * mdi_pathinfo node starting from last known selected path
2266                  * as the start point.  If override flags are specified,
2267                  * process accordingly.
2268                  * If the search is already in effect(start_pip not null),
2269                  * then lets just use the same path preference to continue the
2270                  * traversal.
2271                  */
2272 
2273                 if (start_pip != NULL) {
2274                         preferred = MDI_PI(start_pip)->pi_preferred;
2275                 } else {
2276                         preferred = 1;
2277                 }
2278 
2279                 start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
2280                 if (start == NULL) {
2281                         pip = head;
2282                 } else {
2283                         pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
2284                         if (pip == NULL) {
2285                                 if ( flags & MDI_SELECT_NO_PREFERRED) {
2286                                         /*
2287                                          * Return since we hit the end of list
2288                                          */
2289                                         MDI_CLIENT_UNLOCK(ct);
2290                                         return (MDI_NOPATH);
2291                                 }
2292 
2293                                 if (!sb) {
2294                                         if (preferred == 0) {
2295                                                 /*
2296                                                  * Looks like we have completed
2297                                                  * the traversal as preferred
2298                                                  * value is 0. Time to bail out.
2299                                                  */
2300                                                 *ret_pip = NULL;
2301                                                 MDI_CLIENT_UNLOCK(ct);
2302                                                 return (MDI_NOPATH);
2303                                         } else {
2304                                                 /*
2305                                                  * Looks like we reached the
2306                                                  * end of the list. Lets enable
2307                                                  * traversal of non preferred
2308                                                  * paths.
2309                                                  */
2310                                                 preferred = 0;
2311                                         }
2312                                 }
2313                                 pip = head;
2314                         }
2315                 }
2316                 start = pip;
2317                 do {
2318                         MDI_PI_LOCK(pip);
2319                         if (sb) {
2320                                 cond = ((MDI_PI(pip)->pi_state ==
2321                                     MDI_PATHINFO_STATE_ONLINE &&
2322                                         MDI_PI(pip)->pi_preferred ==
2323                                                 preferred) ? 1 : 0);
2324                         } else {
2325                                 if (flags == MDI_SELECT_ONLINE_PATH) {
2326                                         cond = ((MDI_PI(pip)->pi_state ==
2327                                             MDI_PATHINFO_STATE_ONLINE &&
2328                                                 MDI_PI(pip)->pi_preferred ==
2329                                                 preferred) ? 1 : 0);
2330                                 } else if (flags == MDI_SELECT_STANDBY_PATH) {
2331                                         cond = ((MDI_PI(pip)->pi_state ==
2332                                             MDI_PATHINFO_STATE_STANDBY &&
2333                                                 MDI_PI(pip)->pi_preferred ==
2334                                                 preferred) ? 1 : 0);
2335                                 } else if (flags == (MDI_SELECT_ONLINE_PATH |
2336                                     MDI_SELECT_STANDBY_PATH)) {
2337                                         cond = (((MDI_PI(pip)->pi_state ==
2338                                             MDI_PATHINFO_STATE_ONLINE ||
2339                                             (MDI_PI(pip)->pi_state ==
2340                                             MDI_PATHINFO_STATE_STANDBY)) &&
2341                                                 MDI_PI(pip)->pi_preferred ==
2342                                                 preferred) ? 1 : 0);
2343                                 } else if (flags ==
2344                                         (MDI_SELECT_STANDBY_PATH |
2345                                         MDI_SELECT_ONLINE_PATH |
2346                                         MDI_SELECT_USER_DISABLE_PATH)) {
2347                                         cond = (((MDI_PI(pip)->pi_state ==
2348                                             MDI_PATHINFO_STATE_ONLINE ||
2349                                             (MDI_PI(pip)->pi_state ==
2350                                             MDI_PATHINFO_STATE_STANDBY) ||
2351                                                 (MDI_PI(pip)->pi_state ==
2352                                             (MDI_PATHINFO_STATE_ONLINE|
2353                                             MDI_PATHINFO_STATE_USER_DISABLE)) ||
2354                                                 (MDI_PI(pip)->pi_state ==
2355                                             (MDI_PATHINFO_STATE_STANDBY |
2356                                             MDI_PATHINFO_STATE_USER_DISABLE)))&&
2357                                                 MDI_PI(pip)->pi_preferred ==
2358                                                 preferred) ? 1 : 0);
2359                                 } else if (flags ==
2360                                     (MDI_SELECT_STANDBY_PATH |
2361                                     MDI_SELECT_ONLINE_PATH |
2362                                     MDI_SELECT_NO_PREFERRED)) {
2363                                         cond = (((MDI_PI(pip)->pi_state ==
2364                                             MDI_PATHINFO_STATE_ONLINE) ||
2365                                             (MDI_PI(pip)->pi_state ==
2366                                             MDI_PATHINFO_STATE_STANDBY))
2367                                             ? 1 : 0);
2368                                 } else {
2369                                         cond = 0;
2370                                 }
2371                         }
2372                         /*
2373                          * No need to explicitly check if the path is disabled.
2374                          * Since we are checking for state == ONLINE and the
2375                          * same variable is used for DISABLE/ENABLE information.
2376                          */
2377                         if (cond) {
2378                                 /*
2379                                  * Return the path in hold state. Caller should
2380                                  * release the lock by calling mdi_rele_path()
2381                                  */
2382                                 MDI_PI_HOLD(pip);
2383                                 MDI_PI_UNLOCK(pip);
2384                                 if (sb)
2385                                         ct->ct_path_last = pip;
2386                                 *ret_pip = pip;
2387                                 MDI_CLIENT_UNLOCK(ct);
2388                                 return (MDI_SUCCESS);
2389                         }
2390                         /*
2391                          * Path is busy.
2392                          */
2393                         if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2394                             MDI_PI_IS_TRANSIENT(pip))
2395                                 retry = 1;
2396 
2397                         /*
2398                          * Keep looking for a next available online path
2399                          */
2400 do_again:
2401                         next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2402                         if (next == NULL) {
2403                                 if ( flags & MDI_SELECT_NO_PREFERRED) {
2404                                         /*
2405                                          * Bail out since we hit the end of list
2406                                          */
2407                                         MDI_PI_UNLOCK(pip);
2408                                         break;
2409                                 }
2410 
2411                                 if (!sb) {
2412                                         if (preferred == 1) {
2413                                                 /*
2414                                                  * Looks like we reached the
2415                                                  * end of the list. Lets enable
2416                                                  * traversal of non preferred
2417                                                  * paths.
2418                                                  */
2419                                                 preferred = 0;
2420                                                 next = head;
2421                                         } else {
2422                                                 /*
2423                                                  * We have done both the passes
2424                                                  * Preferred as well as for
2425                                                  * Non-preferred. Bail out now.
2426                                                  */
2427                                                 cont = 0;
2428                                         }
2429                                 } else {
2430                                         /*
2431                                          * Standard behavior case.
2432                                          */
2433                                         next = head;
2434                                 }
2435                         }
2436                         MDI_PI_UNLOCK(pip);
2437                         if (cont == 0) {
2438                                 break;
2439                         }
2440                         pip = next;
2441 
2442                         if (!sb) {
2443                                 /*
2444                                  * We need to handle the selection of
2445                                  * non-preferred path in the following
2446                                  * case:
2447                                  *
2448                                  * +------+   +------+   +------+   +-----+
2449                                  * | A : 1| - | B : 1| - | C : 0| - |NULL |
2450                                  * +------+   +------+   +------+   +-----+
2451                                  *
2452                                  * If we start the search with B, we need to
2453                                  * skip beyond B to pick C which is non -
2454                                  * preferred in the second pass. The following
2455                                  * test, if true, will allow us to skip over
2456                                  * the 'start'(B in the example) to select
2457                                  * other non preferred elements.
2458                                  */
2459                                 if ((start_pip != NULL) && (start_pip == pip) &&
2460                                     (MDI_PI(start_pip)->pi_preferred
2461                                     != preferred)) {
2462                                         /*
2463                                          * try again after going past the start
2464                                          * pip
2465                                          */
2466                                         MDI_PI_LOCK(pip);
2467                                         goto do_again;
2468                                 }
2469                         } else {
2470                                 /*
2471                                  * Standard behavior case
2472                                  */
2473                                 if (start == pip && preferred) {
2474                                         /* look for nonpreferred paths */
2475                                         preferred = 0;
2476                                 } else if (start == pip && !preferred) {
2477                                         /*
2478                                          * Exit condition
2479                                          */
2480                                         cont = 0;
2481                                 }
2482                         }
2483                 } while (cont);
2484                 break;
2485         }
2486 
2487         MDI_CLIENT_UNLOCK(ct);
2488         if (retry == 1) {
2489                 return (MDI_BUSY);
2490         } else {
2491                 return (MDI_NOPATH);
2492         }
2493 }
2494 
2495 /*
2496  * For a client, return the next available path to any phci
2497  *
2498  * Note:
2499  *              Caller should hold the branch's devinfo node to get a consistent
2500  *              snap shot of the mdi_pathinfo nodes.
2501  *
2502  *              Please note that even the list is stable the mdi_pathinfo
2503  *              node state and properties are volatile.  The caller should lock
2504  *              and unlock the nodes by calling mdi_pi_lock() and
2505  *              mdi_pi_unlock() functions to get a stable properties.
2506  *
2507  *              If there is a need to use the nodes beyond the hold of the
2508  *              devinfo node period (For ex. I/O), then mdi_pathinfo node
2509  *              need to be held against unexpected removal by calling
2510  *              mdi_hold_path() and should be released by calling
2511  *              mdi_rele_path() on completion.
2512  */
2513 mdi_pathinfo_t *
2514 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
2515 {
2516         mdi_client_t *ct;
2517 
2518         if (!MDI_CLIENT(ct_dip))
2519                 return (NULL);
2520 
2521         /*
2522          * Walk through client link
2523          */
2524         ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
2525         ASSERT(ct != NULL);
2526 
2527         if (pip == NULL)
2528                 return ((mdi_pathinfo_t *)ct->ct_path_head);
2529 
2530         return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
2531 }
2532 
2533 /*
2534  * For a phci, return the next available path to any client
2535  * Note: ditto mdi_get_next_phci_path()
2536  */
2537 mdi_pathinfo_t *
2538 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
2539 {
2540         mdi_phci_t *ph;
2541 
2542         if (!MDI_PHCI(ph_dip))
2543                 return (NULL);
2544 
2545         /*
2546          * Walk through pHCI link
2547          */
2548         ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
2549         ASSERT(ph != NULL);
2550 
2551         if (pip == NULL)
2552                 return ((mdi_pathinfo_t *)ph->ph_path_head);
2553 
2554         return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
2555 }
2556 
2557 /*
2558  * mdi_hold_path():
2559  *              Hold the mdi_pathinfo node against unwanted unexpected free.
2560  * Return Values:
2561  *              None
2562  */
2563 void
2564 mdi_hold_path(mdi_pathinfo_t *pip)
2565 {
2566         if (pip) {
2567                 MDI_PI_LOCK(pip);
2568                 MDI_PI_HOLD(pip);
2569                 MDI_PI_UNLOCK(pip);
2570         }
2571 }
2572 
2573 
2574 /*
2575  * mdi_rele_path():
2576  *              Release the mdi_pathinfo node which was selected
2577  *              through mdi_select_path() mechanism or manually held by
2578  *              calling mdi_hold_path().
2579  * Return Values:
2580  *              None
2581  */
2582 void
2583 mdi_rele_path(mdi_pathinfo_t *pip)
2584 {
2585         if (pip) {
2586                 MDI_PI_LOCK(pip);
2587                 MDI_PI_RELE(pip);
2588                 if (MDI_PI(pip)->pi_ref_cnt == 0) {
2589                         cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
2590                 }
2591                 MDI_PI_UNLOCK(pip);
2592         }
2593 }
2594 
2595 /*
2596  * mdi_pi_lock():
2597  *              Lock the mdi_pathinfo node.
2598  * Note:
2599  *              The caller should release the lock by calling mdi_pi_unlock()
2600  */
2601 void
2602 mdi_pi_lock(mdi_pathinfo_t *pip)
2603 {
2604         ASSERT(pip != NULL);
2605         if (pip) {
2606                 MDI_PI_LOCK(pip);
2607         }
2608 }
2609 
2610 
2611 /*
2612  * mdi_pi_unlock():
2613  *              Unlock the mdi_pathinfo node.
2614  * Note:
2615  *              The mdi_pathinfo node should have been locked with mdi_pi_lock()
2616  */
2617 void
2618 mdi_pi_unlock(mdi_pathinfo_t *pip)
2619 {
2620         ASSERT(pip != NULL);
2621         if (pip) {
2622                 MDI_PI_UNLOCK(pip);
2623         }
2624 }
2625 
2626 /*
2627  * mdi_pi_find():
2628  *              Search the list of mdi_pathinfo nodes attached to the
2629  *              pHCI/Client device node whose path address matches "paddr".
2630  *              Returns a pointer to the mdi_pathinfo node if a matching node is
2631  *              found.
2632  * Return Values:
2633  *              mdi_pathinfo node handle
2634  *              NULL
2635  * Notes:
2636  *              Caller need not hold any locks to call this function.
2637  */
2638 mdi_pathinfo_t *
2639 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
2640 {
2641         mdi_phci_t              *ph;
2642         mdi_vhci_t              *vh;
2643         mdi_client_t            *ct;
2644         mdi_pathinfo_t          *pip = NULL;
2645 
2646         MDI_DEBUG(2, (MDI_NOTE, pdip,
2647             "caddr@%s paddr@%s", caddr ? caddr : "", paddr ? paddr : ""));
2648         if ((pdip == NULL) || (paddr == NULL)) {
2649                 return (NULL);
2650         }
2651         ph = i_devi_get_phci(pdip);
2652         if (ph == NULL) {
2653                 /*
2654                  * Invalid pHCI device, Nothing more to do.
2655                  */
2656                 MDI_DEBUG(2, (MDI_WARN, pdip, "invalid phci"));
2657                 return (NULL);
2658         }
2659 
2660         vh = ph->ph_vhci;
2661         if (vh == NULL) {
2662                 /*
2663                  * Invalid vHCI device, Nothing more to do.
2664                  */
2665                 MDI_DEBUG(2, (MDI_WARN, pdip, "invalid vhci"));
2666                 return (NULL);
2667         }
2668 
2669         /*
2670          * Look for pathinfo node identified by paddr.
2671          */
2672         if (caddr == NULL) {
2673                 /*
2674                  * Find a mdi_pathinfo node under pHCI list for a matching
2675                  * unit address.
2676                  */
2677                 MDI_PHCI_LOCK(ph);
2678                 if (MDI_PHCI_IS_OFFLINE(ph)) {
2679                         MDI_DEBUG(2, (MDI_WARN, pdip,
2680                             "offline phci %p", (void *)ph));
2681                         MDI_PHCI_UNLOCK(ph);
2682                         return (NULL);
2683                 }
2684                 pip = (mdi_pathinfo_t *)ph->ph_path_head;
2685 
2686                 while (pip != NULL) {
2687                         if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2688                                 break;
2689                         }
2690                         pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
2691                 }
2692                 MDI_PHCI_UNLOCK(ph);
2693                 MDI_DEBUG(2, (MDI_NOTE, pdip,
2694                     "found %s %p", mdi_pi_spathname(pip), (void *)pip));
2695                 return (pip);
2696         }
2697 
2698         /*
2699          * XXX - Is the rest of the code in this function really necessary?
2700          * The consumers of mdi_pi_find() can search for the desired pathinfo
2701          * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
2702          * whether the search is based on the pathinfo nodes attached to
2703          * the pHCI or the client node, the result will be the same.
2704          */
2705 
2706         /*
2707          * Find the client device corresponding to 'caddr'
2708          */
2709         MDI_VHCI_CLIENT_LOCK(vh);
2710 
2711         /*
2712          * XXX - Passing NULL to the following function works as long as the
2713          * the client addresses (caddr) are unique per vhci basis.
2714          */
2715         ct = i_mdi_client_find(vh, NULL, caddr);
2716         if (ct == NULL) {
2717                 /*
2718                  * Client not found, Obviously mdi_pathinfo node has not been
2719                  * created yet.
2720                  */
2721                 MDI_VHCI_CLIENT_UNLOCK(vh);
2722                 MDI_DEBUG(2, (MDI_NOTE, pdip,
2723                     "client not found for caddr @%s", caddr ? caddr : ""));
2724                 return (NULL);
2725         }
2726 
2727         /*
2728          * Hold the client lock and look for a mdi_pathinfo node with matching
2729          * pHCI and paddr
2730          */
2731         MDI_CLIENT_LOCK(ct);
2732 
2733         /*
2734          * Release the global mutex as it is no more needed. Note: We always
2735          * respect the locking order while acquiring.
2736          */
2737         MDI_VHCI_CLIENT_UNLOCK(vh);
2738 
2739         pip = (mdi_pathinfo_t *)ct->ct_path_head;
2740         while (pip != NULL) {
2741                 /*
2742                  * Compare the unit address
2743                  */
2744                 if ((MDI_PI(pip)->pi_phci == ph) &&
2745                     strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2746                         break;
2747                 }
2748                 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2749         }
2750         MDI_CLIENT_UNLOCK(ct);
2751         MDI_DEBUG(2, (MDI_NOTE, pdip,
2752             "found: %s %p", mdi_pi_spathname(pip), (void *)pip));
2753         return (pip);
2754 }
2755 
2756 /*
2757  * mdi_pi_alloc():
2758  *              Allocate and initialize a new instance of a mdi_pathinfo node.
2759  *              The mdi_pathinfo node returned by this function identifies a
2760  *              unique device path is capable of having properties attached
2761  *              and passed to mdi_pi_online() to fully attach and online the
2762  *              path and client device node.
2763  *              The mdi_pathinfo node returned by this function must be
2764  *              destroyed using mdi_pi_free() if the path is no longer
2765  *              operational or if the caller fails to attach a client device
2766  *              node when calling mdi_pi_online(). The framework will not free
2767  *              the resources allocated.
2768  *              This function can be called from both interrupt and kernel
2769  *              contexts.  DDI_NOSLEEP flag should be used while calling
2770  *              from interrupt contexts.
2771  * Return Values:
2772  *              MDI_SUCCESS
2773  *              MDI_FAILURE
2774  *              MDI_NOMEM
2775  */
2776 /*ARGSUSED*/
2777 int
2778 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2779     char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
2780 {
2781         mdi_vhci_t      *vh;
2782         mdi_phci_t      *ph;
2783         mdi_client_t    *ct;
2784         mdi_pathinfo_t  *pip = NULL;
2785         dev_info_t      *cdip;
2786         int             rv = MDI_NOMEM;
2787         int             path_allocated = 0;
2788 
2789         MDI_DEBUG(2, (MDI_NOTE, pdip,
2790             "cname %s: caddr@%s paddr@%s",
2791             cname ? cname : "", caddr ? caddr : "", paddr ? paddr : ""));
2792 
2793         if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
2794             ret_pip == NULL) {
2795                 /* Nothing more to do */
2796                 return (MDI_FAILURE);
2797         }
2798 
2799         *ret_pip = NULL;
2800 
2801         /* No allocations on detaching pHCI */
2802         if (DEVI_IS_DETACHING(pdip)) {
2803                 /* Invalid pHCI device, return failure */
2804                 MDI_DEBUG(1, (MDI_WARN, pdip,
2805                     "!detaching pHCI=%p", (void *)pdip));
2806                 return (MDI_FAILURE);
2807         }
2808 
2809         ph = i_devi_get_phci(pdip);
2810         ASSERT(ph != NULL);
2811         if (ph == NULL) {
2812                 /* Invalid pHCI device, return failure */
2813                 MDI_DEBUG(1, (MDI_WARN, pdip,
2814                     "!invalid pHCI=%p", (void *)pdip));
2815                 return (MDI_FAILURE);
2816         }
2817 
2818         MDI_PHCI_LOCK(ph);
2819         vh = ph->ph_vhci;
2820         if (vh == NULL) {
2821                 /* Invalid vHCI device, return failure */
2822                 MDI_DEBUG(1, (MDI_WARN, pdip,
2823                     "!invalid vHCI=%p", (void *)pdip));
2824                 MDI_PHCI_UNLOCK(ph);
2825                 return (MDI_FAILURE);
2826         }
2827 
2828         if (MDI_PHCI_IS_READY(ph) == 0) {
2829                 /*
2830                  * Do not allow new node creation when pHCI is in
2831                  * offline/suspended states
2832                  */
2833                 MDI_DEBUG(1, (MDI_WARN, pdip,
2834                     "pHCI=%p is not ready", (void *)ph));
2835                 MDI_PHCI_UNLOCK(ph);
2836                 return (MDI_BUSY);
2837         }
2838         MDI_PHCI_UNSTABLE(ph);
2839         MDI_PHCI_UNLOCK(ph);
2840 
2841         /* look for a matching client, create one if not found */
2842         MDI_VHCI_CLIENT_LOCK(vh);
2843         ct = i_mdi_client_find(vh, cname, caddr);
2844         if (ct == NULL) {
2845                 ct = i_mdi_client_alloc(vh, cname, caddr);
2846                 ASSERT(ct != NULL);
2847         }
2848 
2849         if (ct->ct_dip == NULL) {
2850                 /*
2851                  * Allocate a devinfo node
2852                  */
2853                 ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
2854                     compatible, ncompatible);
2855                 if (ct->ct_dip == NULL) {
2856                         (void) i_mdi_client_free(vh, ct);
2857                         goto fail;
2858                 }
2859         }
2860         cdip = ct->ct_dip;
2861 
2862         DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
2863         DEVI(cdip)->devi_mdi_client = (caddr_t)ct;
2864 
2865         MDI_CLIENT_LOCK(ct);
2866         pip = (mdi_pathinfo_t *)ct->ct_path_head;
2867         while (pip != NULL) {
2868                 /*
2869                  * Compare the unit address
2870                  */
2871                 if ((MDI_PI(pip)->pi_phci == ph) &&
2872                     strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2873                         break;
2874                 }
2875                 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2876         }
2877         MDI_CLIENT_UNLOCK(ct);
2878 
2879         if (pip == NULL) {
2880                 /*
2881                  * This is a new path for this client device.  Allocate and
2882                  * initialize a new pathinfo node
2883                  */
2884                 pip = i_mdi_pi_alloc(ph, paddr, ct);
2885                 ASSERT(pip != NULL);
2886                 path_allocated = 1;
2887         }
2888         rv = MDI_SUCCESS;
2889 
2890 fail:
2891         /*
2892          * Release the global mutex.
2893          */
2894         MDI_VHCI_CLIENT_UNLOCK(vh);
2895 
2896         /*
2897          * Mark the pHCI as stable
2898          */
2899         MDI_PHCI_LOCK(ph);
2900         MDI_PHCI_STABLE(ph);
2901         MDI_PHCI_UNLOCK(ph);
2902         *ret_pip = pip;
2903 
2904         MDI_DEBUG(2, (MDI_NOTE, pdip,
2905             "alloc %s %p", mdi_pi_spathname(pip), (void *)pip));
2906 
2907         if (path_allocated)
2908                 vhcache_pi_add(vh->vh_config, MDI_PI(pip));
2909 
2910         return (rv);
2911 }
2912 
2913 /*ARGSUSED*/
2914 int
2915 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2916     int flags, mdi_pathinfo_t **ret_pip)
2917 {
2918         return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
2919             flags, ret_pip));
2920 }
2921 
2922 /*
2923  * i_mdi_pi_alloc():
2924  *              Allocate a mdi_pathinfo node and add to the pHCI path list
2925  * Return Values:
2926  *              mdi_pathinfo
2927  */
2928 /*ARGSUSED*/
2929 static mdi_pathinfo_t *
2930 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
2931 {
2932         mdi_pathinfo_t  *pip;
2933         int             ct_circular;
2934         int             ph_circular;
2935         static char     path[MAXPATHLEN];       /* mdi_pathmap_mutex protects */
2936         char            *path_persistent;
2937         int             path_instance;
2938         mod_hash_val_t  hv;
2939 
2940         ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));
2941 
2942         pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
2943         mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
2944         MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
2945             MDI_PATHINFO_STATE_TRANSIENT;
2946 
2947         if (MDI_PHCI_IS_USER_DISABLED(ph))
2948                 MDI_PI_SET_USER_DISABLE(pip);
2949 
2950         if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
2951                 MDI_PI_SET_DRV_DISABLE_TRANS(pip);
2952 
2953         if (MDI_PHCI_IS_DRV_DISABLED(ph))
2954                 MDI_PI_SET_DRV_DISABLE(pip);
2955 
2956         MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
2957         cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
2958         MDI_PI(pip)->pi_client = ct;
2959         MDI_PI(pip)->pi_phci = ph;
2960         MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
2961         (void) strcpy(MDI_PI(pip)->pi_addr, paddr);
2962 
2963         /*
2964          * We form the "path" to the pathinfo node, and see if we have
2965          * already allocated a 'path_instance' for that "path".  If so,
2966          * we use the already allocated 'path_instance'.  If not, we
2967          * allocate a new 'path_instance' and associate it with a copy of
2968          * the "path" string (which is never freed). The association
2969          * between a 'path_instance' this "path" string persists until
2970          * reboot.
2971          */
2972         mutex_enter(&mdi_pathmap_mutex);
2973         (void) ddi_pathname(ph->ph_dip, path);
2974         (void) sprintf(path + strlen(path), "/%s@%s",
2975             mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
2976         if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) {
2977                 path_instance = (uint_t)(intptr_t)hv;
2978         } else {
2979                 /* allocate a new 'path_instance' and persistent "path" */
2980                 path_instance = mdi_pathmap_instance++;
2981                 path_persistent = i_ddi_strdup(path, KM_SLEEP);
2982                 (void) mod_hash_insert(mdi_pathmap_bypath,
2983                     (mod_hash_key_t)path_persistent,
2984                     (mod_hash_val_t)(intptr_t)path_instance);
2985                 (void) mod_hash_insert(mdi_pathmap_byinstance,
2986                     (mod_hash_key_t)(intptr_t)path_instance,
2987                     (mod_hash_val_t)path_persistent);
2988 
2989                 /* create shortpath name */
2990                 (void) snprintf(path, sizeof(path), "%s%d/%s@%s",
2991                     ddi_driver_name(ph->ph_dip), ddi_get_instance(ph->ph_dip),
2992                     mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
2993                 path_persistent = i_ddi_strdup(path, KM_SLEEP);
2994                 (void) mod_hash_insert(mdi_pathmap_sbyinstance,
2995                     (mod_hash_key_t)(intptr_t)path_instance,
2996                     (mod_hash_val_t)path_persistent);
2997         }
2998         mutex_exit(&mdi_pathmap_mutex);
2999         MDI_PI(pip)->pi_path_instance = path_instance;
3000 
3001         (void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
3002         ASSERT(MDI_PI(pip)->pi_prop != NULL);
3003         MDI_PI(pip)->pi_pprivate = NULL;
3004         MDI_PI(pip)->pi_cprivate = NULL;
3005         MDI_PI(pip)->pi_vprivate = NULL;
3006         MDI_PI(pip)->pi_client_link = NULL;
3007         MDI_PI(pip)->pi_phci_link = NULL;
3008         MDI_PI(pip)->pi_ref_cnt = 0;
3009         MDI_PI(pip)->pi_kstats = NULL;
3010         MDI_PI(pip)->pi_preferred = 1;
3011         cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);
3012 
3013         /*
3014          * Lock both dev_info nodes against changes in parallel.
3015          *
3016          * The ndi_devi_enter(Client), is atypical since the client is a leaf.
3017          * This atypical operation is done to synchronize pathinfo nodes
3018          * during devinfo snapshot (see di_register_pip) by 'pretending' that
3019          * the pathinfo nodes are children of the Client.
3020          */
3021         ndi_devi_enter(ct->ct_dip, &ct_circular);
3022         ndi_devi_enter(ph->ph_dip, &ph_circular);
3023 
3024         i_mdi_phci_add_path(ph, pip);
3025         i_mdi_client_add_path(ct, pip);
3026 
3027         ndi_devi_exit(ph->ph_dip, ph_circular);
3028         ndi_devi_exit(ct->ct_dip, ct_circular);
3029 
3030         return (pip);
3031 }
3032 
3033 /*
3034  * mdi_pi_pathname_by_instance():
3035  *      Lookup of "path" by 'path_instance'. Return "path".
3036  *      NOTE: returned "path" remains valid forever (until reboot).
3037  */
3038 char *
3039 mdi_pi_pathname_by_instance(int path_instance)
3040 {
3041         char            *path;
3042         mod_hash_val_t  hv;
3043 
3044         /* mdi_pathmap lookup of "path" by 'path_instance' */
3045         mutex_enter(&mdi_pathmap_mutex);
3046         if (mod_hash_find(mdi_pathmap_byinstance,
3047             (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
3048                 path = (char *)hv;
3049         else
3050                 path = NULL;
3051         mutex_exit(&mdi_pathmap_mutex);
3052         return (path);
3053 }
3054 
3055 /*
3056  * mdi_pi_spathname_by_instance():
3057  *      Lookup of "shortpath" by 'path_instance'. Return "shortpath".
3058  *      NOTE: returned "shortpath" remains valid forever (until reboot).
3059  */
3060 char *
3061 mdi_pi_spathname_by_instance(int path_instance)
3062 {
3063         char            *path;
3064         mod_hash_val_t  hv;
3065 
3066         /* mdi_pathmap lookup of "path" by 'path_instance' */
3067         mutex_enter(&mdi_pathmap_mutex);
3068         if (mod_hash_find(mdi_pathmap_sbyinstance,
3069             (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
3070                 path = (char *)hv;
3071         else
3072                 path = NULL;
3073         mutex_exit(&mdi_pathmap_mutex);
3074         return (path);
3075 }
3076 
3077 
3078 /*
3079  * i_mdi_phci_add_path():
3080  *              Add a mdi_pathinfo node to pHCI list.
3081  * Notes:
3082  *              Caller should per-pHCI mutex
3083  */
3084 static void
3085 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3086 {
3087         ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3088 
3089         MDI_PHCI_LOCK(ph);
3090         if (ph->ph_path_head == NULL) {
3091                 ph->ph_path_head = pip;
3092         } else {
3093                 MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
3094         }
3095         ph->ph_path_tail = pip;
3096         ph->ph_path_count++;
3097         MDI_PHCI_UNLOCK(ph);
3098 }
3099 
3100 /*
3101  * i_mdi_client_add_path():
3102  *              Add mdi_pathinfo node to client list
3103  */
3104 static void
3105 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3106 {
3107         ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3108 
3109         MDI_CLIENT_LOCK(ct);
3110         if (ct->ct_path_head == NULL) {
3111                 ct->ct_path_head = pip;
3112         } else {
3113                 MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
3114         }
3115         ct->ct_path_tail = pip;
3116         ct->ct_path_count++;
3117         MDI_CLIENT_UNLOCK(ct);
3118 }
3119 
3120 /*
3121  * mdi_pi_free():
3122  *              Free the mdi_pathinfo node and also client device node if this
3123  *              is the last path to the device
3124  * Return Values:
3125  *              MDI_SUCCESS
3126  *              MDI_FAILURE
3127  *              MDI_BUSY
3128  */
3129 /*ARGSUSED*/
3130 int
3131 mdi_pi_free(mdi_pathinfo_t *pip, int flags)
3132 {
3133         int             rv;
3134         mdi_vhci_t      *vh;
3135         mdi_phci_t      *ph;
3136         mdi_client_t    *ct;
3137         int             (*f)();
3138         int             client_held = 0;
3139 
3140         MDI_PI_LOCK(pip);
3141         ph = MDI_PI(pip)->pi_phci;
3142         ASSERT(ph != NULL);
3143         if (ph == NULL) {
3144                 /*
3145                  * Invalid pHCI device, return failure
3146                  */
3147                 MDI_DEBUG(1, (MDI_WARN, NULL,
3148                     "!invalid pHCI: pip %s %p",
3149                     mdi_pi_spathname(pip), (void *)pip));
3150                 MDI_PI_UNLOCK(pip);
3151                 return (MDI_FAILURE);
3152         }
3153 
3154         vh = ph->ph_vhci;
3155         ASSERT(vh != NULL);
3156         if (vh == NULL) {
3157                 /* Invalid pHCI device, return failure */
3158                 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3159                     "!invalid vHCI: pip %s %p",
3160                     mdi_pi_spathname(pip), (void *)pip));
3161                 MDI_PI_UNLOCK(pip);
3162                 return (MDI_FAILURE);
3163         }
3164 
3165         ct = MDI_PI(pip)->pi_client;
3166         ASSERT(ct != NULL);
3167         if (ct == NULL) {
3168                 /*
3169                  * Invalid Client device, return failure
3170                  */
3171                 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3172                     "!invalid client: pip %s %p",
3173                     mdi_pi_spathname(pip), (void *)pip));
3174                 MDI_PI_UNLOCK(pip);
3175                 return (MDI_FAILURE);
3176         }
3177 
3178         /*
3179          * Check to see for busy condition.  A mdi_pathinfo can only be freed
3180          * if the node state is either offline or init and the reference count
3181          * is zero.
3182          */
3183         if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
3184             MDI_PI_IS_INITING(pip))) {
3185                 /*
3186                  * Node is busy
3187                  */
3188                 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3189                     "!busy: pip %s %p", mdi_pi_spathname(pip), (void *)pip));
3190                 MDI_PI_UNLOCK(pip);
3191                 return (MDI_BUSY);
3192         }
3193 
3194         while (MDI_PI(pip)->pi_ref_cnt != 0) {
3195                 /*
3196                  * Give a chance for pending I/Os to complete.
3197                  */
3198                 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3199                     "!%d cmds still pending on path: %s %p",
3200                     MDI_PI(pip)->pi_ref_cnt,
3201                     mdi_pi_spathname(pip), (void *)pip));
3202                 if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
3203                     &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
3204                     TR_CLOCK_TICK) == -1) {
3205                         /*
3206                          * The timeout time reached without ref_cnt being zero
3207                          * being signaled.
3208                          */
3209                         MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3210                             "!Timeout reached on path %s %p without the cond",
3211                             mdi_pi_spathname(pip), (void *)pip));
3212                         MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3213                             "!%d cmds still pending on path %s %p",
3214                             MDI_PI(pip)->pi_ref_cnt,
3215                             mdi_pi_spathname(pip), (void *)pip));
3216                         MDI_PI_UNLOCK(pip);
3217                         return (MDI_BUSY);
3218                 }
3219         }
3220         if (MDI_PI(pip)->pi_pm_held) {
3221                 client_held = 1;
3222         }
3223         MDI_PI_UNLOCK(pip);
3224 
3225         vhcache_pi_remove(vh->vh_config, MDI_PI(pip));
3226 
3227         MDI_CLIENT_LOCK(ct);
3228 
3229         /* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
3230         MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);
3231 
3232         /*
3233          * Wait till failover is complete before removing this node.
3234          */
3235         while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3236                 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3237 
3238         MDI_CLIENT_UNLOCK(ct);
3239         MDI_VHCI_CLIENT_LOCK(vh);
3240         MDI_CLIENT_LOCK(ct);
3241         MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);
3242 
3243         if (!MDI_PI_IS_INITING(pip)) {
3244                 f = vh->vh_ops->vo_pi_uninit;
3245                 if (f != NULL) {
3246                         rv = (*f)(vh->vh_dip, pip, 0);
3247                 }
3248         } else
3249                 rv = MDI_SUCCESS;
3250 
3251         /*
3252          * If vo_pi_uninit() completed successfully.
3253          */
3254         if (rv == MDI_SUCCESS) {
3255                 if (client_held) {
3256                         MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3257                             "i_mdi_pm_rele_client\n"));
3258                         i_mdi_pm_rele_client(ct, 1);
3259                 }
3260                 i_mdi_pi_free(ph, pip, ct);
3261                 if (ct->ct_path_count == 0) {
3262                         /*
3263                          * Client lost its last path.
3264                          * Clean up the client device
3265                          */
3266                         MDI_CLIENT_UNLOCK(ct);
3267                         (void) i_mdi_client_free(ct->ct_vhci, ct);
3268                         MDI_VHCI_CLIENT_UNLOCK(vh);
3269                         return (rv);
3270                 }
3271         }
3272         MDI_CLIENT_UNLOCK(ct);
3273         MDI_VHCI_CLIENT_UNLOCK(vh);
3274 
3275         if (rv == MDI_FAILURE)
3276                 vhcache_pi_add(vh->vh_config, MDI_PI(pip));
3277 
3278         return (rv);
3279 }
3280 
3281 /*
3282  * i_mdi_pi_free():
3283  *              Free the mdi_pathinfo node
3284  */
3285 static void
3286 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
3287 {
3288         int     ct_circular;
3289         int     ph_circular;
3290 
3291         ASSERT(MDI_CLIENT_LOCKED(ct));
3292 
3293         /*
3294          * remove any per-path kstats
3295          */
3296         i_mdi_pi_kstat_destroy(pip);
3297 
3298         /* See comments in i_mdi_pi_alloc() */
3299         ndi_devi_enter(ct->ct_dip, &ct_circular);
3300         ndi_devi_enter(ph->ph_dip, &ph_circular);
3301 
3302         i_mdi_client_remove_path(ct, pip);
3303         i_mdi_phci_remove_path(ph, pip);
3304 
3305         ndi_devi_exit(ph->ph_dip, ph_circular);
3306         ndi_devi_exit(ct->ct_dip, ct_circular);
3307 
3308         mutex_destroy(&MDI_PI(pip)->pi_mutex);
3309         cv_destroy(&MDI_PI(pip)->pi_state_cv);
3310         cv_destroy(&MDI_PI(pip)->pi_ref_cv);
3311         if (MDI_PI(pip)->pi_addr) {
3312                 kmem_free(MDI_PI(pip)->pi_addr,
3313                     strlen(MDI_PI(pip)->pi_addr) + 1);
3314                 MDI_PI(pip)->pi_addr = NULL;
3315         }
3316 
3317         if (MDI_PI(pip)->pi_prop) {
3318                 (void) nvlist_free(MDI_PI(pip)->pi_prop);
3319                 MDI_PI(pip)->pi_prop = NULL;
3320         }
3321         kmem_free(pip, sizeof (struct mdi_pathinfo));
3322 }
3323 
3324 
3325 /*
3326  * i_mdi_phci_remove_path():
3327  *              Remove a mdi_pathinfo node from pHCI list.
3328  * Notes:
3329  *              Caller should hold per-pHCI mutex
3330  */
3331 static void
3332 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3333 {
3334         mdi_pathinfo_t  *prev = NULL;
3335         mdi_pathinfo_t  *path = NULL;
3336 
3337         ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3338 
3339         MDI_PHCI_LOCK(ph);
3340         path = ph->ph_path_head;
3341         while (path != NULL) {
3342                 if (path == pip) {
3343                         break;
3344                 }
3345                 prev = path;
3346                 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3347         }
3348 
3349         if (path) {
3350                 ph->ph_path_count--;
3351                 if (prev) {
3352                         MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
3353                 } else {
3354                         ph->ph_path_head =
3355                             (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3356                 }
3357                 if (ph->ph_path_tail == path) {
3358                         ph->ph_path_tail = prev;
3359                 }
3360         }
3361 
3362         /*
3363          * Clear the pHCI link
3364          */
3365         MDI_PI(pip)->pi_phci_link = NULL;
3366         MDI_PI(pip)->pi_phci = NULL;
3367         MDI_PHCI_UNLOCK(ph);
3368 }
3369 
3370 /*
3371  * i_mdi_client_remove_path():
3372  *              Remove a mdi_pathinfo node from client path list.
3373  */
3374 static void
3375 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3376 {
3377         mdi_pathinfo_t  *prev = NULL;
3378         mdi_pathinfo_t  *path;
3379 
3380         ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3381 
3382         ASSERT(MDI_CLIENT_LOCKED(ct));
3383         path = ct->ct_path_head;
3384         while (path != NULL) {
3385                 if (path == pip) {
3386                         break;
3387                 }
3388                 prev = path;
3389                 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3390         }
3391 
3392         if (path) {
3393                 ct->ct_path_count--;
3394                 if (prev) {
3395                         MDI_PI(prev)->pi_client_link =
3396                             MDI_PI(path)->pi_client_link;
3397                 } else {
3398                         ct->ct_path_head =
3399                             (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3400                 }
3401                 if (ct->ct_path_tail == path) {
3402                         ct->ct_path_tail = prev;
3403                 }
3404                 if (ct->ct_path_last == path) {
3405                         ct->ct_path_last = ct->ct_path_head;
3406                 }
3407         }
3408         MDI_PI(pip)->pi_client_link = NULL;
3409         MDI_PI(pip)->pi_client = NULL;
3410 }
3411 
3412 /*
3413  * i_mdi_pi_state_change():
3414  *              online a mdi_pathinfo node
3415  *
3416  * Return Values:
3417  *              MDI_SUCCESS
3418  *              MDI_FAILURE
3419  */
3420 /*ARGSUSED*/
3421 static int
3422 i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
3423 {
3424         int             rv = MDI_SUCCESS;
3425         mdi_vhci_t      *vh;
3426         mdi_phci_t      *ph;
3427         mdi_client_t    *ct;
3428         int             (*f)();
3429         dev_info_t      *cdip;
3430 
3431         MDI_PI_LOCK(pip);
3432 
3433         ph = MDI_PI(pip)->pi_phci;
3434         ASSERT(ph);
3435         if (ph == NULL) {
3436                 /*
3437                  * Invalid pHCI device, fail the request
3438                  */
3439                 MDI_PI_UNLOCK(pip);
3440                 MDI_DEBUG(1, (MDI_WARN, NULL,
3441                     "!invalid phci: pip %s %p",
3442                     mdi_pi_spathname(pip), (void *)pip));
3443                 return (MDI_FAILURE);
3444         }
3445 
3446         vh = ph->ph_vhci;
3447         ASSERT(vh);
3448         if (vh == NULL) {
3449                 /*
3450                  * Invalid vHCI device, fail the request
3451                  */
3452                 MDI_PI_UNLOCK(pip);
3453                 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3454                     "!invalid vhci: pip %s %p",
3455                     mdi_pi_spathname(pip), (void *)pip));
3456                 return (MDI_FAILURE);
3457         }
3458 
3459         ct = MDI_PI(pip)->pi_client;
3460         ASSERT(ct != NULL);
3461         if (ct == NULL) {
3462                 /*
3463                  * Invalid client device, fail the request
3464                  */
3465                 MDI_PI_UNLOCK(pip);
3466                 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3467                     "!invalid client: pip %s %p",
3468                     mdi_pi_spathname(pip), (void *)pip));
3469                 return (MDI_FAILURE);
3470         }
3471 
3472         /*
3473          * If this path has not been initialized yet, Callback vHCI driver's
3474          * pathinfo node initialize entry point
3475          */
3476 
3477         if (MDI_PI_IS_INITING(pip)) {
3478                 MDI_PI_UNLOCK(pip);
3479                 f = vh->vh_ops->vo_pi_init;
3480                 if (f != NULL) {
3481                         rv = (*f)(vh->vh_dip, pip, 0);
3482                         if (rv != MDI_SUCCESS) {
3483                                 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3484                                     "!vo_pi_init failed: vHCI %p, pip %s %p",
3485                                     (void *)vh, mdi_pi_spathname(pip),
3486                                     (void *)pip));
3487                                 return (MDI_FAILURE);
3488                         }
3489                 }
3490                 MDI_PI_LOCK(pip);
3491                 MDI_PI_CLEAR_TRANSIENT(pip);
3492         }
3493 
3494         /*
3495          * Do not allow state transition when pHCI is in offline/suspended
3496          * states
3497          */
3498         i_mdi_phci_lock(ph, pip);
3499         if (MDI_PHCI_IS_READY(ph) == 0) {
3500                 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3501                     "!pHCI not ready, pHCI=%p", (void *)ph));
3502                 MDI_PI_UNLOCK(pip);
3503                 i_mdi_phci_unlock(ph);
3504                 return (MDI_BUSY);
3505         }
3506         MDI_PHCI_UNSTABLE(ph);
3507         i_mdi_phci_unlock(ph);
3508 
3509         /*
3510          * Check if mdi_pathinfo state is in transient state.
3511          * If yes, offlining is in progress and wait till transient state is
3512          * cleared.
3513          */
3514         if (MDI_PI_IS_TRANSIENT(pip)) {
3515                 while (MDI_PI_IS_TRANSIENT(pip)) {
3516                         cv_wait(&MDI_PI(pip)->pi_state_cv,
3517                             &MDI_PI(pip)->pi_mutex);
3518                 }
3519         }
3520 
3521         /*
3522          * Grab the client lock in reverse order sequence and release the
3523          * mdi_pathinfo mutex.
3524          */
3525         i_mdi_client_lock(ct, pip);
3526         MDI_PI_UNLOCK(pip);
3527 
3528         /*
3529          * Wait till failover state is cleared
3530          */
3531         while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3532                 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3533 
3534         /*
3535          * Mark the mdi_pathinfo node state as transient
3536          */
3537         MDI_PI_LOCK(pip);
3538         switch (state) {
3539         case MDI_PATHINFO_STATE_ONLINE:
3540                 MDI_PI_SET_ONLINING(pip);
3541                 break;
3542 
3543         case MDI_PATHINFO_STATE_STANDBY:
3544                 MDI_PI_SET_STANDBYING(pip);
3545                 break;
3546 
3547         case MDI_PATHINFO_STATE_FAULT:
3548                 /*
3549                  * Mark the pathinfo state as FAULTED
3550                  */
3551                 MDI_PI_SET_FAULTING(pip);
3552                 MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
3553                 break;
3554 
3555         case MDI_PATHINFO_STATE_OFFLINE:
3556                 /*
3557                  * ndi_devi_offline() cannot hold pip or ct locks.
3558                  */
3559                 MDI_PI_UNLOCK(pip);
3560 
3561                 /*
3562                  * If this is a user initiated path online->offline operation
3563                  * who's success would transition a client from DEGRADED to
3564                  * FAILED then only proceed if we can offline the client first.
3565                  */
3566                 cdip = ct->ct_dip;
3567                 if ((flag & NDI_USER_REQ) &&
3568                     MDI_PI_IS_ONLINE(pip) &&
3569                     (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
3570                         i_mdi_client_unlock(ct);
3571                         rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN);
3572                         if (rv != NDI_SUCCESS) {
3573                                 /*
3574                                  * Convert to MDI error code
3575                                  */
3576                                 switch (rv) {
3577                                 case NDI_BUSY:
3578                                         rv = MDI_BUSY;
3579                                         break;
3580                                 default:
3581                                         rv = MDI_FAILURE;
3582                                         break;
3583                                 }
3584                                 goto state_change_exit;
3585                         } else {
3586                                 i_mdi_client_lock(ct, NULL);
3587                         }
3588                 }
3589                 /*
3590                  * Mark the mdi_pathinfo node state as transient
3591                  */
3592                 MDI_PI_LOCK(pip);
3593                 MDI_PI_SET_OFFLINING(pip);
3594                 break;
3595         }
3596         MDI_PI_UNLOCK(pip);
3597         MDI_CLIENT_UNSTABLE(ct);
3598         i_mdi_client_unlock(ct);
3599 
3600         f = vh->vh_ops->vo_pi_state_change;
3601         if (f != NULL)
3602                 rv = (*f)(vh->vh_dip, pip, state, 0, flag);
3603 
3604         MDI_CLIENT_LOCK(ct);
3605         MDI_PI_LOCK(pip);
3606         if (rv == MDI_NOT_SUPPORTED) {
3607                 MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
3608         }
3609         if (rv != MDI_SUCCESS) {
3610                 MDI_DEBUG(2, (MDI_WARN, ct->ct_dip,
3611                     "vo_pi_state_change failed: rv %x", rv));
3612         }
3613         if (MDI_PI_IS_TRANSIENT(pip)) {
3614                 if (rv == MDI_SUCCESS) {
3615                         MDI_PI_CLEAR_TRANSIENT(pip);
3616                 } else {
3617                         MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
3618                 }
3619         }
3620 
3621         /*
3622          * Wake anyone waiting for this mdi_pathinfo node
3623          */
3624         cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3625         MDI_PI_UNLOCK(pip);
3626 
3627         /*
3628          * Mark the client device as stable
3629          */
3630         MDI_CLIENT_STABLE(ct);
3631         if (rv == MDI_SUCCESS) {
3632                 if (ct->ct_unstable == 0) {
3633                         cdip = ct->ct_dip;
3634 
3635                         /*
3636                          * Onlining the mdi_pathinfo node will impact the
3637                          * client state Update the client and dev_info node
3638                          * state accordingly
3639                          */
3640                         rv = NDI_SUCCESS;
3641                         i_mdi_client_update_state(ct);
3642                         switch (MDI_CLIENT_STATE(ct)) {
3643                         case MDI_CLIENT_STATE_OPTIMAL:
3644                         case MDI_CLIENT_STATE_DEGRADED:
3645                                 if (cdip && !i_ddi_devi_attached(cdip) &&
3646                                     ((state == MDI_PATHINFO_STATE_ONLINE) ||
3647                                     (state == MDI_PATHINFO_STATE_STANDBY))) {
3648 
3649                                         /*
3650                                          * Must do ndi_devi_online() through
3651                                          * hotplug thread for deferred
3652                                          * attach mechanism to work
3653                                          */
3654                                         MDI_CLIENT_UNLOCK(ct);
3655                                         rv = ndi_devi_online(cdip, 0);
3656                                         MDI_CLIENT_LOCK(ct);
3657                                         if ((rv != NDI_SUCCESS) &&
3658                                             (MDI_CLIENT_STATE(ct) ==
3659                                             MDI_CLIENT_STATE_DEGRADED)) {
3660                                                 MDI_DEBUG(1, (MDI_WARN, cdip,
3661                                                     "!ndi_devi_online failed "
3662                                                     "error %x", rv));
3663                                         }
3664                                         rv = NDI_SUCCESS;
3665                                 }
3666                                 break;
3667 
3668                         case MDI_CLIENT_STATE_FAILED:
3669                                 /*
3670                                  * This is the last path case for
3671                                  * non-user initiated events.
3672                                  */
3673                                 if (((flag & NDI_USER_REQ) == 0) &&
3674                                     cdip && (i_ddi_node_state(cdip) >=
3675                                     DS_INITIALIZED)) {
3676                                         MDI_CLIENT_UNLOCK(ct);
3677                                         rv = ndi_devi_offline(cdip,
3678                                             NDI_DEVFS_CLEAN);
3679                                         MDI_CLIENT_LOCK(ct);
3680 
3681                                         if (rv != NDI_SUCCESS) {
3682                                                 /*
3683                                                  * ndi_devi_offline failed.
3684                                                  * Reset client flags to
3685                                                  * online as the path could not
3686                                                  * be offlined.
3687                                                  */
3688                                                 MDI_DEBUG(1, (MDI_WARN, cdip,
3689                                                     "!ndi_devi_offline failed: "
3690                                                     "error %x", rv));
3691                                                 MDI_CLIENT_SET_ONLINE(ct);
3692                                         }
3693                                 }
3694                                 break;
3695                         }
3696                         /*
3697                          * Convert to MDI error code
3698                          */
3699                         switch (rv) {
3700                         case NDI_SUCCESS:
3701                                 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3702                                 i_mdi_report_path_state(ct, pip);
3703                                 rv = MDI_SUCCESS;
3704                                 break;
3705                         case NDI_BUSY:
3706                                 rv = MDI_BUSY;
3707                                 break;
3708                         default:
3709                                 rv = MDI_FAILURE;
3710                                 break;
3711                         }
3712                 }
3713         }
3714         MDI_CLIENT_UNLOCK(ct);
3715 
3716 state_change_exit:
3717         /*
3718          * Mark the pHCI as stable again.
3719          */
3720         MDI_PHCI_LOCK(ph);
3721         MDI_PHCI_STABLE(ph);
3722         MDI_PHCI_UNLOCK(ph);
3723         return (rv);
3724 }
3725 
3726 /*
3727  * mdi_pi_online():
3728  *              Place the path_info node in the online state.  The path is
3729  *              now available to be selected by mdi_select_path() for
3730  *              transporting I/O requests to client devices.
3731  * Return Values:
3732  *              MDI_SUCCESS
3733  *              MDI_FAILURE
3734  */
3735 int
3736 mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3737 {
3738         mdi_client_t    *ct = MDI_PI(pip)->pi_client;
3739         int             client_held = 0;
3740         int             rv;
3741 
3742         ASSERT(ct != NULL);
3743         rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
3744         if (rv != MDI_SUCCESS)
3745                 return (rv);
3746 
3747         MDI_PI_LOCK(pip);
3748         if (MDI_PI(pip)->pi_pm_held == 0) {
3749                 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3750                     "i_mdi_pm_hold_pip %p", (void *)pip));
3751                 i_mdi_pm_hold_pip(pip);
3752                 client_held = 1;
3753         }
3754         MDI_PI_UNLOCK(pip);
3755 
3756         if (client_held) {
3757                 MDI_CLIENT_LOCK(ct);
3758                 if (ct->ct_power_cnt == 0) {
3759                         rv = i_mdi_power_all_phci(ct);
3760                 }
3761 
3762                 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3763                     "i_mdi_pm_hold_client %p", (void *)ct));
3764                 i_mdi_pm_hold_client(ct, 1);
3765                 MDI_CLIENT_UNLOCK(ct);
3766         }
3767 
3768         return (rv);
3769 }
3770 
3771 /*
3772  * mdi_pi_standby():
3773  *              Place the mdi_pathinfo node in standby state
3774  *
3775  * Return Values:
3776  *              MDI_SUCCESS
3777  *              MDI_FAILURE
3778  */
3779 int
3780 mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
3781 {
3782         return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
3783 }
3784 
3785 /*
3786  * mdi_pi_fault():
3787  *              Place the mdi_pathinfo node in fault'ed state
3788  * Return Values:
3789  *              MDI_SUCCESS
3790  *              MDI_FAILURE
3791  */
3792 int
3793 mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
3794 {
3795         return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
3796 }
3797 
3798 /*
3799  * mdi_pi_offline():
3800  *              Offline a mdi_pathinfo node.
3801  * Return Values:
3802  *              MDI_SUCCESS
3803  *              MDI_FAILURE
3804  */
3805 int
3806 mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3807 {
3808         int     ret, client_held = 0;
3809         mdi_client_t    *ct;
3810 
3811         /*
3812          * Original code overloaded NDI_DEVI_REMOVE to this interface, and
3813          * used it to mean "user initiated operation" (i.e. devctl). Callers
3814          * should now just use NDI_USER_REQ.
3815          */
3816         if (flags & NDI_DEVI_REMOVE) {
3817                 flags &= ~NDI_DEVI_REMOVE;
3818                 flags |= NDI_USER_REQ;
3819         }
3820 
3821         ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags);
3822 
3823         if (ret == MDI_SUCCESS) {
3824                 MDI_PI_LOCK(pip);
3825                 if (MDI_PI(pip)->pi_pm_held) {
3826                         client_held = 1;
3827                 }
3828                 MDI_PI_UNLOCK(pip);
3829 
3830                 if (client_held) {
3831                         ct = MDI_PI(pip)->pi_client;
3832                         MDI_CLIENT_LOCK(ct);
3833                         MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3834                             "i_mdi_pm_rele_client\n"));
3835                         i_mdi_pm_rele_client(ct, 1);
3836                         MDI_CLIENT_UNLOCK(ct);
3837                 }
3838         }
3839 
3840         return (ret);
3841 }
3842 
3843 /*
3844  * i_mdi_pi_offline():
3845  *              Offline a mdi_pathinfo node and call the vHCI driver's callback
3846  */
3847 static int
3848 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3849 {
3850         dev_info_t      *vdip = NULL;
3851         mdi_vhci_t      *vh = NULL;
3852         mdi_client_t    *ct = NULL;
3853         int             (*f)();
3854         int             rv;
3855 
3856         MDI_PI_LOCK(pip);
3857         ct = MDI_PI(pip)->pi_client;
3858         ASSERT(ct != NULL);
3859 
3860         while (MDI_PI(pip)->pi_ref_cnt != 0) {
3861                 /*
3862                  * Give a chance for pending I/Os to complete.
3863                  */
3864                 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3865                     "!%d cmds still pending on path %s %p",
3866                     MDI_PI(pip)->pi_ref_cnt, mdi_pi_spathname(pip),
3867                     (void *)pip));
3868                 if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
3869                     &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
3870                     TR_CLOCK_TICK) == -1) {
3871                         /*
3872                          * The timeout time reached without ref_cnt being zero
3873                          * being signaled.
3874                          */
3875                         MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3876                             "!Timeout reached on path %s %p without the cond",
3877                             mdi_pi_spathname(pip), (void *)pip));
3878                         MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3879                             "!%d cmds still pending on path %s %p",
3880                             MDI_PI(pip)->pi_ref_cnt,
3881                             mdi_pi_spathname(pip), (void *)pip));
3882                 }
3883         }
3884         vh = ct->ct_vhci;
3885         vdip = vh->vh_dip;
3886 
3887         /*
3888          * Notify vHCI that has registered this event
3889          */
3890         ASSERT(vh->vh_ops);
3891         f = vh->vh_ops->vo_pi_state_change;
3892 
3893         if (f != NULL) {
3894                 MDI_PI_UNLOCK(pip);
3895                 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0,
3896                     flags)) != MDI_SUCCESS) {
3897                         MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3898                             "!vo_path_offline failed: vdip %s%d %p: path %s %p",
3899                             ddi_driver_name(vdip), ddi_get_instance(vdip),
3900                             (void *)vdip, mdi_pi_spathname(pip), (void *)pip));
3901                 }
3902                 MDI_PI_LOCK(pip);
3903         }
3904 
3905         /*
3906          * Set the mdi_pathinfo node state and clear the transient condition
3907          */
3908         MDI_PI_SET_OFFLINE(pip);
3909         cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3910         MDI_PI_UNLOCK(pip);
3911 
3912         MDI_CLIENT_LOCK(ct);
3913         if (rv == MDI_SUCCESS) {
3914                 if (ct->ct_unstable == 0) {
3915                         dev_info_t      *cdip = ct->ct_dip;
3916 
3917                         /*
3918                          * Onlining the mdi_pathinfo node will impact the
3919                          * client state Update the client and dev_info node
3920                          * state accordingly
3921                          */
3922                         i_mdi_client_update_state(ct);
3923                         rv = NDI_SUCCESS;
3924                         if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
3925                                 if (cdip &&
3926                                     (i_ddi_node_state(cdip) >=
3927                                     DS_INITIALIZED)) {
3928                                         MDI_CLIENT_UNLOCK(ct);
3929                                         rv = ndi_devi_offline(cdip,
3930                                             NDI_DEVFS_CLEAN);
3931                                         MDI_CLIENT_LOCK(ct);
3932                                         if (rv != NDI_SUCCESS) {
3933                                                 /*
3934                                                  * ndi_devi_offline failed.
3935                                                  * Reset client flags to
3936                                                  * online.
3937                                                  */
3938                                                 MDI_DEBUG(4, (MDI_WARN, cdip,
3939                                                     "ndi_devi_offline failed: "
3940                                                     "error %x", rv));
3941                                                 MDI_CLIENT_SET_ONLINE(ct);
3942                                         }
3943                                 }
3944                         }
3945                         /*
3946                          * Convert to MDI error code
3947                          */
3948                         switch (rv) {
3949                         case NDI_SUCCESS:
3950                                 rv = MDI_SUCCESS;
3951                                 break;
3952                         case NDI_BUSY:
3953                                 rv = MDI_BUSY;
3954                                 break;
3955                         default:
3956                                 rv = MDI_FAILURE;
3957                                 break;
3958                         }
3959                 }
3960                 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3961                 i_mdi_report_path_state(ct, pip);
3962         }
3963 
3964         MDI_CLIENT_UNLOCK(ct);
3965 
3966         /*
3967          * Change in the mdi_pathinfo node state will impact the client state
3968          */
3969         MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
3970             "ct = %p pip = %p", (void *)ct, (void *)pip));
3971         return (rv);
3972 }
3973 
3974 /*
3975  * i_mdi_pi_online():
3976  *              Online a mdi_pathinfo node and call the vHCI driver's callback
3977  */
3978 static int
3979 i_mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3980 {
3981         mdi_vhci_t      *vh = NULL;
3982         mdi_client_t    *ct = NULL;
3983         mdi_phci_t      *ph;
3984         int             (*f)();
3985         int             rv;
3986 
3987         MDI_PI_LOCK(pip);
3988         ph = MDI_PI(pip)->pi_phci;
3989         vh = ph->ph_vhci;
3990         ct = MDI_PI(pip)->pi_client;
3991         MDI_PI_SET_ONLINING(pip)
3992         MDI_PI_UNLOCK(pip);
3993         f = vh->vh_ops->vo_pi_state_change;
3994         if (f != NULL)
3995                 rv = (*f)(vh->vh_dip, pip, MDI_PATHINFO_STATE_ONLINE, 0,
3996                     flags);
3997         MDI_CLIENT_LOCK(ct);
3998         MDI_PI_LOCK(pip);
3999         cv_broadcast(&MDI_PI(pip)->pi_state_cv);
4000         MDI_PI_UNLOCK(pip);
4001         if (rv == MDI_SUCCESS) {
4002                 dev_info_t      *cdip = ct->ct_dip;
4003 
4004                 rv = MDI_SUCCESS;
4005                 i_mdi_client_update_state(ct);
4006                 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL ||
4007                     MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4008                         if (cdip && !i_ddi_devi_attached(cdip)) {
4009                                 MDI_CLIENT_UNLOCK(ct);
4010                                 rv = ndi_devi_online(cdip, 0);
4011                                 MDI_CLIENT_LOCK(ct);
4012                                 if ((rv != NDI_SUCCESS) &&
4013                                     (MDI_CLIENT_STATE(ct) ==
4014                                     MDI_CLIENT_STATE_DEGRADED)) {
4015                                         MDI_CLIENT_SET_OFFLINE(ct);
4016                                 }
4017                                 if (rv != NDI_SUCCESS) {
4018                                         /* Reset the path state */
4019                                         MDI_PI_LOCK(pip);
4020                                         MDI_PI(pip)->pi_state =
4021                                             MDI_PI_OLD_STATE(pip);
4022                                         MDI_PI_UNLOCK(pip);
4023                                 }
4024                         }
4025                 }
4026                 switch (rv) {
4027                 case NDI_SUCCESS:
4028                         MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
4029                         i_mdi_report_path_state(ct, pip);
4030                         rv = MDI_SUCCESS;
4031                         break;
4032                 case NDI_BUSY:
4033                         rv = MDI_BUSY;
4034                         break;
4035                 default:
4036                         rv = MDI_FAILURE;
4037                         break;
4038                 }
4039         } else {
4040                 /* Reset the path state */
4041                 MDI_PI_LOCK(pip);
4042                 MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
4043                 MDI_PI_UNLOCK(pip);
4044         }
4045         MDI_CLIENT_UNLOCK(ct);
4046         return (rv);
4047 }
4048 
4049 /*
4050  * mdi_pi_get_node_name():
4051  *              Get the name associated with a mdi_pathinfo node.
4052  *              Since pathinfo nodes are not directly named, we
4053  *              return the node_name of the client.
4054  *
4055  * Return Values:
4056  *              char *
4057  */
4058 char *
4059 mdi_pi_get_node_name(mdi_pathinfo_t *pip)
4060 {
4061         mdi_client_t    *ct;
4062 
4063         if (pip == NULL)
4064                 return (NULL);
4065         ct = MDI_PI(pip)->pi_client;
4066         if ((ct == NULL) || (ct->ct_dip == NULL))
4067                 return (NULL);
4068         return (ddi_node_name(ct->ct_dip));
4069 }
4070 
4071 /*
4072  * mdi_pi_get_addr():
4073  *              Get the unit address associated with a mdi_pathinfo node
4074  *
4075  * Return Values:
4076  *              char *
4077  */
4078 char *
4079 mdi_pi_get_addr(mdi_pathinfo_t *pip)
4080 {
4081         if (pip == NULL)
4082                 return (NULL);
4083 
4084         return (MDI_PI(pip)->pi_addr);
4085 }
4086 
4087 /*
4088  * mdi_pi_get_path_instance():
4089  *              Get the 'path_instance' of a mdi_pathinfo node
4090  *
4091  * Return Values:
4092  *              path_instance
4093  */
4094 int
4095 mdi_pi_get_path_instance(mdi_pathinfo_t *pip)
4096 {
4097         if (pip == NULL)
4098                 return (0);
4099 
4100         return (MDI_PI(pip)->pi_path_instance);
4101 }
4102 
4103 /*
4104  * mdi_pi_pathname():
4105  *              Return pointer to path to pathinfo node.
4106  */
4107 char *
4108 mdi_pi_pathname(mdi_pathinfo_t *pip)
4109 {
4110         if (pip == NULL)
4111                 return (NULL);
4112         return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip)));
4113 }
4114 
4115 /*
4116  * mdi_pi_spathname():
4117  *              Return pointer to shortpath to pathinfo node. Used for debug
4118  *              messages, so return "" instead of NULL when unknown.
4119  */
4120 char *
4121 mdi_pi_spathname(mdi_pathinfo_t *pip)
4122 {
4123         char    *spath = "";
4124 
4125         if (pip) {
4126                 spath = mdi_pi_spathname_by_instance(
4127                     mdi_pi_get_path_instance(pip));
4128                 if (spath == NULL)
4129                         spath = "";
4130         }
4131         return (spath);
4132 }
4133 
4134 char *
4135 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path)
4136 {
4137         char *obp_path = NULL;
4138         if ((pip == NULL) || (path == NULL))
4139                 return (NULL);
4140 
4141         if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) {
4142                 (void) strcpy(path, obp_path);
4143                 (void) mdi_prop_free(obp_path);
4144         } else {
4145                 path = NULL;
4146         }
4147         return (path);
4148 }
4149 
4150 int
4151 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component)
4152 {
4153         dev_info_t *pdip;
4154         char *obp_path = NULL;
4155         int rc = MDI_FAILURE;
4156 
4157         if (pip == NULL)
4158                 return (MDI_FAILURE);
4159 
4160         pdip = mdi_pi_get_phci(pip);
4161         if (pdip == NULL)
4162                 return (MDI_FAILURE);
4163 
4164         obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
4165 
4166         if (ddi_pathname_obp(pdip, obp_path) == NULL) {
4167                 (void) ddi_pathname(pdip, obp_path);
4168         }
4169 
4170         if (component) {
4171                 (void) strncat(obp_path, "/", MAXPATHLEN);
4172                 (void) strncat(obp_path, component, MAXPATHLEN);
4173         }
4174         rc = mdi_prop_update_string(pip, "obp-path", obp_path);
4175 
4176         if (obp_path)
4177                 kmem_free(obp_path, MAXPATHLEN);
4178         return (rc);
4179 }
4180 
4181 /*
4182  * mdi_pi_get_client():
4183  *              Get the client devinfo associated with a mdi_pathinfo node
4184  *
4185  * Return Values:
4186  *              Handle to client device dev_info node
4187  */
4188 dev_info_t *
4189 mdi_pi_get_client(mdi_pathinfo_t *pip)
4190 {
4191         dev_info_t      *dip = NULL;
4192         if (pip) {
4193                 dip = MDI_PI(pip)->pi_client->ct_dip;
4194         }
4195         return (dip);
4196 }
4197 
4198 /*
4199  * mdi_pi_get_phci():
4200  *              Get the pHCI devinfo associated with the mdi_pathinfo node
4201  * Return Values:
4202  *              Handle to dev_info node
4203  */
4204 dev_info_t *
4205 mdi_pi_get_phci(mdi_pathinfo_t *pip)
4206 {
4207         dev_info_t      *dip = NULL;
4208         mdi_phci_t      *ph;
4209 
4210         if (pip) {
4211                 ph = MDI_PI(pip)->pi_phci;
4212                 if (ph)
4213                         dip = ph->ph_dip;
4214         }
4215         return (dip);
4216 }
4217 
4218 /*
4219  * mdi_pi_get_client_private():
4220  *              Get the client private information associated with the
4221  *              mdi_pathinfo node
4222  */
4223 void *
4224 mdi_pi_get_client_private(mdi_pathinfo_t *pip)
4225 {
4226         void *cprivate = NULL;
4227         if (pip) {
4228                 cprivate = MDI_PI(pip)->pi_cprivate;
4229         }
4230         return (cprivate);
4231 }
4232 
4233 /*
4234  * mdi_pi_set_client_private():
4235  *              Set the client private information in the mdi_pathinfo node
4236  */
4237 void
4238 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv)
4239 {
4240         if (pip) {
4241                 MDI_PI(pip)->pi_cprivate = priv;
4242         }
4243 }
4244 
4245 /*
4246  * mdi_pi_get_phci_private():
4247  *              Get the pHCI private information associated with the
4248  *              mdi_pathinfo node
4249  */
4250 caddr_t
4251 mdi_pi_get_phci_private(mdi_pathinfo_t *pip)
4252 {
4253         caddr_t pprivate = NULL;
4254 
4255         if (pip) {
4256                 pprivate = MDI_PI(pip)->pi_pprivate;
4257         }
4258         return (pprivate);
4259 }
4260 
4261 /*
4262  * mdi_pi_set_phci_private():
4263  *              Set the pHCI private information in the mdi_pathinfo node
4264  */
4265 void
4266 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv)
4267 {
4268         if (pip) {
4269                 MDI_PI(pip)->pi_pprivate = priv;
4270         }
4271 }
4272 
4273 /*
4274  * mdi_pi_get_state():
4275  *              Get the mdi_pathinfo node state. Transient states are internal
4276  *              and not provided to the users
4277  */
4278 mdi_pathinfo_state_t
4279 mdi_pi_get_state(mdi_pathinfo_t *pip)
4280 {
4281         mdi_pathinfo_state_t    state = MDI_PATHINFO_STATE_INIT;
4282 
4283         if (pip) {
4284                 if (MDI_PI_IS_TRANSIENT(pip)) {
4285                         /*
4286                          * mdi_pathinfo is in state transition.  Return the
4287                          * last good state.
4288                          */
4289                         state = MDI_PI_OLD_STATE(pip);
4290                 } else {
4291                         state = MDI_PI_STATE(pip);
4292                 }
4293         }
4294         return (state);
4295 }
4296 
4297 /*
4298  * mdi_pi_get_flags():
4299  *              Get the mdi_pathinfo node flags.
4300  */
4301 uint_t
4302 mdi_pi_get_flags(mdi_pathinfo_t *pip)
4303 {
4304         return (pip ? MDI_PI(pip)->pi_flags : 0);
4305 }
4306 
4307 /*
4308  * Note that the following function needs to be the new interface for
4309  * mdi_pi_get_state when mpxio gets integrated to ON.
4310  */
4311 int
4312 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state,
4313                 uint32_t *ext_state)
4314 {
4315         *state = MDI_PATHINFO_STATE_INIT;
4316 
4317         if (pip) {
4318                 if (MDI_PI_IS_TRANSIENT(pip)) {
4319                         /*
4320                          * mdi_pathinfo is in state transition.  Return the
4321                          * last good state.
4322                          */
4323                         *state = MDI_PI_OLD_STATE(pip);
4324                         *ext_state = MDI_PI_OLD_EXT_STATE(pip);
4325                 } else {
4326                         *state = MDI_PI_STATE(pip);
4327                         *ext_state = MDI_PI_EXT_STATE(pip);
4328                 }
4329         }
4330         return (MDI_SUCCESS);
4331 }
4332 
4333 /*
4334  * mdi_pi_get_preferred:
4335  *      Get the preferred path flag
4336  */
4337 int
4338 mdi_pi_get_preferred(mdi_pathinfo_t *pip)
4339 {
4340         if (pip) {
4341                 return (MDI_PI(pip)->pi_preferred);
4342         }
4343         return (0);
4344 }
4345 
4346 /*
4347  * mdi_pi_set_preferred:
4348  *      Set the preferred path flag
4349  */
4350 void
4351 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred)
4352 {
4353         if (pip) {
4354                 MDI_PI(pip)->pi_preferred = preferred;
4355         }
4356 }
4357 
4358 /*
4359  * mdi_pi_set_state():
4360  *              Set the mdi_pathinfo node state
4361  */
4362 void
4363 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state)
4364 {
4365         uint32_t        ext_state;
4366 
4367         if (pip) {
4368                 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;
4369                 MDI_PI(pip)->pi_state = state;
4370                 MDI_PI(pip)->pi_state |= ext_state;
4371 
4372                 /* Path has changed state, invalidate DINFOCACHE snap shot. */
4373                 i_ddi_di_cache_invalidate();
4374         }
4375 }
4376 
4377 /*
4378  * Property functions:
4379  */
4380 int
4381 i_map_nvlist_error_to_mdi(int val)
4382 {
4383         int rv;
4384 
4385         switch (val) {
4386         case 0:
4387                 rv = DDI_PROP_SUCCESS;
4388                 break;
4389         case EINVAL:
4390         case ENOTSUP:
4391                 rv = DDI_PROP_INVAL_ARG;
4392                 break;
4393         case ENOMEM:
4394                 rv = DDI_PROP_NO_MEMORY;
4395                 break;
4396         default:
4397                 rv = DDI_PROP_NOT_FOUND;
4398                 break;
4399         }
4400         return (rv);
4401 }
4402 
4403 /*
4404  * mdi_pi_get_next_prop():
4405  *              Property walk function.  The caller should hold mdi_pi_lock()
4406  *              and release by calling mdi_pi_unlock() at the end of walk to
4407  *              get a consistent value.
4408  */
4409 nvpair_t *
4410 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
4411 {
4412         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4413                 return (NULL);
4414         }
4415         ASSERT(MDI_PI_LOCKED(pip));
4416         return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
4417 }
4418 
4419 /*
4420  * mdi_prop_remove():
4421  *              Remove the named property from the named list.
4422  */
4423 int
4424 mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
4425 {
4426         if (pip == NULL) {
4427                 return (DDI_PROP_NOT_FOUND);
4428         }
4429         ASSERT(!MDI_PI_LOCKED(pip));
4430         MDI_PI_LOCK(pip);
4431         if (MDI_PI(pip)->pi_prop == NULL) {
4432                 MDI_PI_UNLOCK(pip);
4433                 return (DDI_PROP_NOT_FOUND);
4434         }
4435         if (name) {
4436                 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
4437         } else {
4438                 char            nvp_name[MAXNAMELEN];
4439                 nvpair_t        *nvp;
4440                 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
4441                 while (nvp) {
4442                         nvpair_t        *next;
4443                         next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp);
4444                         (void) snprintf(nvp_name, sizeof(nvp_name), "%s",
4445                             nvpair_name(nvp));
4446                         (void) nvlist_remove_all(MDI_PI(pip)->pi_prop,
4447                             nvp_name);
4448                         nvp = next;
4449                 }
4450         }
4451         MDI_PI_UNLOCK(pip);
4452         return (DDI_PROP_SUCCESS);
4453 }
4454 
4455 /*
4456  * mdi_prop_size():
4457  *              Get buffer size needed to pack the property data.
4458  *              Caller should hold the mdi_pathinfo_t lock to get a consistent
4459  *              buffer size.
4460  */
4461 int
4462 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
4463 {
4464         int     rv;
4465         size_t  bufsize;
4466 
4467         *buflenp = 0;
4468         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4469                 return (DDI_PROP_NOT_FOUND);
4470         }
4471         ASSERT(MDI_PI_LOCKED(pip));
4472         rv = nvlist_size(MDI_PI(pip)->pi_prop,
4473             &bufsize, NV_ENCODE_NATIVE);
4474         *buflenp = bufsize;
4475         return (i_map_nvlist_error_to_mdi(rv));
4476 }
4477 
4478 /*
4479  * mdi_prop_pack():
4480  *              pack the property list.  The caller should hold the
4481  *              mdi_pathinfo_t node to get a consistent data
4482  */
4483 int
4484 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen)
4485 {
4486         int     rv;
4487         size_t  bufsize;
4488 
4489         if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) {
4490                 return (DDI_PROP_NOT_FOUND);
4491         }
4492 
4493         ASSERT(MDI_PI_LOCKED(pip));
4494 
4495         bufsize = buflen;
4496         rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize,
4497             NV_ENCODE_NATIVE, KM_SLEEP);
4498 
4499         return (i_map_nvlist_error_to_mdi(rv));
4500 }
4501 
4502 /*
4503  * mdi_prop_update_byte():
4504  *              Create/Update a byte property
4505  */
4506 int
4507 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data)
4508 {
4509         int rv;
4510 
4511         if (pip == NULL) {
4512                 return (DDI_PROP_INVAL_ARG);
4513         }
4514         ASSERT(!MDI_PI_LOCKED(pip));
4515         MDI_PI_LOCK(pip);
4516         if (MDI_PI(pip)->pi_prop == NULL) {
4517                 MDI_PI_UNLOCK(pip);
4518                 return (DDI_PROP_NOT_FOUND);
4519         }
4520         rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data);
4521         MDI_PI_UNLOCK(pip);
4522         return (i_map_nvlist_error_to_mdi(rv));
4523 }
4524 
4525 /*
4526  * mdi_prop_update_byte_array():
4527  *              Create/Update a byte array property
4528  */
4529 int
4530 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data,
4531     uint_t nelements)
4532 {
4533         int rv;
4534 
4535         if (pip == NULL) {
4536                 return (DDI_PROP_INVAL_ARG);
4537         }
4538         ASSERT(!MDI_PI_LOCKED(pip));
4539         MDI_PI_LOCK(pip);
4540         if (MDI_PI(pip)->pi_prop == NULL) {
4541                 MDI_PI_UNLOCK(pip);
4542                 return (DDI_PROP_NOT_FOUND);
4543         }
4544         rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements);
4545         MDI_PI_UNLOCK(pip);
4546         return (i_map_nvlist_error_to_mdi(rv));
4547 }
4548 
4549 /*
4550  * mdi_prop_update_int():
4551  *              Create/Update a 32 bit integer property
4552  */
4553 int
4554 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data)
4555 {
4556         int rv;
4557 
4558         if (pip == NULL) {
4559                 return (DDI_PROP_INVAL_ARG);
4560         }
4561         ASSERT(!MDI_PI_LOCKED(pip));
4562         MDI_PI_LOCK(pip);
4563         if (MDI_PI(pip)->pi_prop == NULL) {
4564                 MDI_PI_UNLOCK(pip);
4565                 return (DDI_PROP_NOT_FOUND);
4566         }
4567         rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data);
4568         MDI_PI_UNLOCK(pip);
4569         return (i_map_nvlist_error_to_mdi(rv));
4570 }
4571 
4572 /*
4573  * mdi_prop_update_int64():
4574  *              Create/Update a 64 bit integer property
4575  */
4576 int
4577 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data)
4578 {
4579         int rv;
4580 
4581         if (pip == NULL) {
4582                 return (DDI_PROP_INVAL_ARG);
4583         }
4584         ASSERT(!MDI_PI_LOCKED(pip));
4585         MDI_PI_LOCK(pip);
4586         if (MDI_PI(pip)->pi_prop == NULL) {
4587                 MDI_PI_UNLOCK(pip);
4588                 return (DDI_PROP_NOT_FOUND);
4589         }
4590         rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data);
4591         MDI_PI_UNLOCK(pip);
4592         return (i_map_nvlist_error_to_mdi(rv));
4593 }
4594 
4595 /*
4596  * mdi_prop_update_int_array():
4597  *              Create/Update a int array property
4598  */
4599 int
4600 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data,
4601             uint_t nelements)
4602 {
4603         int rv;
4604 
4605         if (pip == NULL) {
4606                 return (DDI_PROP_INVAL_ARG);
4607         }
4608         ASSERT(!MDI_PI_LOCKED(pip));
4609         MDI_PI_LOCK(pip);
4610         if (MDI_PI(pip)->pi_prop == NULL) {
4611                 MDI_PI_UNLOCK(pip);
4612                 return (DDI_PROP_NOT_FOUND);
4613         }
4614         rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data,
4615             nelements);
4616         MDI_PI_UNLOCK(pip);
4617         return (i_map_nvlist_error_to_mdi(rv));
4618 }
4619 
4620 /*
4621  * mdi_prop_update_string():
4622  *              Create/Update a string property
4623  */
4624 int
4625 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data)
4626 {
4627         int rv;
4628 
4629         if (pip == NULL) {
4630                 return (DDI_PROP_INVAL_ARG);
4631         }
4632         ASSERT(!MDI_PI_LOCKED(pip));
4633         MDI_PI_LOCK(pip);
4634         if (MDI_PI(pip)->pi_prop == NULL) {
4635                 MDI_PI_UNLOCK(pip);
4636                 return (DDI_PROP_NOT_FOUND);
4637         }
4638         rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data);
4639         MDI_PI_UNLOCK(pip);
4640         return (i_map_nvlist_error_to_mdi(rv));
4641 }
4642 
4643 /*
4644  * mdi_prop_update_string_array():
4645  *              Create/Update a string array property
4646  */
4647 int
4648 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data,
4649     uint_t nelements)
4650 {
4651         int rv;
4652 
4653         if (pip == NULL) {
4654                 return (DDI_PROP_INVAL_ARG);
4655         }
4656         ASSERT(!MDI_PI_LOCKED(pip));
4657         MDI_PI_LOCK(pip);
4658         if (MDI_PI(pip)->pi_prop == NULL) {
4659                 MDI_PI_UNLOCK(pip);
4660                 return (DDI_PROP_NOT_FOUND);
4661         }
4662         rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data,
4663             nelements);
4664         MDI_PI_UNLOCK(pip);
4665         return (i_map_nvlist_error_to_mdi(rv));
4666 }
4667 
4668 /*
4669  * mdi_prop_lookup_byte():
4670  *              Look for byte property identified by name.  The data returned
4671  *              is the actual property and valid as long as mdi_pathinfo_t node
4672  *              is alive.
4673  */
4674 int
4675 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data)
4676 {
4677         int rv;
4678 
4679         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4680                 return (DDI_PROP_NOT_FOUND);
4681         }
4682         rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data);
4683         return (i_map_nvlist_error_to_mdi(rv));
4684 }
4685 
4686 
4687 /*
4688  * mdi_prop_lookup_byte_array():
4689  *              Look for byte array property identified by name.  The data
4690  *              returned is the actual property and valid as long as
4691  *              mdi_pathinfo_t node is alive.
4692  */
4693 int
4694 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data,
4695     uint_t *nelements)
4696 {
4697         int rv;
4698 
4699         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4700                 return (DDI_PROP_NOT_FOUND);
4701         }
4702         rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data,
4703             nelements);
4704         return (i_map_nvlist_error_to_mdi(rv));
4705 }
4706 
4707 /*
4708  * mdi_prop_lookup_int():
4709  *              Look for int property identified by name.  The data returned
4710  *              is the actual property and valid as long as mdi_pathinfo_t
4711  *              node is alive.
4712  */
4713 int
4714 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data)
4715 {
4716         int rv;
4717 
4718         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4719                 return (DDI_PROP_NOT_FOUND);
4720         }
4721         rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data);
4722         return (i_map_nvlist_error_to_mdi(rv));
4723 }
4724 
4725 /*
4726  * mdi_prop_lookup_int64():
4727  *              Look for int64 property identified by name.  The data returned
4728  *              is the actual property and valid as long as mdi_pathinfo_t node
4729  *              is alive.
4730  */
4731 int
4732 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data)
4733 {
4734         int rv;
4735         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4736                 return (DDI_PROP_NOT_FOUND);
4737         }
4738         rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data);
4739         return (i_map_nvlist_error_to_mdi(rv));
4740 }
4741 
4742 /*
4743  * mdi_prop_lookup_int_array():
4744  *              Look for int array property identified by name.  The data
4745  *              returned is the actual property and valid as long as
4746  *              mdi_pathinfo_t node is alive.
4747  */
4748 int
4749 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data,
4750     uint_t *nelements)
4751 {
4752         int rv;
4753 
4754         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4755                 return (DDI_PROP_NOT_FOUND);
4756         }
4757         rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name,
4758             (int32_t **)data, nelements);
4759         return (i_map_nvlist_error_to_mdi(rv));
4760 }
4761 
4762 /*
4763  * mdi_prop_lookup_string():
4764  *              Look for string property identified by name.  The data
4765  *              returned is the actual property and valid as long as
4766  *              mdi_pathinfo_t node is alive.
4767  */
4768 int
4769 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data)
4770 {
4771         int rv;
4772 
4773         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4774                 return (DDI_PROP_NOT_FOUND);
4775         }
4776         rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data);
4777         return (i_map_nvlist_error_to_mdi(rv));
4778 }
4779 
4780 /*
4781  * mdi_prop_lookup_string_array():
4782  *              Look for string array property identified by name.  The data
4783  *              returned is the actual property and valid as long as
4784  *              mdi_pathinfo_t node is alive.
4785  */
4786 int
4787 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
4788     uint_t *nelements)
4789 {
4790         int rv;
4791 
4792         if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4793                 return (DDI_PROP_NOT_FOUND);
4794         }
4795         rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
4796             nelements);
4797         return (i_map_nvlist_error_to_mdi(rv));
4798 }
4799 
4800 /*
4801  * mdi_prop_free():
4802  *              Symmetrical function to ddi_prop_free(). nvlist_lookup_xx()
4803  *              functions return the pointer to actual property data and not a
4804  *              copy of it.  So the data returned is valid as long as
4805  *              mdi_pathinfo_t node is valid.
4806  */
4807 /*ARGSUSED*/
4808 int
4809 mdi_prop_free(void *data)
4810 {
4811         return (DDI_PROP_SUCCESS);
4812 }
4813 
4814 /*ARGSUSED*/
4815 static void
4816 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
4817 {
4818         char            *ct_path;
4819         char            *ct_status;
4820         char            *status;
4821         dev_info_t      *cdip = ct->ct_dip;
4822         char            lb_buf[64];
4823         int             report_lb_c = 0, report_lb_p = 0;
4824 
4825         ASSERT(MDI_CLIENT_LOCKED(ct));
4826         if ((cdip == NULL) || (ddi_get_instance(cdip) == -1) ||
4827             (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
4828                 return;
4829         }
4830         if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
4831                 ct_status = "optimal";
4832                 report_lb_c = 1;
4833         } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
4834                 ct_status = "degraded";
4835         } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
4836                 ct_status = "failed";
4837         } else {
4838                 ct_status = "unknown";
4839         }
4840 
4841         lb_buf[0] = 0;          /* not interested in load balancing config */
4842 
4843         if (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)) {
4844                 status = "removed";
4845         } else if (MDI_PI_IS_OFFLINE(pip)) {
4846                 status = "offline";
4847         } else if (MDI_PI_IS_ONLINE(pip)) {
4848                 status = "online";
4849                 report_lb_p = 1;
4850         } else if (MDI_PI_IS_STANDBY(pip)) {
4851                 status = "standby";
4852         } else if (MDI_PI_IS_FAULT(pip)) {
4853                 status = "faulted";
4854         } else {
4855                 status = "unknown";
4856         }
4857 
4858         if (cdip) {
4859                 ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
4860 
4861                 /*
4862                  * NOTE: Keeping "multipath status: %s" and
4863                  * "Load balancing: %s" format unchanged in case someone
4864                  * scrubs /var/adm/messages looking for these messages.
4865                  */
4866                 if (report_lb_c && report_lb_p) {
4867                         if (ct->ct_lb == LOAD_BALANCE_LBA) {
4868                                 (void) snprintf(lb_buf, sizeof (lb_buf),
4869                                     "%s, region-size: %d", mdi_load_balance_lba,
4870                                     ct->ct_lb_args->region_size);
4871                         } else if (ct->ct_lb == LOAD_BALANCE_NONE) {
4872                                 (void) snprintf(lb_buf, sizeof (lb_buf),
4873                                     "%s", mdi_load_balance_none);
4874                         } else {
4875                                 (void) snprintf(lb_buf, sizeof (lb_buf), "%s",
4876                                     mdi_load_balance_rr);
4877                         }
4878 
4879                         cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
4880                             "?%s (%s%d) multipath status: %s: "
4881                             "path %d %s is %s: Load balancing: %s\n",
4882                             ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
4883                             ddi_get_instance(cdip), ct_status,
4884                             mdi_pi_get_path_instance(pip),
4885                             mdi_pi_spathname(pip), status, lb_buf);
4886                 } else {
4887                         cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
4888                             "?%s (%s%d) multipath status: %s: "
4889                             "path %d %s is %s\n",
4890                             ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
4891                             ddi_get_instance(cdip), ct_status,
4892                             mdi_pi_get_path_instance(pip),
4893                             mdi_pi_spathname(pip), status);
4894                 }
4895 
4896                 kmem_free(ct_path, MAXPATHLEN);
4897                 MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
4898         }
4899 }
4900 
4901 #ifdef  DEBUG
4902 /*
4903  * i_mdi_log():
4904  *              Utility function for error message management
4905  *
4906  *              NOTE: Implementation takes care of trailing \n for cmn_err,
4907  *              MDI_DEBUG should not terminate fmt strings with \n.
4908  *
4909  *              NOTE: If the level is >= 2, and there is no leading !?^
 *              then a leading ! is implied (but can be overridden via
4911  *              mdi_debug_consoleonly). If you are using kmdb on the console,
4912  *              consider setting mdi_debug_consoleonly to 1 as an aid.
4913  */
4914 /*PRINTFLIKE4*/
4915 static void
4916 i_mdi_log(int level, const char *func, dev_info_t *dip, const char *fmt, ...)
4917 {
4918         char            name[MAXNAMELEN];
4919         char            buf[512];
4920         char            *bp;
4921         va_list         ap;
4922         int             log_only = 0;
4923         int             boot_only = 0;
4924         int             console_only = 0;
4925 
4926         if (dip) {
4927                 (void) snprintf(name, sizeof(name), "%s%d: ",
4928                     ddi_driver_name(dip), ddi_get_instance(dip));
4929         } else {
4930                 name[0] = 0;
4931         }
4932 
4933         va_start(ap, fmt);
4934         (void) vsnprintf(buf, sizeof(buf), fmt, ap);
4935         va_end(ap);
4936 
4937         switch (buf[0]) {
4938         case '!':
4939                 bp = &buf[1];
4940                 log_only = 1;
4941                 break;
4942         case '?':
4943                 bp = &buf[1];
4944                 boot_only = 1;
4945                 break;
4946         case '^':
4947                 bp = &buf[1];
4948                 console_only = 1;
4949                 break;
4950         default:
4951                 if (level >= 2)
4952                         log_only = 1;           /* ! implied */
4953                 bp = buf;
4954                 break;
4955         }
4956         if (mdi_debug_logonly) {
4957                 log_only = 1;
4958                 boot_only = 0;
4959                 console_only = 0;
4960         }
4961         if (mdi_debug_consoleonly) {
4962                 log_only = 0;
4963                 boot_only = 0;
4964                 console_only = 1;
4965                 level = CE_NOTE;
4966                 goto console;
4967         }
4968 
4969         switch (level) {
4970         case CE_NOTE:
4971                 level = CE_CONT;
4972                 /* FALLTHROUGH */
4973         case CE_CONT:
4974                 if (boot_only) {
4975                         cmn_err(level, "?mdi: %s%s: %s\n", name, func, bp);
4976                 } else if (console_only) {
4977                         cmn_err(level, "^mdi: %s%s: %s\n", name, func, bp);
4978                 } else if (log_only) {
4979                         cmn_err(level, "!mdi: %s%s: %s\n", name, func, bp);
4980                 } else {
4981                         cmn_err(level, "mdi: %s%s: %s\n", name, func, bp);
4982                 }
4983                 break;
4984 
4985         case CE_WARN:
4986         case CE_PANIC:
4987         console:
4988                 if (boot_only) {
4989                         cmn_err(level, "?mdi: %s%s: %s", name, func, bp);
4990                 } else if (console_only) {
4991                         cmn_err(level, "^mdi: %s%s: %s", name, func, bp);
4992                 } else if (log_only) {
4993                         cmn_err(level, "!mdi: %s%s: %s", name, func, bp);
4994                 } else {
4995                         cmn_err(level, "mdi: %s%s: %s", name, func, bp);
4996                 }
4997                 break;
4998         default:
4999                 cmn_err(level, "mdi: %s%s", name, bp);
5000                 break;
5001         }
5002 }
5003 #endif  /* DEBUG */
5004 
5005 void
5006 i_mdi_client_online(dev_info_t *ct_dip)
5007 {
5008         mdi_client_t    *ct;
5009 
5010         /*
5011          * Client online notification. Mark client state as online
5012          * restore our binding with dev_info node
5013          */
5014         ct = i_devi_get_client(ct_dip);
5015         ASSERT(ct != NULL);
5016         MDI_CLIENT_LOCK(ct);
5017         MDI_CLIENT_SET_ONLINE(ct);
5018         /* catch for any memory leaks */
5019         ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
5020         ct->ct_dip = ct_dip;
5021 
5022         if (ct->ct_power_cnt == 0)
5023                 (void) i_mdi_power_all_phci(ct);
5024 
5025         MDI_DEBUG(4, (MDI_NOTE, ct_dip,
5026             "i_mdi_pm_hold_client %p", (void *)ct));
5027         i_mdi_pm_hold_client(ct, 1);
5028 
5029         MDI_CLIENT_UNLOCK(ct);
5030 }
5031 
5032 void
5033 i_mdi_phci_online(dev_info_t *ph_dip)
5034 {
5035         mdi_phci_t      *ph;
5036 
5037         /* pHCI online notification. Mark state accordingly */
5038         ph = i_devi_get_phci(ph_dip);
5039         ASSERT(ph != NULL);
5040         MDI_PHCI_LOCK(ph);
5041         MDI_PHCI_SET_ONLINE(ph);
5042         MDI_PHCI_UNLOCK(ph);
5043 }
5044 
5045 /*
5046  * mdi_devi_online():
5047  *              Online notification from NDI framework on pHCI/client
5048  *              device online.
5049  * Return Values:
5050  *              NDI_SUCCESS
5051  *              MDI_FAILURE
5052  */
5053 /*ARGSUSED*/
5054 int
5055 mdi_devi_online(dev_info_t *dip, uint_t flags)
5056 {
5057         if (MDI_PHCI(dip)) {
5058                 i_mdi_phci_online(dip);
5059         }
5060 
5061         if (MDI_CLIENT(dip)) {
5062                 i_mdi_client_online(dip);
5063         }
5064         return (NDI_SUCCESS);
5065 }
5066 
5067 /*
5068  * mdi_devi_offline():
5069  *              Offline notification from NDI framework on pHCI/Client device
5070  *              offline.
5071  *
5072  * Return Values:
5073  *              NDI_SUCCESS
5074  *              NDI_FAILURE
5075  */
5076 /*ARGSUSED*/
5077 int
5078 mdi_devi_offline(dev_info_t *dip, uint_t flags)
5079 {
5080         int             rv = NDI_SUCCESS;
5081 
5082         if (MDI_CLIENT(dip)) {
5083                 rv = i_mdi_client_offline(dip, flags);
5084                 if (rv != NDI_SUCCESS)
5085                         return (rv);
5086         }
5087 
5088         if (MDI_PHCI(dip)) {
5089                 rv = i_mdi_phci_offline(dip, flags);
5090 
5091                 if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
5092                         /* set client back online */
5093                         i_mdi_client_online(dip);
5094                 }
5095         }
5096 
5097         return (rv);
5098 }
5099 
5100 /*ARGSUSED*/
5101 static int
5102 i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
5103 {
5104         int             rv = NDI_SUCCESS;
5105         mdi_phci_t      *ph;
5106         mdi_client_t    *ct;
5107         mdi_pathinfo_t  *pip;
5108         mdi_pathinfo_t  *next;
5109         mdi_pathinfo_t  *failed_pip = NULL;
5110         dev_info_t      *cdip;
5111 
5112         /*
5113          * pHCI component offline notification
5114          * Make sure that this pHCI instance is free to be offlined.
5115          * If it is OK to proceed, Offline and remove all the child
5116          * mdi_pathinfo nodes.  This process automatically offlines
5117          * corresponding client devices, for which this pHCI provides
5118          * critical services.
5119          */
5120         ph = i_devi_get_phci(dip);
5121         MDI_DEBUG(2, (MDI_NOTE, dip,
5122             "called %p %p", (void *)dip, (void *)ph));
5123         if (ph == NULL) {
5124                 return (rv);
5125         }
5126 
5127         MDI_PHCI_LOCK(ph);
5128 
5129         if (MDI_PHCI_IS_OFFLINE(ph)) {
5130                 MDI_DEBUG(1, (MDI_WARN, dip,
5131                     "!pHCI already offlined: %p", (void *)dip));
5132                 MDI_PHCI_UNLOCK(ph);
5133                 return (NDI_SUCCESS);
5134         }
5135 
5136         /*
5137          * Check to see if the pHCI can be offlined
5138          */
5139         if (ph->ph_unstable) {
5140                 MDI_DEBUG(1, (MDI_WARN, dip,
5141                     "!One or more target devices are in transient state. "
5142                     "This device can not be removed at this moment. "
5143                     "Please try again later."));
5144                 MDI_PHCI_UNLOCK(ph);
5145                 return (NDI_BUSY);
5146         }
5147 
5148         pip = ph->ph_path_head;
5149         while (pip != NULL) {
5150                 MDI_PI_LOCK(pip);
5151                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5152 
5153                 /*
5154                  * The mdi_pathinfo state is OK. Check the client state.
5155                  * If failover in progress fail the pHCI from offlining
5156                  */
5157                 ct = MDI_PI(pip)->pi_client;
5158                 i_mdi_client_lock(ct, pip);
5159                 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5160                     (ct->ct_unstable)) {
5161                         /*
5162                          * Failover is in progress, Fail the DR
5163                          */
5164                         MDI_DEBUG(1, (MDI_WARN, dip,
5165                             "!pHCI device is busy. "
5166                             "This device can not be removed at this moment. "
5167                             "Please try again later."));
5168                         MDI_PI_UNLOCK(pip);
5169                         i_mdi_client_unlock(ct);
5170                         MDI_PHCI_UNLOCK(ph);
5171                         return (NDI_BUSY);
5172                 }
5173                 MDI_PI_UNLOCK(pip);
5174 
5175                 /*
5176                  * Check to see of we are removing the last path of this
5177                  * client device...
5178                  */
5179                 cdip = ct->ct_dip;
5180                 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5181                     (i_mdi_client_compute_state(ct, ph) ==
5182                     MDI_CLIENT_STATE_FAILED)) {
5183                         i_mdi_client_unlock(ct);
5184                         MDI_PHCI_UNLOCK(ph);
5185                         if (ndi_devi_offline(cdip,
5186                             NDI_DEVFS_CLEAN) != NDI_SUCCESS) {
5187                                 /*
5188                                  * ndi_devi_offline() failed.
5189                                  * This pHCI provides the critical path
5190                                  * to one or more client devices.
5191                                  * Return busy.
5192                                  */
5193                                 MDI_PHCI_LOCK(ph);
5194                                 MDI_DEBUG(1, (MDI_WARN, dip,
5195                                     "!pHCI device is busy. "
5196                                     "This device can not be removed at this "
5197                                     "moment. Please try again later."));
5198                                 failed_pip = pip;
5199                                 break;
5200                         } else {
5201                                 MDI_PHCI_LOCK(ph);
5202                                 pip = next;
5203                         }
5204                 } else {
5205                         i_mdi_client_unlock(ct);
5206                         pip = next;
5207                 }
5208         }
5209 
5210         if (failed_pip) {
5211                 pip = ph->ph_path_head;
5212                 while (pip != failed_pip) {
5213                         MDI_PI_LOCK(pip);
5214                         next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5215                         ct = MDI_PI(pip)->pi_client;
5216                         i_mdi_client_lock(ct, pip);
5217                         cdip = ct->ct_dip;
5218                         switch (MDI_CLIENT_STATE(ct)) {
5219                         case MDI_CLIENT_STATE_OPTIMAL:
5220                         case MDI_CLIENT_STATE_DEGRADED:
5221                                 if (cdip) {
5222                                         MDI_PI_UNLOCK(pip);
5223                                         i_mdi_client_unlock(ct);
5224                                         MDI_PHCI_UNLOCK(ph);
5225                                         (void) ndi_devi_online(cdip, 0);
5226                                         MDI_PHCI_LOCK(ph);
5227                                         pip = next;
5228                                         continue;
5229                                 }
5230                                 break;
5231 
5232                         case MDI_CLIENT_STATE_FAILED:
5233                                 if (cdip) {
5234                                         MDI_PI_UNLOCK(pip);
5235                                         i_mdi_client_unlock(ct);
5236                                         MDI_PHCI_UNLOCK(ph);
5237                                         (void) ndi_devi_offline(cdip,
5238                                                 NDI_DEVFS_CLEAN);
5239                                         MDI_PHCI_LOCK(ph);
5240                                         pip = next;
5241                                         continue;
5242                                 }
5243                                 break;
5244                         }
5245                         MDI_PI_UNLOCK(pip);
5246                         i_mdi_client_unlock(ct);
5247                         pip = next;
5248                 }
5249                 MDI_PHCI_UNLOCK(ph);
5250                 return (NDI_BUSY);
5251         }
5252 
5253         /*
5254          * Mark the pHCI as offline
5255          */
5256         MDI_PHCI_SET_OFFLINE(ph);
5257 
5258         /*
5259          * Mark the child mdi_pathinfo nodes as transient
5260          */
5261         pip = ph->ph_path_head;
5262         while (pip != NULL) {
5263                 MDI_PI_LOCK(pip);
5264                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5265                 MDI_PI_SET_OFFLINING(pip);
5266                 MDI_PI_UNLOCK(pip);
5267                 pip = next;
5268         }
5269         MDI_PHCI_UNLOCK(ph);
5270         /*
5271          * Give a chance for any pending commands to execute
5272          */
5273         delay_random(mdi_delay);
5274         MDI_PHCI_LOCK(ph);
5275         pip = ph->ph_path_head;
5276         while (pip != NULL) {
5277                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5278                 (void) i_mdi_pi_offline(pip, flags);
5279                 MDI_PI_LOCK(pip);
5280                 ct = MDI_PI(pip)->pi_client;
5281                 if (!MDI_PI_IS_OFFLINE(pip)) {
5282                         MDI_DEBUG(1, (MDI_WARN, dip,
5283                             "!pHCI device is busy. "
5284                             "This device can not be removed at this moment. "
5285                             "Please try again later."));
5286                         MDI_PI_UNLOCK(pip);
5287                         MDI_PHCI_SET_ONLINE(ph);
5288                         MDI_PHCI_UNLOCK(ph);
5289                         return (NDI_BUSY);
5290                 }
5291                 MDI_PI_UNLOCK(pip);
5292                 pip = next;
5293         }
5294         MDI_PHCI_UNLOCK(ph);
5295 
5296         return (rv);
5297 }
5298 
5299 void
5300 mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array)
5301 {
5302         mdi_phci_t      *ph;
5303         mdi_client_t    *ct;
5304         mdi_pathinfo_t  *pip;
5305         mdi_pathinfo_t  *next;
5306         dev_info_t      *cdip;
5307 
5308         if (!MDI_PHCI(dip))
5309                 return;
5310 
5311         ph = i_devi_get_phci(dip);
5312         if (ph == NULL) {
5313                 return;
5314         }
5315 
5316         MDI_PHCI_LOCK(ph);
5317 
5318         if (MDI_PHCI_IS_OFFLINE(ph)) {
5319                 /* has no last path */
5320                 MDI_PHCI_UNLOCK(ph);
5321                 return;
5322         }
5323 
5324         pip = ph->ph_path_head;
5325         while (pip != NULL) {
5326                 MDI_PI_LOCK(pip);
5327                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5328 
5329                 ct = MDI_PI(pip)->pi_client;
5330                 i_mdi_client_lock(ct, pip);
5331                 MDI_PI_UNLOCK(pip);
5332 
5333                 cdip = ct->ct_dip;
5334                 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5335                     (i_mdi_client_compute_state(ct, ph) ==
5336                     MDI_CLIENT_STATE_FAILED)) {
5337                         /* Last path. Mark client dip as retiring */
5338                         i_mdi_client_unlock(ct);
5339                         MDI_PHCI_UNLOCK(ph);
5340                         (void) e_ddi_mark_retiring(cdip, cons_array);
5341                         MDI_PHCI_LOCK(ph);
5342                         pip = next;
5343                 } else {
5344                         i_mdi_client_unlock(ct);
5345                         pip = next;
5346                 }
5347         }
5348 
5349         MDI_PHCI_UNLOCK(ph);
5350 
5351         return;
5352 }
5353 
5354 void
5355 mdi_phci_retire_notify(dev_info_t *dip, int *constraint)
5356 {
5357         mdi_phci_t      *ph;
5358         mdi_client_t    *ct;
5359         mdi_pathinfo_t  *pip;
5360         mdi_pathinfo_t  *next;
5361         dev_info_t      *cdip;
5362 
5363         if (!MDI_PHCI(dip))
5364                 return;
5365 
5366         ph = i_devi_get_phci(dip);
5367         if (ph == NULL)
5368                 return;
5369 
5370         MDI_PHCI_LOCK(ph);
5371 
5372         if (MDI_PHCI_IS_OFFLINE(ph)) {
5373                 MDI_PHCI_UNLOCK(ph);
5374                 /* not last path */
5375                 return;
5376         }
5377 
5378         if (ph->ph_unstable) {
5379                 MDI_PHCI_UNLOCK(ph);
5380                 /* can't check for constraints */
5381                 *constraint = 0;
5382                 return;
5383         }
5384 
5385         pip = ph->ph_path_head;
5386         while (pip != NULL) {
5387                 MDI_PI_LOCK(pip);
5388                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5389 
5390                 /*
5391                  * The mdi_pathinfo state is OK. Check the client state.
5392                  * If failover in progress fail the pHCI from offlining
5393                  */
5394                 ct = MDI_PI(pip)->pi_client;
5395                 i_mdi_client_lock(ct, pip);
5396                 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5397                     (ct->ct_unstable)) {
5398                         /*
5399                          * Failover is in progress, can't check for constraints
5400                          */
5401                         MDI_PI_UNLOCK(pip);
5402                         i_mdi_client_unlock(ct);
5403                         MDI_PHCI_UNLOCK(ph);
5404                         *constraint = 0;
5405                         return;
5406                 }
5407                 MDI_PI_UNLOCK(pip);
5408 
5409                 /*
5410                  * Check to see of we are retiring the last path of this
5411                  * client device...
5412                  */
5413                 cdip = ct->ct_dip;
5414                 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5415                     (i_mdi_client_compute_state(ct, ph) ==
5416                     MDI_CLIENT_STATE_FAILED)) {
5417                         i_mdi_client_unlock(ct);
5418                         MDI_PHCI_UNLOCK(ph);
5419                         (void) e_ddi_retire_notify(cdip, constraint);
5420                         MDI_PHCI_LOCK(ph);
5421                         pip = next;
5422                 } else {
5423                         i_mdi_client_unlock(ct);
5424                         pip = next;
5425                 }
5426         }
5427 
5428         MDI_PHCI_UNLOCK(ph);
5429 
5430         return;
5431 }
5432 
5433 /*
5434  * offline the path(s) hanging off the pHCI. If the
5435  * last path to any client, check that constraints
5436  * have been applied.
5437  *
5438  * If constraint is 0, we aren't going to retire the
5439  * pHCI. However we still need to go through the paths
5440  * calling e_ddi_retire_finalize() to clear their
5441  * contract barriers.
5442  */
5443 void
5444 mdi_phci_retire_finalize(dev_info_t *dip, int phci_only, void *constraint)
5445 {
5446         mdi_phci_t      *ph;
5447         mdi_client_t    *ct;
5448         mdi_pathinfo_t  *pip;
5449         mdi_pathinfo_t  *next;
5450         dev_info_t      *cdip;
5451         int             unstable = 0;
5452         int             tmp_constraint;
5453 
5454         if (!MDI_PHCI(dip))
5455                 return;
5456 
5457         ph = i_devi_get_phci(dip);
5458         if (ph == NULL) {
5459                 /* no last path and no pips */
5460                 return;
5461         }
5462 
5463         MDI_PHCI_LOCK(ph);
5464 
5465         if (MDI_PHCI_IS_OFFLINE(ph)) {
5466                 MDI_PHCI_UNLOCK(ph);
5467                 /* no last path and no pips */
5468                 return;
5469         }
5470 
5471         /*
5472          * Check to see if the pHCI can be offlined
5473          */
5474         if (ph->ph_unstable) {
5475                 unstable = 1;
5476         }
5477 
5478         pip = ph->ph_path_head;
5479         while (pip != NULL) {
5480                 MDI_PI_LOCK(pip);
5481                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5482 
5483                 /*
5484                  * if failover in progress fail the pHCI from offlining
5485                  */
5486                 ct = MDI_PI(pip)->pi_client;
5487                 i_mdi_client_lock(ct, pip);
5488                 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5489                     (ct->ct_unstable)) {
5490                         unstable = 1;
5491                 }
5492                 MDI_PI_UNLOCK(pip);
5493 
5494                 /*
5495                  * Check to see of we are removing the last path of this
5496                  * client device...
5497                  */
5498                 cdip = ct->ct_dip;
5499                 if (!phci_only && cdip &&
5500                     (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5501                     (i_mdi_client_compute_state(ct, ph) ==
5502                     MDI_CLIENT_STATE_FAILED)) {
5503                         i_mdi_client_unlock(ct);
5504                         MDI_PHCI_UNLOCK(ph);
5505                         /*
5506                          * This is the last path to this client.
5507                          *
5508                          * Constraint will only be set to 1 if this client can
5509                          * be retired (as already determined by
5510                          * mdi_phci_retire_notify). However we don't actually
5511                          * need to retire the client (we just retire the last
5512                          * path - MPXIO will then fail all I/Os to the client).
5513                          * But we still need to call e_ddi_retire_finalize so
5514                          * the contract barriers can be cleared. Therefore we
5515                          * temporarily set constraint = 0 so that the client
5516                          * dip is not retired.
5517                          */
5518                         tmp_constraint = 0;
5519                         (void) e_ddi_retire_finalize(cdip, &tmp_constraint);
5520                         MDI_PHCI_LOCK(ph);
5521                         pip = next;
5522                 } else {
5523                         i_mdi_client_unlock(ct);
5524                         pip = next;
5525                 }
5526         }
5527 
5528         if (!phci_only && *((int *)constraint) == 0) {
5529                 MDI_PHCI_UNLOCK(ph);
5530                 return;
5531         }
5532 
5533         /*
5534          * Cannot offline pip(s)
5535          */
5536         if (unstable) {
5537                 cmn_err(CE_WARN, "%s%d: mdi_phci_retire_finalize: "
5538                     "pHCI in transient state, cannot retire",
5539                     ddi_driver_name(dip), ddi_get_instance(dip));
5540                 MDI_PHCI_UNLOCK(ph);
5541                 return;
5542         }
5543 
5544         /*
5545          * Mark the pHCI as offline
5546          */
5547         MDI_PHCI_SET_OFFLINE(ph);
5548 
5549         /*
5550          * Mark the child mdi_pathinfo nodes as transient
5551          */
5552         pip = ph->ph_path_head;
5553         while (pip != NULL) {
5554                 MDI_PI_LOCK(pip);
5555                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5556                 MDI_PI_SET_OFFLINING(pip);
5557                 MDI_PI_UNLOCK(pip);
5558                 pip = next;
5559         }
5560         MDI_PHCI_UNLOCK(ph);
5561         /*
5562          * Give a chance for any pending commands to execute
5563          */
5564         delay_random(mdi_delay);
5565         MDI_PHCI_LOCK(ph);
5566         pip = ph->ph_path_head;
5567         while (pip != NULL) {
5568                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5569                 (void) i_mdi_pi_offline(pip, 0);
5570                 MDI_PI_LOCK(pip);
5571                 ct = MDI_PI(pip)->pi_client;
5572                 if (!MDI_PI_IS_OFFLINE(pip)) {
5573                         cmn_err(CE_WARN, "mdi_phci_retire_finalize: "
5574                             "path %d %s busy, cannot offline",
5575                             mdi_pi_get_path_instance(pip),
5576                             mdi_pi_spathname(pip));
5577                         MDI_PI_UNLOCK(pip);
5578                         MDI_PHCI_SET_ONLINE(ph);
5579                         MDI_PHCI_UNLOCK(ph);
5580                         return;
5581                 }
5582                 MDI_PI_UNLOCK(pip);
5583                 pip = next;
5584         }
5585         MDI_PHCI_UNLOCK(ph);
5586 
5587         return;
5588 }
5589 
5590 void
5591 mdi_phci_unretire(dev_info_t *dip)
5592 {
5593         mdi_phci_t      *ph;
5594         mdi_pathinfo_t  *pip;
5595         mdi_pathinfo_t  *next;
5596 
5597         ASSERT(MDI_PHCI(dip));
5598 
5599         /*
5600          * Online the phci
5601          */
5602         i_mdi_phci_online(dip);
5603 
5604         ph = i_devi_get_phci(dip);
5605         MDI_PHCI_LOCK(ph);
5606         pip = ph->ph_path_head;
5607         while (pip != NULL) {
5608                 MDI_PI_LOCK(pip);
5609                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5610                 MDI_PI_UNLOCK(pip);
5611                 (void) i_mdi_pi_online(pip, 0);
5612                 pip = next;
5613         }
5614         MDI_PHCI_UNLOCK(ph);
5615 }
5616 
5617 /*ARGSUSED*/
5618 static int
5619 i_mdi_client_offline(dev_info_t *dip, uint_t flags)
5620 {
5621         int             rv = NDI_SUCCESS;
5622         mdi_client_t    *ct;
5623 
5624         /*
5625          * Client component to go offline.  Make sure that we are
5626          * not in failing over state and update client state
5627          * accordingly
5628          */
5629         ct = i_devi_get_client(dip);
5630         MDI_DEBUG(2, (MDI_NOTE, dip,
5631             "called %p %p", (void *)dip, (void *)ct));
5632         if (ct != NULL) {
5633                 MDI_CLIENT_LOCK(ct);
5634                 if (ct->ct_unstable) {
5635                         /*
5636                          * One or more paths are in transient state,
5637                          * Dont allow offline of a client device
5638                          */
5639                         MDI_DEBUG(1, (MDI_WARN, dip,
5640                             "!One or more paths to "
5641                             "this device are in transient state. "
5642                             "This device can not be removed at this moment. "
5643                             "Please try again later."));
5644                         MDI_CLIENT_UNLOCK(ct);
5645                         return (NDI_BUSY);
5646                 }
5647                 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
5648                         /*
5649                          * Failover is in progress, Dont allow DR of
5650                          * a client device
5651                          */
5652                         MDI_DEBUG(1, (MDI_WARN, dip,
5653                             "!Client device is Busy. "
5654                             "This device can not be removed at this moment. "
5655                             "Please try again later."));
5656                         MDI_CLIENT_UNLOCK(ct);
5657                         return (NDI_BUSY);
5658                 }
5659                 MDI_CLIENT_SET_OFFLINE(ct);
5660 
5661                 /*
5662                  * Unbind our relationship with the dev_info node
5663                  */
5664                 if (flags & NDI_DEVI_REMOVE) {
5665                         ct->ct_dip = NULL;
5666                 }
5667                 MDI_CLIENT_UNLOCK(ct);
5668         }
5669         return (rv);
5670 }
5671 
5672 /*
5673  * mdi_pre_attach():
5674  *              Pre attach() notification handler
5675  */
5676 /*ARGSUSED*/
5677 int
5678 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5679 {
5680         /* don't support old DDI_PM_RESUME */
5681         if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
5682             (cmd == DDI_PM_RESUME))
5683                 return (DDI_FAILURE);
5684 
5685         return (DDI_SUCCESS);
5686 }
5687 
5688 /*
5689  * mdi_post_attach():
5690  *              Post attach() notification handler
5691  */
5692 /*ARGSUSED*/
5693 void
5694 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
5695 {
5696         mdi_phci_t      *ph;
5697         mdi_client_t    *ct;
5698         mdi_vhci_t      *vh;
5699 
5700         if (MDI_PHCI(dip)) {
5701                 ph = i_devi_get_phci(dip);
5702                 ASSERT(ph != NULL);
5703 
5704                 MDI_PHCI_LOCK(ph);
5705                 switch (cmd) {
5706                 case DDI_ATTACH:
5707                         MDI_DEBUG(2, (MDI_NOTE, dip,
5708                             "phci post_attach called %p", (void *)ph));
5709                         if (error == DDI_SUCCESS) {
5710                                 MDI_PHCI_SET_ATTACH(ph);
5711                         } else {
5712                                 MDI_DEBUG(1, (MDI_NOTE, dip,
5713                                     "!pHCI post_attach failed: error %d",
5714                                     error));
5715                                 MDI_PHCI_SET_DETACH(ph);
5716                         }
5717                         break;
5718 
5719                 case DDI_RESUME:
5720                         MDI_DEBUG(2, (MDI_NOTE, dip,
5721                             "pHCI post_resume: called %p", (void *)ph));
5722                         if (error == DDI_SUCCESS) {
5723                                 MDI_PHCI_SET_RESUME(ph);
5724                         } else {
5725                                 MDI_DEBUG(1, (MDI_NOTE, dip,
5726                                     "!pHCI post_resume failed: error %d",
5727                                     error));
5728                                 MDI_PHCI_SET_SUSPEND(ph);
5729                         }
5730                         break;
5731                 }
5732                 MDI_PHCI_UNLOCK(ph);
5733         }
5734 
5735         if (MDI_CLIENT(dip)) {
5736                 ct = i_devi_get_client(dip);
5737                 ASSERT(ct != NULL);
5738 
5739                 MDI_CLIENT_LOCK(ct);
5740                 switch (cmd) {
5741                 case DDI_ATTACH:
5742                         MDI_DEBUG(2, (MDI_NOTE, dip,
5743                             "client post_attach called %p", (void *)ct));
5744                         if (error != DDI_SUCCESS) {
5745                                 MDI_DEBUG(1, (MDI_NOTE, dip,
5746                                     "!client post_attach failed: error %d",
5747                                     error));
5748                                 MDI_CLIENT_SET_DETACH(ct);
5749                                 MDI_DEBUG(4, (MDI_WARN, dip,
5750                                     "i_mdi_pm_reset_client"));
5751                                 i_mdi_pm_reset_client(ct);
5752                                 break;
5753                         }
5754 
5755                         /*
5756                          * Client device has successfully attached, inform
5757                          * the vhci.
5758                          */
5759                         vh = ct->ct_vhci;
5760                         if (vh->vh_ops->vo_client_attached)
5761                                 (*vh->vh_ops->vo_client_attached)(dip);
5762 
5763                         MDI_CLIENT_SET_ATTACH(ct);
5764                         break;
5765 
5766                 case DDI_RESUME:
5767                         MDI_DEBUG(2, (MDI_NOTE, dip,
5768                             "client post_attach: called %p", (void *)ct));
5769                         if (error == DDI_SUCCESS) {
5770                                 MDI_CLIENT_SET_RESUME(ct);
5771                         } else {
5772                                 MDI_DEBUG(1, (MDI_NOTE, dip,
5773                                     "!client post_resume failed: error %d",
5774                                     error));
5775                                 MDI_CLIENT_SET_SUSPEND(ct);
5776                         }
5777                         break;
5778                 }
5779                 MDI_CLIENT_UNLOCK(ct);
5780         }
5781 }
5782 
5783 /*
5784  * mdi_pre_detach():
5785  *              Pre detach notification handler
5786  */
5787 /*ARGSUSED*/
5788 int
5789 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5790 {
5791         int rv = DDI_SUCCESS;
5792 
5793         if (MDI_CLIENT(dip)) {
5794                 (void) i_mdi_client_pre_detach(dip, cmd);
5795         }
5796 
5797         if (MDI_PHCI(dip)) {
5798                 rv = i_mdi_phci_pre_detach(dip, cmd);
5799         }
5800 
5801         return (rv);
5802 }
5803 
5804 /*ARGSUSED*/
5805 static int
5806 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5807 {
5808         int             rv = DDI_SUCCESS;
5809         mdi_phci_t      *ph;
5810         mdi_client_t    *ct;
5811         mdi_pathinfo_t  *pip;
5812         mdi_pathinfo_t  *failed_pip = NULL;
5813         mdi_pathinfo_t  *next;
5814 
5815         ph = i_devi_get_phci(dip);
5816         if (ph == NULL) {
5817                 return (rv);
5818         }
5819 
5820         MDI_PHCI_LOCK(ph);
5821         switch (cmd) {
5822         case DDI_DETACH:
5823                 MDI_DEBUG(2, (MDI_NOTE, dip,
5824                     "pHCI pre_detach: called %p", (void *)ph));
5825                 if (!MDI_PHCI_IS_OFFLINE(ph)) {
5826                         /*
5827                          * mdi_pathinfo nodes are still attached to
5828                          * this pHCI. Fail the detach for this pHCI.
5829                          */
5830                         MDI_DEBUG(2, (MDI_WARN, dip,
5831                             "pHCI pre_detach: paths are still attached %p",
5832                             (void *)ph));
5833                         rv = DDI_FAILURE;
5834                         break;
5835                 }
5836                 MDI_PHCI_SET_DETACH(ph);
5837                 break;
5838 
5839         case DDI_SUSPEND:
5840                 /*
5841                  * pHCI is getting suspended.  Since mpxio client
5842                  * devices may not be suspended at this point, to avoid
5843                  * a potential stack overflow, it is important to suspend
5844                  * client devices before pHCI can be suspended.
5845                  */
5846 
5847                 MDI_DEBUG(2, (MDI_NOTE, dip,
5848                     "pHCI pre_suspend: called %p", (void *)ph));
5849                 /*
5850                  * Suspend all the client devices accessible through this pHCI
5851                  */
5852                 pip = ph->ph_path_head;
5853                 while (pip != NULL && rv == DDI_SUCCESS) {
5854                         dev_info_t *cdip;
5855                         MDI_PI_LOCK(pip);
5856                         next =
5857                             (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5858                         ct = MDI_PI(pip)->pi_client;
5859                         i_mdi_client_lock(ct, pip);
5860                         cdip = ct->ct_dip;
5861                         MDI_PI_UNLOCK(pip);
5862                         if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
5863                             MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
5864                                 i_mdi_client_unlock(ct);
5865                                 if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
5866                                     DDI_SUCCESS) {
5867                                         /*
5868                                          * Suspend of one of the client
5869                                          * device has failed.
5870                                          */
5871                                         MDI_DEBUG(1, (MDI_WARN, dip,
5872                                             "!suspend of device (%s%d) failed.",
5873                                             ddi_driver_name(cdip),
5874                                             ddi_get_instance(cdip)));
5875                                         failed_pip = pip;
5876                                         break;
5877                                 }
5878                         } else {
5879                                 i_mdi_client_unlock(ct);
5880                         }
5881                         pip = next;
5882                 }
5883 
5884                 if (rv == DDI_SUCCESS) {
5885                         /*
5886                          * Suspend of client devices is complete. Proceed
5887                          * with pHCI suspend.
5888                          */
5889                         MDI_PHCI_SET_SUSPEND(ph);
5890                 } else {
5891                         /*
5892                          * Revert back all the suspended client device states
5893                          * to converse.
5894                          */
5895                         pip = ph->ph_path_head;
5896                         while (pip != failed_pip) {
5897                                 dev_info_t *cdip;
5898                                 MDI_PI_LOCK(pip);
5899                                 next =
5900                                     (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5901                                 ct = MDI_PI(pip)->pi_client;
5902                                 i_mdi_client_lock(ct, pip);
5903                                 cdip = ct->ct_dip;
5904                                 MDI_PI_UNLOCK(pip);
5905                                 if (MDI_CLIENT_IS_SUSPENDED(ct)) {
5906                                         i_mdi_client_unlock(ct);
5907                                         (void) devi_attach(cdip, DDI_RESUME);
5908                                 } else {
5909                                         i_mdi_client_unlock(ct);
5910                                 }
5911                                 pip = next;
5912                         }
5913                 }
5914                 break;
5915 
5916         default:
5917                 rv = DDI_FAILURE;
5918                 break;
5919         }
5920         MDI_PHCI_UNLOCK(ph);
5921         return (rv);
5922 }
5923 
5924 /*ARGSUSED*/
5925 static int
5926 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5927 {
5928         int             rv = DDI_SUCCESS;
5929         mdi_client_t    *ct;
5930 
5931         ct = i_devi_get_client(dip);
5932         if (ct == NULL) {
5933                 return (rv);
5934         }
5935 
5936         MDI_CLIENT_LOCK(ct);
5937         switch (cmd) {
5938         case DDI_DETACH:
5939                 MDI_DEBUG(2, (MDI_NOTE, dip,
5940                     "client pre_detach: called %p",
5941                      (void *)ct));
5942                 MDI_CLIENT_SET_DETACH(ct);
5943                 break;
5944 
5945         case DDI_SUSPEND:
5946                 MDI_DEBUG(2, (MDI_NOTE, dip,
5947                     "client pre_suspend: called %p",
5948                     (void *)ct));
5949                 MDI_CLIENT_SET_SUSPEND(ct);
5950                 break;
5951 
5952         default:
5953                 rv = DDI_FAILURE;
5954                 break;
5955         }
5956         MDI_CLIENT_UNLOCK(ct);
5957         return (rv);
5958 }
5959 
5960 /*
5961  * mdi_post_detach():
5962  *              Post detach notification handler
5963  */
5964 /*ARGSUSED*/
5965 void
5966 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5967 {
5968         /*
5969          * Detach/Suspend of mpxio component failed. Update our state
5970          * too
5971          */
5972         if (MDI_PHCI(dip))
5973                 i_mdi_phci_post_detach(dip, cmd, error);
5974 
5975         if (MDI_CLIENT(dip))
5976                 i_mdi_client_post_detach(dip, cmd, error);
5977 }
5978 
5979 /*ARGSUSED*/
5980 static void
5981 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5982 {
5983         mdi_phci_t      *ph;
5984 
5985         /*
5986          * Detach/Suspend of phci component failed. Update our state
5987          * too
5988          */
5989         ph = i_devi_get_phci(dip);
5990         if (ph == NULL) {
5991                 return;
5992         }
5993 
5994         MDI_PHCI_LOCK(ph);
5995         /*
5996          * Detach of pHCI failed. Restore back converse
5997          * state
5998          */
5999         switch (cmd) {
6000         case DDI_DETACH:
6001                 MDI_DEBUG(2, (MDI_NOTE, dip,
6002                     "pHCI post_detach: called %p",
6003                     (void *)ph));
6004                 if (error != DDI_SUCCESS)
6005                         MDI_PHCI_SET_ATTACH(ph);
6006                 break;
6007 
6008         case DDI_SUSPEND:
6009                 MDI_DEBUG(2, (MDI_NOTE, dip,
6010                     "pHCI post_suspend: called %p",
6011                     (void *)ph));
6012                 if (error != DDI_SUCCESS)
6013                         MDI_PHCI_SET_RESUME(ph);
6014                 break;
6015         }
6016         MDI_PHCI_UNLOCK(ph);
6017 }
6018 
6019 /*ARGSUSED*/
6020 static void
6021 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
6022 {
6023         mdi_client_t    *ct;
6024 
6025         ct = i_devi_get_client(dip);
6026         if (ct == NULL) {
6027                 return;
6028         }
6029         MDI_CLIENT_LOCK(ct);
6030         /*
6031          * Detach of Client failed. Restore back converse
6032          * state
6033          */
6034         switch (cmd) {
6035         case DDI_DETACH:
6036                 MDI_DEBUG(2, (MDI_NOTE, dip,
6037                     "client post_detach: called %p", (void *)ct));
6038                 if (DEVI_IS_ATTACHING(dip)) {
6039                         MDI_DEBUG(4, (MDI_NOTE, dip,
6040                             "i_mdi_pm_rele_client\n"));
6041                         i_mdi_pm_rele_client(ct, ct->ct_path_count);
6042                 } else {
6043                         MDI_DEBUG(4, (MDI_NOTE, dip,
6044                             "i_mdi_pm_reset_client\n"));
6045                         i_mdi_pm_reset_client(ct);
6046                 }
6047                 if (error != DDI_SUCCESS)
6048                         MDI_CLIENT_SET_ATTACH(ct);
6049                 break;
6050 
6051         case DDI_SUSPEND:
6052                 MDI_DEBUG(2, (MDI_NOTE, dip,
6053                     "called %p", (void *)ct));
6054                 if (error != DDI_SUCCESS)
6055                         MDI_CLIENT_SET_RESUME(ct);
6056                 break;
6057         }
6058         MDI_CLIENT_UNLOCK(ct);
6059 }
6060 
6061 int
6062 mdi_pi_kstat_exists(mdi_pathinfo_t *pip)
6063 {
6064         return (MDI_PI(pip)->pi_kstats ? 1 : 0);
6065 }
6066 
6067 /*
6068  * create and install per-path (client - pHCI) statistics
6069  * I/O stats supported: nread, nwritten, reads, and writes
6070  * Error stats - hard errors, soft errors, & transport errors
6071  */
6072 int
6073 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname)
6074 {
6075         kstat_t                 *kiosp, *kerrsp;
6076         struct pi_errs          *nsp;
6077         struct mdi_pi_kstats    *mdi_statp;
6078 
6079         if (MDI_PI(pip)->pi_kstats != NULL)
6080                 return (MDI_SUCCESS);
6081 
6082         if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
6083             KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
6084                 return (MDI_FAILURE);
6085         }
6086 
6087         (void) strcat(ksname, ",err");
6088         kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
6089             KSTAT_TYPE_NAMED,
6090             sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
6091         if (kerrsp == NULL) {
6092                 kstat_delete(kiosp);
6093                 return (MDI_FAILURE);
6094         }
6095 
6096         nsp = (struct pi_errs *)kerrsp->ks_data;
6097         kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
6098         kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
6099         kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
6100             KSTAT_DATA_UINT32);
6101         kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
6102             KSTAT_DATA_UINT32);
6103         kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
6104             KSTAT_DATA_UINT32);
6105         kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
6106             KSTAT_DATA_UINT32);
6107         kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
6108             KSTAT_DATA_UINT32);
6109         kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
6110             KSTAT_DATA_UINT32);
6111         kstat_named_init(&nsp->pi_failedfrom, "Failed From",
6112             KSTAT_DATA_UINT32);
6113         kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);
6114 
6115         mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
6116         mdi_statp->pi_kstat_ref = 1;
6117         mdi_statp->pi_kstat_iostats = kiosp;
6118         mdi_statp->pi_kstat_errstats = kerrsp;
6119         kstat_install(kiosp);
6120         kstat_install(kerrsp);
6121         MDI_PI(pip)->pi_kstats = mdi_statp;
6122         return (MDI_SUCCESS);
6123 }
6124 
6125 /*
6126  * destroy per-path properties
6127  */
6128 static void
6129 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
6130 {
6131 
6132         struct mdi_pi_kstats *mdi_statp;
6133 
6134         if (MDI_PI(pip)->pi_kstats == NULL)
6135                 return;
6136         if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
6137                 return;
6138 
6139         MDI_PI(pip)->pi_kstats = NULL;
6140 
6141         /*
6142          * the kstat may be shared between multiple pathinfo nodes
6143          * decrement this pathinfo's usage, removing the kstats
6144          * themselves when the last pathinfo reference is removed.
6145          */
6146         ASSERT(mdi_statp->pi_kstat_ref > 0);
6147         if (--mdi_statp->pi_kstat_ref != 0)
6148                 return;
6149 
6150         kstat_delete(mdi_statp->pi_kstat_iostats);
6151         kstat_delete(mdi_statp->pi_kstat_errstats);
6152         kmem_free(mdi_statp, sizeof (*mdi_statp));
6153 }
6154 
6155 /*
6156  * update I/O paths KSTATS
6157  */
6158 void
6159 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
6160 {
6161         kstat_t *iostatp;
6162         size_t xfer_cnt;
6163 
6164         ASSERT(pip != NULL);
6165 
6166         /*
6167          * I/O can be driven across a path prior to having path
6168          * statistics available, i.e. probe(9e).
6169          */
6170         if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
6171                 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
6172                 xfer_cnt = bp->b_bcount - bp->b_resid;
6173                 if (bp->b_flags & B_READ) {
6174                         KSTAT_IO_PTR(iostatp)->reads++;
6175                         KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
6176                 } else {
6177                         KSTAT_IO_PTR(iostatp)->writes++;
6178                         KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
6179                 }
6180         }
6181 }
6182 
6183 /*
6184  * Enable the path(specific client/target/initiator)
6185  * Enabling a path means that MPxIO may select the enabled path for routing
6186  * future I/O requests, subject to other path state constraints.
6187  */
6188 int
6189 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags)
6190 {
6191         mdi_phci_t      *ph;
6192 
6193         ph = MDI_PI(pip)->pi_phci;
6194         if (ph == NULL) {
6195                 MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
6196                     "!failed: path %s %p: NULL ph",
6197                     mdi_pi_spathname(pip), (void *)pip));
6198                 return (MDI_FAILURE);
6199         }
6200 
6201         (void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags,
6202                 MDI_ENABLE_OP);
6203         MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
6204             "!returning success pip = %p. ph = %p",
6205             (void *)pip, (void *)ph));
6206         return (MDI_SUCCESS);
6207 
6208 }
6209 
6210 /*
6211  * Disable the path (specific client/target/initiator)
6212  * Disabling a path means that MPxIO will not select the disabled path for
6213  * routing any new I/O requests.
6214  */
6215 int
6216 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags)
6217 {
6218         mdi_phci_t      *ph;
6219 
6220         ph = MDI_PI(pip)->pi_phci;
6221         if (ph == NULL) {
6222                 MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
6223                     "!failed: path %s %p: NULL ph",
6224                     mdi_pi_spathname(pip), (void *)pip));
6225                 return (MDI_FAILURE);
6226         }
6227 
6228         (void) i_mdi_enable_disable_path(pip,
6229             ph->ph_vhci, flags, MDI_DISABLE_OP);
6230         MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
6231             "!returning success pip = %p. ph = %p",
6232             (void *)pip, (void *)ph));
6233         return (MDI_SUCCESS);
6234 }
6235 
6236 /*
6237  * disable the path to a particular pHCI (pHCI specified in the phci_path
6238  * argument) for a particular client (specified in the client_path argument).
6239  * Disabling a path means that MPxIO will not select the disabled path for
6240  * routing any new I/O requests.
6241  * NOTE: this will be removed once the NWS files are changed to use the new
6242  * mdi_{enable,disable}_path interfaces
6243  */
6244 int
6245 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags)
6246 {
6247         return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP));
6248 }
6249 
6250 /*
6251  * Enable the path to a particular pHCI (pHCI specified in the phci_path
6252  * argument) for a particular client (specified in the client_path argument).
6253  * Enabling a path means that MPxIO may select the enabled path for routing
6254  * future I/O requests, subject to other path state constraints.
6255  * NOTE: this will be removed once the NWS files are changed to use the new
6256  * mdi_{enable,disable}_path interfaces
6257  */
6258 
6259 int
6260 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags)
6261 {
6262         return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP));
6263 }
6264 
6265 /*
6266  * Common routine for doing enable/disable.
6267  */
6268 static mdi_pathinfo_t *
6269 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags,
6270                 int op)
6271 {
6272         int             sync_flag = 0;
6273         int             rv;
6274         mdi_pathinfo_t  *next;
6275         int             (*f)() = NULL;
6276 
6277         /*
6278          * Check to make sure the path is not already in the
6279          * requested state. If it is just return the next path
6280          * as we have nothing to do here.
6281          */
6282         if ((MDI_PI_IS_DISABLE(pip) && op == MDI_DISABLE_OP) ||
6283             (!MDI_PI_IS_DISABLE(pip) && op == MDI_ENABLE_OP)) {
6284                 MDI_PI_LOCK(pip);
6285                 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
6286                 MDI_PI_UNLOCK(pip);
6287                 return (next);
6288         }
6289 
6290         f = vh->vh_ops->vo_pi_state_change;
6291 
6292         sync_flag = (flags << 8) & 0xf00;
6293 
6294         /*
6295          * Do a callback into the mdi consumer to let it
6296          * know that path is about to get enabled/disabled.
6297          */
6298         if (f != NULL) {
6299                 rv = (*f)(vh->vh_dip, pip, 0,
6300                         MDI_PI_EXT_STATE(pip),
6301                         MDI_EXT_STATE_CHANGE | sync_flag |
6302                         op | MDI_BEFORE_STATE_CHANGE);
6303                 if (rv != MDI_SUCCESS) {
6304                         MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
6305                             "vo_pi_state_change: failed rv = %x", rv));
6306                 }
6307         }
6308         MDI_PI_LOCK(pip);
6309         next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
6310 
6311         switch (flags) {
6312                 case USER_DISABLE:
6313                         if (op == MDI_DISABLE_OP) {
6314                                 MDI_PI_SET_USER_DISABLE(pip);
6315                         } else {
6316                                 MDI_PI_SET_USER_ENABLE(pip);
6317                         }
6318                         break;
6319                 case DRIVER_DISABLE:
6320                         if (op == MDI_DISABLE_OP) {
6321                                 MDI_PI_SET_DRV_DISABLE(pip);
6322                         } else {
6323                                 MDI_PI_SET_DRV_ENABLE(pip);
6324                         }
6325                         break;
6326                 case DRIVER_DISABLE_TRANSIENT:
6327                         if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) {
6328                                 MDI_PI_SET_DRV_DISABLE_TRANS(pip);
6329                         } else {
6330                                 MDI_PI_SET_DRV_ENABLE_TRANS(pip);
6331                         }
6332                         break;
6333         }
6334         MDI_PI_UNLOCK(pip);
6335         /*
6336          * Do a callback into the mdi consumer to let it
6337          * know that path is now enabled/disabled.
6338          */
6339         if (f != NULL) {
6340                 rv = (*f)(vh->vh_dip, pip, 0,
6341                         MDI_PI_EXT_STATE(pip),
6342                         MDI_EXT_STATE_CHANGE | sync_flag |
6343                         op | MDI_AFTER_STATE_CHANGE);
6344                 if (rv != MDI_SUCCESS) {
6345                         MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
6346                             "vo_pi_state_change failed: rv = %x", rv));
6347                 }
6348         }
6349         return (next);
6350 }
6351 
6352 /*
6353  * Common routine for doing enable/disable.
6354  * NOTE: this will be removed once the NWS files are changed to use the new
6355  * mdi_{enable,disable}_path has been putback
6356  */
6357 int
6358 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op)
6359 {
6360 
6361         mdi_phci_t      *ph;
6362         mdi_vhci_t      *vh = NULL;
6363         mdi_client_t    *ct;
6364         mdi_pathinfo_t  *next, *pip;
6365         int             found_it;
6366 
6367         ph = i_devi_get_phci(pdip);
6368         MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
6369             "!op = %d pdip = %p cdip = %p", op, (void *)pdip,
6370             (void *)cdip));
6371         if (ph == NULL) {
6372                 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6373                     "!failed: operation %d: NULL ph", op));
6374                 return (MDI_FAILURE);
6375         }
6376 
6377         if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) {
6378                 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6379                     "!failed: invalid operation %d", op));
6380                 return (MDI_FAILURE);
6381         }
6382 
6383         vh = ph->ph_vhci;
6384 
6385         if (cdip == NULL) {
6386                 /*
6387                  * Need to mark the Phci as enabled/disabled.
6388                  */
6389                 MDI_DEBUG(4, (MDI_NOTE, cdip ? cdip : pdip,
6390                     "op %d for the phci", op));
6391                 MDI_PHCI_LOCK(ph);
6392                 switch (flags) {
6393                         case USER_DISABLE:
6394                                 if (op == MDI_DISABLE_OP) {
6395                                         MDI_PHCI_SET_USER_DISABLE(ph);
6396                                 } else {
6397                                         MDI_PHCI_SET_USER_ENABLE(ph);
6398                                 }
6399                                 break;
6400                         case DRIVER_DISABLE:
6401                                 if (op == MDI_DISABLE_OP) {
6402                                         MDI_PHCI_SET_DRV_DISABLE(ph);
6403                                 } else {
6404                                         MDI_PHCI_SET_DRV_ENABLE(ph);
6405                                 }
6406                                 break;
6407                         case DRIVER_DISABLE_TRANSIENT:
6408                                 if (op == MDI_DISABLE_OP) {
6409                                         MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph);
6410                                 } else {
6411                                         MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph);
6412                                 }
6413                                 break;
6414                         default:
6415                                 MDI_PHCI_UNLOCK(ph);
6416                                 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6417                                     "!invalid flag argument= %d", flags));
6418                 }
6419 
6420                 /*
6421                  * Phci has been disabled. Now try to enable/disable
6422                  * path info's to each client.
6423                  */
6424                 pip = ph->ph_path_head;
6425                 while (pip != NULL) {
6426                         pip = i_mdi_enable_disable_path(pip, vh, flags, op);
6427                 }
6428                 MDI_PHCI_UNLOCK(ph);
6429         } else {
6430 
6431                 /*
6432                  * Disable a specific client.
6433                  */
6434                 ct = i_devi_get_client(cdip);
6435                 if (ct == NULL) {
6436                         MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6437                             "!failed: operation = %d: NULL ct", op));
6438                         return (MDI_FAILURE);
6439                 }
6440 
6441                 MDI_CLIENT_LOCK(ct);
6442                 pip = ct->ct_path_head;
6443                 found_it = 0;
6444                 while (pip != NULL) {
6445                         MDI_PI_LOCK(pip);
6446                         next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6447                         if (MDI_PI(pip)->pi_phci == ph) {
6448                                 MDI_PI_UNLOCK(pip);
6449                                 found_it = 1;
6450                                 break;
6451                         }
6452                         MDI_PI_UNLOCK(pip);
6453                         pip = next;
6454                 }
6455 
6456 
6457                 MDI_CLIENT_UNLOCK(ct);
6458                 if (found_it == 0) {
6459                         MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6460                             "!failed. Could not find corresponding pip\n"));
6461                         return (MDI_FAILURE);
6462                 }
6463 
6464                 (void) i_mdi_enable_disable_path(pip, vh, flags, op);
6465         }
6466 
6467         MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
6468             "!op %d returning success pdip = %p cdip = %p",
6469             op, (void *)pdip, (void *)cdip));
6470         return (MDI_SUCCESS);
6471 }
6472 
6473 /*
6474  * Ensure phci powered up
6475  */
6476 static void
6477 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
6478 {
6479         dev_info_t      *ph_dip;
6480 
6481         ASSERT(pip != NULL);
6482         ASSERT(MDI_PI_LOCKED(pip));
6483 
6484         if (MDI_PI(pip)->pi_pm_held) {
6485                 return;
6486         }
6487 
6488         ph_dip = mdi_pi_get_phci(pip);
6489         MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6490             "%s %p", mdi_pi_spathname(pip), (void *)pip));
6491         if (ph_dip == NULL) {
6492                 return;
6493         }
6494 
6495         MDI_PI_UNLOCK(pip);
6496         MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt was %d",
6497             DEVI(ph_dip)->devi_pm_kidsupcnt));
6498         pm_hold_power(ph_dip);
6499         MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt is %d",
6500             DEVI(ph_dip)->devi_pm_kidsupcnt));
6501         MDI_PI_LOCK(pip);
6502 
6503         /* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */
6504         if (DEVI(ph_dip)->devi_pm_info)
6505                 MDI_PI(pip)->pi_pm_held = 1;
6506 }
6507 
6508 /*
6509  * Allow phci powered down
6510  */
6511 static void
6512 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
6513 {
6514         dev_info_t      *ph_dip = NULL;
6515 
6516         ASSERT(pip != NULL);
6517         ASSERT(MDI_PI_LOCKED(pip));
6518 
6519         if (MDI_PI(pip)->pi_pm_held == 0) {
6520                 return;
6521         }
6522 
6523         ph_dip = mdi_pi_get_phci(pip);
6524         ASSERT(ph_dip != NULL);
6525 
6526         MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6527             "%s %p", mdi_pi_spathname(pip), (void *)pip));
6528 
6529         MDI_PI_UNLOCK(pip);
6530         MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6531             "kidsupcnt was %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
6532         pm_rele_power(ph_dip);
6533         MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6534             "kidsupcnt is %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
6535         MDI_PI_LOCK(pip);
6536 
6537         MDI_PI(pip)->pi_pm_held = 0;
6538 }
6539 
6540 static void
6541 i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
6542 {
6543         ASSERT(MDI_CLIENT_LOCKED(ct));
6544 
6545         ct->ct_power_cnt += incr;
6546         MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6547             "%p ct_power_cnt = %d incr = %d",
6548             (void *)ct, ct->ct_power_cnt, incr));
6549         ASSERT(ct->ct_power_cnt >= 0);
6550 }
6551 
6552 static void
6553 i_mdi_rele_all_phci(mdi_client_t *ct)
6554 {
6555         mdi_pathinfo_t  *pip;
6556 
6557         ASSERT(MDI_CLIENT_LOCKED(ct));
6558         pip = (mdi_pathinfo_t *)ct->ct_path_head;
6559         while (pip != NULL) {
6560                 mdi_hold_path(pip);
6561                 MDI_PI_LOCK(pip);
6562                 i_mdi_pm_rele_pip(pip);
6563                 MDI_PI_UNLOCK(pip);
6564                 mdi_rele_path(pip);
6565                 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6566         }
6567 }
6568 
6569 static void
6570 i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
6571 {
6572         ASSERT(MDI_CLIENT_LOCKED(ct));
6573 
6574         if (i_ddi_devi_attached(ct->ct_dip)) {
6575                 ct->ct_power_cnt -= decr;
6576                 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6577                     "%p ct_power_cnt = %d decr = %d",
6578                     (void *)ct, ct->ct_power_cnt, decr));
6579         }
6580 
6581         ASSERT(ct->ct_power_cnt >= 0);
6582         if (ct->ct_power_cnt == 0) {
6583                 i_mdi_rele_all_phci(ct);
6584                 return;
6585         }
6586 }
6587 
/*
 * i_mdi_pm_reset_client():
 *              Unconditionally drop the client's power hold count to zero,
 *              release the holds on all of its paths, clear the
 *              config/unconfig hold markers and flag the client as reset
 *              (ct_powercnt_reset).  The caller must hold the client lock.
 */
static void
i_mdi_pm_reset_client(mdi_client_t *ct)
{
        /* log the count as it was before the reset */
        MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
            "%p ct_power_cnt = %d", (void *)ct, ct->ct_power_cnt));
        ASSERT(MDI_CLIENT_LOCKED(ct));
        ct->ct_power_cnt = 0;
        i_mdi_rele_all_phci(ct);
        ct->ct_powercnt_config = 0;
        ct->ct_powercnt_unconfig = 0;
        /* the post-config/post-unconfig handlers bail out when this is set */
        ct->ct_powercnt_reset = 1;
}
6600 
6601 static int
6602 i_mdi_power_one_phci(mdi_pathinfo_t *pip)
6603 {
6604         int             ret;
6605         dev_info_t      *ph_dip;
6606 
6607         MDI_PI_LOCK(pip);
6608         i_mdi_pm_hold_pip(pip);
6609 
6610         ph_dip = mdi_pi_get_phci(pip);
6611         MDI_PI_UNLOCK(pip);
6612 
6613         /* bring all components of phci to full power */
6614         MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6615             "pm_powerup for %s%d %p", ddi_driver_name(ph_dip),
6616             ddi_get_instance(ph_dip), (void *)pip));
6617 
6618         ret = pm_powerup(ph_dip);
6619 
6620         if (ret == DDI_FAILURE) {
6621                 MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6622                     "pm_powerup FAILED for %s%d %p",
6623                     ddi_driver_name(ph_dip), ddi_get_instance(ph_dip),
6624                     (void *)pip));
6625 
6626                 MDI_PI_LOCK(pip);
6627                 i_mdi_pm_rele_pip(pip);
6628                 MDI_PI_UNLOCK(pip);
6629                 return (MDI_FAILURE);
6630         }
6631 
6632         return (MDI_SUCCESS);
6633 }
6634 
6635 static int
6636 i_mdi_power_all_phci(mdi_client_t *ct)
6637 {
6638         mdi_pathinfo_t  *pip;
6639         int             succeeded = 0;
6640 
6641         ASSERT(MDI_CLIENT_LOCKED(ct));
6642         pip = (mdi_pathinfo_t *)ct->ct_path_head;
6643         while (pip != NULL) {
6644                 /*
6645                  * Don't power if MDI_PATHINFO_STATE_FAULT
6646                  * or MDI_PATHINFO_STATE_OFFLINE.
6647                  */
6648                 if (MDI_PI_IS_INIT(pip) ||
6649                     MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) {
6650                         mdi_hold_path(pip);
6651                         MDI_CLIENT_UNLOCK(ct);
6652                         if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
6653                                 succeeded = 1;
6654 
6655                         ASSERT(ct == MDI_PI(pip)->pi_client);
6656                         MDI_CLIENT_LOCK(ct);
6657                         mdi_rele_path(pip);
6658                 }
6659                 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6660         }
6661 
6662         return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
6663 }
6664 
/*
 * mdi_bus_power():
 *              1. Place the phci(s) into powered up state so that
 *                 client can do power management
 *              2. Ensure phci powered up as client power managing
 *
 *              Only BUS_POWER_PRE_NOTIFICATION, BUS_POWER_POST_NOTIFICATION
 *              and BUS_POWER_HAS_CHANGED are processed here.
 *              BUS_POWER_NOINVOL is rejected and every other op is handed
 *              to pm_busop_bus_power().
 * Return Values:
 *              MDI_SUCCESS
 *              MDI_FAILURE
 *              For the ops that are passed through, whatever
 *              pm_busop_bus_power() returns.
 */
int
mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
    void *arg, void *result)
{
        int                     ret = MDI_SUCCESS;
        pm_bp_child_pwrchg_t    *bpc;
        mdi_client_t            *ct;
        dev_info_t              *cdip;
        pm_bp_has_changed_t     *bphc;

        /*
         * BUS_POWER_NOINVOL not supported
         */
        if (op == BUS_POWER_NOINVOL)
                return (MDI_FAILURE);

        /*
         * ignore other OPs.
         * return quickly to save cpu cycles on the ct processing
         */
        switch (op) {
        case BUS_POWER_PRE_NOTIFICATION:
        case BUS_POWER_POST_NOTIFICATION:
                bpc = (pm_bp_child_pwrchg_t *)arg;
                cdip = bpc->bpc_dip;
                break;
        case BUS_POWER_HAS_CHANGED:
                bphc = (pm_bp_has_changed_t *)arg;
                cdip = bphc->bphc_dip;
                break;
        default:
                return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
        }

        ASSERT(MDI_CLIENT(cdip));

        /* the child whose power level is changing must be an mdi client */
        ct = i_devi_get_client(cdip);
        if (ct == NULL)
                return (MDI_FAILURE);

        /*
         * wait till the mdi_pathinfo node state change are processed
         */
        MDI_CLIENT_LOCK(ct);
        switch (op) {
        case BUS_POWER_PRE_NOTIFICATION:
                MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
                    "BUS_POWER_PRE_NOTIFICATION:"
                    "%s@%s, olevel=%d, nlevel=%d, comp=%d",
                    ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
                    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));

                /* serialize power level change per client */
                while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
                        cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

                /* cleared again by the BUS_POWER_POST_NOTIFICATION case */
                MDI_CLIENT_SET_POWER_TRANSITION(ct);

                /* nobody holds the phci(s) yet: power them up first */
                if (ct->ct_power_cnt == 0) {
                        ret = i_mdi_power_all_phci(ct);
                }

                /*
                 * if new_level > 0:
                 *      - hold phci(s)
                 *      - power up phci(s) if not already
                 * ignore power down
                 */
                if (bpc->bpc_nlevel > 0) {
                        if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
                                MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
                                    "i_mdi_pm_hold_client\n"));
                                i_mdi_pm_hold_client(ct, ct->ct_path_count);
                        }
                }
                break;
        case BUS_POWER_POST_NOTIFICATION:
                MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
                    "BUS_POWER_POST_NOTIFICATION:"
                    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d",
                    ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
                    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
                    *(int *)result));

                /* record the new power state only if the change succeeded */
                if (*(int *)result == DDI_SUCCESS) {
                        if (bpc->bpc_nlevel > 0) {
                                MDI_CLIENT_SET_POWER_UP(ct);
                        } else {
                                MDI_CLIENT_SET_POWER_DOWN(ct);
                        }
                }

                /* release the hold we did in pre-notification */
                if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
                    !DEVI_IS_ATTACHING(ct->ct_dip)) {
                        MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
                            "i_mdi_pm_rele_client\n"));
                        i_mdi_pm_rele_client(ct, ct->ct_path_count);
                }

                /* successful power down of the client */
                if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
                        /* another thread might started attaching */
                        if (DEVI_IS_ATTACHING(ct->ct_dip)) {
                                MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
                                    "i_mdi_pm_rele_client\n"));
                                i_mdi_pm_rele_client(ct, ct->ct_path_count);
                        /* detaching has been taken care in pm_post_unconfig */
                        } else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
                                MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
                                    "i_mdi_pm_reset_client\n"));
                                i_mdi_pm_reset_client(ct);
                        }
                }

                /* end of the transition: wake every thread waiting on it */
                MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
                cv_broadcast(&ct->ct_powerchange_cv);

                break;

        /* need to do more */
        case BUS_POWER_HAS_CHANGED:
                MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
                    "BUS_POWER_HAS_CHANGED:"
                    "%s@%s, olevel=%d, nlevel=%d, comp=%d",
                    ddi_node_name(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
                    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));

                /* level was raised: power up the phci(s) and hold them */
                if (bphc->bphc_nlevel > 0 &&
                    bphc->bphc_nlevel > bphc->bphc_olevel) {
                        if (ct->ct_power_cnt == 0) {
                                ret = i_mdi_power_all_phci(ct);
                        }
                        MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
                            "i_mdi_pm_hold_client\n"));
                        i_mdi_pm_hold_client(ct, ct->ct_path_count);
                }

                /* level dropped to 0 from an old level other than -1 */
                if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
                        MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
                            "i_mdi_pm_rele_client\n"));
                        i_mdi_pm_rele_client(ct, ct->ct_path_count);
                }
                break;
        }

        MDI_CLIENT_UNLOCK(ct);
        return (ret);
}
6822 
6823 static int
6824 i_mdi_pm_pre_config_one(dev_info_t *child)
6825 {
6826         int             ret = MDI_SUCCESS;
6827         mdi_client_t    *ct;
6828 
6829         ct = i_devi_get_client(child);
6830         if (ct == NULL)
6831                 return (MDI_FAILURE);
6832 
6833         MDI_CLIENT_LOCK(ct);
6834         while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6835                 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6836 
6837         if (!MDI_CLIENT_IS_FAILED(ct)) {
6838                 MDI_CLIENT_UNLOCK(ct);
6839                 MDI_DEBUG(4, (MDI_NOTE, child, "already configured\n"));
6840                 return (MDI_SUCCESS);
6841         }
6842 
6843         if (ct->ct_powercnt_config) {
6844                 MDI_CLIENT_UNLOCK(ct);
6845                 MDI_DEBUG(4, (MDI_NOTE, child, "already held\n"));
6846                 return (MDI_SUCCESS);
6847         }
6848 
6849         if (ct->ct_power_cnt == 0) {
6850                 ret = i_mdi_power_all_phci(ct);
6851         }
6852         MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
6853         i_mdi_pm_hold_client(ct, ct->ct_path_count);
6854         ct->ct_powercnt_config = 1;
6855         ct->ct_powercnt_reset = 0;
6856         MDI_CLIENT_UNLOCK(ct);
6857         return (ret);
6858 }
6859 
6860 static int
6861 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child)
6862 {
6863         int                     ret = MDI_SUCCESS;
6864         dev_info_t              *cdip;
6865         int                     circ;
6866 
6867         ASSERT(MDI_VHCI(vdip));
6868 
6869         /* ndi_devi_config_one */
6870         if (child) {
6871                 ASSERT(DEVI_BUSY_OWNED(vdip));
6872                 return (i_mdi_pm_pre_config_one(child));
6873         }
6874 
6875         /* devi_config_common */
6876         ndi_devi_enter(vdip, &circ);
6877         cdip = ddi_get_child(vdip);
6878         while (cdip) {
6879                 dev_info_t *next = ddi_get_next_sibling(cdip);
6880 
6881                 ret = i_mdi_pm_pre_config_one(cdip);
6882                 if (ret != MDI_SUCCESS)
6883                         break;
6884                 cdip = next;
6885         }
6886         ndi_devi_exit(vdip, circ);
6887         return (ret);
6888 }
6889 
6890 static int
6891 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
6892 {
6893         int             ret = MDI_SUCCESS;
6894         mdi_client_t    *ct;
6895 
6896         ct = i_devi_get_client(child);
6897         if (ct == NULL)
6898                 return (MDI_FAILURE);
6899 
6900         MDI_CLIENT_LOCK(ct);
6901         while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6902                 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6903 
6904         if (!i_ddi_devi_attached(child)) {
6905                 MDI_DEBUG(4, (MDI_NOTE, child, "node detached already\n"));
6906                 MDI_CLIENT_UNLOCK(ct);
6907                 return (MDI_SUCCESS);
6908         }
6909 
6910         if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6911             (flags & NDI_AUTODETACH)) {
6912                 MDI_DEBUG(4, (MDI_NOTE, child, "auto-modunload\n"));
6913                 MDI_CLIENT_UNLOCK(ct);
6914                 return (MDI_FAILURE);
6915         }
6916 
6917         if (ct->ct_powercnt_unconfig) {
6918                 MDI_DEBUG(4, (MDI_NOTE, child, "ct_powercnt_held\n"));
6919                 MDI_CLIENT_UNLOCK(ct);
6920                 *held = 1;
6921                 return (MDI_SUCCESS);
6922         }
6923 
6924         if (ct->ct_power_cnt == 0) {
6925                 ret = i_mdi_power_all_phci(ct);
6926         }
6927         MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
6928         i_mdi_pm_hold_client(ct, ct->ct_path_count);
6929         ct->ct_powercnt_unconfig = 1;
6930         ct->ct_powercnt_reset = 0;
6931         MDI_CLIENT_UNLOCK(ct);
6932         if (ret == MDI_SUCCESS)
6933                 *held = 1;
6934         return (ret);
6935 }
6936 
6937 static int
6938 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held,
6939     int flags)
6940 {
6941         int                     ret = MDI_SUCCESS;
6942         dev_info_t              *cdip;
6943         int                     circ;
6944 
6945         ASSERT(MDI_VHCI(vdip));
6946         *held = 0;
6947 
6948         /* ndi_devi_unconfig_one */
6949         if (child) {
6950                 ASSERT(DEVI_BUSY_OWNED(vdip));
6951                 return (i_mdi_pm_pre_unconfig_one(child, held, flags));
6952         }
6953 
6954         /* devi_unconfig_common */
6955         ndi_devi_enter(vdip, &circ);
6956         cdip = ddi_get_child(vdip);
6957         while (cdip) {
6958                 dev_info_t *next = ddi_get_next_sibling(cdip);
6959 
6960                 ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
6961                 cdip = next;
6962         }
6963         ndi_devi_exit(vdip, circ);
6964 
6965         if (*held)
6966                 ret = MDI_SUCCESS;
6967 
6968         return (ret);
6969 }
6970 
6971 static void
6972 i_mdi_pm_post_config_one(dev_info_t *child)
6973 {
6974         mdi_client_t    *ct;
6975 
6976         ct = i_devi_get_client(child);
6977         if (ct == NULL)
6978                 return;
6979 
6980         MDI_CLIENT_LOCK(ct);
6981         while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6982                 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6983 
6984         if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
6985                 MDI_DEBUG(4, (MDI_NOTE, child, "not configured\n"));
6986                 MDI_CLIENT_UNLOCK(ct);
6987                 return;
6988         }
6989 
6990         /* client has not been updated */
6991         if (MDI_CLIENT_IS_FAILED(ct)) {
6992                 MDI_DEBUG(4, (MDI_NOTE, child, "client failed\n"));
6993                 MDI_CLIENT_UNLOCK(ct);
6994                 return;
6995         }
6996 
6997         /* another thread might have powered it down or detached it */
6998         if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
6999             !DEVI_IS_ATTACHING(child)) ||
7000             (!i_ddi_devi_attached(child) &&
7001             !DEVI_IS_ATTACHING(child))) {
7002                 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
7003                 i_mdi_pm_reset_client(ct);
7004         } else {
7005                 mdi_pathinfo_t  *pip, *next;
7006                 int     valid_path_count = 0;
7007 
7008                 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
7009                 pip = ct->ct_path_head;
7010                 while (pip != NULL) {
7011                         MDI_PI_LOCK(pip);
7012                         next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
7013                         if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
7014                                 valid_path_count ++;
7015                         MDI_PI_UNLOCK(pip);
7016                         pip = next;
7017                 }
7018                 i_mdi_pm_rele_client(ct, valid_path_count);
7019         }
7020         ct->ct_powercnt_config = 0;
7021         MDI_CLIENT_UNLOCK(ct);
7022 }
7023 
7024 static void
7025 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child)
7026 {
7027         int             circ;
7028         dev_info_t      *cdip;
7029 
7030         ASSERT(MDI_VHCI(vdip));
7031 
7032         /* ndi_devi_config_one */
7033         if (child) {
7034                 ASSERT(DEVI_BUSY_OWNED(vdip));
7035                 i_mdi_pm_post_config_one(child);
7036                 return;
7037         }
7038 
7039         /* devi_config_common */
7040         ndi_devi_enter(vdip, &circ);
7041         cdip = ddi_get_child(vdip);
7042         while (cdip) {
7043                 dev_info_t *next = ddi_get_next_sibling(cdip);
7044 
7045                 i_mdi_pm_post_config_one(cdip);
7046                 cdip = next;
7047         }
7048         ndi_devi_exit(vdip, circ);
7049 }
7050 
7051 static void
7052 i_mdi_pm_post_unconfig_one(dev_info_t *child)
7053 {
7054         mdi_client_t    *ct;
7055 
7056         ct = i_devi_get_client(child);
7057         if (ct == NULL)
7058                 return;
7059 
7060         MDI_CLIENT_LOCK(ct);
7061         while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
7062                 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
7063 
7064         if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
7065                 MDI_DEBUG(4, (MDI_NOTE, child, "not held\n"));
7066                 MDI_CLIENT_UNLOCK(ct);
7067                 return;
7068         }
7069 
7070         /* failure detaching or another thread just attached it */
7071         if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
7072             i_ddi_devi_attached(child)) ||
7073             (!i_ddi_devi_attached(child) &&
7074             !DEVI_IS_ATTACHING(child))) {
7075                 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
7076                 i_mdi_pm_reset_client(ct);
7077         } else {
7078                 mdi_pathinfo_t  *pip, *next;
7079                 int     valid_path_count = 0;
7080 
7081                 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
7082                 pip = ct->ct_path_head;
7083                 while (pip != NULL) {
7084                         MDI_PI_LOCK(pip);
7085                         next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
7086                         if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
7087                                 valid_path_count ++;
7088                         MDI_PI_UNLOCK(pip);
7089                         pip = next;
7090                 }
7091                 i_mdi_pm_rele_client(ct, valid_path_count);
7092                 ct->ct_powercnt_unconfig = 0;
7093         }
7094 
7095         MDI_CLIENT_UNLOCK(ct);
7096 }
7097 
7098 static void
7099 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held)
7100 {
7101         int                     circ;
7102         dev_info_t              *cdip;
7103 
7104         ASSERT(MDI_VHCI(vdip));
7105 
7106         if (!held) {
7107                 MDI_DEBUG(4, (MDI_NOTE, vdip, "held = %d", held));
7108                 return;
7109         }
7110 
7111         if (child) {
7112                 ASSERT(DEVI_BUSY_OWNED(vdip));
7113                 i_mdi_pm_post_unconfig_one(child);
7114                 return;
7115         }
7116 
7117         ndi_devi_enter(vdip, &circ);
7118         cdip = ddi_get_child(vdip);
7119         while (cdip) {
7120                 dev_info_t *next = ddi_get_next_sibling(cdip);
7121 
7122                 i_mdi_pm_post_unconfig_one(cdip);
7123                 cdip = next;
7124         }
7125         ndi_devi_exit(vdip, circ);
7126 }
7127 
7128 int
7129 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
7130 {
7131         int                     circ, ret = MDI_SUCCESS;
7132         dev_info_t              *client_dip = NULL;
7133         mdi_client_t            *ct;
7134 
7135         /*
7136          * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
7137          * Power up pHCI for the named client device.
7138          * Note: Before the client is enumerated under vhci by phci,
7139          * client_dip can be NULL. Then proceed to power up all the
7140          * pHCIs.
7141          */
7142         if (devnm != NULL) {
7143                 ndi_devi_enter(vdip, &circ);
7144                 client_dip = ndi_devi_findchild(vdip, devnm);
7145         }
7146 
7147         MDI_DEBUG(4, (MDI_NOTE, vdip,
7148             "op = %d %s %p", op, devnm ? devnm : "", (void *)client_dip));
7149 
7150         switch (op) {
7151         case MDI_PM_PRE_CONFIG:
7152                 ret = i_mdi_pm_pre_config(vdip, client_dip);
7153                 break;
7154 
7155         case MDI_PM_PRE_UNCONFIG:
7156                 ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
7157                     flags);
7158                 break;
7159 
7160         case MDI_PM_POST_CONFIG:
7161                 i_mdi_pm_post_config(vdip, client_dip);
7162                 break;
7163 
7164         case MDI_PM_POST_UNCONFIG:
7165                 i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
7166                 break;
7167 
7168         case MDI_PM_HOLD_POWER:
7169         case MDI_PM_RELE_POWER:
7170                 ASSERT(args);
7171 
7172                 client_dip = (dev_info_t *)args;
7173                 ASSERT(MDI_CLIENT(client_dip));
7174 
7175                 ct = i_devi_get_client(client_dip);
7176                 MDI_CLIENT_LOCK(ct);
7177 
7178                 if (op == MDI_PM_HOLD_POWER) {
7179                         if (ct->ct_power_cnt == 0) {
7180                                 (void) i_mdi_power_all_phci(ct);
7181                                 MDI_DEBUG(4, (MDI_NOTE, client_dip,
7182                                     "i_mdi_pm_hold_client\n"));
7183                                 i_mdi_pm_hold_client(ct, ct->ct_path_count);
7184                         }
7185                 } else {
7186                         if (DEVI_IS_ATTACHING(client_dip)) {
7187                                 MDI_DEBUG(4, (MDI_NOTE, client_dip,
7188                                     "i_mdi_pm_rele_client\n"));
7189                                 i_mdi_pm_rele_client(ct, ct->ct_path_count);
7190                         } else {
7191                                 MDI_DEBUG(4, (MDI_NOTE, client_dip,
7192                                     "i_mdi_pm_reset_client\n"));
7193                                 i_mdi_pm_reset_client(ct);
7194                         }
7195                 }
7196 
7197                 MDI_CLIENT_UNLOCK(ct);
7198                 break;
7199 
7200         default:
7201                 break;
7202         }
7203 
7204         if (devnm)
7205                 ndi_devi_exit(vdip, circ);
7206 
7207         return (ret);
7208 }
7209 
7210 int
7211 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
7212 {
7213         mdi_vhci_t *vhci;
7214 
7215         if (!MDI_VHCI(dip))
7216                 return (MDI_FAILURE);
7217 
7218         if (mdi_class) {
7219                 vhci = DEVI(dip)->devi_mdi_xhci;
7220                 ASSERT(vhci);
7221                 *mdi_class = vhci->vh_class;
7222         }
7223 
7224         return (MDI_SUCCESS);
7225 }
7226 
7227 int
7228 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
7229 {
7230         mdi_phci_t *phci;
7231 
7232         if (!MDI_PHCI(dip))
7233                 return (MDI_FAILURE);
7234 
7235         if (mdi_class) {
7236                 phci = DEVI(dip)->devi_mdi_xhci;
7237                 ASSERT(phci);
7238                 *mdi_class = phci->ph_vhci->vh_class;
7239         }
7240 
7241         return (MDI_SUCCESS);
7242 }
7243 
7244 int
7245 mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
7246 {
7247         mdi_client_t *client;
7248 
7249         if (!MDI_CLIENT(dip))
7250                 return (MDI_FAILURE);
7251 
7252         if (mdi_class) {
7253                 client = DEVI(dip)->devi_mdi_client;
7254                 ASSERT(client);
7255                 *mdi_class = client->ct_vhci->vh_class;
7256         }
7257 
7258         return (MDI_SUCCESS);
7259 }
7260 
7261 void *
7262 mdi_client_get_vhci_private(dev_info_t *dip)
7263 {
7264         ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
7265         if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
7266                 mdi_client_t    *ct;
7267                 ct = i_devi_get_client(dip);
7268                 return (ct->ct_vprivate);
7269         }
7270         return (NULL);
7271 }
7272 
7273 void
7274 mdi_client_set_vhci_private(dev_info_t *dip, void *data)
7275 {
7276         ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
7277         if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
7278                 mdi_client_t    *ct;
7279                 ct = i_devi_get_client(dip);
7280                 ct->ct_vprivate = data;
7281         }
7282 }
7283 /*
7284  * mdi_pi_get_vhci_private():
7285  *              Get the vhci private information associated with the
7286  *              mdi_pathinfo node
7287  */
7288 void *
7289 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
7290 {
7291         caddr_t vprivate = NULL;
7292         if (pip) {
7293                 vprivate = MDI_PI(pip)->pi_vprivate;
7294         }
7295         return (vprivate);
7296 }
7297 
7298 /*
7299  * mdi_pi_set_vhci_private():
7300  *              Set the vhci private information in the mdi_pathinfo node
7301  */
7302 void
7303 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
7304 {
7305         if (pip) {
7306                 MDI_PI(pip)->pi_vprivate = priv;
7307         }
7308 }
7309 
7310 /*
7311  * mdi_phci_get_vhci_private():
7312  *              Get the vhci private information associated with the
7313  *              mdi_phci node
7314  */
7315 void *
7316 mdi_phci_get_vhci_private(dev_info_t *dip)
7317 {
7318         ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
7319         if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7320                 mdi_phci_t      *ph;
7321                 ph = i_devi_get_phci(dip);
7322                 return (ph->ph_vprivate);
7323         }
7324         return (NULL);
7325 }
7326 
7327 /*
7328  * mdi_phci_set_vhci_private():
7329  *              Set the vhci private information in the mdi_phci node
7330  */
7331 void
7332 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
7333 {
7334         ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
7335         if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7336                 mdi_phci_t      *ph;
7337                 ph = i_devi_get_phci(dip);
7338                 ph->ph_vprivate = priv;
7339         }
7340 }
7341 
7342 int
7343 mdi_pi_ishidden(mdi_pathinfo_t *pip)
7344 {
7345         return (MDI_PI_FLAGS_IS_HIDDEN(pip));
7346 }
7347 
7348 int
7349 mdi_pi_device_isremoved(mdi_pathinfo_t *pip)
7350 {
7351         return (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip));
7352 }
7353 
7354 /* Return 1 if all client paths are device_removed */
7355 static int
7356 i_mdi_client_all_devices_removed(mdi_client_t *ct)
7357 {
7358         mdi_pathinfo_t  *pip;
7359         int             all_devices_removed = 1;
7360 
7361         MDI_CLIENT_LOCK(ct);
7362         for (pip = ct->ct_path_head; pip;
7363             pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) {
7364                 if (!mdi_pi_device_isremoved(pip)) {
7365                         all_devices_removed = 0;
7366                         break;
7367                 }
7368         }
7369         MDI_CLIENT_UNLOCK(ct);
7370         return (all_devices_removed);
7371 }
7372 
7373 /*
7374  * When processing path hotunplug, represent device removal.
7375  */
7376 int
7377 mdi_pi_device_remove(mdi_pathinfo_t *pip)
7378 {
7379         mdi_client_t    *ct;
7380 
7381         MDI_PI_LOCK(pip);
7382         if (mdi_pi_device_isremoved(pip)) {
7383                 MDI_PI_UNLOCK(pip);
7384                 return (0);
7385         }
7386         MDI_PI_FLAGS_SET_DEVICE_REMOVED(pip);
7387         MDI_PI_FLAGS_SET_HIDDEN(pip);
7388         MDI_PI_UNLOCK(pip);
7389 
7390         /*
7391          * If all paths associated with the client are now DEVICE_REMOVED,
7392          * reflect DEVICE_REMOVED in the client.
7393          */
7394         ct = MDI_PI(pip)->pi_client;
7395         if (ct && ct->ct_dip && i_mdi_client_all_devices_removed(ct))
7396                 (void) ndi_devi_device_remove(ct->ct_dip);
7397         else
7398                 i_ddi_di_cache_invalidate();
7399 
7400         return (1);
7401 }
7402 
7403 /*
7404  * When processing hotplug, if a path marked mdi_pi_device_isremoved()
7405  * is now accessible then this interfaces is used to represent device insertion.
7406  */
7407 int
7408 mdi_pi_device_insert(mdi_pathinfo_t *pip)
7409 {
7410         MDI_PI_LOCK(pip);
7411         if (!mdi_pi_device_isremoved(pip)) {
7412                 MDI_PI_UNLOCK(pip);
7413                 return (0);
7414         }
7415         MDI_PI_FLAGS_CLR_DEVICE_REMOVED(pip);
7416         MDI_PI_FLAGS_CLR_HIDDEN(pip);
7417         MDI_PI_UNLOCK(pip);
7418 
7419         i_ddi_di_cache_invalidate();
7420 
7421         return (1);
7422 }
7423 
/*
 * List of vhci class names:
 * A vhci class name must be in this list only if the corresponding vhci
 * driver intends to use the mdi provided bus config implementation
 * (i.e., mdi_vhci_bus_config()).
 */
static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
/* number of entries in vhci_class_list[] */
#define N_VHCI_CLASSES  (sizeof (vhci_class_list) / sizeof (char *))

/*
 * During boot time, the on-disk vhci cache for every vhci class is read
 * in the form of an nvlist and stored here.
 * The array is indexed in parallel with vhci_class_list[].
 */
static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];

/* nvpair names in vhci cache nvlist */
#define MDI_VHCI_CACHE_VERSION  1
#define MDI_NVPNAME_VERSION     "version"
#define MDI_NVPNAME_PHCIS       "phcis"
#define MDI_NVPNAME_CTADDRMAP   "clientaddrmap"
7444 
7445 /*
7446  * Given vhci class name, return its on-disk vhci cache filename.
7447  * Memory for the returned filename which includes the full path is allocated
7448  * by this function.
7449  */
7450 static char *
7451 vhclass2vhcache_filename(char *vhclass)
7452 {
7453         char *filename;
7454         int len;
7455         static char *fmt = "/etc/devices/mdi_%s_cache";
7456 
7457         /*
7458          * fmt contains the on-disk vhci cache file name format;
7459          * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
7460          */
7461 
7462         /* the -1 below is to account for "%s" in the format string */
7463         len = strlen(fmt) + strlen(vhclass) - 1;
7464         filename = kmem_alloc(len, KM_SLEEP);
7465         (void) snprintf(filename, len, fmt, vhclass);
7466         ASSERT(len == (strlen(filename) + 1));
7467         return (filename);
7468 }
7469 
7470 /*
7471  * initialize the vhci cache related data structures and read the on-disk
7472  * vhci cached data into memory.
7473  */
7474 static void
7475 setup_vhci_cache(mdi_vhci_t *vh)
7476 {
7477         mdi_vhci_config_t *vhc;
7478         mdi_vhci_cache_t *vhcache;
7479         int i;
7480         nvlist_t *nvl = NULL;
7481 
7482         vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
7483         vh->vh_config = vhc;
7484         vhcache = &vhc->vhc_vhcache;
7485 
7486         vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);
7487 
7488         mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
7489         cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);
7490 
7491         rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);
7492 
7493         /*
7494          * Create string hash; same as mod_hash_create_strhash() except that
7495          * we use NULL key destructor.
7496          */
7497         vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
7498             mdi_bus_config_cache_hash_size,
7499             mod_hash_null_keydtor, mod_hash_null_valdtor,
7500             mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
7501 
7502         /*
7503          * The on-disk vhci cache is read during booting prior to the
7504          * lights-out period by mdi_read_devices_files().
7505          */
7506         for (i = 0; i < N_VHCI_CLASSES; i++) {
7507                 if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
7508                         nvl = vhcache_nvl[i];
7509                         vhcache_nvl[i] = NULL;
7510                         break;
7511                 }
7512         }
7513 
7514         /*
7515          * this is to cover the case of some one manually causing unloading
7516          * (or detaching) and reloading (or attaching) of a vhci driver.
7517          */
7518         if (nvl == NULL && modrootloaded)
7519                 nvl = read_on_disk_vhci_cache(vh->vh_class);
7520 
7521         if (nvl != NULL) {
7522                 rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7523                 if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
7524                         vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
7525                 else  {
7526                         cmn_err(CE_WARN,
7527                             "%s: data file corrupted, will recreate",
7528                             vhc->vhc_vhcache_filename);
7529                 }
7530                 rw_exit(&vhcache->vhcache_lock);
7531                 nvlist_free(nvl);
7532         }
7533 
7534         vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
7535             CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");
7536 
7537         vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
7538         vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
7539 }
7540 
7541 /*
7542  * free all vhci cache related resources
7543  */
7544 static int
7545 destroy_vhci_cache(mdi_vhci_t *vh)
7546 {
7547         mdi_vhci_config_t *vhc = vh->vh_config;
7548         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7549         mdi_vhcache_phci_t *cphci, *cphci_next;
7550         mdi_vhcache_client_t *cct, *cct_next;
7551         mdi_vhcache_pathinfo_t *cpi, *cpi_next;
7552 
7553         if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
7554                 return (MDI_FAILURE);
7555 
7556         kmem_free(vhc->vhc_vhcache_filename,
7557             strlen(vhc->vhc_vhcache_filename) + 1);
7558 
7559         mod_hash_destroy_strhash(vhcache->vhcache_client_hash);
7560 
7561         for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7562             cphci = cphci_next) {
7563                 cphci_next = cphci->cphci_next;
7564                 free_vhcache_phci(cphci);
7565         }
7566 
7567         for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
7568                 cct_next = cct->cct_next;
7569                 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
7570                         cpi_next = cpi->cpi_next;
7571                         free_vhcache_pathinfo(cpi);
7572                 }
7573                 free_vhcache_client(cct);
7574         }
7575 
7576         rw_destroy(&vhcache->vhcache_lock);
7577 
7578         mutex_destroy(&vhc->vhc_lock);
7579         cv_destroy(&vhc->vhc_cv);
7580         kmem_free(vhc, sizeof (mdi_vhci_config_t));
7581         return (MDI_SUCCESS);
7582 }
7583 
7584 /*
7585  * Stop all vhci cache related async threads and free their resources.
7586  */
7587 static int
7588 stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
7589 {
7590         mdi_async_client_config_t *acc, *acc_next;
7591 
7592         mutex_enter(&vhc->vhc_lock);
7593         vhc->vhc_flags |= MDI_VHC_EXIT;
7594         ASSERT(vhc->vhc_acc_thrcount >= 0);
7595         cv_broadcast(&vhc->vhc_cv);
7596 
7597         while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
7598             vhc->vhc_acc_thrcount != 0) {
7599                 mutex_exit(&vhc->vhc_lock);
7600                 delay_random(mdi_delay);
7601                 mutex_enter(&vhc->vhc_lock);
7602         }
7603 
7604         vhc->vhc_flags &= ~MDI_VHC_EXIT;
7605 
7606         for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
7607                 acc_next = acc->acc_next;
7608                 free_async_client_config(acc);
7609         }
7610         vhc->vhc_acc_list_head = NULL;
7611         vhc->vhc_acc_list_tail = NULL;
7612         vhc->vhc_acc_count = 0;
7613 
7614         if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
7615                 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7616                 mutex_exit(&vhc->vhc_lock);
7617                 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
7618                         vhcache_dirty(vhc);
7619                         return (MDI_FAILURE);
7620                 }
7621         } else
7622                 mutex_exit(&vhc->vhc_lock);
7623 
7624         if (callb_delete(vhc->vhc_cbid) != 0)
7625                 return (MDI_FAILURE);
7626 
7627         return (MDI_SUCCESS);
7628 }
7629 
7630 /*
7631  * Stop vhci cache flush thread
7632  */
7633 /* ARGSUSED */
7634 static boolean_t
7635 stop_vhcache_flush_thread(void *arg, int code)
7636 {
7637         mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7638 
7639         mutex_enter(&vhc->vhc_lock);
7640         vhc->vhc_flags |= MDI_VHC_EXIT;
7641         cv_broadcast(&vhc->vhc_cv);
7642 
7643         while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
7644                 mutex_exit(&vhc->vhc_lock);
7645                 delay_random(mdi_delay);
7646                 mutex_enter(&vhc->vhc_lock);
7647         }
7648 
7649         if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
7650                 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7651                 mutex_exit(&vhc->vhc_lock);
7652                 (void) flush_vhcache(vhc, 1);
7653         } else
7654                 mutex_exit(&vhc->vhc_lock);
7655 
7656         return (B_TRUE);
7657 }
7658 
7659 /*
7660  * Enqueue the vhcache phci (cphci) at the tail of the list
7661  */
7662 static void
7663 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
7664 {
7665         cphci->cphci_next = NULL;
7666         if (vhcache->vhcache_phci_head == NULL)
7667                 vhcache->vhcache_phci_head = cphci;
7668         else
7669                 vhcache->vhcache_phci_tail->cphci_next = cphci;
7670         vhcache->vhcache_phci_tail = cphci;
7671 }
7672 
7673 /*
7674  * Enqueue the vhcache pathinfo (cpi) at the tail of the list
7675  */
7676 static void
7677 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7678     mdi_vhcache_pathinfo_t *cpi)
7679 {
7680         cpi->cpi_next = NULL;
7681         if (cct->cct_cpi_head == NULL)
7682                 cct->cct_cpi_head = cpi;
7683         else
7684                 cct->cct_cpi_tail->cpi_next = cpi;
7685         cct->cct_cpi_tail = cpi;
7686 }
7687 
7688 /*
7689  * Enqueue the vhcache pathinfo (cpi) at the correct location in the
7690  * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
7691  * flag set come at the beginning of the list. All cpis which have this
7692  * flag set come at the end of the list.
7693  */
7694 static void
7695 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7696     mdi_vhcache_pathinfo_t *newcpi)
7697 {
7698         mdi_vhcache_pathinfo_t *cpi, *prev_cpi;
7699 
7700         if (cct->cct_cpi_head == NULL ||
7701             (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
7702                 enqueue_tail_vhcache_pathinfo(cct, newcpi);
7703         else {
7704                 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL &&
7705                     !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST);
7706                     prev_cpi = cpi, cpi = cpi->cpi_next)
7707                         ;
7708 
7709                 if (prev_cpi == NULL)
7710                         cct->cct_cpi_head = newcpi;
7711                 else
7712                         prev_cpi->cpi_next = newcpi;
7713 
7714                 newcpi->cpi_next = cpi;
7715 
7716                 if (cpi == NULL)
7717                         cct->cct_cpi_tail = newcpi;
7718         }
7719 }
7720 
7721 /*
7722  * Enqueue the vhcache client (cct) at the tail of the list
7723  */
7724 static void
7725 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
7726     mdi_vhcache_client_t *cct)
7727 {
7728         cct->cct_next = NULL;
7729         if (vhcache->vhcache_client_head == NULL)
7730                 vhcache->vhcache_client_head = cct;
7731         else
7732                 vhcache->vhcache_client_tail->cct_next = cct;
7733         vhcache->vhcache_client_tail = cct;
7734 }
7735 
7736 static void
7737 free_string_array(char **str, int nelem)
7738 {
7739         int i;
7740 
7741         if (str) {
7742                 for (i = 0; i < nelem; i++) {
7743                         if (str[i])
7744                                 kmem_free(str[i], strlen(str[i]) + 1);
7745                 }
7746                 kmem_free(str, sizeof (char *) * nelem);
7747         }
7748 }
7749 
7750 static void
7751 free_vhcache_phci(mdi_vhcache_phci_t *cphci)
7752 {
7753         kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1);
7754         kmem_free(cphci, sizeof (*cphci));
7755 }
7756 
7757 static void
7758 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
7759 {
7760         kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1);
7761         kmem_free(cpi, sizeof (*cpi));
7762 }
7763 
7764 static void
7765 free_vhcache_client(mdi_vhcache_client_t *cct)
7766 {
7767         kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1);
7768         kmem_free(cct, sizeof (*cct));
7769 }
7770 
7771 static char *
7772 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
7773 {
7774         char *name_addr;
7775         int len;
7776 
7777         len = strlen(ct_name) + strlen(ct_addr) + 2;
7778         name_addr = kmem_alloc(len, KM_SLEEP);
7779         (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);
7780 
7781         if (ret_len)
7782                 *ret_len = len;
7783         return (name_addr);
7784 }
7785 
7786 /*
7787  * Copy the contents of paddrnvl to vhci cache.
7788  * paddrnvl nvlist contains path information for a vhci client.
7789  * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
7790  */
7791 static void
7792 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
7793     mdi_vhcache_client_t *cct)
7794 {
7795         nvpair_t *nvp = NULL;
7796         mdi_vhcache_pathinfo_t *cpi;
7797         uint_t nelem;
7798         uint32_t *val;
7799 
7800         while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7801                 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
7802                 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
7803                 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7804                 (void) nvpair_value_uint32_array(nvp, &val, &nelem);
7805                 ASSERT(nelem == 2);
7806                 cpi->cpi_cphci = cphci_list[val[0]];
7807                 cpi->cpi_flags = val[1];
7808                 enqueue_tail_vhcache_pathinfo(cct, cpi);
7809         }
7810 }
7811 
7812 /*
7813  * Copy the contents of caddrmapnvl to vhci cache.
7814  * caddrmapnvl nvlist contains vhci client address to phci client address
7815  * mappings. See the comment in mainnvl_to_vhcache() for the format of
7816  * this nvlist.
7817  */
7818 static void
7819 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
7820     mdi_vhcache_phci_t *cphci_list[])
7821 {
7822         nvpair_t *nvp = NULL;
7823         nvlist_t *paddrnvl;
7824         mdi_vhcache_client_t *cct;
7825 
7826         while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7827                 ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
7828                 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
7829                 cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7830                 (void) nvpair_value_nvlist(nvp, &paddrnvl);
7831                 paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
7832                 /* the client must contain at least one path */
7833                 ASSERT(cct->cct_cpi_head != NULL);
7834 
7835                 enqueue_vhcache_client(vhcache, cct);
7836                 (void) mod_hash_insert(vhcache->vhcache_client_hash,
7837                     (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
7838         }
7839 }
7840 
7841 /*
7842  * Copy the contents of the main nvlist to vhci cache.
7843  *
7844  * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
7845  * The nvlist contains the mappings between the vhci client addresses and
7846  * their corresponding phci client addresses.
7847  *
7848  * The structure of the nvlist is as follows:
7849  *
7850  * Main nvlist:
7851  *      NAME            TYPE            DATA
7852  *      version         int32           version number
7853  *      phcis           string array    array of phci paths
7854  *      clientaddrmap   nvlist_t        c2paddrs_nvl (see below)
7855  *
7856  * structure of c2paddrs_nvl:
7857  *      NAME            TYPE            DATA
7858  *      caddr1          nvlist_t        paddrs_nvl1
7859  *      caddr2          nvlist_t        paddrs_nvl2
7860  *      ...
7861  * where caddr1, caddr2, ... are vhci client name and addresses in the
7862  * form of "<clientname>@<clientaddress>".
7863  * (for example: "ssd@2000002037cd9f72");
7864  * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
7865  *
7866  * structure of paddrs_nvl:
7867  *      NAME            TYPE            DATA
7868  *      pi_addr1        uint32_array    (phci-id, cpi_flags)
7869  *      pi_addr2        uint32_array    (phci-id, cpi_flags)
7870  *      ...
7871  * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
7872  * (so called pi_addrs, for example: "w2100002037cd9f72,0");
7873  * phci-ids are integers that identify pHCIs to which the
7874  * the bus specific address belongs to. These integers are used as an index
7875  * into to the phcis string array in the main nvlist to get the pHCI path.
7876  */
7877 static int
7878 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
7879 {
7880         char **phcis, **phci_namep;
7881         uint_t nphcis;
7882         mdi_vhcache_phci_t *cphci, **cphci_list;
7883         nvlist_t *caddrmapnvl;
7884         int32_t ver;
7885         int i;
7886         size_t cphci_list_size;
7887 
7888         ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));
7889 
7890         if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
7891             ver != MDI_VHCI_CACHE_VERSION)
7892                 return (MDI_FAILURE);
7893 
7894         if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
7895             &nphcis) != 0)
7896                 return (MDI_SUCCESS);
7897 
7898         ASSERT(nphcis > 0);
7899 
7900         cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
7901         cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
7902         for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
7903                 cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
7904                 cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
7905                 enqueue_vhcache_phci(vhcache, cphci);
7906                 cphci_list[i] = cphci;
7907         }
7908 
7909         ASSERT(vhcache->vhcache_phci_head != NULL);
7910 
7911         if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
7912                 caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);
7913 
7914         kmem_free(cphci_list, cphci_list_size);
7915         return (MDI_SUCCESS);
7916 }
7917 
7918 /*
7919  * Build paddrnvl for the specified client using the information in the
7920  * vhci cache and add it to the caddrmapnnvl.
7921  * Returns 0 on success, errno on failure.
7922  */
7923 static int
7924 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
7925     nvlist_t *caddrmapnvl)
7926 {
7927         mdi_vhcache_pathinfo_t *cpi;
7928         nvlist_t *nvl;
7929         int err;
7930         uint32_t val[2];
7931 
7932         ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7933 
7934         if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0)
7935                 return (err);
7936 
7937         for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7938                 val[0] = cpi->cpi_cphci->cphci_id;
7939                 val[1] = cpi->cpi_flags;
7940                 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2))
7941                     != 0)
7942                         goto out;
7943         }
7944 
7945         err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl);
7946 out:
7947         nvlist_free(nvl);
7948         return (err);
7949 }
7950 
7951 /*
7952  * Build caddrmapnvl using the information in the vhci cache
7953  * and add it to the mainnvl.
7954  * Returns 0 on success, errno on failure.
7955  */
7956 static int
7957 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
7958 {
7959         mdi_vhcache_client_t *cct;
7960         nvlist_t *nvl;
7961         int err;
7962 
7963         ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7964 
7965         if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0)
7966                 return (err);
7967 
7968         for (cct = vhcache->vhcache_client_head; cct != NULL;
7969             cct = cct->cct_next) {
7970                 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0)
7971                         goto out;
7972         }
7973 
7974         err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl);
7975 out:
7976         nvlist_free(nvl);
7977         return (err);
7978 }
7979 
7980 /*
7981  * Build nvlist using the information in the vhci cache.
7982  * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
7983  * Returns nvl on success, NULL on failure.
7984  */
7985 static nvlist_t *
7986 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
7987 {
7988         mdi_vhcache_phci_t *cphci;
7989         uint_t phci_count;
7990         char **phcis;
7991         nvlist_t *nvl;
7992         int err, i;
7993 
7994         if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
7995                 nvl = NULL;
7996                 goto out;
7997         }
7998 
7999         if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
8000             MDI_VHCI_CACHE_VERSION)) != 0)
8001                 goto out;
8002 
8003         rw_enter(&vhcache->vhcache_lock, RW_READER);
8004         if (vhcache->vhcache_phci_head == NULL) {
8005                 rw_exit(&vhcache->vhcache_lock);
8006                 return (nvl);
8007         }
8008 
8009         phci_count = 0;
8010         for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8011             cphci = cphci->cphci_next)
8012                 cphci->cphci_id = phci_count++;
8013 
8014         /* build phci pathname list */
8015         phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
8016         for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
8017             cphci = cphci->cphci_next, i++)
8018                 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);
8019 
8020         err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
8021             phci_count);
8022         free_string_array(phcis, phci_count);
8023 
8024         if (err == 0 &&
8025             (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
8026                 rw_exit(&vhcache->vhcache_lock);
8027                 return (nvl);
8028         }
8029 
8030         rw_exit(&vhcache->vhcache_lock);
8031 out:
8032         nvlist_free(nvl);
8033         return (NULL);
8034 }
8035 
8036 /*
8037  * Lookup vhcache phci structure for the specified phci path.
8038  */
8039 static mdi_vhcache_phci_t *
8040 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
8041 {
8042         mdi_vhcache_phci_t *cphci;
8043 
8044         ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8045 
8046         for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8047             cphci = cphci->cphci_next) {
8048                 if (strcmp(cphci->cphci_path, phci_path) == 0)
8049                         return (cphci);
8050         }
8051 
8052         return (NULL);
8053 }
8054 
8055 /*
8056  * Lookup vhcache phci structure for the specified phci.
8057  */
8058 static mdi_vhcache_phci_t *
8059 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
8060 {
8061         mdi_vhcache_phci_t *cphci;
8062 
8063         ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8064 
8065         for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8066             cphci = cphci->cphci_next) {
8067                 if (cphci->cphci_phci == ph)
8068                         return (cphci);
8069         }
8070 
8071         return (NULL);
8072 }
8073 
8074 /*
8075  * Add the specified phci to the vhci cache if not already present.
8076  */
8077 static void
8078 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
8079 {
8080         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8081         mdi_vhcache_phci_t *cphci;
8082         char *pathname;
8083         int cache_updated;
8084 
8085         rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8086 
8087         pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8088         (void) ddi_pathname(ph->ph_dip, pathname);
8089         if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
8090             != NULL) {
8091                 cphci->cphci_phci = ph;
8092                 cache_updated = 0;
8093         } else {
8094                 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
8095                 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
8096                 cphci->cphci_phci = ph;
8097                 enqueue_vhcache_phci(vhcache, cphci);
8098                 cache_updated = 1;
8099         }
8100 
8101         rw_exit(&vhcache->vhcache_lock);
8102 
8103         /*
8104          * Since a new phci has been added, reset
8105          * vhc_path_discovery_cutoff_time to allow for discovery of paths
8106          * during next vhcache_discover_paths().
8107          */
8108         mutex_enter(&vhc->vhc_lock);
8109         vhc->vhc_path_discovery_cutoff_time = 0;
8110         mutex_exit(&vhc->vhc_lock);
8111 
8112         kmem_free(pathname, MAXPATHLEN);
8113         if (cache_updated)
8114                 vhcache_dirty(vhc);
8115 }
8116 
8117 /*
8118  * Remove the reference to the specified phci from the vhci cache.
8119  */
8120 static void
8121 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
8122 {
8123         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8124         mdi_vhcache_phci_t *cphci;
8125 
8126         rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8127         if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
8128                 /* do not remove the actual mdi_vhcache_phci structure */
8129                 cphci->cphci_phci = NULL;
8130         }
8131         rw_exit(&vhcache->vhcache_lock);
8132 }
8133 
8134 static void
8135 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
8136     mdi_vhcache_lookup_token_t *src)
8137 {
8138         if (src == NULL) {
8139                 dst->lt_cct = NULL;
8140                 dst->lt_cct_lookup_time = 0;
8141         } else {
8142                 dst->lt_cct = src->lt_cct;
8143                 dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
8144         }
8145 }
8146 
8147 /*
8148  * Look up vhcache client for the specified client.
8149  */
8150 static mdi_vhcache_client_t *
8151 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
8152     mdi_vhcache_lookup_token_t *token)
8153 {
8154         mod_hash_val_t hv;
8155         char *name_addr;
8156         int len;
8157 
8158         ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8159 
8160         /*
8161          * If no vhcache clean occurred since the last lookup, we can
8162          * simply return the cct from the last lookup operation.
8163          * It works because ccts are never freed except during the vhcache
8164          * cleanup operation.
8165          */
8166         if (token != NULL &&
8167             vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
8168                 return (token->lt_cct);
8169 
8170         name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
8171         if (mod_hash_find(vhcache->vhcache_client_hash,
8172             (mod_hash_key_t)name_addr, &hv) == 0) {
8173                 if (token) {
8174                         token->lt_cct = (mdi_vhcache_client_t *)hv;
8175                         token->lt_cct_lookup_time = ddi_get_lbolt64();
8176                 }
8177         } else {
8178                 if (token) {
8179                         token->lt_cct = NULL;
8180                         token->lt_cct_lookup_time = 0;
8181                 }
8182                 hv = NULL;
8183         }
8184         kmem_free(name_addr, len);
8185         return ((mdi_vhcache_client_t *)hv);
8186 }
8187 
8188 /*
8189  * Add the specified path to the vhci cache if not already present.
8190  * Also add the vhcache client for the client corresponding to this path
8191  * if it doesn't already exist.
8192  */
8193 static void
8194 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
8195 {
8196         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8197         mdi_vhcache_client_t *cct;
8198         mdi_vhcache_pathinfo_t *cpi;
8199         mdi_phci_t *ph = pip->pi_phci;
8200         mdi_client_t *ct = pip->pi_client;
8201         int cache_updated = 0;
8202 
8203         rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8204 
8205         /* if vhcache client for this pip doesn't already exist, add it */
8206         if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
8207             NULL)) == NULL) {
8208                 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
8209                 cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
8210                     ct->ct_guid, NULL);
8211                 enqueue_vhcache_client(vhcache, cct);
8212                 (void) mod_hash_insert(vhcache->vhcache_client_hash,
8213                     (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
8214                 cache_updated = 1;
8215         }
8216 
8217         for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8218                 if (cpi->cpi_cphci->cphci_phci == ph &&
8219                     strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
8220                         cpi->cpi_pip = pip;
8221                         if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
8222                                 cpi->cpi_flags &=
8223                                     ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8224                                 sort_vhcache_paths(cct);
8225                                 cache_updated = 1;
8226                         }
8227                         break;
8228                 }
8229         }
8230 
8231         if (cpi == NULL) {
8232                 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
8233                 cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
8234                 cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
8235                 ASSERT(cpi->cpi_cphci != NULL);
8236                 cpi->cpi_pip = pip;
8237                 enqueue_vhcache_pathinfo(cct, cpi);
8238                 cache_updated = 1;
8239         }
8240 
8241         rw_exit(&vhcache->vhcache_lock);
8242 
8243         if (cache_updated)
8244                 vhcache_dirty(vhc);
8245 }
8246 
8247 /*
8248  * Remove the reference to the specified path from the vhci cache.
8249  */
8250 static void
8251 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
8252 {
8253         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8254         mdi_client_t *ct = pip->pi_client;
8255         mdi_vhcache_client_t *cct;
8256         mdi_vhcache_pathinfo_t *cpi;
8257 
8258         rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8259         if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
8260             NULL)) != NULL) {
8261                 for (cpi = cct->cct_cpi_head; cpi != NULL;
8262                     cpi = cpi->cpi_next) {
8263                         if (cpi->cpi_pip == pip) {
8264                                 cpi->cpi_pip = NULL;
8265                                 break;
8266                         }
8267                 }
8268         }
8269         rw_exit(&vhcache->vhcache_lock);
8270 }
8271 
8272 /*
8273  * Flush the vhci cache to disk.
8274  * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
8275  */
8276 static int
8277 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
8278 {
8279         nvlist_t *nvl;
8280         int err;
8281         int rv;
8282 
8283         /*
8284          * It is possible that the system may shutdown before
8285          * i_ddi_io_initialized (during stmsboot for example). To allow for
8286          * flushing the cache in this case do not check for
8287          * i_ddi_io_initialized when force flag is set.
8288          */
8289         if (force_flag == 0 && !i_ddi_io_initialized())
8290                 return (MDI_FAILURE);
8291 
8292         if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
8293                 err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
8294                 nvlist_free(nvl);
8295         } else
8296                 err = EFAULT;
8297 
8298         rv = MDI_SUCCESS;
8299         mutex_enter(&vhc->vhc_lock);
8300         if (err != 0) {
8301                 if (err == EROFS) {
8302                         vhc->vhc_flags |= MDI_VHC_READONLY_FS;
8303                         vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
8304                             MDI_VHC_VHCACHE_DIRTY);
8305                 } else {
8306                         if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
8307                                 cmn_err(CE_CONT, "%s: update failed\n",
8308                                     vhc->vhc_vhcache_filename);
8309                                 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
8310                         }
8311                         rv = MDI_FAILURE;
8312                 }
8313         } else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
8314                 cmn_err(CE_CONT,
8315                     "%s: update now ok\n", vhc->vhc_vhcache_filename);
8316                 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
8317         }
8318         mutex_exit(&vhc->vhc_lock);
8319 
8320         return (rv);
8321 }
8322 
/*
 * Call flush_vhcache() to flush the vhci cache at the scheduled time.
 * Exits itself if left idle for the idle timeout period.
 *
 * arg is the mdi_vhci_config_t whose cache this thread services. vhc_lock
 * is dropped around the call to flush_vhcache() and is held again when the
 * thread clears MDI_VHC_VHCACHE_FLUSH_THREAD on its way out.
 */
static void
vhcache_flush_thread(void *arg)
{
        mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
        clock_t idle_time, quit_at_ticks;
        callb_cpr_t cprinfo;

        /* number of seconds to sleep idle before exiting */
        idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;

        CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
            "mdi_vhcache_flush");
        mutex_enter(&vhc->vhc_lock);
        for (; ; ) {
                /*
                 * Cache is dirty and no exit was requested: wait until
                 * vhc_flush_at_ticks is reached, then flush.
                 */
                while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
                    (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
                        if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
                                CALLB_CPR_SAFE_BEGIN(&cprinfo);
                                (void) cv_timedwait(&vhc->vhc_cv,
                                    &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
                                CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
                        } else {
                                /*
                                 * Clear the dirty flag before dropping the
                                 * lock for the flush.
                                 */
                                vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
                                mutex_exit(&vhc->vhc_lock);

                                /* flush failed; request another flush */
                                if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
                                        vhcache_dirty(vhc);

                                mutex_enter(&vhc->vhc_lock);
                        }
                }

                quit_at_ticks = ddi_get_lbolt() + idle_time;

                /*
                 * Nothing to flush: idle until the cache becomes dirty,
                 * exit is requested or quit_at_ticks is reached.
                 */
                while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
                    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
                    ddi_get_lbolt() < quit_at_ticks) {
                        CALLB_CPR_SAFE_BEGIN(&cprinfo);
                        (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
                            quit_at_ticks);
                        CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
                }

                /*
                 * Leave if asked to exit or if the cache is still clean;
                 * otherwise go back and service the dirty cache.
                 */
                if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
                    !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
                        goto out;
        }

out:
        /* vhc_lock is held here; record that the flush thread is gone */
        vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
        /* CALLB_CPR_EXIT releases the vhc->vhc_lock */
        CALLB_CPR_EXIT(&cprinfo);
}
8380 
8381 /*
8382  * Make vhci cache dirty and schedule flushing by vhcache flush thread.
8383  */
8384 static void
8385 vhcache_dirty(mdi_vhci_config_t *vhc)
8386 {
8387         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8388         int create_thread;
8389 
8390         rw_enter(&vhcache->vhcache_lock, RW_READER);
8391         /* do not flush cache until the cache is fully built */
8392         if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
8393                 rw_exit(&vhcache->vhcache_lock);
8394                 return;
8395         }
8396         rw_exit(&vhcache->vhcache_lock);
8397 
8398         mutex_enter(&vhc->vhc_lock);
8399         if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
8400                 mutex_exit(&vhc->vhc_lock);
8401                 return;
8402         }
8403 
8404         vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
8405         vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
8406             mdi_vhcache_flush_delay * TICKS_PER_SECOND;
8407         if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
8408                 cv_broadcast(&vhc->vhc_cv);
8409                 create_thread = 0;
8410         } else {
8411                 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
8412                 create_thread = 1;
8413         }
8414         mutex_exit(&vhc->vhc_lock);
8415 
8416         if (create_thread)
8417                 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
8418                     0, &p0, TS_RUN, minclsyspri);
8419 }
8420 
/*
 * phci bus config structure - one for each phci bus config operation that
 * we initiate on behalf of a vhci.
 */
typedef struct mdi_phci_bus_config_s {
        char *phbc_phci_path;   /* kmem allocated copy of the phci path */
        struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */
        struct mdi_phci_bus_config_s *phbc_next;        /* next in list */
} mdi_phci_bus_config_t;
8430 
/*
 * vhci bus config structure - one for each vhci bus config operation.
 * It is shared by all the phci bus config operations started for it.
 */
typedef struct mdi_vhci_bus_config_s {
        ddi_bus_config_op_t vhbc_op;    /* bus config op */
        major_t vhbc_op_major;          /* bus config op major */
        uint_t vhbc_op_flags;           /* bus config op flags */
        kmutex_t vhbc_lock;     /* held to decrement/wait on vhbc_thr_count */
        kcondvar_t vhbc_cv;     /* broadcast when vhbc_thr_count reaches 0 */
        int vhbc_thr_count;     /* phci bus config operations outstanding */
} mdi_vhci_bus_config_t;
8440 
8441 /*
8442  * bus config the specified phci
8443  */
8444 static void
8445 bus_config_phci(void *arg)
8446 {
8447         mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
8448         mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
8449         dev_info_t *ph_dip;
8450 
8451         /*
8452          * first configure all path components upto phci and then configure
8453          * the phci children.
8454          */
8455         if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
8456             != NULL) {
8457                 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
8458                     vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
8459                         (void) ndi_devi_config_driver(ph_dip,
8460                             vhbc->vhbc_op_flags,
8461                             vhbc->vhbc_op_major);
8462                 } else
8463                         (void) ndi_devi_config(ph_dip,
8464                             vhbc->vhbc_op_flags);
8465 
8466                 /* release the hold that e_ddi_hold_devi_by_path() placed */
8467                 ndi_rele_devi(ph_dip);
8468         }
8469 
8470         kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
8471         kmem_free(phbc, sizeof (*phbc));
8472 
8473         mutex_enter(&vhbc->vhbc_lock);
8474         vhbc->vhbc_thr_count--;
8475         if (vhbc->vhbc_thr_count == 0)
8476                 cv_broadcast(&vhbc->vhbc_cv);
8477         mutex_exit(&vhbc->vhbc_lock);
8478 }
8479 
8480 /*
8481  * Bus config all phcis associated with the vhci in parallel.
8482  * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
8483  */
8484 static void
8485 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
8486     ddi_bus_config_op_t op, major_t maj)
8487 {
8488         mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
8489         mdi_vhci_bus_config_t *vhbc;
8490         mdi_vhcache_phci_t *cphci;
8491 
8492         rw_enter(&vhcache->vhcache_lock, RW_READER);
8493         if (vhcache->vhcache_phci_head == NULL) {
8494                 rw_exit(&vhcache->vhcache_lock);
8495                 return;
8496         }
8497 
8498         vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);
8499 
8500         for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8501             cphci = cphci->cphci_next) {
8502                 /* skip phcis that haven't attached before root is available */
8503                 if (!modrootloaded && (cphci->cphci_phci == NULL))
8504                         continue;
8505                 phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
8506                 phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
8507                     KM_SLEEP);
8508                 phbc->phbc_vhbusconfig = vhbc;
8509                 phbc->phbc_next = phbc_head;
8510                 phbc_head = phbc;
8511                 vhbc->vhbc_thr_count++;
8512         }
8513         rw_exit(&vhcache->vhcache_lock);
8514 
8515         vhbc->vhbc_op = op;
8516         vhbc->vhbc_op_major = maj;
8517         vhbc->vhbc_op_flags = NDI_NO_EVENT |
8518             (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
8519         mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
8520         cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);
8521 
8522         /* now create threads to initiate bus config on all phcis in parallel */
8523         for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
8524                 phbc_next = phbc->phbc_next;
8525                 if (mdi_mtc_off)
8526                         bus_config_phci((void *)phbc);
8527                 else
8528                         (void) thread_create(NULL, 0, bus_config_phci, phbc,
8529                             0, &p0, TS_RUN, minclsyspri);
8530         }
8531 
8532         mutex_enter(&vhbc->vhbc_lock);
8533         /* wait until all threads exit */
8534         while (vhbc->vhbc_thr_count > 0)
8535                 cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
8536         mutex_exit(&vhbc->vhbc_lock);
8537 
8538         mutex_destroy(&vhbc->vhbc_lock);
8539         cv_destroy(&vhbc->vhbc_cv);
8540         kmem_free(vhbc, sizeof (*vhbc));
8541 }
8542 
8543 /*
8544  * Single threaded version of bus_config_all_phcis()
8545  */
8546 static void
8547 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
8548     ddi_bus_config_op_t op, major_t maj)
8549 {
8550         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8551 
8552         single_threaded_vhconfig_enter(vhc);
8553         bus_config_all_phcis(vhcache, flags, op, maj);
8554         single_threaded_vhconfig_exit(vhc);
8555 }
8556 
8557 /*
8558  * Perform BUS_CONFIG_ONE on the specified child of the phci.
8559  * The path includes the child component in addition to the phci path.
8560  */
8561 static int
8562 bus_config_one_phci_child(char *path)
8563 {
8564         dev_info_t *ph_dip, *child;
8565         char *devnm;
8566         int rv = MDI_FAILURE;
8567 
8568         /* extract the child component of the phci */
8569         devnm = strrchr(path, '/');
8570         *devnm++ = '\0';
8571 
8572         /*
8573          * first configure all path components upto phci and then
8574          * configure the phci child.
8575          */
8576         if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
8577                 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
8578                     NDI_SUCCESS) {
8579                         /*
8580                          * release the hold that ndi_devi_config_one() placed
8581                          */
8582                         ndi_rele_devi(child);
8583                         rv = MDI_SUCCESS;
8584                 }
8585 
8586                 /* release the hold that e_ddi_hold_devi_by_path() placed */
8587                 ndi_rele_devi(ph_dip);
8588         }
8589 
8590         devnm--;
8591         *devnm = '/';
8592         return (rv);
8593 }
8594 
8595 /*
8596  * Build a list of phci client paths for the specified vhci client.
8597  * The list includes only those phci client paths which aren't configured yet.
8598  */
8599 static mdi_phys_path_t *
8600 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
8601 {
8602         mdi_vhcache_pathinfo_t *cpi;
8603         mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
8604         int config_path, len;
8605 
8606         for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8607                 /*
8608                  * include only those paths that aren't configured.
8609                  */
8610                 config_path = 0;
8611                 if (cpi->cpi_pip == NULL)
8612                         config_path = 1;
8613                 else {
8614                         MDI_PI_LOCK(cpi->cpi_pip);
8615                         if (MDI_PI_IS_INIT(cpi->cpi_pip))
8616                                 config_path = 1;
8617                         MDI_PI_UNLOCK(cpi->cpi_pip);
8618                 }
8619 
8620                 if (config_path) {
8621                         pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
8622                         len = strlen(cpi->cpi_cphci->cphci_path) +
8623                             strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
8624                         pp->phys_path = kmem_alloc(len, KM_SLEEP);
8625                         (void) snprintf(pp->phys_path, len, "%s/%s@%s",
8626                             cpi->cpi_cphci->cphci_path, ct_name,
8627                             cpi->cpi_addr);
8628                         pp->phys_path_next = NULL;
8629 
8630                         if (pp_head == NULL)
8631                                 pp_head = pp;
8632                         else
8633                                 pp_tail->phys_path_next = pp;
8634                         pp_tail = pp;
8635                 }
8636         }
8637 
8638         return (pp_head);
8639 }
8640 
8641 /*
8642  * Free the memory allocated for phci client path list.
8643  */
8644 static void
8645 free_phclient_path_list(mdi_phys_path_t *pp_head)
8646 {
8647         mdi_phys_path_t *pp, *pp_next;
8648 
8649         for (pp = pp_head; pp != NULL; pp = pp_next) {
8650                 pp_next = pp->phys_path_next;
8651                 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
8652                 kmem_free(pp, sizeof (*pp));
8653         }
8654 }
8655 
8656 /*
8657  * Allocated async client structure and initialize with the specified values.
8658  */
8659 static mdi_async_client_config_t *
8660 alloc_async_client_config(char *ct_name, char *ct_addr,
8661     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8662 {
8663         mdi_async_client_config_t *acc;
8664 
8665         acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
8666         acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
8667         acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
8668         acc->acc_phclient_path_list_head = pp_head;
8669         init_vhcache_lookup_token(&acc->acc_token, tok);
8670         acc->acc_next = NULL;
8671         return (acc);
8672 }
8673 
8674 /*
8675  * Free the memory allocated for the async client structure and their members.
8676  */
8677 static void
8678 free_async_client_config(mdi_async_client_config_t *acc)
8679 {
8680         if (acc->acc_phclient_path_list_head)
8681                 free_phclient_path_list(acc->acc_phclient_path_list_head);
8682         kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
8683         kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
8684         kmem_free(acc, sizeof (*acc));
8685 }
8686 
8687 /*
8688  * Sort vhcache pathinfos (cpis) of the specified client.
8689  * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
8690  * flag set come at the beginning of the list. All cpis which have this
8691  * flag set come at the end of the list.
8692  */
8693 static void
8694 sort_vhcache_paths(mdi_vhcache_client_t *cct)
8695 {
8696         mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;
8697 
8698         cpi_head = cct->cct_cpi_head;
8699         cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
8700         for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
8701                 cpi_next = cpi->cpi_next;
8702                 enqueue_vhcache_pathinfo(cct, cpi);
8703         }
8704 }
8705 
/*
 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
 * every vhcache pathinfo of the specified client. If not adjust the flag
 * setting appropriately.
 *
 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
 * on-disk vhci cache. So every time this flag is updated the cache must be
 * flushed.
 *
 * The client is identified by ct_name, ct_addr and the lookup token tok.
 * If the client is not in the cache, or no flag needs changing, nothing is
 * modified. The vhcache lock is taken and released here.
 */
static void
adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_vhcache_lookup_token_t *tok)
{
        mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
        mdi_vhcache_client_t *cct;
        mdi_vhcache_pathinfo_t *cpi;

        rw_enter(&vhcache->vhcache_lock, RW_READER);
        if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
            == NULL) {
                rw_exit(&vhcache->vhcache_lock);
                return;
        }

        /*
         * to avoid unnecessary on-disk cache updates, first check if an
         * update is really needed. If no update is needed simply return.
         *
         * A cpi is out of date if it has a pathinfo node but carries the
         * hint, or has no pathinfo node but lacks the hint.
         */
        for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
                if ((cpi->cpi_pip != NULL &&
                    (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
                    (cpi->cpi_pip == NULL &&
                    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
                        break;
                }
        }
        /* the scan ran off the end of the list: all hints are correct */
        if (cpi == NULL) {
                rw_exit(&vhcache->vhcache_lock);
                return;
        }

        /*
         * The flags are modified under the writer lock. If the reader lock
         * cannot be upgraded, it is dropped and the lock retaken as writer;
         * the client is looked up again since the lock was released.
         */
        if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
                rw_exit(&vhcache->vhcache_lock);
                rw_enter(&vhcache->vhcache_lock, RW_WRITER);
                if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
                    tok)) == NULL) {
                        rw_exit(&vhcache->vhcache_lock);
                        return;
                }
        }

        /* set the hint on exactly those cpis that have no pathinfo node */
        for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
                if (cpi->cpi_pip != NULL)
                        cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
                else
                        cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
        }
        sort_vhcache_paths(cct);

        rw_exit(&vhcache->vhcache_lock);
        /* the flags changed; schedule a flush of the on-disk cache */
        vhcache_dirty(vhc);
}
8768 
8769 /*
8770  * Configure all specified paths of the client.
8771  */
8772 static void
8773 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8774     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8775 {
8776         mdi_phys_path_t *pp;
8777 
8778         for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
8779                 (void) bus_config_one_phci_child(pp->phys_path);
8780         adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
8781 }
8782 
/*
 * Dequeue elements from vhci async client config list and bus configure
 * their corresponding phci clients.
 *
 * arg is the mdi_vhci_config_t owning the list. The thread exits when
 * MDI_VHC_EXIT is set or when the list is still empty after waiting for
 * mdi_async_config_idle_time seconds; vhc_acc_thrcount is decremented
 * on the way out.
 */
static void
config_client_paths_thread(void *arg)
{
        mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
        mdi_async_client_config_t *acc;
        clock_t quit_at_ticks;
        clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
        callb_cpr_t cprinfo;

        CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
            "mdi_config_client_paths");

        for (; ; ) {
                quit_at_ticks = ddi_get_lbolt() + idle_time;

                mutex_enter(&vhc->vhc_lock);
                /*
                 * wait for work to be queued, for an exit request or for
                 * the idle period to run out
                 */
                while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
                    vhc->vhc_acc_list_head == NULL &&
                    ddi_get_lbolt() < quit_at_ticks) {
                        CALLB_CPR_SAFE_BEGIN(&cprinfo);
                        (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
                            quit_at_ticks);
                        CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
                }

                /* leave (with vhc_lock held) on exit request or no work */
                if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
                    vhc->vhc_acc_list_head == NULL)
                        goto out;

                /* take the request at the head of the list */
                acc = vhc->vhc_acc_list_head;
                vhc->vhc_acc_list_head = acc->acc_next;
                if (vhc->vhc_acc_list_head == NULL)
                        vhc->vhc_acc_list_tail = NULL;
                vhc->vhc_acc_count--;
                mutex_exit(&vhc->vhc_lock);

                /* the request is processed without holding vhc_lock */
                config_client_paths_sync(vhc, acc->acc_ct_name,
                    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
                    &acc->acc_token);

                /* frees the request along with its path list */
                free_async_client_config(acc);
        }

out:
        vhc->vhc_acc_thrcount--;
        /* CALLB_CPR_EXIT releases the vhc->vhc_lock */
        CALLB_CPR_EXIT(&cprinfo);
}
8835 
8836 /*
8837  * Arrange for all the phci client paths (pp_head) for the specified client
8838  * to be bus configured asynchronously by a thread.
8839  */
8840 static void
8841 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8842     mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8843 {
8844         mdi_async_client_config_t *acc, *newacc;
8845         int create_thread;
8846 
8847         if (pp_head == NULL)
8848                 return;
8849 
8850         if (mdi_mtc_off) {
8851                 config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
8852                 free_phclient_path_list(pp_head);
8853                 return;
8854         }
8855 
8856         newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
8857         ASSERT(newacc);
8858 
8859         mutex_enter(&vhc->vhc_lock);
8860         for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
8861                 if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
8862                     strcmp(ct_addr, acc->acc_ct_addr) == 0) {
8863                         free_async_client_config(newacc);
8864                         mutex_exit(&vhc->vhc_lock);
8865                         return;
8866                 }
8867         }
8868 
8869         if (vhc->vhc_acc_list_head == NULL)
8870                 vhc->vhc_acc_list_head = newacc;
8871         else
8872                 vhc->vhc_acc_list_tail->acc_next = newacc;
8873         vhc->vhc_acc_list_tail = newacc;
8874         vhc->vhc_acc_count++;
8875         if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
8876                 cv_broadcast(&vhc->vhc_cv);
8877                 create_thread = 0;
8878         } else {
8879                 vhc->vhc_acc_thrcount++;
8880                 create_thread = 1;
8881         }
8882         mutex_exit(&vhc->vhc_lock);
8883 
8884         if (create_thread)
8885                 (void) thread_create(NULL, 0, config_client_paths_thread, vhc,
8886                     0, &p0, TS_RUN, minclsyspri);
8887 }
8888 
8889 /*
8890  * Return number of online paths for the specified client.
8891  */
8892 static int
8893 nonline_paths(mdi_vhcache_client_t *cct)
8894 {
8895         mdi_vhcache_pathinfo_t *cpi;
8896         int online_count = 0;
8897 
8898         for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8899                 if (cpi->cpi_pip != NULL) {
8900                         MDI_PI_LOCK(cpi->cpi_pip);
8901                         if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
8902                                 online_count++;
8903                         MDI_PI_UNLOCK(cpi->cpi_pip);
8904                 }
8905         }
8906 
8907         return (online_count);
8908 }
8909 
/*
 * Bus configure all paths for the specified vhci client.
 * If at least one path for the client is already online, the remaining paths
 * will be configured asynchronously. Otherwise, it synchronously configures
 * the paths until at least one path is online and then rest of the paths
 * will be configured asynchronously.
 *
 * Must be called with the vhcache lock held; the lock is released here on
 * every path and is not held on return.
 */
static void
config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
{
        mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
        mdi_phys_path_t *pp_head, *pp;
        mdi_vhcache_client_t *cct;
        mdi_vhcache_lookup_token_t tok;

        ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

        init_vhcache_lookup_token(&tok, NULL);

        /*
         * nothing to do if the client is not specified, is not in the
         * cache or has no path left to configure
         */
        if (ct_name == NULL || ct_addr == NULL ||
            (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
            == NULL ||
            (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
                rw_exit(&vhcache->vhcache_lock);
                return;
        }

        /* if at least one path is online, configure the rest asynchronously */
        if (nonline_paths(cct) > 0) {
                rw_exit(&vhcache->vhcache_lock);
                /* pp_head is handed over; it is not freed here */
                config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
                return;
        }

        rw_exit(&vhcache->vhcache_lock);

        /* configure paths one at a time until one of them is online */
        for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
                if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
                        rw_enter(&vhcache->vhcache_lock, RW_READER);

                        /* the lock was dropped; look the client up again */
                        if ((cct = lookup_vhcache_client(vhcache, ct_name,
                            ct_addr, &tok)) == NULL) {
                                rw_exit(&vhcache->vhcache_lock);
                                goto out;
                        }

                        if (nonline_paths(cct) > 0 &&
                            pp->phys_path_next != NULL) {
                                rw_exit(&vhcache->vhcache_lock);
                                /*
                                 * Hand the rest of the list over and cut
                                 * it off, so that only the part up to pp
                                 * is freed below.
                                 */
                                config_client_paths_async(vhc, ct_name, ct_addr,
                                    pp->phys_path_next, &tok);
                                pp->phys_path_next = NULL;
                                goto out;
                        }

                        rw_exit(&vhcache->vhcache_lock);
                }
        }

        /* every path was tried synchronously */
        adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
out:
        free_phclient_path_list(pp_head);
}
8973 
8974 static void
8975 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
8976 {
8977         mutex_enter(&vhc->vhc_lock);
8978         while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
8979                 cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
8980         vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
8981         mutex_exit(&vhc->vhc_lock);
8982 }
8983 
/*
 * End a single threaded vhci config operation: clear
 * MDI_VHC_SINGLE_THREADED and wake up the threads waiting on vhc_cv.
 */
static void
single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
{
        mutex_enter(&vhc->vhc_lock);
        vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
        cv_broadcast(&vhc->vhc_cv);
        mutex_exit(&vhc->vhc_lock);
}
8992 
/*
 * Entry of a built-in phci driver table: the name of a phci driver and
 * whether that driver supports the root device.
 */
typedef struct mdi_phci_driver_info {
        char    *phdriver_name; /* name of the phci driver */

        /* set to non zero if the phci driver supports root device */
        int     phdriver_root_support;
} mdi_phci_driver_info_t;
8999 
9000 /*
9001  * vhci class and root support capability of a phci driver can be
9002  * specified using ddi-vhci-class and ddi-no-root-support properties in the
9003  * phci driver.conf file. The built-in tables below contain this information
9004  * for those phci drivers whose driver.conf files don't yet contain this info.
9005  *
 * All phci drivers except iscsi have root device support.
9007  */
/*
 * Built-in phci drivers of the MDI_HCI_CLASS_SCSI vhci class.
 * Each entry is { driver name, root device support }.
 */
static mdi_phci_driver_info_t scsi_phci_driver_list[] = {
        { "fp", 1 },
        { "iscsi", 0 },         /* no root device support */
        { "ibsrp", 1 }
        };
9013 
/*
 * Built-in phci drivers of the MDI_HCI_CLASS_IB vhci class.
 * Each entry is { driver name, root device support }.
 */
static mdi_phci_driver_info_t ib_phci_driver_list[] = {{ "tavor", 1 }};
9015 
9016 static void *
9017 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size)
9018 {
9019         void *new_ptr;
9020 
9021         new_ptr = kmem_zalloc(new_size, KM_SLEEP);
9022         if (old_ptr) {
9023                 bcopy(old_ptr, new_ptr, MIN(old_size, new_size));
9024                 kmem_free(old_ptr, old_size);
9025         }
9026         return (new_ptr);
9027 }
9028 
9029 static void
9030 add_to_phci_list(char ***driver_list, int **root_support_list,
9031     int *cur_elements, int *max_elements, char *driver_name, int root_support)
9032 {
9033         ASSERT(*cur_elements <= *max_elements);
9034         if (*cur_elements == *max_elements) {
9035                 *max_elements += 10;
9036                 *driver_list = mdi_realloc(*driver_list,
9037                     sizeof (char *) * (*cur_elements),
9038                     sizeof (char *) * (*max_elements));
9039                 *root_support_list = mdi_realloc(*root_support_list,
9040                     sizeof (int) * (*cur_elements),
9041                     sizeof (int) * (*max_elements));
9042         }
9043         (*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP);
9044         (*root_support_list)[*cur_elements] = root_support;
9045         (*cur_elements)++;
9046 }
9047 
9048 static void
9049 get_phci_driver_list(char *vhci_class, char ***driver_list,
9050     int **root_support_list, int *cur_elements, int *max_elements)
9051 {
9052         mdi_phci_driver_info_t  *st_driver_list, *p;
9053         int             st_ndrivers, root_support, i, j, driver_conf_count;
9054         major_t         m;
9055         struct devnames *dnp;
9056         ddi_prop_t      *propp;
9057 
9058         *driver_list = NULL;
9059         *root_support_list = NULL;
9060         *cur_elements = 0;
9061         *max_elements = 0;
9062 
9063         /* add the phci drivers derived from the phci driver.conf files */
9064         for (m = 0; m < devcnt; m++) {
9065                 dnp = &devnamesp[m];
9066 
9067                 if (dnp->dn_flags & DN_PHCI_DRIVER) {
9068                         LOCK_DEV_OPS(&dnp->dn_lock);
9069                         if (dnp->dn_global_prop_ptr != NULL &&
9070                             (propp = i_ddi_prop_search(DDI_DEV_T_ANY,
9071                             DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING,
9072                             &dnp->dn_global_prop_ptr->prop_list)) != NULL &&
9073                             strcmp(propp->prop_val, vhci_class) == 0) {
9074 
9075                                 root_support = (i_ddi_prop_search(DDI_DEV_T_ANY,
9076                                     DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT,
9077                                     &dnp->dn_global_prop_ptr->prop_list)
9078                                     == NULL) ? 1 : 0;
9079 
9080                                 add_to_phci_list(driver_list, root_support_list,
9081                                     cur_elements, max_elements, dnp->dn_name,
9082                                     root_support);
9083 
9084                                 UNLOCK_DEV_OPS(&dnp->dn_lock);
9085                         } else
9086                                 UNLOCK_DEV_OPS(&dnp->dn_lock);
9087                 }
9088         }
9089 
9090         driver_conf_count = *cur_elements;
9091 
9092         /* add the phci drivers specified in the built-in tables */
9093         if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) {
9094                 st_driver_list = scsi_phci_driver_list;
9095                 st_ndrivers = sizeof (scsi_phci_driver_list) /
9096                     sizeof (mdi_phci_driver_info_t);
9097         } else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) {
9098                 st_driver_list = ib_phci_driver_list;
9099                 st_ndrivers = sizeof (ib_phci_driver_list) /
9100                     sizeof (mdi_phci_driver_info_t);
9101         } else {
9102                 st_driver_list = NULL;
9103                 st_ndrivers = 0;
9104         }
9105 
9106         for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) {
9107                 /* add this phci driver if not already added before */
9108                 for (j = 0; j < driver_conf_count; j++) {
9109                         if (strcmp((*driver_list)[j], p->phdriver_name) == 0)
9110                                 break;
9111                 }
9112                 if (j == driver_conf_count) {
9113                         add_to_phci_list(driver_list, root_support_list,
9114                             cur_elements, max_elements, p->phdriver_name,
9115                             p->phdriver_root_support);
9116                 }
9117         }
9118 }
9119 
9120 /*
9121  * Attach the phci driver instances associated with the specified vhci class.
9122  * If root is mounted attach all phci driver instances.
9123  * If root is not mounted, attach the instances of only those phci
9124  * drivers that have the root support.
9125  */
9126 static void
9127 attach_phci_drivers(char *vhci_class)
9128 {
9129         char    **driver_list, **p;
9130         int     *root_support_list;
9131         int     cur_elements, max_elements, i;
9132         major_t m;
9133 
9134         get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
9135             &cur_elements, &max_elements);
9136 
9137         for (i = 0; i < cur_elements; i++) {
9138                 if (modrootloaded || root_support_list[i]) {
9139                         m = ddi_name_to_major(driver_list[i]);
9140                         if (m != DDI_MAJOR_T_NONE &&
9141                             ddi_hold_installed_driver(m))
9142                                 ddi_rele_driver(m);
9143                 }
9144         }
9145 
9146         if (driver_list) {
9147                 for (i = 0, p = driver_list; i < cur_elements; i++, p++)
9148                         kmem_free(*p, strlen(*p) + 1);
9149                 kmem_free(driver_list, sizeof (char *) * max_elements);
9150                 kmem_free(root_support_list, sizeof (int) * max_elements);
9151         }
9152 }
9153 
9154 /*
9155  * Build vhci cache:
9156  *
9157  * Attach phci driver instances and then drive BUS_CONFIG_ALL on
9158  * the phci driver instances. During this process the cache gets built.
9159  *
9160  * Cache is built fully if the root is mounted.
9161  * If the root is not mounted, phci drivers that do not have root support
9162  * are not attached. As a result the cache is built partially. The entries
9163  * in the cache reflect only those phci drivers that have root support.
9164  */
9165 static int
9166 build_vhci_cache(mdi_vhci_t *vh)
9167 {
9168         mdi_vhci_config_t *vhc = vh->vh_config;
9169         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9170 
9171         single_threaded_vhconfig_enter(vhc);
9172 
9173         rw_enter(&vhcache->vhcache_lock, RW_READER);
9174         if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
9175                 rw_exit(&vhcache->vhcache_lock);
9176                 single_threaded_vhconfig_exit(vhc);
9177                 return (0);
9178         }
9179         rw_exit(&vhcache->vhcache_lock);
9180 
9181         attach_phci_drivers(vh->vh_class);
9182         bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
9183             BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
9184 
9185         rw_enter(&vhcache->vhcache_lock, RW_WRITER);
9186         vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
9187         rw_exit(&vhcache->vhcache_lock);
9188 
9189         single_threaded_vhconfig_exit(vhc);
9190         vhcache_dirty(vhc);
9191         return (1);
9192 }
9193 
9194 /*
9195  * Determine if discovery of paths is needed.
9196  */
9197 static int
9198 vhcache_do_discovery(mdi_vhci_config_t *vhc)
9199 {
9200         int rv = 1;
9201 
9202         mutex_enter(&vhc->vhc_lock);
9203         if (i_ddi_io_initialized() == 0) {
9204                 if (vhc->vhc_path_discovery_boot > 0) {
9205                         vhc->vhc_path_discovery_boot--;
9206                         goto out;
9207                 }
9208         } else {
9209                 if (vhc->vhc_path_discovery_postboot > 0) {
9210                         vhc->vhc_path_discovery_postboot--;
9211                         goto out;
9212                 }
9213         }
9214 
9215         /*
9216          * Do full path discovery at most once per mdi_path_discovery_interval.
9217          * This is to avoid a series of full path discoveries when opening
9218          * stale /dev/[r]dsk links.
9219          */
9220         if (mdi_path_discovery_interval != -1 &&
9221             ddi_get_lbolt64() >= vhc->vhc_path_discovery_cutoff_time)
9222                 goto out;
9223 
9224         rv = 0;
9225 out:
9226         mutex_exit(&vhc->vhc_lock);
9227         return (rv);
9228 }
9229 
9230 /*
9231  * Discover all paths:
9232  *
9233  * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
9234  * driver instances. During this process all paths will be discovered.
9235  */
9236 static int
9237 vhcache_discover_paths(mdi_vhci_t *vh)
9238 {
9239         mdi_vhci_config_t *vhc = vh->vh_config;
9240         mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9241         int rv = 0;
9242 
9243         single_threaded_vhconfig_enter(vhc);
9244 
9245         if (vhcache_do_discovery(vhc)) {
9246                 attach_phci_drivers(vh->vh_class);
9247                 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE |
9248                     NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
9249 
9250                 mutex_enter(&vhc->vhc_lock);
9251                 vhc->vhc_path_discovery_cutoff_time = ddi_get_lbolt64() +
9252                     mdi_path_discovery_interval * TICKS_PER_SECOND;
9253                 mutex_exit(&vhc->vhc_lock);
9254                 rv = 1;
9255         }
9256 
9257         single_threaded_vhconfig_exit(vhc);
9258         return (rv);
9259 }
9260 
/*
 * Generic vhci bus config implementation:
 *
 * Parameters
 *      vdip    vhci dip
 *      flags   bus config flags
 *      op      bus config operation
 *      The remaining parameters are bus config operation specific
 *
 * for BUS_CONFIG_ONE
 *      arg     pointer to name@addr
 *      child   upon successful return from this function, *child will be
 *              set to the configured and held devinfo child node of vdip.
 *      ct_addr pointer to client address (i.e. GUID)
 *
 * for BUS_CONFIG_DRIVER
 *      arg     major number of the driver
 *      child and ct_addr parameters are ignored
 *
 * for BUS_CONFIG_ALL
 *      arg, child, and ct_addr parameters are ignored
 *
 * Note that for the rest of the bus config operations, this function simply
 * calls the framework provided default bus config routine.
 *
 * Return Values:
 *      MDI_SUCCESS     ndi_busop_bus_config() succeeded, either on the first
 *                      attempt or (BUS_CONFIG_ONE only) on the retry that
 *                      follows a full path discovery
 *      MDI_FAILURE     otherwise
 */
int
mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
    void *arg, dev_info_t **child, char *ct_addr)
{
        mdi_vhci_t *vh = i_devi_get_vhci(vdip);
        mdi_vhci_config_t *vhc = vh->vh_config;
        mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
        /* non-zero only if build_vhci_cache() built the cache in this call */
        int rv = 0;
        /* set once a BUS_CONFIG_ONE request is seen to carry name@addr */
        int params_valid = 0;
        char *cp;

        /*
         * To bus config vhcis we relay operation, possibly using another
         * thread, to phcis. The phci driver then interacts with MDI to cause
         * vhci child nodes to be enumerated under the vhci node.  Adding a
         * vhci child requires an ndi_devi_enter of the vhci. Since another
         * thread may be adding the child, to avoid deadlock we can't wait
         * for the relayed operations to complete if we have already entered
         * the vhci node.
         */
        if (DEVI_BUSY_OWNED(vdip)) {
                MDI_DEBUG(2, (MDI_NOTE, vdip,
                    "vhci dip is busy owned %p", (void *)vdip));
                /*
                 * rv and params_valid are both still 0 here, so the
                 * discover-and-retry branch below cannot be taken.
                 */
                goto default_bus_config;
        }

        /*
         * Make sure the cache has been set up.  The reader lock is dropped
         * around build_vhci_cache(), which takes the lock itself, and is
         * re-acquired afterwards so that every switch case below starts
         * with the reader lock held.
         */
        rw_enter(&vhcache->vhcache_lock, RW_READER);
        if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
                rw_exit(&vhcache->vhcache_lock);
                rv = build_vhci_cache(vh);
                rw_enter(&vhcache->vhcache_lock, RW_READER);
        }

        switch (op) {
        case BUS_CONFIG_ONE:
                if (arg != NULL && ct_addr != NULL) {
                        /* extract node name */
                        cp = (char *)arg;
                        while (*cp != '\0' && *cp != '@')
                                cp++;
                        if (*cp == '@') {
                                params_valid = 1;
                                /*
                                 * Temporarily terminate the string at the
                                 * '@' so that only the node name is passed
                                 * down; the '@' is restored afterwards.
                                 */
                                *cp = '\0';
                                config_client_paths(vhc, (char *)arg, ct_addr);
                                /* config_client_paths() releases cache_lock */
                                *cp = '@';
                                break;
                        }
                }

                /* arg was not usable as name@addr; just drop the lock */
                rw_exit(&vhcache->vhcache_lock);
                break;

        case BUS_CONFIG_DRIVER:
                rw_exit(&vhcache->vhcache_lock);
                /*
                 * If the cache was just built (rv != 0), build_vhci_cache()
                 * has already driven BUS_CONFIG_ALL on the phcis, so the
                 * relay to the phcis is skipped.
                 */
                if (rv == 0)
                        st_bus_config_all_phcis(vhc, flags, op,
                            (major_t)(uintptr_t)arg);
                break;

        case BUS_CONFIG_ALL:
                rw_exit(&vhcache->vhcache_lock);
                /* same reasoning as for BUS_CONFIG_DRIVER above */
                if (rv == 0)
                        st_bus_config_all_phcis(vhc, flags, op, -1);
                break;

        default:
                rw_exit(&vhcache->vhcache_lock);
                break;
        }


default_bus_config:
        /*
         * All requested child nodes are enumerated under the vhci.
         * Now configure them.
         */
        if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
            NDI_SUCCESS) {
                return (MDI_SUCCESS);
        } else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
                /*
                 * discover all paths and try configuring again; the retry
                 * is attempted only if vhcache_discover_paths() reports
                 * that it actually ran a discovery.
                 */
                if (vhcache_discover_paths(vh) &&
                    ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
                    NDI_SUCCESS)
                        return (MDI_SUCCESS);
        }

        return (MDI_FAILURE);
}
9376 
9377 /*
9378  * Read the on-disk vhci cache into an nvlist for the specified vhci class.
9379  */
9380 static nvlist_t *
9381 read_on_disk_vhci_cache(char *vhci_class)
9382 {
9383         nvlist_t *nvl;
9384         int err;
9385         char *filename;
9386 
9387         filename = vhclass2vhcache_filename(vhci_class);
9388 
9389         if ((err = fread_nvlist(filename, &nvl)) == 0) {
9390                 kmem_free(filename, strlen(filename) + 1);
9391                 return (nvl);
9392         } else if (err == EIO)
9393                 cmn_err(CE_WARN, "%s: I/O error, will recreate", filename);
9394         else if (err == EINVAL)
9395                 cmn_err(CE_WARN,
9396                     "%s: data file corrupted, will recreate", filename);
9397 
9398         kmem_free(filename, strlen(filename) + 1);
9399         return (NULL);
9400 }
9401 
9402 /*
9403  * Read on-disk vhci cache into nvlists for all vhci classes.
9404  * Called during booting by i_ddi_read_devices_files().
9405  */
9406 void
9407 mdi_read_devices_files(void)
9408 {
9409         int i;
9410 
9411         for (i = 0; i < N_VHCI_CLASSES; i++)
9412                 vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
9413 }
9414 
9415 /*
9416  * Remove all stale entries from vhci cache.
9417  */
9418 static void
9419 clean_vhcache(mdi_vhci_config_t *vhc)
9420 {
9421         mdi_vhci_cache_t        *vhcache = &vhc->vhc_vhcache;
9422         mdi_vhcache_phci_t      *phci, *nxt_phci;
9423         mdi_vhcache_client_t    *client, *nxt_client;
9424         mdi_vhcache_pathinfo_t  *path, *nxt_path;
9425 
9426         rw_enter(&vhcache->vhcache_lock, RW_WRITER);
9427 
9428         client = vhcache->vhcache_client_head;
9429         vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
9430         for ( ; client != NULL; client = nxt_client) {
9431                 nxt_client = client->cct_next;
9432 
9433                 path = client->cct_cpi_head;
9434                 client->cct_cpi_head = client->cct_cpi_tail = NULL;
9435                 for ( ; path != NULL; path = nxt_path) {
9436                         nxt_path = path->cpi_next;
9437                         if ((path->cpi_cphci->cphci_phci != NULL) &&
9438                             (path->cpi_pip != NULL)) {
9439                                 enqueue_tail_vhcache_pathinfo(client, path);
9440                         } else if (path->cpi_pip != NULL) {
9441                                 /* Not valid to have a path without a phci. */
9442                                 free_vhcache_pathinfo(path);
9443                         }
9444                 }
9445 
9446                 if (client->cct_cpi_head != NULL)
9447                         enqueue_vhcache_client(vhcache, client);
9448                 else {
9449                         (void) mod_hash_destroy(vhcache->vhcache_client_hash,
9450                             (mod_hash_key_t)client->cct_name_addr);
9451                         free_vhcache_client(client);
9452                 }
9453         }
9454 
9455         phci = vhcache->vhcache_phci_head;
9456         vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
9457         for ( ; phci != NULL; phci = nxt_phci) {
9458 
9459                 nxt_phci = phci->cphci_next;
9460                 if (phci->cphci_phci != NULL)
9461                         enqueue_vhcache_phci(vhcache, phci);
9462                 else
9463                         free_vhcache_phci(phci);
9464         }
9465 
9466         vhcache->vhcache_clean_time = ddi_get_lbolt64();
9467         rw_exit(&vhcache->vhcache_lock);
9468         vhcache_dirty(vhc);
9469 }
9470 
9471 /*
9472  * Remove all stale entries from vhci cache.
9473  * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
9474  */
9475 void
9476 mdi_clean_vhcache(void)
9477 {
9478         mdi_vhci_t *vh;
9479 
9480         mutex_enter(&mdi_mutex);
9481         for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9482                 vh->vh_refcnt++;
9483                 mutex_exit(&mdi_mutex);
9484                 clean_vhcache(vh->vh_config);
9485                 mutex_enter(&mdi_mutex);
9486                 vh->vh_refcnt--;
9487         }
9488         mutex_exit(&mdi_mutex);
9489 }
9490 
9491 /*
9492  * mdi_vhci_walk_clients():
9493  *              Walker routine to traverse client dev_info nodes
9494  * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree
9495  * below the client, including nexus devices, which we dont want.
9496  * So we just traverse the immediate siblings, starting from 1st client.
9497  */
9498 void
9499 mdi_vhci_walk_clients(dev_info_t *vdip,
9500     int (*f)(dev_info_t *, void *), void *arg)
9501 {
9502         mdi_vhci_t      *vh = i_devi_get_vhci(vdip);
9503         dev_info_t      *cdip;
9504         mdi_client_t    *ct;
9505 
9506         MDI_VHCI_CLIENT_LOCK(vh);
9507         cdip = ddi_get_child(vdip);
9508         while (cdip) {
9509                 ct = i_devi_get_client(cdip);
9510                 MDI_CLIENT_LOCK(ct);
9511 
9512                 if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE)
9513                         cdip = ddi_get_next_sibling(cdip);
9514                 else
9515                         cdip = NULL;
9516 
9517                 MDI_CLIENT_UNLOCK(ct);
9518         }
9519         MDI_VHCI_CLIENT_UNLOCK(vh);
9520 }
9521 
9522 /*
9523  * mdi_vhci_walk_phcis():
9524  *              Walker routine to traverse phci dev_info nodes
9525  */
9526 void
9527 mdi_vhci_walk_phcis(dev_info_t *vdip,
9528     int (*f)(dev_info_t *, void *), void *arg)
9529 {
9530         mdi_vhci_t      *vh = i_devi_get_vhci(vdip);
9531         mdi_phci_t      *ph, *next;
9532 
9533         MDI_VHCI_PHCI_LOCK(vh);
9534         ph = vh->vh_phci_head;
9535         while (ph) {
9536                 MDI_PHCI_LOCK(ph);
9537 
9538                 if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE)
9539                         next = ph->ph_next;
9540                 else
9541                         next = NULL;
9542 
9543                 MDI_PHCI_UNLOCK(ph);
9544                 ph = next;
9545         }
9546         MDI_VHCI_PHCI_UNLOCK(vh);
9547 }
9548 
9549 
9550 /*
9551  * mdi_walk_vhcis():
9552  *              Walker routine to traverse vhci dev_info nodes
9553  */
9554 void
9555 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg)
9556 {
9557         mdi_vhci_t      *vh = NULL;
9558 
9559         mutex_enter(&mdi_mutex);
9560         /*
9561          * Scan for already registered vhci
9562          */
9563         for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9564                 vh->vh_refcnt++;
9565                 mutex_exit(&mdi_mutex);
9566                 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) {
9567                         mutex_enter(&mdi_mutex);
9568                         vh->vh_refcnt--;
9569                         break;
9570                 } else {
9571                         mutex_enter(&mdi_mutex);
9572                         vh->vh_refcnt--;
9573                 }
9574         }
9575 
9576         mutex_exit(&mdi_mutex);
9577 }
9578 
9579 /*
9580  * i_mdi_log_sysevent():
9581  *              Logs events for pickup by syseventd
9582  */
9583 static void
9584 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass)
9585 {
9586         char            *path_name;
9587         nvlist_t        *attr_list;
9588 
9589         if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE,
9590             KM_SLEEP) != DDI_SUCCESS) {
9591                 goto alloc_failed;
9592         }
9593 
9594         path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
9595         (void) ddi_pathname(dip, path_name);
9596 
9597         if (nvlist_add_string(attr_list, DDI_DRIVER_NAME,
9598             ddi_driver_name(dip)) != DDI_SUCCESS) {
9599                 goto error;
9600         }
9601 
9602         if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR,
9603             (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) {
9604                 goto error;
9605         }
9606 
9607         if (nvlist_add_int32(attr_list, DDI_INSTANCE,
9608             (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) {
9609                 goto error;
9610         }
9611 
9612         if (nvlist_add_string(attr_list, DDI_PATHNAME,
9613             path_name) != DDI_SUCCESS) {
9614                 goto error;
9615         }
9616 
9617         if (nvlist_add_string(attr_list, DDI_CLASS,
9618             ph_vh_class) != DDI_SUCCESS) {
9619                 goto error;
9620         }
9621 
9622         (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass,
9623             attr_list, NULL, DDI_SLEEP);
9624 
9625 error:
9626         kmem_free(path_name, MAXPATHLEN);
9627         nvlist_free(attr_list);
9628         return;
9629 
9630 alloc_failed:
9631         MDI_DEBUG(1, (MDI_WARN, dip, "!unable to send sysevent"));
9632 }
9633 
9634 char **
9635 mdi_get_phci_driver_list(char *vhci_class, int  *ndrivers)
9636 {
9637         char    **driver_list, **ret_driver_list = NULL;
9638         int     *root_support_list;
9639         int     cur_elements, max_elements;
9640 
9641         get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
9642             &cur_elements, &max_elements);
9643 
9644 
9645         if (driver_list) {
9646                 kmem_free(root_support_list, sizeof (int) * max_elements);
9647                 ret_driver_list = mdi_realloc(driver_list, sizeof (char *)
9648                     * max_elements, sizeof (char *) * cur_elements);
9649         }
9650         *ndrivers = cur_elements;
9651 
9652         return (ret_driver_list);
9653 
9654 }
9655 
/*
 * mdi_free_phci_driver_list():
 *              Free an array of ndrivers driver name strings.
 *
 * Each string is freed with its length plus the terminating NUL, then the
 * array of ndrivers pointers itself is freed.  A NULL driver_list is a
 * no-op.
 */
void
mdi_free_phci_driver_list(char **driver_list, int ndrivers)
{
        int     idx;

        if (driver_list == NULL)
                return;

        for (idx = 0; idx < ndrivers; idx++)
                kmem_free(driver_list[idx], strlen(driver_list[idx]) + 1);

        kmem_free(driver_list, sizeof (char *) * ndrivers);
}
9668 
9669 /*
9670  * mdi_is_dev_supported():
9671  *              function called by pHCI bus config operation to determine if a
9672  *              device should be represented as a child of the vHCI or the
9673  *              pHCI.  This decision is made by the vHCI, using cinfo idenity
9674  *              information passed by the pHCI - specifics of the cinfo
9675  *              representation are by agreement between the pHCI and vHCI.
9676  * Return Values:
9677  *              MDI_SUCCESS
9678  *              MDI_FAILURE
9679  */
9680 int
9681 mdi_is_dev_supported(char *class, dev_info_t *pdip, void *cinfo)
9682 {
9683         mdi_vhci_t      *vh;
9684 
9685         ASSERT(class && pdip);
9686 
9687         /*
9688          * For dev_supported, mdi_phci_register() must have established pdip as
9689          * a pHCI.
9690          *
9691          * NOTE: mdi_phci_register() does "mpxio-disable" processing, and
9692          * MDI_PHCI(pdip) will return false if mpxio is disabled.
9693          */
9694         if (!MDI_PHCI(pdip))
9695                 return (MDI_FAILURE);
9696 
9697         /* Return MDI_FAILURE if vHCI does not support asking the question. */
9698         vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
9699         if ((vh == NULL) || (vh->vh_ops->vo_is_dev_supported == NULL)) {
9700                 return (MDI_FAILURE);
9701         }
9702 
9703         /* Return vHCI answer */
9704         return (vh->vh_ops->vo_is_dev_supported(vh->vh_dip, pdip, cinfo));
9705 }
9706 
9707 int
9708 mdi_dc_return_dev_state(mdi_pathinfo_t *pip, struct devctl_iocdata *dcp)
9709 {
9710         uint_t devstate = 0;
9711         dev_info_t *cdip;
9712 
9713         if ((pip == NULL) || (dcp == NULL))
9714                 return (MDI_FAILURE);
9715 
9716         cdip = mdi_pi_get_client(pip);
9717 
9718         switch (mdi_pi_get_state(pip)) {
9719         case MDI_PATHINFO_STATE_INIT:
9720                 devstate = DEVICE_DOWN;
9721                 break;
9722         case MDI_PATHINFO_STATE_ONLINE:
9723                 devstate = DEVICE_ONLINE;
9724                 if ((cdip) && (devi_stillreferenced(cdip) == DEVI_REFERENCED))
9725                         devstate |= DEVICE_BUSY;
9726                 break;
9727         case MDI_PATHINFO_STATE_STANDBY:
9728                 devstate = DEVICE_ONLINE;
9729                 break;
9730         case MDI_PATHINFO_STATE_FAULT:
9731                 devstate = DEVICE_DOWN;
9732                 break;
9733         case MDI_PATHINFO_STATE_OFFLINE:
9734                 devstate = DEVICE_OFFLINE;
9735                 break;
9736         default:
9737                 ASSERT(MDI_PI(pip)->pi_state);
9738         }
9739 
9740         if (copyout(&devstate, dcp->cpyout_buf, sizeof (uint_t)) != 0)
9741                 return (MDI_FAILURE);
9742 
9743         return (MDI_SUCCESS);
9744 }