1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright (c) 2014 Nexenta Systems Inc. All rights reserved. 25 */ 26 27 /* 28 * Multipath driver interface (MDI) implementation; see mdi_impldefs.h for a 29 * more detailed discussion of the overall mpxio architecture. 
30 */ 31 32 #include <sys/note.h> 33 #include <sys/types.h> 34 #include <sys/varargs.h> 35 #include <sys/param.h> 36 #include <sys/errno.h> 37 #include <sys/uio.h> 38 #include <sys/buf.h> 39 #include <sys/modctl.h> 40 #include <sys/open.h> 41 #include <sys/kmem.h> 42 #include <sys/poll.h> 43 #include <sys/conf.h> 44 #include <sys/bootconf.h> 45 #include <sys/cmn_err.h> 46 #include <sys/stat.h> 47 #include <sys/ddi.h> 48 #include <sys/sunddi.h> 49 #include <sys/ddipropdefs.h> 50 #include <sys/sunndi.h> 51 #include <sys/ndi_impldefs.h> 52 #include <sys/promif.h> 53 #include <sys/sunmdi.h> 54 #include <sys/mdi_impldefs.h> 55 #include <sys/taskq.h> 56 #include <sys/epm.h> 57 #include <sys/sunpm.h> 58 #include <sys/modhash.h> 59 #include <sys/disp.h> 60 #include <sys/autoconf.h> 61 #include <sys/sysmacros.h> 62 63 #ifdef DEBUG 64 #include <sys/debug.h> 65 int mdi_debug = 1; 66 int mdi_debug_logonly = 0; 67 #define MDI_DEBUG(dbglevel, pargs) if (mdi_debug >= (dbglevel)) i_mdi_log pargs 68 #define MDI_WARN CE_WARN, __func__ 69 #define MDI_NOTE CE_NOTE, __func__ 70 #define MDI_CONT CE_CONT, __func__ 71 static void i_mdi_log(int, const char *, dev_info_t *, const char *, ...); 72 #else /* !DEBUG */ 73 #define MDI_DEBUG(dbglevel, pargs) 74 #endif /* DEBUG */ 75 int mdi_debug_consoleonly = 0; 76 int mdi_delay = 3; 77 78 extern pri_t minclsyspri; 79 extern int modrootloaded; 80 81 /* 82 * Global mutex: 83 * Protects vHCI list and structure members. 
 */
kmutex_t	mdi_mutex;

/*
 * Registered vHCI class driver lists.
 * Singly linked list (vh_next) with head/tail pointers; mdi_vhci_count
 * tracks its length.  All three are protected by mdi_mutex.
 */
int		mdi_vhci_count;
mdi_vhci_t	*mdi_vhci_head;
mdi_vhci_t	*mdi_vhci_tail;

/*
 * Client Hash Table size (per-vHCI client lookup table, vh_client_table)
 */
static int	mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;

/*
 * taskq interface definitions
 */
#define	MDI_TASKQ_N_THREADS	8
#define	MDI_TASKQ_PRI		minclsyspri
#define	MDI_TASKQ_MINALLOC	(4*mdi_taskq_n_threads)
#define	MDI_TASKQ_MAXALLOC	(500*mdi_taskq_n_threads)

taskq_t				*mdi_taskq;
static uint_t			mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;

#define	TICKS_PER_SECOND	(drv_usectohz(1000000))

/*
 * The data should be "quiet" for this interval (in seconds) before the
 * vhci cached data is flushed to the disk.
 */
static int mdi_vhcache_flush_delay = 10;

/* number of seconds the vhcache flush daemon will sleep idle before exiting */
static int mdi_vhcache_flush_daemon_idle_time = 60;

/*
 * MDI falls back to discovery of all paths when a bus_config_one fails.
 * The following parameters can be used to tune this operation.
 *
 * mdi_path_discovery_boot
 *	Number of times path discovery will be attempted during early boot.
 *	Probably there is no reason to ever set this value to greater than one.
 *
 * mdi_path_discovery_postboot
 *	Number of times path discovery will be attempted after early boot.
 *	Set it to a minimum of two to allow for discovery of iscsi paths which
 *	may happen very late during booting.
 *
 * mdi_path_discovery_interval
 *	Minimum number of seconds MDI will wait between successive discovery
 *	of all paths. Set it to -1 to disable discovery of all paths.
 */
static int mdi_path_discovery_boot = 1;
static int mdi_path_discovery_postboot = 2;
static int mdi_path_discovery_interval = 10;

/*
 * number of seconds the asynchronous configuration thread will sleep idle
 * before exiting.
 */
static int mdi_async_config_idle_time = 600;

/* hash size for the per-vHCI bus_config cache */
static int mdi_bus_config_cache_hash_size = 256;

/* turns off multithreaded configuration for certain operations */
static int mdi_mtc_off = 0;

/*
 * The "path" to a pathinfo node is identical to the /devices path to a
 * devinfo node had the device been enumerated under a pHCI instead of
 * a vHCI.  This pathinfo "path" is associated with a 'path_instance'.
 * This association persists across create/delete of the pathinfo nodes,
 * but not across reboot.
 *
 * The three mod_hash tables below hold both directions of the mapping
 * (plus a short-form variant) and are protected by mdi_pathmap_mutex.
 */
static uint_t mdi_pathmap_instance = 1;	/* 0 -> any path */
static int mdi_pathmap_hash_size = 256;
static kmutex_t mdi_pathmap_mutex;
static mod_hash_t *mdi_pathmap_bypath;		/* "path"->instance */
static mod_hash_t *mdi_pathmap_byinstance;	/* instance->"path" */
static mod_hash_t *mdi_pathmap_sbyinstance;	/* inst->shortpath */

/*
 * MDI component property name/value string definitions
 */
const char *mdi_component_prop = "mpxio-component";
const char *mdi_component_prop_vhci = "vhci";
const char *mdi_component_prop_phci = "phci";
const char *mdi_component_prop_client = "client";

/*
 * MDI client global unique identifier property name
 */
const char *mdi_client_guid_prop = "client-guid";

/*
 * MDI client load balancing property name/value string definitions
 */
const char *mdi_load_balance = "load-balance";
const char *mdi_load_balance_none = "none";
const char *mdi_load_balance_rr = "round-robin";
const char *mdi_load_balance_lba = "logical-block";

/*
 * Obsolete vHCI class definition; to be removed after Leadville update
 */
const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;

/* printed (via cmn_err) when a second vHCI registers for the same class */
static char vhci_greeting[] =
	"\tThere already exists one vHCI driver for class %s\n"
	"\tOnly one vHCI driver for each class is allowed\n";

/*
 * Static function prototypes
 */
static int		i_mdi_phci_offline(dev_info_t *, uint_t);
static int		i_mdi_client_offline(dev_info_t *, uint_t);
static int		i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
static void		i_mdi_phci_post_detach(dev_info_t *,
			    ddi_detach_cmd_t, int);
static int		i_mdi_client_pre_detach(dev_info_t *,
			    ddi_detach_cmd_t);
static void		i_mdi_client_post_detach(dev_info_t *,
			    ddi_detach_cmd_t, int);
static void		i_mdi_pm_hold_pip(mdi_pathinfo_t *);
static void		i_mdi_pm_rele_pip(mdi_pathinfo_t *);
static int		i_mdi_lba_lb(mdi_client_t *ct,
			    mdi_pathinfo_t **ret_pip, struct buf *buf);
static void		i_mdi_pm_hold_client(mdi_client_t *, int);
static void		i_mdi_pm_rele_client(mdi_client_t *, int);
static void		i_mdi_pm_reset_client(mdi_client_t *);
static int		i_mdi_power_all_phci(mdi_client_t *);
static void		i_mdi_log_sysevent(dev_info_t *, char *, char *);


/*
 * Internal mdi_pathinfo node functions
 */
static void		i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);

static mdi_vhci_t	*i_mdi_vhci_class2vhci(char *);
static mdi_vhci_t	*i_devi_get_vhci(dev_info_t *);
static mdi_phci_t	*i_devi_get_phci(dev_info_t *);
static void		i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
static void		i_mdi_phci_unlock(mdi_phci_t *);
static mdi_pathinfo_t	*i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
static void		i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
static void		i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
static void		i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
			    mdi_client_t *);
static void		i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
static void		i_mdi_client_remove_path(mdi_client_t *,
			    mdi_pathinfo_t *);

static int		i_mdi_pi_state_change(mdi_pathinfo_t *,
			    mdi_pathinfo_state_t, int);
static int		i_mdi_pi_offline(mdi_pathinfo_t *, int);
static dev_info_t	*i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
			    char **, int);
static dev_info_t	*i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
static int		i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
static int		i_mdi_is_child_present(dev_info_t *, dev_info_t *);
static mdi_client_t	*i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
static void		i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
static void		i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
static mdi_client_t	*i_mdi_client_find(mdi_vhci_t *, char *, char *);
static void		i_mdi_client_update_state(mdi_client_t *);
static int		i_mdi_client_compute_state(mdi_client_t *,
			    mdi_phci_t *);
static void		i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
static void		i_mdi_client_unlock(mdi_client_t *);
static int		i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
static mdi_client_t	*i_devi_get_client(dev_info_t *);
/*
 * NOTE: this will be removed once the NWS files are changed to use the new
 * mdi_{enable,disable}_path interfaces
 */
static int		i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
			    int, int);
static mdi_pathinfo_t	*i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
			    mdi_vhci_t *vh, int flags, int op);
/*
 * Failover related function prototypes
 */
static int		i_mdi_failover(void *);

/*
 * misc internal functions
 */
static int		i_mdi_get_hash_key(char *);
static int		i_map_nvlist_error_to_mdi(int);
static void		i_mdi_report_path_state(mdi_client_t *,
			    mdi_pathinfo_t *);

static void		setup_vhci_cache(mdi_vhci_t *);
static int		destroy_vhci_cache(mdi_vhci_t *);
static int		stop_vhcache_async_threads(mdi_vhci_config_t *);
static boolean_t	stop_vhcache_flush_thread(void *, int);
static void		free_string_array(char **, int);
static void		free_vhcache_phci(mdi_vhcache_phci_t *);
static void		free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
static void		free_vhcache_client(mdi_vhcache_client_t *);
static int		mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
static nvlist_t		*vhcache_to_mainnvl(mdi_vhci_cache_t *);
static void		vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
static void		vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
static void		vhcache_pi_add(mdi_vhci_config_t *,
			    struct mdi_pathinfo *);
static void		vhcache_pi_remove(mdi_vhci_config_t *,
			    struct mdi_pathinfo *);
static void		free_phclient_path_list(mdi_phys_path_t *);
static void		sort_vhcache_paths(mdi_vhcache_client_t *);
static int		flush_vhcache(mdi_vhci_config_t *, int);
static void		vhcache_dirty(mdi_vhci_config_t *);
static void		free_async_client_config(mdi_async_client_config_t *);
static void		single_threaded_vhconfig_enter(mdi_vhci_config_t *);
static void		single_threaded_vhconfig_exit(mdi_vhci_config_t *);
static nvlist_t		*read_on_disk_vhci_cache(char *);
extern int		fread_nvlist(char *, nvlist_t **);
extern int		fwrite_nvlist(char *, nvlist_t *);

/*
 * called once when first vhci registers with mdi
 *
 * Sets up the global mdi_mutex, the shared taskq, and the three
 * path_instance <-> "path" mod_hash maps.
 *
 * NOTE(review): the 'initialized' guard is a plain static int, not atomic;
 * presumably first-vHCI registration is serialized by the framework —
 * confirm before assuming concurrent first callers are safe.
 */
static void
i_mdi_init()
{
	static int initialized = 0;

	if (initialized)
		return;
	initialized = 1;

	mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);

	/* Create our taskq resources */
	mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
	    MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
	    TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
	ASSERT(mdi_taskq != NULL);	/* taskq_create never fails */

	/* Allocate ['path_instance' <-> "path"] maps */
	mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL);
	mdi_pathmap_bypath = mod_hash_create_strhash(
	    "mdi_pathmap_bypath", mdi_pathmap_hash_size,
	    mod_hash_null_valdtor);
	mdi_pathmap_byinstance = mod_hash_create_idhash(
	    "mdi_pathmap_byinstance", mdi_pathmap_hash_size,
	    mod_hash_null_valdtor);
	mdi_pathmap_sbyinstance = mod_hash_create_idhash(
	    "mdi_pathmap_sbyinstance", mdi_pathmap_hash_size,
	    mod_hash_null_valdtor);
}

/*
 * mdi_get_component_type():
 *	Return mpxio component type
 * Return Values:
 *	MDI_COMPONENT_NONE
 *	MDI_COMPONENT_VHCI
 *	MDI_COMPONENT_PHCI
 *	MDI_COMPONENT_CLIENT
 * XXX This doesn't work under multi-level MPxIO and should be
 *	removed when clients migrate mdi_component_is_*() interfaces.
 */
int
mdi_get_component_type(dev_info_t *dip)
{
	return (DEVI(dip)->devi_mdi_component);
}

/*
 * mdi_vhci_register():
 *	Register a vHCI module with the mpxio framework
 *	mdi_vhci_register() is called by vHCI drivers to register the
 *	'class_driver' vHCI driver and its MDI entrypoints with the
 *	mpxio framework.  The vHCI driver must call this interface as
 *	part of its attach(9e) handler.
 *	Competing threads may try to attach mdi_vhci_register() as
 *	the vHCI drivers are loaded and attached as a result of pHCI
 *	driver instance registration (mdi_phci_register()) with the
 *	framework.
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
    int flags)
{
	mdi_vhci_t		*vh = NULL;

	/* Registrant can't be older */
	ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV);

#ifdef DEBUG
	/*
	 * IB nexus driver is loaded only when IB hardware is present.
	 * In order to be able to do this there is a need to drive the loading
	 * and attaching of the IB nexus driver (especially when an IB hardware
	 * is dynamically plugged in) when an IB HCA driver (PHCI)
	 * is being attached. Unfortunately this gets into the limitations
	 * of devfs as there seems to be no clean way to drive configuration
	 * of a subtree from another subtree of a devfs. Hence, do not ASSERT
	 * for IB.
	 */
	if (strcmp(class, MDI_HCI_CLASS_IB) != 0)
		ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
#endif

	i_mdi_init();

	mutex_enter(&mdi_mutex);
	/*
	 * Scan for already registered vhci
	 */
	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
		if (strcmp(vh->vh_class, class) == 0) {
			/*
			 * vHCI has already been created.  Check for valid
			 * vHCI ops registration. We only support one vHCI
			 * module per class
			 */
			if (vh->vh_ops != NULL) {
				mutex_exit(&mdi_mutex);
				cmn_err(CE_NOTE, vhci_greeting, class);
				return (MDI_FAILURE);
			}
			break;
		}
	}

	/*
	 * if not yet created, create the vHCI component
	 *
	 * NOTE(review): when an existing vh with vh_ops == NULL is reused
	 * (the break above), this branch is skipped and vh_ops/vh_dip are
	 * NOT refreshed with the new registrant's values.  That path looks
	 * unreachable today (mdi_vhci_unregister() frees the vhci outright)
	 * — confirm before relying on re-registration of a stale vhci.
	 */
	if (vh == NULL) {
		struct client_hash	*hash = NULL;
		char			*load_balance;

		/*
		 * Allocate and initialize the mdi extensions
		 */
		vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
		hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
		    KM_SLEEP);
		vh->vh_client_table = hash;
		vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
		(void) strcpy(vh->vh_class, class);
		/* default load-balance policy; overridden by driver .conf */
		vh->vh_lb = LOAD_BALANCE_RR;
		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
		    0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
			if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
				vh->vh_lb = LOAD_BALANCE_NONE;
			} else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
			    == 0) {
				vh->vh_lb = LOAD_BALANCE_LBA;
			}
			ddi_prop_free(load_balance);
		}

		mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL);
		mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL);

		/*
		 * Store the vHCI ops vectors
		 */
		vh->vh_dip = vdip;
		vh->vh_ops = vops;

		setup_vhci_cache(vh);

		/* append to the global list (mdi_mutex held) */
		if (mdi_vhci_head == NULL) {
			mdi_vhci_head = vh;
		}
		if (mdi_vhci_tail) {
			mdi_vhci_tail->vh_next = vh;
		}
		mdi_vhci_tail = vh;
		mdi_vhci_count++;
	}

	/*
	 * Claim the devfs node as a vhci component
	 */
	DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;

	/*
	 * Initialize our back reference from dev_info node
	 */
	DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
	mutex_exit(&mdi_mutex);
	return (MDI_SUCCESS);
}

/*
 * mdi_vhci_unregister():
 *	Unregister a vHCI module from mpxio framework
 *	mdi_vhci_unregister() is called from the detach(9E) entrypoint
 *	of a vhci to unregister it from the framework.
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_vhci_unregister(dev_info_t *vdip, int flags)
{
	mdi_vhci_t	*found, *vh, *prev = NULL;

	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));

	/*
	 * Check for invalid VHCI
	 */
	if ((vh = i_devi_get_vhci(vdip)) == NULL)
		return (MDI_FAILURE);

	/*
	 * Scan the list of registered vHCIs for a match
	 */
	mutex_enter(&mdi_mutex);
	for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
		if (found == vh)
			break;
		prev = found;
	}

	if (found == NULL) {
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}

	/*
	 * Check the vHCI, pHCI and client count. All the pHCIs and clients
	 * should have been unregistered, before a vHCI can be
	 * unregistered.
	 */
	MDI_VHCI_PHCI_LOCK(vh);
	if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) {
		MDI_VHCI_PHCI_UNLOCK(vh);
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}
	MDI_VHCI_PHCI_UNLOCK(vh);

	if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}

	/*
	 * Remove the vHCI from the global list
	 */
	if (vh == mdi_vhci_head) {
		mdi_vhci_head = vh->vh_next;
	} else {
		prev->vh_next = vh->vh_next;
	}
	if (vh == mdi_vhci_tail) {
		mdi_vhci_tail = prev;
	}
	mdi_vhci_count--;
	mutex_exit(&mdi_mutex);

	/*
	 * vh was unlinked above while mdi_mutex was held, so it is safe to
	 * tear it down after dropping the lock.
	 */
	vh->vh_ops = NULL;
	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
	DEVI(vdip)->devi_mdi_xhci = NULL;
	kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
	kmem_free(vh->vh_client_table,
	    mdi_client_table_size * sizeof (struct client_hash));
	mutex_destroy(&vh->vh_phci_mutex);
	mutex_destroy(&vh->vh_client_mutex);

	kmem_free(vh, sizeof (mdi_vhci_t));
	return (MDI_SUCCESS);
}

/*
 * i_mdi_vhci_class2vhci():
 *	Look for a matching vHCI module given a vHCI class name
 * Return Values:
 *	Handle to a vHCI component
 *	NULL
 */
static mdi_vhci_t *
i_mdi_vhci_class2vhci(char *class)
{
	mdi_vhci_t	*vh = NULL;

	ASSERT(!MUTEX_HELD(&mdi_mutex));

	mutex_enter(&mdi_mutex);
	for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
		if (strcmp(vh->vh_class, class) == 0) {
			break;
		}
	}
	mutex_exit(&mdi_mutex);
	return (vh);
}

/*
 * i_devi_get_vhci():
 *	Utility function to get the handle to a vHCI component
 * Return Values:
 *	Handle to a vHCI component
 *	NULL
 */
mdi_vhci_t *
i_devi_get_vhci(dev_info_t *vdip)
{
	mdi_vhci_t	*vh = NULL;
	if (MDI_VHCI(vdip)) {
		vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
	}
	return (vh);
}

/*
 * mdi_phci_register():
 *	Register a pHCI
 *	module with mpxio framework
 *	mdi_phci_register() is called by pHCI drivers to register with
 *	the mpxio framework and a specific 'class_driver' vHCI.  The
 *	pHCI driver must call this interface as part of its attach(9e)
 *	handler.
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_phci_register(char *class, dev_info_t *pdip, int flags)
{
	mdi_phci_t		*ph;
	mdi_vhci_t		*vh;
	char			*data;

	/*
	 * Some subsystems, like fcp, perform pHCI registration from a
	 * different thread than the one doing the pHCI attach(9E) - the
	 * driver attach code is waiting for this other thread to complete.
	 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent
	 * (indicating that some thread has done an ndi_devi_enter of parent)
	 * not DEVI_BUSY_OWNED (which would indicate that we did the enter).
	 */
	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));

	/*
	 * Check for mpxio-disable property. Enable mpxio if the property is
	 * missing or not set to "yes".
	 * If the property is set to "yes" then emit a brief message.
	 */
	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
	    &data) == DDI_SUCCESS)) {
		if (strcmp(data, "yes") == 0) {
			MDI_DEBUG(1, (MDI_CONT, pdip,
			    "?multipath capabilities disabled via %s.conf.",
			    ddi_driver_name(pdip)));
			ddi_prop_free(data);
			return (MDI_FAILURE);
		}
		ddi_prop_free(data);
	}

	/*
	 * Search for a matching vHCI
	 */
	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
	if (vh == NULL) {
		return (MDI_FAILURE);
	}

	/* allocate and initialize the pHCI extension */
	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
	ph->ph_dip = pdip;
	ph->ph_vhci = vh;
	ph->ph_next = NULL;
	ph->ph_unstable = 0;
	ph->ph_vprivate = 0;
	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);

	MDI_PHCI_LOCK(ph);
	MDI_PHCI_SET_POWER_UP(ph);
	MDI_PHCI_UNLOCK(ph);
	/* mark the devinfo node and set the back reference before listing */
	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;

	vhcache_phci_add(vh->vh_config, ph);

	/* append to the vHCI's pHCI list under the pHCI-list lock */
	MDI_VHCI_PHCI_LOCK(vh);
	if (vh->vh_phci_head == NULL) {
		vh->vh_phci_head = ph;
	}
	if (vh->vh_phci_tail) {
		vh->vh_phci_tail->ph_next = ph;
	}
	vh->vh_phci_tail = ph;
	vh->vh_phci_count++;
	MDI_VHCI_PHCI_UNLOCK(vh);

	i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
	return (MDI_SUCCESS);
}

/*
 * mdi_phci_unregister():
 *	Unregister a pHCI module from mpxio framework
 *	mdi_phci_unregister() is called by the pHCI drivers from their
 *	detach(9E) handler to unregister their instances from the
 *	framework.
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_phci_unregister(dev_info_t *pdip, int flags)
{
	mdi_vhci_t		*vh;
	mdi_phci_t		*ph;
	mdi_phci_t		*tmp;
	mdi_phci_t		*prev = NULL;
	mdi_pathinfo_t		*pip;

	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));

	ph = i_devi_get_phci(pdip);
	if (ph == NULL) {
		MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid pHCI"));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh != NULL);
	if (vh == NULL) {
		MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid vHCI"));
		return (MDI_FAILURE);
	}

	/* unlink ph from the vHCI's singly linked pHCI list */
	MDI_VHCI_PHCI_LOCK(vh);
	tmp = vh->vh_phci_head;
	while (tmp) {
		if (tmp == ph) {
			break;
		}
		prev = tmp;
		tmp = tmp->ph_next;
	}

	if (ph == vh->vh_phci_head) {
		vh->vh_phci_head = ph->ph_next;
	} else {
		prev->ph_next = ph->ph_next;
	}

	if (ph == vh->vh_phci_tail) {
		vh->vh_phci_tail = prev;
	}

	vh->vh_phci_count--;
	MDI_VHCI_PHCI_UNLOCK(vh);

	/* Walk remaining pathinfo nodes and disassociate them from pHCI */
	MDI_PHCI_LOCK(ph);
	for (pip = (mdi_pathinfo_t *)ph->ph_path_head; pip;
	    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link)
		MDI_PI(pip)->pi_phci = NULL;
	MDI_PHCI_UNLOCK(ph);

	/* sysevent must be logged before ph (and ph->ph_vhci ref) is freed */
	i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
	    ESC_DDI_INITIATOR_UNREGISTER);
	vhcache_phci_remove(vh->vh_config, ph);
	cv_destroy(&ph->ph_unstable_cv);
	mutex_destroy(&ph->ph_mutex);
	kmem_free(ph, sizeof (mdi_phci_t));
	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
	DEVI(pdip)->devi_mdi_xhci = NULL;
	return (MDI_SUCCESS);
}

/*
 * i_devi_get_phci():
 *	Utility function to return the phci extensions.
 */
static mdi_phci_t *
i_devi_get_phci(dev_info_t *pdip)
{
	mdi_phci_t	*ph = NULL;

	if (MDI_PHCI(pdip)) {
		ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
	}
	return (ph);
}

/*
 * Single thread mdi entry into devinfo node for modifying its children.
 * If necessary we perform an ndi_devi_enter of the vHCI before doing
 * an ndi_devi_enter of 'dip'.  We maintain circular in two parts: one
 * for the vHCI and one for the pHCI.
 *
 * On return, *circular packs both recursion values: vHCI value in the
 * upper 16 bits, pHCI value in the lower 16 bits (see mdi_devi_exit()).
 */
void
mdi_devi_enter(dev_info_t *phci_dip, int *circular)
{
	dev_info_t	*vdip;
	int		vcircular, pcircular;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));
	vdip = mdi_devi_get_vdip(phci_dip);
	ASSERT(vdip);			/* A pHCI always has a vHCI */

	/*
	 * If pHCI is detaching then the framework has already entered the
	 * vHCI on a threads that went down the code path leading to
	 * detach_node().  This framework enter of the vHCI during pHCI
	 * detach is done to avoid deadlock with vHCI power management
	 * operations which enter the vHCI and the enter down the path
	 * to the pHCI.  If pHCI is detaching then we piggyback this calls
	 * enter of the vHCI on frameworks vHCI enter that has already
	 * occurred - this is OK because we know that the framework thread
	 * doing detach is waiting for our completion.
	 *
	 * We should DEVI_IS_DETACHING under an enter of the parent to avoid
	 * race with detach - but we can't do that because the framework has
	 * already entered the parent, so we have some complexity instead.
	 */
	for (;;) {
		if (ndi_devi_tryenter(vdip, &vcircular)) {
			ASSERT(vcircular != -1);
			if (DEVI_IS_DETACHING(phci_dip)) {
				/*
				 * Framework already holds the vHCI for this
				 * detach; record -1 so mdi_devi_exit() skips
				 * the vHCI exit.
				 */
				ndi_devi_exit(vdip, vcircular);
				vcircular = -1;
			}
			break;
		} else if (DEVI_IS_DETACHING(phci_dip)) {
			vcircular = -1;
			break;
		} else if (servicing_interrupt()) {
			/*
			 * Don't delay an interrupt (and ensure adaptive
			 * mutex inversion support).
			 */
			ndi_devi_enter(vdip, &vcircular);
			break;
		} else {
			delay_random(mdi_delay);
		}
	}

	ndi_devi_enter(phci_dip, &pcircular);
	*circular = (vcircular << 16) | (pcircular & 0xFFFF);
}

/*
 * Attempt to mdi_devi_enter.
 * Returns 1 with *circular set if both vHCI and pHCI were entered,
 * 0 (busy) if either could not be acquired without blocking.
 */
int
mdi_devi_tryenter(dev_info_t *phci_dip, int *circular)
{
	dev_info_t	*vdip;
	int		vcircular, pcircular;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));
	vdip = mdi_devi_get_vdip(phci_dip);
	ASSERT(vdip);			/* A pHCI always has a vHCI */

	if (ndi_devi_tryenter(vdip, &vcircular)) {
		if (ndi_devi_tryenter(phci_dip, &pcircular)) {
			*circular = (vcircular << 16) | (pcircular & 0xFFFF);
			return (1);	/* locked */
		}
		/* pHCI busy: back out the vHCI enter */
		ndi_devi_exit(vdip, vcircular);
	}
	return (0);	/* busy */
}

/*
 * Release mdi_devi_enter or successful mdi_devi_tryenter.
 */
void
mdi_devi_exit(dev_info_t *phci_dip, int circular)
{
	dev_info_t	*vdip;
	int		vcircular, pcircular;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));
	vdip = mdi_devi_get_vdip(phci_dip);
	ASSERT(vdip);			/* A pHCI always has a vHCI */

	/*
	 * extract two circular recursion values from single int;
	 * the (short) casts sign-extend, so a packed -1 vHCI value
	 * (pHCI-was-detaching case in mdi_devi_enter) comes back as -1.
	 */
	pcircular = (short)(circular & 0xFFFF);
	vcircular = (short)((circular >> 16) & 0xFFFF);

	ndi_devi_exit(phci_dip, pcircular);
	if (vcircular != -1)
		ndi_devi_exit(vdip, vcircular);
}

/*
 * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used
 * around a pHCI drivers calls to mdi_pi_online/offline, after holding
 * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock
 * with vHCI power management code during path online/offline.  Each
 * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must
 * occur within the scope of an active mdi_devi_enter that establishes the
 * circular value.
 */
void
mdi_devi_exit_phci(dev_info_t *phci_dip, int circular)
{
	int		pcircular;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));

	/* Keep hold on pHCI until we reenter in mdi_devi_enter_phci */
	ndi_hold_devi(phci_dip);

	pcircular = (short)(circular & 0xFFFF);
	ndi_devi_exit(phci_dip, pcircular);
}

void
mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular)
{
	int		pcircular;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));

	ndi_devi_enter(phci_dip, &pcircular);

	/* Drop hold from mdi_devi_exit_phci. */
	ndi_rele_devi(phci_dip);

	/* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */
	ASSERT(pcircular == ((short)(*circular & 0xFFFF)));
}

/*
 * mdi_devi_get_vdip():
 *	given a pHCI dip return vHCI dip
 */
dev_info_t *
mdi_devi_get_vdip(dev_info_t *pdip)
{
	mdi_phci_t	*ph;

	ph = i_devi_get_phci(pdip);
	if (ph && ph->ph_vhci)
		return (ph->ph_vhci->vh_dip);
	return (NULL);
}

/*
 * mdi_devi_pdip_entered():
 *	Return 1 if we are vHCI and have done an ndi_devi_enter
 *	of a pHCI
 */
int
mdi_devi_pdip_entered(dev_info_t *vdip)
{
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;

	vh = i_devi_get_vhci(vdip);
	if (vh == NULL)
		return (0);

	/* scan every registered pHCI for one we currently busy-own */
	MDI_VHCI_PHCI_LOCK(vh);
	ph = vh->vh_phci_head;
	while (ph) {
		if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) {
			MDI_VHCI_PHCI_UNLOCK(vh);
			return (1);
		}
		ph = ph->ph_next;
	}
	MDI_VHCI_PHCI_UNLOCK(vh);
	return (0);
}

/*
 * mdi_phci_path2devinfo():
 *	Utility function to search for a valid phci device given
 *	the devfs pathname.
 */
dev_info_t *
mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
{
	char		*temp_pathname;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	dev_info_t	*pdip = NULL;

	vh = i_devi_get_vhci(vdip);
	ASSERT(vh != NULL);

	if (vh == NULL) {
		/*
		 * Invalid vHCI component, return failure
		 */
		return (NULL);
	}

	/* compare each pHCI's /devices path against the one requested */
	temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	MDI_VHCI_PHCI_LOCK(vh);
	ph = vh->vh_phci_head;
	while (ph != NULL) {
		pdip = ph->ph_dip;
		ASSERT(pdip != NULL);
		*temp_pathname = '\0';
		(void) ddi_pathname(pdip, temp_pathname);
		if (strcmp(temp_pathname, pathname) == 0) {
			break;
		}
		ph = ph->ph_next;
	}
	if (ph == NULL) {
		pdip = NULL;
	}
	MDI_VHCI_PHCI_UNLOCK(vh);
	kmem_free(temp_pathname, MAXPATHLEN);
	return (pdip);
}

/*
 * mdi_phci_get_path_count():
 *	get number of path information nodes associated with a given
 *	pHCI device.
 */
int
mdi_phci_get_path_count(dev_info_t *pdip)
{
	mdi_phci_t	*ph;
	int		count = 0;

	ph = i_devi_get_phci(pdip);
	if (ph != NULL) {
		count = ph->ph_path_count;
	}
	return (count);
}

/*
 * i_mdi_phci_lock():
 *	Lock a pHCI device
 * Return Values:
 *	None
 * Note:
 *	The default locking order is:
 *	_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
 *	But there are number of situations where locks need to be
 *	grabbed in reverse order.  This routine implements try and lock
 *	mechanism depending on the requested parameter option.
 */
static void
i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	if (pip) {
		/*
		 * Reverse locking is requested: the caller holds pip's
		 * pi_mutex, so taking ph_mutex directly would invert the
		 * documented order.  Spin with trylock, temporarily
		 * dropping (but holding a reference on) pip each round.
		 */
		while (MDI_PHCI_TRYLOCK(ph) == 0) {
			if (servicing_interrupt()) {
				/*
				 * In interrupt context: block on the pHCI
				 * lock rather than delaying.
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				MDI_PHCI_LOCK(ph);
				MDI_PI_LOCK(pip);
				MDI_PI_RELE(pip);
				break;
			} else {
				/*
				 * tryenter failed. Try to grab again
				 * after a small delay
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				delay_random(mdi_delay);
				MDI_PI_LOCK(pip);
				MDI_PI_RELE(pip);
			}
		}
	} else {
		MDI_PHCI_LOCK(ph);
	}
}

/*
 * i_mdi_phci_unlock():
 *	Unlock the pHCI component
 */
static void
i_mdi_phci_unlock(mdi_phci_t *ph)
{
	MDI_PHCI_UNLOCK(ph);
}

/*
 * i_mdi_devinfo_create():
 *	create client device's devinfo node
 * Return Values:
 *	dev_info
 *	NULL
 * Notes:
 *	Caller must hold the vHCI client lock (asserted below).
 */
static dev_info_t *
i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
    char **compatible, int ncompatible)
{
	dev_info_t *cdip = NULL;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	/* Verify for duplicate entry */
	cdip = i_mdi_devinfo_find(vh, name, guid);
	ASSERT(cdip == NULL);
	if (cdip) {
		cmn_err(CE_WARN,
		    "i_mdi_devinfo_create: client %s@%s already exists",
		    name ? name : "", guid ? guid : "");
	}

	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
	if (cdip == NULL)
		goto fail;

	/*
	 * Create component type and Global unique identifier
	 * properties
	 */
	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
		goto fail;
	}

	/* Decorate the node with compatible property */
	if (compatible &&
	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
		goto fail;
	}

	return (cdip);

fail:
	/* undo partial node creation before returning failure */
	if (cdip) {
		(void) ndi_prop_remove_all(cdip);
		(void) ndi_devi_free(cdip);
	}
	return (NULL);
}

/*
 * i_mdi_devinfo_find():
 *	Find a matching devinfo node for given client node name
 *	and its guid.
 * Return Values:
 *	Handle to a dev_info node or NULL
 */
static dev_info_t *
i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
{
	char		*data;
	dev_info_t	*cdip = NULL;
	dev_info_t	*ndip = NULL;
	int		circular;

	/* walk the vHCI's children, matching node name then GUID property */
	ndi_devi_enter(vh->vh_dip, &circular);
	ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
	while ((cdip = ndip) != NULL) {
		ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;

		if (strcmp(DEVI(cdip)->devi_node_name, name)) {
			continue;
		}

		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
		    DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
		    &data) != DDI_PROP_SUCCESS) {
			continue;
		}

		if (strcmp(data, guid) != 0) {
			ddi_prop_free(data);
			continue;
		}
		ddi_prop_free(data);
		break;
	}
	ndi_devi_exit(vh->vh_dip, circular);
	return (cdip);
}

/*
 * i_mdi_devinfo_remove():
 *	Remove a client device node
 */
static int
i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
{
	int	rv = MDI_SUCCESS;
1188 if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS || 1189 (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) { 1190 rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN | NDI_DEVI_REMOVE); 1191 if (rv != NDI_SUCCESS) { 1192 MDI_DEBUG(1, (MDI_NOTE, cdip, 1193 "!failed: cdip %p", (void *)cdip)); 1194 } 1195 /* 1196 * Convert to MDI error code 1197 */ 1198 switch (rv) { 1199 case NDI_SUCCESS: 1200 rv = MDI_SUCCESS; 1201 break; 1202 case NDI_BUSY: 1203 rv = MDI_BUSY; 1204 break; 1205 default: 1206 rv = MDI_FAILURE; 1207 break; 1208 } 1209 } 1210 return (rv); 1211 } 1212 1213 /* 1214 * i_devi_get_client() 1215 * Utility function to get mpxio component extensions 1216 */ 1217 static mdi_client_t * 1218 i_devi_get_client(dev_info_t *cdip) 1219 { 1220 mdi_client_t *ct = NULL; 1221 1222 if (MDI_CLIENT(cdip)) { 1223 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client; 1224 } 1225 return (ct); 1226 } 1227 1228 /* 1229 * i_mdi_is_child_present(): 1230 * Search for the presence of client device dev_info node 1231 */ 1232 static int 1233 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip) 1234 { 1235 int rv = MDI_FAILURE; 1236 struct dev_info *dip; 1237 int circular; 1238 1239 ndi_devi_enter(vdip, &circular); 1240 dip = DEVI(vdip)->devi_child; 1241 while (dip) { 1242 if (dip == DEVI(cdip)) { 1243 rv = MDI_SUCCESS; 1244 break; 1245 } 1246 dip = dip->devi_sibling; 1247 } 1248 ndi_devi_exit(vdip, circular); 1249 return (rv); 1250 } 1251 1252 1253 /* 1254 * i_mdi_client_lock(): 1255 * Grab client component lock 1256 * Return Values: 1257 * None 1258 * Note: 1259 * The default locking order is: 1260 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 1261 * But there are number of situations where locks need to be 1262 * grabbed in reverse order. This routine implements try and lock 1263 * mechanism depending on the requested parameter option. 
 */
static void
i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	if (pip) {
		/*
		 * Reverse locking is requested.
		 */
		while (MDI_CLIENT_TRYLOCK(ct) == 0) {
			if (servicing_interrupt()) {
				/*
				 * Cannot delay in interrupt context: hold
				 * the pathinfo node, drop its lock, take
				 * the client lock in the default order,
				 * then reacquire the pathinfo lock.
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				MDI_CLIENT_LOCK(ct);
				MDI_PI_LOCK(pip);
				MDI_PI_RELE(pip);
				break;
			} else {
				/*
				 * tryenter failed. Try to grab again
				 * after a small delay
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				delay_random(mdi_delay);
				MDI_PI_LOCK(pip);
				MDI_PI_RELE(pip);
			}
		}
	} else {
		MDI_CLIENT_LOCK(ct);
	}
}

/*
 * i_mdi_client_unlock():
 *		Unlock a client component
 */
static void
i_mdi_client_unlock(mdi_client_t *ct)
{
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * i_mdi_client_alloc():
 *		Allocate and initialize a client structure.  Caller should
 *		hold the vhci client lock.
 * Return Values:
 *		Handle to a client component
 */
/*ARGSUSED*/
static mdi_client_t *
i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
{
	mdi_client_t	*ct;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	/*
	 * Allocate and initialize a component structure.
	 */
	ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
	mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
	ct->ct_hnext = NULL;
	ct->ct_hprev = NULL;
	ct->ct_dip = NULL;
	ct->ct_vhci = vh;
	/* Private copies of the driver name and GUID strings */
	ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
	(void) strcpy(ct->ct_drvname, name);
	ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
	(void) strcpy(ct->ct_guid, lguid);
	ct->ct_cprivate = NULL;
	ct->ct_vprivate = NULL;
	ct->ct_flags = 0;
	/* New client starts with no paths: FAILED/OFFLINE until a path comes up */
	ct->ct_state = MDI_CLIENT_STATE_FAILED;
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_SET_OFFLINE(ct);
	MDI_CLIENT_SET_DETACH(ct);
	MDI_CLIENT_SET_POWER_UP(ct);
	MDI_CLIENT_UNLOCK(ct);
	ct->ct_failover_flags = 0;
	ct->ct_failover_status = 0;
	cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
	ct->ct_unstable = 0;
	cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
	cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
	/* Inherit the vHCI's load-balancing policy by default */
	ct->ct_lb = vh->vh_lb;
	ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
	ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
	ct->ct_path_count = 0;
	ct->ct_path_head = NULL;
	ct->ct_path_tail = NULL;
	ct->ct_path_last = NULL;

	/*
	 * Add this client component to our client hash queue
	 */
	i_mdi_client_enlist_table(vh, ct);
	return (ct);
}

/*
 * i_mdi_client_enlist_table():
 *		Attach the client device to the client hash table. Caller
 *		should hold the vhci client lock.
 */
static void
i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
{
	int			index;
	struct client_hash	*head;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	/* Bucket is chosen by hashing the client GUID; insert at head */
	index = i_mdi_get_hash_key(ct->ct_guid);
	head = &vh->vh_client_table[index];
	ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
	head->ct_hash_head = ct;
	head->ct_hash_count++;
	vh->vh_client_count++;
}

/*
 * i_mdi_client_delist_table():
 *		Remove the client device from the client hash table.
 *		Caller should hold the vhci client lock.
 */
static void
i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
{
	int			index;
	char			*guid;
	struct client_hash	*head;
	mdi_client_t		*next;
	mdi_client_t		*last;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	guid = ct->ct_guid;
	index = i_mdi_get_hash_key(guid);
	head = &vh->vh_client_table[index];

	/* Singly-linked chain: track the predecessor while searching */
	last = NULL;
	next = (mdi_client_t *)head->ct_hash_head;
	while (next != NULL) {
		if (next == ct) {
			break;
		}
		last = next;
		next = next->ct_hnext;
	}

	if (next) {
		head->ct_hash_count--;
		if (last == NULL) {
			head->ct_hash_head = ct->ct_hnext;
		} else {
			last->ct_hnext = ct->ct_hnext;
		}
		ct->ct_hnext = NULL;
		vh->vh_client_count--;
	}
}


/*
 * i_mdi_client_free():
 *		Free a client component: detach it from its devinfo node,
 *		remove it from the hash table, release all its resources,
 *		and finally remove the client devinfo node itself.
 */
static int
i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
{
	int		rv = MDI_SUCCESS;
	int		flags = ct->ct_flags;
	dev_info_t	*cdip;
	dev_info_t	*vdip;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	vdip = vh->vh_dip;
	cdip = ct->ct_dip;

	(void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
	DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
	DEVI(cdip)->devi_mdi_client = NULL;

	/*
	 * Clear out back ref. to dev_info_t node
	 */
	ct->ct_dip = NULL;

	/*
	 * Remove this client from our hash queue
	 */
	i_mdi_client_delist_table(vh, ct);

	/*
	 * Uninitialize and free the component
	 */
	kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
	kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
	kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
	cv_destroy(&ct->ct_failover_cv);
	cv_destroy(&ct->ct_unstable_cv);
	cv_destroy(&ct->ct_powerchange_cv);
	mutex_destroy(&ct->ct_mutex);
	kmem_free(ct, sizeof (*ct));

	/*
	 * Drop the vhci client lock across the devinfo removal
	 * (ndi_devi_offline may block), then reacquire it for the caller.
	 */
	if (cdip != NULL) {
		MDI_VHCI_CLIENT_UNLOCK(vh);
		(void) i_mdi_devinfo_remove(vdip, cdip, flags);
		MDI_VHCI_CLIENT_LOCK(vh);
	}
	return (rv);
}

/*
 * i_mdi_client_find():
 *		Find the client structure corresponding to a given guid
 *		Caller should hold the vhci client lock.
 *		A NULL cname matches any driver name (GUID-only lookup).
 */
static mdi_client_t *
i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
{
	int			index;
	struct client_hash	*head;
	mdi_client_t		*ct;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	index = i_mdi_get_hash_key(guid);
	head = &vh->vh_client_table[index];

	ct = head->ct_hash_head;
	while (ct != NULL) {
		if (strcmp(ct->ct_guid, guid) == 0 &&
		    (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
			break;
		}
		ct = ct->ct_hnext;
	}
	return (ct);
}

/*
 * i_mdi_client_update_state():
 *		Compute and update client device state
 * Notes:
 *		A client device can be in any of three possible states:
 *
 *		MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
 *		than one online/standby path. Can tolerate failures.
 *		MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
 *		no alternate paths available as standby. A failure on the online
 *		path would result in loss of access to device data.
 *		MDI_CLIENT_STATE_FAILED - Client device in failed state with
 *		no paths available to access the device.
 */
static void
i_mdi_client_update_state(mdi_client_t *ct)
{
	int	state;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	state = i_mdi_client_compute_state(ct, NULL);
	MDI_CLIENT_SET_STATE(ct, state);
}

/*
 * i_mdi_client_compute_state():
 *		Compute client device state from the states of its paths.
 *
 *		mdi_phci_t *	Pointer to pHCI structure whose paths should
 *				be excluded while computing the new value.
 *				Used by i_mdi_phci_offline() to find the new
 *				client state after DR of a pHCI.  Pass NULL
 *				to consider all paths.
 */
static int
i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
{
	int		state;
	int		online_count = 0;
	int		standby_count = 0;
	mdi_pathinfo_t	*pip, *next;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	/* Count ONLINE and STANDBY paths, skipping those on pHCI 'ph' */
	pip = ct->ct_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
		if (MDI_PI(pip)->pi_phci == ph) {
			MDI_PI_UNLOCK(pip);
			pip = next;
			continue;
		}

		if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
		    == MDI_PATHINFO_STATE_ONLINE)
			online_count++;
		else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
		    == MDI_PATHINFO_STATE_STANDBY)
			standby_count++;
		MDI_PI_UNLOCK(pip);
		pip = next;
	}

	/*
	 * FAILED: no usable path at all.  DEGRADED: exactly one usable
	 * path (no redundancy).  OPTIMAL: two or more usable paths.
	 */
	if (online_count == 0) {
		if (standby_count == 0) {
			state = MDI_CLIENT_STATE_FAILED;
			MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
			    "client state failed: ct = %p", (void *)ct));
		} else if (standby_count == 1) {
			state = MDI_CLIENT_STATE_DEGRADED;
		} else {
			state = MDI_CLIENT_STATE_OPTIMAL;
		}
	} else if (online_count == 1) {
		if (standby_count == 0) {
			state = MDI_CLIENT_STATE_DEGRADED;
		} else {
			state = MDI_CLIENT_STATE_OPTIMAL;
		}
	} else {
		state = MDI_CLIENT_STATE_OPTIMAL;
	}
	return (state);
}

1592 /* 1593 * i_mdi_client2devinfo(): 1594 * Utility function 1595 */ 1596 dev_info_t * 1597 i_mdi_client2devinfo(mdi_client_t *ct) 1598 { 1599 return (ct->ct_dip); 1600 } 1601 1602 /* 1603 * mdi_client_path2_devinfo(): 1604 * Given the parent devinfo and child devfs pathname, search for 1605 * a valid devfs node handle. 1606 */ 1607 dev_info_t * 1608 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname) 1609 { 1610 dev_info_t *cdip = NULL; 1611 dev_info_t *ndip = NULL; 1612 char *temp_pathname; 1613 int circular; 1614 1615 /* 1616 * Allocate temp buffer 1617 */ 1618 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1619 1620 /* 1621 * Lock parent against changes 1622 */ 1623 ndi_devi_enter(vdip, &circular); 1624 ndip = (dev_info_t *)DEVI(vdip)->devi_child; 1625 while ((cdip = ndip) != NULL) { 1626 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1627 1628 *temp_pathname = '\0'; 1629 (void) ddi_pathname(cdip, temp_pathname); 1630 if (strcmp(temp_pathname, pathname) == 0) { 1631 break; 1632 } 1633 } 1634 /* 1635 * Release devinfo lock 1636 */ 1637 ndi_devi_exit(vdip, circular); 1638 1639 /* 1640 * Free the temp buffer 1641 */ 1642 kmem_free(temp_pathname, MAXPATHLEN); 1643 return (cdip); 1644 } 1645 1646 /* 1647 * mdi_client_get_path_count(): 1648 * Utility function to get number of path information nodes 1649 * associated with a given client device. 
1650 */ 1651 int 1652 mdi_client_get_path_count(dev_info_t *cdip) 1653 { 1654 mdi_client_t *ct; 1655 int count = 0; 1656 1657 ct = i_devi_get_client(cdip); 1658 if (ct != NULL) { 1659 count = ct->ct_path_count; 1660 } 1661 return (count); 1662 } 1663 1664 1665 /* 1666 * i_mdi_get_hash_key(): 1667 * Create a hash using strings as keys 1668 * 1669 */ 1670 static int 1671 i_mdi_get_hash_key(char *str) 1672 { 1673 uint32_t g, hash = 0; 1674 char *p; 1675 1676 for (p = str; *p != '\0'; p++) { 1677 g = *p; 1678 hash += g; 1679 } 1680 return (hash % (CLIENT_HASH_TABLE_SIZE - 1)); 1681 } 1682 1683 /* 1684 * mdi_get_lb_policy(): 1685 * Get current load balancing policy for a given client device 1686 */ 1687 client_lb_t 1688 mdi_get_lb_policy(dev_info_t *cdip) 1689 { 1690 client_lb_t lb = LOAD_BALANCE_NONE; 1691 mdi_client_t *ct; 1692 1693 ct = i_devi_get_client(cdip); 1694 if (ct != NULL) { 1695 lb = ct->ct_lb; 1696 } 1697 return (lb); 1698 } 1699 1700 /* 1701 * mdi_set_lb_region_size(): 1702 * Set current region size for the load-balance 1703 */ 1704 int 1705 mdi_set_lb_region_size(dev_info_t *cdip, int region_size) 1706 { 1707 mdi_client_t *ct; 1708 int rv = MDI_FAILURE; 1709 1710 ct = i_devi_get_client(cdip); 1711 if (ct != NULL && ct->ct_lb_args != NULL) { 1712 ct->ct_lb_args->region_size = region_size; 1713 rv = MDI_SUCCESS; 1714 } 1715 return (rv); 1716 } 1717 1718 /* 1719 * mdi_Set_lb_policy(): 1720 * Set current load balancing policy for a given client device 1721 */ 1722 int 1723 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb) 1724 { 1725 mdi_client_t *ct; 1726 int rv = MDI_FAILURE; 1727 1728 ct = i_devi_get_client(cdip); 1729 if (ct != NULL) { 1730 ct->ct_lb = lb; 1731 rv = MDI_SUCCESS; 1732 } 1733 return (rv); 1734 } 1735 1736 /* 1737 * mdi_failover(): 1738 * failover function called by the vHCI drivers to initiate 1739 * a failover operation. This is typically due to non-availability 1740 * of online paths to route I/O requests. 
Failover can be
 * triggered through user application also.
 *
 * The vHCI driver calls mdi_failover() to initiate a failover
 * operation. mdi_failover() calls back into the vHCI driver's
 * vo_failover() entry point to perform the actual failover
 * operation. The reason for requiring the vHCI driver to
 * initiate failover by calling mdi_failover(), instead of directly
 * executing vo_failover() itself, is to ensure that the mdi
 * framework can keep track of the client state properly.
 * Additionally, mdi_failover() provides as a convenience the
 * option of performing the failover operation synchronously or
 * asynchronously
 *
 * Upon successful completion of the failover operation, the
 * paths that were previously ONLINE will be in the STANDBY state,
 * and the newly activated paths will be in the ONLINE state.
 *
 * The flags modifier determines whether the activation is done
 * synchronously: MDI_FAILOVER_SYNC
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 *	MDI_BUSY
 */
/*ARGSUSED*/
int
mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
{
	int		rv;
	mdi_client_t	*ct;

	ct = i_devi_get_client(cdip);
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/* cdip is not a valid client device. Nothing more to do. */
		return (MDI_FAILURE);
	}

	MDI_CLIENT_LOCK(ct);

	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
		/* A path to the client is being freed */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_BUSY);
	}


	if (MDI_CLIENT_IS_FAILED(ct)) {
		/*
		 * Client is in failed state. Nothing more to do.
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
		/*
		 * Failover is already in progress; return BUSY
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_BUSY);
	}
	/*
	 * Make sure that mdi_pathinfo node state changes are processed.
	 * We do not allow failovers to progress while client path state
	 * changes are in progress
	 */
	if (ct->ct_unstable) {
		if (flags == MDI_FAILOVER_ASYNC) {
			/* Async callers don't wait; report busy */
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		} else {
			/* Sync callers block until the client stabilizes */
			while (ct->ct_unstable)
				cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
		}
	}

	/*
	 * Client device is in stable state. Before proceeding, perform sanity
	 * checks again (conditions may have changed while we waited above).
	 */
	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
	    (!i_ddi_devi_attached(cdip))) {
		/*
		 * Client is in failed state. Nothing more to do.
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	/*
	 * Set the client state as failover in progress.
	 */
	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
	ct->ct_failover_flags = flags;
	MDI_CLIENT_UNLOCK(ct);

	if (flags == MDI_FAILOVER_ASYNC) {
		/*
		 * Submit the initiate failover request via CPR safe
		 * taskq threads.
		 */
		(void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
		    ct, KM_SLEEP);
		return (MDI_ACCEPT);
	} else {
		/*
		 * Synchronous failover mode.  Typically invoked from the user
		 * land.
		 */
		rv = i_mdi_failover(ct);
	}
	return (rv);
}

/*
 * i_mdi_failover():
 *		internal failover function. Invokes vHCI drivers failover
 *		callback function and process the failover status
 * Return Values:
 *		None
 *
 * Note: A client device in failover state can not be detached or freed.
 */
static int
i_mdi_failover(void *arg)
{
	int		rv = MDI_SUCCESS;
	mdi_client_t	*ct = (mdi_client_t *)arg;
	mdi_vhci_t	*vh = ct->ct_vhci;

	ASSERT(!MDI_CLIENT_LOCKED(ct));

	if (vh->vh_ops->vo_failover != NULL) {
		/*
		 * Call vHCI drivers callback routine
		 */
		rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
		    ct->ct_failover_flags);
	}

	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);

	/*
	 * Save the failover return status
	 */
	ct->ct_failover_status = rv;

	/*
	 * As a result of failover, client status would have been changed.
	 * Update the client state and wake up anyone waiting on this client
	 * device.
	 */
	i_mdi_client_update_state(ct);

	cv_broadcast(&ct->ct_failover_cv);
	MDI_CLIENT_UNLOCK(ct);
	return (rv);
}

/*
 * Load balancing is logical block.
 * IOs within the range described by region_size
 * would go on the same path. This would improve the
 * performance by cache-hit on some of the RAID devices.
 * Search only for online paths(At some point we
 * may want to balance across target ports).
 * If no paths are found then default to round-robin.
 */
static int
i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
{
	int		path_index = -1;
	int		online_path_count = 0;
	int		online_nonpref_path_count = 0;
	int		region_size = ct->ct_lb_args->region_size;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	int		preferred, path_cnt;

	/*
	 * First pass: count ONLINE paths, split into preferred and
	 * non-preferred groups.
	 */
	pip = ct->ct_path_head;
	while (pip) {
		MDI_PI_LOCK(pip);
		if (MDI_PI(pip)->pi_state ==
		    MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
			online_path_count++;
		} else if (MDI_PI(pip)->pi_state ==
		    MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
			online_nonpref_path_count++;
		}
		next = (mdi_pathinfo_t *)
		    MDI_PI(pip)->pi_client_link;
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	/* if found any online/preferred then use this type */
	if (online_path_count > 0) {
		path_cnt = online_path_count;
		preferred = 1;
	} else if (online_nonpref_path_count > 0) {
		path_cnt = online_nonpref_path_count;
		preferred = 0;
	} else {
		path_cnt = 0;
	}
	if (path_cnt) {
		/*
		 * Second pass: pick the path whose index within the chosen
		 * group equals (block number >> region_size) mod path_cnt,
		 * so I/Os in the same region keep using the same path.
		 */
		path_index = (bp->b_blkno >> region_size) % path_cnt;
		pip = ct->ct_path_head;
		while (pip && path_index != -1) {
			MDI_PI_LOCK(pip);
			if (path_index == 0 &&
			    (MDI_PI(pip)->pi_state ==
			    MDI_PATHINFO_STATE_ONLINE) &&
			    MDI_PI(pip)->pi_preferred == preferred) {
				/* Return held; caller calls mdi_rele_path() */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				*ret_pip = pip;
				return (MDI_SUCCESS);
			}
			path_index --;
			next = (mdi_pathinfo_t *)
			    MDI_PI(pip)->pi_client_link;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		/*
		 * NOTE(review): pip may be NULL here if the list was
		 * exhausted; mdi_pi_spathname(NULL) in the debug message
		 * below looks unsafe on DEBUG kernels — verify.
		 */
		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
		    "lba %llx: path %s %p",
		    bp->b_lblkno, mdi_pi_spathname(pip), (void *)pip));
	}
	return (MDI_FAILURE);
}

/*
 * mdi_select_path():
 *		select a path to access a client device.
1977 * 1978 * mdi_select_path() function is called by the vHCI drivers to 1979 * select a path to route the I/O request to. The caller passes 1980 * the block I/O data transfer structure ("buf") as one of the 1981 * parameters. The mpxio framework uses the buf structure 1982 * contents to maintain per path statistics (total I/O size / 1983 * count pending). If more than one online paths are available to 1984 * select, the framework automatically selects a suitable path 1985 * for routing I/O request. If a failover operation is active for 1986 * this client device the call shall be failed with MDI_BUSY error 1987 * code. 1988 * 1989 * By default this function returns a suitable path in online 1990 * state based on the current load balancing policy. Currently 1991 * we support LOAD_BALANCE_NONE (Previously selected online path 1992 * will continue to be used till the path is usable) and 1993 * LOAD_BALANCE_RR (Online paths will be selected in a round 1994 * robin fashion), LOAD_BALANCE_LB(Online paths will be selected 1995 * based on the logical block). The load balancing 1996 * through vHCI drivers configuration file (driver.conf). 1997 * 1998 * vHCI drivers may override this default behavior by specifying 1999 * appropriate flags. The meaning of the thrid argument depends 2000 * on the flags specified. If MDI_SELECT_PATH_INSTANCE is set 2001 * then the argument is the "path instance" of the path to select. 2002 * If MDI_SELECT_PATH_INSTANCE is not set then the argument is 2003 * "start_pip". A non NULL "start_pip" is the starting point to 2004 * walk and find the next appropriate path. The following values 2005 * are currently defined: MDI_SELECT_ONLINE_PATH (to select an 2006 * ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an 2007 * STANDBY path). 2008 * 2009 * The non-standard behavior is used by the scsi_vhci driver, 2010 * whenever it has to use a STANDBY/FAULTED path. Eg. 
during 2011 * attach of client devices (to avoid an unnecessary failover 2012 * when the STANDBY path comes up first), during failover 2013 * (to activate a STANDBY path as ONLINE). 2014 * 2015 * The selected path is returned in a a mdi_hold_path() state 2016 * (pi_ref_cnt). Caller should release the hold by calling 2017 * mdi_rele_path(). 2018 * 2019 * Return Values: 2020 * MDI_SUCCESS - Completed successfully 2021 * MDI_BUSY - Client device is busy failing over 2022 * MDI_NOPATH - Client device is online, but no valid path are 2023 * available to access this client device 2024 * MDI_FAILURE - Invalid client device or state 2025 * MDI_DEVI_ONLINING 2026 * - Client device (struct dev_info state) is in 2027 * onlining state. 2028 */ 2029 2030 /*ARGSUSED*/ 2031 int 2032 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags, 2033 void *arg, mdi_pathinfo_t **ret_pip) 2034 { 2035 mdi_client_t *ct; 2036 mdi_pathinfo_t *pip; 2037 mdi_pathinfo_t *next; 2038 mdi_pathinfo_t *head; 2039 mdi_pathinfo_t *start; 2040 client_lb_t lbp; /* load balancing policy */ 2041 int sb = 1; /* standard behavior */ 2042 int preferred = 1; /* preferred path */ 2043 int cond, cont = 1; 2044 int retry = 0; 2045 mdi_pathinfo_t *start_pip; /* request starting pathinfo */ 2046 int path_instance; /* request specific path instance */ 2047 2048 /* determine type of arg based on flags */ 2049 if (flags & MDI_SELECT_PATH_INSTANCE) { 2050 path_instance = (int)(intptr_t)arg; 2051 start_pip = NULL; 2052 } else { 2053 path_instance = 0; 2054 start_pip = (mdi_pathinfo_t *)arg; 2055 } 2056 2057 if (flags != 0) { 2058 /* 2059 * disable default behavior 2060 */ 2061 sb = 0; 2062 } 2063 2064 *ret_pip = NULL; 2065 ct = i_devi_get_client(cdip); 2066 if (ct == NULL) { 2067 /* mdi extensions are NULL, Nothing more to do */ 2068 return (MDI_FAILURE); 2069 } 2070 2071 MDI_CLIENT_LOCK(ct); 2072 2073 if (sb) { 2074 if (MDI_CLIENT_IS_FAILED(ct)) { 2075 /* 2076 * Client is not ready to accept any I/O requests. 
2077 * Fail this request. 2078 */ 2079 MDI_DEBUG(2, (MDI_NOTE, cdip, 2080 "client state offline ct = %p", (void *)ct)); 2081 MDI_CLIENT_UNLOCK(ct); 2082 return (MDI_FAILURE); 2083 } 2084 2085 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 2086 /* 2087 * Check for Failover is in progress. If so tell the 2088 * caller that this device is busy. 2089 */ 2090 MDI_DEBUG(2, (MDI_NOTE, cdip, 2091 "client failover in progress ct = %p", 2092 (void *)ct)); 2093 MDI_CLIENT_UNLOCK(ct); 2094 return (MDI_BUSY); 2095 } 2096 2097 /* 2098 * Check to see whether the client device is attached. 2099 * If not so, let the vHCI driver manually select a path 2100 * (standby) and let the probe/attach process to continue. 2101 */ 2102 if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) { 2103 MDI_DEBUG(4, (MDI_NOTE, cdip, 2104 "devi is onlining ct = %p", (void *)ct)); 2105 MDI_CLIENT_UNLOCK(ct); 2106 return (MDI_DEVI_ONLINING); 2107 } 2108 } 2109 2110 /* 2111 * Cache in the client list head. If head of the list is NULL 2112 * return MDI_NOPATH 2113 */ 2114 head = ct->ct_path_head; 2115 if (head == NULL) { 2116 MDI_CLIENT_UNLOCK(ct); 2117 return (MDI_NOPATH); 2118 } 2119 2120 /* Caller is specifying a specific pathinfo path by path_instance */ 2121 if (path_instance) { 2122 /* search for pathinfo with correct path_instance */ 2123 for (pip = head; 2124 pip && (mdi_pi_get_path_instance(pip) != path_instance); 2125 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) 2126 ; 2127 2128 /* If path can't be selected then MDI_NOPATH is returned. */ 2129 if (pip == NULL) { 2130 MDI_CLIENT_UNLOCK(ct); 2131 return (MDI_NOPATH); 2132 } 2133 2134 /* 2135 * Verify state of path. When asked to select a specific 2136 * path_instance, we select the requested path in any 2137 * state (ONLINE, OFFLINE, STANDBY, FAULT) other than INIT. 2138 * We don't however select paths where the pHCI has detached. 
2139 * NOTE: last pathinfo node of an opened client device may 2140 * exist in an OFFLINE state after the pHCI associated with 2141 * that path has detached (but pi_phci will be NULL if that 2142 * has occurred). 2143 */ 2144 MDI_PI_LOCK(pip); 2145 if ((MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_INIT) || 2146 (MDI_PI(pip)->pi_phci == NULL)) { 2147 MDI_PI_UNLOCK(pip); 2148 MDI_CLIENT_UNLOCK(ct); 2149 return (MDI_FAILURE); 2150 } 2151 2152 /* Return MDI_BUSY if we have a transient condition */ 2153 if (MDI_PI_IS_TRANSIENT(pip)) { 2154 MDI_PI_UNLOCK(pip); 2155 MDI_CLIENT_UNLOCK(ct); 2156 return (MDI_BUSY); 2157 } 2158 2159 /* 2160 * Return the path in hold state. Caller should release the 2161 * lock by calling mdi_rele_path() 2162 */ 2163 MDI_PI_HOLD(pip); 2164 MDI_PI_UNLOCK(pip); 2165 *ret_pip = pip; 2166 MDI_CLIENT_UNLOCK(ct); 2167 return (MDI_SUCCESS); 2168 } 2169 2170 /* 2171 * for non default behavior, bypass current 2172 * load balancing policy and always use LOAD_BALANCE_RR 2173 * except that the start point will be adjusted based 2174 * on the provided start_pip 2175 */ 2176 lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR; 2177 2178 switch (lbp) { 2179 case LOAD_BALANCE_NONE: 2180 /* 2181 * Load balancing is None or Alternate path mode 2182 * Start looking for a online mdi_pathinfo node starting from 2183 * last known selected path 2184 */ 2185 preferred = 1; 2186 pip = (mdi_pathinfo_t *)ct->ct_path_last; 2187 if (pip == NULL) { 2188 pip = head; 2189 } 2190 start = pip; 2191 do { 2192 MDI_PI_LOCK(pip); 2193 /* 2194 * No need to explicitly check if the path is disabled. 2195 * Since we are checking for state == ONLINE and the 2196 * same variable is used for DISABLE/ENABLE information. 2197 */ 2198 if ((MDI_PI(pip)->pi_state == 2199 MDI_PATHINFO_STATE_ONLINE) && 2200 preferred == MDI_PI(pip)->pi_preferred) { 2201 /* 2202 * Return the path in hold state. 
Caller should 2203 * release the lock by calling mdi_rele_path() 2204 */ 2205 MDI_PI_HOLD(pip); 2206 MDI_PI_UNLOCK(pip); 2207 ct->ct_path_last = pip; 2208 *ret_pip = pip; 2209 MDI_CLIENT_UNLOCK(ct); 2210 return (MDI_SUCCESS); 2211 } 2212 2213 /* 2214 * Path is busy. 2215 */ 2216 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2217 MDI_PI_IS_TRANSIENT(pip)) 2218 retry = 1; 2219 /* 2220 * Keep looking for a next available online path 2221 */ 2222 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2223 if (next == NULL) { 2224 next = head; 2225 } 2226 MDI_PI_UNLOCK(pip); 2227 pip = next; 2228 if (start == pip && preferred) { 2229 preferred = 0; 2230 } else if (start == pip && !preferred) { 2231 cont = 0; 2232 } 2233 } while (cont); 2234 break; 2235 2236 case LOAD_BALANCE_LBA: 2237 /* 2238 * Make sure we are looking 2239 * for an online path. Otherwise, if it is for a STANDBY 2240 * path request, it will go through and fetch an ONLINE 2241 * path which is not desirable. 2242 */ 2243 if ((ct->ct_lb_args != NULL) && 2244 (ct->ct_lb_args->region_size) && bp && 2245 (sb || (flags == MDI_SELECT_ONLINE_PATH))) { 2246 if (i_mdi_lba_lb(ct, ret_pip, bp) 2247 == MDI_SUCCESS) { 2248 MDI_CLIENT_UNLOCK(ct); 2249 return (MDI_SUCCESS); 2250 } 2251 } 2252 /* FALLTHROUGH */ 2253 case LOAD_BALANCE_RR: 2254 /* 2255 * Load balancing is Round Robin. Start looking for a online 2256 * mdi_pathinfo node starting from last known selected path 2257 * as the start point. If override flags are specified, 2258 * process accordingly. 2259 * If the search is already in effect(start_pip not null), 2260 * then lets just use the same path preference to continue the 2261 * traversal. 2262 */ 2263 2264 if (start_pip != NULL) { 2265 preferred = MDI_PI(start_pip)->pi_preferred; 2266 } else { 2267 preferred = 1; 2268 } 2269 2270 start = sb ? 
(mdi_pathinfo_t *)ct->ct_path_last : start_pip; 2271 if (start == NULL) { 2272 pip = head; 2273 } else { 2274 pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link; 2275 if (pip == NULL) { 2276 if ( flags & MDI_SELECT_NO_PREFERRED) { 2277 /* 2278 * Return since we hit the end of list 2279 */ 2280 MDI_CLIENT_UNLOCK(ct); 2281 return (MDI_NOPATH); 2282 } 2283 2284 if (!sb) { 2285 if (preferred == 0) { 2286 /* 2287 * Looks like we have completed 2288 * the traversal as preferred 2289 * value is 0. Time to bail out. 2290 */ 2291 *ret_pip = NULL; 2292 MDI_CLIENT_UNLOCK(ct); 2293 return (MDI_NOPATH); 2294 } else { 2295 /* 2296 * Looks like we reached the 2297 * end of the list. Lets enable 2298 * traversal of non preferred 2299 * paths. 2300 */ 2301 preferred = 0; 2302 } 2303 } 2304 pip = head; 2305 } 2306 } 2307 start = pip; 2308 do { 2309 MDI_PI_LOCK(pip); 2310 if (sb) { 2311 cond = ((MDI_PI(pip)->pi_state == 2312 MDI_PATHINFO_STATE_ONLINE && 2313 MDI_PI(pip)->pi_preferred == 2314 preferred) ? 1 : 0); 2315 } else { 2316 if (flags == MDI_SELECT_ONLINE_PATH) { 2317 cond = ((MDI_PI(pip)->pi_state == 2318 MDI_PATHINFO_STATE_ONLINE && 2319 MDI_PI(pip)->pi_preferred == 2320 preferred) ? 1 : 0); 2321 } else if (flags == MDI_SELECT_STANDBY_PATH) { 2322 cond = ((MDI_PI(pip)->pi_state == 2323 MDI_PATHINFO_STATE_STANDBY && 2324 MDI_PI(pip)->pi_preferred == 2325 preferred) ? 1 : 0); 2326 } else if (flags == (MDI_SELECT_ONLINE_PATH | 2327 MDI_SELECT_STANDBY_PATH)) { 2328 cond = (((MDI_PI(pip)->pi_state == 2329 MDI_PATHINFO_STATE_ONLINE || 2330 (MDI_PI(pip)->pi_state == 2331 MDI_PATHINFO_STATE_STANDBY)) && 2332 MDI_PI(pip)->pi_preferred == 2333 preferred) ? 
1 : 0); 2334 } else if (flags == 2335 (MDI_SELECT_STANDBY_PATH | 2336 MDI_SELECT_ONLINE_PATH | 2337 MDI_SELECT_USER_DISABLE_PATH)) { 2338 cond = (((MDI_PI(pip)->pi_state == 2339 MDI_PATHINFO_STATE_ONLINE || 2340 (MDI_PI(pip)->pi_state == 2341 MDI_PATHINFO_STATE_STANDBY) || 2342 (MDI_PI(pip)->pi_state == 2343 (MDI_PATHINFO_STATE_ONLINE| 2344 MDI_PATHINFO_STATE_USER_DISABLE)) || 2345 (MDI_PI(pip)->pi_state == 2346 (MDI_PATHINFO_STATE_STANDBY | 2347 MDI_PATHINFO_STATE_USER_DISABLE)))&& 2348 MDI_PI(pip)->pi_preferred == 2349 preferred) ? 1 : 0); 2350 } else if (flags == 2351 (MDI_SELECT_STANDBY_PATH | 2352 MDI_SELECT_ONLINE_PATH | 2353 MDI_SELECT_NO_PREFERRED)) { 2354 cond = (((MDI_PI(pip)->pi_state == 2355 MDI_PATHINFO_STATE_ONLINE) || 2356 (MDI_PI(pip)->pi_state == 2357 MDI_PATHINFO_STATE_STANDBY)) 2358 ? 1 : 0); 2359 } else { 2360 cond = 0; 2361 } 2362 } 2363 /* 2364 * No need to explicitly check if the path is disabled. 2365 * Since we are checking for state == ONLINE and the 2366 * same variable is used for DISABLE/ENABLE information. 2367 */ 2368 if (cond) { 2369 /* 2370 * Return the path in hold state. Caller should 2371 * release the lock by calling mdi_rele_path() 2372 */ 2373 MDI_PI_HOLD(pip); 2374 MDI_PI_UNLOCK(pip); 2375 if (sb) 2376 ct->ct_path_last = pip; 2377 *ret_pip = pip; 2378 MDI_CLIENT_UNLOCK(ct); 2379 return (MDI_SUCCESS); 2380 } 2381 /* 2382 * Path is busy. 2383 */ 2384 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2385 MDI_PI_IS_TRANSIENT(pip)) 2386 retry = 1; 2387 2388 /* 2389 * Keep looking for a next available online path 2390 */ 2391 do_again: 2392 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2393 if (next == NULL) { 2394 if ( flags & MDI_SELECT_NO_PREFERRED) { 2395 /* 2396 * Bail out since we hit the end of list 2397 */ 2398 MDI_PI_UNLOCK(pip); 2399 break; 2400 } 2401 2402 if (!sb) { 2403 if (preferred == 1) { 2404 /* 2405 * Looks like we reached the 2406 * end of the list. Lets enable 2407 * traversal of non preferred 2408 * paths. 
2409 */ 2410 preferred = 0; 2411 next = head; 2412 } else { 2413 /* 2414 * We have done both the passes 2415 * Preferred as well as for 2416 * Non-preferred. Bail out now. 2417 */ 2418 cont = 0; 2419 } 2420 } else { 2421 /* 2422 * Standard behavior case. 2423 */ 2424 next = head; 2425 } 2426 } 2427 MDI_PI_UNLOCK(pip); 2428 if (cont == 0) { 2429 break; 2430 } 2431 pip = next; 2432 2433 if (!sb) { 2434 /* 2435 * We need to handle the selection of 2436 * non-preferred path in the following 2437 * case: 2438 * 2439 * +------+ +------+ +------+ +-----+ 2440 * | A : 1| - | B : 1| - | C : 0| - |NULL | 2441 * +------+ +------+ +------+ +-----+ 2442 * 2443 * If we start the search with B, we need to 2444 * skip beyond B to pick C which is non - 2445 * preferred in the second pass. The following 2446 * test, if true, will allow us to skip over 2447 * the 'start'(B in the example) to select 2448 * other non preferred elements. 2449 */ 2450 if ((start_pip != NULL) && (start_pip == pip) && 2451 (MDI_PI(start_pip)->pi_preferred 2452 != preferred)) { 2453 /* 2454 * try again after going past the start 2455 * pip 2456 */ 2457 MDI_PI_LOCK(pip); 2458 goto do_again; 2459 } 2460 } else { 2461 /* 2462 * Standard behavior case 2463 */ 2464 if (start == pip && preferred) { 2465 /* look for nonpreferred paths */ 2466 preferred = 0; 2467 } else if (start == pip && !preferred) { 2468 /* 2469 * Exit condition 2470 */ 2471 cont = 0; 2472 } 2473 } 2474 } while (cont); 2475 break; 2476 } 2477 2478 MDI_CLIENT_UNLOCK(ct); 2479 if (retry == 1) { 2480 return (MDI_BUSY); 2481 } else { 2482 return (MDI_NOPATH); 2483 } 2484 } 2485 2486 /* 2487 * For a client, return the next available path to any phci 2488 * 2489 * Note: 2490 * Caller should hold the branch's devinfo node to get a consistent 2491 * snap shot of the mdi_pathinfo nodes. 2492 * 2493 * Please note that even the list is stable the mdi_pathinfo 2494 * node state and properties are volatile. 
The caller should lock 2495 * and unlock the nodes by calling mdi_pi_lock() and 2496 * mdi_pi_unlock() functions to get a stable properties. 2497 * 2498 * If there is a need to use the nodes beyond the hold of the 2499 * devinfo node period (For ex. I/O), then mdi_pathinfo node 2500 * need to be held against unexpected removal by calling 2501 * mdi_hold_path() and should be released by calling 2502 * mdi_rele_path() on completion. 2503 */ 2504 mdi_pathinfo_t * 2505 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip) 2506 { 2507 mdi_client_t *ct; 2508 2509 if (!MDI_CLIENT(ct_dip)) 2510 return (NULL); 2511 2512 /* 2513 * Walk through client link 2514 */ 2515 ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client; 2516 ASSERT(ct != NULL); 2517 2518 if (pip == NULL) 2519 return ((mdi_pathinfo_t *)ct->ct_path_head); 2520 2521 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link); 2522 } 2523 2524 /* 2525 * For a phci, return the next available path to any client 2526 * Note: ditto mdi_get_next_phci_path() 2527 */ 2528 mdi_pathinfo_t * 2529 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip) 2530 { 2531 mdi_phci_t *ph; 2532 2533 if (!MDI_PHCI(ph_dip)) 2534 return (NULL); 2535 2536 /* 2537 * Walk through pHCI link 2538 */ 2539 ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci; 2540 ASSERT(ph != NULL); 2541 2542 if (pip == NULL) 2543 return ((mdi_pathinfo_t *)ph->ph_path_head); 2544 2545 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link); 2546 } 2547 2548 /* 2549 * mdi_hold_path(): 2550 * Hold the mdi_pathinfo node against unwanted unexpected free. 2551 * Return Values: 2552 * None 2553 */ 2554 void 2555 mdi_hold_path(mdi_pathinfo_t *pip) 2556 { 2557 if (pip) { 2558 MDI_PI_LOCK(pip); 2559 MDI_PI_HOLD(pip); 2560 MDI_PI_UNLOCK(pip); 2561 } 2562 } 2563 2564 2565 /* 2566 * mdi_rele_path(): 2567 * Release the mdi_pathinfo node which was selected 2568 * through mdi_select_path() mechanism or manually held by 2569 * calling mdi_hold_path(). 
 * Return Values:
 *	None
 */
void
mdi_rele_path(mdi_pathinfo_t *pip)
{
	if (pip) {
		MDI_PI_LOCK(pip);
		MDI_PI_RELE(pip);
		if (MDI_PI(pip)->pi_ref_cnt == 0) {
			/* Wake mdi_pi_free(), which waits for ref_cnt == 0. */
			cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
		}
		MDI_PI_UNLOCK(pip);
	}
}

/*
 * mdi_pi_lock():
 *	Lock the mdi_pathinfo node.
 * Note:
 *	The caller should release the lock by calling mdi_pi_unlock()
 */
void
mdi_pi_lock(mdi_pathinfo_t *pip)
{
	/* ASSERT catches NULL in DEBUG; the if-check guards non-DEBUG. */
	ASSERT(pip != NULL);
	if (pip) {
		MDI_PI_LOCK(pip);
	}
}


/*
 * mdi_pi_unlock():
 *	Unlock the mdi_pathinfo node.
 * Note:
 *	The mdi_pathinfo node should have been locked with mdi_pi_lock()
 */
void
mdi_pi_unlock(mdi_pathinfo_t *pip)
{
	ASSERT(pip != NULL);
	if (pip) {
		MDI_PI_UNLOCK(pip);
	}
}

/*
 * mdi_pi_find():
 *	Search the list of mdi_pathinfo nodes attached to the
 *	pHCI/Client device node whose path address matches "paddr".
 *	Returns a pointer to the mdi_pathinfo node if a matching node is
 *	found.
 * Return Values:
 *	mdi_pathinfo node handle
 *	NULL
 * Notes:
 *	Caller need not hold any locks to call this function.
 */
mdi_pathinfo_t *
mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
{
	mdi_phci_t *ph;
	mdi_vhci_t *vh;
	mdi_client_t *ct;
	mdi_pathinfo_t *pip = NULL;

	MDI_DEBUG(2, (MDI_NOTE, pdip,
	    "caddr@%s paddr@%s", caddr ? caddr : "", paddr ? paddr : ""));
	if ((pdip == NULL) || (paddr == NULL)) {
		return (NULL);
	}
	ph = i_devi_get_phci(pdip);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, Nothing more to do.
		 */
		MDI_DEBUG(2, (MDI_WARN, pdip, "invalid phci"));
		return (NULL);
	}

	vh = ph->ph_vhci;
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, Nothing more to do.
		 */
		MDI_DEBUG(2, (MDI_WARN, pdip, "invalid vhci"));
		return (NULL);
	}

	/*
	 * Look for pathinfo node identified by paddr.
	 */
	if (caddr == NULL) {
		/*
		 * Find a mdi_pathinfo node under pHCI list for a matching
		 * unit address.
		 */
		MDI_PHCI_LOCK(ph);
		if (MDI_PHCI_IS_OFFLINE(ph)) {
			MDI_DEBUG(2, (MDI_WARN, pdip,
			    "offline phci %p", (void *)ph));
			MDI_PHCI_UNLOCK(ph);
			return (NULL);
		}
		pip = (mdi_pathinfo_t *)ph->ph_path_head;

		/* Linear scan of the pHCI path list by unit address. */
		while (pip != NULL) {
			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
				break;
			}
			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		}
		MDI_PHCI_UNLOCK(ph);
		MDI_DEBUG(2, (MDI_NOTE, pdip,
		    "found %s %p", mdi_pi_spathname(pip), (void *)pip));
		return (pip);
	}

	/*
	 * XXX - Is the rest of the code in this function really necessary?
	 * The consumers of mdi_pi_find() can search for the desired pathinfo
	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
	 * whether the search is based on the pathinfo nodes attached to
	 * the pHCI or the client node, the result will be the same.
	 */

	/*
	 * Find the client device corresponding to 'caddr'
	 */
	MDI_VHCI_CLIENT_LOCK(vh);

	/*
	 * XXX - Passing NULL to the following function works as long as the
	 * the client addresses (caddr) are unique per vhci basis.
	 */
	ct = i_mdi_client_find(vh, NULL, caddr);
	if (ct == NULL) {
		/*
		 * Client not found, Obviously mdi_pathinfo node has not been
		 * created yet.
		 */
		MDI_VHCI_CLIENT_UNLOCK(vh);
		MDI_DEBUG(2, (MDI_NOTE, pdip,
		    "client not found for caddr @%s", caddr ? caddr : ""));
		return (NULL);
	}

	/*
	 * Hold the client lock and look for a mdi_pathinfo node with matching
	 * pHCI and paddr
	 */
	MDI_CLIENT_LOCK(ct);

	/*
	 * Release the global mutex as it is no more needed. Note: We always
	 * respect the locking order while acquiring.
	 */
	MDI_VHCI_CLIENT_UNLOCK(vh);

	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Compare the unit address
		 */
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
	MDI_CLIENT_UNLOCK(ct);
	MDI_DEBUG(2, (MDI_NOTE, pdip,
	    "found: %s %p", mdi_pi_spathname(pip), (void *)pip));
	return (pip);
}

/*
 * mdi_pi_alloc():
 *	Allocate and initialize a new instance of a mdi_pathinfo node.
 *	The mdi_pathinfo node returned by this function identifies a
 *	unique device path is capable of having properties attached
 *	and passed to mdi_pi_online() to fully attach and online the
 *	path and client device node.
 *	The mdi_pathinfo node returned by this function must be
 *	destroyed using mdi_pi_free() if the path is no longer
 *	operational or if the caller fails to attach a client device
 *	node when calling mdi_pi_online(). The framework will not free
 *	the resources allocated.
 *	This function can be called from both interrupt and kernel
 *	contexts.  DDI_NOSLEEP flag should be used while calling
 *	from interrupt contexts.
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 *	MDI_NOMEM
 */
/*ARGSUSED*/
int
mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
    char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
{
	mdi_vhci_t *vh;
	mdi_phci_t *ph;
	mdi_client_t *ct;
	mdi_pathinfo_t *pip = NULL;
	dev_info_t *cdip;
	int rv = MDI_NOMEM;
	int path_allocated = 0;

	MDI_DEBUG(2, (MDI_NOTE, pdip,
	    "cname %s: caddr@%s paddr@%s",
	    cname ? cname : "", caddr ? caddr : "", paddr ? paddr : ""));

	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
	    ret_pip == NULL) {
		/* Nothing more to do */
		return (MDI_FAILURE);
	}

	*ret_pip = NULL;

	/* No allocations on detaching pHCI */
	if (DEVI_IS_DETACHING(pdip)) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (MDI_WARN, pdip,
		    "!detaching pHCI=%p", (void *)pdip));
		return (MDI_FAILURE);
	}

	ph = i_devi_get_phci(pdip);
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (MDI_WARN, pdip,
		    "!invalid pHCI=%p", (void *)pdip));
		return (MDI_FAILURE);
	}

	MDI_PHCI_LOCK(ph);
	vh = ph->ph_vhci;
	if (vh == NULL) {
		/* Invalid vHCI device, return failure */
		MDI_DEBUG(1, (MDI_WARN, pdip,
		    "!invalid vHCI=%p", (void *)pdip));
		MDI_PHCI_UNLOCK(ph);
		return (MDI_FAILURE);
	}

	if (MDI_PHCI_IS_READY(ph) == 0) {
		/*
		 * Do not allow new node creation when pHCI is in
		 * offline/suspended states
		 */
		MDI_DEBUG(1, (MDI_WARN, pdip,
		    "pHCI=%p is not ready", (void *)ph));
		MDI_PHCI_UNLOCK(ph);
		return (MDI_BUSY);
	}
	/* Marked unstable for the duration of the allocation. */
	MDI_PHCI_UNSTABLE(ph);
	MDI_PHCI_UNLOCK(ph);

	/* look for a matching client, create one if not found */
	MDI_VHCI_CLIENT_LOCK(vh);
	ct = i_mdi_client_find(vh, cname, caddr);
	if (ct == NULL) {
		ct = i_mdi_client_alloc(vh, cname, caddr);
		ASSERT(ct != NULL);
	}

	if (ct->ct_dip == NULL) {
		/*
		 * Allocate a devinfo node
		 */
		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
		    compatible, ncompatible);
		if (ct->ct_dip == NULL) {
			(void) i_mdi_client_free(vh, ct);
			/* rv is still MDI_NOMEM here */
			goto fail;
		}
	}
	cdip = ct->ct_dip;

	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;

	/* Check whether a pathinfo already exists for this (pHCI, paddr). */
	MDI_CLIENT_LOCK(ct);
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Compare the unit address
		 */
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
	MDI_CLIENT_UNLOCK(ct);

	if (pip == NULL) {
		/*
		 * This is a new path for this client device. Allocate and
		 * initialize a new pathinfo node
		 */
		pip = i_mdi_pi_alloc(ph, paddr, ct);
		ASSERT(pip != NULL);
		path_allocated = 1;
	}
	rv = MDI_SUCCESS;

fail:
	/*
	 * Release the global mutex.
	 */
	MDI_VHCI_CLIENT_UNLOCK(vh);

	/*
	 * Mark the pHCI as stable
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	*ret_pip = pip;

	MDI_DEBUG(2, (MDI_NOTE, pdip,
	    "alloc %s %p", mdi_pi_spathname(pip), (void *)pip));

	if (path_allocated)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}

/*
 * mdi_pi_alloc():
 *	Convenience wrapper around mdi_pi_alloc_compatible() with no
 *	compatible-name list.  See the block comment above for the
 *	full contract.
 */
/*ARGSUSED*/
int
mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
    int flags, mdi_pathinfo_t **ret_pip)
{
	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
	    flags, ret_pip));
}

/*
 * i_mdi_pi_alloc():
 *	Allocate a mdi_pathinfo node and add to the pHCI path list
 * Return Values:
 *	mdi_pathinfo
 */
/*ARGSUSED*/
static mdi_pathinfo_t *
i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
{
	mdi_pathinfo_t *pip;
	int ct_circular;
	int ph_circular;
	static char path[MAXPATHLEN];	/* mdi_pathmap_mutex protects */
	char *path_persistent;
	int path_instance;
	mod_hash_val_t hv;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));

	pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
	mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
	/* New paths start in INIT and are TRANSIENT until fully set up. */
	MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
	    MDI_PATHINFO_STATE_TRANSIENT;

	/* Inherit any disable flags currently set on the pHCI. */
	if (MDI_PHCI_IS_USER_DISABLED(ph))
		MDI_PI_SET_USER_DISABLE(pip);

	if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
		MDI_PI_SET_DRV_DISABLE_TRANS(pip);

	if (MDI_PHCI_IS_DRV_DISABLED(ph))
		MDI_PI_SET_DRV_DISABLE(pip);

	MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
	cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
	MDI_PI(pip)->pi_client = ct;
	MDI_PI(pip)->pi_phci = ph;
	MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
	(void) strcpy(MDI_PI(pip)->pi_addr, paddr);

	/*
	 * We form the "path" to the pathinfo node, and see if we have
	 * already allocated a 'path_instance' for that "path". If so,
	 * we use the already allocated 'path_instance'. If not, we
	 * allocate a new 'path_instance' and associate it with a copy of
	 * the "path" string (which is never freed). The association
	 * between a 'path_instance' this "path" string persists until
	 * reboot.
	 */
	mutex_enter(&mdi_pathmap_mutex);
	(void) ddi_pathname(ph->ph_dip, path);
	(void) sprintf(path + strlen(path), "/%s@%s",
	    mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
	if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) {
		path_instance = (uint_t)(intptr_t)hv;
	} else {
		/* allocate a new 'path_instance' and persistent "path" */
		path_instance = mdi_pathmap_instance++;
		path_persistent = i_ddi_strdup(path, KM_SLEEP);
		(void) mod_hash_insert(mdi_pathmap_bypath,
		    (mod_hash_key_t)path_persistent,
		    (mod_hash_val_t)(intptr_t)path_instance);
		(void) mod_hash_insert(mdi_pathmap_byinstance,
		    (mod_hash_key_t)(intptr_t)path_instance,
		    (mod_hash_val_t)path_persistent);

		/* create shortpath name */
		(void) snprintf(path, sizeof(path), "%s%d/%s@%s",
		    ddi_driver_name(ph->ph_dip), ddi_get_instance(ph->ph_dip),
		    mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
		path_persistent = i_ddi_strdup(path, KM_SLEEP);
		(void) mod_hash_insert(mdi_pathmap_sbyinstance,
		    (mod_hash_key_t)(intptr_t)path_instance,
		    (mod_hash_val_t)path_persistent);
	}
	mutex_exit(&mdi_pathmap_mutex);
	MDI_PI(pip)->pi_path_instance = path_instance;

	(void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
	ASSERT(MDI_PI(pip)->pi_prop != NULL);
	MDI_PI(pip)->pi_pprivate = NULL;
	MDI_PI(pip)->pi_cprivate = NULL;
	MDI_PI(pip)->pi_vprivate = NULL;
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_phci_link = NULL;
	MDI_PI(pip)->pi_ref_cnt = 0;
	MDI_PI(pip)->pi_kstats = NULL;
	MDI_PI(pip)->pi_preferred = 1;
	cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);

	/*
	 * Lock both dev_info nodes against changes in parallel.
	 *
	 * The ndi_devi_enter(Client), is atypical since the client is a leaf.
	 * This atypical operation is done to synchronize pathinfo nodes
	 * during devinfo snapshot (see di_register_pip) by 'pretending' that
	 * the pathinfo nodes are children of the Client.
	 */
	ndi_devi_enter(ct->ct_dip, &ct_circular);
	ndi_devi_enter(ph->ph_dip, &ph_circular);

	i_mdi_phci_add_path(ph, pip);
	i_mdi_client_add_path(ct, pip);

	ndi_devi_exit(ph->ph_dip, ph_circular);
	ndi_devi_exit(ct->ct_dip, ct_circular);

	return (pip);
}

/*
 * mdi_pi_pathname_by_instance():
 *	Lookup of "path" by 'path_instance'. Return "path".
 *	NOTE: returned "path" remains valid forever (until reboot).
 */
char *
mdi_pi_pathname_by_instance(int path_instance)
{
	char *path;
	mod_hash_val_t hv;

	/* mdi_pathmap lookup of "path" by 'path_instance' */
	mutex_enter(&mdi_pathmap_mutex);
	if (mod_hash_find(mdi_pathmap_byinstance,
	    (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
		path = (char *)hv;
	else
		path = NULL;
	mutex_exit(&mdi_pathmap_mutex);
	return (path);
}

/*
 * mdi_pi_spathname_by_instance():
 *	Lookup of "shortpath" by 'path_instance'. Return "shortpath".
 *	NOTE: returned "shortpath" remains valid forever (until reboot).
3050 */ 3051 char * 3052 mdi_pi_spathname_by_instance(int path_instance) 3053 { 3054 char *path; 3055 mod_hash_val_t hv; 3056 3057 /* mdi_pathmap lookup of "path" by 'path_instance' */ 3058 mutex_enter(&mdi_pathmap_mutex); 3059 if (mod_hash_find(mdi_pathmap_sbyinstance, 3060 (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0) 3061 path = (char *)hv; 3062 else 3063 path = NULL; 3064 mutex_exit(&mdi_pathmap_mutex); 3065 return (path); 3066 } 3067 3068 3069 /* 3070 * i_mdi_phci_add_path(): 3071 * Add a mdi_pathinfo node to pHCI list. 3072 * Notes: 3073 * Caller should per-pHCI mutex 3074 */ 3075 static void 3076 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3077 { 3078 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3079 3080 MDI_PHCI_LOCK(ph); 3081 if (ph->ph_path_head == NULL) { 3082 ph->ph_path_head = pip; 3083 } else { 3084 MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip); 3085 } 3086 ph->ph_path_tail = pip; 3087 ph->ph_path_count++; 3088 MDI_PHCI_UNLOCK(ph); 3089 } 3090 3091 /* 3092 * i_mdi_client_add_path(): 3093 * Add mdi_pathinfo node to client list 3094 */ 3095 static void 3096 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip) 3097 { 3098 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); 3099 3100 MDI_CLIENT_LOCK(ct); 3101 if (ct->ct_path_head == NULL) { 3102 ct->ct_path_head = pip; 3103 } else { 3104 MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip); 3105 } 3106 ct->ct_path_tail = pip; 3107 ct->ct_path_count++; 3108 MDI_CLIENT_UNLOCK(ct); 3109 } 3110 3111 /* 3112 * mdi_pi_free(): 3113 * Free the mdi_pathinfo node and also client device node if this 3114 * is the last path to the device 3115 * Return Values: 3116 * MDI_SUCCESS 3117 * MDI_FAILURE 3118 * MDI_BUSY 3119 */ 3120 /*ARGSUSED*/ 3121 int 3122 mdi_pi_free(mdi_pathinfo_t *pip, int flags) 3123 { 3124 int rv; 3125 mdi_vhci_t *vh; 3126 mdi_phci_t *ph; 3127 mdi_client_t *ct; 3128 int (*f)(); 3129 int client_held = 0; 3130 3131 MDI_PI_LOCK(pip); 3132 ph = MDI_PI(pip)->pi_phci; 3133 ASSERT(ph != NULL); 
3134 if (ph == NULL) { 3135 /* 3136 * Invalid pHCI device, return failure 3137 */ 3138 MDI_DEBUG(1, (MDI_WARN, NULL, 3139 "!invalid pHCI: pip %s %p", 3140 mdi_pi_spathname(pip), (void *)pip)); 3141 MDI_PI_UNLOCK(pip); 3142 return (MDI_FAILURE); 3143 } 3144 3145 vh = ph->ph_vhci; 3146 ASSERT(vh != NULL); 3147 if (vh == NULL) { 3148 /* Invalid pHCI device, return failure */ 3149 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip, 3150 "!invalid vHCI: pip %s %p", 3151 mdi_pi_spathname(pip), (void *)pip)); 3152 MDI_PI_UNLOCK(pip); 3153 return (MDI_FAILURE); 3154 } 3155 3156 ct = MDI_PI(pip)->pi_client; 3157 ASSERT(ct != NULL); 3158 if (ct == NULL) { 3159 /* 3160 * Invalid Client device, return failure 3161 */ 3162 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip, 3163 "!invalid client: pip %s %p", 3164 mdi_pi_spathname(pip), (void *)pip)); 3165 MDI_PI_UNLOCK(pip); 3166 return (MDI_FAILURE); 3167 } 3168 3169 /* 3170 * Check to see for busy condition. A mdi_pathinfo can only be freed 3171 * if the node state is either offline or init and the reference count 3172 * is zero. 3173 */ 3174 if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) || 3175 MDI_PI_IS_INITING(pip))) { 3176 /* 3177 * Node is busy 3178 */ 3179 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3180 "!busy: pip %s %p", mdi_pi_spathname(pip), (void *)pip)); 3181 MDI_PI_UNLOCK(pip); 3182 return (MDI_BUSY); 3183 } 3184 3185 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3186 /* 3187 * Give a chance for pending I/Os to complete. 3188 */ 3189 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3190 "!%d cmds still pending on path: %s %p", 3191 MDI_PI(pip)->pi_ref_cnt, 3192 mdi_pi_spathname(pip), (void *)pip)); 3193 if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv, 3194 &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000), 3195 TR_CLOCK_TICK) == -1) { 3196 /* 3197 * The timeout time reached without ref_cnt being zero 3198 * being signaled. 
3199 */ 3200 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3201 "!Timeout reached on path %s %p without the cond", 3202 mdi_pi_spathname(pip), (void *)pip)); 3203 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3204 "!%d cmds still pending on path %s %p", 3205 MDI_PI(pip)->pi_ref_cnt, 3206 mdi_pi_spathname(pip), (void *)pip)); 3207 MDI_PI_UNLOCK(pip); 3208 return (MDI_BUSY); 3209 } 3210 } 3211 if (MDI_PI(pip)->pi_pm_held) { 3212 client_held = 1; 3213 } 3214 MDI_PI_UNLOCK(pip); 3215 3216 vhcache_pi_remove(vh->vh_config, MDI_PI(pip)); 3217 3218 MDI_CLIENT_LOCK(ct); 3219 3220 /* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */ 3221 MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct); 3222 3223 /* 3224 * Wait till failover is complete before removing this node. 3225 */ 3226 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 3227 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 3228 3229 MDI_CLIENT_UNLOCK(ct); 3230 MDI_VHCI_CLIENT_LOCK(vh); 3231 MDI_CLIENT_LOCK(ct); 3232 MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct); 3233 3234 if (!MDI_PI_IS_INITING(pip)) { 3235 f = vh->vh_ops->vo_pi_uninit; 3236 if (f != NULL) { 3237 rv = (*f)(vh->vh_dip, pip, 0); 3238 } 3239 } else 3240 rv = MDI_SUCCESS; 3241 3242 /* 3243 * If vo_pi_uninit() completed successfully. 3244 */ 3245 if (rv == MDI_SUCCESS) { 3246 if (client_held) { 3247 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3248 "i_mdi_pm_rele_client\n")); 3249 i_mdi_pm_rele_client(ct, 1); 3250 } 3251 i_mdi_pi_free(ph, pip, ct); 3252 if (ct->ct_path_count == 0) { 3253 /* 3254 * Client lost its last path. 
3255 * Clean up the client device 3256 */ 3257 MDI_CLIENT_UNLOCK(ct); 3258 (void) i_mdi_client_free(ct->ct_vhci, ct); 3259 MDI_VHCI_CLIENT_UNLOCK(vh); 3260 return (rv); 3261 } 3262 } 3263 MDI_CLIENT_UNLOCK(ct); 3264 MDI_VHCI_CLIENT_UNLOCK(vh); 3265 3266 if (rv == MDI_FAILURE) 3267 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 3268 3269 return (rv); 3270 } 3271 3272 /* 3273 * i_mdi_pi_free(): 3274 * Free the mdi_pathinfo node 3275 */ 3276 static void 3277 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct) 3278 { 3279 int ct_circular; 3280 int ph_circular; 3281 3282 ASSERT(MDI_CLIENT_LOCKED(ct)); 3283 3284 /* 3285 * remove any per-path kstats 3286 */ 3287 i_mdi_pi_kstat_destroy(pip); 3288 3289 /* See comments in i_mdi_pi_alloc() */ 3290 ndi_devi_enter(ct->ct_dip, &ct_circular); 3291 ndi_devi_enter(ph->ph_dip, &ph_circular); 3292 3293 i_mdi_client_remove_path(ct, pip); 3294 i_mdi_phci_remove_path(ph, pip); 3295 3296 ndi_devi_exit(ph->ph_dip, ph_circular); 3297 ndi_devi_exit(ct->ct_dip, ct_circular); 3298 3299 mutex_destroy(&MDI_PI(pip)->pi_mutex); 3300 cv_destroy(&MDI_PI(pip)->pi_state_cv); 3301 cv_destroy(&MDI_PI(pip)->pi_ref_cv); 3302 if (MDI_PI(pip)->pi_addr) { 3303 kmem_free(MDI_PI(pip)->pi_addr, 3304 strlen(MDI_PI(pip)->pi_addr) + 1); 3305 MDI_PI(pip)->pi_addr = NULL; 3306 } 3307 3308 if (MDI_PI(pip)->pi_prop) { 3309 (void) nvlist_free(MDI_PI(pip)->pi_prop); 3310 MDI_PI(pip)->pi_prop = NULL; 3311 } 3312 kmem_free(pip, sizeof (struct mdi_pathinfo)); 3313 } 3314 3315 3316 /* 3317 * i_mdi_phci_remove_path(): 3318 * Remove a mdi_pathinfo node from pHCI list. 
3319 * Notes: 3320 * Caller should hold per-pHCI mutex 3321 */ 3322 static void 3323 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3324 { 3325 mdi_pathinfo_t *prev = NULL; 3326 mdi_pathinfo_t *path = NULL; 3327 3328 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3329 3330 MDI_PHCI_LOCK(ph); 3331 path = ph->ph_path_head; 3332 while (path != NULL) { 3333 if (path == pip) { 3334 break; 3335 } 3336 prev = path; 3337 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3338 } 3339 3340 if (path) { 3341 ph->ph_path_count--; 3342 if (prev) { 3343 MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link; 3344 } else { 3345 ph->ph_path_head = 3346 (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3347 } 3348 if (ph->ph_path_tail == path) { 3349 ph->ph_path_tail = prev; 3350 } 3351 } 3352 3353 /* 3354 * Clear the pHCI link 3355 */ 3356 MDI_PI(pip)->pi_phci_link = NULL; 3357 MDI_PI(pip)->pi_phci = NULL; 3358 MDI_PHCI_UNLOCK(ph); 3359 } 3360 3361 /* 3362 * i_mdi_client_remove_path(): 3363 * Remove a mdi_pathinfo node from client path list. 
 */
static void
i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	mdi_pathinfo_t *prev = NULL;
	mdi_pathinfo_t *path;

	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	ASSERT(MDI_CLIENT_LOCKED(ct));
	/* Locate pip on the singly-linked client path list. */
	path = ct->ct_path_head;
	while (path != NULL) {
		if (path == pip) {
			break;
		}
		prev = path;
		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
	}

	if (path) {
		ct->ct_path_count--;
		if (prev) {
			MDI_PI(prev)->pi_client_link =
			    MDI_PI(path)->pi_client_link;
		} else {
			ct->ct_path_head =
			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
		}
		if (ct->ct_path_tail == path) {
			ct->ct_path_tail = prev;
		}
		/* Don't leave the round-robin cursor on the removed node. */
		if (ct->ct_path_last == path) {
			ct->ct_path_last = ct->ct_path_head;
		}
	}
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_client = NULL;
}

/*
 * i_mdi_pi_state_change():
 *	online a mdi_pathinfo node
 *
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
	int rv = MDI_SUCCESS;
	mdi_vhci_t *vh;
	mdi_phci_t *ph;
	mdi_client_t *ct;
	int (*f)();
	dev_info_t *cdip;

	MDI_PI_LOCK(pip);

	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (MDI_WARN, NULL,
		    "!invalid phci: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh);
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
		    "!invalid vhci: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		return (MDI_FAILURE);
	}

	ct =
MDI_PI(pip)->pi_client; 3451 ASSERT(ct != NULL); 3452 if (ct == NULL) { 3453 /* 3454 * Invalid client device, fail the request 3455 */ 3456 MDI_PI_UNLOCK(pip); 3457 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip, 3458 "!invalid client: pip %s %p", 3459 mdi_pi_spathname(pip), (void *)pip)); 3460 return (MDI_FAILURE); 3461 } 3462 3463 /* 3464 * If this path has not been initialized yet, Callback vHCI driver's 3465 * pathinfo node initialize entry point 3466 */ 3467 3468 if (MDI_PI_IS_INITING(pip)) { 3469 MDI_PI_UNLOCK(pip); 3470 f = vh->vh_ops->vo_pi_init; 3471 if (f != NULL) { 3472 rv = (*f)(vh->vh_dip, pip, 0); 3473 if (rv != MDI_SUCCESS) { 3474 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3475 "!vo_pi_init failed: vHCI %p, pip %s %p", 3476 (void *)vh, mdi_pi_spathname(pip), 3477 (void *)pip)); 3478 return (MDI_FAILURE); 3479 } 3480 } 3481 MDI_PI_LOCK(pip); 3482 MDI_PI_CLEAR_TRANSIENT(pip); 3483 } 3484 3485 /* 3486 * Do not allow state transition when pHCI is in offline/suspended 3487 * states 3488 */ 3489 i_mdi_phci_lock(ph, pip); 3490 if (MDI_PHCI_IS_READY(ph) == 0) { 3491 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3492 "!pHCI not ready, pHCI=%p", (void *)ph)); 3493 MDI_PI_UNLOCK(pip); 3494 i_mdi_phci_unlock(ph); 3495 return (MDI_BUSY); 3496 } 3497 MDI_PHCI_UNSTABLE(ph); 3498 i_mdi_phci_unlock(ph); 3499 3500 /* 3501 * Check if mdi_pathinfo state is in transient state. 3502 * If yes, offlining is in progress and wait till transient state is 3503 * cleared. 3504 */ 3505 if (MDI_PI_IS_TRANSIENT(pip)) { 3506 while (MDI_PI_IS_TRANSIENT(pip)) { 3507 cv_wait(&MDI_PI(pip)->pi_state_cv, 3508 &MDI_PI(pip)->pi_mutex); 3509 } 3510 } 3511 3512 /* 3513 * Grab the client lock in reverse order sequence and release the 3514 * mdi_pathinfo mutex. 
3515 */ 3516 i_mdi_client_lock(ct, pip); 3517 MDI_PI_UNLOCK(pip); 3518 3519 /* 3520 * Wait till failover state is cleared 3521 */ 3522 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 3523 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 3524 3525 /* 3526 * Mark the mdi_pathinfo node state as transient 3527 */ 3528 MDI_PI_LOCK(pip); 3529 switch (state) { 3530 case MDI_PATHINFO_STATE_ONLINE: 3531 MDI_PI_SET_ONLINING(pip); 3532 break; 3533 3534 case MDI_PATHINFO_STATE_STANDBY: 3535 MDI_PI_SET_STANDBYING(pip); 3536 break; 3537 3538 case MDI_PATHINFO_STATE_FAULT: 3539 /* 3540 * Mark the pathinfo state as FAULTED 3541 */ 3542 MDI_PI_SET_FAULTING(pip); 3543 MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR); 3544 break; 3545 3546 case MDI_PATHINFO_STATE_OFFLINE: 3547 /* 3548 * ndi_devi_offline() cannot hold pip or ct locks. 3549 */ 3550 MDI_PI_UNLOCK(pip); 3551 3552 /* 3553 * If this is a user initiated path online->offline operation 3554 * who's success would transition a client from DEGRADED to 3555 * FAILED then only proceed if we can offline the client first. 
3556 */ 3557 cdip = ct->ct_dip; 3558 if ((flag & NDI_USER_REQ) && 3559 MDI_PI_IS_ONLINE(pip) && 3560 (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) { 3561 i_mdi_client_unlock(ct); 3562 rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN); 3563 if (rv != NDI_SUCCESS) { 3564 /* 3565 * Convert to MDI error code 3566 */ 3567 switch (rv) { 3568 case NDI_BUSY: 3569 rv = MDI_BUSY; 3570 break; 3571 default: 3572 rv = MDI_FAILURE; 3573 break; 3574 } 3575 goto state_change_exit; 3576 } else { 3577 i_mdi_client_lock(ct, NULL); 3578 } 3579 } 3580 /* 3581 * Mark the mdi_pathinfo node state as transient 3582 */ 3583 MDI_PI_LOCK(pip); 3584 MDI_PI_SET_OFFLINING(pip); 3585 break; 3586 } 3587 MDI_PI_UNLOCK(pip); 3588 MDI_CLIENT_UNSTABLE(ct); 3589 i_mdi_client_unlock(ct); 3590 3591 f = vh->vh_ops->vo_pi_state_change; 3592 if (f != NULL) 3593 rv = (*f)(vh->vh_dip, pip, state, 0, flag); 3594 3595 MDI_CLIENT_LOCK(ct); 3596 MDI_PI_LOCK(pip); 3597 if (rv == MDI_NOT_SUPPORTED) { 3598 MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct); 3599 } 3600 if (rv != MDI_SUCCESS) { 3601 MDI_DEBUG(2, (MDI_WARN, ct->ct_dip, 3602 "vo_pi_state_change failed: rv %x", rv)); 3603 } 3604 if (MDI_PI_IS_TRANSIENT(pip)) { 3605 if (rv == MDI_SUCCESS) { 3606 MDI_PI_CLEAR_TRANSIENT(pip); 3607 } else { 3608 MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip); 3609 } 3610 } 3611 3612 /* 3613 * Wake anyone waiting for this mdi_pathinfo node 3614 */ 3615 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3616 MDI_PI_UNLOCK(pip); 3617 3618 /* 3619 * Mark the client device as stable 3620 */ 3621 MDI_CLIENT_STABLE(ct); 3622 if (rv == MDI_SUCCESS) { 3623 if (ct->ct_unstable == 0) { 3624 cdip = ct->ct_dip; 3625 3626 /* 3627 * Onlining the mdi_pathinfo node will impact the 3628 * client state Update the client and dev_info node 3629 * state accordingly 3630 */ 3631 rv = NDI_SUCCESS; 3632 i_mdi_client_update_state(ct); 3633 switch (MDI_CLIENT_STATE(ct)) { 3634 case MDI_CLIENT_STATE_OPTIMAL: 3635 case MDI_CLIENT_STATE_DEGRADED: 3636 if (cdip && 
!i_ddi_devi_attached(cdip) && 3637 ((state == MDI_PATHINFO_STATE_ONLINE) || 3638 (state == MDI_PATHINFO_STATE_STANDBY))) { 3639 3640 /* 3641 * Must do ndi_devi_online() through 3642 * hotplug thread for deferred 3643 * attach mechanism to work 3644 */ 3645 MDI_CLIENT_UNLOCK(ct); 3646 rv = ndi_devi_online(cdip, 0); 3647 MDI_CLIENT_LOCK(ct); 3648 if ((rv != NDI_SUCCESS) && 3649 (MDI_CLIENT_STATE(ct) == 3650 MDI_CLIENT_STATE_DEGRADED)) { 3651 MDI_DEBUG(1, (MDI_WARN, cdip, 3652 "!ndi_devi_online failed " 3653 "error %x", rv)); 3654 } 3655 rv = NDI_SUCCESS; 3656 } 3657 break; 3658 3659 case MDI_CLIENT_STATE_FAILED: 3660 /* 3661 * This is the last path case for 3662 * non-user initiated events. 3663 */ 3664 if ((flag & NDI_USER_REQ) || 3665 cdip == NULL || i_ddi_node_state(cdip) < 3666 DS_INITIALIZED) 3667 break; 3668 3669 MDI_CLIENT_UNLOCK(ct); 3670 rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN); 3671 MDI_CLIENT_LOCK(ct); 3672 3673 if (rv != NDI_SUCCESS) { 3674 /* 3675 * Reset client flags to online as the 3676 * path could not be offlined. 3677 */ 3678 MDI_DEBUG(1, (MDI_WARN, cdip, 3679 "!ndi_devi_offline failed: %d", 3680 rv)); 3681 MDI_CLIENT_SET_ONLINE(ct); 3682 } 3683 break; 3684 } 3685 /* 3686 * Convert to MDI error code 3687 */ 3688 switch (rv) { 3689 case NDI_SUCCESS: 3690 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3691 i_mdi_report_path_state(ct, pip); 3692 rv = MDI_SUCCESS; 3693 break; 3694 case NDI_BUSY: 3695 rv = MDI_BUSY; 3696 break; 3697 default: 3698 rv = MDI_FAILURE; 3699 break; 3700 } 3701 } 3702 } 3703 MDI_CLIENT_UNLOCK(ct); 3704 3705 state_change_exit: 3706 /* 3707 * Mark the pHCI as stable again. 3708 */ 3709 MDI_PHCI_LOCK(ph); 3710 MDI_PHCI_STABLE(ph); 3711 MDI_PHCI_UNLOCK(ph); 3712 return (rv); 3713 } 3714 3715 /* 3716 * mdi_pi_online(): 3717 * Place the path_info node in the online state. The path is 3718 * now available to be selected by mdi_select_path() for 3719 * transporting I/O requests to client devices. 
3720 * Return Values: 3721 * MDI_SUCCESS 3722 * MDI_FAILURE 3723 */ 3724 int 3725 mdi_pi_online(mdi_pathinfo_t *pip, int flags) 3726 { 3727 mdi_client_t *ct = MDI_PI(pip)->pi_client; 3728 int client_held = 0; 3729 int rv; 3730 3731 ASSERT(ct != NULL); 3732 rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags); 3733 if (rv != MDI_SUCCESS) 3734 return (rv); 3735 3736 MDI_PI_LOCK(pip); 3737 if (MDI_PI(pip)->pi_pm_held == 0) { 3738 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3739 "i_mdi_pm_hold_pip %p", (void *)pip)); 3740 i_mdi_pm_hold_pip(pip); 3741 client_held = 1; 3742 } 3743 MDI_PI_UNLOCK(pip); 3744 3745 if (client_held) { 3746 MDI_CLIENT_LOCK(ct); 3747 if (ct->ct_power_cnt == 0) { 3748 rv = i_mdi_power_all_phci(ct); 3749 } 3750 3751 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3752 "i_mdi_pm_hold_client %p", (void *)ct)); 3753 i_mdi_pm_hold_client(ct, 1); 3754 MDI_CLIENT_UNLOCK(ct); 3755 } 3756 3757 return (rv); 3758 } 3759 3760 /* 3761 * mdi_pi_standby(): 3762 * Place the mdi_pathinfo node in standby state 3763 * 3764 * Return Values: 3765 * MDI_SUCCESS 3766 * MDI_FAILURE 3767 */ 3768 int 3769 mdi_pi_standby(mdi_pathinfo_t *pip, int flags) 3770 { 3771 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags)); 3772 } 3773 3774 /* 3775 * mdi_pi_fault(): 3776 * Place the mdi_pathinfo node in fault'ed state 3777 * Return Values: 3778 * MDI_SUCCESS 3779 * MDI_FAILURE 3780 */ 3781 int 3782 mdi_pi_fault(mdi_pathinfo_t *pip, int flags) 3783 { 3784 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags)); 3785 } 3786 3787 /* 3788 * mdi_pi_offline(): 3789 * Offline a mdi_pathinfo node. 3790 * Return Values: 3791 * MDI_SUCCESS 3792 * MDI_FAILURE 3793 */ 3794 int 3795 mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3796 { 3797 int ret, client_held = 0; 3798 mdi_client_t *ct; 3799 3800 /* 3801 * Original code overloaded NDI_DEVI_REMOVE to this interface, and 3802 * used it to mean "user initiated operation" (i.e. devctl). 
Callers 3803 * should now just use NDI_USER_REQ. 3804 */ 3805 if (flags & NDI_DEVI_REMOVE) { 3806 flags &= ~NDI_DEVI_REMOVE; 3807 flags |= NDI_USER_REQ; 3808 } 3809 3810 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); 3811 3812 if (ret == MDI_SUCCESS) { 3813 MDI_PI_LOCK(pip); 3814 if (MDI_PI(pip)->pi_pm_held) { 3815 client_held = 1; 3816 } 3817 MDI_PI_UNLOCK(pip); 3818 3819 if (client_held) { 3820 ct = MDI_PI(pip)->pi_client; 3821 MDI_CLIENT_LOCK(ct); 3822 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3823 "i_mdi_pm_rele_client\n")); 3824 i_mdi_pm_rele_client(ct, 1); 3825 MDI_CLIENT_UNLOCK(ct); 3826 } 3827 } 3828 3829 return (ret); 3830 } 3831 3832 /* 3833 * i_mdi_pi_offline(): 3834 * Offline a mdi_pathinfo node and call the vHCI driver's callback 3835 */ 3836 static int 3837 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3838 { 3839 dev_info_t *vdip = NULL; 3840 mdi_vhci_t *vh = NULL; 3841 mdi_client_t *ct = NULL; 3842 int (*f)(); 3843 int rv; 3844 3845 MDI_PI_LOCK(pip); 3846 ct = MDI_PI(pip)->pi_client; 3847 ASSERT(ct != NULL); 3848 3849 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3850 /* 3851 * Give a chance for pending I/Os to complete. 3852 */ 3853 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3854 "!%d cmds still pending on path %s %p", 3855 MDI_PI(pip)->pi_ref_cnt, mdi_pi_spathname(pip), 3856 (void *)pip)); 3857 if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv, 3858 &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000), 3859 TR_CLOCK_TICK) == -1) { 3860 /* 3861 * The timeout time reached without ref_cnt being zero 3862 * being signaled. 
3863 */ 3864 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3865 "!Timeout reached on path %s %p without the cond", 3866 mdi_pi_spathname(pip), (void *)pip)); 3867 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3868 "!%d cmds still pending on path %s %p", 3869 MDI_PI(pip)->pi_ref_cnt, 3870 mdi_pi_spathname(pip), (void *)pip)); 3871 } 3872 } 3873 vh = ct->ct_vhci; 3874 vdip = vh->vh_dip; 3875 3876 /* 3877 * Notify vHCI that has registered this event 3878 */ 3879 ASSERT(vh->vh_ops); 3880 f = vh->vh_ops->vo_pi_state_change; 3881 3882 if (f != NULL) { 3883 MDI_PI_UNLOCK(pip); 3884 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, 3885 flags)) != MDI_SUCCESS) { 3886 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3887 "!vo_path_offline failed: vdip %s%d %p: path %s %p", 3888 ddi_driver_name(vdip), ddi_get_instance(vdip), 3889 (void *)vdip, mdi_pi_spathname(pip), (void *)pip)); 3890 } 3891 MDI_PI_LOCK(pip); 3892 } 3893 3894 /* 3895 * Set the mdi_pathinfo node state and clear the transient condition 3896 */ 3897 MDI_PI_SET_OFFLINE(pip); 3898 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3899 MDI_PI_UNLOCK(pip); 3900 3901 MDI_CLIENT_LOCK(ct); 3902 if (rv == MDI_SUCCESS) { 3903 if (ct->ct_unstable == 0) { 3904 dev_info_t *cdip = ct->ct_dip; 3905 3906 /* 3907 * Onlining the mdi_pathinfo node will impact the 3908 * client state Update the client and dev_info node 3909 * state accordingly 3910 */ 3911 i_mdi_client_update_state(ct); 3912 rv = NDI_SUCCESS; 3913 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 3914 if (cdip && 3915 (i_ddi_node_state(cdip) >= 3916 DS_INITIALIZED)) { 3917 MDI_CLIENT_UNLOCK(ct); 3918 rv = ndi_devi_offline(cdip, 3919 NDI_DEVFS_CLEAN); 3920 MDI_CLIENT_LOCK(ct); 3921 if (rv != NDI_SUCCESS) { 3922 /* 3923 * ndi_devi_offline failed. 3924 * Reset client flags to 3925 * online. 
3926 */ 3927 MDI_DEBUG(4, (MDI_WARN, cdip, 3928 "ndi_devi_offline failed: " 3929 "error %x", rv)); 3930 MDI_CLIENT_SET_ONLINE(ct); 3931 } 3932 } 3933 } 3934 /* 3935 * Convert to MDI error code 3936 */ 3937 switch (rv) { 3938 case NDI_SUCCESS: 3939 rv = MDI_SUCCESS; 3940 break; 3941 case NDI_BUSY: 3942 rv = MDI_BUSY; 3943 break; 3944 default: 3945 rv = MDI_FAILURE; 3946 break; 3947 } 3948 } 3949 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3950 i_mdi_report_path_state(ct, pip); 3951 } 3952 3953 MDI_CLIENT_UNLOCK(ct); 3954 3955 /* 3956 * Change in the mdi_pathinfo node state will impact the client state 3957 */ 3958 MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip, 3959 "ct = %p pip = %p", (void *)ct, (void *)pip)); 3960 return (rv); 3961 } 3962 3963 /* 3964 * i_mdi_pi_online(): 3965 * Online a mdi_pathinfo node and call the vHCI driver's callback 3966 */ 3967 static int 3968 i_mdi_pi_online(mdi_pathinfo_t *pip, int flags) 3969 { 3970 mdi_vhci_t *vh = NULL; 3971 mdi_client_t *ct = NULL; 3972 mdi_phci_t *ph; 3973 int (*f)(); 3974 int rv; 3975 3976 MDI_PI_LOCK(pip); 3977 ph = MDI_PI(pip)->pi_phci; 3978 vh = ph->ph_vhci; 3979 ct = MDI_PI(pip)->pi_client; 3980 MDI_PI_SET_ONLINING(pip) 3981 MDI_PI_UNLOCK(pip); 3982 f = vh->vh_ops->vo_pi_state_change; 3983 if (f != NULL) 3984 rv = (*f)(vh->vh_dip, pip, MDI_PATHINFO_STATE_ONLINE, 0, 3985 flags); 3986 MDI_CLIENT_LOCK(ct); 3987 MDI_PI_LOCK(pip); 3988 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3989 MDI_PI_UNLOCK(pip); 3990 if (rv == MDI_SUCCESS) { 3991 dev_info_t *cdip = ct->ct_dip; 3992 3993 rv = MDI_SUCCESS; 3994 i_mdi_client_update_state(ct); 3995 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL || 3996 MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) { 3997 if (cdip && !i_ddi_devi_attached(cdip)) { 3998 MDI_CLIENT_UNLOCK(ct); 3999 rv = ndi_devi_online(cdip, 0); 4000 MDI_CLIENT_LOCK(ct); 4001 if ((rv != NDI_SUCCESS) && 4002 (MDI_CLIENT_STATE(ct) == 4003 MDI_CLIENT_STATE_DEGRADED)) { 4004 MDI_CLIENT_SET_OFFLINE(ct); 4005 } 4006 if (rv != 
NDI_SUCCESS) { 4007 /* Reset the path state */ 4008 MDI_PI_LOCK(pip); 4009 MDI_PI(pip)->pi_state = 4010 MDI_PI_OLD_STATE(pip); 4011 MDI_PI_UNLOCK(pip); 4012 } 4013 } 4014 } 4015 switch (rv) { 4016 case NDI_SUCCESS: 4017 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 4018 i_mdi_report_path_state(ct, pip); 4019 rv = MDI_SUCCESS; 4020 break; 4021 case NDI_BUSY: 4022 rv = MDI_BUSY; 4023 break; 4024 default: 4025 rv = MDI_FAILURE; 4026 break; 4027 } 4028 } else { 4029 /* Reset the path state */ 4030 MDI_PI_LOCK(pip); 4031 MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip); 4032 MDI_PI_UNLOCK(pip); 4033 } 4034 MDI_CLIENT_UNLOCK(ct); 4035 return (rv); 4036 } 4037 4038 /* 4039 * mdi_pi_get_node_name(): 4040 * Get the name associated with a mdi_pathinfo node. 4041 * Since pathinfo nodes are not directly named, we 4042 * return the node_name of the client. 4043 * 4044 * Return Values: 4045 * char * 4046 */ 4047 char * 4048 mdi_pi_get_node_name(mdi_pathinfo_t *pip) 4049 { 4050 mdi_client_t *ct; 4051 4052 if (pip == NULL) 4053 return (NULL); 4054 ct = MDI_PI(pip)->pi_client; 4055 if ((ct == NULL) || (ct->ct_dip == NULL)) 4056 return (NULL); 4057 return (ddi_node_name(ct->ct_dip)); 4058 } 4059 4060 /* 4061 * mdi_pi_get_addr(): 4062 * Get the unit address associated with a mdi_pathinfo node 4063 * 4064 * Return Values: 4065 * char * 4066 */ 4067 char * 4068 mdi_pi_get_addr(mdi_pathinfo_t *pip) 4069 { 4070 if (pip == NULL) 4071 return (NULL); 4072 4073 return (MDI_PI(pip)->pi_addr); 4074 } 4075 4076 /* 4077 * mdi_pi_get_path_instance(): 4078 * Get the 'path_instance' of a mdi_pathinfo node 4079 * 4080 * Return Values: 4081 * path_instance 4082 */ 4083 int 4084 mdi_pi_get_path_instance(mdi_pathinfo_t *pip) 4085 { 4086 if (pip == NULL) 4087 return (0); 4088 4089 return (MDI_PI(pip)->pi_path_instance); 4090 } 4091 4092 /* 4093 * mdi_pi_pathname(): 4094 * Return pointer to path to pathinfo node. 
4095 */ 4096 char * 4097 mdi_pi_pathname(mdi_pathinfo_t *pip) 4098 { 4099 if (pip == NULL) 4100 return (NULL); 4101 return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip))); 4102 } 4103 4104 /* 4105 * mdi_pi_spathname(): 4106 * Return pointer to shortpath to pathinfo node. Used for debug 4107 * messages, so return "" instead of NULL when unknown. 4108 */ 4109 char * 4110 mdi_pi_spathname(mdi_pathinfo_t *pip) 4111 { 4112 char *spath = ""; 4113 4114 if (pip) { 4115 spath = mdi_pi_spathname_by_instance( 4116 mdi_pi_get_path_instance(pip)); 4117 if (spath == NULL) 4118 spath = ""; 4119 } 4120 return (spath); 4121 } 4122 4123 char * 4124 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path) 4125 { 4126 char *obp_path = NULL; 4127 if ((pip == NULL) || (path == NULL)) 4128 return (NULL); 4129 4130 if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) { 4131 (void) strcpy(path, obp_path); 4132 (void) mdi_prop_free(obp_path); 4133 } else { 4134 path = NULL; 4135 } 4136 return (path); 4137 } 4138 4139 int 4140 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component) 4141 { 4142 dev_info_t *pdip; 4143 char *obp_path = NULL; 4144 int rc = MDI_FAILURE; 4145 4146 if (pip == NULL) 4147 return (MDI_FAILURE); 4148 4149 pdip = mdi_pi_get_phci(pip); 4150 if (pdip == NULL) 4151 return (MDI_FAILURE); 4152 4153 obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 4154 4155 if (ddi_pathname_obp(pdip, obp_path) == NULL) { 4156 (void) ddi_pathname(pdip, obp_path); 4157 } 4158 4159 if (component) { 4160 (void) strncat(obp_path, "/", MAXPATHLEN); 4161 (void) strncat(obp_path, component, MAXPATHLEN); 4162 } 4163 rc = mdi_prop_update_string(pip, "obp-path", obp_path); 4164 4165 if (obp_path) 4166 kmem_free(obp_path, MAXPATHLEN); 4167 return (rc); 4168 } 4169 4170 /* 4171 * mdi_pi_get_client(): 4172 * Get the client devinfo associated with a mdi_pathinfo node 4173 * 4174 * Return Values: 4175 * Handle to client device dev_info node 4176 */ 4177 dev_info_t * 4178 
mdi_pi_get_client(mdi_pathinfo_t *pip) 4179 { 4180 dev_info_t *dip = NULL; 4181 if (pip) { 4182 dip = MDI_PI(pip)->pi_client->ct_dip; 4183 } 4184 return (dip); 4185 } 4186 4187 /* 4188 * mdi_pi_get_phci(): 4189 * Get the pHCI devinfo associated with the mdi_pathinfo node 4190 * Return Values: 4191 * Handle to dev_info node 4192 */ 4193 dev_info_t * 4194 mdi_pi_get_phci(mdi_pathinfo_t *pip) 4195 { 4196 dev_info_t *dip = NULL; 4197 mdi_phci_t *ph; 4198 4199 if (pip) { 4200 ph = MDI_PI(pip)->pi_phci; 4201 if (ph) 4202 dip = ph->ph_dip; 4203 } 4204 return (dip); 4205 } 4206 4207 /* 4208 * mdi_pi_get_client_private(): 4209 * Get the client private information associated with the 4210 * mdi_pathinfo node 4211 */ 4212 void * 4213 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 4214 { 4215 void *cprivate = NULL; 4216 if (pip) { 4217 cprivate = MDI_PI(pip)->pi_cprivate; 4218 } 4219 return (cprivate); 4220 } 4221 4222 /* 4223 * mdi_pi_set_client_private(): 4224 * Set the client private information in the mdi_pathinfo node 4225 */ 4226 void 4227 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 4228 { 4229 if (pip) { 4230 MDI_PI(pip)->pi_cprivate = priv; 4231 } 4232 } 4233 4234 /* 4235 * mdi_pi_get_phci_private(): 4236 * Get the pHCI private information associated with the 4237 * mdi_pathinfo node 4238 */ 4239 caddr_t 4240 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 4241 { 4242 caddr_t pprivate = NULL; 4243 4244 if (pip) { 4245 pprivate = MDI_PI(pip)->pi_pprivate; 4246 } 4247 return (pprivate); 4248 } 4249 4250 /* 4251 * mdi_pi_set_phci_private(): 4252 * Set the pHCI private information in the mdi_pathinfo node 4253 */ 4254 void 4255 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 4256 { 4257 if (pip) { 4258 MDI_PI(pip)->pi_pprivate = priv; 4259 } 4260 } 4261 4262 /* 4263 * mdi_pi_get_state(): 4264 * Get the mdi_pathinfo node state. 
Transient states are internal 4265 * and not provided to the users 4266 */ 4267 mdi_pathinfo_state_t 4268 mdi_pi_get_state(mdi_pathinfo_t *pip) 4269 { 4270 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 4271 4272 if (pip) { 4273 if (MDI_PI_IS_TRANSIENT(pip)) { 4274 /* 4275 * mdi_pathinfo is in state transition. Return the 4276 * last good state. 4277 */ 4278 state = MDI_PI_OLD_STATE(pip); 4279 } else { 4280 state = MDI_PI_STATE(pip); 4281 } 4282 } 4283 return (state); 4284 } 4285 4286 /* 4287 * mdi_pi_get_flags(): 4288 * Get the mdi_pathinfo node flags. 4289 */ 4290 uint_t 4291 mdi_pi_get_flags(mdi_pathinfo_t *pip) 4292 { 4293 return (pip ? MDI_PI(pip)->pi_flags : 0); 4294 } 4295 4296 /* 4297 * Note that the following function needs to be the new interface for 4298 * mdi_pi_get_state when mpxio gets integrated to ON. 4299 */ 4300 int 4301 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 4302 uint32_t *ext_state) 4303 { 4304 *state = MDI_PATHINFO_STATE_INIT; 4305 4306 if (pip) { 4307 if (MDI_PI_IS_TRANSIENT(pip)) { 4308 /* 4309 * mdi_pathinfo is in state transition. Return the 4310 * last good state. 
4311 */ 4312 *state = MDI_PI_OLD_STATE(pip); 4313 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 4314 } else { 4315 *state = MDI_PI_STATE(pip); 4316 *ext_state = MDI_PI_EXT_STATE(pip); 4317 } 4318 } 4319 return (MDI_SUCCESS); 4320 } 4321 4322 /* 4323 * mdi_pi_get_preferred: 4324 * Get the preferred path flag 4325 */ 4326 int 4327 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 4328 { 4329 if (pip) { 4330 return (MDI_PI(pip)->pi_preferred); 4331 } 4332 return (0); 4333 } 4334 4335 /* 4336 * mdi_pi_set_preferred: 4337 * Set the preferred path flag 4338 */ 4339 void 4340 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 4341 { 4342 if (pip) { 4343 MDI_PI(pip)->pi_preferred = preferred; 4344 } 4345 } 4346 4347 /* 4348 * mdi_pi_set_state(): 4349 * Set the mdi_pathinfo node state 4350 */ 4351 void 4352 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 4353 { 4354 uint32_t ext_state; 4355 4356 if (pip) { 4357 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 4358 MDI_PI(pip)->pi_state = state; 4359 MDI_PI(pip)->pi_state |= ext_state; 4360 4361 /* Path has changed state, invalidate DINFOCACHE snap shot. */ 4362 i_ddi_di_cache_invalidate(); 4363 } 4364 } 4365 4366 /* 4367 * Property functions: 4368 */ 4369 int 4370 i_map_nvlist_error_to_mdi(int val) 4371 { 4372 int rv; 4373 4374 switch (val) { 4375 case 0: 4376 rv = DDI_PROP_SUCCESS; 4377 break; 4378 case EINVAL: 4379 case ENOTSUP: 4380 rv = DDI_PROP_INVAL_ARG; 4381 break; 4382 case ENOMEM: 4383 rv = DDI_PROP_NO_MEMORY; 4384 break; 4385 default: 4386 rv = DDI_PROP_NOT_FOUND; 4387 break; 4388 } 4389 return (rv); 4390 } 4391 4392 /* 4393 * mdi_pi_get_next_prop(): 4394 * Property walk function. The caller should hold mdi_pi_lock() 4395 * and release by calling mdi_pi_unlock() at the end of walk to 4396 * get a consistent value. 
 */
nvpair_t *
mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
{
	/* NULL prev starts the walk; returns NULL at end of list */
	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
		return (NULL);
	}
	ASSERT(MDI_PI_LOCKED(pip));
	return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
}

/*
 * mdi_prop_remove():
 *		Remove the named property from the named list.  A NULL name
 *		removes every property on the pathinfo node.
 */
int
mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
{
	if (pip == NULL) {
		return (DDI_PROP_NOT_FOUND);
	}
	ASSERT(!MDI_PI_LOCKED(pip));
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_prop == NULL) {
		MDI_PI_UNLOCK(pip);
		return (DDI_PROP_NOT_FOUND);
	}
	if (name) {
		(void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
	} else {
		char		nvp_name[MAXNAMELEN];
		nvpair_t	*nvp;
		nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
		while (nvp) {
			nvpair_t	*next;
			next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp);
			/*
			 * Copy the name before removal: nvlist_remove_all
			 * frees the pair, invalidating nvpair_name(nvp).
			 */
			(void) snprintf(nvp_name, sizeof(nvp_name), "%s",
			    nvpair_name(nvp));
			(void) nvlist_remove_all(MDI_PI(pip)->pi_prop,
			    nvp_name);
			nvp = next;
		}
	}
	MDI_PI_UNLOCK(pip);
	return (DDI_PROP_SUCCESS);
}

/*
 * mdi_prop_size():
 *		Get buffer size needed to pack the property data.
 *		Caller should hold the mdi_pathinfo_t lock to get a consistent
 *		buffer size.
 */
int
mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
{
	int	rv;
	size_t	bufsize;

	*buflenp = 0;
	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
		return (DDI_PROP_NOT_FOUND);
	}
	ASSERT(MDI_PI_LOCKED(pip));
	rv = nvlist_size(MDI_PI(pip)->pi_prop,
	    &bufsize, NV_ENCODE_NATIVE);
	*buflenp = bufsize;
	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_pack():
 *		pack the property list.  The caller should hold the
 *		mdi_pathinfo_t node to get a consistent data
 */
int
mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen)
{
	int	rv;
	size_t	bufsize;

	if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) {
		return (DDI_PROP_NOT_FOUND);
	}

	ASSERT(MDI_PI_LOCKED(pip));

	bufsize = buflen;
	rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize,
	    NV_ENCODE_NATIVE, KM_SLEEP);

	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_update_byte():
 *		Create/Update a byte property
 */
int
mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data)
{
	int rv;

	if (pip == NULL) {
		return (DDI_PROP_INVAL_ARG);
	}
	ASSERT(!MDI_PI_LOCKED(pip));
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_prop == NULL) {
		MDI_PI_UNLOCK(pip);
		return (DDI_PROP_NOT_FOUND);
	}
	rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data);
	MDI_PI_UNLOCK(pip);
	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_update_byte_array():
 *		Create/Update a byte array property
 */
int
mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data,
    uint_t nelements)
{
	int rv;

	if (pip == NULL) {
		return (DDI_PROP_INVAL_ARG);
	}
	ASSERT(!MDI_PI_LOCKED(pip));
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_prop == NULL) {
		MDI_PI_UNLOCK(pip);
		return (DDI_PROP_NOT_FOUND);
	}
	rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements);
	MDI_PI_UNLOCK(pip);
	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_update_int():
 *		Create/Update a 32 bit integer property
 */
int
mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data)
{
	int rv;

	if (pip == NULL) {
		return (DDI_PROP_INVAL_ARG);
	}
	ASSERT(!MDI_PI_LOCKED(pip));
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_prop == NULL) {
		MDI_PI_UNLOCK(pip);
		return (DDI_PROP_NOT_FOUND);
	}
	rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data);
	MDI_PI_UNLOCK(pip);
	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_update_int64():
 *		Create/Update a 64 bit integer property
 */
int
mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data)
{
	int rv;

	if (pip == NULL) {
		return (DDI_PROP_INVAL_ARG);
	}
	ASSERT(!MDI_PI_LOCKED(pip));
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_prop == NULL) {
		MDI_PI_UNLOCK(pip);
		return (DDI_PROP_NOT_FOUND);
	}
	rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data);
	MDI_PI_UNLOCK(pip);
	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_update_int_array():
 *		Create/Update a int array property
 */
int
mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data,
    uint_t nelements)
{
	int rv;

	if (pip == NULL) {
		return (DDI_PROP_INVAL_ARG);
	}
	ASSERT(!MDI_PI_LOCKED(pip));
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_prop == NULL) {
		MDI_PI_UNLOCK(pip);
		return (DDI_PROP_NOT_FOUND);
	}
	rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data,
	    nelements);
	MDI_PI_UNLOCK(pip);
	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_update_string():
 *		Create/Update a string property
 */
int
mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data)
{
	int rv;

	if (pip == NULL) {
		return (DDI_PROP_INVAL_ARG);
	}
	ASSERT(!MDI_PI_LOCKED(pip));
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_prop == NULL) {
		MDI_PI_UNLOCK(pip);
		return (DDI_PROP_NOT_FOUND);
	}
	rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data);
	MDI_PI_UNLOCK(pip);
	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_update_string_array():
 *		Create/Update a string array property
 */
int
mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data,
    uint_t nelements)
{
	int rv;

	if (pip == NULL) {
		return (DDI_PROP_INVAL_ARG);
	}
	ASSERT(!MDI_PI_LOCKED(pip));
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_prop == NULL) {
		MDI_PI_UNLOCK(pip);
		return (DDI_PROP_NOT_FOUND);
	}
	rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data,
	    nelements);
	MDI_PI_UNLOCK(pip);
	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_lookup_byte():
 *		Look for byte property identified by name. The data returned
 *		is the actual property and valid as long as mdi_pathinfo_t node
 *		is alive.
 */
int
mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data)
{
	int rv;

	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
		return (DDI_PROP_NOT_FOUND);
	}
	rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data);
	return (i_map_nvlist_error_to_mdi(rv));
}


/*
 * mdi_prop_lookup_byte_array():
 *		Look for byte array property identified by name. The data
 *		returned is the actual property and valid as long as
 *		mdi_pathinfo_t node is alive.
 */
int
mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data,
    uint_t *nelements)
{
	int rv;

	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
		return (DDI_PROP_NOT_FOUND);
	}
	rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data,
	    nelements);
	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_lookup_int():
 *		Look for int property identified by name. The data returned
 *		is the actual property and valid as long as mdi_pathinfo_t
 *		node is alive.
 */
int
mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data)
{
	int rv;

	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
		return (DDI_PROP_NOT_FOUND);
	}
	rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data);
	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_lookup_int64():
 *		Look for int64 property identified by name. The data returned
 *		is the actual property and valid as long as mdi_pathinfo_t node
 *		is alive.
 */
int
mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data)
{
	int rv;
	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
		return (DDI_PROP_NOT_FOUND);
	}
	rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data);
	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_lookup_int_array():
 *		Look for int array property identified by name. The data
 *		returned is the actual property and valid as long as
 *		mdi_pathinfo_t node is alive.
 */
int
mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data,
    uint_t *nelements)
{
	int rv;

	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
		return (DDI_PROP_NOT_FOUND);
	}
	rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name,
	    (int32_t **)data, nelements);
	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_lookup_string():
 *		Look for string property identified by name. The data
 *		returned is the actual property and valid as long as
 *		mdi_pathinfo_t node is alive.
 */
int
mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data)
{
	int rv;

	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
		return (DDI_PROP_NOT_FOUND);
	}
	rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data);
	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_lookup_string_array():
 *		Look for string array property identified by name. The data
 *		returned is the actual property and valid as long as
 *		mdi_pathinfo_t node is alive.
 */
int
mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
    uint_t *nelements)
{
	int rv;

	if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
		return (DDI_PROP_NOT_FOUND);
	}
	rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
	    nelements);
	return (i_map_nvlist_error_to_mdi(rv));
}

/*
 * mdi_prop_free():
 *		Symmetrical function to ddi_prop_free(). nvlist_lookup_xx()
 *		functions return the pointer to actual property data and not a
 *		copy of it. So the data returned is valid as long as
 *		mdi_pathinfo_t node is valid.
 */
/*ARGSUSED*/
int
mdi_prop_free(void *data)
{
	/* nvlist-backed properties are not copies; nothing to free here. */
	return (DDI_PROP_SUCCESS);
}

/*
 * i_mdi_report_path_state():
 *	Log a console/syslog message describing the overall client
 *	multipath state and the state of one path (pip) to it.
 *	Caller must hold the client lock (asserted below); the message
 *	is emitted only once per REPORT_DEV request.
 */
/*ARGSUSED*/
static void
i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	char		*ct_path;
	char		*ct_status;
	char		*status;
	dev_info_t	*cdip = ct->ct_dip;
	char		lb_buf[64];
	int		report_lb_c = 0, report_lb_p = 0;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	/* Nothing to report without an instantiated client dip. */
	if ((cdip == NULL) || (ddi_get_instance(cdip) == -1) ||
	    (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
		return;
	}
	/* Translate the aggregate client state to a display string. */
	if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
		ct_status = "optimal";
		report_lb_c = 1;
	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
		ct_status = "degraded";
	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
		ct_status = "failed";
	} else {
		ct_status = "unknown";
	}

	lb_buf[0] = 0;		/* not interested in load balancing config */

	/* Translate the per-path state to a display string. */
	if (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)) {
		status = "removed";
	} else if (MDI_PI_IS_OFFLINE(pip)) {
		status = "offline";
	} else if (MDI_PI_IS_ONLINE(pip)) {
		status = "online";
		report_lb_p = 1;
	} else if (MDI_PI_IS_STANDBY(pip)) {
		status = "standby";
	} else if (MDI_PI_IS_FAULT(pip)) {
		status = "faulted";
	} else {
		status = "unknown";
	}

	if (cdip) {
		ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);

		/*
		 * NOTE: Keeping "multipath status: %s" and
		 * "Load balancing: %s" format unchanged in case someone
		 * scrubs /var/adm/messages looking for these messages.
		 */
		if (report_lb_c && report_lb_p) {
			/*
			 * Both client and path are healthy: include the
			 * load-balancing configuration in the message.
			 */
			if (ct->ct_lb == LOAD_BALANCE_LBA) {
				(void) snprintf(lb_buf, sizeof (lb_buf),
				    "%s, region-size: %d", mdi_load_balance_lba,
				    ct->ct_lb_args->region_size);
			} else if (ct->ct_lb == LOAD_BALANCE_NONE) {
				(void) snprintf(lb_buf, sizeof (lb_buf),
				    "%s", mdi_load_balance_none);
			} else {
				(void) snprintf(lb_buf, sizeof (lb_buf), "%s",
				    mdi_load_balance_rr);
			}

			cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
			    "?%s (%s%d) multipath status: %s: "
			    "path %d %s is %s: Load balancing: %s\n",
			    ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
			    ddi_get_instance(cdip), ct_status,
			    mdi_pi_get_path_instance(pip),
			    mdi_pi_spathname(pip), status, lb_buf);
		} else {
			cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
			    "?%s (%s%d) multipath status: %s: "
			    "path %d %s is %s\n",
			    ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
			    ddi_get_instance(cdip), ct_status,
			    mdi_pi_get_path_instance(pip),
			    mdi_pi_spathname(pip), status);
		}

		kmem_free(ct_path, MAXPATHLEN);
		MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
	}
}

#ifdef DEBUG
/*
 * i_mdi_log():
 *	Utility function for error message management
 *
 *	NOTE: Implementation takes care of trailing \n for cmn_err,
 *	MDI_DEBUG should not terminate fmt strings with \n.
 *
 *	NOTE: If the level is >= 2, and there is no leading !?^
 *	then a leading ! is implied (but can be overriden via
 *	mdi_debug_consoleonly).  If you are using kmdb on the console,
 *	consider setting mdi_debug_consoleonly to 1 as an aid.
 *
 *	The first character of the formatted message selects the
 *	destination, mirroring cmn_err(9F) conventions:
 *	'!' log only, '?' boot (log) only, '^' console only.
 */
/*PRINTFLIKE4*/
static void
i_mdi_log(int level, const char *func, dev_info_t *dip, const char *fmt, ...)
{
	char		name[MAXNAMELEN];
	char		buf[512];
	char		*bp;
	va_list		ap;
	int		log_only = 0;
	int		boot_only = 0;
	int		console_only = 0;

	/* Prefix with "<driver><instance>: " when a dip is supplied. */
	if (dip) {
		(void) snprintf(name, sizeof(name), "%s%d: ",
		    ddi_driver_name(dip), ddi_get_instance(dip));
	} else {
		name[0] = 0;
	}

	va_start(ap, fmt);
	(void) vsnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);

	/* Consume an optional destination-selector character. */
	switch (buf[0]) {
	case '!':
		bp = &buf[1];
		log_only = 1;
		break;
	case '?':
		bp = &buf[1];
		boot_only = 1;
		break;
	case '^':
		bp = &buf[1];
		console_only = 1;
		break;
	default:
		if (level >= 2)
			log_only = 1;		/* ! implied */
		bp = buf;
		break;
	}
	/* Global overrides: force everything to the log ... */
	if (mdi_debug_logonly) {
		log_only = 1;
		boot_only = 0;
		console_only = 0;
	}
	/* ... or force everything to the console (useful under kmdb). */
	if (mdi_debug_consoleonly) {
		log_only = 0;
		boot_only = 0;
		console_only = 1;
		level = CE_NOTE;
		goto console;
	}

	switch (level) {
	case CE_NOTE:
		level = CE_CONT;
		/* FALLTHROUGH */
	case CE_CONT:
		/* CE_CONT needs an explicit trailing newline. */
		if (boot_only) {
			cmn_err(level, "?mdi: %s%s: %s\n", name, func, bp);
		} else if (console_only) {
			cmn_err(level, "^mdi: %s%s: %s\n", name, func, bp);
		} else if (log_only) {
			cmn_err(level, "!mdi: %s%s: %s\n", name, func, bp);
		} else {
			cmn_err(level, "mdi: %s%s: %s\n", name, func, bp);
		}
		break;

	case CE_WARN:
	case CE_PANIC:
	console:
		/* cmn_err supplies the newline for these levels. */
		if (boot_only) {
			cmn_err(level, "?mdi: %s%s: %s", name, func, bp);
		} else if (console_only) {
			cmn_err(level, "^mdi: %s%s: %s", name, func, bp);
		} else if (log_only) {
			cmn_err(level, "!mdi: %s%s: %s", name, func, bp);
		} else {
			cmn_err(level, "mdi: %s%s: %s", name, func, bp);
		}
		break;
	default:
		cmn_err(level, "mdi: %s%s", name, bp);
		break;
	}
}
#endif	/* DEBUG */

/*
 * i_mdi_client_online():
 *	Client online notification: mark the client online and
 *	(re)establish the binding with its dev_info node.
 */
void
i_mdi_client_online(dev_info_t *ct_dip)
{
	mdi_client_t	*ct;

	/*
	 * Client online notification. Mark client state as online
	 * restore our binding with dev_info node
	 */
	ct = i_devi_get_client(ct_dip);
	ASSERT(ct != NULL);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_SET_ONLINE(ct);
	/* catch for any memory leaks */
	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
	ct->ct_dip = ct_dip;

	/* Power up all pHCIs if no power holds are in place yet. */
	if (ct->ct_power_cnt == 0)
		(void) i_mdi_power_all_phci(ct);

	MDI_DEBUG(4, (MDI_NOTE, ct_dip,
	    "i_mdi_pm_hold_client %p", (void *)ct));
	i_mdi_pm_hold_client(ct, 1);

	MDI_CLIENT_UNLOCK(ct);
}

/*
 * i_mdi_phci_online():
 *	pHCI online notification: mark the pHCI state online.
 */
void
i_mdi_phci_online(dev_info_t *ph_dip)
{
	mdi_phci_t	*ph;

	/* pHCI online notification. Mark state accordingly */
	ph = i_devi_get_phci(ph_dip);
	ASSERT(ph != NULL);
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_SET_ONLINE(ph);
	MDI_PHCI_UNLOCK(ph);
}

/*
 * mdi_devi_online():
 *	Online notification from NDI framework on pHCI/client
 *	device online.
 * Return Values:
 *	NDI_SUCCESS
 *	MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_devi_online(dev_info_t *dip, uint_t flags)
{
	/* A dip may be a pHCI, a client, or both; handle each role. */
	if (MDI_PHCI(dip)) {
		i_mdi_phci_online(dip);
	}

	if (MDI_CLIENT(dip)) {
		i_mdi_client_online(dip);
	}
	return (NDI_SUCCESS);
}

/*
 * mdi_devi_offline():
 *	Offline notification from NDI framework on pHCI/Client device
 *	offline.
 *
 * Return Values:
 *	NDI_SUCCESS
 *	NDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_devi_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;

	/* Offline the client role first; bail if it refuses. */
	if (MDI_CLIENT(dip)) {
		rv = i_mdi_client_offline(dip, flags);
		if (rv != NDI_SUCCESS)
			return (rv);
	}

	if (MDI_PHCI(dip)) {
		rv = i_mdi_phci_offline(dip, flags);

		if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
			/* set client back online */
			i_mdi_client_online(dip);
		}
	}

	return (rv);
}

/*ARGSUSED*/
static int
i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*failed_pip = NULL;
	dev_info_t	*cdip;

	/*
	 * pHCI component offline notification
	 * Make sure that this pHCI instance is free to be offlined.
	 * If it is OK to proceed, Offline and remove all the child
	 * mdi_pathinfo nodes.  This process automatically offlines
	 * corresponding client devices, for which this pHCI provides
	 * critical services.
	 */
	ph = i_devi_get_phci(dip);
	MDI_DEBUG(2, (MDI_NOTE, dip,
	    "called %p %p", (void *)dip, (void *)ph));
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_DEBUG(1, (MDI_WARN, dip,
		    "!pHCI already offlined: %p", (void *)dip));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_SUCCESS);
	}

	/*
	 * Check to see if the pHCI can be offlined
	 */
	if (ph->ph_unstable) {
		MDI_DEBUG(1, (MDI_WARN, dip,
		    "!One or more target devices are in transient state. "
		    "This device can not be removed at this moment. "
		    "Please try again later."));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/*
	 * Pass 1: walk all paths; offline any client for which this pHCI
	 * carries the last usable path.  On failure remember where we
	 * stopped (failed_pip) so pass 2 can roll back.
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		/*
		 * The mdi_pathinfo state is OK. Check the client state.
		 * If failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			/*
			 * Failover is in progress, Fail the DR
			 */
			MDI_DEBUG(1, (MDI_WARN, dip,
			    "!pHCI device is busy. "
			    "This device can not be removed at this moment. "
			    "Please try again later."));
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see of we are removing the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			/*
			 * NOTE(review): ph lock is dropped and reacquired
			 * around ndi_devi_offline(); 'next' was captured
			 * before the drop and could be stale if the path
			 * list changes meanwhile — preexisting behavior,
			 * confirm before relying on it.
			 */
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			if (ndi_devi_offline(cdip,
			    NDI_DEVFS_CLEAN) != NDI_SUCCESS) {
				/*
				 * ndi_devi_offline() failed.
				 * This pHCI provides the critical path
				 * to one or more client devices.
				 * Return busy.
				 */
				MDI_PHCI_LOCK(ph);
				MDI_DEBUG(1, (MDI_WARN, dip,
				    "!pHCI device is busy. "
				    "This device can not be removed at this "
				    "moment. Please try again later."));
				failed_pip = pip;
				break;
			} else {
				MDI_PHCI_LOCK(ph);
				pip = next;
			}
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	/*
	 * Pass 2 (rollback): a client refused to offline above, so put
	 * the clients we already touched back to a state matching their
	 * computed multipath state, then fail the DR with NDI_BUSY.
	 */
	if (failed_pip) {
		pip = ph->ph_path_head;
		while (pip != failed_pip) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_online(cdip, 0);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_offline(cdip,
					    NDI_DEVFS_CLEAN);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;
			}
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			pip = next;
		}
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/*
	 * Mark the pHCI as offline
	 */
	MDI_PHCI_SET_OFFLINE(ph);

	/*
	 * Mark the child mdi_pathinfo nodes as transient
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_SET_OFFLINING(pip);
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);
	/*
	 * Give a chance for any pending commands to execute
	 */
	delay_random(mdi_delay);
	MDI_PHCI_LOCK(ph);
	pip = ph->ph_path_head;
	while (pip != NULL) {
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		(void) i_mdi_pi_offline(pip, flags);
		MDI_PI_LOCK(pip);
		ct = MDI_PI(pip)->pi_client;
		if (!MDI_PI_IS_OFFLINE(pip)) {
			/* A path refused to offline: undo and report busy. */
			MDI_DEBUG(1, (MDI_WARN, dip,
			    "!pHCI device is busy. "
			    "This device can not be removed at this moment. "
			    "Please try again later."));
			MDI_PI_UNLOCK(pip);
			MDI_PHCI_SET_ONLINE(ph);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);

	return (rv);
}

/*
 * mdi_phci_mark_retiring():
 *	Retire pass 1: for every client whose last usable path goes
 *	through this pHCI, mark the client dip as retiring.
 */
void
mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	dev_info_t	*cdip;

	if (!MDI_PHCI(dip))
		return;

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		return;
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		/* has no last path */
		MDI_PHCI_UNLOCK(ph);
		return;
	}

	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		MDI_PI_UNLOCK(pip);

		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			/* Last path. Mark client dip as retiring */
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			(void) e_ddi_mark_retiring(cdip, cons_array);
			MDI_PHCI_LOCK(ph);
			pip = next;
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	MDI_PHCI_UNLOCK(ph);

	return;
}

/*
 * mdi_phci_retire_notify():
 *	Retire pass 2: ask for retire constraints.  Clears *constraint
 *	(vetoes the retire) if the pHCI or any client is in a transient
 *	state; otherwise forwards the notify to each last-path client.
 */
void
mdi_phci_retire_notify(dev_info_t *dip, int *constraint)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	dev_info_t	*cdip;

	if (!MDI_PHCI(dip))
		return;

	ph = i_devi_get_phci(dip);
	if (ph == NULL)
		return;

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_PHCI_UNLOCK(ph);
		/* not last path */
		return;
	}

	if (ph->ph_unstable) {
		MDI_PHCI_UNLOCK(ph);
		/* can't check for constraints */
		*constraint = 0;
		return;
	}

	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		/*
		 * The mdi_pathinfo state is OK. Check the client state.
		 * If failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			/*
			 * Failover is in progress, can't check for constraints
			 */
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			*constraint = 0;
			return;
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see of we are retiring the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			(void) e_ddi_retire_notify(cdip, constraint);
			MDI_PHCI_LOCK(ph);
			pip = next;
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	MDI_PHCI_UNLOCK(ph);

	return;
}

/*
 * offline the path(s) hanging off the pHCI. If the
 * last path to any client, check that constraints
 * have been applied.
 *
 * If constraint is 0, we aren't going to retire the
 * pHCI. However we still need to go through the paths
 * calling e_ddi_retire_finalize() to clear their
 * contract barriers.
 */
void
mdi_phci_retire_finalize(dev_info_t *dip, int phci_only, void *constraint)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	dev_info_t	*cdip;
	int		unstable = 0;
	int		tmp_constraint;

	if (!MDI_PHCI(dip))
		return;

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		/* no last path and no pips */
		return;
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_PHCI_UNLOCK(ph);
		/* no last path and no pips */
		return;
	}

	/*
	 * Check to see if the pHCI can be offlined
	 */
	if (ph->ph_unstable) {
		unstable = 1;
	}

	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		/*
		 * if failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			unstable = 1;
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see of we are removing the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (!phci_only && cdip &&
		    (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			/*
			 * This is the last path to this client.
			 *
			 * Constraint will only be set to 1 if this client can
			 * be retired (as already determined by
			 * mdi_phci_retire_notify). However we don't actually
			 * need to retire the client (we just retire the last
			 * path - MPXIO will then fail all I/Os to the client).
			 * But we still need to call e_ddi_retire_finalize so
			 * the contract barriers can be cleared. Therefore we
			 * temporarily set constraint = 0 so that the client
			 * dip is not retired.
			 */
			tmp_constraint = 0;
			(void) e_ddi_retire_finalize(cdip, &tmp_constraint);
			MDI_PHCI_LOCK(ph);
			pip = next;
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	/* Constraint vetoed and not a pHCI-only retire: nothing more to do. */
	if (!phci_only && *((int *)constraint) == 0) {
		MDI_PHCI_UNLOCK(ph);
		return;
	}

	/*
	 * Cannot offline pip(s)
	 */
	if (unstable) {
		cmn_err(CE_WARN, "%s%d: mdi_phci_retire_finalize: "
		    "pHCI in transient state, cannot retire",
		    ddi_driver_name(dip), ddi_get_instance(dip));
		MDI_PHCI_UNLOCK(ph);
		return;
	}

	/*
	 * Mark the pHCI as offline
	 */
	MDI_PHCI_SET_OFFLINE(ph);

	/*
	 * Mark the child mdi_pathinfo nodes as transient
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_SET_OFFLINING(pip);
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);
	/*
	 * Give a chance for any pending commands to execute
	 */
	delay_random(mdi_delay);
	MDI_PHCI_LOCK(ph);
	pip = ph->ph_path_head;
	while (pip != NULL) {
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		(void) i_mdi_pi_offline(pip, 0);
		MDI_PI_LOCK(pip);
		ct = MDI_PI(pip)->pi_client;
		if (!MDI_PI_IS_OFFLINE(pip)) {
			cmn_err(CE_WARN, "mdi_phci_retire_finalize: "
			    "path %d %s busy, cannot offline",
			    mdi_pi_get_path_instance(pip),
			    mdi_pi_spathname(pip));
			MDI_PI_UNLOCK(pip);
			MDI_PHCI_SET_ONLINE(ph);
			MDI_PHCI_UNLOCK(ph);
			return;
		}
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);

	return;
}

/*
 * mdi_phci_unretire():
 *	Undo a retire: bring the pHCI back online and online each of
 *	its paths again.
 */
void
mdi_phci_unretire(dev_info_t *dip)
{
	mdi_phci_t	*ph;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;

	ASSERT(MDI_PHCI(dip));

	/*
	 * Online the phci
	 */
	i_mdi_phci_online(dip);

	ph = i_devi_get_phci(dip);
	MDI_PHCI_LOCK(ph);
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_UNLOCK(pip);
		(void) i_mdi_pi_online(pip, 0);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);
}

/*ARGSUSED*/
static int
i_mdi_client_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;
	mdi_client_t	*ct;

	/*
	 * Client component to go offline.  Make sure that we are
	 * not in failing over state and update client state
	 * accordingly
	 */
	ct = i_devi_get_client(dip);
	MDI_DEBUG(2, (MDI_NOTE, dip,
	    "called %p %p", (void *)dip, (void *)ct));
	if (ct != NULL) {
		MDI_CLIENT_LOCK(ct);
		if (ct->ct_unstable) {
			/*
			 * One or more paths are in transient state,
			 * Dont allow offline of a client device
			 */
			MDI_DEBUG(1, (MDI_WARN, dip,
			    "!One or more paths to "
			    "this device are in transient state. "
			    "This device can not be removed at this moment. "
			    "Please try again later."));
			MDI_CLIENT_UNLOCK(ct);
			return (NDI_BUSY);
		}
		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Failover is in progress, Dont allow DR of
			 * a client device
			 */
			MDI_DEBUG(1, (MDI_WARN, dip,
			    "!Client device is Busy. "
			    "This device can not be removed at this moment. "
			    "Please try again later."));
			MDI_CLIENT_UNLOCK(ct);
			return (NDI_BUSY);
		}
		MDI_CLIENT_SET_OFFLINE(ct);

		/*
		 * Unbind our relationship with the dev_info node
		 */
		if (flags & NDI_DEVI_REMOVE) {
			ct->ct_dip = NULL;
		}
		MDI_CLIENT_UNLOCK(ct);
	}
	return (rv);
}

/*
 * mdi_pre_attach():
 *	Pre attach() notification handler
 */
/*ARGSUSED*/
int
mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	/* don't support old DDI_PM_RESUME */
	if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
	    (cmd == DDI_PM_RESUME))
		return (DDI_FAILURE);

	return (DDI_SUCCESS);
}

/*
 * mdi_post_attach():
 *	Post attach() notification handler
 */
/*ARGSUSED*/
void
mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_vhci_t	*vh;

	if (MDI_PHCI(dip)) {
		ph = i_devi_get_phci(dip);
		ASSERT(ph != NULL);

		MDI_PHCI_LOCK(ph);
		switch (cmd) {
		case DDI_ATTACH:
			MDI_DEBUG(2, (MDI_NOTE, dip,
			    "phci post_attach called %p", (void *)ph));
			if (error == DDI_SUCCESS) {
				MDI_PHCI_SET_ATTACH(ph);
			} else {
				/* attach failed: revert to detached state */
				MDI_DEBUG(1, (MDI_NOTE, dip,
				    "!pHCI post_attach failed: error %d",
				    error));
				MDI_PHCI_SET_DETACH(ph);
			}
			break;

		case DDI_RESUME:
			MDI_DEBUG(2, (MDI_NOTE, dip,
			    "pHCI post_resume: called %p", (void *)ph));
			if (error == DDI_SUCCESS) {
				MDI_PHCI_SET_RESUME(ph);
			} else {
				MDI_PHCI_SET_SUSPEND(ph);
			}
			break;
		}
		MDI_PHCI_UNLOCK(ph);
	}

	if (MDI_CLIENT(dip)) {
		ct = i_devi_get_client(dip);
		ASSERT(ct != NULL);

		MDI_CLIENT_LOCK(ct);
		switch (cmd) {
		case DDI_ATTACH:
			MDI_DEBUG(2, (MDI_NOTE, dip,
			    "client post_attach called %p", (void *)ct));
			if (error != DDI_SUCCESS) {
				/* attach failed: drop the pm holds taken */
				MDI_DEBUG(1, (MDI_NOTE, dip,
				    "!client post_attach failed: error %d",
				    error));
				MDI_CLIENT_SET_DETACH(ct);
				MDI_DEBUG(4, (MDI_WARN, dip,
				    "i_mdi_pm_reset_client"));
				i_mdi_pm_reset_client(ct);
				break;
			}

			/*
			 * Client device has successfully attached, inform
			 * the vhci.
			 */
			vh = ct->ct_vhci;
			if (vh->vh_ops->vo_client_attached)
				(*vh->vh_ops->vo_client_attached)(dip);

			MDI_CLIENT_SET_ATTACH(ct);
			break;

		case DDI_RESUME:
			MDI_DEBUG(2, (MDI_NOTE, dip,
			    "client post_attach: called %p", (void *)ct));
			if (error == DDI_SUCCESS) {
				MDI_CLIENT_SET_RESUME(ct);
			} else {
				MDI_DEBUG(1, (MDI_NOTE, dip,
				    "!client post_resume failed: error %d",
				    error));
				MDI_CLIENT_SET_SUSPEND(ct);
			}
			break;
		}
		MDI_CLIENT_UNLOCK(ct);
	}
}

/*
 * mdi_pre_detach():
 *	Pre detach notification handler
 */
/*ARGSUSED*/
int
mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int rv = DDI_SUCCESS;

	if (MDI_CLIENT(dip)) {
		(void) i_mdi_client_pre_detach(dip, cmd);
	}

	if (MDI_PHCI(dip)) {
		rv = i_mdi_phci_pre_detach(dip, cmd);
	}

	return (rv);
}

/*ARGSUSED*/
static int
i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int		rv = DDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*failed_pip = NULL;
	mdi_pathinfo_t	*next;

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "pHCI pre_detach: called %p", (void *)ph));
		if (!MDI_PHCI_IS_OFFLINE(ph)) {
			/*
			 * mdi_pathinfo nodes are still attached to
			 * this pHCI. Fail the detach for this pHCI.
			 */
			MDI_DEBUG(2, (MDI_WARN, dip,
			    "pHCI pre_detach: paths are still attached %p",
			    (void *)ph));
			rv = DDI_FAILURE;
			break;
		}
		MDI_PHCI_SET_DETACH(ph);
		break;

	case DDI_SUSPEND:
		/*
		 * pHCI is getting suspended. Since mpxio client
		 * devices may not be suspended at this point, to avoid
		 * a potential stack overflow, it is important to suspend
		 * client devices before pHCI can be suspended.
		 */

		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "pHCI pre_suspend: called %p", (void *)ph));
		/*
		 * Suspend all the client devices accessible through this pHCI
		 */
		pip = ph->ph_path_head;
		while (pip != NULL && rv == DDI_SUCCESS) {
			dev_info_t *cdip;
			MDI_PI_LOCK(pip);
			next =
			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			MDI_PI_UNLOCK(pip);
			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
				i_mdi_client_unlock(ct);
				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
				    DDI_SUCCESS) {
					/*
					 * Suspend of one of the client
					 * device has failed.
					 */
					MDI_DEBUG(1, (MDI_WARN, dip,
					    "!suspend of device (%s%d) failed.",
					    ddi_driver_name(cdip),
					    ddi_get_instance(cdip)));
					failed_pip = pip;
					break;
				}
			} else {
				i_mdi_client_unlock(ct);
			}
			pip = next;
		}

		if (rv == DDI_SUCCESS) {
			/*
			 * Suspend of client devices is complete. Proceed
			 * with pHCI suspend.
			 */
			MDI_PHCI_SET_SUSPEND(ph);
		} else {
			/*
			 * Revert back all the suspended client device states
			 * to converse.
			 */
			pip = ph->ph_path_head;
			while (pip != failed_pip) {
				dev_info_t *cdip;
				MDI_PI_LOCK(pip);
				next =
				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
				ct = MDI_PI(pip)->pi_client;
				i_mdi_client_lock(ct, pip);
				cdip = ct->ct_dip;
				MDI_PI_UNLOCK(pip);
				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
					i_mdi_client_unlock(ct);
					(void) devi_attach(cdip, DDI_RESUME);
				} else {
					i_mdi_client_unlock(ct);
				}
				pip = next;
			}
		}
		break;

	default:
		rv = DDI_FAILURE;
		break;
	}
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}

/*ARGSUSED*/
static int
i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int		rv = DDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(dip);
	if (ct == NULL) {
		return (rv);
	}

	MDI_CLIENT_LOCK(ct);
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "client pre_detach: called %p",
		    (void *)ct));
		MDI_CLIENT_SET_DETACH(ct);
		break;

	case DDI_SUSPEND:
		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "client pre_suspend: called %p",
		    (void *)ct));
		MDI_CLIENT_SET_SUSPEND(ct);
		break;

	default:
		rv = DDI_FAILURE;
		break;
	}
	MDI_CLIENT_UNLOCK(ct);
	return (rv);
}

/*
 * mdi_post_detach():
 *	Post detach notification handler
 */
/*ARGSUSED*/
void
mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	/*
	 * Detach/Suspend of mpxio component failed. Update our state
	 * too
	 */
	if (MDI_PHCI(dip))
		i_mdi_phci_post_detach(dip, cmd, error);

	if (MDI_CLIENT(dip))
		i_mdi_client_post_detach(dip, cmd, error);
}

/*ARGSUSED*/
static void
i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	mdi_phci_t	*ph;

	/*
	 * Detach/Suspend of phci component failed. Update our state
	 * too
	 */
	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		return;
	}

	MDI_PHCI_LOCK(ph);
	/*
	 * Detach of pHCI failed. Restore back converse
	 * state
	 */
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "pHCI post_detach: called %p",
		    (void *)ph));
		if (error != DDI_SUCCESS)
			MDI_PHCI_SET_ATTACH(ph);
		break;

	case DDI_SUSPEND:
		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "pHCI post_suspend: called %p",
		    (void *)ph));
		if (error != DDI_SUCCESS)
			MDI_PHCI_SET_RESUME(ph);
		break;
	}
	MDI_PHCI_UNLOCK(ph);
}

/*ARGSUSED*/
static void
i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(dip);
	if (ct == NULL) {
		return;
	}
	MDI_CLIENT_LOCK(ct);
	/*
	 * Detach of Client failed. Restore back converse
	 * state
	 */
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "client post_detach: called %p", (void *)ct));
		if (DEVI_IS_ATTACHING(dip)) {
			MDI_DEBUG(4, (MDI_NOTE, dip,
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		} else {
			MDI_DEBUG(4, (MDI_NOTE, dip,
			    "i_mdi_pm_reset_client\n"));
			i_mdi_pm_reset_client(ct);
		}
		if (error != DDI_SUCCESS)
			MDI_CLIENT_SET_ATTACH(ct);
		break;

	case DDI_SUSPEND:
		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "called %p", (void *)ct));
		if (error != DDI_SUCCESS)
			MDI_CLIENT_SET_RESUME(ct);
		break;
	}
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * mdi_pi_kstat_exists():
 *	Report whether per-path kstats have been created for this path.
 */
int
mdi_pi_kstat_exists(mdi_pathinfo_t *pip)
{
	return (MDI_PI(pip)->pi_kstats ? 1 : 0);
}

/*
 * create and install per-path (client - pHCI) statistics
 * I/O stats supported: nread, nwritten, reads, and writes
 * Error stats - hard errors, soft errors, & transport errors
 *
 * NOTE: ksname is modified in place (",err" is appended for the error
 * kstat); the caller's buffer must have room for the suffix.
 */
int
mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname)
{
	kstat_t			*kiosp, *kerrsp;
	struct pi_errs		*nsp;
	struct mdi_pi_kstats	*mdi_statp;

	/* Already created: nothing to do. */
	if (MDI_PI(pip)->pi_kstats != NULL)
		return (MDI_SUCCESS);

	if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
		return (MDI_FAILURE);
	}

	(void) strcat(ksname, ",err");
	kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
	    KSTAT_TYPE_NAMED,
	    sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
	if (kerrsp == NULL) {
		/* Undo the I/O kstat so we fail cleanly. */
		kstat_delete(kiosp);
		return (MDI_FAILURE);
	}

	nsp = (struct pi_errs *)kerrsp->ks_data;
	kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_failedfrom, "Failed From",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);

	mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
	mdi_statp->pi_kstat_ref = 1;
	mdi_statp->pi_kstat_iostats = kiosp;
	mdi_statp->pi_kstat_errstats = kerrsp;
	kstat_install(kiosp);
	kstat_install(kerrsp);
	MDI_PI(pip)->pi_kstats = mdi_statp;
	return (MDI_SUCCESS);
}

/*
 * destroy per-path properties
 */
static void
i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
{

	struct mdi_pi_kstats *mdi_statp;

	/*
	 * NOTE(review): the two NULL checks below are redundant (the
	 * second subsumes the first); preexisting code kept as-is.
	 */
	if (MDI_PI(pip)->pi_kstats == NULL)
		return;
	if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
		return;

	MDI_PI(pip)->pi_kstats = NULL;

	/*
	 * the kstat may be shared between multiple pathinfo nodes
	 * decrement this pathinfo's usage, removing the kstats
	 * themselves when the last pathinfo reference is removed.
	 */
	ASSERT(mdi_statp->pi_kstat_ref > 0);
	if (--mdi_statp->pi_kstat_ref != 0)
		return;

	kstat_delete(mdi_statp->pi_kstat_iostats);
	kstat_delete(mdi_statp->pi_kstat_errstats);
	kmem_free(mdi_statp, sizeof (*mdi_statp));
}

/*
 * update I/O paths KSTATS
 */
void
mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
{
	kstat_t		*iostatp;
	size_t		xfer_cnt;

	ASSERT(pip != NULL);

	/*
	 * I/O can be driven across a path prior to having path
	 * statistics available, i.e. probe(9e).
	 */
	if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
		iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
		/* bytes actually transferred = requested - residual */
		xfer_cnt = bp->b_bcount - bp->b_resid;
		if (bp->b_flags & B_READ) {
			KSTAT_IO_PTR(iostatp)->reads++;
			KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
		} else {
			KSTAT_IO_PTR(iostatp)->writes++;
			KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
		}
	}
}

/*
 * Enable the path(specific client/target/initiator)
 * Enabling a path means that MPxIO may select the enabled path for routing
 * future I/O requests, subject to other path state constraints.
 */
int
mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags)
{
	mdi_phci_t	*ph;

	ph = MDI_PI(pip)->pi_phci;
	if (ph == NULL) {
		MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
		    "!failed: path %s %p: NULL ph",
		    mdi_pi_spathname(pip), (void *)pip));
		return (MDI_FAILURE);
	}

	(void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags,
	    MDI_ENABLE_OP);
	MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
	    "!returning success pip = %p. ph = %p",
	    (void *)pip, (void *)ph));
	return (MDI_SUCCESS);

}

/*
 * Disable the path (specific client/target/initiator)
 * Disabling a path means that MPxIO will not select the disabled path for
 * routing any new I/O requests.
6203 */ 6204 int 6205 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags) 6206 { 6207 mdi_phci_t *ph; 6208 6209 ph = MDI_PI(pip)->pi_phci; 6210 if (ph == NULL) { 6211 MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip), 6212 "!failed: path %s %p: NULL ph", 6213 mdi_pi_spathname(pip), (void *)pip)); 6214 return (MDI_FAILURE); 6215 } 6216 6217 (void) i_mdi_enable_disable_path(pip, 6218 ph->ph_vhci, flags, MDI_DISABLE_OP); 6219 MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip, 6220 "!returning success pip = %p. ph = %p", 6221 (void *)pip, (void *)ph)); 6222 return (MDI_SUCCESS); 6223 } 6224 6225 /* 6226 * disable the path to a particular pHCI (pHCI specified in the phci_path 6227 * argument) for a particular client (specified in the client_path argument). 6228 * Disabling a path means that MPxIO will not select the disabled path for 6229 * routing any new I/O requests. 6230 * NOTE: this will be removed once the NWS files are changed to use the new 6231 * mdi_{enable,disable}_path interfaces 6232 */ 6233 int 6234 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags) 6235 { 6236 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP)); 6237 } 6238 6239 /* 6240 * Enable the path to a particular pHCI (pHCI specified in the phci_path 6241 * argument) for a particular client (specified in the client_path argument). 6242 * Enabling a path means that MPxIO may select the enabled path for routing 6243 * future I/O requests, subject to other path state constraints. 6244 * NOTE: this will be removed once the NWS files are changed to use the new 6245 * mdi_{enable,disable}_path interfaces 6246 */ 6247 6248 int 6249 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags) 6250 { 6251 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP)); 6252 } 6253 6254 /* 6255 * Common routine for doing enable/disable. 
6256 */ 6257 static mdi_pathinfo_t * 6258 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags, 6259 int op) 6260 { 6261 int sync_flag = 0; 6262 int rv; 6263 mdi_pathinfo_t *next; 6264 int (*f)() = NULL; 6265 6266 /* 6267 * Check to make sure the path is not already in the 6268 * requested state. If it is just return the next path 6269 * as we have nothing to do here. 6270 */ 6271 if ((MDI_PI_IS_DISABLE(pip) && op == MDI_DISABLE_OP) || 6272 (!MDI_PI_IS_DISABLE(pip) && op == MDI_ENABLE_OP)) { 6273 MDI_PI_LOCK(pip); 6274 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 6275 MDI_PI_UNLOCK(pip); 6276 return (next); 6277 } 6278 6279 f = vh->vh_ops->vo_pi_state_change; 6280 6281 sync_flag = (flags << 8) & 0xf00; 6282 6283 /* 6284 * Do a callback into the mdi consumer to let it 6285 * know that path is about to get enabled/disabled. 6286 */ 6287 if (f != NULL) { 6288 rv = (*f)(vh->vh_dip, pip, 0, 6289 MDI_PI_EXT_STATE(pip), 6290 MDI_EXT_STATE_CHANGE | sync_flag | 6291 op | MDI_BEFORE_STATE_CHANGE); 6292 if (rv != MDI_SUCCESS) { 6293 MDI_DEBUG(2, (MDI_WARN, vh->vh_dip, 6294 "vo_pi_state_change: failed rv = %x", rv)); 6295 } 6296 } 6297 MDI_PI_LOCK(pip); 6298 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 6299 6300 switch (flags) { 6301 case USER_DISABLE: 6302 if (op == MDI_DISABLE_OP) { 6303 MDI_PI_SET_USER_DISABLE(pip); 6304 } else { 6305 MDI_PI_SET_USER_ENABLE(pip); 6306 } 6307 break; 6308 case DRIVER_DISABLE: 6309 if (op == MDI_DISABLE_OP) { 6310 MDI_PI_SET_DRV_DISABLE(pip); 6311 } else { 6312 MDI_PI_SET_DRV_ENABLE(pip); 6313 } 6314 break; 6315 case DRIVER_DISABLE_TRANSIENT: 6316 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) { 6317 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 6318 } else { 6319 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 6320 } 6321 break; 6322 } 6323 MDI_PI_UNLOCK(pip); 6324 /* 6325 * Do a callback into the mdi consumer to let it 6326 * know that path is now enabled/disabled. 
6327 */ 6328 if (f != NULL) { 6329 rv = (*f)(vh->vh_dip, pip, 0, 6330 MDI_PI_EXT_STATE(pip), 6331 MDI_EXT_STATE_CHANGE | sync_flag | 6332 op | MDI_AFTER_STATE_CHANGE); 6333 if (rv != MDI_SUCCESS) { 6334 MDI_DEBUG(2, (MDI_WARN, vh->vh_dip, 6335 "vo_pi_state_change failed: rv = %x", rv)); 6336 } 6337 } 6338 return (next); 6339 } 6340 6341 /* 6342 * Common routine for doing enable/disable. 6343 * NOTE: this will be removed once the NWS files are changed to use the new 6344 * mdi_{enable,disable}_path has been putback 6345 */ 6346 int 6347 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op) 6348 { 6349 6350 mdi_phci_t *ph; 6351 mdi_vhci_t *vh = NULL; 6352 mdi_client_t *ct; 6353 mdi_pathinfo_t *next, *pip; 6354 int found_it; 6355 6356 ph = i_devi_get_phci(pdip); 6357 MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip, 6358 "!op = %d pdip = %p cdip = %p", op, (void *)pdip, 6359 (void *)cdip)); 6360 if (ph == NULL) { 6361 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6362 "!failed: operation %d: NULL ph", op)); 6363 return (MDI_FAILURE); 6364 } 6365 6366 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) { 6367 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6368 "!failed: invalid operation %d", op)); 6369 return (MDI_FAILURE); 6370 } 6371 6372 vh = ph->ph_vhci; 6373 6374 if (cdip == NULL) { 6375 /* 6376 * Need to mark the Phci as enabled/disabled. 6377 */ 6378 MDI_DEBUG(4, (MDI_NOTE, cdip ? 
cdip : pdip, 6379 "op %d for the phci", op)); 6380 MDI_PHCI_LOCK(ph); 6381 switch (flags) { 6382 case USER_DISABLE: 6383 if (op == MDI_DISABLE_OP) { 6384 MDI_PHCI_SET_USER_DISABLE(ph); 6385 } else { 6386 MDI_PHCI_SET_USER_ENABLE(ph); 6387 } 6388 break; 6389 case DRIVER_DISABLE: 6390 if (op == MDI_DISABLE_OP) { 6391 MDI_PHCI_SET_DRV_DISABLE(ph); 6392 } else { 6393 MDI_PHCI_SET_DRV_ENABLE(ph); 6394 } 6395 break; 6396 case DRIVER_DISABLE_TRANSIENT: 6397 if (op == MDI_DISABLE_OP) { 6398 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph); 6399 } else { 6400 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph); 6401 } 6402 break; 6403 default: 6404 MDI_PHCI_UNLOCK(ph); 6405 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6406 "!invalid flag argument= %d", flags)); 6407 } 6408 6409 /* 6410 * Phci has been disabled. Now try to enable/disable 6411 * path info's to each client. 6412 */ 6413 pip = ph->ph_path_head; 6414 while (pip != NULL) { 6415 pip = i_mdi_enable_disable_path(pip, vh, flags, op); 6416 } 6417 MDI_PHCI_UNLOCK(ph); 6418 } else { 6419 6420 /* 6421 * Disable a specific client. 6422 */ 6423 ct = i_devi_get_client(cdip); 6424 if (ct == NULL) { 6425 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6426 "!failed: operation = %d: NULL ct", op)); 6427 return (MDI_FAILURE); 6428 } 6429 6430 MDI_CLIENT_LOCK(ct); 6431 pip = ct->ct_path_head; 6432 found_it = 0; 6433 while (pip != NULL) { 6434 MDI_PI_LOCK(pip); 6435 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6436 if (MDI_PI(pip)->pi_phci == ph) { 6437 MDI_PI_UNLOCK(pip); 6438 found_it = 1; 6439 break; 6440 } 6441 MDI_PI_UNLOCK(pip); 6442 pip = next; 6443 } 6444 6445 6446 MDI_CLIENT_UNLOCK(ct); 6447 if (found_it == 0) { 6448 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6449 "!failed. Could not find corresponding pip\n")); 6450 return (MDI_FAILURE); 6451 } 6452 6453 (void) i_mdi_enable_disable_path(pip, vh, flags, op); 6454 } 6455 6456 MDI_DEBUG(5, (MDI_NOTE, cdip ? 
cdip : pdip,
	    "!op %d returning success pdip = %p cdip = %p",
	    op, (void *)pdip, (void *)cdip));
	return (MDI_SUCCESS);
}

/*
 * Ensure phci powered up
 */
static void
i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
{
	dev_info_t *ph_dip;

	ASSERT(pip != NULL);
	ASSERT(MDI_PI_LOCKED(pip));

	/* already holding the pHCI's power: nothing to do */
	if (MDI_PI(pip)->pi_pm_held) {
		return;
	}

	ph_dip = mdi_pi_get_phci(pip);
	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
	    "%s %p", mdi_pi_spathname(pip), (void *)pip));
	if (ph_dip == NULL) {
		return;
	}

	/* drop the path lock across the power hold call */
	MDI_PI_UNLOCK(pip);
	MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt was %d",
	    DEVI(ph_dip)->devi_pm_kidsupcnt));
	pm_hold_power(ph_dip);
	MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt is %d",
	    DEVI(ph_dip)->devi_pm_kidsupcnt));
	MDI_PI_LOCK(pip);

	/* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */
	if (DEVI(ph_dip)->devi_pm_info)
		MDI_PI(pip)->pi_pm_held = 1;
}

/*
 * Allow phci powered down
 */
static void
i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
{
	dev_info_t *ph_dip = NULL;

	ASSERT(pip != NULL);
	ASSERT(MDI_PI_LOCKED(pip));

	/* no hold to release */
	if (MDI_PI(pip)->pi_pm_held == 0) {
		return;
	}

	ph_dip = mdi_pi_get_phci(pip);
	ASSERT(ph_dip != NULL);

	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
	    "%s %p", mdi_pi_spathname(pip), (void *)pip));

	/* drop the path lock across the power release call */
	MDI_PI_UNLOCK(pip);
	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
	    "kidsupcnt was %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
	pm_rele_power(ph_dip);
	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
	    "kidsupcnt is %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
	MDI_PI_LOCK(pip);

	MDI_PI(pip)->pi_pm_held = 0;
}

/*
 * Bump the client's PM reference count by incr.
 */
static void
i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
{
	ASSERT(MDI_CLIENT_LOCKED(ct));

	ct->ct_power_cnt += incr;
	MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
	    "%p ct_power_cnt = %d incr = %d",
	    (void *)ct, ct->ct_power_cnt, incr));
	ASSERT(ct->ct_power_cnt >= 0);
}

/*
 * Release the PM hold on every path of the client.
 */
static void
i_mdi_rele_all_phci(mdi_client_t *ct)
{
	mdi_pathinfo_t *pip;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		mdi_hold_path(pip);
		MDI_PI_LOCK(pip);
		i_mdi_pm_rele_pip(pip);
		MDI_PI_UNLOCK(pip);
		mdi_rele_path(pip);
		/*
		 * NOTE(review): pi_client_link is read after mdi_rele_path();
		 * presumably the client lock keeps the node alive -- confirm.
		 */
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
}

/*
 * Drop the client's PM reference count by decr; when it reaches zero,
 * release the PM holds on all of its pHCIs.
 */
static void
i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
{
	ASSERT(MDI_CLIENT_LOCKED(ct));

	if (i_ddi_devi_attached(ct->ct_dip)) {
		ct->ct_power_cnt -= decr;
		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
		    "%p ct_power_cnt = %d decr = %d",
		    (void *)ct, ct->ct_power_cnt, decr));
	}

	ASSERT(ct->ct_power_cnt >= 0);
	if (ct->ct_power_cnt == 0) {
		i_mdi_rele_all_phci(ct);
		return;
	}
}

/*
 * Reset the client's PM bookkeeping and release all pHCI power holds.
 */
static void
i_mdi_pm_reset_client(mdi_client_t *ct)
{
	MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
	    "%p ct_power_cnt = %d", (void *)ct, ct->ct_power_cnt));
	ASSERT(MDI_CLIENT_LOCKED(ct));
	ct->ct_power_cnt = 0;
	i_mdi_rele_all_phci(ct);
	ct->ct_powercnt_config = 0;
	ct->ct_powercnt_unconfig = 0;
	ct->ct_powercnt_reset = 1;
}

/*
 * Hold and power up a single pHCI; the hold is dropped again on failure.
 * Returns MDI_SUCCESS or MDI_FAILURE.
 */
static int
i_mdi_power_one_phci(mdi_pathinfo_t *pip)
{
	int ret;
	dev_info_t *ph_dip;

	MDI_PI_LOCK(pip);
	i_mdi_pm_hold_pip(pip);

	ph_dip = mdi_pi_get_phci(pip);
	MDI_PI_UNLOCK(pip);

	/* bring all components of phci to full power */
	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
	    "pm_powerup for %s%d %p", ddi_driver_name(ph_dip),
	    ddi_get_instance(ph_dip), (void *)pip));

	ret = pm_powerup(ph_dip);

	if (ret == DDI_FAILURE) {
		MDI_DEBUG(4, (MDI_NOTE, ph_dip,
		    "pm_powerup FAILED for %s%d %p",
		    ddi_driver_name(ph_dip), ddi_get_instance(ph_dip),
		    (void *)pip));

		MDI_PI_LOCK(pip);
		i_mdi_pm_rele_pip(pip);
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	return (MDI_SUCCESS);
}

/*
 * Power up every usable path of the client; succeeds if at least one
 * pHCI was powered up.
 */
static int
i_mdi_power_all_phci(mdi_client_t *ct)
{
	mdi_pathinfo_t *pip;
	int succeeded = 0;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Don't power if MDI_PATHINFO_STATE_FAULT
		 * or MDI_PATHINFO_STATE_OFFLINE.
		 */
		if (MDI_PI_IS_INIT(pip) ||
		    MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) {
			mdi_hold_path(pip);
			/* client lock dropped across the blocking powerup */
			MDI_CLIENT_UNLOCK(ct);
			if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
				succeeded = 1;

			ASSERT(ct == MDI_PI(pip)->pi_client);
			MDI_CLIENT_LOCK(ct);
			mdi_rele_path(pip);
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}

	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
}

/*
 * mdi_bus_power():
 *	1. Place the phci(s) into powered up state so that
 *	   client can do power management
 *	2. Ensure phci powered up as client power managing
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 */
int
mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
    void *arg, void *result)
{
	int ret = MDI_SUCCESS;
	pm_bp_child_pwrchg_t *bpc;
	mdi_client_t *ct;
	dev_info_t *cdip;
	pm_bp_has_changed_t *bphc;

	/*
	 * BUS_POWER_NOINVOL not supported
	 */
	if (op == BUS_POWER_NOINVOL)
		return (MDI_FAILURE);

	/*
	 * ignore other OPs.
	 * return quickly to save CPU cycles on the ct processing
	 */
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
	case BUS_POWER_POST_NOTIFICATION:
		bpc = (pm_bp_child_pwrchg_t *)arg;
		cdip = bpc->bpc_dip;
		break;
	case BUS_POWER_HAS_CHANGED:
		bphc = (pm_bp_has_changed_t *)arg;
		cdip = bphc->bphc_dip;
		break;
	default:
		return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
	}

	ASSERT(MDI_CLIENT(cdip));

	ct = i_devi_get_client(cdip);
	if (ct == NULL)
		return (MDI_FAILURE);

	/*
	 * wait till the mdi_pathinfo node state change are processed
	 */
	MDI_CLIENT_LOCK(ct);
	switch (op) {
	case BUS_POWER_PRE_NOTIFICATION:
		MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
		    "BUS_POWER_PRE_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d",
		    ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));

		/* serialize power level change per client */
		while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
			cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

		MDI_CLIENT_SET_POWER_TRANSITION(ct);

		if (ct->ct_power_cnt == 0) {
			ret = i_mdi_power_all_phci(ct);
		}

		/*
		 * if new_level > 0:
		 *	- hold phci(s)
		 *	- power up phci(s) if not already
		 * ignore power down
		 */
		if (bpc->bpc_nlevel > 0) {
			if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
				    "i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		}
		break;
	case BUS_POWER_POST_NOTIFICATION:
		MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
		    "BUS_POWER_POST_NOTIFICATION:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d",
		    ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
		    bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
		    *(int *)result));

		if (*(int *)result == DDI_SUCCESS) {
			if (bpc->bpc_nlevel > 0) {
				MDI_CLIENT_SET_POWER_UP(ct);
			} else {
				MDI_CLIENT_SET_POWER_DOWN(ct);
			}
		}

		/* release the hold we did in pre-notification */
		if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
		    !DEVI_IS_ATTACHING(ct->ct_dip)) {
			MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}

		if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
			/* another thread might have started attaching */
			if (DEVI_IS_ATTACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
				    "i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			/* detaching has been taken care of in pm_post_unconfig */
			} else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
				MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
				    "i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
		cv_broadcast(&ct->ct_powerchange_cv);

		break;

	/* need to do more */
	case BUS_POWER_HAS_CHANGED:
		MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
		    "BUS_POWER_HAS_CHANGED:"
		    "%s@%s, olevel=%d, nlevel=%d, comp=%d",
		    ddi_node_name(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
		    bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));

		/* powering up: make sure the pHCIs are powered and held */
		if (bphc->bphc_nlevel > 0 &&
		    bphc->bphc_nlevel > bphc->bphc_olevel) {
			if (ct->ct_power_cnt == 0) {
				ret = i_mdi_power_all_phci(ct);
			}
			MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
			    "i_mdi_pm_hold_client\n"));
			i_mdi_pm_hold_client(ct, ct->ct_path_count);
		}

		if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
			MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		}
		break;
	}

	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}

static int
i_mdi_pm_pre_config_one(dev_info_t *child)
{
	int ret = MDI_SUCCESS;
	mdi_client_t *ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return (MDI_FAILURE);

	MDI_CLIENT_LOCK(ct);
	/* wait out any in-flight power level transition */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!MDI_CLIENT_IS_FAILED(ct)) {
		MDI_CLIENT_UNLOCK(ct);
		MDI_DEBUG(4, (MDI_NOTE, child, "already configured\n"));
		return (MDI_SUCCESS);
	}

	if (ct->ct_powercnt_config) {
		MDI_CLIENT_UNLOCK(ct);
		MDI_DEBUG(4, (MDI_NOTE, child, "already held\n"));
		return (MDI_SUCCESS);
	}

	if (ct->ct_power_cnt == 0) {
		ret = i_mdi_power_all_phci(ct);
	}
	MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
	i_mdi_pm_hold_client(ct, ct->ct_path_count);
	ct->ct_powercnt_config = 1;
	ct->ct_powercnt_reset = 0;
	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}

/*
 * Pre-config PM processing for the vHCI: a single named child, or all
 * children when child is NULL.
 */
static int
i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child)
{
	int ret = MDI_SUCCESS;
	dev_info_t *cdip;
	int circ;

	ASSERT(MDI_VHCI(vdip));

	/* ndi_devi_config_one */
	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		return (i_mdi_pm_pre_config_one(child));
	}

	/* devi_config_common */
	ndi_devi_enter(vdip, &circ);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		ret = i_mdi_pm_pre_config_one(cdip);
		if (ret != MDI_SUCCESS)
			break;
		cdip = next;
	}
	ndi_devi_exit(vdip, circ);
	return (ret);
}

/*
 * Pre-unconfig PM processing for one client; *held is set when a PM hold
 * was taken (or already present) so that post-unconfig can release it.
 */
static int
i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
{
	int ret = MDI_SUCCESS;
	mdi_client_t *ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return (MDI_FAILURE);

	MDI_CLIENT_LOCK(ct);
	/* wait out any in-flight power level transition */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!i_ddi_devi_attached(child)) {
		MDI_DEBUG(4, (MDI_NOTE, child, "node detached already\n"));
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_SUCCESS);
	}

	/* don't auto-detach a powered down client */
	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    (flags & NDI_AUTODETACH)) {
		MDI_DEBUG(4, (MDI_NOTE, child, "auto-modunload\n"));
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	if (ct->ct_powercnt_unconfig) {
		MDI_DEBUG(4, (MDI_NOTE, child, "ct_powercnt_held\n"));
		MDI_CLIENT_UNLOCK(ct);
		*held = 1;
		return (MDI_SUCCESS);
	}

	if (ct->ct_power_cnt == 0) {
		ret = i_mdi_power_all_phci(ct);
	}
	MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
	i_mdi_pm_hold_client(ct, ct->ct_path_count);
	ct->ct_powercnt_unconfig = 1;
	ct->ct_powercnt_reset = 0;
	MDI_CLIENT_UNLOCK(ct);
	if (ret == MDI_SUCCESS)
		*held = 1;
	return (ret);
}

/*
 * Pre-unconfig PM processing for the vHCI: a single named child, or all
 * children when child is NULL.
 */
static int
i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held,
    int flags)
{
	int ret = MDI_SUCCESS;
	dev_info_t *cdip;
	int circ;

	ASSERT(MDI_VHCI(vdip));
	*held = 0;

	/* ndi_devi_unconfig_one */
	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		return (i_mdi_pm_pre_unconfig_one(child, held, flags));
	}

	/* devi_unconfig_common */
	ndi_devi_enter(vdip, &circ);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
		cdip = next;
	}
	ndi_devi_exit(vdip, circ);

	/* any hold taken means post-unconfig must run */
	if (*held)
		ret = MDI_SUCCESS;

	return (ret);
}

/*
 * Post-config PM processing for one client: drop the hold taken in
 * pre-config, or reset the client if it powered down / failed to attach.
 */
static void
i_mdi_pm_post_config_one(dev_info_t *child)
{
	mdi_client_t *ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	/* wait out any in-flight power level transition */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
		MDI_DEBUG(4, (MDI_NOTE, child, "not configured\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* client has not been updated */
	if (MDI_CLIENT_IS_FAILED(ct)) {
		MDI_DEBUG(4, (MDI_NOTE, child, "client failed\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* another thread might have powered it down or detached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    !DEVI_IS_ATTACHING(child)) ||
	    (!i_ddi_devi_attached(child) &&
	    !DEVI_IS_ATTACHING(child))) {
		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t *pip, *next;
		int valid_path_count = 0;

		/* release one reference per usable (online/standby) path */
		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
	}
	ct->ct_powercnt_config = 0;
	MDI_CLIENT_UNLOCK(ct);
}

/*
 * Post-config PM processing for the vHCI: a single named child, or all
 * children when child is NULL.
 */
static void
i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child)
{
	int circ;
	dev_info_t *cdip;

	ASSERT(MDI_VHCI(vdip));

	/* ndi_devi_config_one */
	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		i_mdi_pm_post_config_one(child);
		return;
	}

	/* devi_config_common */
	ndi_devi_enter(vdip, &circ);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		i_mdi_pm_post_config_one(cdip);
		cdip = next;
	}
	ndi_devi_exit(vdip, circ);
}

/*
 * Post-unconfig PM processing for one client: drop the hold taken in
 * pre-unconfig, or reset the client if it is gone / powered down.
 */
static void
i_mdi_pm_post_unconfig_one(dev_info_t *child)
{
	mdi_client_t *ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	/* wait out any in-flight power level transition */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
		MDI_DEBUG(4, (MDI_NOTE, child, "not held\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* failure detaching or another thread just attached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    i_ddi_devi_attached(child)) ||
	    (!i_ddi_devi_attached(child) &&
	    !DEVI_IS_ATTACHING(child))) {
		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t *pip, *next;
		int valid_path_count = 0;

		/* release one reference per usable (online/standby) path */
		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
		ct->ct_powercnt_unconfig = 0;
	}

	MDI_CLIENT_UNLOCK(ct);
}

/*
 * Post-unconfig PM processing for the vHCI; held comes from pre-unconfig
 * and indicates whether any PM hold must be released.
 */
static void
i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held)
{
	int circ;
	dev_info_t *cdip;

	ASSERT(MDI_VHCI(vdip));

	if (!held) {
		MDI_DEBUG(4, (MDI_NOTE, vdip, "held = %d", held));
		return;
	}

	if (child) {
		ASSERT(DEVI_BUSY_OWNED(vdip));
		i_mdi_pm_post_unconfig_one(child);
		return;
	}

	ndi_devi_enter(vdip, &circ);
	cdip = ddi_get_child(vdip);
	while (cdip) {
		dev_info_t *next = ddi_get_next_sibling(cdip);

		i_mdi_pm_post_unconfig_one(cdip);
		cdip = next;
	}
	ndi_devi_exit(vdip, circ);
}

/*
 * MDI power management entry point: dispatch the pre/post config/unconfig
 * and hold/release power operations for the vHCI's client(s).
 */
int
mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
{
	int circ, ret = MDI_SUCCESS;
	dev_info_t *client_dip = NULL;
7122 mdi_client_t *ct; 7123 7124 /* 7125 * Handling ndi_devi_config_one and ndi_devi_unconfig_one. 7126 * Power up pHCI for the named client device. 7127 * Note: Before the client is enumerated under vhci by phci, 7128 * client_dip can be NULL. Then proceed to power up all the 7129 * pHCIs. 7130 */ 7131 if (devnm != NULL) { 7132 ndi_devi_enter(vdip, &circ); 7133 client_dip = ndi_devi_findchild(vdip, devnm); 7134 } 7135 7136 MDI_DEBUG(4, (MDI_NOTE, vdip, 7137 "op = %d %s %p", op, devnm ? devnm : "", (void *)client_dip)); 7138 7139 switch (op) { 7140 case MDI_PM_PRE_CONFIG: 7141 ret = i_mdi_pm_pre_config(vdip, client_dip); 7142 break; 7143 7144 case MDI_PM_PRE_UNCONFIG: 7145 ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args, 7146 flags); 7147 break; 7148 7149 case MDI_PM_POST_CONFIG: 7150 i_mdi_pm_post_config(vdip, client_dip); 7151 break; 7152 7153 case MDI_PM_POST_UNCONFIG: 7154 i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args); 7155 break; 7156 7157 case MDI_PM_HOLD_POWER: 7158 case MDI_PM_RELE_POWER: 7159 ASSERT(args); 7160 7161 client_dip = (dev_info_t *)args; 7162 ASSERT(MDI_CLIENT(client_dip)); 7163 7164 ct = i_devi_get_client(client_dip); 7165 MDI_CLIENT_LOCK(ct); 7166 7167 if (op == MDI_PM_HOLD_POWER) { 7168 if (ct->ct_power_cnt == 0) { 7169 (void) i_mdi_power_all_phci(ct); 7170 MDI_DEBUG(4, (MDI_NOTE, client_dip, 7171 "i_mdi_pm_hold_client\n")); 7172 i_mdi_pm_hold_client(ct, ct->ct_path_count); 7173 } 7174 } else { 7175 if (DEVI_IS_ATTACHING(client_dip)) { 7176 MDI_DEBUG(4, (MDI_NOTE, client_dip, 7177 "i_mdi_pm_rele_client\n")); 7178 i_mdi_pm_rele_client(ct, ct->ct_path_count); 7179 } else { 7180 MDI_DEBUG(4, (MDI_NOTE, client_dip, 7181 "i_mdi_pm_reset_client\n")); 7182 i_mdi_pm_reset_client(ct); 7183 } 7184 } 7185 7186 MDI_CLIENT_UNLOCK(ct); 7187 break; 7188 7189 default: 7190 break; 7191 } 7192 7193 if (devnm) 7194 ndi_devi_exit(vdip, circ); 7195 7196 return (ret); 7197 } 7198 7199 int 7200 mdi_component_is_vhci(dev_info_t *dip, const char 
**mdi_class)
{
	mdi_vhci_t	*vhci;

	/* fail unless dip is an MDI vHCI node */
	if (!MDI_VHCI(dip))
		return (MDI_FAILURE);

	if (mdi_class) {
		vhci = DEVI(dip)->devi_mdi_xhci;
		ASSERT(vhci);
		*mdi_class = vhci->vh_class;
	}

	return (MDI_SUCCESS);
}

/*
 * Return MDI_SUCCESS if dip is an MDI pHCI node; optionally return the
 * class name of its vHCI via mdi_class.
 */
int
mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
{
	mdi_phci_t	*phci;

	if (!MDI_PHCI(dip))
		return (MDI_FAILURE);

	if (mdi_class) {
		phci = DEVI(dip)->devi_mdi_xhci;
		ASSERT(phci);
		*mdi_class = phci->ph_vhci->vh_class;
	}

	return (MDI_SUCCESS);
}

/*
 * Return MDI_SUCCESS if dip is an MDI client node; optionally return the
 * class name of its vHCI via mdi_class.
 */
int
mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
{
	mdi_client_t	*client;

	if (!MDI_CLIENT(dip))
		return (MDI_FAILURE);

	if (mdi_class) {
		client = DEVI(dip)->devi_mdi_client;
		ASSERT(client);
		*mdi_class = client->ct_vhci->vh_class;
	}

	return (MDI_SUCCESS);
}

/*
 * Get the vhci private data previously stored on the client node with
 * mdi_client_set_vhci_private(); NULL if dip is not an MDI client.
 */
void *
mdi_client_get_vhci_private(dev_info_t *dip)
{
	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
		mdi_client_t	*ct;
		ct = i_devi_get_client(dip);
		return (ct->ct_vprivate);
	}
	return (NULL);
}

/*
 * Store vhci private data on the client node; silently does nothing if
 * dip is not an MDI client.
 */
void
mdi_client_set_vhci_private(dev_info_t *dip, void *data)
{
	ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
		mdi_client_t	*ct;
		ct = i_devi_get_client(dip);
		ct->ct_vprivate = data;
	}
}
/*
 * mdi_pi_get_vhci_private():
 *		Get the vhci private information associated with the
 *		mdi_pathinfo node.  Returns NULL for a NULL pip.
 */
void *
mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
{
	caddr_t	vprivate = NULL;
	if (pip) {
		vprivate = MDI_PI(pip)->pi_vprivate;
	}
	return (vprivate);
}

/*
 * mdi_pi_set_vhci_private():
 *		Set the vhci private information in the mdi_pathinfo node.
 *		A NULL pip is silently ignored.
 */
void
mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
{
	if (pip) {
		MDI_PI(pip)->pi_vprivate = priv;
	}
}

/*
 * mdi_phci_get_vhci_private():
 *		Get the vhci private information associated with the
 *		mdi_phci node.  Returns NULL if dip is not an MDI pHCI.
 */
void *
mdi_phci_get_vhci_private(dev_info_t *dip)
{
	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
		mdi_phci_t	*ph;
		ph = i_devi_get_phci(dip);
		return (ph->ph_vprivate);
	}
	return (NULL);
}

/*
 * mdi_phci_set_vhci_private():
 *		Set the vhci private information in the mdi_phci node.
 *		Silently does nothing if dip is not an MDI pHCI.
 */
void
mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
{
	ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
	if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
		mdi_phci_t	*ph;
		ph = i_devi_get_phci(dip);
		ph->ph_vprivate = priv;
	}
}

/* Return non-zero if the path is flagged hidden */
int
mdi_pi_ishidden(mdi_pathinfo_t *pip)
{
	return (MDI_PI_FLAGS_IS_HIDDEN(pip));
}

/* Return non-zero if the path is flagged device-removed */
int
mdi_pi_device_isremoved(mdi_pathinfo_t *pip)
{
	return (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip));
}

/* Return 1 if all client paths are device_removed */
static int
i_mdi_client_all_devices_removed(mdi_client_t *ct)
{
	mdi_pathinfo_t	*pip;
	int		all_devices_removed = 1;

	MDI_CLIENT_LOCK(ct);
	for (pip = ct->ct_path_head; pip;
	    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) {
		if (!mdi_pi_device_isremoved(pip)) {
			all_devices_removed = 0;
			break;
		}
	}
	MDI_CLIENT_UNLOCK(ct);
	return (all_devices_removed);
}

/*
 * When processing path hotunplug, represent device removal.
 */
int
mdi_pi_device_remove(mdi_pathinfo_t *pip)
{
	mdi_client_t	*ct;

	MDI_PI_LOCK(pip);
	/* already marked removed: nothing to do */
	if (mdi_pi_device_isremoved(pip)) {
		MDI_PI_UNLOCK(pip);
		return (0);
	}
	MDI_PI_FLAGS_SET_DEVICE_REMOVED(pip);
	MDI_PI_FLAGS_SET_HIDDEN(pip);
	MDI_PI_UNLOCK(pip);

	/*
	 * If all paths associated with the client are now DEVICE_REMOVED,
	 * reflect DEVICE_REMOVED in the client.
	 */
	ct = MDI_PI(pip)->pi_client;
	if (ct && ct->ct_dip && i_mdi_client_all_devices_removed(ct))
		(void) ndi_devi_device_remove(ct->ct_dip);
	else
		i_ddi_di_cache_invalidate();

	return (1);
}

/*
 * When processing hotplug, if a path marked mdi_pi_device_isremoved()
 * is now accessible then this interface is used to represent device insertion.
 */
int
mdi_pi_device_insert(mdi_pathinfo_t *pip)
{
	MDI_PI_LOCK(pip);
	/* not marked removed: nothing to undo */
	if (!mdi_pi_device_isremoved(pip)) {
		MDI_PI_UNLOCK(pip);
		return (0);
	}
	MDI_PI_FLAGS_CLR_DEVICE_REMOVED(pip);
	MDI_PI_FLAGS_CLR_HIDDEN(pip);
	MDI_PI_UNLOCK(pip);

	i_ddi_di_cache_invalidate();

	return (1);
}

/*
 * List of vhci class names:
 * A vhci class name must be in this list only if the corresponding vhci
 * driver intends to use the mdi provided bus config implementation
 * (i.e., mdi_vhci_bus_config()).
 */
static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
#define	N_VHCI_CLASSES	(sizeof (vhci_class_list) / sizeof (char *))

/*
 * During boot time, the on-disk vhci cache for every vhci class is read
 * in the form of an nvlist and stored here.
 */
static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];

/* nvpair names in vhci cache nvlist */
#define	MDI_VHCI_CACHE_VERSION	1
#define	MDI_NVPNAME_VERSION	"version"
#define	MDI_NVPNAME_PHCIS	"phcis"
#define	MDI_NVPNAME_CTADDRMAP	"clientaddrmap"

/*
 * Given vhci class name, return its on-disk vhci cache filename.
 * Memory for the returned filename which includes the full path is allocated
 * by this function; the caller must kmem_free() strlen(result) + 1 bytes.
 */
static char *
vhclass2vhcache_filename(char *vhclass)
{
	char *filename;
	int len;
	static char *fmt = "/etc/devices/mdi_%s_cache";

	/*
	 * fmt contains the on-disk vhci cache file name format;
	 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
	 */

	/* the -1 below is to account for "%s" in the format string */
	len = strlen(fmt) + strlen(vhclass) - 1;
	filename = kmem_alloc(len, KM_SLEEP);
	(void) snprintf(filename, len, fmt, vhclass);
	ASSERT(len == (strlen(filename) + 1));
	return (filename);
}

/*
 * initialize the vhci cache related data structures and read the on-disk
 * vhci cached data into memory.
 */
static void
setup_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc;
	mdi_vhci_cache_t *vhcache;
	int i;
	nvlist_t *nvl = NULL;

	vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
	vh->vh_config = vhc;
	vhcache = &vhc->vhc_vhcache;

	vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);

	mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);

	rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);

	/*
	 * Create string hash; same as mod_hash_create_strhash() except that
	 * we use NULL key destructor (keys are owned by the cct entries).
	 */
	vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
	    mdi_bus_config_cache_hash_size,
	    mod_hash_null_keydtor, mod_hash_null_valdtor,
	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);

	/*
	 * The on-disk vhci cache is read during booting prior to the
	 * lights-out period by mdi_read_devices_files().  Claim (and
	 * consume) the pre-read nvlist for our class, if present.
	 */
	for (i = 0; i < N_VHCI_CLASSES; i++) {
		if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
			nvl = vhcache_nvl[i];
			vhcache_nvl[i] = NULL;
			break;
		}
	}

	/*
	 * this is to cover the case of someone manually causing unloading
	 * (or detaching) and reloading (or attaching) of a vhci driver.
	 */
	if (nvl == NULL && modrootloaded)
		nvl = read_on_disk_vhci_cache(vh->vh_class);

	if (nvl != NULL) {
		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
		if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
			vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
		else {
			cmn_err(CE_WARN,
			    "%s: data file corrupted, will recreate",
			    vhc->vhc_vhcache_filename);
		}
		rw_exit(&vhcache->vhcache_lock);
		nvlist_free(nvl);
	}

	/* flush the dirty cache before the filesystems go away at shutdown */
	vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
	    CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");

	vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
	vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
}

/*
 * free all vhci cache related resources
 */
static int
destroy_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t *cphci, *cphci_next;
	mdi_vhcache_client_t *cct, *cct_next;
	mdi_vhcache_pathinfo_t *cpi, *cpi_next;

	/* no async threads may reference the cache while it is torn down */
	if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
		return (MDI_FAILURE);

	kmem_free(vhc->vhc_vhcache_filename,
	    strlen(vhc->vhc_vhcache_filename) + 1);

	mod_hash_destroy_strhash(vhcache->vhcache_client_hash);

	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci_next) {
		cphci_next = cphci->cphci_next;
		free_vhcache_phci(cphci);
	}

	for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
		cct_next = cct->cct_next;
		for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
			cpi_next = cpi->cpi_next;
			free_vhcache_pathinfo(cpi);
		}
		free_vhcache_client(cct);
	}

	rw_destroy(&vhcache->vhcache_lock);

	mutex_destroy(&vhc->vhc_lock);
	cv_destroy(&vhc->vhc_cv);
	kmem_free(vhc, sizeof (mdi_vhci_config_t));
	return (MDI_SUCCESS);
}

/*
 * Stop all vhci cache related async threads and free their resources.
 */
static int
stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
{
	mdi_async_client_config_t *acc, *acc_next;

	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_EXIT;
	ASSERT(vhc->vhc_acc_thrcount >= 0);
	cv_broadcast(&vhc->vhc_cv);

	/* wait (polling) for the flush thread and acc threads to exit */
	while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
	    vhc->vhc_acc_thrcount != 0) {
		mutex_exit(&vhc->vhc_lock);
		delay_random(mdi_delay);
		mutex_enter(&vhc->vhc_lock);
	}

	vhc->vhc_flags &= ~MDI_VHC_EXIT;

	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
		acc_next = acc->acc_next;
		free_async_client_config(acc);
	}
	vhc->vhc_acc_list_head = NULL;
	vhc->vhc_acc_list_tail = NULL;
	vhc->vhc_acc_count = 0;

	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
		mutex_exit(&vhc->vhc_lock);
		if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
			/* flush failed: re-mark dirty so it is retried */
			vhcache_dirty(vhc);
			return (MDI_FAILURE);
		}
	} else
		mutex_exit(&vhc->vhc_lock);

	if (callb_delete(vhc->vhc_cbid) != 0)
		return (MDI_FAILURE);

	return (MDI_SUCCESS);
}

/*
 * Stop vhci cache flush thread
 * (registered via callb_add(); invoked before the VFS layer goes away
 * at shutdown, so a dirty cache is force-flushed here).
 */
/* ARGSUSED */
static boolean_t
stop_vhcache_flush_thread(void *arg, int code)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;

	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_EXIT;
	cv_broadcast(&vhc->vhc_cv);

	/* wait (polling) until the flush thread has exited */
	while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
		mutex_exit(&vhc->vhc_lock);
		delay_random(mdi_delay);
		mutex_enter(&vhc->vhc_lock);
	}

	if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
		mutex_exit(&vhc->vhc_lock);
		/* force flag set: flush even if I/O is not initialized */
		(void) flush_vhcache(vhc, 1);
	} else
		mutex_exit(&vhc->vhc_lock);

	return (B_TRUE);
}

/*
 * Enqueue the vhcache phci (cphci) at the tail of the list
 */
static void
enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
{
	cphci->cphci_next = NULL;
	if (vhcache->vhcache_phci_head == NULL)
		vhcache->vhcache_phci_head = cphci;
	else
		vhcache->vhcache_phci_tail->cphci_next = cphci;
	vhcache->vhcache_phci_tail = cphci;
}

/*
 * Enqueue the vhcache pathinfo (cpi) at the tail of the list
 */
static void
enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
    mdi_vhcache_pathinfo_t *cpi)
{
	cpi->cpi_next = NULL;
	if (cct->cct_cpi_head == NULL)
		cct->cct_cpi_head = cpi;
	else
		cct->cct_cpi_tail->cpi_next = cpi;
	cct->cct_cpi_tail = cpi;
}

/*
 * Enqueue the vhcache pathinfo (cpi) at the correct location in the
 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
 * flag set come at the beginning of the list. All cpis which have this
 * flag set come at the end of the list.
 */
static void
enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
    mdi_vhcache_pathinfo_t *newcpi)
{
	mdi_vhcache_pathinfo_t *cpi, *prev_cpi;

	if (cct->cct_cpi_head == NULL ||
	    (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
		enqueue_tail_vhcache_pathinfo(cct, newcpi);
	else {
		/*
		 * Walk past the existing "path exists" entries; newcpi is
		 * inserted just before the first DOES_NOT_EXIST entry (or
		 * at the tail if there is none).
		 */
		for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL &&
		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST);
		    prev_cpi = cpi, cpi = cpi->cpi_next)
			;

		if (prev_cpi == NULL)
			cct->cct_cpi_head = newcpi;
		else
			prev_cpi->cpi_next = newcpi;

		newcpi->cpi_next = cpi;

		if (cpi == NULL)
			cct->cct_cpi_tail = newcpi;
	}
}

/*
 * Enqueue the vhcache client (cct) at the tail of the list
 */
static void
enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
    mdi_vhcache_client_t *cct)
{
	cct->cct_next = NULL;
	if (vhcache->vhcache_client_head == NULL)
		vhcache->vhcache_client_head = cct;
	else
		vhcache->vhcache_client_tail->cct_next = cct;
	vhcache->vhcache_client_tail = cct;
}

/* Free an array of nelem kmem-allocated strings and the array itself */
static void
free_string_array(char **str, int nelem)
{
	int i;

	if (str) {
		for (i = 0; i < nelem; i++) {
			if (str[i])
				kmem_free(str[i], strlen(str[i]) + 1);
		}
		kmem_free(str, sizeof (char *) * nelem);
	}
}

/* Free a vhcache phci entry and its path string */
static void
free_vhcache_phci(mdi_vhcache_phci_t *cphci)
{
	kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1);
	kmem_free(cphci, sizeof (*cphci));
}

/* Free a vhcache pathinfo entry and its address string */
static void
free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
{
	kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1);
	kmem_free(cpi, sizeof (*cpi));
}

/* Free a vhcache client entry and its "name@addr" string */
static void
free_vhcache_client(mdi_vhcache_client_t *cct)
{
	kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1);
	kmem_free(cct, sizeof (*cct));
}

/*
 * Build the "<clientname>@<clientaddress>" string for a client.
 * The returned string is kmem-allocated; if ret_len is non-NULL the
 * allocated length (including the NUL) is returned through it so the
 * caller can kmem_free() it.
 */
static char *
vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
{
	char *name_addr;
	int len;

	/* +2: one for the '@' separator, one for the NUL */
	len = strlen(ct_name) + strlen(ct_addr) + 2;
	name_addr = kmem_alloc(len, KM_SLEEP);
	(void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);

	if (ret_len)
		*ret_len = len;
	return (name_addr);
}

/*
 * Copy the contents of paddrnvl to vhci cache.
 * paddrnvl nvlist contains path information for a vhci client.
 * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
 */
static void
paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
    mdi_vhcache_client_t *cct)
{
	nvpair_t *nvp = NULL;
	mdi_vhcache_pathinfo_t *cpi;
	uint_t nelem;
	uint32_t *val;

	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
		ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
		cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
		(void) nvpair_value_uint32_array(nvp, &val, &nelem);
		ASSERT(nelem == 2);
		/* val[0] indexes cphci_list (phci-id), val[1] is cpi_flags */
		cpi->cpi_cphci = cphci_list[val[0]];
		cpi->cpi_flags = val[1];
		enqueue_tail_vhcache_pathinfo(cct, cpi);
	}
}

/*
 * Copy the contents of caddrmapnvl to vhci cache.
 * caddrmapnvl nvlist contains vhci client address to phci client address
 * mappings. See the comment in mainnvl_to_vhcache() for the format of
 * this nvlist.
 */
static void
caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
    mdi_vhcache_phci_t *cphci_list[])
{
	nvpair_t *nvp = NULL;
	nvlist_t *paddrnvl;
	mdi_vhcache_client_t *cct;

	while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
		ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
		cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
		(void) nvpair_value_nvlist(nvp, &paddrnvl);
		paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
		/* the client must contain at least one path */
		ASSERT(cct->cct_cpi_head != NULL);

		enqueue_vhcache_client(vhcache, cct);
		(void) mod_hash_insert(vhcache->vhcache_client_hash,
		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
	}
}

/*
 * Copy the contents of the main nvlist to vhci cache.
 *
 * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
 * The nvlist contains the mappings between the vhci client addresses and
 * their corresponding phci client addresses.
 *
 * The structure of the nvlist is as follows:
 *
 * Main nvlist:
 *	NAME		TYPE		DATA
 *	version		int32		version number
 *	phcis		string array	array of phci paths
 *	clientaddrmap	nvlist_t	c2paddrs_nvl (see below)
 *
 * structure of c2paddrs_nvl:
 *	NAME		TYPE		DATA
 *	caddr1		nvlist_t	paddrs_nvl1
 *	caddr2		nvlist_t	paddrs_nvl2
 *	...
 * where caddr1, caddr2, ... are vhci client name and addresses in the
 * form of "<clientname>@<clientaddress>".
 * (for example: "ssd@2000002037cd9f72");
 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
 *
 * structure of paddrs_nvl:
 *	NAME		TYPE		DATA
 *	pi_addr1	uint32_array	(phci-id, cpi_flags)
 *	pi_addr2	uint32_array	(phci-id, cpi_flags)
 *	...
 * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
 * (so called pi_addrs, for example: "w2100002037cd9f72,0");
 * phci-ids are integers that identify pHCIs to which the
 * bus specific address belongs to. These integers are used as an index
 * into the phcis string array in the main nvlist to get the pHCI path.
 */
static int
mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
{
	char **phcis, **phci_namep;
	uint_t nphcis;
	mdi_vhcache_phci_t *cphci, **cphci_list;
	nvlist_t *caddrmapnvl;
	int32_t ver;
	int i;
	size_t cphci_list_size;

	ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));

	/* reject a missing or mismatched cache version */
	if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
	    ver != MDI_VHCI_CACHE_VERSION)
		return (MDI_FAILURE);

	/* no phcis recorded: an empty cache is still a valid cache */
	if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
	    &nphcis) != 0)
		return (MDI_SUCCESS);

	ASSERT(nphcis > 0);

	/* temporary phci-id -> cphci index used while decoding paths */
	cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
	cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
	for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
		cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
		cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
		enqueue_vhcache_phci(vhcache, cphci);
		cphci_list[i] = cphci;
	}

	ASSERT(vhcache->vhcache_phci_head != NULL);

	if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
		caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);

	kmem_free(cphci_list, cphci_list_size);
	return (MDI_SUCCESS);
}

/*
 * Build paddrnvl for the specified client using the information in the
 * vhci cache and add it to the caddrmapnvl.
 * Returns 0 on success, errno on failure.
 */
static int
vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
    nvlist_t *caddrmapnvl)
{
	mdi_vhcache_pathinfo_t *cpi;
	nvlist_t *nvl;
	int err;
	uint32_t val[2];

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0)
		return (err);

	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		val[0] = cpi->cpi_cphci->cphci_id;
		val[1] = cpi->cpi_flags;
		if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2))
		    != 0)
			goto out;
	}

	err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl);
out:
	/* nvlist_add_nvlist copies nvl, so it is freed on all paths */
	nvlist_free(nvl);
	return (err);
}

/*
 * Build caddrmapnvl using the information in the vhci cache
 * and add it to the mainnvl.
 * Returns 0 on success, errno on failure.
 */
static int
vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
{
	mdi_vhcache_client_t *cct;
	nvlist_t *nvl;
	int err;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0)
		return (err);

	for (cct = vhcache->vhcache_client_head; cct != NULL;
	    cct = cct->cct_next) {
		if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0)
			goto out;
	}

	err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl);
out:
	nvlist_free(nvl);
	return (err);
}

/*
 * Build nvlist using the information in the vhci cache.
 * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
 * Returns nvl on success, NULL on failure.
 */
static nvlist_t *
vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
{
	mdi_vhcache_phci_t *cphci;
	uint_t phci_count;
	char **phcis;
	nvlist_t *nvl;
	int err, i;

	if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
		nvl = NULL;
		goto out;
	}

	if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
	    MDI_VHCI_CACHE_VERSION)) != 0)
		goto out;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	/* an empty cache serializes to just the version nvpair */
	if (vhcache->vhcache_phci_head == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return (nvl);
	}

	/*
	 * Assign sequential phci-ids; vhcache_to_paddrnvl() records these
	 * ids as indexes into the phcis string array built below.
	 */
	phci_count = 0;
	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci->cphci_next)
		cphci->cphci_id = phci_count++;

	/* build phci pathname list */
	phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
	for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
	    cphci = cphci->cphci_next, i++)
		phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);

	err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
	    phci_count);
	free_string_array(phcis, phci_count);

	if (err == 0 &&
	    (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
		rw_exit(&vhcache->vhcache_lock);
		return (nvl);
	}

	rw_exit(&vhcache->vhcache_lock);
out:
	nvlist_free(nvl);
	return (NULL);
}

/*
 * Lookup vhcache phci structure for the specified phci path.
 */
static mdi_vhcache_phci_t *
lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
{
	mdi_vhcache_phci_t *cphci;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci->cphci_next) {
		if (strcmp(cphci->cphci_path, phci_path) == 0)
			return (cphci);
	}

	return (NULL);
}

/*
 * Lookup vhcache phci structure for the specified phci.
 */
static mdi_vhcache_phci_t *
lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
{
	mdi_vhcache_phci_t *cphci;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci->cphci_next) {
		if (cphci->cphci_phci == ph)
			return (cphci);
	}

	return (NULL);
}

/*
 * Add the specified phci to the vhci cache if not already present.
 */
static void
vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t *cphci;
	char *pathname;
	int cache_updated;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);

	pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
	(void) ddi_pathname(ph->ph_dip, pathname);
	if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
	    != NULL) {
		/* known path: just (re)attach the live phci pointer */
		cphci->cphci_phci = ph;
		cache_updated = 0;
	} else {
		cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
		cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
		cphci->cphci_phci = ph;
		enqueue_vhcache_phci(vhcache, cphci);
		cache_updated = 1;
	}

	rw_exit(&vhcache->vhcache_lock);

	/*
	 * Since a new phci has been added, reset
	 * vhc_path_discovery_cutoff_time to allow for discovery of paths
	 * during next vhcache_discover_paths().
	 */
	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_path_discovery_cutoff_time = 0;
	mutex_exit(&vhc->vhc_lock);

	kmem_free(pathname, MAXPATHLEN);
	if (cache_updated)
		vhcache_dirty(vhc);
}

/*
 * Remove the reference to the specified phci from the vhci cache.
 */
static void
vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t *cphci;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
	if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
		/* do not remove the actual mdi_vhcache_phci structure */
		cphci->cphci_phci = NULL;
	}
	rw_exit(&vhcache->vhcache_lock);
}

/*
 * Initialize a lookup token: copy from src, or reset to "no cached
 * lookup" when src is NULL.
 */
static void
init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
    mdi_vhcache_lookup_token_t *src)
{
	if (src == NULL) {
		dst->lt_cct = NULL;
		dst->lt_cct_lookup_time = 0;
	} else {
		dst->lt_cct = src->lt_cct;
		dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
	}
}

/*
 * Look up vhcache client for the specified client.
 */
static mdi_vhcache_client_t *
lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
    mdi_vhcache_lookup_token_t *token)
{
	mod_hash_val_t hv;
	char *name_addr;
	int len;

	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	/*
	 * If no vhcache clean occurred since the last lookup, we can
	 * simply return the cct from the last lookup operation.
	 * It works because ccts are never freed except during the vhcache
	 * cleanup operation.
	 */
	if (token != NULL &&
	    vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
		return (token->lt_cct);

	name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
	if (mod_hash_find(vhcache->vhcache_client_hash,
	    (mod_hash_key_t)name_addr, &hv) == 0) {
		if (token) {
			token->lt_cct = (mdi_vhcache_client_t *)hv;
			token->lt_cct_lookup_time = ddi_get_lbolt64();
		}
	} else {
		if (token) {
			token->lt_cct = NULL;
			token->lt_cct_lookup_time = 0;
		}
		hv = NULL;
	}
	kmem_free(name_addr, len);
	return ((mdi_vhcache_client_t *)hv);
}

/*
 * Add the specified path to the vhci cache if not already present.
 * Also add the vhcache client for the client corresponding to this path
 * if it doesn't already exist.
 */
static void
vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_pathinfo_t *cpi;
	mdi_phci_t *ph = pip->pi_phci;
	mdi_client_t *ct = pip->pi_client;
	int cache_updated = 0;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);

	/* if vhcache client for this pip doesn't already exist, add it */
	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
	    NULL)) == NULL) {
		cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
		cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
		    ct->ct_guid, NULL);
		enqueue_vhcache_client(vhcache, cct);
		(void) mod_hash_insert(vhcache->vhcache_client_hash,
		    (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
		cache_updated = 1;
	}

	/* look for an existing cache entry for this exact path */
	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if (cpi->cpi_cphci->cphci_phci == ph &&
		    strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
			cpi->cpi_pip = pip;
			if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
				/*
				 * The path is back; clear the hint and
				 * restore the ordered-list invariant.
				 */
				cpi->cpi_flags &=
				    ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
				sort_vhcache_paths(cct);
				cache_updated = 1;
			}
			break;
		}
	}

	if (cpi == NULL) {
		cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
		cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
		cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
		ASSERT(cpi->cpi_cphci != NULL);
		cpi->cpi_pip = pip;
		enqueue_vhcache_pathinfo(cct, cpi);
		cache_updated = 1;
	}

	rw_exit(&vhcache->vhcache_lock);

	if (cache_updated)
		vhcache_dirty(vhc);
}

/*
 * Remove the reference to the specified path from the vhci cache.
 */
static void
vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_client_t *ct = pip->pi_client;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_pathinfo_t *cpi;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
	if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
	    NULL)) != NULL) {
		for (cpi = cct->cct_cpi_head; cpi != NULL;
		    cpi = cpi->cpi_next) {
			if (cpi->cpi_pip == pip) {
				/* keep the cache entry, drop the live ref */
				cpi->cpi_pip = NULL;
				break;
			}
		}
	}
	rw_exit(&vhcache->vhcache_lock);
}

/*
 * Flush the vhci cache to disk.
 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
 */
static int
flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
{
	nvlist_t *nvl;
	int err;
	int rv;

	/*
	 * It is possible that the system may shutdown before
	 * i_ddi_io_initialized (during stmsboot for example). To allow for
	 * flushing the cache in this case do not check for
	 * i_ddi_io_initialized when force flag is set.
	 */
	if (force_flag == 0 && !i_ddi_io_initialized())
		return (MDI_FAILURE);

	if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
		err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
		nvlist_free(nvl);
	} else
		err = EFAULT;

	rv = MDI_SUCCESS;
	mutex_enter(&vhc->vhc_lock);
	if (err != 0) {
		if (err == EROFS) {
			/*
			 * Read-only root: give up on flushing entirely and
			 * clear the dirty/error state so we stop retrying.
			 */
			vhc->vhc_flags |= MDI_VHC_READONLY_FS;
			vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
			    MDI_VHC_VHCACHE_DIRTY);
		} else {
			/* warn only on the first failure in a row */
			if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
				cmn_err(CE_CONT, "%s: update failed\n",
				    vhc->vhc_vhcache_filename);
				vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
			}
			rv = MDI_FAILURE;
		}
	} else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
		cmn_err(CE_CONT,
		    "%s: update now ok\n", vhc->vhc_vhcache_filename);
		vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
	}
	mutex_exit(&vhc->vhc_lock);

	return (rv);
}

/*
 * Call flush_vhcache() to flush the vhci cache at the scheduled time.
 * Exits itself if left idle for the idle timeout period.
8315 */ 8316 static void 8317 vhcache_flush_thread(void *arg) 8318 { 8319 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 8320 clock_t idle_time, quit_at_ticks; 8321 callb_cpr_t cprinfo; 8322 8323 /* number of seconds to sleep idle before exiting */ 8324 idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND; 8325 8326 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 8327 "mdi_vhcache_flush"); 8328 mutex_enter(&vhc->vhc_lock); 8329 for (; ; ) { 8330 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8331 (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) { 8332 if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) { 8333 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8334 (void) cv_timedwait(&vhc->vhc_cv, 8335 &vhc->vhc_lock, vhc->vhc_flush_at_ticks); 8336 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8337 } else { 8338 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 8339 mutex_exit(&vhc->vhc_lock); 8340 8341 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) 8342 vhcache_dirty(vhc); 8343 8344 mutex_enter(&vhc->vhc_lock); 8345 } 8346 } 8347 8348 quit_at_ticks = ddi_get_lbolt() + idle_time; 8349 8350 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8351 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) && 8352 ddi_get_lbolt() < quit_at_ticks) { 8353 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8354 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 8355 quit_at_ticks); 8356 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8357 } 8358 8359 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 8360 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) 8361 goto out; 8362 } 8363 8364 out: 8365 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD; 8366 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 8367 CALLB_CPR_EXIT(&cprinfo); 8368 } 8369 8370 /* 8371 * Make vhci cache dirty and schedule flushing by vhcache flush thread. 
 */
static void
vhcache_dirty(mdi_vhci_config_t *vhc)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	int create_thread;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	/* do not flush cache until the cache is fully built */
	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}
	rw_exit(&vhcache->vhcache_lock);

	mutex_enter(&vhc->vhc_lock);
	/* no point scheduling a flush if the backing fs is read-only */
	if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
		mutex_exit(&vhc->vhc_lock);
		return;
	}

	vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
	vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
	    mdi_vhcache_flush_delay * TICKS_PER_SECOND;
	if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
		/* flush thread already exists; just wake it up */
		cv_broadcast(&vhc->vhc_cv);
		create_thread = 0;
	} else {
		vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
		create_thread = 1;
	}
	mutex_exit(&vhc->vhc_lock);

	if (create_thread)
		(void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
		    0, &p0, TS_RUN, minclsyspri);
}

/*
 * phci bus config structure - one for each phci bus config operation that
 * we initiate on behalf of a vhci.
 */
typedef struct mdi_phci_bus_config_s {
	char *phbc_phci_path;
	struct mdi_vhci_bus_config_s *phbc_vhbusconfig;	/* vhci bus config */
	struct mdi_phci_bus_config_s *phbc_next;
} mdi_phci_bus_config_t;

/* vhci bus config structure - one for each vhci bus config operation */
typedef struct mdi_vhci_bus_config_s {
	ddi_bus_config_op_t vhbc_op;	/* bus config op */
	major_t vhbc_op_major;		/* bus config op major */
	uint_t vhbc_op_flags;		/* bus config op flags */
	kmutex_t vhbc_lock;
	kcondvar_t vhbc_cv;
	int vhbc_thr_count;		/* # of outstanding worker threads */
} mdi_vhci_bus_config_t;

/*
 * bus config the specified phci
 */
static void
bus_config_phci(void *arg)
{
	mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
	mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
	dev_info_t *ph_dip;

	/*
	 * first configure all path components upto phci and then configure
	 * the phci children.
	 */
	if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
	    != NULL) {
		if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
		    vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
			(void) ndi_devi_config_driver(ph_dip,
			    vhbc->vhbc_op_flags,
			    vhbc->vhbc_op_major);
		} else
			(void) ndi_devi_config(ph_dip,
			    vhbc->vhbc_op_flags);

		/* release the hold that e_ddi_hold_devi_by_path() placed */
		ndi_rele_devi(ph_dip);
	}

	/* this work item is consumed here; free it */
	kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
	kmem_free(phbc, sizeof (*phbc));

	/* signal the initiating thread when the last worker is done */
	mutex_enter(&vhbc->vhbc_lock);
	vhbc->vhbc_thr_count--;
	if (vhbc->vhbc_thr_count == 0)
		cv_broadcast(&vhbc->vhbc_cv);
	mutex_exit(&vhbc->vhbc_lock);
}

/*
 * Bus config all phcis associated with the vhci in parallel.
 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
 */
static void
bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
    ddi_bus_config_op_t op, major_t maj)
{
	mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
	mdi_vhci_bus_config_t *vhbc;
	mdi_vhcache_phci_t *cphci;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if (vhcache->vhcache_phci_head == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);

	/* build one work item per cached phci */
	for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
	    cphci = cphci->cphci_next) {
		/* skip phcis that haven't attached before root is available */
		if (!modrootloaded && (cphci->cphci_phci == NULL))
			continue;
		phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
		phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
		    KM_SLEEP);
		phbc->phbc_vhbusconfig = vhbc;
		phbc->phbc_next = phbc_head;
		phbc_head = phbc;
		vhbc->vhbc_thr_count++;
	}
	rw_exit(&vhcache->vhcache_lock);

	vhbc->vhbc_op = op;
	vhbc->vhbc_op_major = maj;
	vhbc->vhbc_op_flags = NDI_NO_EVENT |
	    (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
	mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);

	/* now create threads to initiate bus config on all phcis in parallel */
	for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
		phbc_next = phbc->phbc_next;
		if (mdi_mtc_off)
			bus_config_phci((void *)phbc);
		else
			(void) thread_create(NULL, 0, bus_config_phci, phbc,
			    0, &p0, TS_RUN, minclsyspri);
	}

	mutex_enter(&vhbc->vhbc_lock);
	/* wait until all threads exit */
	while (vhbc->vhbc_thr_count > 0)
		cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
	mutex_exit(&vhbc->vhbc_lock);

	mutex_destroy(&vhbc->vhbc_lock);
	cv_destroy(&vhbc->vhbc_cv);
	kmem_free(vhbc, sizeof (*vhbc));
}

/*
 * Single threaded version of bus_config_all_phcis()
 */
static void
st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
    ddi_bus_config_op_t op, major_t maj)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;

	single_threaded_vhconfig_enter(vhc);
	bus_config_all_phcis(vhcache, flags, op, maj);
	single_threaded_vhconfig_exit(vhc);
}

/*
 * Perform BUS_CONFIG_ONE on the specified child of the phci.
 * The path includes the child component in addition to the phci path.
 */
static int
bus_config_one_phci_child(char *path)
{
	dev_info_t *ph_dip, *child;
	char *devnm;
	int rv = MDI_FAILURE;

	/* extract the child component of the phci */
	devnm = strrchr(path, '/');
	*devnm++ = '\0';

	/*
	 * first configure all path components upto phci and then
	 * configure the phci child.
	 */
	if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
		if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
		    NDI_SUCCESS) {
			/*
			 * release the hold that ndi_devi_config_one() placed
			 */
			ndi_rele_devi(child);
			rv = MDI_SUCCESS;
		}

		/* release the hold that e_ddi_hold_devi_by_path() placed */
		ndi_rele_devi(ph_dip);
	}

	/* restore the '/' separator overwritten above */
	devnm--;
	*devnm = '/';
	return (rv);
}

/*
 * Build a list of phci client paths for the specified vhci client.
 * The list includes only those phci client paths which aren't configured yet.
 */
static mdi_phys_path_t *
build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
{
	mdi_vhcache_pathinfo_t *cpi;
	mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
	int config_path, len;

	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		/*
		 * include only those paths that aren't configured.
		 */
		config_path = 0;
		if (cpi->cpi_pip == NULL)
			config_path = 1;
		else {
			MDI_PI_LOCK(cpi->cpi_pip);
			if (MDI_PI_IS_INIT(cpi->cpi_pip))
				config_path = 1;
			MDI_PI_UNLOCK(cpi->cpi_pip);
		}

		if (config_path) {
			pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
			/* room for '/', '@' and the terminating NUL */
			len = strlen(cpi->cpi_cphci->cphci_path) +
			    strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
			pp->phys_path = kmem_alloc(len, KM_SLEEP);
			(void) snprintf(pp->phys_path, len, "%s/%s@%s",
			    cpi->cpi_cphci->cphci_path, ct_name,
			    cpi->cpi_addr);
			pp->phys_path_next = NULL;

			/* append at the tail to preserve cache order */
			if (pp_head == NULL)
				pp_head = pp;
			else
				pp_tail->phys_path_next = pp;
			pp_tail = pp;
		}
	}

	return (pp_head);
}

/*
 * Free the memory allocated for phci client path list.
 */
static void
free_phclient_path_list(mdi_phys_path_t *pp_head)
{
	mdi_phys_path_t *pp, *pp_next;

	for (pp = pp_head; pp != NULL; pp = pp_next) {
		pp_next = pp->phys_path_next;
		kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
		kmem_free(pp, sizeof (*pp));
	}
}

/*
 * Allocate async client structure and initialize with the specified values.
 */
static mdi_async_client_config_t *
alloc_async_client_config(char *ct_name, char *ct_addr,
    mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
{
	mdi_async_client_config_t *acc;

	acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
	acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
	acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
	acc->acc_phclient_path_list_head = pp_head;
	init_vhcache_lookup_token(&acc->acc_token, tok);
	acc->acc_next = NULL;
	return (acc);
}

/*
 * Free the memory allocated for the async client structure and their members.
 */
static void
free_async_client_config(mdi_async_client_config_t *acc)
{
	if (acc->acc_phclient_path_list_head)
		free_phclient_path_list(acc->acc_phclient_path_list_head);
	kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
	kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
	kmem_free(acc, sizeof (*acc));
}

/*
 * Sort vhcache pathinfos (cpis) of the specified client.
 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
 * flag set come at the beginning of the list. All cpis which have this
 * flag set come at the end of the list.
 */
static void
sort_vhcache_paths(mdi_vhcache_client_t *cct)
{
	mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;

	/* detach the list and re-enqueue; enqueue_vhcache_pathinfo() orders */
	cpi_head = cct->cct_cpi_head;
	cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
	for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
		cpi_next = cpi->cpi_next;
		enqueue_vhcache_pathinfo(cct, cpi);
	}
}

/*
 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
 * every vhcache pathinfo of the specified client. If not adjust the flag
 * setting appropriately.
 *
 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
 * on-disk vhci cache. So every time this flag is updated the cache must be
 * flushed.
 */
static void
adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_vhcache_lookup_token_t *tok)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_pathinfo_t *cpi;

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
	    == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/*
	 * to avoid unnecessary on-disk cache updates, first check if an
	 * update is really needed. If no update is needed simply return.
	 */
	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if ((cpi->cpi_pip != NULL &&
		    (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
		    (cpi->cpi_pip == NULL &&
		    !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
			break;
		}
	}
	if (cpi == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/*
	 * An update is needed; upgrade to the writer lock.  If the upgrade
	 * requires dropping the lock, the client must be looked up again
	 * since it may have changed while unlocked.
	 */
	if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
		rw_exit(&vhcache->vhcache_lock);
		rw_enter(&vhcache->vhcache_lock, RW_WRITER);
		if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
		    tok)) == NULL) {
			rw_exit(&vhcache->vhcache_lock);
			return;
		}
	}

	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if (cpi->cpi_pip != NULL)
			cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
		else
			cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
	}
	sort_vhcache_paths(cct);

	rw_exit(&vhcache->vhcache_lock);
	/* the hint flags are persistent, so schedule an on-disk update */
	vhcache_dirty(vhc);
}

/*
 * Configure all specified paths of the client.
 */
static void
config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
{
	mdi_phys_path_t *pp;

	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
		(void) bus_config_one_phci_child(pp->phys_path);
	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
}

/*
 * Dequeue elements from vhci async client config list and bus configure
 * their corresponding phci clients.
 */
static void
config_client_paths_thread(void *arg)
{
	mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
	mdi_async_client_config_t *acc;
	clock_t quit_at_ticks;
	clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
	callb_cpr_t cprinfo;

	CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
	    "mdi_config_client_paths");

	for (; ; ) {
		quit_at_ticks = ddi_get_lbolt() + idle_time;

		mutex_enter(&vhc->vhc_lock);
		/* wait for work; give up if idle past the deadline */
		while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
		    vhc->vhc_acc_list_head == NULL &&
		    ddi_get_lbolt() < quit_at_ticks) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			(void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
			    quit_at_ticks);
			CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
		}

		if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
		    vhc->vhc_acc_list_head == NULL)
			goto out;

		/* dequeue the head element of the async config list */
		acc = vhc->vhc_acc_list_head;
		vhc->vhc_acc_list_head = acc->acc_next;
		if (vhc->vhc_acc_list_head == NULL)
			vhc->vhc_acc_list_tail = NULL;
		vhc->vhc_acc_count--;
		mutex_exit(&vhc->vhc_lock);

		/* do the actual bus configs without vhc_lock held */
		config_client_paths_sync(vhc, acc->acc_ct_name,
		    acc->acc_ct_addr, acc->acc_phclient_path_list_head,
		    &acc->acc_token);

		free_async_client_config(acc);
	}

out:
	vhc->vhc_acc_thrcount--;
	/* CALLB_CPR_EXIT releases the vhc->vhc_lock */
	CALLB_CPR_EXIT(&cprinfo);
}

/*
 * Arrange for all the phci client paths (pp_head) for the specified client
 * to be bus configured asynchronously by a thread.
 */
static void
config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
    mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
{
	mdi_async_client_config_t *acc, *newacc;
	int create_thread;

	if (pp_head == NULL)
		return;

	if (mdi_mtc_off) {
		/* multi-threaded config is disabled; do it synchronously */
		config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
		free_phclient_path_list(pp_head);
		return;
	}

	newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
	ASSERT(newacc);

	mutex_enter(&vhc->vhc_lock);
	/* drop the request if one is already queued for this client */
	for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
		if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
		    strcmp(ct_addr, acc->acc_ct_addr) == 0) {
			free_async_client_config(newacc);
			mutex_exit(&vhc->vhc_lock);
			return;
		}
	}

	if (vhc->vhc_acc_list_head == NULL)
		vhc->vhc_acc_list_head = newacc;
	else
		vhc->vhc_acc_list_tail->acc_next = newacc;
	vhc->vhc_acc_list_tail = newacc;
	vhc->vhc_acc_count++;
	/* wake an existing worker if enough are running, else create one */
	if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
		cv_broadcast(&vhc->vhc_cv);
		create_thread = 0;
	} else {
		vhc->vhc_acc_thrcount++;
		create_thread = 1;
	}
	mutex_exit(&vhc->vhc_lock);

	if (create_thread)
		(void) thread_create(NULL, 0, config_client_paths_thread, vhc,
		    0, &p0, TS_RUN, minclsyspri);
}

/*
 * Return number of online paths for the specified client.
 */
static int
nonline_paths(mdi_vhcache_client_t *cct)
{
	mdi_vhcache_pathinfo_t *cpi;
	int online_count = 0;

	for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
		if (cpi->cpi_pip != NULL) {
			MDI_PI_LOCK(cpi->cpi_pip);
			if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
				online_count++;
			MDI_PI_UNLOCK(cpi->cpi_pip);
		}
	}

	return (online_count);
}

/*
 * Bus configure all paths for the specified vhci client.
 * If at least one path for the client is already online, the remaining paths
 * will be configured asynchronously. Otherwise, it synchronously configures
 * the paths until at least one path is online and then rest of the paths
 * will be configured asynchronously.
 */
static void
config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_phys_path_t *pp_head, *pp;
	mdi_vhcache_client_t *cct;
	mdi_vhcache_lookup_token_t tok;

	/* entered with vhcache_lock held; released on all paths below */
	ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));

	init_vhcache_lookup_token(&tok, NULL);

	if (ct_name == NULL || ct_addr == NULL ||
	    (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
	    == NULL ||
	    (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
		rw_exit(&vhcache->vhcache_lock);
		return;
	}

	/* if at least one path is online, configure the rest asynchronously */
	if (nonline_paths(cct) > 0) {
		rw_exit(&vhcache->vhcache_lock);
		config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
		return;
	}

	rw_exit(&vhcache->vhcache_lock);

	/*
	 * No path is online yet.  Configure paths synchronously one at a
	 * time; as soon as one comes online, hand the remainder to the
	 * async machinery.
	 */
	for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
		if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
			rw_enter(&vhcache->vhcache_lock, RW_READER);

			/* the client may have gone away while unlocked */
			if ((cct = lookup_vhcache_client(vhcache, ct_name,
			    ct_addr, &tok)) == NULL) {
				rw_exit(&vhcache->vhcache_lock);
				goto out;
			}

			if (nonline_paths(cct) > 0 &&
			    pp->phys_path_next != NULL) {
				rw_exit(&vhcache->vhcache_lock);
				config_client_paths_async(vhc, ct_name, ct_addr,
				    pp->phys_path_next, &tok);
				/* tail ownership passed to async config */
				pp->phys_path_next = NULL;
				goto out;
			}

			rw_exit(&vhcache->vhcache_lock);
		}
	}

	adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
out:
	free_phclient_path_list(pp_head);
}

/*
 * Serialize vhci config operations: block until no other thread holds
 * MDI_VHC_SINGLE_THREADED, then claim it.
 */
static void
single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
{
	mutex_enter(&vhc->vhc_lock);
	while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
		cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
	vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
	mutex_exit(&vhc->vhc_lock);
}

/*
 * Release the single-threaded claim and wake any waiters.
 */
static void
single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
{
	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
	cv_broadcast(&vhc->vhc_cv);
	mutex_exit(&vhc->vhc_lock);
}

typedef struct mdi_phci_driver_info {
	char *phdriver_name;	/* name of the phci driver */

	/* set to non zero if the phci driver supports root device */
	int phdriver_root_support;
} mdi_phci_driver_info_t;

/*
 * vhci class and root support capability of a phci driver can be
 * specified using ddi-vhci-class and ddi-no-root-support properties in the
 * phci driver.conf file. The built-in tables below contain this information
 * for those phci drivers whose driver.conf files don't yet contain this info.
 *
 * All phci drivers except iscsi have root device support.
8996 */ 8997 static mdi_phci_driver_info_t scsi_phci_driver_list[] = { 8998 { "fp", 1 }, 8999 { "iscsi", 0 }, 9000 { "ibsrp", 1 } 9001 }; 9002 9003 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 }; 9004 9005 static void * 9006 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size) 9007 { 9008 void *new_ptr; 9009 9010 new_ptr = kmem_zalloc(new_size, KM_SLEEP); 9011 if (old_ptr) { 9012 bcopy(old_ptr, new_ptr, MIN(old_size, new_size)); 9013 kmem_free(old_ptr, old_size); 9014 } 9015 return (new_ptr); 9016 } 9017 9018 static void 9019 add_to_phci_list(char ***driver_list, int **root_support_list, 9020 int *cur_elements, int *max_elements, char *driver_name, int root_support) 9021 { 9022 ASSERT(*cur_elements <= *max_elements); 9023 if (*cur_elements == *max_elements) { 9024 *max_elements += 10; 9025 *driver_list = mdi_realloc(*driver_list, 9026 sizeof (char *) * (*cur_elements), 9027 sizeof (char *) * (*max_elements)); 9028 *root_support_list = mdi_realloc(*root_support_list, 9029 sizeof (int) * (*cur_elements), 9030 sizeof (int) * (*max_elements)); 9031 } 9032 (*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP); 9033 (*root_support_list)[*cur_elements] = root_support; 9034 (*cur_elements)++; 9035 } 9036 9037 static void 9038 get_phci_driver_list(char *vhci_class, char ***driver_list, 9039 int **root_support_list, int *cur_elements, int *max_elements) 9040 { 9041 mdi_phci_driver_info_t *st_driver_list, *p; 9042 int st_ndrivers, root_support, i, j, driver_conf_count; 9043 major_t m; 9044 struct devnames *dnp; 9045 ddi_prop_t *propp; 9046 9047 *driver_list = NULL; 9048 *root_support_list = NULL; 9049 *cur_elements = 0; 9050 *max_elements = 0; 9051 9052 /* add the phci drivers derived from the phci driver.conf files */ 9053 for (m = 0; m < devcnt; m++) { 9054 dnp = &devnamesp[m]; 9055 9056 if (dnp->dn_flags & DN_PHCI_DRIVER) { 9057 LOCK_DEV_OPS(&dnp->dn_lock); 9058 if (dnp->dn_global_prop_ptr != NULL && 9059 (propp = 
i_ddi_prop_search(DDI_DEV_T_ANY, 9060 DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING, 9061 &dnp->dn_global_prop_ptr->prop_list)) != NULL && 9062 strcmp(propp->prop_val, vhci_class) == 0) { 9063 9064 root_support = (i_ddi_prop_search(DDI_DEV_T_ANY, 9065 DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT, 9066 &dnp->dn_global_prop_ptr->prop_list) 9067 == NULL) ? 1 : 0; 9068 9069 add_to_phci_list(driver_list, root_support_list, 9070 cur_elements, max_elements, dnp->dn_name, 9071 root_support); 9072 9073 UNLOCK_DEV_OPS(&dnp->dn_lock); 9074 } else 9075 UNLOCK_DEV_OPS(&dnp->dn_lock); 9076 } 9077 } 9078 9079 driver_conf_count = *cur_elements; 9080 9081 /* add the phci drivers specified in the built-in tables */ 9082 if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) { 9083 st_driver_list = scsi_phci_driver_list; 9084 st_ndrivers = sizeof (scsi_phci_driver_list) / 9085 sizeof (mdi_phci_driver_info_t); 9086 } else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) { 9087 st_driver_list = ib_phci_driver_list; 9088 st_ndrivers = sizeof (ib_phci_driver_list) / 9089 sizeof (mdi_phci_driver_info_t); 9090 } else { 9091 st_driver_list = NULL; 9092 st_ndrivers = 0; 9093 } 9094 9095 for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) { 9096 /* add this phci driver if not already added before */ 9097 for (j = 0; j < driver_conf_count; j++) { 9098 if (strcmp((*driver_list)[j], p->phdriver_name) == 0) 9099 break; 9100 } 9101 if (j == driver_conf_count) { 9102 add_to_phci_list(driver_list, root_support_list, 9103 cur_elements, max_elements, p->phdriver_name, 9104 p->phdriver_root_support); 9105 } 9106 } 9107 } 9108 9109 /* 9110 * Attach the phci driver instances associated with the specified vhci class. 9111 * If root is mounted attach all phci driver instances. 9112 * If root is not mounted, attach the instances of only those phci 9113 * drivers that have the root support. 
9114 */ 9115 static void 9116 attach_phci_drivers(char *vhci_class) 9117 { 9118 char **driver_list, **p; 9119 int *root_support_list; 9120 int cur_elements, max_elements, i; 9121 major_t m; 9122 9123 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 9124 &cur_elements, &max_elements); 9125 9126 for (i = 0; i < cur_elements; i++) { 9127 if (modrootloaded || root_support_list[i]) { 9128 m = ddi_name_to_major(driver_list[i]); 9129 if (m != DDI_MAJOR_T_NONE && 9130 ddi_hold_installed_driver(m)) 9131 ddi_rele_driver(m); 9132 } 9133 } 9134 9135 if (driver_list) { 9136 for (i = 0, p = driver_list; i < cur_elements; i++, p++) 9137 kmem_free(*p, strlen(*p) + 1); 9138 kmem_free(driver_list, sizeof (char *) * max_elements); 9139 kmem_free(root_support_list, sizeof (int) * max_elements); 9140 } 9141 } 9142 9143 /* 9144 * Build vhci cache: 9145 * 9146 * Attach phci driver instances and then drive BUS_CONFIG_ALL on 9147 * the phci driver instances. During this process the cache gets built. 9148 * 9149 * Cache is built fully if the root is mounted. 9150 * If the root is not mounted, phci drivers that do not have root support 9151 * are not attached. As a result the cache is built partially. The entries 9152 * in the cache reflect only those phci drivers that have root support. 
 */
static int
build_vhci_cache(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;

	single_threaded_vhconfig_enter(vhc);

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
		/* cache already built; nothing to do */
		rw_exit(&vhcache->vhcache_lock);
		single_threaded_vhconfig_exit(vhc);
		return (0);
	}
	rw_exit(&vhcache->vhcache_lock);

	attach_phci_drivers(vh->vh_class);
	bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
	    BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);
	vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
	rw_exit(&vhcache->vhcache_lock);

	single_threaded_vhconfig_exit(vhc);
	vhcache_dirty(vhc);
	/* return 1 to tell the caller the cache was built just now */
	return (1);
}

/*
 * Determine if discovery of paths is needed.
 */
static int
vhcache_do_discovery(mdi_vhci_config_t *vhc)
{
	int rv = 1;

	mutex_enter(&vhc->vhc_lock);
	if (i_ddi_io_initialized() == 0) {
		/* pre-I/O boot phase: consume the boot discovery budget */
		if (vhc->vhc_path_discovery_boot > 0) {
			vhc->vhc_path_discovery_boot--;
			goto out;
		}
	} else {
		/* post-boot: consume the post-boot discovery budget */
		if (vhc->vhc_path_discovery_postboot > 0) {
			vhc->vhc_path_discovery_postboot--;
			goto out;
		}
	}

	/*
	 * Do full path discovery at most once per mdi_path_discovery_interval.
	 * This is to avoid a series of full path discoveries when opening
	 * stale /dev/[r]dsk links.
	 */
	if (mdi_path_discovery_interval != -1 &&
	    ddi_get_lbolt64() >= vhc->vhc_path_discovery_cutoff_time)
		goto out;

	rv = 0;
out:
	mutex_exit(&vhc->vhc_lock);
	return (rv);
}

/*
 * Discover all paths:
 *
 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
 * driver instances. During this process all paths will be discovered.
 */
static int
vhcache_discover_paths(mdi_vhci_t *vh)
{
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	int rv = 0;

	single_threaded_vhconfig_enter(vhc);

	if (vhcache_do_discovery(vhc)) {
		attach_phci_drivers(vh->vh_class);
		bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE |
		    NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);

		/* rate-limit the next full discovery */
		mutex_enter(&vhc->vhc_lock);
		vhc->vhc_path_discovery_cutoff_time = ddi_get_lbolt64() +
		    mdi_path_discovery_interval * TICKS_PER_SECOND;
		mutex_exit(&vhc->vhc_lock);
		rv = 1;
	}

	single_threaded_vhconfig_exit(vhc);
	return (rv);
}

/*
 * Generic vhci bus config implementation:
 *
 * Parameters
 *	vdip	vhci dip
 *	flags	bus config flags
 *	op	bus config operation
 *	The remaining parameters are bus config operation specific
 *
 * for BUS_CONFIG_ONE
 *	arg	pointer to name@addr
 *	child	upon successful return from this function, *child will be
 *		set to the configured and held devinfo child node of vdip.
 *	ct_addr	pointer to client address (i.e. GUID)
 *
 * for BUS_CONFIG_DRIVER
 *	arg	major number of the driver
 *	child and ct_addr parameters are ignored
 *
 * for BUS_CONFIG_ALL
 *	arg, child, and ct_addr parameters are ignored
 *
 * Note that for the rest of the bus config operations, this function simply
 * calls the framework provided default bus config routine.
 */
int
mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
    void *arg, dev_info_t **child, char *ct_addr)
{
	mdi_vhci_t *vh = i_devi_get_vhci(vdip);
	mdi_vhci_config_t *vhc = vh->vh_config;
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	int rv = 0;
	int params_valid = 0;
	char *cp;

	/*
	 * To bus config vhcis we relay operation, possibly using another
	 * thread, to phcis. The phci driver then interacts with MDI to cause
	 * vhci child nodes to be enumerated under the vhci node. Adding a
	 * vhci child requires an ndi_devi_enter of the vhci. Since another
	 * thread may be adding the child, to avoid deadlock we can't wait
	 * for the relayed operations to complete if we have already entered
	 * the vhci node.
	 */
	if (DEVI_BUSY_OWNED(vdip)) {
		MDI_DEBUG(2, (MDI_NOTE, vdip,
		    "vhci dip is busy owned %p", (void *)vdip));
		goto default_bus_config;
	}

	rw_enter(&vhcache->vhcache_lock, RW_READER);
	if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
		rw_exit(&vhcache->vhcache_lock);
		/* rv == 1 hereafter means the cache was built just now */
		rv = build_vhci_cache(vh);
		rw_enter(&vhcache->vhcache_lock, RW_READER);
	}

	switch (op) {
	case BUS_CONFIG_ONE:
		if (arg != NULL && ct_addr != NULL) {
			/* extract node name */
			cp = (char *)arg;
			while (*cp != '\0' && *cp != '@')
				cp++;
			if (*cp == '@') {
				params_valid = 1;
				/* temporarily split name@addr at the '@' */
				*cp = '\0';
				config_client_paths(vhc, (char *)arg, ct_addr);
				/* config_client_paths() releases cache_lock */
				*cp = '@';
				break;
			}
		}

		rw_exit(&vhcache->vhcache_lock);
		break;

	case BUS_CONFIG_DRIVER:
		rw_exit(&vhcache->vhcache_lock);
		/* skip if build_vhci_cache() already did a full bus config */
		if (rv == 0)
			st_bus_config_all_phcis(vhc, flags, op,
			    (major_t)(uintptr_t)arg);
		break;

	case BUS_CONFIG_ALL:
		rw_exit(&vhcache->vhcache_lock);
		if (rv == 0)
			st_bus_config_all_phcis(vhc, flags, op, -1);
		break;

	default:
		rw_exit(&vhcache->vhcache_lock);
		break;
	}


default_bus_config:
	/*
	 * All requested child nodes are enumerated under the vhci.
	 * Now configure them.
	 */
	if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
	    NDI_SUCCESS) {
		return (MDI_SUCCESS);
	} else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
		/* discover all paths and try configuring again */
		if (vhcache_discover_paths(vh) &&
		    ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
		    NDI_SUCCESS)
			return (MDI_SUCCESS);
	}

	return (MDI_FAILURE);
}

/*
 * Read the on-disk vhci cache into an nvlist for the specified vhci class.
 */
static nvlist_t *
read_on_disk_vhci_cache(char *vhci_class)
{
	nvlist_t *nvl;
	int err;
	char *filename;

	filename = vhclass2vhcache_filename(vhci_class);

	if ((err = fread_nvlist(filename, &nvl)) == 0) {
		kmem_free(filename, strlen(filename) + 1);
		return (nvl);
	} else if (err == EIO)
		cmn_err(CE_WARN, "%s: I/O error, will recreate", filename);
	else if (err == EINVAL)
		cmn_err(CE_WARN,
		    "%s: data file corrupted, will recreate", filename);

	kmem_free(filename, strlen(filename) + 1);
	return (NULL);
}

/*
 * Read on-disk vhci cache into nvlists for all vhci classes.
 * Called during booting by i_ddi_read_devices_files().
 */
void
mdi_read_devices_files(void)
{
	int i;

	for (i = 0; i < N_VHCI_CLASSES; i++)
		vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
}

/*
 * Remove all stale entries from vhci cache.
9406 */ 9407 static void 9408 clean_vhcache(mdi_vhci_config_t *vhc) 9409 { 9410 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 9411 mdi_vhcache_phci_t *phci, *nxt_phci; 9412 mdi_vhcache_client_t *client, *nxt_client; 9413 mdi_vhcache_pathinfo_t *path, *nxt_path; 9414 9415 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 9416 9417 client = vhcache->vhcache_client_head; 9418 vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL; 9419 for ( ; client != NULL; client = nxt_client) { 9420 nxt_client = client->cct_next; 9421 9422 path = client->cct_cpi_head; 9423 client->cct_cpi_head = client->cct_cpi_tail = NULL; 9424 for ( ; path != NULL; path = nxt_path) { 9425 nxt_path = path->cpi_next; 9426 if ((path->cpi_cphci->cphci_phci != NULL) && 9427 (path->cpi_pip != NULL)) { 9428 enqueue_tail_vhcache_pathinfo(client, path); 9429 } else if (path->cpi_pip != NULL) { 9430 /* Not valid to have a path without a phci. */ 9431 free_vhcache_pathinfo(path); 9432 } 9433 } 9434 9435 if (client->cct_cpi_head != NULL) 9436 enqueue_vhcache_client(vhcache, client); 9437 else { 9438 (void) mod_hash_destroy(vhcache->vhcache_client_hash, 9439 (mod_hash_key_t)client->cct_name_addr); 9440 free_vhcache_client(client); 9441 } 9442 } 9443 9444 phci = vhcache->vhcache_phci_head; 9445 vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL; 9446 for ( ; phci != NULL; phci = nxt_phci) { 9447 9448 nxt_phci = phci->cphci_next; 9449 if (phci->cphci_phci != NULL) 9450 enqueue_vhcache_phci(vhcache, phci); 9451 else 9452 free_vhcache_phci(phci); 9453 } 9454 9455 vhcache->vhcache_clean_time = ddi_get_lbolt64(); 9456 rw_exit(&vhcache->vhcache_lock); 9457 vhcache_dirty(vhc); 9458 } 9459 9460 /* 9461 * Remove all stale entries from vhci cache. 
9462 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C 9463 */ 9464 void 9465 mdi_clean_vhcache(void) 9466 { 9467 mdi_vhci_t *vh; 9468 9469 mutex_enter(&mdi_mutex); 9470 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 9471 vh->vh_refcnt++; 9472 mutex_exit(&mdi_mutex); 9473 clean_vhcache(vh->vh_config); 9474 mutex_enter(&mdi_mutex); 9475 vh->vh_refcnt--; 9476 } 9477 mutex_exit(&mdi_mutex); 9478 } 9479 9480 /* 9481 * mdi_vhci_walk_clients(): 9482 * Walker routine to traverse client dev_info nodes 9483 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree 9484 * below the client, including nexus devices, which we dont want. 9485 * So we just traverse the immediate siblings, starting from 1st client. 9486 */ 9487 void 9488 mdi_vhci_walk_clients(dev_info_t *vdip, 9489 int (*f)(dev_info_t *, void *), void *arg) 9490 { 9491 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9492 dev_info_t *cdip; 9493 mdi_client_t *ct; 9494 9495 MDI_VHCI_CLIENT_LOCK(vh); 9496 cdip = ddi_get_child(vdip); 9497 while (cdip) { 9498 ct = i_devi_get_client(cdip); 9499 MDI_CLIENT_LOCK(ct); 9500 9501 if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE) 9502 cdip = ddi_get_next_sibling(cdip); 9503 else 9504 cdip = NULL; 9505 9506 MDI_CLIENT_UNLOCK(ct); 9507 } 9508 MDI_VHCI_CLIENT_UNLOCK(vh); 9509 } 9510 9511 /* 9512 * mdi_vhci_walk_phcis(): 9513 * Walker routine to traverse phci dev_info nodes 9514 */ 9515 void 9516 mdi_vhci_walk_phcis(dev_info_t *vdip, 9517 int (*f)(dev_info_t *, void *), void *arg) 9518 { 9519 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9520 mdi_phci_t *ph, *next; 9521 9522 MDI_VHCI_PHCI_LOCK(vh); 9523 ph = vh->vh_phci_head; 9524 while (ph) { 9525 MDI_PHCI_LOCK(ph); 9526 9527 if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE) 9528 next = ph->ph_next; 9529 else 9530 next = NULL; 9531 9532 MDI_PHCI_UNLOCK(ph); 9533 ph = next; 9534 } 9535 MDI_VHCI_PHCI_UNLOCK(vh); 9536 } 9537 9538 9539 /* 9540 * mdi_walk_vhcis(): 9541 * Walker routine to traverse vhci 
dev_info nodes 9542 */ 9543 void 9544 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg) 9545 { 9546 mdi_vhci_t *vh = NULL; 9547 9548 mutex_enter(&mdi_mutex); 9549 /* 9550 * Scan for already registered vhci 9551 */ 9552 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 9553 vh->vh_refcnt++; 9554 mutex_exit(&mdi_mutex); 9555 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) { 9556 mutex_enter(&mdi_mutex); 9557 vh->vh_refcnt--; 9558 break; 9559 } else { 9560 mutex_enter(&mdi_mutex); 9561 vh->vh_refcnt--; 9562 } 9563 } 9564 9565 mutex_exit(&mdi_mutex); 9566 } 9567 9568 /* 9569 * i_mdi_log_sysevent(): 9570 * Logs events for pickup by syseventd 9571 */ 9572 static void 9573 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass) 9574 { 9575 char *path_name; 9576 nvlist_t *attr_list; 9577 9578 if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, 9579 KM_SLEEP) != DDI_SUCCESS) { 9580 goto alloc_failed; 9581 } 9582 9583 path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 9584 (void) ddi_pathname(dip, path_name); 9585 9586 if (nvlist_add_string(attr_list, DDI_DRIVER_NAME, 9587 ddi_driver_name(dip)) != DDI_SUCCESS) { 9588 goto error; 9589 } 9590 9591 if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR, 9592 (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) { 9593 goto error; 9594 } 9595 9596 if (nvlist_add_int32(attr_list, DDI_INSTANCE, 9597 (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) { 9598 goto error; 9599 } 9600 9601 if (nvlist_add_string(attr_list, DDI_PATHNAME, 9602 path_name) != DDI_SUCCESS) { 9603 goto error; 9604 } 9605 9606 if (nvlist_add_string(attr_list, DDI_CLASS, 9607 ph_vh_class) != DDI_SUCCESS) { 9608 goto error; 9609 } 9610 9611 (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass, 9612 attr_list, NULL, DDI_SLEEP); 9613 9614 error: 9615 kmem_free(path_name, MAXPATHLEN); 9616 nvlist_free(attr_list); 9617 return; 9618 9619 alloc_failed: 9620 MDI_DEBUG(1, (MDI_WARN, dip, "!unable to send sysevent")); 9621 } 9622 9623 char ** 9624 
mdi_get_phci_driver_list(char *vhci_class, int *ndrivers) 9625 { 9626 char **driver_list, **ret_driver_list = NULL; 9627 int *root_support_list; 9628 int cur_elements, max_elements; 9629 9630 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 9631 &cur_elements, &max_elements); 9632 9633 9634 if (driver_list) { 9635 kmem_free(root_support_list, sizeof (int) * max_elements); 9636 ret_driver_list = mdi_realloc(driver_list, sizeof (char *) 9637 * max_elements, sizeof (char *) * cur_elements); 9638 } 9639 *ndrivers = cur_elements; 9640 9641 return (ret_driver_list); 9642 9643 } 9644 9645 void 9646 mdi_free_phci_driver_list(char **driver_list, int ndrivers) 9647 { 9648 char **p; 9649 int i; 9650 9651 if (driver_list) { 9652 for (i = 0, p = driver_list; i < ndrivers; i++, p++) 9653 kmem_free(*p, strlen(*p) + 1); 9654 kmem_free(driver_list, sizeof (char *) * ndrivers); 9655 } 9656 } 9657 9658 /* 9659 * mdi_is_dev_supported(): 9660 * function called by pHCI bus config operation to determine if a 9661 * device should be represented as a child of the vHCI or the 9662 * pHCI. This decision is made by the vHCI, using cinfo idenity 9663 * information passed by the pHCI - specifics of the cinfo 9664 * representation are by agreement between the pHCI and vHCI. 9665 * Return Values: 9666 * MDI_SUCCESS 9667 * MDI_FAILURE 9668 */ 9669 int 9670 mdi_is_dev_supported(char *class, dev_info_t *pdip, void *cinfo) 9671 { 9672 mdi_vhci_t *vh; 9673 9674 ASSERT(class && pdip); 9675 9676 /* 9677 * For dev_supported, mdi_phci_register() must have established pdip as 9678 * a pHCI. 9679 * 9680 * NOTE: mdi_phci_register() does "mpxio-disable" processing, and 9681 * MDI_PHCI(pdip) will return false if mpxio is disabled. 9682 */ 9683 if (!MDI_PHCI(pdip)) 9684 return (MDI_FAILURE); 9685 9686 /* Return MDI_FAILURE if vHCI does not support asking the question. 
*/ 9687 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 9688 if ((vh == NULL) || (vh->vh_ops->vo_is_dev_supported == NULL)) { 9689 return (MDI_FAILURE); 9690 } 9691 9692 /* Return vHCI answer */ 9693 return (vh->vh_ops->vo_is_dev_supported(vh->vh_dip, pdip, cinfo)); 9694 } 9695 9696 int 9697 mdi_dc_return_dev_state(mdi_pathinfo_t *pip, struct devctl_iocdata *dcp) 9698 { 9699 uint_t devstate = 0; 9700 dev_info_t *cdip; 9701 9702 if ((pip == NULL) || (dcp == NULL)) 9703 return (MDI_FAILURE); 9704 9705 cdip = mdi_pi_get_client(pip); 9706 9707 switch (mdi_pi_get_state(pip)) { 9708 case MDI_PATHINFO_STATE_INIT: 9709 devstate = DEVICE_DOWN; 9710 break; 9711 case MDI_PATHINFO_STATE_ONLINE: 9712 devstate = DEVICE_ONLINE; 9713 if ((cdip) && (devi_stillreferenced(cdip) == DEVI_REFERENCED)) 9714 devstate |= DEVICE_BUSY; 9715 break; 9716 case MDI_PATHINFO_STATE_STANDBY: 9717 devstate = DEVICE_ONLINE; 9718 break; 9719 case MDI_PATHINFO_STATE_FAULT: 9720 devstate = DEVICE_DOWN; 9721 break; 9722 case MDI_PATHINFO_STATE_OFFLINE: 9723 devstate = DEVICE_OFFLINE; 9724 break; 9725 default: 9726 ASSERT(MDI_PI(pip)->pi_state); 9727 } 9728 9729 if (copyout(&devstate, dcp->cpyout_buf, sizeof (uint_t)) != 0) 9730 return (MDI_FAILURE); 9731 9732 return (MDI_SUCCESS); 9733 }