1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2014 Nexenta Systems Inc. All rights reserved. 24 * Copyright (c) 2018, Joyent, Inc. 25 */ 26 27 /* 28 * Multipath driver interface (MDI) implementation; see mdi_impldefs.h for a 29 * more detailed discussion of the overall mpxio architecture. 
30 * 31 * Default locking order: 32 * 33 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_phci_mutex); 34 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_client_mutex); 35 * _NOTE(LOCK_ORDER(mdi_vhci:vh_phci_mutex, mdi_phci::ph_mutex); 36 * _NOTE(LOCK_ORDER(mdi_vhci:vh_client_mutex, mdi_client::ct_mutex); 37 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 38 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex)) 39 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 40 */ 41 42 #include <sys/note.h> 43 #include <sys/types.h> 44 #include <sys/varargs.h> 45 #include <sys/param.h> 46 #include <sys/errno.h> 47 #include <sys/uio.h> 48 #include <sys/buf.h> 49 #include <sys/modctl.h> 50 #include <sys/open.h> 51 #include <sys/kmem.h> 52 #include <sys/poll.h> 53 #include <sys/conf.h> 54 #include <sys/bootconf.h> 55 #include <sys/cmn_err.h> 56 #include <sys/stat.h> 57 #include <sys/ddi.h> 58 #include <sys/sunddi.h> 59 #include <sys/ddipropdefs.h> 60 #include <sys/sunndi.h> 61 #include <sys/ndi_impldefs.h> 62 #include <sys/promif.h> 63 #include <sys/sunmdi.h> 64 #include <sys/mdi_impldefs.h> 65 #include <sys/taskq.h> 66 #include <sys/epm.h> 67 #include <sys/sunpm.h> 68 #include <sys/modhash.h> 69 #include <sys/disp.h> 70 #include <sys/autoconf.h> 71 #include <sys/sysmacros.h> 72 73 #ifdef DEBUG 74 #include <sys/debug.h> 75 int mdi_debug = 1; 76 int mdi_debug_logonly = 0; 77 #define MDI_DEBUG(dbglevel, pargs) if (mdi_debug >= (dbglevel)) i_mdi_log pargs 78 #define MDI_WARN CE_WARN, __func__ 79 #define MDI_NOTE CE_NOTE, __func__ 80 #define MDI_CONT CE_CONT, __func__ 81 static void i_mdi_log(int, const char *, dev_info_t *, const char *, ...); 82 #else /* !DEBUG */ 83 #define MDI_DEBUG(dbglevel, pargs) 84 #endif /* DEBUG */ 85 int mdi_debug_consoleonly = 0; 86 int mdi_delay = 3; 87 88 extern pri_t minclsyspri; 89 extern int modrootloaded; 90 91 /* 92 * Global mutex: 93 * Protects vHCI list and structure members. 
94 */ 95 kmutex_t mdi_mutex; 96 97 /* 98 * Registered vHCI class driver lists 99 */ 100 int mdi_vhci_count; 101 mdi_vhci_t *mdi_vhci_head; 102 mdi_vhci_t *mdi_vhci_tail; 103 104 /* 105 * Client Hash Table size 106 */ 107 static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE; 108 109 /* 110 * taskq interface definitions 111 */ 112 #define MDI_TASKQ_N_THREADS 8 113 #define MDI_TASKQ_PRI minclsyspri 114 #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads) 115 #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads) 116 117 taskq_t *mdi_taskq; 118 static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS; 119 120 #define TICKS_PER_SECOND (drv_usectohz(1000000)) 121 122 /* 123 * The data should be "quiet" for this interval (in seconds) before the 124 * vhci cached data is flushed to the disk. 125 */ 126 static int mdi_vhcache_flush_delay = 10; 127 128 /* number of seconds the vhcache flush daemon will sleep idle before exiting */ 129 static int mdi_vhcache_flush_daemon_idle_time = 60; 130 131 /* 132 * MDI falls back to discovery of all paths when a bus_config_one fails. 133 * The following parameters can be used to tune this operation. 134 * 135 * mdi_path_discovery_boot 136 * Number of times path discovery will be attempted during early boot. 137 * Probably there is no reason to ever set this value to greater than one. 138 * 139 * mdi_path_discovery_postboot 140 * Number of times path discovery will be attempted after early boot. 141 * Set it to a minimum of two to allow for discovery of iscsi paths which 142 * may happen very late during booting. 143 * 144 * mdi_path_discovery_interval 145 * Minimum number of seconds MDI will wait between successive discovery 146 * of all paths. Set it to -1 to disable discovery of all paths. 
147 */ 148 static int mdi_path_discovery_boot = 1; 149 static int mdi_path_discovery_postboot = 2; 150 static int mdi_path_discovery_interval = 10; 151 152 /* 153 * number of seconds the asynchronous configuration thread will sleep idle 154 * before exiting. 155 */ 156 static int mdi_async_config_idle_time = 600; 157 158 static int mdi_bus_config_cache_hash_size = 256; 159 160 /* turns off multithreaded configuration for certain operations */ 161 static int mdi_mtc_off = 0; 162 163 /* 164 * The "path" to a pathinfo node is identical to the /devices path to a 165 * devinfo node had the device been enumerated under a pHCI instead of 166 * a vHCI. This pathinfo "path" is associated with a 'path_instance'. 167 * This association persists across create/delete of the pathinfo nodes, 168 * but not across reboot. 169 */ 170 static uint_t mdi_pathmap_instance = 1; /* 0 -> any path */ 171 static int mdi_pathmap_hash_size = 256; 172 static kmutex_t mdi_pathmap_mutex; 173 static mod_hash_t *mdi_pathmap_bypath; /* "path"->instance */ 174 static mod_hash_t *mdi_pathmap_byinstance; /* instance->"path" */ 175 static mod_hash_t *mdi_pathmap_sbyinstance; /* inst->shortpath */ 176 177 /* 178 * MDI component property name/value string definitions 179 */ 180 const char *mdi_component_prop = "mpxio-component"; 181 const char *mdi_component_prop_vhci = "vhci"; 182 const char *mdi_component_prop_phci = "phci"; 183 const char *mdi_component_prop_client = "client"; 184 185 /* 186 * MDI client global unique identifier property name 187 */ 188 const char *mdi_client_guid_prop = "client-guid"; 189 190 /* 191 * MDI client load balancing property name/value string definitions 192 */ 193 const char *mdi_load_balance = "load-balance"; 194 const char *mdi_load_balance_none = "none"; 195 const char *mdi_load_balance_rr = "round-robin"; 196 const char *mdi_load_balance_lba = "logical-block"; 197 198 /* 199 * Obsolete vHCI class definition; to be removed after Leadville update 200 */ 201 const char 
*mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI; 202 203 static char vhci_greeting[] = 204 "\tThere already exists one vHCI driver for class %s\n" 205 "\tOnly one vHCI driver for each class is allowed\n"; 206 207 /* 208 * Static function prototypes 209 */ 210 static int i_mdi_phci_offline(dev_info_t *, uint_t); 211 static int i_mdi_client_offline(dev_info_t *, uint_t); 212 static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t); 213 static void i_mdi_phci_post_detach(dev_info_t *, 214 ddi_detach_cmd_t, int); 215 static int i_mdi_client_pre_detach(dev_info_t *, 216 ddi_detach_cmd_t); 217 static void i_mdi_client_post_detach(dev_info_t *, 218 ddi_detach_cmd_t, int); 219 static void i_mdi_pm_hold_pip(mdi_pathinfo_t *); 220 static void i_mdi_pm_rele_pip(mdi_pathinfo_t *); 221 static int i_mdi_lba_lb(mdi_client_t *ct, 222 mdi_pathinfo_t **ret_pip, struct buf *buf); 223 static void i_mdi_pm_hold_client(mdi_client_t *, int); 224 static void i_mdi_pm_rele_client(mdi_client_t *, int); 225 static void i_mdi_pm_reset_client(mdi_client_t *); 226 static int i_mdi_power_all_phci(mdi_client_t *); 227 static void i_mdi_log_sysevent(dev_info_t *, char *, char *); 228 229 230 /* 231 * Internal mdi_pathinfo node functions 232 */ 233 static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *); 234 235 static mdi_vhci_t *i_mdi_vhci_class2vhci(char *); 236 static mdi_vhci_t *i_devi_get_vhci(dev_info_t *); 237 static mdi_phci_t *i_devi_get_phci(dev_info_t *); 238 static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *); 239 static void i_mdi_phci_unlock(mdi_phci_t *); 240 static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *); 241 static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *); 242 static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *); 243 static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *, 244 mdi_client_t *); 245 static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *); 246 static void 
i_mdi_client_remove_path(mdi_client_t *, 247 mdi_pathinfo_t *); 248 249 static int i_mdi_pi_state_change(mdi_pathinfo_t *, 250 mdi_pathinfo_state_t, int); 251 static int i_mdi_pi_offline(mdi_pathinfo_t *, int); 252 static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *, 253 char **, int); 254 static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *); 255 static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int); 256 static int i_mdi_is_child_present(dev_info_t *, dev_info_t *); 257 static mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *); 258 static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *); 259 static void i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *); 260 static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, char *, char *); 261 static void i_mdi_client_update_state(mdi_client_t *); 262 static int i_mdi_client_compute_state(mdi_client_t *, 263 mdi_phci_t *); 264 static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *); 265 static void i_mdi_client_unlock(mdi_client_t *); 266 static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *); 267 static mdi_client_t *i_devi_get_client(dev_info_t *); 268 /* 269 * NOTE: this will be removed once the NWS files are changed to use the new 270 * mdi_{enable,disable}_path interfaces 271 */ 272 static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *, 273 int, int); 274 static mdi_pathinfo_t *i_mdi_enable_disable_path(mdi_pathinfo_t *pip, 275 mdi_vhci_t *vh, int flags, int op); 276 /* 277 * Failover related function prototypes 278 */ 279 static int i_mdi_failover(void *); 280 281 /* 282 * misc internal functions 283 */ 284 static int i_mdi_get_hash_key(char *); 285 static int i_map_nvlist_error_to_mdi(int); 286 static void i_mdi_report_path_state(mdi_client_t *, 287 mdi_pathinfo_t *); 288 289 static void setup_vhci_cache(mdi_vhci_t *); 290 static int destroy_vhci_cache(mdi_vhci_t *); 291 static int stop_vhcache_async_threads(mdi_vhci_config_t *); 
292 static boolean_t stop_vhcache_flush_thread(void *, int); 293 static void free_string_array(char **, int); 294 static void free_vhcache_phci(mdi_vhcache_phci_t *); 295 static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *); 296 static void free_vhcache_client(mdi_vhcache_client_t *); 297 static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *); 298 static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *); 299 static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *); 300 static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *); 301 static void vhcache_pi_add(mdi_vhci_config_t *, 302 struct mdi_pathinfo *); 303 static void vhcache_pi_remove(mdi_vhci_config_t *, 304 struct mdi_pathinfo *); 305 static void free_phclient_path_list(mdi_phys_path_t *); 306 static void sort_vhcache_paths(mdi_vhcache_client_t *); 307 static int flush_vhcache(mdi_vhci_config_t *, int); 308 static void vhcache_dirty(mdi_vhci_config_t *); 309 static void free_async_client_config(mdi_async_client_config_t *); 310 static void single_threaded_vhconfig_enter(mdi_vhci_config_t *); 311 static void single_threaded_vhconfig_exit(mdi_vhci_config_t *); 312 static nvlist_t *read_on_disk_vhci_cache(char *); 313 extern int fread_nvlist(char *, nvlist_t **); 314 extern int fwrite_nvlist(char *, nvlist_t *); 315 316 /* called once when first vhci registers with mdi */ 317 static void 318 i_mdi_init() 319 { 320 static int initialized = 0; 321 322 if (initialized) 323 return; 324 initialized = 1; 325 326 mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL); 327 328 /* Create our taskq resources */ 329 mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads, 330 MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC, 331 TASKQ_PREPOPULATE | TASKQ_CPR_SAFE); 332 ASSERT(mdi_taskq != NULL); /* taskq_create never fails */ 333 334 /* Allocate ['path_instance' <-> "path"] maps */ 335 mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL); 336 mdi_pathmap_bypath = mod_hash_create_strhash( 
337 "mdi_pathmap_bypath", mdi_pathmap_hash_size, 338 mod_hash_null_valdtor); 339 mdi_pathmap_byinstance = mod_hash_create_idhash( 340 "mdi_pathmap_byinstance", mdi_pathmap_hash_size, 341 mod_hash_null_valdtor); 342 mdi_pathmap_sbyinstance = mod_hash_create_idhash( 343 "mdi_pathmap_sbyinstance", mdi_pathmap_hash_size, 344 mod_hash_null_valdtor); 345 } 346 347 /* 348 * mdi_get_component_type(): 349 * Return mpxio component type 350 * Return Values: 351 * MDI_COMPONENT_NONE 352 * MDI_COMPONENT_VHCI 353 * MDI_COMPONENT_PHCI 354 * MDI_COMPONENT_CLIENT 355 * XXX This doesn't work under multi-level MPxIO and should be 356 * removed when clients migrate mdi_component_is_*() interfaces. 357 */ 358 int 359 mdi_get_component_type(dev_info_t *dip) 360 { 361 return (DEVI(dip)->devi_mdi_component); 362 } 363 364 /* 365 * mdi_vhci_register(): 366 * Register a vHCI module with the mpxio framework 367 * mdi_vhci_register() is called by vHCI drivers to register the 368 * 'class_driver' vHCI driver and its MDI entrypoints with the 369 * mpxio framework. The vHCI driver must call this interface as 370 * part of its attach(9e) handler. 371 * Competing threads may try to attach mdi_vhci_register() as 372 * the vHCI drivers are loaded and attached as a result of pHCI 373 * driver instance registration (mdi_phci_register()) with the 374 * framework. 375 * Return Values: 376 * MDI_SUCCESS 377 * MDI_FAILURE 378 */ 379 /*ARGSUSED*/ 380 int 381 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops, 382 int flags) 383 { 384 mdi_vhci_t *vh = NULL; 385 386 /* Registrant can't be older */ 387 ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV); 388 389 #ifdef DEBUG 390 /* 391 * IB nexus driver is loaded only when IB hardware is present. 392 * In order to be able to do this there is a need to drive the loading 393 * and attaching of the IB nexus driver (especially when an IB hardware 394 * is dynamically plugged in) when an IB HCA driver (PHCI) 395 * is being attached. 
Unfortunately this gets into the limitations 396 * of devfs as there seems to be no clean way to drive configuration 397 * of a subtree from another subtree of a devfs. Hence, do not ASSERT 398 * for IB. 399 */ 400 if (strcmp(class, MDI_HCI_CLASS_IB) != 0) 401 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 402 #endif 403 404 i_mdi_init(); 405 406 mutex_enter(&mdi_mutex); 407 /* 408 * Scan for already registered vhci 409 */ 410 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 411 if (strcmp(vh->vh_class, class) == 0) { 412 /* 413 * vHCI has already been created. Check for valid 414 * vHCI ops registration. We only support one vHCI 415 * module per class 416 */ 417 if (vh->vh_ops != NULL) { 418 mutex_exit(&mdi_mutex); 419 cmn_err(CE_NOTE, vhci_greeting, class); 420 return (MDI_FAILURE); 421 } 422 break; 423 } 424 } 425 426 /* 427 * if not yet created, create the vHCI component 428 */ 429 if (vh == NULL) { 430 struct client_hash *hash = NULL; 431 char *load_balance; 432 433 /* 434 * Allocate and initialize the mdi extensions 435 */ 436 vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP); 437 hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash), 438 KM_SLEEP); 439 vh->vh_client_table = hash; 440 vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP); 441 (void) strcpy(vh->vh_class, class); 442 vh->vh_lb = LOAD_BALANCE_RR; 443 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip, 444 0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) { 445 if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) { 446 vh->vh_lb = LOAD_BALANCE_NONE; 447 } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA) 448 == 0) { 449 vh->vh_lb = LOAD_BALANCE_LBA; 450 } 451 ddi_prop_free(load_balance); 452 } 453 454 mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL); 455 mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL); 456 457 /* 458 * Store the vHCI ops vectors 459 */ 460 vh->vh_dip = vdip; 461 vh->vh_ops = vops; 462 463 setup_vhci_cache(vh); 464 465 if (mdi_vhci_head == 
NULL) { 466 mdi_vhci_head = vh; 467 } 468 if (mdi_vhci_tail) { 469 mdi_vhci_tail->vh_next = vh; 470 } 471 mdi_vhci_tail = vh; 472 mdi_vhci_count++; 473 } 474 475 /* 476 * Claim the devfs node as a vhci component 477 */ 478 DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI; 479 480 /* 481 * Initialize our back reference from dev_info node 482 */ 483 DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh; 484 mutex_exit(&mdi_mutex); 485 return (MDI_SUCCESS); 486 } 487 488 /* 489 * mdi_vhci_unregister(): 490 * Unregister a vHCI module from mpxio framework 491 * mdi_vhci_unregister() is called from the detach(9E) entrypoint 492 * of a vhci to unregister it from the framework. 493 * Return Values: 494 * MDI_SUCCESS 495 * MDI_FAILURE 496 */ 497 /*ARGSUSED*/ 498 int 499 mdi_vhci_unregister(dev_info_t *vdip, int flags) 500 { 501 mdi_vhci_t *found, *vh, *prev = NULL; 502 503 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip))); 504 505 /* 506 * Check for invalid VHCI 507 */ 508 if ((vh = i_devi_get_vhci(vdip)) == NULL) 509 return (MDI_FAILURE); 510 511 /* 512 * Scan the list of registered vHCIs for a match 513 */ 514 mutex_enter(&mdi_mutex); 515 for (found = mdi_vhci_head; found != NULL; found = found->vh_next) { 516 if (found == vh) 517 break; 518 prev = found; 519 } 520 521 if (found == NULL) { 522 mutex_exit(&mdi_mutex); 523 return (MDI_FAILURE); 524 } 525 526 /* 527 * Check the vHCI, pHCI and client count. All the pHCIs and clients 528 * should have been unregistered, before a vHCI can be 529 * unregistered. 
530 */ 531 MDI_VHCI_PHCI_LOCK(vh); 532 if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) { 533 MDI_VHCI_PHCI_UNLOCK(vh); 534 mutex_exit(&mdi_mutex); 535 return (MDI_FAILURE); 536 } 537 MDI_VHCI_PHCI_UNLOCK(vh); 538 539 if (destroy_vhci_cache(vh) != MDI_SUCCESS) { 540 mutex_exit(&mdi_mutex); 541 return (MDI_FAILURE); 542 } 543 544 /* 545 * Remove the vHCI from the global list 546 */ 547 if (vh == mdi_vhci_head) { 548 mdi_vhci_head = vh->vh_next; 549 } else { 550 prev->vh_next = vh->vh_next; 551 } 552 if (vh == mdi_vhci_tail) { 553 mdi_vhci_tail = prev; 554 } 555 mdi_vhci_count--; 556 mutex_exit(&mdi_mutex); 557 558 vh->vh_ops = NULL; 559 DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI; 560 DEVI(vdip)->devi_mdi_xhci = NULL; 561 kmem_free(vh->vh_class, strlen(vh->vh_class)+1); 562 kmem_free(vh->vh_client_table, 563 mdi_client_table_size * sizeof (struct client_hash)); 564 mutex_destroy(&vh->vh_phci_mutex); 565 mutex_destroy(&vh->vh_client_mutex); 566 567 kmem_free(vh, sizeof (mdi_vhci_t)); 568 return (MDI_SUCCESS); 569 } 570 571 /* 572 * i_mdi_vhci_class2vhci(): 573 * Look for a matching vHCI module given a vHCI class name 574 * Return Values: 575 * Handle to a vHCI component 576 * NULL 577 */ 578 static mdi_vhci_t * 579 i_mdi_vhci_class2vhci(char *class) 580 { 581 mdi_vhci_t *vh = NULL; 582 583 ASSERT(!MUTEX_HELD(&mdi_mutex)); 584 585 mutex_enter(&mdi_mutex); 586 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 587 if (strcmp(vh->vh_class, class) == 0) { 588 break; 589 } 590 } 591 mutex_exit(&mdi_mutex); 592 return (vh); 593 } 594 595 /* 596 * i_devi_get_vhci(): 597 * Utility function to get the handle to a vHCI component 598 * Return Values: 599 * Handle to a vHCI component 600 * NULL 601 */ 602 mdi_vhci_t * 603 i_devi_get_vhci(dev_info_t *vdip) 604 { 605 mdi_vhci_t *vh = NULL; 606 if (MDI_VHCI(vdip)) { 607 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci; 608 } 609 return (vh); 610 } 611 612 /* 613 * mdi_phci_register(): 614 * Register a pHCI 
module with mpxio framework 615 * mdi_phci_register() is called by pHCI drivers to register with 616 * the mpxio framework and a specific 'class_driver' vHCI. The 617 * pHCI driver must call this interface as part of its attach(9e) 618 * handler. 619 * Return Values: 620 * MDI_SUCCESS 621 * MDI_FAILURE 622 */ 623 /*ARGSUSED*/ 624 int 625 mdi_phci_register(char *class, dev_info_t *pdip, int flags) 626 { 627 mdi_phci_t *ph; 628 mdi_vhci_t *vh; 629 char *data; 630 631 /* 632 * Some subsystems, like fcp, perform pHCI registration from a 633 * different thread than the one doing the pHCI attach(9E) - the 634 * driver attach code is waiting for this other thread to complete. 635 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent 636 * (indicating that some thread has done an ndi_devi_enter of parent) 637 * not DEVI_BUSY_OWNED (which would indicate that we did the enter). 638 */ 639 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 640 641 /* 642 * Check for mpxio-disable property. Enable mpxio if the property is 643 * missing or not set to "yes". 644 * If the property is set to "yes" then emit a brief message. 
645 */ 646 if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable", 647 &data) == DDI_SUCCESS)) { 648 if (strcmp(data, "yes") == 0) { 649 MDI_DEBUG(1, (MDI_CONT, pdip, 650 "?multipath capabilities disabled via %s.conf.", 651 ddi_driver_name(pdip))); 652 ddi_prop_free(data); 653 return (MDI_FAILURE); 654 } 655 ddi_prop_free(data); 656 } 657 658 /* 659 * Search for a matching vHCI 660 */ 661 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 662 if (vh == NULL) { 663 return (MDI_FAILURE); 664 } 665 666 ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP); 667 mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL); 668 ph->ph_dip = pdip; 669 ph->ph_vhci = vh; 670 ph->ph_next = NULL; 671 ph->ph_unstable = 0; 672 ph->ph_vprivate = 0; 673 cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL); 674 675 MDI_PHCI_LOCK(ph); 676 MDI_PHCI_SET_POWER_UP(ph); 677 MDI_PHCI_UNLOCK(ph); 678 DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI; 679 DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph; 680 681 vhcache_phci_add(vh->vh_config, ph); 682 683 MDI_VHCI_PHCI_LOCK(vh); 684 if (vh->vh_phci_head == NULL) { 685 vh->vh_phci_head = ph; 686 } 687 if (vh->vh_phci_tail) { 688 vh->vh_phci_tail->ph_next = ph; 689 } 690 vh->vh_phci_tail = ph; 691 vh->vh_phci_count++; 692 MDI_VHCI_PHCI_UNLOCK(vh); 693 694 i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER); 695 return (MDI_SUCCESS); 696 } 697 698 /* 699 * mdi_phci_unregister(): 700 * Unregister a pHCI module from mpxio framework 701 * mdi_phci_unregister() is called by the pHCI drivers from their 702 * detach(9E) handler to unregister their instances from the 703 * framework. 
704 * Return Values: 705 * MDI_SUCCESS 706 * MDI_FAILURE 707 */ 708 /*ARGSUSED*/ 709 int 710 mdi_phci_unregister(dev_info_t *pdip, int flags) 711 { 712 mdi_vhci_t *vh; 713 mdi_phci_t *ph; 714 mdi_phci_t *tmp; 715 mdi_phci_t *prev = NULL; 716 mdi_pathinfo_t *pip; 717 718 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip))); 719 720 ph = i_devi_get_phci(pdip); 721 if (ph == NULL) { 722 MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid pHCI")); 723 return (MDI_FAILURE); 724 } 725 726 vh = ph->ph_vhci; 727 ASSERT(vh != NULL); 728 if (vh == NULL) { 729 MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid vHCI")); 730 return (MDI_FAILURE); 731 } 732 733 MDI_VHCI_PHCI_LOCK(vh); 734 tmp = vh->vh_phci_head; 735 while (tmp) { 736 if (tmp == ph) { 737 break; 738 } 739 prev = tmp; 740 tmp = tmp->ph_next; 741 } 742 743 if (ph == vh->vh_phci_head) { 744 vh->vh_phci_head = ph->ph_next; 745 } else { 746 prev->ph_next = ph->ph_next; 747 } 748 749 if (ph == vh->vh_phci_tail) { 750 vh->vh_phci_tail = prev; 751 } 752 753 vh->vh_phci_count--; 754 MDI_VHCI_PHCI_UNLOCK(vh); 755 756 /* Walk remaining pathinfo nodes and disassociate them from pHCI */ 757 MDI_PHCI_LOCK(ph); 758 for (pip = (mdi_pathinfo_t *)ph->ph_path_head; pip; 759 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link) 760 MDI_PI(pip)->pi_phci = NULL; 761 MDI_PHCI_UNLOCK(ph); 762 763 i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class, 764 ESC_DDI_INITIATOR_UNREGISTER); 765 vhcache_phci_remove(vh->vh_config, ph); 766 cv_destroy(&ph->ph_unstable_cv); 767 mutex_destroy(&ph->ph_mutex); 768 kmem_free(ph, sizeof (mdi_phci_t)); 769 DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI; 770 DEVI(pdip)->devi_mdi_xhci = NULL; 771 return (MDI_SUCCESS); 772 } 773 774 /* 775 * i_devi_get_phci(): 776 * Utility function to return the phci extensions. 
777 */ 778 static mdi_phci_t * 779 i_devi_get_phci(dev_info_t *pdip) 780 { 781 mdi_phci_t *ph = NULL; 782 783 if (MDI_PHCI(pdip)) { 784 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci; 785 } 786 return (ph); 787 } 788 789 /* 790 * Single thread mdi entry into devinfo node for modifying its children. 791 * If necessary we perform an ndi_devi_enter of the vHCI before doing 792 * an ndi_devi_enter of 'dip'. We maintain circular in two parts: one 793 * for the vHCI and one for the pHCI. 794 */ 795 void 796 mdi_devi_enter(dev_info_t *phci_dip, int *circular) 797 { 798 dev_info_t *vdip; 799 int vcircular, pcircular; 800 801 /* Verify calling context */ 802 ASSERT(MDI_PHCI(phci_dip)); 803 vdip = mdi_devi_get_vdip(phci_dip); 804 ASSERT(vdip); /* A pHCI always has a vHCI */ 805 806 /* 807 * If pHCI is detaching then the framework has already entered the 808 * vHCI on a threads that went down the code path leading to 809 * detach_node(). This framework enter of the vHCI during pHCI 810 * detach is done to avoid deadlock with vHCI power management 811 * operations which enter the vHCI and the enter down the path 812 * to the pHCI. If pHCI is detaching then we piggyback this calls 813 * enter of the vHCI on frameworks vHCI enter that has already 814 * occurred - this is OK because we know that the framework thread 815 * doing detach is waiting for our completion. 816 * 817 * We should DEVI_IS_DETACHING under an enter of the parent to avoid 818 * race with detach - but we can't do that because the framework has 819 * already entered the parent, so we have some complexity instead. 
820 */ 821 for (;;) { 822 if (ndi_devi_tryenter(vdip, &vcircular)) { 823 ASSERT(vcircular != -1); 824 if (DEVI_IS_DETACHING(phci_dip)) { 825 ndi_devi_exit(vdip, vcircular); 826 vcircular = -1; 827 } 828 break; 829 } else if (DEVI_IS_DETACHING(phci_dip)) { 830 vcircular = -1; 831 break; 832 } else if (servicing_interrupt()) { 833 /* 834 * Don't delay an interrupt (and ensure adaptive 835 * mutex inversion support). 836 */ 837 ndi_devi_enter(vdip, &vcircular); 838 break; 839 } else { 840 delay_random(mdi_delay); 841 } 842 } 843 844 ndi_devi_enter(phci_dip, &pcircular); 845 *circular = (vcircular << 16) | (pcircular & 0xFFFF); 846 } 847 848 /* 849 * Attempt to mdi_devi_enter. 850 */ 851 int 852 mdi_devi_tryenter(dev_info_t *phci_dip, int *circular) 853 { 854 dev_info_t *vdip; 855 int vcircular, pcircular; 856 857 /* Verify calling context */ 858 ASSERT(MDI_PHCI(phci_dip)); 859 vdip = mdi_devi_get_vdip(phci_dip); 860 ASSERT(vdip); /* A pHCI always has a vHCI */ 861 862 if (ndi_devi_tryenter(vdip, &vcircular)) { 863 if (ndi_devi_tryenter(phci_dip, &pcircular)) { 864 *circular = (vcircular << 16) | (pcircular & 0xFFFF); 865 return (1); /* locked */ 866 } 867 ndi_devi_exit(vdip, vcircular); 868 } 869 return (0); /* busy */ 870 } 871 872 /* 873 * Release mdi_devi_enter or successful mdi_devi_tryenter. 
874 */ 875 void 876 mdi_devi_exit(dev_info_t *phci_dip, int circular) 877 { 878 dev_info_t *vdip; 879 int vcircular, pcircular; 880 881 /* Verify calling context */ 882 ASSERT(MDI_PHCI(phci_dip)); 883 vdip = mdi_devi_get_vdip(phci_dip); 884 ASSERT(vdip); /* A pHCI always has a vHCI */ 885 886 /* extract two circular recursion values from single int */ 887 pcircular = (short)(circular & 0xFFFF); 888 vcircular = (short)((circular >> 16) & 0xFFFF); 889 890 ndi_devi_exit(phci_dip, pcircular); 891 if (vcircular != -1) 892 ndi_devi_exit(vdip, vcircular); 893 } 894 895 /* 896 * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used 897 * around a pHCI drivers calls to mdi_pi_online/offline, after holding 898 * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock 899 * with vHCI power management code during path online/offline. Each 900 * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must 901 * occur within the scope of an active mdi_devi_enter that establishes the 902 * circular value. 903 */ 904 void 905 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular) 906 { 907 int pcircular; 908 909 /* Verify calling context */ 910 ASSERT(MDI_PHCI(phci_dip)); 911 912 /* Keep hold on pHCI until we reenter in mdi_devi_enter_phci */ 913 ndi_hold_devi(phci_dip); 914 915 pcircular = (short)(circular & 0xFFFF); 916 ndi_devi_exit(phci_dip, pcircular); 917 } 918 919 void 920 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular) 921 { 922 int pcircular; 923 924 /* Verify calling context */ 925 ASSERT(MDI_PHCI(phci_dip)); 926 927 ndi_devi_enter(phci_dip, &pcircular); 928 929 /* Drop hold from mdi_devi_exit_phci. 
*/ 930 ndi_rele_devi(phci_dip); 931 932 /* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */ 933 ASSERT(pcircular == ((short)(*circular & 0xFFFF))); 934 } 935 936 /* 937 * mdi_devi_get_vdip(): 938 * given a pHCI dip return vHCI dip 939 */ 940 dev_info_t * 941 mdi_devi_get_vdip(dev_info_t *pdip) 942 { 943 mdi_phci_t *ph; 944 945 ph = i_devi_get_phci(pdip); 946 if (ph && ph->ph_vhci) 947 return (ph->ph_vhci->vh_dip); 948 return (NULL); 949 } 950 951 /* 952 * mdi_devi_pdip_entered(): 953 * Return 1 if we are vHCI and have done an ndi_devi_enter 954 * of a pHCI 955 */ 956 int 957 mdi_devi_pdip_entered(dev_info_t *vdip) 958 { 959 mdi_vhci_t *vh; 960 mdi_phci_t *ph; 961 962 vh = i_devi_get_vhci(vdip); 963 if (vh == NULL) 964 return (0); 965 966 MDI_VHCI_PHCI_LOCK(vh); 967 ph = vh->vh_phci_head; 968 while (ph) { 969 if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) { 970 MDI_VHCI_PHCI_UNLOCK(vh); 971 return (1); 972 } 973 ph = ph->ph_next; 974 } 975 MDI_VHCI_PHCI_UNLOCK(vh); 976 return (0); 977 } 978 979 /* 980 * mdi_phci_path2devinfo(): 981 * Utility function to search for a valid phci device given 982 * the devfs pathname. 
983 */ 984 dev_info_t * 985 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname) 986 { 987 char *temp_pathname; 988 mdi_vhci_t *vh; 989 mdi_phci_t *ph; 990 dev_info_t *pdip = NULL; 991 992 vh = i_devi_get_vhci(vdip); 993 ASSERT(vh != NULL); 994 995 if (vh == NULL) { 996 /* 997 * Invalid vHCI component, return failure 998 */ 999 return (NULL); 1000 } 1001 1002 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1003 MDI_VHCI_PHCI_LOCK(vh); 1004 ph = vh->vh_phci_head; 1005 while (ph != NULL) { 1006 pdip = ph->ph_dip; 1007 ASSERT(pdip != NULL); 1008 *temp_pathname = '\0'; 1009 (void) ddi_pathname(pdip, temp_pathname); 1010 if (strcmp(temp_pathname, pathname) == 0) { 1011 break; 1012 } 1013 ph = ph->ph_next; 1014 } 1015 if (ph == NULL) { 1016 pdip = NULL; 1017 } 1018 MDI_VHCI_PHCI_UNLOCK(vh); 1019 kmem_free(temp_pathname, MAXPATHLEN); 1020 return (pdip); 1021 } 1022 1023 /* 1024 * mdi_phci_get_path_count(): 1025 * get number of path information nodes associated with a given 1026 * pHCI device. 1027 */ 1028 int 1029 mdi_phci_get_path_count(dev_info_t *pdip) 1030 { 1031 mdi_phci_t *ph; 1032 int count = 0; 1033 1034 ph = i_devi_get_phci(pdip); 1035 if (ph != NULL) { 1036 count = ph->ph_path_count; 1037 } 1038 return (count); 1039 } 1040 1041 /* 1042 * i_mdi_phci_lock(): 1043 * Lock a pHCI device 1044 * Return Values: 1045 * None 1046 * Note: 1047 * The default locking order is: 1048 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex)) 1049 * But there are number of situations where locks need to be 1050 * grabbed in reverse order. This routine implements try and lock 1051 * mechanism depending on the requested parameter option. 1052 */ 1053 static void 1054 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip) 1055 { 1056 if (pip) { 1057 /* Reverse locking is requested. 
*/ 1058 while (MDI_PHCI_TRYLOCK(ph) == 0) { 1059 if (servicing_interrupt()) { 1060 MDI_PI_HOLD(pip); 1061 MDI_PI_UNLOCK(pip); 1062 MDI_PHCI_LOCK(ph); 1063 MDI_PI_LOCK(pip); 1064 MDI_PI_RELE(pip); 1065 break; 1066 } else { 1067 /* 1068 * tryenter failed. Try to grab again 1069 * after a small delay 1070 */ 1071 MDI_PI_HOLD(pip); 1072 MDI_PI_UNLOCK(pip); 1073 delay_random(mdi_delay); 1074 MDI_PI_LOCK(pip); 1075 MDI_PI_RELE(pip); 1076 } 1077 } 1078 } else { 1079 MDI_PHCI_LOCK(ph); 1080 } 1081 } 1082 1083 /* 1084 * i_mdi_phci_unlock(): 1085 * Unlock the pHCI component 1086 */ 1087 static void 1088 i_mdi_phci_unlock(mdi_phci_t *ph) 1089 { 1090 MDI_PHCI_UNLOCK(ph); 1091 } 1092 1093 /* 1094 * i_mdi_devinfo_create(): 1095 * create client device's devinfo node 1096 * Return Values: 1097 * dev_info 1098 * NULL 1099 * Notes: 1100 */ 1101 static dev_info_t * 1102 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid, 1103 char **compatible, int ncompatible) 1104 { 1105 dev_info_t *cdip = NULL; 1106 1107 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1108 1109 /* Verify for duplicate entry */ 1110 cdip = i_mdi_devinfo_find(vh, name, guid); 1111 ASSERT(cdip == NULL); 1112 if (cdip) { 1113 cmn_err(CE_WARN, 1114 "i_mdi_devinfo_create: client %s@%s already exists", 1115 name ? name : "", guid ? 
guid : ""); 1116 } 1117 1118 ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip); 1119 if (cdip == NULL) 1120 goto fail; 1121 1122 /* 1123 * Create component type and Global unique identifier 1124 * properties 1125 */ 1126 if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip, 1127 MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) { 1128 goto fail; 1129 } 1130 1131 /* Decorate the node with compatible property */ 1132 if (compatible && 1133 (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip, 1134 "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) { 1135 goto fail; 1136 } 1137 1138 return (cdip); 1139 1140 fail: 1141 if (cdip) { 1142 (void) ndi_prop_remove_all(cdip); 1143 (void) ndi_devi_free(cdip); 1144 } 1145 return (NULL); 1146 } 1147 1148 /* 1149 * i_mdi_devinfo_find(): 1150 * Find a matching devinfo node for given client node name 1151 * and its guid. 1152 * Return Values: 1153 * Handle to a dev_info node or NULL 1154 */ 1155 static dev_info_t * 1156 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid) 1157 { 1158 char *data; 1159 dev_info_t *cdip = NULL; 1160 dev_info_t *ndip = NULL; 1161 int circular; 1162 1163 ndi_devi_enter(vh->vh_dip, &circular); 1164 ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child; 1165 while ((cdip = ndip) != NULL) { 1166 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1167 1168 if (strcmp(DEVI(cdip)->devi_node_name, name)) { 1169 continue; 1170 } 1171 1172 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip, 1173 DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP, 1174 &data) != DDI_PROP_SUCCESS) { 1175 continue; 1176 } 1177 1178 if (strcmp(data, guid) != 0) { 1179 ddi_prop_free(data); 1180 continue; 1181 } 1182 ddi_prop_free(data); 1183 break; 1184 } 1185 ndi_devi_exit(vh->vh_dip, circular); 1186 return (cdip); 1187 } 1188 1189 /* 1190 * i_mdi_devinfo_remove(): 1191 * Remove a client device node 1192 */ 1193 static int 1194 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags) 1195 { 1196 int rv = MDI_SUCCESS; 1197 
1198 if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS || 1199 (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) { 1200 rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN | NDI_DEVI_REMOVE); 1201 if (rv != NDI_SUCCESS) { 1202 MDI_DEBUG(1, (MDI_NOTE, cdip, 1203 "!failed: cdip %p", (void *)cdip)); 1204 } 1205 /* 1206 * Convert to MDI error code 1207 */ 1208 switch (rv) { 1209 case NDI_SUCCESS: 1210 rv = MDI_SUCCESS; 1211 break; 1212 case NDI_BUSY: 1213 rv = MDI_BUSY; 1214 break; 1215 default: 1216 rv = MDI_FAILURE; 1217 break; 1218 } 1219 } 1220 return (rv); 1221 } 1222 1223 /* 1224 * i_devi_get_client() 1225 * Utility function to get mpxio component extensions 1226 */ 1227 static mdi_client_t * 1228 i_devi_get_client(dev_info_t *cdip) 1229 { 1230 mdi_client_t *ct = NULL; 1231 1232 if (MDI_CLIENT(cdip)) { 1233 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client; 1234 } 1235 return (ct); 1236 } 1237 1238 /* 1239 * i_mdi_is_child_present(): 1240 * Search for the presence of client device dev_info node 1241 */ 1242 static int 1243 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip) 1244 { 1245 int rv = MDI_FAILURE; 1246 struct dev_info *dip; 1247 int circular; 1248 1249 ndi_devi_enter(vdip, &circular); 1250 dip = DEVI(vdip)->devi_child; 1251 while (dip) { 1252 if (dip == DEVI(cdip)) { 1253 rv = MDI_SUCCESS; 1254 break; 1255 } 1256 dip = dip->devi_sibling; 1257 } 1258 ndi_devi_exit(vdip, circular); 1259 return (rv); 1260 } 1261 1262 1263 /* 1264 * i_mdi_client_lock(): 1265 * Grab client component lock 1266 * Return Values: 1267 * None 1268 * Note: 1269 * The default locking order is: 1270 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex)) 1271 * But there are number of situations where locks need to be 1272 * grabbed in reverse order. This routine implements try and lock 1273 * mechanism depending on the requested parameter option. 
1274 */ 1275 static void 1276 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip) 1277 { 1278 if (pip) { 1279 /* 1280 * Reverse locking is requested. 1281 */ 1282 while (MDI_CLIENT_TRYLOCK(ct) == 0) { 1283 if (servicing_interrupt()) { 1284 MDI_PI_HOLD(pip); 1285 MDI_PI_UNLOCK(pip); 1286 MDI_CLIENT_LOCK(ct); 1287 MDI_PI_LOCK(pip); 1288 MDI_PI_RELE(pip); 1289 break; 1290 } else { 1291 /* 1292 * tryenter failed. Try to grab again 1293 * after a small delay 1294 */ 1295 MDI_PI_HOLD(pip); 1296 MDI_PI_UNLOCK(pip); 1297 delay_random(mdi_delay); 1298 MDI_PI_LOCK(pip); 1299 MDI_PI_RELE(pip); 1300 } 1301 } 1302 } else { 1303 MDI_CLIENT_LOCK(ct); 1304 } 1305 } 1306 1307 /* 1308 * i_mdi_client_unlock(): 1309 * Unlock a client component 1310 */ 1311 static void 1312 i_mdi_client_unlock(mdi_client_t *ct) 1313 { 1314 MDI_CLIENT_UNLOCK(ct); 1315 } 1316 1317 /* 1318 * i_mdi_client_alloc(): 1319 * Allocate and initialize a client structure. Caller should 1320 * hold the vhci client lock. 1321 * Return Values: 1322 * Handle to a client component 1323 */ 1324 /*ARGSUSED*/ 1325 static mdi_client_t * 1326 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid) 1327 { 1328 mdi_client_t *ct; 1329 1330 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1331 1332 /* 1333 * Allocate and initialize a component structure. 
1334 */ 1335 ct = kmem_zalloc(sizeof (*ct), KM_SLEEP); 1336 mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL); 1337 ct->ct_hnext = NULL; 1338 ct->ct_hprev = NULL; 1339 ct->ct_dip = NULL; 1340 ct->ct_vhci = vh; 1341 ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP); 1342 (void) strcpy(ct->ct_drvname, name); 1343 ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP); 1344 (void) strcpy(ct->ct_guid, lguid); 1345 ct->ct_cprivate = NULL; 1346 ct->ct_vprivate = NULL; 1347 ct->ct_flags = 0; 1348 ct->ct_state = MDI_CLIENT_STATE_FAILED; 1349 MDI_CLIENT_LOCK(ct); 1350 MDI_CLIENT_SET_OFFLINE(ct); 1351 MDI_CLIENT_SET_DETACH(ct); 1352 MDI_CLIENT_SET_POWER_UP(ct); 1353 MDI_CLIENT_UNLOCK(ct); 1354 ct->ct_failover_flags = 0; 1355 ct->ct_failover_status = 0; 1356 cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL); 1357 ct->ct_unstable = 0; 1358 cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL); 1359 cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL); 1360 ct->ct_lb = vh->vh_lb; 1361 ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP); 1362 ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE; 1363 ct->ct_path_count = 0; 1364 ct->ct_path_head = NULL; 1365 ct->ct_path_tail = NULL; 1366 ct->ct_path_last = NULL; 1367 1368 /* 1369 * Add this client component to our client hash queue 1370 */ 1371 i_mdi_client_enlist_table(vh, ct); 1372 return (ct); 1373 } 1374 1375 /* 1376 * i_mdi_client_enlist_table(): 1377 * Attach the client device to the client hash table. Caller 1378 * should hold the vhci client lock. 
1379 */ 1380 static void 1381 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1382 { 1383 int index; 1384 struct client_hash *head; 1385 1386 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1387 1388 index = i_mdi_get_hash_key(ct->ct_guid); 1389 head = &vh->vh_client_table[index]; 1390 ct->ct_hnext = (mdi_client_t *)head->ct_hash_head; 1391 head->ct_hash_head = ct; 1392 head->ct_hash_count++; 1393 vh->vh_client_count++; 1394 } 1395 1396 /* 1397 * i_mdi_client_delist_table(): 1398 * Attach the client device to the client hash table. 1399 * Caller should hold the vhci client lock. 1400 */ 1401 static void 1402 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct) 1403 { 1404 int index; 1405 char *guid; 1406 struct client_hash *head; 1407 mdi_client_t *next; 1408 mdi_client_t *last; 1409 1410 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1411 1412 guid = ct->ct_guid; 1413 index = i_mdi_get_hash_key(guid); 1414 head = &vh->vh_client_table[index]; 1415 1416 last = NULL; 1417 next = (mdi_client_t *)head->ct_hash_head; 1418 while (next != NULL) { 1419 if (next == ct) { 1420 break; 1421 } 1422 last = next; 1423 next = next->ct_hnext; 1424 } 1425 1426 if (next) { 1427 head->ct_hash_count--; 1428 if (last == NULL) { 1429 head->ct_hash_head = ct->ct_hnext; 1430 } else { 1431 last->ct_hnext = ct->ct_hnext; 1432 } 1433 ct->ct_hnext = NULL; 1434 vh->vh_client_count--; 1435 } 1436 } 1437 1438 1439 /* 1440 * i_mdi_client_free(): 1441 * Free a client component 1442 */ 1443 static int 1444 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct) 1445 { 1446 int rv = MDI_SUCCESS; 1447 int flags = ct->ct_flags; 1448 dev_info_t *cdip; 1449 dev_info_t *vdip; 1450 1451 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1452 1453 vdip = vh->vh_dip; 1454 cdip = ct->ct_dip; 1455 1456 (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP); 1457 DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT; 1458 DEVI(cdip)->devi_mdi_client = NULL; 1459 1460 /* 1461 * Clear out back ref. 
to dev_info_t node 1462 */ 1463 ct->ct_dip = NULL; 1464 1465 /* 1466 * Remove this client from our hash queue 1467 */ 1468 i_mdi_client_delist_table(vh, ct); 1469 1470 /* 1471 * Uninitialize and free the component 1472 */ 1473 kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1); 1474 kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1); 1475 kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t)); 1476 cv_destroy(&ct->ct_failover_cv); 1477 cv_destroy(&ct->ct_unstable_cv); 1478 cv_destroy(&ct->ct_powerchange_cv); 1479 mutex_destroy(&ct->ct_mutex); 1480 kmem_free(ct, sizeof (*ct)); 1481 1482 MDI_VHCI_CLIENT_UNLOCK(vh); 1483 (void) i_mdi_devinfo_remove(vdip, cdip, flags); 1484 MDI_VHCI_CLIENT_LOCK(vh); 1485 1486 return (rv); 1487 } 1488 1489 /* 1490 * i_mdi_client_find(): 1491 * Find the client structure corresponding to a given guid 1492 * Caller should hold the vhci client lock. 1493 */ 1494 static mdi_client_t * 1495 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid) 1496 { 1497 int index; 1498 struct client_hash *head; 1499 mdi_client_t *ct; 1500 1501 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh)); 1502 1503 index = i_mdi_get_hash_key(guid); 1504 head = &vh->vh_client_table[index]; 1505 1506 ct = head->ct_hash_head; 1507 while (ct != NULL) { 1508 if (strcmp(ct->ct_guid, guid) == 0 && 1509 (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) { 1510 break; 1511 } 1512 ct = ct->ct_hnext; 1513 } 1514 return (ct); 1515 } 1516 1517 /* 1518 * i_mdi_client_update_state(): 1519 * Compute and update client device state 1520 * Notes: 1521 * A client device can be in any of three possible states: 1522 * 1523 * MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more 1524 * one online/standby paths. Can tolerate failures. 1525 * MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with 1526 * no alternate paths available as standby. A failure on the online 1527 * would result in loss of access to device data. 
1528 * MDI_CLIENT_STATE_FAILED - Client device in failed state with 1529 * no paths available to access the device. 1530 */ 1531 static void 1532 i_mdi_client_update_state(mdi_client_t *ct) 1533 { 1534 int state; 1535 1536 ASSERT(MDI_CLIENT_LOCKED(ct)); 1537 state = i_mdi_client_compute_state(ct, NULL); 1538 MDI_CLIENT_SET_STATE(ct, state); 1539 } 1540 1541 /* 1542 * i_mdi_client_compute_state(): 1543 * Compute client device state 1544 * 1545 * mdi_phci_t * Pointer to pHCI structure which should 1546 * while computing the new value. Used by 1547 * i_mdi_phci_offline() to find the new 1548 * client state after DR of a pHCI. 1549 */ 1550 static int 1551 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph) 1552 { 1553 int state; 1554 int online_count = 0; 1555 int standby_count = 0; 1556 mdi_pathinfo_t *pip, *next; 1557 1558 ASSERT(MDI_CLIENT_LOCKED(ct)); 1559 pip = ct->ct_path_head; 1560 while (pip != NULL) { 1561 MDI_PI_LOCK(pip); 1562 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 1563 if (MDI_PI(pip)->pi_phci == ph) { 1564 MDI_PI_UNLOCK(pip); 1565 pip = next; 1566 continue; 1567 } 1568 1569 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1570 == MDI_PATHINFO_STATE_ONLINE) 1571 online_count++; 1572 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK) 1573 == MDI_PATHINFO_STATE_STANDBY) 1574 standby_count++; 1575 MDI_PI_UNLOCK(pip); 1576 pip = next; 1577 } 1578 1579 if (online_count == 0) { 1580 if (standby_count == 0) { 1581 state = MDI_CLIENT_STATE_FAILED; 1582 MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip, 1583 "client state failed: ct = %p", (void *)ct)); 1584 } else if (standby_count == 1) { 1585 state = MDI_CLIENT_STATE_DEGRADED; 1586 } else { 1587 state = MDI_CLIENT_STATE_OPTIMAL; 1588 } 1589 } else if (online_count == 1) { 1590 if (standby_count == 0) { 1591 state = MDI_CLIENT_STATE_DEGRADED; 1592 } else { 1593 state = MDI_CLIENT_STATE_OPTIMAL; 1594 } 1595 } else { 1596 state = MDI_CLIENT_STATE_OPTIMAL; 1597 } 1598 return (state); 1599 } 1600 
1601 /* 1602 * i_mdi_client2devinfo(): 1603 * Utility function 1604 */ 1605 dev_info_t * 1606 i_mdi_client2devinfo(mdi_client_t *ct) 1607 { 1608 return (ct->ct_dip); 1609 } 1610 1611 /* 1612 * mdi_client_path2_devinfo(): 1613 * Given the parent devinfo and child devfs pathname, search for 1614 * a valid devfs node handle. 1615 */ 1616 dev_info_t * 1617 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname) 1618 { 1619 dev_info_t *cdip = NULL; 1620 dev_info_t *ndip = NULL; 1621 char *temp_pathname; 1622 int circular; 1623 1624 /* 1625 * Allocate temp buffer 1626 */ 1627 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1628 1629 /* 1630 * Lock parent against changes 1631 */ 1632 ndi_devi_enter(vdip, &circular); 1633 ndip = (dev_info_t *)DEVI(vdip)->devi_child; 1634 while ((cdip = ndip) != NULL) { 1635 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling; 1636 1637 *temp_pathname = '\0'; 1638 (void) ddi_pathname(cdip, temp_pathname); 1639 if (strcmp(temp_pathname, pathname) == 0) { 1640 break; 1641 } 1642 } 1643 /* 1644 * Release devinfo lock 1645 */ 1646 ndi_devi_exit(vdip, circular); 1647 1648 /* 1649 * Free the temp buffer 1650 */ 1651 kmem_free(temp_pathname, MAXPATHLEN); 1652 return (cdip); 1653 } 1654 1655 /* 1656 * mdi_client_get_path_count(): 1657 * Utility function to get number of path information nodes 1658 * associated with a given client device. 
1659 */ 1660 int 1661 mdi_client_get_path_count(dev_info_t *cdip) 1662 { 1663 mdi_client_t *ct; 1664 int count = 0; 1665 1666 ct = i_devi_get_client(cdip); 1667 if (ct != NULL) { 1668 count = ct->ct_path_count; 1669 } 1670 return (count); 1671 } 1672 1673 1674 /* 1675 * i_mdi_get_hash_key(): 1676 * Create a hash using strings as keys 1677 * 1678 */ 1679 static int 1680 i_mdi_get_hash_key(char *str) 1681 { 1682 uint32_t g, hash = 0; 1683 char *p; 1684 1685 for (p = str; *p != '\0'; p++) { 1686 g = *p; 1687 hash += g; 1688 } 1689 return (hash % (CLIENT_HASH_TABLE_SIZE - 1)); 1690 } 1691 1692 /* 1693 * mdi_get_lb_policy(): 1694 * Get current load balancing policy for a given client device 1695 */ 1696 client_lb_t 1697 mdi_get_lb_policy(dev_info_t *cdip) 1698 { 1699 client_lb_t lb = LOAD_BALANCE_NONE; 1700 mdi_client_t *ct; 1701 1702 ct = i_devi_get_client(cdip); 1703 if (ct != NULL) { 1704 lb = ct->ct_lb; 1705 } 1706 return (lb); 1707 } 1708 1709 /* 1710 * mdi_set_lb_region_size(): 1711 * Set current region size for the load-balance 1712 */ 1713 int 1714 mdi_set_lb_region_size(dev_info_t *cdip, int region_size) 1715 { 1716 mdi_client_t *ct; 1717 int rv = MDI_FAILURE; 1718 1719 ct = i_devi_get_client(cdip); 1720 if (ct != NULL && ct->ct_lb_args != NULL) { 1721 ct->ct_lb_args->region_size = region_size; 1722 rv = MDI_SUCCESS; 1723 } 1724 return (rv); 1725 } 1726 1727 /* 1728 * mdi_Set_lb_policy(): 1729 * Set current load balancing policy for a given client device 1730 */ 1731 int 1732 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb) 1733 { 1734 mdi_client_t *ct; 1735 int rv = MDI_FAILURE; 1736 1737 ct = i_devi_get_client(cdip); 1738 if (ct != NULL) { 1739 ct->ct_lb = lb; 1740 rv = MDI_SUCCESS; 1741 } 1742 return (rv); 1743 } 1744 1745 /* 1746 * mdi_failover(): 1747 * failover function called by the vHCI drivers to initiate 1748 * a failover operation. This is typically due to non-availability 1749 * of online paths to route I/O requests. 
Failover can be 1750 * triggered through user application also. 1751 * 1752 * The vHCI driver calls mdi_failover() to initiate a failover 1753 * operation. mdi_failover() calls back into the vHCI driver's 1754 * vo_failover() entry point to perform the actual failover 1755 * operation. The reason for requiring the vHCI driver to 1756 * initiate failover by calling mdi_failover(), instead of directly 1757 * executing vo_failover() itself, is to ensure that the mdi 1758 * framework can keep track of the client state properly. 1759 * Additionally, mdi_failover() provides as a convenience the 1760 * option of performing the failover operation synchronously or 1761 * asynchronously 1762 * 1763 * Upon successful completion of the failover operation, the 1764 * paths that were previously ONLINE will be in the STANDBY state, 1765 * and the newly activated paths will be in the ONLINE state. 1766 * 1767 * The flags modifier determines whether the activation is done 1768 * synchronously: MDI_FAILOVER_SYNC 1769 * Return Values: 1770 * MDI_SUCCESS 1771 * MDI_FAILURE 1772 * MDI_BUSY 1773 */ 1774 /*ARGSUSED*/ 1775 int 1776 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags) 1777 { 1778 int rv; 1779 mdi_client_t *ct; 1780 1781 ct = i_devi_get_client(cdip); 1782 ASSERT(ct != NULL); 1783 if (ct == NULL) { 1784 /* cdip is not a valid client device. Nothing more to do. */ 1785 return (MDI_FAILURE); 1786 } 1787 1788 MDI_CLIENT_LOCK(ct); 1789 1790 if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) { 1791 /* A path to the client is being freed */ 1792 MDI_CLIENT_UNLOCK(ct); 1793 return (MDI_BUSY); 1794 } 1795 1796 1797 if (MDI_CLIENT_IS_FAILED(ct)) { 1798 /* 1799 * Client is in failed state. Nothing more to do. 
1800 */ 1801 MDI_CLIENT_UNLOCK(ct); 1802 return (MDI_FAILURE); 1803 } 1804 1805 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 1806 /* 1807 * Failover is already in progress; return BUSY 1808 */ 1809 MDI_CLIENT_UNLOCK(ct); 1810 return (MDI_BUSY); 1811 } 1812 /* 1813 * Make sure that mdi_pathinfo node state changes are processed. 1814 * We do not allow failovers to progress while client path state 1815 * changes are in progress 1816 */ 1817 if (ct->ct_unstable) { 1818 if (flags == MDI_FAILOVER_ASYNC) { 1819 MDI_CLIENT_UNLOCK(ct); 1820 return (MDI_BUSY); 1821 } else { 1822 while (ct->ct_unstable) 1823 cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex); 1824 } 1825 } 1826 1827 /* 1828 * Client device is in stable state. Before proceeding, perform sanity 1829 * checks again. 1830 */ 1831 if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) || 1832 (!i_ddi_devi_attached(cdip))) { 1833 /* 1834 * Client is in failed state. Nothing more to do. 1835 */ 1836 MDI_CLIENT_UNLOCK(ct); 1837 return (MDI_FAILURE); 1838 } 1839 1840 /* 1841 * Set the client state as failover in progress. 1842 */ 1843 MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct); 1844 ct->ct_failover_flags = flags; 1845 MDI_CLIENT_UNLOCK(ct); 1846 1847 if (flags == MDI_FAILOVER_ASYNC) { 1848 /* 1849 * Submit the initiate failover request via CPR safe 1850 * taskq threads. 1851 */ 1852 (void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover, 1853 ct, KM_SLEEP); 1854 return (MDI_ACCEPT); 1855 } else { 1856 /* 1857 * Synchronous failover mode. Typically invoked from the user 1858 * land. 1859 */ 1860 rv = i_mdi_failover(ct); 1861 } 1862 return (rv); 1863 } 1864 1865 /* 1866 * i_mdi_failover(): 1867 * internal failover function. Invokes vHCI drivers failover 1868 * callback function and process the failover status 1869 * Return Values: 1870 * None 1871 * 1872 * Note: A client device in failover state can not be detached or freed. 
1873 */ 1874 static int 1875 i_mdi_failover(void *arg) 1876 { 1877 int rv = MDI_SUCCESS; 1878 mdi_client_t *ct = (mdi_client_t *)arg; 1879 mdi_vhci_t *vh = ct->ct_vhci; 1880 1881 ASSERT(!MDI_CLIENT_LOCKED(ct)); 1882 1883 if (vh->vh_ops->vo_failover != NULL) { 1884 /* 1885 * Call vHCI drivers callback routine 1886 */ 1887 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip, 1888 ct->ct_failover_flags); 1889 } 1890 1891 MDI_CLIENT_LOCK(ct); 1892 MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct); 1893 1894 /* 1895 * Save the failover return status 1896 */ 1897 ct->ct_failover_status = rv; 1898 1899 /* 1900 * As a result of failover, client status would have been changed. 1901 * Update the client state and wake up anyone waiting on this client 1902 * device. 1903 */ 1904 i_mdi_client_update_state(ct); 1905 1906 cv_broadcast(&ct->ct_failover_cv); 1907 MDI_CLIENT_UNLOCK(ct); 1908 return (rv); 1909 } 1910 1911 /* 1912 * Load balancing is logical block. 1913 * IOs within the range described by region_size 1914 * would go on the same path. This would improve the 1915 * performance by cache-hit on some of the RAID devices. 1916 * Search only for online paths(At some point we 1917 * may want to balance across target ports). 1918 * If no paths are found then default to round-robin. 
1919 */ 1920 static int 1921 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp) 1922 { 1923 int path_index = -1; 1924 int online_path_count = 0; 1925 int online_nonpref_path_count = 0; 1926 int region_size = ct->ct_lb_args->region_size; 1927 mdi_pathinfo_t *pip; 1928 mdi_pathinfo_t *next; 1929 int preferred, path_cnt; 1930 1931 pip = ct->ct_path_head; 1932 while (pip) { 1933 MDI_PI_LOCK(pip); 1934 if (MDI_PI(pip)->pi_state == 1935 MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) { 1936 online_path_count++; 1937 } else if (MDI_PI(pip)->pi_state == 1938 MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) { 1939 online_nonpref_path_count++; 1940 } 1941 next = (mdi_pathinfo_t *) 1942 MDI_PI(pip)->pi_client_link; 1943 MDI_PI_UNLOCK(pip); 1944 pip = next; 1945 } 1946 /* if found any online/preferred then use this type */ 1947 if (online_path_count > 0) { 1948 path_cnt = online_path_count; 1949 preferred = 1; 1950 } else if (online_nonpref_path_count > 0) { 1951 path_cnt = online_nonpref_path_count; 1952 preferred = 0; 1953 } else { 1954 path_cnt = 0; 1955 } 1956 if (path_cnt) { 1957 path_index = (bp->b_blkno >> region_size) % path_cnt; 1958 pip = ct->ct_path_head; 1959 while (pip && path_index != -1) { 1960 MDI_PI_LOCK(pip); 1961 if (path_index == 0 && 1962 (MDI_PI(pip)->pi_state == 1963 MDI_PATHINFO_STATE_ONLINE) && 1964 MDI_PI(pip)->pi_preferred == preferred) { 1965 MDI_PI_HOLD(pip); 1966 MDI_PI_UNLOCK(pip); 1967 *ret_pip = pip; 1968 return (MDI_SUCCESS); 1969 } 1970 path_index --; 1971 next = (mdi_pathinfo_t *) 1972 MDI_PI(pip)->pi_client_link; 1973 MDI_PI_UNLOCK(pip); 1974 pip = next; 1975 } 1976 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 1977 "lba %llx: path %s %p", 1978 bp->b_lblkno, mdi_pi_spathname(pip), (void *)pip)); 1979 } 1980 return (MDI_FAILURE); 1981 } 1982 1983 /* 1984 * mdi_select_path(): 1985 * select a path to access a client device. 
1986 * 1987 * mdi_select_path() function is called by the vHCI drivers to 1988 * select a path to route the I/O request to. The caller passes 1989 * the block I/O data transfer structure ("buf") as one of the 1990 * parameters. The mpxio framework uses the buf structure 1991 * contents to maintain per path statistics (total I/O size / 1992 * count pending). If more than one online paths are available to 1993 * select, the framework automatically selects a suitable path 1994 * for routing I/O request. If a failover operation is active for 1995 * this client device the call shall be failed with MDI_BUSY error 1996 * code. 1997 * 1998 * By default this function returns a suitable path in online 1999 * state based on the current load balancing policy. Currently 2000 * we support LOAD_BALANCE_NONE (Previously selected online path 2001 * will continue to be used till the path is usable) and 2002 * LOAD_BALANCE_RR (Online paths will be selected in a round 2003 * robin fashion), LOAD_BALANCE_LB(Online paths will be selected 2004 * based on the logical block). The load balancing 2005 * through vHCI drivers configuration file (driver.conf). 2006 * 2007 * vHCI drivers may override this default behavior by specifying 2008 * appropriate flags. The meaning of the thrid argument depends 2009 * on the flags specified. If MDI_SELECT_PATH_INSTANCE is set 2010 * then the argument is the "path instance" of the path to select. 2011 * If MDI_SELECT_PATH_INSTANCE is not set then the argument is 2012 * "start_pip". A non NULL "start_pip" is the starting point to 2013 * walk and find the next appropriate path. The following values 2014 * are currently defined: MDI_SELECT_ONLINE_PATH (to select an 2015 * ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an 2016 * STANDBY path). 2017 * 2018 * The non-standard behavior is used by the scsi_vhci driver, 2019 * whenever it has to use a STANDBY/FAULTED path. Eg. 
during 2020 * attach of client devices (to avoid an unnecessary failover 2021 * when the STANDBY path comes up first), during failover 2022 * (to activate a STANDBY path as ONLINE). 2023 * 2024 * The selected path is returned in a a mdi_hold_path() state 2025 * (pi_ref_cnt). Caller should release the hold by calling 2026 * mdi_rele_path(). 2027 * 2028 * Return Values: 2029 * MDI_SUCCESS - Completed successfully 2030 * MDI_BUSY - Client device is busy failing over 2031 * MDI_NOPATH - Client device is online, but no valid path are 2032 * available to access this client device 2033 * MDI_FAILURE - Invalid client device or state 2034 * MDI_DEVI_ONLINING 2035 * - Client device (struct dev_info state) is in 2036 * onlining state. 2037 */ 2038 2039 /*ARGSUSED*/ 2040 int 2041 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags, 2042 void *arg, mdi_pathinfo_t **ret_pip) 2043 { 2044 mdi_client_t *ct; 2045 mdi_pathinfo_t *pip; 2046 mdi_pathinfo_t *next; 2047 mdi_pathinfo_t *head; 2048 mdi_pathinfo_t *start; 2049 client_lb_t lbp; /* load balancing policy */ 2050 int sb = 1; /* standard behavior */ 2051 int preferred = 1; /* preferred path */ 2052 int cond, cont = 1; 2053 int retry = 0; 2054 mdi_pathinfo_t *start_pip; /* request starting pathinfo */ 2055 int path_instance; /* request specific path instance */ 2056 2057 /* determine type of arg based on flags */ 2058 if (flags & MDI_SELECT_PATH_INSTANCE) { 2059 path_instance = (int)(intptr_t)arg; 2060 start_pip = NULL; 2061 } else { 2062 path_instance = 0; 2063 start_pip = (mdi_pathinfo_t *)arg; 2064 } 2065 2066 if (flags != 0) { 2067 /* 2068 * disable default behavior 2069 */ 2070 sb = 0; 2071 } 2072 2073 *ret_pip = NULL; 2074 ct = i_devi_get_client(cdip); 2075 if (ct == NULL) { 2076 /* mdi extensions are NULL, Nothing more to do */ 2077 return (MDI_FAILURE); 2078 } 2079 2080 MDI_CLIENT_LOCK(ct); 2081 2082 if (sb) { 2083 if (MDI_CLIENT_IS_FAILED(ct)) { 2084 /* 2085 * Client is not ready to accept any I/O requests. 
2086 * Fail this request. 2087 */ 2088 MDI_DEBUG(2, (MDI_NOTE, cdip, 2089 "client state offline ct = %p", (void *)ct)); 2090 MDI_CLIENT_UNLOCK(ct); 2091 return (MDI_FAILURE); 2092 } 2093 2094 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 2095 /* 2096 * Check for Failover is in progress. If so tell the 2097 * caller that this device is busy. 2098 */ 2099 MDI_DEBUG(2, (MDI_NOTE, cdip, 2100 "client failover in progress ct = %p", 2101 (void *)ct)); 2102 MDI_CLIENT_UNLOCK(ct); 2103 return (MDI_BUSY); 2104 } 2105 2106 /* 2107 * Check to see whether the client device is attached. 2108 * If not so, let the vHCI driver manually select a path 2109 * (standby) and let the probe/attach process to continue. 2110 */ 2111 if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) { 2112 MDI_DEBUG(4, (MDI_NOTE, cdip, 2113 "devi is onlining ct = %p", (void *)ct)); 2114 MDI_CLIENT_UNLOCK(ct); 2115 return (MDI_DEVI_ONLINING); 2116 } 2117 } 2118 2119 /* 2120 * Cache in the client list head. If head of the list is NULL 2121 * return MDI_NOPATH 2122 */ 2123 head = ct->ct_path_head; 2124 if (head == NULL) { 2125 MDI_CLIENT_UNLOCK(ct); 2126 return (MDI_NOPATH); 2127 } 2128 2129 /* Caller is specifying a specific pathinfo path by path_instance */ 2130 if (path_instance) { 2131 /* search for pathinfo with correct path_instance */ 2132 for (pip = head; 2133 pip && (mdi_pi_get_path_instance(pip) != path_instance); 2134 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) 2135 ; 2136 2137 /* If path can't be selected then MDI_NOPATH is returned. */ 2138 if (pip == NULL) { 2139 MDI_CLIENT_UNLOCK(ct); 2140 return (MDI_NOPATH); 2141 } 2142 2143 /* 2144 * Verify state of path. When asked to select a specific 2145 * path_instance, we select the requested path in any 2146 * state (ONLINE, OFFLINE, STANDBY, FAULT) other than INIT. 2147 * We don't however select paths where the pHCI has detached. 
2148 * NOTE: last pathinfo node of an opened client device may 2149 * exist in an OFFLINE state after the pHCI associated with 2150 * that path has detached (but pi_phci will be NULL if that 2151 * has occurred). 2152 */ 2153 MDI_PI_LOCK(pip); 2154 if ((MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_INIT) || 2155 (MDI_PI(pip)->pi_phci == NULL)) { 2156 MDI_PI_UNLOCK(pip); 2157 MDI_CLIENT_UNLOCK(ct); 2158 return (MDI_FAILURE); 2159 } 2160 2161 /* Return MDI_BUSY if we have a transient condition */ 2162 if (MDI_PI_IS_TRANSIENT(pip)) { 2163 MDI_PI_UNLOCK(pip); 2164 MDI_CLIENT_UNLOCK(ct); 2165 return (MDI_BUSY); 2166 } 2167 2168 /* 2169 * Return the path in hold state. Caller should release the 2170 * lock by calling mdi_rele_path() 2171 */ 2172 MDI_PI_HOLD(pip); 2173 MDI_PI_UNLOCK(pip); 2174 *ret_pip = pip; 2175 MDI_CLIENT_UNLOCK(ct); 2176 return (MDI_SUCCESS); 2177 } 2178 2179 /* 2180 * for non default behavior, bypass current 2181 * load balancing policy and always use LOAD_BALANCE_RR 2182 * except that the start point will be adjusted based 2183 * on the provided start_pip 2184 */ 2185 lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR; 2186 2187 switch (lbp) { 2188 case LOAD_BALANCE_NONE: 2189 /* 2190 * Load balancing is None or Alternate path mode 2191 * Start looking for a online mdi_pathinfo node starting from 2192 * last known selected path 2193 */ 2194 preferred = 1; 2195 pip = (mdi_pathinfo_t *)ct->ct_path_last; 2196 if (pip == NULL) { 2197 pip = head; 2198 } 2199 start = pip; 2200 do { 2201 MDI_PI_LOCK(pip); 2202 /* 2203 * No need to explicitly check if the path is disabled. 2204 * Since we are checking for state == ONLINE and the 2205 * same variable is used for DISABLE/ENABLE information. 2206 */ 2207 if ((MDI_PI(pip)->pi_state == 2208 MDI_PATHINFO_STATE_ONLINE) && 2209 preferred == MDI_PI(pip)->pi_preferred) { 2210 /* 2211 * Return the path in hold state. 
Caller should 2212 * release the lock by calling mdi_rele_path() 2213 */ 2214 MDI_PI_HOLD(pip); 2215 MDI_PI_UNLOCK(pip); 2216 ct->ct_path_last = pip; 2217 *ret_pip = pip; 2218 MDI_CLIENT_UNLOCK(ct); 2219 return (MDI_SUCCESS); 2220 } 2221 2222 /* 2223 * Path is busy. 2224 */ 2225 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2226 MDI_PI_IS_TRANSIENT(pip)) 2227 retry = 1; 2228 /* 2229 * Keep looking for a next available online path 2230 */ 2231 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2232 if (next == NULL) { 2233 next = head; 2234 } 2235 MDI_PI_UNLOCK(pip); 2236 pip = next; 2237 if (start == pip && preferred) { 2238 preferred = 0; 2239 } else if (start == pip && !preferred) { 2240 cont = 0; 2241 } 2242 } while (cont); 2243 break; 2244 2245 case LOAD_BALANCE_LBA: 2246 /* 2247 * Make sure we are looking 2248 * for an online path. Otherwise, if it is for a STANDBY 2249 * path request, it will go through and fetch an ONLINE 2250 * path which is not desirable. 2251 */ 2252 if ((ct->ct_lb_args != NULL) && 2253 (ct->ct_lb_args->region_size) && bp && 2254 (sb || (flags == MDI_SELECT_ONLINE_PATH))) { 2255 if (i_mdi_lba_lb(ct, ret_pip, bp) 2256 == MDI_SUCCESS) { 2257 MDI_CLIENT_UNLOCK(ct); 2258 return (MDI_SUCCESS); 2259 } 2260 } 2261 /* FALLTHROUGH */ 2262 case LOAD_BALANCE_RR: 2263 /* 2264 * Load balancing is Round Robin. Start looking for a online 2265 * mdi_pathinfo node starting from last known selected path 2266 * as the start point. If override flags are specified, 2267 * process accordingly. 2268 * If the search is already in effect(start_pip not null), 2269 * then lets just use the same path preference to continue the 2270 * traversal. 2271 */ 2272 2273 if (start_pip != NULL) { 2274 preferred = MDI_PI(start_pip)->pi_preferred; 2275 } else { 2276 preferred = 1; 2277 } 2278 2279 start = sb ? 
(mdi_pathinfo_t *)ct->ct_path_last : start_pip; 2280 if (start == NULL) { 2281 pip = head; 2282 } else { 2283 pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link; 2284 if (pip == NULL) { 2285 if ( flags & MDI_SELECT_NO_PREFERRED) { 2286 /* 2287 * Return since we hit the end of list 2288 */ 2289 MDI_CLIENT_UNLOCK(ct); 2290 return (MDI_NOPATH); 2291 } 2292 2293 if (!sb) { 2294 if (preferred == 0) { 2295 /* 2296 * Looks like we have completed 2297 * the traversal as preferred 2298 * value is 0. Time to bail out. 2299 */ 2300 *ret_pip = NULL; 2301 MDI_CLIENT_UNLOCK(ct); 2302 return (MDI_NOPATH); 2303 } else { 2304 /* 2305 * Looks like we reached the 2306 * end of the list. Lets enable 2307 * traversal of non preferred 2308 * paths. 2309 */ 2310 preferred = 0; 2311 } 2312 } 2313 pip = head; 2314 } 2315 } 2316 start = pip; 2317 do { 2318 MDI_PI_LOCK(pip); 2319 if (sb) { 2320 cond = ((MDI_PI(pip)->pi_state == 2321 MDI_PATHINFO_STATE_ONLINE && 2322 MDI_PI(pip)->pi_preferred == 2323 preferred) ? 1 : 0); 2324 } else { 2325 if (flags == MDI_SELECT_ONLINE_PATH) { 2326 cond = ((MDI_PI(pip)->pi_state == 2327 MDI_PATHINFO_STATE_ONLINE && 2328 MDI_PI(pip)->pi_preferred == 2329 preferred) ? 1 : 0); 2330 } else if (flags == MDI_SELECT_STANDBY_PATH) { 2331 cond = ((MDI_PI(pip)->pi_state == 2332 MDI_PATHINFO_STATE_STANDBY && 2333 MDI_PI(pip)->pi_preferred == 2334 preferred) ? 1 : 0); 2335 } else if (flags == (MDI_SELECT_ONLINE_PATH | 2336 MDI_SELECT_STANDBY_PATH)) { 2337 cond = (((MDI_PI(pip)->pi_state == 2338 MDI_PATHINFO_STATE_ONLINE || 2339 (MDI_PI(pip)->pi_state == 2340 MDI_PATHINFO_STATE_STANDBY)) && 2341 MDI_PI(pip)->pi_preferred == 2342 preferred) ? 
1 : 0); 2343 } else if (flags == 2344 (MDI_SELECT_STANDBY_PATH | 2345 MDI_SELECT_ONLINE_PATH | 2346 MDI_SELECT_USER_DISABLE_PATH)) { 2347 cond = (((MDI_PI(pip)->pi_state == 2348 MDI_PATHINFO_STATE_ONLINE || 2349 (MDI_PI(pip)->pi_state == 2350 MDI_PATHINFO_STATE_STANDBY) || 2351 (MDI_PI(pip)->pi_state == 2352 (MDI_PATHINFO_STATE_ONLINE| 2353 MDI_PATHINFO_STATE_USER_DISABLE)) || 2354 (MDI_PI(pip)->pi_state == 2355 (MDI_PATHINFO_STATE_STANDBY | 2356 MDI_PATHINFO_STATE_USER_DISABLE)))&& 2357 MDI_PI(pip)->pi_preferred == 2358 preferred) ? 1 : 0); 2359 } else if (flags == 2360 (MDI_SELECT_STANDBY_PATH | 2361 MDI_SELECT_ONLINE_PATH | 2362 MDI_SELECT_NO_PREFERRED)) { 2363 cond = (((MDI_PI(pip)->pi_state == 2364 MDI_PATHINFO_STATE_ONLINE) || 2365 (MDI_PI(pip)->pi_state == 2366 MDI_PATHINFO_STATE_STANDBY)) 2367 ? 1 : 0); 2368 } else { 2369 cond = 0; 2370 } 2371 } 2372 /* 2373 * No need to explicitly check if the path is disabled. 2374 * Since we are checking for state == ONLINE and the 2375 * same variable is used for DISABLE/ENABLE information. 2376 */ 2377 if (cond) { 2378 /* 2379 * Return the path in hold state. Caller should 2380 * release the lock by calling mdi_rele_path() 2381 */ 2382 MDI_PI_HOLD(pip); 2383 MDI_PI_UNLOCK(pip); 2384 if (sb) 2385 ct->ct_path_last = pip; 2386 *ret_pip = pip; 2387 MDI_CLIENT_UNLOCK(ct); 2388 return (MDI_SUCCESS); 2389 } 2390 /* 2391 * Path is busy. 2392 */ 2393 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) || 2394 MDI_PI_IS_TRANSIENT(pip)) 2395 retry = 1; 2396 2397 /* 2398 * Keep looking for a next available online path 2399 */ 2400 do_again: 2401 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2402 if (next == NULL) { 2403 if ( flags & MDI_SELECT_NO_PREFERRED) { 2404 /* 2405 * Bail out since we hit the end of list 2406 */ 2407 MDI_PI_UNLOCK(pip); 2408 break; 2409 } 2410 2411 if (!sb) { 2412 if (preferred == 1) { 2413 /* 2414 * Looks like we reached the 2415 * end of the list. Lets enable 2416 * traversal of non preferred 2417 * paths. 
2418 */ 2419 preferred = 0; 2420 next = head; 2421 } else { 2422 /* 2423 * We have done both the passes 2424 * Preferred as well as for 2425 * Non-preferred. Bail out now. 2426 */ 2427 cont = 0; 2428 } 2429 } else { 2430 /* 2431 * Standard behavior case. 2432 */ 2433 next = head; 2434 } 2435 } 2436 MDI_PI_UNLOCK(pip); 2437 if (cont == 0) { 2438 break; 2439 } 2440 pip = next; 2441 2442 if (!sb) { 2443 /* 2444 * We need to handle the selection of 2445 * non-preferred path in the following 2446 * case: 2447 * 2448 * +------+ +------+ +------+ +-----+ 2449 * | A : 1| - | B : 1| - | C : 0| - |NULL | 2450 * +------+ +------+ +------+ +-----+ 2451 * 2452 * If we start the search with B, we need to 2453 * skip beyond B to pick C which is non - 2454 * preferred in the second pass. The following 2455 * test, if true, will allow us to skip over 2456 * the 'start'(B in the example) to select 2457 * other non preferred elements. 2458 */ 2459 if ((start_pip != NULL) && (start_pip == pip) && 2460 (MDI_PI(start_pip)->pi_preferred 2461 != preferred)) { 2462 /* 2463 * try again after going past the start 2464 * pip 2465 */ 2466 MDI_PI_LOCK(pip); 2467 goto do_again; 2468 } 2469 } else { 2470 /* 2471 * Standard behavior case 2472 */ 2473 if (start == pip && preferred) { 2474 /* look for nonpreferred paths */ 2475 preferred = 0; 2476 } else if (start == pip && !preferred) { 2477 /* 2478 * Exit condition 2479 */ 2480 cont = 0; 2481 } 2482 } 2483 } while (cont); 2484 break; 2485 } 2486 2487 MDI_CLIENT_UNLOCK(ct); 2488 if (retry == 1) { 2489 return (MDI_BUSY); 2490 } else { 2491 return (MDI_NOPATH); 2492 } 2493 } 2494 2495 /* 2496 * For a client, return the next available path to any phci 2497 * 2498 * Note: 2499 * Caller should hold the branch's devinfo node to get a consistent 2500 * snap shot of the mdi_pathinfo nodes. 2501 * 2502 * Please note that even the list is stable the mdi_pathinfo 2503 * node state and properties are volatile. 
The caller should lock 2504 * and unlock the nodes by calling mdi_pi_lock() and 2505 * mdi_pi_unlock() functions to get a stable properties. 2506 * 2507 * If there is a need to use the nodes beyond the hold of the 2508 * devinfo node period (For ex. I/O), then mdi_pathinfo node 2509 * need to be held against unexpected removal by calling 2510 * mdi_hold_path() and should be released by calling 2511 * mdi_rele_path() on completion. 2512 */ 2513 mdi_pathinfo_t * 2514 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip) 2515 { 2516 mdi_client_t *ct; 2517 2518 if (!MDI_CLIENT(ct_dip)) 2519 return (NULL); 2520 2521 /* 2522 * Walk through client link 2523 */ 2524 ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client; 2525 ASSERT(ct != NULL); 2526 2527 if (pip == NULL) 2528 return ((mdi_pathinfo_t *)ct->ct_path_head); 2529 2530 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link); 2531 } 2532 2533 /* 2534 * For a phci, return the next available path to any client 2535 * Note: ditto mdi_get_next_phci_path() 2536 */ 2537 mdi_pathinfo_t * 2538 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip) 2539 { 2540 mdi_phci_t *ph; 2541 2542 if (!MDI_PHCI(ph_dip)) 2543 return (NULL); 2544 2545 /* 2546 * Walk through pHCI link 2547 */ 2548 ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci; 2549 ASSERT(ph != NULL); 2550 2551 if (pip == NULL) 2552 return ((mdi_pathinfo_t *)ph->ph_path_head); 2553 2554 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link); 2555 } 2556 2557 /* 2558 * mdi_hold_path(): 2559 * Hold the mdi_pathinfo node against unwanted unexpected free. 2560 * Return Values: 2561 * None 2562 */ 2563 void 2564 mdi_hold_path(mdi_pathinfo_t *pip) 2565 { 2566 if (pip) { 2567 MDI_PI_LOCK(pip); 2568 MDI_PI_HOLD(pip); 2569 MDI_PI_UNLOCK(pip); 2570 } 2571 } 2572 2573 2574 /* 2575 * mdi_rele_path(): 2576 * Release the mdi_pathinfo node which was selected 2577 * through mdi_select_path() mechanism or manually held by 2578 * calling mdi_hold_path(). 
2579 * Return Values: 2580 * None 2581 */ 2582 void 2583 mdi_rele_path(mdi_pathinfo_t *pip) 2584 { 2585 if (pip) { 2586 MDI_PI_LOCK(pip); 2587 MDI_PI_RELE(pip); 2588 if (MDI_PI(pip)->pi_ref_cnt == 0) { 2589 cv_broadcast(&MDI_PI(pip)->pi_ref_cv); 2590 } 2591 MDI_PI_UNLOCK(pip); 2592 } 2593 } 2594 2595 /* 2596 * mdi_pi_lock(): 2597 * Lock the mdi_pathinfo node. 2598 * Note: 2599 * The caller should release the lock by calling mdi_pi_unlock() 2600 */ 2601 void 2602 mdi_pi_lock(mdi_pathinfo_t *pip) 2603 { 2604 ASSERT(pip != NULL); 2605 if (pip) { 2606 MDI_PI_LOCK(pip); 2607 } 2608 } 2609 2610 2611 /* 2612 * mdi_pi_unlock(): 2613 * Unlock the mdi_pathinfo node. 2614 * Note: 2615 * The mdi_pathinfo node should have been locked with mdi_pi_lock() 2616 */ 2617 void 2618 mdi_pi_unlock(mdi_pathinfo_t *pip) 2619 { 2620 ASSERT(pip != NULL); 2621 if (pip) { 2622 MDI_PI_UNLOCK(pip); 2623 } 2624 } 2625 2626 /* 2627 * mdi_pi_find(): 2628 * Search the list of mdi_pathinfo nodes attached to the 2629 * pHCI/Client device node whose path address matches "paddr". 2630 * Returns a pointer to the mdi_pathinfo node if a matching node is 2631 * found. 2632 * Return Values: 2633 * mdi_pathinfo node handle 2634 * NULL 2635 * Notes: 2636 * Caller need not hold any locks to call this function. 2637 */ 2638 mdi_pathinfo_t * 2639 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr) 2640 { 2641 mdi_phci_t *ph; 2642 mdi_vhci_t *vh; 2643 mdi_client_t *ct; 2644 mdi_pathinfo_t *pip = NULL; 2645 2646 MDI_DEBUG(2, (MDI_NOTE, pdip, 2647 "caddr@%s paddr@%s", caddr ? caddr : "", paddr ? paddr : "")); 2648 if ((pdip == NULL) || (paddr == NULL)) { 2649 return (NULL); 2650 } 2651 ph = i_devi_get_phci(pdip); 2652 if (ph == NULL) { 2653 /* 2654 * Invalid pHCI device, Nothing more to do. 2655 */ 2656 MDI_DEBUG(2, (MDI_WARN, pdip, "invalid phci")); 2657 return (NULL); 2658 } 2659 2660 vh = ph->ph_vhci; 2661 if (vh == NULL) { 2662 /* 2663 * Invalid vHCI device, Nothing more to do. 
2664 */ 2665 MDI_DEBUG(2, (MDI_WARN, pdip, "invalid vhci")); 2666 return (NULL); 2667 } 2668 2669 /* 2670 * Look for pathinfo node identified by paddr. 2671 */ 2672 if (caddr == NULL) { 2673 /* 2674 * Find a mdi_pathinfo node under pHCI list for a matching 2675 * unit address. 2676 */ 2677 MDI_PHCI_LOCK(ph); 2678 if (MDI_PHCI_IS_OFFLINE(ph)) { 2679 MDI_DEBUG(2, (MDI_WARN, pdip, 2680 "offline phci %p", (void *)ph)); 2681 MDI_PHCI_UNLOCK(ph); 2682 return (NULL); 2683 } 2684 pip = (mdi_pathinfo_t *)ph->ph_path_head; 2685 2686 while (pip != NULL) { 2687 if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2688 break; 2689 } 2690 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 2691 } 2692 MDI_PHCI_UNLOCK(ph); 2693 MDI_DEBUG(2, (MDI_NOTE, pdip, 2694 "found %s %p", mdi_pi_spathname(pip), (void *)pip)); 2695 return (pip); 2696 } 2697 2698 /* 2699 * XXX - Is the rest of the code in this function really necessary? 2700 * The consumers of mdi_pi_find() can search for the desired pathinfo 2701 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of 2702 * whether the search is based on the pathinfo nodes attached to 2703 * the pHCI or the client node, the result will be the same. 2704 */ 2705 2706 /* 2707 * Find the client device corresponding to 'caddr' 2708 */ 2709 MDI_VHCI_CLIENT_LOCK(vh); 2710 2711 /* 2712 * XXX - Passing NULL to the following function works as long as the 2713 * the client addresses (caddr) are unique per vhci basis. 2714 */ 2715 ct = i_mdi_client_find(vh, NULL, caddr); 2716 if (ct == NULL) { 2717 /* 2718 * Client not found, Obviously mdi_pathinfo node has not been 2719 * created yet. 2720 */ 2721 MDI_VHCI_CLIENT_UNLOCK(vh); 2722 MDI_DEBUG(2, (MDI_NOTE, pdip, 2723 "client not found for caddr @%s", caddr ? 
caddr : "")); 2724 return (NULL); 2725 } 2726 2727 /* 2728 * Hold the client lock and look for a mdi_pathinfo node with matching 2729 * pHCI and paddr 2730 */ 2731 MDI_CLIENT_LOCK(ct); 2732 2733 /* 2734 * Release the global mutex as it is no more needed. Note: We always 2735 * respect the locking order while acquiring. 2736 */ 2737 MDI_VHCI_CLIENT_UNLOCK(vh); 2738 2739 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2740 while (pip != NULL) { 2741 /* 2742 * Compare the unit address 2743 */ 2744 if ((MDI_PI(pip)->pi_phci == ph) && 2745 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2746 break; 2747 } 2748 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2749 } 2750 MDI_CLIENT_UNLOCK(ct); 2751 MDI_DEBUG(2, (MDI_NOTE, pdip, 2752 "found: %s %p", mdi_pi_spathname(pip), (void *)pip)); 2753 return (pip); 2754 } 2755 2756 /* 2757 * mdi_pi_alloc(): 2758 * Allocate and initialize a new instance of a mdi_pathinfo node. 2759 * The mdi_pathinfo node returned by this function identifies a 2760 * unique device path is capable of having properties attached 2761 * and passed to mdi_pi_online() to fully attach and online the 2762 * path and client device node. 2763 * The mdi_pathinfo node returned by this function must be 2764 * destroyed using mdi_pi_free() if the path is no longer 2765 * operational or if the caller fails to attach a client device 2766 * node when calling mdi_pi_online(). The framework will not free 2767 * the resources allocated. 2768 * This function can be called from both interrupt and kernel 2769 * contexts. DDI_NOSLEEP flag should be used while calling 2770 * from interrupt contexts. 
2771 * Return Values: 2772 * MDI_SUCCESS 2773 * MDI_FAILURE 2774 * MDI_NOMEM 2775 */ 2776 /*ARGSUSED*/ 2777 int 2778 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2779 char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip) 2780 { 2781 mdi_vhci_t *vh; 2782 mdi_phci_t *ph; 2783 mdi_client_t *ct; 2784 mdi_pathinfo_t *pip = NULL; 2785 dev_info_t *cdip; 2786 int rv = MDI_NOMEM; 2787 int path_allocated = 0; 2788 2789 MDI_DEBUG(2, (MDI_NOTE, pdip, 2790 "cname %s: caddr@%s paddr@%s", 2791 cname ? cname : "", caddr ? caddr : "", paddr ? paddr : "")); 2792 2793 if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL || 2794 ret_pip == NULL) { 2795 /* Nothing more to do */ 2796 return (MDI_FAILURE); 2797 } 2798 2799 *ret_pip = NULL; 2800 2801 /* No allocations on detaching pHCI */ 2802 if (DEVI_IS_DETACHING(pdip)) { 2803 /* Invalid pHCI device, return failure */ 2804 MDI_DEBUG(1, (MDI_WARN, pdip, 2805 "!detaching pHCI=%p", (void *)pdip)); 2806 return (MDI_FAILURE); 2807 } 2808 2809 ph = i_devi_get_phci(pdip); 2810 ASSERT(ph != NULL); 2811 if (ph == NULL) { 2812 /* Invalid pHCI device, return failure */ 2813 MDI_DEBUG(1, (MDI_WARN, pdip, 2814 "!invalid pHCI=%p", (void *)pdip)); 2815 return (MDI_FAILURE); 2816 } 2817 2818 MDI_PHCI_LOCK(ph); 2819 vh = ph->ph_vhci; 2820 if (vh == NULL) { 2821 /* Invalid vHCI device, return failure */ 2822 MDI_DEBUG(1, (MDI_WARN, pdip, 2823 "!invalid vHCI=%p", (void *)pdip)); 2824 MDI_PHCI_UNLOCK(ph); 2825 return (MDI_FAILURE); 2826 } 2827 2828 if (MDI_PHCI_IS_READY(ph) == 0) { 2829 /* 2830 * Do not allow new node creation when pHCI is in 2831 * offline/suspended states 2832 */ 2833 MDI_DEBUG(1, (MDI_WARN, pdip, 2834 "pHCI=%p is not ready", (void *)ph)); 2835 MDI_PHCI_UNLOCK(ph); 2836 return (MDI_BUSY); 2837 } 2838 MDI_PHCI_UNSTABLE(ph); 2839 MDI_PHCI_UNLOCK(ph); 2840 2841 /* look for a matching client, create one if not found */ 2842 MDI_VHCI_CLIENT_LOCK(vh); 2843 ct = 
i_mdi_client_find(vh, cname, caddr); 2844 if (ct == NULL) { 2845 ct = i_mdi_client_alloc(vh, cname, caddr); 2846 ASSERT(ct != NULL); 2847 } 2848 2849 if (ct->ct_dip == NULL) { 2850 /* 2851 * Allocate a devinfo node 2852 */ 2853 ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr, 2854 compatible, ncompatible); 2855 if (ct->ct_dip == NULL) { 2856 (void) i_mdi_client_free(vh, ct); 2857 goto fail; 2858 } 2859 } 2860 cdip = ct->ct_dip; 2861 2862 DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT; 2863 DEVI(cdip)->devi_mdi_client = (caddr_t)ct; 2864 2865 MDI_CLIENT_LOCK(ct); 2866 pip = (mdi_pathinfo_t *)ct->ct_path_head; 2867 while (pip != NULL) { 2868 /* 2869 * Compare the unit address 2870 */ 2871 if ((MDI_PI(pip)->pi_phci == ph) && 2872 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) { 2873 break; 2874 } 2875 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 2876 } 2877 MDI_CLIENT_UNLOCK(ct); 2878 2879 if (pip == NULL) { 2880 /* 2881 * This is a new path for this client device. Allocate and 2882 * initialize a new pathinfo node 2883 */ 2884 pip = i_mdi_pi_alloc(ph, paddr, ct); 2885 ASSERT(pip != NULL); 2886 path_allocated = 1; 2887 } 2888 rv = MDI_SUCCESS; 2889 2890 fail: 2891 /* 2892 * Release the global mutex. 
2893 */ 2894 MDI_VHCI_CLIENT_UNLOCK(vh); 2895 2896 /* 2897 * Mark the pHCI as stable 2898 */ 2899 MDI_PHCI_LOCK(ph); 2900 MDI_PHCI_STABLE(ph); 2901 MDI_PHCI_UNLOCK(ph); 2902 *ret_pip = pip; 2903 2904 MDI_DEBUG(2, (MDI_NOTE, pdip, 2905 "alloc %s %p", mdi_pi_spathname(pip), (void *)pip)); 2906 2907 if (path_allocated) 2908 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 2909 2910 return (rv); 2911 } 2912 2913 /*ARGSUSED*/ 2914 int 2915 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr, 2916 int flags, mdi_pathinfo_t **ret_pip) 2917 { 2918 return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0, 2919 flags, ret_pip)); 2920 } 2921 2922 /* 2923 * i_mdi_pi_alloc(): 2924 * Allocate a mdi_pathinfo node and add to the pHCI path list 2925 * Return Values: 2926 * mdi_pathinfo 2927 */ 2928 /*ARGSUSED*/ 2929 static mdi_pathinfo_t * 2930 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct) 2931 { 2932 mdi_pathinfo_t *pip; 2933 int ct_circular; 2934 int ph_circular; 2935 static char path[MAXPATHLEN]; /* mdi_pathmap_mutex protects */ 2936 char *path_persistent; 2937 int path_instance; 2938 mod_hash_val_t hv; 2939 2940 ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci)); 2941 2942 pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP); 2943 mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL); 2944 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT | 2945 MDI_PATHINFO_STATE_TRANSIENT; 2946 2947 if (MDI_PHCI_IS_USER_DISABLED(ph)) 2948 MDI_PI_SET_USER_DISABLE(pip); 2949 2950 if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph)) 2951 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 2952 2953 if (MDI_PHCI_IS_DRV_DISABLED(ph)) 2954 MDI_PI_SET_DRV_DISABLE(pip); 2955 2956 MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT; 2957 cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL); 2958 MDI_PI(pip)->pi_client = ct; 2959 MDI_PI(pip)->pi_phci = ph; 2960 MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP); 2961 (void) strcpy(MDI_PI(pip)->pi_addr, paddr); 2962 
2963 /* 2964 * We form the "path" to the pathinfo node, and see if we have 2965 * already allocated a 'path_instance' for that "path". If so, 2966 * we use the already allocated 'path_instance'. If not, we 2967 * allocate a new 'path_instance' and associate it with a copy of 2968 * the "path" string (which is never freed). The association 2969 * between a 'path_instance' this "path" string persists until 2970 * reboot. 2971 */ 2972 mutex_enter(&mdi_pathmap_mutex); 2973 (void) ddi_pathname(ph->ph_dip, path); 2974 (void) sprintf(path + strlen(path), "/%s@%s", 2975 mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip)); 2976 if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) { 2977 path_instance = (uint_t)(intptr_t)hv; 2978 } else { 2979 /* allocate a new 'path_instance' and persistent "path" */ 2980 path_instance = mdi_pathmap_instance++; 2981 path_persistent = i_ddi_strdup(path, KM_SLEEP); 2982 (void) mod_hash_insert(mdi_pathmap_bypath, 2983 (mod_hash_key_t)path_persistent, 2984 (mod_hash_val_t)(intptr_t)path_instance); 2985 (void) mod_hash_insert(mdi_pathmap_byinstance, 2986 (mod_hash_key_t)(intptr_t)path_instance, 2987 (mod_hash_val_t)path_persistent); 2988 2989 /* create shortpath name */ 2990 (void) snprintf(path, sizeof(path), "%s%d/%s@%s", 2991 ddi_driver_name(ph->ph_dip), ddi_get_instance(ph->ph_dip), 2992 mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip)); 2993 path_persistent = i_ddi_strdup(path, KM_SLEEP); 2994 (void) mod_hash_insert(mdi_pathmap_sbyinstance, 2995 (mod_hash_key_t)(intptr_t)path_instance, 2996 (mod_hash_val_t)path_persistent); 2997 } 2998 mutex_exit(&mdi_pathmap_mutex); 2999 MDI_PI(pip)->pi_path_instance = path_instance; 3000 3001 (void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP); 3002 ASSERT(MDI_PI(pip)->pi_prop != NULL); 3003 MDI_PI(pip)->pi_pprivate = NULL; 3004 MDI_PI(pip)->pi_cprivate = NULL; 3005 MDI_PI(pip)->pi_vprivate = NULL; 3006 MDI_PI(pip)->pi_client_link = NULL; 3007 MDI_PI(pip)->pi_phci_link = 
NULL; 3008 MDI_PI(pip)->pi_ref_cnt = 0; 3009 MDI_PI(pip)->pi_kstats = NULL; 3010 MDI_PI(pip)->pi_preferred = 1; 3011 cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL); 3012 3013 /* 3014 * Lock both dev_info nodes against changes in parallel. 3015 * 3016 * The ndi_devi_enter(Client), is atypical since the client is a leaf. 3017 * This atypical operation is done to synchronize pathinfo nodes 3018 * during devinfo snapshot (see di_register_pip) by 'pretending' that 3019 * the pathinfo nodes are children of the Client. 3020 */ 3021 ndi_devi_enter(ct->ct_dip, &ct_circular); 3022 ndi_devi_enter(ph->ph_dip, &ph_circular); 3023 3024 i_mdi_phci_add_path(ph, pip); 3025 i_mdi_client_add_path(ct, pip); 3026 3027 ndi_devi_exit(ph->ph_dip, ph_circular); 3028 ndi_devi_exit(ct->ct_dip, ct_circular); 3029 3030 return (pip); 3031 } 3032 3033 /* 3034 * mdi_pi_pathname_by_instance(): 3035 * Lookup of "path" by 'path_instance'. Return "path". 3036 * NOTE: returned "path" remains valid forever (until reboot). 3037 */ 3038 char * 3039 mdi_pi_pathname_by_instance(int path_instance) 3040 { 3041 char *path; 3042 mod_hash_val_t hv; 3043 3044 /* mdi_pathmap lookup of "path" by 'path_instance' */ 3045 mutex_enter(&mdi_pathmap_mutex); 3046 if (mod_hash_find(mdi_pathmap_byinstance, 3047 (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0) 3048 path = (char *)hv; 3049 else 3050 path = NULL; 3051 mutex_exit(&mdi_pathmap_mutex); 3052 return (path); 3053 } 3054 3055 /* 3056 * mdi_pi_spathname_by_instance(): 3057 * Lookup of "shortpath" by 'path_instance'. Return "shortpath". 3058 * NOTE: returned "shortpath" remains valid forever (until reboot). 
3059 */ 3060 char * 3061 mdi_pi_spathname_by_instance(int path_instance) 3062 { 3063 char *path; 3064 mod_hash_val_t hv; 3065 3066 /* mdi_pathmap lookup of "path" by 'path_instance' */ 3067 mutex_enter(&mdi_pathmap_mutex); 3068 if (mod_hash_find(mdi_pathmap_sbyinstance, 3069 (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0) 3070 path = (char *)hv; 3071 else 3072 path = NULL; 3073 mutex_exit(&mdi_pathmap_mutex); 3074 return (path); 3075 } 3076 3077 3078 /* 3079 * i_mdi_phci_add_path(): 3080 * Add a mdi_pathinfo node to pHCI list. 3081 * Notes: 3082 * Caller should per-pHCI mutex 3083 */ 3084 static void 3085 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3086 { 3087 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3088 3089 MDI_PHCI_LOCK(ph); 3090 if (ph->ph_path_head == NULL) { 3091 ph->ph_path_head = pip; 3092 } else { 3093 MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip); 3094 } 3095 ph->ph_path_tail = pip; 3096 ph->ph_path_count++; 3097 MDI_PHCI_UNLOCK(ph); 3098 } 3099 3100 /* 3101 * i_mdi_client_add_path(): 3102 * Add mdi_pathinfo node to client list 3103 */ 3104 static void 3105 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip) 3106 { 3107 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip)); 3108 3109 MDI_CLIENT_LOCK(ct); 3110 if (ct->ct_path_head == NULL) { 3111 ct->ct_path_head = pip; 3112 } else { 3113 MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip); 3114 } 3115 ct->ct_path_tail = pip; 3116 ct->ct_path_count++; 3117 MDI_CLIENT_UNLOCK(ct); 3118 } 3119 3120 /* 3121 * mdi_pi_free(): 3122 * Free the mdi_pathinfo node and also client device node if this 3123 * is the last path to the device 3124 * Return Values: 3125 * MDI_SUCCESS 3126 * MDI_FAILURE 3127 * MDI_BUSY 3128 */ 3129 /*ARGSUSED*/ 3130 int 3131 mdi_pi_free(mdi_pathinfo_t *pip, int flags) 3132 { 3133 int rv; 3134 mdi_vhci_t *vh; 3135 mdi_phci_t *ph; 3136 mdi_client_t *ct; 3137 int (*f)(); 3138 int client_held = 0; 3139 3140 MDI_PI_LOCK(pip); 3141 ph = MDI_PI(pip)->pi_phci; 3142 ASSERT(ph != NULL); 
3143 if (ph == NULL) { 3144 /* 3145 * Invalid pHCI device, return failure 3146 */ 3147 MDI_DEBUG(1, (MDI_WARN, NULL, 3148 "!invalid pHCI: pip %s %p", 3149 mdi_pi_spathname(pip), (void *)pip)); 3150 MDI_PI_UNLOCK(pip); 3151 return (MDI_FAILURE); 3152 } 3153 3154 vh = ph->ph_vhci; 3155 ASSERT(vh != NULL); 3156 if (vh == NULL) { 3157 /* Invalid pHCI device, return failure */ 3158 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip, 3159 "!invalid vHCI: pip %s %p", 3160 mdi_pi_spathname(pip), (void *)pip)); 3161 MDI_PI_UNLOCK(pip); 3162 return (MDI_FAILURE); 3163 } 3164 3165 ct = MDI_PI(pip)->pi_client; 3166 ASSERT(ct != NULL); 3167 if (ct == NULL) { 3168 /* 3169 * Invalid Client device, return failure 3170 */ 3171 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip, 3172 "!invalid client: pip %s %p", 3173 mdi_pi_spathname(pip), (void *)pip)); 3174 MDI_PI_UNLOCK(pip); 3175 return (MDI_FAILURE); 3176 } 3177 3178 /* 3179 * Check to see for busy condition. A mdi_pathinfo can only be freed 3180 * if the node state is either offline or init and the reference count 3181 * is zero. 3182 */ 3183 if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) || 3184 MDI_PI_IS_INITING(pip))) { 3185 /* 3186 * Node is busy 3187 */ 3188 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3189 "!busy: pip %s %p", mdi_pi_spathname(pip), (void *)pip)); 3190 MDI_PI_UNLOCK(pip); 3191 return (MDI_BUSY); 3192 } 3193 3194 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3195 /* 3196 * Give a chance for pending I/Os to complete. 3197 */ 3198 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3199 "!%d cmds still pending on path: %s %p", 3200 MDI_PI(pip)->pi_ref_cnt, 3201 mdi_pi_spathname(pip), (void *)pip)); 3202 if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv, 3203 &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000), 3204 TR_CLOCK_TICK) == -1) { 3205 /* 3206 * The timeout time reached without ref_cnt being zero 3207 * being signaled. 
3208 */ 3209 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3210 "!Timeout reached on path %s %p without the cond", 3211 mdi_pi_spathname(pip), (void *)pip)); 3212 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3213 "!%d cmds still pending on path %s %p", 3214 MDI_PI(pip)->pi_ref_cnt, 3215 mdi_pi_spathname(pip), (void *)pip)); 3216 MDI_PI_UNLOCK(pip); 3217 return (MDI_BUSY); 3218 } 3219 } 3220 if (MDI_PI(pip)->pi_pm_held) { 3221 client_held = 1; 3222 } 3223 MDI_PI_UNLOCK(pip); 3224 3225 vhcache_pi_remove(vh->vh_config, MDI_PI(pip)); 3226 3227 MDI_CLIENT_LOCK(ct); 3228 3229 /* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */ 3230 MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct); 3231 3232 /* 3233 * Wait till failover is complete before removing this node. 3234 */ 3235 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) 3236 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex); 3237 3238 MDI_CLIENT_UNLOCK(ct); 3239 MDI_VHCI_CLIENT_LOCK(vh); 3240 MDI_CLIENT_LOCK(ct); 3241 MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct); 3242 3243 if (!MDI_PI_IS_INITING(pip)) { 3244 f = vh->vh_ops->vo_pi_uninit; 3245 if (f != NULL) { 3246 rv = (*f)(vh->vh_dip, pip, 0); 3247 } 3248 } else 3249 rv = MDI_SUCCESS; 3250 3251 /* 3252 * If vo_pi_uninit() completed successfully. 3253 */ 3254 if (rv == MDI_SUCCESS) { 3255 if (client_held) { 3256 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3257 "i_mdi_pm_rele_client\n")); 3258 i_mdi_pm_rele_client(ct, 1); 3259 } 3260 i_mdi_pi_free(ph, pip, ct); 3261 if (ct->ct_path_count == 0) { 3262 /* 3263 * Client lost its last path. 
3264 * Clean up the client device 3265 */ 3266 MDI_CLIENT_UNLOCK(ct); 3267 (void) i_mdi_client_free(ct->ct_vhci, ct); 3268 MDI_VHCI_CLIENT_UNLOCK(vh); 3269 return (rv); 3270 } 3271 } 3272 MDI_CLIENT_UNLOCK(ct); 3273 MDI_VHCI_CLIENT_UNLOCK(vh); 3274 3275 if (rv == MDI_FAILURE) 3276 vhcache_pi_add(vh->vh_config, MDI_PI(pip)); 3277 3278 return (rv); 3279 } 3280 3281 /* 3282 * i_mdi_pi_free(): 3283 * Free the mdi_pathinfo node 3284 */ 3285 static void 3286 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct) 3287 { 3288 int ct_circular; 3289 int ph_circular; 3290 3291 ASSERT(MDI_CLIENT_LOCKED(ct)); 3292 3293 /* 3294 * remove any per-path kstats 3295 */ 3296 i_mdi_pi_kstat_destroy(pip); 3297 3298 /* See comments in i_mdi_pi_alloc() */ 3299 ndi_devi_enter(ct->ct_dip, &ct_circular); 3300 ndi_devi_enter(ph->ph_dip, &ph_circular); 3301 3302 i_mdi_client_remove_path(ct, pip); 3303 i_mdi_phci_remove_path(ph, pip); 3304 3305 ndi_devi_exit(ph->ph_dip, ph_circular); 3306 ndi_devi_exit(ct->ct_dip, ct_circular); 3307 3308 mutex_destroy(&MDI_PI(pip)->pi_mutex); 3309 cv_destroy(&MDI_PI(pip)->pi_state_cv); 3310 cv_destroy(&MDI_PI(pip)->pi_ref_cv); 3311 if (MDI_PI(pip)->pi_addr) { 3312 kmem_free(MDI_PI(pip)->pi_addr, 3313 strlen(MDI_PI(pip)->pi_addr) + 1); 3314 MDI_PI(pip)->pi_addr = NULL; 3315 } 3316 3317 if (MDI_PI(pip)->pi_prop) { 3318 (void) nvlist_free(MDI_PI(pip)->pi_prop); 3319 MDI_PI(pip)->pi_prop = NULL; 3320 } 3321 kmem_free(pip, sizeof (struct mdi_pathinfo)); 3322 } 3323 3324 3325 /* 3326 * i_mdi_phci_remove_path(): 3327 * Remove a mdi_pathinfo node from pHCI list. 
3328 * Notes: 3329 * Caller should hold per-pHCI mutex 3330 */ 3331 static void 3332 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip) 3333 { 3334 mdi_pathinfo_t *prev = NULL; 3335 mdi_pathinfo_t *path = NULL; 3336 3337 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip)); 3338 3339 MDI_PHCI_LOCK(ph); 3340 path = ph->ph_path_head; 3341 while (path != NULL) { 3342 if (path == pip) { 3343 break; 3344 } 3345 prev = path; 3346 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3347 } 3348 3349 if (path) { 3350 ph->ph_path_count--; 3351 if (prev) { 3352 MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link; 3353 } else { 3354 ph->ph_path_head = 3355 (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link; 3356 } 3357 if (ph->ph_path_tail == path) { 3358 ph->ph_path_tail = prev; 3359 } 3360 } 3361 3362 /* 3363 * Clear the pHCI link 3364 */ 3365 MDI_PI(pip)->pi_phci_link = NULL; 3366 MDI_PI(pip)->pi_phci = NULL; 3367 MDI_PHCI_UNLOCK(ph); 3368 } 3369 3370 /* 3371 * i_mdi_client_remove_path(): 3372 * Remove a mdi_pathinfo node from client path list. 
 */
static void
i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	mdi_pathinfo_t	*prev = NULL;
	mdi_pathinfo_t	*path;

	ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));

	ASSERT(MDI_CLIENT_LOCKED(ct));

	/*
	 * Walk the client's singly linked path list looking for pip,
	 * remembering the predecessor so the node can be unlinked.
	 */
	path = ct->ct_path_head;
	while (path != NULL) {
		if (path == pip) {
			break;
		}
		prev = path;
		path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
	}

	if (path) {
		ct->ct_path_count--;
		if (prev) {
			MDI_PI(prev)->pi_client_link =
			    MDI_PI(path)->pi_client_link;
		} else {
			/* pip was the first entry: advance the list head */
			ct->ct_path_head =
			    (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
		}
		if (ct->ct_path_tail == path) {
			ct->ct_path_tail = prev;
		}
		if (ct->ct_path_last == path) {
			/* do not leave ct_path_last pointing at the removed node */
			ct->ct_path_last = ct->ct_path_head;
		}
	}

	/*
	 * Detach pip from the client unconditionally, even when it was not
	 * found on the list.
	 */
	MDI_PI(pip)->pi_client_link = NULL;
	MDI_PI(pip)->pi_client = NULL;
}

/*
 * i_mdi_pi_state_change():
 *		Common worker for mdi_pi_online(), mdi_pi_standby(),
 *		mdi_pi_fault() and mdi_pi_offline().  Marks the mdi_pathinfo
 *		node transient, calls the vHCI driver's vo_pi_state_change
 *		entry point (when one is registered) for the requested state
 *		and then updates the client state, onlining or offlining the
 *		client dev_info node where that is required.
 *
 *		The pip, client and pHCI locks are all acquired and released
 *		internally; none of them is held on return.
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY	(pHCI not ready, or the client could not be
 *				offlined because it is busy)
 *		Any other failure code returned by vo_pi_state_change is
 *		passed back unchanged.
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	dev_info_t	*cdip;

	MDI_PI_LOCK(pip);

	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (MDI_WARN, NULL,
		    "!invalid phci: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh);
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
		    "!invalid vhci: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid client device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
		    "!invalid client: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		return (MDI_FAILURE);
	}

	/*
	 * If this path has not been initialized yet, Callback vHCI driver's
	 * pathinfo node initialize entry point
	 */

	if (MDI_PI_IS_INITING(pip)) {
		/* the callback is made without the pip lock held */
		MDI_PI_UNLOCK(pip);
		f = vh->vh_ops->vo_pi_init;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
			if (rv != MDI_SUCCESS) {
				MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
				    "!vo_pi_init failed: vHCI %p, pip %s %p",
				    (void *)vh, mdi_pi_spathname(pip),
				    (void *)pip));
				/* pip lock was already dropped above */
				return (MDI_FAILURE);
			}
		}
		MDI_PI_LOCK(pip);
		MDI_PI_CLEAR_TRANSIENT(pip);
	}

	/*
	 * Do not allow state transition when pHCI is in offline/suspended
	 * states
	 */
	i_mdi_phci_lock(ph, pip);
	if (MDI_PHCI_IS_READY(ph) == 0) {
		MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
		    "!pHCI not ready, pHCI=%p", (void *)ph));
		MDI_PI_UNLOCK(pip);
		i_mdi_phci_unlock(ph);
		return (MDI_BUSY);
	}
	/*
	 * The pHCI stays marked unstable until state_change_exit; every
	 * return path from here on must go through that label.
	 */
	MDI_PHCI_UNSTABLE(ph);
	i_mdi_phci_unlock(ph);

	/*
	 * Check if mdi_pathinfo state is in transient state.
	 * If yes, offlining is in progress and wait till transient state is
	 * cleared.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		while (MDI_PI_IS_TRANSIENT(pip)) {
			cv_wait(&MDI_PI(pip)->pi_state_cv,
			    &MDI_PI(pip)->pi_mutex);
		}
	}

	/*
	 * Grab the client lock in reverse order sequence and release the
	 * mdi_pathinfo mutex.
	 */
	i_mdi_client_lock(ct, pip);
	MDI_PI_UNLOCK(pip);

	/*
	 * Wait till failover state is cleared
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Mark the mdi_pathinfo node state as transient
	 */
	MDI_PI_LOCK(pip);
	switch (state) {
	case MDI_PATHINFO_STATE_ONLINE:
		MDI_PI_SET_ONLINING(pip);
		break;

	case MDI_PATHINFO_STATE_STANDBY:
		MDI_PI_SET_STANDBYING(pip);
		break;

	case MDI_PATHINFO_STATE_FAULT:
		/*
		 * Mark the pathinfo state as FAULTED
		 */
		MDI_PI_SET_FAULTING(pip);
		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
		break;

	case MDI_PATHINFO_STATE_OFFLINE:
		/*
		 * ndi_devi_offline() cannot hold pip or ct locks.
		 */
		MDI_PI_UNLOCK(pip);

		/*
		 * If this is a user initiated path online->offline operation
		 * who's success would transition a client from DEGRADED to
		 * FAILED then only proceed if we can offline the client first.
		 */
		cdip = ct->ct_dip;
		if ((flag & NDI_USER_REQ) &&
		    MDI_PI_IS_ONLINE(pip) &&
		    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
			i_mdi_client_unlock(ct);
			rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN);
			if (rv != NDI_SUCCESS) {
				/*
				 * Convert to MDI error code
				 */
				switch (rv) {
				case NDI_BUSY:
					rv = MDI_BUSY;
					break;
				default:
					rv = MDI_FAILURE;
					break;
				}
				/*
				 * Neither the client nor the pip lock is
				 * held here; only the pHCI needs to be
				 * marked stable again.
				 */
				goto state_change_exit;
			} else {
				i_mdi_client_lock(ct, NULL);
			}
		}
		/*
		 * Mark the mdi_pathinfo node state as transient
		 */
		MDI_PI_LOCK(pip);
		MDI_PI_SET_OFFLINING(pip);
		break;
	}
	MDI_PI_UNLOCK(pip);
	MDI_CLIENT_UNSTABLE(ct);
	i_mdi_client_unlock(ct);

	/*
	 * Call the vHCI without any MDI locks held.  When no callback is
	 * registered rv keeps the value assigned above.
	 */
	f = vh->vh_ops->vo_pi_state_change;
	if (f != NULL)
		rv = (*f)(vh->vh_dip, pip, state, 0, flag);

	MDI_CLIENT_LOCK(ct);
	MDI_PI_LOCK(pip);
	if (rv == MDI_NOT_SUPPORTED) {
		MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
	}
	if (rv != MDI_SUCCESS) {
		MDI_DEBUG(2, (MDI_WARN, ct->ct_dip,
		    "vo_pi_state_change failed: rv %x", rv));
	}
	if (MDI_PI_IS_TRANSIENT(pip)) {
		if (rv == MDI_SUCCESS) {
			MDI_PI_CLEAR_TRANSIENT(pip);
		} else {
			/* the transition failed: roll back to the old state */
			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
		}
	}

	/*
	 * Wake anyone waiting for this mdi_pathinfo node
	 */
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);

	/*
	 * Mark the client device as stable
	 */
	MDI_CLIENT_STABLE(ct);
	if (rv == MDI_SUCCESS) {
		if (ct->ct_unstable == 0) {
			cdip = ct->ct_dip;

			/*
			 * Onlining the mdi_pathinfo node will impact the
			 * client state Update the client and dev_info node
			 * state accordingly
			 */
			rv = NDI_SUCCESS;
			i_mdi_client_update_state(ct);
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip && !i_ddi_devi_attached(cdip) &&
				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
				    (state == MDI_PATHINFO_STATE_STANDBY))) {

					/*
					 * Must do ndi_devi_online() through
					 * hotplug thread for deferred
					 * attach mechanism to work
					 */
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_online(cdip, 0);
					MDI_CLIENT_LOCK(ct);
					if ((rv != NDI_SUCCESS) &&
					    (MDI_CLIENT_STATE(ct) ==
					    MDI_CLIENT_STATE_DEGRADED)) {
						MDI_DEBUG(1, (MDI_WARN, cdip,
						    "!ndi_devi_online failed "
						    "error %x", rv));
					}
					/*
					 * A failed client online does not
					 * fail the path state change.
					 */
					rv = NDI_SUCCESS;
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				/*
				 * This is the last path case for
				 * non-user initiated events.
				 */
				if (((flag & NDI_USER_REQ) == 0) &&
				    cdip && (i_ddi_node_state(cdip) >=
				    DS_INITIALIZED)) {
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_offline(cdip,
					    NDI_DEVFS_CLEAN);
					MDI_CLIENT_LOCK(ct);

					if (rv != NDI_SUCCESS) {
						/*
						 * ndi_devi_offline failed.
						 * Reset client flags to
						 * online as the path could not
						 * be offlined.
						 */
						MDI_DEBUG(1, (MDI_WARN, cdip,
						    "!ndi_devi_offline failed: "
						    "error %x", rv));
						MDI_CLIENT_SET_ONLINE(ct);
					}
				}
				break;
			}
			/*
			 * Convert to MDI error code
			 */
			switch (rv) {
			case NDI_SUCCESS:
				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
				i_mdi_report_path_state(ct, pip);
				rv = MDI_SUCCESS;
				break;
			case NDI_BUSY:
				rv = MDI_BUSY;
				break;
			default:
				rv = MDI_FAILURE;
				break;
			}
		}
	}
	MDI_CLIENT_UNLOCK(ct);

state_change_exit:
	/*
	 * Mark the pHCI as stable again.
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}

/*
 * mdi_pi_online():
 *		Place the path_info node in the online state.
The path is 3729 * now available to be selected by mdi_select_path() for 3730 * transporting I/O requests to client devices. 3731 * Return Values: 3732 * MDI_SUCCESS 3733 * MDI_FAILURE 3734 */ 3735 int 3736 mdi_pi_online(mdi_pathinfo_t *pip, int flags) 3737 { 3738 mdi_client_t *ct = MDI_PI(pip)->pi_client; 3739 int client_held = 0; 3740 int rv; 3741 3742 ASSERT(ct != NULL); 3743 rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags); 3744 if (rv != MDI_SUCCESS) 3745 return (rv); 3746 3747 MDI_PI_LOCK(pip); 3748 if (MDI_PI(pip)->pi_pm_held == 0) { 3749 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3750 "i_mdi_pm_hold_pip %p", (void *)pip)); 3751 i_mdi_pm_hold_pip(pip); 3752 client_held = 1; 3753 } 3754 MDI_PI_UNLOCK(pip); 3755 3756 if (client_held) { 3757 MDI_CLIENT_LOCK(ct); 3758 if (ct->ct_power_cnt == 0) { 3759 rv = i_mdi_power_all_phci(ct); 3760 } 3761 3762 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3763 "i_mdi_pm_hold_client %p", (void *)ct)); 3764 i_mdi_pm_hold_client(ct, 1); 3765 MDI_CLIENT_UNLOCK(ct); 3766 } 3767 3768 return (rv); 3769 } 3770 3771 /* 3772 * mdi_pi_standby(): 3773 * Place the mdi_pathinfo node in standby state 3774 * 3775 * Return Values: 3776 * MDI_SUCCESS 3777 * MDI_FAILURE 3778 */ 3779 int 3780 mdi_pi_standby(mdi_pathinfo_t *pip, int flags) 3781 { 3782 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags)); 3783 } 3784 3785 /* 3786 * mdi_pi_fault(): 3787 * Place the mdi_pathinfo node in fault'ed state 3788 * Return Values: 3789 * MDI_SUCCESS 3790 * MDI_FAILURE 3791 */ 3792 int 3793 mdi_pi_fault(mdi_pathinfo_t *pip, int flags) 3794 { 3795 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags)); 3796 } 3797 3798 /* 3799 * mdi_pi_offline(): 3800 * Offline a mdi_pathinfo node. 
3801 * Return Values: 3802 * MDI_SUCCESS 3803 * MDI_FAILURE 3804 */ 3805 int 3806 mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3807 { 3808 int ret, client_held = 0; 3809 mdi_client_t *ct; 3810 3811 /* 3812 * Original code overloaded NDI_DEVI_REMOVE to this interface, and 3813 * used it to mean "user initiated operation" (i.e. devctl). Callers 3814 * should now just use NDI_USER_REQ. 3815 */ 3816 if (flags & NDI_DEVI_REMOVE) { 3817 flags &= ~NDI_DEVI_REMOVE; 3818 flags |= NDI_USER_REQ; 3819 } 3820 3821 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags); 3822 3823 if (ret == MDI_SUCCESS) { 3824 MDI_PI_LOCK(pip); 3825 if (MDI_PI(pip)->pi_pm_held) { 3826 client_held = 1; 3827 } 3828 MDI_PI_UNLOCK(pip); 3829 3830 if (client_held) { 3831 ct = MDI_PI(pip)->pi_client; 3832 MDI_CLIENT_LOCK(ct); 3833 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 3834 "i_mdi_pm_rele_client\n")); 3835 i_mdi_pm_rele_client(ct, 1); 3836 MDI_CLIENT_UNLOCK(ct); 3837 } 3838 } 3839 3840 return (ret); 3841 } 3842 3843 /* 3844 * i_mdi_pi_offline(): 3845 * Offline a mdi_pathinfo node and call the vHCI driver's callback 3846 */ 3847 static int 3848 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags) 3849 { 3850 dev_info_t *vdip = NULL; 3851 mdi_vhci_t *vh = NULL; 3852 mdi_client_t *ct = NULL; 3853 int (*f)(); 3854 int rv; 3855 3856 MDI_PI_LOCK(pip); 3857 ct = MDI_PI(pip)->pi_client; 3858 ASSERT(ct != NULL); 3859 3860 while (MDI_PI(pip)->pi_ref_cnt != 0) { 3861 /* 3862 * Give a chance for pending I/Os to complete. 3863 */ 3864 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3865 "!%d cmds still pending on path %s %p", 3866 MDI_PI(pip)->pi_ref_cnt, mdi_pi_spathname(pip), 3867 (void *)pip)); 3868 if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv, 3869 &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000), 3870 TR_CLOCK_TICK) == -1) { 3871 /* 3872 * The timeout time reached without ref_cnt being zero 3873 * being signaled. 
3874 */ 3875 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3876 "!Timeout reached on path %s %p without the cond", 3877 mdi_pi_spathname(pip), (void *)pip)); 3878 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip, 3879 "!%d cmds still pending on path %s %p", 3880 MDI_PI(pip)->pi_ref_cnt, 3881 mdi_pi_spathname(pip), (void *)pip)); 3882 } 3883 } 3884 vh = ct->ct_vhci; 3885 vdip = vh->vh_dip; 3886 3887 /* 3888 * Notify vHCI that has registered this event 3889 */ 3890 ASSERT(vh->vh_ops); 3891 f = vh->vh_ops->vo_pi_state_change; 3892 3893 if (f != NULL) { 3894 MDI_PI_UNLOCK(pip); 3895 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0, 3896 flags)) != MDI_SUCCESS) { 3897 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip, 3898 "!vo_path_offline failed: vdip %s%d %p: path %s %p", 3899 ddi_driver_name(vdip), ddi_get_instance(vdip), 3900 (void *)vdip, mdi_pi_spathname(pip), (void *)pip)); 3901 } 3902 MDI_PI_LOCK(pip); 3903 } 3904 3905 /* 3906 * Set the mdi_pathinfo node state and clear the transient condition 3907 */ 3908 MDI_PI_SET_OFFLINE(pip); 3909 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 3910 MDI_PI_UNLOCK(pip); 3911 3912 MDI_CLIENT_LOCK(ct); 3913 if (rv == MDI_SUCCESS) { 3914 if (ct->ct_unstable == 0) { 3915 dev_info_t *cdip = ct->ct_dip; 3916 3917 /* 3918 * Onlining the mdi_pathinfo node will impact the 3919 * client state Update the client and dev_info node 3920 * state accordingly 3921 */ 3922 i_mdi_client_update_state(ct); 3923 rv = NDI_SUCCESS; 3924 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 3925 if (cdip && 3926 (i_ddi_node_state(cdip) >= 3927 DS_INITIALIZED)) { 3928 MDI_CLIENT_UNLOCK(ct); 3929 rv = ndi_devi_offline(cdip, 3930 NDI_DEVFS_CLEAN); 3931 MDI_CLIENT_LOCK(ct); 3932 if (rv != NDI_SUCCESS) { 3933 /* 3934 * ndi_devi_offline failed. 3935 * Reset client flags to 3936 * online. 
3937 */ 3938 MDI_DEBUG(4, (MDI_WARN, cdip, 3939 "ndi_devi_offline failed: " 3940 "error %x", rv)); 3941 MDI_CLIENT_SET_ONLINE(ct); 3942 } 3943 } 3944 } 3945 /* 3946 * Convert to MDI error code 3947 */ 3948 switch (rv) { 3949 case NDI_SUCCESS: 3950 rv = MDI_SUCCESS; 3951 break; 3952 case NDI_BUSY: 3953 rv = MDI_BUSY; 3954 break; 3955 default: 3956 rv = MDI_FAILURE; 3957 break; 3958 } 3959 } 3960 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 3961 i_mdi_report_path_state(ct, pip); 3962 } 3963 3964 MDI_CLIENT_UNLOCK(ct); 3965 3966 /* 3967 * Change in the mdi_pathinfo node state will impact the client state 3968 */ 3969 MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip, 3970 "ct = %p pip = %p", (void *)ct, (void *)pip)); 3971 return (rv); 3972 } 3973 3974 /* 3975 * i_mdi_pi_online(): 3976 * Online a mdi_pathinfo node and call the vHCI driver's callback 3977 */ 3978 static int 3979 i_mdi_pi_online(mdi_pathinfo_t *pip, int flags) 3980 { 3981 mdi_vhci_t *vh = NULL; 3982 mdi_client_t *ct = NULL; 3983 mdi_phci_t *ph; 3984 int (*f)(); 3985 int rv; 3986 3987 MDI_PI_LOCK(pip); 3988 ph = MDI_PI(pip)->pi_phci; 3989 vh = ph->ph_vhci; 3990 ct = MDI_PI(pip)->pi_client; 3991 MDI_PI_SET_ONLINING(pip) 3992 MDI_PI_UNLOCK(pip); 3993 f = vh->vh_ops->vo_pi_state_change; 3994 if (f != NULL) 3995 rv = (*f)(vh->vh_dip, pip, MDI_PATHINFO_STATE_ONLINE, 0, 3996 flags); 3997 MDI_CLIENT_LOCK(ct); 3998 MDI_PI_LOCK(pip); 3999 cv_broadcast(&MDI_PI(pip)->pi_state_cv); 4000 MDI_PI_UNLOCK(pip); 4001 if (rv == MDI_SUCCESS) { 4002 dev_info_t *cdip = ct->ct_dip; 4003 4004 rv = MDI_SUCCESS; 4005 i_mdi_client_update_state(ct); 4006 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL || 4007 MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) { 4008 if (cdip && !i_ddi_devi_attached(cdip)) { 4009 MDI_CLIENT_UNLOCK(ct); 4010 rv = ndi_devi_online(cdip, 0); 4011 MDI_CLIENT_LOCK(ct); 4012 if ((rv != NDI_SUCCESS) && 4013 (MDI_CLIENT_STATE(ct) == 4014 MDI_CLIENT_STATE_DEGRADED)) { 4015 MDI_CLIENT_SET_OFFLINE(ct); 4016 } 4017 if (rv != 
NDI_SUCCESS) { 4018 /* Reset the path state */ 4019 MDI_PI_LOCK(pip); 4020 MDI_PI(pip)->pi_state = 4021 MDI_PI_OLD_STATE(pip); 4022 MDI_PI_UNLOCK(pip); 4023 } 4024 } 4025 } 4026 switch (rv) { 4027 case NDI_SUCCESS: 4028 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct); 4029 i_mdi_report_path_state(ct, pip); 4030 rv = MDI_SUCCESS; 4031 break; 4032 case NDI_BUSY: 4033 rv = MDI_BUSY; 4034 break; 4035 default: 4036 rv = MDI_FAILURE; 4037 break; 4038 } 4039 } else { 4040 /* Reset the path state */ 4041 MDI_PI_LOCK(pip); 4042 MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip); 4043 MDI_PI_UNLOCK(pip); 4044 } 4045 MDI_CLIENT_UNLOCK(ct); 4046 return (rv); 4047 } 4048 4049 /* 4050 * mdi_pi_get_node_name(): 4051 * Get the name associated with a mdi_pathinfo node. 4052 * Since pathinfo nodes are not directly named, we 4053 * return the node_name of the client. 4054 * 4055 * Return Values: 4056 * char * 4057 */ 4058 char * 4059 mdi_pi_get_node_name(mdi_pathinfo_t *pip) 4060 { 4061 mdi_client_t *ct; 4062 4063 if (pip == NULL) 4064 return (NULL); 4065 ct = MDI_PI(pip)->pi_client; 4066 if ((ct == NULL) || (ct->ct_dip == NULL)) 4067 return (NULL); 4068 return (ddi_node_name(ct->ct_dip)); 4069 } 4070 4071 /* 4072 * mdi_pi_get_addr(): 4073 * Get the unit address associated with a mdi_pathinfo node 4074 * 4075 * Return Values: 4076 * char * 4077 */ 4078 char * 4079 mdi_pi_get_addr(mdi_pathinfo_t *pip) 4080 { 4081 if (pip == NULL) 4082 return (NULL); 4083 4084 return (MDI_PI(pip)->pi_addr); 4085 } 4086 4087 /* 4088 * mdi_pi_get_path_instance(): 4089 * Get the 'path_instance' of a mdi_pathinfo node 4090 * 4091 * Return Values: 4092 * path_instance 4093 */ 4094 int 4095 mdi_pi_get_path_instance(mdi_pathinfo_t *pip) 4096 { 4097 if (pip == NULL) 4098 return (0); 4099 4100 return (MDI_PI(pip)->pi_path_instance); 4101 } 4102 4103 /* 4104 * mdi_pi_pathname(): 4105 * Return pointer to path to pathinfo node. 
4106 */ 4107 char * 4108 mdi_pi_pathname(mdi_pathinfo_t *pip) 4109 { 4110 if (pip == NULL) 4111 return (NULL); 4112 return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip))); 4113 } 4114 4115 /* 4116 * mdi_pi_spathname(): 4117 * Return pointer to shortpath to pathinfo node. Used for debug 4118 * messages, so return "" instead of NULL when unknown. 4119 */ 4120 char * 4121 mdi_pi_spathname(mdi_pathinfo_t *pip) 4122 { 4123 char *spath = ""; 4124 4125 if (pip) { 4126 spath = mdi_pi_spathname_by_instance( 4127 mdi_pi_get_path_instance(pip)); 4128 if (spath == NULL) 4129 spath = ""; 4130 } 4131 return (spath); 4132 } 4133 4134 char * 4135 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path) 4136 { 4137 char *obp_path = NULL; 4138 if ((pip == NULL) || (path == NULL)) 4139 return (NULL); 4140 4141 if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) { 4142 (void) strcpy(path, obp_path); 4143 (void) mdi_prop_free(obp_path); 4144 } else { 4145 path = NULL; 4146 } 4147 return (path); 4148 } 4149 4150 int 4151 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component) 4152 { 4153 dev_info_t *pdip; 4154 char *obp_path = NULL; 4155 int rc = MDI_FAILURE; 4156 4157 if (pip == NULL) 4158 return (MDI_FAILURE); 4159 4160 pdip = mdi_pi_get_phci(pip); 4161 if (pdip == NULL) 4162 return (MDI_FAILURE); 4163 4164 obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 4165 4166 if (ddi_pathname_obp(pdip, obp_path) == NULL) { 4167 (void) ddi_pathname(pdip, obp_path); 4168 } 4169 4170 if (component) { 4171 (void) strncat(obp_path, "/", MAXPATHLEN); 4172 (void) strncat(obp_path, component, MAXPATHLEN); 4173 } 4174 rc = mdi_prop_update_string(pip, "obp-path", obp_path); 4175 4176 if (obp_path) 4177 kmem_free(obp_path, MAXPATHLEN); 4178 return (rc); 4179 } 4180 4181 /* 4182 * mdi_pi_get_client(): 4183 * Get the client devinfo associated with a mdi_pathinfo node 4184 * 4185 * Return Values: 4186 * Handle to client device dev_info node 4187 */ 4188 dev_info_t * 4189 
mdi_pi_get_client(mdi_pathinfo_t *pip) 4190 { 4191 dev_info_t *dip = NULL; 4192 if (pip) { 4193 dip = MDI_PI(pip)->pi_client->ct_dip; 4194 } 4195 return (dip); 4196 } 4197 4198 /* 4199 * mdi_pi_get_phci(): 4200 * Get the pHCI devinfo associated with the mdi_pathinfo node 4201 * Return Values: 4202 * Handle to dev_info node 4203 */ 4204 dev_info_t * 4205 mdi_pi_get_phci(mdi_pathinfo_t *pip) 4206 { 4207 dev_info_t *dip = NULL; 4208 mdi_phci_t *ph; 4209 4210 if (pip) { 4211 ph = MDI_PI(pip)->pi_phci; 4212 if (ph) 4213 dip = ph->ph_dip; 4214 } 4215 return (dip); 4216 } 4217 4218 /* 4219 * mdi_pi_get_client_private(): 4220 * Get the client private information associated with the 4221 * mdi_pathinfo node 4222 */ 4223 void * 4224 mdi_pi_get_client_private(mdi_pathinfo_t *pip) 4225 { 4226 void *cprivate = NULL; 4227 if (pip) { 4228 cprivate = MDI_PI(pip)->pi_cprivate; 4229 } 4230 return (cprivate); 4231 } 4232 4233 /* 4234 * mdi_pi_set_client_private(): 4235 * Set the client private information in the mdi_pathinfo node 4236 */ 4237 void 4238 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv) 4239 { 4240 if (pip) { 4241 MDI_PI(pip)->pi_cprivate = priv; 4242 } 4243 } 4244 4245 /* 4246 * mdi_pi_get_phci_private(): 4247 * Get the pHCI private information associated with the 4248 * mdi_pathinfo node 4249 */ 4250 caddr_t 4251 mdi_pi_get_phci_private(mdi_pathinfo_t *pip) 4252 { 4253 caddr_t pprivate = NULL; 4254 4255 if (pip) { 4256 pprivate = MDI_PI(pip)->pi_pprivate; 4257 } 4258 return (pprivate); 4259 } 4260 4261 /* 4262 * mdi_pi_set_phci_private(): 4263 * Set the pHCI private information in the mdi_pathinfo node 4264 */ 4265 void 4266 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv) 4267 { 4268 if (pip) { 4269 MDI_PI(pip)->pi_pprivate = priv; 4270 } 4271 } 4272 4273 /* 4274 * mdi_pi_get_state(): 4275 * Get the mdi_pathinfo node state. 
Transient states are internal 4276 * and not provided to the users 4277 */ 4278 mdi_pathinfo_state_t 4279 mdi_pi_get_state(mdi_pathinfo_t *pip) 4280 { 4281 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT; 4282 4283 if (pip) { 4284 if (MDI_PI_IS_TRANSIENT(pip)) { 4285 /* 4286 * mdi_pathinfo is in state transition. Return the 4287 * last good state. 4288 */ 4289 state = MDI_PI_OLD_STATE(pip); 4290 } else { 4291 state = MDI_PI_STATE(pip); 4292 } 4293 } 4294 return (state); 4295 } 4296 4297 /* 4298 * mdi_pi_get_flags(): 4299 * Get the mdi_pathinfo node flags. 4300 */ 4301 uint_t 4302 mdi_pi_get_flags(mdi_pathinfo_t *pip) 4303 { 4304 return (pip ? MDI_PI(pip)->pi_flags : 0); 4305 } 4306 4307 /* 4308 * Note that the following function needs to be the new interface for 4309 * mdi_pi_get_state when mpxio gets integrated to ON. 4310 */ 4311 int 4312 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state, 4313 uint32_t *ext_state) 4314 { 4315 *state = MDI_PATHINFO_STATE_INIT; 4316 4317 if (pip) { 4318 if (MDI_PI_IS_TRANSIENT(pip)) { 4319 /* 4320 * mdi_pathinfo is in state transition. Return the 4321 * last good state. 
4322 */ 4323 *state = MDI_PI_OLD_STATE(pip); 4324 *ext_state = MDI_PI_OLD_EXT_STATE(pip); 4325 } else { 4326 *state = MDI_PI_STATE(pip); 4327 *ext_state = MDI_PI_EXT_STATE(pip); 4328 } 4329 } 4330 return (MDI_SUCCESS); 4331 } 4332 4333 /* 4334 * mdi_pi_get_preferred: 4335 * Get the preferred path flag 4336 */ 4337 int 4338 mdi_pi_get_preferred(mdi_pathinfo_t *pip) 4339 { 4340 if (pip) { 4341 return (MDI_PI(pip)->pi_preferred); 4342 } 4343 return (0); 4344 } 4345 4346 /* 4347 * mdi_pi_set_preferred: 4348 * Set the preferred path flag 4349 */ 4350 void 4351 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred) 4352 { 4353 if (pip) { 4354 MDI_PI(pip)->pi_preferred = preferred; 4355 } 4356 } 4357 4358 /* 4359 * mdi_pi_set_state(): 4360 * Set the mdi_pathinfo node state 4361 */ 4362 void 4363 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state) 4364 { 4365 uint32_t ext_state; 4366 4367 if (pip) { 4368 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK; 4369 MDI_PI(pip)->pi_state = state; 4370 MDI_PI(pip)->pi_state |= ext_state; 4371 4372 /* Path has changed state, invalidate DINFOCACHE snap shot. */ 4373 i_ddi_di_cache_invalidate(); 4374 } 4375 } 4376 4377 /* 4378 * Property functions: 4379 */ 4380 int 4381 i_map_nvlist_error_to_mdi(int val) 4382 { 4383 int rv; 4384 4385 switch (val) { 4386 case 0: 4387 rv = DDI_PROP_SUCCESS; 4388 break; 4389 case EINVAL: 4390 case ENOTSUP: 4391 rv = DDI_PROP_INVAL_ARG; 4392 break; 4393 case ENOMEM: 4394 rv = DDI_PROP_NO_MEMORY; 4395 break; 4396 default: 4397 rv = DDI_PROP_NOT_FOUND; 4398 break; 4399 } 4400 return (rv); 4401 } 4402 4403 /* 4404 * mdi_pi_get_next_prop(): 4405 * Property walk function. The caller should hold mdi_pi_lock() 4406 * and release by calling mdi_pi_unlock() at the end of walk to 4407 * get a consistent value. 
4408 */ 4409 nvpair_t * 4410 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev) 4411 { 4412 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4413 return (NULL); 4414 } 4415 ASSERT(MDI_PI_LOCKED(pip)); 4416 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev)); 4417 } 4418 4419 /* 4420 * mdi_prop_remove(): 4421 * Remove the named property from the named list. 4422 */ 4423 int 4424 mdi_prop_remove(mdi_pathinfo_t *pip, char *name) 4425 { 4426 if (pip == NULL) { 4427 return (DDI_PROP_NOT_FOUND); 4428 } 4429 ASSERT(!MDI_PI_LOCKED(pip)); 4430 MDI_PI_LOCK(pip); 4431 if (MDI_PI(pip)->pi_prop == NULL) { 4432 MDI_PI_UNLOCK(pip); 4433 return (DDI_PROP_NOT_FOUND); 4434 } 4435 if (name) { 4436 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name); 4437 } else { 4438 char nvp_name[MAXNAMELEN]; 4439 nvpair_t *nvp; 4440 nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL); 4441 while (nvp) { 4442 nvpair_t *next; 4443 next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp); 4444 (void) snprintf(nvp_name, sizeof(nvp_name), "%s", 4445 nvpair_name(nvp)); 4446 (void) nvlist_remove_all(MDI_PI(pip)->pi_prop, 4447 nvp_name); 4448 nvp = next; 4449 } 4450 } 4451 MDI_PI_UNLOCK(pip); 4452 return (DDI_PROP_SUCCESS); 4453 } 4454 4455 /* 4456 * mdi_prop_size(): 4457 * Get buffer size needed to pack the property data. 4458 * Caller should hold the mdi_pathinfo_t lock to get a consistent 4459 * buffer size. 4460 */ 4461 int 4462 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp) 4463 { 4464 int rv; 4465 size_t bufsize; 4466 4467 *buflenp = 0; 4468 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4469 return (DDI_PROP_NOT_FOUND); 4470 } 4471 ASSERT(MDI_PI_LOCKED(pip)); 4472 rv = nvlist_size(MDI_PI(pip)->pi_prop, 4473 &bufsize, NV_ENCODE_NATIVE); 4474 *buflenp = bufsize; 4475 return (i_map_nvlist_error_to_mdi(rv)); 4476 } 4477 4478 /* 4479 * mdi_prop_pack(): 4480 * pack the property list. 
The caller should hold the 4481 * mdi_pathinfo_t node to get a consistent data 4482 */ 4483 int 4484 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen) 4485 { 4486 int rv; 4487 size_t bufsize; 4488 4489 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) { 4490 return (DDI_PROP_NOT_FOUND); 4491 } 4492 4493 ASSERT(MDI_PI_LOCKED(pip)); 4494 4495 bufsize = buflen; 4496 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize, 4497 NV_ENCODE_NATIVE, KM_SLEEP); 4498 4499 return (i_map_nvlist_error_to_mdi(rv)); 4500 } 4501 4502 /* 4503 * mdi_prop_update_byte(): 4504 * Create/Update a byte property 4505 */ 4506 int 4507 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data) 4508 { 4509 int rv; 4510 4511 if (pip == NULL) { 4512 return (DDI_PROP_INVAL_ARG); 4513 } 4514 ASSERT(!MDI_PI_LOCKED(pip)); 4515 MDI_PI_LOCK(pip); 4516 if (MDI_PI(pip)->pi_prop == NULL) { 4517 MDI_PI_UNLOCK(pip); 4518 return (DDI_PROP_NOT_FOUND); 4519 } 4520 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data); 4521 MDI_PI_UNLOCK(pip); 4522 return (i_map_nvlist_error_to_mdi(rv)); 4523 } 4524 4525 /* 4526 * mdi_prop_update_byte_array(): 4527 * Create/Update a byte array property 4528 */ 4529 int 4530 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data, 4531 uint_t nelements) 4532 { 4533 int rv; 4534 4535 if (pip == NULL) { 4536 return (DDI_PROP_INVAL_ARG); 4537 } 4538 ASSERT(!MDI_PI_LOCKED(pip)); 4539 MDI_PI_LOCK(pip); 4540 if (MDI_PI(pip)->pi_prop == NULL) { 4541 MDI_PI_UNLOCK(pip); 4542 return (DDI_PROP_NOT_FOUND); 4543 } 4544 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements); 4545 MDI_PI_UNLOCK(pip); 4546 return (i_map_nvlist_error_to_mdi(rv)); 4547 } 4548 4549 /* 4550 * mdi_prop_update_int(): 4551 * Create/Update a 32 bit integer property 4552 */ 4553 int 4554 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data) 4555 { 4556 int rv; 4557 4558 if (pip == NULL) { 4559 return (DDI_PROP_INVAL_ARG); 4560 } 4561 
ASSERT(!MDI_PI_LOCKED(pip)); 4562 MDI_PI_LOCK(pip); 4563 if (MDI_PI(pip)->pi_prop == NULL) { 4564 MDI_PI_UNLOCK(pip); 4565 return (DDI_PROP_NOT_FOUND); 4566 } 4567 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data); 4568 MDI_PI_UNLOCK(pip); 4569 return (i_map_nvlist_error_to_mdi(rv)); 4570 } 4571 4572 /* 4573 * mdi_prop_update_int64(): 4574 * Create/Update a 64 bit integer property 4575 */ 4576 int 4577 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data) 4578 { 4579 int rv; 4580 4581 if (pip == NULL) { 4582 return (DDI_PROP_INVAL_ARG); 4583 } 4584 ASSERT(!MDI_PI_LOCKED(pip)); 4585 MDI_PI_LOCK(pip); 4586 if (MDI_PI(pip)->pi_prop == NULL) { 4587 MDI_PI_UNLOCK(pip); 4588 return (DDI_PROP_NOT_FOUND); 4589 } 4590 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data); 4591 MDI_PI_UNLOCK(pip); 4592 return (i_map_nvlist_error_to_mdi(rv)); 4593 } 4594 4595 /* 4596 * mdi_prop_update_int_array(): 4597 * Create/Update a int array property 4598 */ 4599 int 4600 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data, 4601 uint_t nelements) 4602 { 4603 int rv; 4604 4605 if (pip == NULL) { 4606 return (DDI_PROP_INVAL_ARG); 4607 } 4608 ASSERT(!MDI_PI_LOCKED(pip)); 4609 MDI_PI_LOCK(pip); 4610 if (MDI_PI(pip)->pi_prop == NULL) { 4611 MDI_PI_UNLOCK(pip); 4612 return (DDI_PROP_NOT_FOUND); 4613 } 4614 rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data, 4615 nelements); 4616 MDI_PI_UNLOCK(pip); 4617 return (i_map_nvlist_error_to_mdi(rv)); 4618 } 4619 4620 /* 4621 * mdi_prop_update_string(): 4622 * Create/Update a string property 4623 */ 4624 int 4625 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data) 4626 { 4627 int rv; 4628 4629 if (pip == NULL) { 4630 return (DDI_PROP_INVAL_ARG); 4631 } 4632 ASSERT(!MDI_PI_LOCKED(pip)); 4633 MDI_PI_LOCK(pip); 4634 if (MDI_PI(pip)->pi_prop == NULL) { 4635 MDI_PI_UNLOCK(pip); 4636 return (DDI_PROP_NOT_FOUND); 4637 } 4638 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, 
data); 4639 MDI_PI_UNLOCK(pip); 4640 return (i_map_nvlist_error_to_mdi(rv)); 4641 } 4642 4643 /* 4644 * mdi_prop_update_string_array(): 4645 * Create/Update a string array property 4646 */ 4647 int 4648 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data, 4649 uint_t nelements) 4650 { 4651 int rv; 4652 4653 if (pip == NULL) { 4654 return (DDI_PROP_INVAL_ARG); 4655 } 4656 ASSERT(!MDI_PI_LOCKED(pip)); 4657 MDI_PI_LOCK(pip); 4658 if (MDI_PI(pip)->pi_prop == NULL) { 4659 MDI_PI_UNLOCK(pip); 4660 return (DDI_PROP_NOT_FOUND); 4661 } 4662 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data, 4663 nelements); 4664 MDI_PI_UNLOCK(pip); 4665 return (i_map_nvlist_error_to_mdi(rv)); 4666 } 4667 4668 /* 4669 * mdi_prop_lookup_byte(): 4670 * Look for byte property identified by name. The data returned 4671 * is the actual property and valid as long as mdi_pathinfo_t node 4672 * is alive. 4673 */ 4674 int 4675 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data) 4676 { 4677 int rv; 4678 4679 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4680 return (DDI_PROP_NOT_FOUND); 4681 } 4682 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data); 4683 return (i_map_nvlist_error_to_mdi(rv)); 4684 } 4685 4686 4687 /* 4688 * mdi_prop_lookup_byte_array(): 4689 * Look for byte array property identified by name. The data 4690 * returned is the actual property and valid as long as 4691 * mdi_pathinfo_t node is alive. 4692 */ 4693 int 4694 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data, 4695 uint_t *nelements) 4696 { 4697 int rv; 4698 4699 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4700 return (DDI_PROP_NOT_FOUND); 4701 } 4702 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data, 4703 nelements); 4704 return (i_map_nvlist_error_to_mdi(rv)); 4705 } 4706 4707 /* 4708 * mdi_prop_lookup_int(): 4709 * Look for int property identified by name. 
The data returned 4710 * is the actual property and valid as long as mdi_pathinfo_t 4711 * node is alive. 4712 */ 4713 int 4714 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data) 4715 { 4716 int rv; 4717 4718 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4719 return (DDI_PROP_NOT_FOUND); 4720 } 4721 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data); 4722 return (i_map_nvlist_error_to_mdi(rv)); 4723 } 4724 4725 /* 4726 * mdi_prop_lookup_int64(): 4727 * Look for int64 property identified by name. The data returned 4728 * is the actual property and valid as long as mdi_pathinfo_t node 4729 * is alive. 4730 */ 4731 int 4732 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data) 4733 { 4734 int rv; 4735 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4736 return (DDI_PROP_NOT_FOUND); 4737 } 4738 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data); 4739 return (i_map_nvlist_error_to_mdi(rv)); 4740 } 4741 4742 /* 4743 * mdi_prop_lookup_int_array(): 4744 * Look for int array property identified by name. The data 4745 * returned is the actual property and valid as long as 4746 * mdi_pathinfo_t node is alive. 4747 */ 4748 int 4749 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data, 4750 uint_t *nelements) 4751 { 4752 int rv; 4753 4754 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4755 return (DDI_PROP_NOT_FOUND); 4756 } 4757 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name, 4758 (int32_t **)data, nelements); 4759 return (i_map_nvlist_error_to_mdi(rv)); 4760 } 4761 4762 /* 4763 * mdi_prop_lookup_string(): 4764 * Look for string property identified by name. The data 4765 * returned is the actual property and valid as long as 4766 * mdi_pathinfo_t node is alive. 
4767 */ 4768 int 4769 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data) 4770 { 4771 int rv; 4772 4773 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4774 return (DDI_PROP_NOT_FOUND); 4775 } 4776 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data); 4777 return (i_map_nvlist_error_to_mdi(rv)); 4778 } 4779 4780 /* 4781 * mdi_prop_lookup_string_array(): 4782 * Look for string array property identified by name. The data 4783 * returned is the actual property and valid as long as 4784 * mdi_pathinfo_t node is alive. 4785 */ 4786 int 4787 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data, 4788 uint_t *nelements) 4789 { 4790 int rv; 4791 4792 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) { 4793 return (DDI_PROP_NOT_FOUND); 4794 } 4795 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data, 4796 nelements); 4797 return (i_map_nvlist_error_to_mdi(rv)); 4798 } 4799 4800 /* 4801 * mdi_prop_free(): 4802 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx() 4803 * functions return the pointer to actual property data and not a 4804 * copy of it. So the data returned is valid as long as 4805 * mdi_pathinfo_t node is valid. 
4806 */ 4807 /*ARGSUSED*/ 4808 int 4809 mdi_prop_free(void *data) 4810 { 4811 return (DDI_PROP_SUCCESS); 4812 } 4813 4814 /*ARGSUSED*/ 4815 static void 4816 i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip) 4817 { 4818 char *ct_path; 4819 char *ct_status; 4820 char *status; 4821 dev_info_t *cdip = ct->ct_dip; 4822 char lb_buf[64]; 4823 int report_lb_c = 0, report_lb_p = 0; 4824 4825 ASSERT(MDI_CLIENT_LOCKED(ct)); 4826 if ((cdip == NULL) || (ddi_get_instance(cdip) == -1) || 4827 (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) { 4828 return; 4829 } 4830 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) { 4831 ct_status = "optimal"; 4832 report_lb_c = 1; 4833 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) { 4834 ct_status = "degraded"; 4835 } else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) { 4836 ct_status = "failed"; 4837 } else { 4838 ct_status = "unknown"; 4839 } 4840 4841 lb_buf[0] = 0; /* not interested in load balancing config */ 4842 4843 if (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)) { 4844 status = "removed"; 4845 } else if (MDI_PI_IS_OFFLINE(pip)) { 4846 status = "offline"; 4847 } else if (MDI_PI_IS_ONLINE(pip)) { 4848 status = "online"; 4849 report_lb_p = 1; 4850 } else if (MDI_PI_IS_STANDBY(pip)) { 4851 status = "standby"; 4852 } else if (MDI_PI_IS_FAULT(pip)) { 4853 status = "faulted"; 4854 } else { 4855 status = "unknown"; 4856 } 4857 4858 if (cdip) { 4859 ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 4860 4861 /* 4862 * NOTE: Keeping "multipath status: %s" and 4863 * "Load balancing: %s" format unchanged in case someone 4864 * scrubs /var/adm/messages looking for these messages. 
4865 */ 4866 if (report_lb_c && report_lb_p) { 4867 if (ct->ct_lb == LOAD_BALANCE_LBA) { 4868 (void) snprintf(lb_buf, sizeof (lb_buf), 4869 "%s, region-size: %d", mdi_load_balance_lba, 4870 ct->ct_lb_args->region_size); 4871 } else if (ct->ct_lb == LOAD_BALANCE_NONE) { 4872 (void) snprintf(lb_buf, sizeof (lb_buf), 4873 "%s", mdi_load_balance_none); 4874 } else { 4875 (void) snprintf(lb_buf, sizeof (lb_buf), "%s", 4876 mdi_load_balance_rr); 4877 } 4878 4879 cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT, 4880 "?%s (%s%d) multipath status: %s: " 4881 "path %d %s is %s: Load balancing: %s\n", 4882 ddi_pathname(cdip, ct_path), ddi_driver_name(cdip), 4883 ddi_get_instance(cdip), ct_status, 4884 mdi_pi_get_path_instance(pip), 4885 mdi_pi_spathname(pip), status, lb_buf); 4886 } else { 4887 cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT, 4888 "?%s (%s%d) multipath status: %s: " 4889 "path %d %s is %s\n", 4890 ddi_pathname(cdip, ct_path), ddi_driver_name(cdip), 4891 ddi_get_instance(cdip), ct_status, 4892 mdi_pi_get_path_instance(pip), 4893 mdi_pi_spathname(pip), status); 4894 } 4895 4896 kmem_free(ct_path, MAXPATHLEN); 4897 MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct); 4898 } 4899 } 4900 4901 #ifdef DEBUG 4902 /* 4903 * i_mdi_log(): 4904 * Utility function for error message management 4905 * 4906 * NOTE: Implementation takes care of trailing \n for cmn_err, 4907 * MDI_DEBUG should not terminate fmt strings with \n. 4908 * 4909 * NOTE: If the level is >= 2, and there is no leading !?^ 4910 * then a leading ! is implied (but can be overriden via 4911 * mdi_debug_consoleonly). If you are using kmdb on the console, 4912 * consider setting mdi_debug_consoleonly to 1 as an aid. 4913 */ 4914 /*PRINTFLIKE4*/ 4915 static void 4916 i_mdi_log(int level, const char *func, dev_info_t *dip, const char *fmt, ...) 
4917 { 4918 char name[MAXNAMELEN]; 4919 char buf[512]; 4920 char *bp; 4921 va_list ap; 4922 int log_only = 0; 4923 int boot_only = 0; 4924 int console_only = 0; 4925 4926 if (dip) { 4927 (void) snprintf(name, sizeof(name), "%s%d: ", 4928 ddi_driver_name(dip), ddi_get_instance(dip)); 4929 } else { 4930 name[0] = 0; 4931 } 4932 4933 va_start(ap, fmt); 4934 (void) vsnprintf(buf, sizeof(buf), fmt, ap); 4935 va_end(ap); 4936 4937 switch (buf[0]) { 4938 case '!': 4939 bp = &buf[1]; 4940 log_only = 1; 4941 break; 4942 case '?': 4943 bp = &buf[1]; 4944 boot_only = 1; 4945 break; 4946 case '^': 4947 bp = &buf[1]; 4948 console_only = 1; 4949 break; 4950 default: 4951 if (level >= 2) 4952 log_only = 1; /* ! implied */ 4953 bp = buf; 4954 break; 4955 } 4956 if (mdi_debug_logonly) { 4957 log_only = 1; 4958 boot_only = 0; 4959 console_only = 0; 4960 } 4961 if (mdi_debug_consoleonly) { 4962 log_only = 0; 4963 boot_only = 0; 4964 console_only = 1; 4965 level = CE_NOTE; 4966 goto console; 4967 } 4968 4969 switch (level) { 4970 case CE_NOTE: 4971 level = CE_CONT; 4972 /* FALLTHROUGH */ 4973 case CE_CONT: 4974 if (boot_only) { 4975 cmn_err(level, "?mdi: %s%s: %s\n", name, func, bp); 4976 } else if (console_only) { 4977 cmn_err(level, "^mdi: %s%s: %s\n", name, func, bp); 4978 } else if (log_only) { 4979 cmn_err(level, "!mdi: %s%s: %s\n", name, func, bp); 4980 } else { 4981 cmn_err(level, "mdi: %s%s: %s\n", name, func, bp); 4982 } 4983 break; 4984 4985 case CE_WARN: 4986 case CE_PANIC: 4987 console: 4988 if (boot_only) { 4989 cmn_err(level, "?mdi: %s%s: %s", name, func, bp); 4990 } else if (console_only) { 4991 cmn_err(level, "^mdi: %s%s: %s", name, func, bp); 4992 } else if (log_only) { 4993 cmn_err(level, "!mdi: %s%s: %s", name, func, bp); 4994 } else { 4995 cmn_err(level, "mdi: %s%s: %s", name, func, bp); 4996 } 4997 break; 4998 default: 4999 cmn_err(level, "mdi: %s%s", name, bp); 5000 break; 5001 } 5002 } 5003 #endif /* DEBUG */ 5004 5005 void 5006 i_mdi_client_online(dev_info_t 
*ct_dip) 5007 { 5008 mdi_client_t *ct; 5009 5010 /* 5011 * Client online notification. Mark client state as online 5012 * restore our binding with dev_info node 5013 */ 5014 ct = i_devi_get_client(ct_dip); 5015 ASSERT(ct != NULL); 5016 MDI_CLIENT_LOCK(ct); 5017 MDI_CLIENT_SET_ONLINE(ct); 5018 /* catch for any memory leaks */ 5019 ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip)); 5020 ct->ct_dip = ct_dip; 5021 5022 if (ct->ct_power_cnt == 0) 5023 (void) i_mdi_power_all_phci(ct); 5024 5025 MDI_DEBUG(4, (MDI_NOTE, ct_dip, 5026 "i_mdi_pm_hold_client %p", (void *)ct)); 5027 i_mdi_pm_hold_client(ct, 1); 5028 5029 MDI_CLIENT_UNLOCK(ct); 5030 } 5031 5032 void 5033 i_mdi_phci_online(dev_info_t *ph_dip) 5034 { 5035 mdi_phci_t *ph; 5036 5037 /* pHCI online notification. Mark state accordingly */ 5038 ph = i_devi_get_phci(ph_dip); 5039 ASSERT(ph != NULL); 5040 MDI_PHCI_LOCK(ph); 5041 MDI_PHCI_SET_ONLINE(ph); 5042 MDI_PHCI_UNLOCK(ph); 5043 } 5044 5045 /* 5046 * mdi_devi_online(): 5047 * Online notification from NDI framework on pHCI/client 5048 * device online. 5049 * Return Values: 5050 * NDI_SUCCESS 5051 * MDI_FAILURE 5052 */ 5053 /*ARGSUSED*/ 5054 int 5055 mdi_devi_online(dev_info_t *dip, uint_t flags) 5056 { 5057 if (MDI_PHCI(dip)) { 5058 i_mdi_phci_online(dip); 5059 } 5060 5061 if (MDI_CLIENT(dip)) { 5062 i_mdi_client_online(dip); 5063 } 5064 return (NDI_SUCCESS); 5065 } 5066 5067 /* 5068 * mdi_devi_offline(): 5069 * Offline notification from NDI framework on pHCI/Client device 5070 * offline. 
5071 * 5072 * Return Values: 5073 * NDI_SUCCESS 5074 * NDI_FAILURE 5075 */ 5076 /*ARGSUSED*/ 5077 int 5078 mdi_devi_offline(dev_info_t *dip, uint_t flags) 5079 { 5080 int rv = NDI_SUCCESS; 5081 5082 if (MDI_CLIENT(dip)) { 5083 rv = i_mdi_client_offline(dip, flags); 5084 if (rv != NDI_SUCCESS) 5085 return (rv); 5086 } 5087 5088 if (MDI_PHCI(dip)) { 5089 rv = i_mdi_phci_offline(dip, flags); 5090 5091 if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) { 5092 /* set client back online */ 5093 i_mdi_client_online(dip); 5094 } 5095 } 5096 5097 return (rv); 5098 } 5099 5100 /*ARGSUSED*/ 5101 static int 5102 i_mdi_phci_offline(dev_info_t *dip, uint_t flags) 5103 { 5104 int rv = NDI_SUCCESS; 5105 mdi_phci_t *ph; 5106 mdi_client_t *ct; 5107 mdi_pathinfo_t *pip; 5108 mdi_pathinfo_t *next; 5109 mdi_pathinfo_t *failed_pip = NULL; 5110 dev_info_t *cdip; 5111 5112 /* 5113 * pHCI component offline notification 5114 * Make sure that this pHCI instance is free to be offlined. 5115 * If it is OK to proceed, Offline and remove all the child 5116 * mdi_pathinfo nodes. This process automatically offlines 5117 * corresponding client devices, for which this pHCI provides 5118 * critical services. 5119 */ 5120 ph = i_devi_get_phci(dip); 5121 MDI_DEBUG(2, (MDI_NOTE, dip, 5122 "called %p %p", (void *)dip, (void *)ph)); 5123 if (ph == NULL) { 5124 return (rv); 5125 } 5126 5127 MDI_PHCI_LOCK(ph); 5128 5129 if (MDI_PHCI_IS_OFFLINE(ph)) { 5130 MDI_DEBUG(1, (MDI_WARN, dip, 5131 "!pHCI already offlined: %p", (void *)dip)); 5132 MDI_PHCI_UNLOCK(ph); 5133 return (NDI_SUCCESS); 5134 } 5135 5136 /* 5137 * Check to see if the pHCI can be offlined 5138 */ 5139 if (ph->ph_unstable) { 5140 MDI_DEBUG(1, (MDI_WARN, dip, 5141 "!One or more target devices are in transient state. " 5142 "This device can not be removed at this moment. 
" 5143 "Please try again later.")); 5144 MDI_PHCI_UNLOCK(ph); 5145 return (NDI_BUSY); 5146 } 5147 5148 pip = ph->ph_path_head; 5149 while (pip != NULL) { 5150 MDI_PI_LOCK(pip); 5151 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5152 5153 /* 5154 * The mdi_pathinfo state is OK. Check the client state. 5155 * If failover in progress fail the pHCI from offlining 5156 */ 5157 ct = MDI_PI(pip)->pi_client; 5158 i_mdi_client_lock(ct, pip); 5159 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 5160 (ct->ct_unstable)) { 5161 /* 5162 * Failover is in progress, Fail the DR 5163 */ 5164 MDI_DEBUG(1, (MDI_WARN, dip, 5165 "!pHCI device is busy. " 5166 "This device can not be removed at this moment. " 5167 "Please try again later.")); 5168 MDI_PI_UNLOCK(pip); 5169 i_mdi_client_unlock(ct); 5170 MDI_PHCI_UNLOCK(ph); 5171 return (NDI_BUSY); 5172 } 5173 MDI_PI_UNLOCK(pip); 5174 5175 /* 5176 * Check to see of we are removing the last path of this 5177 * client device... 5178 */ 5179 cdip = ct->ct_dip; 5180 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5181 (i_mdi_client_compute_state(ct, ph) == 5182 MDI_CLIENT_STATE_FAILED)) { 5183 i_mdi_client_unlock(ct); 5184 MDI_PHCI_UNLOCK(ph); 5185 if (ndi_devi_offline(cdip, 5186 NDI_DEVFS_CLEAN) != NDI_SUCCESS) { 5187 /* 5188 * ndi_devi_offline() failed. 5189 * This pHCI provides the critical path 5190 * to one or more client devices. 5191 * Return busy. 5192 */ 5193 MDI_PHCI_LOCK(ph); 5194 MDI_DEBUG(1, (MDI_WARN, dip, 5195 "!pHCI device is busy. " 5196 "This device can not be removed at this " 5197 "moment. 
Please try again later.")); 5198 failed_pip = pip; 5199 break; 5200 } else { 5201 MDI_PHCI_LOCK(ph); 5202 pip = next; 5203 } 5204 } else { 5205 i_mdi_client_unlock(ct); 5206 pip = next; 5207 } 5208 } 5209 5210 if (failed_pip) { 5211 pip = ph->ph_path_head; 5212 while (pip != failed_pip) { 5213 MDI_PI_LOCK(pip); 5214 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5215 ct = MDI_PI(pip)->pi_client; 5216 i_mdi_client_lock(ct, pip); 5217 cdip = ct->ct_dip; 5218 switch (MDI_CLIENT_STATE(ct)) { 5219 case MDI_CLIENT_STATE_OPTIMAL: 5220 case MDI_CLIENT_STATE_DEGRADED: 5221 if (cdip) { 5222 MDI_PI_UNLOCK(pip); 5223 i_mdi_client_unlock(ct); 5224 MDI_PHCI_UNLOCK(ph); 5225 (void) ndi_devi_online(cdip, 0); 5226 MDI_PHCI_LOCK(ph); 5227 pip = next; 5228 continue; 5229 } 5230 break; 5231 5232 case MDI_CLIENT_STATE_FAILED: 5233 if (cdip) { 5234 MDI_PI_UNLOCK(pip); 5235 i_mdi_client_unlock(ct); 5236 MDI_PHCI_UNLOCK(ph); 5237 (void) ndi_devi_offline(cdip, 5238 NDI_DEVFS_CLEAN); 5239 MDI_PHCI_LOCK(ph); 5240 pip = next; 5241 continue; 5242 } 5243 break; 5244 } 5245 MDI_PI_UNLOCK(pip); 5246 i_mdi_client_unlock(ct); 5247 pip = next; 5248 } 5249 MDI_PHCI_UNLOCK(ph); 5250 return (NDI_BUSY); 5251 } 5252 5253 /* 5254 * Mark the pHCI as offline 5255 */ 5256 MDI_PHCI_SET_OFFLINE(ph); 5257 5258 /* 5259 * Mark the child mdi_pathinfo nodes as transient 5260 */ 5261 pip = ph->ph_path_head; 5262 while (pip != NULL) { 5263 MDI_PI_LOCK(pip); 5264 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5265 MDI_PI_SET_OFFLINING(pip); 5266 MDI_PI_UNLOCK(pip); 5267 pip = next; 5268 } 5269 MDI_PHCI_UNLOCK(ph); 5270 /* 5271 * Give a chance for any pending commands to execute 5272 */ 5273 delay_random(mdi_delay); 5274 MDI_PHCI_LOCK(ph); 5275 pip = ph->ph_path_head; 5276 while (pip != NULL) { 5277 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5278 (void) i_mdi_pi_offline(pip, flags); 5279 MDI_PI_LOCK(pip); 5280 ct = MDI_PI(pip)->pi_client; 5281 if (!MDI_PI_IS_OFFLINE(pip)) { 5282 MDI_DEBUG(1, 
(MDI_WARN, dip, 5283 "!pHCI device is busy. " 5284 "This device can not be removed at this moment. " 5285 "Please try again later.")); 5286 MDI_PI_UNLOCK(pip); 5287 MDI_PHCI_SET_ONLINE(ph); 5288 MDI_PHCI_UNLOCK(ph); 5289 return (NDI_BUSY); 5290 } 5291 MDI_PI_UNLOCK(pip); 5292 pip = next; 5293 } 5294 MDI_PHCI_UNLOCK(ph); 5295 5296 return (rv); 5297 } 5298 5299 void 5300 mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array) 5301 { 5302 mdi_phci_t *ph; 5303 mdi_client_t *ct; 5304 mdi_pathinfo_t *pip; 5305 mdi_pathinfo_t *next; 5306 dev_info_t *cdip; 5307 5308 if (!MDI_PHCI(dip)) 5309 return; 5310 5311 ph = i_devi_get_phci(dip); 5312 if (ph == NULL) { 5313 return; 5314 } 5315 5316 MDI_PHCI_LOCK(ph); 5317 5318 if (MDI_PHCI_IS_OFFLINE(ph)) { 5319 /* has no last path */ 5320 MDI_PHCI_UNLOCK(ph); 5321 return; 5322 } 5323 5324 pip = ph->ph_path_head; 5325 while (pip != NULL) { 5326 MDI_PI_LOCK(pip); 5327 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5328 5329 ct = MDI_PI(pip)->pi_client; 5330 i_mdi_client_lock(ct, pip); 5331 MDI_PI_UNLOCK(pip); 5332 5333 cdip = ct->ct_dip; 5334 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5335 (i_mdi_client_compute_state(ct, ph) == 5336 MDI_CLIENT_STATE_FAILED)) { 5337 /* Last path. 
Mark client dip as retiring */ 5338 i_mdi_client_unlock(ct); 5339 MDI_PHCI_UNLOCK(ph); 5340 (void) e_ddi_mark_retiring(cdip, cons_array); 5341 MDI_PHCI_LOCK(ph); 5342 pip = next; 5343 } else { 5344 i_mdi_client_unlock(ct); 5345 pip = next; 5346 } 5347 } 5348 5349 MDI_PHCI_UNLOCK(ph); 5350 5351 return; 5352 } 5353 5354 void 5355 mdi_phci_retire_notify(dev_info_t *dip, int *constraint) 5356 { 5357 mdi_phci_t *ph; 5358 mdi_client_t *ct; 5359 mdi_pathinfo_t *pip; 5360 mdi_pathinfo_t *next; 5361 dev_info_t *cdip; 5362 5363 if (!MDI_PHCI(dip)) 5364 return; 5365 5366 ph = i_devi_get_phci(dip); 5367 if (ph == NULL) 5368 return; 5369 5370 MDI_PHCI_LOCK(ph); 5371 5372 if (MDI_PHCI_IS_OFFLINE(ph)) { 5373 MDI_PHCI_UNLOCK(ph); 5374 /* not last path */ 5375 return; 5376 } 5377 5378 if (ph->ph_unstable) { 5379 MDI_PHCI_UNLOCK(ph); 5380 /* can't check for constraints */ 5381 *constraint = 0; 5382 return; 5383 } 5384 5385 pip = ph->ph_path_head; 5386 while (pip != NULL) { 5387 MDI_PI_LOCK(pip); 5388 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5389 5390 /* 5391 * The mdi_pathinfo state is OK. Check the client state. 5392 * If failover in progress fail the pHCI from offlining 5393 */ 5394 ct = MDI_PI(pip)->pi_client; 5395 i_mdi_client_lock(ct, pip); 5396 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 5397 (ct->ct_unstable)) { 5398 /* 5399 * Failover is in progress, can't check for constraints 5400 */ 5401 MDI_PI_UNLOCK(pip); 5402 i_mdi_client_unlock(ct); 5403 MDI_PHCI_UNLOCK(ph); 5404 *constraint = 0; 5405 return; 5406 } 5407 MDI_PI_UNLOCK(pip); 5408 5409 /* 5410 * Check to see of we are retiring the last path of this 5411 * client device... 
5412 */ 5413 cdip = ct->ct_dip; 5414 if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5415 (i_mdi_client_compute_state(ct, ph) == 5416 MDI_CLIENT_STATE_FAILED)) { 5417 i_mdi_client_unlock(ct); 5418 MDI_PHCI_UNLOCK(ph); 5419 (void) e_ddi_retire_notify(cdip, constraint); 5420 MDI_PHCI_LOCK(ph); 5421 pip = next; 5422 } else { 5423 i_mdi_client_unlock(ct); 5424 pip = next; 5425 } 5426 } 5427 5428 MDI_PHCI_UNLOCK(ph); 5429 5430 return; 5431 } 5432 5433 /* 5434 * offline the path(s) hanging off the pHCI. If the 5435 * last path to any client, check that constraints 5436 * have been applied. 5437 * 5438 * If constraint is 0, we aren't going to retire the 5439 * pHCI. However we still need to go through the paths 5440 * calling e_ddi_retire_finalize() to clear their 5441 * contract barriers. 5442 */ 5443 void 5444 mdi_phci_retire_finalize(dev_info_t *dip, int phci_only, void *constraint) 5445 { 5446 mdi_phci_t *ph; 5447 mdi_client_t *ct; 5448 mdi_pathinfo_t *pip; 5449 mdi_pathinfo_t *next; 5450 dev_info_t *cdip; 5451 int unstable = 0; 5452 int tmp_constraint; 5453 5454 if (!MDI_PHCI(dip)) 5455 return; 5456 5457 ph = i_devi_get_phci(dip); 5458 if (ph == NULL) { 5459 /* no last path and no pips */ 5460 return; 5461 } 5462 5463 MDI_PHCI_LOCK(ph); 5464 5465 if (MDI_PHCI_IS_OFFLINE(ph)) { 5466 MDI_PHCI_UNLOCK(ph); 5467 /* no last path and no pips */ 5468 return; 5469 } 5470 5471 /* 5472 * Check to see if the pHCI can be offlined 5473 */ 5474 if (ph->ph_unstable) { 5475 unstable = 1; 5476 } 5477 5478 pip = ph->ph_path_head; 5479 while (pip != NULL) { 5480 MDI_PI_LOCK(pip); 5481 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5482 5483 /* 5484 * if failover in progress fail the pHCI from offlining 5485 */ 5486 ct = MDI_PI(pip)->pi_client; 5487 i_mdi_client_lock(ct, pip); 5488 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) || 5489 (ct->ct_unstable)) { 5490 unstable = 1; 5491 } 5492 MDI_PI_UNLOCK(pip); 5493 5494 /* 5495 * Check to see of we are removing the last path of 
this 5496 * client device... 5497 */ 5498 cdip = ct->ct_dip; 5499 if (!phci_only && cdip && 5500 (i_ddi_node_state(cdip) >= DS_INITIALIZED) && 5501 (i_mdi_client_compute_state(ct, ph) == 5502 MDI_CLIENT_STATE_FAILED)) { 5503 i_mdi_client_unlock(ct); 5504 MDI_PHCI_UNLOCK(ph); 5505 /* 5506 * This is the last path to this client. 5507 * 5508 * Constraint will only be set to 1 if this client can 5509 * be retired (as already determined by 5510 * mdi_phci_retire_notify). However we don't actually 5511 * need to retire the client (we just retire the last 5512 * path - MPXIO will then fail all I/Os to the client). 5513 * But we still need to call e_ddi_retire_finalize so 5514 * the contract barriers can be cleared. Therefore we 5515 * temporarily set constraint = 0 so that the client 5516 * dip is not retired. 5517 */ 5518 tmp_constraint = 0; 5519 (void) e_ddi_retire_finalize(cdip, &tmp_constraint); 5520 MDI_PHCI_LOCK(ph); 5521 pip = next; 5522 } else { 5523 i_mdi_client_unlock(ct); 5524 pip = next; 5525 } 5526 } 5527 5528 if (!phci_only && *((int *)constraint) == 0) { 5529 MDI_PHCI_UNLOCK(ph); 5530 return; 5531 } 5532 5533 /* 5534 * Cannot offline pip(s) 5535 */ 5536 if (unstable) { 5537 cmn_err(CE_WARN, "%s%d: mdi_phci_retire_finalize: " 5538 "pHCI in transient state, cannot retire", 5539 ddi_driver_name(dip), ddi_get_instance(dip)); 5540 MDI_PHCI_UNLOCK(ph); 5541 return; 5542 } 5543 5544 /* 5545 * Mark the pHCI as offline 5546 */ 5547 MDI_PHCI_SET_OFFLINE(ph); 5548 5549 /* 5550 * Mark the child mdi_pathinfo nodes as transient 5551 */ 5552 pip = ph->ph_path_head; 5553 while (pip != NULL) { 5554 MDI_PI_LOCK(pip); 5555 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5556 MDI_PI_SET_OFFLINING(pip); 5557 MDI_PI_UNLOCK(pip); 5558 pip = next; 5559 } 5560 MDI_PHCI_UNLOCK(ph); 5561 /* 5562 * Give a chance for any pending commands to execute 5563 */ 5564 delay_random(mdi_delay); 5565 MDI_PHCI_LOCK(ph); 5566 pip = ph->ph_path_head; 5567 while (pip != NULL) { 5568 next = 
(mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5569 (void) i_mdi_pi_offline(pip, 0); 5570 MDI_PI_LOCK(pip); 5571 ct = MDI_PI(pip)->pi_client; 5572 if (!MDI_PI_IS_OFFLINE(pip)) { 5573 cmn_err(CE_WARN, "mdi_phci_retire_finalize: " 5574 "path %d %s busy, cannot offline", 5575 mdi_pi_get_path_instance(pip), 5576 mdi_pi_spathname(pip)); 5577 MDI_PI_UNLOCK(pip); 5578 MDI_PHCI_SET_ONLINE(ph); 5579 MDI_PHCI_UNLOCK(ph); 5580 return; 5581 } 5582 MDI_PI_UNLOCK(pip); 5583 pip = next; 5584 } 5585 MDI_PHCI_UNLOCK(ph); 5586 5587 return; 5588 } 5589 5590 void 5591 mdi_phci_unretire(dev_info_t *dip) 5592 { 5593 mdi_phci_t *ph; 5594 mdi_pathinfo_t *pip; 5595 mdi_pathinfo_t *next; 5596 5597 ASSERT(MDI_PHCI(dip)); 5598 5599 /* 5600 * Online the phci 5601 */ 5602 i_mdi_phci_online(dip); 5603 5604 ph = i_devi_get_phci(dip); 5605 MDI_PHCI_LOCK(ph); 5606 pip = ph->ph_path_head; 5607 while (pip != NULL) { 5608 MDI_PI_LOCK(pip); 5609 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5610 MDI_PI_UNLOCK(pip); 5611 (void) i_mdi_pi_online(pip, 0); 5612 pip = next; 5613 } 5614 MDI_PHCI_UNLOCK(ph); 5615 } 5616 5617 /*ARGSUSED*/ 5618 static int 5619 i_mdi_client_offline(dev_info_t *dip, uint_t flags) 5620 { 5621 int rv = NDI_SUCCESS; 5622 mdi_client_t *ct; 5623 5624 /* 5625 * Client component to go offline. Make sure that we are 5626 * not in failing over state and update client state 5627 * accordingly 5628 */ 5629 ct = i_devi_get_client(dip); 5630 MDI_DEBUG(2, (MDI_NOTE, dip, 5631 "called %p %p", (void *)dip, (void *)ct)); 5632 if (ct != NULL) { 5633 MDI_CLIENT_LOCK(ct); 5634 if (ct->ct_unstable) { 5635 /* 5636 * One or more paths are in transient state, 5637 * Dont allow offline of a client device 5638 */ 5639 MDI_DEBUG(1, (MDI_WARN, dip, 5640 "!One or more paths to " 5641 "this device are in transient state. " 5642 "This device can not be removed at this moment. 
" 5643 "Please try again later.")); 5644 MDI_CLIENT_UNLOCK(ct); 5645 return (NDI_BUSY); 5646 } 5647 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) { 5648 /* 5649 * Failover is in progress, Dont allow DR of 5650 * a client device 5651 */ 5652 MDI_DEBUG(1, (MDI_WARN, dip, 5653 "!Client device is Busy. " 5654 "This device can not be removed at this moment. " 5655 "Please try again later.")); 5656 MDI_CLIENT_UNLOCK(ct); 5657 return (NDI_BUSY); 5658 } 5659 MDI_CLIENT_SET_OFFLINE(ct); 5660 5661 /* 5662 * Unbind our relationship with the dev_info node 5663 */ 5664 if (flags & NDI_DEVI_REMOVE) { 5665 ct->ct_dip = NULL; 5666 } 5667 MDI_CLIENT_UNLOCK(ct); 5668 } 5669 return (rv); 5670 } 5671 5672 /* 5673 * mdi_pre_attach(): 5674 * Pre attach() notification handler 5675 */ 5676 /*ARGSUSED*/ 5677 int 5678 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 5679 { 5680 /* don't support old DDI_PM_RESUME */ 5681 if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) && 5682 (cmd == DDI_PM_RESUME)) 5683 return (DDI_FAILURE); 5684 5685 return (DDI_SUCCESS); 5686 } 5687 5688 /* 5689 * mdi_post_attach(): 5690 * Post attach() notification handler 5691 */ 5692 /*ARGSUSED*/ 5693 void 5694 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error) 5695 { 5696 mdi_phci_t *ph; 5697 mdi_client_t *ct; 5698 mdi_vhci_t *vh; 5699 5700 if (MDI_PHCI(dip)) { 5701 ph = i_devi_get_phci(dip); 5702 ASSERT(ph != NULL); 5703 5704 MDI_PHCI_LOCK(ph); 5705 switch (cmd) { 5706 case DDI_ATTACH: 5707 MDI_DEBUG(2, (MDI_NOTE, dip, 5708 "phci post_attach called %p", (void *)ph)); 5709 if (error == DDI_SUCCESS) { 5710 MDI_PHCI_SET_ATTACH(ph); 5711 } else { 5712 MDI_DEBUG(1, (MDI_NOTE, dip, 5713 "!pHCI post_attach failed: error %d", 5714 error)); 5715 MDI_PHCI_SET_DETACH(ph); 5716 } 5717 break; 5718 5719 case DDI_RESUME: 5720 MDI_DEBUG(2, (MDI_NOTE, dip, 5721 "pHCI post_resume: called %p", (void *)ph)); 5722 if (error == DDI_SUCCESS) { 5723 MDI_PHCI_SET_RESUME(ph); 5724 } else { 5725 MDI_DEBUG(1, 
(MDI_NOTE, dip, 5726 "!pHCI post_resume failed: error %d", 5727 error)); 5728 MDI_PHCI_SET_SUSPEND(ph); 5729 } 5730 break; 5731 } 5732 MDI_PHCI_UNLOCK(ph); 5733 } 5734 5735 if (MDI_CLIENT(dip)) { 5736 ct = i_devi_get_client(dip); 5737 ASSERT(ct != NULL); 5738 5739 MDI_CLIENT_LOCK(ct); 5740 switch (cmd) { 5741 case DDI_ATTACH: 5742 MDI_DEBUG(2, (MDI_NOTE, dip, 5743 "client post_attach called %p", (void *)ct)); 5744 if (error != DDI_SUCCESS) { 5745 MDI_DEBUG(1, (MDI_NOTE, dip, 5746 "!client post_attach failed: error %d", 5747 error)); 5748 MDI_CLIENT_SET_DETACH(ct); 5749 MDI_DEBUG(4, (MDI_WARN, dip, 5750 "i_mdi_pm_reset_client")); 5751 i_mdi_pm_reset_client(ct); 5752 break; 5753 } 5754 5755 /* 5756 * Client device has successfully attached, inform 5757 * the vhci. 5758 */ 5759 vh = ct->ct_vhci; 5760 if (vh->vh_ops->vo_client_attached) 5761 (*vh->vh_ops->vo_client_attached)(dip); 5762 5763 MDI_CLIENT_SET_ATTACH(ct); 5764 break; 5765 5766 case DDI_RESUME: 5767 MDI_DEBUG(2, (MDI_NOTE, dip, 5768 "client post_attach: called %p", (void *)ct)); 5769 if (error == DDI_SUCCESS) { 5770 MDI_CLIENT_SET_RESUME(ct); 5771 } else { 5772 MDI_DEBUG(1, (MDI_NOTE, dip, 5773 "!client post_resume failed: error %d", 5774 error)); 5775 MDI_CLIENT_SET_SUSPEND(ct); 5776 } 5777 break; 5778 } 5779 MDI_CLIENT_UNLOCK(ct); 5780 } 5781 } 5782 5783 /* 5784 * mdi_pre_detach(): 5785 * Pre detach notification handler 5786 */ 5787 /*ARGSUSED*/ 5788 int 5789 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5790 { 5791 int rv = DDI_SUCCESS; 5792 5793 if (MDI_CLIENT(dip)) { 5794 (void) i_mdi_client_pre_detach(dip, cmd); 5795 } 5796 5797 if (MDI_PHCI(dip)) { 5798 rv = i_mdi_phci_pre_detach(dip, cmd); 5799 } 5800 5801 return (rv); 5802 } 5803 5804 /*ARGSUSED*/ 5805 static int 5806 i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5807 { 5808 int rv = DDI_SUCCESS; 5809 mdi_phci_t *ph; 5810 mdi_client_t *ct; 5811 mdi_pathinfo_t *pip; 5812 mdi_pathinfo_t *failed_pip = NULL; 5813 mdi_pathinfo_t 
*next; 5814 5815 ph = i_devi_get_phci(dip); 5816 if (ph == NULL) { 5817 return (rv); 5818 } 5819 5820 MDI_PHCI_LOCK(ph); 5821 switch (cmd) { 5822 case DDI_DETACH: 5823 MDI_DEBUG(2, (MDI_NOTE, dip, 5824 "pHCI pre_detach: called %p", (void *)ph)); 5825 if (!MDI_PHCI_IS_OFFLINE(ph)) { 5826 /* 5827 * mdi_pathinfo nodes are still attached to 5828 * this pHCI. Fail the detach for this pHCI. 5829 */ 5830 MDI_DEBUG(2, (MDI_WARN, dip, 5831 "pHCI pre_detach: paths are still attached %p", 5832 (void *)ph)); 5833 rv = DDI_FAILURE; 5834 break; 5835 } 5836 MDI_PHCI_SET_DETACH(ph); 5837 break; 5838 5839 case DDI_SUSPEND: 5840 /* 5841 * pHCI is getting suspended. Since mpxio client 5842 * devices may not be suspended at this point, to avoid 5843 * a potential stack overflow, it is important to suspend 5844 * client devices before pHCI can be suspended. 5845 */ 5846 5847 MDI_DEBUG(2, (MDI_NOTE, dip, 5848 "pHCI pre_suspend: called %p", (void *)ph)); 5849 /* 5850 * Suspend all the client devices accessible through this pHCI 5851 */ 5852 pip = ph->ph_path_head; 5853 while (pip != NULL && rv == DDI_SUCCESS) { 5854 dev_info_t *cdip; 5855 MDI_PI_LOCK(pip); 5856 next = 5857 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5858 ct = MDI_PI(pip)->pi_client; 5859 i_mdi_client_lock(ct, pip); 5860 cdip = ct->ct_dip; 5861 MDI_PI_UNLOCK(pip); 5862 if ((MDI_CLIENT_IS_DETACHED(ct) == 0) && 5863 MDI_CLIENT_IS_SUSPENDED(ct) == 0) { 5864 i_mdi_client_unlock(ct); 5865 if ((rv = devi_detach(cdip, DDI_SUSPEND)) != 5866 DDI_SUCCESS) { 5867 /* 5868 * Suspend of one of the client 5869 * device has failed. 5870 */ 5871 MDI_DEBUG(1, (MDI_WARN, dip, 5872 "!suspend of device (%s%d) failed.", 5873 ddi_driver_name(cdip), 5874 ddi_get_instance(cdip))); 5875 failed_pip = pip; 5876 break; 5877 } 5878 } else { 5879 i_mdi_client_unlock(ct); 5880 } 5881 pip = next; 5882 } 5883 5884 if (rv == DDI_SUCCESS) { 5885 /* 5886 * Suspend of client devices is complete. Proceed 5887 * with pHCI suspend. 
5888 */ 5889 MDI_PHCI_SET_SUSPEND(ph); 5890 } else { 5891 /* 5892 * Revert back all the suspended client device states 5893 * to converse. 5894 */ 5895 pip = ph->ph_path_head; 5896 while (pip != failed_pip) { 5897 dev_info_t *cdip; 5898 MDI_PI_LOCK(pip); 5899 next = 5900 (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 5901 ct = MDI_PI(pip)->pi_client; 5902 i_mdi_client_lock(ct, pip); 5903 cdip = ct->ct_dip; 5904 MDI_PI_UNLOCK(pip); 5905 if (MDI_CLIENT_IS_SUSPENDED(ct)) { 5906 i_mdi_client_unlock(ct); 5907 (void) devi_attach(cdip, DDI_RESUME); 5908 } else { 5909 i_mdi_client_unlock(ct); 5910 } 5911 pip = next; 5912 } 5913 } 5914 break; 5915 5916 default: 5917 rv = DDI_FAILURE; 5918 break; 5919 } 5920 MDI_PHCI_UNLOCK(ph); 5921 return (rv); 5922 } 5923 5924 /*ARGSUSED*/ 5925 static int 5926 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 5927 { 5928 int rv = DDI_SUCCESS; 5929 mdi_client_t *ct; 5930 5931 ct = i_devi_get_client(dip); 5932 if (ct == NULL) { 5933 return (rv); 5934 } 5935 5936 MDI_CLIENT_LOCK(ct); 5937 switch (cmd) { 5938 case DDI_DETACH: 5939 MDI_DEBUG(2, (MDI_NOTE, dip, 5940 "client pre_detach: called %p", 5941 (void *)ct)); 5942 MDI_CLIENT_SET_DETACH(ct); 5943 break; 5944 5945 case DDI_SUSPEND: 5946 MDI_DEBUG(2, (MDI_NOTE, dip, 5947 "client pre_suspend: called %p", 5948 (void *)ct)); 5949 MDI_CLIENT_SET_SUSPEND(ct); 5950 break; 5951 5952 default: 5953 rv = DDI_FAILURE; 5954 break; 5955 } 5956 MDI_CLIENT_UNLOCK(ct); 5957 return (rv); 5958 } 5959 5960 /* 5961 * mdi_post_detach(): 5962 * Post detach notification handler 5963 */ 5964 /*ARGSUSED*/ 5965 void 5966 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5967 { 5968 /* 5969 * Detach/Suspend of mpxio component failed. 
Update our state 5970 * too 5971 */ 5972 if (MDI_PHCI(dip)) 5973 i_mdi_phci_post_detach(dip, cmd, error); 5974 5975 if (MDI_CLIENT(dip)) 5976 i_mdi_client_post_detach(dip, cmd, error); 5977 } 5978 5979 /*ARGSUSED*/ 5980 static void 5981 i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 5982 { 5983 mdi_phci_t *ph; 5984 5985 /* 5986 * Detach/Suspend of phci component failed. Update our state 5987 * too 5988 */ 5989 ph = i_devi_get_phci(dip); 5990 if (ph == NULL) { 5991 return; 5992 } 5993 5994 MDI_PHCI_LOCK(ph); 5995 /* 5996 * Detach of pHCI failed. Restore back converse 5997 * state 5998 */ 5999 switch (cmd) { 6000 case DDI_DETACH: 6001 MDI_DEBUG(2, (MDI_NOTE, dip, 6002 "pHCI post_detach: called %p", 6003 (void *)ph)); 6004 if (error != DDI_SUCCESS) 6005 MDI_PHCI_SET_ATTACH(ph); 6006 break; 6007 6008 case DDI_SUSPEND: 6009 MDI_DEBUG(2, (MDI_NOTE, dip, 6010 "pHCI post_suspend: called %p", 6011 (void *)ph)); 6012 if (error != DDI_SUCCESS) 6013 MDI_PHCI_SET_RESUME(ph); 6014 break; 6015 } 6016 MDI_PHCI_UNLOCK(ph); 6017 } 6018 6019 /*ARGSUSED*/ 6020 static void 6021 i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error) 6022 { 6023 mdi_client_t *ct; 6024 6025 ct = i_devi_get_client(dip); 6026 if (ct == NULL) { 6027 return; 6028 } 6029 MDI_CLIENT_LOCK(ct); 6030 /* 6031 * Detach of Client failed. 
Restore back converse 6032 * state 6033 */ 6034 switch (cmd) { 6035 case DDI_DETACH: 6036 MDI_DEBUG(2, (MDI_NOTE, dip, 6037 "client post_detach: called %p", (void *)ct)); 6038 if (DEVI_IS_ATTACHING(dip)) { 6039 MDI_DEBUG(4, (MDI_NOTE, dip, 6040 "i_mdi_pm_rele_client\n")); 6041 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6042 } else { 6043 MDI_DEBUG(4, (MDI_NOTE, dip, 6044 "i_mdi_pm_reset_client\n")); 6045 i_mdi_pm_reset_client(ct); 6046 } 6047 if (error != DDI_SUCCESS) 6048 MDI_CLIENT_SET_ATTACH(ct); 6049 break; 6050 6051 case DDI_SUSPEND: 6052 MDI_DEBUG(2, (MDI_NOTE, dip, 6053 "called %p", (void *)ct)); 6054 if (error != DDI_SUCCESS) 6055 MDI_CLIENT_SET_RESUME(ct); 6056 break; 6057 } 6058 MDI_CLIENT_UNLOCK(ct); 6059 } 6060 6061 int 6062 mdi_pi_kstat_exists(mdi_pathinfo_t *pip) 6063 { 6064 return (MDI_PI(pip)->pi_kstats ? 1 : 0); 6065 } 6066 6067 /* 6068 * create and install per-path (client - pHCI) statistics 6069 * I/O stats supported: nread, nwritten, reads, and writes 6070 * Error stats - hard errors, soft errors, & transport errors 6071 */ 6072 int 6073 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname) 6074 { 6075 kstat_t *kiosp, *kerrsp; 6076 struct pi_errs *nsp; 6077 struct mdi_pi_kstats *mdi_statp; 6078 6079 if (MDI_PI(pip)->pi_kstats != NULL) 6080 return (MDI_SUCCESS); 6081 6082 if ((kiosp = kstat_create("mdi", 0, ksname, "iopath", 6083 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) { 6084 return (MDI_FAILURE); 6085 } 6086 6087 (void) strcat(ksname, ",err"); 6088 kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors", 6089 KSTAT_TYPE_NAMED, 6090 sizeof (struct pi_errs) / sizeof (kstat_named_t), 0); 6091 if (kerrsp == NULL) { 6092 kstat_delete(kiosp); 6093 return (MDI_FAILURE); 6094 } 6095 6096 nsp = (struct pi_errs *)kerrsp->ks_data; 6097 kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32); 6098 kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32); 6099 kstat_named_init(&nsp->pi_transerrs, "Transport Errors", 
6100 KSTAT_DATA_UINT32); 6101 kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy", 6102 KSTAT_DATA_UINT32); 6103 kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors", 6104 KSTAT_DATA_UINT32); 6105 kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources", 6106 KSTAT_DATA_UINT32); 6107 kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors", 6108 KSTAT_DATA_UINT32); 6109 kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State", 6110 KSTAT_DATA_UINT32); 6111 kstat_named_init(&nsp->pi_failedfrom, "Failed From", 6112 KSTAT_DATA_UINT32); 6113 kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32); 6114 6115 mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP); 6116 mdi_statp->pi_kstat_ref = 1; 6117 mdi_statp->pi_kstat_iostats = kiosp; 6118 mdi_statp->pi_kstat_errstats = kerrsp; 6119 kstat_install(kiosp); 6120 kstat_install(kerrsp); 6121 MDI_PI(pip)->pi_kstats = mdi_statp; 6122 return (MDI_SUCCESS); 6123 } 6124 6125 /* 6126 * destroy per-path properties 6127 */ 6128 static void 6129 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip) 6130 { 6131 6132 struct mdi_pi_kstats *mdi_statp; 6133 6134 if (MDI_PI(pip)->pi_kstats == NULL) 6135 return; 6136 if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL) 6137 return; 6138 6139 MDI_PI(pip)->pi_kstats = NULL; 6140 6141 /* 6142 * the kstat may be shared between multiple pathinfo nodes 6143 * decrement this pathinfo's usage, removing the kstats 6144 * themselves when the last pathinfo reference is removed. 
6145 */ 6146 ASSERT(mdi_statp->pi_kstat_ref > 0); 6147 if (--mdi_statp->pi_kstat_ref != 0) 6148 return; 6149 6150 kstat_delete(mdi_statp->pi_kstat_iostats); 6151 kstat_delete(mdi_statp->pi_kstat_errstats); 6152 kmem_free(mdi_statp, sizeof (*mdi_statp)); 6153 } 6154 6155 /* 6156 * update I/O paths KSTATS 6157 */ 6158 void 6159 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp) 6160 { 6161 kstat_t *iostatp; 6162 size_t xfer_cnt; 6163 6164 ASSERT(pip != NULL); 6165 6166 /* 6167 * I/O can be driven across a path prior to having path 6168 * statistics available, i.e. probe(9e). 6169 */ 6170 if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) { 6171 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats; 6172 xfer_cnt = bp->b_bcount - bp->b_resid; 6173 if (bp->b_flags & B_READ) { 6174 KSTAT_IO_PTR(iostatp)->reads++; 6175 KSTAT_IO_PTR(iostatp)->nread += xfer_cnt; 6176 } else { 6177 KSTAT_IO_PTR(iostatp)->writes++; 6178 KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt; 6179 } 6180 } 6181 } 6182 6183 /* 6184 * Enable the path(specific client/target/initiator) 6185 * Enabling a path means that MPxIO may select the enabled path for routing 6186 * future I/O requests, subject to other path state constraints. 6187 */ 6188 int 6189 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags) 6190 { 6191 mdi_phci_t *ph; 6192 6193 ph = MDI_PI(pip)->pi_phci; 6194 if (ph == NULL) { 6195 MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip), 6196 "!failed: path %s %p: NULL ph", 6197 mdi_pi_spathname(pip), (void *)pip)); 6198 return (MDI_FAILURE); 6199 } 6200 6201 (void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags, 6202 MDI_ENABLE_OP); 6203 MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip, 6204 "!returning success pip = %p. ph = %p", 6205 (void *)pip, (void *)ph)); 6206 return (MDI_SUCCESS); 6207 6208 } 6209 6210 /* 6211 * Disable the path (specific client/target/initiator) 6212 * Disabling a path means that MPxIO will not select the disabled path for 6213 * routing any new I/O requests. 
6214 */ 6215 int 6216 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags) 6217 { 6218 mdi_phci_t *ph; 6219 6220 ph = MDI_PI(pip)->pi_phci; 6221 if (ph == NULL) { 6222 MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip), 6223 "!failed: path %s %p: NULL ph", 6224 mdi_pi_spathname(pip), (void *)pip)); 6225 return (MDI_FAILURE); 6226 } 6227 6228 (void) i_mdi_enable_disable_path(pip, 6229 ph->ph_vhci, flags, MDI_DISABLE_OP); 6230 MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip, 6231 "!returning success pip = %p. ph = %p", 6232 (void *)pip, (void *)ph)); 6233 return (MDI_SUCCESS); 6234 } 6235 6236 /* 6237 * disable the path to a particular pHCI (pHCI specified in the phci_path 6238 * argument) for a particular client (specified in the client_path argument). 6239 * Disabling a path means that MPxIO will not select the disabled path for 6240 * routing any new I/O requests. 6241 * NOTE: this will be removed once the NWS files are changed to use the new 6242 * mdi_{enable,disable}_path interfaces 6243 */ 6244 int 6245 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags) 6246 { 6247 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP)); 6248 } 6249 6250 /* 6251 * Enable the path to a particular pHCI (pHCI specified in the phci_path 6252 * argument) for a particular client (specified in the client_path argument). 6253 * Enabling a path means that MPxIO may select the enabled path for routing 6254 * future I/O requests, subject to other path state constraints. 6255 * NOTE: this will be removed once the NWS files are changed to use the new 6256 * mdi_{enable,disable}_path interfaces 6257 */ 6258 6259 int 6260 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags) 6261 { 6262 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP)); 6263 } 6264 6265 /* 6266 * Common routine for doing enable/disable. 
6267 */ 6268 static mdi_pathinfo_t * 6269 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags, 6270 int op) 6271 { 6272 int sync_flag = 0; 6273 int rv; 6274 mdi_pathinfo_t *next; 6275 int (*f)() = NULL; 6276 6277 /* 6278 * Check to make sure the path is not already in the 6279 * requested state. If it is just return the next path 6280 * as we have nothing to do here. 6281 */ 6282 if ((MDI_PI_IS_DISABLE(pip) && op == MDI_DISABLE_OP) || 6283 (!MDI_PI_IS_DISABLE(pip) && op == MDI_ENABLE_OP)) { 6284 MDI_PI_LOCK(pip); 6285 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 6286 MDI_PI_UNLOCK(pip); 6287 return (next); 6288 } 6289 6290 f = vh->vh_ops->vo_pi_state_change; 6291 6292 sync_flag = (flags << 8) & 0xf00; 6293 6294 /* 6295 * Do a callback into the mdi consumer to let it 6296 * know that path is about to get enabled/disabled. 6297 */ 6298 if (f != NULL) { 6299 rv = (*f)(vh->vh_dip, pip, 0, 6300 MDI_PI_EXT_STATE(pip), 6301 MDI_EXT_STATE_CHANGE | sync_flag | 6302 op | MDI_BEFORE_STATE_CHANGE); 6303 if (rv != MDI_SUCCESS) { 6304 MDI_DEBUG(2, (MDI_WARN, vh->vh_dip, 6305 "vo_pi_state_change: failed rv = %x", rv)); 6306 } 6307 } 6308 MDI_PI_LOCK(pip); 6309 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link; 6310 6311 switch (flags) { 6312 case USER_DISABLE: 6313 if (op == MDI_DISABLE_OP) { 6314 MDI_PI_SET_USER_DISABLE(pip); 6315 } else { 6316 MDI_PI_SET_USER_ENABLE(pip); 6317 } 6318 break; 6319 case DRIVER_DISABLE: 6320 if (op == MDI_DISABLE_OP) { 6321 MDI_PI_SET_DRV_DISABLE(pip); 6322 } else { 6323 MDI_PI_SET_DRV_ENABLE(pip); 6324 } 6325 break; 6326 case DRIVER_DISABLE_TRANSIENT: 6327 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) { 6328 MDI_PI_SET_DRV_DISABLE_TRANS(pip); 6329 } else { 6330 MDI_PI_SET_DRV_ENABLE_TRANS(pip); 6331 } 6332 break; 6333 } 6334 MDI_PI_UNLOCK(pip); 6335 /* 6336 * Do a callback into the mdi consumer to let it 6337 * know that path is now enabled/disabled. 
6338 */ 6339 if (f != NULL) { 6340 rv = (*f)(vh->vh_dip, pip, 0, 6341 MDI_PI_EXT_STATE(pip), 6342 MDI_EXT_STATE_CHANGE | sync_flag | 6343 op | MDI_AFTER_STATE_CHANGE); 6344 if (rv != MDI_SUCCESS) { 6345 MDI_DEBUG(2, (MDI_WARN, vh->vh_dip, 6346 "vo_pi_state_change failed: rv = %x", rv)); 6347 } 6348 } 6349 return (next); 6350 } 6351 6352 /* 6353 * Common routine for doing enable/disable. 6354 * NOTE: this will be removed once the NWS files are changed to use the new 6355 * mdi_{enable,disable}_path has been putback 6356 */ 6357 int 6358 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op) 6359 { 6360 6361 mdi_phci_t *ph; 6362 mdi_vhci_t *vh = NULL; 6363 mdi_client_t *ct; 6364 mdi_pathinfo_t *next, *pip; 6365 int found_it; 6366 6367 ph = i_devi_get_phci(pdip); 6368 MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip, 6369 "!op = %d pdip = %p cdip = %p", op, (void *)pdip, 6370 (void *)cdip)); 6371 if (ph == NULL) { 6372 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6373 "!failed: operation %d: NULL ph", op)); 6374 return (MDI_FAILURE); 6375 } 6376 6377 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) { 6378 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6379 "!failed: invalid operation %d", op)); 6380 return (MDI_FAILURE); 6381 } 6382 6383 vh = ph->ph_vhci; 6384 6385 if (cdip == NULL) { 6386 /* 6387 * Need to mark the Phci as enabled/disabled. 6388 */ 6389 MDI_DEBUG(4, (MDI_NOTE, cdip ? 
cdip : pdip, 6390 "op %d for the phci", op)); 6391 MDI_PHCI_LOCK(ph); 6392 switch (flags) { 6393 case USER_DISABLE: 6394 if (op == MDI_DISABLE_OP) { 6395 MDI_PHCI_SET_USER_DISABLE(ph); 6396 } else { 6397 MDI_PHCI_SET_USER_ENABLE(ph); 6398 } 6399 break; 6400 case DRIVER_DISABLE: 6401 if (op == MDI_DISABLE_OP) { 6402 MDI_PHCI_SET_DRV_DISABLE(ph); 6403 } else { 6404 MDI_PHCI_SET_DRV_ENABLE(ph); 6405 } 6406 break; 6407 case DRIVER_DISABLE_TRANSIENT: 6408 if (op == MDI_DISABLE_OP) { 6409 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph); 6410 } else { 6411 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph); 6412 } 6413 break; 6414 default: 6415 MDI_PHCI_UNLOCK(ph); 6416 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6417 "!invalid flag argument= %d", flags)); 6418 } 6419 6420 /* 6421 * Phci has been disabled. Now try to enable/disable 6422 * path info's to each client. 6423 */ 6424 pip = ph->ph_path_head; 6425 while (pip != NULL) { 6426 pip = i_mdi_enable_disable_path(pip, vh, flags, op); 6427 } 6428 MDI_PHCI_UNLOCK(ph); 6429 } else { 6430 6431 /* 6432 * Disable a specific client. 6433 */ 6434 ct = i_devi_get_client(cdip); 6435 if (ct == NULL) { 6436 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6437 "!failed: operation = %d: NULL ct", op)); 6438 return (MDI_FAILURE); 6439 } 6440 6441 MDI_CLIENT_LOCK(ct); 6442 pip = ct->ct_path_head; 6443 found_it = 0; 6444 while (pip != NULL) { 6445 MDI_PI_LOCK(pip); 6446 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6447 if (MDI_PI(pip)->pi_phci == ph) { 6448 MDI_PI_UNLOCK(pip); 6449 found_it = 1; 6450 break; 6451 } 6452 MDI_PI_UNLOCK(pip); 6453 pip = next; 6454 } 6455 6456 6457 MDI_CLIENT_UNLOCK(ct); 6458 if (found_it == 0) { 6459 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip, 6460 "!failed. Could not find corresponding pip\n")); 6461 return (MDI_FAILURE); 6462 } 6463 6464 (void) i_mdi_enable_disable_path(pip, vh, flags, op); 6465 } 6466 6467 MDI_DEBUG(5, (MDI_NOTE, cdip ? 
cdip : pdip, 6468 "!op %d returning success pdip = %p cdip = %p", 6469 op, (void *)pdip, (void *)cdip)); 6470 return (MDI_SUCCESS); 6471 } 6472 6473 /* 6474 * Ensure phci powered up 6475 */ 6476 static void 6477 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip) 6478 { 6479 dev_info_t *ph_dip; 6480 6481 ASSERT(pip != NULL); 6482 ASSERT(MDI_PI_LOCKED(pip)); 6483 6484 if (MDI_PI(pip)->pi_pm_held) { 6485 return; 6486 } 6487 6488 ph_dip = mdi_pi_get_phci(pip); 6489 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6490 "%s %p", mdi_pi_spathname(pip), (void *)pip)); 6491 if (ph_dip == NULL) { 6492 return; 6493 } 6494 6495 MDI_PI_UNLOCK(pip); 6496 MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt was %d", 6497 DEVI(ph_dip)->devi_pm_kidsupcnt)); 6498 pm_hold_power(ph_dip); 6499 MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt is %d", 6500 DEVI(ph_dip)->devi_pm_kidsupcnt)); 6501 MDI_PI_LOCK(pip); 6502 6503 /* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */ 6504 if (DEVI(ph_dip)->devi_pm_info) 6505 MDI_PI(pip)->pi_pm_held = 1; 6506 } 6507 6508 /* 6509 * Allow phci powered down 6510 */ 6511 static void 6512 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip) 6513 { 6514 dev_info_t *ph_dip = NULL; 6515 6516 ASSERT(pip != NULL); 6517 ASSERT(MDI_PI_LOCKED(pip)); 6518 6519 if (MDI_PI(pip)->pi_pm_held == 0) { 6520 return; 6521 } 6522 6523 ph_dip = mdi_pi_get_phci(pip); 6524 ASSERT(ph_dip != NULL); 6525 6526 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6527 "%s %p", mdi_pi_spathname(pip), (void *)pip)); 6528 6529 MDI_PI_UNLOCK(pip); 6530 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6531 "kidsupcnt was %d", DEVI(ph_dip)->devi_pm_kidsupcnt)); 6532 pm_rele_power(ph_dip); 6533 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6534 "kidsupcnt is %d", DEVI(ph_dip)->devi_pm_kidsupcnt)); 6535 MDI_PI_LOCK(pip); 6536 6537 MDI_PI(pip)->pi_pm_held = 0; 6538 } 6539 6540 static void 6541 i_mdi_pm_hold_client(mdi_client_t *ct, int incr) 6542 { 6543 ASSERT(MDI_CLIENT_LOCKED(ct)); 6544 6545 ct->ct_power_cnt += incr; 6546 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 6547 "%p ct_power_cnt = 
%d incr = %d", 6548 (void *)ct, ct->ct_power_cnt, incr)); 6549 ASSERT(ct->ct_power_cnt >= 0); 6550 } 6551 6552 static void 6553 i_mdi_rele_all_phci(mdi_client_t *ct) 6554 { 6555 mdi_pathinfo_t *pip; 6556 6557 ASSERT(MDI_CLIENT_LOCKED(ct)); 6558 pip = (mdi_pathinfo_t *)ct->ct_path_head; 6559 while (pip != NULL) { 6560 mdi_hold_path(pip); 6561 MDI_PI_LOCK(pip); 6562 i_mdi_pm_rele_pip(pip); 6563 MDI_PI_UNLOCK(pip); 6564 mdi_rele_path(pip); 6565 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6566 } 6567 } 6568 6569 static void 6570 i_mdi_pm_rele_client(mdi_client_t *ct, int decr) 6571 { 6572 ASSERT(MDI_CLIENT_LOCKED(ct)); 6573 6574 if (i_ddi_devi_attached(ct->ct_dip)) { 6575 ct->ct_power_cnt -= decr; 6576 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 6577 "%p ct_power_cnt = %d decr = %d", 6578 (void *)ct, ct->ct_power_cnt, decr)); 6579 } 6580 6581 ASSERT(ct->ct_power_cnt >= 0); 6582 if (ct->ct_power_cnt == 0) { 6583 i_mdi_rele_all_phci(ct); 6584 return; 6585 } 6586 } 6587 6588 static void 6589 i_mdi_pm_reset_client(mdi_client_t *ct) 6590 { 6591 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip, 6592 "%p ct_power_cnt = %d", (void *)ct, ct->ct_power_cnt)); 6593 ASSERT(MDI_CLIENT_LOCKED(ct)); 6594 ct->ct_power_cnt = 0; 6595 i_mdi_rele_all_phci(ct); 6596 ct->ct_powercnt_config = 0; 6597 ct->ct_powercnt_unconfig = 0; 6598 ct->ct_powercnt_reset = 1; 6599 } 6600 6601 static int 6602 i_mdi_power_one_phci(mdi_pathinfo_t *pip) 6603 { 6604 int ret; 6605 dev_info_t *ph_dip; 6606 6607 MDI_PI_LOCK(pip); 6608 i_mdi_pm_hold_pip(pip); 6609 6610 ph_dip = mdi_pi_get_phci(pip); 6611 MDI_PI_UNLOCK(pip); 6612 6613 /* bring all components of phci to full power */ 6614 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6615 "pm_powerup for %s%d %p", ddi_driver_name(ph_dip), 6616 ddi_get_instance(ph_dip), (void *)pip)); 6617 6618 ret = pm_powerup(ph_dip); 6619 6620 if (ret == DDI_FAILURE) { 6621 MDI_DEBUG(4, (MDI_NOTE, ph_dip, 6622 "pm_powerup FAILED for %s%d %p", 6623 ddi_driver_name(ph_dip), ddi_get_instance(ph_dip), 6624 (void 
*)pip)); 6625 6626 MDI_PI_LOCK(pip); 6627 i_mdi_pm_rele_pip(pip); 6628 MDI_PI_UNLOCK(pip); 6629 return (MDI_FAILURE); 6630 } 6631 6632 return (MDI_SUCCESS); 6633 } 6634 6635 static int 6636 i_mdi_power_all_phci(mdi_client_t *ct) 6637 { 6638 mdi_pathinfo_t *pip; 6639 int succeeded = 0; 6640 6641 ASSERT(MDI_CLIENT_LOCKED(ct)); 6642 pip = (mdi_pathinfo_t *)ct->ct_path_head; 6643 while (pip != NULL) { 6644 /* 6645 * Don't power if MDI_PATHINFO_STATE_FAULT 6646 * or MDI_PATHINFO_STATE_OFFLINE. 6647 */ 6648 if (MDI_PI_IS_INIT(pip) || 6649 MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) { 6650 mdi_hold_path(pip); 6651 MDI_CLIENT_UNLOCK(ct); 6652 if (i_mdi_power_one_phci(pip) == MDI_SUCCESS) 6653 succeeded = 1; 6654 6655 ASSERT(ct == MDI_PI(pip)->pi_client); 6656 MDI_CLIENT_LOCK(ct); 6657 mdi_rele_path(pip); 6658 } 6659 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 6660 } 6661 6662 return (succeeded ? MDI_SUCCESS : MDI_FAILURE); 6663 } 6664 6665 /* 6666 * mdi_bus_power(): 6667 * 1. Place the phci(s) into powered up state so that 6668 * client can do power management 6669 * 2. Ensure phci powered up as client power managing 6670 * Return Values: 6671 * MDI_SUCCESS 6672 * MDI_FAILURE 6673 */ 6674 int 6675 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op, 6676 void *arg, void *result) 6677 { 6678 int ret = MDI_SUCCESS; 6679 pm_bp_child_pwrchg_t *bpc; 6680 mdi_client_t *ct; 6681 dev_info_t *cdip; 6682 pm_bp_has_changed_t *bphc; 6683 6684 /* 6685 * BUS_POWER_NOINVOL not supported 6686 */ 6687 if (op == BUS_POWER_NOINVOL) 6688 return (MDI_FAILURE); 6689 6690 /* 6691 * ignore other OPs. 
6692 * return quickly to save cou cycles on the ct processing 6693 */ 6694 switch (op) { 6695 case BUS_POWER_PRE_NOTIFICATION: 6696 case BUS_POWER_POST_NOTIFICATION: 6697 bpc = (pm_bp_child_pwrchg_t *)arg; 6698 cdip = bpc->bpc_dip; 6699 break; 6700 case BUS_POWER_HAS_CHANGED: 6701 bphc = (pm_bp_has_changed_t *)arg; 6702 cdip = bphc->bphc_dip; 6703 break; 6704 default: 6705 return (pm_busop_bus_power(parent, impl_arg, op, arg, result)); 6706 } 6707 6708 ASSERT(MDI_CLIENT(cdip)); 6709 6710 ct = i_devi_get_client(cdip); 6711 if (ct == NULL) 6712 return (MDI_FAILURE); 6713 6714 /* 6715 * wait till the mdi_pathinfo node state change are processed 6716 */ 6717 MDI_CLIENT_LOCK(ct); 6718 switch (op) { 6719 case BUS_POWER_PRE_NOTIFICATION: 6720 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6721 "BUS_POWER_PRE_NOTIFICATION:" 6722 "%s@%s, olevel=%d, nlevel=%d, comp=%d", 6723 ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 6724 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp)); 6725 6726 /* serialize power level change per client */ 6727 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6728 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6729 6730 MDI_CLIENT_SET_POWER_TRANSITION(ct); 6731 6732 if (ct->ct_power_cnt == 0) { 6733 ret = i_mdi_power_all_phci(ct); 6734 } 6735 6736 /* 6737 * if new_level > 0: 6738 * - hold phci(s) 6739 * - power up phci(s) if not already 6740 * ignore power down 6741 */ 6742 if (bpc->bpc_nlevel > 0) { 6743 if (!DEVI_IS_ATTACHING(ct->ct_dip)) { 6744 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6745 "i_mdi_pm_hold_client\n")); 6746 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6747 } 6748 } 6749 break; 6750 case BUS_POWER_POST_NOTIFICATION: 6751 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6752 "BUS_POWER_POST_NOTIFICATION:" 6753 "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d", 6754 ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip), 6755 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp, 6756 *(int *)result)); 6757 6758 if (*(int *)result == DDI_SUCCESS) { 6759 if 
(bpc->bpc_nlevel > 0) { 6760 MDI_CLIENT_SET_POWER_UP(ct); 6761 } else { 6762 MDI_CLIENT_SET_POWER_DOWN(ct); 6763 } 6764 } 6765 6766 /* release the hold we did in pre-notification */ 6767 if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) && 6768 !DEVI_IS_ATTACHING(ct->ct_dip)) { 6769 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6770 "i_mdi_pm_rele_client\n")); 6771 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6772 } 6773 6774 if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) { 6775 /* another thread might started attaching */ 6776 if (DEVI_IS_ATTACHING(ct->ct_dip)) { 6777 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6778 "i_mdi_pm_rele_client\n")); 6779 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6780 /* detaching has been taken care in pm_post_unconfig */ 6781 } else if (!DEVI_IS_DETACHING(ct->ct_dip)) { 6782 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip, 6783 "i_mdi_pm_reset_client\n")); 6784 i_mdi_pm_reset_client(ct); 6785 } 6786 } 6787 6788 MDI_CLIENT_CLEAR_POWER_TRANSITION(ct); 6789 cv_broadcast(&ct->ct_powerchange_cv); 6790 6791 break; 6792 6793 /* need to do more */ 6794 case BUS_POWER_HAS_CHANGED: 6795 MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip, 6796 "BUS_POWER_HAS_CHANGED:" 6797 "%s@%s, olevel=%d, nlevel=%d, comp=%d", 6798 ddi_node_name(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip), 6799 bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp)); 6800 6801 if (bphc->bphc_nlevel > 0 && 6802 bphc->bphc_nlevel > bphc->bphc_olevel) { 6803 if (ct->ct_power_cnt == 0) { 6804 ret = i_mdi_power_all_phci(ct); 6805 } 6806 MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip, 6807 "i_mdi_pm_hold_client\n")); 6808 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6809 } 6810 6811 if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) { 6812 MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip, 6813 "i_mdi_pm_rele_client\n")); 6814 i_mdi_pm_rele_client(ct, ct->ct_path_count); 6815 } 6816 break; 6817 } 6818 6819 MDI_CLIENT_UNLOCK(ct); 6820 return (ret); 6821 } 6822 6823 static int 6824 
i_mdi_pm_pre_config_one(dev_info_t *child) 6825 { 6826 int ret = MDI_SUCCESS; 6827 mdi_client_t *ct; 6828 6829 ct = i_devi_get_client(child); 6830 if (ct == NULL) 6831 return (MDI_FAILURE); 6832 6833 MDI_CLIENT_LOCK(ct); 6834 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6835 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 6836 6837 if (!MDI_CLIENT_IS_FAILED(ct)) { 6838 MDI_CLIENT_UNLOCK(ct); 6839 MDI_DEBUG(4, (MDI_NOTE, child, "already configured\n")); 6840 return (MDI_SUCCESS); 6841 } 6842 6843 if (ct->ct_powercnt_config) { 6844 MDI_CLIENT_UNLOCK(ct); 6845 MDI_DEBUG(4, (MDI_NOTE, child, "already held\n")); 6846 return (MDI_SUCCESS); 6847 } 6848 6849 if (ct->ct_power_cnt == 0) { 6850 ret = i_mdi_power_all_phci(ct); 6851 } 6852 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n")); 6853 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6854 ct->ct_powercnt_config = 1; 6855 ct->ct_powercnt_reset = 0; 6856 MDI_CLIENT_UNLOCK(ct); 6857 return (ret); 6858 } 6859 6860 static int 6861 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child) 6862 { 6863 int ret = MDI_SUCCESS; 6864 dev_info_t *cdip; 6865 int circ; 6866 6867 ASSERT(MDI_VHCI(vdip)); 6868 6869 /* ndi_devi_config_one */ 6870 if (child) { 6871 ASSERT(DEVI_BUSY_OWNED(vdip)); 6872 return (i_mdi_pm_pre_config_one(child)); 6873 } 6874 6875 /* devi_config_common */ 6876 ndi_devi_enter(vdip, &circ); 6877 cdip = ddi_get_child(vdip); 6878 while (cdip) { 6879 dev_info_t *next = ddi_get_next_sibling(cdip); 6880 6881 ret = i_mdi_pm_pre_config_one(cdip); 6882 if (ret != MDI_SUCCESS) 6883 break; 6884 cdip = next; 6885 } 6886 ndi_devi_exit(vdip, circ); 6887 return (ret); 6888 } 6889 6890 static int 6891 i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags) 6892 { 6893 int ret = MDI_SUCCESS; 6894 mdi_client_t *ct; 6895 6896 ct = i_devi_get_client(child); 6897 if (ct == NULL) 6898 return (MDI_FAILURE); 6899 6900 MDI_CLIENT_LOCK(ct); 6901 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6902 cv_wait(&ct->ct_powerchange_cv, 
&ct->ct_mutex); 6903 6904 if (!i_ddi_devi_attached(child)) { 6905 MDI_DEBUG(4, (MDI_NOTE, child, "node detached already\n")); 6906 MDI_CLIENT_UNLOCK(ct); 6907 return (MDI_SUCCESS); 6908 } 6909 6910 if (MDI_CLIENT_IS_POWERED_DOWN(ct) && 6911 (flags & NDI_AUTODETACH)) { 6912 MDI_DEBUG(4, (MDI_NOTE, child, "auto-modunload\n")); 6913 MDI_CLIENT_UNLOCK(ct); 6914 return (MDI_FAILURE); 6915 } 6916 6917 if (ct->ct_powercnt_unconfig) { 6918 MDI_DEBUG(4, (MDI_NOTE, child, "ct_powercnt_held\n")); 6919 MDI_CLIENT_UNLOCK(ct); 6920 *held = 1; 6921 return (MDI_SUCCESS); 6922 } 6923 6924 if (ct->ct_power_cnt == 0) { 6925 ret = i_mdi_power_all_phci(ct); 6926 } 6927 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n")); 6928 i_mdi_pm_hold_client(ct, ct->ct_path_count); 6929 ct->ct_powercnt_unconfig = 1; 6930 ct->ct_powercnt_reset = 0; 6931 MDI_CLIENT_UNLOCK(ct); 6932 if (ret == MDI_SUCCESS) 6933 *held = 1; 6934 return (ret); 6935 } 6936 6937 static int 6938 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held, 6939 int flags) 6940 { 6941 int ret = MDI_SUCCESS; 6942 dev_info_t *cdip; 6943 int circ; 6944 6945 ASSERT(MDI_VHCI(vdip)); 6946 *held = 0; 6947 6948 /* ndi_devi_unconfig_one */ 6949 if (child) { 6950 ASSERT(DEVI_BUSY_OWNED(vdip)); 6951 return (i_mdi_pm_pre_unconfig_one(child, held, flags)); 6952 } 6953 6954 /* devi_unconfig_common */ 6955 ndi_devi_enter(vdip, &circ); 6956 cdip = ddi_get_child(vdip); 6957 while (cdip) { 6958 dev_info_t *next = ddi_get_next_sibling(cdip); 6959 6960 ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags); 6961 cdip = next; 6962 } 6963 ndi_devi_exit(vdip, circ); 6964 6965 if (*held) 6966 ret = MDI_SUCCESS; 6967 6968 return (ret); 6969 } 6970 6971 static void 6972 i_mdi_pm_post_config_one(dev_info_t *child) 6973 { 6974 mdi_client_t *ct; 6975 6976 ct = i_devi_get_client(child); 6977 if (ct == NULL) 6978 return; 6979 6980 MDI_CLIENT_LOCK(ct); 6981 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 6982 cv_wait(&ct->ct_powerchange_cv, 
&ct->ct_mutex); 6983 6984 if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) { 6985 MDI_DEBUG(4, (MDI_NOTE, child, "not configured\n")); 6986 MDI_CLIENT_UNLOCK(ct); 6987 return; 6988 } 6989 6990 /* client has not been updated */ 6991 if (MDI_CLIENT_IS_FAILED(ct)) { 6992 MDI_DEBUG(4, (MDI_NOTE, child, "client failed\n")); 6993 MDI_CLIENT_UNLOCK(ct); 6994 return; 6995 } 6996 6997 /* another thread might have powered it down or detached it */ 6998 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 6999 !DEVI_IS_ATTACHING(child)) || 7000 (!i_ddi_devi_attached(child) && 7001 !DEVI_IS_ATTACHING(child))) { 7002 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n")); 7003 i_mdi_pm_reset_client(ct); 7004 } else { 7005 mdi_pathinfo_t *pip, *next; 7006 int valid_path_count = 0; 7007 7008 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n")); 7009 pip = ct->ct_path_head; 7010 while (pip != NULL) { 7011 MDI_PI_LOCK(pip); 7012 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 7013 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 7014 valid_path_count ++; 7015 MDI_PI_UNLOCK(pip); 7016 pip = next; 7017 } 7018 i_mdi_pm_rele_client(ct, valid_path_count); 7019 } 7020 ct->ct_powercnt_config = 0; 7021 MDI_CLIENT_UNLOCK(ct); 7022 } 7023 7024 static void 7025 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child) 7026 { 7027 int circ; 7028 dev_info_t *cdip; 7029 7030 ASSERT(MDI_VHCI(vdip)); 7031 7032 /* ndi_devi_config_one */ 7033 if (child) { 7034 ASSERT(DEVI_BUSY_OWNED(vdip)); 7035 i_mdi_pm_post_config_one(child); 7036 return; 7037 } 7038 7039 /* devi_config_common */ 7040 ndi_devi_enter(vdip, &circ); 7041 cdip = ddi_get_child(vdip); 7042 while (cdip) { 7043 dev_info_t *next = ddi_get_next_sibling(cdip); 7044 7045 i_mdi_pm_post_config_one(cdip); 7046 cdip = next; 7047 } 7048 ndi_devi_exit(vdip, circ); 7049 } 7050 7051 static void 7052 i_mdi_pm_post_unconfig_one(dev_info_t *child) 7053 { 7054 mdi_client_t *ct; 7055 7056 ct = i_devi_get_client(child); 7057 if (ct == NULL) 7058 
return; 7059 7060 MDI_CLIENT_LOCK(ct); 7061 while (MDI_CLIENT_IS_POWER_TRANSITION(ct)) 7062 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex); 7063 7064 if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) { 7065 MDI_DEBUG(4, (MDI_NOTE, child, "not held\n")); 7066 MDI_CLIENT_UNLOCK(ct); 7067 return; 7068 } 7069 7070 /* failure detaching or another thread just attached it */ 7071 if ((MDI_CLIENT_IS_POWERED_DOWN(ct) && 7072 i_ddi_devi_attached(child)) || 7073 (!i_ddi_devi_attached(child) && 7074 !DEVI_IS_ATTACHING(child))) { 7075 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n")); 7076 i_mdi_pm_reset_client(ct); 7077 } else { 7078 mdi_pathinfo_t *pip, *next; 7079 int valid_path_count = 0; 7080 7081 MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n")); 7082 pip = ct->ct_path_head; 7083 while (pip != NULL) { 7084 MDI_PI_LOCK(pip); 7085 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link; 7086 if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) 7087 valid_path_count ++; 7088 MDI_PI_UNLOCK(pip); 7089 pip = next; 7090 } 7091 i_mdi_pm_rele_client(ct, valid_path_count); 7092 ct->ct_powercnt_unconfig = 0; 7093 } 7094 7095 MDI_CLIENT_UNLOCK(ct); 7096 } 7097 7098 static void 7099 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held) 7100 { 7101 int circ; 7102 dev_info_t *cdip; 7103 7104 ASSERT(MDI_VHCI(vdip)); 7105 7106 if (!held) { 7107 MDI_DEBUG(4, (MDI_NOTE, vdip, "held = %d", held)); 7108 return; 7109 } 7110 7111 if (child) { 7112 ASSERT(DEVI_BUSY_OWNED(vdip)); 7113 i_mdi_pm_post_unconfig_one(child); 7114 return; 7115 } 7116 7117 ndi_devi_enter(vdip, &circ); 7118 cdip = ddi_get_child(vdip); 7119 while (cdip) { 7120 dev_info_t *next = ddi_get_next_sibling(cdip); 7121 7122 i_mdi_pm_post_unconfig_one(cdip); 7123 cdip = next; 7124 } 7125 ndi_devi_exit(vdip, circ); 7126 } 7127 7128 int 7129 mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags) 7130 { 7131 int circ, ret = MDI_SUCCESS; 7132 dev_info_t *client_dip = NULL; 
7133 mdi_client_t *ct; 7134 7135 /* 7136 * Handling ndi_devi_config_one and ndi_devi_unconfig_one. 7137 * Power up pHCI for the named client device. 7138 * Note: Before the client is enumerated under vhci by phci, 7139 * client_dip can be NULL. Then proceed to power up all the 7140 * pHCIs. 7141 */ 7142 if (devnm != NULL) { 7143 ndi_devi_enter(vdip, &circ); 7144 client_dip = ndi_devi_findchild(vdip, devnm); 7145 } 7146 7147 MDI_DEBUG(4, (MDI_NOTE, vdip, 7148 "op = %d %s %p", op, devnm ? devnm : "", (void *)client_dip)); 7149 7150 switch (op) { 7151 case MDI_PM_PRE_CONFIG: 7152 ret = i_mdi_pm_pre_config(vdip, client_dip); 7153 break; 7154 7155 case MDI_PM_PRE_UNCONFIG: 7156 ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args, 7157 flags); 7158 break; 7159 7160 case MDI_PM_POST_CONFIG: 7161 i_mdi_pm_post_config(vdip, client_dip); 7162 break; 7163 7164 case MDI_PM_POST_UNCONFIG: 7165 i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args); 7166 break; 7167 7168 case MDI_PM_HOLD_POWER: 7169 case MDI_PM_RELE_POWER: 7170 ASSERT(args); 7171 7172 client_dip = (dev_info_t *)args; 7173 ASSERT(MDI_CLIENT(client_dip)); 7174 7175 ct = i_devi_get_client(client_dip); 7176 MDI_CLIENT_LOCK(ct); 7177 7178 if (op == MDI_PM_HOLD_POWER) { 7179 if (ct->ct_power_cnt == 0) { 7180 (void) i_mdi_power_all_phci(ct); 7181 MDI_DEBUG(4, (MDI_NOTE, client_dip, 7182 "i_mdi_pm_hold_client\n")); 7183 i_mdi_pm_hold_client(ct, ct->ct_path_count); 7184 } 7185 } else { 7186 if (DEVI_IS_ATTACHING(client_dip)) { 7187 MDI_DEBUG(4, (MDI_NOTE, client_dip, 7188 "i_mdi_pm_rele_client\n")); 7189 i_mdi_pm_rele_client(ct, ct->ct_path_count); 7190 } else { 7191 MDI_DEBUG(4, (MDI_NOTE, client_dip, 7192 "i_mdi_pm_reset_client\n")); 7193 i_mdi_pm_reset_client(ct); 7194 } 7195 } 7196 7197 MDI_CLIENT_UNLOCK(ct); 7198 break; 7199 7200 default: 7201 break; 7202 } 7203 7204 if (devnm) 7205 ndi_devi_exit(vdip, circ); 7206 7207 return (ret); 7208 } 7209 7210 int 7211 mdi_component_is_vhci(dev_info_t *dip, const char 
**mdi_class) 7212 { 7213 mdi_vhci_t *vhci; 7214 7215 if (!MDI_VHCI(dip)) 7216 return (MDI_FAILURE); 7217 7218 if (mdi_class) { 7219 vhci = DEVI(dip)->devi_mdi_xhci; 7220 ASSERT(vhci); 7221 *mdi_class = vhci->vh_class; 7222 } 7223 7224 return (MDI_SUCCESS); 7225 } 7226 7227 int 7228 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class) 7229 { 7230 mdi_phci_t *phci; 7231 7232 if (!MDI_PHCI(dip)) 7233 return (MDI_FAILURE); 7234 7235 if (mdi_class) { 7236 phci = DEVI(dip)->devi_mdi_xhci; 7237 ASSERT(phci); 7238 *mdi_class = phci->ph_vhci->vh_class; 7239 } 7240 7241 return (MDI_SUCCESS); 7242 } 7243 7244 int 7245 mdi_component_is_client(dev_info_t *dip, const char **mdi_class) 7246 { 7247 mdi_client_t *client; 7248 7249 if (!MDI_CLIENT(dip)) 7250 return (MDI_FAILURE); 7251 7252 if (mdi_class) { 7253 client = DEVI(dip)->devi_mdi_client; 7254 ASSERT(client); 7255 *mdi_class = client->ct_vhci->vh_class; 7256 } 7257 7258 return (MDI_SUCCESS); 7259 } 7260 7261 void * 7262 mdi_client_get_vhci_private(dev_info_t *dip) 7263 { 7264 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 7265 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 7266 mdi_client_t *ct; 7267 ct = i_devi_get_client(dip); 7268 return (ct->ct_vprivate); 7269 } 7270 return (NULL); 7271 } 7272 7273 void 7274 mdi_client_set_vhci_private(dev_info_t *dip, void *data) 7275 { 7276 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS); 7277 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) { 7278 mdi_client_t *ct; 7279 ct = i_devi_get_client(dip); 7280 ct->ct_vprivate = data; 7281 } 7282 } 7283 /* 7284 * mdi_pi_get_vhci_private(): 7285 * Get the vhci private information associated with the 7286 * mdi_pathinfo node 7287 */ 7288 void * 7289 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip) 7290 { 7291 caddr_t vprivate = NULL; 7292 if (pip) { 7293 vprivate = MDI_PI(pip)->pi_vprivate; 7294 } 7295 return (vprivate); 7296 } 7297 7298 /* 7299 * mdi_pi_set_vhci_private(): 7300 * Set the vhci 
private information in the mdi_pathinfo node 7301 */ 7302 void 7303 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv) 7304 { 7305 if (pip) { 7306 MDI_PI(pip)->pi_vprivate = priv; 7307 } 7308 } 7309 7310 /* 7311 * mdi_phci_get_vhci_private(): 7312 * Get the vhci private information associated with the 7313 * mdi_phci node 7314 */ 7315 void * 7316 mdi_phci_get_vhci_private(dev_info_t *dip) 7317 { 7318 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 7319 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 7320 mdi_phci_t *ph; 7321 ph = i_devi_get_phci(dip); 7322 return (ph->ph_vprivate); 7323 } 7324 return (NULL); 7325 } 7326 7327 /* 7328 * mdi_phci_set_vhci_private(): 7329 * Set the vhci private information in the mdi_phci node 7330 */ 7331 void 7332 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv) 7333 { 7334 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS); 7335 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) { 7336 mdi_phci_t *ph; 7337 ph = i_devi_get_phci(dip); 7338 ph->ph_vprivate = priv; 7339 } 7340 } 7341 7342 int 7343 mdi_pi_ishidden(mdi_pathinfo_t *pip) 7344 { 7345 return (MDI_PI_FLAGS_IS_HIDDEN(pip)); 7346 } 7347 7348 int 7349 mdi_pi_device_isremoved(mdi_pathinfo_t *pip) 7350 { 7351 return (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)); 7352 } 7353 7354 /* Return 1 if all client paths are device_removed */ 7355 static int 7356 i_mdi_client_all_devices_removed(mdi_client_t *ct) 7357 { 7358 mdi_pathinfo_t *pip; 7359 int all_devices_removed = 1; 7360 7361 MDI_CLIENT_LOCK(ct); 7362 for (pip = ct->ct_path_head; pip; 7363 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) { 7364 if (!mdi_pi_device_isremoved(pip)) { 7365 all_devices_removed = 0; 7366 break; 7367 } 7368 } 7369 MDI_CLIENT_UNLOCK(ct); 7370 return (all_devices_removed); 7371 } 7372 7373 /* 7374 * When processing path hotunplug, represent device removal. 
7375 */ 7376 int 7377 mdi_pi_device_remove(mdi_pathinfo_t *pip) 7378 { 7379 mdi_client_t *ct; 7380 7381 MDI_PI_LOCK(pip); 7382 if (mdi_pi_device_isremoved(pip)) { 7383 MDI_PI_UNLOCK(pip); 7384 return (0); 7385 } 7386 MDI_PI_FLAGS_SET_DEVICE_REMOVED(pip); 7387 MDI_PI_FLAGS_SET_HIDDEN(pip); 7388 MDI_PI_UNLOCK(pip); 7389 7390 /* 7391 * If all paths associated with the client are now DEVICE_REMOVED, 7392 * reflect DEVICE_REMOVED in the client. 7393 */ 7394 ct = MDI_PI(pip)->pi_client; 7395 if (ct && ct->ct_dip && i_mdi_client_all_devices_removed(ct)) 7396 (void) ndi_devi_device_remove(ct->ct_dip); 7397 else 7398 i_ddi_di_cache_invalidate(); 7399 7400 return (1); 7401 } 7402 7403 /* 7404 * When processing hotplug, if a path marked mdi_pi_device_isremoved() 7405 * is now accessible then this interfaces is used to represent device insertion. 7406 */ 7407 int 7408 mdi_pi_device_insert(mdi_pathinfo_t *pip) 7409 { 7410 MDI_PI_LOCK(pip); 7411 if (!mdi_pi_device_isremoved(pip)) { 7412 MDI_PI_UNLOCK(pip); 7413 return (0); 7414 } 7415 MDI_PI_FLAGS_CLR_DEVICE_REMOVED(pip); 7416 MDI_PI_FLAGS_CLR_HIDDEN(pip); 7417 MDI_PI_UNLOCK(pip); 7418 7419 i_ddi_di_cache_invalidate(); 7420 7421 return (1); 7422 } 7423 7424 /* 7425 * List of vhci class names: 7426 * A vhci class name must be in this list only if the corresponding vhci 7427 * driver intends to use the mdi provided bus config implementation 7428 * (i.e., mdi_vhci_bus_config()). 7429 */ 7430 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB }; 7431 #define N_VHCI_CLASSES (sizeof (vhci_class_list) / sizeof (char *)) 7432 7433 /* 7434 * During boot time, the on-disk vhci cache for every vhci class is read 7435 * in the form of an nvlist and stored here. 
7436 */ 7437 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES]; 7438 7439 /* nvpair names in vhci cache nvlist */ 7440 #define MDI_VHCI_CACHE_VERSION 1 7441 #define MDI_NVPNAME_VERSION "version" 7442 #define MDI_NVPNAME_PHCIS "phcis" 7443 #define MDI_NVPNAME_CTADDRMAP "clientaddrmap" 7444 7445 /* 7446 * Given vhci class name, return its on-disk vhci cache filename. 7447 * Memory for the returned filename which includes the full path is allocated 7448 * by this function. 7449 */ 7450 static char * 7451 vhclass2vhcache_filename(char *vhclass) 7452 { 7453 char *filename; 7454 int len; 7455 static char *fmt = "/etc/devices/mdi_%s_cache"; 7456 7457 /* 7458 * fmt contains the on-disk vhci cache file name format; 7459 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache". 7460 */ 7461 7462 /* the -1 below is to account for "%s" in the format string */ 7463 len = strlen(fmt) + strlen(vhclass) - 1; 7464 filename = kmem_alloc(len, KM_SLEEP); 7465 (void) snprintf(filename, len, fmt, vhclass); 7466 ASSERT(len == (strlen(filename) + 1)); 7467 return (filename); 7468 } 7469 7470 /* 7471 * initialize the vhci cache related data structures and read the on-disk 7472 * vhci cached data into memory. 7473 */ 7474 static void 7475 setup_vhci_cache(mdi_vhci_t *vh) 7476 { 7477 mdi_vhci_config_t *vhc; 7478 mdi_vhci_cache_t *vhcache; 7479 int i; 7480 nvlist_t *nvl = NULL; 7481 7482 vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP); 7483 vh->vh_config = vhc; 7484 vhcache = &vhc->vhc_vhcache; 7485 7486 vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class); 7487 7488 mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL); 7489 cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL); 7490 7491 rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL); 7492 7493 /* 7494 * Create string hash; same as mod_hash_create_strhash() except that 7495 * we use NULL key destructor. 
7496 */ 7497 vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class, 7498 mdi_bus_config_cache_hash_size, 7499 mod_hash_null_keydtor, mod_hash_null_valdtor, 7500 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); 7501 7502 /* 7503 * The on-disk vhci cache is read during booting prior to the 7504 * lights-out period by mdi_read_devices_files(). 7505 */ 7506 for (i = 0; i < N_VHCI_CLASSES; i++) { 7507 if (strcmp(vhci_class_list[i], vh->vh_class) == 0) { 7508 nvl = vhcache_nvl[i]; 7509 vhcache_nvl[i] = NULL; 7510 break; 7511 } 7512 } 7513 7514 /* 7515 * this is to cover the case of some one manually causing unloading 7516 * (or detaching) and reloading (or attaching) of a vhci driver. 7517 */ 7518 if (nvl == NULL && modrootloaded) 7519 nvl = read_on_disk_vhci_cache(vh->vh_class); 7520 7521 if (nvl != NULL) { 7522 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 7523 if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS) 7524 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 7525 else { 7526 cmn_err(CE_WARN, 7527 "%s: data file corrupted, will recreate", 7528 vhc->vhc_vhcache_filename); 7529 } 7530 rw_exit(&vhcache->vhcache_lock); 7531 nvlist_free(nvl); 7532 } 7533 7534 vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc, 7535 CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush"); 7536 7537 vhc->vhc_path_discovery_boot = mdi_path_discovery_boot; 7538 vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot; 7539 } 7540 7541 /* 7542 * free all vhci cache related resources 7543 */ 7544 static int 7545 destroy_vhci_cache(mdi_vhci_t *vh) 7546 { 7547 mdi_vhci_config_t *vhc = vh->vh_config; 7548 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 7549 mdi_vhcache_phci_t *cphci, *cphci_next; 7550 mdi_vhcache_client_t *cct, *cct_next; 7551 mdi_vhcache_pathinfo_t *cpi, *cpi_next; 7552 7553 if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS) 7554 return (MDI_FAILURE); 7555 7556 kmem_free(vhc->vhc_vhcache_filename, 7557 strlen(vhc->vhc_vhcache_filename) + 1); 7558 7559 
mod_hash_destroy_strhash(vhcache->vhcache_client_hash); 7560 7561 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 7562 cphci = cphci_next) { 7563 cphci_next = cphci->cphci_next; 7564 free_vhcache_phci(cphci); 7565 } 7566 7567 for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) { 7568 cct_next = cct->cct_next; 7569 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) { 7570 cpi_next = cpi->cpi_next; 7571 free_vhcache_pathinfo(cpi); 7572 } 7573 free_vhcache_client(cct); 7574 } 7575 7576 rw_destroy(&vhcache->vhcache_lock); 7577 7578 mutex_destroy(&vhc->vhc_lock); 7579 cv_destroy(&vhc->vhc_cv); 7580 kmem_free(vhc, sizeof (mdi_vhci_config_t)); 7581 return (MDI_SUCCESS); 7582 } 7583 7584 /* 7585 * Stop all vhci cache related async threads and free their resources. 7586 */ 7587 static int 7588 stop_vhcache_async_threads(mdi_vhci_config_t *vhc) 7589 { 7590 mdi_async_client_config_t *acc, *acc_next; 7591 7592 mutex_enter(&vhc->vhc_lock); 7593 vhc->vhc_flags |= MDI_VHC_EXIT; 7594 ASSERT(vhc->vhc_acc_thrcount >= 0); 7595 cv_broadcast(&vhc->vhc_cv); 7596 7597 while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) || 7598 vhc->vhc_acc_thrcount != 0) { 7599 mutex_exit(&vhc->vhc_lock); 7600 delay_random(mdi_delay); 7601 mutex_enter(&vhc->vhc_lock); 7602 } 7603 7604 vhc->vhc_flags &= ~MDI_VHC_EXIT; 7605 7606 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) { 7607 acc_next = acc->acc_next; 7608 free_async_client_config(acc); 7609 } 7610 vhc->vhc_acc_list_head = NULL; 7611 vhc->vhc_acc_list_tail = NULL; 7612 vhc->vhc_acc_count = 0; 7613 7614 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 7615 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7616 mutex_exit(&vhc->vhc_lock); 7617 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) { 7618 vhcache_dirty(vhc); 7619 return (MDI_FAILURE); 7620 } 7621 } else 7622 mutex_exit(&vhc->vhc_lock); 7623 7624 if (callb_delete(vhc->vhc_cbid) != 0) 7625 return (MDI_FAILURE); 7626 7627 return (MDI_SUCCESS); 7628 } 7629 7630 
/* 7631 * Stop vhci cache flush thread 7632 */ 7633 /* ARGSUSED */ 7634 static boolean_t 7635 stop_vhcache_flush_thread(void *arg, int code) 7636 { 7637 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 7638 7639 mutex_enter(&vhc->vhc_lock); 7640 vhc->vhc_flags |= MDI_VHC_EXIT; 7641 cv_broadcast(&vhc->vhc_cv); 7642 7643 while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 7644 mutex_exit(&vhc->vhc_lock); 7645 delay_random(mdi_delay); 7646 mutex_enter(&vhc->vhc_lock); 7647 } 7648 7649 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) { 7650 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 7651 mutex_exit(&vhc->vhc_lock); 7652 (void) flush_vhcache(vhc, 1); 7653 } else 7654 mutex_exit(&vhc->vhc_lock); 7655 7656 return (B_TRUE); 7657 } 7658 7659 /* 7660 * Enqueue the vhcache phci (cphci) at the tail of the list 7661 */ 7662 static void 7663 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci) 7664 { 7665 cphci->cphci_next = NULL; 7666 if (vhcache->vhcache_phci_head == NULL) 7667 vhcache->vhcache_phci_head = cphci; 7668 else 7669 vhcache->vhcache_phci_tail->cphci_next = cphci; 7670 vhcache->vhcache_phci_tail = cphci; 7671 } 7672 7673 /* 7674 * Enqueue the vhcache pathinfo (cpi) at the tail of the list 7675 */ 7676 static void 7677 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct, 7678 mdi_vhcache_pathinfo_t *cpi) 7679 { 7680 cpi->cpi_next = NULL; 7681 if (cct->cct_cpi_head == NULL) 7682 cct->cct_cpi_head = cpi; 7683 else 7684 cct->cct_cpi_tail->cpi_next = cpi; 7685 cct->cct_cpi_tail = cpi; 7686 } 7687 7688 /* 7689 * Enqueue the vhcache pathinfo (cpi) at the correct location in the 7690 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 7691 * flag set come at the beginning of the list. All cpis which have this 7692 * flag set come at the end of the list. 
7693 */ 7694 static void 7695 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct, 7696 mdi_vhcache_pathinfo_t *newcpi) 7697 { 7698 mdi_vhcache_pathinfo_t *cpi, *prev_cpi; 7699 7700 if (cct->cct_cpi_head == NULL || 7701 (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) 7702 enqueue_tail_vhcache_pathinfo(cct, newcpi); 7703 else { 7704 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL && 7705 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST); 7706 prev_cpi = cpi, cpi = cpi->cpi_next) 7707 ; 7708 7709 if (prev_cpi == NULL) 7710 cct->cct_cpi_head = newcpi; 7711 else 7712 prev_cpi->cpi_next = newcpi; 7713 7714 newcpi->cpi_next = cpi; 7715 7716 if (cpi == NULL) 7717 cct->cct_cpi_tail = newcpi; 7718 } 7719 } 7720 7721 /* 7722 * Enqueue the vhcache client (cct) at the tail of the list 7723 */ 7724 static void 7725 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache, 7726 mdi_vhcache_client_t *cct) 7727 { 7728 cct->cct_next = NULL; 7729 if (vhcache->vhcache_client_head == NULL) 7730 vhcache->vhcache_client_head = cct; 7731 else 7732 vhcache->vhcache_client_tail->cct_next = cct; 7733 vhcache->vhcache_client_tail = cct; 7734 } 7735 7736 static void 7737 free_string_array(char **str, int nelem) 7738 { 7739 int i; 7740 7741 if (str) { 7742 for (i = 0; i < nelem; i++) { 7743 if (str[i]) 7744 kmem_free(str[i], strlen(str[i]) + 1); 7745 } 7746 kmem_free(str, sizeof (char *) * nelem); 7747 } 7748 } 7749 7750 static void 7751 free_vhcache_phci(mdi_vhcache_phci_t *cphci) 7752 { 7753 kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1); 7754 kmem_free(cphci, sizeof (*cphci)); 7755 } 7756 7757 static void 7758 free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi) 7759 { 7760 kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1); 7761 kmem_free(cpi, sizeof (*cpi)); 7762 } 7763 7764 static void 7765 free_vhcache_client(mdi_vhcache_client_t *cct) 7766 { 7767 kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1); 7768 kmem_free(cct, sizeof (*cct)); 7769 } 7770 7771 
static char * 7772 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len) 7773 { 7774 char *name_addr; 7775 int len; 7776 7777 len = strlen(ct_name) + strlen(ct_addr) + 2; 7778 name_addr = kmem_alloc(len, KM_SLEEP); 7779 (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr); 7780 7781 if (ret_len) 7782 *ret_len = len; 7783 return (name_addr); 7784 } 7785 7786 /* 7787 * Copy the contents of paddrnvl to vhci cache. 7788 * paddrnvl nvlist contains path information for a vhci client. 7789 * See the comment in mainnvl_to_vhcache() for the format of this nvlist. 7790 */ 7791 static void 7792 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[], 7793 mdi_vhcache_client_t *cct) 7794 { 7795 nvpair_t *nvp = NULL; 7796 mdi_vhcache_pathinfo_t *cpi; 7797 uint_t nelem; 7798 uint32_t *val; 7799 7800 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 7801 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY); 7802 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 7803 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 7804 (void) nvpair_value_uint32_array(nvp, &val, &nelem); 7805 ASSERT(nelem == 2); 7806 cpi->cpi_cphci = cphci_list[val[0]]; 7807 cpi->cpi_flags = val[1]; 7808 enqueue_tail_vhcache_pathinfo(cct, cpi); 7809 } 7810 } 7811 7812 /* 7813 * Copy the contents of caddrmapnvl to vhci cache. 7814 * caddrmapnvl nvlist contains vhci client address to phci client address 7815 * mappings. See the comment in mainnvl_to_vhcache() for the format of 7816 * this nvlist. 
7817 */ 7818 static void 7819 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl, 7820 mdi_vhcache_phci_t *cphci_list[]) 7821 { 7822 nvpair_t *nvp = NULL; 7823 nvlist_t *paddrnvl; 7824 mdi_vhcache_client_t *cct; 7825 7826 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) { 7827 ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST); 7828 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 7829 cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP); 7830 (void) nvpair_value_nvlist(nvp, &paddrnvl); 7831 paddrnvl_to_vhcache(paddrnvl, cphci_list, cct); 7832 /* the client must contain at least one path */ 7833 ASSERT(cct->cct_cpi_head != NULL); 7834 7835 enqueue_vhcache_client(vhcache, cct); 7836 (void) mod_hash_insert(vhcache->vhcache_client_hash, 7837 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 7838 } 7839 } 7840 7841 /* 7842 * Copy the contents of the main nvlist to vhci cache. 7843 * 7844 * VHCI busconfig cached data is stored in the form of a nvlist on the disk. 7845 * The nvlist contains the mappings between the vhci client addresses and 7846 * their corresponding phci client addresses. 7847 * 7848 * The structure of the nvlist is as follows: 7849 * 7850 * Main nvlist: 7851 * NAME TYPE DATA 7852 * version int32 version number 7853 * phcis string array array of phci paths 7854 * clientaddrmap nvlist_t c2paddrs_nvl (see below) 7855 * 7856 * structure of c2paddrs_nvl: 7857 * NAME TYPE DATA 7858 * caddr1 nvlist_t paddrs_nvl1 7859 * caddr2 nvlist_t paddrs_nvl2 7860 * ... 7861 * where caddr1, caddr2, ... are vhci client name and addresses in the 7862 * form of "<clientname>@<clientaddress>". 7863 * (for example: "ssd@2000002037cd9f72"); 7864 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information. 7865 * 7866 * structure of paddrs_nvl: 7867 * NAME TYPE DATA 7868 * pi_addr1 uint32_array (phci-id, cpi_flags) 7869 * pi_addr2 uint32_array (phci-id, cpi_flags) 7870 * ... 7871 * where pi_addr1, pi_addr2, ... 
are bus specific addresses of pathinfo nodes 7872 * (so called pi_addrs, for example: "w2100002037cd9f72,0"); 7873 * phci-ids are integers that identify pHCIs to which the 7874 * the bus specific address belongs to. These integers are used as an index 7875 * into to the phcis string array in the main nvlist to get the pHCI path. 7876 */ 7877 static int 7878 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl) 7879 { 7880 char **phcis, **phci_namep; 7881 uint_t nphcis; 7882 mdi_vhcache_phci_t *cphci, **cphci_list; 7883 nvlist_t *caddrmapnvl; 7884 int32_t ver; 7885 int i; 7886 size_t cphci_list_size; 7887 7888 ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock)); 7889 7890 if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 || 7891 ver != MDI_VHCI_CACHE_VERSION) 7892 return (MDI_FAILURE); 7893 7894 if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis, 7895 &nphcis) != 0) 7896 return (MDI_SUCCESS); 7897 7898 ASSERT(nphcis > 0); 7899 7900 cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis; 7901 cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP); 7902 for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) { 7903 cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP); 7904 cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP); 7905 enqueue_vhcache_phci(vhcache, cphci); 7906 cphci_list[i] = cphci; 7907 } 7908 7909 ASSERT(vhcache->vhcache_phci_head != NULL); 7910 7911 if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0) 7912 caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list); 7913 7914 kmem_free(cphci_list, cphci_list_size); 7915 return (MDI_SUCCESS); 7916 } 7917 7918 /* 7919 * Build paddrnvl for the specified client using the information in the 7920 * vhci cache and add it to the caddrmapnnvl. 7921 * Returns 0 on success, errno on failure. 
7922 */ 7923 static int 7924 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct, 7925 nvlist_t *caddrmapnvl) 7926 { 7927 mdi_vhcache_pathinfo_t *cpi; 7928 nvlist_t *nvl; 7929 int err; 7930 uint32_t val[2]; 7931 7932 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7933 7934 if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0) 7935 return (err); 7936 7937 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 7938 val[0] = cpi->cpi_cphci->cphci_id; 7939 val[1] = cpi->cpi_flags; 7940 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2)) 7941 != 0) 7942 goto out; 7943 } 7944 7945 err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl); 7946 out: 7947 nvlist_free(nvl); 7948 return (err); 7949 } 7950 7951 /* 7952 * Build caddrmapnvl using the information in the vhci cache 7953 * and add it to the mainnvl. 7954 * Returns 0 on success, errno on failure. 7955 */ 7956 static int 7957 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl) 7958 { 7959 mdi_vhcache_client_t *cct; 7960 nvlist_t *nvl; 7961 int err; 7962 7963 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 7964 7965 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) 7966 return (err); 7967 7968 for (cct = vhcache->vhcache_client_head; cct != NULL; 7969 cct = cct->cct_next) { 7970 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0) 7971 goto out; 7972 } 7973 7974 err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl); 7975 out: 7976 nvlist_free(nvl); 7977 return (err); 7978 } 7979 7980 /* 7981 * Build nvlist using the information in the vhci cache. 7982 * See the comment in mainnvl_to_vhcache() for the format of the nvlist. 7983 * Returns nvl on success, NULL on failure. 
7984 */ 7985 static nvlist_t * 7986 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache) 7987 { 7988 mdi_vhcache_phci_t *cphci; 7989 uint_t phci_count; 7990 char **phcis; 7991 nvlist_t *nvl; 7992 int err, i; 7993 7994 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) { 7995 nvl = NULL; 7996 goto out; 7997 } 7998 7999 if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION, 8000 MDI_VHCI_CACHE_VERSION)) != 0) 8001 goto out; 8002 8003 rw_enter(&vhcache->vhcache_lock, RW_READER); 8004 if (vhcache->vhcache_phci_head == NULL) { 8005 rw_exit(&vhcache->vhcache_lock); 8006 return (nvl); 8007 } 8008 8009 phci_count = 0; 8010 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 8011 cphci = cphci->cphci_next) 8012 cphci->cphci_id = phci_count++; 8013 8014 /* build phci pathname list */ 8015 phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP); 8016 for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL; 8017 cphci = cphci->cphci_next, i++) 8018 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP); 8019 8020 err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis, 8021 phci_count); 8022 free_string_array(phcis, phci_count); 8023 8024 if (err == 0 && 8025 (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) { 8026 rw_exit(&vhcache->vhcache_lock); 8027 return (nvl); 8028 } 8029 8030 rw_exit(&vhcache->vhcache_lock); 8031 out: 8032 nvlist_free(nvl); 8033 return (NULL); 8034 } 8035 8036 /* 8037 * Lookup vhcache phci structure for the specified phci path. 8038 */ 8039 static mdi_vhcache_phci_t * 8040 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path) 8041 { 8042 mdi_vhcache_phci_t *cphci; 8043 8044 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 8045 8046 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 8047 cphci = cphci->cphci_next) { 8048 if (strcmp(cphci->cphci_path, phci_path) == 0) 8049 return (cphci); 8050 } 8051 8052 return (NULL); 8053 } 8054 8055 /* 8056 * Lookup vhcache phci structure for the specified phci. 
8057 */ 8058 static mdi_vhcache_phci_t * 8059 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph) 8060 { 8061 mdi_vhcache_phci_t *cphci; 8062 8063 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 8064 8065 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 8066 cphci = cphci->cphci_next) { 8067 if (cphci->cphci_phci == ph) 8068 return (cphci); 8069 } 8070 8071 return (NULL); 8072 } 8073 8074 /* 8075 * Add the specified phci to the vhci cache if not already present. 8076 */ 8077 static void 8078 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 8079 { 8080 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8081 mdi_vhcache_phci_t *cphci; 8082 char *pathname; 8083 int cache_updated; 8084 8085 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8086 8087 pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 8088 (void) ddi_pathname(ph->ph_dip, pathname); 8089 if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname)) 8090 != NULL) { 8091 cphci->cphci_phci = ph; 8092 cache_updated = 0; 8093 } else { 8094 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP); 8095 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP); 8096 cphci->cphci_phci = ph; 8097 enqueue_vhcache_phci(vhcache, cphci); 8098 cache_updated = 1; 8099 } 8100 8101 rw_exit(&vhcache->vhcache_lock); 8102 8103 /* 8104 * Since a new phci has been added, reset 8105 * vhc_path_discovery_cutoff_time to allow for discovery of paths 8106 * during next vhcache_discover_paths(). 8107 */ 8108 mutex_enter(&vhc->vhc_lock); 8109 vhc->vhc_path_discovery_cutoff_time = 0; 8110 mutex_exit(&vhc->vhc_lock); 8111 8112 kmem_free(pathname, MAXPATHLEN); 8113 if (cache_updated) 8114 vhcache_dirty(vhc); 8115 } 8116 8117 /* 8118 * Remove the reference to the specified phci from the vhci cache. 
8119 */ 8120 static void 8121 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph) 8122 { 8123 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8124 mdi_vhcache_phci_t *cphci; 8125 8126 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8127 if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) { 8128 /* do not remove the actual mdi_vhcache_phci structure */ 8129 cphci->cphci_phci = NULL; 8130 } 8131 rw_exit(&vhcache->vhcache_lock); 8132 } 8133 8134 static void 8135 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst, 8136 mdi_vhcache_lookup_token_t *src) 8137 { 8138 if (src == NULL) { 8139 dst->lt_cct = NULL; 8140 dst->lt_cct_lookup_time = 0; 8141 } else { 8142 dst->lt_cct = src->lt_cct; 8143 dst->lt_cct_lookup_time = src->lt_cct_lookup_time; 8144 } 8145 } 8146 8147 /* 8148 * Look up vhcache client for the specified client. 8149 */ 8150 static mdi_vhcache_client_t * 8151 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr, 8152 mdi_vhcache_lookup_token_t *token) 8153 { 8154 mod_hash_val_t hv; 8155 char *name_addr; 8156 int len; 8157 8158 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 8159 8160 /* 8161 * If no vhcache clean occurred since the last lookup, we can 8162 * simply return the cct from the last lookup operation. 8163 * It works because ccts are never freed except during the vhcache 8164 * cleanup operation. 
8165 */ 8166 if (token != NULL && 8167 vhcache->vhcache_clean_time < token->lt_cct_lookup_time) 8168 return (token->lt_cct); 8169 8170 name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len); 8171 if (mod_hash_find(vhcache->vhcache_client_hash, 8172 (mod_hash_key_t)name_addr, &hv) == 0) { 8173 if (token) { 8174 token->lt_cct = (mdi_vhcache_client_t *)hv; 8175 token->lt_cct_lookup_time = ddi_get_lbolt64(); 8176 } 8177 } else { 8178 if (token) { 8179 token->lt_cct = NULL; 8180 token->lt_cct_lookup_time = 0; 8181 } 8182 hv = NULL; 8183 } 8184 kmem_free(name_addr, len); 8185 return ((mdi_vhcache_client_t *)hv); 8186 } 8187 8188 /* 8189 * Add the specified path to the vhci cache if not already present. 8190 * Also add the vhcache client for the client corresponding to this path 8191 * if it doesn't already exist. 8192 */ 8193 static void 8194 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 8195 { 8196 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8197 mdi_vhcache_client_t *cct; 8198 mdi_vhcache_pathinfo_t *cpi; 8199 mdi_phci_t *ph = pip->pi_phci; 8200 mdi_client_t *ct = pip->pi_client; 8201 int cache_updated = 0; 8202 8203 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8204 8205 /* if vhcache client for this pip doesn't already exist, add it */ 8206 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 8207 NULL)) == NULL) { 8208 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP); 8209 cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname, 8210 ct->ct_guid, NULL); 8211 enqueue_vhcache_client(vhcache, cct); 8212 (void) mod_hash_insert(vhcache->vhcache_client_hash, 8213 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct); 8214 cache_updated = 1; 8215 } 8216 8217 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8218 if (cpi->cpi_cphci->cphci_phci == ph && 8219 strcmp(cpi->cpi_addr, pip->pi_addr) == 0) { 8220 cpi->cpi_pip = pip; 8221 if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) { 8222 cpi->cpi_flags &= 8223 
~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 8224 sort_vhcache_paths(cct); 8225 cache_updated = 1; 8226 } 8227 break; 8228 } 8229 } 8230 8231 if (cpi == NULL) { 8232 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP); 8233 cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP); 8234 cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph); 8235 ASSERT(cpi->cpi_cphci != NULL); 8236 cpi->cpi_pip = pip; 8237 enqueue_vhcache_pathinfo(cct, cpi); 8238 cache_updated = 1; 8239 } 8240 8241 rw_exit(&vhcache->vhcache_lock); 8242 8243 if (cache_updated) 8244 vhcache_dirty(vhc); 8245 } 8246 8247 /* 8248 * Remove the reference to the specified path from the vhci cache. 8249 */ 8250 static void 8251 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip) 8252 { 8253 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8254 mdi_client_t *ct = pip->pi_client; 8255 mdi_vhcache_client_t *cct; 8256 mdi_vhcache_pathinfo_t *cpi; 8257 8258 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8259 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid, 8260 NULL)) != NULL) { 8261 for (cpi = cct->cct_cpi_head; cpi != NULL; 8262 cpi = cpi->cpi_next) { 8263 if (cpi->cpi_pip == pip) { 8264 cpi->cpi_pip = NULL; 8265 break; 8266 } 8267 } 8268 } 8269 rw_exit(&vhcache->vhcache_lock); 8270 } 8271 8272 /* 8273 * Flush the vhci cache to disk. 8274 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure. 8275 */ 8276 static int 8277 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag) 8278 { 8279 nvlist_t *nvl; 8280 int err; 8281 int rv; 8282 8283 /* 8284 * It is possible that the system may shutdown before 8285 * i_ddi_io_initialized (during stmsboot for example). To allow for 8286 * flushing the cache in this case do not check for 8287 * i_ddi_io_initialized when force flag is set. 
8288 */ 8289 if (force_flag == 0 && !i_ddi_io_initialized()) 8290 return (MDI_FAILURE); 8291 8292 if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) { 8293 err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl); 8294 nvlist_free(nvl); 8295 } else 8296 err = EFAULT; 8297 8298 rv = MDI_SUCCESS; 8299 mutex_enter(&vhc->vhc_lock); 8300 if (err != 0) { 8301 if (err == EROFS) { 8302 vhc->vhc_flags |= MDI_VHC_READONLY_FS; 8303 vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR | 8304 MDI_VHC_VHCACHE_DIRTY); 8305 } else { 8306 if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) { 8307 cmn_err(CE_CONT, "%s: update failed\n", 8308 vhc->vhc_vhcache_filename); 8309 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR; 8310 } 8311 rv = MDI_FAILURE; 8312 } 8313 } else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) { 8314 cmn_err(CE_CONT, 8315 "%s: update now ok\n", vhc->vhc_vhcache_filename); 8316 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR; 8317 } 8318 mutex_exit(&vhc->vhc_lock); 8319 8320 return (rv); 8321 } 8322 8323 /* 8324 * Call flush_vhcache() to flush the vhci cache at the scheduled time. 8325 * Exits itself if left idle for the idle timeout period. 
8326 */ 8327 static void 8328 vhcache_flush_thread(void *arg) 8329 { 8330 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 8331 clock_t idle_time, quit_at_ticks; 8332 callb_cpr_t cprinfo; 8333 8334 /* number of seconds to sleep idle before exiting */ 8335 idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND; 8336 8337 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 8338 "mdi_vhcache_flush"); 8339 mutex_enter(&vhc->vhc_lock); 8340 for (; ; ) { 8341 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8342 (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) { 8343 if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) { 8344 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8345 (void) cv_timedwait(&vhc->vhc_cv, 8346 &vhc->vhc_lock, vhc->vhc_flush_at_ticks); 8347 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8348 } else { 8349 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY; 8350 mutex_exit(&vhc->vhc_lock); 8351 8352 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) 8353 vhcache_dirty(vhc); 8354 8355 mutex_enter(&vhc->vhc_lock); 8356 } 8357 } 8358 8359 quit_at_ticks = ddi_get_lbolt() + idle_time; 8360 8361 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8362 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) && 8363 ddi_get_lbolt() < quit_at_ticks) { 8364 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8365 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 8366 quit_at_ticks); 8367 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8368 } 8369 8370 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 8371 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) 8372 goto out; 8373 } 8374 8375 out: 8376 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD; 8377 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 8378 CALLB_CPR_EXIT(&cprinfo); 8379 } 8380 8381 /* 8382 * Make vhci cache dirty and schedule flushing by vhcache flush thread. 
8383 */ 8384 static void 8385 vhcache_dirty(mdi_vhci_config_t *vhc) 8386 { 8387 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8388 int create_thread; 8389 8390 rw_enter(&vhcache->vhcache_lock, RW_READER); 8391 /* do not flush cache until the cache is fully built */ 8392 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 8393 rw_exit(&vhcache->vhcache_lock); 8394 return; 8395 } 8396 rw_exit(&vhcache->vhcache_lock); 8397 8398 mutex_enter(&vhc->vhc_lock); 8399 if (vhc->vhc_flags & MDI_VHC_READONLY_FS) { 8400 mutex_exit(&vhc->vhc_lock); 8401 return; 8402 } 8403 8404 vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY; 8405 vhc->vhc_flush_at_ticks = ddi_get_lbolt() + 8406 mdi_vhcache_flush_delay * TICKS_PER_SECOND; 8407 if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) { 8408 cv_broadcast(&vhc->vhc_cv); 8409 create_thread = 0; 8410 } else { 8411 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD; 8412 create_thread = 1; 8413 } 8414 mutex_exit(&vhc->vhc_lock); 8415 8416 if (create_thread) 8417 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc, 8418 0, &p0, TS_RUN, minclsyspri); 8419 } 8420 8421 /* 8422 * phci bus config structure - one for for each phci bus config operation that 8423 * we initiate on behalf of a vhci. 
8424 */ 8425 typedef struct mdi_phci_bus_config_s { 8426 char *phbc_phci_path; 8427 struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */ 8428 struct mdi_phci_bus_config_s *phbc_next; 8429 } mdi_phci_bus_config_t; 8430 8431 /* vhci bus config structure - one for each vhci bus config operation */ 8432 typedef struct mdi_vhci_bus_config_s { 8433 ddi_bus_config_op_t vhbc_op; /* bus config op */ 8434 major_t vhbc_op_major; /* bus config op major */ 8435 uint_t vhbc_op_flags; /* bus config op flags */ 8436 kmutex_t vhbc_lock; 8437 kcondvar_t vhbc_cv; 8438 int vhbc_thr_count; 8439 } mdi_vhci_bus_config_t; 8440 8441 /* 8442 * bus config the specified phci 8443 */ 8444 static void 8445 bus_config_phci(void *arg) 8446 { 8447 mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg; 8448 mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig; 8449 dev_info_t *ph_dip; 8450 8451 /* 8452 * first configure all path components upto phci and then configure 8453 * the phci children. 8454 */ 8455 if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0)) 8456 != NULL) { 8457 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER || 8458 vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) { 8459 (void) ndi_devi_config_driver(ph_dip, 8460 vhbc->vhbc_op_flags, 8461 vhbc->vhbc_op_major); 8462 } else 8463 (void) ndi_devi_config(ph_dip, 8464 vhbc->vhbc_op_flags); 8465 8466 /* release the hold that e_ddi_hold_devi_by_path() placed */ 8467 ndi_rele_devi(ph_dip); 8468 } 8469 8470 kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1); 8471 kmem_free(phbc, sizeof (*phbc)); 8472 8473 mutex_enter(&vhbc->vhbc_lock); 8474 vhbc->vhbc_thr_count--; 8475 if (vhbc->vhbc_thr_count == 0) 8476 cv_broadcast(&vhbc->vhbc_cv); 8477 mutex_exit(&vhbc->vhbc_lock); 8478 } 8479 8480 /* 8481 * Bus config all phcis associated with the vhci in parallel. 8482 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL. 
8483 */ 8484 static void 8485 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags, 8486 ddi_bus_config_op_t op, major_t maj) 8487 { 8488 mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next; 8489 mdi_vhci_bus_config_t *vhbc; 8490 mdi_vhcache_phci_t *cphci; 8491 8492 rw_enter(&vhcache->vhcache_lock, RW_READER); 8493 if (vhcache->vhcache_phci_head == NULL) { 8494 rw_exit(&vhcache->vhcache_lock); 8495 return; 8496 } 8497 8498 vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP); 8499 8500 for (cphci = vhcache->vhcache_phci_head; cphci != NULL; 8501 cphci = cphci->cphci_next) { 8502 /* skip phcis that haven't attached before root is available */ 8503 if (!modrootloaded && (cphci->cphci_phci == NULL)) 8504 continue; 8505 phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP); 8506 phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path, 8507 KM_SLEEP); 8508 phbc->phbc_vhbusconfig = vhbc; 8509 phbc->phbc_next = phbc_head; 8510 phbc_head = phbc; 8511 vhbc->vhbc_thr_count++; 8512 } 8513 rw_exit(&vhcache->vhcache_lock); 8514 8515 vhbc->vhbc_op = op; 8516 vhbc->vhbc_op_major = maj; 8517 vhbc->vhbc_op_flags = NDI_NO_EVENT | 8518 (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE)); 8519 mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL); 8520 cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL); 8521 8522 /* now create threads to initiate bus config on all phcis in parallel */ 8523 for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) { 8524 phbc_next = phbc->phbc_next; 8525 if (mdi_mtc_off) 8526 bus_config_phci((void *)phbc); 8527 else 8528 (void) thread_create(NULL, 0, bus_config_phci, phbc, 8529 0, &p0, TS_RUN, minclsyspri); 8530 } 8531 8532 mutex_enter(&vhbc->vhbc_lock); 8533 /* wait until all threads exit */ 8534 while (vhbc->vhbc_thr_count > 0) 8535 cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock); 8536 mutex_exit(&vhbc->vhbc_lock); 8537 8538 mutex_destroy(&vhbc->vhbc_lock); 8539 cv_destroy(&vhbc->vhbc_cv); 8540 kmem_free(vhbc, sizeof (*vhbc)); 8541 } 8542 8543 /* 8544 * Single 
threaded version of bus_config_all_phcis() 8545 */ 8546 static void 8547 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags, 8548 ddi_bus_config_op_t op, major_t maj) 8549 { 8550 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8551 8552 single_threaded_vhconfig_enter(vhc); 8553 bus_config_all_phcis(vhcache, flags, op, maj); 8554 single_threaded_vhconfig_exit(vhc); 8555 } 8556 8557 /* 8558 * Perform BUS_CONFIG_ONE on the specified child of the phci. 8559 * The path includes the child component in addition to the phci path. 8560 */ 8561 static int 8562 bus_config_one_phci_child(char *path) 8563 { 8564 dev_info_t *ph_dip, *child; 8565 char *devnm; 8566 int rv = MDI_FAILURE; 8567 8568 /* extract the child component of the phci */ 8569 devnm = strrchr(path, '/'); 8570 *devnm++ = '\0'; 8571 8572 /* 8573 * first configure all path components upto phci and then 8574 * configure the phci child. 8575 */ 8576 if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) { 8577 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) == 8578 NDI_SUCCESS) { 8579 /* 8580 * release the hold that ndi_devi_config_one() placed 8581 */ 8582 ndi_rele_devi(child); 8583 rv = MDI_SUCCESS; 8584 } 8585 8586 /* release the hold that e_ddi_hold_devi_by_path() placed */ 8587 ndi_rele_devi(ph_dip); 8588 } 8589 8590 devnm--; 8591 *devnm = '/'; 8592 return (rv); 8593 } 8594 8595 /* 8596 * Build a list of phci client paths for the specified vhci client. 8597 * The list includes only those phci client paths which aren't configured yet. 8598 */ 8599 static mdi_phys_path_t * 8600 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name) 8601 { 8602 mdi_vhcache_pathinfo_t *cpi; 8603 mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp; 8604 int config_path, len; 8605 8606 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8607 /* 8608 * include only those paths that aren't configured. 
8609 */ 8610 config_path = 0; 8611 if (cpi->cpi_pip == NULL) 8612 config_path = 1; 8613 else { 8614 MDI_PI_LOCK(cpi->cpi_pip); 8615 if (MDI_PI_IS_INIT(cpi->cpi_pip)) 8616 config_path = 1; 8617 MDI_PI_UNLOCK(cpi->cpi_pip); 8618 } 8619 8620 if (config_path) { 8621 pp = kmem_alloc(sizeof (*pp), KM_SLEEP); 8622 len = strlen(cpi->cpi_cphci->cphci_path) + 8623 strlen(ct_name) + strlen(cpi->cpi_addr) + 3; 8624 pp->phys_path = kmem_alloc(len, KM_SLEEP); 8625 (void) snprintf(pp->phys_path, len, "%s/%s@%s", 8626 cpi->cpi_cphci->cphci_path, ct_name, 8627 cpi->cpi_addr); 8628 pp->phys_path_next = NULL; 8629 8630 if (pp_head == NULL) 8631 pp_head = pp; 8632 else 8633 pp_tail->phys_path_next = pp; 8634 pp_tail = pp; 8635 } 8636 } 8637 8638 return (pp_head); 8639 } 8640 8641 /* 8642 * Free the memory allocated for phci client path list. 8643 */ 8644 static void 8645 free_phclient_path_list(mdi_phys_path_t *pp_head) 8646 { 8647 mdi_phys_path_t *pp, *pp_next; 8648 8649 for (pp = pp_head; pp != NULL; pp = pp_next) { 8650 pp_next = pp->phys_path_next; 8651 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1); 8652 kmem_free(pp, sizeof (*pp)); 8653 } 8654 } 8655 8656 /* 8657 * Allocated async client structure and initialize with the specified values. 8658 */ 8659 static mdi_async_client_config_t * 8660 alloc_async_client_config(char *ct_name, char *ct_addr, 8661 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8662 { 8663 mdi_async_client_config_t *acc; 8664 8665 acc = kmem_alloc(sizeof (*acc), KM_SLEEP); 8666 acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP); 8667 acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP); 8668 acc->acc_phclient_path_list_head = pp_head; 8669 init_vhcache_lookup_token(&acc->acc_token, tok); 8670 acc->acc_next = NULL; 8671 return (acc); 8672 } 8673 8674 /* 8675 * Free the memory allocated for the async client structure and their members. 
8676 */ 8677 static void 8678 free_async_client_config(mdi_async_client_config_t *acc) 8679 { 8680 if (acc->acc_phclient_path_list_head) 8681 free_phclient_path_list(acc->acc_phclient_path_list_head); 8682 kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1); 8683 kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1); 8684 kmem_free(acc, sizeof (*acc)); 8685 } 8686 8687 /* 8688 * Sort vhcache pathinfos (cpis) of the specified client. 8689 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST 8690 * flag set come at the beginning of the list. All cpis which have this 8691 * flag set come at the end of the list. 8692 */ 8693 static void 8694 sort_vhcache_paths(mdi_vhcache_client_t *cct) 8695 { 8696 mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head; 8697 8698 cpi_head = cct->cct_cpi_head; 8699 cct->cct_cpi_head = cct->cct_cpi_tail = NULL; 8700 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) { 8701 cpi_next = cpi->cpi_next; 8702 enqueue_vhcache_pathinfo(cct, cpi); 8703 } 8704 } 8705 8706 /* 8707 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for 8708 * every vhcache pathinfo of the specified client. If not adjust the flag 8709 * setting appropriately. 8710 * 8711 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the 8712 * on-disk vhci cache. So every time this flag is updated the cache must be 8713 * flushed. 8714 */ 8715 static void 8716 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8717 mdi_vhcache_lookup_token_t *tok) 8718 { 8719 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8720 mdi_vhcache_client_t *cct; 8721 mdi_vhcache_pathinfo_t *cpi; 8722 8723 rw_enter(&vhcache->vhcache_lock, RW_READER); 8724 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok)) 8725 == NULL) { 8726 rw_exit(&vhcache->vhcache_lock); 8727 return; 8728 } 8729 8730 /* 8731 * to avoid unnecessary on-disk cache updates, first check if an 8732 * update is really needed. 
If no update is needed simply return. 8733 */ 8734 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8735 if ((cpi->cpi_pip != NULL && 8736 (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) || 8737 (cpi->cpi_pip == NULL && 8738 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) { 8739 break; 8740 } 8741 } 8742 if (cpi == NULL) { 8743 rw_exit(&vhcache->vhcache_lock); 8744 return; 8745 } 8746 8747 if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) { 8748 rw_exit(&vhcache->vhcache_lock); 8749 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 8750 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, 8751 tok)) == NULL) { 8752 rw_exit(&vhcache->vhcache_lock); 8753 return; 8754 } 8755 } 8756 8757 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8758 if (cpi->cpi_pip != NULL) 8759 cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 8760 else 8761 cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST; 8762 } 8763 sort_vhcache_paths(cct); 8764 8765 rw_exit(&vhcache->vhcache_lock); 8766 vhcache_dirty(vhc); 8767 } 8768 8769 /* 8770 * Configure all specified paths of the client. 8771 */ 8772 static void 8773 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8774 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8775 { 8776 mdi_phys_path_t *pp; 8777 8778 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) 8779 (void) bus_config_one_phci_child(pp->phys_path); 8780 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok); 8781 } 8782 8783 /* 8784 * Dequeue elements from vhci async client config list and bus configure 8785 * their corresponding phci clients. 
8786 */ 8787 static void 8788 config_client_paths_thread(void *arg) 8789 { 8790 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg; 8791 mdi_async_client_config_t *acc; 8792 clock_t quit_at_ticks; 8793 clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND; 8794 callb_cpr_t cprinfo; 8795 8796 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr, 8797 "mdi_config_client_paths"); 8798 8799 for (; ; ) { 8800 quit_at_ticks = ddi_get_lbolt() + idle_time; 8801 8802 mutex_enter(&vhc->vhc_lock); 8803 while (!(vhc->vhc_flags & MDI_VHC_EXIT) && 8804 vhc->vhc_acc_list_head == NULL && 8805 ddi_get_lbolt() < quit_at_ticks) { 8806 CALLB_CPR_SAFE_BEGIN(&cprinfo); 8807 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock, 8808 quit_at_ticks); 8809 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock); 8810 } 8811 8812 if ((vhc->vhc_flags & MDI_VHC_EXIT) || 8813 vhc->vhc_acc_list_head == NULL) 8814 goto out; 8815 8816 acc = vhc->vhc_acc_list_head; 8817 vhc->vhc_acc_list_head = acc->acc_next; 8818 if (vhc->vhc_acc_list_head == NULL) 8819 vhc->vhc_acc_list_tail = NULL; 8820 vhc->vhc_acc_count--; 8821 mutex_exit(&vhc->vhc_lock); 8822 8823 config_client_paths_sync(vhc, acc->acc_ct_name, 8824 acc->acc_ct_addr, acc->acc_phclient_path_list_head, 8825 &acc->acc_token); 8826 8827 free_async_client_config(acc); 8828 } 8829 8830 out: 8831 vhc->vhc_acc_thrcount--; 8832 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */ 8833 CALLB_CPR_EXIT(&cprinfo); 8834 } 8835 8836 /* 8837 * Arrange for all the phci client paths (pp_head) for the specified client 8838 * to be bus configured asynchronously by a thread. 
8839 */ 8840 static void 8841 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr, 8842 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok) 8843 { 8844 mdi_async_client_config_t *acc, *newacc; 8845 int create_thread; 8846 8847 if (pp_head == NULL) 8848 return; 8849 8850 if (mdi_mtc_off) { 8851 config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok); 8852 free_phclient_path_list(pp_head); 8853 return; 8854 } 8855 8856 newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok); 8857 ASSERT(newacc); 8858 8859 mutex_enter(&vhc->vhc_lock); 8860 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) { 8861 if (strcmp(ct_name, acc->acc_ct_name) == 0 && 8862 strcmp(ct_addr, acc->acc_ct_addr) == 0) { 8863 free_async_client_config(newacc); 8864 mutex_exit(&vhc->vhc_lock); 8865 return; 8866 } 8867 } 8868 8869 if (vhc->vhc_acc_list_head == NULL) 8870 vhc->vhc_acc_list_head = newacc; 8871 else 8872 vhc->vhc_acc_list_tail->acc_next = newacc; 8873 vhc->vhc_acc_list_tail = newacc; 8874 vhc->vhc_acc_count++; 8875 if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) { 8876 cv_broadcast(&vhc->vhc_cv); 8877 create_thread = 0; 8878 } else { 8879 vhc->vhc_acc_thrcount++; 8880 create_thread = 1; 8881 } 8882 mutex_exit(&vhc->vhc_lock); 8883 8884 if (create_thread) 8885 (void) thread_create(NULL, 0, config_client_paths_thread, vhc, 8886 0, &p0, TS_RUN, minclsyspri); 8887 } 8888 8889 /* 8890 * Return number of online paths for the specified client. 
8891 */ 8892 static int 8893 nonline_paths(mdi_vhcache_client_t *cct) 8894 { 8895 mdi_vhcache_pathinfo_t *cpi; 8896 int online_count = 0; 8897 8898 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) { 8899 if (cpi->cpi_pip != NULL) { 8900 MDI_PI_LOCK(cpi->cpi_pip); 8901 if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE) 8902 online_count++; 8903 MDI_PI_UNLOCK(cpi->cpi_pip); 8904 } 8905 } 8906 8907 return (online_count); 8908 } 8909 8910 /* 8911 * Bus configure all paths for the specified vhci client. 8912 * If at least one path for the client is already online, the remaining paths 8913 * will be configured asynchronously. Otherwise, it synchronously configures 8914 * the paths until at least one path is online and then rest of the paths 8915 * will be configured asynchronously. 8916 */ 8917 static void 8918 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr) 8919 { 8920 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 8921 mdi_phys_path_t *pp_head, *pp; 8922 mdi_vhcache_client_t *cct; 8923 mdi_vhcache_lookup_token_t tok; 8924 8925 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock)); 8926 8927 init_vhcache_lookup_token(&tok, NULL); 8928 8929 if (ct_name == NULL || ct_addr == NULL || 8930 (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok)) 8931 == NULL || 8932 (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) { 8933 rw_exit(&vhcache->vhcache_lock); 8934 return; 8935 } 8936 8937 /* if at least one path is online, configure the rest asynchronously */ 8938 if (nonline_paths(cct) > 0) { 8939 rw_exit(&vhcache->vhcache_lock); 8940 config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok); 8941 return; 8942 } 8943 8944 rw_exit(&vhcache->vhcache_lock); 8945 8946 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) { 8947 if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) { 8948 rw_enter(&vhcache->vhcache_lock, RW_READER); 8949 8950 if ((cct = lookup_vhcache_client(vhcache, ct_name, 8951 ct_addr, &tok)) 
== NULL) { 8952 rw_exit(&vhcache->vhcache_lock); 8953 goto out; 8954 } 8955 8956 if (nonline_paths(cct) > 0 && 8957 pp->phys_path_next != NULL) { 8958 rw_exit(&vhcache->vhcache_lock); 8959 config_client_paths_async(vhc, ct_name, ct_addr, 8960 pp->phys_path_next, &tok); 8961 pp->phys_path_next = NULL; 8962 goto out; 8963 } 8964 8965 rw_exit(&vhcache->vhcache_lock); 8966 } 8967 } 8968 8969 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok); 8970 out: 8971 free_phclient_path_list(pp_head); 8972 } 8973 8974 static void 8975 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc) 8976 { 8977 mutex_enter(&vhc->vhc_lock); 8978 while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED) 8979 cv_wait(&vhc->vhc_cv, &vhc->vhc_lock); 8980 vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED; 8981 mutex_exit(&vhc->vhc_lock); 8982 } 8983 8984 static void 8985 single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc) 8986 { 8987 mutex_enter(&vhc->vhc_lock); 8988 vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED; 8989 cv_broadcast(&vhc->vhc_cv); 8990 mutex_exit(&vhc->vhc_lock); 8991 } 8992 8993 typedef struct mdi_phci_driver_info { 8994 char *phdriver_name; /* name of the phci driver */ 8995 8996 /* set to non zero if the phci driver supports root device */ 8997 int phdriver_root_support; 8998 } mdi_phci_driver_info_t; 8999 9000 /* 9001 * vhci class and root support capability of a phci driver can be 9002 * specified using ddi-vhci-class and ddi-no-root-support properties in the 9003 * phci driver.conf file. The built-in tables below contain this information 9004 * for those phci drivers whose driver.conf files don't yet contain this info. 9005 * 9006 * All phci drivers expect iscsi have root device support. 
9007 */ 9008 static mdi_phci_driver_info_t scsi_phci_driver_list[] = { 9009 { "fp", 1 }, 9010 { "iscsi", 0 }, 9011 { "ibsrp", 1 } 9012 }; 9013 9014 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 }; 9015 9016 static void * 9017 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size) 9018 { 9019 void *new_ptr; 9020 9021 new_ptr = kmem_zalloc(new_size, KM_SLEEP); 9022 if (old_ptr) { 9023 bcopy(old_ptr, new_ptr, MIN(old_size, new_size)); 9024 kmem_free(old_ptr, old_size); 9025 } 9026 return (new_ptr); 9027 } 9028 9029 static void 9030 add_to_phci_list(char ***driver_list, int **root_support_list, 9031 int *cur_elements, int *max_elements, char *driver_name, int root_support) 9032 { 9033 ASSERT(*cur_elements <= *max_elements); 9034 if (*cur_elements == *max_elements) { 9035 *max_elements += 10; 9036 *driver_list = mdi_realloc(*driver_list, 9037 sizeof (char *) * (*cur_elements), 9038 sizeof (char *) * (*max_elements)); 9039 *root_support_list = mdi_realloc(*root_support_list, 9040 sizeof (int) * (*cur_elements), 9041 sizeof (int) * (*max_elements)); 9042 } 9043 (*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP); 9044 (*root_support_list)[*cur_elements] = root_support; 9045 (*cur_elements)++; 9046 } 9047 9048 static void 9049 get_phci_driver_list(char *vhci_class, char ***driver_list, 9050 int **root_support_list, int *cur_elements, int *max_elements) 9051 { 9052 mdi_phci_driver_info_t *st_driver_list, *p; 9053 int st_ndrivers, root_support, i, j, driver_conf_count; 9054 major_t m; 9055 struct devnames *dnp; 9056 ddi_prop_t *propp; 9057 9058 *driver_list = NULL; 9059 *root_support_list = NULL; 9060 *cur_elements = 0; 9061 *max_elements = 0; 9062 9063 /* add the phci drivers derived from the phci driver.conf files */ 9064 for (m = 0; m < devcnt; m++) { 9065 dnp = &devnamesp[m]; 9066 9067 if (dnp->dn_flags & DN_PHCI_DRIVER) { 9068 LOCK_DEV_OPS(&dnp->dn_lock); 9069 if (dnp->dn_global_prop_ptr != NULL && 9070 (propp = 
i_ddi_prop_search(DDI_DEV_T_ANY, 9071 DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING, 9072 &dnp->dn_global_prop_ptr->prop_list)) != NULL && 9073 strcmp(propp->prop_val, vhci_class) == 0) { 9074 9075 root_support = (i_ddi_prop_search(DDI_DEV_T_ANY, 9076 DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT, 9077 &dnp->dn_global_prop_ptr->prop_list) 9078 == NULL) ? 1 : 0; 9079 9080 add_to_phci_list(driver_list, root_support_list, 9081 cur_elements, max_elements, dnp->dn_name, 9082 root_support); 9083 9084 UNLOCK_DEV_OPS(&dnp->dn_lock); 9085 } else 9086 UNLOCK_DEV_OPS(&dnp->dn_lock); 9087 } 9088 } 9089 9090 driver_conf_count = *cur_elements; 9091 9092 /* add the phci drivers specified in the built-in tables */ 9093 if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) { 9094 st_driver_list = scsi_phci_driver_list; 9095 st_ndrivers = sizeof (scsi_phci_driver_list) / 9096 sizeof (mdi_phci_driver_info_t); 9097 } else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) { 9098 st_driver_list = ib_phci_driver_list; 9099 st_ndrivers = sizeof (ib_phci_driver_list) / 9100 sizeof (mdi_phci_driver_info_t); 9101 } else { 9102 st_driver_list = NULL; 9103 st_ndrivers = 0; 9104 } 9105 9106 for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) { 9107 /* add this phci driver if not already added before */ 9108 for (j = 0; j < driver_conf_count; j++) { 9109 if (strcmp((*driver_list)[j], p->phdriver_name) == 0) 9110 break; 9111 } 9112 if (j == driver_conf_count) { 9113 add_to_phci_list(driver_list, root_support_list, 9114 cur_elements, max_elements, p->phdriver_name, 9115 p->phdriver_root_support); 9116 } 9117 } 9118 } 9119 9120 /* 9121 * Attach the phci driver instances associated with the specified vhci class. 9122 * If root is mounted attach all phci driver instances. 9123 * If root is not mounted, attach the instances of only those phci 9124 * drivers that have the root support. 
9125 */ 9126 static void 9127 attach_phci_drivers(char *vhci_class) 9128 { 9129 char **driver_list, **p; 9130 int *root_support_list; 9131 int cur_elements, max_elements, i; 9132 major_t m; 9133 9134 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 9135 &cur_elements, &max_elements); 9136 9137 for (i = 0; i < cur_elements; i++) { 9138 if (modrootloaded || root_support_list[i]) { 9139 m = ddi_name_to_major(driver_list[i]); 9140 if (m != DDI_MAJOR_T_NONE && 9141 ddi_hold_installed_driver(m)) 9142 ddi_rele_driver(m); 9143 } 9144 } 9145 9146 if (driver_list) { 9147 for (i = 0, p = driver_list; i < cur_elements; i++, p++) 9148 kmem_free(*p, strlen(*p) + 1); 9149 kmem_free(driver_list, sizeof (char *) * max_elements); 9150 kmem_free(root_support_list, sizeof (int) * max_elements); 9151 } 9152 } 9153 9154 /* 9155 * Build vhci cache: 9156 * 9157 * Attach phci driver instances and then drive BUS_CONFIG_ALL on 9158 * the phci driver instances. During this process the cache gets built. 9159 * 9160 * Cache is built fully if the root is mounted. 9161 * If the root is not mounted, phci drivers that do not have root support 9162 * are not attached. As a result the cache is built partially. The entries 9163 * in the cache reflect only those phci drivers that have root support. 
9164 */ 9165 static int 9166 build_vhci_cache(mdi_vhci_t *vh) 9167 { 9168 mdi_vhci_config_t *vhc = vh->vh_config; 9169 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 9170 9171 single_threaded_vhconfig_enter(vhc); 9172 9173 rw_enter(&vhcache->vhcache_lock, RW_READER); 9174 if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) { 9175 rw_exit(&vhcache->vhcache_lock); 9176 single_threaded_vhconfig_exit(vhc); 9177 return (0); 9178 } 9179 rw_exit(&vhcache->vhcache_lock); 9180 9181 attach_phci_drivers(vh->vh_class); 9182 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT, 9183 BUS_CONFIG_ALL, DDI_MAJOR_T_NONE); 9184 9185 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 9186 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE; 9187 rw_exit(&vhcache->vhcache_lock); 9188 9189 single_threaded_vhconfig_exit(vhc); 9190 vhcache_dirty(vhc); 9191 return (1); 9192 } 9193 9194 /* 9195 * Determine if discovery of paths is needed. 9196 */ 9197 static int 9198 vhcache_do_discovery(mdi_vhci_config_t *vhc) 9199 { 9200 int rv = 1; 9201 9202 mutex_enter(&vhc->vhc_lock); 9203 if (i_ddi_io_initialized() == 0) { 9204 if (vhc->vhc_path_discovery_boot > 0) { 9205 vhc->vhc_path_discovery_boot--; 9206 goto out; 9207 } 9208 } else { 9209 if (vhc->vhc_path_discovery_postboot > 0) { 9210 vhc->vhc_path_discovery_postboot--; 9211 goto out; 9212 } 9213 } 9214 9215 /* 9216 * Do full path discovery at most once per mdi_path_discovery_interval. 9217 * This is to avoid a series of full path discoveries when opening 9218 * stale /dev/[r]dsk links. 9219 */ 9220 if (mdi_path_discovery_interval != -1 && 9221 ddi_get_lbolt64() >= vhc->vhc_path_discovery_cutoff_time) 9222 goto out; 9223 9224 rv = 0; 9225 out: 9226 mutex_exit(&vhc->vhc_lock); 9227 return (rv); 9228 } 9229 9230 /* 9231 * Discover all paths: 9232 * 9233 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci 9234 * driver instances. During this process all paths will be discovered. 
9235 */ 9236 static int 9237 vhcache_discover_paths(mdi_vhci_t *vh) 9238 { 9239 mdi_vhci_config_t *vhc = vh->vh_config; 9240 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 9241 int rv = 0; 9242 9243 single_threaded_vhconfig_enter(vhc); 9244 9245 if (vhcache_do_discovery(vhc)) { 9246 attach_phci_drivers(vh->vh_class); 9247 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | 9248 NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE); 9249 9250 mutex_enter(&vhc->vhc_lock); 9251 vhc->vhc_path_discovery_cutoff_time = ddi_get_lbolt64() + 9252 mdi_path_discovery_interval * TICKS_PER_SECOND; 9253 mutex_exit(&vhc->vhc_lock); 9254 rv = 1; 9255 } 9256 9257 single_threaded_vhconfig_exit(vhc); 9258 return (rv); 9259 } 9260 9261 /* 9262 * Generic vhci bus config implementation: 9263 * 9264 * Parameters 9265 * vdip vhci dip 9266 * flags bus config flags 9267 * op bus config operation 9268 * The remaining parameters are bus config operation specific 9269 * 9270 * for BUS_CONFIG_ONE 9271 * arg pointer to name@addr 9272 * child upon successful return from this function, *child will be 9273 * set to the configured and held devinfo child node of vdip. 9274 * ct_addr pointer to client address (i.e. GUID) 9275 * 9276 * for BUS_CONFIG_DRIVER 9277 * arg major number of the driver 9278 * child and ct_addr parameters are ignored 9279 * 9280 * for BUS_CONFIG_ALL 9281 * arg, child, and ct_addr parameters are ignored 9282 * 9283 * Note that for the rest of the bus config operations, this function simply 9284 * calls the framework provided default bus config routine. 
9285 */ 9286 int 9287 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op, 9288 void *arg, dev_info_t **child, char *ct_addr) 9289 { 9290 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9291 mdi_vhci_config_t *vhc = vh->vh_config; 9292 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 9293 int rv = 0; 9294 int params_valid = 0; 9295 char *cp; 9296 9297 /* 9298 * To bus config vhcis we relay operation, possibly using another 9299 * thread, to phcis. The phci driver then interacts with MDI to cause 9300 * vhci child nodes to be enumerated under the vhci node. Adding a 9301 * vhci child requires an ndi_devi_enter of the vhci. Since another 9302 * thread may be adding the child, to avoid deadlock we can't wait 9303 * for the relayed operations to complete if we have already entered 9304 * the vhci node. 9305 */ 9306 if (DEVI_BUSY_OWNED(vdip)) { 9307 MDI_DEBUG(2, (MDI_NOTE, vdip, 9308 "vhci dip is busy owned %p", (void *)vdip)); 9309 goto default_bus_config; 9310 } 9311 9312 rw_enter(&vhcache->vhcache_lock, RW_READER); 9313 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) { 9314 rw_exit(&vhcache->vhcache_lock); 9315 rv = build_vhci_cache(vh); 9316 rw_enter(&vhcache->vhcache_lock, RW_READER); 9317 } 9318 9319 switch (op) { 9320 case BUS_CONFIG_ONE: 9321 if (arg != NULL && ct_addr != NULL) { 9322 /* extract node name */ 9323 cp = (char *)arg; 9324 while (*cp != '\0' && *cp != '@') 9325 cp++; 9326 if (*cp == '@') { 9327 params_valid = 1; 9328 *cp = '\0'; 9329 config_client_paths(vhc, (char *)arg, ct_addr); 9330 /* config_client_paths() releases cache_lock */ 9331 *cp = '@'; 9332 break; 9333 } 9334 } 9335 9336 rw_exit(&vhcache->vhcache_lock); 9337 break; 9338 9339 case BUS_CONFIG_DRIVER: 9340 rw_exit(&vhcache->vhcache_lock); 9341 if (rv == 0) 9342 st_bus_config_all_phcis(vhc, flags, op, 9343 (major_t)(uintptr_t)arg); 9344 break; 9345 9346 case BUS_CONFIG_ALL: 9347 rw_exit(&vhcache->vhcache_lock); 9348 if (rv == 0) 9349 st_bus_config_all_phcis(vhc, 
flags, op, -1); 9350 break; 9351 9352 default: 9353 rw_exit(&vhcache->vhcache_lock); 9354 break; 9355 } 9356 9357 9358 default_bus_config: 9359 /* 9360 * All requested child nodes are enumerated under the vhci. 9361 * Now configure them. 9362 */ 9363 if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 9364 NDI_SUCCESS) { 9365 return (MDI_SUCCESS); 9366 } else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) { 9367 /* discover all paths and try configuring again */ 9368 if (vhcache_discover_paths(vh) && 9369 ndi_busop_bus_config(vdip, flags, op, arg, child, 0) == 9370 NDI_SUCCESS) 9371 return (MDI_SUCCESS); 9372 } 9373 9374 return (MDI_FAILURE); 9375 } 9376 9377 /* 9378 * Read the on-disk vhci cache into an nvlist for the specified vhci class. 9379 */ 9380 static nvlist_t * 9381 read_on_disk_vhci_cache(char *vhci_class) 9382 { 9383 nvlist_t *nvl; 9384 int err; 9385 char *filename; 9386 9387 filename = vhclass2vhcache_filename(vhci_class); 9388 9389 if ((err = fread_nvlist(filename, &nvl)) == 0) { 9390 kmem_free(filename, strlen(filename) + 1); 9391 return (nvl); 9392 } else if (err == EIO) 9393 cmn_err(CE_WARN, "%s: I/O error, will recreate", filename); 9394 else if (err == EINVAL) 9395 cmn_err(CE_WARN, 9396 "%s: data file corrupted, will recreate", filename); 9397 9398 kmem_free(filename, strlen(filename) + 1); 9399 return (NULL); 9400 } 9401 9402 /* 9403 * Read on-disk vhci cache into nvlists for all vhci classes. 9404 * Called during booting by i_ddi_read_devices_files(). 9405 */ 9406 void 9407 mdi_read_devices_files(void) 9408 { 9409 int i; 9410 9411 for (i = 0; i < N_VHCI_CLASSES; i++) 9412 vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]); 9413 } 9414 9415 /* 9416 * Remove all stale entries from vhci cache. 
9417 */ 9418 static void 9419 clean_vhcache(mdi_vhci_config_t *vhc) 9420 { 9421 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache; 9422 mdi_vhcache_phci_t *phci, *nxt_phci; 9423 mdi_vhcache_client_t *client, *nxt_client; 9424 mdi_vhcache_pathinfo_t *path, *nxt_path; 9425 9426 rw_enter(&vhcache->vhcache_lock, RW_WRITER); 9427 9428 client = vhcache->vhcache_client_head; 9429 vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL; 9430 for ( ; client != NULL; client = nxt_client) { 9431 nxt_client = client->cct_next; 9432 9433 path = client->cct_cpi_head; 9434 client->cct_cpi_head = client->cct_cpi_tail = NULL; 9435 for ( ; path != NULL; path = nxt_path) { 9436 nxt_path = path->cpi_next; 9437 if ((path->cpi_cphci->cphci_phci != NULL) && 9438 (path->cpi_pip != NULL)) { 9439 enqueue_tail_vhcache_pathinfo(client, path); 9440 } else if (path->cpi_pip != NULL) { 9441 /* Not valid to have a path without a phci. */ 9442 free_vhcache_pathinfo(path); 9443 } 9444 } 9445 9446 if (client->cct_cpi_head != NULL) 9447 enqueue_vhcache_client(vhcache, client); 9448 else { 9449 (void) mod_hash_destroy(vhcache->vhcache_client_hash, 9450 (mod_hash_key_t)client->cct_name_addr); 9451 free_vhcache_client(client); 9452 } 9453 } 9454 9455 phci = vhcache->vhcache_phci_head; 9456 vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL; 9457 for ( ; phci != NULL; phci = nxt_phci) { 9458 9459 nxt_phci = phci->cphci_next; 9460 if (phci->cphci_phci != NULL) 9461 enqueue_vhcache_phci(vhcache, phci); 9462 else 9463 free_vhcache_phci(phci); 9464 } 9465 9466 vhcache->vhcache_clean_time = ddi_get_lbolt64(); 9467 rw_exit(&vhcache->vhcache_lock); 9468 vhcache_dirty(vhc); 9469 } 9470 9471 /* 9472 * Remove all stale entries from vhci cache. 
9473 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C 9474 */ 9475 void 9476 mdi_clean_vhcache(void) 9477 { 9478 mdi_vhci_t *vh; 9479 9480 mutex_enter(&mdi_mutex); 9481 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 9482 vh->vh_refcnt++; 9483 mutex_exit(&mdi_mutex); 9484 clean_vhcache(vh->vh_config); 9485 mutex_enter(&mdi_mutex); 9486 vh->vh_refcnt--; 9487 } 9488 mutex_exit(&mdi_mutex); 9489 } 9490 9491 /* 9492 * mdi_vhci_walk_clients(): 9493 * Walker routine to traverse client dev_info nodes 9494 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree 9495 * below the client, including nexus devices, which we dont want. 9496 * So we just traverse the immediate siblings, starting from 1st client. 9497 */ 9498 void 9499 mdi_vhci_walk_clients(dev_info_t *vdip, 9500 int (*f)(dev_info_t *, void *), void *arg) 9501 { 9502 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9503 dev_info_t *cdip; 9504 mdi_client_t *ct; 9505 9506 MDI_VHCI_CLIENT_LOCK(vh); 9507 cdip = ddi_get_child(vdip); 9508 while (cdip) { 9509 ct = i_devi_get_client(cdip); 9510 MDI_CLIENT_LOCK(ct); 9511 9512 if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE) 9513 cdip = ddi_get_next_sibling(cdip); 9514 else 9515 cdip = NULL; 9516 9517 MDI_CLIENT_UNLOCK(ct); 9518 } 9519 MDI_VHCI_CLIENT_UNLOCK(vh); 9520 } 9521 9522 /* 9523 * mdi_vhci_walk_phcis(): 9524 * Walker routine to traverse phci dev_info nodes 9525 */ 9526 void 9527 mdi_vhci_walk_phcis(dev_info_t *vdip, 9528 int (*f)(dev_info_t *, void *), void *arg) 9529 { 9530 mdi_vhci_t *vh = i_devi_get_vhci(vdip); 9531 mdi_phci_t *ph, *next; 9532 9533 MDI_VHCI_PHCI_LOCK(vh); 9534 ph = vh->vh_phci_head; 9535 while (ph) { 9536 MDI_PHCI_LOCK(ph); 9537 9538 if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE) 9539 next = ph->ph_next; 9540 else 9541 next = NULL; 9542 9543 MDI_PHCI_UNLOCK(ph); 9544 ph = next; 9545 } 9546 MDI_VHCI_PHCI_UNLOCK(vh); 9547 } 9548 9549 9550 /* 9551 * mdi_walk_vhcis(): 9552 * Walker routine to traverse vhci 
dev_info nodes 9553 */ 9554 void 9555 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg) 9556 { 9557 mdi_vhci_t *vh = NULL; 9558 9559 mutex_enter(&mdi_mutex); 9560 /* 9561 * Scan for already registered vhci 9562 */ 9563 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) { 9564 vh->vh_refcnt++; 9565 mutex_exit(&mdi_mutex); 9566 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) { 9567 mutex_enter(&mdi_mutex); 9568 vh->vh_refcnt--; 9569 break; 9570 } else { 9571 mutex_enter(&mdi_mutex); 9572 vh->vh_refcnt--; 9573 } 9574 } 9575 9576 mutex_exit(&mdi_mutex); 9577 } 9578 9579 /* 9580 * i_mdi_log_sysevent(): 9581 * Logs events for pickup by syseventd 9582 */ 9583 static void 9584 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass) 9585 { 9586 char *path_name; 9587 nvlist_t *attr_list; 9588 9589 if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, 9590 KM_SLEEP) != DDI_SUCCESS) { 9591 goto alloc_failed; 9592 } 9593 9594 path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 9595 (void) ddi_pathname(dip, path_name); 9596 9597 if (nvlist_add_string(attr_list, DDI_DRIVER_NAME, 9598 ddi_driver_name(dip)) != DDI_SUCCESS) { 9599 goto error; 9600 } 9601 9602 if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR, 9603 (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) { 9604 goto error; 9605 } 9606 9607 if (nvlist_add_int32(attr_list, DDI_INSTANCE, 9608 (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) { 9609 goto error; 9610 } 9611 9612 if (nvlist_add_string(attr_list, DDI_PATHNAME, 9613 path_name) != DDI_SUCCESS) { 9614 goto error; 9615 } 9616 9617 if (nvlist_add_string(attr_list, DDI_CLASS, 9618 ph_vh_class) != DDI_SUCCESS) { 9619 goto error; 9620 } 9621 9622 (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass, 9623 attr_list, NULL, DDI_SLEEP); 9624 9625 error: 9626 kmem_free(path_name, MAXPATHLEN); 9627 nvlist_free(attr_list); 9628 return; 9629 9630 alloc_failed: 9631 MDI_DEBUG(1, (MDI_WARN, dip, "!unable to send sysevent")); 9632 } 9633 9634 char ** 9635 
mdi_get_phci_driver_list(char *vhci_class, int *ndrivers) 9636 { 9637 char **driver_list, **ret_driver_list = NULL; 9638 int *root_support_list; 9639 int cur_elements, max_elements; 9640 9641 get_phci_driver_list(vhci_class, &driver_list, &root_support_list, 9642 &cur_elements, &max_elements); 9643 9644 9645 if (driver_list) { 9646 kmem_free(root_support_list, sizeof (int) * max_elements); 9647 ret_driver_list = mdi_realloc(driver_list, sizeof (char *) 9648 * max_elements, sizeof (char *) * cur_elements); 9649 } 9650 *ndrivers = cur_elements; 9651 9652 return (ret_driver_list); 9653 9654 } 9655 9656 void 9657 mdi_free_phci_driver_list(char **driver_list, int ndrivers) 9658 { 9659 char **p; 9660 int i; 9661 9662 if (driver_list) { 9663 for (i = 0, p = driver_list; i < ndrivers; i++, p++) 9664 kmem_free(*p, strlen(*p) + 1); 9665 kmem_free(driver_list, sizeof (char *) * ndrivers); 9666 } 9667 } 9668 9669 /* 9670 * mdi_is_dev_supported(): 9671 * function called by pHCI bus config operation to determine if a 9672 * device should be represented as a child of the vHCI or the 9673 * pHCI. This decision is made by the vHCI, using cinfo idenity 9674 * information passed by the pHCI - specifics of the cinfo 9675 * representation are by agreement between the pHCI and vHCI. 9676 * Return Values: 9677 * MDI_SUCCESS 9678 * MDI_FAILURE 9679 */ 9680 int 9681 mdi_is_dev_supported(char *class, dev_info_t *pdip, void *cinfo) 9682 { 9683 mdi_vhci_t *vh; 9684 9685 ASSERT(class && pdip); 9686 9687 /* 9688 * For dev_supported, mdi_phci_register() must have established pdip as 9689 * a pHCI. 9690 * 9691 * NOTE: mdi_phci_register() does "mpxio-disable" processing, and 9692 * MDI_PHCI(pdip) will return false if mpxio is disabled. 9693 */ 9694 if (!MDI_PHCI(pdip)) 9695 return (MDI_FAILURE); 9696 9697 /* Return MDI_FAILURE if vHCI does not support asking the question. 
*/ 9698 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class); 9699 if ((vh == NULL) || (vh->vh_ops->vo_is_dev_supported == NULL)) { 9700 return (MDI_FAILURE); 9701 } 9702 9703 /* Return vHCI answer */ 9704 return (vh->vh_ops->vo_is_dev_supported(vh->vh_dip, pdip, cinfo)); 9705 } 9706 9707 int 9708 mdi_dc_return_dev_state(mdi_pathinfo_t *pip, struct devctl_iocdata *dcp) 9709 { 9710 uint_t devstate = 0; 9711 dev_info_t *cdip; 9712 9713 if ((pip == NULL) || (dcp == NULL)) 9714 return (MDI_FAILURE); 9715 9716 cdip = mdi_pi_get_client(pip); 9717 9718 switch (mdi_pi_get_state(pip)) { 9719 case MDI_PATHINFO_STATE_INIT: 9720 devstate = DEVICE_DOWN; 9721 break; 9722 case MDI_PATHINFO_STATE_ONLINE: 9723 devstate = DEVICE_ONLINE; 9724 if ((cdip) && (devi_stillreferenced(cdip) == DEVI_REFERENCED)) 9725 devstate |= DEVICE_BUSY; 9726 break; 9727 case MDI_PATHINFO_STATE_STANDBY: 9728 devstate = DEVICE_ONLINE; 9729 break; 9730 case MDI_PATHINFO_STATE_FAULT: 9731 devstate = DEVICE_DOWN; 9732 break; 9733 case MDI_PATHINFO_STATE_OFFLINE: 9734 devstate = DEVICE_OFFLINE; 9735 break; 9736 default: 9737 ASSERT(MDI_PI(pip)->pi_state); 9738 } 9739 9740 if (copyout(&devstate, dcp->cpyout_buf, sizeof (uint_t)) != 0) 9741 return (MDI_FAILURE); 9742 9743 return (MDI_SUCCESS); 9744 }