1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2014 Nexenta Systems Inc. All rights reserved.
25 */
26
27 /*
28 * Multipath driver interface (MDI) implementation; see mdi_impldefs.h for a
29 * more detailed discussion of the overall mpxio architecture.
30 */
31
32 #include <sys/note.h>
33 #include <sys/types.h>
34 #include <sys/varargs.h>
35 #include <sys/param.h>
36 #include <sys/errno.h>
37 #include <sys/uio.h>
38 #include <sys/buf.h>
39 #include <sys/modctl.h>
40 #include <sys/open.h>
41 #include <sys/kmem.h>
42 #include <sys/poll.h>
43 #include <sys/conf.h>
44 #include <sys/bootconf.h>
45 #include <sys/cmn_err.h>
46 #include <sys/stat.h>
47 #include <sys/ddi.h>
48 #include <sys/sunddi.h>
49 #include <sys/ddipropdefs.h>
50 #include <sys/sunndi.h>
51 #include <sys/ndi_impldefs.h>
52 #include <sys/promif.h>
53 #include <sys/sunmdi.h>
54 #include <sys/mdi_impldefs.h>
55 #include <sys/taskq.h>
56 #include <sys/epm.h>
57 #include <sys/sunpm.h>
58 #include <sys/modhash.h>
59 #include <sys/disp.h>
60 #include <sys/autoconf.h>
61 #include <sys/sysmacros.h>
62
63 #ifdef DEBUG
64 #include <sys/debug.h>
65 int mdi_debug = 1;
66 int mdi_debug_logonly = 0;
67 #define MDI_DEBUG(dbglevel, pargs) if (mdi_debug >= (dbglevel)) i_mdi_log pargs
68 #define MDI_WARN CE_WARN, __func__
69 #define MDI_NOTE CE_NOTE, __func__
70 #define MDI_CONT CE_CONT, __func__
71 static void i_mdi_log(int, const char *, dev_info_t *, const char *, ...);
72 #else /* !DEBUG */
73 #define MDI_DEBUG(dbglevel, pargs)
74 #endif /* DEBUG */
75 int mdi_debug_consoleonly = 0;
76 int mdi_delay = 3;
77
78 extern pri_t minclsyspri;
79 extern int modrootloaded;
80
81 /*
82 * Global mutex:
83 * Protects vHCI list and structure members.
84 */
85 kmutex_t mdi_mutex;
86
87 /*
88 * Registered vHCI class driver lists
89 */
90 int mdi_vhci_count;
91 mdi_vhci_t *mdi_vhci_head;
92 mdi_vhci_t *mdi_vhci_tail;
93
94 /*
95 * Client Hash Table size
96 */
97 static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;
98
99 /*
100 * taskq interface definitions
101 */
102 #define MDI_TASKQ_N_THREADS 8
103 #define MDI_TASKQ_PRI minclsyspri
104 #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads)
105 #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads)
106
107 taskq_t *mdi_taskq;
108 static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;
109
110 #define TICKS_PER_SECOND (drv_usectohz(1000000))
111
112 /*
113 * The data should be "quiet" for this interval (in seconds) before the
114 * vhci cached data is flushed to the disk.
115 */
116 static int mdi_vhcache_flush_delay = 10;
117
118 /* number of seconds the vhcache flush daemon will sleep idle before exiting */
119 static int mdi_vhcache_flush_daemon_idle_time = 60;
120
121 /*
122 * MDI falls back to discovery of all paths when a bus_config_one fails.
123 * The following parameters can be used to tune this operation.
124 *
125 * mdi_path_discovery_boot
126 * Number of times path discovery will be attempted during early boot.
127 * Probably there is no reason to ever set this value to greater than one.
128 *
129 * mdi_path_discovery_postboot
130 * Number of times path discovery will be attempted after early boot.
131 * Set it to a minimum of two to allow for discovery of iscsi paths which
132 * may happen very late during booting.
133 *
134 * mdi_path_discovery_interval
135 * Minimum number of seconds MDI will wait between successive discovery
136 * of all paths. Set it to -1 to disable discovery of all paths.
137 */
138 static int mdi_path_discovery_boot = 1;
139 static int mdi_path_discovery_postboot = 2;
140 static int mdi_path_discovery_interval = 10;
141
142 /*
143 * number of seconds the asynchronous configuration thread will sleep idle
144 * before exiting.
145 */
146 static int mdi_async_config_idle_time = 600;
147
148 static int mdi_bus_config_cache_hash_size = 256;
149
150 /* turns off multithreaded configuration for certain operations */
151 static int mdi_mtc_off = 0;
152
153 /*
154 * The "path" to a pathinfo node is identical to the /devices path to a
155 * devinfo node had the device been enumerated under a pHCI instead of
156 * a vHCI. This pathinfo "path" is associated with a 'path_instance'.
157 * This association persists across create/delete of the pathinfo nodes,
158 * but not across reboot.
159 */
160 static uint_t mdi_pathmap_instance = 1; /* 0 -> any path */
161 static int mdi_pathmap_hash_size = 256;
162 static kmutex_t mdi_pathmap_mutex;
163 static mod_hash_t *mdi_pathmap_bypath; /* "path"->instance */
164 static mod_hash_t *mdi_pathmap_byinstance; /* instance->"path" */
165 static mod_hash_t *mdi_pathmap_sbyinstance; /* inst->shortpath */
166
167 /*
168 * MDI component property name/value string definitions
169 */
170 const char *mdi_component_prop = "mpxio-component";
171 const char *mdi_component_prop_vhci = "vhci";
172 const char *mdi_component_prop_phci = "phci";
173 const char *mdi_component_prop_client = "client";
174
175 /*
176 * MDI client global unique identifier property name
177 */
178 const char *mdi_client_guid_prop = "client-guid";
179
180 /*
181 * MDI client load balancing property name/value string definitions
182 */
183 const char *mdi_load_balance = "load-balance";
184 const char *mdi_load_balance_none = "none";
185 const char *mdi_load_balance_rr = "round-robin";
186 const char *mdi_load_balance_lba = "logical-block";
187
188 /*
189 * Obsolete vHCI class definition; to be removed after Leadville update
190 */
191 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;
192
193 static char vhci_greeting[] =
194 "\tThere already exists one vHCI driver for class %s\n"
195 "\tOnly one vHCI driver for each class is allowed\n";
196
197 /*
198 * Static function prototypes
199 */
200 static int i_mdi_phci_offline(dev_info_t *, uint_t);
201 static int i_mdi_client_offline(dev_info_t *, uint_t);
202 static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
203 static void i_mdi_phci_post_detach(dev_info_t *,
204 ddi_detach_cmd_t, int);
205 static int i_mdi_client_pre_detach(dev_info_t *,
206 ddi_detach_cmd_t);
207 static void i_mdi_client_post_detach(dev_info_t *,
208 ddi_detach_cmd_t, int);
209 static void i_mdi_pm_hold_pip(mdi_pathinfo_t *);
210 static void i_mdi_pm_rele_pip(mdi_pathinfo_t *);
211 static int i_mdi_lba_lb(mdi_client_t *ct,
212 mdi_pathinfo_t **ret_pip, struct buf *buf);
213 static void i_mdi_pm_hold_client(mdi_client_t *, int);
214 static void i_mdi_pm_rele_client(mdi_client_t *, int);
215 static void i_mdi_pm_reset_client(mdi_client_t *);
216 static int i_mdi_power_all_phci(mdi_client_t *);
217 static void i_mdi_log_sysevent(dev_info_t *, char *, char *);
218
219
220 /*
221 * Internal mdi_pathinfo node functions
222 */
223 static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);
224
225 static mdi_vhci_t *i_mdi_vhci_class2vhci(char *);
226 static mdi_vhci_t *i_devi_get_vhci(dev_info_t *);
227 static mdi_phci_t *i_devi_get_phci(dev_info_t *);
228 static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
229 static void i_mdi_phci_unlock(mdi_phci_t *);
230 static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
231 static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
232 static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
233 static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
234 mdi_client_t *);
235 static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
236 static void i_mdi_client_remove_path(mdi_client_t *,
237 mdi_pathinfo_t *);
238
239 static int i_mdi_pi_state_change(mdi_pathinfo_t *,
240 mdi_pathinfo_state_t, int);
241 static int i_mdi_pi_offline(mdi_pathinfo_t *, int);
242 static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
243 char **, int);
244 static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
245 static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
246 static int i_mdi_is_child_present(dev_info_t *, dev_info_t *);
247 static mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
248 static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
249 static void i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
250 static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, char *, char *);
251 static void i_mdi_client_update_state(mdi_client_t *);
252 static int i_mdi_client_compute_state(mdi_client_t *,
253 mdi_phci_t *);
254 static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
255 static void i_mdi_client_unlock(mdi_client_t *);
256 static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
257 static mdi_client_t *i_devi_get_client(dev_info_t *);
258 /*
259 * NOTE: this will be removed once the NWS files are changed to use the new
260 * mdi_{enable,disable}_path interfaces
261 */
262 static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
263 int, int);
264 static mdi_pathinfo_t *i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
265 mdi_vhci_t *vh, int flags, int op);
266 /*
267 * Failover related function prototypes
268 */
269 static int i_mdi_failover(void *);
270
271 /*
272 * misc internal functions
273 */
274 static int i_mdi_get_hash_key(char *);
275 static int i_map_nvlist_error_to_mdi(int);
276 static void i_mdi_report_path_state(mdi_client_t *,
277 mdi_pathinfo_t *);
278
279 static void setup_vhci_cache(mdi_vhci_t *);
280 static int destroy_vhci_cache(mdi_vhci_t *);
281 static int stop_vhcache_async_threads(mdi_vhci_config_t *);
282 static boolean_t stop_vhcache_flush_thread(void *, int);
283 static void free_string_array(char **, int);
284 static void free_vhcache_phci(mdi_vhcache_phci_t *);
285 static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
286 static void free_vhcache_client(mdi_vhcache_client_t *);
287 static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
288 static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *);
289 static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
290 static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
291 static void vhcache_pi_add(mdi_vhci_config_t *,
292 struct mdi_pathinfo *);
293 static void vhcache_pi_remove(mdi_vhci_config_t *,
294 struct mdi_pathinfo *);
295 static void free_phclient_path_list(mdi_phys_path_t *);
296 static void sort_vhcache_paths(mdi_vhcache_client_t *);
297 static int flush_vhcache(mdi_vhci_config_t *, int);
298 static void vhcache_dirty(mdi_vhci_config_t *);
299 static void free_async_client_config(mdi_async_client_config_t *);
300 static void single_threaded_vhconfig_enter(mdi_vhci_config_t *);
301 static void single_threaded_vhconfig_exit(mdi_vhci_config_t *);
302 static nvlist_t *read_on_disk_vhci_cache(char *);
303 extern int fread_nvlist(char *, nvlist_t **);
304 extern int fwrite_nvlist(char *, nvlist_t *);
305
306 /* called once when first vhci registers with mdi */
307 static void
308 i_mdi_init()
309 {
310 static int initialized = 0;
311
312 if (initialized)
313 return;
314 initialized = 1;
315
316 mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);
317
318 /* Create our taskq resources */
319 mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
320 MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
321 TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
322 ASSERT(mdi_taskq != NULL); /* taskq_create never fails */
323
324 /* Allocate ['path_instance' <-> "path"] maps */
325 mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL);
326 mdi_pathmap_bypath = mod_hash_create_strhash(
327 "mdi_pathmap_bypath", mdi_pathmap_hash_size,
328 mod_hash_null_valdtor);
329 mdi_pathmap_byinstance = mod_hash_create_idhash(
330 "mdi_pathmap_byinstance", mdi_pathmap_hash_size,
331 mod_hash_null_valdtor);
332 mdi_pathmap_sbyinstance = mod_hash_create_idhash(
333 "mdi_pathmap_sbyinstance", mdi_pathmap_hash_size,
334 mod_hash_null_valdtor);
335 }
336
337 /*
338 * mdi_get_component_type():
339 * Return mpxio component type
340 * Return Values:
341 * MDI_COMPONENT_NONE
342 * MDI_COMPONENT_VHCI
343 * MDI_COMPONENT_PHCI
344 * MDI_COMPONENT_CLIENT
345 * XXX This doesn't work under multi-level MPxIO and should be
346 * removed when clients migrate mdi_component_is_*() interfaces.
347 */
348 int
349 mdi_get_component_type(dev_info_t *dip)
350 {
351 return (DEVI(dip)->devi_mdi_component);
352 }
353
354 /*
355 * mdi_vhci_register():
356 * Register a vHCI module with the mpxio framework
357 * mdi_vhci_register() is called by vHCI drivers to register the
358 * 'class_driver' vHCI driver and its MDI entrypoints with the
359 * mpxio framework. The vHCI driver must call this interface as
360 * part of its attach(9e) handler.
361 * Competing threads may try to attach mdi_vhci_register() as
362 * the vHCI drivers are loaded and attached as a result of pHCI
363 * driver instance registration (mdi_phci_register()) with the
364 * framework.
365 * Return Values:
366 * MDI_SUCCESS
367 * MDI_FAILURE
368 */
369 /*ARGSUSED*/
370 int
371 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
372 int flags)
373 {
374 mdi_vhci_t *vh = NULL;
375
376 /* Registrant can't be older */
377 ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV);
378
379 #ifdef DEBUG
380 /*
381 * IB nexus driver is loaded only when IB hardware is present.
382 * In order to be able to do this there is a need to drive the loading
383 * and attaching of the IB nexus driver (especially when an IB hardware
384 * is dynamically plugged in) when an IB HCA driver (PHCI)
385 * is being attached. Unfortunately this gets into the limitations
386 * of devfs as there seems to be no clean way to drive configuration
387 * of a subtree from another subtree of a devfs. Hence, do not ASSERT
388 * for IB.
389 */
390 if (strcmp(class, MDI_HCI_CLASS_IB) != 0)
391 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
392 #endif
393
394 i_mdi_init();
395
396 mutex_enter(&mdi_mutex);
397 /*
398 * Scan for already registered vhci
399 */
400 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
401 if (strcmp(vh->vh_class, class) == 0) {
402 /*
403 * vHCI has already been created. Check for valid
404 * vHCI ops registration. We only support one vHCI
405 * module per class
406 */
407 if (vh->vh_ops != NULL) {
408 mutex_exit(&mdi_mutex);
409 cmn_err(CE_NOTE, vhci_greeting, class);
410 return (MDI_FAILURE);
411 }
412 break;
413 }
414 }
415
416 /*
417 * if not yet created, create the vHCI component
418 */
419 if (vh == NULL) {
420 struct client_hash *hash = NULL;
421 char *load_balance;
422
423 /*
424 * Allocate and initialize the mdi extensions
425 */
426 vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
427 hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
428 KM_SLEEP);
429 vh->vh_client_table = hash;
430 vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
431 (void) strcpy(vh->vh_class, class);
432 vh->vh_lb = LOAD_BALANCE_RR;
433 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
434 0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
435 if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
436 vh->vh_lb = LOAD_BALANCE_NONE;
437 } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
438 == 0) {
439 vh->vh_lb = LOAD_BALANCE_LBA;
440 }
441 ddi_prop_free(load_balance);
442 }
443
444 mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL);
445 mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL);
446
447 /*
448 * Store the vHCI ops vectors
449 */
450 vh->vh_dip = vdip;
451 vh->vh_ops = vops;
452
453 setup_vhci_cache(vh);
454
455 if (mdi_vhci_head == NULL) {
456 mdi_vhci_head = vh;
457 }
458 if (mdi_vhci_tail) {
459 mdi_vhci_tail->vh_next = vh;
460 }
461 mdi_vhci_tail = vh;
462 mdi_vhci_count++;
463 }
464
465 /*
466 * Claim the devfs node as a vhci component
467 */
468 DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;
469
470 /*
471 * Initialize our back reference from dev_info node
472 */
473 DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
474 mutex_exit(&mdi_mutex);
475 return (MDI_SUCCESS);
476 }
477
/*
 * mdi_vhci_unregister():
 *		Unregister a vHCI module from mpxio framework
 *		mdi_vhci_unregister() is called from the detach(9E) entrypoint
 *		of a vhci to unregister it from the framework.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_vhci_unregister(dev_info_t *vdip, int flags)
{
	mdi_vhci_t	*found, *vh, *prev = NULL;

	/* Caller must have done an ndi_devi_enter of the vHCI's parent. */
	ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));

	/*
	 * Check for invalid VHCI
	 */
	if ((vh = i_devi_get_vhci(vdip)) == NULL)
		return (MDI_FAILURE);

	/*
	 * Scan the list of registered vHCIs for a match; 'prev' trails
	 * 'found' so the node can be unlinked from the singly-linked
	 * list below.
	 */
	mutex_enter(&mdi_mutex);
	for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
		if (found == vh)
			break;
		prev = found;
	}

	if (found == NULL) {
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}

	/*
	 * Check the vHCI, pHCI and client count. All the pHCIs and clients
	 * should have been unregistered, before a vHCI can be
	 * unregistered.
	 */
	MDI_VHCI_PHCI_LOCK(vh);
	if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) {
		MDI_VHCI_PHCI_UNLOCK(vh);
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}
	MDI_VHCI_PHCI_UNLOCK(vh);

	/* Teardown of the vhci cache can also veto the unregister. */
	if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
		mutex_exit(&mdi_mutex);
		return (MDI_FAILURE);
	}

	/*
	 * Remove the vHCI from the global list
	 */
	if (vh == mdi_vhci_head) {
		mdi_vhci_head = vh->vh_next;
	} else {
		prev->vh_next = vh->vh_next;
	}
	if (vh == mdi_vhci_tail) {
		mdi_vhci_tail = prev;
	}
	mdi_vhci_count--;
	mutex_exit(&mdi_mutex);

	/* Detach the devinfo node from MDI and free the vHCI extensions. */
	vh->vh_ops = NULL;
	DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
	DEVI(vdip)->devi_mdi_xhci = NULL;
	kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
	kmem_free(vh->vh_client_table,
	    mdi_client_table_size * sizeof (struct client_hash));
	mutex_destroy(&vh->vh_phci_mutex);
	mutex_destroy(&vh->vh_client_mutex);

	kmem_free(vh, sizeof (mdi_vhci_t));
	return (MDI_SUCCESS);
}
560
561 /*
562 * i_mdi_vhci_class2vhci():
563 * Look for a matching vHCI module given a vHCI class name
564 * Return Values:
565 * Handle to a vHCI component
566 * NULL
567 */
568 static mdi_vhci_t *
569 i_mdi_vhci_class2vhci(char *class)
570 {
571 mdi_vhci_t *vh = NULL;
572
573 ASSERT(!MUTEX_HELD(&mdi_mutex));
574
575 mutex_enter(&mdi_mutex);
576 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
577 if (strcmp(vh->vh_class, class) == 0) {
578 break;
579 }
580 }
581 mutex_exit(&mdi_mutex);
582 return (vh);
583 }
584
585 /*
586 * i_devi_get_vhci():
587 * Utility function to get the handle to a vHCI component
588 * Return Values:
589 * Handle to a vHCI component
590 * NULL
591 */
592 mdi_vhci_t *
593 i_devi_get_vhci(dev_info_t *vdip)
594 {
595 mdi_vhci_t *vh = NULL;
596 if (MDI_VHCI(vdip)) {
597 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
598 }
599 return (vh);
600 }
601
/*
 * mdi_phci_register():
 *		Register a pHCI module with mpxio framework
 *		mdi_phci_register() is called by pHCI drivers to register with
 *		the mpxio framework and a specific 'class_driver' vHCI.  The
 *		pHCI driver must call this interface as part of its attach(9e)
 *		handler.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_phci_register(char *class, dev_info_t *pdip, int flags)
{
	mdi_phci_t		*ph;
	mdi_vhci_t		*vh;
	char			*data;

	/*
	 * Some subsystems, like fcp, perform pHCI registration from a
	 * different thread than the one doing the pHCI attach(9E) - the
	 * driver attach code is waiting for this other thread to complete.
	 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent
	 * (indicating that some thread has done an ndi_devi_enter of parent)
	 * not DEVI_BUSY_OWNED (which would indicate that we did the enter).
	 */
	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));

	/*
	 * Check for mpxio-disable property. Enable mpxio if the property is
	 * missing or not set to "yes".
	 * If the property is set to "yes" then emit a brief message.
	 */
	if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
	    &data) == DDI_SUCCESS)) {
		if (strcmp(data, "yes") == 0) {
			MDI_DEBUG(1, (MDI_CONT, pdip,
			    "?multipath capabilities disabled via %s.conf.",
			    ddi_driver_name(pdip)));
			ddi_prop_free(data);
			return (MDI_FAILURE);
		}
		ddi_prop_free(data);
	}

	/*
	 * Search for a matching vHCI
	 */
	vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
	if (vh == NULL) {
		/* No vHCI of this class registered; the pHCI cannot bind. */
		return (MDI_FAILURE);
	}

	/* Allocate and initialize the pHCI extension. */
	ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
	mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
	ph->ph_dip = pdip;
	ph->ph_vhci = vh;
	ph->ph_next = NULL;
	ph->ph_unstable = 0;
	ph->ph_vprivate = 0;
	cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);

	/* A newly registered pHCI starts in the powered-up state. */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_SET_POWER_UP(ph);
	MDI_PHCI_UNLOCK(ph);

	/* Claim the devfs node and set the back reference. */
	DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
	DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;

	/* Record this pHCI in the vhci cache configuration. */
	vhcache_phci_add(vh->vh_config, ph);

	/* Append the pHCI to the vHCI's list (head/tail maintained). */
	MDI_VHCI_PHCI_LOCK(vh);
	if (vh->vh_phci_head == NULL) {
		vh->vh_phci_head = ph;
	}
	if (vh->vh_phci_tail) {
		vh->vh_phci_tail->ph_next = ph;
	}
	vh->vh_phci_tail = ph;
	vh->vh_phci_count++;
	MDI_VHCI_PHCI_UNLOCK(vh);

	/* Announce initiator registration to userland. */
	i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
	return (MDI_SUCCESS);
}
687
/*
 * mdi_phci_unregister():
 *		Unregister a pHCI module from mpxio framework
 *		mdi_phci_unregister() is called by the pHCI drivers from their
 *		detach(9E) handler to unregister their instances from the
 *		framework.
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 */
/*ARGSUSED*/
int
mdi_phci_unregister(dev_info_t *pdip, int flags)
{
	mdi_vhci_t		*vh;
	mdi_phci_t		*ph;
	mdi_phci_t		*tmp;
	mdi_phci_t		*prev = NULL;
	mdi_pathinfo_t		*pip;

	/* See mdi_phci_register() for why this is not DEVI_BUSY_OWNED. */
	ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));

	ph = i_devi_get_phci(pdip);
	if (ph == NULL) {
		MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid pHCI"));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh != NULL);
	if (vh == NULL) {
		MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid vHCI"));
		return (MDI_FAILURE);
	}

	/*
	 * Unlink ph from the vHCI's pHCI list; 'prev' trails 'tmp' so the
	 * head/tail pointers can be fixed up after the removal.  A
	 * registered pHCI is expected to be present on this list.
	 */
	MDI_VHCI_PHCI_LOCK(vh);
	tmp = vh->vh_phci_head;
	while (tmp) {
		if (tmp == ph) {
			break;
		}
		prev = tmp;
		tmp = tmp->ph_next;
	}

	if (ph == vh->vh_phci_head) {
		vh->vh_phci_head = ph->ph_next;
	} else {
		prev->ph_next = ph->ph_next;
	}

	if (ph == vh->vh_phci_tail) {
		vh->vh_phci_tail = prev;
	}

	vh->vh_phci_count--;
	MDI_VHCI_PHCI_UNLOCK(vh);

	/* Walk remaining pathinfo nodes and disassociate them from pHCI */
	MDI_PHCI_LOCK(ph);
	for (pip = (mdi_pathinfo_t *)ph->ph_path_head; pip;
	    pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link)
		MDI_PI(pip)->pi_phci = NULL;
	MDI_PHCI_UNLOCK(ph);

	/* Notify userland, drop the cache entry, free the extension. */
	i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
	    ESC_DDI_INITIATOR_UNREGISTER);
	vhcache_phci_remove(vh->vh_config, ph);
	cv_destroy(&ph->ph_unstable_cv);
	mutex_destroy(&ph->ph_mutex);
	kmem_free(ph, sizeof (mdi_phci_t));
	DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
	DEVI(pdip)->devi_mdi_xhci = NULL;
	return (MDI_SUCCESS);
}
763
764 /*
765 * i_devi_get_phci():
766 * Utility function to return the phci extensions.
767 */
768 static mdi_phci_t *
769 i_devi_get_phci(dev_info_t *pdip)
770 {
771 mdi_phci_t *ph = NULL;
772
773 if (MDI_PHCI(pdip)) {
774 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
775 }
776 return (ph);
777 }
778
/*
 * Single thread mdi entry into devinfo node for modifying its children.
 * If necessary we perform an ndi_devi_enter of the vHCI before doing
 * an ndi_devi_enter of 'dip'.  We maintain the circular-recursion value
 * in two parts: one for the vHCI and one for the pHCI.
 */
void
mdi_devi_enter(dev_info_t *phci_dip, int *circular)
{
	dev_info_t	*vdip;
	int		vcircular, pcircular;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));
	vdip = mdi_devi_get_vdip(phci_dip);
	ASSERT(vdip);			/* A pHCI always has a vHCI */

	/*
	 * If pHCI is detaching then the framework has already entered the
	 * vHCI on a thread that went down the code path leading to
	 * detach_node().  This framework enter of the vHCI during pHCI
	 * detach is done to avoid deadlock with vHCI power management
	 * operations which enter the vHCI and then enter down the path
	 * to the pHCI.  If pHCI is detaching then we piggyback this call's
	 * enter of the vHCI on the framework's vHCI enter that has already
	 * occurred - this is OK because we know that the framework thread
	 * doing detach is waiting for our completion.
	 *
	 * We should DEVI_IS_DETACHING under an enter of the parent to avoid
	 * race with detach - but we can't do that because the framework has
	 * already entered the parent, so we have some complexity instead.
	 */
	for (;;) {
		if (ndi_devi_tryenter(vdip, &vcircular)) {
			ASSERT(vcircular != -1);
			if (DEVI_IS_DETACHING(phci_dip)) {
				/*
				 * Detach started while we entered: undo our
				 * vHCI enter and piggyback the framework's.
				 * vcircular == -1 records the piggyback so
				 * mdi_devi_exit() skips the vHCI exit.
				 */
				ndi_devi_exit(vdip, vcircular);
				vcircular = -1;
			}
			break;
		} else if (DEVI_IS_DETACHING(phci_dip)) {
			/* Framework already holds the vHCI; piggyback. */
			vcircular = -1;
			break;
		} else if (servicing_interrupt()) {
			/*
			 * Don't delay an interrupt (and ensure adaptive
			 * mutex inversion support).
			 */
			ndi_devi_enter(vdip, &vcircular);
			break;
		} else {
			/* vHCI busy: back off briefly and retry tryenter. */
			delay_random(mdi_delay);
		}
	}

	ndi_devi_enter(phci_dip, &pcircular);

	/* Pack both recursion counts into one int (vHCI high, pHCI low). */
	*circular = (vcircular << 16) | (pcircular & 0xFFFF);
}
837
838 /*
839 * Attempt to mdi_devi_enter.
840 */
841 int
842 mdi_devi_tryenter(dev_info_t *phci_dip, int *circular)
843 {
844 dev_info_t *vdip;
845 int vcircular, pcircular;
846
847 /* Verify calling context */
848 ASSERT(MDI_PHCI(phci_dip));
849 vdip = mdi_devi_get_vdip(phci_dip);
850 ASSERT(vdip); /* A pHCI always has a vHCI */
851
852 if (ndi_devi_tryenter(vdip, &vcircular)) {
853 if (ndi_devi_tryenter(phci_dip, &pcircular)) {
854 *circular = (vcircular << 16) | (pcircular & 0xFFFF);
855 return (1); /* locked */
856 }
857 ndi_devi_exit(vdip, vcircular);
858 }
859 return (0); /* busy */
860 }
861
862 /*
863 * Release mdi_devi_enter or successful mdi_devi_tryenter.
864 */
865 void
866 mdi_devi_exit(dev_info_t *phci_dip, int circular)
867 {
868 dev_info_t *vdip;
869 int vcircular, pcircular;
870
871 /* Verify calling context */
872 ASSERT(MDI_PHCI(phci_dip));
873 vdip = mdi_devi_get_vdip(phci_dip);
874 ASSERT(vdip); /* A pHCI always has a vHCI */
875
876 /* extract two circular recursion values from single int */
877 pcircular = (short)(circular & 0xFFFF);
878 vcircular = (short)((circular >> 16) & 0xFFFF);
879
880 ndi_devi_exit(phci_dip, pcircular);
881 if (vcircular != -1)
882 ndi_devi_exit(vdip, vcircular);
883 }
884
885 /*
886 * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used
887 * around a pHCI drivers calls to mdi_pi_online/offline, after holding
888 * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock
889 * with vHCI power management code during path online/offline. Each
890 * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must
891 * occur within the scope of an active mdi_devi_enter that establishes the
892 * circular value.
893 */
894 void
895 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular)
896 {
897 int pcircular;
898
899 /* Verify calling context */
900 ASSERT(MDI_PHCI(phci_dip));
901
902 /* Keep hold on pHCI until we reenter in mdi_devi_enter_phci */
903 ndi_hold_devi(phci_dip);
904
905 pcircular = (short)(circular & 0xFFFF);
906 ndi_devi_exit(phci_dip, pcircular);
907 }
908
/*
 * mdi_devi_enter_phci():
 *	Reenter the pHCI after a matching mdi_devi_exit_phci() and drop
 *	the devinfo hold that call took.
 */
void
mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular)
{
	int		pcircular;

	/* Verify calling context */
	ASSERT(MDI_PHCI(phci_dip));

	ndi_devi_enter(phci_dip, &pcircular);

	/* Drop hold from mdi_devi_exit_phci. */
	ndi_rele_devi(phci_dip);

	/* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */
	ASSERT(pcircular == ((short)(*circular & 0xFFFF)));
}
925
926 /*
927 * mdi_devi_get_vdip():
928 * given a pHCI dip return vHCI dip
929 */
930 dev_info_t *
931 mdi_devi_get_vdip(dev_info_t *pdip)
932 {
933 mdi_phci_t *ph;
934
935 ph = i_devi_get_phci(pdip);
936 if (ph && ph->ph_vhci)
937 return (ph->ph_vhci->vh_dip);
938 return (NULL);
939 }
940
941 /*
942 * mdi_devi_pdip_entered():
943 * Return 1 if we are vHCI and have done an ndi_devi_enter
944 * of a pHCI
945 */
946 int
947 mdi_devi_pdip_entered(dev_info_t *vdip)
948 {
949 mdi_vhci_t *vh;
950 mdi_phci_t *ph;
951
952 vh = i_devi_get_vhci(vdip);
953 if (vh == NULL)
954 return (0);
955
956 MDI_VHCI_PHCI_LOCK(vh);
957 ph = vh->vh_phci_head;
958 while (ph) {
959 if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) {
960 MDI_VHCI_PHCI_UNLOCK(vh);
961 return (1);
962 }
963 ph = ph->ph_next;
964 }
965 MDI_VHCI_PHCI_UNLOCK(vh);
966 return (0);
967 }
968
969 /*
970 * mdi_phci_path2devinfo():
971 * Utility function to search for a valid phci device given
972 * the devfs pathname.
973 */
974 dev_info_t *
975 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
976 {
977 char *temp_pathname;
978 mdi_vhci_t *vh;
979 mdi_phci_t *ph;
980 dev_info_t *pdip = NULL;
981
982 vh = i_devi_get_vhci(vdip);
983 ASSERT(vh != NULL);
984
985 if (vh == NULL) {
986 /*
987 * Invalid vHCI component, return failure
988 */
989 return (NULL);
990 }
991
992 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
993 MDI_VHCI_PHCI_LOCK(vh);
994 ph = vh->vh_phci_head;
995 while (ph != NULL) {
996 pdip = ph->ph_dip;
997 ASSERT(pdip != NULL);
998 *temp_pathname = '\0';
999 (void) ddi_pathname(pdip, temp_pathname);
1000 if (strcmp(temp_pathname, pathname) == 0) {
1001 break;
1002 }
1003 ph = ph->ph_next;
1004 }
1005 if (ph == NULL) {
1006 pdip = NULL;
1007 }
1008 MDI_VHCI_PHCI_UNLOCK(vh);
1009 kmem_free(temp_pathname, MAXPATHLEN);
1010 return (pdip);
1011 }
1012
1013 /*
1014 * mdi_phci_get_path_count():
1015 * get number of path information nodes associated with a given
1016 * pHCI device.
1017 */
1018 int
1019 mdi_phci_get_path_count(dev_info_t *pdip)
1020 {
1021 mdi_phci_t *ph;
1022 int count = 0;
1023
1024 ph = i_devi_get_phci(pdip);
1025 if (ph != NULL) {
1026 count = ph->ph_path_count;
1027 }
1028 return (count);
1029 }
1030
/*
 * i_mdi_phci_lock():
 *		Lock a pHCI device
 * Return Values:
 *		None
 * Note:
 *		The default locking order is:
 *		_NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
 *		But there are number of situations where locks need to be
 *		grabbed in reverse order.  This routine implements try and
 *		lock mechanism depending on the requested parameter option.
 */
static void
i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
{
	if (pip) {
		/*
		 * Reverse locking is requested: the caller holds pi_mutex
		 * and wants ph_mutex, so only try-lock is safe; on
		 * contention we must drop pi_mutex before waiting.
		 * MDI_PI_HOLD keeps pip alive while its mutex is dropped.
		 */
		while (MDI_PHCI_TRYLOCK(ph) == 0) {
			if (servicing_interrupt()) {
				/*
				 * In interrupt context we cannot delay():
				 * drop pi_mutex, take both locks in the
				 * default order, then release the hold.
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				MDI_PHCI_LOCK(ph);
				MDI_PI_LOCK(pip);
				MDI_PI_RELE(pip);
				break;
			} else {
				/*
				 * tryenter failed. Try to grab again
				 * after a small delay
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				delay_random(mdi_delay);
				MDI_PI_LOCK(pip);
				MDI_PI_RELE(pip);
			}
		}
	} else {
		/* Default order: simply block on the pHCI mutex. */
		MDI_PHCI_LOCK(ph);
	}
}
1072
1073 /*
1074 * i_mdi_phci_unlock():
1075 * Unlock the pHCI component
1076 */
static void
i_mdi_phci_unlock(mdi_phci_t *ph)
{
	/* Trivial wrapper; pairs with i_mdi_phci_lock(). */
	MDI_PHCI_UNLOCK(ph);
}
1082
1083 /*
1084 * i_mdi_devinfo_create():
1085 * create client device's devinfo node
1086 * Return Values:
1087 * dev_info
1088 * NULL
1089 * Notes:
1090 */
static dev_info_t *
i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
    char **compatible, int ncompatible)
{
	dev_info_t *cdip = NULL;

	ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));

	/* Verify for duplicate entry */
	cdip = i_mdi_devinfo_find(vh, name, guid);
	ASSERT(cdip == NULL);
	if (cdip) {
		/*
		 * NOTE(review): on non-DEBUG kernels a duplicate only warns
		 * and execution falls through to allocate a second node
		 * below — confirm this best-effort behavior is intended.
		 */
		cmn_err(CE_WARN,
		    "i_mdi_devinfo_create: client %s@%s already exists",
		    name ? name : "", guid ? guid : "");
	}

	/* Allocate a self-identifying (SID) child under the vHCI nexus. */
	ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
	if (cdip == NULL)
		goto fail;

	/*
	 * Create component type and Global unique identifier
	 * properties
	 */
	if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
	    MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
		goto fail;
	}

	/* Decorate the node with compatible property */
	if (compatible &&
	    (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
	    "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
		goto fail;
	}

	return (cdip);

fail:
	/* Undo any partially-created node before reporting failure. */
	if (cdip) {
		(void) ndi_prop_remove_all(cdip);
		(void) ndi_devi_free(cdip);
	}
	return (NULL);
}
1137
1138 /*
1139 * i_mdi_devinfo_find():
1140 * Find a matching devinfo node for given client node name
1141 * and its guid.
1142 * Return Values:
1143 * Handle to a dev_info node or NULL
1144 */
1145 static dev_info_t *
1146 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
1147 {
1148 char *data;
1149 dev_info_t *cdip = NULL;
1150 dev_info_t *ndip = NULL;
1151 int circular;
1152
1153 ndi_devi_enter(vh->vh_dip, &circular);
1154 ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
1155 while ((cdip = ndip) != NULL) {
1156 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1157
1158 if (strcmp(DEVI(cdip)->devi_node_name, name)) {
1159 continue;
1160 }
1161
1162 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
1163 DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
1164 &data) != DDI_PROP_SUCCESS) {
1165 continue;
1166 }
1167
1168 if (strcmp(data, guid) != 0) {
1169 ddi_prop_free(data);
1170 continue;
1171 }
1172 ddi_prop_free(data);
1173 break;
1174 }
1175 ndi_devi_exit(vh->vh_dip, circular);
1176 return (cdip);
1177 }
1178
1179 /*
1180 * i_mdi_devinfo_remove():
1181 * Remove a client device node
1182 */
1183 static int
1184 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
1185 {
1186 int rv = MDI_SUCCESS;
1187
1188 if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
1189 (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
1190 rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN | NDI_DEVI_REMOVE);
1191 if (rv != NDI_SUCCESS) {
1192 MDI_DEBUG(1, (MDI_NOTE, cdip,
1193 "!failed: cdip %p", (void *)cdip));
1194 }
1195 /*
1196 * Convert to MDI error code
1197 */
1198 switch (rv) {
1199 case NDI_SUCCESS:
1200 rv = MDI_SUCCESS;
1201 break;
1202 case NDI_BUSY:
1203 rv = MDI_BUSY;
1204 break;
1205 default:
1206 rv = MDI_FAILURE;
1207 break;
1208 }
1209 }
1210 return (rv);
1211 }
1212
1213 /*
1214 * i_devi_get_client()
1215 * Utility function to get mpxio component extensions
1216 */
1217 static mdi_client_t *
1218 i_devi_get_client(dev_info_t *cdip)
1219 {
1220 mdi_client_t *ct = NULL;
1221
1222 if (MDI_CLIENT(cdip)) {
1223 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
1224 }
1225 return (ct);
1226 }
1227
1228 /*
1229 * i_mdi_is_child_present():
1230 * Search for the presence of client device dev_info node
1231 */
1232 static int
1233 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
1234 {
1235 int rv = MDI_FAILURE;
1236 struct dev_info *dip;
1237 int circular;
1238
1239 ndi_devi_enter(vdip, &circular);
1240 dip = DEVI(vdip)->devi_child;
1241 while (dip) {
1242 if (dip == DEVI(cdip)) {
1243 rv = MDI_SUCCESS;
1244 break;
1245 }
1246 dip = dip->devi_sibling;
1247 }
1248 ndi_devi_exit(vdip, circular);
1249 return (rv);
1250 }
1251
1252
1253 /*
1254 * i_mdi_client_lock():
1255 * Grab client component lock
1256 * Return Values:
1257 * None
1258 * Note:
1259 * The default locking order is:
1260 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
1261 * But there are number of situations where locks need to be
1262 * grabbed in reverse order. This routine implements try and lock
1263 * mechanism depending on the requested parameter option.
1264 */
static void
i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	if (pip) {
		/*
		 * Reverse locking is requested.
		 */
		while (MDI_CLIENT_TRYLOCK(ct) == 0) {
			if (servicing_interrupt()) {
				/*
				 * In interrupt context we cannot spin with
				 * delays.  Hold pip so it cannot go away,
				 * drop its lock, take the client lock in
				 * the default order, then reacquire pip's
				 * lock and drop the hold.
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				MDI_CLIENT_LOCK(ct);
				MDI_PI_LOCK(pip);
				MDI_PI_RELE(pip);
				break;
			} else {
				/*
				 * tryenter failed. Try to grab again
				 * after a small delay
				 */
				MDI_PI_HOLD(pip);
				MDI_PI_UNLOCK(pip);
				delay_random(mdi_delay);
				MDI_PI_LOCK(pip);
				MDI_PI_RELE(pip);
			}
		}
	} else {
		/* No pathinfo lock held; acquire in the default order. */
		MDI_CLIENT_LOCK(ct);
	}
}
1296
1297 /*
1298 * i_mdi_client_unlock():
1299 * Unlock a client component
1300 */
static void
i_mdi_client_unlock(mdi_client_t *ct)
{
	/* Trivial wrapper; pairs with i_mdi_client_lock(). */
	MDI_CLIENT_UNLOCK(ct);
}
1306
1307 /*
1308 * i_mdi_client_alloc():
1309 * Allocate and initialize a client structure. Caller should
1310 * hold the vhci client lock.
1311 * Return Values:
1312 * Handle to a client component
1313 */
1314 /*ARGSUSED*/
1315 static mdi_client_t *
1316 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
1317 {
1318 mdi_client_t *ct;
1319
1320 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1321
1322 /*
1323 * Allocate and initialize a component structure.
1324 */
1325 ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
1326 mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
1327 ct->ct_hnext = NULL;
1328 ct->ct_hprev = NULL;
1329 ct->ct_dip = NULL;
1330 ct->ct_vhci = vh;
1331 ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
1332 (void) strcpy(ct->ct_drvname, name);
1333 ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
1334 (void) strcpy(ct->ct_guid, lguid);
1335 ct->ct_cprivate = NULL;
1336 ct->ct_vprivate = NULL;
1337 ct->ct_flags = 0;
1338 ct->ct_state = MDI_CLIENT_STATE_FAILED;
1339 MDI_CLIENT_LOCK(ct);
1340 MDI_CLIENT_SET_OFFLINE(ct);
1341 MDI_CLIENT_SET_DETACH(ct);
1342 MDI_CLIENT_SET_POWER_UP(ct);
1343 MDI_CLIENT_UNLOCK(ct);
1344 ct->ct_failover_flags = 0;
1345 ct->ct_failover_status = 0;
1346 cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
1347 ct->ct_unstable = 0;
1348 cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
1349 cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
1350 ct->ct_lb = vh->vh_lb;
1351 ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
1352 ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
1353 ct->ct_path_count = 0;
1354 ct->ct_path_head = NULL;
1355 ct->ct_path_tail = NULL;
1356 ct->ct_path_last = NULL;
1357
1358 /*
1359 * Add this client component to our client hash queue
1360 */
1361 i_mdi_client_enlist_table(vh, ct);
1362 return (ct);
1363 }
1364
1365 /*
1366 * i_mdi_client_enlist_table():
1367 * Attach the client device to the client hash table. Caller
1368 * should hold the vhci client lock.
1369 */
1370 static void
1371 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1372 {
1373 int index;
1374 struct client_hash *head;
1375
1376 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1377
1378 index = i_mdi_get_hash_key(ct->ct_guid);
1379 head = &vh->vh_client_table[index];
1380 ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
1381 head->ct_hash_head = ct;
1382 head->ct_hash_count++;
1383 vh->vh_client_count++;
1384 }
1385
1386 /*
1387 * i_mdi_client_delist_table():
 * Remove the client device from the client hash table.
1389 * Caller should hold the vhci client lock.
1390 */
1391 static void
1392 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1393 {
1394 int index;
1395 char *guid;
1396 struct client_hash *head;
1397 mdi_client_t *next;
1398 mdi_client_t *last;
1399
1400 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1401
1402 guid = ct->ct_guid;
1403 index = i_mdi_get_hash_key(guid);
1404 head = &vh->vh_client_table[index];
1405
1406 last = NULL;
1407 next = (mdi_client_t *)head->ct_hash_head;
1408 while (next != NULL) {
1409 if (next == ct) {
1410 break;
1411 }
1412 last = next;
1413 next = next->ct_hnext;
1414 }
1415
1416 if (next) {
1417 head->ct_hash_count--;
1418 if (last == NULL) {
1419 head->ct_hash_head = ct->ct_hnext;
1420 } else {
1421 last->ct_hnext = ct->ct_hnext;
1422 }
1423 ct->ct_hnext = NULL;
1424 vh->vh_client_count--;
1425 }
1426 }
1427
1428
1429 /*
1430 * i_mdi_client_free():
1431 * Free a client component
1432 */
1433 static int
1434 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
1435 {
1436 int rv = MDI_SUCCESS;
1437 int flags = ct->ct_flags;
1438 dev_info_t *cdip;
1439 dev_info_t *vdip;
1440
1441 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1442
1443 vdip = vh->vh_dip;
1444 cdip = ct->ct_dip;
1445
1446 (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
1447 DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
1448 DEVI(cdip)->devi_mdi_client = NULL;
1449
1450 /*
1451 * Clear out back ref. to dev_info_t node
1452 */
1453 ct->ct_dip = NULL;
1454
1455 /*
1456 * Remove this client from our hash queue
1457 */
1458 i_mdi_client_delist_table(vh, ct);
1459
1460 /*
1461 * Uninitialize and free the component
1462 */
1463 kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
1464 kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
1465 kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
1466 cv_destroy(&ct->ct_failover_cv);
1467 cv_destroy(&ct->ct_unstable_cv);
1468 cv_destroy(&ct->ct_powerchange_cv);
1469 mutex_destroy(&ct->ct_mutex);
1470 kmem_free(ct, sizeof (*ct));
1471
1472 if (cdip != NULL) {
1473 MDI_VHCI_CLIENT_UNLOCK(vh);
1474 (void) i_mdi_devinfo_remove(vdip, cdip, flags);
1475 MDI_VHCI_CLIENT_LOCK(vh);
1476 }
1477 return (rv);
1478 }
1479
1480 /*
1481 * i_mdi_client_find():
1482 * Find the client structure corresponding to a given guid
1483 * Caller should hold the vhci client lock.
1484 */
1485 static mdi_client_t *
1486 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
1487 {
1488 int index;
1489 struct client_hash *head;
1490 mdi_client_t *ct;
1491
1492 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1493
1494 index = i_mdi_get_hash_key(guid);
1495 head = &vh->vh_client_table[index];
1496
1497 ct = head->ct_hash_head;
1498 while (ct != NULL) {
1499 if (strcmp(ct->ct_guid, guid) == 0 &&
1500 (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
1501 break;
1502 }
1503 ct = ct->ct_hnext;
1504 }
1505 return (ct);
1506 }
1507
1508 /*
1509 * i_mdi_client_update_state():
1510 * Compute and update client device state
1511 * Notes:
1512 * A client device can be in any of three possible states:
1513 *
 * MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more than
 * one online/standby path. Can tolerate failures.
1516 * MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
1517 * no alternate paths available as standby. A failure on the online
1518 * would result in loss of access to device data.
1519 * MDI_CLIENT_STATE_FAILED - Client device in failed state with
1520 * no paths available to access the device.
1521 */
1522 static void
1523 i_mdi_client_update_state(mdi_client_t *ct)
1524 {
1525 int state;
1526
1527 ASSERT(MDI_CLIENT_LOCKED(ct));
1528 state = i_mdi_client_compute_state(ct, NULL);
1529 MDI_CLIENT_SET_STATE(ct, state);
1530 }
1531
1532 /*
1533 * i_mdi_client_compute_state():
1534 * Compute client device state
1535 *
 * mdi_phci_t *	Pointer to pHCI structure which should be skipped
 *		while computing the new value. Used by
1538 * i_mdi_phci_offline() to find the new
1539 * client state after DR of a pHCI.
1540 */
1541 static int
1542 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
1543 {
1544 int state;
1545 int online_count = 0;
1546 int standby_count = 0;
1547 mdi_pathinfo_t *pip, *next;
1548
1549 ASSERT(MDI_CLIENT_LOCKED(ct));
1550 pip = ct->ct_path_head;
1551 while (pip != NULL) {
1552 MDI_PI_LOCK(pip);
1553 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
1554 if (MDI_PI(pip)->pi_phci == ph) {
1555 MDI_PI_UNLOCK(pip);
1556 pip = next;
1557 continue;
1558 }
1559
1560 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1561 == MDI_PATHINFO_STATE_ONLINE)
1562 online_count++;
1563 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1564 == MDI_PATHINFO_STATE_STANDBY)
1565 standby_count++;
1566 MDI_PI_UNLOCK(pip);
1567 pip = next;
1568 }
1569
1570 if (online_count == 0) {
1571 if (standby_count == 0) {
1572 state = MDI_CLIENT_STATE_FAILED;
1573 MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
1574 "client state failed: ct = %p", (void *)ct));
1575 } else if (standby_count == 1) {
1576 state = MDI_CLIENT_STATE_DEGRADED;
1577 } else {
1578 state = MDI_CLIENT_STATE_OPTIMAL;
1579 }
1580 } else if (online_count == 1) {
1581 if (standby_count == 0) {
1582 state = MDI_CLIENT_STATE_DEGRADED;
1583 } else {
1584 state = MDI_CLIENT_STATE_OPTIMAL;
1585 }
1586 } else {
1587 state = MDI_CLIENT_STATE_OPTIMAL;
1588 }
1589 return (state);
1590 }
1591
1592 /*
1593 * i_mdi_client2devinfo():
1594 * Utility function
1595 */
dev_info_t *
i_mdi_client2devinfo(mdi_client_t *ct)
{
	/* Return the devinfo node backing this client (may be NULL). */
	return (ct->ct_dip);
}
1601
1602 /*
1603 * mdi_client_path2_devinfo():
1604 * Given the parent devinfo and child devfs pathname, search for
1605 * a valid devfs node handle.
1606 */
1607 dev_info_t *
1608 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
1609 {
1610 dev_info_t *cdip = NULL;
1611 dev_info_t *ndip = NULL;
1612 char *temp_pathname;
1613 int circular;
1614
1615 /*
1616 * Allocate temp buffer
1617 */
1618 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1619
1620 /*
1621 * Lock parent against changes
1622 */
1623 ndi_devi_enter(vdip, &circular);
1624 ndip = (dev_info_t *)DEVI(vdip)->devi_child;
1625 while ((cdip = ndip) != NULL) {
1626 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1627
1628 *temp_pathname = '\0';
1629 (void) ddi_pathname(cdip, temp_pathname);
1630 if (strcmp(temp_pathname, pathname) == 0) {
1631 break;
1632 }
1633 }
1634 /*
1635 * Release devinfo lock
1636 */
1637 ndi_devi_exit(vdip, circular);
1638
1639 /*
1640 * Free the temp buffer
1641 */
1642 kmem_free(temp_pathname, MAXPATHLEN);
1643 return (cdip);
1644 }
1645
1646 /*
1647 * mdi_client_get_path_count():
1648 * Utility function to get number of path information nodes
1649 * associated with a given client device.
1650 */
1651 int
1652 mdi_client_get_path_count(dev_info_t *cdip)
1653 {
1654 mdi_client_t *ct;
1655 int count = 0;
1656
1657 ct = i_devi_get_client(cdip);
1658 if (ct != NULL) {
1659 count = ct->ct_path_count;
1660 }
1661 return (count);
1662 }
1663
1664
1665 /*
1666 * i_mdi_get_hash_key():
1667 * Create a hash using strings as keys
1668 *
1669 */
1670 static int
1671 i_mdi_get_hash_key(char *str)
1672 {
1673 uint32_t g, hash = 0;
1674 char *p;
1675
1676 for (p = str; *p != '\0'; p++) {
1677 g = *p;
1678 hash += g;
1679 }
1680 return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
1681 }
1682
1683 /*
1684 * mdi_get_lb_policy():
1685 * Get current load balancing policy for a given client device
1686 */
1687 client_lb_t
1688 mdi_get_lb_policy(dev_info_t *cdip)
1689 {
1690 client_lb_t lb = LOAD_BALANCE_NONE;
1691 mdi_client_t *ct;
1692
1693 ct = i_devi_get_client(cdip);
1694 if (ct != NULL) {
1695 lb = ct->ct_lb;
1696 }
1697 return (lb);
1698 }
1699
1700 /*
1701 * mdi_set_lb_region_size():
1702 * Set current region size for the load-balance
1703 */
1704 int
1705 mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
1706 {
1707 mdi_client_t *ct;
1708 int rv = MDI_FAILURE;
1709
1710 ct = i_devi_get_client(cdip);
1711 if (ct != NULL && ct->ct_lb_args != NULL) {
1712 ct->ct_lb_args->region_size = region_size;
1713 rv = MDI_SUCCESS;
1714 }
1715 return (rv);
1716 }
1717
1718 /*
 * mdi_set_lb_policy():
1720 * Set current load balancing policy for a given client device
1721 */
1722 int
1723 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
1724 {
1725 mdi_client_t *ct;
1726 int rv = MDI_FAILURE;
1727
1728 ct = i_devi_get_client(cdip);
1729 if (ct != NULL) {
1730 ct->ct_lb = lb;
1731 rv = MDI_SUCCESS;
1732 }
1733 return (rv);
1734 }
1735
1736 /*
1737 * mdi_failover():
1738 * failover function called by the vHCI drivers to initiate
1739 * a failover operation. This is typically due to non-availability
1740 * of online paths to route I/O requests. Failover can be
1741 * triggered through user application also.
1742 *
1743 * The vHCI driver calls mdi_failover() to initiate a failover
1744 * operation. mdi_failover() calls back into the vHCI driver's
1745 * vo_failover() entry point to perform the actual failover
1746 * operation. The reason for requiring the vHCI driver to
1747 * initiate failover by calling mdi_failover(), instead of directly
1748 * executing vo_failover() itself, is to ensure that the mdi
1749 * framework can keep track of the client state properly.
1750 * Additionally, mdi_failover() provides as a convenience the
1751 * option of performing the failover operation synchronously or
1752 * asynchronously
1753 *
1754 * Upon successful completion of the failover operation, the
1755 * paths that were previously ONLINE will be in the STANDBY state,
1756 * and the newly activated paths will be in the ONLINE state.
1757 *
1758 * The flags modifier determines whether the activation is done
1759 * synchronously: MDI_FAILOVER_SYNC
1760 * Return Values:
1761 * MDI_SUCCESS
1762 * MDI_FAILURE
1763 * MDI_BUSY
1764 */
/*ARGSUSED*/
int
mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
{
	int		rv;
	mdi_client_t	*ct;

	/* Map the client devinfo node to its MDI client extension. */
	ct = i_devi_get_client(cdip);
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/* cdip is not a valid client device. Nothing more to do. */
		return (MDI_FAILURE);
	}

	MDI_CLIENT_LOCK(ct);

	if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
		/* A path to the client is being freed */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_BUSY);
	}


	if (MDI_CLIENT_IS_FAILED(ct)) {
		/*
		 * Client is in failed state. Nothing more to do.
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
		/*
		 * Failover is already in progress; return BUSY
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_BUSY);
	}
	/*
	 * Make sure that mdi_pathinfo node state changes are processed.
	 * We do not allow failovers to progress while client path state
	 * changes are in progress
	 */
	if (ct->ct_unstable) {
		if (flags == MDI_FAILOVER_ASYNC) {
			/* Async callers cannot block; report busy. */
			MDI_CLIENT_UNLOCK(ct);
			return (MDI_BUSY);
		} else {
			/* Sync callers wait until path states settle. */
			while (ct->ct_unstable)
				cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
		}
	}

	/*
	 * Client device is in stable state. Before proceeding, perform sanity
	 * checks again.
	 */
	if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
	    (!i_ddi_devi_attached(cdip))) {
		/*
		 * Client is in failed state. Nothing more to do.
		 */
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	/*
	 * Set the client state as failover in progress.
	 */
	MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
	ct->ct_failover_flags = flags;
	MDI_CLIENT_UNLOCK(ct);

	if (flags == MDI_FAILOVER_ASYNC) {
		/*
		 * Submit the initiate failover request via CPR safe
		 * taskq threads.
		 */
		(void) taskq_dispatch(mdi_taskq, (task_func_t *)i_mdi_failover,
		    ct, KM_SLEEP);
		return (MDI_ACCEPT);
	} else {
		/*
		 * Synchronous failover mode. Typically invoked from the user
		 * land.
		 */
		rv = i_mdi_failover(ct);
	}
	return (rv);
}
1855
1856 /*
1857 * i_mdi_failover():
1858 * internal failover function. Invokes vHCI drivers failover
1859 * callback function and process the failover status
1860 * Return Values:
 *	The vHCI driver's vo_failover() status, mapped onto MDI codes
1862 *
1863 * Note: A client device in failover state can not be detached or freed.
1864 */
static int
i_mdi_failover(void *arg)
{
	int		rv = MDI_SUCCESS;
	mdi_client_t	*ct = (mdi_client_t *)arg;
	mdi_vhci_t	*vh = ct->ct_vhci;

	/* The client lock must not be held across the vo_failover call. */
	ASSERT(!MDI_CLIENT_LOCKED(ct));

	if (vh->vh_ops->vo_failover != NULL) {
		/*
		 * Call vHCI drivers callback routine
		 */
		rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
		    ct->ct_failover_flags);
	}

	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);

	/*
	 * Save the failover return status
	 */
	ct->ct_failover_status = rv;

	/*
	 * As a result of failover, client status would have been changed.
	 * Update the client state and wake up anyone waiting on this client
	 * device.
	 */
	i_mdi_client_update_state(ct);

	cv_broadcast(&ct->ct_failover_cv);
	MDI_CLIENT_UNLOCK(ct);
	return (rv);
}
1901
1902 /*
1903 * Load balancing is logical block.
1904 * IOs within the range described by region_size
1905 * would go on the same path. This would improve the
1906 * performance by cache-hit on some of the RAID devices.
1907 * Search only for online paths(At some point we
1908 * may want to balance across target ports).
1909 * If no paths are found then default to round-robin.
1910 */
1911 static int
1912 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
1913 {
1914 int path_index = -1;
1915 int online_path_count = 0;
1916 int online_nonpref_path_count = 0;
1917 int region_size = ct->ct_lb_args->region_size;
1918 mdi_pathinfo_t *pip;
1919 mdi_pathinfo_t *next;
1920 int preferred, path_cnt;
1921
1922 pip = ct->ct_path_head;
1923 while (pip) {
1924 MDI_PI_LOCK(pip);
1925 if (MDI_PI(pip)->pi_state ==
1926 MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
1927 online_path_count++;
1928 } else if (MDI_PI(pip)->pi_state ==
1929 MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
1930 online_nonpref_path_count++;
1931 }
1932 next = (mdi_pathinfo_t *)
1933 MDI_PI(pip)->pi_client_link;
1934 MDI_PI_UNLOCK(pip);
1935 pip = next;
1936 }
1937 /* if found any online/preferred then use this type */
1938 if (online_path_count > 0) {
1939 path_cnt = online_path_count;
1940 preferred = 1;
1941 } else if (online_nonpref_path_count > 0) {
1942 path_cnt = online_nonpref_path_count;
1943 preferred = 0;
1944 } else {
1945 path_cnt = 0;
1946 }
1947 if (path_cnt) {
1948 path_index = (bp->b_blkno >> region_size) % path_cnt;
1949 pip = ct->ct_path_head;
1950 while (pip && path_index != -1) {
1951 MDI_PI_LOCK(pip);
1952 if (path_index == 0 &&
1953 (MDI_PI(pip)->pi_state ==
1954 MDI_PATHINFO_STATE_ONLINE) &&
1955 MDI_PI(pip)->pi_preferred == preferred) {
1956 MDI_PI_HOLD(pip);
1957 MDI_PI_UNLOCK(pip);
1958 *ret_pip = pip;
1959 return (MDI_SUCCESS);
1960 }
1961 path_index --;
1962 next = (mdi_pathinfo_t *)
1963 MDI_PI(pip)->pi_client_link;
1964 MDI_PI_UNLOCK(pip);
1965 pip = next;
1966 }
1967 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
1968 "lba %llx: path %s %p",
1969 bp->b_lblkno, mdi_pi_spathname(pip), (void *)pip));
1970 }
1971 return (MDI_FAILURE);
1972 }
1973
1974 /*
1975 * mdi_select_path():
1976 * select a path to access a client device.
1977 *
1978 * mdi_select_path() function is called by the vHCI drivers to
1979 * select a path to route the I/O request to. The caller passes
1980 * the block I/O data transfer structure ("buf") as one of the
1981 * parameters. The mpxio framework uses the buf structure
1982 * contents to maintain per path statistics (total I/O size /
1983 * count pending). If more than one online paths are available to
1984 * select, the framework automatically selects a suitable path
1985 * for routing I/O request. If a failover operation is active for
1986 * this client device the call shall be failed with MDI_BUSY error
1987 * code.
1988 *
1989 * By default this function returns a suitable path in online
1990 * state based on the current load balancing policy. Currently
1991 * we support LOAD_BALANCE_NONE (Previously selected online path
1992 * will continue to be used till the path is usable) and
1993 * LOAD_BALANCE_RR (Online paths will be selected in a round
1994 * robin fashion), LOAD_BALANCE_LB(Online paths will be selected
 * based on the logical block). The load balancing policy can be
 * specified through the vHCI driver's configuration file (driver.conf).
1997 *
1998 * vHCI drivers may override this default behavior by specifying
 * appropriate flags. The meaning of the third argument depends
2000 * on the flags specified. If MDI_SELECT_PATH_INSTANCE is set
2001 * then the argument is the "path instance" of the path to select.
2002 * If MDI_SELECT_PATH_INSTANCE is not set then the argument is
2003 * "start_pip". A non NULL "start_pip" is the starting point to
2004 * walk and find the next appropriate path. The following values
2005 * are currently defined: MDI_SELECT_ONLINE_PATH (to select an
2006 * ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an
2007 * STANDBY path).
2008 *
2009 * The non-standard behavior is used by the scsi_vhci driver,
2010 * whenever it has to use a STANDBY/FAULTED path. Eg. during
2011 * attach of client devices (to avoid an unnecessary failover
2012 * when the STANDBY path comes up first), during failover
2013 * (to activate a STANDBY path as ONLINE).
2014 *
 * The selected path is returned in a mdi_hold_path() state
2016 * (pi_ref_cnt). Caller should release the hold by calling
2017 * mdi_rele_path().
2018 *
2019 * Return Values:
2020 * MDI_SUCCESS - Completed successfully
2021 * MDI_BUSY - Client device is busy failing over
2022 * MDI_NOPATH - Client device is online, but no valid path are
2023 * available to access this client device
2024 * MDI_FAILURE - Invalid client device or state
2025 * MDI_DEVI_ONLINING
2026 * - Client device (struct dev_info state) is in
2027 * onlining state.
2028 */
2029
2030 /*ARGSUSED*/
2031 int
2032 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
2033 void *arg, mdi_pathinfo_t **ret_pip)
2034 {
2035 mdi_client_t *ct;
2036 mdi_pathinfo_t *pip;
2037 mdi_pathinfo_t *next;
2038 mdi_pathinfo_t *head;
2039 mdi_pathinfo_t *start;
2040 client_lb_t lbp; /* load balancing policy */
2041 int sb = 1; /* standard behavior */
2042 int preferred = 1; /* preferred path */
2043 int cond, cont = 1;
2044 int retry = 0;
2045 mdi_pathinfo_t *start_pip; /* request starting pathinfo */
2046 int path_instance; /* request specific path instance */
2047
2048 /* determine type of arg based on flags */
2049 if (flags & MDI_SELECT_PATH_INSTANCE) {
2050 path_instance = (int)(intptr_t)arg;
2051 start_pip = NULL;
2052 } else {
2053 path_instance = 0;
2054 start_pip = (mdi_pathinfo_t *)arg;
2055 }
2056
2057 if (flags != 0) {
2058 /*
2059 * disable default behavior
2060 */
2061 sb = 0;
2062 }
2063
2064 *ret_pip = NULL;
2065 ct = i_devi_get_client(cdip);
2066 if (ct == NULL) {
2067 /* mdi extensions are NULL, Nothing more to do */
2068 return (MDI_FAILURE);
2069 }
2070
2071 MDI_CLIENT_LOCK(ct);
2072
2073 if (sb) {
2074 if (MDI_CLIENT_IS_FAILED(ct)) {
2075 /*
2076 * Client is not ready to accept any I/O requests.
2077 * Fail this request.
2078 */
2079 MDI_DEBUG(2, (MDI_NOTE, cdip,
2080 "client state offline ct = %p", (void *)ct));
2081 MDI_CLIENT_UNLOCK(ct);
2082 return (MDI_FAILURE);
2083 }
2084
2085 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
2086 /*
2087 * Check for Failover is in progress. If so tell the
2088 * caller that this device is busy.
2089 */
2090 MDI_DEBUG(2, (MDI_NOTE, cdip,
2091 "client failover in progress ct = %p",
2092 (void *)ct));
2093 MDI_CLIENT_UNLOCK(ct);
2094 return (MDI_BUSY);
2095 }
2096
2097 /*
2098 * Check to see whether the client device is attached.
2099 * If not so, let the vHCI driver manually select a path
2100 * (standby) and let the probe/attach process to continue.
2101 */
2102 if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
2103 MDI_DEBUG(4, (MDI_NOTE, cdip,
2104 "devi is onlining ct = %p", (void *)ct));
2105 MDI_CLIENT_UNLOCK(ct);
2106 return (MDI_DEVI_ONLINING);
2107 }
2108 }
2109
2110 /*
2111 * Cache in the client list head. If head of the list is NULL
2112 * return MDI_NOPATH
2113 */
2114 head = ct->ct_path_head;
2115 if (head == NULL) {
2116 MDI_CLIENT_UNLOCK(ct);
2117 return (MDI_NOPATH);
2118 }
2119
2120 /* Caller is specifying a specific pathinfo path by path_instance */
2121 if (path_instance) {
2122 /* search for pathinfo with correct path_instance */
2123 for (pip = head;
2124 pip && (mdi_pi_get_path_instance(pip) != path_instance);
2125 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link)
2126 ;
2127
2128 /* If path can't be selected then MDI_NOPATH is returned. */
2129 if (pip == NULL) {
2130 MDI_CLIENT_UNLOCK(ct);
2131 return (MDI_NOPATH);
2132 }
2133
2134 /*
2135 * Verify state of path. When asked to select a specific
2136 * path_instance, we select the requested path in any
2137 * state (ONLINE, OFFLINE, STANDBY, FAULT) other than INIT.
2138 * We don't however select paths where the pHCI has detached.
2139 * NOTE: last pathinfo node of an opened client device may
2140 * exist in an OFFLINE state after the pHCI associated with
2141 * that path has detached (but pi_phci will be NULL if that
2142 * has occurred).
2143 */
2144 MDI_PI_LOCK(pip);
2145 if ((MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_INIT) ||
2146 (MDI_PI(pip)->pi_phci == NULL)) {
2147 MDI_PI_UNLOCK(pip);
2148 MDI_CLIENT_UNLOCK(ct);
2149 return (MDI_FAILURE);
2150 }
2151
2152 /* Return MDI_BUSY if we have a transient condition */
2153 if (MDI_PI_IS_TRANSIENT(pip)) {
2154 MDI_PI_UNLOCK(pip);
2155 MDI_CLIENT_UNLOCK(ct);
2156 return (MDI_BUSY);
2157 }
2158
2159 /*
2160 * Return the path in hold state. Caller should release the
2161 * lock by calling mdi_rele_path()
2162 */
2163 MDI_PI_HOLD(pip);
2164 MDI_PI_UNLOCK(pip);
2165 *ret_pip = pip;
2166 MDI_CLIENT_UNLOCK(ct);
2167 return (MDI_SUCCESS);
2168 }
2169
2170 /*
2171 * for non default behavior, bypass current
2172 * load balancing policy and always use LOAD_BALANCE_RR
2173 * except that the start point will be adjusted based
2174 * on the provided start_pip
2175 */
2176 lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;
2177
2178 switch (lbp) {
2179 case LOAD_BALANCE_NONE:
2180 /*
2181 * Load balancing is None or Alternate path mode
2182 * Start looking for a online mdi_pathinfo node starting from
2183 * last known selected path
2184 */
2185 preferred = 1;
2186 pip = (mdi_pathinfo_t *)ct->ct_path_last;
2187 if (pip == NULL) {
2188 pip = head;
2189 }
2190 start = pip;
2191 do {
2192 MDI_PI_LOCK(pip);
2193 /*
2194 * No need to explicitly check if the path is disabled.
2195 * Since we are checking for state == ONLINE and the
2196 * same variable is used for DISABLE/ENABLE information.
2197 */
2198 if ((MDI_PI(pip)->pi_state ==
2199 MDI_PATHINFO_STATE_ONLINE) &&
2200 preferred == MDI_PI(pip)->pi_preferred) {
2201 /*
2202 * Return the path in hold state. Caller should
2203 * release the lock by calling mdi_rele_path()
2204 */
2205 MDI_PI_HOLD(pip);
2206 MDI_PI_UNLOCK(pip);
2207 ct->ct_path_last = pip;
2208 *ret_pip = pip;
2209 MDI_CLIENT_UNLOCK(ct);
2210 return (MDI_SUCCESS);
2211 }
2212
2213 /*
2214 * Path is busy.
2215 */
2216 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2217 MDI_PI_IS_TRANSIENT(pip))
2218 retry = 1;
2219 /*
2220 * Keep looking for a next available online path
2221 */
2222 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2223 if (next == NULL) {
2224 next = head;
2225 }
2226 MDI_PI_UNLOCK(pip);
2227 pip = next;
2228 if (start == pip && preferred) {
2229 preferred = 0;
2230 } else if (start == pip && !preferred) {
2231 cont = 0;
2232 }
2233 } while (cont);
2234 break;
2235
2236 case LOAD_BALANCE_LBA:
2237 /*
2238 * Make sure we are looking
2239 * for an online path. Otherwise, if it is for a STANDBY
2240 * path request, it will go through and fetch an ONLINE
2241 * path which is not desirable.
2242 */
2243 if ((ct->ct_lb_args != NULL) &&
2244 (ct->ct_lb_args->region_size) && bp &&
2245 (sb || (flags == MDI_SELECT_ONLINE_PATH))) {
2246 if (i_mdi_lba_lb(ct, ret_pip, bp)
2247 == MDI_SUCCESS) {
2248 MDI_CLIENT_UNLOCK(ct);
2249 return (MDI_SUCCESS);
2250 }
2251 }
2252 /* FALLTHROUGH */
2253 case LOAD_BALANCE_RR:
2254 /*
2255 * Load balancing is Round Robin. Start looking for a online
2256 * mdi_pathinfo node starting from last known selected path
2257 * as the start point. If override flags are specified,
2258 * process accordingly.
2259 * If the search is already in effect(start_pip not null),
2260 * then lets just use the same path preference to continue the
2261 * traversal.
2262 */
2263
2264 if (start_pip != NULL) {
2265 preferred = MDI_PI(start_pip)->pi_preferred;
2266 } else {
2267 preferred = 1;
2268 }
2269
2270 start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
2271 if (start == NULL) {
2272 pip = head;
2273 } else {
2274 pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
2275 if (pip == NULL) {
2276 if ( flags & MDI_SELECT_NO_PREFERRED) {
2277 /*
2278 * Return since we hit the end of list
2279 */
2280 MDI_CLIENT_UNLOCK(ct);
2281 return (MDI_NOPATH);
2282 }
2283
2284 if (!sb) {
2285 if (preferred == 0) {
2286 /*
2287 * Looks like we have completed
2288 * the traversal as preferred
2289 * value is 0. Time to bail out.
2290 */
2291 *ret_pip = NULL;
2292 MDI_CLIENT_UNLOCK(ct);
2293 return (MDI_NOPATH);
2294 } else {
2295 /*
2296 * Looks like we reached the
2297 * end of the list. Lets enable
2298 * traversal of non preferred
2299 * paths.
2300 */
2301 preferred = 0;
2302 }
2303 }
2304 pip = head;
2305 }
2306 }
2307 start = pip;
2308 do {
2309 MDI_PI_LOCK(pip);
2310 if (sb) {
2311 cond = ((MDI_PI(pip)->pi_state ==
2312 MDI_PATHINFO_STATE_ONLINE &&
2313 MDI_PI(pip)->pi_preferred ==
2314 preferred) ? 1 : 0);
2315 } else {
2316 if (flags == MDI_SELECT_ONLINE_PATH) {
2317 cond = ((MDI_PI(pip)->pi_state ==
2318 MDI_PATHINFO_STATE_ONLINE &&
2319 MDI_PI(pip)->pi_preferred ==
2320 preferred) ? 1 : 0);
2321 } else if (flags == MDI_SELECT_STANDBY_PATH) {
2322 cond = ((MDI_PI(pip)->pi_state ==
2323 MDI_PATHINFO_STATE_STANDBY &&
2324 MDI_PI(pip)->pi_preferred ==
2325 preferred) ? 1 : 0);
2326 } else if (flags == (MDI_SELECT_ONLINE_PATH |
2327 MDI_SELECT_STANDBY_PATH)) {
2328 cond = (((MDI_PI(pip)->pi_state ==
2329 MDI_PATHINFO_STATE_ONLINE ||
2330 (MDI_PI(pip)->pi_state ==
2331 MDI_PATHINFO_STATE_STANDBY)) &&
2332 MDI_PI(pip)->pi_preferred ==
2333 preferred) ? 1 : 0);
2334 } else if (flags ==
2335 (MDI_SELECT_STANDBY_PATH |
2336 MDI_SELECT_ONLINE_PATH |
2337 MDI_SELECT_USER_DISABLE_PATH)) {
2338 cond = (((MDI_PI(pip)->pi_state ==
2339 MDI_PATHINFO_STATE_ONLINE ||
2340 (MDI_PI(pip)->pi_state ==
2341 MDI_PATHINFO_STATE_STANDBY) ||
2342 (MDI_PI(pip)->pi_state ==
2343 (MDI_PATHINFO_STATE_ONLINE|
2344 MDI_PATHINFO_STATE_USER_DISABLE)) ||
2345 (MDI_PI(pip)->pi_state ==
2346 (MDI_PATHINFO_STATE_STANDBY |
2347 MDI_PATHINFO_STATE_USER_DISABLE)))&&
2348 MDI_PI(pip)->pi_preferred ==
2349 preferred) ? 1 : 0);
2350 } else if (flags ==
2351 (MDI_SELECT_STANDBY_PATH |
2352 MDI_SELECT_ONLINE_PATH |
2353 MDI_SELECT_NO_PREFERRED)) {
2354 cond = (((MDI_PI(pip)->pi_state ==
2355 MDI_PATHINFO_STATE_ONLINE) ||
2356 (MDI_PI(pip)->pi_state ==
2357 MDI_PATHINFO_STATE_STANDBY))
2358 ? 1 : 0);
2359 } else {
2360 cond = 0;
2361 }
2362 }
2363 /*
2364 * No need to explicitly check if the path is disabled.
2365 * Since we are checking for state == ONLINE and the
2366 * same variable is used for DISABLE/ENABLE information.
2367 */
2368 if (cond) {
2369 /*
2370 * Return the path in hold state. Caller should
2371 * release the lock by calling mdi_rele_path()
2372 */
2373 MDI_PI_HOLD(pip);
2374 MDI_PI_UNLOCK(pip);
2375 if (sb)
2376 ct->ct_path_last = pip;
2377 *ret_pip = pip;
2378 MDI_CLIENT_UNLOCK(ct);
2379 return (MDI_SUCCESS);
2380 }
2381 /*
2382 * Path is busy.
2383 */
2384 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2385 MDI_PI_IS_TRANSIENT(pip))
2386 retry = 1;
2387
2388 /*
2389 * Keep looking for a next available online path
2390 */
2391 do_again:
2392 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2393 if (next == NULL) {
2394 if ( flags & MDI_SELECT_NO_PREFERRED) {
2395 /*
2396 * Bail out since we hit the end of list
2397 */
2398 MDI_PI_UNLOCK(pip);
2399 break;
2400 }
2401
2402 if (!sb) {
2403 if (preferred == 1) {
2404 /*
2405 * Looks like we reached the
2406 * end of the list. Lets enable
2407 * traversal of non preferred
2408 * paths.
2409 */
2410 preferred = 0;
2411 next = head;
2412 } else {
2413 /*
2414 * We have done both the passes
2415 * Preferred as well as for
2416 * Non-preferred. Bail out now.
2417 */
2418 cont = 0;
2419 }
2420 } else {
2421 /*
2422 * Standard behavior case.
2423 */
2424 next = head;
2425 }
2426 }
2427 MDI_PI_UNLOCK(pip);
2428 if (cont == 0) {
2429 break;
2430 }
2431 pip = next;
2432
2433 if (!sb) {
2434 /*
2435 * We need to handle the selection of
2436 * non-preferred path in the following
2437 * case:
2438 *
2439 * +------+ +------+ +------+ +-----+
2440 * | A : 1| - | B : 1| - | C : 0| - |NULL |
2441 * +------+ +------+ +------+ +-----+
2442 *
2443 * If we start the search with B, we need to
2444 * skip beyond B to pick C which is non -
2445 * preferred in the second pass. The following
2446 * test, if true, will allow us to skip over
2447 * the 'start'(B in the example) to select
2448 * other non preferred elements.
2449 */
2450 if ((start_pip != NULL) && (start_pip == pip) &&
2451 (MDI_PI(start_pip)->pi_preferred
2452 != preferred)) {
2453 /*
2454 * try again after going past the start
2455 * pip
2456 */
2457 MDI_PI_LOCK(pip);
2458 goto do_again;
2459 }
2460 } else {
2461 /*
2462 * Standard behavior case
2463 */
2464 if (start == pip && preferred) {
2465 /* look for nonpreferred paths */
2466 preferred = 0;
2467 } else if (start == pip && !preferred) {
2468 /*
2469 * Exit condition
2470 */
2471 cont = 0;
2472 }
2473 }
2474 } while (cont);
2475 break;
2476 }
2477
2478 MDI_CLIENT_UNLOCK(ct);
2479 if (retry == 1) {
2480 return (MDI_BUSY);
2481 } else {
2482 return (MDI_NOPATH);
2483 }
2484 }
2485
2486 /*
2487 * For a client, return the next available path to any phci
2488 *
2489 * Note:
 *	Caller should hold the branch's devinfo node to get a consistent
 *	snapshot of the mdi_pathinfo nodes.
2492 *
 *	Please note that even if the list is stable, the mdi_pathinfo
 *	node state and properties are volatile. The caller should lock
 *	and unlock the nodes by calling mdi_pi_lock() and
 *	mdi_pi_unlock() functions to get stable properties.
2497 *
2498 * If there is a need to use the nodes beyond the hold of the
2499 * devinfo node period (For ex. I/O), then mdi_pathinfo node
2500 * need to be held against unexpected removal by calling
2501 * mdi_hold_path() and should be released by calling
2502 * mdi_rele_path() on completion.
2503 */
2504 mdi_pathinfo_t *
2505 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
2506 {
2507 mdi_client_t *ct;
2508
2509 if (!MDI_CLIENT(ct_dip))
2510 return (NULL);
2511
2512 /*
2513 * Walk through client link
2514 */
2515 ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
2516 ASSERT(ct != NULL);
2517
2518 if (pip == NULL)
2519 return ((mdi_pathinfo_t *)ct->ct_path_head);
2520
2521 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
2522 }
2523
2524 /*
2525 * For a phci, return the next available path to any client
2526 * Note: ditto mdi_get_next_phci_path()
2527 */
2528 mdi_pathinfo_t *
2529 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
2530 {
2531 mdi_phci_t *ph;
2532
2533 if (!MDI_PHCI(ph_dip))
2534 return (NULL);
2535
2536 /*
2537 * Walk through pHCI link
2538 */
2539 ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
2540 ASSERT(ph != NULL);
2541
2542 if (pip == NULL)
2543 return ((mdi_pathinfo_t *)ph->ph_path_head);
2544
2545 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
2546 }
2547
2548 /*
2549 * mdi_hold_path():
2550 * Hold the mdi_pathinfo node against unwanted unexpected free.
2551 * Return Values:
2552 * None
2553 */
2554 void
2555 mdi_hold_path(mdi_pathinfo_t *pip)
2556 {
2557 if (pip) {
2558 MDI_PI_LOCK(pip);
2559 MDI_PI_HOLD(pip);
2560 MDI_PI_UNLOCK(pip);
2561 }
2562 }
2563
2564
2565 /*
2566 * mdi_rele_path():
2567 * Release the mdi_pathinfo node which was selected
2568 * through mdi_select_path() mechanism or manually held by
2569 * calling mdi_hold_path().
2570 * Return Values:
2571 * None
2572 */
2573 void
2574 mdi_rele_path(mdi_pathinfo_t *pip)
2575 {
2576 if (pip) {
2577 MDI_PI_LOCK(pip);
2578 MDI_PI_RELE(pip);
2579 if (MDI_PI(pip)->pi_ref_cnt == 0) {
2580 cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
2581 }
2582 MDI_PI_UNLOCK(pip);
2583 }
2584 }
2585
2586 /*
2587 * mdi_pi_lock():
2588 * Lock the mdi_pathinfo node.
2589 * Note:
2590 * The caller should release the lock by calling mdi_pi_unlock()
2591 */
2592 void
2593 mdi_pi_lock(mdi_pathinfo_t *pip)
2594 {
2595 ASSERT(pip != NULL);
2596 if (pip) {
2597 MDI_PI_LOCK(pip);
2598 }
2599 }
2600
2601
2602 /*
2603 * mdi_pi_unlock():
2604 * Unlock the mdi_pathinfo node.
2605 * Note:
2606 * The mdi_pathinfo node should have been locked with mdi_pi_lock()
2607 */
2608 void
2609 mdi_pi_unlock(mdi_pathinfo_t *pip)
2610 {
2611 ASSERT(pip != NULL);
2612 if (pip) {
2613 MDI_PI_UNLOCK(pip);
2614 }
2615 }
2616
2617 /*
2618 * mdi_pi_find():
2619 * Search the list of mdi_pathinfo nodes attached to the
2620 * pHCI/Client device node whose path address matches "paddr".
2621 * Returns a pointer to the mdi_pathinfo node if a matching node is
2622 * found.
2623 * Return Values:
2624 * mdi_pathinfo node handle
2625 * NULL
2626 * Notes:
2627 * Caller need not hold any locks to call this function.
2628 */
mdi_pathinfo_t *
mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
{
	mdi_phci_t *ph;
	mdi_vhci_t *vh;
	mdi_client_t *ct;
	mdi_pathinfo_t *pip = NULL;

	MDI_DEBUG(2, (MDI_NOTE, pdip,
	    "caddr@%s paddr@%s", caddr ? caddr : "", paddr ? paddr : ""));
	/* A pHCI dip and a path unit-address are mandatory. */
	if ((pdip == NULL) || (paddr == NULL)) {
		return (NULL);
	}
	ph = i_devi_get_phci(pdip);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, Nothing more to do.
		 */
		MDI_DEBUG(2, (MDI_WARN, pdip, "invalid phci"));
		return (NULL);
	}

	vh = ph->ph_vhci;
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, Nothing more to do.
		 */
		MDI_DEBUG(2, (MDI_WARN, pdip, "invalid vhci"));
		return (NULL);
	}

	/*
	 * Look for pathinfo node identified by paddr.
	 */
	if (caddr == NULL) {
		/*
		 * Find a mdi_pathinfo node under pHCI list for a matching
		 * unit address.
		 */
		MDI_PHCI_LOCK(ph);
		if (MDI_PHCI_IS_OFFLINE(ph)) {
			MDI_DEBUG(2, (MDI_WARN, pdip,
			    "offline phci %p", (void *)ph));
			MDI_PHCI_UNLOCK(ph);
			return (NULL);
		}
		pip = (mdi_pathinfo_t *)ph->ph_path_head;

		/* Linear scan of the pHCI's path list by unit address. */
		while (pip != NULL) {
			if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
				break;
			}
			pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		}
		MDI_PHCI_UNLOCK(ph);
		/* pip may be NULL here; mdi_pi_spathname(NULL) is a debug aid. */
		MDI_DEBUG(2, (MDI_NOTE, pdip,
		    "found %s %p", mdi_pi_spathname(pip), (void *)pip));
		return (pip);
	}

	/*
	 * XXX - Is the rest of the code in this function really necessary?
	 * The consumers of mdi_pi_find() can search for the desired pathinfo
	 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
	 * whether the search is based on the pathinfo nodes attached to
	 * the pHCI or the client node, the result will be the same.
	 */

	/*
	 * Find the client device corresponding to 'caddr'
	 */
	MDI_VHCI_CLIENT_LOCK(vh);

	/*
	 * XXX - Passing NULL to the following function works as long as the
	 * the client addresses (caddr) are unique per vhci basis.
	 */
	ct = i_mdi_client_find(vh, NULL, caddr);
	if (ct == NULL) {
		/*
		 * Client not found, Obviously mdi_pathinfo node has not been
		 * created yet.
		 */
		MDI_VHCI_CLIENT_UNLOCK(vh);
		MDI_DEBUG(2, (MDI_NOTE, pdip,
		    "client not found for caddr @%s", caddr ? caddr : ""));
		return (NULL);
	}

	/*
	 * Hold the client lock and look for a mdi_pathinfo node with matching
	 * pHCI and paddr
	 */
	MDI_CLIENT_LOCK(ct);

	/*
	 * Release the global mutex as it is no more needed. Note: We always
	 * respect the locking order while acquiring.
	 */
	MDI_VHCI_CLIENT_UNLOCK(vh);

	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Compare the unit address
		 */
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
	MDI_CLIENT_UNLOCK(ct);
	MDI_DEBUG(2, (MDI_NOTE, pdip,
	    "found: %s %p", mdi_pi_spathname(pip), (void *)pip));
	return (pip);
}
2746
2747 /*
2748 * mdi_pi_alloc():
2749 * Allocate and initialize a new instance of a mdi_pathinfo node.
2750 * The mdi_pathinfo node returned by this function identifies a
 *		unique device path and is capable of having properties attached
2752 * and passed to mdi_pi_online() to fully attach and online the
2753 * path and client device node.
2754 * The mdi_pathinfo node returned by this function must be
2755 * destroyed using mdi_pi_free() if the path is no longer
2756 * operational or if the caller fails to attach a client device
2757 * node when calling mdi_pi_online(). The framework will not free
2758 * the resources allocated.
2759 * This function can be called from both interrupt and kernel
2760 * contexts. DDI_NOSLEEP flag should be used while calling
2761 * from interrupt contexts.
2762 * Return Values:
2763 * MDI_SUCCESS
2764 * MDI_FAILURE
2765 * MDI_NOMEM
2766 */
/*ARGSUSED*/
int
mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
    char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
{
	mdi_vhci_t *vh;
	mdi_phci_t *ph;
	mdi_client_t *ct;
	mdi_pathinfo_t *pip = NULL;
	dev_info_t *cdip;
	int rv = MDI_NOMEM;		/* reported when devinfo alloc fails */
	int path_allocated = 0;		/* set iff a new pathinfo was created */

	MDI_DEBUG(2, (MDI_NOTE, pdip,
	    "cname %s: caddr@%s paddr@%s",
	    cname ? cname : "", caddr ? caddr : "", paddr ? paddr : ""));

	if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
	    ret_pip == NULL) {
		/* Nothing more to do */
		return (MDI_FAILURE);
	}

	*ret_pip = NULL;

	/* No allocations on detaching pHCI */
	if (DEVI_IS_DETACHING(pdip)) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (MDI_WARN, pdip,
		    "!detaching pHCI=%p", (void *)pdip));
		return (MDI_FAILURE);
	}

	ph = i_devi_get_phci(pdip);
	ASSERT(ph != NULL);
	if (ph == NULL) {
		/* Invalid pHCI device, return failure */
		MDI_DEBUG(1, (MDI_WARN, pdip,
		    "!invalid pHCI=%p", (void *)pdip));
		return (MDI_FAILURE);
	}

	MDI_PHCI_LOCK(ph);
	vh = ph->ph_vhci;
	if (vh == NULL) {
		/* Invalid vHCI device, return failure */
		MDI_DEBUG(1, (MDI_WARN, pdip,
		    "!invalid vHCI=%p", (void *)pdip));
		MDI_PHCI_UNLOCK(ph);
		return (MDI_FAILURE);
	}

	if (MDI_PHCI_IS_READY(ph) == 0) {
		/*
		 * Do not allow new node creation when pHCI is in
		 * offline/suspended states
		 */
		MDI_DEBUG(1, (MDI_WARN, pdip,
		    "pHCI=%p is not ready", (void *)ph));
		MDI_PHCI_UNLOCK(ph);
		return (MDI_BUSY);
	}
	/* Mark the pHCI unstable for the duration of the allocation. */
	MDI_PHCI_UNSTABLE(ph);
	MDI_PHCI_UNLOCK(ph);

	/* look for a matching client, create one if not found */
	MDI_VHCI_CLIENT_LOCK(vh);
	ct = i_mdi_client_find(vh, cname, caddr);
	if (ct == NULL) {
		/* KM_SLEEP-style alloc; presumably cannot fail — see ASSERT */
		ct = i_mdi_client_alloc(vh, cname, caddr);
		ASSERT(ct != NULL);
	}

	if (ct->ct_dip == NULL) {
		/*
		 * Allocate a devinfo node
		 */
		ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
		    compatible, ncompatible);
		if (ct->ct_dip == NULL) {
			/* rv is still MDI_NOMEM here */
			(void) i_mdi_client_free(vh, ct);
			goto fail;
		}
	}
	cdip = ct->ct_dip;

	DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
	DEVI(cdip)->devi_mdi_client = (caddr_t)ct;

	/* Check whether a pathinfo for this pHCI/paddr pair already exists. */
	MDI_CLIENT_LOCK(ct);
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Compare the unit address
		 */
		if ((MDI_PI(pip)->pi_phci == ph) &&
		    strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
			break;
		}
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
	MDI_CLIENT_UNLOCK(ct);

	if (pip == NULL) {
		/*
		 * This is a new path for this client device. Allocate and
		 * initialize a new pathinfo node
		 */
		pip = i_mdi_pi_alloc(ph, paddr, ct);
		ASSERT(pip != NULL);
		path_allocated = 1;
	}
	rv = MDI_SUCCESS;

fail:
	/*
	 * Release the global mutex.
	 */
	MDI_VHCI_CLIENT_UNLOCK(vh);

	/*
	 * Mark the pHCI as stable
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	*ret_pip = pip;

	MDI_DEBUG(2, (MDI_NOTE, pdip,
	    "alloc %s %p", mdi_pi_spathname(pip), (void *)pip));

	/* Record the new path in the vHCI configuration cache. */
	if (path_allocated)
		vhcache_pi_add(vh->vh_config, MDI_PI(pip));

	return (rv);
}
2903
/*
 * mdi_pi_alloc():
 *	Convenience wrapper around mdi_pi_alloc_compatible() that passes
 *	no "compatible" property list.  See mdi_pi_alloc_compatible()
 *	for the full contract and return values.
 */
/*ARGSUSED*/
int
mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
    int flags, mdi_pathinfo_t **ret_pip)
{
	return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
	    flags, ret_pip));
}
2912
2913 /*
2914 * i_mdi_pi_alloc():
2915 * Allocate a mdi_pathinfo node and add to the pHCI path list
2916 * Return Values:
2917 * mdi_pathinfo
2918 */
2919 /*ARGSUSED*/
2920 static mdi_pathinfo_t *
2921 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
2922 {
2923 mdi_pathinfo_t *pip;
2924 int ct_circular;
2925 int ph_circular;
2926 static char path[MAXPATHLEN]; /* mdi_pathmap_mutex protects */
2927 char *path_persistent;
2928 int path_instance;
2929 mod_hash_val_t hv;
2930
2931 ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));
2932
2933 pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
2934 mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
2935 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
2936 MDI_PATHINFO_STATE_TRANSIENT;
2937
2938 if (MDI_PHCI_IS_USER_DISABLED(ph))
2939 MDI_PI_SET_USER_DISABLE(pip);
2940
2941 if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
2942 MDI_PI_SET_DRV_DISABLE_TRANS(pip);
2943
2944 if (MDI_PHCI_IS_DRV_DISABLED(ph))
2945 MDI_PI_SET_DRV_DISABLE(pip);
2946
2947 MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
2948 cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
2949 MDI_PI(pip)->pi_client = ct;
2950 MDI_PI(pip)->pi_phci = ph;
2951 MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
2952 (void) strcpy(MDI_PI(pip)->pi_addr, paddr);
2953
2954 /*
2955 * We form the "path" to the pathinfo node, and see if we have
2956 * already allocated a 'path_instance' for that "path". If so,
2957 * we use the already allocated 'path_instance'. If not, we
2958 * allocate a new 'path_instance' and associate it with a copy of
2959 * the "path" string (which is never freed). The association
2960 * between a 'path_instance' this "path" string persists until
2961 * reboot.
2962 */
2963 mutex_enter(&mdi_pathmap_mutex);
2964 (void) ddi_pathname(ph->ph_dip, path);
2965 (void) sprintf(path + strlen(path), "/%s@%s",
2966 mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
2967 if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) {
2968 path_instance = (uint_t)(intptr_t)hv;
2969 } else {
2970 /* allocate a new 'path_instance' and persistent "path" */
2971 path_instance = mdi_pathmap_instance++;
2972 path_persistent = i_ddi_strdup(path, KM_SLEEP);
2973 (void) mod_hash_insert(mdi_pathmap_bypath,
2974 (mod_hash_key_t)path_persistent,
2975 (mod_hash_val_t)(intptr_t)path_instance);
2976 (void) mod_hash_insert(mdi_pathmap_byinstance,
2977 (mod_hash_key_t)(intptr_t)path_instance,
2978 (mod_hash_val_t)path_persistent);
2979
2980 /* create shortpath name */
2981 (void) snprintf(path, sizeof(path), "%s%d/%s@%s",
2982 ddi_driver_name(ph->ph_dip), ddi_get_instance(ph->ph_dip),
2983 mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
2984 path_persistent = i_ddi_strdup(path, KM_SLEEP);
2985 (void) mod_hash_insert(mdi_pathmap_sbyinstance,
2986 (mod_hash_key_t)(intptr_t)path_instance,
2987 (mod_hash_val_t)path_persistent);
2988 }
2989 mutex_exit(&mdi_pathmap_mutex);
2990 MDI_PI(pip)->pi_path_instance = path_instance;
2991
2992 (void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
2993 ASSERT(MDI_PI(pip)->pi_prop != NULL);
2994 MDI_PI(pip)->pi_pprivate = NULL;
2995 MDI_PI(pip)->pi_cprivate = NULL;
2996 MDI_PI(pip)->pi_vprivate = NULL;
2997 MDI_PI(pip)->pi_client_link = NULL;
2998 MDI_PI(pip)->pi_phci_link = NULL;
2999 MDI_PI(pip)->pi_ref_cnt = 0;
3000 MDI_PI(pip)->pi_kstats = NULL;
3001 MDI_PI(pip)->pi_preferred = 1;
3002 cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);
3003
3004 /*
3005 * Lock both dev_info nodes against changes in parallel.
3006 *
3007 * The ndi_devi_enter(Client), is atypical since the client is a leaf.
3008 * This atypical operation is done to synchronize pathinfo nodes
3009 * during devinfo snapshot (see di_register_pip) by 'pretending' that
3010 * the pathinfo nodes are children of the Client.
3011 */
3012 ndi_devi_enter(ct->ct_dip, &ct_circular);
3013 ndi_devi_enter(ph->ph_dip, &ph_circular);
3014
3015 i_mdi_phci_add_path(ph, pip);
3016 i_mdi_client_add_path(ct, pip);
3017
3018 ndi_devi_exit(ph->ph_dip, ph_circular);
3019 ndi_devi_exit(ct->ct_dip, ct_circular);
3020
3021 return (pip);
3022 }
3023
3024 /*
3025 * mdi_pi_pathname_by_instance():
3026 * Lookup of "path" by 'path_instance'. Return "path".
3027 * NOTE: returned "path" remains valid forever (until reboot).
3028 */
3029 char *
3030 mdi_pi_pathname_by_instance(int path_instance)
3031 {
3032 char *path;
3033 mod_hash_val_t hv;
3034
3035 /* mdi_pathmap lookup of "path" by 'path_instance' */
3036 mutex_enter(&mdi_pathmap_mutex);
3037 if (mod_hash_find(mdi_pathmap_byinstance,
3038 (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
3039 path = (char *)hv;
3040 else
3041 path = NULL;
3042 mutex_exit(&mdi_pathmap_mutex);
3043 return (path);
3044 }
3045
3046 /*
3047 * mdi_pi_spathname_by_instance():
3048 * Lookup of "shortpath" by 'path_instance'. Return "shortpath".
3049 * NOTE: returned "shortpath" remains valid forever (until reboot).
3050 */
3051 char *
3052 mdi_pi_spathname_by_instance(int path_instance)
3053 {
3054 char *path;
3055 mod_hash_val_t hv;
3056
3057 /* mdi_pathmap lookup of "path" by 'path_instance' */
3058 mutex_enter(&mdi_pathmap_mutex);
3059 if (mod_hash_find(mdi_pathmap_sbyinstance,
3060 (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
3061 path = (char *)hv;
3062 else
3063 path = NULL;
3064 mutex_exit(&mdi_pathmap_mutex);
3065 return (path);
3066 }
3067
3068
3069 /*
3070 * i_mdi_phci_add_path():
3071 * Add a mdi_pathinfo node to pHCI list.
3072 * Notes:
3073 * Caller should per-pHCI mutex
3074 */
3075 static void
3076 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3077 {
3078 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3079
3080 MDI_PHCI_LOCK(ph);
3081 if (ph->ph_path_head == NULL) {
3082 ph->ph_path_head = pip;
3083 } else {
3084 MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
3085 }
3086 ph->ph_path_tail = pip;
3087 ph->ph_path_count++;
3088 MDI_PHCI_UNLOCK(ph);
3089 }
3090
3091 /*
3092 * i_mdi_client_add_path():
3093 * Add mdi_pathinfo node to client list
3094 */
3095 static void
3096 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3097 {
3098 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3099
3100 MDI_CLIENT_LOCK(ct);
3101 if (ct->ct_path_head == NULL) {
3102 ct->ct_path_head = pip;
3103 } else {
3104 MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
3105 }
3106 ct->ct_path_tail = pip;
3107 ct->ct_path_count++;
3108 MDI_CLIENT_UNLOCK(ct);
3109 }
3110
3111 /*
3112 * mdi_pi_free():
3113 * Free the mdi_pathinfo node and also client device node if this
3114 * is the last path to the device
3115 * Return Values:
3116 * MDI_SUCCESS
3117 * MDI_FAILURE
3118 * MDI_BUSY
3119 */
3120 /*ARGSUSED*/
3121 int
3122 mdi_pi_free(mdi_pathinfo_t *pip, int flags)
3123 {
3124 int rv;
3125 mdi_vhci_t *vh;
3126 mdi_phci_t *ph;
3127 mdi_client_t *ct;
3128 int (*f)();
3129 int client_held = 0;
3130
3131 MDI_PI_LOCK(pip);
3132 ph = MDI_PI(pip)->pi_phci;
3133 ASSERT(ph != NULL);
3134 if (ph == NULL) {
3135 /*
3136 * Invalid pHCI device, return failure
3137 */
3138 MDI_DEBUG(1, (MDI_WARN, NULL,
3139 "!invalid pHCI: pip %s %p",
3140 mdi_pi_spathname(pip), (void *)pip));
3141 MDI_PI_UNLOCK(pip);
3142 return (MDI_FAILURE);
3143 }
3144
3145 vh = ph->ph_vhci;
3146 ASSERT(vh != NULL);
3147 if (vh == NULL) {
3148 /* Invalid pHCI device, return failure */
3149 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3150 "!invalid vHCI: pip %s %p",
3151 mdi_pi_spathname(pip), (void *)pip));
3152 MDI_PI_UNLOCK(pip);
3153 return (MDI_FAILURE);
3154 }
3155
3156 ct = MDI_PI(pip)->pi_client;
3157 ASSERT(ct != NULL);
3158 if (ct == NULL) {
3159 /*
3160 * Invalid Client device, return failure
3161 */
3162 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3163 "!invalid client: pip %s %p",
3164 mdi_pi_spathname(pip), (void *)pip));
3165 MDI_PI_UNLOCK(pip);
3166 return (MDI_FAILURE);
3167 }
3168
3169 /*
3170 * Check to see for busy condition. A mdi_pathinfo can only be freed
3171 * if the node state is either offline or init and the reference count
3172 * is zero.
3173 */
3174 if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
3175 MDI_PI_IS_INITING(pip))) {
3176 /*
3177 * Node is busy
3178 */
3179 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3180 "!busy: pip %s %p", mdi_pi_spathname(pip), (void *)pip));
3181 MDI_PI_UNLOCK(pip);
3182 return (MDI_BUSY);
3183 }
3184
3185 while (MDI_PI(pip)->pi_ref_cnt != 0) {
3186 /*
3187 * Give a chance for pending I/Os to complete.
3188 */
3189 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3190 "!%d cmds still pending on path: %s %p",
3191 MDI_PI(pip)->pi_ref_cnt,
3192 mdi_pi_spathname(pip), (void *)pip));
3193 if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
3194 &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
3195 TR_CLOCK_TICK) == -1) {
3196 /*
3197 * The timeout time reached without ref_cnt being zero
3198 * being signaled.
3199 */
3200 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3201 "!Timeout reached on path %s %p without the cond",
3202 mdi_pi_spathname(pip), (void *)pip));
3203 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3204 "!%d cmds still pending on path %s %p",
3205 MDI_PI(pip)->pi_ref_cnt,
3206 mdi_pi_spathname(pip), (void *)pip));
3207 MDI_PI_UNLOCK(pip);
3208 return (MDI_BUSY);
3209 }
3210 }
3211 if (MDI_PI(pip)->pi_pm_held) {
3212 client_held = 1;
3213 }
3214 MDI_PI_UNLOCK(pip);
3215
3216 vhcache_pi_remove(vh->vh_config, MDI_PI(pip));
3217
3218 MDI_CLIENT_LOCK(ct);
3219
3220 /* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
3221 MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);
3222
3223 /*
3224 * Wait till failover is complete before removing this node.
3225 */
3226 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3227 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3228
3229 MDI_CLIENT_UNLOCK(ct);
3230 MDI_VHCI_CLIENT_LOCK(vh);
3231 MDI_CLIENT_LOCK(ct);
3232 MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);
3233
3234 if (!MDI_PI_IS_INITING(pip)) {
3235 f = vh->vh_ops->vo_pi_uninit;
3236 if (f != NULL) {
3237 rv = (*f)(vh->vh_dip, pip, 0);
3238 }
3239 } else
3240 rv = MDI_SUCCESS;
3241
3242 /*
3243 * If vo_pi_uninit() completed successfully.
3244 */
3245 if (rv == MDI_SUCCESS) {
3246 if (client_held) {
3247 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3248 "i_mdi_pm_rele_client\n"));
3249 i_mdi_pm_rele_client(ct, 1);
3250 }
3251 i_mdi_pi_free(ph, pip, ct);
3252 if (ct->ct_path_count == 0) {
3253 /*
3254 * Client lost its last path.
3255 * Clean up the client device
3256 */
3257 MDI_CLIENT_UNLOCK(ct);
3258 (void) i_mdi_client_free(ct->ct_vhci, ct);
3259 MDI_VHCI_CLIENT_UNLOCK(vh);
3260 return (rv);
3261 }
3262 }
3263 MDI_CLIENT_UNLOCK(ct);
3264 MDI_VHCI_CLIENT_UNLOCK(vh);
3265
3266 if (rv == MDI_FAILURE)
3267 vhcache_pi_add(vh->vh_config, MDI_PI(pip));
3268
3269 return (rv);
3270 }
3271
3272 /*
3273 * i_mdi_pi_free():
3274 * Free the mdi_pathinfo node
3275 */
3276 static void
3277 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
3278 {
3279 int ct_circular;
3280 int ph_circular;
3281
3282 ASSERT(MDI_CLIENT_LOCKED(ct));
3283
3284 /*
3285 * remove any per-path kstats
3286 */
3287 i_mdi_pi_kstat_destroy(pip);
3288
3289 /* See comments in i_mdi_pi_alloc() */
3290 ndi_devi_enter(ct->ct_dip, &ct_circular);
3291 ndi_devi_enter(ph->ph_dip, &ph_circular);
3292
3293 i_mdi_client_remove_path(ct, pip);
3294 i_mdi_phci_remove_path(ph, pip);
3295
3296 ndi_devi_exit(ph->ph_dip, ph_circular);
3297 ndi_devi_exit(ct->ct_dip, ct_circular);
3298
3299 mutex_destroy(&MDI_PI(pip)->pi_mutex);
3300 cv_destroy(&MDI_PI(pip)->pi_state_cv);
3301 cv_destroy(&MDI_PI(pip)->pi_ref_cv);
3302 if (MDI_PI(pip)->pi_addr) {
3303 kmem_free(MDI_PI(pip)->pi_addr,
3304 strlen(MDI_PI(pip)->pi_addr) + 1);
3305 MDI_PI(pip)->pi_addr = NULL;
3306 }
3307
3308 if (MDI_PI(pip)->pi_prop) {
3309 (void) nvlist_free(MDI_PI(pip)->pi_prop);
3310 MDI_PI(pip)->pi_prop = NULL;
3311 }
3312 kmem_free(pip, sizeof (struct mdi_pathinfo));
3313 }
3314
3315
3316 /*
3317 * i_mdi_phci_remove_path():
3318 * Remove a mdi_pathinfo node from pHCI list.
3319 * Notes:
3320 * Caller should hold per-pHCI mutex
3321 */
3322 static void
3323 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3324 {
3325 mdi_pathinfo_t *prev = NULL;
3326 mdi_pathinfo_t *path = NULL;
3327
3328 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3329
3330 MDI_PHCI_LOCK(ph);
3331 path = ph->ph_path_head;
3332 while (path != NULL) {
3333 if (path == pip) {
3334 break;
3335 }
3336 prev = path;
3337 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3338 }
3339
3340 if (path) {
3341 ph->ph_path_count--;
3342 if (prev) {
3343 MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
3344 } else {
3345 ph->ph_path_head =
3346 (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3347 }
3348 if (ph->ph_path_tail == path) {
3349 ph->ph_path_tail = prev;
3350 }
3351 }
3352
3353 /*
3354 * Clear the pHCI link
3355 */
3356 MDI_PI(pip)->pi_phci_link = NULL;
3357 MDI_PI(pip)->pi_phci = NULL;
3358 MDI_PHCI_UNLOCK(ph);
3359 }
3360
3361 /*
3362 * i_mdi_client_remove_path():
3363 * Remove a mdi_pathinfo node from client path list.
3364 */
3365 static void
3366 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3367 {
3368 mdi_pathinfo_t *prev = NULL;
3369 mdi_pathinfo_t *path;
3370
3371 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3372
3373 ASSERT(MDI_CLIENT_LOCKED(ct));
3374 path = ct->ct_path_head;
3375 while (path != NULL) {
3376 if (path == pip) {
3377 break;
3378 }
3379 prev = path;
3380 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3381 }
3382
3383 if (path) {
3384 ct->ct_path_count--;
3385 if (prev) {
3386 MDI_PI(prev)->pi_client_link =
3387 MDI_PI(path)->pi_client_link;
3388 } else {
3389 ct->ct_path_head =
3390 (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3391 }
3392 if (ct->ct_path_tail == path) {
3393 ct->ct_path_tail = prev;
3394 }
3395 if (ct->ct_path_last == path) {
3396 ct->ct_path_last = ct->ct_path_head;
3397 }
3398 }
3399 MDI_PI(pip)->pi_client_link = NULL;
3400 MDI_PI(pip)->pi_client = NULL;
3401 }
3402
/*
 * i_mdi_pi_state_change():
 *	Transition a mdi_pathinfo node to the requested state (ONLINE,
 *	STANDBY, FAULT or OFFLINE), invoking the vHCI driver's
 *	vo_pi_init/vo_pi_state_change entry points and updating the
 *	owning client's aggregate state afterwards.
 *
 *	Locking: takes pi lock, then pHCI lock (released before the
 *	transient wait), then the client lock in reverse order via
 *	i_mdi_client_lock(ct, pip). All locks are dropped around calls
 *	into the vHCI driver and around ndi_devi_offline/online.
 *
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 *	MDI_BUSY	pHCI not ready, or client offline was refused
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	dev_info_t	*cdip;

	MDI_PI_LOCK(pip);

	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (MDI_WARN, NULL,
		    "!invalid phci: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh);
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
		    "!invalid vhci: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid client device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
		    "!invalid client: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		return (MDI_FAILURE);
	}

	/*
	 * If this path has not been initialized yet, Callback vHCI driver's
	 * pathinfo node initialize entry point
	 */

	if (MDI_PI_IS_INITING(pip)) {
		/* pi lock dropped across the vHCI callback */
		MDI_PI_UNLOCK(pip);
		f = vh->vh_ops->vo_pi_init;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
			if (rv != MDI_SUCCESS) {
				MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
				    "!vo_pi_init failed: vHCI %p, pip %s %p",
				    (void *)vh, mdi_pi_spathname(pip),
				    (void *)pip));
				return (MDI_FAILURE);
			}
		}
		MDI_PI_LOCK(pip);
		MDI_PI_CLEAR_TRANSIENT(pip);
	}

	/*
	 * Do not allow state transition when pHCI is in offline/suspended
	 * states
	 */
	i_mdi_phci_lock(ph, pip);
	if (MDI_PHCI_IS_READY(ph) == 0) {
		MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
		    "!pHCI not ready, pHCI=%p", (void *)ph));
		MDI_PI_UNLOCK(pip);
		i_mdi_phci_unlock(ph);
		return (MDI_BUSY);
	}
	/* Mark the pHCI unstable; re-stabilized at state_change_exit */
	MDI_PHCI_UNSTABLE(ph);
	i_mdi_phci_unlock(ph);

	/*
	 * Check if mdi_pathinfo state is in transient state.
	 * If yes, offlining is in progress and wait till transient state is
	 * cleared.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		while (MDI_PI_IS_TRANSIENT(pip)) {
			cv_wait(&MDI_PI(pip)->pi_state_cv,
			    &MDI_PI(pip)->pi_mutex);
		}
	}

	/*
	 * Grab the client lock in reverse order sequence and release the
	 * mdi_pathinfo mutex.
	 */
	i_mdi_client_lock(ct, pip);
	MDI_PI_UNLOCK(pip);

	/*
	 * Wait till failover state is cleared
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Mark the mdi_pathinfo node state as transient
	 */
	MDI_PI_LOCK(pip);
	switch (state) {
	case MDI_PATHINFO_STATE_ONLINE:
		MDI_PI_SET_ONLINING(pip);
		break;

	case MDI_PATHINFO_STATE_STANDBY:
		MDI_PI_SET_STANDBYING(pip);
		break;

	case MDI_PATHINFO_STATE_FAULT:
		/*
		 * Mark the pathinfo state as FAULTED
		 */
		MDI_PI_SET_FAULTING(pip);
		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
		break;

	case MDI_PATHINFO_STATE_OFFLINE:
		/*
		 * ndi_devi_offline() cannot hold pip or ct locks.
		 */
		MDI_PI_UNLOCK(pip);

		/*
		 * If this is a user initiated path online->offline operation
		 * who's success would transition a client from DEGRADED to
		 * FAILED then only proceed if we can offline the client first.
		 */
		cdip = ct->ct_dip;
		if ((flag & NDI_USER_REQ) &&
		    MDI_PI_IS_ONLINE(pip) &&
		    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
			i_mdi_client_unlock(ct);
			rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN);
			if (rv != NDI_SUCCESS) {
				/*
				 * Convert to MDI error code
				 */
				switch (rv) {
				case NDI_BUSY:
					rv = MDI_BUSY;
					break;
				default:
					rv = MDI_FAILURE;
					break;
				}
				/*
				 * Client offline refused: bail out with the
				 * client lock already dropped; only the pHCI
				 * needs re-stabilizing.
				 */
				goto state_change_exit;
			} else {
				i_mdi_client_lock(ct, NULL);
			}
		}
		/*
		 * Mark the mdi_pathinfo node state as transient
		 */
		MDI_PI_LOCK(pip);
		MDI_PI_SET_OFFLINING(pip);
		break;
	}
	MDI_PI_UNLOCK(pip);
	MDI_CLIENT_UNSTABLE(ct);
	i_mdi_client_unlock(ct);

	/* Invoke the vHCI state-change callback with no locks held */
	f = vh->vh_ops->vo_pi_state_change;
	if (f != NULL)
		rv = (*f)(vh->vh_dip, pip, state, 0, flag);

	MDI_CLIENT_LOCK(ct);
	MDI_PI_LOCK(pip);
	if (rv == MDI_NOT_SUPPORTED) {
		MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
	}
	if (rv != MDI_SUCCESS) {
		MDI_DEBUG(2, (MDI_WARN, ct->ct_dip,
		    "vo_pi_state_change failed: rv %x", rv));
	}
	if (MDI_PI_IS_TRANSIENT(pip)) {
		if (rv == MDI_SUCCESS) {
			MDI_PI_CLEAR_TRANSIENT(pip);
		} else {
			/* Callback failed: roll back to the previous state */
			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
		}
	}

	/*
	 * Wake anyone waiting for this mdi_pathinfo node
	 */
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);

	/*
	 * Mark the client device as stable
	 */
	MDI_CLIENT_STABLE(ct);
	if (rv == MDI_SUCCESS) {
		if (ct->ct_unstable == 0) {
			cdip = ct->ct_dip;

			/*
			 * Onlining the mdi_pathinfo node will impact the
			 * client state Update the client and dev_info node
			 * state accordingly
			 */
			rv = NDI_SUCCESS;
			i_mdi_client_update_state(ct);
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip && !i_ddi_devi_attached(cdip) &&
				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
				    (state == MDI_PATHINFO_STATE_STANDBY))) {

					/*
					 * Must do ndi_devi_online() through
					 * hotplug thread for deferred
					 * attach mechanism to work
					 */
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_online(cdip, 0);
					MDI_CLIENT_LOCK(ct);
					if ((rv != NDI_SUCCESS) &&
					    (MDI_CLIENT_STATE(ct) ==
					    MDI_CLIENT_STATE_DEGRADED)) {
						MDI_DEBUG(1, (MDI_WARN, cdip,
						    "!ndi_devi_online failed "
						    "error %x", rv));
					}
					/*
					 * An online failure here is not fatal
					 * to the path transition itself.
					 */
					rv = NDI_SUCCESS;
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				/*
				 * This is the last path case for
				 * non-user initiated events.
				 */
				if ((flag & NDI_USER_REQ) ||
				    cdip == NULL || i_ddi_node_state(cdip) <
				    DS_INITIALIZED)
					break;

				MDI_CLIENT_UNLOCK(ct);
				rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN);
				MDI_CLIENT_LOCK(ct);

				if (rv != NDI_SUCCESS) {
					/*
					 * Reset client flags to online as the
					 * path could not be offlined.
					 */
					MDI_DEBUG(1, (MDI_WARN, cdip,
					    "!ndi_devi_offline failed: %d",
					    rv));
					MDI_CLIENT_SET_ONLINE(ct);
				}
				break;
			}
			/*
			 * Convert to MDI error code
			 */
			switch (rv) {
			case NDI_SUCCESS:
				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
				i_mdi_report_path_state(ct, pip);
				rv = MDI_SUCCESS;
				break;
			case NDI_BUSY:
				rv = MDI_BUSY;
				break;
			default:
				rv = MDI_FAILURE;
				break;
			}
		}
	}
	MDI_CLIENT_UNLOCK(ct);

state_change_exit:
	/*
	 * Mark the pHCI as stable again.
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}
3714
/*
 * mdi_pi_online():
 *	Place the path_info node in the online state. The path is
 *	now available to be selected by mdi_select_path() for
 *	transporting I/O requests to client devices.
 *
 *	After a successful state change this also takes a power
 *	management hold on the path (and, first time, on the client)
 *	so the devices stay powered while the path is usable.
 *
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 */
int
mdi_pi_online(mdi_pathinfo_t *pip, int flags)
{
	/*
	 * NOTE(review): pi_client is read before taking MDI_PI_LOCK;
	 * presumably safe because the caller holds a reference keeping
	 * the path/client association stable — confirm against callers.
	 */
	mdi_client_t	*ct = MDI_PI(pip)->pi_client;
	int		client_held = 0;
	int		rv;

	ASSERT(ct != NULL);
	rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
	if (rv != MDI_SUCCESS)
		return (rv);

	/* Take the pm hold only once per path (pi_pm_held is the marker) */
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_pm_held == 0) {
		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
		    "i_mdi_pm_hold_pip %p", (void *)pip));
		i_mdi_pm_hold_pip(pip);
		client_held = 1;
	}
	MDI_PI_UNLOCK(pip);

	if (client_held) {
		MDI_CLIENT_LOCK(ct);
		if (ct->ct_power_cnt == 0) {
			/* First held path: power up all pHCIs for the client */
			rv = i_mdi_power_all_phci(ct);
		}

		MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
		    "i_mdi_pm_hold_client %p", (void *)ct));
		i_mdi_pm_hold_client(ct, 1);
		MDI_CLIENT_UNLOCK(ct);
	}

	return (rv);
}
3759
3760 /*
3761 * mdi_pi_standby():
3762 * Place the mdi_pathinfo node in standby state
3763 *
3764 * Return Values:
3765 * MDI_SUCCESS
3766 * MDI_FAILURE
3767 */
3768 int
3769 mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
3770 {
3771 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
3772 }
3773
3774 /*
3775 * mdi_pi_fault():
3776 * Place the mdi_pathinfo node in fault'ed state
3777 * Return Values:
3778 * MDI_SUCCESS
3779 * MDI_FAILURE
3780 */
3781 int
3782 mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
3783 {
3784 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
3785 }
3786
/*
 * mdi_pi_offline():
 *	Offline a mdi_pathinfo node.
 *
 *	On success, releases the power-management hold that
 *	mdi_pi_online() took on the client (if one was held).
 *
 * Return Values:
 *	MDI_SUCCESS
 *	MDI_FAILURE
 */
int
mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
{
	int	ret, client_held = 0;
	mdi_client_t	*ct;

	/*
	 * Original code overloaded NDI_DEVI_REMOVE to this interface, and
	 * used it to mean "user initiated operation" (i.e. devctl). Callers
	 * should now just use NDI_USER_REQ.
	 */
	if (flags & NDI_DEVI_REMOVE) {
		flags &= ~NDI_DEVI_REMOVE;
		flags |= NDI_USER_REQ;
	}

	ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags);

	if (ret == MDI_SUCCESS) {
		/* Check (under the pi lock) whether a pm hold is outstanding */
		MDI_PI_LOCK(pip);
		if (MDI_PI(pip)->pi_pm_held) {
			client_held = 1;
		}
		MDI_PI_UNLOCK(pip);

		if (client_held) {
			/*
			 * NOTE(review): pi_client is read here without the
			 * pi lock; presumably the association is stable for
			 * a path the caller still references — confirm.
			 */
			ct = MDI_PI(pip)->pi_client;
			MDI_CLIENT_LOCK(ct);
			MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, 1);
			MDI_CLIENT_UNLOCK(ct);
		}
	}

	return (ret);
}
3831
3832 /*
3833 * i_mdi_pi_offline():
3834 * Offline a mdi_pathinfo node and call the vHCI driver's callback
3835 */
3836 static int
3837 i_mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3838 {
3839 dev_info_t *vdip = NULL;
3840 mdi_vhci_t *vh = NULL;
3841 mdi_client_t *ct = NULL;
3842 int (*f)();
3843 int rv;
3844
3845 MDI_PI_LOCK(pip);
3846 ct = MDI_PI(pip)->pi_client;
3847 ASSERT(ct != NULL);
3848
3849 while (MDI_PI(pip)->pi_ref_cnt != 0) {
3850 /*
3851 * Give a chance for pending I/Os to complete.
3852 */
3853 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3854 "!%d cmds still pending on path %s %p",
3855 MDI_PI(pip)->pi_ref_cnt, mdi_pi_spathname(pip),
3856 (void *)pip));
3857 if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
3858 &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
3859 TR_CLOCK_TICK) == -1) {
3860 /*
3861 * The timeout time reached without ref_cnt being zero
3862 * being signaled.
3863 */
3864 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3865 "!Timeout reached on path %s %p without the cond",
3866 mdi_pi_spathname(pip), (void *)pip));
3867 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3868 "!%d cmds still pending on path %s %p",
3869 MDI_PI(pip)->pi_ref_cnt,
3870 mdi_pi_spathname(pip), (void *)pip));
3871 }
3872 }
3873 vh = ct->ct_vhci;
3874 vdip = vh->vh_dip;
3875
3876 /*
3877 * Notify vHCI that has registered this event
3878 */
3879 ASSERT(vh->vh_ops);
3880 f = vh->vh_ops->vo_pi_state_change;
3881
3882 if (f != NULL) {
3883 MDI_PI_UNLOCK(pip);
3884 if ((rv = (*f)(vdip, pip, MDI_PATHINFO_STATE_OFFLINE, 0,
3885 flags)) != MDI_SUCCESS) {
3886 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3887 "!vo_path_offline failed: vdip %s%d %p: path %s %p",
3888 ddi_driver_name(vdip), ddi_get_instance(vdip),
3889 (void *)vdip, mdi_pi_spathname(pip), (void *)pip));
3890 }
3891 MDI_PI_LOCK(pip);
3892 }
3893
3894 /*
3895 * Set the mdi_pathinfo node state and clear the transient condition
3896 */
3897 MDI_PI_SET_OFFLINE(pip);
3898 cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3899 MDI_PI_UNLOCK(pip);
3900
3901 MDI_CLIENT_LOCK(ct);
3902 if (rv == MDI_SUCCESS) {
3903 if (ct->ct_unstable == 0) {
3904 dev_info_t *cdip = ct->ct_dip;
3905
3906 /*
3907 * Onlining the mdi_pathinfo node will impact the
3908 * client state Update the client and dev_info node
3909 * state accordingly
3910 */
3911 i_mdi_client_update_state(ct);
3912 rv = NDI_SUCCESS;
3913 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
3914 if (cdip &&
3915 (i_ddi_node_state(cdip) >=
3916 DS_INITIALIZED)) {
3917 MDI_CLIENT_UNLOCK(ct);
3918 rv = ndi_devi_offline(cdip,
3919 NDI_DEVFS_CLEAN);
3920 MDI_CLIENT_LOCK(ct);
3921 if (rv != NDI_SUCCESS) {
3922 /*
3923 * ndi_devi_offline failed.
3924 * Reset client flags to
3925 * online.
3926 */
3927 MDI_DEBUG(4, (MDI_WARN, cdip,
3928 "ndi_devi_offline failed: "
3929 "error %x", rv));
3930 MDI_CLIENT_SET_ONLINE(ct);
3931 }
3932 }
3933 }
3934 /*
3935 * Convert to MDI error code
3936 */
3937 switch (rv) {
3938 case NDI_SUCCESS:
3939 rv = MDI_SUCCESS;
3940 break;
3941 case NDI_BUSY:
3942 rv = MDI_BUSY;
3943 break;
3944 default:
3945 rv = MDI_FAILURE;
3946 break;
3947 }
3948 }
3949 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
3950 i_mdi_report_path_state(ct, pip);
3951 }
3952
3953 MDI_CLIENT_UNLOCK(ct);
3954
3955 /*
3956 * Change in the mdi_pathinfo node state will impact the client state
3957 */
3958 MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
3959 "ct = %p pip = %p", (void *)ct, (void *)pip));
3960 return (rv);
3961 }
3962
3963 /*
3964 * i_mdi_pi_online():
3965 * Online a mdi_pathinfo node and call the vHCI driver's callback
3966 */
3967 static int
3968 i_mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3969 {
3970 mdi_vhci_t *vh = NULL;
3971 mdi_client_t *ct = NULL;
3972 mdi_phci_t *ph;
3973 int (*f)();
3974 int rv;
3975
3976 MDI_PI_LOCK(pip);
3977 ph = MDI_PI(pip)->pi_phci;
3978 vh = ph->ph_vhci;
3979 ct = MDI_PI(pip)->pi_client;
3980 MDI_PI_SET_ONLINING(pip)
3981 MDI_PI_UNLOCK(pip);
3982 f = vh->vh_ops->vo_pi_state_change;
3983 if (f != NULL)
3984 rv = (*f)(vh->vh_dip, pip, MDI_PATHINFO_STATE_ONLINE, 0,
3985 flags);
3986 MDI_CLIENT_LOCK(ct);
3987 MDI_PI_LOCK(pip);
3988 cv_broadcast(&MDI_PI(pip)->pi_state_cv);
3989 MDI_PI_UNLOCK(pip);
3990 if (rv == MDI_SUCCESS) {
3991 dev_info_t *cdip = ct->ct_dip;
3992
3993 rv = MDI_SUCCESS;
3994 i_mdi_client_update_state(ct);
3995 if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL ||
3996 MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
3997 if (cdip && !i_ddi_devi_attached(cdip)) {
3998 MDI_CLIENT_UNLOCK(ct);
3999 rv = ndi_devi_online(cdip, 0);
4000 MDI_CLIENT_LOCK(ct);
4001 if ((rv != NDI_SUCCESS) &&
4002 (MDI_CLIENT_STATE(ct) ==
4003 MDI_CLIENT_STATE_DEGRADED)) {
4004 MDI_CLIENT_SET_OFFLINE(ct);
4005 }
4006 if (rv != NDI_SUCCESS) {
4007 /* Reset the path state */
4008 MDI_PI_LOCK(pip);
4009 MDI_PI(pip)->pi_state =
4010 MDI_PI_OLD_STATE(pip);
4011 MDI_PI_UNLOCK(pip);
4012 }
4013 }
4014 }
4015 switch (rv) {
4016 case NDI_SUCCESS:
4017 MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
4018 i_mdi_report_path_state(ct, pip);
4019 rv = MDI_SUCCESS;
4020 break;
4021 case NDI_BUSY:
4022 rv = MDI_BUSY;
4023 break;
4024 default:
4025 rv = MDI_FAILURE;
4026 break;
4027 }
4028 } else {
4029 /* Reset the path state */
4030 MDI_PI_LOCK(pip);
4031 MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
4032 MDI_PI_UNLOCK(pip);
4033 }
4034 MDI_CLIENT_UNLOCK(ct);
4035 return (rv);
4036 }
4037
4038 /*
4039 * mdi_pi_get_node_name():
4040 * Get the name associated with a mdi_pathinfo node.
4041 * Since pathinfo nodes are not directly named, we
4042 * return the node_name of the client.
4043 *
4044 * Return Values:
4045 * char *
4046 */
4047 char *
4048 mdi_pi_get_node_name(mdi_pathinfo_t *pip)
4049 {
4050 mdi_client_t *ct;
4051
4052 if (pip == NULL)
4053 return (NULL);
4054 ct = MDI_PI(pip)->pi_client;
4055 if ((ct == NULL) || (ct->ct_dip == NULL))
4056 return (NULL);
4057 return (ddi_node_name(ct->ct_dip));
4058 }
4059
4060 /*
4061 * mdi_pi_get_addr():
4062 * Get the unit address associated with a mdi_pathinfo node
4063 *
4064 * Return Values:
4065 * char *
4066 */
4067 char *
4068 mdi_pi_get_addr(mdi_pathinfo_t *pip)
4069 {
4070 if (pip == NULL)
4071 return (NULL);
4072
4073 return (MDI_PI(pip)->pi_addr);
4074 }
4075
4076 /*
4077 * mdi_pi_get_path_instance():
4078 * Get the 'path_instance' of a mdi_pathinfo node
4079 *
4080 * Return Values:
4081 * path_instance
4082 */
4083 int
4084 mdi_pi_get_path_instance(mdi_pathinfo_t *pip)
4085 {
4086 if (pip == NULL)
4087 return (0);
4088
4089 return (MDI_PI(pip)->pi_path_instance);
4090 }
4091
4092 /*
4093 * mdi_pi_pathname():
4094 * Return pointer to path to pathinfo node.
4095 */
4096 char *
4097 mdi_pi_pathname(mdi_pathinfo_t *pip)
4098 {
4099 if (pip == NULL)
4100 return (NULL);
4101 return (mdi_pi_pathname_by_instance(mdi_pi_get_path_instance(pip)));
4102 }
4103
4104 /*
4105 * mdi_pi_spathname():
4106 * Return pointer to shortpath to pathinfo node. Used for debug
4107 * messages, so return "" instead of NULL when unknown.
4108 */
4109 char *
4110 mdi_pi_spathname(mdi_pathinfo_t *pip)
4111 {
4112 char *spath = "";
4113
4114 if (pip) {
4115 spath = mdi_pi_spathname_by_instance(
4116 mdi_pi_get_path_instance(pip));
4117 if (spath == NULL)
4118 spath = "";
4119 }
4120 return (spath);
4121 }
4122
4123 char *
4124 mdi_pi_pathname_obp(mdi_pathinfo_t *pip, char *path)
4125 {
4126 char *obp_path = NULL;
4127 if ((pip == NULL) || (path == NULL))
4128 return (NULL);
4129
4130 if (mdi_prop_lookup_string(pip, "obp-path", &obp_path) == MDI_SUCCESS) {
4131 (void) strcpy(path, obp_path);
4132 (void) mdi_prop_free(obp_path);
4133 } else {
4134 path = NULL;
4135 }
4136 return (path);
4137 }
4138
4139 int
4140 mdi_pi_pathname_obp_set(mdi_pathinfo_t *pip, char *component)
4141 {
4142 dev_info_t *pdip;
4143 char *obp_path = NULL;
4144 int rc = MDI_FAILURE;
4145
4146 if (pip == NULL)
4147 return (MDI_FAILURE);
4148
4149 pdip = mdi_pi_get_phci(pip);
4150 if (pdip == NULL)
4151 return (MDI_FAILURE);
4152
4153 obp_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
4154
4155 if (ddi_pathname_obp(pdip, obp_path) == NULL) {
4156 (void) ddi_pathname(pdip, obp_path);
4157 }
4158
4159 if (component) {
4160 (void) strncat(obp_path, "/", MAXPATHLEN);
4161 (void) strncat(obp_path, component, MAXPATHLEN);
4162 }
4163 rc = mdi_prop_update_string(pip, "obp-path", obp_path);
4164
4165 if (obp_path)
4166 kmem_free(obp_path, MAXPATHLEN);
4167 return (rc);
4168 }
4169
4170 /*
4171 * mdi_pi_get_client():
4172 * Get the client devinfo associated with a mdi_pathinfo node
4173 *
4174 * Return Values:
4175 * Handle to client device dev_info node
4176 */
4177 dev_info_t *
4178 mdi_pi_get_client(mdi_pathinfo_t *pip)
4179 {
4180 dev_info_t *dip = NULL;
4181 if (pip) {
4182 dip = MDI_PI(pip)->pi_client->ct_dip;
4183 }
4184 return (dip);
4185 }
4186
4187 /*
4188 * mdi_pi_get_phci():
4189 * Get the pHCI devinfo associated with the mdi_pathinfo node
4190 * Return Values:
4191 * Handle to dev_info node
4192 */
4193 dev_info_t *
4194 mdi_pi_get_phci(mdi_pathinfo_t *pip)
4195 {
4196 dev_info_t *dip = NULL;
4197 mdi_phci_t *ph;
4198
4199 if (pip) {
4200 ph = MDI_PI(pip)->pi_phci;
4201 if (ph)
4202 dip = ph->ph_dip;
4203 }
4204 return (dip);
4205 }
4206
4207 /*
4208 * mdi_pi_get_client_private():
4209 * Get the client private information associated with the
4210 * mdi_pathinfo node
4211 */
4212 void *
4213 mdi_pi_get_client_private(mdi_pathinfo_t *pip)
4214 {
4215 void *cprivate = NULL;
4216 if (pip) {
4217 cprivate = MDI_PI(pip)->pi_cprivate;
4218 }
4219 return (cprivate);
4220 }
4221
4222 /*
4223 * mdi_pi_set_client_private():
4224 * Set the client private information in the mdi_pathinfo node
4225 */
4226 void
4227 mdi_pi_set_client_private(mdi_pathinfo_t *pip, void *priv)
4228 {
4229 if (pip) {
4230 MDI_PI(pip)->pi_cprivate = priv;
4231 }
4232 }
4233
4234 /*
4235 * mdi_pi_get_phci_private():
4236 * Get the pHCI private information associated with the
4237 * mdi_pathinfo node
4238 */
4239 caddr_t
4240 mdi_pi_get_phci_private(mdi_pathinfo_t *pip)
4241 {
4242 caddr_t pprivate = NULL;
4243
4244 if (pip) {
4245 pprivate = MDI_PI(pip)->pi_pprivate;
4246 }
4247 return (pprivate);
4248 }
4249
4250 /*
4251 * mdi_pi_set_phci_private():
4252 * Set the pHCI private information in the mdi_pathinfo node
4253 */
4254 void
4255 mdi_pi_set_phci_private(mdi_pathinfo_t *pip, caddr_t priv)
4256 {
4257 if (pip) {
4258 MDI_PI(pip)->pi_pprivate = priv;
4259 }
4260 }
4261
4262 /*
4263 * mdi_pi_get_state():
4264 * Get the mdi_pathinfo node state. Transient states are internal
4265 * and not provided to the users
4266 */
4267 mdi_pathinfo_state_t
4268 mdi_pi_get_state(mdi_pathinfo_t *pip)
4269 {
4270 mdi_pathinfo_state_t state = MDI_PATHINFO_STATE_INIT;
4271
4272 if (pip) {
4273 if (MDI_PI_IS_TRANSIENT(pip)) {
4274 /*
4275 * mdi_pathinfo is in state transition. Return the
4276 * last good state.
4277 */
4278 state = MDI_PI_OLD_STATE(pip);
4279 } else {
4280 state = MDI_PI_STATE(pip);
4281 }
4282 }
4283 return (state);
4284 }
4285
4286 /*
4287 * mdi_pi_get_flags():
4288 * Get the mdi_pathinfo node flags.
4289 */
4290 uint_t
4291 mdi_pi_get_flags(mdi_pathinfo_t *pip)
4292 {
4293 return (pip ? MDI_PI(pip)->pi_flags : 0);
4294 }
4295
4296 /*
4297 * Note that the following function needs to be the new interface for
4298 * mdi_pi_get_state when mpxio gets integrated to ON.
4299 */
4300 int
4301 mdi_pi_get_state2(mdi_pathinfo_t *pip, mdi_pathinfo_state_t *state,
4302 uint32_t *ext_state)
4303 {
4304 *state = MDI_PATHINFO_STATE_INIT;
4305
4306 if (pip) {
4307 if (MDI_PI_IS_TRANSIENT(pip)) {
4308 /*
4309 * mdi_pathinfo is in state transition. Return the
4310 * last good state.
4311 */
4312 *state = MDI_PI_OLD_STATE(pip);
4313 *ext_state = MDI_PI_OLD_EXT_STATE(pip);
4314 } else {
4315 *state = MDI_PI_STATE(pip);
4316 *ext_state = MDI_PI_EXT_STATE(pip);
4317 }
4318 }
4319 return (MDI_SUCCESS);
4320 }
4321
4322 /*
4323 * mdi_pi_get_preferred:
4324 * Get the preferred path flag
4325 */
4326 int
4327 mdi_pi_get_preferred(mdi_pathinfo_t *pip)
4328 {
4329 if (pip) {
4330 return (MDI_PI(pip)->pi_preferred);
4331 }
4332 return (0);
4333 }
4334
4335 /*
4336 * mdi_pi_set_preferred:
4337 * Set the preferred path flag
4338 */
4339 void
4340 mdi_pi_set_preferred(mdi_pathinfo_t *pip, int preferred)
4341 {
4342 if (pip) {
4343 MDI_PI(pip)->pi_preferred = preferred;
4344 }
4345 }
4346
4347 /*
4348 * mdi_pi_set_state():
4349 * Set the mdi_pathinfo node state
4350 */
4351 void
4352 mdi_pi_set_state(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state)
4353 {
4354 uint32_t ext_state;
4355
4356 if (pip) {
4357 ext_state = MDI_PI(pip)->pi_state & MDI_PATHINFO_EXT_STATE_MASK;
4358 MDI_PI(pip)->pi_state = state;
4359 MDI_PI(pip)->pi_state |= ext_state;
4360
4361 /* Path has changed state, invalidate DINFOCACHE snap shot. */
4362 i_ddi_di_cache_invalidate();
4363 }
4364 }
4365
4366 /*
4367 * Property functions:
4368 */
4369 int
4370 i_map_nvlist_error_to_mdi(int val)
4371 {
4372 int rv;
4373
4374 switch (val) {
4375 case 0:
4376 rv = DDI_PROP_SUCCESS;
4377 break;
4378 case EINVAL:
4379 case ENOTSUP:
4380 rv = DDI_PROP_INVAL_ARG;
4381 break;
4382 case ENOMEM:
4383 rv = DDI_PROP_NO_MEMORY;
4384 break;
4385 default:
4386 rv = DDI_PROP_NOT_FOUND;
4387 break;
4388 }
4389 return (rv);
4390 }
4391
4392 /*
4393 * mdi_pi_get_next_prop():
4394 * Property walk function. The caller should hold mdi_pi_lock()
4395 * and release by calling mdi_pi_unlock() at the end of walk to
4396 * get a consistent value.
4397 */
4398 nvpair_t *
4399 mdi_pi_get_next_prop(mdi_pathinfo_t *pip, nvpair_t *prev)
4400 {
4401 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4402 return (NULL);
4403 }
4404 ASSERT(MDI_PI_LOCKED(pip));
4405 return (nvlist_next_nvpair(MDI_PI(pip)->pi_prop, prev));
4406 }
4407
/*
 * mdi_prop_remove():
 *	Remove the named property from the property list; when name is
 *	NULL, remove every property on the list.
 *
 * Return Values:
 *	DDI_PROP_SUCCESS
 *	DDI_PROP_NOT_FOUND	pip NULL or no property list
 */
int
mdi_prop_remove(mdi_pathinfo_t *pip, char *name)
{
	if (pip == NULL) {
		return (DDI_PROP_NOT_FOUND);
	}
	ASSERT(!MDI_PI_LOCKED(pip));
	MDI_PI_LOCK(pip);
	if (MDI_PI(pip)->pi_prop == NULL) {
		MDI_PI_UNLOCK(pip);
		return (DDI_PROP_NOT_FOUND);
	}
	if (name) {
		(void) nvlist_remove_all(MDI_PI(pip)->pi_prop, name);
	} else {
		char		nvp_name[MAXNAMELEN];
		nvpair_t	*nvp;
		/*
		 * Remove-all walk: the name is copied out before the
		 * remove because nvlist_remove_all frees the pair.
		 * NOTE(review): `next` is captured before the remove;
		 * this presumes removing nvp cannot invalidate next
		 * (e.g. a unique-name nvlist) — confirm the list's
		 * nvlist allocation flags.
		 */
		nvp = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, NULL);
		while (nvp) {
			nvpair_t	*next;
			next = nvlist_next_nvpair(MDI_PI(pip)->pi_prop, nvp);
			(void) snprintf(nvp_name, sizeof(nvp_name), "%s",
			    nvpair_name(nvp));
			(void) nvlist_remove_all(MDI_PI(pip)->pi_prop,
			    nvp_name);
			nvp = next;
		}
	}
	MDI_PI_UNLOCK(pip);
	return (DDI_PROP_SUCCESS);
}
4443
4444 /*
4445 * mdi_prop_size():
4446 * Get buffer size needed to pack the property data.
4447 * Caller should hold the mdi_pathinfo_t lock to get a consistent
4448 * buffer size.
4449 */
4450 int
4451 mdi_prop_size(mdi_pathinfo_t *pip, size_t *buflenp)
4452 {
4453 int rv;
4454 size_t bufsize;
4455
4456 *buflenp = 0;
4457 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4458 return (DDI_PROP_NOT_FOUND);
4459 }
4460 ASSERT(MDI_PI_LOCKED(pip));
4461 rv = nvlist_size(MDI_PI(pip)->pi_prop,
4462 &bufsize, NV_ENCODE_NATIVE);
4463 *buflenp = bufsize;
4464 return (i_map_nvlist_error_to_mdi(rv));
4465 }
4466
4467 /*
4468 * mdi_prop_pack():
4469 * pack the property list. The caller should hold the
4470 * mdi_pathinfo_t node to get a consistent data
4471 */
4472 int
4473 mdi_prop_pack(mdi_pathinfo_t *pip, char **bufp, uint_t buflen)
4474 {
4475 int rv;
4476 size_t bufsize;
4477
4478 if ((pip == NULL) || MDI_PI(pip)->pi_prop == NULL) {
4479 return (DDI_PROP_NOT_FOUND);
4480 }
4481
4482 ASSERT(MDI_PI_LOCKED(pip));
4483
4484 bufsize = buflen;
4485 rv = nvlist_pack(MDI_PI(pip)->pi_prop, bufp, (size_t *)&bufsize,
4486 NV_ENCODE_NATIVE, KM_SLEEP);
4487
4488 return (i_map_nvlist_error_to_mdi(rv));
4489 }
4490
4491 /*
4492 * mdi_prop_update_byte():
4493 * Create/Update a byte property
4494 */
4495 int
4496 mdi_prop_update_byte(mdi_pathinfo_t *pip, char *name, uchar_t data)
4497 {
4498 int rv;
4499
4500 if (pip == NULL) {
4501 return (DDI_PROP_INVAL_ARG);
4502 }
4503 ASSERT(!MDI_PI_LOCKED(pip));
4504 MDI_PI_LOCK(pip);
4505 if (MDI_PI(pip)->pi_prop == NULL) {
4506 MDI_PI_UNLOCK(pip);
4507 return (DDI_PROP_NOT_FOUND);
4508 }
4509 rv = nvlist_add_byte(MDI_PI(pip)->pi_prop, name, data);
4510 MDI_PI_UNLOCK(pip);
4511 return (i_map_nvlist_error_to_mdi(rv));
4512 }
4513
4514 /*
4515 * mdi_prop_update_byte_array():
4516 * Create/Update a byte array property
4517 */
4518 int
4519 mdi_prop_update_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t *data,
4520 uint_t nelements)
4521 {
4522 int rv;
4523
4524 if (pip == NULL) {
4525 return (DDI_PROP_INVAL_ARG);
4526 }
4527 ASSERT(!MDI_PI_LOCKED(pip));
4528 MDI_PI_LOCK(pip);
4529 if (MDI_PI(pip)->pi_prop == NULL) {
4530 MDI_PI_UNLOCK(pip);
4531 return (DDI_PROP_NOT_FOUND);
4532 }
4533 rv = nvlist_add_byte_array(MDI_PI(pip)->pi_prop, name, data, nelements);
4534 MDI_PI_UNLOCK(pip);
4535 return (i_map_nvlist_error_to_mdi(rv));
4536 }
4537
4538 /*
4539 * mdi_prop_update_int():
4540 * Create/Update a 32 bit integer property
4541 */
4542 int
4543 mdi_prop_update_int(mdi_pathinfo_t *pip, char *name, int data)
4544 {
4545 int rv;
4546
4547 if (pip == NULL) {
4548 return (DDI_PROP_INVAL_ARG);
4549 }
4550 ASSERT(!MDI_PI_LOCKED(pip));
4551 MDI_PI_LOCK(pip);
4552 if (MDI_PI(pip)->pi_prop == NULL) {
4553 MDI_PI_UNLOCK(pip);
4554 return (DDI_PROP_NOT_FOUND);
4555 }
4556 rv = nvlist_add_int32(MDI_PI(pip)->pi_prop, name, (int32_t)data);
4557 MDI_PI_UNLOCK(pip);
4558 return (i_map_nvlist_error_to_mdi(rv));
4559 }
4560
4561 /*
4562 * mdi_prop_update_int64():
4563 * Create/Update a 64 bit integer property
4564 */
4565 int
4566 mdi_prop_update_int64(mdi_pathinfo_t *pip, char *name, int64_t data)
4567 {
4568 int rv;
4569
4570 if (pip == NULL) {
4571 return (DDI_PROP_INVAL_ARG);
4572 }
4573 ASSERT(!MDI_PI_LOCKED(pip));
4574 MDI_PI_LOCK(pip);
4575 if (MDI_PI(pip)->pi_prop == NULL) {
4576 MDI_PI_UNLOCK(pip);
4577 return (DDI_PROP_NOT_FOUND);
4578 }
4579 rv = nvlist_add_int64(MDI_PI(pip)->pi_prop, name, data);
4580 MDI_PI_UNLOCK(pip);
4581 return (i_map_nvlist_error_to_mdi(rv));
4582 }
4583
4584 /*
4585 * mdi_prop_update_int_array():
4586 * Create/Update a int array property
4587 */
4588 int
4589 mdi_prop_update_int_array(mdi_pathinfo_t *pip, char *name, int *data,
4590 uint_t nelements)
4591 {
4592 int rv;
4593
4594 if (pip == NULL) {
4595 return (DDI_PROP_INVAL_ARG);
4596 }
4597 ASSERT(!MDI_PI_LOCKED(pip));
4598 MDI_PI_LOCK(pip);
4599 if (MDI_PI(pip)->pi_prop == NULL) {
4600 MDI_PI_UNLOCK(pip);
4601 return (DDI_PROP_NOT_FOUND);
4602 }
4603 rv = nvlist_add_int32_array(MDI_PI(pip)->pi_prop, name, (int32_t *)data,
4604 nelements);
4605 MDI_PI_UNLOCK(pip);
4606 return (i_map_nvlist_error_to_mdi(rv));
4607 }
4608
4609 /*
4610 * mdi_prop_update_string():
4611 * Create/Update a string property
4612 */
4613 int
4614 mdi_prop_update_string(mdi_pathinfo_t *pip, char *name, char *data)
4615 {
4616 int rv;
4617
4618 if (pip == NULL) {
4619 return (DDI_PROP_INVAL_ARG);
4620 }
4621 ASSERT(!MDI_PI_LOCKED(pip));
4622 MDI_PI_LOCK(pip);
4623 if (MDI_PI(pip)->pi_prop == NULL) {
4624 MDI_PI_UNLOCK(pip);
4625 return (DDI_PROP_NOT_FOUND);
4626 }
4627 rv = nvlist_add_string(MDI_PI(pip)->pi_prop, name, data);
4628 MDI_PI_UNLOCK(pip);
4629 return (i_map_nvlist_error_to_mdi(rv));
4630 }
4631
4632 /*
4633 * mdi_prop_update_string_array():
4634 * Create/Update a string array property
4635 */
4636 int
4637 mdi_prop_update_string_array(mdi_pathinfo_t *pip, char *name, char **data,
4638 uint_t nelements)
4639 {
4640 int rv;
4641
4642 if (pip == NULL) {
4643 return (DDI_PROP_INVAL_ARG);
4644 }
4645 ASSERT(!MDI_PI_LOCKED(pip));
4646 MDI_PI_LOCK(pip);
4647 if (MDI_PI(pip)->pi_prop == NULL) {
4648 MDI_PI_UNLOCK(pip);
4649 return (DDI_PROP_NOT_FOUND);
4650 }
4651 rv = nvlist_add_string_array(MDI_PI(pip)->pi_prop, name, data,
4652 nelements);
4653 MDI_PI_UNLOCK(pip);
4654 return (i_map_nvlist_error_to_mdi(rv));
4655 }
4656
4657 /*
4658 * mdi_prop_lookup_byte():
4659 * Look for byte property identified by name. The data returned
4660 * is the actual property and valid as long as mdi_pathinfo_t node
4661 * is alive.
4662 */
4663 int
4664 mdi_prop_lookup_byte(mdi_pathinfo_t *pip, char *name, uchar_t *data)
4665 {
4666 int rv;
4667
4668 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4669 return (DDI_PROP_NOT_FOUND);
4670 }
4671 rv = nvlist_lookup_byte(MDI_PI(pip)->pi_prop, name, data);
4672 return (i_map_nvlist_error_to_mdi(rv));
4673 }
4674
4675
4676 /*
4677 * mdi_prop_lookup_byte_array():
4678 * Look for byte array property identified by name. The data
4679 * returned is the actual property and valid as long as
4680 * mdi_pathinfo_t node is alive.
4681 */
4682 int
4683 mdi_prop_lookup_byte_array(mdi_pathinfo_t *pip, char *name, uchar_t **data,
4684 uint_t *nelements)
4685 {
4686 int rv;
4687
4688 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4689 return (DDI_PROP_NOT_FOUND);
4690 }
4691 rv = nvlist_lookup_byte_array(MDI_PI(pip)->pi_prop, name, data,
4692 nelements);
4693 return (i_map_nvlist_error_to_mdi(rv));
4694 }
4695
4696 /*
4697 * mdi_prop_lookup_int():
4698 * Look for int property identified by name. The data returned
4699 * is the actual property and valid as long as mdi_pathinfo_t
4700 * node is alive.
4701 */
4702 int
4703 mdi_prop_lookup_int(mdi_pathinfo_t *pip, char *name, int *data)
4704 {
4705 int rv;
4706
4707 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4708 return (DDI_PROP_NOT_FOUND);
4709 }
4710 rv = nvlist_lookup_int32(MDI_PI(pip)->pi_prop, name, (int32_t *)data);
4711 return (i_map_nvlist_error_to_mdi(rv));
4712 }
4713
4714 /*
4715 * mdi_prop_lookup_int64():
4716 * Look for int64 property identified by name. The data returned
4717 * is the actual property and valid as long as mdi_pathinfo_t node
4718 * is alive.
4719 */
4720 int
4721 mdi_prop_lookup_int64(mdi_pathinfo_t *pip, char *name, int64_t *data)
4722 {
4723 int rv;
4724 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4725 return (DDI_PROP_NOT_FOUND);
4726 }
4727 rv = nvlist_lookup_int64(MDI_PI(pip)->pi_prop, name, data);
4728 return (i_map_nvlist_error_to_mdi(rv));
4729 }
4730
4731 /*
4732 * mdi_prop_lookup_int_array():
4733 * Look for int array property identified by name. The data
4734 * returned is the actual property and valid as long as
4735 * mdi_pathinfo_t node is alive.
4736 */
4737 int
4738 mdi_prop_lookup_int_array(mdi_pathinfo_t *pip, char *name, int **data,
4739 uint_t *nelements)
4740 {
4741 int rv;
4742
4743 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4744 return (DDI_PROP_NOT_FOUND);
4745 }
4746 rv = nvlist_lookup_int32_array(MDI_PI(pip)->pi_prop, name,
4747 (int32_t **)data, nelements);
4748 return (i_map_nvlist_error_to_mdi(rv));
4749 }
4750
4751 /*
4752 * mdi_prop_lookup_string():
4753 * Look for string property identified by name. The data
4754 * returned is the actual property and valid as long as
4755 * mdi_pathinfo_t node is alive.
4756 */
4757 int
4758 mdi_prop_lookup_string(mdi_pathinfo_t *pip, char *name, char **data)
4759 {
4760 int rv;
4761
4762 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4763 return (DDI_PROP_NOT_FOUND);
4764 }
4765 rv = nvlist_lookup_string(MDI_PI(pip)->pi_prop, name, data);
4766 return (i_map_nvlist_error_to_mdi(rv));
4767 }
4768
4769 /*
4770 * mdi_prop_lookup_string_array():
4771 * Look for string array property identified by name. The data
4772 * returned is the actual property and valid as long as
4773 * mdi_pathinfo_t node is alive.
4774 */
4775 int
4776 mdi_prop_lookup_string_array(mdi_pathinfo_t *pip, char *name, char ***data,
4777 uint_t *nelements)
4778 {
4779 int rv;
4780
4781 if ((pip == NULL) || (MDI_PI(pip)->pi_prop == NULL)) {
4782 return (DDI_PROP_NOT_FOUND);
4783 }
4784 rv = nvlist_lookup_string_array(MDI_PI(pip)->pi_prop, name, data,
4785 nelements);
4786 return (i_map_nvlist_error_to_mdi(rv));
4787 }
4788
4789 /*
4790 * mdi_prop_free():
4791 * Symmetrical function to ddi_prop_free(). nvlist_lookup_xx()
4792 * functions return the pointer to actual property data and not a
4793 * copy of it. So the data returned is valid as long as
4794 * mdi_pathinfo_t node is valid.
4795 */
4796 /*ARGSUSED*/
4797 int
4798 mdi_prop_free(void *data)
4799 {
4800 return (DDI_PROP_SUCCESS);
4801 }
4802
/*
 * i_mdi_report_path_state():
 *	Emit a "multipath status" console/syslog message describing the
 *	aggregate client state and the state of one path, appending the
 *	load-balancing configuration when both the client and the path
 *	are fully operational.  Must be called with the client locked.
 *	Nothing is reported until the client dip has an instance and the
 *	REPORT_DEV flag is pending; the flag is cleared once reported.
 */
/*ARGSUSED*/
static void
i_mdi_report_path_state(mdi_client_t *ct, mdi_pathinfo_t *pip)
{
	char *ct_path;
	char *ct_status;
	char *status;
	dev_info_t *cdip = ct->ct_dip;
	char lb_buf[64];
	int report_lb_c = 0, report_lb_p = 0;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	if ((cdip == NULL) || (ddi_get_instance(cdip) == -1) ||
	    (MDI_CLIENT_IS_REPORT_DEV_NEEDED(ct) == 0)) {
		return;
	}
	/* Map the aggregate client state to a display string. */
	if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_OPTIMAL) {
		ct_status = "optimal";
		report_lb_c = 1;
	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED) {
		ct_status = "degraded";
	} else if (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_FAILED) {
		ct_status = "failed";
	} else {
		ct_status = "unknown";
	}

	lb_buf[0] = 0;		/* not interested in load balancing config */

	/* Map the individual path state to a display string. */
	if (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip)) {
		status = "removed";
	} else if (MDI_PI_IS_OFFLINE(pip)) {
		status = "offline";
	} else if (MDI_PI_IS_ONLINE(pip)) {
		status = "online";
		report_lb_p = 1;
	} else if (MDI_PI_IS_STANDBY(pip)) {
		status = "standby";
	} else if (MDI_PI_IS_FAULT(pip)) {
		status = "faulted";
	} else {
		status = "unknown";
	}

	if (cdip) {
		ct_path = kmem_alloc(MAXPATHLEN, KM_SLEEP);

		/*
		 * NOTE: Keeping "multipath status: %s" and
		 * "Load balancing: %s" format unchanged in case someone
		 * scrubs /var/adm/messages looking for these messages.
		 */
		if (report_lb_c && report_lb_p) {
			/* client optimal and path online: include LB info */
			if (ct->ct_lb == LOAD_BALANCE_LBA) {
				(void) snprintf(lb_buf, sizeof (lb_buf),
				    "%s, region-size: %d", mdi_load_balance_lba,
				    ct->ct_lb_args->region_size);
			} else if (ct->ct_lb == LOAD_BALANCE_NONE) {
				(void) snprintf(lb_buf, sizeof (lb_buf),
				    "%s", mdi_load_balance_none);
			} else {
				(void) snprintf(lb_buf, sizeof (lb_buf), "%s",
				    mdi_load_balance_rr);
			}

			cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
			    "?%s (%s%d) multipath status: %s: "
			    "path %d %s is %s: Load balancing: %s\n",
			    ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
			    ddi_get_instance(cdip), ct_status,
			    mdi_pi_get_path_instance(pip),
			    mdi_pi_spathname(pip), status, lb_buf);
		} else {
			cmn_err(mdi_debug_consoleonly ? CE_NOTE : CE_CONT,
			    "?%s (%s%d) multipath status: %s: "
			    "path %d %s is %s\n",
			    ddi_pathname(cdip, ct_path), ddi_driver_name(cdip),
			    ddi_get_instance(cdip), ct_status,
			    mdi_pi_get_path_instance(pip),
			    mdi_pi_spathname(pip), status);
		}

		kmem_free(ct_path, MAXPATHLEN);
		MDI_CLIENT_CLEAR_REPORT_DEV_NEEDED(ct);
	}
}
4889
4890 #ifdef DEBUG
4891 /*
4892 * i_mdi_log():
4893 * Utility function for error message management
4894 *
4895 * NOTE: Implementation takes care of trailing \n for cmn_err,
4896 * MDI_DEBUG should not terminate fmt strings with \n.
4897 *
4898 * NOTE: If the level is >= 2, and there is no leading !?^
4899 * then a leading ! is implied (but can be overriden via
4900 * mdi_debug_consoleonly). If you are using kmdb on the console,
4901 * consider setting mdi_debug_consoleonly to 1 as an aid.
4902 */
4903 /*PRINTFLIKE4*/
4904 static void
4905 i_mdi_log(int level, const char *func, dev_info_t *dip, const char *fmt, ...)
4906 {
4907 char name[MAXNAMELEN];
4908 char buf[512];
4909 char *bp;
4910 va_list ap;
4911 int log_only = 0;
4912 int boot_only = 0;
4913 int console_only = 0;
4914
4915 if (dip) {
4916 (void) snprintf(name, sizeof(name), "%s%d: ",
4917 ddi_driver_name(dip), ddi_get_instance(dip));
4918 } else {
4919 name[0] = 0;
4920 }
4921
4922 va_start(ap, fmt);
4923 (void) vsnprintf(buf, sizeof(buf), fmt, ap);
4924 va_end(ap);
4925
4926 switch (buf[0]) {
4927 case '!':
4928 bp = &buf[1];
4929 log_only = 1;
4930 break;
4931 case '?':
4932 bp = &buf[1];
4933 boot_only = 1;
4934 break;
4935 case '^':
4936 bp = &buf[1];
4937 console_only = 1;
4938 break;
4939 default:
4940 if (level >= 2)
4941 log_only = 1; /* ! implied */
4942 bp = buf;
4943 break;
4944 }
4945 if (mdi_debug_logonly) {
4946 log_only = 1;
4947 boot_only = 0;
4948 console_only = 0;
4949 }
4950 if (mdi_debug_consoleonly) {
4951 log_only = 0;
4952 boot_only = 0;
4953 console_only = 1;
4954 level = CE_NOTE;
4955 goto console;
4956 }
4957
4958 switch (level) {
4959 case CE_NOTE:
4960 level = CE_CONT;
4961 /* FALLTHROUGH */
4962 case CE_CONT:
4963 if (boot_only) {
4964 cmn_err(level, "?mdi: %s%s: %s\n", name, func, bp);
4965 } else if (console_only) {
4966 cmn_err(level, "^mdi: %s%s: %s\n", name, func, bp);
4967 } else if (log_only) {
4968 cmn_err(level, "!mdi: %s%s: %s\n", name, func, bp);
4969 } else {
4970 cmn_err(level, "mdi: %s%s: %s\n", name, func, bp);
4971 }
4972 break;
4973
4974 case CE_WARN:
4975 case CE_PANIC:
4976 console:
4977 if (boot_only) {
4978 cmn_err(level, "?mdi: %s%s: %s", name, func, bp);
4979 } else if (console_only) {
4980 cmn_err(level, "^mdi: %s%s: %s", name, func, bp);
4981 } else if (log_only) {
4982 cmn_err(level, "!mdi: %s%s: %s", name, func, bp);
4983 } else {
4984 cmn_err(level, "mdi: %s%s: %s", name, func, bp);
4985 }
4986 break;
4987 default:
4988 cmn_err(level, "mdi: %s%s", name, bp);
4989 break;
4990 }
4991 }
4992 #endif /* DEBUG */
4993
/*
 * i_mdi_client_online():
 *	Client online notification.  Marks the client online, restores
 *	the binding between the mdi_client_t and its dev_info node,
 *	powers all pHCIs if no power count is held yet, and takes a PM
 *	hold on the client.  All under the client lock.
 */
void
i_mdi_client_online(dev_info_t *ct_dip)
{
	mdi_client_t	*ct;

	/*
	 * Client online notification. Mark client state as online
	 * restore our binding with dev_info node
	 */
	ct = i_devi_get_client(ct_dip);
	ASSERT(ct != NULL);
	MDI_CLIENT_LOCK(ct);
	MDI_CLIENT_SET_ONLINE(ct);
	/* catch for any memory leaks */
	ASSERT((ct->ct_dip == NULL) || (ct->ct_dip == ct_dip));
	ct->ct_dip = ct_dip;

	/* first online: power up all pHCIs providing paths to us */
	if (ct->ct_power_cnt == 0)
		(void) i_mdi_power_all_phci(ct);

	MDI_DEBUG(4, (MDI_NOTE, ct_dip,
	    "i_mdi_pm_hold_client %p", (void *)ct));
	i_mdi_pm_hold_client(ct, 1);

	MDI_CLIENT_UNLOCK(ct);
}
5020
5021 void
5022 i_mdi_phci_online(dev_info_t *ph_dip)
5023 {
5024 mdi_phci_t *ph;
5025
5026 /* pHCI online notification. Mark state accordingly */
5027 ph = i_devi_get_phci(ph_dip);
5028 ASSERT(ph != NULL);
5029 MDI_PHCI_LOCK(ph);
5030 MDI_PHCI_SET_ONLINE(ph);
5031 MDI_PHCI_UNLOCK(ph);
5032 }
5033
5034 /*
5035 * mdi_devi_online():
5036 * Online notification from NDI framework on pHCI/client
5037 * device online.
5038 * Return Values:
5039 * NDI_SUCCESS
5040 * MDI_FAILURE
5041 */
5042 /*ARGSUSED*/
5043 int
5044 mdi_devi_online(dev_info_t *dip, uint_t flags)
5045 {
5046 if (MDI_PHCI(dip)) {
5047 i_mdi_phci_online(dip);
5048 }
5049
5050 if (MDI_CLIENT(dip)) {
5051 i_mdi_client_online(dip);
5052 }
5053 return (NDI_SUCCESS);
5054 }
5055
5056 /*
5057 * mdi_devi_offline():
5058 * Offline notification from NDI framework on pHCI/Client device
5059 * offline.
5060 *
5061 * Return Values:
5062 * NDI_SUCCESS
5063 * NDI_FAILURE
5064 */
5065 /*ARGSUSED*/
5066 int
5067 mdi_devi_offline(dev_info_t *dip, uint_t flags)
5068 {
5069 int rv = NDI_SUCCESS;
5070
5071 if (MDI_CLIENT(dip)) {
5072 rv = i_mdi_client_offline(dip, flags);
5073 if (rv != NDI_SUCCESS)
5074 return (rv);
5075 }
5076
5077 if (MDI_PHCI(dip)) {
5078 rv = i_mdi_phci_offline(dip, flags);
5079
5080 if ((rv != NDI_SUCCESS) && MDI_CLIENT(dip)) {
5081 /* set client back online */
5082 i_mdi_client_online(dip);
5083 }
5084 }
5085
5086 return (rv);
5087 }
5088
/*
 * i_mdi_phci_offline():
 *	Offline a pHCI.  Fails with NDI_BUSY if the pHCI or any client
 *	is in a transient/failover state, or if a client for which this
 *	pHCI is the last path cannot itself be offlined.  On success,
 *	the pHCI is marked offline and every child mdi_pathinfo node is
 *	offlined.  Clients offlined during a walk that later fails are
 *	onlined again before returning.
 */
/*ARGSUSED*/
static int
i_mdi_phci_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	mdi_pathinfo_t	*failed_pip = NULL;
	dev_info_t	*cdip;

	/*
	 * pHCI component offline notification
	 * Make sure that this pHCI instance is free to be offlined.
	 * If it is OK to proceed, Offline and remove all the child
	 * mdi_pathinfo nodes.  This process automatically offlines
	 * corresponding client devices, for which this pHCI provides
	 * critical services.
	 */
	ph = i_devi_get_phci(dip);
	MDI_DEBUG(2, (MDI_NOTE, dip,
	    "called %p %p", (void *)dip, (void *)ph));
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_DEBUG(1, (MDI_WARN, dip,
		    "!pHCI already offlined: %p", (void *)dip));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_SUCCESS);
	}

	/*
	 * Check to see if the pHCI can be offlined
	 */
	if (ph->ph_unstable) {
		MDI_DEBUG(1, (MDI_WARN, dip,
		    "!One or more target devices are in transient state. "
		    "This device can not be removed at this moment. "
		    "Please try again later."));
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/*
	 * First pass: veto the offline if any client is failing over,
	 * and offline any client for which this is the last good path.
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		/*
		 * The mdi_pathinfo state is OK. Check the client state.
		 * If failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			/*
			 * Failover is in progress, Fail the DR
			 */
			MDI_DEBUG(1, (MDI_WARN, dip,
			    "!pHCI device is busy. "
			    "This device can not be removed at this moment. "
			    "Please try again later."));
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see of we are removing the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			/* locks dropped so ndi_devi_offline() can block */
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			if (ndi_devi_offline(cdip,
			    NDI_DEVFS_CLEAN) != NDI_SUCCESS) {
				/*
				 * ndi_devi_offline() failed.
				 * This pHCI provides the critical path
				 * to one or more client devices.
				 * Return busy.
				 */
				MDI_PHCI_LOCK(ph);
				MDI_DEBUG(1, (MDI_WARN, dip,
				    "!pHCI device is busy. "
				    "This device can not be removed at this "
				    "moment. Please try again later."));
				failed_pip = pip;
				break;
			} else {
				MDI_PHCI_LOCK(ph);
				pip = next;
			}
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	/*
	 * Rollback: re-online (or re-offline) the clients touched
	 * before the failure point, then fail the DR with NDI_BUSY.
	 */
	if (failed_pip) {
		pip = ph->ph_path_head;
		while (pip != failed_pip) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_online(cdip, 0);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				if (cdip) {
					MDI_PI_UNLOCK(pip);
					i_mdi_client_unlock(ct);
					MDI_PHCI_UNLOCK(ph);
					(void) ndi_devi_offline(cdip,
					    NDI_DEVFS_CLEAN);
					MDI_PHCI_LOCK(ph);
					pip = next;
					continue;
				}
				break;
			}
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			pip = next;
		}
		MDI_PHCI_UNLOCK(ph);
		return (NDI_BUSY);
	}

	/*
	 * Mark the pHCI as offline
	 */
	MDI_PHCI_SET_OFFLINE(ph);

	/*
	 * Mark the child mdi_pathinfo nodes as transient
	 */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		MDI_PI_SET_OFFLINING(pip);
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);
	/*
	 * Give a chance for any pending commands to execute
	 */
	delay_random(mdi_delay);
	MDI_PHCI_LOCK(ph);
	/* final pass: offline each path; any survivor fails the DR */
	pip = ph->ph_path_head;
	while (pip != NULL) {
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
		(void) i_mdi_pi_offline(pip, flags);
		MDI_PI_LOCK(pip);
		ct = MDI_PI(pip)->pi_client;
		if (!MDI_PI_IS_OFFLINE(pip)) {
			MDI_DEBUG(1, (MDI_WARN, dip,
			    "!pHCI device is busy. "
			    "This device can not be removed at this moment. "
			    "Please try again later."));
			MDI_PI_UNLOCK(pip);
			MDI_PHCI_SET_ONLINE(ph);
			MDI_PHCI_UNLOCK(ph);
			return (NDI_BUSY);
		}
		MDI_PI_UNLOCK(pip);
		pip = next;
	}
	MDI_PHCI_UNLOCK(ph);

	return (rv);
}
5287
/*
 * mdi_phci_mark_retiring():
 *	Walk every path hanging off the pHCI and, wherever this pHCI
 *	provides the last usable path to a client, mark that client dip
 *	as retiring via e_ddi_mark_retiring().  A no-op for non-pHCI
 *	dips and for pHCIs that are already offline (no last path).
 */
void
mdi_phci_mark_retiring(dev_info_t *dip, char **cons_array)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	dev_info_t	*cdip;

	if (!MDI_PHCI(dip))
		return;

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		return;
	}

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		/* has no last path */
		MDI_PHCI_UNLOCK(ph);
		return;
	}

	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		MDI_PI_UNLOCK(pip);

		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			/* Last path. Mark client dip as retiring */
			/* drop locks: e_ddi_mark_retiring() may block */
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			(void) e_ddi_mark_retiring(cdip, cons_array);
			MDI_PHCI_LOCK(ph);
			pip = next;
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	MDI_PHCI_UNLOCK(ph);

	return;
}
5342
/*
 * mdi_phci_retire_notify():
 *	Retire-notify pass for a pHCI.  Clears *constraint (vetoing the
 *	retire) if the pHCI or any of its clients is in a transient or
 *	failover state; otherwise forwards e_ddi_retire_notify() to each
 *	client for which this pHCI provides the last usable path.
 *	A no-op for non-pHCI dips and for already-offline pHCIs.
 */
void
mdi_phci_retire_notify(dev_info_t *dip, int *constraint)
{
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*next;
	dev_info_t	*cdip;

	if (!MDI_PHCI(dip))
		return;

	ph = i_devi_get_phci(dip);
	if (ph == NULL)
		return;

	MDI_PHCI_LOCK(ph);

	if (MDI_PHCI_IS_OFFLINE(ph)) {
		MDI_PHCI_UNLOCK(ph);
		/* not last path */
		return;
	}

	if (ph->ph_unstable) {
		MDI_PHCI_UNLOCK(ph);
		/* can't check for constraints */
		*constraint = 0;
		return;
	}

	pip = ph->ph_path_head;
	while (pip != NULL) {
		MDI_PI_LOCK(pip);
		next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;

		/*
		 * The mdi_pathinfo state is OK. Check the client state.
		 * If failover in progress fail the pHCI from offlining
		 */
		ct = MDI_PI(pip)->pi_client;
		i_mdi_client_lock(ct, pip);
		if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
		    (ct->ct_unstable)) {
			/*
			 * Failover is in progress, can't check for constraints
			 */
			MDI_PI_UNLOCK(pip);
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			*constraint = 0;
			return;
		}
		MDI_PI_UNLOCK(pip);

		/*
		 * Check to see of we are retiring the last path of this
		 * client device...
		 */
		cdip = ct->ct_dip;
		if (cdip && (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
		    (i_mdi_client_compute_state(ct, ph) ==
		    MDI_CLIENT_STATE_FAILED)) {
			/* drop locks: e_ddi_retire_notify() may block */
			i_mdi_client_unlock(ct);
			MDI_PHCI_UNLOCK(ph);
			(void) e_ddi_retire_notify(cdip, constraint);
			MDI_PHCI_LOCK(ph);
			pip = next;
		} else {
			i_mdi_client_unlock(ct);
			pip = next;
		}
	}

	MDI_PHCI_UNLOCK(ph);

	return;
}
5421
5422 /*
5423 * offline the path(s) hanging off the pHCI. If the
5424 * last path to any client, check that constraints
5425 * have been applied.
5426 *
5427 * If constraint is 0, we aren't going to retire the
5428 * pHCI. However we still need to go through the paths
5429 * calling e_ddi_retire_finalize() to clear their
5430 * contract barriers.
5431 */
5432 void
5433 mdi_phci_retire_finalize(dev_info_t *dip, int phci_only, void *constraint)
5434 {
5435 mdi_phci_t *ph;
5436 mdi_client_t *ct;
5437 mdi_pathinfo_t *pip;
5438 mdi_pathinfo_t *next;
5439 dev_info_t *cdip;
5440 int unstable = 0;
5441 int tmp_constraint;
5442
5443 if (!MDI_PHCI(dip))
5444 return;
5445
5446 ph = i_devi_get_phci(dip);
5447 if (ph == NULL) {
5448 /* no last path and no pips */
5449 return;
5450 }
5451
5452 MDI_PHCI_LOCK(ph);
5453
5454 if (MDI_PHCI_IS_OFFLINE(ph)) {
5455 MDI_PHCI_UNLOCK(ph);
5456 /* no last path and no pips */
5457 return;
5458 }
5459
5460 /*
5461 * Check to see if the pHCI can be offlined
5462 */
5463 if (ph->ph_unstable) {
5464 unstable = 1;
5465 }
5466
5467 pip = ph->ph_path_head;
5468 while (pip != NULL) {
5469 MDI_PI_LOCK(pip);
5470 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5471
5472 /*
5473 * if failover in progress fail the pHCI from offlining
5474 */
5475 ct = MDI_PI(pip)->pi_client;
5476 i_mdi_client_lock(ct, pip);
5477 if ((MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) ||
5478 (ct->ct_unstable)) {
5479 unstable = 1;
5480 }
5481 MDI_PI_UNLOCK(pip);
5482
5483 /*
5484 * Check to see of we are removing the last path of this
5485 * client device...
5486 */
5487 cdip = ct->ct_dip;
5488 if (!phci_only && cdip &&
5489 (i_ddi_node_state(cdip) >= DS_INITIALIZED) &&
5490 (i_mdi_client_compute_state(ct, ph) ==
5491 MDI_CLIENT_STATE_FAILED)) {
5492 i_mdi_client_unlock(ct);
5493 MDI_PHCI_UNLOCK(ph);
5494 /*
5495 * This is the last path to this client.
5496 *
5497 * Constraint will only be set to 1 if this client can
5498 * be retired (as already determined by
5499 * mdi_phci_retire_notify). However we don't actually
5500 * need to retire the client (we just retire the last
5501 * path - MPXIO will then fail all I/Os to the client).
5502 * But we still need to call e_ddi_retire_finalize so
5503 * the contract barriers can be cleared. Therefore we
5504 * temporarily set constraint = 0 so that the client
5505 * dip is not retired.
5506 */
5507 tmp_constraint = 0;
5508 (void) e_ddi_retire_finalize(cdip, &tmp_constraint);
5509 MDI_PHCI_LOCK(ph);
5510 pip = next;
5511 } else {
5512 i_mdi_client_unlock(ct);
5513 pip = next;
5514 }
5515 }
5516
5517 if (!phci_only && *((int *)constraint) == 0) {
5518 MDI_PHCI_UNLOCK(ph);
5519 return;
5520 }
5521
5522 /*
5523 * Cannot offline pip(s)
5524 */
5525 if (unstable) {
5526 cmn_err(CE_WARN, "%s%d: mdi_phci_retire_finalize: "
5527 "pHCI in transient state, cannot retire",
5528 ddi_driver_name(dip), ddi_get_instance(dip));
5529 MDI_PHCI_UNLOCK(ph);
5530 return;
5531 }
5532
5533 /*
5534 * Mark the pHCI as offline
5535 */
5536 MDI_PHCI_SET_OFFLINE(ph);
5537
5538 /*
5539 * Mark the child mdi_pathinfo nodes as transient
5540 */
5541 pip = ph->ph_path_head;
5542 while (pip != NULL) {
5543 MDI_PI_LOCK(pip);
5544 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5545 MDI_PI_SET_OFFLINING(pip);
5546 MDI_PI_UNLOCK(pip);
5547 pip = next;
5548 }
5549 MDI_PHCI_UNLOCK(ph);
5550 /*
5551 * Give a chance for any pending commands to execute
5552 */
5553 delay_random(mdi_delay);
5554 MDI_PHCI_LOCK(ph);
5555 pip = ph->ph_path_head;
5556 while (pip != NULL) {
5557 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5558 (void) i_mdi_pi_offline(pip, 0);
5559 MDI_PI_LOCK(pip);
5560 ct = MDI_PI(pip)->pi_client;
5561 if (!MDI_PI_IS_OFFLINE(pip)) {
5562 cmn_err(CE_WARN, "mdi_phci_retire_finalize: "
5563 "path %d %s busy, cannot offline",
5564 mdi_pi_get_path_instance(pip),
5565 mdi_pi_spathname(pip));
5566 MDI_PI_UNLOCK(pip);
5567 MDI_PHCI_SET_ONLINE(ph);
5568 MDI_PHCI_UNLOCK(ph);
5569 return;
5570 }
5571 MDI_PI_UNLOCK(pip);
5572 pip = next;
5573 }
5574 MDI_PHCI_UNLOCK(ph);
5575
5576 return;
5577 }
5578
5579 void
5580 mdi_phci_unretire(dev_info_t *dip)
5581 {
5582 mdi_phci_t *ph;
5583 mdi_pathinfo_t *pip;
5584 mdi_pathinfo_t *next;
5585
5586 ASSERT(MDI_PHCI(dip));
5587
5588 /*
5589 * Online the phci
5590 */
5591 i_mdi_phci_online(dip);
5592
5593 ph = i_devi_get_phci(dip);
5594 MDI_PHCI_LOCK(ph);
5595 pip = ph->ph_path_head;
5596 while (pip != NULL) {
5597 MDI_PI_LOCK(pip);
5598 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
5599 MDI_PI_UNLOCK(pip);
5600 (void) i_mdi_pi_online(pip, 0);
5601 pip = next;
5602 }
5603 MDI_PHCI_UNLOCK(ph);
5604 }
5605
/*
 * i_mdi_client_offline():
 *	Client offline notification.  Fails with NDI_BUSY if any path
 *	is in a transient state or a failover is in progress; otherwise
 *	marks the client offline and, when NDI_DEVI_REMOVE is set,
 *	drops the client's binding to its dev_info node.
 */
/*ARGSUSED*/
static int
i_mdi_client_offline(dev_info_t *dip, uint_t flags)
{
	int		rv = NDI_SUCCESS;
	mdi_client_t	*ct;

	/*
	 * Client component to go offline.  Make sure that we are
	 * not in failing over state and update client state
	 * accordingly
	 */
	ct = i_devi_get_client(dip);
	MDI_DEBUG(2, (MDI_NOTE, dip,
	    "called %p %p", (void *)dip, (void *)ct));
	if (ct != NULL) {
		MDI_CLIENT_LOCK(ct);
		if (ct->ct_unstable) {
			/*
			 * One or more paths are in transient state,
			 * Dont allow offline of a client device
			 */
			MDI_DEBUG(1, (MDI_WARN, dip,
			    "!One or more paths to "
			    "this device are in transient state. "
			    "This device can not be removed at this moment. "
			    "Please try again later."));
			MDI_CLIENT_UNLOCK(ct);
			return (NDI_BUSY);
		}
		if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
			/*
			 * Failover is in progress, Dont allow DR of
			 * a client device
			 */
			MDI_DEBUG(1, (MDI_WARN, dip,
			    "!Client device is Busy. "
			    "This device can not be removed at this moment. "
			    "Please try again later."));
			MDI_CLIENT_UNLOCK(ct);
			return (NDI_BUSY);
		}
		MDI_CLIENT_SET_OFFLINE(ct);

		/*
		 * Unbind our relationship with the dev_info node
		 */
		if (flags & NDI_DEVI_REMOVE) {
			ct->ct_dip = NULL;
		}
		MDI_CLIENT_UNLOCK(ct);
	}
	return (rv);
}
5660
5661 /*
5662 * mdi_pre_attach():
5663 * Pre attach() notification handler
5664 */
5665 /*ARGSUSED*/
5666 int
5667 mdi_pre_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
5668 {
5669 /* don't support old DDI_PM_RESUME */
5670 if ((DEVI(dip)->devi_mdi_component != MDI_COMPONENT_NONE) &&
5671 (cmd == DDI_PM_RESUME))
5672 return (DDI_FAILURE);
5673
5674 return (DDI_SUCCESS);
5675 }
5676
5677 /*
5678 * mdi_post_attach():
5679 * Post attach() notification handler
5680 */
5681 /*ARGSUSED*/
5682 void
5683 mdi_post_attach(dev_info_t *dip, ddi_attach_cmd_t cmd, int error)
5684 {
5685 mdi_phci_t *ph;
5686 mdi_client_t *ct;
5687 mdi_vhci_t *vh;
5688
5689 if (MDI_PHCI(dip)) {
5690 ph = i_devi_get_phci(dip);
5691 ASSERT(ph != NULL);
5692
5693 MDI_PHCI_LOCK(ph);
5694 switch (cmd) {
5695 case DDI_ATTACH:
5696 MDI_DEBUG(2, (MDI_NOTE, dip,
5697 "phci post_attach called %p", (void *)ph));
5698 if (error == DDI_SUCCESS) {
5699 MDI_PHCI_SET_ATTACH(ph);
5700 } else {
5701 MDI_DEBUG(1, (MDI_NOTE, dip,
5702 "!pHCI post_attach failed: error %d",
5703 error));
5704 MDI_PHCI_SET_DETACH(ph);
5705 }
5706 break;
5707
5708 case DDI_RESUME:
5709 MDI_DEBUG(2, (MDI_NOTE, dip,
5710 "pHCI post_resume: called %p", (void *)ph));
5711 if (error == DDI_SUCCESS) {
5712 MDI_PHCI_SET_RESUME(ph);
5713 } else {
5714 MDI_DEBUG(1, (MDI_NOTE, dip,
5715 "!pHCI post_resume failed: error %d",
5716 error));
5717 MDI_PHCI_SET_SUSPEND(ph);
5718 }
5719 break;
5720 }
5721 MDI_PHCI_UNLOCK(ph);
5722 }
5723
5724 if (MDI_CLIENT(dip)) {
5725 ct = i_devi_get_client(dip);
5726 ASSERT(ct != NULL);
5727
5728 MDI_CLIENT_LOCK(ct);
5729 switch (cmd) {
5730 case DDI_ATTACH:
5731 MDI_DEBUG(2, (MDI_NOTE, dip,
5732 "client post_attach called %p", (void *)ct));
5733 if (error != DDI_SUCCESS) {
5734 MDI_DEBUG(1, (MDI_NOTE, dip,
5735 "!client post_attach failed: error %d",
5736 error));
5737 MDI_CLIENT_SET_DETACH(ct);
5738 MDI_DEBUG(4, (MDI_WARN, dip,
5739 "i_mdi_pm_reset_client"));
5740 i_mdi_pm_reset_client(ct);
5741 break;
5742 }
5743
5744 /*
5745 * Client device has successfully attached, inform
5746 * the vhci.
5747 */
5748 vh = ct->ct_vhci;
5749 if (vh->vh_ops->vo_client_attached)
5750 (*vh->vh_ops->vo_client_attached)(dip);
5751
5752 MDI_CLIENT_SET_ATTACH(ct);
5753 break;
5754
5755 case DDI_RESUME:
5756 MDI_DEBUG(2, (MDI_NOTE, dip,
5757 "client post_attach: called %p", (void *)ct));
5758 if (error == DDI_SUCCESS) {
5759 MDI_CLIENT_SET_RESUME(ct);
5760 } else {
5761 MDI_DEBUG(1, (MDI_NOTE, dip,
5762 "!client post_resume failed: error %d",
5763 error));
5764 MDI_CLIENT_SET_SUSPEND(ct);
5765 }
5766 break;
5767 }
5768 MDI_CLIENT_UNLOCK(ct);
5769 }
5770 }
5771
5772 /*
5773 * mdi_pre_detach():
5774 * Pre detach notification handler
5775 */
5776 /*ARGSUSED*/
5777 int
5778 mdi_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5779 {
5780 int rv = DDI_SUCCESS;
5781
5782 if (MDI_CLIENT(dip)) {
5783 (void) i_mdi_client_pre_detach(dip, cmd);
5784 }
5785
5786 if (MDI_PHCI(dip)) {
5787 rv = i_mdi_phci_pre_detach(dip, cmd);
5788 }
5789
5790 return (rv);
5791 }
5792
/*
 * i_mdi_phci_pre_detach():
 *	Pre-detach handling for a pHCI.  DDI_DETACH fails while any
 *	path remains attached.  DDI_SUSPEND first suspends every client
 *	reachable through this pHCI (resuming those already suspended
 *	again if any later suspend fails) before marking the pHCI
 *	suspended.  Any other command fails.
 */
/*ARGSUSED*/
static int
i_mdi_phci_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	int		rv = DDI_SUCCESS;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	mdi_pathinfo_t	*pip;
	mdi_pathinfo_t	*failed_pip = NULL;
	mdi_pathinfo_t	*next;

	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		return (rv);
	}

	MDI_PHCI_LOCK(ph);
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "pHCI pre_detach: called %p", (void *)ph));
		if (!MDI_PHCI_IS_OFFLINE(ph)) {
			/*
			 * mdi_pathinfo nodes are still attached to
			 * this pHCI. Fail the detach for this pHCI.
			 */
			MDI_DEBUG(2, (MDI_WARN, dip,
			    "pHCI pre_detach: paths are still attached %p",
			    (void *)ph));
			rv = DDI_FAILURE;
			break;
		}
		MDI_PHCI_SET_DETACH(ph);
		break;

	case DDI_SUSPEND:
		/*
		 * pHCI is getting suspended. Since mpxio client
		 * devices may not be suspended at this point, to avoid
		 * a potential stack overflow, it is important to suspend
		 * client devices before pHCI can be suspended.
		 */

		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "pHCI pre_suspend: called %p", (void *)ph));
		/*
		 * Suspend all the client devices accessible through this pHCI
		 */
		pip = ph->ph_path_head;
		while (pip != NULL && rv == DDI_SUCCESS) {
			dev_info_t *cdip;
			MDI_PI_LOCK(pip);
			next =
			    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
			ct = MDI_PI(pip)->pi_client;
			i_mdi_client_lock(ct, pip);
			cdip = ct->ct_dip;
			MDI_PI_UNLOCK(pip);
			/* skip clients already detached or suspended */
			if ((MDI_CLIENT_IS_DETACHED(ct) == 0) &&
			    MDI_CLIENT_IS_SUSPENDED(ct) == 0) {
				i_mdi_client_unlock(ct);
				if ((rv = devi_detach(cdip, DDI_SUSPEND)) !=
				    DDI_SUCCESS) {
					/*
					 * Suspend of one of the client
					 * device has failed.
					 */
					MDI_DEBUG(1, (MDI_WARN, dip,
					    "!suspend of device (%s%d) failed.",
					    ddi_driver_name(cdip),
					    ddi_get_instance(cdip)));
					failed_pip = pip;
					break;
				}
			} else {
				i_mdi_client_unlock(ct);
			}
			pip = next;
		}

		if (rv == DDI_SUCCESS) {
			/*
			 * Suspend of client devices is complete. Proceed
			 * with pHCI suspend.
			 */
			MDI_PHCI_SET_SUSPEND(ph);
		} else {
			/*
			 * Revert back all the suspended client device states
			 * to converse.
			 */
			pip = ph->ph_path_head;
			while (pip != failed_pip) {
				dev_info_t *cdip;
				MDI_PI_LOCK(pip);
				next =
				    (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
				ct = MDI_PI(pip)->pi_client;
				i_mdi_client_lock(ct, pip);
				cdip = ct->ct_dip;
				MDI_PI_UNLOCK(pip);
				if (MDI_CLIENT_IS_SUSPENDED(ct)) {
					i_mdi_client_unlock(ct);
					(void) devi_attach(cdip, DDI_RESUME);
				} else {
					i_mdi_client_unlock(ct);
				}
				pip = next;
			}
		}
		break;

	default:
		rv = DDI_FAILURE;
		break;
	}
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}
5912
5913 /*ARGSUSED*/
5914 static int
5915 i_mdi_client_pre_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
5916 {
5917 int rv = DDI_SUCCESS;
5918 mdi_client_t *ct;
5919
5920 ct = i_devi_get_client(dip);
5921 if (ct == NULL) {
5922 return (rv);
5923 }
5924
5925 MDI_CLIENT_LOCK(ct);
5926 switch (cmd) {
5927 case DDI_DETACH:
5928 MDI_DEBUG(2, (MDI_NOTE, dip,
5929 "client pre_detach: called %p",
5930 (void *)ct));
5931 MDI_CLIENT_SET_DETACH(ct);
5932 break;
5933
5934 case DDI_SUSPEND:
5935 MDI_DEBUG(2, (MDI_NOTE, dip,
5936 "client pre_suspend: called %p",
5937 (void *)ct));
5938 MDI_CLIENT_SET_SUSPEND(ct);
5939 break;
5940
5941 default:
5942 rv = DDI_FAILURE;
5943 break;
5944 }
5945 MDI_CLIENT_UNLOCK(ct);
5946 return (rv);
5947 }
5948
5949 /*
5950 * mdi_post_detach():
5951 * Post detach notification handler
5952 */
5953 /*ARGSUSED*/
5954 void
5955 mdi_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
5956 {
5957 /*
5958 * Detach/Suspend of mpxio component failed. Update our state
5959 * too
5960 */
5961 if (MDI_PHCI(dip))
5962 i_mdi_phci_post_detach(dip, cmd, error);
5963
5964 if (MDI_CLIENT(dip))
5965 i_mdi_client_post_detach(dip, cmd, error);
5966 }
5967
/*ARGSUSED*/
static void
i_mdi_phci_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	mdi_phci_t	*ph;

	/*
	 * Post-detach/post-suspend notification for a pHCI node.  If the
	 * framework operation failed (error != DDI_SUCCESS) the MDI pHCI
	 * state is rolled back to the attached/resumed side so it stays
	 * in sync with the actual devinfo state.
	 */
	ph = i_devi_get_phci(dip);
	if (ph == NULL) {
		/* Not (or no longer) registered as a pHCI; nothing to do. */
		return;
	}

	MDI_PHCI_LOCK(ph);
	/*
	 * Detach of pHCI failed. Restore back converse
	 * state
	 */
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "pHCI post_detach: called %p",
		    (void *)ph));
		/* Detach failed: mark the pHCI attached again. */
		if (error != DDI_SUCCESS)
			MDI_PHCI_SET_ATTACH(ph);
		break;

	case DDI_SUSPEND:
		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "pHCI post_suspend: called %p",
		    (void *)ph));
		/* Suspend failed: mark the pHCI resumed again. */
		if (error != DDI_SUCCESS)
			MDI_PHCI_SET_RESUME(ph);
		break;
	}
	MDI_PHCI_UNLOCK(ph);
}
6007
/*ARGSUSED*/
static void
i_mdi_client_post_detach(dev_info_t *dip, ddi_detach_cmd_t cmd, int error)
{
	mdi_client_t	*ct;

	/*
	 * Post-detach/post-suspend notification for a client node: undo
	 * the power-management bookkeeping and, if the framework
	 * operation failed, roll MDI client state back to the
	 * attached/resumed side.
	 */
	ct = i_devi_get_client(dip);
	if (ct == NULL) {
		return;
	}
	MDI_CLIENT_LOCK(ct);
	/*
	 * Detach of Client failed. Restore back converse
	 * state
	 */
	switch (cmd) {
	case DDI_DETACH:
		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "client post_detach: called %p", (void *)ct));
		if (DEVI_IS_ATTACHING(dip)) {
			/*
			 * Detach happening inside an attach cycle: just
			 * drop the holds taken for this client's paths.
			 */
			MDI_DEBUG(4, (MDI_NOTE, dip,
			    "i_mdi_pm_rele_client\n"));
			i_mdi_pm_rele_client(ct, ct->ct_path_count);
		} else {
			/* Plain detach: reset all power state outright. */
			MDI_DEBUG(4, (MDI_NOTE, dip,
			    "i_mdi_pm_reset_client\n"));
			i_mdi_pm_reset_client(ct);
		}
		/* Detach failed: the client is effectively still attached. */
		if (error != DDI_SUCCESS)
			MDI_CLIENT_SET_ATTACH(ct);
		break;

	case DDI_SUSPEND:
		MDI_DEBUG(2, (MDI_NOTE, dip,
		    "called %p", (void *)ct));
		/* Suspend failed: the client is effectively resumed. */
		if (error != DDI_SUCCESS)
			MDI_CLIENT_SET_RESUME(ct);
		break;
	}
	MDI_CLIENT_UNLOCK(ct);
}
6049
6050 int
6051 mdi_pi_kstat_exists(mdi_pathinfo_t *pip)
6052 {
6053 return (MDI_PI(pip)->pi_kstats ? 1 : 0);
6054 }
6055
6056 /*
6057 * create and install per-path (client - pHCI) statistics
6058 * I/O stats supported: nread, nwritten, reads, and writes
6059 * Error stats - hard errors, soft errors, & transport errors
6060 */
6061 int
6062 mdi_pi_kstat_create(mdi_pathinfo_t *pip, char *ksname)
6063 {
6064 kstat_t *kiosp, *kerrsp;
6065 struct pi_errs *nsp;
6066 struct mdi_pi_kstats *mdi_statp;
6067
6068 if (MDI_PI(pip)->pi_kstats != NULL)
6069 return (MDI_SUCCESS);
6070
6071 if ((kiosp = kstat_create("mdi", 0, ksname, "iopath",
6072 KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT)) == NULL) {
6073 return (MDI_FAILURE);
6074 }
6075
6076 (void) strcat(ksname, ",err");
6077 kerrsp = kstat_create("mdi", 0, ksname, "iopath_errors",
6078 KSTAT_TYPE_NAMED,
6079 sizeof (struct pi_errs) / sizeof (kstat_named_t), 0);
6080 if (kerrsp == NULL) {
6081 kstat_delete(kiosp);
6082 return (MDI_FAILURE);
6083 }
6084
6085 nsp = (struct pi_errs *)kerrsp->ks_data;
6086 kstat_named_init(&nsp->pi_softerrs, "Soft Errors", KSTAT_DATA_UINT32);
6087 kstat_named_init(&nsp->pi_harderrs, "Hard Errors", KSTAT_DATA_UINT32);
6088 kstat_named_init(&nsp->pi_transerrs, "Transport Errors",
6089 KSTAT_DATA_UINT32);
6090 kstat_named_init(&nsp->pi_icnt_busy, "Interconnect Busy",
6091 KSTAT_DATA_UINT32);
6092 kstat_named_init(&nsp->pi_icnt_errors, "Interconnect Errors",
6093 KSTAT_DATA_UINT32);
6094 kstat_named_init(&nsp->pi_phci_rsrc, "pHCI No Resources",
6095 KSTAT_DATA_UINT32);
6096 kstat_named_init(&nsp->pi_phci_localerr, "pHCI Local Errors",
6097 KSTAT_DATA_UINT32);
6098 kstat_named_init(&nsp->pi_phci_invstate, "pHCI Invalid State",
6099 KSTAT_DATA_UINT32);
6100 kstat_named_init(&nsp->pi_failedfrom, "Failed From",
6101 KSTAT_DATA_UINT32);
6102 kstat_named_init(&nsp->pi_failedto, "Failed To", KSTAT_DATA_UINT32);
6103
6104 mdi_statp = kmem_alloc(sizeof (*mdi_statp), KM_SLEEP);
6105 mdi_statp->pi_kstat_ref = 1;
6106 mdi_statp->pi_kstat_iostats = kiosp;
6107 mdi_statp->pi_kstat_errstats = kerrsp;
6108 kstat_install(kiosp);
6109 kstat_install(kerrsp);
6110 MDI_PI(pip)->pi_kstats = mdi_statp;
6111 return (MDI_SUCCESS);
6112 }
6113
6114 /*
6115 * destroy per-path properties
6116 */
6117 static void
6118 i_mdi_pi_kstat_destroy(mdi_pathinfo_t *pip)
6119 {
6120
6121 struct mdi_pi_kstats *mdi_statp;
6122
6123 if (MDI_PI(pip)->pi_kstats == NULL)
6124 return;
6125 if ((mdi_statp = MDI_PI(pip)->pi_kstats) == NULL)
6126 return;
6127
6128 MDI_PI(pip)->pi_kstats = NULL;
6129
6130 /*
6131 * the kstat may be shared between multiple pathinfo nodes
6132 * decrement this pathinfo's usage, removing the kstats
6133 * themselves when the last pathinfo reference is removed.
6134 */
6135 ASSERT(mdi_statp->pi_kstat_ref > 0);
6136 if (--mdi_statp->pi_kstat_ref != 0)
6137 return;
6138
6139 kstat_delete(mdi_statp->pi_kstat_iostats);
6140 kstat_delete(mdi_statp->pi_kstat_errstats);
6141 kmem_free(mdi_statp, sizeof (*mdi_statp));
6142 }
6143
6144 /*
6145 * update I/O paths KSTATS
6146 */
6147 void
6148 mdi_pi_kstat_iosupdate(mdi_pathinfo_t *pip, struct buf *bp)
6149 {
6150 kstat_t *iostatp;
6151 size_t xfer_cnt;
6152
6153 ASSERT(pip != NULL);
6154
6155 /*
6156 * I/O can be driven across a path prior to having path
6157 * statistics available, i.e. probe(9e).
6158 */
6159 if (bp != NULL && MDI_PI(pip)->pi_kstats != NULL) {
6160 iostatp = MDI_PI(pip)->pi_kstats->pi_kstat_iostats;
6161 xfer_cnt = bp->b_bcount - bp->b_resid;
6162 if (bp->b_flags & B_READ) {
6163 KSTAT_IO_PTR(iostatp)->reads++;
6164 KSTAT_IO_PTR(iostatp)->nread += xfer_cnt;
6165 } else {
6166 KSTAT_IO_PTR(iostatp)->writes++;
6167 KSTAT_IO_PTR(iostatp)->nwritten += xfer_cnt;
6168 }
6169 }
6170 }
6171
6172 /*
6173 * Enable the path(specific client/target/initiator)
6174 * Enabling a path means that MPxIO may select the enabled path for routing
6175 * future I/O requests, subject to other path state constraints.
6176 */
6177 int
6178 mdi_pi_enable_path(mdi_pathinfo_t *pip, int flags)
6179 {
6180 mdi_phci_t *ph;
6181
6182 ph = MDI_PI(pip)->pi_phci;
6183 if (ph == NULL) {
6184 MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
6185 "!failed: path %s %p: NULL ph",
6186 mdi_pi_spathname(pip), (void *)pip));
6187 return (MDI_FAILURE);
6188 }
6189
6190 (void) i_mdi_enable_disable_path(pip, ph->ph_vhci, flags,
6191 MDI_ENABLE_OP);
6192 MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
6193 "!returning success pip = %p. ph = %p",
6194 (void *)pip, (void *)ph));
6195 return (MDI_SUCCESS);
6196
6197 }
6198
6199 /*
6200 * Disable the path (specific client/target/initiator)
6201 * Disabling a path means that MPxIO will not select the disabled path for
6202 * routing any new I/O requests.
6203 */
6204 int
6205 mdi_pi_disable_path(mdi_pathinfo_t *pip, int flags)
6206 {
6207 mdi_phci_t *ph;
6208
6209 ph = MDI_PI(pip)->pi_phci;
6210 if (ph == NULL) {
6211 MDI_DEBUG(1, (MDI_NOTE, mdi_pi_get_phci(pip),
6212 "!failed: path %s %p: NULL ph",
6213 mdi_pi_spathname(pip), (void *)pip));
6214 return (MDI_FAILURE);
6215 }
6216
6217 (void) i_mdi_enable_disable_path(pip,
6218 ph->ph_vhci, flags, MDI_DISABLE_OP);
6219 MDI_DEBUG(5, (MDI_NOTE, ph->ph_dip,
6220 "!returning success pip = %p. ph = %p",
6221 (void *)pip, (void *)ph));
6222 return (MDI_SUCCESS);
6223 }
6224
6225 /*
6226 * disable the path to a particular pHCI (pHCI specified in the phci_path
6227 * argument) for a particular client (specified in the client_path argument).
6228 * Disabling a path means that MPxIO will not select the disabled path for
6229 * routing any new I/O requests.
6230 * NOTE: this will be removed once the NWS files are changed to use the new
6231 * mdi_{enable,disable}_path interfaces
6232 */
6233 int
6234 mdi_pi_disable(dev_info_t *cdip, dev_info_t *pdip, int flags)
6235 {
6236 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_DISABLE_OP));
6237 }
6238
6239 /*
6240 * Enable the path to a particular pHCI (pHCI specified in the phci_path
6241 * argument) for a particular client (specified in the client_path argument).
6242 * Enabling a path means that MPxIO may select the enabled path for routing
6243 * future I/O requests, subject to other path state constraints.
6244 * NOTE: this will be removed once the NWS files are changed to use the new
6245 * mdi_{enable,disable}_path interfaces
6246 */
6247
6248 int
6249 mdi_pi_enable(dev_info_t *cdip, dev_info_t *pdip, int flags)
6250 {
6251 return (i_mdi_pi_enable_disable(cdip, pdip, flags, MDI_ENABLE_OP));
6252 }
6253
6254 /*
6255 * Common routine for doing enable/disable.
6256 */
6257 static mdi_pathinfo_t *
6258 i_mdi_enable_disable_path(mdi_pathinfo_t *pip, mdi_vhci_t *vh, int flags,
6259 int op)
6260 {
6261 int sync_flag = 0;
6262 int rv;
6263 mdi_pathinfo_t *next;
6264 int (*f)() = NULL;
6265
6266 /*
6267 * Check to make sure the path is not already in the
6268 * requested state. If it is just return the next path
6269 * as we have nothing to do here.
6270 */
6271 if ((MDI_PI_IS_DISABLE(pip) && op == MDI_DISABLE_OP) ||
6272 (!MDI_PI_IS_DISABLE(pip) && op == MDI_ENABLE_OP)) {
6273 MDI_PI_LOCK(pip);
6274 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
6275 MDI_PI_UNLOCK(pip);
6276 return (next);
6277 }
6278
6279 f = vh->vh_ops->vo_pi_state_change;
6280
6281 sync_flag = (flags << 8) & 0xf00;
6282
6283 /*
6284 * Do a callback into the mdi consumer to let it
6285 * know that path is about to get enabled/disabled.
6286 */
6287 if (f != NULL) {
6288 rv = (*f)(vh->vh_dip, pip, 0,
6289 MDI_PI_EXT_STATE(pip),
6290 MDI_EXT_STATE_CHANGE | sync_flag |
6291 op | MDI_BEFORE_STATE_CHANGE);
6292 if (rv != MDI_SUCCESS) {
6293 MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
6294 "vo_pi_state_change: failed rv = %x", rv));
6295 }
6296 }
6297 MDI_PI_LOCK(pip);
6298 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
6299
6300 switch (flags) {
6301 case USER_DISABLE:
6302 if (op == MDI_DISABLE_OP) {
6303 MDI_PI_SET_USER_DISABLE(pip);
6304 } else {
6305 MDI_PI_SET_USER_ENABLE(pip);
6306 }
6307 break;
6308 case DRIVER_DISABLE:
6309 if (op == MDI_DISABLE_OP) {
6310 MDI_PI_SET_DRV_DISABLE(pip);
6311 } else {
6312 MDI_PI_SET_DRV_ENABLE(pip);
6313 }
6314 break;
6315 case DRIVER_DISABLE_TRANSIENT:
6316 if (op == MDI_DISABLE_OP && rv == MDI_SUCCESS) {
6317 MDI_PI_SET_DRV_DISABLE_TRANS(pip);
6318 } else {
6319 MDI_PI_SET_DRV_ENABLE_TRANS(pip);
6320 }
6321 break;
6322 }
6323 MDI_PI_UNLOCK(pip);
6324 /*
6325 * Do a callback into the mdi consumer to let it
6326 * know that path is now enabled/disabled.
6327 */
6328 if (f != NULL) {
6329 rv = (*f)(vh->vh_dip, pip, 0,
6330 MDI_PI_EXT_STATE(pip),
6331 MDI_EXT_STATE_CHANGE | sync_flag |
6332 op | MDI_AFTER_STATE_CHANGE);
6333 if (rv != MDI_SUCCESS) {
6334 MDI_DEBUG(2, (MDI_WARN, vh->vh_dip,
6335 "vo_pi_state_change failed: rv = %x", rv));
6336 }
6337 }
6338 return (next);
6339 }
6340
6341 /*
6342 * Common routine for doing enable/disable.
6343 * NOTE: this will be removed once the NWS files are changed to use the new
6344 * mdi_{enable,disable}_path has been putback
6345 */
6346 int
6347 i_mdi_pi_enable_disable(dev_info_t *cdip, dev_info_t *pdip, int flags, int op)
6348 {
6349
6350 mdi_phci_t *ph;
6351 mdi_vhci_t *vh = NULL;
6352 mdi_client_t *ct;
6353 mdi_pathinfo_t *next, *pip;
6354 int found_it;
6355
6356 ph = i_devi_get_phci(pdip);
6357 MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
6358 "!op = %d pdip = %p cdip = %p", op, (void *)pdip,
6359 (void *)cdip));
6360 if (ph == NULL) {
6361 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6362 "!failed: operation %d: NULL ph", op));
6363 return (MDI_FAILURE);
6364 }
6365
6366 if ((op != MDI_ENABLE_OP) && (op != MDI_DISABLE_OP)) {
6367 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6368 "!failed: invalid operation %d", op));
6369 return (MDI_FAILURE);
6370 }
6371
6372 vh = ph->ph_vhci;
6373
6374 if (cdip == NULL) {
6375 /*
6376 * Need to mark the Phci as enabled/disabled.
6377 */
6378 MDI_DEBUG(4, (MDI_NOTE, cdip ? cdip : pdip,
6379 "op %d for the phci", op));
6380 MDI_PHCI_LOCK(ph);
6381 switch (flags) {
6382 case USER_DISABLE:
6383 if (op == MDI_DISABLE_OP) {
6384 MDI_PHCI_SET_USER_DISABLE(ph);
6385 } else {
6386 MDI_PHCI_SET_USER_ENABLE(ph);
6387 }
6388 break;
6389 case DRIVER_DISABLE:
6390 if (op == MDI_DISABLE_OP) {
6391 MDI_PHCI_SET_DRV_DISABLE(ph);
6392 } else {
6393 MDI_PHCI_SET_DRV_ENABLE(ph);
6394 }
6395 break;
6396 case DRIVER_DISABLE_TRANSIENT:
6397 if (op == MDI_DISABLE_OP) {
6398 MDI_PHCI_SET_DRV_DISABLE_TRANSIENT(ph);
6399 } else {
6400 MDI_PHCI_SET_DRV_ENABLE_TRANSIENT(ph);
6401 }
6402 break;
6403 default:
6404 MDI_PHCI_UNLOCK(ph);
6405 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6406 "!invalid flag argument= %d", flags));
6407 }
6408
6409 /*
6410 * Phci has been disabled. Now try to enable/disable
6411 * path info's to each client.
6412 */
6413 pip = ph->ph_path_head;
6414 while (pip != NULL) {
6415 pip = i_mdi_enable_disable_path(pip, vh, flags, op);
6416 }
6417 MDI_PHCI_UNLOCK(ph);
6418 } else {
6419
6420 /*
6421 * Disable a specific client.
6422 */
6423 ct = i_devi_get_client(cdip);
6424 if (ct == NULL) {
6425 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6426 "!failed: operation = %d: NULL ct", op));
6427 return (MDI_FAILURE);
6428 }
6429
6430 MDI_CLIENT_LOCK(ct);
6431 pip = ct->ct_path_head;
6432 found_it = 0;
6433 while (pip != NULL) {
6434 MDI_PI_LOCK(pip);
6435 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
6436 if (MDI_PI(pip)->pi_phci == ph) {
6437 MDI_PI_UNLOCK(pip);
6438 found_it = 1;
6439 break;
6440 }
6441 MDI_PI_UNLOCK(pip);
6442 pip = next;
6443 }
6444
6445
6446 MDI_CLIENT_UNLOCK(ct);
6447 if (found_it == 0) {
6448 MDI_DEBUG(1, (MDI_NOTE, cdip ? cdip : pdip,
6449 "!failed. Could not find corresponding pip\n"));
6450 return (MDI_FAILURE);
6451 }
6452
6453 (void) i_mdi_enable_disable_path(pip, vh, flags, op);
6454 }
6455
6456 MDI_DEBUG(5, (MDI_NOTE, cdip ? cdip : pdip,
6457 "!op %d returning success pdip = %p cdip = %p",
6458 op, (void *)pdip, (void *)cdip));
6459 return (MDI_SUCCESS);
6460 }
6461
6462 /*
6463 * Ensure phci powered up
6464 */
6465 static void
6466 i_mdi_pm_hold_pip(mdi_pathinfo_t *pip)
6467 {
6468 dev_info_t *ph_dip;
6469
6470 ASSERT(pip != NULL);
6471 ASSERT(MDI_PI_LOCKED(pip));
6472
6473 if (MDI_PI(pip)->pi_pm_held) {
6474 return;
6475 }
6476
6477 ph_dip = mdi_pi_get_phci(pip);
6478 MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6479 "%s %p", mdi_pi_spathname(pip), (void *)pip));
6480 if (ph_dip == NULL) {
6481 return;
6482 }
6483
6484 MDI_PI_UNLOCK(pip);
6485 MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt was %d",
6486 DEVI(ph_dip)->devi_pm_kidsupcnt));
6487 pm_hold_power(ph_dip);
6488 MDI_DEBUG(4, (MDI_NOTE, ph_dip, "kidsupcnt is %d",
6489 DEVI(ph_dip)->devi_pm_kidsupcnt));
6490 MDI_PI_LOCK(pip);
6491
6492 /* If PM_GET_PM_INFO is NULL the pm_hold_power above was a noop */
6493 if (DEVI(ph_dip)->devi_pm_info)
6494 MDI_PI(pip)->pi_pm_held = 1;
6495 }
6496
6497 /*
6498 * Allow phci powered down
6499 */
6500 static void
6501 i_mdi_pm_rele_pip(mdi_pathinfo_t *pip)
6502 {
6503 dev_info_t *ph_dip = NULL;
6504
6505 ASSERT(pip != NULL);
6506 ASSERT(MDI_PI_LOCKED(pip));
6507
6508 if (MDI_PI(pip)->pi_pm_held == 0) {
6509 return;
6510 }
6511
6512 ph_dip = mdi_pi_get_phci(pip);
6513 ASSERT(ph_dip != NULL);
6514
6515 MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6516 "%s %p", mdi_pi_spathname(pip), (void *)pip));
6517
6518 MDI_PI_UNLOCK(pip);
6519 MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6520 "kidsupcnt was %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
6521 pm_rele_power(ph_dip);
6522 MDI_DEBUG(4, (MDI_NOTE, ph_dip,
6523 "kidsupcnt is %d", DEVI(ph_dip)->devi_pm_kidsupcnt));
6524 MDI_PI_LOCK(pip);
6525
6526 MDI_PI(pip)->pi_pm_held = 0;
6527 }
6528
/*
 * Add `incr' to the client's accumulated power hold count.  Caller must
 * hold the client lock.
 */
static void
i_mdi_pm_hold_client(mdi_client_t *ct, int incr)
{
	ASSERT(MDI_CLIENT_LOCKED(ct));

	ct->ct_power_cnt += incr;
	/* The debug message reports the count after the increment. */
	MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
	    "%p ct_power_cnt = %d incr = %d",
	    (void *)ct, ct->ct_power_cnt, incr));
	ASSERT(ct->ct_power_cnt >= 0);
}
6540
/*
 * Release the pHCI power hold (see i_mdi_pm_hold_pip()) on every path
 * of the client.  Caller must hold the client lock.
 */
static void
i_mdi_rele_all_phci(mdi_client_t *ct)
{
	mdi_pathinfo_t	*pip;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	/*
	 * NOTE(review): pi_client_link is read after mdi_rele_path(pip);
	 * presumably the pathinfo node cannot be freed while the client
	 * lock is held across the walk -- confirm.
	 */
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		mdi_hold_path(pip);
		MDI_PI_LOCK(pip);
		i_mdi_pm_rele_pip(pip);
		MDI_PI_UNLOCK(pip);
		mdi_rele_path(pip);
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}
}
6557
6558 static void
6559 i_mdi_pm_rele_client(mdi_client_t *ct, int decr)
6560 {
6561 ASSERT(MDI_CLIENT_LOCKED(ct));
6562
6563 if (i_ddi_devi_attached(ct->ct_dip)) {
6564 ct->ct_power_cnt -= decr;
6565 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
6566 "%p ct_power_cnt = %d decr = %d",
6567 (void *)ct, ct->ct_power_cnt, decr));
6568 }
6569
6570 ASSERT(ct->ct_power_cnt >= 0);
6571 if (ct->ct_power_cnt == 0) {
6572 i_mdi_rele_all_phci(ct);
6573 return;
6574 }
6575 }
6576
/*
 * Drop all power bookkeeping for a client: zero the hold count, release
 * the power hold on every pHCI path, and clear the config/unconfig
 * markers so subsequent pre/post phases start from a clean slate.
 * Caller must hold the client lock.
 */
static void
i_mdi_pm_reset_client(mdi_client_t *ct)
{
	MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
	    "%p ct_power_cnt = %d", (void *)ct, ct->ct_power_cnt));
	ASSERT(MDI_CLIENT_LOCKED(ct));
	ct->ct_power_cnt = 0;
	i_mdi_rele_all_phci(ct);
	ct->ct_powercnt_config = 0;
	ct->ct_powercnt_unconfig = 0;
	/* Flag the reset so post_config/post_unconfig skip their rele. */
	ct->ct_powercnt_reset = 1;
}
6589
/*
 * Take a power hold on the pHCI backing `pip' and bring all of the
 * pHCI's components to full power.  On pm_powerup() failure the hold is
 * dropped again and MDI_FAILURE is returned.
 */
static int
i_mdi_power_one_phci(mdi_pathinfo_t *pip)
{
	int		ret;
	dev_info_t	*ph_dip;

	MDI_PI_LOCK(pip);
	i_mdi_pm_hold_pip(pip);

	ph_dip = mdi_pi_get_phci(pip);
	MDI_PI_UNLOCK(pip);

	/* bring all components of phci to full power */
	MDI_DEBUG(4, (MDI_NOTE, ph_dip,
	    "pm_powerup for %s%d %p", ddi_driver_name(ph_dip),
	    ddi_get_instance(ph_dip), (void *)pip));

	ret = pm_powerup(ph_dip);

	if (ret == DDI_FAILURE) {
		MDI_DEBUG(4, (MDI_NOTE, ph_dip,
		    "pm_powerup FAILED for %s%d %p",
		    ddi_driver_name(ph_dip), ddi_get_instance(ph_dip),
		    (void *)pip));

		/* Undo the hold taken above so state stays balanced. */
		MDI_PI_LOCK(pip);
		i_mdi_pm_rele_pip(pip);
		MDI_PI_UNLOCK(pip);
		return (MDI_FAILURE);
	}

	return (MDI_SUCCESS);
}
6623
/*
 * Power up the pHCI behind every usable path of the client.  Succeeds
 * if at least one pHCI powered up.  Caller holds the client lock; it is
 * dropped around each i_mdi_power_one_phci() call (the path is held
 * across that window).
 */
static int
i_mdi_power_all_phci(mdi_client_t *ct)
{
	mdi_pathinfo_t	*pip;
	int		succeeded = 0;

	ASSERT(MDI_CLIENT_LOCKED(ct));
	pip = (mdi_pathinfo_t *)ct->ct_path_head;
	while (pip != NULL) {
		/*
		 * Don't power if MDI_PATHINFO_STATE_FAULT
		 * or MDI_PATHINFO_STATE_OFFLINE.
		 */
		if (MDI_PI_IS_INIT(pip) ||
		    MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip)) {
			mdi_hold_path(pip);
			MDI_CLIENT_UNLOCK(ct);
			if (i_mdi_power_one_phci(pip) == MDI_SUCCESS)
				succeeded = 1;

			ASSERT(ct == MDI_PI(pip)->pi_client);
			MDI_CLIENT_LOCK(ct);
			mdi_rele_path(pip);
		}
		/*
		 * NOTE(review): pi_client_link is read after
		 * mdi_rele_path(); presumably safe while the client lock
		 * is held -- confirm.
		 */
		pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
	}

	return (succeeded ? MDI_SUCCESS : MDI_FAILURE);
}
6653
6654 /*
6655 * mdi_bus_power():
6656 * 1. Place the phci(s) into powered up state so that
6657 * client can do power management
6658 * 2. Ensure phci powered up as client power managing
6659 * Return Values:
6660 * MDI_SUCCESS
6661 * MDI_FAILURE
6662 */
6663 int
6664 mdi_bus_power(dev_info_t *parent, void *impl_arg, pm_bus_power_op_t op,
6665 void *arg, void *result)
6666 {
6667 int ret = MDI_SUCCESS;
6668 pm_bp_child_pwrchg_t *bpc;
6669 mdi_client_t *ct;
6670 dev_info_t *cdip;
6671 pm_bp_has_changed_t *bphc;
6672
6673 /*
6674 * BUS_POWER_NOINVOL not supported
6675 */
6676 if (op == BUS_POWER_NOINVOL)
6677 return (MDI_FAILURE);
6678
6679 /*
6680 * ignore other OPs.
6681 * return quickly to save cou cycles on the ct processing
6682 */
6683 switch (op) {
6684 case BUS_POWER_PRE_NOTIFICATION:
6685 case BUS_POWER_POST_NOTIFICATION:
6686 bpc = (pm_bp_child_pwrchg_t *)arg;
6687 cdip = bpc->bpc_dip;
6688 break;
6689 case BUS_POWER_HAS_CHANGED:
6690 bphc = (pm_bp_has_changed_t *)arg;
6691 cdip = bphc->bphc_dip;
6692 break;
6693 default:
6694 return (pm_busop_bus_power(parent, impl_arg, op, arg, result));
6695 }
6696
6697 ASSERT(MDI_CLIENT(cdip));
6698
6699 ct = i_devi_get_client(cdip);
6700 if (ct == NULL)
6701 return (MDI_FAILURE);
6702
6703 /*
6704 * wait till the mdi_pathinfo node state change are processed
6705 */
6706 MDI_CLIENT_LOCK(ct);
6707 switch (op) {
6708 case BUS_POWER_PRE_NOTIFICATION:
6709 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6710 "BUS_POWER_PRE_NOTIFICATION:"
6711 "%s@%s, olevel=%d, nlevel=%d, comp=%d",
6712 ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
6713 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp));
6714
6715 /* serialize power level change per client */
6716 while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
6717 cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);
6718
6719 MDI_CLIENT_SET_POWER_TRANSITION(ct);
6720
6721 if (ct->ct_power_cnt == 0) {
6722 ret = i_mdi_power_all_phci(ct);
6723 }
6724
6725 /*
6726 * if new_level > 0:
6727 * - hold phci(s)
6728 * - power up phci(s) if not already
6729 * ignore power down
6730 */
6731 if (bpc->bpc_nlevel > 0) {
6732 if (!DEVI_IS_ATTACHING(ct->ct_dip)) {
6733 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6734 "i_mdi_pm_hold_client\n"));
6735 i_mdi_pm_hold_client(ct, ct->ct_path_count);
6736 }
6737 }
6738 break;
6739 case BUS_POWER_POST_NOTIFICATION:
6740 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6741 "BUS_POWER_POST_NOTIFICATION:"
6742 "%s@%s, olevel=%d, nlevel=%d, comp=%d result=%d",
6743 ddi_node_name(bpc->bpc_dip), PM_ADDR(bpc->bpc_dip),
6744 bpc->bpc_olevel, bpc->bpc_nlevel, bpc->bpc_comp,
6745 *(int *)result));
6746
6747 if (*(int *)result == DDI_SUCCESS) {
6748 if (bpc->bpc_nlevel > 0) {
6749 MDI_CLIENT_SET_POWER_UP(ct);
6750 } else {
6751 MDI_CLIENT_SET_POWER_DOWN(ct);
6752 }
6753 }
6754
6755 /* release the hold we did in pre-notification */
6756 if (bpc->bpc_nlevel > 0 && (*(int *)result != DDI_SUCCESS) &&
6757 !DEVI_IS_ATTACHING(ct->ct_dip)) {
6758 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6759 "i_mdi_pm_rele_client\n"));
6760 i_mdi_pm_rele_client(ct, ct->ct_path_count);
6761 }
6762
6763 if (bpc->bpc_nlevel == 0 && (*(int *)result == DDI_SUCCESS)) {
6764 /* another thread might started attaching */
6765 if (DEVI_IS_ATTACHING(ct->ct_dip)) {
6766 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6767 "i_mdi_pm_rele_client\n"));
6768 i_mdi_pm_rele_client(ct, ct->ct_path_count);
6769 /* detaching has been taken care in pm_post_unconfig */
6770 } else if (!DEVI_IS_DETACHING(ct->ct_dip)) {
6771 MDI_DEBUG(4, (MDI_NOTE, bpc->bpc_dip,
6772 "i_mdi_pm_reset_client\n"));
6773 i_mdi_pm_reset_client(ct);
6774 }
6775 }
6776
6777 MDI_CLIENT_CLEAR_POWER_TRANSITION(ct);
6778 cv_broadcast(&ct->ct_powerchange_cv);
6779
6780 break;
6781
6782 /* need to do more */
6783 case BUS_POWER_HAS_CHANGED:
6784 MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
6785 "BUS_POWER_HAS_CHANGED:"
6786 "%s@%s, olevel=%d, nlevel=%d, comp=%d",
6787 ddi_node_name(bphc->bphc_dip), PM_ADDR(bphc->bphc_dip),
6788 bphc->bphc_olevel, bphc->bphc_nlevel, bphc->bphc_comp));
6789
6790 if (bphc->bphc_nlevel > 0 &&
6791 bphc->bphc_nlevel > bphc->bphc_olevel) {
6792 if (ct->ct_power_cnt == 0) {
6793 ret = i_mdi_power_all_phci(ct);
6794 }
6795 MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
6796 "i_mdi_pm_hold_client\n"));
6797 i_mdi_pm_hold_client(ct, ct->ct_path_count);
6798 }
6799
6800 if (bphc->bphc_nlevel == 0 && bphc->bphc_olevel != -1) {
6801 MDI_DEBUG(4, (MDI_NOTE, bphc->bphc_dip,
6802 "i_mdi_pm_rele_client\n"));
6803 i_mdi_pm_rele_client(ct, ct->ct_path_count);
6804 }
6805 break;
6806 }
6807
6808 MDI_CLIENT_UNLOCK(ct);
6809 return (ret);
6810 }
6811
/*
 * Pre-config power handling for one client: power up the pHCI paths (if
 * not already powered) and take a hold for every path so the client can
 * be configured.  The hold is recorded in ct_powercnt_config so
 * i_mdi_pm_post_config_one() can undo it.
 */
static int
i_mdi_pm_pre_config_one(dev_info_t *child)
{
	int		ret = MDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return (MDI_FAILURE);

	MDI_CLIENT_LOCK(ct);
	/* Serialize against an in-flight power level change. */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!MDI_CLIENT_IS_FAILED(ct)) {
		MDI_CLIENT_UNLOCK(ct);
		MDI_DEBUG(4, (MDI_NOTE, child, "already configured\n"));
		return (MDI_SUCCESS);
	}

	if (ct->ct_powercnt_config) {
		/* A previous pre-config already took the hold. */
		MDI_CLIENT_UNLOCK(ct);
		MDI_DEBUG(4, (MDI_NOTE, child, "already held\n"));
		return (MDI_SUCCESS);
	}

	if (ct->ct_power_cnt == 0) {
		ret = i_mdi_power_all_phci(ct);
	}
	MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
	i_mdi_pm_hold_client(ct, ct->ct_path_count);
	ct->ct_powercnt_config = 1;
	ct->ct_powercnt_reset = 0;
	MDI_CLIENT_UNLOCK(ct);
	return (ret);
}
6848
6849 static int
6850 i_mdi_pm_pre_config(dev_info_t *vdip, dev_info_t *child)
6851 {
6852 int ret = MDI_SUCCESS;
6853 dev_info_t *cdip;
6854 int circ;
6855
6856 ASSERT(MDI_VHCI(vdip));
6857
6858 /* ndi_devi_config_one */
6859 if (child) {
6860 ASSERT(DEVI_BUSY_OWNED(vdip));
6861 return (i_mdi_pm_pre_config_one(child));
6862 }
6863
6864 /* devi_config_common */
6865 ndi_devi_enter(vdip, &circ);
6866 cdip = ddi_get_child(vdip);
6867 while (cdip) {
6868 dev_info_t *next = ddi_get_next_sibling(cdip);
6869
6870 ret = i_mdi_pm_pre_config_one(cdip);
6871 if (ret != MDI_SUCCESS)
6872 break;
6873 cdip = next;
6874 }
6875 ndi_devi_exit(vdip, circ);
6876 return (ret);
6877 }
6878
/*
 * Pre-unconfig power handling for one client: power up the pHCI paths
 * and take a hold for every path so the client can be unconfigured.
 * `*held' is set to 1 when a hold is (or already was) in place; the
 * caller passes it on to i_mdi_pm_post_unconfig().  Returns MDI_FAILURE
 * to veto an NDI_AUTODETACH of an already powered-down client.
 */
static int
i_mdi_pm_pre_unconfig_one(dev_info_t *child, int *held, int flags)
{
	int		ret = MDI_SUCCESS;
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return (MDI_FAILURE);

	MDI_CLIENT_LOCK(ct);
	/* Serialize against an in-flight power level change. */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!i_ddi_devi_attached(child)) {
		MDI_DEBUG(4, (MDI_NOTE, child, "node detached already\n"));
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_SUCCESS);
	}

	if (MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    (flags & NDI_AUTODETACH)) {
		/* Don't let auto-modunload power the client back up. */
		MDI_DEBUG(4, (MDI_NOTE, child, "auto-modunload\n"));
		MDI_CLIENT_UNLOCK(ct);
		return (MDI_FAILURE);
	}

	if (ct->ct_powercnt_unconfig) {
		/* A previous pre-unconfig already took the hold. */
		MDI_DEBUG(4, (MDI_NOTE, child, "ct_powercnt_held\n"));
		MDI_CLIENT_UNLOCK(ct);
		*held = 1;
		return (MDI_SUCCESS);
	}

	if (ct->ct_power_cnt == 0) {
		ret = i_mdi_power_all_phci(ct);
	}
	MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_hold_client\n"));
	i_mdi_pm_hold_client(ct, ct->ct_path_count);
	ct->ct_powercnt_unconfig = 1;
	ct->ct_powercnt_reset = 0;
	MDI_CLIENT_UNLOCK(ct);
	if (ret == MDI_SUCCESS)
		*held = 1;
	return (ret);
}
6925
6926 static int
6927 i_mdi_pm_pre_unconfig(dev_info_t *vdip, dev_info_t *child, int *held,
6928 int flags)
6929 {
6930 int ret = MDI_SUCCESS;
6931 dev_info_t *cdip;
6932 int circ;
6933
6934 ASSERT(MDI_VHCI(vdip));
6935 *held = 0;
6936
6937 /* ndi_devi_unconfig_one */
6938 if (child) {
6939 ASSERT(DEVI_BUSY_OWNED(vdip));
6940 return (i_mdi_pm_pre_unconfig_one(child, held, flags));
6941 }
6942
6943 /* devi_unconfig_common */
6944 ndi_devi_enter(vdip, &circ);
6945 cdip = ddi_get_child(vdip);
6946 while (cdip) {
6947 dev_info_t *next = ddi_get_next_sibling(cdip);
6948
6949 ret = i_mdi_pm_pre_unconfig_one(cdip, held, flags);
6950 cdip = next;
6951 }
6952 ndi_devi_exit(vdip, circ);
6953
6954 if (*held)
6955 ret = MDI_SUCCESS;
6956
6957 return (ret);
6958 }
6959
/*
 * Post-config power handling for one client: undo the holds taken by
 * i_mdi_pm_pre_config_one().  If the client was powered down or never
 * attached in the meantime, its power state is reset outright;
 * otherwise the hold is released for each currently usable
 * (online/standby) path.
 */
static void
i_mdi_pm_post_config_one(dev_info_t *child)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	/* Serialize against an in-flight power level change. */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (ct->ct_powercnt_reset || !ct->ct_powercnt_config) {
		/* No pre-config hold outstanding; nothing to undo. */
		MDI_DEBUG(4, (MDI_NOTE, child, "not configured\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* client has not been updated */
	if (MDI_CLIENT_IS_FAILED(ct)) {
		MDI_DEBUG(4, (MDI_NOTE, child, "client failed\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* another thread might have powered it down or detached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    !DEVI_IS_ATTACHING(child)) ||
	    (!i_ddi_devi_attached(child) &&
	    !DEVI_IS_ATTACHING(child))) {
		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t  *pip, *next;
		int	valid_path_count = 0;

		/* Release one hold per online/standby path. */
		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
	}
	ct->ct_powercnt_config = 0;
	MDI_CLIENT_UNLOCK(ct);
}
7012
7013 static void
7014 i_mdi_pm_post_config(dev_info_t *vdip, dev_info_t *child)
7015 {
7016 int circ;
7017 dev_info_t *cdip;
7018
7019 ASSERT(MDI_VHCI(vdip));
7020
7021 /* ndi_devi_config_one */
7022 if (child) {
7023 ASSERT(DEVI_BUSY_OWNED(vdip));
7024 i_mdi_pm_post_config_one(child);
7025 return;
7026 }
7027
7028 /* devi_config_common */
7029 ndi_devi_enter(vdip, &circ);
7030 cdip = ddi_get_child(vdip);
7031 while (cdip) {
7032 dev_info_t *next = ddi_get_next_sibling(cdip);
7033
7034 i_mdi_pm_post_config_one(cdip);
7035 cdip = next;
7036 }
7037 ndi_devi_exit(vdip, circ);
7038 }
7039
/*
 * Post-unconfig power handling for one client: undo the holds taken by
 * i_mdi_pm_pre_unconfig_one().  If the detach failed in a powered-down
 * state, or the node is gone and not re-attaching, the power state is
 * reset outright; otherwise the hold is released for each currently
 * usable (online/standby) path.
 */
static void
i_mdi_pm_post_unconfig_one(dev_info_t *child)
{
	mdi_client_t	*ct;

	ct = i_devi_get_client(child);
	if (ct == NULL)
		return;

	MDI_CLIENT_LOCK(ct);
	/* Serialize against an in-flight power level change. */
	while (MDI_CLIENT_IS_POWER_TRANSITION(ct))
		cv_wait(&ct->ct_powerchange_cv, &ct->ct_mutex);

	if (!ct->ct_powercnt_unconfig || ct->ct_powercnt_reset) {
		/* No pre-unconfig hold outstanding; nothing to undo. */
		MDI_DEBUG(4, (MDI_NOTE, child, "not held\n"));
		MDI_CLIENT_UNLOCK(ct);
		return;
	}

	/* failure detaching or another thread just attached it */
	if ((MDI_CLIENT_IS_POWERED_DOWN(ct) &&
	    i_ddi_devi_attached(child)) ||
	    (!i_ddi_devi_attached(child) &&
	    !DEVI_IS_ATTACHING(child))) {
		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_reset_client\n"));
		i_mdi_pm_reset_client(ct);
	} else {
		mdi_pathinfo_t  *pip, *next;
		int	valid_path_count = 0;

		/* Release one hold per online/standby path. */
		MDI_DEBUG(4, (MDI_NOTE, child, "i_mdi_pm_rele_client\n"));
		pip = ct->ct_path_head;
		while (pip != NULL) {
			MDI_PI_LOCK(pip);
			next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
			if (MDI_PI_IS_ONLINE(pip) || MDI_PI_IS_STANDBY(pip))
				valid_path_count ++;
			MDI_PI_UNLOCK(pip);
			pip = next;
		}
		i_mdi_pm_rele_client(ct, valid_path_count);
		/*
		 * ct_powercnt_unconfig is cleared here; on the reset
		 * branch above, i_mdi_pm_reset_client() clears it, so
		 * both paths end with the flag at 0.
		 */
		ct->ct_powercnt_unconfig = 0;
	}

	MDI_CLIENT_UNLOCK(ct);
}
7086
7087 static void
7088 i_mdi_pm_post_unconfig(dev_info_t *vdip, dev_info_t *child, int held)
7089 {
7090 int circ;
7091 dev_info_t *cdip;
7092
7093 ASSERT(MDI_VHCI(vdip));
7094
7095 if (!held) {
7096 MDI_DEBUG(4, (MDI_NOTE, vdip, "held = %d", held));
7097 return;
7098 }
7099
7100 if (child) {
7101 ASSERT(DEVI_BUSY_OWNED(vdip));
7102 i_mdi_pm_post_unconfig_one(child);
7103 return;
7104 }
7105
7106 ndi_devi_enter(vdip, &circ);
7107 cdip = ddi_get_child(vdip);
7108 while (cdip) {
7109 dev_info_t *next = ddi_get_next_sibling(cdip);
7110
7111 i_mdi_pm_post_unconfig_one(cdip);
7112 cdip = next;
7113 }
7114 ndi_devi_exit(vdip, circ);
7115 }
7116
/*
 * MDI power-management entry point: dispatch the pre/post config and
 * unconfig phases and the hold/release operations for vHCI children.
 * `args' is op-specific: the held flag for pre/post unconfig, or the
 * client dev_info_t for hold/release.  `circ' is only valid (and
 * ndi_devi_exit only called) when `devnm' was supplied.
 */
int
mdi_power(dev_info_t *vdip, mdi_pm_op_t op, void *args, char *devnm, int flags)
{
	int		circ, ret = MDI_SUCCESS;
	dev_info_t	*client_dip = NULL;
	mdi_client_t	*ct;

	/*
	 * Handling ndi_devi_config_one and ndi_devi_unconfig_one.
	 * Power up pHCI for the named client device.
	 * Note: Before the client is enumerated under vhci by phci,
	 * client_dip can be NULL. Then proceed to power up all the
	 * pHCIs.
	 */
	if (devnm != NULL) {
		ndi_devi_enter(vdip, &circ);
		client_dip = ndi_devi_findchild(vdip, devnm);
	}

	MDI_DEBUG(4, (MDI_NOTE, vdip,
	    "op = %d %s %p", op, devnm ? devnm : "", (void *)client_dip));

	switch (op) {
	case MDI_PM_PRE_CONFIG:
		ret = i_mdi_pm_pre_config(vdip, client_dip);
		break;

	case MDI_PM_PRE_UNCONFIG:
		ret = i_mdi_pm_pre_unconfig(vdip, client_dip, (int *)args,
		    flags);
		break;

	case MDI_PM_POST_CONFIG:
		i_mdi_pm_post_config(vdip, client_dip);
		break;

	case MDI_PM_POST_UNCONFIG:
		i_mdi_pm_post_unconfig(vdip, client_dip, *(int *)args);
		break;

	case MDI_PM_HOLD_POWER:
	case MDI_PM_RELE_POWER:
		ASSERT(args);

		client_dip = (dev_info_t *)args;
		ASSERT(MDI_CLIENT(client_dip));

		/*
		 * NOTE(review): ct is not checked for NULL here before
		 * MDI_CLIENT_LOCK; presumably the MDI_CLIENT assert above
		 * guarantees a client is registered -- confirm.
		 */
		ct = i_devi_get_client(client_dip);
		MDI_CLIENT_LOCK(ct);

		if (op == MDI_PM_HOLD_POWER) {
			if (ct->ct_power_cnt == 0) {
				(void) i_mdi_power_all_phci(ct);
				MDI_DEBUG(4, (MDI_NOTE, client_dip,
				    "i_mdi_pm_hold_client\n"));
				i_mdi_pm_hold_client(ct, ct->ct_path_count);
			}
		} else {
			if (DEVI_IS_ATTACHING(client_dip)) {
				MDI_DEBUG(4, (MDI_NOTE, client_dip,
				    "i_mdi_pm_rele_client\n"));
				i_mdi_pm_rele_client(ct, ct->ct_path_count);
			} else {
				MDI_DEBUG(4, (MDI_NOTE, client_dip,
				    "i_mdi_pm_reset_client\n"));
				i_mdi_pm_reset_client(ct);
			}
		}

		MDI_CLIENT_UNLOCK(ct);
		break;

	default:
		break;
	}

	if (devnm)
		ndi_devi_exit(vdip, circ);

	return (ret);
}
7198
7199 int
7200 mdi_component_is_vhci(dev_info_t *dip, const char **mdi_class)
7201 {
7202 mdi_vhci_t *vhci;
7203
7204 if (!MDI_VHCI(dip))
7205 return (MDI_FAILURE);
7206
7207 if (mdi_class) {
7208 vhci = DEVI(dip)->devi_mdi_xhci;
7209 ASSERT(vhci);
7210 *mdi_class = vhci->vh_class;
7211 }
7212
7213 return (MDI_SUCCESS);
7214 }
7215
7216 int
7217 mdi_component_is_phci(dev_info_t *dip, const char **mdi_class)
7218 {
7219 mdi_phci_t *phci;
7220
7221 if (!MDI_PHCI(dip))
7222 return (MDI_FAILURE);
7223
7224 if (mdi_class) {
7225 phci = DEVI(dip)->devi_mdi_xhci;
7226 ASSERT(phci);
7227 *mdi_class = phci->ph_vhci->vh_class;
7228 }
7229
7230 return (MDI_SUCCESS);
7231 }
7232
7233 int
7234 mdi_component_is_client(dev_info_t *dip, const char **mdi_class)
7235 {
7236 mdi_client_t *client;
7237
7238 if (!MDI_CLIENT(dip))
7239 return (MDI_FAILURE);
7240
7241 if (mdi_class) {
7242 client = DEVI(dip)->devi_mdi_client;
7243 ASSERT(client);
7244 *mdi_class = client->ct_vhci->vh_class;
7245 }
7246
7247 return (MDI_SUCCESS);
7248 }
7249
7250 void *
7251 mdi_client_get_vhci_private(dev_info_t *dip)
7252 {
7253 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
7254 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
7255 mdi_client_t *ct;
7256 ct = i_devi_get_client(dip);
7257 return (ct->ct_vprivate);
7258 }
7259 return (NULL);
7260 }
7261
7262 void
7263 mdi_client_set_vhci_private(dev_info_t *dip, void *data)
7264 {
7265 ASSERT(mdi_component_is_client(dip, NULL) == MDI_SUCCESS);
7266 if (mdi_component_is_client(dip, NULL) == MDI_SUCCESS) {
7267 mdi_client_t *ct;
7268 ct = i_devi_get_client(dip);
7269 ct->ct_vprivate = data;
7270 }
7271 }
7272 /*
7273 * mdi_pi_get_vhci_private():
7274 * Get the vhci private information associated with the
7275 * mdi_pathinfo node
7276 */
7277 void *
7278 mdi_pi_get_vhci_private(mdi_pathinfo_t *pip)
7279 {
7280 caddr_t vprivate = NULL;
7281 if (pip) {
7282 vprivate = MDI_PI(pip)->pi_vprivate;
7283 }
7284 return (vprivate);
7285 }
7286
7287 /*
7288 * mdi_pi_set_vhci_private():
7289 * Set the vhci private information in the mdi_pathinfo node
7290 */
7291 void
7292 mdi_pi_set_vhci_private(mdi_pathinfo_t *pip, void *priv)
7293 {
7294 if (pip) {
7295 MDI_PI(pip)->pi_vprivate = priv;
7296 }
7297 }
7298
7299 /*
7300 * mdi_phci_get_vhci_private():
7301 * Get the vhci private information associated with the
7302 * mdi_phci node
7303 */
7304 void *
7305 mdi_phci_get_vhci_private(dev_info_t *dip)
7306 {
7307 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
7308 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7309 mdi_phci_t *ph;
7310 ph = i_devi_get_phci(dip);
7311 return (ph->ph_vprivate);
7312 }
7313 return (NULL);
7314 }
7315
7316 /*
7317 * mdi_phci_set_vhci_private():
7318 * Set the vhci private information in the mdi_phci node
7319 */
7320 void
7321 mdi_phci_set_vhci_private(dev_info_t *dip, void *priv)
7322 {
7323 ASSERT(mdi_component_is_phci(dip, NULL) == MDI_SUCCESS);
7324 if (mdi_component_is_phci(dip, NULL) == MDI_SUCCESS) {
7325 mdi_phci_t *ph;
7326 ph = i_devi_get_phci(dip);
7327 ph->ph_vprivate = priv;
7328 }
7329 }
7330
/*
 * Return non-zero if the path is flagged hidden (set via
 * MDI_PI_FLAGS_SET_HIDDEN, e.g. on device removal below).
 */
int
mdi_pi_ishidden(mdi_pathinfo_t *pip)
{
	return (MDI_PI_FLAGS_IS_HIDDEN(pip));
}
7336
/*
 * Return non-zero if the path is flagged device-removed (set by
 * mdi_pi_device_remove(), cleared by mdi_pi_device_insert()).
 */
int
mdi_pi_device_isremoved(mdi_pathinfo_t *pip)
{
	return (MDI_PI_FLAGS_IS_DEVICE_REMOVED(pip));
}
7342
7343 /* Return 1 if all client paths are device_removed */
7344 static int
7345 i_mdi_client_all_devices_removed(mdi_client_t *ct)
7346 {
7347 mdi_pathinfo_t *pip;
7348 int all_devices_removed = 1;
7349
7350 MDI_CLIENT_LOCK(ct);
7351 for (pip = ct->ct_path_head; pip;
7352 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link) {
7353 if (!mdi_pi_device_isremoved(pip)) {
7354 all_devices_removed = 0;
7355 break;
7356 }
7357 }
7358 MDI_CLIENT_UNLOCK(ct);
7359 return (all_devices_removed);
7360 }
7361
7362 /*
7363 * When processing path hotunplug, represent device removal.
7364 */
7365 int
7366 mdi_pi_device_remove(mdi_pathinfo_t *pip)
7367 {
7368 mdi_client_t *ct;
7369
7370 MDI_PI_LOCK(pip);
7371 if (mdi_pi_device_isremoved(pip)) {
7372 MDI_PI_UNLOCK(pip);
7373 return (0);
7374 }
7375 MDI_PI_FLAGS_SET_DEVICE_REMOVED(pip);
7376 MDI_PI_FLAGS_SET_HIDDEN(pip);
7377 MDI_PI_UNLOCK(pip);
7378
7379 /*
7380 * If all paths associated with the client are now DEVICE_REMOVED,
7381 * reflect DEVICE_REMOVED in the client.
7382 */
7383 ct = MDI_PI(pip)->pi_client;
7384 if (ct && ct->ct_dip && i_mdi_client_all_devices_removed(ct))
7385 (void) ndi_devi_device_remove(ct->ct_dip);
7386 else
7387 i_ddi_di_cache_invalidate();
7388
7389 return (1);
7390 }
7391
7392 /*
7393 * When processing hotplug, if a path marked mdi_pi_device_isremoved()
7394 * is now accessible then this interfaces is used to represent device insertion.
7395 */
7396 int
7397 mdi_pi_device_insert(mdi_pathinfo_t *pip)
7398 {
7399 MDI_PI_LOCK(pip);
7400 if (!mdi_pi_device_isremoved(pip)) {
7401 MDI_PI_UNLOCK(pip);
7402 return (0);
7403 }
7404 MDI_PI_FLAGS_CLR_DEVICE_REMOVED(pip);
7405 MDI_PI_FLAGS_CLR_HIDDEN(pip);
7406 MDI_PI_UNLOCK(pip);
7407
7408 i_ddi_di_cache_invalidate();
7409
7410 return (1);
7411 }
7412
7413 /*
7414 * List of vhci class names:
7415 * A vhci class name must be in this list only if the corresponding vhci
7416 * driver intends to use the mdi provided bus config implementation
7417 * (i.e., mdi_vhci_bus_config()).
7418 */
7419 static char *vhci_class_list[] = { MDI_HCI_CLASS_SCSI, MDI_HCI_CLASS_IB };
7420 #define N_VHCI_CLASSES (sizeof (vhci_class_list) / sizeof (char *))
7421
7422 /*
7423 * During boot time, the on-disk vhci cache for every vhci class is read
7424 * in the form of an nvlist and stored here.
7425 */
7426 static nvlist_t *vhcache_nvl[N_VHCI_CLASSES];
7427
7428 /* nvpair names in vhci cache nvlist */
7429 #define MDI_VHCI_CACHE_VERSION 1
7430 #define MDI_NVPNAME_VERSION "version"
7431 #define MDI_NVPNAME_PHCIS "phcis"
7432 #define MDI_NVPNAME_CTADDRMAP "clientaddrmap"
7433
7434 /*
7435 * Given vhci class name, return its on-disk vhci cache filename.
7436 * Memory for the returned filename which includes the full path is allocated
7437 * by this function.
7438 */
7439 static char *
7440 vhclass2vhcache_filename(char *vhclass)
7441 {
7442 char *filename;
7443 int len;
7444 static char *fmt = "/etc/devices/mdi_%s_cache";
7445
7446 /*
7447 * fmt contains the on-disk vhci cache file name format;
7448 * for scsi_vhci the filename is "/etc/devices/mdi_scsi_vhci_cache".
7449 */
7450
7451 /* the -1 below is to account for "%s" in the format string */
7452 len = strlen(fmt) + strlen(vhclass) - 1;
7453 filename = kmem_alloc(len, KM_SLEEP);
7454 (void) snprintf(filename, len, fmt, vhclass);
7455 ASSERT(len == (strlen(filename) + 1));
7456 return (filename);
7457 }
7458
7459 /*
7460 * initialize the vhci cache related data structures and read the on-disk
7461 * vhci cached data into memory.
7462 */
7463 static void
7464 setup_vhci_cache(mdi_vhci_t *vh)
7465 {
7466 mdi_vhci_config_t *vhc;
7467 mdi_vhci_cache_t *vhcache;
7468 int i;
7469 nvlist_t *nvl = NULL;
7470
7471 vhc = kmem_zalloc(sizeof (mdi_vhci_config_t), KM_SLEEP);
7472 vh->vh_config = vhc;
7473 vhcache = &vhc->vhc_vhcache;
7474
7475 vhc->vhc_vhcache_filename = vhclass2vhcache_filename(vh->vh_class);
7476
7477 mutex_init(&vhc->vhc_lock, NULL, MUTEX_DEFAULT, NULL);
7478 cv_init(&vhc->vhc_cv, NULL, CV_DRIVER, NULL);
7479
7480 rw_init(&vhcache->vhcache_lock, NULL, RW_DRIVER, NULL);
7481
7482 /*
7483 * Create string hash; same as mod_hash_create_strhash() except that
7484 * we use NULL key destructor.
7485 */
7486 vhcache->vhcache_client_hash = mod_hash_create_extended(vh->vh_class,
7487 mdi_bus_config_cache_hash_size,
7488 mod_hash_null_keydtor, mod_hash_null_valdtor,
7489 mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
7490
7491 /*
7492 * The on-disk vhci cache is read during booting prior to the
7493 * lights-out period by mdi_read_devices_files().
7494 */
7495 for (i = 0; i < N_VHCI_CLASSES; i++) {
7496 if (strcmp(vhci_class_list[i], vh->vh_class) == 0) {
7497 nvl = vhcache_nvl[i];
7498 vhcache_nvl[i] = NULL;
7499 break;
7500 }
7501 }
7502
7503 /*
7504 * this is to cover the case of some one manually causing unloading
7505 * (or detaching) and reloading (or attaching) of a vhci driver.
7506 */
7507 if (nvl == NULL && modrootloaded)
7508 nvl = read_on_disk_vhci_cache(vh->vh_class);
7509
7510 if (nvl != NULL) {
7511 rw_enter(&vhcache->vhcache_lock, RW_WRITER);
7512 if (mainnvl_to_vhcache(vhcache, nvl) == MDI_SUCCESS)
7513 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
7514 else {
7515 cmn_err(CE_WARN,
7516 "%s: data file corrupted, will recreate",
7517 vhc->vhc_vhcache_filename);
7518 }
7519 rw_exit(&vhcache->vhcache_lock);
7520 nvlist_free(nvl);
7521 }
7522
7523 vhc->vhc_cbid = callb_add(stop_vhcache_flush_thread, vhc,
7524 CB_CL_UADMIN_PRE_VFS, "mdi_vhcache_flush");
7525
7526 vhc->vhc_path_discovery_boot = mdi_path_discovery_boot;
7527 vhc->vhc_path_discovery_postboot = mdi_path_discovery_postboot;
7528 }
7529
7530 /*
7531 * free all vhci cache related resources
7532 */
7533 static int
7534 destroy_vhci_cache(mdi_vhci_t *vh)
7535 {
7536 mdi_vhci_config_t *vhc = vh->vh_config;
7537 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
7538 mdi_vhcache_phci_t *cphci, *cphci_next;
7539 mdi_vhcache_client_t *cct, *cct_next;
7540 mdi_vhcache_pathinfo_t *cpi, *cpi_next;
7541
7542 if (stop_vhcache_async_threads(vhc) != MDI_SUCCESS)
7543 return (MDI_FAILURE);
7544
7545 kmem_free(vhc->vhc_vhcache_filename,
7546 strlen(vhc->vhc_vhcache_filename) + 1);
7547
7548 mod_hash_destroy_strhash(vhcache->vhcache_client_hash);
7549
7550 for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
7551 cphci = cphci_next) {
7552 cphci_next = cphci->cphci_next;
7553 free_vhcache_phci(cphci);
7554 }
7555
7556 for (cct = vhcache->vhcache_client_head; cct != NULL; cct = cct_next) {
7557 cct_next = cct->cct_next;
7558 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi_next) {
7559 cpi_next = cpi->cpi_next;
7560 free_vhcache_pathinfo(cpi);
7561 }
7562 free_vhcache_client(cct);
7563 }
7564
7565 rw_destroy(&vhcache->vhcache_lock);
7566
7567 mutex_destroy(&vhc->vhc_lock);
7568 cv_destroy(&vhc->vhc_cv);
7569 kmem_free(vhc, sizeof (mdi_vhci_config_t));
7570 return (MDI_SUCCESS);
7571 }
7572
7573 /*
7574 * Stop all vhci cache related async threads and free their resources.
7575 */
7576 static int
7577 stop_vhcache_async_threads(mdi_vhci_config_t *vhc)
7578 {
7579 mdi_async_client_config_t *acc, *acc_next;
7580
7581 mutex_enter(&vhc->vhc_lock);
7582 vhc->vhc_flags |= MDI_VHC_EXIT;
7583 ASSERT(vhc->vhc_acc_thrcount >= 0);
7584 cv_broadcast(&vhc->vhc_cv);
7585
7586 while ((vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) ||
7587 vhc->vhc_acc_thrcount != 0) {
7588 mutex_exit(&vhc->vhc_lock);
7589 delay_random(mdi_delay);
7590 mutex_enter(&vhc->vhc_lock);
7591 }
7592
7593 vhc->vhc_flags &= ~MDI_VHC_EXIT;
7594
7595 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc_next) {
7596 acc_next = acc->acc_next;
7597 free_async_client_config(acc);
7598 }
7599 vhc->vhc_acc_list_head = NULL;
7600 vhc->vhc_acc_list_tail = NULL;
7601 vhc->vhc_acc_count = 0;
7602
7603 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
7604 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7605 mutex_exit(&vhc->vhc_lock);
7606 if (flush_vhcache(vhc, 0) != MDI_SUCCESS) {
7607 vhcache_dirty(vhc);
7608 return (MDI_FAILURE);
7609 }
7610 } else
7611 mutex_exit(&vhc->vhc_lock);
7612
7613 if (callb_delete(vhc->vhc_cbid) != 0)
7614 return (MDI_FAILURE);
7615
7616 return (MDI_SUCCESS);
7617 }
7618
7619 /*
7620 * Stop vhci cache flush thread
7621 */
7622 /* ARGSUSED */
7623 static boolean_t
7624 stop_vhcache_flush_thread(void *arg, int code)
7625 {
7626 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
7627
7628 mutex_enter(&vhc->vhc_lock);
7629 vhc->vhc_flags |= MDI_VHC_EXIT;
7630 cv_broadcast(&vhc->vhc_cv);
7631
7632 while (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
7633 mutex_exit(&vhc->vhc_lock);
7634 delay_random(mdi_delay);
7635 mutex_enter(&vhc->vhc_lock);
7636 }
7637
7638 if (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) {
7639 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
7640 mutex_exit(&vhc->vhc_lock);
7641 (void) flush_vhcache(vhc, 1);
7642 } else
7643 mutex_exit(&vhc->vhc_lock);
7644
7645 return (B_TRUE);
7646 }
7647
7648 /*
7649 * Enqueue the vhcache phci (cphci) at the tail of the list
7650 */
7651 static void
7652 enqueue_vhcache_phci(mdi_vhci_cache_t *vhcache, mdi_vhcache_phci_t *cphci)
7653 {
7654 cphci->cphci_next = NULL;
7655 if (vhcache->vhcache_phci_head == NULL)
7656 vhcache->vhcache_phci_head = cphci;
7657 else
7658 vhcache->vhcache_phci_tail->cphci_next = cphci;
7659 vhcache->vhcache_phci_tail = cphci;
7660 }
7661
7662 /*
7663 * Enqueue the vhcache pathinfo (cpi) at the tail of the list
7664 */
7665 static void
7666 enqueue_tail_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7667 mdi_vhcache_pathinfo_t *cpi)
7668 {
7669 cpi->cpi_next = NULL;
7670 if (cct->cct_cpi_head == NULL)
7671 cct->cct_cpi_head = cpi;
7672 else
7673 cct->cct_cpi_tail->cpi_next = cpi;
7674 cct->cct_cpi_tail = cpi;
7675 }
7676
7677 /*
7678 * Enqueue the vhcache pathinfo (cpi) at the correct location in the
7679 * ordered list. All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
7680 * flag set come at the beginning of the list. All cpis which have this
7681 * flag set come at the end of the list.
7682 */
7683 static void
7684 enqueue_vhcache_pathinfo(mdi_vhcache_client_t *cct,
7685 mdi_vhcache_pathinfo_t *newcpi)
7686 {
7687 mdi_vhcache_pathinfo_t *cpi, *prev_cpi;
7688
7689 if (cct->cct_cpi_head == NULL ||
7690 (newcpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))
7691 enqueue_tail_vhcache_pathinfo(cct, newcpi);
7692 else {
7693 for (cpi = cct->cct_cpi_head, prev_cpi = NULL; cpi != NULL &&
7694 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST);
7695 prev_cpi = cpi, cpi = cpi->cpi_next)
7696 ;
7697
7698 if (prev_cpi == NULL)
7699 cct->cct_cpi_head = newcpi;
7700 else
7701 prev_cpi->cpi_next = newcpi;
7702
7703 newcpi->cpi_next = cpi;
7704
7705 if (cpi == NULL)
7706 cct->cct_cpi_tail = newcpi;
7707 }
7708 }
7709
7710 /*
7711 * Enqueue the vhcache client (cct) at the tail of the list
7712 */
7713 static void
7714 enqueue_vhcache_client(mdi_vhci_cache_t *vhcache,
7715 mdi_vhcache_client_t *cct)
7716 {
7717 cct->cct_next = NULL;
7718 if (vhcache->vhcache_client_head == NULL)
7719 vhcache->vhcache_client_head = cct;
7720 else
7721 vhcache->vhcache_client_tail->cct_next = cct;
7722 vhcache->vhcache_client_tail = cct;
7723 }
7724
7725 static void
7726 free_string_array(char **str, int nelem)
7727 {
7728 int i;
7729
7730 if (str) {
7731 for (i = 0; i < nelem; i++) {
7732 if (str[i])
7733 kmem_free(str[i], strlen(str[i]) + 1);
7734 }
7735 kmem_free(str, sizeof (char *) * nelem);
7736 }
7737 }
7738
/* Free one vhcache phci entry, including its pathname string. */
static void
free_vhcache_phci(mdi_vhcache_phci_t *cphci)
{
	kmem_free(cphci->cphci_path, strlen(cphci->cphci_path) + 1);
	kmem_free(cphci, sizeof (*cphci));
}
7745
/* Free one vhcache pathinfo entry, including its address string. */
static void
free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *cpi)
{
	kmem_free(cpi->cpi_addr, strlen(cpi->cpi_addr) + 1);
	kmem_free(cpi, sizeof (*cpi));
}
7752
/*
 * Free one vhcache client entry, including its name@addr string
 * (which also serves as the client hash key).
 */
static void
free_vhcache_client(mdi_vhcache_client_t *cct)
{
	kmem_free(cct->cct_name_addr, strlen(cct->cct_name_addr) + 1);
	kmem_free(cct, sizeof (*cct));
}
7759
7760 static char *
7761 vhcache_mknameaddr(char *ct_name, char *ct_addr, int *ret_len)
7762 {
7763 char *name_addr;
7764 int len;
7765
7766 len = strlen(ct_name) + strlen(ct_addr) + 2;
7767 name_addr = kmem_alloc(len, KM_SLEEP);
7768 (void) snprintf(name_addr, len, "%s@%s", ct_name, ct_addr);
7769
7770 if (ret_len)
7771 *ret_len = len;
7772 return (name_addr);
7773 }
7774
7775 /*
7776 * Copy the contents of paddrnvl to vhci cache.
7777 * paddrnvl nvlist contains path information for a vhci client.
7778 * See the comment in mainnvl_to_vhcache() for the format of this nvlist.
7779 */
7780 static void
7781 paddrnvl_to_vhcache(nvlist_t *nvl, mdi_vhcache_phci_t *cphci_list[],
7782 mdi_vhcache_client_t *cct)
7783 {
7784 nvpair_t *nvp = NULL;
7785 mdi_vhcache_pathinfo_t *cpi;
7786 uint_t nelem;
7787 uint32_t *val;
7788
7789 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7790 ASSERT(nvpair_type(nvp) == DATA_TYPE_UINT32_ARRAY);
7791 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
7792 cpi->cpi_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7793 (void) nvpair_value_uint32_array(nvp, &val, &nelem);
7794 ASSERT(nelem == 2);
7795 cpi->cpi_cphci = cphci_list[val[0]];
7796 cpi->cpi_flags = val[1];
7797 enqueue_tail_vhcache_pathinfo(cct, cpi);
7798 }
7799 }
7800
7801 /*
7802 * Copy the contents of caddrmapnvl to vhci cache.
7803 * caddrmapnvl nvlist contains vhci client address to phci client address
7804 * mappings. See the comment in mainnvl_to_vhcache() for the format of
7805 * this nvlist.
7806 */
7807 static void
7808 caddrmapnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl,
7809 mdi_vhcache_phci_t *cphci_list[])
7810 {
7811 nvpair_t *nvp = NULL;
7812 nvlist_t *paddrnvl;
7813 mdi_vhcache_client_t *cct;
7814
7815 while ((nvp = nvlist_next_nvpair(nvl, nvp)) != NULL) {
7816 ASSERT(nvpair_type(nvp) == DATA_TYPE_NVLIST);
7817 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
7818 cct->cct_name_addr = i_ddi_strdup(nvpair_name(nvp), KM_SLEEP);
7819 (void) nvpair_value_nvlist(nvp, &paddrnvl);
7820 paddrnvl_to_vhcache(paddrnvl, cphci_list, cct);
7821 /* the client must contain at least one path */
7822 ASSERT(cct->cct_cpi_head != NULL);
7823
7824 enqueue_vhcache_client(vhcache, cct);
7825 (void) mod_hash_insert(vhcache->vhcache_client_hash,
7826 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
7827 }
7828 }
7829
7830 /*
7831 * Copy the contents of the main nvlist to vhci cache.
7832 *
7833 * VHCI busconfig cached data is stored in the form of a nvlist on the disk.
7834 * The nvlist contains the mappings between the vhci client addresses and
7835 * their corresponding phci client addresses.
7836 *
7837 * The structure of the nvlist is as follows:
7838 *
7839 * Main nvlist:
7840 * NAME TYPE DATA
7841 * version int32 version number
7842 * phcis string array array of phci paths
7843 * clientaddrmap nvlist_t c2paddrs_nvl (see below)
7844 *
7845 * structure of c2paddrs_nvl:
7846 * NAME TYPE DATA
7847 * caddr1 nvlist_t paddrs_nvl1
7848 * caddr2 nvlist_t paddrs_nvl2
7849 * ...
7850 * where caddr1, caddr2, ... are vhci client name and addresses in the
7851 * form of "<clientname>@<clientaddress>".
7852 * (for example: "ssd@2000002037cd9f72");
7853 * paddrs_nvl1, paddrs_nvl2, .. are nvlists that contain path information.
7854 *
7855 * structure of paddrs_nvl:
7856 * NAME TYPE DATA
7857 * pi_addr1 uint32_array (phci-id, cpi_flags)
7858 * pi_addr2 uint32_array (phci-id, cpi_flags)
7859 * ...
7860 * where pi_addr1, pi_addr2, ... are bus specific addresses of pathinfo nodes
7861 * (so called pi_addrs, for example: "w2100002037cd9f72,0");
7862 * phci-ids are integers that identify pHCIs to which the
7863 * the bus specific address belongs to. These integers are used as an index
7864 * into to the phcis string array in the main nvlist to get the pHCI path.
7865 */
7866 static int
7867 mainnvl_to_vhcache(mdi_vhci_cache_t *vhcache, nvlist_t *nvl)
7868 {
7869 char **phcis, **phci_namep;
7870 uint_t nphcis;
7871 mdi_vhcache_phci_t *cphci, **cphci_list;
7872 nvlist_t *caddrmapnvl;
7873 int32_t ver;
7874 int i;
7875 size_t cphci_list_size;
7876
7877 ASSERT(RW_WRITE_HELD(&vhcache->vhcache_lock));
7878
7879 if (nvlist_lookup_int32(nvl, MDI_NVPNAME_VERSION, &ver) != 0 ||
7880 ver != MDI_VHCI_CACHE_VERSION)
7881 return (MDI_FAILURE);
7882
7883 if (nvlist_lookup_string_array(nvl, MDI_NVPNAME_PHCIS, &phcis,
7884 &nphcis) != 0)
7885 return (MDI_SUCCESS);
7886
7887 ASSERT(nphcis > 0);
7888
7889 cphci_list_size = sizeof (mdi_vhcache_phci_t *) * nphcis;
7890 cphci_list = kmem_alloc(cphci_list_size, KM_SLEEP);
7891 for (i = 0, phci_namep = phcis; i < nphcis; i++, phci_namep++) {
7892 cphci = kmem_zalloc(sizeof (mdi_vhcache_phci_t), KM_SLEEP);
7893 cphci->cphci_path = i_ddi_strdup(*phci_namep, KM_SLEEP);
7894 enqueue_vhcache_phci(vhcache, cphci);
7895 cphci_list[i] = cphci;
7896 }
7897
7898 ASSERT(vhcache->vhcache_phci_head != NULL);
7899
7900 if (nvlist_lookup_nvlist(nvl, MDI_NVPNAME_CTADDRMAP, &caddrmapnvl) == 0)
7901 caddrmapnvl_to_vhcache(vhcache, caddrmapnvl, cphci_list);
7902
7903 kmem_free(cphci_list, cphci_list_size);
7904 return (MDI_SUCCESS);
7905 }
7906
7907 /*
7908 * Build paddrnvl for the specified client using the information in the
7909 * vhci cache and add it to the caddrmapnnvl.
7910 * Returns 0 on success, errno on failure.
7911 */
7912 static int
7913 vhcache_to_paddrnvl(mdi_vhci_cache_t *vhcache, mdi_vhcache_client_t *cct,
7914 nvlist_t *caddrmapnvl)
7915 {
7916 mdi_vhcache_pathinfo_t *cpi;
7917 nvlist_t *nvl;
7918 int err;
7919 uint32_t val[2];
7920
7921 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7922
7923 if ((err = nvlist_alloc(&nvl, 0, KM_SLEEP)) != 0)
7924 return (err);
7925
7926 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
7927 val[0] = cpi->cpi_cphci->cphci_id;
7928 val[1] = cpi->cpi_flags;
7929 if ((err = nvlist_add_uint32_array(nvl, cpi->cpi_addr, val, 2))
7930 != 0)
7931 goto out;
7932 }
7933
7934 err = nvlist_add_nvlist(caddrmapnvl, cct->cct_name_addr, nvl);
7935 out:
7936 nvlist_free(nvl);
7937 return (err);
7938 }
7939
7940 /*
7941 * Build caddrmapnvl using the information in the vhci cache
7942 * and add it to the mainnvl.
7943 * Returns 0 on success, errno on failure.
7944 */
7945 static int
7946 vhcache_to_caddrmapnvl(mdi_vhci_cache_t *vhcache, nvlist_t *mainnvl)
7947 {
7948 mdi_vhcache_client_t *cct;
7949 nvlist_t *nvl;
7950 int err;
7951
7952 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
7953
7954 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0)
7955 return (err);
7956
7957 for (cct = vhcache->vhcache_client_head; cct != NULL;
7958 cct = cct->cct_next) {
7959 if ((err = vhcache_to_paddrnvl(vhcache, cct, nvl)) != 0)
7960 goto out;
7961 }
7962
7963 err = nvlist_add_nvlist(mainnvl, MDI_NVPNAME_CTADDRMAP, nvl);
7964 out:
7965 nvlist_free(nvl);
7966 return (err);
7967 }
7968
7969 /*
7970 * Build nvlist using the information in the vhci cache.
7971 * See the comment in mainnvl_to_vhcache() for the format of the nvlist.
7972 * Returns nvl on success, NULL on failure.
7973 */
7974 static nvlist_t *
7975 vhcache_to_mainnvl(mdi_vhci_cache_t *vhcache)
7976 {
7977 mdi_vhcache_phci_t *cphci;
7978 uint_t phci_count;
7979 char **phcis;
7980 nvlist_t *nvl;
7981 int err, i;
7982
7983 if ((err = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP)) != 0) {
7984 nvl = NULL;
7985 goto out;
7986 }
7987
7988 if ((err = nvlist_add_int32(nvl, MDI_NVPNAME_VERSION,
7989 MDI_VHCI_CACHE_VERSION)) != 0)
7990 goto out;
7991
7992 rw_enter(&vhcache->vhcache_lock, RW_READER);
7993 if (vhcache->vhcache_phci_head == NULL) {
7994 rw_exit(&vhcache->vhcache_lock);
7995 return (nvl);
7996 }
7997
7998 phci_count = 0;
7999 for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8000 cphci = cphci->cphci_next)
8001 cphci->cphci_id = phci_count++;
8002
8003 /* build phci pathname list */
8004 phcis = kmem_alloc(sizeof (char *) * phci_count, KM_SLEEP);
8005 for (cphci = vhcache->vhcache_phci_head, i = 0; cphci != NULL;
8006 cphci = cphci->cphci_next, i++)
8007 phcis[i] = i_ddi_strdup(cphci->cphci_path, KM_SLEEP);
8008
8009 err = nvlist_add_string_array(nvl, MDI_NVPNAME_PHCIS, phcis,
8010 phci_count);
8011 free_string_array(phcis, phci_count);
8012
8013 if (err == 0 &&
8014 (err = vhcache_to_caddrmapnvl(vhcache, nvl)) == 0) {
8015 rw_exit(&vhcache->vhcache_lock);
8016 return (nvl);
8017 }
8018
8019 rw_exit(&vhcache->vhcache_lock);
8020 out:
8021 nvlist_free(nvl);
8022 return (NULL);
8023 }
8024
8025 /*
8026 * Lookup vhcache phci structure for the specified phci path.
8027 */
8028 static mdi_vhcache_phci_t *
8029 lookup_vhcache_phci_by_name(mdi_vhci_cache_t *vhcache, char *phci_path)
8030 {
8031 mdi_vhcache_phci_t *cphci;
8032
8033 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8034
8035 for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8036 cphci = cphci->cphci_next) {
8037 if (strcmp(cphci->cphci_path, phci_path) == 0)
8038 return (cphci);
8039 }
8040
8041 return (NULL);
8042 }
8043
8044 /*
8045 * Lookup vhcache phci structure for the specified phci.
8046 */
8047 static mdi_vhcache_phci_t *
8048 lookup_vhcache_phci_by_addr(mdi_vhci_cache_t *vhcache, mdi_phci_t *ph)
8049 {
8050 mdi_vhcache_phci_t *cphci;
8051
8052 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8053
8054 for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8055 cphci = cphci->cphci_next) {
8056 if (cphci->cphci_phci == ph)
8057 return (cphci);
8058 }
8059
8060 return (NULL);
8061 }
8062
8063 /*
8064 * Add the specified phci to the vhci cache if not already present.
8065 */
8066 static void
8067 vhcache_phci_add(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
8068 {
8069 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8070 mdi_vhcache_phci_t *cphci;
8071 char *pathname;
8072 int cache_updated;
8073
8074 rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8075
8076 pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
8077 (void) ddi_pathname(ph->ph_dip, pathname);
8078 if ((cphci = lookup_vhcache_phci_by_name(vhcache, pathname))
8079 != NULL) {
8080 cphci->cphci_phci = ph;
8081 cache_updated = 0;
8082 } else {
8083 cphci = kmem_zalloc(sizeof (*cphci), KM_SLEEP);
8084 cphci->cphci_path = i_ddi_strdup(pathname, KM_SLEEP);
8085 cphci->cphci_phci = ph;
8086 enqueue_vhcache_phci(vhcache, cphci);
8087 cache_updated = 1;
8088 }
8089
8090 rw_exit(&vhcache->vhcache_lock);
8091
8092 /*
8093 * Since a new phci has been added, reset
8094 * vhc_path_discovery_cutoff_time to allow for discovery of paths
8095 * during next vhcache_discover_paths().
8096 */
8097 mutex_enter(&vhc->vhc_lock);
8098 vhc->vhc_path_discovery_cutoff_time = 0;
8099 mutex_exit(&vhc->vhc_lock);
8100
8101 kmem_free(pathname, MAXPATHLEN);
8102 if (cache_updated)
8103 vhcache_dirty(vhc);
8104 }
8105
8106 /*
8107 * Remove the reference to the specified phci from the vhci cache.
8108 */
8109 static void
8110 vhcache_phci_remove(mdi_vhci_config_t *vhc, mdi_phci_t *ph)
8111 {
8112 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8113 mdi_vhcache_phci_t *cphci;
8114
8115 rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8116 if ((cphci = lookup_vhcache_phci_by_addr(vhcache, ph)) != NULL) {
8117 /* do not remove the actual mdi_vhcache_phci structure */
8118 cphci->cphci_phci = NULL;
8119 }
8120 rw_exit(&vhcache->vhcache_lock);
8121 }
8122
8123 static void
8124 init_vhcache_lookup_token(mdi_vhcache_lookup_token_t *dst,
8125 mdi_vhcache_lookup_token_t *src)
8126 {
8127 if (src == NULL) {
8128 dst->lt_cct = NULL;
8129 dst->lt_cct_lookup_time = 0;
8130 } else {
8131 dst->lt_cct = src->lt_cct;
8132 dst->lt_cct_lookup_time = src->lt_cct_lookup_time;
8133 }
8134 }
8135
8136 /*
8137 * Look up vhcache client for the specified client.
8138 */
8139 static mdi_vhcache_client_t *
8140 lookup_vhcache_client(mdi_vhci_cache_t *vhcache, char *ct_name, char *ct_addr,
8141 mdi_vhcache_lookup_token_t *token)
8142 {
8143 mod_hash_val_t hv;
8144 char *name_addr;
8145 int len;
8146
8147 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8148
8149 /*
8150 * If no vhcache clean occurred since the last lookup, we can
8151 * simply return the cct from the last lookup operation.
8152 * It works because ccts are never freed except during the vhcache
8153 * cleanup operation.
8154 */
8155 if (token != NULL &&
8156 vhcache->vhcache_clean_time < token->lt_cct_lookup_time)
8157 return (token->lt_cct);
8158
8159 name_addr = vhcache_mknameaddr(ct_name, ct_addr, &len);
8160 if (mod_hash_find(vhcache->vhcache_client_hash,
8161 (mod_hash_key_t)name_addr, &hv) == 0) {
8162 if (token) {
8163 token->lt_cct = (mdi_vhcache_client_t *)hv;
8164 token->lt_cct_lookup_time = ddi_get_lbolt64();
8165 }
8166 } else {
8167 if (token) {
8168 token->lt_cct = NULL;
8169 token->lt_cct_lookup_time = 0;
8170 }
8171 hv = NULL;
8172 }
8173 kmem_free(name_addr, len);
8174 return ((mdi_vhcache_client_t *)hv);
8175 }
8176
8177 /*
8178 * Add the specified path to the vhci cache if not already present.
8179 * Also add the vhcache client for the client corresponding to this path
8180 * if it doesn't already exist.
8181 */
8182 static void
8183 vhcache_pi_add(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
8184 {
8185 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8186 mdi_vhcache_client_t *cct;
8187 mdi_vhcache_pathinfo_t *cpi;
8188 mdi_phci_t *ph = pip->pi_phci;
8189 mdi_client_t *ct = pip->pi_client;
8190 int cache_updated = 0;
8191
8192 rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8193
8194 /* if vhcache client for this pip doesn't already exist, add it */
8195 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
8196 NULL)) == NULL) {
8197 cct = kmem_zalloc(sizeof (*cct), KM_SLEEP);
8198 cct->cct_name_addr = vhcache_mknameaddr(ct->ct_drvname,
8199 ct->ct_guid, NULL);
8200 enqueue_vhcache_client(vhcache, cct);
8201 (void) mod_hash_insert(vhcache->vhcache_client_hash,
8202 (mod_hash_key_t)cct->cct_name_addr, (mod_hash_val_t)cct);
8203 cache_updated = 1;
8204 }
8205
8206 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8207 if (cpi->cpi_cphci->cphci_phci == ph &&
8208 strcmp(cpi->cpi_addr, pip->pi_addr) == 0) {
8209 cpi->cpi_pip = pip;
8210 if (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST) {
8211 cpi->cpi_flags &=
8212 ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8213 sort_vhcache_paths(cct);
8214 cache_updated = 1;
8215 }
8216 break;
8217 }
8218 }
8219
8220 if (cpi == NULL) {
8221 cpi = kmem_zalloc(sizeof (*cpi), KM_SLEEP);
8222 cpi->cpi_addr = i_ddi_strdup(pip->pi_addr, KM_SLEEP);
8223 cpi->cpi_cphci = lookup_vhcache_phci_by_addr(vhcache, ph);
8224 ASSERT(cpi->cpi_cphci != NULL);
8225 cpi->cpi_pip = pip;
8226 enqueue_vhcache_pathinfo(cct, cpi);
8227 cache_updated = 1;
8228 }
8229
8230 rw_exit(&vhcache->vhcache_lock);
8231
8232 if (cache_updated)
8233 vhcache_dirty(vhc);
8234 }
8235
8236 /*
8237 * Remove the reference to the specified path from the vhci cache.
8238 */
8239 static void
8240 vhcache_pi_remove(mdi_vhci_config_t *vhc, struct mdi_pathinfo *pip)
8241 {
8242 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8243 mdi_client_t *ct = pip->pi_client;
8244 mdi_vhcache_client_t *cct;
8245 mdi_vhcache_pathinfo_t *cpi;
8246
8247 rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8248 if ((cct = lookup_vhcache_client(vhcache, ct->ct_drvname, ct->ct_guid,
8249 NULL)) != NULL) {
8250 for (cpi = cct->cct_cpi_head; cpi != NULL;
8251 cpi = cpi->cpi_next) {
8252 if (cpi->cpi_pip == pip) {
8253 cpi->cpi_pip = NULL;
8254 break;
8255 }
8256 }
8257 }
8258 rw_exit(&vhcache->vhcache_lock);
8259 }
8260
8261 /*
8262 * Flush the vhci cache to disk.
8263 * Returns MDI_SUCCESS on success, MDI_FAILURE on failure.
8264 */
8265 static int
8266 flush_vhcache(mdi_vhci_config_t *vhc, int force_flag)
8267 {
8268 nvlist_t *nvl;
8269 int err;
8270 int rv;
8271
8272 /*
8273 * It is possible that the system may shutdown before
8274 * i_ddi_io_initialized (during stmsboot for example). To allow for
8275 * flushing the cache in this case do not check for
8276 * i_ddi_io_initialized when force flag is set.
8277 */
8278 if (force_flag == 0 && !i_ddi_io_initialized())
8279 return (MDI_FAILURE);
8280
8281 if ((nvl = vhcache_to_mainnvl(&vhc->vhc_vhcache)) != NULL) {
8282 err = fwrite_nvlist(vhc->vhc_vhcache_filename, nvl);
8283 nvlist_free(nvl);
8284 } else
8285 err = EFAULT;
8286
8287 rv = MDI_SUCCESS;
8288 mutex_enter(&vhc->vhc_lock);
8289 if (err != 0) {
8290 if (err == EROFS) {
8291 vhc->vhc_flags |= MDI_VHC_READONLY_FS;
8292 vhc->vhc_flags &= ~(MDI_VHC_VHCACHE_FLUSH_ERROR |
8293 MDI_VHC_VHCACHE_DIRTY);
8294 } else {
8295 if (!(vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR)) {
8296 cmn_err(CE_CONT, "%s: update failed\n",
8297 vhc->vhc_vhcache_filename);
8298 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_ERROR;
8299 }
8300 rv = MDI_FAILURE;
8301 }
8302 } else if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_ERROR) {
8303 cmn_err(CE_CONT,
8304 "%s: update now ok\n", vhc->vhc_vhcache_filename);
8305 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_ERROR;
8306 }
8307 mutex_exit(&vhc->vhc_lock);
8308
8309 return (rv);
8310 }
8311
8312 /*
8313 * Call flush_vhcache() to flush the vhci cache at the scheduled time.
8314 * Exits itself if left idle for the idle timeout period.
8315 */
8316 static void
8317 vhcache_flush_thread(void *arg)
8318 {
8319 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
8320 clock_t idle_time, quit_at_ticks;
8321 callb_cpr_t cprinfo;
8322
8323 /* number of seconds to sleep idle before exiting */
8324 idle_time = mdi_vhcache_flush_daemon_idle_time * TICKS_PER_SECOND;
8325
8326 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
8327 "mdi_vhcache_flush");
8328 mutex_enter(&vhc->vhc_lock);
8329 for (; ; ) {
8330 while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
8331 (vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY)) {
8332 if (ddi_get_lbolt() < vhc->vhc_flush_at_ticks) {
8333 CALLB_CPR_SAFE_BEGIN(&cprinfo);
8334 (void) cv_timedwait(&vhc->vhc_cv,
8335 &vhc->vhc_lock, vhc->vhc_flush_at_ticks);
8336 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
8337 } else {
8338 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_DIRTY;
8339 mutex_exit(&vhc->vhc_lock);
8340
8341 if (flush_vhcache(vhc, 0) != MDI_SUCCESS)
8342 vhcache_dirty(vhc);
8343
8344 mutex_enter(&vhc->vhc_lock);
8345 }
8346 }
8347
8348 quit_at_ticks = ddi_get_lbolt() + idle_time;
8349
8350 while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
8351 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY) &&
8352 ddi_get_lbolt() < quit_at_ticks) {
8353 CALLB_CPR_SAFE_BEGIN(&cprinfo);
8354 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
8355 quit_at_ticks);
8356 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
8357 }
8358
8359 if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
8360 !(vhc->vhc_flags & MDI_VHC_VHCACHE_DIRTY))
8361 goto out;
8362 }
8363
8364 out:
8365 vhc->vhc_flags &= ~MDI_VHC_VHCACHE_FLUSH_THREAD;
8366 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */
8367 CALLB_CPR_EXIT(&cprinfo);
8368 }
8369
8370 /*
8371 * Make vhci cache dirty and schedule flushing by vhcache flush thread.
8372 */
8373 static void
8374 vhcache_dirty(mdi_vhci_config_t *vhc)
8375 {
8376 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8377 int create_thread;
8378
8379 rw_enter(&vhcache->vhcache_lock, RW_READER);
8380 /* do not flush cache until the cache is fully built */
8381 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
8382 rw_exit(&vhcache->vhcache_lock);
8383 return;
8384 }
8385 rw_exit(&vhcache->vhcache_lock);
8386
8387 mutex_enter(&vhc->vhc_lock);
8388 if (vhc->vhc_flags & MDI_VHC_READONLY_FS) {
8389 mutex_exit(&vhc->vhc_lock);
8390 return;
8391 }
8392
8393 vhc->vhc_flags |= MDI_VHC_VHCACHE_DIRTY;
8394 vhc->vhc_flush_at_ticks = ddi_get_lbolt() +
8395 mdi_vhcache_flush_delay * TICKS_PER_SECOND;
8396 if (vhc->vhc_flags & MDI_VHC_VHCACHE_FLUSH_THREAD) {
8397 cv_broadcast(&vhc->vhc_cv);
8398 create_thread = 0;
8399 } else {
8400 vhc->vhc_flags |= MDI_VHC_VHCACHE_FLUSH_THREAD;
8401 create_thread = 1;
8402 }
8403 mutex_exit(&vhc->vhc_lock);
8404
8405 if (create_thread)
8406 (void) thread_create(NULL, 0, vhcache_flush_thread, vhc,
8407 0, &p0, TS_RUN, minclsyspri);
8408 }
8409
8410 /*
8411 * phci bus config structure - one for for each phci bus config operation that
8412 * we initiate on behalf of a vhci.
8413 */
8414 typedef struct mdi_phci_bus_config_s {
8415 char *phbc_phci_path;
8416 struct mdi_vhci_bus_config_s *phbc_vhbusconfig; /* vhci bus config */
8417 struct mdi_phci_bus_config_s *phbc_next;
8418 } mdi_phci_bus_config_t;
8419
/* vhci bus config structure - one for each vhci bus config operation */
typedef struct mdi_vhci_bus_config_s {
	ddi_bus_config_op_t vhbc_op;	/* bus config op */
	major_t vhbc_op_major;		/* bus config op major */
	uint_t vhbc_op_flags;		/* bus config op flags */
	kmutex_t vhbc_lock;		/* protects vhbc_thr_count */
	kcondvar_t vhbc_cv;		/* signaled when thr_count drops to 0 */
	int vhbc_thr_count;		/* # outstanding phci config threads */
} mdi_vhci_bus_config_t;
8429
8430 /*
8431 * bus config the specified phci
8432 */
8433 static void
8434 bus_config_phci(void *arg)
8435 {
8436 mdi_phci_bus_config_t *phbc = (mdi_phci_bus_config_t *)arg;
8437 mdi_vhci_bus_config_t *vhbc = phbc->phbc_vhbusconfig;
8438 dev_info_t *ph_dip;
8439
8440 /*
8441 * first configure all path components upto phci and then configure
8442 * the phci children.
8443 */
8444 if ((ph_dip = e_ddi_hold_devi_by_path(phbc->phbc_phci_path, 0))
8445 != NULL) {
8446 if (vhbc->vhbc_op == BUS_CONFIG_DRIVER ||
8447 vhbc->vhbc_op == BUS_UNCONFIG_DRIVER) {
8448 (void) ndi_devi_config_driver(ph_dip,
8449 vhbc->vhbc_op_flags,
8450 vhbc->vhbc_op_major);
8451 } else
8452 (void) ndi_devi_config(ph_dip,
8453 vhbc->vhbc_op_flags);
8454
8455 /* release the hold that e_ddi_hold_devi_by_path() placed */
8456 ndi_rele_devi(ph_dip);
8457 }
8458
8459 kmem_free(phbc->phbc_phci_path, strlen(phbc->phbc_phci_path) + 1);
8460 kmem_free(phbc, sizeof (*phbc));
8461
8462 mutex_enter(&vhbc->vhbc_lock);
8463 vhbc->vhbc_thr_count--;
8464 if (vhbc->vhbc_thr_count == 0)
8465 cv_broadcast(&vhbc->vhbc_cv);
8466 mutex_exit(&vhbc->vhbc_lock);
8467 }
8468
8469 /*
8470 * Bus config all phcis associated with the vhci in parallel.
8471 * op must be BUS_CONFIG_DRIVER or BUS_CONFIG_ALL.
8472 */
8473 static void
8474 bus_config_all_phcis(mdi_vhci_cache_t *vhcache, uint_t flags,
8475 ddi_bus_config_op_t op, major_t maj)
8476 {
8477 mdi_phci_bus_config_t *phbc_head = NULL, *phbc, *phbc_next;
8478 mdi_vhci_bus_config_t *vhbc;
8479 mdi_vhcache_phci_t *cphci;
8480
8481 rw_enter(&vhcache->vhcache_lock, RW_READER);
8482 if (vhcache->vhcache_phci_head == NULL) {
8483 rw_exit(&vhcache->vhcache_lock);
8484 return;
8485 }
8486
8487 vhbc = kmem_zalloc(sizeof (*vhbc), KM_SLEEP);
8488
8489 for (cphci = vhcache->vhcache_phci_head; cphci != NULL;
8490 cphci = cphci->cphci_next) {
8491 /* skip phcis that haven't attached before root is available */
8492 if (!modrootloaded && (cphci->cphci_phci == NULL))
8493 continue;
8494 phbc = kmem_zalloc(sizeof (*phbc), KM_SLEEP);
8495 phbc->phbc_phci_path = i_ddi_strdup(cphci->cphci_path,
8496 KM_SLEEP);
8497 phbc->phbc_vhbusconfig = vhbc;
8498 phbc->phbc_next = phbc_head;
8499 phbc_head = phbc;
8500 vhbc->vhbc_thr_count++;
8501 }
8502 rw_exit(&vhcache->vhcache_lock);
8503
8504 vhbc->vhbc_op = op;
8505 vhbc->vhbc_op_major = maj;
8506 vhbc->vhbc_op_flags = NDI_NO_EVENT |
8507 (flags & (NDI_CONFIG_REPROBE | NDI_DRV_CONF_REPROBE));
8508 mutex_init(&vhbc->vhbc_lock, NULL, MUTEX_DEFAULT, NULL);
8509 cv_init(&vhbc->vhbc_cv, NULL, CV_DRIVER, NULL);
8510
8511 /* now create threads to initiate bus config on all phcis in parallel */
8512 for (phbc = phbc_head; phbc != NULL; phbc = phbc_next) {
8513 phbc_next = phbc->phbc_next;
8514 if (mdi_mtc_off)
8515 bus_config_phci((void *)phbc);
8516 else
8517 (void) thread_create(NULL, 0, bus_config_phci, phbc,
8518 0, &p0, TS_RUN, minclsyspri);
8519 }
8520
8521 mutex_enter(&vhbc->vhbc_lock);
8522 /* wait until all threads exit */
8523 while (vhbc->vhbc_thr_count > 0)
8524 cv_wait(&vhbc->vhbc_cv, &vhbc->vhbc_lock);
8525 mutex_exit(&vhbc->vhbc_lock);
8526
8527 mutex_destroy(&vhbc->vhbc_lock);
8528 cv_destroy(&vhbc->vhbc_cv);
8529 kmem_free(vhbc, sizeof (*vhbc));
8530 }
8531
8532 /*
8533 * Single threaded version of bus_config_all_phcis()
8534 */
8535 static void
8536 st_bus_config_all_phcis(mdi_vhci_config_t *vhc, uint_t flags,
8537 ddi_bus_config_op_t op, major_t maj)
8538 {
8539 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8540
8541 single_threaded_vhconfig_enter(vhc);
8542 bus_config_all_phcis(vhcache, flags, op, maj);
8543 single_threaded_vhconfig_exit(vhc);
8544 }
8545
8546 /*
8547 * Perform BUS_CONFIG_ONE on the specified child of the phci.
8548 * The path includes the child component in addition to the phci path.
8549 */
8550 static int
8551 bus_config_one_phci_child(char *path)
8552 {
8553 dev_info_t *ph_dip, *child;
8554 char *devnm;
8555 int rv = MDI_FAILURE;
8556
8557 /* extract the child component of the phci */
8558 devnm = strrchr(path, '/');
8559 *devnm++ = '\0';
8560
8561 /*
8562 * first configure all path components upto phci and then
8563 * configure the phci child.
8564 */
8565 if ((ph_dip = e_ddi_hold_devi_by_path(path, 0)) != NULL) {
8566 if (ndi_devi_config_one(ph_dip, devnm, &child, NDI_NO_EVENT) ==
8567 NDI_SUCCESS) {
8568 /*
8569 * release the hold that ndi_devi_config_one() placed
8570 */
8571 ndi_rele_devi(child);
8572 rv = MDI_SUCCESS;
8573 }
8574
8575 /* release the hold that e_ddi_hold_devi_by_path() placed */
8576 ndi_rele_devi(ph_dip);
8577 }
8578
8579 devnm--;
8580 *devnm = '/';
8581 return (rv);
8582 }
8583
8584 /*
8585 * Build a list of phci client paths for the specified vhci client.
8586 * The list includes only those phci client paths which aren't configured yet.
8587 */
8588 static mdi_phys_path_t *
8589 build_phclient_path_list(mdi_vhcache_client_t *cct, char *ct_name)
8590 {
8591 mdi_vhcache_pathinfo_t *cpi;
8592 mdi_phys_path_t *pp_head = NULL, *pp_tail = NULL, *pp;
8593 int config_path, len;
8594
8595 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8596 /*
8597 * include only those paths that aren't configured.
8598 */
8599 config_path = 0;
8600 if (cpi->cpi_pip == NULL)
8601 config_path = 1;
8602 else {
8603 MDI_PI_LOCK(cpi->cpi_pip);
8604 if (MDI_PI_IS_INIT(cpi->cpi_pip))
8605 config_path = 1;
8606 MDI_PI_UNLOCK(cpi->cpi_pip);
8607 }
8608
8609 if (config_path) {
8610 pp = kmem_alloc(sizeof (*pp), KM_SLEEP);
8611 len = strlen(cpi->cpi_cphci->cphci_path) +
8612 strlen(ct_name) + strlen(cpi->cpi_addr) + 3;
8613 pp->phys_path = kmem_alloc(len, KM_SLEEP);
8614 (void) snprintf(pp->phys_path, len, "%s/%s@%s",
8615 cpi->cpi_cphci->cphci_path, ct_name,
8616 cpi->cpi_addr);
8617 pp->phys_path_next = NULL;
8618
8619 if (pp_head == NULL)
8620 pp_head = pp;
8621 else
8622 pp_tail->phys_path_next = pp;
8623 pp_tail = pp;
8624 }
8625 }
8626
8627 return (pp_head);
8628 }
8629
8630 /*
8631 * Free the memory allocated for phci client path list.
8632 */
8633 static void
8634 free_phclient_path_list(mdi_phys_path_t *pp_head)
8635 {
8636 mdi_phys_path_t *pp, *pp_next;
8637
8638 for (pp = pp_head; pp != NULL; pp = pp_next) {
8639 pp_next = pp->phys_path_next;
8640 kmem_free(pp->phys_path, strlen(pp->phys_path) + 1);
8641 kmem_free(pp, sizeof (*pp));
8642 }
8643 }
8644
8645 /*
8646 * Allocated async client structure and initialize with the specified values.
8647 */
8648 static mdi_async_client_config_t *
8649 alloc_async_client_config(char *ct_name, char *ct_addr,
8650 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8651 {
8652 mdi_async_client_config_t *acc;
8653
8654 acc = kmem_alloc(sizeof (*acc), KM_SLEEP);
8655 acc->acc_ct_name = i_ddi_strdup(ct_name, KM_SLEEP);
8656 acc->acc_ct_addr = i_ddi_strdup(ct_addr, KM_SLEEP);
8657 acc->acc_phclient_path_list_head = pp_head;
8658 init_vhcache_lookup_token(&acc->acc_token, tok);
8659 acc->acc_next = NULL;
8660 return (acc);
8661 }
8662
8663 /*
8664 * Free the memory allocated for the async client structure and their members.
8665 */
8666 static void
8667 free_async_client_config(mdi_async_client_config_t *acc)
8668 {
8669 if (acc->acc_phclient_path_list_head)
8670 free_phclient_path_list(acc->acc_phclient_path_list_head);
8671 kmem_free(acc->acc_ct_name, strlen(acc->acc_ct_name) + 1);
8672 kmem_free(acc->acc_ct_addr, strlen(acc->acc_ct_addr) + 1);
8673 kmem_free(acc, sizeof (*acc));
8674 }
8675
8676 /*
8677 * Sort vhcache pathinfos (cpis) of the specified client.
8678 * All cpis which do not have MDI_CPI_HINT_PATH_DOES_NOT_EXIST
8679 * flag set come at the beginning of the list. All cpis which have this
8680 * flag set come at the end of the list.
8681 */
8682 static void
8683 sort_vhcache_paths(mdi_vhcache_client_t *cct)
8684 {
8685 mdi_vhcache_pathinfo_t *cpi, *cpi_next, *cpi_head;
8686
8687 cpi_head = cct->cct_cpi_head;
8688 cct->cct_cpi_head = cct->cct_cpi_tail = NULL;
8689 for (cpi = cpi_head; cpi != NULL; cpi = cpi_next) {
8690 cpi_next = cpi->cpi_next;
8691 enqueue_vhcache_pathinfo(cct, cpi);
8692 }
8693 }
8694
8695 /*
8696 * Verify whether MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag setting is correct for
8697 * every vhcache pathinfo of the specified client. If not adjust the flag
8698 * setting appropriately.
8699 *
8700 * Note that MDI_CPI_HINT_PATH_DOES_NOT_EXIST flag is persisted in the
8701 * on-disk vhci cache. So every time this flag is updated the cache must be
8702 * flushed.
8703 */
8704 static void
8705 adjust_sort_vhcache_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8706 mdi_vhcache_lookup_token_t *tok)
8707 {
8708 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8709 mdi_vhcache_client_t *cct;
8710 mdi_vhcache_pathinfo_t *cpi;
8711
8712 rw_enter(&vhcache->vhcache_lock, RW_READER);
8713 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, tok))
8714 == NULL) {
8715 rw_exit(&vhcache->vhcache_lock);
8716 return;
8717 }
8718
8719 /*
8720 * to avoid unnecessary on-disk cache updates, first check if an
8721 * update is really needed. If no update is needed simply return.
8722 */
8723 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8724 if ((cpi->cpi_pip != NULL &&
8725 (cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST)) ||
8726 (cpi->cpi_pip == NULL &&
8727 !(cpi->cpi_flags & MDI_CPI_HINT_PATH_DOES_NOT_EXIST))) {
8728 break;
8729 }
8730 }
8731 if (cpi == NULL) {
8732 rw_exit(&vhcache->vhcache_lock);
8733 return;
8734 }
8735
8736 if (rw_tryupgrade(&vhcache->vhcache_lock) == 0) {
8737 rw_exit(&vhcache->vhcache_lock);
8738 rw_enter(&vhcache->vhcache_lock, RW_WRITER);
8739 if ((cct = lookup_vhcache_client(vhcache, ct_name, ct_addr,
8740 tok)) == NULL) {
8741 rw_exit(&vhcache->vhcache_lock);
8742 return;
8743 }
8744 }
8745
8746 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8747 if (cpi->cpi_pip != NULL)
8748 cpi->cpi_flags &= ~MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8749 else
8750 cpi->cpi_flags |= MDI_CPI_HINT_PATH_DOES_NOT_EXIST;
8751 }
8752 sort_vhcache_paths(cct);
8753
8754 rw_exit(&vhcache->vhcache_lock);
8755 vhcache_dirty(vhc);
8756 }
8757
8758 /*
8759 * Configure all specified paths of the client.
8760 */
8761 static void
8762 config_client_paths_sync(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8763 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8764 {
8765 mdi_phys_path_t *pp;
8766
8767 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next)
8768 (void) bus_config_one_phci_child(pp->phys_path);
8769 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, tok);
8770 }
8771
8772 /*
8773 * Dequeue elements from vhci async client config list and bus configure
8774 * their corresponding phci clients.
8775 */
8776 static void
8777 config_client_paths_thread(void *arg)
8778 {
8779 mdi_vhci_config_t *vhc = (mdi_vhci_config_t *)arg;
8780 mdi_async_client_config_t *acc;
8781 clock_t quit_at_ticks;
8782 clock_t idle_time = mdi_async_config_idle_time * TICKS_PER_SECOND;
8783 callb_cpr_t cprinfo;
8784
8785 CALLB_CPR_INIT(&cprinfo, &vhc->vhc_lock, callb_generic_cpr,
8786 "mdi_config_client_paths");
8787
8788 for (; ; ) {
8789 quit_at_ticks = ddi_get_lbolt() + idle_time;
8790
8791 mutex_enter(&vhc->vhc_lock);
8792 while (!(vhc->vhc_flags & MDI_VHC_EXIT) &&
8793 vhc->vhc_acc_list_head == NULL &&
8794 ddi_get_lbolt() < quit_at_ticks) {
8795 CALLB_CPR_SAFE_BEGIN(&cprinfo);
8796 (void) cv_timedwait(&vhc->vhc_cv, &vhc->vhc_lock,
8797 quit_at_ticks);
8798 CALLB_CPR_SAFE_END(&cprinfo, &vhc->vhc_lock);
8799 }
8800
8801 if ((vhc->vhc_flags & MDI_VHC_EXIT) ||
8802 vhc->vhc_acc_list_head == NULL)
8803 goto out;
8804
8805 acc = vhc->vhc_acc_list_head;
8806 vhc->vhc_acc_list_head = acc->acc_next;
8807 if (vhc->vhc_acc_list_head == NULL)
8808 vhc->vhc_acc_list_tail = NULL;
8809 vhc->vhc_acc_count--;
8810 mutex_exit(&vhc->vhc_lock);
8811
8812 config_client_paths_sync(vhc, acc->acc_ct_name,
8813 acc->acc_ct_addr, acc->acc_phclient_path_list_head,
8814 &acc->acc_token);
8815
8816 free_async_client_config(acc);
8817 }
8818
8819 out:
8820 vhc->vhc_acc_thrcount--;
8821 /* CALLB_CPR_EXIT releases the vhc->vhc_lock */
8822 CALLB_CPR_EXIT(&cprinfo);
8823 }
8824
8825 /*
8826 * Arrange for all the phci client paths (pp_head) for the specified client
8827 * to be bus configured asynchronously by a thread.
8828 */
8829 static void
8830 config_client_paths_async(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr,
8831 mdi_phys_path_t *pp_head, mdi_vhcache_lookup_token_t *tok)
8832 {
8833 mdi_async_client_config_t *acc, *newacc;
8834 int create_thread;
8835
8836 if (pp_head == NULL)
8837 return;
8838
8839 if (mdi_mtc_off) {
8840 config_client_paths_sync(vhc, ct_name, ct_addr, pp_head, tok);
8841 free_phclient_path_list(pp_head);
8842 return;
8843 }
8844
8845 newacc = alloc_async_client_config(ct_name, ct_addr, pp_head, tok);
8846 ASSERT(newacc);
8847
8848 mutex_enter(&vhc->vhc_lock);
8849 for (acc = vhc->vhc_acc_list_head; acc != NULL; acc = acc->acc_next) {
8850 if (strcmp(ct_name, acc->acc_ct_name) == 0 &&
8851 strcmp(ct_addr, acc->acc_ct_addr) == 0) {
8852 free_async_client_config(newacc);
8853 mutex_exit(&vhc->vhc_lock);
8854 return;
8855 }
8856 }
8857
8858 if (vhc->vhc_acc_list_head == NULL)
8859 vhc->vhc_acc_list_head = newacc;
8860 else
8861 vhc->vhc_acc_list_tail->acc_next = newacc;
8862 vhc->vhc_acc_list_tail = newacc;
8863 vhc->vhc_acc_count++;
8864 if (vhc->vhc_acc_count <= vhc->vhc_acc_thrcount) {
8865 cv_broadcast(&vhc->vhc_cv);
8866 create_thread = 0;
8867 } else {
8868 vhc->vhc_acc_thrcount++;
8869 create_thread = 1;
8870 }
8871 mutex_exit(&vhc->vhc_lock);
8872
8873 if (create_thread)
8874 (void) thread_create(NULL, 0, config_client_paths_thread, vhc,
8875 0, &p0, TS_RUN, minclsyspri);
8876 }
8877
8878 /*
8879 * Return number of online paths for the specified client.
8880 */
8881 static int
8882 nonline_paths(mdi_vhcache_client_t *cct)
8883 {
8884 mdi_vhcache_pathinfo_t *cpi;
8885 int online_count = 0;
8886
8887 for (cpi = cct->cct_cpi_head; cpi != NULL; cpi = cpi->cpi_next) {
8888 if (cpi->cpi_pip != NULL) {
8889 MDI_PI_LOCK(cpi->cpi_pip);
8890 if (cpi->cpi_pip->pi_state == MDI_PATHINFO_STATE_ONLINE)
8891 online_count++;
8892 MDI_PI_UNLOCK(cpi->cpi_pip);
8893 }
8894 }
8895
8896 return (online_count);
8897 }
8898
8899 /*
8900 * Bus configure all paths for the specified vhci client.
8901 * If at least one path for the client is already online, the remaining paths
8902 * will be configured asynchronously. Otherwise, it synchronously configures
8903 * the paths until at least one path is online and then rest of the paths
8904 * will be configured asynchronously.
8905 */
8906 static void
8907 config_client_paths(mdi_vhci_config_t *vhc, char *ct_name, char *ct_addr)
8908 {
8909 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
8910 mdi_phys_path_t *pp_head, *pp;
8911 mdi_vhcache_client_t *cct;
8912 mdi_vhcache_lookup_token_t tok;
8913
8914 ASSERT(RW_LOCK_HELD(&vhcache->vhcache_lock));
8915
8916 init_vhcache_lookup_token(&tok, NULL);
8917
8918 if (ct_name == NULL || ct_addr == NULL ||
8919 (cct = lookup_vhcache_client(vhcache, ct_name, ct_addr, &tok))
8920 == NULL ||
8921 (pp_head = build_phclient_path_list(cct, ct_name)) == NULL) {
8922 rw_exit(&vhcache->vhcache_lock);
8923 return;
8924 }
8925
8926 /* if at least one path is online, configure the rest asynchronously */
8927 if (nonline_paths(cct) > 0) {
8928 rw_exit(&vhcache->vhcache_lock);
8929 config_client_paths_async(vhc, ct_name, ct_addr, pp_head, &tok);
8930 return;
8931 }
8932
8933 rw_exit(&vhcache->vhcache_lock);
8934
8935 for (pp = pp_head; pp != NULL; pp = pp->phys_path_next) {
8936 if (bus_config_one_phci_child(pp->phys_path) == MDI_SUCCESS) {
8937 rw_enter(&vhcache->vhcache_lock, RW_READER);
8938
8939 if ((cct = lookup_vhcache_client(vhcache, ct_name,
8940 ct_addr, &tok)) == NULL) {
8941 rw_exit(&vhcache->vhcache_lock);
8942 goto out;
8943 }
8944
8945 if (nonline_paths(cct) > 0 &&
8946 pp->phys_path_next != NULL) {
8947 rw_exit(&vhcache->vhcache_lock);
8948 config_client_paths_async(vhc, ct_name, ct_addr,
8949 pp->phys_path_next, &tok);
8950 pp->phys_path_next = NULL;
8951 goto out;
8952 }
8953
8954 rw_exit(&vhcache->vhcache_lock);
8955 }
8956 }
8957
8958 adjust_sort_vhcache_paths(vhc, ct_name, ct_addr, &tok);
8959 out:
8960 free_phclient_path_list(pp_head);
8961 }
8962
8963 static void
8964 single_threaded_vhconfig_enter(mdi_vhci_config_t *vhc)
8965 {
8966 mutex_enter(&vhc->vhc_lock);
8967 while (vhc->vhc_flags & MDI_VHC_SINGLE_THREADED)
8968 cv_wait(&vhc->vhc_cv, &vhc->vhc_lock);
8969 vhc->vhc_flags |= MDI_VHC_SINGLE_THREADED;
8970 mutex_exit(&vhc->vhc_lock);
8971 }
8972
/*
 * Release the hold taken by single_threaded_vhconfig_enter() and wake up
 * any waiters.
 */
static void
single_threaded_vhconfig_exit(mdi_vhci_config_t *vhc)
{
	mutex_enter(&vhc->vhc_lock);
	vhc->vhc_flags &= ~MDI_VHC_SINGLE_THREADED;
	cv_broadcast(&vhc->vhc_cv);
	mutex_exit(&vhc->vhc_lock);
}
8981
/* per-driver entry of the built-in phci driver tables below */
typedef struct mdi_phci_driver_info {
	char	*phdriver_name;		/* name of the phci driver */

	/* set to non zero if the phci driver supports root device */
	int	phdriver_root_support;
} mdi_phci_driver_info_t;
8988
8989 /*
8990 * vhci class and root support capability of a phci driver can be
8991 * specified using ddi-vhci-class and ddi-no-root-support properties in the
8992 * phci driver.conf file. The built-in tables below contain this information
8993 * for those phci drivers whose driver.conf files don't yet contain this info.
8994 *
8995 * All phci drivers expect iscsi have root device support.
8996 */
8997 static mdi_phci_driver_info_t scsi_phci_driver_list[] = {
8998 { "fp", 1 },
8999 { "iscsi", 0 },
9000 { "ibsrp", 1 }
9001 };
9002
9003 static mdi_phci_driver_info_t ib_phci_driver_list[] = { "tavor", 1 };
9004
9005 static void *
9006 mdi_realloc(void *old_ptr, size_t old_size, size_t new_size)
9007 {
9008 void *new_ptr;
9009
9010 new_ptr = kmem_zalloc(new_size, KM_SLEEP);
9011 if (old_ptr) {
9012 bcopy(old_ptr, new_ptr, MIN(old_size, new_size));
9013 kmem_free(old_ptr, old_size);
9014 }
9015 return (new_ptr);
9016 }
9017
9018 static void
9019 add_to_phci_list(char ***driver_list, int **root_support_list,
9020 int *cur_elements, int *max_elements, char *driver_name, int root_support)
9021 {
9022 ASSERT(*cur_elements <= *max_elements);
9023 if (*cur_elements == *max_elements) {
9024 *max_elements += 10;
9025 *driver_list = mdi_realloc(*driver_list,
9026 sizeof (char *) * (*cur_elements),
9027 sizeof (char *) * (*max_elements));
9028 *root_support_list = mdi_realloc(*root_support_list,
9029 sizeof (int) * (*cur_elements),
9030 sizeof (int) * (*max_elements));
9031 }
9032 (*driver_list)[*cur_elements] = i_ddi_strdup(driver_name, KM_SLEEP);
9033 (*root_support_list)[*cur_elements] = root_support;
9034 (*cur_elements)++;
9035 }
9036
/*
 * Build the list of phci drivers associated with the specified vhci class.
 * On return *driver_list and *root_support_list are parallel arrays of
 * *cur_elements entries (allocated to *max_elements entries); the caller
 * is responsible for freeing them. The list is the union of drivers that
 * declare DDI_VHCI_CLASS in their driver.conf global properties and the
 * built-in tables above.
 */
static void
get_phci_driver_list(char *vhci_class, char ***driver_list,
    int **root_support_list, int *cur_elements, int *max_elements)
{
	mdi_phci_driver_info_t *st_driver_list, *p;
	int st_ndrivers, root_support, i, j, driver_conf_count;
	major_t m;
	struct devnames *dnp;
	ddi_prop_t *propp;

	*driver_list = NULL;
	*root_support_list = NULL;
	*cur_elements = 0;
	*max_elements = 0;

	/* add the phci drivers derived from the phci driver.conf files */
	for (m = 0; m < devcnt; m++) {
		dnp = &devnamesp[m];

		if (dnp->dn_flags & DN_PHCI_DRIVER) {
			LOCK_DEV_OPS(&dnp->dn_lock);
			/* include only drivers of the requested vhci class */
			if (dnp->dn_global_prop_ptr != NULL &&
			    (propp = i_ddi_prop_search(DDI_DEV_T_ANY,
			    DDI_VHCI_CLASS, DDI_PROP_TYPE_STRING,
			    &dnp->dn_global_prop_ptr->prop_list)) != NULL &&
			    strcmp(propp->prop_val, vhci_class) == 0) {

				/* root support unless explicitly disclaimed */
				root_support = (i_ddi_prop_search(DDI_DEV_T_ANY,
				    DDI_NO_ROOT_SUPPORT, DDI_PROP_TYPE_INT,
				    &dnp->dn_global_prop_ptr->prop_list)
				    == NULL) ? 1 : 0;

				add_to_phci_list(driver_list, root_support_list,
				    cur_elements, max_elements, dnp->dn_name,
				    root_support);

				UNLOCK_DEV_OPS(&dnp->dn_lock);
			} else
				UNLOCK_DEV_OPS(&dnp->dn_lock);
		}
	}

	driver_conf_count = *cur_elements;

	/* add the phci drivers specified in the built-in tables */
	if (strcmp(vhci_class, MDI_HCI_CLASS_SCSI) == 0) {
		st_driver_list = scsi_phci_driver_list;
		st_ndrivers = sizeof (scsi_phci_driver_list) /
		    sizeof (mdi_phci_driver_info_t);
	} else if (strcmp(vhci_class, MDI_HCI_CLASS_IB) == 0) {
		st_driver_list = ib_phci_driver_list;
		st_ndrivers = sizeof (ib_phci_driver_list) /
		    sizeof (mdi_phci_driver_info_t);
	} else {
		st_driver_list = NULL;
		st_ndrivers = 0;
	}

	for (i = 0, p = st_driver_list; i < st_ndrivers; i++, p++) {
		/* add this phci driver if not already added before */
		for (j = 0; j < driver_conf_count; j++) {
			if (strcmp((*driver_list)[j], p->phdriver_name) == 0)
				break;
		}
		if (j == driver_conf_count) {
			add_to_phci_list(driver_list, root_support_list,
			    cur_elements, max_elements, p->phdriver_name,
			    p->phdriver_root_support);
		}
	}
}
9108
9109 /*
9110 * Attach the phci driver instances associated with the specified vhci class.
9111 * If root is mounted attach all phci driver instances.
9112 * If root is not mounted, attach the instances of only those phci
9113 * drivers that have the root support.
9114 */
9115 static void
9116 attach_phci_drivers(char *vhci_class)
9117 {
9118 char **driver_list, **p;
9119 int *root_support_list;
9120 int cur_elements, max_elements, i;
9121 major_t m;
9122
9123 get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
9124 &cur_elements, &max_elements);
9125
9126 for (i = 0; i < cur_elements; i++) {
9127 if (modrootloaded || root_support_list[i]) {
9128 m = ddi_name_to_major(driver_list[i]);
9129 if (m != DDI_MAJOR_T_NONE &&
9130 ddi_hold_installed_driver(m))
9131 ddi_rele_driver(m);
9132 }
9133 }
9134
9135 if (driver_list) {
9136 for (i = 0, p = driver_list; i < cur_elements; i++, p++)
9137 kmem_free(*p, strlen(*p) + 1);
9138 kmem_free(driver_list, sizeof (char *) * max_elements);
9139 kmem_free(root_support_list, sizeof (int) * max_elements);
9140 }
9141 }
9142
9143 /*
9144 * Build vhci cache:
9145 *
9146 * Attach phci driver instances and then drive BUS_CONFIG_ALL on
9147 * the phci driver instances. During this process the cache gets built.
9148 *
9149 * Cache is built fully if the root is mounted.
9150 * If the root is not mounted, phci drivers that do not have root support
9151 * are not attached. As a result the cache is built partially. The entries
9152 * in the cache reflect only those phci drivers that have root support.
9153 */
9154 static int
9155 build_vhci_cache(mdi_vhci_t *vh)
9156 {
9157 mdi_vhci_config_t *vhc = vh->vh_config;
9158 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9159
9160 single_threaded_vhconfig_enter(vhc);
9161
9162 rw_enter(&vhcache->vhcache_lock, RW_READER);
9163 if (vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE) {
9164 rw_exit(&vhcache->vhcache_lock);
9165 single_threaded_vhconfig_exit(vhc);
9166 return (0);
9167 }
9168 rw_exit(&vhcache->vhcache_lock);
9169
9170 attach_phci_drivers(vh->vh_class);
9171 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE | NDI_NO_EVENT,
9172 BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
9173
9174 rw_enter(&vhcache->vhcache_lock, RW_WRITER);
9175 vhcache->vhcache_flags |= MDI_VHCI_CACHE_SETUP_DONE;
9176 rw_exit(&vhcache->vhcache_lock);
9177
9178 single_threaded_vhconfig_exit(vhc);
9179 vhcache_dirty(vhc);
9180 return (1);
9181 }
9182
9183 /*
9184 * Determine if discovery of paths is needed.
9185 */
9186 static int
9187 vhcache_do_discovery(mdi_vhci_config_t *vhc)
9188 {
9189 int rv = 1;
9190
9191 mutex_enter(&vhc->vhc_lock);
9192 if (i_ddi_io_initialized() == 0) {
9193 if (vhc->vhc_path_discovery_boot > 0) {
9194 vhc->vhc_path_discovery_boot--;
9195 goto out;
9196 }
9197 } else {
9198 if (vhc->vhc_path_discovery_postboot > 0) {
9199 vhc->vhc_path_discovery_postboot--;
9200 goto out;
9201 }
9202 }
9203
9204 /*
9205 * Do full path discovery at most once per mdi_path_discovery_interval.
9206 * This is to avoid a series of full path discoveries when opening
9207 * stale /dev/[r]dsk links.
9208 */
9209 if (mdi_path_discovery_interval != -1 &&
9210 ddi_get_lbolt64() >= vhc->vhc_path_discovery_cutoff_time)
9211 goto out;
9212
9213 rv = 0;
9214 out:
9215 mutex_exit(&vhc->vhc_lock);
9216 return (rv);
9217 }
9218
9219 /*
9220 * Discover all paths:
9221 *
9222 * Attach phci driver instances and then drive BUS_CONFIG_ALL on all the phci
9223 * driver instances. During this process all paths will be discovered.
9224 */
9225 static int
9226 vhcache_discover_paths(mdi_vhci_t *vh)
9227 {
9228 mdi_vhci_config_t *vhc = vh->vh_config;
9229 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9230 int rv = 0;
9231
9232 single_threaded_vhconfig_enter(vhc);
9233
9234 if (vhcache_do_discovery(vhc)) {
9235 attach_phci_drivers(vh->vh_class);
9236 bus_config_all_phcis(vhcache, NDI_DRV_CONF_REPROBE |
9237 NDI_NO_EVENT, BUS_CONFIG_ALL, DDI_MAJOR_T_NONE);
9238
9239 mutex_enter(&vhc->vhc_lock);
9240 vhc->vhc_path_discovery_cutoff_time = ddi_get_lbolt64() +
9241 mdi_path_discovery_interval * TICKS_PER_SECOND;
9242 mutex_exit(&vhc->vhc_lock);
9243 rv = 1;
9244 }
9245
9246 single_threaded_vhconfig_exit(vhc);
9247 return (rv);
9248 }
9249
9250 /*
9251 * Generic vhci bus config implementation:
9252 *
9253 * Parameters
9254 * vdip vhci dip
9255 * flags bus config flags
9256 * op bus config operation
9257 * The remaining parameters are bus config operation specific
9258 *
9259 * for BUS_CONFIG_ONE
9260 * arg pointer to name@addr
9261 * child upon successful return from this function, *child will be
9262 * set to the configured and held devinfo child node of vdip.
9263 * ct_addr pointer to client address (i.e. GUID)
9264 *
9265 * for BUS_CONFIG_DRIVER
9266 * arg major number of the driver
9267 * child and ct_addr parameters are ignored
9268 *
9269 * for BUS_CONFIG_ALL
9270 * arg, child, and ct_addr parameters are ignored
9271 *
9272 * Note that for the rest of the bus config operations, this function simply
9273 * calls the framework provided default bus config routine.
9274 */
9275 int
9276 mdi_vhci_bus_config(dev_info_t *vdip, uint_t flags, ddi_bus_config_op_t op,
9277 void *arg, dev_info_t **child, char *ct_addr)
9278 {
9279 mdi_vhci_t *vh = i_devi_get_vhci(vdip);
9280 mdi_vhci_config_t *vhc = vh->vh_config;
9281 mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
9282 int rv = 0;
9283 int params_valid = 0;
9284 char *cp;
9285
9286 /*
9287 * To bus config vhcis we relay operation, possibly using another
9288 * thread, to phcis. The phci driver then interacts with MDI to cause
9289 * vhci child nodes to be enumerated under the vhci node. Adding a
9290 * vhci child requires an ndi_devi_enter of the vhci. Since another
9291 * thread may be adding the child, to avoid deadlock we can't wait
9292 * for the relayed operations to complete if we have already entered
9293 * the vhci node.
9294 */
9295 if (DEVI_BUSY_OWNED(vdip)) {
9296 MDI_DEBUG(2, (MDI_NOTE, vdip,
9297 "vhci dip is busy owned %p", (void *)vdip));
9298 goto default_bus_config;
9299 }
9300
9301 rw_enter(&vhcache->vhcache_lock, RW_READER);
9302 if (!(vhcache->vhcache_flags & MDI_VHCI_CACHE_SETUP_DONE)) {
9303 rw_exit(&vhcache->vhcache_lock);
9304 rv = build_vhci_cache(vh);
9305 rw_enter(&vhcache->vhcache_lock, RW_READER);
9306 }
9307
9308 switch (op) {
9309 case BUS_CONFIG_ONE:
9310 if (arg != NULL && ct_addr != NULL) {
9311 /* extract node name */
9312 cp = (char *)arg;
9313 while (*cp != '\0' && *cp != '@')
9314 cp++;
9315 if (*cp == '@') {
9316 params_valid = 1;
9317 *cp = '\0';
9318 config_client_paths(vhc, (char *)arg, ct_addr);
9319 /* config_client_paths() releases cache_lock */
9320 *cp = '@';
9321 break;
9322 }
9323 }
9324
9325 rw_exit(&vhcache->vhcache_lock);
9326 break;
9327
9328 case BUS_CONFIG_DRIVER:
9329 rw_exit(&vhcache->vhcache_lock);
9330 if (rv == 0)
9331 st_bus_config_all_phcis(vhc, flags, op,
9332 (major_t)(uintptr_t)arg);
9333 break;
9334
9335 case BUS_CONFIG_ALL:
9336 rw_exit(&vhcache->vhcache_lock);
9337 if (rv == 0)
9338 st_bus_config_all_phcis(vhc, flags, op, -1);
9339 break;
9340
9341 default:
9342 rw_exit(&vhcache->vhcache_lock);
9343 break;
9344 }
9345
9346
9347 default_bus_config:
9348 /*
9349 * All requested child nodes are enumerated under the vhci.
9350 * Now configure them.
9351 */
9352 if (ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
9353 NDI_SUCCESS) {
9354 return (MDI_SUCCESS);
9355 } else if (op == BUS_CONFIG_ONE && rv == 0 && params_valid) {
9356 /* discover all paths and try configuring again */
9357 if (vhcache_discover_paths(vh) &&
9358 ndi_busop_bus_config(vdip, flags, op, arg, child, 0) ==
9359 NDI_SUCCESS)
9360 return (MDI_SUCCESS);
9361 }
9362
9363 return (MDI_FAILURE);
9364 }
9365
9366 /*
9367 * Read the on-disk vhci cache into an nvlist for the specified vhci class.
9368 */
9369 static nvlist_t *
9370 read_on_disk_vhci_cache(char *vhci_class)
9371 {
9372 nvlist_t *nvl;
9373 int err;
9374 char *filename;
9375
9376 filename = vhclass2vhcache_filename(vhci_class);
9377
9378 if ((err = fread_nvlist(filename, &nvl)) == 0) {
9379 kmem_free(filename, strlen(filename) + 1);
9380 return (nvl);
9381 } else if (err == EIO)
9382 cmn_err(CE_WARN, "%s: I/O error, will recreate", filename);
9383 else if (err == EINVAL)
9384 cmn_err(CE_WARN,
9385 "%s: data file corrupted, will recreate", filename);
9386
9387 kmem_free(filename, strlen(filename) + 1);
9388 return (NULL);
9389 }
9390
9391 /*
9392 * Read on-disk vhci cache into nvlists for all vhci classes.
9393 * Called during booting by i_ddi_read_devices_files().
9394 */
9395 void
9396 mdi_read_devices_files(void)
9397 {
9398 int i;
9399
9400 for (i = 0; i < N_VHCI_CLASSES; i++)
9401 vhcache_nvl[i] = read_on_disk_vhci_cache(vhci_class_list[i]);
9402 }
9403
9404 /*
9405 * Remove all stale entries from vhci cache.
9406 */
static void
clean_vhcache(mdi_vhci_config_t *vhc)
{
	mdi_vhci_cache_t *vhcache = &vhc->vhc_vhcache;
	mdi_vhcache_phci_t *phci, *nxt_phci;
	mdi_vhcache_client_t *client, *nxt_client;
	mdi_vhcache_pathinfo_t *path, *nxt_path;

	rw_enter(&vhcache->vhcache_lock, RW_WRITER);

	/*
	 * Detach the entire client list from the cache, then re-enqueue
	 * only the entries that are still live.
	 */
	client = vhcache->vhcache_client_head;
	vhcache->vhcache_client_head = vhcache->vhcache_client_tail = NULL;
	for ( ; client != NULL; client = nxt_client) {
		nxt_client = client->cct_next;

		/* Same detach/re-enqueue pass over this client's paths. */
		path = client->cct_cpi_head;
		client->cct_cpi_head = client->cct_cpi_tail = NULL;
		for ( ; path != NULL; path = nxt_path) {
			nxt_path = path->cpi_next;
			if ((path->cpi_cphci->cphci_phci != NULL) &&
			    (path->cpi_pip != NULL)) {
				/* Path has both a live phci and a pip: keep */
				enqueue_tail_vhcache_pathinfo(client, path);
			} else if (path->cpi_pip != NULL) {
				/* Not valid to have a path without a phci. */
				/*
				 * NOTE(review): a path with cpi_pip == NULL
				 * falls through here without being freed or
				 * re-enqueued — confirm such entries are
				 * owned/freed elsewhere, else this leaks.
				 */
				free_vhcache_pathinfo(path);
			}
		}

		/* Keep the client only if it still has at least one path. */
		if (client->cct_cpi_head != NULL)
			enqueue_vhcache_client(vhcache, client);
		else {
			(void) mod_hash_destroy(vhcache->vhcache_client_hash,
			    (mod_hash_key_t)client->cct_name_addr);
			free_vhcache_client(client);
		}
	}

	/* Prune stale phci entries (those whose phci has gone away). */
	phci = vhcache->vhcache_phci_head;
	vhcache->vhcache_phci_head = vhcache->vhcache_phci_tail = NULL;
	for ( ; phci != NULL; phci = nxt_phci) {

		nxt_phci = phci->cphci_next;
		if (phci->cphci_phci != NULL)
			enqueue_vhcache_phci(vhcache, phci);
		else
			free_vhcache_phci(phci);
	}

	/* Record the clean time and schedule the on-disk cache rewrite. */
	vhcache->vhcache_clean_time = ddi_get_lbolt64();
	rw_exit(&vhcache->vhcache_lock);
	vhcache_dirty(vhc);
}
9459
9460 /*
9461 * Remove all stale entries from vhci cache.
9462 * Called by i_ddi_clean_devices_files() during the execution of devfsadm -C
9463 */
9464 void
9465 mdi_clean_vhcache(void)
9466 {
9467 mdi_vhci_t *vh;
9468
9469 mutex_enter(&mdi_mutex);
9470 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9471 vh->vh_refcnt++;
9472 mutex_exit(&mdi_mutex);
9473 clean_vhcache(vh->vh_config);
9474 mutex_enter(&mdi_mutex);
9475 vh->vh_refcnt--;
9476 }
9477 mutex_exit(&mdi_mutex);
9478 }
9479
9480 /*
9481 * mdi_vhci_walk_clients():
9482 * Walker routine to traverse client dev_info nodes
9483 * ddi_walk_devs(ddi_get_child(vdip), f, arg) returns the entire tree
 *		below the client, including nexus devices, which we don't want.
9485 * So we just traverse the immediate siblings, starting from 1st client.
9486 */
9487 void
9488 mdi_vhci_walk_clients(dev_info_t *vdip,
9489 int (*f)(dev_info_t *, void *), void *arg)
9490 {
9491 mdi_vhci_t *vh = i_devi_get_vhci(vdip);
9492 dev_info_t *cdip;
9493 mdi_client_t *ct;
9494
9495 MDI_VHCI_CLIENT_LOCK(vh);
9496 cdip = ddi_get_child(vdip);
9497 while (cdip) {
9498 ct = i_devi_get_client(cdip);
9499 MDI_CLIENT_LOCK(ct);
9500
9501 if (((*f)(cdip, arg)) == DDI_WALK_CONTINUE)
9502 cdip = ddi_get_next_sibling(cdip);
9503 else
9504 cdip = NULL;
9505
9506 MDI_CLIENT_UNLOCK(ct);
9507 }
9508 MDI_VHCI_CLIENT_UNLOCK(vh);
9509 }
9510
9511 /*
9512 * mdi_vhci_walk_phcis():
9513 * Walker routine to traverse phci dev_info nodes
9514 */
9515 void
9516 mdi_vhci_walk_phcis(dev_info_t *vdip,
9517 int (*f)(dev_info_t *, void *), void *arg)
9518 {
9519 mdi_vhci_t *vh = i_devi_get_vhci(vdip);
9520 mdi_phci_t *ph, *next;
9521
9522 MDI_VHCI_PHCI_LOCK(vh);
9523 ph = vh->vh_phci_head;
9524 while (ph) {
9525 MDI_PHCI_LOCK(ph);
9526
9527 if (((*f)(ph->ph_dip, arg)) == DDI_WALK_CONTINUE)
9528 next = ph->ph_next;
9529 else
9530 next = NULL;
9531
9532 MDI_PHCI_UNLOCK(ph);
9533 ph = next;
9534 }
9535 MDI_VHCI_PHCI_UNLOCK(vh);
9536 }
9537
9538
9539 /*
9540 * mdi_walk_vhcis():
9541 * Walker routine to traverse vhci dev_info nodes
9542 */
9543 void
9544 mdi_walk_vhcis(int (*f)(dev_info_t *, void *), void *arg)
9545 {
9546 mdi_vhci_t *vh = NULL;
9547
9548 mutex_enter(&mdi_mutex);
9549 /*
9550 * Scan for already registered vhci
9551 */
9552 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
9553 vh->vh_refcnt++;
9554 mutex_exit(&mdi_mutex);
9555 if (((*f)(vh->vh_dip, arg)) != DDI_WALK_CONTINUE) {
9556 mutex_enter(&mdi_mutex);
9557 vh->vh_refcnt--;
9558 break;
9559 } else {
9560 mutex_enter(&mdi_mutex);
9561 vh->vh_refcnt--;
9562 }
9563 }
9564
9565 mutex_exit(&mdi_mutex);
9566 }
9567
9568 /*
9569 * i_mdi_log_sysevent():
9570 * Logs events for pickup by syseventd
9571 */
9572 static void
9573 i_mdi_log_sysevent(dev_info_t *dip, char *ph_vh_class, char *subclass)
9574 {
9575 char *path_name;
9576 nvlist_t *attr_list;
9577
9578 if (nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE,
9579 KM_SLEEP) != DDI_SUCCESS) {
9580 goto alloc_failed;
9581 }
9582
9583 path_name = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
9584 (void) ddi_pathname(dip, path_name);
9585
9586 if (nvlist_add_string(attr_list, DDI_DRIVER_NAME,
9587 ddi_driver_name(dip)) != DDI_SUCCESS) {
9588 goto error;
9589 }
9590
9591 if (nvlist_add_int32(attr_list, DDI_DRIVER_MAJOR,
9592 (int32_t)ddi_driver_major(dip)) != DDI_SUCCESS) {
9593 goto error;
9594 }
9595
9596 if (nvlist_add_int32(attr_list, DDI_INSTANCE,
9597 (int32_t)ddi_get_instance(dip)) != DDI_SUCCESS) {
9598 goto error;
9599 }
9600
9601 if (nvlist_add_string(attr_list, DDI_PATHNAME,
9602 path_name) != DDI_SUCCESS) {
9603 goto error;
9604 }
9605
9606 if (nvlist_add_string(attr_list, DDI_CLASS,
9607 ph_vh_class) != DDI_SUCCESS) {
9608 goto error;
9609 }
9610
9611 (void) ddi_log_sysevent(dip, DDI_VENDOR_SUNW, EC_DDI, subclass,
9612 attr_list, NULL, DDI_SLEEP);
9613
9614 error:
9615 kmem_free(path_name, MAXPATHLEN);
9616 nvlist_free(attr_list);
9617 return;
9618
9619 alloc_failed:
9620 MDI_DEBUG(1, (MDI_WARN, dip, "!unable to send sysevent"));
9621 }
9622
9623 char **
9624 mdi_get_phci_driver_list(char *vhci_class, int *ndrivers)
9625 {
9626 char **driver_list, **ret_driver_list = NULL;
9627 int *root_support_list;
9628 int cur_elements, max_elements;
9629
9630 get_phci_driver_list(vhci_class, &driver_list, &root_support_list,
9631 &cur_elements, &max_elements);
9632
9633
9634 if (driver_list) {
9635 kmem_free(root_support_list, sizeof (int) * max_elements);
9636 ret_driver_list = mdi_realloc(driver_list, sizeof (char *)
9637 * max_elements, sizeof (char *) * cur_elements);
9638 }
9639 *ndrivers = cur_elements;
9640
9641 return (ret_driver_list);
9642
9643 }
9644
void
mdi_free_phci_driver_list(char **driver_list, int ndrivers)
{
	int i;

	if (driver_list == NULL)
		return;

	/* Each entry is a NUL-terminated string allocated by kmem. */
	for (i = 0; i < ndrivers; i++)
		kmem_free(driver_list[i], strlen(driver_list[i]) + 1);
	kmem_free(driver_list, sizeof (char *) * ndrivers);
}
9657
9658 /*
9659 * mdi_is_dev_supported():
9660 * function called by pHCI bus config operation to determine if a
9661 * device should be represented as a child of the vHCI or the
 *	pHCI. This decision is made by the vHCI, using cinfo identity
9663 * information passed by the pHCI - specifics of the cinfo
9664 * representation are by agreement between the pHCI and vHCI.
9665 * Return Values:
9666 * MDI_SUCCESS
9667 * MDI_FAILURE
9668 */
9669 int
9670 mdi_is_dev_supported(char *class, dev_info_t *pdip, void *cinfo)
9671 {
9672 mdi_vhci_t *vh;
9673
9674 ASSERT(class && pdip);
9675
9676 /*
9677 * For dev_supported, mdi_phci_register() must have established pdip as
9678 * a pHCI.
9679 *
9680 * NOTE: mdi_phci_register() does "mpxio-disable" processing, and
9681 * MDI_PHCI(pdip) will return false if mpxio is disabled.
9682 */
9683 if (!MDI_PHCI(pdip))
9684 return (MDI_FAILURE);
9685
9686 /* Return MDI_FAILURE if vHCI does not support asking the question. */
9687 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
9688 if ((vh == NULL) || (vh->vh_ops->vo_is_dev_supported == NULL)) {
9689 return (MDI_FAILURE);
9690 }
9691
9692 /* Return vHCI answer */
9693 return (vh->vh_ops->vo_is_dev_supported(vh->vh_dip, pdip, cinfo));
9694 }
9695
9696 int
9697 mdi_dc_return_dev_state(mdi_pathinfo_t *pip, struct devctl_iocdata *dcp)
9698 {
9699 uint_t devstate = 0;
9700 dev_info_t *cdip;
9701
9702 if ((pip == NULL) || (dcp == NULL))
9703 return (MDI_FAILURE);
9704
9705 cdip = mdi_pi_get_client(pip);
9706
9707 switch (mdi_pi_get_state(pip)) {
9708 case MDI_PATHINFO_STATE_INIT:
9709 devstate = DEVICE_DOWN;
9710 break;
9711 case MDI_PATHINFO_STATE_ONLINE:
9712 devstate = DEVICE_ONLINE;
9713 if ((cdip) && (devi_stillreferenced(cdip) == DEVI_REFERENCED))
9714 devstate |= DEVICE_BUSY;
9715 break;
9716 case MDI_PATHINFO_STATE_STANDBY:
9717 devstate = DEVICE_ONLINE;
9718 break;
9719 case MDI_PATHINFO_STATE_FAULT:
9720 devstate = DEVICE_DOWN;
9721 break;
9722 case MDI_PATHINFO_STATE_OFFLINE:
9723 devstate = DEVICE_OFFLINE;
9724 break;
9725 default:
9726 ASSERT(MDI_PI(pip)->pi_state);
9727 }
9728
9729 if (copyout(&devstate, dcp->cpyout_buf, sizeof (uint_t)) != 0)
9730 return (MDI_FAILURE);
9731
9732 return (MDI_SUCCESS);
9733 }