1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 #include <sys/mutex.h> 26 #include <sys/debug.h> 27 #include <sys/types.h> 28 #include <sys/param.h> 29 #include <sys/kmem.h> 30 #include <sys/thread.h> 31 #include <sys/id_space.h> 32 #include <sys/avl.h> 33 #include <sys/list.h> 34 #include <sys/sysmacros.h> 35 #include <sys/proc.h> 36 #include <sys/contract.h> 37 #include <sys/contract_impl.h> 38 #include <sys/contract/device.h> 39 #include <sys/contract/device_impl.h> 40 #include <sys/cmn_err.h> 41 #include <sys/nvpair.h> 42 #include <sys/policy.h> 43 #include <sys/ddi_impldefs.h> 44 #include <sys/ddi_implfuncs.h> 45 #include <sys/systm.h> 46 #include <sys/stat.h> 47 #include <sys/sunddi.h> 48 #include <sys/esunddi.h> 49 #include <sys/ddi.h> 50 #include <sys/fs/dv_node.h> 51 #include <sys/sunndi.h> 52 #undef ct_lock /* needed because clnt.h defines ct_lock as a macro */ 53 54 /* 55 * Device Contracts 56 * ----------------- 57 * This file contains the core code for the device contracts framework. 
58 * A device contract is an agreement or a contract between a process and 59 * the kernel regarding the state of the device. A device contract may be 60 * created when a relationship is formed between a device and a process 61 * i.e. at open(2) time, or it may be created at some point after the device 62 * has been opened. A device contract once formed may be broken by either party. 63 * A device contract can be broken by the process by an explicit abandon of the 64 * contract or by an implicit abandon when the process exits. A device contract 65 * can be broken by the kernel either asynchronously (without negotiation) or 66 * synchronously (with negotiation). Exactly which happens depends on the device 67 * state transition. The following state diagram shows the transitions between 68 * device states. Only device state transitions currently supported by device 69 * contracts is shown. 70 * 71 * <-- A --> 72 * /-----------------> DEGRADED 73 * | | 74 * | | 75 * | | S 76 * | | | 77 * | | v 78 * v S --> v 79 * ONLINE ------------> OFFLINE 80 * 81 * 82 * In the figure above, the arrows indicate the direction of transition. The 83 * letter S refers to transitions which are inherently synchronous i.e. 84 * require negotiation and the letter A indicates transitions which are 85 * asynchronous i.e. are done without contract negotiations. A good example 86 * of a synchronous transition is the ONLINE -> OFFLINE transition. This 87 * transition cannot happen as long as there are consumers which have the 88 * device open. Thus some form of negotiation needs to happen between the 89 * consumers and the kernel to ensure that consumers either close devices 90 * or disallow the move to OFFLINE. Certain other transitions such as 91 * ONLINE --> DEGRADED for example, are inherently asynchronous i.e. 92 * non-negotiable. 
A device that suffers a fault that degrades its 93 * capabilities will become degraded irrespective of what consumers it has, 94 * so a negotiation in this case is pointless. 95 * 96 * The following device states are currently defined for device contracts: 97 * 98 * CT_DEV_EV_ONLINE 99 * The device is online and functioning normally 100 * CT_DEV_EV_DEGRADED 101 * The device is online but is functioning in a degraded capacity 102 * CT_DEV_EV_OFFLINE 103 * The device is offline and is no longer configured 104 * 105 * A typical consumer of device contracts starts out with a contract 106 * template and adds terms to that template. These include the 107 * "acceptable set" (A-set) term, which is a bitset of device states which 108 * are guaranteed by the contract. If the device moves out of a state in 109 * the A-set, the contract is broken. The breaking of the contract can 110 * be asynchronous in which case a critical contract event is sent to the 111 * contract holder but no negotiations take place. If the breaking of the 112 * contract is synchronous, negotiations are opened between the affected 113 * consumer and the kernel. The kernel does this by sending a critical 114 * event to the consumer with the CTE_NEG flag set indicating that this 115 * is a negotiation event. The consumer can accept this change by sending 116 * an ACK message to the kernel. Alternatively, if it has the necessary 117 * privileges, it can send a NACK message to the kernel which will block 118 * the device state change. To NACK a negotiable event, a process must 119 * have the {PRIV_SYS_DEVICES} privilege asserted in its effective set. 120 * 121 * Other terms include the "minor path" term, specified explicitly if the 122 * contract is not being created at open(2) time or specified implicitly 123 * if the contract is being created at open time via an activated template. 
124 * 125 * A contract event is sent on any state change to which the contract 126 * owner has subscribed via the informative or critical event sets. Only 127 * critical events are guaranteed to be delivered. Since all device state 128 * changes are controlled by the kernel and cannot be arbitrarily generated 129 * by a non-privileged user, the {PRIV_CONTRACT_EVENT} privilege does not 130 * need to be asserted in a process's effective set to designate an event as 131 * critical. To ensure privacy, a process must either have the same effective 132 * userid as the contract holder or have the {PRIV_CONTRACT_OBSERVER} privilege 133 * asserted in its effective set in order to observe device contract events 134 * off the device contract type specific endpoint. 135 * 136 * Yet another term available with device contracts is the "non-negotiable" 137 * term. This term is used to pre-specify a NACK to any contract negotiation. 138 * This term is ignored for asynchronous state changes. For example, a 139 * process may have the A-set {ONLINE|DEGRADED} and make the contract 140 * non-negotiable. In this case, the device contract framework assumes a 141 * NACK for any transition to OFFLINE and blocks the offline. If the A-set 142 * is {ONLINE} and the non-negotiable term is set, transitions to OFFLINE 143 * are NACKed but transitions to DEGRADE succeed. 144 * 145 * The OFFLINE negotiation (if OFFLINE state is not in the A-set for a contract) 146 * happens just before the I/O framework attempts to offline a device 147 * (i.e. detach a device and set the offline flag so that it cannot be 148 * reattached). A device contract holder is expected to either NACK the offline 149 * (if privileged) or release the device and allow the offline to proceed. 150 * 151 * The DEGRADE contract event (if DEGRADE is not in the A-set for a contract) 152 * is generated just before the I/O framework transitions the device state 153 * to "degraded" (i.e. 
DEVI_DEVICE_DEGRADED in I/O framework terminology). 154 * 155 * The contract holder is expected to ACK or NACK a negotiation event 156 * within a certain period of time. If the ACK/NACK is not received 157 * within the timeout period, the device contract framework will behave 158 * as if the contract does not exist and will proceed with the event. 159 * 160 * Unlike a process contract a device contract does not need to exist 161 * once it is abandoned, since it does not define a fault boundary. It 162 * merely represents an agreement between a process and the kernel 163 * regarding the state of the device. Once the process has abandoned 164 * the contract (either implicitly via a process exit or explicitly) 165 * the kernel has no reason to retain the contract. As a result 166 * device contracts are neither inheritable nor need to exist in an 167 * orphan state. 168 * 169 * A device unlike a process may exist in multiple contracts and has 170 * a "life" outside a device contract. A device unlike a process 171 * may exist without an associated contract. Unlike a process contract 172 * a device contract may be formed after a binding relationship is 173 * formed between a process and a device. 174 * 175 * IMPLEMENTATION NOTES 176 * ==================== 177 * DATA STRUCTURES 178 * ---------------- 179 * The heart of the device contracts implementation is the device contract 180 * private cont_device_t (or ctd for short) data structure. It encapsulates 181 * the generic contract_t data structure and has a number of private 182 * fields. 183 * These include: 184 * cond_minor: The minor device that is the subject of the contract 185 * cond_aset: The bitset of states which are guaranteed by the 186 * contract 187 * cond_noneg: If set, indicates that the result of negotiation has 188 * been predefined to be a NACK 189 * In addition, there are other device identifiers such as the devinfo node, 190 * dev_t and spec_type of the minor node. 
There are also a few fields that 191 * are used during negotiation to maintain state. See 192 * uts/common/sys/contract/device_impl.h 193 * for details. 194 * The ctd structure represents the device private part of a contract of 195 * type "device" 196 * 197 * Another data structure used by device contracts is ctmpl_device. It is 198 * the device contracts private part of the contract template structure. It 199 * encapsulates the generic template structure "ct_template_t" and includes 200 * the following device contract specific fields 201 * ctd_aset: The bitset of states that should be guaranteed by a 202 * contract 203 * ctd_noneg: If set, indicates that contract should NACK a 204 * negotiation 205 * ctd_minor: The devfs_path (without the /devices prefix) of the 206 * minor node that is the subject of the contract. 207 * 208 * ALGORITHMS 209 * --------- 210 * There are three sets of routines in this file 211 * Template related routines 212 * ------------------------- 213 * These routines provide support for template related operations initiated 214 * via the generic template operations. These include routines that dup 215 * a template, free it, and set various terms in the template 216 * (such as the minor node path, the acceptable state set (or A-set) 217 * and the non-negotiable term) as well as a routine to query the 218 * device specific portion of the template for the abovementioned terms. 219 * There is also a routine to create (ctmpl_device_create) that is used to 220 * create a contract from a template. This routine calls (after initial 221 * setup) the common function used to create a device contract 222 * (contract_device_create). 223 * 224 * core device contract implementation 225 * ---------------------------------- 226 * These routines support the generic contract framework to provide 227 * functionality that allows contracts to be created, managed and 228 * destroyed. 
The contract_device_create() routine is a routine used 229 * to create a contract from a template (either via an explicit create 230 * operation on a template or implicitly via an open with an 231 * activated template.). The contract_device_free() routine assists 232 * in freeing the device contract specific parts. There are routines 233 * used to abandon (contract_device_abandon) a device contract as well 234 * as a routine to destroy (which despite its name does not destroy, 235 * it only moves a contract to a dead state) a contract. 236 * There is also a routine to return status information about a 237 * contract - the level of detail depends on what is requested by the 238 * user. A value of CTD_FIXED only returns fixed length fields such 239 * as the A-set, state of device and value of the "noneg" term. If 240 * CTD_ALL is specified, the minor node path is returned as well. 241 * 242 * In addition there are interfaces (contract_device_ack/nack) which 243 * are used to support negotiation between userland processes and 244 * device contracts. These interfaces record the acknowledgement 245 * or lack thereof for negotiation events and help determine if the 246 * negotiated event should occur. 247 * 248 * "backend routines" 249 * ----------------- 250 * The backend routines form the interface between the I/O framework 251 * and the device contract subsystem. These routines, allow the I/O 252 * framework to call into the device contract subsystem to notify it of 253 * impending changes to a device state as well as to inform of the 254 * final disposition of such attempted state changes. Routines in this 255 * class include contract_device_offline() that indicates an attempt to 256 * offline a device, contract_device_degrade() that indicates that 257 * a device is moving to the degraded state and contract_device_negend() 258 * that is used by the I/O framework to inform the contracts subsystem of 259 * the final disposition of an attempted operation. 
260 * 261 * SUMMARY 262 * ------- 263 * A contract starts its life as a template. A process allocates a device 264 * contract template and sets various terms: 265 * The A-set 266 * The device minor node 267 * Critical and informative events 268 * The noneg i.e. no negotiation term 269 * Setting of these terms in the template is done via the 270 * ctmpl_device_set() entry point in this file. A process can query a 271 * template to determine the terms already set in the template - this is 272 * facilitated by the ctmpl_device_get() routine. 273 * 274 * Once all the appropriate terms are set, the contract is instantiated via 275 * one of two methods 276 * - via an explicit create operation - this is facilitated by the 277 * ctmpl_device_create() entry point 278 * - synchronously with the open(2) system call - this is achieved via the 279 * contract_device_open() routine. 280 * The core work for both these above functions is done by 281 * contract_device_create() 282 * 283 * A contract once created can be queried for its status. Support for 284 * status info is provided by both the common contracts framework and by 285 * the "device" contract type. If the level of detail requested is 286 * CTD_COMMON, only the common contract framework data is used. Higher 287 * levels of detail result in calls to contract_device_status() to supply 288 * device contract type specific status information. 289 * 290 * A contract once created may be abandoned either explicitly or implicitly. 291 * In either case, the contract_device_abandon() function is invoked. This 292 * function merely calls contract_destroy() which moves the contract to 293 * the DEAD state. The device contract portion of destroy processing is 294 * provided by contract_device_destroy() which merely disassociates the 295 * contract from its device devinfo node. A contract in the DEAD state is 296 * not freed. It hangs around until all references to the contract are 297 * gone. 
When that happens, the contract is finally deallocated. The 298 * device contract specific portion of the free is done by 299 * contract_device_free() which finally frees the device contract specific 300 * data structure (cont_device_t). 301 * 302 * When a device undergoes a state change, the I/O framework calls the 303 * corresponding device contract entry point. For example, when a device 304 * is about to go OFFLINE, the routine contract_device_offline() is 305 * invoked. Similarly if a device moves to DEGRADED state, the routine 306 * contract_device_degrade() function is called. These functions call the 307 * core routine contract_device_publish(). This function determines via 308 * the function is_sync_neg() whether an event is a synchronous (i.e. 309 * negotiable) event or not. In the former case contract_device_publish() 310 * publishes a CTE_NEG event and then waits in wait_for_acks() for ACKs 311 * and/or NACKs from contract holders. In the latter case, it simply 312 * publishes the event and does not wait. In the negotiation case, ACKs or 313 * NACKs from userland consumers result in contract_device_ack_nack() 314 * being called where the result of the negotiation is recorded in the 315 * contract data structure. Once all outstanding contract owners have 316 * responded, the device contract code in wait_for_acks() determines the 317 * final result of the negotiation. A single NACK overrides all other ACKs. 318 * If there is no NACK, then a single ACK will result in an overall ACK 319 * result. If there are no ACKs or NACKs, then the result CT_NONE is 320 * returned back to the I/O framework. Once the event is permitted or 321 * blocked, the I/O framework proceeds or aborts the state change. The 322 * I/O framework then calls contract_device_negend() with a result code 323 * indicating final disposition of the event. 
This call releases the 324 * barrier and other state associated with the previous negotiation, 325 * which permits the next event (if any) to come into the device contract 326 * framework. 327 * 328 * Finally, a device that has outstanding contracts may be removed from 329 * the system which results in its devinfo node being freed. The devinfo 330 * free routine in the I/O framework, calls into the device contract 331 * function - contract_device_remove_dip(). This routine, disassociates 332 * the dip from all contracts associated with the devinfo node being freed, 333 * allowing the devinfo node to be freed. 334 * 335 * LOCKING 336 * --------- 337 * There are four sets of data that need to be protected by locks 338 * 339 * i) device contract specific portion of the contract template - This data 340 * is protected by the template lock ctmpl_lock. 341 * 342 * ii) device contract specific portion of the contract - This data is 343 * protected by the contract lock ct_lock 344 * 345 * iii) The linked list of contracts hanging off a devinfo node - This 346 * list is protected by the per-devinfo node lock devi_ct_lock 347 * 348 * iv) Finally there is a barrier, controlled by devi_ct_lock, devi_ct_cv 349 * and devi_ct_count that controls state changes to a dip 350 * 351 * The template lock is independent in that none of the other locks in this 352 * file may be taken while holding the template lock (and vice versa). 
353 * 354 * The remaining three locks have the following lock order 355 * 356 * devi_ct_lock -> ct_count barrier -> ct_lock 357 * 358 */ 359 360 static cont_device_t *contract_device_create(ctmpl_device_t *dtmpl, dev_t dev, 361 int spec_type, proc_t *owner, int *errorp); 362 363 /* barrier routines */ 364 static void ct_barrier_acquire(dev_info_t *dip); 365 static void ct_barrier_release(dev_info_t *dip); 366 static int ct_barrier_held(dev_info_t *dip); 367 static int ct_barrier_empty(dev_info_t *dip); 368 static void ct_barrier_wait_for_release(dev_info_t *dip); 369 static int ct_barrier_wait_for_empty(dev_info_t *dip, int secs); 370 static void ct_barrier_decr(dev_info_t *dip); 371 static void ct_barrier_incr(dev_info_t *dip); 372 373 ct_type_t *device_type; 374 375 /* 376 * Macro predicates for determining when events should be sent and how. 377 */ 378 #define EVSENDP(ctd, flag) \ 379 ((ctd->cond_contract.ct_ev_info | ctd->cond_contract.ct_ev_crit) & flag) 380 381 #define EVINFOP(ctd, flag) \ 382 ((ctd->cond_contract.ct_ev_crit & flag) == 0) 383 384 /* 385 * State transition table showing which transitions are synchronous and which 386 * are not. 387 */ 388 struct ct_dev_negtable { 389 uint_t st_old; 390 uint_t st_new; 391 uint_t st_neg; 392 } ct_dev_negtable[] = { 393 {CT_DEV_EV_ONLINE, CT_DEV_EV_OFFLINE, 1}, 394 {CT_DEV_EV_ONLINE, CT_DEV_EV_DEGRADED, 0}, 395 {CT_DEV_EV_DEGRADED, CT_DEV_EV_ONLINE, 0}, 396 {CT_DEV_EV_DEGRADED, CT_DEV_EV_OFFLINE, 1}, 397 {0} 398 }; 399 400 /* 401 * Device contract template implementation 402 */ 403 404 /* 405 * ctmpl_device_dup 406 * 407 * The device contract template dup entry point. 408 * This simply copies all the fields (generic as well as device contract 409 * specific) fields of the original. 
410 */ 411 static struct ct_template * 412 ctmpl_device_dup(struct ct_template *template) 413 { 414 ctmpl_device_t *new; 415 ctmpl_device_t *old = template->ctmpl_data; 416 char *buf; 417 char *minor; 418 419 new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP); 420 buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 421 422 /* 423 * copy generic fields. 424 * ctmpl_copy returns with old template lock held 425 */ 426 ctmpl_copy(&new->ctd_ctmpl, template); 427 428 new->ctd_ctmpl.ctmpl_data = new; 429 new->ctd_aset = old->ctd_aset; 430 new->ctd_minor = NULL; 431 new->ctd_noneg = old->ctd_noneg; 432 433 if (old->ctd_minor) { 434 ASSERT(strlen(old->ctd_minor) + 1 <= MAXPATHLEN); 435 bcopy(old->ctd_minor, buf, strlen(old->ctd_minor) + 1); 436 } else { 437 kmem_free(buf, MAXPATHLEN); 438 buf = NULL; 439 } 440 441 mutex_exit(&template->ctmpl_lock); 442 if (buf) { 443 minor = i_ddi_strdup(buf, KM_SLEEP); 444 kmem_free(buf, MAXPATHLEN); 445 buf = NULL; 446 } else { 447 minor = NULL; 448 } 449 mutex_enter(&template->ctmpl_lock); 450 451 if (minor) { 452 new->ctd_minor = minor; 453 } 454 455 ASSERT(buf == NULL); 456 return (&new->ctd_ctmpl); 457 } 458 459 /* 460 * ctmpl_device_free 461 * 462 * The device contract template free entry point. Just 463 * frees the template. 464 */ 465 static void 466 ctmpl_device_free(struct ct_template *template) 467 { 468 ctmpl_device_t *dtmpl = template->ctmpl_data; 469 470 if (dtmpl->ctd_minor) 471 kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1); 472 473 kmem_free(dtmpl, sizeof (ctmpl_device_t)); 474 } 475 476 /* 477 * SAFE_EV is the set of events which a non-privileged process is 478 * allowed to make critical. An unprivileged device contract owner has 479 * no control over when a device changes state, so all device events 480 * can be in the critical set. 481 * 482 * EXCESS tells us if "value", a critical event set, requires 483 * additional privilege. For device contracts EXCESS currently 484 * evaluates to 0. 
485 */ 486 #define SAFE_EV (CT_DEV_ALLEVENT) 487 #define EXCESS(value) ((value) & ~SAFE_EV) 488 489 490 /* 491 * ctmpl_device_set 492 * 493 * The device contract template set entry point. Sets various terms in the 494 * template. The non-negotiable term can only be set if the process has 495 * the {PRIV_SYS_DEVICES} privilege asserted in its effective set. 496 */ 497 static int 498 ctmpl_device_set(struct ct_template *tmpl, ct_kparam_t *kparam, 499 const cred_t *cr) 500 { 501 ctmpl_device_t *dtmpl = tmpl->ctmpl_data; 502 ct_param_t *param = &kparam->param; 503 int error; 504 dev_info_t *dip; 505 int spec_type; 506 uint64_t param_value; 507 char *str_value; 508 509 ASSERT(MUTEX_HELD(&tmpl->ctmpl_lock)); 510 511 if (param->ctpm_id == CTDP_MINOR) { 512 str_value = (char *)kparam->ctpm_kbuf; 513 str_value[param->ctpm_size - 1] = '\0'; 514 } else { 515 if (param->ctpm_size < sizeof (uint64_t)) 516 return (EINVAL); 517 param_value = *(uint64_t *)kparam->ctpm_kbuf; 518 } 519 520 switch (param->ctpm_id) { 521 case CTDP_ACCEPT: 522 if (param_value & ~CT_DEV_ALLEVENT) 523 return (EINVAL); 524 if (param_value == 0) 525 return (EINVAL); 526 if (param_value == CT_DEV_ALLEVENT) 527 return (EINVAL); 528 529 dtmpl->ctd_aset = param_value; 530 break; 531 case CTDP_NONEG: 532 if (param_value != CTDP_NONEG_SET && 533 param_value != CTDP_NONEG_CLEAR) 534 return (EINVAL); 535 536 /* 537 * only privileged processes can designate a contract 538 * non-negotiatble. 
539 */ 540 if (param_value == CTDP_NONEG_SET && 541 (error = secpolicy_sys_devices(cr)) != 0) { 542 return (error); 543 } 544 545 dtmpl->ctd_noneg = param_value; 546 break; 547 548 case CTDP_MINOR: 549 if (*str_value != '/' || 550 strncmp(str_value, "/devices/", 551 strlen("/devices/")) == 0 || 552 strstr(str_value, "../devices/") != NULL || 553 strchr(str_value, ':') == NULL) { 554 return (EINVAL); 555 } 556 557 spec_type = 0; 558 dip = NULL; 559 if (resolve_pathname(str_value, &dip, NULL, &spec_type) != 0) { 560 return (ERANGE); 561 } 562 ddi_release_devi(dip); 563 564 if (spec_type != S_IFCHR && spec_type != S_IFBLK) { 565 return (EINVAL); 566 } 567 568 if (dtmpl->ctd_minor != NULL) { 569 kmem_free(dtmpl->ctd_minor, 570 strlen(dtmpl->ctd_minor) + 1); 571 } 572 dtmpl->ctd_minor = i_ddi_strdup(str_value, KM_SLEEP); 573 break; 574 case CTP_EV_CRITICAL: 575 /* 576 * Currently for device contracts, any event 577 * may be added to the critical set. We retain the 578 * following code however for future enhancements. 579 */ 580 if (EXCESS(param_value) && 581 (error = secpolicy_contract_event(cr)) != 0) 582 return (error); 583 tmpl->ctmpl_ev_crit = param_value; 584 break; 585 default: 586 return (EINVAL); 587 } 588 589 return (0); 590 } 591 592 /* 593 * ctmpl_device_get 594 * 595 * The device contract template get entry point. Simply fetches and 596 * returns the value of the requested term. 
597 */ 598 static int 599 ctmpl_device_get(struct ct_template *template, ct_kparam_t *kparam) 600 { 601 ctmpl_device_t *dtmpl = template->ctmpl_data; 602 ct_param_t *param = &kparam->param; 603 uint64_t *param_value = kparam->ctpm_kbuf; 604 605 ASSERT(MUTEX_HELD(&template->ctmpl_lock)); 606 607 if (param->ctpm_id == CTDP_ACCEPT || 608 param->ctpm_id == CTDP_NONEG) { 609 if (param->ctpm_size < sizeof (uint64_t)) 610 return (EINVAL); 611 kparam->ret_size = sizeof (uint64_t); 612 } 613 614 switch (param->ctpm_id) { 615 case CTDP_ACCEPT: 616 *param_value = dtmpl->ctd_aset; 617 break; 618 case CTDP_NONEG: 619 *param_value = dtmpl->ctd_noneg; 620 break; 621 case CTDP_MINOR: 622 if (dtmpl->ctd_minor) { 623 kparam->ret_size = strlcpy((char *)kparam->ctpm_kbuf, 624 dtmpl->ctd_minor, param->ctpm_size); 625 kparam->ret_size++; 626 } else { 627 return (ENOENT); 628 } 629 break; 630 default: 631 return (EINVAL); 632 } 633 634 return (0); 635 } 636 637 /* 638 * Device contract type specific portion of creating a contract using 639 * a specified template 640 */ 641 /*ARGSUSED*/ 642 int 643 ctmpl_device_create(ct_template_t *template, ctid_t *ctidp) 644 { 645 ctmpl_device_t *dtmpl; 646 char *buf; 647 dev_t dev; 648 int spec_type; 649 int error; 650 cont_device_t *ctd; 651 652 if (ctidp == NULL) 653 return (EINVAL); 654 655 buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 656 657 dtmpl = template->ctmpl_data; 658 659 mutex_enter(&template->ctmpl_lock); 660 if (dtmpl->ctd_minor == NULL) { 661 /* incomplete template */ 662 mutex_exit(&template->ctmpl_lock); 663 kmem_free(buf, MAXPATHLEN); 664 return (EINVAL); 665 } else { 666 ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN); 667 bcopy(dtmpl->ctd_minor, buf, strlen(dtmpl->ctd_minor) + 1); 668 } 669 mutex_exit(&template->ctmpl_lock); 670 671 spec_type = 0; 672 dev = NODEV; 673 if (resolve_pathname(buf, NULL, &dev, &spec_type) != 0 || 674 dev == NODEV || dev == DDI_DEV_T_ANY || dev == DDI_DEV_T_NONE || 675 (spec_type != S_IFCHR && spec_type != 
S_IFBLK)) { 676 CT_DEBUG((CE_WARN, 677 "tmpl_create: failed to find device: %s", buf)); 678 kmem_free(buf, MAXPATHLEN); 679 return (ERANGE); 680 } 681 kmem_free(buf, MAXPATHLEN); 682 683 ctd = contract_device_create(template->ctmpl_data, 684 dev, spec_type, curproc, &error); 685 686 if (ctd == NULL) { 687 CT_DEBUG((CE_WARN, "Failed to create device contract for " 688 "process (%d) with device (devt = %lu, spec_type = %s)", 689 curproc->p_pid, dev, 690 spec_type == S_IFCHR ? "S_IFCHR" : "S_IFBLK")); 691 return (error); 692 } 693 694 mutex_enter(&ctd->cond_contract.ct_lock); 695 *ctidp = ctd->cond_contract.ct_id; 696 mutex_exit(&ctd->cond_contract.ct_lock); 697 698 return (0); 699 } 700 701 /* 702 * Device contract specific template entry points 703 */ 704 static ctmplops_t ctmpl_device_ops = { 705 ctmpl_device_dup, /* ctop_dup */ 706 ctmpl_device_free, /* ctop_free */ 707 ctmpl_device_set, /* ctop_set */ 708 ctmpl_device_get, /* ctop_get */ 709 ctmpl_device_create, /* ctop_create */ 710 CT_DEV_ALLEVENT /* all device events bitmask */ 711 }; 712 713 714 /* 715 * Device contract implementation 716 */ 717 718 /* 719 * contract_device_default 720 * 721 * The device contract default template entry point. Creates a 722 * device contract template with a default A-set and no "noneg" , 723 * with informative degrade events and critical offline events. 724 * There is no default minor path. 
725 */ 726 static ct_template_t * 727 contract_device_default(void) 728 { 729 ctmpl_device_t *new; 730 731 new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP); 732 ctmpl_init(&new->ctd_ctmpl, &ctmpl_device_ops, device_type, new); 733 734 new->ctd_aset = CT_DEV_EV_ONLINE | CT_DEV_EV_DEGRADED; 735 new->ctd_noneg = 0; 736 new->ctd_ctmpl.ctmpl_ev_info = CT_DEV_EV_DEGRADED; 737 new->ctd_ctmpl.ctmpl_ev_crit = CT_DEV_EV_OFFLINE; 738 739 return (&new->ctd_ctmpl); 740 } 741 742 /* 743 * contract_device_free 744 * 745 * Destroys the device contract specific portion of a contract and 746 * frees the contract. 747 */ 748 static void 749 contract_device_free(contract_t *ct) 750 { 751 cont_device_t *ctd = ct->ct_data; 752 753 ASSERT(ctd->cond_minor); 754 ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN); 755 kmem_free(ctd->cond_minor, strlen(ctd->cond_minor) + 1); 756 757 ASSERT(ctd->cond_devt != DDI_DEV_T_ANY && 758 ctd->cond_devt != DDI_DEV_T_NONE && ctd->cond_devt != NODEV); 759 760 ASSERT(ctd->cond_spec == S_IFBLK || ctd->cond_spec == S_IFCHR); 761 762 ASSERT(!(ctd->cond_aset & ~CT_DEV_ALLEVENT)); 763 ASSERT(ctd->cond_noneg == 0 || ctd->cond_noneg == 1); 764 765 ASSERT(!(ctd->cond_currev_type & ~CT_DEV_ALLEVENT)); 766 ASSERT(!(ctd->cond_currev_ack & ~(CT_ACK | CT_NACK))); 767 768 ASSERT((ctd->cond_currev_id > 0) ^ (ctd->cond_currev_type == 0)); 769 ASSERT((ctd->cond_currev_id > 0) || (ctd->cond_currev_ack == 0)); 770 771 ASSERT(!list_link_active(&ctd->cond_next)); 772 773 kmem_free(ctd, sizeof (cont_device_t)); 774 } 775 776 /* 777 * contract_device_abandon 778 * 779 * The device contract abandon entry point. 780 */ 781 static void 782 contract_device_abandon(contract_t *ct) 783 { 784 ASSERT(MUTEX_HELD(&ct->ct_lock)); 785 786 /* 787 * device contracts cannot be inherited or orphaned. 788 * Move the contract to the DEAD_STATE. It will be freed 789 * once all references to it are gone. 
790 */ 791 contract_destroy(ct); 792 } 793 794 /* 795 * contract_device_destroy 796 * 797 * The device contract destroy entry point. 798 * Called from contract_destroy() to do any type specific destroy. Note 799 * that destroy is a misnomer - this does not free the contract, it only 800 * moves it to the dead state. A contract is actually freed via 801 * contract_rele() -> contract_dtor(), contop_free() 802 */ 803 static void 804 contract_device_destroy(contract_t *ct) 805 { 806 cont_device_t *ctd; 807 dev_info_t *dip; 808 809 ASSERT(MUTEX_HELD(&ct->ct_lock)); 810 811 for (;;) { 812 ctd = ct->ct_data; 813 dip = ctd->cond_dip; 814 if (dip == NULL) { 815 /* 816 * The dip has been removed, this is a dangling contract 817 * Check that dip linkages are NULL 818 */ 819 ASSERT(!list_link_active(&ctd->cond_next)); 820 CT_DEBUG((CE_NOTE, "contract_device_destroy:" 821 " contract has no devinfo node. contract ctid : %d", 822 ct->ct_id)); 823 return; 824 } 825 826 /* 827 * The intended lock order is : devi_ct_lock -> ct_count 828 * barrier -> ct_lock. 829 * However we can't do this here as dropping the ct_lock allows 830 * a race condition with i_ddi_free_node()/ 831 * contract_device_remove_dip() which may free off dip before 832 * we can take devi_ct_lock. So use mutex_tryenter to avoid 833 * dropping ct_lock until we have acquired devi_ct_lock. 834 */ 835 if (mutex_tryenter(&(DEVI(dip)->devi_ct_lock)) != 0) 836 break; 837 mutex_exit(&ct->ct_lock); 838 delay(drv_usectohz(1000)); 839 mutex_enter(&ct->ct_lock); 840 } 841 mutex_exit(&ct->ct_lock); 842 843 /* 844 * Waiting for the barrier to be released is strictly speaking not 845 * necessary. But it simplifies the implementation of 846 * contract_device_publish() by establishing the invariant that 847 * device contracts cannot go away during negotiation. 
848 */ 849 ct_barrier_wait_for_release(dip); 850 mutex_enter(&ct->ct_lock); 851 852 list_remove(&(DEVI(dip)->devi_ct), ctd); 853 ctd->cond_dip = NULL; /* no longer linked to dip */ 854 contract_rele(ct); /* remove hold for dip linkage */ 855 856 mutex_exit(&ct->ct_lock); 857 mutex_exit(&(DEVI(dip)->devi_ct_lock)); 858 mutex_enter(&ct->ct_lock); 859 } 860 861 /* 862 * contract_device_status 863 * 864 * The device contract status entry point. Called when level of "detail" 865 * is either CTD_FIXED or CTD_ALL 866 * 867 */ 868 static void 869 contract_device_status(contract_t *ct, zone_t *zone, int detail, nvlist_t *nvl, 870 void *status, model_t model) 871 { 872 cont_device_t *ctd = ct->ct_data; 873 874 ASSERT(detail == CTD_FIXED || detail == CTD_ALL); 875 876 mutex_enter(&ct->ct_lock); 877 contract_status_common(ct, zone, status, model); 878 879 /* 880 * There's no need to hold the contract lock while accessing static 881 * data like aset or noneg. But since we need the lock to access other 882 * data like state, we hold it anyway. 883 */ 884 VERIFY(nvlist_add_uint32(nvl, CTDS_STATE, ctd->cond_state) == 0); 885 VERIFY(nvlist_add_uint32(nvl, CTDS_ASET, ctd->cond_aset) == 0); 886 VERIFY(nvlist_add_uint32(nvl, CTDS_NONEG, ctd->cond_noneg) == 0); 887 888 if (detail == CTD_FIXED) { 889 mutex_exit(&ct->ct_lock); 890 return; 891 } 892 893 ASSERT(ctd->cond_minor); 894 VERIFY(nvlist_add_string(nvl, CTDS_MINOR, ctd->cond_minor) == 0); 895 896 mutex_exit(&ct->ct_lock); 897 } 898 899 /* 900 * Converts a result integer into the corresponding string. Used for printing 901 * messages 902 */ 903 static char * 904 result_str(uint_t result) 905 { 906 switch (result) { 907 case CT_ACK: 908 return ("CT_ACK"); 909 case CT_NACK: 910 return ("CT_NACK"); 911 case CT_NONE: 912 return ("CT_NONE"); 913 default: 914 return ("UNKNOWN"); 915 } 916 } 917 918 /* 919 * Converts a device state integer constant into the corresponding string. 920 * Used to print messages. 
921 */ 922 static char * 923 state_str(uint_t state) 924 { 925 switch (state) { 926 case CT_DEV_EV_ONLINE: 927 return ("ONLINE"); 928 case CT_DEV_EV_DEGRADED: 929 return ("DEGRADED"); 930 case CT_DEV_EV_OFFLINE: 931 return ("OFFLINE"); 932 default: 933 return ("UNKNOWN"); 934 } 935 } 936 937 /* 938 * Routine that determines if a particular CT_DEV_EV_? event corresponds to a 939 * synchronous state change or not. 940 */ 941 static int 942 is_sync_neg(uint_t old, uint_t new) 943 { 944 int i; 945 946 ASSERT(old & CT_DEV_ALLEVENT); 947 ASSERT(new & CT_DEV_ALLEVENT); 948 949 if (old == new) { 950 CT_DEBUG((CE_WARN, "is_sync_neg: transition to same state: %s", 951 state_str(new))); 952 return (-2); 953 } 954 955 for (i = 0; ct_dev_negtable[i].st_new != 0; i++) { 956 if (old == ct_dev_negtable[i].st_old && 957 new == ct_dev_negtable[i].st_new) { 958 return (ct_dev_negtable[i].st_neg); 959 } 960 } 961 962 CT_DEBUG((CE_WARN, "is_sync_neg: Unsupported state transition: " 963 "old = %s -> new = %s", state_str(old), state_str(new))); 964 965 return (-1); 966 } 967 968 /* 969 * Used to cleanup cached dv_nodes so that when a device is released by 970 * a contract holder, its devinfo node can be successfully detached. 
971 */ 972 static int 973 contract_device_dvclean(dev_info_t *dip) 974 { 975 char *devnm; 976 dev_info_t *pdip; 977 978 ASSERT(dip); 979 980 /* pdip can be NULL if we have contracts against the root dip */ 981 pdip = ddi_get_parent(dip); 982 983 if (pdip && DEVI_BUSY_OWNED(pdip) || !pdip && DEVI_BUSY_OWNED(dip)) { 984 char *path; 985 986 path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 987 (void) ddi_pathname(dip, path); 988 CT_DEBUG((CE_WARN, "ct_dv_clean: Parent node is busy owned, " 989 "device=%s", path)); 990 kmem_free(path, MAXPATHLEN); 991 return (EDEADLOCK); 992 } 993 994 if (pdip) { 995 devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP); 996 (void) ddi_deviname(dip, devnm); 997 (void) devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE); 998 kmem_free(devnm, MAXNAMELEN + 1); 999 } else { 1000 (void) devfs_clean(dip, NULL, DV_CLEAN_FORCE); 1001 } 1002 1003 return (0); 1004 } 1005 1006 /* 1007 * Endpoint of a ct_ctl_ack() or ct_ctl_nack() call from userland. 1008 * Results in the ACK or NACK being recorded on the dip for one particular 1009 * contract. The device contracts framework evaluates the ACK/NACKs for all 1010 * contracts against a device to determine if a particular device state change 1011 * should be allowed. 
1012 */ 1013 static int 1014 contract_device_ack_nack(contract_t *ct, uint_t evtype, uint64_t evid, 1015 uint_t cmd) 1016 { 1017 cont_device_t *ctd = ct->ct_data; 1018 dev_info_t *dip; 1019 ctid_t ctid; 1020 int error; 1021 1022 ctid = ct->ct_id; 1023 1024 CT_DEBUG((CE_NOTE, "ack_nack: entered: ctid %d", ctid)); 1025 1026 mutex_enter(&ct->ct_lock); 1027 CT_DEBUG((CE_NOTE, "ack_nack: contract lock acquired: %d", ctid)); 1028 1029 dip = ctd->cond_dip; 1030 1031 ASSERT(ctd->cond_minor); 1032 ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN); 1033 1034 /* 1035 * Negotiation only if new state is not in A-set 1036 */ 1037 ASSERT(!(ctd->cond_aset & evtype)); 1038 1039 /* 1040 * Negotiation only if transition is synchronous 1041 */ 1042 ASSERT(is_sync_neg(ctd->cond_state, evtype)); 1043 1044 /* 1045 * We shouldn't be negotiating if the "noneg" flag is set 1046 */ 1047 ASSERT(!ctd->cond_noneg); 1048 1049 if (dip) 1050 ndi_hold_devi(dip); 1051 1052 mutex_exit(&ct->ct_lock); 1053 1054 /* 1055 * dv_clean only if !NACK and offline state change 1056 */ 1057 if (cmd != CT_NACK && evtype == CT_DEV_EV_OFFLINE && dip) { 1058 CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: %d", ctid)); 1059 error = contract_device_dvclean(dip); 1060 if (error != 0) { 1061 CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: failed: %d", 1062 ctid)); 1063 ddi_release_devi(dip); 1064 } 1065 } 1066 1067 mutex_enter(&ct->ct_lock); 1068 1069 if (dip) 1070 ddi_release_devi(dip); 1071 1072 if (dip == NULL) { 1073 if (ctd->cond_currev_id != evid) { 1074 CT_DEBUG((CE_WARN, "%sACK for non-current event " 1075 "(type=%s, id=%llu) on removed device", 1076 cmd == CT_NACK ? "N" : "", 1077 state_str(evtype), (unsigned long long)evid)); 1078 CT_DEBUG((CE_NOTE, "ack_nack: error: ESRCH, ctid: %d", 1079 ctid)); 1080 } else { 1081 ASSERT(ctd->cond_currev_type == evtype); 1082 CT_DEBUG((CE_WARN, "contract_ack: no such device: " 1083 "ctid: %d", ctid)); 1084 } 1085 error = (ct->ct_state == CTS_DEAD) ? ESRCH : 1086 ((cmd == CT_NACK) ? 
ETIMEDOUT : 0); 1087 mutex_exit(&ct->ct_lock); 1088 return (error); 1089 } 1090 1091 /* 1092 * Must follow lock order: devi_ct_lock -> ct_count barrier - >ct_lock 1093 */ 1094 mutex_exit(&ct->ct_lock); 1095 1096 mutex_enter(&DEVI(dip)->devi_ct_lock); 1097 mutex_enter(&ct->ct_lock); 1098 if (ctd->cond_currev_id != evid) { 1099 char *buf; 1100 mutex_exit(&ct->ct_lock); 1101 mutex_exit(&DEVI(dip)->devi_ct_lock); 1102 ndi_hold_devi(dip); 1103 buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1104 (void) ddi_pathname(dip, buf); 1105 ddi_release_devi(dip); 1106 CT_DEBUG((CE_WARN, "%sACK for non-current event" 1107 "(type=%s, id=%llu) on device %s", 1108 cmd == CT_NACK ? "N" : "", 1109 state_str(evtype), (unsigned long long)evid, buf)); 1110 kmem_free(buf, MAXPATHLEN); 1111 CT_DEBUG((CE_NOTE, "ack_nack: error: %d, ctid: %d", 1112 cmd == CT_NACK ? ETIMEDOUT : 0, ctid)); 1113 return (cmd == CT_ACK ? 0 : ETIMEDOUT); 1114 } 1115 1116 ASSERT(ctd->cond_currev_type == evtype); 1117 ASSERT(cmd == CT_ACK || cmd == CT_NACK); 1118 1119 CT_DEBUG((CE_NOTE, "ack_nack: setting %sACK for ctid: %d", 1120 cmd == CT_NACK ? "N" : "", ctid)); 1121 1122 ctd->cond_currev_ack = cmd; 1123 mutex_exit(&ct->ct_lock); 1124 1125 ct_barrier_decr(dip); 1126 mutex_exit(&DEVI(dip)->devi_ct_lock); 1127 1128 CT_DEBUG((CE_NOTE, "ack_nack: normal exit: ctid: %d", ctid)); 1129 1130 return (0); 1131 } 1132 1133 /* 1134 * Invoked when a userland contract holder approves (i.e. ACKs) a state change 1135 */ 1136 static int 1137 contract_device_ack(contract_t *ct, uint_t evtype, uint64_t evid) 1138 { 1139 return (contract_device_ack_nack(ct, evtype, evid, CT_ACK)); 1140 } 1141 1142 /* 1143 * Invoked when a userland contract holder blocks (i.e. 
NACKs) a state change 1144 */ 1145 static int 1146 contract_device_nack(contract_t *ct, uint_t evtype, uint64_t evid) 1147 { 1148 return (contract_device_ack_nack(ct, evtype, evid, CT_NACK)); 1149 } 1150 1151 /* 1152 * Creates a new contract synchronously with the breaking of an existing 1153 * contract. Currently not supported. 1154 */ 1155 /*ARGSUSED*/ 1156 static int 1157 contract_device_newct(contract_t *ct) 1158 { 1159 return (ENOTSUP); 1160 } 1161 1162 /* 1163 * Core device contract implementation entry points 1164 */ 1165 static contops_t contract_device_ops = { 1166 contract_device_free, /* contop_free */ 1167 contract_device_abandon, /* contop_abandon */ 1168 contract_device_destroy, /* contop_destroy */ 1169 contract_device_status, /* contop_status */ 1170 contract_device_ack, /* contop_ack */ 1171 contract_device_nack, /* contop_nack */ 1172 contract_qack_notsup, /* contop_qack */ 1173 contract_device_newct /* contop_newct */ 1174 }; 1175 1176 /* 1177 * contract_device_init 1178 * 1179 * Initializes the device contract type. 1180 */ 1181 void 1182 contract_device_init(void) 1183 { 1184 device_type = contract_type_init(CTT_DEVICE, "device", 1185 &contract_device_ops, contract_device_default); 1186 } 1187 1188 /* 1189 * contract_device_create 1190 * 1191 * create a device contract given template "tmpl" and the "owner" process. 1192 * May fail and return NULL if project.max-contracts would have been exceeded. 
1193 * 1194 * Common device contract creation routine called for both open-time and 1195 * non-open time device contract creation 1196 */ 1197 static cont_device_t * 1198 contract_device_create(ctmpl_device_t *dtmpl, dev_t dev, int spec_type, 1199 proc_t *owner, int *errorp) 1200 { 1201 cont_device_t *ctd; 1202 char *minor; 1203 char *path; 1204 dev_info_t *dip; 1205 1206 ASSERT(dtmpl != NULL); 1207 ASSERT(dev != NODEV && dev != DDI_DEV_T_ANY && dev != DDI_DEV_T_NONE); 1208 ASSERT(spec_type == S_IFCHR || spec_type == S_IFBLK); 1209 ASSERT(errorp); 1210 1211 *errorp = 0; 1212 1213 path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1214 1215 mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock); 1216 ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN); 1217 bcopy(dtmpl->ctd_minor, path, strlen(dtmpl->ctd_minor) + 1); 1218 mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock); 1219 1220 dip = e_ddi_hold_devi_by_path(path, 0); 1221 if (dip == NULL) { 1222 cmn_err(CE_WARN, "contract_create: Cannot find devinfo node " 1223 "for device path (%s)", path); 1224 kmem_free(path, MAXPATHLEN); 1225 *errorp = ERANGE; 1226 return (NULL); 1227 } 1228 1229 /* 1230 * Lock out any parallel contract negotiations 1231 */ 1232 mutex_enter(&(DEVI(dip)->devi_ct_lock)); 1233 ct_barrier_acquire(dip); 1234 mutex_exit(&(DEVI(dip)->devi_ct_lock)); 1235 1236 minor = i_ddi_strdup(path, KM_SLEEP); 1237 kmem_free(path, MAXPATHLEN); 1238 1239 (void) contract_type_pbundle(device_type, owner); 1240 1241 ctd = kmem_zalloc(sizeof (cont_device_t), KM_SLEEP); 1242 1243 /* 1244 * Only we hold a refernce to this contract. 
Safe to access 1245 * the fields without a ct_lock 1246 */ 1247 ctd->cond_minor = minor; 1248 /* 1249 * It is safe to set the dip pointer in the contract 1250 * as the contract will always be destroyed before the dip 1251 * is released 1252 */ 1253 ctd->cond_dip = dip; 1254 ctd->cond_devt = dev; 1255 ctd->cond_spec = spec_type; 1256 1257 /* 1258 * Since we are able to lookup the device, it is either 1259 * online or degraded 1260 */ 1261 ctd->cond_state = DEVI_IS_DEVICE_DEGRADED(dip) ? 1262 CT_DEV_EV_DEGRADED : CT_DEV_EV_ONLINE; 1263 1264 mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock); 1265 ctd->cond_aset = dtmpl->ctd_aset; 1266 ctd->cond_noneg = dtmpl->ctd_noneg; 1267 1268 /* 1269 * contract_ctor() initailizes the common portion of a contract 1270 * contract_dtor() destroys the common portion of a contract 1271 */ 1272 if (contract_ctor(&ctd->cond_contract, device_type, &dtmpl->ctd_ctmpl, 1273 ctd, 0, owner, B_TRUE)) { 1274 mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock); 1275 /* 1276 * contract_device_free() destroys the type specific 1277 * portion of a contract and frees the contract. 
1278 * The "minor" path and "cred" is a part of the type specific 1279 * portion of the contract and will be freed by 1280 * contract_device_free() 1281 */ 1282 contract_device_free(&ctd->cond_contract); 1283 1284 /* release barrier */ 1285 mutex_enter(&(DEVI(dip)->devi_ct_lock)); 1286 ct_barrier_release(dip); 1287 mutex_exit(&(DEVI(dip)->devi_ct_lock)); 1288 1289 ddi_release_devi(dip); 1290 *errorp = EAGAIN; 1291 return (NULL); 1292 } 1293 mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock); 1294 1295 mutex_enter(&ctd->cond_contract.ct_lock); 1296 ctd->cond_contract.ct_ntime.ctm_total = CT_DEV_ACKTIME; 1297 ctd->cond_contract.ct_qtime.ctm_total = CT_DEV_ACKTIME; 1298 ctd->cond_contract.ct_ntime.ctm_start = -1; 1299 ctd->cond_contract.ct_qtime.ctm_start = -1; 1300 mutex_exit(&ctd->cond_contract.ct_lock); 1301 1302 /* 1303 * Insert device contract into list hanging off the dip 1304 * Bump up the ref-count on the contract to reflect this 1305 */ 1306 contract_hold(&ctd->cond_contract); 1307 mutex_enter(&(DEVI(dip)->devi_ct_lock)); 1308 list_insert_tail(&(DEVI(dip)->devi_ct), ctd); 1309 1310 /* release barrier */ 1311 ct_barrier_release(dip); 1312 mutex_exit(&(DEVI(dip)->devi_ct_lock)); 1313 1314 ddi_release_devi(dip); 1315 1316 return (ctd); 1317 } 1318 1319 /* 1320 * Called when a device is successfully opened to create an open-time contract 1321 * i.e. synchronously with a device open. 1322 */ 1323 int 1324 contract_device_open(dev_t dev, int spec_type, contract_t **ctpp) 1325 { 1326 ctmpl_device_t *dtmpl; 1327 ct_template_t *tmpl; 1328 cont_device_t *ctd; 1329 char *path; 1330 klwp_t *lwp; 1331 int error; 1332 1333 if (ctpp) 1334 *ctpp = NULL; 1335 1336 /* 1337 * Check if we are in user-context i.e. 
if we have an lwp 1338 */ 1339 lwp = ttolwp(curthread); 1340 if (lwp == NULL) { 1341 CT_DEBUG((CE_NOTE, "contract_open: Not user-context")); 1342 return (0); 1343 } 1344 1345 tmpl = ctmpl_dup(lwp->lwp_ct_active[device_type->ct_type_index]); 1346 if (tmpl == NULL) { 1347 return (0); 1348 } 1349 dtmpl = tmpl->ctmpl_data; 1350 1351 /* 1352 * If the user set a minor path in the template before an open, 1353 * ignore it. We use the minor path of the actual minor opened. 1354 */ 1355 mutex_enter(&tmpl->ctmpl_lock); 1356 if (dtmpl->ctd_minor != NULL) { 1357 CT_DEBUG((CE_NOTE, "contract_device_open(): Process %d: " 1358 "ignoring device minor path in active template: %s", 1359 curproc->p_pid, dtmpl->ctd_minor)); 1360 /* 1361 * This is a copy of the actual activated template. 1362 * Safe to make changes such as freeing the minor 1363 * path in the template. 1364 */ 1365 kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1); 1366 dtmpl->ctd_minor = NULL; 1367 } 1368 mutex_exit(&tmpl->ctmpl_lock); 1369 1370 path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1371 1372 if (ddi_dev_pathname(dev, spec_type, path) != DDI_SUCCESS) { 1373 CT_DEBUG((CE_NOTE, "contract_device_open(): Failed to derive " 1374 "minor path from dev_t,spec {%lu, %d} for process (%d)", 1375 dev, spec_type, curproc->p_pid)); 1376 ctmpl_free(tmpl); 1377 kmem_free(path, MAXPATHLEN); 1378 return (1); 1379 } 1380 1381 mutex_enter(&tmpl->ctmpl_lock); 1382 ASSERT(dtmpl->ctd_minor == NULL); 1383 dtmpl->ctd_minor = path; 1384 mutex_exit(&tmpl->ctmpl_lock); 1385 1386 ctd = contract_device_create(dtmpl, dev, spec_type, curproc, &error); 1387 1388 mutex_enter(&tmpl->ctmpl_lock); 1389 ASSERT(dtmpl->ctd_minor); 1390 dtmpl->ctd_minor = NULL; 1391 mutex_exit(&tmpl->ctmpl_lock); 1392 ctmpl_free(tmpl); 1393 kmem_free(path, MAXPATHLEN); 1394 1395 if (ctd == NULL) { 1396 cmn_err(CE_NOTE, "contract_device_open(): Failed to " 1397 "create device contract for process (%d) holding " 1398 "device (devt = %lu, spec_type = %d)", 1399 
curproc->p_pid, dev, spec_type); 1400 return (1); 1401 } 1402 1403 if (ctpp) { 1404 mutex_enter(&ctd->cond_contract.ct_lock); 1405 *ctpp = &ctd->cond_contract; 1406 mutex_exit(&ctd->cond_contract.ct_lock); 1407 } 1408 return (0); 1409 } 1410 1411 /* 1412 * Called during contract negotiation by the device contract framework to wait 1413 * for ACKs or NACKs from contract holders. If all responses are not received 1414 * before a specified timeout, this routine times out. 1415 */ 1416 static uint_t 1417 wait_for_acks(dev_info_t *dip, dev_t dev, int spec_type, uint_t evtype) 1418 { 1419 cont_device_t *ctd; 1420 int timed_out = 0; 1421 int result = CT_NONE; 1422 int ack; 1423 char *f = "wait_for_acks"; 1424 1425 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 1426 ASSERT(dip); 1427 ASSERT(evtype & CT_DEV_ALLEVENT); 1428 ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE); 1429 ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) || 1430 (spec_type == S_IFBLK || spec_type == S_IFCHR)); 1431 1432 CT_DEBUG((CE_NOTE, "%s: entered: dip: %p", f, (void *)dip)); 1433 1434 if (ct_barrier_wait_for_empty(dip, CT_DEV_ACKTIME) == -1) { 1435 /* 1436 * some contract owner(s) didn't respond in time 1437 */ 1438 CT_DEBUG((CE_NOTE, "%s: timed out: %p", f, (void *)dip)); 1439 timed_out = 1; 1440 } 1441 1442 ack = 0; 1443 for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; 1444 ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) { 1445 1446 mutex_enter(&ctd->cond_contract.ct_lock); 1447 1448 ASSERT(ctd->cond_dip == dip); 1449 1450 if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) { 1451 mutex_exit(&ctd->cond_contract.ct_lock); 1452 continue; 1453 } 1454 if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) { 1455 mutex_exit(&ctd->cond_contract.ct_lock); 1456 continue; 1457 } 1458 1459 /* skip if non-negotiable contract */ 1460 if (ctd->cond_noneg) { 1461 mutex_exit(&ctd->cond_contract.ct_lock); 1462 continue; 1463 } 1464 1465 ASSERT(ctd->cond_currev_type == evtype); 1466 if (ctd->cond_currev_ack 
== CT_NACK) { 1467 CT_DEBUG((CE_NOTE, "%s: found a NACK,result = NACK: %p", 1468 f, (void *)dip)); 1469 mutex_exit(&ctd->cond_contract.ct_lock); 1470 return (CT_NACK); 1471 } else if (ctd->cond_currev_ack == CT_ACK) { 1472 ack = 1; 1473 CT_DEBUG((CE_NOTE, "%s: found a ACK: %p", 1474 f, (void *)dip)); 1475 } 1476 mutex_exit(&ctd->cond_contract.ct_lock); 1477 } 1478 1479 if (ack) { 1480 result = CT_ACK; 1481 CT_DEBUG((CE_NOTE, "%s: result = ACK, dip=%p", f, (void *)dip)); 1482 } else if (timed_out) { 1483 result = CT_NONE; 1484 CT_DEBUG((CE_NOTE, "%s: result = NONE (timed-out), dip=%p", 1485 f, (void *)dip)); 1486 } else { 1487 CT_DEBUG((CE_NOTE, "%s: result = NONE, dip=%p", 1488 f, (void *)dip)); 1489 } 1490 1491 1492 return (result); 1493 } 1494 1495 /* 1496 * Determines the current state of a device (i.e a devinfo node 1497 */ 1498 static int 1499 get_state(dev_info_t *dip) 1500 { 1501 if (DEVI_IS_DEVICE_OFFLINE(dip) || DEVI_IS_DEVICE_DOWN(dip)) 1502 return (CT_DEV_EV_OFFLINE); 1503 else if (DEVI_IS_DEVICE_DEGRADED(dip)) 1504 return (CT_DEV_EV_DEGRADED); 1505 else 1506 return (CT_DEV_EV_ONLINE); 1507 } 1508 1509 /* 1510 * Sets the current state of a device in a device contract 1511 */ 1512 static void 1513 set_cond_state(dev_info_t *dip) 1514 { 1515 uint_t state = get_state(dip); 1516 cont_device_t *ctd; 1517 1518 /* verify that barrier is held */ 1519 ASSERT(ct_barrier_held(dip)); 1520 1521 for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; 1522 ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) { 1523 mutex_enter(&ctd->cond_contract.ct_lock); 1524 ASSERT(ctd->cond_dip == dip); 1525 ctd->cond_state = state; 1526 mutex_exit(&ctd->cond_contract.ct_lock); 1527 } 1528 } 1529 1530 /* 1531 * Core routine called by event-specific routines when an event occurs. 1532 * Determines if an event should be be published, and if it is to be 1533 * published, whether a negotiation should take place. 
Also implements 1534 * NEGEND events which publish the final disposition of an event after 1535 * negotiations are complete. 1536 * 1537 * When an event occurs on a minor node, this routine walks the list of 1538 * contracts hanging off a devinfo node and for each contract on the affected 1539 * dip, evaluates the following cases 1540 * 1541 * a. an event that is synchronous, breaks the contract and NONEG not set 1542 * - bumps up the outstanding negotiation counts on the dip 1543 * - marks the dip as undergoing negotiation (devi_ct_neg) 1544 * - event of type CTE_NEG is published 1545 * b. an event that is synchronous, breaks the contract and NONEG is set 1546 * - sets the final result to CT_NACK, event is blocked 1547 * - does not publish an event 1548 * c. event is asynchronous and breaks the contract 1549 * - publishes a critical event irrespect of whether the NONEG 1550 * flag is set, since the contract will be broken and contract 1551 * owner needs to be informed. 1552 * d. No contract breakage but the owner has subscribed to the event 1553 * - publishes the event irrespective of the NONEG event as the 1554 * owner has explicitly subscribed to the event. 1555 * e. NEGEND event 1556 * - publishes a critical event. Should only be doing this if 1557 * if NONEG is not set. 1558 * f. all other events 1559 * - Since a contract is not broken and this event has not been 1560 * subscribed to, this event does not need to be published for 1561 * for this contract. 1562 * 1563 * Once an event is published, what happens next depends on the type of 1564 * event: 1565 * 1566 * a. NEGEND event 1567 * - cleanup all state associated with the preceding negotiation 1568 * and return CT_ACK to the caller of contract_device_publish() 1569 * b. NACKed event 1570 * - One or more contracts had the NONEG term, so the event was 1571 * blocked. Return CT_NACK to the caller. 1572 * c. Negotiated event 1573 * - Call wait_for_acks() to wait for responses from contract 1574 * holders. 
The end result is either CT_ACK (event is permitted), 1575 * CT_NACK (event is blocked) or CT_NONE (no contract owner) 1576 * responded. This result is returned back to the caller. 1577 * d. All other events 1578 * - If the event was asynchronous (i.e. not negotiated) or 1579 * a contract was not broken return CT_ACK to the caller. 1580 */ 1581 static uint_t 1582 contract_device_publish(dev_info_t *dip, dev_t dev, int spec_type, 1583 uint_t evtype, nvlist_t *tnvl) 1584 { 1585 cont_device_t *ctd; 1586 uint_t result = CT_NONE; 1587 uint64_t evid = 0; 1588 uint64_t nevid = 0; 1589 char *path = NULL; 1590 int negend; 1591 int match; 1592 int sync = 0; 1593 contract_t *ct; 1594 ct_kevent_t *event; 1595 nvlist_t *nvl; 1596 int broken = 0; 1597 1598 ASSERT(dip); 1599 ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE); 1600 ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) || 1601 (spec_type == S_IFBLK || spec_type == S_IFCHR)); 1602 ASSERT(evtype == 0 || (evtype & CT_DEV_ALLEVENT)); 1603 1604 /* Is this a synchronous state change ? */ 1605 if (evtype != CT_EV_NEGEND) { 1606 sync = is_sync_neg(get_state(dip), evtype); 1607 /* NOP if unsupported transition */ 1608 if (sync == -2 || sync == -1) { 1609 DEVI(dip)->devi_flags |= DEVI_CT_NOP; 1610 result = (sync == -2) ? CT_ACK : CT_NONE; 1611 goto out; 1612 } 1613 CT_DEBUG((CE_NOTE, "publish: is%s sync state change", 1614 sync ? 
"" : " not")); 1615 } else if (DEVI(dip)->devi_flags & DEVI_CT_NOP) { 1616 DEVI(dip)->devi_flags &= ~DEVI_CT_NOP; 1617 result = CT_ACK; 1618 goto out; 1619 } 1620 1621 path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 1622 (void) ddi_pathname(dip, path); 1623 1624 mutex_enter(&(DEVI(dip)->devi_ct_lock)); 1625 1626 /* 1627 * Negotiation end - set the state of the device in the contract 1628 */ 1629 if (evtype == CT_EV_NEGEND) { 1630 CT_DEBUG((CE_NOTE, "publish: negend: setting cond state")); 1631 set_cond_state(dip); 1632 } 1633 1634 /* 1635 * If this device didn't go through negotiation, don't publish 1636 * a NEGEND event - simply release the barrier to allow other 1637 * device events in. 1638 */ 1639 negend = 0; 1640 if (evtype == CT_EV_NEGEND && !DEVI(dip)->devi_ct_neg) { 1641 CT_DEBUG((CE_NOTE, "publish: no negend reqd. release barrier")); 1642 ct_barrier_release(dip); 1643 mutex_exit(&(DEVI(dip)->devi_ct_lock)); 1644 result = CT_ACK; 1645 goto out; 1646 } else if (evtype == CT_EV_NEGEND) { 1647 /* 1648 * There are negotiated contract breakages that 1649 * need a NEGEND event 1650 */ 1651 ASSERT(ct_barrier_held(dip)); 1652 negend = 1; 1653 CT_DEBUG((CE_NOTE, "publish: setting negend flag")); 1654 } else { 1655 /* 1656 * This is a new event, not a NEGEND event. Wait for previous 1657 * contract events to complete. 
1658 */ 1659 ct_barrier_acquire(dip); 1660 } 1661 1662 1663 match = 0; 1664 for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; 1665 ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) { 1666 1667 ctid_t ctid; 1668 size_t len = strlen(path); 1669 1670 mutex_enter(&ctd->cond_contract.ct_lock); 1671 1672 ASSERT(ctd->cond_dip == dip); 1673 ASSERT(ctd->cond_minor); 1674 ASSERT(strncmp(ctd->cond_minor, path, len) == 0 && 1675 ctd->cond_minor[len] == ':'); 1676 1677 if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) { 1678 mutex_exit(&ctd->cond_contract.ct_lock); 1679 continue; 1680 } 1681 if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) { 1682 mutex_exit(&ctd->cond_contract.ct_lock); 1683 continue; 1684 } 1685 1686 /* We have a matching contract */ 1687 match = 1; 1688 ctid = ctd->cond_contract.ct_id; 1689 CT_DEBUG((CE_NOTE, "publish: found matching contract: %d", 1690 ctid)); 1691 1692 /* 1693 * There are 4 possible cases 1694 * 1. A contract is broken (dev not in acceptable state) and 1695 * the state change is synchronous - start negotiation 1696 * by sending a CTE_NEG critical event. 1697 * 2. A contract is broken and the state change is 1698 * asynchronous - just send a critical event and 1699 * break the contract. 1700 * 3. Contract is not broken, but consumer has subscribed 1701 * to the event as a critical or informative event 1702 * - just send the appropriate event 1703 * 4. contract waiting for negend event - just send the critical 1704 * NEGEND event. 
1705 */ 1706 broken = 0; 1707 if (!negend && !(evtype & ctd->cond_aset)) { 1708 broken = 1; 1709 CT_DEBUG((CE_NOTE, "publish: Contract broken: %d", 1710 ctid)); 1711 } 1712 1713 /* 1714 * Don't send event if 1715 * - contract is not broken AND 1716 * - contract holder has not subscribed to this event AND 1717 * - contract not waiting for a NEGEND event 1718 */ 1719 if (!broken && !EVSENDP(ctd, evtype) && 1720 !ctd->cond_neg) { 1721 CT_DEBUG((CE_NOTE, "contract_device_publish(): " 1722 "contract (%d): no publish reqd: event %d", 1723 ctd->cond_contract.ct_id, evtype)); 1724 mutex_exit(&ctd->cond_contract.ct_lock); 1725 continue; 1726 } 1727 1728 /* 1729 * Note: need to kmem_zalloc() the event so mutexes are 1730 * initialized automatically 1731 */ 1732 ct = &ctd->cond_contract; 1733 event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP); 1734 event->cte_type = evtype; 1735 1736 if (broken && sync) { 1737 CT_DEBUG((CE_NOTE, "publish: broken + sync: " 1738 "ctid: %d", ctid)); 1739 ASSERT(!negend); 1740 ASSERT(ctd->cond_currev_id == 0); 1741 ASSERT(ctd->cond_currev_type == 0); 1742 ASSERT(ctd->cond_currev_ack == 0); 1743 ASSERT(ctd->cond_neg == 0); 1744 if (ctd->cond_noneg) { 1745 /* Nothing to publish. Event has been blocked */ 1746 CT_DEBUG((CE_NOTE, "publish: sync and noneg:" 1747 "not publishing blocked ev: ctid: %d", 1748 ctid)); 1749 result = CT_NACK; 1750 kmem_free(event, sizeof (ct_kevent_t)); 1751 mutex_exit(&ctd->cond_contract.ct_lock); 1752 continue; 1753 } 1754 event->cte_flags = CTE_NEG; /* critical neg. 
event */ 1755 ctd->cond_currev_type = event->cte_type; 1756 ct_barrier_incr(dip); 1757 DEVI(dip)->devi_ct_neg = 1; /* waiting for negend */ 1758 ctd->cond_neg = 1; 1759 } else if (broken && !sync) { 1760 CT_DEBUG((CE_NOTE, "publish: broken + async: ctid: %d", 1761 ctid)); 1762 ASSERT(!negend); 1763 ASSERT(ctd->cond_currev_id == 0); 1764 ASSERT(ctd->cond_currev_type == 0); 1765 ASSERT(ctd->cond_currev_ack == 0); 1766 ASSERT(ctd->cond_neg == 0); 1767 event->cte_flags = 0; /* critical event */ 1768 } else if (EVSENDP(ctd, event->cte_type)) { 1769 CT_DEBUG((CE_NOTE, "publish: event suscrib: ctid: %d", 1770 ctid)); 1771 ASSERT(!negend); 1772 ASSERT(ctd->cond_currev_id == 0); 1773 ASSERT(ctd->cond_currev_type == 0); 1774 ASSERT(ctd->cond_currev_ack == 0); 1775 ASSERT(ctd->cond_neg == 0); 1776 event->cte_flags = EVINFOP(ctd, event->cte_type) ? 1777 CTE_INFO : 0; 1778 } else if (ctd->cond_neg) { 1779 CT_DEBUG((CE_NOTE, "publish: NEGEND: ctid: %d", ctid)); 1780 ASSERT(negend); 1781 ASSERT(ctd->cond_noneg == 0); 1782 nevid = ctd->cond_contract.ct_nevent ? 
1783 ctd->cond_contract.ct_nevent->cte_id : 0; 1784 ASSERT(ctd->cond_currev_id == nevid); 1785 event->cte_flags = 0; /* NEGEND is always critical */ 1786 ctd->cond_currev_id = 0; 1787 ctd->cond_currev_type = 0; 1788 ctd->cond_currev_ack = 0; 1789 ctd->cond_neg = 0; 1790 } else { 1791 CT_DEBUG((CE_NOTE, "publish: not publishing event for " 1792 "ctid: %d, evtype: %d", 1793 ctd->cond_contract.ct_id, event->cte_type)); 1794 ASSERT(!negend); 1795 ASSERT(ctd->cond_currev_id == 0); 1796 ASSERT(ctd->cond_currev_type == 0); 1797 ASSERT(ctd->cond_currev_ack == 0); 1798 ASSERT(ctd->cond_neg == 0); 1799 kmem_free(event, sizeof (ct_kevent_t)); 1800 mutex_exit(&ctd->cond_contract.ct_lock); 1801 continue; 1802 } 1803 1804 nvl = NULL; 1805 if (tnvl) { 1806 VERIFY(nvlist_dup(tnvl, &nvl, 0) == 0); 1807 if (negend) { 1808 int32_t newct = 0; 1809 ASSERT(ctd->cond_noneg == 0); 1810 VERIFY(nvlist_add_uint64(nvl, CTS_NEVID, nevid) 1811 == 0); 1812 VERIFY(nvlist_lookup_int32(nvl, CTS_NEWCT, 1813 &newct) == 0); 1814 VERIFY(nvlist_add_int32(nvl, CTS_NEWCT, 1815 newct == 1 ? 0 : 1816 ctd->cond_contract.ct_id) == 0); 1817 CT_DEBUG((CE_NOTE, "publish: negend: ctid: %d " 1818 "CTS_NEVID: %llu, CTS_NEWCT: %s", 1819 ctid, (unsigned long long)nevid, 1820 newct ? "success" : "failure")); 1821 1822 } 1823 } 1824 1825 if (ctd->cond_neg) { 1826 ASSERT(ctd->cond_contract.ct_ntime.ctm_start == -1); 1827 ASSERT(ctd->cond_contract.ct_qtime.ctm_start == -1); 1828 ctd->cond_contract.ct_ntime.ctm_start = ddi_get_lbolt(); 1829 ctd->cond_contract.ct_qtime.ctm_start = 1830 ctd->cond_contract.ct_ntime.ctm_start; 1831 } 1832 1833 /* 1834 * by holding the dip's devi_ct_lock we ensure that 1835 * all ACK/NACKs are held up until we have finished 1836 * publishing to all contracts. 
1837 */ 1838 mutex_exit(&ctd->cond_contract.ct_lock); 1839 evid = cte_publish_all(ct, event, nvl, NULL); 1840 mutex_enter(&ctd->cond_contract.ct_lock); 1841 1842 if (ctd->cond_neg) { 1843 ASSERT(!negend); 1844 ASSERT(broken); 1845 ASSERT(sync); 1846 ASSERT(!ctd->cond_noneg); 1847 CT_DEBUG((CE_NOTE, "publish: sync break, setting evid" 1848 ": %d", ctid)); 1849 ctd->cond_currev_id = evid; 1850 } else if (negend) { 1851 ctd->cond_contract.ct_ntime.ctm_start = -1; 1852 ctd->cond_contract.ct_qtime.ctm_start = -1; 1853 } 1854 mutex_exit(&ctd->cond_contract.ct_lock); 1855 } 1856 1857 /* 1858 * If "negend" set counter back to initial state (-1) so that 1859 * other events can be published. Also clear the negotiation flag 1860 * on dip. 1861 * 1862 * 0 .. n are used for counting. 1863 * -1 indicates counter is available for use. 1864 */ 1865 if (negend) { 1866 /* 1867 * devi_ct_count not necessarily 0. We may have 1868 * timed out in which case, count will be non-zero. 1869 */ 1870 ct_barrier_release(dip); 1871 DEVI(dip)->devi_ct_neg = 0; 1872 CT_DEBUG((CE_NOTE, "publish: negend: reset dip state: dip=%p", 1873 (void *)dip)); 1874 } else if (DEVI(dip)->devi_ct_neg) { 1875 ASSERT(match); 1876 ASSERT(!ct_barrier_empty(dip)); 1877 CT_DEBUG((CE_NOTE, "publish: sync count=%d, dip=%p", 1878 DEVI(dip)->devi_ct_count, (void *)dip)); 1879 } else { 1880 /* 1881 * for non-negotiated events or subscribed events or no 1882 * matching contracts 1883 */ 1884 ASSERT(ct_barrier_empty(dip)); 1885 ASSERT(DEVI(dip)->devi_ct_neg == 0); 1886 CT_DEBUG((CE_NOTE, "publish: async/non-nego/subscrib/no-match: " 1887 "dip=%p", (void *)dip)); 1888 1889 /* 1890 * only this function when called from contract_device_negend() 1891 * can reset the counter to READY state i.e. -1. This function 1892 * is so called for every event whether a NEGEND event is needed 1893 * or not, but the negend event is only published if the event 1894 * whose end they signal is a negotiated event for the contract. 
1895 */ 1896 } 1897 1898 if (!match) { 1899 /* No matching contracts */ 1900 CT_DEBUG((CE_NOTE, "publish: No matching contract")); 1901 result = CT_NONE; 1902 } else if (result == CT_NACK) { 1903 /* a non-negotiable contract exists and this is a neg. event */ 1904 CT_DEBUG((CE_NOTE, "publish: found 1 or more NONEG contract")); 1905 (void) wait_for_acks(dip, dev, spec_type, evtype); 1906 } else if (DEVI(dip)->devi_ct_neg) { 1907 /* one or more contracts going through negotations */ 1908 CT_DEBUG((CE_NOTE, "publish: sync contract: waiting")); 1909 result = wait_for_acks(dip, dev, spec_type, evtype); 1910 } else { 1911 /* no negotiated contracts or no broken contracts or NEGEND */ 1912 CT_DEBUG((CE_NOTE, "publish: async/no-break/negend")); 1913 result = CT_ACK; 1914 } 1915 1916 /* 1917 * Release the lock only now so that the only point where we 1918 * drop the lock is in wait_for_acks(). This is so that we don't 1919 * miss cv_signal/cv_broadcast from contract holders 1920 */ 1921 CT_DEBUG((CE_NOTE, "publish: dropping devi_ct_lock")); 1922 mutex_exit(&(DEVI(dip)->devi_ct_lock)); 1923 1924 out: 1925 nvlist_free(tnvl); 1926 if (path) 1927 kmem_free(path, MAXPATHLEN); 1928 1929 1930 CT_DEBUG((CE_NOTE, "publish: result = %s", result_str(result))); 1931 return (result); 1932 } 1933 1934 1935 /* 1936 * contract_device_offline 1937 * 1938 * Event publishing routine called by I/O framework when a device is offlined. 
1939 */ 1940 ct_ack_t 1941 contract_device_offline(dev_info_t *dip, dev_t dev, int spec_type) 1942 { 1943 nvlist_t *nvl; 1944 uint_t result; 1945 uint_t evtype; 1946 1947 VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); 1948 1949 evtype = CT_DEV_EV_OFFLINE; 1950 result = contract_device_publish(dip, dev, spec_type, evtype, nvl); 1951 1952 /* 1953 * If a contract offline is NACKED, the framework expects us to call 1954 * NEGEND ourselves, since we know the final result 1955 */ 1956 if (result == CT_NACK) { 1957 contract_device_negend(dip, dev, spec_type, CT_EV_FAILURE); 1958 } 1959 1960 return (result); 1961 } 1962 1963 /* 1964 * contract_device_degrade 1965 * 1966 * Event publishing routine called by I/O framework when a device 1967 * moves to degrade state. 1968 */ 1969 /*ARGSUSED*/ 1970 void 1971 contract_device_degrade(dev_info_t *dip, dev_t dev, int spec_type) 1972 { 1973 nvlist_t *nvl; 1974 uint_t evtype; 1975 1976 VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); 1977 1978 evtype = CT_DEV_EV_DEGRADED; 1979 (void) contract_device_publish(dip, dev, spec_type, evtype, nvl); 1980 } 1981 1982 /* 1983 * contract_device_undegrade 1984 * 1985 * Event publishing routine called by I/O framework when a device 1986 * moves from degraded state to online state. 1987 */ 1988 /*ARGSUSED*/ 1989 void 1990 contract_device_undegrade(dev_info_t *dip, dev_t dev, int spec_type) 1991 { 1992 nvlist_t *nvl; 1993 uint_t evtype; 1994 1995 VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); 1996 1997 evtype = CT_DEV_EV_ONLINE; 1998 (void) contract_device_publish(dip, dev, spec_type, evtype, nvl); 1999 } 2000 2001 /* 2002 * For all contracts which have undergone a negotiation (because the device 2003 * moved out of the acceptable state for that contract and the state 2004 * change is synchronous i.e. requires negotiation) this routine publishes 2005 * a CT_EV_NEGEND event with the final disposition of the event. 
2006 * 2007 * This event is always a critical event. 2008 */ 2009 void 2010 contract_device_negend(dev_info_t *dip, dev_t dev, int spec_type, int result) 2011 { 2012 nvlist_t *nvl; 2013 uint_t evtype; 2014 2015 ASSERT(result == CT_EV_SUCCESS || result == CT_EV_FAILURE); 2016 2017 CT_DEBUG((CE_NOTE, "contract_device_negend(): entered: result: %d, " 2018 "dip: %p", result, (void *)dip)); 2019 2020 VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0); 2021 VERIFY(nvlist_add_int32(nvl, CTS_NEWCT, 2022 result == CT_EV_SUCCESS ? 1 : 0) == 0); 2023 2024 evtype = CT_EV_NEGEND; 2025 (void) contract_device_publish(dip, dev, spec_type, evtype, nvl); 2026 2027 CT_DEBUG((CE_NOTE, "contract_device_negend(): exit dip: %p", 2028 (void *)dip)); 2029 } 2030 2031 /* 2032 * Wrapper routine called by other subsystems (such as LDI) to start 2033 * negotiations when a synchronous device state change occurs. 2034 * Returns CT_ACK or CT_NACK. 2035 */ 2036 ct_ack_t 2037 contract_device_negotiate(dev_info_t *dip, dev_t dev, int spec_type, 2038 uint_t evtype) 2039 { 2040 int result; 2041 2042 ASSERT(dip); 2043 ASSERT(dev != NODEV); 2044 ASSERT(dev != DDI_DEV_T_ANY); 2045 ASSERT(dev != DDI_DEV_T_NONE); 2046 ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR); 2047 2048 switch (evtype) { 2049 case CT_DEV_EV_OFFLINE: 2050 result = contract_device_offline(dip, dev, spec_type); 2051 break; 2052 default: 2053 cmn_err(CE_PANIC, "contract_device_negotiate(): Negotiation " 2054 "not supported: event (%d) for dev_t (%lu) and spec (%d), " 2055 "dip (%p)", evtype, dev, spec_type, (void *)dip); 2056 result = CT_NACK; 2057 break; 2058 } 2059 2060 return (result); 2061 } 2062 2063 /* 2064 * A wrapper routine called by other subsystems (such as the LDI) to 2065 * finalize event processing for a state change event. For synchronous 2066 * state changes, this publishes NEGEND events. For asynchronous i.e. 2067 * non-negotiable events this publishes the event. 
2068 */ 2069 void 2070 contract_device_finalize(dev_info_t *dip, dev_t dev, int spec_type, 2071 uint_t evtype, int ct_result) 2072 { 2073 ASSERT(dip); 2074 ASSERT(dev != NODEV); 2075 ASSERT(dev != DDI_DEV_T_ANY); 2076 ASSERT(dev != DDI_DEV_T_NONE); 2077 ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR); 2078 2079 switch (evtype) { 2080 case CT_DEV_EV_OFFLINE: 2081 contract_device_negend(dip, dev, spec_type, ct_result); 2082 break; 2083 case CT_DEV_EV_DEGRADED: 2084 contract_device_degrade(dip, dev, spec_type); 2085 contract_device_negend(dip, dev, spec_type, ct_result); 2086 break; 2087 case CT_DEV_EV_ONLINE: 2088 contract_device_undegrade(dip, dev, spec_type); 2089 contract_device_negend(dip, dev, spec_type, ct_result); 2090 break; 2091 default: 2092 cmn_err(CE_PANIC, "contract_device_finalize(): Unsupported " 2093 "event (%d) for dev_t (%lu) and spec (%d), dip (%p)", 2094 evtype, dev, spec_type, (void *)dip); 2095 break; 2096 } 2097 } 2098 2099 /* 2100 * Called by I/O framework when a devinfo node is freed to remove the 2101 * association between a devinfo node and its contracts. 
2102 */ 2103 void 2104 contract_device_remove_dip(dev_info_t *dip) 2105 { 2106 cont_device_t *ctd; 2107 cont_device_t *next; 2108 contract_t *ct; 2109 2110 mutex_enter(&(DEVI(dip)->devi_ct_lock)); 2111 ct_barrier_wait_for_release(dip); 2112 2113 for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; ctd = next) { 2114 next = list_next(&(DEVI(dip)->devi_ct), ctd); 2115 list_remove(&(DEVI(dip)->devi_ct), ctd); 2116 ct = &ctd->cond_contract; 2117 /* 2118 * Unlink the dip associated with this contract 2119 */ 2120 mutex_enter(&ct->ct_lock); 2121 ASSERT(ctd->cond_dip == dip); 2122 ctd->cond_dip = NULL; /* no longer linked to dip */ 2123 contract_rele(ct); /* remove hold for dip linkage */ 2124 CT_DEBUG((CE_NOTE, "ct: remove_dip: removed dip from contract: " 2125 "ctid: %d", ct->ct_id)); 2126 mutex_exit(&ct->ct_lock); 2127 } 2128 ASSERT(list_is_empty(&(DEVI(dip)->devi_ct))); 2129 mutex_exit(&(DEVI(dip)->devi_ct_lock)); 2130 } 2131 2132 /* 2133 * Barrier related routines 2134 */ 2135 static void 2136 ct_barrier_acquire(dev_info_t *dip) 2137 { 2138 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 2139 CT_DEBUG((CE_NOTE, "ct_barrier_acquire: waiting for barrier")); 2140 while (DEVI(dip)->devi_ct_count != -1) 2141 cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock)); 2142 DEVI(dip)->devi_ct_count = 0; 2143 CT_DEBUG((CE_NOTE, "ct_barrier_acquire: thread owns barrier")); 2144 } 2145 2146 static void 2147 ct_barrier_release(dev_info_t *dip) 2148 { 2149 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 2150 ASSERT(DEVI(dip)->devi_ct_count != -1); 2151 DEVI(dip)->devi_ct_count = -1; 2152 cv_broadcast(&(DEVI(dip)->devi_ct_cv)); 2153 CT_DEBUG((CE_NOTE, "ct_barrier_release: Released barrier")); 2154 } 2155 2156 static int 2157 ct_barrier_held(dev_info_t *dip) 2158 { 2159 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 2160 return (DEVI(dip)->devi_ct_count != -1); 2161 } 2162 2163 static int 2164 ct_barrier_empty(dev_info_t *dip) 2165 { 2166 
ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 2167 ASSERT(DEVI(dip)->devi_ct_count != -1); 2168 return (DEVI(dip)->devi_ct_count == 0); 2169 } 2170 2171 static void 2172 ct_barrier_wait_for_release(dev_info_t *dip) 2173 { 2174 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 2175 while (DEVI(dip)->devi_ct_count != -1) 2176 cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock)); 2177 } 2178 2179 static void 2180 ct_barrier_decr(dev_info_t *dip) 2181 { 2182 CT_DEBUG((CE_NOTE, "barrier_decr: ct_count before decr: %d", 2183 DEVI(dip)->devi_ct_count)); 2184 2185 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 2186 ASSERT(DEVI(dip)->devi_ct_count > 0); 2187 2188 DEVI(dip)->devi_ct_count--; 2189 if (DEVI(dip)->devi_ct_count == 0) { 2190 cv_broadcast(&DEVI(dip)->devi_ct_cv); 2191 CT_DEBUG((CE_NOTE, "barrier_decr: cv_broadcast")); 2192 } 2193 } 2194 2195 static void 2196 ct_barrier_incr(dev_info_t *dip) 2197 { 2198 ASSERT(ct_barrier_held(dip)); 2199 DEVI(dip)->devi_ct_count++; 2200 } 2201 2202 static int 2203 ct_barrier_wait_for_empty(dev_info_t *dip, int secs) 2204 { 2205 clock_t abstime; 2206 2207 ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock))); 2208 2209 abstime = ddi_get_lbolt() + drv_usectohz(secs*1000000); 2210 while (DEVI(dip)->devi_ct_count) { 2211 if (cv_timedwait(&(DEVI(dip)->devi_ct_cv), 2212 &(DEVI(dip)->devi_ct_lock), abstime) == -1) { 2213 return (-1); 2214 } 2215 } 2216 return (0); 2217 }