1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/systm.h> 29 #include <sys/cred.h> 30 #include <sys/modctl.h> 31 #include <sys/vfs.h> 32 #include <sys/vfs_opreg.h> 33 #include <sys/sysmacros.h> 34 #include <sys/cmn_err.h> 35 #include <sys/stat.h> 36 #include <sys/errno.h> 37 #include <sys/kmem.h> 38 #include <sys/file.h> 39 #include <sys/kstat.h> 40 #include <sys/port_impl.h> 41 #include <sys/task.h> 42 #include <sys/project.h> 43 44 /* 45 * Event Ports can be shared across threads or across processes. 46 * Every thread/process can use an own event port or a group of them 47 * can use a single port. A major request was also to get the ability 48 * to submit user-defined events to a port. The idea of the 49 * user-defined events is to use the event ports for communication between 50 * threads/processes (like message queues). User defined-events are queued 51 * in a port with the same priority as other event types. 52 * 53 * Events are delivered only once. 
 * The thread/process which is waiting
 * for events with the "highest priority" (priority here is related to the
 * internal strategy to wake up waiting threads) will retrieve the event,
 * all other threads/processes will not be notified. There is also
 * the requirement to have events which should be submitted immediately
 * to all "waiting" threads. That is the main task of the alert event.
 * The alert event is submitted by the application to a port. The port
 * changes from a standard mode to the alert mode. Now all waiting threads
 * will be awakened immediately and they will return with the alert event.
 * Threads trying to retrieve events from a port in alert mode will
 * return immediately with the alert event.
 *
 *
 * An event port is like a kernel queue, which accepts events submitted from
 * user level as well as events submitted from kernel sub-systems. Sub-systems
 * able to submit events to a port are the so-called "event sources".
 * Current event sources:
 * PORT_SOURCE_AIO   : events submitted per transaction completion from
 *                     POSIX-I/O framework.
 * PORT_SOURCE_TIMER : events submitted when a timer fires
 *                     (see timer_create(3RT)).
 * PORT_SOURCE_FD    : events submitted per file descriptor (see poll(2)).
 * PORT_SOURCE_ALERT : events submitted from user. This is not really a
 *                     single event, this is actually a port mode
 *                     (see port_alert(3c)).
 * PORT_SOURCE_USER  : events submitted by applications with
 *                     port_send(3c) or port_sendn(3c).
 * PORT_SOURCE_FILE  : events submitted per file being watched for file
 *                     change events (see port_create(3c)).
 *
 * There is a user API implemented in the libc library as well as a
 * kernel API implemented in port_subr.c in genunix.
 * The available user API functions are:
 * port_create() : create a port as a file descriptor of portfs file system
 *                 The standard close(2) function closes a port.
88 * port_associate() : associate a file descriptor with a port to be able to 89 * retrieve events from that file descriptor. 90 * port_dissociate(): remove the association of a file descriptor with a port. 91 * port_alert() : set/unset a port in alert mode 92 * port_send() : send an event of type PORT_SOURCE_USER to a port 93 * port_sendn() : send an event of type PORT_SOURCE_USER to a list of ports 94 * port_get() : retrieve a single event from a port 95 * port_getn() : retrieve a list of events from a port 96 * 97 * The available kernel API functions are: 98 * port_allocate_event(): allocate an event slot/structure of/from a port 99 * port_init_event() : set event data in the event structure 100 * port_send_event() : send event to a port 101 * port_free_event() : deliver allocated slot/structure back to a port 102 * port_associate_ksource(): associate a kernel event source with a port 103 * port_dissociate_ksource(): dissociate a kernel event source from a port 104 * 105 * The libc implementation consists of small functions which pass the 106 * arguments to the kernel using the "portfs" system call. It means, all the 107 * synchronisation work is being done in the kernel. The "portfs" system 108 * call loads the portfs file system into the kernel. 109 * 110 * PORT CREATION 111 * The first function to be used is port_create() which internally creates 112 * a vnode and a portfs node. The portfs node is represented by the port_t 113 * structure, which again includes all the data necessary to control a port. 114 * port_create() returns a file descriptor, which needs to be used in almost 115 * all other event port functions. 116 * The maximum number of ports per system is controlled by the resource 117 * control: project:port-max-ids. 118 * 119 * EVENT GENERATION 120 * The second step is the triggering of events, which could be sent to a port. 
121 * Every event source implements an own method to generate events for a port: 122 * PORT_SOURCE_AIO: 123 * The sigevent structure of the standard POSIX-IO functions 124 * was extended by an additional notification type. 125 * Standard notification types: 126 * SIGEV_NONE, SIGEV_SIGNAL and SIGEV_THREAD 127 * Event ports introduced now SIGEV_PORT. 128 * The notification type SIGEV_PORT specifies that a structure 129 * of type port_notify_t has to be attached to the sigev_value. 130 * The port_notify_t structure contains the event port file 131 * descriptor and a user-defined pointer. 132 * Internally the AIO implementation will use the kernel API 133 * functions to allocate an event port slot per transaction (aiocb) 134 * and sent the event to the port as soon as the transaction completes. 135 * All the events submitted per transaction are of type 136 * PORT_SOURCE_AIO. 137 * PORT_SOURCE_TIMER: 138 * The timer_create() function uses the same method as the 139 * PORT_SOURCE_AIO event source. It also uses the sigevent structure 140 * to deliver the port information. 141 * Internally the timer code will allocate a single event slot/struct 142 * per timer and it will send the timer event as soon as the timer 143 * fires. If the timer-fired event is not delivered to the application 144 * before the next period elapsed, then an overrun counter will be 145 * incremented. The timer event source uses a callback function to 146 * detect the delivery of the event to the application. At that time 147 * the timer callback function will update the event overrun counter. 148 * PORT_SOURCE_FD: 149 * This event source uses the port_associate() function to allocate 150 * an event slot/struct from a port. The application defines in the 151 * events argument of port_associate() the type of events which it is 152 * interested on. 
153 * The internal pollwakeup() function is used by all the file 154 * systems --which are supporting the VOP_POLL() interface- to notify 155 * the upper layer (poll(2), devpoll(7d) and now event ports) about 156 * the event triggered (see valid events in poll(2)). 157 * The pollwakeup() function forwards the event to the layer registered 158 * to receive the current event. 159 * The port_dissociate() function can be used to free the allocated 160 * event slot from the port. Anyway, file descriptors deliver events 161 * only one time and remain deactivated until the application 162 * reactivates the association of a file descriptor with port_associate(). 163 * If an associated file descriptor is closed then the file descriptor 164 * will be dissociated automatically from the port. 165 * 166 * PORT_SOURCE_ALERT: 167 * This event type is generated when the port was previously set in 168 * alert mode using the port_alert() function. 169 * A single alert event is delivered to every thread which tries to 170 * retrieve events from a port. 171 * PORT_SOURCE_USER: 172 * This type of event is generated from user level using the port_send() 173 * function to send a user event to a port or the port_sendn() function 174 * to send an event to a list of ports. 175 * PORT_SOURCE_FILE: 176 * This event source uses the port_associate() interface to register 177 * a file to be monitored for changes. The file name that needs to be 178 * monitored is specified in the file_obj_t structure, a pointer to which 179 * is passed as an argument. The event types to be monitored are specified 180 * in the events argument. 181 * A file events monitor is represented internal per port per object 182 * address(the file_obj_t pointer). Which means there can be multiple 183 * watches registered on the same file using different file_obj_t 184 * structure pointer. With the help of the FEM(File Event Monitoring) 185 * hooks, the file's vnode ops are intercepted and relevant events 186 * delivered. 
The port_dissociate() function is used to de-register a 187 * file events monitor on a file. When the specified file is 188 * removed/renamed, the file events watch/monitor is automatically 189 * removed. 190 * 191 * EVENT DELIVERY / RETRIEVING EVENTS 192 * Events remain in the port queue until: 193 * - the application uses port_get() or port_getn() to retrieve events, 194 * - the event source cancel the event, 195 * - the event port is closed or 196 * - the process exits. 197 * The maximal number of events in a port queue is the maximal number 198 * of event slots/structures which can be allocated by event sources. 199 * The allocation of event slots/structures is controlled by the resource 200 * control: process.port-max-events. 201 * The port_get() function retrieves a single event and the port_getn() 202 * function retrieves a list of events. 203 * Events are classified as shareable and non-shareable events across processes. 204 * Non-shareable events are invisible for the port_get(n)() functions of 205 * processes other than the owner of the event. 206 * Shareable event types are: 207 * PORT_SOURCE_USER events 208 * This type of event is unconditionally shareable and without 209 * limitations. If the parent process sends a user event and closes 210 * the port afterwards, the event remains in the port and the child 211 * process will still be able to retrieve the user event. 212 * PORT_SOURCE_ALERT events 213 * This type of event is shareable between processes. 214 * Limitation: The alert mode of the port is removed if the owner 215 * (process which set the port in alert mode) of the 216 * alert event closes the port. 217 * PORT_SOURCE_FD events 218 * This type of event is conditional shareable between processes. 219 * After fork(2) all forked file descriptors are shareable between 220 * the processes. The child process is allowed to retrieve events 221 * from the associated file descriptors and it can also re-associate 222 * the fd with the port. 
223 * Limitations: The child process is not allowed to dissociate 224 * the file descriptor from the port. Only the 225 * owner (process) of the association is allowed to 226 * dissociate the file descriptor from the port. 227 * If the owner of the association closes the port 228 * the association will be removed. 229 * PORT_SOURCE_AIO events 230 * This type of event is not shareable between processes. 231 * PORT_SOURCE_TIMER events 232 * This type of event is not shareable between processes. 233 * PORT_SOURCE_FILE events 234 * This type of event is not shareable between processes. 235 * 236 * FORK BEHAVIOUR 237 * On fork(2) the child process inherits all opened file descriptors from 238 * the parent process. This is also valid for port file descriptors. 239 * Associated file descriptors with a port maintain the association across the 240 * fork(2). It means, the child process gets full access to the port and 241 * it can retrieve events from all common associated file descriptors. 242 * Events of file descriptors created and associated with a port after the 243 * fork(2) are non-shareable and can only be retrieved by the same process. 244 * 245 * If the parent or the child process closes an exported port (using fork(2) 246 * or I_SENDFD) all the file descriptors associated with the port by the 247 * process will be dissociated from the port. Events of dissociated file 248 * descriptors as well as all non-shareable events will be discarded. 249 * The other process can continue working with the port as usual. 250 * 251 * CLOSING A PORT 252 * close(2) has to be used to close a port. See FORK BEHAVIOUR for details. 253 * 254 * PORT EVENT STRUCTURES 255 * The global control structure of the event ports framework is port_control_t. 256 * port_control_t keeps track of the number of created ports in the system. 257 * The cache of the port event structures is also located in port_control_t. 258 * 259 * On port_create() the vnode and the portfs node is also created. 
260 * The portfs node is represented by the port_t structure. 261 * The port_t structure manages all port specific tasks: 262 * - management of resource control values 263 * - port VOP_POLL interface 264 * - creation time 265 * - uid and gid of the port 266 * 267 * The port_t structure contains the port_queue_t structure. 268 * The port_queue_t structure contains all the data necessary for the 269 * queue management: 270 * - locking 271 * - condition variables 272 * - event counters 273 * - submitted events (represented by port_kevent_t structures) 274 * - threads waiting for event delivery (check portget_t structure) 275 * - PORT_SOURCE_FD cache (managed by the port_fdcache_t structure) 276 * - event source management (managed by the port_source_t structure) 277 * - alert mode management (check port_alert_t structure) 278 * 279 * EVENT MANAGEMENT 280 * The event port file system creates a kmem_cache for internal allocation of 281 * event port structures. 282 * 283 * 1. Event source association with a port: 284 * The first step to do for event sources is to get associated with a port 285 * using the port_associate_ksource() function or adding an entry to the 286 * port_ksource_tab[]. An event source can get dissociated from a port 287 * using the port_dissociate_ksource() function. An entry in the 288 * port_ksource_tab[] implies that the source will be associated 289 * automatically with every new created port. 290 * The event source can deliver a callback function, which is used by the 291 * port to notify the event source about close(2). The idea is that 292 * in such a case the event source should free all allocated resources 293 * and it must return to the port all allocated slots/structures. 294 * The port_close() function will wait until all allocated event 295 * structures/slots are returned to the port. 
 * The callback function is not necessary when the event source does not
 * maintain local resources; a second condition is that the event source
 * can guarantee that allocated event slots will be returned without
 * delay to the port (it will not block and sleep somewhere).
 *
 * 2. Reservation of an event slot / event structure
 * The event port reliability is based on the reservation of an event "slot"
 * (allocation of an event structure) by the event source as part of the
 * application call. If the maximal number of event slots is exhausted then
 * the event source can return a corresponding error code to the application.
 *
 * The port_alloc_event() function has to be used by event sources to
 * allocate an event slot (reserve an event structure). The port_alloc_event()
 * function does not block and it will return a 0 value on success or an
 * error code if it fails.
 * An argument of port_alloc_event() is a flag which determines the behavior
 * of the event after it was delivered to the application:
 * PORT_ALLOC_DEFAULT : event slot becomes free after delivery to the
 *                      application.
 * PORT_ALLOC_PRIVATE : event slot remains under the control of the event
 *                      source. This kind of slot cannot be used for
 *                      event delivery and should only be used internally
 *                      by the event source.
 * PORT_KEV_CACHED    : event slot remains under the control of an event
 *                      port cache. It does not become free after delivery
 *                      to the application.
 * PORT_ALLOC_SCACHED : event slot remains under the control of the event
 *                      source. The event source takes the control over
 *                      the slot after the event is delivered to the
 *                      application.
 *
 * 3. Delivery of events to the event port
 * Earlier allocated event structure/slot has to be used to deliver
 * event data to the port. Event source has to use the function
 * port_send_event().
The single argument is a pointer to the previously 331 * reserved event structure/slot. 332 * The portkev_events field of the port_kevent_t structure can be updated/set 333 * in two ways: 334 * 1. using the port_set_event() function, or 335 * 2. updating the portkev_events field out of the callback function: 336 * The event source can deliver a callback function to the port as an 337 * argument of port_init_event(). 338 * One of the arguments of the callback function is a pointer to the 339 * events field, which will be delivered to the application. 340 * (see Delivery of events to the application). 341 * Event structures/slots can be delivered to the event port only one time, 342 * they remain blocked until the data is delivered to the application and the 343 * slot becomes free or it is delivered back to the event source 344 * (PORT_ALLOC_SCACHED). The activation of the callback function mentioned above 345 * is at the same time the indicator for the event source that the event 346 * structure/slot is free for reuse. 347 * 348 * 4. Delivery of events to the application 349 * The events structures/slots delivered by event sources remain in the 350 * port queue until they are retrieved by the application or the port 351 * is closed (exit(2) also closes all opened file descriptors).. 352 * The application uses port_get() or port_getn() to retrieve events from 353 * a port. port_get() retrieves a single event structure/slot and port_getn() 354 * retrieves a list of event structures/slots. 355 * Both functions are able to poll for events and return immediately or they 356 * can specify a timeout value. 357 * Before the events are delivered to the application they are moved to a 358 * second temporary internal queue. The idea is to avoid lock collisions or 359 * contentions of the global queue lock. 360 * The global queue lock is used every time when an event source delivers 361 * new events to the port. 
362 * The port_get() and port_getn() functions 363 * a) retrieve single events from the temporary queue, 364 * b) prepare the data to be passed to the application memory, 365 * c) activate the callback function of the event sources: 366 * - to get the latest event data, 367 * - the event source can free all allocated resources associated with the 368 * current event, 369 * - the event source can re-use the current event slot/structure 370 * - the event source can deny the delivery of the event to the application 371 * (e.g. because of the wrong process). 372 * d) put the event back to the temporary queue if the event delivery was denied 373 * e) repeat a) until d) as long as there are events in the queue and 374 * there is enough user space available. 375 * 376 * The loop described above could block for a very long time the global mutex, 377 * to avoid that a second mutex was introduced to synchronized concurrent 378 * threads accessing the temporary queue. 379 */ 380 381 static int64_t portfs(int, uintptr_t, uintptr_t, uintptr_t, uintptr_t, 382 uintptr_t); 383 384 static struct sysent port_sysent = { 385 6, 386 SE_ARGC | SE_64RVAL | SE_NOUNLOAD, 387 (int (*)())portfs, 388 }; 389 390 static struct modlsys modlsys = { 391 &mod_syscallops, "event ports", &port_sysent 392 }; 393 394 #ifdef _SYSCALL32_IMPL 395 396 static int64_t 397 portfs32(uint32_t arg1, int32_t arg2, uint32_t arg3, uint32_t arg4, 398 uint32_t arg5, uint32_t arg6); 399 400 static struct sysent port_sysent32 = { 401 6, 402 SE_ARGC | SE_64RVAL | SE_NOUNLOAD, 403 (int (*)())portfs32, 404 }; 405 406 static struct modlsys modlsys32 = { 407 &mod_syscallops32, 408 "32-bit event ports syscalls", 409 &port_sysent32 410 }; 411 #endif /* _SYSCALL32_IMPL */ 412 413 static struct modlinkage modlinkage = { 414 MODREV_1, 415 { &modlsys, 416 #ifdef _SYSCALL32_IMPL 417 &modlsys32, 418 #endif 419 NULL 420 } 421 }; 422 423 port_kstat_t port_kstat = { 424 { "ports", KSTAT_DATA_UINT32 } 425 }; 426 427 dev_t portdev; 428 
struct vnodeops *port_vnodeops; 429 struct vfs port_vfs; 430 431 extern rctl_hndl_t rc_process_portev; 432 extern rctl_hndl_t rc_project_portids; 433 extern void aio_close_port(void *, int, pid_t, int); 434 435 /* 436 * This table contains a list of event sources which need a static 437 * association with a port (every port). 438 * The last NULL entry in the table is required to detect "end of table". 439 */ 440 struct port_ksource port_ksource_tab[] = { 441 {PORT_SOURCE_AIO, aio_close_port, NULL, NULL}, 442 {0, NULL, NULL, NULL} 443 }; 444 445 /* local functions */ 446 static int port_getn(port_t *, port_event_t *, uint_t, uint_t *, 447 port_gettimer_t *); 448 static int port_sendn(int [], int [], uint_t, int, void *, uint_t *); 449 static int port_alert(port_t *, int, int, void *); 450 static int port_dispatch_event(port_t *, int, int, int, uintptr_t, void *); 451 static int port_send(port_t *, int, int, void *); 452 static int port_create(int *); 453 static int port_get_alert(port_alert_t *, port_event_t *); 454 static int port_copy_event(port_event_t *, port_kevent_t *, list_t *); 455 static int *port_errorn(int *, int, int, int); 456 static int port_noshare(void *, int *, pid_t, int, void *); 457 static int port_get_timeout(timespec_t *, timespec_t *, timespec_t **, int *, 458 int); 459 static void port_init(port_t *); 460 static void port_remove_alert(port_queue_t *); 461 static void port_add_ksource_local(port_t *, port_ksource_t *); 462 static void port_check_return_cond(port_queue_t *); 463 static void port_dequeue_thread(port_queue_t *, portget_t *); 464 static portget_t *port_queue_thread(port_queue_t *, uint_t); 465 static void port_kstat_init(void); 466 467 #ifdef _SYSCALL32_IMPL 468 static int port_copy_event32(port_event32_t *, port_kevent_t *, list_t *); 469 #endif 470 471 int 472 _init(void) 473 { 474 static const fs_operation_def_t port_vfsops_template[] = { 475 { NULL, { NULL } } 476 }; 477 extern const fs_operation_def_t 
port_vnodeops_template[]; 478 vfsops_t *port_vfsops; 479 int error; 480 major_t major; 481 482 if ((major = getudev()) == (major_t)-1) 483 return (ENXIO); 484 portdev = makedevice(major, 0); 485 486 /* Create a dummy vfs */ 487 error = vfs_makefsops(port_vfsops_template, &port_vfsops); 488 if (error) { 489 cmn_err(CE_WARN, "port init: bad vfs ops"); 490 return (error); 491 } 492 vfs_setops(&port_vfs, port_vfsops); 493 port_vfs.vfs_flag = VFS_RDONLY; 494 port_vfs.vfs_dev = portdev; 495 vfs_make_fsid(&(port_vfs.vfs_fsid), portdev, 0); 496 497 error = vn_make_ops("portfs", port_vnodeops_template, &port_vnodeops); 498 if (error) { 499 vfs_freevfsops(port_vfsops); 500 cmn_err(CE_WARN, "port init: bad vnode ops"); 501 return (error); 502 } 503 504 mutex_init(&port_control.pc_mutex, NULL, MUTEX_DEFAULT, NULL); 505 port_control.pc_nents = 0; /* number of active ports */ 506 507 /* create kmem_cache for port event structures */ 508 port_control.pc_cache = kmem_cache_create("port_cache", 509 sizeof (port_kevent_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 510 511 port_kstat_init(); /* init port kstats */ 512 return (mod_install(&modlinkage)); 513 } 514 515 int 516 _info(struct modinfo *modinfop) 517 { 518 return (mod_info(&modlinkage, modinfop)); 519 } 520 521 /* 522 * System call wrapper for all port related system calls from 32-bit programs. 523 */ 524 #ifdef _SYSCALL32_IMPL 525 static int64_t 526 portfs32(uint32_t opcode, int32_t a0, uint32_t a1, uint32_t a2, uint32_t a3, 527 uint32_t a4) 528 { 529 int64_t error; 530 531 switch (opcode & PORT_CODE_MASK) { 532 case PORT_GET: 533 error = portfs(PORT_GET, a0, a1, (int)a2, (int)a3, a4); 534 break; 535 case PORT_SENDN: 536 error = portfs(opcode, (uint32_t)a0, a1, a2, a3, a4); 537 break; 538 default: 539 error = portfs(opcode, a0, a1, a2, a3, a4); 540 break; 541 } 542 return (error); 543 } 544 #endif /* _SYSCALL32_IMPL */ 545 546 /* 547 * System entry point for port functions. 
548 * a0 is a port file descriptor (except for PORT_SENDN and PORT_CREATE). 549 * The libc uses PORT_SYS_NOPORT in functions which do not deliver a 550 * port file descriptor as first argument. 551 */ 552 static int64_t 553 portfs(int opcode, uintptr_t a0, uintptr_t a1, uintptr_t a2, uintptr_t a3, 554 uintptr_t a4) 555 { 556 rval_t r; 557 port_t *pp; 558 int error = 0; 559 uint_t nget; 560 file_t *fp; 561 port_gettimer_t port_timer; 562 563 r.r_vals = 0; 564 if (opcode & PORT_SYS_NOPORT) { 565 opcode &= PORT_CODE_MASK; 566 if (opcode == PORT_SENDN) { 567 error = port_sendn((int *)a0, (int *)a1, (uint_t)a2, 568 (int)a3, (void *)a4, (uint_t *)&r.r_val1); 569 if (error && (error != EIO)) 570 return ((int64_t)set_errno(error)); 571 return (r.r_vals); 572 } 573 574 if (opcode == PORT_CREATE) { 575 error = port_create(&r.r_val1); 576 if (error) 577 return ((int64_t)set_errno(error)); 578 return (r.r_vals); 579 } 580 } 581 582 /* opcodes using port as first argument (a0) */ 583 584 if ((fp = getf((int)a0)) == NULL) 585 return ((uintptr_t)set_errno(EBADF)); 586 587 if (fp->f_vnode->v_type != VPORT) { 588 releasef((int)a0); 589 return ((uintptr_t)set_errno(EBADFD)); 590 } 591 592 pp = VTOEP(fp->f_vnode); 593 594 switch (opcode & PORT_CODE_MASK) { 595 case PORT_GET: 596 { 597 /* see PORT_GETN description */ 598 struct timespec timeout; 599 600 port_timer.pgt_flags = PORTGET_ONE; 601 port_timer.pgt_loop = 0; 602 port_timer.pgt_rqtp = NULL; 603 if (a4 != NULL) { 604 port_timer.pgt_timeout = &timeout; 605 timeout.tv_sec = (time_t)a2; 606 timeout.tv_nsec = (long)a3; 607 } else { 608 port_timer.pgt_timeout = NULL; 609 } 610 do { 611 nget = 1; 612 error = port_getn(pp, (port_event_t *)a1, 1, 613 (uint_t *)&nget, &port_timer); 614 } while (nget == 0 && error == 0 && port_timer.pgt_loop); 615 break; 616 } 617 case PORT_GETN: 618 { 619 /* 620 * port_getn() can only retrieve own or shareable events from 621 * other processes. 
The port_getn() function remains in the 622 * kernel until own or shareable events are available or the 623 * timeout elapses. 624 */ 625 port_timer.pgt_flags = 0; 626 port_timer.pgt_loop = 0; 627 port_timer.pgt_rqtp = NULL; 628 port_timer.pgt_timeout = (struct timespec *)a4; 629 do { 630 nget = a3; 631 error = port_getn(pp, (port_event_t *)a1, (uint_t)a2, 632 (uint_t *)&nget, &port_timer); 633 } while (nget == 0 && error == 0 && port_timer.pgt_loop); 634 r.r_val1 = nget; 635 r.r_val2 = error; 636 releasef((int)a0); 637 if (error && error != ETIME) 638 return ((int64_t)set_errno(error)); 639 return (r.r_vals); 640 } 641 case PORT_ASSOCIATE: 642 { 643 switch ((int)a1) { 644 case PORT_SOURCE_FD: 645 error = port_associate_fd(pp, (int)a1, (uintptr_t)a2, 646 (int)a3, (void *)a4); 647 break; 648 case PORT_SOURCE_FILE: 649 error = port_associate_fop(pp, (int)a1, (uintptr_t)a2, 650 (int)a3, (void *)a4); 651 break; 652 default: 653 error = EINVAL; 654 break; 655 } 656 break; 657 } 658 case PORT_SEND: 659 { 660 /* user-defined events */ 661 error = port_send(pp, PORT_SOURCE_USER, (int)a1, (void *)a2); 662 break; 663 } 664 case PORT_DISPATCH: 665 { 666 /* 667 * library events, blocking 668 * Only events of type PORT_SOURCE_AIO or PORT_SOURCE_MQ 669 * are currently allowed. 
670 */ 671 if ((int)a1 != PORT_SOURCE_AIO && (int)a1 != PORT_SOURCE_MQ) { 672 error = EINVAL; 673 break; 674 } 675 error = port_dispatch_event(pp, (int)opcode, (int)a1, (int)a2, 676 (uintptr_t)a3, (void *)a4); 677 break; 678 } 679 case PORT_DISSOCIATE: 680 { 681 switch ((int)a1) { 682 case PORT_SOURCE_FD: 683 error = port_dissociate_fd(pp, (uintptr_t)a2); 684 break; 685 case PORT_SOURCE_FILE: 686 error = port_dissociate_fop(pp, (uintptr_t)a2); 687 break; 688 default: 689 error = EINVAL; 690 break; 691 } 692 break; 693 } 694 case PORT_ALERT: 695 { 696 if ((int)a2) /* a2 = events */ 697 error = port_alert(pp, (int)a1, (int)a2, (void *)a3); 698 else 699 port_remove_alert(&pp->port_queue); 700 break; 701 } 702 default: 703 error = EINVAL; 704 break; 705 } 706 707 releasef((int)a0); 708 if (error) 709 return ((int64_t)set_errno(error)); 710 return (r.r_vals); 711 } 712 713 /* 714 * System call to create a port. 715 * 716 * The port_create() function creates a vnode of type VPORT per port. 717 * The port control data is associated with the vnode as vnode private data. 718 * The port_create() function returns an event port file descriptor. 719 */ 720 static int 721 port_create(int *fdp) 722 { 723 port_t *pp; 724 vnode_t *vp; 725 struct file *fp; 726 proc_t *p = curproc; 727 728 /* initialize vnode and port private data */ 729 pp = kmem_zalloc(sizeof (port_t), KM_SLEEP); 730 731 pp->port_vnode = vn_alloc(KM_SLEEP); 732 vp = EPTOV(pp); 733 vn_setops(vp, port_vnodeops); 734 vp->v_type = VPORT; 735 vp->v_vfsp = &port_vfs; 736 vp->v_data = (caddr_t)pp; 737 738 mutex_enter(&port_control.pc_mutex); 739 /* 740 * Retrieve the maximal number of event ports allowed per system from 741 * the resource control: project.port-max-ids. 
742 */ 743 mutex_enter(&p->p_lock); 744 if (rctl_test(rc_project_portids, p->p_task->tk_proj->kpj_rctls, p, 745 port_control.pc_nents + 1, RCA_SAFE) & RCT_DENY) { 746 mutex_exit(&p->p_lock); 747 vn_free(vp); 748 kmem_free(pp, sizeof (port_t)); 749 mutex_exit(&port_control.pc_mutex); 750 return (EAGAIN); 751 } 752 753 /* 754 * Retrieve the maximal number of events allowed per port from 755 * the resource control: process.port-max-events. 756 */ 757 pp->port_max_events = rctl_enforced_value(rc_process_portev, 758 p->p_rctls, p); 759 mutex_exit(&p->p_lock); 760 761 /* allocate a new user file descriptor and a file structure */ 762 if (falloc(vp, 0, &fp, fdp)) { 763 /* 764 * If the file table is full, free allocated resources. 765 */ 766 vn_free(vp); 767 kmem_free(pp, sizeof (port_t)); 768 mutex_exit(&port_control.pc_mutex); 769 return (EMFILE); 770 } 771 772 mutex_exit(&fp->f_tlock); 773 774 pp->port_fd = *fdp; 775 port_control.pc_nents++; 776 p->p_portcnt++; 777 port_kstat.pks_ports.value.ui32++; 778 mutex_exit(&port_control.pc_mutex); 779 780 /* initializes port private data */ 781 port_init(pp); 782 /* set user file pointer */ 783 setf(*fdp, fp); 784 return (0); 785 } 786 787 /* 788 * port_init() initializes event port specific data 789 */ 790 static void 791 port_init(port_t *pp) 792 { 793 port_queue_t *portq; 794 port_ksource_t *pks; 795 796 mutex_init(&pp->port_mutex, NULL, MUTEX_DEFAULT, NULL); 797 portq = &pp->port_queue; 798 mutex_init(&portq->portq_mutex, NULL, MUTEX_DEFAULT, NULL); 799 pp->port_flags |= PORT_INIT; 800 801 /* 802 * If it is not enough memory available to satisfy a user 803 * request using a single port_getn() call then port_getn() 804 * will reduce the size of the list to PORT_MAX_LIST. 
805 */ 806 pp->port_max_list = port_max_list; 807 808 /* Set timestamp entries required for fstat(2) requests */ 809 gethrestime(&pp->port_ctime); 810 pp->port_uid = crgetuid(curproc->p_cred); 811 pp->port_gid = crgetgid(curproc->p_cred); 812 813 /* initialize port queue structs */ 814 list_create(&portq->portq_list, sizeof (port_kevent_t), 815 offsetof(port_kevent_t, portkev_node)); 816 list_create(&portq->portq_get_list, sizeof (port_kevent_t), 817 offsetof(port_kevent_t, portkev_node)); 818 portq->portq_flags = 0; 819 pp->port_pid = curproc->p_pid; 820 821 /* Allocate cache skeleton for PORT_SOURCE_FD events */ 822 portq->portq_pcp = kmem_zalloc(sizeof (port_fdcache_t), KM_SLEEP); 823 mutex_init(&portq->portq_pcp->pc_lock, NULL, MUTEX_DEFAULT, NULL); 824 825 /* 826 * Allocate cache skeleton for association of event sources. 827 */ 828 mutex_init(&portq->portq_source_mutex, NULL, MUTEX_DEFAULT, NULL); 829 portq->portq_scache = kmem_zalloc( 830 PORT_SCACHE_SIZE * sizeof (port_source_t *), KM_SLEEP); 831 832 /* 833 * pre-associate some kernel sources with this port. 834 * The pre-association is required to create port_source_t 835 * structures for object association. 836 * Some sources can not get associated with a port before the first 837 * object association is requested. Another reason to pre_associate 838 * a particular source with a port is because of performance. 839 */ 840 841 for (pks = port_ksource_tab; pks->pks_source != 0; pks++) 842 port_add_ksource_local(pp, pks); 843 } 844 845 /* 846 * The port_add_ksource_local() function is being used to associate 847 * event sources with every new port. 848 * The event sources need to be added to port_ksource_tab[]. 
849 */ 850 static void 851 port_add_ksource_local(port_t *pp, port_ksource_t *pks) 852 { 853 port_source_t *pse; 854 port_source_t **ps; 855 856 mutex_enter(&pp->port_queue.portq_source_mutex); 857 ps = &pp->port_queue.portq_scache[PORT_SHASH(pks->pks_source)]; 858 for (pse = *ps; pse != NULL; pse = pse->portsrc_next) { 859 if (pse->portsrc_source == pks->pks_source) 860 break; 861 } 862 863 if (pse == NULL) { 864 /* associate new source with the port */ 865 pse = kmem_zalloc(sizeof (port_source_t), KM_SLEEP); 866 pse->portsrc_source = pks->pks_source; 867 pse->portsrc_close = pks->pks_close; 868 pse->portsrc_closearg = pks->pks_closearg; 869 pse->portsrc_cnt = 1; 870 871 pks->pks_portsrc = pse; 872 if (*ps != NULL) 873 pse->portsrc_next = (*ps)->portsrc_next; 874 *ps = pse; 875 } 876 mutex_exit(&pp->port_queue.portq_source_mutex); 877 } 878 879 /* 880 * The port_send() function sends an event of type "source" to a 881 * port. This function is non-blocking. An event can be sent to 882 * a port as long as the number of events per port does not achieve the 883 * maximal allowed number of events. The max. number of events per port is 884 * defined by the resource control process.max-port-events. 885 * This function is used by the port library function port_send() 886 * and port_dispatch(). The port_send(3c) function is part of the 887 * event ports API and submits events of type PORT_SOURCE_USER. The 888 * port_dispatch() function is project private and it is used by library 889 * functions to submit events of other types than PORT_SOURCE_USER 890 * (e.g. PORT_SOURCE_AIO). 
 */
static int
port_send(port_t *pp, int source, int events, void *user)
{
	port_kevent_t	*pev;
	int		error;

	/*
	 * Allocate an event slot from the port's local event cache;
	 * fails (non-blocking) if the per-port event limit is reached.
	 */
	error = port_alloc_event_local(pp, source, PORT_ALLOC_DEFAULT, &pev);
	if (error)
		return (error);

	/* user-defined events carry no object and no callback */
	pev->portkev_object = 0;
	pev->portkev_events = events;
	pev->portkev_user = user;
	pev->portkev_callback = NULL;
	pev->portkev_arg = NULL;
	pev->portkev_flags = 0;

	/* enqueue the event and wake up a waiting thread, if any */
	port_send_event(pev);
	return (0);
}

/*
 * The port_noshare() function returns 0 if the current event was generated
 * by the same process. Otherwise it returns a value other than 0 and the
 * event should not be delivered to the current process.
 * The port_noshare() function is normally used by the port_dispatch()
 * function. The port_dispatch() function is project private and can only be
 * used within the event port project.
 * Currently the libaio uses the port_dispatch() function to deliver events
 * of types PORT_SOURCE_AIO.
 */
/* ARGSUSED */
static int
port_noshare(void *arg, int *events, pid_t pid, int flag, void *evp)
{
	/* deny delivery when the retrieving process is not the producer */
	if (flag == PORT_CALLBACK_DEFAULT && curproc->p_pid != pid)
		return (1);
	return (0);
}

/*
 * The port_dispatch_event() function is project private and it is used by
 * libraries involved in the project to deliver events to the port.
 * port_dispatch will sleep and wait for enough resources to satisfy the
 * request, if necessary.
 * The library can specify if the delivered event is shareable with other
 * processes (see PORT_SYS_NOSHARE flag).
 */
static int
port_dispatch_event(port_t *pp, int opcode, int source, int events,
    uintptr_t object, void *user)
{
	port_kevent_t	*pev;
	int		error;

	/* blocking allocation: waits for a free event slot if required */
	error = port_alloc_event_block(pp, source, PORT_ALLOC_DEFAULT, &pev);
	if (error)
		return (error);

	pev->portkev_object = object;
	pev->portkev_events = events;
	pev->portkev_user = user;
	pev->portkev_arg = NULL;
	if (opcode & PORT_SYS_NOSHARE) {
		/*
		 * Non-shareable event: the port_noshare() callback will
		 * suppress delivery to any process other than the producer.
		 */
		pev->portkev_flags = PORT_KEV_NOSHARE;
		pev->portkev_callback = port_noshare;
	} else {
		pev->portkev_flags = 0;
		pev->portkev_callback = NULL;
	}

	port_send_event(pev);
	return (0);
}


/*
 * The port_sendn() function is the kernel implementation of the event
 * port API function port_sendn(3c).
 * This function is able to send an event to a list of event ports.
 */
static int
port_sendn(int ports[], int errors[], uint_t nent, int events, void *user,
    uint_t *nget)
{
	port_kevent_t	*pev;
	int		errorcnt = 0;
	int		error = 0;
	int		count;
	int		port;
	int		*plist;
	int		*elist = NULL;
	file_t		*fp;
	port_t		*pp;

	if (nent == 0 || nent > port_max_list)
		return (EINVAL);

	/* copy the fd list from user space */
	plist = kmem_alloc(nent * sizeof (int), KM_SLEEP);
	if (copyin((void *)ports, plist, nent * sizeof (int))) {
		kmem_free(plist, nent * sizeof (int));
		return (EFAULT);
	}

	/*
	 * Scan the list for event port file descriptors and send the
	 * attached user event data embedded in an event of type
	 * PORT_SOURCE_USER to every event port in the list.
	 * If a list entry is not a valid event port then the corresponding
	 * error code will be stored in the errors[] list with the same
	 * list offset as in the ports[] list.
	 */
	for (count = 0; count < nent; count++) {
		port = plist[count];
		if ((fp = getf(port)) == NULL) {
			elist = port_errorn(elist, nent, EBADF, count);
			errorcnt++;
			continue;
		}

		/* pp is only dereferenced after the VPORT type check below */
		pp = VTOEP(fp->f_vnode);
		if (fp->f_vnode->v_type != VPORT) {
			releasef(port);
			elist = port_errorn(elist, nent, EBADFD, count);
			errorcnt++;
			continue;
		}

		error = port_alloc_event_local(pp, PORT_SOURCE_USER,
		    PORT_ALLOC_DEFAULT, &pev);
		if (error) {
			releasef(port);
			elist = port_errorn(elist, nent, error, count);
			errorcnt++;
			continue;
		}

		pev->portkev_object = 0;
		pev->portkev_events = events;
		pev->portkev_user = user;
		pev->portkev_callback = NULL;
		pev->portkev_arg = NULL;
		pev->portkev_flags = 0;

		port_send_event(pev);
		releasef(port);
	}
	if (errorcnt) {
		/* some ports failed: report per-port errors to user space */
		error = EIO;
		if (copyout(elist, (void *)errors, nent * sizeof (int)))
			error = EFAULT;
		kmem_free(elist, nent * sizeof (int));
	}
	*nget = nent - errorcnt;	/* number of successful deliveries */
	kmem_free(plist, nent * sizeof (int));
	return (error);
}

/*
 * Lazily allocate the (zeroed) error list and record an error code at
 * the given index.  The list is only allocated on the first error.
 */
static int *
port_errorn(int *elist, int nent, int error, int index)
{
	if (elist == NULL)
		elist = kmem_zalloc(nent * sizeof (int), KM_SLEEP);
	elist[index] = error;
	return (elist);
}

/*
 * port_alert()
 * The port_alert() function is a high priority event and it is always set
 * on top of the queue. It is also delivered as single event.
 * flags:
 *	- SET	:overwrite current alert data
 *	- UPDATE:set alert data or return EBUSY if alert mode is already set
 *
 * - set the ALERT flag
 * - wakeup all sleeping threads
 */
static int
port_alert(port_t *pp, int flags, int events, void *user)
{
	port_queue_t	*portq;
	portget_t	*pgetp;
	port_alert_t	*pa;

	if ((flags & PORT_ALERT_INVALID) == PORT_ALERT_INVALID)
		return (EINVAL);

	portq = &pp->port_queue;
	pa = &portq->portq_alert;
	mutex_enter(&portq->portq_mutex);

	/* check alert conditions: UPDATE must not overwrite an active alert */
	if (flags == PORT_ALERT_UPDATE) {
		if (portq->portq_flags & PORTQ_ALERT) {
			mutex_exit(&portq->portq_mutex);
			return (EBUSY);
		}
	}

	/*
	 * Store alert data in the port to be delivered to threads
	 * which are using port_get(n) to retrieve events.
	 */
	portq->portq_flags |= PORTQ_ALERT;
	pa->portal_events = events;		/* alert info */
	pa->portal_pid = curproc->p_pid;	/* process owner */
	pa->portal_object = 0;			/* no object */
	pa->portal_user = user;			/* user alert data */

	/* alert and deliver alert data to waiting threads */
	pgetp = portq->portq_thread;
	if (pgetp == NULL) {
		/* no threads waiting for events */
		mutex_exit(&portq->portq_mutex);
		return (0);
	}

	/*
	 * Set waiting threads in alert mode (PORTGET_ALERT)..
	 * Every thread waiting for events already allocated a portget_t
	 * structure to sleep on.
	 * The port alert arguments are stored in the portget_t structure.
	 * The PORTGET_ALERT flag is set to indicate the thread to return
	 * immediately with the alert event.
	 * The waiter list is circular, so the walk stops when it comes
	 * back around to the head (portq_thread).
	 */
	do {
		if ((pgetp->portget_state & PORTGET_ALERT) == 0) {
			pa = &pgetp->portget_alert;
			pa->portal_events = events;
			pa->portal_object = 0;
			pa->portal_user = user;
			pgetp->portget_state |= PORTGET_ALERT;
			cv_signal(&pgetp->portget_cv);
		}
	} while ((pgetp = pgetp->portget_next) != portq->portq_thread);
	mutex_exit(&portq->portq_mutex);
	return (0);
}

/*
 * Clear alert state of the port
 */
static void
port_remove_alert(port_queue_t *portq)
{
	mutex_enter(&portq->portq_mutex);
	portq->portq_flags &= ~PORTQ_ALERT;
	mutex_exit(&portq->portq_mutex);
}

/*
 * The port_getn() function is used to retrieve events from a port.
 *
 * The port_getn() function returns immediately if there are enough events
 * available in the port to satisfy the request or if the port is in alert
 * mode (see port_alert(3c)).
 * The timeout argument of port_getn(3c) -which is embedded in the
 * port_gettimer_t structure- specifies if the system call should block or if it
 * should return immediately depending on the number of events available.
 * This function is internally used by port_getn(3c) as well as by
 * port_get(3c).
 */
static int
port_getn(port_t *pp, port_event_t *uevp, uint_t max, uint_t *nget,
    port_gettimer_t *pgt)
{
	port_queue_t	*portq;
	port_kevent_t	*pev;
	port_kevent_t	*lev;
	int		error = 0;
	uint_t		nmax;
	uint_t		nevents;
	uint_t		eventsz;
	port_event_t	*kevp;
	list_t		*glist;
	uint_t		tnent;
	int		rval;
	int		blocking = -1;	/* -1: timeout not yet evaluated */
	int		timecheck;
	int		flag;
	timespec_t	rqtime;
	timespec_t	*rqtp = NULL;
	portget_t	*pgetp;
	void		*results;
	model_t		model = get_udatamodel();

	flag = pgt->pgt_flags;

	/* requesting more events than the user buffer can hold is invalid */
	if (*nget > max && max > 0)
		return (EINVAL);

	portq = &pp->port_queue;
	mutex_enter(&portq->portq_mutex);
	if (max == 0) {
		/*
		 * Return number of objects with events.
		 * The port_block() call is required to synchronize this
		 * thread with another possible thread, which could be
		 * retrieving events from the port queue.
		 */
		port_block(portq);
		/*
		 * Check if a second thread is currently retrieving events
		 * and it is using the temporary event queue.
		 */
		if (portq->portq_tnent) {
			/* put remaining events back to the port queue */
			port_push_eventq(portq);
		}
		*nget = portq->portq_nent;
		port_unblock(portq);
		mutex_exit(&portq->portq_mutex);
		return (0);
	}

	if (uevp == NULL) {
		mutex_exit(&portq->portq_mutex);
		return (EFAULT);
	}
	if (*nget == 0) {		/* no events required */
		mutex_exit(&portq->portq_mutex);
		return (0);
	}

	/* port is being closed ... */
	if (portq->portq_flags & PORTQ_CLOSE) {
		mutex_exit(&portq->portq_mutex);
		return (EBADFD);
	}

	/* return immediately if port in alert mode */
	if (portq->portq_flags & PORTQ_ALERT) {
		error = port_get_alert(&portq->portq_alert, uevp);
		if (error == 0)
			*nget = 1;
		mutex_exit(&portq->portq_mutex);
		return (error);
	}

	portq->portq_thrcnt++;

	/*
	 * Now check if the completed events satisfy the
	 * "wait" requirements of the current thread:
	 */
	if (pgt->pgt_loop) {
		/*
		 * loop entry of same thread
		 * pgt_loop is set when the current thread returns
		 * prematurely from this function. That could happen
		 * when a port is being shared between processes and
		 * this thread could not find events to return.
		 * It is not allowed to a thread to retrieve non-shareable
		 * events generated in other processes.
		 * PORTQ_WAIT_EVENTS is set when a thread already
		 * checked the current event queue and no new events
		 * are added to the queue.
		 */
		if (((portq->portq_flags & PORTQ_WAIT_EVENTS) == 0) &&
		    (portq->portq_nent >= *nget)) {
			/* some new events arrived ...check them */
			goto portnowait;
		}
		rqtp = pgt->pgt_rqtp;
		timecheck = pgt->pgt_timecheck;
		pgt->pgt_flags |= PORTGET_WAIT_EVENTS;
	} else {
		/* check if enough events are available ... */
		if (portq->portq_nent >= *nget)
			goto portnowait;
		/*
		 * There are not enough events available to satisfy
		 * the request, check timeout value and wait for
		 * incoming events.
		 */
		error = port_get_timeout(pgt->pgt_timeout, &rqtime, &rqtp,
		    &blocking, flag);
		if (error) {
			port_check_return_cond(portq);
			mutex_exit(&portq->portq_mutex);
			return (error);
		}

		if (blocking == 0)	/* don't block, check fired events */
			goto portnowait;

		if (rqtp != NULL) {
			timespec_t	now;
			/* convert relative timeout to an absolute deadline */
			timecheck = timechanged;
			gethrestime(&now);
			timespecadd(rqtp, &now);
		}
	}

	/* enqueue thread in the list of waiting threads */
	pgetp = port_queue_thread(portq, *nget);


	/* Wait here until return conditions met */
	for (;;) {
		if (pgetp->portget_state & PORTGET_ALERT) {
			/* reap alert event and return */
			error = port_get_alert(&pgetp->portget_alert, uevp);
			if (error)
				*nget = 0;
			else
				*nget = 1;
			port_dequeue_thread(&pp->port_queue, pgetp);
			portq->portq_thrcnt--;
			mutex_exit(&portq->portq_mutex);
			return (error);
		}

		/*
		 * Check if some other thread is already retrieving
		 * events (portq_getn > 0).
		 */
		if ((portq->portq_getn == 0) &&
		    ((portq)->portq_nent >= *nget) &&
		    (!((pgt)->pgt_flags & PORTGET_WAIT_EVENTS) ||
		    !((portq)->portq_flags & PORTQ_WAIT_EVENTS)))
			break;

		if (portq->portq_flags & PORTQ_CLOSE) {
			error = EBADFD;
			break;
		}

		rval = cv_waituntil_sig(&pgetp->portget_cv, &portq->portq_mutex,
		    rqtp, timecheck);

		if (rval <= 0) {
			/* rval == 0: signal; rval < 0: timeout expired */
			error = (rval == 0) ? EINTR : ETIME;
			break;
		}
	}

	/* take thread out of the wait queue */
	port_dequeue_thread(portq, pgetp);

	if (error != 0 && (error == EINTR || error == EBADFD ||
	    (error == ETIME && flag))) {
		/* return without events */
		port_check_return_cond(portq);
		mutex_exit(&portq->portq_mutex);
		return (error);
	}

portnowait:
	/*
	 * Move port event queue to a temporary event queue .
	 * New incoming events will be continue be posted to the event queue
	 * and they will not be considered by the current thread.
	 * The idea is to avoid lock contentions or an often locking/unlocking
	 * of the port queue mutex. The contention and performance degradation
	 * could happen because:
	 * a) incoming events use the port queue mutex to enqueue new events and
	 * b) before the event can be delivered to the application it is
	 *    necessary to notify the event sources about the event delivery.
	 *    Sometimes the event sources can require a long time to return and
	 *    the queue mutex would block incoming events.
	 * During this time incoming events (port_send_event()) do not need
	 * to awake threads waiting for events. Before the current thread
	 * returns it will check the conditions to awake other waiting threads.
	 */
	portq->portq_getn++;	/* number of threads retrieving events */
	port_block(portq);	/* block other threads here */
	nmax = max < portq->portq_nent ? max : portq->portq_nent;

	if (portq->portq_tnent) {
		/*
		 * Move remaining events from previous thread back to the
		 * port event queue.
		 */
		port_push_eventq(portq);
	}
	/* move port event queue to a temporary queue */
	list_move_tail(&portq->portq_get_list, &portq->portq_list);
	glist = &portq->portq_get_list;	/* use temporary event queue */
	tnent = portq->portq_nent;	/* get current number of events */
	portq->portq_nent = 0;		/* no events in the port event queue */
	portq->portq_flags |= PORTQ_WAIT_EVENTS; /* detect incoming events */
	mutex_exit(&portq->portq_mutex); /* event queue can be reused now */

	if (model == DATAMODEL_NATIVE) {
		eventsz = sizeof (port_event_t);
		/*
		 * Try a non-sleeping allocation first; if memory is tight,
		 * cap the request at port_max_list and sleep for it.
		 */
		kevp = kmem_alloc(eventsz * nmax, KM_NOSLEEP);
		if (kevp == NULL) {
			if (nmax > pp->port_max_list)
				nmax = pp->port_max_list;
			kevp = kmem_alloc(eventsz * nmax, KM_SLEEP);
		}
		results = kevp;
		lev = NULL;	/* start with first event in the queue */
		for (nevents = 0; nevents < nmax; ) {
			pev = port_get_kevent(glist, lev);
			if (pev == NULL)	/* no more events available */
				break;
			if (pev->portkev_flags & PORT_KEV_FREE) {
				/* Just discard event */
				list_remove(glist, pev);
				pev->portkev_flags &= ~(PORT_CLEANUP_DONE);
				if (PORT_FREE_EVENT(pev))
					port_free_event_local(pev, 0);
				tnent--;
				continue;
			}

			/* move event data to copyout list */
			if (port_copy_event(&kevp[nevents], pev, glist)) {
				/*
				 * Event can not be delivered to the
				 * current process.
				 */
				if (lev != NULL)
					list_insert_after(glist, lev, pev);
				else
					list_insert_head(glist, pev);
				lev = pev;	/* last checked event */
			} else {
				nevents++;	/* # of events ready */
			}
		}
#ifdef	_SYSCALL32_IMPL
	} else {
		port_event32_t	*kevp32;

		eventsz = sizeof (port_event32_t);
		kevp32 = kmem_alloc(eventsz * nmax, KM_NOSLEEP);
		if (kevp32 == NULL) {
			if (nmax > pp->port_max_list)
				nmax = pp->port_max_list;
			kevp32 = kmem_alloc(eventsz * nmax, KM_SLEEP);
		}
		results = kevp32;
		lev = NULL;	/* start with first event in the queue */
		for (nevents = 0; nevents < nmax; ) {
			pev = port_get_kevent(glist, lev);
			if (pev == NULL)	/* no more events available */
				break;
			if (pev->portkev_flags & PORT_KEV_FREE) {
				/* Just discard event */
				list_remove(glist, pev);
				pev->portkev_flags &= ~(PORT_CLEANUP_DONE);
				if (PORT_FREE_EVENT(pev))
					port_free_event_local(pev, 0);
				tnent--;
				continue;
			}

			/* move event data to copyout list */
			if (port_copy_event32(&kevp32[nevents], pev, glist)) {
				/*
				 * Event can not be delivered to the
				 * current process.
				 */
				if (lev != NULL)
					list_insert_after(glist, lev, pev);
				else
					list_insert_head(glist, pev);
				lev = pev;	/* last checked event */
			} else {
				nevents++;	/* # of events ready */
			}
		}
#endif	/* _SYSCALL32_IMPL */
	}

	/*
	 * Remember number of remaining events in the temporary event queue.
	 */
	portq->portq_tnent = tnent - nevents;

	/*
	 * Work to do before return :
	 * - push list of remaining events back to the top of the standard
	 *   port queue.
	 * - if this is the last thread calling port_get(n) then wakeup the
	 *   thread waiting on close(2).
	 * - check for a deferred cv_signal from port_send_event() and wakeup
	 *   the sleeping thread.
	 */
	mutex_enter(&portq->portq_mutex);
	port_unblock(portq);
	if (portq->portq_tnent) {
		/*
		 * move remaining events in the temporary event queue back
		 * to the port event queue
		 */
		port_push_eventq(portq);
	}
	portq->portq_getn--;	/* update # of threads retrieving events */
	if (--portq->portq_thrcnt == 0) { /* # of threads waiting ... */
		/* Last thread => check close(2) conditions ... */
		if (portq->portq_flags & PORTQ_CLOSE) {
			cv_signal(&portq->portq_closecv);
			mutex_exit(&portq->portq_mutex);
			kmem_free(results, eventsz * nmax);
			/* do not copyout events */
			*nget = 0;
			return (EBADFD);
		}
	} else if (portq->portq_getn == 0) {
		/*
		 * no other threads retrieving events ...
		 * check wakeup conditions of sleeping threads
		 */
		if ((portq->portq_thread != NULL) &&
		    (portq->portq_nent >= portq->portq_nget))
			cv_signal(&portq->portq_thread->portget_cv);
	}

	/*
	 * Check PORTQ_POLLIN here because the current thread set temporarily
	 * the number of events in the queue to zero.
	 */
	if (portq->portq_flags & PORTQ_POLLIN) {
		portq->portq_flags &= ~PORTQ_POLLIN;
		mutex_exit(&portq->portq_mutex);
		pollwakeup(&pp->port_pollhd, POLLIN);
	} else {
		mutex_exit(&portq->portq_mutex);
	}

	/* now copyout list of user event structures to user space */
	if (nevents) {
		if (copyout(results, uevp, nevents * eventsz))
			error = EFAULT;
	}
	kmem_free(results, eventsz * nmax);

	if (nevents == 0 && error == 0 && pgt->pgt_loop == 0 && blocking != 0) {
		/* no events retrieved: check loop conditions */
		if (blocking == -1) {
			/* no timeout checked */
			error = port_get_timeout(pgt->pgt_timeout,
			    &pgt->pgt_rqtime, &rqtp, &blocking, flag);
			if (error) {
				*nget = nevents;
				return (error);
			}
			if (rqtp != NULL) {
				timespec_t	now;
				pgt->pgt_timecheck = timechanged;
				gethrestime(&now);
				timespecadd(&pgt->pgt_rqtime, &now);
			}
			pgt->pgt_rqtp = rqtp;
		} else {
			/* timeout already checked -> remember values */
			pgt->pgt_rqtp = rqtp;
			if (rqtp != NULL) {
				pgt->pgt_timecheck = timecheck;
				pgt->pgt_rqtime = *rqtp;
			}
		}
		if (blocking)
			/* timeout remaining */
			pgt->pgt_loop = 1;
	}

	/* set number of user event structures completed */
	*nget = nevents;
	return (error);
}

/*
 * 1. copy kernel event structure to user event structure.
 * 2. PORT_KEV_WIRED event structures will be reused by the "source"
 * 3. Remove PORT_KEV_DONEQ flag (event removed from the event queue)
 * 4. Other types of event structures can be delivered back to the port cache
 *    (port_free_event_local()).
 * 5. The event source callback function is the last opportunity for the
 *    event source to update events, to free local resources associated with
 *    the event or to deny the delivery of the event.
 */
static int
port_copy_event(port_event_t *puevp, port_kevent_t *pkevp, list_t *list)
{
	int	free_event = 0;
	int	flags;
	int	error;

	/* fill the user-visible event structure */
	puevp->portev_source = pkevp->portkev_source;
	puevp->portev_object = pkevp->portkev_object;
	puevp->portev_user = pkevp->portkev_user;
	puevp->portev_events = pkevp->portkev_events;

	/* remove event from the queue */
	list_remove(list, pkevp);

	/*
	 * Events of type PORT_KEV_WIRED remain allocated by the
	 * event source.
	 */
	flags = pkevp->portkev_flags;	/* saved for rollback on callback error */
	if (pkevp->portkev_flags & PORT_KEV_WIRED)
		pkevp->portkev_flags &= ~PORT_KEV_DONEQ;
	else
		free_event = 1;

	if (pkevp->portkev_callback) {
		error = (*pkevp->portkev_callback)(pkevp->portkev_arg,
		    &puevp->portev_events, pkevp->portkev_pid,
		    PORT_CALLBACK_DEFAULT, pkevp);

		if (error) {
			/*
			 * Event can not be delivered.
			 * Caller must reinsert the event into the queue.
			 */
			pkevp->portkev_flags = flags;
			return (error);
		}
	}
	if (free_event)
		port_free_event_local(pkevp, 0);
	return (0);
}

#ifdef	_SYSCALL32_IMPL
/*
 * 1. copy kernel event structure to user event structure.
 * 2. PORT_KEV_WIRED event structures will be reused by the "source"
 * 3. Remove PORT_KEV_DONEQ flag (event removed from the event queue)
 * 4. Other types of event structures can be delivered back to the port cache
 *    (port_free_event_local()).
 * 5. The event source callback function is the last opportunity for the
 *    event source to update events, to free local resources associated with
 *    the event or to deny the delivery of the event.
 */
static int
port_copy_event32(port_event32_t *puevp, port_kevent_t *pkevp, list_t *list)
{
	int	free_event = 0;
	int	error;
	int	flags;

	/* fill the 32-bit user-visible event structure (narrowing casts) */
	puevp->portev_source = pkevp->portkev_source;
	puevp->portev_object = (daddr32_t)pkevp->portkev_object;
	puevp->portev_user = (caddr32_t)(uintptr_t)pkevp->portkev_user;
	puevp->portev_events = pkevp->portkev_events;

	/* remove event from the queue */
	list_remove(list, pkevp);

	/*
	 * Events of type PORT_KEV_WIRED remain allocated by the
	 * sub-system (source).
	 */
	flags = pkevp->portkev_flags;	/* saved for rollback on callback error */
	if (pkevp->portkev_flags & PORT_KEV_WIRED)
		pkevp->portkev_flags &= ~PORT_KEV_DONEQ;
	else
		free_event = 1;

	if (pkevp->portkev_callback != NULL) {
		error = (*pkevp->portkev_callback)(pkevp->portkev_arg,
		    &puevp->portev_events, pkevp->portkev_pid,
		    PORT_CALLBACK_DEFAULT, pkevp);
		if (error) {
			/*
			 * Event can not be delivered.
			 * Caller must reinsert the event into the queue.
			 */
			pkevp->portkev_flags = flags;
			return (error);
		}
	}
	if (free_event)
		port_free_event_local(pkevp, 0);
	return (0);
}
#endif	/* _SYSCALL32_IMPL */

/*
 * copyout alert event.
1676 */ 1677 static int 1678 port_get_alert(port_alert_t *pa, port_event_t *uevp) 1679 { 1680 model_t model = get_udatamodel(); 1681 1682 /* copyout alert event structures to user space */ 1683 if (model == DATAMODEL_NATIVE) { 1684 port_event_t uev; 1685 uev.portev_source = PORT_SOURCE_ALERT; 1686 uev.portev_object = pa->portal_object; 1687 uev.portev_events = pa->portal_events; 1688 uev.portev_user = pa->portal_user; 1689 if (copyout(&uev, uevp, sizeof (port_event_t))) 1690 return (EFAULT); 1691 #ifdef _SYSCALL32_IMPL 1692 } else { 1693 port_event32_t uev32; 1694 uev32.portev_source = PORT_SOURCE_ALERT; 1695 uev32.portev_object = (daddr32_t)pa->portal_object; 1696 uev32.portev_events = pa->portal_events; 1697 uev32.portev_user = (daddr32_t)(uintptr_t)pa->portal_user; 1698 if (copyout(&uev32, uevp, sizeof (port_event32_t))) 1699 return (EFAULT); 1700 #endif /* _SYSCALL32_IMPL */ 1701 } 1702 return (0); 1703 } 1704 1705 /* 1706 * Check return conditions : 1707 * - pending port close(2) 1708 * - threads waiting for events 1709 */ 1710 static void 1711 port_check_return_cond(port_queue_t *portq) 1712 { 1713 ASSERT(MUTEX_HELD(&portq->portq_mutex)); 1714 portq->portq_thrcnt--; 1715 if (portq->portq_flags & PORTQ_CLOSE) { 1716 if (portq->portq_thrcnt == 0) 1717 cv_signal(&portq->portq_closecv); 1718 else 1719 cv_signal(&portq->portq_thread->portget_cv); 1720 } 1721 } 1722 1723 /* 1724 * The port_get_kevent() function returns 1725 * - the event located at the head of the queue if 'last' pointer is NULL 1726 * - the next event after the event pointed by 'last' 1727 * The caller of this function is responsible for the integrity of the queue 1728 * in use: 1729 * - port_getn() is using a temporary queue protected with port_block(). 1730 * - port_close_events() is working on the global event queue and protects 1731 * the queue with portq->portq_mutex. 
 */
port_kevent_t *
port_get_kevent(list_t *list, port_kevent_t *last)
{
	if (last == NULL)
		return (list_head(list));
	else
		return (list_next(list, last));
}

/*
 * The port_get_timeout() function gets the timeout data from user space
 * and converts that info into a corresponding internal representation.
 * The kerneldata flag means that the timeout data is already loaded.
 *
 * On return:
 * - *blocking is 1 and *rqtp == NULL	: block indefinitely
 * - *blocking is 1 and *rqtp != NULL	: block up to *rqtime
 * - *blocking is 0			: do not block at all
 */
static int
port_get_timeout(timespec_t *timeout, timespec_t *rqtime, timespec_t **rqtp,
    int *blocking, int kerneldata)
{
	model_t	model = get_udatamodel();

	*rqtp = NULL;
	if (timeout == NULL) {
		/* NULL timeout pointer: block until an event arrives */
		*blocking = 1;
		return (0);
	}

	if (kerneldata) {
		/* timeout already resides in kernel space */
		*rqtime = *timeout;
	} else {
		if (model == DATAMODEL_NATIVE) {
			if (copyin(timeout, rqtime, sizeof (*rqtime)))
				return (EFAULT);
#ifdef	_SYSCALL32_IMPL
		} else {
			timespec32_t	wait_time_32;
			if (copyin(timeout, &wait_time_32,
			    sizeof (wait_time_32)))
				return (EFAULT);
			TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32);
#endif	/* _SYSCALL32_IMPL */
		}
	}

	if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) {
		/* zero timeout: poll, never block */
		*blocking = 0;
		return (0);
	}

	if (rqtime->tv_sec < 0 ||
	    rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC)
		return (EINVAL);

	*rqtp = rqtime;
	*blocking = 1;
	return (0);
}

/*
 * port_queue_thread()
 * Threads requiring more events than available will be put in a wait queue.
 * There is a "thread wait queue" per port.
 * Threads requiring less events get a higher priority than others and they
 * will be awoken first.
 */
static portget_t *
port_queue_thread(port_queue_t *portq, uint_t nget)
{
	portget_t	*pgetp;
	portget_t	*ttp;
	portget_t	*htp;

	pgetp = kmem_zalloc(sizeof (portget_t), KM_SLEEP);
	pgetp->portget_nget = nget;
	pgetp->portget_pid = curproc->p_pid;
	if (portq->portq_thread == NULL) {
		/* first waiting thread: forms a one-element circular list */
		portq->portq_thread = pgetp;
		portq->portq_nget = nget;
		pgetp->portget_prev = pgetp;
		pgetp->portget_next = pgetp;
		return (pgetp);
	}

	/*
	 * thread waiting for less events will be set on top of the queue.
	 * Walk the circular list (sorted by portget_nget, ascending) to
	 * find the insertion point.
	 */
	ttp = portq->portq_thread;
	htp = ttp;
	for (;;) {
		if (nget <= ttp->portget_nget)
			break;
		if (htp == ttp->portget_next)
			break;	/* last event */
		ttp = ttp->portget_next;
	}

	/* add thread to the queue (insert before ttp) */
	pgetp->portget_next = ttp;
	pgetp->portget_prev = ttp->portget_prev;
	ttp->portget_prev->portget_next = pgetp;
	ttp->portget_prev = pgetp;
	if (portq->portq_thread == ttp)
		portq->portq_thread = pgetp;
	/* portq_nget caches the smallest request of all waiters (head) */
	portq->portq_nget = portq->portq_thread->portget_nget;
	return (pgetp);
}

/*
 * Take thread out of the queue.
 */
static void
port_dequeue_thread(port_queue_t *portq, portget_t *pgetp)
{
	if (pgetp->portget_next == pgetp) {
		/* last (single) waiting thread */
		portq->portq_thread = NULL;
		portq->portq_nget = 0;
	} else {
		pgetp->portget_prev->portget_next = pgetp->portget_next;
		pgetp->portget_next->portget_prev = pgetp->portget_prev;
		if (portq->portq_thread == pgetp)
			portq->portq_thread = pgetp->portget_next;
		portq->portq_nget = portq->portq_thread->portget_nget;
	}
	kmem_free(pgetp, sizeof (portget_t));
}

/*
 * Set up event port kstats.
1862 */ 1863 static void 1864 port_kstat_init() 1865 { 1866 kstat_t *ksp; 1867 uint_t ndata; 1868 1869 ndata = sizeof (port_kstat) / sizeof (kstat_named_t); 1870 ksp = kstat_create("portfs", 0, "Event Ports", "misc", 1871 KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_VIRTUAL); 1872 if (ksp) { 1873 ksp->ks_data = &port_kstat; 1874 kstat_install(ksp); 1875 } 1876 }