Print this page
7127 remove -Wno-missing-braces from Makefile.uts
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/uts/common/os/msg.c
+++ new/usr/src/uts/common/os/msg.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 25
26 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 27 /* All Rights Reserved */
28 28
29 29
30 30 /*
31 31 * Inter-Process Communication Message Facility.
32 32 *
33 33 * See os/ipc.c for a description of common IPC functionality.
34 34 *
35 35 * Resource controls
36 36 * -----------------
37 37 *
38 38 * Control: zone.max-msg-ids (rc_zone_msgmni)
39 39 * Description: Maximum number of message queue ids allowed a zone.
40 40 *
41 41 * When msgget() is used to allocate a message queue, one id is
42 42 * allocated. If the id allocation doesn't succeed, msgget() fails
43 43 * and errno is set to ENOSPC. Upon successful msgctl(, IPC_RMID)
44 44 * the id is deallocated.
45 45 *
46 46 * Control: project.max-msg-ids (rc_project_msgmni)
47 47 * Description: Maximum number of message queue ids allowed a project.
48 48 *
49 49 * When msgget() is used to allocate a message queue, one id is
50 50 * allocated. If the id allocation doesn't succeed, msgget() fails
51 51 * and errno is set to ENOSPC. Upon successful msgctl(, IPC_RMID)
52 52 * the id is deallocated.
53 53 *
54 54 * Control: process.max-msg-qbytes (rc_process_msgmnb)
55 55 * Description: Maximum number of bytes of messages on a message queue.
56 56 *
57 57 * When msgget() successfully allocates a message queue, the minimum
58 58 * enforced value of this limit is used to initialize msg_qbytes.
59 59 *
60 60 * Control: process.max-msg-messages (rc_process_msgtql)
61 61 * Description: Maximum number of messages on a message queue.
62 62 *
63 63 * When msgget() successfully allocates a message queue, the minimum
64 64 * enforced value of this limit is used to initialize a per-queue
65 65 * limit on the number of messages.
66 66 */
67 67
68 68 #include <sys/types.h>
69 69 #include <sys/t_lock.h>
70 70 #include <sys/param.h>
71 71 #include <sys/cred.h>
72 72 #include <sys/user.h>
73 73 #include <sys/proc.h>
74 74 #include <sys/time.h>
75 75 #include <sys/ipc.h>
76 76 #include <sys/ipc_impl.h>
77 77 #include <sys/msg.h>
78 78 #include <sys/msg_impl.h>
79 79 #include <sys/list.h>
80 80 #include <sys/systm.h>
81 81 #include <sys/sysmacros.h>
82 82 #include <sys/cpuvar.h>
83 83 #include <sys/kmem.h>
84 84 #include <sys/ddi.h>
85 85 #include <sys/errno.h>
86 86 #include <sys/cmn_err.h>
87 87 #include <sys/debug.h>
88 88 #include <sys/project.h>
89 89 #include <sys/modctl.h>
90 90 #include <sys/syscall.h>
91 91 #include <sys/policy.h>
92 92 #include <sys/zone.h>
93 93
94 94 #include <c2/audit.h>
95 95
96 96 /*
97 97 * The following tunables are obsolete. Though for compatibility we
98 98 * still read and interpret msginfo_msgmnb, msginfo_msgmni, and
99 99 * msginfo_msgtql (see os/project.c and os/rctl_proc.c), the preferred
100 100 * mechanism for administrating the IPC Message facility is through the
101 101 * resource controls described at the top of this file.
102 102 */
size_t msginfo_msgmax = 2048;	/* (obsolete) */
size_t msginfo_msgmnb = 4096;	/* (obsolete; still read for compat) */
int msginfo_msgmni = 50;	/* (obsolete; still read for compat) */
int msginfo_msgtql = 40;	/* (obsolete; still read for compat) */
int msginfo_msgssz = 8;		/* (obsolete) */
int msginfo_msgmap = 0;		/* (obsolete) */
ushort_t msginfo_msgseg = 1024;	/* (obsolete) */
110 110
111 111 extern rctl_hndl_t rc_zone_msgmni;
112 112 extern rctl_hndl_t rc_project_msgmni;
113 113 extern rctl_hndl_t rc_process_msgmnb;
114 114 extern rctl_hndl_t rc_process_msgtql;
115 115 static ipc_service_t *msq_svc;
116 116 static zone_key_t msg_zone_key;
117 117
118 118 static void msg_dtor(kipc_perm_t *);
119 119 static void msg_rmid(kipc_perm_t *);
120 120 static void msg_remove_zone(zoneid_t, void *);
121 121
122 122 /*
123 123 * Module linkage information for the kernel.
124 124 */
/*
 * Single entry point for all System V message system calls; the opcode
 * argument selects the sub-operation (msgget/msgctl/msgrcv/...).
 */
static ssize_t msgsys(int opcode, uintptr_t a0, uintptr_t a1, uintptr_t a2,
    uintptr_t a4, uintptr_t a5);

/* Syscall entry descriptor: 6 arguments; 64-bit return value on LP64. */
static struct sysent ipcmsg_sysent = {
	6,
#ifdef _LP64
	SE_ARGC | SE_NOUNLOAD | SE_64RVAL,
#else
	SE_ARGC | SE_NOUNLOAD | SE_32RVAL1,
#endif
	(int (*)())msgsys
};

#ifdef _SYSCALL32_IMPL
/* Entry point and descriptor for 32-bit callers on a 64-bit kernel. */
static ssize32_t msgsys32(int opcode, uint32_t a0, uint32_t a1, uint32_t a2,
    uint32_t a4, uint32_t a5);

static struct sysent ipcmsg_sysent32 = {
	6,
	SE_ARGC | SE_NOUNLOAD | SE_32RVAL1,
	(int (*)())msgsys32
};
#endif	/* _SYSCALL32_IMPL */

static struct modlsys modlsys = {
	&mod_syscallops, "System V message facility", &ipcmsg_sysent
};

#ifdef _SYSCALL32_IMPL
static struct modlsys modlsys32 = {
	&mod_syscallops32, "32-bit System V message facility", &ipcmsg_sysent32
};
#endif
158 158
159 159 /*
160 160 * Big Theory statement for message queue correctness
161 161 *
 * The msgrcv and msgsnd functions no longer use cv_broadcast to wake up
 * receivers who are waiting for an event.  Using the cv_broadcast method
 * resulted in negative scaling when the number of waiting receivers is large
 * (the thundering herd problem).  Instead, the receivers waiting to receive a
 * message are now linked in a queue-like fashion and awakened one at a time in
 * a controlled manner.
168 168 *
169 169 * Receivers can block on two different classes of waiting list:
170 170 * 1) "sendwait" list, which is the more complex list of the two. The
171 171 * receiver will be awakened by a sender posting a new message. There
172 172 * are two types of "sendwait" list used:
173 173 * a) msg_wait_snd: handles all receivers who are looking for
174 174 * a message type >= 0, but was unable to locate a match.
175 175 *
176 176 * slot 0: reserved for receivers that have designated they
177 177 * will take any message type.
178 178 * rest: consist of receivers requesting a specific type
179 179 * but the type was not present. The entries are
180 180 * hashed into a bucket in an attempt to keep
181 181 * any list search relatively short.
182 182 * b) msg_wait_snd_ngt: handles all receivers that have designated
183 183 * a negative message type. Unlike msg_wait_snd, the hash bucket
184 184 * serves a range of negative message types (-1 to -5, -6 to -10
185 185 * and so forth), where the last bucket is reserved for all the
186 186 * negative message types that hash outside of MSG_MAX_QNUM - 1.
187 187 * This is done this way to simplify the operation of locating a
188 188 * negative message type.
189 189 *
190 190 * 2) "copyout" list, where the receiver is awakened by another
191 191 * receiver after a message is copied out. This is a linked list
192 192 * of waiters that are awakened one at a time. Although the solution is
193 193 * not optimal, the complexity that would be added in for waking
194 194 * up the right entry far exceeds any potential pay back (too many
195 195 * correctness and corner case issues).
196 196 *
197 197 * The lists are doubly linked. In the case of the "sendwait"
198 198 * list, this allows the thread to remove itself from the list without having
199 199 * to traverse the list. In the case of the "copyout" list it simply allows
200 200 * us to use common functions with the "sendwait" list.
201 201 *
202 202 * To make sure receivers are not hung out to dry, we must guarantee:
203 203 * 1. If any queued message matches any receiver, then at least one
204 204 * matching receiver must be processing the request.
 *	2. Blocking on the copyout queue is only temporary while messages
 *	   are being copied out.  The process is guaranteed to wake up
 *	   when it gets to the front of the queue (copyout is a FIFO).
208 208 *
209 209 * Rules for blocking and waking up:
210 210 * 1. A receiver entering msgrcv must examine all messages for a match
211 211 * before blocking on a sendwait queue.
 *	2. If the receiver blocks because the message it chose is already
 *	   being copied out, then when it wakes up it needs to start
 *	   checking the messages from the beginning.
 *	3) Whenever a process returns from msgrcv for any reason, if it
 *	   had attempted to copy a message or blocked waiting for a copy
 *	   to complete, it needs to wake up the next receiver blocked on
 *	   a copy out.
219 219 * 4) When a message is sent, the sender selects a process waiting
220 220 * for that type of message. This selection process rotates between
221 221 * receivers types of 0, negative and positive to prevent starvation of
222 222 * any one particular receiver type.
223 223 * 5) The following are the scenarios for processes that are awakened
224 224 * by a msgsnd:
225 225 * a) The process finds the message and is able to copy
226 226 * it out. Once complete, the process returns.
227 227 * b) The message that was sent that triggered the wakeup is no
228 228 * longer available (another process found the message first).
229 229 * We issue a wakeup on copy queue and then go back to
230 230 * sleep waiting for another matching message to be sent.
231 231 * c) The message that was supposed to be processed was
232 232 * already serviced by another process. However a different
233 233 * message is present which we can service. The message
234 234 * is copied and the process returns.
235 235 * d) The message is found, but some sort of error occurs that
236 236 * prevents the message from being copied. The receiver
237 237 * wakes up the next sender that can service this message
238 238 * type and returns an error to the caller.
239 239 * e) The message is found, but it is marked as being copied
240 240 * out. The receiver then goes to sleep on the copyout
241 241 * queue where it will be awakened again sometime in the future.
242 242 *
243 243 *
244 244 * 6) Whenever a message is found that matches the message type designated,
245 245 * but is being copied out we have to block on the copyout queue.
246 246 * After process copying finishes the copy out, it must wakeup (either
247 247 * directly or indirectly) all receivers who blocked on its copyout,
248 248 * so they are guaranteed a chance to examine the remaining messages.
249 249 * This is implemented via a chain of wakeups: Y wakes X, who wakes Z,
250 250 * and so on. The chain cannot be broken. This leads to the following
251 251 * cases:
 *		a) A receiver is finished copying the message (or encountered
 *		   an error); the first entry on the copyout queue is woken
 *		   up.
255 255 * b) When the receiver is woken up, it attempts to locate
256 256 * a message type match.
257 257 * c) If a message type is found and
258 258 * -- MSG_RCVCOPY flag is not set, the message is
259 259 * marked for copying out. Regardless of the copyout
260 260 * success the next entry on the copyout queue is
261 261 * awakened and the operation is completed.
262 262 * -- MSG_RCVCOPY is set, we simply go back to sleep again
263 263 * on the copyout queue.
264 264 * d) If the message type is not found then we wakeup the next
265 265 * process on the copyout queue.
 *	7) If a msgsnd is unable to complete for any of the following reasons
267 267 * a) the msgq has no space for the message
268 268 * b) the maximum number of messages allowed has been reached
269 269 * then one of two things happen:
270 270 * 1) If the passed in msg_flag has IPC_NOWAIT set, then
271 271 * an error is returned.
272 272 * 2) The IPC_NOWAIT bit is not set in msg_flag, then the
273 273 * the thread is placed to sleep until the request can be
274 274 * serviced.
275 275 * 8) When waking a thread waiting to send a message, a check is done to
276 276 * verify that the operation being asked for by the thread will complete.
277 277 * This decision making process is done in a loop where the oldest request
278 278 * is checked first. The search will continue until there is no more
279 279 * room on the msgq or we have checked all the waiters.
280 280 */
281 281
282 282 static uint_t msg_type_hash(long);
283 283 static int msgq_check_err(kmsqid_t *qp, int cvres);
284 284 static int msg_rcvq_sleep(list_t *, msgq_wakeup_t *, kmutex_t **,
285 285 kmsqid_t *);
286 286 static int msg_copyout(kmsqid_t *, long, kmutex_t **, size_t *, size_t,
287 287 struct msg *, struct ipcmsgbuf *, int);
288 288 static void msg_rcvq_wakeup_all(list_t *);
289 289 static void msg_wakeup_senders(kmsqid_t *);
290 290 static void msg_wakeup_rdr(kmsqid_t *, msg_select_t **, long);
291 291 static msgq_wakeup_t *msg_fnd_any_snd(kmsqid_t *, int, long);
292 292 static msgq_wakeup_t *msg_fnd_any_rdr(kmsqid_t *, int, long);
293 293 static msgq_wakeup_t *msg_fnd_neg_snd(kmsqid_t *, int, long);
294 294 static msgq_wakeup_t *msg_fnd_spc_snd(kmsqid_t *, int, long);
295 295 static struct msg *msgrcv_lookup(kmsqid_t *, long);
296 296
/*
 * Rotation tables used when choosing which waiter to wake: each entry
 * pairs a search function with the next entry to try, forming a circular
 * list.  The sender-side table rotates any-type -> specific-type ->
 * negative-type to prevent starvation of any one receiver class (see
 * rule 4 in the Big Theory statement above).
 */
msg_select_t msg_fnd_sndr[] = {
	{ msg_fnd_any_snd, &msg_fnd_sndr[1] },
	{ msg_fnd_spc_snd, &msg_fnd_sndr[2] },
	{ msg_fnd_neg_snd, &msg_fnd_sndr[0] }
};

msg_select_t msg_fnd_rdr[1] = {
	{ msg_fnd_any_rdr, &msg_fnd_rdr[0] },
};
306 306
307 307 static struct modlinkage modlinkage = {
308 308 MODREV_1,
309 - &modlsys,
309 + { &modlsys,
310 310 #ifdef _SYSCALL32_IMPL
311 - &modlsys32,
311 + &modlsys32,
312 312 #endif
313 - NULL
313 + NULL
314 + }
314 315 };
315 316
316 317 #define MSG_SMALL_INIT (size_t)-1
317 318 int
318 319 _init(void)
319 320 {
320 321 int result;
321 322
322 323 msq_svc = ipcs_create("msqids", rc_project_msgmni, rc_zone_msgmni,
323 324 sizeof (kmsqid_t), msg_dtor, msg_rmid, AT_IPC_MSG,
324 325 offsetof(ipc_rqty_t, ipcq_msgmni));
325 326 zone_key_create(&msg_zone_key, NULL, msg_remove_zone, NULL);
326 327
327 328 if ((result = mod_install(&modlinkage)) == 0)
328 329 return (0);
329 330
330 331 (void) zone_key_delete(msg_zone_key);
331 332 ipcs_destroy(msq_svc);
332 333
333 334 return (result);
334 335 }
335 336
int
_fini(void)
{
	/*
	 * The sysent entries are flagged SE_NOUNLOAD, so this module can
	 * never be unloaded; always refuse.
	 */
	return (EBUSY);
}
341 342
/* Report module information via the standard modlinkage. */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
347 348
/*
 * IPC service destructor for a message queue: destroy the per-queue
 * lists.  By this point all waiter lists must be empty, all messages
 * freed, and all byte accounting back to zero (see msg_rmid); the
 * ASSERTs verify this on DEBUG kernels.
 */
static void
msg_dtor(kipc_perm_t *perm)
{
	kmsqid_t *qp = (kmsqid_t *)perm;
	int ii;

	/* One sendwait bucket pair per hash slot, inclusive of slot 0. */
	for (ii = 0; ii <= MSG_MAX_QNUM; ii++) {
		ASSERT(list_is_empty(&qp->msg_wait_snd[ii]));
		ASSERT(list_is_empty(&qp->msg_wait_snd_ngt[ii]));
		list_destroy(&qp->msg_wait_snd[ii]);
		list_destroy(&qp->msg_wait_snd_ngt[ii]);
	}
	ASSERT(list_is_empty(&qp->msg_cpy_block));
	ASSERT(list_is_empty(&qp->msg_wait_rcv));
	list_destroy(&qp->msg_cpy_block);
	ASSERT(qp->msg_snd_cnt == 0);
	ASSERT(qp->msg_cbytes == 0);
	list_destroy(&qp->msg_list);
	list_destroy(&qp->msg_wait_rcv);
}
368 369
369 370
370 371 #define msg_hold(mp) (mp)->msg_copycnt++
371 372
372 373 /*
373 374 * msg_rele - decrement the reference count on the message. When count
374 375 * reaches zero, free message header and contents.
375 376 */
376 377 static void
377 378 msg_rele(struct msg *mp)
378 379 {
379 380 ASSERT(mp->msg_copycnt > 0);
380 381 if (mp->msg_copycnt-- == 1) {
381 382 if (mp->msg_addr)
382 383 kmem_free(mp->msg_addr, mp->msg_size);
383 384 kmem_free(mp, sizeof (struct msg));
384 385 }
385 386 }
386 387
387 388 /*
388 389 * msgunlink - Unlink msg from queue, decrement byte count and wake up anyone
389 390 * waiting for free bytes on queue.
390 391 *
391 392 * Called with queue locked.
392 393 */
static void
msgunlink(kmsqid_t *qp, struct msg *mp)
{
	/* Detach the message and update queue accounting before release. */
	list_remove(&qp->msg_list, mp);
	qp->msg_qnum--;
	qp->msg_cbytes -= mp->msg_size;
	msg_rele(mp);

	/* Wake up waiting writers: bytes and a slot were just freed. */
	msg_wakeup_senders(qp);
}
404 405
/*
 * IPC service RMID callback: free every queued message, then wake every
 * thread sleeping on any of this queue's wait lists so each can observe
 * that the queue is gone (via the IPC_FREE check in msgq_check_err).
 */
static void
msg_rmid(kipc_perm_t *perm)
{
	kmsqid_t *qp = (kmsqid_t *)perm;
	struct msg *mp;
	int ii;

	while ((mp = list_head(&qp->msg_list)) != NULL)
		msgunlink(qp, mp);
	ASSERT(qp->msg_cbytes == 0);

	/*
	 * Wake up everyone who is in a wait state of some sort
	 * for this message queue.
	 */
	for (ii = 0; ii <= MSG_MAX_QNUM; ii++) {
		msg_rcvq_wakeup_all(&qp->msg_wait_snd[ii]);
		msg_rcvq_wakeup_all(&qp->msg_wait_snd_ngt[ii]);
	}
	msg_rcvq_wakeup_all(&qp->msg_cpy_block);
	msg_rcvq_wakeup_all(&qp->msg_wait_rcv);
}
428 429
429 430 /*
430 431 * msgctl system call.
431 432 *
432 433 * gets q lock (via ipc_lookup), releases before return.
433 434 * may call users of msg_lock
434 435 */
static int
msgctl(int msgid, int cmd, void *arg)
{
	STRUCT_DECL(msqid_ds, ds);	/* SVR4 queue work area */
	kmsqid_t *qp;			/* ptr to associated q */
	int error;
	struct cred *cr;
	model_t mdl = get_udatamodel();
	struct msqid_ds64 ds64;
	kmutex_t *lock;
	proc_t *pp = curproc;

	STRUCT_INIT(ds, mdl);
	cr = CRED();

	/*
	 * Perform pre- or non-lookup actions (e.g. copyins, RMID).
	 */
	switch (cmd) {
	case IPC_SET:
		if (copyin(arg, STRUCT_BUF(ds), STRUCT_SIZE(ds)))
			return (set_errno(EFAULT));
		break;

	case IPC_SET64:
		if (copyin(arg, &ds64, sizeof (struct msqid_ds64)))
			return (set_errno(EFAULT));
		break;

	case IPC_RMID:
		/* RMID is handled entirely by the IPC framework. */
		if (error = ipc_rmid(msq_svc, msgid, cr))
			return (set_errno(error));
		return (0);
	}

	/*
	 * get msqid_ds for this msgid
	 */
	if ((lock = ipc_lookup(msq_svc, msgid, (kipc_perm_t **)&qp)) == NULL)
		return (set_errno(EINVAL));

	switch (cmd) {
	case IPC_SET:
		/* Raising msg_qbytes above its current value is privileged. */
		if (STRUCT_FGET(ds, msg_qbytes) > qp->msg_qbytes &&
		    secpolicy_ipc_config(cr) != 0) {
			mutex_exit(lock);
			return (set_errno(EPERM));
		}
		if (error = ipcperm_set(msq_svc, cr, &qp->msg_perm,
		    &STRUCT_BUF(ds)->msg_perm, mdl)) {
			mutex_exit(lock);
			return (set_errno(error));
		}
		qp->msg_qbytes = STRUCT_FGET(ds, msg_qbytes);
		qp->msg_ctime = gethrestime_sec();
		break;

	case IPC_STAT:
		if (error = ipcperm_access(&qp->msg_perm, MSG_R, cr)) {
			mutex_exit(lock);
			return (set_errno(error));
		}

		/* Transiently expose the waiter flags for the snapshot. */
		if (qp->msg_rcv_cnt)
			qp->msg_perm.ipc_mode |= MSG_RWAIT;
		if (qp->msg_snd_cnt)
			qp->msg_perm.ipc_mode |= MSG_WWAIT;
		ipcperm_stat(&STRUCT_BUF(ds)->msg_perm, &qp->msg_perm, mdl);
		qp->msg_perm.ipc_mode &= ~(MSG_RWAIT|MSG_WWAIT);
		STRUCT_FSETP(ds, msg_first, NULL);	/* kernel addr */
		STRUCT_FSETP(ds, msg_last, NULL);	/* kernel addr */
		STRUCT_FSET(ds, msg_cbytes, qp->msg_cbytes);
		STRUCT_FSET(ds, msg_qnum, qp->msg_qnum);
		STRUCT_FSET(ds, msg_qbytes, qp->msg_qbytes);
		STRUCT_FSET(ds, msg_lspid, qp->msg_lspid);
		STRUCT_FSET(ds, msg_lrpid, qp->msg_lrpid);
		STRUCT_FSET(ds, msg_stime, qp->msg_stime);
		STRUCT_FSET(ds, msg_rtime, qp->msg_rtime);
		STRUCT_FSET(ds, msg_ctime, qp->msg_ctime);
		break;

	case IPC_SET64:
		/*
		 * An unprivileged caller may raise msg_qbytes only within
		 * the process.max-msg-qbytes resource control; p_lock is
		 * required for rctl_test.
		 */
		mutex_enter(&pp->p_lock);
		if ((ds64.msgx_qbytes > qp->msg_qbytes) &&
		    secpolicy_ipc_config(cr) != 0 &&
		    rctl_test(rc_process_msgmnb, pp->p_rctls, pp,
		    ds64.msgx_qbytes, RCA_SAFE) & RCT_DENY) {
			mutex_exit(&pp->p_lock);
			mutex_exit(lock);
			return (set_errno(EPERM));
		}
		mutex_exit(&pp->p_lock);
		if (error = ipcperm_set64(msq_svc, cr, &qp->msg_perm,
		    &ds64.msgx_perm)) {
			mutex_exit(lock);
			return (set_errno(error));
		}
		qp->msg_qbytes = ds64.msgx_qbytes;
		qp->msg_ctime = gethrestime_sec();
		break;

	case IPC_STAT64:
		/*
		 * NOTE(review): unlike IPC_STAT, there is no MSG_R access
		 * check on this path -- confirm this is intentional (e.g.
		 * privilege enforced by the *64 interface's callers).
		 */
		if (qp->msg_rcv_cnt)
			qp->msg_perm.ipc_mode |= MSG_RWAIT;
		if (qp->msg_snd_cnt)
			qp->msg_perm.ipc_mode |= MSG_WWAIT;
		ipcperm_stat64(&ds64.msgx_perm, &qp->msg_perm);
		qp->msg_perm.ipc_mode &= ~(MSG_RWAIT|MSG_WWAIT);
		ds64.msgx_cbytes = qp->msg_cbytes;
		ds64.msgx_qnum = qp->msg_qnum;
		ds64.msgx_qbytes = qp->msg_qbytes;
		ds64.msgx_lspid = qp->msg_lspid;
		ds64.msgx_lrpid = qp->msg_lrpid;
		ds64.msgx_stime = qp->msg_stime;
		ds64.msgx_rtime = qp->msg_rtime;
		ds64.msgx_ctime = qp->msg_ctime;
		break;

	default:
		mutex_exit(lock);
		return (set_errno(EINVAL));
	}

	mutex_exit(lock);

	/*
	 * Do copyout last (after releasing mutex).
	 */
	switch (cmd) {
	case IPC_STAT:
		if (copyout(STRUCT_BUF(ds), arg, STRUCT_SIZE(ds)))
			return (set_errno(EFAULT));
		break;

	case IPC_STAT64:
		if (copyout(&ds64, arg, sizeof (struct msqid_ds64)))
			return (set_errno(EFAULT));
		break;
	}

	return (0);
}
577 578
578 579 /*
579 580 * Remove all message queues associated with a given zone. Called by
580 581 * zone_shutdown when the zone is halted.
581 582 */
/*ARGSUSED1*/
static void
msg_remove_zone(zoneid_t zoneid, void *arg)
{
	/* Tear down every message queue owned by the halting zone. */
	ipc_remove_zone(msq_svc, zoneid);
}
588 589
589 590 /*
590 591 * msgget system call.
591 592 */
static int
msgget(key_t key, int msgflg)
{
	kmsqid_t *qp;
	kmutex_t *lock;
	int id, error;
	int ii;
	proc_t *pp = curproc;

top:
	if (error = ipc_get(msq_svc, key, msgflg, (kipc_perm_t **)&qp, &lock))
		return (set_errno(error));

	if (IPC_FREE(&qp->msg_perm)) {
		/*
		 * A new queue was allocated; initialize it before
		 * committing the id.  NOTE(review): ipc_get appears to
		 * return holding pp->p_lock as well as the queue lock on
		 * this path (both dropped here) -- confirm against
		 * os/ipc.c.
		 */
		mutex_exit(lock);
		mutex_exit(&pp->p_lock);

		list_create(&qp->msg_list, sizeof (struct msg),
		    offsetof(struct msg, msg_node));
		qp->msg_qnum = 0;
		qp->msg_lspid = qp->msg_lrpid = 0;
		qp->msg_stime = qp->msg_rtime = 0;
		qp->msg_ctime = gethrestime_sec();
		qp->msg_ngt_cnt = 0;
		qp->msg_neg_copy = 0;
		for (ii = 0; ii <= MSG_MAX_QNUM; ii++) {
			list_create(&qp->msg_wait_snd[ii],
			    sizeof (msgq_wakeup_t),
			    offsetof(msgq_wakeup_t, msgw_list));
			list_create(&qp->msg_wait_snd_ngt[ii],
			    sizeof (msgq_wakeup_t),
			    offsetof(msgq_wakeup_t, msgw_list));
		}
		/*
		 * The proper initialization of msg_lowest_type is to the
		 * highest possible value.  By doing this we guarantee that
		 * when the first send happens, the lowest type will be set
		 * properly.
		 */
		qp->msg_lowest_type = MSG_SMALL_INIT;
		list_create(&qp->msg_cpy_block,
		    sizeof (msgq_wakeup_t),
		    offsetof(msgq_wakeup_t, msgw_list));
		list_create(&qp->msg_wait_rcv,
		    sizeof (msgq_wakeup_t),
		    offsetof(msgq_wakeup_t, msgw_list));
		qp->msg_fnd_sndr = &msg_fnd_sndr[0];
		qp->msg_fnd_rdr = &msg_fnd_rdr[0];
		qp->msg_rcv_cnt = 0;
		qp->msg_snd_cnt = 0;
		qp->msg_snd_smallest = MSG_SMALL_INIT;

		if (error = ipc_commit_begin(msq_svc, key, msgflg,
		    (kipc_perm_t *)qp)) {
			if (error == EAGAIN)
				goto top;	/* lost a race; retry lookup */
			return (set_errno(error));
		}
		/* Per-queue limits come from the enforced rctl values. */
		qp->msg_qbytes = rctl_enforced_value(rc_process_msgmnb,
		    pp->p_rctls, pp);
		qp->msg_qmax = rctl_enforced_value(rc_process_msgtql,
		    pp->p_rctls, pp);
		lock = ipc_commit_end(msq_svc, &qp->msg_perm);
	}

	if (AU_AUDITING())
		audit_ipcget(AT_IPC_MSG, (void *)qp);

	id = qp->msg_perm.ipc_id;
	mutex_exit(lock);
	return (id);
}
664 665
static ssize_t
msgrcv(int msqid, struct ipcmsgbuf *msgp, size_t msgsz, long msgtyp, int msgflg)
{
	struct msg *smp;	/* ptr to best msg on q */
	kmsqid_t *qp;		/* ptr to associated q */
	kmutex_t *lock;
	size_t xtsz;		/* transfer byte count */
	int error = 0;
	int cvres;
	uint_t msg_hash;
	msgq_wakeup_t msg_entry;

	CPU_STATS_ADDQ(CPU, sys, msg, 1);	/* bump msg send/rcv count */

	msg_hash = msg_type_hash(msgtyp);
	if ((lock = ipc_lookup(msq_svc, msqid, (kipc_perm_t **)&qp)) == NULL) {
		return ((ssize_t)set_errno(EINVAL));
	}
	/* Hold the queue so it cannot be freed out from under our sleeps. */
	ipc_hold(msq_svc, (kipc_perm_t *)qp);

	if (error = ipcperm_access(&qp->msg_perm, MSG_R, CRED())) {
		goto msgrcv_out;
	}

	/*
	 * Various information (including the condvar_t) required for the
	 * process to sleep is provided by its stack.
	 */
	msg_entry.msgw_thrd = curthread;
	msg_entry.msgw_snd_wake = 0;
	msg_entry.msgw_type = msgtyp;
findmsg:
	smp = msgrcv_lookup(qp, msgtyp);

	if (smp) {
		/*
		 * We found a possible message to copy out.
		 */
		if ((smp->msg_flags & MSG_RCVCOPY) == 0) {
			long t = msg_entry.msgw_snd_wake;
			long copy_type = smp->msg_type;

			/*
			 * It is available, attempt to copy it.
			 */
			error = msg_copyout(qp, msgtyp, &lock, &xtsz, msgsz,
			    smp, msgp, msgflg);

			/*
			 * It is possible to consume a different message
			 * type than the one we were originally awakened for
			 * (negative types).  If this happens a check must be
			 * done to determine if another receiver is available
			 * for the waking message type.  Failure to do this
			 * can result in a message on the queue that can be
			 * serviced by a sleeping receiver.
			 */
			if (!error && t && (copy_type != t))
				msg_wakeup_rdr(qp, &qp->msg_fnd_sndr, t);

			/*
			 * Don't forget to wakeup a sleeper that blocked
			 * because we were copying things out.
			 */
			msg_wakeup_rdr(qp, &qp->msg_fnd_rdr, 0);
			goto msgrcv_out;
		}
		/*
		 * The selected message is being copied out, so block.  We do
		 * not need to wake the next person up on the msg_cpy_block
		 * list due to the fact someone is copying out and they will
		 * get things moving again once the copy is completed.
		 */
		cvres = msg_rcvq_sleep(&qp->msg_cpy_block,
		    &msg_entry, &lock, qp);
		error = msgq_check_err(qp, cvres);
		if (error) {
			goto msgrcv_out;
		}
		/* Re-scan from the beginning (Big Theory rule 2). */
		goto findmsg;
	}
	/*
	 * There isn't a message to copy out that matches the designated
	 * criteria.
	 */
	if (msgflg & IPC_NOWAIT) {
		error = ENOMSG;
		goto msgrcv_out;
	}
	msg_wakeup_rdr(qp, &qp->msg_fnd_rdr, 0);

	/*
	 * Wait for new message.  We keep the negative and positive types
	 * separate for performance reasons.
	 */
	msg_entry.msgw_snd_wake = 0;
	if (msgtyp >= 0) {
		cvres = msg_rcvq_sleep(&qp->msg_wait_snd[msg_hash],
		    &msg_entry, &lock, qp);
	} else {
		/* msg_ngt_cnt counts sleepers waiting on negative types. */
		qp->msg_ngt_cnt++;
		cvres = msg_rcvq_sleep(&qp->msg_wait_snd_ngt[msg_hash],
		    &msg_entry, &lock, qp);
		qp->msg_ngt_cnt--;
	}

	if (!(error = msgq_check_err(qp, cvres))) {
		goto findmsg;
	}

msgrcv_out:
	if (error) {
		/*
		 * Keep the wakeup chains alive on the error path: pass on
		 * the copyout wakeup and any send wakeup we absorbed.
		 */
		msg_wakeup_rdr(qp, &qp->msg_fnd_rdr, 0);
		if (msg_entry.msgw_snd_wake) {
			msg_wakeup_rdr(qp, &qp->msg_fnd_sndr,
			    msg_entry.msgw_snd_wake);
		}
		ipc_rele(msq_svc, (kipc_perm_t *)qp);
		return ((ssize_t)set_errno(error));
	}
	ipc_rele(msq_svc, (kipc_perm_t *)qp);
	return ((ssize_t)xtsz);
}
788 789
789 790 static int
790 791 msgq_check_err(kmsqid_t *qp, int cvres)
791 792 {
792 793 if (IPC_FREE(&qp->msg_perm)) {
793 794 return (EIDRM);
794 795 }
795 796
796 797 if (cvres == 0) {
797 798 return (EINTR);
798 799 }
799 800
800 801 return (0);
801 802 }
802 803
static int
msg_copyout(kmsqid_t *qp, long msgtyp, kmutex_t **lock, size_t *xtsz_ret,
    size_t msgsz, struct msg *smp, struct ipcmsgbuf *msgp, int msgflg)
{
	size_t xtsz;
	STRUCT_HANDLE(ipcmsgbuf, umsgp);
	model_t mdl = get_udatamodel();
	int copyerror = 0;

	STRUCT_SET_HANDLE(umsgp, mdl, msgp);
	/* Short buffer: truncate with MSG_NOERROR, otherwise fail E2BIG. */
	if (msgsz < smp->msg_size) {
		if ((msgflg & MSG_NOERROR) == 0) {
			return (E2BIG);
		} else {
			xtsz = msgsz;
		}
	} else {
		xtsz = smp->msg_size;
	}
	*xtsz_ret = xtsz;

	/*
	 * To prevent a DOS attack we mark the message as being
	 * copied out and release mutex.  When the copy is completed
	 * we need to acquire the mutex and make the appropriate updates.
	 */
	ASSERT((smp->msg_flags & MSG_RCVCOPY) == 0);
	smp->msg_flags |= MSG_RCVCOPY;
	msg_hold(smp);
	if (msgtyp < 0) {
		/* Only one negative-type copyout may be in flight at once. */
		ASSERT(qp->msg_neg_copy == 0);
		qp->msg_neg_copy = 1;
	}
	mutex_exit(*lock);

	if (mdl == DATAMODEL_NATIVE) {
		copyerror = copyout(&smp->msg_type, msgp,
		    sizeof (smp->msg_type));
	} else {
		/*
		 * 32-bit callers need an imploded msg type.
		 */
		int32_t msg_type32 = smp->msg_type;

		copyerror = copyout(&msg_type32, msgp,
		    sizeof (msg_type32));
	}

	if (copyerror == 0 && xtsz) {
		copyerror = copyout(smp->msg_addr,
		    STRUCT_FADDR(umsgp, mtext), xtsz);
	}

	/*
	 * Reclaim the mutex and make sure the message queue still exists.
	 */

	*lock = ipc_lock(msq_svc, qp->msg_perm.ipc_id);
	if (msgtyp < 0) {
		qp->msg_neg_copy = 0;
	}
	ASSERT(smp->msg_flags & MSG_RCVCOPY);
	smp->msg_flags &= ~MSG_RCVCOPY;
	msg_rele(smp);
	if (IPC_FREE(&qp->msg_perm)) {
		/* Queue was removed while we were copying. */
		return (EIDRM);
	}
	if (copyerror) {
		return (EFAULT);
	}
	/* Success: record receiver pid and time, then unlink the message. */
	qp->msg_lrpid = ttoproc(curthread)->p_pid;
	qp->msg_rtime = gethrestime_sec();
	msgunlink(qp, smp);
	return (0);
}
878 879
/*
 * Find the best message on the queue matching msgtyp (msgrcv semantics:
 * 0 = first message, >0 = exact type, <0 = lowest type <= -msgtyp).
 * Returns NULL when no candidate exists.  Called with the queue locked.
 */
static struct msg *
msgrcv_lookup(kmsqid_t *qp, long msgtyp)
{
	struct msg *smp = NULL;
	long qp_low;
	struct msg *mp;	/* ptr to msg on q */
	long low_msgtype;
	/*
	 * Dummy returned while a negative-type copyout is in flight; the
	 * caller only examines msg_flags and goes to sleep on the copyout
	 * list.  NOTE(review): this static is shared by all queues --
	 * safe only while nothing but MSG_RCVCOPY is ever stored in it
	 * and it is never linked anywhere; confirm.
	 */
	static struct msg neg_copy_smp;

	mp = list_head(&qp->msg_list);
	if (msgtyp == 0) {
		/* Type 0: take the first message, if any. */
		smp = mp;
	} else {
		qp_low = qp->msg_lowest_type;
		if (msgtyp > 0) {
			/*
			 * If our lowest possible message type is larger than
			 * the message type desired, then we know there is
			 * no entry present.
			 */
			if (qp_low > msgtyp) {
				return (NULL);
			}

			for (; mp; mp = list_next(&qp->msg_list, mp)) {
				if (msgtyp == mp->msg_type) {
					smp = mp;
					break;
				}
			}
		} else {
			/*
			 * We have kept track of the lowest possible message
			 * type on the send queue.  This allows us to terminate
			 * the search early if we find a message type of that
			 * type.  Note, the lowest type may not be the actual
			 * lowest value in the system, it is only guaranteed
			 * that there isn't a value lower than that.
			 */
			low_msgtype = -msgtyp;
			if (low_msgtype < qp_low) {
				return (NULL);
			}
			if (qp->msg_neg_copy) {
				neg_copy_smp.msg_flags = MSG_RCVCOPY;
				return (&neg_copy_smp);
			}
			for (; mp; mp = list_next(&qp->msg_list, mp)) {
				if (mp->msg_type <= low_msgtype &&
				    !(smp && smp->msg_type <= mp->msg_type)) {
					smp = mp;
					low_msgtype = mp->msg_type;
					if (low_msgtype == qp_low) {
						break;
					}
				}
			}
			if (smp) {
				/*
				 * Update the lowest message type.
				 */
				qp->msg_lowest_type = smp->msg_type;
			}
		}
	}
	return (smp);
}
946 947
947 948 /*
948 949 * msgids system call.
949 950 */
950 951 static int
951 952 msgids(int *buf, uint_t nids, uint_t *pnids)
952 953 {
953 954 int error;
954 955
955 956 if (error = ipc_ids(msq_svc, buf, nids, pnids))
956 957 return (set_errno(error));
957 958
958 959 return (0);
959 960 }
960 961
/*
 * Round a message size up to the alignment of the size type for the
 * native (RND) and 32-bit (RND32) data models, used when laying out
 * msgsnap(2) output buffers.
 */
#define	RND(x)		roundup((x), sizeof (size_t))
#define	RND32(x)	roundup((x), sizeof (size32_t))
963 964
/*
 * msgsnap system call.
 *
 * Copies a snapshot of the messages on queue msqid into the user buffer
 * buf of size bufsz, filtered by msgtyp with the same matching rules as
 * msgrcv(2).  The buffer is laid out as a msgsnap_head followed by one
 * msgsnap_mhead plus message text (rounded to the data model's word
 * size) per matching message.  If bufsz is too small to hold all the
 * matching messages, only the header (required size and message count)
 * is returned.  Returns 0 or sets errno (EINVAL, EFAULT, EIDRM, or a
 * permission error).
 */
static int
msgsnap(int msqid, caddr_t buf, size_t bufsz, long msgtyp)
{
	struct msg *mp;	/* ptr to msg on q */
	kmsqid_t *qp;	/* ptr to associated q */
	kmutex_t *lock;
	size_t size;
	size_t nmsg;
	struct msg **snaplist;
	int error, i;
	model_t mdl = get_udatamodel();
	STRUCT_DECL(msgsnap_head, head);
	STRUCT_DECL(msgsnap_mhead, mhead);

	STRUCT_INIT(head, mdl);
	STRUCT_INIT(mhead, mdl);

	if (bufsz < STRUCT_SIZE(head))
		return (set_errno(EINVAL));

	if ((lock = ipc_lookup(msq_svc, msqid, (kipc_perm_t **)&qp)) == NULL)
		return (set_errno(EINVAL));

	if (error = ipcperm_access(&qp->msg_perm, MSG_R, CRED())) {
		mutex_exit(lock);
		return (set_errno(error));
	}
	/* error is 0 from here on; hold the queue across the copyouts. */
	ipc_hold(msq_svc, (kipc_perm_t *)qp);

	/*
	 * First compute the required buffer size and
	 * the number of messages on the queue.
	 */
	size = nmsg = 0;
	for (mp = list_head(&qp->msg_list); mp;
	    mp = list_next(&qp->msg_list, mp)) {
		if (msgtyp == 0 ||
		    (msgtyp > 0 && msgtyp == mp->msg_type) ||
		    (msgtyp < 0 && mp->msg_type <= -msgtyp)) {
			nmsg++;
			if (mdl == DATAMODEL_NATIVE)
				size += RND(mp->msg_size);
			else
				size += RND32(mp->msg_size);
		}
	}

	size += STRUCT_SIZE(head) + nmsg * STRUCT_SIZE(mhead);
	if (size > bufsz)
		nmsg = 0;	/* buffer too small; report header only */

	if (nmsg > 0) {
		/*
		 * Mark the messages as being copied.  Each msg_hold()
		 * keeps the message alive while the queue lock is
		 * dropped for the copyouts below.
		 */
		snaplist = (struct msg **)kmem_alloc(nmsg *
		    sizeof (struct msg *), KM_SLEEP);
		i = 0;
		for (mp = list_head(&qp->msg_list); mp;
		    mp = list_next(&qp->msg_list, mp)) {
			if (msgtyp == 0 ||
			    (msgtyp > 0 && msgtyp == mp->msg_type) ||
			    (msgtyp < 0 && mp->msg_type <= -msgtyp)) {
				msg_hold(mp);
				snaplist[i] = mp;
				i++;
			}
		}
	}
	mutex_exit(lock);

	/*
	 * Copy out the buffer header.
	 */
	STRUCT_FSET(head, msgsnap_size, size);
	STRUCT_FSET(head, msgsnap_nmsg, nmsg);
	if (copyout(STRUCT_BUF(head), buf, STRUCT_SIZE(head)))
		error = EFAULT;

	buf += STRUCT_SIZE(head);

	/*
	 * Now copy out the messages one by one.  Even after an error we
	 * keep iterating so that every held message is released.
	 */
	for (i = 0; i < nmsg; i++) {
		mp = snaplist[i];
		if (error == 0) {
			STRUCT_FSET(mhead, msgsnap_mlen, mp->msg_size);
			STRUCT_FSET(mhead, msgsnap_mtype, mp->msg_type);
			if (copyout(STRUCT_BUF(mhead), buf, STRUCT_SIZE(mhead)))
				error = EFAULT;
			buf += STRUCT_SIZE(mhead);

			if (error == 0 &&
			    mp->msg_size != 0 &&
			    copyout(mp->msg_addr, buf, mp->msg_size))
				error = EFAULT;
			if (mdl == DATAMODEL_NATIVE)
				buf += RND(mp->msg_size);
			else
				buf += RND32(mp->msg_size);
		}
		lock = ipc_lock(msq_svc, qp->msg_perm.ipc_id);
		msg_rele(mp);
		/* Check for msg q deleted or reallocated */
		if (IPC_FREE(&qp->msg_perm))
			error = EIDRM;
		mutex_exit(lock);
	}

	(void) ipc_lock(msq_svc, qp->msg_perm.ipc_id);
	ipc_rele(msq_svc, (kipc_perm_t *)qp);

	if (nmsg > 0)
		kmem_free(snaplist, nmsg * sizeof (struct msg *));

	if (error)
		return (set_errno(error));
	return (0);
}
1087 1088
/*
 * Messages of up to this many bytes have their kernel buffer allocated
 * up front in msgsnd(), before the queue lock is taken; larger messages
 * are allocated after the space check to bound the kernel memory a
 * caller can reserve speculatively.
 */
#define	MSG_PREALLOC_LIMIT 8192
1089 1090
/*
 * msgsnd system call.
 *
 * Places the user message msgp (msgsz bytes of text preceded by a
 * positive long type) on queue msqid, blocking while the queue is full
 * unless IPC_NOWAIT is set in msgflg.  Returns 0 or sets errno
 * (EFAULT, EINVAL, EAGAIN, EIDRM, a permission error, or the error
 * returned by msgq_check_err() after an interrupted wait).
 */
static int
msgsnd(int msqid, struct ipcmsgbuf *msgp, size_t msgsz, int msgflg)
{
	kmsqid_t *qp;
	kmutex_t *lock = NULL;
	struct msg *mp = NULL;
	long type;
	int error = 0, wait_wakeup = 0;
	msgq_wakeup_t msg_entry;
	model_t mdl = get_udatamodel();
	STRUCT_HANDLE(ipcmsgbuf, umsgp);

	CPU_STATS_ADDQ(CPU, sys, msg, 1);	/* bump msg send/rcv count */
	STRUCT_SET_HANDLE(umsgp, mdl, msgp);

	/* Fetch the message type; 32-bit callers pass an int32_t type. */
	if (mdl == DATAMODEL_NATIVE) {
		if (copyin(msgp, &type, sizeof (type)))
			return (set_errno(EFAULT));
	} else {
		int32_t type32;
		if (copyin(msgp, &type32, sizeof (type32)))
			return (set_errno(EFAULT));
		type = type32;
	}

	if (type < 1)
		return (set_errno(EINVAL));

	/*
	 * We want the value here large enough that most of the
	 * message operations will use the "lockless" path,
	 * but small enough that a user can not reserve large
	 * chunks of kernel memory unless they have a valid
	 * reason to.
	 */
	if (msgsz <= MSG_PREALLOC_LIMIT) {
		/*
		 * We are small enough that we can afford to do the
		 * allocation now. This saves dropping the lock
		 * and then reacquiring the lock.
		 */
		mp = kmem_zalloc(sizeof (struct msg), KM_SLEEP);
		mp->msg_copycnt = 1;
		mp->msg_size = msgsz;
		if (msgsz) {
			mp->msg_addr = kmem_alloc(msgsz, KM_SLEEP);
			if (copyin(STRUCT_FADDR(umsgp, mtext),
			    mp->msg_addr, msgsz) == -1) {
				error = EFAULT;
				goto msgsnd_out;
			}
		}
	}

	if ((lock = ipc_lookup(msq_svc, msqid, (kipc_perm_t **)&qp)) == NULL) {
		error = EINVAL;
		goto msgsnd_out;
	}

	/* Keep the queue structure alive across lock drops below. */
	ipc_hold(msq_svc, (kipc_perm_t *)qp);

	if (msgsz > qp->msg_qbytes) {
		error = EINVAL;
		goto msgsnd_out;
	}

	if (error = ipcperm_access(&qp->msg_perm, MSG_W, CRED()))
		goto msgsnd_out;

top:
	/*
	 * Allocate space on q, message header, & buffer space.
	 */
	ASSERT(qp->msg_qnum <= qp->msg_qmax);
	while ((msgsz > qp->msg_qbytes - qp->msg_cbytes) ||
	    (qp->msg_qnum == qp->msg_qmax)) {
		int cvres;

		if (msgflg & IPC_NOWAIT) {
			error = EAGAIN;
			goto msgsnd_out;
		}

		/*
		 * Queue is full: register on the sender wait list and
		 * sleep until a receiver frees up space.  The wait can
		 * end early on a signal or queue removal, which
		 * msgq_check_err() turns into an error below.
		 */
		wait_wakeup = 0;
		qp->msg_snd_cnt++;
		msg_entry.msgw_snd_size = msgsz;
		msg_entry.msgw_thrd = curthread;
		msg_entry.msgw_type = type;
		cv_init(&msg_entry.msgw_wake_cv, NULL, 0, NULL);
		list_insert_tail(&qp->msg_wait_rcv, &msg_entry);
		if (qp->msg_snd_smallest > msgsz)
			qp->msg_snd_smallest = msgsz;
		cvres = cv_wait_sig(&msg_entry.msgw_wake_cv, lock);
		/* The lock was dropped during the wait; reacquire it. */
		lock = ipc_relock(msq_svc, qp->msg_perm.ipc_id, lock);
		qp->msg_snd_cnt--;
		if (list_link_active(&msg_entry.msgw_list))
			list_remove(&qp->msg_wait_rcv, &msg_entry);
		if (error = msgq_check_err(qp, cvres)) {
			goto msgsnd_out;
		}
		wait_wakeup = 1;
	}

	if (mp == NULL) {
		/*
		 * Large message: the buffer was not preallocated, so
		 * drop the lock, allocate and copy in, then revalidate
		 * the queue and recheck for space from "top".
		 */
		int failure;

		mutex_exit(lock);
		ASSERT(msgsz > 0);
		mp = kmem_zalloc(sizeof (struct msg), KM_SLEEP);
		mp->msg_addr = kmem_alloc(msgsz, KM_SLEEP);
		mp->msg_size = msgsz;
		mp->msg_copycnt = 1;

		failure = (copyin(STRUCT_FADDR(umsgp, mtext),
		    mp->msg_addr, msgsz) == -1);
		lock = ipc_lock(msq_svc, qp->msg_perm.ipc_id);
		if (IPC_FREE(&qp->msg_perm)) {
			error = EIDRM;
			goto msgsnd_out;
		}
		if (failure) {
			error = EFAULT;
			goto msgsnd_out;
		}
		goto top;
	}

	/*
	 * Everything is available, put msg on q.
	 */
	qp->msg_qnum++;
	qp->msg_cbytes += msgsz;
	qp->msg_lspid = curproc->p_pid;
	qp->msg_stime = gethrestime_sec();
	mp->msg_type = type;
	if (qp->msg_lowest_type > type)
		qp->msg_lowest_type = type;
	list_insert_tail(&qp->msg_list, mp);
	/*
	 * Get the proper receiver going.
	 */
	msg_wakeup_rdr(qp, &qp->msg_fnd_sndr, type);

msgsnd_out:
	/*
	 * We were woken up from the send wait list, but an
	 * error occurred on placing the message onto the
	 * msg queue. Given that, we need to do the wakeup
	 * dance again.
	 */

	if (wait_wakeup && error) {
		msg_wakeup_senders(qp);
	}
	if (lock)
		ipc_rele(msq_svc, (kipc_perm_t *)qp);	/* drops lock */

	if (error) {
		if (mp)
			msg_rele(mp);
		return (set_errno(error));
	}

	return (0);
}
1258 1259
/*
 * Wake up at most one blocked reader that can accept a message of the
 * given type.  flist points at a circular list of selection routines
 * (type-0, exact-type, negative-type matchers); each is tried in turn
 * until one yields a waiter.  *flist is then rotated so the policies
 * are serviced round-robin, preventing one class of waiters from
 * starving the others.  Caller holds the queue lock.
 */
static void
msg_wakeup_rdr(kmsqid_t *qp, msg_select_t **flist, long type)
{
	msg_select_t *walker = *flist;
	msgq_wakeup_t *wakeup;
	uint_t msg_hash;

	msg_hash = msg_type_hash(type);

	do {
		wakeup = walker->selection(qp, msg_hash, type);
		walker = walker->next_selection;
	} while (!wakeup && walker != *flist);

	/* Rotate the policy list for round-robin fairness. */
	*flist = (*flist)->next_selection;
	if (wakeup) {
		if (type) {
			/* Record which type triggered the wakeup. */
			wakeup->msgw_snd_wake = type;
		}
		cv_signal(&wakeup->msgw_wake_cv);
	}
}
1281 1282
1282 1283 static uint_t
1283 1284 msg_type_hash(long msg_type)
1284 1285 {
1285 1286 if (msg_type < 0) {
1286 1287 long hash = -msg_type / MSG_NEG_INTERVAL;
1287 1288 /*
1288 1289 * Negative message types are hashed over an
1289 1290 * interval. Any message type that hashes
1290 1291 * beyond MSG_MAX_QNUM is automatically placed
1291 1292 * in the last bucket.
1292 1293 */
1293 1294 if (hash > MSG_MAX_QNUM)
1294 1295 hash = MSG_MAX_QNUM;
1295 1296 return (hash);
1296 1297 }
1297 1298
1298 1299 /*
1299 1300 * 0 or positive message type. The first bucket is reserved for
1300 1301 * message receivers of type 0, the other buckets we hash into.
1301 1302 */
1302 1303 if (msg_type)
1303 1304 return (1 + (msg_type % MSG_MAX_QNUM));
1304 1305 return (0);
1305 1306 }
1306 1307
1307 1308 /*
1308 1309 * Routines to see if we have a receiver of type 0 either blocked waiting
1309 1310 * for a message. Simply return the first guy on the list.
1310 1311 */
1311 1312
1312 1313 static msgq_wakeup_t *
1313 1314 /* ARGSUSED */
1314 1315 msg_fnd_any_snd(kmsqid_t *qp, int msg_hash, long type)
1315 1316 {
1316 1317 msgq_wakeup_t *walker;
1317 1318
1318 1319 walker = list_head(&qp->msg_wait_snd[0]);
1319 1320
1320 1321 if (walker)
1321 1322 list_remove(&qp->msg_wait_snd[0], walker);
1322 1323 return (walker);
1323 1324 }
1324 1325
1325 1326 static msgq_wakeup_t *
1326 1327 /* ARGSUSED */
1327 1328 msg_fnd_any_rdr(kmsqid_t *qp, int msg_hash, long type)
1328 1329 {
1329 1330 msgq_wakeup_t *walker;
1330 1331
1331 1332 walker = list_head(&qp->msg_cpy_block);
1332 1333 if (walker)
1333 1334 list_remove(&qp->msg_cpy_block, walker);
1334 1335 return (walker);
1335 1336 }
1336 1337
1337 1338 static msgq_wakeup_t *
1338 1339 msg_fnd_spc_snd(kmsqid_t *qp, int msg_hash, long type)
1339 1340 {
1340 1341 msgq_wakeup_t *walker;
1341 1342
1342 1343 walker = list_head(&qp->msg_wait_snd[msg_hash]);
1343 1344
1344 1345 while (walker && walker->msgw_type != type)
1345 1346 walker = list_next(&qp->msg_wait_snd[msg_hash], walker);
1346 1347 if (walker)
1347 1348 list_remove(&qp->msg_wait_snd[msg_hash], walker);
1348 1349 return (walker);
1349 1350 }
1350 1351
/*
 * Find (and dequeue) a receiver blocked on a negative message type that
 * would accept a message of the given (positive) type, i.e. a waiter
 * whose -msgw_type >= type.  Returns NULL if no such waiter exists.
 * Caller holds the queue lock.
 */
/* ARGSUSED */
static msgq_wakeup_t *
msg_fnd_neg_snd(kmsqid_t *qp, int msg_hash, long type)
{
	msgq_wakeup_t *qptr;
	int count;
	int check_index;
	int neg_index;
	int nbuckets;

	/* Fast exit if no negative-type receivers are waiting at all. */
	if (!qp->msg_ngt_cnt) {
		return (NULL);
	}
	neg_index = msg_type_hash(-type);

	/*
	 * Check for a match among the negative type queues. Any buckets
	 * at neg_index or larger can match the type. Use the last send
	 * time to randomize the starting bucket to prevent starvation.
	 * Search all buckets from neg_index to MSG_MAX_QNUM, starting
	 * from the random starting point, and wrapping around after
	 * MSG_MAX_QNUM.
	 */

	nbuckets = MSG_MAX_QNUM - neg_index + 1;
	check_index = neg_index + (qp->msg_stime % nbuckets);

	for (count = nbuckets; count > 0; count--) {
		qptr = list_head(&qp->msg_wait_snd_ngt[check_index]);
		while (qptr) {
			/*
			 * The lowest hash bucket may actually contain
			 * message types that are not valid for this
			 * request. This can happen due to the fact that
			 * the message buckets actually contain a consecutive
			 * range of types.
			 */
			if (-qptr->msgw_type >= type) {
				list_remove(&qp->msg_wait_snd_ngt[check_index],
				    qptr);
				return (qptr);
			}
			qptr = list_next(&qp->msg_wait_snd_ngt[check_index],
			    qptr);
		}
		/* Wrap back to the first eligible bucket. */
		if (++check_index > MSG_MAX_QNUM) {
			check_index = neg_index;
		}
	}
	return (NULL);
}
1402 1403
/*
 * Put the calling thread to sleep on the given receiver wait list until
 * a sender signals its condition variable or a signal interrupts the
 * wait.  The queue lock (*lock) is dropped during the sleep and
 * reacquired via ipc_relock(); the caller must therefore revalidate the
 * queue (e.g. for IPC_FREE) afterwards.  Returns the cv_wait_sig()
 * result: 0 if interrupted by a signal, nonzero if signalled.
 */
static int
msg_rcvq_sleep(list_t *queue, msgq_wakeup_t *entry, kmutex_t **lock,
    kmsqid_t *qp)
{
	int cvres;

	cv_init(&entry->msgw_wake_cv, NULL, 0, NULL);

	list_insert_tail(queue, entry);

	qp->msg_rcv_cnt++;
	cvres = cv_wait_sig(&entry->msgw_wake_cv, *lock);
	*lock = ipc_relock(msq_svc, qp->msg_perm.ipc_id, *lock);
	qp->msg_rcv_cnt--;

	if (list_link_active(&entry->msgw_list)) {
		/*
		 * We woke up unexpectedly, remove ourself.
		 * (A normal wakeup removes the entry before signalling.)
		 */
		list_remove(queue, entry);
	}

	return (cvres);
}
1427 1428
1428 1429 static void
1429 1430 msg_rcvq_wakeup_all(list_t *q_ptr)
1430 1431 {
1431 1432 msgq_wakeup_t *q_walk;
1432 1433
1433 1434 while (q_walk = list_head(q_ptr)) {
1434 1435 list_remove(q_ptr, q_walk);
1435 1436 cv_signal(&q_walk->msgw_wake_cv);
1436 1437 }
1437 1438 }
1438 1439
1439 1440 /*
1440 1441 * msgsys - System entry point for msgctl, msgget, msgrcv, and msgsnd
1441 1442 * system calls.
1442 1443 */
1443 1444 static ssize_t
1444 1445 msgsys(int opcode, uintptr_t a1, uintptr_t a2, uintptr_t a3,
1445 1446 uintptr_t a4, uintptr_t a5)
1446 1447 {
1447 1448 ssize_t error;
1448 1449
1449 1450 switch (opcode) {
1450 1451 case MSGGET:
1451 1452 error = msgget((key_t)a1, (int)a2);
1452 1453 break;
1453 1454 case MSGCTL:
1454 1455 error = msgctl((int)a1, (int)a2, (void *)a3);
1455 1456 break;
1456 1457 case MSGRCV:
1457 1458 error = msgrcv((int)a1, (struct ipcmsgbuf *)a2,
1458 1459 (size_t)a3, (long)a4, (int)a5);
1459 1460 break;
1460 1461 case MSGSND:
1461 1462 error = msgsnd((int)a1, (struct ipcmsgbuf *)a2,
1462 1463 (size_t)a3, (int)a4);
1463 1464 break;
1464 1465 case MSGIDS:
1465 1466 error = msgids((int *)a1, (uint_t)a2, (uint_t *)a3);
1466 1467 break;
1467 1468 case MSGSNAP:
1468 1469 error = msgsnap((int)a1, (caddr_t)a2, (size_t)a3, (long)a4);
1469 1470 break;
1470 1471 default:
1471 1472 error = set_errno(EINVAL);
1472 1473 break;
1473 1474 }
1474 1475
1475 1476 return (error);
1476 1477 }
1477 1478
/*
 * Determine if a writer who is waiting can process its message. If so
 * wake it up.  Walks the sender wait list, signalling each sender whose
 * message would fit in the space that remains assuming all previously
 * signalled senders succeed, and tracks the smallest message size among
 * the senders left waiting so future checks can short-circuit.
 * Caller holds the queue lock.
 */
static void
msg_wakeup_senders(kmsqid_t *qp)

{
	struct msgq_wakeup *ptr, *optr;
	size_t avail, smallest;
	int msgs_out;

	/*
	 * Is there a writer waiting, and if so, can it be serviced? If
	 * not return back to the caller.
	 */
	if (IPC_FREE(&qp->msg_perm) || qp->msg_qnum >= qp->msg_qmax)
		return;

	avail = qp->msg_qbytes - qp->msg_cbytes;
	if (avail < qp->msg_snd_smallest)
		return;

	ptr = list_head(&qp->msg_wait_rcv);
	if (ptr == NULL) {
		/* No senders left; reset the smallest-size tracker. */
		qp->msg_snd_smallest = MSG_SMALL_INIT;
		return;
	}
	optr = ptr;

	/*
	 * smallest:	minimum message size of all queued writers
	 *
	 * avail:	amount of space left on the msgq
	 *		if all the writers we have woken up are successful.
	 *
	 * msgs_out:	is the number of messages on the message queue if
	 *		all the writers we have woken up are successful.
	 */

	smallest = MSG_SMALL_INIT;
	msgs_out = qp->msg_qnum;
	/* optr is the entry being examined; ptr is read ahead first */
	while (ptr) {
		ptr = list_next(&qp->msg_wait_rcv, ptr);
		if (optr->msgw_snd_size <= avail) {
			list_remove(&qp->msg_wait_rcv, optr);
			avail -= optr->msgw_snd_size;
			cv_signal(&optr->msgw_wake_cv);
			msgs_out++;
			if (msgs_out == qp->msg_qmax ||
			    avail < qp->msg_snd_smallest)
				break;
		} else {
			if (smallest > optr->msgw_snd_size)
				smallest = optr->msgw_snd_size;
		}
		optr = ptr;
	}

	/*
	 * Reset the smallest message size if the entire list has been visited
	 */
	if (ptr == NULL && smallest != MSG_SMALL_INIT)
		qp->msg_snd_smallest = smallest;
}
1543 1544
#ifdef _SYSCALL32_IMPL
/*
 * msgsys32 - System entry point for msgctl, msgget, msgrcv, and msgsnd
 * system calls for 32-bit callers on LP64 kernel.  Arguments are
 * widened (sign-extending where the 32-bit ABI passes signed values)
 * and dispatched to the native handlers, which set errno on failure.
 */
static ssize32_t
msgsys32(int opcode, uint32_t a1, uint32_t a2, uint32_t a3,
    uint32_t a4, uint32_t a5)
{
	switch (opcode) {
	case MSGGET:
		return (msgget((key_t)a1, (int)a2));
	case MSGCTL:
		return (msgctl((int)a1, (int)a2, (void *)(uintptr_t)a3));
	case MSGRCV:
		return (msgrcv((int)a1, (struct ipcmsgbuf *)(uintptr_t)a2,
		    (size_t)a3, (long)(int32_t)a4, (int)a5));
	case MSGSND:
		return (msgsnd((int)a1, (struct ipcmsgbuf *)(uintptr_t)a2,
		    (size_t)(int32_t)a3, (int)a4));
	case MSGIDS:
		return (msgids((int *)(uintptr_t)a1, (uint_t)a2,
		    (uint_t *)(uintptr_t)a3));
	case MSGSNAP:
		return (msgsnap((int)a1, (caddr_t)(uintptr_t)a2, (size_t)a3,
		    (long)(int32_t)a4));
	default:
		return (set_errno(EINVAL));
	}
}
#endif	/* _SYSCALL32_IMPL */
↓ open down ↓ |
1262 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX