7127 remove -Wno-missing-braces from Makefile.uts
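This webrev covers the usr/src/uts/common/os/aio.c piece of the change. Removing -Wno-missing-braces from Makefile.uts stops suppressing the compiler's missing-braces warning, so aggregate initializers that rely on brace elision must be fully braced. In this file that is the struct modlinkage initializer (old lines 182-189 in the diff below): the modlsys pointers initialize an array member of struct modlinkage, and the fix wraps them in their own braces. The following is a minimal sketch of the warning pattern, using hypothetical types in place of the modctl structures and assuming GCC/Clang -Wmissing-braces behavior; it is not part of the webrev itself.

    static int a, b;
    struct linkage { void *l[4]; };                   /* array member, as in modlinkage */
    struct wrapper { int rev; struct linkage link; };

    /* Relies on brace elision; warns once -Wmissing-braces is no longer disabled. */
    struct wrapper before = { 1, &a, &b, NULL };

    /* Sub-aggregate fully braced; same layout and values, no warning. */
    struct wrapper after  = { 1, { &a, &b, NULL } };

Only the braces change; the members and their order are identical, so the resulting module linkage record is unchanged.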
--- old/usr/src/uts/common/os/aio.c
+++ new/usr/src/uts/common/os/aio.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27 /*
28 28 * Kernel asynchronous I/O.
29 29 * This is only for raw devices now (as of Nov. 1993).
30 30 */
31 31
32 32 #include <sys/types.h>
33 33 #include <sys/errno.h>
34 34 #include <sys/conf.h>
35 35 #include <sys/file.h>
36 36 #include <sys/fs/snode.h>
37 37 #include <sys/unistd.h>
38 38 #include <sys/cmn_err.h>
39 39 #include <vm/as.h>
40 40 #include <vm/faultcode.h>
41 41 #include <sys/sysmacros.h>
42 42 #include <sys/procfs.h>
43 43 #include <sys/kmem.h>
44 44 #include <sys/autoconf.h>
45 45 #include <sys/ddi_impldefs.h>
46 46 #include <sys/sunddi.h>
47 47 #include <sys/aio_impl.h>
48 48 #include <sys/debug.h>
49 49 #include <sys/param.h>
50 50 #include <sys/systm.h>
51 51 #include <sys/vmsystm.h>
52 52 #include <sys/fs/pxfs_ki.h>
53 53 #include <sys/contract/process_impl.h>
54 54
55 55 /*
56 56 * external entry point.
57 57 */
58 58 #ifdef _LP64
59 59 static int64_t kaioc(long, long, long, long, long, long);
60 60 #endif
61 61 static int kaio(ulong_t *, rval_t *);
62 62
63 63
64 64 #define AIO_64 0
65 65 #define AIO_32 1
66 66 #define AIO_LARGEFILE 2
67 67
68 68 /*
69 69 * implementation specific functions (private)
70 70 */
71 71 #ifdef _LP64
72 72 static int alio(int, aiocb_t **, int, struct sigevent *);
73 73 #endif
74 74 static int aionotify(void);
75 75 static int aioinit(void);
76 76 static int aiostart(void);
77 77 static void alio_cleanup(aio_t *, aiocb_t **, int, int);
78 78 static int (*check_vp(struct vnode *, int))(vnode_t *, struct aio_req *,
79 79 cred_t *);
80 80 static void lio_set_error(aio_req_t *, int portused);
81 81 static aio_t *aio_aiop_alloc();
82 82 static int aio_req_alloc(aio_req_t **, aio_result_t *);
83 83 static int aio_lio_alloc(aio_lio_t **);
84 84 static aio_req_t *aio_req_done(void *);
85 85 static aio_req_t *aio_req_remove(aio_req_t *);
86 86 static int aio_req_find(aio_result_t *, aio_req_t **);
87 87 static int aio_hash_insert(struct aio_req_t *, aio_t *);
88 88 static int aio_req_setup(aio_req_t **, aio_t *, aiocb_t *,
89 89 aio_result_t *, vnode_t *, int);
90 90 static int aio_cleanup_thread(aio_t *);
91 91 static aio_lio_t *aio_list_get(aio_result_t *);
92 92 static void lio_set_uerror(void *, int);
93 93 extern void aio_zerolen(aio_req_t *);
94 94 static int aiowait(struct timeval *, int, long *);
95 95 static int aiowaitn(void *, uint_t, uint_t *, timespec_t *);
96 96 static int aio_unlock_requests(caddr_t iocblist, int iocb_index,
97 97 aio_req_t *reqlist, aio_t *aiop, model_t model);
98 98 static int aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max);
99 99 static int aiosuspend(void *, int, struct timespec *, int,
100 100 long *, int);
101 101 static int aliowait(int, void *, int, void *, int);
102 102 static int aioerror(void *, int);
103 103 static int aio_cancel(int, void *, long *, int);
104 104 static int arw(int, int, char *, int, offset_t, aio_result_t *, int);
105 105 static int aiorw(int, void *, int, int);
106 106
107 107 static int alioLF(int, void *, int, void *);
108 108 static int aio_req_setupLF(aio_req_t **, aio_t *, aiocb64_32_t *,
109 109 aio_result_t *, vnode_t *, int);
110 110 static int alio32(int, void *, int, void *);
111 111 static int driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
112 112 static int driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
113 113
114 114 #ifdef _SYSCALL32_IMPL
115 115 static void aiocb_LFton(aiocb64_32_t *, aiocb_t *);
116 116 void aiocb_32ton(aiocb32_t *, aiocb_t *);
117 117 #endif /* _SYSCALL32_IMPL */
118 118
119 119 /*
120 120 * implementation specific functions (external)
121 121 */
122 122 void aio_req_free(aio_t *, aio_req_t *);
123 123
124 124 /*
125 125 * Event Port framework
126 126 */
127 127
128 128 void aio_req_free_port(aio_t *, aio_req_t *);
129 129 static int aio_port_callback(void *, int *, pid_t, int, void *);
130 130
131 131 /*
132 132 * This is the loadable module wrapper.
133 133 */
134 134 #include <sys/modctl.h>
135 135 #include <sys/syscall.h>
136 136
137 137 #ifdef _LP64
138 138
139 139 static struct sysent kaio_sysent = {
140 140 6,
141 141 SE_NOUNLOAD | SE_64RVAL | SE_ARGC,
142 142 (int (*)())kaioc
143 143 };
144 144
145 145 #ifdef _SYSCALL32_IMPL
146 146 static struct sysent kaio_sysent32 = {
147 147 7,
148 148 SE_NOUNLOAD | SE_64RVAL,
149 149 kaio
150 150 };
151 151 #endif /* _SYSCALL32_IMPL */
152 152
153 153 #else /* _LP64 */
154 154
155 155 static struct sysent kaio_sysent = {
156 156 7,
157 157 SE_NOUNLOAD | SE_32RVAL1,
158 158 kaio
159 159 };
160 160
161 161 #endif /* _LP64 */
162 162
163 163 /*
164 164 * Module linkage information for the kernel.
165 165 */
166 166
167 167 static struct modlsys modlsys = {
168 168 &mod_syscallops,
169 169 "kernel Async I/O",
170 170 &kaio_sysent
171 171 };
172 172
173 173 #ifdef _SYSCALL32_IMPL
174 174 static struct modlsys modlsys32 = {
175 175 &mod_syscallops32,
176 176 "kernel Async I/O for 32 bit compatibility",
177 177 &kaio_sysent32
178 178 };
179 179 #endif /* _SYSCALL32_IMPL */
180 180
181 181
182 182 static struct modlinkage modlinkage = {
183 183 MODREV_1,
184 - &modlsys,
184 + { &modlsys,
185 185 #ifdef _SYSCALL32_IMPL
186 - &modlsys32,
186 + &modlsys32,
187 187 #endif
188 - NULL
188 + NULL
189 + }
189 190 };
190 191
191 192 int
192 193 _init(void)
193 194 {
194 195 int retval;
195 196
196 197 if ((retval = mod_install(&modlinkage)) != 0)
197 198 return (retval);
198 199
199 200 return (0);
200 201 }
201 202
202 203 int
203 204 _fini(void)
204 205 {
205 206 int retval;
206 207
207 208 retval = mod_remove(&modlinkage);
208 209
209 210 return (retval);
210 211 }
211 212
212 213 int
213 214 _info(struct modinfo *modinfop)
214 215 {
215 216 return (mod_info(&modlinkage, modinfop));
216 217 }
217 218
218 219 #ifdef _LP64
219 220 static int64_t
220 221 kaioc(
221 222 long a0,
222 223 long a1,
223 224 long a2,
224 225 long a3,
225 226 long a4,
226 227 long a5)
227 228 {
228 229 int error;
229 230 long rval = 0;
230 231
231 232 switch ((int)a0 & ~AIO_POLL_BIT) {
232 233 case AIOREAD:
233 234 error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
234 235 (offset_t)a4, (aio_result_t *)a5, FREAD);
235 236 break;
236 237 case AIOWRITE:
237 238 error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
238 239 (offset_t)a4, (aio_result_t *)a5, FWRITE);
239 240 break;
240 241 case AIOWAIT:
241 242 error = aiowait((struct timeval *)a1, (int)a2, &rval);
242 243 break;
243 244 case AIOWAITN:
244 245 error = aiowaitn((void *)a1, (uint_t)a2, (uint_t *)a3,
245 246 (timespec_t *)a4);
246 247 break;
247 248 case AIONOTIFY:
248 249 error = aionotify();
249 250 break;
250 251 case AIOINIT:
251 252 error = aioinit();
252 253 break;
253 254 case AIOSTART:
254 255 error = aiostart();
255 256 break;
256 257 case AIOLIO:
257 258 error = alio((int)a1, (aiocb_t **)a2, (int)a3,
258 259 (struct sigevent *)a4);
259 260 break;
260 261 case AIOLIOWAIT:
261 262 error = aliowait((int)a1, (void *)a2, (int)a3,
262 263 (struct sigevent *)a4, AIO_64);
263 264 break;
264 265 case AIOSUSPEND:
265 266 error = aiosuspend((void *)a1, (int)a2, (timespec_t *)a3,
266 267 (int)a4, &rval, AIO_64);
267 268 break;
268 269 case AIOERROR:
269 270 error = aioerror((void *)a1, AIO_64);
270 271 break;
271 272 case AIOAREAD:
272 273 error = aiorw((int)a0, (void *)a1, FREAD, AIO_64);
273 274 break;
274 275 case AIOAWRITE:
275 276 error = aiorw((int)a0, (void *)a1, FWRITE, AIO_64);
276 277 break;
277 278 case AIOCANCEL:
278 279 error = aio_cancel((int)a1, (void *)a2, &rval, AIO_64);
279 280 break;
280 281
281 282 /*
282 283 * The large file related stuff is valid only for
283 284 * 32 bit kernel and not for 64 bit kernel
284 285 * On 64 bit kernel we convert large file calls
285 286 * to regular 64bit calls.
286 287 */
287 288
288 289 default:
289 290 error = EINVAL;
290 291 }
291 292 if (error)
292 293 return ((int64_t)set_errno(error));
293 294 return (rval);
294 295 }
295 296 #endif
296 297
297 298 static int
298 299 kaio(
299 300 ulong_t *uap,
300 301 rval_t *rvp)
301 302 {
302 303 long rval = 0;
303 304 int error = 0;
304 305 offset_t off;
305 306
306 307
307 308 rvp->r_vals = 0;
308 309 #if defined(_LITTLE_ENDIAN)
309 310 off = ((u_offset_t)uap[5] << 32) | (u_offset_t)uap[4];
310 311 #else
311 312 off = ((u_offset_t)uap[4] << 32) | (u_offset_t)uap[5];
312 313 #endif
313 314
314 315 switch (uap[0] & ~AIO_POLL_BIT) {
315 316 /*
316 317 * It must be the 32 bit system call on 64 bit kernel
317 318 */
318 319 case AIOREAD:
319 320 return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
320 321 (int)uap[3], off, (aio_result_t *)uap[6], FREAD));
321 322 case AIOWRITE:
322 323 return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
323 324 (int)uap[3], off, (aio_result_t *)uap[6], FWRITE));
324 325 case AIOWAIT:
325 326 error = aiowait((struct timeval *)uap[1], (int)uap[2],
326 327 &rval);
327 328 break;
328 329 case AIOWAITN:
329 330 error = aiowaitn((void *)uap[1], (uint_t)uap[2],
330 331 (uint_t *)uap[3], (timespec_t *)uap[4]);
331 332 break;
332 333 case AIONOTIFY:
333 334 return (aionotify());
334 335 case AIOINIT:
335 336 return (aioinit());
336 337 case AIOSTART:
337 338 return (aiostart());
338 339 case AIOLIO:
339 340 return (alio32((int)uap[1], (void *)uap[2], (int)uap[3],
340 341 (void *)uap[4]));
341 342 case AIOLIOWAIT:
342 343 return (aliowait((int)uap[1], (void *)uap[2],
343 344 (int)uap[3], (struct sigevent *)uap[4], AIO_32));
344 345 case AIOSUSPEND:
345 346 error = aiosuspend((void *)uap[1], (int)uap[2],
346 347 (timespec_t *)uap[3], (int)uap[4],
347 348 &rval, AIO_32);
348 349 break;
349 350 case AIOERROR:
350 351 return (aioerror((void *)uap[1], AIO_32));
351 352 case AIOAREAD:
352 353 return (aiorw((int)uap[0], (void *)uap[1],
353 354 FREAD, AIO_32));
354 355 case AIOAWRITE:
355 356 return (aiorw((int)uap[0], (void *)uap[1],
356 357 FWRITE, AIO_32));
357 358 case AIOCANCEL:
358 359 error = (aio_cancel((int)uap[1], (void *)uap[2], &rval,
359 360 AIO_32));
360 361 break;
361 362 case AIOLIO64:
362 363 return (alioLF((int)uap[1], (void *)uap[2],
363 364 (int)uap[3], (void *)uap[4]));
364 365 case AIOLIOWAIT64:
365 366 return (aliowait(uap[1], (void *)uap[2],
366 367 (int)uap[3], (void *)uap[4], AIO_LARGEFILE));
367 368 case AIOSUSPEND64:
368 369 error = aiosuspend((void *)uap[1], (int)uap[2],
369 370 (timespec_t *)uap[3], (int)uap[4], &rval,
370 371 AIO_LARGEFILE);
371 372 break;
372 373 case AIOERROR64:
373 374 return (aioerror((void *)uap[1], AIO_LARGEFILE));
374 375 case AIOAREAD64:
375 376 return (aiorw((int)uap[0], (void *)uap[1], FREAD,
376 377 AIO_LARGEFILE));
377 378 case AIOAWRITE64:
378 379 return (aiorw((int)uap[0], (void *)uap[1], FWRITE,
379 380 AIO_LARGEFILE));
380 381 case AIOCANCEL64:
381 382 error = (aio_cancel((int)uap[1], (void *)uap[2],
382 383 &rval, AIO_LARGEFILE));
383 384 break;
384 385 default:
385 386 return (EINVAL);
386 387 }
387 388
388 389 rvp->r_val1 = rval;
389 390 return (error);
390 391 }
391 392
392 393 /*
393 394 * wake up LWPs in this process that are sleeping in
394 395 * aiowait().
395 396 */
396 397 static int
397 398 aionotify(void)
398 399 {
399 400 aio_t *aiop;
400 401
401 402 aiop = curproc->p_aio;
402 403 if (aiop == NULL)
403 404 return (0);
404 405
405 406 mutex_enter(&aiop->aio_mutex);
406 407 aiop->aio_notifycnt++;
407 408 cv_broadcast(&aiop->aio_waitcv);
408 409 mutex_exit(&aiop->aio_mutex);
409 410
410 411 return (0);
411 412 }
412 413
413 414 static int
414 415 timeval2reltime(struct timeval *timout, timestruc_t *rqtime,
415 416 timestruc_t **rqtp, int *blocking)
416 417 {
417 418 #ifdef _SYSCALL32_IMPL
418 419 struct timeval32 wait_time_32;
419 420 #endif
420 421 struct timeval wait_time;
421 422 model_t model = get_udatamodel();
422 423
423 424 *rqtp = NULL;
424 425 if (timout == NULL) { /* wait indefinitely */
425 426 *blocking = 1;
426 427 return (0);
427 428 }
428 429
429 430 /*
430 431 * Need to correctly compare with the -1 passed in for a user
431 432 * address pointer, with both 32 bit and 64 bit apps.
432 433 */
433 434 if (model == DATAMODEL_NATIVE) {
434 435 if ((intptr_t)timout == (intptr_t)-1) { /* don't wait */
435 436 *blocking = 0;
436 437 return (0);
437 438 }
438 439
439 440 if (copyin(timout, &wait_time, sizeof (wait_time)))
440 441 return (EFAULT);
441 442 }
442 443 #ifdef _SYSCALL32_IMPL
443 444 else {
444 445 /*
445 446 * -1 from a 32bit app. It will not get sign extended.
446 447 * don't wait if -1.
447 448 */
448 449 if ((intptr_t)timout == (intptr_t)((uint32_t)-1)) {
449 450 *blocking = 0;
450 451 return (0);
451 452 }
452 453
453 454 if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
454 455 return (EFAULT);
455 456 TIMEVAL32_TO_TIMEVAL(&wait_time, &wait_time_32);
456 457 }
457 458 #endif /* _SYSCALL32_IMPL */
458 459
459 460 if (wait_time.tv_sec == 0 && wait_time.tv_usec == 0) { /* don't wait */
460 461 *blocking = 0;
461 462 return (0);
462 463 }
463 464
464 465 if (wait_time.tv_sec < 0 ||
465 466 wait_time.tv_usec < 0 || wait_time.tv_usec >= MICROSEC)
466 467 return (EINVAL);
467 468
468 469 rqtime->tv_sec = wait_time.tv_sec;
469 470 rqtime->tv_nsec = wait_time.tv_usec * 1000;
470 471 *rqtp = rqtime;
471 472 *blocking = 1;
472 473
473 474 return (0);
474 475 }
475 476
476 477 static int
477 478 timespec2reltime(timespec_t *timout, timestruc_t *rqtime,
478 479 timestruc_t **rqtp, int *blocking)
479 480 {
480 481 #ifdef _SYSCALL32_IMPL
481 482 timespec32_t wait_time_32;
482 483 #endif
483 484 model_t model = get_udatamodel();
484 485
485 486 *rqtp = NULL;
486 487 if (timout == NULL) {
487 488 *blocking = 1;
488 489 return (0);
489 490 }
490 491
491 492 if (model == DATAMODEL_NATIVE) {
492 493 if (copyin(timout, rqtime, sizeof (*rqtime)))
493 494 return (EFAULT);
494 495 }
495 496 #ifdef _SYSCALL32_IMPL
496 497 else {
497 498 if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
498 499 return (EFAULT);
499 500 TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32);
500 501 }
501 502 #endif /* _SYSCALL32_IMPL */
502 503
503 504 if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) {
504 505 *blocking = 0;
505 506 return (0);
506 507 }
507 508
508 509 if (rqtime->tv_sec < 0 ||
509 510 rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC)
510 511 return (EINVAL);
511 512
512 513 *rqtp = rqtime;
513 514 *blocking = 1;
514 515
515 516 return (0);
516 517 }
517 518
518 519 /*ARGSUSED*/
519 520 static int
520 521 aiowait(
521 522 struct timeval *timout,
522 523 int dontblockflg,
523 524 long *rval)
524 525 {
525 526 int error;
526 527 aio_t *aiop;
527 528 aio_req_t *reqp;
528 529 clock_t status;
529 530 int blocking;
530 531 int timecheck;
531 532 timestruc_t rqtime;
532 533 timestruc_t *rqtp;
533 534
534 535 aiop = curproc->p_aio;
535 536 if (aiop == NULL)
536 537 return (EINVAL);
537 538
538 539 /*
539 540 * Establish the absolute future time for the timeout.
540 541 */
541 542 error = timeval2reltime(timout, &rqtime, &rqtp, &blocking);
542 543 if (error)
543 544 return (error);
544 545 if (rqtp) {
545 546 timestruc_t now;
546 547 timecheck = timechanged;
547 548 gethrestime(&now);
548 549 timespecadd(rqtp, &now);
549 550 }
550 551
551 552 mutex_enter(&aiop->aio_mutex);
552 553 for (;;) {
553 554 /* process requests on poll queue */
554 555 if (aiop->aio_pollq) {
555 556 mutex_exit(&aiop->aio_mutex);
556 557 aio_cleanup(0);
557 558 mutex_enter(&aiop->aio_mutex);
558 559 }
559 560 if ((reqp = aio_req_remove(NULL)) != NULL) {
560 561 *rval = (long)reqp->aio_req_resultp;
561 562 break;
562 563 }
563 564 /* user-level done queue might not be empty */
564 565 if (aiop->aio_notifycnt > 0) {
565 566 aiop->aio_notifycnt--;
566 567 *rval = 1;
567 568 break;
568 569 }
569 570 /* don't block if no outstanding aio */
570 571 if (aiop->aio_outstanding == 0 && dontblockflg) {
571 572 error = EINVAL;
572 573 break;
573 574 }
574 575 if (blocking) {
575 576 status = cv_waituntil_sig(&aiop->aio_waitcv,
576 577 &aiop->aio_mutex, rqtp, timecheck);
577 578
578 579 if (status > 0) /* check done queue again */
579 580 continue;
580 581 if (status == 0) { /* interrupted by a signal */
581 582 error = EINTR;
582 583 *rval = -1;
583 584 } else { /* timer expired */
584 585 error = ETIME;
585 586 }
586 587 }
587 588 break;
588 589 }
589 590 mutex_exit(&aiop->aio_mutex);
590 591 if (reqp) {
591 592 aphysio_unlock(reqp);
592 593 aio_copyout_result(reqp);
593 594 mutex_enter(&aiop->aio_mutex);
594 595 aio_req_free(aiop, reqp);
595 596 mutex_exit(&aiop->aio_mutex);
596 597 }
597 598 return (error);
598 599 }
599 600
600 601 /*
601 602 * aiowaitn can be used to reap completed asynchronous requests submitted with
602 603 * lio_listio, aio_read or aio_write.
603 604 * This function only reaps asynchronous raw I/Os.
604 605 */
605 606
606 607 /*ARGSUSED*/
607 608 static int
608 609 aiowaitn(void *uiocb, uint_t nent, uint_t *nwait, timespec_t *timout)
609 610 {
610 611 int error = 0;
611 612 aio_t *aiop;
612 613 aio_req_t *reqlist = NULL;
613 614 caddr_t iocblist = NULL; /* array of iocb ptr's */
614 615 uint_t waitcnt, cnt = 0; /* iocb cnt */
615 616 size_t iocbsz; /* users iocb size */
616 617 size_t riocbsz; /* returned iocb size */
617 618 int iocb_index = 0;
618 619 model_t model = get_udatamodel();
619 620 int blocking = 1;
620 621 int timecheck;
621 622 timestruc_t rqtime;
622 623 timestruc_t *rqtp;
623 624
624 625 aiop = curproc->p_aio;
625 626 if (aiop == NULL || nent == 0 || nent > _AIO_LISTIO_MAX)
626 627 return (EINVAL);
627 628
628 629 if (aiop->aio_outstanding == 0)
629 630 return (EAGAIN);
630 631
631 632 if (copyin(nwait, &waitcnt, sizeof (uint_t)))
632 633 return (EFAULT);
633 634
634 635 /* set *nwait to zero, if we must return prematurely */
635 636 if (copyout(&cnt, nwait, sizeof (uint_t)))
636 637 return (EFAULT);
637 638
638 639 if (waitcnt == 0) {
639 640 blocking = 0;
640 641 rqtp = NULL;
641 642 waitcnt = nent;
642 643 } else {
643 644 error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
644 645 if (error)
645 646 return (error);
646 647 }
647 648
648 649 if (model == DATAMODEL_NATIVE)
649 650 iocbsz = (sizeof (aiocb_t *) * nent);
650 651 #ifdef _SYSCALL32_IMPL
651 652 else
652 653 iocbsz = (sizeof (caddr32_t) * nent);
653 654 #endif /* _SYSCALL32_IMPL */
654 655
655 656 /*
656 657 * Only one aio_waitn call is allowed at a time.
657 658 * The active aio_waitn will collect all requests
658 659 * out of the "done" list and if necessary it will wait
659 660 * for some/all pending requests to fulfill the nwait
660 661 * parameter.
661 662 * A second or further aio_waitn calls will sleep here
662 663 * until the active aio_waitn finishes and leaves the kernel
663 664 * If the second call does not block (poll), then return
664 665 * immediately with the error code : EAGAIN.
665 666 * If the second call should block, then sleep here, but
666 667 * do not touch the timeout. The timeout starts when this
667 668 * aio_waitn-call becomes active.
668 669 */
669 670
670 671 mutex_enter(&aiop->aio_mutex);
671 672
672 673 while (aiop->aio_flags & AIO_WAITN) {
673 674 if (blocking == 0) {
674 675 mutex_exit(&aiop->aio_mutex);
675 676 return (EAGAIN);
676 677 }
677 678
678 679 /* block, no timeout */
679 680 aiop->aio_flags |= AIO_WAITN_PENDING;
680 681 if (!cv_wait_sig(&aiop->aio_waitncv, &aiop->aio_mutex)) {
681 682 mutex_exit(&aiop->aio_mutex);
682 683 return (EINTR);
683 684 }
684 685 }
685 686
686 687 /*
687 688 * Establish the absolute future time for the timeout.
688 689 */
689 690 if (rqtp) {
690 691 timestruc_t now;
691 692 timecheck = timechanged;
692 693 gethrestime(&now);
693 694 timespecadd(rqtp, &now);
694 695 }
695 696
696 697 if (iocbsz > aiop->aio_iocbsz && aiop->aio_iocb != NULL) {
697 698 kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);
698 699 aiop->aio_iocb = NULL;
699 700 }
700 701
701 702 if (aiop->aio_iocb == NULL) {
702 703 iocblist = kmem_zalloc(iocbsz, KM_NOSLEEP);
703 704 if (iocblist == NULL) {
704 705 mutex_exit(&aiop->aio_mutex);
705 706 return (ENOMEM);
706 707 }
707 708 aiop->aio_iocb = (aiocb_t **)iocblist;
708 709 aiop->aio_iocbsz = iocbsz;
709 710 } else {
710 711 iocblist = (char *)aiop->aio_iocb;
711 712 }
712 713
713 714 aiop->aio_waitncnt = waitcnt;
714 715 aiop->aio_flags |= AIO_WAITN;
715 716
716 717 for (;;) {
717 718 /* push requests on poll queue to done queue */
718 719 if (aiop->aio_pollq) {
719 720 mutex_exit(&aiop->aio_mutex);
720 721 aio_cleanup(0);
721 722 mutex_enter(&aiop->aio_mutex);
722 723 }
723 724
724 725 /* check for requests on done queue */
725 726 if (aiop->aio_doneq) {
726 727 cnt += aio_reqlist_concat(aiop, &reqlist, nent - cnt);
727 728 aiop->aio_waitncnt = waitcnt - cnt;
728 729 }
729 730
730 731 /* user-level done queue might not be empty */
731 732 if (aiop->aio_notifycnt > 0) {
732 733 aiop->aio_notifycnt--;
733 734 error = 0;
734 735 break;
735 736 }
736 737
737 738 /*
738 739 * if we are here second time as a result of timer
739 740 * expiration, we reset error if there are enough
740 741 * aiocb's to satisfy request.
741 742 * We return also if all requests are already done
742 743 * and we picked up the whole done queue.
743 744 */
744 745
745 746 if ((cnt >= waitcnt) || (cnt > 0 && aiop->aio_pending == 0 &&
746 747 aiop->aio_doneq == NULL)) {
747 748 error = 0;
748 749 break;
749 750 }
750 751
751 752 if ((cnt < waitcnt) && blocking) {
752 753 int rval = cv_waituntil_sig(&aiop->aio_waitcv,
753 754 &aiop->aio_mutex, rqtp, timecheck);
754 755 if (rval > 0)
755 756 continue;
756 757 if (rval < 0) {
757 758 error = ETIME;
758 759 blocking = 0;
759 760 continue;
760 761 }
761 762 error = EINTR;
762 763 }
763 764 break;
764 765 }
765 766
766 767 mutex_exit(&aiop->aio_mutex);
767 768
768 769 if (cnt > 0) {
769 770
770 771 iocb_index = aio_unlock_requests(iocblist, iocb_index, reqlist,
771 772 aiop, model);
772 773
773 774 if (model == DATAMODEL_NATIVE)
774 775 riocbsz = (sizeof (aiocb_t *) * cnt);
775 776 #ifdef _SYSCALL32_IMPL
776 777 else
777 778 riocbsz = (sizeof (caddr32_t) * cnt);
778 779 #endif /* _SYSCALL32_IMPL */
779 780
780 781 if (copyout(iocblist, uiocb, riocbsz) ||
781 782 copyout(&cnt, nwait, sizeof (uint_t)))
782 783 error = EFAULT;
783 784 }
784 785
785 786 /* check if there is another thread waiting for execution */
786 787 mutex_enter(&aiop->aio_mutex);
787 788 aiop->aio_flags &= ~AIO_WAITN;
788 789 if (aiop->aio_flags & AIO_WAITN_PENDING) {
789 790 aiop->aio_flags &= ~AIO_WAITN_PENDING;
790 791 cv_signal(&aiop->aio_waitncv);
791 792 }
792 793 mutex_exit(&aiop->aio_mutex);
793 794
794 795 return (error);
795 796 }
796 797
797 798 /*
798 799 * aio_unlock_requests
799 800 * copyouts the result of the request as well as the return value.
800 801 * It builds the list of completed asynchronous requests,
801 802 * unlocks the allocated memory ranges and
802 803 * put the aio request structure back into the free list.
803 804 */
804 805
805 806 static int
806 807 aio_unlock_requests(
807 808 caddr_t iocblist,
808 809 int iocb_index,
809 810 aio_req_t *reqlist,
810 811 aio_t *aiop,
811 812 model_t model)
812 813 {
813 814 aio_req_t *reqp, *nreqp;
814 815
815 816 if (model == DATAMODEL_NATIVE) {
816 817 for (reqp = reqlist; reqp != NULL; reqp = nreqp) {
817 818 (((caddr_t *)iocblist)[iocb_index++]) =
818 819 reqp->aio_req_iocb.iocb;
819 820 nreqp = reqp->aio_req_next;
820 821 aphysio_unlock(reqp);
821 822 aio_copyout_result(reqp);
822 823 mutex_enter(&aiop->aio_mutex);
823 824 aio_req_free(aiop, reqp);
824 825 mutex_exit(&aiop->aio_mutex);
825 826 }
826 827 }
827 828 #ifdef _SYSCALL32_IMPL
828 829 else {
829 830 for (reqp = reqlist; reqp != NULL; reqp = nreqp) {
830 831 ((caddr32_t *)iocblist)[iocb_index++] =
831 832 reqp->aio_req_iocb.iocb32;
832 833 nreqp = reqp->aio_req_next;
833 834 aphysio_unlock(reqp);
834 835 aio_copyout_result(reqp);
835 836 mutex_enter(&aiop->aio_mutex);
836 837 aio_req_free(aiop, reqp);
837 838 mutex_exit(&aiop->aio_mutex);
838 839 }
839 840 }
840 841 #endif /* _SYSCALL32_IMPL */
841 842 return (iocb_index);
842 843 }
843 844
844 845 /*
845 846 * aio_reqlist_concat
846 847 * moves "max" elements from the done queue to the reqlist queue and removes
847 848 * the AIO_DONEQ flag.
848 849 * - reqlist queue is a simple linked list
849 850 * - done queue is a double linked list
850 851 */
851 852
852 853 static int
853 854 aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max)
854 855 {
855 856 aio_req_t *q2, *q2work, *list;
856 857 int count = 0;
857 858
858 859 list = *reqlist;
859 860 q2 = aiop->aio_doneq;
860 861 q2work = q2;
861 862 while (max-- > 0) {
862 863 q2work->aio_req_flags &= ~AIO_DONEQ;
863 864 q2work = q2work->aio_req_next;
864 865 count++;
865 866 if (q2work == q2)
866 867 break;
867 868 }
868 869
869 870 if (q2work == q2) {
870 871 /* all elements revised */
871 872 q2->aio_req_prev->aio_req_next = list;
872 873 list = q2;
873 874 aiop->aio_doneq = NULL;
874 875 } else {
875 876 /*
876 877 * max < elements in the doneq
877 878 * detach only the required amount of elements
878 879 * out of the doneq
879 880 */
880 881 q2work->aio_req_prev->aio_req_next = list;
881 882 list = q2;
882 883
883 884 aiop->aio_doneq = q2work;
884 885 q2work->aio_req_prev = q2->aio_req_prev;
885 886 q2->aio_req_prev->aio_req_next = q2work;
886 887 }
887 888 *reqlist = list;
888 889 return (count);
889 890 }
890 891
891 892 /*ARGSUSED*/
892 893 static int
893 894 aiosuspend(
894 895 void *aiocb,
895 896 int nent,
896 897 struct timespec *timout,
897 898 int flag,
898 899 long *rval,
899 900 int run_mode)
900 901 {
901 902 int error;
902 903 aio_t *aiop;
903 904 aio_req_t *reqp, *found, *next;
904 905 caddr_t cbplist = NULL;
905 906 aiocb_t *cbp, **ucbp;
906 907 #ifdef _SYSCALL32_IMPL
907 908 aiocb32_t *cbp32;
908 909 caddr32_t *ucbp32;
909 910 #endif /* _SYSCALL32_IMPL */
910 911 aiocb64_32_t *cbp64;
911 912 int rv;
912 913 int i;
913 914 size_t ssize;
914 915 model_t model = get_udatamodel();
915 916 int blocking;
916 917 int timecheck;
917 918 timestruc_t rqtime;
918 919 timestruc_t *rqtp;
919 920
920 921 aiop = curproc->p_aio;
921 922 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
922 923 return (EINVAL);
923 924
924 925 /*
925 926 * Establish the absolute future time for the timeout.
926 927 */
927 928 error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
928 929 if (error)
929 930 return (error);
930 931 if (rqtp) {
931 932 timestruc_t now;
932 933 timecheck = timechanged;
933 934 gethrestime(&now);
934 935 timespecadd(rqtp, &now);
935 936 }
936 937
937 938 /*
938 939 * If we are not blocking and there's no IO complete
939 940 * skip aiocb copyin.
940 941 */
941 942 if (!blocking && (aiop->aio_pollq == NULL) &&
942 943 (aiop->aio_doneq == NULL)) {
943 944 return (EAGAIN);
944 945 }
945 946
946 947 if (model == DATAMODEL_NATIVE)
947 948 ssize = (sizeof (aiocb_t *) * nent);
948 949 #ifdef _SYSCALL32_IMPL
949 950 else
950 951 ssize = (sizeof (caddr32_t) * nent);
951 952 #endif /* _SYSCALL32_IMPL */
952 953
953 954 cbplist = kmem_alloc(ssize, KM_NOSLEEP);
954 955 if (cbplist == NULL)
955 956 return (ENOMEM);
956 957
957 958 if (copyin(aiocb, cbplist, ssize)) {
958 959 error = EFAULT;
959 960 goto done;
960 961 }
961 962
962 963 found = NULL;
963 964 /*
964 965 * we need to get the aio_cleanupq_mutex since we call
965 966 * aio_req_done().
966 967 */
967 968 mutex_enter(&aiop->aio_cleanupq_mutex);
968 969 mutex_enter(&aiop->aio_mutex);
969 970 for (;;) {
970 971 /* push requests on poll queue to done queue */
971 972 if (aiop->aio_pollq) {
972 973 mutex_exit(&aiop->aio_mutex);
973 974 mutex_exit(&aiop->aio_cleanupq_mutex);
974 975 aio_cleanup(0);
975 976 mutex_enter(&aiop->aio_cleanupq_mutex);
976 977 mutex_enter(&aiop->aio_mutex);
977 978 }
978 979 /* check for requests on done queue */
979 980 if (aiop->aio_doneq) {
980 981 if (model == DATAMODEL_NATIVE)
981 982 ucbp = (aiocb_t **)cbplist;
982 983 #ifdef _SYSCALL32_IMPL
983 984 else
984 985 ucbp32 = (caddr32_t *)cbplist;
985 986 #endif /* _SYSCALL32_IMPL */
986 987 for (i = 0; i < nent; i++) {
987 988 if (model == DATAMODEL_NATIVE) {
988 989 if ((cbp = *ucbp++) == NULL)
989 990 continue;
990 991 if (run_mode != AIO_LARGEFILE)
991 992 reqp = aio_req_done(
992 993 &cbp->aio_resultp);
993 994 else {
994 995 cbp64 = (aiocb64_32_t *)cbp;
995 996 reqp = aio_req_done(
996 997 &cbp64->aio_resultp);
997 998 }
998 999 }
999 1000 #ifdef _SYSCALL32_IMPL
1000 1001 else {
1001 1002 if (run_mode == AIO_32) {
1002 1003 if ((cbp32 =
1003 1004 (aiocb32_t *)(uintptr_t)
1004 1005 *ucbp32++) == NULL)
1005 1006 continue;
1006 1007 reqp = aio_req_done(
1007 1008 &cbp32->aio_resultp);
1008 1009 } else if (run_mode == AIO_LARGEFILE) {
1009 1010 if ((cbp64 =
1010 1011 (aiocb64_32_t *)(uintptr_t)
1011 1012 *ucbp32++) == NULL)
1012 1013 continue;
1013 1014 reqp = aio_req_done(
1014 1015 &cbp64->aio_resultp);
1015 1016 }
1016 1017
1017 1018 }
1018 1019 #endif /* _SYSCALL32_IMPL */
1019 1020 if (reqp) {
1020 1021 reqp->aio_req_next = found;
1021 1022 found = reqp;
1022 1023 }
1023 1024 if (aiop->aio_doneq == NULL)
1024 1025 break;
1025 1026 }
1026 1027 if (found)
1027 1028 break;
1028 1029 }
1029 1030 if (aiop->aio_notifycnt > 0) {
1030 1031 /*
1031 1032 * nothing on the kernel's queue. the user
1032 1033 * has notified the kernel that it has items
1033 1034 * on a user-level queue.
1034 1035 */
1035 1036 aiop->aio_notifycnt--;
1036 1037 *rval = 1;
1037 1038 error = 0;
1038 1039 break;
1039 1040 }
1040 1041 /* don't block if nothing is outstanding */
1041 1042 if (aiop->aio_outstanding == 0) {
1042 1043 error = EAGAIN;
1043 1044 break;
1044 1045 }
1045 1046 if (blocking) {
1046 1047 /*
1047 1048 * drop the aio_cleanupq_mutex as we are
1048 1049 * going to block.
1049 1050 */
1050 1051 mutex_exit(&aiop->aio_cleanupq_mutex);
1051 1052 rv = cv_waituntil_sig(&aiop->aio_waitcv,
1052 1053 &aiop->aio_mutex, rqtp, timecheck);
1053 1054 /*
1054 1055 * we have to drop aio_mutex and
1055 1056 * grab it in the right order.
1056 1057 */
1057 1058 mutex_exit(&aiop->aio_mutex);
1058 1059 mutex_enter(&aiop->aio_cleanupq_mutex);
1059 1060 mutex_enter(&aiop->aio_mutex);
1060 1061 if (rv > 0) /* check done queue again */
1061 1062 continue;
1062 1063 if (rv == 0) /* interrupted by a signal */
1063 1064 error = EINTR;
1064 1065 else /* timer expired */
1065 1066 error = ETIME;
1066 1067 } else {
1067 1068 error = EAGAIN;
1068 1069 }
1069 1070 break;
1070 1071 }
1071 1072 mutex_exit(&aiop->aio_mutex);
1072 1073 mutex_exit(&aiop->aio_cleanupq_mutex);
1073 1074 for (reqp = found; reqp != NULL; reqp = next) {
1074 1075 next = reqp->aio_req_next;
1075 1076 aphysio_unlock(reqp);
1076 1077 aio_copyout_result(reqp);
1077 1078 mutex_enter(&aiop->aio_mutex);
1078 1079 aio_req_free(aiop, reqp);
1079 1080 mutex_exit(&aiop->aio_mutex);
1080 1081 }
1081 1082 done:
1082 1083 kmem_free(cbplist, ssize);
1083 1084 return (error);
1084 1085 }
1085 1086
1086 1087 /*
1087 1088 * initialize aio by allocating an aio_t struct for this
1088 1089 * process.
1089 1090 */
1090 1091 static int
1091 1092 aioinit(void)
1092 1093 {
1093 1094 proc_t *p = curproc;
1094 1095 aio_t *aiop;
1095 1096 mutex_enter(&p->p_lock);
1096 1097 if ((aiop = p->p_aio) == NULL) {
1097 1098 aiop = aio_aiop_alloc();
1098 1099 p->p_aio = aiop;
1099 1100 }
1100 1101 mutex_exit(&p->p_lock);
1101 1102 if (aiop == NULL)
1102 1103 return (ENOMEM);
1103 1104 return (0);
1104 1105 }
1105 1106
1106 1107 /*
1107 1108 * start a special thread that will cleanup after aio requests
1108 1109 * that are preventing a segment from being unmapped. as_unmap()
1109 1110  * blocks until all physio to this segment is completed. this
1110 1111 * doesn't happen until all the pages in this segment are not
1111 1112 * SOFTLOCKed. Some pages will be SOFTLOCKed when there are aio
1112 1113 * requests still outstanding. this special thread will make sure
1113 1114 * that these SOFTLOCKed pages will eventually be SOFTUNLOCKed.
1114 1115 *
1115 1116 * this function will return an error if the process has only
1116 1117 * one LWP. the assumption is that the caller is a separate LWP
1117 1118 * that remains blocked in the kernel for the life of this process.
1118 1119 */
1119 1120 static int
1120 1121 aiostart(void)
1121 1122 {
1122 1123 proc_t *p = curproc;
1123 1124 aio_t *aiop;
1124 1125 int first, error = 0;
1125 1126
1126 1127 if (p->p_lwpcnt == 1)
1127 1128 return (EDEADLK);
1128 1129 mutex_enter(&p->p_lock);
1129 1130 if ((aiop = p->p_aio) == NULL)
1130 1131 error = EINVAL;
1131 1132 else {
1132 1133 first = aiop->aio_ok;
1133 1134 if (aiop->aio_ok == 0)
1134 1135 aiop->aio_ok = 1;
1135 1136 }
1136 1137 mutex_exit(&p->p_lock);
1137 1138 if (error == 0 && first == 0) {
1138 1139 return (aio_cleanup_thread(aiop));
1139 1140 /* should return only to exit */
1140 1141 }
1141 1142 return (error);
1142 1143 }
1143 1144
1144 1145 /*
1145 1146 * Associate an aiocb with a port.
1146 1147 * This function is used by aiorw() to associate a transaction with a port.
1147 1148 * Allocate an event port structure (port_alloc_event()) and store the
1148 1149 * delivered user pointer (portnfy_user) in the portkev_user field of the
1149 1150 * port_kevent_t structure..
1150 1151 * The aio_req_portkev pointer in the aio_req_t structure was added to identify
1151 1152 * the port association.
1152 1153 */
1153 1154
1154 1155 static int
1155 1156 aio_req_assoc_port_rw(port_notify_t *pntfy, aiocb_t *cbp,
1156 1157 aio_req_t *reqp, int event)
1157 1158 {
1158 1159 port_kevent_t *pkevp = NULL;
1159 1160 int error;
1160 1161
1161 1162 error = port_alloc_event(pntfy->portnfy_port, PORT_ALLOC_DEFAULT,
1162 1163 PORT_SOURCE_AIO, &pkevp);
1163 1164 if (error) {
1164 1165 if ((error == ENOMEM) || (error == EAGAIN))
1165 1166 error = EAGAIN;
1166 1167 else
1167 1168 error = EINVAL;
1168 1169 } else {
1169 1170 port_init_event(pkevp, (uintptr_t)cbp, pntfy->portnfy_user,
1170 1171 aio_port_callback, reqp);
1171 1172 pkevp->portkev_events = event;
1172 1173 reqp->aio_req_portkev = pkevp;
1173 1174 reqp->aio_req_port = pntfy->portnfy_port;
1174 1175 }
1175 1176 return (error);
1176 1177 }
1177 1178
1178 1179 #ifdef _LP64
1179 1180
1180 1181 /*
1181 1182 * Asynchronous list IO. A chain of aiocb's are copied in
1182 1183 * one at a time. If the aiocb is invalid, it is skipped.
1183 1184 * For each aiocb, the appropriate driver entry point is
1184 1185 * called. Optimize for the common case where the list
1185 1186 * of requests is to the same file descriptor.
1186 1187 *
1187 1188 * One possible optimization is to define a new driver entry
1188 1189 * point that supports a list of IO requests. Whether this
1189 1190 * improves performance depends somewhat on the driver's
1190 1191 * locking strategy. Processing a list could adversely impact
1191 1192 * the driver's interrupt latency.
1192 1193 */
1193 1194 static int
1194 1195 alio(
1195 1196 int mode_arg,
1196 1197 aiocb_t **aiocb_arg,
1197 1198 int nent,
1198 1199 struct sigevent *sigev)
1199 1200 {
1200 1201 file_t *fp;
1201 1202 file_t *prev_fp = NULL;
1202 1203 int prev_mode = -1;
1203 1204 struct vnode *vp;
1204 1205 aio_lio_t *head;
1205 1206 aio_req_t *reqp;
1206 1207 aio_t *aiop;
1207 1208 caddr_t cbplist;
1208 1209 aiocb_t cb;
1209 1210 aiocb_t *aiocb = &cb;
1210 1211 aiocb_t *cbp;
1211 1212 aiocb_t **ucbp;
1212 1213 struct sigevent sigevk;
1213 1214 sigqueue_t *sqp;
1214 1215 int (*aio_func)();
1215 1216 int mode;
1216 1217 int error = 0;
1217 1218 int aio_errors = 0;
1218 1219 int i;
1219 1220 size_t ssize;
1220 1221 int deadhead = 0;
1221 1222 int aio_notsupported = 0;
1222 1223 int lio_head_port;
1223 1224 int aio_port;
1224 1225 int aio_thread;
1225 1226 port_kevent_t *pkevtp = NULL;
1226 1227 int portused = 0;
1227 1228 port_notify_t pnotify;
1228 1229 int event;
1229 1230
1230 1231 aiop = curproc->p_aio;
1231 1232 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
1232 1233 return (EINVAL);
1233 1234
1234 1235 ssize = (sizeof (aiocb_t *) * nent);
1235 1236 cbplist = kmem_alloc(ssize, KM_SLEEP);
1236 1237 ucbp = (aiocb_t **)cbplist;
1237 1238
1238 1239 if (copyin(aiocb_arg, cbplist, ssize) ||
1239 1240 (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent)))) {
1240 1241 kmem_free(cbplist, ssize);
1241 1242 return (EFAULT);
1242 1243 }
1243 1244
1244 1245 /* Event Ports */
1245 1246 if (sigev &&
1246 1247 (sigevk.sigev_notify == SIGEV_THREAD ||
1247 1248 sigevk.sigev_notify == SIGEV_PORT)) {
1248 1249 if (sigevk.sigev_notify == SIGEV_THREAD) {
1249 1250 pnotify.portnfy_port = sigevk.sigev_signo;
1250 1251 pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
1251 1252 } else if (copyin(sigevk.sigev_value.sival_ptr,
1252 1253 &pnotify, sizeof (pnotify))) {
1253 1254 kmem_free(cbplist, ssize);
1254 1255 return (EFAULT);
1255 1256 }
1256 1257 error = port_alloc_event(pnotify.portnfy_port,
1257 1258 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
1258 1259 if (error) {
1259 1260 if (error == ENOMEM || error == EAGAIN)
1260 1261 error = EAGAIN;
1261 1262 else
1262 1263 error = EINVAL;
1263 1264 kmem_free(cbplist, ssize);
1264 1265 return (error);
1265 1266 }
1266 1267 lio_head_port = pnotify.portnfy_port;
1267 1268 portused = 1;
1268 1269 }
1269 1270
1270 1271 /*
1271 1272 * a list head should be allocated if notification is
1272 1273 * enabled for this list.
1273 1274 */
1274 1275 head = NULL;
1275 1276
1276 1277 if (mode_arg == LIO_WAIT || sigev) {
1277 1278 mutex_enter(&aiop->aio_mutex);
1278 1279 error = aio_lio_alloc(&head);
1279 1280 mutex_exit(&aiop->aio_mutex);
1280 1281 if (error)
1281 1282 goto done;
1282 1283 deadhead = 1;
1283 1284 head->lio_nent = nent;
1284 1285 head->lio_refcnt = nent;
1285 1286 head->lio_port = -1;
1286 1287 head->lio_portkev = NULL;
1287 1288 if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
1288 1289 sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
1289 1290 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
1290 1291 if (sqp == NULL) {
1291 1292 error = EAGAIN;
1292 1293 goto done;
1293 1294 }
1294 1295 sqp->sq_func = NULL;
1295 1296 sqp->sq_next = NULL;
1296 1297 sqp->sq_info.si_code = SI_ASYNCIO;
1297 1298 sqp->sq_info.si_pid = curproc->p_pid;
1298 1299 sqp->sq_info.si_ctid = PRCTID(curproc);
1299 1300 sqp->sq_info.si_zoneid = getzoneid();
1300 1301 sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
1301 1302 sqp->sq_info.si_signo = sigevk.sigev_signo;
1302 1303 sqp->sq_info.si_value = sigevk.sigev_value;
1303 1304 head->lio_sigqp = sqp;
1304 1305 } else {
1305 1306 head->lio_sigqp = NULL;
1306 1307 }
1307 1308 if (pkevtp) {
1308 1309 /*
1309 1310 * Prepare data to send when list of aiocb's
1310 1311 * has completed.
1311 1312 */
1312 1313 port_init_event(pkevtp, (uintptr_t)sigev,
1313 1314 (void *)(uintptr_t)pnotify.portnfy_user,
1314 1315 NULL, head);
1315 1316 pkevtp->portkev_events = AIOLIO;
1316 1317 head->lio_portkev = pkevtp;
1317 1318 head->lio_port = pnotify.portnfy_port;
1318 1319 }
1319 1320 }
1320 1321
1321 1322 for (i = 0; i < nent; i++, ucbp++) {
1322 1323
1323 1324 cbp = *ucbp;
1324 1325 /* skip entry if it can't be copied. */
1325 1326 if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
1326 1327 if (head) {
1327 1328 mutex_enter(&aiop->aio_mutex);
1328 1329 head->lio_nent--;
1329 1330 head->lio_refcnt--;
1330 1331 mutex_exit(&aiop->aio_mutex);
1331 1332 }
1332 1333 continue;
1333 1334 }
1334 1335
1335 1336 /* skip if opcode for aiocb is LIO_NOP */
1336 1337 mode = aiocb->aio_lio_opcode;
1337 1338 if (mode == LIO_NOP) {
1338 1339 cbp = NULL;
1339 1340 if (head) {
1340 1341 mutex_enter(&aiop->aio_mutex);
1341 1342 head->lio_nent--;
1342 1343 head->lio_refcnt--;
1343 1344 mutex_exit(&aiop->aio_mutex);
1344 1345 }
1345 1346 continue;
1346 1347 }
1347 1348
1348 1349 /* increment file descriptor's ref count. */
1349 1350 if ((fp = getf(aiocb->aio_fildes)) == NULL) {
1350 1351 lio_set_uerror(&cbp->aio_resultp, EBADF);
1351 1352 if (head) {
1352 1353 mutex_enter(&aiop->aio_mutex);
1353 1354 head->lio_nent--;
1354 1355 head->lio_refcnt--;
1355 1356 mutex_exit(&aiop->aio_mutex);
1356 1357 }
1357 1358 aio_errors++;
1358 1359 continue;
1359 1360 }
1360 1361
1361 1362 /*
1362 1363 * check the permission of the partition
1363 1364 */
1364 1365 if ((fp->f_flag & mode) == 0) {
1365 1366 releasef(aiocb->aio_fildes);
1366 1367 lio_set_uerror(&cbp->aio_resultp, EBADF);
1367 1368 if (head) {
1368 1369 mutex_enter(&aiop->aio_mutex);
1369 1370 head->lio_nent--;
1370 1371 head->lio_refcnt--;
1371 1372 mutex_exit(&aiop->aio_mutex);
1372 1373 }
1373 1374 aio_errors++;
1374 1375 continue;
1375 1376 }
1376 1377
1377 1378 /*
1378 1379 * common case where requests are to the same fd
1379 1380 * for the same r/w operation.
1380 1381 * for UFS, need to set EBADFD
1381 1382 */
1382 1383 vp = fp->f_vnode;
1383 1384 if (fp != prev_fp || mode != prev_mode) {
1384 1385 aio_func = check_vp(vp, mode);
1385 1386 if (aio_func == NULL) {
1386 1387 prev_fp = NULL;
1387 1388 releasef(aiocb->aio_fildes);
1388 1389 lio_set_uerror(&cbp->aio_resultp, EBADFD);
1389 1390 aio_notsupported++;
1390 1391 if (head) {
1391 1392 mutex_enter(&aiop->aio_mutex);
1392 1393 head->lio_nent--;
1393 1394 head->lio_refcnt--;
1394 1395 mutex_exit(&aiop->aio_mutex);
1395 1396 }
1396 1397 continue;
1397 1398 } else {
1398 1399 prev_fp = fp;
1399 1400 prev_mode = mode;
1400 1401 }
1401 1402 }
1402 1403
1403 1404 error = aio_req_setup(&reqp, aiop, aiocb,
1404 1405 &cbp->aio_resultp, vp, 0);
1405 1406 if (error) {
1406 1407 releasef(aiocb->aio_fildes);
1407 1408 lio_set_uerror(&cbp->aio_resultp, error);
1408 1409 if (head) {
1409 1410 mutex_enter(&aiop->aio_mutex);
1410 1411 head->lio_nent--;
1411 1412 head->lio_refcnt--;
1412 1413 mutex_exit(&aiop->aio_mutex);
1413 1414 }
1414 1415 aio_errors++;
1415 1416 continue;
1416 1417 }
1417 1418
1418 1419 reqp->aio_req_lio = head;
1419 1420 deadhead = 0;
1420 1421
1421 1422 /*
1422 1423 * Set the errno field now before sending the request to
1423 1424 * the driver to avoid a race condition
1424 1425 */
1425 1426 (void) suword32(&cbp->aio_resultp.aio_errno,
1426 1427 EINPROGRESS);
1427 1428
1428 1429 reqp->aio_req_iocb.iocb = (caddr_t)cbp;
1429 1430
1430 1431 event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
1431 1432 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
1432 1433 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
1433 1434 if (aio_port | aio_thread) {
1434 1435 port_kevent_t *lpkevp;
1435 1436 /*
1436 1437 * Prepare data to send with each aiocb completed.
1437 1438 */
1438 1439 if (aio_port) {
1439 1440 void *paddr =
1440 1441 aiocb->aio_sigevent.sigev_value.sival_ptr;
1441 1442 if (copyin(paddr, &pnotify, sizeof (pnotify)))
1442 1443 error = EFAULT;
1443 1444 } else { /* aio_thread */
1444 1445 pnotify.portnfy_port =
1445 1446 aiocb->aio_sigevent.sigev_signo;
1446 1447 pnotify.portnfy_user =
1447 1448 aiocb->aio_sigevent.sigev_value.sival_ptr;
1448 1449 }
1449 1450 if (error)
1450 1451 /* EMPTY */;
1451 1452 else if (pkevtp != NULL &&
1452 1453 pnotify.portnfy_port == lio_head_port)
1453 1454 error = port_dup_event(pkevtp, &lpkevp,
1454 1455 PORT_ALLOC_DEFAULT);
1455 1456 else
1456 1457 error = port_alloc_event(pnotify.portnfy_port,
1457 1458 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
1458 1459 &lpkevp);
1459 1460 if (error == 0) {
1460 1461 port_init_event(lpkevp, (uintptr_t)cbp,
1461 1462 (void *)(uintptr_t)pnotify.portnfy_user,
1462 1463 aio_port_callback, reqp);
1463 1464 lpkevp->portkev_events = event;
1464 1465 reqp->aio_req_portkev = lpkevp;
1465 1466 reqp->aio_req_port = pnotify.portnfy_port;
1466 1467 }
1467 1468 }
1468 1469
1469 1470 /*
1470 1471 * send the request to driver.
1471 1472 */
1472 1473 if (error == 0) {
1473 1474 if (aiocb->aio_nbytes == 0) {
1474 1475 clear_active_fd(aiocb->aio_fildes);
1475 1476 aio_zerolen(reqp);
1476 1477 continue;
1477 1478 }
1478 1479 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
1479 1480 CRED());
1480 1481 }
1481 1482
1482 1483 /*
1483 1484 * the fd's ref count is not decremented until the IO has
1484 1485 * completed unless there was an error.
1485 1486 */
1486 1487 if (error) {
1487 1488 releasef(aiocb->aio_fildes);
1488 1489 lio_set_uerror(&cbp->aio_resultp, error);
1489 1490 if (head) {
1490 1491 mutex_enter(&aiop->aio_mutex);
1491 1492 head->lio_nent--;
1492 1493 head->lio_refcnt--;
1493 1494 mutex_exit(&aiop->aio_mutex);
1494 1495 }
1495 1496 if (error == ENOTSUP)
1496 1497 aio_notsupported++;
1497 1498 else
1498 1499 aio_errors++;
1499 1500 lio_set_error(reqp, portused);
1500 1501 } else {
1501 1502 clear_active_fd(aiocb->aio_fildes);
1502 1503 }
1503 1504 }
1504 1505
1505 1506 if (aio_notsupported) {
1506 1507 error = ENOTSUP;
1507 1508 } else if (aio_errors) {
1508 1509 /*
1509 1510 * return EIO if any request failed
1510 1511 */
1511 1512 error = EIO;
1512 1513 }
1513 1514
1514 1515 if (mode_arg == LIO_WAIT) {
1515 1516 mutex_enter(&aiop->aio_mutex);
1516 1517 while (head->lio_refcnt > 0) {
1517 1518 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
1518 1519 mutex_exit(&aiop->aio_mutex);
1519 1520 error = EINTR;
1520 1521 goto done;
1521 1522 }
1522 1523 }
1523 1524 mutex_exit(&aiop->aio_mutex);
1524 1525 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_64);
1525 1526 }
1526 1527
1527 1528 done:
1528 1529 kmem_free(cbplist, ssize);
1529 1530 if (deadhead) {
1530 1531 if (head->lio_sigqp)
1531 1532 kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
1532 1533 if (head->lio_portkev)
1533 1534 port_free_event(head->lio_portkev);
1534 1535 kmem_free(head, sizeof (aio_lio_t));
1535 1536 }
1536 1537 return (error);
1537 1538 }
1538 1539
1539 1540 #endif /* _LP64 */
1540 1541
1541 1542 /*
1542 1543 * Asynchronous list IO.
1543 1544 * If list I/O is called with LIO_WAIT it can still return
1544 1545 * before all the I/O's are completed if a signal is caught
1545 1546 * or if the list include UFS I/O requests. If this happens,
1546 1547 * libaio will call aliowait() to wait for the I/O's to
1547 1548 * complete
1548 1549 */
1549 1550 /*ARGSUSED*/
1550 1551 static int
1551 1552 aliowait(
1552 1553 int mode,
1553 1554 void *aiocb,
1554 1555 int nent,
1555 1556 void *sigev,
1556 1557 int run_mode)
1557 1558 {
1558 1559 aio_lio_t *head;
1559 1560 aio_t *aiop;
1560 1561 caddr_t cbplist;
1561 1562 aiocb_t *cbp, **ucbp;
1562 1563 #ifdef _SYSCALL32_IMPL
1563 1564 aiocb32_t *cbp32;
1564 1565 caddr32_t *ucbp32;
1565 1566 aiocb64_32_t *cbp64;
1566 1567 #endif
1567 1568 int error = 0;
1568 1569 int i;
1569 1570 size_t ssize = 0;
1570 1571 model_t model = get_udatamodel();
1571 1572
1572 1573 aiop = curproc->p_aio;
1573 1574 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
1574 1575 return (EINVAL);
1575 1576
1576 1577 if (model == DATAMODEL_NATIVE)
1577 1578 ssize = (sizeof (aiocb_t *) * nent);
1578 1579 #ifdef _SYSCALL32_IMPL
1579 1580 else
1580 1581 ssize = (sizeof (caddr32_t) * nent);
1581 1582 #endif /* _SYSCALL32_IMPL */
1582 1583
1583 1584 if (ssize == 0)
1584 1585 return (EINVAL);
1585 1586
1586 1587 cbplist = kmem_alloc(ssize, KM_SLEEP);
1587 1588
1588 1589 if (model == DATAMODEL_NATIVE)
1589 1590 ucbp = (aiocb_t **)cbplist;
1590 1591 #ifdef _SYSCALL32_IMPL
1591 1592 else
1592 1593 ucbp32 = (caddr32_t *)cbplist;
1593 1594 #endif /* _SYSCALL32_IMPL */
1594 1595
1595 1596 if (copyin(aiocb, cbplist, ssize)) {
1596 1597 error = EFAULT;
1597 1598 goto done;
1598 1599 }
1599 1600
1600 1601 /*
1601 1602 * To find the list head, we go through the
1602 1603 * list of aiocb structs, find the request
1603 1604 * its for, then get the list head that reqp
1604 1605 * points to
1605 1606 */
1606 1607 head = NULL;
1607 1608
1608 1609 for (i = 0; i < nent; i++) {
1609 1610 if (model == DATAMODEL_NATIVE) {
1610 1611 /*
1611 1612 * Since we are only checking for a NULL pointer
1612 1613 * Following should work on both native data sizes
1613 1614 * as well as for largefile aiocb.
1614 1615 */
1615 1616 if ((cbp = *ucbp++) == NULL)
1616 1617 continue;
1617 1618 if (run_mode != AIO_LARGEFILE)
1618 1619 if (head = aio_list_get(&cbp->aio_resultp))
1619 1620 break;
1620 1621 else {
1621 1622 /*
1622 1623 * This is a case when largefile call is
1623 1624 * made on 32 bit kernel.
1624 1625 * Treat each pointer as pointer to
1625 1626 * aiocb64_32
1626 1627 */
1627 1628 if (head = aio_list_get((aio_result_t *)
1628 1629 &(((aiocb64_32_t *)cbp)->aio_resultp)))
1629 1630 break;
1630 1631 }
1631 1632 }
1632 1633 #ifdef _SYSCALL32_IMPL
1633 1634 else {
1634 1635 if (run_mode == AIO_LARGEFILE) {
1635 1636 if ((cbp64 = (aiocb64_32_t *)
1636 1637 (uintptr_t)*ucbp32++) == NULL)
1637 1638 continue;
1638 1639 if (head = aio_list_get((aio_result_t *)
1639 1640 &cbp64->aio_resultp))
1640 1641 break;
1641 1642 } else if (run_mode == AIO_32) {
1642 1643 if ((cbp32 = (aiocb32_t *)
1643 1644 (uintptr_t)*ucbp32++) == NULL)
1644 1645 continue;
1645 1646 if (head = aio_list_get((aio_result_t *)
1646 1647 &cbp32->aio_resultp))
1647 1648 break;
1648 1649 }
1649 1650 }
1650 1651 #endif /* _SYSCALL32_IMPL */
1651 1652 }
1652 1653
1653 1654 if (head == NULL) {
1654 1655 error = EINVAL;
1655 1656 goto done;
1656 1657 }
1657 1658
1658 1659 mutex_enter(&aiop->aio_mutex);
1659 1660 while (head->lio_refcnt > 0) {
1660 1661 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
1661 1662 mutex_exit(&aiop->aio_mutex);
1662 1663 error = EINTR;
1663 1664 goto done;
1664 1665 }
1665 1666 }
1666 1667 mutex_exit(&aiop->aio_mutex);
1667 1668 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, run_mode);
1668 1669 done:
1669 1670 kmem_free(cbplist, ssize);
1670 1671 return (error);
1671 1672 }
1672 1673
1673 1674 aio_lio_t *
1674 1675 aio_list_get(aio_result_t *resultp)
1675 1676 {
1676 1677 aio_lio_t *head = NULL;
1677 1678 aio_t *aiop;
1678 1679 aio_req_t **bucket;
1679 1680 aio_req_t *reqp;
1680 1681 long index;
1681 1682
1682 1683 aiop = curproc->p_aio;
1683 1684 if (aiop == NULL)
1684 1685 return (NULL);
1685 1686
1686 1687 if (resultp) {
1687 1688 index = AIO_HASH(resultp);
1688 1689 bucket = &aiop->aio_hash[index];
1689 1690 for (reqp = *bucket; reqp != NULL;
1690 1691 reqp = reqp->aio_hash_next) {
1691 1692 if (reqp->aio_req_resultp == resultp) {
1692 1693 head = reqp->aio_req_lio;
1693 1694 return (head);
1694 1695 }
1695 1696 }
1696 1697 }
1697 1698 return (NULL);
1698 1699 }
1699 1700
1700 1701
1701 1702 static void
1702 1703 lio_set_uerror(void *resultp, int error)
1703 1704 {
1704 1705 /*
1705 1706 * the resultp field is a pointer to where the
1706 1707 * error should be written out to the user's
1707 1708 * aiocb.
1708 1709 *
1709 1710 */
1710 1711 if (get_udatamodel() == DATAMODEL_NATIVE) {
1711 1712 (void) sulword(&((aio_result_t *)resultp)->aio_return,
1712 1713 (ssize_t)-1);
1713 1714 (void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
1714 1715 }
1715 1716 #ifdef _SYSCALL32_IMPL
1716 1717 else {
1717 1718 (void) suword32(&((aio_result32_t *)resultp)->aio_return,
1718 1719 (uint_t)-1);
1719 1720 (void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
1720 1721 }
1721 1722 #endif /* _SYSCALL32_IMPL */
1722 1723 }
1723 1724
1724 1725 /*
1725 1726 * do cleanup completion for all requests in list. memory for
1726 1727 * each request is also freed.
1727 1728 */
1728 1729 static void
1729 1730 alio_cleanup(aio_t *aiop, aiocb_t **cbp, int nent, int run_mode)
1730 1731 {
1731 1732 int i;
1732 1733 aio_req_t *reqp;
1733 1734 aio_result_t *resultp;
1734 1735 aiocb64_32_t *aiocb_64;
1735 1736
1736 1737 for (i = 0; i < nent; i++) {
1737 1738 if (get_udatamodel() == DATAMODEL_NATIVE) {
1738 1739 if (cbp[i] == NULL)
1739 1740 continue;
1740 1741 if (run_mode == AIO_LARGEFILE) {
1741 1742 aiocb_64 = (aiocb64_32_t *)cbp[i];
1742 1743 resultp = (aio_result_t *)
1743 1744 &aiocb_64->aio_resultp;
1744 1745 } else
1745 1746 resultp = &cbp[i]->aio_resultp;
1746 1747 }
1747 1748 #ifdef _SYSCALL32_IMPL
1748 1749 else {
1749 1750 aiocb32_t *aiocb_32;
1750 1751 caddr32_t *cbp32;
1751 1752
1752 1753 cbp32 = (caddr32_t *)cbp;
1753 1754 if (cbp32[i] == NULL)
1754 1755 continue;
1755 1756 if (run_mode == AIO_32) {
1756 1757 aiocb_32 = (aiocb32_t *)(uintptr_t)cbp32[i];
1757 1758 resultp = (aio_result_t *)&aiocb_32->
1758 1759 aio_resultp;
1759 1760 } else if (run_mode == AIO_LARGEFILE) {
1760 1761 aiocb_64 = (aiocb64_32_t *)(uintptr_t)cbp32[i];
1761 1762 resultp = (aio_result_t *)&aiocb_64->
1762 1763 aio_resultp;
1763 1764 }
1764 1765 }
1765 1766 #endif /* _SYSCALL32_IMPL */
1766 1767 /*
1767 1768 * we need to get the aio_cleanupq_mutex since we call
1768 1769 * aio_req_done().
1769 1770 */
1770 1771 mutex_enter(&aiop->aio_cleanupq_mutex);
1771 1772 mutex_enter(&aiop->aio_mutex);
1772 1773 reqp = aio_req_done(resultp);
1773 1774 mutex_exit(&aiop->aio_mutex);
1774 1775 mutex_exit(&aiop->aio_cleanupq_mutex);
1775 1776 if (reqp != NULL) {
1776 1777 aphysio_unlock(reqp);
1777 1778 aio_copyout_result(reqp);
1778 1779 mutex_enter(&aiop->aio_mutex);
1779 1780 aio_req_free(aiop, reqp);
1780 1781 mutex_exit(&aiop->aio_mutex);
1781 1782 }
1782 1783 }
1783 1784 }
1784 1785
1785 1786 /*
1786 1787 * Write out the results for an aio request that is done.
1787 1788 */
1788 1789 static int
1789 1790 aioerror(void *cb, int run_mode)
1790 1791 {
1791 1792 aio_result_t *resultp;
1792 1793 aio_t *aiop;
1793 1794 aio_req_t *reqp;
1794 1795 int retval;
1795 1796
1796 1797 aiop = curproc->p_aio;
1797 1798 if (aiop == NULL || cb == NULL)
1798 1799 return (EINVAL);
1799 1800
1800 1801 if (get_udatamodel() == DATAMODEL_NATIVE) {
1801 1802 if (run_mode == AIO_LARGEFILE)
1802 1803 resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
1803 1804 aio_resultp;
1804 1805 else
1805 1806 resultp = &((aiocb_t *)cb)->aio_resultp;
1806 1807 }
1807 1808 #ifdef _SYSCALL32_IMPL
1808 1809 else {
1809 1810 if (run_mode == AIO_LARGEFILE)
1810 1811 resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
1811 1812 aio_resultp;
1812 1813 else if (run_mode == AIO_32)
1813 1814 resultp = (aio_result_t *)&((aiocb32_t *)cb)->
1814 1815 aio_resultp;
1815 1816 }
1816 1817 #endif /* _SYSCALL32_IMPL */
1817 1818 /*
1818 1819 * we need to get the aio_cleanupq_mutex since we call
1819 1820 * aio_req_find().
1820 1821 */
1821 1822 mutex_enter(&aiop->aio_cleanupq_mutex);
1822 1823 mutex_enter(&aiop->aio_mutex);
1823 1824 retval = aio_req_find(resultp, &reqp);
1824 1825 mutex_exit(&aiop->aio_mutex);
1825 1826 mutex_exit(&aiop->aio_cleanupq_mutex);
1826 1827 if (retval == 0) {
1827 1828 aphysio_unlock(reqp);
1828 1829 aio_copyout_result(reqp);
1829 1830 mutex_enter(&aiop->aio_mutex);
1830 1831 aio_req_free(aiop, reqp);
1831 1832 mutex_exit(&aiop->aio_mutex);
1832 1833 return (0);
1833 1834 } else if (retval == 1)
1834 1835 return (EINPROGRESS);
1835 1836 else if (retval == 2)
1836 1837 return (EINVAL);
1837 1838 return (0);
1838 1839 }
1839 1840
1840 1841 /*
1841 1842 * aio_cancel - if no requests outstanding,
1842 1843 * return AIO_ALLDONE
1843 1844 * else
1844 1845 * return AIO_NOTCANCELED
1845 1846 */
1846 1847 static int
1847 1848 aio_cancel(
1848 1849 int fildes,
1849 1850 void *cb,
1850 1851 long *rval,
1851 1852 int run_mode)
1852 1853 {
1853 1854 aio_t *aiop;
1854 1855 void *resultp;
1855 1856 int index;
1856 1857 aio_req_t **bucket;
1857 1858 aio_req_t *ent;
1858 1859
1859 1860
1860 1861 /*
1861 1862 * Verify valid file descriptor
1862 1863 */
1863 1864 if ((getf(fildes)) == NULL) {
1864 1865 return (EBADF);
1865 1866 }
1866 1867 releasef(fildes);
1867 1868
1868 1869 aiop = curproc->p_aio;
1869 1870 if (aiop == NULL)
1870 1871 return (EINVAL);
1871 1872
1872 1873 if (aiop->aio_outstanding == 0) {
1873 1874 *rval = AIO_ALLDONE;
1874 1875 return (0);
1875 1876 }
1876 1877
1877 1878 mutex_enter(&aiop->aio_mutex);
1878 1879 if (cb != NULL) {
1879 1880 if (get_udatamodel() == DATAMODEL_NATIVE) {
1880 1881 if (run_mode == AIO_LARGEFILE)
1881 1882 resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
1882 1883 ->aio_resultp;
1883 1884 else
1884 1885 resultp = &((aiocb_t *)cb)->aio_resultp;
1885 1886 }
1886 1887 #ifdef _SYSCALL32_IMPL
1887 1888 else {
1888 1889 if (run_mode == AIO_LARGEFILE)
1889 1890 resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
1890 1891 ->aio_resultp;
1891 1892 else if (run_mode == AIO_32)
1892 1893 resultp = (aio_result_t *)&((aiocb32_t *)cb)
1893 1894 ->aio_resultp;
1894 1895 }
1895 1896 #endif /* _SYSCALL32_IMPL */
1896 1897 index = AIO_HASH(resultp);
1897 1898 bucket = &aiop->aio_hash[index];
1898 1899 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
1899 1900 if (ent->aio_req_resultp == resultp) {
1900 1901 if ((ent->aio_req_flags & AIO_PENDING) == 0) {
1901 1902 mutex_exit(&aiop->aio_mutex);
1902 1903 *rval = AIO_ALLDONE;
1903 1904 return (0);
1904 1905 }
1905 1906 mutex_exit(&aiop->aio_mutex);
1906 1907 *rval = AIO_NOTCANCELED;
1907 1908 return (0);
1908 1909 }
1909 1910 }
1910 1911 mutex_exit(&aiop->aio_mutex);
1911 1912 *rval = AIO_ALLDONE;
1912 1913 return (0);
1913 1914 }
1914 1915
1915 1916 for (index = 0; index < AIO_HASHSZ; index++) {
1916 1917 bucket = &aiop->aio_hash[index];
1917 1918 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
1918 1919 if (ent->aio_req_fd == fildes) {
1919 1920 if ((ent->aio_req_flags & AIO_PENDING) != 0) {
1920 1921 mutex_exit(&aiop->aio_mutex);
1921 1922 *rval = AIO_NOTCANCELED;
1922 1923 return (0);
1923 1924 }
1924 1925 }
1925 1926 }
1926 1927 }
1927 1928 mutex_exit(&aiop->aio_mutex);
1928 1929 *rval = AIO_ALLDONE;
1929 1930 return (0);
1930 1931 }
1931 1932
1932 1933 /*
1933 1934 * solaris version of asynchronous read and write
1934 1935 */
1935 1936 static int
1936 1937 arw(
1937 1938 int opcode,
1938 1939 int fdes,
1939 1940 char *bufp,
1940 1941 int bufsize,
1941 1942 offset_t offset,
1942 1943 aio_result_t *resultp,
1943 1944 int mode)
1944 1945 {
1945 1946 file_t *fp;
1946 1947 int error;
1947 1948 struct vnode *vp;
1948 1949 aio_req_t *reqp;
1949 1950 aio_t *aiop;
1950 1951 int (*aio_func)();
1951 1952 #ifdef _LP64
1952 1953 aiocb_t aiocb;
1953 1954 #else
1954 1955 aiocb64_32_t aiocb64;
1955 1956 #endif
1956 1957
1957 1958 aiop = curproc->p_aio;
1958 1959 if (aiop == NULL)
1959 1960 return (EINVAL);
1960 1961
1961 1962 if ((fp = getf(fdes)) == NULL) {
1962 1963 return (EBADF);
1963 1964 }
1964 1965
1965 1966 /*
1966 1967 * check the permission of the partition
1967 1968 */
1968 1969 if ((fp->f_flag & mode) == 0) {
1969 1970 releasef(fdes);
1970 1971 return (EBADF);
1971 1972 }
1972 1973
1973 1974 vp = fp->f_vnode;
1974 1975 aio_func = check_vp(vp, mode);
1975 1976 if (aio_func == NULL) {
1976 1977 releasef(fdes);
1977 1978 return (EBADFD);
1978 1979 }
1979 1980 #ifdef _LP64
1980 1981 aiocb.aio_fildes = fdes;
1981 1982 aiocb.aio_buf = bufp;
1982 1983 aiocb.aio_nbytes = bufsize;
1983 1984 aiocb.aio_offset = offset;
1984 1985 aiocb.aio_sigevent.sigev_notify = 0;
1985 1986 error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp, 1);
1986 1987 #else
1987 1988 aiocb64.aio_fildes = fdes;
1988 1989 aiocb64.aio_buf = (caddr32_t)bufp;
1989 1990 aiocb64.aio_nbytes = bufsize;
1990 1991 aiocb64.aio_offset = offset;
1991 1992 aiocb64.aio_sigevent.sigev_notify = 0;
1992 1993 error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp, 1);
1993 1994 #endif
1994 1995 if (error) {
1995 1996 releasef(fdes);
1996 1997 return (error);
1997 1998 }
1998 1999
1999 2000 /*
2000 2001 * enable polling on this request if the opcode has
2001 2002 * the AIO poll bit set
2002 2003 */
2003 2004 if (opcode & AIO_POLL_BIT)
2004 2005 reqp->aio_req_flags |= AIO_POLL;
2005 2006
2006 2007 if (bufsize == 0) {
2007 2008 clear_active_fd(fdes);
2008 2009 aio_zerolen(reqp);
2009 2010 return (0);
2010 2011 }
2011 2012 /*
2012 2013 * send the request to driver.
2013 2014 */
2014 2015 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
2015 2016 /*
2016 2017 * the fd is stored in the aio_req_t by aio_req_setup(), and
2017 2018 * is released by the aio_cleanup_thread() when the IO has
2018 2019 * completed.
2019 2020 */
2020 2021 if (error) {
2021 2022 releasef(fdes);
2022 2023 mutex_enter(&aiop->aio_mutex);
2023 2024 aio_req_free(aiop, reqp);
2024 2025 aiop->aio_pending--;
2025 2026 if (aiop->aio_flags & AIO_REQ_BLOCK)
2026 2027 cv_signal(&aiop->aio_cleanupcv);
2027 2028 mutex_exit(&aiop->aio_mutex);
2028 2029 return (error);
2029 2030 }
2030 2031 clear_active_fd(fdes);
2031 2032 return (0);
2032 2033 }
2033 2034
2034 2035 /*
2035 2036 * posix version of asynchronous read and write
2036 2037 */
2037 2038 static int
2038 2039 aiorw(
2039 2040 int opcode,
2040 2041 void *aiocb_arg,
2041 2042 int mode,
2042 2043 int run_mode)
2043 2044 {
2044 2045 #ifdef _SYSCALL32_IMPL
2045 2046 aiocb32_t aiocb32;
2046 2047 struct sigevent32 *sigev32;
2047 2048 port_notify32_t pntfy32;
2048 2049 #endif
2049 2050 aiocb64_32_t aiocb64;
2050 2051 aiocb_t aiocb;
2051 2052 file_t *fp;
2052 2053 int error, fd;
2053 2054 size_t bufsize;
2054 2055 struct vnode *vp;
2055 2056 aio_req_t *reqp;
2056 2057 aio_t *aiop;
2057 2058 int (*aio_func)();
2058 2059 aio_result_t *resultp;
2059 2060 struct sigevent *sigev;
2060 2061 model_t model;
2061 2062 int aio_use_port = 0;
2062 2063 port_notify_t pntfy;
2063 2064
2064 2065 model = get_udatamodel();
2065 2066 aiop = curproc->p_aio;
2066 2067 if (aiop == NULL)
2067 2068 return (EINVAL);
2068 2069
2069 2070 if (model == DATAMODEL_NATIVE) {
2070 2071 if (run_mode != AIO_LARGEFILE) {
2071 2072 if (copyin(aiocb_arg, &aiocb, sizeof (aiocb_t)))
2072 2073 return (EFAULT);
2073 2074 bufsize = aiocb.aio_nbytes;
2074 2075 resultp = &(((aiocb_t *)aiocb_arg)->aio_resultp);
2075 2076 if ((fp = getf(fd = aiocb.aio_fildes)) == NULL) {
2076 2077 return (EBADF);
2077 2078 }
2078 2079 sigev = &aiocb.aio_sigevent;
2079 2080 } else {
2080 2081 /*
2081 2082 * We come here only when we make a largefile
2082 2083 * call on a 32 bit kernel using a 32 bit library.
2083 2084 */
2084 2085 if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
2085 2086 return (EFAULT);
2086 2087 bufsize = aiocb64.aio_nbytes;
2087 2088 resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
2088 2089 ->aio_resultp);
2089 2090 if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
2090 2091 return (EBADF);
2091 2092 sigev = (struct sigevent *)&aiocb64.aio_sigevent;
2092 2093 }
2093 2094
2094 2095 if (sigev->sigev_notify == SIGEV_PORT) {
2095 2096 if (copyin((void *)sigev->sigev_value.sival_ptr,
2096 2097 &pntfy, sizeof (port_notify_t))) {
2097 2098 releasef(fd);
2098 2099 return (EFAULT);
2099 2100 }
2100 2101 aio_use_port = 1;
2101 2102 } else if (sigev->sigev_notify == SIGEV_THREAD) {
2102 2103 pntfy.portnfy_port = aiocb.aio_sigevent.sigev_signo;
2103 2104 pntfy.portnfy_user =
2104 2105 aiocb.aio_sigevent.sigev_value.sival_ptr;
2105 2106 aio_use_port = 1;
2106 2107 }
2107 2108 }
2108 2109 #ifdef _SYSCALL32_IMPL
2109 2110 else {
2110 2111 if (run_mode == AIO_32) {
2111 2112 /* 32 bit system call is being made on 64 bit kernel */
2112 2113 if (copyin(aiocb_arg, &aiocb32, sizeof (aiocb32_t)))
2113 2114 return (EFAULT);
2114 2115
2115 2116 bufsize = aiocb32.aio_nbytes;
2116 2117 aiocb_32ton(&aiocb32, &aiocb);
2117 2118 resultp = (aio_result_t *)&(((aiocb32_t *)aiocb_arg)->
2118 2119 aio_resultp);
2119 2120 if ((fp = getf(fd = aiocb32.aio_fildes)) == NULL) {
2120 2121 return (EBADF);
2121 2122 }
2122 2123 sigev32 = &aiocb32.aio_sigevent;
2123 2124 } else if (run_mode == AIO_LARGEFILE) {
2124 2125 /*
2125 2126 * We come here only when we make a largefile
2126 2127 * call on a 64 bit kernel using a 32 bit library.
2127 2128 */
2128 2129 if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
2129 2130 return (EFAULT);
2130 2131 bufsize = aiocb64.aio_nbytes;
2131 2132 aiocb_LFton(&aiocb64, &aiocb);
2132 2133 resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
2133 2134 ->aio_resultp);
2134 2135 if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
2135 2136 return (EBADF);
2136 2137 sigev32 = &aiocb64.aio_sigevent;
2137 2138 }
2138 2139
2139 2140 if (sigev32->sigev_notify == SIGEV_PORT) {
2140 2141 if (copyin(
2141 2142 (void *)(uintptr_t)sigev32->sigev_value.sival_ptr,
2142 2143 &pntfy32, sizeof (port_notify32_t))) {
2143 2144 releasef(fd);
2144 2145 return (EFAULT);
2145 2146 }
2146 2147 pntfy.portnfy_port = pntfy32.portnfy_port;
2147 2148 pntfy.portnfy_user = (void *)(uintptr_t)
2148 2149 pntfy32.portnfy_user;
2149 2150 aio_use_port = 1;
2150 2151 } else if (sigev32->sigev_notify == SIGEV_THREAD) {
2151 2152 pntfy.portnfy_port = sigev32->sigev_signo;
2152 2153 pntfy.portnfy_user = (void *)(uintptr_t)
2153 2154 sigev32->sigev_value.sival_ptr;
2154 2155 aio_use_port = 1;
2155 2156 }
2156 2157 }
2157 2158 #endif /* _SYSCALL32_IMPL */
2158 2159
2159 2160 /*
2160 2161 * check the permission of the partition
2161 2162 */
2162 2163
2163 2164 if ((fp->f_flag & mode) == 0) {
2164 2165 releasef(fd);
2165 2166 return (EBADF);
2166 2167 }
2167 2168
2168 2169 vp = fp->f_vnode;
2169 2170 aio_func = check_vp(vp, mode);
2170 2171 if (aio_func == NULL) {
2171 2172 releasef(fd);
2172 2173 return (EBADFD);
2173 2174 }
2174 2175 if (run_mode == AIO_LARGEFILE)
2175 2176 error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp, 0);
2176 2177 else
2177 2178 error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp, 0);
2178 2179
2179 2180 if (error) {
2180 2181 releasef(fd);
2181 2182 return (error);
2182 2183 }
2183 2184 /*
2184 2185 * enable polling on this request if the opcode has
2185 2186 * the AIO poll bit set
2186 2187 */
2187 2188 if (opcode & AIO_POLL_BIT)
2188 2189 reqp->aio_req_flags |= AIO_POLL;
2189 2190
2190 2191 if (model == DATAMODEL_NATIVE)
2191 2192 reqp->aio_req_iocb.iocb = aiocb_arg;
2192 2193 #ifdef _SYSCALL32_IMPL
2193 2194 else
2194 2195 reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)aiocb_arg;
2195 2196 #endif
2196 2197
2197 2198 if (aio_use_port) {
2198 2199 int event = (run_mode == AIO_LARGEFILE)?
2199 2200 ((mode == FREAD)? AIOAREAD64 : AIOAWRITE64) :
2200 2201 ((mode == FREAD)? AIOAREAD : AIOAWRITE);
2201 2202 error = aio_req_assoc_port_rw(&pntfy, aiocb_arg, reqp, event);
2202 2203 }
2203 2204
2204 2205 /*
2205 2206 * send the request to driver.
2206 2207 */
2207 2208 if (error == 0) {
2208 2209 if (bufsize == 0) {
2209 2210 clear_active_fd(fd);
2210 2211 aio_zerolen(reqp);
2211 2212 return (0);
2212 2213 }
2213 2214 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
2214 2215 }
2215 2216
2216 2217 /*
2217 2218 * the fd is stored in the aio_req_t by aio_req_setup(), and
2218 2219 * is released by the aio_cleanup_thread() when the IO has
2219 2220 * completed.
2220 2221 */
2221 2222 if (error) {
2222 2223 releasef(fd);
2223 2224 mutex_enter(&aiop->aio_mutex);
2224 2225 if (aio_use_port)
2225 2226 aio_deq(&aiop->aio_portpending, reqp);
2226 2227 aio_req_free(aiop, reqp);
2227 2228 aiop->aio_pending--;
2228 2229 if (aiop->aio_flags & AIO_REQ_BLOCK)
2229 2230 cv_signal(&aiop->aio_cleanupcv);
2230 2231 mutex_exit(&aiop->aio_mutex);
2231 2232 return (error);
2232 2233 }
2233 2234 clear_active_fd(fd);
2234 2235 return (0);
2235 2236 }
2236 2237
2237 2238
2238 2239 /*
2239 2240 * set error for a list IO entry that failed.
2240 2241 */
2241 2242 static void
2242 2243 lio_set_error(aio_req_t *reqp, int portused)
2243 2244 {
2244 2245 aio_t *aiop = curproc->p_aio;
2245 2246
2246 2247 if (aiop == NULL)
2247 2248 return;
2248 2249
2249 2250 mutex_enter(&aiop->aio_mutex);
2250 2251 if (portused)
2251 2252 aio_deq(&aiop->aio_portpending, reqp);
2252 2253 aiop->aio_pending--;
2253 2254 /* request failed, AIO_PHYSIODONE set to avoid physio cleanup. */
2254 2255 reqp->aio_req_flags |= AIO_PHYSIODONE;
2255 2256 /*
2256 2257 * Need to free the request now as it's never
2257 2258 * going to get on the done queue
2258 2259 *
2259 2260 * Note: aio_outstanding is decremented in
2260 2261 * aio_req_free()
2261 2262 */
2262 2263 aio_req_free(aiop, reqp);
2263 2264 if (aiop->aio_flags & AIO_REQ_BLOCK)
2264 2265 cv_signal(&aiop->aio_cleanupcv);
2265 2266 mutex_exit(&aiop->aio_mutex);
2266 2267 }
2267 2268
2268 2269 /*
2269 2270 * check if a specified request is done, and remove it from
2270 2271 * the done queue. otherwise remove anybody from the done queue
2271 2272 * if NULL is specified.
2272 2273 */
2273 2274 static aio_req_t *
2274 2275 aio_req_done(void *resultp)
2275 2276 {
2276 2277 aio_req_t **bucket;
2277 2278 aio_req_t *ent;
2278 2279 aio_t *aiop = curproc->p_aio;
2279 2280 long index;
2280 2281
2281 2282 ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
2282 2283 ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2283 2284
2284 2285 if (resultp) {
2285 2286 index = AIO_HASH(resultp);
2286 2287 bucket = &aiop->aio_hash[index];
2287 2288 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
2288 2289 if (ent->aio_req_resultp == (aio_result_t *)resultp) {
2289 2290 if (ent->aio_req_flags & AIO_DONEQ) {
2290 2291 return (aio_req_remove(ent));
2291 2292 }
2292 2293 return (NULL);
2293 2294 }
2294 2295 }
2295 2296 /* no match, resultp is invalid */
2296 2297 return (NULL);
2297 2298 }
2298 2299 return (aio_req_remove(NULL));
2299 2300 }
2300 2301
2301 2302 /*
2302 2303 * determine if a user-level resultp pointer is associated with an
2303 2304 * active IO request. Zero is returned when the request is done,
2304 2305 * and the request is removed from the done queue. Only when the
2305 2306 * return value is zero, is the "reqp" pointer valid. One is returned
2306 2307 * when the request is inprogress. Two is returned when the request
2307 2308 * is invalid.
2308 2309 */
2309 2310 static int
2310 2311 aio_req_find(aio_result_t *resultp, aio_req_t **reqp)
2311 2312 {
2312 2313 aio_req_t **bucket;
2313 2314 aio_req_t *ent;
2314 2315 aio_t *aiop = curproc->p_aio;
2315 2316 long index;
2316 2317
2317 2318 ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
2318 2319 ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2319 2320
2320 2321 index = AIO_HASH(resultp);
2321 2322 bucket = &aiop->aio_hash[index];
2322 2323 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
2323 2324 if (ent->aio_req_resultp == resultp) {
2324 2325 if (ent->aio_req_flags & AIO_DONEQ) {
2325 2326 *reqp = aio_req_remove(ent);
2326 2327 return (0);
2327 2328 }
2328 2329 return (1);
2329 2330 }
2330 2331 }
2331 2332 /* no match, resultp is invalid */
2332 2333 return (2);
2333 2334 }
2334 2335
2335 2336 /*
2336 2337 * remove a request from the done queue.
2337 2338 */
2338 2339 static aio_req_t *
2339 2340 aio_req_remove(aio_req_t *reqp)
2340 2341 {
2341 2342 aio_t *aiop = curproc->p_aio;
2342 2343
2343 2344 ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2344 2345
2345 2346 if (reqp != NULL) {
2346 2347 ASSERT(reqp->aio_req_flags & AIO_DONEQ);
2347 2348 if (reqp->aio_req_next == reqp) {
2348 2349 /* only one request on queue */
2349 2350 if (reqp == aiop->aio_doneq) {
2350 2351 aiop->aio_doneq = NULL;
2351 2352 } else {
2352 2353 ASSERT(reqp == aiop->aio_cleanupq);
2353 2354 aiop->aio_cleanupq = NULL;
2354 2355 }
2355 2356 } else {
2356 2357 reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
2357 2358 reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
2358 2359 /*
2359 2360 * The request can be either on the aio_doneq or the
2360 2361 * aio_cleanupq
2361 2362 */
2362 2363 if (reqp == aiop->aio_doneq)
2363 2364 aiop->aio_doneq = reqp->aio_req_next;
2364 2365
2365 2366 if (reqp == aiop->aio_cleanupq)
2366 2367 aiop->aio_cleanupq = reqp->aio_req_next;
2367 2368 }
2368 2369 reqp->aio_req_flags &= ~AIO_DONEQ;
2369 2370 reqp->aio_req_next = NULL;
2370 2371 reqp->aio_req_prev = NULL;
2371 2372 } else if ((reqp = aiop->aio_doneq) != NULL) {
2372 2373 ASSERT(reqp->aio_req_flags & AIO_DONEQ);
2373 2374 if (reqp == reqp->aio_req_next) {
2374 2375 /* only one request on queue */
2375 2376 aiop->aio_doneq = NULL;
2376 2377 } else {
2377 2378 reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
2378 2379 reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
2379 2380 aiop->aio_doneq = reqp->aio_req_next;
2380 2381 }
2381 2382 reqp->aio_req_flags &= ~AIO_DONEQ;
2382 2383 reqp->aio_req_next = NULL;
2383 2384 reqp->aio_req_prev = NULL;
2384 2385 }
2385 2386 if (aiop->aio_doneq == NULL && (aiop->aio_flags & AIO_WAITN))
2386 2387 cv_broadcast(&aiop->aio_waitcv);
2387 2388 return (reqp);
2388 2389 }
2389 2390
2390 2391 static int
2391 2392 aio_req_setup(
2392 2393 aio_req_t **reqpp,
2393 2394 aio_t *aiop,
2394 2395 aiocb_t *arg,
2395 2396 aio_result_t *resultp,
2396 2397 vnode_t *vp,
2397 2398 int old_solaris_req)
2398 2399 {
2399 2400 sigqueue_t *sqp = NULL;
2400 2401 aio_req_t *reqp;
2401 2402 struct uio *uio;
2402 2403 struct sigevent *sigev;
2403 2404 int error;
2404 2405
2405 2406 sigev = &arg->aio_sigevent;
2406 2407 if (sigev->sigev_notify == SIGEV_SIGNAL &&
2407 2408 sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
2408 2409 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
2409 2410 if (sqp == NULL)
2410 2411 return (EAGAIN);
2411 2412 sqp->sq_func = NULL;
2412 2413 sqp->sq_next = NULL;
2413 2414 sqp->sq_info.si_code = SI_ASYNCIO;
2414 2415 sqp->sq_info.si_pid = curproc->p_pid;
2415 2416 sqp->sq_info.si_ctid = PRCTID(curproc);
2416 2417 sqp->sq_info.si_zoneid = getzoneid();
2417 2418 sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
2418 2419 sqp->sq_info.si_signo = sigev->sigev_signo;
2419 2420 sqp->sq_info.si_value = sigev->sigev_value;
2420 2421 }
2421 2422
2422 2423 mutex_enter(&aiop->aio_mutex);
2423 2424
2424 2425 if (aiop->aio_flags & AIO_REQ_BLOCK) {
2425 2426 mutex_exit(&aiop->aio_mutex);
2426 2427 if (sqp)
2427 2428 kmem_free(sqp, sizeof (sigqueue_t));
2428 2429 return (EIO);
2429 2430 }
2430 2431 /*
2431 2432 * get an aio_reqp from the free list or allocate one
2432 2433 * from dynamic memory.
2433 2434 */
2434 2435 if (error = aio_req_alloc(&reqp, resultp)) {
2435 2436 mutex_exit(&aiop->aio_mutex);
2436 2437 if (sqp)
2437 2438 kmem_free(sqp, sizeof (sigqueue_t));
2438 2439 return (error);
2439 2440 }
2440 2441 aiop->aio_pending++;
2441 2442 aiop->aio_outstanding++;
2442 2443 reqp->aio_req_flags = AIO_PENDING;
2443 2444 if (old_solaris_req) {
2444 2445 /* this is an old solaris aio request */
2445 2446 reqp->aio_req_flags |= AIO_SOLARIS;
2446 2447 aiop->aio_flags |= AIO_SOLARIS_REQ;
2447 2448 }
2448 2449 if (sigev->sigev_notify == SIGEV_THREAD ||
2449 2450 sigev->sigev_notify == SIGEV_PORT)
2450 2451 aio_enq(&aiop->aio_portpending, reqp, 0);
2451 2452 mutex_exit(&aiop->aio_mutex);
2452 2453 /*
2453 2454 * initialize aio request.
2454 2455 */
2455 2456 reqp->aio_req_fd = arg->aio_fildes;
2456 2457 reqp->aio_req_sigqp = sqp;
2457 2458 reqp->aio_req_iocb.iocb = NULL;
2458 2459 reqp->aio_req_lio = NULL;
2459 2460 reqp->aio_req_buf.b_file = vp;
2460 2461 uio = reqp->aio_req.aio_uio;
2461 2462 uio->uio_iovcnt = 1;
2462 2463 uio->uio_iov->iov_base = (caddr_t)arg->aio_buf;
2463 2464 uio->uio_iov->iov_len = arg->aio_nbytes;
2464 2465 uio->uio_loffset = arg->aio_offset;
2465 2466 *reqpp = reqp;
2466 2467 return (0);
2467 2468 }
2468 2469
2469 2470 /*
2470 2471 * Allocate p_aio struct.
2471 2472 */
2472 2473 static aio_t *
2473 2474 aio_aiop_alloc(void)
2474 2475 {
2475 2476 aio_t *aiop;
2476 2477
2477 2478 ASSERT(MUTEX_HELD(&curproc->p_lock));
2478 2479
2479 2480 aiop = kmem_zalloc(sizeof (struct aio), KM_NOSLEEP);
2480 2481 if (aiop) {
2481 2482 mutex_init(&aiop->aio_mutex, NULL, MUTEX_DEFAULT, NULL);
2482 2483 mutex_init(&aiop->aio_cleanupq_mutex, NULL, MUTEX_DEFAULT,
2483 2484 NULL);
2484 2485 mutex_init(&aiop->aio_portq_mutex, NULL, MUTEX_DEFAULT, NULL);
2485 2486 }
2486 2487 return (aiop);
2487 2488 }
2488 2489
2489 2490 /*
2490 2491 * Allocate an aio_req struct.
2491 2492 */
2492 2493 static int
2493 2494 aio_req_alloc(aio_req_t **nreqp, aio_result_t *resultp)
2494 2495 {
2495 2496 aio_req_t *reqp;
2496 2497 aio_t *aiop = curproc->p_aio;
2497 2498
2498 2499 ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2499 2500
2500 2501 if ((reqp = aiop->aio_free) != NULL) {
2501 2502 aiop->aio_free = reqp->aio_req_next;
2502 2503 bzero(reqp, sizeof (*reqp));
2503 2504 } else {
2504 2505 /*
2505 2506 * Check whether memory is getting tight.
2506 2507 * This is a temporary mechanism to avoid memory
2507 2508 * exhaustion by a single process until we come up
2508 2509 * with a per process solution such as setrlimit().
2509 2510 */
2510 2511 if (freemem < desfree)
2511 2512 return (EAGAIN);
2512 2513 reqp = kmem_zalloc(sizeof (struct aio_req_t), KM_NOSLEEP);
2513 2514 if (reqp == NULL)
2514 2515 return (EAGAIN);
2515 2516 }
2516 2517 reqp->aio_req.aio_uio = &reqp->aio_req_uio;
2517 2518 reqp->aio_req.aio_uio->uio_iov = &reqp->aio_req_iov;
2518 2519 reqp->aio_req.aio_private = reqp;
2519 2520 reqp->aio_req_buf.b_offset = -1;
2520 2521 reqp->aio_req_resultp = resultp;
2521 2522 if (aio_hash_insert(reqp, aiop)) {
2522 2523 reqp->aio_req_next = aiop->aio_free;
2523 2524 aiop->aio_free = reqp;
2524 2525 return (EBUSY);
2525 2526 }
2526 2527 *nreqp = reqp;
2527 2528 return (0);
2528 2529 }
2529 2530
2530 2531 /*
2531 2532 * Allocate an aio_lio_t struct.
2532 2533 */
2533 2534 static int
2534 2535 aio_lio_alloc(aio_lio_t **head)
2535 2536 {
2536 2537 aio_lio_t *liop;
2537 2538 aio_t *aiop = curproc->p_aio;
2538 2539
2539 2540 ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2540 2541
2541 2542 if ((liop = aiop->aio_lio_free) != NULL) {
2542 2543 aiop->aio_lio_free = liop->lio_next;
2543 2544 } else {
2544 2545 /*
2545 2546 * Check whether memory is getting tight.
2546 2547 * This is a temporary mechanism to avoid memory
2547 2548 * exhaustion by a single process until we come up
2548 2549 * with a per process solution such as setrlimit().
2549 2550 */
2550 2551 if (freemem < desfree)
2551 2552 return (EAGAIN);
2552 2553
2553 2554 liop = kmem_zalloc(sizeof (aio_lio_t), KM_NOSLEEP);
2554 2555 if (liop == NULL)
2555 2556 return (EAGAIN);
2556 2557 }
2557 2558 *head = liop;
2558 2559 return (0);
2559 2560 }
2560 2561
2561 2562 /*
2562 2563 * this is a special per-process thread that is only activated if
2563 2564 * the process is unmapping a segment with outstanding aio. normally,
2564 2565 * the process will have completed the aio before unmapping the
2565 2566 * segment. If the process does unmap a segment with outstanding aio,
2566 2567 * this special thread will guarantee that the locked pages due to
2567 2568 * aphysio() are released, thereby permitting the segment to be
2568 2569 * unmapped. In addition to this, the cleanup thread is woken up
2569 2570 * during DR operations to release the locked pages.
2570 2571 */
2571 2572
2572 2573 static int
2573 2574 aio_cleanup_thread(aio_t *aiop)
2574 2575 {
2575 2576 proc_t *p = curproc;
2576 2577 struct as *as = p->p_as;
2577 2578 int poked = 0;
2578 2579 kcondvar_t *cvp;
2579 2580 int exit_flag = 0;
2580 2581 int rqclnup = 0;
2581 2582
2582 2583 sigfillset(&curthread->t_hold);
2583 2584 sigdiffset(&curthread->t_hold, &cantmask);
2584 2585 for (;;) {
2585 2586 /*
2586 2587 * if a segment is being unmapped, and the current
2587 2588 * process's done queue is not empty, then every request
2588 2589 * on the doneq with locked resources should be forced
2589 2590 * to release their locks. By moving the doneq request
2590 2591 * to the cleanupq, aio_cleanup() will process the cleanupq,
2591 2592 * and place requests back onto the doneq. All requests
2592 2593 * processed by aio_cleanup() will have their physical
2593 2594 * resources unlocked.
2594 2595 */
2595 2596 mutex_enter(&aiop->aio_mutex);
2596 2597 if ((aiop->aio_flags & AIO_CLEANUP) == 0) {
2597 2598 aiop->aio_flags |= AIO_CLEANUP;
2598 2599 mutex_enter(&as->a_contents);
2599 2600 if (aiop->aio_rqclnup) {
2600 2601 aiop->aio_rqclnup = 0;
2601 2602 rqclnup = 1;
2602 2603 }
2603 2604 mutex_exit(&as->a_contents);
2604 2605 if (aiop->aio_doneq) {
2605 2606 aio_req_t *doneqhead = aiop->aio_doneq;
2606 2607 aiop->aio_doneq = NULL;
2607 2608 aio_cleanupq_concat(aiop, doneqhead, AIO_DONEQ);
2608 2609 }
2609 2610 }
2610 2611 mutex_exit(&aiop->aio_mutex);
2611 2612 aio_cleanup(AIO_CLEANUP_THREAD);
2612 2613 /*
2613 2614 * thread should block on the cleanupcv while
2614 2615 * AIO_CLEANUP is set.
2615 2616 */
2616 2617 cvp = &aiop->aio_cleanupcv;
2617 2618 mutex_enter(&aiop->aio_mutex);
2618 2619
2619 2620 if (aiop->aio_pollq != NULL || aiop->aio_cleanupq != NULL ||
2620 2621 aiop->aio_notifyq != NULL ||
2621 2622 aiop->aio_portcleanupq != NULL) {
2622 2623 mutex_exit(&aiop->aio_mutex);
2623 2624 continue;
2624 2625 }
2625 2626 mutex_enter(&as->a_contents);
2626 2627
2627 2628 /*
2628 2629 * AIO_CLEANUP determines when the cleanup thread
2629 2630 * should be active. This flag is set when
2630 2631 * the cleanup thread is awakened by as_unmap() or
2631 2632 * due to DR operations.
2632 2633 * The flag is cleared when the blocking as_unmap()
2633 2634 * that originally awakened us is allowed to
2634 2635 * complete. as_unmap() blocks when trying to
2635 2636 * unmap a segment that has SOFTLOCKed pages. when
2636 2637 * the segment's pages are all SOFTUNLOCKed,
2637 2638 * as->a_flags & AS_UNMAPWAIT should be zero.
2638 2639 *
2639 2640 * In case of cleanup request by DR, the flag is cleared
2640 2641 * once all the pending aio requests have been processed.
2641 2642 *
2642 2643 * The flag shouldn't be cleared right away if the
2643 2644 * cleanup thread was interrupted because the process
2644 2645 * is doing forkall(). This happens when cv_wait_sig()
2645 2646 * returns zero, because it was awakened by a pokelwps().
2646 2647 * If the process is not exiting, it must be doing forkall().
2647 2648 */
2648 2649 if ((poked == 0) &&
2649 2650 ((!rqclnup && (AS_ISUNMAPWAIT(as) == 0)) ||
2650 2651 (aiop->aio_pending == 0))) {
2651 2652 aiop->aio_flags &= ~(AIO_CLEANUP | AIO_CLEANUP_PORT);
2652 2653 cvp = &as->a_cv;
2653 2654 rqclnup = 0;
2654 2655 }
2655 2656 mutex_exit(&aiop->aio_mutex);
2656 2657 if (poked) {
2657 2658 /*
2658 2659 * If the process is exiting/killed, don't return
2659 2660 * immediately without waiting for pending I/O's
2660 2661 * and releasing the page locks.
2661 2662 */
2662 2663 if (p->p_flag & (SEXITLWPS|SKILLED)) {
2663 2664 /*
2664 2665 * If exit_flag is set, then it is
2665 2666 * safe to exit because we have released
2666 2667 * page locks of completed I/O's.
2667 2668 */
2668 2669 if (exit_flag)
2669 2670 break;
2670 2671
2671 2672 mutex_exit(&as->a_contents);
2672 2673
2673 2674 /*
2674 2675 * Wait for all the pending aio to complete.
2675 2676 */
2676 2677 mutex_enter(&aiop->aio_mutex);
2677 2678 aiop->aio_flags |= AIO_REQ_BLOCK;
2678 2679 while (aiop->aio_pending != 0)
2679 2680 cv_wait(&aiop->aio_cleanupcv,
2680 2681 &aiop->aio_mutex);
2681 2682 mutex_exit(&aiop->aio_mutex);
2682 2683 exit_flag = 1;
2683 2684 continue;
2684 2685 } else if (p->p_flag &
2685 2686 (SHOLDFORK|SHOLDFORK1|SHOLDWATCH)) {
2686 2687 /*
2687 2688 * hold LWP until it
2688 2689 * is continued.
2689 2690 */
2690 2691 mutex_exit(&as->a_contents);
2691 2692 mutex_enter(&p->p_lock);
2692 2693 stop(PR_SUSPENDED, SUSPEND_NORMAL);
2693 2694 mutex_exit(&p->p_lock);
2694 2695 poked = 0;
2695 2696 continue;
2696 2697 }
2697 2698 } else {
2698 2699 /*
2699 2700 * When started this thread will sleep on as->a_cv.
2700 2701 * as_unmap will awake this thread if the
2701 2702 * segment has SOFTLOCKed pages (poked = 0).
2702 2703 * 1. pokelwps() awakes this thread =>
2703 2704 * break the loop to check SEXITLWPS, SHOLDFORK, etc
2704 2705 * 2. as_unmap awakes this thread =>
2705 2706 * to break the loop it is necessary that
2706 2707 * - AS_UNMAPWAIT is set (as_unmap is waiting for
2707 2708 * memory to be unlocked)
2708 2709 * - AIO_CLEANUP is not set
2709 2710 * (if AIO_CLEANUP is set we have to wait for
2710 2711 * pending requests. aio_done will send a signal
2711 2712 * for every request which completes to continue
2712 2713 * unmapping the corresponding address range)
2713 2714 * 3. A cleanup request will wake this thread up, ex.
2714 2715 * by the DR operations. The aio_rqclnup flag will
2715 2716 * be set.
2716 2717 */
2717 2718 while (poked == 0) {
2718 2719 /*
2719 2720 * The cleanup requests that came in after we
2720 2721 * had just cleaned up couldn't be causing the
2721 2722 * unmap thread to block, as the unmap event
2722 2723 * happened first.
2723 2724 * Let aio_done() wake us up if it sees a need.
2724 2725 */
2725 2726 if (aiop->aio_rqclnup &&
2726 2727 (aiop->aio_flags & AIO_CLEANUP) == 0)
2727 2728 break;
2728 2729 poked = !cv_wait_sig(cvp, &as->a_contents);
2729 2730 if (AS_ISUNMAPWAIT(as) == 0)
2730 2731 cv_signal(cvp);
2731 2732 if (aiop->aio_outstanding != 0)
2732 2733 break;
2733 2734 }
2734 2735 }
2735 2736 mutex_exit(&as->a_contents);
2736 2737 }
2737 2738 exit:
2738 2739 mutex_exit(&as->a_contents);
2739 2740 ASSERT((curproc->p_flag & (SEXITLWPS|SKILLED)));
2740 2741 aston(curthread); /* make thread do post_syscall */
2741 2742 return (0);
2742 2743 }
2743 2744
2744 2745 /*
2745 2746 * save a reference to a user's outstanding aio in a hash list.
2746 2747 */
2747 2748 static int
2748 2749 aio_hash_insert(
2749 2750 aio_req_t *aio_reqp,
2750 2751 aio_t *aiop)
2751 2752 {
2752 2753 long index;
2753 2754 aio_result_t *resultp = aio_reqp->aio_req_resultp;
2754 2755 aio_req_t *current;
2755 2756 aio_req_t **nextp;
2756 2757
2757 2758 index = AIO_HASH(resultp);
2758 2759 nextp = &aiop->aio_hash[index];
2759 2760 while ((current = *nextp) != NULL) {
2760 2761 if (current->aio_req_resultp == resultp)
2761 2762 return (DUPLICATE);
2762 2763 nextp = &current->aio_hash_next;
2763 2764 }
2764 2765 *nextp = aio_reqp;
2765 2766 aio_reqp->aio_hash_next = NULL;
2766 2767 return (0);
2767 2768 }
2768 2769
2769 2770 static int
2770 2771 (*check_vp(struct vnode *vp, int mode))(vnode_t *, struct aio_req *,
2771 2772 cred_t *)
2772 2773 {
2773 2774 struct snode *sp;
2774 2775 dev_t dev;
2775 2776 struct cb_ops *cb;
2776 2777 major_t major;
2777 2778 int (*aio_func)();
2778 2779
2779 2780 dev = vp->v_rdev;
2780 2781 major = getmajor(dev);
2781 2782
2782 2783 /*
2783 2784 * return NULL for requests to files and STREAMs so
2784 2785 * that libaio takes care of them.
2785 2786 */
2786 2787 if (vp->v_type == VCHR) {
2787 2788 /* no stream device for kaio */
2788 2789 if (STREAMSTAB(major)) {
2789 2790 return (NULL);
2790 2791 }
2791 2792 } else {
2792 2793 return (NULL);
2793 2794 }
2794 2795
2795 2796 /*
2796 2797 * Check old drivers which do not have async I/O entry points.
2797 2798 */
2798 2799 if (devopsp[major]->devo_rev < 3)
2799 2800 return (NULL);
2800 2801
2801 2802 cb = devopsp[major]->devo_cb_ops;
2802 2803
2803 2804 if (cb->cb_rev < 1)
2804 2805 return (NULL);
2805 2806
2806 2807 /*
2807 2808 * Check whether this device is a block device.
2808 2809 * Kaio is not supported for devices like tty.
2809 2810 */
2810 2811 if (cb->cb_strategy == nodev || cb->cb_strategy == NULL)
2811 2812 return (NULL);
2812 2813
2813 2814 /*
2814 2815 * Clustering: If vnode is a PXFS vnode, then the device may be remote.
2815 2816 * We cannot call the driver directly. Instead return the
2816 2817 * PXFS functions.
2817 2818 */
2818 2819
2819 2820 if (IS_PXFSVP(vp)) {
2820 2821 if (mode & FREAD)
2821 2822 return (clpxfs_aio_read);
2822 2823 else
2823 2824 return (clpxfs_aio_write);
2824 2825 }
2825 2826 if (mode & FREAD)
2826 2827 aio_func = (cb->cb_aread == nodev) ? NULL : driver_aio_read;
2827 2828 else
2828 2829 aio_func = (cb->cb_awrite == nodev) ? NULL : driver_aio_write;
2829 2830
2830 2831 /*
2831 2832 * Do we need this ?
2832 2833 * nodev returns ENXIO anyway.
2833 2834 */
2834 2835 if (aio_func == nodev)
2835 2836 return (NULL);
2836 2837
2837 2838 sp = VTOS(vp);
2838 2839 smark(sp, SACC);
2839 2840 return (aio_func);
2840 2841 }
2841 2842
2842 2843 /*
2843 2844 * Clustering: We want check_vp to return a function prototyped
2844 2845 * correctly that will be common to both PXFS and regular case.
2845 2846 * We define this intermediate function that will do the right
2846 2847 * thing for driver cases.
2847 2848 */
2848 2849
2849 2850 static int
2850 2851 driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
2851 2852 {
2852 2853 dev_t dev;
2853 2854 struct cb_ops *cb;
2854 2855
2855 2856 ASSERT(vp->v_type == VCHR);
2856 2857 ASSERT(!IS_PXFSVP(vp));
2857 2858 dev = VTOS(vp)->s_dev;
2858 2859 ASSERT(STREAMSTAB(getmajor(dev)) == NULL);
2859 2860
2860 2861 cb = devopsp[getmajor(dev)]->devo_cb_ops;
2861 2862
2862 2863 ASSERT(cb->cb_awrite != nodev);
2863 2864 return ((*cb->cb_awrite)(dev, aio, cred_p));
2864 2865 }
2865 2866
2866 2867 /*
2867 2868 * Clustering: We want check_vp to return a function prototyped
2868 2869 * correctly that will be common to both PXFS and regular case.
2869 2870 * We define this intermediate function that will do the right
2870 2871 * thing for driver cases.
2871 2872 */
2872 2873
2873 2874 static int
2874 2875 driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
2875 2876 {
2876 2877 dev_t dev;
2877 2878 struct cb_ops *cb;
2878 2879
2879 2880 ASSERT(vp->v_type == VCHR);
2880 2881 ASSERT(!IS_PXFSVP(vp));
2881 2882 dev = VTOS(vp)->s_dev;
2882 2883 ASSERT(!STREAMSTAB(getmajor(dev)));
2883 2884
2884 2885 cb = devopsp[getmajor(dev)]->devo_cb_ops;
2885 2886
2886 2887 ASSERT(cb->cb_aread != nodev);
2887 2888 return ((*cb->cb_aread)(dev, aio, cred_p));
2888 2889 }
2889 2890
2890 2891 /*
2891 2892 * This routine is called when a largefile call is made by a 32bit
2892 2893 * process on an ILP32 or LP64 kernel. All 64bit processes are large
2893 2894 * file by definition and will call alio() instead.
2894 2895 */
2895 2896 static int
2896 2897 alioLF(
2897 2898 int mode_arg,
2898 2899 void *aiocb_arg,
2899 2900 int nent,
2900 2901 void *sigev)
2901 2902 {
2902 2903 file_t *fp;
2903 2904 file_t *prev_fp = NULL;
2904 2905 int prev_mode = -1;
2905 2906 struct vnode *vp;
2906 2907 aio_lio_t *head;
2907 2908 aio_req_t *reqp;
2908 2909 aio_t *aiop;
2909 2910 caddr_t cbplist;
2910 2911 aiocb64_32_t cb64;
2911 2912 aiocb64_32_t *aiocb = &cb64;
2912 2913 aiocb64_32_t *cbp;
2913 2914 caddr32_t *ucbp;
2914 2915 #ifdef _LP64
2915 2916 aiocb_t aiocb_n;
2916 2917 #endif
2917 2918 struct sigevent32 sigevk;
2918 2919 sigqueue_t *sqp;
2919 2920 int (*aio_func)();
2920 2921 int mode;
2921 2922 int error = 0;
2922 2923 int aio_errors = 0;
2923 2924 int i;
2924 2925 size_t ssize;
2925 2926 int deadhead = 0;
2926 2927 int aio_notsupported = 0;
2927 2928 int lio_head_port;
2928 2929 int aio_port;
2929 2930 int aio_thread;
2930 2931 port_kevent_t *pkevtp = NULL;
2931 2932 int portused = 0;
2932 2933 port_notify32_t pnotify;
2933 2934 int event;
2934 2935
2935 2936 aiop = curproc->p_aio;
2936 2937 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
2937 2938 return (EINVAL);
2938 2939
2939 2940 ASSERT(get_udatamodel() == DATAMODEL_ILP32);
2940 2941
2941 2942 ssize = (sizeof (caddr32_t) * nent);
2942 2943 cbplist = kmem_alloc(ssize, KM_SLEEP);
2943 2944 ucbp = (caddr32_t *)cbplist;
2944 2945
2945 2946 if (copyin(aiocb_arg, cbplist, ssize) ||
2946 2947 (sigev && copyin(sigev, &sigevk, sizeof (sigevk)))) {
2947 2948 kmem_free(cbplist, ssize);
2948 2949 return (EFAULT);
2949 2950 }
2950 2951
2951 2952 /* Event Ports */
2952 2953 if (sigev &&
2953 2954 (sigevk.sigev_notify == SIGEV_THREAD ||
2954 2955 sigevk.sigev_notify == SIGEV_PORT)) {
2955 2956 if (sigevk.sigev_notify == SIGEV_THREAD) {
2956 2957 pnotify.portnfy_port = sigevk.sigev_signo;
2957 2958 pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
2958 2959 } else if (copyin(
2959 2960 (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
2960 2961 &pnotify, sizeof (pnotify))) {
2961 2962 kmem_free(cbplist, ssize);
2962 2963 return (EFAULT);
2963 2964 }
2964 2965 error = port_alloc_event(pnotify.portnfy_port,
2965 2966 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
2966 2967 if (error) {
2967 2968 if (error == ENOMEM || error == EAGAIN)
2968 2969 error = EAGAIN;
2969 2970 else
2970 2971 error = EINVAL;
2971 2972 kmem_free(cbplist, ssize);
2972 2973 return (error);
2973 2974 }
2974 2975 lio_head_port = pnotify.portnfy_port;
2975 2976 portused = 1;
2976 2977 }
2977 2978
2978 2979 /*
2979 2980 * a list head should be allocated if notification is
2980 2981 * enabled for this list.
2981 2982 */
2982 2983 head = NULL;
2983 2984
2984 2985 if (mode_arg == LIO_WAIT || sigev) {
2985 2986 mutex_enter(&aiop->aio_mutex);
2986 2987 error = aio_lio_alloc(&head);
2987 2988 mutex_exit(&aiop->aio_mutex);
2988 2989 if (error)
2989 2990 goto done;
2990 2991 deadhead = 1;
2991 2992 head->lio_nent = nent;
2992 2993 head->lio_refcnt = nent;
2993 2994 head->lio_port = -1;
2994 2995 head->lio_portkev = NULL;
2995 2996 if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
2996 2997 sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
2997 2998 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
2998 2999 if (sqp == NULL) {
2999 3000 error = EAGAIN;
3000 3001 goto done;
3001 3002 }
3002 3003 sqp->sq_func = NULL;
3003 3004 sqp->sq_next = NULL;
3004 3005 sqp->sq_info.si_code = SI_ASYNCIO;
3005 3006 sqp->sq_info.si_pid = curproc->p_pid;
3006 3007 sqp->sq_info.si_ctid = PRCTID(curproc);
3007 3008 sqp->sq_info.si_zoneid = getzoneid();
3008 3009 sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
3009 3010 sqp->sq_info.si_signo = sigevk.sigev_signo;
3010 3011 sqp->sq_info.si_value.sival_int =
3011 3012 sigevk.sigev_value.sival_int;
3012 3013 head->lio_sigqp = sqp;
3013 3014 } else {
3014 3015 head->lio_sigqp = NULL;
3015 3016 }
3016 3017 if (pkevtp) {
3017 3018 /*
3018 3019 * Prepare data to send when list of aiocb's
3019 3020 * has completed.
3020 3021 */
3021 3022 port_init_event(pkevtp, (uintptr_t)sigev,
3022 3023 (void *)(uintptr_t)pnotify.portnfy_user,
3023 3024 NULL, head);
3024 3025 pkevtp->portkev_events = AIOLIO64;
3025 3026 head->lio_portkev = pkevtp;
3026 3027 head->lio_port = pnotify.portnfy_port;
3027 3028 }
3028 3029 }
3029 3030
3030 3031 for (i = 0; i < nent; i++, ucbp++) {
3031 3032
3032 3033 cbp = (aiocb64_32_t *)(uintptr_t)*ucbp;
3033 3034 /* skip entry if it can't be copied. */
3034 3035 if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
3035 3036 if (head) {
3036 3037 mutex_enter(&aiop->aio_mutex);
3037 3038 head->lio_nent--;
3038 3039 head->lio_refcnt--;
3039 3040 mutex_exit(&aiop->aio_mutex);
3040 3041 }
3041 3042 continue;
3042 3043 }
3043 3044
3044 3045 /* skip if opcode for aiocb is LIO_NOP */
3045 3046 mode = aiocb->aio_lio_opcode;
3046 3047 if (mode == LIO_NOP) {
3047 3048 cbp = NULL;
3048 3049 if (head) {
3049 3050 mutex_enter(&aiop->aio_mutex);
3050 3051 head->lio_nent--;
3051 3052 head->lio_refcnt--;
3052 3053 mutex_exit(&aiop->aio_mutex);
3053 3054 }
3054 3055 continue;
3055 3056 }
3056 3057
3057 3058 /* increment file descriptor's ref count. */
3058 3059 if ((fp = getf(aiocb->aio_fildes)) == NULL) {
3059 3060 lio_set_uerror(&cbp->aio_resultp, EBADF);
3060 3061 if (head) {
3061 3062 mutex_enter(&aiop->aio_mutex);
3062 3063 head->lio_nent--;
3063 3064 head->lio_refcnt--;
3064 3065 mutex_exit(&aiop->aio_mutex);
3065 3066 }
3066 3067 aio_errors++;
3067 3068 continue;
3068 3069 }
3069 3070
3070 3071 /*
3071 3072 * check the permission of the partition
3072 3073 */
3073 3074 if ((fp->f_flag & mode) == 0) {
3074 3075 releasef(aiocb->aio_fildes);
3075 3076 lio_set_uerror(&cbp->aio_resultp, EBADF);
3076 3077 if (head) {
3077 3078 mutex_enter(&aiop->aio_mutex);
3078 3079 head->lio_nent--;
3079 3080 head->lio_refcnt--;
3080 3081 mutex_exit(&aiop->aio_mutex);
3081 3082 }
3082 3083 aio_errors++;
3083 3084 continue;
3084 3085 }
3085 3086
3086 3087 /*
3087 3088 * common case where requests are to the same fd
3088 3089 * for the same r/w operation
3089 3090 * for UFS, need to set EBADFD
3090 3091 */
3091 3092 vp = fp->f_vnode;
3092 3093 if (fp != prev_fp || mode != prev_mode) {
3093 3094 aio_func = check_vp(vp, mode);
3094 3095 if (aio_func == NULL) {
3095 3096 prev_fp = NULL;
3096 3097 releasef(aiocb->aio_fildes);
3097 3098 lio_set_uerror(&cbp->aio_resultp, EBADFD);
3098 3099 aio_notsupported++;
3099 3100 if (head) {
3100 3101 mutex_enter(&aiop->aio_mutex);
3101 3102 head->lio_nent--;
3102 3103 head->lio_refcnt--;
3103 3104 mutex_exit(&aiop->aio_mutex);
3104 3105 }
3105 3106 continue;
3106 3107 } else {
3107 3108 prev_fp = fp;
3108 3109 prev_mode = mode;
3109 3110 }
3110 3111 }
3111 3112
3112 3113 #ifdef _LP64
3113 3114 aiocb_LFton(aiocb, &aiocb_n);
3114 3115 error = aio_req_setup(&reqp, aiop, &aiocb_n,
3115 3116 (aio_result_t *)&cbp->aio_resultp, vp, 0);
3116 3117 #else
3117 3118 error = aio_req_setupLF(&reqp, aiop, aiocb,
3118 3119 (aio_result_t *)&cbp->aio_resultp, vp, 0);
3119 3120 #endif /* _LP64 */
3120 3121 if (error) {
3121 3122 releasef(aiocb->aio_fildes);
3122 3123 lio_set_uerror(&cbp->aio_resultp, error);
3123 3124 if (head) {
3124 3125 mutex_enter(&aiop->aio_mutex);
3125 3126 head->lio_nent--;
3126 3127 head->lio_refcnt--;
3127 3128 mutex_exit(&aiop->aio_mutex);
3128 3129 }
3129 3130 aio_errors++;
3130 3131 continue;
3131 3132 }
3132 3133
3133 3134 reqp->aio_req_lio = head;
3134 3135 deadhead = 0;
3135 3136
3136 3137 /*
3137 3138 * Set the errno field now before sending the request to
3138 3139 * the driver to avoid a race condition
3139 3140 */
3140 3141 (void) suword32(&cbp->aio_resultp.aio_errno,
3141 3142 EINPROGRESS);
3142 3143
3143 3144 reqp->aio_req_iocb.iocb32 = *ucbp;
3144 3145
3145 3146 event = (mode == LIO_READ)? AIOAREAD64 : AIOAWRITE64;
3146 3147 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
3147 3148 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
3148 3149 if (aio_port | aio_thread) {
3149 3150 port_kevent_t *lpkevp;
3150 3151 /*
3151 3152 * Prepare data to send with each aiocb completed.
3152 3153 */
3153 3154 if (aio_port) {
3154 3155 void *paddr = (void *)(uintptr_t)
3155 3156 aiocb->aio_sigevent.sigev_value.sival_ptr;
3156 3157 if (copyin(paddr, &pnotify, sizeof (pnotify)))
3157 3158 error = EFAULT;
3158 3159 } else { /* aio_thread */
3159 3160 pnotify.portnfy_port =
3160 3161 aiocb->aio_sigevent.sigev_signo;
3161 3162 pnotify.portnfy_user =
3162 3163 aiocb->aio_sigevent.sigev_value.sival_ptr;
3163 3164 }
3164 3165 if (error)
3165 3166 /* EMPTY */;
3166 3167 else if (pkevtp != NULL &&
3167 3168 pnotify.portnfy_port == lio_head_port)
3168 3169 error = port_dup_event(pkevtp, &lpkevp,
3169 3170 PORT_ALLOC_DEFAULT);
3170 3171 else
3171 3172 error = port_alloc_event(pnotify.portnfy_port,
3172 3173 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
3173 3174 &lpkevp);
3174 3175 if (error == 0) {
3175 3176 port_init_event(lpkevp, (uintptr_t)*ucbp,
3176 3177 (void *)(uintptr_t)pnotify.portnfy_user,
3177 3178 aio_port_callback, reqp);
3178 3179 lpkevp->portkev_events = event;
3179 3180 reqp->aio_req_portkev = lpkevp;
3180 3181 reqp->aio_req_port = pnotify.portnfy_port;
3181 3182 }
3182 3183 }
3183 3184
3184 3185 /*
3185 3186 * send the request to driver.
3186 3187 */
3187 3188 if (error == 0) {
3188 3189 if (aiocb->aio_nbytes == 0) {
3189 3190 clear_active_fd(aiocb->aio_fildes);
3190 3191 aio_zerolen(reqp);
3191 3192 continue;
3192 3193 }
3193 3194 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
3194 3195 CRED());
3195 3196 }
3196 3197
3197 3198 /*
3198 3199 * the fd's ref count is not decremented until the IO has
3199 3200 * completed unless there was an error.
3200 3201 */
3201 3202 if (error) {
3202 3203 releasef(aiocb->aio_fildes);
3203 3204 lio_set_uerror(&cbp->aio_resultp, error);
3204 3205 if (head) {
3205 3206 mutex_enter(&aiop->aio_mutex);
3206 3207 head->lio_nent--;
3207 3208 head->lio_refcnt--;
3208 3209 mutex_exit(&aiop->aio_mutex);
3209 3210 }
3210 3211 if (error == ENOTSUP)
3211 3212 aio_notsupported++;
3212 3213 else
3213 3214 aio_errors++;
3214 3215 lio_set_error(reqp, portused);
3215 3216 } else {
3216 3217 clear_active_fd(aiocb->aio_fildes);
3217 3218 }
3218 3219 }
3219 3220
3220 3221 if (aio_notsupported) {
3221 3222 error = ENOTSUP;
3222 3223 } else if (aio_errors) {
3223 3224 /*
3224 3225 * return EIO if any request failed
3225 3226 */
3226 3227 error = EIO;
3227 3228 }
3228 3229
3229 3230 if (mode_arg == LIO_WAIT) {
3230 3231 mutex_enter(&aiop->aio_mutex);
3231 3232 while (head->lio_refcnt > 0) {
3232 3233 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
3233 3234 mutex_exit(&aiop->aio_mutex);
3234 3235 error = EINTR;
3235 3236 goto done;
3236 3237 }
3237 3238 }
3238 3239 mutex_exit(&aiop->aio_mutex);
3239 3240 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_LARGEFILE);
3240 3241 }
3241 3242
3242 3243 done:
3243 3244 kmem_free(cbplist, ssize);
3244 3245 if (deadhead) {
3245 3246 if (head->lio_sigqp)
3246 3247 kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
3247 3248 if (head->lio_portkev)
3248 3249 port_free_event(head->lio_portkev);
3249 3250 kmem_free(head, sizeof (aio_lio_t));
3250 3251 }
3251 3252 return (error);
3252 3253 }
3253 3254
3254 3255 #ifdef _SYSCALL32_IMPL
3255 3256 static void
3256 3257 aiocb_LFton(aiocb64_32_t *src, aiocb_t *dest)
3257 3258 {
3258 3259 dest->aio_fildes = src->aio_fildes;
3259 3260 dest->aio_buf = (void *)(uintptr_t)src->aio_buf;
3260 3261 dest->aio_nbytes = (size_t)src->aio_nbytes;
3261 3262 dest->aio_offset = (off_t)src->aio_offset;
3262 3263 dest->aio_reqprio = src->aio_reqprio;
3263 3264 dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
3264 3265 dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;
3265 3266
3266 3267 /*
3267 3268 * See comment in sigqueue32() on handling of 32-bit
3268 3269 * sigvals in a 64-bit kernel.
3269 3270 */
3270 3271 dest->aio_sigevent.sigev_value.sival_int =
3271 3272 (int)src->aio_sigevent.sigev_value.sival_int;
3272 3273 dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
3273 3274 (uintptr_t)src->aio_sigevent.sigev_notify_function;
3274 3275 dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
3275 3276 (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
3276 3277 dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
3277 3278 dest->aio_lio_opcode = src->aio_lio_opcode;
3278 3279 dest->aio_state = src->aio_state;
3279 3280 dest->aio__pad[0] = src->aio__pad[0];
3280 3281 }
3281 3282 #endif
3282 3283
3283 3284 /*
3284 3285 * This function is used only for largefile calls made by
3285 3286 * 32 bit applications.
3286 3287 */
3287 3288 static int
3288 3289 aio_req_setupLF(
3289 3290 aio_req_t **reqpp,
3290 3291 aio_t *aiop,
3291 3292 aiocb64_32_t *arg,
3292 3293 aio_result_t *resultp,
3293 3294 vnode_t *vp,
3294 3295 int old_solaris_req)
3295 3296 {
3296 3297 sigqueue_t *sqp = NULL;
3297 3298 aio_req_t *reqp;
3298 3299 struct uio *uio;
3299 3300 struct sigevent32 *sigev;
3300 3301 int error;
3301 3302
3302 3303 sigev = &arg->aio_sigevent;
3303 3304 if (sigev->sigev_notify == SIGEV_SIGNAL &&
3304 3305 sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
3305 3306 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
3306 3307 if (sqp == NULL)
3307 3308 return (EAGAIN);
3308 3309 sqp->sq_func = NULL;
3309 3310 sqp->sq_next = NULL;
3310 3311 sqp->sq_info.si_code = SI_ASYNCIO;
3311 3312 sqp->sq_info.si_pid = curproc->p_pid;
3312 3313 sqp->sq_info.si_ctid = PRCTID(curproc);
3313 3314 sqp->sq_info.si_zoneid = getzoneid();
3314 3315 sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
3315 3316 sqp->sq_info.si_signo = sigev->sigev_signo;
3316 3317 sqp->sq_info.si_value.sival_int = sigev->sigev_value.sival_int;
3317 3318 }
3318 3319
3319 3320 mutex_enter(&aiop->aio_mutex);
3320 3321
3321 3322 if (aiop->aio_flags & AIO_REQ_BLOCK) {
3322 3323 mutex_exit(&aiop->aio_mutex);
3323 3324 if (sqp)
3324 3325 kmem_free(sqp, sizeof (sigqueue_t));
3325 3326 return (EIO);
3326 3327 }
3327 3328 /*
3328 3329 * get an aio_reqp from the free list or allocate one
3329 3330 * from dynamic memory.
3330 3331 */
3331 3332 if (error = aio_req_alloc(&reqp, resultp)) {
3332 3333 mutex_exit(&aiop->aio_mutex);
3333 3334 if (sqp)
3334 3335 kmem_free(sqp, sizeof (sigqueue_t));
3335 3336 return (error);
3336 3337 }
3337 3338 aiop->aio_pending++;
3338 3339 aiop->aio_outstanding++;
3339 3340 reqp->aio_req_flags = AIO_PENDING;
3340 3341 if (old_solaris_req) {
3341 3342 /* this is an old solaris aio request */
3342 3343 reqp->aio_req_flags |= AIO_SOLARIS;
3343 3344 aiop->aio_flags |= AIO_SOLARIS_REQ;
3344 3345 }
3345 3346 if (sigev->sigev_notify == SIGEV_THREAD ||
3346 3347 sigev->sigev_notify == SIGEV_PORT)
3347 3348 aio_enq(&aiop->aio_portpending, reqp, 0);
3348 3349 mutex_exit(&aiop->aio_mutex);
3349 3350 /*
3350 3351 * initialize aio request.
3351 3352 */
3352 3353 reqp->aio_req_fd = arg->aio_fildes;
3353 3354 reqp->aio_req_sigqp = sqp;
3354 3355 reqp->aio_req_iocb.iocb = NULL;
3355 3356 reqp->aio_req_lio = NULL;
3356 3357 reqp->aio_req_buf.b_file = vp;
3357 3358 uio = reqp->aio_req.aio_uio;
3358 3359 uio->uio_iovcnt = 1;
3359 3360 uio->uio_iov->iov_base = (caddr_t)(uintptr_t)arg->aio_buf;
3360 3361 uio->uio_iov->iov_len = arg->aio_nbytes;
3361 3362 uio->uio_loffset = arg->aio_offset;
3362 3363 *reqpp = reqp;
3363 3364 return (0);
3364 3365 }
3365 3366
3366 3367 /*
3367 3368 * This routine is called when a non largefile call is made by a 32bit
3368 3369 * process on an ILP32 or LP64 kernel.
3369 3370 */
3370 3371 static int
3371 3372 alio32(
3372 3373 int mode_arg,
3373 3374 void *aiocb_arg,
3374 3375 int nent,
3375 3376 void *sigev)
3376 3377 {
3377 3378 file_t *fp;
3378 3379 file_t *prev_fp = NULL;
3379 3380 int prev_mode = -1;
3380 3381 struct vnode *vp;
3381 3382 aio_lio_t *head;
3382 3383 aio_req_t *reqp;
3383 3384 aio_t *aiop;
3384 3385 caddr_t cbplist;
3385 3386 aiocb_t cb;
3386 3387 aiocb_t *aiocb = &cb;
3387 3388 #ifdef _LP64
3388 3389 aiocb32_t *cbp;
3389 3390 caddr32_t *ucbp;
3390 3391 aiocb32_t cb32;
3391 3392 aiocb32_t *aiocb32 = &cb32;
3392 3393 struct sigevent32 sigevk;
3393 3394 #else
3394 3395 aiocb_t *cbp, **ucbp;
3395 3396 struct sigevent sigevk;
3396 3397 #endif
3397 3398 sigqueue_t *sqp;
3398 3399 int (*aio_func)();
3399 3400 int mode;
3400 3401 int error = 0;
3401 3402 int aio_errors = 0;
3402 3403 int i;
3403 3404 size_t ssize;
3404 3405 int deadhead = 0;
3405 3406 int aio_notsupported = 0;
3406 3407 int lio_head_port;
3407 3408 int aio_port;
3408 3409 int aio_thread;
3409 3410 port_kevent_t *pkevtp = NULL;
3410 3411 int portused = 0;
3411 3412 #ifdef _LP64
3412 3413 port_notify32_t pnotify;
3413 3414 #else
3414 3415 port_notify_t pnotify;
3415 3416 #endif
3416 3417 int event;
3417 3418
3418 3419 aiop = curproc->p_aio;
3419 3420 if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
3420 3421 return (EINVAL);
3421 3422
3422 3423 #ifdef _LP64
3423 3424 ssize = (sizeof (caddr32_t) * nent);
3424 3425 #else
3425 3426 ssize = (sizeof (aiocb_t *) * nent);
3426 3427 #endif
3427 3428 cbplist = kmem_alloc(ssize, KM_SLEEP);
3428 3429 ucbp = (void *)cbplist;
3429 3430
3430 3431 if (copyin(aiocb_arg, cbplist, ssize) ||
3431 3432 (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent32)))) {
3432 3433 kmem_free(cbplist, ssize);
3433 3434 return (EFAULT);
3434 3435 }
3435 3436
3436 3437 /* Event Ports */
3437 3438 if (sigev &&
3438 3439 (sigevk.sigev_notify == SIGEV_THREAD ||
3439 3440 sigevk.sigev_notify == SIGEV_PORT)) {
3440 3441 if (sigevk.sigev_notify == SIGEV_THREAD) {
3441 3442 pnotify.portnfy_port = sigevk.sigev_signo;
3442 3443 pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
3443 3444 } else if (copyin(
3444 3445 (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
3445 3446 &pnotify, sizeof (pnotify))) {
3446 3447 kmem_free(cbplist, ssize);
3447 3448 return (EFAULT);
3448 3449 }
3449 3450 error = port_alloc_event(pnotify.portnfy_port,
3450 3451 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
3451 3452 if (error) {
3452 3453 if (error == ENOMEM || error == EAGAIN)
3453 3454 error = EAGAIN;
3454 3455 else
3455 3456 error = EINVAL;
3456 3457 kmem_free(cbplist, ssize);
3457 3458 return (error);
3458 3459 }
3459 3460 lio_head_port = pnotify.portnfy_port;
3460 3461 portused = 1;
3461 3462 }
3462 3463
3463 3464 /*
3464 3465 * a list head should be allocated if notification is
3465 3466 * enabled for this list.
3466 3467 */
3467 3468 head = NULL;
3468 3469
3469 3470 if (mode_arg == LIO_WAIT || sigev) {
3470 3471 mutex_enter(&aiop->aio_mutex);
3471 3472 error = aio_lio_alloc(&head);
3472 3473 mutex_exit(&aiop->aio_mutex);
3473 3474 if (error)
3474 3475 goto done;
3475 3476 deadhead = 1;
3476 3477 head->lio_nent = nent;
3477 3478 head->lio_refcnt = nent;
3478 3479 head->lio_port = -1;
3479 3480 head->lio_portkev = NULL;
3480 3481 if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
3481 3482 sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
3482 3483 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
3483 3484 if (sqp == NULL) {
3484 3485 error = EAGAIN;
3485 3486 goto done;
3486 3487 }
3487 3488 sqp->sq_func = NULL;
3488 3489 sqp->sq_next = NULL;
3489 3490 sqp->sq_info.si_code = SI_ASYNCIO;
3490 3491 sqp->sq_info.si_pid = curproc->p_pid;
3491 3492 sqp->sq_info.si_ctid = PRCTID(curproc);
3492 3493 sqp->sq_info.si_zoneid = getzoneid();
3493 3494 sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
3494 3495 sqp->sq_info.si_signo = sigevk.sigev_signo;
3495 3496 sqp->sq_info.si_value.sival_int =
3496 3497 sigevk.sigev_value.sival_int;
3497 3498 head->lio_sigqp = sqp;
3498 3499 } else {
3499 3500 head->lio_sigqp = NULL;
3500 3501 }
3501 3502 if (pkevtp) {
3502 3503 /*
3503 3504 * Prepare data to send when list of aiocb's has
3504 3505 * completed.
3505 3506 */
3506 3507 port_init_event(pkevtp, (uintptr_t)sigev,
3507 3508 (void *)(uintptr_t)pnotify.portnfy_user,
3508 3509 NULL, head);
3509 3510 pkevtp->portkev_events = AIOLIO;
3510 3511 head->lio_portkev = pkevtp;
3511 3512 head->lio_port = pnotify.portnfy_port;
3512 3513 }
3513 3514 }
3514 3515
3515 3516 for (i = 0; i < nent; i++, ucbp++) {
3516 3517
3517 3518 /* skip entry if it can't be copied. */
3518 3519 #ifdef _LP64
3519 3520 cbp = (aiocb32_t *)(uintptr_t)*ucbp;
3520 3521 if (cbp == NULL || copyin(cbp, aiocb32, sizeof (*aiocb32)))
3521 3522 #else
3522 3523 cbp = (aiocb_t *)*ucbp;
3523 3524 if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb)))
3524 3525 #endif
3525 3526 {
3526 3527 if (head) {
3527 3528 mutex_enter(&aiop->aio_mutex);
3528 3529 head->lio_nent--;
3529 3530 head->lio_refcnt--;
3530 3531 mutex_exit(&aiop->aio_mutex);
3531 3532 }
3532 3533 continue;
3533 3534 }
3534 3535 #ifdef _LP64
3535 3536 /*
3536 3537 * copy 32 bit structure into 64 bit structure
3537 3538 */
3538 3539 aiocb_32ton(aiocb32, aiocb);
3539 3540 #endif /* _LP64 */
3540 3541
3541 3542 /* skip if opcode for aiocb is LIO_NOP */
3542 3543 mode = aiocb->aio_lio_opcode;
3543 3544 if (mode == LIO_NOP) {
3544 3545 cbp = NULL;
3545 3546 if (head) {
3546 3547 mutex_enter(&aiop->aio_mutex);
3547 3548 head->lio_nent--;
3548 3549 head->lio_refcnt--;
3549 3550 mutex_exit(&aiop->aio_mutex);
3550 3551 }
3551 3552 continue;
3552 3553 }
3553 3554
3554 3555 /* increment file descriptor's ref count. */
3555 3556 if ((fp = getf(aiocb->aio_fildes)) == NULL) {
3556 3557 lio_set_uerror(&cbp->aio_resultp, EBADF);
3557 3558 if (head) {
3558 3559 mutex_enter(&aiop->aio_mutex);
3559 3560 head->lio_nent--;
3560 3561 head->lio_refcnt--;
3561 3562 mutex_exit(&aiop->aio_mutex);
3562 3563 }
3563 3564 aio_errors++;
3564 3565 continue;
3565 3566 }
3566 3567
3567 3568 /*
3568 3569 * check the permission of the partition
3569 3570 */
3570 3571 if ((fp->f_flag & mode) == 0) {
3571 3572 releasef(aiocb->aio_fildes);
3572 3573 lio_set_uerror(&cbp->aio_resultp, EBADF);
3573 3574 if (head) {
3574 3575 mutex_enter(&aiop->aio_mutex);
3575 3576 head->lio_nent--;
3576 3577 head->lio_refcnt--;
3577 3578 mutex_exit(&aiop->aio_mutex);
3578 3579 }
3579 3580 aio_errors++;
3580 3581 continue;
3581 3582 }
3582 3583
3583 3584 /*
3584 3585 * common case where requests are to the same fd
3585 3586 * for the same r/w operation
3586 3587 * for UFS, need to set EBADFD
3587 3588 */
3588 3589 vp = fp->f_vnode;
3589 3590 if (fp != prev_fp || mode != prev_mode) {
3590 3591 aio_func = check_vp(vp, mode);
3591 3592 if (aio_func == NULL) {
3592 3593 prev_fp = NULL;
3593 3594 releasef(aiocb->aio_fildes);
3594 3595 lio_set_uerror(&cbp->aio_resultp, EBADFD);
3595 3596 aio_notsupported++;
3596 3597 if (head) {
3597 3598 mutex_enter(&aiop->aio_mutex);
3598 3599 head->lio_nent--;
3599 3600 head->lio_refcnt--;
3600 3601 mutex_exit(&aiop->aio_mutex);
3601 3602 }
3602 3603 continue;
3603 3604 } else {
3604 3605 prev_fp = fp;
3605 3606 prev_mode = mode;
3606 3607 }
3607 3608 }
3608 3609
3609 3610 error = aio_req_setup(&reqp, aiop, aiocb,
3610 3611 (aio_result_t *)&cbp->aio_resultp, vp, 0);
3611 3612 if (error) {
3612 3613 releasef(aiocb->aio_fildes);
3613 3614 lio_set_uerror(&cbp->aio_resultp, error);
3614 3615 if (head) {
3615 3616 mutex_enter(&aiop->aio_mutex);
3616 3617 head->lio_nent--;
3617 3618 head->lio_refcnt--;
3618 3619 mutex_exit(&aiop->aio_mutex);
3619 3620 }
3620 3621 aio_errors++;
3621 3622 continue;
3622 3623 }
3623 3624
3624 3625 reqp->aio_req_lio = head;
3625 3626 deadhead = 0;
3626 3627
3627 3628 /*
3628 3629 * Set the errno field now before sending the request to
3629 3630 * the driver to avoid a race condition
3630 3631 */
3631 3632 (void) suword32(&cbp->aio_resultp.aio_errno,
3632 3633 EINPROGRESS);
3633 3634
3634 3635 reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)cbp;
3635 3636
3636 3637 event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
3637 3638 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
3638 3639 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
3639 3640 if (aio_port | aio_thread) {
3640 3641 port_kevent_t *lpkevp;
3641 3642 /*
3642 3643 * Prepare data to send with each aiocb completed.
3643 3644 */
3644 3645 #ifdef _LP64
3645 3646 if (aio_port) {
3646 3647 void *paddr = (void *)(uintptr_t)
3647 3648 aiocb32->aio_sigevent.sigev_value.sival_ptr;
3648 3649 if (copyin(paddr, &pnotify, sizeof (pnotify)))
3649 3650 error = EFAULT;
3650 3651 } else { /* aio_thread */
3651 3652 pnotify.portnfy_port =
3652 3653 aiocb32->aio_sigevent.sigev_signo;
3653 3654 pnotify.portnfy_user =
3654 3655 aiocb32->aio_sigevent.sigev_value.sival_ptr;
3655 3656 }
3656 3657 #else
3657 3658 if (aio_port) {
3658 3659 void *paddr =
3659 3660 aiocb->aio_sigevent.sigev_value.sival_ptr;
3660 3661 if (copyin(paddr, &pnotify, sizeof (pnotify)))
3661 3662 error = EFAULT;
3662 3663 } else { /* aio_thread */
3663 3664 pnotify.portnfy_port =
3664 3665 aiocb->aio_sigevent.sigev_signo;
3665 3666 pnotify.portnfy_user =
3666 3667 aiocb->aio_sigevent.sigev_value.sival_ptr;
3667 3668 }
3668 3669 #endif
3669 3670 if (error)
3670 3671 /* EMPTY */;
3671 3672 else if (pkevtp != NULL &&
3672 3673 pnotify.portnfy_port == lio_head_port)
3673 3674 error = port_dup_event(pkevtp, &lpkevp,
3674 3675 PORT_ALLOC_DEFAULT);
3675 3676 else
3676 3677 error = port_alloc_event(pnotify.portnfy_port,
3677 3678 PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
3678 3679 &lpkevp);
3679 3680 if (error == 0) {
3680 3681 port_init_event(lpkevp, (uintptr_t)cbp,
3681 3682 (void *)(uintptr_t)pnotify.portnfy_user,
3682 3683 aio_port_callback, reqp);
3683 3684 lpkevp->portkev_events = event;
3684 3685 reqp->aio_req_portkev = lpkevp;
3685 3686 reqp->aio_req_port = pnotify.portnfy_port;
3686 3687 }
3687 3688 }
3688 3689
3689 3690 /*
3690 3691 * send the request to driver.
3691 3692 */
3692 3693 if (error == 0) {
3693 3694 if (aiocb->aio_nbytes == 0) {
3694 3695 clear_active_fd(aiocb->aio_fildes);
3695 3696 aio_zerolen(reqp);
3696 3697 continue;
3697 3698 }
3698 3699 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
3699 3700 CRED());
3700 3701 }
3701 3702
3702 3703 		/*
3703 3704 		 * the fd's ref count is not decremented until the I/O has
3704 3705 		 * completed, unless there was an error.
3705 3706 		 */
3706 3707 if (error) {
3707 3708 releasef(aiocb->aio_fildes);
3708 3709 lio_set_uerror(&cbp->aio_resultp, error);
3709 3710 if (head) {
3710 3711 mutex_enter(&aiop->aio_mutex);
3711 3712 head->lio_nent--;
3712 3713 head->lio_refcnt--;
3713 3714 mutex_exit(&aiop->aio_mutex);
3714 3715 }
3715 3716 if (error == ENOTSUP)
3716 3717 aio_notsupported++;
3717 3718 else
3718 3719 aio_errors++;
3719 3720 lio_set_error(reqp, portused);
3720 3721 } else {
3721 3722 clear_active_fd(aiocb->aio_fildes);
3722 3723 }
3723 3724 }
3724 3725
3725 3726 if (aio_notsupported) {
3726 3727 error = ENOTSUP;
3727 3728 } else if (aio_errors) {
3728 3729 /*
3729 3730 * return EIO if any request failed
3730 3731 */
3731 3732 error = EIO;
3732 3733 }
3733 3734
3734 3735 if (mode_arg == LIO_WAIT) {
3735 3736 mutex_enter(&aiop->aio_mutex);
3736 3737 while (head->lio_refcnt > 0) {
3737 3738 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
3738 3739 mutex_exit(&aiop->aio_mutex);
3739 3740 error = EINTR;
3740 3741 goto done;
3741 3742 }
3742 3743 }
3743 3744 mutex_exit(&aiop->aio_mutex);
3744 3745 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_32);
3745 3746 }
3746 3747
3747 3748 done:
3748 3749 kmem_free(cbplist, ssize);
3749 3750 if (deadhead) {
3750 3751 if (head->lio_sigqp)
3751 3752 kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
3752 3753 if (head->lio_portkev)
3753 3754 port_free_event(head->lio_portkev);
3754 3755 kmem_free(head, sizeof (aio_lio_t));
3755 3756 }
3756 3757 return (error);
3757 3758 }
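
For reference, a minimal userland sketch of how this listio path can be driven, using a hypothetical raw-device path and buffer sizes: the list-level sigevent uses SIGEV_PORT with a port_notify_t, so the kernel posts a single AIOLIO event carrying the portnfy_user cookie once the whole list has completed, which is the pnotify/lio_portkev handling seen above.

#include <aio.h>
#include <port.h>
#include <fcntl.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int
main(void)
{
	static char buf[2][512];
	struct aiocb cb[2], *list[2];
	struct sigevent sev;
	port_notify_t pn;
	port_event_t pe;
	int port, fd, i;

	if ((port = port_create()) < 0 ||
	    (fd = open("/dev/rdsk/c0t0d0s2", O_RDONLY)) < 0)	/* placeholder device */
		exit(1);

	(void) memset(cb, 0, sizeof (cb));
	for (i = 0; i < 2; i++) {
		cb[i].aio_fildes = fd;
		cb[i].aio_buf = buf[i];
		cb[i].aio_nbytes = sizeof (buf[i]);
		cb[i].aio_offset = (off_t)i * sizeof (buf[i]);
		cb[i].aio_lio_opcode = LIO_READ;
		cb[i].aio_sigevent.sigev_notify = SIGEV_NONE;
		list[i] = &cb[i];
	}

	/* One AIOLIO event is posted for the whole list (head->lio_portkev). */
	pn.portnfy_port = port;
	pn.portnfy_user = list;		/* cookie returned in portev_user */
	(void) memset(&sev, 0, sizeof (sev));
	sev.sigev_notify = SIGEV_PORT;
	sev.sigev_value.sival_ptr = &pn;

	if (lio_listio(LIO_NOWAIT, list, 2, &sev) != 0)
		exit(1);

	/* Block until the kernel posts the list-completion event. */
	if (port_get(port, &pe, NULL) == 0)
		(void) printf("source %d user %p\n",
		    pe.portev_source, pe.portev_user);
	return (0);
}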
3758 3759
3759 3760
3760 3761 #ifdef _SYSCALL32_IMPL
3761 3762 void
3762 3763 aiocb_32ton(aiocb32_t *src, aiocb_t *dest)
3763 3764 {
3764 3765 dest->aio_fildes = src->aio_fildes;
3765 3766 dest->aio_buf = (caddr_t)(uintptr_t)src->aio_buf;
3766 3767 dest->aio_nbytes = (size_t)src->aio_nbytes;
3767 3768 dest->aio_offset = (off_t)src->aio_offset;
3768 3769 dest->aio_reqprio = src->aio_reqprio;
3769 3770 dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
3770 3771 dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;
3771 3772
3772 3773 /*
3773 3774 * See comment in sigqueue32() on handling of 32-bit
3774 3775 * sigvals in a 64-bit kernel.
3775 3776 */
3776 3777 dest->aio_sigevent.sigev_value.sival_int =
3777 3778 (int)src->aio_sigevent.sigev_value.sival_int;
3778 3779 dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
3779 3780 (uintptr_t)src->aio_sigevent.sigev_notify_function;
3780 3781 dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
3781 3782 (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
3782 3783 dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
3783 3784 dest->aio_lio_opcode = src->aio_lio_opcode;
3784 3785 dest->aio_state = src->aio_state;
3785 3786 dest->aio__pad[0] = src->aio__pad[0];
3786 3787 }
3787 3788 #endif /* _SYSCALL32_IMPL */
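
The widening casts in aiocb_32ton() go through uintptr_t so that a caddr32_t, an unsigned 32-bit integer holding a user address, is zero-extended before being reinterpreted as a full-width pointer. A minimal stand-alone illustration of the pattern follows; the caddr32_t typedef here is an assumption mirroring <sys/types32.h>.

#include <stdint.h>

typedef uint32_t caddr32_t;	/* assumed; mirrors <sys/types32.h> */

/* Zero-extend a 32-bit user address, then reinterpret it as a pointer. */
static void *
widen(caddr32_t addr32)
{
	return ((void *)(uintptr_t)addr32);
}

/* Narrowing back for the 32-bit ABI goes through uintptr_t as well. */
static caddr32_t
narrow(void *addr)
{
	return ((caddr32_t)(uintptr_t)addr);
}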
3788 3789
3789 3790 /*
3790 3791  * aio_port_callback() is called just before the event is retrieved from the
3791 3792  * port.  The task of this callback is to finish the transaction on behalf
3792 3793  * of the application, which means:
3793 3794  * - copy the transaction data out to the application
3794 3795  *	(this thread is running in the right process context)
3795 3796  * - keep track of the transaction (update the counters)
3796 3797  * - free the allocated buffers
3797 3798  * The aiocb pointer is the object element of the port_kevent_t structure.
3798 3799  *
3799 3800  * flag:
3800 3801  * PORT_CALLBACK_DEFAULT : do the copyout and free resources
3801 3802  * PORT_CALLBACK_CLOSE : don't do the copyout, but free resources
3802 3803  */
3803 3804
3804 3805 /*ARGSUSED*/
3805 3806 int
3806 3807 aio_port_callback(void *arg, int *events, pid_t pid, int flag, void *evp)
3807 3808 {
3808 3809 aio_t *aiop = curproc->p_aio;
3809 3810 aio_req_t *reqp = arg;
3810 3811 struct iovec *iov;
3811 3812 struct buf *bp;
3812 3813 void *resultp;
3813 3814
3814 3815 if (pid != curproc->p_pid) {
3815 3816 		/* wrong process; cannot deliver the data here */
3816 3817 return (EACCES);
3817 3818 }
3818 3819
3819 3820 mutex_enter(&aiop->aio_portq_mutex);
3820 3821 reqp->aio_req_portkev = NULL;
3821 3822 aio_req_remove_portq(aiop, reqp); /* remove request from portq */
3822 3823 mutex_exit(&aiop->aio_portq_mutex);
3823 3824 aphysio_unlock(reqp); /* unlock used pages */
3824 3825 mutex_enter(&aiop->aio_mutex);
3825 3826 if (reqp->aio_req_flags & AIO_COPYOUTDONE) {
3826 3827 aio_req_free_port(aiop, reqp); /* back to free list */
3827 3828 mutex_exit(&aiop->aio_mutex);
3828 3829 return (0);
3829 3830 }
3830 3831
3831 3832 iov = reqp->aio_req_uio.uio_iov;
3832 3833 bp = &reqp->aio_req_buf;
3833 3834 resultp = (void *)reqp->aio_req_resultp;
3834 3835 aio_req_free_port(aiop, reqp); /* request struct back to free list */
3835 3836 mutex_exit(&aiop->aio_mutex);
3836 3837 if (flag == PORT_CALLBACK_DEFAULT)
3837 3838 aio_copyout_result_port(iov, bp, resultp);
3838 3839 return (0);
3839 3840 }
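
For reference, a sketch of the consuming side, assuming the individual aiocbs requested SIGEV_PORT notification: aio_port_callback() runs in the process context from within port_get(3C), so by the time the event is returned the results have already been copied out, and portev_object holds the user's aiocb address (it was set with port_init_event(lpkevp, (uintptr_t)cbp, ...) above).

#include <aio.h>
#include <port.h>
#include <stdio.h>

/* Drain completed AIO events from an event port (blocking). */
static void
drain_aio_events(int port)
{
	port_event_t pe;

	while (port_get(port, &pe, NULL) == 0 &&
	    pe.portev_source == PORT_SOURCE_AIO) {
		/* For PORT_SOURCE_AIO the object is the user's aiocb address. */
		struct aiocb *cb = (struct aiocb *)pe.portev_object;

		(void) printf("fd %d error %d return %zd cookie %p\n",
		    cb->aio_fildes, aio_error(cb), aio_return(cb),
		    pe.portev_user);
	}
}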