/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Kernel asynchronous I/O.
 * This is only for raw devices now (as of Nov. 1993).
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/fs/snode.h>
#include <sys/unistd.h>
#include <sys/cmn_err.h>
#include <vm/as.h>
#include <vm/faultcode.h>
#include <sys/sysmacros.h>
#include <sys/procfs.h>
#include <sys/kmem.h>
#include <sys/autoconf.h>
#include <sys/ddi_impldefs.h>
#include <sys/sunddi.h>
#include <sys/aio_impl.h>
#include <sys/debug.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/vmsystm.h>
#include <sys/fs/pxfs_ki.h>
#include <sys/contract/process_impl.h>

/*
 * external entry points.
 */
#ifdef _LP64
static int64_t kaioc(long, long, long, long, long, long);
#endif
static int kaio(ulong_t *, rval_t *);


#define AIO_64  0
#define AIO_32  1
#define AIO_LARGEFILE   2

/*
 * implementation specific functions (private)
 */
#ifdef _LP64
static int alio(int, aiocb_t **, int, struct sigevent *);
#endif
static int aionotify(void);
static int aioinit(void);
static int aiostart(void);
static void alio_cleanup(aio_t *, aiocb_t **, int, int);
static int (*check_vp(struct vnode *, int))(vnode_t *, struct aio_req *,
    cred_t *);
static void lio_set_error(aio_req_t *, int portused);
static aio_t *aio_aiop_alloc();
static int aio_req_alloc(aio_req_t **, aio_result_t *);
static int aio_lio_alloc(aio_lio_t **);
static aio_req_t *aio_req_done(void *);
static aio_req_t *aio_req_remove(aio_req_t *);
static int aio_req_find(aio_result_t *, aio_req_t **);
static int aio_hash_insert(struct aio_req_t *, aio_t *);
static int aio_req_setup(aio_req_t **, aio_t *, aiocb_t *,
    aio_result_t *, vnode_t *, int);
static int aio_cleanup_thread(aio_t *);
static aio_lio_t *aio_list_get(aio_result_t *);
static void lio_set_uerror(void *, int);
extern void aio_zerolen(aio_req_t *);
static int aiowait(struct timeval *, int, long *);
static int aiowaitn(void *, uint_t, uint_t *, timespec_t *);
static int aio_unlock_requests(caddr_t iocblist, int iocb_index,
    aio_req_t *reqlist, aio_t *aiop, model_t model);
static int aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max);
static int aiosuspend(void *, int, struct timespec *, int,
    long *, int);
static int aliowait(int, void *, int, void *, int);
static int aioerror(void *, int);
static int aio_cancel(int, void *, long *, int);
static int arw(int, int, char *, int, offset_t, aio_result_t *, int);
static int aiorw(int, void *, int, int);

static int alioLF(int, void *, int, void *);
static int aio_req_setupLF(aio_req_t **, aio_t *, aiocb64_32_t *,
    aio_result_t *, vnode_t *, int);
static int alio32(int, void *, int, void *);
static int driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
static int driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);

#ifdef  _SYSCALL32_IMPL
static void aiocb_LFton(aiocb64_32_t *, aiocb_t *);
void    aiocb_32ton(aiocb32_t *, aiocb_t *);
#endif /* _SYSCALL32_IMPL */

/*
 * implementation specific functions (external)
 */
void aio_req_free(aio_t *, aio_req_t *);

/*
 * Event Port framework
 */

void aio_req_free_port(aio_t *, aio_req_t *);
static int aio_port_callback(void *, int *, pid_t, int, void *);

/*
 * This is the loadable module wrapper.
 */
#include <sys/modctl.h>
#include <sys/syscall.h>

#ifdef _LP64

static struct sysent kaio_sysent = {
        6,
        SE_NOUNLOAD | SE_64RVAL | SE_ARGC,
        (int (*)())kaioc
};

#ifdef _SYSCALL32_IMPL
static struct sysent kaio_sysent32 = {
        7,
        SE_NOUNLOAD | SE_64RVAL,
        kaio
};
#endif  /* _SYSCALL32_IMPL */

#else   /* _LP64 */

static struct sysent kaio_sysent = {
        7,
        SE_NOUNLOAD | SE_32RVAL1,
        kaio
};

#endif  /* _LP64 */

/*
 * Module linkage information for the kernel.
 */

static struct modlsys modlsys = {
        &mod_syscallops,
        "kernel Async I/O",
        &kaio_sysent
};

#ifdef  _SYSCALL32_IMPL
static struct modlsys modlsys32 = {
        &mod_syscallops32,
        "kernel Async I/O for 32 bit compatibility",
        &kaio_sysent32
};
#endif  /* _SYSCALL32_IMPL */


static struct modlinkage modlinkage = {
        MODREV_1,
        {   &modlsys,
#ifdef  _SYSCALL32_IMPL
            &modlsys32,
#endif
            NULL
        }
};

int
_init(void)
{
        int retval;

        if ((retval = mod_install(&modlinkage)) != 0)
                return (retval);

        return (0);
}

int
_fini(void)
{
        int retval;

        retval = mod_remove(&modlinkage);

        return (retval);
}

int
_info(struct modinfo *modinfop)
{
        return (mod_info(&modlinkage, modinfop));
}
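
/*
 * Illustrative note (not part of the original source): userland reaches
 * the entry points below through the kaio syscall slot installed above,
 * e.g. the aio library issues roughly
 *
 *	syscall(SYS_kaio, AIOREAD, fd, buf, count, offset, resultp);
 *
 * where the first argument selects one of the AIO* subcodes dispatched
 * by kaioc()/kaio() below.
 */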

#ifdef  _LP64
static int64_t
kaioc(
        long    a0,
        long    a1,
        long    a2,
        long    a3,
        long    a4,
        long    a5)
{
        int     error;
        long    rval = 0;

        switch ((int)a0 & ~AIO_POLL_BIT) {
        case AIOREAD:
                error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
                    (offset_t)a4, (aio_result_t *)a5, FREAD);
                break;
        case AIOWRITE:
                error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
                    (offset_t)a4, (aio_result_t *)a5, FWRITE);
                break;
        case AIOWAIT:
                error = aiowait((struct timeval *)a1, (int)a2, &rval);
                break;
        case AIOWAITN:
                error = aiowaitn((void *)a1, (uint_t)a2, (uint_t *)a3,
                    (timespec_t *)a4);
                break;
        case AIONOTIFY:
                error = aionotify();
                break;
        case AIOINIT:
                error = aioinit();
                break;
        case AIOSTART:
                error = aiostart();
                break;
        case AIOLIO:
                error = alio((int)a1, (aiocb_t **)a2, (int)a3,
                    (struct sigevent *)a4);
                break;
        case AIOLIOWAIT:
                error = aliowait((int)a1, (void *)a2, (int)a3,
                    (struct sigevent *)a4, AIO_64);
                break;
        case AIOSUSPEND:
                error = aiosuspend((void *)a1, (int)a2, (timespec_t *)a3,
                    (int)a4, &rval, AIO_64);
                break;
        case AIOERROR:
                error = aioerror((void *)a1, AIO_64);
                break;
        case AIOAREAD:
                error = aiorw((int)a0, (void *)a1, FREAD, AIO_64);
                break;
        case AIOAWRITE:
                error = aiorw((int)a0, (void *)a1, FWRITE, AIO_64);
                break;
        case AIOCANCEL:
                error = aio_cancel((int)a1, (void *)a2, &rval, AIO_64);
                break;

        /*
         * The large file related calls are valid only on a 32-bit
         * kernel, not on a 64-bit kernel. On a 64-bit kernel,
         * large file calls are converted to the regular 64-bit
         * calls before they get here.
         */

        default:
                error = EINVAL;
        }
        if (error)
                return ((int64_t)set_errno(error));
        return (rval);
}
#endif

static int
kaio(
        ulong_t *uap,
        rval_t *rvp)
{
        long    rval = 0;
        int     error = 0;
        offset_t        off;

        rvp->r_vals = 0;
#if defined(_LITTLE_ENDIAN)
        off = ((u_offset_t)uap[5] << 32) | (u_offset_t)uap[4];
#else
        off = ((u_offset_t)uap[4] << 32) | (u_offset_t)uap[5];
#endif

        switch (uap[0] & ~AIO_POLL_BIT) {
        /*
         * This must be the 32-bit system call on a 64-bit kernel.
         */
        case AIOREAD:
                return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
                    (int)uap[3], off, (aio_result_t *)uap[6], FREAD));
        case AIOWRITE:
                return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
                    (int)uap[3], off, (aio_result_t *)uap[6], FWRITE));
        case AIOWAIT:
                error = aiowait((struct timeval *)uap[1], (int)uap[2],
                    &rval);
                break;
        case AIOWAITN:
                error = aiowaitn((void *)uap[1], (uint_t)uap[2],
                    (uint_t *)uap[3], (timespec_t *)uap[4]);
                break;
        case AIONOTIFY:
                return (aionotify());
        case AIOINIT:
                return (aioinit());
        case AIOSTART:
                return (aiostart());
        case AIOLIO:
                return (alio32((int)uap[1], (void *)uap[2], (int)uap[3],
                    (void *)uap[4]));
        case AIOLIOWAIT:
                return (aliowait((int)uap[1], (void *)uap[2],
                    (int)uap[3], (struct sigevent *)uap[4], AIO_32));
        case AIOSUSPEND:
                error = aiosuspend((void *)uap[1], (int)uap[2],
                    (timespec_t *)uap[3], (int)uap[4],
                    &rval, AIO_32);
                break;
        case AIOERROR:
                return (aioerror((void *)uap[1], AIO_32));
        case AIOAREAD:
                return (aiorw((int)uap[0], (void *)uap[1],
                    FREAD, AIO_32));
        case AIOAWRITE:
                return (aiorw((int)uap[0], (void *)uap[1],
                    FWRITE, AIO_32));
        case AIOCANCEL:
                error = (aio_cancel((int)uap[1], (void *)uap[2], &rval,
                    AIO_32));
                break;
        case AIOLIO64:
                return (alioLF((int)uap[1], (void *)uap[2],
                    (int)uap[3], (void *)uap[4]));
        case AIOLIOWAIT64:
                return (aliowait((int)uap[1], (void *)uap[2],
                    (int)uap[3], (void *)uap[4], AIO_LARGEFILE));
        case AIOSUSPEND64:
                error = aiosuspend((void *)uap[1], (int)uap[2],
                    (timespec_t *)uap[3], (int)uap[4], &rval,
                    AIO_LARGEFILE);
                break;
        case AIOERROR64:
                return (aioerror((void *)uap[1], AIO_LARGEFILE));
        case AIOAREAD64:
                return (aiorw((int)uap[0], (void *)uap[1], FREAD,
                    AIO_LARGEFILE));
        case AIOAWRITE64:
                return (aiorw((int)uap[0], (void *)uap[1], FWRITE,
                    AIO_LARGEFILE));
        case AIOCANCEL64:
                error = (aio_cancel((int)uap[1], (void *)uap[2],
                    &rval, AIO_LARGEFILE));
                break;
        default:
                return (EINVAL);
        }

        rvp->r_val1 = rval;
        return (error);
}

/*
 * wake up LWPs in this process that are sleeping in
 * aiowait().
 */
static int
aionotify(void)
{
        aio_t   *aiop;

        aiop = curproc->p_aio;
        if (aiop == NULL)
                return (0);

        mutex_enter(&aiop->aio_mutex);
        aiop->aio_notifycnt++;
        cv_broadcast(&aiop->aio_waitcv);
        mutex_exit(&aiop->aio_mutex);

        return (0);
}

static int
timeval2reltime(struct timeval *timout, timestruc_t *rqtime,
        timestruc_t **rqtp, int *blocking)
{
#ifdef  _SYSCALL32_IMPL
        struct timeval32 wait_time_32;
#endif
        struct timeval wait_time;
        model_t model = get_udatamodel();

        *rqtp = NULL;
        if (timout == NULL) {           /* wait indefinitely */
                *blocking = 1;
                return (0);
        }

        /*
         * Need to correctly compare with the -1 passed in for a user
         * address pointer, with both 32 bit and 64 bit apps.
         */
        if (model == DATAMODEL_NATIVE) {
                if ((intptr_t)timout == (intptr_t)-1) { /* don't wait */
                        *blocking = 0;
                        return (0);
                }

                if (copyin(timout, &wait_time, sizeof (wait_time)))
                        return (EFAULT);
        }
#ifdef  _SYSCALL32_IMPL
        else {
                /*
                 * A -1 passed in by a 32-bit app will not get sign
                 * extended; don't wait if it is -1.
                 */
                if ((intptr_t)timout == (intptr_t)((uint32_t)-1)) {
                        *blocking = 0;
                        return (0);
                }

                if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
                        return (EFAULT);
                TIMEVAL32_TO_TIMEVAL(&wait_time, &wait_time_32);
        }
#endif  /* _SYSCALL32_IMPL */

        if (wait_time.tv_sec == 0 && wait_time.tv_usec == 0) {  /* don't wait */
                *blocking = 0;
                return (0);
        }

        if (wait_time.tv_sec < 0 ||
            wait_time.tv_usec < 0 || wait_time.tv_usec >= MICROSEC)
                return (EINVAL);

        rqtime->tv_sec = wait_time.tv_sec;
        rqtime->tv_nsec = wait_time.tv_usec * 1000;
        *rqtp = rqtime;
        *blocking = 1;

        return (0);
}
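
/*
 * Summary of the timeout conventions implemented above (illustrative
 * note, not in the original source):
 *
 *	timout == NULL          block indefinitely
 *	timout == (void *)-1    poll; do not block
 *	{0, 0}                  poll; do not block
 *	{sec, usec}             block for at most the given interval
 *
 * timespec2reltime() below follows the same scheme, except that it has
 * no -1 convention and works in nanoseconds.
 */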

static int
timespec2reltime(timespec_t *timout, timestruc_t *rqtime,
        timestruc_t **rqtp, int *blocking)
{
#ifdef  _SYSCALL32_IMPL
        timespec32_t wait_time_32;
#endif
        model_t model = get_udatamodel();

        *rqtp = NULL;
        if (timout == NULL) {
                *blocking = 1;
                return (0);
        }

        if (model == DATAMODEL_NATIVE) {
                if (copyin(timout, rqtime, sizeof (*rqtime)))
                        return (EFAULT);
        }
#ifdef  _SYSCALL32_IMPL
        else {
                if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
                        return (EFAULT);
                TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32);
        }
#endif  /* _SYSCALL32_IMPL */

        if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) {
                *blocking = 0;
                return (0);
        }

        if (rqtime->tv_sec < 0 ||
            rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC)
                return (EINVAL);

        *rqtp = rqtime;
        *blocking = 1;

        return (0);
}

/*ARGSUSED*/
static int
aiowait(
        struct timeval  *timout,
        int     dontblockflg,
        long    *rval)
{
        int             error;
        aio_t           *aiop;
        aio_req_t       *reqp;
        clock_t         status;
        int             blocking;
        int             timecheck;
        timestruc_t     rqtime;
        timestruc_t     *rqtp;

        aiop = curproc->p_aio;
        if (aiop == NULL)
                return (EINVAL);

        /*
         * Establish the absolute future time for the timeout.
         */
        error = timeval2reltime(timout, &rqtime, &rqtp, &blocking);
        if (error)
                return (error);
        if (rqtp) {
                timestruc_t now;
                timecheck = timechanged;
                gethrestime(&now);
                timespecadd(rqtp, &now);
        }

        mutex_enter(&aiop->aio_mutex);
        for (;;) {
                /* process requests on poll queue */
                if (aiop->aio_pollq) {
                        mutex_exit(&aiop->aio_mutex);
                        aio_cleanup(0);
                        mutex_enter(&aiop->aio_mutex);
                }
                if ((reqp = aio_req_remove(NULL)) != NULL) {
                        *rval = (long)reqp->aio_req_resultp;
                        break;
                }
                /* user-level done queue might not be empty */
                if (aiop->aio_notifycnt > 0) {
                        aiop->aio_notifycnt--;
                        *rval = 1;
                        break;
                }
                /* don't block if no outstanding aio */
                if (aiop->aio_outstanding == 0 && dontblockflg) {
                        error = EINVAL;
                        break;
                }
                if (blocking) {
                        status = cv_waituntil_sig(&aiop->aio_waitcv,
                            &aiop->aio_mutex, rqtp, timecheck);

                        if (status > 0)         /* check done queue again */
                                continue;
                        if (status == 0) {      /* interrupted by a signal */
                                error = EINTR;
                                *rval = -1;
                        } else {                /* timer expired */
                                error = ETIME;
                        }
                }
                break;
        }
        mutex_exit(&aiop->aio_mutex);
        if (reqp) {
                aphysio_unlock(reqp);
                aio_copyout_result(reqp);
                mutex_enter(&aiop->aio_mutex);
                aio_req_free(aiop, reqp);
                mutex_exit(&aiop->aio_mutex);
        }
        return (error);
}
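
/*
 * Illustrative userland sketch (not part of the original source): the
 * AIOWAIT subcode handled above is normally reached through the
 * aiowait(3aio) wrapper, roughly as follows, assuming a request was
 * started with aioread(3aio):
 *
 *	aio_result_t res;
 *	(void) aioread(fd, buf, bufsz, 0, SEEK_SET, &res);
 *	struct timeval tv = { 5, 0 };
 *	aio_result_t *donep = aiowait(&tv);
 *	if (donep == &res)
 *		... res.aio_return and res.aio_errno hold the result
 */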

/*
 * aiowaitn can be used to reap completed asynchronous requests submitted with
 * lio_listio, aio_read or aio_write.
 * This function only reaps asynchronous raw I/Os.
 */
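
/*
 * Illustrative userland sketch (not part of the original source), via
 * the aio_waitn(3rt) extension:
 *
 *	aiocb_t *done[8];
 *	uint_t nwait = 4;		want at least 4 completions
 *	timespec_t ts = { 2, 0 };
 *	if (aio_waitn(done, 8, &nwait, &ts) == 0)
 *		... nwait now holds the number of aiocb pointers returned
 */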

/*ARGSUSED*/
static int
aiowaitn(void *uiocb, uint_t nent, uint_t *nwait, timespec_t *timout)
{
        int             error = 0;
        aio_t           *aiop;
        aio_req_t       *reqlist = NULL;
        caddr_t         iocblist = NULL;        /* array of iocb ptr's */
        uint_t          waitcnt, cnt = 0;       /* iocb cnt */
        size_t          iocbsz;                 /* users iocb size */
        size_t          riocbsz;                /* returned iocb size */
        int             iocb_index = 0;
        model_t         model = get_udatamodel();
        int             blocking = 1;
        int             timecheck;
        timestruc_t     rqtime;
        timestruc_t     *rqtp;

        aiop = curproc->p_aio;
        if (aiop == NULL || nent == 0 || nent > _AIO_LISTIO_MAX)
                return (EINVAL);

        if (aiop->aio_outstanding == 0)
                return (EAGAIN);

        if (copyin(nwait, &waitcnt, sizeof (uint_t)))
                return (EFAULT);

        /* set *nwait to zero, if we must return prematurely */
        if (copyout(&cnt, nwait, sizeof (uint_t)))
                return (EFAULT);

        if (waitcnt == 0) {
                blocking = 0;
                rqtp = NULL;
                waitcnt = nent;
        } else {
                error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
                if (error)
                        return (error);
        }

        if (model == DATAMODEL_NATIVE)
                iocbsz = (sizeof (aiocb_t *) * nent);
#ifdef  _SYSCALL32_IMPL
        else
                iocbsz = (sizeof (caddr32_t) * nent);
#endif  /* _SYSCALL32_IMPL */

        /*
         * Only one aio_waitn call is allowed at a time.
         * The active aio_waitn will collect all requests
         * out of the "done" list and if necessary it will wait
         * for some/all pending requests to fulfill the nwait
         * parameter.
         * A second or further aio_waitn call will sleep here
         * until the active aio_waitn finishes and leaves the kernel.
         * If the second call does not block (poll), then it returns
         * immediately with the error code EAGAIN.
         * If the second call should block, then it sleeps here, but
         * does not touch the timeout. The timeout starts when this
         * aio_waitn call becomes active.
         */

        mutex_enter(&aiop->aio_mutex);

        while (aiop->aio_flags & AIO_WAITN) {
                if (blocking == 0) {
                        mutex_exit(&aiop->aio_mutex);
                        return (EAGAIN);
                }

                /* block, no timeout */
                aiop->aio_flags |= AIO_WAITN_PENDING;
                if (!cv_wait_sig(&aiop->aio_waitncv, &aiop->aio_mutex)) {
                        mutex_exit(&aiop->aio_mutex);
                        return (EINTR);
                }
        }

        /*
         * Establish the absolute future time for the timeout.
         */
        if (rqtp) {
                timestruc_t now;
                timecheck = timechanged;
                gethrestime(&now);
                timespecadd(rqtp, &now);
        }

        if (iocbsz > aiop->aio_iocbsz && aiop->aio_iocb != NULL) {
                kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);
                aiop->aio_iocb = NULL;
        }

        if (aiop->aio_iocb == NULL) {
                iocblist = kmem_zalloc(iocbsz, KM_NOSLEEP);
                if (iocblist == NULL) {
                        mutex_exit(&aiop->aio_mutex);
                        return (ENOMEM);
                }
                aiop->aio_iocb = (aiocb_t **)iocblist;
                aiop->aio_iocbsz = iocbsz;
        } else {
                iocblist = (char *)aiop->aio_iocb;
        }

        aiop->aio_waitncnt = waitcnt;
        aiop->aio_flags |= AIO_WAITN;

        for (;;) {
                /* push requests on poll queue to done queue */
                if (aiop->aio_pollq) {
                        mutex_exit(&aiop->aio_mutex);
                        aio_cleanup(0);
                        mutex_enter(&aiop->aio_mutex);
                }

                /* check for requests on done queue */
                if (aiop->aio_doneq) {
                        cnt += aio_reqlist_concat(aiop, &reqlist, nent - cnt);
                        aiop->aio_waitncnt = waitcnt - cnt;
                }

                /* user-level done queue might not be empty */
                if (aiop->aio_notifycnt > 0) {
                        aiop->aio_notifycnt--;
                        error = 0;
                        break;
                }

                /*
                 * if we are here a second time as a result of the
                 * timer expiring, we reset the error if there are
                 * enough aiocb's to satisfy the request.
                 * We also return if all requests are already done
                 * and we picked up the whole done queue.
                 */

                if ((cnt >= waitcnt) || (cnt > 0 && aiop->aio_pending == 0 &&
                    aiop->aio_doneq == NULL)) {
                        error = 0;
                        break;
                }

                if ((cnt < waitcnt) && blocking) {
                        int rval = cv_waituntil_sig(&aiop->aio_waitcv,
                            &aiop->aio_mutex, rqtp, timecheck);
                        if (rval > 0)
                                continue;
                        if (rval < 0) {
                                error = ETIME;
                                blocking = 0;
                                continue;
                        }
                        error = EINTR;
                }
                break;
        }

        mutex_exit(&aiop->aio_mutex);

        if (cnt > 0) {

                iocb_index = aio_unlock_requests(iocblist, iocb_index, reqlist,
                    aiop, model);

                if (model == DATAMODEL_NATIVE)
                        riocbsz = (sizeof (aiocb_t *) * cnt);
#ifdef  _SYSCALL32_IMPL
                else
                        riocbsz = (sizeof (caddr32_t) * cnt);
#endif  /* _SYSCALL32_IMPL */

                if (copyout(iocblist, uiocb, riocbsz) ||
                    copyout(&cnt, nwait, sizeof (uint_t)))
                        error = EFAULT;
        }

        /* check if there is another thread waiting for execution */
        mutex_enter(&aiop->aio_mutex);
        aiop->aio_flags &= ~AIO_WAITN;
        if (aiop->aio_flags & AIO_WAITN_PENDING) {
                aiop->aio_flags &= ~AIO_WAITN_PENDING;
                cv_signal(&aiop->aio_waitncv);
        }
        mutex_exit(&aiop->aio_mutex);

        return (error);
}

/*
 * aio_unlock_requests
 * copies out the result of each request as well as the return value.
 * It builds the list of completed asynchronous requests,
 * unlocks the allocated memory ranges and
 * puts the aio request structures back onto the free list.
 */

static int
aio_unlock_requests(
        caddr_t iocblist,
        int     iocb_index,
        aio_req_t *reqlist,
        aio_t   *aiop,
        model_t model)
{
        aio_req_t       *reqp, *nreqp;

        if (model == DATAMODEL_NATIVE) {
                for (reqp = reqlist; reqp != NULL; reqp = nreqp) {
                        (((caddr_t *)iocblist)[iocb_index++]) =
                            reqp->aio_req_iocb.iocb;
                        nreqp = reqp->aio_req_next;
                        aphysio_unlock(reqp);
                        aio_copyout_result(reqp);
                        mutex_enter(&aiop->aio_mutex);
                        aio_req_free(aiop, reqp);
                        mutex_exit(&aiop->aio_mutex);
                }
        }
#ifdef  _SYSCALL32_IMPL
        else {
                for (reqp = reqlist; reqp != NULL; reqp = nreqp) {
                        ((caddr32_t *)iocblist)[iocb_index++] =
                            reqp->aio_req_iocb.iocb32;
                        nreqp = reqp->aio_req_next;
                        aphysio_unlock(reqp);
                        aio_copyout_result(reqp);
                        mutex_enter(&aiop->aio_mutex);
                        aio_req_free(aiop, reqp);
                        mutex_exit(&aiop->aio_mutex);
                }
        }
#endif  /* _SYSCALL32_IMPL */
        return (iocb_index);
}

/*
 * aio_reqlist_concat
 * moves "max" elements from the done queue to the reqlist queue and removes
 * the AIO_DONEQ flag.
 * - reqlist queue is a singly linked list
 * - done queue is a doubly linked list
 */

static int
aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max)
{
        aio_req_t *q2, *q2work, *list;
        int count = 0;

        list = *reqlist;
        q2 = aiop->aio_doneq;
        q2work = q2;
        while (max-- > 0) {
                q2work->aio_req_flags &= ~AIO_DONEQ;
                q2work = q2work->aio_req_next;
                count++;
                if (q2work == q2)
                        break;
        }

        if (q2work == q2) {
                /* all elements of the doneq were moved */
                q2->aio_req_prev->aio_req_next = list;
                list = q2;
                aiop->aio_doneq = NULL;
        } else {
                /*
                 * max < elements in the doneq
                 * detach only the required amount of elements
                 * out of the doneq
                 */
                q2work->aio_req_prev->aio_req_next = list;
                list = q2;

                aiop->aio_doneq = q2work;
                q2work->aio_req_prev = q2->aio_req_prev;
                q2->aio_req_prev->aio_req_next = q2work;
        }
        *reqlist = list;
        return (count);
}

/*ARGSUSED*/
static int
aiosuspend(
        void    *aiocb,
        int     nent,
        struct timespec *timout,
        int     flag,
        long    *rval,
        int     run_mode)
{
        int             error;
        aio_t           *aiop;
        aio_req_t       *reqp, *found, *next;
        caddr_t         cbplist = NULL;
        aiocb_t         *cbp, **ucbp;
#ifdef  _SYSCALL32_IMPL
        aiocb32_t       *cbp32;
        caddr32_t       *ucbp32;
#endif  /* _SYSCALL32_IMPL */
        aiocb64_32_t    *cbp64;
        int             rv;
        int             i;
        size_t          ssize;
        model_t         model = get_udatamodel();
        int             blocking;
        int             timecheck;
        timestruc_t     rqtime;
        timestruc_t     *rqtp;

        aiop = curproc->p_aio;
        if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
                return (EINVAL);

        /*
         * Establish the absolute future time for the timeout.
         */
        error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
        if (error)
                return (error);
        if (rqtp) {
                timestruc_t now;
                timecheck = timechanged;
                gethrestime(&now);
                timespecadd(rqtp, &now);
        }

        /*
         * If we are not blocking and no I/O has completed yet,
         * skip the aiocb copyin.
         */
        if (!blocking && (aiop->aio_pollq == NULL) &&
            (aiop->aio_doneq == NULL)) {
                return (EAGAIN);
        }

        if (model == DATAMODEL_NATIVE)
                ssize = (sizeof (aiocb_t *) * nent);
#ifdef  _SYSCALL32_IMPL
        else
                ssize = (sizeof (caddr32_t) * nent);
#endif  /* _SYSCALL32_IMPL */

        cbplist = kmem_alloc(ssize, KM_NOSLEEP);
        if (cbplist == NULL)
                return (ENOMEM);

        if (copyin(aiocb, cbplist, ssize)) {
                error = EFAULT;
                goto done;
        }

        found = NULL;
        /*
         * we need to get the aio_cleanupq_mutex since we call
         * aio_req_done().
         */
        mutex_enter(&aiop->aio_cleanupq_mutex);
        mutex_enter(&aiop->aio_mutex);
        for (;;) {
                /* push requests on poll queue to done queue */
                if (aiop->aio_pollq) {
                        mutex_exit(&aiop->aio_mutex);
                        mutex_exit(&aiop->aio_cleanupq_mutex);
                        aio_cleanup(0);
                        mutex_enter(&aiop->aio_cleanupq_mutex);
                        mutex_enter(&aiop->aio_mutex);
                }
                /* check for requests on done queue */
                if (aiop->aio_doneq) {
                        if (model == DATAMODEL_NATIVE)
                                ucbp = (aiocb_t **)cbplist;
#ifdef  _SYSCALL32_IMPL
                        else
                                ucbp32 = (caddr32_t *)cbplist;
#endif  /* _SYSCALL32_IMPL */
                        for (i = 0; i < nent; i++) {
                                if (model == DATAMODEL_NATIVE) {
                                        if ((cbp = *ucbp++) == NULL)
                                                continue;
                                        if (run_mode != AIO_LARGEFILE)
                                                reqp = aio_req_done(
                                                    &cbp->aio_resultp);
                                        else {
                                                cbp64 = (aiocb64_32_t *)cbp;
                                                reqp = aio_req_done(
                                                    &cbp64->aio_resultp);
                                        }
                                }
#ifdef  _SYSCALL32_IMPL
                                else {
                                        if (run_mode == AIO_32) {
                                                if ((cbp32 =
                                                    (aiocb32_t *)(uintptr_t)
                                                    *ucbp32++) == NULL)
                                                        continue;
                                                reqp = aio_req_done(
                                                    &cbp32->aio_resultp);
                                        } else if (run_mode == AIO_LARGEFILE) {
                                                if ((cbp64 =
                                                    (aiocb64_32_t *)(uintptr_t)
                                                    *ucbp32++) == NULL)
                                                        continue;
                                                reqp = aio_req_done(
                                                    &cbp64->aio_resultp);
                                        }

                                }
#endif  /* _SYSCALL32_IMPL */
                                if (reqp) {
                                        reqp->aio_req_next = found;
                                        found = reqp;
                                }
                                if (aiop->aio_doneq == NULL)
                                        break;
                        }
                        if (found)
                                break;
                }
                if (aiop->aio_notifycnt > 0) {
                        /*
                         * nothing on the kernel's queue. the user
                         * has notified the kernel that it has items
                         * on a user-level queue.
                         */
                        aiop->aio_notifycnt--;
                        *rval = 1;
                        error = 0;
                        break;
                }
                /* don't block if nothing is outstanding */
                if (aiop->aio_outstanding == 0) {
                        error = EAGAIN;
                        break;
                }
                if (blocking) {
                        /*
                         * drop the aio_cleanupq_mutex as we are
                         * going to block.
                         */
                        mutex_exit(&aiop->aio_cleanupq_mutex);
                        rv = cv_waituntil_sig(&aiop->aio_waitcv,
                            &aiop->aio_mutex, rqtp, timecheck);
                        /*
                         * we have to drop aio_mutex and
                         * grab it in the right order.
                         */
                        mutex_exit(&aiop->aio_mutex);
                        mutex_enter(&aiop->aio_cleanupq_mutex);
                        mutex_enter(&aiop->aio_mutex);
                        if (rv > 0)     /* check done queue again */
                                continue;
                        if (rv == 0)    /* interrupted by a signal */
                                error = EINTR;
                        else            /* timer expired */
                                error = ETIME;
                } else {
                        error = EAGAIN;
                }
                break;
        }
        mutex_exit(&aiop->aio_mutex);
        mutex_exit(&aiop->aio_cleanupq_mutex);
        for (reqp = found; reqp != NULL; reqp = next) {
                next = reqp->aio_req_next;
                aphysio_unlock(reqp);
                aio_copyout_result(reqp);
                mutex_enter(&aiop->aio_mutex);
                aio_req_free(aiop, reqp);
                mutex_exit(&aiop->aio_mutex);
        }
done:
        kmem_free(cbplist, ssize);
        return (error);
}

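/*
 * Illustrative userland sketch (not part of the original source):
 * AIOSUSPEND is normally reached via aio_suspend(3rt), roughly:
 *
 *	const aiocb_t *list[2] = { &cb0, &cb1 };
 *	struct timespec ts = { 1, 0 };
 *	if (aio_suspend(list, 2, &ts) == 0)
 *		... at least one of cb0/cb1 has completed
 */
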
/*
 * initialize aio by allocating an aio_t struct for this
 * process.
 */
static int
aioinit(void)
{
        proc_t *p = curproc;
        aio_t *aiop;
        mutex_enter(&p->p_lock);
        if ((aiop = p->p_aio) == NULL) {
                aiop = aio_aiop_alloc();
                p->p_aio = aiop;
        }
        mutex_exit(&p->p_lock);
        if (aiop == NULL)
                return (ENOMEM);
        return (0);
}

/*
 * start a special thread that will cleanup after aio requests
 * that are preventing a segment from being unmapped. as_unmap()
 * blocks until all physio to this segment is completed. this
 * doesn't happen until all the pages in this segment are not
 * SOFTLOCKed. Some pages will be SOFTLOCKed when there are aio
 * requests still outstanding. this special thread will make sure
 * that these SOFTLOCKed pages will eventually be SOFTUNLOCKed.
 *
 * this function will return an error if the process has only
 * one LWP. the assumption is that the caller is a separate LWP
 * that remains blocked in the kernel for the life of this process.
 */
static int
aiostart(void)
{
        proc_t *p = curproc;
        aio_t *aiop;
        int first, error = 0;

        if (p->p_lwpcnt == 1)
                return (EDEADLK);
        mutex_enter(&p->p_lock);
        if ((aiop = p->p_aio) == NULL)
                error = EINVAL;
        else {
                first = aiop->aio_ok;
                if (aiop->aio_ok == 0)
                        aiop->aio_ok = 1;
        }
        mutex_exit(&p->p_lock);
        if (error == 0 && first == 0) {
                return (aio_cleanup_thread(aiop));
                /* should return only to exit */
        }
        return (error);
}

/*
 * Associate an aiocb with a port.
 * This function is used by aiorw() to associate a transaction with a port.
 * Allocate an event port structure (port_alloc_event()) and store the
 * delivered user pointer (portnfy_user) in the portkev_user field of the
 * port_kevent_t structure.
 * The aio_req_portkev pointer in the aio_req_t structure was added to identify
 * the port association.
 */

static int
aio_req_assoc_port_rw(port_notify_t *pntfy, aiocb_t *cbp,
        aio_req_t *reqp, int event)
{
        port_kevent_t   *pkevp = NULL;
        int             error;

        error = port_alloc_event(pntfy->portnfy_port, PORT_ALLOC_DEFAULT,
            PORT_SOURCE_AIO, &pkevp);
        if (error) {
                if ((error == ENOMEM) || (error == EAGAIN))
                        error = EAGAIN;
                else
                        error = EINVAL;
        } else {
                port_init_event(pkevp, (uintptr_t)cbp, pntfy->portnfy_user,
                    aio_port_callback, reqp);
                pkevp->portkev_events = event;
                reqp->aio_req_portkev = pkevp;
                reqp->aio_req_port = pntfy->portnfy_port;
        }
        return (error);
}

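/*
 * Userland sketch (illustrative only, not in the original source) of
 * the SIGEV_PORT notification that this association serves, using the
 * event ports API; "my_cookie" is a hypothetical application pointer:
 *
 *	int port = port_create();
 *	port_notify_t pn = { port, my_cookie };
 *	aiocb_t cb;
 *	...	set up cb.aio_fildes, cb.aio_buf, cb.aio_nbytes, etc.
 *	cb.aio_sigevent.sigev_notify = SIGEV_PORT;
 *	cb.aio_sigevent.sigev_value.sival_ptr = &pn;
 *	(void) aio_read(&cb);
 *	port_event_t pe;
 *	(void) port_get(port, &pe, NULL);	pe.portev_user == my_cookie
 */
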
#ifdef _LP64

/*
 * Asynchronous list IO. A chain of aiocb's is copied in
 * one at a time. If an aiocb is invalid, it is skipped.
 * For each aiocb, the appropriate driver entry point is
 * called. Optimize for the common case where the list
 * of requests is to the same file descriptor.
 *
 * One possible optimization is to define a new driver entry
 * point that supports a list of IO requests. Whether this
 * improves performance depends somewhat on the driver's
 * locking strategy. Processing a list could adversely impact
 * the driver's interrupt latency.
 */
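
/*
 * Illustrative userland sketch (not part of the original source): this
 * handler backs lio_listio(3rt), e.g.
 *
 *	aiocb_t cb0, cb1;
 *	aiocb_t *list[2] = { &cb0, &cb1 };
 *	...	set up fildes/buf/nbytes/offset in each aiocb
 *	cb0.aio_lio_opcode = LIO_READ;
 *	cb1.aio_lio_opcode = LIO_WRITE;
 *	if (lio_listio(LIO_WAIT, list, 2, NULL) == 0)
 *		... both requests have completed
 */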
static int
alio(
        int             mode_arg,
        aiocb_t         **aiocb_arg,
        int             nent,
        struct sigevent *sigev)
{
        file_t          *fp;
        file_t          *prev_fp = NULL;
        int             prev_mode = -1;
        struct vnode    *vp;
        aio_lio_t       *head;
        aio_req_t       *reqp;
        aio_t           *aiop;
        caddr_t         cbplist;
        aiocb_t         cb;
        aiocb_t         *aiocb = &cb;
        aiocb_t         *cbp;
        aiocb_t         **ucbp;
        struct sigevent sigevk;
        sigqueue_t      *sqp;
        int             (*aio_func)();
        int             mode;
        int             error = 0;
        int             aio_errors = 0;
        int             i;
        size_t          ssize;
        int             deadhead = 0;
        int             aio_notsupported = 0;
        int             lio_head_port;
        int             aio_port;
        int             aio_thread;
        port_kevent_t   *pkevtp = NULL;
        int             portused = 0;
        port_notify_t   pnotify;
        int             event;

        aiop = curproc->p_aio;
        if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
                return (EINVAL);

        ssize = (sizeof (aiocb_t *) * nent);
        cbplist = kmem_alloc(ssize, KM_SLEEP);
        ucbp = (aiocb_t **)cbplist;

        if (copyin(aiocb_arg, cbplist, ssize) ||
            (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent)))) {
                kmem_free(cbplist, ssize);
                return (EFAULT);
        }

        /* Event Ports  */
        if (sigev &&
            (sigevk.sigev_notify == SIGEV_THREAD ||
            sigevk.sigev_notify == SIGEV_PORT)) {
                if (sigevk.sigev_notify == SIGEV_THREAD) {
                        pnotify.portnfy_port = sigevk.sigev_signo;
                        pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
                } else if (copyin(sigevk.sigev_value.sival_ptr,
                    &pnotify, sizeof (pnotify))) {
                        kmem_free(cbplist, ssize);
                        return (EFAULT);
                }
                error = port_alloc_event(pnotify.portnfy_port,
                    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
                if (error) {
                        if (error == ENOMEM || error == EAGAIN)
                                error = EAGAIN;
                        else
                                error = EINVAL;
                        kmem_free(cbplist, ssize);
                        return (error);
                }
                lio_head_port = pnotify.portnfy_port;
                portused = 1;
        }

        /*
         * a list head should be allocated if notification is
         * enabled for this list.
         */
        head = NULL;

        if (mode_arg == LIO_WAIT || sigev) {
                mutex_enter(&aiop->aio_mutex);
                error = aio_lio_alloc(&head);
                mutex_exit(&aiop->aio_mutex);
                if (error)
                        goto done;
                deadhead = 1;
                head->lio_nent = nent;
                head->lio_refcnt = nent;
                head->lio_port = -1;
                head->lio_portkev = NULL;
                if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
                    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
                        sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
                        if (sqp == NULL) {
                                error = EAGAIN;
                                goto done;
                        }
                        sqp->sq_func = NULL;
                        sqp->sq_next = NULL;
                        sqp->sq_info.si_code = SI_ASYNCIO;
                        sqp->sq_info.si_pid = curproc->p_pid;
                        sqp->sq_info.si_ctid = PRCTID(curproc);
                        sqp->sq_info.si_zoneid = getzoneid();
                        sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
                        sqp->sq_info.si_signo = sigevk.sigev_signo;
                        sqp->sq_info.si_value = sigevk.sigev_value;
                        head->lio_sigqp = sqp;
                } else {
                        head->lio_sigqp = NULL;
                }
                if (pkevtp) {
                        /*
                         * Prepare data to send when list of aiocb's
                         * has completed.
                         */
                        port_init_event(pkevtp, (uintptr_t)sigev,
                            (void *)(uintptr_t)pnotify.portnfy_user,
                            NULL, head);
                        pkevtp->portkev_events = AIOLIO;
                        head->lio_portkev = pkevtp;
                        head->lio_port = pnotify.portnfy_port;
                }
        }

        for (i = 0; i < nent; i++, ucbp++) {

                cbp = *ucbp;
                /* skip entry if it can't be copied. */
                if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
                        if (head) {
                                mutex_enter(&aiop->aio_mutex);
                                head->lio_nent--;
                                head->lio_refcnt--;
                                mutex_exit(&aiop->aio_mutex);
                        }
                        continue;
                }

                /* skip if opcode for aiocb is LIO_NOP */
                mode = aiocb->aio_lio_opcode;
                if (mode == LIO_NOP) {
                        cbp = NULL;
                        if (head) {
                                mutex_enter(&aiop->aio_mutex);
                                head->lio_nent--;
                                head->lio_refcnt--;
                                mutex_exit(&aiop->aio_mutex);
                        }
                        continue;
                }

                /* increment file descriptor's ref count. */
                if ((fp = getf(aiocb->aio_fildes)) == NULL) {
                        lio_set_uerror(&cbp->aio_resultp, EBADF);
                        if (head) {
                                mutex_enter(&aiop->aio_mutex);
                                head->lio_nent--;
                                head->lio_refcnt--;
                                mutex_exit(&aiop->aio_mutex);
                        }
                        aio_errors++;
                        continue;
                }

                /*
                 * check the permission of the partition; the fd
                 * must have been opened with the requested r/w mode.
                 */
                if ((fp->f_flag & mode) == 0) {
                        releasef(aiocb->aio_fildes);
                        lio_set_uerror(&cbp->aio_resultp, EBADF);
                        if (head) {
                                mutex_enter(&aiop->aio_mutex);
                                head->lio_nent--;
                                head->lio_refcnt--;
                                mutex_exit(&aiop->aio_mutex);
                        }
                        aio_errors++;
                        continue;
                }

                /*
                 * common case where requests are to the same fd
                 * for the same r/w operation.
                 * for UFS, need to set EBADFD
                 */
                vp = fp->f_vnode;
                if (fp != prev_fp || mode != prev_mode) {
                        aio_func = check_vp(vp, mode);
                        if (aio_func == NULL) {
                                prev_fp = NULL;
                                releasef(aiocb->aio_fildes);
                                lio_set_uerror(&cbp->aio_resultp, EBADFD);
                                aio_notsupported++;
                                if (head) {
                                        mutex_enter(&aiop->aio_mutex);
                                        head->lio_nent--;
                                        head->lio_refcnt--;
                                        mutex_exit(&aiop->aio_mutex);
                                }
                                continue;
                        } else {
                                prev_fp = fp;
                                prev_mode = mode;
                        }
                }

                error = aio_req_setup(&reqp, aiop, aiocb,
                    &cbp->aio_resultp, vp, 0);
                if (error) {
                        releasef(aiocb->aio_fildes);
                        lio_set_uerror(&cbp->aio_resultp, error);
                        if (head) {
                                mutex_enter(&aiop->aio_mutex);
                                head->lio_nent--;
                                head->lio_refcnt--;
                                mutex_exit(&aiop->aio_mutex);
                        }
                        aio_errors++;
                        continue;
                }

                reqp->aio_req_lio = head;
                deadhead = 0;

                /*
                 * Set the errno field now before sending the request to
                 * the driver to avoid a race condition
                 */
                (void) suword32(&cbp->aio_resultp.aio_errno,
                    EINPROGRESS);

                reqp->aio_req_iocb.iocb = (caddr_t)cbp;

                event = (mode == LIO_READ) ? AIOAREAD : AIOAWRITE;
1432                 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
1433                 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
1434                 if (aio_port | aio_thread) {
1435                         port_kevent_t *lpkevp;
1436                         /*
1437                          * Prepare data to send with each aiocb completed.
1438                          */
1439                         if (aio_port) {
1440                                 void *paddr =
1441                                     aiocb->aio_sigevent.sigev_value.sival_ptr;
1442                                 if (copyin(paddr, &pnotify, sizeof (pnotify)))
1443                                         error = EFAULT;
1444                         } else {        /* aio_thread */
1445                                 pnotify.portnfy_port =
1446                                     aiocb->aio_sigevent.sigev_signo;
1447                                 pnotify.portnfy_user =
1448                                     aiocb->aio_sigevent.sigev_value.sival_ptr;
1449                         }
1450                         if (error)
1451                                 /* EMPTY */;
1452                         else if (pkevtp != NULL &&
1453                             pnotify.portnfy_port == lio_head_port)
1454                                 error = port_dup_event(pkevtp, &lpkevp,
1455                                     PORT_ALLOC_DEFAULT);
1456                         else
1457                                 error = port_alloc_event(pnotify.portnfy_port,
1458                                     PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
1459                                     &lpkevp);
1460                         if (error == 0) {
1461                                 port_init_event(lpkevp, (uintptr_t)cbp,
1462                                     (void *)(uintptr_t)pnotify.portnfy_user,
1463                                     aio_port_callback, reqp);
1464                                 lpkevp->portkev_events = event;
1465                                 reqp->aio_req_portkev = lpkevp;
1466                                 reqp->aio_req_port = pnotify.portnfy_port;
1467                         }
1468                 }
1469 
1470                 /*
1471                  * send the request to the driver.
1472                  */
1473                 if (error == 0) {
1474                         if (aiocb->aio_nbytes == 0) {
1475                                 clear_active_fd(aiocb->aio_fildes);
1476                                 aio_zerolen(reqp);
1477                                 continue;
1478                         }
1479                         error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
1480                             CRED());
1481                 }
1482 
1483                 /*
1484                  * the fd's ref count is not decremented until the IO has
1485                  * completed, unless there was an error.
1486                  */
1487                 if (error) {
1488                         releasef(aiocb->aio_fildes);
1489                         lio_set_uerror(&cbp->aio_resultp, error);
1490                         if (head) {
1491                                 mutex_enter(&aiop->aio_mutex);
1492                                 head->lio_nent--;
1493                                 head->lio_refcnt--;
1494                                 mutex_exit(&aiop->aio_mutex);
1495                         }
1496                         if (error == ENOTSUP)
1497                                 aio_notsupported++;
1498                         else
1499                                 aio_errors++;
1500                         lio_set_error(reqp, portused);
1501                 } else {
1502                         clear_active_fd(aiocb->aio_fildes);
1503                 }
1504         }
1505 
1506         if (aio_notsupported) {
1507                 error = ENOTSUP;
1508         } else if (aio_errors) {
1509                 /*
1510                  * return EIO if any request failed
1511                  */
1512                 error = EIO;
1513         }
1514 
1515         if (mode_arg == LIO_WAIT) {
1516                 mutex_enter(&aiop->aio_mutex);
1517                 while (head->lio_refcnt > 0) {
1518                         if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
1519                                 mutex_exit(&aiop->aio_mutex);
1520                                 error = EINTR;
1521                                 goto done;
1522                         }
1523                 }
1524                 mutex_exit(&aiop->aio_mutex);
1525                 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_64);
1526         }
1527 
1528 done:
1529         kmem_free(cbplist, ssize);
1530         if (deadhead) {
1531                 if (head->lio_sigqp)
1532                         kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
1533                 if (head->lio_portkev)
1534                         port_free_event(head->lio_portkev);
1535                 kmem_free(head, sizeof (aio_lio_t));
1536         }
1537         return (error);
1538 }
1539 
1540 #endif /* _LP64 */
1541 
1542 /*
1543  * Asynchronous list IO.
1544  * If list I/O is called with LIO_WAIT it can still return
1545  * before all the I/O's are completed if a signal is caught
1546  * or if the list includes UFS I/O requests. If this happens,
1547  * libaio will call aliowait() to wait for the I/O's to
1548  * complete.
1549  */
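     /*
      * For illustration, a minimal user-level sketch that exercises
      * this path (names fd and buf are assumed; error handling elided;
      * the retry through aliowait() described above happens inside
      * libaio, not in the application):
      *
      *		struct aiocb cb, *list[1];
      *
      *		(void) memset(&cb, 0, sizeof (cb));
      *		cb.aio_fildes = fd;
      *		cb.aio_buf = buf;
      *		cb.aio_nbytes = sizeof (buf);
      *		cb.aio_lio_opcode = LIO_READ;
      *		list[0] = &cb;
      *		if (lio_listio(LIO_WAIT, list, 1, NULL) == -1)
      *			perror("lio_listio");
      */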
1550 /*ARGSUSED*/
1551 static int
1552 aliowait(
1553         int     mode,
1554         void    *aiocb,
1555         int     nent,
1556         void    *sigev,
1557         int     run_mode)
1558 {
1559         aio_lio_t       *head;
1560         aio_t           *aiop;
1561         caddr_t         cbplist;
1562         aiocb_t         *cbp, **ucbp;
1563 #ifdef  _SYSCALL32_IMPL
1564         aiocb32_t       *cbp32;
1565         caddr32_t       *ucbp32;
1566         aiocb64_32_t    *cbp64;
1567 #endif
1568         int             error = 0;
1569         int             i;
1570         size_t          ssize = 0;
1571         model_t         model = get_udatamodel();
1572 
1573         aiop = curproc->p_aio;
1574         if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
1575                 return (EINVAL);
1576 
1577         if (model == DATAMODEL_NATIVE)
1578                 ssize = (sizeof (aiocb_t *) * nent);
1579 #ifdef  _SYSCALL32_IMPL
1580         else
1581                 ssize = (sizeof (caddr32_t) * nent);
1582 #endif  /* _SYSCALL32_IMPL */
1583 
1584         if (ssize == 0)
1585                 return (EINVAL);
1586 
1587         cbplist = kmem_alloc(ssize, KM_SLEEP);
1588 
1589         if (model == DATAMODEL_NATIVE)
1590                 ucbp = (aiocb_t **)cbplist;
1591 #ifdef  _SYSCALL32_IMPL
1592         else
1593                 ucbp32 = (caddr32_t *)cbplist;
1594 #endif  /* _SYSCALL32_IMPL */
1595 
1596         if (copyin(aiocb, cbplist, ssize)) {
1597                 error = EFAULT;
1598                 goto done;
1599         }
1600 
1601         /*
1602          * To find the list head, we go through the
1603          * list of aiocb structs, find the request
1604          * each is for, then get the list head that
1605          * reqp points to.
1606          */
1607         head = NULL;
1608 
1609         for (i = 0; i < nent; i++) {
1610                 if (model == DATAMODEL_NATIVE) {
1611                         /*
1612                          * Since we are only checking for a NULL pointer,
1613                          * the following works on both native data sizes
1614                          * as well as for largefile aiocbs.
1615                          */
1616                         if ((cbp = *ucbp++) == NULL)
1617                                 continue;
1618                         if (run_mode != AIO_LARGEFILE) {
1619                                 if (head = aio_list_get(&cbp->aio_resultp))
1620                                         break;
1621                         } else {
1622                                 /*
1623                                  * This is the case where a largefile call
1624                                  * is made on a 32 bit kernel. Treat each
1625                                  * pointer as a pointer to an aiocb64_32.
1626                                  */
1627                                 if (head = aio_list_get((aio_result_t *)
1628                                     &(((aiocb64_32_t *)cbp)->aio_resultp)))
1629                                         break;
1630                         }
1631                 }
1632                 }
1633 #ifdef  _SYSCALL32_IMPL
1634                 else {
1635                         if (run_mode == AIO_LARGEFILE) {
1636                                 if ((cbp64 = (aiocb64_32_t *)
1637                                     (uintptr_t)*ucbp32++) == NULL)
1638                                         continue;
1639                                 if (head = aio_list_get((aio_result_t *)
1640                                     &cbp64->aio_resultp))
1641                                         break;
1642                         } else if (run_mode == AIO_32) {
1643                                 if ((cbp32 = (aiocb32_t *)
1644                                     (uintptr_t)*ucbp32++) == NULL)
1645                                         continue;
1646                                 if (head = aio_list_get((aio_result_t *)
1647                                     &cbp32->aio_resultp))
1648                                         break;
1649                         }
1650                 }
1651 #endif  /* _SYSCALL32_IMPL */
1652         }
1653 
1654         if (head == NULL) {
1655                 error = EINVAL;
1656                 goto done;
1657         }
1658 
1659         mutex_enter(&aiop->aio_mutex);
1660         while (head->lio_refcnt > 0) {
1661                 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
1662                         mutex_exit(&aiop->aio_mutex);
1663                         error = EINTR;
1664                         goto done;
1665                 }
1666         }
1667         mutex_exit(&aiop->aio_mutex);
1668         alio_cleanup(aiop, (aiocb_t **)cbplist, nent, run_mode);
1669 done:
1670         kmem_free(cbplist, ssize);
1671         return (error);
1672 }
1673 
1674 aio_lio_t *
1675 aio_list_get(aio_result_t *resultp)
1676 {
1677         aio_lio_t       *head = NULL;
1678         aio_t           *aiop;
1679         aio_req_t       **bucket;
1680         aio_req_t       *reqp;
1681         long            index;
1682 
1683         aiop = curproc->p_aio;
1684         if (aiop == NULL)
1685                 return (NULL);
1686 
1687         if (resultp) {
1688                 index = AIO_HASH(resultp);
1689                 bucket = &aiop->aio_hash[index];
1690                 for (reqp = *bucket; reqp != NULL;
1691                     reqp = reqp->aio_hash_next) {
1692                         if (reqp->aio_req_resultp == resultp) {
1693                                 head = reqp->aio_req_lio;
1694                                 return (head);
1695                         }
1696                 }
1697         }
1698         return (NULL);
1699 }
1700 
1701 
1702 static void
1703 lio_set_uerror(void *resultp, int error)
1704 {
1705         /*
1706          * the resultp field is a pointer to the location in
1707          * the user's aiocb where the error status should be
1708          * written out.
1709          */
1711         if (get_udatamodel() == DATAMODEL_NATIVE) {
1712                 (void) sulword(&((aio_result_t *)resultp)->aio_return,
1713                     (ssize_t)-1);
1714                 (void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
1715         }
1716 #ifdef  _SYSCALL32_IMPL
1717         else {
1718                 (void) suword32(&((aio_result32_t *)resultp)->aio_return,
1719                     (uint_t)-1);
1720                 (void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
1721         }
1722 #endif  /* _SYSCALL32_IMPL */
1723 }
1724 
1725 /*
1726  * do completion cleanup for all requests in the list. memory for
1727  * each request is also freed.
1728  */
1729 static void
1730 alio_cleanup(aio_t *aiop, aiocb_t **cbp, int nent, int run_mode)
1731 {
1732         int i;
1733         aio_req_t *reqp;
1734         aio_result_t *resultp;
1735         aiocb64_32_t *aiocb_64;
1736 
1737         for (i = 0; i < nent; i++) {
1738                 if (get_udatamodel() == DATAMODEL_NATIVE) {
1739                         if (cbp[i] == NULL)
1740                                 continue;
1741                         if (run_mode == AIO_LARGEFILE) {
1742                                 aiocb_64 = (aiocb64_32_t *)cbp[i];
1743                                 resultp = (aio_result_t *)
1744                                     &aiocb_64->aio_resultp;
1745                         } else
1746                                 resultp = &cbp[i]->aio_resultp;
1747                 }
1748 #ifdef  _SYSCALL32_IMPL
1749                 else {
1750                         aiocb32_t *aiocb_32;
1751                         caddr32_t *cbp32;
1752 
1753                         cbp32 = (caddr32_t *)cbp;
1754                         if (cbp32[i] == NULL)
1755                                 continue;
1756                         if (run_mode == AIO_32) {
1757                                 aiocb_32 = (aiocb32_t *)(uintptr_t)cbp32[i];
1758                                 resultp = (aio_result_t *)&aiocb_32->
1759                                     aio_resultp;
1760                         } else if (run_mode == AIO_LARGEFILE) {
1761                                 aiocb_64 = (aiocb64_32_t *)(uintptr_t)cbp32[i];
1762                                 resultp = (aio_result_t *)&aiocb_64->
1763                                     aio_resultp;
1764                         }
1765                 }
1766 #endif  /* _SYSCALL32_IMPL */
1767                 /*
1768                  * we need to get the aio_cleanupq_mutex since we call
1769                  * aio_req_done().
1770                  */
1771                 mutex_enter(&aiop->aio_cleanupq_mutex);
1772                 mutex_enter(&aiop->aio_mutex);
1773                 reqp = aio_req_done(resultp);
1774                 mutex_exit(&aiop->aio_mutex);
1775                 mutex_exit(&aiop->aio_cleanupq_mutex);
1776                 if (reqp != NULL) {
1777                         aphysio_unlock(reqp);
1778                         aio_copyout_result(reqp);
1779                         mutex_enter(&aiop->aio_mutex);
1780                         aio_req_free(aiop, reqp);
1781                         mutex_exit(&aiop->aio_mutex);
1782                 }
1783         }
1784 }
1785 
1786 /*
1787  * Write out the results for an aio request that is done.
1788  */
1789 static int
1790 aioerror(void *cb, int run_mode)
1791 {
1792         aio_result_t *resultp;
1793         aio_t *aiop;
1794         aio_req_t *reqp;
1795         int retval;
1796 
1797         aiop = curproc->p_aio;
1798         if (aiop == NULL || cb == NULL)
1799                 return (EINVAL);
1800 
1801         if (get_udatamodel() == DATAMODEL_NATIVE) {
1802                 if (run_mode == AIO_LARGEFILE)
1803                         resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
1804                             aio_resultp;
1805                 else
1806                         resultp = &((aiocb_t *)cb)->aio_resultp;
1807         }
1808 #ifdef  _SYSCALL32_IMPL
1809         else {
1810                 if (run_mode == AIO_LARGEFILE)
1811                         resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
1812                             aio_resultp;
1813                 else if (run_mode == AIO_32)
1814                         resultp = (aio_result_t *)&((aiocb32_t *)cb)->
1815                             aio_resultp;
1816         }
1817 #endif  /* _SYSCALL32_IMPL */
1818         /*
1819          * we need to get the aio_cleanupq_mutex since we call
1820          * aio_req_find().
1821          */
1822         mutex_enter(&aiop->aio_cleanupq_mutex);
1823         mutex_enter(&aiop->aio_mutex);
1824         retval = aio_req_find(resultp, &reqp);
1825         mutex_exit(&aiop->aio_mutex);
1826         mutex_exit(&aiop->aio_cleanupq_mutex);
1827         if (retval == 0) {
1828                 aphysio_unlock(reqp);
1829                 aio_copyout_result(reqp);
1830                 mutex_enter(&aiop->aio_mutex);
1831                 aio_req_free(aiop, reqp);
1832                 mutex_exit(&aiop->aio_mutex);
1833                 return (0);
1834         } else if (retval == 1)
1835                 return (EINPROGRESS);
1836         else if (retval == 2)
1837                 return (EINVAL);
1838         return (0);
1839 }
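
     /*
      * For illustration: the function above is, roughly, the kaio
      * backend behind aio_error(3C) polling. A user program typically
      * consumes the three outcomes like this (sketch only; cb is an
      * assumed outstanding aiocb):
      *
      *		int err;
      *		ssize_t nbytes;
      *
      *		while ((err = aio_error(&cb)) == EINPROGRESS)
      *			;	(real code blocks in aio_suspend())
      *		if (err == 0)
      *			nbytes = aio_return(&cb);
      */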
1840 
1841 /*
1842  * aio_cancel - if no requests are outstanding, return AIO_ALLDONE;
1843  * otherwise return AIO_NOTCANCELED.
1844  */
1847 static int
1848 aio_cancel(
1849         int     fildes,
1850         void    *cb,
1851         long    *rval,
1852         int     run_mode)
1853 {
1854         aio_t *aiop;
1855         void *resultp;
1856         int index;
1857         aio_req_t **bucket;
1858         aio_req_t *ent;
1859 
1860 
1861         /*
1862          * Verify valid file descriptor
1863          */
1864         if ((getf(fildes)) == NULL) {
1865                 return (EBADF);
1866         }
1867         releasef(fildes);
1868 
1869         aiop = curproc->p_aio;
1870         if (aiop == NULL)
1871                 return (EINVAL);
1872 
1873         if (aiop->aio_outstanding == 0) {
1874                 *rval = AIO_ALLDONE;
1875                 return (0);
1876         }
1877 
1878         mutex_enter(&aiop->aio_mutex);
1879         if (cb != NULL) {
1880                 if (get_udatamodel() == DATAMODEL_NATIVE) {
1881                         if (run_mode == AIO_LARGEFILE)
1882                                 resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
1883                                     ->aio_resultp;
1884                         else
1885                                 resultp = &((aiocb_t *)cb)->aio_resultp;
1886                 }
1887 #ifdef  _SYSCALL32_IMPL
1888                 else {
1889                         if (run_mode == AIO_LARGEFILE)
1890                                 resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
1891                                     ->aio_resultp;
1892                         else if (run_mode == AIO_32)
1893                                 resultp = (aio_result_t *)&((aiocb32_t *)cb)
1894                                     ->aio_resultp;
1895                 }
1896 #endif  /* _SYSCALL32_IMPL */
1897                 index = AIO_HASH(resultp);
1898                 bucket = &aiop->aio_hash[index];
1899                 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
1900                         if (ent->aio_req_resultp == resultp) {
1901                                 if ((ent->aio_req_flags & AIO_PENDING) == 0) {
1902                                         mutex_exit(&aiop->aio_mutex);
1903                                         *rval = AIO_ALLDONE;
1904                                         return (0);
1905                                 }
1906                                 mutex_exit(&aiop->aio_mutex);
1907                                 *rval = AIO_NOTCANCELED;
1908                                 return (0);
1909                         }
1910                 }
1911                 mutex_exit(&aiop->aio_mutex);
1912                 *rval = AIO_ALLDONE;
1913                 return (0);
1914         }
1915 
1916         for (index = 0; index < AIO_HASHSZ; index++) {
1917                 bucket = &aiop->aio_hash[index];
1918                 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
1919                         if (ent->aio_req_fd == fildes) {
1920                                 if ((ent->aio_req_flags & AIO_PENDING) != 0) {
1921                                         mutex_exit(&aiop->aio_mutex);
1922                                         *rval = AIO_NOTCANCELED;
1923                                         return (0);
1924                                 }
1925                         }
1926                 }
1927         }
1928         mutex_exit(&aiop->aio_mutex);
1929         *rval = AIO_ALLDONE;
1930         return (0);
1931 }
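
     /*
      * Illustrative user-level view (sketch; fd and cb are assumed):
      * as the code above shows, kaio itself only ever reports
      * AIO_ALLDONE or AIO_NOTCANCELED here, since an in-flight
      * raw-device request cannot be canceled.
      *
      *		switch (aio_cancel(fd, &cb)) {
      *		case AIO_NOTCANCELED:
      *			request still pending; poll aio_error()
      *			break;
      *		case AIO_ALLDONE:
      *			request already done, or not found
      *			break;
      *		}
      */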
1932 
1933 /*
1934  * solaris version of asynchronous read and write
1935  */
1936 static int
1937 arw(
1938         int     opcode,
1939         int     fdes,
1940         char    *bufp,
1941         int     bufsize,
1942         offset_t        offset,
1943         aio_result_t    *resultp,
1944         int             mode)
1945 {
1946         file_t          *fp;
1947         int             error;
1948         struct vnode    *vp;
1949         aio_req_t       *reqp;
1950         aio_t           *aiop;
1951         int             (*aio_func)();
1952 #ifdef _LP64
1953         aiocb_t         aiocb;
1954 #else
1955         aiocb64_32_t    aiocb64;
1956 #endif
1957 
1958         aiop = curproc->p_aio;
1959         if (aiop == NULL)
1960                 return (EINVAL);
1961 
1962         if ((fp = getf(fdes)) == NULL) {
1963                 return (EBADF);
1964         }
1965 
1966         /*
1967          * check that the file was opened with the required access mode
1968          */
1969         if ((fp->f_flag & mode) == 0) {
1970                 releasef(fdes);
1971                 return (EBADF);
1972         }
1973 
1974         vp = fp->f_vnode;
1975         aio_func = check_vp(vp, mode);
1976         if (aio_func == NULL) {
1977                 releasef(fdes);
1978                 return (EBADFD);
1979         }
1980 #ifdef _LP64
1981         aiocb.aio_fildes = fdes;
1982         aiocb.aio_buf = bufp;
1983         aiocb.aio_nbytes = bufsize;
1984         aiocb.aio_offset = offset;
1985         aiocb.aio_sigevent.sigev_notify = 0;
1986         error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp, 1);
1987 #else
1988         aiocb64.aio_fildes = fdes;
1989         aiocb64.aio_buf = (caddr32_t)bufp;
1990         aiocb64.aio_nbytes = bufsize;
1991         aiocb64.aio_offset = offset;
1992         aiocb64.aio_sigevent.sigev_notify = 0;
1993         error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp, 1);
1994 #endif
1995         if (error) {
1996                 releasef(fdes);
1997                 return (error);
1998         }
1999 
2000         /*
2001          * enable polling on this request if the opcode has
2002          * the AIO poll bit set
2003          */
2004         if (opcode & AIO_POLL_BIT)
2005                 reqp->aio_req_flags |= AIO_POLL;
2006 
2007         if (bufsize == 0) {
2008                 clear_active_fd(fdes);
2009                 aio_zerolen(reqp);
2010                 return (0);
2011         }
2012         /*
2013          * send the request to the driver.
2014          */
2015         error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
2016         /*
2017          * the fd is stored in the aio_req_t by aio_req_setup(), and
2018          * is released by the aio_cleanup_thread() when the IO has
2019          * completed.
2020          */
2021         if (error) {
2022                 releasef(fdes);
2023                 mutex_enter(&aiop->aio_mutex);
2024                 aio_req_free(aiop, reqp);
2025                 aiop->aio_pending--;
2026                 if (aiop->aio_flags & AIO_REQ_BLOCK)
2027                         cv_signal(&aiop->aio_cleanupcv);
2028                 mutex_exit(&aiop->aio_mutex);
2029                 return (error);
2030         }
2031         clear_active_fd(fdes);
2032         return (0);
2033 }
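
     /*
      * Illustrative sketch of the user-level Solaris-style interface
      * that lands in arw() (fd and buf are assumed; see aioread(3)):
      *
      *		aio_result_t res;
      *
      *		if (aioread(fd, buf, sizeof (buf), 0, SEEK_SET,
      *		    &res) == -1)
      *			perror("aioread");
      *		(void) aiowait(NULL);	wait for a completion
      *		if (res.aio_return == -1)
      *			errno = res.aio_errno;
      */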
2034 
2035 /*
2036  * posix version of asynchronous read and write
2037  */
2038 static int
2039 aiorw(
2040         int             opcode,
2041         void            *aiocb_arg,
2042         int             mode,
2043         int             run_mode)
2044 {
2045 #ifdef _SYSCALL32_IMPL
2046         aiocb32_t       aiocb32;
2047         struct  sigevent32 *sigev32;
2048         port_notify32_t pntfy32;
2049 #endif
2050         aiocb64_32_t    aiocb64;
2051         aiocb_t         aiocb;
2052         file_t          *fp;
2053         int             error, fd;
2054         size_t          bufsize;
2055         struct vnode    *vp;
2056         aio_req_t       *reqp;
2057         aio_t           *aiop;
2058         int             (*aio_func)();
2059         aio_result_t    *resultp;
2060         struct  sigevent *sigev;
2061         model_t         model;
2062         int             aio_use_port = 0;
2063         port_notify_t   pntfy;
2064 
2065         model = get_udatamodel();
2066         aiop = curproc->p_aio;
2067         if (aiop == NULL)
2068                 return (EINVAL);
2069 
2070         if (model == DATAMODEL_NATIVE) {
2071                 if (run_mode != AIO_LARGEFILE) {
2072                         if (copyin(aiocb_arg, &aiocb, sizeof (aiocb_t)))
2073                                 return (EFAULT);
2074                         bufsize = aiocb.aio_nbytes;
2075                         resultp = &(((aiocb_t *)aiocb_arg)->aio_resultp);
2076                         if ((fp = getf(fd = aiocb.aio_fildes)) == NULL) {
2077                                 return (EBADF);
2078                         }
2079                         sigev = &aiocb.aio_sigevent;
2080                 } else {
2081                         /*
2082                          * We come here only when a largefile call is
2083                          * made on a 32 bit kernel using a 32 bit library.
2084                          */
2085                         if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
2086                                 return (EFAULT);
2087                         bufsize = aiocb64.aio_nbytes;
2088                         resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
2089                             ->aio_resultp);
2090                         if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
2091                                 return (EBADF);
2092                         sigev = (struct sigevent *)&aiocb64.aio_sigevent;
2093                 }
2094 
2095                 if (sigev->sigev_notify == SIGEV_PORT) {
2096                         if (copyin((void *)sigev->sigev_value.sival_ptr,
2097                             &pntfy, sizeof (port_notify_t))) {
2098                                 releasef(fd);
2099                                 return (EFAULT);
2100                         }
2101                         aio_use_port = 1;
2102                 } else if (sigev->sigev_notify == SIGEV_THREAD) {
2103                         /* use sigev; aiocb is unset for largefile calls */
2104                         pntfy.portnfy_port = sigev->sigev_signo;
2105                         pntfy.portnfy_user = sigev->sigev_value.sival_ptr;
2106                         aio_use_port = 1;
2107                 }
2108         }
2109 #ifdef  _SYSCALL32_IMPL
2110         else {
2111                 if (run_mode == AIO_32) {
2112                         /* 32 bit system call is being made on 64 bit kernel */
2113                         if (copyin(aiocb_arg, &aiocb32, sizeof (aiocb32_t)))
2114                                 return (EFAULT);
2115 
2116                         bufsize = aiocb32.aio_nbytes;
2117                         aiocb_32ton(&aiocb32, &aiocb);
2118                         resultp = (aio_result_t *)&(((aiocb32_t *)aiocb_arg)->
2119                             aio_resultp);
2120                         if ((fp = getf(fd = aiocb32.aio_fildes)) == NULL) {
2121                                 return (EBADF);
2122                         }
2123                         sigev32 = &aiocb32.aio_sigevent;
2124                 } else if (run_mode == AIO_LARGEFILE) {
2125                         /*
2126                          * We come here only when a largefile call is
2127                          * made on a 64 bit kernel using a 32 bit library.
2128                          */
2129                         if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
2130                                 return (EFAULT);
2131                         bufsize = aiocb64.aio_nbytes;
2132                         aiocb_LFton(&aiocb64, &aiocb);
2133                         resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
2134                             ->aio_resultp);
2135                         if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
2136                                 return (EBADF);
2137                         sigev32 = &aiocb64.aio_sigevent;
2138                 }
2139 
2140                 if (sigev32->sigev_notify == SIGEV_PORT) {
2141                         if (copyin(
2142                             (void *)(uintptr_t)sigev32->sigev_value.sival_ptr,
2143                             &pntfy32, sizeof (port_notify32_t))) {
2144                                 releasef(fd);
2145                                 return (EFAULT);
2146                         }
2147                         pntfy.portnfy_port = pntfy32.portnfy_port;
2148                         pntfy.portnfy_user = (void *)(uintptr_t)
2149                             pntfy32.portnfy_user;
2150                         aio_use_port = 1;
2151                 } else if (sigev32->sigev_notify == SIGEV_THREAD) {
2152                         pntfy.portnfy_port = sigev32->sigev_signo;
2153                         pntfy.portnfy_user = (void *)(uintptr_t)
2154                             sigev32->sigev_value.sival_ptr;
2155                         aio_use_port = 1;
2156                 }
2157         }
2158 #endif  /* _SYSCALL32_IMPL */
2159 
2160         /*
2161          * check that the file was opened with the required access mode
2162          */
2163 
2164         if ((fp->f_flag & mode) == 0) {
2165                 releasef(fd);
2166                 return (EBADF);
2167         }
2168 
2169         vp = fp->f_vnode;
2170         aio_func = check_vp(vp, mode);
2171         if (aio_func == NULL) {
2172                 releasef(fd);
2173                 return (EBADFD);
2174         }
2175         if (run_mode == AIO_LARGEFILE)
2176                 error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp, 0);
2177         else
2178                 error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp, 0);
2179 
2180         if (error) {
2181                 releasef(fd);
2182                 return (error);
2183         }
2184         /*
2185          * enable polling on this request if the opcode has
2186          * the AIO poll bit set
2187          */
2188         if (opcode & AIO_POLL_BIT)
2189                 reqp->aio_req_flags |= AIO_POLL;
2190 
2191         if (model == DATAMODEL_NATIVE)
2192                 reqp->aio_req_iocb.iocb = aiocb_arg;
2193 #ifdef  _SYSCALL32_IMPL
2194         else
2195                 reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)aiocb_arg;
2196 #endif
2197 
2198         if (aio_use_port) {
2199                 int event = (run_mode == AIO_LARGEFILE)?
2200                     ((mode == FREAD)? AIOAREAD64 : AIOAWRITE64) :
2201                     ((mode == FREAD)? AIOAREAD : AIOAWRITE);
2202                 error = aio_req_assoc_port_rw(&pntfy, aiocb_arg, reqp, event);
2203         }
2204 
2205         /*
2206          * send the request to the driver.
2207          */
2208         if (error == 0) {
2209                 if (bufsize == 0) {
2210                         clear_active_fd(fd);
2211                         aio_zerolen(reqp);
2212                         return (0);
2213                 }
2214                 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
2215         }
2216 
2217         /*
2218          * the fd is stored in the aio_req_t by aio_req_setup(), and
2219          * is released by the aio_cleanup_thread() when the IO has
2220          * completed.
2221          */
2222         if (error) {
2223                 releasef(fd);
2224                 mutex_enter(&aiop->aio_mutex);
2225                 if (aio_use_port)
2226                         aio_deq(&aiop->aio_portpending, reqp);
2227                 aio_req_free(aiop, reqp);
2228                 aiop->aio_pending--;
2229                 if (aiop->aio_flags & AIO_REQ_BLOCK)
2230                         cv_signal(&aiop->aio_cleanupcv);
2231                 mutex_exit(&aiop->aio_mutex);
2232                 return (error);
2233         }
2234         clear_active_fd(fd);
2235         return (0);
2236 }
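
     /*
      * Sketch of the user-level SIGEV_PORT setup that drives the
      * event-port path above (illustrative; assumes a port from
      * port_create(3C) and an initialized aiocb cb):
      *
      *		port_notify_t pn;
      *		port_event_t pe;
      *
      *		pn.portnfy_port = port;
      *		pn.portnfy_user = &cb;	user-defined cookie
      *		cb.aio_sigevent.sigev_notify = SIGEV_PORT;
      *		cb.aio_sigevent.sigev_value.sival_ptr = &pn;
      *		if (aio_read(&cb) == 0 &&
      *		    port_get(port, &pe, NULL) == 0)
      *			pe.portev_object is the completed aiocb
      */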
2237 
2238 
2239 /*
2240  * set error for a list IO entry that failed.
2241  */
2242 static void
2243 lio_set_error(aio_req_t *reqp, int portused)
2244 {
2245         aio_t *aiop = curproc->p_aio;
2246 
2247         if (aiop == NULL)
2248                 return;
2249 
2250         mutex_enter(&aiop->aio_mutex);
2251         if (portused)
2252                 aio_deq(&aiop->aio_portpending, reqp);
2253         aiop->aio_pending--;
2254         /* request failed, AIO_PHYSIODONE set to avoid physio cleanup. */
2255         reqp->aio_req_flags |= AIO_PHYSIODONE;
2256         /*
2257          * Need to free the request now as it's never
2258          * going to get on the done queue.
2259          *
2260          * Note: aio_outstanding is decremented in
2261          *       aio_req_free()
2262          */
2263         aio_req_free(aiop, reqp);
2264         if (aiop->aio_flags & AIO_REQ_BLOCK)
2265                 cv_signal(&aiop->aio_cleanupcv);
2266         mutex_exit(&aiop->aio_mutex);
2267 }
2268 
2269 /*
2270  * check if the specified request is done, and if so remove it from
2271  * the done queue. if NULL is specified, remove any request from
2272  * the done queue instead.
2273  */
2274 static aio_req_t *
2275 aio_req_done(void *resultp)
2276 {
2277         aio_req_t **bucket;
2278         aio_req_t *ent;
2279         aio_t *aiop = curproc->p_aio;
2280         long index;
2281 
2282         ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
2283         ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2284 
2285         if (resultp) {
2286                 index = AIO_HASH(resultp);
2287                 bucket = &aiop->aio_hash[index];
2288                 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
2289                         if (ent->aio_req_resultp == (aio_result_t *)resultp) {
2290                                 if (ent->aio_req_flags & AIO_DONEQ) {
2291                                         return (aio_req_remove(ent));
2292                                 }
2293                                 return (NULL);
2294                         }
2295                 }
2296                 /* no match, resultp is invalid */
2297                 return (NULL);
2298         }
2299         return (aio_req_remove(NULL));
2300 }
2301 
2302 /*
2303  * determine if a user-level resultp pointer is associated with an
2304  * active IO request. Zero is returned when the request is done,
2305  * and the request is removed from the done queue; only when the
2306  * return value is zero is the "reqp" pointer valid. One is returned
2307  * when the request is in progress. Two is returned when the request
2308  * is invalid.
2309  */
2310 static int
2311 aio_req_find(aio_result_t *resultp, aio_req_t **reqp)
2312 {
2313         aio_req_t **bucket;
2314         aio_req_t *ent;
2315         aio_t *aiop = curproc->p_aio;
2316         long index;
2317 
2318         ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
2319         ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2320 
2321         index = AIO_HASH(resultp);
2322         bucket = &aiop->aio_hash[index];
2323         for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
2324                 if (ent->aio_req_resultp == resultp) {
2325                         if (ent->aio_req_flags & AIO_DONEQ) {
2326                                 *reqp = aio_req_remove(ent);
2327                                 return (0);
2328                         }
2329                         return (1);
2330                 }
2331         }
2332         /* no match, resultp is invalid */
2333         return (2);
2334 }
2335 
2336 /*
2337  * remove a request from the done queue.
2338  */
2339 static aio_req_t *
2340 aio_req_remove(aio_req_t *reqp)
2341 {
2342         aio_t *aiop = curproc->p_aio;
2343 
2344         ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2345 
2346         if (reqp != NULL) {
2347                 ASSERT(reqp->aio_req_flags & AIO_DONEQ);
2348                 if (reqp->aio_req_next == reqp) {
2349                         /* only one request on queue */
2350                         if (reqp ==  aiop->aio_doneq) {
2351                                 aiop->aio_doneq = NULL;
2352                         } else {
2353                                 ASSERT(reqp == aiop->aio_cleanupq);
2354                                 aiop->aio_cleanupq = NULL;
2355                         }
2356                 } else {
2357                         reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
2358                         reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
2359                         /*
2360                          * The request can be either on the aio_doneq or the
2361                          * aio_cleanupq
2362                          */
2363                         if (reqp == aiop->aio_doneq)
2364                                 aiop->aio_doneq = reqp->aio_req_next;
2365 
2366                         if (reqp == aiop->aio_cleanupq)
2367                                 aiop->aio_cleanupq = reqp->aio_req_next;
2368                 }
2369                 reqp->aio_req_flags &= ~AIO_DONEQ;
2370                 reqp->aio_req_next = NULL;
2371                 reqp->aio_req_prev = NULL;
2372         } else if ((reqp = aiop->aio_doneq) != NULL) {
2373                 ASSERT(reqp->aio_req_flags & AIO_DONEQ);
2374                 if (reqp == reqp->aio_req_next) {
2375                         /* only one request on queue */
2376                         aiop->aio_doneq = NULL;
2377                 } else {
2378                         reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
2379                         reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
2380                         aiop->aio_doneq = reqp->aio_req_next;
2381                 }
2382                 reqp->aio_req_flags &= ~AIO_DONEQ;
2383                 reqp->aio_req_next = NULL;
2384                 reqp->aio_req_prev = NULL;
2385         }
2386         if (aiop->aio_doneq == NULL && (aiop->aio_flags & AIO_WAITN))
2387                 cv_broadcast(&aiop->aio_waitcv);
2388         return (reqp);
2389 }
2390 
2391 static int
2392 aio_req_setup(
2393         aio_req_t       **reqpp,
2394         aio_t           *aiop,
2395         aiocb_t         *arg,
2396         aio_result_t    *resultp,
2397         vnode_t         *vp,
2398         int             old_solaris_req)
2399 {
2400         sigqueue_t      *sqp = NULL;
2401         aio_req_t       *reqp;
2402         struct uio      *uio;
2403         struct sigevent *sigev;
2404         int             error;
2405 
2406         sigev = &arg->aio_sigevent;
2407         if (sigev->sigev_notify == SIGEV_SIGNAL &&
2408             sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
2409                 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
2410                 if (sqp == NULL)
2411                         return (EAGAIN);
2412                 sqp->sq_func = NULL;
2413                 sqp->sq_next = NULL;
2414                 sqp->sq_info.si_code = SI_ASYNCIO;
2415                 sqp->sq_info.si_pid = curproc->p_pid;
2416                 sqp->sq_info.si_ctid = PRCTID(curproc);
2417                 sqp->sq_info.si_zoneid = getzoneid();
2418                 sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
2419                 sqp->sq_info.si_signo = sigev->sigev_signo;
2420                 sqp->sq_info.si_value = sigev->sigev_value;
2421         }
2422 
2423         mutex_enter(&aiop->aio_mutex);
2424 
2425         if (aiop->aio_flags & AIO_REQ_BLOCK) {
2426                 mutex_exit(&aiop->aio_mutex);
2427                 if (sqp)
2428                         kmem_free(sqp, sizeof (sigqueue_t));
2429                 return (EIO);
2430         }
2431         /*
2432          * get an aio_reqp from the free list or allocate one
2433          * from dynamic memory.
2434          */
2435         if (error = aio_req_alloc(&reqp, resultp)) {
2436                 mutex_exit(&aiop->aio_mutex);
2437                 if (sqp)
2438                         kmem_free(sqp, sizeof (sigqueue_t));
2439                 return (error);
2440         }
2441         aiop->aio_pending++;
2442         aiop->aio_outstanding++;
2443         reqp->aio_req_flags = AIO_PENDING;
2444         if (old_solaris_req) {
2445                 /* this is an old solaris aio request */
2446                 reqp->aio_req_flags |= AIO_SOLARIS;
2447                 aiop->aio_flags |= AIO_SOLARIS_REQ;
2448         }
2449         if (sigev->sigev_notify == SIGEV_THREAD ||
2450             sigev->sigev_notify == SIGEV_PORT)
2451                 aio_enq(&aiop->aio_portpending, reqp, 0);
2452         mutex_exit(&aiop->aio_mutex);
2453         /*
2454          * initialize aio request.
2455          */
2456         reqp->aio_req_fd = arg->aio_fildes;
2457         reqp->aio_req_sigqp = sqp;
2458         reqp->aio_req_iocb.iocb = NULL;
2459         reqp->aio_req_lio = NULL;
2460         reqp->aio_req_buf.b_file = vp;
2461         uio = reqp->aio_req.aio_uio;
2462         uio->uio_iovcnt = 1;
2463         uio->uio_iov->iov_base = (caddr_t)arg->aio_buf;
2464         uio->uio_iov->iov_len = arg->aio_nbytes;
2465         uio->uio_loffset = arg->aio_offset;
2466         *reqpp = reqp;
2467         return (0);
2468 }
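
     /*
      * Sketch of the user-level SIGEV_SIGNAL request that makes
      * aio_req_setup() allocate the sigqueue_t above (illustrative;
      * assumes SIGUSR1 is handled with SA_SIGINFO so the queued
      * si_value reaches the handler):
      *
      *		cb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
      *		cb.aio_sigevent.sigev_signo = SIGUSR1;
      *		cb.aio_sigevent.sigev_value.sival_ptr = &cb;
      *		(void) aio_read(&cb);
      */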
2469 
2470 /*
2471  * Allocate p_aio struct.
2472  */
2473 static aio_t *
2474 aio_aiop_alloc(void)
2475 {
2476         aio_t   *aiop;
2477 
2478         ASSERT(MUTEX_HELD(&curproc->p_lock));
2479 
2480         aiop = kmem_zalloc(sizeof (struct aio), KM_NOSLEEP);
2481         if (aiop) {
2482                 mutex_init(&aiop->aio_mutex, NULL, MUTEX_DEFAULT, NULL);
2483                 mutex_init(&aiop->aio_cleanupq_mutex, NULL, MUTEX_DEFAULT,
2484                     NULL);
2485                 mutex_init(&aiop->aio_portq_mutex, NULL, MUTEX_DEFAULT, NULL);
2486         }
2487         return (aiop);
2488 }
2489 
2490 /*
2491  * Allocate an aio_req struct.
2492  */
2493 static int
2494 aio_req_alloc(aio_req_t **nreqp, aio_result_t *resultp)
2495 {
2496         aio_req_t *reqp;
2497         aio_t *aiop = curproc->p_aio;
2498 
2499         ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2500 
2501         if ((reqp = aiop->aio_free) != NULL) {
2502                 aiop->aio_free = reqp->aio_req_next;
2503                 bzero(reqp, sizeof (*reqp));
2504         } else {
2505                 /*
2506                  * Check whether memory is getting tight.
2507                  * This is a temporary mechanism to avoid memory
2508                  * exhaustion by a single process until we come up
2509                  * with a per process solution such as setrlimit().
2510                  */
2511                 if (freemem < desfree)
2512                         return (EAGAIN);
2513                 reqp = kmem_zalloc(sizeof (struct aio_req_t), KM_NOSLEEP);
2514                 if (reqp == NULL)
2515                         return (EAGAIN);
2516         }
2517         reqp->aio_req.aio_uio = &reqp->aio_req_uio;
2518         reqp->aio_req.aio_uio->uio_iov = &reqp->aio_req_iov;
2519         reqp->aio_req.aio_private = reqp;
2520         reqp->aio_req_buf.b_offset = -1;
2521         reqp->aio_req_resultp = resultp;
2522         if (aio_hash_insert(reqp, aiop)) {
2523                 reqp->aio_req_next = aiop->aio_free;
2524                 aiop->aio_free = reqp;
2525                 return (EBUSY);
2526         }
2527         *nreqp = reqp;
2528         return (0);
2529 }
2530 
2531 /*
2532  * Allocate an aio_lio_t struct.
2533  */
2534 static int
2535 aio_lio_alloc(aio_lio_t **head)
2536 {
2537         aio_lio_t *liop;
2538         aio_t *aiop = curproc->p_aio;
2539 
2540         ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2541 
2542         if ((liop = aiop->aio_lio_free) != NULL) {
2543                 aiop->aio_lio_free = liop->lio_next;
2544         } else {
2545                 /*
2546                  * Check whether memory is getting tight.
2547                  * This is a temporary mechanism to avoid memory
2548                  * exhaustion by a single process until we come up
2549                  * with a per process solution such as setrlimit().
2550                  */
2551                 if (freemem < desfree)
2552                         return (EAGAIN);
2553 
2554                 liop = kmem_zalloc(sizeof (aio_lio_t), KM_NOSLEEP);
2555                 if (liop == NULL)
2556                         return (EAGAIN);
2557         }
2558         *head = liop;
2559         return (0);
2560 }
2561 
2562 /*
2563  * this is a special per-process thread that is only activated if
2564  * the process is unmapping a segment with outstanding aio. normally,
2565  * the process will have completed the aio before unmapping the
2566  * segment. If the process does unmap a segment with outstanding aio,
2567  * this special thread will guarantee that the locked pages due to
2568  * aphysio() are released, thereby permitting the segment to be
2569  * unmapped. In addition to this, the cleanup thread is woken up
2570  * during DR operations to release the locked pages.
2571  */
2572 
2573 static int
2574 aio_cleanup_thread(aio_t *aiop)
2575 {
2576         proc_t *p = curproc;
2577         struct as *as = p->p_as;
2578         int poked = 0;
2579         kcondvar_t *cvp;
2580         int exit_flag = 0;
2581         int rqclnup = 0;
2582 
2583         sigfillset(&curthread->t_hold);
2584         sigdiffset(&curthread->t_hold, &cantmask);
2585         for (;;) {
2586                 /*
2587                  * if a segment is being unmapped, and the current
2588                  * process's done queue is not empty, then every request
2589                  * on the doneq with locked resources should be forced
2590                  * to release its locks. By moving the doneq requests
2591                  * to the cleanupq, aio_cleanup() will process the cleanupq,
2592                  * and place requests back onto the doneq. All requests
2593                  * processed by aio_cleanup() will have their physical
2594                  * resources unlocked.
2595                  */
2596                 mutex_enter(&aiop->aio_mutex);
2597                 if ((aiop->aio_flags & AIO_CLEANUP) == 0) {
2598                         aiop->aio_flags |= AIO_CLEANUP;
2599                         mutex_enter(&as->a_contents);
2600                         if (aiop->aio_rqclnup) {
2601                                 aiop->aio_rqclnup = 0;
2602                                 rqclnup = 1;
2603                         }
2604                         mutex_exit(&as->a_contents);
2605                         if (aiop->aio_doneq) {
2606                                 aio_req_t *doneqhead = aiop->aio_doneq;
2607                                 aiop->aio_doneq = NULL;
2608                                 aio_cleanupq_concat(aiop, doneqhead, AIO_DONEQ);
2609                         }
2610                 }
2611                 mutex_exit(&aiop->aio_mutex);
2612                 aio_cleanup(AIO_CLEANUP_THREAD);
2613                 /*
2614                  * thread should block on the cleanupcv while
2615                  * AIO_CLEANUP is set.
2616                  */
2617                 cvp = &aiop->aio_cleanupcv;
2618                 mutex_enter(&aiop->aio_mutex);
2619 
2620                 if (aiop->aio_pollq != NULL || aiop->aio_cleanupq != NULL ||
2621                     aiop->aio_notifyq != NULL ||
2622                     aiop->aio_portcleanupq != NULL) {
2623                         mutex_exit(&aiop->aio_mutex);
2624                         continue;
2625                 }
2626                 mutex_enter(&as->a_contents);
2627 
2628                 /*
2629                  * AIO_CLEANUP determines when the cleanup thread
2630                  * should be active. This flag is set when
2631                  * the cleanup thread is awakened by as_unmap() or
2632                  * due to DR operations.
2633                  * The flag is cleared when the blocking as_unmap()
2634                  * that originally awakened us is allowed to
2635                  * complete. as_unmap() blocks when trying to
2636                  * unmap a segment that has SOFTLOCKed pages. When
2637                  * the segment's pages are all SOFTUNLOCKed,
2638                  * as->a_flags & AS_UNMAPWAIT should be zero.
2639                  *
2640                  * In case of cleanup request by DR, the flag is cleared
2641                  * once all the pending aio requests have been processed.
2642                  *
2643                  * The flag shouldn't be cleared right away if the
2644                  * cleanup thread was interrupted because the process
2645                  * is doing forkall(). This happens when cv_wait_sig()
2646                  * returns zero, because it was awakened by a pokelwps().
2647                  * If the process is not exiting, it must be doing forkall().
2648                  */
2649                 if ((poked == 0) &&
2650                     ((!rqclnup && (AS_ISUNMAPWAIT(as) == 0)) ||
2651                     (aiop->aio_pending == 0))) {
2652                         aiop->aio_flags &= ~(AIO_CLEANUP | AIO_CLEANUP_PORT);
2653                         cvp = &as->a_cv;
2654                         rqclnup = 0;
2655                 }
2656                 mutex_exit(&aiop->aio_mutex);
2657                 if (poked) {
2658                         /*
2659                          * If the process is exiting/killed, don't return
2660                          * immediately without waiting for pending I/O's
2661                          * and releasing the page locks.
2662                          */
2663                         if (p->p_flag & (SEXITLWPS|SKILLED)) {
2664                                 /*
2665                                  * If exit_flag is set, then it is
2666                                  * safe to exit because we have released
2667                                  * page locks of completed I/O's.
2668                                  */
2669                                 if (exit_flag)
2670                                         break;
2671 
2672                                 mutex_exit(&as->a_contents);
2673 
2674                                 /*
2675                                  * Wait for all the pending aio to complete.
2676                                  */
2677                                 mutex_enter(&aiop->aio_mutex);
2678                                 aiop->aio_flags |= AIO_REQ_BLOCK;
2679                                 while (aiop->aio_pending != 0)
2680                                         cv_wait(&aiop->aio_cleanupcv,
2681                                             &aiop->aio_mutex);
2682                                 mutex_exit(&aiop->aio_mutex);
2683                                 exit_flag = 1;
2684                                 continue;
2685                         } else if (p->p_flag &
2686                             (SHOLDFORK|SHOLDFORK1|SHOLDWATCH)) {
2687                                 /*
2688                                  * hold LWP until it
2689                                  * is continued.
2690                                  */
2691                                 mutex_exit(&as->a_contents);
2692                                 mutex_enter(&p->p_lock);
2693                                 stop(PR_SUSPENDED, SUSPEND_NORMAL);
2694                                 mutex_exit(&p->p_lock);
2695                                 poked = 0;
2696                                 continue;
2697                         }
2698                 } else {
2699                         /*
2700                          * When started this thread will sleep on as->a_cv.
2701                          * as_unmap will awake this thread if the
2702                          * segment has SOFTLOCKed pages (poked = 0).
2703                          * 1. pokelwps() awakes this thread =>
2704                          *    break the loop to check SEXITLWPS, SHOLDFORK, etc.
2705                          * 2. as_unmap awakes this thread =>
2706                          *    to break the loop it is necessary that
2707                          *    - AS_UNMAPWAIT is set (as_unmap is waiting for
2708                          *      memory to be unlocked)
2709                          *    - AIO_CLEANUP is not set
2710                          *      (if AIO_CLEANUP is set we have to wait for
2711                          *      pending requests. aio_done will send a signal
2712                          *      for every request which completes to continue
2713                          *      unmapping the corresponding address range)
2714                          * 3. A cleanup request will wake this thread up, ex.
2715                          *    by the DR operations. The aio_rqclnup flag will
2716                          *    be set.
2717                          */
2718                         while (poked == 0) {
2719                                 /*
2720                                  * Cleanup requests that came in after we
2721                                  * had just cleaned up cannot be what is
2722                                  * blocking the unmap thread, since the
2723                                  * unmap event happened first.
2724                                  * Let aio_done() wake us up if it sees a need.
2725                                  */
2726                                 if (aiop->aio_rqclnup &&
2727                                     (aiop->aio_flags & AIO_CLEANUP) == 0)
2728                                         break;
2729                                 poked = !cv_wait_sig(cvp, &as->a_contents);
2730                                 if (AS_ISUNMAPWAIT(as) == 0)
2731                                         cv_signal(cvp);
2732                                 if (aiop->aio_outstanding != 0)
2733                                         break;
2734                         }
2735                 }
2736                 mutex_exit(&as->a_contents);
2737         }
2738 exit:
2739         mutex_exit(&as->a_contents);
2740         ASSERT((curproc->p_flag & (SEXITLWPS|SKILLED)));
2741         aston(curthread);       /* make thread do post_syscall */
2742         return (0);
2743 }
2744 
2745 /*
2746  * save a reference to a user's outstanding aio in a hash list.
2747  */
2748 static int
2749 aio_hash_insert(
2750         aio_req_t *aio_reqp,
2751         aio_t *aiop)
2752 {
2753         long index;
2754         aio_result_t *resultp = aio_reqp->aio_req_resultp;
2755         aio_req_t *current;
2756         aio_req_t **nextp;
2757 
2758         index = AIO_HASH(resultp);
2759         nextp = &aiop->aio_hash[index];
2760         while ((current = *nextp) != NULL) {
2761                 if (current->aio_req_resultp == resultp)
2762                         return (DUPLICATE);
2763                 nextp = &current->aio_hash_next;
2764         }
2765         *nextp = aio_reqp;
2766         aio_reqp->aio_hash_next = NULL;
2767         return (0);
2768 }
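
     /*
      * For reference, the matching lookup walks the same chain; a
      * minimal sketch (the real lookups live in aio_req_find() and
      * aio_req_done() above):
      *
      *		index = AIO_HASH(resultp);
      *		for (reqp = aiop->aio_hash[index]; reqp != NULL;
      *		    reqp = reqp->aio_hash_next)
      *			if (reqp->aio_req_resultp == resultp)
      *				return (reqp);
      */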
2769 
2770 static int
2771 (*check_vp(struct vnode *vp, int mode))(vnode_t *, struct aio_req *,
2772     cred_t *)
2773 {
2774         struct snode *sp;
2775         dev_t           dev;
2776         struct cb_ops   *cb;
2777         major_t         major;
2778         int             (*aio_func)();
2779 
2780         dev = vp->v_rdev;
2781         major = getmajor(dev);
2782 
2783         /*
2784          * return NULL for requests to files and STREAMs so
2785          * that libaio takes care of them.
2786          */
2787         if (vp->v_type == VCHR) {
2788                 /* no stream device for kaio */
2789                 if (STREAMSTAB(major)) {
2790                         return (NULL);
2791                 }
2792         } else {
2793                 return (NULL);
2794         }
2795 
2796         /*
2797          * Check old drivers which do not have async I/O entry points.
2798          */
2799         if (devopsp[major]->devo_rev < 3)
2800                 return (NULL);
2801 
2802         cb = devopsp[major]->devo_cb_ops;
2803 
2804         if (cb->cb_rev < 1)
2805                 return (NULL);
2806 
2807         /*
2808          * Check whether the driver has a strategy routine.
2809          * Kaio is not supported for devices like ttys.
2810          */
2811         if (cb->cb_strategy == nodev || cb->cb_strategy == NULL)
2812                 return (NULL);
2813 
2814         /*
2815          * Clustering: If vnode is a PXFS vnode, then the device may be remote.
2816          * We cannot call the driver directly. Instead return the
2817          * PXFS functions.
2818          */
2819 
2820         if (IS_PXFSVP(vp)) {
2821                 if (mode & FREAD)
2822                         return (clpxfs_aio_read);
2823                 else
2824                         return (clpxfs_aio_write);
2825         }
2826         if (mode & FREAD)
2827                 aio_func = (cb->cb_aread == nodev) ? NULL : driver_aio_read;
2828         else
2829                 aio_func = (cb->cb_awrite == nodev) ? NULL : driver_aio_write;
2830 
2831         /*
2832          * Do we need this?
2833          * nodev returns ENXIO anyway.
2834          */
2835         if (aio_func == nodev)
2836                 return (NULL);
2837 
2838         sp = VTOS(vp);
2839         smark(sp, SACC);
2840         return (aio_func);
2841 }
2842 
2843 /*
2844  * Clustering: We want check_vp to return a correctly prototyped
2845  * function that is common to both the PXFS and regular cases.
2846  * We define this intermediate function that will do the right
2847  * thing for the driver case.
2848  */
2849 
2850 static int
2851 driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
2852 {
2853         dev_t dev;
2854         struct cb_ops   *cb;
2855 
2856         ASSERT(vp->v_type == VCHR);
2857         ASSERT(!IS_PXFSVP(vp));
2858         dev = VTOS(vp)->s_dev;
2859         ASSERT(!STREAMSTAB(getmajor(dev)));
2860 
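             /* call the driver's awrite(9E) entry point directly */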
2861         cb = devopsp[getmajor(dev)]->devo_cb_ops;
2862 
2863         ASSERT(cb->cb_awrite != nodev);
2864         return ((*cb->cb_awrite)(dev, aio, cred_p));
2865 }
2866 
2867 /*
2868  * Clustering: check_vp() must return a correctly prototyped
2869  * function common to both the PXFS and the regular case, so we
2870  * define this intermediate wrapper to do the right thing for the
2871  * plain driver (non-PXFS) read case.
2872  */
2873 
2874 static int
2875 driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
2876 {
2877         dev_t dev;
2878         struct cb_ops   *cb;
2879 
2880         ASSERT(vp->v_type == VCHR);
2881         ASSERT(!IS_PXFSVP(vp));
2882         dev = VTOS(vp)->s_dev;
2883         ASSERT(!STREAMSTAB(getmajor(dev)));
2884 
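             /* call the driver's aread(9E) entry point directly */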
2885         cb = devopsp[getmajor(dev)]->devo_cb_ops;
2886 
2887         ASSERT(cb->cb_aread != nodev);
2888         return ((*cb->cb_aread)(dev, aio, cred_p));
2889 }
2890 
2891 /*
2892  * This routine is called when a largefile call is made by a 32-bit
2893  * process on an ILP32 or LP64 kernel.  All 64-bit processes are
2894  * largefile by definition and call alio() instead.
2895  */
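     /*
      * Illustrative sketch (assumed userland usage, not part of this
      * file): a 32-bit largefile application typically reaches this
      * code through lio_listio64(3C); the fd and buf names below are
      * assumed to be an open raw-device descriptor and a data buffer:
      *
      *        struct aiocb64 cb;
      *        struct aiocb64 *list[1];
      *
      *        (void) memset(&cb, 0, sizeof (cb));
      *        cb.aio_fildes = fd;
      *        cb.aio_buf = buf;
      *        cb.aio_nbytes = sizeof (buf);
      *        cb.aio_offset = 0;
      *        cb.aio_lio_opcode = LIO_READ;
      *        cb.aio_sigevent.sigev_notify = SIGEV_NONE;
      *        list[0] = &cb;
      *        (void) lio_listio64(LIO_WAIT, list, 1, NULL);
      */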
2896 static int
2897 alioLF(
2898         int             mode_arg,
2899         void            *aiocb_arg,
2900         int             nent,
2901         void            *sigev)
2902 {
2903         file_t          *fp;
2904         file_t          *prev_fp = NULL;
2905         int             prev_mode = -1;
2906         struct vnode    *vp;
2907         aio_lio_t       *head;
2908         aio_req_t       *reqp;
2909         aio_t           *aiop;
2910         caddr_t         cbplist;
2911         aiocb64_32_t    cb64;
2912         aiocb64_32_t    *aiocb = &cb64;
2913         aiocb64_32_t    *cbp;
2914         caddr32_t       *ucbp;
2915 #ifdef _LP64
2916         aiocb_t         aiocb_n;
2917 #endif
2918         struct sigevent32       sigevk;
2919         sigqueue_t      *sqp;
2920         int             (*aio_func)();
2921         int             mode;
2922         int             error = 0;
2923         int             aio_errors = 0;
2924         int             i;
2925         size_t          ssize;
2926         int             deadhead = 0;
2927         int             aio_notsupported = 0;
2928         int             lio_head_port;
2929         int             aio_port;
2930         int             aio_thread;
2931         port_kevent_t   *pkevtp = NULL;
2932         int             portused = 0;
2933         port_notify32_t pnotify;
2934         int             event;
2935 
2936         aiop = curproc->p_aio;
2937         if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
2938                 return (EINVAL);
2939 
2940         ASSERT(get_udatamodel() == DATAMODEL_ILP32);
2941 
2942         ssize = (sizeof (caddr32_t) * nent);
2943         cbplist = kmem_alloc(ssize, KM_SLEEP);
2944         ucbp = (caddr32_t *)cbplist;
2945 
2946         if (copyin(aiocb_arg, cbplist, ssize) ||
2947             (sigev && copyin(sigev, &sigevk, sizeof (sigevk)))) {
2948                 kmem_free(cbplist, ssize);
2949                 return (EFAULT);
2950         }
2951 
2952         /* Event Ports  */
2953         if (sigev &&
2954             (sigevk.sigev_notify == SIGEV_THREAD ||
2955             sigevk.sigev_notify == SIGEV_PORT)) {
2956                 if (sigevk.sigev_notify == SIGEV_THREAD) {
2957                         pnotify.portnfy_port = sigevk.sigev_signo;
2958                         pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
2959                 } else if (copyin(
2960                     (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
2961                     &pnotify, sizeof (pnotify))) {
2962                         kmem_free(cbplist, ssize);
2963                         return (EFAULT);
2964                 }
2965                 error = port_alloc_event(pnotify.portnfy_port,
2966                     PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
2967                 if (error) {
2968                         if (error == ENOMEM || error == EAGAIN)
2969                                 error = EAGAIN;
2970                         else
2971                                 error = EINVAL;
2972                         kmem_free(cbplist, ssize);
2973                         return (error);
2974                 }
2975                 lio_head_port = pnotify.portnfy_port;
2976                 portused = 1;
2977         }
2978 
2979         /*
2980          * A list head is allocated when the caller waits for the
2981          * list (LIO_WAIT) or when notification is enabled for it.
2982          */
2983         head = NULL;
2984 
2985         if (mode_arg == LIO_WAIT || sigev) {
2986                 mutex_enter(&aiop->aio_mutex);
2987                 error = aio_lio_alloc(&head);
2988                 mutex_exit(&aiop->aio_mutex);
2989                 if (error)
2990                         goto done;
2991                 deadhead = 1;
2992                 head->lio_nent = nent;
2993                 head->lio_refcnt = nent;
2994                 head->lio_port = -1;
2995                 head->lio_portkev = NULL;
2996                 if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
2997                     sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
2998                         sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
2999                         if (sqp == NULL) {
3000                                 error = EAGAIN;
3001                                 goto done;
3002                         }
3003                         sqp->sq_func = NULL;
3004                         sqp->sq_next = NULL;
3005                         sqp->sq_info.si_code = SI_ASYNCIO;
3006                         sqp->sq_info.si_pid = curproc->p_pid;
3007                         sqp->sq_info.si_ctid = PRCTID(curproc);
3008                         sqp->sq_info.si_zoneid = getzoneid();
3009                         sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
3010                         sqp->sq_info.si_signo = sigevk.sigev_signo;
3011                         sqp->sq_info.si_value.sival_int =
3012                             sigevk.sigev_value.sival_int;
3013                         head->lio_sigqp = sqp;
3014                 } else {
3015                         head->lio_sigqp = NULL;
3016                 }
3017                 if (pkevtp) {
3018                         /*
3019                          * Prepare data to send when the list of aiocbs
3020                          * has completed.
3021                          */
3022                         port_init_event(pkevtp, (uintptr_t)sigev,
3023                             (void *)(uintptr_t)pnotify.portnfy_user,
3024                             NULL, head);
3025                         pkevtp->portkev_events = AIOLIO64;
3026                         head->lio_portkev = pkevtp;
3027                         head->lio_port = pnotify.portnfy_port;
3028                 }
3029         }
3030 
3031         for (i = 0; i < nent; i++, ucbp++) {
3032 
3033                 cbp = (aiocb64_32_t *)(uintptr_t)*ucbp;
3034                 /* skip entry if it can't be copied. */
3035                 if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
3036                         if (head) {
3037                                 mutex_enter(&aiop->aio_mutex);
3038                                 head->lio_nent--;
3039                                 head->lio_refcnt--;
3040                                 mutex_exit(&aiop->aio_mutex);
3041                         }
3042                         continue;
3043                 }
3044 
3045                 /* skip if opcode for aiocb is LIO_NOP */
3046                 mode = aiocb->aio_lio_opcode;
3047                 if (mode == LIO_NOP) {
3048                         cbp = NULL;
3049                         if (head) {
3050                                 mutex_enter(&aiop->aio_mutex);
3051                                 head->lio_nent--;
3052                                 head->lio_refcnt--;
3053                                 mutex_exit(&aiop->aio_mutex);
3054                         }
3055                         continue;
3056                 }
3057 
3058                 /* increment file descriptor's ref count. */
3059                 if ((fp = getf(aiocb->aio_fildes)) == NULL) {
3060                         lio_set_uerror(&cbp->aio_resultp, EBADF);
3061                         if (head) {
3062                                 mutex_enter(&aiop->aio_mutex);
3063                                 head->lio_nent--;
3064                                 head->lio_refcnt--;
3065                                 mutex_exit(&aiop->aio_mutex);
3066                         }
3067                         aio_errors++;
3068                         continue;
3069                 }
3070 
3071                 /*
3072                  * verify the file was opened with the required access mode
3073                  */
3074                 if ((fp->f_flag & mode) == 0) {
3075                         releasef(aiocb->aio_fildes);
3076                         lio_set_uerror(&cbp->aio_resultp, EBADF);
3077                         if (head) {
3078                                 mutex_enter(&aiop->aio_mutex);
3079                                 head->lio_nent--;
3080                                 head->lio_refcnt--;
3081                                 mutex_exit(&aiop->aio_mutex);
3082                         }
3083                         aio_errors++;
3084                         continue;
3085                 }
3086 
3087                 /*
3088                  * Common case: consecutive requests target the same fd
3089                  * with the same r/w operation.  For unsupported vnodes
3090                  * (e.g. UFS files), set EBADFD so libaio falls back.
3091                  */
3092                 vp = fp->f_vnode;
3093                 if (fp != prev_fp || mode != prev_mode) {
3094                         aio_func = check_vp(vp, mode);
3095                         if (aio_func == NULL) {
3096                                 prev_fp = NULL;
3097                                 releasef(aiocb->aio_fildes);
3098                                 lio_set_uerror(&cbp->aio_resultp, EBADFD);
3099                                 aio_notsupported++;
3100                                 if (head) {
3101                                         mutex_enter(&aiop->aio_mutex);
3102                                         head->lio_nent--;
3103                                         head->lio_refcnt--;
3104                                         mutex_exit(&aiop->aio_mutex);
3105                                 }
3106                                 continue;
3107                         } else {
3108                                 prev_fp = fp;
3109                                 prev_mode = mode;
3110                         }
3111                 }
3112 
3113 #ifdef  _LP64
3114                 aiocb_LFton(aiocb, &aiocb_n);
3115                 error = aio_req_setup(&reqp, aiop, &aiocb_n,
3116                     (aio_result_t *)&cbp->aio_resultp, vp, 0);
3117 #else
3118                 error = aio_req_setupLF(&reqp, aiop, aiocb,
3119                     (aio_result_t *)&cbp->aio_resultp, vp, 0);
3120 #endif  /* _LP64 */
3121                 if (error) {
3122                         releasef(aiocb->aio_fildes);
3123                         lio_set_uerror(&cbp->aio_resultp, error);
3124                         if (head) {
3125                                 mutex_enter(&aiop->aio_mutex);
3126                                 head->lio_nent--;
3127                                 head->lio_refcnt--;
3128                                 mutex_exit(&aiop->aio_mutex);
3129                         }
3130                         aio_errors++;
3131                         continue;
3132                 }
3133 
3134                 reqp->aio_req_lio = head;
3135                 deadhead = 0;
3136 
3137                 /*
3138                  * Set the errno field now before sending the request to
3139                  * the driver to avoid a race condition
3140                  */
3141                 (void) suword32(&cbp->aio_resultp.aio_errno,
3142                     EINPROGRESS);
3143 
3144                 reqp->aio_req_iocb.iocb32 = *ucbp;
3145 
3146                 event = (mode == LIO_READ)? AIOAREAD64 : AIOAWRITE64;
3147                 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
3148                 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
3149                 if (aio_port | aio_thread) {
3150                         port_kevent_t *lpkevp;
3151                         /*
3152                          * Prepare data to send with each aiocb completed.
3153                          */
3154                         if (aio_port) {
3155                                 void *paddr = (void *)(uintptr_t)
3156                                     aiocb->aio_sigevent.sigev_value.sival_ptr;
3157                                 if (copyin(paddr, &pnotify, sizeof (pnotify)))
3158                                         error = EFAULT;
3159                         } else {        /* aio_thread */
3160                                 pnotify.portnfy_port =
3161                                     aiocb->aio_sigevent.sigev_signo;
3162                                 pnotify.portnfy_user =
3163                                     aiocb->aio_sigevent.sigev_value.sival_ptr;
3164                         }
3165                         if (error)
3166                                 /* EMPTY */;
3167                         else if (pkevtp != NULL &&
3168                             pnotify.portnfy_port == lio_head_port)
3169                                 error = port_dup_event(pkevtp, &lpkevp,
3170                                     PORT_ALLOC_DEFAULT);
3171                         else
3172                                 error = port_alloc_event(pnotify.portnfy_port,
3173                                     PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
3174                                     &lpkevp);
3175                         if (error == 0) {
3176                                 port_init_event(lpkevp, (uintptr_t)*ucbp,
3177                                     (void *)(uintptr_t)pnotify.portnfy_user,
3178                                     aio_port_callback, reqp);
3179                                 lpkevp->portkev_events = event;
3180                                 reqp->aio_req_portkev = lpkevp;
3181                                 reqp->aio_req_port = pnotify.portnfy_port;
3182                         }
3183                 }
3184 
3185                 /*
3186                  * send the request to the driver.
3187                  */
3188                 if (error == 0) {
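                              /*
                               * A zero-length request is completed at once
                               * by aio_zerolen(); the driver is never called.
                               */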
3189                         if (aiocb->aio_nbytes == 0) {
3190                                 clear_active_fd(aiocb->aio_fildes);
3191                                 aio_zerolen(reqp);
3192                                 continue;
3193                         }
3194                         error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
3195                             CRED());
3196                 }
3197 
3198                 /*
3199                  * The fd's ref count is not decremented until the I/O has
3200                  * completed, unless there was an error.
3201                  */
3202                 if (error) {
3203                         releasef(aiocb->aio_fildes);
3204                         lio_set_uerror(&cbp->aio_resultp, error);
3205                         if (head) {
3206                                 mutex_enter(&aiop->aio_mutex);
3207                                 head->lio_nent--;
3208                                 head->lio_refcnt--;
3209                                 mutex_exit(&aiop->aio_mutex);
3210                         }
3211                         if (error == ENOTSUP)
3212                                 aio_notsupported++;
3213                         else
3214                                 aio_errors++;
3215                         lio_set_error(reqp, portused);
3216                 } else {
3217                         clear_active_fd(aiocb->aio_fildes);
3218                 }
3219         }
3220 
3221         if (aio_notsupported) {
3222                 error = ENOTSUP;
3223         } else if (aio_errors) {
3224                 /*
3225                  * return EIO if any request failed
3226                  */
3227                 error = EIO;
3228         }
3229 
3230         if (mode_arg == LIO_WAIT) {
3231                 mutex_enter(&aiop->aio_mutex);
3232                 while (head->lio_refcnt > 0) {
3233                         if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
3234                                 mutex_exit(&aiop->aio_mutex);
3235                                 error = EINTR;
3236                                 goto done;
3237                         }
3238                 }
3239                 mutex_exit(&aiop->aio_mutex);
3240                 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_LARGEFILE);
3241         }
3242 
3243 done:
3244         kmem_free(cbplist, ssize);
3245         if (deadhead) {
3246                 if (head->lio_sigqp)
3247                         kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
3248                 if (head->lio_portkev)
3249                         port_free_event(head->lio_portkev);
3250                 kmem_free(head, sizeof (aio_lio_t));
3251         }
3252         return (error);
3253 }
3254 
3255 #ifdef  _SYSCALL32_IMPL
3256 static void
3257 aiocb_LFton(aiocb64_32_t *src, aiocb_t *dest)
3258 {
3259         dest->aio_fildes = src->aio_fildes;
3260         dest->aio_buf = (void *)(uintptr_t)src->aio_buf;
3261         dest->aio_nbytes = (size_t)src->aio_nbytes;
3262         dest->aio_offset = (off_t)src->aio_offset;
3263         dest->aio_reqprio = src->aio_reqprio;
3264         dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
3265         dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;
3266 
3267         /*
3268          * See comment in sigqueue32() on handling of 32-bit
3269          * sigvals in a 64-bit kernel.
3270          */
3271         dest->aio_sigevent.sigev_value.sival_int =
3272             (int)src->aio_sigevent.sigev_value.sival_int;
3273         dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
3274             (uintptr_t)src->aio_sigevent.sigev_notify_function;
3275         dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
3276             (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
3277         dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
3278         dest->aio_lio_opcode = src->aio_lio_opcode;
3279         dest->aio_state = src->aio_state;
3280         dest->aio__pad[0] = src->aio__pad[0];
3281 }
3282 #endif
3283 
3284 /*
3285  * This function is used only for largefile calls made by
3286  * 32-bit applications.
3287  */
3288 static int
3289 aio_req_setupLF(
3290         aio_req_t       **reqpp,
3291         aio_t           *aiop,
3292         aiocb64_32_t    *arg,
3293         aio_result_t    *resultp,
3294         vnode_t         *vp,
3295         int             old_solaris_req)
3296 {
3297         sigqueue_t      *sqp = NULL;
3298         aio_req_t       *reqp;
3299         struct uio      *uio;
3300         struct sigevent32 *sigev;
3301         int             error;
3302 
3303         sigev = &arg->aio_sigevent;
3304         if (sigev->sigev_notify == SIGEV_SIGNAL &&
3305             sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
3306                 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
3307                 if (sqp == NULL)
3308                         return (EAGAIN);
3309                 sqp->sq_func = NULL;
3310                 sqp->sq_next = NULL;
3311                 sqp->sq_info.si_code = SI_ASYNCIO;
3312                 sqp->sq_info.si_pid = curproc->p_pid;
3313                 sqp->sq_info.si_ctid = PRCTID(curproc);
3314                 sqp->sq_info.si_zoneid = getzoneid();
3315                 sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
3316                 sqp->sq_info.si_signo = sigev->sigev_signo;
3317                 sqp->sq_info.si_value.sival_int = sigev->sigev_value.sival_int;
3318         }
3319 
3320         mutex_enter(&aiop->aio_mutex);
3321 
3322         if (aiop->aio_flags & AIO_REQ_BLOCK) {
3323                 mutex_exit(&aiop->aio_mutex);
3324                 if (sqp)
3325                         kmem_free(sqp, sizeof (sigqueue_t));
3326                 return (EIO);
3327         }
3328         /*
3329          * get an aio_reqp from the free list or allocate one
3330          * from dynamic memory.
3331          */
3332         if (error = aio_req_alloc(&reqp, resultp)) {
3333                 mutex_exit(&aiop->aio_mutex);
3334                 if (sqp)
3335                         kmem_free(sqp, sizeof (sigqueue_t));
3336                 return (error);
3337         }
3338         aiop->aio_pending++;
3339         aiop->aio_outstanding++;
3340         reqp->aio_req_flags = AIO_PENDING;
3341         if (old_solaris_req) {
3342                 /* this is an old Solaris aio request */
3343                 reqp->aio_req_flags |= AIO_SOLARIS;
3344                 aiop->aio_flags |= AIO_SOLARIS_REQ;
3345         }
3346         if (sigev->sigev_notify == SIGEV_THREAD ||
3347             sigev->sigev_notify == SIGEV_PORT)
3348                 aio_enq(&aiop->aio_portpending, reqp, 0);
3349         mutex_exit(&aiop->aio_mutex);
3350         /*
3351          * initialize aio request.
3352          */
3353         reqp->aio_req_fd = arg->aio_fildes;
3354         reqp->aio_req_sigqp = sqp;
3355         reqp->aio_req_iocb.iocb = NULL;
3356         reqp->aio_req_lio = NULL;
3357         reqp->aio_req_buf.b_file = vp;
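             /* build a single-segment uio describing the user's buffer */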
3358         uio = reqp->aio_req.aio_uio;
3359         uio->uio_iovcnt = 1;
3360         uio->uio_iov->iov_base = (caddr_t)(uintptr_t)arg->aio_buf;
3361         uio->uio_iov->iov_len = arg->aio_nbytes;
3362         uio->uio_loffset = arg->aio_offset;
3363         *reqpp = reqp;
3364         return (0);
3365 }
3366 
3367 /*
3368  * This routine is called when a non-largefile call is made by a 32-bit
3369  * process on an ILP32 or LP64 kernel.
3370  */
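     /*
      * Illustrative sketch (assumed userland usage, not part of this
      * file): a 32-bit application reaches this code through
      * lio_listio(3C), where list/nent are assumed to describe aiocbs
      * prepared as in the largefile example above:
      *
      *        struct sigevent sev;
      *
      *        sev.sigev_notify = SIGEV_SIGNAL;
      *        sev.sigev_signo = SIGUSR1;
      *        sev.sigev_value.sival_int = 0;
      *        (void) lio_listio(LIO_NOWAIT, list, nent, &sev);
      */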
3371 static int
3372 alio32(
3373         int             mode_arg,
3374         void            *aiocb_arg,
3375         int             nent,
3376         void            *sigev)
3377 {
3378         file_t          *fp;
3379         file_t          *prev_fp = NULL;
3380         int             prev_mode = -1;
3381         struct vnode    *vp;
3382         aio_lio_t       *head;
3383         aio_req_t       *reqp;
3384         aio_t           *aiop;
3385         caddr_t         cbplist;
3386         aiocb_t         cb;
3387         aiocb_t         *aiocb = &cb;
3388 #ifdef  _LP64
3389         aiocb32_t       *cbp;
3390         caddr32_t       *ucbp;
3391         aiocb32_t       cb32;
3392         aiocb32_t       *aiocb32 = &cb32;
3393         struct sigevent32       sigevk;
3394 #else
3395         aiocb_t         *cbp, **ucbp;
3396         struct sigevent sigevk;
3397 #endif
3398         sigqueue_t      *sqp;
3399         int             (*aio_func)();
3400         int             mode;
3401         int             error = 0;
3402         int             aio_errors = 0;
3403         int             i;
3404         size_t          ssize;
3405         int             deadhead = 0;
3406         int             aio_notsupported = 0;
3407         int             lio_head_port;
3408         int             aio_port;
3409         int             aio_thread;
3410         port_kevent_t   *pkevtp = NULL;
3411         int             portused = 0;
3412 #ifdef  _LP64
3413         port_notify32_t pnotify;
3414 #else
3415         port_notify_t   pnotify;
3416 #endif
3417         int             event;
3418 
3419         aiop = curproc->p_aio;
3420         if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
3421                 return (EINVAL);
3422 
3423 #ifdef  _LP64
3424         ssize = (sizeof (caddr32_t) * nent);
3425 #else
3426         ssize = (sizeof (aiocb_t *) * nent);
3427 #endif
3428         cbplist = kmem_alloc(ssize, KM_SLEEP);
3429         ucbp = (void *)cbplist;
3430 
3431         if (copyin(aiocb_arg, cbplist, ssize) ||
3432             (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent32)))) {
3433                 kmem_free(cbplist, ssize);
3434                 return (EFAULT);
3435         }
3436 
3437         /* Event Ports  */
3438         if (sigev &&
3439             (sigevk.sigev_notify == SIGEV_THREAD ||
3440             sigevk.sigev_notify == SIGEV_PORT)) {
3441                 if (sigevk.sigev_notify == SIGEV_THREAD) {
3442                         pnotify.portnfy_port = sigevk.sigev_signo;
3443                         pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
3444                 } else if (copyin(
3445                     (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
3446                     &pnotify, sizeof (pnotify))) {
3447                         kmem_free(cbplist, ssize);
3448                         return (EFAULT);
3449                 }
3450                 error = port_alloc_event(pnotify.portnfy_port,
3451                     PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
3452                 if (error) {
3453                         if (error == ENOMEM || error == EAGAIN)
3454                                 error = EAGAIN;
3455                         else
3456                                 error = EINVAL;
3457                         kmem_free(cbplist, ssize);
3458                         return (error);
3459                 }
3460                 lio_head_port = pnotify.portnfy_port;
3461                 portused = 1;
3462         }
3463 
3464         /*
3465          * A list head is allocated when the caller waits for the
3466          * list (LIO_WAIT) or when notification is enabled for it.
3467          */
3468         head = NULL;
3469 
3470         if (mode_arg == LIO_WAIT || sigev) {
3471                 mutex_enter(&aiop->aio_mutex);
3472                 error = aio_lio_alloc(&head);
3473                 mutex_exit(&aiop->aio_mutex);
3474                 if (error)
3475                         goto done;
3476                 deadhead = 1;
3477                 head->lio_nent = nent;
3478                 head->lio_refcnt = nent;
3479                 head->lio_port = -1;
3480                 head->lio_portkev = NULL;
3481                 if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
3482                     sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
3483                         sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
3484                         if (sqp == NULL) {
3485                                 error = EAGAIN;
3486                                 goto done;
3487                         }
3488                         sqp->sq_func = NULL;
3489                         sqp->sq_next = NULL;
3490                         sqp->sq_info.si_code = SI_ASYNCIO;
3491                         sqp->sq_info.si_pid = curproc->p_pid;
3492                         sqp->sq_info.si_ctid = PRCTID(curproc);
3493                         sqp->sq_info.si_zoneid = getzoneid();
3494                         sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
3495                         sqp->sq_info.si_signo = sigevk.sigev_signo;
3496                         sqp->sq_info.si_value.sival_int =
3497                             sigevk.sigev_value.sival_int;
3498                         head->lio_sigqp = sqp;
3499                 } else {
3500                         head->lio_sigqp = NULL;
3501                 }
3502                 if (pkevtp) {
3503                         /*
3504                          * Prepare data to send when the list of aiocbs
3505                          * has completed.
3506                          */
3507                         port_init_event(pkevtp, (uintptr_t)sigev,
3508                             (void *)(uintptr_t)pnotify.portnfy_user,
3509                             NULL, head);
3510                         pkevtp->portkev_events = AIOLIO;
3511                         head->lio_portkev = pkevtp;
3512                         head->lio_port = pnotify.portnfy_port;
3513                 }
3514         }
3515 
3516         for (i = 0; i < nent; i++, ucbp++) {
3517 
3518                 /* skip entry if it can't be copied. */
3519 #ifdef  _LP64
3520                 cbp = (aiocb32_t *)(uintptr_t)*ucbp;
3521                 if (cbp == NULL || copyin(cbp, aiocb32, sizeof (*aiocb32)))
3522 #else
3523                 cbp = (aiocb_t *)*ucbp;
3524                 if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb)))
3525 #endif
3526                 {
3527                         if (head) {
3528                                 mutex_enter(&aiop->aio_mutex);
3529                                 head->lio_nent--;
3530                                 head->lio_refcnt--;
3531                                 mutex_exit(&aiop->aio_mutex);
3532                         }
3533                         continue;
3534                 }
3535 #ifdef  _LP64
3536                 /*
3537                  * copy the 32-bit structure into the 64-bit structure
3538                  */
3539                 aiocb_32ton(aiocb32, aiocb);
3540 #endif /* _LP64 */
3541 
3542                 /* skip if opcode for aiocb is LIO_NOP */
3543                 mode = aiocb->aio_lio_opcode;
3544                 if (mode == LIO_NOP) {
3545                         cbp = NULL;
3546                         if (head) {
3547                                 mutex_enter(&aiop->aio_mutex);
3548                                 head->lio_nent--;
3549                                 head->lio_refcnt--;
3550                                 mutex_exit(&aiop->aio_mutex);
3551                         }
3552                         continue;
3553                 }
3554 
3555                 /* increment file descriptor's ref count. */
3556                 if ((fp = getf(aiocb->aio_fildes)) == NULL) {
3557                         lio_set_uerror(&cbp->aio_resultp, EBADF);
3558                         if (head) {
3559                                 mutex_enter(&aiop->aio_mutex);
3560                                 head->lio_nent--;
3561                                 head->lio_refcnt--;
3562                                 mutex_exit(&aiop->aio_mutex);
3563                         }
3564                         aio_errors++;
3565                         continue;
3566                 }
3567 
3568                 /*
3569                  * verify the file was opened with the required access mode
3570                  */
3571                 if ((fp->f_flag & mode) == 0) {
3572                         releasef(aiocb->aio_fildes);
3573                         lio_set_uerror(&cbp->aio_resultp, EBADF);
3574                         if (head) {
3575                                 mutex_enter(&aiop->aio_mutex);
3576                                 head->lio_nent--;
3577                                 head->lio_refcnt--;
3578                                 mutex_exit(&aiop->aio_mutex);
3579                         }
3580                         aio_errors++;
3581                         continue;
3582                 }
3583 
3584                 /*
3585                  * Common case: consecutive requests target the same fd
3586                  * with the same r/w operation.  For unsupported vnodes
3587                  * (e.g. UFS files), set EBADFD so libaio falls back.
3588                  */
3589                 vp = fp->f_vnode;
3590                 if (fp != prev_fp || mode != prev_mode) {
3591                         aio_func = check_vp(vp, mode);
3592                         if (aio_func == NULL) {
3593                                 prev_fp = NULL;
3594                                 releasef(aiocb->aio_fildes);
3595                                 lio_set_uerror(&cbp->aio_resultp, EBADFD);
3596                                 aio_notsupported++;
3597                                 if (head) {
3598                                         mutex_enter(&aiop->aio_mutex);
3599                                         head->lio_nent--;
3600                                         head->lio_refcnt--;
3601                                         mutex_exit(&aiop->aio_mutex);
3602                                 }
3603                                 continue;
3604                         } else {
3605                                 prev_fp = fp;
3606                                 prev_mode = mode;
3607                         }
3608                 }
3609 
3610                 error = aio_req_setup(&reqp, aiop, aiocb,
3611                     (aio_result_t *)&cbp->aio_resultp, vp, 0);
3612                 if (error) {
3613                         releasef(aiocb->aio_fildes);
3614                         lio_set_uerror(&cbp->aio_resultp, error);
3615                         if (head) {
3616                                 mutex_enter(&aiop->aio_mutex);
3617                                 head->lio_nent--;
3618                                 head->lio_refcnt--;
3619                                 mutex_exit(&aiop->aio_mutex);
3620                         }
3621                         aio_errors++;
3622                         continue;
3623                 }
3624 
3625                 reqp->aio_req_lio = head;
3626                 deadhead = 0;
3627 
3628                 /*
3629                  * Set the errno field now before sending the request to
3630                  * the driver to avoid a race condition
3631                  */
3632                 (void) suword32(&cbp->aio_resultp.aio_errno,
3633                     EINPROGRESS);
3634 
3635                 reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)cbp;
3636 
3637                 event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
3638                 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
3639                 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
3640                 if (aio_port | aio_thread) {
3641                         port_kevent_t *lpkevp;
3642                         /*
3643                          * Prepare data to send with each aiocb completed.
3644                          */
3645 #ifdef _LP64
3646                         if (aio_port) {
3647                                 void *paddr = (void  *)(uintptr_t)
3648                                     aiocb32->aio_sigevent.sigev_value.sival_ptr;
3649                                 if (copyin(paddr, &pnotify, sizeof (pnotify)))
3650                                         error = EFAULT;
3651                         } else {        /* aio_thread */
3652                                 pnotify.portnfy_port =
3653                                     aiocb32->aio_sigevent.sigev_signo;
3654                                 pnotify.portnfy_user =
3655                                     aiocb32->aio_sigevent.sigev_value.sival_ptr;
3656                         }
3657 #else
3658                         if (aio_port) {
3659                                 void *paddr =
3660                                     aiocb->aio_sigevent.sigev_value.sival_ptr;
3661                                 if (copyin(paddr, &pnotify, sizeof (pnotify)))
3662                                         error = EFAULT;
3663                         } else {        /* aio_thread */
3664                                 pnotify.portnfy_port =
3665                                     aiocb->aio_sigevent.sigev_signo;
3666                                 pnotify.portnfy_user =
3667                                     aiocb->aio_sigevent.sigev_value.sival_ptr;
3668                         }
3669 #endif
3670                         if (error)
3671                                 /* EMPTY */;
3672                         else if (pkevtp != NULL &&
3673                             pnotify.portnfy_port == lio_head_port)
3674                                 error = port_dup_event(pkevtp, &lpkevp,
3675                                     PORT_ALLOC_DEFAULT);
3676                         else
3677                                 error = port_alloc_event(pnotify.portnfy_port,
3678                                     PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
3679                                     &lpkevp);
3680                         if (error == 0) {
3681                                 port_init_event(lpkevp, (uintptr_t)cbp,
3682                                     (void *)(uintptr_t)pnotify.portnfy_user,
3683                                     aio_port_callback, reqp);
3684                                 lpkevp->portkev_events = event;
3685                                 reqp->aio_req_portkev = lpkevp;
3686                                 reqp->aio_req_port = pnotify.portnfy_port;
3687                         }
3688                 }
3689 
3690                 /*
3691                  * send the request to the driver.
3692                  */
3693                 if (error == 0) {
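                              /*
                               * A zero-length request is completed at once
                               * by aio_zerolen(); the driver is never called.
                               */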
3694                         if (aiocb->aio_nbytes == 0) {
3695                                 clear_active_fd(aiocb->aio_fildes);
3696                                 aio_zerolen(reqp);
3697                                 continue;
3698                         }
3699                         error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
3700                             CRED());
3701                 }
3702 
3703                 /*
3704                  * The fd's ref count is not decremented until the I/O has
3705                  * completed, unless there was an error.
3706                  */
3707                 if (error) {
3708                         releasef(aiocb->aio_fildes);
3709                         lio_set_uerror(&cbp->aio_resultp, error);
3710                         if (head) {
3711                                 mutex_enter(&aiop->aio_mutex);
3712                                 head->lio_nent--;
3713                                 head->lio_refcnt--;
3714                                 mutex_exit(&aiop->aio_mutex);
3715                         }
3716                         if (error == ENOTSUP)
3717                                 aio_notsupported++;
3718                         else
3719                                 aio_errors++;
3720                         lio_set_error(reqp, portused);
3721                 } else {
3722                         clear_active_fd(aiocb->aio_fildes);
3723                 }
3724         }
3725 
3726         if (aio_notsupported) {
3727                 error = ENOTSUP;
3728         } else if (aio_errors) {
3729                 /*
3730                  * return EIO if any request failed
3731                  */
3732                 error = EIO;
3733         }
3734 
3735         if (mode_arg == LIO_WAIT) {
3736                 mutex_enter(&aiop->aio_mutex);
3737                 while (head->lio_refcnt > 0) {
3738                         if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
3739                                 mutex_exit(&aiop->aio_mutex);
3740                                 error = EINTR;
3741                                 goto done;
3742                         }
3743                 }
3744                 mutex_exit(&aiop->aio_mutex);
3745                 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_32);
3746         }
3747 
3748 done:
3749         kmem_free(cbplist, ssize);
3750         if (deadhead) {
3751                 if (head->lio_sigqp)
3752                         kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
3753                 if (head->lio_portkev)
3754                         port_free_event(head->lio_portkev);
3755                 kmem_free(head, sizeof (aio_lio_t));
3756         }
3757         return (error);
3758 }
3759 
3760 
3761 #ifdef  _SYSCALL32_IMPL
3762 void
3763 aiocb_32ton(aiocb32_t *src, aiocb_t *dest)
3764 {
3765         dest->aio_fildes = src->aio_fildes;
3766         dest->aio_buf = (caddr_t)(uintptr_t)src->aio_buf;
3767         dest->aio_nbytes = (size_t)src->aio_nbytes;
3768         dest->aio_offset = (off_t)src->aio_offset;
3769         dest->aio_reqprio = src->aio_reqprio;
3770         dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
3771         dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;
3772 
3773         /*
3774          * See comment in sigqueue32() on handling of 32-bit
3775          * sigvals in a 64-bit kernel.
3776          */
3777         dest->aio_sigevent.sigev_value.sival_int =
3778             (int)src->aio_sigevent.sigev_value.sival_int;
3779         dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
3780             (uintptr_t)src->aio_sigevent.sigev_notify_function;
3781         dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
3782             (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
3783         dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
3784         dest->aio_lio_opcode = src->aio_lio_opcode;
3785         dest->aio_state = src->aio_state;
3786         dest->aio__pad[0] = src->aio__pad[0];
3787 }
3788 #endif /* _SYSCALL32_IMPL */
3789 
3790 /*
3791  * aio_port_callback() is called just before the event is retrieved from the
3792  * port.  The task of this callback function is to finish the work of the
3793  * transaction on behalf of the application, which means:
3794  * - copy transaction data out to the application
3795  *      (this thread runs in the right process context)
3796  * - keep track of the transaction (update counters)
3797  * - free allocated buffers
3798  * The aiocb pointer is the object element of the port_kevent_t structure.
3799  *
3800  * flag:
3801  *      PORT_CALLBACK_DEFAULT : do the copyout and free resources
3802  *      PORT_CALLBACK_CLOSE   : skip the copyout, free resources
3803  */
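     /*
      * Illustrative sketch (assumed userland side, not part of this
      * file): the application retrieves the event with port_get(3C);
      * by the time port_get() returns, this callback has already run
      * and copied the results out:
      *
      *        port_event_t pe;
      *
      *        if (port_get(port, &pe, NULL) == 0 &&
      *            pe.portev_source == PORT_SOURCE_AIO) {
      *                aiocb_t *cbp = (aiocb_t *)pe.portev_object;
      *                (void) aio_return(cbp);
      *        }
      */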
3804 
3805 /*ARGSUSED*/
3806 int
3807 aio_port_callback(void *arg, int *events, pid_t pid, int flag, void *evp)
3808 {
3809         aio_t           *aiop = curproc->p_aio;
3810         aio_req_t       *reqp = arg;
3811         struct  iovec   *iov;
3812         struct  buf     *bp;
3813         void            *resultp;
3814 
3815         if (pid != curproc->p_pid) {
3816                 /* wrong process; cannot deliver data here */
3817                 return (EACCES);
3818         }
3819 
3820         mutex_enter(&aiop->aio_portq_mutex);
3821         reqp->aio_req_portkev = NULL;
3822         aio_req_remove_portq(aiop, reqp); /* remove request from portq */
3823         mutex_exit(&aiop->aio_portq_mutex);
3824         aphysio_unlock(reqp);           /* unlock used pages */
3825         mutex_enter(&aiop->aio_mutex);
3826         if (reqp->aio_req_flags & AIO_COPYOUTDONE) {
3827                 aio_req_free_port(aiop, reqp);  /* back to free list */
3828                 mutex_exit(&aiop->aio_mutex);
3829                 return (0);
3830         }
3831 
3832         iov = reqp->aio_req_uio.uio_iov;
3833         bp = &reqp->aio_req_buf;
3834         resultp = (void *)reqp->aio_req_resultp;
3835         aio_req_free_port(aiop, reqp);  /* request struct back to free list */
3836         mutex_exit(&aiop->aio_mutex);
3837         if (flag == PORT_CALLBACK_DEFAULT)
3838                 aio_copyout_result_port(iov, bp, resultp);
3839         return (0);
3840 }