/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Kernel asynchronous I/O.
 * This is only for raw devices now (as of Nov. 1993).
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/fs/snode.h>
#include <sys/unistd.h>
#include <sys/cmn_err.h>
#include <vm/as.h>
#include <vm/faultcode.h>
#include <sys/sysmacros.h>
#include <sys/procfs.h>
#include <sys/kmem.h>
#include <sys/autoconf.h>
#include <sys/ddi_impldefs.h>
#include <sys/sunddi.h>
#include <sys/aio_impl.h>
#include <sys/debug.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/vmsystm.h>
#include <sys/fs/pxfs_ki.h>
#include <sys/contract/process_impl.h>

/*
 * external entry point.
 */
#ifdef _LP64
static int64_t kaioc(long, long, long, long, long, long);
#endif
static int kaio(ulong_t *, rval_t *);


#define AIO_64  0
#define AIO_32  1
#define AIO_LARGEFILE   2

/*
 * implementation-specific functions (private)
 */
#ifdef _LP64
static int alio(int, aiocb_t **, int, struct sigevent *);
#endif
static int aionotify(void);
static int aioinit(void);
static int aiostart(void);
static void alio_cleanup(aio_t *, aiocb_t **, int, int);
static int (*check_vp(struct vnode *, int))(vnode_t *, struct aio_req *,
    cred_t *);
static void lio_set_error(aio_req_t *, int portused);
static aio_t *aio_aiop_alloc();
static int aio_req_alloc(aio_req_t **, aio_result_t *);
static int aio_lio_alloc(aio_lio_t **);
static aio_req_t *aio_req_done(void *);
static aio_req_t *aio_req_remove(aio_req_t *);
static int aio_req_find(aio_result_t *, aio_req_t **);
static int aio_hash_insert(struct aio_req_t *, aio_t *);
static int aio_req_setup(aio_req_t **, aio_t *, aiocb_t *,
    aio_result_t *, vnode_t *, int);
static int aio_cleanup_thread(aio_t *);
static aio_lio_t *aio_list_get(aio_result_t *);
static void lio_set_uerror(void *, int);
extern void aio_zerolen(aio_req_t *);
static int aiowait(struct timeval *, int, long *);
static int aiowaitn(void *, uint_t, uint_t *, timespec_t *);
static int aio_unlock_requests(caddr_t iocblist, int iocb_index,
    aio_req_t *reqlist, aio_t *aiop, model_t model);
static int aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max);
static int aiosuspend(void *, int, struct timespec *, int,
    long *, int);
static int aliowait(int, void *, int, void *, int);
static int aioerror(void *, int);
static int aio_cancel(int, void *, long *, int);
static int arw(int, int, char *, int, offset_t, aio_result_t *, int);
static int aiorw(int, void *, int, int);

static int alioLF(int, void *, int, void *);
static int aio_req_setupLF(aio_req_t **, aio_t *, aiocb64_32_t *,
    aio_result_t *, vnode_t *, int);
static int alio32(int, void *, int, void *);
static int driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
static int driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);

#ifdef  _SYSCALL32_IMPL
static void aiocb_LFton(aiocb64_32_t *, aiocb_t *);
void    aiocb_32ton(aiocb32_t *, aiocb_t *);
#endif /* _SYSCALL32_IMPL */

/*
 * implementation-specific functions (external)
 */
void aio_req_free(aio_t *, aio_req_t *);

/*
 * Event Port framework
 */

void aio_req_free_port(aio_t *, aio_req_t *);
static int aio_port_callback(void *, int *, pid_t, int, void *);

/*
 * This is the loadable module wrapper.
 */
#include <sys/modctl.h>
#include <sys/syscall.h>

#ifdef _LP64

static struct sysent kaio_sysent = {
        6,
        SE_NOUNLOAD | SE_64RVAL | SE_ARGC,
        (int (*)())kaioc
};

#ifdef _SYSCALL32_IMPL
static struct sysent kaio_sysent32 = {
        7,
        SE_NOUNLOAD | SE_64RVAL,
        kaio
};
#endif  /* _SYSCALL32_IMPL */

#else   /* _LP64 */

static struct sysent kaio_sysent = {
        7,
        SE_NOUNLOAD | SE_32RVAL1,
        kaio
};

#endif  /* _LP64 */

/*
 * Module linkage information for the kernel.
 */

static struct modlsys modlsys = {
        &mod_syscallops,
        "kernel Async I/O",
        &kaio_sysent
};

#ifdef  _SYSCALL32_IMPL
static struct modlsys modlsys32 = {
        &mod_syscallops32,
        "kernel Async I/O for 32 bit compatibility",
        &kaio_sysent32
};
#endif  /* _SYSCALL32_IMPL */


static struct modlinkage modlinkage = {
        MODREV_1,
        &modlsys,
#ifdef  _SYSCALL32_IMPL
        &modlsys32,
#endif
        NULL
};

int
_init(void)
{
        int retval;

        if ((retval = mod_install(&modlinkage)) != 0)
                return (retval);

        return (0);
}

int
_fini(void)
{
        int retval;

        retval = mod_remove(&modlinkage);

        return (retval);
}

int
_info(struct modinfo *modinfop)
{
        return (mod_info(&modlinkage, modinfop));
}

#ifdef  _LP64
static int64_t
kaioc(
        long    a0,
        long    a1,
        long    a2,
        long    a3,
        long    a4,
        long    a5)
{
        int     error;
        long    rval = 0;

        switch ((int)a0 & ~AIO_POLL_BIT) {
        case AIOREAD:
                error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
                    (offset_t)a4, (aio_result_t *)a5, FREAD);
                break;
        case AIOWRITE:
                error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
                    (offset_t)a4, (aio_result_t *)a5, FWRITE);
                break;
        case AIOWAIT:
                error = aiowait((struct timeval *)a1, (int)a2, &rval);
                break;
        case AIOWAITN:
                error = aiowaitn((void *)a1, (uint_t)a2, (uint_t *)a3,
                    (timespec_t *)a4);
                break;
        case AIONOTIFY:
                error = aionotify();
                break;
        case AIOINIT:
                error = aioinit();
                break;
        case AIOSTART:
                error = aiostart();
                break;
        case AIOLIO:
                error = alio((int)a1, (aiocb_t **)a2, (int)a3,
                    (struct sigevent *)a4);
                break;
        case AIOLIOWAIT:
                error = aliowait((int)a1, (void *)a2, (int)a3,
                    (struct sigevent *)a4, AIO_64);
                break;
        case AIOSUSPEND:
                error = aiosuspend((void *)a1, (int)a2, (timespec_t *)a3,
                    (int)a4, &rval, AIO_64);
                break;
        case AIOERROR:
                error = aioerror((void *)a1, AIO_64);
                break;
        case AIOAREAD:
                error = aiorw((int)a0, (void *)a1, FREAD, AIO_64);
                break;
        case AIOAWRITE:
                error = aiorw((int)a0, (void *)a1, FWRITE, AIO_64);
                break;
        case AIOCANCEL:
                error = aio_cancel((int)a1, (void *)a2, &rval, AIO_64);
                break;

        /*
         * The large-file interfaces are valid only for the
         * 32-bit kernel; on a 64-bit kernel, large-file calls
         * are converted to regular 64-bit calls.
         */

        default:
                error = EINVAL;
        }
        if (error)
                return ((int64_t)set_errno(error));
        return (rval);
}
#endif

static int
kaio(
        ulong_t *uap,
        rval_t *rvp)
{
        long rval = 0;
        int     error = 0;
        offset_t        off;

        rvp->r_vals = 0;
#if defined(_LITTLE_ENDIAN)
        off = ((u_offset_t)uap[5] << 32) | (u_offset_t)uap[4];
#else
        off = ((u_offset_t)uap[4] << 32) | (u_offset_t)uap[5];
#endif

        switch (uap[0] & ~AIO_POLL_BIT) {
        /*
         * This must be the 32-bit system call on a 64-bit kernel.
         */
        case AIOREAD:
                return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
                    (int)uap[3], off, (aio_result_t *)uap[6], FREAD));
        case AIOWRITE:
                return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
                    (int)uap[3], off, (aio_result_t *)uap[6], FWRITE));
        case AIOWAIT:
                error = aiowait((struct timeval *)uap[1], (int)uap[2],
                    &rval);
                break;
        case AIOWAITN:
                error = aiowaitn((void *)uap[1], (uint_t)uap[2],
                    (uint_t *)uap[3], (timespec_t *)uap[4]);
                break;
        case AIONOTIFY:
                return (aionotify());
        case AIOINIT:
                return (aioinit());
        case AIOSTART:
                return (aiostart());
        case AIOLIO:
                return (alio32((int)uap[1], (void *)uap[2], (int)uap[3],
                    (void *)uap[4]));
        case AIOLIOWAIT:
                return (aliowait((int)uap[1], (void *)uap[2],
                    (int)uap[3], (struct sigevent *)uap[4], AIO_32));
        case AIOSUSPEND:
                error = aiosuspend((void *)uap[1], (int)uap[2],
                    (timespec_t *)uap[3], (int)uap[4],
                    &rval, AIO_32);
                break;
        case AIOERROR:
                return (aioerror((void *)uap[1], AIO_32));
        case AIOAREAD:
                return (aiorw((int)uap[0], (void *)uap[1],
                    FREAD, AIO_32));
        case AIOAWRITE:
                return (aiorw((int)uap[0], (void *)uap[1],
                    FWRITE, AIO_32));
        case AIOCANCEL:
                error = (aio_cancel((int)uap[1], (void *)uap[2], &rval,
                    AIO_32));
                break;
        case AIOLIO64:
                return (alioLF((int)uap[1], (void *)uap[2],
                    (int)uap[3], (void *)uap[4]));
        case AIOLIOWAIT64:
                return (aliowait(uap[1], (void *)uap[2],
                    (int)uap[3], (void *)uap[4], AIO_LARGEFILE));
        case AIOSUSPEND64:
                error = aiosuspend((void *)uap[1], (int)uap[2],
                    (timespec_t *)uap[3], (int)uap[4], &rval,
                    AIO_LARGEFILE);
                break;
        case AIOERROR64:
                return (aioerror((void *)uap[1], AIO_LARGEFILE));
        case AIOAREAD64:
                return (aiorw((int)uap[0], (void *)uap[1], FREAD,
                    AIO_LARGEFILE));
        case AIOAWRITE64:
                return (aiorw((int)uap[0], (void *)uap[1], FWRITE,
                    AIO_LARGEFILE));
        case AIOCANCEL64:
                error = (aio_cancel((int)uap[1], (void *)uap[2],
                    &rval, AIO_LARGEFILE));
                break;
        default:
                return (EINVAL);
        }

        rvp->r_val1 = rval;
        return (error);
}

/*
 * wake up LWPs in this process that are sleeping in
 * aiowait().
 */
static int
aionotify(void)
{
        aio_t   *aiop;

        aiop = curproc->p_aio;
        if (aiop == NULL)
                return (0);

        mutex_enter(&aiop->aio_mutex);
        aiop->aio_notifycnt++;
        cv_broadcast(&aiop->aio_waitcv);
        mutex_exit(&aiop->aio_mutex);

        return (0);
}

static int
timeval2reltime(struct timeval *timout, timestruc_t *rqtime,
        timestruc_t **rqtp, int *blocking)
{
#ifdef  _SYSCALL32_IMPL
        struct timeval32 wait_time_32;
#endif
        struct timeval wait_time;
        model_t model = get_udatamodel();

        *rqtp = NULL;
        if (timout == NULL) {           /* wait indefinitely */
                *blocking = 1;
                return (0);
        }

        /*
         * Need to compare correctly with the -1 that can be passed in
         * as a user address pointer, for both 32-bit and 64-bit apps.
         */
        if (model == DATAMODEL_NATIVE) {
                if ((intptr_t)timout == (intptr_t)-1) { /* don't wait */
                        *blocking = 0;
                        return (0);
                }

                if (copyin(timout, &wait_time, sizeof (wait_time)))
                        return (EFAULT);
        }
#ifdef  _SYSCALL32_IMPL
        else {
                /*
                 * A -1 from a 32-bit app will not be sign-extended,
                 * so don't wait in that case either.
                 */
                if ((intptr_t)timout == (intptr_t)((uint32_t)-1)) {
                        *blocking = 0;
                        return (0);
                }

                if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
                        return (EFAULT);
                TIMEVAL32_TO_TIMEVAL(&wait_time, &wait_time_32);
        }
#endif  /* _SYSCALL32_IMPL */

        if (wait_time.tv_sec == 0 && wait_time.tv_usec == 0) {  /* don't wait */
                *blocking = 0;
                return (0);
        }

        if (wait_time.tv_sec < 0 ||
            wait_time.tv_usec < 0 || wait_time.tv_usec >= MICROSEC)
                return (EINVAL);

        rqtime->tv_sec = wait_time.tv_sec;
        rqtime->tv_nsec = wait_time.tv_usec * 1000;
        *rqtp = rqtime;
        *blocking = 1;

        return (0);
}
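
/*
 * For reference, a hedged userland sketch of the three timeout
 * conventions decoded above (assuming the libaio aiowait(3AIO)
 * wrapper; illustrative only, not part of this module):
 *
 *	struct timeval poll = { 0, 0 };
 *	struct timeval sec5 = { 5, 0 };
 *
 *	(void) aiowait(NULL);			-- block indefinitely
 *	(void) aiowait(&poll);			-- {0,0}: don't wait
 *	(void) aiowait((struct timeval *)-1);	-- historic "don't wait" form
 *	(void) aiowait(&sec5);			-- wait at most five seconds
 */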

static int
timespec2reltime(timespec_t *timout, timestruc_t *rqtime,
        timestruc_t **rqtp, int *blocking)
{
#ifdef  _SYSCALL32_IMPL
        timespec32_t wait_time_32;
#endif
        model_t model = get_udatamodel();

        *rqtp = NULL;
        if (timout == NULL) {
                *blocking = 1;
                return (0);
        }

        if (model == DATAMODEL_NATIVE) {
                if (copyin(timout, rqtime, sizeof (*rqtime)))
                        return (EFAULT);
        }
#ifdef  _SYSCALL32_IMPL
        else {
                if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))
                        return (EFAULT);
                TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32);
        }
#endif  /* _SYSCALL32_IMPL */

        if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) {
                *blocking = 0;
                return (0);
        }

        if (rqtime->tv_sec < 0 ||
            rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC)
                return (EINVAL);

        *rqtp = rqtime;
        *blocking = 1;

        return (0);
}

/*ARGSUSED*/
static int
aiowait(
        struct timeval  *timout,
        int     dontblockflg,
        long    *rval)
{
        int             error;
        aio_t           *aiop;
        aio_req_t       *reqp;
        clock_t         status;
        int             blocking;
        int             timecheck;
        timestruc_t     rqtime;
        timestruc_t     *rqtp;

        aiop = curproc->p_aio;
        if (aiop == NULL)
                return (EINVAL);

        /*
         * Establish the absolute future time for the timeout.
         */
        error = timeval2reltime(timout, &rqtime, &rqtp, &blocking);
        if (error)
                return (error);
        if (rqtp) {
                timestruc_t now;
                timecheck = timechanged;
                gethrestime(&now);
                timespecadd(rqtp, &now);
        }

        mutex_enter(&aiop->aio_mutex);
        for (;;) {
                /* process requests on poll queue */
                if (aiop->aio_pollq) {
                        mutex_exit(&aiop->aio_mutex);
                        aio_cleanup(0);
                        mutex_enter(&aiop->aio_mutex);
                }
                if ((reqp = aio_req_remove(NULL)) != NULL) {
                        *rval = (long)reqp->aio_req_resultp;
                        break;
                }
                /* user-level done queue might not be empty */
                if (aiop->aio_notifycnt > 0) {
                        aiop->aio_notifycnt--;
                        *rval = 1;
                        break;
                }
                /* don't block if no outstanding aio */
                if (aiop->aio_outstanding == 0 && dontblockflg) {
                        error = EINVAL;
                        break;
                }
                if (blocking) {
                        status = cv_waituntil_sig(&aiop->aio_waitcv,
                            &aiop->aio_mutex, rqtp, timecheck);

                        if (status > 0)         /* check done queue again */
                                continue;
                        if (status == 0) {      /* interrupted by a signal */
                                error = EINTR;
                                *rval = -1;
                        } else {                /* timer expired */
                                error = ETIME;
                        }
                }
                break;
        }
        mutex_exit(&aiop->aio_mutex);
        if (reqp) {
                aphysio_unlock(reqp);
                aio_copyout_result(reqp);
                mutex_enter(&aiop->aio_mutex);
                aio_req_free(aiop, reqp);
                mutex_exit(&aiop->aio_mutex);
        }
        return (error);
}
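
/*
 * Usage sketch (userland, hedged): the classic SunOS-style pairing of
 * aioread(3AIO) with aiowait(3AIO) is what drives the AIOREAD and
 * AIOWAIT cases above.  Illustrative only; the man pages are
 * authoritative for the library interface.
 *
 *	#include <sys/asynch.h>
 *
 *	char buf[8192];
 *	aio_result_t res;
 *
 *	if (aioread(fd, buf, sizeof (buf), 0, SEEK_SET, &res) == 0) {
 *		aio_result_t *done = aiowait(NULL);	-- block
 *		if (done == &res && done->aio_return != -1)
 *			consume(buf, done->aio_return);	-- consume(): hypothetical
 *	}
 */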

/*
 * aiowaitn can be used to reap completed asynchronous requests submitted
 * with lio_listio, aio_read or aio_write.
 * This function only reaps asynchronous raw I/Os.
 */

/*ARGSUSED*/
static int
aiowaitn(void *uiocb, uint_t nent, uint_t *nwait, timespec_t *timout)
{
        int             error = 0;
        aio_t           *aiop;
        aio_req_t       *reqlist = NULL;
        caddr_t         iocblist = NULL;        /* array of iocb ptr's */
        uint_t          waitcnt, cnt = 0;       /* iocb cnt */
        size_t          iocbsz;                 /* users iocb size */
        size_t          riocbsz;                /* returned iocb size */
        int             iocb_index = 0;
        model_t         model = get_udatamodel();
        int             blocking = 1;
        int             timecheck;
        timestruc_t     rqtime;
        timestruc_t     *rqtp;

        aiop = curproc->p_aio;
        if (aiop == NULL || nent == 0 || nent > _AIO_LISTIO_MAX)
                return (EINVAL);

        if (aiop->aio_outstanding == 0)
                return (EAGAIN);

        if (copyin(nwait, &waitcnt, sizeof (uint_t)))
                return (EFAULT);

        /* set *nwait to zero in case we must return prematurely */
        if (copyout(&cnt, nwait, sizeof (uint_t)))
                return (EFAULT);

        if (waitcnt == 0) {
                blocking = 0;
                rqtp = NULL;
                waitcnt = nent;
        } else {
                error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
                if (error)
                        return (error);
        }

        if (model == DATAMODEL_NATIVE)
                iocbsz = (sizeof (aiocb_t *) * nent);
#ifdef  _SYSCALL32_IMPL
        else
                iocbsz = (sizeof (caddr32_t) * nent);
#endif  /* _SYSCALL32_IMPL */

        /*
         * Only one aio_waitn call is allowed at a time.
         * The active aio_waitn will collect all requests
         * out of the "done" list and, if necessary, will wait
         * for some/all pending requests to fulfill the nwait
         * parameter.
         * Any further aio_waitn calls will sleep here until the
         * active aio_waitn finishes and leaves the kernel.
         * If such a call does not block (poll), it returns
         * immediately with the error code EAGAIN.
         * If it should block, it sleeps here, but does not touch
         * the timeout; the timeout starts when the aio_waitn call
         * becomes active.
         */

        mutex_enter(&aiop->aio_mutex);

        while (aiop->aio_flags & AIO_WAITN) {
                if (blocking == 0) {
                        mutex_exit(&aiop->aio_mutex);
                        return (EAGAIN);
                }

                /* block, no timeout */
                aiop->aio_flags |= AIO_WAITN_PENDING;
                if (!cv_wait_sig(&aiop->aio_waitncv, &aiop->aio_mutex)) {
                        mutex_exit(&aiop->aio_mutex);
                        return (EINTR);
                }
        }

        /*
         * Establish the absolute future time for the timeout.
         */
        if (rqtp) {
                timestruc_t now;
                timecheck = timechanged;
                gethrestime(&now);
                timespecadd(rqtp, &now);
        }

        if (iocbsz > aiop->aio_iocbsz && aiop->aio_iocb != NULL) {
                kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);
                aiop->aio_iocb = NULL;
        }

        if (aiop->aio_iocb == NULL) {
                iocblist = kmem_zalloc(iocbsz, KM_NOSLEEP);
                if (iocblist == NULL) {
                        mutex_exit(&aiop->aio_mutex);
                        return (ENOMEM);
                }
                aiop->aio_iocb = (aiocb_t **)iocblist;
                aiop->aio_iocbsz = iocbsz;
        } else {
                iocblist = (char *)aiop->aio_iocb;
        }

        aiop->aio_waitncnt = waitcnt;
        aiop->aio_flags |= AIO_WAITN;

        for (;;) {
                /* push requests on poll queue to done queue */
                if (aiop->aio_pollq) {
                        mutex_exit(&aiop->aio_mutex);
                        aio_cleanup(0);
                        mutex_enter(&aiop->aio_mutex);
                }

                /* check for requests on done queue */
                if (aiop->aio_doneq) {
                        cnt += aio_reqlist_concat(aiop, &reqlist, nent - cnt);
                        aiop->aio_waitncnt = waitcnt - cnt;
                }

                /* user-level done queue might not be empty */
                if (aiop->aio_notifycnt > 0) {
                        aiop->aio_notifycnt--;
                        error = 0;
                        break;
                }

                /*
                 * If we are here a second time as a result of timer
                 * expiration, we reset the error if there are enough
                 * aiocbs to satisfy the request.
                 * We also return if all requests are already done
                 * and we picked up the whole done queue.
                 */

                if ((cnt >= waitcnt) || (cnt > 0 && aiop->aio_pending == 0 &&
                    aiop->aio_doneq == NULL)) {
                        error = 0;
                        break;
                }

                if ((cnt < waitcnt) && blocking) {
                        int rval = cv_waituntil_sig(&aiop->aio_waitcv,
                            &aiop->aio_mutex, rqtp, timecheck);
                        if (rval > 0)
                                continue;
                        if (rval < 0) {
                                error = ETIME;
                                blocking = 0;
                                continue;
                        }
                        error = EINTR;
                }
                break;
        }

        mutex_exit(&aiop->aio_mutex);

        if (cnt > 0) {

                iocb_index = aio_unlock_requests(iocblist, iocb_index, reqlist,
                    aiop, model);

                if (model == DATAMODEL_NATIVE)
                        riocbsz = (sizeof (aiocb_t *) * cnt);
#ifdef  _SYSCALL32_IMPL
                else
                        riocbsz = (sizeof (caddr32_t) * cnt);
#endif  /* _SYSCALL32_IMPL */

                if (copyout(iocblist, uiocb, riocbsz) ||
                    copyout(&cnt, nwait, sizeof (uint_t)))
                        error = EFAULT;
        }

        /* check if there is another thread waiting for execution */
        mutex_enter(&aiop->aio_mutex);
        aiop->aio_flags &= ~AIO_WAITN;
        if (aiop->aio_flags & AIO_WAITN_PENDING) {
                aiop->aio_flags &= ~AIO_WAITN_PENDING;
                cv_signal(&aiop->aio_waitncv);
        }
        mutex_exit(&aiop->aio_mutex);

        return (error);
}
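
/*
 * Usage sketch (userland, hedged): aio_waitn(3C) is the library call
 * that reaches aiowaitn() above.  It reaps up to nent completed
 * control blocks in one call; *nwait carries the target count in and
 * the reaped count out.  Illustrative only.
 *
 *	aiocb_t *done[16];
 *	uint_t nwait = 4;		-- wake up once 4 requests are done
 *	struct timespec ts = { 1, 0 };	-- but wait at most one second
 *
 *	if (aio_waitn(done, 16, &nwait, &ts) == 0) {
 *		for (uint_t i = 0; i < nwait; i++)
 *			handle(done[i]);	-- handle(): hypothetical
 *	}
 */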

/*
 * aio_unlock_requests
 * copies out the result of each request as well as the return value.
 * It builds the list of completed asynchronous requests,
 * unlocks the allocated memory ranges and
 * puts the aio request structures back onto the free list.
 */

static int
aio_unlock_requests(
        caddr_t iocblist,
        int     iocb_index,
        aio_req_t *reqlist,
        aio_t   *aiop,
        model_t model)
{
        aio_req_t       *reqp, *nreqp;

        if (model == DATAMODEL_NATIVE) {
                for (reqp = reqlist; reqp != NULL; reqp = nreqp) {
                        (((caddr_t *)iocblist)[iocb_index++]) =
                            reqp->aio_req_iocb.iocb;
                        nreqp = reqp->aio_req_next;
                        aphysio_unlock(reqp);
                        aio_copyout_result(reqp);
                        mutex_enter(&aiop->aio_mutex);
                        aio_req_free(aiop, reqp);
                        mutex_exit(&aiop->aio_mutex);
                }
        }
#ifdef  _SYSCALL32_IMPL
        else {
                for (reqp = reqlist; reqp != NULL; reqp = nreqp) {
                        ((caddr32_t *)iocblist)[iocb_index++] =
                            reqp->aio_req_iocb.iocb32;
                        nreqp = reqp->aio_req_next;
                        aphysio_unlock(reqp);
                        aio_copyout_result(reqp);
                        mutex_enter(&aiop->aio_mutex);
                        aio_req_free(aiop, reqp);
                        mutex_exit(&aiop->aio_mutex);
                }
        }
#endif  /* _SYSCALL32_IMPL */
        return (iocb_index);
}

/*
 * aio_reqlist_concat
 * moves "max" elements from the done queue to the reqlist queue and removes
 * the AIO_DONEQ flag.
 * - the reqlist queue is a singly linked list
 * - the done queue is a doubly linked (circular) list
 */

static int
aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max)
{
        aio_req_t *q2, *q2work, *list;
        int count = 0;

        list = *reqlist;
        q2 = aiop->aio_doneq;
        q2work = q2;
        while (max-- > 0) {
                q2work->aio_req_flags &= ~AIO_DONEQ;
                q2work = q2work->aio_req_next;
                count++;
                if (q2work == q2)
                        break;
        }

        if (q2work == q2) {
                /* all elements of the done queue were taken */
                q2->aio_req_prev->aio_req_next = list;
                list = q2;
                aiop->aio_doneq = NULL;
        } else {
                /*
                 * max < number of elements in the doneq:
                 * detach only the required number of elements
                 * from the doneq
                 */
                q2work->aio_req_prev->aio_req_next = list;
                list = q2;

                aiop->aio_doneq = q2work;
                q2work->aio_req_prev = q2->aio_req_prev;
                q2->aio_req_prev->aio_req_next = q2work;
        }
        *reqlist = list;
        return (count);
}
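
/*
 * Worked example: let the done queue hold A-B-C-D (circular, A is
 * aio_doneq), max = 2 and *reqlist = L on entry.  The loop clears
 * AIO_DONEQ on A and B and stops with q2work == C.  The else branch
 * then links B->next to L and detaches A-B, so on return
 * *reqlist = A->B->L (singly linked), count = 2, and aio_doneq = C-D
 * (still circular: C.prev = D, D.next = C).
 */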

/*ARGSUSED*/
static int
aiosuspend(
        void    *aiocb,
        int     nent,
        struct timespec *timout,
        int     flag,
        long    *rval,
        int     run_mode)
{
        int             error;
        aio_t           *aiop;
        aio_req_t       *reqp, *found, *next;
        caddr_t         cbplist = NULL;
        aiocb_t         *cbp, **ucbp;
#ifdef  _SYSCALL32_IMPL
        aiocb32_t       *cbp32;
        caddr32_t       *ucbp32;
#endif  /* _SYSCALL32_IMPL */
        aiocb64_32_t    *cbp64;
        int             rv;
        int             i;
        size_t          ssize;
        model_t         model = get_udatamodel();
        int             blocking;
        int             timecheck;
        timestruc_t     rqtime;
        timestruc_t     *rqtp;

        aiop = curproc->p_aio;
        if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
                return (EINVAL);

        /*
         * Establish the absolute future time for the timeout.
         */
        error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);
        if (error)
                return (error);
        if (rqtp) {
                timestruc_t now;
                timecheck = timechanged;
                gethrestime(&now);
                timespecadd(rqtp, &now);
        }

        /*
         * If we are not blocking and there is no completed I/O,
         * skip the aiocb copyin.
         */
        if (!blocking && (aiop->aio_pollq == NULL) &&
            (aiop->aio_doneq == NULL)) {
                return (EAGAIN);
        }

        if (model == DATAMODEL_NATIVE)
                ssize = (sizeof (aiocb_t *) * nent);
#ifdef  _SYSCALL32_IMPL
        else
                ssize = (sizeof (caddr32_t) * nent);
#endif  /* _SYSCALL32_IMPL */

        cbplist = kmem_alloc(ssize, KM_NOSLEEP);
        if (cbplist == NULL)
                return (ENOMEM);

        if (copyin(aiocb, cbplist, ssize)) {
                error = EFAULT;
                goto done;
        }

        found = NULL;
        /*
         * we need to get the aio_cleanupq_mutex since we call
         * aio_req_done().
         */
        mutex_enter(&aiop->aio_cleanupq_mutex);
        mutex_enter(&aiop->aio_mutex);
        for (;;) {
                /* push requests on poll queue to done queue */
                if (aiop->aio_pollq) {
                        mutex_exit(&aiop->aio_mutex);
                        mutex_exit(&aiop->aio_cleanupq_mutex);
                        aio_cleanup(0);
                        mutex_enter(&aiop->aio_cleanupq_mutex);
                        mutex_enter(&aiop->aio_mutex);
                }
                /* check for requests on done queue */
                if (aiop->aio_doneq) {
                        if (model == DATAMODEL_NATIVE)
                                ucbp = (aiocb_t **)cbplist;
#ifdef  _SYSCALL32_IMPL
                        else
                                ucbp32 = (caddr32_t *)cbplist;
#endif  /* _SYSCALL32_IMPL */
                        for (i = 0; i < nent; i++) {
                                if (model == DATAMODEL_NATIVE) {
                                        if ((cbp = *ucbp++) == NULL)
                                                continue;
                                        if (run_mode != AIO_LARGEFILE)
                                                reqp = aio_req_done(
                                                    &cbp->aio_resultp);
                                        else {
                                                cbp64 = (aiocb64_32_t *)cbp;
                                                reqp = aio_req_done(
                                                    &cbp64->aio_resultp);
                                        }
                                }
#ifdef  _SYSCALL32_IMPL
                                else {
                                        if (run_mode == AIO_32) {
                                                if ((cbp32 =
                                                    (aiocb32_t *)(uintptr_t)
                                                    *ucbp32++) == NULL)
                                                        continue;
                                                reqp = aio_req_done(
                                                    &cbp32->aio_resultp);
                                        } else if (run_mode == AIO_LARGEFILE) {
                                                if ((cbp64 =
                                                    (aiocb64_32_t *)(uintptr_t)
                                                    *ucbp32++) == NULL)
                                                        continue;
                                                reqp = aio_req_done(
                                                    &cbp64->aio_resultp);
                                        }

                                }
#endif  /* _SYSCALL32_IMPL */
                                if (reqp) {
                                        reqp->aio_req_next = found;
                                        found = reqp;
                                }
                                if (aiop->aio_doneq == NULL)
                                        break;
                        }
                        if (found)
                                break;
                }
                if (aiop->aio_notifycnt > 0) {
                        /*
                         * nothing on the kernel's queue. the user
                         * has notified the kernel that it has items
                         * on a user-level queue.
                         */
                        aiop->aio_notifycnt--;
                        *rval = 1;
                        error = 0;
                        break;
                }
                /* don't block if nothing is outstanding */
                if (aiop->aio_outstanding == 0) {
                        error = EAGAIN;
                        break;
                }
                if (blocking) {
                        /*
                         * drop the aio_cleanupq_mutex as we are
                         * going to block.
                         */
                        mutex_exit(&aiop->aio_cleanupq_mutex);
                        rv = cv_waituntil_sig(&aiop->aio_waitcv,
                            &aiop->aio_mutex, rqtp, timecheck);
                        /*
                         * we have to drop aio_mutex and
                         * grab it in the right order.
                         */
                        mutex_exit(&aiop->aio_mutex);
                        mutex_enter(&aiop->aio_cleanupq_mutex);
                        mutex_enter(&aiop->aio_mutex);
                        if (rv > 0)     /* check done queue again */
                                continue;
                        if (rv == 0)    /* interrupted by a signal */
                                error = EINTR;
                        else            /* timer expired */
                                error = ETIME;
                } else {
                        error = EAGAIN;
                }
                break;
        }
        mutex_exit(&aiop->aio_mutex);
        mutex_exit(&aiop->aio_cleanupq_mutex);
        for (reqp = found; reqp != NULL; reqp = next) {
                next = reqp->aio_req_next;
                aphysio_unlock(reqp);
                aio_copyout_result(reqp);
                mutex_enter(&aiop->aio_mutex);
                aio_req_free(aiop, reqp);
                mutex_exit(&aiop->aio_mutex);
        }
done:
        kmem_free(cbplist, ssize);
        return (error);
}
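
/*
 * Usage sketch (userland, hedged): aio_suspend(3C) is the POSIX entry
 * that lands in aiosuspend() above.  Illustrative only.
 *
 *	const aiocb_t *list[2] = { &cb0, &cb1 };
 *	struct timespec ts = { 0, 500000000 };	-- at most 500ms
 *
 *	if (aio_suspend(list, 2, &ts) == 0) {
 *		-- at least one request has completed; poll each
 *		-- aiocb with aio_error(3C) and reap with aio_return(3C)
 *	}
 */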

/*
 * initialize aio by allocating an aio_t struct for this
 * process.
 */
static int
aioinit(void)
{
        proc_t *p = curproc;
        aio_t *aiop;
        mutex_enter(&p->p_lock);
        if ((aiop = p->p_aio) == NULL) {
                aiop = aio_aiop_alloc();
                p->p_aio = aiop;
        }
        mutex_exit(&p->p_lock);
        if (aiop == NULL)
                return (ENOMEM);
        return (0);
}

/*
 * start a special thread that will cleanup after aio requests
 * that are preventing a segment from being unmapped. as_unmap()
 * blocks until all physio to this segment is completed. this
 * doesn't happen until all the pages in this segment are not
 * SOFTLOCKed. Some pages will be SOFTLOCKed when there are aio
 * requests still outstanding. this special thread will make sure
 * that these SOFTLOCKed pages will eventually be SOFTUNLOCKed.
 *
 * this function will return an error if the process has only
 * one LWP. the assumption is that the caller is a separate LWP
 * that remains blocked in the kernel for the life of this process.
 */
static int
aiostart(void)
{
        proc_t *p = curproc;
        aio_t *aiop;
        int first, error = 0;

        if (p->p_lwpcnt == 1)
                return (EDEADLK);
        mutex_enter(&p->p_lock);
        if ((aiop = p->p_aio) == NULL)
                error = EINVAL;
        else {
                first = aiop->aio_ok;
                if (aiop->aio_ok == 0)
                        aiop->aio_ok = 1;
        }
        mutex_exit(&p->p_lock);
        if (error == 0 && first == 0) {
                return (aio_cleanup_thread(aiop));
                /* should return only to exit */
        }
        return (error);
}

/*
 * Associate an aiocb with a port.
 * This function is used by aiorw() to associate a transaction with a port.
 * Allocate an event port structure (port_alloc_event()) and store the
 * delivered user pointer (portnfy_user) in the portkev_user field of the
 * port_kevent_t structure.
 * The aio_req_portkev pointer in the aio_req_t structure was added to identify
 * the port association.
 */

static int
aio_req_assoc_port_rw(port_notify_t *pntfy, aiocb_t *cbp,
        aio_req_t *reqp, int event)
{
        port_kevent_t   *pkevp = NULL;
        int             error;

        error = port_alloc_event(pntfy->portnfy_port, PORT_ALLOC_DEFAULT,
            PORT_SOURCE_AIO, &pkevp);
        if (error) {
                if ((error == ENOMEM) || (error == EAGAIN))
                        error = EAGAIN;
                else
                        error = EINVAL;
        } else {
                port_init_event(pkevp, (uintptr_t)cbp, pntfy->portnfy_user,
                    aio_port_callback, reqp);
                pkevp->portkev_events = event;
                reqp->aio_req_portkev = pkevp;
                reqp->aio_req_port = pntfy->portnfy_port;
        }
        return (error);
}
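
/*
 * Usage sketch (userland, hedged): requesting event-port completion
 * notification for a single aio_read(3C), which is what leads to the
 * port association set up above.  my_cookie is hypothetical; the rest
 * follows port_create(3C)/port_get(3C).  Illustrative only.
 *
 *	#include <port.h>
 *
 *	int port = port_create();
 *	port_notify_t pn = { port, my_cookie };
 *	port_event_t pe;
 *
 *	cb.aio_sigevent.sigev_notify = SIGEV_PORT;
 *	cb.aio_sigevent.sigev_value.sival_ptr = &pn;
 *	if (aio_read(&cb) == 0 && port_get(port, &pe, NULL) == 0) {
 *		-- pe.portev_source == PORT_SOURCE_AIO and
 *		-- pe.portev_user == my_cookie for this completion
 *	}
 */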

#ifdef _LP64

/*
 * Asynchronous list IO. A chain of aiocbs is copied in
 * one at a time. If an aiocb is invalid, it is skipped.
1183  * For each aiocb, the appropriate driver entry point is
1184  * called. Optimize for the common case where the list
1185  * of requests is to the same file descriptor.
1186  *
1187  * One possible optimization is to define a new driver entry
1188  * point that supports a list of IO requests. Whether this
1189  * improves performance depends somewhat on the driver's
1190  * locking strategy. Processing a list could adversely impact
1191  * the driver's interrupt latency.
1192  */
1193 static int
1194 alio(
1195         int             mode_arg,
1196         aiocb_t         **aiocb_arg,
1197         int             nent,
1198         struct sigevent *sigev)
1199 {
1200         file_t          *fp;
1201         file_t          *prev_fp = NULL;
1202         int             prev_mode = -1;
1203         struct vnode    *vp;
1204         aio_lio_t       *head;
1205         aio_req_t       *reqp;
1206         aio_t           *aiop;
1207         caddr_t         cbplist;
1208         aiocb_t         cb;
1209         aiocb_t         *aiocb = &cb;
1210         aiocb_t         *cbp;
1211         aiocb_t         **ucbp;
1212         struct sigevent sigevk;
1213         sigqueue_t      *sqp;
1214         int             (*aio_func)();
1215         int             mode;
1216         int             error = 0;
1217         int             aio_errors = 0;
1218         int             i;
1219         size_t          ssize;
1220         int             deadhead = 0;
1221         int             aio_notsupported = 0;
1222         int             lio_head_port;
1223         int             aio_port;
1224         int             aio_thread;
1225         port_kevent_t   *pkevtp = NULL;
1226         int             portused = 0;
1227         port_notify_t   pnotify;
1228         int             event;
1229 
1230         aiop = curproc->p_aio;
1231         if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
1232                 return (EINVAL);
1233 
1234         ssize = (sizeof (aiocb_t *) * nent);
1235         cbplist = kmem_alloc(ssize, KM_SLEEP);
1236         ucbp = (aiocb_t **)cbplist;
1237 
1238         if (copyin(aiocb_arg, cbplist, ssize) ||
1239             (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent)))) {
1240                 kmem_free(cbplist, ssize);
1241                 return (EFAULT);
1242         }
1243 
1244         /* Event Ports  */
1245         if (sigev &&
1246             (sigevk.sigev_notify == SIGEV_THREAD ||
1247             sigevk.sigev_notify == SIGEV_PORT)) {
1248                 if (sigevk.sigev_notify == SIGEV_THREAD) {
1249                         pnotify.portnfy_port = sigevk.sigev_signo;
1250                         pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
1251                 } else if (copyin(sigevk.sigev_value.sival_ptr,
1252                     &pnotify, sizeof (pnotify))) {
1253                         kmem_free(cbplist, ssize);
1254                         return (EFAULT);
1255                 }
1256                 error = port_alloc_event(pnotify.portnfy_port,
1257                     PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
1258                 if (error) {
1259                         if (error == ENOMEM || error == EAGAIN)
1260                                 error = EAGAIN;
1261                         else
1262                                 error = EINVAL;
1263                         kmem_free(cbplist, ssize);
1264                         return (error);
1265                 }
1266                 lio_head_port = pnotify.portnfy_port;
1267                 portused = 1;
1268         }
1269 
1270         /*
1271          * a list head should be allocated if notification is
1272          * enabled for this list.
1273          */
1274         head = NULL;
1275 
1276         if (mode_arg == LIO_WAIT || sigev) {
1277                 mutex_enter(&aiop->aio_mutex);
1278                 error = aio_lio_alloc(&head);
1279                 mutex_exit(&aiop->aio_mutex);
1280                 if (error)
1281                         goto done;
1282                 deadhead = 1;
1283                 head->lio_nent = nent;
1284                 head->lio_refcnt = nent;
1285                 head->lio_port = -1;
1286                 head->lio_portkev = NULL;
1287                 if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
1288                     sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
1289                         sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
1290                         if (sqp == NULL) {
1291                                 error = EAGAIN;
1292                                 goto done;
1293                         }
1294                         sqp->sq_func = NULL;
1295                         sqp->sq_next = NULL;
1296                         sqp->sq_info.si_code = SI_ASYNCIO;
1297                         sqp->sq_info.si_pid = curproc->p_pid;
1298                         sqp->sq_info.si_ctid = PRCTID(curproc);
1299                         sqp->sq_info.si_zoneid = getzoneid();
1300                         sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
1301                         sqp->sq_info.si_signo = sigevk.sigev_signo;
1302                         sqp->sq_info.si_value = sigevk.sigev_value;
1303                         head->lio_sigqp = sqp;
1304                 } else {
1305                         head->lio_sigqp = NULL;
1306                 }
1307                 if (pkevtp) {
1308                         /*
1309                          * Prepare data to send when list of aiocb's
1310                          * has completed.
1311                          */
1312                         port_init_event(pkevtp, (uintptr_t)sigev,
1313                             (void *)(uintptr_t)pnotify.portnfy_user,
1314                             NULL, head);
1315                         pkevtp->portkev_events = AIOLIO;
1316                         head->lio_portkev = pkevtp;
1317                         head->lio_port = pnotify.portnfy_port;
1318                 }
1319         }
1320 
1321         for (i = 0; i < nent; i++, ucbp++) {
1322 
1323                 cbp = *ucbp;
1324                 /* skip entry if it can't be copied. */
1325                 if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
1326                         if (head) {
1327                                 mutex_enter(&aiop->aio_mutex);
1328                                 head->lio_nent--;
1329                                 head->lio_refcnt--;
1330                                 mutex_exit(&aiop->aio_mutex);
1331                         }
1332                         continue;
1333                 }
1334 
1335                 /* skip if opcode for aiocb is LIO_NOP */
1336                 mode = aiocb->aio_lio_opcode;
1337                 if (mode == LIO_NOP) {
1338                         cbp = NULL;
1339                         if (head) {
1340                                 mutex_enter(&aiop->aio_mutex);
1341                                 head->lio_nent--;
1342                                 head->lio_refcnt--;
1343                                 mutex_exit(&aiop->aio_mutex);
1344                         }
1345                         continue;
1346                 }
1347 
1348                 /* increment file descriptor's ref count. */
1349                 if ((fp = getf(aiocb->aio_fildes)) == NULL) {
1350                         lio_set_uerror(&cbp->aio_resultp, EBADF);
1351                         if (head) {
1352                                 mutex_enter(&aiop->aio_mutex);
1353                                 head->lio_nent--;
1354                                 head->lio_refcnt--;
1355                                 mutex_exit(&aiop->aio_mutex);
1356                         }
1357                         aio_errors++;
1358                         continue;
1359                 }
1360 
1361                 /*
1362                  * check the permission of the partition
1363                  */
1364                 if ((fp->f_flag & mode) == 0) {
1365                         releasef(aiocb->aio_fildes);
1366                         lio_set_uerror(&cbp->aio_resultp, EBADF);
1367                         if (head) {
1368                                 mutex_enter(&aiop->aio_mutex);
1369                                 head->lio_nent--;
1370                                 head->lio_refcnt--;
1371                                 mutex_exit(&aiop->aio_mutex);
1372                         }
1373                         aio_errors++;
1374                         continue;
1375                 }
1376 
1377                 /*
1378                  * common case where requests are to the same fd
1379                  * for the same r/w operation.
1380                  * for UFS, need to set EBADFD
1381                  */
1382                 vp = fp->f_vnode;
1383                 if (fp != prev_fp || mode != prev_mode) {
1384                         aio_func = check_vp(vp, mode);
1385                         if (aio_func == NULL) {
1386                                 prev_fp = NULL;
1387                                 releasef(aiocb->aio_fildes);
1388                                 lio_set_uerror(&cbp->aio_resultp, EBADFD);
1389                                 aio_notsupported++;
1390                                 if (head) {
1391                                         mutex_enter(&aiop->aio_mutex);
1392                                         head->lio_nent--;
1393                                         head->lio_refcnt--;
1394                                         mutex_exit(&aiop->aio_mutex);
1395                                 }
1396                                 continue;
1397                         } else {
1398                                 prev_fp = fp;
1399                                 prev_mode = mode;
1400                         }
1401                 }
1402 
1403                 error = aio_req_setup(&reqp, aiop, aiocb,
1404                     &cbp->aio_resultp, vp, 0);
1405                 if (error) {
1406                         releasef(aiocb->aio_fildes);
1407                         lio_set_uerror(&cbp->aio_resultp, error);
1408                         if (head) {
1409                                 mutex_enter(&aiop->aio_mutex);
1410                                 head->lio_nent--;
1411                                 head->lio_refcnt--;
1412                                 mutex_exit(&aiop->aio_mutex);
1413                         }
1414                         aio_errors++;
1415                         continue;
1416                 }
1417 
1418                 reqp->aio_req_lio = head;
1419                 deadhead = 0;
1420 
1421                 /*
1422                  * Set the errno field now before sending the request to
1423                  * the driver to avoid a race condition
1424                  */
1425                 (void) suword32(&cbp->aio_resultp.aio_errno,
1426                     EINPROGRESS);
1427 
1428                 reqp->aio_req_iocb.iocb = (caddr_t)cbp;
1429 
1430                 event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
1431                 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
1432                 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
1433                 if (aio_port | aio_thread) {
1434                         port_kevent_t *lpkevp;
1435                         /*
1436                          * Prepare data to send with each aiocb completed.
1437                          */
1438                         if (aio_port) {
1439                                 void *paddr =
1440                                     aiocb->aio_sigevent.sigev_value.sival_ptr;
1441                                 if (copyin(paddr, &pnotify, sizeof (pnotify)))
1442                                         error = EFAULT;
1443                         } else {        /* aio_thread */
1444                                 pnotify.portnfy_port =
1445                                     aiocb->aio_sigevent.sigev_signo;
1446                                 pnotify.portnfy_user =
1447                                     aiocb->aio_sigevent.sigev_value.sival_ptr;
1448                         }
1449                         if (error)
1450                                 /* EMPTY */;
1451                         else if (pkevtp != NULL &&
1452                             pnotify.portnfy_port == lio_head_port)
1453                                 error = port_dup_event(pkevtp, &lpkevp,
1454                                     PORT_ALLOC_DEFAULT);
1455                         else
1456                                 error = port_alloc_event(pnotify.portnfy_port,
1457                                     PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
1458                                     &lpkevp);
1459                         if (error == 0) {
1460                                 port_init_event(lpkevp, (uintptr_t)cbp,
1461                                     (void *)(uintptr_t)pnotify.portnfy_user,
1462                                     aio_port_callback, reqp);
1463                                 lpkevp->portkev_events = event;
1464                                 reqp->aio_req_portkev = lpkevp;
1465                                 reqp->aio_req_port = pnotify.portnfy_port;
1466                         }
1467                 }
1468 
1469                 /*
1470                  * send the request to the driver.
1471                  */
1472                 if (error == 0) {
1473                         if (aiocb->aio_nbytes == 0) {
1474                                 clear_active_fd(aiocb->aio_fildes);
1475                                 aio_zerolen(reqp);
1476                                 continue;
1477                         }
1478                         error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
1479                             CRED());
1480                 }
1481 
1482                 /*
1483                  * the fd's ref count is not decremented until the IO has
1484                  * completed unless there was an error.
1485                  */
1486                 if (error) {
1487                         releasef(aiocb->aio_fildes);
1488                         lio_set_uerror(&cbp->aio_resultp, error);
1489                         if (head) {
1490                                 mutex_enter(&aiop->aio_mutex);
1491                                 head->lio_nent--;
1492                                 head->lio_refcnt--;
1493                                 mutex_exit(&aiop->aio_mutex);
1494                         }
1495                         if (error == ENOTSUP)
1496                                 aio_notsupported++;
1497                         else
1498                                 aio_errors++;
1499                         lio_set_error(reqp, portused);
1500                 } else {
1501                         clear_active_fd(aiocb->aio_fildes);
1502                 }
1503         }
1504 
1505         if (aio_notsupported) {
1506                 error = ENOTSUP;
1507         } else if (aio_errors) {
1508                 /*
1509                  * return EIO if any request failed
1510                  */
1511                 error = EIO;
1512         }
1513 
1514         if (mode_arg == LIO_WAIT) {
1515                 mutex_enter(&aiop->aio_mutex);
1516                 while (head->lio_refcnt > 0) {
1517                         if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
1518                                 mutex_exit(&aiop->aio_mutex);
1519                                 error = EINTR;
1520                                 goto done;
1521                         }
1522                 }
1523                 mutex_exit(&aiop->aio_mutex);
1524                 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_64);
1525         }
1526 
1527 done:
1528         kmem_free(cbplist, ssize);
1529         if (deadhead) {
1530                 if (head->lio_sigqp)
1531                         kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
1532                 if (head->lio_portkev)
1533                         port_free_event(head->lio_portkev);
1534                 kmem_free(head, sizeof (aio_lio_t));
1535         }
1536         return (error);
1537 }
1538 
1539 #endif /* _LP64 */
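
/*
 * A note on the error accounting in alio() above: a request that
 * fails before reaching the driver is reported through its own aiocb
 * via lio_set_uerror(), and the per-request failures are then folded
 * into a single summary errno, in sketch form:
 *
 *	error = aio_notsupported ? ENOTSUP : (aio_errors ? EIO : 0);
 */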
1540 
1541 /*
1542  * Asynchronous list IO.
1543  * If list I/O is called with LIO_WAIT, it can still return
1544  * before all the I/O's are completed if a signal is caught
1545  * or if the list includes UFS I/O requests. If this happens,
1546  * libaio will call aliowait() to wait for the I/O's to
1547  * complete.
1548  */
1549 /*ARGSUSED*/
1550 static int
1551 aliowait(
1552         int     mode,
1553         void    *aiocb,
1554         int     nent,
1555         void    *sigev,
1556         int     run_mode)
1557 {
1558         aio_lio_t       *head;
1559         aio_t           *aiop;
1560         caddr_t         cbplist;
1561         aiocb_t         *cbp, **ucbp;
1562 #ifdef  _SYSCALL32_IMPL
1563         aiocb32_t       *cbp32;
1564         caddr32_t       *ucbp32;
1565         aiocb64_32_t    *cbp64;
1566 #endif
1567         int             error = 0;
1568         int             i;
1569         size_t          ssize = 0;
1570         model_t         model = get_udatamodel();
1571 
1572         aiop = curproc->p_aio;
1573         if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
1574                 return (EINVAL);
1575 
1576         if (model == DATAMODEL_NATIVE)
1577                 ssize = (sizeof (aiocb_t *) * nent);
1578 #ifdef  _SYSCALL32_IMPL
1579         else
1580                 ssize = (sizeof (caddr32_t) * nent);
1581 #endif  /* _SYSCALL32_IMPL */
1582 
1583         if (ssize == 0)
1584                 return (EINVAL);
1585 
1586         cbplist = kmem_alloc(ssize, KM_SLEEP);
1587 
1588         if (model == DATAMODEL_NATIVE)
1589                 ucbp = (aiocb_t **)cbplist;
1590 #ifdef  _SYSCALL32_IMPL
1591         else
1592                 ucbp32 = (caddr32_t *)cbplist;
1593 #endif  /* _SYSCALL32_IMPL */
1594 
1595         if (copyin(aiocb, cbplist, ssize)) {
1596                 error = EFAULT;
1597                 goto done;
1598         }
1599 
1600         /*
1601          * To find the list head, we go through the
1602          * list of aiocb structs, find the request
1603          * it is for, then get the list head that reqp
1604          * points to.
1605          */
1606         head = NULL;
1607 
1608         for (i = 0; i < nent; i++) {
1609                 if (model == DATAMODEL_NATIVE) {
1610                         /*
1611                          * Since we are only checking for a NULL pointer,
1612                          * the following works on both native data sizes
1613                          * as well as for a largefile aiocb.
1614                          */
1615                         if ((cbp = *ucbp++) == NULL)
1616                                 continue;
1617                         if (run_mode != AIO_LARGEFILE) {
1618                                 if (head = aio_list_get(&cbp->aio_resultp))
1619                                         break;
1620                         } else {
1621                                 /*
1622                                  * This is a case when largefile call is
1623                                  * made on 32 bit kernel.
1624                                  * Treat each pointer as pointer to
1625                                  * aiocb64_32
1626                                  */
1627                                 if (head = aio_list_get((aio_result_t *)
1628                                     &(((aiocb64_32_t *)cbp)->aio_resultp)))
1629                                         break;
1630                         }
1631                 }
1632 #ifdef  _SYSCALL32_IMPL
1633                 else {
1634                         if (run_mode == AIO_LARGEFILE) {
1635                                 if ((cbp64 = (aiocb64_32_t *)
1636                                     (uintptr_t)*ucbp32++) == NULL)
1637                                         continue;
1638                                 if (head = aio_list_get((aio_result_t *)
1639                                     &cbp64->aio_resultp))
1640                                         break;
1641                         } else if (run_mode == AIO_32) {
1642                                 if ((cbp32 = (aiocb32_t *)
1643                                     (uintptr_t)*ucbp32++) == NULL)
1644                                         continue;
1645                                 if (head = aio_list_get((aio_result_t *)
1646                                     &cbp32->aio_resultp))
1647                                         break;
1648                         }
1649                 }
1650 #endif  /* _SYSCALL32_IMPL */
1651         }
1652 
1653         if (head == NULL) {
1654                 error = EINVAL;
1655                 goto done;
1656         }
1657 
1658         mutex_enter(&aiop->aio_mutex);
1659         while (head->lio_refcnt > 0) {
1660                 if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
1661                         mutex_exit(&aiop->aio_mutex);
1662                         error = EINTR;
1663                         goto done;
1664                 }
1665         }
1666         mutex_exit(&aiop->aio_mutex);
1667         alio_cleanup(aiop, (aiocb_t **)cbplist, nent, run_mode);
1668 done:
1669         kmem_free(cbplist, ssize);
1670         return (error);
1671 }
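
/*
 * Illustrative sketch (library-side pseudocode, not actual libaio
 * source) of the retry described above: when an in-kernel LIO_WAIT
 * returns early, the library reissues a wait on the same list
 * through this entry point.  The _kaio() wrapper and the
 * AIOLIO/AIOLIOWAIT opcode names are assumptions for illustration
 * only.
 *
 *	if (_kaio(AIOLIO, LIO_WAIT, list, nent, sigevp) == -1 &&
 *	    errno == EINTR)
 *		(void) _kaio(AIOLIOWAIT, LIO_WAIT, list, nent, sigevp);
 */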
1672 
1673 aio_lio_t *
1674 aio_list_get(aio_result_t *resultp)
1675 {
1676         aio_lio_t       *head = NULL;
1677         aio_t           *aiop;
1678         aio_req_t       **bucket;
1679         aio_req_t       *reqp;
1680         long            index;
1681 
1682         aiop = curproc->p_aio;
1683         if (aiop == NULL)
1684                 return (NULL);
1685 
1686         if (resultp) {
1687                 index = AIO_HASH(resultp);
1688                 bucket = &aiop->aio_hash[index];
1689                 for (reqp = *bucket; reqp != NULL;
1690                     reqp = reqp->aio_hash_next) {
1691                         if (reqp->aio_req_resultp == resultp) {
1692                                 head = reqp->aio_req_lio;
1693                                 return (head);
1694                         }
1695                 }
1696         }
1697         return (NULL);
1698 }
1699 
1701 static void
1702 lio_set_uerror(void *resultp, int error)
1703 {
1704         /*
1705          * the resultp field is a pointer to where the
1706          * error should be written out to the user's
1707          * aiocb.
1708          */
1710         if (get_udatamodel() == DATAMODEL_NATIVE) {
1711                 (void) sulword(&((aio_result_t *)resultp)->aio_return,
1712                     (ssize_t)-1);
1713                 (void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
1714         }
1715 #ifdef  _SYSCALL32_IMPL
1716         else {
1717                 (void) suword32(&((aio_result32_t *)resultp)->aio_return,
1718                     (uint_t)-1);
1719                 (void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
1720         }
1721 #endif  /* _SYSCALL32_IMPL */
1722 }
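
/*
 * A minimal sketch of what user code observes once the kernel has
 * run lio_set_uerror(&cb->aio_resultp, err) against its aiocb (the
 * fields are those of aio_result_t):
 *
 *	aio_result_t *rp = &cb->aio_resultp;
 *	assert(rp->aio_return == -1);
 *	assert(rp->aio_errno == err);
 */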
1723 
1724 /*
1725  * do cleanup completion for all requests in list. memory for
1726  * each request is also freed.
1727  */
1728 static void
1729 alio_cleanup(aio_t *aiop, aiocb_t **cbp, int nent, int run_mode)
1730 {
1731         int i;
1732         aio_req_t *reqp;
1733         aio_result_t *resultp;
1734         aiocb64_32_t *aiocb_64;
1735 
1736         for (i = 0; i < nent; i++) {
1737                 if (get_udatamodel() == DATAMODEL_NATIVE) {
1738                         if (cbp[i] == NULL)
1739                                 continue;
1740                         if (run_mode == AIO_LARGEFILE) {
1741                                 aiocb_64 = (aiocb64_32_t *)cbp[i];
1742                                 resultp = (aio_result_t *)
1743                                     &aiocb_64->aio_resultp;
1744                         } else
1745                                 resultp = &cbp[i]->aio_resultp;
1746                 }
1747 #ifdef  _SYSCALL32_IMPL
1748                 else {
1749                         aiocb32_t *aiocb_32;
1750                         caddr32_t *cbp32;
1751 
1752                         cbp32 = (caddr32_t *)cbp;
1753                         if (cbp32[i] == NULL)
1754                                 continue;
1755                         if (run_mode == AIO_32) {
1756                                 aiocb_32 = (aiocb32_t *)(uintptr_t)cbp32[i];
1757                                 resultp = (aio_result_t *)&aiocb_32->
1758                                     aio_resultp;
1759                         } else if (run_mode == AIO_LARGEFILE) {
1760                                 aiocb_64 = (aiocb64_32_t *)(uintptr_t)cbp32[i];
1761                                 resultp = (aio_result_t *)&aiocb_64->
1762                                     aio_resultp;
1763                         }
1764                 }
1765 #endif  /* _SYSCALL32_IMPL */
1766                 /*
1767                  * we need to get the aio_cleanupq_mutex since we call
1768                  * aio_req_done().
1769                  */
1770                 mutex_enter(&aiop->aio_cleanupq_mutex);
1771                 mutex_enter(&aiop->aio_mutex);
1772                 reqp = aio_req_done(resultp);
1773                 mutex_exit(&aiop->aio_mutex);
1774                 mutex_exit(&aiop->aio_cleanupq_mutex);
1775                 if (reqp != NULL) {
1776                         aphysio_unlock(reqp);
1777                         aio_copyout_result(reqp);
1778                         mutex_enter(&aiop->aio_mutex);
1779                         aio_req_free(aiop, reqp);
1780                         mutex_exit(&aiop->aio_mutex);
1781                 }
1782         }
1783 }
1784 
1785 /*
1786  * Write out the results for an aio request that is done.
1787  */
1788 static int
1789 aioerror(void *cb, int run_mode)
1790 {
1791         aio_result_t *resultp;
1792         aio_t *aiop;
1793         aio_req_t *reqp;
1794         int retval;
1795 
1796         aiop = curproc->p_aio;
1797         if (aiop == NULL || cb == NULL)
1798                 return (EINVAL);
1799 
1800         if (get_udatamodel() == DATAMODEL_NATIVE) {
1801                 if (run_mode == AIO_LARGEFILE)
1802                         resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
1803                             aio_resultp;
1804                 else
1805                         resultp = &((aiocb_t *)cb)->aio_resultp;
1806         }
1807 #ifdef  _SYSCALL32_IMPL
1808         else {
1809                 if (run_mode == AIO_LARGEFILE)
1810                         resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
1811                             aio_resultp;
1812                 else if (run_mode == AIO_32)
1813                         resultp = (aio_result_t *)&((aiocb32_t *)cb)->
1814                             aio_resultp;
1815         }
1816 #endif  /* _SYSCALL32_IMPL */
1817         /*
1818          * we need to get the aio_cleanupq_mutex since we call
1819          * aio_req_find().
1820          */
1821         mutex_enter(&aiop->aio_cleanupq_mutex);
1822         mutex_enter(&aiop->aio_mutex);
1823         retval = aio_req_find(resultp, &reqp);
1824         mutex_exit(&aiop->aio_mutex);
1825         mutex_exit(&aiop->aio_cleanupq_mutex);
1826         if (retval == 0) {
1827                 aphysio_unlock(reqp);
1828                 aio_copyout_result(reqp);
1829                 mutex_enter(&aiop->aio_mutex);
1830                 aio_req_free(aiop, reqp);
1831                 mutex_exit(&aiop->aio_mutex);
1832                 return (0);
1833         } else if (retval == 1)
1834                 return (EINPROGRESS);
1835         else if (retval == 2)
1836                 return (EINVAL);
1837         return (0);
1838 }
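
/*
 * The three-way contract above in sketch form (dispatcher-level
 * pseudocode; this is what aio_error(3) ultimately reports back to
 * the application):
 *
 *	switch (aioerror(cb, run_mode)) {
 *	case 0:			request done, results copied out
 *	case EINPROGRESS:	still queued or in the driver
 *	case EINVAL:		cb matches no outstanding request
 *	}
 */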
1839 
1840 /*
1841  *      aio_cancel - sets *rval to AIO_ALLDONE if no matching
1842  *      requests are outstanding, otherwise to AIO_NOTCANCELED.
1845  */
1846 static int
1847 aio_cancel(
1848         int     fildes,
1849         void    *cb,
1850         long    *rval,
1851         int     run_mode)
1852 {
1853         aio_t *aiop;
1854         void *resultp;
1855         int index;
1856         aio_req_t **bucket;
1857         aio_req_t *ent;
1858 
1860         /*
1861          * Verify valid file descriptor
1862          */
1863         if ((getf(fildes)) == NULL) {
1864                 return (EBADF);
1865         }
1866         releasef(fildes);
1867 
1868         aiop = curproc->p_aio;
1869         if (aiop == NULL)
1870                 return (EINVAL);
1871 
1872         if (aiop->aio_outstanding == 0) {
1873                 *rval = AIO_ALLDONE;
1874                 return (0);
1875         }
1876 
1877         mutex_enter(&aiop->aio_mutex);
1878         if (cb != NULL) {
1879                 if (get_udatamodel() == DATAMODEL_NATIVE) {
1880                         if (run_mode == AIO_LARGEFILE)
1881                                 resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
1882                                     ->aio_resultp;
1883                         else
1884                                 resultp = &((aiocb_t *)cb)->aio_resultp;
1885                 }
1886 #ifdef  _SYSCALL32_IMPL
1887                 else {
1888                         if (run_mode == AIO_LARGEFILE)
1889                                 resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
1890                                     ->aio_resultp;
1891                         else if (run_mode == AIO_32)
1892                                 resultp = (aio_result_t *)&((aiocb32_t *)cb)
1893                                     ->aio_resultp;
1894                 }
1895 #endif  /* _SYSCALL32_IMPL */
1896                 index = AIO_HASH(resultp);
1897                 bucket = &aiop->aio_hash[index];
1898                 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
1899                         if (ent->aio_req_resultp == resultp) {
1900                                 if ((ent->aio_req_flags & AIO_PENDING) == 0) {
1901                                         mutex_exit(&aiop->aio_mutex);
1902                                         *rval = AIO_ALLDONE;
1903                                         return (0);
1904                                 }
1905                                 mutex_exit(&aiop->aio_mutex);
1906                                 *rval = AIO_NOTCANCELED;
1907                                 return (0);
1908                         }
1909                 }
1910                 mutex_exit(&aiop->aio_mutex);
1911                 *rval = AIO_ALLDONE;
1912                 return (0);
1913         }
1914 
1915         for (index = 0; index < AIO_HASHSZ; index++) {
1916                 bucket = &aiop->aio_hash[index];
1917                 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
1918                         if (ent->aio_req_fd == fildes) {
1919                                 if ((ent->aio_req_flags & AIO_PENDING) != 0) {
1920                                         mutex_exit(&aiop->aio_mutex);
1921                                         *rval = AIO_NOTCANCELED;
1922                                         return (0);
1923                                 }
1924                         }
1925                 }
1926         }
1927         mutex_exit(&aiop->aio_mutex);
1928         *rval = AIO_ALLDONE;
1929         return (0);
1930 }
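
/*
 * Note that kaio never cancels a request that has been handed to a
 * driver, so the only summary answers are AIO_ALLDONE and
 * AIO_NOTCANCELED.  Sketch of a dispatcher-level caller:
 *
 *	long rv;
 *	error = aio_cancel(fd, NULL, &rv, AIO_64);
 *	if (error == 0 && rv == AIO_NOTCANCELED)
 *		... at least one request on fd is still in flight ...
 */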
1931 
1932 /*
1933  * solaris version of asynchronous read and write
1934  */
1935 static int
1936 arw(
1937         int     opcode,
1938         int     fdes,
1939         char    *bufp,
1940         int     bufsize,
1941         offset_t        offset,
1942         aio_result_t    *resultp,
1943         int             mode)
1944 {
1945         file_t          *fp;
1946         int             error;
1947         struct vnode    *vp;
1948         aio_req_t       *reqp;
1949         aio_t           *aiop;
1950         int             (*aio_func)();
1951 #ifdef _LP64
1952         aiocb_t         aiocb;
1953 #else
1954         aiocb64_32_t    aiocb64;
1955 #endif
1956 
1957         aiop = curproc->p_aio;
1958         if (aiop == NULL)
1959                 return (EINVAL);
1960 
1961         if ((fp = getf(fdes)) == NULL) {
1962                 return (EBADF);
1963         }
1964 
1965         /*
1966          * check that the file was opened with the requested access mode
1967          */
1968         if ((fp->f_flag & mode) == 0) {
1969                 releasef(fdes);
1970                 return (EBADF);
1971         }
1972 
1973         vp = fp->f_vnode;
1974         aio_func = check_vp(vp, mode);
1975         if (aio_func == NULL) {
1976                 releasef(fdes);
1977                 return (EBADFD);
1978         }
1979 #ifdef _LP64
1980         aiocb.aio_fildes = fdes;
1981         aiocb.aio_buf = bufp;
1982         aiocb.aio_nbytes = bufsize;
1983         aiocb.aio_offset = offset;
1984         aiocb.aio_sigevent.sigev_notify = 0;
1985         error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp, 1);
1986 #else
1987         aiocb64.aio_fildes = fdes;
1988         aiocb64.aio_buf = (caddr32_t)bufp;
1989         aiocb64.aio_nbytes = bufsize;
1990         aiocb64.aio_offset = offset;
1991         aiocb64.aio_sigevent.sigev_notify = 0;
1992         error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp, 1);
1993 #endif
1994         if (error) {
1995                 releasef(fdes);
1996                 return (error);
1997         }
1998 
1999         /*
2000          * enable polling on this request if the opcode has
2001          * the AIO poll bit set
2002          */
2003         if (opcode & AIO_POLL_BIT)
2004                 reqp->aio_req_flags |= AIO_POLL;
2005 
2006         if (bufsize == 0) {
2007                 clear_active_fd(fdes);
2008                 aio_zerolen(reqp);
2009                 return (0);
2010         }
2011         /*
2012          * send the request to the driver.
2013          */
2014         error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
2015         /*
2016          * the fd is stored in the aio_req_t by aio_req_setup(), and
2017          * is released by the aio_cleanup_thread() when the IO has
2018          * completed.
2019          */
2020         if (error) {
2021                 releasef(fdes);
2022                 mutex_enter(&aiop->aio_mutex);
2023                 aio_req_free(aiop, reqp);
2024                 aiop->aio_pending--;
2025                 if (aiop->aio_flags & AIO_REQ_BLOCK)
2026                         cv_signal(&aiop->aio_cleanupcv);
2027                 mutex_exit(&aiop->aio_mutex);
2028                 return (error);
2029         }
2030         clear_active_fd(fdes);
2031         return (0);
2032 }
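
/*
 * arw() backs the old Solaris aioread(3)/aiowrite(3) interfaces.
 * A minimal sketch of the corresponding user-level usage; note that
 * the result is reaped with aiowait(), not with a signal:
 *
 *	aio_result_t result;
 *	if (aioread(fd, buf, bufsize, off, SEEK_SET, &result) == 0) {
 *		aio_result_t *rp = aiowait(NULL);
 *		if (rp == &result && rp->aio_return == -1)
 *			errno = rp->aio_errno;
 *	}
 */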
2033 
2034 /*
2035  * posix version of asynchronous read and write
2036  */
2037 static int
2038 aiorw(
2039         int             opcode,
2040         void            *aiocb_arg,
2041         int             mode,
2042         int             run_mode)
2043 {
2044 #ifdef _SYSCALL32_IMPL
2045         aiocb32_t       aiocb32;
2046         struct  sigevent32 *sigev32;
2047         port_notify32_t pntfy32;
2048 #endif
2049         aiocb64_32_t    aiocb64;
2050         aiocb_t         aiocb;
2051         file_t          *fp;
2052         int             error, fd;
2053         size_t          bufsize;
2054         struct vnode    *vp;
2055         aio_req_t       *reqp;
2056         aio_t           *aiop;
2057         int             (*aio_func)();
2058         aio_result_t    *resultp;
2059         struct  sigevent *sigev;
2060         model_t         model;
2061         int             aio_use_port = 0;
2062         port_notify_t   pntfy;
2063 
2064         model = get_udatamodel();
2065         aiop = curproc->p_aio;
2066         if (aiop == NULL)
2067                 return (EINVAL);
2068 
2069         if (model == DATAMODEL_NATIVE) {
2070                 if (run_mode != AIO_LARGEFILE) {
2071                         if (copyin(aiocb_arg, &aiocb, sizeof (aiocb_t)))
2072                                 return (EFAULT);
2073                         bufsize = aiocb.aio_nbytes;
2074                         resultp = &(((aiocb_t *)aiocb_arg)->aio_resultp);
2075                         if ((fp = getf(fd = aiocb.aio_fildes)) == NULL) {
2076                                 return (EBADF);
2077                         }
2078                         sigev = &aiocb.aio_sigevent;
2079                 } else {
2080                         /*
2081                          * We come here only when we make largefile
2082                          * call on 32 bit kernel using 32 bit library.
2083                          */
2084                         if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
2085                                 return (EFAULT);
2086                         bufsize = aiocb64.aio_nbytes;
2087                         resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
2088                             ->aio_resultp);
2089                         if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
2090                                 return (EBADF);
2091                         sigev = (struct sigevent *)&aiocb64.aio_sigevent;
2092                 }
2093 
2094                 if (sigev->sigev_notify == SIGEV_PORT) {
2095                         if (copyin((void *)sigev->sigev_value.sival_ptr,
2096                             &pntfy, sizeof (port_notify_t))) {
2097                                 releasef(fd);
2098                                 return (EFAULT);
2099                         }
2100                         aio_use_port = 1;
2101                 } else if (sigev->sigev_notify == SIGEV_THREAD) {
2102                         pntfy.portnfy_port = sigev->sigev_signo;
2103                         pntfy.portnfy_user =
2104                             sigev->sigev_value.sival_ptr;
2105                         aio_use_port = 1;
2106                 }
2107         }
2108 #ifdef  _SYSCALL32_IMPL
2109         else {
2110                 if (run_mode == AIO_32) {
2111                         /* 32 bit system call is being made on 64 bit kernel */
2112                         if (copyin(aiocb_arg, &aiocb32, sizeof (aiocb32_t)))
2113                                 return (EFAULT);
2114 
2115                         bufsize = aiocb32.aio_nbytes;
2116                         aiocb_32ton(&aiocb32, &aiocb);
2117                         resultp = (aio_result_t *)&(((aiocb32_t *)aiocb_arg)->
2118                             aio_resultp);
2119                         if ((fp = getf(fd = aiocb32.aio_fildes)) == NULL) {
2120                                 return (EBADF);
2121                         }
2122                         sigev32 = &aiocb32.aio_sigevent;
2123                 } else if (run_mode == AIO_LARGEFILE) {
2124                         /*
2125                          * We come here only when we make largefile
2126                          * call on 64 bit kernel using 32 bit library.
2127                          */
2128                         if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
2129                                 return (EFAULT);
2130                         bufsize = aiocb64.aio_nbytes;
2131                         aiocb_LFton(&aiocb64, &aiocb);
2132                         resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
2133                             ->aio_resultp);
2134                         if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
2135                                 return (EBADF);
2136                         sigev32 = &aiocb64.aio_sigevent;
2137                 }
2138 
2139                 if (sigev32->sigev_notify == SIGEV_PORT) {
2140                         if (copyin(
2141                             (void *)(uintptr_t)sigev32->sigev_value.sival_ptr,
2142                             &pntfy32, sizeof (port_notify32_t))) {
2143                                 releasef(fd);
2144                                 return (EFAULT);
2145                         }
2146                         pntfy.portnfy_port = pntfy32.portnfy_port;
2147                         pntfy.portnfy_user = (void *)(uintptr_t)
2148                             pntfy32.portnfy_user;
2149                         aio_use_port = 1;
2150                 } else if (sigev32->sigev_notify == SIGEV_THREAD) {
2151                         pntfy.portnfy_port = sigev32->sigev_signo;
2152                         pntfy.portnfy_user = (void *)(uintptr_t)
2153                             sigev32->sigev_value.sival_ptr;
2154                         aio_use_port = 1;
2155                 }
2156         }
2157 #endif  /* _SYSCALL32_IMPL */
2158 
2159         /*
2160          * check that the file was opened with the requested access mode
2161          */
2162 
2163         if ((fp->f_flag & mode) == 0) {
2164                 releasef(fd);
2165                 return (EBADF);
2166         }
2167 
2168         vp = fp->f_vnode;
2169         aio_func = check_vp(vp, mode);
2170         if (aio_func == NULL) {
2171                 releasef(fd);
2172                 return (EBADFD);
2173         }
2174         if (run_mode == AIO_LARGEFILE)
2175                 error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp, 0);
2176         else
2177                 error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp, 0);
2178 
2179         if (error) {
2180                 releasef(fd);
2181                 return (error);
2182         }
2183         /*
2184          * enable polling on this request if the opcode has
2185          * the AIO poll bit set
2186          */
2187         if (opcode & AIO_POLL_BIT)
2188                 reqp->aio_req_flags |= AIO_POLL;
2189 
2190         if (model == DATAMODEL_NATIVE)
2191                 reqp->aio_req_iocb.iocb = aiocb_arg;
2192 #ifdef  _SYSCALL32_IMPL
2193         else
2194                 reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)aiocb_arg;
2195 #endif
2196 
2197         if (aio_use_port) {
2198                 int event = (run_mode == AIO_LARGEFILE)?
2199                     ((mode == FREAD)? AIOAREAD64 : AIOAWRITE64) :
2200                     ((mode == FREAD)? AIOAREAD : AIOAWRITE);
2201                 error = aio_req_assoc_port_rw(&pntfy, aiocb_arg, reqp, event);
2202         }
2203 
2204         /*
2205          * send the request to the driver.
2206          */
2207         if (error == 0) {
2208                 if (bufsize == 0) {
2209                         clear_active_fd(fd);
2210                         aio_zerolen(reqp);
2211                         return (0);
2212                 }
2213                 error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());
2214         }
2215 
2216         /*
2217          * the fd is stored in the aio_req_t by aio_req_setup(), and
2218          * is released by the aio_cleanup_thread() when the IO has
2219          * completed.
2220          */
2221         if (error) {
2222                 releasef(fd);
2223                 mutex_enter(&aiop->aio_mutex);
2224                 if (aio_use_port)
2225                         aio_deq(&aiop->aio_portpending, reqp);
2226                 aio_req_free(aiop, reqp);
2227                 aiop->aio_pending--;
2228                 if (aiop->aio_flags & AIO_REQ_BLOCK)
2229                         cv_signal(&aiop->aio_cleanupcv);
2230                 mutex_exit(&aiop->aio_mutex);
2231                 return (error);
2232         }
2233         clear_active_fd(fd);
2234         return (0);
2235 }
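
/*
 * aiorw() backs the POSIX aio_read(3)/aio_write(3) calls when the
 * target is a character device kaio can drive.  Minimal user-level
 * sketch (polling shown only for brevity):
 *
 *	struct aiocb cb;
 *	(void) memset(&cb, 0, sizeof (cb));
 *	cb.aio_fildes = fd;
 *	cb.aio_buf = buf;
 *	cb.aio_nbytes = len;
 *	cb.aio_offset = off;
 *	cb.aio_sigevent.sigev_notify = SIGEV_NONE;
 *	if (aio_read(&cb) == 0) {
 *		while (aio_error(&cb) == EINPROGRESS)
 *			;
 *		nbytes = aio_return(&cb);
 *	}
 */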
2236 
2238 /*
2239  * set error for a list IO entry that failed.
2240  */
2241 static void
2242 lio_set_error(aio_req_t *reqp, int portused)
2243 {
2244         aio_t *aiop = curproc->p_aio;
2245 
2246         if (aiop == NULL)
2247                 return;
2248 
2249         mutex_enter(&aiop->aio_mutex);
2250         if (portused)
2251                 aio_deq(&aiop->aio_portpending, reqp);
2252         aiop->aio_pending--;
2253         /* request failed, AIO_PHYSIODONE set to avoid physio cleanup. */
2254         reqp->aio_req_flags |= AIO_PHYSIODONE;
2255         /*
2256          * Need to free the request now as it is never
2257          * going to get on the done queue.
2258          *
2259          * Note: aio_outstanding is decremented in
2260          *       aio_req_free()
2261          */
2262         aio_req_free(aiop, reqp);
2263         if (aiop->aio_flags & AIO_REQ_BLOCK)
2264                 cv_signal(&aiop->aio_cleanupcv);
2265         mutex_exit(&aiop->aio_mutex);
2266 }
2267 
2268 /*
2269  * check if a specified request is done, and remove it from
2270  * the done queue. If NULL is specified, remove any request
2271  * from the done queue instead.
2272  */
2273 static aio_req_t *
2274 aio_req_done(void *resultp)
2275 {
2276         aio_req_t **bucket;
2277         aio_req_t *ent;
2278         aio_t *aiop = curproc->p_aio;
2279         long index;
2280 
2281         ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
2282         ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2283 
2284         if (resultp) {
2285                 index = AIO_HASH(resultp);
2286                 bucket = &aiop->aio_hash[index];
2287                 for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
2288                         if (ent->aio_req_resultp == (aio_result_t *)resultp) {
2289                                 if (ent->aio_req_flags & AIO_DONEQ) {
2290                                         return (aio_req_remove(ent));
2291                                 }
2292                                 return (NULL);
2293                         }
2294                 }
2295                 /* no match, resultp is invalid */
2296                 return (NULL);
2297         }
2298         return (aio_req_remove(NULL));
2299 }
2300 
2301 /*
2302  * determine if a user-level resultp pointer is associated with an
2303  * active IO request. Zero is returned when the request is done,
2304  * and the request is removed from the done queue. Only when the
2305  * return value is zero, is the "reqp" pointer valid. One is returned
2306  * when the request is in progress. Two is returned when the request
2307  * is invalid.
2308  */
2309 static int
2310 aio_req_find(aio_result_t *resultp, aio_req_t **reqp)
2311 {
2312         aio_req_t **bucket;
2313         aio_req_t *ent;
2314         aio_t *aiop = curproc->p_aio;
2315         long index;
2316 
2317         ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
2318         ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2319 
2320         index = AIO_HASH(resultp);
2321         bucket = &aiop->aio_hash[index];
2322         for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
2323                 if (ent->aio_req_resultp == resultp) {
2324                         if (ent->aio_req_flags & AIO_DONEQ) {
2325                                 *reqp = aio_req_remove(ent);
2326                                 return (0);
2327                         }
2328                         return (1);
2329                 }
2330         }
2331         /* no match, resultp is invalid */
2332         return (2);
2333 }
2334 
2335 /*
2336  * remove a request from the done queue.
2337  */
2338 static aio_req_t *
2339 aio_req_remove(aio_req_t *reqp)
2340 {
2341         aio_t *aiop = curproc->p_aio;
2342 
2343         ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2344 
2345         if (reqp != NULL) {
2346                 ASSERT(reqp->aio_req_flags & AIO_DONEQ);
2347                 if (reqp->aio_req_next == reqp) {
2348                         /* only one request on queue */
2349                         if (reqp ==  aiop->aio_doneq) {
2350                                 aiop->aio_doneq = NULL;
2351                         } else {
2352                                 ASSERT(reqp == aiop->aio_cleanupq);
2353                                 aiop->aio_cleanupq = NULL;
2354                         }
2355                 } else {
2356                         reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
2357                         reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
2358                         /*
2359                          * The request can be either on the aio_doneq or the
2360                          * aio_cleanupq
2361                          */
2362                         if (reqp == aiop->aio_doneq)
2363                                 aiop->aio_doneq = reqp->aio_req_next;
2364 
2365                         if (reqp == aiop->aio_cleanupq)
2366                                 aiop->aio_cleanupq = reqp->aio_req_next;
2367                 }
2368                 reqp->aio_req_flags &= ~AIO_DONEQ;
2369                 reqp->aio_req_next = NULL;
2370                 reqp->aio_req_prev = NULL;
2371         } else if ((reqp = aiop->aio_doneq) != NULL) {
2372                 ASSERT(reqp->aio_req_flags & AIO_DONEQ);
2373                 if (reqp == reqp->aio_req_next) {
2374                         /* only one request on queue */
2375                         aiop->aio_doneq = NULL;
2376                 } else {
2377                         reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
2378                         reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
2379                         aiop->aio_doneq = reqp->aio_req_next;
2380                 }
2381                 reqp->aio_req_flags &= ~AIO_DONEQ;
2382                 reqp->aio_req_next = NULL;
2383                 reqp->aio_req_prev = NULL;
2384         }
2385         if (aiop->aio_doneq == NULL && (aiop->aio_flags & AIO_WAITN))
2386                 cv_broadcast(&aiop->aio_waitcv);
2387         return (reqp);
2388 }
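
/*
 * Both queues are circular, doubly linked lists, which is why a
 * one-element queue satisfies reqp->aio_req_next == reqp above.
 * The general unlink is the usual splice (sketch; queue_head stands
 * for either aio_doneq or aio_cleanupq):
 *
 *	reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
 *	reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
 *	if (queue_head == reqp)
 *		queue_head = (reqp->aio_req_next == reqp) ?
 *		    NULL : reqp->aio_req_next;
 */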
2389 
2390 static int
2391 aio_req_setup(
2392         aio_req_t       **reqpp,
2393         aio_t           *aiop,
2394         aiocb_t         *arg,
2395         aio_result_t    *resultp,
2396         vnode_t         *vp,
2397         int             old_solaris_req)
2398 {
2399         sigqueue_t      *sqp = NULL;
2400         aio_req_t       *reqp;
2401         struct uio      *uio;
2402         struct sigevent *sigev;
2403         int             error;
2404 
2405         sigev = &arg->aio_sigevent;
2406         if (sigev->sigev_notify == SIGEV_SIGNAL &&
2407             sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
2408                 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
2409                 if (sqp == NULL)
2410                         return (EAGAIN);
2411                 sqp->sq_func = NULL;
2412                 sqp->sq_next = NULL;
2413                 sqp->sq_info.si_code = SI_ASYNCIO;
2414                 sqp->sq_info.si_pid = curproc->p_pid;
2415                 sqp->sq_info.si_ctid = PRCTID(curproc);
2416                 sqp->sq_info.si_zoneid = getzoneid();
2417                 sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
2418                 sqp->sq_info.si_signo = sigev->sigev_signo;
2419                 sqp->sq_info.si_value = sigev->sigev_value;
2420         }
2421 
2422         mutex_enter(&aiop->aio_mutex);
2423 
2424         if (aiop->aio_flags & AIO_REQ_BLOCK) {
2425                 mutex_exit(&aiop->aio_mutex);
2426                 if (sqp)
2427                         kmem_free(sqp, sizeof (sigqueue_t));
2428                 return (EIO);
2429         }
2430         /*
2431          * get an aio_reqp from the free list or allocate one
2432          * from dynamic memory.
2433          */
2434         if (error = aio_req_alloc(&reqp, resultp)) {
2435                 mutex_exit(&aiop->aio_mutex);
2436                 if (sqp)
2437                         kmem_free(sqp, sizeof (sigqueue_t));
2438                 return (error);
2439         }
2440         aiop->aio_pending++;
2441         aiop->aio_outstanding++;
2442         reqp->aio_req_flags = AIO_PENDING;
2443         if (old_solaris_req) {
2444                 /* this is an old solaris aio request */
2445                 reqp->aio_req_flags |= AIO_SOLARIS;
2446                 aiop->aio_flags |= AIO_SOLARIS_REQ;
2447         }
2448         if (sigev->sigev_notify == SIGEV_THREAD ||
2449             sigev->sigev_notify == SIGEV_PORT)
2450                 aio_enq(&aiop->aio_portpending, reqp, 0);
2451         mutex_exit(&aiop->aio_mutex);
2452         /*
2453          * initialize aio request.
2454          */
2455         reqp->aio_req_fd = arg->aio_fildes;
2456         reqp->aio_req_sigqp = sqp;
2457         reqp->aio_req_iocb.iocb = NULL;
2458         reqp->aio_req_lio = NULL;
2459         reqp->aio_req_buf.b_file = vp;
2460         uio = reqp->aio_req.aio_uio;
2461         uio->uio_iovcnt = 1;
2462         uio->uio_iov->iov_base = (caddr_t)arg->aio_buf;
2463         uio->uio_iov->iov_len = arg->aio_nbytes;
2464         uio->uio_loffset = arg->aio_offset;
2465         *reqpp = reqp;
2466         return (0);
2467 }
2468 
2469 /*
2470  * Allocate p_aio struct.
2471  */
2472 static aio_t *
2473 aio_aiop_alloc(void)
2474 {
2475         aio_t   *aiop;
2476 
2477         ASSERT(MUTEX_HELD(&curproc->p_lock));
2478 
2479         aiop = kmem_zalloc(sizeof (struct aio), KM_NOSLEEP);
2480         if (aiop) {
2481                 mutex_init(&aiop->aio_mutex, NULL, MUTEX_DEFAULT, NULL);
2482                 mutex_init(&aiop->aio_cleanupq_mutex, NULL, MUTEX_DEFAULT,
2483                     NULL);
2484                 mutex_init(&aiop->aio_portq_mutex, NULL, MUTEX_DEFAULT, NULL);
2485         }
2486         return (aiop);
2487 }
2488 
2489 /*
2490  * Allocate an aio_req struct.
2491  */
2492 static int
2493 aio_req_alloc(aio_req_t **nreqp, aio_result_t *resultp)
2494 {
2495         aio_req_t *reqp;
2496         aio_t *aiop = curproc->p_aio;
2497 
2498         ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2499 
2500         if ((reqp = aiop->aio_free) != NULL) {
2501                 aiop->aio_free = reqp->aio_req_next;
2502                 bzero(reqp, sizeof (*reqp));
2503         } else {
2504                 /*
2505                  * Check whether memory is getting tight.
2506                  * This is a temporary mechanism to avoid memory
2507                  * exhaustion by a single process until we come up
2508                  * with a per process solution such as setrlimit().
2509                  */
2510                 if (freemem < desfree)
2511                         return (EAGAIN);
2512                 reqp = kmem_zalloc(sizeof (struct aio_req_t), KM_NOSLEEP);
2513                 if (reqp == NULL)
2514                         return (EAGAIN);
2515         }
2516         reqp->aio_req.aio_uio = &reqp->aio_req_uio;
2517         reqp->aio_req.aio_uio->uio_iov = &reqp->aio_req_iov;
2518         reqp->aio_req.aio_private = reqp;
2519         reqp->aio_req_buf.b_offset = -1;
2520         reqp->aio_req_resultp = resultp;
2521         if (aio_hash_insert(reqp, aiop)) {
2522                 reqp->aio_req_next = aiop->aio_free;
2523                 aiop->aio_free = reqp;
2524                 return (EBUSY);
2525         }
2526         *nreqp = reqp;
2527         return (0);
2528 }
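
/*
 * The EBUSY return above guards against a caller naming an
 * aio_result_t that is still attached to an outstanding request.
 * Sketch of the user-level mistake this catches:
 *
 *	aio_result_t result;
 *	aioread(fd, buf1, n, 0, SEEK_SET, &result);
 *	aioread(fd, buf2, n, n, SEEK_SET, &result);	(fails, EBUSY)
 */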
2529 
2530 /*
2531  * Allocate an aio_lio_t struct.
2532  */
2533 static int
2534 aio_lio_alloc(aio_lio_t **head)
2535 {
2536         aio_lio_t *liop;
2537         aio_t *aiop = curproc->p_aio;
2538 
2539         ASSERT(MUTEX_HELD(&aiop->aio_mutex));
2540 
2541         if ((liop = aiop->aio_lio_free) != NULL) {
2542                 aiop->aio_lio_free = liop->lio_next;
2543         } else {
2544                 /*
2545                  * Check whether memory is getting tight.
2546                  * This is a temporary mechanism to avoid memory
2547                  * exhaustion by a single process until we come up
2548                  * with a per process solution such as setrlimit().
2549                  */
2550                 if (freemem < desfree)
2551                         return (EAGAIN);
2552 
2553                 liop = kmem_zalloc(sizeof (aio_lio_t), KM_NOSLEEP);
2554                 if (liop == NULL)
2555                         return (EAGAIN);
2556         }
2557         *head = liop;
2558         return (0);
2559 }
2560 
2561 /*
2562  * this is a special per-process thread that is only activated if
2563  * the process is unmapping a segment with outstanding aio. normally,
2564  * the process will have completed the aio before unmapping the
2565  * segment. If the process does unmap a segment with outstanding aio,
2566  * this special thread will guarantee that the locked pages due to
2567  * aphysio() are released, thereby permitting the segment to be
2568  * unmapped. In addition to this, the cleanup thread is woken up
2569  * during DR operations to release the locked pages.
2570  */
2571 
2572 static int
2573 aio_cleanup_thread(aio_t *aiop)
2574 {
2575         proc_t *p = curproc;
2576         struct as *as = p->p_as;
2577         int poked = 0;
2578         kcondvar_t *cvp;
2579         int exit_flag = 0;
2580         int rqclnup = 0;
2581 
2582         sigfillset(&curthread->t_hold);
2583         sigdiffset(&curthread->t_hold, &cantmask);
2584         for (;;) {
2585                 /*
2586                  * if a segment is being unmapped, and the current
2587                  * process's done queue is not empty, then every request
2588                  * on the doneq with locked resources should be forced
2589                  * to release their locks. By moving the doneq request
2590                  * to the cleanupq, aio_cleanup() will process the cleanupq,
2591                  * and place requests back onto the doneq. All requests
2592                  * processed by aio_cleanup() will have their physical
2593                  * resources unlocked.
2594                  */
2595                 mutex_enter(&aiop->aio_mutex);
2596                 if ((aiop->aio_flags & AIO_CLEANUP) == 0) {
2597                         aiop->aio_flags |= AIO_CLEANUP;
2598                         mutex_enter(&as->a_contents);
2599                         if (aiop->aio_rqclnup) {
2600                                 aiop->aio_rqclnup = 0;
2601                                 rqclnup = 1;
2602                         }
2603                         mutex_exit(&as->a_contents);
2604                         if (aiop->aio_doneq) {
2605                                 aio_req_t *doneqhead = aiop->aio_doneq;
2606                                 aiop->aio_doneq = NULL;
2607                                 aio_cleanupq_concat(aiop, doneqhead, AIO_DONEQ);
2608                         }
2609                 }
2610                 mutex_exit(&aiop->aio_mutex);
2611                 aio_cleanup(AIO_CLEANUP_THREAD);
2612                 /*
2613                  * thread should block on the cleanupcv while
2614                  * AIO_CLEANUP is set.
2615                  */
2616                 cvp = &aiop->aio_cleanupcv;
2617                 mutex_enter(&aiop->aio_mutex);
2618 
2619                 if (aiop->aio_pollq != NULL || aiop->aio_cleanupq != NULL ||
2620                     aiop->aio_notifyq != NULL ||
2621                     aiop->aio_portcleanupq != NULL) {
2622                         mutex_exit(&aiop->aio_mutex);
2623                         continue;
2624                 }
2625                 mutex_enter(&as->a_contents);
2626 
2627                 /*
2628                  * AIO_CLEANUP determines when the cleanup thread
2629                  * should be active. This flag is set when
2630                  * the cleanup thread is awakened by as_unmap() or
2631                  * due to DR operations.
2632                  * The flag is cleared when the blocking as_unmap()
2633                  * that originally awakened us is allowed to
2634                  * complete. as_unmap() blocks when trying to
2635                  * unmap a segment that has SOFTLOCKed pages. when
2636                  * the segment's pages are all SOFTUNLOCKed,
2637                  * as->a_flags & AS_UNMAPWAIT should be zero.
2638                  *
2639                  * In case of cleanup request by DR, the flag is cleared
2640                  * once all the pending aio requests have been processed.
2641                  *
2642                  * The flag shouldn't be cleared right away if the
2643                  * cleanup thread was interrupted because the process
2644                  * is doing forkall(). This happens when cv_wait_sig()
2645                  * returns zero, because it was awakened by a pokelwps().
2646                  * If the process is not exiting, it must be doing forkall().
2647                  */
2648                 if ((poked == 0) &&
2649                     ((!rqclnup && (AS_ISUNMAPWAIT(as) == 0)) ||
2650                     (aiop->aio_pending == 0))) {
2651                         aiop->aio_flags &= ~(AIO_CLEANUP | AIO_CLEANUP_PORT);
2652                         cvp = &as->a_cv;
2653                         rqclnup = 0;
2654                 }
2655                 mutex_exit(&aiop->aio_mutex);
2656                 if (poked) {
2657                         /*
2658                          * If the process is exiting/killed, don't return
2659                          * immediately without waiting for pending I/O's
2660                          * and releasing the page locks.
2661                          */
2662                         if (p->p_flag & (SEXITLWPS|SKILLED)) {
2663                                 /*
2664                                  * If exit_flag is set, then it is
2665                                  * safe to exit because we have released
2666                                  * page locks of completed I/O's.
2667                                  */
2668                                 if (exit_flag)
2669                                         break;
2670 
2671                                 mutex_exit(&as->a_contents);
2672 
2673                                 /*
2674                                  * Wait for all the pending aio to complete.
2675                                  */
2676                                 mutex_enter(&aiop->aio_mutex);
2677                                 aiop->aio_flags |= AIO_REQ_BLOCK;
2678                                 while (aiop->aio_pending != 0)
2679                                         cv_wait(&aiop->aio_cleanupcv,
2680                                             &aiop->aio_mutex);
2681                                 mutex_exit(&aiop->aio_mutex);
2682                                 exit_flag = 1;
2683                                 continue;
2684                         } else if (p->p_flag &
2685                             (SHOLDFORK|SHOLDFORK1|SHOLDWATCH)) {
2686                                 /*
2687                                  * hold LWP until it
2688                                  * is continued.
2689                                  */
2690                                 mutex_exit(&as->a_contents);
2691                                 mutex_enter(&p->p_lock);
2692                                 stop(PR_SUSPENDED, SUSPEND_NORMAL);
2693                                 mutex_exit(&p->p_lock);
2694                                 poked = 0;
2695                                 continue;
2696                         }
2697                 } else {
2698                         /*
2699                          * When started this thread will sleep on as->a_cv.
2700                          * as_unmap will awake this thread if the
2701                          * segment has SOFTLOCKed pages (poked = 0).
2702                          * 1. pokelwps() awakes this thread =>
2703                          *    break the loop to check SEXITLWPS, SHOLDFORK, etc
2704                          * 2. as_unmap awakes this thread =>
2705                          *    to break the loop it is necessary that
2706                          *    - AS_UNMAPWAIT is set (as_unmap is waiting for
2707                          *      memory to be unlocked)
2708                          *    - AIO_CLEANUP is not set
2709                          *      (if AIO_CLEANUP is set we have to wait for
2710                          *      pending requests. aio_done will send a signal
2711                          *      for every request which completes to continue
2712                          *      unmapping the corresponding address range)
2713                          * 3. A cleanup request will wake this thread up, ex.
2714                          *    by the DR operations. The aio_rqclnup flag will
2715                          *    be set.
2716                          */
2717                         while (poked == 0) {
2718                                 /*
2719                                  * Cleanup requests that came in after
2720                                  * we had just cleaned up cannot be what
2721                                  * is blocking the unmap thread, since the
2722                                  * unmap event happened first.
2723                                  * Let aio_done() wake us up if it sees a need.
2724                                  */
2725                                 if (aiop->aio_rqclnup &&
2726                                     (aiop->aio_flags & AIO_CLEANUP) == 0)
2727                                         break;
2728                                 poked = !cv_wait_sig(cvp, &as->a_contents);
2729                                 if (AS_ISUNMAPWAIT(as) == 0)
2730                                         cv_signal(cvp);
2731                                 if (aiop->aio_outstanding != 0)
2732                                         break;
2733                         }
2734                 }
2735                 mutex_exit(&as->a_contents);
2736         }
2737 exit:
2738         mutex_exit(&as->a_contents);
2739         ASSERT((curproc->p_flag & (SEXITLWPS|SKILLED)));
2740         aston(curthread);       /* make thread do post_syscall */
2741         return (0);
2742 }
2743 
2744 /*
2745  * save a reference to a user's outstanding aio in a hash list.
2746  */
2747 static int
2748 aio_hash_insert(
2749         aio_req_t *aio_reqp,
2750         aio_t *aiop)
2751 {
2752         long index;
2753         aio_result_t *resultp = aio_reqp->aio_req_resultp;
2754         aio_req_t *current;
2755         aio_req_t **nextp;
2756 
2757         index = AIO_HASH(resultp);
2758         nextp = &aiop->aio_hash[index];
2759         while ((current = *nextp) != NULL) {
2760                 if (current->aio_req_resultp == resultp)
2761                         return (DUPLICATE);
2762                 nextp = &current->aio_hash_next;
2763         }
2764         *nextp = aio_reqp;
2765         aio_reqp->aio_hash_next = NULL;
2766         return (0);
2767 }
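
/*
 * The scan above uses the pointer-to-pointer idiom: nextp always
 * addresses the link field that would have to change (the bucket
 * head or some aio_hash_next), so the tail append needs no special
 * case for an empty bucket.  The same idiom removes an entry without
 * tracking a separate "previous" pointer (sketch; "doomed" is an
 * illustrative variable, not something in this file):
 *
 *	for (nextp = &aiop->aio_hash[index]; *nextp != NULL;
 *	    nextp = &(*nextp)->aio_hash_next) {
 *		if (*nextp == doomed) {
 *			*nextp = doomed->aio_hash_next;
 *			break;
 *		}
 *	}
 */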
2768 
2769 static int
2770 (*check_vp(struct vnode *vp, int mode))(vnode_t *, struct aio_req *,
2771     cred_t *)
2772 {
2773         struct snode *sp;
2774         dev_t           dev;
2775         struct cb_ops   *cb;
2776         major_t         major;
2777         int             (*aio_func)();
2778 
2779         dev = vp->v_rdev;
2780         major = getmajor(dev);
2781 
2782         /*
2783          * return NULL for requests to files and STREAMs so
2784          * that libaio takes care of them.
2785          */
2786         if (vp->v_type == VCHR) {
2787                 /* no stream device for kaio */
2788                 if (STREAMSTAB(major)) {
2789                         return (NULL);
2790                 }
2791         } else {
2792                 return (NULL);
2793         }
2794 
2795         /*
2796          * Check old drivers which do not have async I/O entry points.
2797          */
2798         if (devopsp[major]->devo_rev < 3)
2799                 return (NULL);
2800 
2801         cb = devopsp[major]->devo_cb_ops;
2802 
2803         if (cb->cb_rev < 1)
2804                 return (NULL);
2805 
2806         /*
2807          * Check whether this device is a block device.
2808          * Kaio is not supported for devices like tty.
2809          */
2810         if (cb->cb_strategy == nodev || cb->cb_strategy == NULL)
2811                 return (NULL);
2812 
2813         /*
2814          * Clustering: If vnode is a PXFS vnode, then the device may be remote.
2815          * We cannot call the driver directly. Instead return the
2816          * PXFS functions.
2817          */
2818 
2819         if (IS_PXFSVP(vp)) {
2820                 if (mode & FREAD)
2821                         return (clpxfs_aio_read);
2822                 else
2823                         return (clpxfs_aio_write);
2824         }
2825         if (mode & FREAD)
2826                 aio_func = (cb->cb_aread == nodev) ? NULL : driver_aio_read;
2827         else
2828                 aio_func = (cb->cb_awrite == nodev) ? NULL : driver_aio_write;
2829 
2830         /*
2831          * Do we need this?
2832          * nodev returns ENXIO anyway.
2833          */
2834         if (aio_func == nodev)
2835                 return (NULL);
2836 
2837         sp = VTOS(vp);
2838         smark(sp, SACC);
2839         return (aio_func);
2840 }
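
/*
 * In summary, the checks above resolve as follows (evaluated in
 * order):
 *
 *	not VCHR			NULL (libaio handles it)
 *	STREAMS device			NULL (libaio handles it)
 *	devo_rev < 3 or cb_rev < 1	NULL (driver predates async I/O)
 *	no strategy routine		NULL (tty-like, not disk-like)
 *	PXFS vnode			clpxfs_aio_read/clpxfs_aio_write
 *	cb_aread/cb_awrite == nodev	NULL
 *	otherwise			driver_aio_read/driver_aio_write
 */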
2841 
2842 /*
2843  * Clustering: We want check_vp to return a function prototyped
2844  * correctly that will be common to both PXFS and regular case.
2845  * We define this intermediate function that will do the right
2846  * thing for driver cases.
2847  */
2848 
2849 static int
2850 driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
2851 {
2852         dev_t dev;
2853         struct cb_ops   *cb;
2854 
2855         ASSERT(vp->v_type == VCHR);
2856         ASSERT(!IS_PXFSVP(vp));
2857         dev = VTOS(vp)->s_dev;
2858         ASSERT(STREAMSTAB(getmajor(dev)) == NULL);
2859 
2860         cb = devopsp[getmajor(dev)]->devo_cb_ops;
2861 
2862         ASSERT(cb->cb_awrite != nodev);
2863         return ((*cb->cb_awrite)(dev, aio, cred_p));
2864 }
2865 
2866 /*
2867  * Clustering: check_vp() must return a function whose prototype is
2868  * common to both the PXFS and the regular driver case.  This
2869  * intermediate function adapts the driver's cb_aread entry point
2870  * to that common prototype.
2871  */
2872 
2873 static int
2874 driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
2875 {
2876         dev_t dev;
2877         struct cb_ops   *cb;
2878 
2879         ASSERT(vp->v_type == VCHR);
2880         ASSERT(!IS_PXFSVP(vp));
2881         dev = VTOS(vp)->s_dev;
2882         ASSERT(!STREAMSTAB(getmajor(dev)));
2883 
2884         cb = devopsp[getmajor(dev)]->devo_cb_ops;
2885 
2886         ASSERT(cb->cb_aread != nodev);
2887         return ((*cb->cb_aread)(dev, aio, cred_p));
2888 }
2889 
2890 /*
2891  * This routine is called when a largefile call is made by a 32-bit
2892  * process on an ILP32 or LP64 kernel.  All 64-bit processes are
2893  * largefile by definition and call alio() instead.
2894  */
2895 static int
2896 alioLF(
2897         int             mode_arg,
2898         void            *aiocb_arg,
2899         int             nent,
2900         void            *sigev)
2901 {
2902         file_t          *fp;
2903         file_t          *prev_fp = NULL;
2904         int             prev_mode = -1;
2905         struct vnode    *vp;
2906         aio_lio_t       *head;
2907         aio_req_t       *reqp;
2908         aio_t           *aiop;
2909         caddr_t         cbplist;
2910         aiocb64_32_t    cb64;
2911         aiocb64_32_t    *aiocb = &cb64;
2912         aiocb64_32_t    *cbp;
2913         caddr32_t       *ucbp;
2914 #ifdef _LP64
2915         aiocb_t         aiocb_n;
2916 #endif
2917         struct sigevent32       sigevk;
2918         sigqueue_t      *sqp;
2919         int             (*aio_func)();
2920         int             mode;
2921         int             error = 0;
2922         int             aio_errors = 0;
2923         int             i;
2924         size_t          ssize;
2925         int             deadhead = 0;
2926         int             aio_notsupported = 0;
2927         int             lio_head_port;
2928         int             aio_port;
2929         int             aio_thread;
2930         port_kevent_t   *pkevtp = NULL;
2931         int             portused = 0;
2932         port_notify32_t pnotify;
2933         int             event;
2934 
2935         aiop = curproc->p_aio;
2936         if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
2937                 return (EINVAL);
2938 
2939         ASSERT(get_udatamodel() == DATAMODEL_ILP32);
2940 
2941         ssize = (sizeof (caddr32_t) * nent);
2942         cbplist = kmem_alloc(ssize, KM_SLEEP);
2943         ucbp = (caddr32_t *)cbplist;
2944 
2945         if (copyin(aiocb_arg, cbplist, ssize) ||
2946             (sigev && copyin(sigev, &sigevk, sizeof (sigevk)))) {
2947                 kmem_free(cbplist, ssize);
2948                 return (EFAULT);
2949         }
2950 
2951         /*
              * Event ports: for SIGEV_THREAD the port number and user cookie
              * arrive directly in sigev_signo and sigev_value; for SIGEV_PORT
              * a port_notify_t is copied in from sigev_value.sival_ptr.
              */
2952         if (sigev &&
2953             (sigevk.sigev_notify == SIGEV_THREAD ||
2954             sigevk.sigev_notify == SIGEV_PORT)) {
2955                 if (sigevk.sigev_notify == SIGEV_THREAD) {
2956                         pnotify.portnfy_port = sigevk.sigev_signo;
2957                         pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
2958                 } else if (copyin(
2959                     (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
2960                     &pnotify, sizeof (pnotify))) {
2961                         kmem_free(cbplist, ssize);
2962                         return (EFAULT);
2963                 }
2964                 error = port_alloc_event(pnotify.portnfy_port,
2965                     PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
2966                 if (error) {
2967                         if (error == ENOMEM || error == EAGAIN)
2968                                 error = EAGAIN;
2969                         else
2970                                 error = EINVAL;
2971                         kmem_free(cbplist, ssize);
2972                         return (error);
2973                 }
2974                 lio_head_port = pnotify.portnfy_port;
2975                 portused = 1;
2976         }
2977 
2978         /*
2979          * A list head is allocated if the caller waits on the list
2980          * (LIO_WAIT) or if notification is enabled for it.
2981          */
2982         head = NULL;
2983 
2984         if (mode_arg == LIO_WAIT || sigev) {
2985                 mutex_enter(&aiop->aio_mutex);
2986                 error = aio_lio_alloc(&head);
2987                 mutex_exit(&aiop->aio_mutex);
2988                 if (error)
2989                         goto done;
2990                 deadhead = 1;
2991                 head->lio_nent = nent;
2992                 head->lio_refcnt = nent;
2993                 head->lio_port = -1;
2994                 head->lio_portkev = NULL;
2995                 if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
2996                     sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
2997                         sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
2998                         if (sqp == NULL) {
2999                                 error = EAGAIN;
3000                                 goto done;
3001                         }
3002                         sqp->sq_func = NULL;
3003                         sqp->sq_next = NULL;
3004                         sqp->sq_info.si_code = SI_ASYNCIO;
3005                         sqp->sq_info.si_pid = curproc->p_pid;
3006                         sqp->sq_info.si_ctid = PRCTID(curproc);
3007                         sqp->sq_info.si_zoneid = getzoneid();
3008                         sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
3009                         sqp->sq_info.si_signo = sigevk.sigev_signo;
3010                         sqp->sq_info.si_value.sival_int =
3011                             sigevk.sigev_value.sival_int;
3012                         head->lio_sigqp = sqp;
3013                 } else {
3014                         head->lio_sigqp = NULL;
3015                 }
3016                 if (pkevtp) {
3017                         /*
3018                          * Prepare the data to send when the list of
3019                          * aiocbs has completed.
3020                          */
3021                         port_init_event(pkevtp, (uintptr_t)sigev,
3022                             (void *)(uintptr_t)pnotify.portnfy_user,
3023                             NULL, head);
3024                         pkevtp->portkev_events = AIOLIO64;
3025                         head->lio_portkev = pkevtp;
3026                         head->lio_port = pnotify.portnfy_port;
3027                 }
3028         }
3029 
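             /*
              * Process each aiocb in the list: copy it in, validate the fd
              * and its open mode, set up the kernel request, arm any
              * per-request event-port notification, and hand the request to
              * the driver's async entry point.
              */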
3030         for (i = 0; i < nent; i++, ucbp++) {
3031 
3032                 cbp = (aiocb64_32_t *)(uintptr_t)*ucbp;
3033                 /* skip entry if it can't be copied. */
3034                 if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
3035                         if (head) {
3036                                 mutex_enter(&aiop->aio_mutex);
3037                                 head->lio_nent--;
3038                                 head->lio_refcnt--;
3039                                 mutex_exit(&aiop->aio_mutex);
3040                         }
3041                         continue;
3042                 }
3043 
3044                 /* skip if opcode for aiocb is LIO_NOP */
3045                 mode = aiocb->aio_lio_opcode;
3046                 if (mode == LIO_NOP) {
3047                         cbp = NULL;
3048                         if (head) {
3049                                 mutex_enter(&aiop->aio_mutex);
3050                                 head->lio_nent--;
3051                                 head->lio_refcnt--;
3052                                 mutex_exit(&aiop->aio_mutex);
3053                         }
3054                         continue;
3055                 }
3056 
3057                 /* increment file descriptor's ref count. */
3058                 if ((fp = getf(aiocb->aio_fildes)) == NULL) {
3059                         lio_set_uerror(&cbp->aio_resultp, EBADF);
3060                         if (head) {
3061                                 mutex_enter(&aiop->aio_mutex);
3062                                 head->lio_nent--;
3063                                 head->lio_refcnt--;
3064                                 mutex_exit(&aiop->aio_mutex);
3065                         }
3066                         aio_errors++;
3067                         continue;
3068                 }
3069 
3070                 /*
3071                  * Check that the file was opened for the access mode
                      * (read or write) that this request requires.
3072                  */
3073                 if ((fp->f_flag & mode) == 0) {
3074                         releasef(aiocb->aio_fildes);
3075                         lio_set_uerror(&cbp->aio_resultp, EBADF);
3076                         if (head) {
3077                                 mutex_enter(&aiop->aio_mutex);
3078                                 head->lio_nent--;
3079                                 head->lio_refcnt--;
3080                                 mutex_exit(&aiop->aio_mutex);
3081                         }
3082                         aio_errors++;
3083                         continue;
3084                 }
3085 
3086                 /*
3087                  * Common case: consecutive requests target the same fd
3088                  * with the same r/w operation, so the check_vp() result
3089                  * is reused.  Filesystems without kaio support (e.g.
                      * UFS) get EBADFD.
3090                  */
3091                 vp = fp->f_vnode;
3092                 if (fp != prev_fp || mode != prev_mode) {
3093                         aio_func = check_vp(vp, mode);
3094                         if (aio_func == NULL) {
3095                                 prev_fp = NULL;
3096                                 releasef(aiocb->aio_fildes);
3097                                 lio_set_uerror(&cbp->aio_resultp, EBADFD);
3098                                 aio_notsupported++;
3099                                 if (head) {
3100                                         mutex_enter(&aiop->aio_mutex);
3101                                         head->lio_nent--;
3102                                         head->lio_refcnt--;
3103                                         mutex_exit(&aiop->aio_mutex);
3104                                 }
3105                                 continue;
3106                         } else {
3107                                 prev_fp = fp;
3108                                 prev_mode = mode;
3109                         }
3110                 }
3111 
3112 #ifdef  _LP64
3113                 aiocb_LFton(aiocb, &aiocb_n);
3114                 error = aio_req_setup(&reqp, aiop, &aiocb_n,
3115                     (aio_result_t *)&cbp->aio_resultp, vp, 0);
3116 #else
3117                 error = aio_req_setupLF(&reqp, aiop, aiocb,
3118                     (aio_result_t *)&cbp->aio_resultp, vp, 0);
3119 #endif  /* _LP64 */
3120                 if (error) {
3121                         releasef(aiocb->aio_fildes);
3122                         lio_set_uerror(&cbp->aio_resultp, error);
3123                         if (head) {
3124                                 mutex_enter(&aiop->aio_mutex);
3125                                 head->lio_nent--;
3126                                 head->lio_refcnt--;
3127                                 mutex_exit(&aiop->aio_mutex);
3128                         }
3129                         aio_errors++;
3130                         continue;
3131                 }
3132 
3133                 reqp->aio_req_lio = head;
3134                 deadhead = 0;
3135 
3136                 /*
3137                  * Set the errno field now, before sending the request to
3138                  * the driver, to avoid a race condition.
3139                  */
3140                 (void) suword32(&cbp->aio_resultp.aio_errno,
3141                     EINPROGRESS);
3142 
3143                 reqp->aio_req_iocb.iocb32 = *ucbp;
3144 
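                     /*
                      * If this request notifies the same port as the list
                      * head, the head's kevent is duplicated rather than
                      * allocating a new one.
                      */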
3145                 event = (mode == LIO_READ)? AIOAREAD64 : AIOAWRITE64;
3146                 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
3147                 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
3148                 if (aio_port | aio_thread) {
3149                         port_kevent_t *lpkevp;
3150                         /*
3151                          * Prepare data to send with each aiocb completed.
3152                          */
3153                         if (aio_port) {
3154                                 void *paddr = (void *)(uintptr_t)
3155                                     aiocb->aio_sigevent.sigev_value.sival_ptr;
3156                                 if (copyin(paddr, &pnotify, sizeof (pnotify)))
3157                                         error = EFAULT;
3158                         } else {        /* aio_thread */
3159                                 pnotify.portnfy_port =
3160                                     aiocb->aio_sigevent.sigev_signo;
3161                                 pnotify.portnfy_user =
3162                                     aiocb->aio_sigevent.sigev_value.sival_ptr;
3163                         }
3164                         if (error)
3165                                 /* EMPTY */;
3166                         else if (pkevtp != NULL &&
3167                             pnotify.portnfy_port == lio_head_port)
3168                                 error = port_dup_event(pkevtp, &lpkevp,
3169                                     PORT_ALLOC_DEFAULT);
3170                         else
3171                                 error = port_alloc_event(pnotify.portnfy_port,
3172                                     PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
3173                                     &lpkevp);
3174                         if (error == 0) {
3175                                 port_init_event(lpkevp, (uintptr_t)*ucbp,
3176                                     (void *)(uintptr_t)pnotify.portnfy_user,
3177                                     aio_port_callback, reqp);
3178                                 lpkevp->portkev_events = event;
3179                                 reqp->aio_req_portkev = lpkevp;
3180                                 reqp->aio_req_port = pnotify.portnfy_port;
3181                         }
3182                 }
3183 
3184                 /*
3185                  * Send the request to the driver.
3186                  */
3187                 if (error == 0) {
3188                         if (aiocb->aio_nbytes == 0) {
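                                     /*
                                      * A zero-length request completes
                                      * immediately; the driver is not called.
                                      */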
3189                                 clear_active_fd(aiocb->aio_fildes);
3190                                 aio_zerolen(reqp);
3191                                 continue;
3192                         }
3193                         error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
3194                             CRED());
3195                 }
3196 
3197                 /*
3198                  * The fd's ref count is not decremented until the I/O
3199                  * has completed, unless there was an error.
3200                  */
3201                 if (error) {
3202                         releasef(aiocb->aio_fildes);
3203                         lio_set_uerror(&cbp->aio_resultp, error);
3204                         if (head) {
3205                                 mutex_enter(&aiop->aio_mutex);
3206                                 head->lio_nent--;
3207                                 head->lio_refcnt--;
3208                                 mutex_exit(&aiop->aio_mutex);
3209                         }
3210                         if (error == ENOTSUP)
3211                                 aio_notsupported++;
3212                         else
3213                                 aio_errors++;
3214                         lio_set_error(reqp, portused);
3215                 } else {
3216                         clear_active_fd(aiocb->aio_fildes);
3217                 }
3218         }
3219 
3220         if (aio_notsupported) {
3221                 error = ENOTSUP;
3222         } else if (aio_errors) {
3223                 /*
3224                  * return EIO if any request failed
3225                  */
3226                 error = EIO;
3227         }
3228 
3229         if (mode_arg == LIO_WAIT) {
3230                 mutex_enter(&aiop->aio_mutex);
3231                 while (head->lio_refcnt > 0) {
3232                         if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
3233                                 mutex_exit(&aiop->aio_mutex);
3234                                 error = EINTR;
3235                                 goto done;
3236                         }
3237                 }
3238                 mutex_exit(&aiop->aio_mutex);
3239                 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_LARGEFILE);
3240         }
3241 
3242 done:
3243         kmem_free(cbplist, ssize);
3244         if (deadhead) {
3245                 if (head->lio_sigqp)
3246                         kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
3247                 if (head->lio_portkev)
3248                         port_free_event(head->lio_portkev);
3249                 kmem_free(head, sizeof (aio_lio_t));
3250         }
3251         return (error);
3252 }
3253 
3254 #ifdef  _SYSCALL32_IMPL
3255 static void
3256 aiocb_LFton(aiocb64_32_t *src, aiocb_t *dest)
3257 {
3258         dest->aio_fildes = src->aio_fildes;
3259         dest->aio_buf = (void *)(uintptr_t)src->aio_buf;
3260         dest->aio_nbytes = (size_t)src->aio_nbytes;
3261         dest->aio_offset = (off_t)src->aio_offset;
3262         dest->aio_reqprio = src->aio_reqprio;
3263         dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
3264         dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;
3265 
3266         /*
3267          * See comment in sigqueue32() on handling of 32-bit
3268          * sigvals in a 64-bit kernel.
3269          */
3270         dest->aio_sigevent.sigev_value.sival_int =
3271             (int)src->aio_sigevent.sigev_value.sival_int;
3272         dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
3273             (uintptr_t)src->aio_sigevent.sigev_notify_function;
3274         dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
3275             (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
3276         dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
3277         dest->aio_lio_opcode = src->aio_lio_opcode;
3278         dest->aio_state = src->aio_state;
3279         dest->aio__pad[0] = src->aio__pad[0];
3280 }
3281 #endif
3282 
3283 /*
3284  * This function is used only for largefile calls made by
3285  * 32-bit applications.
3286  */
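     /*
      * Largefile counterpart of aio_req_setup(): the request parameters
      * are taken from an aiocb64_32_t rather than a native aiocb_t.
      */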
3287 static int
3288 aio_req_setupLF(
3289         aio_req_t       **reqpp,
3290         aio_t           *aiop,
3291         aiocb64_32_t    *arg,
3292         aio_result_t    *resultp,
3293         vnode_t         *vp,
3294         int             old_solaris_req)
3295 {
3296         sigqueue_t      *sqp = NULL;
3297         aio_req_t       *reqp;
3298         struct uio      *uio;
3299         struct sigevent32 *sigev;
3300         int             error;
3301 
3302         sigev = &arg->aio_sigevent;
3303         if (sigev->sigev_notify == SIGEV_SIGNAL &&
3304             sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
3305                 sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
3306                 if (sqp == NULL)
3307                         return (EAGAIN);
3308                 sqp->sq_func = NULL;
3309                 sqp->sq_next = NULL;
3310                 sqp->sq_info.si_code = SI_ASYNCIO;
3311                 sqp->sq_info.si_pid = curproc->p_pid;
3312                 sqp->sq_info.si_ctid = PRCTID(curproc);
3313                 sqp->sq_info.si_zoneid = getzoneid();
3314                 sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
3315                 sqp->sq_info.si_signo = sigev->sigev_signo;
3316                 sqp->sq_info.si_value.sival_int = sigev->sigev_value.sival_int;
3317         }
3318 
3319         mutex_enter(&aiop->aio_mutex);
3320 
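             /*
              * AIO_REQ_BLOCK is set while outstanding requests are being
              * drained (e.g. at process exit); new requests are rejected
              * with EIO.
              */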
3321         if (aiop->aio_flags & AIO_REQ_BLOCK) {
3322                 mutex_exit(&aiop->aio_mutex);
3323                 if (sqp)
3324                         kmem_free(sqp, sizeof (sigqueue_t));
3325                 return (EIO);
3326         }
3327         /*
3328          * get an aio_reqp from the free list or allocate one
3329          * from dynamic memory.
3330          */
3331         if (error = aio_req_alloc(&reqp, resultp)) {
3332                 mutex_exit(&aiop->aio_mutex);
3333                 if (sqp)
3334                         kmem_free(sqp, sizeof (sigqueue_t));
3335                 return (error);
3336         }
3337         aiop->aio_pending++;
3338         aiop->aio_outstanding++;
3339         reqp->aio_req_flags = AIO_PENDING;
3340         if (old_solaris_req) {
3341                 /* this is an old Solaris aio request */
3342                 reqp->aio_req_flags |= AIO_SOLARIS;
3343                 aiop->aio_flags |= AIO_SOLARIS_REQ;
3344         }
3345         if (sigev->sigev_notify == SIGEV_THREAD ||
3346             sigev->sigev_notify == SIGEV_PORT)
3347                 aio_enq(&aiop->aio_portpending, reqp, 0);
3348         mutex_exit(&aiop->aio_mutex);
3349         /*
3350          * initialize aio request.
3351          */
3352         reqp->aio_req_fd = arg->aio_fildes;
3353         reqp->aio_req_sigqp = sqp;
3354         reqp->aio_req_iocb.iocb = NULL;
3355         reqp->aio_req_lio = NULL;
3356         reqp->aio_req_buf.b_file = vp;
3357         uio = reqp->aio_req.aio_uio;
3358         uio->uio_iovcnt = 1;
3359         uio->uio_iov->iov_base = (caddr_t)(uintptr_t)arg->aio_buf;
3360         uio->uio_iov->iov_len = arg->aio_nbytes;
3361         uio->uio_loffset = arg->aio_offset;
3362         *reqpp = reqp;
3363         return (0);
3364 }
3365 
3366 /*
3367  * This routine is called when a non-largefile call is made by a 32-bit
3368  * process on an ILP32 or LP64 kernel.
3369  */
3370 static int
3371 alio32(
3372         int             mode_arg,
3373         void            *aiocb_arg,
3374         int             nent,
3375         void            *sigev)
3376 {
3377         file_t          *fp;
3378         file_t          *prev_fp = NULL;
3379         int             prev_mode = -1;
3380         struct vnode    *vp;
3381         aio_lio_t       *head;
3382         aio_req_t       *reqp;
3383         aio_t           *aiop;
3384         caddr_t         cbplist;
3385         aiocb_t         cb;
3386         aiocb_t         *aiocb = &cb;
3387 #ifdef  _LP64
3388         aiocb32_t       *cbp;
3389         caddr32_t       *ucbp;
3390         aiocb32_t       cb32;
3391         aiocb32_t       *aiocb32 = &cb32;
3392         struct sigevent32       sigevk;
3393 #else
3394         aiocb_t         *cbp, **ucbp;
3395         struct sigevent sigevk;
3396 #endif
3397         sigqueue_t      *sqp;
3398         int             (*aio_func)();
3399         int             mode;
3400         int             error = 0;
3401         int             aio_errors = 0;
3402         int             i;
3403         size_t          ssize;
3404         int             deadhead = 0;
3405         int             aio_notsupported = 0;
3406         int             lio_head_port;
3407         int             aio_port;
3408         int             aio_thread;
3409         port_kevent_t   *pkevtp = NULL;
3410         int             portused = 0;
3411 #ifdef  _LP64
3412         port_notify32_t pnotify;
3413 #else
3414         port_notify_t   pnotify;
3415 #endif
3416         int             event;
3417 
3418         aiop = curproc->p_aio;
3419         if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
3420                 return (EINVAL);
3421 
3422 #ifdef  _LP64
3423         ssize = (sizeof (caddr32_t) * nent);
3424 #else
3425         ssize = (sizeof (aiocb_t *) * nent);
3426 #endif
3427         cbplist = kmem_alloc(ssize, KM_SLEEP);
3428         ucbp = (void *)cbplist;
3429 
3430         if (copyin(aiocb_arg, cbplist, ssize) ||
3431             (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent32)))) {
3432                 kmem_free(cbplist, ssize);
3433                 return (EFAULT);
3434         }
3435 
3436         /*
              * Event ports: as in alioLF(), SIGEV_THREAD carries the port
              * number and user cookie in sigev_signo and sigev_value, while
              * SIGEV_PORT supplies a port_notify_t via sigev_value.sival_ptr.
              */
3437         if (sigev &&
3438             (sigevk.sigev_notify == SIGEV_THREAD ||
3439             sigevk.sigev_notify == SIGEV_PORT)) {
3440                 if (sigevk.sigev_notify == SIGEV_THREAD) {
3441                         pnotify.portnfy_port = sigevk.sigev_signo;
3442                         pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
3443                 } else if (copyin(
3444                     (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
3445                     &pnotify, sizeof (pnotify))) {
3446                         kmem_free(cbplist, ssize);
3447                         return (EFAULT);
3448                 }
3449                 error = port_alloc_event(pnotify.portnfy_port,
3450                     PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
3451                 if (error) {
3452                         if (error == ENOMEM || error == EAGAIN)
3453                                 error = EAGAIN;
3454                         else
3455                                 error = EINVAL;
3456                         kmem_free(cbplist, ssize);
3457                         return (error);
3458                 }
3459                 lio_head_port = pnotify.portnfy_port;
3460                 portused = 1;
3461         }
3462 
3463         /*
3464          * A list head is allocated if the caller waits on the list
3465          * (LIO_WAIT) or if notification is enabled for it.
3466          */
3467         head = NULL;
3468 
3469         if (mode_arg == LIO_WAIT || sigev) {
3470                 mutex_enter(&aiop->aio_mutex);
3471                 error = aio_lio_alloc(&head);
3472                 mutex_exit(&aiop->aio_mutex);
3473                 if (error)
3474                         goto done;
3475                 deadhead = 1;
3476                 head->lio_nent = nent;
3477                 head->lio_refcnt = nent;
3478                 head->lio_port = -1;
3479                 head->lio_portkev = NULL;
3480                 if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
3481                     sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
3482                         sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
3483                         if (sqp == NULL) {
3484                                 error = EAGAIN;
3485                                 goto done;
3486                         }
3487                         sqp->sq_func = NULL;
3488                         sqp->sq_next = NULL;
3489                         sqp->sq_info.si_code = SI_ASYNCIO;
3490                         sqp->sq_info.si_pid = curproc->p_pid;
3491                         sqp->sq_info.si_ctid = PRCTID(curproc);
3492                         sqp->sq_info.si_zoneid = getzoneid();
3493                         sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
3494                         sqp->sq_info.si_signo = sigevk.sigev_signo;
3495                         sqp->sq_info.si_value.sival_int =
3496                             sigevk.sigev_value.sival_int;
3497                         head->lio_sigqp = sqp;
3498                 } else {
3499                         head->lio_sigqp = NULL;
3500                 }
3501                 if (pkevtp) {
3502                         /*
3503                          * Prepare the data to send when the list of
3504                          * aiocbs has completed.
3505                          */
3506                         port_init_event(pkevtp, (uintptr_t)sigev,
3507                             (void *)(uintptr_t)pnotify.portnfy_user,
3508                             NULL, head);
3509                         pkevtp->portkev_events = AIOLIO;
3510                         head->lio_portkev = pkevtp;
3511                         head->lio_port = pnotify.portnfy_port;
3512                 }
3513         }
3514 
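             /*
              * This loop mirrors the one in alioLF(): on an LP64 kernel each
              * 32-bit aiocb is copied in and widened with aiocb_32ton()
              * before the request is set up and dispatched.
              */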
3515         for (i = 0; i < nent; i++, ucbp++) {
3516 
3517                 /* skip entry if it can't be copied. */
3518 #ifdef  _LP64
3519                 cbp = (aiocb32_t *)(uintptr_t)*ucbp;
3520                 if (cbp == NULL || copyin(cbp, aiocb32, sizeof (*aiocb32)))
3521 #else
3522                 cbp = (aiocb_t *)*ucbp;
3523                 if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb)))
3524 #endif
3525                 {
3526                         if (head) {
3527                                 mutex_enter(&aiop->aio_mutex);
3528                                 head->lio_nent--;
3529                                 head->lio_refcnt--;
3530                                 mutex_exit(&aiop->aio_mutex);
3531                         }
3532                         continue;
3533                 }
3534 #ifdef  _LP64
3535                 /*
3536                  * copy the 32-bit structure into the 64-bit structure
3537                  */
3538                 aiocb_32ton(aiocb32, aiocb);
3539 #endif /* _LP64 */
3540 
3541                 /* skip if opcode for aiocb is LIO_NOP */
3542                 mode = aiocb->aio_lio_opcode;
3543                 if (mode == LIO_NOP) {
3544                         cbp = NULL;
3545                         if (head) {
3546                                 mutex_enter(&aiop->aio_mutex);
3547                                 head->lio_nent--;
3548                                 head->lio_refcnt--;
3549                                 mutex_exit(&aiop->aio_mutex);
3550                         }
3551                         continue;
3552                 }
3553 
3554                 /* increment file descriptor's ref count. */
3555                 if ((fp = getf(aiocb->aio_fildes)) == NULL) {
3556                         lio_set_uerror(&cbp->aio_resultp, EBADF);
3557                         if (head) {
3558                                 mutex_enter(&aiop->aio_mutex);
3559                                 head->lio_nent--;
3560                                 head->lio_refcnt--;
3561                                 mutex_exit(&aiop->aio_mutex);
3562                         }
3563                         aio_errors++;
3564                         continue;
3565                 }
3566 
3567                 /*
3568                  * Check that the file was opened for the access mode
                      * (read or write) that this request requires.
3569                  */
3570                 if ((fp->f_flag & mode) == 0) {
3571                         releasef(aiocb->aio_fildes);
3572                         lio_set_uerror(&cbp->aio_resultp, EBADF);
3573                         if (head) {
3574                                 mutex_enter(&aiop->aio_mutex);
3575                                 head->lio_nent--;
3576                                 head->lio_refcnt--;
3577                                 mutex_exit(&aiop->aio_mutex);
3578                         }
3579                         aio_errors++;
3580                         continue;
3581                 }
3582 
3583                 /*
3584                  * Common case: consecutive requests target the same fd
3585                  * with the same r/w operation, so the check_vp() result
3586                  * is reused.  Filesystems without kaio support (e.g.
                      * UFS) get EBADFD.
3587                  */
3588                 vp = fp->f_vnode;
3589                 if (fp != prev_fp || mode != prev_mode) {
3590                         aio_func = check_vp(vp, mode);
3591                         if (aio_func == NULL) {
3592                                 prev_fp = NULL;
3593                                 releasef(aiocb->aio_fildes);
3594                                 lio_set_uerror(&cbp->aio_resultp, EBADFD);
3595                                 aio_notsupported++;
3596                                 if (head) {
3597                                         mutex_enter(&aiop->aio_mutex);
3598                                         head->lio_nent--;
3599                                         head->lio_refcnt--;
3600                                         mutex_exit(&aiop->aio_mutex);
3601                                 }
3602                                 continue;
3603                         } else {
3604                                 prev_fp = fp;
3605                                 prev_mode = mode;
3606                         }
3607                 }
3608 
3609                 error = aio_req_setup(&reqp, aiop, aiocb,
3610                     (aio_result_t *)&cbp->aio_resultp, vp, 0);
3611                 if (error) {
3612                         releasef(aiocb->aio_fildes);
3613                         lio_set_uerror(&cbp->aio_resultp, error);
3614                         if (head) {
3615                                 mutex_enter(&aiop->aio_mutex);
3616                                 head->lio_nent--;
3617                                 head->lio_refcnt--;
3618                                 mutex_exit(&aiop->aio_mutex);
3619                         }
3620                         aio_errors++;
3621                         continue;
3622                 }
3623 
3624                 reqp->aio_req_lio = head;
3625                 deadhead = 0;
3626 
3627                 /*
3628                  * Set the errno field now, before sending the request to
3629                  * the driver, to avoid a race condition.
3630                  */
3631                 (void) suword32(&cbp->aio_resultp.aio_errno,
3632                     EINPROGRESS);
3633 
3634                 reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)cbp;
3635 
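                     /*
                      * As in alioLF(), duplicate the list head's kevent when
                      * this request notifies the same port; otherwise
                      * allocate a fresh one.
                      */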
3636                 event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
3637                 aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
3638                 aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
3639                 if (aio_port | aio_thread) {
3640                         port_kevent_t *lpkevp;
3641                         /*
3642                          * Prepare data to send with each aiocb completed.
3643                          */
3644 #ifdef _LP64
3645                         if (aio_port) {
3646                                 void *paddr = (void  *)(uintptr_t)
3647                                     aiocb32->aio_sigevent.sigev_value.sival_ptr;
3648                                 if (copyin(paddr, &pnotify, sizeof (pnotify)))
3649                                         error = EFAULT;
3650                         } else {        /* aio_thread */
3651                                 pnotify.portnfy_port =
3652                                     aiocb32->aio_sigevent.sigev_signo;
3653                                 pnotify.portnfy_user =
3654                                     aiocb32->aio_sigevent.sigev_value.sival_ptr;
3655                         }
3656 #else
3657                         if (aio_port) {
3658                                 void *paddr =
3659                                     aiocb->aio_sigevent.sigev_value.sival_ptr;
3660                                 if (copyin(paddr, &pnotify, sizeof (pnotify)))
3661                                         error = EFAULT;
3662                         } else {        /* aio_thread */
3663                                 pnotify.portnfy_port =
3664                                     aiocb->aio_sigevent.sigev_signo;
3665                                 pnotify.portnfy_user =
3666                                     aiocb->aio_sigevent.sigev_value.sival_ptr;
3667                         }
3668 #endif
3669                         if (error)
3670                                 /* EMPTY */;
3671                         else if (pkevtp != NULL &&
3672                             pnotify.portnfy_port == lio_head_port)
3673                                 error = port_dup_event(pkevtp, &lpkevp,
3674                                     PORT_ALLOC_DEFAULT);
3675                         else
3676                                 error = port_alloc_event(pnotify.portnfy_port,
3677                                     PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
3678                                     &lpkevp);
3679                         if (error == 0) {
3680                                 port_init_event(lpkevp, (uintptr_t)cbp,
3681                                     (void *)(uintptr_t)pnotify.portnfy_user,
3682                                     aio_port_callback, reqp);
3683                                 lpkevp->portkev_events = event;
3684                                 reqp->aio_req_portkev = lpkevp;
3685                                 reqp->aio_req_port = pnotify.portnfy_port;
3686                         }
3687                 }
3688 
3689                 /*
3690                  * Send the request to the driver.
3691                  */
3692                 if (error == 0) {
3693                         if (aiocb->aio_nbytes == 0) {
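                                     /*
                                      * A zero-length request completes
                                      * immediately; the driver is not called.
                                      */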
3694                                 clear_active_fd(aiocb->aio_fildes);
3695                                 aio_zerolen(reqp);
3696                                 continue;
3697                         }
3698                         error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
3699                             CRED());
3700                 }
3701 
3702                 /*
3703                  * The fd's ref count is not decremented until the I/O
3704                  * has completed, unless there was an error.
3705                  */
3706                 if (error) {
3707                         releasef(aiocb->aio_fildes);
3708                         lio_set_uerror(&cbp->aio_resultp, error);
3709                         if (head) {
3710                                 mutex_enter(&aiop->aio_mutex);
3711                                 head->lio_nent--;
3712                                 head->lio_refcnt--;
3713                                 mutex_exit(&aiop->aio_mutex);
3714                         }
3715                         if (error == ENOTSUP)
3716                                 aio_notsupported++;
3717                         else
3718                                 aio_errors++;
3719                         lio_set_error(reqp, portused);
3720                 } else {
3721                         clear_active_fd(aiocb->aio_fildes);
3722                 }
3723         }
3724 
3725         if (aio_notsupported) {
3726                 error = ENOTSUP;
3727         } else if (aio_errors) {
3728                 /*
3729                  * return EIO if any request failed
3730                  */
3731                 error = EIO;
3732         }
3733 
3734         if (mode_arg == LIO_WAIT) {
3735                 mutex_enter(&aiop->aio_mutex);
3736                 while (head->lio_refcnt > 0) {
3737                         if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
3738                                 mutex_exit(&aiop->aio_mutex);
3739                                 error = EINTR;
3740                                 goto done;
3741                         }
3742                 }
3743                 mutex_exit(&aiop->aio_mutex);
3744                 alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_32);
3745         }
3746 
3747 done:
3748         kmem_free(cbplist, ssize);
3749         if (deadhead) {
3750                 if (head->lio_sigqp)
3751                         kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
3752                 if (head->lio_portkev)
3753                         port_free_event(head->lio_portkev);
3754                 kmem_free(head, sizeof (aio_lio_t));
3755         }
3756         return (error);
3757 }
3758 
3759 
3760 #ifdef  _SYSCALL32_IMPL
3761 void
3762 aiocb_32ton(aiocb32_t *src, aiocb_t *dest)
3763 {
3764         dest->aio_fildes = src->aio_fildes;
3765         dest->aio_buf = (caddr_t)(uintptr_t)src->aio_buf;
3766         dest->aio_nbytes = (size_t)src->aio_nbytes;
3767         dest->aio_offset = (off_t)src->aio_offset;
3768         dest->aio_reqprio = src->aio_reqprio;
3769         dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
3770         dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;
3771 
3772         /*
3773          * See comment in sigqueue32() on handling of 32-bit
3774          * sigvals in a 64-bit kernel.
3775          */
3776         dest->aio_sigevent.sigev_value.sival_int =
3777             (int)src->aio_sigevent.sigev_value.sival_int;
3778         dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
3779             (uintptr_t)src->aio_sigevent.sigev_notify_function;
3780         dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
3781             (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
3782         dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
3783         dest->aio_lio_opcode = src->aio_lio_opcode;
3784         dest->aio_state = src->aio_state;
3785         dest->aio__pad[0] = src->aio__pad[0];
3786 }
3787 #endif /* _SYSCALL32_IMPL */
3788 
3789 /*
3790  * aio_port_callback() is called just before the event is retrieved from the
3791  * port.  This callback finishes the transaction on behalf of the
3792  * application; that means:
3793  * - copy the transaction data out to the application
3794  *      (this thread is running in the right process context)
3795  * - keep track of the transaction (update counters)
3796  * - free allocated buffers
3797  * The aiocb pointer is the object element of the port_kevent_t structure.
3798  *
3799  * flag:
3800  *      PORT_CALLBACK_DEFAULT : do the copyout and free resources
3801  *      PORT_CALLBACK_CLOSE   : skip the copyout, free resources
3802  */
3803 
3804 /*ARGSUSED*/
3805 int
3806 aio_port_callback(void *arg, int *events, pid_t pid, int flag, void *evp)
3807 {
3808         aio_t           *aiop = curproc->p_aio;
3809         aio_req_t       *reqp = arg;
3810         struct  iovec   *iov;
3811         struct  buf     *bp;
3812         void            *resultp;
3813 
3814         if (pid != curproc->p_pid) {
3815                 /* wrong process; cannot deliver the data here */
3816                 return (EACCES);
3817         }
3818 
3819         mutex_enter(&aiop->aio_portq_mutex);
3820         reqp->aio_req_portkev = NULL;
3821         aio_req_remove_portq(aiop, reqp); /* remove request from portq */
3822         mutex_exit(&aiop->aio_portq_mutex);
3823         aphysio_unlock(reqp);           /* unlock used pages */
3824         mutex_enter(&aiop->aio_mutex);
3825         if (reqp->aio_req_flags & AIO_COPYOUTDONE) {
3826                 aio_req_free_port(aiop, reqp);  /* back to free list */
3827                 mutex_exit(&aiop->aio_mutex);
3828                 return (0);
3829         }
3830 
3831         iov = reqp->aio_req_uio.uio_iov;
3832         bp = &reqp->aio_req_buf;
3833         resultp = (void *)reqp->aio_req_resultp;
3834         aio_req_free_port(aiop, reqp);  /* request struct back to free list */
3835         mutex_exit(&aiop->aio_mutex);
3836         if (flag == PORT_CALLBACK_DEFAULT)
3837                 aio_copyout_result_port(iov, bp, resultp);
3838         return (0);
3839 }