/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright (c) 2015, Joyent, Inc.  All rights reserved.
 */

/* Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved. */

#include "lint.h"
#include "thr_uberdata.h"
#include <stdarg.h>
#include <poll.h>
#include <stropts.h>
#include <dlfcn.h>
#include <wait.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/file.h>
#include <sys/door.h>

/*
 * These leading-underbar symbols exist because mistakes were made
 * in the past that put them into non-SUNWprivate versions of
 * the libc mapfiles.  They should be eliminated, but oh well...
 */
#pragma weak _fork = fork
#pragma weak _read = read
#pragma weak _write = write
#pragma weak _getmsg = getmsg
#pragma weak _getpmsg = getpmsg
#pragma weak _putmsg = putmsg
#pragma weak _putpmsg = putpmsg
#pragma weak _sleep = sleep
#pragma weak _close = close
#pragma weak _creat = creat
#pragma weak _fcntl = fcntl
#pragma weak _fsync = fsync
#pragma weak _lockf = lockf
#pragma weak _msgrcv = msgrcv
#pragma weak _msgsnd = msgsnd
#pragma weak _msync = msync
#pragma weak _open = open
#pragma weak _openat = openat
#pragma weak _pause = pause
#pragma weak _readv = readv
#pragma weak _sigpause = sigpause
#pragma weak _sigsuspend = sigsuspend
#pragma weak _tcdrain = tcdrain
#pragma weak _waitid = waitid
#pragma weak _writev = writev

#if !defined(_LP64)
#pragma weak _creat64 = creat64
#pragma weak _lockf64 = lockf64
#pragma weak _open64 = open64
#pragma weak _openat64 = openat64
#pragma weak _pread64 = pread64
#pragma weak _pwrite64 = pwrite64
#endif

/*
 * These are SUNWprivate, but they are being used by Sun Studio libcollector.
 */
#pragma weak _fork1 = fork1
#pragma weak _forkall = forkall

/*
 * atfork_lock protects the pthread_atfork() data structures.
 *
 * fork_lock does double-duty.  Not only does it (and atfork_lock)
 * serialize calls to fork() and forkall(), but it also serializes calls
 * to thr_suspend() and thr_continue() (because fork() and forkall() also
 * suspend and continue other threads and they want no competition).
 *
 * Functions called in dlopen()ed L10N objects can do anything, including
 * call malloc() and free().  Such calls are not fork-safe when protected
 * by an ordinary mutex that is acquired in libc's prefork processing:
 * with an interposed malloc library present, the pthread_atfork() prefork
 * function in the interposition library would acquire its malloc lock(s)
 * before libc's prefork functions acquire the ordinary mutex in libc,
 * which is a lock-ordering violation.
 *
 * Within libc, calls to malloc() and free() are fork-safe if the calls
 * are made while holding no other libc locks.  This covers almost all
 * of libc's malloc() and free() calls.  For those libc code paths, such
 * as the above-mentioned L10N calls, that require serialization and that
 * may call malloc() or free(), libc uses callout_lock_enter() to perform
 * the serialization.  This works because callout_lock is not acquired as
 * part of running the pthread_atfork() prefork handlers (to avoid the
 * lock ordering violation described above).  Rather, it is simply
 * reinitialized in postfork1_child() to cover the case that some
 * now-defunct thread might have been suspended while holding it.
 */

void
fork_lock_enter(void)
{
	ASSERT(curthread->ul_critical == 0);
	(void) mutex_lock(&curthread->ul_uberdata->fork_lock);
}

void
fork_lock_exit(void)
{
	ASSERT(curthread->ul_critical == 0);
	(void) mutex_unlock(&curthread->ul_uberdata->fork_lock);
}

/*
 * Use cancel_safe_mutex_lock() to protect against being cancelled while
 * holding callout_lock and calling outside of libc (via L10N plugins).
 * We will honor a pending cancellation request when callout_lock_exit()
 * is called, by calling cancel_safe_mutex_unlock().
 */
void
callout_lock_enter(void)
{
	ASSERT(curthread->ul_critical == 0);
	cancel_safe_mutex_lock(&curthread->ul_uberdata->callout_lock);
}

void
callout_lock_exit(void)
{
	ASSERT(curthread->ul_critical == 0);
	cancel_safe_mutex_unlock(&curthread->ul_uberdata->callout_lock);
}
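
/*
 * Illustrative sketch (not part of libc): a code path that must serialize
 * calls into a dlopen()ed L10N plugin, and that may reach malloc() or
 * free(), would bracket the call with callout_lock_enter()/exit() rather
 * than with an lmutex_lock()-acquired lock.  The plugin hook below is
 * hypothetical.
 *
 *	void
 *	call_l10n_hook(void (*hook)(void))
 *	{
 *		callout_lock_enter();
 *		hook();			// may call malloc() and free()
 *		callout_lock_exit();	// honors any pending cancellation
 *	}
 */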

pid_t
forkx(int flags)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	pid_t pid;

	if (self->ul_vfork) {
		/*
		 * We are a child of vfork(); omit all of the fork
		 * logic and go straight to the system call trap.
		 * A vfork() child of a multithreaded parent
		 * must never call fork().
		 */
		if (udp->uberflags.uf_mt) {
			errno = ENOTSUP;
			return (-1);
		}
		pid = __forkx(flags);
		if (pid == 0) {		/* child */
			udp->pid = getpid();
			self->ul_vfork = 0;
		}
		return (pid);
	}

	sigoff(self);
	if (self->ul_fork) {
		/*
		 * Cannot call fork() from a fork handler.
		 */
		sigon(self);
		errno = EDEADLK;
		return (-1);
	}
	self->ul_fork = 1;

	/*
	 * The functions registered by pthread_atfork() are defined by
	 * the application and its libraries and we must not hold any
	 * internal lmutex_lock()-acquired locks while invoking them.
	 * We hold only udp->atfork_lock to protect the atfork linkages.
	 * If one of these pthread_atfork() functions attempts to fork
	 * or to call pthread_atfork(), libc will detect the error and
	 * fail the call with EDEADLK.  Otherwise, the pthread_atfork()
	 * functions are free to do anything they please (except they
	 * will not receive any signals).
	 */
	(void) mutex_lock(&udp->atfork_lock);

	/*
	 * Posix (SUSv3) requires fork() to be async-signal-safe.
	 * This cannot be made to happen with fork handlers in place
	 * (they grab locks).  To be in nominal compliance, don't run
	 * any fork handlers if we are called within a signal context.
	 * This leaves the child process in a questionable state with
	 * respect to its locks, but at least the parent process does
	 * not become deadlocked due to the calling thread attempting
	 * to acquire a lock that it already owns.
	 */
	if (self->ul_siglink == NULL)
		_prefork_handler();

	/*
	 * Block every other thread attempting thr_suspend() or thr_continue().
	 */
	(void) mutex_lock(&udp->fork_lock);

	/*
	 * Block all signals.
	 * Just deferring them via sigoff() is not enough.
	 * We have to avoid taking a deferred signal in the child
	 * that was actually sent to the parent before __forkx().
	 */
	block_all_signals(self);

	/*
	 * This suspends all threads but this one, leaving them
	 * suspended outside of any critical regions in the library.
	 * Thus, we are assured that no lmutex_lock()-acquired library
	 * locks are held while we invoke fork() from the current thread.
	 */
	suspend_fork();

	pid = __forkx(flags);

	if (pid == 0) {		/* child */
		/*
		 * Clear our schedctl pointer.
		 * Discard any deferred signal that was sent to the parent.
		 * Because we blocked all signals before __forkx(), a
		 * deferred signal cannot have been taken by the child.
		 */
		self->ul_schedctl_called = NULL;
		self->ul_schedctl = NULL;
		self->ul_cursig = 0;
		self->ul_siginfo.si_signo = 0;
		udp->pid = getpid();
		/* reset the library's data structures to reflect one thread */
		unregister_locks();
		postfork1_child();
		restore_signals(self);
		(void) mutex_unlock(&udp->fork_lock);
		if (self->ul_siglink == NULL)
			_postfork_child_handler();
	} else {
		/* restart all threads that were suspended for fork() */
		continue_fork(0);
		restore_signals(self);
		(void) mutex_unlock(&udp->fork_lock);
		if (self->ul_siglink == NULL)
			_postfork_parent_handler();
	}

	(void) mutex_unlock(&udp->atfork_lock);
	self->ul_fork = 0;
	sigon(self);

	return (pid);
}
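
/*
 * Usage sketch (caller's perspective, not part of libc): forkx() accepts
 * the forkx(2) flags, e.g. FORK_NOSIGCHLD so the child's termination does
 * not post SIGCHLD, and FORK_WAITPID so the child must be reaped by an
 * explicit wait for its pid.  A minimal sketch:
 *
 *	siginfo_t info;
 *	pid_t pid = forkx(FORK_NOSIGCHLD | FORK_WAITPID);
 *	if (pid == 0) {
 *		// child
 *		_exit(0);
 *	}
 *	(void) waitid(P_PID, pid, &info, WEXITED);
 */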

/*
 * fork() is fork1() for both Posix threads and Solaris threads.
 * The forkall() interface exists for applications that require
 * the semantics of replicating all threads.
 */
#pragma weak fork1 = fork
pid_t
fork(void)
{
	return (forkx(0));
}

/*
 * Much of the logic here is the same as in forkx().
 * See the comments in forkx(), above.
 */
pid_t
forkallx(int flags)
{
	ulwp_t *self = curthread;
	uberdata_t *udp = self->ul_uberdata;
	pid_t pid;

	if (self->ul_vfork) {
		if (udp->uberflags.uf_mt) {
			errno = ENOTSUP;
			return (-1);
		}
		pid = __forkallx(flags);
		if (pid == 0) {		/* child */
			udp->pid = getpid();
			self->ul_vfork = 0;
		}
		return (pid);
	}

	sigoff(self);
	if (self->ul_fork) {
		sigon(self);
		errno = EDEADLK;
		return (-1);
	}
	self->ul_fork = 1;
	(void) mutex_lock(&udp->atfork_lock);
	(void) mutex_lock(&udp->fork_lock);
	block_all_signals(self);
	suspend_fork();

	pid = __forkallx(flags);

	if (pid == 0) {
		self->ul_schedctl_called = NULL;
		self->ul_schedctl = NULL;
		self->ul_cursig = 0;
		self->ul_siginfo.si_signo = 0;
		udp->pid = getpid();
		unregister_locks();
		continue_fork(1);
	} else {
		continue_fork(0);
	}
	restore_signals(self);
	(void) mutex_unlock(&udp->fork_lock);
	(void) mutex_unlock(&udp->atfork_lock);
	self->ul_fork = 0;
	sigon(self);

	return (pid);
}

pid_t
forkall(void)
{
	return (forkallx(0));
}

/*
 * For the implementation of cancellation at cancellation points.
 */
#define	PROLOGUE							\
{									\
	ulwp_t *self = curthread;					\
	int nocancel =							\
	    (self->ul_vfork | self->ul_nocancel | self->ul_libc_locks |	\
	    self->ul_critical | self->ul_sigdefer);			\
	int abort = 0;							\
	if (nocancel == 0) {						\
		self->ul_save_async = self->ul_cancel_async;		\
		if (!self->ul_cancel_disabled) {			\
			self->ul_cancel_async = 1;			\
			if (self->ul_cancel_pending)			\
				pthread_exit(PTHREAD_CANCELED);		\
		}							\
		self->ul_sp = stkptr();					\
	} else if (self->ul_cancel_pending &&				\
	    !self->ul_cancel_disabled) {				\
		set_cancel_eintr_flag(self);				\
		abort = 1;						\
	}

#define	EPILOGUE							\
	if (nocancel == 0) {						\
		self->ul_sp = 0;					\
		self->ul_cancel_async = self->ul_save_async;		\
	}								\
}

/*
 * Perform the body of the action required by most of the cancelable
 * function calls.  The return (function_call) part is there to allow
 * the compiler to turn the call into a tail call, which saves a
 * register window on sparc and slightly (not much) improves the
 * generated code for x86/x64 compilations.
 */
#define	PERFORM(function_call)						\
	PROLOGUE							\
	if (abort) {							\
		*self->ul_errnop = EINTR;				\
		return (-1);						\
	}								\
	if (nocancel)							\
		return (function_call);					\
	rv = function_call;						\
	EPILOGUE							\
	return (rv);

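/*
 * Conceptual sketch of what PERFORM(__read(fd, buf, size)) does in a
 * typical wrapper such as read(), below:
 *
 *	PROLOGUE		(may call pthread_exit(PTHREAD_CANCELED))
 *	if (abort)		(pending cancel but cancellation unsafe here)
 *		fail with EINTR
 *	if (nocancel)
 *		return __read(fd, buf, size) directly (tail call)
 *	rv = __read(fd, buf, size);	(this is the cancellation point)
 *	EPILOGUE		(restore the previous async-cancel state)
 *	return (rv);
 */
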
/*
 * Specialized prologue for sigsuspend() and pollsys().
 * These system calls pass a signal mask to the kernel.
 * The kernel replaces the thread's signal mask with the
 * temporary mask before the thread goes to sleep.  If
 * a signal is received, the signal handler will execute
 * with the temporary mask, as modified by the sigaction
 * for the particular signal.
 *
 * We block all signals until we reach the kernel with the
 * temporary mask.  This eliminates race conditions with
 * setting the signal mask while signals are being posted.
 */
#define	PROLOGUE_MASK(sigmask)						\
{									\
	ulwp_t *self = curthread;					\
	int nocancel =							\
	    (self->ul_vfork | self->ul_nocancel | self->ul_libc_locks |	\
	    self->ul_critical | self->ul_sigdefer);			\
	if (!self->ul_vfork) {						\
		if (sigmask) {						\
			block_all_signals(self);			\
			self->ul_tmpmask = *sigmask;			\
			delete_reserved_signals(&self->ul_tmpmask);	\
			self->ul_sigsuspend = 1;			\
		}							\
		if (nocancel == 0) {					\
			self->ul_save_async = self->ul_cancel_async;	\
			if (!self->ul_cancel_disabled) {		\
				self->ul_cancel_async = 1;		\
				if (self->ul_cancel_pending) {		\
					if (self->ul_sigsuspend) {	\
						self->ul_sigsuspend = 0;\
						restore_signals(self);	\
					}				\
					pthread_exit(PTHREAD_CANCELED);	\
				}					\
			}						\
			self->ul_sp = stkptr();				\
		}							\
	}

/*
 * If a signal is taken, we return from the system call wrapper with
 * our original signal mask restored (see code in call_user_handler()).
 * If not (self->ul_sigsuspend is still non-zero), we must restore our
 * original signal mask ourselves.
 */
#define	EPILOGUE_MASK							\
	if (nocancel == 0) {						\
		self->ul_sp = 0;					\
		self->ul_cancel_async = self->ul_save_async;		\
	}								\
	if (self->ul_sigsuspend) {					\
		self->ul_sigsuspend = 0;				\
		restore_signals(self);					\
	}								\
}

/*
 * Cancellation prologue and epilogue functions,
 * for cancellation points too complex to include here.
 */
void
_cancel_prologue(void)
{
	ulwp_t *self = curthread;

	self->ul_cancel_prologue =
	    (self->ul_vfork | self->ul_nocancel | self->ul_libc_locks |
	    self->ul_critical | self->ul_sigdefer) != 0;
	if (self->ul_cancel_prologue == 0) {
		self->ul_save_async = self->ul_cancel_async;
		if (!self->ul_cancel_disabled) {
			self->ul_cancel_async = 1;
			if (self->ul_cancel_pending)
				pthread_exit(PTHREAD_CANCELED);
		}
		self->ul_sp = stkptr();
	} else if (self->ul_cancel_pending &&
	    !self->ul_cancel_disabled) {
		set_cancel_eintr_flag(self);
	}
}

void
_cancel_epilogue(void)
{
	ulwp_t *self = curthread;

	if (self->ul_cancel_prologue == 0) {
		self->ul_sp = 0;
		self->ul_cancel_async = self->ul_save_async;
	}
}

/*
 * Called from _thrp_join() (thr_join() is a cancellation point)
 */
int
lwp_wait(thread_t tid, thread_t *found)
{
	int error;

	PROLOGUE
	if (abort)
		return (EINTR);
	while ((error = __lwp_wait(tid, found)) == EINTR && !cancel_active())
		continue;
	EPILOGUE
	return (error);
}

ssize_t
read(int fd, void *buf, size_t size)
{
	extern ssize_t __read(int, void *, size_t);
	ssize_t rv;

	PERFORM(__read(fd, buf, size))
}

ssize_t
write(int fd, const void *buf, size_t size)
{
	extern ssize_t __write(int, const void *, size_t);
	ssize_t rv;

	PERFORM(__write(fd, buf, size))
}

int
getmsg(int fd, struct strbuf *ctlptr, struct strbuf *dataptr,
	int *flagsp)
{
	extern int __getmsg(int, struct strbuf *, struct strbuf *, int *);
	int rv;

	PERFORM(__getmsg(fd, ctlptr, dataptr, flagsp))
}

int
getpmsg(int fd, struct strbuf *ctlptr, struct strbuf *dataptr,
	int *bandp, int *flagsp)
{
	extern int __getpmsg(int, struct strbuf *, struct strbuf *,
	    int *, int *);
	int rv;

	PERFORM(__getpmsg(fd, ctlptr, dataptr, bandp, flagsp))
}

int
putmsg(int fd, const struct strbuf *ctlptr,
	const struct strbuf *dataptr, int flags)
{
	extern int __putmsg(int, const struct strbuf *,
	    const struct strbuf *, int);
	int rv;

	PERFORM(__putmsg(fd, ctlptr, dataptr, flags))
}

int
__xpg4_putmsg(int fd, const struct strbuf *ctlptr,
	const struct strbuf *dataptr, int flags)
{
	extern int __putmsg(int, const struct strbuf *,
	    const struct strbuf *, int);
	int rv;

	PERFORM(__putmsg(fd, ctlptr, dataptr, flags|MSG_XPG4))
}

int
putpmsg(int fd, const struct strbuf *ctlptr,
	const struct strbuf *dataptr, int band, int flags)
{
	extern int __putpmsg(int, const struct strbuf *,
	    const struct strbuf *, int, int);
	int rv;

	PERFORM(__putpmsg(fd, ctlptr, dataptr, band, flags))
}

int
__xpg4_putpmsg(int fd, const struct strbuf *ctlptr,
	const struct strbuf *dataptr, int band, int flags)
{
	extern int __putpmsg(int, const struct strbuf *,
	    const struct strbuf *, int, int);
	int rv;

	PERFORM(__putpmsg(fd, ctlptr, dataptr, band, flags|MSG_XPG4))
}

int
nanosleep(const timespec_t *rqtp, timespec_t *rmtp)
{
	int error;

	PROLOGUE
	error = abort? EINTR : __nanosleep(rqtp, rmtp);
	EPILOGUE
	if (error) {
		errno = error;
		return (-1);
	}
	return (0);
}

int
clock_nanosleep(clockid_t clock_id, int flags,
	const timespec_t *rqtp, timespec_t *rmtp)
{
	timespec_t reltime;
	hrtime_t start;
	hrtime_t rqlapse;
	hrtime_t lapse;
	int error;

	switch (clock_id) {
	case CLOCK_VIRTUAL:
	case CLOCK_PROCESS_CPUTIME_ID:
	case CLOCK_THREAD_CPUTIME_ID:
		return (ENOTSUP);
	case CLOCK_REALTIME:
	case CLOCK_HIGHRES:
		break;
	default:
		return (EINVAL);
	}
	if (flags & TIMER_ABSTIME) {
		abstime_to_reltime(clock_id, rqtp, &reltime);
		rmtp = NULL;
	} else {
		reltime = *rqtp;
		if (clock_id == CLOCK_HIGHRES)
			start = gethrtime();
	}
restart:
	PROLOGUE
	error = abort? EINTR : __nanosleep(&reltime, rmtp);
	EPILOGUE
	if (error == 0 && clock_id == CLOCK_HIGHRES) {
		/*
		 * Don't return yet if we didn't really get a timeout.
		 * This can happen if we return because someone resets
		 * the system clock.
		 */
		if (flags & TIMER_ABSTIME) {
			if ((hrtime_t)(uint32_t)rqtp->tv_sec * NANOSEC +
			    rqtp->tv_nsec > gethrtime()) {
				abstime_to_reltime(clock_id, rqtp, &reltime);
				goto restart;
			}
		} else {
			rqlapse = (hrtime_t)(uint32_t)rqtp->tv_sec * NANOSEC +
			    rqtp->tv_nsec;
			lapse = gethrtime() - start;
			if (rqlapse > lapse) {
				hrt2ts(rqlapse - lapse, &reltime);
				goto restart;
			}
		}
	}
	if (error == 0 && clock_id == CLOCK_REALTIME &&
	    (flags & TIMER_ABSTIME)) {
		/*
		 * Don't return yet just because someone reset the
		 * system clock.  Recompute the new relative time
		 * and reissue the nanosleep() call if necessary.
		 *
		 * Resetting the system clock causes all sorts of
		 * problems and the SUSV3 standards body should
		 * have made the behavior of clock_nanosleep() be
		 * implementation-defined in such a case rather than
		 * being specific about honoring the new system time.
		 * Standards bodies are filled with fools and idiots.
		 */
		abstime_to_reltime(clock_id, rqtp, &reltime);
		if (reltime.tv_sec != 0 || reltime.tv_nsec != 0)
			goto restart;
	}
	return (error);
}
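
/*
 * Usage sketch (caller's perspective): an absolute sleep until a deadline,
 * which does not drift when the sleep is interrupted and retried, because
 * the deadline rather than a relative interval is passed on each call.
 *
 *	timespec_t deadline;
 *	(void) clock_gettime(CLOCK_REALTIME, &deadline);
 *	deadline.tv_sec += 5;
 *	int err = clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME,
 *	    &deadline, NULL);
 *	// err is 0 or an error number (e.g. EINTR); errno is not set
 */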

unsigned int
sleep(unsigned int sec)
{
	unsigned int rem = 0;
	timespec_t ts;
	timespec_t tsr;

	ts.tv_sec = (time_t)sec;
	ts.tv_nsec = 0;
	if (nanosleep(&ts, &tsr) == -1 && errno == EINTR) {
		/* interrupted; report the unslept time, rounded to seconds */
		rem = (unsigned int)tsr.tv_sec;
		if (tsr.tv_nsec >= NANOSEC / 2)
			rem++;
	}
	return (rem);
}

int
usleep(useconds_t usec)
{
	timespec_t ts;

	ts.tv_sec = usec / MICROSEC;
	ts.tv_nsec = (long)(usec % MICROSEC) * 1000;
	(void) nanosleep(&ts, NULL);
	return (0);
}

int
close(int fildes)
{
	extern void _aio_close(int);
	extern int __close(int);
	int rv;

	/*
	 * If we call _aio_close() while in a critical region,
	 * we will draw an ASSERT() failure, so don't do it.
	 * No calls to close() from within libc need _aio_close();
	 * only the application's calls to close() need this,
	 * and such calls are never from a libc critical region.
	 */
	if (curthread->ul_critical == 0)
		_aio_close(fildes);
	PERFORM(__close(fildes))
}

int
door_call(int d, door_arg_t *params)
{
	extern int __door_call(int, door_arg_t *);
	int rv;

	PERFORM(__door_call(d, params))
}

int
fcntl(int fildes, int cmd, ...)
{
	extern int __fcntl(int, int, ...);
	intptr_t arg;
	int rv;
	va_list ap;

	va_start(ap, cmd);
	arg = va_arg(ap, intptr_t);
	va_end(ap);
	/*
	 * Only F_SETLKW can block indefinitely,
	 * so it is the only cancellation point here.
	 */
	if (cmd != F_SETLKW)
		return (__fcntl(fildes, cmd, arg));
	PERFORM(__fcntl(fildes, cmd, arg))
}

int
fdatasync(int fildes)
{
	extern int __fdsync(int, int);
	int rv;

	PERFORM(__fdsync(fildes, FDSYNC))
}

int
fsync(int fildes)
{
	extern int __fdsync(int, int);
	int rv;

	PERFORM(__fdsync(fildes, FSYNC))
}

int
lockf(int fildes, int function, off_t size)
{
	extern int __lockf(int, int, off_t);
	int rv;

	PERFORM(__lockf(fildes, function, size))
}

#if !defined(_LP64)
int
lockf64(int fildes, int function, off64_t size)
{
	extern int __lockf64(int, int, off64_t);
	int rv;

	PERFORM(__lockf64(fildes, function, size))
}
#endif	/* !_LP64 */

ssize_t
msgrcv(int msqid, void *msgp, size_t msgsz, long msgtyp, int msgflg)
{
	extern ssize_t __msgrcv(int, void *, size_t, long, int);
	ssize_t rv;

	PERFORM(__msgrcv(msqid, msgp, msgsz, msgtyp, msgflg))
}

int
msgsnd(int msqid, const void *msgp, size_t msgsz, int msgflg)
{
	extern int __msgsnd(int, const void *, size_t, int);
	int rv;

	PERFORM(__msgsnd(msqid, msgp, msgsz, msgflg))
}

int
msync(caddr_t addr, size_t len, int flags)
{
	extern int __msync(caddr_t, size_t, int);
	int rv;

	PERFORM(__msync(addr, len, flags))
}

int
openat(int fd, const char *path, int oflag, ...)
{
	mode_t mode;
	int rv;
	va_list ap;

	va_start(ap, oflag);
	mode = va_arg(ap, mode_t);
	va_end(ap);
	PERFORM(__openat(fd, path, oflag, mode))
}

int
open(const char *path, int oflag, ...)
{
	mode_t mode;
	int rv;
	va_list ap;

	va_start(ap, oflag);
	mode = va_arg(ap, mode_t);
	va_end(ap);
	PERFORM(__open(path, oflag, mode))
}

int
creat(const char *path, mode_t mode)
{
	return (open(path, O_WRONLY | O_CREAT | O_TRUNC, mode));
}

#if !defined(_LP64)
int
openat64(int fd, const char *path, int oflag, ...)
{
	mode_t mode;
	int rv;
	va_list ap;

	va_start(ap, oflag);
	mode = va_arg(ap, mode_t);
	va_end(ap);
	PERFORM(__openat64(fd, path, oflag, mode))
}

int
open64(const char *path, int oflag, ...)
{
	mode_t mode;
	int rv;
	va_list ap;

	va_start(ap, oflag);
	mode = va_arg(ap, mode_t);
	va_end(ap);
	PERFORM(__open64(path, oflag, mode))
}

int
creat64(const char *path, mode_t mode)
{
	return (open64(path, O_WRONLY | O_CREAT | O_TRUNC, mode));
}
#endif	/* !_LP64 */

int
pause(void)
{
	extern int __pause(void);
	int rv;

	PERFORM(__pause())
}

ssize_t
pread(int fildes, void *buf, size_t nbyte, off_t offset)
{
	extern ssize_t __pread(int, void *, size_t, off_t);
	ssize_t rv;

	PERFORM(__pread(fildes, buf, nbyte, offset))
}

#if !defined(_LP64)
ssize_t
pread64(int fildes, void *buf, size_t nbyte, off64_t offset)
{
	extern ssize_t __pread64(int, void *, size_t, off64_t);
	ssize_t rv;

	PERFORM(__pread64(fildes, buf, nbyte, offset))
}

ssize_t
preadv64(int fildes, const struct iovec *iov, int iovcnt, off64_t offset)
{
	extern ssize_t __preadv64(int, const struct iovec *, int, off_t, off_t);
	ssize_t rv;

	/* split the 64-bit offset into two 32-bit halves for the trap */
	PERFORM(__preadv64(fildes, iov, iovcnt, offset & 0xffffffffULL,
	    offset >> 32))
}
#endif	/* !_LP64 */

ssize_t
preadv(int fildes, const struct iovec *iov, int iovcnt, off_t offset)
{
	extern ssize_t __preadv(int, const struct iovec *, int, off_t, off_t);
	ssize_t rv;

	PERFORM(__preadv(fildes, iov, iovcnt, offset, 0))
}

ssize_t
pwrite(int fildes, const void *buf, size_t nbyte, off_t offset)
{
	extern ssize_t __pwrite(int, const void *, size_t, off_t);
	ssize_t rv;

	PERFORM(__pwrite(fildes, buf, nbyte, offset))
}

#if !defined(_LP64)
ssize_t
pwrite64(int fildes, const void *buf, size_t nbyte, off64_t offset)
{
	extern ssize_t __pwrite64(int, const void *, size_t, off64_t);
	ssize_t rv;

	PERFORM(__pwrite64(fildes, buf, nbyte, offset))
}

ssize_t
pwritev64(int fildes, const struct iovec *iov, int iovcnt, off64_t offset)
{
	extern ssize_t __pwritev64(int,
	    const struct iovec *, int, off_t, off_t);
	ssize_t rv;

	/* split the 64-bit offset into two 32-bit halves for the trap */
	PERFORM(__pwritev64(fildes, iov, iovcnt, offset & 0xffffffffULL,
	    offset >> 32))
}
#endif	/* !_LP64 */

ssize_t
pwritev(int fildes, const struct iovec *iov, int iovcnt, off_t offset)
{
	extern ssize_t __pwritev(int, const struct iovec *, int, off_t, off_t);
	ssize_t rv;

	PERFORM(__pwritev(fildes, iov, iovcnt, offset, 0))
}

ssize_t
readv(int fildes, const struct iovec *iov, int iovcnt)
{
	extern ssize_t __readv(int, const struct iovec *, int);
	ssize_t rv;

	PERFORM(__readv(fildes, iov, iovcnt))
}

int
sigpause(int sig)
{
	extern int __sigpause(int);
	int rv;

	PERFORM(__sigpause(sig))
}

int
sigsuspend(const sigset_t *set)
{
	extern int __sigsuspend(const sigset_t *);
	int rv;

	PROLOGUE_MASK(set)
	rv = __sigsuspend(set);
	EPILOGUE_MASK
	return (rv);
}
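
/*
 * Usage sketch (caller's perspective, illustrative only): the classic
 * race-free wait, where SIGUSR1 stays blocked while the predicate is
 * tested and is unblocked only for the duration of sigsuspend() itself.
 * The flag below is a hypothetical volatile sig_atomic_t set by the
 * application's SIGUSR1 handler.
 *
 *	sigset_t block, waitmask;
 *	(void) sigemptyset(&block);
 *	(void) sigaddset(&block, SIGUSR1);
 *	(void) sigprocmask(SIG_BLOCK, &block, &waitmask);
 *	(void) sigdelset(&waitmask, SIGUSR1);
 *	while (!usr1_seen)
 *		(void) sigsuspend(&waitmask);	// always returns -1, EINTR
 */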

int
_pollsys(struct pollfd *fds, nfds_t nfd, const timespec_t *timeout,
	const sigset_t *sigmask)
{
	extern int __pollsys(struct pollfd *, nfds_t, const timespec_t *,
	    const sigset_t *);
	int rv;

	PROLOGUE_MASK(sigmask)
	rv = __pollsys(fds, nfd, timeout, sigmask);
	EPILOGUE_MASK
	return (rv);
}

int
sigtimedwait(const sigset_t *set, siginfo_t *infop, const timespec_t *timeout)
{
	extern int __sigtimedwait(const sigset_t *, siginfo_t *,
	    const timespec_t *);
	siginfo_t info;
	int sig;

	PROLOGUE
	if (abort) {
		*self->ul_errnop = EINTR;
		sig = -1;
	} else {
		sig = __sigtimedwait(set, &info, timeout);
		if (sig == SIGCANCEL &&
		    (SI_FROMKERNEL(&info) || info.si_code == SI_LWP)) {
			/*
			 * The cancellation signal is libc's own;
			 * act on it rather than returning it to the caller.
			 */
			do_sigcancel();
			*self->ul_errnop = EINTR;
			sig = -1;
		}
	}
	EPILOGUE
	if (sig != -1 && infop)
		(void) memcpy(infop, &info, sizeof (*infop));
	return (sig);
}

int
sigwait(sigset_t *set)
{
	return (sigtimedwait(set, NULL, NULL));
}

int
sigwaitinfo(const sigset_t *set, siginfo_t *info)
{
	return (sigtimedwait(set, info, NULL));
}
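
/*
 * Usage sketch (caller's perspective, illustrative only): a dedicated
 * signal-handling thread waits synchronously for SIGUSR1, with the
 * signal blocked in every thread so that it is never delivered
 * asynchronously.
 *
 *	sigset_t set;
 *	siginfo_t si;
 *	(void) sigemptyset(&set);
 *	(void) sigaddset(&set, SIGUSR1);
 *	(void) pthread_sigmask(SIG_BLOCK, &set, NULL);
 *	int sig = sigwaitinfo(&set, &si);	// the signal number, or -1
 */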

int
sigqueue(pid_t pid, int signo, const union sigval value)
{
	extern int __sigqueue(pid_t pid, int signo,
	    /* const union sigval */ void *value, int si_code, int block);
	return (__sigqueue(pid, signo, value.sival_ptr, SI_QUEUE, 0));
}

int
_so_accept(int sock, struct sockaddr *addr, uint_t *addrlen, int version,
    int flags)
{
	extern int __so_accept(int, struct sockaddr *, uint_t *, int, int);
	int rv;

	PERFORM(__so_accept(sock, addr, addrlen, version, flags))
}

int
_so_connect(int sock, struct sockaddr *addr, uint_t addrlen, int version)
{
	extern int __so_connect(int, struct sockaddr *, uint_t, int);
	int rv;

	PERFORM(__so_connect(sock, addr, addrlen, version))
}

int
_so_recv(int sock, void *buf, size_t len, int flags)
{
	extern int __so_recv(int, void *, size_t, int);
	int rv;

	PERFORM(__so_recv(sock, buf, len, flags))
}

int
_so_recvfrom(int sock, void *buf, size_t len, int flags,
    struct sockaddr *addr, int *addrlen)
{
	extern int __so_recvfrom(int, void *, size_t, int,
	    struct sockaddr *, int *);
	int rv;

	PERFORM(__so_recvfrom(sock, buf, len, flags, addr, addrlen))
}

int
_so_recvmsg(int sock, struct msghdr *msg, int flags)
{
	extern int __so_recvmsg(int, struct msghdr *, int);
	int rv;

	PERFORM(__so_recvmsg(sock, msg, flags))
}

int
_so_send(int sock, const void *buf, size_t len, int flags)
{
	extern int __so_send(int, const void *, size_t, int);
	int rv;

	PERFORM(__so_send(sock, buf, len, flags))
}

int
_so_sendmsg(int sock, const struct msghdr *msg, int flags)
{
	extern int __so_sendmsg(int, const struct msghdr *, int);
	int rv;

	PERFORM(__so_sendmsg(sock, msg, flags))
}

int
_so_sendto(int sock, const void *buf, size_t len, int flags,
    const struct sockaddr *addr, int *addrlen)
{
	extern int __so_sendto(int, const void *, size_t, int,
	    const struct sockaddr *, int *);
	int rv;

	PERFORM(__so_sendto(sock, buf, len, flags, addr, addrlen))
}

int
tcdrain(int fildes)
{
	extern int __tcdrain(int);
	int rv;

	PERFORM(__tcdrain(fildes))
}

int
waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options)
{
	extern int __waitid(idtype_t, id_t, siginfo_t *, int);
	int rv;

	/* a WNOHANG call cannot block, so bypass the cancellation logic */
	if (options & WNOHANG)
		return (__waitid(idtype, id, infop, options));
	PERFORM(__waitid(idtype, id, infop, options))
}

ssize_t
writev(int fildes, const struct iovec *iov, int iovcnt)
{
	extern ssize_t __writev(int, const struct iovec *, int);
	ssize_t rv;

	PERFORM(__writev(fildes, iov, iovcnt))
}