1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*      Copyright (c) 1988 AT&T     */
  28 /*        All Rights Reserved   */
  29 
  30 /*
  31  * Emulation of select() system call using poll() system call.
  32  *
  33  * Assumptions:
  34  *      polling for input only is most common.
  35  *      polling for exceptional conditions is very rare.
  36  *
  37  * Note that it is not feasible to emulate all error conditions,
  38  * in particular conditions that would return EFAULT are far too
  39  * difficult to check for in a library routine.
  40  */
  41 
  42 #pragma weak _select = select
  43 
  44 #include "lint.h"
  45 #include <values.h>
  46 #include <pthread.h>
  47 #include <errno.h>
  48 #include <stdlib.h>
  49 #include <sys/time.h>
  50 #include <sys/types.h>
  51 #include <sys/select.h>
  52 #include <sys/poll.h>
  53 #include <alloca.h>
  54 #include "libc.h"
  55 
  56 /*
  57  * STACK_PFD_LIM
  58  *
  59  *   The limit at which pselect allocates pollfd structures in the heap,
  60  *   rather than on the stack.  These limits match the historical behaviour
  61  *   with the *_large_fdset implementations.
  62  *
  63  * BULK_ALLOC_LIM
  64  *
  65  *   The limit below which we'll just allocate nfds pollfds, rather than
  66  *   counting how many we actually need.
  67  */
  68 #if defined(_LP64)
  69 #define STACK_PFD_LIM   FD_SETSIZE
  70 #define BULK_ALLOC_LIM  8192
  71 #else
  72 #define STACK_PFD_LIM   1024
  73 #define BULK_ALLOC_LIM  1024
  74 #endif
  75 
  76 /*
  77  * The previous _large_fdset implementations are, unfortunately, baked into
  78  * the ABI.
  79  */
  80 #pragma weak select_large_fdset = select
  81 #pragma weak pselect_large_fdset = pselect
  82 
  83 #define fd_set_size(nfds)       (((nfds) + (NFDBITS - 1)) / NFDBITS)
  84 
  85 static nfds_t
  86 fd_sets_count(int limit, fd_set *in, fd_set *out, fd_set *ex)
  87 {
  88         nfds_t total = 0;
  89 
  90         if (limit <= 0)
  91                 return (0);
  92 
  93         for (int i = 0; i < fd_set_size(limit); i++) {
  94                 long v = (in->fds_bits[i] | out->fds_bits[i] | ex->fds_bits[i]);
  95 
  96                 while (v != 0) {
  97                         v &= v - 1;
  98                         total++;
  99                 }
 100         }
 101 
 102         return (total);
 103 }
 104 
 105 int
 106 pselect(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0,
 107     const timespec_t *tsp, const sigset_t *sigmask)
 108 {
 109         long *in, *out, *ex;
 110         ulong_t m;      /* bit mask */
 111         int j;          /* loop counter */
 112         ulong_t b;      /* bits to test */
 113         int n, rv;
 114         struct pollfd *pfd;
 115         struct pollfd *p;
 116         int lastj = -1;
 117         nfds_t npfds = 0;
 118         boolean_t heap_pfds = B_FALSE;
 119 
 120         /* "zero" is read-only, it could go in the text segment */
 121         static fd_set zero = { 0 };
 122 
 123         /*
 124          * Check for invalid conditions at outset.
 125          * Required for spec1170.
 126          * SUSV3: We must behave as a cancellation point even if we fail early.
 127          */
 128         if (nfds < 0 || nfds > FD_SETSIZE) {
 129                 pthread_testcancel();
 130                 errno = EINVAL;
 131                 return (-1);
 132         }
 133 
 134         if (tsp != NULL) {
 135                 /* check timespec validity */
 136                 if (tsp->tv_nsec < 0 || tsp->tv_nsec >= NANOSEC ||
 137                     tsp->tv_sec < 0) {
 138                         pthread_testcancel();
 139                         errno = EINVAL;
 140                         return (-1);
 141                 }
 142         }
 143 
 144         /*
 145          * If any input args are null, point them at the null array.
 146          */
 147         if (in0 == NULL)
 148                 in0 = &zero;
 149         if (out0 == NULL)
 150                 out0 = &zero;
 151         if (ex0 == NULL)
 152                 ex0 = &zero;
 153 
 154         if (nfds <= BULK_ALLOC_LIM) {
 155                 p = pfd = alloca(nfds * sizeof (struct pollfd));
 156         } else {
 157                 npfds = fd_sets_count(nfds, in0, out0, ex0);
 158 
 159                 if (npfds > STACK_PFD_LIM) {
 160                         p = pfd = malloc(npfds * sizeof (struct pollfd));
 161                         if (p == NULL)
 162                                 return (-1);
 163                         heap_pfds = B_TRUE;
 164                 } else {
 165                         p = pfd = alloca(npfds * sizeof (struct pollfd));
 166                 }
 167         }
 168 
 169         /*
 170          * For each fd, if any bits are set convert them into
 171          * the appropriate pollfd struct.
 172          */
 173         in = (long *)in0->fds_bits;
 174         out = (long *)out0->fds_bits;
 175         ex = (long *)ex0->fds_bits;
 176         for (n = 0; n < nfds; n += NFDBITS) {
 177                 b = (ulong_t)(*in | *out | *ex);
 178                 for (j = 0, m = 1; b != 0; j++, b >>= 1, m <<= 1) {
 179                         if (b & 1) {
 180                                 p->fd = n + j;
 181                                 if (p->fd >= nfds)
 182                                         goto done;
 183                                 p->events = 0;
 184                                 if (*in & m)
 185                                         p->events |= POLLRDNORM;
 186                                 if (*out & m)
 187                                         p->events |= POLLWRNORM;
 188                                 if (*ex & m)
 189                                         p->events |= POLLRDBAND;
 190                                 p++;
 191                         }
 192                 }
 193                 in++;
 194                 out++;
 195                 ex++;
 196         }
 197 done:
 198         /*
 199          * Now do the poll.
 200          */
 201         npfds = (int)(p - pfd);
 202         do {
 203                 rv = _pollsys(pfd, npfds, tsp, sigmask);
 204         } while (rv < 0 && errno == EAGAIN);
 205 
 206         if (rv < 0)          /* no need to set bit masks */
 207                 goto out;
 208 
 209         if (rv == 0) {
 210                 /*
 211                  * Clear out bit masks, just in case.
 212                  * On the assumption that usually only
 213                  * one bit mask is set, use three loops.
 214                  */
 215                 if (in0 != &zero) {
 216                         in = (long *)in0->fds_bits;
 217                         for (n = 0; n < nfds; n += NFDBITS)
 218                                 *in++ = 0;
 219                 }
 220                 if (out0 != &zero) {
 221                         out = (long *)out0->fds_bits;
 222                         for (n = 0; n < nfds; n += NFDBITS)
 223                                 *out++ = 0;
 224                 }
 225                 if (ex0 != &zero) {
 226                         ex = (long *)ex0->fds_bits;
 227                         for (n = 0; n < nfds; n += NFDBITS)
 228                                 *ex++ = 0;
 229                 }
 230                 rv = 0;
 231                 goto out;
 232         }
 233 
 234         /*
 235          * Check for EINVAL error case first to avoid changing any bits
 236          * if we're going to return an error.
 237          */
 238         for (p = pfd, n = npfds; n-- > 0; p++) {
 239                 /*
 240                  * select will return EBADF immediately if any fd's
 241                  * are bad.  poll will complete the poll on the
 242                  * rest of the fd's and include the error indication
 243                  * in the returned bits.  This is a rare case so we
 244                  * accept this difference and return the error after
 245                  * doing more work than select would've done.
 246                  */
 247                 if (p->revents & POLLNVAL) {
 248                         errno = EBADF;
 249                         rv = -1;
 250                         goto out;
 251                 }
 252                 /*
 253                  * We would like to make POLLHUP available to select,
 254                  * checking to see if we have pending data to be read.
 255                  * BUT until we figure out how not to break Xsun's
 256                  * dependencies on select's existing features...
 257                  * This is what we _thought_ would work ... sigh!
 258                  */
 259                 /*
 260                  * if ((p->revents & POLLHUP) &&
 261                  *      !(p->revents & (POLLRDNORM|POLLRDBAND))) {
 262                  *      errno = EINTR;
 263                  *      rv = -1;
 264                  *      goto out;
 265                  * }
 266                  */
 267         }
 268 
 269         /*
 270          * Convert results of poll back into bits
 271          * in the argument arrays.
 272          *
 273          * We assume POLLRDNORM, POLLWRNORM, and POLLRDBAND will only be set
 274          * on return from poll if they were set on input, thus we don't
 275          * worry about accidentally setting the corresponding bits in the
 276          * zero array if the input bit masks were null.
 277          *
 278          * Must return number of bits set, not number of ready descriptors
 279          * (as the man page says, and as poll() does).
 280          */
 281         rv = 0;
 282         for (p = pfd, n = npfds; n-- > 0; p++) {
 283                 j = (int)(p->fd / NFDBITS);
 284                 /* have we moved into another word of the bit mask yet? */
 285                 if (j != lastj) {
 286                         /* clear all output bits to start with */
 287                         in = (long *)&in0->fds_bits[j];
 288                         out = (long *)&out0->fds_bits[j];
 289                         ex = (long *)&ex0->fds_bits[j];
 290                         /*
 291                          * In case we made "zero" read-only (e.g., with
 292                          * cc -R), avoid actually storing into it.
 293                          */
 294                         if (in0 != &zero)
 295                                 *in = 0;
 296                         if (out0 != &zero)
 297                                 *out = 0;
 298                         if (ex0 != &zero)
 299                                 *ex = 0;
 300                         lastj = j;
 301                 }
 302                 if (p->revents) {
 303                         m = 1L << (p->fd % NFDBITS);
 304                         if (p->revents & POLLRDNORM) {
 305                                 *in |= m;
 306                                 rv++;
 307                         }
 308                         if (p->revents & POLLWRNORM) {
 309                                 *out |= m;
 310                                 rv++;
 311                         }
 312                         if (p->revents & POLLRDBAND) {
 313                                 *ex |= m;
 314                                 rv++;
 315                         }
 316                         /*
 317                          * Only set this bit on return if we asked about
 318                          * input conditions.
 319                          */
 320                         if ((p->revents & (POLLHUP|POLLERR)) &&
 321                             (p->events & POLLRDNORM)) {
 322                                 if ((*in & m) == 0)
 323                                         rv++;   /* wasn't already set */
 324                                 *in |= m;
 325                         }
 326                         /*
 327                          * Only set this bit on return if we asked about
 328                          * output conditions.
 329                          */
 330                         if ((p->revents & (POLLHUP|POLLERR)) &&
 331                             (p->events & POLLWRNORM)) {
 332                                 if ((*out & m) == 0)
 333                                         rv++;   /* wasn't already set */
 334                                 *out |= m;
 335                         }
 336                         /*
 337                          * Only set this bit on return if we asked about
 338                          * output conditions.
 339                          */
 340                         if ((p->revents & (POLLHUP|POLLERR)) &&
 341                             (p->events & POLLRDBAND)) {
 342                                 if ((*ex & m) == 0)
 343                                         rv++;   /* wasn't already set */
 344                                 *ex |= m;
 345                         }
 346                 }
 347         }
 348 out:
 349         if (heap_pfds)
 350                 free(pfd);
 351         return (rv);
 352 }
 353 
 354 int
 355 select(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0, struct timeval *tv)
 356 {
 357         timespec_t ts;
 358         timespec_t *tsp;
 359 
 360         if (tv == NULL)
 361                 tsp = NULL;
 362         else {
 363                 /* check timeval validity */
 364                 if (tv->tv_usec < 0 || tv->tv_usec >= MICROSEC) {
 365                         errno = EINVAL;
 366                         return (-1);
 367                 }
 368                 /*
 369                  * Convert timeval to timespec.
 370                  * To preserve compatibility with past behavior,
 371                  * when select was built upon poll(2), which has a
 372                  * minimum non-zero timeout of 1 millisecond, force
 373                  * a minimum non-zero timeout of 500 microseconds.
 374                  */
 375                 ts.tv_sec = tv->tv_sec;
 376                 ts.tv_nsec = tv->tv_usec * 1000;
 377                 if (ts.tv_nsec != 0 && ts.tv_nsec < 500000)
 378                         ts.tv_nsec = 500000;
 379                 tsp = &ts;
 380         }
 381 
 382         return (pselect(nfds, in0, out0, ex0, tsp, NULL));
 383 }