1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*      Copyright (c) 1988 AT&T     */
  28 /*        All Rights Reserved   */
  29 
  30 #pragma ident   "%Z%%M% %I%     %E% SMI"
  31 
  32 /*
  33  * Emulation of select() system call using _pollsys() system call.
  34  *
  35  * Assumptions:
  36  *      polling for input only is most common.
  37  *      polling for exceptional conditions is very rare.
  38  *
 * Note that it is not feasible to emulate all error conditions,
  40  * in particular conditions that would return EFAULT are far too
  41  * difficult to check for in a library routine.
  42  *
  43  * This is the alternate large fd_set select.
  44  *
  45  */
  46 
  47 /*
  48  * Must precede any include files
  49  */
  50 #ifdef FD_SETSIZE
  51 #undef FD_SETSIZE
  52 #endif
  53 #define FD_SETSIZE 65536
  54 
  55 #include "lint.h"
  56 #include <values.h>
  57 #include <stdlib.h>
  58 #include <string.h>
  59 #include <pthread.h>
  60 #include <errno.h>
  61 #include <sys/time.h>
  62 #include <sys/types.h>
  63 #include <sys/poll.h>
  64 #include <string.h>
  65 #include <stdlib.h>
  66 #include "libc.h"
  67 
  68 #define DEFAULT_POLL_SIZE 64
  69 
  70 static struct pollfd *realloc_fds(int *, struct pollfd **, struct pollfd *);
  71 
  72 int
  73 pselect_large_fdset(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0,
  74         const timespec_t *tsp, const sigset_t *sigmask)
  75 {
  76         long *in, *out, *ex;
  77         ulong_t m;      /* bit mask */
  78         int j;          /* loop counter */
  79         ulong_t b;      /* bits to test */
  80         int n, rv;
  81         int lastj = -1;
  82         int nused;
  83 
  84         /*
  85          * Rather than have a mammoth pollfd (65K) list on the stack
  86          * we start with a small one and then malloc larger chunks
  87          * on the heap if necessary.
  88          */
  89 
  90         struct pollfd pfd[DEFAULT_POLL_SIZE];
  91         struct pollfd *p;
  92         struct pollfd *pfd_list;
  93         int nfds_on_list;
  94 
  95         fd_set zero;
  96 
  97         /*
  98          * Check for invalid conditions at outset.
  99          * Required for spec1170.
 100          * SUSV3: We must behave as a cancellation point even if we fail early.
 101          */
 102         if (nfds >= 0 && nfds <= FD_SETSIZE) {
 103                 if (tsp != NULL) {
 104                         if (tsp->tv_nsec < 0 || tsp->tv_nsec >= NANOSEC ||
 105                             tsp->tv_sec < 0) {
 106                                 pthread_testcancel();
 107                                 errno = EINVAL;
 108                                 return (-1);
 109                         }
 110                 }
 111         } else {
 112                 pthread_testcancel();
 113                 errno = EINVAL;
 114                 return (-1);
 115         }
 116 
 117         /*
 118          * If any input args are null, point them at the null array.
 119          */
 120         (void) memset(&zero, 0, sizeof (fd_set));
 121         if (in0 == NULL)
 122                 in0 = &zero;
 123         if (out0 == NULL)
 124                 out0 = &zero;
 125         if (ex0 == NULL)
 126                 ex0 = &zero;
 127 
 128         nfds_on_list = DEFAULT_POLL_SIZE;
 129         pfd_list = pfd;
 130         p = pfd_list;
 131         (void) memset(pfd, 0, sizeof (pfd));
 132         /*
 133          * For each fd, if any bits are set convert them into
 134          * the appropriate pollfd struct.
 135          */
 136         in = (long *)in0->fds_bits;
 137         out = (long *)out0->fds_bits;
 138         ex = (long *)ex0->fds_bits;
 139         nused = 0;
 140         /*
 141          * nused reflects the number of pollfd structs currently used
 142          * less one. If realloc_fds returns NULL it is because malloc
 143          * failed. We expect malloc() to have done the proper
 144          * thing with errno.
 145          */
 146         for (n = 0; n < nfds; n += NFDBITS) {
 147                 b = (ulong_t)(*in | *out | *ex);
 148                 for (j = 0, m = 1; b != 0; j++, b >>= 1, m <<= 1) {
 149                         if (b & 1) {
 150                                 p->fd = n + j;
 151                                 if (p->fd < nfds) {
 152                                         p->events = 0;
 153                                         if (*in & m)
 154                                                 p->events |= POLLRDNORM;
 155                                         if (*out & m)
 156                                                 p->events |= POLLWRNORM;
 157                                         if (*ex & m)
 158                                                 p->events |= POLLRDBAND;
 159                                         if (nused < (nfds_on_list - 1)) {
 160                                                 p++;
 161                                         } else if ((p = realloc_fds(
 162                                             &nfds_on_list, &pfd_list, pfd))
 163                                             == NULL) {
 164                                                 if (pfd_list != pfd)
 165                                                         free(pfd_list);
 166                                                 pthread_testcancel();
 167                                                 return (-1);
 168                                         }
 169                                         nused++;
 170                                 } else
 171                                         goto done;
 172                         }
 173                 }
 174                 in++;
 175                 out++;
 176                 ex++;
 177         }
 178 done:
 179         /*
 180          * Now do the poll.
 181          */
 182         do {
 183                 rv = _pollsys(pfd_list, (nfds_t)nused, tsp, sigmask);
 184         } while (rv < 0 && errno == EAGAIN);
 185 
 186         if (rv < 0) {                /* no need to set bit masks */
 187                 if (pfd_list != pfd)
 188                         free(pfd_list);
 189                 return (rv);
 190         } else if (rv == 0) {
 191                 /*
 192                  * Clear out bit masks, just in case.
 193                  * On the assumption that usually only
 194                  * one bit mask is set, use three loops.
 195                  */
 196                 if (in0 != &zero) {
 197                         in = (long *)in0->fds_bits;
 198                         for (n = 0; n < nfds; n += NFDBITS)
 199                                 *in++ = 0;
 200                 }
 201                 if (out0 != &zero) {
 202                         out = (long *)out0->fds_bits;
 203                         for (n = 0; n < nfds; n += NFDBITS)
 204                                 *out++ = 0;
 205                 }
 206                 if (ex0 != &zero) {
 207                         ex = (long *)ex0->fds_bits;
 208                         for (n = 0; n < nfds; n += NFDBITS)
 209                                 *ex++ = 0;
 210                 }
 211                 if (pfd_list != pfd)
 212                         free(pfd_list);
 213                 return (0);
 214         }
 215 
 216         /*
 217          * Check for EINVAL error case first to avoid changing any bits
 218          * if we're going to return an error.
 219          */
 220         for (p = pfd_list, j = nused; j-- > 0; p++) {
 221                 /*
 222                  * select will return EBADF immediately if any fd's
 223                  * are bad.  poll will complete the poll on the
 224                  * rest of the fd's and include the error indication
 225                  * in the returned bits.  This is a rare case so we
 226                  * accept this difference and return the error after
 227                  * doing more work than select would've done.
 228                  */
 229                 if (p->revents & POLLNVAL) {
 230                         errno = EBADF;
 231                         if (pfd_list != pfd)
 232                                 free(pfd_list);
 233                         return (-1);
 234                 }
 235                 /*
 236                  * We would like to make POLLHUP available to select,
 237                  * checking to see if we have pending data to be read.
 238                  * BUT until we figure out how not to break Xsun's
 239                  * dependencies on select's existing features...
 240                  * This is what we _thought_ would work ... sigh!
 241                  */
 242                 /*
 243                  * if ((p->revents & POLLHUP) &&
 244                  *      !(p->revents & (POLLRDNORM|POLLRDBAND))) {
 245                  *      errno = EINTR;
 246                  *      return (-1);
 247                  * }
 248                  */
 249         }
 250 
 251         /*
 252          * Convert results of poll back into bits
 253          * in the argument arrays.
 254          *
 255          * We assume POLLRDNORM, POLLWRNORM, and POLLRDBAND will only be set
 256          * on return from poll if they were set on input, thus we don't
 257          * worry about accidentally setting the corresponding bits in the
 258          * zero array if the input bit masks were null.
 259          *
 260          * Must return number of bits set, not number of ready descriptors
 261          * (as the man page says, and as poll() does).
 262          */
 263         rv = 0;
 264         for (p = pfd_list; nused-- > 0; p++) {
 265                 j = (int)(p->fd / NFDBITS);
 266                 /* have we moved into another word of the bit mask yet? */
 267                 if (j != lastj) {
 268                         /* clear all output bits to start with */
 269                         in = (long *)&in0->fds_bits[j];
 270                         out = (long *)&out0->fds_bits[j];
 271                         ex = (long *)&ex0->fds_bits[j];
 272                         /*
 273                          * In case we made "zero" read-only (e.g., with
 274                          * cc -R), avoid actually storing into it.
 275                          */
 276                         if (in0 != &zero)
 277                                 *in = 0;
 278                         if (out0 != &zero)
 279                                 *out = 0;
 280                         if (ex0 != &zero)
 281                                 *ex = 0;
 282                         lastj = j;
 283                 }
 284                 if (p->revents) {
 285                         m = 1L << (p->fd % NFDBITS);
 286                         if (p->revents & POLLRDNORM) {
 287                                 *in |= m;
 288                                 rv++;
 289                         }
 290                         if (p->revents & POLLWRNORM) {
 291                                 *out |= m;
 292                                 rv++;
 293                         }
 294                         if (p->revents & POLLRDBAND) {
 295                                 *ex |= m;
 296                                 rv++;
 297                         }
 298                         /*
 299                          * Only set this bit on return if we asked about
 300                          * input conditions.
 301                          */
 302                         if ((p->revents & (POLLHUP|POLLERR)) &&
 303                             (p->events & POLLRDNORM)) {
 304                                 if ((*in & m) == 0)
 305                                         rv++;   /* wasn't already set */
 306                                 *in |= m;
 307                         }
 308                         /*
 309                          * Only set this bit on return if we asked about
 310                          * output conditions.
 311                          */
 312                         if ((p->revents & (POLLHUP|POLLERR)) &&
 313                             (p->events & POLLWRNORM)) {
 314                                 if ((*out & m) == 0)
 315                                         rv++;   /* wasn't already set */
 316                                 *out |= m;
 317                         }
 318                         /*
 319                          * Only set this bit on return if we asked about
 320                          * output conditions.
 321                          */
 322                         if ((p->revents & (POLLHUP|POLLERR)) &&
 323                             (p->events & POLLRDBAND)) {
 324                                 if ((*ex & m) == 0)
 325                                         rv++;   /* wasn't already set */
 326                                 *ex |= m;
 327                         }
 328                 }
 329         }
 330         if (pfd_list != pfd)
 331                 free(pfd_list);
 332         return (rv);
 333 }
 334 
 335 int
 336 select_large_fdset(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0,
 337         struct timeval *tv)
 338 {
 339         timespec_t ts;
 340         timespec_t *tsp;
 341 
 342         if (tv == NULL)
 343                 tsp = NULL;
 344         else {
 345                 /* check timeval validity */
 346                 if (tv->tv_usec < 0 || tv->tv_usec >= MICROSEC) {
 347                         errno = EINVAL;
 348                         return (-1);
 349                 }
 350                 /*
 351                  * Convert timeval to timespec.
 352                  * To preserve compatibility with past behavior,
 353                  * when select was built upon poll(2), which has a
 354                  * minimum non-zero timeout of 1 millisecond, force
 355                  * a minimum non-zero timeout of 500 microseconds.
 356                  */
 357                 ts.tv_sec = tv->tv_sec;
 358                 ts.tv_nsec = tv->tv_usec * 1000;
 359                 if (ts.tv_nsec != 0 && ts.tv_nsec < 500000)
 360                         ts.tv_nsec = 500000;
 361                 tsp = &ts;
 362         }
 363 
 364         return (pselect_large_fdset(nfds, in0, out0, ex0, tsp, NULL));
 365 }
 366 
 367 /*
 368  * Reallocate buffers of pollfds for our list. We malloc a new buffer
 369  * and, in the case where the old buffer does not match what is passed
 370  * in orig, free the buffer after copying the contents.
 371  */
 372 struct pollfd *
 373 realloc_fds(int *num, struct pollfd **list_head, struct pollfd *orig)
 374 {
 375         struct pollfd *b;
 376         int nta;
 377         int n2;
 378 
 379         n2 = *num * 2;
 380         nta = n2 * sizeof (struct pollfd);
 381         b = malloc(nta);
 382         if (b) {
 383                 (void) memset(b, 0, (size_t)nta);
 384                 (void) memcpy(b, *list_head, nta / 2);
 385                 if (*list_head != orig)
 386                         free(*list_head);
 387                 *list_head = b;
 388                 b += *num;
 389                 *num = n2;
 390         }
 391         return (b);
 392 }