Print this page
select: shortcircuit fd_sets_count if given 0 fds (such as for timing)
if nfds is low, fastpath to try to maintain throughput
libc: only have one select implementation, and move the pollfds onto the heap if they cross some threshold

Split Close
Expand all
Collapse all
          --- old/usr/src/lib/libc/port/gen/select.c
          +++ new/usr/src/lib/libc/port/gen/select.c
↓ open down ↓ 19 lines elided ↑ open up ↑
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  24   24   * Use is subject to license terms.
  25   25   */
  26   26  
  27   27  /*      Copyright (c) 1988 AT&T */
  28   28  /*        All Rights Reserved   */
  29   29  
  30      -#pragma ident   "%Z%%M% %I%     %E% SMI"
  31      -
  32   30  /*
  33   31   * Emulation of select() system call using poll() system call.
  34   32   *
  35   33   * Assumptions:
  36   34   *      polling for input only is most common.
  37   35   *      polling for exceptional conditions is very rare.
  38   36   *
  39   37   * Note that it is not feasible to emulate all error conditions,
  40   38   * in particular conditions that would return EFAULT are far too
  41   39   * difficult to check for in a library routine.
  42   40   */
  43   41  
  44   42  #pragma weak _select = select
  45   43  
  46   44  #include "lint.h"
  47   45  #include <values.h>
  48   46  #include <pthread.h>
  49   47  #include <errno.h>
       48 +#include <stdlib.h>
  50   49  #include <sys/time.h>
  51   50  #include <sys/types.h>
  52   51  #include <sys/select.h>
  53   52  #include <sys/poll.h>
  54   53  #include <alloca.h>
  55   54  #include "libc.h"
  56   55  
       56 +/*
       57 + * STACK_PFD_LIM
       58 + *
       59 + *   The limit above which pselect allocates pollfd structures on the heap,
       60 + *   rather than on the stack.  These limits match the historical behaviour
       61 + *   with the *_large_fdset implementations.
       62 + *
       63 + * BULK_ALLOC_LIM
       64 + *
       65 + *   The limit at or below which we'll just allocate nfds pollfds, rather than
       66 + *   counting how many we actually need.
       67 + */
       68 +#if defined(_LP64)
       69 +#define STACK_PFD_LIM   FD_SETSIZE
       70 +#define BULK_ALLOC_LIM  8192
       71 +#else
       72 +#define STACK_PFD_LIM   1024
       73 +#define BULK_ALLOC_LIM  1024
       74 +#endif
       75 +
       76 +/*
       77 + * The previous _large_fdset implementations are, unfortunately, baked into
       78 + * the ABI.
       79 + */
       80 +#pragma weak select_large_fdset = select
       81 +#pragma weak pselect_large_fdset = pselect
       82 +
       83 +#define fd_set_size(nfds)       (((nfds) + (NFDBITS - 1)) / NFDBITS)
       84 +
       85 +static nfds_t
       86 +fd_sets_count(int limit, fd_set *in, fd_set *out, fd_set *ex)
       87 +{
       88 +        nfds_t total = 0;
       89 +
       90 +        if (limit <= 0)
       91 +                return (0);
       92 +
       93 +        for (int i = 0; i < fd_set_size(limit); i++) {
       94 +                long v = (in->fds_bits[i] | out->fds_bits[i] | ex->fds_bits[i]);
       95 +
       96 +                while (v != 0) {
       97 +                        v &= v - 1;
       98 +                        total++;
       99 +                }
      100 +        }
      101 +
      102 +        return (total);
      103 +}
      104 +
  57  105  int
  58  106  pselect(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0,
  59  107          const timespec_t *tsp, const sigset_t *sigmask)
  60  108  {
  61  109          long *in, *out, *ex;
  62  110          ulong_t m;      /* bit mask */
  63  111          int j;          /* loop counter */
  64  112          ulong_t b;      /* bits to test */
  65  113          int n, rv;
  66  114          struct pollfd *pfd;
  67  115          struct pollfd *p;
  68  116          int lastj = -1;
      117 +        nfds_t npfds = 0;
      118 +        boolean_t heap_pfds = B_FALSE;
  69  119  
  70  120          /* "zero" is read-only, it could go in the text segment */
  71  121          static fd_set zero = { 0 };
  72  122  
  73  123          /*
  74  124           * Check for invalid conditions at outset.
  75  125           * Required for spec1170.
  76  126           * SUSV3: We must behave as a cancellation point even if we fail early.
  77  127           */
  78  128          if (nfds < 0 || nfds > FD_SETSIZE) {
  79  129                  pthread_testcancel();
  80  130                  errno = EINVAL;
  81  131                  return (-1);
  82  132          }
  83      -        p = pfd = (struct pollfd *)alloca(nfds * sizeof (struct pollfd));
  84  133  
  85  134          if (tsp != NULL) {
  86  135                  /* check timespec validity */
  87  136                  if (tsp->tv_nsec < 0 || tsp->tv_nsec >= NANOSEC ||
  88  137                      tsp->tv_sec < 0) {
  89  138                          pthread_testcancel();
  90  139                          errno = EINVAL;
  91  140                          return (-1);
  92  141                  }
  93  142          }
↓ open down ↓ 1 lines elided ↑ open up ↑
  95  144          /*
  96  145           * If any input args are null, point them at the null array.
  97  146           */
  98  147          if (in0 == NULL)
  99  148                  in0 = &zero;
 100  149          if (out0 == NULL)
 101  150                  out0 = &zero;
 102  151          if (ex0 == NULL)
 103  152                  ex0 = &zero;
 104  153  
      154 +        if (nfds <= BULK_ALLOC_LIM) {
      155 +                p = pfd = alloca(nfds * sizeof (struct pollfd));
      156 +        } else {
      157 +                npfds = fd_sets_count(nfds, in0, out0, ex0);
      158 +
      159 +                if (npfds > STACK_PFD_LIM) {
      160 +                        p = pfd = malloc(npfds * sizeof (struct pollfd));
      161 +                        if (p == NULL)
      162 +                                return (-1);
      163 +                        heap_pfds = B_TRUE;
      164 +                } else {
      165 +                        p = pfd = alloca(npfds * sizeof (struct pollfd));
      166 +                }
      167 +        }
      168 +
 105  169          /*
 106  170           * For each fd, if any bits are set convert them into
 107  171           * the appropriate pollfd struct.
 108  172           */
 109  173          in = (long *)in0->fds_bits;
 110  174          out = (long *)out0->fds_bits;
 111  175          ex = (long *)ex0->fds_bits;
 112  176          for (n = 0; n < nfds; n += NFDBITS) {
 113  177                  b = (ulong_t)(*in | *out | *ex);
 114  178                  for (j = 0, m = 1; b != 0; j++, b >>= 1, m <<= 1) {
↓ open down ↓ 12 lines elided ↑ open up ↑
 127  191                          }
 128  192                  }
 129  193                  in++;
 130  194                  out++;
 131  195                  ex++;
 132  196          }
 133  197  done:
 134  198          /*
 135  199           * Now do the poll.
 136  200           */
 137      -        n = (int)(p - pfd);             /* number of pollfd's */
      201 +        npfds = (int)(p - pfd);
 138  202          do {
 139      -                rv = _pollsys(pfd, (nfds_t)n, tsp, sigmask);
      203 +                rv = _pollsys(pfd, npfds, tsp, sigmask);
 140  204          } while (rv < 0 && errno == EAGAIN);
 141  205  
 142  206          if (rv < 0)             /* no need to set bit masks */
 143      -                return (rv);
      207 +                goto out;
 144  208  
 145  209          if (rv == 0) {
 146  210                  /*
 147  211                   * Clear out bit masks, just in case.
 148  212                   * On the assumption that usually only
 149  213                   * one bit mask is set, use three loops.
 150  214                   */
 151  215                  if (in0 != &zero) {
 152  216                          in = (long *)in0->fds_bits;
 153  217                          for (n = 0; n < nfds; n += NFDBITS)
↓ open down ↓ 2 lines elided ↑ open up ↑
 156  220                  if (out0 != &zero) {
 157  221                          out = (long *)out0->fds_bits;
 158  222                          for (n = 0; n < nfds; n += NFDBITS)
 159  223                                  *out++ = 0;
 160  224                  }
 161  225                  if (ex0 != &zero) {
 162  226                          ex = (long *)ex0->fds_bits;
 163  227                          for (n = 0; n < nfds; n += NFDBITS)
 164  228                                  *ex++ = 0;
 165  229                  }
 166      -                return (0);
      230 +                rv = 0;
      231 +                goto out;
 167  232          }
 168  233  
 169  234          /*
 170  235           * Check for EINVAL error case first to avoid changing any bits
 171  236           * if we're going to return an error.
 172  237           */
 173      -        for (p = pfd, j = n; j-- > 0; p++) {
      238 +        for (p = pfd, n = npfds; n-- > 0; p++) {
 174  239                  /*
 175  240                   * select will return EBADF immediately if any fd's
 176  241                   * are bad.  poll will complete the poll on the
 177  242                   * rest of the fd's and include the error indication
 178  243                   * in the returned bits.  This is a rare case so we
 179  244                   * accept this difference and return the error after
 180  245                   * doing more work than select would've done.
 181  246                   */
 182  247                  if (p->revents & POLLNVAL) {
 183  248                          errno = EBADF;
 184      -                        return (-1);
      249 +                        rv = -1;
      250 +                        goto out;
 185  251                  }
 186  252                  /*
 187  253                   * We would like to make POLLHUP available to select,
 188  254                   * checking to see if we have pending data to be read.
 189  255                   * BUT until we figure out how not to break Xsun's
 190  256                   * dependencies on select's existing features...
 191  257                   * This is what we _thought_ would work ... sigh!
 192  258                   */
 193  259                  /*
 194  260                   * if ((p->revents & POLLHUP) &&
 195  261                   *      !(p->revents & (POLLRDNORM|POLLRDBAND))) {
 196  262                   *      errno = EINTR;
 197      -                 *      return (-1);
      263 +                 *      rv = -1;
      264 +                 *      goto out;
 198  265                   * }
 199  266                   */
 200  267          }
 201  268  
 202  269          /*
 203  270           * Convert results of poll back into bits
 204  271           * in the argument arrays.
 205  272           *
 206  273           * We assume POLLRDNORM, POLLWRNORM, and POLLRDBAND will only be set
 207  274           * on return from poll if they were set on input, thus we don't
 208  275           * worry about accidentally setting the corresponding bits in the
 209  276           * zero array if the input bit masks were null.
 210  277           *
 211  278           * Must return number of bits set, not number of ready descriptors
 212  279           * (as the man page says, and as poll() does).
 213  280           */
 214  281          rv = 0;
 215      -        for (p = pfd; n-- > 0; p++) {
      282 +        for (p = pfd, n = npfds; n-- > 0; p++) {
 216  283                  j = (int)(p->fd / NFDBITS);
 217  284                  /* have we moved into another word of the bit mask yet? */
 218  285                  if (j != lastj) {
 219  286                          /* clear all output bits to start with */
 220  287                          in = (long *)&in0->fds_bits[j];
 221  288                          out = (long *)&out0->fds_bits[j];
 222  289                          ex = (long *)&ex0->fds_bits[j];
 223  290                          /*
 224  291                           * In case we made "zero" read-only (e.g., with
 225  292                           * cc -R), avoid actually storing into it.
↓ open down ↓ 45 lines elided ↑ open up ↑
 271  338                           * output conditions.
 272  339                           */
 273  340                          if ((p->revents & (POLLHUP|POLLERR)) &&
 274  341                              (p->events & POLLRDBAND)) {
 275  342                                  if ((*ex & m) == 0)
 276  343                                          rv++;   /* wasn't already set */
 277  344                                  *ex |= m;
 278  345                          }
 279  346                  }
 280  347          }
      348 +out:
      349 +        if (heap_pfds)
      350 +                free(pfd);
 281  351          return (rv);
 282  352  }
 283  353  
 284  354  int
 285  355  select(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0, struct timeval *tv)
 286  356  {
 287  357          timespec_t ts;
 288  358          timespec_t *tsp;
 289  359  
 290  360          if (tv == NULL)
↓ open down ↓ 23 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX