Print this page
select: short-circuit fd_sets_count if given 0 fds (such as when select is used only for timing)
if nfds is low, take a fast path to try to maintain throughput
libc: only have one select implementation, and move the pollfds onto the heap if they cross some threshold
*** 25,36 ****
*/
/* Copyright (c) 1988 AT&T */
/* All Rights Reserved */
- #pragma ident "%Z%%M% %I% %E% SMI"
-
/*
* Emulation of select() system call using poll() system call.
*
* Assumptions:
* polling for input only is most common.
--- 25,34 ----
*** 45,61 ****
--- 43,109 ----
#include "lint.h"
#include <values.h>
#include <pthread.h>
#include <errno.h>
+ #include <stdlib.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/select.h>
#include <sys/poll.h>
#include <alloca.h>
#include "libc.h"
+ /*
+ * STACK_PFD_LIM
+ *
+ * The number of pollfd structures above which pselect allocates them on
+ * the heap, rather than on the stack. These limits match the historical
+ * behaviour of the *_large_fdset implementations.
+ *
+ * BULK_ALLOC_LIM
+ *
+ * The limit at or below which we'll just allocate nfds pollfds, rather than
+ * counting how many we actually need.
+ */
+ #if defined(_LP64)
+ #define STACK_PFD_LIM FD_SETSIZE
+ #define BULK_ALLOC_LIM 8192
+ #else
+ #define STACK_PFD_LIM 1024
+ #define BULK_ALLOC_LIM 1024
+ #endif
+
+ /*
+ * The previous _large_fdset implementations are, unfortunately, baked into
+ * the ABI.
+ */
+ #pragma weak select_large_fdset = select
+ #pragma weak pselect_large_fdset = pselect
+
+ /*
+ * Number of fds_bits words needed to hold nfds descriptor bits,
+ * i.e. nfds / NFDBITS rounded up.
+ */
+ #define fd_set_size(nfds) (((nfds) + (NFDBITS - 1)) / NFDBITS)
+
+ /*
+  * Count the descriptors that are set in at least one of the three sets.
+  *
+  * Whole words are examined: the first fd_set_size(limit) words of each
+  * set are OR-ed together and their set bits counted, so any bits at or
+  * above `limit' within the last word examined are counted too.
+  *
+  * in, out and ex must all be non-NULL; they are dereferenced
+  * unconditionally whenever limit is positive.
+  *
+  * Returns 0 without touching the sets when limit <= 0.
+  */
+ static nfds_t
+ fd_sets_count(int limit, fd_set *in, fd_set *out, fd_set *ex)
+ {
+ 	nfds_t total = 0;
+
+ 	if (limit <= 0)
+ 		return (0);
+
+ 	const int nwords = fd_set_size(limit);
+
+ 	for (int i = 0; i < nwords; i++) {
+ 		/*
+ 		 * Keep the word unsigned: once only the top bit remains,
+ 		 * "v - 1" on a signed long would be LONG_MIN - 1, which is
+ 		 * signed overflow and therefore undefined behaviour.
+ 		 */
+ 		ulong_t v = (ulong_t)(in->fds_bits[i] | out->fds_bits[i] |
+ 		    ex->fds_bits[i]);
+
+ 		while (v != 0) {
+ 			v &= v - 1;	/* clear the lowest set bit */
+ 			total++;
+ 		}
+ 	}
+
+ 	return (total);
+ }
+
int
pselect(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0,
const timespec_t *tsp, const sigset_t *sigmask)
{
long *in, *out, *ex;
*** 64,73 ****
--- 112,123 ----
ulong_t b; /* bits to test */
int n, rv;
struct pollfd *pfd;
struct pollfd *p;
int lastj = -1;
+ nfds_t npfds = 0;
+ boolean_t heap_pfds = B_FALSE;
/* "zero" is read-only, it could go in the text segment */
static fd_set zero = { 0 };
/*
*** 78,88 ****
if (nfds < 0 || nfds > FD_SETSIZE) {
pthread_testcancel();
errno = EINVAL;
return (-1);
}
- p = pfd = (struct pollfd *)alloca(nfds * sizeof (struct pollfd));
if (tsp != NULL) {
/* check timespec validity */
if (tsp->tv_nsec < 0 || tsp->tv_nsec >= NANOSEC ||
tsp->tv_sec < 0) {
--- 128,137 ----
*** 100,109 ****
--- 149,173 ----
if (out0 == NULL)
out0 = &zero;
if (ex0 == NULL)
ex0 = &zero;
+ if (nfds <= BULK_ALLOC_LIM) {
+ p = pfd = alloca(nfds * sizeof (struct pollfd));
+ } else {
+ npfds = fd_sets_count(nfds, in0, out0, ex0);
+
+ if (npfds > STACK_PFD_LIM) {
+ p = pfd = malloc(npfds * sizeof (struct pollfd));
+ if (p == NULL)
+ return (-1);
+ heap_pfds = B_TRUE;
+ } else {
+ p = pfd = alloca(npfds * sizeof (struct pollfd));
+ }
+ }
+
/*
* For each fd, if any bits are set convert them into
* the appropriate pollfd struct.
*/
in = (long *)in0->fds_bits;
*** 132,148 ****
}
done:
/*
* Now do the poll.
*/
! n = (int)(p - pfd); /* number of pollfd's */
do {
! rv = _pollsys(pfd, (nfds_t)n, tsp, sigmask);
} while (rv < 0 && errno == EAGAIN);
if (rv < 0) /* no need to set bit masks */
! return (rv);
if (rv == 0) {
/*
* Clear out bit masks, just in case.
* On the assumption that usually only
--- 196,212 ----
}
done:
/*
* Now do the poll.
*/
! npfds = (int)(p - pfd);
do {
! rv = _pollsys(pfd, npfds, tsp, sigmask);
} while (rv < 0 && errno == EAGAIN);
if (rv < 0) /* no need to set bit masks */
! goto out;
if (rv == 0) {
/*
* Clear out bit masks, just in case.
* On the assumption that usually only
*** 161,189 ****
if (ex0 != &zero) {
ex = (long *)ex0->fds_bits;
for (n = 0; n < nfds; n += NFDBITS)
*ex++ = 0;
}
! return (0);
}
/*
* Check for EINVAL error case first to avoid changing any bits
* if we're going to return an error.
*/
! for (p = pfd, j = n; j-- > 0; p++) {
/*
* select will return EBADF immediately if any fd's
* are bad. poll will complete the poll on the
* rest of the fd's and include the error indication
* in the returned bits. This is a rare case so we
* accept this difference and return the error after
* doing more work than select would've done.
*/
if (p->revents & POLLNVAL) {
errno = EBADF;
! return (-1);
}
/*
* We would like to make POLLHUP available to select,
* checking to see if we have pending data to be read.
* BUT until we figure out how not to break Xsun's
--- 225,255 ----
if (ex0 != &zero) {
ex = (long *)ex0->fds_bits;
for (n = 0; n < nfds; n += NFDBITS)
*ex++ = 0;
}
! rv = 0;
! goto out;
}
/*
* Check for EINVAL error case first to avoid changing any bits
* if we're going to return an error.
*/
! for (p = pfd, n = npfds; n-- > 0; p++) {
/*
* select will return EBADF immediately if any fd's
* are bad. poll will complete the poll on the
* rest of the fd's and include the error indication
* in the returned bits. This is a rare case so we
* accept this difference and return the error after
* doing more work than select would've done.
*/
if (p->revents & POLLNVAL) {
errno = EBADF;
! rv = -1;
! goto out;
}
/*
* We would like to make POLLHUP available to select,
* checking to see if we have pending data to be read.
* BUT until we figure out how not to break Xsun's
*** 192,202 ****
*/
/*
* if ((p->revents & POLLHUP) &&
* !(p->revents & (POLLRDNORM|POLLRDBAND))) {
* errno = EINTR;
! * return (-1);
* }
*/
}
/*
--- 258,269 ----
*/
/*
* if ((p->revents & POLLHUP) &&
* !(p->revents & (POLLRDNORM|POLLRDBAND))) {
* errno = EINTR;
! * rv = -1;
! * goto out;
* }
*/
}
/*
*** 210,220 ****
*
* Must return number of bits set, not number of ready descriptors
* (as the man page says, and as poll() does).
*/
rv = 0;
! for (p = pfd; n-- > 0; p++) {
j = (int)(p->fd / NFDBITS);
/* have we moved into another word of the bit mask yet? */
if (j != lastj) {
/* clear all output bits to start with */
in = (long *)&in0->fds_bits[j];
--- 277,287 ----
*
* Must return number of bits set, not number of ready descriptors
* (as the man page says, and as poll() does).
*/
rv = 0;
! for (p = pfd, n = npfds; n-- > 0; p++) {
j = (int)(p->fd / NFDBITS);
/* have we moved into another word of the bit mask yet? */
if (j != lastj) {
/* clear all output bits to start with */
in = (long *)&in0->fds_bits[j];
*** 276,285 ****
--- 343,355 ----
rv++; /* wasn't already set */
*ex |= m;
}
}
}
+ out:
+ if (heap_pfds)
+ free(pfd);
return (rv);
}
int
select(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0, struct timeval *tv)