/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1988 AT&T */
/* All Rights Reserved */
/*
* Emulation of select() system call using poll() system call.
*
* Assumptions:
* polling for input only is most common.
* polling for exceptional conditions is very rare.
*
* Note that it is not feasible to emulate all error conditions,
* in particular conditions that would return EFAULT are far too
* difficult to check for in a library routine.
*/
#pragma weak _select = select
#include "lint.h"
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "libc.h"
/*
* STACK_PFD_LIM
*
* The limit at which pselect allocates pollfd structures in the heap,
* rather than on the stack. These limits match the historical behaviour
* with the *_large_fdset implementations.
*
* BULK_ALLOC_LIM
*
* The limit below which we'll just allocate nfds pollfds, rather than
* counting how many we actually need.
*/
#if defined(_LP64)
/* LP64 build: the stack limit is tied to FD_SETSIZE. */
#define STACK_PFD_LIM FD_SETSIZE
#define BULK_ALLOC_LIM 8192
#else
/* Non-LP64 build: both limits are 1024 pollfd entries. */
#define STACK_PFD_LIM 1024
#define BULK_ALLOC_LIM 1024
#endif
/*
* The previous _large_fdset implementations are, unfortunately, baked into
* the ABI.
*/
#pragma weak select_large_fdset = select
#pragma weak pselect_large_fdset = pselect
/*
 * Number of fds_bits words needed to hold nfds descriptor bits, rounded
 * up to a whole word of NFDBITS bits.
 */
#define fd_set_size(nfds) (((nfds) + (NFDBITS - 1)) / NFDBITS)
/*
 * Count the descriptors that are set in the union of the three fd_sets.
 *
 * Only the first fd_set_size(limit) words of each set are examined, and
 * every bit in those words is counted, so bits at or above limit in the
 * final word are included in the total.  Returns 0 when limit is not
 * positive.  All three set pointers are dereferenced and must be non-NULL.
 */
static nfds_t
fd_sets_count(int limit, fd_set *in, fd_set *out, fd_set *ex)
{
	nfds_t total = 0;
	int nwords;

	if (limit <= 0)
		return (0);

	nwords = fd_set_size(limit);
	for (int i = 0; i < nwords; i++) {
		/*
		 * Count in an unsigned word.  Clearing the lowest set bit
		 * with v & (v - 1) on a signed long overflows (undefined
		 * behaviour) once only the sign bit remains set.
		 */
		unsigned long v = (unsigned long)(in->fds_bits[i] |
		    out->fds_bits[i] | ex->fds_bits[i]);

		while (v != 0) {
			v &= v - 1;	/* drop the lowest set bit */
			total++;
		}
	}
	return (total);
}
/*
 * pselect() implemented on top of _pollsys().
 *
 * Every descriptor below nfds that is set in any of the three fd_sets is
 * turned into one pollfd entry; the array is handed to _pollsys() along
 * with tsp and sigmask, and the returned events are then converted back
 * into bits in the caller's fd_sets.  NULL set pointers are treated as
 * empty sets.
 *
 * Returns the number of bits set across the three result sets, 0 when
 * _pollsys() reports nothing ready (the non-NULL sets are cleared up to
 * nfds), or -1 with errno set:
 *	EINVAL	nfds is negative or greater than FD_SETSIZE, or *tsp has a
 *		negative tv_sec or a tv_nsec outside [0, NANOSEC)
 *	EBADF	poll flagged one of the descriptors with POLLNVAL
 * A failure of malloc() or _pollsys() is returned with the errno that
 * call left behind (EAGAIN from _pollsys() is retried).
 */
int
pselect(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0,
    const timespec_t *tsp, const sigset_t *sigmask)
{
	long *in, *out, *ex;
	ulong_t m;	/* bit mask */
	int j;		/* loop counter */
	ulong_t b;	/* bits to test */
	int n, rv;
	struct pollfd *pfd;
	struct pollfd *p;
	int lastj = -1;
	nfds_t npfds = 0;
	boolean_t heap_pfds = B_FALSE;

	/* "zero" is read-only, it could go in the text segment */
	static fd_set zero = { 0 };

	/*
	 * Check for invalid conditions at outset.
	 * Required for spec1170.
	 * SUSV3: We must behave as a cancellation point even if we fail early.
	 */
	if (nfds < 0 || nfds > FD_SETSIZE) {
		pthread_testcancel();
		errno = EINVAL;
		return (-1);
	}

	if (tsp != NULL) {
		/* check timespec validity */
		if (tsp->tv_nsec < 0 || tsp->tv_nsec >= NANOSEC ||
		    tsp->tv_sec < 0) {
			pthread_testcancel();
			errno = EINVAL;
			return (-1);
		}
	}

	/*
	 * If any input args are null, point them at the null array.
	 */
	if (in0 == NULL)
		in0 = &zero;
	if (out0 == NULL)
		out0 = &zero;
	if (ex0 == NULL)
		ex0 = &zero;

	/*
	 * Size the pollfd array.  For small nfds simply reserve nfds entries
	 * on the stack; otherwise count the bits actually set and use the
	 * heap only when that count exceeds STACK_PFD_LIM.
	 */
	if (nfds <= BULK_ALLOC_LIM) {
		p = pfd = alloca(nfds * sizeof (struct pollfd));
	} else {
		npfds = fd_sets_count(nfds, in0, out0, ex0);

		if (npfds > STACK_PFD_LIM) {
			p = pfd = malloc(npfds * sizeof (struct pollfd));
			if (p == NULL) {
				/*
				 * Still act as a cancellation point, but
				 * report the errno left by malloc().
				 */
				int err = errno;

				pthread_testcancel();
				errno = err;
				return (-1);
			}
			heap_pfds = B_TRUE;
		} else {
			p = pfd = alloca(npfds * sizeof (struct pollfd));
		}
	}

	/*
	 * For each fd, if any bits are set convert them into
	 * the appropriate pollfd struct.
	 */
	in = (long *)in0->fds_bits;
	out = (long *)out0->fds_bits;
	ex = (long *)ex0->fds_bits;
	for (n = 0; n < nfds; n += NFDBITS) {
		b = (ulong_t)(*in | *out | *ex);
		for (j = 0, m = 1; b != 0; j++, b >>= 1, m <<= 1) {
			if (b & 1) {
				int fd = n + j;

				/*
				 * Test the limit before touching *p: when
				 * the array holds exactly nfds entries and
				 * all are used, p already points one past
				 * the end, so storing first would write out
				 * of bounds for a stray bit >= nfds.
				 */
				if (fd >= nfds)
					goto done;
				p->fd = fd;
				p->events = 0;
				if (*in & m)
					p->events |= POLLRDNORM;
				if (*out & m)
					p->events |= POLLWRNORM;
				if (*ex & m)
					p->events |= POLLRDBAND;
				p++;
			}
		}
		in++;
		out++;
		ex++;
	}
done:
	/*
	 * Now do the poll.
	 */
	npfds = (int)(p - pfd);
	do {
		rv = _pollsys(pfd, npfds, tsp, sigmask);
	} while (rv < 0 && errno == EAGAIN);

	if (rv < 0)		/* no need to set bit masks */
		goto out;

	if (rv == 0) {
		/*
		 * Clear out bit masks, just in case.
		 * On the assumption that usually only
		 * one bit mask is set, use three loops.
		 */
		if (in0 != &zero) {
			in = (long *)in0->fds_bits;
			for (n = 0; n < nfds; n += NFDBITS)
				*in++ = 0;
		}
		if (out0 != &zero) {
			out = (long *)out0->fds_bits;
			for (n = 0; n < nfds; n += NFDBITS)
				*out++ = 0;
		}
		if (ex0 != &zero) {
			ex = (long *)ex0->fds_bits;
			for (n = 0; n < nfds; n += NFDBITS)
				*ex++ = 0;
		}
		goto out;	/* rv is already 0 */
	}

	/*
	 * Check for the EBADF error case first to avoid changing any bits
	 * if we're going to return an error.
	 */
	for (p = pfd, n = npfds; n-- > 0; p++) {
		/*
		 * select will return EBADF immediately if any fd's
		 * are bad.  poll will complete the poll on the
		 * rest of the fd's and include the error indication
		 * in the returned bits.  This is a rare case so we
		 * accept this difference and return the error after
		 * doing more work than select would've done.
		 */
		if (p->revents & POLLNVAL) {
			errno = EBADF;
			rv = -1;
			goto out;
		}
		/*
		 * We would like to make POLLHUP available to select,
		 * checking to see if we have pending data to be read.
		 * BUT until we figure out how not to break Xsun's
		 * dependencies on select's existing features...
		 * This is what we _thought_ would work ... sigh!
		 */
		/*
		 * if ((p->revents & POLLHUP) &&
		 *	!(p->revents & (POLLRDNORM|POLLRDBAND))) {
		 *	errno = EINTR;
		 *	rv = -1;
		 *	goto out;
		 * }
		 */
	}

	/*
	 * Convert results of poll back into bits
	 * in the argument arrays.
	 *
	 * We assume POLLRDNORM, POLLWRNORM, and POLLRDBAND will only be set
	 * on return from poll if they were set on input, thus we don't
	 * worry about accidentally setting the corresponding bits in the
	 * zero array if the input bit masks were null.
	 *
	 * Must return number of bits set, not number of ready descriptors
	 * (as the man page says, and as poll() does).
	 */
	rv = 0;
	for (p = pfd, n = npfds; n-- > 0; p++) {
		j = (int)(p->fd / NFDBITS);
		/* have we moved into another word of the bit mask yet? */
		if (j != lastj) {
			/* clear all output bits to start with */
			in = (long *)&in0->fds_bits[j];
			out = (long *)&out0->fds_bits[j];
			ex = (long *)&ex0->fds_bits[j];
			/*
			 * In case we made "zero" read-only (e.g., with
			 * cc -R), avoid actually storing into it.
			 */
			if (in0 != &zero)
				*in = 0;
			if (out0 != &zero)
				*out = 0;
			if (ex0 != &zero)
				*ex = 0;
			lastj = j;
		}
		if (p->revents) {
			/*
			 * Build the mask from an unsigned constant; shifting
			 * a signed 1L into the top bit of the word is
			 * undefined.
			 */
			m = 1UL << (p->fd % NFDBITS);
			if (p->revents & POLLRDNORM) {
				*in |= m;
				rv++;
			}
			if (p->revents & POLLWRNORM) {
				*out |= m;
				rv++;
			}
			if (p->revents & POLLRDBAND) {
				*ex |= m;
				rv++;
			}
			/*
			 * Only set this bit on return if we asked about
			 * input conditions.
			 */
			if ((p->revents & (POLLHUP|POLLERR)) &&
			    (p->events & POLLRDNORM)) {
				if ((*in & m) == 0)
					rv++;	/* wasn't already set */
				*in |= m;
			}
			/*
			 * Only set this bit on return if we asked about
			 * output conditions.
			 */
			if ((p->revents & (POLLHUP|POLLERR)) &&
			    (p->events & POLLWRNORM)) {
				if ((*out & m) == 0)
					rv++;	/* wasn't already set */
				*out |= m;
			}
			/*
			 * Only set this bit on return if we asked about
			 * exceptional conditions.
			 */
			if ((p->revents & (POLLHUP|POLLERR)) &&
			    (p->events & POLLRDBAND)) {
				if ((*ex & m) == 0)
					rv++;	/* wasn't already set */
				*ex |= m;
			}
		}
	}
out:
	/* pfd came from malloc() only when heap_pfds was set above. */
	if (heap_pfds)
		free(pfd);
	return (rv);
}
/*
 * select() expressed in terms of pselect().
 *
 * A NULL tv is passed through as a NULL timespec.  Otherwise tv is
 * converted to a timespec and handed to pselect() with a NULL signal
 * mask.  A tv_usec outside [0, MICROSEC) fails here with EINVAL; a
 * negative tv_sec is rejected by pselect()'s own timespec check.
 *
 * Returns whatever pselect() returns, or -1 with errno set to EINVAL
 * for an invalid tv_usec.
 */
int
select(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0, struct timeval *tv)
{
	timespec_t ts;
	timespec_t *tsp;

	if (tv == NULL)
		tsp = NULL;
	else {
		/* check timeval validity */
		if (tv->tv_usec < 0 || tv->tv_usec >= MICROSEC) {
			/*
			 * Behave as a cancellation point even when failing
			 * early, as pselect() does for its early failures.
			 */
			pthread_testcancel();
			errno = EINVAL;
			return (-1);
		}
		/*
		 * Convert timeval to timespec.
		 * To preserve compatibility with past behavior,
		 * when select was built upon poll(2), which has a
		 * minimum non-zero timeout of 1 millisecond, force
		 * a minimum non-zero timeout of 500 microseconds.
		 */
		ts.tv_sec = tv->tv_sec;
		ts.tv_nsec = tv->tv_usec * 1000;
		if (ts.tv_nsec != 0 && ts.tv_nsec < 500000)
			ts.tv_nsec = 500000;
		tsp = &ts;
	}

	return (pselect(nfds, in0, out0, ex0, tsp, NULL));
}