Print this page
select: shortcircuit fd_sets_count if given 0 fds (such as for timing)
if nfds is low, fastpath to try to maintain throughput
libc: only have one select implementation, and move the pollfds onto the heap if they cross some threshold
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libc/port/gen/select.c
+++ new/usr/src/lib/libc/port/gen/select.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
↓ open down ↓ |
19 lines elided |
↑ open up ↑ |
20 20 */
21 21
22 22 /*
23 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27 /* Copyright (c) 1988 AT&T */
28 28 /* All Rights Reserved */
29 29
30 -#pragma ident "%Z%%M% %I% %E% SMI"
31 -
32 30 /*
33 31 * Emulation of select() system call using poll() system call.
34 32 *
35 33 * Assumptions:
36 34 * polling for input only is most common.
37 35 * polling for exceptional conditions is very rare.
38 36 *
39 37 * Note that it is not feasible to emulate all error conditions,
40 38 * in particular conditions that would return EFAULT are far too
41 39 * difficult to check for in a library routine.
42 40 */
43 41
44 42 #pragma weak _select = select
45 43
46 44 #include "lint.h"
47 45 #include <values.h>
48 46 #include <pthread.h>
49 47 #include <errno.h>
48 +#include <stdlib.h>
50 49 #include <sys/time.h>
51 50 #include <sys/types.h>
52 51 #include <sys/select.h>
53 52 #include <sys/poll.h>
54 53 #include <alloca.h>
55 54 #include "libc.h"
56 55
56 +/*
57 + * STACK_PFD_LIM
58 + *
59 + * The limit at which pselect allocates pollfd structures in the heap,
60 + * rather than on the stack. These limits match the historical behaviour
61 + * with the *_large_fdset implementations.
62 + *
63 + * BULK_ALLOC_LIM
64 + *
65 + * The limit below which we'll just allocate nfds pollfds, rather than
66 + * counting how many we actually need.
67 + */
68 +#if defined(_LP64)
69 +#define STACK_PFD_LIM FD_SETSIZE
70 +#define BULK_ALLOC_LIM 8192
71 +#else
72 +#define STACK_PFD_LIM 1024
73 +#define BULK_ALLOC_LIM 1024
74 +#endif
75 +
76 +/*
77 + * The previous _large_fdset implementations are, unfortunately, baked into
78 + * the ABI.
79 + */
80 +#pragma weak select_large_fdset = select
81 +#pragma weak pselect_large_fdset = pselect
82 +
83 +#define fd_set_size(nfds) (((nfds) + (NFDBITS - 1)) / NFDBITS)
84 +
85 +static nfds_t
86 +fd_sets_count(int limit, fd_set *in, fd_set *out, fd_set *ex)
87 +{
88 + nfds_t total = 0;
89 +
90 + if (limit <= 0)
91 + return (0);
92 +
93 + for (int i = 0; i < fd_set_size(limit); i++) {
94 + long v = (in->fds_bits[i] | out->fds_bits[i] | ex->fds_bits[i]);
95 +
96 + while (v != 0) {
97 + v &= v - 1;
98 + total++;
99 + }
100 + }
101 +
102 + return (total);
103 +}
104 +
57 105 int
58 106 pselect(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0,
59 107 const timespec_t *tsp, const sigset_t *sigmask)
60 108 {
61 109 long *in, *out, *ex;
62 110 ulong_t m; /* bit mask */
63 111 int j; /* loop counter */
64 112 ulong_t b; /* bits to test */
65 113 int n, rv;
66 114 struct pollfd *pfd;
67 115 struct pollfd *p;
68 116 int lastj = -1;
117 + nfds_t npfds = 0;
118 + boolean_t heap_pfds = B_FALSE;
69 119
70 120 /* "zero" is read-only, it could go in the text segment */
71 121 static fd_set zero = { 0 };
72 122
73 123 /*
74 124 * Check for invalid conditions at outset.
75 125 * Required for spec1170.
76 126 * SUSV3: We must behave as a cancellation point even if we fail early.
77 127 */
78 128 if (nfds < 0 || nfds > FD_SETSIZE) {
79 129 pthread_testcancel();
80 130 errno = EINVAL;
81 131 return (-1);
82 132 }
83 - p = pfd = (struct pollfd *)alloca(nfds * sizeof (struct pollfd));
84 133
85 134 if (tsp != NULL) {
86 135 /* check timespec validity */
87 136 if (tsp->tv_nsec < 0 || tsp->tv_nsec >= NANOSEC ||
88 137 tsp->tv_sec < 0) {
89 138 pthread_testcancel();
90 139 errno = EINVAL;
91 140 return (-1);
92 141 }
93 142 }
94 143
↓ open down ↓ |
1 lines elided |
↑ open up ↑ |
95 144 /*
96 145 * If any input args are null, point them at the null array.
97 146 */
98 147 if (in0 == NULL)
99 148 in0 = &zero;
100 149 if (out0 == NULL)
101 150 out0 = &zero;
102 151 if (ex0 == NULL)
103 152 ex0 = &zero;
104 153
154 + if (nfds <= BULK_ALLOC_LIM) {
155 + p = pfd = alloca(nfds * sizeof (struct pollfd));
156 + } else {
157 + npfds = fd_sets_count(nfds, in0, out0, ex0);
158 +
159 + if (npfds > STACK_PFD_LIM) {
160 + p = pfd = malloc(npfds * sizeof (struct pollfd));
161 + if (p == NULL)
162 + return (-1);
163 + heap_pfds = B_TRUE;
164 + } else {
165 + p = pfd = alloca(npfds * sizeof (struct pollfd));
166 + }
167 + }
168 +
105 169 /*
106 170 * For each fd, if any bits are set convert them into
107 171 * the appropriate pollfd struct.
108 172 */
109 173 in = (long *)in0->fds_bits;
110 174 out = (long *)out0->fds_bits;
111 175 ex = (long *)ex0->fds_bits;
112 176 for (n = 0; n < nfds; n += NFDBITS) {
113 177 b = (ulong_t)(*in | *out | *ex);
114 178 for (j = 0, m = 1; b != 0; j++, b >>= 1, m <<= 1) {
115 179 if (b & 1) {
116 180 p->fd = n + j;
117 181 if (p->fd >= nfds)
118 182 goto done;
119 183 p->events = 0;
120 184 if (*in & m)
121 185 p->events |= POLLRDNORM;
122 186 if (*out & m)
123 187 p->events |= POLLWRNORM;
124 188 if (*ex & m)
125 189 p->events |= POLLRDBAND;
126 190 p++;
↓ open down ↓ |
12 lines elided |
↑ open up ↑ |
127 191 }
128 192 }
129 193 in++;
130 194 out++;
131 195 ex++;
132 196 }
133 197 done:
134 198 /*
135 199 * Now do the poll.
136 200 */
137 - n = (int)(p - pfd); /* number of pollfd's */
201 + npfds = (int)(p - pfd);
138 202 do {
139 - rv = _pollsys(pfd, (nfds_t)n, tsp, sigmask);
203 + rv = _pollsys(pfd, npfds, tsp, sigmask);
140 204 } while (rv < 0 && errno == EAGAIN);
141 205
142 206 if (rv < 0) /* no need to set bit masks */
143 - return (rv);
207 + goto out;
144 208
145 209 if (rv == 0) {
146 210 /*
147 211 * Clear out bit masks, just in case.
148 212 * On the assumption that usually only
149 213 * one bit mask is set, use three loops.
150 214 */
151 215 if (in0 != &zero) {
152 216 in = (long *)in0->fds_bits;
153 217 for (n = 0; n < nfds; n += NFDBITS)
154 218 *in++ = 0;
155 219 }
↓ open down ↓ |
2 lines elided |
↑ open up ↑ |
156 220 if (out0 != &zero) {
157 221 out = (long *)out0->fds_bits;
158 222 for (n = 0; n < nfds; n += NFDBITS)
159 223 *out++ = 0;
160 224 }
161 225 if (ex0 != &zero) {
162 226 ex = (long *)ex0->fds_bits;
163 227 for (n = 0; n < nfds; n += NFDBITS)
164 228 *ex++ = 0;
165 229 }
166 - return (0);
230 + rv = 0;
231 + goto out;
167 232 }
168 233
169 234 /*
170 235 * Check for the EBADF error case first to avoid changing any bits
171 236 * if we're going to return an error.
172 237 */
173 - for (p = pfd, j = n; j-- > 0; p++) {
238 + for (p = pfd, n = npfds; n-- > 0; p++) {
174 239 /*
175 240 * select will return EBADF immediately if any fd's
176 241 * are bad. poll will complete the poll on the
177 242 * rest of the fd's and include the error indication
178 243 * in the returned bits. This is a rare case so we
179 244 * accept this difference and return the error after
180 245 * doing more work than select would've done.
181 246 */
182 247 if (p->revents & POLLNVAL) {
183 248 errno = EBADF;
184 - return (-1);
249 + rv = -1;
250 + goto out;
185 251 }
186 252 /*
187 253 * We would like to make POLLHUP available to select,
188 254 * checking to see if we have pending data to be read.
189 255 * BUT until we figure out how not to break Xsun's
190 256 * dependencies on select's existing features...
191 257 * This is what we _thought_ would work ... sigh!
192 258 */
193 259 /*
194 260 * if ((p->revents & POLLHUP) &&
195 261 * !(p->revents & (POLLRDNORM|POLLRDBAND))) {
196 262 * errno = EINTR;
197 - * return (-1);
263 + * rv = -1;
264 + * goto out;
198 265 * }
199 266 */
200 267 }
201 268
202 269 /*
203 270 * Convert results of poll back into bits
204 271 * in the argument arrays.
205 272 *
206 273 * We assume POLLRDNORM, POLLWRNORM, and POLLRDBAND will only be set
207 274 * on return from poll if they were set on input, thus we don't
208 275 * worry about accidentally setting the corresponding bits in the
209 276 * zero array if the input bit masks were null.
210 277 *
211 278 * Must return number of bits set, not number of ready descriptors
212 279 * (as the man page says, and as poll() does).
213 280 */
214 281 rv = 0;
215 - for (p = pfd; n-- > 0; p++) {
282 + for (p = pfd, n = npfds; n-- > 0; p++) {
216 283 j = (int)(p->fd / NFDBITS);
217 284 /* have we moved into another word of the bit mask yet? */
218 285 if (j != lastj) {
219 286 /* clear all output bits to start with */
220 287 in = (long *)&in0->fds_bits[j];
221 288 out = (long *)&out0->fds_bits[j];
222 289 ex = (long *)&ex0->fds_bits[j];
223 290 /*
224 291 * In case we made "zero" read-only (e.g., with
225 292 * cc -R), avoid actually storing into it.
226 293 */
227 294 if (in0 != &zero)
228 295 *in = 0;
229 296 if (out0 != &zero)
230 297 *out = 0;
231 298 if (ex0 != &zero)
232 299 *ex = 0;
233 300 lastj = j;
234 301 }
235 302 if (p->revents) {
236 303 m = 1L << (p->fd % NFDBITS);
237 304 if (p->revents & POLLRDNORM) {
238 305 *in |= m;
239 306 rv++;
240 307 }
241 308 if (p->revents & POLLWRNORM) {
242 309 *out |= m;
243 310 rv++;
244 311 }
245 312 if (p->revents & POLLRDBAND) {
246 313 *ex |= m;
247 314 rv++;
248 315 }
249 316 /*
250 317 * Only set this bit on return if we asked about
251 318 * input conditions.
252 319 */
253 320 if ((p->revents & (POLLHUP|POLLERR)) &&
254 321 (p->events & POLLRDNORM)) {
255 322 if ((*in & m) == 0)
256 323 rv++; /* wasn't already set */
257 324 *in |= m;
258 325 }
259 326 /*
260 327 * Only set this bit on return if we asked about
261 328 * output conditions.
262 329 */
263 330 if ((p->revents & (POLLHUP|POLLERR)) &&
264 331 (p->events & POLLWRNORM)) {
265 332 if ((*out & m) == 0)
266 333 rv++; /* wasn't already set */
267 334 *out |= m;
268 335 }
269 336 /*
270 337 * Only set this bit on return if we asked about
↓ open down ↓ |
45 lines elided |
↑ open up ↑ |
271 338 * exceptional conditions.
272 339 */
273 340 if ((p->revents & (POLLHUP|POLLERR)) &&
274 341 (p->events & POLLRDBAND)) {
275 342 if ((*ex & m) == 0)
276 343 rv++; /* wasn't already set */
277 344 *ex |= m;
278 345 }
279 346 }
280 347 }
348 +out:
349 + if (heap_pfds)
350 + free(pfd);
281 351 return (rv);
282 352 }
283 353
284 354 int
285 355 select(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0, struct timeval *tv)
286 356 {
287 357 timespec_t ts;
288 358 timespec_t *tsp;
289 359
290 360 if (tv == NULL)
291 361 tsp = NULL;
292 362 else {
293 363 /* check timeval validity */
294 364 if (tv->tv_usec < 0 || tv->tv_usec >= MICROSEC) {
295 365 errno = EINVAL;
296 366 return (-1);
297 367 }
298 368 /*
299 369 * Convert timeval to timespec.
300 370 * To preserve compatibility with past behavior,
301 371 * when select was built upon poll(2), which has a
302 372 * minimum non-zero timeout of 1 millisecond, force
303 373 * a minimum non-zero timeout of 500 microseconds.
304 374 */
305 375 ts.tv_sec = tv->tv_sec;
306 376 ts.tv_nsec = tv->tv_usec * 1000;
307 377 if (ts.tv_nsec != 0 && ts.tv_nsec < 500000)
308 378 ts.tv_nsec = 500000;
309 379 tsp = &ts;
310 380 }
311 381
312 382 return (pselect(nfds, in0, out0, ex0, tsp, NULL));
313 383 }
↓ open down ↓ |
23 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX