3772 consider raising default descriptor soft limit
1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1988 AT&T */ 28 /* All Rights Reserved */ 29 30 /* 31 * Emulation of select() system call using poll() system call. 32 * 33 * Assumptions: 34 * polling for input only is most common. 35 * polling for exceptional conditions is very rare. 36 * 37 * Note that is it not feasible to emulate all error conditions, 38 * in particular conditions that would return EFAULT are far too 39 * difficult to check for in a library routine. 40 */ 41 42 #pragma weak _select = select 43 44 #include "lint.h" 45 #include <values.h> 46 #include <pthread.h> 47 #include <errno.h> 48 #include <stdlib.h> 49 #include <sys/time.h> 50 #include <sys/types.h> 51 #include <sys/select.h> 52 #include <sys/poll.h> 53 #include <alloca.h> 54 #include "libc.h" 55 56 /* 57 * STACK_PFD_LIM 58 * 59 * The limit at which pselect allocates pollfd structures in the heap, 60 * rather than on the stack. These limits match the historical behaviour 61 * with the * _large_fdset implementations. 62 * 63 * BULK_ALLOC_LIM 64 * 65 * The limit below which we'll just allocate nfds pollfds, rather than 66 * counting how many we actually need. 67 */ 68 #if defined(_LP64) 69 #define STACK_PFD_LIM FD_SETSIZE 70 #define BULK_ALLOC_LIM 8192 71 #else 72 #define STACK_PFD_LIM 1024 73 #define BULK_ALLOC_LIM 1024 74 #endif 75 76 /* 77 * The previous _large_fdset implementations are, unfortunately, baked into 78 * the ABI. 79 */ 80 #pragma weak select_large_fdset = select 81 #pragma weak pselect_large_fdset = pselect 82 83 #define fd_set_size(nfds) (((nfds) + (NFDBITS - 1)) / NFDBITS) 84 85 static nfds_t 86 fd_sets_count(int limit, fd_set *in, fd_set *out, fd_set *ex) 87 { 88 nfds_t total = 0; 89 90 if (limit <= 0) 91 return (0); 92 93 for (int i = 0; i < fd_set_size(limit); i++) { 94 long v = (in->fds_bits[i] | out->fds_bits[i] | ex->fds_bits[i]); 95 96 while (v != 0) { 97 v &= v - 1; 98 total++; 99 } 100 } 101 102 return (total); 103 } 104 105 int 106 pselect(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0, 107 const timespec_t *tsp, const sigset_t *sigmask) 108 { 109 long *in, *out, *ex; 110 ulong_t m; /* bit mask */ 111 int j; /* loop counter */ 112 ulong_t b; /* bits to test */ 113 int n, rv; 114 struct pollfd *pfd; 115 struct pollfd *p; 116 int lastj = -1; 117 nfds_t npfds = 0; 118 boolean_t heap_pfds = B_FALSE; 119 120 /* "zero" is read-only, it could go in the text segment */ 121 static fd_set zero = { 0 }; 122 123 /* 124 * Check for invalid conditions at outset. 125 * Required for spec1170. 126 * SUSV3: We must behave as a cancellation point even if we fail early. 127 */ 128 if (nfds < 0 || nfds > FD_SETSIZE) { 129 pthread_testcancel(); 130 errno = EINVAL; 131 return (-1); 132 } 133 134 if (tsp != NULL) { 135 /* check timespec validity */ 136 if (tsp->tv_nsec < 0 || tsp->tv_nsec >= NANOSEC || 137 tsp->tv_sec < 0) { 138 pthread_testcancel(); 139 errno = EINVAL; 140 return (-1); 141 } 142 } 143 144 /* 145 * If any input args are null, point them at the null array. 146 */ 147 if (in0 == NULL) 148 in0 = &zero; 149 if (out0 == NULL) 150 out0 = &zero; 151 if (ex0 == NULL) 152 ex0 = &zero; 153 154 if (nfds <= BULK_ALLOC_LIM) { 155 p = pfd = alloca(nfds * sizeof (struct pollfd)); 156 } else { 157 npfds = fd_sets_count(nfds, in0, out0, ex0); 158 159 if (npfds > STACK_PFD_LIM) { 160 p = pfd = malloc(npfds * sizeof (struct pollfd)); 161 if (p == NULL) 162 return (-1); 163 heap_pfds = B_TRUE; 164 } else { 165 p = pfd = alloca(npfds * sizeof (struct pollfd)); 166 } 167 } 168 169 /* 170 * For each fd, if any bits are set convert them into 171 * the appropriate pollfd struct. 172 */ 173 in = (long *)in0->fds_bits; 174 out = (long *)out0->fds_bits; 175 ex = (long *)ex0->fds_bits; 176 for (n = 0; n < nfds; n += NFDBITS) { 177 b = (ulong_t)(*in | *out | *ex); 178 for (j = 0, m = 1; b != 0; j++, b >>= 1, m <<= 1) { 179 if (b & 1) { 180 p->fd = n + j; 181 if (p->fd >= nfds) 182 goto done; 183 p->events = 0; 184 if (*in & m) 185 p->events |= POLLRDNORM; 186 if (*out & m) 187 p->events |= POLLWRNORM; 188 if (*ex & m) 189 p->events |= POLLRDBAND; 190 p++; 191 } 192 } 193 in++; 194 out++; 195 ex++; 196 } 197 done: 198 /* 199 * Now do the poll. 200 */ 201 npfds = (int)(p - pfd); 202 do { 203 rv = _pollsys(pfd, npfds, tsp, sigmask); 204 } while (rv < 0 && errno == EAGAIN); 205 206 if (rv < 0) /* no need to set bit masks */ 207 goto out; 208 209 if (rv == 0) { 210 /* 211 * Clear out bit masks, just in case. 212 * On the assumption that usually only 213 * one bit mask is set, use three loops. 214 */ 215 if (in0 != &zero) { 216 in = (long *)in0->fds_bits; 217 for (n = 0; n < nfds; n += NFDBITS) 218 *in++ = 0; 219 } 220 if (out0 != &zero) { 221 out = (long *)out0->fds_bits; 222 for (n = 0; n < nfds; n += NFDBITS) 223 *out++ = 0; 224 } 225 if (ex0 != &zero) { 226 ex = (long *)ex0->fds_bits; 227 for (n = 0; n < nfds; n += NFDBITS) 228 *ex++ = 0; 229 } 230 rv = 0; 231 goto out; 232 } 233 234 /* 235 * Check for EINVAL error case first to avoid changing any bits 236 * if we're going to return an error. 237 */ 238 for (p = pfd, n = npfds; n-- > 0; p++) { 239 /* 240 * select will return EBADF immediately if any fd's 241 * are bad. poll will complete the poll on the 242 * rest of the fd's and include the error indication 243 * in the returned bits. This is a rare case so we 244 * accept this difference and return the error after 245 * doing more work than select would've done. 246 */ 247 if (p->revents & POLLNVAL) { 248 errno = EBADF; 249 rv = -1; 250 goto out; 251 } 252 /* 253 * We would like to make POLLHUP available to select, 254 * checking to see if we have pending data to be read. 255 * BUT until we figure out how not to break Xsun's 256 * dependencies on select's existing features... 257 * This is what we _thought_ would work ... sigh! 258 */ 259 /* 260 * if ((p->revents & POLLHUP) && 261 * !(p->revents & (POLLRDNORM|POLLRDBAND))) { 262 * errno = EINTR; 263 * rv = -1; 264 * goto out; 265 * } 266 */ 267 } 268 269 /* 270 * Convert results of poll back into bits 271 * in the argument arrays. 272 * 273 * We assume POLLRDNORM, POLLWRNORM, and POLLRDBAND will only be set 274 * on return from poll if they were set on input, thus we don't 275 * worry about accidentally setting the corresponding bits in the 276 * zero array if the input bit masks were null. 277 * 278 * Must return number of bits set, not number of ready descriptors 279 * (as the man page says, and as poll() does). 280 */ 281 rv = 0; 282 for (p = pfd, n = npfds; n-- > 0; p++) { 283 j = (int)(p->fd / NFDBITS); 284 /* have we moved into another word of the bit mask yet? */ 285 if (j != lastj) { 286 /* clear all output bits to start with */ 287 in = (long *)&in0->fds_bits[j]; 288 out = (long *)&out0->fds_bits[j]; 289 ex = (long *)&ex0->fds_bits[j]; 290 /* 291 * In case we made "zero" read-only (e.g., with 292 * cc -R), avoid actually storing into it. 293 */ 294 if (in0 != &zero) 295 *in = 0; 296 if (out0 != &zero) 297 *out = 0; 298 if (ex0 != &zero) 299 *ex = 0; 300 lastj = j; 301 } 302 if (p->revents) { 303 m = 1L << (p->fd % NFDBITS); 304 if (p->revents & POLLRDNORM) { 305 *in |= m; 306 rv++; 307 } 308 if (p->revents & POLLWRNORM) { 309 *out |= m; 310 rv++; 311 } 312 if (p->revents & POLLRDBAND) { 313 *ex |= m; 314 rv++; 315 } 316 /* 317 * Only set this bit on return if we asked about 318 * input conditions. 319 */ 320 if ((p->revents & (POLLHUP|POLLERR)) && 321 (p->events & POLLRDNORM)) { 322 if ((*in & m) == 0) 323 rv++; /* wasn't already set */ 324 *in |= m; 325 } 326 /* 327 * Only set this bit on return if we asked about 328 * output conditions. 329 */ 330 if ((p->revents & (POLLHUP|POLLERR)) && 331 (p->events & POLLWRNORM)) { 332 if ((*out & m) == 0) 333 rv++; /* wasn't already set */ 334 *out |= m; 335 } 336 /* 337 * Only set this bit on return if we asked about 338 * output conditions. 339 */ 340 if ((p->revents & (POLLHUP|POLLERR)) && 341 (p->events & POLLRDBAND)) { 342 if ((*ex & m) == 0) 343 rv++; /* wasn't already set */ 344 *ex |= m; 345 } 346 } 347 } 348 out: 349 if (heap_pfds) 350 free(pfd); 351 return (rv); 352 } 353 354 int 355 select(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0, struct timeval *tv) 356 { 357 timespec_t ts; 358 timespec_t *tsp; 359 360 if (tv == NULL) 361 tsp = NULL; 362 else { 363 /* check timeval validity */ 364 if (tv->tv_usec < 0 || tv->tv_usec >= MICROSEC) { 365 errno = EINVAL; 366 return (-1); 367 } 368 /* 369 * Convert timeval to timespec. 370 * To preserve compatibility with past behavior, 371 * when select was built upon poll(2), which has a 372 * minimum non-zero timeout of 1 millisecond, force 373 * a minimum non-zero timeout of 500 microseconds. 374 */ 375 ts.tv_sec = tv->tv_sec; 376 ts.tv_nsec = tv->tv_usec * 1000; 377 if (ts.tv_nsec != 0 && ts.tv_nsec < 500000) 378 ts.tv_nsec = 500000; 379 tsp = &ts; 380 } 381 382 return (pselect(nfds, in0, out0, ex0, tsp, NULL)); 383 } --- EOF ---