1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1988 AT&T */ 28 /* All Rights Reserved */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 /* 33 * Emulation of select() system call using _pollsys() system call. 34 * 35 * Assumptions: 36 * polling for input only is most common. 37 * polling for exceptional conditions is very rare. 38 * 39 * Note that is it not feasible to emulate all error conditions, 40 * in particular conditions that would return EFAULT are far too 41 * difficult to check for in a library routine. 42 * 43 * This is the alternate large fd_set select. 44 * 45 */ 46 47 /* 48 * Must precede any include files 49 */ 50 #ifdef FD_SETSIZE 51 #undef FD_SETSIZE 52 #endif 53 #define FD_SETSIZE 65536 54 55 #include "lint.h" 56 #include <values.h> 57 #include <stdlib.h> 58 #include <string.h> 59 #include <pthread.h> 60 #include <errno.h> 61 #include <sys/time.h> 62 #include <sys/types.h> 63 #include <sys/poll.h> 64 #include <string.h> 65 #include <stdlib.h> 66 #include "libc.h" 67 68 #define DEFAULT_POLL_SIZE 64 69 70 static struct pollfd *realloc_fds(int *, struct pollfd **, struct pollfd *); 71 72 int 73 pselect_large_fdset(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0, 74 const timespec_t *tsp, const sigset_t *sigmask) 75 { 76 long *in, *out, *ex; 77 ulong_t m; /* bit mask */ 78 int j; /* loop counter */ 79 ulong_t b; /* bits to test */ 80 int n, rv; 81 int lastj = -1; 82 int nused; 83 84 /* 85 * Rather than have a mammoth pollfd (65K) list on the stack 86 * we start with a small one and then malloc larger chunks 87 * on the heap if necessary. 88 */ 89 90 struct pollfd pfd[DEFAULT_POLL_SIZE]; 91 struct pollfd *p; 92 struct pollfd *pfd_list; 93 int nfds_on_list; 94 95 fd_set zero; 96 97 /* 98 * Check for invalid conditions at outset. 99 * Required for spec1170. 100 * SUSV3: We must behave as a cancellation point even if we fail early. 101 */ 102 if (nfds >= 0 && nfds <= FD_SETSIZE) { 103 if (tsp != NULL) { 104 if (tsp->tv_nsec < 0 || tsp->tv_nsec >= NANOSEC || 105 tsp->tv_sec < 0) { 106 pthread_testcancel(); 107 errno = EINVAL; 108 return (-1); 109 } 110 } 111 } else { 112 pthread_testcancel(); 113 errno = EINVAL; 114 return (-1); 115 } 116 117 /* 118 * If any input args are null, point them at the null array. 119 */ 120 (void) memset(&zero, 0, sizeof (fd_set)); 121 if (in0 == NULL) 122 in0 = &zero; 123 if (out0 == NULL) 124 out0 = &zero; 125 if (ex0 == NULL) 126 ex0 = &zero; 127 128 nfds_on_list = DEFAULT_POLL_SIZE; 129 pfd_list = pfd; 130 p = pfd_list; 131 (void) memset(pfd, 0, sizeof (pfd)); 132 /* 133 * For each fd, if any bits are set convert them into 134 * the appropriate pollfd struct. 135 */ 136 in = (long *)in0->fds_bits; 137 out = (long *)out0->fds_bits; 138 ex = (long *)ex0->fds_bits; 139 nused = 0; 140 /* 141 * nused reflects the number of pollfd structs currently used 142 * less one. If realloc_fds returns NULL it is because malloc 143 * failed. We expect malloc() to have done the proper 144 * thing with errno. 145 */ 146 for (n = 0; n < nfds; n += NFDBITS) { 147 b = (ulong_t)(*in | *out | *ex); 148 for (j = 0, m = 1; b != 0; j++, b >>= 1, m <<= 1) { 149 if (b & 1) { 150 p->fd = n + j; 151 if (p->fd < nfds) { 152 p->events = 0; 153 if (*in & m) 154 p->events |= POLLRDNORM; 155 if (*out & m) 156 p->events |= POLLWRNORM; 157 if (*ex & m) 158 p->events |= POLLRDBAND; 159 if (nused < (nfds_on_list - 1)) { 160 p++; 161 } else if ((p = realloc_fds( 162 &nfds_on_list, &pfd_list, pfd)) 163 == NULL) { 164 if (pfd_list != pfd) 165 free(pfd_list); 166 pthread_testcancel(); 167 return (-1); 168 } 169 nused++; 170 } else 171 goto done; 172 } 173 } 174 in++; 175 out++; 176 ex++; 177 } 178 done: 179 /* 180 * Now do the poll. 181 */ 182 do { 183 rv = _pollsys(pfd_list, (nfds_t)nused, tsp, sigmask); 184 } while (rv < 0 && errno == EAGAIN); 185 186 if (rv < 0) { /* no need to set bit masks */ 187 if (pfd_list != pfd) 188 free(pfd_list); 189 return (rv); 190 } else if (rv == 0) { 191 /* 192 * Clear out bit masks, just in case. 193 * On the assumption that usually only 194 * one bit mask is set, use three loops. 195 */ 196 if (in0 != &zero) { 197 in = (long *)in0->fds_bits; 198 for (n = 0; n < nfds; n += NFDBITS) 199 *in++ = 0; 200 } 201 if (out0 != &zero) { 202 out = (long *)out0->fds_bits; 203 for (n = 0; n < nfds; n += NFDBITS) 204 *out++ = 0; 205 } 206 if (ex0 != &zero) { 207 ex = (long *)ex0->fds_bits; 208 for (n = 0; n < nfds; n += NFDBITS) 209 *ex++ = 0; 210 } 211 if (pfd_list != pfd) 212 free(pfd_list); 213 return (0); 214 } 215 216 /* 217 * Check for EINVAL error case first to avoid changing any bits 218 * if we're going to return an error. 219 */ 220 for (p = pfd_list, j = nused; j-- > 0; p++) { 221 /* 222 * select will return EBADF immediately if any fd's 223 * are bad. poll will complete the poll on the 224 * rest of the fd's and include the error indication 225 * in the returned bits. This is a rare case so we 226 * accept this difference and return the error after 227 * doing more work than select would've done. 228 */ 229 if (p->revents & POLLNVAL) { 230 errno = EBADF; 231 if (pfd_list != pfd) 232 free(pfd_list); 233 return (-1); 234 } 235 /* 236 * We would like to make POLLHUP available to select, 237 * checking to see if we have pending data to be read. 238 * BUT until we figure out how not to break Xsun's 239 * dependencies on select's existing features... 240 * This is what we _thought_ would work ... sigh! 241 */ 242 /* 243 * if ((p->revents & POLLHUP) && 244 * !(p->revents & (POLLRDNORM|POLLRDBAND))) { 245 * errno = EINTR; 246 * return (-1); 247 * } 248 */ 249 } 250 251 /* 252 * Convert results of poll back into bits 253 * in the argument arrays. 254 * 255 * We assume POLLRDNORM, POLLWRNORM, and POLLRDBAND will only be set 256 * on return from poll if they were set on input, thus we don't 257 * worry about accidentally setting the corresponding bits in the 258 * zero array if the input bit masks were null. 259 * 260 * Must return number of bits set, not number of ready descriptors 261 * (as the man page says, and as poll() does). 262 */ 263 rv = 0; 264 for (p = pfd_list; nused-- > 0; p++) { 265 j = (int)(p->fd / NFDBITS); 266 /* have we moved into another word of the bit mask yet? */ 267 if (j != lastj) { 268 /* clear all output bits to start with */ 269 in = (long *)&in0->fds_bits[j]; 270 out = (long *)&out0->fds_bits[j]; 271 ex = (long *)&ex0->fds_bits[j]; 272 /* 273 * In case we made "zero" read-only (e.g., with 274 * cc -R), avoid actually storing into it. 275 */ 276 if (in0 != &zero) 277 *in = 0; 278 if (out0 != &zero) 279 *out = 0; 280 if (ex0 != &zero) 281 *ex = 0; 282 lastj = j; 283 } 284 if (p->revents) { 285 m = 1L << (p->fd % NFDBITS); 286 if (p->revents & POLLRDNORM) { 287 *in |= m; 288 rv++; 289 } 290 if (p->revents & POLLWRNORM) { 291 *out |= m; 292 rv++; 293 } 294 if (p->revents & POLLRDBAND) { 295 *ex |= m; 296 rv++; 297 } 298 /* 299 * Only set this bit on return if we asked about 300 * input conditions. 301 */ 302 if ((p->revents & (POLLHUP|POLLERR)) && 303 (p->events & POLLRDNORM)) { 304 if ((*in & m) == 0) 305 rv++; /* wasn't already set */ 306 *in |= m; 307 } 308 /* 309 * Only set this bit on return if we asked about 310 * output conditions. 311 */ 312 if ((p->revents & (POLLHUP|POLLERR)) && 313 (p->events & POLLWRNORM)) { 314 if ((*out & m) == 0) 315 rv++; /* wasn't already set */ 316 *out |= m; 317 } 318 /* 319 * Only set this bit on return if we asked about 320 * output conditions. 321 */ 322 if ((p->revents & (POLLHUP|POLLERR)) && 323 (p->events & POLLRDBAND)) { 324 if ((*ex & m) == 0) 325 rv++; /* wasn't already set */ 326 *ex |= m; 327 } 328 } 329 } 330 if (pfd_list != pfd) 331 free(pfd_list); 332 return (rv); 333 } 334 335 int 336 select_large_fdset(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0, 337 struct timeval *tv) 338 { 339 timespec_t ts; 340 timespec_t *tsp; 341 342 if (tv == NULL) 343 tsp = NULL; 344 else { 345 /* check timeval validity */ 346 if (tv->tv_usec < 0 || tv->tv_usec >= MICROSEC) { 347 errno = EINVAL; 348 return (-1); 349 } 350 /* 351 * Convert timeval to timespec. 352 * To preserve compatibility with past behavior, 353 * when select was built upon poll(2), which has a 354 * minimum non-zero timeout of 1 millisecond, force 355 * a minimum non-zero timeout of 500 microseconds. 356 */ 357 ts.tv_sec = tv->tv_sec; 358 ts.tv_nsec = tv->tv_usec * 1000; 359 if (ts.tv_nsec != 0 && ts.tv_nsec < 500000) 360 ts.tv_nsec = 500000; 361 tsp = &ts; 362 } 363 364 return (pselect_large_fdset(nfds, in0, out0, ex0, tsp, NULL)); 365 } 366 367 /* 368 * Reallocate buffers of pollfds for our list. We malloc a new buffer 369 * and, in the case where the old buffer does not match what is passed 370 * in orig, free the buffer after copying the contents. 371 */ 372 struct pollfd * 373 realloc_fds(int *num, struct pollfd **list_head, struct pollfd *orig) 374 { 375 struct pollfd *b; 376 int nta; 377 int n2; 378 379 n2 = *num * 2; 380 nta = n2 * sizeof (struct pollfd); 381 b = malloc(nta); 382 if (b) { 383 (void) memset(b, 0, (size_t)nta); 384 (void) memcpy(b, *list_head, nta / 2); 385 if (*list_head != orig) 386 free(*list_head); 387 *list_head = b; 388 b += *num; 389 *num = n2; 390 } 391 return (b); 392 }