1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <unistd.h> 28 #include <fcntl.h> 29 #include <errno.h> 30 #include <signal.h> 31 #include <stdio.h> 32 #include <stdlib.h> 33 #include <libintl.h> 34 #include <strings.h> 35 #include <alloca.h> 36 #include <ucred.h> 37 38 #include <sys/param.h> 39 #include <sys/brand.h> 40 #include <sys/syscall.h> 41 #include <sys/socket.h> 42 #include <sys/socketvar.h> 43 #include <sys/un.h> 44 #include <netinet/tcp.h> 45 #include <netinet/igmp.h> 46 #include <sys/types.h> 47 #include <sys/stat.h> 48 #include <sys/lx_debug.h> 49 #include <sys/lx_syscall.h> 50 #include <sys/lx_socket.h> 51 #include <sys/lx_brand.h> 52 #include <sys/lx_misc.h> 53 54 /* 55 * This string is used to prefix all abstract namespace unix sockets, ie all 56 * abstract namespace sockets are converted to regular sockets in the /tmp 57 * directory with .ABSK_ prefixed to their names. 58 */ 59 #define ABST_PRFX "/tmp/.ABSK_" 60 #define ABST_PRFX_LEN 11 61 62 static int lx_socket(ulong_t *); 63 static int lx_bind(ulong_t *); 64 static int lx_connect(ulong_t *); 65 static int lx_listen(ulong_t *); 66 static int lx_accept(ulong_t *); 67 static int lx_getsockname(ulong_t *); 68 static int lx_getpeername(ulong_t *); 69 static int lx_socketpair(ulong_t *); 70 static int lx_send(ulong_t *); 71 static int lx_recv(ulong_t *); 72 static int lx_sendto(ulong_t *); 73 static int lx_recvfrom(ulong_t *); 74 static int lx_shutdown(ulong_t *); 75 static int lx_setsockopt(ulong_t *); 76 static int lx_getsockopt(ulong_t *); 77 static int lx_sendmsg(ulong_t *); 78 static int lx_recvmsg(ulong_t *); 79 80 typedef int (*sockfn_t)(ulong_t *); 81 82 static struct { 83 sockfn_t s_fn; /* Function implementing the subcommand */ 84 int s_nargs; /* Number of arguments the function takes */ 85 } sockfns[] = { 86 lx_socket, 3, 87 lx_bind, 3, 88 lx_connect, 3, 89 lx_listen, 2, 90 lx_accept, 3, 91 lx_getsockname, 3, 92 lx_getpeername, 3, 93 lx_socketpair, 4, 94 lx_send, 4, 95 lx_recv, 4, 96 lx_sendto, 6, 97 lx_recvfrom, 6, 98 lx_shutdown, 2, 99 lx_setsockopt, 5, 100 lx_getsockopt, 5, 101 lx_sendmsg, 3, 102 lx_recvmsg, 3 103 }; 104 105 /* 106 * What follows are a series of tables we use to translate Linux constants 107 * into equivalent Solaris constants and back again. I wish this were 108 * cleaner, more programmatic, and generally nicer. Sadly, life is messy, 109 * and Unix networking even more so. 110 */ 111 static const int ltos_family[LX_AF_MAX + 1] = { 112 AF_UNSPEC, AF_UNIX, AF_INET, AF_CCITT, AF_IPX, 113 AF_APPLETALK, AF_NOTSUPPORTED, AF_OSI, AF_NOTSUPPORTED, 114 AF_X25, AF_INET6, AF_CCITT, AF_DECnet, 115 AF_802, AF_POLICY, AF_KEY, AF_ROUTE, 116 AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED, 117 AF_NOTSUPPORTED, AF_SNA, AF_NOTSUPPORTED, AF_NOTSUPPORTED, 118 AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED, 119 AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED, AF_NOTSUPPORTED 120 }; 121 122 #define LTOS_FAMILY(d) ((d) <= LX_AF_MAX ? ltos_family[(d)] : AF_INVAL) 123 124 static const int ltos_socktype[LX_SOCK_PACKET + 1] = { 125 SOCK_NOTSUPPORTED, SOCK_STREAM, SOCK_DGRAM, SOCK_RAW, 126 SOCK_RDM, SOCK_SEQPACKET, SOCK_NOTSUPPORTED, SOCK_NOTSUPPORTED, 127 SOCK_NOTSUPPORTED, SOCK_NOTSUPPORTED, SOCK_NOTSUPPORTED 128 }; 129 130 #define LTOS_SOCKTYPE(t) \ 131 ((t) <= LX_SOCK_PACKET ? ltos_socktype[(t)] : SOCK_INVAL) 132 133 /* 134 * Linux socket option type definitions 135 * 136 * The protocol `levels` are well defined (see in.h) The option values are 137 * not so well defined. Linux often uses different values to Solaris 138 * although they mean the same thing. For example, IP_TOS in Linux is 139 * defined as value 1 but in Solaris it is defined as value 3. This table 140 * maps all the Protocol levels to their options and maps them between 141 * Linux and Solaris and vice versa. Hence the reason for the complexity. 142 */ 143 144 typedef struct lx_proto_opts { 145 const int *proto; /* Linux to Solaris mapping table */ 146 int maxentries; /* max entries in this table */ 147 } lx_proto_opts_t; 148 149 #define OPTNOTSUP -1 /* we don't support it */ 150 151 static const int ltos_ip_sockopts[LX_IP_DROP_MEMBERSHIP + 1] = { 152 OPTNOTSUP, IP_TOS, IP_TTL, IP_HDRINCL, 153 IP_OPTIONS, OPTNOTSUP, IP_RECVOPTS, IP_RETOPTS, 154 OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, 155 IP_RECVTTL, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, 156 OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, 157 OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, 158 OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, 159 OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, 160 IP_MULTICAST_IF, IP_MULTICAST_TTL, IP_MULTICAST_LOOP, 161 IP_ADD_MEMBERSHIP, IP_DROP_MEMBERSHIP 162 }; 163 164 static const int ltos_tcp_sockopts[LX_TCP_QUICKACK + 1] = { 165 OPTNOTSUP, TCP_NODELAY, TCP_MAXSEG, OPTNOTSUP, 166 OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, 167 TCP_KEEPALIVE, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, 168 OPTNOTSUP 169 }; 170 171 static const int ltos_igmp_sockopts[IGMP_MTRACE + 1] = { 172 OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, 173 OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, 174 IGMP_MINLEN, OPTNOTSUP, OPTNOTSUP, /* XXX: was IGMP_TIMER_SCALE */ 175 OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, 176 OPTNOTSUP, OPTNOTSUP, IGMP_MEMBERSHIP_QUERY, 177 IGMP_V1_MEMBERSHIP_REPORT, IGMP_DVMRP, 178 IGMP_PIM, OPTNOTSUP, IGMP_V2_MEMBERSHIP_REPORT, 179 IGMP_V2_LEAVE_GROUP, OPTNOTSUP, OPTNOTSUP, 180 OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, 181 IGMP_MTRACE_RESP, IGMP_MTRACE 182 }; 183 184 static const int ltos_socket_sockopts[LX_SO_ACCEPTCONN + 1] = { 185 OPTNOTSUP, SO_DEBUG, SO_REUSEADDR, SO_TYPE, 186 SO_ERROR, SO_DONTROUTE, SO_BROADCAST, SO_SNDBUF, 187 SO_RCVBUF, SO_KEEPALIVE, SO_OOBINLINE, OPTNOTSUP, 188 OPTNOTSUP, SO_LINGER, OPTNOTSUP, OPTNOTSUP, 189 OPTNOTSUP, OPTNOTSUP, SO_RCVLOWAT, SO_SNDLOWAT, 190 SO_RCVTIMEO, SO_SNDTIMEO, OPTNOTSUP, OPTNOTSUP, 191 OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, OPTNOTSUP, 192 OPTNOTSUP, OPTNOTSUP, SO_ACCEPTCONN 193 }; 194 195 #define PROTO_SOCKOPTS(opts) \ 196 { (opts), sizeof ((opts)) / sizeof ((opts)[0]) } 197 198 /* 199 * The main Linux to Solaris protocol to options mapping table 200 * IPPROTO_TAB_SIZE can be set up to IPPROTO_MAX. All entries above 201 * IPPROTO_TAB_SIZE are in effect not implemented, 202 */ 203 204 #define IPPROTO_TAB_SIZE 8 205 206 static const lx_proto_opts_t ltos_proto_opts[IPPROTO_TAB_SIZE] = { 207 /* IPPROTO_IP 0 */ 208 PROTO_SOCKOPTS(ltos_ip_sockopts), 209 /* SOL_SOCKET 1 */ 210 PROTO_SOCKOPTS(ltos_socket_sockopts), 211 /* IPPROTO_IGMP 2 */ 212 PROTO_SOCKOPTS(ltos_igmp_sockopts), 213 /* NOT IMPLEMENTED 3 */ 214 { NULL, 0 }, 215 /* NOT IMPLEMENTED 4 */ 216 { NULL, 0 }, 217 /* NOT IMPLEMENTED 5 */ 218 { NULL, 0 }, 219 /* IPPROTO_TCP 6 */ 220 PROTO_SOCKOPTS(ltos_tcp_sockopts), 221 /* NOT IMPLEMENTED 7 */ 222 { NULL, 0 } 223 }; 224 225 /* 226 * Lifted from socket.h, since these definitions are contained within 227 * _KERNEL guards. 228 */ 229 #define _CMSG_HDR_ALIGNMENT 4 230 #define _CMSG_HDR_ALIGN(x) (((uintptr_t)(x) + _CMSG_HDR_ALIGNMENT - 1) & \ 231 ~(_CMSG_HDR_ALIGNMENT - 1)) 232 #define CMSG_FIRSTHDR(m) \ 233 (((m)->msg_controllen < sizeof (struct cmsghdr)) ? \ 234 (struct cmsghdr *)0 : (struct cmsghdr *)((m)->msg_control)) 235 236 #define CMSG_NXTHDR(m, c) \ 237 (((c) == 0) ? CMSG_FIRSTHDR(m) : \ 238 ((((uintptr_t)_CMSG_HDR_ALIGN((char *)(c) + \ 239 ((struct cmsghdr *)(c))->cmsg_len) + sizeof (struct cmsghdr)) > \ 240 (((uintptr_t)((struct lx_msghdr *)(m))->msg_control) + \ 241 ((uintptr_t)((struct lx_msghdr *)(m))->msg_controllen))) ? \ 242 ((struct cmsghdr *)0) : \ 243 ((struct cmsghdr *)_CMSG_HDR_ALIGN((char *)(c) + \ 244 ((struct cmsghdr *)(c))->cmsg_len)))) 245 246 #define LX_TO_SOL 1 247 #define SOL_TO_LX 2 248 249 static int 250 convert_cmsgs(int direction, struct lx_msghdr *msg, char *caller) 251 { 252 struct cmsghdr *cmsg, *last; 253 int err = 0; 254 255 cmsg = CMSG_FIRSTHDR(msg); 256 while (cmsg != NULL && err == 0) { 257 if (direction == LX_TO_SOL) { 258 if (cmsg->cmsg_level == LX_SOL_SOCKET) { 259 cmsg->cmsg_level = SOL_SOCKET; 260 if (cmsg->cmsg_type == LX_SCM_RIGHTS) 261 cmsg->cmsg_type = SCM_RIGHTS; 262 else if (cmsg->cmsg_type == LX_SCM_CRED) 263 cmsg->cmsg_type = SCM_UCRED; 264 else 265 err = ENOTSUP; 266 } else { 267 err = ENOTSUP; 268 } 269 } else { 270 if (cmsg->cmsg_level == SOL_SOCKET) { 271 cmsg->cmsg_level = LX_SOL_SOCKET; 272 if (cmsg->cmsg_type == SCM_RIGHTS) 273 cmsg->cmsg_type = LX_SCM_RIGHTS; 274 else if (cmsg->cmsg_type == SCM_UCRED) 275 cmsg->cmsg_type = LX_SCM_CRED; 276 else 277 err = ENOTSUP; 278 } else { 279 err = ENOTSUP; 280 } 281 } 282 283 last = cmsg; 284 cmsg = CMSG_NXTHDR(msg, last); 285 } 286 if (err) 287 lx_unsupported("Unsupported socket control message in %s\n.", 288 caller); 289 290 return (err); 291 } 292 293 /* 294 * If inaddr is an abstract namespace unix socket, this function expects addr 295 * to have enough memory to hold the expanded socket name, ie it must be of 296 * size *len + ABST_PRFX_LEN. 297 */ 298 static int 299 convert_sockaddr(struct sockaddr *addr, socklen_t *len, 300 struct sockaddr *inaddr, socklen_t inlen) 301 { 302 sa_family_t family; 303 int lx_in6_len; 304 int size; 305 int i, orig_len; 306 307 /* 308 * Note that if the buffer at inaddr is ever smaller than inlen bytes, 309 * we may erroneously return EFAULT rather than a possible EINVAL 310 * as the copy comes before the various checks as to whether inlen 311 * is of the proper length for the socket type. 312 * 313 * This isn't an issue at present because all callers to this routine 314 * do meet that constraint. 315 */ 316 if ((ssize_t)inlen < 0) 317 return (-EINVAL); 318 if (uucopy(inaddr, addr, inlen) != 0) 319 return (-errno); 320 321 family = LTOS_FAMILY(addr->sa_family); 322 323 switch (family) { 324 case (sa_family_t)AF_NOTSUPPORTED: 325 return (-EPROTONOSUPPORT); 326 case (sa_family_t)AF_INVAL: 327 return (-EAFNOSUPPORT); 328 case AF_INET: 329 size = sizeof (struct sockaddr); 330 331 if (inlen < size) 332 return (-EINVAL); 333 334 *len = size; 335 break; 336 337 case AF_INET6: 338 /* 339 * The Solaris sockaddr_in6 has one more 32-bit 340 * field than the Linux version. 341 */ 342 size = sizeof (struct sockaddr_in6); 343 lx_in6_len = size - sizeof (uint32_t); 344 345 if (inlen != lx_in6_len) 346 return (-EINVAL); 347 348 *len = (sizeof (struct sockaddr_in6)); 349 bzero((char *)addr + lx_in6_len, sizeof (uint32_t)); 350 break; 351 352 case AF_UNIX: 353 if (inlen > sizeof (struct sockaddr_un)) 354 return (-EINVAL); 355 356 *len = inlen; 357 358 /* 359 * Linux supports abstract unix sockets, which are 360 * simply sockets that do not exist on the file system. 361 * These sockets are denoted by beginning the path with 362 * a NULL character. To support these, we strip out the 363 * leading NULL character and change the path to point 364 * to a real place in /tmp directory, by prepending 365 * ABST_PRFX and replacing all illegal characters with 366 * '_'. 367 */ 368 if (addr->sa_data[0] == '\0') { 369 370 /* 371 * inlen is the entire size of the sockaddr_un 372 * data structure, including the sun_family, so 373 * we need to subtract this out. We subtract 374 * 1 since we want to overwrite the leadin NULL 375 * character, and thus do not include it in the 376 * length. 377 */ 378 orig_len = inlen - sizeof (addr->sa_family) - 1; 379 380 /* 381 * Since abstract paths can contain illegal 382 * filename characters, we simply replace these 383 * with '_' 384 */ 385 for (i = 1; i < orig_len + 1; i++) { 386 if (addr->sa_data[i] == '\0' || 387 addr->sa_data[i] == '/') 388 addr->sa_data[i] = '_'; 389 } 390 391 /* 392 * prepend ABST_PRFX to file name, minus the 393 * leading NULL character. This places the 394 * socket as a hidden file in the /tmp 395 * directory. 396 */ 397 (void) memmove(addr->sa_data + ABST_PRFX_LEN, 398 addr->sa_data + 1, orig_len); 399 bcopy(ABST_PRFX, addr->sa_data, ABST_PRFX_LEN); 400 401 /* 402 * Since abstract socket paths may not be NULL 403 * terminated, we must explicitly NULL terminate 404 * our string. 405 */ 406 addr->sa_data[orig_len + ABST_PRFX_LEN] = '\0'; 407 408 /* 409 * Make len reflect the new len of our string. 410 * Although we removed the NULL character at the 411 * beginning of the string, we added a NULL 412 * character to the end, so the net gain in 413 * length is simply ABST_PRFX_LEN. 414 */ 415 *len = inlen + ABST_PRFX_LEN; 416 } 417 break; 418 419 default: 420 *len = inlen; 421 } 422 423 addr->sa_family = family; 424 return (0); 425 } 426 427 static int 428 convert_sock_args(int in_dom, int in_type, int in_protocol, int *out_dom, 429 int *out_type) 430 { 431 int domain, type; 432 433 if (in_dom < 0 || in_type < 0 || in_protocol < 0) 434 return (-EINVAL); 435 436 domain = LTOS_FAMILY(in_dom); 437 if (domain == AF_NOTSUPPORTED || domain == AF_UNSPEC) 438 return (-EAFNOSUPPORT); 439 if (domain == AF_INVAL) 440 return (-EINVAL); 441 442 type = LTOS_SOCKTYPE(in_type); 443 if (type == SOCK_NOTSUPPORTED) 444 return (-ESOCKTNOSUPPORT); 445 if (type == SOCK_INVAL) 446 return (-EINVAL); 447 448 /* 449 * Linux does not allow the app to specify IP Protocol for raw 450 * sockets. Solaris does, so bail out here. 451 */ 452 if (type == SOCK_RAW && in_protocol == IPPROTO_IP) 453 return (-ESOCKTNOSUPPORT); 454 455 *out_dom = domain; 456 *out_type = type; 457 return (0); 458 } 459 460 static int 461 convert_sockflags(int lx_flags) 462 { 463 int solaris_flags = 0; 464 465 if (lx_flags & LX_MSG_OOB) 466 solaris_flags |= MSG_OOB; 467 468 if (lx_flags & LX_MSG_PEEK) 469 solaris_flags |= MSG_PEEK; 470 471 if (lx_flags & LX_MSG_DONTROUTE) 472 solaris_flags |= MSG_DONTROUTE; 473 474 if (lx_flags & LX_MSG_CTRUNC) 475 solaris_flags |= MSG_CTRUNC; 476 477 if (lx_flags & LX_MSG_TRUNC) 478 solaris_flags |= MSG_TRUNC; 479 480 if (lx_flags & LX_MSG_WAITALL) 481 solaris_flags |= MSG_WAITALL; 482 483 if (lx_flags & LX_MSG_DONTWAIT) 484 solaris_flags |= MSG_DONTWAIT; 485 486 if (lx_flags & LX_MSG_EOR) 487 solaris_flags |= MSG_EOR; 488 489 if (lx_flags & LX_MSG_PROXY) 490 lx_unsupported("socket operation with MSG_PROXY flag set"); 491 492 if (lx_flags & LX_MSG_FIN) 493 lx_unsupported("socket operation with MSG_FIN flag set"); 494 495 if (lx_flags & LX_MSG_SYN) 496 lx_unsupported("socket operation with MSG_SYN flag set"); 497 498 if (lx_flags & LX_MSG_CONFIRM) 499 lx_unsupported("socket operation with MSG_CONFIRM set"); 500 501 if (lx_flags & LX_MSG_RST) 502 lx_unsupported("socket operation with MSG_RST flag set"); 503 504 if (lx_flags & LX_MSG_MORE) 505 lx_unsupported("socket operation with MSG_MORE flag set"); 506 507 return (solaris_flags); 508 } 509 510 static int 511 lx_socket(ulong_t *args) 512 { 513 int domain; 514 int type; 515 int protocol = (int)args[2]; 516 int fd; 517 int err; 518 519 err = convert_sock_args((int)args[0], (int)args[1], protocol, 520 &domain, &type); 521 if (err != 0) 522 return (err); 523 524 lx_debug("\tsocket(%d, %d, %d)", domain, type, protocol); 525 526 /* Right now IPv6 sockets don't work */ 527 if (domain == AF_INET6) 528 return (-EAFNOSUPPORT); 529 530 /* 531 * Clients of the auditing subsystem used by CentOS 4 and 5 expect to 532 * be able to create AF_ROUTE SOCK_RAW sockets to communicate with the 533 * auditing daemons. Failure to create these sockets will cause login, 534 * ssh and useradd, amoung other programs to fail. To trick these 535 * programs into working, we convert the socket domain and type to 536 * something that we do support. Then when sendto is called on these 537 * sockets, we return an error code. See lx_sendto. 538 */ 539 if (domain == AF_ROUTE && type == SOCK_RAW) { 540 domain = AF_INET; 541 type = SOCK_STREAM; 542 protocol = 0; 543 } 544 545 fd = socket(domain, type, protocol); 546 if (fd >= 0) 547 return (fd); 548 549 if (errno == EPROTONOSUPPORT) 550 return (-ESOCKTNOSUPPORT); 551 552 return (-errno); 553 } 554 555 static int 556 lx_bind(ulong_t *args) 557 { 558 int sockfd = (int)args[0]; 559 struct stat64 statbuf; 560 struct sockaddr *name, oldname; 561 socklen_t len; 562 int r, r2, ret, tmperrno; 563 int abst_sock; 564 struct stat sb; 565 566 if (uucopy((struct sockaddr *)args[1], &oldname, 567 sizeof (struct sockaddr)) != 0) 568 return (-errno); 569 570 /* 571 * Handle Linux abstract sockets, which are UNIX sockets whose path 572 * begins with a NULL character. 573 */ 574 abst_sock = (oldname.sa_family == AF_UNIX) && 575 (oldname.sa_data[0] == '\0'); 576 577 /* 578 * convert_sockaddr will expand the socket path if it is abstract, so 579 * we need to allocate extra memory for it now. 580 */ 581 if ((name = SAFE_ALLOCA((socklen_t)args[2] + 582 abst_sock * ABST_PRFX_LEN)) == NULL) 583 return (-EINVAL); 584 585 if ((r = convert_sockaddr(name, &len, (struct sockaddr *)args[1], 586 (socklen_t)args[2])) < 0) 587 return (r); 588 589 /* 590 * Linux abstract namespace unix sockets are simply socket that do not 591 * exist on the filesystem. We emulate them by changing their paths 592 * in covert_sockaddr so that they point real files names on the 593 * filesystem. Because in Linux they do not exist on the filesystem 594 * applications do not have to worry about deleting files, however in 595 * our filesystem based emulation we do. To solve this problem, we first 596 * check to see if the socket already exists before we create one. If it 597 * does we attempt to connect to it to see if it is in use, or just 598 * left over from a previous lx_bind call. If we are unable to connect, 599 * we assume it is not in use and remove the file, then continue on 600 * as if the file never existed. 601 */ 602 if (abst_sock && stat(name->sa_data, &sb) == 0 && 603 S_ISSOCK(sb.st_mode)) { 604 if ((r2 = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) 605 return (-ENOSR); 606 ret = connect(r2, name, len); 607 tmperrno = errno; 608 if (close(r2) < 0) 609 return (-EINVAL); 610 611 /* 612 * if we can't connect to the socket, assume no one is using it 613 * and remove it, otherwise assume it is in use and return 614 * EADDRINUSE. 615 */ 616 if ((ret < 0) && (tmperrno == ECONNREFUSED)) { 617 if (unlink(name->sa_data) < 0) { 618 return (-EADDRINUSE); 619 } 620 } else { 621 return (-EADDRINUSE); 622 } 623 } 624 625 lx_debug("\tbind(%d, 0x%p, %d)", sockfd, name, len); 626 627 if (name->sa_family == AF_UNIX) 628 lx_debug("\t\tAF_UNIX, path = %s", name->sa_data); 629 630 r = bind(sockfd, name, len); 631 632 /* 633 * Linux returns EADDRINUSE for attempts to bind to UNIX domain 634 * sockets that aren't sockets. 635 */ 636 if ((r < 0) && (errno == EINVAL) && (name->sa_family == AF_UNIX) && 637 ((stat64(name->sa_data, &statbuf) == 0) && 638 (!S_ISSOCK(statbuf.st_mode)))) 639 return (-EADDRINUSE); 640 641 return ((r < 0) ? -errno : r); 642 } 643 644 static int 645 lx_connect(ulong_t *args) 646 { 647 int sockfd = (int)args[0]; 648 struct sockaddr *name, oldname; 649 socklen_t len; 650 int r; 651 int abst_sock; 652 653 if (uucopy((struct sockaddr *)args[1], &oldname, 654 sizeof (struct sockaddr)) != 0) 655 return (-errno); 656 657 658 /* Handle Linux abstract sockets */ 659 abst_sock = (oldname.sa_family == AF_UNIX) && 660 (oldname.sa_data[0] == '\0'); 661 662 /* 663 * convert_sockaddr will expand the socket path, if it is abstract, so 664 * we need to allocate extra memory for it now. 665 */ 666 if ((name = SAFE_ALLOCA((socklen_t)args[2] + 667 abst_sock * ABST_PRFX_LEN)) == NULL) 668 return (-EINVAL); 669 670 if ((r = convert_sockaddr(name, &len, (struct sockaddr *)args[1], 671 (socklen_t)args[2])) < 0) 672 return (r); 673 674 lx_debug("\tconnect(%d, 0x%p, %d)", sockfd, name, len); 675 676 if (name->sa_family == AF_UNIX) 677 lx_debug("\t\tAF_UNIX, path = %s", name->sa_data); 678 679 r = connect(sockfd, name, len); 680 681 return ((r < 0) ? -errno : r); 682 } 683 684 static int 685 lx_listen(ulong_t *args) 686 { 687 int sockfd = (int)args[0]; 688 int backlog = (int)args[1]; 689 int r; 690 691 lx_debug("\tlisten(%d, %d)", sockfd, backlog); 692 r = listen(sockfd, backlog); 693 694 return ((r < 0) ? -errno : r); 695 } 696 697 static int 698 lx_accept(ulong_t *args) 699 { 700 int sockfd = (int)args[0]; 701 struct sockaddr *name = (struct sockaddr *)args[1]; 702 socklen_t namelen = 0; 703 int r; 704 705 lx_debug("\taccept(%d, 0x%p, 0x%p", sockfd, args[1], args[2]); 706 707 /* 708 * The Linux man page says that -1 is returned and errno is set to 709 * EFAULT if the "name" address is bad, but it is silent on what to 710 * set errno to if the "namelen" address is bad. Experimentation 711 * shows that Linux (at least the 2.4.21 kernel in CentOS) actually 712 * sets errno to EINVAL in both cases. 713 * 714 * Note that we must first check the name pointer, as the Linux 715 * docs state nothing is copied out if the "name" pointer is NULL. 716 * If it is NULL, we don't care about the namelen pointer's value 717 * or about dereferencing it. 718 * 719 * Happily, Solaris' accept(3SOCKET) treats NULL name pointers and 720 * zero namelens the same way. 721 */ 722 if ((name != NULL) && 723 (uucopy((void *)args[2], &namelen, sizeof (socklen_t)) != 0)) 724 return ((errno == EFAULT) ? -EINVAL : -errno); 725 726 lx_debug("\taccept namelen = %d", namelen); 727 728 if ((r = accept(sockfd, name, &namelen)) < 0) 729 return ((errno == EFAULT) ? -EINVAL : -errno); 730 731 lx_debug("\taccept namelen returned %d bytes", namelen); 732 733 /* 734 * In Linux, accept()ed sockets do not inherit anything set by 735 * fcntl(), so filter those out. 736 */ 737 if (fcntl(r, F_SETFL, 0) < 0) 738 return (-errno); 739 740 /* 741 * Once again, a bad "namelen" address sets errno to EINVAL, not 742 * EFAULT. If namelen was zero, there's no need to copy a zero back 743 * out. 744 * 745 * Logic might dictate that we should check if we can write to 746 * the namelen pointer earlier so we don't accept a pending connection 747 * only to fail the call because we can't write the namelen value back 748 * out. However, testing shows Linux does indeed fail the call after 749 * accepting the connection so we must behave in a compatible manner. 750 */ 751 if ((name != NULL) && (namelen != 0) && 752 (uucopy(&namelen, (void *)args[2], sizeof (socklen_t)) != 0)) 753 return ((errno == EFAULT) ? -EINVAL : -errno); 754 755 return (r); 756 } 757 758 static int 759 lx_getsockname(ulong_t *args) 760 { 761 int sockfd = (int)args[0]; 762 struct sockaddr *name = NULL; 763 socklen_t namelen, namelen_orig; 764 765 if (uucopy((void *)args[2], &namelen, sizeof (socklen_t)) != 0) 766 return (-errno); 767 namelen_orig = namelen; 768 769 lx_debug("\tgetsockname(%d, 0x%p, 0x%p (=%d))", 770 sockfd, args[1], args[2], namelen); 771 772 if (namelen > 0) { 773 if ((name = SAFE_ALLOCA(namelen)) == NULL) 774 return (-EINVAL); 775 bzero(name, namelen); 776 } 777 778 if ((getsockname(sockfd, name, &namelen)) < 0) 779 return (-errno); 780 781 /* 782 * If the name that getsockname() want's to return is larger 783 * than namelen, getsockname() will copy out the maximum amount 784 * of data possible and then update namelen to indicate the 785 * actually size of all the data that it wanted to copy out. 786 */ 787 if (uucopy(name, (void *)args[1], namelen_orig) != 0) 788 return (-errno); 789 if (uucopy(&namelen, (void *)args[2], sizeof (socklen_t)) != 0) 790 return (-errno); 791 792 return (0); 793 } 794 795 static int 796 lx_getpeername(ulong_t *args) 797 { 798 int sockfd = (int)args[0]; 799 struct sockaddr *name; 800 socklen_t namelen; 801 802 if (uucopy((void *)args[2], &namelen, sizeof (socklen_t)) != 0) 803 return (-errno); 804 805 lx_debug("\tgetpeername(%d, 0x%p, 0x%p (=%d))", 806 sockfd, args[1], args[2], namelen); 807 808 /* 809 * Linux returns EFAULT in this case, even if the namelen parameter 810 * is 0. This check will not catch other illegal addresses, but 811 * the benefit catching a non-null illegal address here is not 812 * worth the cost of another system call. 813 */ 814 if ((void *)args[1] == NULL) 815 return (-EFAULT); 816 817 if ((name = SAFE_ALLOCA(namelen)) == NULL) 818 return (-EINVAL); 819 if ((getpeername(sockfd, name, &namelen)) < 0) 820 return (-errno); 821 822 if (uucopy(name, (void *)args[1], namelen) != 0) 823 return (-errno); 824 825 if (uucopy(&namelen, (void *)args[2], sizeof (socklen_t)) != 0) 826 return (-errno); 827 828 return (0); 829 } 830 831 static int 832 lx_socketpair(ulong_t *args) 833 { 834 int domain; 835 int type; 836 int protocol = (int)args[2]; 837 int *sv = (int *)args[3]; 838 int fds[2]; 839 int r; 840 841 r = convert_sock_args((int)args[0], (int)args[1], protocol, 842 &domain, &type); 843 if (r != 0) 844 return (r); 845 846 lx_debug("\tsocketpair(%d, %d, %d, 0x%p)", domain, type, protocol, sv); 847 848 r = socketpair(domain, type, protocol, fds); 849 850 if (r == 0) { 851 if (uucopy(fds, sv, sizeof (fds)) != 0) { 852 r = errno; 853 (void) close(fds[0]); 854 (void) close(fds[1]); 855 return (-r); 856 } 857 return (0); 858 } 859 860 if (errno == EPROTONOSUPPORT) 861 return (-ESOCKTNOSUPPORT); 862 863 return (-errno); 864 } 865 866 static ssize_t 867 lx_send(ulong_t *args) 868 { 869 int sockfd = (int)args[0]; 870 void *buf = (void *)args[1]; 871 size_t len = (size_t)args[2]; 872 int flags = (int)args[3]; 873 ssize_t r; 874 875 int nosigpipe = flags & LX_MSG_NOSIGNAL; 876 struct sigaction newact, oact; 877 878 lx_debug("\tsend(%d, 0x%p, 0x%d, 0x%x)", sockfd, buf, len, flags); 879 880 flags = convert_sockflags(flags); 881 882 /* 883 * If nosigpipe is set, we want to emulate the Linux action of 884 * not sending a SIGPIPE to the caller if the remote socket has 885 * already been closed. 886 * 887 * As SIGPIPE is a directed signal sent only to the thread that 888 * performed the action, we can emulate this behavior by momentarily 889 * resetting the action for SIGPIPE to SIG_IGN, performing the socket 890 * call, and resetting the action back to its previous value. 891 */ 892 if (nosigpipe) { 893 newact.sa_handler = SIG_IGN; 894 newact.sa_flags = 0; 895 (void) sigemptyset(&newact.sa_mask); 896 897 if (sigaction(SIGPIPE, &newact, &oact) < 0) 898 lx_err_fatal(gettext( 899 "%s: could not ignore SIGPIPE to emulate " 900 "LX_MSG_NOSIGNAL"), "send()"); 901 } 902 903 r = send(sockfd, buf, len, flags); 904 905 if ((nosigpipe) && (sigaction(SIGPIPE, &oact, NULL) < 0)) 906 lx_err_fatal( 907 gettext("%s: could not reset SIGPIPE handler to " 908 "emulate LX_MSG_NOSIGNAL"), "send()"); 909 910 return ((r < 0) ? -errno : r); 911 } 912 913 static ssize_t 914 lx_recv(ulong_t *args) 915 { 916 int sockfd = (int)args[0]; 917 void *buf = (void *)args[1]; 918 size_t len = (size_t)args[2]; 919 int flags = (int)args[3]; 920 ssize_t r; 921 922 int nosigpipe = flags & LX_MSG_NOSIGNAL; 923 struct sigaction newact, oact; 924 925 lx_debug("\trecv(%d, 0x%p, 0x%d, 0x%x)", sockfd, buf, len, flags); 926 927 flags = convert_sockflags(flags); 928 929 /* 930 * If nosigpipe is set, we want to emulate the Linux action of 931 * not sending a SIGPIPE to the caller if the remote socket has 932 * already been closed. 933 * 934 * As SIGPIPE is a directed signal sent only to the thread that 935 * performed the action, we can emulate this behavior by momentarily 936 * resetting the action for SIGPIPE to SIG_IGN, performing the socket 937 * call, and resetting the action back to its previous value. 938 */ 939 if (nosigpipe) { 940 newact.sa_handler = SIG_IGN; 941 newact.sa_flags = 0; 942 (void) sigemptyset(&newact.sa_mask); 943 944 if (sigaction(SIGPIPE, &newact, &oact) < 0) 945 lx_err_fatal(gettext( 946 "%s: could not ignore SIGPIPE to emulate " 947 "LX_MSG_NOSIGNAL"), "recv()"); 948 } 949 950 r = recv(sockfd, buf, len, flags); 951 952 if ((nosigpipe) && (sigaction(SIGPIPE, &oact, NULL) < 0)) 953 lx_err_fatal( 954 gettext("%s: could not reset SIGPIPE handler to " 955 "emulate LX_MSG_NOSIGNAL"), "recv()"); 956 957 return ((r < 0) ? -errno : r); 958 } 959 960 static ssize_t 961 lx_sendto(ulong_t *args) 962 { 963 int sockfd = (int)args[0]; 964 void *buf = (void *)args[1]; 965 size_t len = (size_t)args[2]; 966 int flags = (int)args[3]; 967 struct sockaddr *to = NULL, oldto; 968 socklen_t tolen = 0; 969 ssize_t r; 970 int abst_sock; 971 972 int nosigpipe = flags & LX_MSG_NOSIGNAL; 973 struct sigaction newact, oact; 974 975 if ((args[4] != NULL) && (args[5] > 0)) { 976 if (uucopy((struct sockaddr *)args[4], &oldto, 977 sizeof (struct sockaddr)) != 0) 978 return (-errno); 979 980 /* Handle Linux abstract sockets */ 981 abst_sock = (oldto.sa_family == AF_UNIX) && 982 (oldto.sa_data[0] == '\0'); 983 984 /* 985 * convert_sockaddr will expand the socket path, if it is 986 * abstract, so we need to allocate extra memory for it now. 987 */ 988 if ((to = SAFE_ALLOCA(args[5] + abst_sock * ABST_PRFX_LEN)) 989 == NULL) 990 return (-EINVAL); 991 992 if ((r = convert_sockaddr(to, &tolen, 993 (struct sockaddr *)args[4], (socklen_t)args[5])) < 0) 994 return (r); 995 } 996 997 998 lx_debug("\tsendto(%d, 0x%p, 0x%d, 0x%x, 0x%x, %d)", sockfd, buf, len, 999 flags, to, tolen); 1000 1001 flags = convert_sockflags(flags); 1002 1003 /* return this error to make auditing subsystem happy */ 1004 if (to && to->sa_family == AF_ROUTE) { 1005 return (-ECONNREFUSED); 1006 } 1007 1008 /* 1009 * If nosigpipe is set, we want to emulate the Linux action of 1010 * not sending a SIGPIPE to the caller if the remote socket has 1011 * already been closed. 1012 * 1013 * As SIGPIPE is a directed signal sent only to the thread that 1014 * performed the action, we can emulate this behavior by momentarily 1015 * resetting the action for SIGPIPE to SIG_IGN, performing the socket 1016 * call, and resetting the action back to its previous value. 1017 */ 1018 if (nosigpipe) { 1019 newact.sa_handler = SIG_IGN; 1020 newact.sa_flags = 0; 1021 (void) sigemptyset(&newact.sa_mask); 1022 1023 if (sigaction(SIGPIPE, &newact, &oact) < 0) 1024 lx_err_fatal(gettext( 1025 "%s: could not ignore SIGPIPE to emulate " 1026 "LX_MSG_NOSIGNAL"), "sendto()"); 1027 } 1028 1029 r = sendto(sockfd, buf, len, flags, to, tolen); 1030 1031 if ((nosigpipe) && (sigaction(SIGPIPE, &oact, NULL) < 0)) 1032 lx_err_fatal( 1033 gettext("%s: could not reset SIGPIPE handler to " 1034 "emulate LX_MSG_NOSIGNAL"), "sendto()"); 1035 1036 if (r < 0) { 1037 /* 1038 * according to the man page and LTP, the expected error in 1039 * this case is EPIPE. 1040 */ 1041 if (errno == ENOTCONN) 1042 return (-EPIPE); 1043 else 1044 return (-errno); 1045 } 1046 return (r); 1047 } 1048 1049 static ssize_t 1050 lx_recvfrom(ulong_t *args) 1051 { 1052 int sockfd = (int)args[0]; 1053 void *buf = (void *)args[1]; 1054 size_t len = (size_t)args[2]; 1055 int flags = (int)args[3]; 1056 struct sockaddr *from = (struct sockaddr *)args[4]; 1057 socklen_t *from_lenp = (socklen_t *)args[5]; 1058 ssize_t r; 1059 1060 int nosigpipe = flags & LX_MSG_NOSIGNAL; 1061 struct sigaction newact, oact; 1062 1063 lx_debug("\trecvfrom(%d, 0x%p, 0x%d, 0x%x, 0x%x, 0x%p)", sockfd, buf, 1064 len, flags, from, from_lenp); 1065 1066 flags = convert_sockflags(flags); 1067 1068 /* 1069 * If nosigpipe is set, we want to emulate the Linux action of 1070 * not sending a SIGPIPE to the caller if the remote socket has 1071 * already been closed. 1072 * 1073 * As SIGPIPE is a directed signal sent only to the thread that 1074 * performed the action, we can emulate this behavior by momentarily 1075 * resetting the action for SIGPIPE to SIG_IGN, performing the socket 1076 * call, and resetting the action back to its previous value. 1077 */ 1078 if (nosigpipe) { 1079 newact.sa_handler = SIG_IGN; 1080 newact.sa_flags = 0; 1081 (void) sigemptyset(&newact.sa_mask); 1082 1083 if (sigaction(SIGPIPE, &newact, &oact) < 0) 1084 lx_err_fatal(gettext( 1085 "%s: could not ignore SIGPIPE to emulate " 1086 "LX_MSG_NOSIGNAL"), "recvfrom()"); 1087 } 1088 1089 r = recvfrom(sockfd, buf, len, flags, from, from_lenp); 1090 1091 if ((nosigpipe) && (sigaction(SIGPIPE, &oact, NULL) < 0)) 1092 lx_err_fatal( 1093 gettext("%s: could not reset SIGPIPE handler to " 1094 "emulate LX_MSG_NOSIGNAL"), "recvfrom()"); 1095 1096 return ((r < 0) ? -errno : r); 1097 } 1098 1099 static int 1100 lx_shutdown(ulong_t *args) 1101 { 1102 int sockfd = (int)args[0]; 1103 int how = (int)args[1]; 1104 int r; 1105 1106 lx_debug("\tshutdown(%d, %d)", sockfd, how); 1107 r = shutdown(sockfd, how); 1108 1109 return ((r < 0) ? -errno : r); 1110 } 1111 1112 static int 1113 lx_setsockopt(ulong_t *args) 1114 { 1115 int sockfd = (int)args[0]; 1116 int level = (int)args[1]; 1117 int optname = (int)args[2]; 1118 void *optval = (void *)args[3]; 1119 int optlen = (int)args[4]; 1120 int internal_opt; 1121 int r; 1122 1123 lx_debug("\tsetsockopt(%d, %d, %d, 0x%p, %d)", sockfd, level, optname, 1124 optval, optlen); 1125 1126 /* 1127 * The kernel returns EFAULT for all invalid addresses except NULL, 1128 * for which it returns EINVAL. Linux wants EFAULT for NULL too. 1129 */ 1130 if (optval == NULL) 1131 return (-EFAULT); 1132 1133 /* 1134 * Do a table lookup of the Solaris equivalent of the given option 1135 */ 1136 if (level < IPPROTO_IP || level >= IPPROTO_TAB_SIZE) 1137 return (-ENOPROTOOPT); 1138 1139 if (ltos_proto_opts[level].maxentries == 0 || 1140 optname <= 0 || optname >= (ltos_proto_opts[level].maxentries)) 1141 return (-ENOPROTOOPT); 1142 1143 /* 1144 * Linux sets this option when it wants to send credentials over a 1145 * socket. Currently we just ignore it to make Linux programs happy. 1146 */ 1147 if ((level == LX_SOL_SOCKET) && (optname == LX_SO_PASSCRED)) 1148 return (0); 1149 1150 1151 if ((level == IPPROTO_TCP) && (optname == LX_TCP_CORK)) { 1152 /* 1153 * TCP_CORK is a Linux-only option that instructs the TCP 1154 * stack not to send out partial frames. Solaris doesn't 1155 * include this option but some apps require it. So, we do 1156 * our best to emulate the option by disabling TCP_NODELAY. 1157 * If the app requests that we disable TCP_CORK, we just 1158 * ignore it since enabling TCP_NODELAY may be 1159 * overcompensating. 1160 */ 1161 optname = TCP_NODELAY; 1162 if (optlen != sizeof (int)) 1163 return (-EINVAL); 1164 if (uucopy(optval, &internal_opt, sizeof (int)) != 0) 1165 return (-errno); 1166 if (internal_opt == 0) 1167 return (0); 1168 internal_opt = 1; 1169 optval = &internal_opt; 1170 } else { 1171 optname = ltos_proto_opts[level].proto[optname]; 1172 1173 if (optname == OPTNOTSUP) 1174 return (-ENOPROTOOPT); 1175 } 1176 1177 if (level == LX_SOL_SOCKET) 1178 level = SOL_SOCKET; 1179 1180 r = setsockopt(sockfd, level, optname, optval, optlen); 1181 1182 return ((r < 0) ? -errno : r); 1183 } 1184 1185 static int 1186 lx_getsockopt(ulong_t *args) 1187 { 1188 int sockfd = (int)args[0]; 1189 int level = (int)args[1]; 1190 int optname = (int)args[2]; 1191 void *optval = (void *)args[3]; 1192 int *optlenp = (int *)args[4]; 1193 int r; 1194 1195 lx_debug("\tgetsockopt(%d, %d, %d, 0x%p, 0x%p)", sockfd, level, optname, 1196 optval, optlenp); 1197 1198 /* 1199 * According to the Linux man page, a NULL optval should indicate 1200 * (as in Solaris) that no return value is expected. Instead, it 1201 * actually triggers an EFAULT error. 1202 */ 1203 if (optval == NULL) 1204 return (-EFAULT); 1205 1206 /* 1207 * Do a table lookup of the Solaris equivalent of the given option 1208 */ 1209 if (level < IPPROTO_IP || level >= IPPROTO_TAB_SIZE) 1210 return (-EOPNOTSUPP); 1211 1212 if (ltos_proto_opts[level].maxentries == 0 || 1213 optname <= 0 || optname >= (ltos_proto_opts[level].maxentries)) 1214 return (-ENOPROTOOPT); 1215 1216 if (((level == LX_SOL_SOCKET) && (optname == LX_SO_PASSCRED)) || 1217 ((level == IPPROTO_TCP) && (optname == LX_TCP_CORK))) { 1218 /* 1219 * Linux sets LX_SO_PASSCRED when it wants to send credentials 1220 * over a socket. Since we do not support it, it is never set 1221 * and we return 0. 1222 * 1223 * We don't support TCP_CORK but some apps rely on it. So, 1224 * rather than return an error we just return 0. This 1225 * isn't exactly a lie, since this option really isn't set, 1226 * but it's not the whole truth either. Fortunately, we 1227 * aren't under oath. 1228 */ 1229 r = 0; 1230 if (uucopy(&r, optval, sizeof (int)) != 0) 1231 return (-errno); 1232 r = sizeof (int); 1233 if (uucopy(&r, optlenp, sizeof (int)) != 0) 1234 return (-errno); 1235 return (0); 1236 } 1237 if ((level == LX_SOL_SOCKET) && (optname == LX_SO_PEERCRED)) { 1238 struct lx_ucred lx_ucred; 1239 ucred_t *ucp; 1240 1241 /* 1242 * We don't support SO_PEERCRED, but we do have equivalent 1243 * functionality in getpeerucred() so invoke that here. 1244 */ 1245 1246 /* Verify there's going to be enough room for the results. */ 1247 if (uucopy(optlenp, &r, sizeof (int)) != 0) 1248 return (-errno); 1249 if (r < sizeof (struct lx_ucred)) 1250 return (-EOVERFLOW); 1251 1252 /* 1253 * We allocate a ucred_t ourselves rather than allow 1254 * getpeerucred() to do it for us because getpeerucred() 1255 * uses malloc(3C) and we'd rather use SAFE_ALLOCA(). 1256 */ 1257 if ((ucp = (ucred_t *)SAFE_ALLOCA(ucred_size())) == NULL) 1258 return (-ENOMEM); 1259 1260 /* Get the credential for the remote end of this socket. */ 1261 if (getpeerucred(sockfd, &ucp) != 0) 1262 return (-errno); 1263 if (((lx_ucred.lxu_pid = ucred_getpid(ucp)) == -1) || 1264 ((lx_ucred.lxu_uid = ucred_geteuid(ucp)) == (uid_t)-1) || 1265 ((lx_ucred.lxu_gid = ucred_getegid(ucp)) == (gid_t)-1)) { 1266 return (-errno); 1267 } 1268 1269 /* Copy out the results. */ 1270 if ((uucopy(&lx_ucred, optval, sizeof (lx_ucred))) != 0) 1271 return (-errno); 1272 r = sizeof (lx_ucred); 1273 if ((uucopy(&r, optlenp, sizeof (int))) != 0) 1274 return (-errno); 1275 return (0); 1276 } 1277 1278 optname = ltos_proto_opts[level].proto[optname]; 1279 1280 if (optname == OPTNOTSUP) 1281 return (-ENOPROTOOPT); 1282 1283 if (level == LX_SOL_SOCKET) 1284 level = SOL_SOCKET; 1285 1286 r = getsockopt(sockfd, level, optname, optval, optlenp); 1287 1288 return ((r < 0) ? -errno : r); 1289 } 1290 1291 /* 1292 * libc routines that issue these system calls. We bypass the libsocket 1293 * wrappers since they explicitly turn off the MSG_XPG_2 flag we need for 1294 * Linux compatibility. 1295 */ 1296 extern int _so_sendmsg(); 1297 extern int _so_recvmsg(); 1298 1299 static int 1300 lx_sendmsg(ulong_t *args) 1301 { 1302 int sockfd = (int)args[0]; 1303 struct lx_msghdr msg; 1304 struct cmsghdr *cmsg; 1305 int flags = (int)args[2]; 1306 int r; 1307 1308 int nosigpipe = flags & LX_MSG_NOSIGNAL; 1309 struct sigaction newact, oact; 1310 1311 lx_debug("\tsendmsg(%d, 0x%p, 0x%x)", sockfd, (void *)args[1], flags); 1312 1313 flags = convert_sockflags(flags); 1314 1315 if ((uucopy((void *)args[1], &msg, sizeof (msg))) != 0) 1316 return (-errno); 1317 1318 /* 1319 * If there are control messages bundled in this message, we need 1320 * to convert them from Linux to Solaris. 1321 */ 1322 if (msg.msg_control != NULL) { 1323 if (msg.msg_controllen == 0) { 1324 cmsg = NULL; 1325 } else { 1326 cmsg = SAFE_ALLOCA(msg.msg_controllen); 1327 if (cmsg == NULL) 1328 return (-EINVAL); 1329 } 1330 if ((uucopy(msg.msg_control, cmsg, msg.msg_controllen)) != 0) 1331 return (-errno); 1332 msg.msg_control = cmsg; 1333 if ((r = convert_cmsgs(LX_TO_SOL, &msg, "sendmsg()")) != 0) 1334 return (-r); 1335 } 1336 1337 /* 1338 * If nosigpipe is set, we want to emulate the Linux action of 1339 * not sending a SIGPIPE to the caller if the remote socket has 1340 * already been closed. 1341 * 1342 * As SIGPIPE is a directed signal sent only to the thread that 1343 * performed the action, we can emulate this behavior by momentarily 1344 * resetting the action for SIGPIPE to SIG_IGN, performing the socket 1345 * call, and resetting the action back to its previous value. 1346 */ 1347 if (nosigpipe) { 1348 newact.sa_handler = SIG_IGN; 1349 newact.sa_flags = 0; 1350 (void) sigemptyset(&newact.sa_mask); 1351 1352 if (sigaction(SIGPIPE, &newact, &oact) < 0) 1353 lx_err_fatal(gettext( 1354 "%s: could not ignore SIGPIPE to emulate " 1355 "LX_MSG_NOSIGNAL"), "sendmsg()"); 1356 } 1357 1358 r = _so_sendmsg(sockfd, (struct msghdr *)&msg, flags | MSG_XPG4_2); 1359 1360 if ((nosigpipe) && (sigaction(SIGPIPE, &oact, NULL) < 0)) 1361 lx_err_fatal( 1362 gettext("%s: could not reset SIGPIPE handler to " 1363 "emulate LX_MSG_NOSIGNAL"), "sendmsg()"); 1364 1365 if (r < 0) { 1366 /* 1367 * according to the man page and LTP, the expected error in 1368 * this case is EPIPE. 1369 */ 1370 if (errno == ENOTCONN) 1371 return (-EPIPE); 1372 else 1373 return (-errno); 1374 } 1375 1376 return (r); 1377 } 1378 1379 static int 1380 lx_recvmsg(ulong_t *args) 1381 { 1382 int sockfd = (int)args[0]; 1383 struct lx_msghdr msg; 1384 struct lx_msghdr *msgp = (struct lx_msghdr *)args[1]; 1385 struct cmsghdr *cmsg = NULL; 1386 int flags = (int)args[2]; 1387 int r, err; 1388 1389 int nosigpipe = flags & LX_MSG_NOSIGNAL; 1390 struct sigaction newact, oact; 1391 1392 lx_debug("\trecvmsg(%d, 0x%p, 0x%x)", sockfd, (void *)args[1], flags); 1393 1394 flags = convert_sockflags(flags); 1395 1396 if ((uucopy(msgp, &msg, sizeof (msg))) != 0) 1397 return (-errno); 1398 1399 /* 1400 * If we are expecting to have to convert any control messages, 1401 * then we should receive them into our address space instead of 1402 * the app's. 1403 */ 1404 if (msg.msg_control != NULL) { 1405 cmsg = msg.msg_control; 1406 if (msg.msg_controllen == 0) { 1407 msg.msg_control = NULL; 1408 } else { 1409 msg.msg_control = SAFE_ALLOCA(msg.msg_controllen); 1410 if (msg.msg_control == NULL) 1411 return (-EINVAL); 1412 } 1413 } 1414 1415 /* 1416 * If nosigpipe is set, we want to emulate the Linux action of 1417 * not sending a SIGPIPE to the caller if the remote socket has 1418 * already been closed. 1419 * 1420 * As SIGPIPE is a directed signal sent only to the thread that 1421 * performed the action, we can emulate this behavior by momentarily 1422 * resetting the action for SIGPIPE to SIG_IGN, performing the socket 1423 * call, and resetting the action back to its previous value. 1424 */ 1425 if (nosigpipe) { 1426 newact.sa_handler = SIG_IGN; 1427 newact.sa_flags = 0; 1428 (void) sigemptyset(&newact.sa_mask); 1429 1430 if (sigaction(SIGPIPE, &newact, &oact) < 0) 1431 lx_err_fatal(gettext( 1432 "%s: could not ignore SIGPIPE to emulate " 1433 "LX_MSG_NOSIGNAL"), "recvmsg()"); 1434 } 1435 1436 r = _so_recvmsg(sockfd, (struct msghdr *)&msg, flags | MSG_XPG4_2); 1437 1438 if ((nosigpipe) && (sigaction(SIGPIPE, &oact, NULL) < 0)) 1439 lx_err_fatal( 1440 gettext("%s: could not reset SIGPIPE handler to " 1441 "emulate LX_MSG_NOSIGNAL"), "recvmsg()"); 1442 1443 if (r >= 0 && msg.msg_control != NULL) { 1444 /* 1445 * If there are control messages bundled in this message, 1446 * we need to convert them from Linux to Solaris. 1447 */ 1448 if ((err = convert_cmsgs(SOL_TO_LX, &msg, "recvmsg()")) != 0) 1449 return (-err); 1450 1451 if ((uucopy(msg.msg_control, cmsg, msg.msg_controllen)) != 0) 1452 return (-errno); 1453 } 1454 1455 /* 1456 * A handful of the values in the msghdr are set by the recvmsg() 1457 * call, so copy their values back to the caller. Rather than iterate, 1458 * just copy the whole structure back. 1459 */ 1460 if (uucopy(&msg, msgp, sizeof (msg)) != 0) 1461 return (-errno); 1462 1463 return ((r < 0) ? -errno : r); 1464 } 1465 1466 int 1467 lx_socketcall(uintptr_t p1, uintptr_t p2) 1468 { 1469 int subcmd = (int)p1 - 1; /* subcommands start at 1 - not 0 */ 1470 ulong_t args[6]; 1471 int r; 1472 1473 if (subcmd < 0 || subcmd >= LX_RECVMSG) 1474 return (-EINVAL); 1475 1476 /* 1477 * Copy the arguments to the subcommand in from the app's address 1478 * space, returning EFAULT if we get a bogus pointer. 1479 */ 1480 if (uucopy((void *)p2, args, 1481 sockfns[subcmd].s_nargs * sizeof (ulong_t))) 1482 return (-errno); 1483 1484 r = (sockfns[subcmd].s_fn)(args); 1485 1486 return (r); 1487 }