1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26 
  27 /*
  28  * Copyright 2015 Joyent, Inc. All rights reserved.
  29  */
  30 
  31 #include <sys/types.h>
  32 #include <sys/syscall.h>
  33 #include <sys/utsname.h>
  34 #include <sys/inttypes.h>
  35 #include <sys/stat.h>
  36 #include <sys/mman.h>
  37 #include <sys/fstyp.h>
  38 #include <sys/fsid.h>
  39 #include <sys/systm.h>
  40 #include <sys/auxv.h>
  41 #include <sys/frame.h>
  42 #include <zone.h>
  43 #include <sys/brand.h>
  44 #include <sys/epoll.h>
  45 #include <sys/stack.h>
  46 
  47 #include <assert.h>
  48 #include <stdio.h>
  49 #include <stdarg.h>
  50 #include <stdlib.h>
  51 #include <strings.h>
  52 #include <unistd.h>
  53 #include <errno.h>
  54 #include <syslog.h>
  55 #include <signal.h>
  56 #include <fcntl.h>
  57 #include <synch.h>
  58 #include <libelf.h>
  59 #include <libgen.h>
  60 #include <pthread.h>
  61 #include <utime.h>
  62 #include <dirent.h>
  63 #include <ucontext.h>
  64 #include <libintl.h>
  65 #include <locale.h>
  66 
  67 #include <sys/lx_misc.h>
  68 #include <sys/lx_debug.h>
  69 #include <sys/lx_brand.h>
  70 #include <sys/lx_types.h>
  71 #include <sys/lx_stat.h>
  72 #include <sys/lx_statfs.h>
  73 #include <sys/lx_signal.h>
  74 #include <sys/lx_syscall.h>
  75 #include <sys/lx_thread.h>
  76 #include <sys/lx_aio.h>
  77 
  78 /*
  79  * There is a block comment in "uts/common/brand/lx/os/lx_brand.c" that
  80  * describes the functioning of the LX brand in some detail.
  81  *
  82  * *** Setting errno
  83  *
 * This emulation library is loaded onto a separate link map from the
  85  * application whose address space we're running in. The Linux libc errno is
  86  * independent of our native libc errno. To pass back an error the emulation
  87  * function should return -errno back to the Linux caller.
  88  */
  89 
/*
 * Linux release string.  lx_init() fills it from the LX_RELEASE environment
 * variable, else from the zone's LX_KERN_VERSION_NUM attribute, else with a
 * built-in default.
 */
char lx_release[LX_VERS_MAX];
/* basename of argv[0], recorded by lx_init() */
char lx_cmd_name[MAXNAMLEN];
  92 
  93 /*
  94  * Map a linux locale ending string to the solaris equivalent.
  95  */
/*
 * One Linux-to-Solaris locale suffix mapping.  The two size fields hold the
 * lengths of the corresponding strings without the terminating NUL when the
 * entry is built with the l2s_locale() initializer macro.
 */
struct lx_locale_ending {
        const char      *linux_end;     /* linux ending string */
        const char      *solaris_end;   /* to transform with this string */
        int             le_size;        /* linux ending string length */
        int             se_size;        /* solaris ending string length */
};
 102 
/*
 * Initializer for a struct lx_locale_ending.  The lengths are computed with
 * sizeof, so both arguments must be string literals (or arrays), not pointers.
 */
#define l2s_locale(lname, sname) \
        {(lname), (sname), sizeof ((lname)) - 1, sizeof ((sname)) - 1}
 105 
/* NOTE(review): presumably the longest locale name handled -- confirm. */
#define MAXLOCALENAMELEN        30
#if !defined(TEXT_DOMAIN)               /* should be defined by cc -D */
#define TEXT_DOMAIN     "SYS_TEST"      /* Use this only if it wasn't */
#endif
 110 
 111 /*
 112  * Most syscalls return an int but some return something else, typically a
 113  * ssize_t. This can be either an int or a long, depending on if we're compiled
 114  * for 32-bit or 64-bit. To correctly propagate the -errno return code in the
 115  * 64-bit case, we declare all emulation wrappers will return a long. Thus,
 116  * when we save the return value into the %eax or %rax register and return to
 117  * Linux, we will have the right size value in both the 32 and 64 bit cases.
 118  */
 119 
/*
 * Emulation handler type.  It is declared with an empty parameter list;
 * lx_emulate() always calls through it with six arguments.
 */
typedef long (*lx_syscall_handler_t)();

/*
 * Handler table, indexed by system call number in lx_emulate().  This is a
 * tentative definition; the initialized table appears later in this file.
 */
static lx_syscall_handler_t lx_handlers[LX_NSYSCALLS + 1];

/*
 * Lowest address that lx_check_alloca() allows an alloca() to reach.  It is
 * set by lx_init() to twice the system page size.
 */
static uintptr_t stack_bottom;
 125 
#if defined(_LP64)
/*
 * NOTE(review): not referenced in the visible part of this file; presumably
 * saved %fs base and selector values for 64-bit processes -- confirm.
 */
long lx_fsb;
long lx_fs;
#endif
int lx_install = 0;             /* install mode enabled if non-zero */
int lx_verbose = 0;             /* verbose mode enabled if non-zero */
int lx_debug_enabled = 0;       /* debugging output enabled if non-zero */

/* Looked up by lx_init() as the native PID for Linux PID 1. */
pid_t zoneinit_pid;             /* zone init PID */

/* Key for each thread's lx_tsd_t; created by lx_init(). */
thread_key_t lx_tsd_key;
 137 
 138 int
 139 uucopy_unsafe(const void *src, void *dst, size_t n)
 140 {
 141         bcopy(src, dst, n);
 142         return (0);
 143 }
 144 
 145 int
 146 uucopystr_unsafe(const void *src, void *dst, size_t n)
 147 {
 148         (void) strncpy((char *)src, dst, n);
 149         return (0);
 150 }
 151 
 152 static void
 153 i_lx_msg(int fd, char *msg, va_list ap)
 154 {
 155         int     i;
 156         char    buf[LX_MSG_MAXLEN];
 157 
 158         /* LINTED [possible expansion issues] */
 159         i = vsnprintf(buf, sizeof (buf), msg, ap);
 160         buf[LX_MSG_MAXLEN - 1] = '\0';
 161         if (i == -1)
 162                 return;
 163 
 164         /* if debugging is enabled, send this message to debug output */
 165         if (LX_DEBUG_ISENABLED)
 166                 lx_debug(buf);
 167 
 168         if (fd == 2) {
 169                 /*
 170                  * We let the user choose whether or not to see these
 171                  * messages on the console.
 172                  */
 173                 if (lx_verbose == 0)
 174                         return;
 175         }
 176 
 177         /* we retry in case of EINTR */
 178         do {
 179                 i = write(fd, buf, strlen(buf));
 180         } while ((i == -1) && (errno == EINTR));
 181 }
 182 
 183 /*PRINTFLIKE1*/
 184 void
 185 lx_err(char *msg, ...)
 186 {
 187         va_list ap;
 188 
 189         assert(msg != NULL);
 190 
 191         va_start(ap, msg);
 192         i_lx_msg(STDERR_FILENO, msg, ap);
 193         va_end(ap);
 194 }
 195 
/*
 * Exit status used by our abort() when it is entered recursively, i.e. when
 * a fatal error occurs while we are already aborting.  It is just a
 * distinctive non-zero value.
 */
#define LX_ERR_FATAL    42
 202 
 203 /*
 204  * Our own custom version of abort(), this routine will be used in place
 205  * of the one located in libc.  The primary difference is that this version
 206  * will first reset the signal handler for SIGABRT to SIG_DFL, ensuring the
 207  * SIGABRT sent causes us to dump core and is not caught by a user program.
 208  */
 209 void
 210 abort(void)
 211 {
 212         static int aborting = 0;
 213 
 214         struct sigaction sa;
 215         sigset_t sigmask;
 216 
 217         /* watch out for recursive calls to this function */
 218         if (aborting != 0)
 219                 exit(LX_ERR_FATAL);
 220 
 221         aborting = 1;
 222 
 223         /*
 224          * Block all signals here to avoid taking any signals while exiting
 225          * in an effort to avoid any strange user interaction with our death.
 226          */
 227         (void) sigfillset(&sigmask);
 228         (void) sigprocmask(SIG_BLOCK, &sigmask, NULL);
 229 
 230         /*
 231          * Our own version of abort(3C) that we know will never call
 232          * a user-installed SIGABRT handler first.  We WANT to die.
 233          *
 234          * Do this by resetting the handler to SIG_DFL, and releasing any
 235          * held SIGABRTs.
 236          *
 237          * If no SIGABRTs are pending, send ourselves one.
 238          *
 239          * The while loop is a bit of overkill, but abort(3C) does it to
 240          * assure it never returns so we will as well.
 241          */
 242         (void) sigemptyset(&sa.sa_mask);
 243         sa.sa_sigaction = SIG_DFL;
 244         sa.sa_flags = 0;
 245 
 246         for (;;) {
 247                 (void) sigaction(SIGABRT, &sa, NULL);
 248                 (void) sigrelse(SIGABRT);
 249                 (void) thr_kill(thr_self(), SIGABRT);
 250         }
 251 
 252         /*NOTREACHED*/
 253 }
 254 
 255 /*PRINTFLIKE1*/
 256 void
 257 lx_msg(char *msg, ...)
 258 {
 259         va_list ap;
 260 
 261         assert(msg != NULL);
 262         va_start(ap, msg);
 263         i_lx_msg(STDOUT_FILENO, msg, ap);
 264         va_end(ap);
 265 }
 266 
 267 /*PRINTFLIKE1*/
 268 void
 269 lx_err_fatal(char *msg, ...)
 270 {
 271         va_list ap;
 272 
 273         assert(msg != NULL);
 274 
 275         va_start(ap, msg);
 276         i_lx_msg(STDERR_FILENO, msg, ap);
 277         va_end(ap);
 278         abort();
 279 }
 280 
 281 /*
 282  * See if it is safe to alloca() sz bytes.  Return 1 for yes, 0 for no.
 283  */
 284 int
 285 lx_check_alloca(size_t sz)
 286 {
 287         uintptr_t sp = (uintptr_t)&sz;
 288         uintptr_t end = sp - sz;
 289 
 290         return ((end < sp) && (end >= stack_bottom));
 291 }
 292 
 293 /*PRINTFLIKE1*/
 294 void
 295 lx_unsupported(char *msg, ...)
 296 {
 297         va_list ap;
 298         char dmsg[256];
 299         int lastc;
 300 
 301         assert(msg != NULL);
 302 
 303         /* make a brand call so we can easily dtrace unsupported actions */
 304         va_start(ap, msg);
 305         /* LINTED [possible expansion issues] */
 306         (void) vsnprintf(dmsg, sizeof (dmsg), msg, ap);
 307         dmsg[255] = '\0';
 308         lastc = strlen(dmsg) - 1;
 309         if (dmsg[lastc] == '\n')
 310                 dmsg[lastc] = '\0';
 311         (void) syscall(SYS_brand, B_UNSUPPORTED, dmsg);
 312         va_end(ap);
 313 
 314         /* send the msg to the error stream */
 315         va_start(ap, msg);
 316         i_lx_msg(STDERR_FILENO, msg, ap);
 317         va_end(ap);
 318 }
 319 
 320 int lx_init(int argc, char *argv[], char *envp[]);
 321 
 322 lx_tsd_t *
 323 lx_get_tsd(void)
 324 {
 325         int ret;
 326         lx_tsd_t *lx_tsd;
 327 
 328         if ((ret = thr_getspecific(lx_tsd_key, (void **)&lx_tsd)) != 0) {
 329                 lx_err_fatal("lx_get_tsd: unable to read "
 330                     "thread-specific data: %s", strerror(ret));
 331         }
 332 
 333         assert(lx_tsd != 0);
 334 
 335         return (lx_tsd);
 336 }
 337 
 338 /*
 339  * This function is called from the kernel like a signal handler.  Each
 340  * function call is a request to provide emulation for a system call that, on
 341  * illumos, is implemented in userland.  The system call number selection and
 342  * argument parsing have already been done by the kernel.
 343  */
 344 void
 345 lx_emulate(ucontext_t *ucp, int syscall_num, uintptr_t *args)
 346 {
 347         long emu_ret;
 348         int emu_errno = 0;
 349 
 350         LX_EMULATE_ENTER(ucp, syscall_num, args);
 351         lx_debug("lx_emulate(%p, %d, [%p, %p, %p, %p, %p, %p])\n",
 352             ucp, syscall_num, args[0], args[1], args[2], args[3], args[4],
 353             args[5]);
 354 
 355         /*
 356          * The kernel should have saved us a context that will not restore the
 357          * previous signal mask.  Some emulated system calls alter the signal
 358          * mask; restoring it after the emulation would cancel that out.
 359          */
 360         assert(!(ucp->uc_flags & UC_SIGMASK));
 361 
 362         /*
 363          * The kernel ensures that the syscall_num is sane; Use it as is.
 364          */
 365         assert(syscall_num >= 0);
 366         assert(syscall_num < (sizeof (lx_handlers) / sizeof (lx_handlers[0])));
 367         if (lx_handlers[syscall_num] == NULL) {
 368                 lx_err_fatal("lx_emulate: kernel sent us a call we cannot "
 369                     "emulate (%d)", syscall_num);
 370         }
 371 
 372         /*
 373          * Call our handler function:
 374          */
 375         emu_ret = lx_handlers[syscall_num](args[0], args[1], args[2], args[3],
 376             args[4], args[5]);
 377 
 378         /*
 379          * If the return value is between -1 and -4095 then it's an errno.
 380          * The kernel will translate it to the Linux equivalent for us.
 381          */
 382         if (emu_ret < 0 && emu_ret > -4096) {
 383                 emu_errno = (int)-emu_ret;
 384         }
 385 
 386         /*
 387          * Return to the context we were passed
 388          */
 389         LX_EMULATE_RETURN(ucp, syscall_num, emu_ret, emu_errno);
 390         lx_debug("\tlx_emulate(%d) done (ret %ld / 0x%p ; errno %d)",
 391             syscall_num, emu_ret, emu_ret, emu_errno);
 392         (void) syscall(SYS_brand, B_EMULATION_DONE, ucp, syscall_num, emu_ret,
 393             emu_errno);
 394 
 395         assert(!"cannot be returned here");
 396 }
 397 
 398 static void
 399 lx_close_fh(FILE *file)
 400 {
 401         int fd, fd_new;
 402 
 403         if (file == NULL)
 404                 return;
 405 
 406         if ((fd = fileno(file)) < 0)
 407                 return;
 408 
 409         fd_new = dup(fd);
 410         if (fd_new == -1)
 411                 return;
 412 
 413         (void) fclose(file);
 414         (void) dup2(fd_new, fd);
 415         (void) close(fd_new);
 416 }
 417 
 418 
 419 extern int set_l10n_alternate_root(char *path);
 420 
 421 #if defined(_LP64)
 422 static void *
 423 map_vdso()
 424 {
 425         int fd;
 426         mmapobj_result_t        mpp[10]; /* we know the size of our lib */
 427         mmapobj_result_t        *smpp = mpp;
 428         uint_t                  mapnum = 10;
 429 
 430         if ((fd = open("/native/usr/lib/brand/lx/amd64/lx_vdso.so.1",
 431             O_RDONLY)) == -1)
 432                 lx_err_fatal("couldn't open lx_vdso.so.1");
 433 
 434         if (mmapobj(fd, MMOBJ_INTERPRET, smpp, &mapnum, NULL) == -1)
 435                 lx_err_fatal("couldn't mmapobj lx_vdso.so.1");
 436 
 437         (void) close(fd);
 438 
 439         /* assume first segment is the base of the mapping */
 440         return (smpp->mr_addr);
 441 }
 442 #endif
 443 
 444 /*
 445  * Initialize the thread specific data for this thread.
 446  */
 447 void
 448 lx_init_tsd(lx_tsd_t *lxtsd)
 449 {
 450         int err;
 451 
 452         bzero(lxtsd, sizeof (*lxtsd));
 453         lxtsd->lxtsd_exit = LX_ET_NONE;
 454 
 455         /*
 456          * The Linux alternate signal stack is initially disabled:
 457          */
 458         lxtsd->lxtsd_sigaltstack.ss_flags = LX_SS_DISABLE;
 459 
 460         /*
 461          * Create a per-thread exit context from the current register and
 462          * native/brand stack state.  Replace the saved program counter value
 463          * with the address of lx_exit_common(); we wish to revector there when
 464          * the thread or process is exiting.
 465          */
 466         if (getcontext(&lxtsd->lxtsd_exit_context) != 0) {
 467                 lx_err_fatal("Unable to initialize thread-specific exit "
 468                     "context: %s", strerror(errno));
 469         }
 470         LX_REG(&lxtsd->lxtsd_exit_context, REG_PC) = (uintptr_t)lx_exit_common;
 471 
 472         /*
 473          * Align the stack pointer and clear the frame pointer.
 474          */
 475         LX_REG(&lxtsd->lxtsd_exit_context, REG_FP) = 0;
 476         LX_REG(&lxtsd->lxtsd_exit_context, REG_SP) &= ~(STACK_ALIGN - 1UL);
 477 #if defined(_LP64)
 478 #if (STACK_ENTRY_ALIGN != 8) && (STACK_ALIGN != 16)
 479 #error "lx_init_tsd: unexpected STACK_[ENTRY_]ALIGN values"
 480 #endif
 481         /*
 482          * The AMD64 ABI requires that, on entry to a function, the stack
 483          * pointer must be 8-byte aligned, but _not_ 16-byte aligned.  When
 484          * the frame pointer is pushed, the alignment will then be correct.
 485          */
 486         LX_REG(&lxtsd->lxtsd_exit_context, REG_SP) -= STACK_ENTRY_ALIGN;
 487 #endif
 488 
 489         /*
 490          * Block all signals in the exit context to avoid taking any signals
 491          * (to the degree possible) while exiting.
 492          */
 493         (void) sigfillset(&lxtsd->lxtsd_exit_context.uc_sigmask);
 494 
 495         if ((err = thr_setspecific(lx_tsd_key, lxtsd)) != 0) {
 496                 lx_err_fatal("Unable to initialize thread-specific data: %s",
 497                     strerror(err));
 498         }
 499 }
 500 
 501 void
 502 lx_jump_to_linux(ucontext_t *ucp)
 503 {
 504         extern void setcontext_sigmask(ucontext_t *);
 505 
 506         /*
 507          * Call into this private libc interface to allow us to use only the
 508          * signal mask handling part of a regular setcontext() operation.
 509          */
 510         setcontext_sigmask(ucp);
 511 
 512         if (syscall(SYS_brand, B_JUMP_TO_LINUX, ucp) != 0) {
 513                 lx_err_fatal("B_JUMP_TO_LINUX failed: %s", strerror(errno));
 514         }
 515 
 516         /*
 517          * This system call should not return.
 518          */
 519         abort();
 520 }
 521 
 522 static void
 523 lx_start(uintptr_t sp, uintptr_t entry)
 524 {
 525         ucontext_t jump_uc;
 526 
 527         if (getcontext(&jump_uc) != 0) {
 528                 lx_err_fatal("Unable to getcontext for program start: %s",
 529                     strerror(errno));
 530         }
 531 
 532         /*
 533          * We want to load the general registers from this
 534          * context, and switch to the BRAND stack.
 535          */
 536         jump_uc.uc_flags = UC_CPU;
 537         jump_uc.uc_brand_data[0] = (void *)LX_UC_STACK_BRAND;
 538 
 539         LX_REG(&jump_uc, REG_FP) = NULL;
 540         LX_REG(&jump_uc, REG_SP) = sp;
 541         LX_REG(&jump_uc, REG_PC) = entry;
 542 
 543 #if defined(_LP64)
 544         /*
 545          * The AMD64 ABI states that at process entry, %rdx contains "a
 546          * function pointer that the application should register with
 547          * atexit()".  We make sure to pass NULL explicitly so that
 548          * no function is registered.
 549          */
 550         LX_REG(&jump_uc, REG_RDX) = NULL;
 551 #endif
 552 
 553         lx_debug("starting Linux program sp %p ldentry %p", sp, entry);
 554         lx_jump_to_linux(&jump_uc);
 555 }
 556 
 557 /*ARGSUSED*/
 558 int
 559 lx_init(int argc, char *argv[], char *envp[])
 560 {
 561         char            *r;
 562         auxv_t          *ap;
 563         long            *p;
 564         int             err;
 565         lx_elf_data_t   edp;
 566         lx_brand_registration_t reg;
 567         lx_tsd_t        *lxtsd;
 568 #if defined(_LP64)
 569         void            *vdso_hdr;
 570 #endif
 571 
 572         bzero(&reg, sizeof (reg));
 573 
 574         stack_bottom = 2 * sysconf(_SC_PAGESIZE);
 575 
 576         /*
 577          * We need to shutdown all libc stdio.  libc stdio normally goes to
 578          * file descriptors, but since we're actually part of a linux
 579          * process we don't own these file descriptors and we can't make
 580          * any assumptions about their state.
 581          */
 582         lx_close_fh(stdin);
 583         lx_close_fh(stdout);
 584         lx_close_fh(stderr);
 585 
 586         lx_debug_init();
 587 
 588         r = getenv("LX_RELEASE");
 589         if (r == NULL) {
 590                 if (zone_getattr(getzoneid(), LX_KERN_VERSION_NUM, lx_release,
 591                     sizeof (lx_release)) != sizeof (lx_release))
 592                         (void) strlcpy(lx_release, "2.4.21", LX_VERS_MAX);
 593         } else {
 594                 (void) strlcpy(lx_release, r, 128);
 595         }
 596 
 597         lx_debug("lx_release: %s\n", lx_release);
 598 
 599         /*
 600          * Should we kill an application that attempts an unimplemented
 601          * system call?
 602          */
 603         if (getenv("LX_STRICT") != NULL) {
 604                 reg.lxbr_flags |= LX_PROC_STRICT_MODE;
 605                 lx_debug("STRICT mode enabled.\n");
 606         }
 607 
 608         /*
 609          * Are we in install mode?
 610          */
 611         if (getenv("LX_INSTALL") != NULL) {
 612                 reg.lxbr_flags |= LX_PROC_INSTALL_MODE;
 613                 lx_install = 1;
 614                 lx_debug("INSTALL mode enabled.\n");
 615         }
 616 
 617         /*
 618          * Should we attempt to send messages to the screen?
 619          */
 620         if (getenv("LX_VERBOSE") != NULL) {
 621                 lx_verbose = 1;
 622                 lx_debug("VERBOSE mode enabled.\n");
 623         }
 624 
 625         (void) strlcpy(lx_cmd_name, basename(argv[0]), sizeof (lx_cmd_name));
 626         lx_debug("executing linux process: %s", argv[0]);
 627         lx_debug("branding myself and setting handler to 0x%p",
 628             (void *)lx_emulate);
 629 
 630         reg.lxbr_version = LX_VERSION;
 631         reg.lxbr_handler = (void *)&lx_emulate;
 632 
 633         /*
 634          * Register the address of the user-space handler with the lx brand
 635          * module. As a side-effect this leaves the thread in native syscall
 636          * mode so that it's ok to continue to make syscalls during setup. We
 637          * need to switch to Linux mode at the end of initialization.
 638          */
 639         if (syscall(SYS_brand, B_REGISTER, &reg))
 640                 lx_err_fatal("failed to brand the process");
 641 
 642         /* Look up the PID that serves as init for this zone */
 643         if ((err = lx_lpid_to_spid(1, &zoneinit_pid)) < 0)
 644                 lx_err_fatal("Unable to find PID for zone init process: %s",
 645                     strerror(err));
 646 
 647         /*
 648          * Upload data about the lx executable from the kernel.
 649          */
 650         if (syscall(SYS_brand, B_ELFDATA, (void *)&edp))
 651                 lx_err_fatal("failed to get required ELF data from the kernel");
 652 
 653         if (lx_stat_init() != 0)
 654                 lx_err_fatal("failed to setup the stat translator");
 655 
 656         if (lx_statfs_init() != 0)
 657                 lx_err_fatal("failed to setup the statfs translator");
 658 
 659         lx_ptrace_init();
 660 
 661 #if defined(_LP64)
 662         vdso_hdr = map_vdso();
 663 #endif
 664 
 665         /*
 666          * Find the aux vector on the stack.
 667          */
 668         p = (long *)envp;
 669         while (*p != NULL)
 670                 p++;
 671         /*
 672          * p is now pointing at the 0 word after the environ pointers. After
 673          * that is the aux vectors.
 674          */
 675         p++;
 676         for (ap = (auxv_t *)p; ap->a_type != 0; ap++) {
 677                 switch (ap->a_type) {
 678                         case AT_BASE:
 679                                 ap->a_un.a_val = edp.ed_base;
 680                                 break;
 681                         case AT_ENTRY:
 682                                 ap->a_un.a_val = edp.ed_entry;
 683                                 break;
 684                         case AT_PHDR:
 685                                 ap->a_un.a_val = edp.ed_phdr;
 686                                 break;
 687                         case AT_PHENT:
 688                                 ap->a_un.a_val = edp.ed_phent;
 689                                 break;
 690                         case AT_PHNUM:
 691                                 ap->a_un.a_val = edp.ed_phnum;
 692                                 break;
 693 #if defined(_LP64)
 694                         case AT_SUN_BRAND_LX_SYSINFO_EHDR:
 695                                 ap->a_type = AT_SYSINFO_EHDR;
 696                                 ap->a_un.a_val = (long)vdso_hdr;
 697                                 break;
 698 #endif
 699                         default:
 700                                 break;
 701                 }
 702         }
 703 
 704         /* Setup signal handler information. */
 705         if (lx_siginit()) {
 706                 lx_err_fatal("failed to initialize lx signals for the "
 707                     "branded process");
 708         }
 709 
 710         /* Setup thread-specific data area for managing linux threads. */
 711         if ((err = thr_keycreate(&lx_tsd_key, NULL)) != 0) {
 712                 lx_err_fatal("thr_keycreate(lx_tsd_key) failed: %s",
 713                     strerror(err));
 714         }
 715 
 716         lx_debug("thr_keycreate created lx_tsd_key (%d)", lx_tsd_key);
 717 
 718         /*
 719          * Initialize the thread specific data for this thread.
 720          */
 721         if ((lxtsd = malloc(sizeof (*lxtsd))) == NULL) {
 722                 lx_err_fatal("failed to allocate tsd for main thread: %s",
 723                     strerror(errno));
 724         }
 725         lx_debug("lx tsd allocated @ %p", lxtsd);
 726         lx_init_tsd(lxtsd);
 727 
 728         /*
 729          * Allocate the brand emulation stack for the main process thread.
 730          * Register the thread-specific data structure with the stack list so
 731          * that it may be freed at thread exit or fork(2).
 732          */
 733         lx_install_stack(NULL, 0, lxtsd);
 734 
 735         /*
 736          * The brand linker expects the stack pointer to point to
 737          * "argc", which is just before &argv[0].
 738          */
 739         lx_start((uintptr_t)argv - sizeof (void *), edp.ed_ldentry);
 740 
 741         /*NOTREACHED*/
 742         abort();
 743         return (0);
 744 }
 745 
 746 /*
 747  * We "return" to this function via a context hand-crafted by
 748  * "lx_init_tsd()"; see that function for more detail.
 749  *
 750  * NOTE: Our call frame is on the main thread stack, not the alternate native
 751  * stack -- it is safe to release the latter here.  The frame does not have a
 752  * valid return address, so this function MUST NOT return.
 753  */
 754 void
 755 lx_exit_common(void)
 756 {
 757         lx_tsd_t *lxtsd = lx_get_tsd();
 758         int ev = (0xff & lxtsd->lxtsd_exit_status);
 759 
 760         switch (lxtsd->lxtsd_exit) {
 761         case LX_ET_EXIT:
 762                 lx_debug("lx_exit_common(LX_ET_EXIT, %d)\n", ev);
 763 
 764                 /*
 765                  * If the thread is exiting, but not the entire process, we
 766                  * must free the stack we allocated for usermode emulation.
 767                  * This is safe to do here because the setcontext() put us
 768                  * back on the BRAND stack for this process.  This function
 769                  * also frees the thread-specific data object for this thread.
 770                  */
 771                 lx_free_stack();
 772 
 773                 /*
 774                  * The native thread return value is never seen so we pass
 775                  * NULL.
 776                  */
 777                 thr_exit(NULL);
 778                 break;
 779 
 780         case LX_ET_EXIT_GROUP:
 781                 lx_debug("lx_exit_common(LX_ET_EXIT_GROUP, %d)\n", ev);
 782                 exit(ev);
 783                 break;
 784 
 785         default:
 786                 abort();
 787         }
 788 
 789         abort();
 790 }
 791 
 792 const ucontext_t *
 793 lx_find_brand_uc(void)
 794 {
 795         ucontext_t *ucp = NULL;
 796 
 797         /*
 798          * Ask for the current emulation (or signal handling) ucontext_t...
 799          */
 800         assert(syscall(SYS_brand, B_GET_CURRENT_CONTEXT, &ucp) == 0);
 801 
 802         for (;;) {
 803                 uintptr_t flags;
 804 
 805                 lx_debug("lx_find_brand_uc: inspect ucp %p...\n", ucp);
 806                 assert(ucp != NULL);
 807 
 808                 flags = (uintptr_t)ucp->uc_brand_data[0];
 809 
 810                 if (flags & LX_UC_STACK_BRAND) {
 811                         lx_debug("lx_find_brand_uc: ucp %p\n", ucp);
 812 
 813                         return (ucp);
 814                 }
 815 
 816                 lx_debug("lx_find_brand_uc: skip non-BRAND ucp %p\n", ucp);
 817 
 818                 /*
 819                  * Walk up the context chain to find the most recently stored
 820                  * brand register state.
 821                  */
 822                 ucp = ucp->uc_link;
 823         }
 824 }
 825 
 826 uintptr_t
 827 lx_find_brand_sp(void)
 828 {
 829         const ucontext_t *ucp = lx_find_brand_uc();
 830         uintptr_t sp = LX_REG(ucp, REG_SP);
 831 
 832         lx_debug("lx_find_brand_sp: ucp %p sp %p\n", ucp, sp);
 833 
 834         return (sp);
 835 }
 836 
 837 ucontext_t *
 838 lx_syscall_regs(void)
 839 {
 840         ucontext_t *ucp = NULL;
 841         uintptr_t flags;
 842 
 843         /*
 844          * Ask for the current emulation (or signal handling) ucontext_t...
 845          */
 846         assert(syscall(SYS_brand, B_GET_CURRENT_CONTEXT, &ucp) == 0);
 847         assert(ucp != NULL);
 848 
 849         /*
 850          * Use of the lx_syscall_regs() function implies that the topmost (i.e.
 851          * current) context is for a system call emulation request from the
 852          * kernel, rather than a signal handling frame.
 853          */
 854         flags = (uintptr_t)ucp->uc_brand_data[0];
 855         assert(flags & LX_UC_FRAME_IS_SYSCALL);
 856 
 857         lx_debug("lx_syscall_regs: ucp %p\n", ucp);
 858 
 859         return (ucp);
 860 }
 861 
 862 int
 863 lx_lpid_to_spair(pid_t lpid, pid_t *spid, lwpid_t *slwp)
 864 {
 865         pid_t pid;
 866         lwpid_t tid;
 867 
 868         if (lpid == 0) {
 869                 pid = getpid();
 870                 tid = thr_self();
 871         } else {
 872                 if (syscall(SYS_brand, B_LPID_TO_SPAIR, lpid, &pid, &tid) < 0)
 873                         return (-errno);
 874 
 875                 /*
 876                  * If the returned pid is -1, that indicates we tried to
 877                  * look up the PID for init, but that process no longer
 878                  * exists.
 879                  */
 880                 if (pid == -1)
 881                         return (-ESRCH);
 882         }
 883 
 884         if (uucopy(&pid, spid, sizeof (pid_t)) != 0)
 885                 return (-errno);
 886 
 887         if (uucopy(&tid, slwp, sizeof (lwpid_t)) != 0)
 888                 return (-errno);
 889 
 890         return (0);
 891 }
 892 
 893 int
 894 lx_lpid_to_spid(pid_t lpid, pid_t *spid)
 895 {
 896         lwpid_t slwp;
 897 
 898         return (lx_lpid_to_spair(lpid, spid, &slwp));
 899 }
 900 
 901 char *
 902 lx_fd_to_path(int fd, char *buf, int buf_size)
 903 {
 904         char    path_proc[MAXPATHLEN];
 905         pid_t   pid;
 906         int     n;
 907 
 908         assert((buf != NULL) && (buf_size >= 0));
 909 
 910         if (fd < 0)
 911                 return (NULL);
 912 
 913         if ((pid = getpid()) == -1)
 914                 return (NULL);
 915 
 916         (void) snprintf(path_proc, MAXPATHLEN,
 917             "/native/proc/%d/path/%d", pid, fd);
 918 
 919         if ((n = readlink(path_proc, buf, buf_size - 1)) == -1)
 920                 return (NULL);
 921         buf[n] = '\0';
 922 
 923         return (buf);
 924 }
 925 
 926 #if defined(_LP64)
 927 /* The following is the 64-bit syscall table */
 928 
/*
 * Handler table used when building for _LP64.
 *
 * The initializer is positional: each slot's index is the Linux syscall
 * number shown in its trailing comment, so entries must not be inserted,
 * removed or reordered without keeping the numbering intact.
 *
 * A NULL slot means this table supplies no handler for that syscall.
 * NOTE(review): presumably such calls are serviced elsewhere (e.g. in the
 * kernel) or are unsupported -- confirm against the dispatch code.
 */
static lx_syscall_handler_t lx_handlers[] = {
        NULL,                           /*   0: read */
        NULL,                           /*   1: write */
        NULL,                           /*   2: open */
        lx_close,                       /*   3: close */
        lx_stat64,                      /*   4: stat */
        lx_fstat64,                     /*   5: fstat */
        lx_lstat64,                     /*   6: lstat */
        lx_poll,                        /*   7: poll */
        lx_lseek,                       /*   8: lseek */
        lx_mmap,                        /*   9: mmap */
        lx_mprotect,                    /*  10: mprotect */
        lx_munmap,                      /*  11: munmap */
        NULL,                           /*  12: brk */
        lx_rt_sigaction,                /*  13: rt_sigaction */
        lx_rt_sigprocmask,              /*  14: rt_sigprocmask */
        lx_rt_sigreturn,                /*  15: rt_sigreturn */
        NULL,                           /*  16: ioctl */
        lx_pread,                       /*  17: pread64 */
        lx_pwrite,                      /*  18: pwrite64 */
        lx_readv,                       /*  19: readv */
        lx_writev,                      /*  20: writev */
        lx_access,                      /*  21: access */
        NULL,                           /*  22: pipe */
        lx_select,                      /*  23: select */
        NULL,                           /*  24: sched_yield */
        lx_remap,                       /*  25: mremap */
        lx_msync,                       /*  26: msync */
        lx_mincore,                     /*  27: mincore */
        lx_madvise,                     /*  28: madvise */
        lx_shmget,                      /*  29: shmget */
        lx_shmat,                       /*  30: shmat */
        lx_shmctl,                      /*  31: shmctl */
        lx_dup,                         /*  32: dup */
        lx_dup2,                        /*  33: dup2 */
        lx_pause,                       /*  34: pause */
        lx_nanosleep,                   /*  35: nanosleep */
        lx_getitimer,                   /*  36: getitimer */
        lx_alarm,                       /*  37: alarm */
        lx_setitimer,                   /*  38: setitimer */
        NULL,                           /*  39: getpid */
        lx_sendfile64,                  /*  40: sendfile */
        lx_socket,                      /*  41: socket */
        NULL,                           /*  42: connect */
        lx_accept,                      /*  43: accept */
        NULL,                           /*  44: sendto */
        NULL,                           /*  45: recvfrom */
        NULL,                           /*  46: sendmsg */
        NULL,                           /*  47: recvmsg */
        lx_shutdown,                    /*  48: shutdown */
        NULL,                           /*  49: bind */
        lx_listen,                      /*  50: listen */
        lx_getsockname,                 /*  51: getsockname */
        lx_getpeername,                 /*  52: getpeername */
        lx_socketpair,                  /*  53: socketpair */
        lx_setsockopt,                  /*  54: setsockopt */
        lx_getsockopt,                  /*  55: getsockopt */
        lx_clone,                       /*  56: clone */
        lx_fork,                        /*  57: fork */
        lx_vfork,                       /*  58: vfork */
        lx_execve,                      /*  59: execve */
        lx_exit,                        /*  60: exit */
        NULL,                           /*  61: wait4 */
        NULL,                           /*  62: kill */
        lx_uname,                       /*  63: uname */
        lx_semget,                      /*  64: semget */
        lx_semop,                       /*  65: semop */
        lx_semctl,                      /*  66: semctl */
        lx_shmdt,                       /*  67: shmdt */
        lx_msgget,                      /*  68: msgget */
        lx_msgsnd,                      /*  69: msgsnd */
        lx_msgrcv,                      /*  70: msgrcv */
        lx_msgctl,                      /*  71: msgctl */
        NULL,                           /*  72: fcntl */
        lx_flock,                       /*  73: flock */
        lx_fsync,                       /*  74: fsync */
        lx_fdatasync,                   /*  75: fdatasync */
        lx_truncate,                    /*  76: truncate */
        lx_ftruncate,                   /*  77: ftruncate */
        NULL,                           /*  78: getdents */
        lx_getcwd,                      /*  79: getcwd */
        lx_chdir,                       /*  80: chdir */
        lx_fchdir,                      /*  81: fchdir */
        lx_rename,                      /*  82: rename */
        NULL,                           /*  83: mkdir */
        lx_rmdir,                       /*  84: rmdir */
        lx_creat,                       /*  85: creat */
        lx_link,                        /*  86: link */
        lx_unlink,                      /*  87: unlink */
        lx_symlink,                     /*  88: symlink */
        lx_readlink,                    /*  89: readlink */
        NULL,                           /*  90: chmod */
        NULL,                           /*  91: fchmod */
        NULL,                           /*  92: chown */
        NULL,                           /*  93: fchown */
        NULL,                           /*  94: lchown */
        lx_umask,                       /*  95: umask */
        NULL,                           /*  96: gettimeofday */
        NULL,                           /*  97: getrlimit */
        lx_getrusage,                   /*  98: getrusage */
        NULL,                           /*  99: sysinfo */
        lx_times,                       /* 100: times */
        lx_ptrace,                      /* 101: ptrace */
        lx_getuid,                      /* 102: getuid */
        lx_syslog,                      /* 103: syslog */
        lx_getgid,                      /* 104: getgid */
        lx_setuid,                      /* 105: setuid */
        lx_setgid,                      /* 106: setgid */
        lx_geteuid,                     /* 107: geteuid */
        lx_getegid,                     /* 108: getegid */
        lx_setpgid,                     /* 109: setpgid */
        NULL,                           /* 110: getppid */
        lx_getpgrp,                     /* 111: getpgrp */
        lx_setsid,                      /* 112: setsid */
        lx_setreuid,                    /* 113: setreuid */
        lx_setregid,                    /* 114: setregid */
        lx_getgroups,                   /* 115: getgroups */
        lx_setgroups,                   /* 116: setgroups */
        NULL,                           /* 117: setresuid */
        lx_getresuid,                   /* 118: getresuid */
        NULL,                           /* 119: setresgid */
        lx_getresgid,                   /* 120: getresgid */
        lx_getpgid,                     /* 121: getpgid */
        lx_setfsuid,                    /* 122: setfsuid */
        lx_setfsgid,                    /* 123: setfsgid */
        lx_getsid,                      /* 124: getsid */
        lx_capget,                      /* 125: capget */
        lx_capset,                      /* 126: capset */
        lx_rt_sigpending,               /* 127: rt_sigpending */
        lx_rt_sigtimedwait,             /* 128: rt_sigtimedwait */
        lx_rt_sigqueueinfo,             /* 129: rt_sigqueueinfo */
        lx_rt_sigsuspend,               /* 130: rt_sigsuspend */
        lx_sigaltstack,                 /* 131: sigaltstack */
        lx_utime,                       /* 132: utime */
        lx_mknod,                       /* 133: mknod */
        NULL,                           /* 134: uselib */
        lx_personality,                 /* 135: personality */
        NULL,                           /* 136: ustat */
        lx_statfs,                      /* 137: statfs */
        lx_fstatfs,                     /* 138: fstatfs */
        lx_sysfs,                       /* 139: sysfs */
        lx_getpriority,                 /* 140: getpriority */
        lx_setpriority,                 /* 141: setpriority */
        lx_sched_setparam,              /* 142: sched_setparam */
        lx_sched_getparam,              /* 143: sched_getparam */
        lx_sched_setscheduler,          /* 144: sched_setscheduler */
        lx_sched_getscheduler,          /* 145: sched_getscheduler */
        lx_sched_get_priority_max,      /* 146: sched_get_priority_max */
        lx_sched_get_priority_min,      /* 147: sched_get_priority_min */
        lx_sched_rr_get_interval,       /* 148: sched_rr_get_interval */
        lx_mlock,                       /* 149: mlock */
        lx_munlock,                     /* 150: munlock */
        lx_mlockall,                    /* 151: mlockall */
        lx_munlockall,                  /* 152: munlockall */
        lx_vhangup,                     /* 153: vhangup */
        NULL,                           /* 154: modify_ldt */
        NULL,                           /* 155: pivot_root */
        lx_sysctl,                      /* 156: sysctl */
        NULL,                           /* 157: prctl */
        NULL,                           /* 158: arch_prctl */
        lx_adjtimex,                    /* 159: adjtimex */
        NULL,                           /* 160: setrlimit */
        lx_chroot,                      /* 161: chroot */
        lx_sync,                        /* 162: sync */
        NULL,                           /* 163: acct */
        lx_settimeofday,                /* 164: settimeofday */
        lx_mount,                       /* 165: mount */
        lx_umount2,                     /* 166: umount2 */
        NULL,                           /* 167: swapon */
        NULL,                           /* 168: swapoff */
        lx_reboot,                      /* 169: reboot */
        lx_sethostname,                 /* 170: sethostname */
        lx_setdomainname,               /* 171: setdomainname */
        NULL,                           /* 172: iopl */
        NULL,                           /* 173: ioperm */
        NULL,                           /* 174: create_module */
        NULL,                           /* 175: init_module */
        NULL,                           /* 176: delete_module */
        NULL,                           /* 177: get_kernel_syms */
        lx_query_module,                /* 178: query_module */
        NULL,                           /* 179: quotactl */
        NULL,                           /* 180: nfsservctl */
        NULL,                           /* 181: getpmsg */
        NULL,                           /* 182: putpmsg */
        NULL,                           /* 183: afs_syscall */
        NULL,                           /* 184: tux */
        NULL,                           /* 185: security */
        NULL,                           /* 186: gettid */
        NULL,                           /* 187: readahead */
        NULL,                           /* 188: setxattr */
        NULL,                           /* 189: lsetxattr */
        NULL,                           /* 190: fsetxattr */
        NULL,                           /* 191: getxattr */
        NULL,                           /* 192: lgetxattr */
        NULL,                           /* 193: fgetxattr */
        NULL,                           /* 194: listxattr */
        NULL,                           /* 195: llistxattr */
        NULL,                           /* 196: flistxattr */
        NULL,                           /* 197: removexattr */
        NULL,                           /* 198: lremovexattr */
        NULL,                           /* 199: fremovexattr */
        NULL,                           /* 200: tkill */
        NULL,                           /* 201: time */
        NULL,                           /* 202: futex */
        lx_sched_setaffinity,           /* 203: sched_setaffinity */
        lx_sched_getaffinity,           /* 204: sched_getaffinity */
        NULL,                           /* 205: set_thread_area */
        lx_io_setup,                    /* 206: io_setup */
        lx_io_destroy,                  /* 207: io_destroy */
        lx_io_getevents,                /* 208: io_getevents */
        lx_io_submit,                   /* 209: io_submit */
        lx_io_cancel,                   /* 210: io_cancel */
        NULL,                           /* 211: get_thread_area */
        NULL,                           /* 212: lookup_dcookie */
        lx_epoll_create,                /* 213: epoll_create */
        NULL,                           /* 214: epoll_ctl_old */
        NULL,                           /* 215: epoll_wait_old */
        NULL,                           /* 216: remap_file_pages */
        NULL,                           /* 217: getdents64 */
        NULL,                           /* 218: set_tid_address */
        NULL,                           /* 219: restart_syscall */
        lx_semtimedop,                  /* 220: semtimedop */
        lx_fadvise64_64,                /* 221: fadvise64 */
        lx_timer_create,                /* 222: timer_create */
        lx_timer_settime,               /* 223: timer_settime */
        lx_timer_gettime,               /* 224: timer_gettime */
        lx_timer_getoverrun,            /* 225: timer_getoverrun */
        lx_timer_delete,                /* 226: timer_delete */
        NULL,                           /* 227: clock_settime */
        NULL,                           /* 228: clock_gettime */
        NULL,                           /* 229: clock_getres */
        lx_clock_nanosleep,             /* 230: clock_nanosleep */
        lx_group_exit,                  /* 231: exit_group */
        lx_epoll_wait,                  /* 232: epoll_wait */
        lx_epoll_ctl,                   /* 233: epoll_ctl */
        NULL,                           /* 234: tgkill */
        lx_utimes,                      /* 235: utimes */
        NULL,                           /* 236: vserver */
        NULL,                           /* 237: mbind */
        NULL,                           /* 238: set_mempolicy */
        NULL,                           /* 239: get_mempolicy */
        NULL,                           /* 240: mq_open */
        NULL,                           /* 241: mq_unlink */
        NULL,                           /* 242: mq_timedsend */
        NULL,                           /* 243: mq_timedreceive */
        NULL,                           /* 244: mq_notify */
        NULL,                           /* 245: mq_getsetattr */
        NULL,                           /* 246: kexec_load */
        NULL,                           /* 247: waitid */
        NULL,                           /* 248: add_key */
        NULL,                           /* 249: request_key */
        NULL,                           /* 250: keyctl */
        NULL,                           /* 251: ioprio_set */
        NULL,                           /* 252: ioprio_get */
        lx_inotify_init,                /* 253: inotify_init */
        lx_inotify_add_watch,           /* 254: inotify_add_watch */
        lx_inotify_rm_watch,            /* 255: inotify_rm_watch */
        NULL,                           /* 256: migrate_pages */
        NULL,                           /* 257: openat */
        NULL,                           /* 258: mkdirat */
        lx_mknodat,                     /* 259: mknodat */
        NULL,                           /* 260: fchownat */
        lx_futimesat,                   /* 261: futimesat */
        lx_fstatat64,                   /* 262: fstatat64 */
        lx_unlinkat,                    /* 263: unlinkat */
        lx_renameat,                    /* 264: renameat */
        lx_linkat,                      /* 265: linkat */
        lx_symlinkat,                   /* 266: symlinkat */
        lx_readlinkat,                  /* 267: readlinkat */
        NULL,                           /* 268: fchmodat */
        lx_faccessat,                   /* 269: faccessat */
        lx_pselect6,                    /* 270: pselect6 */
        lx_ppoll,                       /* 271: ppoll */
        NULL,                           /* 272: unshare */
        NULL,                           /* 273: set_robust_list */
        NULL,                           /* 274: get_robust_list */
        NULL,                           /* 275: splice */
        NULL,                           /* 276: tee */
        NULL,                           /* 277: sync_file_range */
        NULL,                           /* 278: vmsplice */
        NULL,                           /* 279: move_pages */
        lx_utimensat,                   /* 280: utimensat */
        lx_epoll_pwait,                 /* 281: epoll_pwait */
        lx_signalfd,                    /* 282: signalfd */
        lx_timerfd_create,              /* 283: timerfd_create */
        lx_eventfd,                     /* 284: eventfd */
        NULL,                           /* 285: fallocate */
        lx_timerfd_settime,             /* 286: timerfd_settime */
        lx_timerfd_gettime,             /* 287: timerfd_gettime */
        lx_accept4,                     /* 288: accept4 */
        lx_signalfd4,                   /* 289: signalfd4 */
        lx_eventfd2,                    /* 290: eventfd2 */
        lx_epoll_create1,               /* 291: epoll_create1 */
        lx_dup3,                        /* 292: dup3 */
        NULL,                           /* 293: pipe2 */
        lx_inotify_init1,               /* 294: inotify_init1 */
        lx_preadv,                      /* 295: preadv */
        lx_pwritev,                     /* 296: pwritev */
        lx_rt_tgsigqueueinfo,           /* 297: rt_tgsigqueueinfo */
        NULL,                           /* 298: perf_event_open */
        NULL,                           /* 299: recvmmsg */
        NULL,                           /* 300: fanotify_init */
        NULL,                           /* 301: fanotify_mark */
        NULL,                           /* 302: prlimit64 */
        NULL,                           /* 303: name_to_handle_at */
        NULL,                           /* 304: open_by_handle_at */
        NULL,                           /* 305: clock_adjtime */
        NULL,                           /* 306: syncfs */
        NULL,                           /* 307: sendmmsg */
        NULL,                           /* 308: setns */
        NULL,                           /* 309: getcpu */
        NULL,                           /* 310: process_vm_readv */
        NULL,                           /* 311: process_vm_writev */
        NULL,                           /* 312: kcmp */
        NULL,                           /* 313: finit_module */
        NULL,                           /* 314: sched_setattr */
        NULL,                           /* 315: sched_getattr */
        NULL,                           /* 316: renameat2 */
        NULL,                           /* 317: seccomp */
        NULL,                           /* 318: getrandom */
        NULL,                           /* 319: memfd_create */
        NULL,                           /* 320: kexec_file_load */
        NULL,                           /* 321: bpf */
        NULL,                           /* 322: execveat */

        /* XXX TBD gap then x32 syscalls from 512 - 544 */
};
1256 
1257 #else
1258 /* The following is the 32-bit syscall table */
1259 
1260 static lx_syscall_handler_t lx_handlers[] = {
1261         NULL,                           /*   0: nosys */
1262         lx_exit,                        /*   1: exit */
1263         lx_fork,                        /*   2: fork */
1264         NULL,                           /*   3: read */
1265         NULL,                           /*   4: write */
1266         NULL,                           /*   5: open */
1267         lx_close,                       /*   6: close */
1268         NULL,                           /*   7: waitpid */
1269         lx_creat,                       /*   8: creat */
1270         lx_link,                        /*   9: link */
1271         lx_unlink,                      /*  10: unlink */
1272         lx_execve,                      /*  11: execve */
1273         lx_chdir,                       /*  12: chdir */
1274         NULL,                           /*  13: time */
1275         lx_mknod,                       /*  14: mknod */
1276         NULL,                           /*  15: chmod */
1277         NULL,                           /*  16: lchown16 */
1278         NULL,                           /*  17: break */
1279         NULL,                           /*  18: stat */
1280         lx_lseek,                       /*  19: lseek */
1281         NULL,                           /*  20: getpid */
1282         lx_mount,                       /*  21: mount */
1283         lx_umount,                      /*  22: umount */
1284         lx_setuid16,                    /*  23: setuid16 */
1285         lx_getuid16,                    /*  24: getuid16 */
1286         lx_stime,                       /*  25: stime */
1287         lx_ptrace,                      /*  26: ptrace */
1288         lx_alarm,                       /*  27: alarm */
1289         NULL,                           /*  28: fstat */
1290         lx_pause,                       /*  29: pause */
1291         lx_utime,                       /*  30: utime */
1292         NULL,                           /*  31: stty */
1293         NULL,                           /*  32: gtty */
1294         lx_access,                      /*  33: access */
1295         lx_nice,                        /*  34: nice */
1296         NULL,                           /*  35: ftime */
1297         lx_sync,                        /*  36: sync */
1298         NULL,                           /*  37: kill */
1299         lx_rename,                      /*  38: rename */
1300         NULL,                           /*  39: mkdir */
1301         lx_rmdir,                       /*  40: rmdir */
1302         lx_dup,                         /*  41: dup */
1303         NULL,                           /*  42: pipe */
1304         lx_times,                       /*  43: times */
1305         NULL,                           /*  44: prof */
1306         NULL,                           /*  45: brk */
1307         lx_setgid16,                    /*  46: setgid16 */
1308         lx_getgid16,                    /*  47: getgid16 */
1309         lx_signal,                      /*  48: signal */
1310         lx_geteuid16,                   /*  49: geteuid16 */
1311         lx_getegid16,                   /*  50: getegid16 */
1312         NULL,                           /*  51: acct */
1313         lx_umount2,                     /*  52: umount2 */
1314         NULL,                           /*  53: lock */
1315         NULL,                           /*  54: ioctl */
1316         NULL,                           /*  55: fcntl */
1317         NULL,                           /*  56: mpx */
1318         lx_setpgid,                     /*  57: setpgid */
1319         NULL,                           /*  58: ulimit */
1320         NULL,                           /*  59: olduname */
1321         lx_umask,                       /*  60: umask */
1322         lx_chroot,                      /*  61: chroot */
1323         NULL,                           /*  62: ustat */
1324         lx_dup2,                        /*  63: dup2 */
1325         NULL,                           /*  64: getppid */
1326         lx_getpgrp,                     /*  65: getpgrp */
1327         lx_setsid,                      /*  66: setsid */
1328         lx_sigaction,                   /*  67: sigaction */
1329         NULL,                           /*  68: sgetmask */
1330         NULL,                           /*  69: ssetmask */
1331         lx_setreuid16,                  /*  70: setreuid16 */
1332         lx_setregid16,                  /*  71: setregid16 */
1333         lx_sigsuspend,                  /*  72: sigsuspend */
1334         lx_sigpending,                  /*  73: sigpending */
1335         lx_sethostname,                 /*  74: sethostname */
1336         NULL,                           /*  75: setrlimit */
1337         NULL,                           /*  76: getrlimit */
1338         lx_getrusage,                   /*  77: getrusage */
1339         NULL,                           /*  78: gettimeofday */
1340         lx_settimeofday,                /*  79: settimeofday */
1341         lx_getgroups16,                 /*  80: getgroups16 */
1342         lx_setgroups16,                 /*  81: setgroups16 */
1343         NULL,                           /*  82: select */
1344         lx_symlink,                     /*  83: symlink */
1345         NULL,                           /*  84: oldlstat */
1346         lx_readlink,                    /*  85: readlink */
1347         NULL,                           /*  86: uselib */
1348         NULL,                           /*  87: swapon */
1349         lx_reboot,                      /*  88: reboot */
1350         lx_readdir,                     /*  89: readdir */
1351         lx_mmap,                        /*  90: mmap */
1352         lx_munmap,                      /*  91: munmap */
1353         lx_truncate,                    /*  92: truncate */
1354         lx_ftruncate,                   /*  93: ftruncate */
1355         NULL,                           /*  94: fchmod */
1356         NULL,                           /*  95: fchown16 */
1357         lx_getpriority,                 /*  96: getpriority */
1358         lx_setpriority,                 /*  97: setpriority */
1359         NULL,                           /*  98: profil */
1360         lx_statfs,                      /*  99: statfs */
1361         lx_fstatfs,                     /* 100: fstatfs */
1362         NULL,                           /* 101: ioperm */
1363         lx_socketcall,                  /* 102: socketcall */
1364         lx_syslog,                      /* 103: syslog */
1365         lx_setitimer,                   /* 104: setitimer */
1366         lx_getitimer,                   /* 105: getitimer */
1367         lx_stat,                        /* 106: stat */
1368         lx_lstat,                       /* 107: lstat */
1369         lx_fstat,                       /* 108: fstat */
1370         NULL,                           /* 109: uname */
1371         NULL,                           /* 110: oldiopl */
1372         lx_vhangup,                     /* 111: vhangup */
1373         NULL,                           /* 112: idle */
1374         NULL,                           /* 113: vm86old */
1375         NULL,                           /* 114: wait4 */
1376         NULL,                           /* 115: swapoff */
1377         NULL,                           /* 116: sysinfo */
1378         lx_ipc,                         /* 117: ipc */
1379         lx_fsync,                       /* 118: fsync */
1380         lx_sigreturn,                   /* 119: sigreturn */
1381         lx_clone,                       /* 120: clone */
1382         lx_setdomainname,               /* 121: setdomainname */
1383         lx_uname,                       /* 122: uname */
1384         NULL,                           /* 123: modify_ldt */
1385         lx_adjtimex,                    /* 124: adjtimex */
1386         lx_mprotect,                    /* 125: mprotect */
1387         lx_sigprocmask,                 /* 126: sigprocmask */
1388         NULL,                           /* 127: create_module */
1389         NULL,                           /* 128: init_module */
1390         NULL,                           /* 129: delete_module */
1391         NULL,                           /* 130: get_kernel_syms */
1392         NULL,                           /* 131: quotactl */
1393         lx_getpgid,                     /* 132: getpgid */
1394         lx_fchdir,                      /* 133: fchdir */
1395         NULL,                           /* 134: bdflush */
1396         lx_sysfs,                       /* 135: sysfs */
1397         lx_personality,                 /* 136: personality */
1398         NULL,                           /* 137: afs_syscall */
1399         lx_setfsuid16,                  /* 138: setfsuid16 */
1400         lx_setfsgid16,                  /* 139: setfsgid16 */
1401         lx_llseek,                      /* 140: llseek */
1402         NULL,                           /* 141: getdents */
1403         lx_select,                      /* 142: select */
1404         lx_flock,                       /* 143: flock */
1405         lx_msync,                       /* 144: msync */
1406         lx_readv,                       /* 145: readv */
1407         lx_writev,                      /* 146: writev */
1408         lx_getsid,                      /* 147: getsid */
1409         lx_fdatasync,                   /* 148: fdatasync */
1410         lx_sysctl,                      /* 149: sysctl */
1411         lx_mlock,                       /* 150: mlock */
1412         lx_munlock,                     /* 151: munlock */
1413         lx_mlockall,                    /* 152: mlockall */
1414         lx_munlockall,                  /* 153: munlockall */
1415         lx_sched_setparam,              /* 154: sched_setparam */
1416         lx_sched_getparam,              /* 155: sched_getparam */
1417         lx_sched_setscheduler,          /* 156: sched_setscheduler */
1418         lx_sched_getscheduler,          /* 157: sched_getscheduler */
1419         NULL,                           /* 158: sched_yield */
1420         lx_sched_get_priority_max,      /* 159: sched_get_priority_max */
1421         lx_sched_get_priority_min,      /* 160: sched_get_priority_min */
1422         lx_sched_rr_get_interval,       /* 161: sched_rr_get_interval */
1423         lx_nanosleep,                   /* 162: nanosleep */
1424         lx_remap,                       /* 163: mremap */
1425         NULL,                           /* 164: setresuid16 */
1426         lx_getresuid16,                 /* 165: getresuid16 */
1427         NULL,                           /* 166: vm86 */
1428         lx_query_module,                /* 167: query_module */
1429         lx_poll,                        /* 168: poll */
1430         NULL,                           /* 169: nfsservctl */
1431         NULL,                           /* 170: setresgid16 */
1432         lx_getresgid16,                 /* 171: getresgid16 */
1433         NULL,                           /* 172: prctl */
1434         lx_rt_sigreturn,                /* 173: rt_sigreturn */
1435         lx_rt_sigaction,                /* 174: rt_sigaction */
1436         lx_rt_sigprocmask,              /* 175: rt_sigprocmask */
1437         lx_rt_sigpending,               /* 176: rt_sigpending */
1438         lx_rt_sigtimedwait,             /* 177: rt_sigtimedwait */
1439         lx_rt_sigqueueinfo,             /* 178: rt_sigqueueinfo */
1440         lx_rt_sigsuspend,               /* 179: rt_sigsuspend */
1441         lx_pread64,                     /* 180: pread64 */
1442         lx_pwrite64,                    /* 181: pwrite64 */
1443         NULL,                           /* 182: chown16 */
1444         lx_getcwd,                      /* 183: getcwd */
1445         lx_capget,                      /* 184: capget */
1446         lx_capset,                      /* 185: capset */
1447         lx_sigaltstack,                 /* 186: sigaltstack */
1448         lx_sendfile,                    /* 187: sendfile */
1449         NULL,                           /* 188: getpmsg */
1450         NULL,                           /* 189: putpmsg */
1451         lx_vfork,                       /* 190: vfork */
1452         NULL,                           /* 191: getrlimit */
1453         lx_mmap2,                       /* 192: mmap2 */
1454         lx_truncate64,                  /* 193: truncate64 */
1455         lx_ftruncate64,                 /* 194: ftruncate64 */
1456         lx_stat64,                      /* 195: stat64 */
1457         lx_lstat64,                     /* 196: lstat64 */
1458         lx_fstat64,                     /* 197: fstat64 */
1459         NULL,                           /* 198: lchown */
1460         lx_getuid,                      /* 199: getuid */
1461         lx_getgid,                      /* 200: getgid */
1462         lx_geteuid,                     /* 201: geteuid */
1463         lx_getegid,                     /* 202: getegid */
1464         lx_setreuid,                    /* 203: setreuid */
1465         lx_setregid,                    /* 204: setregid */
1466         lx_getgroups,                   /* 205: getgroups */
1467         lx_setgroups,                   /* 206: setgroups */
1468         NULL,                           /* 207: fchown */
1469         NULL,                           /* 208: setresuid */
1470         lx_getresuid,                   /* 209: getresuid */
1471         NULL,                           /* 210: setresgid */
1472         lx_getresgid,                   /* 211: getresgid */
1473         NULL,                           /* 212: chown */
1474         lx_setuid,                      /* 213: setuid */
1475         lx_setgid,                      /* 214: setgid */
1476         lx_setfsuid,                    /* 215: setfsuid */
1477         lx_setfsgid,                    /* 216: setfsgid */
1478         NULL,                           /* 217: pivot_root */
1479         lx_mincore,                     /* 218: mincore */
1480         lx_madvise,                     /* 219: madvise */
1481         NULL,                           /* 220: getdents64 */
1482         NULL,                           /* 221: fcntl64 */
1483         NULL,                           /* 222: tux */
1484         NULL,                           /* 223: security */
1485         NULL,                           /* 224: gettid */
1486         NULL,                           /* 225: readahead */
1487         NULL,                           /* 226: setxattr */
1488         NULL,                           /* 227: lsetxattr */
1489         NULL,                           /* 228: fsetxattr */
1490         NULL,                           /* 229: getxattr */
1491         NULL,                           /* 230: lgetxattr */
1492         NULL,                           /* 231: fgetxattr */
1493         NULL,                           /* 232: listxattr */
1494         NULL,                           /* 233: llistxattr */
1495         NULL,                           /* 234: flistxattr */
1496         NULL,                           /* 235: removexattr */
1497         NULL,                           /* 236: lremovexattr */
1498         NULL,                           /* 237: fremovexattr */
1499         NULL,                           /* 238: tkill */
1500         lx_sendfile64,                  /* 239: sendfile64 */
1501         NULL,                           /* 240: futex */
1502         lx_sched_setaffinity,           /* 241: sched_setaffinity */
1503         lx_sched_getaffinity,           /* 242: sched_getaffinity */
1504         NULL,                           /* 243: set_thread_area */
1505         NULL,                           /* 244: get_thread_area */
1506         lx_io_setup,                    /* 245: io_setup */
1507         lx_io_destroy,                  /* 246: io_destroy */
1508         lx_io_getevents,                /* 247: io_getevents */
1509         lx_io_submit,                   /* 248: io_submit */
1510         lx_io_cancel,                   /* 249: io_cancel */
1511         lx_fadvise64,                   /* 250: fadvise64 */
1512         NULL,                           /* 251: nosys */
1513         lx_group_exit,                  /* 252: group_exit */
1514         NULL,                           /* 253: lookup_dcookie */
1515         lx_epoll_create,                /* 254: epoll_create */
1516         lx_epoll_ctl,                   /* 255: epoll_ctl */
1517         lx_epoll_wait,                  /* 256: epoll_wait */
1518         NULL,                           /* 257: remap_file_pages */
1519         NULL,                           /* 258: set_tid_address */
1520         lx_timer_create,                /* 259: timer_create */
1521         lx_timer_settime,               /* 260: timer_settime */
1522         lx_timer_gettime,               /* 261: timer_gettime */
1523         lx_timer_getoverrun,            /* 262: timer_getoverrun */
1524         lx_timer_delete,                /* 263: timer_delete */
1525         NULL,                           /* 264: clock_settime */
1526         NULL,                           /* 265: clock_gettime */
1527         NULL,                           /* 266: clock_getres */
1528         lx_clock_nanosleep,             /* 267: clock_nanosleep */
1529         lx_statfs64,                    /* 268: statfs64 */
1530         lx_fstatfs64,                   /* 269: fstatfs64 */
1531         NULL,                           /* 270: tgkill */
1532         lx_utimes,                      /* 271: utimes */
1533         lx_fadvise64_64,                /* 272: fadvise64_64 */
1534         NULL,                           /* 273: vserver */
1535         NULL,                           /* 274: mbind */
1536         NULL,                           /* 275: get_mempolicy */
1537         NULL,                           /* 276: set_mempolicy */
1538         NULL,                           /* 277: mq_open */
1539         NULL,                           /* 278: mq_unlink */
1540         NULL,                           /* 279: mq_timedsend */
1541         NULL,                           /* 280: mq_timedreceive */
1542         NULL,                           /* 281: mq_notify */
1543         NULL,                           /* 282: mq_getsetattr */
1544         NULL,                           /* 283: kexec_load */
1545         NULL,                           /* 284: waitid */
1546         NULL,                           /* 285: sys_setaltroot */
1547         NULL,                           /* 286: add_key */
1548         NULL,                           /* 287: request_key */
1549         NULL,                           /* 288: keyctl */
1550         NULL,                           /* 289: ioprio_set */
1551         NULL,                           /* 290: ioprio_get */
1552         lx_inotify_init,                /* 291: inotify_init */
1553         lx_inotify_add_watch,           /* 292: inotify_add_watch */
1554         lx_inotify_rm_watch,            /* 293: inotify_rm_watch */
1555         NULL,                           /* 294: migrate_pages */
1556         NULL,                           /* 295: openat */
1557         NULL,                           /* 296: mkdirat */
1558         lx_mknodat,                     /* 297: mknodat */
1559         NULL,                           /* 298: fchownat */
1560         lx_futimesat,                   /* 299: futimesat */
1561         lx_fstatat64,                   /* 300: fstatat64 */
1562         lx_unlinkat,                    /* 301: unlinkat */
1563         lx_renameat,                    /* 302: renameat */
1564         lx_linkat,                      /* 303: linkat */
1565         lx_symlinkat,                   /* 304: symlinkat */
1566         lx_readlinkat,                  /* 305: readlinkat */
1567         NULL,                           /* 306: fchmodat */
1568         lx_faccessat,                   /* 307: faccessat */
1569         lx_pselect6,                    /* 308: pselect6 */
1570         lx_ppoll,                       /* 309: ppoll */
1571         NULL,                           /* 310: unshare */
1572         NULL,                           /* 311: set_robust_list */
1573         NULL,                           /* 312: get_robust_list */
1574         NULL,                           /* 313: splice */
1575         NULL,                           /* 314: sync_file_range */
1576         NULL,                           /* 315: tee */
1577         NULL,                           /* 316: vmsplice */
1578         NULL,                           /* 317: move_pages */
1579         NULL,                           /* 318: getcpu */
1580         lx_epoll_pwait,                 /* 319: epoll_pwait */
1581         lx_utimensat,                   /* 320: utimensat */
1582         lx_signalfd,                    /* 321: signalfd */
1583         lx_timerfd_create,              /* 322: timerfd_create */
1584         lx_eventfd,                     /* 323: eventfd */
1585         NULL,                           /* 324: fallocate */
1586         lx_timerfd_settime,             /* 325: timerfd_settime */
1587         lx_timerfd_gettime,             /* 326: timerfd_gettime */
1588         lx_signalfd4,                   /* 327: signalfd4 */
1589         lx_eventfd2,                    /* 328: eventfd2 */
1590         lx_epoll_create1,               /* 329: epoll_create1 */
1591         lx_dup3,                        /* 330: dup3 */
1592         NULL,                           /* 331: pipe2 */
1593         lx_inotify_init1,               /* 332: inotify_init1 */
1594         lx_preadv,                      /* 333: preadv */
1595         lx_pwritev,                     /* 334: pwritev */
1596         lx_rt_tgsigqueueinfo,           /* 335: rt_tgsigqueueinfo */
1597         NULL,                           /* 336: perf_event_open */
1598         NULL,                           /* 337: recvmmsg */
1599         NULL,                           /* 338: fanotify_init */
1600         NULL,                           /* 339: fanotify_mark */
1601         NULL,                           /* 340: prlimit64 */
1602         NULL,                           /* 341: name_to_handle_at */
1603         NULL,                           /* 342: open_by_handle_at */
1604         NULL,                           /* 343: clock_adjtime */
1605         NULL,                           /* 344: syncfs */
1606         NULL,                           /* 345: sendmmsg */
1607         NULL,                           /* 346: setns */
1608         NULL,                           /* 347: process_vm_readv */
1609         NULL,                           /* 348: process_vm_writev */
1610         NULL,                           /* 349: kcmp */
1611         NULL,                           /* 350: finit_module */
1612         NULL,                           /* 351: sched_setattr */
1613         NULL,                           /* 352: sched_getattr */
1614         NULL,                           /* 353: renameat2 */
1615         NULL,                           /* 354: seccomp */
1616         NULL,                           /* 355: getrandom */
1617         NULL,                           /* 356: memfd_create */
1618         NULL,                           /* 357: bpf */
1619         NULL,                           /* 358: execveat */
1620 };
1621 #endif